From: Olivier Matz
Date: Sun, 29 Nov 2015 10:51:34 +0000 (+0100)
Subject: utils: fix decoding of invalid headers
X-Git-Url: http://git.droids-corp.org/?p=imapami.git;a=commitdiff_plain;h=6a57a643b9616029a8142197a27cd76caf7dc5f5

utils: fix decoding of invalid headers

For some mails, the following exception was thrown:

  Traceback (most recent call last):
    File "/usr/local/bin/imapami", line 9, in <module>
      load_entry_point('imapami==0.1.0', 'console_scripts', 'imapami')()
    File "build/bdist.linux-x86_64/egg/imapami/__init__.py", line 321, in main
    File "build/bdist.linux-x86_64/egg/imapami/__init__.py", line 221, in process_rules
    File "build/bdist.linux-x86_64/egg/imapami/rules.py", line 251, in process
    File "build/bdist.linux-x86_64/egg/imapami/actions.py", line 149, in process
    File "build/bdist.linux-x86_64/egg/imapami/actions.py", line 317, in process
    File "build/bdist.linux-x86_64/egg/imapami/actions.py", line 98, in evaluate
    File "build/bdist.linux-x86_64/egg/imapami/utils.py", line 65, in headers_to_unicode
    File "/usr/lib/python2.7/email/header.py", line 108, in decode_header
      raise HeaderParseError
  email.errors.HeaderParseError

It can be reproduced manually with:

  >>> import email.header
  >>> email.header.decode_header('=?UTF-8?B?UG91ciB2b3MgYWNoYXRzIGRlIE5vw6tsLCBwcm9maXRleiBkJ3VuIGRv?==?UTF-8?B?dWJsZSBhdmFudGFnZSAh?=')

According to:
  http://stackoverflow.com/questions/7384006/email-header-decode-headers-throws-an-headerparseerror
a workaround is to fix the header manually: when decoding fails, rewrite
every "==" that is not at the end of the header as "= =" (which separates
two encoded words that were joined without whitespace) and decode again.
If that still fails, fall back to treating the raw header as UTF-8.

Signed-off-by: Olivier Matz --- diff --git a/imapami/utils.py b/imapami/utils.py index abf79b0..291b71f 100644 --- a/imapami/utils.py +++ b/imapami/utils.py @@ -28,6 +28,7 @@ # import email.header +import re # pylint: disable=deprecated-module # see https://www.logilab.org/ticket/2481 @@ -62,7 +63,16 @@ def headers_to_unicode(headers): """ unicode_headers = {} for key, hdr in headers.items(): - value, encoding = email.header.decode_header(hdr)[0] + try: + value, encoding = email.header.decode_header(hdr)[0] + except email.header.HeaderParseError: + try: + # try to workaround badly formatted RFC2047 tokens + hdr = re.sub(r"(==)(?!$)", u"= =", hdr) + value, encoding = email.header.decode_header(hdr)[0] + except email.header.HeaderParseError: + # fallback to wrong decoding + value, encoding = hdr, 'utf-8' if encoding is None: value = unicode(value) else: