With the email package from Python's standard library, eml files (the format in which emails can be saved) are easy to parse.
I made a class that gets the attachments, subject, body, etc. from an eml file.
# coding:utf-8
"""
Get data from an eml file in a form that is easy to handle.
This is a minimal implementation, so some cases may not be covered.
"""
import sys
import email
from email.header import decode_header
class MailParser(object):
    """Parse an eml file given its path and expose its main fields.

    After construction the following attributes are populated:

    * ``subject``, ``to_address``, ``cc_address``, ``from_address`` --
      decoded header values (``""`` when the header is absent).
    * ``body`` -- concatenated text of every non-multipart part that has
      no file name.
    * ``attach_file_list`` -- list of ``{"name": file_name, "data": bytes}``
      dicts, one per part that has a file name.
    """

    def __init__(self, mail_file_path):
        """Read *mail_file_path* as bytes and parse it immediately.

        Raises:
            OSError: if the file cannot be opened or read.
        """
        self.mail_file_path = mail_file_path
        # Build an email.message.Message instance from the eml file.
        with open(mail_file_path, 'rb') as email_file:
            self.email_message = email.message_from_bytes(email_file.read())
        self.subject = None
        self.to_address = None
        self.cc_address = None
        self.from_address = None
        self.body = ""
        # Attachment related information
        # {name: file_name, data: data}
        self.attach_file_list = []
        # Interpret the eml content; without this call every field above
        # would keep its placeholder value.
        self._parse()

    def get_attr_data(self):
        """Return the parsed mail data as one human-readable string.

        The report lists the From/To/Cc addresses, the body text and the
        comma-separated attachment file names.
        """
        result = """\
FROM: {}
TO: {}
CC: {}
-----------------------
BODY:
{}
-----------------------
ATTACH_FILE_NAME:
{}
""".format(
            self.from_address,
            self.to_address,
            self.cc_address,
            self.body,
            ",".join([x["name"] for x in self.attach_file_list]),
        )
        return result

    def _parse(self):
        """Fill in the header, body and attachment attributes.

        Called from ``__init__``.
        """
        self.subject = self._get_decoded_header("Subject")
        self.to_address = self._get_decoded_header("To")
        self.cc_address = self._get_decoded_header("Cc")
        self.from_address = self._get_decoded_header("From")

        # Processing of the message body parts
        body_chunks = []
        for part in self.email_message.walk():
            # A multipart container holds its real content in nested parts,
            # which walk() yields separately, so skip the container itself.
            if part.get_content_maintype() == 'multipart':
                continue

            payload = part.get_payload(decode=True)
            if payload is None:
                # No decodable payload for this part (get_payload returns
                # None for container-like parts); nothing to collect.
                continue

            attach_fname = part.get_filename()
            if not attach_fname:
                # No file name: treat the part as body text. The charset
                # may be None, which must not be stringified to "None".
                body_chunks.append(
                    self._decode_bytes(payload, part.get_content_charset())
                )
            else:
                # A file name is present: it is an attachment, keep raw data.
                self.attach_file_list.append({
                    "name": attach_fname,
                    "data": payload,
                })
        self.body += "".join(body_chunks)

    def _get_decoded_header(self, key_name):
        """Return the decoded text of header *key_name*.

        Returns an empty string when the header does not exist.
        """
        raw_obj = self.email_message.get(key_name)
        if raw_obj is None:
            return ""

        fragments = []
        for fragment, encoding in decode_header(raw_obj):
            if not hasattr(fragment, "decode"):
                # Already text; use as is.
                fragments.append(fragment)
            else:
                # Bytes: decode with the declared encoding, or UTF-8 when
                # no encoding was given.
                fragments.append(self._decode_bytes(fragment, encoding))
        return "".join(fragments)

    @staticmethod
    def _decode_bytes(data, charset):
        """Decode *data* with *charset*, falling back to UTF-8.

        Undecodable bytes are replaced rather than raising, and an unknown
        or missing charset name falls back to UTF-8.
        """
        try:
            return data.decode(charset or "UTF-8", errors="replace")
        except LookupError:
            return data.decode("UTF-8", errors="replace")
if __name__ == "__main__":
    # Expect exactly one argument: the path of the eml file to parse.
    if len(sys.argv) != 2:
        sys.exit("usage: {} <path-to-eml-file>".format(sys.argv[0]))
    result = MailParser(sys.argv[1]).get_attr_data()
    # Show the parsed summary; without this the result was silently dropped.
    print(result)
For now, this produces the expected results. I hope it is helpful when handling emails.
Recommended Posts