I handle general affairs at a small company (about 30 people). Naturally there is no budget for an attendance management system; it is the kind of company where people ask "what is an attendance management system in the first place?". As a result, every month we receive a large number of paid-leave notifications and holiday-work notifications, filled in on a form made with Excel "graph paper" (a sheet formatted as a grid). So, after some study, I came up with the following workflow: search the emails by date in Thunderbird (by brute force), save them as .eml files by drag and drop, and then extract all the attachments with a script.
The parser below is based on https://qiita.com/denzow/items/a42d344fa343cd80cf86 and was changed as follows so that the date can also be extracted.
eml_read.py
import datetime
import email
import sys
from email.header import decode_header
from email.utils import parsedate_to_datetime
class MailParser(object):
    """
    Parse one .eml file and expose its headers, body text and attachments.

    Attributes:
        mail_file_path: Path of the mail file that was parsed.
        email_message: Message object built from the file's bytes.
        subject, to_address, cc_address, from_address: Decoded header
            values; an empty string when the header is absent.
        body: Concatenated text of every text part that has no file name.
        date: ``datetime`` parsed from the Date header, or ``None`` when the
            header is missing or cannot be parsed.
        attach_file_list: One ``{"name": ..., "data": ...}`` dict per part
            that carries a file name. ``name`` is whatever ``get_filename()``
            returned (it may still contain RFC 2047 encoded words) and
            ``data`` is the decoded payload as bytes.
    """

    def __init__(self, mail_file_path):
        """Read *mail_file_path* as bytes and parse it immediately."""
        self.mail_file_path = mail_file_path
        # Build the Message instance from the raw bytes of the eml file.
        with open(mail_file_path, 'rb') as email_file:
            self.email_message = email.message_from_bytes(email_file.read())
        self.subject = None
        self.to_address = None
        self.cc_address = None
        self.from_address = None
        self.body = ""
        self.date = None
        # Attachment related information
        # {name: file_name, data: data}
        self.attach_file_list = []
        # Interpretation of the eml
        self._parse()

    def get_attr_data(self):
        """
        Return a printable multi-line summary of the parsed mail.

        The summary lists the date, From/To/Cc headers, the body text and
        the comma-separated attachment names.
        """
        # The template is deliberately written at column 0 so that no
        # indentation leaks into the formatted text.
        template = """\
DATE: {}
FROM: {}
TO: {}
CC: {}
-----------------------
BODY:
{}
-----------------------
ATTACH_FILE_NAME:
{}
"""
        attach_names = ",".join(x["name"] for x in self.attach_file_list)
        return template.format(
            self.date,
            self.from_address,
            self.to_address,
            self.cc_address,
            self.body,
            attach_names,
        )

    def _parse(self):
        """
        Fill the header, body and attachment attributes.

        Called once from ``__init__``.
        """
        self.subject = self._get_decoded_header("Subject")
        self.to_address = self._get_decoded_header("To")
        self.cc_address = self._get_decoded_header("Cc")
        self.from_address = self._get_decoded_header("From")
        self.date = self._parse_date(self._get_decoded_header("Date"))

        body_chunks = [self.body]
        for part in self.email_message.walk():
            # A multipart container holds its real content in child parts,
            # which walk() yields separately.
            if part.get_content_maintype() == 'multipart':
                continue

            payload = part.get_payload(decode=True)
            # get_payload(decode=True) returns None for parts that are
            # themselves containers (e.g. an attached message/rfc822);
            # their children are visited by walk() on their own.
            if payload is None:
                continue

            attach_fname = part.get_filename()
            if attach_fname:
                # A part with a file name is an attachment.
                self.attach_file_list.append({
                    "name": attach_fname,
                    "data": payload,
                })
                continue

            # Without a file name the part is treated as body text, but only
            # textual parts are decoded so binary data never ends up in it.
            if part.get_content_maintype() != 'text':
                continue
            body_chunks.append(
                self._decode_bytes(payload, part.get_content_charset())
            )
        self.body = "".join(body_chunks)

    @staticmethod
    def _parse_date(value):
        """
        Convert a Date header value to a ``datetime``.

        Returns ``None`` for an empty or unparseable value. The RFC 5322
        parser is used instead of ``strptime`` because it does not depend on
        the process locale and accepts common variants such as a trailing
        ``(JST)`` comment or a missing day name.
        """
        if not value:
            return None
        try:
            return parsedate_to_datetime(value)
        except (TypeError, ValueError):
            # TypeError on older Pythons, ValueError on newer ones.
            return None

    @staticmethod
    def _decode_bytes(raw, charset):
        """
        Decode *raw* bytes with *charset*, falling back to UTF-8.

        Undecodable bytes are replaced rather than raising, and a charset
        name Python does not know is treated like a missing one.
        """
        try:
            return raw.decode(charset or "utf-8", errors="replace")
        except LookupError:
            return raw.decode("utf-8", errors="replace")

    def _get_decoded_header(self, key_name):
        """
        Return the header *key_name* decoded to ``str``.

        A header that is not present yields an empty string.
        """
        raw_obj = self.email_message.get(key_name)
        if raw_obj is None:
            return ""
        pieces = []
        for fragment, encoding in decode_header(raw_obj):
            if isinstance(fragment, str):
                # Already text: no encoded words were present.
                pieces.append(fragment)
            else:
                # Fragments without a declared encoding are read as UTF-8.
                pieces.append(self._decode_bytes(fragment, encoding))
        return "".join(pieces)
if __name__ == "__main__":
    # Script entry point: print a summary of the mail file given as the
    # first command-line argument.
    if len(sys.argv) < 2:
        # Exit with a readable message instead of an IndexError traceback.
        sys.exit("usage: python eml_read.py <mail_file.eml>")
    print(MailParser(sys.argv[1]).get_attr_data())
I am an amateur, so please forgive the odd way the variables are named.
save_attachmentfile.py
import eml_read
import glob
import os
from email.header import decode_header
import datetime
# Both folder settings end with a backslash.
_TRAILING_SEP = "\\"
# Folder where the .eml files were saved.
PATH = r"C:\Users\toshi\***" + _TRAILING_SEP
# Folder that receives the extracted attachments.
save_file_path = r"C:\Users\***" + _TRAILING_SEP
# Characters removed from generated file names: the set the script always
# stripped, plus backslash, "?" and ASCII control characters, none of which
# belong in a file name taken from an untrusted mail header.
_FORBIDDEN_FILE_NAME_CHARS = str.maketrans(
    "", "", '<>!/:*"|\\?' + "".join(map(chr, range(32)))
)


def _decode_attachment_name(raw_name):
    """Decode every RFC 2047 fragment of *raw_name* and join them."""
    pieces = []
    for fragment, charset in decode_header(raw_name):
        if isinstance(fragment, bytes):
            try:
                fragment = fragment.decode(charset or "utf-8", errors="replace")
            except LookupError:
                # Charset name unknown to Python: fall back to UTF-8.
                fragment = fragment.decode("utf-8", errors="replace")
        pieces.append(fragment)
    return "".join(pieces)


def _date_prefix(mail_date):
    """Format *mail_date* as ``yy.m.d`` (no zero padding on month/day)."""
    if mail_date is None:
        # No usable Date header; keep the file name well-formed anyway.
        return "nodate"
    return "{}.{}.{}".format(
        mail_date.strftime("%y"), mail_date.month, mail_date.day
    )


def save_attachmentfile(file_path_list):
    """
    Save every attachment of the given eml files into ``save_file_path``.

    Each mail in *file_path_list* is parsed with ``eml_read.MailParser``.
    An attachment is written as ``<yy.m.d><first 3 characters of the From
    header><decoded attachment name>`` with forbidden characters removed.
    The date of each mail is printed as a progress indication.

    NOTE: the target is opened in write mode, so two attachments that map to
    the same file name overwrite each other.
    """
    for file_path in file_path_list:
        obj_eml = eml_read.MailParser(file_path)
        eml_date = obj_eml.date
        print(eml_date)
        # Date and sender are the same for every attachment of this mail.
        prefix = _date_prefix(eml_date) + obj_eml.from_address[0:3]
        for attachment in obj_eml.attach_file_list:
            data = attachment["data"]
            if data is None:
                # Nothing decodable to write for this part.
                continue
            file_name = (
                prefix + _decode_attachment_name(attachment["name"])
            ).translate(_FORBIDDEN_FILE_NAME_CHARS)
            with open(os.path.join(save_file_path, file_name), mode="bw") as f:
                f.write(data)
def search_eml(file_path):
    """
    Return the paths of all ``*.eml`` files directly inside *file_path*.

    The directory name is escaped so characters such as ``[`` are matched
    literally rather than as glob patterns, and the result is sorted so the
    files are always processed in the same order.
    """
    pattern = os.path.join(glob.escape(file_path), '*.eml')
    return sorted(glob.glob(pattern))
if __name__ == "__main__":
    # Collect the eml files found under PATH, then write out their
    # attachments.
    lst = search_eml(PATH)
    save_attachmentfile(lst)
https://qiita.com/denzow/items/a42d344fa343cd80cf86 https://stackoverflow.com/questions/21711404/how-to-get-decode-attachment-filename-with-python-email
Recommended Posts