-
Notifications
You must be signed in to change notification settings - Fork 43
/
Copy pathparseeml.py
110 lines (100 loc) · 3.42 KB
/
parseeml.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
import argparse
import os
import sys
import re
import base64
from email.header import decode_header
import email
from lxml import html
import quopri
def decode_html(body, quoted=True):
"""
Extract links and images from html
"""
if quoted:
tree = html.fromstring(quopri.decodestring(body))
else:
tree = html.fromstring(body)
links = set(tree.xpath("//a/@href"))
images = set(tree.xpath("//img/@src"))
print("Links:")
for a in links:
print(a)
print("")
print("Images:")
for a in images:
print(a)
def decode_plain(body, quoted=True):
if quoted:
body = quopri.decodestring(body)
urls = re.findall(
"http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+",
body,
)
if len(urls) > 0:
print("Urls:")
for u in set(urls):
print(u)
else:
print("No urls identified")
if __name__ == "__main__":
parser = argparse.ArgumentParser(
description="Extract attached files from an eml file"
)
parser.add_argument("EMAIL", help="EML file")
parser.add_argument("--dump", "-D", help="Dump attachments in the given folder")
parser.add_argument("--show", "-s", type=int, help="Show one object")
args = parser.parse_args()
if not os.path.isfile(args.EMAIL):
print("Invalid file path")
sys.exit(1)
with open(args.EMAIL, "rb") as ff:
raw_email = ff.read()
msg = email.message_from_bytes(raw_email)
print("==== Headers")
print("From: {}".format(msg["From"]))
print("To: {}".format(msg["To"]))
if msg["Subject"].startswith("=?"):
h = decode_header(msg["Subject"])
print("Subject: {}".format(h[0][0].decode(h[0][1])))
else:
print("Subject: {}".format(msg["Subject"]))
print("Date: {}".format(msg["Date"]))
if msg["Reply(To"]:
print("Reply-To: {}".format(msg["Reply-To"]))
if msg.is_multipart():
print("Multipart Email")
print("")
if msg.is_multipart():
for p in msg.get_payload():
print("==== Part")
print("Type: {}".format(p.get_content_type()))
if p.get_filename():
print("Name: {}".format(p.get_filename()))
print("Length: {} bytes".format(len(p.get_payload(decode=True))))
if p.get_content_type() == "text/plain":
content = p.get_payload(decode=True).decode(p.get_content_charset())
decode_plain(content, quoted=False)
elif p.get_content_type() == "text/html":
content = p.get_payload(decode=True)
decode_html(content)
else:
print("Content type not analyzed")
if args.dump:
if not os.path.isdir(args.dump):
os.mkdir(args.dump)
if p.get_filename() is None:
filename = "default_" + p.get_content_type().replace("/", "_")
else:
filename = p.get_filename()
with open(os.path.join(args.dump, filename), "wb+") as f:
f.write(p.get_payload(decode=True))
print("File dumped in {}".format(filename))
print("")
else:
body = msg.get_payload()
ptype = msg["Content-Type"]
if "text/html" in ptype:
decode_html(body)
elif "text/plain" in ptype:
decode_plain(body)