Skip to content

Commit 666e12f

Browse files
committed Jan 31, 2015
A hastily-written Gmail inbox state logger
0 parents  commit 666e12f

File tree

3 files changed

+108
-0
lines changed

3 files changed

+108
-0
lines changed
 

‎.gitignore

+3
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,3 @@
1+
*.pyc
2+
.DS_Store
3+
secret.py

‎imap_inbox_check.py

+79
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,79 @@
1+
"""Count the number of unique message threads in a Gmail inbox, by logging
2+
in with IMAP and checking the count of unique values of the X-GM-THRID header.
3+
4+
This example was helpful:
5+
http://yuji.wordpress.com/2011/06/22/python-imaplib-imap-example-with-gmail/
6+
7+
"""
8+
import imaplib
9+
import re
10+
from collections import defaultdict
11+
from dateutil.parser import parser
12+
from operator import itemgetter
13+
14+
# messages fetched with query '(ENVELOPE X-GM-THRID)' look like:
15+
# 145 (X-GM-THRID 1490479738467641057 UID 63980 ENVELOPE ("Fri, 16 Jan 2015 18:17:21 +0000 (UTC)" "Mark, please add me to your LinkedIn network" (("Terry Kim" NIL "member" "linkedin.com")) (("Terry Kim" NIL "member" "linkedin.com")) ((NIL NIL "terry" "yelp.com")) (("Mark Wilson" NIL "mark.wilson" "aya.yale.edu")) NIL NIL NIL "<979753161.7821475.1421432241882.JavaMail.app@lva1-app1733.prod>"))
16+
17+
# Patterns that pull individual fields out of one raw '(ENVELOPE X-GM-THRID)'
# fetch response. Raw strings keep the regex escapes (\d, \() away from
# Python's own string-escape processing.
thread_id_re = re.compile(r'X-GM-THRID (\d+)')
date_re = re.compile(r'ENVELOPE \("([^"]+)"')
subject_re = re.compile(r'ENVELOPE \("[^"]+" "([^"]*)"')
# The sender's display name is an unquoted NIL when the address has none; in
# that case group 1 is None. Groups 2 and 3 are the two quoted halves of the
# address (before and after the '@').
from_re = re.compile(
    r'ENVELOPE \("[^"]+" "[^"]*" \(\((?:"([^"]*)"|NIL) NIL "([^"]*)" "([^"]*)"'
)
21+
22+
# Single dateutil parser instance shared by date_str_to_timestamp().
date_parser = parser()
23+
24+
def re_partial(regex):
    """Build an extractor that returns the first capture group of *regex*.

    The returned callable searches its argument with ``regex`` and returns
    ``group(1)`` of the match. It raises AttributeError when nothing
    matches, because ``search`` then returns None.
    """
    def extract_first_group(envelope):
        match = regex.search(envelope)
        return match.group(1)

    return extract_first_group
26+
27+
def date_str_to_timestamp(date_str):
    """Convert a message date string to an integer Unix timestamp.

    The string is parsed with the module-level ``date_parser``. A date that
    carries a UTC offset is converted through UTC, so messages sent from
    different time zones compare correctly. A date without an offset is
    interpreted as local time.
    """
    # Local imports keep this fix self-contained within the function.
    import calendar
    import time

    parsed = date_parser.parse(date_str)
    # strftime('%s') is a platform extension that ignores tzinfo and treats
    # the fields as local time, so it is avoided here.
    if parsed.tzinfo is not None and parsed.utcoffset() is not None:
        return calendar.timegm(parsed.utctimetuple())
    return int(time.mktime(parsed.timetuple()))
29+
30+
def get_from(envelope):
    """Extract the sender from a raw ENVELOPE fetch response.

    Returns a two-item list ``[name, address]``, where ``address`` joins the
    second and third groups of ``from_re`` with an '@'. When ``from_re`` does
    not match the response at all, ``[None, None]`` is returned so that one
    unparsable message does not abort the whole run.
    """
    result = from_re.search(envelope)
    if result is None:
        return [None, None]
    name, mailbox, host = result.group(1, 2, 3)
    return [name, '%s@%s' % (mailbox, host)]
34+
35+
def gmail_thread_info(email, password):
    """Summarise a Gmail inbox as one record per conversation thread.

    Logs in to imap.gmail.com, fetches ENVELOPE and X-GM-THRID for every
    message in INBOX, groups the messages by thread id and keeps a single
    representative message per thread.

    Args:
        email: Address to log in with.
        password: Password for that account.

    Returns:
        A list of dicts with the keys 'thread_id', 'date', 'date_ts',
        'subject' and 'from', sorted by 'date_ts' with the newest first.
        The list is empty when the inbox holds no messages.
    """
    inbox = _fetch_inbox_envelopes(email, password)

    # Group messages into Gmail threads
    thread_id_to_messages = defaultdict(list)
    for m in inbox:
        thread_id_to_messages[re_partial(thread_id_re)(m)].append(m)

    # Pull out the important stuff from one message per thread
    out = []
    for messages in thread_id_to_messages.values():
        message = _pick_thread_representative(messages)
        date = re_partial(date_re)(message)
        out.append({
            'thread_id': re_partial(thread_id_re)(message),
            'date': date,
            'date_ts': date_str_to_timestamp(date),
            'subject': re_partial(subject_re)(message),
            'from': get_from(message),
        })

    # Sort by timestamp, newest first
    return sorted(out, key=itemgetter('date_ts'), reverse=True)


def _fetch_inbox_envelopes(email, password):
    """Return the raw '(ENVELOPE X-GM-THRID)' fetch responses for INBOX."""
    mail = imaplib.IMAP4_SSL('imap.gmail.com')
    try:
        mail.login(email, password)
        mail.select('INBOX')

        _, (uid_list,) = mail.uid('search', None, 'ALL')
        # An empty mailbox yields no UIDs; fetching an empty UID set would
        # be rejected, so stop here instead.
        uids = uid_list.split() if uid_list else []
        if not uids:
            return []

        _, inbox = mail.uid('fetch', ','.join(uids), '(ENVELOPE X-GM-THRID)')
        return inbox
    finally:
        # Always end the session so the connection is not left open.
        mail.logout()


def _pick_thread_representative(messages):
    """Return the first message whose subject lacks 'Re: ', else the first."""
    if len(messages) == 1:
        return messages[0]
    subject_of = re_partial(subject_re)
    for m in messages:
        if not subject_of(m).startswith('Re: '):
            return m
    return messages[0]

‎log_inbox.py

+26
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,26 @@
1+
"""Check the state of a Gmail inbox, then log the number of threads to a
2+
tab-separated file, and summary information about the threads to a JSON file.
3+
"""
4+
from datetime import datetime
5+
6+
import simplejson as json
7+
8+
import secret
9+
from imap_inbox_check import gmail_thread_info
10+
11+
import time

# Tab-separated log that gets one line appended per run.
LOG_FILE = 'inbox_count.log'


def log_date(d):
    """Format *d* for the human-readable column of the count log."""
    return d.strftime('%Y-%m-%d %H:%M:%S')


def json_file_date(d):
    """Format *d* for use as the base name of the per-run JSON file."""
    return d.strftime('%Y-%m-%d_%H.%M.%S')


def unix_date(d):
    """Return the naive local datetime *d* as a Unix timestamp string."""
    # strftime('%s') is a platform extension; time.mktime is portable.
    return str(int(time.mktime(d.timetuple())))


now = datetime.now()
info = gmail_thread_info(secret.email, secret.password)
log_line = '%s\t%s\t%s' % (unix_date(now), log_date(now), len(info))
# The parenthesised single-argument form works as a statement and a function.
print(log_line)

with open(LOG_FILE, 'a') as f:
    f.write(log_line + '\n')

with open(json_file_date(now) + '.json', 'w') as f:
    f.write(json.dumps(info))

0 commit comments

Comments
 (0)
Please sign in to comment.