#!/usr/bin/env python
"""Save Gmail chat logs as ``.eml`` files in the current directory.

The script logs in through the Gmail login form, switches to the basic HTML
view, searches for ``label:chat`` and walks every page of results.  For each
chat it downloads the "original message" view and writes it to ``<th>.eml``,
where ``<th>`` is the thread id taken from the chat link.  Chats whose file
already exists are skipped, so the script can be re-run incrementally.

This is Python 2 code (urllib2, cookielib, BeautifulSoup 3).
"""

import urllib2
from urllib2 import urlopen
from urllib import urlencode
from cookielib import CookieJar
from BeautifulSoup import BeautifulSoup
import re
import os

# Login URL; stays None until one of the two lines below is uncommented.
loginl = None

# Uncomment this line if you use Google Apps for your domain, and insert your domain
#loginl = 'https://mail.google.com/a/example.com/'

# Uncomment this line if you use normal Gmail
#loginl = 'https://mail.google.com/mail/'

# Enter your username and password
username = 'username'
password = 'password'

# Matches a leading blank line.  Without re.MULTILINE, '^' only matches at
# the very start of the text.
clean = re.compile('^ *\n')

# Patterns used on every results page, compiled once.
chat_link = re.compile(r'^\?v=c&.*')
older_link = re.compile('Older')


def _fetch(url, data=None):
    """Return ``(body, final_url)`` for *url*, always closing the response.

    *data*, when given, is sent as the POST body.  ``final_url`` is the
    response's ``url`` attribute.
    """
    response = urlopen(url, data)
    try:
        return response.read(), response.url
    finally:
        response.close()


def _thread_id(href):
    """Return the ``th`` query parameter of *href*, or None if it is absent.

    Parts without '=' are ignored and values are split on the first '='
    only, so unusual links cannot raise while building the mapping.
    """
    params = dict(part.split('=', 1) for part in href.split('&') if '=' in part)
    return params.get('th')


if loginl is None:
    # Fail with a clear message instead of a NameError further down.
    raise SystemExit('No login URL configured: uncomment one of the '
                     '"loginl" lines at the top of this script')

# Setup cookie handling: install a global opener so every urlopen() call
# shares the same cookie jar (and therefore the login session).
cookies = CookieJar()
opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cookies))
urllib2.install_opener(opener)

# Load the login page
print('Loading login page')
login = BeautifulSoup(_fetch(loginl)[0])

# Fill in the POST data: copy every pre-filled form input, then add the
# credentials.
data = {}
for field in login.form.findAll('input'):
    if field.has_key('value') and field.has_key('name'):
        data[field['name']] = field['value']
data['Email'] = username
data['Passwd'] = password

# Submit the login information and get the "loading" page
print('Logging in')
load_html, load_url = _fetch(login.form['action'], urlencode(data))
load = BeautifulSoup(load_html)

# Follow the redirect (only for normal Gmail)
redirect = load.findAll('meta', attrs={'http-equiv': 'refresh'})
if redirect:
    # The target is whatever follows the first '=' in the content
    # attribute, with surrounding single quotes removed.
    url = redirect[0]['content'].split('=', 1)[1].strip("'")
    load_url = _fetch(url)[1]

# Load the HTML view
print('Loading main page')
main = BeautifulSoup(_fetch(load_url + '?ui=html&zy=c')[0])

# Search for chats
nextl = main.base['href'] + '?s=q&q=label%3Achat&nvp_site_mail=Search%2DMail'

# Loop through all pages of the search
while nextl is not None:
    # Load the search result
    print('Loading search results: ' + nextl)
    search = BeautifulSoup(_fetch(nextl)[0])
    base = search.html.base['href']

    # Process each entry
    for chat in search.findAll('a', href=chat_link):
        thread_id = _thread_id(chat['href'])
        if thread_id is None:
            # Link carries no thread id; nothing to download.
            continue

        # Check if we've already saved this chat
        filename = thread_id + '.eml'
        if os.path.isfile(filename):
            continue

        print('Saving chat ' + thread_id)
        orig = _fetch(base + '?v=om&th=' + thread_id)[0]

        # Make sure that it's a valid chat.  find() returns -1 when the
        # header is missing; a match at offset 0 is rejected as well.
        if orig.find('Message-ID') < 1:
            print(thread_id + ' is not a valid chat')
            continue

        # Clean up and save: drop carriage returns and a leading blank
        # line, if there is one.
        orig = orig.replace('\r', '')
        orig = clean.sub('', orig, 1)
        with open(filename, 'w') as save:
            save.write(orig)

    # Find the link to the next results page
    older = search.findAll('b', text=older_link)
    if older:
        nextl = base + older[0].parent.parent['href']
    else:
        nextl = None

print('Done')