#!/usr/bin/env python
# (c) 2008 Eelco v Beek
# This is released under the GPL v3.
# 05/jul 2008 - EvB First Release
# 06/jul 2008 - Fixed a little bug that caused an invalid report at the end
# 07/jul 2008 - Fixed yet another little bug that caused an invalid count of successful transactions in the report

import email.mime.text
import email.Parser

import sys
import os, os.path
import time

import urllib
import urllib2
import httplib

import base64

from xml.dom.minidom import Document
from xml.dom import minidom

# Timer (in seconds) used for being nice to Google's API infrastructure;
# start off with 10 seconds.
SleepTimer = 10

# This is how much we want to put through per request to Google.
# It's set at 1MB to be sure that a network problem won't kill our progress
# (FIXME: recovery after a network failure is not implemented)
maxbytes = 1*1024*1024

# SETTING: Here you can decide if you want the filepath added as a google label to the message
usePathAsLabel = 1

# Delivery report, keyed by batch id (string). Each value is the list
# [filename, status code, label, base64 encoded message]. Entries are removed
# when Google answers 201, so whatever is left at the end is reported as failed.
reportlist = {}
 
# Number of XML batch documents created so far (incremented by setupXMLDoc)
docid = 0

# Number of messages for which Google answered 201
succescount = 0

def AuthenticateToGooge():
	"""Log in at Google's ClientLogin service and return the auth key.

	Posts the account settings to the ClientLogin URL and keeps trying
	until the response contains a usable "Auth=" value.  Between attempts
	it sleeps for SleepTimer seconds and then doubles the module level
	SleepTimer, the same back-off the delivery code uses.

	Errors raised by urllib2.urlopen (including a 403 for rejected
	credentials) are not caught here and propagate to the caller.
	"""
	# SleepTimer is rebound below; without this declaration the first retry
	# raised UnboundLocalError.
	global SleepTimer

	GoogleAuthUrl = "https://www.google.com/accounts/ClientLogin"

	# SETTING:
	# encode the settings in a post request
	# below the settings for authentication (ref: http://code.google.com/apis/apps/email_migration/developers_guide_protocol.html)
	data = urllib.urlencode({"Email" : "youremailaddresshere", "Passwd" : "yourpasswordhere", "accountType":"hosted", "service":"apps" })

	# keeps on going until authentication to google has taken place
	while True:
		f = urllib2.urlopen(GoogleAuthUrl, data)
		try:
			s = f.read()
		finally:
			# always release the connection, also when reading fails
			f.close()
		print("Incoming from auth request : "+s)

		# The response body is a list of KEY=value lines; the key we need is
		# on the "Auth=" line.  Taking everything after the prefix keeps keys
		# that themselves contain '=' intact.
		authKey = ""
		for line in s.splitlines():
			if line.startswith("Auth="):
				authKey = line[len("Auth="):].strip()

		# sanity check: a real key is a lot longer than 10 characters
		if len(authKey) > 10:
			print("Received auth key ["+authKey+"]")
			return authKey

		print("No authentication, sleeping for "+str(SleepTimer)+ "seconds (this increases exponentially as Google likes it)")
		time.sleep(SleepTimer)
		# double the waiting time (squaring it went from 10 seconds to almost
		# three hours in two steps)
		SleepTimer = SleepTimer + SleepTimer
			
def ParseGoogleResponse(gresponse):
	"""Use Google's batch response to decide what worked and what did not.

	gresponse is the XML text returned for one batch request.  For every
	atom:entry the batch id and the status code are read and the matching
	reportlist entry is updated:

	  201   - entry is removed from reportlist and succescount is increased
	  503   - entry is added to a new document that is delivered again
	  other - the status code is kept in reportlist for the final report

	When entries were queued again this sleeps for SleepTimer seconds,
	doubles SleepTimer and passes the retry document to GoogleDeliver;
	otherwise SleepTimer is reset to 10.

	Raises KeyError when the response mentions a batch id that is not in
	reportlist.
	"""
	global docid
	global reportlist
	global SleepTimer
	# This used to read "successcount", which left succescount local and made
	# the first accepted message raise UnboundLocalError.
	global succescount

	NewDocCreated = 0

	# parse the XML
	xmldoc = minidom.parseString(gresponse)

	for node in xmldoc.getElementsByTagName('atom:entry'):
		# NOTE: id and status are picked by child position, so this depends
		# on the element order inside each entry of the response.
		batchid = str(node.childNodes[4].firstChild.data) # batch:id
		StatusCode = node.childNodes[5].getAttribute('code') # status

		batchdata = reportlist[batchid]
		# batchdata[0] the filename, batchdata[1] the returncode
		# batchdata[2] is the label, batchdata[3] the message itself (base64 encoded!)

		batchdata[1] = StatusCode

		if StatusCode == "201":
			# delivered, delete this from the report queue
			del reportlist[batchid]
			succescount = succescount + 1
		elif StatusCode == "503":
			# we're going too fast for google, we need to add this one to a new queue
			if not NewDocCreated:
				doc,feed = setupXMLDoc()
				NewDocCreated = 1
			print("Adding batchentry "+batchid+" to a retry queue because Google responded 503")
			addEntry(doc, feed, batchdata[3], batchdata[2], batchdata[0], batchid)
		else:
			# set the status (other error reports will be caught this way)
			reportlist[batchid] = batchdata

	if NewDocCreated:
		# This is where the recursion happens: GoogleDeliver calls SendToGoogle,
		# which calls this function again to parse the next response.  To keep
		# Google from overheating (again) we sleep first and double the
		# SleepTimer, so a recursive call waits twice as long.
		print("Double backing [time out for "+str(SleepTimer)+" seconds]")
		time.sleep(SleepTimer)
		SleepTimer = SleepTimer + SleepTimer

		# call the delivery process; authKey is the module level auth key
		GoogleDeliver(authKey, doc)

	else:
		# everything went ok, so reset the timer
		SleepTimer = 10

	print("<--- Delivery of batch ["+str(docid)+"] Done")
	
	
def SendToGoogle(key, data):
	"""Post one batch document to the migration feed.

	key  -- the auth key, sent in the Authorization header
	data -- the XML document (string) that is used as the request body

	Returns 0 after the response has been handed to ParseGoogleResponse,
	or the HTTP status code when Google answered with an HTTP error (the
	caller looks for 403 and 503).  Other urllib2 errors, such as a failing
	network connection, are not caught here.
	"""
	# SETTING:
	# this will send the XML data to google using auth key : key
	GoogleAPIUrl = "http://apps-apis.google.com/a/feeds/migration/2.0/eelco.com/eelco/mail/batch"

	req = urllib2.Request(GoogleAPIUrl)
	req.add_header('Authorization','GoogleLogin auth=' + key)
	req.add_header('Content-Type','application/atom+xml')

	print("Opening URL")
	# setup debug in the urllib2, won't work with SSL!
	# h=urllib2.HTTPHandler(debuglevel=1)
	# opener = urllib2.build_opener(h)
	# urllib2.install_opener(opener)

	try:
		incoming = urllib2.urlopen(req, data).read()
	except urllib2.HTTPError as e:
		# HTTPError has to be referenced through urllib2: the bare name was
		# never imported, so every HTTP error ended in a NameError.
		# A 403 is an auth error, a 503 means we are too fast for mr Google.
		# A 503 can also be returned on a batchid level, ParseGoogleResponse
		# takes care of that case.
		print("Request had an error")
		return e.code

	# parse the incoming
	ParseGoogleResponse(incoming)

	# success
	return 0
	
	
# FIXME: all imports must be done within 24 hours, need an update on an auth error response
# from google.

# get the directory to process; this is checked before logging in, so a missing
# or wrong argument gives a usage message instead of a traceback after the login
if len(sys.argv) < 2 or not os.path.isdir(sys.argv[1]):
	sys.exit("Usage: "+sys.argv[0]+" <directory with mail files>")
scandir = sys.argv[1]

authKey = AuthenticateToGooge()
print("Authenticated, starting mail transport session")

# reset the SleepTimer
SleepTimer = 10

# mailcount, use this for the batches per session
mcount = 0

# stack of directories that still have to be scanned
directories = [scandir]

docid = 0

def setupXMLDoc():
	"""Start a new batch document.

	Increases the module level docid by one and returns the tuple
	(document, feed element); the feed element is the root of the document
	and carries the Atom, batch and gd namespace declarations.
	"""
	global docid
	docid += 1

	xmldoc = Document()
	root = xmldoc.createElement("feed")
	namespaces = (
		('xmlns', "http://www.w3.org/2005/Atom"),
		('xmlns:batch', "http://schemas.google.com/gdata/batch"),
		('xmlns:gd', "http://schemas.google.com/g/2005"),
	)
	for attribute, uri in namespaces:
		root.setAttribute(attribute, uri)
	xmldoc.appendChild(root)
	return xmldoc, root

def addEntry(doc, feed, data, label, fname, batchid):
	"""Add one mail message to a batch document and to the report.

	doc     -- the minidom Document the new elements are created with
	feed    -- the feed element the new entry is appended to
	data    -- the message, base64 encoded
	label   -- label for the message, only added when usePathAsLabel is set
	fname   -- file name of the message, kept for the report
	batchid -- id of this entry within the batch

	The entry is registered in reportlist under str(batchid) as
	[fname, "000", label, data].
	"""
	# Normalise the id once, so the report key and the <batch:id> text always
	# agree; createTextNode refuses anything that is not a string.
	batchid = str(batchid)
	appsns = "http://schemas.google.com/apps/2006"

	reportlist[batchid] = [fname,"000",label,data]

	entry = doc.createElement("entry")

	category = doc.createElement("category")
	category.setAttribute('term',"http://schemas.google.com/apps/2006#mailItem")
	category.setAttribute('scheme',"http://schemas.google.com/g/2005#kind")
	entry.appendChild(category)

	# the message itself; the encoding attribute has to match the way the
	# caller encoded data (base64)
	appsrfc822msg = doc.createElement('apps:rfc822Msg')
	appsrfc822msg.setAttribute('xmlns:apps',appsns)
	appsrfc822msg.setAttribute('encoding',"base64")
	appsrfc822msg.appendChild(doc.createTextNode(data))
	entry.appendChild(appsrfc822msg)

	appsmailitemproperty = doc.createElement("apps:MailItemProperty")
	appsmailitemproperty.setAttribute('value',"IS_INBOX")
	appsmailitemproperty.setAttribute('xmlns:apps',appsns)
	entry.appendChild(appsmailitemproperty)

	# add the label if set
	if usePathAsLabel:
		appslabel = doc.createElement('apps:label')
		appslabel.setAttribute('labelName',label)
		appslabel.setAttribute('xmlns:apps',appsns)
		entry.appendChild(appslabel)

	# custom label to identify this migration
	customlabel = doc.createElement('apps:label')
	customlabel.setAttribute('labelName',"GoogleMailPy")
	customlabel.setAttribute('xmlns:apps',appsns)
	entry.appendChild(customlabel)

	batch = doc.createElement('batch:id')
	batch.appendChild(doc.createTextNode(batchid))
	entry.appendChild(batch)

	feed.appendChild(entry)
	
def GoogleDeliver(authKey, doc):
	"""Deliver one batch document to Google, retrying on 403 and 503.

	authKey -- the auth key to send with the request
	doc     -- the minidom Document that holds the batch

	403: a new key is requested once and the batch is sent again; the new
	     key is also stored in the module level authKey.
	503: sleeps for SleepTimer seconds, doubles SleepTimer and sends the
	     batch again, until Google stops answering 503.  SleepTimer is
	     reset to 10 afterwards.
	Any other error code is reported on stdout; the batch is not retried.
	"""
	# SleepTimer is rebound below; without this declaration the 503 branch
	# raised UnboundLocalError.
	global SleepTimer

	# Deliver this batch
	print("Queue full, delivering batch ["+str(docid)+"] ==>")
	payload = doc.toxml("UTF-8")
	res = SendToGoogle(authKey, payload)

	reauthenticated = 0
	throttled = 0
	while res == 403 or res == 503:
		if res == 403:
			if reauthenticated:
				# a fresh key was refused as well, trying again cannot help
				break
			# our authentication probably expired, get a new key and retry
			authKey = AuthenticateToGooge()
			# authKey is a parameter of this function, so "global authKey" is
			# not allowed here; store the key through globals() so that the
			# following batches use it as well.
			globals()["authKey"] = authKey
			reauthenticated = 1
		else:
			# we're going too fast, let's sleep a little
			throttled = 1
			time.sleep(SleepTimer)
			# wait double, Google likes it exponentially though but that takes too much time.
			SleepTimer = SleepTimer + SleepTimer
		res = SendToGoogle(authKey, payload)

	if throttled:
		# reset the timer when ready
		SleepTimer = 10

	if res:
		# SendToGoogle returns 0 on success, anything else is an HTTP error code
		print("Delivery of batch ["+str(docid)+"] failed, Google responded "+str(res))


# first XML instance
doc,feed = setupXMLDoc()

# a quick loop to walk through the directories, to find mail files
while directories:
	directory = directories.pop()
	for name in os.listdir(directory):
		fullpath = os.path.join(directory,name)

		if os.path.isdir(fullpath):
			directories.append(fullpath)  # It's a directory, store it.
			continue
		if not os.path.isfile(fullpath):
			continue

		print("Checking file ["+name+"]")

		fl = open(fullpath,"rb")
		try:
			try:
				msg = email.Parser.Parser().parse(fl)
			except Exception:
				# this exception almost never happens, email.Parser is not really strict.
				# Exception instead of a bare except, so Ctrl-C still stops the run.
				msg = None
		finally:
			# close this file, whatever happened while parsing
			fl.close()

		if msg is None:
			print("File ["+fullpath+"] is not a valid mail file, skipping")
			continue

		# A little check if the To, From and Date headers are in the file,
		# this is mostly what Google checks for so we'll do it as well
		if msg['To'] is None or msg['From'] is None or msg['Date'] is None:
			print("File ["+name+"] seems to be invalid, skipping")
			continue

		rawmsg = msg.as_string()
		mstring = base64.encodestring(rawmsg)

		# check the size of the current XML doc plus the size of the next message,
		# if it exceeds maxbytes then run this batch first and start a new one.
		# The message length is multiplied by two because of XML overhead.
		# An empty batch (mcount == 0) is never delivered, it would only cost a request.
		if mcount > 0 and (len(doc.toxml()) + len(rawmsg)*2) > maxbytes:
			GoogleDeliver(authKey, doc)

			# setup a new document and reset the batch counter
			doc,feed = setupXMLDoc()
			mcount = 0

		# The batch id is "<docid>-<mcount>".  Without the separator different
		# pairs gave the same id (1 and 23, 12 and 3), so a later entry could
		# overwrite the failure record of an earlier one in the report.
		# arguments: the xmldoc, the feed, mstring (the data), directory (the label), name (the file), the batch id
		addEntry(doc, feed, mstring, directory, name, str(docid)+"-"+str(mcount))
		mcount = mcount + 1

		doclen = len(doc.toxml())
		donepct = str(round((float(doclen) / float(maxbytes)*100),2))
		print("Adding message to batch [total of "+str(mcount)+" items, currently "+str(doclen)+" bytes, "+donepct+"%], using label ["+directory+"]")

# last batch, only when there is something left to deliver
if mcount > 0:
	GoogleDeliver(authKey, doc)

print("=== Process report ===")
for k, v in reportlist.items():
	print("Batch: "+k+" result: file ["+v[0]+"] reason code ["+v[1]+"]")
print("=== Total of "+str(len(reportlist))+" Failed, "+str(succescount)+" were successfull ===")