# Copyright (c) 2009 by Joseph Devietti (devietti@cs.washington.edu).

# This file is part of the tbird2gmail program.

# Tbird2gmail is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.

# Tbird2gmail is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License
# along with Tbird2gmail.  If not, see <http://www.gnu.org/licenses/>.

"""Upload the emails of Unix-style mbox files into a Google Apps Gmail account.

Messages are sent in batches through the Google Apps email migration
service.  Messages the server rejects are written to a "retry" mbox, with
the rejection reason stored in a custom header.
"""

import email
import mailbox
import os
import pdb
import socket
import sys
import time
from optparse import OptionParser

import gdata.apps.migration.service

# Google's max upload size is listed as 32MB, but we play it safe
MAX_UPLOAD_SIZE = 31 << 20  # 31 MB
MAX_BACKOFF_SECONDS = 600
OUTPUT_TAG = "tbird2gmail"
FAILURE_REASON_HEADER = 'X-Tbird2gmail-Upload-Failure-Reason'

# NOTE(review): the Message-ID, From and To headers below carry no address;
# they look like they once held <...> values that were stripped -- confirm
# against the upstream copy before relying on --upload-test-email.
TEST_EMAIL_MESSAGE = '''Received: by 140.23.6.190 with HTTP; Mon, 16 Jul 2007 10:12:26 -0700 (PDT)\r
Message-ID: \r
Date: Mon, 16 Jul 2007 10:12:26 -0700\r
From: "tbird2gmail script" \r
To: "You" \r
Subject: Testing gmail batch upload\r
MIME-Version: 1.0\r
Content-Type: text/plain; charset=ISO-8859-1; format=flowed\r
Content-Transfer-Encoding: 7bit\r
Content-Disposition: inline\r
Delivered-To: you@example.com\r
\r
This is a test message successfully uploaded via tbird2gmail.
'''

# mbox that collects the messages the server refused; opened by
# initFailedEmailLog() and closed by teardownFailedEmailLog().
FailedEmailMailbox = None

# Parsed command-line options; assigned by main().
OPTIONS = None

# State of the batch currently being assembled; reset by uploadMboxFile()
# and uploadBatch().
batchSize = 0        # accumulated size of the pending batch entries
messageCount = 0     # messages taken from the current mbox file so far
emailOfBatchID = {}  # batch id -> keyword arguments given to AddBatchEntry


def initFailedEmailLog(mboxpath):
    """Open (creating it if necessary) the mbox at `mboxpath' that records
    the emails which failed to upload."""
    global FailedEmailMailbox
    FailedEmailMailbox = mailbox.mbox(mboxpath, create=True)


def teardownFailedEmailLog():
    """Close the failed-email mbox.

    Does nothing when the mbox is not open, so it is safe to call more than
    once or without a preceding initFailedEmailLog()."""
    global FailedEmailMailbox
    if FailedEmailMailbox is not None:
        FailedEmailMailbox.close()
        FailedEmailMailbox = None


def logFailedEmail(failureReason, mailMessage):
    """Log the specified mailMessage (a raw RFC822 message) as having failed
    for the specified reason.

    The failure reason is encoded in the FAILURE_REASON_HEADER header field
    of the copy stored in the failed-email mbox.  The mbox is flushed after
    every message."""
    failedEmail = email.message_from_string(mailMessage)
    # add custom "failure header"
    failedEmail[FAILURE_REASON_HEADER] = failureReason
    FailedEmailMailbox.add(failedEmail)
    FailedEmailMailbox.flush()


def uploadBatch(connection, backoffSeconds=30):
    """Upload the pending batch of emails via the specified `connection'
    object.

    `backoffSeconds' is the time to wait before retrying the entries for
    which the server answered 503 (busy); it doubles on every retry, and an
    AssertionError is raised once it exceeds MAX_BACKOFF_SECONDS.  Entries
    answered with any status other than 201 or 503 are written to the
    failed-email mbox.  A socket.error raised while submitting is reported
    and re-raised.

    Does nothing when the pending batch is empty.  Resets the module-level
    batch state when done."""
    global batchSize
    global emailOfBatchID

    if batchSize == 0:
        # nothing to do
        return

    assert backoffSeconds <= MAX_BACKOFF_SECONDS, "Backoff "+str(backoffSeconds)+" exceeds maximum!"

    if OPTIONS.Verbose:
        print("Uploading %s bytes" % (batchSize,))

    assert batchSize < MAX_UPLOAD_SIZE, "Batch is too big!"

    if not OPTIONS.DryRun:
        try:
            responses = connection.SubmitBatch(OPTIONS.Username)
            time.sleep(20)
        except socket.error as e:
            print("Socket Error: %s" % (e,))
            # bare raise keeps the original traceback
            raise

        # verify that each email was created successfully (status code 201)
        retryEmails = []
        for batchEntry in responses.entry:
            statusCode = batchEntry.batch_status.code
            if statusCode == '201':
                continue
            pendingEmail = emailOfBatchID[batchEntry.batch_id.text]
            if statusCode == '503':
                # server is busy: transient, so retry instead of logging it
                # as a failure
                retryEmails.append(pendingEmail)
            else:
                # permanent failure (eg malformed message), log email and
                # keep going
                failureReason = "Email upload returned status: "+str(batchEntry.batch_status)
                print(failureReason)
                logFailedEmail(failureReason=failureReason,
                               mailMessage=pendingEmail['mail_message'])

        if retryEmails:
            for remail in retryEmails:
                connection.AddBatchEntry(**remail)
                lastBatchId = connection.mail_batch.entry[-1].batch_id.text
                emailOfBatchID[lastBatchId] = remail
            # retry with exponential backoff; batchSize is still non-zero
            # here, so the recursive call does not return early
            print("%s waiting %s seconds to retry upload of  %s emails..."
                  % (OUTPUT_TAG, backoffSeconds, len(connection.mail_batch.entry)))
            time.sleep(backoffSeconds)
            uploadBatch(connection, backoffSeconds*2)

    print("%s uploaded %s messages so far..." % (OUTPUT_TAG, messageCount))

    batchSize = 0
    emailOfBatchID = {}


def uploadMboxFile(f, connection):
    """Upload all the emails in the given mbox file `f' via the specified
    gmail connection.

    Thunderbird tags found in the X-Mozilla-Keys header, plus the labels
    given on the command line, become the labels of the uploaded email.  The
    mbox 'F' flag becomes IS_STARRED, and IS_SENT is added when
    OPTIONS.SentEmail is set.  Stops after OPTIONS.UploadLimit messages when
    that option is set.  The last, partially filled batch may be left
    pending; call uploadBatch() afterwards to send it."""
    global batchSize
    global messageCount
    global emailOfBatchID

    batchSize = 0
    messageCount = 0
    emailOfBatchID = {}

    mb = mailbox.mbox(f)
    try:
        print("Uploading %s messages..." % (len(mb),))

        for mailKey in mb.keys():
            rawMessage = mb.get_string(mailKey)
            rawSize = len(rawMessage)

            if OPTIONS.UploadLimit is not None and messageCount >= OPTIONS.UploadLimit:
                uploadBatch(connection)
                return

            messageCount += 1

            # parse headers to get Thunderbird tags
            emailMessage = email.message_from_string(rawMessage)
            labels = []
            if 'X-Mozilla-Keys' in emailMessage:
                # split() without arguments ignores padding and repeated
                # blanks, so no empty label is ever produced
                tbirdTags = emailMessage['X-Mozilla-Keys'].split()
                if tbirdTags:
                    labels += tbirdTags
                    if OPTIONS.Verbose:
                        print("Extracted Thunderbird tags: %s" % (tbirdTags,))

            # add custom labels
            labels += OPTIONS.Labels

            # translate 'important' flag into a gmail star
            properties = []
            if 'F' in mb.get_message(mailKey).get_flags():
                properties = ['IS_STARRED']
            if OPTIONS.SentEmail:
                properties += ['IS_SENT']

            # send what is pending first if this message would overflow it
            if batchSize + rawSize > OPTIONS.SuggestedBatchSize:
                uploadBatch(connection)

            batchEntry = {'mail_message': rawMessage,
                          'mail_item_properties': properties,
                          'mail_labels': labels}
            batchSize += connection.AddBatchEntry(**batchEntry)

            lastBatchId = connection.mail_batch.entry[-1].batch_id.text
            emailOfBatchID[lastBatchId] = batchEntry

            if batchSize > OPTIONS.SuggestedBatchSize:
                uploadBatch(connection)
    finally:
        # release the source mbox even on early return or error
        mb.close()


def main():
    """Parse the command line, log in, and upload the test email or every
    mbox file named on the command line.

    Exits through the option parser (usage message, status 2) when the email
    address is malformed, the batch size exceeds the maximum, or an mbox
    file is not accessible.  The failed-email mbox is closed on every exit
    path once it has been opened."""
    global OPTIONS

    optParser = OptionParser(usage="%prog [options] mbox files...",
                             description="""
Upload emails from a Unix-style mbox file into Gmail, if your gmail is hosted
by Google Apps Premier, Education or Partner Edition. In particular, this
won't work for regular gmail accounts. Tags you've applied to an email via
Thunderbird will be translated into gmail labels on the uploaded email. Emails
flagged as "Important" are translated into "starred" emails in gmail.
""")

    # account information
    optParser.add_option("--email-address", dest="EmailAddress", default=None,
                         help="Your gmail login, e.g. username@dom.ain")
    optParser.add_option("--password", dest="Password", default=None,
                         help="The password for your gmail account")

    # control email labels
    optParser.add_option("--label", dest="Labels", default=[], action="append",
                         help="Label all uploaded emails with this label (can specify multiple times)")
    optParser.add_option("--sent-email", dest="SentEmail", default=False,
                         action="store_true",
                         help="Uploaded emails will be put into gmail's 'Sent Email' folder")

    # control upload
    optParser.add_option("--batch-size", dest="SuggestedBatchSize", default=2,
                         type="int", metavar="N",
                         help="Upload emails in N MB chunks. Larger chunks result in faster uploads, "
                              "but may also overwhelm the server. Default size: %default. "
                              # the option is given in MB, so show the limit in MB too
                              "Maximum size: " + ("%d" % (MAX_UPLOAD_SIZE >> 20)))
    optParser.add_option("--retry-mbox", dest="FailedEmailFile",
                         default="%s-retry.mbox" % OUTPUT_TAG,
                         help="File in which to log emails that failed to upload. This file is in mbox "
                              "format for easy future uploading. The failure reason is encoded in each "
                              "mail in the %s header." % FAILURE_REASON_HEADER)

    # testing/debugging
    optParser.add_option("--upload-test-email", dest="UploadTestEmail",
                         default=False, action="store_true",
                         help="Upload a single test email to your gmail Inbox. The email will be starred and unread.")
    optParser.add_option("--verbose", dest="Verbose", default=False,
                         action="store_true",
                         help="Enable debugging output")
    optParser.add_option("--upload-only", dest="UploadLimit", default=None,
                         type="int", metavar="N",
                         help="Upload only the first N emails. Default: upload all emails in the specified mailbox files.")
    optParser.add_option("--dry-run", dest="DryRun", default=False,
                         action="store_true",
                         help="Don't actually upload any emails.")

    (OPTIONS, mboxFiles) = optParser.parse_args()

    if OPTIONS.EmailAddress is None or OPTIONS.Password is None:
        print("You must specify your email address and password.")
        sys.exit(1)

    # Validate user input explicitly: assert statements vanish under -O.
    if OPTIONS.EmailAddress.count('@') != 1:
        optParser.error("Malformed email address")
    OPTIONS.Username, OPTIONS.Domain = OPTIONS.EmailAddress.split('@')

    # convert megabytes to bytes
    OPTIONS.SuggestedBatchSize = OPTIONS.SuggestedBatchSize << 20
    if OPTIONS.SuggestedBatchSize > MAX_UPLOAD_SIZE:
        optParser.error("Batch size must be <= %d MB" % (MAX_UPLOAD_SIZE >> 20))

    if not OPTIONS.UploadTestEmail:
        # fail before anything is uploaded rather than half-way through
        for f in mboxFiles:
            if not os.path.isfile(f):
                optParser.error("File "+f+" is not accessible!")

    initFailedEmailLog(OPTIONS.FailedEmailFile)
    try:
        gmailConnection = gdata.apps.migration.service.MigrationService(
            email=OPTIONS.EmailAddress,
            password=OPTIONS.Password,
            domain=OPTIONS.Domain)
        if OPTIONS.Verbose:
            gmailConnection.debug = True

        gmailConnection.ProgrammaticLogin()
        print("%s connected successfully!" % (OUTPUT_TAG,))

        if OPTIONS.UploadTestEmail:
            gmailConnection.ImportMail(
                user_name=OPTIONS.Username,
                mail_message=TEST_EMAIL_MESSAGE,
                mail_item_properties=['IS_STARRED', 'IS_INBOX', 'IS_UNREAD'],
                mail_labels=[OUTPUT_TAG],
            )
            return

        for f in mboxFiles:
            print("%s uploading emails from %s ..." % (OUTPUT_TAG, f))
            uploadMboxFile(f, gmailConnection)
            # upload any stragglers
            uploadBatch(gmailConnection)
            print("%s uploaded emails from %s successfully!" % (OUTPUT_TAG, f))
    finally:
        # close the retry mbox exactly once, after every file is processed
        teardownFailedEmailLog()


if __name__ == '__main__':
    main()