首页 > 解决方案 > 在 Python 中使用 GMAIL API 下载受密码保护的 PDF

问题描述

这是我在 Python 中使用 GMAIL API 下载 pdf 附件的代码。从某种意义上说,它工作正常,我可以下载 pdf 附件,但它们无法打开。我收到以下错误“打开此文档时出错。文件已损坏,无法修复”。

from __future__ import print_function
import os.path
from googleapiclient.discovery import build
from google_auth_oauthlib.flow import InstalledAppFlow
from google.auth.transport.requests import Request
from google.oauth2.credentials import Credentials
import base64
from apiclient import errors

# If modifying these scopes, delete the file token.json.
SCOPES = ['https://mail.google.com/']

def main():
    """Shows basic usage of the Gmail API.
    Lists the user's Gmail labels.
    """
    creds = None
    # The file token.json stores the user's access and refresh tokens, and is
    # created automatically when the authorization flow completes for the first
    # time.
    # to manage a different gmail account, delete the existing token.json file from the folder

    if os.path.exists('token.json'):
        creds = Credentials.from_authorized_user_file('token.json', SCOPES)
    # If there are no (valid) credentials available, let the user log in.
    if not creds or not creds.valid:
        if creds and creds.expired and creds.refresh_token:
            creds.refresh(Request())
        else:
            flow = InstalledAppFlow.from_client_secrets_file(
                'credentials.json', SCOPES)
            creds = flow.run_local_server(port=0)
        # Save the credentials for the next run
        with open('token.json', 'w') as token:
            token.write(creds.to_json())

    service = build('gmail', 'v1', credentials=creds)

    # Call the Gmail API
    results = service.users().labels().list(userId='me').execute()
    labels = results.get('labels', [])


    # Call the Gmail API to fetch INBOX
    results = service.users().messages().list(userId='me', labelIds=['Label_9213971794059785832']).execute()
    messages = results.get('messages', [])
    print(messages[1])

    if not messages:
        print("No messages found.")
    else:
        print("Message snippets:")
        for message in messages:
            GetAttachments(service,'me',message['id'])
            # msg = service.users().messages().get(userId='me', id=message['id']).execute()
            # print(msg)
            # break



def GetAttachments(service, user_id, msg_id, store_dir="attachments/"):
    """Get and store attachment from Message with given id.
        Args:
            service: Authorized Gmail API service instance.
            user_id: User's email address. The special value "me"
                can be used to indicate the authenticated user.
            msg_id: ID of Message containing attachment.
            store_dir: The directory used to store attachments.
    """
    try:
        message = service.users().messages().get(userId=user_id, id=msg_id).execute()
        parts = [message['payload']]
        while parts:
            part = parts.pop()
            if part.get('parts'):
                parts.extend(part['parts'])
            if part.get('filename'):
                if 'data' in part['body']:
                    file_data = base64.urlsafe_b64decode(part['body']['data'].encode('UTF-8'))
                    #self.stdout.write('FileData for %s, %s found! size: %s' % (message['id'], part['filename'], part['size']))
                elif 'attachmentId' in part['body']:
                    attachment = service.users().messages().attachments().get(
                        userId=user_id, messageId=message['id'], id=part['body']['attachmentId']
                    ).execute()
                    file_data = base64.urlsafe_b64decode(attachment['data'].encode('UTF-8'))
                    #self.stdout.write('FileData for %s, %s found! size: %s' % (message['id'], part['filename'], attachment['size']))
                else:
                    file_data = None
                if file_data:
                    #do some staff, e.g.
                    path = ''.join([store_dir, part['filename']])
                    with open(path, 'w') as f:
                        f.write(str(file_data))
    except errors.HttpError as error:
        print('An error occurred: %s' % error)



if __name__ == '__main__':
    main()

请帮忙!

标签: pythonpdfgmail-api

解决方案


Found the mistake! I needed to write the bytes, not string. modified two lines of the code as following:

            with open(path, 'wb') as f:
                f.write(file_data)

推荐阅读