Converting PDF to TXT

This is a sample script for converting a PDF file to a TXT file. The conversion takes two steps:

  1. Upload a PDF file as a Google Document
  2. Download a Google Document as a TXT file

This sample is based on the Drive API Python Quickstart. For details on the setup, please read “Step 1: Turn on the Drive API” and “Step 2: Install the Google Client Library” in that guide.

from __future__ import print_function
import httplib2
import os
import io

from apiclient import discovery
from oauth2client import client
from oauth2client import tools
from oauth2client.file import Storage
from apiclient.http import MediaFileUpload, MediaIoBaseDownload

    import argparse
    flags = argparse.ArgumentParser(parents=[tools.argparser]).parse_args()
except ImportError:
    flags = None

CLIENT_SECRET_FILE = 'client_secret.json'
APPLICATION_NAME = 'Drive API Python Quickstart'

def get_credentials():
    """Return OAuth2 credentials for the Drive API, authorizing if needed.

    Previously stored credentials are loaded from
    ``drive-python-quickstart.json`` in the current directory. If nothing is
    stored, or the stored credentials are invalid, an OAuth2 flow is built
    from ``CLIENT_SECRET_FILE`` and ``SCOPES`` and run; the credential store
    is handed to the flow runner so the new credentials can be saved there.

    Returns:
        The credentials object loaded from the store or produced by the flow.
    """
    credential_path = os.path.join("./", 'drive-python-quickstart.json')
    store = Storage(credential_path)
    credentials = store.get()
    if not credentials or credentials.invalid:
        flow = client.flow_from_clientsecrets(CLIENT_SECRET_FILE, SCOPES)
        flow.user_agent = APPLICATION_NAME
        if flags:
            credentials = tools.run_flow(flow, store, flags)
        else:  # Needed only for compatibility with Python 2.6
            # Legacy entry point used when no parsed flags are available.
            credentials = tools.run(flow, store)
        print('Storing credentials to ' + credential_path)
    return credentials

def main():
    """Convert ``sample.pdf`` to ``sample.txt`` by round-tripping via Drive.

    The PDF is uploaded with a Google Docs target MIME type, which asks Drive
    to import it as a Google Document. That document is then exported as
    ``text/plain`` and written to ``sample.txt`` chunk by chunk.
    """
    credentials = get_credentials()
    http = credentials.authorize(httplib2.Http())
    service = discovery.build('drive', 'v3', http=http)

    pdffile = 'sample.pdf'
    txtfile = 'sample.txt'

    # Target type for the created file: requesting a Google Docs type in the
    # metadata is what triggers the PDF -> Document conversion on upload.
    mime = 'application/vnd.google-apps.document'
    res = service.files().create(
        body={
            'name': pdffile,
            'mimeType': mime
        },
        # The uploaded bytes are a PDF, so label the media with its own type.
        media_body=MediaFileUpload(
            pdffile, mimetype='application/pdf', resumable=True)
    ).execute()

    request = service.files().export_media(
        fileId=res['id'], mimeType="text/plain")
    # The context manager closes the output file even if a chunk fails.
    with io.FileIO(txtfile, 'wb') as fh:
        dl = MediaIoBaseDownload(fh, request)
        done = False
        while not done:
            # next_chunk() returns (progress status, finished flag).
            _, done = dl.next_chunk()

if __name__ == '__main__':