This is a sample script for converting a PDF file to a TXT file. Two steps are required:
- Upload a PDF file as a Google Document
- Download a Google Document as a TXT file
This sample is based on the Drive API Python Quickstart; for details, see https://developers.google.com/drive/v3/web/quickstart/python. Before running it, please complete “Step 1: Turn on the Drive API” and “Step 2: Install the Google Client Library”.
from __future__ import print_function
import httplib2
import os
import io
from apiclient import discovery
from oauth2client import client
from oauth2client import tools
from oauth2client.file import Storage
from apiclient.http import MediaFileUpload, MediaIoBaseDownload
# Command-line flags forwarded to the OAuth flow in get_credentials().
# When argparse cannot be imported, flags is None and get_credentials()
# takes its legacy code path instead.
try:
    import argparse
except ImportError:
    flags = None
else:
    # Reuse oauth2client's own argument parser as the parent so its
    # options are accepted; parse_args() reads sys.argv at import time.
    flags = argparse.ArgumentParser(parents=[tools.argparser]).parse_args()
# OAuth scope requested when get_credentials() builds the authorization flow.
SCOPES = 'https://www.googleapis.com/auth/drive'
# Path of the OAuth client-secrets file handed to the flow; it is relative,
# so it is resolved against the current working directory.
CLIENT_SECRET_FILE = 'client_secret.json'
# Assigned to the flow's user_agent in get_credentials().
APPLICATION_NAME = 'Drive API Python Quickstart'
def get_credentials():
    """Return OAuth credentials, running the authorization flow if needed.

    Credentials are looked up in ``drive-python-quickstart.json`` in the
    current directory. If none are found, or the stored ones are marked
    invalid, a flow is built from ``CLIENT_SECRET_FILE`` and ``SCOPES`` and
    run with the same store, and the path of the credential file is printed.

    Returns:
        The credentials object obtained from the store or from the flow.
    """
    token_path = os.path.join("./", 'drive-python-quickstart.json')
    token_store = Storage(token_path)
    cached = token_store.get()

    # Fast path: usable credentials are already stored.
    if cached and not cached.invalid:
        return cached

    oauth_flow = client.flow_from_clientsecrets(CLIENT_SECRET_FILE, SCOPES)
    oauth_flow.user_agent = APPLICATION_NAME
    if flags:
        fresh = tools.run_flow(oauth_flow, token_store, flags)
    else:  # Needed only for compatibility with Python 2.6
        fresh = tools.run(oauth_flow, token_store)
    print('Storing credentials to ' + token_path)
    return fresh
def main():
    """Convert ``sample.pdf`` to ``sample.txt`` via Google Drive.

    Steps:
      1. Authorize an HTTP client and build a Drive v3 service.
      2. Upload ``sample.pdf`` with a Google Document target MIME type.
      3. Export the created file as ``text/plain`` into ``sample.txt``.

    The uploaded file is left on Drive; this function does not delete it.
    """
    credentials = get_credentials()
    http = credentials.authorize(httplib2.Http())
    service = discovery.build('drive', 'v3', http=http)

    pdffile = 'sample.pdf'
    txtfile = 'sample.txt'
    mime = 'application/vnd.google-apps.document'

    # NOTE(review): the same Google Document MIME type is used both for the
    # file metadata and for the uploaded media. Drive's import examples
    # declare the source file's own type on the media - confirm whether
    # 'application/pdf' should be used for MediaFileUpload here.
    res = service.files().create(
        body={
            'name': pdffile,
            'mimeType': mime
        },
        media_body=MediaFileUpload(pdffile, mimetype=mime, resumable=True)
    ).execute()

    request = service.files().export_media(fileId=res['id'], mimeType="text/plain")

    # Open the output inside a context manager so the handle is flushed and
    # closed even if a chunk download raises.
    with io.FileIO(txtfile, 'wb') as out:
        dl = MediaIoBaseDownload(out, request)
        done = False
        while not done:
            # The progress status returned alongside the flag is not used.
            _, done = dl.next_chunk()
    print("Done.")
# Run the conversion only when this file is executed as a script.
if __name__ == '__main__':
    main()