Management of PDF Metadata using Google Apps Script

Gists

Management of PDF Metadata using Google Apps Script

This is a sample script for managing the metadata of PDF data using Google Apps Script.

There might be a case where you want to retrieve and update the metadata of PDF data using Google Apps Script. In this post, I would like to introduce a method for achieving this.

Class ManagePdfMetadata

`ManagePdfMetadata` is a class for managing the metadata of PDF files using Google Apps Script. Internally, it uses the JavaScript library pdf-lib to read and write the PDF metadata. The class loads this library by itself when it is instantiated.

/**
 * ### Description
 * This is a Class for managing the metadata of PDF data using Google Apps Script.
 * The pdf-lib library is downloaded and evaluated when an instance is created,
 * and is then used to read and write the metadata fields listed in `this.keys`.
 *
 * Author: Tanaike ( https://tanaikech.github.io/ )
 */
class ManagePdfMetadata {
  /**
   * ### Description
   * Constructor of this class. Loads pdf-lib from the CDN and defines the
   * metadata keys handled by this class.
   *
   * @return {void}
   */
  constructor() {
    this.cdnjs = "https://cdn.jsdelivr.net/npm/pdf-lib/dist/pdf-lib.min.js"; // or "https://cdnjs.cloudflare.com/ajax/libs/pdf-lib/1.17.1/pdf-lib.min.js"

    // Each key "xxx" is mapped to the document methods "getXxx" and "setXxx".
    this.keys = [
      "title",
      "subject",
      "author",
      "creator",
      "creationDate",
      "modificationDate",
      "keywords",
      "producer",
    ];
    this.loadPdfLib_();
  }

  /**
   * ### Description
   * Get metadata of the inputted PDF data using pdf-lib.
   *
   * @param {Object} blob Blob of PDF data by retrieving with Google Apps Script.
   * @return {Promise<Object>} Promise resolving to an object including the
   *   metadata of the inputted PDF data (one property per entry of `this.keys`).
   *   The promise is rejected when the blob is invalid or the PDF cannot be loaded.
   */
  async getPdfMetadata(blob) {
    const doc = await this.getPdfDocFromBlob_(blob);
    const metadata = {};
    for (const key of this.keys) {
      metadata[key] = doc[this.accessorName_("get", key)]();
    }
    return metadata;
  }

  /**
   * ### Description
   * Update metadata of the inputted PDF data using pdf-lib.
   * Only the properties present in `object` are updated; the other metadata
   * fields are left as they are in the loaded document.
   *
   * @param {Object} blob Blob of PDF data by retrieving with Google Apps Script.
   * @param {Object} object Object for updating PDF metadata. `title` can be
   *   either a string or an array of `[title, options]`.
   * @return {Promise<Object>} Promise resolving to the Blob of the updated PDF data.
   *   The promise is rejected when loading, updating or saving the PDF fails.
   * @throws {Error} Synchronously, when `object` is not a non-empty object.
   */
  udpatePdfMetadata(blob, object) {
    // Validation stays synchronous so that existing callers relying on a
    // thrown error (instead of a rejected promise) keep working.
    if (
      object === null ||
      typeof object !== "object" ||
      Object.keys(object).length === 0
    ) {
      throw new Error("Please set valid object.");
    }
    return this.applyMetadata_(blob, object);
  }

  /**
   * ### Description
   * Correctly spelled alias of `udpatePdfMetadata`.
   *
   * @param {Object} blob Blob of PDF data by retrieving with Google Apps Script.
   * @param {Object} object Object for updating PDF metadata.
   * @return {Promise<Object>} Promise resolving to the Blob of the updated PDF data.
   */
  updatePdfMetadata(blob, object) {
    return this.udpatePdfMetadata(blob, object);
  }

  /**
   * ### Description
   * Load the PDF, apply the given metadata values, and save it as a new Blob.
   *
   * @param {Object} blob Blob of PDF data by retrieving with Google Apps Script.
   * @param {Object} object Validated object for updating PDF metadata.
   * @return {Promise<Object>} Promise resolving to the Blob of the updated PDF data.
   */
  async applyMetadata_(blob, object) {
    const doc = await this.getPdfDocFromBlob_(blob);
    for (const key of this.keys) {
      // Keys missing from `object` are skipped, so a partial update is allowed.
      if (!Object.prototype.hasOwnProperty.call(object, key)) {
        continue;
      }
      const setter = this.accessorName_("set", key);
      const value = object[key];
      if (key === "title" && Array.isArray(value)) {
        // An array title is passed as separate arguments: (title, options).
        doc[setter](...value);
      } else {
        doc[setter](value);
      }
    }
    const bytes = await doc.save();
    // Int8Array converts the unsigned bytes into the signed byte values
    // that are handed to Utilities.newBlob.
    return Utilities.newBlob(
      Array.from(new Int8Array(bytes)),
      MimeType.PDF,
      blob.getName()
    );
  }

  /**
   * ### Description
   * Build the name of a document accessor, e.g. ("get", "title") => "getTitle".
   *
   * @param {String} prefix "get" or "set".
   * @param {String} key Metadata key.
   * @return {String} Method name.
   */
  accessorName_(prefix, key) {
    return `${prefix}${key.charAt(0).toUpperCase()}${key.slice(1)}`;
  }

  /**
   * ### Description
   * Load pdf-lib. https://pdf-lib.js.org/docs/api/classes/pdfdocument
   * The evaluated script attaches the library to this instance as `this.PDFLib`.
   *
   * @return {void}
   */
  loadPdfLib_() {
    // Functions created by the evaluated source resolve free identifiers in
    // THIS method's scope. The setTimeout replacement therefore has to be a
    // local variable declared here, before eval; a local variable of another
    // method is not visible to the library.
    const setTimeout = function (f, t) {
      Utilities.sleep(t);
      return f();
    };
    this.setTimeout = setTimeout; // Kept for backward compatibility.

    // NOTE(review): this evaluates code downloaded at run time. Consider using
    // the version-pinned URL shown in the constructor so that the evaluated
    // code cannot change between executions.
    eval(UrlFetchApp.fetch(this.cdnjs).getContentText());
  }

  /**
   * ### Description
   * Get PDF document object using pdf-lib.
   *
   * @param {Object} blob Blob of PDF data by retrieving with Google Apps Script.
   * @return {Promise<Object>} Promise resolving to the PDF document object of pdf-lib.
   *   The promise is rejected with an Error when `blob` is not a Blob.
   */
  async getPdfDocFromBlob_(blob) {
    if (!blob || blob.toString() !== "Blob") {
      throw new Error("Please set PDF blob.");
    }
    // NOTE(review): with updateMetadata enabled, pdf-lib appears to overwrite
    // producer and modificationDate while loading, so those two values may not
    // reflect the original file. Confirm against the pdf-lib documentation
    // before changing this option.
    return this.PDFLib.PDFDocument.load(new Uint8Array(blob.getBytes()), {
      updateMetadata: true,
    });
  }
}

Sample script 1

This sample script retrieves the metadata from a PDF file on Google Drive using Google Apps Script.

/**
 * Sample script 1: retrieve the metadata of a PDF file on Google Drive and
 * write it to the log.
 *
 * The result is awaited so that a failure while loading or reading the PDF
 * is reported as an error of this function instead of being left as an
 * unhandled promise rejection.
 *
 * @return {Promise<void>} Settles after the metadata has been logged.
 */
async function getPdfMetadata() {
  const fileId = "###"; // Please set the file ID of the PDF file on Google Drive.

  const blob = DriveApp.getFileById(fileId).getBlob();

  const MM = new ManagePdfMetadata();
  const res = await MM.getPdfMetadata(blob);
  console.log(res);
}

When this script is run, the following response is returned.

{
  "title": "###",
  "subject": "###",
  "author": "###",
  "creator": "###",
  "creationDate": "### Date object ###",
  "modificationDate": "### Date object ###",
  "keywords": "### key words ###",
  "producer": "###"
}

When I tested this script, I noticed that the values of modificationDate and producer looked wrong: modificationDate returns the execution time, and producer always returns pdf-lib (https://github.com/Hopding/pdf-lib). I guessed that this might be a bug in pdf-lib, although it may instead be a side effect of loading the document with the `updateMetadata: true` option. I would like to believe that this will be resolved in a future update.

Sample script 2

In this sample script, the metadata of a PDF file is updated using Google Apps Script.

/**
 * Sample script 2: update the metadata of a PDF file on Google Drive and save
 * the result as a new file.
 *
 * The update is awaited so that a failure while loading, updating or saving
 * the PDF is reported as an error of this function instead of being left as
 * an unhandled promise rejection.
 *
 * @return {Promise<Object>} Promise resolving to the value returned by
 *   DriveApp.createFile for the updated PDF.
 */
async function udpatePdfMetadata() {
  const fileId = "###"; // Please set the file ID of the PDF file on Google Drive.
  const blob = DriveApp.getFileById(fileId).getBlob();

  const object = {
    title: ["sample title", { showInWindowTitleBar: true }], // This property is an array.
    subject: "sample subject",
    author: "sample author",
    creator: "sample creator",
    creationDate: new Date("2023-07-01T00:00:00"), // This value is date object.
    modificationDate: new Date("2023-07-01T10:00:00"), // This value is date object.
    keywords: ["sample keyword 1", "sample keyword 2", "sample keyword 3"], // This property is an array.
    producer: "sample producer",
  };
  const MM = new ManagePdfMetadata();
  const newBlob = await MM.udpatePdfMetadata(blob, object);
  return DriveApp.createFile(newBlob);
}

References

 Share!