Retrieving Glyph Value from List Items of Google Document using Google Apps Script

Gists

This is a sample script for retrieving the glyph value from the list items of Google Document using Google Apps Script.

At the current stage, when a list is inserted into a Google Document, the glyphs (the numbering of the list items) are calculated automatically. Unfortunately, when you try to retrieve the glyph values of the list items, either manually or with a script, they cannot be retrieved; only the texts of the list items are returned. It seems that, at the current stage, there are no methods for directly retrieving the glyph value from the list items.

In this post, as a challenge, I would like to introduce a sample script for retrieving the glyph values from the list items of a Google Document. Please note that this script is only a workaround: because of the current specification, not all glyph values can be retrieved. So please be careful about this.

Target

This is the target Google Document. From this document, the glyph values are retrieved.

Sample script

/**
 * Logs an estimated glyph (for example "1.", "a." or "(ii)") for every list item of the active Google Document.
 *
 * The Document service provides the list ID, glyph type, nesting level and text of each list item.
 * The Docs API (used through the global `Docs` object) provides the glyph format string of each
 * nesting level. Placeholders such as "%0" in the format string are replaced with entries of a
 * lookup table keyed by the glyph type.
 *
 * Limitations:
 * - List items are matched by their trimmed text, so items with identical text share the first match.
 * - Glyph types or positions that are missing from the lookup table resolve to an empty string.
 *
 * The result, an array of `{ glyph, text }` objects in document order, is written with `console.log`.
 */
function myFunction() {
  // This is an object for the numbers with the GlyphType. When you want to use more numbers and GlyphTypes, please add them.
  const numObj = {
    NUMBER: ["1", "2", "3", "4", "5", "6"],
    LATIN_LOWER: ["a", "b", "c", "d", "e", "f"],
    LATIN_UPPER: ["A", "B", "C", "D", "E", "F"],
    ROMAN_LOWER: ["i", "ii", "iii", "iv", "v", "vi"],
    ROMAN_UPPER: ["I", "II", "III", "IV", "V", "VI"],
    GLYPH_TYPE_UNSPECIFIED: ["", "", "", "", "", ""],
    noGlyphType: ["", "", "", "", "", ""],
  };

  // Returns the table entry for a glyph type and position, or "" when the table has no such entry.
  // This avoids a TypeError for glyph types that are not in numObj and avoids the literal
  // "undefined" for positions beyond the table.
  const lookupGlyph = (glyphType, index) => {
    const values = numObj[glyphType];
    return values && values[index] !== undefined ? values[index] : "";
  };

  // 1. Retrieve the list items once and collect their trimmed texts.
  const doc = DocumentApp.getActiveDocument();
  const listItems = doc.getBody().getListItems();
  const searchText = listItems.map((e) => e.getText().trim());

  // 2. Retrieve list object using Docs API.
  const listObj = Docs.Documents.get(doc.getId(), { fields: "lists" }).lists;

  // 3. Group the texts as obj[listId][nestingLevel][glyphType] = { text: [...], glyphFormat, nestingLevel }.
  const obj = listItems.reduce((o, e) => {
    const listId = e.getListId();
    const glyphType = e.getGlyphType() || "GLYPH_TYPE_UNSPECIFIED";
    const nestingLevel = e.getNestingLevel().toString();
    const text = e.getText();
    if (!o[listId]) {
      o[listId] = {};
    }
    if (!o[listId][nestingLevel]) {
      o[listId][nestingLevel] = {};
    }
    const byGlyphType = o[listId][nestingLevel];
    if (byGlyphType[glyphType]) {
      byGlyphType[glyphType].text.push(text);
    } else {
      // Add the new glyph type next to the existing ones. Replacing the whole nesting-level
      // object here would discard the texts collected for the other glyph types.
      byGlyphType[glyphType] = {
        text: [text],
        glyphFormat:
          listObj[listId].listProperties.nestingLevels[nestingLevel]
            .glyphFormat,
        nestingLevel,
      };
    }
    return o;
  }, {});

  // 4. Walk the grouped object and record, for every text array, which searched texts it contains.
  //    `parent` is the { k, v } entry of the object that directly holds the array, so `parent.k`
  //    is the glyph type and `parent.v` carries glyphFormat and nestingLevel.
  const matches = [];
  const collectMatches = (node, parent) => {
    Object.entries(node).forEach(([k, v]) => {
      if (Array.isArray(v)) {
        const texts = v.map((vv) => vv.trim());
        searchText
          .filter((s) => texts.includes(s))
          .forEach((s) =>
            matches.push({
              search: s,
              k: parent.k === "null" ? "noGlyphType" : parent.k,
              // Position of the text among the items sharing list, nesting level and glyph type.
              idx: texts.indexOf(s),
              glyphFormat: parent.v.glyphFormat,
              nestingLevel: parent.v.nestingLevel,
            })
          );
      } else if (v !== null && typeof v === "object") {
        collectMatches(v, { k, v });
      }
    });
  };
  collectMatches(obj, null);

  // 5. Output the values.
  const res = searchText.map((s) => {
    const e = matches.find((t) => t.search === s);
    if (!e) {
      // Guard before touching `e`; otherwise a missing match throws instead of being reported.
      return { glyph: "Text was not found.", text: s };
    }
    const glyphFormat = e.glyphFormat || "";
    const count = [...glyphFormat].filter((c) => c === "%").length;
    return {
      // Heuristic for formats with several placeholders (e.g. "%0.%1"): the placeholder at
      // offset 0 is filled with the table entry at the placeholder's own number, every other
      // placeholder with the entry at the item's position. Single-placeholder formats always
      // use the item's position.
      glyph: glyphFormat.replace(/%([0-9]+)/g, (_, p, o) =>
        count > 1
          ? lookupGlyph(e.k, o === 0 ? Number(p) : e.idx)
          : lookupGlyph(e.k, e.idx)
      ),
      text: e.search,
    };
  });

  console.log(res);
}

Result

When this script is used for the above target Google Document, the following result is obtained.

[
  { "glyph": "1.", "text": "sampleA1" },
  { "glyph": "2.", "text": "sampleA2" },
  { "glyph": "a.", "text": "sampleA2-1" },
  { "glyph": "i.", "text": "sampleA2-2" },
  { "glyph": "b.", "text": "sampleA2-3" },
  { "glyph": "ii.", "text": "sampleA2-3-1" },
  { "glyph": "iii.", "text": "sampleA2-3-2" },
  { "glyph": "1.1", "text": "sampleC1" },
  { "glyph": "(i)", "text": "sampleC1A" },
  { "glyph": "1.2", "text": "sampleC2" },
  { "glyph": "(ii)", "text": "sampleC2AA" },
  { "glyph": "(i)", "text": "sampleC2AAB" },
  { "glyph": "(A)", "text": "sampleC2AAC" },
  { "glyph": "(iii)", "text": "sampleC2AB" },
  { "glyph": "1.3", "text": "sampleC3" },
  { "glyph": "(iv)", "text": "sampleC3A" },
  { "glyph": "(ii)", "text": "sampleC3B" },
  { "glyph": "(B)", "text": "sampleC3C" },
  { "glyph": "SampleName I", "text": "sampleD1" },
  { "glyph": "", "text": "SampleE1" },
  { "glyph": "", "text": "SampleE2" }
]
  • When you see this result, you can see that I.1, I.2 and I.3 are returned as 1.1, 1.2 and 1.3, respectively. The reason for this is that I couldn’t find the glyph type of the 1st value, I.

  • Also, the glyph of the checkbox is returned as "". This is due to the current specification: when a checkbox is used as the glyph, the Document service and the Docs API return null and GLYPH_TYPE_UNSPECIFIED as the glyph type, respectively. Because of this, it is difficult to identify the glyph type in this situation.

Reference

 Share!