Function: convertAdvXMLtoJSON

    +

    Goal: Recursively and generically convert advanced XML strings into JSON.

    • This function convertAdvXMLtoJSON shows how to convert advanced XML strings into JSON.

    • This function handles two constructs that convertXMLtoJSON fails on: 1) empty-element tags and 2) attributes.

      <ADVELEM1 adv_attrA="adv_valA" />
      
      <ADVELEM2 adv_attrA="adv_valA" adv_attrB="adv_valB">
          <SUB>
              SUBDATA
          </SUB>
      </ADVELEM2>
    • Requires a metadata bucket and a source bucket with alias "src_bkt" in read+write mode.

    • Will operate on any mutation where the KEY or meta.id starts with "advxml:".

    • Will enrich the source document with a new JSON object representing the XML data.

    • Maintains a checksum to prevent the overhead of conversion if the property in_xml is unchanged.

    • convertAdvXMLtoJSON

    • Input Data/Mutation

    • Output Data/Mutation

    // Eventing handler invoked for each mutation. For documents whose key starts
    // with "advxml:" it converts the XML string in doc.in_xml into a JSON object,
    // stores it in doc.out_json together with a checksum of the XML, and writes
    // the enriched document back through the "src_bkt" bucket alias.
    //   doc  - the mutated document; its in_xml property holds the XML string
    //   meta - mutation metadata; meta.id is the document KEY
    function OnUpdate(doc, meta) {
        // filter out non XML
        if (!meta.id.startsWith("advxml:")) return;
        // The KEY started with "advxml" try to process it
        // A matching key does not guarantee an XML payload: without a string in
        // .in_xml the converter would throw on xml.replace(), so skip the doc.
        if (typeof doc.in_xml !== "string") {
            log(meta.id, "skipped: doc.in_xml is missing or not a string");
            return;
        }
        // ===========================================================
        // *** Do other  work required here on non .in_xml changes ***
        // ===========================================================
        // let's see if we need to re-create our json representation.
        const xmlchksum = crc64(doc.in_xml);
        // ===========================================================
        // Don't reprocess if the doc.in_xml has not changed this could be
        // a big performance win if the doc has other fields that mutate.
        // We do this via a checksum of the .in_xml property.
        // (This also stops the write below from re-triggering the conversion,
        // because the stored checksum then matches.)
        if (doc.xmlchksum && doc.xmlchksum === xmlchksum) return;
        // Either this is the first pass, or the .in_xml property changed.
        const jsonDoc = parseAdvXmlToJson(doc.in_xml);
        log(meta.id,"1. INPUT  xml doc.in_xml :", doc.in_xml);
        log(meta.id,"2. CHECKSUM doc.in_xml   :", xmlchksum);
        log(meta.id,"3. OUTPUT doc.out_json   :", jsonDoc);
        doc.out_json = jsonDoc;
        doc.xmlchksum = xmlchksum;
        // ===========================================================
        // enrich the source bucket with .out_json and .xmlchksum
        src_bkt[meta.id] = doc;
    }
    
    // Generator that stands in for String.prototype.matchAll: yields every match
    // of the global regular expression regExp in str, in order.
    //   str    - the text to search
    //   regExp - a RegExp with the /g flag; it is copied, so its own lastIndex
    //            is neither read nor modified
    // Throws TypeError (on first iteration) when regExp lacks the /g flag.
    function* MatchAll(str, regExp) {
      if (!regExp.global) {
        throw new TypeError('Flag /g must be set!');
      }
      const localCopy = new RegExp(regExp, regExp.flags);
      let match;
      while ((match = localCopy.exec(str)) !== null) {
        if (match[0] === '') {
          // exec() does not move lastIndex on an empty match; without stepping
          // forward the same empty match would be yielded forever. With the /u
          // flag step over a whole surrogate pair, as matchAll does.
          const at = localCopy.lastIndex;
          const isPair = localCopy.unicode && String(str).codePointAt(at) > 0xffff;
          localCopy.lastIndex = at + (isPair ? 2 : 1);
        }
        yield match;
      }
    }
    
    // 6.6.0 version no String.matchAll need our own MatchAll function, call as parseXmlToJson(xml)
    function parseAdvXmlToJson(xml,recurs) {
        const json = {};
    
        if (!recurs) {
            // 1st call, Fix bad closures,  Transform Example : '<tagName attrName="attrValue" >'  becomes '<tagName attrName="attrValue" />'
            xml = xml.replace(/\"\s*>/g, '" />');
    
            // 1st call, Transform Example : '<tagName attrName="attrValue" />' becomes '<tagName attrName="attrValue"></tagName>'
            xml = xml.replace(/<\s*([^\/><\s]+)\s+(\w[^<>]*)(\s*\/>)/gm, '<$1 $2></$1>');
        }
    
        for (const res of MatchAll(xml, /(?:<([\w:]*)(?:\s[^>]*)*>)((?:(?!<\1).)*)(?:<\/\1>)|<([\w:]*)(?:\s*)*\/>/gm)) {
    
            // find all sest of 1..N attributes if any
            var attrs = {};
            for (const res1 of MatchAll(res[0], /<\s*(\w[^\/><\s]*)\s+(\w[^<]*[^=<]\s*[=]\s*[\'\"][^<]+[\'\"])\s*>([^<>]*)</gm)) {
                attrs[res1[1]] = {};
                if (res1[3] !== "") {
                    // illegal XML: <tag1 attr1="aval1">value1</tag1>
                    log('Illegal can not have bare "' + res1[3] + '" value if we have attr(s) input:', res1[0]);
                    return null;
                } else {
                    for (const res2 of MatchAll(res1[2], /(\w+[^=<>]*)\s*[=]\s*[\"]([^\"]+)[\"]|(\w+[^=<>]*)\s*[=]\s*[\']([^\']+)[\']/gm)) {
                        if (res2[1] !== "") attrs[res1[1]][res2[1]] = res2[2];
                        if (res2[3] !== "") attrs[res1[1]][res2[3]] = res2[4];
                    }
                }
            }
            const key = res[1] || res[3];
            var value = res[2] && parseAdvXmlToJson(res[2],true);
            if (res[2] === "" && Object.keys(attrs).length > 0) {
                value = {};
            }
            if (attrs[key]) {
                for (const p in attrs[key]) {
                    value[p] = attrs[key][p];
                }
            }
            attrs = {};
            json[key] = ((value && Object.keys(value).length) ? value : res[2]) || null;
        }
        return json;
    }
    
    /*
    // 7.0.0 version uses String.matchAll, which eliminates the need for our own MatchAll function; call as parseAdvXmlToJson(xml)
    function parseAdvXmlToJson(xml,recurs) {
        const json = {};
    
        if (!recurs) {
            // 1st call, Fix bad closures,  Transform Example : '<tagName attrName="attrValue" >'  becomes '<tagName attrName="attrValue" />'
            xml = xml.replace(/\"\s*>/g, '" />');
    
            // 1st call, Transform Example : '<tagName attrName="attrValue" />' becomes '<tagName attrName="attrValue"></tagName>'
            xml = xml.replace(/<\s*([^\/><\s]+)\s+(\w[^<>]*)(\s*\/>)/gm, '<$1 $2></$1>');
        }
    
        for (const res of xml.matchAll(/(?:<([\w:]*)(?:\s[^>]*)*>)((?:(?!<\1).)*)(?:<\/\1>)|<([\w:]*)(?:\s*)*\/>/gm)) {
    
            // find all sets of 1..N attributes if any
            var attrs = {};
            for (const res1 of res[0].matchAll(/<\s*(\w[^\/><\s]*)\s+(\w[^<]*[^=<]\s*[=]\s*[\'\"][^<]+[\'\"])\s*>([^<>]*)</gm)) {
                attrs[res1[1]] = {};
                if (res1[3] !== "") {
                    // illegal XML: <tag1 attr1="aval1">value1</tag1>
                    log('Illegal can not have bare "' + res1[3] + '" value if we have attr(s) input:', res1[0]);
                    return null;
                } else {
                    for (const res2 of res1[2].matchAll(/(\w+[^=<>]*)\s*[=]\s*[\"]([^\"]+)[\"]|(\w+[^=<>]*)\s*[=]\s*[\']([^\']+)[\']/gm)) {
                        if (res2[1] !== "") attrs[res1[1]][res2[1]] = res2[2];
                        if (res2[3] !== "") attrs[res1[1]][res2[3]] = res2[4];
                    }
                }
            }
            const key = res[1] || res[3];
            var value = res[2] && parseAdvXmlToJson(res[2],true);
            if (res[2] === "" && Object.keys(attrs).length > 0) {
                value = {};
            }
            if (attrs[key]) {
                for (const p in attrs[key]) {
                    value[p] = attrs[key][p];
                }
            }
            attrs = {};
            json[key] = ((value && Object.keys(value).length) ? value : res[2]) || null;
        }
        return json;
    }
    */
    INPUT: KEY advxml::1
    
    {
      "type": "advxml",
      "id": 1,
      "in_xml": "<CD><ADVELEM1 adv_attrA=\"adv_valA\"/><ADVELEM2 adv_attrA=\"adv_valA\" adv_attrB=\"adv_valB\"><SUB>SUBDATA</SUB></ADVELEM2><TITLE>EmpireBurlesque</TITLE><ARTIST>BobDylan</ARTIST><COUNTRY>USA</COUNTRY><COMPANY>Columbia</COMPANY><PRICE>10.90</PRICE><YEAR>1985</YEAR></CD>"
    }
    UPDATED/OUTPUT: KEY advxml::1
    
    {
      "type": "advxml",
      "id": 1,
      "in_xml": "<CD><ADVELEM1 adv_attrA=\"adv_valA\"/><ADVELEM2 adv_attrA=\"adv_valA\" adv_attrB=\"adv_valB\"><SUB>SUBDATA</SUB></ADVELEM2><TITLE>EmpireBurlesque</TITLE><ARTIST>BobDylan</ARTIST><COUNTRY>USA</COUNTRY><COMPANY>Columbia</COMPANY><PRICE>10.90</PRICE><YEAR>1985</YEAR></CD>",
      "out_json": {
        "CD": {
          "ADVELEM1": {
            "adv_attrA": "adv_valA"
          },
          "ADVELEM2": {
            "SUB": "SUBDATA",
            "adv_attrA": "adv_valA",
            "adv_attrB": "adv_valB"
          },
          "TITLE": "EmpireBurlesque",
          "ARTIST": "BobDylan",
          "COUNTRY": "USA",
          "COMPANY": "Columbia",
          "PRICE": "10.90",
          "YEAR": "1985"
        }
      },
      "xmlchksum": "99b252d9af646320"
    }