A newer version of this documentation is available.

View Latest

Function: convertAdvXMLtoJSON

    March 16, 2025
    + 12

    Goal: Recursively and generically convert advanced XML strings into JSON.

    • This function convertAdvXMLtoJSON shows how to convert advanced XML strings into JSON.

    • This function handles two constructs that convertXMLtoJSON fails on: 1) empty-element tags and 2) attributes.

      javascript
      <ADVELEM1 adv_attrA="adv_valA" /> <ADVELEM2 adv_attrA="adv_valA" adv_attrB="adv_valB"> <SUB> SUBDATA </SUB> </ADVELEM2>
    • Requires Eventing Storage (or metadata collection) and a "source" collection.

    • Will operate on any mutation where the KEY or meta.id starts with "advxml:".

    • Will enrich the source document with a new JSON object representing the XML data.

    • Maintains a checksum to prevent the overhead of conversion if the property in_xml is unchanged.

    javascript
    // To run configure the settings for this Function, convertAdvXMLtoJSON, as follows:
    //
    // Version 7.1+
    //   "Function Scope"
    //     *.* (or try bulk.data if non-privileged)
    //   "Listen to Location"
    //     bulk.data.source
    //   "Eventing Storage"
    //     rr100.eventing.metadata
    //   Binding(s)
    //    1. "binding type", "alias name...", "bucket.scope.collection", "Access"
    //       "bucket alias", "src_col", "bulk.data.source", "read and write"
    //
    // Version 6.X
    //   "Source Bucket"
    //     source
    //   "MetaData Bucket"
    //     metadata
    //   Binding(s)
    //    1. "binding type", "alias name...", "bucket", "Access"
    //       "bucket alias", "src_col", "source", "read and write"

    // ===============================================================
    // THIS IS AN APPLICATION SPECIFIC CUSTOM TRANSFORMATION ON NEED
    /**
     * Hook for application-specific post-processing of a parsed value.
     *
     * Called by parseAdvXmlToJson the first time a key is stored in the
     * result object (it is not called for later values that are appended
     * when a key repeats).
     *
     * @param {string} key - Element name whose value is being stored.
     * @param {*} tmp - Parsed value (object, string or null).
     * @returns {*} The value to store; this default implementation is a NOOP.
     */
    function customParseFixups(key, tmp) {
        // typically we would test the key for a name where we want to transform the value

        // Example 1:
        // if (key === "resultStr") {
        //     // makes an array of strings
        //     // var data_array = tmp.trim().split(' ');
        //     return data_array;
        // }

        // Example 2:
        // if (key === "resultNumStr") {
        //     // makes an array of numbers
        //     // var data_array = tmp.trim().split(' ').map(str => Number(str));
        //     return data_array;
        // }

        // Do nothing this is a NOOP
        return tmp;
    }

    // ===============================================================
    /**
     * Eventing entry point: enriches documents whose key starts with "advxml:"
     * with a JSON rendering (.out_json) of their .in_xml property and with a
     * checksum (.xmlchksum) used to skip unchanged XML on later mutations.
     *
     * Relies on names supplied by the Eventing runtime / Function settings:
     * crc64(), log() and the read-write bucket alias src_col.
     *
     * @param {Object} doc - The mutated document.
     * @param {Object} meta - Document metadata; only meta.id is read.
     */
    function OnUpdate(doc, meta) {
        // filter out non XML
        if (!meta.id.startsWith("advxml:")) return;
        // The KEY started with "advxml:" try to process it

        // ===========================================================
        // *** Do other work required here on non .in_xml changes ***
        // ===========================================================

        // Without a string .in_xml there is nothing to convert; bail out here
        // rather than letting the parser throw on a non-string value.
        if (typeof doc.in_xml !== "string") {
            log(meta.id, "SKIPPED doc.in_xml is missing or not a string");
            return;
        }

        // let's see if we need to re-create our json representation.
        const xmlchksum = crc64(doc.in_xml);

        // ===========================================================
        // Don't reprocess if the doc.in_xml has not changed this could be
        // a big performance win if the doc has other fields that mutate.
        // We do this via a checksum of the .in_xml property.
        if (doc.xmlchksum && doc.xmlchksum === xmlchksum) return;

        // Either this is the first pass, or the .in_xml property changed.
        const jsonDoc = parseAdvXmlToJson(doc.in_xml);
        log(meta.id,"1. INPUT xml doc.in_xml :", doc.in_xml);
        log(meta.id,"2. CHECKSUM doc.in_xml :", xmlchksum);
        log(meta.id,"3. OUTPUT doc.out_json :", jsonDoc);
        doc.out_json = jsonDoc;
        doc.xmlchksum = xmlchksum;

        // ===========================================================
        // enrich the source collection with .out_json and .xmlchksum
        src_col[meta.id] = doc;
    }

    // 7.0.0 version uses String.matchAll eliminates the need to make our own
    // MatchAll function, call as parseAdvXmlToJson(xml)
    /**
     * Regex-based, recursive conversion of an XML string into a plain object.
     *
     * - Each element becomes a key; a key that repeats becomes an array.
     * - Attributes of an element become properties of that element's object.
     * - An element that carries attributes AND bare text is treated as illegal:
     *   a message is logged and null is returned from the call that saw it.
     *
     * @param {string} xml - XML text to convert.
     * @param {boolean} [recurs] - Internal flag: truthy on recursive calls so the
     *     one-time normalization of the input is applied only on the first call.
     * @returns {Object|null} The converted object, or null for illegal input.
     */
    function parseAdvXmlToJson(xml, recurs) {
        const json = {};
        let text = xml;
        if (!recurs) {
            // 1st call, Fix bad closures, Transform Example : '<tagName attrName="attrValue" >' becomes '<tagName attrName="attrValue" />'
            text = text.replace(/\"\s*>/g, '" />');
            // 1st call, Transform Example : '<tagName attrName="attrValue" />' becomes '<tagName attrName="attrValue" ></tagName>'
            text = text.replace(/<\s*([^\/><\s]+)\s+(\w[^<>]*)(\s*\/>)/gm, '<$1 $2></$1>');
        }
        for (const res of text.matchAll(/(?:<([\w:]*)(?:\s[^>]*)*>)((?:(?!<\1).)*)(?:<\/\1>)|<([\w:]*)(?:\s*)*\/>/gm)) {
            // find all sets of 1..N attributes if any
            const attrs = {};
            for (const res1 of res[0].matchAll(/<\s*(\w[^\/><\s]*)\s+(\w[^<]*[^=<]\s*[=]\s*[\'\"][^<]+[\'\"])\s*>([^<>]*)</gm)) {
                attrs[res1[1]] = {};
                if (res1[3] !== "") {
                    // illegal XML: <tag1 attr1="aval1">value1</tag1>
                    log('Illegal can not have bare "' + res1[3] + '" value if we have attr(s) input:', res1[0]);
                    return null;
                }
                for (const res2 of res1[2].matchAll(/(\w+[^=<>]*)\s*[=]\s*[\"]([^\"]+)[\"]|(\w+[^=<>]*)\s*[=]\s*[\']([^\']+)[\']/gm)) {
                    // Only one side of the alternation participates in a match; the
                    // other side's groups are undefined (not ""), so test for that.
                    // Otherwise a bogus "undefined" key is written and can clobber a
                    // real attribute of that name.
                    if (res2[1] !== undefined) attrs[res1[1]][res2[1]] = res2[2];
                    if (res2[3] !== undefined) attrs[res1[1]][res2[3]] = res2[4];
                }
            }

            const key = res[1] || res[3];
            let value = res[2] && parseAdvXmlToJson(res[2], true);
            if (res[2] === "" && Object.keys(attrs).length > 0) {
                value = {};
            }
            if (attrs[key]) {
                // A nested parse that hit illegal content returns null; start from
                // an empty object so attaching attributes cannot throw.
                if (value === null || typeof value !== "object") {
                    value = {};
                }
                for (const p of Object.keys(attrs[key])) {
                    if (attrs[key][p]) {
                        value[p] = attrs[key][p];
                    }
                }
            }

            // Prefer the parsed object; fall back to the raw inner text, then null.
            let tmp = ((value && Object.keys(value).length) ? value : res[2]) || null;
            if (!Array.isArray(json[key])) {
                if (json[key]) {
                    // we have seen this key before change from object to an array of objects
                    const old = json[key];
                    json[key] = [old, tmp];
                } else {
                    // link to a custom function
                    tmp = customParseFixups(key, tmp);
                    json[key] = tmp;
                }
            } else {
                json[key].push(tmp);
            }
        }
        return json;
    }

    /*
    // need this for 6.6.0 version
    function* MatchAll(str, regExp) {
        if (!regExp.global) {
            throw new TypeError('Flag /g must be set!');
        }
        const localCopy = new RegExp(regExp, regExp.flags);
        let match;
        while (match = localCopy.exec(str)) {
            yield match;
        }
    }

    // 6.6.0 version no String.matchAll need our own MatchAll function, call as parseAdvXmlToJson(xml)
    function parseAdvXmlToJson(xml, recurs) {
        const json = {};
        let text = xml;
        if (!recurs) {
            // 1st call, Fix bad closures, Transform Example : '<tagName attrName="attrValue" >' becomes '<tagName attrName="attrValue" />'
            text = text.replace(/\"\s*>/g, '" />');
            // 1st call, Transform Example : '<tagName attrName="attrValue" />' becomes '<tagName attrName="attrValue" ></tagName>'
            text = text.replace(/<\s*([^\/><\s]+)\s+(\w[^<>]*)(\s*\/>)/gm, '<$1 $2></$1>');
        }
        for (const res of MatchAll(text, /(?:<([\w:]*)(?:\s[^>]*)*>)((?:(?!<\1).)*)(?:<\/\1>)|<([\w:]*)(?:\s*)*\/>/gm)) {
            // find all sets of 1..N attributes if any
            const attrs = {};
            for (const res1 of MatchAll(res[0], /<\s*(\w[^\/><\s]*)\s+(\w[^<]*[^=<]\s*[=]\s*[\'\"][^<]+[\'\"])\s*>([^<>]*)</gm)) {
                attrs[res1[1]] = {};
                if (res1[3] !== "") {
                    // illegal XML: <tag1 attr1="aval1">value1</tag1>
                    log('Illegal can not have bare "' + res1[3] + '" value if we have attr(s) input:', res1[0]);
                    return null;
                }
                for (const res2 of MatchAll(res1[2], /(\w+[^=<>]*)\s*[=]\s*[\"]([^\"]+)[\"]|(\w+[^=<>]*)\s*[=]\s*[\']([^\']+)[\']/gm)) {
                    // unmatched alternation groups are undefined, not ""
                    if (res2[1] !== undefined) attrs[res1[1]][res2[1]] = res2[2];
                    if (res2[3] !== undefined) attrs[res1[1]][res2[3]] = res2[4];
                }
            }

            const key = res[1] || res[3];
            let value = res[2] && parseAdvXmlToJson(res[2], true);
            if (res[2] === "" && Object.keys(attrs).length > 0) {
                value = {};
            }
            if (attrs[key]) {
                // a nested parse that hit illegal content returns null
                if (value === null || typeof value !== "object") {
                    value = {};
                }
                for (const p of Object.keys(attrs[key])) {
                    if (attrs[key][p]) {
                        value[p] = attrs[key][p];
                    }
                }
            }

            let tmp = ((value && Object.keys(value).length) ? value : res[2]) || null;
            if (!Array.isArray(json[key])) {
                if (json[key]) {
                    // we have seen this key before change from object to an array of objects
                    const old = json[key];
                    json[key] = [old, tmp];
                } else {
                    // link to a custom function
                    tmp = customParseFixups(key, tmp);
                    json[key] = tmp;
                }
            } else {
                json[key].push(tmp);
            }
        }
        return json;
    }
    */