Goal: Recursively and generically convert advanced XML strings into JSON.
This function convertAdvXMLtoJSON shows how to convert advanced XML strings into JSON.
This function will handle items and constructs that convertXMLtoJSON will fail on 1) empty-element tags and 2) attributes.
Example: <ADVELEM1 adv_attrA="adv_valA" /> <ADVELEM2 adv_attrA="adv_valA" adv_attrB="adv_valB"> <SUB> SUBDATA </SUB> </ADVELEM2>
Requires Eventing Storage (or metadata collection) and a "source" collection.
Will operate on any mutation where the KEY or meta.id starts with "advxml:".
Will enrich the source document with a new JSON object representing the XML data.
Maintains a checksum to prevent the overhead of conversion if the property
is unchanged.
Input Data/Mutation
Output Data/Mutation
// To run, configure the settings for this Function, convertAdvXMLtoJSON, as follows:
// Version 7.0+
// "Listen to Location"
// bulk.data.source
// "Eventing Storage"
// rr100.eventing.metadata
// Binding(s)
// 1. "binding type", "alias name...", "bucket.scope.collection", "Access"
// "bucket alias", "src_col", "bulk.data.source", "read and write"
// Version 6.X
// "Source Bucket"
// source
// "MetaData Bucket"
// metadata
// Binding(s)
// 1. "binding type", "alias name...", "bucket", "Access"
// "bucket alias", "src_col", "source", "read and write"
// ===============================================================
/**
 * Hook that lets a deployment post-process a parsed value before it is stored
 * in the generated JSON. The default implementation is a no-op.
 *
 * @param {string} key - Tag name the value is about to be stored under.
 * @param {*} tmp - Value produced by the parser for that tag.
 * @returns {*} The value to store; by default `tmp` unchanged.
 */
function customParseFixups(key, tmp) {
    // Typically we would test the key for a name where we want to transform the value.
    // Example 1:
    // if (key === "resultStr") {
    //     // makes an array of strings
    //     var data_array = tmp.trim().split(' ');
    //     return data_array;
    // }
    // Example 2:
    // if (key === "resultNumStr") {
    //     // makes an array of numbers
    //     var data_array = tmp.trim().split(' ').map(str => Number(str));
    //     return data_array;
    // }
    // Do nothing, this is a NOOP.
    return tmp;
}
// ===============================================================
/**
 * Mutation handler: for documents whose key starts with "advxml:", converts
 * doc.in_xml to JSON and writes the enriched document back through the
 * src_col binding.
 *
 * A checksum of doc.in_xml is stored in doc.xmlchksum so the conversion is
 * skipped when some other field of the document changed but the XML did not.
 *
 * @param {Object} doc - The mutated document; reads in_xml and xmlchksum,
 *     writes out_json and xmlchksum.
 * @param {Object} meta - Mutation metadata; meta.id is the document key.
 */
function OnUpdate(doc, meta) {
    // filter out non XML
    if (!meta.id.startsWith("advxml:")) return;
    // The KEY started with "advxml" try to process it
    // ===========================================================
    // *** Do other work required here on non .in_xml changes ***
    // ===========================================================
    // Without a string payload there is nothing to convert, and the parser
    // would throw when it calls String methods on the value.
    if (typeof doc.in_xml !== "string") {
        log(meta.id, "skipped, doc.in_xml is not a string");
        return;
    }
    // let's see if we need to re-create our json representation.
    const xmlchksum = crc64(doc.in_xml);
    // ===========================================================
    // Don't reprocess if the doc.in_xml has not changed this could be
    // a big performance win if the doc has other fields that mutate.
    // We do this via a checksum of the .in_xml property.
    if (doc.xmlchksum && doc.xmlchksum === xmlchksum) return;
    // Either this is the first pass, or the .in_xml property changed.
    const jsonDoc = parseAdvXmlToJson(doc.in_xml);
    log(meta.id,"1. INPUT xml doc.in_xml :", doc.in_xml);
    log(meta.id,"2. CHECKSUM doc.in_xml :", xmlchksum);
    log(meta.id,"3. OUTPUT doc.out_json :", jsonDoc);
    doc.out_json = jsonDoc;
    doc.xmlchksum = xmlchksum;
    // ===========================================================
    // enrich the source collection with .out_json and .xmlchksum
    src_col[meta.id] = doc;
}
// 7.0.0 version: uses String.matchAll, which eliminates the need for our own MatchAll function. Call as parseAdvXmlToJson(xml).
/**
 * Recursively converts an XML string into a plain object using
 * String.prototype.matchAll.
 *
 * Each element becomes a property named after its tag. An element with child
 * elements becomes a nested object, an element with only text becomes that
 * text, an attribute-only element becomes an object of its attributes, and an
 * empty element without attributes becomes null. When a tag name repeats
 * within the same parent, the property becomes an array of the values.
 *
 * @param {string} xml - XML text to convert.
 * @param {boolean} [recurs] - Truthy on recursive calls; the one-time
 *     normalisation of the input is applied only when this is falsy.
 * @returns {Object|null} The converted object, or null when an element carries
 *     both attributes and a bare text value.
 */
function parseAdvXmlToJson(xml, recurs) {
    const json = {};
    let text = xml;
    if (!recurs) {
        // 1st call, Fix bad closures, Transform Example : '<tagName attrName="attrValue" >' becomes '<tagName attrName="attrValue" />'
        text = text.replace(/\"\s*>/g, '" />');
        // 1st call, Transform Example : '<tagName attrName="attrValue" />' becomes '<tagName attrName="attrValue"></tagName>'
        text = text.replace(/<\s*([^\/><\s]+)\s+(\w[^<>]*)(\s*\/>)/gm, '<$1 $2></$1>');
    }
    // The original pattern used '(?:\s[^>]*)*' and '(?:\s*)*'. Those nested
    // quantifiers match exactly the same text as the forms below but backtrack
    // exponentially on an unclosed tag followed by a long run of whitespace.
    for (const res of text.matchAll(/(?:<([\w:]*)(?:\s[^>]*)?>)((?:(?!<\1).)*)(?:<\/\1>)|<([\w:]*)\s*\/>/gm)) {
        // find all sets of 1..N attributes if any, keyed by tag name
        const attrs = {};
        for (const res1 of res[0].matchAll(/<\s*(\w[^\/><\s]*)\s+(\w[^<]*[^=<]\s*[=]\s*[\'\"][^<]+[\'\"])\s*>([^<>]*)</gm)) {
            attrs[res1[1]] = {};
            if (res1[3] !== "") {
                // illegal XML: <tag1 attr1="aval1">value1</tag1>
                log('Illegal can not have bare "' + res1[3] + '" value if we have attr(s) input:', res1[0]);
                return null;
            }
            for (const res2 of res1[2].matchAll(/(\w+[^=<>]*)\s*[=]\s*[\"]([^\"]+)[\"]|(\w+[^=<>]*)\s*[=]\s*[\']([^\']+)[\']/gm)) {
                // Only one branch of the alternation participates in a match;
                // the other branch's groups are undefined and must be skipped.
                // The name group can swallow whitespace before '=', so trim it.
                if (res2[1] !== undefined) attrs[res1[1]][res2[1].trim()] = res2[2];
                if (res2[3] !== undefined) attrs[res1[1]][res2[3].trim()] = res2[4];
            }
        }
        const key = res[1] || res[3];
        let value = res[2] && parseAdvXmlToJson(res[2], true);
        if (res[2] === "" && Object.keys(attrs).length > 0) {
            // element has no content: its own attributes become its value
            value = {};
            if (attrs[key]) {
                for (const p of Object.keys(attrs[key])) {
                    if (attrs[key][p]) {
                        value[p] = attrs[key][p];
                    }
                }
            }
        }
        // prefer the nested object, fall back to the raw text, else null
        let tmp = ((value && Object.keys(value).length) ? value : res[2]) || null;
        if (Array.isArray(json[key]) === false) {
            if (json[key]) {
                // we have seen this key before change from object to an array of objects
                const old = json[key];
                json[key] = [];
                json[key].push(old);
                json[key].push(tmp);
            } else {
                // link to a custom function
                tmp = customParseFixups(key,tmp);
                json[key] = tmp;
            }
        } else {
            json[key].push(tmp);
        }
    }
    return json;
}
// need this for 6.6.0 version
/**
 * Generator stand-in for String.prototype.matchAll: yields every match of a
 * global regular expression in `str`, in order.
 *
 * @param {string} str - Text to search.
 * @param {RegExp} regExp - Pattern; must have the /g flag.
 * @yields {Array} The match array returned by RegExp.prototype.exec.
 * @throws {TypeError} When `regExp` does not have the /g flag.
 */
function* MatchAll(str, regExp) {
    if (!regExp.global) {
        throw new TypeError('Flag /g must be set!');
    }
    // Work on a copy so the caller's lastIndex is never touched.
    const localCopy = new RegExp(regExp, regExp.flags);
    let match;
    while ((match = localCopy.exec(str)) !== null) {
        yield match;
        // exec() leaves lastIndex in place after a zero-length match; step
        // past it so the loop cannot yield the same empty match forever.
        if (match[0] === "") {
            localCopy.lastIndex += 1;
        }
    }
}
// 6.6.0 version: no String.matchAll, so we need our own MatchAll function. Call as parseAdvXmlToJson(xml).
/**
 * Recursively converts an XML string into a plain object. This variant
 * iterates matches through the MatchAll generator instead of
 * String.prototype.matchAll.
 *
 * Each element becomes a property named after its tag. An element with child
 * elements becomes a nested object, an element with only text becomes that
 * text, an attribute-only element becomes an object of its attributes, and an
 * empty element without attributes becomes null. When a tag name repeats
 * within the same parent, the property becomes an array of the values.
 *
 * @param {string} xml - XML text to convert.
 * @param {boolean} [recurs] - Truthy on recursive calls; the one-time
 *     normalisation of the input is applied only when this is falsy.
 * @returns {Object|null} The converted object, or null when an element carries
 *     both attributes and a bare text value.
 */
function parseAdvXmlToJson(xml, recurs) {
    const json = {};
    let text = xml;
    if (!recurs) {
        // 1st call, Fix bad closures, Transform Example : '<tagName attrName="attrValue" >' becomes '<tagName attrName="attrValue" />'
        text = text.replace(/\"\s*>/g, '" />');
        // 1st call, Transform Example : '<tagName attrName="attrValue" />' becomes '<tagName attrName="attrValue"></tagName>'
        text = text.replace(/<\s*([^\/><\s]+)\s+(\w[^<>]*)(\s*\/>)/gm, '<$1 $2></$1>');
    }
    // The original pattern used '(?:\s[^>]*)*' and '(?:\s*)*'. Those nested
    // quantifiers match exactly the same text as the forms below but backtrack
    // exponentially on an unclosed tag followed by a long run of whitespace.
    for (const res of MatchAll(text, /(?:<([\w:]*)(?:\s[^>]*)?>)((?:(?!<\1).)*)(?:<\/\1>)|<([\w:]*)\s*\/>/gm)) {
        // find all sets of 1..N attributes if any, keyed by tag name
        const attrs = {};
        for (const res1 of MatchAll(res[0], /<\s*(\w[^\/><\s]*)\s+(\w[^<]*[^=<]\s*[=]\s*[\'\"][^<]+[\'\"])\s*>([^<>]*)</gm)) {
            attrs[res1[1]] = {};
            if (res1[3] !== "") {
                // illegal XML: <tag1 attr1="aval1">value1</tag1>
                log('Illegal can not have bare "' + res1[3] + '" value if we have attr(s) input:', res1[0]);
                return null;
            }
            for (const res2 of MatchAll(res1[2], /(\w+[^=<>]*)\s*[=]\s*[\"]([^\"]+)[\"]|(\w+[^=<>]*)\s*[=]\s*[\']([^\']+)[\']/gm)) {
                // Only one branch of the alternation participates in a match;
                // the other branch's groups are undefined and must be skipped.
                // The name group can swallow whitespace before '=', so trim it.
                if (res2[1] !== undefined) attrs[res1[1]][res2[1].trim()] = res2[2];
                if (res2[3] !== undefined) attrs[res1[1]][res2[3].trim()] = res2[4];
            }
        }
        const key = res[1] || res[3];
        let value = res[2] && parseAdvXmlToJson(res[2], true);
        if (res[2] === "" && Object.keys(attrs).length > 0) {
            // element has no content: its own attributes become its value
            value = {};
            if (attrs[key]) {
                for (const p of Object.keys(attrs[key])) {
                    if (attrs[key][p]) {
                        value[p] = attrs[key][p];
                    }
                }
            }
        }
        // prefer the nested object, fall back to the raw text, else null
        let tmp = ((value && Object.keys(value).length) ? value : res[2]) || null;
        if (Array.isArray(json[key]) === false) {
            if (json[key]) {
                // we have seen this key before change from object to an array of objects
                const old = json[key];
                json[key] = [];
                json[key].push(old);
                json[key].push(tmp);
            } else {
                // link to a custom function
                tmp = customParseFixups(key,tmp);
                json[key] = tmp;
            }
        } else {
            json[key].push(tmp);
        }
    }
    return json;
}
INPUT: KEY advxml::1
{
"type": "advxml",
"id": 1,
"in_xml": "<CD><ADVELEM1 adv_attrA=\"adv_valA\"/><ADVELEM2 adv_attrA=\"adv_valA\" adv_attrB=\"adv_valB\"><SUB>SUBDATA</SUB><TITLE>EmpireBurlesque</TITLE><ARTIST>BobDylan</ARTIST><COUNTRY>USA</COUNTRY><COMPANY>Columbia</COMPANY><PRICE>10.90</PRICE><YEAR>1985</YEAR></CD>"
}
UPDATED/OUTPUT: KEY advxml::1
"type": "advxml",
"id": 1,
"in_xml": "<CD><ADVELEM1 adv_attrA=\"adv_valA\"/><ADVELEM2 adv_attrA=\"adv_valA\" adv_attrB=\"adv_valB\"><SUB>SUBDATA</SUB></ADVELEM2><TITLE>EmpireBurlesque</TITLE><ARTIST>BobDylan</ARTIST><COUNTRY>USA</COUNTRY><COMPANY>Columbia</COMPANY><PRICE>10.90</PRICE><YEAR>1985</YEAR></CD>",
"out_json": {
"CD": {
"adv_attrA": "adv_valA"
"adv_attrA": "adv_valA",
"adv_attrB": "adv_valB"
"TITLE": "EmpireBurlesque",
"ARTIST": "BobDylan",
"COMPANY": "Columbia",
"PRICE": "10.90",
"YEAR": "1985"
"xmlchksum": "99b252d9af646320"