Name
sn_nlu_discovery.IntentDiscoveryLargeFileHandler
Description
Handles Intent Discovery result attachments too large to JSON.parse in one call: streams and base64-decodes the attachment's objectByteArray payload in fixed-size slices, extracts the report metadata, parses the intents array one object at a time across chunk boundaries, and reassembles the full report object.
Script
var IntentDiscoveryLargeFileHandler = Class.create();
IntentDiscoveryLargeFileHandler.prototype = {
    initialize: function() {},

    /**
     * Returns true when the first character of the given string is "{".
     * Coerces via String() so it works for both native JS strings and
     * Java strings returned by Glide APIs (the previous .equals() call
     * throws on native JS strings, which have no equals method).
     * @param {string} characters - string whose first character is tested
     * @returns {boolean}
     */
    isOpening: function(characters) {
        return String(characters).charAt(0) === "{";
    },

    /**
     * Returns true when the first character of the given string is "}".
     * @param {string} characters - string whose first character is tested
     * @returns {boolean}
     */
    isClose: function(characters) {
        return String(characters).charAt(0) === "}";
    },

    /**
     * Shallow sanity check only: verifies the string starts with "{" and
     * ends with "}". It does NOT guarantee the string parses as JSON;
     * JSON.parse is still guarded with try/catch where it is called.
     * @param {string} jsonStr - candidate JSON object string
     * @returns {boolean}
     */
    isValidJson: function(jsonStr) {
        var s = String(jsonStr);
        return s.charAt(0) === "{" && s.charAt(s.length - 1) === "}";
    },

    /**
     * Finds the start and end index of one intent object inside
     * longString (shape: intents:[{},{},{}]), scanning from intentsIndex.
     * Uses a brace-depth counter: +1 on "{", -1 on "}"; the object is
     * complete when the depth returns to zero. Assumes intent payloads
     * never contain unbalanced "{" or "}".
     *
     * Fixes vs. the previous version: the loop stopped one character
     * short of the end of the string (an intent closing on the final
     * character was never found), Array.prototype.pop was called with
     * an argument it ignores, and a string containing no braces at all
     * now reports notFound instead of returning start:-1.
     *
     * @param {string} longString - accumulated decoded JSON text
     * @param {number} intentsIndex - index at which to start scanning
     * @returns {{notFound: true}|{start: number, end: number}} notFound
     *          when no complete object exists yet (caller concatenates
     *          the next chunk and retries); otherwise start is the index
     *          of the opening "{" and end points one character PAST the
     *          delimiter following the closing "}", matching the
     *          substring math in parseJSONChunks.
     */
    getIntentFromJson: function(longString, intentsIndex) {
        gs.debug("starting at index:" + intentsIndex);
        if (longString.length > 50) {
            gs.debug("longString:" + longString.substring(0, 50));
        }
        var depth = 0;
        var started = false;
        var startIndex = -1;
        var endIndex = intentsIndex;
        for (var i = intentsIndex; i < longString.length; i++) {
            var ch = longString.charAt(i);
            if (this.isOpening(ch)) {
                if (startIndex === -1) {
                    startIndex = i;
                }
                depth++;
                started = true;
            } else if (this.isClose(ch)) {
                // Guard against underflow on a stray "}" before any "{".
                if (depth > 0) {
                    depth--;
                }
                // +2: one past the brace itself, plus the "," separator
                // between intents (preserved from the original indexing).
                endIndex = i + 2;
            }
            if (depth === 0 && started) {
                gs.debug("breaking at i:" + i);
                break;
            }
        }
        // No opening brace seen, or braces still unbalanced: the intent
        // continues in the next chunk of JSON.
        if (!started || depth > 0) {
            return {
                notFound: true
            };
        }
        return {
            start: startIndex,
            end: endIndex
        };
    },

    /**
     * Loops through decoded chunks of JSON text and returns an array of
     * intent objects. Carries any incomplete trailing intent over into
     * the next chunk by string concatenation. Individual malformed
     * intents are logged and skipped; parsing aborts after 100 errors.
     * @param {string[]} jsonChunks - decoded JSON text segments
     * @returns {Object[]} parsed intent objects (possibly empty)
     */
    parseJSONChunks: function(jsonChunks) {
        var intents = [];
        var longStr = "";
        var intentStartIndex = 0;
        for (var i = 0; i < jsonChunks.length; i++) {
            if (i === 0) {
                // Skip past the "intents":[ prefix in the first chunk.
                intentStartIndex = jsonChunks[i].indexOf("[");
                if (intentStartIndex === -1) {
                    intentStartIndex = 0;
                }
            }
            gs.info("combining next chunk: longStr length: " + longStr.length + "chunk length: " + jsonChunks[i].length);
            longStr = longStr + jsonChunks[i];
            var errorCount = 0;
            while (longStr.length > 1) {
                var indexes = this.getIntentFromJson(longStr, intentStartIndex);
                gs.debug("Got back indexes:" + JSON.stringify(indexes));
                // No complete intent in the buffer: concatenate the next
                // chunk before retrying.
                if (indexes.notFound === true) {
                    gs.debug("breaking - null:" + longStr.length);
                    break;
                }
                // end - 1 lands just past the closing "}" (end itself is
                // past the trailing "," separator).
                var intentStr = longStr.substring(indexes.start, indexes.end - 1);
                var parsed = null;
                if (this.isValidJson(intentStr)) {
                    try {
                        parsed = JSON.parse(intentStr);
                    } catch (e) {
                        parsed = null;
                    }
                }
                if (parsed !== null) {
                    intents.push(parsed);
                } else {
                    gs.info("invalid json" + JSON.stringify(intentStr));
                    errorCount = errorCount + 1;
                    if (errorCount > 100) {
                        gs.error("JSON parsing Error count > 100, aborting parsing JSON File");
                        return intents;
                    }
                }
                // Drop the consumed intent (and its separator) from the buffer.
                longStr = longStr.substring(indexes.end, longStr.length);
                intentStartIndex = 0;
            }
        }
        return intents;
    },

    /**
     * Decodes the base64 payload of an attachment in fixed-size slices
     * so the whole encoded body is never held in memory at once.
     * The attachment is XML of the form
     * <XML tags ...><objectByteArray>base64 data</objectByteArray>;
     * everything outside the objectByteArray tags is stripped.
     * @param {string} attachmentSysId - sys_id of the attachment record
     * @returns {string[]} decoded text split into strings of at most
     *          maxSize characters; concatenated they form the full JSON.
     */
    decodeLargeBase64Text: function(attachmentSysId) {
        var is = new GlideSysAttachment().getContentStream(attachmentSysId);
        var reader = new GlideTextReader(is);
        var docString = "";
        var startTag = "<objectByteArray>";
        var endTag = "</objectByteArray>";
        var startTagRemoved = false;
        var endTagRemoved = false;
        var maxSize = 16777216; // 16 MiB per output string
        var chunkSize = 8192;   // multiple of 4, so base64 slices stay aligned
        var chunk = "";
        var jsonChunks = [];
        var ln; // declared locally; was previously an implicit global
        while ((ln = reader.readLine()) != null) {
            // Remove everything before <objectByteArray>
            if (startTagRemoved === false && ln.indexOf(startTag) != -1) {
                ln = ln.substring(ln.indexOf(startTag) + startTag.length, ln.length);
                startTagRemoved = true;
                gs.debug("removed start tag");
            }
            // Remove everything after </objectByteArray>
            if (endTagRemoved === false && ln.indexOf(endTag) != -1) {
                ln = ln.substring(0, ln.indexOf(endTag));
                endTagRemoved = true;
                gs.debug("removed end tag");
            }
            // concat base64 chunk with next line of base64 data
            chunk = chunk + ln;
            // decode base64 at intervals based on chunkSize
            if (chunk.length >= chunkSize) {
                var toDecode = chunk.substring(0, chunkSize);
                var decoded = gs.base64Decode(toDecode);
                chunk = chunk.substring(chunkSize);
                // If appending the decoded slice would exceed maxSize,
                // push the accumulated string and start a new one.
                if (decoded.length + docString.length > maxSize) {
                    jsonChunks.push(docString);
                    docString = "";
                    gs.info("Splitting length is now" + jsonChunks.length);
                }
                docString = docString + decoded;
            }
        }
        // Handle the final partial chunk (< chunkSize of base64 left over).
        var lastChunk = gs.base64Decode(chunk);
        if (lastChunk.length + docString.length < maxSize) {
            docString = docString + lastChunk;
            jsonChunks.push(docString);
        } else {
            jsonChunks.push(docString);
            jsonChunks.push(lastChunk);
        }
        for (var i = 0; i < jsonChunks.length; i++) {
            gs.debug("succesfully decoded results file, length:" + jsonChunks[i].length);
        }
        return jsonChunks;
    },

    /**
     * Extracts the top-level fields that precede the intents array by
     * taking everything up to and including the "[" and closing it with
     * "]}" to form a parseable object.
     * @param {string} jsonChunk - first decoded chunk of the report
     * @returns {Object|null} metadata object, or null when the chunk
     *          does not contain a parseable prefix (previously this
     *          threw out of handleLargeFile).
     */
    getNonIntentData: function(jsonChunk) {
        var startIndex = jsonChunk.indexOf("[") + 1;
        var metadataJson = jsonChunk.substring(0, startIndex) + "]}";
        try {
            return JSON.parse(metadataJson);
        } catch (e) {
            gs.error("Unable to parse report metadata from first chunk");
            return null;
        }
    },

    /**
     * Reconstructs the JavaScript object normally produced by
     * JSON.parse() for report files too large to parse in one call,
     * by decoding and parsing the intent data in chunks.
     * Assumes no intent names contain "{" or "}".
     * @param {string} attachmentSysID - sys_id of the report attachment
     * @returns {Object|null} {totalMessages, totalNumIntents,
     *          totalNumClusters, intents} or null on any failure.
     */
    handleLargeFile: function(attachmentSysID) {
        var jsonData = this.decodeLargeBase64Text(attachmentSysID);
        if (!jsonData || jsonData.length === 0) {
            gs.error("Unable to decode large file attachment:" + attachmentSysID);
            return null;
        }
        var metadata = this.getNonIntentData(jsonData[0]);
        gs.debug("got back metadata: " + JSON.stringify(metadata));
        // metadata may be null when the prefix failed to parse.
        if (!metadata || !this.isPrimaryReportFile(metadata)) {
            gs.warn("File does not contain primary report keys, aborting decode");
            return null;
        }
        var resultJsonObject = {};
        resultJsonObject.totalMessages = metadata.totalMessages || "0";
        resultJsonObject.totalNumIntents = metadata.totalNumIntents || "0";
        resultJsonObject.totalNumClusters = metadata.totalNumClusters || "0";
        var intents = this.parseJSONChunks(jsonData);
        if (!intents || intents.length === 0) {
            gs.error("Unable to parse JSON from decoded text");
            return null;
        }
        resultJsonObject.intents = intents;
        return resultJsonObject;
    },

    /**
     * Returns true when the metadata object carries the keys expected of
     * a primary report file ("intents" and "totalMessages").
     * @param {Object} jsonObject - parsed metadata
     * @returns {boolean}
     */
    isPrimaryReportFile: function(jsonObject) {
        // Guard: Object.keys throws on null/undefined.
        if (!jsonObject || typeof jsonObject !== 'object') {
            return false;
        }
        var keys = Object.keys(jsonObject);
        var requiredFields = ["intents", "totalMessages"];
        for (var i = 0; i < requiredFields.length; i++) {
            if (keys.indexOf(requiredFields[i]) === -1) {
                return false;
            }
        }
        return true;
    },

    type: 'IntentDiscoveryLargeFileHandler'
};
Sys ID
d33919c207303010989f1f00ead300b1