Name
sn_ibm_trans_spoke.IBMTranslationBatchingUtil
Description
Bulk texts handling methods related to IBM Translation actions
Script
/**
 * Helpers for the IBM Translation actions that split a list of input texts into
 * request-sized batches (one request entry per batch and target language) and that
 * reshape per-text results into a single response object.
 */
var IBMTranslationBatchingUtil = Class.create();
IBMTranslationBatchingUtil.prototype = {
    initialize: function() {},

    /**
     * Reads a system property as a base-10 integer.
     *
     * @param {string} name - Property name.
     * @param {number} defaultValue - Value used when the property is missing or is not an integer.
     * @returns {number} The parsed property value, or defaultValue.
     */
    _getIntProperty: function(name, defaultValue) {
        var parsed = parseInt(gs.getProperty(name, defaultValue), 10);
        // A non-numeric property would otherwise yield NaN, which makes every size
        // comparison false and silently disables the limits.
        return isNaN(parsed) ? defaultValue : parsed;
    },

    /**
     * Tells whether a batch has reached the configured maximum number of texts.
     * Only a positive limit restricts the batch; 0, negative values (the default is -1)
     * and NaN all mean "no limit".
     *
     * @param {Array} batch - Texts collected so far.
     * @param {number} arrayLimit - Maximum number of texts per batch.
     * @returns {boolean} true when no more texts may be added.
     */
    _isBatchFull: function(batch, arrayLimit) {
        return arrayLimit > 0 && batch.length >= arrayLimit;
    },

    /**
     * Builds the limits object from the glide.dynamic.translation.ibm.* properties.
     *
     * @returns {{charLimit: number, arrayLimit: number, textBufferSize: number}}
     *   charLimit is the configured limit minus the request buffer (when the limit is at
     *   least as large as the buffer), arrayLimit is the maximum number of texts per
     *   batch, textBufferSize is the per-text overhead added by getKBSize.
     */
    getLimits: function() {
        var charLimit = this._getIntProperty('glide.dynamic.translation.ibm.char.limit', 51200);
        var requestBufferSize = this._getIntProperty('glide.dynamic.translation.ibm.request.buffer', 100);
        // Keep the request buffer out of the usable budget, but never let the
        // subtraction push the limit below zero.
        if (charLimit >= requestBufferSize) {
            charLimit -= requestBufferSize;
        }
        return {
            'charLimit': charLimit,
            'arrayLimit': this._getIntProperty('glide.dynamic.translation.ibm.array.limit', -1),
            'textBufferSize': this._getIntProperty('glide.dynamic.translation.ibm.text.buffer', 4)
        };
    },

    /**
     * Computes the size of a text as its UTF-8 byte length plus limits.textBufferSize.
     * Despite the name, the result is in bytes, not kilobytes.
     *
     * @param {string} text - Text to measure.
     * @param {{textBufferSize: number}} limits - Limits object (see getLimits).
     * @returns {number} UTF-8 byte length of text plus the per-text buffer.
     */
    getKBSize: function(text, limits) {
        var bytes = 0;
        for (var i = 0; i < text.length; i++) {
            var code = text.charCodeAt(i);
            if (code <= 0x7f) {
                bytes += 1;
            } else if (code <= 0x7ff) {
                bytes += 2;
            } else if (code >= 0xD800 && code <= 0xDBFF && i + 1 < text.length &&
                text.charCodeAt(i + 1) >= 0xDC00 && text.charCodeAt(i + 1) <= 0xDFFF) {
                // A valid surrogate pair is one code point encoded in 4 bytes.
                bytes += 4;
                i++;
            } else {
                // Remaining BMP characters take 3 bytes. Unpaired surrogates are counted
                // as 3 bytes as well, so they can never cause neighbouring characters
                // to be skipped and the size to be underestimated.
                bytes += 3;
            }
        }
        return bytes + limits.textBufferSize;
    },

    /**
     * Measures every text with getKBSize.
     *
     * @param {string[]} texts - Texts to measure.
     * @param {Object} limits - Limits object (see getLimits).
     * @returns {Object} Map of text -> size. Duplicate texts share one entry.
     */
    getBytesData: function(texts, limits) {
        var bytesData = {};
        for (var i = 0; i < texts.length; i++) {
            bytesData[texts[i]] = this.getKBSize(texts[i], limits);
        }
        return bytesData;
    },

    /**
     * Separates texts that individually exceed charLimit from those that do not, and
     * reports whether the small texts together exceed charLimit.
     *
     * @param {string[]} texts - Input texts.
     * @param {number} charLimit - Size budget of one request.
     * @param {Object} bytesData - Map of text -> size (see getBytesData).
     * @returns {{smallTexts: string[], largeTexts: string[], isBatchingRequired: boolean}}
     */
    classifyBulkTexts: function(texts, charLimit, bytesData) {
        var smallTotal = 0;
        var classifiedData = {
            "smallTexts": [],
            "largeTexts": [],
            "isBatchingRequired": false
        };
        for (var i = 0; i < texts.length; i++) {
            var text = texts[i];
            var size = bytesData[text];
            if (size > charLimit) {
                classifiedData["largeTexts"].push(text);
                continue;
            }
            classifiedData["smallTexts"].push(text);
            smallTotal += size;
            if (smallTotal > charLimit) {
                classifiedData["isBatchingRequired"] = true;
            }
        }
        return classifiedData;
    },

    /**
     * Appends every large text to result as its own single-text batch, once per
     * target language.
     *
     * @param {Object[]} result - Output array, mutated in place.
     * @param {string[]} largeTexts - Texts that exceed the request budget on their own.
     * @param {string[]} targetLanguages - Target language codes.
     */
    addLargeTextsToArray: function(result, largeTexts, targetLanguages) {
        for (var large = 0; large < largeTexts.length; large++) {
            this.transformBatchTextsResponse([largeTexts[large]], targetLanguages, result);
        }
    },

    /**
     * Builds [size, text] pairs for the given texts, ordered by ascending size.
     * Texts with no recorded size (or a size of 0) are skipped.
     *
     * @param {string[]} texts - Texts to order.
     * @param {Object} bytesData - Map of text -> size (see getBytesData).
     * @returns {Array[]} Array of [size, text] pairs, smallest first.
     */
    getSortedBytesMap: function(texts, bytesData) {
        var bytesList = [];
        for (var i = 0; i < texts.length; i++) {
            var kbSize = bytesData[texts[i]];
            if (kbSize) {
                bytesList.push([kbSize, texts[i]]);
            }
        }
        // The default sort compares the pairs as strings ("1000,a" < "200,b"), which
        // does not order by size. Compare sizes numerically; break ties on the text so
        // the order does not depend on the engine's sort stability.
        return bytesList.sort(function(a, b) {
            if (a[0] !== b[0]) {
                return a[0] - b[0];
            }
            if (a[1] === b[1]) {
                return 0;
            }
            return a[1] < b[1] ? -1 : 1;
        });
    },

    /**
     * Greedily packs texts into batches whose total size stays within limits.charLimit
     * and whose length stays within limits.arrayLimit. Each batch is filled with the
     * largest remaining texts first and then topped up with the smallest remaining ones.
     *
     * @param {string[]} texts - Texts to pack.
     * @param {Object} bytesData - Map of text -> size (see getBytesData).
     * @param {{charLimit: number, arrayLimit: number}} limits - Limits object.
     * @returns {string[][]} The batches; every returned batch holds at least one text.
     */
    getBatchTexts: function(texts, bytesData, limits) {
        var bytesList = this.getSortedBytesMap(texts, bytesData);
        var weight = limits.charLimit;
        var arrayLimit = limits.arrayLimit;
        var splitTexts = [];
        var startIdx = 0;
        var endIdx = bytesList.length - 1;
        while (startIdx <= endIdx) {
            var singleSplit = [];
            var tempWeight = 0;
            // Take the largest remaining texts while they still fit.
            while (startIdx <= endIdx &&
                !this._isBatchFull(singleSplit, arrayLimit) &&
                weight >= (tempWeight + bytesList[endIdx][0])) {
                tempWeight += bytesList[endIdx][0];
                singleSplit.push(bytesList[endIdx][1]);
                endIdx -= 1;
            }
            // Use the leftover budget for the smallest remaining texts.
            while (startIdx <= endIdx &&
                !this._isBatchFull(singleSplit, arrayLimit) &&
                weight >= (tempWeight + bytesList[startIdx][0])) {
                tempWeight += bytesList[startIdx][0];
                singleSplit.push(bytesList[startIdx][1]);
                startIdx += 1;
            }
            if (singleSplit.length === 0) {
                // Nothing fit into an empty batch, so the largest remaining text exceeds
                // the budget on its own. Emit it alone; otherwise this loop would never
                // make progress.
                singleSplit.push(bytesList[endIdx][1]);
                endIdx -= 1;
            }
            splitTexts.push(singleSplit);
        }
        return splitTexts;
    },

    /**
     * Appends one request entry per target language for the given batch of texts.
     * The same texts array instance is referenced by every appended entry.
     *
     * @param {string[]} texts - Texts of one batch.
     * @param {string[]} targetLanguages - Target language codes.
     * @param {Object[]} result - Output array, mutated in place.
     */
    transformBatchTextsResponse: function(texts, targetLanguages, result) {
        for (var i = 0; i < targetLanguages.length; i++) {
            result.push({
                "texts_to_translate": texts,
                "target_language": targetLanguages[i]
            });
        }
    },

    /**
     * Appends the small texts to result in input order, starting a new batch whenever
     * limits.arrayLimit texts have been collected. Sizes are not checked here.
     *
     * @param {string[]} smallTexts - Texts to append.
     * @param {Object[]} result - Output array, mutated in place.
     * @param {{arrayLimit: number}} limits - Limits object.
     * @param {string[]} targetLanguages - Target language codes.
     */
    addSmallTextsToArray: function(smallTexts, result, limits, targetLanguages) {
        var tempSmallTexts = [];
        for (var i = 0; i < smallTexts.length; i++) {
            tempSmallTexts.push(smallTexts[i]);
            if (this._isBatchFull(tempSmallTexts, limits.arrayLimit)) {
                this.transformBatchTextsResponse(tempSmallTexts, targetLanguages, result);
                tempSmallTexts = [];
            }
        }
        if (tempSmallTexts.length > 0) {
            this.transformBatchTextsResponse(tempSmallTexts, targetLanguages, result);
        }
    },

    /**
     * Splits texts into request entries: small texts first (size-packed when their
     * total exceeds limits.charLimit, otherwise only chunked by limits.arrayLimit),
     * followed by each large text on its own.
     *
     * @param {string[]} texts - Input texts.
     * @param {{charLimit: number, arrayLimit: number, textBufferSize: number}} limits
     * @param {string[]} targetLanguages - Target language codes.
     * @returns {Object[]} Entries of the form {texts_to_translate, target_language}.
     */
    splitInputTextsIntoBatches: function(texts, limits, targetLanguages) {
        var result = [];
        var bytesData = this.getBytesData(texts, limits);
        var classifiedData = this.classifyBulkTexts(texts, limits.charLimit, bytesData);
        if (classifiedData["isBatchingRequired"]) {
            var splitTexts = this.getBatchTexts(classifiedData.smallTexts, bytesData, limits);
            for (var idx = 0; idx < splitTexts.length; idx++) {
                this.transformBatchTextsResponse(splitTexts[idx], targetLanguages, result);
            }
        } else {
            this.addSmallTextsToArray(classifiedData.smallTexts, result, limits, targetLanguages);
        }
        this.addLargeTextsToArray(result, classifiedData.largeTexts, targetLanguages);
        return result;
    },

    /**
     * Splits texts into request entries using the limits read from system properties.
     *
     * @param {string[]} texts - Input texts.
     * @param {string[]} targetLanguages - Target language codes.
     * @returns {Object[]} See splitInputTextsIntoBatches.
     */
    batchBulkTexts: function(texts, targetLanguages) {
        var limits = this.getLimits();
        return this.splitInputTextsIntoBatches(texts, limits, targetLanguages);
    },

    /**
     * Copies a per-text result, converting its text_translations map
     * (language -> translated text) into an array of
     * {translated_text, target_language} objects. Other keys are copied as-is.
     *
     * @param {Object} textResult - Result for one text.
     * @returns {Object} The converted copy.
     */
    _getProcessedTextResult: function(textResult) {
        var processedTextResult = {};
        var textResultKeys = Object.keys(textResult);
        for (var keyIdx = 0; keyIdx < textResultKeys.length; keyIdx++) {
            var key = textResultKeys[keyIdx];
            if (key !== 'text_translations') {
                processedTextResult[key] = textResult[key];
                continue;
            }
            var textTranslations = [];
            var languages = Object.keys(textResult.text_translations);
            for (var idx = 0; idx < languages.length; idx++) {
                var language = languages[idx];
                textTranslations.push({
                    'translated_text': textResult.text_translations[language],
                    'target_language': language
                });
            }
            processedTextResult.text_translations = textTranslations;
        }
        return processedTextResult;
    },

    /**
     * Builds the response object from per-text results, in the order of texts.
     * The overall status is 'Error' as soon as any per-text status is 'Error'.
     * NOTE(review): result is expected to hold an entry for every text; a missing
     * entry makes this method throw - confirm callers guarantee that.
     *
     * @param {string[]} texts - Input texts, defining the output order.
     * @param {Object} result - Map of text -> per-text result.
     * @param {boolean} isTranslation - true to store the list under 'translations',
     *   otherwise it is stored under 'detections'.
     * @returns {Object} {status, translations} or {status, detections}.
     */
    rearrangeJSONResult: function(texts, result, isTranslation) {
        var response = {
            'status': 'Success'
        };
        var rearrangedResponse = [];
        for (var i = 0; i < texts.length; i++) {
            var eachTextResult = result[texts[i]];
            if ('Error' === eachTextResult.status) {
                response['status'] = 'Error';
            }
            rearrangedResponse.push(this._getProcessedTextResult(eachTextResult));
        }
        if (isTranslation) {
            response['translations'] = rearrangedResponse;
        } else {
            response['detections'] = rearrangedResponse;
        }
        return response;
    },

    type: 'IBMTranslationBatchingUtil'
};
Sys ID
2a20abf45302101026b0ddeeff7b12fc