Name

sn_ibm_trans_spoke.IBMTranslationBatchingUtil

Description

Bulk text handling methods for IBM Translation actions

Script

/**
 * Helpers that split bulk translation input into request-sized batches and
 * reshape per-text results into a single response payload.
 *
 * Text sizes are measured as UTF-8 byte counts plus a configurable per-text
 * buffer, and are compared against the limit exposed as `charLimit`.
 */
var IBMTranslationBatchingUtil = Class.create();
IBMTranslationBatchingUtil.prototype = {
  initialize: function() {},

  /**
   * Reads a system property as a base-10 integer.
   *
   * @param {string} name - Property name passed to gs.getProperty.
   * @param {number} defaultValue - Used when the property is missing or does not parse as an integer.
   * @returns {number} The parsed value, or defaultValue when parsing yields NaN.
   */
  _getIntProperty: function(name, defaultValue) {
      var value = parseInt(gs.getProperty(name, defaultValue), 10);
      // A NaN limit would make every size comparison false and silently disable batching.
      return isNaN(value) ? defaultValue : value;
  },

  /**
   * Builds the limits object used by the batching methods.
   *
   * The request buffer is subtracted from the configured char limit whenever
   * the limit is at least as large as the buffer.
   *
   * @returns {{charLimit: number, arrayLimit: number, textBufferSize: number}}
   */
  getLimits: function() {
      var charLimit = this._getIntProperty('glide.dynamic.translation.ibm.char.limit', 51200);
      var requestBufferSize = this._getIntProperty('glide.dynamic.translation.ibm.request.buffer', 100);
      if (charLimit >= requestBufferSize)
          charLimit -= requestBufferSize;
      return {
          'charLimit': charLimit,
          'arrayLimit': this._getIntProperty('glide.dynamic.translation.ibm.array.limit', -1),
          'textBufferSize': this._getIntProperty('glide.dynamic.translation.ibm.text.buffer', 4)
      };
  },

  /**
   * Computes the UTF-8 byte length of a text plus the per-text buffer.
   * Despite the name, the result is in bytes, not kilobytes.
   *
   * @param {string} text - Text to measure.
   * @param {{textBufferSize: number}} limits - Supplies the buffer added to every text.
   * @returns {number} UTF-8 byte count of text + limits.textBufferSize.
   */
  getKBSize: function(text, limits) {
      // Start at one byte per UTF-16 code unit, then add the extra bytes per unit.
      var bytes = text.length;
      for (var i = text.length - 1; i >= 0; i--) {
          var code = text.charCodeAt(i);
          if (code > 0x7f && code <= 0x7ff)
              bytes++;
          else if (code > 0x7ff && code <= 0xffff)
              bytes += 2;
          // Low surrogate: it contributed 3 bytes above; skipping the paired high
          // surrogate leaves that unit at 1 byte, for 4 bytes per pair.
          if (code >= 0xDC00 && code <= 0xDFFF)
              i--;
      }
      return bytes + limits.textBufferSize;
  },

  /**
   * Maps each text to its buffered byte size.
   *
   * @param {string[]} texts - Texts to measure.
   * @param {{textBufferSize: number}} limits - Passed through to getKBSize.
   * @returns {Object<string, number>} Sizes keyed by the text itself (duplicates share one entry).
   */
  getBytesData: function(texts, limits) {
      var bytesData = {};
      for (var i = 0; i < texts.length; i++) {
          bytesData[texts[i]] = this.getKBSize(texts[i], limits);
      }
      return bytesData;
  },

  /**
   * Separates texts that individually exceed the limit from those that fit,
   * and reports whether the fitting texts together exceed the limit.
   *
   * @param {string[]} texts - Texts to classify.
   * @param {number} charLimit - Size limit for a single request.
   * @param {Object<string, number>} bytesData - Size of each text, keyed by text.
   * @returns {{smallTexts: string[], largeTexts: string[], isBatchingRequired: boolean}}
   */
  classifyBulkTexts: function(texts, charLimit, bytesData) {
      var count = 0;
      var classifiedData = {};
      classifiedData["smallTexts"] = [];
      classifiedData["largeTexts"] = [];
      classifiedData["isBatchingRequired"] = false;
      for (var i = 0; i < texts.length; i++) {
          var size = bytesData[texts[i]];
          if (size > charLimit) {
              classifiedData["largeTexts"].push(texts[i]);
          } else {
              classifiedData["smallTexts"].push(texts[i]);
              count = count + size;
              if (count > charLimit) {
                  classifiedData["isBatchingRequired"] = true;
              }
          }
      }
      return classifiedData;
  },

  /**
   * Appends one request entry per target language for every large text,
   * each large text travelling alone in its own entry.
   *
   * @param {Object[]} result - Array the entries are pushed onto.
   * @param {string[]} largeTexts - Texts to emit individually.
   * @param {string[]} targetLanguages - Languages to emit an entry for.
   */
  addLargeTextsToArray: function(result, largeTexts, targetLanguages) {
      for (var large = 0; large < largeTexts.length; large++) {
          this.transformBatchTextsResponse([largeTexts[large]], targetLanguages, result);
      }
  },

  /**
   * Builds [size, text] pairs sorted by ascending size.
   *
   * Texts whose size is missing or zero in bytesData are left out.
   *
   * @param {string[]} texts - Texts to sort.
   * @param {Object<string, number>} bytesData - Size of each text, keyed by text.
   * @returns {Array<Array>} Pairs of [size, text], smallest first.
   */
  getSortedBytesMap: function(texts, bytesData) {
      var bytesList = [];
      for (var i = 0; i < texts.length; i++) {
          var kbSize = bytesData[texts[i]];
          if (kbSize) {
              bytesList.push([kbSize, texts[i]]);
          }
      }
      // The default sort compares the pairs as strings ("1000,a" < "20,b"),
      // so compare sizes numerically; ties fall back to the text for a stable order.
      return bytesList.sort(function(a, b) {
          if (a[0] !== b[0])
              return a[0] - b[0];
          if (a[1] < b[1])
              return -1;
          return a[1] > b[1] ? 1 : 0;
      });
  },

  /**
   * Greedily packs texts into batches whose combined size stays within
   * limits.charLimit and whose length stays within limits.arrayLimit.
   *
   * Each batch takes the largest remaining texts first, then tops up with the
   * smallest remaining ones.
   *
   * @param {string[]} texts - Texts to batch.
   * @param {Object<string, number>} bytesData - Size of each text, keyed by text.
   * @param {{charLimit: number, arrayLimit: number}} limits - A non-positive or
   *     non-numeric arrayLimit means no cap on the number of texts per batch.
   * @returns {string[][]} Batches of texts.
   */
  getBatchTexts: function(texts, bytesData, limits) {
      var bytesList = this.getSortedBytesMap(texts, bytesData);
      var weight = limits.charLimit;
      // An arrayLimit of 0 could never be satisfied and used to loop forever;
      // treat it like -1 (unlimited), as addSmallTextsToArray effectively does.
      var maxItems = limits.arrayLimit > 0 ? limits.arrayLimit : Infinity;
      var splitTexts = [];
      var startIdx = 0;
      var endIdx = bytesList.length - 1;
      while (startIdx <= endIdx) {
          var singleSplit = [];
          var tempWeight = 0;
          // Take the largest remaining texts while they still fit.
          while (singleSplit.length < maxItems &&
              startIdx <= endIdx &&
              weight >= (tempWeight + bytesList[endIdx][0])) {
              tempWeight += bytesList[endIdx][0];
              singleSplit.push(bytesList[endIdx][1]);
              endIdx -= 1;
          }
          // Fill the leftover capacity with the smallest remaining texts.
          while (singleSplit.length < maxItems &&
              startIdx <= endIdx &&
              weight >= (tempWeight + bytesList[startIdx][0])) {
              tempWeight += bytesList[startIdx][0];
              singleSplit.push(bytesList[startIdx][1]);
              startIdx += 1;
          }
          // Nothing fit (e.g. an entry larger than the limit): emit it alone so
          // the loop always makes progress instead of spinning forever.
          if (singleSplit.length === 0) {
              singleSplit.push(bytesList[endIdx][1]);
              endIdx -= 1;
          }
          splitTexts.push(singleSplit);
      }
      return splitTexts;
  },

  /**
   * Pushes one request entry per target language onto result.
   * Every entry references the same texts array.
   *
   * @param {string[]} texts - Texts for the entries.
   * @param {string[]} targetLanguages - Languages to emit an entry for.
   * @param {Object[]} result - Array the entries are pushed onto.
   */
  transformBatchTextsResponse: function(texts, targetLanguages, result) {
      for (var i = 0; i < targetLanguages.length; i++) {
          result.push({
              "texts_to_translate": texts,
              "target_language": targetLanguages[i]
          });
      }
  },

  /**
   * Emits the small texts in input order, starting a new group each time a
   * group reaches limits.arrayLimit texts.
   *
   * @param {string[]} smallTexts - Texts to emit.
   * @param {Object[]} result - Array the entries are pushed onto.
   * @param {{arrayLimit: number}} limits - Group size cap; values below 1 never match, so no cap applies.
   * @param {string[]} targetLanguages - Languages to emit an entry for.
   */
  addSmallTextsToArray: function(smallTexts, result, limits, targetLanguages) {
      var tempSmallTexts = [];
      for (var i = 0; i < smallTexts.length; i++) {
          tempSmallTexts.push(smallTexts[i]);
          if (tempSmallTexts.length === limits.arrayLimit) {
              this.transformBatchTextsResponse(tempSmallTexts, targetLanguages, result);
              tempSmallTexts = [];
          }
      }
      if (tempSmallTexts.length > 0) {
          this.transformBatchTextsResponse(tempSmallTexts, targetLanguages, result);
      }
  },

  /**
   * Turns the input texts into request entries: small texts first (packed into
   * batches when their combined size exceeds the limit), then each large text
   * on its own.
   *
   * @param {string[]} texts - Texts to translate.
   * @param {{charLimit: number, arrayLimit: number, textBufferSize: number}} limits
   * @param {string[]} targetLanguages - Languages to emit an entry for.
   * @returns {Object[]} Entries of the form {texts_to_translate, target_language}.
   */
  splitInputTextsIntoBatches: function(texts, limits, targetLanguages) {
      var result = [];
      var bytesData = this.getBytesData(texts, limits);
      var classifiedData = this.classifyBulkTexts(texts, limits.charLimit, bytesData);
      if (classifiedData["isBatchingRequired"]) {
          var splitTexts = this.getBatchTexts(classifiedData.smallTexts, bytesData, limits);
          for (var idx = 0; idx < splitTexts.length; idx++) {
              this.transformBatchTextsResponse(splitTexts[idx], targetLanguages, result);
          }
      } else {
          this.addSmallTextsToArray(classifiedData.smallTexts, result, limits, targetLanguages);
      }
      this.addLargeTextsToArray(result, classifiedData.largeTexts, targetLanguages);
      return result;
  },

  /**
   * Batches texts using the limits read from system properties.
   *
   * @param {string[]} texts - Texts to translate.
   * @param {string[]} targetLanguages - Languages to emit an entry for.
   * @returns {Object[]} Entries of the form {texts_to_translate, target_language}.
   */
  batchBulkTexts: function(texts, targetLanguages) {
      var limits = this.getLimits();
      return this.splitInputTextsIntoBatches(texts, limits, targetLanguages);
  },

  /**
   * Copies a per-text result, converting its text_translations map
   * (language -> translated text) into an array of
   * {translated_text, target_language} objects. Other keys are copied as-is.
   *
   * @param {Object} textResult - Result for a single text.
   * @returns {Object} The reshaped copy.
   */
  _getProcessedTextResult: function(textResult) {
      var processedTextResult = {};
      var textResultKeys = Object.keys(textResult);
      for (var keyIdx = 0; keyIdx < textResultKeys.length; keyIdx++) {
          var key = textResultKeys[keyIdx];
          if (key === 'text_translations') {
              var textTranslations = [];
              var languages = Object.keys(textResult.text_translations);
              for (var idx = 0; idx < languages.length; idx++) {
                  var language = languages[idx];
                  textTranslations.push({
                      'translated_text': textResult.text_translations[language],
                      'target_language': language
                  });
              }
              processedTextResult.text_translations = textTranslations;
          } else {
              processedTextResult[key] = textResult[key];
          }
      }
      return processedTextResult;
  },

  /**
   * Assembles the final response, listing per-text results in the order of texts.
   * The overall status becomes 'Error' as soon as any per-text status is 'Error'.
   *
   * @param {string[]} texts - Texts in the order they should appear in the response.
   * @param {Object<string, Object>} result - Per-text results keyed by text; every
   *     text is expected to have an entry.
   * @param {boolean} isTranslation - Truthy stores the list under 'translations',
   *     otherwise under 'detections'.
   * @returns {Object} Response with 'status' and the result list.
   */
  rearrangeJSONResult: function(texts, result, isTranslation) {
      var response = {
          'status': 'Success'
      };
      var rearrangedResponse = [];
      for (var i = 0; i < texts.length; i++) {
          var eachTextResult = result[texts[i]];
          if ('Error' === eachTextResult.status) {
              response['status'] = 'Error';
          }
          rearrangedResponse.push(this._getProcessedTextResult(eachTextResult));
      }
      if (isTranslation)
          response['translations'] = rearrangedResponse;
      else
          response['detections'] = rearrangedResponse;
      return response;
  },

  type: 'IBMTranslationBatchingUtil'
};

Sys ID

2a20abf45302101026b0ddeeff7b12fc

Official Documentation

Official Docs: