Name

sn_ms_trans_spoke.MicrosoftTranslationBatchingUtil

Description

Bulk-text handling methods used by the Microsoft Translation actions.

Script

/**
 * Batching helpers for the Microsoft Translation actions.
 *
 * Splits bulk text input into request-sized batches (for both translation and
 * language detection) and reshapes per-text results into the response format.
 * Text sizes are measured with String.length everywhere in this class.
 */
var MicrosoftTranslationBatchingUtil = Class.create();
MicrosoftTranslationBatchingUtil.prototype = {
  initialize: function() {},

  /**
   * Reads a system property as a base-10 integer.
   *
   * @param {string} name - Property name.
   * @param {number} defaultValue - Used when the property is unset or not numeric.
   * @returns {number} The parsed value, or defaultValue when parsing yields NaN.
   */
  _getIntProperty: function(name, defaultValue) {
      var parsed = parseInt(gs.getProperty(name, defaultValue), 10);
      // A non-numeric property value would otherwise turn every size check into a NaN comparison.
      return isNaN(parsed) ? defaultValue : parsed;
  },

  /**
   * @returns {{textLimit: number, charLimit: number, arrayLimit: number}}
   *     Limits for translate requests: per-text length, per-request length and
   *     number of texts per request.
   */
  getTranslationLimits: function() {
      return {
          'textLimit': this._getIntProperty('glide.dynamic.translation.microsoft.translate.array.element.limit', 10000),
          'charLimit': this._getIntProperty('glide.dynamic.translation.microsoft.translate.request.limit', 10000),
          'arrayLimit': this._getIntProperty('glide.dynamic.translation.microsoft.translate.array.limit', 100)
      };
  },

  /**
   * @returns {{textLimit: number, charLimit: number, arrayLimit: number}}
   *     Limits for detect requests: per-text length, per-request length and
   *     number of texts per request.
   */
  getDetectionLimits: function() {
      return {
          'textLimit': this._getIntProperty('glide.dynamic.translation.microsoft.detect.array.element.limit', 50000),
          'charLimit': this._getIntProperty('glide.dynamic.translation.microsoft.detect.request.limit', 50000),
          'arrayLimit': this._getIntProperty('glide.dynamic.translation.microsoft.detect.array.limit', 100)
      };
  },

  /**
   * Separates texts longer than charLimit from the rest and reports whether the
   * remaining ("small") texts together exceed charLimit.
   *
   * @param {string[]} texts - Input texts.
   * @param {number} charLimit - Length threshold (splitInputTextsIntoBatches passes limits.textLimit).
   * @returns {{smallTexts: string[], largeTexts: string[], isBatchingRequired: boolean}}
   */
  classifyBulkTexts: function(texts, charLimit) {
      var smallTotal = 0;
      var classifiedData = {
          "smallTexts": [],
          "largeTexts": [],
          "isBatchingRequired": false
      };
      for (var i = 0; i < texts.length; i++) {
          var text = texts[i];
          if (text.length > charLimit) {
              classifiedData["largeTexts"].push(text);
              continue;
          }
          classifiedData["smallTexts"].push(text);
          smallTotal += text.length;
          if (smallTotal > charLimit) {
              classifiedData["isBatchingRequired"] = true;
          }
      }
      return classifiedData;
  },

  /**
   * Appends one request entry for a group of texts: a translate entry (possibly
   * split by target language) or a detect entry.
   *
   * @param {boolean} isTranslation - true for translate entries, false for detect entries.
   * @param {string[]} texts - Texts of the entry.
   * @param {number} charCount - Combined length of texts.
   * @param {number} charLimit - Per-request length limit.
   * @param {string[]} targetLanguages - Target languages (translation only).
   * @param {number[]} targetLanguageLimits - Output of buildTargetLanguageLimits (translation only).
   * @param {Object[]} result - Array the entries are appended to.
   */
  _pushBatch: function(isTranslation, texts, charCount, charLimit, targetLanguages, targetLanguageLimits, result) {
      if (isTranslation) {
          this.transformBatchTextsResponse(texts, charCount, targetLanguages, charLimit, targetLanguageLimits, result);
      } else {
          result.push({
              "texts_to_detect": texts
          });
      }
  },

  /**
   * Appends every large text to result as its own single-text entry.
   *
   * @param {boolean} isTranslation - true for translate entries, false for detect entries.
   * @param {Object[]} result - Array the entries are appended to.
   * @param {string[]} largeTexts - Texts classified as large.
   * @param {number} charLimit - Per-request length limit.
   * @param {string[]} targetLanguages - Target languages (translation only).
   * @param {number[]} targetLanguageLimits - Output of buildTargetLanguageLimits (translation only).
   */
  addLargeTextsToArray: function(isTranslation, result, largeTexts, charLimit, targetLanguages, targetLanguageLimits) {
      for (var large = 0; large < largeTexts.length; large++) {
          var text = largeTexts[large];
          this._pushBatch(isTranslation, [text], text.length, charLimit, targetLanguages, targetLanguageLimits, result);
      }
  },

  /**
   * Packs texts into groups whose combined length does not exceed
   * limits.charLimit and whose size does not exceed limits.arrayLimit.
   *
   * Greedy strategy: each group first takes the longest remaining texts that
   * fit, then is topped up with the shortest remaining texts.
   *
   * @param {string[]} texts - Texts to pack.
   * @param {{charLimit: number, arrayLimit: number}} limits - Numeric limits.
   * @returns {string[][]} The groups; every input text appears in exactly one group.
   */
  getBatchTexts: function(texts, limits) {
      var sized = [];
      for (var i = 0; i < texts.length; i++) {
          sized.push([texts[i].length, texts[i]]);
      }
      // Compare lengths numerically; the default sort compares the pairs as
      // strings ("10,..." < "100,..." < "5,...") and breaks the greedy packing.
      sized.sort(function(a, b) {
          return a[0] - b[0];
      });

      var weight = limits.charLimit;
      var splitTexts = [];
      var startIdx = 0;
      var endIdx = sized.length - 1;
      // Texts between startIdx and endIdx (inclusive) are still unassigned.
      while (startIdx <= endIdx) {
          var singleSplit = [];
          var tempWeight = 0;
          // Longest remaining texts first.
          while (singleSplit.length !== limits.arrayLimit &&
              startIdx <= endIdx &&
              weight >= (tempWeight + sized[endIdx][0])) {
              tempWeight += sized[endIdx][0];
              singleSplit.push(sized[endIdx][1]);
              endIdx -= 1;
          }
          // Fill the remaining room with the shortest texts.
          while (singleSplit.length !== limits.arrayLimit &&
              startIdx <= endIdx &&
              weight >= (tempWeight + sized[startIdx][0])) {
              tempWeight += sized[startIdx][0];
              singleSplit.push(sized[startIdx][1]);
              startIdx += 1;
          }
          if (singleSplit.length === 0) {
              // Nothing fits (text longer than charLimit, or arrayLimit of 0):
              // emit the longest text alone so the loop always makes progress.
              singleSplit.push(sized[endIdx][1]);
              endIdx -= 1;
          }
          splitTexts.push(singleSplit);
      }
      return splitTexts;
  },

  /**
   * Builds the per-language-count length limits.
   *
   * @param {string[]} targetLanguages - Target languages.
   * @param {number} charLimit - Per-request length limit.
   * @returns {number[]} Entry k holds charLimit / (k + 1), for k + 1 target languages.
   */
  buildTargetLanguageLimits: function(targetLanguages, charLimit) {
      var limits = [];
      for (var languageCount = 1; languageCount <= targetLanguages.length; languageCount++) {
          limits.push(charLimit / languageCount);
      }
      return limits;
  },

  /**
   * Binary-searches the (descending) limits for the largest number of target
   * languages whose limit is still at least charCount.
   *
   * @param {number[]} targetLanguageLimits - Output of buildTargetLanguageLimits.
   * @param {number} charCount - Combined length of the texts.
   * @returns {number} Languages per batch; 1 when charCount exceeds every limit
   *     or the limits array is empty.
   */
  getMaxTargetLanguagesPerBatch: function(targetLanguageLimits, charCount) {
      // Starts at 0 so that "no limit fits" yields 1 instead of NaN.
      var index = 0;
      var left = 0;
      var right = targetLanguageLimits.length - 1;
      while (left <= right) {
          var mid = Math.ceil((left + right) / 2);
          if (charCount > targetLanguageLimits[mid]) {
              right = mid - 1;
          } else {
              index = mid;
              left = mid + 1;
          }
      }
      return index + 1;
  },

  /**
   * Appends one translate entry per group of target languages, each group
   * holding at most getMaxTargetLanguagesPerBatch() languages. All entries
   * reference the same texts array.
   *
   * @param {string[]} texts - Texts of the entries.
   * @param {number} charCount - Combined length of texts.
   * @param {string[]} targetLanguages - Target languages to split.
   * @param {number[]} targetLanguageLimits - Output of buildTargetLanguageLimits.
   * @param {Object[]} result - Array the entries are appended to.
   */
  splitTargetLanguagesIntoBatches: function(texts, charCount, targetLanguages, targetLanguageLimits, result) {
      var maxTargetLanguagesPerBatch = this.getMaxTargetLanguagesPerBatch(targetLanguageLimits, charCount);
      var tempTargetLanguages = [];
      for (var i = 0; i < targetLanguages.length; i++) {
          tempTargetLanguages.push(targetLanguages[i]);
          if (tempTargetLanguages.length === maxTargetLanguagesPerBatch) {
              result.push({
                  "texts_to_translate": texts,
                  "target_languages": tempTargetLanguages
              });
              tempTargetLanguages = [];
          }
      }
      if (tempTargetLanguages.length > 0) {
          result.push({
              "texts_to_translate": texts,
              "target_languages": tempTargetLanguages
          });
      }
  },

  /**
   * Appends the translate entries for one group of texts.
   *
   * The target languages are split across several entries only when charCount
   * is within charLimit but above charLimit / targetLanguages.length. In every
   * other case a single entry with all target languages is appended.
   * NOTE(review): texts above charLimit are emitted unsplit, presumably because
   * splitting the languages cannot bring them under the limit - confirm.
   *
   * @param {string[]} texts - Texts of the group.
   * @param {number} charCount - Combined length of texts.
   * @param {string[]} targetLanguages - Target languages.
   * @param {number} charLimit - Per-request length limit.
   * @param {number[]} targetLanguageLimits - Output of buildTargetLanguageLimits.
   * @param {Object[]} result - Array the entries are appended to.
   */
  transformBatchTextsResponse: function(texts, charCount, targetLanguages, charLimit, targetLanguageLimits, result) {
      if (charCount <= charLimit && charCount > charLimit / targetLanguages.length) {
          this.splitTargetLanguagesIntoBatches(texts, charCount, targetLanguages, targetLanguageLimits, result);
          return;
      }
      result.push({
          "texts_to_translate": texts,
          "target_languages": targetLanguages
      });
  },

  /**
   * Cuts selectedTexts into consecutive chunks of limits.arrayLimit texts and
   * appends one request entry (or language-split entries) per chunk.
   *
   * @param {boolean} isTranslation - true for translate entries, false for detect entries.
   * @param {string[]} selectedTexts - Texts to chunk.
   * @param {Object[]} result - Array the entries are appended to.
   * @param {{charLimit: number, arrayLimit: number}} limits - Numeric limits.
   * @param {string[]} targetLanguages - Target languages (translation only).
   * @param {number[]} targetLanguageLimits - Output of buildTargetLanguageLimits (translation only).
   */
  processSelectedTexts: function(isTranslation, selectedTexts, result, limits, targetLanguages, targetLanguageLimits) {
      var chunk = [];
      var charCount = 0;
      for (var idx = 0; idx < selectedTexts.length; idx++) {
          chunk.push(selectedTexts[idx]);
          charCount += selectedTexts[idx].length;
          if (chunk.length === limits.arrayLimit) {
              this._pushBatch(isTranslation, chunk, charCount, limits.charLimit, targetLanguages, targetLanguageLimits, result);
              chunk = [];
              charCount = 0;
          }
      }
      // Flush the final, partially filled chunk.
      if (chunk.length > 0) {
          this._pushBatch(isTranslation, chunk, charCount, limits.charLimit, targetLanguages, targetLanguageLimits, result);
      }
  },

  /**
   * Chunks the small texts in their given order. Same behaviour as
   * processSelectedTexts, kept as a separate entry point for existing callers.
   *
   * @param {boolean} isTranslation - true for translate entries, false for detect entries.
   * @param {string[]} smallTexts - Texts classified as small.
   * @param {Object[]} result - Array the entries are appended to.
   * @param {{charLimit: number, arrayLimit: number}} limits - Numeric limits.
   * @param {string[]} targetLanguages - Target languages (translation only).
   * @param {number[]} targetLanguageLimits - Output of buildTargetLanguageLimits (translation only).
   */
  addSmallTextsToArray: function(isTranslation, smallTexts, result, limits, targetLanguages, targetLanguageLimits) {
      this.processSelectedTexts(isTranslation, smallTexts, result, limits, targetLanguages, targetLanguageLimits);
  },

  /**
   * Builds the full list of request entries for the given texts: small texts
   * first (packed by getBatchTexts when their combined length requires it),
   * then each large text as its own entry.
   *
   * @param {boolean} isTranslation - true for translate entries, false for detect entries.
   * @param {string[]} texts - Input texts.
   * @param {{textLimit: number, charLimit: number, arrayLimit: number}} limits - Numeric limits.
   * @param {string[]} targetLanguages - Target languages (translation only).
   * @param {number[]} targetLanguageLimits - Output of buildTargetLanguageLimits (translation only).
   * @returns {Object[]} Request entries.
   */
  splitInputTextsIntoBatches: function(isTranslation, texts, limits, targetLanguages, targetLanguageLimits) {
      var result = [];
      var classifiedData = this.classifyBulkTexts(texts, limits.textLimit);
      if (classifiedData["isBatchingRequired"]) {
          var splitTexts = this.getBatchTexts(classifiedData.smallTexts, limits);
          for (var idx = 0; idx < splitTexts.length; idx++) {
              this.processSelectedTexts(isTranslation, splitTexts[idx], result, limits, targetLanguages, targetLanguageLimits);
          }
      } else {
          this.addSmallTextsToArray(isTranslation, classifiedData.smallTexts, result, limits, targetLanguages, targetLanguageLimits);
      }
      this.addLargeTextsToArray(isTranslation, result, classifiedData.largeTexts, limits.charLimit, targetLanguages, targetLanguageLimits);
      return result;
  },

  /**
   * Batches texts for translation using the translation limits.
   *
   * @param {string[]} texts - Texts to translate.
   * @param {string[]} targetLanguages - Target languages.
   * @returns {Object[]} Entries of the form {texts_to_translate, target_languages}.
   */
  batchBulkTexts: function(texts, targetLanguages) {
      var limits = this.getTranslationLimits();
      var targetLanguageLimits = this.buildTargetLanguageLimits(targetLanguages, limits.charLimit);
      return this.splitInputTextsIntoBatches(true, texts, limits, targetLanguages, targetLanguageLimits);
  },

  /**
   * Batches texts for language detection using the detection limits.
   *
   * @param {string[]} texts - Texts to run detection on.
   * @returns {Object[]} Entries of the form {texts_to_detect}.
   */
  batchBulkDetectTexts: function(texts) {
      return this.splitInputTextsIntoBatches(false, texts, this.getDetectionLimits(), [], []);
  },

  /**
   * Copies a per-text result, converting its text_translations map
   * ({language: translatedText}) into an array of
   * {translated_text, target_language} objects. Other keys are copied as-is.
   *
   * @param {Object} textResult - Result object of a single text.
   * @returns {Object} The converted copy.
   */
  _getProcessedTextResult: function(textResult) {
      var processedTextResult = {};
      var textResultKeys = Object.keys(textResult);
      for (var keyIdx = 0; keyIdx < textResultKeys.length; keyIdx++) {
          var key = textResultKeys[keyIdx];
          if (key !== 'text_translations') {
              processedTextResult[key] = textResult[key];
              continue;
          }
          var translationsByLanguage = textResult.text_translations;
          var languages = Object.keys(translationsByLanguage);
          var textTranslations = [];
          for (var idx = 0; idx < languages.length; idx++) {
              textTranslations.push({
                  'translated_text': translationsByLanguage[languages[idx]],
                  'target_language': languages[idx]
              });
          }
          processedTextResult.text_translations = textTranslations;
      }
      return processedTextResult;
  },

  /**
   * Builds the response object: per-text results in the order of texts, plus an
   * overall status that is 'Error' as soon as one per-text status is 'Error'.
   *
   * A TypeError is thrown if result has no entry for one of the texts.
   *
   * @param {string[]} texts - Texts in the order the results should be listed.
   * @param {Object} result - Per-text results keyed by the text itself.
   * @param {boolean} isTranslation - Results go under 'translations' when true, 'detections' otherwise.
   * @returns {Object} {status, translations} or {status, detections}.
   */
  rearrangeJSONResult: function(texts, result, isTranslation) {
      var response = {
          'status': 'Success'
      };
      var rearrangedResponse = [];
      for (var i = 0; i < texts.length; i++) {
          var eachTextResult = result[texts[i]];
          if ('Error' === eachTextResult.status) {
              response['status'] = 'Error';
          }
          rearrangedResponse.push(this._getProcessedTextResult(eachTextResult));
      }
      response[isTranslation ? 'translations' : 'detections'] = rearrangedResponse;
      return response;
  },

  type: 'MicrosoftTranslationBatchingUtil'
};

Sys ID

854bdb455302101026b0ddeeff7b124f

Official Documentation

Official Docs: