Name

global.CancelMLSolution

Description

Utility class that cancels ML update jobs and stores statistics about stuck update-job configurations.

Script

/**
 * Cancels ML solution update jobs that are stuck in a retryable update state
 * and records how often each update configuration had to be retried.
 *
 * Retry statistics are written to the sa_ml_solution_failure_stats table when
 * it exists; otherwise they are kept in SA hash entries via SMMLTableUtils.
 */
var CancelMLSolution = Class.create();
CancelMLSolution.prototype = {
    /**
     * Creates the helper objects and reads the retry sampling configuration.
     */
    initialize: function() {
        this.smMLTableUtils = new SMMLTableUtils();
        this.afpLogger = new ApplicationsFingerprintLogger('CancelMLSolution');
        this.afpOperations = new ApplicationsFingerprintOperations();
        // Update states for which a cancel-and-retry is attempted.
        this.retryableUpdateStats = ["Waiting for Updating", "Updating Request Received", "Fetching Files for Updating", "Updating Solution", "Preparing Data", "Uploading Updated Solution", "Unauthorized", "Configuration OR Network Error"];
        this.isStatsTablePresent = gs.tableExists('sa_ml_solution_failure_stats');

        // To avoid creating load on the system, a cancel is only attempted when
        // the retry count equals one of the configured sample values.
        var sampleRateValues = String(gs.getProperty('sa_ml.sample_rate_values', '4,9'));
        var rawSampleRates = sampleRateValues.split(',');
        this.sampleRateArr = [];
        for (var i = 0; i < rawSampleRates.length; i++) {
            // Trim so that a value such as "4, 9" still matches the stringified retry count.
            var sampleRate = rawSampleRates[i].trim();
            if (sampleRate !== '') {
                this.sampleRateArr.push(sampleRate);
            }
        }

        this.lastSampleRateValue = this.sampleRateArr[this.sampleRateArr.length - 1];
    },

    /**
     * Inspects the update state of the active solution behind a hash key and
     * cancels its update job when appropriate.
     *
     * @param {string} solutionHashKey - key of the SA hash record that holds the solution name
     * @param {*} solutionStore - passed through to SMMLTableUtils.getActiveSolution
     * @param {boolean} forceRetry - when truthy, cancel immediately without any retry bookkeeping
     * @returns {boolean} true when nothing had to be cancelled (no hash record, no active
     *     solution, or the update state is not retryable) or when the cancel call succeeded;
     *     false when the cancel failed or was skipped (job flagged as stuck, maximum retry
     *     count exceeded, or retry count not in the sample list)
     */
    checkUpdateStateAndCancelOnError: function(solutionHashKey, solutionStore, forceRetry) {
        // Intentionally left undefined until they can be resolved; the log calls
        // below receive whatever is known at that point.
        var solutionName;
        var solutionSysID;

        var solutionNameGr = this.smMLTableUtils.getSaHashGR(solutionHashKey);
        if (!solutionNameGr) {
            var noCancelLog = this.afpLogger.buildLog(solutionName, solutionSysID, ApplicationsFingerprintOperations.UPDATE, "CancelMLSolution : Solution Hash Does Not Exists No need to Cancel");
            this.afpLogger.info(noCancelLog);
            return true;
        }

        // Resolve the name before branching so every path below logs and cleans
        // up with the real solution name instead of undefined.
        solutionName = solutionNameGr.getValue("hash");

        var activeSolution = this.smMLTableUtils.getActiveSolution(solutionName, solutionStore);
        if (!activeSolution) {
            var noActiveSolutionLog = this.afpLogger.buildLog(solutionName, solutionSysID, ApplicationsFingerprintOperations.UPDATE, "CancelMLSolution : Solution " + solutionName + " active solution not found no need to cancel");
            this.afpLogger.info(noActiveSolutionLog);
            // Clean the hash of the stuck solution. The sys_id is unknown here
            // because it can only be read from an active solution.
            this.afpOperations.deleteStuckHash(solutionName, solutionSysID, solutionHashKey);
            return true;
        }

        var updateStatus = activeSolution.getUpdateStatus();
        solutionSysID = this.smMLTableUtils.getSolutionProperty(activeSolution, solutionName, "sys_id");
        var lastUpdateGdt = this.smMLTableUtils.getSolutionUpdateTime(solutionSysID);
        var updateLog = this.afpLogger.buildLog(solutionName, solutionSysID, ApplicationsFingerprintOperations.UPDATE, 'Solution Update Time ' + lastUpdateGdt);
        this.afpLogger.info(updateLog);

        // A caller-requested force retry bypasses all state and retry-count checks.
        if (forceRetry) {
            return this.cancelUpdateJob(activeSolution);
        }

        if (!JSUtil.contains(this.retryableUpdateStats, updateStatus)) {
            updateLog = this.afpLogger.buildLog(solutionName, solutionSysID, ApplicationsFingerprintOperations.UPDATE, "CancelMLSolution : Solution " + solutionName + " Update State is not in error state");
            this.afpLogger.info(updateLog);
            return true;
        }

        // The job was previously flagged as stuck (maximum retries exceeded).
        // Only release it when retriggering is enabled and enough time has passed.
        if (this.isJobStuck(solutionHashKey, solutionName, solutionSysID)) {
            if (GlideProperties.getBoolean("sa_ml.retrigger.enabled", true)) {
                var timePeriod = GlideProperties.getInt('sa_ml.time_period_in_days', 14);
                var gdtNow = new GlideDateTime();
                // lastUpdateGdt is shifted in place; it is not used again afterwards.
                lastUpdateGdt.addDays(timePeriod);
                if (gdtNow.after(lastUpdateGdt)) {
                    updateLog = this.afpLogger.buildLog(solutionName, solutionSysID, ApplicationsFingerprintOperations.UPDATE, 'Solution trigger because the solution update passed  ' + timePeriod + ' days');
                    this.afpLogger.info(updateLog);
                    this.afpOperations.deleteStuckHash(solutionName, solutionSysID, solutionHashKey);
                    return this.cancelUpdateJob(activeSolution);
                }
            }
            return false;
        }

        // Check the number of retries already recorded for this update configuration.
        var retryCount = this.getRetryCount(activeSolution, solutionNameGr);

        // The last sample value is the default retry ceiling. Parse it explicitly
        // rather than handing a string to getInt; 9 mirrors the shipped default "4,9".
        var defaultMaxRetries = parseInt(this.lastSampleRateValue, 10);
        if (isNaN(defaultMaxRetries)) {
            defaultMaxRetries = 9;
        }
        var maxRetries = GlideProperties.getInt('sa_ml.cancel.update.ml.job.max.retry.count', defaultMaxRetries);

        if (parseInt(retryCount, 10) > maxRetries) {
            updateLog = this.afpLogger.buildLog(solutionName, solutionSysID, ApplicationsFingerprintOperations.ERROR, "Maximum retry count exceeded for solution: " + solutionName + " Please contact Support for Additional Assistance");
            this.afpLogger.error(updateLog);
            // Flag the solution as stuck so later runs take the isJobStuck path above.
            this.smMLTableUtils.updateSaHash("cancel_retry_stuck_" + solutionHashKey, true);
            return false;
        }

        this.smMLTableUtils.deleteSaHash("cancel_retry_stuck_" + solutionHashKey);

        // Do not cancel on every retry, only when the retry number is one of the
        // sampled values. The failure is still counted in the statistics.
        if (this.sampleRateArr.indexOf(String(retryCount)) === -1) {
            gs.info("CancelMLSolution : Ignore from running this for the retry count: " + retryCount);
            this.updateStats(activeSolution, solutionNameGr, updateStatus);
            return false;
        }

        // The job is in a retryable state and the retry count is sampled: cancel it.
        return this.cancelJobAndRetry(activeSolution, solutionNameGr, solutionHashKey, updateStatus);
    },

    /**
     * Cancels the update job and, on success, clears the first-error timestamp
     * hash and records the retry in the statistics.
     *
     * @param {Object} activeSolution - solution object exposing cancelUpdateJob()
     * @param {GlideRecord} solutionNameGr - SA hash record whose "hash" field is the solution name
     * @param {string} solutionHashKey - key of the SA hash record
     * @param {string} updateStatus - update state the job was found in
     * @returns {boolean} true when the cancel call succeeded, false otherwise
     */
    cancelJobAndRetry: function(activeSolution, solutionNameGr, solutionHashKey, updateStatus) {
        if (!this.cancelUpdateJob(activeSolution)) {
            gs.error("CancelMLSolution : Cancellation of Update Job for solution " + solutionNameGr.getValue("hash") + " failed");
            return false;
        }

        gs.debug("CancelMLSolution : Cancellation of Update Job for solution " + solutionNameGr.getValue("hash") + " Successful");
        this.smMLTableUtils.deleteSaHash("first_error_time_" + solutionHashKey);
        this.updateStats(activeSolution, solutionNameGr, updateStatus);
        return true;
    },

    /**
     * Invokes the ML API to cancel the update job of a solution.
     *
     * @param {Object} activeSolution - solution object exposing cancelUpdateJob()
     * @returns {boolean} true when the call completed, false when it threw
     */
    cancelUpdateJob: function(activeSolution) {
        try {
            activeSolution.cancelUpdateJob();
        } catch (ex) {
            // The caught value may expose getMessage() or only a message property;
            // calling getMessage() unconditionally would throw from inside this handler.
            var message;
            if (ex && typeof ex.getMessage === 'function') {
                message = ex.getMessage();
            } else if (ex && ex.message) {
                message = ex.message;
            } else {
                message = ex;
            }
            gs.error('Exception caught: ' + message);
            return false;
        }
        return true;
    },

    /**
     * Returns how many retries are recorded for the solution's current update configuration.
     *
     * @param {Object} activeSolution - active solution object
     * @param {GlideRecord} solutionNameGr - SA hash record ("name" = hash key, "hash" = solution name)
     * @returns {number} recorded retry count; 0 when the solution has no update configuration
     */
    getRetryCount: function(activeSolution, solutionNameGr) {
        var retryCount = 0;
        var solutionHashKey = solutionNameGr.getValue("name");
        var solutionName = solutionNameGr.getValue("hash");
        var solution = this.smMLTableUtils.getSolutionProperty(activeSolution, solutionName, "sys_id");
        var updateConfiguration = this.smMLTableUtils.getSolutionProperty(activeSolution, solutionName, "update_config");
        if (updateConfiguration == null) {
            return retryCount;
        }

        // Retries are tracked per checksum of the update configuration's filter.
        var filter = JSON.parse(updateConfiguration).filter;
        var currentChecksum = this.getCheckSum(filter);
        if (this.isStatsTablePresent) {
            // One row is inserted per retry, so the row count is the retry count.
            var gr = new GlideRecord("sa_ml_solution_failure_stats");
            gr.addQuery("checksum", currentChecksum);
            gr.addQuery("solution", solution);
            gr.query();
            retryCount = parseInt(gr.getRowCount(), 10);
        } else {
            // Without the table the count lives in a "<checksum>_<count>" hash value.
            retryCount = this._getHashRetryCount(solutionHashKey, currentChecksum);
        }

        var retryLog = this.afpLogger.buildLog(solutionName, solution, ApplicationsFingerprintOperations.UPDATE, 'Existing retires for update configuration ' + updateConfiguration + ' is ' + retryCount);
        retryLog['retries'] = retryCount;
        this.afpLogger.info(retryLog);
        return retryCount;
    },

    /**
     * Records one more retry for the solution, if it has an update configuration.
     *
     * @param {Object} activeSolution - active solution object
     * @param {GlideRecord} solutionNameGr - SA hash record of the solution
     * @param {string} updateStatus - update state the job was found in
     */
    updateStats: function(activeSolution, solutionNameGr, updateStatus) {
        var solutionName = solutionNameGr.getValue("hash");
        var updateConfiguration = this.smMLTableUtils.getSolutionProperty(activeSolution, solutionName, "update_config");
        if (updateConfiguration != null) {
            this.updateRetryCount(activeSolution, updateConfiguration, solutionNameGr, updateStatus);
        }
    },

    /**
     * Persists one retry for the given update configuration and increments the
     * solution's total retry counter.
     *
     * @param {Object} activeSolution - active solution object
     * @param {string} updateConfiguration - JSON text with a "filter" property
     * @param {GlideRecord} solutionNameGr - SA hash record ("name" = hash key, "hash" = solution name)
     * @param {string} updateStatus - update state the job was found in
     */
    updateRetryCount: function(activeSolution, updateConfiguration, solutionNameGr, updateStatus) {
        var filter = JSON.parse(updateConfiguration).filter;
        var currentChecksum = this.getCheckSum(filter);
        var solutionHashKey = solutionNameGr.getValue("name");
        var solutionName = solutionNameGr.getValue("hash");
        var solutionId = this.smMLTableUtils.getSolutionProperty(activeSolution, solutionName, "sys_id");
        if (this.isStatsTablePresent) {
            // Insert the stuck update configuration into sa_ml_solution_failure_stats.
            var insertGr = new GlideRecord("sa_ml_solution_failure_stats");
            insertGr.setValue("checksum", currentChecksum);
            insertGr.setValue("update_config", updateConfiguration);
            insertGr.setValue("stuck_update_state", updateStatus);
            insertGr.setValue("solution", solutionId);
            insertGr.insert();
        } else {
            // The table does not exist: keep "<checksum>_<count>" in a hash entry.
            // The count restarts at 1 whenever the stored checksum differs.
            var previousCount = this._getHashRetryCount(solutionHashKey, currentChecksum);
            this.smMLTableUtils.updateSaHash(solutionHashKey + "_retry_checksum", currentChecksum + "_" + (previousCount + 1));
        }

        // Update the hash holding the total retries to date for the solution.
        var totalFailuresKey = solutionHashKey + "_" + "total_retries";
        var totalFailuresHash = this.smMLTableUtils.getSaHashGR(totalFailuresKey);
        var previousTotal = totalFailuresHash ? parseInt(totalFailuresHash.getValue("hash"), 10) : 0;
        if (isNaN(previousTotal)) {
            // A malformed stored value would otherwise keep the counter at NaN.
            previousTotal = 0;
        }
        this.smMLTableUtils.updateSaHash(totalFailuresKey, previousTotal + 1);
    },

    /**
     * Reads the retry count kept in the "<solutionHashKey>_retry_checksum" hash entry.
     *
     * @param {string} solutionHashKey - hash key of the solution
     * @param {string} currentChecksum - checksum of the current update configuration filter
     * @returns {number} stored count, or 0 when the entry is missing, belongs to a
     *     different checksum, or is malformed
     */
    _getHashRetryCount: function(solutionHashKey, currentChecksum) {
        var hashGr = this.smMLTableUtils.getSaHashGR(solutionHashKey + "_retry_checksum");
        if (!hashGr) {
            return 0;
        }
        var storedValue = hashGr.getValue("hash");
        if (storedValue == null) {
            return 0;
        }
        var parts = String(storedValue).split("_");
        // Loose comparison kept on purpose: the checksum may not be a primitive
        // JS string (NOTE(review): confirm the return type of GlideChecksum.getMD5).
        if (parts[0] != currentChecksum) {
            return 0;
        }
        var storedCount = parseInt(parts[1], 10);
        return isNaN(storedCount) ? 0 : storedCount;
    },

    /**
     * @param {string} value - text to hash
     * @returns {string} MD5 checksum of the value as produced by GlideChecksum
     */
    getCheckSum: function(value) {
        return (new GlideChecksum(value)).getMD5();
    },

    /**
     * Tells whether the solution was flagged as stuck, i.e. whether a
     * "cancel_retry_stuck_<solutionHashKey>" hash entry exists.
     *
     * @param {string} solutionHashKey - hash key of the solution
     * @param {string} solutionName - solution name, used for logging only
     * @param {string} solutionSysID - solution sys_id, used for logging only
     * @returns {string|boolean} the stored hash value when the entry exists, otherwise false
     */
    isJobStuck: function(solutionHashKey, solutionName, solutionSysID) {
        var stuckHash = this.smMLTableUtils.getSaHashGR("cancel_retry_stuck_" + solutionHashKey);
        if (!stuckHash) {
            return false;
        }
        var stuckLog = this.afpLogger.buildLog(solutionName, solutionSysID, ApplicationsFingerprintOperations.UPDATE, 'AFP Training Job is Stuck');
        this.afpLogger.info(stuckLog);
        return stuckHash.getValue("hash");
    },

    type: 'CancelMLSolution'
};

Sys ID

1ad4d256c3e8b01039fc14cb3c40ddc0

Official Documentation

Official Docs: