Name
global.CancelMLSolution
Description
Utility Class to Cancel ML Update Job and store statistics of stuck update job configurations
Script
/**
 * Utility class that cancels ML solution update jobs which appear to be stuck
 * and records statistics about the update configurations that got stuck.
 *
 * Statistics are written to the sa_ml_solution_failure_stats table when it
 * exists; otherwise retry counters are kept in the hash store accessed through
 * SMMLTableUtils.
 */
var CancelMLSolution = Class.create();
CancelMLSolution.prototype = {
    /**
     * Creates the helper objects and reads the retry sampling configuration.
     */
    initialize: function() {
        this.smMLTableUtils = new SMMLTableUtils();
        this.afpLogger = new ApplicationsFingerprintLogger('CancelMLSolution');
        this.afpOperations = new ApplicationsFingerprintOperations();
        // Update statuses for which a cancel-and-retry is attempted.
        this.retryableUpdateStats = ["Waiting for Updating", "Updating Request Received", "Fetching Files for Updating", "Updating Solution", "Preparing Data", "Uploading Updated Solution", "Unauthorized", "Configuration OR Network Error"];
        this.isStatsTablePresent = gs.tableExists('sa_ml_solution_failure_stats');
        // We do not want to create load on the system: a cancel is only issued
        // when the retry count equals one of the values listed in this property.
        var sampleRateValues = gs.getProperty('sa_ml.sample_rate_values', '4,9');
        // String() guarantees JS string semantics; trimming lets a value such as
        // "4, 9" still match the exact-string lookup done on the retry count.
        var sampleRates = String(sampleRateValues).split(',');
        for (var i = 0; i < sampleRates.length; i++) {
            sampleRates[i] = sampleRates[i].trim();
        }
        this.sampleRateArr = sampleRates;
        this.lastSampleRateValue = this.sampleRateArr[this.sampleRateArr.length - 1];
    },

    /**
     * Checks the update status of the active solution behind a hash key and
     * cancels its update job when the status is retryable and the retry
     * sampling rules allow it.
     *
     * @param {string} solutionHashKey key of the hash record that holds the solution name
     * @param {*} solutionStore passed through to SMMLTableUtils.getActiveSolution
     * @param {boolean} forceRetry when truthy, cancel the update job regardless of status and retry counters
     * @returns {boolean} true when nothing had to be cancelled (hash missing, no active
     *     solution, status not retryable) or the cancel call succeeded; false when the
     *     cancel call failed, the job is flagged as stuck and was not re-triggered, the
     *     maximum retry count was exceeded, or the current retry count is not sampled
     */
    checkUpdateStateAndCancelOnError: function(solutionHashKey, solutionStore, forceRetry) {
        var solutionNameGr = this.smMLTableUtils.getSaHashGR(solutionHashKey);
        var solutionName;
        var solutionSysID;

        if (!solutionNameGr) {
            // Neither the solution name nor its sys_id is known without the hash record.
            var noCancelLog = this.afpLogger.buildLog(solutionName, solutionSysID, ApplicationsFingerprintOperations.UPDATE, "CancelMLSolution : Solution Hash Does Not Exists No need to Cancel");
            this.afpLogger.info(noCancelLog);
            return true;
        }

        solutionName = solutionNameGr.getValue("hash");
        var activeSolution = this.smMLTableUtils.getActiveSolution(solutionName, solutionStore);

        if (!activeSolution) {
            // The sys_id is read from the active solution, so it stays undefined here;
            // the solution name is known and is passed along.
            var noActiveSolutionLog = this.afpLogger.buildLog(solutionName, solutionSysID, ApplicationsFingerprintOperations.UPDATE, "CancelMLSolution : Solution " + solutionName + " active solution not found no need to cancel");
            this.afpLogger.info(noActiveSolutionLog);
            // clean the hash of the stuck solution
            this.afpOperations.deleteStuckHash(solutionName, solutionSysID, solutionHashKey);
            return true;
        }

        var updateStatus = activeSolution.getUpdateStatus();
        solutionSysID = this.smMLTableUtils.getSolutionProperty(activeSolution, solutionName, "sys_id");
        var lastUpdateGdt = this.smMLTableUtils.getSolutionUpdateTime(solutionSysID);
        var updateLog = this.afpLogger.buildLog(solutionName, solutionSysID, ApplicationsFingerprintOperations.UPDATE, 'Solution Update Time ' + lastUpdateGdt);
        this.afpLogger.info(updateLog);

        // When a user is involved and a retry must be forced, cancel unconditionally.
        if (forceRetry) {
            return this.cancelUpdateJob(activeSolution);
        }

        if (!JSUtil.contains(this.retryableUpdateStats, updateStatus)) {
            updateLog = this.afpLogger.buildLog(solutionName, solutionSysID, ApplicationsFingerprintOperations.UPDATE, "CancelMLSolution : Solution " + solutionName + " Update State is not in error state");
            this.afpLogger.info(updateLog);
            return true;
        }

        // The job was already flagged as stuck: only release it if enough time has passed.
        if (this.isJobStuck(solutionHashKey, solutionName, solutionSysID)) {
            return this._cancelIfUpdateIsStale(activeSolution, solutionHashKey, solutionName, solutionSysID, lastUpdateGdt);
        }

        // check number of retries for this particular update configuration
        var retryCount = this.getRetryCount(activeSolution, solutionNameGr);
        var maxRetryCount = GlideProperties.getInt('sa_ml.cancel.update.ml.job.max.retry.count', this.lastSampleRateValue);
        if (parseInt(retryCount, 10) > maxRetryCount) {
            updateLog = this.afpLogger.buildLog(solutionName, solutionSysID, ApplicationsFingerprintOperations.ERROR, "Maximum retry count exceeded for solution: " + solutionName + " Please contact Support for Additional Assistance");
            this.afpLogger.error(updateLog);
            // Flag the solution as stuck so later runs take the stuck-job path above.
            this.smMLTableUtils.updateSaHash("cancel_retry_stuck_" + solutionHashKey, true);
            return false;
        }
        this.smMLTableUtils.deleteSaHash("cancel_retry_stuck_" + solutionHashKey);

        // Do not cancel on every retry, only when the retry number is listed in the sample rate array.
        if (this.sampleRateArr.indexOf(String(retryCount)) === -1) {
            gs.info("CancelMLSolution : Ignore from running this for the retry count: " + retryCount);
            this.updateStats(activeSolution, solutionNameGr, updateStatus);
            return false;
        }

        // The status is retryable and the retry count is sampled: cancel so the update can be retried.
        return this.cancelJobAndRetry(activeSolution, solutionNameGr, solutionHashKey, updateStatus);
    },

    /**
     * For a job flagged as stuck: when re-triggering is enabled and the last
     * solution update is older than the configured number of days, removes the
     * stuck hash and cancels the update job.
     *
     * @param {Object} activeSolution active solution object exposing cancelUpdateJob()
     * @param {string} solutionHashKey key of the solution hash record
     * @param {string} solutionName solution name used for logging and hash cleanup
     * @param {string} solutionSysID solution sys_id used for logging and hash cleanup
     * @param {GlideDateTime} lastUpdateGdt last update time; it is advanced in place by the time period
     * @returns {boolean} result of the cancel call, or false when no cancel was attempted
     */
    _cancelIfUpdateIsStale: function(activeSolution, solutionHashKey, solutionName, solutionSysID, lastUpdateGdt) {
        // Only when the retrigger feature is enabled is another attempt made.
        if (!GlideProperties.getBoolean("sa_ml.retrigger.enabled", true)) {
            return false;
        }
        // Without a known last update time the age cannot be evaluated.
        if (!lastUpdateGdt) {
            return false;
        }
        var timePeriod = GlideProperties.getInt('sa_ml.time_period_in_days', 14);
        var gdtNow = new GlideDateTime();
        lastUpdateGdt.addDays(timePeriod);
        if (!gdtNow.after(lastUpdateGdt)) {
            return false;
        }
        var staleLog = this.afpLogger.buildLog(solutionName, solutionSysID, ApplicationsFingerprintOperations.UPDATE, 'Solution trigger because the solution update passed ' + timePeriod + ' days');
        this.afpLogger.info(staleLog);
        this.afpOperations.deleteStuckHash(solutionName, solutionSysID, solutionHashKey);
        return this.cancelUpdateJob(activeSolution);
    },

    /**
     * Cancels the update job and, on success, clears the first-error-time hash
     * and records the retry in the statistics.
     *
     * @param {Object} activeSolution active solution object exposing cancelUpdateJob()
     * @param {GlideRecord} solutionNameGr hash record of the solution ("hash" holds the solution name)
     * @param {string} solutionHashKey key of the solution hash record
     * @param {string} updateStatus update status the job was found in
     * @returns {boolean} true when the cancel call succeeded, false otherwise
     */
    cancelJobAndRetry: function(activeSolution, solutionNameGr, solutionHashKey, updateStatus) {
        if (!this.cancelUpdateJob(activeSolution)) {
            gs.error("CancelMLSolution : Cancellation of Update Job for solution " + solutionNameGr.getValue("hash") + " failed");
            return false;
        }
        gs.debug("CancelMLSolution : Cancellation of Update Job for solution " + solutionNameGr.getValue("hash") + " Successful");
        this.smMLTableUtils.deleteSaHash("first_error_time_" + solutionHashKey);
        this.updateStats(activeSolution, solutionNameGr, updateStatus);
        return true;
    },

    /**
     * Invokes the ML API to cancel the update job of the given solution.
     *
     * @param {Object} activeSolution active solution object exposing cancelUpdateJob()
     * @returns {boolean} true when the call completed, false when it threw
     */
    cancelUpdateJob: function(activeSolution) {
        try {
            activeSolution.cancelUpdateJob();
        } catch (ex) {
            // The caught value may expose getMessage() or, for a native script
            // error, only a message property; never let the handler itself throw.
            var reason;
            if (ex && typeof ex.getMessage === 'function') {
                reason = ex.getMessage();
            } else if (ex && ex.message) {
                reason = ex.message;
            } else {
                reason = String(ex);
            }
            gs.error('Exception caught: ' + reason);
            return false;
        }
        return true;
    },

    /**
     * Gets the retry count recorded for the solution's current update configuration.
     *
     * @param {Object} activeSolution active solution object
     * @param {GlideRecord} solutionNameGr hash record of the solution ("name" is the hash key, "hash" the solution name)
     * @returns {number} number of recorded retries; 0 when the solution has no update configuration
     */
    getRetryCount: function(activeSolution, solutionNameGr) {
        var retryCount = 0;
        var solutionHashKey = solutionNameGr.getValue("name");
        var solutionName = solutionNameGr.getValue("hash");
        var solution = this.smMLTableUtils.getSolutionProperty(activeSolution, solutionName, "sys_id");
        var updateConfiguration = this.smMLTableUtils.getSolutionProperty(activeSolution, solutionName, "update_config");
        if (updateConfiguration == null) {
            return retryCount;
        }

        var filter = JSON.parse(updateConfiguration).filter;
        var currentChecksum = this.getCheckSum(filter);
        if (this.isStatsTablePresent) {
            // if the table is present, the retry count is the number of matching rows
            var gr = new GlideRecord("sa_ml_solution_failure_stats");
            gr.addQuery("checksum", currentChecksum);
            gr.addQuery("solution", solution);
            gr.query();
            retryCount = parseInt(gr.getRowCount(), 10);
        } else {
            // otherwise the retry count is kept in the hash store next to the checksum
            retryCount = this._getHashedRetryCount(solutionHashKey + "_retry_checksum", currentChecksum);
        }

        var retryLog = this.afpLogger.buildLog(solutionName, solution, ApplicationsFingerprintOperations.UPDATE, 'Existing retires for update configuration ' + updateConfiguration + ' is ' + retryCount);
        retryLog['retries'] = retryCount;
        this.afpLogger.info(retryLog);
        return retryCount;
    },

    /**
     * Reads the retry count stored in the hash store as "<checksum>_<count>".
     *
     * @param {string} retryKey key of the hash record holding the checksum and count
     * @param {string} currentChecksum checksum of the current update configuration filter
     * @returns {number} stored count, or 0 when the record is missing, empty,
     *     belongs to a different checksum, or holds a non-numeric count
     */
    _getHashedRetryCount: function(retryKey, currentChecksum) {
        var hashGr = this.smMLTableUtils.getSaHashGR(retryKey);
        if (!hashGr) {
            return 0;
        }
        var storedValue = hashGr.getValue("hash");
        if (!storedValue) {
            return 0;
        }
        var parts = String(storedValue).split("_");
        // Loose comparison on purpose: the checksum may not be a primitive JS string.
        if (parts[0] != currentChecksum) {
            return 0;
        }
        var storedCount = parseInt(parts[1], 10);
        return isNaN(storedCount) ? 0 : storedCount;
    },

    /**
     * Updates the retry statistics of the solution, provided it has an update configuration.
     *
     * @param {Object} activeSolution active solution object
     * @param {GlideRecord} solutionNameGr hash record of the solution
     * @param {string} updateStatus update status the job was found in
     */
    updateStats: function(activeSolution, solutionNameGr, updateStatus) {
        var solutionName = solutionNameGr.getValue("hash");
        var updateConfiguration = this.smMLTableUtils.getSolutionProperty(activeSolution, solutionName, "update_config");
        if (updateConfiguration != null) {
            this.updateRetryCount(activeSolution, updateConfiguration, solutionNameGr, updateStatus);
        }
    },

    /**
     * Records one more retry for the given update configuration and increments
     * the solution's total retry counter in the hash store.
     *
     * @param {Object} activeSolution active solution object
     * @param {string} updateConfiguration JSON text of the update configuration; its "filter" member is checksummed
     * @param {GlideRecord} solutionNameGr hash record of the solution
     * @param {string} updateStatus update status the job was found in
     */
    updateRetryCount: function(activeSolution, updateConfiguration, solutionNameGr, updateStatus) {
        var filter = JSON.parse(updateConfiguration).filter;
        var currentChecksum = this.getCheckSum(filter);
        var solutionHashKey = solutionNameGr.getValue("name");
        var solutionName = solutionNameGr.getValue("hash");
        var solutionId = this.smMLTableUtils.getSolutionProperty(activeSolution, solutionName, "sys_id");

        if (this.isStatsTablePresent) {
            // insert the update configuration that is stuck into the sa_ml_solution_failure_stats table
            var insertGr = new GlideRecord("sa_ml_solution_failure_stats");
            insertGr.setValue("checksum", currentChecksum);
            insertGr.setValue("update_config", updateConfiguration);
            insertGr.setValue("stuck_update_state", updateStatus);
            insertGr.setValue("solution", solutionId);
            insertGr.insert();
        } else {
            // the table does not exist: insert or update "<checksum>_<count>" in the hash store
            var retryKey = solutionHashKey + "_retry_checksum";
            var previousCount = this._getHashedRetryCount(retryKey, currentChecksum);
            this.smMLTableUtils.updateSaHash(retryKey, currentChecksum + "_" + (previousCount + 1));
        }

        // update the hash with the total retries to date for the solution
        var totalFailuresKey = solutionHashKey + "_" + "total_retries";
        var totalFailuresHash = this.smMLTableUtils.getSaHashGR(totalFailuresKey);
        var totalSoFar = totalFailuresHash ? parseInt(totalFailuresHash.getValue("hash"), 10) : 0;
        if (isNaN(totalSoFar)) {
            // A corrupt counter restarts instead of persisting NaN forever.
            totalSoFar = 0;
        }
        this.smMLTableUtils.updateSaHash(totalFailuresKey, totalSoFar + 1);
    },

    /**
     * Computes the MD5 checksum of a value using GlideChecksum.
     *
     * @param {*} value value handed to the GlideChecksum constructor
     * @returns {string} result of GlideChecksum.getMD5()
     */
    getCheckSum: function(value) {
        return (new GlideChecksum(value)).getMD5();
    },

    /**
     * Tells whether the solution has been flagged as stuck, i.e. whether a
     * "cancel_retry_stuck_<key>" hash record exists.
     *
     * @param {string} solutionHashKey key of the solution hash record
     * @param {string} solutionName solution name, used for logging only
     * @param {string} solutionSysID solution sys_id, used for logging only
     * @returns {string|boolean} the stored hash value of the stuck marker when it
     *     exists (callers use it as a truthy flag), otherwise false
     */
    isJobStuck: function(solutionHashKey, solutionName, solutionSysID) {
        var stuckHash = this.smMLTableUtils.getSaHashGR("cancel_retry_stuck_" + solutionHashKey);
        if (!stuckHash) {
            return false;
        }
        var stuckLog = this.afpLogger.buildLog(solutionName, solutionSysID, ApplicationsFingerprintOperations.UPDATE, 'AFP Training Job is Stuck');
        this.afpLogger.info(stuckLog);
        return stuckHash.getValue("hash");
    },

    type: 'CancelMLSolution'
};
Sys ID
1ad4d256c3e8b01039fc14cb3c40ddc0