Name
global.MlValidationHelper
Description
Helper script include to validate solution definition before Training
Script
/**
 * Helper script include that validates a solution definition before training.
 * The prototype extends AbstractAjaxProcessor. The validation methods below return
 * a plain object with a boolean 'validation' flag and a message ('errorMsg' or,
 * for some paths of missingEncoderWarningMessage, 'warningMsg').
 */
var MlValidationHelper = Class.create();
MlValidationHelper.prototype = Object.extendsObject(AbstractAjaxProcessor, {
initialize: function() {
this.MLBaseConstants = global.MLBaseConstants;
this.LD = "668619e753220010d1dcddeeff7b125a";
this.minimum_neighbor = "32ddecd053733300d1dcddeeff7b12c2";
this.HDBSCAN = "0197031f533210106c61ddeeff7b1293";
},
//validates input fields, minimum and maximum number of records for training.
minmaxValidation: function(table, filter, capability) {
var validation = true;
var minNumber = 10000;
var maxNumber = 300000;
var errorMsg = "";
var gr = new GlideRecordSecure(table);
gr.addEncodedQuery(filter);
gr.query();
var inputSize = gr.getRowCount();
var trainerName = "";
var minNumberPropName = "";
switch (capability) {
case "similarity_trainer":
minNumber = gs.getProperty('glide.platform_ml.api.min_similarity_window_records', 10);
maxNumber = gs.getProperty('glide.platform_ml.api.max_similarity_window_records', 100000);
trainerName = gs.getMessage('Similarity');
minNumberPropName = "glide.platform_ml.api.min_similarity_window_records";
break;
case "workflow_similarity_trainer":
minNumber = gs.getProperty('glide.platform_ml.api.min_similarity_window_records', 10);
maxNumber = gs.getProperty('glide.platform_ml.api.max_similarity_window_records', 100000);
trainerName = gs.getMessage('Workflow Similarity');
minNumberPropName = "glide.platform_ml.api.min_similarity_window_records";
break;
case "clustering_trainer":
minNumber = gs.getProperty('glide.platform_ml.api.min_clustering_records', 100);
maxNumber = gs.getProperty('glide.platform_ml.api.max_clustering_records', 100000);
trainerName = gs.getMessage('Clustering');
minNumberPropName = "glide.platform_ml.api.min_clustering_records";
break;
case "workflow_clustering_trainer":
minNumber = gs.getProperty('glide.platform_ml.api.min_clustering_records', 100);
maxNumber = gs.getProperty('glide.platform_ml.api.max_clustering_records', 100000);
trainerName = gs.getMessage('Workflow Clustering');
minNumberPropName = "glide.platform_ml.api.min_clustering_records";
break;
case "classification_trainer":
minNumber = gs.getProperty('glide.platform_ml.api.csv_min_line', 10000);
maxNumber = gs.getProperty('glide.platform_ml.api.csv_max_line', 300000);
trainerName = gs.getMessage('Classification');
minNumberPropName = "glide.platform_ml.api.csv_min_line";
break;
case "regression_trainer":
minNumber = gs.getProperty('glide.platform_ml.api.min_regression_records', 10000);
maxNumber = gs.getProperty('glide.platform_ml.api.max_regression_records', 300000);
trainerName = gs.getMessage('Regression');
minNumberPropName = "glide.platform_ml.api.min_regression_records";
break;
case "workflow_classification_trainer":
minNumber = gs.getProperty('glide.platform_ml.api.csv_min_line', 10000);
maxNumber = gs.getProperty('glide.platform_ml.api.csv_max_line', 300000);
trainerName = gs.getMessage('Workflow Classification');
minNumberPropName = "glide.platform_ml.api.csv_min_line";
break;
case "workflow_regression_trainer":
minNumber = gs.getProperty('glide.platform_ml.api.min_regression_records', 10000);
maxNumber = gs.getProperty('glide.platform_ml.api.max_regression_records', 300000);
trainerName = gs.getMessage('Workflow Regression');
minNumberPropName = "glide.platform_ml.api.min_regression_records";
break;
default:
}
if (minNumber <= 0) {
minNumber = 1;
}
if (inputSize < minNumber) {
errorMsg += gs.getMessage("Recommended setting for solution training requires minimum {0} records. Please use the system property {1} to train with a smaller number. ", [minNumber.toString(), minNumberPropName]);
validation = false;
}
//DEF0096884 : If inputSize is more than max row count, training should happen with latest 'maxNumber' records.
//if (inputSize > maxNumber) {
// errorMsg = gs.getMessage("Select a maximum of {0} records in Step 3 for {1} training. Train the solution after you update the filters in step 3. ", [maxNumber.toString(), trainerName]);
// validation = false;
//}
return {
'validation': validation,
'errorMsg': errorMsg
};
},
minClusteringRecordsValidation: function(table, filter, capability, minRecords) {
var validation = true;
var errorMsg = "";
var gr = new GlideRecordSecure(table);
gr.addEncodedQuery(filter);
gr.query();
var inputSize = gr.getRowCount();
if (inputSize <= minRecords) {
errorMsg += gs.getMessage("Minimum number of records per cluster exceeds the total number of records.");
validation = false;
}
return {
'validation': validation,
'errorMsg': errorMsg
};
},
checkInputTypes: function(table, capability, addedInputFields) {
var validation = true;
var errorMsg = "";
var classificationFieldTypesName = "choice,string,html,translated_html,reference,integer,long,longint,decimal,numeric,float,boolean";
var classificationFieldTypesLabel = "Choice, String, HTML, Translated HTML, Reference, Integer, Long, Longint, Decimal, Numeric, Float, Boolean";
var regressionFieldTypesName = "choice,string,html,translated_html,reference,integer,float,longint,decimal";
var regressionFieldTypesLabel = "Choice, String, HTML, Translated HTML, Reference, Integer, Floating Point Number, Long, Decimal";
var tableRecord = new GlideRecord(table);
for (var field in addedInputFields) {
var element = tableRecord.getElement(addedInputFields[field]);
var descriptor = element.getED();
var type = descriptor.getInternalType();
var choiceType = sn_ml.MLRecordUtil.getFieldType(table, addedInputFields[field]);
//validating input field types for regression
if (capability == "regression_trainer") {
if (!regressionFieldTypesName.includes(type) && !regressionFieldTypesName.includes(choiceType)) {
validation = false;
errorMsg = gs.getMessage("Input Fields are only supported for {0} types", regressionFieldTypesLabel.toString());
break;
}
}
//validating input field types for classification
if (capability == "classification_trainer") {
if (!classificationFieldTypesName.includes(type) && !classificationFieldTypesName.includes(choiceType)) {
validation = false;
errorMsg = gs.getMessage("Input Fields are only supported for {0} types", classificationFieldTypesLabel.toString());
break;
}
}
}
return {
'validation': validation,
'errorMsg': errorMsg
};
},
targetSolThresholdValidation: function(threshold_val) {
var validation = true;
var errorMsg = "";
if (JSUtil.nil(threshold_val) || isNaN(threshold_val) || (threshold_val <= 0) || (threshold_val > 100)) {
validation = false;
errorMsg = gs.getMessage("Target Solution Coverage number should be greater than 0 and lesser than or equal to 100");
}
return {
'validation': validation,
'errorMsg': errorMsg
};
},
areClusteringFieldsMutuallyExclusive: function(current) {
var validation = true;
var errorMsg = "";
var fields_set = [];
var total_fields = 0;
addNewFields = function(item) {
if (fields_set.indexOf(item) === -1) {
fields_set.push(item);
}
};
//Add Segmentation field only if use group by is enabled
if (current.getValue("segmentation_field") && current.getValue("use_segmentation") == 1) {
var segmentation_fields = current.getValue("segmentation_field").split(",");
segmentation_fields.forEach(addNewFields);
total_fields += segmentation_fields.length;
}
//Add purity fields only if calculate purity is enabled
if (current.getValue("purity_fields") && current.getValue("cal_purity") == 1) {
var purity_fields = current.getValue("purity_fields").split(",");
purity_fields.forEach(addNewFields);
total_fields += purity_fields.length;
}
var fields = current.getValue("fields").split(",");
fields.forEach(addNewFields);
total_fields += fields.length;
if (fields_set.length < total_fields) {
validation = false;
errorMsg = gs.getMessage("Fields, Group By, Purity Fields should not have columns in common");
}
return {
'validation': validation,
'errorMsg': errorMsg
};
},
preTrainingValidation: function(capability, current) {
var validationObj = {
'validation': true,
'errorMsg': ""
};
//checking entitlements
validationObj = this.checkEntitlements(capability);
if (!validationObj.validation) {
return validationObj;
}
switch (capability) {
case "classification_trainer":
case "workflow_classification_trainer":
//Step 1: Verifying the input field types.
var addedInputFields = current.fields.split(',');
validationObj = this.checkInputTypes(current.getValue("table"), current.capability.getRefRecord().getValue("value"), addedInputFields);
if (!validationObj.validation) {
return validationObj;
}
//Step 2: min max record validation
validationObj = this.minmaxValidation(current.getValue("table"), current.getValue("filter"), current.capability.getRefRecord().getValue("value"), addedInputFields);
if (!validationObj.validation) {
return validationObj;
}
if (new MLGroupbyUtils().isGroupBy(current)) {
return {
'validation': false,
'errorMsg': "Please use APIs to train group-by solutions"
};
}
validationObj = this.missingEncoderWarningMessage(current);
if (!validationObj.validation) {
return validationObj;
}
break;
case "similarity_trainer":
case "workflow_similarity_trainer":
//Step 1 : min max record count validation
validationObj = this.minmaxValidation(current.getValue("table"), current.getValue("filter"), current.capability.getRefRecord().getValue("value"));
if (!validationObj.validation) {
return validationObj;
}
//Step 2: check if word corpus and details should not be empty
if ((gs.getProperty("glide.platform_ml.api.enable_workflow_similarity") == null) ||
(gs.getProperty("glide.platform_ml.api.enable_workflow_similarity") == "false")) {
validationObj = this.wordVectorValidation(current);
if (!validationObj.validation) {
return validationObj;
}
}
break;
case "clustering_trainer":
case "workflow_clustering_trainer":
//Step 1 : min max record count validation
validationObj = this.minmaxValidation(current.getValue("table"), current.getValue("filter"), current.capability.getRefRecord().getValue("value"));
if (!validationObj.validation) {
return validationObj;
}
//Step 2: check if word corpus and details should not be empty
//Paris Release - levenshtein distance Advanced parameter or WVC is present not both'
var solutionDefSysid = current.getUniqueValue();
var advParam = this.advSolParamRec(solutionDefSysid, this.LD);
if (!JSUtil.isEmpty(advParam)) {
if (JSUtil.nil(current.getValue("word_vector_corpus"))) {
var min_neighbors = this.advSolParamRec(solutionDefSysid, this.minimum_neighbor);
if (!JSUtil.isEmpty(min_neighbors) && min_neighbors.isValidRecord() && min_neighbors.getValue('user_inputs') == '1') {
return validationObj;
} else {
return {
'validation': false,
'errorMsg': " For Levenshtein Distance minimum neighbors needs to be 1"
};
}
} else {
return {
'validation': false,
'errorMsg': "Word Vector Corpus and Levenshtein Distance cannot be used together for clustering training"
};
}
}
// Check if fields, group by, purity fields are mutually exclusive.
validationObj = this.areClusteringFieldsMutuallyExclusive(current);
if (!validationObj.validation) {
return validationObj;
}
var hdbscanParam = this.advSolParamRec(solutionDefSysid, this.HDBSCAN);
if (!JSUtil.isEmpty(hdbscanParam)) {
return validationObj;
}
if ((gs.getProperty("glide.platform_ml.api.enable_workflow_clustering") == null) ||
(gs.getProperty("glide.platform_ml.api.enable_workflow_clustering") == "false")) {
validationObj = this.wordVectorValidation(current, ["tfidf"]);
if (!validationObj.validation) {
return validationObj;
}
}
validationObj = this.minClusteringRecordsValidation(current.getValue("table"), current.getValue("filter"), current.capability.getRefRecord().getValue("value"), current.getValue("min_records"));
if (!validationObj.validation) {
return validationObj;
}
break;
case "regression_trainer":
//Step 1: Verifying the input field types.
var addedInputFields = current.fields.split(',');
validationObj = this.checkInputTypes(current.getValue("table"), current.capability.getRefRecord().getValue("value"), addedInputFields);
if (!validationObj.validation) {
return validationObj;
}
//Step 2: min max record validation
validationObj = this.minmaxValidation(current.getValue("table"), current.getValue("filter"), current.capability.getRefRecord().getValue("value"), addedInputFields);
if (!validationObj.validation) {
return validationObj;
}
if (new MLGroupbyUtils().isGroupBy(current)) {
return {
'validation': false,
'errorMsg': "Please use APIs to train group-by solutions"
};
}
validationObj = this.missingEncoderWarningMessage(current);
if (!validationObj.validation) {
return validationObj;
}
break;
default:
return validationObj;
}
return validationObj;
},
wordVectorValidation: function(record, unsupported_corpus_list) {
var validation = true;
var errorMsg = "";
if (JSUtil.isEmpty(record)) {
validation = false;
errorMsg = gs.getMessage("Solution definition record is empty");
return {
'validation': validation,
'errorMsg': errorMsg
};
}
var wvc_gr = record.word_vector_corpus.getRefRecord();
if (!wvc_gr.isValidRecord()) {
validation = false;
errorMsg = gs.getMessage("Word Corpus for solution definition record is empty");
return {
'validation': validation,
'errorMsg': errorMsg
};
}
if (unsupported_corpus_list) {
if (unsupported_corpus_list.indexOf(wvc_gr.getValue('type')) > -1) {
return {
'validation': false,
'errorMsg': gs.getMessage("Unsupported Word Corpus specified: {0}", [wvc_gr.getDisplayValue('type')])
};
}
}
if (wvc_gr.getValue('type') == 'pretrained') {
return {
'validation': validation,
'errorMsg': errorMsg
};
}
var wvc_content_gr = new GlideRecordSecure(this.MLBaseConstants.ML_WVC_DETAILS);
wvc_content_gr.addQuery(this.MLBaseConstants.COL_WVC, wvc_gr.getUniqueValue());
wvc_content_gr.query();
if (wvc_content_gr.getRowCount() <= 0) {
validation = false;
errorMsg = gs.getMessage("Please add word corpus details for the word corpus used");
return {
'validation': validation,
'errorMsg': errorMsg
};
}
while (wvc_content_gr.next()) {
var hasOneRow = this.hasMinOneRow(wvc_content_gr.getValue("table"), wvc_content_gr.getValue("filter"));
if (!hasOneRow) {
validation = false;
errorMsg = gs.getMessage("'{0}' Word corpus content contains 0 records", wvc_content_gr.name);
return {
'validation': validation,
'errorMsg': errorMsg
};
}
}
return {
'validation': validation,
'errorMsg': errorMsg
};
},
missingEncoderWarningMessage: function(record) {
var validation = true;
var warningMsg = "";
var errorMsg = "";
var wvc_gr = record.word_vector_corpus;
var capability = current.capability.getRefRecord().getValue("value");
if (((capability == "classification_trainer" || capability == "workflow_classification_trainer") && gs.getProperty("glide.platform_ml.api.enable_workflow_classification") == "true") ||
((capability == "regression_trainer" || capability == "workflow_regression_trainer") && gs.getProperty("glide.platform_ml.api.enable_workflow_regression") == "true") ||
((capability == "clustering_trainer" || capability == "workflow_clustering_trainer") && gs.getProperty("glide.platform_ml.api.enable_workflow_clustering") == "true") ||
((capability == "similarity_trainer" || capability == "workflow_similarity_trainer") && gs.getProperty("glide.platform_ml.api.enable_workflow_similarity") == "true")) {
warningMsg = gs.getMessage("Word corpus input field is disabled in order to provide streamlined user experience and improved performance. Please reach out to your Servicenow Account Executive for more information.");
return {
'validation': validation,
'warningMsg': warningMsg
};
}
if (JSUtil.nil(wvc_gr)) {
warningMsg = gs.getMessage("Word Corpus for solution definition record is empty. System will attempt to create a word corpus if it identifies atleast one text column in the input data.");
return {
'validation': validation,
'warningMsg': warningMsg
};
}
//STRY50868523 and STRY50868460
else {
wvc_gr = wvc_gr.getRefRecord();
var wvc_type = wvc_gr.getValue('type');
var gr = new GlideRecordSecure(this.MLBaseConstants.ML_ADVANCED_SOL_SETTINGS);
gr.addQuery(this.MLBaseConstants.SOL_DEFINITION_REF_FIELD, current.getUniqueValue());
gr.addQuery("solution_parameters.key", "TF-IDF");
gr.query();
if (gr.next()) {
var solParam = gr.solution_parameters.getRefRecord();
var key = solParam.key.replace('-', '').toLowerCase();
if (key !== wvc_type) {
validation = false;
errorMsg = gs.getMessage("Your word corpus type must match your advanced solution parameter type. Example: If using a TF-IDF word corpus, use the TF-IDF parameter.");
}
} else {
if (wvc_type === "tfidf") {
validation = false;
errorMsg = gs.getMessage("Your word corpus type must match your advanced solution parameter type. Example: If using a TF-IDF word corpus, use the TF-IDF parameter.");
}
}
return {
'validation': validation,
'errorMsg': errorMsg
};
}
},
hasMinOneRow: function(table, encodedQuery) {
var hasOneRow = false;
if (table) {
gr = new GlideRecordSecure(table);
if (encodedQuery)
gr.addEncodedQuery(encodedQuery);
gr.setLimit(1);
gr.query();
hasOneRow = gr.getRowCount() > 0;
}
return hasOneRow;
},
advSolParamRec: function(sol_def_sys_id, key) {
var advSolParamRec = {};
var gr = new GlideRecordSecure("ml_advanced_solution_settings");
gr.addQuery("ml_capability_definition", sol_def_sys_id);
gr.addQuery("solution_parameters", key);
gr.query();
if (gr.next()) {
return gr;
}
return advSolParamRec;
},
checkEntitlements: function(capability) {
var validation = false;
var errorMsg = "";
var entitlementResult = sn_ml.MLEntitlementUtil.hasEntitlement(capability);
if (entitlementResult.hasEntitlement()) {
validation = true;
} else {
errorMsg = entitlementResult.getMessage();
}
return {
'validation': validation,
'errorMsg': errorMsg
};
},
type: 'MlValidationHelper'
});
Sys ID
b798fa0db7280010d1dcf8b8ee11a95b