Name
global.NLUStudioTrainer
Description
Utilities used for training models through NLU Model Builder
Script
// Class object for this script include; the helpers below are attached to it as static functions.
var NLUStudioTrainer = Class.create();
(function() {
// Local aliases for the NLUConstants lookups used by the helpers in this script.
var constants = NLUConstants.constants;
var ENTITY_TYPES = NLUConstants.ENTITY_TYPES;
var VOCAB_TYPES = NLUConstants.VOCAB_TYPES;
// The "ENITITY" spelling mirrors the property name read from NLUConstants.
var INTENT_ENITITY_RELATIONSHIP = NLUConstants.INTENT_ENITITY_RELATIONSHIP;
var stateTypes = NLUConstants.MODEL_STATE_TYPES;
// NOTE(review): `tables` is not referenced in the visible part of this script - confirm before removing.
var tables = NLUConstants.tables;
var sysProps = NLUConstants.SYS_PROPS;
/**
 * Tells whether training may be started for a model.
 *
 * @param {string} modelId - value passed straight to NLUModel.isInCloneOrTranslation
 * @returns {boolean} the negation of NLUModel.isInCloneOrTranslation(modelId)
 */
NLUStudioTrainer.canTrainModel = function(modelId) {
    var cloneOrTranslationRunning = NLUModel.isInCloneOrTranslation(modelId);
    return !cloneOrTranslationRunning;
};
/**
 * Builds the training payload for a model and submits it through NLUParloIntegrator,
 * then records the outcome on the model via NLUModel#updateStatus.
 *
 * @param {string} modelId - id handed to the NLUModel constructor
 * @param {Object} [options] - forwarded untouched to NLUParloIntegrator#train
 * @returns {Object} result object; always carries `status` and, depending on the
 *     outcome, `trainingVersion`, `trainedVersion`, `errors` or `message`
 */
NLUStudioTrainer.trainModel = function(modelId, options) {
    var result = {};
    var model, modelRecord, integrator, payload, trainOutput;

    try {
        model = new NLUModel(modelId);
        modelRecord = model.getGR();
        if (!modelRecord) {
            throw new Error(gs.getMessage('Model id does not exist'));
        }

        integrator = new NLUParloIntegrator(modelRecord);
        // Populate training modes and supported system entities in CapabilityCache.
        integrator.populateCapabilityCache('allNers {name, isEnabled}, trainModes');

        // Payload-building failures surface their own message rather than the
        // generic "unexpected error" text used by the outer handler.
        try {
            payload = NLUStudioTrainer.getTrainJson(model, integrator);
        } catch (payloadError) {
            result.status = 'failure';
            result.message = payloadError.message;
            return result;
        }

        // Invoke the training service.
        trainOutput = integrator.train(payload, options);
        result.status = trainOutput.status;

        if (trainOutput.status === stateTypes.training) {
            // Training is in progress: remember the version and flag the model as training.
            result.trainingVersion = trainOutput.response.solutionVersion;
            model.updateStatus({
                state: stateTypes.training
            });
            result.status = 'success';
        } else if (trainOutput.status == 'success') {
            // Training already finished: read the trained version's properties for its timestamp.
            var trainedSolution = integrator.getSolution();
            result.trainedVersion = trainOutput.response.solutionVersion;
            var versionHandle = trainedSolution.getVersion(result.trainedVersion);
            var versionProps = JSON.parse(versionHandle.getProperties());
            model.updateStatus({
                state: stateTypes.trained,
                trained_version: result.trainedVersion,
                last_trained_on: new GlideDateTime(versionProps.sys_updated_on),
                dirty: false
            });
        } else if (trainOutput.status == 'failure') {
            result.errors = NLUHelper.getErrorsFromResponse(trainOutput.response);
        } else if (trainOutput.status == 'error') {
            result.status = 'failure';
            result.message = trainOutput.response;
        }
    } catch (unexpected) {
        result.status = 'failure';
        result.message = gs.getMessage("The system encountered an unexpected error and couldn't complete your request. Please contact your system administrator for help or try again");
        gs.debug("Exception caught in NLUStudioTrainer.trainModel: " + unexpected.message);
    }

    return result;
};
/**
 * Cancels an in-flight training run for the given model.
 *
 * @param {string} modelId - id handed to the NLUModel constructor
 * @returns {Object} whatever NLUParloIntegrator#cancelTraining returns, or
 *     `{status: 'failure', message: <error message>}` when the model record is
 *     missing or any call throws
 */
NLUStudioTrainer.cancelTraining = function(modelId) {
    var result = {};
    // Declared locally: the previous version assigned these without `var`,
    // which created implicit globals (and throws in strict mode).
    var nluModel, modelGr;
    try {
        nluModel = new NLUModel(modelId);
        modelGr = nluModel.getGR();
        if (!modelGr)
            throw new Error(gs.getMessage('Model id does not exist'));
        result = new NLUParloIntegrator(modelGr).cancelTraining();
    } catch (e) {
        result.status = 'failure';
        result.message = e.message;
    }
    return result;
};
/**
 * Assembles the complete training payload for a model: model header, entities,
 * intents (optionally with a NO_INTENT entry), vocabulary, lookup sources and properties.
 *
 * @param {NLUModel} nluModel - model wrapper supplying the records to serialise
 * @param {NLUParloIntegrator} parloIntegrator - used for the model version and supported system entities
 * @returns {Object} the payload handed to the training service
 */
NLUStudioTrainer.getTrainJson = function(nluModel, parloIntegrator) {
    var modelId = nluModel.getGR().getUniqueValue();
    var payload = NLUStudioTrainer.getModelDataFromGR(nluModel.getGR(), parloIntegrator);

    payload.entities = NLUStudioTrainer.getEntitiesDataFromGR(nluModel.getModelEntities());

    var enabledIntentsGr = nluModel.getIntents(constants.ENABLED_INTENTS_QUERY);
    var intentData = NLUStudioTrainer.getIntentDataFromGR(enabledIntentsGr, payload.entities);
    payload.intents = intentData.intentsArray;

    // The property may come back as the boolean default or as the string "false".
    var skipNoIntent = gs.getProperty(sysProps.SKIP_NO_INTENT, false);
    var noIntentWanted = (skipNoIntent == false || skipNoIntent == "false");

    if (noIntentWanted && NLUSystemUtil.isNLUAdvUtahSupported()) {
        var intentList = intentData.intentsArray;
        var testUtteranceCount = 0;
        if (intentList && intentList.length > 0) {
            testUtteranceCount = intentList.reduce(function(total, entry) {
                var samples = entry && entry.testSamples;
                return total + ((samples && samples.length) || 0);
            }, 0) || 0;
        }
        // NO_INTENT test samples are capped at 10% of the test utterances of all intents.
        var noIntentTestLimit = Math.round(testUtteranceCount * 0.1);
        payload.intents.push(
            NLUStudioTrainer.getIrrelevantData(modelId, constants.IRRELEVANT_UTTERANCE_LIMIT, noIntentTestLimit)
        );
    }

    payload.vocabulary = NLUStudioTrainer.getVocabularyDataFromGR(nluModel.getVocabulary());
    payload.lookupSources = NLUStudioTrainer.getLookupSources(intentData.handleNames, modelId);
    payload.properties = NLUStudioTrainer.getProperties(parloIntegrator.getSupportedSystemEntities(), nluModel);

    return payload;
};
/**
 * Builds the `properties` list of the training payload.
 *
 * @param {Array} supportedSystemEntities - names passed on to getDisabledSystemEnities
 * @param {NLUModel} nluModel - model whose record supplies `ignore_punctuation`
 * @returns {Array<{name: string, value: *}>} zero, one or two name/value entries
 */
NLUStudioTrainer.getProperties = function(supportedSystemEntities, nluModel) {
    var disabledNers = NLUStudioTrainer.getDisabledSystemEnities(supportedSystemEntities, nluModel);
    var punctuationSetting = nluModel.getGR().getValue("ignore_punctuation");
    var entries = [];

    var hasPunctuationSetting = !gs.nil(punctuationSetting);
    if (hasPunctuationSetting) {
        entries.push({
            name: "neuralModelPunctuationCleaning",
            value: punctuationSetting
        });
    }

    // Only emitted when at least one supported system entity is not active on the model.
    if (disabledNers.length > 0) {
        entries.push({
            name: "disabledNERTypes",
            value: JSON.stringify(disabledNers)
        });
    }

    return entries;
};
/**
 * Lists the supported system entities that are not active on the model.
 *
 * @param {Array<string>} supportedSystemEntities - names of every supported system entity
 * @param {NLUModel} nluModel - model whose getSystemEntities() yields objects with a `name`
 * @returns {Array<string>} the inactive names, each prefixed with "GLOBAL."
 */
NLUStudioTrainer.getDisabledSystemEnities = function(supportedSystemEntities, nluModel) {
    var activeNames = nluModel.getSystemEntities().map(function(entity) {
        return entity.name;
    });
    var lookup = new global.ArrayUtil();

    return supportedSystemEntities
        .filter(function(candidate) {
            return lookup.indexOf(activeNames, candidate) === -1;
        })
        .map(function(inactive) {
            return "GLOBAL." + inactive;
        });
};
/**
 * Resolves the lookup sources referenced by the given handle names.
 *
 * Records whose type equals VOCAB_TYPES.lookup are listed with their stored solution name.
 * Every other record is retrained through NLULookupTrainer when it has no solution yet
 * or was updated after it was last trained; otherwise its stored solution is reused.
 *
 * @param {Array<string>} handleNames - handles passed to NLULookup.getGR
 * @param {string} modelId - not used by this function
 * @returns {Array<{name: string, solutionName: string}>} one entry per lookup record
 * @throws {Error} when retraining a lookup reports status 'failure'
 */
NLUStudioTrainer.getLookupSources = function(handleNames, modelId) {
    var sources = [];
    var gr = NLULookup.getGR(handleNames);

    while (gr.next()) {
        var storedSolution = gr.getValue('solution_name');

        if (gr.getValue('type') === VOCAB_TYPES.lookup) {
            sources.push({
                name: gr.getValue('name'),
                solutionName: storedSolution
            });
            continue;
        }

        // No solution yet means it has never been trained.
        var mustTrain = !storedSolution;
        if (!mustTrain) {
            var updatedRaw = gr.getValue('sys_updated_on');
            var updatedOn = updatedRaw ? new GlideDateTime(updatedRaw).getNumericValue() : 0;
            var trainedRaw = new NLULookup(null, gr).getLastTrainedOn();
            var trainedOn = trainedRaw ? new GlideDateTime(trainedRaw).getNumericValue() : -1;
            // Stale when the record changed after the last training run.
            mustTrain = updatedOn > trainedOn;
        }

        if (!mustTrain) {
            sources.push({
                name: gr.getValue('name'),
                solutionName: storedSolution
            });
            continue;
        }

        var trainOutcome = new NLULookupTrainer(null, gr).trainLookup();
        if (trainOutcome.status === 'failure') {
            gs.log('NLUStudioTrainer static lookup train error: ' + trainOutcome.message);
            throw new Error(gs.getMessage('Error while synching vocabulary source: {0}. Please check vocabulary source has values.', gr.getValue('name')));
        }
        sources.push({
            name: trainOutcome.lookupDetails.handleName,
            solutionName: trainOutcome.lookupDetails.solutionName
        });
    }

    return sources;
};
/**
 * Builds the model-level header of the training payload from the model record.
 *
 * When the integrator cannot report a model version, the model is renamed to its
 * solution name (unless sys_policy is 'read'), given a default category if it has
 * none, saved, and the version is then read through the solution store.
 *
 * @param {GlideRecord} modelGr - model record; may be updated as described above
 * @param {NLUParloIntegrator} parloIntegrator - source of the model version
 * @returns {Object} object with name, language, confidenceThreshold, modelPurpose,
 *     schemaVersion and version
 */
NLUStudioTrainer.getModelDataFromGR = function(modelGr, parloIntegrator) {
    var fieldMap = {
        name: 'name',
        language: 'language',
        confidenceThreshold: 'confidence_threshold',
        modelPurpose: 'category'
    };
    var header = NLUSystemUtil.getObjectFromGR(modelGr, fieldMap);

    header.schemaVersion = constants.SCHEMA_VERSION;
    if (!header.language) {
        header.language = constants.MODEL_DEFAULT_LANG;
    }

    try {
        header.version = parloIntegrator.getModelVersion();
    } catch (versionLookupError) {
        var currentName = header.name;
        var expectedName = NLUParloIntegrator.getModelSolutionName(modelGr.getUniqueValue());
        var nameDiffers = currentName !== expectedName;

        if (nameDiffers && modelGr.getValue('sys_policy') !== 'read') {
            modelGr.setValue('name', expectedName);
            if (!header.modelPurpose) {
                header.modelPurpose = constants.MODEL_CAT_VA;
                modelGr.setValue('category', constants.MODEL_CAT_VA);
            }
            modelGr.update();
            header.name = expectedName;
            // Hand the updated record back to the integrator.
            parloIntegrator.setModelGr(modelGr);
        }

        var solutionHandle = sn_ml.NLUSolutionStore.get(expectedName, {});
        header.version = parloIntegrator.getModelVersion(solutionHandle);
    }

    return header;
};
/**
 * Collects batch-test utterances of a model, either for one intent or for
 * utterances that have no intent at all.
 *
 * @param {string} modelId - model whose test sets are queried (`test_set.model`)
 * @param {?string} intent - intent name to match; when falsy, only rows with an empty
 *     `intent` field are returned
 * @param {number} [testLimit] - optional row limit applied to the query; because the
 *     exact-intent check happens after the query, fewer rows may be returned
 * @returns {Array<{utterance: string, source: (string|undefined)}>} newest first;
 *     `source` is "expert-feedback" only for rows whose source is 'expert_feedback'
 */
NLUStudioTrainer.getDefaultTestSamples = function(modelId, intent, testLimit) {
    var testSetGr = new GlideRecord('nlu_batch_test_utterance');
    testSetGr.addQuery('test_set.model', modelId);
    // Fix: the ordering must be applied to the query record. It was previously
    // invoked on `testLimit` (a number or undefined), which threw on every call.
    testSetGr.orderByDesc('sys_updated_on');
    if (intent)
        testSetGr.addEncodedQuery('intentLIKE' + intent);
    else
        testSetGr.addNullQuery('intent');
    if (testLimit) {
        testSetGr.setLimit(testLimit);
    }
    testSetGr.query();

    var testSamples = [];
    while (testSetGr.next()) {
        if (intent) {
            // LIKE also matches substrings (e.g. "Greet" in "GreetUser"), so the
            // comma-separated intent list is re-checked for an exact entry.
            var intents = (testSetGr.getValue('intent') || '').split(',');
            if (intents.indexOf(intent) == -1)
                continue;
        }
        var sample = {
            utterance: testSetGr.getValue('utterance')
        };
        if (testSetGr.getValue('source') === 'expert_feedback') {
            sample.source = "expert-feedback";
        }
        testSamples.push(sample);
    }
    return testSamples;
};
/**
 * Serialises every intent in `intentsGr` into the structure used by the training payload.
 *
 * @param {GlideRecord} intentsGr - intent records to iterate (advanced with next())
 * @param {Array<{name: string}>} modelEntities - already-serialised model-level entities;
 *     only their names are read
 * @returns {{handleNames: Array, intentsArray: Array<Object>}} the lookup handles found in
 *     all utterances, and one object per intent
 */
NLUStudioTrainer.getIntentDataFromGR = function(intentsGr, modelEntities) {
var intentsArray = [];
var intentObj, intentId, nluIntent;
var handleNames = [];
// Names of entities known so far; seeded with the model-level entities.
// NOTE(review): this list is shared across loop iterations, so names pushed for one
// intent remain visible while processing later intents - confirm this is intended.
var entities = [];
modelEntities.forEach(function(eachModelEntity) {
entities.push(eachModelEntity.name);
});
while (intentsGr.next()) {
intentId = intentsGr.getValue('sys_id');
nluIntent = new NLUIntent(intentId);
var utternace = NLUStudioTrainer.getUtterancesDataFromGR(nluIntent.getUtterances());
// Test samples are only gathered when NLUSystemUtil reports Utah-level support.
var testSamples = [];
if (NLUSystemUtil.isNLUAdvUtahSupported()) {
testSamples = NLUStudioTrainer.getDefaultTestSamples(intentsGr.getValue('model'), intentsGr.getValue('name'));
}
handleNames = handleNames.concat(utternace.handleNames);
intentObj = {
name: intentsGr.getValue('name'),
description: intentsGr.getValue('description'),
samples: utternace.utteranceArray,
testSamples: testSamples,
entities: []
};
// The intent-level threshold is emitted only when the override flag is set.
if (intentsGr.getValue('override_confidence_threshold') === '1')
intentObj.confidenceThreshold = intentsGr.getValue('confidence_threshold');
// Intent entities: open-ended ones go to `templates`, all others to `entities`.
var entitiesGr = nluIntent.getEntities();
var openEnded = [];
var allIntentEntities = NLUStudioTrainer.getEntitiesDataFromGR(entitiesGr, intentId, true);
allIntentEntities.forEach(function(eachEntity) {
entities.push(eachEntity.name);
if (eachEntity.type == ENTITY_TYPES.open_ended) openEnded.push(eachEntity);
else intentObj.entities.push(eachEntity);
// `type` was requested only to route the entity above; it is removed from the emitted object.
delete eachEntity.type;
});
if (openEnded.length > 0) intentObj.templates = openEnded;
// Important or sufficient entities: only entities whose name is in `entities` are listed.
var importantEntities = [];
var sufficientEntities = [];
var arrayUtil = new global.ArrayUtil();
var intentEntitiesGr = nluIntent.getIntentEntities();
while (intentEntitiesGr.next()) {
var entityName = intentEntitiesGr.entity.name;
if (arrayUtil.indexOf(entities, entityName) !== -1) {
var relationship = intentEntitiesGr.getValue('relationship');
if (relationship === INTENT_ENITITY_RELATIONSHIP.important)
importantEntities.push('@entity:' + entityName + '@@');
if (relationship === INTENT_ENITITY_RELATIONSHIP.sufficient)
sufficientEntities.push('@entity:' + entityName + '@@');
}
}
// Empty lists are omitted from the intent object altogether.
if (importantEntities.length > 0) intentObj.importantEntities = importantEntities;
if (sufficientEntities.length > 0) intentObj.sufficientEntities = sufficientEntities;
intentsArray.push(intentObj);
}
return {
handleNames: handleNames,
intentsArray: intentsArray
};
};
/**
 * Builds the "NO_INTENT" entry of the training payload from labeled data marked irrelevant.
 *
 * @param {string} modelId - model used for the status lookup and the test samples
 * @param {number} trainLimit - maximum number of labeled rows read as training samples
 * @param {number} testLimit - limit forwarded to getDefaultTestSamples
 * @returns {{name: string, samples: Array, testSamples: Array}} the NO_INTENT entry
 */
NLUStudioTrainer.getIrrelevantData = function(modelId, trainLimit, testLimit) {
    var modelStatusGr = global.NLUModel.getModelStatusGr(modelId);

    var labeledGr = new GlideRecord('ml_labeled_data');
    labeledGr.addEncodedQuery('label_type=irrelevant^ORlabel_type=irrelevant_to_this_model^product=nlu^source=virtual_agent^ORsource=manual');
    labeledGr.orderByDesc('sys_updated_on');
    labeledGr.setLimit(trainLimit);

    // When the model has a last_tuned_on value, rows updated after it are excluded.
    var lastTunedOn = modelStatusGr && modelStatusGr.getValue('last_tuned_on');
    if (lastTunedOn) {
        labeledGr.addQuery('sys_updated_on', '<=', lastTunedOn);
    }
    labeledGr.query();

    var trainingSamples = [];
    while (labeledGr.next()) {
        trainingSamples.push({
            utterance: labeledGr.getValue('text'),
            source: 'expert-feedback'
        });
    }

    return {
        name: "NO_INTENT",
        samples: trainingSamples,
        testSamples: NLUStudioTrainer.getDefaultTestSamples(modelId, null, testLimit)
    };
};
/**
 * Serialises utterance records into training samples.
 *
 * @param {GlideRecord} utterancesGr - utterance records to iterate (advanced with next())
 * @returns {{handleNames: Array, utteranceArray: Array<Object>}} every lookup handle found,
 *     and one `{utterance, source?}` object per record; `source` is "expert-feedback"
 *     whenever the record's source is set and is not 'manual'
 */
NLUStudioTrainer.getUtterancesDataFromGR = function(utterancesGr) {
    var allHandles = [];
    var samples = [];

    while (utterancesGr.next()) {
        var serialized = NLUStudioTrainer.serializeUtterance(utterancesGr.getValue('utterance'));
        allHandles = allHandles.concat(serialized.handleNames);

        var sample = {
            utterance: serialized.utterance
        };
        var origin = utterancesGr.getValue('source');
        var isNonManual = origin && origin !== 'manual';
        if (isNonManual) {
            sample.source = "expert-feedback";
        }
        samples.push(sample);
    }

    return {
        handleNames: allHandles,
        utteranceArray: samples
    };
};
/**
 * Replaces every `{{...}}` annotation in an utterance with the `phrase` value of the
 * JSON object it wraps. Backslash-escaped quotes inside the annotation are unescaped
 * before parsing.
 *
 * @param {string} utterance - text that may contain `{{ {json} }}`-style annotations
 * @returns {string} the utterance with each annotation replaced by its phrase
 */
NLUStudioTrainer.deserialiseUtterance = function(utterance) {
    var annotationPattern = /{{(.*?)}}/g;

    function phraseOf(token) {
        var unescaped = token.replace(/\\"/g, '"');
        // Strip one brace from each side so that "{{...}}" becomes the JSON text "{...}".
        var jsonText = unescaped.slice(1, -1);
        return JSON.parse(jsonText).phrase;
    }

    return utterance.replace(annotationPattern, phraseOf);
};
/**
 * Rewrites whitespace-delimited `@handle` tokens in an utterance to the
 * `@lookupSources:handle@@` form and reports the handles that were found.
 *
 * @param {string} utterance - raw utterance text
 * @returns {{utterance: string, handleNames: Array<string>}} rewritten text and the
 *     handle names in order of appearance (duplicates are kept)
 */
NLUStudioTrainer.serializeUtterance = function(utterance) {
    var handlePattern = /(^|\s)@([^\s]+)(\s|$)/g;
    var foundHandles = [];
    var rewritten = utterance;

    var hit = handlePattern.exec(utterance);
    while (hit) {
        var name = hit[2];
        rewritten = rewritten.replace('@' + name, '@lookupSources:' + name + '@@');
        foundHandles.push(name);
        // Step back one character so the whitespace that ended this match can
        // also start the next one (e.g. "@a @b").
        handlePattern.lastIndex--;
        hit = handlePattern.exec(utterance);
    }

    return {
        utterance: rewritten,
        handleNames: foundHandles
    };
};
/**
 * Serialises vocabulary records for the training payload.
 *
 * Pattern records contribute their `pattern`; lookup records contribute a handle
 * plus the values read from their table/field; every other type is turned into a
 * pattern built from the record name via NLUHelper.wordToRegex.
 *
 * @param {GlideRecord} vocabGr - vocabulary records to iterate (advanced with next())
 * @returns {Array<Object>} one object per record, each with a `relatedTerms` array
 */
NLUStudioTrainer.getVocabularyDataFromGR = function(vocabGr) {
    var serialized = [];

    while (vocabGr.next()) {
        var entry = {};

        switch (vocabGr.getValue('type')) {
            case VOCAB_TYPES.pattern:
                entry.pattern = vocabGr.getValue('pattern');
                break;
            case VOCAB_TYPES.lookup:
                entry.handle = vocabGr.getValue('name');
                entry.simpleValues = {
                    values: NLUSystemUtil.getTableFieldValues(
                        vocabGr.getValue('table'),
                        vocabGr.getValue('field_name')
                    )
                };
                break;
            default:
                entry.pattern = NLUHelper.wordToRegex(vocabGr.getValue('name'));
        }

        // related_terms is stored as a comma-separated string; empty means no terms.
        var rawTerms = vocabGr.getValue('related_terms');
        entry.relatedTerms = rawTerms ? rawTerms.split(',') : [];

        serialized.push(entry);
    }

    return serialized;
};
/**
 * Serialises entity records into the entity objects of the training payload.
 *
 * @param {GlideRecord} entityRec - entity records to iterate (advanced with next())
 * @param {string} [intentId] - passed to NLUEntity#getAnnotations together with the
 *     enabled-intent filter below
 * @param {boolean} [addType] - when truthy, each object also carries the record's `type`
 * @returns {Array<Object>} serialised entities; an entity whose `samples` array exists
 *     but is empty is left out
 */
NLUStudioTrainer.getEntitiesDataFromGR = function(entityRec, intentId, addType) {
var nluEntity, entityArray = [];
var entityObj, entityId, utterances, entityType, entityValues;
while (entityRec.next()) {
entityId = entityRec.getValue('sys_id');
entityType = entityRec.getValue('type');
entityValues = entityRec.getValue('values_list');
entityObj = {
name: entityRec.getValue('name')
};
// The entity-level threshold is emitted only when the override flag is set.
if (entityRec.getValue('override_confidence_threshold') === '1')
entityObj.confidenceThreshold = entityRec.getValue('confidence_threshold');
if (addType) entityObj['type'] = entityRec.getValue('type');
nluEntity = new NLUEntity(entityId);
// Annotations are restricted to utterances whose intent is enabled or has no enable value.
utterances = NLUStudioTrainer.getAnnotationsDataFromGR(nluEntity.getAnnotations(intentId, 'utterance.intent.enable=true^ORutterance.intent.enableISEMPTY'));
if (entityType == ENTITY_TYPES.list) {
if (entityRec.lookup) {
// List entity backed by a lookup: map it to the lookup source instead of inline categories.
var normalizedField = entityRec.getValue('field_name');
var lookupType = entityRec.lookup.getRefRecord().getValue('type');
if (lookupType == VOCAB_TYPES.static_lookup) {
// Static lookups use different field names: 'actual' becomes 'item_name', anything else 'keywords'.
normalizedField = normalizedField === 'actual' ? 'item_name' : 'keywords';
}
entityObj.fieldMapping = [{
name: '@lookupSources:' + entityRec.lookup.name + '@@',
normalizedField: normalizedField
}];
NLUHelper.extend(entityObj, NLUStudioTrainer.getAnnotations(utterances));
} else {
// Plain list entity: values_list is a comma-separated list of categories.
entityValues = entityValues && entityValues.split(',');
entityObj.categories = NLUStudioTrainer.getCategories(entityValues, utterances);
}
} else if (entityType == ENTITY_TYPES.pattern) {
// Pattern entity: values_list holds the pattern itself.
entityObj.samples = [{
pattern: entityValues
}];
} else if (entityType == ENTITY_TYPES.open_ended) {
// Open-ended entities use the `annotation` sample key (third argument true).
NLUHelper.extend(entityObj, NLUStudioTrainer.getAnnotations(utterances, null, true));
} else {
NLUHelper.extend(entityObj, NLUStudioTrainer.getAnnotations(utterances));
if (entityType == ENTITY_TYPES.system_derived) {
// Derived entities point at their parent, falling back to the base entity's name.
var parent = entityRec.getValue('parent') || entityRec.base_entity.name.toString();
entityObj.parent = 'entity:GLOBAL.' + parent;
}
}
// Skip entities that ended up with a defined but empty samples list.
if (!(typeof entityObj.samples != 'undefined' && entityObj.samples.length == 0)) {
entityArray.push(entityObj);
}
}
return entityArray;
};
/**
 * Reads the `annotation` field of every record and parses it as JSON.
 *
 * @param {GlideRecord} utteranceRec - records to iterate (advanced with next())
 * @returns {Array} the parsed annotation values, one per record
 */
NLUStudioTrainer.getAnnotationsDataFromGR = function(utteranceRec) {
    var parsedAnnotations = [];
    var rawAnnotation;

    while (utteranceRec.next()) {
        rawAnnotation = utteranceRec.getValue('annotation');
        parsedAnnotations.push(JSON.parse(rawAnnotation));
    }

    return parsedAnnotations;
};
/**
 * Groups annotation samples by category for a list entity.
 *
 * @param {Array<string>} entityValues - category names; a falsy value yields no categories
 * @param {Array} utterances - parsed annotations passed on to getAnnotations
 * @returns {Array<{category: string, samples: Array}>} only categories that have
 *     at least one sample, in the order of `entityValues`
 */
NLUStudioTrainer.getCategories = function(entityValues, utterances) {
    var grouped = [];

    if (!entityValues || !utterances) {
        return grouped;
    }

    entityValues.forEach(function(categoryName) {
        var annotated = NLUStudioTrainer.getAnnotations(utterances, categoryName);
        var categorySamples = annotated && annotated.samples;
        if (categorySamples && categorySamples.length > 0) {
            grouped.push({
                category: categoryName,
                samples: categorySamples
            });
        }
    });

    return grouped;
};
/**
 * Converts parsed annotations into training samples, rewriting `@handle` references
 * to the `@lookupSources:handle@@` form.
 *
 * @param {Array<Array<Object>>} utterances - one array of annotation objects per utterance;
 *     each annotation is read for `annotatedString` and `cat`
 * @param {string} [cat] - when given, only annotations whose `cat` equals it are used
 * @param {boolean} [isOpenEnded] - when truthy, samples use the key `annotation`
 *     instead of `entityAnnotation`
 * @returns {{samples: Array, contentSource: (Array|undefined)}} the samples, plus
 *     `contentSource` when at least one lookup reference was collected
 */
NLUStudioTrainer.getAnnotations = function(utterances, cat, isOpenEnded) {
var samples = [];
var contentSource = [];
if (utterances) {
utterances.forEach(function(annotations) {
annotations.forEach(function(annotation) {
// Keep every annotation when no category filter is given, otherwise only matching ones.
if (cat && cat === annotation.cat || !cat) {
var annotatedString = annotation.annotatedString;
// replace the annotation with lookupsource
if (/(^|\s|")@([^\s]+)(\s|$|")/g.test(annotation.annotatedString)) {
var annotationHandleRegex = /(^|\s|")@([^\s]+)(\s|$|")/g;
var match;
while (match = annotationHandleRegex.exec(annotation.annotatedString)) {
var handleName = match[2];
var position;
//if the annotation occurs as the last word within the phrase, then "}} also becomes part of the capturing group
if ((position = handleName.lastIndexOf('"}}')) > -1 && (position + 3 === handleName.length))
handleName = handleName.substring(0, position);
annotatedString = annotatedString.replace('@' + handleName, '@lookupSources:' + handleName + '@@');
// The phrase is read from the original (unrewritten) string; a handle that is the
// whole annotated phrase is also recorded as a content source.
var lookupPhrase = NLUStudioTrainer.getLookupPhrase(annotation.annotatedString);
if (lookupPhrase === handleName) contentSource.push('@lookupSources:' + handleName + '@@');
// Step back one character so the delimiter that ended this match can start the next one.
annotationHandleRegex.lastIndex--;
}
}
samples.push(isOpenEnded ? {
annotation: annotatedString
} : {
entityAnnotation: annotatedString
});
}
});
});
}
// contentSource is only included when something was collected.
if (contentSource.length > 0) {
return {
contentSource: contentSource,
samples: samples
};
}
return {
samples: samples
};
};
/**
 * Extracts the handle from the first `{{phrase="@handle"}}` marker in a string.
 *
 * @param {string} annotatedString - annotated text to search
 * @returns {?(string|boolean)} the captured text after "@", or null when there is no marker
 */
NLUStudioTrainer.getLookupPhrase = function(annotatedString) {
    var hit = /{{phrase="@([^)]+)"}}/g.exec(annotatedString);

    if (!hit) {
        return hit;
    }
    if (!Array.isArray(hit)) {
        return false;
    }
    if (!(hit.length > 1)) {
        return false;
    }
    return hit[1];
};
// Instance side of the class: it carries only the type name and an empty initialize.
NLUStudioTrainer.prototype = {
    type: 'NLUStudioTrainer',

    // Intentionally empty: there is no per-instance state to set up.
    initialize: function() {
        return;
    }
};
})();
Sys ID
9a7dbfc907dd5010220b0a701ad3002b