Name
sn_cmdb_int_util.DuplicateRowProcessor
Description
Processor for identifiying unchanged data and only inserting data that has updated information.
Script
var DuplicateRowProcessor = Class.create();
DuplicateRowProcessor.prototype = {
ON_OFF_PROPERTY: "sn_cmdb_int_util.duplicate_row_processor_enabled",
RULE_TABLE: "sn_cmdb_int_util_duplicate_row_rule",
HASH_TABLE: "sn_cmdb_int_util_duplicate_row_hash",
_ignore: false,
_insertedCount: 0,
_blockedCount: 0,
/*
* Processor for identifying unchanged data and only inserting
* new or changed rows.
*
* dataSource: The sys_id of the data source you are importing data with. This is used to identify the correct rule
* inserter: an implementation of the inserter interface
* interface inserter {
* int getMaximumRows();
* void insert(row);
* }
* this matches the import_set_table api from a scripted data source.
* If getMaximumRows is greater than 0 do not check the hash
*
* Usage:
* var inserter = new sn_cmdb_int_util.DuplicateRowProcessor("<data source id>", import_set_table);
* rows.forEach(function(row) {
* inserter.insert(row);
* })
* inserter.close();
*
* Close provides a logged report
*/
initialize: function(dataSource, inserter) {
this.inserter = inserter;
// only when set to true is it on. All other values default to off
if ("true" != gs.getProperty(this.ON_OFF_PROPERTY)) {
this._ignore = true;
// no need to load rules if switched off
return;
}
// if we are only calling .clean() there won't be an inserter
if (inserter && inserter.getMaximumRows() > 0) {
this._ignore = true;
return;
}
var rule = this._getRule(dataSource);
if (rule.next()) {
var ignore = rule.getValue("ignore_fields") || "";
this.rule = {
rule: rule.getUniqueValue(),
dataSource: dataSource,
table: rule.data_source.import_set_table_name + "",
ignore: ignore,
};
} else {
gs.info("No data source rule found for Duplicate row processor using data source id " + dataSource);
this._ignore = true;
}
},
/*
* delegate method for inserting data
*/
insert: function(row) {
if (!this.inserter)
return;
if (this._ignore)
return this._insert(row);
// If no data is added to the hash we should fall back to
// inserting the hash so we don't block everything.
// I prefer to fail open and live with the time it takes
// to transform over blocking everything by accident
var hash = this.hash(row);
if (null === hash) {
this._insert(row);
return;
}
if (!this._checkHashExists(hash)) {
this._createBlock(this.rule.rule, hash);
this._insert(row);
} else
this._block();
},
/*
* Destructor function that logs a report
*/
close: function() {
if (this._ignore)
return;
var report = {
name: "DuplicateRowProcessor onComplete Report",
rule: this.rule,
insertedCount: this._insertedCount,
blockedCount: this._blockedCount,
sample: this._sampleFields,
};
gs.info(JSON.stringify(report));
},
/*
* Creates the hash for the given row based on the rule for the
* given data source.
* Sorts given row keys for consistency
*/
hash: function(row) {
// make hash unique for this table
var fields = "";
var ignore = this.rule.ignore;
Object.keys(row).sort().forEach(function(k, i) {
if (!ignore.contains(k)) {
fields += "_" + row[k];
}
});
// if no fields were added to hash we don't want to
// just hash the table or we will block all rows to
// that table
if (fields == "")
return null;
fields += "_" + this.rule.table;
if (!this._sampleFields)
this._sampleFields = fields;
return new GlideDigest().getMD5Hex("SALT_" + fields);
},
clean: function() {
if (this._ignore)
return;
var rule = this._getRule(this.rule.dataSource);
if (rule.next()) {
gs.info("Deleting Hash values for rule " + rule.data_source.name);
var gr = new GlideRecord(this.HASH_TABLE);
gr.addQuery("rule", rule.getUniqueValue());
gr.query();
gr.deleteMultiple();
}
},
_insert: function(row) {
this._insertedCount++;
this.inserter.insert(row);
},
_block: function() {
this._blockedCount++;
},
_checkHashExists: function(hash) {
var block = new GlideRecord(this.HASH_TABLE);
block.addQuery("hash", hash);
block.setLimit(1);
block.query();
return block.hasNext();
},
_createBlock: function(rule, hash) {
var block = new GlideRecord(this.HASH_TABLE);
block.setValue("rule", rule);
block.setValue("hash", hash);
block.insert();
},
_getRule: function(dataSource) {
var rules = new GlideRecord(this.RULE_TABLE);
rules.addQuery("data_source", dataSource);
rules.addActiveQuery();
rules.setLimit(1);
rules.query();
return rules;
},
type: 'DuplicateRowProcessor'
};
Sys ID
76c83ea8c72110109cea1197fdc26025