Name

sn_cmdb_int_util.DuplicateRowProcessor

Description

Processor for identifiying unchanged data and only inserting data that has updated information.

Script

var DuplicateRowProcessor = Class.create();
DuplicateRowProcessor.prototype = {
  ON_OFF_PROPERTY: "sn_cmdb_int_util.duplicate_row_processor_enabled",

  RULE_TABLE: "sn_cmdb_int_util_duplicate_row_rule",
  HASH_TABLE: "sn_cmdb_int_util_duplicate_row_hash",

  _ignore: false,
  _insertedCount: 0,
  _blockedCount: 0,

  /*
   * Processor for identifying unchanged data and only inserting 
   * new or changed rows.
   *
   * dataSource: The sys_id of the data source you are importing data with. This is used to identify the correct rule
   * inserter: an implementation of the inserter interface
   * interface inserter {
   *     int getMaximumRows();
   *     void insert(row);
   * }
   * this matches the import_set_table api from a scripted data source.
   * If getMaximumRows is greater than 0 do not check the hash
   *
   * Usage: 
   * var inserter = new sn_cmdb_int_util.DuplicateRowProcessor("<data source id>", import_set_table);
   * rows.forEach(function(row) {
   *     inserter.insert(row);
   * })
   * inserter.close();
   *
   * Close provides a logged report
   */
  initialize: function(dataSource, inserter) {
      this.inserter = inserter;

      // only when set to true is it on. All other values default to off
      if ("true" != gs.getProperty(this.ON_OFF_PROPERTY)) {
          this._ignore = true;
          // no need to load rules if switched off
          return;
      }

      // if we are only calling .clean() there won't be an inserter
      if (inserter && inserter.getMaximumRows() > 0) {
          this._ignore = true;
          return;
      }

      var rule = this._getRule(dataSource);
      if (rule.next()) {
          var ignore = rule.getValue("ignore_fields") || "";
          this.rule = {
              rule: rule.getUniqueValue(),
              dataSource: dataSource,
              table: rule.data_source.import_set_table_name + "",
              ignore: ignore,
          };
      } else {
          gs.info("No data source rule found for Duplicate row processor using data source id " + dataSource);
          this._ignore = true;
      }
  },

  /*
   * delegate method for inserting data
   */
  insert: function(row) {
      if (!this.inserter)
          return;

      if (this._ignore)
          return this._insert(row);

      // If no data is added to the hash we should fall back to
      // inserting the hash so we don't block everything.
      // I prefer to fail open and live with the time it takes
      // to transform over blocking everything by accident
      var hash = this.hash(row);
      if (null === hash) {
          this._insert(row);
          return;
      }
      if (!this._checkHashExists(hash)) {
          this._createBlock(this.rule.rule, hash);
          this._insert(row);
      } else
          this._block();
  },

  /*
   * Destructor function that logs a report
   */
  close: function() {
      if (this._ignore)
          return;
      var report = {
          name: "DuplicateRowProcessor onComplete Report",
          rule: this.rule,
          insertedCount: this._insertedCount,
          blockedCount: this._blockedCount,
          sample: this._sampleFields,
      };
      gs.info(JSON.stringify(report));
  },

  /*
   * Creates the hash for the given row based on the rule for the 
   * given data source. 
   * Sorts given row keys for consistency
   */
  hash: function(row) {
      // make hash unique for this table
      var fields = "";

      var ignore = this.rule.ignore;
      Object.keys(row).sort().forEach(function(k, i) {
          if (!ignore.contains(k)) {
              fields += "_" + row[k];
          }
      });

      // if no fields were added to hash we don't want to 
      // just hash the table or we will block all rows to 
      // that table
      if (fields == "")
          return null;

      fields += "_" + this.rule.table;

      if (!this._sampleFields)
          this._sampleFields = fields;
      return new GlideDigest().getMD5Hex("SALT_" + fields);
  },

  clean: function() {
      if (this._ignore)
          return;

      var rule = this._getRule(this.rule.dataSource);
      if (rule.next()) {
          gs.info("Deleting Hash values for rule " + rule.data_source.name);
          var gr = new GlideRecord(this.HASH_TABLE);
          gr.addQuery("rule", rule.getUniqueValue());
          gr.query();
          gr.deleteMultiple();
      }
  },

  _insert: function(row) {
      this._insertedCount++;
      this.inserter.insert(row);
  },

  _block: function() {
      this._blockedCount++;
  },

  _checkHashExists: function(hash) {
      var block = new GlideRecord(this.HASH_TABLE);
      block.addQuery("hash", hash);
      block.setLimit(1);
      block.query();
      return block.hasNext();
  },

  _createBlock: function(rule, hash) {
      var block = new GlideRecord(this.HASH_TABLE);
      block.setValue("rule", rule);
      block.setValue("hash", hash);
      block.insert();
  },

  _getRule: function(dataSource) {
      var rules = new GlideRecord(this.RULE_TABLE);
      rules.addQuery("data_source", dataSource);
      rules.addActiveQuery();
      rules.setLimit(1);
      rules.query();
      return rules;
  },

  type: 'DuplicateRowProcessor'
};

Sys ID

76c83ea8c72110109cea1197fdc26025

Offical Documentation

Official Docs: