Name
sn_app_insights.Kmeans
Description
No description available
Script
/**
* Kmeans
* This is a clustering model following Kmean++ algorithm.
* Usage:
* 1. The data needs to be passed to processAndClusterData.
* The data (passed in parameter) needs to have a unique structure:
* It needs to contain two keys ["seriesConfig, "seriesData"] and each key
* should have values similarly to the example bellow.
* read the data in the format given eg:
data = {
"seriesConfig": {
"line0": {
"x": "timeStamp",
"y": "y0",
"type": "line",
"label": "ML-G1-AG25-1"
},
"line1": {
"x": "timeStamp",
"y": "y1",
"type": "line",
"label": "BU-MN4-AG27-4"
},
},
"seriesData": [
{
"timeStamp": 1603899000000,
"y0": 43.694744,
"y1": 49.635857,
},
{
"timeStamp": 1603899300000,
"y0": 47.725418,
"y1": 54.03098,
},
{
"timeStamp": 1603899600000,
"y0": null,
"y1": null,
},
{
"timeStamp": 1603899900000,
"y0": null,
"y1": null,
}
]
}
* 2. Return value:
* After calling processAndClusterData function on the data it will return the clusters in the same
* format that was requested for the input. The return object will contain ["seriesConfig, "seriesData"]
* keys but the difference would be the number of lines in values since the new values are the values
* of the clusters.
* 3. Disclaimer:
* 1. If the passed in object contains any other keys than ["seriesConfig, "seriesData"] the return object
* will not contain those keys with their values; it will only contain these two keys with the calculated
* values
* 2. If the passed in object has missing keys or values that are stated in the example above, the script would
* fail and result in errors.
*/
var Kmeans = Class.create();
Kmeans.prototype = {
initialize: function(opts) {
//If not parameter was provided set parameters to default empty dictionary
if (opts === undefined)
opts = {};
// Maximum number of iterations for training
if (opts.maxIteration)
this.maxIteration = opts.maxIteration;
else
this.maxIteration = 300;
// Number of cluster centroids.
if (opts.numberOfClusters)
this.numberOfClusters = opts.numberOfClusters;
else
this.numberOfClusters = 3;
// Function used for distance metric
if (opts.distanceFunction)
this.distanceFunction = opts.distanceFunction;
else
this.distanceFunction = this.euclidean;
// For speeding up the process on the bigger data sets where each input is very long we need to decrease the
// dimensionality
if (opts.maxNumOfDimensions)
this.maxNumOfDimensions = opts.maxNumOfDimensions;
else
this.maxNumOfDimensions = 10000;
// Keeps track of which cluster centroid index each data point belongs to.
this.assignments = [];
// Keep track of number of times centroids move.
this.iterations = 0;
},
getClusters: function() {
// Generate random cluster centroid points.
this.means = this.seedsPlusPlus();
// Perform work.
this.run();
return this.assignments;
},
/**
* seedsPlusPlus
* Kmean Plus Plus algorithm for initializing the centroids:
* 1. choose one center uniformly at random among the data points.
* 2. For each data point x, compute D(x), the distance between x and the nearest center that has already been
* chosen.
* 3. choose one new data point at random as a new center, using a weighted probability distribution where a point
* x is chosen with probability proportional to D(x)2.
* 4. Repeat Steps 2 and 3 until numberOfClusters centers have been chosen.
* 5. Now that the initial centers have been chosen, proceed using standard numberOfClusters-means clustering.
* **/
seedsPlusPlus: function() {
var means = [];
var randomIndex = Math.floor(Math.random() * this.data.length);
var newMean = this.data[randomIndex];
means.push(newMean);
//Assign the first cluster centroid
// means.push(mean)
//Loop numberOfClusters-1 to assign the rest of the cluster centroids
for (var seedCount = 0; seedCount < this.numberOfClusters - 1; seedCount++) {
// var assignments = [];
var closestDistancesSquare = [];
var distance;
for (var i = 0; i < this.data.length; i++) {
var point = this.data[i];
var distances = [];
for (var j = 0; j < means.length; j++) {
var mean = means[j];
distances[j] = this.distanceFunction(point, mean);
}
// After calculating all the distances from the data point to each cluster centroid,
// we pick the closest (smallest) distances.
distance = Math.min.apply(null, distances);
closestDistancesSquare[i] = Math.pow(distance, 2);
}
var sum = closestDistancesSquare.reduce(function(a, b) {
return a + b;
});
var probabilities = [];
for (i = 0; i < this.data.length; i++) {
if (sum == 0)
probabilities[i] = .01;
else
probabilities[i] = (closestDistancesSquare[i]) / (sum);
}
newMean = this.probabilityBasedChoice(this.data, probabilities);
means.push(newMean);
}
return means;
},
/**
* probabilityBasedChoice
* Select and return an element in the given list of options randomly based on probability distributions received
* **/
probabilityBasedChoice: function(options, probabilities) {
var choices = [];
for (var i = 0; i < probabilities.length; i++) {
for (var j = 0; j < Math.round(probabilities[i] * 100); j++) {
choices.push(options[i]);
}
}
var idx = Math.floor(Math.random() * choices.length);
return choices[idx];
},
/**
* assignClusterToDataPoints
* Calculate distance (based on the distance metric given) between each point and the cluster center.
* Assigns each point to closest mean point.
*/
assignClusterToDataPoints: function() {
var assignments = [];
var point;
var distances;
var mean;
for (var i = 0; i < this.data.length; i++) {
point = this.data[i];
distances = [];
for (var j = 0; j < this.means.length; j++) {
mean = this.means[j];
distances[j] = this.distanceFunction(point, mean);
}
// After calculating all the distances from the data point to each cluster centroid,
// we pick the closest (smallest) distances.
assignments[i] = distances.indexOf(Math.min.apply(null, distances));
}
return assignments;
},
/**
* updateMeans
* Update the positions of the the cluster centroids (means) to the average positions
* of all data points that belong to that mean.
*/
updateMeans: function() {
var newMeans = this.calculateMeans(this.data, this.assignments);
if (this.means.toString() != newMeans.toString()) {
this.means = newMeans;
return true;
}
return false;
},
/**
* calculateMeans
* Get the average of the points assigned under the same clusters.
*/
calculateMeans: function(points, assignments) {
var sums = [];
var counts = this.fillArray(this.means.length, 0);
var dim;
var numOfDims = points[0].length;
var numOfClusters = this.numberOfClusters;
var point;
// Clear location sums for each dimension.
for (var c = 0; c < numOfClusters; c++)
sums[c] = this.fillArray(numOfDims, 0);
// For each cluster, get sum of point coordinates in every dimension.
for (var index = 0; index < assignments.length; index++) {
c = this.assignments[index];
point = points[index];
counts[c]++;
for (dim = 0; dim < numOfDims; dim++)
sums[c][dim] += point[dim];
}
for (c = 0; c < numOfClusters; c++) {
if (counts[c] === 0)
counts[c] = 1;
for (dim = 0; dim < numOfDims; dim++) {
sums[c][dim] /= counts[c];
sums[c][dim] = Math.round(100 * sums[c][dim]) / 100;
}
}
return sums;
},
/**
* run
* Executing model training over multiple cycles until the model converges
* or the number of iteration reaches the maximum number of iterations
*/
run: function() {
++this.iterations;
// Reassign points to nearest cluster centroids.
this.assignments = this.assignClusterToDataPoints();
// Returns true if the cluster centroids have moved location since the last iteration.
var meansUpdated = this.updateMeans();
if (meansUpdated && (this.iterations < this.maxIteration))
this.run();
},
/**
* fillArray
* return an array of size length initialized all to the value of the val
*/
fillArray: function(length, val) {
return Array.apply(null, Array(length)).map(function() {
return val;
});
},
/**
* euclidean
* Custom and the default metric distance used to calculate the distance between each data points
*/
euclidean: function(point, mean) {
var sum = 0;
var difference;
for (var dim = 0; dim < point.length; dim++) {
difference = point[dim] - mean[dim];
difference = Math.pow(difference, 2);
sum += difference;
}
return Math.sqrt(sum);
},
/**
* processAndClusterData
* read the data in the format given eg:
data = {
"seriesConfig": {
"line0": {
"x": "timeStamp",
"y": "y0",
"type": "line",
"label": "ML-G1-AG25-1"
},
"line1": {
"x": "timeStamp",
"y": "y1",
"type": "line",
"label": "BU-MN4-AG27-4"
},
},
"seriesData": [
{
"timeStamp": 1603899000000,
"y0": 43.694744,
"y1": 49.635857,
},
{
"timeStamp": 1603899300000,
"y0": 47.725418,
"y1": 54.03098,
},
{
"timeStamp": 1603899600000,
"y0": null,
"y1": null,
},
{
"timeStamp": 1603899900000,
"y0": null,
"y1": null,
}
]
}
* and create a 2d array that the rows present each line (data point) and the columns
* represent the values through time then the newly generated 2d array (values) has reduced
* to less dimensions based on the maximum dimensions assigned and then this is passed to the model
* for clustering purposes. Then the centroids are selected from the model and formatted in the original
* format of the data received and then it is returned
*/
processAndClusterData: function(data, exclusionLineList) {
if (!exclusionLineList)
exclusionLineList = [];
if ("seriesConfig" in data && "seriesData" in data) {
if (Object.keys(data["seriesConfig"]).length > this.numberOfClusters) {
if (data["seriesData"].length === 0)
throw new Error('seriesData has no elements!!!');
} else
throw new Error('seriesConfig has less data points to be clustered than the number of clusters!!!');
} else
throw new Error('Argument passed in does not contain either seriesConfig or seriesData');
var seriesConfig = JSON.parse(JSON.stringify(data["seriesConfig"]));
var excluded = {};
for (var i = 0; i < exclusionLineList.length; i++) {
var key = exclusionLineList[i];
excluded[key] = seriesConfig[key];
delete seriesConfig[key];
}
var excludedKeys = Object.keys(excluded);
var keys = Object.keys(seriesConfig);
var seriesData = data["seriesData"];
var lines = Object.keys(seriesConfig);
var numberOfLinesToBeClustered = lines.length;
var xTitle = seriesConfig[lines[0]]["x"];
var labels = new Array(numberOfLinesToBeClustered);
var types = new Array(numberOfLinesToBeClustered);
var yTitles = new Array(numberOfLinesToBeClustered);
var times = new Array(seriesData.length);
var values = new Array(numberOfLinesToBeClustered);
for (i = 0; i < numberOfLinesToBeClustered; i++) {
yTitles[i] = seriesConfig[lines[i]]["y"];
labels[i] = seriesConfig[lines[i]]["label"];
types[i] = seriesConfig[lines[i]]["type"];
}
for (i = 0; i < numberOfLinesToBeClustered; i++) {
values[i] = new Array(times.length);
for (j = 0; j < times.length; j++)
values[i][j] = -1;
}
for (i = 0; i < times.length; i++) {
times[i] = seriesData[i][xTitle];
for (var j = 0; j < numberOfLinesToBeClustered; j++) {
var y = yTitles[j];
if (seriesData[i][y] != null)
values[j][i] = seriesData[i][y];
}
}
this.originalData = values;
this.data = this.dimensionReduction(values, this.maxNumOfDimensions);
this.originalMeans = [];
this.getClusters();
this.originalMeans = this.calculateMeans(this.originalData, this.assignments);
this.means = this.originalMeans;
// put the mean results back in the original format of the data provided.
returnData = JSON.parse(JSON.stringify(data));
returnData["allConfigs"] = {};
var groupLines = {};
var label;
for (i = 0; i < this.means.length; i++) {
var groupNumberTemp1 = parseInt(i + 1).toFixed(0); //Using temp variable because of L10N concatenation warnings
label = gs.getMessage("Group {0}", groupNumberTemp1); //Not using a constant for group string as it causes L10N warning
groupLines[label] = JSON.parse(JSON.stringify(exclusionLineList));
}
for (i = 0; i < this.assignments.length; i++) {
var groupNumberTemp2 = parseInt(this.assignments[i] + 1).toFixed(0); //Using temp variable because of L10N concatenation warnings
label = gs.getMessage("Group {0}", groupNumberTemp2); //Not using a constant for group string as it causes L10N warning
groupLines[label].push(keys[i]);
}
returnData["seriesConfig"] = {};
returnData["allConfigs"]["ungrouped"] = data["seriesConfig"];
returnData["allConfigs"]["grouped"] = {};
returnData["seriesData"] = seriesData;
for (i = 0; i < this.means.length; i++) {
mean = this.means[i];
var groupNumberTemp3 = parseInt(i + 1).toFixed(0); //Using temp variable because of L10N concatenation warnings
label = gs.getMessage("Group {0}", groupNumberTemp3); //Not using a constant for group string as it causes L10N warning
returnData["allConfigs"]["grouped"][label] = {
"x": xTitle,
"y": "g" + i,
"type": types[i],
"label": label,
"lines": groupLines[label]
};
returnData["seriesConfig"][label] = {
"x": xTitle,
"y": "g" + i,
"type": types[i],
"label": label
};
for (j = 0; j < times.length; j++)
returnData["seriesData"][j]["g" + i] = mean[j];
}
for (i = 0; i < excludedKeys.length; i++) {
var lineName = excludedKeys[i];
returnData["allConfigs"]["grouped"][lineName] = excluded[lineName];
returnData["seriesConfig"][key] = excluded[key];
}
return returnData;
},
/**
* dimensionReduction
* reduce the dimensions of the columns of the rows in the 2d array passed in to the
* new dimension (newD) by taking the average of every two consecutive points and replacing them
* with the average
*/
dimensionReduction: function(originals, newD) {
var original;
var newArrays = [];
var newList;
function innerFunc(list, num2) {
list[0].push(num2);
if (list[0].length == 2)
list[1].push((list[0].pop() + list[0].pop()) / 2);
return list;
}
for (var index = 0; index < originals.length; index++) {
original = [
[
[],
[]
]
].concat(originals[index]);
while (original.length - 1 > newD) {
newList = original.reduce(innerFunc);
// If there is an extra value in the end (the number of elements was not divisible by two) then
// take its average with the new previous value and add the average back.
if (newList[0].length === 1)
newList[1].push((newList[0][0] + newList[1][newList[1].length - 1]) / 2);
original = [
[
[],
[]
]
].concat(newList[1]);
}
original.splice(0, 1);
newArrays.push(original);
}
return newArrays;
},
type: 'Kmeans'
};
Sys ID
746ff9af1b982010593876a61a4bcb0c