This article shows how to build a decision tree in JavaScript and what to watch out for when doing so. The following is a practical example; let's take a look.
Decision tree algorithm: code implementation. 1. Prepare the test data. As an example, assume a young woman at the company is deciding which blind-date candidates to meet. The following data records whether each candidate was met or eliminated (part of the data was generated using mock.js):
// Sample training data: one record per blind-date candidate.
// Keys: 姓名 (name), 年龄 (age), 长相 (looks), 体型 (build), 收入 (income),
// 见面 (class label: "见" = meet, "不见" = do not meet).
var data = [
  { "姓名": "余夏", "年龄": 29, "长相": "帅", "体型": "瘦", "收入": "高", "见面": "见" },
  { "姓名": "豆豆", "年龄": 25, "长相": "帅", "体型": "瘦", "收入": "高", "见面": "见" },
  { "姓名": "帅常荣", "年龄": 26, "长相": "帅", "体型": "胖", "收入": "高", "见面": "见" },
  { "姓名": "王涛", "年龄": 22, "长相": "帅", "体型": "瘦", "收入": "高", "见面": "见" },
  { "姓名": "李东", "年龄": 23, "长相": "帅", "体型": "瘦", "收入": "高", "见面": "见" },
  { "姓名": "王五五", "年龄": 23, "长相": "帅", "体型": "瘦", "收入": "低", "见面": "见" },
  { "姓名": "王小涛", "年龄": 22, "长相": "帅", "体型": "瘦", "收入": "低", "见面": "见" },
  { "姓名": "李缤", "年龄": 21, "长相": "帅", "体型": "胖", "收入": "高", "见面": "见" },
  { "姓名": "刘明", "年龄": 21, "长相": "帅", "体型": "胖", "收入": "低", "见面": "不见" },
  { "姓名": "红鹤", "年龄": 21, "长相": "不帅", "体型": "胖", "收入": "高", "见面": "不见" },
  { "姓名": "李理", "年龄": 32, "长相": "帅", "体型": "瘦", "收入": "高", "见面": "不见" },
  { "姓名": "周州", "年龄": 31, "长相": "帅", "体型": "瘦", "收入": "高", "见面": "不见" },
  { "姓名": "李乐", "年龄": 27, "长相": "不帅", "体型": "胖", "收入": "高", "见面": "不见" },
  { "姓名": "韩明", "年龄": 24, "长相": "不帅", "体型": "瘦", "收入": "高", "见面": "不见" },
  { "姓名": "小吕", "年龄": 28, "长相": "帅", "体型": "瘦", "收入": "低", "见面": "不见" },
  { "姓名": "李四", "年龄": 25, "长相": "帅", "体型": "瘦", "收入": "低", "见面": "不见" },
  { "姓名": "王鹏", "年龄": 30, "长相": "帅", "体型": "瘦", "收入": "低", "见面": "不见" },
];
/**
 * Decision tree (ID3) skeleton.
 *
 * When constructed with a plain (non-array, non-null) object, the object is
 * handed to training(); any other argument creates an untrained instance.
 *
 * @param {Object} [config] - training configuration, passed to training()
 */
function DecisionTree(config) {
  // typeof null is "object", so null has to be ruled out explicitly.
  if (config !== null && typeof config === "object" && !Array.isArray(config)) {
    this.training(config);
  }
}

// Outline only: every method body is intentionally left empty in this listing.
DecisionTree.prototype = {
  // Split predicates, looked up by predicate name
  _predicates: {},
  // Count how many times each value of an attribute occurs in the data set
  countUniqueValues(items, attr) {},
  // Get the key with the largest value, e.g. counter = {a: 9, b: 2} gives "a"
  getMaxKey(counter) {},
  // Find the most frequent value of a given attribute
  mostFrequentValue(items, attr) {},
  // Split the data set by an attribute
  split(items, attr, predicate, pivot) {},
  // Compute the entropy
  entropy(items, attr) {},
  // Build the decision tree
  buildDecisionTree(config) {},
  // Initialize and build the decision tree
  training(config) {},
  // Predict / test
  predict(data) {},
};

// Kept as `var`: a later listing in this file declares decisionTree again.
var decisionTree = new DecisionTree();
See "JS Simple Implementation of Decision Tree (ID3 Algorithm)_demo.html" for the complete code.
It contains comments and tests for each function (method).
Code:
// ...... (omitted)
// Excerpt of two DecisionTree.prototype methods, followed by a small demo.
// They are attached with Object.assign so the excerpt is valid on its own.
Object.assign(DecisionTree.prototype, {
  /**
   * Count how many times each distinct value of an attribute occurs.
   *
   * @param {Array<Object>} items - data set records
   * @param {string} attr - attribute name to read from every record
   * @returns {Object<string, number>} map from attribute value to occurrence count
   */
  countUniqueValues(items, attr) {
    const counter = {};
    for (const item of items) {
      const value = item[attr];
      // Own-property test: a truthiness check on counter[value] would read
      // inherited members (e.g. "constructor", "toString") and corrupt the count.
      if (!Object.prototype.hasOwnProperty.call(counter, value)) {
        counter[value] = 0;
      }
      counter[value] += 1;
    }
    return counter;
  },

  // ...... (omitted)

  /**
   * Compute the Shannon entropy of an attribute over the data set:
   * H(S) = -∑ P(Xi) * log2(P(Xi))
   *
   * @param {Array<Object>} items - data set records
   * @param {string} attr - attribute whose value distribution is measured
   * @returns {number} entropy in bits; 0 when items is empty
   */
  entropy(items, attr) {
    // Occurrence count of every distinct value
    const counter = this.countUniqueValues(items, attr);
    let total = 0;
    for (const value in counter) {
      const p = counter[value] / items.length; // P(Xi)
      total += -p * Math.log2(p);
    }
    return total;
  },
});

// ...... (omitted)
var decisionTree = new DecisionTree();
console.log("函数 countUniqueValues 测试:");
console.log(" 长相", decisionTree.countUniqueValues(data, "长相")); // test
console.log(" 年龄", decisionTree.countUniqueValues(data, "年龄")); // test
console.log(" 收入", decisionTree.countUniqueValues(data, "收入")); // test
console.log("函数 entropy 测试:");
console.log(" 长相", decisionTree.entropy(data, "长相")); // test
console.log(" 年龄", decisionTree.entropy(data, "年龄")); // test
console.log(" 收入", decisionTree.entropy(data, "收入")); // test
Because $P(\text{match}) = \dfrac{\text{number of matching items}}{\text{total number of items in the data set}}$,
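the information gain is $G = H(S) - \dfrac{N_{\text{match}}\,H(\text{match}) + N_{\text{not match}}\,H(\text{not match})}{N}$, where $N_{\text{match}}$ and $N_{\text{not match}}$ are the numbers of matching and non-matching items and $N$ is the total number of items in the data set.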
//......略buildDecisionTree(config){ var trainingSet = config.trainingSet;//训练集 var categoryAttr = config.categoryAttr;//用于区分的类别属性 //......略 //初始计算 训练集的熵 var initialEntropy = this.entropy(trainingSet, categoryAttr);//<===H(S) //......略 var alreadyChecked = [];//标识已经计算过了 var bestSplit = { gain: 0 };//储存当前最佳的分割节点数据信息 //遍历数据集 for (var item of trainingSet) { // 遍历项中的所有属性 for (var attr in item) { //跳过区分属性与忽略属性 if ((attr == categoryAttr) || (ignoredAttributes.indexOf(attr) >= 0)) continue; var pivot = item[attr];// 当前属性的值 var predicateName = ((typeof pivot == 'number') ? '>=' : '=='); //根据数据类型选择判断条件 var attrPredPivot = attr + predicateName + pivot; if (alreadyChecked.indexOf(attrPredPivot) >= 0) continue;//已经计算过则跳过 alreadyChecked.push(attrPredPivot);//记录 var predicate = this._predicates[predicateName];//匹配分割方式 var currSplit = this.split(trainingSet, attr, predicate, pivot); var matchEntropy = this.entropy(currSplit.match, categoryAttr);// H(match) 计算分割后合适的数据集的熵 var notMatchEntropy = this.entropy(currSplit.notMatch, categoryAttr);// H(on match) 计算分割后不合适的数据集的熵 //计算信息增益: // IG(A,S)=H(S)-(∑P(t)H(t))) // t为分裂的子集match(匹配),on match(不匹配) // P(match)=match的长度/数据集的长度 // P(on match)=on match的长度/数据集的长度 var iGain = initialEntropy - ((matchEntropy * currSplit.match.length + notMatchEntropy * currSplit.notMatch.length) / trainingSet.length); //不断匹配最佳增益值对应的节点信息 if (iGain > bestSplit.gain) { //......略 } } } //......递归计算分支}
How to use canvas to make a useful graffiti drawing board
How to use js-xlsx to import and export Excel files (part 2)
The above is the detailed content of "How to make a decision tree in JavaScript". For more information, please follow other related articles on the PHP Chinese website!