k-means算法是一種簡單的迭代型聚類算法,採用距離作爲相似性指標,從而發現給定數據集中的K個類,且每個類的中心是根據類中所有值的均值得到,每個類用聚類中心來描述。對於給定的一個包含n個d維數據點的數據集X以及要分得的類別數K,選取歐氏距離作爲相似度指標,聚類目標是使得各類的聚類平方和最小,即最小化:$J=\sum_{k=1}^{K}\sum_{x_i\in C_k}\lVert x_i-\mu_k\rVert^{2}$,其中$C_k$爲第k個類,$\mu_k$爲該類的聚類中心。
結合最小二乘法和拉格朗日原理,聚類中心爲對應類別中各數據點的平均值,同時爲了使得算法收斂,在迭代過程中,應使最終的聚類中心儘可能不變。
聚類過程
可視化演示:
聚類過程:
不同的顏色各代表一個類,黑點代表聚類中心點
第一次迭代(隨機選點,還未聚類)
進行迭代,更新聚類點
最後一次迭代(按點已經完成聚類)
因爲數據太多,可以在k-means.html中查看變化過程
結果演示:
代碼:使用了github.com/muesli/kmeans庫,我對它的註釋進行了翻譯,對程序進行了部分修改,方便可視化演示
package main import ( "io" "os" "log" "fmt" "bufio" "io/ioutil" "strings" "strconv" "github.com/leeli73/go-kmeans-html-plotter/clusters" "github.com/leeli73/go-kmeans-html-plotter/kmeans" ) // 讀取數據 func InitData() clusters.Observations{ fi, err := os.Open("data/iris.dat") if err != nil { log.Fatalln(err) return nil } defer fi.Close() var d clusters.Observations br := bufio.NewReader(fi) for { a, _, c := br.ReadLine() if c == io.EOF { break } temp := string(a) if temp[0] != '@'{ data := strings.Split(temp,", ") num1,_ := strconv.ParseFloat(data[0], 64) num2,_ := strconv.ParseFloat(data[1], 64) num3,_ := strconv.ParseFloat(data[2], 64) num4,_ := strconv.ParseFloat(data[3], 64) d = append(d,clusters.Coordinates{ num1, num2, num3, num4, }) } } return d } func main() { d := InitData() //定義一個k-means km, _ := kmeans.New(0.01, kmeans.SimplePlotter{}) //進行聚類運算 clusters,files, _ := km.Partition(d, 4) //生成演示html strfiles := "\"" + files[0] + "\"," for i:=1;i<len(files) - 1;i++{ strfiles = strfiles + "\"" + files[i] + "\"," } strfiles = strfiles + "\"" + files[len(files)-1] + "\"" clusterout := []string{} for i, c := range clusters { log.Printf("Cluster: %d %+v", i, c.Center) str := fmt.Sprintf("Cluster: %d %+v", i, c.Center) clusterout = append(clusterout,str) } clustersstr := "\"" + clusterout[0] + "\"," for i:=1;i<len(clusterout)-1;i++{ clustersstr = clustersstr + "\"" + clusterout[i] + "\"," } clustersstr = clustersstr + "\"" +clusterout[len(clusterout)-1] + "\"" var Data,err = ioutil.ReadFile("data/web.html") if err != nil{ log.Fatal(err) } html := string(Data) html = strings.Replace(html,"{{images}}",strfiles,-1) html = strings.Replace(html,"{{clusters}}",clustersstr,-1) ioutil.WriteFile("k-means.html",[]byte(html), 0644) }
k-means.html
<!DOCTYPE html>
<html lang="en">
<head>
  <title>Bootstrap 4 Website Example</title>
  <meta charset="utf-8">
  <meta name="viewport" content="width=device-width, initial-scale=1">
  <link rel="stylesheet" href="https://cdn.staticfile.org/twitter-bootstrap/4.1.0/css/bootstrap.min.css">
  <script src="https://cdn.staticfile.org/jquery/3.2.1/jquery.min.js"></script>
  <script src="https://cdn.staticfile.org/popper.js/1.12.5/umd/popper.min.js"></script>
  <script src="https://cdn.staticfile.org/twitter-bootstrap/4.1.0/js/bootstrap.min.js"></script>
</head>
<body>
  <div class="container" style="margin-top:30px">
    <div class="row">
      <!-- Left column: textual list of the cluster centers. -->
      <div class="col-sm-6">
        <h2>聚類點</h2>
        <div style="height: 500px">
          <textarea style="width: 100%; height: 100%" id="clusters"></textarea>
        </div>
      </div>
      <!-- Right column: canvas that replays the iteration snapshots. -->
      <div class="col-sm-6">
        <h2>k-means聚類過程</h2>
        <div style="height: 500px">
          <canvas id="show" style="width: 100%;height: 100%"></canvas>
        </div>
      </div>
    </div>
  </div>
  <script>
    // The two placeholders below are replaced by the generator with
    // comma-separated, double-quoted strings (image paths and cluster lines).
    var Images = [{{images}}];
    var Clusters = [{{clusters}}];

    // On load: fill the textarea, then show one image per second until the
    // list is exhausted.
    window.onload = function () {
      var canvas = document.getElementById('show');
      var context = canvas.getContext('2d');
      var ratio = getPixelRatio(context);
      var img = new Image();

      SetClusters();

      img.onload = function () {
        // Size the backing store to the displayed size times the pixel ratio.
        // Without this the bitmap stays at the default 300x150 and a frame
        // drawn at 300*ratio x 150*ratio is cropped whenever ratio > 1.
        // Assigning width/height also clears the previous frame.
        canvas.width = Math.round((canvas.clientWidth || 300) * ratio);
        canvas.height = Math.round((canvas.clientHeight || 150) * ratio);
        context.drawImage(img, 0, 0, canvas.width, canvas.height);
      };

      var count = 0;
      var interval = setInterval(function () {
        if (count >= Images.length) {
          clearInterval(interval);
          return;
        }
        img.src = Images[count];
        count = count + 1;
      }, 1000);
    };

    // Returns devicePixelRatio divided by the context's backing-store ratio
    // (vendor-prefixed variants included); both default to 1 when absent.
    function getPixelRatio(context) {
      var backingStore = context.backingStorePixelRatio ||
        context.webkitBackingStorePixelRatio ||
        context.mozBackingStorePixelRatio ||
        context.msBackingStorePixelRatio ||
        context.oBackingStorePixelRatio || 1;
      return (window.devicePixelRatio || 1) / backingStore;
    }

    // Appends every entry of Clusters, one per line, to the textarea.
    function SetClusters() {
      var box = document.getElementById("clusters");
      var text = '';
      for (var i = 0; i < Clusters.length; i++) {
        text = text + Clusters[i] + '\n';
      }
      // Single assignment instead of rewriting the textarea on each iteration.
      box.value = box.value + text;
    }
  </script>
</body>
</html>
項目地址:https://github.com/leeli73/go-kmeans-html-plotter.git
kmeans項目地址:https://github.com/muesli/kmeans.git