[Go]使用Golang對鳶尾花數據集進行k-means聚類

k-means算法是一種簡單的迭代型聚類算法,採用距離作為相似性指標,從而發現給定數據集中的K個類,且每個類的中心是根據類中所有值的均值得到,每個類用聚類中心來描述。對於給定的一個包含n個d維數據點的數據集X以及要分得的類別數K,選取歐式距離作為相似度指標,聚類目標是使得各類的聚類平方和最小,即最小化:

$$J = \sum_{k=1}^{K} \sum_{x_i \in C_k} \lVert x_i - \mu_k \rVert^2$$

其中 $C_k$ 為第k個類,$\mu_k$ 為該類的聚類中心。

結合最小二乘法和拉格朗日原理,聚類中心為對應類別中各數據點的平均值,同時為了使算法收斂,在迭代過程中,應使最終的聚類中心儘可能不變。

聚類過程

  • 首先任取k個樣本點作為k個簇的初始中心
  • 對每個樣本點,計算它與k個中心的距離,把它納入距離最小的中心所在的簇
  • 等到所有的樣本點歸類完畢,重新計算k個簇的中心
  • 重複以上過程,直至樣本點納入的簇不再變動或變動範圍極小

可視化演示:

  • 在每次完成聚類以後,生成圖片並保存。
  • 最後生成一個k-means.html,其中使用JavaScript逐秒繪製過程圖

聚類過程:

不同的顏色各代表一個類,黑點代表聚簇點

第一次迭代(隨機選點,還未聚類)

 

進行迭代,更新聚類點

 

最後一次迭代(各點已經完成聚類)

 

因為數據太多,可以在k-means.html中查看變化過程

結果演示:

  • 輸出聚簇點

 

  • 可視化演示

代碼:使用了github.com/muesli/kmeans庫,我對它的註釋進行了翻譯,並對程序進行了部分修改,方便可視化演示

 
 
package main

import (
    "io"
    "os"
    "log"
    "fmt"
    "bufio"
    "io/ioutil"
    "strings"
    "strconv"
    "github.com/leeli73/go-kmeans-html-plotter/clusters"
    "github.com/leeli73/go-kmeans-html-plotter/kmeans"
)
// InitData loads the observations stored in data/iris.dat.
//
// Blank lines and lines starting with '@' are skipped. Every other line must
// begin with four comma-separated numeric fields; any further fields on the
// line are ignored. A line that does not satisfy this is logged and skipped
// instead of being turned into zero-valued coordinates.
//
// The process is terminated through log.Fatalln when the file cannot be
// opened or a read error other than end-of-file occurs.
func InitData() clusters.Observations {
	fi, err := os.Open("data/iris.dat")
	if err != nil {
		log.Fatalln(err)
	}
	defer fi.Close()

	var d clusters.Observations
	br := bufio.NewReader(fi)
	for {
		// ReadString returns the whole line regardless of its length, and
		// on io.EOF still hands back whatever followed the last newline.
		line, readErr := br.ReadString('\n')
		if readErr != nil && readErr != io.EOF {
			log.Fatalln(readErr)
		}
		if row, ok := parseIrisRow(line); ok {
			d = append(d, clusters.Coordinates{row[0], row[1], row[2], row[3]})
		}
		if readErr == io.EOF {
			break
		}
	}
	return d
}

// parseIrisRow extracts the four leading numeric fields of one data line.
// ok is false for blank lines, '@' lines, and malformed rows; malformed rows
// are additionally reported through the standard logger.
func parseIrisRow(line string) (row [4]float64, ok bool) {
	line = strings.TrimSpace(line)
	if line == "" || line[0] == '@' {
		return row, false
	}

	// Split on the bare comma and trim each field so both "a, b" and "a,b"
	// separators are accepted.
	fields := strings.Split(line, ",")
	if len(fields) < len(row) {
		log.Printf("skipping malformed line %q: want at least %d fields, got %d", line, len(row), len(fields))
		return row, false
	}
	for i := range row {
		v, err := strconv.ParseFloat(strings.TrimSpace(fields[i]), 64)
		if err != nil {
			log.Printf("skipping malformed line %q: %v", line, err)
			return row, false
		}
		row[i] = v
	}
	return row, true
}
// main clusters the observations returned by InitData into 4 groups, logs
// each cluster center, and renders k-means.html from the data/web.html
// template by substituting the {{images}} and {{clusters}} placeholders.
// Any failure along the way terminates the program through log.Fatal.
func main() {
	d := InitData()

	// Set up k-means with the plotter that records the clustering steps.
	km, err := kmeans.New(0.01, kmeans.SimplePlotter{})
	if err != nil {
		log.Fatal(err)
	}

	// Run the clustering; files holds the entries substituted for {{images}}.
	result, files, err := km.Partition(d, 4)
	if err != nil {
		log.Fatal(err)
	}

	centers := make([]string, 0, len(result))
	for i, c := range result {
		str := fmt.Sprintf("Cluster: %d %+v", i, c.Center)
		log.Print(str)
		centers = append(centers, str)
	}

	// Build the demo page from the template.
	tmpl, err := ioutil.ReadFile("data/web.html")
	if err != nil {
		log.Fatal(err)
	}
	html := string(tmpl)
	html = strings.Replace(html, "{{images}}", quoteJoin(files), -1)
	html = strings.Replace(html, "{{clusters}}", quoteJoin(centers), -1)
	if err := ioutil.WriteFile("k-means.html", []byte(html), 0644); err != nil {
		log.Fatal(err)
	}
}

// quoteJoin wraps every item in double quotes and joins the results with
// commas, yielding the element list of an array literal. An empty input
// yields the empty string.
//
// NOTE(review): items are inserted verbatim, so an item containing a double
// quote or a backslash would produce a broken literal.
func quoteJoin(items []string) string {
	quoted := make([]string, len(items))
	for i, s := range items {
		quoted[i] = "\"" + s + "\""
	}
	return strings.Join(quoted, ",")
}
 
 

 

 
data/web.html(模板;程序替換其中的 {{images}} 和 {{clusters}} 後生成 k-means.html)
<!DOCTYPE html>
<html lang="en">

<head>
    <title>Bootstrap 4 Website Example</title>
    <meta charset="utf-8">
    <meta name="viewport" content="width=device-width, initial-scale=1">
    <link rel="stylesheet" href="https://cdn.staticfile.org/twitter-bootstrap/4.1.0/css/bootstrap.min.css">
    <script src="https://cdn.staticfile.org/jquery/3.2.1/jquery.min.js"></script>
    <script src="https://cdn.staticfile.org/popper.js/1.12.5/umd/popper.min.js"></script>
    <script src="https://cdn.staticfile.org/twitter-bootstrap/4.1.0/js/bootstrap.min.js"></script>
</head>

<body>

    <div class="container" style="margin-top:30px">
        <div class="row">
            <div class="col-sm-6">
                <h2>聚類點</h2>
                <div style="height: 500px">
                    <textarea style="width: 100%; height: 100%" id="clusters"></textarea>
                </div>
            </div>
            <div class="col-sm-6">
                <h2>k-means聚類過程</h2>
                <div style="height: 500px">
                    <canvas id="show" style="width: 100%;height: 100%"></canvas>
                </div>
            </div>
        </div>
    </div>

</body>
<script>
    // Template placeholders: {{images}} and {{clusters}} are meant to be
    // replaced with comma-separated, double-quoted strings before the page is
    // served, turning these two lines into plain array literals.
    // Images holds the image sources shown one after another on the canvas;
    // Clusters holds the text lines written into the "clusters" textarea.
    Images = [{{images}}]
    Clusters = [{{clusters}}]
    // On page load: fill the cluster textarea, then show the entries of
    // Images on the "show" canvas one per second until all have been shown.
    window.onload = function () {
        var CANVAS = document.getElementById('show');
        // Declared locally so the handler no longer leaks a global `context`.
        var context = CANVAS.getContext('2d');
        var ratio = getPixelRatio(context)

        // The canvas is stretched by CSS, but its bitmap defaults to 300x150.
        // Resize the bitmap to the displayed size times the pixel ratio so the
        // scaled draw below always fits inside it instead of being clipped
        // when ratio > 1.
        CANVAS.width = Math.round((CANVAS.clientWidth || 300) * ratio);
        CANVAS.height = Math.round((CANVAS.clientHeight || 150) * ratio);

        var img = new Image();
        SetClusters()
        img.onload = function () {
            // Each frame fills the whole canvas bitmap.
            context.drawImage(img, 0, 0, CANVAS.width, CANVAS.height);
        }

        var count = 0
        var interval = setInterval(function () {
            if (count >= Images.length) {
                // Every frame has been requested; stop the timer.
                clearInterval(interval);
                return
            }
            // Assigning src starts the load; img.onload draws it when ready.
            img.src = Images[count]
            count = count + 1
        }, 1000)
    }
    // Returns window.devicePixelRatio (1 when unset) divided by the first
    // truthy backing-store ratio found on the context, trying the unprefixed
    // property and then the webkit/moz/ms/o prefixed ones; the divisor is 1
    // when none of them is set.
    function getPixelRatio(context) {
        var candidates = [
            'backingStorePixelRatio',
            'webkitBackingStorePixelRatio',
            'mozBackingStorePixelRatio',
            'msBackingStorePixelRatio',
            'oBackingStorePixelRatio'
        ];
        var backingStore = 1;
        for (var i = 0; i < candidates.length; i++) {
            var value = context[candidates[i]];
            if (value) {
                backingStore = value;
                break;
            }
        }

        var deviceRatio = window.devicePixelRatio || 1;
        return deviceRatio / backingStore;
    }
    // Appends every entry of the global Clusters array, each followed by a
    // newline, to the current contents of the "clusters" textarea.
    function SetClusters() {
        var output = document.getElementById("clusters");
        var index = 0;
        while (index < Clusters.length) {
            output.value += Clusters[index] + '\n';
            index += 1;
        }
    }
</script>

</html>

項目地址:https://github.com/leeli73/go-kmeans-html-plotter.git

kmeans項目地址:https://github.com/muesli/kmeans.git

相關文章
相關標籤/搜索