1. groupcache introduction: http://www.csdn.net/article/2013-07-30/2816399-groupcache-readme-go
2. A common architecture for using groupcache:
groupcache is usually deployed with the architecture shown above. The framework and its example code can be downloaded from https://github.com/capotej/groupcache-db-experiment. A walkthrough of the example's core source code follows:
```go
func main() {
	var port = flag.String("port", "8001", "groupcache port")
	flag.Parse()

	// Register this process and its peers as an HTTP pool.
	peers := groupcache.NewHTTPPool("http://localhost:" + *port)

	client := new(client.Client)

	// Group with a 64 MB cache; the getter is the fallback that asks the slow DB server.
	var stringcache = groupcache.NewGroup("SlowDBCache", 64<<20, groupcache.GetterFunc(
		func(ctx groupcache.Context, key string, dest groupcache.Sink) error {
			result := client.Get(key)
			fmt.Printf("asking for %s from dbserver\n", key)
			dest.SetBytes([]byte(result))
			return nil
		}))

	peers.Set("http://localhost:8001", "http://localhost:8002", "http://localhost:8003")

	frontendServer := NewServer(stringcache)

	i, err := strconv.Atoi(*port)
	if err != nil {
		// handle error
		fmt.Println(err)
		os.Exit(2)
	}

	// The user-facing frontend listens on port+1000; groupcache peers talk on *port.
	var frontEndport = ":" + strconv.Itoa(i+1000)
	go frontendServer.Start(frontEndport)

	fmt.Println(stringcache)
	fmt.Println("cachegroup slave starting on " + *port)
	fmt.Println("frontend starting on " + frontEndport)

	http.ListenAndServe("127.0.0.1:"+*port, http.HandlerFunc(peers.ServeHTTP))
}
```
To understand the code above, you first need to understand how peers in groupcache become associated with the HTTPPool. The key code:
```go
func NewHTTPPoolOpts(self string, o *HTTPPoolOptions) *HTTPPool {
	if httpPoolMade {
		panic("groupcache: NewHTTPPool must be called only once")
	}
	httpPoolMade = true

	opts := HTTPPoolOptions{}
	if o != nil {
		opts = *o
	}
	if opts.BasePath == "" {
		opts.BasePath = defaultBasePath
	}
	if opts.Replicas == 0 {
		opts.Replicas = defaultReplicas
	}

	p := &HTTPPool{
		basePath:    opts.BasePath,
		self:        self,
		peers:       consistenthash.New(opts.Replicas, opts.HashFn),
		httpGetters: make(map[string]*httpGetter),
	}
	RegisterPeerPicker(func() PeerPicker { return p })
	return p
}
```
RegisterPeerPicker registers a function that returns this HTTPPool into the package-global portPicker. Later, when Group's Get is called, initPeers invokes that function and gets the HTTPPool back; the portPicker function variable is what ties HTTPPool and groupcache together.
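For reference, the wiring looks roughly like the sketch below. This is a simplified paraphrase of the idea in groupcache's peers.go and groupcache.go, not a verbatim copy of the library source:

```go
// Simplified paraphrase: the registered picker is kept in a package-level variable.
var portPicker func() PeerPicker

func RegisterPeerPicker(fn func() PeerPicker) {
	if portPicker != nil {
		panic("RegisterPeerPicker called more than once")
	}
	portPicker = fn
}

func getPeers() PeerPicker {
	if portPicker == nil {
		return NoPeers{} // no pool registered: every lookup falls back to the local getter
	}
	if pk := portPicker(); pk != nil {
		return pk
	}
	return NoPeers{}
}

// Each Group resolves its peers lazily, exactly once, via g.peersOnce.Do(g.initPeers).
func (g *Group) initPeers() {
	if g.peers == nil {
		g.peers = getPeers()
	}
}
```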
3. groupcache source code walkthrough
A. When using the framework above you may be puzzled: requests are sharded by key and sent to a remote peer, yet on the remote side they are still handled by HTTPPool's ServeHTTP. Let's look at that handler first:
```go
func (p *HTTPPool) ServeHTTP(w http.ResponseWriter, r *http.Request) {
	// Parse request.
	if !strings.HasPrefix(r.URL.Path, p.basePath) {
		panic("HTTPPool serving unexpected path: " + r.URL.Path)
	}
	parts := strings.SplitN(r.URL.Path[len(p.basePath):], "/", 2)
	if len(parts) != 2 {
		http.Error(w, "bad request", http.StatusBadRequest)
		return
	}
	groupName := parts[0]
	key := parts[1]

	// Fetch the value for this group/key.
	group := GetGroup(groupName)
	if group == nil {
		http.Error(w, "no such group: "+groupName, http.StatusNotFound)
		return
	}
	var ctx Context
	if p.Context != nil {
		ctx = p.Context(r)
	}

	group.Stats.ServerRequests.Add(1)
	var value []byte
	err := group.Get(ctx, key, AllocatingByteSliceSink(&value))
	if err != nil {
		http.Error(w, err.Error(), http.StatusInternalServerError)
		return
	}

	// Write the value to the response body as a proto message.
	body, err := proto.Marshal(&pb.GetResponse{Value: value})
	if err != nil {
		http.Error(w, err.Error(), http.StatusInternalServerError)
		return
	}
	w.Header().Set("Content-Type", "application/x-protobuf")
	w.Write(body)
}
```
It first obtains the local group pointer via GetGroup, then fetches the data with group.Get(ctx, key, AllocatingByteSliceSink(&value)). But the frontend also fetches data through the same Get interface, so could this form an infinite loop? To answer that, let's look at groupcache's Get:
```go
func (g *Group) Get(ctx Context, key string, dest Sink) error {
	g.peersOnce.Do(g.initPeers)
	g.Stats.Gets.Add(1)
	if dest == nil {
		return errors.New("groupcache: nil dest Sink")
	}
	value, cacheHit := g.lookupCache(key)

	if cacheHit {
		fmt.Printf("key %s cache hit!\n", key)
		g.Stats.CacheHits.Add(1)
		return setSinkView(dest, value)
	}

	// Optimization to avoid double unmarshalling or copying: keep
	// track of whether the dest was already populated. One caller
	// (if local) will set this; the losers will not. The common
	// case will likely be one caller.
	destPopulated := false
	value, destPopulated, err := g.load(ctx, key, dest)
	if err != nil {
		return err
	}
	if destPopulated {
		return nil
	}
	return setSinkView(dest, value)
}
```
Rough flow: initPeers runs once to resolve the remote peers, then the local cache is checked; on a hit the cached value is returned, otherwise the data is loaded. The load implementation:
```go
func (g *Group) load(ctx Context, key string, dest Sink) (value ByteView, destPopulated bool, err error) {
	g.Stats.Loads.Add(1)
	viewi, err := g.loadGroup.Do(key, func() (interface{}, error) {
		g.Stats.LoadsDeduped.Add(1)
		var value ByteView
		var err error
		if peer, ok := g.peers.PickPeer(key); ok {
			value, err = g.getFromPeer(ctx, peer, key)
			if err == nil {
				g.Stats.PeerLoads.Add(1)
				return value, nil
			}
			g.Stats.PeerErrors.Add(1)
			// TODO(bradfitz): log the peer's error? keep
			// log of the past few for /groupcachez?  It's
			// probably boring (normal task movement), so not
			// worth logging I imagine.
		}
		value, err = g.getLocally(ctx, key, dest)
		if err != nil {
			g.Stats.LocalLoadErrs.Add(1)
			return nil, err
		}
		g.Stats.LocalLoads.Add(1)
		destPopulated = true // only one caller of load gets this return value
		g.populateCache(key, value, &g.mainCache)
		return value, nil
	})
	if err == nil {
		value = viewi.(ByteView)
	}
	return
}
```
Rough flow: a fixed remote peer is chosen for the key; if a peer is picked, the data is fetched from that peer, otherwise getLocally fetches it directly from the backend (a database or some other data service). This still doesn't answer our question, so let's continue with PickPeer:
```go
func (p *HTTPPool) PickPeer(key string) (ProtoGetter, bool) {
	p.mu.Lock()
	defer p.mu.Unlock()
	if p.peers.IsEmpty() {
		return nil, false
	}
	if peer := p.peers.Get(key); peer != p.self {
		return p.httpGetters[peer], true
	}
	return nil, false
}
```
PickPeer selects a peer for the key; if the selected peer is the node itself, it reports failure, so the data is fetched from the backend data service instead and cached in the local mainCache. This resolves the confusion: a key always maps to one fixed peer, so once a request has been routed to that peer, when that peer calls Get and lookupCache misses, it falls through to getLocally and the data service rather than forwarding to yet another peer, so no infinite loop can form.
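The "one key, one fixed peer" property comes from the consistent-hash ring that HTTPPool keeps internally. Below is a minimal sketch of that behaviour using groupcache's consistenthash package directly; the peer URLs and the replica count are just illustrative values:

```go
package main

import (
	"fmt"

	"github.com/golang/groupcache/consistenthash"
)

func main() {
	// Build a ring like the one HTTPPool keeps internally.
	// 50 virtual nodes per peer; nil selects the default hash function.
	ring := consistenthash.New(50, nil)
	ring.Add(
		"http://localhost:8001",
		"http://localhost:8002",
		"http://localhost:8003",
	)

	// The same key always maps to the same peer, which is why a peer that
	// receives a forwarded request never forwards it again.
	for _, key := range []string{"foo", "bar", "foo"} {
		fmt.Printf("key %q -> peer %s\n", key, ring.Get(key))
	}
}
```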
B. The LRU cache algorithm is a commonly used one, normally implemented by combining a doubly linked list with a hash map; it is not covered in detail here. A minimal sketch of the pattern follows.
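The sketch below is an illustrative toy of the list-plus-map pattern, not groupcache's lru.go:

```go
package lru

import "container/list"

// Cache is a tiny LRU: the map gives O(1) lookup, the doubly linked
// list keeps entries ordered from most to least recently used.
type Cache struct {
	maxEntries int
	ll         *list.List
	cache      map[string]*list.Element
}

type entry struct {
	key   string
	value []byte
}

func New(maxEntries int) *Cache {
	return &Cache{
		maxEntries: maxEntries,
		ll:         list.New(),
		cache:      make(map[string]*list.Element),
	}
}

func (c *Cache) Add(key string, value []byte) {
	if el, ok := c.cache[key]; ok {
		c.ll.MoveToFront(el)
		el.Value.(*entry).value = value
		return
	}
	el := c.ll.PushFront(&entry{key, value})
	c.cache[key] = el
	if c.ll.Len() > c.maxEntries {
		// Evict the least recently used entry (the list's back).
		oldest := c.ll.Back()
		c.ll.Remove(oldest)
		delete(c.cache, oldest.Value.(*entry).key)
	}
}

func (c *Cache) Get(key string) ([]byte, bool) {
	if el, ok := c.cache[key]; ok {
		c.ll.MoveToFront(el)
		return el.Value.(*entry).value, true
	}
	return nil, false
}
```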
C. singleflight.go ensures that when the local cache misses, only one request per key is in flight to the remote peer or the backend data service at any given time.
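The mechanism is small: concurrent callers for the same key block on one shared call and reuse its result. A condensed sketch of the idea, following the general shape of groupcache's singleflight package but simplified:

```go
package singleflight

import "sync"

type call struct {
	wg  sync.WaitGroup
	val interface{}
	err error
}

type Group struct {
	mu sync.Mutex
	m  map[string]*call
}

// Do executes fn for key, making sure that concurrent callers with the
// same key wait for a single execution and share its result.
func (g *Group) Do(key string, fn func() (interface{}, error)) (interface{}, error) {
	g.mu.Lock()
	if g.m == nil {
		g.m = make(map[string]*call)
	}
	if c, ok := g.m[key]; ok {
		// Another goroutine is already loading this key: wait for it.
		g.mu.Unlock()
		c.wg.Wait()
		return c.val, c.err
	}
	c := new(call)
	c.wg.Add(1)
	g.m[key] = c
	g.mu.Unlock()

	c.val, c.err = fn()
	c.wg.Done()

	g.mu.Lock()
	delete(g.m, key)
	g.mu.Unlock()

	return c.val, c.err
}
```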
Summary:
groupcache is compact yet powerful and embeds directly into your own service; well worth adopting.