美篇推荐系统架构简介

Published: by Creative Commons Licence

美篇推荐系统架构

核心组件

核心组件都在core目录下:

  • 召回: meipian.cn/feed/core/recaller
  • 排序: meipian.cn/feed/core/ranker
  • 特征: meipian.cn/feed/core/featurer
  • 过滤: meipian.cn/feed/core/filter
  • 包装: meipian.cn/feed/core/wrapper

多路融合

多路融合:多个车轮一起滚动

// Multi activity stream: for every configured wheel, build its config,
// create the wheel through the engine and roll it in its own goroutine.
// Each goroutine sends its result on ch.
// (Excerpt: the enclosing for loop is closed outside this snippet.)
	sp.Annotate(time.Now(), "Do fetch multi activity stream")
	sp.Tag("track_id", req.TrackId)
	for _, wheelConfigJson := range conf.ActivityStreamConfigJson {
		// Convert the JSON wheel config into wlc; on failure log and skip
		// this wheel instead of aborting the whole request.
		wlc, wErr := GetWheelConfigFromJson(req, &wheelConfigJson)
		if wErr != nil {
			log.Err(wheelConfigJson.Name + " wheel config convert failed !")
			continue
		}
		// Create the wheel object for wlc.Type.
		// NOTE(review): the second return value of Engine.New is discarded;
		// an unusable wi is only detected by the type assertion below.
		wi, _ := engine.Engine.New(wlc.Type)
		if wl, ok := wi.(wheel.Wheel); ok {
			// count is incremented once per goroutine started below.
			// Presumably the caller receives count results from ch — the
			// receiving side is not part of this excerpt; TODO confirm.
			count++
			// Roll the wheel concurrently. The wheel and a pointer to its
			// config are passed as arguments rather than captured.
			go func(_wl wheel.Wheel, _wlc *wheel.WheelConfig) {
				// Child span named after the wheel, parented to sp.
				childspan := zipkinUtil.ZipkinTracer.StartSpan(_wlc.Name, zipkin.Parent(sp.Context()))
				defer childspan.Finish()
				childspan.Annotate(time.Now(), _wlc.Name+" start")
				// Derive a context from vctx that carries the child span.
				newCtx := zipkin.NewContext(vctx, childspan)
				vv := _wl.Roll(&newCtx, req, _wlc)
				// Report the number of items this wheel produced.
				monitor.GearMonitor(req.Scene, req.TrackId, _wlc.Name, len(vv), req.UserId, _wlc.Type, "activity_stream")
				ch <- vv
			}(wl, &wlc)
		} else {
			// wi does not implement wheel.Wheel: log and skip this wheel.
			log.Err(wheelConfigJson.Name + " wheel convert failed !")
			continue
		}

召回-排序

召回-排序:每个车轮如何滚动

	// Pipeline of a single wheel: each stage receives the cells produced by
	// the previous stage, and sp is annotated with a timestamp before every
	// stage starts.
	// Stage 1: recall candidates from the configured recallers.
	sp.Annotate(time.Now(), "Do recall")
	recalledFeedCells := AddMultiRecaller(ctx, req, conf.Name, conf.Recaller...)
	// Stage 2: apply the configured filters to the recalled cells.
	sp.Annotate(time.Now(), "Do filter")
	filterdFeedCells := AddMultiFilter(ctx, req, conf.Name, recalledFeedCells, conf.Filter...)
	// Stage 3: run the configured featurers on the filtered cells.
	sp.Annotate(time.Now(), "Do featurer")
	featuredFeedCells := AddMultiFeaturer(ctx, req, conf.Name, filterdFeedCells, conf.Featurer...)
	// Stage 4: first ranker.
	sp.Annotate(time.Now(), "Do first ranker")
	firstRankedFeedCells := AddMultiFirstRanker(ctx, req, conf.Name, featuredFeedCells, conf.FirstRanker...)
	// Stage 5: second ranker, fed with the first ranker's output.
	sp.Annotate(time.Now(), "Do second ranker")
	secondRankedFeedCells := AddMultiSecondRanker(ctx, req, conf.Name, firstRankedFeedCells, conf.SecondRanker...)
	// Stage 6: re-ranker, fed with the second ranker's output.
	// NOTE(review): the annotation text below contains a typo ("reanker");
	// it is left unchanged here because it is emitted at runtime.
	sp.Annotate(time.Now(), "Do re reanker")
	rerankedCells := AddMultiReRanker(ctx, req, conf.Name, secondRankedFeedCells, conf.ReRanker...)

多路召回

// Multi-recaller fan-out/fan-in: run every recaller in rs concurrently and
// merge their results into feedCells.
// The second return value of StartSpanFromContext is discarded.
sp, _ := zipkinUtil.ZipkinTracer.StartSpanFromContext(*ctx, "MULTI-RECALLER")
	// Unbuffered channel: each send blocks until the collecting loop below
	// receives it.
	var ch = make(chan []model.FeedCell)
	for _, r := range rs {
		// One goroutine per recaller; the recaller, its amount and its name
		// are passed as arguments rather than captured from the loop.
		go func(fr recaller.Recaller, amount int, name string) {
			// Child span named after the recaller, parented to sp.
			childspan := zipkinUtil.ZipkinTracer.StartSpan(name, zipkin.Parent(sp.Context()))
			defer childspan.Finish()
			childspan.Annotate(time.Now(), name+" start")
			vv := fr.Recall(ctx, req, amount)
			// Report how many cells this recaller returned.
			monitor.GearMonitor(req.Scene, req.TrackId, name, len(vv), req.UserId, "recaller", wheelName+"_multi_recaller")
			ch <- vv
			// The "end" annotation is recorded only after the result has
			// been handed over to the receiver.
			childspan.Annotate(time.Now(), name+" end")
		}(r.Recaller, r.Count, r.Name)
	}
	// Receive exactly one result per started goroutine and append it to
	// feedCells; results are merged in arrival order, not in rs order.
	for i := 0; i < len(rs); i++ {
		feedCells = append(feedCells, <-ch...)
	}

架构图

image-20191008190312093

用户上下文

   // Obtain the user context for req.UserId via record.GetCommonUserContext
   // and attach it to a context derived from sctx under the key
   // "user_context".
   // NOTE(review): the context package documentation advises against using a
   // built-in type such as string as a WithValue key (to avoid collisions
   // between packages); switching to a dedicated key type would require
   // updating every reader of this key.
   var uc common.UserContext
   uc = record.GetCommonUserContext(req.UserId)
   vctx := context.WithValue(sctx, "user_context", uc)

用户上下文包括用户的曝光记录、点击记录和用户画像等，贯穿请求处理的始终，可以从 context 中获取。

策略热插拔

抽象工厂模式

  • 齿轮注册器,通过动态配置组合不同策略
  • 高内聚低耦合,不同齿轮之间互相隔离
  • 便于监控不同阶段的数据流

组件注册

engine_creater.go 中调用Engine.Register 方法进行注册,例如:

   // Register a *recaller.FloorHotArticleFeedRecaller instance under the
   // name "article_floor_hot_recaller".
   Engine.Register("article_floor_hot_recaller", &recaller.FloorHotArticleFeedRecaller{})

动态生成对象

engine_creater.go 中调用Engine.New 方法进行创建。

配置示例

{
   "name": "app_home_feed",
   "global_filter": "unique_item_filter",
   "global_ranker": "app_home_global_ranker",
   "global_wrapper": "app_home_wrapper",
   "experiment_id": "",
   "activity_stream": [{
         "name": "alg_wheel",
         "count": 18,
         "cache_prefix": "alg_wheel:1",
         "recaller": [{
            "name": "article_icf_recaller"
         }, {
            "name": "article_content_based_recaller_v2"
         }, {
            "name": "article_daily_hot_recaller"
         }],
         "filter": [],
         "featurer": [{
            "name": "app_home_article_featurer"
         }],
         "first_ranker": [{
            "name": "app_home_first_ranker"
         }],
         "second_ranker": [{
            "name": "article_app_home_ctr_ranker"
         }],
         "re_ranker": [{
            "name": "article_app_home_reranker"
         }]
      }, {
         "name": "alg_wheel",
         "count": 3,
         "cache_prefix": "alg_wheel:6",
         "recaller": [{
            "name": "video_personalized_recaller"
         }]
      },
      {
         "name": "intervention_wheel",
         "count": 1,
         "recaller": [{
            "name": "article_user_credit_recaller"
         }]
      }
   ]
}

监控

通过 monitor.GearMonitor 监控每个组件的数据流情况；如需定制数据监控，可以通过自定义 gear_name 来实现。