美篇推荐系统架构简介
美篇推荐系统架构
核心组件
核心组件都在 `core` 目录下:
- 召回:`meipian.cn/feed/core/recaller`
- 排序:`meipian.cn/feed/core/ranker`
- 特征:`meipian.cn/feed/core/featurer`
- 过滤:`meipian.cn/feed/core/filter`
- 包装:`meipian.cn/feed/core/wrapper`
多路融合
多路融合:多个车轮一起滚动
// multi activity stream
sp.Annotate(time.Now(), "Do fetch multi activity stream")
sp.Tag("track_id", req.TrackId)
for _, wheelConfigJson := range conf.ActivityStreamConfigJson {
// fetch wheel config => wlc
wlc, wErr := GetWheelConfigFromJson(req, &wheelConfigJson)
if wErr != nil {
log.Err(wheelConfigJson.Name + " wheel config convert failed !")
continue
}
// fetch wheel => wl
wi, _ := engine.Engine.New(wlc.Type)
if wl, ok := wi.(wheel.Wheel); ok {
count++
// Do wheel roll
go func(_wl wheel.Wheel, _wlc *wheel.WheelConfig) {
childspan := zipkinUtil.ZipkinTracer.StartSpan(_wlc.Name, zipkin.Parent(sp.Context()))
defer childspan.Finish()
childspan.Annotate(time.Now(), _wlc.Name+" start")
newCtx := zipkin.NewContext(vctx, childspan)
vv := _wl.Roll(&newCtx, req, _wlc)
monitor.GearMonitor(req.Scene, req.TrackId, _wlc.Name, len(vv), req.UserId, _wlc.Type, "activity_stream")
ch <- vv
}(wl, &wlc)
} else {
log.Err(wheelConfigJson.Name + " wheel convert failed !")
continue
}
召回-排序
召回-排序:每个车轮如何滚动
// One wheel roll runs six stages in order:
// recall -> filter -> featurer -> first ranker -> second ranker -> re-ranker.
// Each stage is given the result of the previous stage plus the matching
// stage list from the wheel config (conf.Recaller, conf.Filter, ...), and a
// span annotation is recorded right before each stage is invoked.
// Do recall
sp.Annotate(time.Now(), "Do recall")
recalledFeedCells := AddMultiRecaller(ctx, req, conf.Name, conf.Recaller...)
// Do filter
sp.Annotate(time.Now(), "Do filter")
filterdFeedCells := AddMultiFilter(ctx, req, conf.Name, recalledFeedCells, conf.Filter...)
// Do featurer
sp.Annotate(time.Now(), "Do featurer")
featuredFeedCells := AddMultiFeaturer(ctx, req, conf.Name, filterdFeedCells, conf.Featurer...)
// Do first ranker
sp.Annotate(time.Now(), "Do first ranker")
firstRankedFeedCells := AddMultiFirstRanker(ctx, req, conf.Name, featuredFeedCells, conf.FirstRanker...)
// Do second ranker
sp.Annotate(time.Now(), "Do second ranker")
secondRankedFeedCells := AddMultiSecondRanker(ctx, req, conf.Name, firstRankedFeedCells, conf.SecondRanker...)
// Do re-ranker
sp.Annotate(time.Now(), "Do re reanker")
rerankedCells := AddMultiReRanker(ctx, req, conf.Name, secondRankedFeedCells, conf.ReRanker...)
多路召回
// Multi-recall: start one span for the whole step, run every recaller in rs
// in its own goroutine, then merge all results into feedCells.
sp, _ := zipkinUtil.ZipkinTracer.StartSpanFromContext(*ctx, "MULTI-RECALLER")
// Unbuffered channel: each goroutine blocks on its send until the collector
// loop below performs the matching receive.
var ch = make(chan []model.FeedCell)
for _, r := range rs {
// The recaller, its count and its name are passed as arguments, so the
// goroutine does not depend on the loop variable r.
go func(fr recaller.Recaller, amount int, name string) {
// Child span per recaller, parented to the MULTI-RECALLER span.
childspan := zipkinUtil.ZipkinTracer.StartSpan(name, zipkin.Parent(sp.Context()))
defer childspan.Finish()
childspan.Annotate(time.Now(), name+" start")
vv := fr.Recall(ctx, req, amount)
// Pass the number of recalled cells (len(vv)) to the monitor for this recaller.
monitor.GearMonitor(req.Scene, req.TrackId, name, len(vv), req.UserId, "recaller", wheelName+"_multi_recaller")
ch <- vv
// The "end" annotation is written after the send, so it also covers the
// time spent waiting for the collector to receive.
childspan.Annotate(time.Now(), name+" end")
}(r.Recaller, r.Count, r.Name)
}
// Collect exactly one result per recaller; results are appended in the order
// they arrive on the channel, not in the order of rs.
// NOTE(review): no timeout or cancellation is visible here, so a recaller that
// never returns would block this loop — confirm whether Recall enforces one.
for i := 0; i < len(rs); i++ {
feedCells = append(feedCells, <-ch...)
}
架构图
用户上下文
// Load the user context for the requesting user and attach it to the request
// context under the "user_context" key, so later stages can read it from there.
var uc common.UserContext
uc = record.GetCommonUserContext(req.UserId)
// NOTE(review): the key is a plain string; the context package recommends an
// unexported key type to avoid collisions between packages — confirm whether
// readers rely on this exact string before changing it.
vctx := context.WithValue(sctx, "user_context", uc)
其中用户上下文包括用户曝光记录、点击记录和用户画像等,贯穿请求处理的始终,可以通过 `user_context` 键从 `context` 中获取。
策略热插拔
抽象工厂模式
- 齿轮注册器,通过动态配置组合不同策略
- 高内聚低耦合,不同齿轮之间互相隔离
- 便于监控不同阶段的数据流
组件注册
在 `engine_creater.go` 中调用 `Engine.Register` 方法进行注册,例如:
Engine.Register("article_floor_hot_recaller", &recaller.FloorHotArticleFeedRecaller{})
动态生成对象
在 `engine_creater.go` 中调用 `Engine.New` 方法进行创建。
配置示例
{
"name": "app_home_feed",
"global_filter": "unique_item_filter",
"global_ranker": "app_home_global_ranker",
"global_wrapper": "app_home_wrapper",
"experiment_id": "",
"activity_stream": [{
"name": "alg_wheel",
"count": 18,
"cache_prefix": "alg_wheel:1",
"recaller": [{
"name": "article_icf_recaller"
}, {
"name": "article_content_based_recaller_v2"
}, {
"name": "article_daily_hot_recaller"
}],
"filter": [],
"featurer": [{
"name": "app_home_article_featurer"
}],
"first_ranker": [{
"name": "app_home_first_ranker"
}],
"second_ranker": [{
"name": "article_app_home_ctr_ranker"
}],
"re_ranker": [{
"name": "article_app_home_reranker"
}]
}, {
"name": "alg_wheel",
"count": 3,
"cache_prefix": "alg_wheel:6",
"recaller": [{
"name": "video_personalized_recaller"
}]
},
{
"name": "intervention_wheel",
"count": 1,
"recaller": [{
"name": "article_user_credit_recaller"
}]
}
]
}
监控
通过 `monitor.GearMonitor` 监控每个组件的数据流情况,定制的数据监控可以通过自定义 `gear_name` 来实现。