商城的商品越来越多,MySQL的LIKE查询扛不住了。这篇接入Elasticsearch做商品搜索——从索引设计、中文分词到搜索排序,把一个基本可用的搜索服务搭起来。
商品搜索的需求
用户搜索商品时的核心需求:
- 关键词搜索(同时搜商品名、描述、品牌)
- 中文分词准确("运动鞋"能搜到"男士运动鞋")
- 结果按相关度排序,同时考虑销量和评分
- 搜索高亮(命中词高亮显示)
- 搜索建议(输入补全)
ES索引设计
先定义商品的索引mapping:
PUT /products
{
"settings": {
"number_of_shards": 3,
"number_of_replicas": 1,
"analysis": {
"analyzer": {
"ik_smart_synonym": {
"type": "custom",
"tokenizer": "ik_max_word",
"filter": ["lowercase", "synonym_filter"]
}
},
"filter": {
"synonym_filter": {
"type": "synonym",
"synonyms": [
"手机,手机电话,移动电话",
"笔记本,笔记本电脑,laptop"
]
}
}
}
},
"mappings": {
"properties": {
"id": { "type": "long" },
"name": {
"type": "text",
"analyzer": "ik_max_word",
"search_analyzer": "ik_smart",
"fields": {
"keyword": { "type": "keyword" }
}
},
"description": {
"type": "text",
"analyzer": "ik_max_word",
"search_analyzer": "ik_smart"
},
"brand": {
"type": "keyword",
"fields": {
"text": {
"type": "text",
"analyzer": "ik_max_word"
}
}
},
"category_id": { "type": "integer" },
"category_name": { "type": "keyword" },
"price": { "type": "scaled_float", "scaling_factor": 100 },
"sales": { "type": "integer" },
"rating": { "type": "float" },
"status": { "type": "keyword" },
"created_at": { "type": "date" },
"suggest": {
"type": "completion",
"analyzer": "ik_max_word"
}
}
}
}
几个关键设计决策:
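分词器选择:索引时用 ik_max_word(最细粒度分词),搜索时用 ik_smart(智能分词)。这样索引时会产生更多token(提高召回率),搜索时尽量理解用户意图。注意:settings里虽然定义了带同义词过滤的自定义分析器 ik_smart_synonym,但上面的mapping并没有引用它(而且它的tokenizer实际是 ik_max_word,与名字不符);要让同义词生效,需要把相应字段的 search_analyzer 指向它。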
name字段:同时有text和keyword两种类型,text用于全文搜索,keyword用于精确匹配和聚合。
suggest字段:completion类型,专门用于搜索建议的自动补全。
搜索查询构建
核心搜索用 bool + multi_match + function_score:
POST /products/_search
{
"query": {
"function_score": {
"query": {
"bool": {
"must": [
{
"multi_match": {
"query": "运动鞋",
"fields": ["name^3", "description", "brand.text^2"],
"type": "best_fields",
"minimum_should_match": "75%"
}
}
],
"filter": [
{ "term": { "status": "on_sale" } }
]
}
},
"functions": [
{
"field_value_factor": {
"field": "sales",
"modifier": "log1p",
"factor": 0.1
},
"weight": 2
},
{
"field_value_factor": {
"field": "rating",
"modifier": "none",
"factor": 1
},
"weight": 1
}
],
"score_mode": "sum",
"boost_mode": "multiply"
}
},
"highlight": {
"fields": {
"name": { "pre_tags": ["<em>"], "post_tags": ["</em>"] },
"description": {
"pre_tags": ["<em>"],
"post_tags": ["</em>"],
"fragment_size": 100
}
}
},
"from": 0,
"size": 20
}
解释一下这个查询:
- multi_match:同时搜name、description、brand,name权重最高(^3)
- function_score:在文本相关度基础上,综合考虑销量(log1p平滑)和评分
- filter:只搜在售商品,filter不参与评分,有缓存加速
- highlight:搜索结果高亮
搜索建议
用户输入时的自动补全:
POST /products/_search
{
"suggest": {
"product_suggest": {
"prefix": "运动",
"completion": {
"field": "suggest",
"size": 5,
"skip_duplicates": true
}
}
}
}
索引文档时需要填充suggest字段:
{
"name": "Nike Air Max 运动鞋",
"suggest": {
"input": ["Nike Air Max", "运动鞋", "Nike运动鞋", "耐克"]
}
}
Go ES客户端
用 olivere/elastic 库(Go的ES客户端)封装搜索服务:
// service/search.go
package service
import (
"context"
"github.com/olivere/elastic/v7"
)
// SearchService issues product searches through an Elasticsearch client
// against a single, fixed index.
type SearchService struct {
// client is the olivere/elastic client used to execute search requests.
client *elastic.Client
// index is the name of the index that every search is sent to.
index string
}
// SearchRequest carries the user-supplied parameters of a product search.
// Pointer fields are optional: a nil pointer means the filter is not applied.
type SearchRequest struct {
// Keyword is the full-text query matched against name, description and
// brand.text. An empty string adds no keyword clause.
Keyword string
// CategoryID, when non-nil, restricts hits to that category_id.
CategoryID *int
// PriceMin, when non-nil, is the inclusive lower bound on price.
PriceMin *float64
// PriceMax, when non-nil, is the inclusive upper bound on price.
PriceMax *float64
// SortBy selects the ordering. Only price_asc, price_desc and sales add an
// explicit sort; any other value (including relevance) leaves the default
// ordering in place.
SortBy string // relevance, price_asc, price_desc, sales
// Page is the 1-based page number.
Page int
// PageSize is the number of hits requested per page.
PageSize int
}
// SearchResult is the value returned by SearchService.Search.
type SearchResult struct {
// Total is presumably the total number of matching documents (not just the
// current page) — confirm against parseSearchResult.
Total int64
// Products holds the product documents of the returned page; presumably
// decoded from the search hits — confirm against parseSearchResult.
Products []ProductDoc
}
// Search runs a product search against s.index and returns the parsed result.
//
// The query is a function_score over a bool query:
//   - when req.Keyword is non-empty, a best_fields multi_match over name (^3),
//     description and brand.text (^2) with minimum_should_match 75%;
//   - when req.Keyword is empty, a match_all clause, so that every hit has a
//     non-zero base score (see the comment in the body);
//   - filters on status == "on_sale", and optionally on category_id and an
//     inclusive price range.
//
// The base score is multiplied by the sum of a log1p-smoothed sales factor and
// the rating. req.SortBy values price_asc, price_desc and sales replace the
// score ordering with an explicit field sort; any other value keeps the
// score ordering.
//
// A req.Page below 1 is treated as 1 and a req.PageSize below 1 as 20, so the
// zero-value SearchRequest yields the first page instead of a negative offset.
//
// It returns the error from the Elasticsearch client unchanged when the
// request fails.
func (s *SearchService) Search(ctx context.Context, req SearchRequest) (*SearchResult, error) {
	const defaultPageSize = 20

	// Normalise paging: a negative "from" is rejected by Elasticsearch and a
	// size of 0 would silently return no hits.
	page, pageSize := req.Page, req.PageSize
	if page < 1 {
		page = 1
	}
	if pageSize < 1 {
		pageSize = defaultPageSize
	}

	boolQuery := elastic.NewBoolQuery()

	// Keyword clause.
	if req.Keyword != "" {
		// Fields are registered exactly once each. Passing them to
		// NewMultiMatchQuery and then calling FieldWithBoost for the same
		// name would append the field a second time.
		boolQuery.Must(
			elastic.NewMultiMatchQuery(req.Keyword).
				FieldWithBoost("name", 3).
				Field("description").
				FieldWithBoost("brand.text", 2).
				Type("best_fields").
				MinimumShouldMatch("75%"),
		)
	} else {
		// A bool query with only filter clauses scores every hit 0, and
		// boost_mode "multiply" would then zero out the sales/rating
		// functions. match_all gives each hit a base score of 1 so the
		// ranking functions still apply when browsing without a keyword.
		boolQuery.Must(elastic.NewMatchAllQuery())
	}

	// Filter clauses (do not contribute to the score).
	boolQuery.Filter(elastic.NewTermQuery("status", "on_sale"))
	if req.CategoryID != nil {
		boolQuery.Filter(elastic.NewTermQuery("category_id", *req.CategoryID))
	}
	if req.PriceMin != nil || req.PriceMax != nil {
		priceRange := elastic.NewRangeQuery("price")
		if req.PriceMin != nil {
			priceRange.Gte(*req.PriceMin)
		}
		if req.PriceMax != nil {
			priceRange.Lte(*req.PriceMax)
		}
		boolQuery.Filter(priceRange)
	}

	// Blend text relevance with sales and rating.
	query := elastic.NewFunctionScoreQuery().
		Query(boolQuery).
		AddScoreFunc(
			elastic.NewFieldValueFactorFunction().
				Field("sales").Modifier("log1p").Factor(0.1).Weight(2),
		).
		AddScoreFunc(
			elastic.NewFieldValueFactorFunction().
				Field("rating").Factor(1).Weight(1),
		).
		ScoreMode("sum").
		BoostMode("multiply")

	// Assemble the search request.
	search := s.client.Search().
		Index(s.index).
		Query(query).
		Highlight(
			elastic.NewHighlight().
				Field("name").
				Field("description").
				PreTags("<em>").PostTags("</em>"),
		).
		From((page - 1) * pageSize).
		Size(pageSize)

	// Explicit ordering; anything else falls back to score ordering.
	switch req.SortBy {
	case "price_asc":
		search.Sort("price", true)
	case "price_desc":
		search.Sort("price", false)
	case "sales":
		search.Sort("sales", false)
	}

	result, err := search.Do(ctx)
	if err != nil {
		return nil, err
	}
	return parseSearchResult(result), nil
}
数据同步
MySQL是主数据源,需要把商品数据同步到ES。方案是用Canal监听MySQL的binlog,商品有变更时实时同步到ES:
MySQL binlog → Canal → Kafka → Consumer → ES
全量同步写一个脚本批量导入,增量靠binlog。这样不需要在业务代码里做双写,耦合度低。
小结
接入ES之后搜索体验有质的提升。中文分词+相关度排序让用户能快速找到想要的商品。后续可以做的优化:
- 搜索词纠错
- 搜索热词统计
- 个性化排序(基于用户偏好)
- A/B测试不同的排序策略