@@ -20,8 +20,10 @@ import (
20
20
"code.gitea.io/gitea/modules/git"
21
21
"code.gitea.io/gitea/modules/log"
22
22
"code.gitea.io/gitea/modules/setting"
23
+ "code.gitea.io/gitea/modules/timeutil"
23
24
24
25
"github.com/olivere/elastic/v7"
26
+ "github.com/src-d/enry/v2"
25
27
)
26
28
27
29
var (
@@ -70,22 +72,34 @@ func NewElasticSearchIndexer(url, indexerName string) (*ElasticSearchIndexer, bo
70
72
client : client ,
71
73
indexerName : indexerName ,
72
74
}
73
- success , err := indexer .init ()
75
+ exists , err := indexer .init ()
74
76
75
- return indexer , success , err
77
+ return indexer , exists , err
76
78
}
77
79
78
80
const (
79
81
defaultMapping = `{
80
82
"mappings": {
81
83
"properties": {
82
84
"repo_id": {
83
- "type": "integer ",
85
+ "type": "long ",
84
86
"index": true
85
87
},
86
88
"content": {
87
89
"type": "text",
88
90
"index": true
91
+ },
92
+ "commit_id": {
93
+ "type": "keyword",
94
+ "index": true
95
+ },
96
+ "language": {
97
+ "type": "keyword",
98
+ "index": true
99
+ },
100
+ "updated_at": {
101
+ "type": "long",
102
+ "index": true
89
103
}
90
104
}
91
105
}
@@ -99,21 +113,21 @@ func (b *ElasticSearchIndexer) init() (bool, error) {
99
113
if err != nil {
100
114
return false , err
101
115
}
116
+ if exists {
117
+ return true , nil
118
+ }
102
119
103
- if ! exists {
104
- var mapping = defaultMapping
120
+ var mapping = defaultMapping
105
121
106
- createIndex , err := b .client .CreateIndex (b .indexerName ).BodyString (mapping ).Do (ctx )
107
- if err != nil {
108
- return false , err
109
- }
110
- if ! createIndex .Acknowledged {
111
- return false , errors .New ("init failed" )
112
- }
113
-
114
- return false , nil
122
+ createIndex , err := b .client .CreateIndex (b .indexerName ).BodyString (mapping ).Do (ctx )
123
+ if err != nil {
124
+ return false , err
115
125
}
116
- return true , nil
126
+ if ! createIndex .Acknowledged {
127
+ return false , errors .New ("init failed" )
128
+ }
129
+
130
+ return false , nil
117
131
}
118
132
119
133
func (b * ElasticSearchIndexer ) addUpdate (sha string , update fileUpdate , repo * models.Repository ) ([]elastic.BulkableRequest , error ) {
@@ -148,7 +162,7 @@ func (b *ElasticSearchIndexer) addUpdate(sha string, update fileUpdate, repo *mo
148
162
"content" : string (charset .ToUTF8DropErrors (fileContents )),
149
163
"commit_id" : sha ,
150
164
"language" : analyze .GetCodeLanguage (update .Filename , fileContents ),
151
- "updated_at" : time . Now (). UTC (),
165
+ "updated_at" : timeutil . TimeStampNow (),
152
166
}),
153
167
}, nil
154
168
}
@@ -203,6 +217,73 @@ func (b *ElasticSearchIndexer) Delete(repoID int64) error {
203
217
return err
204
218
}
205
219
220
+ func convertResult (searchResult * elastic.SearchResult , kw string , pageSize int ) (int64 , []* SearchResult , []* SearchResultLanguages , error ) {
221
+ hits := make ([]* SearchResult , 0 , pageSize )
222
+ for _ , hit := range searchResult .Hits .Hits {
223
+ // FIXME: There is no way to get the position the keyword on the content currently on the same request.
224
+ // So we get it from content, this may made the query slower. See
225
+ // https://discuss.elastic.co/t/fetching-position-of-keyword-in-matched-document/94291
226
+ var startIndex , endIndex int = - 1 , - 1
227
+ c , ok := hit .Highlight ["content" ]
228
+ if ok && len (c ) > 0 {
229
+ var subStr = make ([]rune , 0 , len (kw ))
230
+ startIndex = strings .IndexFunc (c [0 ], func (r rune ) bool {
231
+ if len (subStr ) >= len (kw ) {
232
+ subStr = subStr [1 :]
233
+ }
234
+ subStr = append (subStr , r )
235
+ return strings .EqualFold (kw , string (subStr ))
236
+ })
237
+ if startIndex > - 1 {
238
+ endIndex = startIndex + len (kw )
239
+ } else {
240
+ panic (fmt .Sprintf ("1===%#v" , hit .Highlight ))
241
+ }
242
+ } else {
243
+ panic (fmt .Sprintf ("2===%#v" , hit .Highlight ))
244
+ }
245
+
246
+ repoID , fileName := parseIndexerID (hit .Id )
247
+ var res = make (map [string ]interface {})
248
+ if err := json .Unmarshal (hit .Source , & res ); err != nil {
249
+ return 0 , nil , nil , err
250
+ }
251
+
252
+ language := res ["language" ].(string )
253
+
254
+ hits = append (hits , & SearchResult {
255
+ RepoID : repoID ,
256
+ Filename : fileName ,
257
+ CommitID : res ["commit_id" ].(string ),
258
+ Content : res ["content" ].(string ),
259
+ UpdatedUnix : timeutil .TimeStamp (res ["updated_at" ].(float64 )),
260
+ Language : language ,
261
+ StartIndex : startIndex ,
262
+ EndIndex : endIndex ,
263
+ Color : enry .GetColor (language ),
264
+ })
265
+ }
266
+
267
+ return searchResult .TotalHits (), hits , extractAggs (searchResult ), nil
268
+ }
269
+
270
+ func extractAggs (searchResult * elastic.SearchResult ) []* SearchResultLanguages {
271
+ var searchResultLanguages []* SearchResultLanguages
272
+ agg , found := searchResult .Aggregations .Terms ("language" )
273
+ if found {
274
+ searchResultLanguages = make ([]* SearchResultLanguages , 0 , 10 )
275
+
276
+ for _ , bucket := range agg .Buckets {
277
+ searchResultLanguages = append (searchResultLanguages , & SearchResultLanguages {
278
+ Language : bucket .Key .(string ),
279
+ Color : enry .GetColor (bucket .Key .(string )),
280
+ Count : int (bucket .DocCount ),
281
+ })
282
+ }
283
+ }
284
+ return searchResultLanguages
285
+ }
286
+
206
287
// Search searches for codes and language stats by given conditions.
207
288
func (b * ElasticSearchIndexer ) Search (repoIDs []int64 , language , keyword string , page , pageSize int ) (int64 , []* SearchResult , []* SearchResultLanguages , error ) {
208
289
kwQuery := elastic .NewMultiMatchQuery (keyword , "content" )
@@ -216,10 +297,45 @@ func (b *ElasticSearchIndexer) Search(repoIDs []int64, language, keyword string,
216
297
repoQuery := elastic .NewTermsQuery ("repo_id" , repoStrs ... )
217
298
query = query .Must (repoQuery )
218
299
}
219
- start := 0
300
+
301
+ var (
302
+ start int
303
+ kw = "<em>" + keyword + "</em>"
304
+ aggregation = elastic .NewTermsAggregation ().Field ("language" ).Size (10 ).OrderByCountDesc ()
305
+ )
306
+
220
307
if page > 0 {
221
308
start = (page - 1 ) * pageSize
222
309
}
310
+
311
+ if len (language ) == 0 {
312
+ searchResult , err := b .client .Search ().
313
+ Index (b .indexerName ).
314
+ Aggregation ("language" , aggregation ).
315
+ Query (query ).
316
+ Highlight (elastic .NewHighlight ().Field ("content" )).
317
+ Sort ("repo_id" , true ).
318
+ From (start ).Size (pageSize ).
319
+ Do (context .Background ())
320
+ if err != nil {
321
+ return 0 , nil , nil , err
322
+ }
323
+
324
+ return convertResult (searchResult , kw , pageSize )
325
+ }
326
+
327
+ langQuery := elastic .NewMatchQuery ("language" , language )
328
+ countResult , err := b .client .Search ().
329
+ Index (b .indexerName ).
330
+ Aggregation ("language" , aggregation ).
331
+ Query (query ).
332
+ Size (0 ). // We only needs stats information
333
+ Do (context .Background ())
334
+ if err != nil {
335
+ return 0 , nil , nil , err
336
+ }
337
+
338
+ query = query .Must (langQuery )
223
339
searchResult , err := b .client .Search ().
224
340
Index (b .indexerName ).
225
341
Query (query ).
@@ -231,35 +347,9 @@ func (b *ElasticSearchIndexer) Search(repoIDs []int64, language, keyword string,
231
347
return 0 , nil , nil , err
232
348
}
233
349
234
- var kw = "<em>" + keyword + "</em>"
235
-
236
- hits := make ([]* SearchResult , 0 , pageSize )
237
- for _ , hit := range searchResult .Hits .Hits {
238
- var startIndex , endIndex int = - 1 , - 1
239
- c , ok := hit .Highlight ["content" ]
240
- if ok && len (c ) > 0 {
241
- startIndex = strings .Index (c [0 ], kw )
242
- if startIndex > - 1 {
243
- endIndex = startIndex + len (kw )
244
- }
245
- }
246
-
247
- repoID , fileName := parseIndexerID (hit .Id )
248
- var h = SearchResult {
249
- RepoID : repoID ,
250
- StartIndex : startIndex ,
251
- EndIndex : endIndex ,
252
- Filename : fileName ,
253
- }
254
-
255
- if err := json .Unmarshal (hit .Source , & h ); err != nil {
256
- return 0 , nil , nil , err
257
- }
258
-
259
- hits = append (hits , & h )
260
- }
350
+ total , hits , _ , err := convertResult (searchResult , kw , pageSize )
261
351
262
- return searchResult . TotalHits () , hits , nil , nil
352
+ return total , hits , extractAggs ( countResult ), err
263
353
}
264
354
265
355
// Close implements indexer
0 commit comments