Skip to content

Commit eb09fa6

Browse files
committed
Add new fields to elastic search
1 parent 04f2fe8 commit eb09fa6

File tree

5 files changed

+173
-80
lines changed

5 files changed

+173
-80
lines changed

modules/indexer/code/elastic_search.go

Lines changed: 135 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,10 @@ import (
2020
"code.gitea.io/gitea/modules/git"
2121
"code.gitea.io/gitea/modules/log"
2222
"code.gitea.io/gitea/modules/setting"
23+
"code.gitea.io/gitea/modules/timeutil"
2324

2425
"github.com/olivere/elastic/v7"
26+
"github.com/src-d/enry/v2"
2527
)
2628

2729
var (
@@ -70,22 +72,34 @@ func NewElasticSearchIndexer(url, indexerName string) (*ElasticSearchIndexer, bo
7072
client: client,
7173
indexerName: indexerName,
7274
}
73-
success, err := indexer.init()
75+
exists, err := indexer.init()
7476

75-
return indexer, success, err
77+
return indexer, exists, err
7678
}
7779

7880
const (
7981
defaultMapping = `{
8082
"mappings": {
8183
"properties": {
8284
"repo_id": {
83-
"type": "integer",
85+
"type": "long",
8486
"index": true
8587
},
8688
"content": {
8789
"type": "text",
8890
"index": true
91+
},
92+
"commit_id": {
93+
"type": "keyword",
94+
"index": true
95+
},
96+
"language": {
97+
"type": "keyword",
98+
"index": true
99+
},
100+
"updated_at": {
101+
"type": "long",
102+
"index": true
89103
}
90104
}
91105
}
@@ -99,21 +113,21 @@ func (b *ElasticSearchIndexer) init() (bool, error) {
99113
if err != nil {
100114
return false, err
101115
}
116+
if exists {
117+
return true, nil
118+
}
102119

103-
if !exists {
104-
var mapping = defaultMapping
120+
var mapping = defaultMapping
105121

106-
createIndex, err := b.client.CreateIndex(b.indexerName).BodyString(mapping).Do(ctx)
107-
if err != nil {
108-
return false, err
109-
}
110-
if !createIndex.Acknowledged {
111-
return false, errors.New("init failed")
112-
}
113-
114-
return false, nil
122+
createIndex, err := b.client.CreateIndex(b.indexerName).BodyString(mapping).Do(ctx)
123+
if err != nil {
124+
return false, err
115125
}
116-
return true, nil
126+
if !createIndex.Acknowledged {
127+
return false, errors.New("init failed")
128+
}
129+
130+
return false, nil
117131
}
118132

119133
func (b *ElasticSearchIndexer) addUpdate(sha string, update fileUpdate, repo *models.Repository) ([]elastic.BulkableRequest, error) {
@@ -148,7 +162,7 @@ func (b *ElasticSearchIndexer) addUpdate(sha string, update fileUpdate, repo *mo
148162
"content": string(charset.ToUTF8DropErrors(fileContents)),
149163
"commit_id": sha,
150164
"language": analyze.GetCodeLanguage(update.Filename, fileContents),
151-
"updated_at": time.Now().UTC(),
165+
"updated_at": timeutil.TimeStampNow(),
152166
}),
153167
}, nil
154168
}
@@ -203,6 +217,73 @@ func (b *ElasticSearchIndexer) Delete(repoID int64) error {
203217
return err
204218
}
205219

220+
func convertResult(searchResult *elastic.SearchResult, kw string, pageSize int) (int64, []*SearchResult, []*SearchResultLanguages, error) {
221+
hits := make([]*SearchResult, 0, pageSize)
222+
for _, hit := range searchResult.Hits.Hits {
223+
// FIXME: There is no way to get the position the keyword on the content currently on the same request.
224+
// So we get it from content, this may made the query slower. See
225+
// https://discuss.elastic.co/t/fetching-position-of-keyword-in-matched-document/94291
226+
var startIndex, endIndex int = -1, -1
227+
c, ok := hit.Highlight["content"]
228+
if ok && len(c) > 0 {
229+
var subStr = make([]rune, 0, len(kw))
230+
startIndex = strings.IndexFunc(c[0], func(r rune) bool {
231+
if len(subStr) >= len(kw) {
232+
subStr = subStr[1:]
233+
}
234+
subStr = append(subStr, r)
235+
return strings.EqualFold(kw, string(subStr))
236+
})
237+
if startIndex > -1 {
238+
endIndex = startIndex + len(kw)
239+
} else {
240+
panic(fmt.Sprintf("1===%#v", hit.Highlight))
241+
}
242+
} else {
243+
panic(fmt.Sprintf("2===%#v", hit.Highlight))
244+
}
245+
246+
repoID, fileName := parseIndexerID(hit.Id)
247+
var res = make(map[string]interface{})
248+
if err := json.Unmarshal(hit.Source, &res); err != nil {
249+
return 0, nil, nil, err
250+
}
251+
252+
language := res["language"].(string)
253+
254+
hits = append(hits, &SearchResult{
255+
RepoID: repoID,
256+
Filename: fileName,
257+
CommitID: res["commit_id"].(string),
258+
Content: res["content"].(string),
259+
UpdatedUnix: timeutil.TimeStamp(res["updated_at"].(float64)),
260+
Language: language,
261+
StartIndex: startIndex,
262+
EndIndex: endIndex,
263+
Color: enry.GetColor(language),
264+
})
265+
}
266+
267+
return searchResult.TotalHits(), hits, extractAggs(searchResult), nil
268+
}
269+
270+
func extractAggs(searchResult *elastic.SearchResult) []*SearchResultLanguages {
271+
var searchResultLanguages []*SearchResultLanguages
272+
agg, found := searchResult.Aggregations.Terms("language")
273+
if found {
274+
searchResultLanguages = make([]*SearchResultLanguages, 0, 10)
275+
276+
for _, bucket := range agg.Buckets {
277+
searchResultLanguages = append(searchResultLanguages, &SearchResultLanguages{
278+
Language: bucket.Key.(string),
279+
Color: enry.GetColor(bucket.Key.(string)),
280+
Count: int(bucket.DocCount),
281+
})
282+
}
283+
}
284+
return searchResultLanguages
285+
}
286+
206287
// Search searches for codes and language stats by given conditions.
207288
func (b *ElasticSearchIndexer) Search(repoIDs []int64, language, keyword string, page, pageSize int) (int64, []*SearchResult, []*SearchResultLanguages, error) {
208289
kwQuery := elastic.NewMultiMatchQuery(keyword, "content")
@@ -216,10 +297,45 @@ func (b *ElasticSearchIndexer) Search(repoIDs []int64, language, keyword string,
216297
repoQuery := elastic.NewTermsQuery("repo_id", repoStrs...)
217298
query = query.Must(repoQuery)
218299
}
219-
start := 0
300+
301+
var (
302+
start int
303+
kw = "<em>" + keyword + "</em>"
304+
aggregation = elastic.NewTermsAggregation().Field("language").Size(10).OrderByCountDesc()
305+
)
306+
220307
if page > 0 {
221308
start = (page - 1) * pageSize
222309
}
310+
311+
if len(language) == 0 {
312+
searchResult, err := b.client.Search().
313+
Index(b.indexerName).
314+
Aggregation("language", aggregation).
315+
Query(query).
316+
Highlight(elastic.NewHighlight().Field("content")).
317+
Sort("repo_id", true).
318+
From(start).Size(pageSize).
319+
Do(context.Background())
320+
if err != nil {
321+
return 0, nil, nil, err
322+
}
323+
324+
return convertResult(searchResult, kw, pageSize)
325+
}
326+
327+
langQuery := elastic.NewMatchQuery("language", language)
328+
countResult, err := b.client.Search().
329+
Index(b.indexerName).
330+
Aggregation("language", aggregation).
331+
Query(query).
332+
Size(0). // We only need stats information
333+
Do(context.Background())
334+
if err != nil {
335+
return 0, nil, nil, err
336+
}
337+
338+
query = query.Must(langQuery)
223339
searchResult, err := b.client.Search().
224340
Index(b.indexerName).
225341
Query(query).
@@ -231,35 +347,9 @@ func (b *ElasticSearchIndexer) Search(repoIDs []int64, language, keyword string,
231347
return 0, nil, nil, err
232348
}
233349

234-
var kw = "<em>" + keyword + "</em>"
235-
236-
hits := make([]*SearchResult, 0, pageSize)
237-
for _, hit := range searchResult.Hits.Hits {
238-
var startIndex, endIndex int = -1, -1
239-
c, ok := hit.Highlight["content"]
240-
if ok && len(c) > 0 {
241-
startIndex = strings.Index(c[0], kw)
242-
if startIndex > -1 {
243-
endIndex = startIndex + len(kw)
244-
}
245-
}
246-
247-
repoID, fileName := parseIndexerID(hit.Id)
248-
var h = SearchResult{
249-
RepoID: repoID,
250-
StartIndex: startIndex,
251-
EndIndex: endIndex,
252-
Filename: fileName,
253-
}
254-
255-
if err := json.Unmarshal(hit.Source, &h); err != nil {
256-
return 0, nil, nil, err
257-
}
258-
259-
hits = append(hits, &h)
260-
}
350+
total, hits, _, err := convertResult(searchResult, kw, pageSize)
261351

262-
return searchResult.TotalHits(), hits, nil, nil
352+
return total, hits, extractAggs(countResult), err
263353
}
264354

265355
// Close implements indexer

modules/indexer/code/indexer.go

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -107,7 +107,7 @@ func Init() {
107107
cancel()
108108
indexer.Close()
109109
close(waitChannel)
110-
log.Fatal("PID: %d Unable to initialize the Repository Indexer at path: %s Error: %v", os.Getpid(), setting.Indexer.RepoPath, err)
110+
log.Fatal("PID: %d Unable to initialize the bleve Repository Indexer at path: %s Error: %v", os.Getpid(), setting.Indexer.RepoPath, err)
111111
}
112112
populate = created
113113
indexer.set(bleveIndexer)
@@ -121,17 +121,17 @@ func Init() {
121121
}
122122
}()
123123

124-
esIndexer, created, err := NewElasticSearchIndexer(setting.Indexer.RepoConnStr, setting.Indexer.RepoIndexerName)
124+
esIndexer, exists, err := NewElasticSearchIndexer(setting.Indexer.RepoConnStr, setting.Indexer.RepoIndexerName)
125125
if err != nil {
126126
if esIndexer != nil {
127127
esIndexer.Close()
128128
}
129129
cancel()
130130
indexer.Close()
131131
close(waitChannel)
132-
log.Fatal("PID: %d Unable to initialize the Repository Indexer at path: %s Error: %v", os.Getpid(), setting.Indexer.RepoPath, err)
132+
log.Fatal("PID: %d Unable to initialize the elasticsearch Repository Indexer connstr: %s Error: %v", os.Getpid(), setting.Indexer.RepoConnStr, err)
133133
}
134-
populate = created
134+
populate = !exists
135135
indexer.set(esIndexer)
136136
default:
137137
log.Fatal("PID: %d Unknow Indexer type: %s", os.Getpid(), setting.Indexer.RepoType)

modules/indexer/code/indexer_test.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,9 +63,10 @@ func testIndexer(name string, t *testing.T, indexer Indexer) {
6363

6464
for _, kw := range keywords {
6565
t.Run(kw.Keyword, func(t *testing.T) {
66-
total, res, err := indexer.Search(kw.RepoIDs, "", kw.Keyword, 1, 10)
66+
total, res, langs, err := indexer.Search(kw.RepoIDs, "", kw.Keyword, 1, 10)
6767
assert.NoError(t, err)
6868
assert.EqualValues(t, len(kw.IDs), total)
69+
assert.EqualValues(t, kw.Langs, len(langs))
6970

7071
var ids = make([]int64, 0, len(res))
7172
for _, hit := range res {

modules/setting/indexer.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,7 @@ func newIndexerService() {
8686
if !filepath.IsAbs(Indexer.RepoPath) {
8787
Indexer.RepoPath = path.Join(AppWorkPath, Indexer.RepoPath)
8888
}
89-
Indexer.RepoConnStr = sec.Key("REPO_INDEXER_CONNSTR").MustString("")
89+
Indexer.RepoConnStr = sec.Key("REPO_INDEXER_CONN_STR").MustString("")
9090
Indexer.RepoIndexerName = sec.Key("REPO_INDEXER_NAME").MustString("gitea_codes")
9191

9292
Indexer.IncludePatterns = IndexerGlobFromString(sec.Key("REPO_INDEXER_INCLUDE").MustString(""))

templates/explore/code.tmpl

Lines changed: 31 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -28,37 +28,39 @@
2828
<div class="repository search">
2929
{{range $result := .SearchResults}}
3030
{{$repo := (index $.RepoMaps .RepoID)}}
31-
<div class="diff-file-box diff-box file-content non-diff-file-content repo-search-result">
32-
<h4 class="ui top attached normal header">
33-
<span class="file"><a rel="nofollow" href="{{EscapePound $repo.HTMLURL}}">{{$repo.FullName}}</a> - {{.Filename}}</span>
34-
<a class="ui basic grey tiny button" rel="nofollow" href="{{EscapePound $repo.HTMLURL}}/src/commit/{{$result.CommitID}}/{{EscapePound .Filename}}">{{$.i18n.Tr "repo.diff.view_file"}}</a>
35-
</h4>
36-
<div class="ui attached table segment">
37-
<div class="file-body file-code code-view">
38-
<table>
39-
<tbody>
40-
<tr>
41-
<td class="lines-num">
42-
{{range .LineNumbers}}
43-
<a href="{{EscapePound $repo.HTMLURL}}/src/commit/{{$result.CommitID}}/{{EscapePound $result.Filename}}#L{{.}}"><span>{{.}}</span></a>
44-
{{end}}
45-
</td>
46-
<td class="lines-code"><pre><code class="{{.HighlightClass}}"><ol class="linenums">{{.FormattedLines}}</ol></code></pre></td>
47-
</tr>
48-
</tbody>
49-
</table>
31+
{{if $repo}}
32+
<div class="diff-file-box diff-box file-content non-diff-file-content repo-search-result">
33+
<h4 class="ui top attached normal header">
34+
<span class="file"><a rel="nofollow" href="{{EscapePound $repo.HTMLURL}}">{{$repo.FullName}}</a> - {{.Filename}}</span>
35+
<a class="ui basic grey tiny button" rel="nofollow" href="{{EscapePound $repo.HTMLURL}}/src/commit/{{$result.CommitID}}/{{EscapePound .Filename}}">{{$.i18n.Tr "repo.diff.view_file"}}</a>
36+
</h4>
37+
<div class="ui attached table segment">
38+
<div class="file-body file-code code-view">
39+
<table>
40+
<tbody>
41+
<tr>
42+
<td class="lines-num">
43+
{{range .LineNumbers}}
44+
<a href="{{EscapePound $repo.HTMLURL}}/src/commit/{{$result.CommitID}}/{{EscapePound $result.Filename}}#L{{.}}"><span>{{.}}</span></a>
45+
{{end}}
46+
</td>
47+
<td class="lines-code"><pre><code class="{{.HighlightClass}}"><ol class="linenums">{{.FormattedLines}}</ol></code></pre></td>
48+
</tr>
49+
</tbody>
50+
</table>
51+
</div>
52+
</div>
53+
<div class="ui bottom attached table segment">
54+
{{if $result.Language}}
55+
<i class="color-icon" style="background-color: {{$result.Color}}"></i>{{$result.Language}}
56+
{{end}}
57+
&nbsp;
58+
{{if not $result.UpdatedUnix.IsZero}}
59+
<span class="ui small grey text pull right">{{$.i18n.Tr "explore.code_last_indexed_at" (TimeSinceUnix $result.UpdatedUnix $.i18n.Lang) | Safe}} &nbsp;</span>
60+
{{end}}
5061
</div>
5162
</div>
52-
<div class="ui bottom attached table segment">
53-
{{if $result.Language}}
54-
<i class="color-icon" style="background-color: {{$result.Color}}"></i>{{$result.Language}}
55-
{{end}}
56-
&nbsp;
57-
{{if not $result.UpdatedUnix.IsZero}}
58-
<span class="ui small grey text pull right">{{$.i18n.Tr "explore.code_last_indexed_at" (TimeSinceUnix $result.UpdatedUnix $.i18n.Lang) | Safe}} &nbsp;</span>
59-
{{end}}
60-
</div>
61-
</div>
63+
{{end}}
6264
{{end}}
6365
</div>
6466
{{else}}

0 commit comments

Comments
 (0)