Skip to content

Commit a185c4f

Browse files
committed
Finished the Elasticsearch implementation and added some tests
1 parent 1eb0c92 commit a185c4f

File tree

6 files changed

+185
-100
lines changed

6 files changed

+185
-100
lines changed

modules/indexer/code/bleve_test.go

Lines changed: 3 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -7,20 +7,14 @@ package code
77
import (
88
"io/ioutil"
99
"os"
10-
"path/filepath"
1110
"testing"
1211

1312
"code.gitea.io/gitea/models"
14-
"code.gitea.io/gitea/modules/setting"
1513

1614
"github.com/stretchr/testify/assert"
1715
)
1816

19-
func TestMain(m *testing.M) {
20-
models.MainTest(m, filepath.Join("..", "..", ".."))
21-
}
22-
23-
func TestIndexAndSearch(t *testing.T) {
17+
func TestBleveIndexAndSearch(t *testing.T) {
2418
models.PrepareTestEnv(t)
2519

2620
dir, err := ioutil.TempDir("", "bleve.index")
@@ -31,56 +25,15 @@ func TestIndexAndSearch(t *testing.T) {
3125
}
3226
defer os.RemoveAll(dir)
3327

34-
setting.Indexer.RepoIndexerEnabled = true
3528
idx, _, err := NewBleveIndexer(dir)
3629
if err != nil {
37-
assert.Fail(t, "Unable to create indexer Error: %v", err)
30+
assert.Fail(t, "Unable to create bleve indexer Error: %v", err)
3831
if idx != nil {
3932
idx.Close()
4033
}
4134
return
4235
}
4336
defer idx.Close()
4437

45-
err = idx.Index(1)
46-
assert.NoError(t, err)
47-
48-
var (
49-
keywords = []struct {
50-
Keyword string
51-
IDs []int64
52-
Langs int
53-
}{
54-
{
55-
Keyword: "Description",
56-
IDs: []int64{1},
57-
Langs: 1,
58-
},
59-
{
60-
Keyword: "repo1",
61-
IDs: []int64{1},
62-
Langs: 1,
63-
},
64-
{
65-
Keyword: "non-exist",
66-
IDs: []int64{},
67-
Langs: 0,
68-
},
69-
}
70-
)
71-
72-
for _, kw := range keywords {
73-
total, res, langs, err := idx.Search(nil, "", kw.Keyword, 1, 10)
74-
assert.NoError(t, err)
75-
assert.EqualValues(t, len(kw.IDs), total)
76-
77-
assert.NotNil(t, langs)
78-
assert.Len(t, langs, kw.Langs)
79-
80-
var ids = make([]int64, 0, len(res))
81-
for _, hit := range res {
82-
ids = append(ids, hit.RepoID)
83-
}
84-
assert.EqualValues(t, kw.IDs, ids)
85-
}
38+
testIndexer("beleve", t, idx)
8639
}

modules/indexer/code/elastic_search.go

Lines changed: 60 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ package code
66

77
import (
88
"context"
9+
"encoding/json"
910
"errors"
1011
"fmt"
1112
"strconv"
@@ -85,7 +86,7 @@ const (
8586
"content": {
8687
"type": "text",
8788
"index": true
88-
},
89+
}
8990
}
9091
}
9192
}`
@@ -115,65 +116,73 @@ func (b *ElasticSearchIndexer) init() (bool, error) {
115116
return true, nil
116117
}
117118

118-
func (b *ElasticSearchIndexer) addUpdate(sha string, update fileUpdate, repo *models.Repository, reqs []elastic.BulkableRequest) error {
119+
func (b *ElasticSearchIndexer) addUpdate(sha string, update fileUpdate, repo *models.Repository) ([]elastic.BulkableRequest, error) {
119120
stdout, err := git.NewCommand("cat-file", "-s", update.BlobSha).
120121
RunInDir(repo.RepoPath())
121122
if err != nil {
122-
return err
123+
return nil, err
123124
}
124125
if size, err := strconv.Atoi(strings.TrimSpace(stdout)); err != nil {
125-
return fmt.Errorf("Misformatted git cat-file output: %v", err)
126+
return nil, fmt.Errorf("Misformatted git cat-file output: %v", err)
126127
} else if int64(size) > setting.Indexer.MaxIndexerFileSize {
127-
return b.addDelete(update.Filename, repo, reqs)
128+
return b.addDelete(update.Filename, repo)
128129
}
129130

130131
fileContents, err := git.NewCommand("cat-file", "blob", update.BlobSha).
131132
RunInDirBytes(repo.RepoPath())
132133
if err != nil {
133-
return err
134+
return nil, err
134135
} else if !base.IsTextFile(fileContents) {
135136
// FIXME: UTF-16 files will probably fail here
136-
return nil
137+
return nil, nil
137138
}
138139

139140
id := filenameIndexerID(repo.ID, update.Filename)
140141

141-
reqs = append(reqs, elastic.NewBulkIndexRequest().
142-
Index(b.indexerName).
143-
Id(id).
144-
Doc(map[string]interface{}{
145-
"repo_id": repo.ID,
146-
"content": string(charset.ToUTF8DropErrors(fileContents)),
147-
"commit_id": sha,
148-
"language": analyze.GetCodeLanguage(update.Filename, fileContents),
149-
"updated_at": time.Now().UTC(),
150-
}))
151-
152-
return nil
142+
return []elastic.BulkableRequest{
143+
elastic.NewBulkIndexRequest().
144+
Index(b.indexerName).
145+
Id(id).
146+
Doc(map[string]interface{}{
147+
"repo_id": repo.ID,
148+
"content": string(charset.ToUTF8DropErrors(fileContents)),
149+
"commit_id": sha,
150+
"language": analyze.GetCodeLanguage(update.Filename, fileContents),
151+
"updated_at": time.Now().UTC(),
152+
}),
153+
}, nil
153154
}
154155

155-
func (b *ElasticSearchIndexer) addDelete(filename string, repo *models.Repository, reqs []elastic.BulkableRequest) error {
156+
func (b *ElasticSearchIndexer) addDelete(filename string, repo *models.Repository) ([]elastic.BulkableRequest, error) {
156157
id := filenameIndexerID(repo.ID, filename)
157-
reqs = append(reqs,
158+
return []elastic.BulkableRequest{
158159
elastic.NewBulkDeleteRequest().
159160
Index(b.indexerName).
160161
Id(id),
161-
)
162-
return nil
162+
}, nil
163163
}
164164

165165
// Index will save the index data
166166
func (b *ElasticSearchIndexer) Index(repo *models.Repository, sha string, changes *repoChanges) error {
167167
reqs := make([]elastic.BulkableRequest, 0)
168168
for _, update := range changes.Updates {
169-
if err := b.addUpdate(sha, update, repo, reqs); err != nil {
169+
updateReqs, err := b.addUpdate(sha, update, repo)
170+
if err != nil {
170171
return err
171172
}
173+
if len(updateReqs) > 0 {
174+
reqs = append(reqs, updateReqs...)
175+
}
172176
}
177+
173178
for _, filename := range changes.RemovedFilenames {
174-
if err := b.addDelete(filename, repo, reqs); err != nil {
179+
delReqs, err := b.addDelete(filename, repo)
180+
if err != nil {
175181
return err
176182
}
183+
if len(delReqs) > 0 {
184+
reqs = append(reqs, delReqs...)
185+
}
177186
}
178187

179188
if len(reqs) > 0 {
@@ -188,15 +197,13 @@ func (b *ElasticSearchIndexer) Index(repo *models.Repository, sha string, change
188197

189198
// Delete deletes indexes by ids
190199
func (b *ElasticSearchIndexer) Delete(repoID int64) error {
191-
_, err := b.client.Delete().
192-
Index(b.indexerName).
200+
_, err := b.client.DeleteByQuery(b.indexerName).
193201
Query(elastic.NewTermsQuery("repo_id", repoID)).
194202
Do(context.Background())
195203
return err
196204
}
197205

198-
// Search searches for issues by given conditions.
199-
// Returns the matching issue IDs
206+
// Search searches for code and language stats matching the given conditions.
200207
func (b *ElasticSearchIndexer) Search(repoIDs []int64, language, keyword string, page, pageSize int) (int64, []*SearchResult, []*SearchResultLanguages, error) {
201208
kwQuery := elastic.NewMultiMatchQuery(keyword, "content")
202209
query := elastic.NewBoolQuery()
@@ -209,38 +216,47 @@ func (b *ElasticSearchIndexer) Search(repoIDs []int64, language, keyword string,
209216
repoQuery := elastic.NewTermsQuery("repo_id", repoStrs...)
210217
query = query.Must(repoQuery)
211218
}
219+
start := 0
220+
if page > 0 {
221+
start = (page - 1) * pageSize
222+
}
212223
searchResult, err := b.client.Search().
213224
Index(b.indexerName).
214225
Query(query).
215-
Sort("id", true).
216-
From(page * pageSize).Size(pageSize).
226+
Highlight(elastic.NewHighlight().Field("content")).
227+
Sort("repo_id", true).
228+
From(start).Size(pageSize).
217229
Do(context.Background())
218230
if err != nil {
219231
return 0, nil, nil, err
220232
}
221233

234+
var kw = "<em>" + keyword + "</em>"
235+
222236
hits := make([]*SearchResult, 0, pageSize)
223237
for _, hit := range searchResult.Hits.Hits {
224238
var startIndex, endIndex int = -1, -1
225-
/*for _, locations := range hit.Fields["Content"] {
226-
location := locations[0]
227-
locationStart := int(location.Start)
228-
locationEnd := int(location.End)
229-
if startIndex < 0 || locationStart < startIndex {
230-
startIndex = locationStart
231-
}
232-
if endIndex < 0 || locationEnd > endIndex {
233-
endIndex = locationEnd
239+
c, ok := hit.Highlight["content"]
240+
if ok && len(c) > 0 {
241+
startIndex = strings.Index(c[0], kw)
242+
if startIndex > -1 {
243+
endIndex = startIndex + len(kw)
234244
}
235-
}*/
245+
}
246+
236247
repoID, fileName := parseIndexerID(hit.Id)
237-
hits = append(hits, &SearchResult{
248+
var h = SearchResult{
238249
RepoID: repoID,
239250
StartIndex: startIndex,
240251
EndIndex: endIndex,
241252
Filename: fileName,
242-
Content: hit.Fields["content"].(string),
243-
})
253+
}
254+
255+
if err := json.Unmarshal(hit.Source, &h); err != nil {
256+
return 0, nil, nil, err
257+
}
258+
259+
hits = append(hits, &h)
244260
}
245261

246262
return searchResult.TotalHits(), hits, nil, nil
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
// Copyright 2020 The Gitea Authors. All rights reserved.
2+
// Use of this source code is governed by a MIT-style
3+
// license that can be found in the LICENSE file.
4+
5+
package code
6+
7+
import (
8+
"os"
9+
"testing"
10+
11+
"code.gitea.io/gitea/models"
12+
13+
"github.com/stretchr/testify/assert"
14+
)
15+
16+
func TestESIndexAndSearch(t *testing.T) {
17+
models.PrepareTestEnv(t)
18+
19+
u := os.Getenv("TEST_INDEXER_CODE_ES_URL")
20+
if u == "" {
21+
t.SkipNow()
22+
return
23+
}
24+
25+
indexer, _, err := NewElasticSearchIndexer(u, "gitea_codes")
26+
if err != nil {
27+
assert.Fail(t, "Unable to create ES indexer Error: %v", err)
28+
if indexer != nil {
29+
indexer.Close()
30+
}
31+
return
32+
}
33+
defer indexer.Close()
34+
35+
testIndexer("elastic_search", t, indexer)
36+
}

modules/indexer/code/indexer_test.go

Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
// Copyright 2020 The Gitea Authors. All rights reserved.
2+
// Use of this source code is governed by a MIT-style
3+
// license that can be found in the LICENSE file.
4+
5+
package code
6+
7+
import (
8+
"path/filepath"
9+
"testing"
10+
11+
"code.gitea.io/gitea/models"
12+
"github.com/stretchr/testify/assert"
13+
)
14+
15+
func TestMain(m *testing.M) {
16+
models.MainTest(m, filepath.Join("..", "..", ".."))
17+
}
18+
19+
func testIndexer(name string, t *testing.T, indexer Indexer) {
20+
t.Run(name, func(t *testing.T) {
21+
var repoID int64 = 1
22+
err := index(indexer, repoID)
23+
assert.NoError(t, err)
24+
var (
25+
keywords = []struct {
26+
RepoIDs []int64
27+
Keyword string
28+
IDs []int64
29+
Langs int
30+
}{
31+
{
32+
RepoIDs: nil,
33+
Keyword: "Description",
34+
IDs: []int64{repoID},
35+
Langs: 1,
36+
},
37+
{
38+
RepoIDs: []int64{2},
39+
Keyword: "Description",
40+
IDs: []int64{},
41+
Langs: 0,
42+
},
43+
{
44+
RepoIDs: nil,
45+
Keyword: "repo1",
46+
IDs: []int64{repoID},
47+
Langs: 1,
48+
},
49+
{
50+
RepoIDs: []int64{2},
51+
Keyword: "repo1",
52+
IDs: []int64{},
53+
Langs: 0,
54+
},
55+
{
56+
RepoIDs: nil,
57+
Keyword: "non-exist",
58+
IDs: []int64{},
59+
Langs: 0,
60+
},
61+
}
62+
)
63+
64+
for _, kw := range keywords {
65+
t.Run(kw.Keyword, func(t *testing.T) {
66+
total, res, err := indexer.Search(kw.RepoIDs, "", kw.Keyword, 1, 10)
67+
assert.NoError(t, err)
68+
assert.EqualValues(t, len(kw.IDs), total)
69+
70+
var ids = make([]int64, 0, len(res))
71+
for _, hit := range res {
72+
ids = append(ids, hit.RepoID)
73+
assert.EqualValues(t, "# repo1\n\nDescription for repo1", hit.Content)
74+
}
75+
assert.EqualValues(t, kw.IDs, ids)
76+
77+
})
78+
}
79+
80+
//indexer.Delete(repoID)
81+
})
82+
}

0 commit comments

Comments
 (0)