Skip to content

Commit 1eb0c92

Browse files
committed
Support elastic search for code search
1 parent f422a11 commit 1eb0c92

File tree

6 files changed

+404
-102
lines changed

6 files changed

+404
-102
lines changed

modules/indexer/code/bleve.go

Lines changed: 51 additions & 81 deletions
Original file line numberDiff line numberDiff line change
@@ -57,10 +57,10 @@ func addUnicodeNormalizeTokenFilter(m *mapping.IndexMappingImpl) error {
5757
})
5858
}
5959

60-
// openIndexer open the index at the specified path, checking for metadata
60+
// openBleveIndexer opens the index at the specified path, checking for metadata
6161
// updates and bleve version updates. If index needs to be created (or
6262
// re-created), returns (nil, nil)
63-
func openIndexer(path string, latestVersion int) (bleve.Index, error) {
63+
func openBleveIndexer(path string, latestVersion int) (bleve.Index, error) {
6464
_, err := os.Stat(path)
6565
if err != nil && os.IsNotExist(err) {
6666
return nil, nil
@@ -103,54 +103,14 @@ func (d *RepoIndexerData) Type() string {
103103
return repoIndexerDocType
104104
}
105105

106-
func addUpdate(commitSha string, update fileUpdate, repo *models.Repository, batch rupture.FlushingBatch) error {
107-
// Ignore vendored files in code search
108-
if setting.Indexer.ExcludeVendored && enry.IsVendor(update.Filename) {
109-
return nil
110-
}
111-
stdout, err := git.NewCommand("cat-file", "-s", update.BlobSha).
112-
RunInDir(repo.RepoPath())
113-
if err != nil {
114-
return err
115-
}
116-
if size, err := strconv.Atoi(strings.TrimSpace(stdout)); err != nil {
117-
return fmt.Errorf("Misformatted git cat-file output: %v", err)
118-
} else if int64(size) > setting.Indexer.MaxIndexerFileSize {
119-
return addDelete(update.Filename, repo, batch)
120-
}
121-
122-
fileContents, err := git.NewCommand("cat-file", "blob", update.BlobSha).
123-
RunInDirBytes(repo.RepoPath())
124-
if err != nil {
125-
return err
126-
} else if !base.IsTextFile(fileContents) {
127-
// FIXME: UTF-16 files will probably fail here
128-
return nil
129-
}
130-
131-
id := filenameIndexerID(repo.ID, update.Filename)
132-
return batch.Index(id, &RepoIndexerData{
133-
RepoID: repo.ID,
134-
CommitID: commitSha,
135-
Content: string(charset.ToUTF8DropErrors(fileContents)),
136-
Language: analyze.GetCodeLanguage(update.Filename, fileContents),
137-
UpdatedAt: time.Now().UTC(),
138-
})
139-
}
140-
141-
func addDelete(filename string, repo *models.Repository, batch rupture.FlushingBatch) error {
142-
id := filenameIndexerID(repo.ID, filename)
143-
return batch.Delete(id)
144-
}
145-
146106
// Identifiers and schema version used by the bleve code indexer.
const (
147107
// repoIndexerAnalyzer is the analyzer name; presumably registered on the
// index mapping when the index is created (not visible here, confirm).
repoIndexerAnalyzer = "repoIndexerAnalyzer"
148108
// repoIndexerDocType is the document type returned by RepoIndexerData.Type.
repoIndexerDocType = "repoIndexerDocType"
149109
// repoIndexerLatestVersion is the version passed to openBleveIndexer and
// createBleveIndexer when the index is opened or created.
repoIndexerLatestVersion = 5
150110
)
151111

152-
// createRepoIndexer create a repo indexer if one does not already exist
153-
func createRepoIndexer(path string, latestVersion int) (bleve.Index, error) {
112+
// createBleveIndexer creates a bleve repo indexer if one does not already exist
113+
func createBleveIndexer(path string, latestVersion int) (bleve.Index, error) {
154114
docMapping := bleve.NewDocumentMapping()
155115
numericFieldMapping := bleve.NewNumericFieldMapping()
156116
numericFieldMapping.IncludeInAll = false
@@ -198,18 +158,6 @@ func createRepoIndexer(path string, latestVersion int) (bleve.Index, error) {
198158
return indexer, nil
199159
}
200160

201-
func filenameIndexerID(repoID int64, filename string) string {
202-
return indexerID(repoID) + "_" + filename
203-
}
204-
205-
func filenameOfIndexerID(indexerID string) string {
206-
index := strings.IndexByte(indexerID, '_')
207-
if index == -1 {
208-
log.Error("Unexpected ID in repo indexer: %s", indexerID)
209-
}
210-
return indexerID[index+1:]
211-
}
212-
213161
// Compile-time assertion that *BleveIndexer satisfies the Indexer interface.
var (
214162
_ Indexer = &BleveIndexer{}
215163
)
@@ -229,18 +177,59 @@ func NewBleveIndexer(indexDir string) (*BleveIndexer, bool, error) {
229177
return indexer, created, err
230178
}
231179

180+
func (b *BleveIndexer) addUpdate(commitSha string, update fileUpdate, repo *models.Repository, batch rupture.FlushingBatch) error {
181+
// Ignore vendored files in code search
182+
if setting.Indexer.ExcludeVendored && enry.IsVendor(update.Filename) {
183+
return nil
184+
}
185+
186+
stdout, err := git.NewCommand("cat-file", "-s", update.BlobSha).
187+
RunInDir(repo.RepoPath())
188+
if err != nil {
189+
return err
190+
}
191+
if size, err := strconv.Atoi(strings.TrimSpace(stdout)); err != nil {
192+
return fmt.Errorf("Misformatted git cat-file output: %v", err)
193+
} else if int64(size) > setting.Indexer.MaxIndexerFileSize {
194+
return b.addDelete(update.Filename, repo, batch)
195+
}
196+
197+
fileContents, err := git.NewCommand("cat-file", "blob", update.BlobSha).
198+
RunInDirBytes(repo.RepoPath())
199+
if err != nil {
200+
return err
201+
} else if !base.IsTextFile(fileContents) {
202+
// FIXME: UTF-16 files will probably fail here
203+
return nil
204+
}
205+
206+
id := filenameIndexerID(repo.ID, update.Filename)
207+
return batch.Index(id, &RepoIndexerData{
208+
RepoID: repo.ID,
209+
CommitID: commitSha,
210+
Content: string(charset.ToUTF8DropErrors(fileContents)),
211+
Language: analyze.GetCodeLanguage(update.Filename, fileContents),
212+
UpdatedAt: time.Now().UTC(),
213+
})
214+
}
215+
216+
func (b *BleveIndexer) addDelete(filename string, repo *models.Repository, batch rupture.FlushingBatch) error {
217+
id := filenameIndexerID(repo.ID, filename)
218+
return batch.Delete(id)
219+
}
220+
232221
// init initializes the indexer
233222
func (b *BleveIndexer) init() (bool, error) {
234223
var err error
235-
b.indexer, err = openIndexer(b.indexDir, repoIndexerLatestVersion)
224+
b.indexer, err = openBleveIndexer(b.indexDir, repoIndexerLatestVersion)
236225
if err != nil {
237226
return false, err
238227
}
239228
if b.indexer != nil {
240229
return false, nil
241230
}
242231

243-
b.indexer, err = createRepoIndexer(b.indexDir, repoIndexerLatestVersion)
232+
b.indexer, err = createBleveIndexer(b.indexDir, repoIndexerLatestVersion)
244233
if err != nil {
245234
return false, err
246235
}
@@ -261,38 +250,19 @@ func (b *BleveIndexer) Close() {
261250
}
262251

263252
// Index indexes the data
264-
func (b *BleveIndexer) Index(repoID int64) error {
265-
repo, err := models.GetRepositoryByID(repoID)
266-
if err != nil {
267-
return err
268-
}
269-
270-
sha, err := getDefaultBranchSha(repo)
271-
if err != nil {
272-
return err
273-
}
274-
changes, err := getRepoChanges(repo, sha)
275-
if err != nil {
276-
return err
277-
} else if changes == nil {
278-
return nil
279-
}
280-
253+
func (b *BleveIndexer) Index(repo *models.Repository, sha string, changes *repoChanges) error {
281254
batch := rupture.NewFlushingBatch(b.indexer, maxBatchSize)
282255
for _, update := range changes.Updates {
283-
if err := addUpdate(sha, update, repo, batch); err != nil {
256+
if err := b.addUpdate(sha, update, repo, batch); err != nil {
284257
return err
285258
}
286259
}
287260
for _, filename := range changes.RemovedFilenames {
288-
if err := addDelete(filename, repo, batch); err != nil {
261+
if err := b.addDelete(filename, repo, batch); err != nil {
289262
return err
290263
}
291264
}
292-
if err = batch.Flush(); err != nil {
293-
return err
294-
}
295-
return repo.UpdateIndexerStatus(models.RepoIndexerTypeCode, sha)
265+
return batch.Flush()
296266
}
297267

298268
// Delete deletes indexes by ids

0 commit comments

Comments
 (0)