From 3e35c90aa34b520c523f42288a666d9c6fbd1381 Mon Sep 17 00:00:00 2001 From: "m.huber" Date: Mon, 4 Mar 2024 16:47:46 +0100 Subject: [PATCH 1/3] Add config option to enable/disable default fuzzy search Explicitly enabling or disabling it should be a UI option --- custom/conf/app.example.ini | 4 ++++ docs/content/administration/config-cheat-sheet.en-us.md | 1 + modules/setting/indexer.go | 7 +++++++ 3 files changed, 12 insertions(+) diff --git a/custom/conf/app.example.ini b/custom/conf/app.example.ini index 17d6cd3a35e02..1a2e99d841ee7 100644 --- a/custom/conf/app.example.ini +++ b/custom/conf/app.example.ini @@ -1380,6 +1380,10 @@ LEVEL = Info ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; +;; If the indexer type supports fuzzy search, use it by default. This setting is used by both the issue indexer and the repo indexer. +;INDEXER_DEFAULT_FUZZY = true +;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; Issue Indexer settings ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; diff --git a/docs/content/administration/config-cheat-sheet.en-us.md b/docs/content/administration/config-cheat-sheet.en-us.md index 8a01711949a30..0019ec144552a 100644 --- a/docs/content/administration/config-cheat-sheet.en-us.md +++ b/docs/content/administration/config-cheat-sheet.en-us.md @@ -467,6 +467,7 @@ relation to port exhaustion. ## Indexer (`indexer`) +- `INDEXER_DEFAULT_FUZZY`: **true**: If the indexer type supports fuzzy search, use it by default. This setting is used by both the issue indexer and the repo indexer. - `ISSUE_INDEXER_TYPE`: **bleve**: Issue indexer type, currently supported: `bleve`, `db`, `elasticsearch` or `meilisearch`. - `ISSUE_INDEXER_CONN_STR`: ****: Issue indexer connection string, available when ISSUE_INDEXER_TYPE is elasticsearch (e.g. http://elastic:password@localhost:9200) or meilisearch (e.g. http://:apikey@localhost:7700) - `ISSUE_INDEXER_NAME`: **gitea_issues**: Issue indexer name, available when ISSUE_INDEXER_TYPE is elasticsearch or meilisearch. 
diff --git a/modules/setting/indexer.go b/modules/setting/indexer.go index 15f61502427db..6520f76de1e1c 100644 --- a/modules/setting/indexer.go +++ b/modules/setting/indexer.go @@ -16,6 +16,8 @@ import ( // Indexer settings var Indexer = struct { + DefaultFuzzy bool + IssueType string IssuePath string IssueConnStr string @@ -34,6 +36,8 @@ var Indexer = struct { ExcludePatterns []glob.Glob ExcludeVendored bool }{ + DefaultFuzzy: true, + IssueType: "bleve", IssuePath: "indexers/issues.bleve", IssueConnStr: "", @@ -52,6 +56,9 @@ var Indexer = struct { func loadIndexerFrom(rootCfg ConfigProvider) { sec := rootCfg.Section("indexer") + + Indexer.DefaultFuzzy = sec.Key("INDEXER_DEFAULT_FUZZY").MustBool(Indexer.DefaultFuzzy) + Indexer.IssueType = sec.Key("ISSUE_INDEXER_TYPE").MustString("bleve") if Indexer.IssueType == "bleve" { Indexer.IssuePath = filepath.ToSlash(sec.Key("ISSUE_INDEXER_PATH").MustString(filepath.ToSlash(filepath.Join(AppDataPath, "indexers/issues.bleve")))) From 7522710a256974dfdf06ea50af3d4ee256fa6136 Mon Sep 17 00:00:00 2001 From: "m.huber" Date: Mon, 4 Mar 2024 17:13:59 +0100 Subject: [PATCH 2/3] handle elasticsearch --- modules/indexer/code/elasticsearch/elasticsearch.go | 3 +++ modules/indexer/code/internal/indexer.go | 1 + modules/indexer/issues/elasticsearch/elasticsearch.go | 7 ++++++- 3 files changed, 10 insertions(+), 1 deletion(-) diff --git a/modules/indexer/code/elasticsearch/elasticsearch.go b/modules/indexer/code/elasticsearch/elasticsearch.go index 0f70f1348552c..4a169e59b417a 100644 --- a/modules/indexer/code/elasticsearch/elasticsearch.go +++ b/modules/indexer/code/elasticsearch/elasticsearch.go @@ -288,6 +288,9 @@ func (b *Indexer) Search(ctx context.Context, repoIDs []int64, language, keyword } kwQuery := elastic.NewMultiMatchQuery(keyword, "content").Type(searchType) + if !setting.Indexer.DefaultFuzzy { + kwQuery = kwQuery.Fuzziness("0") + } query := elastic.NewBoolQuery() query = query.Must(kwQuery) if len(repoIDs) > 0 { diff --git 
a/modules/indexer/code/internal/indexer.go b/modules/indexer/code/internal/indexer.go index da3ac3623c92f..1eee1d79cca8a 100644 --- a/modules/indexer/code/internal/indexer.go +++ b/modules/indexer/code/internal/indexer.go @@ -16,6 +16,7 @@ type Indexer interface { internal.Indexer Index(ctx context.Context, repo *repo_model.Repository, sha string, changes *RepoChanges) error Delete(ctx context.Context, repoID int64) error + // TODO: use search option struct Search(ctx context.Context, repoIDs []int64, language, keyword string, page, pageSize int, isMatch bool) (int64, []*SearchResult, []*SearchResultLanguages, error) } diff --git a/modules/indexer/issues/elasticsearch/elasticsearch.go b/modules/indexer/issues/elasticsearch/elasticsearch.go index 3acd3ade71528..39bc7e97e69d6 100644 --- a/modules/indexer/issues/elasticsearch/elasticsearch.go +++ b/modules/indexer/issues/elasticsearch/elasticsearch.go @@ -13,6 +13,7 @@ import ( indexer_internal "code.gitea.io/gitea/modules/indexer/internal" inner_elasticsearch "code.gitea.io/gitea/modules/indexer/internal/elasticsearch" "code.gitea.io/gitea/modules/indexer/issues/internal" + "code.gitea.io/gitea/modules/setting" "github.com/olivere/elastic/v7" ) @@ -141,7 +142,11 @@ func (b *Indexer) Search(ctx context.Context, options *internal.SearchOptions) ( query := elastic.NewBoolQuery() if options.Keyword != "" { - query.Must(elastic.NewMultiMatchQuery(options.Keyword, "title", "content", "comments")) + keywordQuery := elastic.NewMultiMatchQuery(options.Keyword, "title", "content", "comments") + if !setting.Indexer.DefaultFuzzy { + keywordQuery = keywordQuery.Fuzziness("0") + } + query.Must(keywordQuery) } if len(options.RepoIDs) > 0 { From 5ed1d092d6140772882be2a92f41820bdff7c040 Mon Sep 17 00:00:00 2001 From: "m.huber" Date: Mon, 4 Mar 2024 20:44:04 +0100 Subject: [PATCH 3/3] bleve: wip - test if it works well --- modules/indexer/code/bleve/bleve.go | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git 
a/modules/indexer/code/bleve/bleve.go b/modules/indexer/code/bleve/bleve.go index 8ba50ed77c938..1d2e19648996e 100644 --- a/modules/indexer/code/bleve/bleve.go +++ b/modules/indexer/code/bleve/bleve.go @@ -243,11 +243,17 @@ func (b *Indexer) Search(ctx context.Context, repoIDs []int64, language, keyword prefixQuery := bleve.NewPrefixQuery(keyword) prefixQuery.FieldVal = "Content" keywordQuery = prefixQuery - } else { + } else if !setting.Indexer.DefaultFuzzy { phraseQuery := bleve.NewMatchPhraseQuery(keyword) phraseQuery.FieldVal = "Content" phraseQuery.Analyzer = repoIndexerAnalyzer keywordQuery = phraseQuery + } else { + fuzzyQuery := bleve.NewFuzzyQuery(keyword) + fuzzyQuery.FieldVal = "Content" + fuzzyQuery.SetPrefix(len(keyword)) + fuzzyQuery.Fuzziness = 2 //Levenshtein distance + keywordQuery = fuzzyQuery } if len(repoIDs) > 0 {