fix: Fix issue with missing suggestions (#1339)

* fix: Fix an issue with suggestions getting lost Some suggestions would get lost especially when the first character did not match. * fix: make sure accents are preserved when adding words to dictionary * fix: preserve legacy behavior on non-case sensitive dictionaries * dev: update django snapshot
streetsidesoftware · Jun 13, 2021 · cfecde8 · cfecde8
1 parent a948808
commit cfecde8
Show file tree

Hide file tree

Showing 19 changed files with 481 additions and 221 deletions.
diff --git a/cspell.code-workspace b/cspell.code-workspace
@@ -32,6 +32,44 @@
             "path": "./packages/cspell"
         }
     ],
+    "launch": {
+        "configurations": [
+            {
+                "type": "node",
+                "request": "launch",
+                "name": "Test: Jest current-file",
+                "program": "${fileWorkspaceFolder}/node_modules/.bin/jest",
+                "cwd": "${fileDirname}",
+                "args": [
+                    "--runInBand",
+                    "${fileBasename}"
+                ],
+                "console": "integratedTerminal",
+                "internalConsoleOptions": "neverOpen",
+                "disableOptimisticBPs": true,
+                "windows": {
+                    "program": "${fileWorkspaceFolder}/node_modules/jest/bin/jest",
+                }
+            },
+            {
+                "type": "node",
+                "request": "launch",
+                "name": "Test: Jest Entire Folder",
+                "program": "${fileWorkspaceFolder}/node_modules/.bin/jest",
+                "cwd": "${fileWorkspaceFolder}",
+                "args": [
+                    "--runInBand"
+                ],
+                "console": "integratedTerminal",
+                "internalConsoleOptions": "neverOpen",
+                "disableOptimisticBPs": true,
+                "windows": {
+                    "program": "${fileWorkspaceFolder}/node_modules/jest/bin/jest",
+                }
+            }
+        ],
+        "compounds": []
+    },
     "settings": {
         "typescript.tsdk": "cspell-monorepo/node_modules/typescript/lib",
         "cSpell.customWorkspaceDictionaries": [

diff --git a/integration-tests/config/config.json b/integration-tests/config/config.json
@@ -191,7 +191,7 @@
     {
       "path": "django/django",
       "url": "https://github.com/django/django.git",
-      "commit": "8b4983cfd429e17c8092f4ff775915327effa6fa",
+      "commit": "316cc34d046ad86e100227772294f906fae1c2e5",
       "args": [
         "**/*.{md,py}"
       ]

diff --git a/integration-tests/snapshots/django/django/snapshot.txt b/integration-tests/snapshots/django/django/snapshot.txt
@@ -3,7 +3,7 @@ Repository: django/django
 Url: "https://github.com/django/django.git"
 Args: ["**/*.{md,py}"]
 Lines:
- CSpell: Files checked: 2128, Issues found: 7885 in 906 files
+ CSpell: Files checked: 2128, Issues found: 7886 in 907 files
  exit code: 1
 ./django/apps/config.py:128:76    - Unknown word (isclass)    -- getmembers(mod, inspect.isclass)
 ./django/apps/registry.py:132:40    - Unknown word (unconfigured) -- not ready" is due to unconfigured settings, accessing
@@ -440,7 +440,7 @@ Lines:
 ./django/core/management/commands/shell.py:23:97     - Unknown word (pythonrc)   -- environment variable and ~/.pythonrc.py script.',
 ./django/core/management/commands/shell.py:48:32     - Unknown word (rlcompleter) -- try: # Try activating rlcompleter, because it's handy
 ./django/core/management/commands/shell.py:75:45     - Unknown word (cpython)     -- the behavior of the cpython shell where an error
-./django/core/management/commands/showmigrations.py:130:19    - Unknown word (deps)       -- def print_deps(node):
+./django/core/management/commands/showmigrations.py:137:19    - Unknown word (deps)       -- def print_deps(node):
 ./django/core/management/commands/squashmigrations.py:118:13    - Unknown word (smigration) -- for smigration in migrations_to_squash
 ./django/core/management/templates.py:258:28    - Unknown word (mkdtemp)    -- tempdir = tempfile.mkdtemp(prefix=prefix, suffix
 ./django/core/management/templates.py:339:38    - Unknown word (IMODE)      -- permissions = stat.S_IMODE(st.st_mode) | stat.S
@@ -489,7 +489,7 @@ Lines:
 ./django/db/backends/base/operations.py:459:18    - Unknown word (autopk)     -- def validate_autopk_value(self, value):
 ./django/db/backends/base/operations.py:522:15    - Unknown word (ipaddressfield) -- def adapt_ipaddressfield_value(self, value):
 ./django/db/backends/base/schema.py:158:16    - Unknown word (sqls)       -- column_sqls = []
-./django/db/backends/base/schema.py:807:9     - Unknown word (rels)       -- rels_to_update = []
+./django/db/backends/base/schema.py:814:9     - Unknown word (rels)       -- rels_to_update = []
 ./django/db/backends/mysql/base.py:196:14    - Unknown word (conv)        -- 'conv': django_conversions
 ./django/db/backends/mysql/base.py:4:10      - Unknown word (mysqlclient) -- Requires mysqlclient: https://pypi.org/project
 ./django/db/backends/mysql/base.py:55:29     - Unknown word (reraises)    -- exception instances and reraises them with the correct
@@ -1466,6 +1466,7 @@ Lines:
 ./tests/humanize_tests/tests.py:346:24    - Unknown word (lety)       -- 'před 2\xa0lety, 4\xa0měsíci',
 ./tests/humanize_tests/tests.py:352:22    - Unknown word (roky)       -- 'za 2\xa0roky, 4\xa0měsíce',
 ./tests/i18n/commands/__init__.py:12:49     - Unknown word (Foos)       -- s Foo", "%(number)s Foos", number) % {'number
+./tests/i18n/contenttypes/tests.py:25:57     - Unknown word (Société)    -- company_type), 'i18n | Société')
 ./tests/i18n/patterns/tests.py:134:65    - Unknown word (vertaald)   -- prefix-translated'), '/vertaald/')
 ./tests/i18n/patterns/tests.py:146:53    - Unknown word (gebruikers) -- reverse('users'), '/nl/gebruikers/')
 ./tests/i18n/patterns/tests.py:157:81    - Unknown word (profiel)    -- register/', 'nl'), '/nl/profiel/registreren/')
@@ -1823,16 +1824,16 @@ Lines:
 ./tests/responses/tests.py:140:19    - Unknown word (textiowrapper) -- def test_wrap_textiowrapper(self):
 ./tests/responses/tests.py:28:38     - Unknown word (qwer)       -- writelines(['asdf\n', 'qwer\n'])
 ./tests/responses/tests.py:76:34     - Unknown word (J'attendrai) -- HttpResponse(status="J'attendrai")
-./tests/runtests.py:129:9     - Unknown word (subdirs)    -- subdirs_to_skip = SUBDIRS_TO
-./tests/runtests.py:301:40    - Unknown word (finalizer)  -- multiprocessing.util finalizer that tries to remove
+./tests/runtests.py:130:9     - Unknown word (subdirs)    -- subdirs_to_skip = SUBDIRS_TO
+./tests/runtests.py:319:40    - Unknown word (finalizer)  -- multiprocessing.util finalizer that tries to remove
 ./tests/runtests.py:3:8       - Unknown word (atexit)     -- import atexit
-./tests/runtests.py:80:1      - Unknown word (SUBDIRS)    -- SUBDIRS_TO_SKIP = {
+./tests/runtests.py:81:1      - Unknown word (SUBDIRS)    -- SUBDIRS_TO_SKIP = {
 ./tests/schema/models.py:156:21    - Unknown word (INTEGERPK)  -- db_table = "INTEGERPK" # uppercase to ensure
 ./tests/schema/tests.py:1596:35   - Unknown word (tagm)       -- assertEqual(columns['tagm2mtest_id'][0], connection
 ./tests/schema/tests.py:1596:40   - Unknown word (mtest)      -- assertEqual(columns['tagm2mtest_id'][0], connection
 ./tests/schema/tests.py:246:48    - Unknown word (repointing) -- out of FK order, then repointing, works"
 ./tests/schema/tests.py:3176:81   - Unknown word (upcase)     -- and, for Oracle, un-upcase
-./tests/schema/tests.py:3721:27   - Unknown word (dtob)       -- self.assertNotIn("dtob_auto_now", columns)
+./tests/schema/tests.py:3748:27   - Unknown word (dtob)       -- self.assertNotIn("dtob_auto_now", columns)
 ./tests/select_for_update/tests.py:142:44    - Unknown word (eucountry)  -- ['select_for_update_eucountry"."country_ptr_id']
 ./tests/select_for_update/tests.py:170:36    - Unknown word (eucity)     -- 'select_for_update_eucity"."id',
 ./tests/select_for_update/tests.py:323:14    - Unknown word (unsuported) -- def test_unsuported_no_key_raises_error

diff --git a/packages/cspell-lib/docs/accents.md b/packages/cspell-lib/docs/accents.md
@@ -0,0 +1,37 @@
+# Case sensitivity and Accents
+
+## Problem
+
+Many programming languages have their own casing conventions. Things like `camelCase` or Title case for ClassNames might run counter to the proper casing of words.
+For example, `english` in the variable name `list_of_english_words` would be considered incorrect because it is a proper name.
+
+The other challenge is accents. Many programming languages do not allow accented character as variable or class names.
+
+## Approach
+
+Beginning with cspell version 5, the spell checker allows for case sensitive checking. This is achieved by having two effective dictionaries with every word list.
+
+Let take a look at how the German word for business, `Geschäft`, is stored.
+
+There will be three forms: `Geschäft`, `~geschäft`, and `~geschaft`. Words prefixed with `~` that is is a case insensitive version of the word.
+
+Result when spell checking various forms of `Geschäft`:
+
+| word       | cs<sup>1</sup> | non<sup>2</sup> |
+| ---------- | -------------- | --------------- |
+| `Geschäft` | ✅             | ✅              |
+| `GESCHÄFT` | ✅             | ✅              |
+| `Geschaft` | ❌             | ✅              |
+| `geschäft` | ❌             | ✅              |
+| `geschaft` | ❌             | ✅              |
+| `GESCHAFT` | ❌             | ✅              |
+| `gescháft` | ❌             | ❌              |
+| `Gescháft` | ❌             | ❌              |
+
+<sup>1</sup>cs - case sensitive
+
+<sup>2</sup>non - not case sensitive
+
+Note: If accents are present in a word, they must match the accents in the original word.
+
+<!--- cspell:words Geschäft gescháft --->
diff --git a/packages/cspell-lib/src/SpellingDictionary/SpellingDictionary.test.ts b/packages/cspell-lib/src/SpellingDictionary/SpellingDictionary.test.ts
@@ -2,7 +2,6 @@ import { __testMethods } from './SpellingDictionaryMethods';
 import { createSpellingDictionary } from './createSpellingDictionary';
 import { SpellingDictionaryFromTrie } from './SpellingDictionaryFromTrie';
 import { Trie } from 'cspell-trie-lib';
-import { FunctionArgs } from '../util/types';
 
 // cSpell:ignore aple
 
@@ -92,8 +91,9 @@ describe('Verify building Dictionary', () => {
         const trie = Trie.create(words);
         const dict = new SpellingDictionaryFromTrie(trie, 'trie');
         // cspell:ignore cattles
-        const suggestions = dict.suggest('Cattles').map(({ word }) => word);
-        expect(suggestions[0]).toBe('cattle');
+        const results = dict.suggest('Cattles');
+        const suggestions = results.map(({ word }) => word);
+        expect(suggestions).toEqual(['cattle', 'battles', 'rattles', 'tattles', 'battle', 'rattle']);
         expect(suggestions).toEqual(expect.not.arrayContaining(['banana']));
     });
 
@@ -111,107 +111,96 @@ describe('Verify building Dictionary', () => {
         expect(suggestions).toEqual(expect.not.arrayContaining(['banana']));
     });
 
-    test('wordDictionaryFormsCollector', () => {
-        function testCase(word: string, isCaseSensitive: boolean, expected: string[]) {
-            const collector = __testMethods.wordDictionaryFormsCollector(isCaseSensitive);
+    type Test = [string, boolean, string[]];
+    // cspell:ignore café
+    const tests: Test[] = [
+        ['house', false, ['house']],
+        ['House', false, ['House', 'house']],
+        ['café', false, ['cafe', 'café']],
+        ['Café', false, ['Cafe', 'Café', 'cafe', 'café']],
+        ['House', true, ['House', '~house']],
+        ['HOUSE', true, ['HOUSE', '~house']],
+        ['Café', true, ['Café', '~Cafe', '~cafe', '~café']],
+        // Make sure all accent forms work.
+        ['café'.normalize(), false, ['cafe', 'café']],
+        ['café'.normalize('NFD'), false, ['cafe', 'café']],
+        ['café'.normalize('NFKC'), false, ['cafe', 'café']],
+        ['café'.normalize('NFKD'), false, ['cafe', 'café']],
+    ];
+    test.each(tests)(
+        'wordDictionaryFormsCollector %s %o %o',
+        (word: string, isCaseSensitive: boolean, expected: string[]) => {
+            const collector = __testMethods.wordDictionaryFormsCollector(isCaseSensitive ? '~' : '');
             expect([...collector(word)].sort()).toEqual(expected.sort());
             expect([...collector(word)]).toEqual([]);
         }
-        type Test = [string, boolean, string[]];
-        // cspell:ignore café
-        const tests: Test[] = [
-            ['house', false, ['house']],
-            ['House', false, ['House', 'house']],
-            ['café', false, ['cafe', 'café']],
-            ['Café', false, ['Cafe', 'Café', 'cafe', 'café']],
-            ['House', true, ['House', '>house']],
-            ['HOUSE', true, ['HOUSE', '>house']],
-            ['Café', true, ['Café', '>Cafe', '>cafe', '>café']],
-            // Make sure all accent forms work.
-            ['café'.normalize(), false, ['cafe', 'café']],
-            ['café'.normalize('NFD'), false, ['cafe', 'café']],
-            ['café'.normalize('NFKC'), false, ['cafe', 'café']],
-            ['café'.normalize('NFKD'), false, ['cafe', 'café']],
-        ];
-        tests.forEach((t) => testCase(...t));
-    });
+    );
 });
 
 describe('Validate wordSearchForms', () => {
-    function testCase(word: string, isCaseSensitive: boolean, ignoreCase: boolean, expected: string[]) {
-        test(`${word} ${isCaseSensitive} ${ignoreCase} ${expected}`, () => {
-            const words = __testMethods.wordSearchFormsArray(word, isCaseSensitive, ignoreCase);
-            expect(words.sort()).toEqual(expected.sort());
-        });
-    }
-    type TestCase = FunctionArgs<typeof testCase>;
-    // cspell:ignore café
-    const tests: TestCase[] = [
-        // word, dic is case sensitive, ignoreCase on lookup, expected
-        ['house', false, false, ['house']],
-        ['House', false, false, ['house']],
-        ['House', false, false, ['house']],
-        ['House', true, false, ['House', 'house']],
-        ['HOUSE', false, false, ['house']],
-        ['HOUSE', true, false, ['HOUSE', 'House', 'house']],
-        ['café', false, false, ['café']],
-        ['café', true, false, ['café']],
-        ['café', true, true, ['cafe']],
-        ['Café', false, false, ['café']],
-        ['Café', false, true, ['cafe', 'café']],
-        ['Café', true, false, ['Café', 'café']],
-        ['Café', true, true, ['cafe']],
-        ['CAFÉ', false, false, ['café']],
-        ['CAFÉ', false, true, ['cafe', 'café']],
-        ['CAFÉ', true, false, ['CAFÉ', 'Café', 'café']],
-        ['CAFÉ', true, true, ['cafe']],
-        // Make sure all accent forms work.
-        ['café'.normalize(), false, false, ['café']],
-        ['café'.normalize('NFD'), false, false, ['café']],
-        ['café'.normalize('NFKC'), false, false, ['café']],
-        ['café'.normalize('NFKD'), false, false, ['café']],
-    ];
-    tests.forEach((t) => testCase(...t));
+    test.each`
+        word                        | isCaseSensitive | ignoreCase | expected
+        ${'house'}                  | ${false}        | ${false}   | ${['house']}
+        ${'House'}                  | ${false}        | ${false}   | ${['house']}
+        ${'House'}                  | ${false}        | ${false}   | ${['house']}
+        ${'House'}                  | ${true}         | ${false}   | ${['House', 'house']}
+        ${'HOUSE'}                  | ${false}        | ${false}   | ${['house']}
+        ${'HOUSE'}                  | ${true}         | ${false}   | ${['HOUSE', 'House', 'house']}
+        ${'café'}                   | ${false}        | ${false}   | ${['cafe', 'café']}
+        ${'café'}                   | ${true}         | ${false}   | ${['café']}
+        ${'café'}                   | ${true}         | ${true}    | ${['café']}
+        ${'Café'}                   | ${false}        | ${false}   | ${['cafe', 'café']}
+        ${'Café'}                   | ${false}        | ${true}    | ${['cafe', 'café']}
+        ${'Café'}                   | ${true}         | ${false}   | ${['Café', 'café']}
+        ${'Café'}                   | ${true}         | ${true}    | ${['café']}
+        ${'CAFÉ'}                   | ${false}        | ${false}   | ${['cafe', 'café']}
+        ${'CAFÉ'}                   | ${false}        | ${true}    | ${['cafe', 'café']}
+        ${'CAFÉ'}                   | ${true}         | ${false}   | ${['CAFÉ', 'Café', 'café']}
+        ${'CAFÉ'}                   | ${true}         | ${true}    | ${['café']}
+        ${'café'.normalize()}       | ${false}        | ${false}   | ${['cafe', 'café']}
+        ${'café'.normalize('NFD')}  | ${false}        | ${false}   | ${['cafe', 'café']}
+        ${'café'.normalize('NFKC')} | ${false}        | ${false}   | ${['cafe', 'café']}
+        ${'café'.normalize('NFKD')} | ${false}        | ${false}   | ${['cafe', 'café']}
+    `('$word $isCaseSensitive $ignoreCase $expected', ({ word, isCaseSensitive, ignoreCase, expected }) => {
+        const words = __testMethods.wordSearchFormsArray(word, isCaseSensitive, ignoreCase);
+        expect(words.sort()).toEqual(expected.sort());
+    });
 });
 
 describe('Verify Case Sensitive Dictionaries', () => {
-    function testHas(word: string, ignoreCase: boolean | undefined, expected: boolean) {
-        test(`Has ${word} Case: ${ignoreCase} Exp: ${expected}`, async () => {
-            const dict = await sampleDict();
-            expect(dict.has(word, { ignoreCase })).toBe(expected);
-        });
-    }
-
-    const tests: FunctionArgs<typeof testHas>[] = [
-        ['Paris', undefined, true],
-        ['PARIS', undefined, true],
-        ['paris', undefined, true],
-        ['Paris', true, true],
-        ['PARIS', true, true],
-        ['paris', true, true],
-        ['Paris', false, true],
-        ['PARIS', false, true],
-        ['paris', false, false],
-        ['Köln', false, true],
-        ['köln', false, false],
-        ['KÖLN', false, true],
-    ];
-    tests.forEach((t) => testHas(...t));
-
-    test('Suggestions 1', async () => {
-        // cspell:ignore koln
-        const dict = await sampleDict();
-        const sugs = dict.suggest('kuln'); // cspell:disable-line
-        const sugWords = sugs.map((s) => s.word);
-        expect(sugWords).toEqual(['Köln']);
+    test.each`
+        word       | ignoreCase   | expected
+        ${'Paris'} | ${undefined} | ${true}
+        ${'PARIS'} | ${undefined} | ${true}
+        ${'paris'} | ${undefined} | ${true}
+        ${'Paris'} | ${true}      | ${true}
+        ${'PARIS'} | ${true}      | ${true}
+        ${'paris'} | ${true}      | ${true}
+        ${'Paris'} | ${false}     | ${true}
+        ${'PARIS'} | ${false}     | ${true}
+        ${'paris'} | ${false}     | ${false}
+        ${'Köln'}  | ${false}     | ${true}
+        ${'köln'}  | ${false}     | ${false}
+        ${'KÖLN'}  | ${false}     | ${true}
+    `(`Has $word Case: $ignoreCase Exp: $expected`, ({ word, ignoreCase, expected }) => {
+        const dict = sampleDict();
+        expect(dict.has(word, { ignoreCase })).toBe(expected);
     });
 
-    test('Suggestions 2', async () => {
+    // cspell:ignore kuln
+    test.each`
+        word      | ignoreCase | expected
+        ${'köln'} | ${false}   | ${['Köln']}
+        ${'köln'} | ${true}    | ${['köln', 'koln', 'Köln']}
+        ${'koln'} | ${true}    | ${['koln', 'köln', 'Köln']}
+        ${'kuln'} | ${false}   | ${['Köln']}
+        ${'kuln'} | ${true}    | ${['koln', 'köln', 'Köln']}
+    `('Suggestions for $word $ignoreCase $expected', ({ word, ignoreCase, expected }) => {
         // cspell:ignore koln
-        const dict = await sampleDict();
-        const sugs = dict.suggest('kuln', { ignoreCase: false }); // cspell:disable-line
+        const dict = sampleDict();
+        const sugs = dict.suggest(word, { ignoreCase });
         const sugWords = sugs.map((s) => s.word);
-        expect(sugWords).toEqual(['Köln']);
+        expect(sugWords).toEqual(expected);
     });
 });
 

diff --git a/packages/cspell-lib/src/SpellingDictionary/SpellingDictionary.ts b/packages/cspell-lib/src/SpellingDictionary/SpellingDictionary.ts
@@ -41,7 +41,8 @@ export interface SpellingDictionary {
         word: string,
         numSuggestions?: number,
         compoundMethod?: CompoundWordsMethod,
-        numChanges?: number
+        numChanges?: number,
+        ignoreCase?: boolean
     ): SuggestionResult[];
     suggest(word: string, suggestOptions: SuggestOptions): SuggestionResult[];
     genSuggestions(collector: SuggestionCollector, suggestOptions: SuggestOptions): void;