Make ICU tokenizer the default #2707

Merged: 6 commits, May 11, 2022
Changes from 1 commit
switch tests to ICU tokenizer as default
lonvia committed May 10, 2022
commit adeebec32a5e75318eb8714a38087055e3b2e757
test/bdd/environment.py: 1 addition & 1 deletion

@@ -59,5 +59,5 @@ def after_scenario(context, scenario):
 
 def before_tag(context, tag):
     if tag == 'fail-legacy':
-        if context.config.userdata['TOKENIZER'] in (None, 'legacy'):
+        if context.config.userdata['TOKENIZER'] == 'legacy':
             context.scenario.skip("Not implemented in legacy tokenizer")
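
With ICU as the default, a scenario tagged fail-legacy is now skipped only when the tokenizer is explicitly set to 'legacy'; an unset TOKENIZER no longer implies legacy. A minimal sketch of the new rule (illustrative only: a plain dict stands in for behave's context.config.userdata):

# Illustrative only: `userdata` is a plain dict standing in for
# behave's context.config.userdata.
def should_skip_fail_legacy(userdata):
    # Previously both None (unset) and 'legacy' triggered the skip,
    # because legacy was the default tokenizer. Now only an explicit
    # 'legacy' does; unset means ICU.
    return userdata.get('TOKENIZER') == 'legacy'

assert should_skip_fail_legacy({}) is False                     # unset: ICU, scenario runs
assert should_skip_fail_legacy({'TOKENIZER': 'icu'}) is False
assert should_skip_fail_legacy({'TOKENIZER': 'legacy'}) is True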
test/bdd/steps/nominatim_environment.py: 1 addition & 1 deletion

@@ -207,7 +207,7 @@ def setup_api_db(self):
             self.run_nominatim('add-data', '--tiger-data', str((testdata / 'tiger').resolve()))
             self.run_nominatim('freeze')
 
-            if self.tokenizer != 'icu':
+            if self.tokenizer == 'legacy':
                 phrase_file = str((testdata / 'specialphrases_testdb.sql').resolve())
                 run_script(['psql', '-d', self.api_test_db, '-f', phrase_file])
             else:
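
The condition is inverted so that only the legacy tokenizer keeps the psql import of the pre-built special-phrase dump; ICU, now the default, and any future tokenizer take the else branch, which this hunk cuts off. A hedged sketch of the selection, with the non-legacy path left as a placeholder since it is not shown above:

# Sketch only: run_psql and import_phrases_for_tokenizer are placeholder
# callables, because the else branch is cut off in the hunk above.
def load_special_phrases(tokenizer, run_psql, import_phrases_for_tokenizer):
    if tokenizer == 'legacy':
        # Legacy test DB: load the pre-built SQL dump directly.
        run_psql('specialphrases_testdb.sql')
    else:
        # ICU (the new default) and any other tokenizer use the regular
        # special-phrase import path instead.
        import_phrases_for_tokenizer()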
test/bdd/steps/steps_db_ops.py: 1 addition & 1 deletion

@@ -266,7 +266,7 @@ def check_word_table_for_postcodes(context, exclude, postcodes):
     plist.sort()
 
     with context.db.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
-        if nctx.tokenizer == 'icu':
+        if nctx.tokenizer != 'legacy':
             cur.execute("SELECT word FROM word WHERE type = 'P' and word = any(%s)",
                         (plist,))
         else:
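
As above, the ICU-style check becomes the default for every tokenizer except legacy. A self-contained sketch of that branch, assuming an open psycopg2 connection and the ICU word table layout used in the query (postcodes stored as rows with type = 'P' and the value in the word column); the function name is hypothetical:

import psycopg2.extras

def postcodes_in_word_table(conn, postcodes):
    # Sketch only: assumes the ICU-style word table schema shown in the
    # query above (type = 'P', postcode value in the `word` column).
    with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
        cur.execute("SELECT word FROM word WHERE type = 'P' and word = any(%s)",
                    (list(postcodes),))
        return {row[0] for row in cur}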
test/python/conftest.py: 0 additions & 5 deletions

@@ -211,11 +211,6 @@ def osmline_table(temp_db_with_extensions, table_factory):
                      country_code VARCHAR(2)""")
 
 
-@pytest.fixture
-def word_table(temp_db_conn):
-    return mocks.MockWordTable(temp_db_conn)
-
-
 @pytest.fixture
 def sql_preprocessor_cfg(tmp_path, table_factory, temp_db_with_extensions):
     table_factory('country_name', 'partition INT', ((0, ), (1, ), (2, )))
test/python/mocks.py: 1 addition & 1 deletion

@@ -14,7 +14,7 @@
 from nominatim.db import properties
 
 # This must always point to the mock word table for the default tokenizer.
-from mock_legacy_word_table import MockLegacyWordTable as MockWordTable
+from mock_icu_word_table import MockIcuWordTable as MockWordTable
 
 class MockPlacexTable:
     """ A placex table for testing.
test/python/tools/test_database_import.py: 1 addition & 1 deletion

@@ -179,7 +179,7 @@ def test_truncate_database_tables(temp_db_conn, temp_db_cursor, table_factory, w
 
 @pytest.mark.parametrize("threads", (1, 5))
 def test_load_data(dsn, place_row, placex_table, osmline_table,
-                   word_table, temp_db_cursor, threads):
+                   temp_db_cursor, threads):
     for func in ('precompute_words', 'getorcreate_housenumber_id', 'make_standard_name'):
         temp_db_cursor.execute(f"""CREATE FUNCTION {func} (src TEXT)
                                    RETURNS TEXT AS $$ SELECT 'a'::TEXT $$ LANGUAGE SQL
test/python/tools/test_migration.py: 7 additions & 1 deletion

@@ -14,6 +14,8 @@
 from nominatim.errors import UsageError
 import nominatim.version
 
+from mock_legacy_word_table import MockLegacyWordTable
+
 class DummyTokenizer:
 
     def update_sql_functions(self, config):
@@ -26,6 +28,10 @@ def postprocess_mock(monkeypatch):
     monkeypatch.setattr(migration.tokenizer_factory, 'get_tokenizer_for_db',
                         lambda *args: DummyTokenizer())
 
+@pytest.fixture
+def legacy_word_table(temp_db_conn):
+    return MockLegacyWordTable(temp_db_conn)
+
 
 def test_no_migration_old_versions(temp_db_with_extensions, table_factory, def_config):
     table_factory('country_name', 'name HSTORE, country_code TEXT')
@@ -156,7 +162,7 @@ def test_add_nominatim_property_table_repeat(temp_db_conn, temp_db_cursor,
 
 
 def test_change_housenumber_transliteration(temp_db_conn, temp_db_cursor,
-                                            word_table, placex_table):
+                                            legacy_word_table, placex_table):
     placex_table.add(housenumber='3A')
 
     temp_db_cursor.execute("""CREATE OR REPLACE FUNCTION make_standard_name(name TEXT)
test/python/tools/test_postcodes.py: 1 addition & 1 deletion

@@ -65,7 +65,7 @@ def tokenizer():
     return dummy_tokenizer.DummyTokenizer(None, None)
 
 @pytest.fixture
-def postcode_table(temp_db_conn, placex_table, word_table):
+def postcode_table(temp_db_conn, placex_table):
     return MockPostcodeTable(temp_db_conn)