From 90fba8345f26a2efcbd57e422e9390fa675a8772 Mon Sep 17 00:00:00 2001 From: Narek Galstyan Date: Sun, 30 Jul 2023 06:09:57 +0000 Subject: [PATCH 1/2] Modify test files to make their outputs deterministic across pg versions 1. Get rid of \d+ in regression tests as its output is postgres-version dependant 2. Add a file of common functions defined in all tests. use this in place of things like \d+ 3. Do not use variables not defined in pg11: --- scripts/run_all_tests.sh | 5 ++++ test/expected/debug_helpers.out | 13 ++++------- test/expected/hnsw.out | 39 ++++++++++++-------------------- test/expected/hnsw_insert.out | 36 +++++++++++++---------------- test/expected/wiki.out | 31 +++++-------------------- test/sql/debug_helpers.sql | 3 ++- test/sql/hnsw.sql | 6 ++--- test/sql/hnsw_insert.sql | 12 +++++----- test/sql/test_helpers/common.sql | 20 ++++++++++++++++ test/sql/wiki.sql | 8 +++---- 10 files changed, 82 insertions(+), 91 deletions(-) create mode 100644 test/sql/test_helpers/common.sql diff --git a/scripts/run_all_tests.sh b/scripts/run_all_tests.sh index b5e18d38f..52cb85ccf 100755 --- a/scripts/run_all_tests.sh +++ b/scripts/run_all_tests.sh @@ -38,6 +38,11 @@ do ${PSQL} postgres -c "drop database if exists ${TESTDB};" ${PSQL} postgres -c "create database ${TESTDB};" base=$(basename $testfile .sql) + + # psql options + # -e: echo commands + # -E: (passed manually, for debugging) echo hidden magic commands (\d, \di+, etc) + ${PSQL} testdb --quiet -f test/sql/test_helpers/common.sql > /dev/null ${PSQL} testdb -ef test/sql/$base.sql > $TMP_OUTDIR/$base.out 2>&1 || true DIFF=$(diff test/expected/$base.out $TMP_OUTDIR/$base.out || true) # diff has non-zero exit code if files differ. 
||true gets rid of error value diff --git a/test/expected/debug_helpers.out b/test/expected/debug_helpers.out index 7473840d1..e51c7faef 100644 --- a/test/expected/debug_helpers.out +++ b/test/expected/debug_helpers.out @@ -28,14 +28,11 @@ psql:test/sql/debug_helpers.sql:8: INFO: done init usearch index psql:test/sql/debug_helpers.sql:8: INFO: inserted 8 elements psql:test/sql/debug_helpers.sql:8: INFO: done saving 8 vectors CREATE INDEX - Table "public.small_world" - Column | Type | Collation | Nullable | Default | Storage | Compression | Stats target | Description ---------+----------------------+-----------+----------+---------+----------+-------------+--------------+------------- - id | character varying(3) | | | | extended | | | - vector | vector(3) | | | | extended | | | -Indexes: - "small_world_vector_idx" hnsw (vector) -Access method: heap +SELECT * FROM ldb_get_indexes('small_world'); + indexname | size | indexdef +------------------------+--------+------------------------------------------------------------------------------- + small_world_vector_idx | 176 kB | CREATE INDEX small_world_vector_idx ON public.small_world USING hnsw (vector) +(1 row) SHOW hnsw.init_k; hnsw.init_k diff --git a/test/expected/hnsw.out b/test/expected/hnsw.out index 8b1fe35ab..d8bbfee76 100644 --- a/test/expected/hnsw.out +++ b/test/expected/hnsw.out @@ -53,14 +53,11 @@ psql:test/sql/hnsw.sql:43: INFO: done init usearch index psql:test/sql/hnsw.sql:43: INFO: inserted 8 elements psql:test/sql/hnsw.sql:43: INFO: done saving 8 vectors CREATE INDEX - Table "public.small_world" - Column | Type | Collation | Nullable | Default | Storage | Compression | Stats target | Description ---------+----------------------+-----------+----------+---------+----------+-------------+--------------+------------- - id | character varying(3) | | | | extended | | | - vector | vector(3) | | | | extended | | | -Indexes: - "small_world_vector_idx" hnsw (vector) -Access method: heap +SELECT * FROM 
ldb_get_indexes('small_world'); + indexname | size | indexdef +------------------------+--------+------------------------------------------------------------------------------- + small_world_vector_idx | 176 kB | CREATE INDEX small_world_vector_idx ON public.small_world USING hnsw (vector) +(1 row) SELECT * FROM ( SELECT id, ROUND( (vector <-> '[0,0,0]')::numeric, 2) as dist @@ -113,14 +110,11 @@ psql:test/sql/hnsw.sql:59: INFO: done init usearch index psql:test/sql/hnsw.sql:59: INFO: inserted 8 elements psql:test/sql/hnsw.sql:59: INFO: done saving 8 vectors CREATE INDEX - Table "public.small_world" - Column | Type | Collation | Nullable | Default | Storage | Compression | Stats target | Description ---------+----------------------+-----------+----------+---------+----------+-------------+--------------+------------- - id | character varying(3) | | | | extended | | | - vector | vector(3) | | | | extended | | | -Indexes: - "small_world_vector_idx" hnsw (vector) WITH (m='2', ef='11', ef_construction='12') -Access method: heap +SELECT * FROM ldb_get_indexes('small_world'); + indexname | size | indexdef +------------------------+--------+--------------------------------------------------------------------------------------------------------------------------- + small_world_vector_idx | 176 kB | CREATE INDEX small_world_vector_idx ON public.small_world USING hnsw (vector) WITH (m='2', ef='11', ef_construction='12') +(1 row) SELECT ROUND( (vector <-> '[0,0,0]')::numeric, 2) as dist FROM small_world @@ -192,14 +186,11 @@ psql:test/sql/hnsw.sql:85: INFO: done init usearch index psql:test/sql/hnsw.sql:85: INFO: inserted 8 elements psql:test/sql/hnsw.sql:85: INFO: done saving 8 vectors CREATE INDEX - Table "public.small_world" - Column | Type | Collation | Nullable | Default | Storage | Compression | Stats target | Description ---------+----------------------+-----------+----------+---------+----------+-------------+--------------+------------- - id | character varying(3) | 
| | | extended | | | - vector | vector(3) | | | | extended | | | -Indexes: - "small_world_vector_idx" hnsw (vector) WITH (m='11', ef='2', ef_construction='2') -Access method: heap +SELECT * FROM ldb_get_indexes('small_world'); + indexname | size | indexdef +------------------------+--------+-------------------------------------------------------------------------------------------------------------------------- + small_world_vector_idx | 176 kB | CREATE INDEX small_world_vector_idx ON public.small_world USING hnsw (vector) WITH (m='11', ef='2', ef_construction='2') +(1 row) SELECT * FROM ( SELECT id, ROUND( (vector <-> '[0,0,0]')::numeric, 2) as dist diff --git a/test/expected/hnsw_insert.out b/test/expected/hnsw_insert.out index 9eb2e4eee..cc26bf8a7 100644 --- a/test/expected/hnsw_insert.out +++ b/test/expected/hnsw_insert.out @@ -35,9 +35,9 @@ psql:test/sql/hnsw_insert.sql:8: INFO: done saving 1000 vectors CREATE INDEX SET enable_seqscan = off; SET -insert into small_world (id, vector) values ('xxx', '[0,0,0]'); +INSERT INTO small_world (id, vector) VALUES ('xxx', '[0,0,0]'); INSERT 0 1 -insert into small_world (id, vector) values ('x11', '[0,0,110]'); +INSERT INTO small_world (id, vector) VALUES ('x11', '[0,0,110]'); INSERT 0 1 INSERT INTO small_world (id, vector) VALUES ('000', '[0,0,0]'), @@ -217,38 +217,34 @@ psql:test/sql/hnsw_insert.sql:77: INFO: usearch index initialized 010 | 1.00 (10 rows) -select count(*) from sift_base1k; +SELECT count(*) from sift_base1k; psql:test/sql/hnsw_insert.sql:79: INFO: cost estimate count ------- 1000 (1 row) - List of relations - Schema | Name | Type | Owner | Table | Persistence | Access method | Size | Description ---------+----------------------------+-------+-----------+-----------------+-------------+---------------+--------+------------- - public | new_small_world_vector_idx | index | ngalstyan | new_small_world | permanent | hnsw | 176 kB | - public | sift_base1k_pkey | index | ngalstyan | sift_base1k | permanent | 
btree | 40 kB | - public | sift_base1k_v_idx | index | ngalstyan | sift_base1k | permanent | hnsw | 872 kB | - public | small_world_vector_idx | index | ngalstyan | small_world | permanent | hnsw | 176 kB | -(4 rows) +SELECT * from ldb_get_indexes('sift_base1k'); + indexname | size | indexdef +-------------------+--------+----------------------------------------------------------------------------- + sift_base1k_pkey | 40 kB | CREATE UNIQUE INDEX sift_base1k_pkey ON public.sift_base1k USING btree (id) + sift_base1k_v_idx | 872 kB | CREATE INDEX sift_base1k_v_idx ON public.sift_base1k USING hnsw (v) +(2 rows) INSERT INTO sift_base1k(v) SELECT v FROM sift_base1k WHERE id <= 444 AND v IS NOT NULL; INSERT 0 444 -select count(*) from sift_base1k; +SELECT count(*) from sift_base1k; psql:test/sql/hnsw_insert.sql:83: INFO: cost estimate count ------- 1444 (1 row) - List of relations - Schema | Name | Type | Owner | Table | Persistence | Access method | Size | Description ---------+----------------------------+-------+-----------+-----------------+-------------+---------------+---------+------------- - public | new_small_world_vector_idx | index | ngalstyan | new_small_world | permanent | hnsw | 176 kB | - public | sift_base1k_pkey | index | ngalstyan | sift_base1k | permanent | btree | 48 kB | - public | sift_base1k_v_idx | index | ngalstyan | sift_base1k | permanent | hnsw | 1168 kB | - public | small_world_vector_idx | index | ngalstyan | small_world | permanent | hnsw | 176 kB | -(4 rows) +SELECT * from ldb_get_indexes('sift_base1k'); + indexname | size | indexdef +-------------------+---------+----------------------------------------------------------------------------- + sift_base1k_pkey | 48 kB | CREATE UNIQUE INDEX sift_base1k_pkey ON public.sift_base1k USING btree (id) + sift_base1k_v_idx | 1168 kB | CREATE INDEX sift_base1k_v_idx ON public.sift_base1k USING hnsw (v) +(2 rows) diff --git a/test/expected/wiki.out b/test/expected/wiki.out index 41e393405..8cf8cf12b 
100644 --- a/test/expected/wiki.out +++ b/test/expected/wiki.out @@ -29,8 +29,6 @@ SET row_security = off; SET SET default_tablespace = ''; SET -SET default_table_access_method = heap; -SET CREATE TABLE tsv_data ( language text, page_url text, @@ -87,34 +85,17 @@ with t as (select id, page_title, context_page_description_ai <-> (select conte (10 rows) CREATE INDEX index1 ON tsv_data USING hnsw (context_page_description_ai vector_l2_ops); -psql:test/sql/wiki.sql:66: INFO: done init usearch index -psql:test/sql/wiki.sql:66: INFO: inserted 100 elements -psql:test/sql/wiki.sql:66: INFO: done saving 100 vectors +psql:test/sql/wiki.sql:64: INFO: done init usearch index +psql:test/sql/wiki.sql:64: INFO: inserted 100 elements +psql:test/sql/wiki.sql:64: INFO: done saving 100 vectors CREATE INDEX CREATE INDEX ON tsv_data USING hnsw (context_page_description_ai) with (ef = 100, ef_construction=150 , M=11, alg="hnswlib"); -psql:test/sql/wiki.sql:67: INFO: done init usearch index -psql:test/sql/wiki.sql:67: INFO: inserted 100 elements -psql:test/sql/wiki.sql:67: INFO: done saving 100 vectors +psql:test/sql/wiki.sql:65: INFO: done init usearch index +psql:test/sql/wiki.sql:65: INFO: inserted 100 elements +psql:test/sql/wiki.sql:65: INFO: done saving 100 vectors CREATE INDEX set enable_seqscan=false; SET -explain with t as (select id, page_title, context_page_description_ai <-> (select context_page_description_ai from tsv_data where id = 81386) as dist - from tsv_data order by dist limit 10) select id, page_title, ROUND( dist::numeric, 2) from t; -psql:test/sql/wiki.sql:71: INFO: cost estimate -psql:test/sql/wiki.sql:71: INFO: returning small cost to always use the index -psql:test/sql/wiki.sql:71: INFO: cost estimate -psql:test/sql/wiki.sql:71: INFO: returning small cost to always use the index - QUERY PLAN ------------------------------------------------------------------------------------------------------------------------- - Subquery Scan on t (cost=8.16..8.74 rows=10 
width=68) - -> Limit (cost=8.16..8.59 rows=10 width=44) - InitPlan 1 (returns $0) - -> Index Scan using tsv_data_pkey on tsv_data tsv_data_1 (cost=0.14..8.16 rows=1 width=32) - Index Cond: (id = 81386) - -> Index Scan using tsv_data_context_page_description_ai_idx on tsv_data (cost=0.00..4.26 rows=100 width=44) - Order By: (context_page_description_ai <-> $0) -(7 rows) - with t as (select id, page_title, context_page_description_ai <-> (select context_page_description_ai from tsv_data where id = 81386) as dist from tsv_data order by dist limit 10) select id, page_title, ROUND( dist::numeric, 2) from t; psql:test/sql/wiki.sql:75: INFO: cost estimate diff --git a/test/sql/debug_helpers.sql b/test/sql/debug_helpers.sql index 9cbce8f28..36f18b125 100644 --- a/test/sql/debug_helpers.sql +++ b/test/sql/debug_helpers.sql @@ -6,7 +6,8 @@ CREATE EXTENSION IF NOT EXISTS lanterndb; SHOW hnsw.init_k; CREATE INDEX ON small_world USING hnsw (vector); -\d+ small_world +-- verify that the index was created +SELECT * FROM ldb_get_indexes('small_world'); -- it exists after we create an index SHOW hnsw.init_k; diff --git a/test/sql/hnsw.sql b/test/sql/hnsw.sql index 7ba08b103..b83b7d36b 100644 --- a/test/sql/hnsw.sql +++ b/test/sql/hnsw.sql @@ -41,7 +41,7 @@ SET enable_seqscan = off; begin; CREATE INDEX ON small_world USING hnsw (vector); -\d+ small_world +SELECT * FROM ldb_get_indexes('small_world'); SELECT * FROM ( SELECT id, ROUND( (vector <-> '[0,0,0]')::numeric, 2) as dist FROM small_world @@ -57,7 +57,7 @@ rollback; begin; CREATE INDEX ON small_world USING hnsw (vector) WITH (M=2, ef=11, ef_construction=12); -\d+ small_world +SELECT * FROM ldb_get_indexes('small_world'); -- Equidistant points from the given vector appear in different order in the output of the inner query -- depending on postgres version and platform. The outder query forces a deterministic order. 
-- Unfortunately, outer query resorts distances as well so if the index sorted them in a wrong order, @@ -83,7 +83,7 @@ rollback; begin; CREATE INDEX ON small_world USING hnsw (vector) WITH (M=11, ef=2, ef_construction=2); -\d+ small_world +SELECT * FROM ldb_get_indexes('small_world'); SELECT * FROM ( SELECT id, ROUND( (vector <-> '[0,0,0]')::numeric, 2) as dist FROM small_world diff --git a/test/sql/hnsw_insert.sql b/test/sql/hnsw_insert.sql index b8393a6b3..e38772c2c 100644 --- a/test/sql/hnsw_insert.sql +++ b/test/sql/hnsw_insert.sql @@ -9,8 +9,8 @@ CREATE INDEX ON sift_base1k USING hnsw (v); SET enable_seqscan = off; -insert into small_world (id, vector) values ('xxx', '[0,0,0]'); -insert into small_world (id, vector) values ('x11', '[0,0,110]'); +INSERT INTO small_world (id, vector) VALUES ('xxx', '[0,0,0]'); +INSERT INTO small_world (id, vector) VALUES ('x11', '[0,0,110]'); INSERT INTO small_world (id, vector) VALUES ('000', '[0,0,0]'), ('001', '[0,0,1]'), @@ -76,10 +76,10 @@ SELECT '[0,0,0]'::vector as v42 \gset EXPLAIN SELECT id, ROUND((vector <-> :'v42')::numeric, 2) FROM new_small_world ORDER BY vector <-> :'v42' LIMIT 10; SELECT id, ROUND((vector <-> :'v42')::numeric, 2) FROM new_small_world ORDER BY vector <-> :'v42' LIMIT 10; -select count(*) from sift_base1k; -\di+ +SELECT count(*) from sift_base1k; +SELECT * from ldb_get_indexes('sift_base1k'); INSERT INTO sift_base1k(v) SELECT v FROM sift_base1k WHERE id <= 444 AND v IS NOT NULL; -select count(*) from sift_base1k; -\di+ +SELECT count(*) from sift_base1k; +SELECT * from ldb_get_indexes('sift_base1k'); diff --git a/test/sql/test_helpers/common.sql b/test/sql/test_helpers/common.sql new file mode 100644 index 000000000..625439ed3 --- /dev/null +++ b/test/sql/test_helpers/common.sql @@ -0,0 +1,20 @@ +--test helper functions that should exist in all test runs live here +-- there is no need to explicitly include this file in other tests as the test runner will +-- run this before running the actual test + 
+CREATE EXTENSION IF NOT EXISTS pageinspect;
+
+--todo:: add columns to this function that return the number of used DB pages and the total index size
+\set ON_ERROR_STOP on
+-- ldb_get_indexes(tblname): one row per index on table tblname -> (index name, pg_size_pretty(pg_relation_size(index)), index definition), sorted by index name
+CREATE OR REPLACE FUNCTION ldb_get_indexes(tblname text)
+RETURNS TABLE(indexname name, size text, indexdef text) AS
+$BODY$
+BEGIN
+  RETURN QUERY
+  SELECT i.indexname, pg_size_pretty(pg_relation_size(format('%I.%I', i.schemaname, i.indexname)::regclass)), i.indexdef -- schema-qualified, quoted lookup: independent of search_path and identifier case
+  FROM pg_indexes AS i
+  WHERE i.tablename = tblname
+  ORDER BY i.indexname; -- explicit ordering so the expected test output does not depend on catalog scan order
+END;
+$BODY$ LANGUAGE plpgsql;
diff --git a/test/sql/wiki.sql b/test/sql/wiki.sql index 8cc5f2e9f..62255c5ff 100644 --- a/test/sql/wiki.sql +++ b/test/sql/wiki.sql @@ -19,8 +19,6 @@ SET row_security = off; SET default_tablespace = ''; -SET default_table_access_method = heap; - CREATE TABLE tsv_data ( language text, page_url text, @@ -67,8 +65,10 @@ CREATE INDEX index1 ON tsv_data USING hnsw (context_page_description_ai vector_l CREATE INDEX ON tsv_data USING hnsw (context_page_description_ai) with (ef = 100, ef_construction=150 , M=11, alg="hnswlib"); set enable_seqscan=false; -explain with t as (select id, page_title, context_page_description_ai <-> (select context_page_description_ai from tsv_data where id = 81386) as dist - from tsv_data order by dist limit 10) select id, page_title, ROUND( dist::numeric, 2) from t; +-- todo:: find a different way to ensure that the index is used. 
"\set enable_seqscan=false;" is not enough +-- and, the following produces a different output on pg11 +-- explain with t as (select id, page_title, context_page_description_ai <-> (select context_page_description_ai from tsv_data where id = 81386) as dist +-- from tsv_data order by dist limit 10) select id, page_title, ROUND( dist::numeric, 2) from t; -- introduce a WITH statement to round returned distances AFTER a lookup so the index can be used with t as (select id, page_title, context_page_description_ai <-> (select context_page_description_ai from tsv_data where id = 81386) as dist From 6a4b728287381a3048896526e6b48a33fd34eb9b Mon Sep 17 00:00:00 2001 From: Narek Galstyan Date: Sun, 30 Jul 2023 07:58:06 +0000 Subject: [PATCH 2/2] Disable copynodes since it is not supported with inserts yet --- CMakeLists.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 2463a4de9..c68405e3e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -24,7 +24,8 @@ option(BUILD_WITH_USEARCH "Build with usearch as hnsw provider" ON) option(BUILD_LIBHNSW "Build libhnsw as hnsw provider" OFF) # options passed into lanterndb sourcecode -option(LANTERNDB_COPYNODES "Copy postgres index tuples for external retriever during scan instead of pinning" ON) +# todo:: tests for copynodes=ON are broken +option(LANTERNDB_COPYNODES "Copy postgres index tuples for external retriever during scan instead of pinning" OFF) set(CMAKE_EXPORT_COMPILE_COMMANDS ON)