diff --git a/.github/workflows/benchmark-linux.yaml b/.github/workflows/benchmark-linux.yaml new file mode 100644 index 000000000..7cd097d0c --- /dev/null +++ b/.github/workflows/benchmark-linux.yaml @@ -0,0 +1,40 @@ +name: benchmark + +on: + push: + branches: + - main + - dev + pull_request: + branches: + - main + - dev + +jobs: + ubuntu-build: + runs-on: ubuntu-22.04 + + steps: + - uses: actions/checkout@v3 + with: + submodules: "recursive" + + - name: Build + id: build + run: sudo su -c "PG_VERSION=15 USE_SOURCE=1 ./ci/scripts/build-linux.sh" + env: + BRANCH_NAME: ${{ github.head_ref || github.ref_name }} + + - name: Run benchmarking + id: test + run: sudo su postgres -c "./ci/scripts/run-benchmarks.sh" + env: + BASE_REF: ${{ github.base_ref }} + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + + - name: Upload to artifacts + uses: actions/upload-artifact@v3 + with: + name: benchmark-results + path: | + /tmp/benchmarks-out.json \ No newline at end of file diff --git a/CMakeLists.txt b/CMakeLists.txt index 10a0fdfd0..7f2aeeada 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -184,9 +184,26 @@ add_custom_target( test COMMAND ${CMAKE_SOURCE_DIR}/scripts/run_all_tests.sh WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}/test - ) +) + +# BENCHMARK +add_custom_target( + benchmark + COMMAND ${CMAKE_SOURCE_DIR}/scripts/run_benchmarks.sh + WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}/build +) +add_custom_target( + benchmark-skip-setup + COMMAND ${CMAKE_SOURCE_DIR}/scripts/run_benchmarks.sh --skip-setup + WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}/build +) +add_custom_target( + benchmark-print-only + COMMAND ${CMAKE_SOURCE_DIR}/scripts/run_benchmarks.sh --print-only + WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}/build +) - # DEVELOPMENT +# DEVELOPMENT find_program(CLANG_FORMAT NAMES clang-format) if (CLANG_FORMAT) @@ -226,12 +243,6 @@ if (CLANG_FORMAT) ) endif() -add_custom_target( - bench - COMMAND ${CMAKE_SOURCE_DIR}/scripts/bench.sh - WORKING_DIRECTORY ${CMAKE_SOURCE_DIR} - ) - # Package universal 
install script string(REGEX MATCH "^PostgreSQL (\[0-9]+).*" PostgreSQL_VERSION_NUMBER ${PostgreSQL_VERSION_STRING}) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 19a456912..b77b38eea 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -1,47 +1,55 @@ -Thanks for considering to contribute! -The information below is intended to *help* you contribute. +Thanks for considering contributing! The information below is intended to help you contribute. -## Runing tests +## Running tests -``` --- run all regression tests +```bash +# run all regression tests make test --- only run regression tests that have $FILTER in regression sql file path +# only run regression tests that have $FILTER in regression sql file path make test FILTER=hnsw ``` +## Running benchmarks +This requires Python to be installed. Please check the `Dockerfile.dev` for pip requirements. +```bash +# set up benchmarking, run benchmarks, and print results +make benchmark + +# run benchmarks and print results (skip setup) +make benchmark-skip-setup + +# print most recent benchmark results (skip setup and running benchmarks) +make benchmark-print-only +``` + ## VSCode and IntelliSense `.vscode/c_cpp_properties` is configured to use `./build/compile_commands.json`. -If you build lanterndb in a different directory, make sure to update ``.vscode` config appropriately -in order to have IntelliSense working. +If you build lanterndb in a different directory, make sure to update `.vscode` config appropriately in order to have IntelliSense working. ## Debugging the C codebase -If you make changes to the C codebase, in addition to `make test`, you can also use the `livedebug.py` utility -in a `tmux` session to easily attach `gdb` to the psql backend and find out what breaks. +If you make changes to the C codebase, in addition to `make test`, you can also use the `livedebug.py` utility in a `tmux` session to easily attach `gdb` to the psql backend and find out what breaks. 
Below is a short recording demonstrating the use of `livedebug.py`: [![asciicast](https://asciinema.org/a/jTsbWdOcTvUl4iAJlAw3Cszbt.svg)](https://asciinema.org/a/jTsbWdOcTvUl4iAJlAw3Cszbt) ## Adding/modifying LanternDB's SQL interface -When modifying the SQL interface, you add relevant SQL logic under `sql/`. In addition, you add an update script -under `sql/updates`, in a file named `[CURRENT_VERSION]--latest.sql`. You should create this file if it does not exist. +When modifying the SQL interface, you add relevant SQL logic under `sql/`. In addition, you add an update script under `sql/updates`, in a file named `[CURRENT_VERSION]--latest.sql`. You should create this file if it does not exist. Note that you never modify an already existing update file that does not have `latest` in its name. -The files that do not have `latest` in the name are part of a previous releases and help LanternDB users update -to a newer version of the extension via `ALTER EXTENSION lanterndb UPDATE`. + +The files that do not have `latest` in the name are part of previous releases and help LanternDB users update to a newer version of the extension via `ALTER EXTENSION lanterndb UPDATE`. ## Browsing the Postgres repository offline You can download PostgreSQL source code from [their ftp server](https://www.postgresql.org/ftp/source/). Alternatively, can clone their git repository. ```bash -#full repository +# full repository git clone https://git.postgresql.org/git/postgresql.git -#release head only +# release head only git clone --single-branch --branch REL_15_STABLE https://git.postgresql.org/git/postgresql.git --depth=1 - ``` diff --git a/Dockerfile.dev b/Dockerfile.dev index ea7a4a095..f13c0ac85 100644 --- a/Dockerfile.dev +++ b/Dockerfile.dev @@ -31,4 +31,11 @@ RUN rm -rf build \ && mkdir build \ && cd build \ && cmake -DUSEARCH_NO_MARCH_NATIVE=ON -DCMAKE_BUILD_TYPE=Debug .. 
\ - && make install \ No newline at end of file + && make install + +# Install benchmarking tools in build folder +RUN git clone https://github.com/lanterndata/benchmark \ + && cd benchmark \ + && pip install -r core/requirements.txt --break-system-packages \ + && pip install -r external/requirements.txt --break-system-packages +ENV DATABASE_URL=postgres://postgres:postgres@localhost:5432/postgres \ No newline at end of file diff --git a/bench.sql b/bench.sql deleted file mode 100644 index 7dd97c422..000000000 --- a/bench.sql +++ /dev/null @@ -1,74 +0,0 @@ - -\set ON_ERROR_STOP on -\timing - -DROP EXTENSION IF EXISTS vector CASCADE; -CREATE EXTENSION IF NOT EXISTS vector; -DROP EXTENSION IF EXISTS lanterndb CASCADE; -CREATE EXTENSION IF NOT EXISTS lanterndb; - --- Create SIFT tables for benchmarking -DROP TABLE IF EXISTS sift_base10k; - CREATE TABLE sift_base10k ( - id SERIAL PRIMARY KEY, - v vector(128)); - - \copy sift_base10k (v) FROM 'base10k.csv' with csv; - --- CREATE TABLE sift_base1m ( --- id SERIAL PRIMARY KEY, --- v vector(128)); - --- CREATE TABLE gist_base1m ( --- id SERIAL PRIMARY KEY, --- v vector(960)); - --- CREATE TABLE sift_base1b ( --- id SERIAL PRIMARY KEY, --- v vector(128)); - --- \copy sift_base1m (v) FROM 'base1m.csv' with csv; - -select v as v4444 from sift_base10k where id = 4444 \gset -EXPLAIN (ANALYZE, TIMING FALSE) select * from sift_base10k order by v <-> :'v4444' -limit 10; - -select id, vector_l2sq_dist(v, :'v4444') -as dist -from sift_base10k order by dist limit 10; - -\set GROUP_LIMIT 10000 - --- CREATE INDEX ON sift_base1m USING hnsw (v vector_l2_ops) WITH (M=2, ef_construction=14, alg="diskann"); -CREATE INDEX ON sift_base10k USING hnsw (v dist_vec_l2sq_ops) WITH (M=2, ef_construction=10, ef=4, alg="diskann"); -CREATE INDEX ON sift_base10k USING ivfflat (v vector_l2_ops); - -\echo "running" v4444 "vector queries" -\echo "@@@@@@@@@@@@@@@@@@@@ ivfflat index is also created @@@@@@@@@@@@@@" -begin; -drop index sift_base10k_v_idx; 
-explain (analyze,buffers) select q.id AS query_id, - ARRAY_AGG(b.id ORDER BY q.v <-> b.v) AS base_ids -FROM - sift_base10k q -JOIN LATERAL ( - SELECT id,v - FROM sift_base10k - ORDER BY q.v <-> v limit 10 -) b ON true -GROUP BY - q.id limit :GROUP_LIMIT; -rollback; -\echo "^^^^^^^^^^^^^^^^^^^^ ivfflat performance above ^^^^^^^^^^^^^^" - -explain (analyze,buffers) select q.id AS query_id, - ARRAY_AGG(b.id ORDER BY q.v <-> b.v) AS base_ids -FROM - sift_base10k q -JOIN LATERAL ( - SELECT id,v - FROM sift_base10k - ORDER BY q.v <-> v limit 10 -) b ON true -GROUP BY - q.id limit :GROUP_LIMIT; diff --git a/ci/scripts/run-benchmarks.sh b/ci/scripts/run-benchmarks.sh new file mode 100755 index 000000000..8d971659f --- /dev/null +++ b/ci/scripts/run-benchmarks.sh @@ -0,0 +1,42 @@ +#!/bin/bash +set -e + +wait_for_pg(){ + tries=0 + until pg_isready -U postgres 2>/dev/null; do + if [ $tries -eq 10 ]; + then + echo "Can not connect to postgres" + exit 1 + fi + + sleep 1 + tries=$((tries+1)) + done +} + +export WORKDIR=/tmp/lanterndb +export PG_VERSION=15 +export GITHUB_OUTPUT=/dev/null +export PGDATA=/etc/postgresql/$PG_VERSION/main/ + +echo "port = 5432" >> $PGDATA/postgresql.conf +# Enable auth without password +echo "local all all trust" > $PGDATA/pg_hba.conf +echo "host all all 127.0.0.1/32 trust" >> $PGDATA/pg_hba.conf +echo "host all all ::1/128 trust" >> $PGDATA/pg_hba.conf + +POSTGRES_HOST_AUTH_METHOD=trust /usr/lib/postgresql/$PG_VERSION/bin/postgres 1>/tmp/pg-out.log 2>/tmp/pg-error.log & +wait_for_pg +cd $WORKDIR/build + +export DATABASE_URL=postgresql://localhost:5432/postgres +git clone https://github.com/lanterndata/benchmark +cd benchmark +pip install -r core/requirements.txt +pip install -r external/requirements.txt +cd .. 
+ +make benchmark +killall postgres + diff --git a/ci/scripts/run-tests.sh b/ci/scripts/run-tests.sh index b5c007e80..974c0c1d8 100755 --- a/ci/scripts/run-tests.sh +++ b/ci/scripts/run-tests.sh @@ -1,4 +1,5 @@ #!/bin/bash +set -e wait_for_pg(){ tries=0 diff --git a/scripts/bench.sh b/scripts/bench.sh deleted file mode 100755 index 8e820a13d..000000000 --- a/scripts/bench.sh +++ /dev/null @@ -1,3 +0,0 @@ -#!/bin/bash -PSQL=psql -${PSQL} bench_db < ./bench.sql \ No newline at end of file diff --git a/scripts/run_benchmarks.sh b/scripts/run_benchmarks.sh new file mode 100755 index 000000000..95749f128 --- /dev/null +++ b/scripts/run_benchmarks.sh @@ -0,0 +1,40 @@ +#!/bin/bash +set -e + +# Benchmarking parameters +BASE_PARAMS="--extension lantern --dataset sift --N 10k" +INDEX_PARAMS="--m 4 --ef_construction 128 --ef 10" +PARAMS="$BASE_PARAMS $INDEX_PARAMS --K 5" + +# Settings +SKIP_SETUP=0 +PRINT_ONLY=0 +while [[ "$#" -gt 0 ]]; do + case $1 in + --skip-setup) SKIP_SETUP=1 ;; + --print-only) PRINT_ONLY=1 ;; + esac + shift +done + +# Go to benchmark directory +cd benchmark + +# Run setup +if [ "$SKIP_SETUP" -ne 1 ] && [ "$PRINT_ONLY" -ne 1 ]; then + echo "Running data setup" + python3 -m core.setup --datapath /tmp/benchmark_data $BASE_PARAMS +else + echo "Skipping data setup" +fi + +# Run benchmarks +if [ "$PRINT_ONLY" -ne 1 ]; then + echo "Running benchmarks" + python3 -m external.run_benchmarks $PARAMS +fi + +# Render benchmarks +python3 -m external.show_benchmarks $PARAMS +python3 -m external.validate_benchmarks $PARAMS +python3 -m external.get_benchmarks_json $PARAMS > /tmp/benchmarks-out.json \ No newline at end of file diff --git a/test/sql/test_helpers/princeton_places.sql b/test/sql/test_helpers/princeton_places.sql deleted file mode 100644 index 09e0d5324..000000000 --- a/test/sql/test_helpers/princeton_places.sql +++ /dev/null @@ -1,8 +0,0 @@ -CREATE TABLE princeton_places ( - name text, - street text, - long float, - lat float, - v vector(2) -); -\copy 
pton_area(name, street, long, lat) FROM '/tmp/lanterndb/vector_datasets/sift_base1k.csv' DELIMITER E',';