From 97abbca063f0ccf1b69607638d42f5ccc6ee1e2d Mon Sep 17 00:00:00 2001 From: "Gilad S." <7817232+giladgd@users.noreply.github.com> Date: Thu, 28 Nov 2024 01:03:52 +0200 Subject: [PATCH] feat: Llama 3.2 3B function calling support (#386) * feat: Llama 3.2 3B function calling support * feat: use `llama.cpp` backend registry for GPUs instead of custom implementations * feat(`getLlama`): `build: "try"` option * feat(`init` command): `--model` flag * fix: improve binary compatibility testing on Electron apps * fix: too many abort signal listeners * fix: log level of some lower level logs * fix: context window missing response during generation on specific extreme conditions * fix: adapt to breaking `llama.cpp` changes * fix: automatically resolve `compiler is out of heap space` CUDA build error * chore: update bug report template * docs: separate open source and proprietary sections in the awesome list * docs(troubleshooting): Electron build error on Windows * docs(Electron): GitHub Actions template for cross-compilation --- .github/ISSUE_TEMPLATE/bug-report.yml | 10 +- .github/workflows/build.yml | 25 +- .gitignore | 3 + .vitepress/assets/ogTemplate.svg | 10 +- .vitepress/config.ts | 21 +- docs/guide/awesome.md | 9 +- docs/guide/electron.md | 59 +- docs/guide/troubleshooting.md | 7 + docs/public/robots.txt | 1 + llama/CMakeLists.txt | 100 +-- llama/addon/addon.cpp | 17 + llama/addon/globals/getGpuInfo.cpp | 106 +-- llama/gpuInfo/cuda-gpu-info.cu | 120 ---- llama/gpuInfo/cuda-gpu-info.h | 10 - llama/gpuInfo/metal-gpu-info.h | 8 - llama/gpuInfo/metal-gpu-info.mm | 37 - llama/gpuInfo/vulkan-gpu-info.cpp | 8 - llama/gpuInfo/vulkan-gpu-info.h | 1 - package-lock.json | 649 ++++++++++++++++++ package.json | 1 + src/bindings/AddonTypes.ts | 3 +- src/bindings/Llama.ts | 22 +- src/bindings/getLlama.ts | 48 +- src/bindings/utils/compileLLamaCpp.ts | 58 +- src/bindings/utils/testBindingBinary.ts | 198 ++++-- src/chatWrappers/Llama3_1ChatWrapper.ts | 5 +- .../Llama3_2LightweightChatWrapper.ts | 354 ++++++++++ ...hatModelFunctionsDocumentationGenerator.ts | 37 + src/chatWrappers/utils/getModelLinageNames.ts | 23 + .../utils/isLlama3_2LightweightModel.ts | 11 + src/chatWrappers/utils/resolveChatWrapper.ts | 53 +- src/cli/commands/ChatCommand.ts | 2 +- src/cli/commands/InitCommand.ts | 47 +- src/cli/recommendedModels.ts | 12 + src/evaluator/LlamaChat/LlamaChat.ts | 48 +- .../LlamaChatSession/LlamaChatSession.ts | 323 ++++----- src/index.ts | 2 + src/utils/includesText.ts | 18 + src/utils/wrapAbortSignal.ts | 17 +- templates/README.md | 6 + .../electron-builder.ts | 6 +- .../llama3.2/completion.test.ts | 25 + test/utils/modelFiles.ts | 3 +- 43 files changed, 1890 insertions(+), 633 deletions(-) delete mode 100644 llama/gpuInfo/cuda-gpu-info.cu delete mode 100644 llama/gpuInfo/cuda-gpu-info.h delete mode 100644 llama/gpuInfo/metal-gpu-info.h delete mode 100644 llama/gpuInfo/metal-gpu-info.mm create mode 100644 src/chatWrappers/Llama3_2LightweightChatWrapper.ts create mode 100644 src/chatWrappers/utils/getModelLinageNames.ts create mode 100644 src/chatWrappers/utils/isLlama3_2LightweightModel.ts create mode 100644 src/utils/includesText.ts create mode 100644 templates/README.md create mode 100644 test/modelDependent/llama3.2/completion.test.ts diff --git a/.github/ISSUE_TEMPLATE/bug-report.yml b/.github/ISSUE_TEMPLATE/bug-report.yml index 4e2032b4..40b0f4a0 100644 --- a/.github/ISSUE_TEMPLATE/bug-report.yml +++ b/.github/ISSUE_TEMPLATE/bug-report.yml @@ -27,8 +27,9 @@ body: attributes: label: Actual Behavior 
description: >- - A clear and concise description of what actually happened. Please wrap - any error messages or output in code tags, instead of images. + A clear and concise description of what actually happened. + Please wrap any error messages or output in code tags, instead of images. + Please attach logs if relevant. validations: required: true - type: textarea @@ -60,6 +61,11 @@ body: | Node.js version | x.y.zzz | | Typescript version | x.y.zzz | | `node-llama-cpp` version | x.y.zzz | + + `npx --yes node-llama-cpp inspect gpu` output: + ``` + Result of running `npx --yes node-llama-cpp inspect gpu` + ``` validations: required: true - type: textarea diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 8c5270ea..c574af59 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -65,9 +65,12 @@ jobs: - name: "Ubuntu" os: ubuntu-22.04 artifact: "linux" - - name: "macOS" + - name: "macOS x64" os: macos-13 - artifact: "mac" + artifact: "mac-x64" + - name: "macOS arm64" + os: macos-14 + artifact: "mac-arm64" steps: - uses: actions/checkout@v4 @@ -87,11 +90,18 @@ jobs: name: llama.cpp path: llama - - name: Install dependencies on Windows - if: startsWith(matrix.config.os, 'windows') + - name: Install dependencies on Windows for x64 + if: matrix.config.name == 'Windows for x64' run: | choco install ninja cmake + - name: Install dependencies on Windows + if: matrix.config.name == 'Windows for Arm' + run: | + choco install cmake.install --version=3.31.1 + choco install cmake --version=3.31.1 + choco install ninja + - name: Install dependencies on Ubuntu if: matrix.config.name == 'Ubuntu' run: | @@ -148,7 +158,7 @@ jobs: - name: Setup & Build id: build shell: bash - timeout-minutes: 200 + timeout-minutes: 300 env: ARTIFACT_NAME: ${{ matrix.config.artifact }} run: | @@ -212,9 +222,10 @@ jobs: await buildBinary("x64", ["--gpu", "vulkan"]); await buildBinary("arm64", ["--gpu", "false"]); await buildBinary("armv7l", ["--gpu", "false"]); - } else if (process.env.ARTIFACT_NAME === "mac") { - await buildBinary("arm64", ["--gpu", "metal"]); + } else if (process.env.ARTIFACT_NAME === "mac-x64") { await buildBinary("x64", ["--gpu", "false"]); + } else if (process.env.ARTIFACT_NAME === "mac-arm64") { + await buildBinary("arm64", ["--gpu", "metal"]); } // move binaries to bins diff --git a/.gitignore b/.gitignore index 69ccf614..8eda1fd0 100644 --- a/.gitignore +++ b/.gitignore @@ -24,7 +24,10 @@ node_modules /llama/lastBuild.json /llama/gitRelease.bundle /llama/.temp +/llama/.cache +/llama/build /llama/.idea +/llama/.vscode /llama/cmake-build-debug /llama/localBuilds /llama/Release diff --git a/.vitepress/assets/ogTemplate.svg b/.vitepress/assets/ogTemplate.svg index 53673b64..95f55af1 100644 --- a/.vitepress/assets/ogTemplate.svg +++ b/.vitepress/assets/ogTemplate.svg @@ -75,12 +75,12 @@ - node-llama-cpp + node-llama-cpp - {{category}} - {{line1}} - {{line2}} - {{line3}} + {{category}} + {{line1}} + {{line2}} + {{line3}} diff --git a/.vitepress/config.ts b/.vitepress/config.ts index 433fa506..b4bd72d4 100644 --- a/.vitepress/config.ts +++ b/.vitepress/config.ts @@ -122,12 +122,22 @@ export default defineConfig({ return items .map((item) => { - if (item.url === "" || item.url === "blog/") { + if (item.url === "") { + item.lastmod = undefined; + item.changefreq = "daily"; + item.priority = 1; + } else if (item.url === "blog/") { item.lastmod = new Date(buildDate); + item.changefreq = "daily"; + item.priority = 0.9; } else if (item.url.startsWith("api/") || 
item.url.startsWith("cli/")) { item = { ...item, - lastmod: new Date(buildDate) + lastmod: new Date(buildDate), + changefreq: "weekly", + priority: item.url.startsWith("cli/") + ? 0.7 + : 0.5 }; } else if (item.lastmod == null && item.url.startsWith("blog/")) { const postDate = blogPostMap.get(item.url)?.frontmatter.date; @@ -138,6 +148,13 @@ export default defineConfig({ } } else if (item.lastmod == null) { item.lastmod = new Date(buildDate); + item.changefreq = "weekly"; + item.priority = 0.4; + } + + if (item.url !== "blog/" && item.url.startsWith("blog/")) { + item.priority = 0.8; + item.changefreq = "hourly"; } return item; diff --git a/docs/guide/awesome.md b/docs/guide/awesome.md index 1632b809..1e8df827 100644 --- a/docs/guide/awesome.md +++ b/docs/guide/awesome.md @@ -1,10 +1,15 @@ # Awesome `node-llama-cpp` Awesome projects that use `node-llama-cpp`. ---- - +## Open Source * [CatAI](https://github.com/withcatai/catai) - a simplified AI assistant API for Node.js, with REST API support +## Proprietary +> List your project here! + + +
+ --- > To have a project listed here, it should clearly state that it uses `node-llama-cpp`. diff --git a/docs/guide/electron.md b/docs/guide/electron.md index dc1fc2aa..9fe74e0c 100644 --- a/docs/guide/electron.md +++ b/docs/guide/electron.md @@ -34,10 +34,67 @@ You also need to call [`getLlama`](../api/functions/getLlama.md) with the CMake so that `node-llama-cpp` can find them. ## Cross Compilation -Cross packaging from one platform to another is not supported, since binaries for other platforms are not downloaded to you machine when your run `npm install`. +Cross packaging from one platform to another is not supported, since binaries for other platforms are not downloaded to your machine when you run `npm install`. Packaging an `arm64` app on an `x64` machine is supported, but packaging an `x64` app on an `arm64` machine is not. +::: details GitHub Actions template for cross-compilation + + + +```yml +name: Build +on: [push] + +jobs: + build-electron: + name: Build Electron app - ${{ matrix.config.name }} + runs-on: ${{ matrix.config.os }} + strategy: + fail-fast: false + matrix: + config: + - name: "Windows" + os: windows-2022 + - name: "Ubuntu" + os: ubuntu-22.04 + - name: "macOS" + os: macos-13 + + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-node@v4 + with: + node-version: "20" + + - name: Install dependencies on Ubuntu + if: matrix.config.name == 'Ubuntu' + run: | + sudo apt-get update + sudo apt-get install libarchive-tools rpm + sudo snap install snapcraft --classic + + - name: Install modules + run: npm ci + + - name: Build electron app + id: build + shell: bash + timeout-minutes: 480 + run: npm run build + + - name: Upload artifacts + uses: actions/upload-artifact@v4 + with: + include-hidden-files: true + name: "electron-app-${{ matrix.config.name }}" + path: "./release" +``` + + + +::: + ## Bundling When bundling your code for Electron using [Electron Vite](https://electron-vite.org) or Webpack, ensure that `node-llama-cpp` is not bundled, and is instead treated as an external module. diff --git a/docs/guide/troubleshooting.md b/docs/guide/troubleshooting.md index 304da350..d3bfbe8c 100644 --- a/docs/guide/troubleshooting.md +++ b/docs/guide/troubleshooting.md @@ -156,3 +156,10 @@ please [open a new issue on GitHub](https://github.com/withcatai/node-llama-cpp/ The common cause for this issue is when using the `Administrator` to run `npm install` and then trying to run the code with a different user. Ensure you're not using the `Administrator` user for `npm install` nor to run the code. + +## Getting an `EPERM: operation not permitted` Error on a Windows Machine When Building an Electron App +`electron-builder` needs to create symlinks to perform the build process, which requires enabling Developer Mode on Windows. + +To do that, go to `Settings > Update & Security > For developers` and enable `Developer mode`. + +After that, delete the `.cache` folder under your user directory and try building the app again. 
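For the bundling note in the `docs/guide/electron.md` changes above ("ensure that `node-llama-cpp` is not bundled, and is instead treated as an external module"), here is a minimal illustrative sketch of how that might look with Electron Vite. The file name `electron.vite.config.ts` and the exact config shape are assumptions about a typical project layout, not part of this patch; adapt them to your own setup.

```ts
// electron.vite.config.ts — illustrative sketch, adjust to your project's structure
import {defineConfig} from "electron-vite";

export default defineConfig({
    main: {
        build: {
            rollupOptions: {
                // keep node-llama-cpp out of the bundle so it stays an external module
                // and its native binaries can be resolved at runtime
                external: ["node-llama-cpp"]
            }
        }
    }
});
```

A similar `external` (or `externals`) setting applies when using Webpack instead of Electron Vite.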
diff --git a/docs/public/robots.txt b/docs/public/robots.txt index b9756169..6ba151d4 100644 --- a/docs/public/robots.txt +++ b/docs/public/robots.txt @@ -1,2 +1,3 @@ User-agent: * + Sitemap: https://node-llama-cpp.withcat.ai/sitemap.xml diff --git a/llama/CMakeLists.txt b/llama/CMakeLists.txt index 4340c82c..08c7a86b 100644 --- a/llama/CMakeLists.txt +++ b/llama/CMakeLists.txt @@ -28,6 +28,16 @@ if (CMAKE_CXX_COMPILER_ID STREQUAL "Clang" OR CMAKE_CXX_COMPILER_ID STREQUAL "Ap add_compile_options(-Wno-c++17-extensions) endif() +if(APPLE) + set(CMAKE_SKIP_BUILD_RPATH FALSE) + set(CMAKE_INSTALL_RPATH_USE_LINK_PATH FALSE) + set(CMAKE_BUILD_RPATH "@loader_path") + set(CMAKE_INSTALL_RPATH "@loader_path") + set(CMAKE_BUILD_WITH_INSTALL_RPATH TRUE) +else() + set(CMAKE_BUILD_RPATH_USE_ORIGIN ON) +endif() + include_directories(${NODE_ADDON_API_DIR} ${CMAKE_JS_INC}) add_subdirectory("llama.cpp") @@ -39,41 +49,6 @@ unset(GPU_INFO_HEADERS) unset(GPU_INFO_SOURCES) unset(GPU_INFO_EXTRA_LIBS) -if (GGML_CUDA) - cmake_minimum_required(VERSION 3.17) - - find_package(CUDAToolkit) - if (CUDAToolkit_FOUND) - message(STATUS "Using CUDA for GPU info") - - enable_language(CUDA) - - list(APPEND GPU_INFO_HEADERS gpuInfo/cuda-gpu-info.h) - list(APPEND GPU_INFO_SOURCES gpuInfo/cuda-gpu-info.cu) - - add_compile_definitions(GPU_INFO_USE_CUDA) - - if (GGML_STATIC) - list(APPEND GPU_INFO_EXTRA_LIBS CUDA::cudart_static) - else() - list(APPEND GPU_INFO_EXTRA_LIBS CUDA::cudart) - endif() - - list(APPEND GPU_INFO_EXTRA_LIBS CUDA::cuda_driver) - - if (NOT DEFINED CMAKE_CUDA_ARCHITECTURES) - # copied from llama.cpp/CMakLists.txt under "if (NOT DEFINED CMAKE_CUDA_ARCHITECTURES)" - if (GGML_CUDA_F16 OR GGML_CUDA_DMMV_F16) - set(CMAKE_CUDA_ARCHITECTURES "60;61;70;75") - else() - set(CMAKE_CUDA_ARCHITECTURES "52;61;70;75") - endif() - endif() - else() - message(FATAL_ERROR "CUDA was not found") - endif() -endif() - if (GGML_VULKAN OR GGML_KOMPUTE) find_package(Vulkan) if (Vulkan_FOUND) @@ -94,67 +69,12 @@ if (GGML_VULKAN OR GGML_KOMPUTE) endif() endif() -if (GGML_HIPBLAS) - list(APPEND CMAKE_PREFIX_PATH /opt/rocm) - - if (NOT ${CMAKE_C_COMPILER_ID} MATCHES "Clang") - message(WARNING "Only LLVM is supported for HIP, hint: CC=/opt/rocm/llvm/bin/clang") - endif() - if (NOT ${CMAKE_CXX_COMPILER_ID} MATCHES "Clang") - message(WARNING "Only LLVM is supported for HIP, hint: CXX=/opt/rocm/llvm/bin/clang++") - endif() - - find_package(hip) - find_package(hipblas) - find_package(rocblas) - - if (${hipblas_FOUND} AND ${hip_FOUND}) - message(STATUS "Using HIP and hipBLAS for GPU info") - add_compile_definitions(GPU_INFO_USE_HIPBLAS GPU_INFO_USE_CUDA) - add_library(gpu-info-rocm OBJECT gpuInfo/cuda-gpu-info.cu gpuInfo/cuda-gpu-info.h) - set_source_files_properties(gpuInfo/cuda-gpu-info.cu PROPERTIES LANGUAGE CXX) - target_link_libraries(gpu-info-rocm PRIVATE hip::device PUBLIC hip::host roc::rocblas roc::hipblas) - - list(APPEND GPU_INFO_EXTRA_LIBS gpu-info-rocm) - else() - message(FATAL_ERROR "hipBLAS or HIP was not found. 
Try setting CMAKE_PREFIX_PATH=/opt/rocm") - endif() -endif() - -if (GGML_METAL) - find_library(FOUNDATION_LIBRARY Foundation REQUIRED) - find_library(METAL_FRAMEWORK Metal REQUIRED) - find_library(METALKIT_FRAMEWORK MetalKit REQUIRED) - - message(STATUS "Using Metal for GPU info") - list(APPEND GPU_INFO_HEADERS gpuInfo/metal-gpu-info.h) - list(APPEND GPU_INFO_SOURCES gpuInfo/metal-gpu-info.mm) - - add_compile_definitions(GPU_INFO_USE_METAL) - - list(APPEND GPU_INFO_EXTRA_LIBS - ${FOUNDATION_LIBRARY} - ${METAL_FRAMEWORK} - ${METALKIT_FRAMEWORK} - ) -endif() - list(REMOVE_DUPLICATES GPU_INFO_HEADERS) list(REMOVE_DUPLICATES GPU_INFO_SOURCES) list(REMOVE_DUPLICATES GPU_INFO_EXTRA_LIBS) file(GLOB SOURCE_FILES "addon/*.cpp" "addon/**/*.cpp" ${GPU_INFO_SOURCES}) -if(APPLE) - set(CMAKE_SKIP_BUILD_RPATH FALSE) - set(CMAKE_INSTALL_RPATH_USE_LINK_PATH FALSE) - set(CMAKE_BUILD_RPATH "@loader_path") - set(CMAKE_INSTALL_RPATH "@loader_path") - set(CMAKE_BUILD_WITH_INSTALL_RPATH TRUE) -else() - set(CMAKE_BUILD_RPATH_USE_ORIGIN ON) -endif() - add_library(${PROJECT_NAME} SHARED ${SOURCE_FILES} ${CMAKE_JS_SRC} ${GPU_INFO_HEADERS}) set_target_properties(${PROJECT_NAME} PROPERTIES PREFIX "" SUFFIX ".node") target_link_libraries(${PROJECT_NAME} ${CMAKE_JS_LIB}) diff --git a/llama/addon/addon.cpp b/llama/addon/addon.cpp index 5c2d1c52..7b014079 100644 --- a/llama/addon/addon.cpp +++ b/llama/addon/addon.cpp @@ -151,6 +151,22 @@ class AddonBackendUnloadWorker : public Napi::AsyncWorker { } }; +Napi::Value addonLoadBackends(const Napi::CallbackInfo& info) { + const bool forceLoadLibraries = info.Length() == 0 + ? false + : info[0].IsBoolean() + ? info[0].As().Value() + : false; + + ggml_backend_reg_count(); + + if (forceLoadLibraries) { + ggml_backend_load_all(); + } + + return info.Env().Undefined(); +} + Napi::Value addonInit(const Napi::CallbackInfo& info) { if (backendInitialized) { Napi::Promise::Deferred deferred = Napi::Promise::Deferred::New(info.Env()); @@ -205,6 +221,7 @@ Napi::Object registerCallback(Napi::Env env, Napi::Object exports) { Napi::PropertyDescriptor::Function("getGpuDeviceInfo", getGpuDeviceInfo), Napi::PropertyDescriptor::Function("getGpuType", getGpuType), Napi::PropertyDescriptor::Function("getSwapInfo", getSwapInfo), + Napi::PropertyDescriptor::Function("loadBackends", addonLoadBackends), Napi::PropertyDescriptor::Function("init", addonInit), Napi::PropertyDescriptor::Function("dispose", addonDispose), }); diff --git a/llama/addon/globals/getGpuInfo.cpp b/llama/addon/globals/getGpuInfo.cpp index ef51c1cd..cb15501f 100644 --- a/llama/addon/globals/getGpuInfo.cpp +++ b/llama/addon/globals/getGpuInfo.cpp @@ -1,22 +1,15 @@ #include "getGpuInfo.h" #include "addonLog.h" -#ifdef GPU_INFO_USE_CUDA -# include "../../gpuInfo/cuda-gpu-info.h" +#ifdef __APPLE__ + #include #endif + #ifdef GPU_INFO_USE_VULKAN # include "../../gpuInfo/vulkan-gpu-info.h" #endif -#ifdef GPU_INFO_USE_METAL -# include "../../gpuInfo/metal-gpu-info.h" -#endif -#ifdef GPU_INFO_USE_CUDA -void logCudaError(const char* message) { - addonLlamaCppLogCallback(GGML_LOG_LEVEL_ERROR, (std::string("CUDA error: ") + std::string(message)).c_str(), nullptr); -} -#endif #ifdef GPU_INFO_USE_VULKAN void logVulkanWarning(const char* message) { addonLlamaCppLogCallback(GGML_LOG_LEVEL_WARN, (std::string("Vulkan warning: ") + std::string(message)).c_str(), nullptr); @@ -24,20 +17,31 @@ void logVulkanWarning(const char* message) { #endif Napi::Value getGpuVramInfo(const Napi::CallbackInfo& info) { + ggml_backend_dev_t device = NULL; + size_t 
deviceTotal = 0; + size_t deviceFree = 0; + uint64_t total = 0; uint64_t used = 0; uint64_t unifiedVramSize = 0; -#ifdef GPU_INFO_USE_CUDA - size_t cudaDeviceTotal = 0; - size_t cudaDeviceUsed = 0; - bool cudeGetInfoSuccess = gpuInfoGetTotalCudaDevicesInfo(&cudaDeviceTotal, &cudaDeviceUsed, logCudaError); + for (size_t i = 0; i < ggml_backend_dev_count(); i++) { + device = ggml_backend_dev_get(i); + if (ggml_backend_dev_type(device) == GGML_BACKEND_DEVICE_TYPE_GPU) { + deviceTotal = 0; + deviceFree = 0; + ggml_backend_dev_memory(device, &deviceFree, &deviceTotal); - if (cudeGetInfoSuccess) { - total += cudaDeviceTotal; - used += cudaDeviceUsed; - } + total += deviceTotal; + used += deviceTotal - deviceFree; + +#if defined(__arm64__) || defined(__aarch64__) + if (std::string(ggml_backend_dev_name(device)) == "Metal") { + unifiedVramSize += deviceTotal; + } #endif + } + } #ifdef GPU_INFO_USE_VULKAN uint64_t vulkanDeviceTotal = 0; @@ -46,23 +50,15 @@ Napi::Value getGpuVramInfo(const Napi::CallbackInfo& info) { const bool vulkanDeviceSupportsMemoryBudgetExtension = gpuInfoGetTotalVulkanDevicesInfo(&vulkanDeviceTotal, &vulkanDeviceUsed, &vulkanDeviceUnifiedVramSize, logVulkanWarning); if (vulkanDeviceSupportsMemoryBudgetExtension) { - total += vulkanDeviceTotal; - used += vulkanDeviceUsed; + if (vulkanDeviceUnifiedVramSize > total) { + // this means that we counted memory from devices that aren't used by llama.cpp + vulkanDeviceUnifiedVramSize = 0; + } + unifiedVramSize += vulkanDeviceUnifiedVramSize; } #endif -#ifdef GPU_INFO_USE_METAL - uint64_t metalDeviceTotal = 0; - uint64_t metalDeviceUsed = 0; - uint64_t metalDeviceUnifiedVramSize = 0; - getMetalGpuInfo(&metalDeviceTotal, &metalDeviceUsed, &metalDeviceUnifiedVramSize); - - total += metalDeviceTotal; - used += metalDeviceUsed; - unifiedVramSize += metalDeviceUnifiedVramSize; -#endif - Napi::Object result = Napi::Object::New(info.Env()); result.Set("total", Napi::Number::From(info.Env(), total)); result.Set("used", Napi::Number::From(info.Env(), used)); @@ -74,17 +70,13 @@ Napi::Value getGpuVramInfo(const Napi::CallbackInfo& info) { Napi::Value getGpuDeviceInfo(const Napi::CallbackInfo& info) { std::vector deviceNames; -#ifdef GPU_INFO_USE_CUDA - gpuInfoGetCudaDeviceNames(&deviceNames, logCudaError); -#endif + for (size_t i = 0; i < ggml_backend_dev_count(); i++) { + ggml_backend_dev_t device = ggml_backend_dev_get(i); + if (ggml_backend_dev_type(device) == GGML_BACKEND_DEVICE_TYPE_GPU) { -#ifdef GPU_INFO_USE_VULKAN - gpuInfoGetVulkanDeviceNames(&deviceNames, logVulkanWarning); -#endif - -#ifdef GPU_INFO_USE_METAL - getMetalGpuDeviceNames(&deviceNames); -#endif + deviceNames.push_back(std::string(ggml_backend_dev_description(device))); + } + } Napi::Object result = Napi::Object::New(info.Env()); @@ -98,17 +90,27 @@ Napi::Value getGpuDeviceInfo(const Napi::CallbackInfo& info) { } Napi::Value getGpuType(const Napi::CallbackInfo& info) { -#ifdef GPU_INFO_USE_CUDA - return Napi::String::New(info.Env(), "cuda"); -#endif - -#ifdef GPU_INFO_USE_VULKAN - return Napi::String::New(info.Env(), "vulkan"); -#endif + for (size_t i = 0; i < ggml_backend_dev_count(); i++) { + ggml_backend_dev_t device = ggml_backend_dev_get(i); + const auto deviceName = std::string(ggml_backend_dev_name(device)); + + if (deviceName == "Metal") { + return Napi::String::New(info.Env(), "metal"); + } else if (std::string(deviceName).find("Vulkan") == 0) { + return Napi::String::New(info.Env(), "vulkan"); + } else if (std::string(deviceName).find("CUDA") == 0 || 
std::string(deviceName).find("ROCm") == 0 || std::string(deviceName).find("MUSA") == 0) { + return Napi::String::New(info.Env(), "cuda"); + } + } -#ifdef GPU_INFO_USE_METAL - return Napi::String::New(info.Env(), "metal"); -#endif + for (size_t i = 0; i < ggml_backend_dev_count(); i++) { + ggml_backend_dev_t device = ggml_backend_dev_get(i); + const auto deviceName = std::string(ggml_backend_dev_name(device)); + + if (deviceName == "CPU") { + return Napi::Boolean::New(info.Env(), false); + } + } return info.Env().Undefined(); -} \ No newline at end of file +} diff --git a/llama/gpuInfo/cuda-gpu-info.cu b/llama/gpuInfo/cuda-gpu-info.cu deleted file mode 100644 index 1559fc0b..00000000 --- a/llama/gpuInfo/cuda-gpu-info.cu +++ /dev/null @@ -1,120 +0,0 @@ -#include -#include -#include - -#if defined(GPU_INFO_USE_HIPBLAS) -#include -#include -#define cudaGetDevice hipGetDevice -#define cudaGetDeviceCount hipGetDeviceCount -#define cudaGetErrorString hipGetErrorString -#define cudaMemGetInfo hipMemGetInfo -#define cudaSetDevice hipSetDevice -#define cudaSuccess hipSuccess -#else -#include -#include -#endif - - -typedef void (*gpuInfoCudaErrorLogCallback_t)(const char* message); - -bool gpuInfoSetCudaDevice(const int device, gpuInfoCudaErrorLogCallback_t errorLogCallback) { - int current_device; - auto getDeviceResult = cudaGetDevice(¤t_device); - - if (getDeviceResult != cudaSuccess) { - errorLogCallback(cudaGetErrorString(getDeviceResult)); - return false; - } - - if (device == current_device) { - return true; - } - - const auto setDeviceResult = cudaSetDevice(device); - - if (setDeviceResult != cudaSuccess) { - errorLogCallback(cudaGetErrorString(setDeviceResult)); - return false; - } - - return true; -} - -bool gpuInfoGetCudaDeviceInfo(int device, size_t * total, size_t * used, gpuInfoCudaErrorLogCallback_t errorLogCallback) { - gpuInfoSetCudaDevice(device, errorLogCallback); - - size_t freeMem; - size_t totalMem; - auto getMemInfoResult = cudaMemGetInfo(&freeMem, &totalMem); - - if (getMemInfoResult != cudaSuccess) { - errorLogCallback(cudaGetErrorString(getMemInfoResult)); - return false; - } - - *total = totalMem; - *used = totalMem - freeMem; - - return true; -} - -int gpuInfoGetCudaDeviceCount(gpuInfoCudaErrorLogCallback_t errorLogCallback) { - int deviceCount; - auto getDeviceCountResult = cudaGetDeviceCount(&deviceCount); - - if (getDeviceCountResult != cudaSuccess) { - errorLogCallback(cudaGetErrorString(getDeviceCountResult)); - return -1; - } - - return deviceCount; -} - -bool gpuInfoGetTotalCudaDevicesInfo(size_t * total, size_t * used, gpuInfoCudaErrorLogCallback_t errorLogCallback) { - int deviceCount = gpuInfoGetCudaDeviceCount(errorLogCallback); - - if (deviceCount < 0) { - return false; - } - - size_t usedMem = 0; - size_t totalMem = 0; - - for (int i = 0; i < deviceCount; i++) { - size_t deviceUsedMem; - size_t deviceTotalMem; - - if (!gpuInfoGetCudaDeviceInfo(i, &deviceTotalMem, &deviceUsedMem, errorLogCallback)) { - return false; - } - - usedMem += deviceUsedMem; - totalMem += deviceTotalMem; - } - - *total = totalMem; - *used = usedMem; - - return true; -} - -void gpuInfoGetCudaDeviceNames(std::vector * deviceNames, gpuInfoCudaErrorLogCallback_t errorLogCallback) { - int deviceCount = gpuInfoGetCudaDeviceCount(errorLogCallback); - - if (deviceCount < 0) { - return; - } - - for (int i = 0; i < deviceCount; i++) { - cudaDeviceProp prop; - auto getDevicePropertiesResult = cudaGetDeviceProperties(&prop, i); - - if (getDevicePropertiesResult != cudaSuccess) { - 
errorLogCallback(cudaGetErrorString(getDevicePropertiesResult)); - } else { - (*deviceNames).push_back(std::string(prop.name)); - } - } -} diff --git a/llama/gpuInfo/cuda-gpu-info.h b/llama/gpuInfo/cuda-gpu-info.h deleted file mode 100644 index e77b6f29..00000000 --- a/llama/gpuInfo/cuda-gpu-info.h +++ /dev/null @@ -1,10 +0,0 @@ -#pragma once - -#include -#include -#include - -typedef void (*gpuInfoCudaErrorLogCallback_t)(const char* message); - -bool gpuInfoGetTotalCudaDevicesInfo(size_t * total, size_t * used, gpuInfoCudaErrorLogCallback_t errorLogCallback); -void gpuInfoGetCudaDeviceNames(std::vector * deviceNames, gpuInfoCudaErrorLogCallback_t errorLogCallback); diff --git a/llama/gpuInfo/metal-gpu-info.h b/llama/gpuInfo/metal-gpu-info.h deleted file mode 100644 index 9a199bee..00000000 --- a/llama/gpuInfo/metal-gpu-info.h +++ /dev/null @@ -1,8 +0,0 @@ -#pragma once - -#include -#include -#include - -void getMetalGpuInfo(uint64_t * total, uint64_t * used, uint64_t * unifiedMemorySize); -void getMetalGpuDeviceNames(std::vector * deviceNames); \ No newline at end of file diff --git a/llama/gpuInfo/metal-gpu-info.mm b/llama/gpuInfo/metal-gpu-info.mm deleted file mode 100644 index 46ac0b18..00000000 --- a/llama/gpuInfo/metal-gpu-info.mm +++ /dev/null @@ -1,37 +0,0 @@ -#include -#include -#include -#import - -void getMetalGpuInfo(uint64_t * total, uint64_t * used, uint64_t * unifiedMemorySize) { - id device = MTLCreateSystemDefaultDevice(); - - if (device) { - *total = device.recommendedMaxWorkingSetSize; - *used = device.currentAllocatedSize; - - if (device.hasUnifiedMemory) { - *unifiedMemorySize = device.recommendedMaxWorkingSetSize; - } else { - *unifiedMemorySize = 0; - } - } else { - *total = 0; - *used = 0; - *unifiedMemorySize = 0; - } - - [device release]; - device = nil; -} - -void getMetalGpuDeviceNames(std::vector * deviceNames) { - NSArray> *devices = MTLCopyAllDevices(); - - for (id device in devices) { - (*deviceNames).push_back(std::string(([NSString stringWithUTF8String:device.name.UTF8String]).UTF8String)); - } - - [devices release]; - devices = nil; -} diff --git a/llama/gpuInfo/vulkan-gpu-info.cpp b/llama/gpuInfo/vulkan-gpu-info.cpp index 25356546..b47f92a8 100644 --- a/llama/gpuInfo/vulkan-gpu-info.cpp +++ b/llama/gpuInfo/vulkan-gpu-info.cpp @@ -80,11 +80,3 @@ static bool enumerateVulkanDevices(size_t* total, size_t* used, size_t* unifiedM bool gpuInfoGetTotalVulkanDevicesInfo(size_t* total, size_t* used, size_t* unifiedMemorySize, gpuInfoVulkanWarningLogCallback_t warningLogCallback) { return enumerateVulkanDevices(total, used, unifiedMemorySize, false, nullptr, warningLogCallback); } - -bool gpuInfoGetVulkanDeviceNames(std::vector * deviceNames, gpuInfoVulkanWarningLogCallback_t warningLogCallback) { - size_t vulkanDeviceTotal = 0; - size_t vulkanDeviceUsed = 0; - size_t unifiedMemorySize = 0; - - return enumerateVulkanDevices(&vulkanDeviceTotal, &vulkanDeviceUsed, &unifiedMemorySize, true, deviceNames, warningLogCallback); -} diff --git a/llama/gpuInfo/vulkan-gpu-info.h b/llama/gpuInfo/vulkan-gpu-info.h index f8eb0527..af03026e 100644 --- a/llama/gpuInfo/vulkan-gpu-info.h +++ b/llama/gpuInfo/vulkan-gpu-info.h @@ -6,4 +6,3 @@ typedef void (*gpuInfoVulkanWarningLogCallback_t)(const char* message); bool gpuInfoGetTotalVulkanDevicesInfo(size_t* total, size_t* used, size_t* unifiedMemorySize, gpuInfoVulkanWarningLogCallback_t warningLogCallback); -bool gpuInfoGetVulkanDeviceNames(std::vector * deviceNames, gpuInfoVulkanWarningLogCallback_t warningLogCallback); \ No 
newline at end of file diff --git a/package-lock.json b/package-lock.json index 6db1793e..84b212d3 100644 --- a/package-lock.json +++ b/package-lock.json @@ -70,6 +70,7 @@ "@types/yargs": "^17.0.33", "@vitest/coverage-v8": "^2.1.4", "@vitest/ui": "^2.1.4", + "electron": "^33.2.0", "eslint": "^9.13.0", "eslint-import-resolver-typescript": "^3.6.3", "eslint-plugin-import": "^2.31.0", @@ -933,6 +934,73 @@ } } }, + "node_modules/@electron/get": { + "version": "2.0.3", + "resolved": "https://registry.npmjs.org/@electron/get/-/get-2.0.3.tgz", + "integrity": "sha512-Qkzpg2s9GnVV2I2BjRksUi43U5e6+zaQMcjoJy0C+C5oxaKl+fmckGDQFtRpZpZV0NQekuZZ+tGz7EA9TVnQtQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "debug": "^4.1.1", + "env-paths": "^2.2.0", + "fs-extra": "^8.1.0", + "got": "^11.8.5", + "progress": "^2.0.3", + "semver": "^6.2.0", + "sumchecker": "^3.0.1" + }, + "engines": { + "node": ">=12" + }, + "optionalDependencies": { + "global-agent": "^3.0.0" + } + }, + "node_modules/@electron/get/node_modules/fs-extra": { + "version": "8.1.0", + "resolved": "https://registry.npmjs.org/fs-extra/-/fs-extra-8.1.0.tgz", + "integrity": "sha512-yhlQgA6mnOJUKOsRUFsgJdQCvkKhcz8tlZG5HBQfReYZy46OwLcY+Zia0mtdHsOo9y/hP+CxMN0TU9QxoOtG4g==", + "dev": true, + "license": "MIT", + "dependencies": { + "graceful-fs": "^4.2.0", + "jsonfile": "^4.0.0", + "universalify": "^0.1.0" + }, + "engines": { + "node": ">=6 <7 || >=8" + } + }, + "node_modules/@electron/get/node_modules/jsonfile": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/jsonfile/-/jsonfile-4.0.0.tgz", + "integrity": "sha512-m6F1R3z8jjlf2imQHS2Qez5sjKWQzbuuhuJ/FKYFRZvPE3PuHcSMVZzfsLhGVOkfd20obL5SWEBew5ShlquNxg==", + "dev": true, + "license": "MIT", + "optionalDependencies": { + "graceful-fs": "^4.1.6" + } + }, + "node_modules/@electron/get/node_modules/semver": { + "version": "6.3.1", + "resolved": "https://registry.npmjs.org/semver/-/semver-6.3.1.tgz", + "integrity": "sha512-BR7VvDCVHO+q2xBEWskxS6DJE1qRnb7DxzUrogb71CWoSficBxYsiAGd+Kl0mmq/MprG9yArRkyrQxTO6XjMzA==", + "dev": true, + "license": "ISC", + "bin": { + "semver": "bin/semver.js" + } + }, + "node_modules/@electron/get/node_modules/universalify": { + "version": "0.1.2", + "resolved": "https://registry.npmjs.org/universalify/-/universalify-0.1.2.tgz", + "integrity": "sha512-rBJeI5CXAlmy1pV+617WB9J63U6XcazHHF2f2dbJix4XzpUF0RS3Zbj0FGIOCAva5P/d/GBOYaACQ1w+0azUkg==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">= 4.0.0" + } + }, "node_modules/@emnapi/runtime": { "version": "1.2.0", "resolved": "https://registry.npmjs.org/@emnapi/runtime/-/runtime-1.2.0.tgz", @@ -4408,6 +4476,19 @@ "url": "https://github.com/sponsors/jonschlinkert" } }, + "node_modules/@szmarczak/http-timer": { + "version": "4.0.6", + "resolved": "https://registry.npmjs.org/@szmarczak/http-timer/-/http-timer-4.0.6.tgz", + "integrity": "sha512-4BAffykYOgO+5nzBWYwE3W90sBgLJoUPRWWcL8wlyiM8IB8ipJz3UMJ9KXQd1RKQXpKp8Tutn80HZtWsu2u76w==", + "dev": true, + "license": "MIT", + "dependencies": { + "defer-to-connect": "^2.0.0" + }, + "engines": { + "node": ">=10" + } + }, "node_modules/@tinyhttp/content-disposition": { "version": "2.2.0", "resolved": "https://registry.npmjs.org/@tinyhttp/content-disposition/-/content-disposition-2.2.0.tgz", @@ -4441,6 +4522,19 @@ "integrity": "sha512-A0uYgOj3zNc4hNjHc5lYUfJQ/HVyBXiUMKdXd7ysclaE6k9oJdavQzODHuwjpUu2/boCP8afjQYi8z/GtvNCWA==", "dev": true }, + "node_modules/@types/cacheable-request": { + "version": "6.0.3", + "resolved": 
"https://registry.npmjs.org/@types/cacheable-request/-/cacheable-request-6.0.3.tgz", + "integrity": "sha512-IQ3EbTzGxIigb1I3qPZc1rWJnH0BmSKv5QYTalEwweFvyBDLSAe24zP0le/hyi7ecGfZVlIVAg4BZqb8WBwKqw==", + "dev": true, + "license": "MIT", + "dependencies": { + "@types/http-cache-semantics": "*", + "@types/keyv": "^3.1.4", + "@types/node": "*", + "@types/responselike": "^1.0.0" + } + }, "node_modules/@types/conventional-commits-parser": { "version": "5.0.0", "resolved": "https://registry.npmjs.org/@types/conventional-commits-parser/-/conventional-commits-parser-5.0.0.tgz", @@ -4495,6 +4589,13 @@ "@types/unist": "*" } }, + "node_modules/@types/http-cache-semantics": { + "version": "4.0.4", + "resolved": "https://registry.npmjs.org/@types/http-cache-semantics/-/http-cache-semantics-4.0.4.tgz", + "integrity": "sha512-1m0bIFVc7eJWyve9S0RnuRgcQqF/Xd5QsUZAZeQFr1Q3/p9JWoQQEqmVy+DPTNpGXwhgIetAoYF8JSc33q29QA==", + "dev": true, + "license": "MIT" + }, "node_modules/@types/json-schema": { "version": "7.0.15", "resolved": "https://registry.npmjs.org/@types/json-schema/-/json-schema-7.0.15.tgz", @@ -4517,6 +4618,16 @@ "@types/node": "*" } }, + "node_modules/@types/keyv": { + "version": "3.1.4", + "resolved": "https://registry.npmjs.org/@types/keyv/-/keyv-3.1.4.tgz", + "integrity": "sha512-BQ5aZNSCpj7D6K2ksrRCTmKRLEpnPvWDiLPfoGyhZ++8YtiK9d/3DBKPJgry359X/P1PfruyYwvnvwFjuEiEIg==", + "dev": true, + "license": "MIT", + "dependencies": { + "@types/node": "*" + } + }, "node_modules/@types/linkify-it": { "version": "5.0.0", "resolved": "https://registry.npmjs.org/@types/linkify-it/-/linkify-it-5.0.0.tgz", @@ -4582,6 +4693,16 @@ "@types/retry": "*" } }, + "node_modules/@types/responselike": { + "version": "1.0.3", + "resolved": "https://registry.npmjs.org/@types/responselike/-/responselike-1.0.3.tgz", + "integrity": "sha512-H/+L+UkTV33uf49PH5pCAUBVPNj2nDBXTN+qS1dOwyyg24l3CcicicCA7ca+HMvJBZcFgl5r8e+RR6elsb4Lyw==", + "dev": true, + "license": "MIT", + "dependencies": { + "@types/node": "*" + } + }, "node_modules/@types/retry": { "version": "0.12.5", "resolved": "https://registry.npmjs.org/@types/retry/-/retry-0.12.5.tgz", @@ -4635,6 +4756,17 @@ "integrity": "sha512-I4q9QU9MQv4oEOz4tAHJtNz1cwuLxn2F3xcc2iV5WdqLPpUnj30aUuxt1mAxYTG+oe8CZMV/+6rU4S4gRDzqtQ==", "dev": true }, + "node_modules/@types/yauzl": { + "version": "2.10.3", + "resolved": "https://registry.npmjs.org/@types/yauzl/-/yauzl-2.10.3.tgz", + "integrity": "sha512-oJoftv0LSuaDZE3Le4DbKX+KS9G36NzOeSap90UIK0yMA/NhKJhqlSGtNDORNRaIbQfzjXDrQa0ytJ6mNRGz/Q==", + "dev": true, + "license": "MIT", + "optional": true, + "dependencies": { + "@types/node": "*" + } + }, "node_modules/@typescript-eslint/eslint-plugin": { "version": "8.12.2", "resolved": "https://registry.npmjs.org/@typescript-eslint/eslint-plugin/-/eslint-plugin-8.12.2.tgz", @@ -5848,6 +5980,15 @@ "dev": true, "license": "ISC" }, + "node_modules/boolean": { + "version": "3.2.0", + "resolved": "https://registry.npmjs.org/boolean/-/boolean-3.2.0.tgz", + "integrity": "sha512-d0II/GO9uf9lfUHH2BQsjxzRJZBdsjgsBiW4BvhWk/3qoKwQFjIDVN19PfX8F2D/r9PCMTtLWjYVCFrpeYUzsw==", + "deprecated": "Package no longer supported. 
Contact Support at https://www.npmjs.com/support for more info.", + "dev": true, + "license": "MIT", + "optional": true + }, "node_modules/bottleneck": { "version": "2.19.5", "resolved": "https://registry.npmjs.org/bottleneck/-/bottleneck-2.19.5.tgz", @@ -5874,6 +6015,16 @@ "node": ">=8" } }, + "node_modules/buffer-crc32": { + "version": "0.2.13", + "resolved": "https://registry.npmjs.org/buffer-crc32/-/buffer-crc32-0.2.13.tgz", + "integrity": "sha512-VO9Ht/+p3SN7SKWqcrgEzjGbRSJYTx+Q1pTQC0wrWqHx0vpJraQ6GtHx8tvcg1rlK1byhU5gccxgOgj7B0TDkQ==", + "dev": true, + "license": "MIT", + "engines": { + "node": "*" + } + }, "node_modules/bytes": { "version": "3.1.2", "resolved": "https://registry.npmjs.org/bytes/-/bytes-3.1.2.tgz", @@ -5892,6 +6043,64 @@ "node": ">=8" } }, + "node_modules/cacheable-lookup": { + "version": "5.0.4", + "resolved": "https://registry.npmjs.org/cacheable-lookup/-/cacheable-lookup-5.0.4.tgz", + "integrity": "sha512-2/kNscPhpcxrOigMZzbiWF7dz8ilhb/nIHU3EyZiXWXpeq/au8qJ8VhdftMkty3n7Gj6HIGalQG8oiBNB3AJgA==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=10.6.0" + } + }, + "node_modules/cacheable-request": { + "version": "7.0.4", + "resolved": "https://registry.npmjs.org/cacheable-request/-/cacheable-request-7.0.4.tgz", + "integrity": "sha512-v+p6ongsrp0yTGbJXjgxPow2+DL93DASP4kXCDKb8/bwRtt9OEF3whggkkDkGNzgcWy2XaF4a8nZglC7uElscg==", + "dev": true, + "license": "MIT", + "dependencies": { + "clone-response": "^1.0.2", + "get-stream": "^5.1.0", + "http-cache-semantics": "^4.0.0", + "keyv": "^4.0.0", + "lowercase-keys": "^2.0.0", + "normalize-url": "^6.0.1", + "responselike": "^2.0.0" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/cacheable-request/node_modules/get-stream": { + "version": "5.2.0", + "resolved": "https://registry.npmjs.org/get-stream/-/get-stream-5.2.0.tgz", + "integrity": "sha512-nBF+F1rAZVCu/p7rjzgA+Yb4lfYXrpl7a6VmJrU8wF9I1CKvP/QwPNZHnOlwbTkY6dvtFIzFMSyQXbLoTQPRpA==", + "dev": true, + "license": "MIT", + "dependencies": { + "pump": "^3.0.0" + }, + "engines": { + "node": ">=8" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/cacheable-request/node_modules/normalize-url": { + "version": "6.1.0", + "resolved": "https://registry.npmjs.org/normalize-url/-/normalize-url-6.1.0.tgz", + "integrity": "sha512-DlL+XwOy3NxAQ8xuC0okPgK46iuVNAK01YN7RueYBqqFeGsBjV9XmCAzAdgt+667bCl5kPh9EqKKDwnaPG1I7A==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, "node_modules/call-bind": { "version": "1.0.7", "resolved": "https://registry.npmjs.org/call-bind/-/call-bind-1.0.7.tgz", @@ -6248,6 +6457,19 @@ "node": ">=8" } }, + "node_modules/clone-response": { + "version": "1.0.3", + "resolved": "https://registry.npmjs.org/clone-response/-/clone-response-1.0.3.tgz", + "integrity": "sha512-ROoL94jJH2dUVML2Y/5PEDNaSHgeOdSDicUyS7izcF63G6sTc/FTjLub4b8Il9S8S0beOfYt0TaA5qvFK+w0wA==", + "dev": true, + "license": "MIT", + "dependencies": { + "mimic-response": "^1.0.0" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, "node_modules/cmake-js": { "version": "7.3.0", "resolved": "https://registry.npmjs.org/cmake-js/-/cmake-js-7.3.0.tgz", @@ -6817,6 +7039,35 @@ "url": "https://github.com/sponsors/wooorm" } }, + "node_modules/decompress-response": { + "version": "6.0.0", + "resolved": "https://registry.npmjs.org/decompress-response/-/decompress-response-6.0.0.tgz", + "integrity": 
"sha512-aW35yZM6Bb/4oJlZncMH2LCoZtJXTRxES17vE3hoRiowU2kWHaJKFkSBDnDR+cm9J+9QhXmREyIfv0pji9ejCQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "mimic-response": "^3.1.0" + }, + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/decompress-response/node_modules/mimic-response": { + "version": "3.1.0", + "resolved": "https://registry.npmjs.org/mimic-response/-/mimic-response-3.1.0.tgz", + "integrity": "sha512-z0yWI+4FDrrweS8Zmt4Ej5HdJmky15+L2e6Wgn3+iK5fWzb6T3fhNFq2+MeTRb064c6Wr4N/wv0DzQTjNzHNGQ==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, "node_modules/deep-eql": { "version": "5.0.2", "resolved": "https://registry.npmjs.org/deep-eql/-/deep-eql-5.0.2.tgz", @@ -6841,6 +7092,16 @@ "integrity": "sha512-oIPzksmTg4/MriiaYGO+okXDT7ztn/w3Eptv/+gSIdMdKsJo0u4CfYNFJPy+4SKMuCqGw2wxnA+URMg3t8a/bQ==", "dev": true }, + "node_modules/defer-to-connect": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/defer-to-connect/-/defer-to-connect-2.0.1.tgz", + "integrity": "sha512-4tvttepXG1VaYGrRibk5EwJd1t4udunSOVMdLSAL6mId1ix438oPwPZMALY41FCijukO1L0twNcGsdzS7dHgDg==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=10" + } + }, "node_modules/define-data-property": { "version": "1.1.4", "resolved": "https://registry.npmjs.org/define-data-property/-/define-data-property-1.1.4.tgz", @@ -6914,6 +7175,14 @@ "node": ">=8" } }, + "node_modules/detect-node": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/detect-node/-/detect-node-2.1.0.tgz", + "integrity": "sha512-T0NIuQpnTvFDATNuHN5roPwSBG83rFsuO+MXXH9/3N1eFbn4wcPjttvjMLEPWJ0RGUYgQE7cGgS3tNxbqCGM7g==", + "dev": true, + "license": "MIT", + "optional": true + }, "node_modules/devlop": { "version": "1.1.0", "resolved": "https://registry.npmjs.org/devlop/-/devlop-1.1.0.tgz", @@ -7016,6 +7285,35 @@ "integrity": "sha512-I88TYZWc9XiYHRQ4/3c5rjjfgkjhLyW2luGIheGERbNQ6OY7yTybanSpDXZa8y7VUP9YmDcYa+eyq4ca7iLqWA==", "dev": true }, + "node_modules/electron": { + "version": "33.2.0", + "resolved": "https://registry.npmjs.org/electron/-/electron-33.2.0.tgz", + "integrity": "sha512-PVw1ICAQDPsnnsmpNFX/b1i/49h67pbSPxuIENd9K9WpGO1tsRaQt+K2bmXqTuoMJsbzIc75Ce8zqtuwBPqawA==", + "dev": true, + "hasInstallScript": true, + "license": "MIT", + "dependencies": { + "@electron/get": "^2.0.0", + "@types/node": "^20.9.0", + "extract-zip": "^2.0.1" + }, + "bin": { + "electron": "cli.js" + }, + "engines": { + "node": ">= 12.20.55" + } + }, + "node_modules/electron/node_modules/@types/node": { + "version": "20.17.6", + "resolved": "https://registry.npmjs.org/@types/node/-/node-20.17.6.tgz", + "integrity": "sha512-VEI7OdvK2wP7XHnsuXbAJnEpEkF6NjSN45QJlL4VGqZSXsnicpesdTWsg9RISeSdYd3yeRj/y3k5KGjUXYnFwQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "undici-types": "~6.19.2" + } + }, "node_modules/emoji-regex": { "version": "8.0.0", "resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-8.0.0.tgz", @@ -7027,6 +7325,16 @@ "integrity": "sha512-5U0rVMU5Y2n2+ykNLQqMoqklN9ICBT/KsvC1Gz6vqHbz2AXXGkG+Pm5rMWk/8Vjrr/mY9985Hi8DYzn1F09Nyw==", "dev": true }, + "node_modules/end-of-stream": { + "version": "1.4.4", + "resolved": "https://registry.npmjs.org/end-of-stream/-/end-of-stream-1.4.4.tgz", + "integrity": "sha512-+uw1inIHVPQoaVuHzRyXd21icM+cnt4CzD5rW+NC1wjOUSTOs+Te7FOv7AhN7vS9x/oIyhLP5PR1H+phQAHu5Q==", + "dev": true, + "license": "MIT", + 
"dependencies": { + "once": "^1.4.0" + } + }, "node_modules/enhanced-resolve": { "version": "5.17.1", "resolved": "https://registry.npmjs.org/enhanced-resolve/-/enhanced-resolve-5.17.1.tgz", @@ -7257,6 +7565,14 @@ "benchmarks" ] }, + "node_modules/es6-error": { + "version": "4.1.1", + "resolved": "https://registry.npmjs.org/es6-error/-/es6-error-4.1.1.tgz", + "integrity": "sha512-Um/+FxMr9CISWh0bi5Zv0iOD+4cFh5qLeks1qhAopKVAJw3drgKbKySikp7wGhDL0HPeaja0P5ULZrxLkniUVg==", + "dev": true, + "license": "MIT", + "optional": true + }, "node_modules/esbuild": { "version": "0.21.5", "resolved": "https://registry.npmjs.org/esbuild/-/esbuild-0.21.5.tgz", @@ -8031,6 +8347,43 @@ "node": ">=0.10.0" } }, + "node_modules/extract-zip": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/extract-zip/-/extract-zip-2.0.1.tgz", + "integrity": "sha512-GDhU9ntwuKyGXdZBUgTIe+vXnWj0fppUEtMDL0+idd5Sta8TGpHssn/eusA9mrPr9qNDym6SxAYZjNvCn/9RBg==", + "dev": true, + "license": "BSD-2-Clause", + "dependencies": { + "debug": "^4.1.1", + "get-stream": "^5.1.0", + "yauzl": "^2.10.0" + }, + "bin": { + "extract-zip": "cli.js" + }, + "engines": { + "node": ">= 10.17.0" + }, + "optionalDependencies": { + "@types/yauzl": "^2.9.1" + } + }, + "node_modules/extract-zip/node_modules/get-stream": { + "version": "5.2.0", + "resolved": "https://registry.npmjs.org/get-stream/-/get-stream-5.2.0.tgz", + "integrity": "sha512-nBF+F1rAZVCu/p7rjzgA+Yb4lfYXrpl7a6VmJrU8wF9I1CKvP/QwPNZHnOlwbTkY6dvtFIzFMSyQXbLoTQPRpA==", + "dev": true, + "license": "MIT", + "dependencies": { + "pump": "^3.0.0" + }, + "engines": { + "node": ">=8" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, "node_modules/fast-deep-equal": { "version": "3.1.3", "resolved": "https://registry.npmjs.org/fast-deep-equal/-/fast-deep-equal-3.1.3.tgz", @@ -8093,6 +8446,16 @@ "reusify": "^1.0.4" } }, + "node_modules/fd-slicer": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/fd-slicer/-/fd-slicer-1.1.0.tgz", + "integrity": "sha512-cE1qsB/VwyQozZ+q1dGxR8LBYNZeofhEdUNGSMbQD3Gw2lAzX9Zb3uIU6Ebc/Fmyjo9AWWfnn0AUCHqtevs/8g==", + "dev": true, + "license": "MIT", + "dependencies": { + "pend": "~1.2.0" + } + }, "node_modules/feed": { "version": "4.2.2", "resolved": "https://registry.npmjs.org/feed/-/feed-4.2.2.tgz", @@ -8655,6 +9018,25 @@ "node": ">=10.13.0" } }, + "node_modules/global-agent": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/global-agent/-/global-agent-3.0.0.tgz", + "integrity": "sha512-PT6XReJ+D07JvGoxQMkT6qji/jVNfX/h364XHZOWeRzy64sSFr+xJ5OX7LI3b4MPQzdL4H8Y8M0xzPpsVMwA8Q==", + "dev": true, + "license": "BSD-3-Clause", + "optional": true, + "dependencies": { + "boolean": "^3.0.1", + "es6-error": "^4.1.1", + "matcher": "^3.0.0", + "roarr": "^2.15.3", + "semver": "^7.3.2", + "serialize-error": "^7.0.1" + }, + "engines": { + "node": ">=10.0" + } + }, "node_modules/global-directory": { "version": "4.0.1", "resolved": "https://registry.npmjs.org/global-directory/-/global-directory-4.0.1.tgz", @@ -8712,6 +9094,32 @@ "url": "https://github.com/sponsors/ljharb" } }, + "node_modules/got": { + "version": "11.8.6", + "resolved": "https://registry.npmjs.org/got/-/got-11.8.6.tgz", + "integrity": "sha512-6tfZ91bOr7bOXnK7PRDCGBLa1H4U080YHNaAQ2KsMGlLEzRbk44nsZF2E1IeRc3vtJHPVbKCYgdFbaGO2ljd8g==", + "dev": true, + "license": "MIT", + "dependencies": { + "@sindresorhus/is": "^4.0.0", + "@szmarczak/http-timer": "^4.0.5", + "@types/cacheable-request": "^6.0.1", + "@types/responselike": "^1.0.0", + "cacheable-lookup": 
"^5.0.3", + "cacheable-request": "^7.0.2", + "decompress-response": "^6.0.0", + "http2-wrapper": "^1.0.0-beta.5.2", + "lowercase-keys": "^2.0.0", + "p-cancelable": "^2.0.0", + "responselike": "^2.0.0" + }, + "engines": { + "node": ">=10.19.0" + }, + "funding": { + "url": "https://github.com/sindresorhus/got?sponsor=1" + } + }, "node_modules/graceful-fs": { "version": "4.2.11", "resolved": "https://registry.npmjs.org/graceful-fs/-/graceful-fs-4.2.11.tgz", @@ -9152,6 +9560,13 @@ "url": "https://github.com/sponsors/wooorm" } }, + "node_modules/http-cache-semantics": { + "version": "4.1.1", + "resolved": "https://registry.npmjs.org/http-cache-semantics/-/http-cache-semantics-4.1.1.tgz", + "integrity": "sha512-er295DKPVsV82j5kw1Gjt+ADA/XYHsajl82cGNQG2eyoPkvgUhX+nDIyelzhIWbbsXP39EHcI6l5tYs2FYqYXQ==", + "dev": true, + "license": "BSD-2-Clause" + }, "node_modules/http-proxy-agent": { "version": "7.0.2", "resolved": "https://registry.npmjs.org/http-proxy-agent/-/http-proxy-agent-7.0.2.tgz", @@ -9165,6 +9580,20 @@ "node": ">= 14" } }, + "node_modules/http2-wrapper": { + "version": "1.0.3", + "resolved": "https://registry.npmjs.org/http2-wrapper/-/http2-wrapper-1.0.3.tgz", + "integrity": "sha512-V+23sDMr12Wnz7iTcDeJr3O6AIxlnvT/bmaAAAP/Xda35C90p9599p0F1eHR/N1KILWSoWVAiOMFjBBXaXSMxg==", + "dev": true, + "license": "MIT", + "dependencies": { + "quick-lru": "^5.1.1", + "resolve-alpn": "^1.0.0" + }, + "engines": { + "node": ">=10.19.0" + } + }, "node_modules/https-proxy-agent": { "version": "7.0.4", "resolved": "https://registry.npmjs.org/https-proxy-agent/-/https-proxy-agent-7.0.4.tgz", @@ -9970,6 +10399,14 @@ "integrity": "sha512-Bdboy+l7tA3OGW6FjyFHWkP5LuByj1Tk33Ljyq0axyzdk9//JSi2u3fP1QSmd1KNwq6VOKYGlAu87CisVir6Pw==", "dev": true }, + "node_modules/json-stringify-safe": { + "version": "5.0.1", + "resolved": "https://registry.npmjs.org/json-stringify-safe/-/json-stringify-safe-5.0.1.tgz", + "integrity": "sha512-ZClg6AaYvamvYEE82d3Iyd3vSSIjQ+odgjaTzRuO3s7toCdFKczob2i0zCh7JE8kWn17yvAWhUVxvqGwUalsRA==", + "dev": true, + "license": "ISC", + "optional": true + }, "node_modules/json5": { "version": "1.0.2", "resolved": "https://registry.npmjs.org/json5/-/json5-1.0.2.tgz", @@ -10343,6 +10780,16 @@ "url": "https://github.com/sponsors/typicode" } }, + "node_modules/lowercase-keys": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/lowercase-keys/-/lowercase-keys-2.0.0.tgz", + "integrity": "sha512-tqNXrS78oMOE73NMxK4EMLQsQowWf8jKooH9g7xPavRT706R6bkQJ6DY2Te7QukaZsulxa30wQ7bk0pm4XiHmA==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=8" + } + }, "node_modules/lru-cache": { "version": "10.2.2", "resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-10.2.2.tgz", @@ -10461,6 +10908,20 @@ "marked": ">=1 <13" } }, + "node_modules/matcher": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/matcher/-/matcher-3.0.0.tgz", + "integrity": "sha512-OkeDaAZ/bQCxeFAozM55PKcKU0yJMPGifLwV4Qgjitu+5MoAfSQN4lsLJeXZ1b8w0x+/Emda6MZgXS1jvsapng==", + "dev": true, + "license": "MIT", + "optional": true, + "dependencies": { + "escape-string-regexp": "^4.0.0" + }, + "engines": { + "node": ">=10" + } + }, "node_modules/mdast-util-find-and-replace": { "version": "3.0.1", "resolved": "https://registry.npmjs.org/mdast-util-find-and-replace/-/mdast-util-find-and-replace-3.0.1.tgz", @@ -11239,6 +11700,16 @@ "url": "https://github.com/sponsors/sindresorhus" } }, + "node_modules/mimic-response": { + "version": "1.0.1", + "resolved": 
"https://registry.npmjs.org/mimic-response/-/mimic-response-1.0.1.tgz", + "integrity": "sha512-j5EctnkH7amfV/q5Hgmoal1g2QHFJRraOtmx0JpIqkxhBhI/lJSl1nMpQ45hVarwNETOoWEimndZ4QK0RHxuxQ==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=4" + } + }, "node_modules/minimatch": { "version": "9.0.5", "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-9.0.5.tgz", @@ -14246,6 +14717,16 @@ "node": ">= 18" } }, + "node_modules/once": { + "version": "1.4.0", + "resolved": "https://registry.npmjs.org/once/-/once-1.4.0.tgz", + "integrity": "sha512-lNaJgI+2Q5URQBkccEKHTQOPaXdUxnZZElQTZY0MFUAuaEqe1E+Nyvgdz/aIyNi6Z9MzO5dv1H8n58/GELp3+w==", + "dev": true, + "license": "ISC", + "dependencies": { + "wrappy": "1" + } + }, "node_modules/onetime": { "version": "6.0.0", "resolved": "https://registry.npmjs.org/onetime/-/onetime-6.0.0.tgz", @@ -14365,6 +14846,16 @@ "url": "https://github.com/sponsors/sindresorhus" } }, + "node_modules/p-cancelable": { + "version": "2.1.1", + "resolved": "https://registry.npmjs.org/p-cancelable/-/p-cancelable-2.1.1.tgz", + "integrity": "sha512-BZOr3nRQHOntUjTrH8+Lh54smKHoHyur8We1V8DSMVrl5A2malOOwuJRnKRDjSnkoeBh4at6BwEnb5I7Jl31wg==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=8" + } + }, "node_modules/p-each-series": { "version": "3.0.0", "resolved": "https://registry.npmjs.org/p-each-series/-/p-each-series-3.0.0.tgz", @@ -14633,6 +15124,13 @@ "node": ">= 14.16" } }, + "node_modules/pend": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/pend/-/pend-1.2.0.tgz", + "integrity": "sha512-F3asv42UuXchdzt+xXqfW1OGlVBe+mxa2mqI0pg5yAHZPvFmY3Y6drSf/GQ1A86WgWEN9Kzh/WrgKa6iGcHXLg==", + "dev": true, + "license": "MIT" + }, "node_modules/perfect-debounce": { "version": "1.0.0", "resolved": "https://registry.npmjs.org/perfect-debounce/-/perfect-debounce-1.0.0.tgz", @@ -14847,6 +15345,16 @@ "integrity": "sha512-3ouUOpQhtgrbOa17J7+uxOTpITYWaGP7/AhoR3+A+/1e9skrzelGi/dXzEYyvbxubEF6Wn2ypscTKiKJFFn1ag==", "dev": true }, + "node_modules/progress": { + "version": "2.0.3", + "resolved": "https://registry.npmjs.org/progress/-/progress-2.0.3.tgz", + "integrity": "sha512-7PiHtLll5LdnKIMw100I+8xJXR5gW2QwWYkT6iJva0bXitZKa/XMrSbdmg3r2Xnaidz9Qumd0VPaMrZlF9V9sA==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=0.4.0" + } + }, "node_modules/proper-lockfile": { "version": "4.1.2", "resolved": "https://registry.npmjs.org/proper-lockfile/-/proper-lockfile-4.1.2.tgz", @@ -14900,6 +15408,17 @@ "license": "MIT", "optional": true }, + "node_modules/pump": { + "version": "3.0.2", + "resolved": "https://registry.npmjs.org/pump/-/pump-3.0.2.tgz", + "integrity": "sha512-tUPXtzlGM8FE3P0ZL6DVs/3P58k9nk8/jZeQCurTJylQA8qFYzHFfhBJkuqyE0FifOsQ0uKWekiZ5g8wtr28cw==", + "dev": true, + "license": "MIT", + "dependencies": { + "end-of-stream": "^1.1.0", + "once": "^1.3.1" + } + }, "node_modules/punycode": { "version": "2.3.1", "resolved": "https://registry.npmjs.org/punycode/-/punycode-2.3.1.tgz", @@ -14939,6 +15458,19 @@ } ] }, + "node_modules/quick-lru": { + "version": "5.1.1", + "resolved": "https://registry.npmjs.org/quick-lru/-/quick-lru-5.1.1.tgz", + "integrity": "sha512-WuyALRjWPDGtt/wzJiadO5AXY+8hZ80hVpe6MyivgraREW751X3SbhRvG3eLKOYN+8VEvqLcf3wdnt44Z4S4SA==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, "node_modules/rc": { "version": "1.2.8", "resolved": "https://registry.npmjs.org/rc/-/rc-1.2.8.tgz", @@ -15213,6 +15745,13 @@ "url": 
"https://github.com/sponsors/ljharb" } }, + "node_modules/resolve-alpn": { + "version": "1.2.1", + "resolved": "https://registry.npmjs.org/resolve-alpn/-/resolve-alpn-1.2.1.tgz", + "integrity": "sha512-0a1F4l73/ZFZOakJnQ3FvkJ2+gSTQWz/r2KE5OdDY0TxPm5h4GkqkWWfM47T7HsbnOtcJVEF4epCVy6u7Q3K+g==", + "dev": true, + "license": "MIT" + }, "node_modules/resolve-from": { "version": "5.0.0", "resolved": "https://registry.npmjs.org/resolve-from/-/resolve-from-5.0.0.tgz", @@ -15232,6 +15771,19 @@ "url": "https://github.com/privatenumber/resolve-pkg-maps?sponsor=1" } }, + "node_modules/responselike": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/responselike/-/responselike-2.0.1.tgz", + "integrity": "sha512-4gl03wn3hj1HP3yzgdI7d3lCkF95F21Pz4BPGvKHinyQzALR5CapwC8yIi0Rh58DEMQ/SguC03wFj2k0M/mHhw==", + "dev": true, + "license": "MIT", + "dependencies": { + "lowercase-keys": "^2.0.0" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, "node_modules/restore-cursor": { "version": "5.1.0", "resolved": "https://registry.npmjs.org/restore-cursor/-/restore-cursor-5.1.0.tgz", @@ -15404,6 +15956,33 @@ "url": "https://github.com/sponsors/isaacs" } }, + "node_modules/roarr": { + "version": "2.15.4", + "resolved": "https://registry.npmjs.org/roarr/-/roarr-2.15.4.tgz", + "integrity": "sha512-CHhPh+UNHD2GTXNYhPWLnU8ONHdI+5DI+4EYIAOaiD63rHeYlZvyh8P+in5999TTSFgUYuKUAjzRI4mdh/p+2A==", + "dev": true, + "license": "BSD-3-Clause", + "optional": true, + "dependencies": { + "boolean": "^3.0.1", + "detect-node": "^2.0.4", + "globalthis": "^1.0.1", + "json-stringify-safe": "^5.0.1", + "semver-compare": "^1.0.0", + "sprintf-js": "^1.1.2" + }, + "engines": { + "node": ">=8.0" + } + }, + "node_modules/roarr/node_modules/sprintf-js": { + "version": "1.1.3", + "resolved": "https://registry.npmjs.org/sprintf-js/-/sprintf-js-1.1.3.tgz", + "integrity": "sha512-Oo+0REFV59/rz3gfJNKQiBlwfHaSESl1pcGyABQsnnIfWOFt6JNj5gCog2U6MLZ//IGYD+nA8nI+mTShREReaA==", + "dev": true, + "license": "BSD-3-Clause", + "optional": true + }, "node_modules/rollup": { "version": "4.21.2", "resolved": "https://registry.npmjs.org/rollup/-/rollup-4.21.2.tgz", @@ -15791,6 +16370,14 @@ "node": ">=10" } }, + "node_modules/semver-compare": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/semver-compare/-/semver-compare-1.0.0.tgz", + "integrity": "sha512-YM3/ITh2MJ5MtzaM429anh+x2jiLVjqILF4m4oyQB18W7Ggea7BfqdH/wGMK7dDiMghv/6WG7znWMwUDzJiXow==", + "dev": true, + "license": "MIT", + "optional": true + }, "node_modules/semver-diff": { "version": "4.0.0", "resolved": "https://registry.npmjs.org/semver-diff/-/semver-diff-4.0.0.tgz", @@ -15818,6 +16405,23 @@ "url": "https://github.com/sponsors/sindresorhus" } }, + "node_modules/serialize-error": { + "version": "7.0.1", + "resolved": "https://registry.npmjs.org/serialize-error/-/serialize-error-7.0.1.tgz", + "integrity": "sha512-8I8TjW5KMOKsZQTvoxjuSIa7foAwPWGOts+6o7sgjz41/qMD9VQHEDxi6PBvK2l0MXUmqZyNpUK+T2tQaaElvw==", + "dev": true, + "license": "MIT", + "optional": true, + "dependencies": { + "type-fest": "^0.13.1" + }, + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, "node_modules/set-blocking": { "version": "2.0.0", "resolved": "https://registry.npmjs.org/set-blocking/-/set-blocking-2.0.0.tgz", @@ -16613,6 +17217,19 @@ "url": "https://github.com/sponsors/sindresorhus" } }, + "node_modules/sumchecker": { + "version": "3.0.1", + "resolved": "https://registry.npmjs.org/sumchecker/-/sumchecker-3.0.1.tgz", 
+ "integrity": "sha512-MvjXzkz/BOfyVDkG0oFOtBxHX2u3gKbMHIF/dXblZsgD3BWOFLmHovIpZY7BykJdAjcqRCBi1WYBNdEC9yI7vg==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "debug": "^4.1.0" + }, + "engines": { + "node": ">= 8.0" + } + }, "node_modules/super-regex": { "version": "1.0.0", "resolved": "https://registry.npmjs.org/super-regex/-/super-regex-1.0.0.tgz", @@ -17188,6 +17805,20 @@ "node": ">= 0.8.0" } }, + "node_modules/type-fest": { + "version": "0.13.1", + "resolved": "https://registry.npmjs.org/type-fest/-/type-fest-0.13.1.tgz", + "integrity": "sha512-34R7HTnG0XIJcBSn5XhDd7nNFPRcXYRZrBB2O2jdKqYODldSzBAqzsWoZYYvduky73toYS/ESqxPvkDf/F0XMg==", + "dev": true, + "license": "(MIT OR CC0-1.0)", + "optional": true, + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, "node_modules/typed-array-buffer": { "version": "1.0.2", "resolved": "https://registry.npmjs.org/typed-array-buffer/-/typed-array-buffer-1.0.2.tgz", @@ -18121,6 +18752,13 @@ "node": ">=8" } }, + "node_modules/wrappy": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/wrappy/-/wrappy-1.0.2.tgz", + "integrity": "sha512-l4Sp/DRseor9wL6EvV2+TuQn63dMkPjZ/sp9XkghTEbV9KlPS1xUsZ3u7/IQO4wxtcFB4bgpQPRcR3QCvezPcQ==", + "dev": true, + "license": "ISC" + }, "node_modules/xml-js": { "version": "1.6.11", "resolved": "https://registry.npmjs.org/xml-js/-/xml-js-1.6.11.tgz", @@ -18194,6 +18832,17 @@ "node": ">=12" } }, + "node_modules/yauzl": { + "version": "2.10.0", + "resolved": "https://registry.npmjs.org/yauzl/-/yauzl-2.10.0.tgz", + "integrity": "sha512-p4a9I6X6nu6IhoGmBqAcbJy1mlC4j27vEPZX9F4L4/vZT3Lyq1VkFHw/V/PUcB9Buo+DG3iHkT0x3Qya58zc3g==", + "dev": true, + "license": "MIT", + "dependencies": { + "buffer-crc32": "~0.2.3", + "fd-slicer": "~1.1.0" + } + }, "node_modules/yocto-queue": { "version": "1.1.1", "resolved": "https://registry.npmjs.org/yocto-queue/-/yocto-queue-1.1.1.tgz", diff --git a/package.json b/package.json index b8992eb4..372e7778 100644 --- a/package.json +++ b/package.json @@ -148,6 +148,7 @@ "@types/yargs": "^17.0.33", "@vitest/coverage-v8": "^2.1.4", "@vitest/ui": "^2.1.4", + "electron": "^33.2.0", "eslint": "^9.13.0", "eslint-import-resolver-typescript": "^3.6.3", "eslint-plugin-import": "^2.31.0", diff --git a/src/bindings/AddonTypes.ts b/src/bindings/AddonTypes.ts index 891d9df4..75b7de29 100644 --- a/src/bindings/AddonTypes.ts +++ b/src/bindings/AddonTypes.ts @@ -69,13 +69,14 @@ export type BindingModule = { getGpuDeviceInfo(): { deviceNames: string[] }, - getGpuType(): "cuda" | "vulkan" | "metal" | undefined, + getGpuType(): "cuda" | "vulkan" | "metal" | false | undefined, getSwapInfo(): { total: number, maxSize: number, free: number }, init(): Promise, + loadBackends(forceLoadLibraries?: boolean): void, dispose(): Promise }; diff --git a/src/bindings/Llama.ts b/src/bindings/Llama.ts index c9effbc7..bff6f34b 100644 --- a/src/bindings/Llama.ts +++ b/src/bindings/Llama.ts @@ -10,7 +10,7 @@ import {LlamaGrammar, LlamaGrammarOptions} from "../evaluator/LlamaGrammar.js"; import {ThreadsSplitter} from "../utils/ThreadsSplitter.js"; import {getLlamaClasses, LlamaClasses} from "../utils/getLlamaClasses.js"; import {BindingModule} from "./AddonTypes.js"; -import {BuildGpu, BuildMetadataFile, LlamaGpuType, LlamaLocks, LlamaLogLevel} from "./types.js"; +import {BuildGpu, BuildMetadataFile, LlamaGpuType, LlamaLocks, LlamaLogLevel, LlamaLogLevelGreaterThanOrEqual} from "./types.js"; import {MemoryOrchestrator, MemoryReservation} from 
"./utils/MemoryOrchestrator.js"; const LlamaLogLevelToAddonLogLevel: ReadonlyMap = new Map([ @@ -126,6 +126,11 @@ export class Llama { this._bindings.setLoggerLogLevel(LlamaLogLevelToAddonLogLevel.get(this._logLevel) ?? defaultLogLevel); } + this._bindings.loadBackends(); + const loadedGpu = bindings.getGpuType(); + if (loadedGpu == null || (loadedGpu === false && gpu !== false)) + this._bindings.loadBackends(true); + this._onExit = this._onExit.bind(this); process.on("exit", this._onExit); @@ -413,7 +418,9 @@ export class Llama { } try { - this._logger(level, message); + const transformedLogLevel = getTransformedLogLevel(level, message); + if (LlamaLogLevelGreaterThanOrEqual(transformedLogLevel, this._logLevel)) + this._logger(transformedLogLevel, message); } catch (err) { // the native addon code calls this function, so there's no use to throw an error here } @@ -597,3 +604,14 @@ function logMessageIsOnlyDots(message: string | null) { return true; } + +function getTransformedLogLevel(level: LlamaLogLevel, message: string): LlamaLogLevel { + if (level === LlamaLogLevel.warn && message.endsWith("the full capacity of the model will not be utilized")) + return LlamaLogLevel.info; + else if (level === LlamaLogLevel.warn && message.startsWith("ggml_metal_init: skipping kernel_") && message.endsWith("(not supported)")) + return LlamaLogLevel.log; + else if (level === LlamaLogLevel.warn && message.startsWith("ggml_cuda_init: GGML_CUDA_FORCE_") && message.endsWith(" no")) + return LlamaLogLevel.log; + + return level; +} diff --git a/src/bindings/getLlama.ts b/src/bindings/getLlama.ts index d9e3255f..dffeea50 100644 --- a/src/bindings/getLlama.ts +++ b/src/bindings/getLlama.ts @@ -72,6 +72,9 @@ export type LlamaOptions = { * Otherwise, throw a `NoBinaryFoundError` error. * - **`"forceRebuild"`**: Always build from source. * Be cautious with this option, as it will cause the build to fail on Windows when the binaries are in use by another process. + * - **`"try"`**: If a local build is found, use it. + * Otherwise, try to build from source and use the resulting binary. + * If building from source fails, use a prebuilt binary if found. * * When running from inside an Asar archive in Electron, building from source is not possible, so it'll never build from source. * To allow building from source in Electron apps, make sure you ship `node-llama-cpp` as an unpacked module. @@ -79,7 +82,7 @@ export type LlamaOptions = { * Defaults to `"auto"`. * On Electron, defaults to `"never"`. 
*/ - build?: "auto" | "never" | "forceRebuild", + build?: "auto" | "never" | "forceRebuild" | "try", /** * Set custom CMake options for llama.cpp @@ -380,6 +383,45 @@ export async function getLlamaForOptions({ if (buildGpusToTry.length === 0) throw new Error("No GPU types available to try building with"); + if (build === "try") { + if (canUsePrebuiltBinaries) { + try { + return await getLlamaForOptions({ + gpu, + logLevel, + logger, + build: "auto", + cmakeOptions, + existingPrebuiltBinaryMustMatchBuildOptions, + usePrebuiltBinaries: false, + progressLogs, + skipDownload, + maxThreads, + vramPadding, + ramPadding, + debug + }); + } catch (err) { + return await getLlamaForOptions({ + gpu, + logLevel, + logger, + build: "never", + cmakeOptions, + existingPrebuiltBinaryMustMatchBuildOptions, + usePrebuiltBinaries, + progressLogs, + skipDownload, + maxThreads, + vramPadding, + ramPadding, + debug + }); + } + } else + build = "auto"; + } + if (build === "auto" || build === "never") { for (let i = 0; i < buildGpusToTry.length; i++) { const gpu = buildGpusToTry[i]; @@ -544,7 +586,7 @@ async function loadExistingLlamaBinary({ buildMetadata }); const binaryCompatible = shouldTestBinaryBeforeLoading - ? await testBindingBinary(localBuildBinPath) + ? await testBindingBinary(localBuildBinPath, buildOptions.gpu) : true; if (binaryCompatible) { @@ -601,7 +643,7 @@ async function loadExistingLlamaBinary({ buildMetadata }); const binaryCompatible = shouldTestBinaryBeforeLoading - ? await testBindingBinary(prebuiltBinDetails.binaryPath) + ? await testBindingBinary(prebuiltBinDetails.binaryPath, buildOptions.gpu) : true; if (binaryCompatible) { diff --git a/src/bindings/utils/compileLLamaCpp.ts b/src/bindings/utils/compileLLamaCpp.ts index aceb9279..ec9655b4 100644 --- a/src/bindings/utils/compileLLamaCpp.ts +++ b/src/bindings/utils/compileLLamaCpp.ts @@ -9,7 +9,7 @@ import { buildMetadataFileName, documentationPageUrls, llamaCppDirectory, llamaDirectory, llamaLocalBuildBinsDirectory, llamaPrebuiltBinsDirectory, llamaToolchainsDirectory } from "../../config.js"; -import {BuildMetadataFile, BuildOptions, convertBuildOptionsToBuildOptionsJSON} from "../types.js"; +import {BuildGpu, BuildMetadataFile, BuildOptions, convertBuildOptionsToBuildOptionsJSON} from "../types.js"; import {spawnCommand, SpawnError} from "../../utils/spawnCommand.js"; import {downloadCmakeIfNeeded, fixXpackPermissions, getCmakePath, hasBuiltinCmake} from "../../utils/cmake.js"; import {getConsoleLogPrefix} from "../../utils/getConsoleLogPrefix.js"; @@ -31,7 +31,7 @@ export async function compileLlamaCpp(buildOptions: BuildOptions, compileOptions includeBuildOptionsInBinaryFolderName?: boolean, ensureLlamaCppRepoIsCloned?: boolean, downloadCmakeIfNeeded?: boolean, - ignoreWorkarounds?: ("cudaArchitecture")[], + ignoreWorkarounds?: ("cudaArchitecture" | "reduceParallelBuildThreads" | "singleBuildThread")[], envVars?: typeof process.env, ciMode?: boolean }): Promise { @@ -54,6 +54,12 @@ export async function compileLlamaCpp(buildOptions: BuildOptions, compileOptions const outDirectory = path.join(llamaLocalBuildBinsDirectory, finalBuildFolderName); + let parallelBuildThreads = getParallelBuildThreadsToUse(platform, buildOptions.gpu, ciMode); + if (ignoreWorkarounds.includes("singleBuildThread")) + parallelBuildThreads = 1; + else if (ignoreWorkarounds.includes("reduceParallelBuildThreads")) + parallelBuildThreads = reduceParallelBuildThreads(parallelBuildThreads); + await fs.mkdirp(llamaLocalBuildBinsDirectory); try { await withLockfile({ 
@@ -99,6 +105,9 @@ export async function compileLlamaCpp(buildOptions: BuildOptions, compileOptions if (!cmakeCustomOptions.has("GGML_AMX")) cmakeCustomOptions.set("GGML_AMX", "OFF"); + + if (!cmakeCustomOptions.has("GGML_NATIVE") && buildOptions.platform !== "mac") + cmakeCustomOptions.set("GGML_NATIVE", "OFF"); } await fs.remove(outDirectory); @@ -125,7 +134,7 @@ export async function compileLlamaCpp(buildOptions: BuildOptions, compileOptions "--arch=" + buildOptions.arch, "--out", path.relative(llamaDirectory, outDirectory), "--runtime-version=" + runtimeVersion, - "--parallel=" + getParallelBuildThreadsToUse(platform), + "--parallel=" + parallelBuildThreads, ...cmakePathArgs, ...( [...cmakeCustomOptions].map(([key, value]) => "--CD" + key + "=" + value) @@ -239,6 +248,40 @@ export async function compileLlamaCpp(buildOptions: BuildOptions, compileOptions console.error(getConsoleLogPrefix(true, false), err); } } + } else if ( + (!ignoreWorkarounds.includes("reduceParallelBuildThreads") || !ignoreWorkarounds.includes("singleBuildThread")) && + (platform === "win" || platform === "linux") && + err instanceof SpawnError && + reduceParallelBuildThreads(parallelBuildThreads) !== parallelBuildThreads && ( + err.combinedStd.toLowerCase().includes("LLVM error : out of memory".toLowerCase()) || + err.combinedStd.toLowerCase().includes("compiler is out of heap space".toLowerCase()) + ) + ) { + if (buildOptions.progressLogs) { + if (ignoreWorkarounds.includes("reduceParallelBuildThreads")) + console.info( + getConsoleLogPrefix(true) + "Trying to compile again with a single build thread" + ); + else + console.info( + getConsoleLogPrefix(true) + "Trying to compile again with reduced parallel build threads" + ); + } + + try { + return await compileLlamaCpp(buildOptions, { + ...compileOptions, + ignoreWorkarounds: [ + ...ignoreWorkarounds, + ignoreWorkarounds.includes("reduceParallelBuildThreads") + ? 
"singleBuildThread" + : "reduceParallelBuildThreads" + ] + }); + } catch (err) { + if (buildOptions.progressLogs) + console.error(getConsoleLogPrefix(true, false), err); + } } console.info("\n" + @@ -461,9 +504,12 @@ async function getToolchainFileForArch(targetArch: string) { return null; } -function getParallelBuildThreadsToUse(platform: BinaryPlatform) { +function getParallelBuildThreadsToUse(platform: BinaryPlatform, gpu?: BuildGpu, ciMode: boolean = false) { const cpuCount = os.cpus().length; + if (ciMode && platform === "win" && gpu === "cuda" && cpuCount === 4) + return 3; // workaround for `compiler is out of heap space` error on GitHub Actions on Windows when building with CUDA + if (cpuCount <= 4) return cpuCount; @@ -472,3 +518,7 @@ function getParallelBuildThreadsToUse(platform: BinaryPlatform) { return cpuCount - 2; } + +function reduceParallelBuildThreads(originalParallelBuildThreads: number) { + return Math.max(1, Math.round(originalParallelBuildThreads / 2)); +} diff --git a/src/bindings/utils/testBindingBinary.ts b/src/bindings/utils/testBindingBinary.ts index f49dbdc6..43e47ebe 100644 --- a/src/bindings/utils/testBindingBinary.ts +++ b/src/bindings/utils/testBindingBinary.ts @@ -3,6 +3,8 @@ import {fileURLToPath} from "url"; import {createRequire} from "module"; import path from "path"; import {getConsoleLogPrefix} from "../../utils/getConsoleLogPrefix.js"; +import {runningInElectron} from "../../utils/runtime.js"; +import {BuildGpu} from "../types.js"; import type {BindingModule} from "../AddonTypes.js"; const require = createRequire(import.meta.url); @@ -10,7 +12,7 @@ const __filename = fileURLToPath(import.meta.url); const detectedFileName = path.basename(__filename); const expectedFileName = "testBindingBinary"; -export function testBindingBinary(bindingBinaryPath: string, testTimeout: number = 1000 * 60 * 5): Promise { +export async function testBindingBinary(bindingBinaryPath: string, gpu: BuildGpu, testTimeout: number = 1000 * 60 * 5): Promise { if (!detectedFileName.startsWith(expectedFileName)) { console.warn( getConsoleLogPrefix() + @@ -22,32 +24,125 @@ export function testBindingBinary(bindingBinaryPath: string, testTimeout: number 'To resolve this issue, make sure that "node-llama-cpp" is not bundled together with other code and is imported as an external module with its original file structure.' 
); - return Promise.resolve(true); + return true; } - const subProcess = fork(__filename, [], { - detached: false, - env: { - ...process.env, - TEST_BINDING_CP: "true" + async function getForkFunction() { + if (runningInElectron) { + try { + const {utilityProcess} = await import("electron"); + + return { + type: "electron", + fork: utilityProcess.fork.bind(utilityProcess) + } as const; + } catch (err) { + // do nothing + } + } + + return { + type: "node", + fork + } as const; + } + + const forkFunction = await getForkFunction(); + + function createTestProcess({ + onMessage, + onExit + }: { + onMessage(message: ChildToParentMessage): void, + onExit(code: number): void + }): { + sendMessage(message: ParentToChildMessage): void, + killProcess(): void + } { + if (forkFunction.type === "electron") { + let exited = false; + const subProcess = forkFunction.fork(__filename, [], { + env: { + ...process.env, + TEST_BINDING_CP: "true" + } + }); + + function cleanupElectronFork() { + if (subProcess.pid != null || !exited) { + subProcess.kill(); + exited = true; + } + + process.off("exit", cleanupElectronFork); + } + + process.on("exit", cleanupElectronFork); + + subProcess.on("message", onMessage); + subProcess.on("exit", (code) => { + exited = true; + cleanupElectronFork(); + onExit(code); + }); + + return { + sendMessage: (message: ParentToChildMessage) => subProcess.postMessage(message), + killProcess: cleanupElectronFork + }; + } + + const subProcess = forkFunction.fork(__filename, [], { + detached: false, + silent: true, + env: { + ...process.env, + TEST_BINDING_CP: "true" + } + }); + + function cleanupNodeFork() { + if (subProcess.exitCode == null) + subProcess.kill("SIGKILL"); + + process.off("exit", cleanupNodeFork); + } + + process.on("exit", cleanupNodeFork); + + subProcess.on("message", onMessage); + subProcess.on("exit", (code) => { + cleanupNodeFork(); + onExit(code ?? -1); + }); + + if (subProcess.killed || subProcess.exitCode != null) { + cleanupNodeFork(); + onExit(subProcess.exitCode ?? 
-1); } - }); + + return { + sendMessage: (message: ParentToChildMessage) => subProcess.send(message), + killProcess: cleanupNodeFork + }; + } + let testPassed = false; let forkSucceeded = false; let timeoutHandle: ReturnType | null = null; + let subProcess: ReturnType | undefined = undefined; + let testFinished = false; + function cleanup() { - if (subProcess.exitCode == null) - subProcess.kill("SIGKILL"); + testFinished = true; if (timeoutHandle != null) clearTimeout(timeoutHandle); - process.off("exit", cleanup); + subProcess?.killProcess(); } - process.on("exit", cleanup); - return Promise.race([ new Promise((_, reject) => { timeoutHandle = setTimeout(() => { @@ -65,45 +160,58 @@ export function testBindingBinary(bindingBinaryPath: string, testTimeout: number cleanup(); } - subProcess.on("message", (message: ChildToParentMessage) => { - if (message.type === "ready") { - forkSucceeded = true; - subProcess.send({type: "start", bindingBinaryPath} satisfies ParentToChildMessage); - } else if (message.type === "done") { - testPassed = true; - subProcess.send({type: "exit"} satisfies ParentToChildMessage); + subProcess = createTestProcess({ + onMessage(message: ChildToParentMessage) { + if (message.type === "ready") { + forkSucceeded = true; + subProcess!.sendMessage({ + type: "start", + bindingBinaryPath, + gpu + }); + } else if (message.type === "done") { + testPassed = true; + subProcess!.sendMessage({type: "exit"}); + } + }, + onExit(code: number) { + if (code !== 0) + testPassed = false; + + done(); } }); - subProcess.on("exit", (code) => { - if (code !== 0) - testPassed = false; - - done(); - }); - - if (subProcess.killed || subProcess.exitCode != null) { - if (subProcess.exitCode !== 0) - testPassed = false; - - done(); - } + if (testFinished) + subProcess.killProcess(); }) ]); } -if (process.env.TEST_BINDING_CP === "true" && process.send != null) { - process.on("message", async (message: ParentToChildMessage) => { +if (process.env.TEST_BINDING_CP === "true" && (process.parentPort != null || process.send != null)) { + const sendMessage = process.parentPort != null + ? (message: ChildToParentMessage) => process.parentPort.postMessage(message) + : (message: ChildToParentMessage) => process.send!(message); + const onMessage = async (message: ParentToChildMessage) => { if (message.type === "start") { - if (process.send == null) - process.exit(1); - try { const binding: BindingModule = require(message.bindingBinaryPath); + + binding.loadBackends(); + const loadedGpu = binding.getGpuType(); + if (loadedGpu == null || (loadedGpu === false && message.gpu !== false)) + binding.loadBackends(true); + await binding.init(); binding.getGpuVramInfo(); binding.getGpuDeviceInfo(); - process.send({type: "done"} satisfies ChildToParentMessage); + + const gpuType = binding.getGpuType(); + void (gpuType as BuildGpu satisfies typeof gpuType); + if (gpuType !== message.gpu) + throw new Error(`Binary GPU type mismatch. 
Expected: ${message.gpu}, got: ${gpuType}`); + + sendMessage({type: "done"}); } catch (err) { console.error(err); process.exit(1); @@ -111,14 +219,20 @@ if (process.env.TEST_BINDING_CP === "true" && process.send != null) { } else if (message.type === "exit") { process.exit(0); } - }); + }; + + if (process.parentPort != null) + process.parentPort.on("message", (message) => onMessage(message.data)); + else + process.on("message", onMessage); - process.send({type: "ready"} satisfies ChildToParentMessage); + sendMessage({type: "ready"}); } type ParentToChildMessage = { type: "start", - bindingBinaryPath: string + bindingBinaryPath: string, + gpu: BuildGpu } | { type: "exit" }; diff --git a/src/chatWrappers/Llama3_1ChatWrapper.ts b/src/chatWrappers/Llama3_1ChatWrapper.ts index 00ffcf3b..d7c2f412 100644 --- a/src/chatWrappers/Llama3_1ChatWrapper.ts +++ b/src/chatWrappers/Llama3_1ChatWrapper.ts @@ -6,6 +6,7 @@ import { import {SpecialToken, LlamaText, SpecialTokensText} from "../utils/LlamaText.js"; import {ChatModelFunctionsDocumentationGenerator} from "./utils/ChatModelFunctionsDocumentationGenerator.js"; import {jsonDumps} from "./utils/jsonDumps.js"; +import {isLlama3_2LightweightModel} from "./utils/isLlama3_2LightweightModel.js"; // source: https://llama.meta.com/docs/model-cards-and-prompt-formats/llama3_1 export class Llama3_1ChatWrapper extends ChatWrapper { @@ -332,10 +333,10 @@ export class Llama3_1ChatWrapper extends ChatWrapper { public static override _checkModelCompatibility(options: ChatWrapperCheckModelCompatibilityParams): boolean { if (options.tokenizer != null) { const tokens = options.tokenizer("<|eom_id|>", true, "trimLeadingSpace"); - return tokens.length === 1 && options.tokenizer.isSpecialToken(tokens[0]!); + return tokens.length === 1 && options.tokenizer.isSpecialToken(tokens[0]!) 
&& !isLlama3_2LightweightModel(options); } - return true; + return !isLlama3_2LightweightModel(options); } /** @internal */ diff --git a/src/chatWrappers/Llama3_2LightweightChatWrapper.ts b/src/chatWrappers/Llama3_2LightweightChatWrapper.ts new file mode 100644 index 00000000..e38ff973 --- /dev/null +++ b/src/chatWrappers/Llama3_2LightweightChatWrapper.ts @@ -0,0 +1,354 @@ +import {ChatWrapper, ChatWrapperJinjaMatchConfiguration} from "../ChatWrapper.js"; +import { + ChatHistoryItem, ChatModelFunctions, ChatSystemMessage, ChatWrapperCheckModelCompatibilityParams, + ChatWrapperGenerateContextStateOptions, ChatWrapperGeneratedContextState, ChatWrapperSettings +} from "../types.js"; +import {SpecialToken, LlamaText, SpecialTokensText} from "../utils/LlamaText.js"; +import {ChatModelFunctionsDocumentationGenerator} from "./utils/ChatModelFunctionsDocumentationGenerator.js"; +import {isLlama3_2LightweightModel} from "./utils/isLlama3_2LightweightModel.js"; + +// source: https://llama.meta.com/docs/model-cards-and-prompt-formats/llama3_2/ +export class Llama3_2LightweightChatWrapper extends ChatWrapper { + public readonly wrapperName: string = "Llama 3.2 lightweight"; + + public readonly cuttingKnowledgeDate?: Date | (() => Date) | null; + public readonly todayDate: Date | (() => Date) | null; + public readonly noToolInstructions: boolean; + + /** @internal */ private readonly _specialTokensTextForPreamble: boolean; + + public override readonly settings: ChatWrapperSettings = { + supportsSystemMessages: true, + functions: { + call: { + optionalPrefixSpace: true, + prefix: '{"name": "', + paramsPrefix: '", "parameters": ', + suffix: LlamaText("}", new SpecialToken("EOT")) + }, + result: { + prefix: LlamaText(new SpecialToken("EOT"), new SpecialTokensText("<|start_header_id|>ipython<|end_header_id|>\n\n")), + suffix: LlamaText(new SpecialToken("EOT"), new SpecialTokensText("<|start_header_id|>assistant<|end_header_id|>\n\n")) + } + } + }; + + /** + * @param options + */ + public constructor(options: { + /** + * Set to `null` to disable + * + * Defaults to December 2023 + */ + cuttingKnowledgeDate?: Date | (() => Date) | number | string | null, + + /** + * Set to `null` to disable + * + * Defaults to current date + */ + todayDate?: Date | (() => Date) | number | string | null, + + noToolInstructions?: boolean, + + /** @internal */ + _specialTokensTextForPreamble?: boolean + } = {}) { + super(); + + const { + cuttingKnowledgeDate = new Date("2023-12-01T00:00:00Z"), + todayDate = () => new Date(), + noToolInstructions = false, + + _specialTokensTextForPreamble = false + } = options; + + this.cuttingKnowledgeDate = cuttingKnowledgeDate == null + ? null + : cuttingKnowledgeDate instanceof Function + ? cuttingKnowledgeDate + : new Date(cuttingKnowledgeDate); + this.todayDate = todayDate == null + ? null + : todayDate instanceof Function + ? todayDate + : new Date(todayDate); + this.noToolInstructions = noToolInstructions; + + this._specialTokensTextForPreamble = _specialTokensTextForPreamble; + } + + public override addAvailableFunctionsSystemMessageToHistory( + history: readonly ChatHistoryItem[], + availableFunctions?: ChatModelFunctions, { + documentParams = true + }: { + documentParams?: boolean + } = {} + ) { + const availableFunctionNames = Object.keys(availableFunctions ?? 
{}); + + if (availableFunctions == null || availableFunctionNames.length === 0) + return history; + + const res = history.slice(); + + const functionsSystemMessage: ChatSystemMessage = { + type: "system", + text: this.generateAvailableFunctionsSystemText(availableFunctions, {documentParams}).toJSON() + }; + + if (res.length >= 2 && res[0]!.type === "system" && res[1]!.type === "system") + res.splice(1, 0, functionsSystemMessage); + else + res.unshift({ + type: "system", + text: this.generateAvailableFunctionsSystemText(availableFunctions, {documentParams}).toJSON() + }); + + return res; + } + + public override generateContextState({ + chatHistory, availableFunctions, documentFunctionParams + }: ChatWrapperGenerateContextStateOptions): ChatWrapperGeneratedContextState { + const chatHistoryWithPreamble = this.prependPreambleToChatHistory(chatHistory); + const historyWithFunctions = this.addAvailableFunctionsSystemMessageToHistory(chatHistoryWithPreamble, availableFunctions, { + documentParams: documentFunctionParams + }); + + const resultItems: Array<{ + system: LlamaText | null, + user: LlamaText | null, + model: LlamaText | null + }> = []; + + let systemTexts: LlamaText[] = []; + let userTexts: LlamaText[] = []; + let modelTexts: LlamaText[] = []; + let currentAggregateFocus: "system" | "user" | "model" | null = null; + + const flush = () => { + if (systemTexts.length > 0 || userTexts.length > 0 || modelTexts.length > 0) + resultItems.push({ + system: systemTexts.length === 0 + ? null + : LlamaText.joinValues( + resultItems.length === 0 && this._specialTokensTextForPreamble + ? LlamaText(new SpecialTokensText("\n\n")) + : "\n\n", + systemTexts + ), + user: userTexts.length === 0 + ? null + : LlamaText.joinValues("\n\n", userTexts), + model: modelTexts.length === 0 + ? null + : LlamaText.joinValues("\n\n", modelTexts) + }); + + systemTexts = []; + userTexts = []; + modelTexts = []; + }; + + for (const item of historyWithFunctions) { + if (item.type === "system") { + if (currentAggregateFocus !== "system") + flush(); + + currentAggregateFocus = "system"; + systemTexts.push(LlamaText.fromJSON(item.text)); + } else if (item.type === "user") { + if (currentAggregateFocus !== "user") + flush(); + + currentAggregateFocus = "user"; + userTexts.push(LlamaText(item.text)); + } else if (item.type === "model") { + if (currentAggregateFocus !== "model") + flush(); + + currentAggregateFocus = "model"; + modelTexts.push(this.generateModelResponseText(item.response)); + } else + void (item satisfies never); + } + + flush(); + + const contextText = LlamaText( + new SpecialToken("BOS"), + resultItems.map((item, index) => { + const isLastItem = index === resultItems.length - 1; + const res: LlamaText[] = []; + + if (item.system != null) { + res.push( + LlamaText([ + new SpecialTokensText("<|start_header_id|>system<|end_header_id|>\n\n"), + item.system, + new SpecialToken("EOT") + ]) + ); + } + + if (item.user != null) { + res.push( + LlamaText([ + new SpecialTokensText("<|start_header_id|>user<|end_header_id|>\n\n"), + item.user, + new SpecialToken("EOT") + ]) + ); + } + + if (item.model != null) { + res.push( + LlamaText([ + new SpecialTokensText("<|start_header_id|>assistant<|end_header_id|>\n\n"), + item.model, + isLastItem + ? 
LlamaText([])
+                                : new SpecialToken("EOT")
+                        ])
+                    );
+                }
+
+                return LlamaText(res);
+            })
+        );
+
+        return {
+            contextText,
+            stopGenerationTriggers: [
+                LlamaText(new SpecialToken("EOS")),
+                LlamaText(new SpecialToken("EOT")),
+                LlamaText(new SpecialTokensText("<|eot_id|>")),
+                LlamaText(new SpecialTokensText("<|end_of_text|>")),
+                LlamaText("<|eot_id|>"),
+                LlamaText("<|end_of_text|>")
+            ]
+        };
+    }
+
+    public override generateAvailableFunctionsSystemText(availableFunctions: ChatModelFunctions, {documentParams = true}: {
+        documentParams?: boolean
+    }) {
+        const functionsDocumentationGenerator = new ChatModelFunctionsDocumentationGenerator(availableFunctions);
+
+        if (!functionsDocumentationGenerator.hasAnyFunctions)
+            return LlamaText([]);
+
+        return LlamaText.joinValues("\n", [
+            "You have access to the following functions. To call a function, please respond with JSON for a function call.",
+            'Respond in the format {"name": function name, "parameters": function call parameters}.',
+            "Do not use variables.",
+            "",
+            functionsDocumentationGenerator.getLlama3_2LightweightFunctionSignatures({documentParams}),
+            "",
+            "After calling a function, the result will appear afterwards and is only visible to you.",
+            "To make information visible to the user, you must include it in your response.",
+            "Do not tell the user about the functions you are using.",
+            "Only call functions when needed."
+        ]);
+    }
+
+    public prependPreambleToChatHistory(chatHistory: readonly ChatHistoryItem[]): readonly ChatHistoryItem[] {
+        const res = chatHistory.slice();
+
+        const formatMonthDate = (date: Date, timezone?: "UTC") => {
+            const today = this.todayDate instanceof Function
+                ? this.todayDate()
+                : (this.todayDate ?? new Date());
+
+            if (today.getUTCMonth() === date.getUTCMonth() && today.getUTCFullYear() === date.getUTCFullYear())
+                return formatDate(date, timezone);
+
+            const month = date.toLocaleDateString("en-US", {month: "long", timeZone: timezone});
+            const year = date.toLocaleDateString("en-US", {year: "numeric", timeZone: timezone});
+            return `${month} ${year}`;
+        };
+
+        const lines: string[] = [];
+
+        if (this.cuttingKnowledgeDate != null) {
+            const date = this.cuttingKnowledgeDate instanceof Function
+                ? this.cuttingKnowledgeDate()
+                : this.cuttingKnowledgeDate;
+
+            lines.push(`Cutting Knowledge Date: ${formatMonthDate(date, "UTC")}`);
+        }
+
+        if (this.todayDate != null) {
+            const date = this.todayDate instanceof Function
+                ? this.todayDate()
+                : this.todayDate;
+            lines.push(`Today Date: ${formatDate(date, undefined)}`);
+        }
+
+        if (lines.length > 0)
+            res.unshift({
+                type: "system",
+                text: this._specialTokensTextForPreamble
+                    ? LlamaText(new SpecialTokensText(lines.join("\n"))).toJSON()
+                    : LlamaText.joinValues("\n", lines).toJSON()
+            });
+
+        return res;
+    }
+
+    /** @internal */
+    public static override _checkModelCompatibility(options: ChatWrapperCheckModelCompatibilityParams): boolean {
+        if (options.tokenizer != null) {
+            const tokens = options.tokenizer("<|eom_id|>", true, "trimLeadingSpace");
+            return tokens.length === 1 && options.tokenizer.isSpecialToken(tokens[0]!) 
&& isLlama3_2LightweightModel(options); + } + + return isLlama3_2LightweightModel(options); + } + + /** @internal */ + public static override _getOptionConfigurationsToTestIfCanSupersedeJinjaTemplate() { + return [ + {}, + [{todayDate: null}, {}], + [{cuttingKnowledgeDate: null}, {}], + [{noToolInstructions: true}, {}], + [{todayDate: null, cuttingKnowledgeDate: null}, {}], + [{todayDate: null, cuttingKnowledgeDate: null, noToolInstructions: true}, {}], + [{todayDate: new Date("2024-07-26T00:00:00"), cuttingKnowledgeDate: null, noToolInstructions: true}, {}], + + [ + { + todayDate: new Date("2024-07-26T00:00:00"), + cuttingKnowledgeDate: new Date("2023-12-01T00:00:00Z"), + noToolInstructions: true + }, + {cuttingKnowledgeDate: new Date("2023-12-01T00:00:00Z")}, + {"date_string": formatDate(new Date("2024-07-26T00:00:00"), undefined)} + ], + + [ + { + todayDate: new Date("2024-07-26T00:00:00"), + cuttingKnowledgeDate: new Date("2023-12-01T00:00:00Z"), + noToolInstructions: true, + _specialTokensTextForPreamble: true + }, + {cuttingKnowledgeDate: new Date("2023-12-01T00:00:00Z")}, + {"date_string": formatDate(new Date("2024-07-26T00:00:00"), undefined)} + ] + ] satisfies ChatWrapperJinjaMatchConfiguration; + } +} + +function formatDate(date: Date, timezone?: "UTC") { + const day = date.toLocaleDateString("en-US", {day: "numeric", timeZone: timezone}); + const month = date.toLocaleDateString("en-US", {month: "short", timeZone: timezone}); + const year = date.toLocaleDateString("en-US", {year: "numeric", timeZone: timezone}); + return `${day} ${month} ${year}`; +} diff --git a/src/chatWrappers/utils/ChatModelFunctionsDocumentationGenerator.ts b/src/chatWrappers/utils/ChatModelFunctionsDocumentationGenerator.ts index 50916079..32b5642b 100644 --- a/src/chatWrappers/utils/ChatModelFunctionsDocumentationGenerator.ts +++ b/src/chatWrappers/utils/ChatModelFunctionsDocumentationGenerator.ts @@ -147,4 +147,41 @@ export class ChatModelFunctionsDocumentationGenerator { .join("\n\n"); } /* eslint-enable @stylistic/max-len */ + + /* eslint-disable @stylistic/max-len */ + /** + * Example: + * ``` + * {"name": "getDate", "description": "Retrieve the current date"} + * + * {"name": "getTime", "description": "Retrieve the current time", "parameters": {"type": "object", "properties": {"hours": {"enum": ["24", "12"]}, "seconds": {"type": "boolean"}}}} + * ``` + * @param options + * @param [options.documentParams] - Whether to document the parameters of the functions + */ + public getLlama3_2LightweightFunctionSignatures({documentParams = true}: {documentParams?: boolean} = {}) { + const chatModelFunctions = this.chatModelFunctions; + + if (!this.hasAnyFunctions || chatModelFunctions == null) + return ""; + + const functionNames = Object.keys(chatModelFunctions); + + const functionsLines = functionNames + .map((functionName) => { + const functionDefinition = chatModelFunctions[functionName]; + + const addDescription = functionDefinition?.description != null && functionDefinition.description.trim() !== ""; + + return jsonDumps({ + name: functionName, + ...(addDescription ? {description: functionDefinition.description} : {}), + ...(documentParams && functionDefinition?.params != null ? 
{parameters: functionDefinition.params} : {}) + }); + }) + .join("\n\n"); + + return functionsLines; + } + /* eslint-enable @stylistic/max-len */ } diff --git a/src/chatWrappers/utils/getModelLinageNames.ts b/src/chatWrappers/utils/getModelLinageNames.ts new file mode 100644 index 00000000..683bb844 --- /dev/null +++ b/src/chatWrappers/utils/getModelLinageNames.ts @@ -0,0 +1,23 @@ +import {GgufMetadata} from "../../gguf/types/GgufMetadataTypes.js"; + +export function getModelLinageNames(ggufMetadata?: GgufMetadata) { + const res: string[][] = []; + + if (ggufMetadata == null) + return res; + + const currentModelInfo = [ggufMetadata?.general?.name, ggufMetadata?.general?.basename] + .filter((v): v is string => v != null); + if (currentModelInfo.length > 0) + res.push(currentModelInfo); + + if (typeof ggufMetadata?.general?.base_model?.count === "number") { + for (let i = 0; i < ggufMetadata.general.base_model.count; i++) { + const baseModel = ggufMetadata.general.base_model[String(i) as `${bigint}`]; + if (baseModel?.name != null) + res.push([baseModel.name]); + } + } + + return res; +} diff --git a/src/chatWrappers/utils/isLlama3_2LightweightModel.ts b/src/chatWrappers/utils/isLlama3_2LightweightModel.ts new file mode 100644 index 00000000..bf71e1e3 --- /dev/null +++ b/src/chatWrappers/utils/isLlama3_2LightweightModel.ts @@ -0,0 +1,11 @@ +import {ChatWrapperCheckModelCompatibilityParams} from "../../types.js"; +import {includesText} from "../../utils/includesText.js"; +import {getModelLinageNames} from "./getModelLinageNames.js"; + +export function isLlama3_2LightweightModel(options: ChatWrapperCheckModelCompatibilityParams) { + const isLlama3_2 = getModelLinageNames(options.fileInfo?.metadata) + .some((modelNames) => includesText(modelNames, ["llama 3.2", "llama-3.2", "llama3.2"])); + const isSmallModel = (["1B", "3B"] as string[]).includes(options.fileInfo?.metadata?.general?.size_label ?? 
""); + + return isLlama3_2 && isSmallModel; +} diff --git a/src/chatWrappers/utils/resolveChatWrapper.ts b/src/chatWrappers/utils/resolveChatWrapper.ts index 001bb0a5..8be8dc27 100644 --- a/src/chatWrappers/utils/resolveChatWrapper.ts +++ b/src/chatWrappers/utils/resolveChatWrapper.ts @@ -11,14 +11,17 @@ import {JinjaTemplateChatWrapper, JinjaTemplateChatWrapperOptions} from "../gene import {TemplateChatWrapper} from "../generic/TemplateChatWrapper.js"; import {getConsoleLogPrefix} from "../../utils/getConsoleLogPrefix.js"; import {Llama3_1ChatWrapper} from "../Llama3_1ChatWrapper.js"; +import {Llama3_2LightweightChatWrapper} from "../Llama3_2LightweightChatWrapper.js"; import {MistralChatWrapper} from "../MistralChatWrapper.js"; import {Tokenizer} from "../../types.js"; +import {includesText} from "../../utils/includesText.js"; import {isJinjaTemplateEquivalentToSpecializedChatWrapper} from "./isJinjaTemplateEquivalentToSpecializedChatWrapper.js"; +import {getModelLinageNames} from "./getModelLinageNames.js"; import type {GgufFileInfo} from "../../gguf/types/GgufFileInfoTypes.js"; export const specializedChatWrapperTypeNames = Object.freeze([ - "general", "llama3.1", "llama3", "llama2Chat", "mistral", "alpacaChat", "functionary", "chatML", "falconChat", "gemma" + "general", "llama3.2-lightweight", "llama3.1", "llama3", "llama2Chat", "mistral", "alpacaChat", "functionary", "chatML", "falconChat", "gemma" ] as const); export type SpecializedChatWrapperTypeName = (typeof specializedChatWrapperTypeNames)[number]; @@ -37,6 +40,7 @@ export type ResolvableChatWrapperTypeName = (typeof resolvableChatWrapperTypeNam export const chatWrappers = Object.freeze({ "general": GeneralChatWrapper, "llama3.1": Llama3_1ChatWrapper, + "llama3.2-lightweight": Llama3_2LightweightChatWrapper, "llama3": Llama3ChatWrapper, "llama2Chat": Llama2ChatWrapper, "mistral": MistralChatWrapper, @@ -145,28 +149,6 @@ export function resolveChatWrapper(options: ResolveChatWrapperOptions): BuiltInC }); } - function getModelLinageNames(): string[][] { - const res: string[][] = []; - - if (fileInfo == null) - return res; - - const currentModelInfo = [fileInfo.metadata?.general?.name, fileInfo.metadata?.general?.basename] - .filter((v): v is string => v != null); - if (currentModelInfo.length > 0) - res.push(currentModelInfo); - - if (typeof fileInfo.metadata?.general?.base_model?.count === "number") { - for (let i = 0; i < fileInfo.metadata.general.base_model.count; i++) { - const baseModel = fileInfo.metadata.general.base_model[String(i) as `${bigint}`]; - if (baseModel?.name != null) - res.push([baseModel.name]); - } - } - - return res; - } - if (type !== "auto" && type != null) { if (isTemplateChatWrapperType(type)) { const Wrapper = chatWrappers[type]; @@ -293,8 +275,10 @@ export function resolveChatWrapper(options: ResolveChatWrapperOptions): BuiltInC } } - for (const modelNames of getModelLinageNames()) { - if (includesText(modelNames, ["llama 3.1", "llama-3.1", "llama3.1"]) && Llama3_1ChatWrapper._checkModelCompatibility({tokenizer, fileInfo})) + for (const modelNames of getModelLinageNames(fileInfo?.metadata)) { + if (includesText(modelNames, ["llama 3.2", "llama-3.2", "llama3.2"]) && Llama3_2LightweightChatWrapper._checkModelCompatibility({tokenizer, fileInfo})) + return createSpecializedChatWrapper(Llama3_2LightweightChatWrapper); + else if (includesText(modelNames, ["llama 3.1", "llama-3.1", "llama3.1"]) && Llama3_1ChatWrapper._checkModelCompatibility({tokenizer, fileInfo})) return 
createSpecializedChatWrapper(Llama3_1ChatWrapper); else if (includesText(modelNames, ["llama 3", "llama-3", "llama3"])) return createSpecializedChatWrapper(Llama3ChatWrapper); @@ -393,25 +377,6 @@ export function isTemplateChatWrapperType(type: string): type is TemplateChatWra return templateChatWrapperTypeNames.includes(type as any); } -function includesText( - value: string | string[] | null | undefined, - textToCheckFor: string | string[], - strictCase: boolean = false -): boolean { - if (value instanceof Array) - return value.some((v) => includesText(v, textToCheckFor, strictCase)); - else if (typeof value !== "string") - return false; - - if (textToCheckFor instanceof Array) - return textToCheckFor.some((t) => includesText(value, t, strictCase)); - - if (strictCase) - return value.includes(textToCheckFor); - - return value.toLowerCase().includes(textToCheckFor.toLowerCase()); -} - // this is needed because TypeScript guards don't work automatically with class references function isClassReference(value: any, classReference: T): value is T { return value === classReference; diff --git a/src/cli/commands/ChatCommand.ts b/src/cli/commands/ChatCommand.ts index 95496021..c80a4760 100644 --- a/src/cli/commands/ChatCommand.ts +++ b/src/cli/commands/ChatCommand.ts @@ -131,7 +131,7 @@ export const ChatCommand: CommandModule = { type: "string", default: "auto" as ChatCommand["wrapper"], choices: ["auto", ...specializedChatWrapperTypeNames] as const, - description: "Chat wrapper to use. Use `auto` to automatically select a wrapper based on the model's BOS token" + description: "Chat wrapper to use. Use `auto` to automatically select a wrapper based on the model's metadata and tokenizer" }) .option("noJinja", { type: "boolean", diff --git a/src/cli/commands/InitCommand.ts b/src/cli/commands/InitCommand.ts index c73ea1ad..fbe0601c 100644 --- a/src/cli/commands/InitCommand.ts +++ b/src/cli/commands/InitCommand.ts @@ -25,6 +25,7 @@ import {resolveModelDestination} from "../../utils/resolveModelDestination.js"; type InitCommand = { name?: string, template?: string, + model?: string, gpu?: BuildGpu | "auto" }; @@ -45,6 +46,10 @@ export const InitCommand: CommandModule = { choices: projectTemplates.map((template) => template.name), description: "Template to use. If omitted, you will be prompted to select one" }) + .option("model", { + type: "string", + description: "Model URI to use. If omitted, you will be prompted to select one interactively" + }) .option("gpu", { type: "string", @@ -73,7 +78,7 @@ export const CreateCliCommand: CommandModule = { handler: InitCommandHandler }; -export async function InitCommandHandler({name, template, gpu}: InitCommand) { +export async function InitCommandHandler({name, template, model, gpu}: InitCommand) { const currentDirectory = path.resolve(process.cwd()); const projectName = (name != null && validateNpmPackageName(name ?? "").validForNewPackages) ? name @@ -84,20 +89,36 @@ export async function InitCommandHandler({name, template, gpu}: InitCommand) { : undefined ) ?? await askForTemplate(); - const llama = gpu == null - ? 
await getLlama("lastBuild", {
-            logLevel: LlamaLogLevel.error
-        })
-        : await getLlama({
-            gpu,
-            logLevel: LlamaLogLevel.error
+    async function resolveModelUri() {
+        if (model != null && model !== "") {
+            try {
+                const resolvedModelDestination = resolveModelDestination(model, true);
+                if (resolvedModelDestination.type === "uri")
+                    return resolvedModelDestination.uri;
+                else if (resolvedModelDestination.type === "url")
+                    return resolvedModelDestination.url;
+            } catch (err) {
+                // do nothing
+            }
+        }
+
+        const llama = gpu == null
+            ? await getLlama("lastBuild", {
+                logLevel: LlamaLogLevel.error
+            })
+            : await getLlama({
+                gpu,
+                logLevel: LlamaLogLevel.error
+            });
+
+        return await interactivelyAskForModel({
+            llama,
+            allowLocalModels: false,
+            downloadIntent: false
         });
+    }
 
-    const modelUri = await interactivelyAskForModel({
-        llama,
-        allowLocalModels: false,
-        downloadIntent: false
-    });
+    const modelUri = await resolveModelUri();
 
     const targetDirectory = path.join(currentDirectory, projectName);
     const readableTargetDirectoryPath = getReadablePath(targetDirectory);
diff --git a/src/cli/recommendedModels.ts b/src/cli/recommendedModels.ts
index 67d9d038..64890baa 100644
--- a/src/cli/recommendedModels.ts
+++ b/src/cli/recommendedModels.ts
@@ -48,6 +48,18 @@ export const recommendedModels: ModelRecommendation[] = [{
         "hf:mradermacher/Mistral-Nemo-Instruct-2407-GGUF/Mistral-Nemo-Instruct-2407.Q4_K_M.gguf",
         "hf:mradermacher/Mistral-Nemo-Instruct-2407-GGUF/Mistral-Nemo-Instruct-2407.Q4_K_S.gguf"
     ]
+}, {
+    name: "Llama 3.2 3B",
+    abilities: ["chat", "complete", "functionCalling"],
+    description: "Llama 3.2 3B model was created by Meta and is optimized for assistant-like chat use cases, with support for function calling.\n" +
+        "This model is smarter than the 1B model, but is still relatively small and can run on less capable machines.",
+
+    fileOptions: [
+        "hf:mradermacher/Llama-3.2-3B-Instruct-GGUF/Llama-3.2-3B-Instruct.Q8_0.gguf",
+        "hf:mradermacher/Llama-3.2-3B-Instruct-GGUF/Llama-3.2-3B-Instruct.Q6_K.gguf",
+        "hf:mradermacher/Llama-3.2-3B-Instruct-GGUF/Llama-3.2-3B-Instruct.Q4_K_M.gguf",
+        "hf:mradermacher/Llama-3.2-3B-Instruct-GGUF/Llama-3.2-3B-Instruct.Q4_K_S.gguf"
+    ]
 }, {
     name: "Phi 3 3.8B",
     abilities: ["chat", "complete", "functionCalling"],
diff --git a/src/evaluator/LlamaChat/LlamaChat.ts b/src/evaluator/LlamaChat/LlamaChat.ts
index e1a8355d..9f71b2e3 100644
--- a/src/evaluator/LlamaChat/LlamaChat.ts
+++ b/src/evaluator/LlamaChat/LlamaChat.ts
@@ -446,6 +446,7 @@ export class LlamaChat {
         const loadContextWindow = async (avoidReloadingHistory: boolean = false) => {
             await generateResponseState.loadContextWindow(
                 generateResponseState.getResolvedHistoryWithCurrentModelResponse(),
+                generateResponseState.getContextWindowsHistoryWithCurrentModelResponse(),
                 false,
                 avoidReloadingHistory
             );
@@ -611,10 +612,17 @@ export class LlamaChat {
         return await withLock(this._chatLock, "evaluate", signal, async (): Promise => {
             try {
                 generateResponseState.ensureLastHistoryItemIsUser();
-                const lastResolvedHistoryItem = generateResponseState.resolvedHistory[generateResponseState.resolvedHistory.length - 1];
-                const initialUserMessage = lastResolvedHistoryItem?.type === "user"
-                    ? 
lastResolvedHistoryItem.text - : ""; + const getInitialUserMessage = (history: ChatHistoryItem[]) => { + const lastResolvedHistoryItem = history[history.length - 1]; + + if (lastResolvedHistoryItem?.type === "user") + return lastResolvedHistoryItem.text; + + return ""; + }; + + const initialUserMessage = getInitialUserMessage(generateResponseState.resolvedHistory); + const contextWindowInitialUserMessage = getInitialUserMessage(generateResponseState.lastContextWindowHistory); while (true) { generateResponseState.startTokenLoop(); @@ -623,6 +631,11 @@ export class LlamaChat { generateResponseState.resolvedHistory, initialUserMessage + initialUserPrompt + this.model.detokenize(generateResponseState.res) ), + setLastUserTextInChatHistory( + generateResponseState.lastContextWindowHistory, + contextWindowInitialUserMessage + initialUserPrompt + + this.model.detokenize(generateResponseState.contextWindowsRes) + ), true ); generateResponseState.functionEvaluationMode = false; @@ -1240,7 +1253,6 @@ class GenerateResponseState["maxParallelFunctionCalls"]; private readonly contextShift: LLamaChatGenerateResponseOptions["contextShift"]; private readonly customStopTriggers: LLamaChatGenerateResponseOptions["customStopTriggers"]; - private readonly lastEvaluationContextWindowHistory: Exclude["lastEvaluationContextWindow"], undefined>["history"]; private readonly minimumOverlapPercentageToPreventContextShift: Exclude["lastEvaluationContextWindow"], undefined>["minimumOverlapPercentageToPreventContextShift"], undefined>; public readonly functionsEnabled: boolean; @@ -1369,7 +1381,6 @@ class GenerateResponseState 0); @@ -1404,7 +1415,7 @@ class GenerateResponseState 0 && this.ignoreStartTextDetector.hasTriggeredStops && (forceRemove || !this.ignoreStartTextDetector.hasInProgressStops) @@ -1598,6 +1627,7 @@ class GenerateResponseState { @@ -1622,7 +1652,7 @@ class GenerateResponseState= this.llamaChat.sequence.context.contextSize - 1) - return await this.loadContextWindow(resolvedHistory, endWithUserText, false); + return await this.loadContextWindow(resolvedHistory, resolvedContextWindowsHistory, endWithUserText, false); return { userTextSuffix: this.userTextSuffix diff --git a/src/evaluator/LlamaChatSession/LlamaChatSession.ts b/src/evaluator/LlamaChatSession/LlamaChatSession.ts index e49ea9cf..f66e9cc0 100644 --- a/src/evaluator/LlamaChatSession/LlamaChatSession.ts +++ b/src/evaluator/LlamaChatSession/LlamaChatSession.ts @@ -470,7 +470,7 @@ export class LlamaChatSession { throw new DisposedError(); const supportsParallelFunctionCalling = this._chat.chatWrapper.settings.functions.parallelism != null; - const abortController = wrapAbortSignal(signal); + const [abortController, disposeAbortController] = wrapAbortSignal(signal); let lastEvaluation = this._lastEvaluation; let newChatHistory = appendUserMessageToChatHistory(this._chatHistory, prompt); let newContextWindowChatHistory = lastEvaluation?.contextWindow == null @@ -501,179 +501,185 @@ export class LlamaChatSession { safeEventCallback(onTextChunk)?.(resolvedResponsePrefix); } - while (true) { - const functionCallsAndResults: Array, - functionDefinition: ChatSessionModelFunction, - functionCallResult: any - }>> = []; - let canThrowFunctionCallingErrors = false; - let abortedOnFunctionCallError = false; - - const initialOutputTokens = this._chat.sequence.tokenMeter.usedOutputTokens; - const { - lastEvaluation: currentLastEvaluation, - metadata - } = await this._chat.generateResponse(newChatHistory, { - functions, - documentFunctionParams, - 
maxParallelFunctionCalls, - grammar: grammar as undefined, // this is a workaround to allow passing both `functions` and `grammar` - onTextChunk: safeEventCallback(onTextChunk), - onToken: safeEventCallback(onToken), - signal: abortController.signal, - stopOnAbortSignal, - repeatPenalty, - minP, - topK, - topP, - seed, - tokenBias, - customStopTriggers, - maxTokens, - temperature, - trimWhitespaceSuffix, - contextShift: { - ...this._contextShift, - lastEvaluationMetadata: lastEvaluation?.contextShiftMetadata - }, - evaluationPriority, - lastEvaluationContextWindow: { - history: newContextWindowChatHistory, - minimumOverlapPercentageToPreventContextShift: 0.5 - }, - onFunctionCall: async (functionCall) => { - functionCallsAndResults.push( - (async () => { - try { - const functionDefinition = functions?.[functionCall.functionName]; - - if (functionDefinition == null) - throw new Error( - `The model tried to call function "${functionCall.functionName}" which is not defined` - ); - - const functionCallResult = await functionDefinition.handler(functionCall.params); - - return { - functionCall, - functionDefinition, - functionCallResult - }; - } catch (err) { - if (!abortController.signal.aborted) { - abortedOnFunctionCallError = true; - abortController.abort(err); + try { + while (true) { + const functionCallsAndResults: Array, + functionDefinition: ChatSessionModelFunction, + functionCallResult: any + }>> = []; + let canThrowFunctionCallingErrors = false; + let abortedOnFunctionCallError = false; + + const initialOutputTokens = this._chat.sequence.tokenMeter.usedOutputTokens; + const { + lastEvaluation: currentLastEvaluation, + metadata + } = await this._chat.generateResponse(newChatHistory, { + functions, + documentFunctionParams, + maxParallelFunctionCalls, + grammar: grammar as undefined, // this is a workaround to allow passing both `functions` and `grammar` + onTextChunk: safeEventCallback(onTextChunk), + onToken: safeEventCallback(onToken), + signal: abortController.signal, + stopOnAbortSignal, + repeatPenalty, + minP, + topK, + topP, + seed, + tokenBias, + customStopTriggers, + maxTokens, + temperature, + trimWhitespaceSuffix, + contextShift: { + ...this._contextShift, + lastEvaluationMetadata: lastEvaluation?.contextShiftMetadata + }, + evaluationPriority, + lastEvaluationContextWindow: { + history: newContextWindowChatHistory, + minimumOverlapPercentageToPreventContextShift: 0.5 + }, + onFunctionCall: async (functionCall) => { + functionCallsAndResults.push( + (async () => { + try { + const functionDefinition = functions?.[functionCall.functionName]; + + if (functionDefinition == null) + throw new Error( + `The model tried to call function "${functionCall.functionName}" which is not defined` + ); + + const functionCallResult = await functionDefinition.handler(functionCall.params); + + return { + functionCall, + functionDefinition, + functionCallResult + }; + } catch (err) { + if (!abortController.signal.aborted) { + abortedOnFunctionCallError = true; + abortController.abort(err); + } + + if (canThrowFunctionCallingErrors) + throw err; + + return null; } - - if (canThrowFunctionCallingErrors) - throw err; - - return null; + })() + ); + } + }); + this._ensureNotDisposed(); + if (abortController.signal.aborted && (abortedOnFunctionCallError || !stopOnAbortSignal)) + throw abortController.signal.reason; + + if (maxTokens != null) + maxTokens = Math.max(0, maxTokens - (this._chat.sequence.tokenMeter.usedOutputTokens - initialOutputTokens)); + + lastEvaluation = currentLastEvaluation; + 
newChatHistory = lastEvaluation.cleanHistory; + + if (functionCallsAndResults.length > 0) { + canThrowFunctionCallingErrors = true; + const functionCallResultsPromise = Promise.all(functionCallsAndResults); + const raceEventAbortController = new AbortController(); + await Promise.race([ + functionCallResultsPromise, + new Promise((accept, reject) => { + abortController.signal.addEventListener("abort", () => { + if (abortedOnFunctionCallError || !stopOnAbortSignal) + reject(abortController.signal.reason); + else + accept(); + }, {signal: raceEventAbortController.signal}); + + if (abortController.signal.aborted) { + if (abortedOnFunctionCallError || !stopOnAbortSignal) + reject(abortController.signal.reason); + else + accept(); } - })() - ); - } - }); - this._ensureNotDisposed(); - if (abortController.signal.aborted && (abortedOnFunctionCallError || !stopOnAbortSignal)) - throw abortController.signal.reason; + }) + ]); + raceEventAbortController.abort(); + this._ensureNotDisposed(); - if (maxTokens != null) - maxTokens = Math.max(0, maxTokens - (this._chat.sequence.tokenMeter.usedOutputTokens - initialOutputTokens)); - - lastEvaluation = currentLastEvaluation; - newChatHistory = lastEvaluation.cleanHistory; - - if (functionCallsAndResults.length > 0) { - canThrowFunctionCallingErrors = true; - const functionCallResultsPromise = Promise.all(functionCallsAndResults); - await Promise.race([ - functionCallResultsPromise, - new Promise((accept, reject) => { - abortController.signal.addEventListener("abort", () => { - if (abortedOnFunctionCallError || !stopOnAbortSignal) - reject(abortController.signal.reason); - else - accept(); - }); - - if (abortController.signal.aborted) { - if (abortedOnFunctionCallError || !stopOnAbortSignal) - reject(abortController.signal.reason); - else - accept(); + if (!abortController.signal.aborted) { + const functionCallResults = (await functionCallResultsPromise) + .filter((result): result is Exclude => result != null); + this._ensureNotDisposed(); + + if (abortController.signal.aborted) + throw abortController.signal.reason; + + newContextWindowChatHistory = lastEvaluation.contextWindow; + + let startNewChunk = supportsParallelFunctionCalling; + for (const {functionCall, functionDefinition, functionCallResult} of functionCallResults) { + newChatHistory = addFunctionCallToChatHistory({ + chatHistory: newChatHistory, + functionName: functionCall.functionName, + functionDescription: functionDefinition.description, + callParams: functionCall.params, + callResult: functionCallResult, + rawCall: functionCall.raw, + startsNewChunk: startNewChunk + }); + + newContextWindowChatHistory = addFunctionCallToChatHistory({ + chatHistory: newContextWindowChatHistory, + functionName: functionCall.functionName, + functionDescription: functionDefinition.description, + callParams: functionCall.params, + callResult: functionCallResult, + rawCall: functionCall.raw, + startsNewChunk: startNewChunk + }); + + startNewChunk = false; } - }) - ]); - this._ensureNotDisposed(); - if (!abortController.signal.aborted) { - const functionCallResults = (await functionCallResultsPromise) - .filter((result): result is Exclude => result != null); - this._ensureNotDisposed(); + lastEvaluation.cleanHistory = newChatHistory; + lastEvaluation.contextWindow = newContextWindowChatHistory; - if (abortController.signal.aborted) - throw abortController.signal.reason; - - newContextWindowChatHistory = lastEvaluation.contextWindow; - - let startNewChunk = supportsParallelFunctionCalling; - for (const 
{functionCall, functionDefinition, functionCallResult} of functionCallResults) { - newChatHistory = addFunctionCallToChatHistory({ - chatHistory: newChatHistory, - functionName: functionCall.functionName, - functionDescription: functionDefinition.description, - callParams: functionCall.params, - callResult: functionCallResult, - rawCall: functionCall.raw, - startsNewChunk: startNewChunk - }); - - newContextWindowChatHistory = addFunctionCallToChatHistory({ - chatHistory: newContextWindowChatHistory, - functionName: functionCall.functionName, - functionDescription: functionDefinition.description, - callParams: functionCall.params, - callResult: functionCallResult, - rawCall: functionCall.raw, - startsNewChunk: startNewChunk - }); - - startNewChunk = false; + continue; } - - lastEvaluation.cleanHistory = newChatHistory; - lastEvaluation.contextWindow = newContextWindowChatHistory; - - continue; } - } - this._lastEvaluation = lastEvaluation; - this._chatHistory = newChatHistory; - this._chatHistoryStateRef = {}; + this._lastEvaluation = lastEvaluation; + this._chatHistory = newChatHistory; + this._chatHistoryStateRef = {}; - const lastModelResponseItem = getLastModelResponseItem(newChatHistory); - const responseText = lastModelResponseItem.response - .filter((item): item is string => typeof item === "string") - .join(""); + const lastModelResponseItem = getLastModelResponseItem(newChatHistory); + const responseText = lastModelResponseItem.response + .filter((item): item is string => typeof item === "string") + .join(""); + + if (metadata.stopReason === "customStopTrigger") + return { + response: lastModelResponseItem.response, + responseText, + stopReason: metadata.stopReason, + customStopTrigger: metadata.customStopTrigger, + remainingGenerationAfterStop: metadata.remainingGenerationAfterStop + }; - if (metadata.stopReason === "customStopTrigger") return { response: lastModelResponseItem.response, responseText, stopReason: metadata.stopReason, - customStopTrigger: metadata.customStopTrigger, remainingGenerationAfterStop: metadata.remainingGenerationAfterStop }; - - return { - response: lastModelResponseItem.response, - responseText, - stopReason: metadata.stopReason, - remainingGenerationAfterStop: metadata.remainingGenerationAfterStop - }; + } + } finally { + disposeAbortController(); } }); } @@ -755,7 +761,7 @@ export class LlamaChatSession { throw new Error("The LlamaGrammar used by passed to this function was created with a different Llama instance than the one used by this sequence's model. 
Make sure you use the same Llama instance for both the model and the grammar."); } - const abortController = wrapAbortSignal(signal); + const [abortController, disposeAbortController] = wrapAbortSignal(signal); this._preloadAndCompleteAbortControllers.add(abortController); try { @@ -821,6 +827,7 @@ export class LlamaChatSession { }); } finally { this._preloadAndCompleteAbortControllers.delete(abortController); + disposeAbortController(); } } diff --git a/src/index.ts b/src/index.ts index 0ec8ab11..2f0f30ec 100644 --- a/src/index.ts +++ b/src/index.ts @@ -42,6 +42,7 @@ import {UnsupportedError} from "./utils/UnsupportedError.js"; import {InsufficientMemoryError} from "./utils/InsufficientMemoryError.js"; import {ChatWrapper} from "./ChatWrapper.js"; import {EmptyChatWrapper} from "./chatWrappers/EmptyChatWrapper.js"; +import {Llama3_2LightweightChatWrapper} from "./chatWrappers/Llama3_2LightweightChatWrapper.js"; import {Llama3_1ChatWrapper} from "./chatWrappers/Llama3_1ChatWrapper.js"; import {Llama3ChatWrapper} from "./chatWrappers/Llama3ChatWrapper.js"; import {Llama2ChatWrapper} from "./chatWrappers/Llama2ChatWrapper.js"; @@ -178,6 +179,7 @@ export { type ChatWrapperGeneratedContextState, type ChatWrapperGenerateInitialHistoryOptions, EmptyChatWrapper, + Llama3_2LightweightChatWrapper, Llama3_1ChatWrapper, Llama3ChatWrapper, Llama2ChatWrapper, diff --git a/src/utils/includesText.ts b/src/utils/includesText.ts new file mode 100644 index 00000000..5f8acceb --- /dev/null +++ b/src/utils/includesText.ts @@ -0,0 +1,18 @@ +export function includesText( + value: string | string[] | null | undefined, + textToCheckFor: string | string[], + strictCase: boolean = false +): boolean { + if (value instanceof Array) + return value.some((v) => includesText(v, textToCheckFor, strictCase)); + else if (typeof value !== "string") + return false; + + if (textToCheckFor instanceof Array) + return textToCheckFor.some((t) => includesText(value, t, strictCase)); + + if (strictCase) + return value.includes(textToCheckFor); + + return value.toLowerCase().includes(textToCheckFor.toLowerCase()); +} diff --git a/src/utils/wrapAbortSignal.ts b/src/utils/wrapAbortSignal.ts index cce2dac4..671a78df 100644 --- a/src/utils/wrapAbortSignal.ts +++ b/src/utils/wrapAbortSignal.ts @@ -1,10 +1,17 @@ -export function wrapAbortSignal(abortSignal?: AbortSignal) { +export function wrapAbortSignal(abortSignal?: AbortSignal): [controller: AbortController, dispose: (() => void)] { const controller = new AbortController(); + function onAbort() { + controller.abort(abortSignal!.reason); + } + + function dispose() { + if (abortSignal != null) + abortSignal.removeEventListener("abort", onAbort); + } + if (abortSignal != null) - abortSignal.addEventListener("abort", () => { - controller.abort(abortSignal.reason); - }); + abortSignal.addEventListener("abort", onAbort); - return controller; + return [controller, dispose]; } diff --git a/templates/README.md b/templates/README.md new file mode 100644 index 00000000..3796f65d --- /dev/null +++ b/templates/README.md @@ -0,0 +1,6 @@ +Use the following command to scaffold a project from a template: +```bash +npm create node-llama-cpp@latest +``` + +> Don't use the templates in this repository directly. 
They are built to be used with the `npm create` command ([learn more](https://node-llama-cpp.withcat.ai/guide/))
diff --git a/templates/electron-typescript-react/electron-builder.ts b/templates/electron-typescript-react/electron-builder.ts
index 1e0fcb1f..3b99d153 100644
--- a/templates/electron-typescript-react/electron-builder.ts
+++ b/templates/electron-typescript-react/electron-builder.ts
@@ -35,10 +35,10 @@ export default {
         "dist-electron",
         "!node_modules/node-llama-cpp/bins/**/*",
         "node_modules/node-llama-cpp/bins/${os}-${arch}*/**/*",
-        "!node_modules/@node-llama-cpp/*/bins/**/*",
-        "node_modules/@node-llama-cpp/${os}-${arch}*/bins/**/*",
         "!node_modules/node-llama-cpp/llama/localBuilds/**/*",
-        "node_modules/node-llama-cpp/llama/localBuilds/${os}-${arch}*/**/*"
+        "node_modules/node-llama-cpp/llama/localBuilds/${os}-${arch}*/**/*",
+        "!node_modules/@node-llama-cpp/*/bins/**/*",
+        "node_modules/@node-llama-cpp/${os}-${arch}*/bins/**/*"
     ],
     asarUnpack: [
         "node_modules/node-llama-cpp/bins",
diff --git a/test/modelDependent/llama3.2/completion.test.ts b/test/modelDependent/llama3.2/completion.test.ts
new file mode 100644
index 00000000..5bb88834
--- /dev/null
+++ b/test/modelDependent/llama3.2/completion.test.ts
@@ -0,0 +1,25 @@
+import {describe, expect, test} from "vitest";
+import {LlamaChatSession, Llama3_2LightweightChatWrapper} from "../../../src/index.js";
+import {getModelFile} from "../../utils/modelFiles.js";
+import {getTestLlama} from "../../utils/getTestLlama.js";
+
+describe("llama 3.2", () => {
+    describe("chatSession", () => {
+        test("resolved to the correct chat wrapper", {timeout: 1000 * 60 * 60 * 2}, async () => {
+            const modelPath = await getModelFile("Llama-3.2-3B-Instruct.Q4_K_M.gguf");
+            const llama = await getTestLlama();
+
+            const model = await llama.loadModel({
+                modelPath
+            });
+            const context = await model.createContext({
+                contextSize: 4096
+            });
+            const chatSession = new LlamaChatSession({
+                contextSequence: context.getSequence()
+            });
+
+            expect(chatSession.chatWrapper).to.be.instanceof(Llama3_2LightweightChatWrapper);
+        });
+    });
+});
diff --git a/test/utils/modelFiles.ts b/test/utils/modelFiles.ts
index c4e2f9e7..94d54664 100644
--- a/test/utils/modelFiles.ts
+++ b/test/utils/modelFiles.ts
@@ -17,7 +17,8 @@ const supportedModels = {
     "Meta-Llama-3-8B-Instruct-Q4_K_M.gguf": "https://huggingface.co/bartowski/Meta-Llama-3-8B-Instruct-GGUF/resolve/main/Meta-Llama-3-8B-Instruct-Q4_K_M.gguf?download=true",
     "lora-Llama-3-Instruct-abliteration-LoRA-8B-f16.gguf": "https://huggingface.co/ngxson/test_gguf_lora_adapter/resolve/main/lora-Llama-3-Instruct-abliteration-LoRA-8B-f16.gguf?download=true",
     "Meta-Llama-3.1-8B-Instruct.Q4_K_M.gguf": "https://huggingface.co/mradermacher/Meta-Llama-3.1-8B-Instruct-GGUF/resolve/main/Meta-Llama-3.1-8B-Instruct.Q4_K_M.gguf?download=true",
-    "codegemma-2b-Q4_K_M.gguf": "https://huggingface.co/bartowski/codegemma-2b-GGUF/resolve/main/codegemma-2b-Q4_K_M.gguf?download=true"
+    "codegemma-2b-Q4_K_M.gguf": "https://huggingface.co/bartowski/codegemma-2b-GGUF/resolve/main/codegemma-2b-Q4_K_M.gguf?download=true",
+    "Llama-3.2-3B-Instruct.Q4_K_M.gguf": "https://huggingface.co/mradermacher/Llama-3.2-3B-Instruct-GGUF/resolve/main/Llama-3.2-3B-Instruct.Q4_K_M.gguf?download=true"
 } as const;
 
 export async function getModelFile(modelName: keyof typeof supportedModels) {