From 97abbca063f0ccf1b69607638d42f5ccc6ee1e2d Mon Sep 17 00:00:00 2001 From: "Gilad S." <7817232+giladgd@users.noreply.github.com> Date: Thu, 28 Nov 2024 01:03:52 +0200 Subject: [PATCH] feat: Llama 3.2 3B function calling support (#386) * feat: Llama 3.2 3B function calling support * feat: use `llama.cpp` backend registry for GPUs instead of custom implementations * feat(`getLlama`): `build: "try"` option * feat(`init` command): `--model` flag * fix: improve binary compatibility testing on Electron apps * fix: too many abort signal listeners * fix: log level of some lower level logs * fix: context window missing response during generation on specific extreme conditions * fix: adapt to breaking `llama.cpp` changes * fix: automatically resolve `compiler is out of heap space` CUDA build error * chore: update bug report template * docs: separate open source and proprietary sections in the awesome list * docs(troubleshooting): Electron build error on Windows * docs(Electron): GitHub Actions template for cross-compilation --- .github/ISSUE_TEMPLATE/bug-report.yml | 10 +- .github/workflows/build.yml | 25 +- .gitignore | 3 + .vitepress/assets/ogTemplate.svg | 10 +- .vitepress/config.ts | 21 +- docs/guide/awesome.md | 9 +- docs/guide/electron.md | 59 +- docs/guide/troubleshooting.md | 7 + docs/public/robots.txt | 1 + llama/CMakeLists.txt | 100 +-- llama/addon/addon.cpp | 17 + llama/addon/globals/getGpuInfo.cpp | 106 +-- llama/gpuInfo/cuda-gpu-info.cu | 120 ---- llama/gpuInfo/cuda-gpu-info.h | 10 - llama/gpuInfo/metal-gpu-info.h | 8 - llama/gpuInfo/metal-gpu-info.mm | 37 - llama/gpuInfo/vulkan-gpu-info.cpp | 8 - llama/gpuInfo/vulkan-gpu-info.h | 1 - package-lock.json | 649 ++++++++++++++++++ package.json | 1 + src/bindings/AddonTypes.ts | 3 +- src/bindings/Llama.ts | 22 +- src/bindings/getLlama.ts | 48 +- src/bindings/utils/compileLLamaCpp.ts | 58 +- src/bindings/utils/testBindingBinary.ts | 198 ++++-- src/chatWrappers/Llama3_1ChatWrapper.ts | 5 +- .../Llama3_2LightweightChatWrapper.ts | 354 ++++++++++ ...hatModelFunctionsDocumentationGenerator.ts | 37 + src/chatWrappers/utils/getModelLinageNames.ts | 23 + .../utils/isLlama3_2LightweightModel.ts | 11 + src/chatWrappers/utils/resolveChatWrapper.ts | 53 +- src/cli/commands/ChatCommand.ts | 2 +- src/cli/commands/InitCommand.ts | 47 +- src/cli/recommendedModels.ts | 12 + src/evaluator/LlamaChat/LlamaChat.ts | 48 +- .../LlamaChatSession/LlamaChatSession.ts | 323 ++++----- src/index.ts | 2 + src/utils/includesText.ts | 18 + src/utils/wrapAbortSignal.ts | 17 +- templates/README.md | 6 + .../electron-builder.ts | 6 +- .../llama3.2/completion.test.ts | 25 + test/utils/modelFiles.ts | 3 +- 43 files changed, 1890 insertions(+), 633 deletions(-) delete mode 100644 llama/gpuInfo/cuda-gpu-info.cu delete mode 100644 llama/gpuInfo/cuda-gpu-info.h delete mode 100644 llama/gpuInfo/metal-gpu-info.h delete mode 100644 llama/gpuInfo/metal-gpu-info.mm create mode 100644 src/chatWrappers/Llama3_2LightweightChatWrapper.ts create mode 100644 src/chatWrappers/utils/getModelLinageNames.ts create mode 100644 src/chatWrappers/utils/isLlama3_2LightweightModel.ts create mode 100644 src/utils/includesText.ts create mode 100644 templates/README.md create mode 100644 test/modelDependent/llama3.2/completion.test.ts diff --git a/.github/ISSUE_TEMPLATE/bug-report.yml b/.github/ISSUE_TEMPLATE/bug-report.yml index 4e2032b4..40b0f4a0 100644 --- a/.github/ISSUE_TEMPLATE/bug-report.yml +++ b/.github/ISSUE_TEMPLATE/bug-report.yml @@ -27,8 +27,9 @@ body: attributes: label: Actual Behavior 
description: >- - A clear and concise description of what actually happened. Please wrap - any error messages or output in code tags, instead of images. + A clear and concise description of what actually happened. + Please wrap any error messages or output in code tags, instead of images. + Please attach logs if relevant. validations: required: true - type: textarea @@ -60,6 +61,11 @@ body: | Node.js version | x.y.zzz | | Typescript version | x.y.zzz | | `node-llama-cpp` version | x.y.zzz | + + `npx --yes node-llama-cpp inspect gpu` output: + ``` + Result of running `npx --yes node-llama-cpp inspect gpu` + ``` validations: required: true - type: textarea diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 8c5270ea..c574af59 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -65,9 +65,12 @@ jobs: - name: "Ubuntu" os: ubuntu-22.04 artifact: "linux" - - name: "macOS" + - name: "macOS x64" os: macos-13 - artifact: "mac" + artifact: "mac-x64" + - name: "macOS arm64" + os: macos-14 + artifact: "mac-arm64" steps: - uses: actions/checkout@v4 @@ -87,11 +90,18 @@ jobs: name: llama.cpp path: llama - - name: Install dependencies on Windows - if: startsWith(matrix.config.os, 'windows') + - name: Install dependencies on Windows for x64 + if: matrix.config.name == 'Windows for x64' run: | choco install ninja cmake + - name: Install dependencies on Windows + if: matrix.config.name == 'Windows for Arm' + run: | + choco install cmake.install --version=3.31.1 + choco install cmake --version=3.31.1 + choco install ninja + - name: Install dependencies on Ubuntu if: matrix.config.name == 'Ubuntu' run: | @@ -148,7 +158,7 @@ jobs: - name: Setup & Build id: build shell: bash - timeout-minutes: 200 + timeout-minutes: 300 env: ARTIFACT_NAME: ${{ matrix.config.artifact }} run: | @@ -212,9 +222,10 @@ jobs: await buildBinary("x64", ["--gpu", "vulkan"]); await buildBinary("arm64", ["--gpu", "false"]); await buildBinary("armv7l", ["--gpu", "false"]); - } else if (process.env.ARTIFACT_NAME === "mac") { - await buildBinary("arm64", ["--gpu", "metal"]); + } else if (process.env.ARTIFACT_NAME === "mac-x64") { await buildBinary("x64", ["--gpu", "false"]); + } else if (process.env.ARTIFACT_NAME === "mac-arm64") { + await buildBinary("arm64", ["--gpu", "metal"]); } // move binaries to bins diff --git a/.gitignore b/.gitignore index 69ccf614..8eda1fd0 100644 --- a/.gitignore +++ b/.gitignore @@ -24,7 +24,10 @@ node_modules /llama/lastBuild.json /llama/gitRelease.bundle /llama/.temp +/llama/.cache +/llama/build /llama/.idea +/llama/.vscode /llama/cmake-build-debug /llama/localBuilds /llama/Release diff --git a/.vitepress/assets/ogTemplate.svg b/.vitepress/assets/ogTemplate.svg index 53673b64..95f55af1 100644 --- a/.vitepress/assets/ogTemplate.svg +++ b/.vitepress/assets/ogTemplate.svg @@ -75,12 +75,12 @@ - node-llama-cpp + node-llama-cpp - {{category}} - {{line1}} - {{line2}} - {{line3}} + {{category}} + {{line1}} + {{line2}} + {{line3}} diff --git a/.vitepress/config.ts b/.vitepress/config.ts index 433fa506..b4bd72d4 100644 --- a/.vitepress/config.ts +++ b/.vitepress/config.ts @@ -122,12 +122,22 @@ export default defineConfig({ return items .map((item) => { - if (item.url === "" || item.url === "blog/") { + if (item.url === "") { + item.lastmod = undefined; + item.changefreq = "daily"; + item.priority = 1; + } else if (item.url === "blog/") { item.lastmod = new Date(buildDate); + item.changefreq = "daily"; + item.priority = 0.9; } else if (item.url.startsWith("api/") || 
item.url.startsWith("cli/")) { item = { ...item, - lastmod: new Date(buildDate) + lastmod: new Date(buildDate), + changefreq: "weekly", + priority: item.url.startsWith("cli/") + ? 0.7 + : 0.5 }; } else if (item.lastmod == null && item.url.startsWith("blog/")) { const postDate = blogPostMap.get(item.url)?.frontmatter.date; @@ -138,6 +148,13 @@ export default defineConfig({ } } else if (item.lastmod == null) { item.lastmod = new Date(buildDate); + item.changefreq = "weekly"; + item.priority = 0.4; + } + + if (item.url !== "blog/" && item.url.startsWith("blog/")) { + item.priority = 0.8; + item.changefreq = "hourly"; } return item; diff --git a/docs/guide/awesome.md b/docs/guide/awesome.md index 1632b809..1e8df827 100644 --- a/docs/guide/awesome.md +++ b/docs/guide/awesome.md @@ -1,10 +1,15 @@ # Awesome `node-llama-cpp` Awesome projects that use `node-llama-cpp`. ---- - +## Open Source * [CatAI](https://github.com/withcatai/catai) - a simplified AI assistant API for Node.js, with REST API support +## Proprietary +> List your project here! + + +
+ --- > To have a project listed here, it should clearly state that it uses `node-llama-cpp`. diff --git a/docs/guide/electron.md b/docs/guide/electron.md index dc1fc2aa..9fe74e0c 100644 --- a/docs/guide/electron.md +++ b/docs/guide/electron.md @@ -34,10 +34,67 @@ You also need to call [`getLlama`](../api/functions/getLlama.md) with the CMake so that `node-llama-cpp` can find them. ## Cross Compilation -Cross packaging from one platform to another is not supported, since binaries for other platforms are not downloaded to you machine when your run `npm install`. +Cross packaging from one platform to another is not supported, since binaries for other platforms are not downloaded to your machine when you run `npm install`. Packaging an `arm64` app on an `x64` machine is supported, but packaging an `x64` app on an `arm64` machine is not. +::: details GitHub Actions template for cross-compilation + + + +```yml +name: Build +on: [push] + +jobs: + build-electron: + name: Build Electron app - ${{ matrix.config.name }} + runs-on: ${{ matrix.config.os }} + strategy: + fail-fast: false + matrix: + config: + - name: "Windows" + os: windows-2022 + - name: "Ubuntu" + os: ubuntu-22.04 + - name: "macOS" + os: macos-13 + + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-node@v4 + with: + node-version: "20" + + - name: Install dependencies on Ubuntu + if: matrix.config.name == 'Ubuntu' + run: | + sudo apt-get update + sudo apt-get install libarchive-tools rpm + sudo snap install snapcraft --classic + + - name: Install modules + run: npm ci + + - name: Build electron app + id: build + shell: bash + timeout-minutes: 480 + run: npm run build + + - name: Upload artifacts + uses: actions/upload-artifact@v4 + with: + include-hidden-files: true + name: "electron-app-${{ matrix.config.name }}" + path: "./release" +``` + + + +::: + ## Bundling When bundling your code for Electron using [Electron Vite](https://electron-vite.org) or Webpack, ensure that `node-llama-cpp` is not bundled, and is instead treated as an external module. diff --git a/docs/guide/troubleshooting.md b/docs/guide/troubleshooting.md index 304da350..d3bfbe8c 100644 --- a/docs/guide/troubleshooting.md +++ b/docs/guide/troubleshooting.md @@ -156,3 +156,10 @@ please [open a new issue on GitHub](https://github.com/withcatai/node-llama-cpp/ The common cause for this issue is when using the `Administrator` to run `npm install` and then trying to run the code with a different user. Ensure you're not using the `Administrator` user for `npm install` nor to run the code. + +## Getting an `EPERM: operation not permitted` Error on a Windows Machine When Building an Electron App +`electron-builder` needs to create symlinks to perform the build process, which requires enabling Developer Mode on Windows. + +To do that, go to `Settings > Update & Security > For developers` and enable `Developer mode`. + +After that, delete the `.cache` folder under your user directory and try building the app again. 
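For the bundling note in the `docs/guide/electron.md` changes above ("ensure that `node-llama-cpp` is not bundled, and is instead treated as an external module"), here is a minimal illustrative sketch of how that might look with Electron Vite. The file name `electron.vite.config.ts` and the exact config shape are assumptions about a typical project layout, not part of this patch; adapt them to your own setup.

```ts
// electron.vite.config.ts — illustrative sketch, adjust to your project's structure
import {defineConfig} from "electron-vite";

export default defineConfig({
    main: {
        build: {
            rollupOptions: {
                // keep node-llama-cpp out of the bundle so it stays an external module
                // and its native binaries can be resolved at runtime
                external: ["node-llama-cpp"]
            }
        }
    }
});
```

A similar `external` (or `externals`) setting applies when using Webpack instead of Electron Vite.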
diff --git a/docs/public/robots.txt b/docs/public/robots.txt index b9756169..6ba151d4 100644 --- a/docs/public/robots.txt +++ b/docs/public/robots.txt @@ -1,2 +1,3 @@ User-agent: * + Sitemap: https://node-llama-cpp.withcat.ai/sitemap.xml diff --git a/llama/CMakeLists.txt b/llama/CMakeLists.txt index 4340c82c..08c7a86b 100644 --- a/llama/CMakeLists.txt +++ b/llama/CMakeLists.txt @@ -28,6 +28,16 @@ if (CMAKE_CXX_COMPILER_ID STREQUAL "Clang" OR CMAKE_CXX_COMPILER_ID STREQUAL "Ap add_compile_options(-Wno-c++17-extensions) endif() +if(APPLE) + set(CMAKE_SKIP_BUILD_RPATH FALSE) + set(CMAKE_INSTALL_RPATH_USE_LINK_PATH FALSE) + set(CMAKE_BUILD_RPATH "@loader_path") + set(CMAKE_INSTALL_RPATH "@loader_path") + set(CMAKE_BUILD_WITH_INSTALL_RPATH TRUE) +else() + set(CMAKE_BUILD_RPATH_USE_ORIGIN ON) +endif() + include_directories(${NODE_ADDON_API_DIR} ${CMAKE_JS_INC}) add_subdirectory("llama.cpp") @@ -39,41 +49,6 @@ unset(GPU_INFO_HEADERS) unset(GPU_INFO_SOURCES) unset(GPU_INFO_EXTRA_LIBS) -if (GGML_CUDA) - cmake_minimum_required(VERSION 3.17) - - find_package(CUDAToolkit) - if (CUDAToolkit_FOUND) - message(STATUS "Using CUDA for GPU info") - - enable_language(CUDA) - - list(APPEND GPU_INFO_HEADERS gpuInfo/cuda-gpu-info.h) - list(APPEND GPU_INFO_SOURCES gpuInfo/cuda-gpu-info.cu) - - add_compile_definitions(GPU_INFO_USE_CUDA) - - if (GGML_STATIC) - list(APPEND GPU_INFO_EXTRA_LIBS CUDA::cudart_static) - else() - list(APPEND GPU_INFO_EXTRA_LIBS CUDA::cudart) - endif() - - list(APPEND GPU_INFO_EXTRA_LIBS CUDA::cuda_driver) - - if (NOT DEFINED CMAKE_CUDA_ARCHITECTURES) - # copied from llama.cpp/CMakLists.txt under "if (NOT DEFINED CMAKE_CUDA_ARCHITECTURES)" - if (GGML_CUDA_F16 OR GGML_CUDA_DMMV_F16) - set(CMAKE_CUDA_ARCHITECTURES "60;61;70;75") - else() - set(CMAKE_CUDA_ARCHITECTURES "52;61;70;75") - endif() - endif() - else() - message(FATAL_ERROR "CUDA was not found") - endif() -endif() - if (GGML_VULKAN OR GGML_KOMPUTE) find_package(Vulkan) if (Vulkan_FOUND) @@ -94,67 +69,12 @@ if (GGML_VULKAN OR GGML_KOMPUTE) endif() endif() -if (GGML_HIPBLAS) - list(APPEND CMAKE_PREFIX_PATH /opt/rocm) - - if (NOT ${CMAKE_C_COMPILER_ID} MATCHES "Clang") - message(WARNING "Only LLVM is supported for HIP, hint: CC=/opt/rocm/llvm/bin/clang") - endif() - if (NOT ${CMAKE_CXX_COMPILER_ID} MATCHES "Clang") - message(WARNING "Only LLVM is supported for HIP, hint: CXX=/opt/rocm/llvm/bin/clang++") - endif() - - find_package(hip) - find_package(hipblas) - find_package(rocblas) - - if (${hipblas_FOUND} AND ${hip_FOUND}) - message(STATUS "Using HIP and hipBLAS for GPU info") - add_compile_definitions(GPU_INFO_USE_HIPBLAS GPU_INFO_USE_CUDA) - add_library(gpu-info-rocm OBJECT gpuInfo/cuda-gpu-info.cu gpuInfo/cuda-gpu-info.h) - set_source_files_properties(gpuInfo/cuda-gpu-info.cu PROPERTIES LANGUAGE CXX) - target_link_libraries(gpu-info-rocm PRIVATE hip::device PUBLIC hip::host roc::rocblas roc::hipblas) - - list(APPEND GPU_INFO_EXTRA_LIBS gpu-info-rocm) - else() - message(FATAL_ERROR "hipBLAS or HIP was not found. 
Try setting CMAKE_PREFIX_PATH=/opt/rocm") - endif() -endif() - -if (GGML_METAL) - find_library(FOUNDATION_LIBRARY Foundation REQUIRED) - find_library(METAL_FRAMEWORK Metal REQUIRED) - find_library(METALKIT_FRAMEWORK MetalKit REQUIRED) - - message(STATUS "Using Metal for GPU info") - list(APPEND GPU_INFO_HEADERS gpuInfo/metal-gpu-info.h) - list(APPEND GPU_INFO_SOURCES gpuInfo/metal-gpu-info.mm) - - add_compile_definitions(GPU_INFO_USE_METAL) - - list(APPEND GPU_INFO_EXTRA_LIBS - ${FOUNDATION_LIBRARY} - ${METAL_FRAMEWORK} - ${METALKIT_FRAMEWORK} - ) -endif() - list(REMOVE_DUPLICATES GPU_INFO_HEADERS) list(REMOVE_DUPLICATES GPU_INFO_SOURCES) list(REMOVE_DUPLICATES GPU_INFO_EXTRA_LIBS) file(GLOB SOURCE_FILES "addon/*.cpp" "addon/**/*.cpp" ${GPU_INFO_SOURCES}) -if(APPLE) - set(CMAKE_SKIP_BUILD_RPATH FALSE) - set(CMAKE_INSTALL_RPATH_USE_LINK_PATH FALSE) - set(CMAKE_BUILD_RPATH "@loader_path") - set(CMAKE_INSTALL_RPATH "@loader_path") - set(CMAKE_BUILD_WITH_INSTALL_RPATH TRUE) -else() - set(CMAKE_BUILD_RPATH_USE_ORIGIN ON) -endif() - add_library(${PROJECT_NAME} SHARED ${SOURCE_FILES} ${CMAKE_JS_SRC} ${GPU_INFO_HEADERS}) set_target_properties(${PROJECT_NAME} PROPERTIES PREFIX "" SUFFIX ".node") target_link_libraries(${PROJECT_NAME} ${CMAKE_JS_LIB}) diff --git a/llama/addon/addon.cpp b/llama/addon/addon.cpp index 5c2d1c52..7b014079 100644 --- a/llama/addon/addon.cpp +++ b/llama/addon/addon.cpp @@ -151,6 +151,22 @@ class AddonBackendUnloadWorker : public Napi::AsyncWorker { } }; +Napi::Value addonLoadBackends(const Napi::CallbackInfo& info) { + const bool forceLoadLibraries = info.Length() == 0 + ? false + : info[0].IsBoolean() + ? info[0].As().Value() + : false; + + ggml_backend_reg_count(); + + if (forceLoadLibraries) { + ggml_backend_load_all(); + } + + return info.Env().Undefined(); +} + Napi::Value addonInit(const Napi::CallbackInfo& info) { if (backendInitialized) { Napi::Promise::Deferred deferred = Napi::Promise::Deferred::New(info.Env()); @@ -205,6 +221,7 @@ Napi::Object registerCallback(Napi::Env env, Napi::Object exports) { Napi::PropertyDescriptor::Function("getGpuDeviceInfo", getGpuDeviceInfo), Napi::PropertyDescriptor::Function("getGpuType", getGpuType), Napi::PropertyDescriptor::Function("getSwapInfo", getSwapInfo), + Napi::PropertyDescriptor::Function("loadBackends", addonLoadBackends), Napi::PropertyDescriptor::Function("init", addonInit), Napi::PropertyDescriptor::Function("dispose", addonDispose), }); diff --git a/llama/addon/globals/getGpuInfo.cpp b/llama/addon/globals/getGpuInfo.cpp index ef51c1cd..cb15501f 100644 --- a/llama/addon/globals/getGpuInfo.cpp +++ b/llama/addon/globals/getGpuInfo.cpp @@ -1,22 +1,15 @@ #include "getGpuInfo.h" #include "addonLog.h" -#ifdef GPU_INFO_USE_CUDA -# include "../../gpuInfo/cuda-gpu-info.h" +#ifdef __APPLE__ + #include #endif + #ifdef GPU_INFO_USE_VULKAN # include "../../gpuInfo/vulkan-gpu-info.h" #endif -#ifdef GPU_INFO_USE_METAL -# include "../../gpuInfo/metal-gpu-info.h" -#endif -#ifdef GPU_INFO_USE_CUDA -void logCudaError(const char* message) { - addonLlamaCppLogCallback(GGML_LOG_LEVEL_ERROR, (std::string("CUDA error: ") + std::string(message)).c_str(), nullptr); -} -#endif #ifdef GPU_INFO_USE_VULKAN void logVulkanWarning(const char* message) { addonLlamaCppLogCallback(GGML_LOG_LEVEL_WARN, (std::string("Vulkan warning: ") + std::string(message)).c_str(), nullptr); @@ -24,20 +17,31 @@ void logVulkanWarning(const char* message) { #endif Napi::Value getGpuVramInfo(const Napi::CallbackInfo& info) { + ggml_backend_dev_t device = NULL; + size_t 
deviceTotal = 0; + size_t deviceFree = 0; + uint64_t total = 0; uint64_t used = 0; uint64_t unifiedVramSize = 0; -#ifdef GPU_INFO_USE_CUDA - size_t cudaDeviceTotal = 0; - size_t cudaDeviceUsed = 0; - bool cudeGetInfoSuccess = gpuInfoGetTotalCudaDevicesInfo(&cudaDeviceTotal, &cudaDeviceUsed, logCudaError); + for (size_t i = 0; i < ggml_backend_dev_count(); i++) { + device = ggml_backend_dev_get(i); + if (ggml_backend_dev_type(device) == GGML_BACKEND_DEVICE_TYPE_GPU) { + deviceTotal = 0; + deviceFree = 0; + ggml_backend_dev_memory(device, &deviceFree, &deviceTotal); - if (cudeGetInfoSuccess) { - total += cudaDeviceTotal; - used += cudaDeviceUsed; - } + total += deviceTotal; + used += deviceTotal - deviceFree; + +#if defined(__arm64__) || defined(__aarch64__) + if (std::string(ggml_backend_dev_name(device)) == "Metal") { + unifiedVramSize += deviceTotal; + } #endif + } + } #ifdef GPU_INFO_USE_VULKAN uint64_t vulkanDeviceTotal = 0; @@ -46,23 +50,15 @@ Napi::Value getGpuVramInfo(const Napi::CallbackInfo& info) { const bool vulkanDeviceSupportsMemoryBudgetExtension = gpuInfoGetTotalVulkanDevicesInfo(&vulkanDeviceTotal, &vulkanDeviceUsed, &vulkanDeviceUnifiedVramSize, logVulkanWarning); if (vulkanDeviceSupportsMemoryBudgetExtension) { - total += vulkanDeviceTotal; - used += vulkanDeviceUsed; + if (vulkanDeviceUnifiedVramSize > total) { + // this means that we counted memory from devices that aren't used by llama.cpp + vulkanDeviceUnifiedVramSize = 0; + } + unifiedVramSize += vulkanDeviceUnifiedVramSize; } #endif -#ifdef GPU_INFO_USE_METAL - uint64_t metalDeviceTotal = 0; - uint64_t metalDeviceUsed = 0; - uint64_t metalDeviceUnifiedVramSize = 0; - getMetalGpuInfo(&metalDeviceTotal, &metalDeviceUsed, &metalDeviceUnifiedVramSize); - - total += metalDeviceTotal; - used += metalDeviceUsed; - unifiedVramSize += metalDeviceUnifiedVramSize; -#endif - Napi::Object result = Napi::Object::New(info.Env()); result.Set("total", Napi::Number::From(info.Env(), total)); result.Set("used", Napi::Number::From(info.Env(), used)); @@ -74,17 +70,13 @@ Napi::Value getGpuVramInfo(const Napi::CallbackInfo& info) { Napi::Value getGpuDeviceInfo(const Napi::CallbackInfo& info) { std::vector deviceNames; -#ifdef GPU_INFO_USE_CUDA - gpuInfoGetCudaDeviceNames(&deviceNames, logCudaError); -#endif + for (size_t i = 0; i < ggml_backend_dev_count(); i++) { + ggml_backend_dev_t device = ggml_backend_dev_get(i); + if (ggml_backend_dev_type(device) == GGML_BACKEND_DEVICE_TYPE_GPU) { -#ifdef GPU_INFO_USE_VULKAN - gpuInfoGetVulkanDeviceNames(&deviceNames, logVulkanWarning); -#endif - -#ifdef GPU_INFO_USE_METAL - getMetalGpuDeviceNames(&deviceNames); -#endif + deviceNames.push_back(std::string(ggml_backend_dev_description(device))); + } + } Napi::Object result = Napi::Object::New(info.Env()); @@ -98,17 +90,27 @@ Napi::Value getGpuDeviceInfo(const Napi::CallbackInfo& info) { } Napi::Value getGpuType(const Napi::CallbackInfo& info) { -#ifdef GPU_INFO_USE_CUDA - return Napi::String::New(info.Env(), "cuda"); -#endif - -#ifdef GPU_INFO_USE_VULKAN - return Napi::String::New(info.Env(), "vulkan"); -#endif + for (size_t i = 0; i < ggml_backend_dev_count(); i++) { + ggml_backend_dev_t device = ggml_backend_dev_get(i); + const auto deviceName = std::string(ggml_backend_dev_name(device)); + + if (deviceName == "Metal") { + return Napi::String::New(info.Env(), "metal"); + } else if (std::string(deviceName).find("Vulkan") == 0) { + return Napi::String::New(info.Env(), "vulkan"); + } else if (std::string(deviceName).find("CUDA") == 0 || 
std::string(deviceName).find("ROCm") == 0 || std::string(deviceName).find("MUSA") == 0) { + return Napi::String::New(info.Env(), "cuda"); + } + } -#ifdef GPU_INFO_USE_METAL - return Napi::String::New(info.Env(), "metal"); -#endif + for (size_t i = 0; i < ggml_backend_dev_count(); i++) { + ggml_backend_dev_t device = ggml_backend_dev_get(i); + const auto deviceName = std::string(ggml_backend_dev_name(device)); + + if (deviceName == "CPU") { + return Napi::Boolean::New(info.Env(), false); + } + } return info.Env().Undefined(); -} \ No newline at end of file +} diff --git a/llama/gpuInfo/cuda-gpu-info.cu b/llama/gpuInfo/cuda-gpu-info.cu deleted file mode 100644 index 1559fc0b..00000000 --- a/llama/gpuInfo/cuda-gpu-info.cu +++ /dev/null @@ -1,120 +0,0 @@ -#include -#include -#include - -#if defined(GPU_INFO_USE_HIPBLAS) -#include -#include -#define cudaGetDevice hipGetDevice -#define cudaGetDeviceCount hipGetDeviceCount -#define cudaGetErrorString hipGetErrorString -#define cudaMemGetInfo hipMemGetInfo -#define cudaSetDevice hipSetDevice -#define cudaSuccess hipSuccess -#else -#include -#include -#endif - - -typedef void (*gpuInfoCudaErrorLogCallback_t)(const char* message); - -bool gpuInfoSetCudaDevice(const int device, gpuInfoCudaErrorLogCallback_t errorLogCallback) { - int current_device; - auto getDeviceResult = cudaGetDevice(¤t_device); - - if (getDeviceResult != cudaSuccess) { - errorLogCallback(cudaGetErrorString(getDeviceResult)); - return false; - } - - if (device == current_device) { - return true; - } - - const auto setDeviceResult = cudaSetDevice(device); - - if (setDeviceResult != cudaSuccess) { - errorLogCallback(cudaGetErrorString(setDeviceResult)); - return false; - } - - return true; -} - -bool gpuInfoGetCudaDeviceInfo(int device, size_t * total, size_t * used, gpuInfoCudaErrorLogCallback_t errorLogCallback) { - gpuInfoSetCudaDevice(device, errorLogCallback); - - size_t freeMem; - size_t totalMem; - auto getMemInfoResult = cudaMemGetInfo(&freeMem, &totalMem); - - if (getMemInfoResult != cudaSuccess) { - errorLogCallback(cudaGetErrorString(getMemInfoResult)); - return false; - } - - *total = totalMem; - *used = totalMem - freeMem; - - return true; -} - -int gpuInfoGetCudaDeviceCount(gpuInfoCudaErrorLogCallback_t errorLogCallback) { - int deviceCount; - auto getDeviceCountResult = cudaGetDeviceCount(&deviceCount); - - if (getDeviceCountResult != cudaSuccess) { - errorLogCallback(cudaGetErrorString(getDeviceCountResult)); - return -1; - } - - return deviceCount; -} - -bool gpuInfoGetTotalCudaDevicesInfo(size_t * total, size_t * used, gpuInfoCudaErrorLogCallback_t errorLogCallback) { - int deviceCount = gpuInfoGetCudaDeviceCount(errorLogCallback); - - if (deviceCount < 0) { - return false; - } - - size_t usedMem = 0; - size_t totalMem = 0; - - for (int i = 0; i < deviceCount; i++) { - size_t deviceUsedMem; - size_t deviceTotalMem; - - if (!gpuInfoGetCudaDeviceInfo(i, &deviceTotalMem, &deviceUsedMem, errorLogCallback)) { - return false; - } - - usedMem += deviceUsedMem; - totalMem += deviceTotalMem; - } - - *total = totalMem; - *used = usedMem; - - return true; -} - -void gpuInfoGetCudaDeviceNames(std::vector * deviceNames, gpuInfoCudaErrorLogCallback_t errorLogCallback) { - int deviceCount = gpuInfoGetCudaDeviceCount(errorLogCallback); - - if (deviceCount < 0) { - return; - } - - for (int i = 0; i < deviceCount; i++) { - cudaDeviceProp prop; - auto getDevicePropertiesResult = cudaGetDeviceProperties(&prop, i); - - if (getDevicePropertiesResult != cudaSuccess) { - 
errorLogCallback(cudaGetErrorString(getDevicePropertiesResult)); - } else { - (*deviceNames).push_back(std::string(prop.name)); - } - } -} diff --git a/llama/gpuInfo/cuda-gpu-info.h b/llama/gpuInfo/cuda-gpu-info.h deleted file mode 100644 index e77b6f29..00000000 --- a/llama/gpuInfo/cuda-gpu-info.h +++ /dev/null @@ -1,10 +0,0 @@ -#pragma once - -#include -#include -#include - -typedef void (*gpuInfoCudaErrorLogCallback_t)(const char* message); - -bool gpuInfoGetTotalCudaDevicesInfo(size_t * total, size_t * used, gpuInfoCudaErrorLogCallback_t errorLogCallback); -void gpuInfoGetCudaDeviceNames(std::vector * deviceNames, gpuInfoCudaErrorLogCallback_t errorLogCallback); diff --git a/llama/gpuInfo/metal-gpu-info.h b/llama/gpuInfo/metal-gpu-info.h deleted file mode 100644 index 9a199bee..00000000 --- a/llama/gpuInfo/metal-gpu-info.h +++ /dev/null @@ -1,8 +0,0 @@ -#pragma once - -#include -#include -#include - -void getMetalGpuInfo(uint64_t * total, uint64_t * used, uint64_t * unifiedMemorySize); -void getMetalGpuDeviceNames(std::vector * deviceNames); \ No newline at end of file diff --git a/llama/gpuInfo/metal-gpu-info.mm b/llama/gpuInfo/metal-gpu-info.mm deleted file mode 100644 index 46ac0b18..00000000 --- a/llama/gpuInfo/metal-gpu-info.mm +++ /dev/null @@ -1,37 +0,0 @@ -#include -#include -#include -#import - -void getMetalGpuInfo(uint64_t * total, uint64_t * used, uint64_t * unifiedMemorySize) { - id device = MTLCreateSystemDefaultDevice(); - - if (device) { - *total = device.recommendedMaxWorkingSetSize; - *used = device.currentAllocatedSize; - - if (device.hasUnifiedMemory) { - *unifiedMemorySize = device.recommendedMaxWorkingSetSize; - } else { - *unifiedMemorySize = 0; - } - } else { - *total = 0; - *used = 0; - *unifiedMemorySize = 0; - } - - [device release]; - device = nil; -} - -void getMetalGpuDeviceNames(std::vector * deviceNames) { - NSArray> *devices = MTLCopyAllDevices(); - - for (id device in devices) { - (*deviceNames).push_back(std::string(([NSString stringWithUTF8String:device.name.UTF8String]).UTF8String)); - } - - [devices release]; - devices = nil; -} diff --git a/llama/gpuInfo/vulkan-gpu-info.cpp b/llama/gpuInfo/vulkan-gpu-info.cpp index 25356546..b47f92a8 100644 --- a/llama/gpuInfo/vulkan-gpu-info.cpp +++ b/llama/gpuInfo/vulkan-gpu-info.cpp @@ -80,11 +80,3 @@ static bool enumerateVulkanDevices(size_t* total, size_t* used, size_t* unifiedM bool gpuInfoGetTotalVulkanDevicesInfo(size_t* total, size_t* used, size_t* unifiedMemorySize, gpuInfoVulkanWarningLogCallback_t warningLogCallback) { return enumerateVulkanDevices(total, used, unifiedMemorySize, false, nullptr, warningLogCallback); } - -bool gpuInfoGetVulkanDeviceNames(std::vector * deviceNames, gpuInfoVulkanWarningLogCallback_t warningLogCallback) { - size_t vulkanDeviceTotal = 0; - size_t vulkanDeviceUsed = 0; - size_t unifiedMemorySize = 0; - - return enumerateVulkanDevices(&vulkanDeviceTotal, &vulkanDeviceUsed, &unifiedMemorySize, true, deviceNames, warningLogCallback); -} diff --git a/llama/gpuInfo/vulkan-gpu-info.h b/llama/gpuInfo/vulkan-gpu-info.h index f8eb0527..af03026e 100644 --- a/llama/gpuInfo/vulkan-gpu-info.h +++ b/llama/gpuInfo/vulkan-gpu-info.h @@ -6,4 +6,3 @@ typedef void (*gpuInfoVulkanWarningLogCallback_t)(const char* message); bool gpuInfoGetTotalVulkanDevicesInfo(size_t* total, size_t* used, size_t* unifiedMemorySize, gpuInfoVulkanWarningLogCallback_t warningLogCallback); -bool gpuInfoGetVulkanDeviceNames(std::vector * deviceNames, gpuInfoVulkanWarningLogCallback_t warningLogCallback); \ No 
newline at end of file diff --git a/package-lock.json b/package-lock.json index 6db1793e..84b212d3 100644 --- a/package-lock.json +++ b/package-lock.json @@ -70,6 +70,7 @@ "@types/yargs": "^17.0.33", "@vitest/coverage-v8": "^2.1.4", "@vitest/ui": "^2.1.4", + "electron": "^33.2.0", "eslint": "^9.13.0", "eslint-import-resolver-typescript": "^3.6.3", "eslint-plugin-import": "^2.31.0", @@ -933,6 +934,73 @@ } } }, + "node_modules/@electron/get": { + "version": "2.0.3", + "resolved": "https://registry.npmjs.org/@electron/get/-/get-2.0.3.tgz", + "integrity": "sha512-Qkzpg2s9GnVV2I2BjRksUi43U5e6+zaQMcjoJy0C+C5oxaKl+fmckGDQFtRpZpZV0NQekuZZ+tGz7EA9TVnQtQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "debug": "^4.1.1", + "env-paths": "^2.2.0", + "fs-extra": "^8.1.0", + "got": "^11.8.5", + "progress": "^2.0.3", + "semver": "^6.2.0", + "sumchecker": "^3.0.1" + }, + "engines": { + "node": ">=12" + }, + "optionalDependencies": { + "global-agent": "^3.0.0" + } + }, + "node_modules/@electron/get/node_modules/fs-extra": { + "version": "8.1.0", + "resolved": "https://registry.npmjs.org/fs-extra/-/fs-extra-8.1.0.tgz", + "integrity": "sha512-yhlQgA6mnOJUKOsRUFsgJdQCvkKhcz8tlZG5HBQfReYZy46OwLcY+Zia0mtdHsOo9y/hP+CxMN0TU9QxoOtG4g==", + "dev": true, + "license": "MIT", + "dependencies": { + "graceful-fs": "^4.2.0", + "jsonfile": "^4.0.0", + "universalify": "^0.1.0" + }, + "engines": { + "node": ">=6 <7 || >=8" + } + }, + "node_modules/@electron/get/node_modules/jsonfile": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/jsonfile/-/jsonfile-4.0.0.tgz", + "integrity": "sha512-m6F1R3z8jjlf2imQHS2Qez5sjKWQzbuuhuJ/FKYFRZvPE3PuHcSMVZzfsLhGVOkfd20obL5SWEBew5ShlquNxg==", + "dev": true, + "license": "MIT", + "optionalDependencies": { + "graceful-fs": "^4.1.6" + } + }, + "node_modules/@electron/get/node_modules/semver": { + "version": "6.3.1", + "resolved": "https://registry.npmjs.org/semver/-/semver-6.3.1.tgz", + "integrity": "sha512-BR7VvDCVHO+q2xBEWskxS6DJE1qRnb7DxzUrogb71CWoSficBxYsiAGd+Kl0mmq/MprG9yArRkyrQxTO6XjMzA==", + "dev": true, + "license": "ISC", + "bin": { + "semver": "bin/semver.js" + } + }, + "node_modules/@electron/get/node_modules/universalify": { + "version": "0.1.2", + "resolved": "https://registry.npmjs.org/universalify/-/universalify-0.1.2.tgz", + "integrity": "sha512-rBJeI5CXAlmy1pV+617WB9J63U6XcazHHF2f2dbJix4XzpUF0RS3Zbj0FGIOCAva5P/d/GBOYaACQ1w+0azUkg==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">= 4.0.0" + } + }, "node_modules/@emnapi/runtime": { "version": "1.2.0", "resolved": "https://registry.npmjs.org/@emnapi/runtime/-/runtime-1.2.0.tgz", @@ -4408,6 +4476,19 @@ "url": "https://github.com/sponsors/jonschlinkert" } }, + "node_modules/@szmarczak/http-timer": { + "version": "4.0.6", + "resolved": "https://registry.npmjs.org/@szmarczak/http-timer/-/http-timer-4.0.6.tgz", + "integrity": "sha512-4BAffykYOgO+5nzBWYwE3W90sBgLJoUPRWWcL8wlyiM8IB8ipJz3UMJ9KXQd1RKQXpKp8Tutn80HZtWsu2u76w==", + "dev": true, + "license": "MIT", + "dependencies": { + "defer-to-connect": "^2.0.0" + }, + "engines": { + "node": ">=10" + } + }, "node_modules/@tinyhttp/content-disposition": { "version": "2.2.0", "resolved": "https://registry.npmjs.org/@tinyhttp/content-disposition/-/content-disposition-2.2.0.tgz", @@ -4441,6 +4522,19 @@ "integrity": "sha512-A0uYgOj3zNc4hNjHc5lYUfJQ/HVyBXiUMKdXd7ysclaE6k9oJdavQzODHuwjpUu2/boCP8afjQYi8z/GtvNCWA==", "dev": true }, + "node_modules/@types/cacheable-request": { + "version": "6.0.3", + "resolved": 
"https://registry.npmjs.org/@types/cacheable-request/-/cacheable-request-6.0.3.tgz", + "integrity": "sha512-IQ3EbTzGxIigb1I3qPZc1rWJnH0BmSKv5QYTalEwweFvyBDLSAe24zP0le/hyi7ecGfZVlIVAg4BZqb8WBwKqw==", + "dev": true, + "license": "MIT", + "dependencies": { + "@types/http-cache-semantics": "*", + "@types/keyv": "^3.1.4", + "@types/node": "*", + "@types/responselike": "^1.0.0" + } + }, "node_modules/@types/conventional-commits-parser": { "version": "5.0.0", "resolved": "https://registry.npmjs.org/@types/conventional-commits-parser/-/conventional-commits-parser-5.0.0.tgz", @@ -4495,6 +4589,13 @@ "@types/unist": "*" } }, + "node_modules/@types/http-cache-semantics": { + "version": "4.0.4", + "resolved": "https://registry.npmjs.org/@types/http-cache-semantics/-/http-cache-semantics-4.0.4.tgz", + "integrity": "sha512-1m0bIFVc7eJWyve9S0RnuRgcQqF/Xd5QsUZAZeQFr1Q3/p9JWoQQEqmVy+DPTNpGXwhgIetAoYF8JSc33q29QA==", + "dev": true, + "license": "MIT" + }, "node_modules/@types/json-schema": { "version": "7.0.15", "resolved": "https://registry.npmjs.org/@types/json-schema/-/json-schema-7.0.15.tgz", @@ -4517,6 +4618,16 @@ "@types/node": "*" } }, + "node_modules/@types/keyv": { + "version": "3.1.4", + "resolved": "https://registry.npmjs.org/@types/keyv/-/keyv-3.1.4.tgz", + "integrity": "sha512-BQ5aZNSCpj7D6K2ksrRCTmKRLEpnPvWDiLPfoGyhZ++8YtiK9d/3DBKPJgry359X/P1PfruyYwvnvwFjuEiEIg==", + "dev": true, + "license": "MIT", + "dependencies": { + "@types/node": "*" + } + }, "node_modules/@types/linkify-it": { "version": "5.0.0", "resolved": "https://registry.npmjs.org/@types/linkify-it/-/linkify-it-5.0.0.tgz", @@ -4582,6 +4693,16 @@ "@types/retry": "*" } }, + "node_modules/@types/responselike": { + "version": "1.0.3", + "resolved": "https://registry.npmjs.org/@types/responselike/-/responselike-1.0.3.tgz", + "integrity": "sha512-H/+L+UkTV33uf49PH5pCAUBVPNj2nDBXTN+qS1dOwyyg24l3CcicicCA7ca+HMvJBZcFgl5r8e+RR6elsb4Lyw==", + "dev": true, + "license": "MIT", + "dependencies": { + "@types/node": "*" + } + }, "node_modules/@types/retry": { "version": "0.12.5", "resolved": "https://registry.npmjs.org/@types/retry/-/retry-0.12.5.tgz", @@ -4635,6 +4756,17 @@ "integrity": "sha512-I4q9QU9MQv4oEOz4tAHJtNz1cwuLxn2F3xcc2iV5WdqLPpUnj30aUuxt1mAxYTG+oe8CZMV/+6rU4S4gRDzqtQ==", "dev": true }, + "node_modules/@types/yauzl": { + "version": "2.10.3", + "resolved": "https://registry.npmjs.org/@types/yauzl/-/yauzl-2.10.3.tgz", + "integrity": "sha512-oJoftv0LSuaDZE3Le4DbKX+KS9G36NzOeSap90UIK0yMA/NhKJhqlSGtNDORNRaIbQfzjXDrQa0ytJ6mNRGz/Q==", + "dev": true, + "license": "MIT", + "optional": true, + "dependencies": { + "@types/node": "*" + } + }, "node_modules/@typescript-eslint/eslint-plugin": { "version": "8.12.2", "resolved": "https://registry.npmjs.org/@typescript-eslint/eslint-plugin/-/eslint-plugin-8.12.2.tgz", @@ -5848,6 +5980,15 @@ "dev": true, "license": "ISC" }, + "node_modules/boolean": { + "version": "3.2.0", + "resolved": "https://registry.npmjs.org/boolean/-/boolean-3.2.0.tgz", + "integrity": "sha512-d0II/GO9uf9lfUHH2BQsjxzRJZBdsjgsBiW4BvhWk/3qoKwQFjIDVN19PfX8F2D/r9PCMTtLWjYVCFrpeYUzsw==", + "deprecated": "Package no longer supported. 
Contact Support at https://www.npmjs.com/support for more info.", + "dev": true, + "license": "MIT", + "optional": true + }, "node_modules/bottleneck": { "version": "2.19.5", "resolved": "https://registry.npmjs.org/bottleneck/-/bottleneck-2.19.5.tgz", @@ -5874,6 +6015,16 @@ "node": ">=8" } }, + "node_modules/buffer-crc32": { + "version": "0.2.13", + "resolved": "https://registry.npmjs.org/buffer-crc32/-/buffer-crc32-0.2.13.tgz", + "integrity": "sha512-VO9Ht/+p3SN7SKWqcrgEzjGbRSJYTx+Q1pTQC0wrWqHx0vpJraQ6GtHx8tvcg1rlK1byhU5gccxgOgj7B0TDkQ==", + "dev": true, + "license": "MIT", + "engines": { + "node": "*" + } + }, "node_modules/bytes": { "version": "3.1.2", "resolved": "https://registry.npmjs.org/bytes/-/bytes-3.1.2.tgz", @@ -5892,6 +6043,64 @@ "node": ">=8" } }, + "node_modules/cacheable-lookup": { + "version": "5.0.4", + "resolved": "https://registry.npmjs.org/cacheable-lookup/-/cacheable-lookup-5.0.4.tgz", + "integrity": "sha512-2/kNscPhpcxrOigMZzbiWF7dz8ilhb/nIHU3EyZiXWXpeq/au8qJ8VhdftMkty3n7Gj6HIGalQG8oiBNB3AJgA==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=10.6.0" + } + }, + "node_modules/cacheable-request": { + "version": "7.0.4", + "resolved": "https://registry.npmjs.org/cacheable-request/-/cacheable-request-7.0.4.tgz", + "integrity": "sha512-v+p6ongsrp0yTGbJXjgxPow2+DL93DASP4kXCDKb8/bwRtt9OEF3whggkkDkGNzgcWy2XaF4a8nZglC7uElscg==", + "dev": true, + "license": "MIT", + "dependencies": { + "clone-response": "^1.0.2", + "get-stream": "^5.1.0", + "http-cache-semantics": "^4.0.0", + "keyv": "^4.0.0", + "lowercase-keys": "^2.0.0", + "normalize-url": "^6.0.1", + "responselike": "^2.0.0" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/cacheable-request/node_modules/get-stream": { + "version": "5.2.0", + "resolved": "https://registry.npmjs.org/get-stream/-/get-stream-5.2.0.tgz", + "integrity": "sha512-nBF+F1rAZVCu/p7rjzgA+Yb4lfYXrpl7a6VmJrU8wF9I1CKvP/QwPNZHnOlwbTkY6dvtFIzFMSyQXbLoTQPRpA==", + "dev": true, + "license": "MIT", + "dependencies": { + "pump": "^3.0.0" + }, + "engines": { + "node": ">=8" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/cacheable-request/node_modules/normalize-url": { + "version": "6.1.0", + "resolved": "https://registry.npmjs.org/normalize-url/-/normalize-url-6.1.0.tgz", + "integrity": "sha512-DlL+XwOy3NxAQ8xuC0okPgK46iuVNAK01YN7RueYBqqFeGsBjV9XmCAzAdgt+667bCl5kPh9EqKKDwnaPG1I7A==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, "node_modules/call-bind": { "version": "1.0.7", "resolved": "https://registry.npmjs.org/call-bind/-/call-bind-1.0.7.tgz", @@ -6248,6 +6457,19 @@ "node": ">=8" } }, + "node_modules/clone-response": { + "version": "1.0.3", + "resolved": "https://registry.npmjs.org/clone-response/-/clone-response-1.0.3.tgz", + "integrity": "sha512-ROoL94jJH2dUVML2Y/5PEDNaSHgeOdSDicUyS7izcF63G6sTc/FTjLub4b8Il9S8S0beOfYt0TaA5qvFK+w0wA==", + "dev": true, + "license": "MIT", + "dependencies": { + "mimic-response": "^1.0.0" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, "node_modules/cmake-js": { "version": "7.3.0", "resolved": "https://registry.npmjs.org/cmake-js/-/cmake-js-7.3.0.tgz", @@ -6817,6 +7039,35 @@ "url": "https://github.com/sponsors/wooorm" } }, + "node_modules/decompress-response": { + "version": "6.0.0", + "resolved": "https://registry.npmjs.org/decompress-response/-/decompress-response-6.0.0.tgz", + "integrity": 
"sha512-aW35yZM6Bb/4oJlZncMH2LCoZtJXTRxES17vE3hoRiowU2kWHaJKFkSBDnDR+cm9J+9QhXmREyIfv0pji9ejCQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "mimic-response": "^3.1.0" + }, + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/decompress-response/node_modules/mimic-response": { + "version": "3.1.0", + "resolved": "https://registry.npmjs.org/mimic-response/-/mimic-response-3.1.0.tgz", + "integrity": "sha512-z0yWI+4FDrrweS8Zmt4Ej5HdJmky15+L2e6Wgn3+iK5fWzb6T3fhNFq2+MeTRb064c6Wr4N/wv0DzQTjNzHNGQ==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, "node_modules/deep-eql": { "version": "5.0.2", "resolved": "https://registry.npmjs.org/deep-eql/-/deep-eql-5.0.2.tgz", @@ -6841,6 +7092,16 @@ "integrity": "sha512-oIPzksmTg4/MriiaYGO+okXDT7ztn/w3Eptv/+gSIdMdKsJo0u4CfYNFJPy+4SKMuCqGw2wxnA+URMg3t8a/bQ==", "dev": true }, + "node_modules/defer-to-connect": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/defer-to-connect/-/defer-to-connect-2.0.1.tgz", + "integrity": "sha512-4tvttepXG1VaYGrRibk5EwJd1t4udunSOVMdLSAL6mId1ix438oPwPZMALY41FCijukO1L0twNcGsdzS7dHgDg==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=10" + } + }, "node_modules/define-data-property": { "version": "1.1.4", "resolved": "https://registry.npmjs.org/define-data-property/-/define-data-property-1.1.4.tgz", @@ -6914,6 +7175,14 @@ "node": ">=8" } }, + "node_modules/detect-node": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/detect-node/-/detect-node-2.1.0.tgz", + "integrity": "sha512-T0NIuQpnTvFDATNuHN5roPwSBG83rFsuO+MXXH9/3N1eFbn4wcPjttvjMLEPWJ0RGUYgQE7cGgS3tNxbqCGM7g==", + "dev": true, + "license": "MIT", + "optional": true + }, "node_modules/devlop": { "version": "1.1.0", "resolved": "https://registry.npmjs.org/devlop/-/devlop-1.1.0.tgz", @@ -7016,6 +7285,35 @@ "integrity": "sha512-I88TYZWc9XiYHRQ4/3c5rjjfgkjhLyW2luGIheGERbNQ6OY7yTybanSpDXZa8y7VUP9YmDcYa+eyq4ca7iLqWA==", "dev": true }, + "node_modules/electron": { + "version": "33.2.0", + "resolved": "https://registry.npmjs.org/electron/-/electron-33.2.0.tgz", + "integrity": "sha512-PVw1ICAQDPsnnsmpNFX/b1i/49h67pbSPxuIENd9K9WpGO1tsRaQt+K2bmXqTuoMJsbzIc75Ce8zqtuwBPqawA==", + "dev": true, + "hasInstallScript": true, + "license": "MIT", + "dependencies": { + "@electron/get": "^2.0.0", + "@types/node": "^20.9.0", + "extract-zip": "^2.0.1" + }, + "bin": { + "electron": "cli.js" + }, + "engines": { + "node": ">= 12.20.55" + } + }, + "node_modules/electron/node_modules/@types/node": { + "version": "20.17.6", + "resolved": "https://registry.npmjs.org/@types/node/-/node-20.17.6.tgz", + "integrity": "sha512-VEI7OdvK2wP7XHnsuXbAJnEpEkF6NjSN45QJlL4VGqZSXsnicpesdTWsg9RISeSdYd3yeRj/y3k5KGjUXYnFwQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "undici-types": "~6.19.2" + } + }, "node_modules/emoji-regex": { "version": "8.0.0", "resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-8.0.0.tgz", @@ -7027,6 +7325,16 @@ "integrity": "sha512-5U0rVMU5Y2n2+ykNLQqMoqklN9ICBT/KsvC1Gz6vqHbz2AXXGkG+Pm5rMWk/8Vjrr/mY9985Hi8DYzn1F09Nyw==", "dev": true }, + "node_modules/end-of-stream": { + "version": "1.4.4", + "resolved": "https://registry.npmjs.org/end-of-stream/-/end-of-stream-1.4.4.tgz", + "integrity": "sha512-+uw1inIHVPQoaVuHzRyXd21icM+cnt4CzD5rW+NC1wjOUSTOs+Te7FOv7AhN7vS9x/oIyhLP5PR1H+phQAHu5Q==", + "dev": true, + "license": "MIT", + 
"dependencies": { + "once": "^1.4.0" + } + }, "node_modules/enhanced-resolve": { "version": "5.17.1", "resolved": "https://registry.npmjs.org/enhanced-resolve/-/enhanced-resolve-5.17.1.tgz", @@ -7257,6 +7565,14 @@ "benchmarks" ] }, + "node_modules/es6-error": { + "version": "4.1.1", + "resolved": "https://registry.npmjs.org/es6-error/-/es6-error-4.1.1.tgz", + "integrity": "sha512-Um/+FxMr9CISWh0bi5Zv0iOD+4cFh5qLeks1qhAopKVAJw3drgKbKySikp7wGhDL0HPeaja0P5ULZrxLkniUVg==", + "dev": true, + "license": "MIT", + "optional": true + }, "node_modules/esbuild": { "version": "0.21.5", "resolved": "https://registry.npmjs.org/esbuild/-/esbuild-0.21.5.tgz", @@ -8031,6 +8347,43 @@ "node": ">=0.10.0" } }, + "node_modules/extract-zip": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/extract-zip/-/extract-zip-2.0.1.tgz", + "integrity": "sha512-GDhU9ntwuKyGXdZBUgTIe+vXnWj0fppUEtMDL0+idd5Sta8TGpHssn/eusA9mrPr9qNDym6SxAYZjNvCn/9RBg==", + "dev": true, + "license": "BSD-2-Clause", + "dependencies": { + "debug": "^4.1.1", + "get-stream": "^5.1.0", + "yauzl": "^2.10.0" + }, + "bin": { + "extract-zip": "cli.js" + }, + "engines": { + "node": ">= 10.17.0" + }, + "optionalDependencies": { + "@types/yauzl": "^2.9.1" + } + }, + "node_modules/extract-zip/node_modules/get-stream": { + "version": "5.2.0", + "resolved": "https://registry.npmjs.org/get-stream/-/get-stream-5.2.0.tgz", + "integrity": "sha512-nBF+F1rAZVCu/p7rjzgA+Yb4lfYXrpl7a6VmJrU8wF9I1CKvP/QwPNZHnOlwbTkY6dvtFIzFMSyQXbLoTQPRpA==", + "dev": true, + "license": "MIT", + "dependencies": { + "pump": "^3.0.0" + }, + "engines": { + "node": ">=8" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, "node_modules/fast-deep-equal": { "version": "3.1.3", "resolved": "https://registry.npmjs.org/fast-deep-equal/-/fast-deep-equal-3.1.3.tgz", @@ -8093,6 +8446,16 @@ "reusify": "^1.0.4" } }, + "node_modules/fd-slicer": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/fd-slicer/-/fd-slicer-1.1.0.tgz", + "integrity": "sha512-cE1qsB/VwyQozZ+q1dGxR8LBYNZeofhEdUNGSMbQD3Gw2lAzX9Zb3uIU6Ebc/Fmyjo9AWWfnn0AUCHqtevs/8g==", + "dev": true, + "license": "MIT", + "dependencies": { + "pend": "~1.2.0" + } + }, "node_modules/feed": { "version": "4.2.2", "resolved": "https://registry.npmjs.org/feed/-/feed-4.2.2.tgz", @@ -8655,6 +9018,25 @@ "node": ">=10.13.0" } }, + "node_modules/global-agent": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/global-agent/-/global-agent-3.0.0.tgz", + "integrity": "sha512-PT6XReJ+D07JvGoxQMkT6qji/jVNfX/h364XHZOWeRzy64sSFr+xJ5OX7LI3b4MPQzdL4H8Y8M0xzPpsVMwA8Q==", + "dev": true, + "license": "BSD-3-Clause", + "optional": true, + "dependencies": { + "boolean": "^3.0.1", + "es6-error": "^4.1.1", + "matcher": "^3.0.0", + "roarr": "^2.15.3", + "semver": "^7.3.2", + "serialize-error": "^7.0.1" + }, + "engines": { + "node": ">=10.0" + } + }, "node_modules/global-directory": { "version": "4.0.1", "resolved": "https://registry.npmjs.org/global-directory/-/global-directory-4.0.1.tgz", @@ -8712,6 +9094,32 @@ "url": "https://github.com/sponsors/ljharb" } }, + "node_modules/got": { + "version": "11.8.6", + "resolved": "https://registry.npmjs.org/got/-/got-11.8.6.tgz", + "integrity": "sha512-6tfZ91bOr7bOXnK7PRDCGBLa1H4U080YHNaAQ2KsMGlLEzRbk44nsZF2E1IeRc3vtJHPVbKCYgdFbaGO2ljd8g==", + "dev": true, + "license": "MIT", + "dependencies": { + "@sindresorhus/is": "^4.0.0", + "@szmarczak/http-timer": "^4.0.5", + "@types/cacheable-request": "^6.0.1", + "@types/responselike": "^1.0.0", + "cacheable-lookup": 
"^5.0.3", + "cacheable-request": "^7.0.2", + "decompress-response": "^6.0.0", + "http2-wrapper": "^1.0.0-beta.5.2", + "lowercase-keys": "^2.0.0", + "p-cancelable": "^2.0.0", + "responselike": "^2.0.0" + }, + "engines": { + "node": ">=10.19.0" + }, + "funding": { + "url": "https://github.com/sindresorhus/got?sponsor=1" + } + }, "node_modules/graceful-fs": { "version": "4.2.11", "resolved": "https://registry.npmjs.org/graceful-fs/-/graceful-fs-4.2.11.tgz", @@ -9152,6 +9560,13 @@ "url": "https://github.com/sponsors/wooorm" } }, + "node_modules/http-cache-semantics": { + "version": "4.1.1", + "resolved": "https://registry.npmjs.org/http-cache-semantics/-/http-cache-semantics-4.1.1.tgz", + "integrity": "sha512-er295DKPVsV82j5kw1Gjt+ADA/XYHsajl82cGNQG2eyoPkvgUhX+nDIyelzhIWbbsXP39EHcI6l5tYs2FYqYXQ==", + "dev": true, + "license": "BSD-2-Clause" + }, "node_modules/http-proxy-agent": { "version": "7.0.2", "resolved": "https://registry.npmjs.org/http-proxy-agent/-/http-proxy-agent-7.0.2.tgz", @@ -9165,6 +9580,20 @@ "node": ">= 14" } }, + "node_modules/http2-wrapper": { + "version": "1.0.3", + "resolved": "https://registry.npmjs.org/http2-wrapper/-/http2-wrapper-1.0.3.tgz", + "integrity": "sha512-V+23sDMr12Wnz7iTcDeJr3O6AIxlnvT/bmaAAAP/Xda35C90p9599p0F1eHR/N1KILWSoWVAiOMFjBBXaXSMxg==", + "dev": true, + "license": "MIT", + "dependencies": { + "quick-lru": "^5.1.1", + "resolve-alpn": "^1.0.0" + }, + "engines": { + "node": ">=10.19.0" + } + }, "node_modules/https-proxy-agent": { "version": "7.0.4", "resolved": "https://registry.npmjs.org/https-proxy-agent/-/https-proxy-agent-7.0.4.tgz", @@ -9970,6 +10399,14 @@ "integrity": "sha512-Bdboy+l7tA3OGW6FjyFHWkP5LuByj1Tk33Ljyq0axyzdk9//JSi2u3fP1QSmd1KNwq6VOKYGlAu87CisVir6Pw==", "dev": true }, + "node_modules/json-stringify-safe": { + "version": "5.0.1", + "resolved": "https://registry.npmjs.org/json-stringify-safe/-/json-stringify-safe-5.0.1.tgz", + "integrity": "sha512-ZClg6AaYvamvYEE82d3Iyd3vSSIjQ+odgjaTzRuO3s7toCdFKczob2i0zCh7JE8kWn17yvAWhUVxvqGwUalsRA==", + "dev": true, + "license": "ISC", + "optional": true + }, "node_modules/json5": { "version": "1.0.2", "resolved": "https://registry.npmjs.org/json5/-/json5-1.0.2.tgz", @@ -10343,6 +10780,16 @@ "url": "https://github.com/sponsors/typicode" } }, + "node_modules/lowercase-keys": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/lowercase-keys/-/lowercase-keys-2.0.0.tgz", + "integrity": "sha512-tqNXrS78oMOE73NMxK4EMLQsQowWf8jKooH9g7xPavRT706R6bkQJ6DY2Te7QukaZsulxa30wQ7bk0pm4XiHmA==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=8" + } + }, "node_modules/lru-cache": { "version": "10.2.2", "resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-10.2.2.tgz", @@ -10461,6 +10908,20 @@ "marked": ">=1 <13" } }, + "node_modules/matcher": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/matcher/-/matcher-3.0.0.tgz", + "integrity": "sha512-OkeDaAZ/bQCxeFAozM55PKcKU0yJMPGifLwV4Qgjitu+5MoAfSQN4lsLJeXZ1b8w0x+/Emda6MZgXS1jvsapng==", + "dev": true, + "license": "MIT", + "optional": true, + "dependencies": { + "escape-string-regexp": "^4.0.0" + }, + "engines": { + "node": ">=10" + } + }, "node_modules/mdast-util-find-and-replace": { "version": "3.0.1", "resolved": "https://registry.npmjs.org/mdast-util-find-and-replace/-/mdast-util-find-and-replace-3.0.1.tgz", @@ -11239,6 +11700,16 @@ "url": "https://github.com/sponsors/sindresorhus" } }, + "node_modules/mimic-response": { + "version": "1.0.1", + "resolved": 
"https://registry.npmjs.org/mimic-response/-/mimic-response-1.0.1.tgz", + "integrity": "sha512-j5EctnkH7amfV/q5Hgmoal1g2QHFJRraOtmx0JpIqkxhBhI/lJSl1nMpQ45hVarwNETOoWEimndZ4QK0RHxuxQ==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=4" + } + }, "node_modules/minimatch": { "version": "9.0.5", "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-9.0.5.tgz", @@ -14246,6 +14717,16 @@ "node": ">= 18" } }, + "node_modules/once": { + "version": "1.4.0", + "resolved": "https://registry.npmjs.org/once/-/once-1.4.0.tgz", + "integrity": "sha512-lNaJgI+2Q5URQBkccEKHTQOPaXdUxnZZElQTZY0MFUAuaEqe1E+Nyvgdz/aIyNi6Z9MzO5dv1H8n58/GELp3+w==", + "dev": true, + "license": "ISC", + "dependencies": { + "wrappy": "1" + } + }, "node_modules/onetime": { "version": "6.0.0", "resolved": "https://registry.npmjs.org/onetime/-/onetime-6.0.0.tgz", @@ -14365,6 +14846,16 @@ "url": "https://github.com/sponsors/sindresorhus" } }, + "node_modules/p-cancelable": { + "version": "2.1.1", + "resolved": "https://registry.npmjs.org/p-cancelable/-/p-cancelable-2.1.1.tgz", + "integrity": "sha512-BZOr3nRQHOntUjTrH8+Lh54smKHoHyur8We1V8DSMVrl5A2malOOwuJRnKRDjSnkoeBh4at6BwEnb5I7Jl31wg==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=8" + } + }, "node_modules/p-each-series": { "version": "3.0.0", "resolved": "https://registry.npmjs.org/p-each-series/-/p-each-series-3.0.0.tgz", @@ -14633,6 +15124,13 @@ "node": ">= 14.16" } }, + "node_modules/pend": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/pend/-/pend-1.2.0.tgz", + "integrity": "sha512-F3asv42UuXchdzt+xXqfW1OGlVBe+mxa2mqI0pg5yAHZPvFmY3Y6drSf/GQ1A86WgWEN9Kzh/WrgKa6iGcHXLg==", + "dev": true, + "license": "MIT" + }, "node_modules/perfect-debounce": { "version": "1.0.0", "resolved": "https://registry.npmjs.org/perfect-debounce/-/perfect-debounce-1.0.0.tgz", @@ -14847,6 +15345,16 @@ "integrity": "sha512-3ouUOpQhtgrbOa17J7+uxOTpITYWaGP7/AhoR3+A+/1e9skrzelGi/dXzEYyvbxubEF6Wn2ypscTKiKJFFn1ag==", "dev": true }, + "node_modules/progress": { + "version": "2.0.3", + "resolved": "https://registry.npmjs.org/progress/-/progress-2.0.3.tgz", + "integrity": "sha512-7PiHtLll5LdnKIMw100I+8xJXR5gW2QwWYkT6iJva0bXitZKa/XMrSbdmg3r2Xnaidz9Qumd0VPaMrZlF9V9sA==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=0.4.0" + } + }, "node_modules/proper-lockfile": { "version": "4.1.2", "resolved": "https://registry.npmjs.org/proper-lockfile/-/proper-lockfile-4.1.2.tgz", @@ -14900,6 +15408,17 @@ "license": "MIT", "optional": true }, + "node_modules/pump": { + "version": "3.0.2", + "resolved": "https://registry.npmjs.org/pump/-/pump-3.0.2.tgz", + "integrity": "sha512-tUPXtzlGM8FE3P0ZL6DVs/3P58k9nk8/jZeQCurTJylQA8qFYzHFfhBJkuqyE0FifOsQ0uKWekiZ5g8wtr28cw==", + "dev": true, + "license": "MIT", + "dependencies": { + "end-of-stream": "^1.1.0", + "once": "^1.3.1" + } + }, "node_modules/punycode": { "version": "2.3.1", "resolved": "https://registry.npmjs.org/punycode/-/punycode-2.3.1.tgz", @@ -14939,6 +15458,19 @@ } ] }, + "node_modules/quick-lru": { + "version": "5.1.1", + "resolved": "https://registry.npmjs.org/quick-lru/-/quick-lru-5.1.1.tgz", + "integrity": "sha512-WuyALRjWPDGtt/wzJiadO5AXY+8hZ80hVpe6MyivgraREW751X3SbhRvG3eLKOYN+8VEvqLcf3wdnt44Z4S4SA==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, "node_modules/rc": { "version": "1.2.8", "resolved": "https://registry.npmjs.org/rc/-/rc-1.2.8.tgz", @@ -15213,6 +15745,13 @@ "url": 
"https://github.com/sponsors/ljharb" } }, + "node_modules/resolve-alpn": { + "version": "1.2.1", + "resolved": "https://registry.npmjs.org/resolve-alpn/-/resolve-alpn-1.2.1.tgz", + "integrity": "sha512-0a1F4l73/ZFZOakJnQ3FvkJ2+gSTQWz/r2KE5OdDY0TxPm5h4GkqkWWfM47T7HsbnOtcJVEF4epCVy6u7Q3K+g==", + "dev": true, + "license": "MIT" + }, "node_modules/resolve-from": { "version": "5.0.0", "resolved": "https://registry.npmjs.org/resolve-from/-/resolve-from-5.0.0.tgz", @@ -15232,6 +15771,19 @@ "url": "https://github.com/privatenumber/resolve-pkg-maps?sponsor=1" } }, + "node_modules/responselike": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/responselike/-/responselike-2.0.1.tgz", + "integrity": "sha512-4gl03wn3hj1HP3yzgdI7d3lCkF95F21Pz4BPGvKHinyQzALR5CapwC8yIi0Rh58DEMQ/SguC03wFj2k0M/mHhw==", + "dev": true, + "license": "MIT", + "dependencies": { + "lowercase-keys": "^2.0.0" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, "node_modules/restore-cursor": { "version": "5.1.0", "resolved": "https://registry.npmjs.org/restore-cursor/-/restore-cursor-5.1.0.tgz", @@ -15404,6 +15956,33 @@ "url": "https://github.com/sponsors/isaacs" } }, + "node_modules/roarr": { + "version": "2.15.4", + "resolved": "https://registry.npmjs.org/roarr/-/roarr-2.15.4.tgz", + "integrity": "sha512-CHhPh+UNHD2GTXNYhPWLnU8ONHdI+5DI+4EYIAOaiD63rHeYlZvyh8P+in5999TTSFgUYuKUAjzRI4mdh/p+2A==", + "dev": true, + "license": "BSD-3-Clause", + "optional": true, + "dependencies": { + "boolean": "^3.0.1", + "detect-node": "^2.0.4", + "globalthis": "^1.0.1", + "json-stringify-safe": "^5.0.1", + "semver-compare": "^1.0.0", + "sprintf-js": "^1.1.2" + }, + "engines": { + "node": ">=8.0" + } + }, + "node_modules/roarr/node_modules/sprintf-js": { + "version": "1.1.3", + "resolved": "https://registry.npmjs.org/sprintf-js/-/sprintf-js-1.1.3.tgz", + "integrity": "sha512-Oo+0REFV59/rz3gfJNKQiBlwfHaSESl1pcGyABQsnnIfWOFt6JNj5gCog2U6MLZ//IGYD+nA8nI+mTShREReaA==", + "dev": true, + "license": "BSD-3-Clause", + "optional": true + }, "node_modules/rollup": { "version": "4.21.2", "resolved": "https://registry.npmjs.org/rollup/-/rollup-4.21.2.tgz", @@ -15791,6 +16370,14 @@ "node": ">=10" } }, + "node_modules/semver-compare": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/semver-compare/-/semver-compare-1.0.0.tgz", + "integrity": "sha512-YM3/ITh2MJ5MtzaM429anh+x2jiLVjqILF4m4oyQB18W7Ggea7BfqdH/wGMK7dDiMghv/6WG7znWMwUDzJiXow==", + "dev": true, + "license": "MIT", + "optional": true + }, "node_modules/semver-diff": { "version": "4.0.0", "resolved": "https://registry.npmjs.org/semver-diff/-/semver-diff-4.0.0.tgz", @@ -15818,6 +16405,23 @@ "url": "https://github.com/sponsors/sindresorhus" } }, + "node_modules/serialize-error": { + "version": "7.0.1", + "resolved": "https://registry.npmjs.org/serialize-error/-/serialize-error-7.0.1.tgz", + "integrity": "sha512-8I8TjW5KMOKsZQTvoxjuSIa7foAwPWGOts+6o7sgjz41/qMD9VQHEDxi6PBvK2l0MXUmqZyNpUK+T2tQaaElvw==", + "dev": true, + "license": "MIT", + "optional": true, + "dependencies": { + "type-fest": "^0.13.1" + }, + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, "node_modules/set-blocking": { "version": "2.0.0", "resolved": "https://registry.npmjs.org/set-blocking/-/set-blocking-2.0.0.tgz", @@ -16613,6 +17217,19 @@ "url": "https://github.com/sponsors/sindresorhus" } }, + "node_modules/sumchecker": { + "version": "3.0.1", + "resolved": "https://registry.npmjs.org/sumchecker/-/sumchecker-3.0.1.tgz", 
+ "integrity": "sha512-MvjXzkz/BOfyVDkG0oFOtBxHX2u3gKbMHIF/dXblZsgD3BWOFLmHovIpZY7BykJdAjcqRCBi1WYBNdEC9yI7vg==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "debug": "^4.1.0" + }, + "engines": { + "node": ">= 8.0" + } + }, "node_modules/super-regex": { "version": "1.0.0", "resolved": "https://registry.npmjs.org/super-regex/-/super-regex-1.0.0.tgz", @@ -17188,6 +17805,20 @@ "node": ">= 0.8.0" } }, + "node_modules/type-fest": { + "version": "0.13.1", + "resolved": "https://registry.npmjs.org/type-fest/-/type-fest-0.13.1.tgz", + "integrity": "sha512-34R7HTnG0XIJcBSn5XhDd7nNFPRcXYRZrBB2O2jdKqYODldSzBAqzsWoZYYvduky73toYS/ESqxPvkDf/F0XMg==", + "dev": true, + "license": "(MIT OR CC0-1.0)", + "optional": true, + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, "node_modules/typed-array-buffer": { "version": "1.0.2", "resolved": "https://registry.npmjs.org/typed-array-buffer/-/typed-array-buffer-1.0.2.tgz", @@ -18121,6 +18752,13 @@ "node": ">=8" } }, + "node_modules/wrappy": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/wrappy/-/wrappy-1.0.2.tgz", + "integrity": "sha512-l4Sp/DRseor9wL6EvV2+TuQn63dMkPjZ/sp9XkghTEbV9KlPS1xUsZ3u7/IQO4wxtcFB4bgpQPRcR3QCvezPcQ==", + "dev": true, + "license": "ISC" + }, "node_modules/xml-js": { "version": "1.6.11", "resolved": "https://registry.npmjs.org/xml-js/-/xml-js-1.6.11.tgz", @@ -18194,6 +18832,17 @@ "node": ">=12" } }, + "node_modules/yauzl": { + "version": "2.10.0", + "resolved": "https://registry.npmjs.org/yauzl/-/yauzl-2.10.0.tgz", + "integrity": "sha512-p4a9I6X6nu6IhoGmBqAcbJy1mlC4j27vEPZX9F4L4/vZT3Lyq1VkFHw/V/PUcB9Buo+DG3iHkT0x3Qya58zc3g==", + "dev": true, + "license": "MIT", + "dependencies": { + "buffer-crc32": "~0.2.3", + "fd-slicer": "~1.1.0" + } + }, "node_modules/yocto-queue": { "version": "1.1.1", "resolved": "https://registry.npmjs.org/yocto-queue/-/yocto-queue-1.1.1.tgz", diff --git a/package.json b/package.json index b8992eb4..372e7778 100644 --- a/package.json +++ b/package.json @@ -148,6 +148,7 @@ "@types/yargs": "^17.0.33", "@vitest/coverage-v8": "^2.1.4", "@vitest/ui": "^2.1.4", + "electron": "^33.2.0", "eslint": "^9.13.0", "eslint-import-resolver-typescript": "^3.6.3", "eslint-plugin-import": "^2.31.0", diff --git a/src/bindings/AddonTypes.ts b/src/bindings/AddonTypes.ts index 891d9df4..75b7de29 100644 --- a/src/bindings/AddonTypes.ts +++ b/src/bindings/AddonTypes.ts @@ -69,13 +69,14 @@ export type BindingModule = { getGpuDeviceInfo(): { deviceNames: string[] }, - getGpuType(): "cuda" | "vulkan" | "metal" | undefined, + getGpuType(): "cuda" | "vulkan" | "metal" | false | undefined, getSwapInfo(): { total: number, maxSize: number, free: number }, init(): Promise, + loadBackends(forceLoadLibraries?: boolean): void, dispose(): Promise }; diff --git a/src/bindings/Llama.ts b/src/bindings/Llama.ts index c9effbc7..bff6f34b 100644 --- a/src/bindings/Llama.ts +++ b/src/bindings/Llama.ts @@ -10,7 +10,7 @@ import {LlamaGrammar, LlamaGrammarOptions} from "../evaluator/LlamaGrammar.js"; import {ThreadsSplitter} from "../utils/ThreadsSplitter.js"; import {getLlamaClasses, LlamaClasses} from "../utils/getLlamaClasses.js"; import {BindingModule} from "./AddonTypes.js"; -import {BuildGpu, BuildMetadataFile, LlamaGpuType, LlamaLocks, LlamaLogLevel} from "./types.js"; +import {BuildGpu, BuildMetadataFile, LlamaGpuType, LlamaLocks, LlamaLogLevel, LlamaLogLevelGreaterThanOrEqual} from "./types.js"; import {MemoryOrchestrator, MemoryReservation} from 
"./utils/MemoryOrchestrator.js"; const LlamaLogLevelToAddonLogLevel: ReadonlyMap = new Map([ @@ -126,6 +126,11 @@ export class Llama { this._bindings.setLoggerLogLevel(LlamaLogLevelToAddonLogLevel.get(this._logLevel) ?? defaultLogLevel); } + this._bindings.loadBackends(); + const loadedGpu = bindings.getGpuType(); + if (loadedGpu == null || (loadedGpu === false && gpu !== false)) + this._bindings.loadBackends(true); + this._onExit = this._onExit.bind(this); process.on("exit", this._onExit); @@ -413,7 +418,9 @@ export class Llama { } try { - this._logger(level, message); + const transformedLogLevel = getTransformedLogLevel(level, message); + if (LlamaLogLevelGreaterThanOrEqual(transformedLogLevel, this._logLevel)) + this._logger(transformedLogLevel, message); } catch (err) { // the native addon code calls this function, so there's no use to throw an error here } @@ -597,3 +604,14 @@ function logMessageIsOnlyDots(message: string | null) { return true; } + +function getTransformedLogLevel(level: LlamaLogLevel, message: string): LlamaLogLevel { + if (level === LlamaLogLevel.warn && message.endsWith("the full capacity of the model will not be utilized")) + return LlamaLogLevel.info; + else if (level === LlamaLogLevel.warn && message.startsWith("ggml_metal_init: skipping kernel_") && message.endsWith("(not supported)")) + return LlamaLogLevel.log; + else if (level === LlamaLogLevel.warn && message.startsWith("ggml_cuda_init: GGML_CUDA_FORCE_") && message.endsWith(" no")) + return LlamaLogLevel.log; + + return level; +} diff --git a/src/bindings/getLlama.ts b/src/bindings/getLlama.ts index d9e3255f..dffeea50 100644 --- a/src/bindings/getLlama.ts +++ b/src/bindings/getLlama.ts @@ -72,6 +72,9 @@ export type LlamaOptions = { * Otherwise, throw a `NoBinaryFoundError` error. * - **`"forceRebuild"`**: Always build from source. * Be cautious with this option, as it will cause the build to fail on Windows when the binaries are in use by another process. + * - **`"try"`**: If a local build is found, use it. + * Otherwise, try to build from source and use the resulting binary. + * If building from source fails, use a prebuilt binary if found. * * When running from inside an Asar archive in Electron, building from source is not possible, so it'll never build from source. * To allow building from source in Electron apps, make sure you ship `node-llama-cpp` as an unpacked module. @@ -79,7 +82,7 @@ export type LlamaOptions = { * Defaults to `"auto"`. * On Electron, defaults to `"never"`. 
*/ - build?: "auto" | "never" | "forceRebuild", + build?: "auto" | "never" | "forceRebuild" | "try", /** * Set custom CMake options for llama.cpp @@ -380,6 +383,45 @@ export async function getLlamaForOptions({ if (buildGpusToTry.length === 0) throw new Error("No GPU types available to try building with"); + if (build === "try") { + if (canUsePrebuiltBinaries) { + try { + return await getLlamaForOptions({ + gpu, + logLevel, + logger, + build: "auto", + cmakeOptions, + existingPrebuiltBinaryMustMatchBuildOptions, + usePrebuiltBinaries: false, + progressLogs, + skipDownload, + maxThreads, + vramPadding, + ramPadding, + debug + }); + } catch (err) { + return await getLlamaForOptions({ + gpu, + logLevel, + logger, + build: "never", + cmakeOptions, + existingPrebuiltBinaryMustMatchBuildOptions, + usePrebuiltBinaries, + progressLogs, + skipDownload, + maxThreads, + vramPadding, + ramPadding, + debug + }); + } + } else + build = "auto"; + } + if (build === "auto" || build === "never") { for (let i = 0; i < buildGpusToTry.length; i++) { const gpu = buildGpusToTry[i]; @@ -544,7 +586,7 @@ async function loadExistingLlamaBinary({ buildMetadata }); const binaryCompatible = shouldTestBinaryBeforeLoading - ? await testBindingBinary(localBuildBinPath) + ? await testBindingBinary(localBuildBinPath, buildOptions.gpu) : true; if (binaryCompatible) { @@ -601,7 +643,7 @@ async function loadExistingLlamaBinary({ buildMetadata }); const binaryCompatible = shouldTestBinaryBeforeLoading - ? await testBindingBinary(prebuiltBinDetails.binaryPath) + ? await testBindingBinary(prebuiltBinDetails.binaryPath, buildOptions.gpu) : true; if (binaryCompatible) { diff --git a/src/bindings/utils/compileLLamaCpp.ts b/src/bindings/utils/compileLLamaCpp.ts index aceb9279..ec9655b4 100644 --- a/src/bindings/utils/compileLLamaCpp.ts +++ b/src/bindings/utils/compileLLamaCpp.ts @@ -9,7 +9,7 @@ import { buildMetadataFileName, documentationPageUrls, llamaCppDirectory, llamaDirectory, llamaLocalBuildBinsDirectory, llamaPrebuiltBinsDirectory, llamaToolchainsDirectory } from "../../config.js"; -import {BuildMetadataFile, BuildOptions, convertBuildOptionsToBuildOptionsJSON} from "../types.js"; +import {BuildGpu, BuildMetadataFile, BuildOptions, convertBuildOptionsToBuildOptionsJSON} from "../types.js"; import {spawnCommand, SpawnError} from "../../utils/spawnCommand.js"; import {downloadCmakeIfNeeded, fixXpackPermissions, getCmakePath, hasBuiltinCmake} from "../../utils/cmake.js"; import {getConsoleLogPrefix} from "../../utils/getConsoleLogPrefix.js"; @@ -31,7 +31,7 @@ export async function compileLlamaCpp(buildOptions: BuildOptions, compileOptions includeBuildOptionsInBinaryFolderName?: boolean, ensureLlamaCppRepoIsCloned?: boolean, downloadCmakeIfNeeded?: boolean, - ignoreWorkarounds?: ("cudaArchitecture")[], + ignoreWorkarounds?: ("cudaArchitecture" | "reduceParallelBuildThreads" | "singleBuildThread")[], envVars?: typeof process.env, ciMode?: boolean }): Promise { @@ -54,6 +54,12 @@ export async function compileLlamaCpp(buildOptions: BuildOptions, compileOptions const outDirectory = path.join(llamaLocalBuildBinsDirectory, finalBuildFolderName); + let parallelBuildThreads = getParallelBuildThreadsToUse(platform, buildOptions.gpu, ciMode); + if (ignoreWorkarounds.includes("singleBuildThread")) + parallelBuildThreads = 1; + else if (ignoreWorkarounds.includes("reduceParallelBuildThreads")) + parallelBuildThreads = reduceParallelBuildThreads(parallelBuildThreads); + await fs.mkdirp(llamaLocalBuildBinsDirectory); try { await withLockfile({ 
@@ -99,6 +105,9 @@ export async function compileLlamaCpp(buildOptions: BuildOptions, compileOptions if (!cmakeCustomOptions.has("GGML_AMX")) cmakeCustomOptions.set("GGML_AMX", "OFF"); + + if (!cmakeCustomOptions.has("GGML_NATIVE") && buildOptions.platform !== "mac") + cmakeCustomOptions.set("GGML_NATIVE", "OFF"); } await fs.remove(outDirectory); @@ -125,7 +134,7 @@ export async function compileLlamaCpp(buildOptions: BuildOptions, compileOptions "--arch=" + buildOptions.arch, "--out", path.relative(llamaDirectory, outDirectory), "--runtime-version=" + runtimeVersion, - "--parallel=" + getParallelBuildThreadsToUse(platform), + "--parallel=" + parallelBuildThreads, ...cmakePathArgs, ...( [...cmakeCustomOptions].map(([key, value]) => "--CD" + key + "=" + value) @@ -239,6 +248,40 @@ export async function compileLlamaCpp(buildOptions: BuildOptions, compileOptions console.error(getConsoleLogPrefix(true, false), err); } } + } else if ( + (!ignoreWorkarounds.includes("reduceParallelBuildThreads") || !ignoreWorkarounds.includes("singleBuildThread")) && + (platform === "win" || platform === "linux") && + err instanceof SpawnError && + reduceParallelBuildThreads(parallelBuildThreads) !== parallelBuildThreads && ( + err.combinedStd.toLowerCase().includes("LLVM error : out of memory".toLowerCase()) || + err.combinedStd.toLowerCase().includes("compiler is out of heap space".toLowerCase()) + ) + ) { + if (buildOptions.progressLogs) { + if (ignoreWorkarounds.includes("reduceParallelBuildThreads")) + console.info( + getConsoleLogPrefix(true) + "Trying to compile again with a single build thread" + ); + else + console.info( + getConsoleLogPrefix(true) + "Trying to compile again with reduced parallel build threads" + ); + } + + try { + return await compileLlamaCpp(buildOptions, { + ...compileOptions, + ignoreWorkarounds: [ + ...ignoreWorkarounds, + ignoreWorkarounds.includes("reduceParallelBuildThreads") + ? 
"singleBuildThread" + : "reduceParallelBuildThreads" + ] + }); + } catch (err) { + if (buildOptions.progressLogs) + console.error(getConsoleLogPrefix(true, false), err); + } } console.info("\n" + @@ -461,9 +504,12 @@ async function getToolchainFileForArch(targetArch: string) { return null; } -function getParallelBuildThreadsToUse(platform: BinaryPlatform) { +function getParallelBuildThreadsToUse(platform: BinaryPlatform, gpu?: BuildGpu, ciMode: boolean = false) { const cpuCount = os.cpus().length; + if (ciMode && platform === "win" && gpu === "cuda" && cpuCount === 4) + return 3; // workaround for `compiler is out of heap space` error on GitHub Actions on Windows when building with CUDA + if (cpuCount <= 4) return cpuCount; @@ -472,3 +518,7 @@ function getParallelBuildThreadsToUse(platform: BinaryPlatform) { return cpuCount - 2; } + +function reduceParallelBuildThreads(originalParallelBuildThreads: number) { + return Math.max(1, Math.round(originalParallelBuildThreads / 2)); +} diff --git a/src/bindings/utils/testBindingBinary.ts b/src/bindings/utils/testBindingBinary.ts index f49dbdc6..43e47ebe 100644 --- a/src/bindings/utils/testBindingBinary.ts +++ b/src/bindings/utils/testBindingBinary.ts @@ -3,6 +3,8 @@ import {fileURLToPath} from "url"; import {createRequire} from "module"; import path from "path"; import {getConsoleLogPrefix} from "../../utils/getConsoleLogPrefix.js"; +import {runningInElectron} from "../../utils/runtime.js"; +import {BuildGpu} from "../types.js"; import type {BindingModule} from "../AddonTypes.js"; const require = createRequire(import.meta.url); @@ -10,7 +12,7 @@ const __filename = fileURLToPath(import.meta.url); const detectedFileName = path.basename(__filename); const expectedFileName = "testBindingBinary"; -export function testBindingBinary(bindingBinaryPath: string, testTimeout: number = 1000 * 60 * 5): Promise { +export async function testBindingBinary(bindingBinaryPath: string, gpu: BuildGpu, testTimeout: number = 1000 * 60 * 5): Promise { if (!detectedFileName.startsWith(expectedFileName)) { console.warn( getConsoleLogPrefix() + @@ -22,32 +24,125 @@ export function testBindingBinary(bindingBinaryPath: string, testTimeout: number 'To resolve this issue, make sure that "node-llama-cpp" is not bundled together with other code and is imported as an external module with its original file structure.' 
); - return Promise.resolve(true); + return true; } - const subProcess = fork(__filename, [], { - detached: false, - env: { - ...process.env, - TEST_BINDING_CP: "true" + async function getForkFunction() { + if (runningInElectron) { + try { + const {utilityProcess} = await import("electron"); + + return { + type: "electron", + fork: utilityProcess.fork.bind(utilityProcess) + } as const; + } catch (err) { + // do nothing + } + } + + return { + type: "node", + fork + } as const; + } + + const forkFunction = await getForkFunction(); + + function createTestProcess({ + onMessage, + onExit + }: { + onMessage(message: ChildToParentMessage): void, + onExit(code: number): void + }): { + sendMessage(message: ParentToChildMessage): void, + killProcess(): void + } { + if (forkFunction.type === "electron") { + let exited = false; + const subProcess = forkFunction.fork(__filename, [], { + env: { + ...process.env, + TEST_BINDING_CP: "true" + } + }); + + function cleanupElectronFork() { + if (subProcess.pid != null || !exited) { + subProcess.kill(); + exited = true; + } + + process.off("exit", cleanupElectronFork); + } + + process.on("exit", cleanupElectronFork); + + subProcess.on("message", onMessage); + subProcess.on("exit", (code) => { + exited = true; + cleanupElectronFork(); + onExit(code); + }); + + return { + sendMessage: (message: ParentToChildMessage) => subProcess.postMessage(message), + killProcess: cleanupElectronFork + }; + } + + const subProcess = forkFunction.fork(__filename, [], { + detached: false, + silent: true, + env: { + ...process.env, + TEST_BINDING_CP: "true" + } + }); + + function cleanupNodeFork() { + if (subProcess.exitCode == null) + subProcess.kill("SIGKILL"); + + process.off("exit", cleanupNodeFork); + } + + process.on("exit", cleanupNodeFork); + + subProcess.on("message", onMessage); + subProcess.on("exit", (code) => { + cleanupNodeFork(); + onExit(code ?? -1); + }); + + if (subProcess.killed || subProcess.exitCode != null) { + cleanupNodeFork(); + onExit(subProcess.exitCode ?? 
-1); } - }); + + return { + sendMessage: (message: ParentToChildMessage) => subProcess.send(message), + killProcess: cleanupNodeFork + }; + } + let testPassed = false; let forkSucceeded = false; let timeoutHandle: ReturnType | null = null; + let subProcess: ReturnType | undefined = undefined; + let testFinished = false; + function cleanup() { - if (subProcess.exitCode == null) - subProcess.kill("SIGKILL"); + testFinished = true; if (timeoutHandle != null) clearTimeout(timeoutHandle); - process.off("exit", cleanup); + subProcess?.killProcess(); } - process.on("exit", cleanup); - return Promise.race([ new Promise((_, reject) => { timeoutHandle = setTimeout(() => { @@ -65,45 +160,58 @@ export function testBindingBinary(bindingBinaryPath: string, testTimeout: number cleanup(); } - subProcess.on("message", (message: ChildToParentMessage) => { - if (message.type === "ready") { - forkSucceeded = true; - subProcess.send({type: "start", bindingBinaryPath} satisfies ParentToChildMessage); - } else if (message.type === "done") { - testPassed = true; - subProcess.send({type: "exit"} satisfies ParentToChildMessage); + subProcess = createTestProcess({ + onMessage(message: ChildToParentMessage) { + if (message.type === "ready") { + forkSucceeded = true; + subProcess!.sendMessage({ + type: "start", + bindingBinaryPath, + gpu + }); + } else if (message.type === "done") { + testPassed = true; + subProcess!.sendMessage({type: "exit"}); + } + }, + onExit(code: number) { + if (code !== 0) + testPassed = false; + + done(); } }); - subProcess.on("exit", (code) => { - if (code !== 0) - testPassed = false; - - done(); - }); - - if (subProcess.killed || subProcess.exitCode != null) { - if (subProcess.exitCode !== 0) - testPassed = false; - - done(); - } + if (testFinished) + subProcess.killProcess(); }) ]); } -if (process.env.TEST_BINDING_CP === "true" && process.send != null) { - process.on("message", async (message: ParentToChildMessage) => { +if (process.env.TEST_BINDING_CP === "true" && (process.parentPort != null || process.send != null)) { + const sendMessage = process.parentPort != null + ? (message: ChildToParentMessage) => process.parentPort.postMessage(message) + : (message: ChildToParentMessage) => process.send!(message); + const onMessage = async (message: ParentToChildMessage) => { if (message.type === "start") { - if (process.send == null) - process.exit(1); - try { const binding: BindingModule = require(message.bindingBinaryPath); + + binding.loadBackends(); + const loadedGpu = binding.getGpuType(); + if (loadedGpu == null || (loadedGpu === false && message.gpu !== false)) + binding.loadBackends(true); + await binding.init(); binding.getGpuVramInfo(); binding.getGpuDeviceInfo(); - process.send({type: "done"} satisfies ChildToParentMessage); + + const gpuType = binding.getGpuType(); + void (gpuType as BuildGpu satisfies typeof gpuType); + if (gpuType !== message.gpu) + throw new Error(`Binary GPU type mismatch. 
Expected: ${message.gpu}, got: ${gpuType}`); + + sendMessage({type: "done"}); } catch (err) { console.error(err); process.exit(1); @@ -111,14 +219,20 @@ if (process.env.TEST_BINDING_CP === "true" && process.send != null) { } else if (message.type === "exit") { process.exit(0); } - }); + }; + + if (process.parentPort != null) + process.parentPort.on("message", (message) => onMessage(message.data)); + else + process.on("message", onMessage); - process.send({type: "ready"} satisfies ChildToParentMessage); + sendMessage({type: "ready"}); } type ParentToChildMessage = { type: "start", - bindingBinaryPath: string + bindingBinaryPath: string, + gpu: BuildGpu } | { type: "exit" }; diff --git a/src/chatWrappers/Llama3_1ChatWrapper.ts b/src/chatWrappers/Llama3_1ChatWrapper.ts index 00ffcf3b..d7c2f412 100644 --- a/src/chatWrappers/Llama3_1ChatWrapper.ts +++ b/src/chatWrappers/Llama3_1ChatWrapper.ts @@ -6,6 +6,7 @@ import { import {SpecialToken, LlamaText, SpecialTokensText} from "../utils/LlamaText.js"; import {ChatModelFunctionsDocumentationGenerator} from "./utils/ChatModelFunctionsDocumentationGenerator.js"; import {jsonDumps} from "./utils/jsonDumps.js"; +import {isLlama3_2LightweightModel} from "./utils/isLlama3_2LightweightModel.js"; // source: https://llama.meta.com/docs/model-cards-and-prompt-formats/llama3_1 export class Llama3_1ChatWrapper extends ChatWrapper { @@ -332,10 +333,10 @@ export class Llama3_1ChatWrapper extends ChatWrapper { public static override _checkModelCompatibility(options: ChatWrapperCheckModelCompatibilityParams): boolean { if (options.tokenizer != null) { const tokens = options.tokenizer("<|eom_id|>", true, "trimLeadingSpace"); - return tokens.length === 1 && options.tokenizer.isSpecialToken(tokens[0]!); + return tokens.length === 1 && options.tokenizer.isSpecialToken(tokens[0]!) 
&& !isLlama3_2LightweightModel(options); } - return true; + return !isLlama3_2LightweightModel(options); } /** @internal */ diff --git a/src/chatWrappers/Llama3_2LightweightChatWrapper.ts b/src/chatWrappers/Llama3_2LightweightChatWrapper.ts new file mode 100644 index 00000000..e38ff973 --- /dev/null +++ b/src/chatWrappers/Llama3_2LightweightChatWrapper.ts @@ -0,0 +1,354 @@ +import {ChatWrapper, ChatWrapperJinjaMatchConfiguration} from "../ChatWrapper.js"; +import { + ChatHistoryItem, ChatModelFunctions, ChatSystemMessage, ChatWrapperCheckModelCompatibilityParams, + ChatWrapperGenerateContextStateOptions, ChatWrapperGeneratedContextState, ChatWrapperSettings +} from "../types.js"; +import {SpecialToken, LlamaText, SpecialTokensText} from "../utils/LlamaText.js"; +import {ChatModelFunctionsDocumentationGenerator} from "./utils/ChatModelFunctionsDocumentationGenerator.js"; +import {isLlama3_2LightweightModel} from "./utils/isLlama3_2LightweightModel.js"; + +// source: https://llama.meta.com/docs/model-cards-and-prompt-formats/llama3_2/ +export class Llama3_2LightweightChatWrapper extends ChatWrapper { + public readonly wrapperName: string = "Llama 3.2 lightweight"; + + public readonly cuttingKnowledgeDate?: Date | (() => Date) | null; + public readonly todayDate: Date | (() => Date) | null; + public readonly noToolInstructions: boolean; + + /** @internal */ private readonly _specialTokensTextForPreamble: boolean; + + public override readonly settings: ChatWrapperSettings = { + supportsSystemMessages: true, + functions: { + call: { + optionalPrefixSpace: true, + prefix: '{"name": "', + paramsPrefix: '", "parameters": ', + suffix: LlamaText("}", new SpecialToken("EOT")) + }, + result: { + prefix: LlamaText(new SpecialToken("EOT"), new SpecialTokensText("<|start_header_id|>ipython<|end_header_id|>\n\n")), + suffix: LlamaText(new SpecialToken("EOT"), new SpecialTokensText("<|start_header_id|>assistant<|end_header_id|>\n\n")) + } + } + }; + + /** + * @param options + */ + public constructor(options: { + /** + * Set to `null` to disable + * + * Defaults to December 2023 + */ + cuttingKnowledgeDate?: Date | (() => Date) | number | string | null, + + /** + * Set to `null` to disable + * + * Defaults to current date + */ + todayDate?: Date | (() => Date) | number | string | null, + + noToolInstructions?: boolean, + + /** @internal */ + _specialTokensTextForPreamble?: boolean + } = {}) { + super(); + + const { + cuttingKnowledgeDate = new Date("2023-12-01T00:00:00Z"), + todayDate = () => new Date(), + noToolInstructions = false, + + _specialTokensTextForPreamble = false + } = options; + + this.cuttingKnowledgeDate = cuttingKnowledgeDate == null + ? null + : cuttingKnowledgeDate instanceof Function + ? cuttingKnowledgeDate + : new Date(cuttingKnowledgeDate); + this.todayDate = todayDate == null + ? null + : todayDate instanceof Function + ? todayDate + : new Date(todayDate); + this.noToolInstructions = noToolInstructions; + + this._specialTokensTextForPreamble = _specialTokensTextForPreamble; + } + + public override addAvailableFunctionsSystemMessageToHistory( + history: readonly ChatHistoryItem[], + availableFunctions?: ChatModelFunctions, { + documentParams = true + }: { + documentParams?: boolean + } = {} + ) { + const availableFunctionNames = Object.keys(availableFunctions ?? 
{}); + + if (availableFunctions == null || availableFunctionNames.length === 0) + return history; + + const res = history.slice(); + + const functionsSystemMessage: ChatSystemMessage = { + type: "system", + text: this.generateAvailableFunctionsSystemText(availableFunctions, {documentParams}).toJSON() + }; + + if (res.length >= 2 && res[0]!.type === "system" && res[1]!.type === "system") + res.splice(1, 0, functionsSystemMessage); + else + res.unshift({ + type: "system", + text: this.generateAvailableFunctionsSystemText(availableFunctions, {documentParams}).toJSON() + }); + + return res; + } + + public override generateContextState({ + chatHistory, availableFunctions, documentFunctionParams + }: ChatWrapperGenerateContextStateOptions): ChatWrapperGeneratedContextState { + const chatHistoryWithPreamble = this.prependPreambleToChatHistory(chatHistory); + const historyWithFunctions = this.addAvailableFunctionsSystemMessageToHistory(chatHistoryWithPreamble, availableFunctions, { + documentParams: documentFunctionParams + }); + + const resultItems: Array<{ + system: LlamaText | null, + user: LlamaText | null, + model: LlamaText | null + }> = []; + + let systemTexts: LlamaText[] = []; + let userTexts: LlamaText[] = []; + let modelTexts: LlamaText[] = []; + let currentAggregateFocus: "system" | "user" | "model" | null = null; + + const flush = () => { + if (systemTexts.length > 0 || userTexts.length > 0 || modelTexts.length > 0) + resultItems.push({ + system: systemTexts.length === 0 + ? null + : LlamaText.joinValues( + resultItems.length === 0 && this._specialTokensTextForPreamble + ? LlamaText(new SpecialTokensText("\n\n")) + : "\n\n", + systemTexts + ), + user: userTexts.length === 0 + ? null + : LlamaText.joinValues("\n\n", userTexts), + model: modelTexts.length === 0 + ? null + : LlamaText.joinValues("\n\n", modelTexts) + }); + + systemTexts = []; + userTexts = []; + modelTexts = []; + }; + + for (const item of historyWithFunctions) { + if (item.type === "system") { + if (currentAggregateFocus !== "system") + flush(); + + currentAggregateFocus = "system"; + systemTexts.push(LlamaText.fromJSON(item.text)); + } else if (item.type === "user") { + if (currentAggregateFocus !== "user") + flush(); + + currentAggregateFocus = "user"; + userTexts.push(LlamaText(item.text)); + } else if (item.type === "model") { + if (currentAggregateFocus !== "model") + flush(); + + currentAggregateFocus = "model"; + modelTexts.push(this.generateModelResponseText(item.response)); + } else + void (item satisfies never); + } + + flush(); + + const contextText = LlamaText( + new SpecialToken("BOS"), + resultItems.map((item, index) => { + const isLastItem = index === resultItems.length - 1; + const res: LlamaText[] = []; + + if (item.system != null) { + res.push( + LlamaText([ + new SpecialTokensText("<|start_header_id|>system<|end_header_id|>\n\n"), + item.system, + new SpecialToken("EOT") + ]) + ); + } + + if (item.user != null) { + res.push( + LlamaText([ + new SpecialTokensText("<|start_header_id|>user<|end_header_id|>\n\n"), + item.user, + new SpecialToken("EOT") + ]) + ); + } + + if (item.model != null) { + res.push( + LlamaText([ + new SpecialTokensText("<|start_header_id|>assistant<|end_header_id|>\n\n"), + item.model, + isLastItem + ? 
LlamaText([])
+                                : new SpecialToken("EOT")
+                        ])
+                    );
+                }
+
+                return LlamaText(res);
+            })
+        );
+
+        return {
+            contextText,
+            stopGenerationTriggers: [
+                LlamaText(new SpecialToken("EOS")),
+                LlamaText(new SpecialToken("EOT")),
+                LlamaText(new SpecialTokensText("<|eot_id|>")),
+                LlamaText(new SpecialTokensText("<|end_of_text|>")),
+                LlamaText("<|eot_id|>"),
+                LlamaText("<|end_of_text|>")
+            ]
+        };
+    }
+
+    public override generateAvailableFunctionsSystemText(availableFunctions: ChatModelFunctions, {documentParams = true}: {
+        documentParams?: boolean
+    }) {
+        const functionsDocumentationGenerator = new ChatModelFunctionsDocumentationGenerator(availableFunctions);
+
+        if (!functionsDocumentationGenerator.hasAnyFunctions)
+            return LlamaText([]);
+
+        return LlamaText.joinValues("\n", [
+            "You have access to the following functions. To call a function, please respond with JSON for a function call.",
+            'Respond in the format {"name": function name, "parameters": function call parameters}.',
+            "Do not use variables.",
+            "",
+            functionsDocumentationGenerator.getLlama3_2LightweightFunctionSignatures({documentParams}),
+            "",
+            "After calling a function, the result will appear afterwards and is only visible to you.",
+            "To make information visible to the user, you must include it in your response.",
+            "Do not tell the user about the functions you are using.",
+            "Only call functions when needed."
+        ]);
+    }
+
+    public prependPreambleToChatHistory(chatHistory: readonly ChatHistoryItem[]): readonly ChatHistoryItem[] {
+        const res = chatHistory.slice();
+
+        const formatMonthDate = (date: Date, timezone?: "UTC") => {
+            const today = this.todayDate instanceof Function
+                ? this.todayDate()
+                : (this.todayDate ?? new Date());
+
+            if (today.getUTCMonth() === date.getUTCMonth() && today.getUTCFullYear() === date.getUTCFullYear())
+                return formatDate(date, timezone);
+
+            const month = date.toLocaleDateString("en-US", {month: "long", timeZone: timezone});
+            const year = date.toLocaleDateString("en-US", {year: "numeric", timeZone: timezone});
+            return `${month} ${year}`;
+        };
+
+        const lines: string[] = [];
+
+        if (this.cuttingKnowledgeDate != null) {
+            const date = this.cuttingKnowledgeDate instanceof Function
+                ? this.cuttingKnowledgeDate()
+                : this.cuttingKnowledgeDate;
+
+            lines.push(`Cutting Knowledge Date: ${formatMonthDate(date, "UTC")}`);
+        }
+
+        if (this.todayDate != null) {
+            const date = this.todayDate instanceof Function
+                ? this.todayDate()
+                : this.todayDate;
+            lines.push(`Today Date: ${formatDate(date, undefined)}`);
+        }
+
+        if (lines.length > 0)
+            res.unshift({
+                type: "system",
+                text: this._specialTokensTextForPreamble
+                    ? LlamaText(new SpecialTokensText(lines.join("\n"))).toJSON()
+                    : LlamaText.joinValues("\n", lines).toJSON()
+            });
+
+        return res;
+    }
+
+    /** @internal */
+    public static override _checkModelCompatibility(options: ChatWrapperCheckModelCompatibilityParams): boolean {
+        if (options.tokenizer != null) {
+            const tokens = options.tokenizer("<|eom_id|>", true, "trimLeadingSpace");
+            return tokens.length === 1 && options.tokenizer.isSpecialToken(tokens[0]!) 
&& isLlama3_2LightweightModel(options); + } + + return isLlama3_2LightweightModel(options); + } + + /** @internal */ + public static override _getOptionConfigurationsToTestIfCanSupersedeJinjaTemplate() { + return [ + {}, + [{todayDate: null}, {}], + [{cuttingKnowledgeDate: null}, {}], + [{noToolInstructions: true}, {}], + [{todayDate: null, cuttingKnowledgeDate: null}, {}], + [{todayDate: null, cuttingKnowledgeDate: null, noToolInstructions: true}, {}], + [{todayDate: new Date("2024-07-26T00:00:00"), cuttingKnowledgeDate: null, noToolInstructions: true}, {}], + + [ + { + todayDate: new Date("2024-07-26T00:00:00"), + cuttingKnowledgeDate: new Date("2023-12-01T00:00:00Z"), + noToolInstructions: true + }, + {cuttingKnowledgeDate: new Date("2023-12-01T00:00:00Z")}, + {"date_string": formatDate(new Date("2024-07-26T00:00:00"), undefined)} + ], + + [ + { + todayDate: new Date("2024-07-26T00:00:00"), + cuttingKnowledgeDate: new Date("2023-12-01T00:00:00Z"), + noToolInstructions: true, + _specialTokensTextForPreamble: true + }, + {cuttingKnowledgeDate: new Date("2023-12-01T00:00:00Z")}, + {"date_string": formatDate(new Date("2024-07-26T00:00:00"), undefined)} + ] + ] satisfies ChatWrapperJinjaMatchConfiguration; + } +} + +function formatDate(date: Date, timezone?: "UTC") { + const day = date.toLocaleDateString("en-US", {day: "numeric", timeZone: timezone}); + const month = date.toLocaleDateString("en-US", {month: "short", timeZone: timezone}); + const year = date.toLocaleDateString("en-US", {year: "numeric", timeZone: timezone}); + return `${day} ${month} ${year}`; +} diff --git a/src/chatWrappers/utils/ChatModelFunctionsDocumentationGenerator.ts b/src/chatWrappers/utils/ChatModelFunctionsDocumentationGenerator.ts index 50916079..32b5642b 100644 --- a/src/chatWrappers/utils/ChatModelFunctionsDocumentationGenerator.ts +++ b/src/chatWrappers/utils/ChatModelFunctionsDocumentationGenerator.ts @@ -147,4 +147,41 @@ export class ChatModelFunctionsDocumentationGenerator { .join("\n\n"); } /* eslint-enable @stylistic/max-len */ + + /* eslint-disable @stylistic/max-len */ + /** + * Example: + * ``` + * {"name": "getDate", "description": "Retrieve the current date"} + * + * {"name": "getTime", "description": "Retrieve the current time", "parameters": {"type": "object", "properties": {"hours": {"enum": ["24", "12"]}, "seconds": {"type": "boolean"}}}} + * ``` + * @param options + * @param [options.documentParams] - Whether to document the parameters of the functions + */ + public getLlama3_2LightweightFunctionSignatures({documentParams = true}: {documentParams?: boolean} = {}) { + const chatModelFunctions = this.chatModelFunctions; + + if (!this.hasAnyFunctions || chatModelFunctions == null) + return ""; + + const functionNames = Object.keys(chatModelFunctions); + + const functionsLines = functionNames + .map((functionName) => { + const functionDefinition = chatModelFunctions[functionName]; + + const addDescription = functionDefinition?.description != null && functionDefinition.description.trim() !== ""; + + return jsonDumps({ + name: functionName, + ...(addDescription ? {description: functionDefinition.description} : {}), + ...(documentParams && functionDefinition?.params != null ? 
{parameters: functionDefinition.params} : {}) + }); + }) + .join("\n\n"); + + return functionsLines; + } + /* eslint-enable @stylistic/max-len */ } diff --git a/src/chatWrappers/utils/getModelLinageNames.ts b/src/chatWrappers/utils/getModelLinageNames.ts new file mode 100644 index 00000000..683bb844 --- /dev/null +++ b/src/chatWrappers/utils/getModelLinageNames.ts @@ -0,0 +1,23 @@ +import {GgufMetadata} from "../../gguf/types/GgufMetadataTypes.js"; + +export function getModelLinageNames(ggufMetadata?: GgufMetadata) { + const res: string[][] = []; + + if (ggufMetadata == null) + return res; + + const currentModelInfo = [ggufMetadata?.general?.name, ggufMetadata?.general?.basename] + .filter((v): v is string => v != null); + if (currentModelInfo.length > 0) + res.push(currentModelInfo); + + if (typeof ggufMetadata?.general?.base_model?.count === "number") { + for (let i = 0; i < ggufMetadata.general.base_model.count; i++) { + const baseModel = ggufMetadata.general.base_model[String(i) as `${bigint}`]; + if (baseModel?.name != null) + res.push([baseModel.name]); + } + } + + return res; +} diff --git a/src/chatWrappers/utils/isLlama3_2LightweightModel.ts b/src/chatWrappers/utils/isLlama3_2LightweightModel.ts new file mode 100644 index 00000000..bf71e1e3 --- /dev/null +++ b/src/chatWrappers/utils/isLlama3_2LightweightModel.ts @@ -0,0 +1,11 @@ +import {ChatWrapperCheckModelCompatibilityParams} from "../../types.js"; +import {includesText} from "../../utils/includesText.js"; +import {getModelLinageNames} from "./getModelLinageNames.js"; + +export function isLlama3_2LightweightModel(options: ChatWrapperCheckModelCompatibilityParams) { + const isLlama3_2 = getModelLinageNames(options.fileInfo?.metadata) + .some((modelNames) => includesText(modelNames, ["llama 3.2", "llama-3.2", "llama3.2"])); + const isSmallModel = (["1B", "3B"] as string[]).includes(options.fileInfo?.metadata?.general?.size_label ?? 
""); + + return isLlama3_2 && isSmallModel; +} diff --git a/src/chatWrappers/utils/resolveChatWrapper.ts b/src/chatWrappers/utils/resolveChatWrapper.ts index 001bb0a5..8be8dc27 100644 --- a/src/chatWrappers/utils/resolveChatWrapper.ts +++ b/src/chatWrappers/utils/resolveChatWrapper.ts @@ -11,14 +11,17 @@ import {JinjaTemplateChatWrapper, JinjaTemplateChatWrapperOptions} from "../gene import {TemplateChatWrapper} from "../generic/TemplateChatWrapper.js"; import {getConsoleLogPrefix} from "../../utils/getConsoleLogPrefix.js"; import {Llama3_1ChatWrapper} from "../Llama3_1ChatWrapper.js"; +import {Llama3_2LightweightChatWrapper} from "../Llama3_2LightweightChatWrapper.js"; import {MistralChatWrapper} from "../MistralChatWrapper.js"; import {Tokenizer} from "../../types.js"; +import {includesText} from "../../utils/includesText.js"; import {isJinjaTemplateEquivalentToSpecializedChatWrapper} from "./isJinjaTemplateEquivalentToSpecializedChatWrapper.js"; +import {getModelLinageNames} from "./getModelLinageNames.js"; import type {GgufFileInfo} from "../../gguf/types/GgufFileInfoTypes.js"; export const specializedChatWrapperTypeNames = Object.freeze([ - "general", "llama3.1", "llama3", "llama2Chat", "mistral", "alpacaChat", "functionary", "chatML", "falconChat", "gemma" + "general", "llama3.2-lightweight", "llama3.1", "llama3", "llama2Chat", "mistral", "alpacaChat", "functionary", "chatML", "falconChat", "gemma" ] as const); export type SpecializedChatWrapperTypeName = (typeof specializedChatWrapperTypeNames)[number]; @@ -37,6 +40,7 @@ export type ResolvableChatWrapperTypeName = (typeof resolvableChatWrapperTypeNam export const chatWrappers = Object.freeze({ "general": GeneralChatWrapper, "llama3.1": Llama3_1ChatWrapper, + "llama3.2-lightweight": Llama3_2LightweightChatWrapper, "llama3": Llama3ChatWrapper, "llama2Chat": Llama2ChatWrapper, "mistral": MistralChatWrapper, @@ -145,28 +149,6 @@ export function resolveChatWrapper(options: ResolveChatWrapperOptions): BuiltInC }); } - function getModelLinageNames(): string[][] { - const res: string[][] = []; - - if (fileInfo == null) - return res; - - const currentModelInfo = [fileInfo.metadata?.general?.name, fileInfo.metadata?.general?.basename] - .filter((v): v is string => v != null); - if (currentModelInfo.length > 0) - res.push(currentModelInfo); - - if (typeof fileInfo.metadata?.general?.base_model?.count === "number") { - for (let i = 0; i < fileInfo.metadata.general.base_model.count; i++) { - const baseModel = fileInfo.metadata.general.base_model[String(i) as `${bigint}`]; - if (baseModel?.name != null) - res.push([baseModel.name]); - } - } - - return res; - } - if (type !== "auto" && type != null) { if (isTemplateChatWrapperType(type)) { const Wrapper = chatWrappers[type]; @@ -293,8 +275,10 @@ export function resolveChatWrapper(options: ResolveChatWrapperOptions): BuiltInC } } - for (const modelNames of getModelLinageNames()) { - if (includesText(modelNames, ["llama 3.1", "llama-3.1", "llama3.1"]) && Llama3_1ChatWrapper._checkModelCompatibility({tokenizer, fileInfo})) + for (const modelNames of getModelLinageNames(fileInfo?.metadata)) { + if (includesText(modelNames, ["llama 3.2", "llama-3.2", "llama3.2"]) && Llama3_2LightweightChatWrapper._checkModelCompatibility({tokenizer, fileInfo})) + return createSpecializedChatWrapper(Llama3_2LightweightChatWrapper); + else if (includesText(modelNames, ["llama 3.1", "llama-3.1", "llama3.1"]) && Llama3_1ChatWrapper._checkModelCompatibility({tokenizer, fileInfo})) return 
createSpecializedChatWrapper(Llama3_1ChatWrapper); else if (includesText(modelNames, ["llama 3", "llama-3", "llama3"])) return createSpecializedChatWrapper(Llama3ChatWrapper); @@ -393,25 +377,6 @@ export function isTemplateChatWrapperType(type: string): type is TemplateChatWra return templateChatWrapperTypeNames.includes(type as any); } -function includesText( - value: string | string[] | null | undefined, - textToCheckFor: string | string[], - strictCase: boolean = false -): boolean { - if (value instanceof Array) - return value.some((v) => includesText(v, textToCheckFor, strictCase)); - else if (typeof value !== "string") - return false; - - if (textToCheckFor instanceof Array) - return textToCheckFor.some((t) => includesText(value, t, strictCase)); - - if (strictCase) - return value.includes(textToCheckFor); - - return value.toLowerCase().includes(textToCheckFor.toLowerCase()); -} - // this is needed because TypeScript guards don't work automatically with class references function isClassReference(value: any, classReference: T): value is T { return value === classReference; diff --git a/src/cli/commands/ChatCommand.ts b/src/cli/commands/ChatCommand.ts index 95496021..c80a4760 100644 --- a/src/cli/commands/ChatCommand.ts +++ b/src/cli/commands/ChatCommand.ts @@ -131,7 +131,7 @@ export const ChatCommand: CommandModule = { type: "string", default: "auto" as ChatCommand["wrapper"], choices: ["auto", ...specializedChatWrapperTypeNames] as const, - description: "Chat wrapper to use. Use `auto` to automatically select a wrapper based on the model's BOS token" + description: "Chat wrapper to use. Use `auto` to automatically select a wrapper based on the model's metadata and tokenizer" }) .option("noJinja", { type: "boolean", diff --git a/src/cli/commands/InitCommand.ts b/src/cli/commands/InitCommand.ts index c73ea1ad..fbe0601c 100644 --- a/src/cli/commands/InitCommand.ts +++ b/src/cli/commands/InitCommand.ts @@ -25,6 +25,7 @@ import {resolveModelDestination} from "../../utils/resolveModelDestination.js"; type InitCommand = { name?: string, template?: string, + model?: string, gpu?: BuildGpu | "auto" }; @@ -45,6 +46,10 @@ export const InitCommand: CommandModule = { choices: projectTemplates.map((template) => template.name), description: "Template to use. If omitted, you will be prompted to select one" }) + .option("model", { + type: "string", + description: "Model URI to use. If omitted, you will be prompted to select one interactively" + }) .option("gpu", { type: "string", @@ -73,7 +78,7 @@ export const CreateCliCommand: CommandModule = { handler: InitCommandHandler }; -export async function InitCommandHandler({name, template, gpu}: InitCommand) { +export async function InitCommandHandler({name, template, model, gpu}: InitCommand) { const currentDirectory = path.resolve(process.cwd()); const projectName = (name != null && validateNpmPackageName(name ?? "").validForNewPackages) ? name @@ -84,20 +89,36 @@ export async function InitCommandHandler({name, template, gpu}: InitCommand) { : undefined ) ?? await askForTemplate(); - const llama = gpu == null - ? 
await getLlama("lastBuild", {
-            logLevel: LlamaLogLevel.error
-        })
-        : await getLlama({
-            gpu,
-            logLevel: LlamaLogLevel.error
+    async function resolveModelUri() {
+        if (model != null && model !== "") {
+            try {
+                const resolvedModelDestination = resolveModelDestination(model, true);
+                if (resolvedModelDestination.type === "uri")
+                    return resolvedModelDestination.uri;
+                else if (resolvedModelDestination.type === "url")
+                    return resolvedModelDestination.url;
+            } catch (err) {
+                // do nothing
+            }
+        }
+
+        const llama = gpu == null
+            ? await getLlama("lastBuild", {
+                logLevel: LlamaLogLevel.error
+            })
+            : await getLlama({
+                gpu,
+                logLevel: LlamaLogLevel.error
+            });
+
+        return await interactivelyAskForModel({
+            llama,
+            allowLocalModels: false,
+            downloadIntent: false
         });
+    }
 
-    const modelUri = await interactivelyAskForModel({
-        llama,
-        allowLocalModels: false,
-        downloadIntent: false
-    });
+    const modelUri = await resolveModelUri();
 
     const targetDirectory = path.join(currentDirectory, projectName);
     const readableTargetDirectoryPath = getReadablePath(targetDirectory);
diff --git a/src/cli/recommendedModels.ts b/src/cli/recommendedModels.ts
index 67d9d038..64890baa 100644
--- a/src/cli/recommendedModels.ts
+++ b/src/cli/recommendedModels.ts
@@ -48,6 +48,18 @@ export const recommendedModels: ModelRecommendation[] = [{
         "hf:mradermacher/Mistral-Nemo-Instruct-2407-GGUF/Mistral-Nemo-Instruct-2407.Q4_K_M.gguf",
         "hf:mradermacher/Mistral-Nemo-Instruct-2407-GGUF/Mistral-Nemo-Instruct-2407.Q4_K_S.gguf"
     ]
+}, {
+    name: "Llama 3.2 3B",
+    abilities: ["chat", "complete", "functionCalling"],
+    description: "Llama 3.2 3B model was created by Meta and is optimized for assistant-like chat use cases, with support for function calling.\n" +
+        "This model is smarter than the 1B model, but is still relatively small and can run on less capable machines.",
+
+    fileOptions: [
+        "hf:mradermacher/Llama-3.2-3B-Instruct-GGUF/Llama-3.2-3B-Instruct.Q8_0.gguf",
+        "hf:mradermacher/Llama-3.2-3B-Instruct-GGUF/Llama-3.2-3B-Instruct.Q6_K.gguf",
+        "hf:mradermacher/Llama-3.2-3B-Instruct-GGUF/Llama-3.2-3B-Instruct.Q4_K_M.gguf",
+        "hf:mradermacher/Llama-3.2-3B-Instruct-GGUF/Llama-3.2-3B-Instruct.Q4_K_S.gguf"
+    ]
 }, {
     name: "Phi 3 3.8B",
     abilities: ["chat", "complete", "functionCalling"],
diff --git a/src/evaluator/LlamaChat/LlamaChat.ts b/src/evaluator/LlamaChat/LlamaChat.ts
index e1a8355d..9f71b2e3 100644
--- a/src/evaluator/LlamaChat/LlamaChat.ts
+++ b/src/evaluator/LlamaChat/LlamaChat.ts
@@ -446,6 +446,7 @@ export class LlamaChat {
         const loadContextWindow = async (avoidReloadingHistory: boolean = false) => {
             await generateResponseState.loadContextWindow(
                 generateResponseState.getResolvedHistoryWithCurrentModelResponse(),
+                generateResponseState.getContextWindowsHistoryWithCurrentModelResponse(),
                 false,
                 avoidReloadingHistory
             );
@@ -611,10 +612,17 @@ export class LlamaChat {
         return await withLock(this._chatLock, "evaluate", signal, async (): Promise => {
             try {
                 generateResponseState.ensureLastHistoryItemIsUser();
-                const lastResolvedHistoryItem = generateResponseState.resolvedHistory[generateResponseState.resolvedHistory.length - 1];
-                const initialUserMessage = lastResolvedHistoryItem?.type === "user"
-                    ? 
lastResolvedHistoryItem.text - : ""; + const getInitialUserMessage = (history: ChatHistoryItem[]) => { + const lastResolvedHistoryItem = history[history.length - 1]; + + if (lastResolvedHistoryItem?.type === "user") + return lastResolvedHistoryItem.text; + + return ""; + }; + + const initialUserMessage = getInitialUserMessage(generateResponseState.resolvedHistory); + const contextWindowInitialUserMessage = getInitialUserMessage(generateResponseState.lastContextWindowHistory); while (true) { generateResponseState.startTokenLoop(); @@ -623,6 +631,11 @@ export class LlamaChat { generateResponseState.resolvedHistory, initialUserMessage + initialUserPrompt + this.model.detokenize(generateResponseState.res) ), + setLastUserTextInChatHistory( + generateResponseState.lastContextWindowHistory, + contextWindowInitialUserMessage + initialUserPrompt + + this.model.detokenize(generateResponseState.contextWindowsRes) + ), true ); generateResponseState.functionEvaluationMode = false; @@ -1240,7 +1253,6 @@ class GenerateResponseState["maxParallelFunctionCalls"]; private readonly contextShift: LLamaChatGenerateResponseOptions["contextShift"]; private readonly customStopTriggers: LLamaChatGenerateResponseOptions["customStopTriggers"]; - private readonly lastEvaluationContextWindowHistory: Exclude["lastEvaluationContextWindow"], undefined>["history"]; private readonly minimumOverlapPercentageToPreventContextShift: Exclude["lastEvaluationContextWindow"], undefined>["minimumOverlapPercentageToPreventContextShift"], undefined>; public readonly functionsEnabled: boolean; @@ -1369,7 +1381,6 @@ class GenerateResponseState 0); @@ -1404,7 +1415,7 @@ class GenerateResponseState 0 && this.ignoreStartTextDetector.hasTriggeredStops && (forceRemove || !this.ignoreStartTextDetector.hasInProgressStops) @@ -1598,6 +1627,7 @@ class GenerateResponseState { @@ -1622,7 +1652,7 @@ class GenerateResponseState= this.llamaChat.sequence.context.contextSize - 1) - return await this.loadContextWindow(resolvedHistory, endWithUserText, false); + return await this.loadContextWindow(resolvedHistory, resolvedContextWindowsHistory, endWithUserText, false); return { userTextSuffix: this.userTextSuffix diff --git a/src/evaluator/LlamaChatSession/LlamaChatSession.ts b/src/evaluator/LlamaChatSession/LlamaChatSession.ts index e49ea9cf..f66e9cc0 100644 --- a/src/evaluator/LlamaChatSession/LlamaChatSession.ts +++ b/src/evaluator/LlamaChatSession/LlamaChatSession.ts @@ -470,7 +470,7 @@ export class LlamaChatSession { throw new DisposedError(); const supportsParallelFunctionCalling = this._chat.chatWrapper.settings.functions.parallelism != null; - const abortController = wrapAbortSignal(signal); + const [abortController, disposeAbortController] = wrapAbortSignal(signal); let lastEvaluation = this._lastEvaluation; let newChatHistory = appendUserMessageToChatHistory(this._chatHistory, prompt); let newContextWindowChatHistory = lastEvaluation?.contextWindow == null @@ -501,179 +501,185 @@ export class LlamaChatSession { safeEventCallback(onTextChunk)?.(resolvedResponsePrefix); } - while (true) { - const functionCallsAndResults: Array, - functionDefinition: ChatSessionModelFunction, - functionCallResult: any - }>> = []; - let canThrowFunctionCallingErrors = false; - let abortedOnFunctionCallError = false; - - const initialOutputTokens = this._chat.sequence.tokenMeter.usedOutputTokens; - const { - lastEvaluation: currentLastEvaluation, - metadata - } = await this._chat.generateResponse(newChatHistory, { - functions, - documentFunctionParams, - 
maxParallelFunctionCalls, - grammar: grammar as undefined, // this is a workaround to allow passing both `functions` and `grammar` - onTextChunk: safeEventCallback(onTextChunk), - onToken: safeEventCallback(onToken), - signal: abortController.signal, - stopOnAbortSignal, - repeatPenalty, - minP, - topK, - topP, - seed, - tokenBias, - customStopTriggers, - maxTokens, - temperature, - trimWhitespaceSuffix, - contextShift: { - ...this._contextShift, - lastEvaluationMetadata: lastEvaluation?.contextShiftMetadata - }, - evaluationPriority, - lastEvaluationContextWindow: { - history: newContextWindowChatHistory, - minimumOverlapPercentageToPreventContextShift: 0.5 - }, - onFunctionCall: async (functionCall) => { - functionCallsAndResults.push( - (async () => { - try { - const functionDefinition = functions?.[functionCall.functionName]; - - if (functionDefinition == null) - throw new Error( - `The model tried to call function "${functionCall.functionName}" which is not defined` - ); - - const functionCallResult = await functionDefinition.handler(functionCall.params); - - return { - functionCall, - functionDefinition, - functionCallResult - }; - } catch (err) { - if (!abortController.signal.aborted) { - abortedOnFunctionCallError = true; - abortController.abort(err); + try { + while (true) { + const functionCallsAndResults: Array, + functionDefinition: ChatSessionModelFunction, + functionCallResult: any + }>> = []; + let canThrowFunctionCallingErrors = false; + let abortedOnFunctionCallError = false; + + const initialOutputTokens = this._chat.sequence.tokenMeter.usedOutputTokens; + const { + lastEvaluation: currentLastEvaluation, + metadata + } = await this._chat.generateResponse(newChatHistory, { + functions, + documentFunctionParams, + maxParallelFunctionCalls, + grammar: grammar as undefined, // this is a workaround to allow passing both `functions` and `grammar` + onTextChunk: safeEventCallback(onTextChunk), + onToken: safeEventCallback(onToken), + signal: abortController.signal, + stopOnAbortSignal, + repeatPenalty, + minP, + topK, + topP, + seed, + tokenBias, + customStopTriggers, + maxTokens, + temperature, + trimWhitespaceSuffix, + contextShift: { + ...this._contextShift, + lastEvaluationMetadata: lastEvaluation?.contextShiftMetadata + }, + evaluationPriority, + lastEvaluationContextWindow: { + history: newContextWindowChatHistory, + minimumOverlapPercentageToPreventContextShift: 0.5 + }, + onFunctionCall: async (functionCall) => { + functionCallsAndResults.push( + (async () => { + try { + const functionDefinition = functions?.[functionCall.functionName]; + + if (functionDefinition == null) + throw new Error( + `The model tried to call function "${functionCall.functionName}" which is not defined` + ); + + const functionCallResult = await functionDefinition.handler(functionCall.params); + + return { + functionCall, + functionDefinition, + functionCallResult + }; + } catch (err) { + if (!abortController.signal.aborted) { + abortedOnFunctionCallError = true; + abortController.abort(err); + } + + if (canThrowFunctionCallingErrors) + throw err; + + return null; } - - if (canThrowFunctionCallingErrors) - throw err; - - return null; + })() + ); + } + }); + this._ensureNotDisposed(); + if (abortController.signal.aborted && (abortedOnFunctionCallError || !stopOnAbortSignal)) + throw abortController.signal.reason; + + if (maxTokens != null) + maxTokens = Math.max(0, maxTokens - (this._chat.sequence.tokenMeter.usedOutputTokens - initialOutputTokens)); + + lastEvaluation = currentLastEvaluation; + 
newChatHistory = lastEvaluation.cleanHistory; + + if (functionCallsAndResults.length > 0) { + canThrowFunctionCallingErrors = true; + const functionCallResultsPromise = Promise.all(functionCallsAndResults); + const raceEventAbortController = new AbortController(); + await Promise.race([ + functionCallResultsPromise, + new Promise((accept, reject) => { + abortController.signal.addEventListener("abort", () => { + if (abortedOnFunctionCallError || !stopOnAbortSignal) + reject(abortController.signal.reason); + else + accept(); + }, {signal: raceEventAbortController.signal}); + + if (abortController.signal.aborted) { + if (abortedOnFunctionCallError || !stopOnAbortSignal) + reject(abortController.signal.reason); + else + accept(); } - })() - ); - } - }); - this._ensureNotDisposed(); - if (abortController.signal.aborted && (abortedOnFunctionCallError || !stopOnAbortSignal)) - throw abortController.signal.reason; + }) + ]); + raceEventAbortController.abort(); + this._ensureNotDisposed(); - if (maxTokens != null) - maxTokens = Math.max(0, maxTokens - (this._chat.sequence.tokenMeter.usedOutputTokens - initialOutputTokens)); - - lastEvaluation = currentLastEvaluation; - newChatHistory = lastEvaluation.cleanHistory; - - if (functionCallsAndResults.length > 0) { - canThrowFunctionCallingErrors = true; - const functionCallResultsPromise = Promise.all(functionCallsAndResults); - await Promise.race([ - functionCallResultsPromise, - new Promise((accept, reject) => { - abortController.signal.addEventListener("abort", () => { - if (abortedOnFunctionCallError || !stopOnAbortSignal) - reject(abortController.signal.reason); - else - accept(); - }); - - if (abortController.signal.aborted) { - if (abortedOnFunctionCallError || !stopOnAbortSignal) - reject(abortController.signal.reason); - else - accept(); + if (!abortController.signal.aborted) { + const functionCallResults = (await functionCallResultsPromise) + .filter((result): result is Exclude => result != null); + this._ensureNotDisposed(); + + if (abortController.signal.aborted) + throw abortController.signal.reason; + + newContextWindowChatHistory = lastEvaluation.contextWindow; + + let startNewChunk = supportsParallelFunctionCalling; + for (const {functionCall, functionDefinition, functionCallResult} of functionCallResults) { + newChatHistory = addFunctionCallToChatHistory({ + chatHistory: newChatHistory, + functionName: functionCall.functionName, + functionDescription: functionDefinition.description, + callParams: functionCall.params, + callResult: functionCallResult, + rawCall: functionCall.raw, + startsNewChunk: startNewChunk + }); + + newContextWindowChatHistory = addFunctionCallToChatHistory({ + chatHistory: newContextWindowChatHistory, + functionName: functionCall.functionName, + functionDescription: functionDefinition.description, + callParams: functionCall.params, + callResult: functionCallResult, + rawCall: functionCall.raw, + startsNewChunk: startNewChunk + }); + + startNewChunk = false; } - }) - ]); - this._ensureNotDisposed(); - if (!abortController.signal.aborted) { - const functionCallResults = (await functionCallResultsPromise) - .filter((result): result is Exclude => result != null); - this._ensureNotDisposed(); + lastEvaluation.cleanHistory = newChatHistory; + lastEvaluation.contextWindow = newContextWindowChatHistory; - if (abortController.signal.aborted) - throw abortController.signal.reason; - - newContextWindowChatHistory = lastEvaluation.contextWindow; - - let startNewChunk = supportsParallelFunctionCalling; - for (const 
{functionCall, functionDefinition, functionCallResult} of functionCallResults) { - newChatHistory = addFunctionCallToChatHistory({ - chatHistory: newChatHistory, - functionName: functionCall.functionName, - functionDescription: functionDefinition.description, - callParams: functionCall.params, - callResult: functionCallResult, - rawCall: functionCall.raw, - startsNewChunk: startNewChunk - }); - - newContextWindowChatHistory = addFunctionCallToChatHistory({ - chatHistory: newContextWindowChatHistory, - functionName: functionCall.functionName, - functionDescription: functionDefinition.description, - callParams: functionCall.params, - callResult: functionCallResult, - rawCall: functionCall.raw, - startsNewChunk: startNewChunk - }); - - startNewChunk = false; + continue; } - - lastEvaluation.cleanHistory = newChatHistory; - lastEvaluation.contextWindow = newContextWindowChatHistory; - - continue; } - } - this._lastEvaluation = lastEvaluation; - this._chatHistory = newChatHistory; - this._chatHistoryStateRef = {}; + this._lastEvaluation = lastEvaluation; + this._chatHistory = newChatHistory; + this._chatHistoryStateRef = {}; - const lastModelResponseItem = getLastModelResponseItem(newChatHistory); - const responseText = lastModelResponseItem.response - .filter((item): item is string => typeof item === "string") - .join(""); + const lastModelResponseItem = getLastModelResponseItem(newChatHistory); + const responseText = lastModelResponseItem.response + .filter((item): item is string => typeof item === "string") + .join(""); + + if (metadata.stopReason === "customStopTrigger") + return { + response: lastModelResponseItem.response, + responseText, + stopReason: metadata.stopReason, + customStopTrigger: metadata.customStopTrigger, + remainingGenerationAfterStop: metadata.remainingGenerationAfterStop + }; - if (metadata.stopReason === "customStopTrigger") return { response: lastModelResponseItem.response, responseText, stopReason: metadata.stopReason, - customStopTrigger: metadata.customStopTrigger, remainingGenerationAfterStop: metadata.remainingGenerationAfterStop }; - - return { - response: lastModelResponseItem.response, - responseText, - stopReason: metadata.stopReason, - remainingGenerationAfterStop: metadata.remainingGenerationAfterStop - }; + } + } finally { + disposeAbortController(); } }); } @@ -755,7 +761,7 @@ export class LlamaChatSession { throw new Error("The LlamaGrammar used by passed to this function was created with a different Llama instance than the one used by this sequence's model. 
Make sure you use the same Llama instance for both the model and the grammar."); } - const abortController = wrapAbortSignal(signal); + const [abortController, disposeAbortController] = wrapAbortSignal(signal); this._preloadAndCompleteAbortControllers.add(abortController); try { @@ -821,6 +827,7 @@ export class LlamaChatSession { }); } finally { this._preloadAndCompleteAbortControllers.delete(abortController); + disposeAbortController(); } } diff --git a/src/index.ts b/src/index.ts index 0ec8ab11..2f0f30ec 100644 --- a/src/index.ts +++ b/src/index.ts @@ -42,6 +42,7 @@ import {UnsupportedError} from "./utils/UnsupportedError.js"; import {InsufficientMemoryError} from "./utils/InsufficientMemoryError.js"; import {ChatWrapper} from "./ChatWrapper.js"; import {EmptyChatWrapper} from "./chatWrappers/EmptyChatWrapper.js"; +import {Llama3_2LightweightChatWrapper} from "./chatWrappers/Llama3_2LightweightChatWrapper.js"; import {Llama3_1ChatWrapper} from "./chatWrappers/Llama3_1ChatWrapper.js"; import {Llama3ChatWrapper} from "./chatWrappers/Llama3ChatWrapper.js"; import {Llama2ChatWrapper} from "./chatWrappers/Llama2ChatWrapper.js"; @@ -178,6 +179,7 @@ export { type ChatWrapperGeneratedContextState, type ChatWrapperGenerateInitialHistoryOptions, EmptyChatWrapper, + Llama3_2LightweightChatWrapper, Llama3_1ChatWrapper, Llama3ChatWrapper, Llama2ChatWrapper, diff --git a/src/utils/includesText.ts b/src/utils/includesText.ts new file mode 100644 index 00000000..5f8acceb --- /dev/null +++ b/src/utils/includesText.ts @@ -0,0 +1,18 @@ +export function includesText( + value: string | string[] | null | undefined, + textToCheckFor: string | string[], + strictCase: boolean = false +): boolean { + if (value instanceof Array) + return value.some((v) => includesText(v, textToCheckFor, strictCase)); + else if (typeof value !== "string") + return false; + + if (textToCheckFor instanceof Array) + return textToCheckFor.some((t) => includesText(value, t, strictCase)); + + if (strictCase) + return value.includes(textToCheckFor); + + return value.toLowerCase().includes(textToCheckFor.toLowerCase()); +} diff --git a/src/utils/wrapAbortSignal.ts b/src/utils/wrapAbortSignal.ts index cce2dac4..671a78df 100644 --- a/src/utils/wrapAbortSignal.ts +++ b/src/utils/wrapAbortSignal.ts @@ -1,10 +1,17 @@ -export function wrapAbortSignal(abortSignal?: AbortSignal) { +export function wrapAbortSignal(abortSignal?: AbortSignal): [controller: AbortController, dispose: (() => void)] { const controller = new AbortController(); + function onAbort() { + controller.abort(abortSignal!.reason); + } + + function dispose() { + if (abortSignal != null) + abortSignal.removeEventListener("abort", onAbort); + } + if (abortSignal != null) - abortSignal.addEventListener("abort", () => { - controller.abort(abortSignal.reason); - }); + abortSignal.addEventListener("abort", onAbort); - return controller; + return [controller, dispose]; } diff --git a/templates/README.md b/templates/README.md new file mode 100644 index 00000000..3796f65d --- /dev/null +++ b/templates/README.md @@ -0,0 +1,6 @@ +Use the following command to scaffold a project from a template: +```bash +npm create node-llama-cpp@latest +``` + +> Don't use the templates in this repository directly. 
They are built to be used with the `npm create` command ([learn more](https://node-llama-cpp.withcat.ai/guide/))
diff --git a/templates/electron-typescript-react/electron-builder.ts b/templates/electron-typescript-react/electron-builder.ts
index 1e0fcb1f..3b99d153 100644
--- a/templates/electron-typescript-react/electron-builder.ts
+++ b/templates/electron-typescript-react/electron-builder.ts
@@ -35,10 +35,10 @@ export default {
         "dist-electron",
         "!node_modules/node-llama-cpp/bins/**/*",
         "node_modules/node-llama-cpp/bins/${os}-${arch}*/**/*",
-        "!node_modules/@node-llama-cpp/*/bins/**/*",
-        "node_modules/@node-llama-cpp/${os}-${arch}*/bins/**/*",
         "!node_modules/node-llama-cpp/llama/localBuilds/**/*",
-        "node_modules/node-llama-cpp/llama/localBuilds/${os}-${arch}*/**/*"
+        "node_modules/node-llama-cpp/llama/localBuilds/${os}-${arch}*/**/*",
+        "!node_modules/@node-llama-cpp/*/bins/**/*",
+        "node_modules/@node-llama-cpp/${os}-${arch}*/bins/**/*"
     ],
     asarUnpack: [
         "node_modules/node-llama-cpp/bins",
diff --git a/test/modelDependent/llama3.2/completion.test.ts b/test/modelDependent/llama3.2/completion.test.ts
new file mode 100644
index 00000000..5bb88834
--- /dev/null
+++ b/test/modelDependent/llama3.2/completion.test.ts
@@ -0,0 +1,25 @@
+import {describe, expect, test} from "vitest";
+import {LlamaChatSession, Llama3_2LightweightChatWrapper} from "../../../src/index.js";
+import {getModelFile} from "../../utils/modelFiles.js";
+import {getTestLlama} from "../../utils/getTestLlama.js";
+
+describe("llama 3.2", () => {
+    describe("chatSession", () => {
+        test("resolved to the correct chat wrapper", {timeout: 1000 * 60 * 60 * 2}, async () => {
+            const modelPath = await getModelFile("Llama-3.2-3B-Instruct.Q4_K_M.gguf");
+            const llama = await getTestLlama();
+
+            const model = await llama.loadModel({
+                modelPath
+            });
+            const context = await model.createContext({
+                contextSize: 4096
+            });
+            const chatSession = new LlamaChatSession({
+                contextSequence: context.getSequence()
+            });
+
+            expect(chatSession.chatWrapper).to.be.instanceof(Llama3_2LightweightChatWrapper);
+        });
+    });
+});
diff --git a/test/utils/modelFiles.ts b/test/utils/modelFiles.ts
index c4e2f9e7..94d54664 100644
--- a/test/utils/modelFiles.ts
+++ b/test/utils/modelFiles.ts
@@ -17,7 +17,8 @@ const supportedModels = {
     "Meta-Llama-3-8B-Instruct-Q4_K_M.gguf": "https://huggingface.co/bartowski/Meta-Llama-3-8B-Instruct-GGUF/resolve/main/Meta-Llama-3-8B-Instruct-Q4_K_M.gguf?download=true",
     "lora-Llama-3-Instruct-abliteration-LoRA-8B-f16.gguf": "https://huggingface.co/ngxson/test_gguf_lora_adapter/resolve/main/lora-Llama-3-Instruct-abliteration-LoRA-8B-f16.gguf?download=true",
     "Meta-Llama-3.1-8B-Instruct.Q4_K_M.gguf": "https://huggingface.co/mradermacher/Meta-Llama-3.1-8B-Instruct-GGUF/resolve/main/Meta-Llama-3.1-8B-Instruct.Q4_K_M.gguf?download=true",
-    "codegemma-2b-Q4_K_M.gguf": "https://huggingface.co/bartowski/codegemma-2b-GGUF/resolve/main/codegemma-2b-Q4_K_M.gguf?download=true"
+    "codegemma-2b-Q4_K_M.gguf": "https://huggingface.co/bartowski/codegemma-2b-GGUF/resolve/main/codegemma-2b-Q4_K_M.gguf?download=true",
+    "Llama-3.2-3B-Instruct.Q4_K_M.gguf": "https://huggingface.co/mradermacher/Llama-3.2-3B-Instruct-GGUF/resolve/main/Llama-3.2-3B-Instruct.Q4_K_M.gguf?download=true"
 } as const;
 
 export async function getModelFile(modelName: keyof typeof supportedModels) {