feat: Llama 3.2 3B function calling support (#386)
* feat: Llama 3.2 3B function calling support
* feat: use `llama.cpp` backend registry for GPUs instead of custom implementations
* feat(`getLlama`): `build: "try"` option (see the sketch below)
* feat(`init` command): `--model` flag
* fix: improve binary compatibility testing on Electron apps
* fix: too many abort signal listeners
* fix: log level of some lower level logs
* fix: context window missing response during generation on specific extreme conditions
* fix: adapt to breaking `llama.cpp` changes
* fix: automatically resolve `compiler is out of heap space` CUDA build error
* chore: update bug report template
* docs: separate open source and proprietary sections in the awesome list
* docs(troubleshooting): Electron build error on Windows
* docs(Electron): GitHub Actions template for cross-compilation
giladgd authored Nov 27, 2024
1 parent 6405ee9 commit 97abbca
Showing 43 changed files with 1,890 additions and 633 deletions.
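
As a hedged illustration of the two headline additions (not part of the commit's diff), here is a minimal sketch combining the new `build: "try"` option of `getLlama` with function calling; the model path is hypothetical, so point it at a GGUF file you have downloaded:

```ts
import {getLlama, LlamaChatSession, defineChatSessionFunction} from "node-llama-cpp";

// `build: "try"` attempts to build the binaries from source,
// falling back to prebuilt binaries if the build fails.
const llama = await getLlama({build: "try"});

// Hypothetical model path; adjust it to a local GGUF file.
const model = await llama.loadModel({modelPath: "Llama-3.2-3B-Instruct.Q4_K_M.gguf"});
const context = await model.createContext();
const session = new LlamaChatSession({contextSequence: context.getSequence()});

// A minimal function the model can decide to call during generation.
const functions = {
    getCurrentTime: defineChatSessionFunction({
        description: "Get the current time",
        handler() {
            return new Date().toISOString();
        }
    })
};

console.log(await session.prompt("What time is it right now?", {functions}));
```

The new `--model` flag of the `init` command is related but separate: it lets you preselect a model when scaffolding a project with `npx node-llama-cpp init`.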
10 changes: 8 additions & 2 deletions .github/ISSUE_TEMPLATE/bug-report.yml
@@ -27,8 +27,9 @@ body:
attributes:
label: Actual Behavior
description: >-
A clear and concise description of what actually happened. Please wrap
any error messages or output in code tags, instead of images.
A clear and concise description of what actually happened.
Please wrap any error messages or output in code tags, instead of images.
Please attach logs if relevant.
validations:
required: true
- type: textarea
@@ -60,6 +61,11 @@ body:
| Node.js version | x.y.zzz |
| Typescript version | x.y.zzz |
| `node-llama-cpp` version | x.y.zzz |
`npx --yes node-llama-cpp inspect gpu` output:
```
Result of running `npx --yes node-llama-cpp inspect gpu`
```
validations:
required: true
- type: textarea
25 changes: 18 additions & 7 deletions .github/workflows/build.yml
@@ -65,9 +65,12 @@ jobs:
- name: "Ubuntu"
os: ubuntu-22.04
artifact: "linux"
- name: "macOS"
- name: "macOS x64"
os: macos-13
artifact: "mac"
artifact: "mac-x64"
- name: "macOS arm64"
os: macos-14
artifact: "mac-arm64"

steps:
- uses: actions/checkout@v4
@@ -87,11 +90,18 @@
name: llama.cpp
path: llama

- name: Install dependencies on Windows
if: startsWith(matrix.config.os, 'windows')
- name: Install dependencies on Windows for x64
if: matrix.config.name == 'Windows for x64'
run: |
choco install ninja cmake
- name: Install dependencies on Windows
if: matrix.config.name == 'Windows for Arm'
run: |
choco install cmake.install --version=3.31.1
choco install cmake --version=3.31.1
choco install ninja
- name: Install dependencies on Ubuntu
if: matrix.config.name == 'Ubuntu'
run: |
@@ -148,7 +158,7 @@ jobs:
- name: Setup & Build
id: build
shell: bash
timeout-minutes: 200
timeout-minutes: 300
env:
ARTIFACT_NAME: ${{ matrix.config.artifact }}
run: |
@@ -212,9 +222,10 @@ jobs:
await buildBinary("x64", ["--gpu", "vulkan"]);
await buildBinary("arm64", ["--gpu", "false"]);
await buildBinary("armv7l", ["--gpu", "false"]);
} else if (process.env.ARTIFACT_NAME === "mac") {
await buildBinary("arm64", ["--gpu", "metal"]);
} else if (process.env.ARTIFACT_NAME === "mac-x64") {
await buildBinary("x64", ["--gpu", "false"]);
} else if (process.env.ARTIFACT_NAME === "mac-arm64") {
await buildBinary("arm64", ["--gpu", "metal"]);
}
// move binaries to bins
3 changes: 3 additions & 0 deletions .gitignore
@@ -24,7 +24,10 @@ node_modules
/llama/lastBuild.json
/llama/gitRelease.bundle
/llama/.temp
/llama/.cache
/llama/build
/llama/.idea
/llama/.vscode
/llama/cmake-build-debug
/llama/localBuilds
/llama/Release
10 changes: 5 additions & 5 deletions .vitepress/assets/ogTemplate.svg
(SVG file; diff not displayed)
21 changes: 19 additions & 2 deletions .vitepress/config.ts
@@ -122,12 +122,22 @@ export default defineConfig({

return items
.map((item) => {
if (item.url === "" || item.url === "blog/") {
if (item.url === "") {
item.lastmod = undefined;
item.changefreq = "daily";
item.priority = 1;
} else if (item.url === "blog/") {
item.lastmod = new Date(buildDate);
item.changefreq = "daily";
item.priority = 0.9;
} else if (item.url.startsWith("api/") || item.url.startsWith("cli/")) {
item = {
...item,
lastmod: new Date(buildDate)
lastmod: new Date(buildDate),
changefreq: "weekly",
priority: item.url.startsWith("cli/")
? 0.7
: 0.5
};
} else if (item.lastmod == null && item.url.startsWith("blog/")) {
const postDate = blogPostMap.get(item.url)?.frontmatter.date;
@@ -138,6 +148,13 @@
}
} else if (item.lastmod == null) {
item.lastmod = new Date(buildDate);
item.changefreq = "weekly";
item.priority = 0.4;
}

if (item.url !== "blog/" && item.url.startsWith("blog/")) {
item.priority = 0.8;
item.changefreq = "hourly";
}

return item;
9 changes: 7 additions & 2 deletions docs/guide/awesome.md
@@ -1,10 +1,15 @@
# Awesome `node-llama-cpp`
Awesome projects that use `node-llama-cpp`.

---

## Open Source
* [CatAI](https://github.com/withcatai/catai) - a simplified AI assistant API for Node.js, with REST API support

## Proprietary
> List your project here!

<br />

---

> To have a project listed here, it should clearly state that it uses `node-llama-cpp`.
59 changes: 58 additions & 1 deletion docs/guide/electron.md
@@ -34,10 +34,67 @@ You also need to call [`getLlama`](../api/functions/getLlama.md) with the CMake
so that `node-llama-cpp` can find them.

## Cross Compilation
Cross packaging from one platform to another is not supported, since binaries for other platforms are not downloaded to you machine when your run `npm install`.
Cross packaging from one platform to another is not supported, since binaries for other platforms are not downloaded to your machine when you run `npm install`.

Packaging an `arm64` app on an `x64` machine is supported, but packaging an `x64` app on an `arm64` machine is not.

::: details GitHub Actions template for cross-compilation

<span v-pre>

```yml
name: Build
on: [push]

jobs:
build-electron:
name: Build Electron app - ${{ matrix.config.name }}
runs-on: ${{ matrix.config.os }}
strategy:
fail-fast: false
matrix:
config:
- name: "Windows"
os: windows-2022
- name: "Ubuntu"
os: ubuntu-22.04
- name: "macOS"
os: macos-13

steps:
- uses: actions/checkout@v4
- uses: actions/setup-node@v4
with:
node-version: "20"

- name: Install dependencies on Ubuntu
if: matrix.config.name == 'Ubuntu'
run: |
sudo apt-get update
sudo apt-get install libarchive-tools rpm
sudo snap install snapcraft --classic
- name: Install modules
run: npm ci

- name: Build electron app
id: build
shell: bash
timeout-minutes: 480
run: npm run build

- name: Upload artifacts
uses: actions/upload-artifact@v4
with:
include-hidden-files: true
name: "electron-app-${{ matrix.config.name }}"
path: "./release"
```
</span>
:::
## Bundling
When bundling your code for Electron using [Electron Vite](https://electron-vite.org) or Webpack,
ensure that `node-llama-cpp` is not bundled, and is instead treated as an external module.
Expand Down
7 changes: 7 additions & 0 deletions docs/guide/troubleshooting.md
@@ -156,3 +156,10 @@ please [open a new issue on GitHub](https://github.com/withcatai/node-llama-cpp/
The common cause of this issue is using the `Administrator` user to run `npm install` and then trying to run the code as a different user.

Ensure you're not using the `Administrator` user either for `npm install` or to run the code.

## Getting an `EPERM: operation not permitted` Error on a Windows Machine When Building an Electron App
`electron-builder` needs to create symlinks to perform the build process, which requires enabling Developer Mode on Windows.

To do that, go to `Settings > Update & Security > For developers` and enable `Developer mode`.

After that, delete the `.cache` folder under your user directory and try building the app again.
1 change: 1 addition & 0 deletions docs/public/robots.txt
@@ -1,2 +1,3 @@
User-agent: *

Sitemap: https://node-llama-cpp.withcat.ai/sitemap.xml
100 changes: 10 additions & 90 deletions llama/CMakeLists.txt
@@ -28,6 +28,16 @@ if (CMAKE_CXX_COMPILER_ID STREQUAL "Clang" OR CMAKE_CXX_COMPILER_ID STREQUAL "Ap
add_compile_options(-Wno-c++17-extensions)
endif()

if(APPLE)
set(CMAKE_SKIP_BUILD_RPATH FALSE)
set(CMAKE_INSTALL_RPATH_USE_LINK_PATH FALSE)
set(CMAKE_BUILD_RPATH "@loader_path")
set(CMAKE_INSTALL_RPATH "@loader_path")
set(CMAKE_BUILD_WITH_INSTALL_RPATH TRUE)
else()
set(CMAKE_BUILD_RPATH_USE_ORIGIN ON)
endif()

include_directories(${NODE_ADDON_API_DIR} ${CMAKE_JS_INC})

add_subdirectory("llama.cpp")
@@ -39,41 +49,6 @@ unset(GPU_INFO_HEADERS)
unset(GPU_INFO_SOURCES)
unset(GPU_INFO_EXTRA_LIBS)

if (GGML_CUDA)
cmake_minimum_required(VERSION 3.17)

find_package(CUDAToolkit)
if (CUDAToolkit_FOUND)
message(STATUS "Using CUDA for GPU info")

enable_language(CUDA)

list(APPEND GPU_INFO_HEADERS gpuInfo/cuda-gpu-info.h)
list(APPEND GPU_INFO_SOURCES gpuInfo/cuda-gpu-info.cu)

add_compile_definitions(GPU_INFO_USE_CUDA)

if (GGML_STATIC)
list(APPEND GPU_INFO_EXTRA_LIBS CUDA::cudart_static)
else()
list(APPEND GPU_INFO_EXTRA_LIBS CUDA::cudart)
endif()

list(APPEND GPU_INFO_EXTRA_LIBS CUDA::cuda_driver)

if (NOT DEFINED CMAKE_CUDA_ARCHITECTURES)
# copied from llama.cpp/CMakLists.txt under "if (NOT DEFINED CMAKE_CUDA_ARCHITECTURES)"
if (GGML_CUDA_F16 OR GGML_CUDA_DMMV_F16)
set(CMAKE_CUDA_ARCHITECTURES "60;61;70;75")
else()
set(CMAKE_CUDA_ARCHITECTURES "52;61;70;75")
endif()
endif()
else()
message(FATAL_ERROR "CUDA was not found")
endif()
endif()

if (GGML_VULKAN OR GGML_KOMPUTE)
find_package(Vulkan)
if (Vulkan_FOUND)
@@ -94,67 +69,12 @@ if (GGML_VULKAN OR GGML_KOMPUTE)
endif()
endif()

if (GGML_HIPBLAS)
list(APPEND CMAKE_PREFIX_PATH /opt/rocm)

if (NOT ${CMAKE_C_COMPILER_ID} MATCHES "Clang")
message(WARNING "Only LLVM is supported for HIP, hint: CC=/opt/rocm/llvm/bin/clang")
endif()
if (NOT ${CMAKE_CXX_COMPILER_ID} MATCHES "Clang")
message(WARNING "Only LLVM is supported for HIP, hint: CXX=/opt/rocm/llvm/bin/clang++")
endif()

find_package(hip)
find_package(hipblas)
find_package(rocblas)

if (${hipblas_FOUND} AND ${hip_FOUND})
message(STATUS "Using HIP and hipBLAS for GPU info")
add_compile_definitions(GPU_INFO_USE_HIPBLAS GPU_INFO_USE_CUDA)
add_library(gpu-info-rocm OBJECT gpuInfo/cuda-gpu-info.cu gpuInfo/cuda-gpu-info.h)
set_source_files_properties(gpuInfo/cuda-gpu-info.cu PROPERTIES LANGUAGE CXX)
target_link_libraries(gpu-info-rocm PRIVATE hip::device PUBLIC hip::host roc::rocblas roc::hipblas)

list(APPEND GPU_INFO_EXTRA_LIBS gpu-info-rocm)
else()
message(FATAL_ERROR "hipBLAS or HIP was not found. Try setting CMAKE_PREFIX_PATH=/opt/rocm")
endif()
endif()

if (GGML_METAL)
find_library(FOUNDATION_LIBRARY Foundation REQUIRED)
find_library(METAL_FRAMEWORK Metal REQUIRED)
find_library(METALKIT_FRAMEWORK MetalKit REQUIRED)

message(STATUS "Using Metal for GPU info")
list(APPEND GPU_INFO_HEADERS gpuInfo/metal-gpu-info.h)
list(APPEND GPU_INFO_SOURCES gpuInfo/metal-gpu-info.mm)

add_compile_definitions(GPU_INFO_USE_METAL)

list(APPEND GPU_INFO_EXTRA_LIBS
${FOUNDATION_LIBRARY}
${METAL_FRAMEWORK}
${METALKIT_FRAMEWORK}
)
endif()

list(REMOVE_DUPLICATES GPU_INFO_HEADERS)
list(REMOVE_DUPLICATES GPU_INFO_SOURCES)
list(REMOVE_DUPLICATES GPU_INFO_EXTRA_LIBS)

file(GLOB SOURCE_FILES "addon/*.cpp" "addon/**/*.cpp" ${GPU_INFO_SOURCES})

if(APPLE)
set(CMAKE_SKIP_BUILD_RPATH FALSE)
set(CMAKE_INSTALL_RPATH_USE_LINK_PATH FALSE)
set(CMAKE_BUILD_RPATH "@loader_path")
set(CMAKE_INSTALL_RPATH "@loader_path")
set(CMAKE_BUILD_WITH_INSTALL_RPATH TRUE)
else()
set(CMAKE_BUILD_RPATH_USE_ORIGIN ON)
endif()

add_library(${PROJECT_NAME} SHARED ${SOURCE_FILES} ${CMAKE_JS_SRC} ${GPU_INFO_HEADERS})
set_target_properties(${PROJECT_NAME} PROPERTIES PREFIX "" SUFFIX ".node")
target_link_libraries(${PROJECT_NAME} ${CMAKE_JS_LIB})
(diffs for the remaining changed files not shown)
