feat: Llama 3.2 3B function calling support (#386)
* feat: Llama 3.2 3B function calling support
* feat: use `llama.cpp` backend registry for GPUs instead of custom implementations
* feat(`getLlama`): `build: "try"` option (see the sketch below)
* feat(`init` command): `--model` flag
* fix: improve binary compatibility testing on Electron apps
* fix: too many abort signal listeners
* fix: log level of some lower level logs
* fix: context window missing response during generation on specific extreme conditions
* fix: adapt to breaking `llama.cpp` changes
* fix: automatically resolve `compiler is out of heap space` CUDA build error
* chore: update bug report template
* docs: separate open source and proprietary sections in the awesome list
* docs(troubleshooting): Electron build error on Windows
* docs(Electron): GitHub Actions template for cross-compilation
giladgd authored Nov 27, 2024
1 parent 6405ee9 commit 97abbca
Showing 43 changed files with 1,890 additions and 633 deletions.
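
As a hedged illustration of the two headline additions (not part of the commit's diff), here is a minimal sketch combining the new `build: "try"` option of `getLlama` with function calling; the model path is hypothetical, so point it at a GGUF file you have downloaded:

```ts
import {getLlama, LlamaChatSession, defineChatSessionFunction} from "node-llama-cpp";

// `build: "try"` attempts to build the binaries from source,
// falling back to prebuilt binaries if the build fails.
const llama = await getLlama({build: "try"});

// Hypothetical model path; adjust it to a local GGUF file.
const model = await llama.loadModel({modelPath: "Llama-3.2-3B-Instruct.Q4_K_M.gguf"});
const context = await model.createContext();
const session = new LlamaChatSession({contextSequence: context.getSequence()});

// A minimal function the model can decide to call during generation.
const functions = {
    getCurrentTime: defineChatSessionFunction({
        description: "Get the current time",
        handler() {
            return new Date().toISOString();
        }
    })
};

console.log(await session.prompt("What time is it right now?", {functions}));
```

The new `--model` flag of the `init` command is related but separate: it lets you preselect a model when scaffolding a project with `npx node-llama-cpp init`.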
10 changes: 8 additions & 2 deletions .github/ISSUE_TEMPLATE/bug-report.yml
@@ -27,8 +27,9 @@ body:
attributes:
label: Actual Behavior
description: >-
A clear and concise description of what actually happened. Please wrap
any error messages or output in code tags, instead of images.
A clear and concise description of what actually happened.
Please wrap any error messages or output in code tags, instead of images.
Please attach logs if relevant.
validations:
required: true
- type: textarea
@@ -60,6 +61,11 @@ body:
| Node.js version | x.y.zzz |
| Typescript version | x.y.zzz |
| `node-llama-cpp` version | x.y.zzz |
`npx --yes node-llama-cpp inspect gpu` output:
```
Result of running `npx --yes node-llama-cpp inspect gpu`
```
validations:
required: true
- type: textarea
25 changes: 18 additions & 7 deletions .github/workflows/build.yml
@@ -65,9 +65,12 @@ jobs:
- name: "Ubuntu"
os: ubuntu-22.04
artifact: "linux"
- name: "macOS"
- name: "macOS x64"
os: macos-13
artifact: "mac"
artifact: "mac-x64"
- name: "macOS arm64"
os: macos-14
artifact: "mac-arm64"

steps:
- uses: actions/checkout@v4
@@ -87,11 +90,18 @@
name: llama.cpp
path: llama

- name: Install dependencies on Windows
if: startsWith(matrix.config.os, 'windows')
- name: Install dependencies on Windows for x64
if: matrix.config.name == 'Windows for x64'
run: |
choco install ninja cmake
- name: Install dependencies on Windows
if: matrix.config.name == 'Windows for Arm'
run: |
choco install cmake.install --version=3.31.1
choco install cmake --version=3.31.1
choco install ninja
- name: Install dependencies on Ubuntu
if: matrix.config.name == 'Ubuntu'
run: |
@@ -148,7 +158,7 @@ jobs:
- name: Setup & Build
id: build
shell: bash
timeout-minutes: 200
timeout-minutes: 300
env:
ARTIFACT_NAME: ${{ matrix.config.artifact }}
run: |
@@ -212,9 +222,10 @@ jobs:
await buildBinary("x64", ["--gpu", "vulkan"]);
await buildBinary("arm64", ["--gpu", "false"]);
await buildBinary("armv7l", ["--gpu", "false"]);
} else if (process.env.ARTIFACT_NAME === "mac") {
await buildBinary("arm64", ["--gpu", "metal"]);
} else if (process.env.ARTIFACT_NAME === "mac-x64") {
await buildBinary("x64", ["--gpu", "false"]);
} else if (process.env.ARTIFACT_NAME === "mac-arm64") {
await buildBinary("arm64", ["--gpu", "metal"]);
}
// move binaries to bins
3 changes: 3 additions & 0 deletions .gitignore
@@ -24,7 +24,10 @@ node_modules
/llama/lastBuild.json
/llama/gitRelease.bundle
/llama/.temp
/llama/.cache
/llama/build
/llama/.idea
/llama/.vscode
/llama/cmake-build-debug
/llama/localBuilds
/llama/Release
10 changes: 5 additions & 5 deletions .vitepress/assets/ogTemplate.svg
(SVG file; diff not displayed)
21 changes: 19 additions & 2 deletions .vitepress/config.ts
@@ -122,12 +122,22 @@ export default defineConfig({

return items
.map((item) => {
if (item.url === "" || item.url === "blog/") {
if (item.url === "") {
item.lastmod = undefined;
item.changefreq = "daily";
item.priority = 1;
} else if (item.url === "blog/") {
item.lastmod = new Date(buildDate);
item.changefreq = "daily";
item.priority = 0.9;
} else if (item.url.startsWith("api/") || item.url.startsWith("cli/")) {
item = {
...item,
lastmod: new Date(buildDate)
lastmod: new Date(buildDate),
changefreq: "weekly",
priority: item.url.startsWith("cli/")
? 0.7
: 0.5
};
} else if (item.lastmod == null && item.url.startsWith("blog/")) {
const postDate = blogPostMap.get(item.url)?.frontmatter.date;
@@ -138,6 +148,13 @@
}
} else if (item.lastmod == null) {
item.lastmod = new Date(buildDate);
item.changefreq = "weekly";
item.priority = 0.4;
}

if (item.url !== "blog/" && item.url.startsWith("blog/")) {
item.priority = 0.8;
item.changefreq = "hourly";
}

return item;
9 changes: 7 additions & 2 deletions docs/guide/awesome.md
@@ -1,10 +1,15 @@
# Awesome `node-llama-cpp`
Awesome projects that use `node-llama-cpp`.

---

## Open Source
* [CatAI](https://github.com/withcatai/catai) - a simplified AI assistant API for Node.js, with REST API support

## Proprietary
> List your project here!

<br />

---

> To have a project listed here, it should clearly state that it uses `node-llama-cpp`.
59 changes: 58 additions & 1 deletion docs/guide/electron.md
@@ -34,10 +34,67 @@ You also need to call [`getLlama`](../api/functions/getLlama.md) with the CMake
so that `node-llama-cpp` can find them.

## Cross Compilation
Cross packaging from one platform to another is not supported, since binaries for other platforms are not downloaded to you machine when your run `npm install`.
Cross packaging from one platform to another is not supported, since binaries for other platforms are not downloaded to your machine when you run `npm install`.

Packaging an `arm64` app on an `x64` machine is supported, but packaging an `x64` app on an `arm64` machine is not.

::: details GitHub Actions template for cross-compilation

<span v-pre>

```yml
name: Build
on: [push]

jobs:
build-electron:
name: Build Electron app - ${{ matrix.config.name }}
runs-on: ${{ matrix.config.os }}
strategy:
fail-fast: false
matrix:
config:
- name: "Windows"
os: windows-2022
- name: "Ubuntu"
os: ubuntu-22.04
- name: "macOS"
os: macos-13

steps:
- uses: actions/checkout@v4
- uses: actions/setup-node@v4
with:
node-version: "20"

- name: Install dependencies on Ubuntu
if: matrix.config.name == 'Ubuntu'
run: |
sudo apt-get update
sudo apt-get install libarchive-tools rpm
sudo snap install snapcraft --classic
- name: Install modules
run: npm ci

- name: Build electron app
id: build
shell: bash
timeout-minutes: 480
run: npm run build

- name: Upload artifacts
uses: actions/upload-artifact@v4
with:
include-hidden-files: true
name: "electron-app-${{ matrix.config.name }}"
path: "./release"
```
</span>
:::
## Bundling
When bundling your code for Electron using [Electron Vite](https://electron-vite.org) or Webpack,
ensure that `node-llama-cpp` is not bundled, and is instead treated as an external module.
Expand Down
7 changes: 7 additions & 0 deletions docs/guide/troubleshooting.md
@@ -156,3 +156,10 @@ please [open a new issue on GitHub](https://github.com/withcatai/node-llama-cpp/
The common cause of this issue is using the `Administrator` user to run `npm install` and then trying to run the code as a different user.

Ensure you're not using the `Administrator` user either for `npm install` or to run the code.

## Getting an `EPERM: operation not permitted` Error on a Windows Machine When Building an Electron App
`electron-builder` needs to create symlinks to perform the build process, which requires enabling Developer Mode on Windows.

To do that, go to `Settings > Update & Security > For developers` and enable `Developer mode`.

After that, delete the `.cache` folder under your user directory and try building the app again.
1 change: 1 addition & 0 deletions docs/public/robots.txt
@@ -1,2 +1,3 @@
User-agent: *

Sitemap: https://node-llama-cpp.withcat.ai/sitemap.xml
100 changes: 10 additions & 90 deletions llama/CMakeLists.txt
@@ -28,6 +28,16 @@ if (CMAKE_CXX_COMPILER_ID STREQUAL "Clang" OR CMAKE_CXX_COMPILER_ID STREQUAL "Ap
add_compile_options(-Wno-c++17-extensions)
endif()

if(APPLE)
set(CMAKE_SKIP_BUILD_RPATH FALSE)
set(CMAKE_INSTALL_RPATH_USE_LINK_PATH FALSE)
set(CMAKE_BUILD_RPATH "@loader_path")
set(CMAKE_INSTALL_RPATH "@loader_path")
set(CMAKE_BUILD_WITH_INSTALL_RPATH TRUE)
else()
set(CMAKE_BUILD_RPATH_USE_ORIGIN ON)
endif()

include_directories(${NODE_ADDON_API_DIR} ${CMAKE_JS_INC})

add_subdirectory("llama.cpp")
@@ -39,41 +49,6 @@ unset(GPU_INFO_HEADERS)
unset(GPU_INFO_SOURCES)
unset(GPU_INFO_EXTRA_LIBS)

if (GGML_CUDA)
cmake_minimum_required(VERSION 3.17)

find_package(CUDAToolkit)
if (CUDAToolkit_FOUND)
message(STATUS "Using CUDA for GPU info")

enable_language(CUDA)

list(APPEND GPU_INFO_HEADERS gpuInfo/cuda-gpu-info.h)
list(APPEND GPU_INFO_SOURCES gpuInfo/cuda-gpu-info.cu)

add_compile_definitions(GPU_INFO_USE_CUDA)

if (GGML_STATIC)
list(APPEND GPU_INFO_EXTRA_LIBS CUDA::cudart_static)
else()
list(APPEND GPU_INFO_EXTRA_LIBS CUDA::cudart)
endif()

list(APPEND GPU_INFO_EXTRA_LIBS CUDA::cuda_driver)

if (NOT DEFINED CMAKE_CUDA_ARCHITECTURES)
# copied from llama.cpp/CMakLists.txt under "if (NOT DEFINED CMAKE_CUDA_ARCHITECTURES)"
if (GGML_CUDA_F16 OR GGML_CUDA_DMMV_F16)
set(CMAKE_CUDA_ARCHITECTURES "60;61;70;75")
else()
set(CMAKE_CUDA_ARCHITECTURES "52;61;70;75")
endif()
endif()
else()
message(FATAL_ERROR "CUDA was not found")
endif()
endif()

if (GGML_VULKAN OR GGML_KOMPUTE)
find_package(Vulkan)
if (Vulkan_FOUND)
@@ -94,67 +69,12 @@ if (GGML_VULKAN OR GGML_KOMPUTE)
endif()
endif()

if (GGML_HIPBLAS)
list(APPEND CMAKE_PREFIX_PATH /opt/rocm)

if (NOT ${CMAKE_C_COMPILER_ID} MATCHES "Clang")
message(WARNING "Only LLVM is supported for HIP, hint: CC=/opt/rocm/llvm/bin/clang")
endif()
if (NOT ${CMAKE_CXX_COMPILER_ID} MATCHES "Clang")
message(WARNING "Only LLVM is supported for HIP, hint: CXX=/opt/rocm/llvm/bin/clang++")
endif()

find_package(hip)
find_package(hipblas)
find_package(rocblas)

if (${hipblas_FOUND} AND ${hip_FOUND})
message(STATUS "Using HIP and hipBLAS for GPU info")
add_compile_definitions(GPU_INFO_USE_HIPBLAS GPU_INFO_USE_CUDA)
add_library(gpu-info-rocm OBJECT gpuInfo/cuda-gpu-info.cu gpuInfo/cuda-gpu-info.h)
set_source_files_properties(gpuInfo/cuda-gpu-info.cu PROPERTIES LANGUAGE CXX)
target_link_libraries(gpu-info-rocm PRIVATE hip::device PUBLIC hip::host roc::rocblas roc::hipblas)

list(APPEND GPU_INFO_EXTRA_LIBS gpu-info-rocm)
else()
message(FATAL_ERROR "hipBLAS or HIP was not found. Try setting CMAKE_PREFIX_PATH=/opt/rocm")
endif()
endif()

if (GGML_METAL)
find_library(FOUNDATION_LIBRARY Foundation REQUIRED)
find_library(METAL_FRAMEWORK Metal REQUIRED)
find_library(METALKIT_FRAMEWORK MetalKit REQUIRED)

message(STATUS "Using Metal for GPU info")
list(APPEND GPU_INFO_HEADERS gpuInfo/metal-gpu-info.h)
list(APPEND GPU_INFO_SOURCES gpuInfo/metal-gpu-info.mm)

add_compile_definitions(GPU_INFO_USE_METAL)

list(APPEND GPU_INFO_EXTRA_LIBS
${FOUNDATION_LIBRARY}
${METAL_FRAMEWORK}
${METALKIT_FRAMEWORK}
)
endif()

list(REMOVE_DUPLICATES GPU_INFO_HEADERS)
list(REMOVE_DUPLICATES GPU_INFO_SOURCES)
list(REMOVE_DUPLICATES GPU_INFO_EXTRA_LIBS)

file(GLOB SOURCE_FILES "addon/*.cpp" "addon/**/*.cpp" ${GPU_INFO_SOURCES})

if(APPLE)
set(CMAKE_SKIP_BUILD_RPATH FALSE)
set(CMAKE_INSTALL_RPATH_USE_LINK_PATH FALSE)
set(CMAKE_BUILD_RPATH "@loader_path")
set(CMAKE_INSTALL_RPATH "@loader_path")
set(CMAKE_BUILD_WITH_INSTALL_RPATH TRUE)
else()
set(CMAKE_BUILD_RPATH_USE_ORIGIN ON)
endif()

add_library(${PROJECT_NAME} SHARED ${SOURCE_FILES} ${CMAKE_JS_SRC} ${GPU_INFO_HEADERS})
set_target_properties(${PROJECT_NAME} PROPERTIES PREFIX "" SUFFIX ".node")
target_link_libraries(${PROJECT_NAME} ${CMAKE_JS_LIB})
(diffs for the remaining changed files not shown)
