Skip to content

Commit

Permalink
perf: improve vectorization optimizations and add function multi-vers…
Browse files Browse the repository at this point in the history
…ioning (#1271)

Signed-off-by: k4yt3x <[email protected]>
  • Loading branch information
k4yt3x authored Dec 22, 2024
1 parent f68939c commit e1e8ed8
Show file tree
Hide file tree
Showing 11 changed files with 116 additions and 52 deletions.
9 changes: 5 additions & 4 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -49,11 +49,12 @@ jobs:
- name: Build Video2X
run: |
mkdir -p /tmp/build /tmp/install
cmake -B /tmp/build -S . -DUSE_SYSTEM_NCNN=OFF -DUSE_SYSTEM_SPDLOG=OFF \
cmake -B /tmp/build -S . -DCMAKE_BUILD_TYPE=Debug \
-DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ \
-DCMAKE_BUILD_TYPE=Debug -DCMAKE_INSTALL_PREFIX=/tmp/install \
-DVIDEO2X_USE_EXTERNAL_NCNN=OFF -DVIDEO2X_USE_EXTERNAL_SPDLOG=OFF \
-DINSTALL_BIN_DESTINATION=. -DINSTALL_INCLUDE_DESTINATION=include \
-DINSTALL_LIB_DESTINATION=. -DINSTALL_MODEL_DESTINATION=.
-DINSTALL_LIB_DESTINATION=. -DINSTALL_MODEL_DESTINATION=. \
-DCMAKE_INSTALL_PREFIX=/tmp/install
cmake --build /tmp/build --config Debug --target install
- name: Upload artifacts
Expand Down Expand Up @@ -97,7 +98,7 @@ jobs:
shell: pwsh
run: |
cmake -S . -B build `
-DUSE_SYSTEM_NCNN=OFF -DUSE_SYSTEM_SPDLOG=OFF -DUSE_SYSTEM_BOOST=OFF `
-DVIDEO2X_USE_EXTERNAL_NCNN=OFF -DVIDEO2X_USE_EXTERNAL_SPDLOG=OFF -DVIDEO2X_USE_EXTERNAL_BOOST=OFF `
-DCMAKE_BUILD_TYPE=Debug -DCMAKE_INSTALL_PREFIX=build/video2x_install
cmake --build build --config Debug --parallel --target install
Expand Down
9 changes: 5 additions & 4 deletions .github/workflows/release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -53,9 +53,10 @@ jobs:
- name: Build Video2X
run: |
cmake -B build -S . -DUSE_SYSTEM_NCNN=OFF -DUSE_SYSTEM_SPDLOG=OFF -DSPDLOG_NO_EXCEPTIONS=ON \
-DCMAKE_C_COMPILER=gcc -DCMAKE_CXX_COMPILER=g++ \
-DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=build/video2x-linux-ubuntu-2404-amd64/usr
cmake -B build -S . -DCMAKE_BUILD_TYPE=Release \
-DCMAKE_C_COMPILER=gcc -DCMAKE_CXX_COMPILER=g++ \
-DVIDEO2X_USE_EXTERNAL_NCNN=OFF -DVIDEO2X_USE_EXTERNAL_SPDLOG=OFF \
-DCMAKE_INSTALL_PREFIX=build/video2x-linux-ubuntu-2404-amd64/usr
cmake --build build --config Release --target install --parallel
mkdir -p build/video2x-linux-ubuntu-2404-amd64/DEBIAN
cp packaging/debian/control build/video2x-linux-ubuntu-2404-amd64/DEBIAN/control
Expand Down Expand Up @@ -107,7 +108,7 @@ jobs:
shell: pwsh
run: |
cmake -S . -B build `
-DUSE_SYSTEM_NCNN=OFF -DUSE_SYSTEM_SPDLOG=OFF -DUSE_SYSTEM_BOOST=OFF `
-DVIDEO2X_USE_EXTERNAL_NCNN=OFF -DVIDEO2X_USE_EXTERNAL_SPDLOG=OFF -DVIDEO2X_USE_EXTERNAL_BOOST=OFF `
-DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=build/video2x_install
cmake --build build --config Release --parallel --target install
Expand Down
10 changes: 10 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,16 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [Unreleased]

### Added

- Function multi-versioning for critical functions to improve performance in generic architecture builds.

### Changed

- Improve the CMake optimization flags and option names.

## [6.3.1] - 2024-12-21

### Fixed
Expand Down
95 changes: 69 additions & 26 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,20 @@ if(NOT CMAKE_BUILD_TYPE)
set(CMAKE_BUILD_TYPE Release)
endif()

# Build options
option(BUILD_SHARED_LIBS "Build libvideo2x as a shared library" ON)
option(VIDEO2X_BUILD_CLI "Build the video2x command line interface executable" ON)

# Architecture-specific optimizations (all off by default so the build stays portable).
# These are mutually exclusive: they are evaluated in the order listed here and only the
# first enabled option is applied. The flag shown in each description is the GCC/Clang flag.
option(VIDEO2X_ENABLE_NATIVE "Enable native optimizations (-march=native)" OFF)
option(VIDEO2X_ENABLE_X86_64_V4 "Enable x86-64-v4 optimizations (-march=x86-64-v4)" OFF)
option(VIDEO2X_ENABLE_AVX512F "Enable AVX-512 foundation optimizations (-mavx512f)" OFF)
option(VIDEO2X_ENABLE_X86_64_V3 "Enable x86-64-v3 optimizations (-march=x86-64-v3)" OFF)
option(VIDEO2X_ENABLE_AVX2 "Enable AVX2 optimizations (-mavx2)" OFF)

# Dependency sources: ON uses the library found on the system via find_package(),
# OFF builds the copy bundled with the project instead.
option(VIDEO2X_USE_EXTERNAL_NCNN "Use the system-provided ncnn library" ON)
option(VIDEO2X_USE_EXTERNAL_SPDLOG "Use the system-provided spdlog library" ON)
option(VIDEO2X_USE_EXTERNAL_BOOST "Use the system-provided Boost library" ON)

# Set global compile options for all targets
if(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
add_compile_options(/W4 /permissive-)
Expand All @@ -25,30 +39,50 @@ endif()

# Set the default optimization flags for Release builds
if(CMAKE_BUILD_TYPE STREQUAL "Release")
# Set the optimization flags for each compiler
if(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
add_compile_options(/Ox /Ot /GL /DNDEBUG)
add_link_options(/LTCG /OPT:REF /OPT:ICF)
elseif(CMAKE_CXX_COMPILER_ID MATCHES "GNU|Clang")
add_compile_options(-O3 -march=native -ffunction-sections -fdata-sections)
add_compile_options(-O3 -ffunction-sections -fdata-sections)
add_link_options(-Wl,-s -flto -Wl,--gc-sections)
endif()
endif()

# Build options
option(BUILD_SHARED_LIBS "Build libvideo2x as a shared library" ON)
option(BUILD_VIDEO2X_CLI "Build the video2x executable" ON)
option(USE_SYSTEM_NCNN "Use system ncnn library" ON)
option(USE_SYSTEM_SPDLOG "Use system spdlog library" ON)
option(USE_SYSTEM_BOOST "Use system Boost library" ON)

# Generate the version header file
configure_file(
"${CMAKE_CURRENT_SOURCE_DIR}/include/libvideo2x/version.h.in"
"${CMAKE_CURRENT_BINARY_DIR}/libvideo2x/version.h"
@ONLY
)
# Enable the requested architecture-specific optimizations.
# The options are checked in priority order (most specific/widest first) and only the first
# enabled one is applied, so enabling several at once never stacks conflicting flags.
if(VIDEO2X_ENABLE_NATIVE)
    if(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
        # MSVC has no equivalent of -march=native: /arch only accepts explicit instruction
        # sets, so passing /arch:NATIVE would be rejected as an unknown option.
        message(WARNING
            "VIDEO2X_ENABLE_NATIVE is not supported by MSVC; no /arch flag will be added. "
            "Use VIDEO2X_ENABLE_AVX2 or VIDEO2X_ENABLE_AVX512F instead."
        )
    elseif(CMAKE_CXX_COMPILER_ID MATCHES "GNU|Clang")
        add_compile_options(-march=native)
    endif()
elseif(VIDEO2X_ENABLE_X86_64_V4)
    if(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
        # x86-64-v4 is the AVX-512 (F/BW/CD/DQ/VL) level; /arch:AVX512 is the closest MSVC match
        add_compile_options(/arch:AVX512)
    elseif(CMAKE_CXX_COMPILER_ID MATCHES "GNU|Clang")
        add_compile_options(-march=x86-64-v4)
    endif()
elseif(VIDEO2X_ENABLE_AVX512F)
    if(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
        add_compile_options(/arch:AVX512)
    elseif(CMAKE_CXX_COMPILER_ID MATCHES "GNU|Clang")
        add_compile_options(-mavx512f)
    endif()
elseif(VIDEO2X_ENABLE_X86_64_V3)
    if(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
        # x86-64-v3 is the AVX2 level; /arch:AVX2 is the closest MSVC match
        add_compile_options(/arch:AVX2)
    elseif(CMAKE_CXX_COMPILER_ID MATCHES "GNU|Clang")
        add_compile_options(-march=x86-64-v3)
    endif()
elseif(VIDEO2X_ENABLE_AVX2)
    if(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
        add_compile_options(/arch:AVX2)
    elseif(CMAKE_CXX_COMPILER_ID MATCHES "GNU|Clang")
        add_compile_options(-mavx2)
    endif()
endif()

# Find the required packages
# Define lists to store include directories and libraries
set(LIBVIDEO2X_INCLUDE_DIRS)
set(LIBVIDEO2X_LIBS)
set(VIDEO2X_INCLUDE_DIRS)
Expand Down Expand Up @@ -109,7 +143,7 @@ else()
endif() # WIN32

# Find ncnn package
if(USE_SYSTEM_NCNN)
if(VIDEO2X_USE_EXTERNAL_NCNN)
find_package(ncnn REQUIRED)
else()
option(NCNN_INSTALL_SDK "" OFF)
Expand Down Expand Up @@ -208,26 +242,28 @@ else()
endif()

# spdlog
if(USE_SYSTEM_SPDLOG)
if(VIDEO2X_USE_EXTERNAL_SPDLOG)
find_package(spdlog REQUIRED)
list(APPEND LIBVIDEO2X_INCLUDE_DIRS ${spdlog_INCLUDE_DIRS})
list(APPEND VIDEO2X_INCLUDE_DIRS ${spdlog_INCLUDE_DIRS})
set(SPDLOG_LIB spdlog::spdlog)
else()
# spdlog exceptions are incompatible with ncnn, so build the bundled spdlog with
# exceptions disabled. This must default to ON: the build scripts no longer pass
# -DSPDLOG_NO_EXCEPTIONS=ON on the command line.
option(SPDLOG_NO_EXCEPTIONS "" ON)
add_subdirectory(third_party/spdlog)
set(SPDLOG_LIB spdlog::spdlog_header_only)
endif()
list(APPEND LIBVIDEO2X_LIBS ${SPDLOG_LIB})
list(APPEND VIDEO2X_LIBS ${SPDLOG_LIB})

# Find dependencies required for the CLI
if(BUILD_VIDEO2X_CLI)
if(VIDEO2X_BUILD_CLI)
# Vulkan
find_package(Vulkan REQUIRED)
list(APPEND VIDEO2X_LIBS Vulkan::Vulkan)

# Boost
if(USE_SYSTEM_BOOST)
if(VIDEO2X_USE_EXTERNAL_BOOST)
find_package(Boost REQUIRED COMPONENTS program_options)
list(APPEND LIBVIDEO2X_INCLUDE_DIRS ${Boost_INCLUDE_DIRS})
else()
Expand Down Expand Up @@ -255,7 +291,7 @@ ExternalProject_Add(
-DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}
-DCMAKE_INSTALL_PREFIX=${CMAKE_BINARY_DIR}/realesrgan_install
-DCMAKE_POSITION_INDEPENDENT_CODE=ON
-DUSE_SYSTEM_NCNN=${USE_SYSTEM_NCNN}
-DUSE_SYSTEM_NCNN=${VIDEO2X_USE_EXTERNAL_NCNN}
BUILD_ALWAYS ON
INSTALL_COMMAND ${CMAKE_COMMAND} --build . --target install --config ${CMAKE_BUILD_TYPE}
)
Expand All @@ -268,7 +304,7 @@ ExternalProject_Add(
-DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}
-DCMAKE_INSTALL_PREFIX=${CMAKE_BINARY_DIR}/realcugan_install
-DCMAKE_POSITION_INDEPENDENT_CODE=ON
-DUSE_SYSTEM_NCNN=${USE_SYSTEM_NCNN}
-DUSE_SYSTEM_NCNN=${VIDEO2X_USE_EXTERNAL_NCNN}
BUILD_ALWAYS ON
INSTALL_COMMAND ${CMAKE_COMMAND} --build . --target install --config ${CMAKE_BUILD_TYPE}
)
Expand All @@ -281,7 +317,7 @@ ExternalProject_Add(
-DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}
-DCMAKE_INSTALL_PREFIX=${CMAKE_BINARY_DIR}/rife_install
-DCMAKE_POSITION_INDEPENDENT_CODE=ON
-DUSE_SYSTEM_NCNN=${USE_SYSTEM_NCNN}
-DUSE_SYSTEM_NCNN=${VIDEO2X_USE_EXTERNAL_NCNN}
BUILD_ALWAYS ON
INSTALL_COMMAND ${CMAKE_COMMAND} --build . --target install --config ${CMAKE_BUILD_TYPE}
)
Expand All @@ -305,6 +341,13 @@ endif()
# Ensure that the shared library is built after the external projects
add_dependencies(libvideo2x realesrgan realcugan rife)

# Generate the version header from its template.
# The output is written to the build tree rather than the source tree, and @ONLY limits
# substitution to @VAR@ placeholders so any literal ${...} in the template is left as-is.
configure_file(
"${CMAKE_CURRENT_SOURCE_DIR}/include/libvideo2x/version.h.in"
"${CMAKE_CURRENT_BINARY_DIR}/libvideo2x/version.h"
@ONLY
)

# Include directories for the shared library
target_include_directories(libvideo2x PRIVATE
${LIBVIDEO2X_INCLUDE_DIRS}
Expand Down Expand Up @@ -335,15 +378,15 @@ list(APPEND LIBVIDEO2X_LIBS ${REALESRGAN_LIB} ${REALCUGAN_LIB} ${RIFE_LIB})
target_link_libraries(libvideo2x PRIVATE ${LIBVIDEO2X_LIBS})

if(NOT WIN32)
if(USE_SYSTEM_NCNN)
if(VIDEO2X_USE_EXTERNAL_NCNN)
target_link_libraries(libvideo2x PUBLIC ncnn)
else()
target_link_libraries(libvideo2x PRIVATE ncnn)
endif()
endif()

# Create the executable 'video2x'
if(BUILD_VIDEO2X_CLI)
if(VIDEO2X_BUILD_CLI)
file(GLOB VIDEO2X_SOURCES tools/video2x/src/*.cpp)
add_executable(video2x ${VIDEO2X_SOURCES})
set_target_properties(video2x PROPERTIES OUTPUT_NAME video2x)
Expand Down Expand Up @@ -395,8 +438,8 @@ install(TARGETS libvideo2x
# Install model files
install(DIRECTORY ${CMAKE_SOURCE_DIR}/models DESTINATION ${INSTALL_MODEL_DESTINATION})

# Install the executable if BUILD_VIDEO2X_CLI is enabled
if(BUILD_VIDEO2X_CLI)
# Install the executable if VIDEO2X_BUILD_CLI is enabled
if(VIDEO2X_BUILD_CLI)
install(TARGETS video2x RUNTIME DESTINATION ${INSTALL_BIN_DESTINATION})
endif()

Expand Down
27 changes: 14 additions & 13 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,8 @@ build:
cmake -S . -B $(BINDIR) \
-DCMAKE_EXPORT_COMPILE_COMMANDS=ON \
-DCMAKE_CXX_COMPILER=$(CXX) \
-DCMAKE_BUILD_TYPE=Release
-DCMAKE_BUILD_TYPE=Release \
-DVIDEO2X_ENABLE_NATIVE=ON
cmake --build $(BINDIR) --config Release --parallel
cp $(BINDIR)/compile_commands.json .

Expand All @@ -23,9 +24,9 @@ static:
-DCMAKE_CXX_COMPILER=$(CXX) \
-DCMAKE_BUILD_TYPE=Release \
-DBUILD_SHARED_LIBS=OFF \
-DUSE_SYSTEM_NCNN=OFF \
-DUSE_SYSTEM_SPDLOG=OFF \
-DUSE_SYSTEM_BOOST=OFF
-DVIDEO2X_USE_EXTERNAL_NCNN=OFF \
-DVIDEO2X_USE_EXTERNAL_SPDLOG=OFF \
-DVIDEO2X_USE_EXTERNAL_BOOST=OFF
cmake --build $(BINDIR) --config Release --parallel
cp $(BINDIR)/compile_commands.json .

Expand All @@ -39,18 +40,18 @@ debug:

windows:
cmake -S . -B $(BINDIR) \
-DUSE_SYSTEM_NCNN=OFF \
-DUSE_SYSTEM_SPDLOG=OFF \
-DUSE_SYSTEM_BOOST=OFF \
-DVIDEO2X_USE_EXTERNAL_NCNN=OFF \
-DVIDEO2X_USE_EXTERNAL_SPDLOG=OFF \
-DVIDEO2X_USE_EXTERNAL_BOOST=OFF \
-DCMAKE_BUILD_TYPE=Release \
-DCMAKE_INSTALL_PREFIX=build/libvideo2x-shared
cmake --build $(BINDIR) --config Release --parallel --target install

windows-debug:
cmake -S . -B $(BINDIR) \
-DUSE_SYSTEM_NCNN=OFF \
-DUSE_SYSTEM_SPDLOG=OFF \
-DUSE_SYSTEM_BOOST=OFF \
-DVIDEO2X_USE_EXTERNAL_NCNN=OFF \
-DVIDEO2X_USE_EXTERNAL_SPDLOG=OFF \
-DVIDEO2X_USE_EXTERNAL_BOOST=OFF \
-DCMAKE_BUILD_TYPE=Debug \
-DCMAKE_INSTALL_PREFIX=build/libvideo2x-shared
cmake --build $(BINDIR) --config Debug --parallel --target install
Expand All @@ -70,7 +71,7 @@ debian:
libomp-dev \
libspdlog-dev \
libboost-program-options-dev
cmake -B /tmp/build -S . -DUSE_SYSTEM_NCNN=OFF -DCMAKE_CXX_COMPILER=$(CXX) \
cmake -B /tmp/build -S . -DVIDEO2X_USE_EXTERNAL_NCNN=OFF -DCMAKE_CXX_COMPILER=$(CXX) \
-DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=/tmp/install \
-DINSTALL_BIN_DESTINATION=. -DINSTALL_INCLUDE_DESTINATION=include \
-DINSTALL_LIB_DESTINATION=. -DINSTALL_MODEL_DESTINATION=.
Expand All @@ -90,7 +91,7 @@ ubuntu2404:
glslang-tools \
libomp-dev \
libboost-program-options-dev
cmake -B build -S . -DUSE_SYSTEM_NCNN=OFF -DUSE_SYSTEM_SPDLOG=OFF -DSPDLOG_NO_EXCEPTIONS=ON \
cmake -B build -S . -DVIDEO2X_USE_EXTERNAL_NCNN=OFF -DVIDEO2X_USE_EXTERNAL_SPDLOG=OFF \
-DCMAKE_CXX_COMPILER=g++ -DCMAKE_BUILD_TYPE=Release \
-DCMAKE_INSTALL_PREFIX=video2x-linux-ubuntu-amd64/usr
cmake --build build --config Release --target install --parallel
Expand All @@ -115,7 +116,7 @@ ubuntu2204:
glslang-tools \
libomp-dev \
libboost-program-options-dev
cmake -B build -S . -DUSE_SYSTEM_NCNN=OFF -DUSE_SYSTEM_SPDLOG=OFF -DSPDLOG_NO_EXCEPTIONS=ON \
cmake -B build -S . -DVIDEO2X_USE_EXTERNAL_NCNN=OFF -DVIDEO2X_USE_EXTERNAL_SPDLOG=OFF \
-DCMAKE_CXX_COMPILER=g++ -DCMAKE_BUILD_TYPE=Release \
-DCMAKE_INSTALL_PREFIX=video2x-linux-ubuntu-amd64/usr
cmake --build build --config Release --target install --parallel
Expand Down
9 changes: 5 additions & 4 deletions packaging/arch/PKGBUILD
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
pkgname=video2x
pkgver=r874.66c623f
pkgver=r958.996b0bf
pkgrel=1
pkgdesc="A machine learning-based lossless video super resolution framework"
pkgdesc="A machine learning-based video super resolution and frame interpolation framework"
arch=('x86_64')
url="https://github.com/k4yt3x/video2x"
license=('AGPL3')
depends=('ffmpeg' 'ncnn' 'vulkan-driver' 'spdlog' 'boost-libs')
makedepends=('git' 'cmake' 'make' 'clang' 'pkgconf' 'vulkan-headers' 'openmp' 'boost')
makedepends=('git' 'cmake' 'clang' 'vulkan-headers' 'openmp' 'boost')

pkgver() {
printf "r%s.%s" "$(git rev-list --count HEAD)" "$(git rev-parse --short HEAD)"
Expand All @@ -17,7 +17,8 @@ prepare() {
}

build() {
cmake -B build -S .. -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=/usr
cmake -B build -S .. -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=/usr \
-DCMAKE_CXX_COMPILER=clang++ -DVIDEO2X_ENABLE_X86_64_V3=ON
cmake --build build --config Release --parallel
}

Expand Down
1 change: 1 addition & 0 deletions src/avutils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,7 @@ AVPixelFormat get_encoder_default_pix_fmt(const AVCodec *encoder, AVPixelFormat
return best_pix_fmt;
}

[[gnu::target_clones("default", "avx2", "avx512f")]]
float get_frame_diff(AVFrame *frame1, AVFrame *frame2) {
if (!frame1 || !frame2) {
logger()->error("Invalid frame(s) provided for comparison");
Expand Down
3 changes: 3 additions & 0 deletions src/conversions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ namespace video2x {
namespace conversions {

// Convert AVFrame format
[[gnu::target_clones("default", "avx2", "avx512f")]]
AVFrame *convert_avframe_pix_fmt(AVFrame *src_frame, AVPixelFormat pix_fmt) {
AVFrame *dst_frame = av_frame_alloc();
if (dst_frame == nullptr) {
Expand Down Expand Up @@ -67,6 +68,7 @@ AVFrame *convert_avframe_pix_fmt(AVFrame *src_frame, AVPixelFormat pix_fmt) {
}

// Convert AVFrame to ncnn::Mat by copying the data
[[gnu::target_clones("default", "avx2", "avx512f")]]
ncnn::Mat avframe_to_ncnn_mat(AVFrame *frame) {
AVFrame *converted_frame = nullptr;

Expand Down Expand Up @@ -106,6 +108,7 @@ ncnn::Mat avframe_to_ncnn_mat(AVFrame *frame) {
}

// Convert ncnn::Mat to AVFrame with a specified pixel format
[[gnu::target_clones("default", "avx2", "avx512f")]]
AVFrame *ncnn_mat_to_avframe(const ncnn::Mat &mat, AVPixelFormat pix_fmt) {
int ret;

Expand Down
Loading

0 comments on commit e1e8ed8

Please sign in to comment.