Skip to content

Commit 0e333ff

Browse files
authored
Custom SSE2 and Neon (#772)
- hand written SSE2, Neon, and scalar SIMD - improved performance of SSE2 (x64) and Neon (ARM) - removed SIMDE library - SSE2 is the default on x64
1 parent 2b880d1 commit 0e333ff

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

63 files changed

+1355
-42204
lines changed

.clang-format

+1-1
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ IncludeCategories:
3333

3434
IndentExternBlock: NoIndent
3535
IndentCaseLabels: true
36-
IndentPPDirectives: BeforeHash
36+
#IndentPPDirectives: None
3737
IndentAccessModifiers: false
3838
AccessModifierOffset: -4
3939

.github/workflows/build.yml

+2-2
Original file line numberDiff line numberDiff line change
@@ -53,8 +53,8 @@ jobs:
5353

5454
- name: Configure CMake
5555
# some problem with simde
56-
# run: cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DBOX2D_SAMPLES=OFF -DBOX2D_SANITIZE=ON -DBUILD_SHARED_LIBS=OFF
57-
run: cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DBOX2D_SAMPLES=OFF -DBUILD_SHARED_LIBS=OFF
56+
run: cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DBOX2D_SAMPLES=OFF -DBOX2D_SANITIZE=ON -DBUILD_SHARED_LIBS=OFF
57+
# run: cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DBOX2D_SAMPLES=OFF -DBUILD_SHARED_LIBS=OFF
5858

5959
- name: Build
6060
run: cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}}

CMakeLists.txt

+12-4
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,9 @@
11
cmake_minimum_required(VERSION 3.22)
22
include(FetchContent)
3+
include(CMakeDependentOption)
34

45
project(box2d
5-
VERSION 3.0.0
6+
VERSION 3.0.1
67
DESCRIPTION "A 2D physics engine for games"
78
HOMEPAGE_URL "https://box2d.org"
89
LANGUAGES C CXX
@@ -34,10 +35,13 @@ if (MSVC OR APPLE)
3435
endif()
3536
endif()
3637

38+
option(BOX2D_ENABLE_SIMD "Enable SIMD math (faster)" ON)
39+
3740
if(CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64|AMD64")
38-
option(BOX2D_AVX2 "Enable AVX2 (faster)" ON)
41+
cmake_dependent_option(BOX2D_AVX2 "Enable AVX2" OFF "BOX2D_ENABLE_SIMD" OFF)
3942
endif()
4043

44+
4145
if(PROJECT_IS_TOP_LEVEL)
4246
# Needed for samples.exe to find box2d.dll
4347
# set(CMAKE_LIBRARY_OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/bin")
@@ -52,8 +56,6 @@ set(CMAKE_COMPILE_WARNING_AS_ERROR ON)
5256
set_property(GLOBAL PROPERTY USE_FOLDERS ON)
5357
set(CMAKE_VERBOSE_MAKEFILE ON)
5458

55-
# The Box2D library uses simde https://github.com/simd-everywhere/simde
56-
add_subdirectory(extern/simde)
5759
add_subdirectory(src)
5860

5961
# This hides samples, test, and doxygen from apps that use box2d via FetchContent
@@ -95,6 +97,7 @@ if(PROJECT_IS_TOP_LEVEL)
9597
if(NOT BUILD_SHARED_LIBS AND BOX2D_UNIT_TESTS)
9698
message(STATUS "Adding Box2D unit tests")
9799
add_subdirectory(test)
100+
set_target_properties(test PROPERTIES XCODE_GENERATE_SCHEME TRUE)
98101
else()
99102
message(STATUS "Skipping Box2D unit tests")
100103
endif()
@@ -107,10 +110,15 @@ if(PROJECT_IS_TOP_LEVEL)
107110
set_property(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} PROPERTY VS_STARTUP_PROJECT samples)
108111
set_property(TARGET samples PROPERTY VS_DEBUGGER_WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}")
109112
endif()
113+
114+
set_target_properties(samples PROPERTIES
115+
XCODE_GENERATE_SCHEME TRUE
116+
XCODE_SCHEME_WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}")
110117
endif()
111118

112119
if(BOX2D_BENCHMARKS)
113120
add_subdirectory(benchmark)
121+
set_target_properties(benchmark PROPERTIES XCODE_GENERATE_SCHEME TRUE)
114122
endif()
115123

116124
if(BOX2D_DOCS)

README.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ Box2D is a 2D physics engine for games.
5454
- cmake -G Xcode ..
5555
- open box2d.xcodeproj
5656
- Select the samples scheme
57-
- Edit the scheme to set a custom working directory, make this be in box2d/samples
57+
- Edit the scheme to set a custom working directory to the box2d directory
5858
- You can now build and run the samples
5959

6060
## Compatibility

benchmark/CMakeLists.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -19,4 +19,4 @@ if(MSVC)
1919
# target_compile_options(benchmark PRIVATE /experimental:c11atomics)
2020
endif()
2121

22-
target_link_libraries(benchmark PRIVATE box2d enkiTS simde)
22+
target_link_libraries(benchmark PRIVATE box2d enkiTS)

benchmark/amd7950x/joint_grid.csv

+8-8
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
threads,fps
2-
1,331.343
3-
2,638.04
4-
3,932.731
5-
4,1200.15
6-
5,1480.23
7-
6,1718.79
8-
7,1930.12
9-
8,2133.65
2+
1,333.121
3+
2,638.057
4+
3,928.95
5+
4,1205.85
6+
5,1479.54
7+
6,1699.99
8+
7,1974.84
9+
8,2043.64

benchmark/amd7950x/large_pyramid.csv

+8-8
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
threads,fps
2-
1,325.705
3-
2,616.127
4-
3,886.575
5-
4,1118.85
6-
5,1331.22
7-
6,1498.6
8-
7,1685.28
9-
8,1728.1
2+
1,336.895
3+
2,602.665
4+
3,878.207
5+
4,1117.02
6+
5,1304.79
7+
6,1482.92
8+
7,1663.97
9+
8,1661.13

benchmark/amd7950x/many_pyramids.csv

+8-8
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
threads,fps
2-
1,82.8619
3-
2,160.906
4-
3,236.027
5-
4,300.688
6-
5,368.315
7-
6,429.822
8-
7,498.81
9-
8,549.271
2+
1,84.8025
3+
2,163.264
4+
3,234.388
5+
4,305.216
6+
5,369.85
7+
6,434.45
8+
7,497.573
9+
8,525.427

benchmark/amd7950x/smash.csv

+8-8
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
threads,fps
2-
1,173.898
3-
2,277.19
4-
3,357.566
5-
4,430.528
6-
5,483.446
7-
6,525.652
8-
7,566.859
9-
8,598.553
2+
1,174.051
3+
2,276.742
4+
3,352.751
5+
4,421.773
6+
5,479.049
7+
6,522.318
8+
7,556.193
9+
8,586.672

benchmark/amd7950x/tumbler.csv

+8-8
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
threads,fps
2-
1,373.066
3-
2,581.852
4-
3,764.444
5-
4,902.898
6-
5,1044.99
7-
6,1143.44
8-
7,1229.87
9-
8,1299.61
2+
1,376.3
3+
2,576.749
4+
3,737.749
5+
4,883.315
6+
5,1024.69
7+
6,1120.48
8+
7,1197.2
9+
8,1212.85
+9
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
threads,fps
2+
1,362.12
3+
2,685.873
4+
3,998.169
5+
4,1274.09
6+
5,1590.5
7+
6,1841.48
8+
7,2036.83
9+
8,2152.76
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
threads,fps
2+
1,148.238
3+
2,279.403
4+
3,407.797
5+
4,524.174
6+
5,635.423
7+
6,716.434
8+
7,799.394
9+
8,880.242
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
threads,fps
2+
1,38.1845
3+
2,73.9263
4+
3,108.337
5+
4,139.456
6+
5,171.725
7+
6,198.861
8+
7,229.515
9+
8,253.222

benchmark/amd7950x_float/smash.csv

+9
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
threads,fps
2+
1,130.637
3+
2,210.938
4+
3,275.828
5+
4,341.204
6+
5,386.281
7+
6,426.426
8+
7,452.909
9+
8,467.611

benchmark/amd7950x_float/tumbler.csv

+9
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
threads,fps
2+
1,247.067
3+
2,403.606
4+
3,523.705
5+
4,629.426
6+
5,734.511
7+
6,800.338
8+
7,857.235
9+
8,898.919
+8-8
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
threads,fps
2-
1,357.551
3-
2,691.193
4-
3,1010.45
5-
4,1317.42
6-
5,1590.65
7-
6,1858.78
8-
7,2074.2
9-
8,2261.67
2+
1,360.077
3+
2,687.48
4+
3,998.479
5+
4,1261.45
6+
5,1581.53
7+
6,1825.64
8+
7,2067.46
9+
8,2216.48
+8-8
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
threads,fps
2-
1,186.185
3-
2,351.045
4-
3,511.316
5-
4,636.035
6-
5,765.404
7-
6,875.296
8-
7,991.353
9-
8,961.402
2+
1,288.876
3+
2,527.399
4+
3,769.81
5+
4,982.428
6+
5,1151.91
7+
6,1323.49
8+
7,1474.09
9+
8,1552.6
+8-8
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
threads,fps
2-
1,48.5561
3-
2,92.6231
4-
3,137.175
5-
4,176.644
6-
5,214.941
7-
6,253.39
8-
7,288.631
9-
8,312.527
2+
1,75.3333
3+
2,141.977
4+
3,205.225
5+
4,266.523
6+
5,330.244
7+
6,380.809
8+
7,433.287
9+
8,482.241

benchmark/amd7950x_sse2/smash.csv

+8-8
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
threads,fps
2-
1,142.532
3-
2,228.987
4-
3,299.951
5-
4,364.679
6-
5,413.564
7-
6,453.351
8-
7,489.239
9-
8,519.379
2+
1,165.538
3+
2,263.517
4+
3,338.066
5+
4,405.629
6+
5,461.45
7+
6,506.119
8+
7,540.182
9+
8,563.682

benchmark/amd7950x_sse2/tumbler.csv

+8-8
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
threads,fps
2-
1,276.905
3-
2,453.522
4-
3,592.946
5-
4,702.383
6-
5,826.52
7-
6,919.179
8-
7,1009.05
9-
8,1062.61
2+
1,326.657
3+
2,521.743
4+
3,671.396
5+
4,805.81
6+
5,928.274
7+
6,1019.45
8+
7,1082.76
9+
8,1109.95

0 commit comments

Comments
 (0)