Skip to content

Commit 6a5430d

Browse files
committed
New makefile
1 parent 2e7f099 commit 6a5430d

File tree

8 files changed

+822
-146
lines changed

8 files changed

+822
-146
lines changed

.github/workflows/deploy.yml

+4-4
Original file line numberDiff line numberDiff line change
@@ -49,9 +49,9 @@ jobs:
4949

5050
steps:
5151
- name: make
52-
run: make -j32 CXX=g++-${{matrix.compiler}} CC=gcc-${{matrix.compiler}} PLATFORM=${{ matrix.platform }} LEIDEN=true
52+
run: gmake -j32 CXX=g++-${{matrix.compiler}} CC=gcc-${{matrix.compiler}} PLATFORM=${{ matrix.platform }} LEIDEN=true STATIC_LINK=true
5353
- name: tar artifacts
54-
run: tar -cvzf clusty.tar.gz clusty LICENSE
54+
run: tar -cvzf clusty.tar.gz LICENSE -C ./bin clusty
5555

5656

5757
########################################################################################
@@ -66,9 +66,9 @@ jobs:
6666

6767
steps:
6868
- name: help
69-
run: ./clusty
69+
run: ./bin/clusty
7070
- name: version
71-
run: ./clusty --version
71+
run: ./bin/clusty --version
7272

7373
########################################################################################
7474
upload:

.github/workflows/main.yml

+12-5
Original file line numberDiff line numberDiff line change
@@ -15,14 +15,21 @@ jobs:
1515
strategy:
1616
fail-fast: false
1717
matrix:
18-
machine: [ubuntu-latest, macOS-12]
18+
machine: [ubuntu-latest]
19+
gmake_install_command: ['gmake --version']
1920
compiler: [12]
21+
include:
22+
- {machine: macOS-13, gmake_install_command: 'brew install make && gmake --version', compiler: 12}
23+
2024
runs-on: ['${{ matrix.machine }}']
2125

2226
steps:
2327
- uses: actions/checkout@v4
2428
with:
2529
submodules: recursive
30+
31+
- name: install gmake
32+
run: ${{ matrix.gmake_install_command }}
2633

2734
- name: get tags
2835
run: |
@@ -34,9 +41,9 @@ jobs:
3441

3542
- name: make
3643
run: |
37-
make -j32 CXX=g++-${{matrix.compiler}} CC=gcc-${{matrix.compiler}}
38-
cp ./clusty ./clusty-${{matrix.compiler}}
39-
make clean
44+
gmake -j32 CXX=g++-${{matrix.compiler}} CC=gcc-${{matrix.compiler}} STATIC_LINK=true
45+
cp ./bin/clusty ./clusty-${{matrix.compiler}}
46+
gmake clean
4047
- name: tar artifacts
4148
run: tar -cvf clusty.tar ./clusty-${{matrix.compiler}} ./test/cmp.py ./test/vir61.*
4249

@@ -52,7 +59,7 @@ jobs:
5259
strategy:
5360
fail-fast: false
5461
matrix:
55-
machine: [ubuntu-latest, macOS-12]
62+
machine: [ubuntu-latest, macOS-13]
5663
compiler: [12]
5764
algo: [single, complete]
5865
threshold: [95, 70]

.github/workflows/self-hosted.yml

+3-3
Original file line numberDiff line numberDiff line change
@@ -54,9 +54,9 @@ jobs:
5454
steps:
5555
- name: make
5656
run: |
57-
make -j32 CXX=g++-${{matrix.compiler}} CC=gcc-${{matrix.compiler}} PLATFORM=${{ matrix.platform }} LEIDEN=true
58-
cp ./clusty ./clusty-${{matrix.compiler}}
59-
make clean
57+
gmake -j32 CXX=g++-${{matrix.compiler}} CC=gcc-${{matrix.compiler}} PLATFORM=${{ matrix.platform }} LEIDEN=true STATIC_LINK=true
58+
cp ./bin/clusty ./clusty-${{matrix.compiler}}
59+
gmake clean
6060
6161
########################################################################################
6262
toy:

.gitignore

+2
Original file line numberDiff line numberDiff line change
@@ -11,3 +11,5 @@
1111
/src/clusty/
1212

1313
/src/clusty.vcxproj.user
14+
/obj
15+
/bin

README.md

+12-18
Original file line numberDiff line numberDiff line change
@@ -20,23 +20,23 @@ Clusty is a tool for large-scale clustering. By using sparse distance matrices i
2020
```bash
2121
git clone --recurse-submodules https://github.com/refresh-bio/clusty
2222
cd clusty
23-
make -j
23+
gmake -j
2424

2525
cd ./test
2626

2727
# Run single linkage clustering on the pairwise similarities stored in ictv.ani file, output cluster identifiers
2828
(the first two columns are used as sequence identifiers, the third one is assumed to store similarities).
29-
../clusty --algo single --similarity ictv.ani ictv.single
29+
../bin/clusty --algo single --similarity ictv.ani ictv.single
3030

3131
# Run uclust clustering accepting pairwise connections with values greater than or equal to 0.70 in the ani column, output cluster representatives.
32-
../clusty --algo uclust --similarity --min ani 0.70 --out-representatives ictv.ani ictv.uclust.70
32+
../bin/clusty --algo uclust --similarity --min ani 0.70 --out-representatives ictv.ani ictv.uclust.70
3333

3434
# Run CD-HIT clustering accepting pairwise connections with values greater than or equal to 0.95 in the ani column, output cluster identifiers
3535
# (use id2 and id1 columns as object identifiers and ani column as the similarity).
36-
../clusty --algo cd-hit --similarity --min ani 0.95 --id-cols id2 id1 --distance-col ani vir61.ani vir61.single.95
36+
../bin/clusty --algo cd-hit --similarity --min ani 0.95 --id-cols id2 id1 --distance-col ani vir61.ani vir61.single.95
3737

3838
# Run complete linkage clustering, consider all objects from ictv.list file (including those without pairwise connections).
39-
../clusty --algo complete --objects-file ictv.list --similarity ictv.ani ictv.complete
39+
../bin/clusty --algo complete --objects-file ictv.list --similarity ictv.ani ictv.complete
4040

4141
```
4242

@@ -52,11 +52,11 @@ For detailed instructions how to set up Bioconda, please refer to the [Bioconda
5252

5353
The package can be built from the sources distributed as:
5454
* Visual Studio 2022 solution for Windows,
55-
* MAKE project for Linux and macOS (g++-10 required).
55+
* GNU Make project for Linux and macOS (gmake 4.3 and gcc/g++ 10 or newer required).
5656

5757
To compile Clusty under Linux/macOS please run:
5858
```
59-
make -j
59+
gmake -j
6060
```
6161

6262
### Leiden algorithm
@@ -65,9 +65,9 @@ Clusty provides igraph's implementation of the Leiden algorithm. Precompiled bin
6565
```
6666
sudo apt-get install cmake flex bison
6767
```
68-
Then, one need to build the package with an additional option enabled:
68+
Then, one needs to build the package with an additional option enabled:
6969
```
70-
make -j LEIDEN=true
70+
gmake -j LEIDEN=true
7171
```
7272

7373
Under Windows, Clusty is by default linked against igraph and it requires CMake as the only system dependency. After installing it (https://cmake.org) a user can run `build_igraph.bat` batch script which downloads Flex and Bison binaries to the appropriate locations and then builds igraph. After that it is possible to build Clusty using Visual Studio (the solution is located in `./src/clusty.sln`).
@@ -107,7 +107,7 @@ Options:
107107
* `--leiden-iterations` - number of iterations for Leiden algorithm (default: 2)
108108

109109

110-
The minimum input requirement is a CSV/TSV table with pairwise distances between objects (or similarities, if `--use-similarity` flag is used). By default, identifiers are assumed to be in the two first columns while distances are expected in the third one. Lack of a distance for a given pair of objects is translated to infinite distance. The example input table is given below:
110+
The minimum input requirement is a TSV/CSV table with pairwise distances between objects (or similarities, if `--use-similarity` flag is used). By default, identifiers are assumed to be in the two first columns while distances are expected in the third one. Lack of a distance for a given pair of objects is translated to infinite distance. The example input table is given below:
111111
```
112112
id1,id2,distance
113113
a,b,0.04
@@ -167,14 +167,8 @@ f e
167167

168168
In the following section one can find detailed information on clustering algorithms in Clusty, with *n* representing the number of objects (vertices) and *e* the number of distances (edges) in the data set (graph).
169169

170-
| Algorithm | Details | Time complexity |
171-
| ------------- | ------------- | ------------- |
172-
| Single linkage | Hierarchical agglomerative clustering with a distance between groups defined as a distance between their closest members. Equivalent to finding all consistent subgraphs in a graph. Performed using breadth-first search. | *O*(*e*) |
173-
| Complete linkage | Hierarchical agglomerative clustering with a distance between groups defined as a distance between their furthest members. Equivalent to finding a disjoint set of complete subgraphs covering the entire graph. Fast identification of clusters to merge is performed by storing distances in a heap. | *O*(*e* log*e*) |
174-
| UCLUST | Greedy clustering with objects investigated in descending order w.r.t. representativeness. The first object becomes a centroid; the following objects are either (a) assigned to the closest of centroids they are connected with or (b) become new centroids if they are not connected to any of the existing ones. | *O*(*e*) |
175-
| Greedy set cover | Greedy clustering with objects investigated in descending order w.r.t. the number of neighbors. Every unassigned object becomes a new centroid with all connected objects being assigned to it. Equivalent to MMseqs mode 0 clustering. | *O*(*n* log*n* + *e*) |
176-
| CD-HIT | Greedy clustering with objects investigated in descending order w.r.t. the representativeness. Every unassigned object becomes a new centroid with all connected objects being assigned to it. Equivalent to MMseqs mode 2 clustering. | *O*(*e*) |
177-
| Leiden | Iterative heuristic for finding communities in networks. It consists of three phases: (1) local moving of nodes, (2) refinement of the partition, and (3) aggregation of the network using the refined and the non-refined partitions. | unknown (algorithm provided by an external library) |
170+
![clustering-steps](https://github.com/user-attachments/assets/6d325442-0474-4759-8fff-4c732b97b080)
171+
178172

179173
## Citation
180174
Zielezinski A, Gudyś A, Barylski J, Siminski K, Rozwalak P, Dutilh BE, Deorowicz S. Ultrafast and accurate sequence alignment and clustering of viral genomes. bioRxiv [doi:10.1101/2024.06.27.601020].

makefile

+45-114
Original file line numberDiff line numberDiff line change
@@ -1,128 +1,59 @@
11
all: clusty
22

3-
4-
####################
5-
6-
ifdef MSVC # Avoid the MingW/Cygwin sections
7-
uname_S := Windows
8-
else # If uname not available => 'not'
9-
uname_S := $(shell sh -c 'uname -s 2>/dev/null || echo not')
10-
uname_M := $(shell sh -c 'uname -m 2>/dev/null || echo not')
11-
endif
12-
13-
ifeq ($(uname_S),Linux)
14-
# check if CPU supports AVX2
15-
# HAVE_AVX2 = $(filter-out 0,$(shell grep avx2 /proc/cpuinfo | wc -l))
16-
# OMP_FLAGS = -fopenmp
17-
ABI_FLAGS = -fabi-version=6
18-
MIMALLOC_OBJ=libs/mimalloc/mimalloc.o
19-
endif
20-
ifeq ($(uname_S),Darwin)
21-
# check if CPU supports SSE4.2
22-
# HAVE_AVX2 = $(filter-out 0,$(shell sysctl -n machdep.cpu.features machdep.cpu.leaf7_features| grep AVX2 - | wc -l))
23-
# OMP_FLAGS = -Xpreprocessor -fopenmp
24-
ABI_FLAGS =
25-
MIMALLOC_OBJ=
26-
endif
27-
28-
29-
ifeq ($(PLATFORM), arm8)
30-
$(info *** ARMv8 with NEON extensions ***)
31-
ARCH_FLAGS := -march=armv8-a -DARCH_ARM
32-
else ifeq ($(PLATFORM), m1)
33-
$(info *** Apple M1(or never) with NEON extensions ***)
34-
ARCH_FLAGS := -march=armv8.4-a -DARCH_ARM
35-
else ifeq ($(PLATFORM), sse2)
36-
$(info *** x86-64 with SSE2 extensions ***)
37-
ARCH_FLAGS := -msse2 -m64 -DARCH_X64
38-
else ifeq ($(PLATFORM), avx)
39-
$(info *** x86-64 with AVX extensions ***)
40-
ARCH_FLAGS := -mavx -m64 -DARCH_X64
41-
else ifeq ($(PLATFORM), avx2)
42-
$(info *** x86-64 with AVX2 extensions ***)
43-
ARCH_FLAGS := -mavx2 -m64 -DARCH_X64
3+
# *** REFRESH makefile utils
4+
include refresh.mk
5+
6+
$(call INIT_SUBMODULES)
7+
$(call INIT_GLOBALS)
8+
$(call CHECK_OS_ARCH, $(PLATFORM))
9+
10+
# *** Project directories
11+
$(call SET_SRC_OBJ_BIN,src,obj,bin)
12+
3RD_PARTY_DIR := ./libs
13+
14+
# *** Project configuration
15+
$(call ADD_MIMALLOC, $(3RD_PARTY_DIR)/mimalloc)
16+
#$(call ADD_REFRESH_LIB, $(3RD_PARTY_DIR))
17+
$(call SET_STATIC, $(STATIC_LINK))
18+
$(call SET_C_CPP_STANDARDS, c11, c++17)
19+
$(call SET_GIT_COMMIT)
20+
21+
ifeq ($(LEIDEN),true)
22+
$(call ADD_IGRAPH, $(3RD_PARTY_DIR)/igraph)
4423
else
45-
$(info *** Unspecified platform - use native compilation)
46-
ifeq ($(uname_M),x86_64)
47-
ARCH_FLAGS := -march=native -DARCH_X64
48-
else
49-
ARCH_FLAGS := -march=native -DARCH_ARM
50-
endif
24+
DEFINE_FLAGS += -DNO_LEIDEN
5125
endif
5226

27+
$(call SET_FLAGS, $(TYPE))
5328

54-
GIT_COMMIT = $(shell git describe --always --dirty)
55-
56-
#####################
57-
ROOT_DIR = .
58-
MAIN_DIR = src
59-
INCLUDES = -I libs/mimalloc/include
60-
DEFINE_FLAGS := -DGIT_COMMIT=$(GIT_COMMIT)
29+
$(call SET_COMPILER_VERSION_ALLOWED, GCC, Linux_x86_64, 10, 20)
30+
$(call SET_COMPILER_VERSION_ALLOWED, GCC, Linux_aarch64, 11, 20)
31+
$(call SET_COMPILER_VERSION_ALLOWED, GCC, Darwin_x86_64, 11, 13)
32+
$(call SET_COMPILER_VERSION_ALLOWED, GCC, Darwin_arm64, 11, 13)
6133

62-
ifeq ($(LEIDEN), true)
63-
INCLUDES += -I ./libs/igraph/include -I ./libs/igraph/build/include
64-
LIB_IGRAPH = ./libs/igraph/build/src/libigraph.a
65-
else
66-
DEFINE_FLAGS += -DNO_LEIDEN
34+
ifneq ($(MAKECMDGOALS),clean)
35+
$(call CHECK_COMPILER_VERSION)
6736
endif
6837

38+
# *** Source files and rules
39+
$(eval $(call PREPARE_DEFAULT_COMPILE_RULE,MAIN,.))
6940

70-
ifeq ($(DYNAMIC_LINK), true)
71-
CFLAGS = -Wall -O3 $(ARCH_FLAGS) -std=c++17 $(DEFINE_FLAGS) $(INCLUDES) -pthread
72-
CLINK = -lm -O3 -std=c++17 -pthread $(ABI_FLAGS)
73-
else
74-
ifeq ($(uname_S),Darwin)
75-
CFLAGS = -Wall -O3 $(ARCH_FLAGS) -std=c++17 $(DEFINE_FLAGS) $(INCLUDES)
76-
CLINK = -lm -O3 -std=c++17 $(ABI_FLAGS) -static-libgcc
77-
else
78-
CFLAGS = -Wall -O3 $(ARCH_FLAGS) -std=c++17 $(DEFINE_FLAGS) $(INCLUDES) -static -Wl,--whole-archive -lpthread -Wl,--no-whole-archive
79-
CLINK = -lm -static -O3 -std=c++17 $(ABI_FLAGS) -Wl,--whole-archive -lpthread -Wl,--no-whole-archive
80-
endif
81-
endif
82-
83-
84-
85-
CMAKE_OSX_SYSROOT_FLAG =
86-
ifeq ($(uname_S),Darwin)
87-
SDK_PATH := $(shell $(CXX) -v 2>&1 | grep -- '--with-sysroot' | sed -E 's/.*--with-sysroot=([^ ]+).*/\1/')
88-
CMAKE_OSX_SYSROOT_FLAG := -DCMAKE_OSX_SYSROOT=$(SDK_PATH)
89-
endif
90-
91-
ifeq ($(LEIDEN), true)
92-
igraph:
93-
mkdir libs/igraph/build
94-
cmake $(CMAKE_OSX_SYSROOT_FLAG) -DIEEE754_DOUBLE_ENDIANNESS_MATCHES=TRUE -DCMAKE_C_COMPILER=$(CC) -DCMAKE_CXX_COMPILER=$(CXX) -S libs/igraph -B libs/igraph/build
95-
cmake $(CMAKE_OSX_SYSROOT_FLAG) -DIEEE754_DOUBLE_ENDIANNESS_MATCHES=TRUE -DCMAKE_C_COMPILER=$(CC) -DCMAKE_CXX_COMPILER=$(CXX) -S libs/igraph -B libs/igraph/build
96-
cmake --build libs/igraph/build
97-
else
98-
igraph:
99-
100-
endif
101-
102-
103-
$(MIMALLOC_OBJ):
104-
$(CC) -DMI_MALLOC_OVERRIDE -O3 -DNDEBUG -fPIC -Wall -Wextra -Wno-unknown-pragmas -fvisibility=hidden -Wstrict-prototypes -ftls-model=initial-exec -fno-builtin-malloc -std=gnu11 -c -I libs/mimalloc/include libs/mimalloc/src/static.c -o $(MIMALLOC_OBJ)
105-
106-
107-
108-
OBJS := \
41+
# *** Targets
42+
clusty: $(OUT_BIN_DIR)/clusty
43+
$(OUT_BIN_DIR)/clusty: mimalloc_obj \
44+
$(OBJ_MAIN)
45+
-mkdir -p $(OUT_BIN_DIR)
46+
$(CXX) -o $@ \
10947
$(MIMALLOC_OBJ) \
110-
$(MAIN_DIR)/console.o \
111-
$(MAIN_DIR)/conversion.o \
112-
$(MAIN_DIR)/graph.o \
113-
$(MAIN_DIR)/log.o \
114-
$(MAIN_DIR)/main.o \
115-
$(MAIN_DIR)/params.o \
48+
$(OBJ_MAIN) \
49+
$(LIBRARY_FILES) $(LINKER_FLAGS) $(LINKER_DIRS)
11650

117-
%.o: %.cpp igraph
118-
$(CXX) $(CFLAGS) -c $< -o $@
51+
# *** Cleaning
52+
.PHONY: clean init
53+
clean: clean-zlib-ng clean-isa-l clean-mimalloc_obj clean-igraph
54+
-rm -r $(OBJ_DIR)
55+
-rm -r $(OUT_BIN_DIR)
11956

120-
clusty: $(OBJS)
121-
$(CXX) $(CLINK) $(LDFLAGS) -o $(ROOT_DIR)/$@ $(OBJS) $(LIB_IGRAPH)
57+
init:
58+
$(call INIT_SUBMODULES)
12259

123-
clean:
124-
-rm $(MAIN_DIR)/*.o
125-
-rm $(MIMALLOC_OBJ)
126-
-rm clusty
127-
-rm -r libs/igraph/build
128-

0 commit comments

Comments
 (0)