Skip to content
This repository was archived by the owner on Jul 31, 2024. It is now read-only.

Commit 4725f4e

Browse files
committed
Merge branch 'release-3.2.2'
- Python gotcha fixes - Fixed issues with mallocp segfaulting from Python - Fixed storage merge() segfaulting - New Python tools submodule (timemory.tools) - tools.function_wrappers combines {start,stop}_{mpip,ompt,ncclp,mallocp} into one configurable handle and provides decorator + context-manager features - New Python functions which are used within tools.function_wrappers - timemory.start_function_wrappers - timemory.stop_function_wrappers - Fixed timemory-python-line-profiler script calling timemory.profiler - API change in ring_buffer template - read/write member functions return pointer to object read/written to instead of bytes - API change in storage and tsettings - Classes are declared as final to optimize any vtable calls - Removed runtime_configurable restriction for do_enumerator_generate - This enables user_bundles to be used again in Python - Added operation::python_class_name - Updated examples: - ex_python_bindings (and libex_python_bindings) - Fix to get_hash_identifier - Removed concurrency comparison when generating a diff b/t two runs - Fixed issues with popen.cpp guarding with TIMEMORY_WINDOWS but never defined
2 parents 76ff978 + 5e46d2c commit 4725f4e

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

41 files changed

+1292
-546
lines changed

.appveyor.yml

+9-30
Original file line numberDiff line numberDiff line change
@@ -42,45 +42,23 @@ environment:
4242
USE_ARCH: "OFF"
4343
USE_PYTHON: "ON"
4444
BUILD_C: "ON"
45+
BUILD_SHARED: "ON"
4546
BUILD_STATIC: "OFF"
4647
BUILD_TESTING: "ON"
4748
LINE_PROFILER: "ON"
48-
- CONDA: 37
49-
CPP: 14
50-
CONFIG: RelWithDebInfo
51-
EXAMPLES: "OFF"
52-
TOOLS: "OFF"
53-
WINSOCK: "OFF"
54-
USE_ARCH: "OFF"
55-
USE_PYTHON: "ON"
56-
BUILD_C: "OFF"
57-
BUILD_STATIC: "ON"
58-
BUILD_TESTING: "OFF"
59-
LINE_PROFILER: "ON"
60-
- PYTHON: 36
49+
- PYTHON: 37
6150
CPP: 14
6251
CONFIG: Debug
6352
EXAMPLES: "ON"
64-
TOOLS: "OFF"
65-
WINSOCK: "OFF"
66-
USE_ARCH: "OFF"
67-
USE_PYTHON: "ON"
68-
BUILD_C: "ON"
69-
BUILD_STATIC: "OFF"
70-
BUILD_TESTING: "OFF"
71-
LINE_PROFILER: "OFF"
72-
- PYTHON: 37
73-
CPP: 17
74-
CONFIG: MinSizeRel
75-
EXAMPLES: "OFF"
7653
TOOLS: "ON"
7754
WINSOCK: "OFF"
7855
USE_ARCH: "OFF"
7956
USE_PYTHON: "ON"
8057
BUILD_C: "OFF"
58+
BUILD_SHARED: "OFF"
8159
BUILD_STATIC: "ON"
8260
BUILD_TESTING: "OFF"
83-
LINE_PROFILER: "ON"
61+
LINE_PROFILER: "OFF"
8462

8563
install:
8664
# Configure environment
@@ -109,7 +87,7 @@ install:
10987
$env:PYTHON_EXE = "C:\Python$env:PYTHON\python.exe"
11088
}
11189
python -m pip install --disable-pip-version-check --user --upgrade pip wheel
112-
python -m pip install --user cython numpy matplotlib pillow pandas pydot
90+
python -m pip install --user matplotlib numpy pillow cython six pandas pydot multiprocess
11391
} elseif ($env:CONDA) {
11492
if ($env:PLATFORM -eq "x64") {
11593
$env:PATH = "C:\Miniconda$env:CONDA-x64;C:\Miniconda$env:CONDA-x64\Scripts;$env:PATH"
@@ -122,8 +100,8 @@ install:
122100
$env:PYTHONHOME = "C:\Miniconda$env:CONDA"
123101
$env:PYTHON_EXE = "C:\Miniconda$env:CONDA\python.exe"
124102
}
125-
conda update -y -q -n base conda
126-
conda install -y -c defaults -c conda-forge -q pip setuptools scikit-build numpy matplotlib pillow cython pandas pydot
103+
conda update -y -n base conda
104+
conda install -y -c conda-forge -c defaults matplotlib numpy pillow cython six pandas pydot multiprocess
127105
}
128106
$env:TIMEMORY_FILE_OUTPUT = "OFF"
129107
$env:TIMEMORY_AUTO_OUTPUT = "ON"
@@ -134,8 +112,9 @@ build_script:
134112
- mkdir build-timemory
135113
- cd build-timemory
136114
- cmake .. -G "%CMAKE_GENERATOR%" -A "%CMAKE_ARCH%"
137-
-DBUILD_SHARED_LIBS=ON
115+
-DBUILD_SHARED_LIBS="%BUILD_SHARED%"
138116
-DBUILD_STATIC_LIBS="%BUILD_STATIC%"
117+
-DCMAKE_POSITION_INDEPENDENT_CODE=ON
139118
-DCMAKE_BUILD_TYPE="%CONFIG%"
140119
-DCMAKE_CXX_STANDARD="%CPP%"
141120
-DCMAKE_INSTALL_PREFIX=..\install-timemory

.readthedocs.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ conda:
66

77
python:
88
setup_py_install: False
9-
version: 3.6
9+
version: 3.7
1010

1111
build:
1212
image: latest

CHANGELOG.md

+28
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,33 @@
11
# timemory
22

3+
## Version 3.2.2
4+
5+
> Date: Wed Jul 14 20:42:29 2021 -0500
6+
7+
- Python gotcha fixes
8+
- Fixed issues with mallocp segfaulting from Python
9+
- Fixed storage merge() segfaulting
10+
- New Python tools submodule (timemory.tools)
11+
- tools.function_wrappers combines {start,stop}_{mpip,ompt,ncclp,mallocp}
12+
into one configurable handle and provides decorator + context-manager features
13+
- New Python functions which are used within tools.function_wrappers
14+
- timemory.start_function_wrappers
15+
- timemory.stop_function_wrappers
16+
- Fixed timemory-python-line-profiler script calling timemory.profiler
17+
- API change in ring_buffer template
18+
- read/write member functions return pointer to object read/written to
19+
instead of bytes
20+
- API change in storage and tsettings
21+
- Classes are declared as final to optimize any vtable calls
22+
- Removed runtime_configurable restriction for do_enumerator_generate
23+
- This enables user_bundles to be used again in Python
24+
- Added operation::python_class_name
25+
- Updated examples:
26+
- ex_python_bindings (and libex_python_bindings)
27+
- Fix to get_hash_identifier
28+
- Removed concurrency comparison when generating a diff between two runs
29+
- Fixed issues with popen.cpp being guarded by TIMEMORY_WINDOWS, which was never defined
30+
331
## Version 3.2.1
432

533
> Date: Fri Jul 9 16:55:33 2021 -0500

VERSION

+1-1
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
3.2.1
1+
3.2.2

docs/conf.py

+3
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,9 @@ def build_doxy_docs():
8181
"-DENABLE_DOXYGEN_LATEX_DOCS=OFF",
8282
"-DENABLE_DOXYGEN_MAN_DOCS=OFF",
8383
"-DTIMEMORY_BUILD_KOKKOS_TOOLS=ON",
84+
"-DTIMEMORY_BUILD_C=OFF",
85+
"-DTIMEMORY_BUILD_CUDA=OFF",
86+
"-DTIMEMORY_BUILD_FORTRAN=OFF",
8487
_srcdir,
8588
]
8689
)

docs/environment.yml

+2-1
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ channels:
33
- conda-forge
44
- defaults
55
dependencies:
6-
- python=3.6
6+
- python=3.7
77
- cmake
88
- curl
99
- doxygen
@@ -18,4 +18,5 @@ dependencies:
1818
- setuptools
1919
- breathe
2020
- sphinx-markdown-tables
21+
- docutils
2122

docs/getting_started/integrating.md

+3
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ were not available when timemory was installed.
4242
| `timemory::timemory-compile-extra` | Extra optimization flags |
4343
| `timemory::timemory-compile-options` | Adds the standard set of compiler flags used by timemory |
4444
| `timemory::timemory-compile-timing` | Adds compiler flags which report compilation timing metrics |
45+
| `timemory::timemory-compiler-instrument-compile-options` | INTERFACE |
4546
| `timemory::timemory-compiler-instrument` | Provides library for compiler instrumentation |
4647
| `timemory::timemory-coverage` | Enables code-coverage flags |
4748
| `timemory::timemory-cpu-roofline` | Enables flags and libraries for proper CPU roofline generation |
@@ -69,6 +70,7 @@ were not available when timemory was installed.
6970
| `timemory::timemory-hidden-visibility` | Adds -fvisibility=hidden compiler flag |
7071
| `timemory::timemory-instrument-functions` | Adds compiler flags to enable compile-time instrumentation |
7172
| `timemory::timemory-leak-sanitizer` | Adds compiler flags to enable leak sanitizer (-fsanitize=leak) |
73+
| `timemory::timemory-libunwind` | Enables libunwind support |
7274
| `timemory::timemory-likwid` | Enables LIKWID support |
7375
| `timemory::timemory-lto` | Adds link-time-optimization flags |
7476
| `timemory::timemory-mallocp-library` | Provides MALLOCP library for tracking memory allocations |
@@ -79,6 +81,7 @@ were not available when timemory was installed.
7981
| `timemory::timemory-ncclp-library` | Provides NCCLP library for NCCL performance analysis |
8082
| `timemory::timemory-no-mpi-init` | Disables the generation of MPI_Init and MPI_Init_thread symbols |
8183
| `timemory::timemory-null-sanitizer` | Adds compiler flags to enable null sanitizer (-fsanitize=null) |
84+
| `timemory::timemory-nvml` | Enables NVML support (NVIDIA) |
8285
| `timemory::timemory-ompt-library` | Provides OMPT library for OpenMP performance analysis |
8386
| `timemory::timemory-ompt` | Enables OpenMP-tools support |
8487
| `timemory::timemory-papi-static` | Enables PAPI support + links to static library |

docs/tools/timemory-compiler-instrument/README.md

+4
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,10 @@ In other words, `"TIMEMORY_FLAT_PROFILE=ON"` will not be applied to the compiler
3939
to enable flat profiling for the compiler instrumentation, set `"TIMEMORY_COMPILER_FLAT_PROFILE=ON"`,
4040
and so on for `"TIMEMORY_COMPILER_OUTPUT_PATH=..."`, etc.
4141

42+
> **NOTE:** Environment variables `TIMEMORY_COMPILER_MAX_DEPTH`, `TIMEMORY_COMPILER_THROTTLE_COUNT`, and `TIMEMORY_COMPILER_THROTTLE_VALUE`
43+
> can be very useful for reducing the overhead of the instrumentation. For more information, see the descriptions provided via
44+
> `timemory-avail -Sd -r 'THROTTLE|MAX_DEPTH'`.
45+
4246
## Build
4347

4448
Timemory provides a `timemory::timemory-compiler-instrument` target in CMake which provides the necessary

examples/ex-custom-dynamic-instr/ex_custom_dynamic_instr.cpp

+1
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
// SOFTWARE.
2424
//
2525

26+
#include "timemory/components/papi/papi_tuple.hpp"
2627
#include "timemory/library.h"
2728
#include "timemory/timemory.hpp"
2829

examples/ex-optional/ex_optional.cpp

+3
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,9 @@ int main(int argc, char** argv)
123123
ret_sum += ret * ret;
124124
}
125125

126+
// avoid set but unused warning
127+
if(ret_sum < 0) printf("sum: %li\n", ret_sum);
128+
126129
std::vector<long> ret_reduce;
127130
std::vector<long> ret_send;
128131
for(size_t i = 0; i < fibvalues.size(); ++i)

examples/ex-python/ex_bindings.py

+75-41
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,22 @@
11
#!@PYTHON_EXECUTABLE@
22

3-
3+
import sys
44
import numpy
55
import argparse
66

77
use_mpi = True
88
try:
99
import mpi4py # noqa: F401
1010
from mpi4py import MPI # noqa: F401
11+
from mpi4py.MPI import Exception as MPIException # noqa: F401
1112
except ImportError:
1213
use_mpi = False
14+
MPIException = RuntimeError
1315
pass
1416

1517
import timemory # noqa: E402
1618
from timemory.profiler import profile # noqa: E402
19+
from timemory.tools import function_wrappers # noqa: E402
1720
import libex_python_bindings as ex_bindings # noqa: E402
1821

1922
if use_mpi:
@@ -35,37 +38,48 @@ def run_profile(nitr=100, nsize=1000000):
3538

3639
def run_mpi(nitr=100, nsize=1000000):
3740

38-
if size != 2:
39-
return
41+
if use_mpi is False:
42+
_sum = 0.0
43+
for i in range(nitr):
44+
data = numpy.arange(nsize, dtype="i")
45+
_val = numpy.sum(data)
46+
_sum += 1.0 / _val
47+
data = numpy.arange(nsize, dtype=numpy.float64)
48+
_val = numpy.sum(data)
49+
_sum += 1.0 / _val
4050

51+
msgs = set()
4152
for i in range(nitr):
4253
# passing MPI datatypes explicitly
43-
if rank == 0:
44-
data = numpy.arange(nsize, dtype="i")
45-
comm.Send([data, MPI.INT], dest=1, tag=77)
46-
elif rank == 1:
47-
data = numpy.empty(nsize, dtype="i")
48-
comm.Recv([data, MPI.INT], source=0, tag=77)
54+
try:
55+
if rank == 0:
56+
data = numpy.arange(nsize, dtype="i")
57+
comm.Send([data, MPI.INT], dest=1, tag=77)
58+
elif rank == 1:
59+
data = numpy.empty(nsize, dtype="i")
60+
comm.Recv([data, MPI.INT], source=0, tag=77)
61+
except MPIException as e:
62+
msgs.add(f"{e}")
4963

5064
# automatic MPI datatype discovery
51-
if rank == 0:
52-
data = numpy.empty(nsize, dtype=numpy.float64)
53-
comm.Recv(data, source=1, tag=13)
54-
elif rank == 1:
55-
data = numpy.arange(nsize, dtype=numpy.float64)
56-
comm.Send(data, dest=0, tag=13)
57-
65+
try:
66+
if rank == 0 and size == 2:
67+
data = numpy.empty(nsize, dtype=numpy.float64)
68+
comm.Recv(data, source=1, tag=13)
69+
elif rank == 1:
70+
data = numpy.arange(nsize, dtype=numpy.float64)
71+
comm.Send(data, dest=0, tag=13)
72+
except MPIException as e:
73+
msgs.add(f"{e}")
5874

59-
def main(args):
60-
# start MPI wrappers
61-
id = timemory.start_mpip()
75+
for i, itr in enumerate(msgs):
76+
sys.stderr.write("{}: {}\n".format(i, itr))
6277

63-
run_mpi(args.iterations)
64-
ans = run_profile(args.iterations, args.size)
6578

66-
# stop MPI wrappers
67-
timemory.stop_mpip(id)
68-
return ans
79+
def main(args):
80+
# start function wrappers (MPI, OpenMP, etc. if available)
81+
with function_wrappers(*args.profile, nccl=False):
82+
return run_profile(args.iterations, args.size)
6983

7084

7185
if __name__ == "__main__":
@@ -74,40 +88,60 @@ def main(args):
7488
parser.add_argument(
7589
"-i",
7690
"--iterations",
77-
required=False,
7891
default=100,
7992
type=int,
8093
help="Iterations",
8194
)
8295
parser.add_argument(
8396
"-n",
8497
"--size",
85-
required=False,
8698
default=1000000,
8799
type=int,
88100
help="Array size",
89101
)
102+
parser.add_argument(
103+
"-c",
104+
"--components",
105+
default=[
106+
"wall_clock",
107+
"peak_rss",
108+
"cpu_clock",
109+
"cpu_util",
110+
"thread_cpu_clock",
111+
"thread_cpu_util",
112+
],
113+
type=str,
114+
help="Additional components",
115+
nargs="*",
116+
)
117+
parser.add_argument(
118+
"-p",
119+
"--profile",
120+
default=["mpi", "openmp", "malloc"],
121+
choices=("mpi", "openmp", "malloc", "nccl"),
122+
type=str,
123+
help="Profiling library wrappers to activate",
124+
nargs="*",
125+
)
90126

91127
args = parser.parse_args()
92128
timemory.enable_signal_detection()
93-
timemory.settings.width = 12
94-
timemory.settings.precision = 6
129+
timemory.settings.width = 8
130+
timemory.settings.precision = 2
131+
timemory.settings.scientific = True
95132
timemory.settings.plot_output = True
96-
timemory.settings.dart_output = True
133+
timemory.settings.dart_output = False
134+
timemory.timemory_init([__file__])
97135

98-
with profile(
99-
[
100-
"wall_clock",
101-
"user_clock",
102-
"system_clock",
103-
"cpu_util",
104-
"peak_rss",
105-
"thread_cpu_clock",
106-
"thread_cpu_util",
107-
]
108-
):
136+
@function_wrappers(*args.profile, nccl=False)
137+
def runner(nitr, nsize):
138+
run_mpi(nitr, nsize)
139+
140+
runner(args.iterations, args.size)
141+
142+
with profile(args.components):
109143
ans = main(args)
110-
print("Answer = {}".format(ans))
111144

145+
print("Success! Answer = {}. Finalizing...".format(ans))
112146
timemory.finalize()
113147
print("Python Finished")

0 commit comments

Comments
 (0)