Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix hang in triangle-counting (tc) microbenchmark #127

Closed
wants to merge 4 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Dockerfile.dev
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,7 @@ RUN if [ "${IS_CI}" != "true" ] ; then \
doxygen \
texlive-latex-extra \
texlive-font-utils \
time \
&& apt clean \
&& update-locale; fi

Expand Down
3 changes: 1 addition & 2 deletions microbench/triangle-counting/include/utils.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ void printUsage(char* argv0);
// CONNECTION KERNELS
// #####################################################################
template <typename GraphType>
void intersect_dag_merge(galois::WaitGroup::HandleType wgh, pando::GlobalPtr<GraphType> graph_ptr,
void intersect_dag_merge(pando::GlobalPtr<GraphType> graph_ptr,
typename GraphType::VertexTopologyID v0,
typename GraphType::VertexTopologyID v1,
galois::DAccumulator<uint64_t> final_tri_count) {
Expand All @@ -90,7 +90,6 @@ void intersect_dag_merge(galois::WaitGroup::HandleType wgh, pando::GlobalPtr<Gra
count++;
}
final_tri_count.add(count);
wgh.done();
}

template <typename GraphType>
Expand Down
38 changes: 23 additions & 15 deletions microbench/triangle-counting/src/tc_algos.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,9 @@
* @param[in] final_tri_count Thread-safe counter
*/
template <typename Graph>
void edge_tc_counting(pando::GlobalPtr<Graph> graph_ptr, typename Graph::VertexTopologyID v0,
typename Graph::EdgeRange edge_range,
void edge_tc_counting(galois::WaitGroup::HandleType wgh, pando::GlobalPtr<Graph> graph_ptr,
typename Graph::VertexTopologyID v0, typename Graph::EdgeRange edge_range,
galois::DAccumulator<uint64_t> final_tri_count) {
galois::WaitGroup wg;
PANDO_CHECK(wg.initialize(0));
auto wgh = wg.getHandle();
auto innerState = galois::make_tpl(graph_ptr, v0, wgh, final_tri_count);
Graph graph = *graph_ptr;
galois::doAll(
Expand All @@ -28,8 +25,7 @@ void edge_tc_counting(pando::GlobalPtr<Graph> graph_ptr, typename Graph::VertexT
auto [graph_ptr, v0, wgh, final_tri_count] = innerState;
Graph g = *graph_ptr;
typename Graph::VertexTopologyID v1 = fmap(g, getEdgeDst, eh);
wgh.addOne();
intersect_dag_merge<Graph>(wgh, graph_ptr, v0, v1, final_tri_count);
intersect_dag_merge<Graph>(graph_ptr, v0, v1, final_tri_count);
},
[&graph](decltype(innerState) innerState, typename Graph::EdgeHandle eh) -> pando::Place {
auto v0 = std::get<1>(innerState);
Expand All @@ -39,7 +35,6 @@ void edge_tc_counting(pando::GlobalPtr<Graph> graph_ptr, typename Graph::VertexT
: fmap(graph, getLocalityVertex, v1);
return locality;
});
PANDO_CHECK(wg.wait());
}

// #####################################################################
Expand All @@ -55,19 +50,27 @@ template <typename GraphType>
void tc_no_chunk(pando::GlobalPtr<GraphType> graph_ptr,
galois::DAccumulator<uint64_t> final_tri_count) {
GraphType graph = *graph_ptr;
auto state = galois::make_tpl(graph_ptr, final_tri_count);

galois::WaitGroup wg;
PANDO_CHECK(wg.initialize(0));
auto wgh = wg.getHandle();
auto state = galois::make_tpl(graph_ptr, final_tri_count, wgh);

galois::doAll(
state, graph.vertices(), +[](decltype(state) state, typename GraphType::VertexTopologyID v0) {
auto [graph_ptr, final_tri_count] = state;
wgh, state, graph.vertices(),
+[](decltype(state) state, typename GraphType::VertexTopologyID v0) {
auto [graph_ptr, final_tri_count, wgh] = state;
GraphType graph = *graph_ptr;

// Degree Filtering Optimization
uint64_t v0_degree = graph.getNumEdges(v0);
if (v0_degree < (TC_EMBEDDING_SZ - 1))
return;

edge_tc_counting<GraphType>(graph_ptr, v0, graph.edges(v0), final_tri_count);
edge_tc_counting<GraphType>(wgh, graph_ptr, v0, graph.edges(v0), final_tri_count);
});
PANDO_CHECK(wg.wait());
wg.deinitialize();
}

/**
Expand Down Expand Up @@ -159,27 +162,32 @@ void tc_chunk_vertices(pando::GlobalPtr<GraphDL> graph_ptr,
auto lcsr = graph.getLocalCSR();
uint64_t host_vertex_iter_offset = host_vertex_iter_offset_ref;

auto inner_state = galois::make_tpl(graph_ptr, final_tri_count);
galois::WaitGroup wg;
PANDO_CHECK(wg.initialize(0));
auto wgh = wg.getHandle();
auto inner_state = galois::make_tpl(graph_ptr, final_tri_count, wgh);
galois::doAll(
inner_state, fmap(lcsr, vertices, host_vertex_iter_offset, query_sz),
+[](decltype(inner_state) inner_state, typename GraphDL::VertexTopologyID v0) {
auto [graph_ptr, final_tri_count] = inner_state;
auto [graph_ptr, final_tri_count, wgh] = inner_state;
GraphDL graph = *graph_ptr;

// Degree Filtering Optimization
uint64_t v0_degree = graph.getNumEdges(v0);
if (v0_degree < (TC_EMBEDDING_SZ - 1))
return;

edge_tc_counting<GraphDL>(graph_ptr, v0, graph.edges(v0), final_tri_count);
edge_tc_counting<GraphDL>(wgh, graph_ptr, v0, graph.edges(v0), final_tri_count);
});
PANDO_CHECK(wg.wait());

// Move iter offset
uint64_t lcsr_num_vertices = fmap(lcsr, size);
host_vertex_iter_offset += query_sz;
if (host_vertex_iter_offset < lcsr_num_vertices)
work_remaining.increment();
host_vertex_iter_offset_ref = host_vertex_iter_offset;
wg.deinitialize();
});

uint64_t current_count = final_tri_count.reduce();
Expand Down
52 changes: 29 additions & 23 deletions pando-rt/src/init.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -198,29 +198,35 @@ int main(int argc, char* argv[]) {
rc = getrusage(RUSAGE_SELF, &end);
if(rc != 0) {PANDO_ABORT("GETRUSAGE FAILED");}
auto thisPlace = pando::getCurrentPlace();
SPDLOG_WARN("Total time on node: {}, was {}ns",
thisPlace.node.id,
end.ru_utime.tv_sec * 1000000000 + end.ru_utime.tv_usec * 1000 -
(start.ru_utime.tv_sec * 1000000000 + start.ru_utime.tv_usec * 1000) +
end.ru_stime.tv_sec * 1000000000 + end.ru_stime.tv_usec * 1000 -
(start.ru_stime.tv_sec * 1000000000 + start.ru_stime.tv_usec * 1000));
for(std::uint64_t i = 0; i < std::uint64_t(dims.core.x + 1); i++) {
SPDLOG_WARN("Idle time on node: {}, core: {} was {}",
thisPlace.node.id,
std::int8_t((i == std::uint64_t(dims.core.x)) ? -1 : i),
idleCount.get(i));
SPDLOG_WARN("Pointer time on node: {}, core: {} was {}",
thisPlace.node.id,
std::int8_t((i == std::uint64_t(dims.core.x)) ? -1 : i),
pointerCount.get(i));
SPDLOG_WARN("Scheduler time on node: {}, core: {} was {}",
thisPlace.node.id,
std::int8_t((i == std::uint64_t(dims.core.x)) ? -1 : i),
schedulerCount.get(i));
SPDLOG_WARN("DoAll time on node: {}, core: {} was {}",
thisPlace.node.id,
std::int8_t((i == std::uint64_t(dims.core.x)) ? -1 : i),
doAllCount.get(i));
for (std::int64_t j = 0; j < std::int64_t(dims.node.id); j++) {
if (j == thisPlace.node.id) {
SPDLOG_WARN("Total time on node: {}, was {}ns",
thisPlace.node.id,
end.ru_utime.tv_sec * 1000000000 + end.ru_utime.tv_usec * 1000 -
(start.ru_utime.tv_sec * 1000000000 + start.ru_utime.tv_usec * 1000) +
end.ru_stime.tv_sec * 1000000000 + end.ru_stime.tv_usec * 1000 -
(start.ru_stime.tv_sec * 1000000000 + start.ru_stime.tv_usec * 1000));
for(std::uint64_t i = 0; i < std::uint64_t(dims.core.x + 2); i++) {
SPDLOG_WARN("Idle time on node: {}, core: {} was {}",
thisPlace.node.id,
std::int8_t((i == std::uint64_t(dims.core.x + 1)) ? -1 : i),
idleCount.get(i));
SPDLOG_WARN("Pointer time on node: {}, core: {} was {}",
thisPlace.node.id,
std::int8_t((i == std::uint64_t(dims.core.x + 1)) ? -1 : i),
pointerCount.get(i));
SPDLOG_WARN("Scheduler time on node: {}, core: {} was {}",
thisPlace.node.id,
std::int8_t((i == std::uint64_t(dims.core.x + 1)) ? -1 : i),
schedulerCount.get(i));
SPDLOG_WARN("DoAll time on node: {}, core: {} was {}",
thisPlace.node.id,
std::int8_t((i == std::uint64_t(dims.core.x + 1)) ? -1 : i),
doAllCount.get(i));
}
}

pando::Nodes::barrier();
}


Expand Down
Loading