Skip to content

Commit f178cd1

Browse files
authored
parallelize mesh fixup (#1148)
* parallelize mesh fixup * avoid quadratic behavior * documentation * allocation optimization * format * fix gcc warning * reduce temporary allocations in std::function * rename * format * split DedupeEdges into its own function * avoid undefined behavior
1 parent 5ae22f0 commit f178cd1

File tree

4 files changed

+203
-92
lines changed

4 files changed

+203
-92
lines changed

src/edge_op.cpp

+190-86
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@
1212
// See the License for the specific language governing permissions and
1313
// limitations under the License.
1414

15+
#include <unordered_set>
16+
1517
#include "./impl.h"
1618
#include "./parallel.h"
1719

@@ -214,77 +216,7 @@ void Manifold::Impl::CleanupTopology() {
214216
// In the case of a very bad triangulation, it is possible to create pinched
215217
// verts. They must be removed before edge collapse.
216218
SplitPinchedVerts();
217-
218-
Vec<int> entries;
219-
FlagStore s;
220-
while (1) {
221-
ZoneScopedN("DedupeEdge");
222-
223-
const size_t nbEdges = halfedge_.size();
224-
size_t numFlagged = 0;
225-
226-
#if MANIFOLD_PAR == 1
227-
if (nbEdges > 1e5) {
228-
// Note that this is slightly different from the single thread version
229-
// because we store all indices instead of just indices of forward
230-
// halfedges. Backward halfedges are placed at the end by modifying the
231-
// comparison function.
232-
entries.resize_nofill(nbEdges);
233-
sequence(entries.begin(), entries.end());
234-
stable_sort(entries.begin(), entries.end(), [&](int a, int b) {
235-
const auto& self = halfedge_[a];
236-
const auto& other = halfedge_[b];
237-
// place all backward edges at the end
238-
if (!self.IsForward()) return false;
239-
if (!other.IsForward()) return true;
240-
// dictionary order based on start and end vertices
241-
return self.startVert == other.startVert
242-
? self.endVert < other.endVert
243-
: self.startVert < other.startVert;
244-
});
245-
entries.resize(nbEdges / 2);
246-
} else
247-
#endif
248-
{
249-
entries.clear(true);
250-
entries.reserve(nbEdges / 2);
251-
for (size_t i = 0; i < nbEdges; ++i) {
252-
if (halfedge_[i].IsForward()) {
253-
entries.push_back(i);
254-
}
255-
}
256-
stable_sort(entries.begin(), entries.end(), [&](int a, int b) {
257-
const auto& self = halfedge_[a];
258-
const auto& other = halfedge_[b];
259-
// dictionary order based on start and end vertices
260-
return self.startVert == other.startVert
261-
? self.endVert < other.endVert
262-
: self.startVert < other.startVert;
263-
});
264-
}
265-
266-
s.run(
267-
entries.size() - 1,
268-
[&](int i) {
269-
const int h0 = entries[i];
270-
const int h1 = entries[i + 1];
271-
return (halfedge_[h0].startVert == halfedge_[h1].startVert &&
272-
halfedge_[h0].endVert == halfedge_[h1].endVert);
273-
},
274-
[&](int i) {
275-
DedupeEdge(entries[i]);
276-
numFlagged++;
277-
});
278-
279-
if (numFlagged == 0) break;
280-
281-
#ifdef MANIFOLD_DEBUG
282-
if (ManifoldParams().verbose) {
283-
std::cout << "found " << numFlagged << " duplicate edges to split"
284-
<< std::endl;
285-
}
286-
#endif
287-
}
219+
DedupeEdges();
288220
}
289221

290222
/**
@@ -782,22 +714,194 @@ void Manifold::Impl::RecursiveEdgeSwap(const int edge, int& tag,
782714

783715
void Manifold::Impl::SplitPinchedVerts() {
784716
ZoneScoped;
785-
std::vector<bool> vertProcessed(NumVert(), false);
786-
std::vector<bool> halfedgeProcessed(halfedge_.size(), false);
787-
for (size_t i = 0; i < halfedge_.size(); ++i) {
788-
if (halfedgeProcessed[i]) continue;
789-
int vert = halfedge_[i].startVert;
790-
if (vertProcessed[vert]) {
791-
vertPos_.push_back(vertPos_[vert]);
792-
vert = NumVert() - 1;
793-
} else {
794-
vertProcessed[vert] = true;
717+
718+
auto nbEdges = halfedge_.size();
719+
#if MANIFOLD_PAR == 1
720+
if (nbEdges > 1e4) {
721+
std::mutex mutex;
722+
std::vector<size_t> pinched;
723+
// This parallelized version is non-trivial so we can't reuse the code
724+
//
725+
// The idea here is to identify cycles of halfedges that can be iterated
726+
// through using ForVert. Pinched verts are vertices where there are
727+
// multiple cycles associated with the vertex. Each cycle is identified with
728+
// the largest halfedge index within the cycle, and when there are multiple
729+
// cycles associated with the same starting vertex but with different ids,
730+
// it means we have a pinched vertex. This check is done by using a single
731+
// atomic cas operation, the expected case is either invalid id (the vertex
732+
// was not processed) or with the same id.
733+
//
734+
// The local store is to store the processed halfedges, so to avoid
735+
// repetitive processing. Note that it only approximates the processed
736+
// halfedges because it is thread local. This is why we need a vector to
737+
// deduplicate the probematic halfedges we found.
738+
std::vector<std::atomic<size_t>> largestEdge(NumVert());
739+
for_each(ExecutionPolicy::Par, countAt(0), countAt(NumVert()),
740+
[&largestEdge](size_t i) {
741+
largestEdge[i].store(std::numeric_limits<size_t>::max());
742+
});
743+
tbb::combinable<std::vector<bool>> store(
744+
[nbEdges]() { return std::vector<bool>(nbEdges, false); });
745+
tbb::parallel_for(
746+
tbb::blocked_range<size_t>(0, nbEdges),
747+
[&store, &mutex, &pinched, &largestEdge, this](const auto& r) {
748+
auto& local = store.local();
749+
std::vector<size_t> pinchedLocal;
750+
for (auto i = r.begin(); i < r.end(); ++i) {
751+
if (local[i]) continue;
752+
local[i] = true;
753+
const int vert = halfedge_[i].startVert;
754+
size_t largest = i;
755+
ForVert(i, [&local, &largest](int current) {
756+
local[current] = true;
757+
largest = std::max(largest, static_cast<size_t>(current));
758+
});
759+
size_t expected = std::numeric_limits<size_t>::max();
760+
if (!largestEdge[vert].compare_exchange_strong(expected, largest) &&
761+
expected != largest) {
762+
// we know that there is another loop...
763+
pinchedLocal.push_back(largest);
764+
}
765+
}
766+
if (!pinchedLocal.empty()) {
767+
std::lock_guard<std::mutex> lock(mutex);
768+
pinched.insert(pinched.end(), pinchedLocal.begin(),
769+
pinchedLocal.end());
770+
}
771+
});
772+
773+
std::vector<bool> halfedgeProcessed(nbEdges, false);
774+
for (size_t i : pinched) {
775+
if (halfedgeProcessed[i]) continue;
776+
vertPos_.push_back(vertPos_[halfedge_[i].startVert]);
777+
const int vert = NumVert() - 1;
778+
ForVert(i, [this, vert, &halfedgeProcessed](int current) {
779+
halfedgeProcessed[current] = true;
780+
halfedge_[current].startVert = vert;
781+
halfedge_[halfedge_[current].pairedHalfedge].endVert = vert;
782+
});
795783
}
796-
ForVert(i, [this, &halfedgeProcessed, vert](int current) {
797-
halfedgeProcessed[current] = true;
798-
halfedge_[current].startVert = vert;
799-
halfedge_[halfedge_[current].pairedHalfedge].endVert = vert;
800-
});
784+
} else
785+
#endif
786+
{
787+
std::vector<bool> vertProcessed(NumVert(), false);
788+
std::vector<bool> halfedgeProcessed(nbEdges, false);
789+
for (size_t i = 0; i < nbEdges; ++i) {
790+
if (halfedgeProcessed[i]) continue;
791+
int vert = halfedge_[i].startVert;
792+
if (vertProcessed[vert]) {
793+
vertPos_.push_back(vertPos_[vert]);
794+
vert = NumVert() - 1;
795+
} else {
796+
vertProcessed[vert] = true;
797+
}
798+
ForVert(i, [this, &halfedgeProcessed, vert](int current) {
799+
halfedgeProcessed[current] = true;
800+
halfedge_[current].startVert = vert;
801+
halfedge_[halfedge_[current].pairedHalfedge].endVert = vert;
802+
});
803+
}
804+
}
805+
}
806+
807+
void Manifold::Impl::DedupeEdges() {
808+
while (1) {
809+
ZoneScopedN("DedupeEdge");
810+
811+
const size_t nbEdges = halfedge_.size();
812+
std::vector<size_t> duplicates;
813+
auto localLoop = [&](size_t start, size_t end, std::vector<bool>& local,
814+
std::vector<size_t>& results) {
815+
// Iterate over all halfedges that start with the same vertex, and check
816+
// for halfedges with the same ending vertex.
817+
// Note: we use Vec and linear search when the number of neighbor is
818+
// small because unordered_set requires allocations and is expensive.
819+
// We switch to unordered_set when the number of neighbor is
820+
// larger to avoid making things quadratic.
821+
//
822+
// The local store is to store the processed halfedges, so to avoid
823+
// repetitive processing. Note that it only approximates the processed
824+
// halfedges because it is thread local. This is why we need a vector to
825+
// deduplicate the probematic halfedges we found.
826+
Vec<int> endVerts;
827+
std::unordered_set<int> endVertSet;
828+
for (auto i = start; i < end; ++i) {
829+
if (local[i] || halfedge_[i].startVert == -1 ||
830+
halfedge_[i].endVert == -1)
831+
continue;
832+
local[i] = true;
833+
// we want to keep the allocation
834+
endVerts.clear(false);
835+
endVertSet.clear();
836+
ForVert(
837+
i, [&local, &endVerts, &endVertSet, &results, this](int current) {
838+
local[current] = true;
839+
if (halfedge_[current].startVert == -1 ||
840+
halfedge_[current].endVert == -1) {
841+
return;
842+
}
843+
if (endVertSet.empty()) {
844+
if (std::find(endVerts.begin(), endVerts.end(),
845+
halfedge_[current].endVert) != endVerts.end()) {
846+
results.push_back(current);
847+
} else {
848+
endVerts.push_back(halfedge_[current].endVert);
849+
// switch to hashset for vertices with many neighbors
850+
if (endVerts.size() > 32) {
851+
endVertSet.insert(endVerts.begin(), endVerts.end());
852+
endVerts.clear(false);
853+
}
854+
}
855+
} else {
856+
if (!endVertSet.insert(halfedge_[current].endVert).second) {
857+
results.push_back(current);
858+
}
859+
}
860+
});
861+
}
862+
};
863+
#if MANIFOLD_PAR == 1
864+
if (nbEdges > 1e4) {
865+
std::mutex mutex;
866+
tbb::combinable<std::vector<bool>> store(
867+
[nbEdges]() { return std::vector<bool>(nbEdges, false); });
868+
tbb::parallel_for(
869+
tbb::blocked_range<size_t>(0, nbEdges),
870+
[&store, &mutex, &duplicates, this, &localLoop](const auto& r) {
871+
auto& local = store.local();
872+
std::vector<size_t> duplicatesLocal;
873+
localLoop(r.begin(), r.end(), local, duplicatesLocal);
874+
if (!duplicatesLocal.empty()) {
875+
std::lock_guard<std::mutex> lock(mutex);
876+
duplicates.insert(duplicates.end(), duplicatesLocal.begin(),
877+
duplicatesLocal.end());
878+
}
879+
});
880+
} else
881+
#endif
882+
{
883+
std::vector<bool> local(nbEdges, false);
884+
localLoop(0, nbEdges, local, duplicates);
885+
}
886+
887+
size_t numFlagged = 0;
888+
// there may be duplicates
889+
std::vector<bool> handled(nbEdges, false);
890+
for (size_t i : duplicates) {
891+
if (handled[i]) continue;
892+
DedupeEdge(i);
893+
numFlagged++;
894+
handled[i] = true;
895+
}
896+
897+
if (numFlagged == 0) break;
898+
899+
#ifdef MANIFOLD_DEBUG
900+
if (ManifoldParams().verbose) {
901+
std::cout << "found " << numFlagged << " duplicate edges to split"
902+
<< std::endl;
903+
}
904+
#endif
801905
}
802906
}
803907
} // namespace manifold

src/impl.cpp

+5-2
Original file line numberDiff line numberDiff line change
@@ -223,14 +223,17 @@ void Manifold::Impl::CreateFaces() {
223223
stable_sort(triPriority.begin(), triPriority.end(),
224224
[](auto a, auto b) { return a.area2 > b.area2; });
225225

226+
Vec<int> interiorHalfedges;
226227
for (const auto tp : triPriority) {
227228
if (meshRelation_.triRef[tp.tri].faceID >= 0) continue;
228229

229230
meshRelation_.triRef[tp.tri].faceID = tp.tri;
230231
const vec3 base = vertPos_[halfedge_[3 * tp.tri].startVert];
231232
const vec3 normal = faceNormal_[tp.tri];
232-
std::vector<int> interiorHalfedges = {3 * tp.tri, 3 * tp.tri + 1,
233-
3 * tp.tri + 2};
233+
interiorHalfedges.resize(3);
234+
interiorHalfedges[0] = 3 * tp.tri;
235+
interiorHalfedges[1] = 3 * tp.tri + 1;
236+
interiorHalfedges[2] = 3 * tp.tri + 2;
234237
while (!interiorHalfedges.empty()) {
235238
const int h =
236239
NextHalfedge(halfedge_[interiorHalfedges.back()].pairedHalfedge);

src/impl.h

+5-1
Original file line numberDiff line numberDiff line change
@@ -188,6 +188,8 @@ struct Manifold::Impl {
188188

189189
Vec<ivec3> triVerts;
190190
triVerts.reserve(numTri);
191+
if (triRef.size() > 0) meshRelation_.triRef.reserve(numTri);
192+
if (numProp > 0) meshRelation_.triProperties.reserve(numTri);
191193
for (size_t i = 0; i < numTri; ++i) {
192194
ivec3 tri;
193195
for (const size_t j : {0, 1, 2}) {
@@ -244,7 +246,8 @@ struct Manifold::Impl {
244246
meshRelation_.originalID = -1;
245247
}
246248

247-
inline void ForVert(int halfedge, std::function<void(int halfedge)> func) {
249+
template <typename F>
250+
inline void ForVert(int halfedge, F func) {
248251
int current = halfedge;
249252
do {
250253
current = NextHalfedge(halfedge_[current].pairedHalfedge);
@@ -341,6 +344,7 @@ struct Manifold::Impl {
341344
void FormLoop(int current, int end);
342345
void CollapseTri(const ivec3& triEdge);
343346
void SplitPinchedVerts();
347+
void DedupeEdges();
344348

345349
// subdivision.cpp
346350
int GetNeighbor(int tri) const;

src/vec.h

+3-3
Original file line numberDiff line numberDiff line change
@@ -176,7 +176,7 @@ class Vec : public VecView<T> {
176176
}
177177

178178
void resize(size_t newSize, T val = T()) {
179-
bool shrink = this->size_ > 2 * newSize;
179+
bool shrink = this->size_ > 2 * newSize && this->size_ > 16;
180180
reserve(newSize);
181181
if (this->size_ < newSize) {
182182
fill(autoPolicy(newSize - this->size_), this->ptr_ + this->size_,
@@ -187,13 +187,13 @@ class Vec : public VecView<T> {
187187
}
188188

189189
void resize_nofill(size_t newSize) {
190-
bool shrink = this->size_ > 2 * newSize;
190+
bool shrink = this->size_ > 2 * newSize && this->size_ > 16;
191191
reserve(newSize);
192192
this->size_ = newSize;
193193
if (shrink) shrink_to_fit();
194194
}
195195

196-
void pop_back() { resize(this->size_ - 1); }
196+
void pop_back() { resize_nofill(this->size_ - 1); }
197197

198198
void clear(bool shrink = true) {
199199
this->size_ = 0;

0 commit comments

Comments
 (0)