elalish · pca006132 · Sep 5, 2023 · Sep 5, 2023 · Sep 5, 2023 · Sep 5, 2023
diff --git a/src/manifold/src/boolean_result.cpp b/src/manifold/src/boolean_result.cpp
@@ -44,6 +44,8 @@ struct std::hash<std::pair<int, int>> {
 
 namespace {
 
+constexpr int kParallelThreshold = 128;
+
 struct AbsSum : public thrust::binary_function<int, int, int> {
   int operator()(int a, int b) { return abs(a) + abs(b); }
 };
@@ -204,10 +206,10 @@ void AddNewEdgeVerts(
 #if MANIFOLD_PAR == 'T' && __has_include(<tbb/tbb.h>)
   // parallelize operations, requires concurrent_map so we can only enable this
   // with tbb
-  if (p1q2.size() > 128) {
-    // ideally we should have 1 mutex per key, but 128 is enough to avoid
-    // contention for most of the cases
-    std::array<std::mutex, 128> mutexes;
+  if (!ManifoldParams().deterministic && p1q2.size() > kParallelThreshold) {
+    // ideally we should have 1 mutex per key, but kParallelThreshold is enough
+    // to avoid contention for most of the cases
+    std::array<std::mutex, kParallelThreshold> mutexes;
     static tbb::affinity_partitioner ap;
     auto processFun = std::bind(
         process, [&](size_t hash) { mutexes[hash % mutexes.size()].lock(); },
@@ -240,8 +242,8 @@ std::vector<Halfedge> PairUp(std::vector<EdgePos> &edgePos) {
                                [](EdgePos x) { return x.isStart; });
   ASSERT(middle - edgePos.begin() == nEdges, topologyErr, "Non-manifold edge!");
   auto cmp = [](EdgePos a, EdgePos b) { return a.edgePos < b.edgePos; };
-  std::sort(edgePos.begin(), middle, cmp);
-  std::sort(middle, edgePos.end(), cmp);
+  std::stable_sort(edgePos.begin(), middle, cmp);
+  std::stable_sort(middle, edgePos.end(), cmp);
   std::vector<Halfedge> edges;
   for (int i = 0; i < nEdges; ++i)
     edges.push_back({edgePos[i].vert, edgePos[i + nEdges].vert, -1, -1});
@@ -440,7 +442,8 @@ void AppendWholeEdges(Manifold::Impl &outR, Vec<int> &facePtrR,
                       const Vec<char> wholeHalfedgeP, const Vec<int> &i03,
                       const Vec<int> &vP2R, VecView<const int> faceP2R,
                       bool forward) {
-  for_each_n(autoPolicy(inP.halfedge_.size()),
+  for_each_n(ManifoldParams().deterministic ? ExecutionPolicy::Seq
+                                            : autoPolicy(inP.halfedge_.size()),
              zip(wholeHalfedgeP.begin(), inP.halfedge_.begin(), countAt(0)),
              inP.halfedge_.size(),
              DuplicateHalfedges({outR.halfedge_, halfedgeRef, facePtrR,

diff --git a/src/manifold/src/csg_tree.cpp b/src/manifold/src/csg_tree.cpp
@@ -466,35 +466,38 @@ std::shared_ptr<Manifold::Impl> CsgOpNode::BatchBoolean(
     return std::make_shared<Manifold::Impl>(boolean.Result(operation));
   }
 #if MANIFOLD_PAR == 'T' && __has_include(<tbb/tbb.h>)
-  tbb::task_group group;
-  tbb::concurrent_priority_queue<SharedImpl, MeshCompare> queue(results.size());
-  for (auto result : results) {
-    queue.emplace(result);
-  }
-  results.clear();
-  std::function<void()> process = [&]() {
-    while (queue.size() > 1) {
-      SharedImpl a, b;
-      if (!queue.try_pop(a)) continue;
-      if (!queue.try_pop(b)) {
-        queue.push(a);
-        continue;
-      }
-      group.run([&, a, b]() {
-        const Manifold::Impl *aImpl;
-        const Manifold::Impl *bImpl;
-        Boolean3 boolean(*getImplPtr(a), *getImplPtr(b), operation);
-        queue.emplace(
-            std::make_shared<Manifold::Impl>(boolean.Result(operation)));
-        return group.run(process);
-      });
+  if (!ManifoldParams().deterministic) {
+    tbb::task_group group;
+    tbb::concurrent_priority_queue<SharedImpl, MeshCompare> queue(
+        results.size());
+    for (auto result : results) {
+      queue.emplace(result);
     }
-  };
-  group.run_and_wait(process);
-  SharedImpl r;
-  queue.try_pop(r);
-  return std::get<std::shared_ptr<Manifold::Impl>>(r);
-#else
+    results.clear();
+    std::function<void()> process = [&]() {
+      while (queue.size() > 1) {
+        SharedImpl a, b;
+        if (!queue.try_pop(a)) continue;
+        if (!queue.try_pop(b)) {
+          queue.push(a);
+          continue;
+        }
+        group.run([&, a, b]() {
+          const Manifold::Impl *aImpl;
+          const Manifold::Impl *bImpl;
+          Boolean3 boolean(*getImplPtr(a), *getImplPtr(b), operation);
+          queue.emplace(
+              std::make_shared<Manifold::Impl>(boolean.Result(operation)));
+          return group.run(process);
+        });
+      }
+    };
+    group.run_and_wait(process);
+    SharedImpl r;
+    queue.try_pop(r);
+    return std::get<std::shared_ptr<Manifold::Impl>>(r);
+  }
+#endif
   // apply boolean operations starting from smaller meshes
   // the assumption is that boolean operations on smaller meshes is faster,
   // due to less data being copied and processed
@@ -517,7 +520,6 @@ std::shared_ptr<Manifold::Impl> CsgOpNode::BatchBoolean(
     std::push_heap(results.begin(), results.end(), cmpFn);
   }
   return std::make_shared<Manifold::Impl>(*results.front());
-#endif
 }
 
 /**
@@ -551,9 +553,8 @@ void CsgOpNode::BatchUnion() const {
     std::vector<Vec<size_t>> disjointSets;
     for (size_t i = 0; i < boxes.size(); i++) {
       auto lambda = [&boxes, i](const Vec<size_t> &set) {
-        return find_if<decltype(set.end())>(
-                   autoPolicy(set.size()), set.begin(), set.end(),
-                   CheckOverlap({boxes, i})) == set.end();
+        return std::find_if(set.begin(), set.end(), CheckOverlap({boxes, i})) ==
+               set.end();
       };
       auto it = std::find_if(disjointSets.begin(), disjointSets.end(), lambda);
       if (it == disjointSets.end()) {
@@ -563,27 +564,22 @@ void CsgOpNode::BatchUnion() const {
       }
     }
     // compose each set of disjoint children
-    std::vector<std::shared_ptr<const Manifold::Impl>> impls(
-        disjointSets.size());
-    for_each_n(
-        disjointSets.size() > 1 ? ExecutionPolicy::Par : ExecutionPolicy::Seq,
-        countAt(0), disjointSets.size(),
-        [children_, &impls, &disjointSets, start](int i) {
-          auto set = disjointSets[i];
-          if (set.size() == 1) {
-            impls[i] = std::dynamic_pointer_cast<CsgLeafNode>(
-                           children_[start + set[0]])
-                           ->GetImpl();
-          } else {
-            std::vector<std::shared_ptr<CsgLeafNode>> tmp;
-            for (size_t j : set) {
-              tmp.push_back(
-                  std::dynamic_pointer_cast<CsgLeafNode>(children_[start + j]));
-            }
-            impls[i] = std::make_shared<const Manifold::Impl>(
-                CsgLeafNode::Compose(tmp));
-          }
-        });
+    std::vector<std::shared_ptr<const Manifold::Impl>> impls;
+    for (auto &set : disjointSets) {
+      if (set.size() == 1) {
+        impls.push_back(
+            std::dynamic_pointer_cast<CsgLeafNode>(children_[start + set[0]])
+                    ->GetImpl());
+      } else {
+        std::vector<std::shared_ptr<CsgLeafNode>> tmp;
+        for (size_t j : set) {
+          tmp.push_back(
+              std::dynamic_pointer_cast<CsgLeafNode>(children_[start + j]));
+        }
+        impls.push_back(
+            std::make_shared<const Manifold::Impl>(CsgLeafNode::Compose(tmp)));
+      }
+    }
 
     children_.erase(children_.begin() + start, children_.end());
     children_.push_back(
@@ -607,8 +603,9 @@ std::vector<std::shared_ptr<CsgNode>> &CsgOpNode::GetChildren(
 
   if (forceToLeafNodes && !impl->forcedToLeafNodes_) {
     impl->forcedToLeafNodes_ = true;
-    for_each(impl->children_.size() > 1 ? ExecutionPolicy::Par
-                                        : ExecutionPolicy::Seq,
+    for_each(impl->children_.size() > 1 && !ManifoldParams().deterministic
+                 ? ExecutionPolicy::Par
+                 : ExecutionPolicy::Seq,
              impl->children_.begin(), impl->children_.end(), [](auto &child) {
                if (child->GetNodeType() != CsgNodeType::Leaf) {
                  child = child->ToLeafNode();

diff --git a/src/manifold/src/edge_op.cpp b/src/manifold/src/edge_op.cpp
@@ -157,7 +157,7 @@ void Manifold::Impl::SimplifyTopology() {
     entries[i].index = i;
   });
 
-  sort(policy, entries.begin(), entries.end());
+  stable_sort(policy, entries.begin(), entries.end());
   for (int i = 0; i < nbEdges - 1; ++i) {
     if (entries[i].start == entries[i + 1].start &&
         entries[i].end == entries[i + 1].end) {

diff --git a/src/manifold/src/properties.cpp b/src/manifold/src/properties.cpp
@@ -286,7 +286,7 @@ bool Manifold::Impl::Is2Manifold() const {
   if (!IsManifold()) return false;
 
   Vec<Halfedge> halfedge(halfedge_);
-  sort(policy, halfedge.begin(), halfedge.end());
+  stable_sort(policy, halfedge.begin(), halfedge.end());
 
   return all_of(policy, countAt(0), countAt(2 * NumEdge() - 1),
                 NoDuplicates({halfedge}));
@@ -313,11 +313,26 @@ int Manifold::Impl::NumDegenerateTris() const {
 
 Properties Manifold::Impl::GetProperties() const {
   if (IsEmpty()) return {0, 0};
-  auto areaVolume = transform_reduce<thrust::pair<float, float>>(
-      autoPolicy(NumTri()), countAt(0), countAt(NumTri()),
-      FaceAreaVolume({halfedge_, vertPos_, precision_}),
-      thrust::make_pair(0.0f, 0.0f), SumPair());
-  return {areaVolume.first, areaVolume.second};
+  // Kahan summation
+  float area = 0;
+  float volume = 0;
+  float areaCompensation = 0;
+  float volumeCompensation = 0;
+  for (int i = 0; i < NumTri(); ++i) {
+    auto [area1, volume1] =
+        FaceAreaVolume({halfedge_, vertPos_, precision_})(i);
+    const float t1 = area + area1;
+    const float t2 = volume + volume1;
+    // we know that the elements are non-negative
+    areaCompensation += (area - t1) + area1;
+    volumeCompensation += (volume - t2) + volume1;
+    area = t1;
+    volume = t2;
+  }
+  area += areaCompensation;
+  volume += volumeCompensation;
+
+  return {area, volume};
 }
 
 void Manifold::Impl::CalculateCurvature(int gaussianIdx, int meanIdx) {

diff --git a/src/manifold/src/sort.cpp b/src/manifold/src/sort.cpp
@@ -310,12 +310,12 @@ void Manifold::Impl::SortVerts() {
   Vec<int> vertNew2Old(numVert);
   sequence(policy, vertNew2Old.begin(), vertNew2Old.end());
 
-  sort(policy, zip(vertMorton.begin(), vertNew2Old.begin()),
-       zip(vertMorton.end(), vertNew2Old.end()),
-       [](const thrust::tuple<uint32_t, int>& a,
-          const thrust::tuple<uint32_t, int>& b) {
-         return thrust::get<0>(a) < thrust::get<0>(b);
-       });
+  stable_sort(policy, zip(vertMorton.begin(), vertNew2Old.begin()),
+              zip(vertMorton.end(), vertNew2Old.end()),
+              [](const thrust::tuple<uint32_t, int>& a,
+                 const thrust::tuple<uint32_t, int>& b) {
+                return thrust::get<0>(a) < thrust::get<0>(b);
+              });
 
   ReindexVerts(vertNew2Old, numVert);
 
@@ -394,12 +394,12 @@ void Manifold::Impl::SortFaces(Vec<Box>& faceBox, Vec<uint32_t>& faceMorton) {
   auto policy = autoPolicy(faceNew2Old.size());
   sequence(policy, faceNew2Old.begin(), faceNew2Old.end());
 
-  sort(policy, zip(faceMorton.begin(), faceNew2Old.begin()),
-       zip(faceMorton.end(), faceNew2Old.end()),
-       [](const thrust::tuple<uint32_t, int>& a,
-          const thrust::tuple<uint32_t, int>& b) {
-         return thrust::get<0>(a) < thrust::get<0>(b);
-       });
+  stable_sort(policy, zip(faceMorton.begin(), faceNew2Old.begin()),
+              zip(faceMorton.end(), faceNew2Old.end()),
+              [](const thrust::tuple<uint32_t, int>& a,
+                 const thrust::tuple<uint32_t, int>& b) {
+                return thrust::get<0>(a) < thrust::get<0>(b);
+              });
 
   // Tris were flagged for removal with pairedHalfedge = -1 and assigned kNoCode
   // to sort them to the end, which allows them to be removed.
@@ -572,12 +572,13 @@ bool MeshGL::Merge() {
              zip(vertMorton.begin(), vertBox.begin(), openVerts.cbegin()),
              numOpenVert, VertMortonBox({vertPropD, numProp, precision, bBox}));
 
-  sort(policy, zip(vertMorton.begin(), vertBox.begin(), openVerts.begin()),
-       zip(vertMorton.end(), vertBox.end(), openVerts.end()),
-       [](const thrust::tuple<uint32_t, Box, int>& a,
-          const thrust::tuple<uint32_t, Box, int>& b) {
-         return thrust::get<0>(a) < thrust::get<0>(b);
-       });
+  stable_sort(policy,
+              zip(vertMorton.begin(), vertBox.begin(), openVerts.begin()),
+              zip(vertMorton.end(), vertBox.end(), openVerts.end()),
+              [](const thrust::tuple<uint32_t, Box, int>& a,
+                 const thrust::tuple<uint32_t, Box, int>& b) {
+                return thrust::get<0>(a) < thrust::get<0>(b);
+              });
 
   Collider collider(vertBox, vertMorton);
   SparseIndices toMerge = collider.Collisions<true>(vertBox.cview());

diff --git a/src/polygon/src/polygon.cpp b/src/polygon/src/polygon.cpp
@@ -119,8 +119,8 @@ void CheckTopology(const std::vector<PolyEdge> &halfedges) {
     return a.startVert < b.startVert ||
            (a.startVert == b.startVert && a.endVert < b.endVert);
   };
-  std::sort(forward.begin(), forward.end(), cmp);
-  std::sort(backward.begin(), backward.end(), cmp);
+  std::stable_sort(forward.begin(), forward.end(), cmp);
+  std::stable_sort(backward.begin(), backward.end(), cmp);
   for (int i = 0; i < n_edges; ++i) {
     ASSERT(forward[i].startVert == backward[i].startVert &&
                forward[i].endVert == backward[i].endVert,
@@ -838,9 +838,10 @@ class Monotones {
     }
 #if MANIFOLD_PAR == 'T' && TBB_INTERFACE_VERSION >= 10000 && \
     __has_include(<pstl/glue_execution_defs.h>)
-    std::sort(std::execution::par_unseq, starts.begin(), starts.end(), cmp);
+    std::stable_sort(std::execution::par_unseq, starts.begin(), starts.end(),
+                     cmp);
 #else
-    std::sort(starts.begin(), starts.end(), cmp);
+    std::stable_sort(starts.begin(), starts.end(), cmp);
 #endif
 
     std::vector<VertItr> skipped;

diff --git a/src/utilities/include/par.h b/src/utilities/include/par.h
@@ -177,7 +177,6 @@ STL_DYNAMIC_BACKEND_VOID(fill)
 STL_DYNAMIC_BACKEND_VOID(copy)
 STL_DYNAMIC_BACKEND_VOID(inclusive_scan)
 STL_DYNAMIC_BACKEND_VOID(copy_n)
-STL_DYNAMIC_BACKEND_VOID(sort)
 
 // void implies that the user have to specify the return type in the template
 // argument, as we are unable to deduce it

diff --git a/src/utilities/include/public.h b/src/utilities/include/public.h
@@ -292,7 +292,7 @@ struct Box {
   /**
    * Does this box overlap the one given (including equality)?
    */
-  bool DoesOverlap(const Box& box) const {
+  inline bool DoesOverlap(const Box& box) const {
     return min.x <= box.max.x && min.y <= box.max.y && min.z <= box.max.z &&
            max.x >= box.min.x && max.y >= box.min.y && max.z >= box.min.z;
   }
@@ -301,7 +301,7 @@ struct Box {
    * Does the given point project within the XY extent of this box
    * (including equality)?
    */
-  bool DoesOverlap(glm::vec3 p) const {  // projected in z
+  inline bool DoesOverlap(glm::vec3 p) const {  // projected in z
     return p.x <= max.x && p.x >= min.x && p.y <= max.y && p.y >= min.y;
   }
 
@@ -445,6 +445,8 @@ struct ExecutionParams {
   /// Suppresses printed errors regarding CW triangles. Has no effect if
   /// processOverlaps is true.
   bool suppressErrors = false;
+  /// Deterministic outputs. Will disable some parallel optimizations.
+  bool deterministic = false;
 };
 
 #ifdef MANIFOLD_DEBUG

diff --git a/src/utilities/include/sparse.h b/src/utilities/include/sparse.h
@@ -64,7 +64,7 @@ class SparseIndices {
 
   void Sort() {
     VecView<int64_t> view = AsVec64();
-    sort(autoPolicy(size()), view.begin(), view.end());
+    stable_sort(autoPolicy(size()), view.begin(), view.end());
   }
 
   void Resize(int size) { data_.resize(size * sizeof(int64_t), -1); }

diff --git a/test/boolean_test.cpp b/test/boolean_test.cpp
@@ -879,7 +879,6 @@ TEST(Boolean, Sweep) {
   std::vector<Manifold> result;
 
   for (int i = 0; i < numPoints; i++) {
-    // std::cerr << i << std::endl;
     std::vector<Manifold> primitives =
         cutterPrimitives(pathPoints[i], pathPoints[(i + 1) % numPoints],
                          pathPoints[(i + 2) % numPoints]);