diff --git a/src/ipc/broad_phase/lbvh.cpp b/src/ipc/broad_phase/lbvh.cpp index 0d61f112b..fd622a3df 100644 --- a/src/ipc/broad_phase/lbvh.cpp +++ b/src/ipc/broad_phase/lbvh.cpp @@ -18,6 +18,7 @@ namespace xs = xsimd; #endif #include +#include using namespace std::placeholders; @@ -95,6 +96,11 @@ void LBVH::build( } namespace { + /// Returns the number of common leading bits (CLZ of XOR) between sorted + /// Morton codes at positions i and j. code_i is the Morton code at position + /// i, passed explicitly to avoid a redundant lookup. Returns -1 when j is + /// out of bounds. Duplicate codes fall back to CLZ of the index XOR + /// (offset by 32 so it sorts after any code-level difference). int delta( const LBVH::MortonCodeElements& sorted_morton_codes, int i, @@ -107,8 +113,8 @@ namespace { uint64_t code_j = sorted_morton_codes[j].morton_code; if (code_i == code_j) { // handle duplicate morton codes - int element_idx_i = i; // sorted_morton_codes[i].elementIdx; - int element_idx_j = j; // sorted_morton_codes[j].elementIdx; + int element_idx_i = i; + int element_idx_j = j; // add 32 for common prefix of code_i ^ code_j #if defined(__GNUC__) || defined(__clang__) @@ -123,72 +129,6 @@ namespace { return __lzcnt64(code_i ^ code_j); #endif } - - void determine_range( - const LBVH::MortonCodeElements& sorted_morton_codes, - int idx, - int& lower, - int& upper) - { - // determine direction of the range (+1 or -1) - const uint64_t code = sorted_morton_codes[idx].morton_code; - const int delta_l = delta(sorted_morton_codes, idx, code, idx - 1); - const int delta_r = delta(sorted_morton_codes, idx, code, idx + 1); - const int d = (delta_r >= delta_l) ? 1 : -1; - - // compute upper bound for the length of the range - const int delta_min = std::min(delta_l, delta_r); - int l_max = 2; - while (delta(sorted_morton_codes, idx, code, idx + l_max * d) - > delta_min) { - l_max = l_max << 1; - } - - // find the other end using binary search - int l = 0; - for (int t = l_max >> 1; t > 0; t >>= 1) { - if (delta(sorted_morton_codes, idx, code, idx + (l + t) * d) - > delta_min) { - l += t; - } - } - int jdx = idx + l * d; - - // ensure idx < jdx - lower = std::min(idx, jdx); - upper = std::max(idx, jdx); - } - - int find_split( - const LBVH::MortonCodeElements& sorted_morton_codes, - int first, - int last) - { - uint64_t first_code = sorted_morton_codes[first].morton_code; - - // Calculate the number of highest bits that are the same - // for all objects, using the count-leading-zeros intrinsic. - int common_prefix = delta(sorted_morton_codes, first, first_code, last); - - // Use binary search to find where the next bit differs. - // Specifically, we are looking for the highest object that - // shares more than common_prefix bits with the first one. - int split = first; // initial guess - int stride = last - first; - do { - stride = (stride + 1) >> 1; // exponential decrease - int new_split = split + stride; // proposed new position - if (new_split < last) { - int split_prefix = - delta(sorted_morton_codes, first, first_code, new_split); - if (split_prefix > common_prefix) { - split = new_split; // accept proposal - } - } - } while (stride > 1); - - return split; - } } // namespace void LBVH::init_bvh( @@ -238,7 +178,8 @@ void LBVH::init_bvh( } assert(boxes.size() <= std::numeric_limits::max()); - const int LEAF_OFFSET = int(boxes.size()) - 1; + const int N_LEAVES = int(boxes.size()); + const int LEAF_OFFSET = N_LEAVES - 1; if (rightmost_leaves.size() != lbvh.size()) { rightmost_leaves.resize(lbvh.size()); @@ -246,9 +187,26 @@ void LBVH::init_bvh( LBVH::ConstructionInfos construction_infos(lbvh.size()); { - IPC_TOOLKIT_PROFILE_BLOCK("build_hierarchy"); - tbb::parallel_for(size_t(0), boxes.size(), [&](size_t i) { - assert(i < boxes.size()); + IPC_TOOLKIT_PROFILE_BLOCK("init_visitation_counts"); + tbb::parallel_for(size_t(0), lbvh.size(), [&](size_t i) { + construction_infos[i].visitation_count.store( + 0, std::memory_order_relaxed); + }); + } + + // Apetrei 2014: single bottom-up pass that simultaneously builds the + // hierarchy and computes bounding boxes. Each leaf thread walks toward the + // root, choosing its parent in O(1) by comparing the CLZ-delta values at + // the two ends of its current key range. + // + // In this layout internal node j always splits between sorted keys j and + // j+1. The root is NOT necessarily at index 0, so after construction we + // swap the root into position 0 to match the traversal code's expectation. + std::atomic root_idx(-1); + { + IPC_TOOLKIT_PROFILE_BLOCK("build_hierarchy_and_boxes"); + tbb::parallel_for(0, N_LEAVES, [&](int i) { + // --- Initialize leaf node --- { const auto& box = boxes[morton_codes[i].box_id]; @@ -261,59 +219,51 @@ void LBVH::init_bvh( rightmost_leaves[LEAF_OFFSET + i] = i; } - if (i < LEAF_OFFSET) { - // Find out which range of objects the node corresponds - // to. (This is where the magic happens!) - - int first, last; - determine_range(morton_codes, int(i), first, last); + // --- Bottom-up walk (Apetrei 2014, Fig. 2) --- + // Invariant: the current subtree covers the sorted-key range + // [left_key, right_key]. + int left_key = i; + int right_key = i; + int current_node = LEAF_OFFSET + i; - // Determine where to split the range - int split = find_split(morton_codes, first, last); - - // Select child_a - int child_a = -1; - if (split == first) { - // pointer to leaf node - child_a = LEAF_OFFSET + split; - } else { - child_a = split; // pointer to internal node - } - - // Select child_b - int child_b = -1; - if (split + 1 == last) { - child_b = LEAF_OFFSET + split + 1; // pointer to leaf node + while (true) { + // Choose parent. Candidates are internal node right_key + // (current becomes its left / childA) or internal node + // left_key-1 (current becomes its right / childB). Our delta() + // returns CLZ (higher = more-similar = finer split), so the + // CLOSER ancestor has the LARGER delta — hence ">". + // + // Boundary rules: + // left_key == 0 → must be childA (no node -1) + // right_key == n-1 → must be childB (no node n-1) + const bool is_child_a = (left_key == 0) + || (right_key != N_LEAVES - 1 + && delta( + morton_codes, right_key, + morton_codes[right_key].morton_code, + right_key + 1) + > delta( + morton_codes, left_key - 1, + morton_codes[left_key - 1].morton_code, + left_key)); + const int parent = is_child_a ? right_key : left_key - 1; + + auto& info = construction_infos[parent]; + + // Write the child pointer on the parent node. + // childA writes .left; childB writes .right. + if (is_child_a) { + lbvh[parent].left = current_node; + info.left_range = left_key; } else { - child_b = split + 1; // pointer to internal node + lbvh[parent].right = current_node; + info.right_range = right_key; } - // Record parent-child relationships - lbvh[i].left = child_a; - lbvh[i].right = child_b; - construction_infos[child_a].parent = int(i); - construction_infos[child_b].parent = int(i); - construction_infos[child_a].visitation_count.store( - 0, std::memory_order_relaxed); - construction_infos[child_b].visitation_count.store( - 0, std::memory_order_relaxed); - } + // Atomic arrival gate: the first thread to reach this parent + // stops; the second thread proceeds (it now knows both children + // are complete). - // node 0 is the root and has no parent to set these values - if (i == 0) { - construction_infos[0].parent = 0; - construction_infos[0].visitation_count.store( - 0, std::memory_order_relaxed); - } - }); - } - - { - IPC_TOOLKIT_PROFILE_BLOCK("populate_boxes"); - tbb::parallel_for(size_t(0), boxes.size(), [&](size_t i) { - int node_idx = construction_infos[LEAF_OFFSET + i].parent; - while (true) { - auto& info = construction_infos[node_idx]; if (info.visitation_count++ == 0) { // this is the first thread that arrived at this // node -> finished @@ -322,23 +272,58 @@ void LBVH::init_bvh( // this is the second thread that arrived at this node, // both children are computed -> compute aabb union and // continue - assert(lbvh[node_idx].is_inner()); - const Node& child_b = lbvh[lbvh[node_idx].right]; - const Node& child_a = lbvh[lbvh[node_idx].left]; - lbvh[node_idx].aabb_min = - child_a.aabb_min.min(child_b.aabb_min); - lbvh[node_idx].aabb_max = - child_a.aabb_max.max(child_b.aabb_max); + assert(lbvh[parent].is_inner()); + const Node& child_a = lbvh[lbvh[parent].left]; + const Node& child_b = lbvh[lbvh[parent].right]; + lbvh[parent].aabb_min = child_a.aabb_min.min(child_b.aabb_min); + lbvh[parent].aabb_max = child_a.aabb_max.max(child_b.aabb_max); // Compute rightmost leaf: max of children's rightmost - rightmost_leaves[node_idx] = std::max( - rightmost_leaves[lbvh[node_idx].left], - rightmost_leaves[lbvh[node_idx].right]); - - if (node_idx == 0) { - break; // root node + rightmost_leaves[parent] = std::max( + rightmost_leaves[lbvh[parent].left], + rightmost_leaves[lbvh[parent].right]); + + // Reconstruct the full key range for the parent. + left_key = construction_infos[parent].left_range; + right_key = construction_infos[parent].right_range; + current_node = parent; + + if (left_key == 0 && right_key == N_LEAVES - 1) { + // only one thread should reach the root + int expected = -1; + [[maybe_unused]] bool set = + root_idx.compare_exchange_strong( + expected, current_node); + assert(set); + break; // root AABB is complete } - node_idx = info.parent; + } + }); + } + + // --- Move the root to index 0 so traversal can start there. --- + // In the Apetrei layout the root's index equals the global split position, + // which is generally != 0. We swap the root node into position 0 and patch + // up the single affected child pointer. + // + // Key invariant (Apetrei): node 0's subtree always has left_key=0, so it is + // only ever written as a LEFT child — meaning no internal node ever has + // right==0. Therefore swapping node 0 cannot create a spurious + // is_inner_marker==0 (which would look like a leaf). + const int root = root_idx.load(); + if (root > 0) { + IPC_TOOLKIT_PROFILE_BLOCK("swap_root_to_zero"); + std::swap(lbvh[0], lbvh[root]); + std::swap(rightmost_leaves[0], rightmost_leaves[root]); + + // The root (now at 0) is never any node's child, so no pointer + // references R that needs rewriting to 0. The only pointers that + // referenced 0 (the old node-0) must be rewritten to R. And since old + // node-0 was only ever a LEFT child (see invariant above), we only need + // to patch .left pointers. + tbb::parallel_for(size_t(0), lbvh.size(), [&](size_t i) { + if (lbvh[i].is_inner() && lbvh[i].left == 0) { + lbvh[i].left = root; } }); } diff --git a/src/ipc/broad_phase/lbvh.hpp b/src/ipc/broad_phase/lbvh.hpp index c2733cf1f..64f37de77 100644 --- a/src/ipc/broad_phase/lbvh.hpp +++ b/src/ipc/broad_phase/lbvh.hpp @@ -86,9 +86,11 @@ class LBVH : public BroadPhase { private: struct ConstructionInfo { - /// @brief Parent to the parent - int parent; - /// @brief Number of threads that arrived + /// @brief Left range endpoint passed up by the left child. + int32_t left_range; + /// @brief Right range endpoint passed up by the right child. + int32_t right_range; + /// @brief Number of threads that arrived at this node. std::atomic visitation_count; }; diff --git a/tests/src/tests/broad_phase/test_lbvh.cpp b/tests/src/tests/broad_phase/test_lbvh.cpp index 61a9716a7..80c204f58 100644 --- a/tests/src/tests/broad_phase/test_lbvh.cpp +++ b/tests/src/tests/broad_phase/test_lbvh.cpp @@ -346,4 +346,49 @@ TEST_CASE( lbvh->detect_edge_edge_candidates(ee_candidates); return ee_candidates.size(); }; +} + +TEST_CASE("Benchmark LBVH::build", "[!benchmark][broad_phase][lbvh]") +{ + constexpr double inflation_radius = 0; + + struct Scene { + std::string name, mesh_t0, mesh_t1; + }; + +#ifdef NDEBUG + constexpr int NUM_SCENES = 6; +#else + constexpr int NUM_SCENES = 1; +#endif + + const std::array scenes = { { + Scene { "Cloth-Ball", "cloth_ball92.ply", "cloth_ball93.ply" }, +#ifdef NDEBUG + Scene { "Cloth-Funnel", "cloth-funnel/227.ply", + "cloth-funnel/228.ply" }, + Scene { "Armadillo-Rollers", "armadillo-rollers/326.ply", + "armadillo-rollers/327.ply" }, + Scene { "Rod-Twist", "rod-twist/3036.ply", "rod-twist/3037.ply" }, + Scene { "N-Body-Simulation", "n-body-simulation/balls16_18.ply", + "n-body-simulation/balls16_19.ply" }, + Scene { "Puffer-Ball", "puffer-ball/20.ply", "puffer-ball/21.ply" }, +#endif + } }; + + for (const auto& [scene, mesh_t0, mesh_t1] : scenes) { + Eigen::MatrixXd vertices_t0, vertices_t1; + Eigen::MatrixXi edges, faces; + REQUIRE(tests::load_mesh(mesh_t0, vertices_t0, edges, faces)); + REQUIRE(tests::load_mesh(mesh_t1, vertices_t1, edges, faces)); + + const std::shared_ptr lbvh = std::make_shared(); + + BENCHMARK(fmt::format("LBVH::build [{}]", scene)) + { + lbvh->build( + vertices_t0, vertices_t1, edges, faces, inflation_radius); + return lbvh->edge_nodes().size(); + }; + } } \ No newline at end of file