merge generate_children into do_iteration

Also fix a really weird slowdown we get from directly mutating the
result of state.allocator.get_ref (i.e. a Node by reference).  Maybe the
compiler just can't inline that call?  Regardless, this patch fixes the
slowdown!
This commit is contained in:
2025-12-12 03:54:30 +00:00
parent 7e801df280
commit 2dbe3d0a58
2 changed files with 24 additions and 30 deletions

View File

@@ -16,42 +16,40 @@ namespace cw::worker
using cw::node::Fraction;
using cw::node::Node;
// Given `index`, return the indices of the children of the node stored at
// that index.  A child link that is negative is treated as "not generated
// yet": the child is allocated via `allocator` and the link is stored back
// into the node.
//
// For a node holding the fraction n/d the children are created as
//   left  = n / (n + d)
//   right = (n + d) / d
//
// Returns {left index, right index}.  The returned values are always the
// links as they stand after any allocation, never the stale (negative) links
// read before the children were created.
std::tuple<u64, u64> generate_children(NodeAllocator &allocator, u64 index)
{
    // Snapshot the node by value so its fields can be read while allocating.
    Node node = allocator.get_val(index);

    // Track the links locally; they are updated as children get allocated.
    auto left = node.left;
    auto right = node.right;

    if (left < 0)
    {
        left = allocator.alloc(Fraction{
            node.value.numerator, node.value.numerator + node.value.denominator});
    }
    if (right < 0)
    {
        right = allocator.alloc(
            Fraction{node.value.numerator + node.value.denominator,
                     node.value.denominator});
    }

    // Take the reference only once all allocation is finished and write both
    // links in one go.
    // NOTE(review): presumably alloc() may grow the allocator's storage and
    // invalidate references obtained earlier -- confirm against NodeAllocator.
    Node &node_ref = allocator.get_ref(index);
    node_ref.left = left;
    node_ref.right = right;

    return {static_cast<u64>(left), static_cast<u64>(right)};
}
// Performs one step over the work queue while holding state.mutex: takes the
// index at the front of state.queue, makes sure the node at that index has
// both child links set, and pushes the child indices onto the queue.  If the
// queue is empty the mutex is released and the function returns without
// doing anything else.
//
// NOTE(review): this span reads like a diff whose +/- markers were stripped.
// It contains both the commented-out and the live lock/unlock calls, a call
// to generate_children AND the same child-generation logic inlined, and two
// pairs of queue pushes.  Taken literally, the children are pushed twice.
// Confirm against the actual file which of these lines are meant to remain.
void do_iteration(State &state)
{
// state.mutex.lock();
state.mutex.lock();
if (state.queue.empty())
{
// Unlock since there isn't any work to be done.
// state.mutex.unlock();
state.mutex.unlock();
return;
}
// Take the next node index to process off the front of the queue.
u64 index = state.queue.front();
state.queue.pop();
// Variant 1: obtain the child indices through generate_children.
u64 left_child, right_child;
std::tie(left_child, right_child) =
generate_children(state.allocator, index);
// Copy of the node at `index`; its fields are read below while allocating.
Node node = state.allocator.get_val(index);
state.queue.push(left_child);
state.queue.push(right_child);
// state.mutex.unlock();
// Variant 2: same logic inlined.  A negative link means the child is missing
// and gets allocated: left = n / (n + d), right = (n + d) / d for a node
// holding the fraction n/d.
i64 left = node.left, right = node.right;
if (left < 0)
{
left = state.allocator.alloc(Fraction{
node.value.numerator, node.value.numerator + node.value.denominator});
}
if (right < 0)
{
right = state.allocator.alloc(
Fraction{node.value.numerator + node.value.denominator,
node.value.denominator});
}
// The reference is taken only after both alloc calls, then both links are
// written back in one go.
Node &node_ref = state.allocator.get_ref(index);
node_ref.left = left;
node_ref.right = right;
// Enqueue the children so they are processed by later iterations.
state.queue.push(left);
state.queue.push(right);
state.mutex.unlock();
}
void worker(State &state)

View File

@@ -18,11 +18,7 @@ namespace cw::worker
using cw::node::NodeAllocator;
using cw::state::State;
// 1000 ms delay.
// NOTE(review): presumably how long a paused worker thread sleeps -- confirm
// against the worker loop, which is not visible here.
constexpr auto THREAD_PAUSE_DELAY = std::chrono::milliseconds(1000);
// NOTE(review): THREAD_GENERAL_DELAY is defined twice in this span (100 ns
// here, 10 ms below).  This looks like the old and new value of a diff shown
// side by side; only one definition can exist in a real header.
constexpr auto THREAD_GENERAL_DELAY = std::chrono::nanoseconds(100);
// Given `index`, return the indices of its children in the tree.  If not
// present already, generate them using the allocator.
std::tuple<u64, u64> generate_children(NodeAllocator &allocator, u64 index);
constexpr auto THREAD_GENERAL_DELAY = std::chrono::milliseconds(10);
// Performs a single iteration which consists of the following:
// 1) pop an index off the iteration queue