+further multithreading

I split the grid into 4 quadrants, each managed by its own thread. This improves speed and makes better use of the available resources.
2023-08-26 00:51:17 +01:00
parent 1bc283b195
commit 026aa887f9
2 changed files with 40 additions and 11 deletions
--- a/lib.h
+++ b/lib.h
@@ -12,9 +12,10 @@ typedef struct State
  size_t dwidth;
  size_t window_len;
-  int multiplier;
+  double multiplier;
  bool thread_alive;
  uint64_t payload;
 } state_t;
 bool load_from_file(state_t *state, const char *filepath);
--- a/main.c
+++ b/main.c
@@ -17,12 +17,22 @@
 #include "./lib.h"
-void step(state_t *state)
+struct StepArg
 {
-  for (size_t i = 0; i < state->dwidth; ++i)
+  state_t *state;
-    for (size_t j = 0; j < state->dwidth; ++j)
+  size_t x_min, x_max, y_min, y_max;
 };
 pthread_mutex_t mutex;
 void step(struct StepArg arg)
 {
  state_t *state = arg.state;
  for (size_t i = arg.x_min; i < arg.x_max; ++i)
    for (size_t j = arg.y_min; j < arg.y_max; ++j)
      if (state->data[(i * state->dwidth) + j] >= 4)
      {
        pthread_mutex_lock(&mutex);
        uint64_t *references[] = {
            (j == 0) ? NULL : &state->data[((i)*state->dwidth) + j - 1],
            (i == state->dwidth - 1)
@@ -36,29 +46,43 @@ void step(state_t *state)
          if (references[k])
            *references[k] += state->data[(i * state->dwidth) + j] / 4;
        state->data[(i * state->dwidth) + j] %= 4;
        pthread_mutex_unlock(&mutex);
      }
 }
 void *compute_thread(void *input)
 {
-  state_t *state = input;
+  struct StepArg *arg = input;
-  while (state->thread_alive)
+  while (arg->state->thread_alive)
-    step(state);
+    step(*arg);
  return NULL;
 }
 int main(void)
 {
-  state_t state    = {NULL, 512, 512, 0, true};
+  state_t state    = {NULL, 512, 512, 0, true, pow(2, 20)};
  state.data       = calloc(state.dwidth * state.dwidth, sizeof(*state.data));
  state.multiplier = state.window_len / state.dwidth;
  state.data[(state.dwidth * state.dwidth / 2) + (state.dwidth / 2)] =
      state.payload;
  const float zoom = 0.125f;
  Camera2D camera  = {0};
  camera.zoom      = 1.0f;
-  pthread_t step_thread;
+  pthread_mutex_init(&mutex, NULL);
-  pthread_create(&step_thread, NULL, &compute_thread, &state);
+  struct StepArg a = {&state, 0, state.dwidth / 2, 0, state.dwidth / 2};
  struct StepArg b = {&state, 0, state.dwidth / 2, state.dwidth / 2,
                      state.dwidth};
  struct StepArg c = {&state, state.dwidth / 2, state.dwidth, 0,
                      state.dwidth / 2};
  struct StepArg d = {&state, state.dwidth / 2, state.dwidth, state.dwidth / 2,
                      state.dwidth};
  pthread_t thread_a, thread_b, thread_c, thread_d;
  pthread_create(&thread_a, NULL, &compute_thread, &a);
  pthread_create(&thread_b, NULL, &compute_thread, &b);
  pthread_create(&thread_c, NULL, &compute_thread, &c);
  pthread_create(&thread_d, NULL, &compute_thread, &d);
  InitWindow(state.window_len, state.window_len, "Abelian sand pile");
  SetTargetFPS(60);
@@ -119,10 +143,14 @@ int main(void)
  if (state.thread_alive)
  {
    state.thread_alive = false;
-    pthread_join(step_thread, NULL);
+    pthread_join(thread_a, NULL);
    pthread_join(thread_b, NULL);
    pthread_join(thread_c, NULL);
    pthread_join(thread_d, NULL);
  }
  CloseWindow();
  unsigned char *image_data =
      calloc(3 * state.dwidth * state.dwidth, sizeof(*image_data));