From cadba713306be6c73461d9f6b8b0ff7f34e3400b Mon Sep 17 00:00:00 2001 From: Abdelrahman Said Date: Sat, 24 May 2025 03:50:31 +0100 Subject: [PATCH] Add threading --- Makefile | 2 +- dod.c | 86 +++++++++++++++++++++++++++++++++++++++++++++----------- run_perf | 2 +- 3 files changed, 71 insertions(+), 19 deletions(-) diff --git a/Makefile b/Makefile index 31f154d..d85a4be 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ CC = clang CFLAGS = -g -O3 -Iraylib/include -Iwapp/src -LDFLAGS = '-Wl,-rpath,$$ORIGIN/raylib/lib' -Lraylib/lib -lraylib -lm +LDFLAGS = '-Wl,-rpath,$$ORIGIN/raylib/lib' -Lraylib/lib -lraylib -lm -lpthread BASEDIR = $(shell realpath ./) RL_SRCDIR = ${BASEDIR}/raylib-src/src RL_BUILDDIR = ${BASEDIR}/raylib-build diff --git a/dod.c b/dod.c index 7b197b2..cfa70ed 100644 --- a/dod.c +++ b/dod.c @@ -6,6 +6,9 @@ #include #include #include +#include + +#define THREAD_COUNT 4 #define MOVABLE_TAG_SHIFT 0 #define RENDERABLE_TAG_SHIFT 1 @@ -57,6 +60,17 @@ struct Manager { u64 count; }; +typedef struct PositionThreadArgs PositionThreadArgs; +struct PositionThreadArgs { + const Rect *zones; + Rect *rects; + Velocity *velocities; + u8 *tags; + u64 count; + f32 delta; +}; + +typedef void *(*PThreadRoutine)(void *); typedef void (*ScaleInitialiser)(Scale *scale, XOR256State *state); typedef void (*VelocityInitialiser)(Velocity *velocity, XOR256State *state); typedef void (*RaylibDrawRectFunc)(int posX, int posY, int width, int height, Color color); @@ -68,6 +82,7 @@ void init_scale_zone(Scale *scale, XOR256State *state); void init_velocity(Velocity *velocity, XOR256State *state); void zero_velocity(Velocity *velocity, XOR256State *state); void update_positions(u8 *tags, Rect *rects, Velocity *velocities, u64 count, f32 delta); +void *update_position_thread(PositionThreadArgs *args); void render_entities(const u8 *tags, const Rect *rects, u64 count); u8 collides(const Rect *rect, const Rect *collider); i16 get_random_velocity(XOR256State *state); @@ -103,7 +118,6 @@ int main(void) { f32 last_time = GetFrameTime(); f32 delta, cur_time; - u64 count = 0; while (!WindowShouldClose()) { f64 time = GetTime(); @@ -111,8 +125,6 @@ int main(void) { break; } - ++count; - cur_time = GetFrameTime(); delta = cur_time - last_time; @@ -133,8 +145,6 @@ int main(void) { CloseWindow(); - printf("%lu\n", count); - return 0; } @@ -189,6 +199,46 @@ void zero_velocity(Velocity *velocity, XOR256State *state) { } void update_positions(u8 *tags, Rect *rects, Velocity *velocities, u64 count, f32 delta) { + persistent pthread_t threads[THREAD_COUNT] = {0}; + persistent PositionThreadArgs args[THREAD_COUNT] = {0}; + + u64 total_entities_count = count - ZONE_COUNT; + u64 thread_entities_count = (u64)(ceil((f64)total_entities_count / THREAD_COUNT)); + + i32 result; + u64 start = ZONE_COUNT; + u64 end, args_count; + + for (u64 i = 0; i < THREAD_COUNT; ++i) { + if (total_entities_count > thread_entities_count) { + end = start + thread_entities_count; + total_entities_count -= thread_entities_count; + } else { + end = start + total_entities_count; + total_entities_count = 0; + } + + args_count = end - start; + + args[i].zones = &(rects[0]); + args[i].tags = &(tags[start]); + args[i].rects = &(rects[start]); + args[i].velocities = &(velocities[start]); + args[i].count = args_count; + args[i].delta = delta; + + start += args_count; + + result = pthread_create(&(threads[i]), NULL, (PThreadRoutine)update_position_thread, (void *)&(args[i])); + assert(result == 0); + } + + for (u64 i = 0; i < THREAD_COUNT; ++i) { + pthread_join(threads[i], NULL); + } +} + +void *update_position_thread(PositionThreadArgs *args) { persistent f32 multipliers[2] = {1.0f, 0.5f}; persistent u64 inside_zone_mask = 0x7; @@ -196,33 +246,35 @@ void update_positions(u8 *tags, Rect *rects, Velocity *velocities, u64 count, f3 f32 pos_x, pos_y; f32 max_x, max_y; - for (u64 i = ZONE_COUNT; i < count; ++i) { - tags[i] &= inside_zone_mask; + for (u64 i = 0; i < args->count; ++i) { + args->tags[i] &= inside_zone_mask; for (u64 j = 0; j < ZONE_COUNT; ++j) { - tags[i] |= collides(&rects[i], &rects[j]) << INSIDE_ZONE_TAG_SHIFT; + args->tags[i] |= collides(&args->rects[i], &args->zones[j]) << INSIDE_ZONE_TAG_SHIFT; } - index = (tags[i] & ENTITY_TAG_INSIDE_ZONE) >> INSIDE_ZONE_TAG_SHIFT; + index = (args->tags[i] & ENTITY_TAG_INSIDE_ZONE) >> INSIDE_ZONE_TAG_SHIFT; - max_x = WIDTH - rects[i].scale.width; - max_y = HEIGHT - rects[i].scale.height; + max_x = WIDTH - args->rects[i].scale.width; + max_y = HEIGHT - args->rects[i].scale.height; - pos_x = rects[i].position.x + velocities[i].x * multipliers[index] * delta; - pos_y = rects[i].position.y + velocities[i].y * multipliers[index] * delta; + pos_x = args->rects[i].position.x + args->velocities[i].x * multipliers[index] * args->delta; + pos_y = args->rects[i].position.y + args->velocities[i].y * multipliers[index] * args->delta; if (pos_x < 0 || pos_x >= max_x) { pos_x = min(max(pos_x, 0), max_x); - velocities[i].x *= -1; + args->velocities[i].x *= -1; } if (pos_y < 0 || pos_y >= max_y) { pos_y = min(max(pos_y, 0), max_y); - velocities[i].y *= -1; + args->velocities[i].y *= -1; } - rects[i].position.x = roundf(pos_x); - rects[i].position.y = roundf(pos_y); + args->rects[i].position.x = roundf(pos_x); + args->rects[i].position.y = roundf(pos_y); } + + return args; } void render_entities(const u8 *tags, const Rect *rects, u64 count) { diff --git a/run_perf b/run_perf index 5509d30..d3c9723 100755 --- a/run_perf +++ b/run_perf @@ -25,5 +25,5 @@ major-faults,\ minor-faults,\ task-clock" -# perf stat -e $EVENTS -r $REPEATS -o no_dod_stats ./no_dod +perf stat -e $EVENTS -r $REPEATS -o no_dod_stats ./no_dod perf stat -e $EVENTS -r $REPEATS -o dod_stats ./dod