Add threading

This commit is contained in:
Abdelrahman Said 2025-05-24 03:50:31 +01:00
parent e6d39028fb
commit cadba71330
3 changed files with 71 additions and 19 deletions

View File

@ -1,6 +1,6 @@
CC = clang
CFLAGS = -g -O3 -Iraylib/include -Iwapp/src
LDFLAGS = '-Wl,-rpath,$$ORIGIN/raylib/lib' -Lraylib/lib -lraylib -lm
LDFLAGS = '-Wl,-rpath,$$ORIGIN/raylib/lib' -Lraylib/lib -lraylib -lm -lpthread
BASEDIR = $(shell realpath ./)
RL_SRCDIR = ${BASEDIR}/raylib-src/src
RL_BUILDDIR = ${BASEDIR}/raylib-build

86
dod.c
View File

@ -6,6 +6,9 @@
#include <stdbool.h>
#include <assert.h>
#include <string.h>
#include <pthread.h>
#define THREAD_COUNT 4
#define MOVABLE_TAG_SHIFT 0
#define RENDERABLE_TAG_SHIFT 1
@ -57,6 +60,17 @@ struct Manager {
u64 count;
};
/*
 * Work description handed to a single position-update worker thread.
 *
 * update_positions() fills one of these per worker: `zones` always points at
 * the start of the shared rect array, while `tags`, `rects` and `velocities`
 * point at the first element of the worker's own slice of the entity arrays.
 */
typedef struct PositionThreadArgs {
  const Rect *zones;    /* start of the shared rect array; only read (collision tests) */
  Rect *rects;          /* first rect of this worker's slice; positions are written back */
  Velocity *velocities; /* velocities for the same slice, index-aligned with `rects` */
  u8 *tags;             /* tag bytes for the same slice, index-aligned with `rects` */
  u64 count;            /* number of entities in the slice */
  f32 delta;            /* time step that scales the velocity applied to each entity */
} PositionThreadArgs;
typedef void *(*PThreadRoutine)(void *);
typedef void (*ScaleInitialiser)(Scale *scale, XOR256State *state);
typedef void (*VelocityInitialiser)(Velocity *velocity, XOR256State *state);
typedef void (*RaylibDrawRectFunc)(int posX, int posY, int width, int height, Color color);
@ -68,6 +82,7 @@ void init_scale_zone(Scale *scale, XOR256State *state);
void init_velocity(Velocity *velocity, XOR256State *state);
void zero_velocity(Velocity *velocity, XOR256State *state);
void update_positions(u8 *tags, Rect *rects, Velocity *velocities, u64 count, f32 delta);
void *update_position_thread(PositionThreadArgs *args);
void render_entities(const u8 *tags, const Rect *rects, u64 count);
u8 collides(const Rect *rect, const Rect *collider);
i16 get_random_velocity(XOR256State *state);
@ -103,7 +118,6 @@ int main(void) {
f32 last_time = GetFrameTime();
f32 delta, cur_time;
u64 count = 0;
while (!WindowShouldClose()) {
f64 time = GetTime();
@ -111,8 +125,6 @@ int main(void) {
break;
}
++count;
cur_time = GetFrameTime();
delta = cur_time - last_time;
@ -133,8 +145,6 @@ int main(void) {
CloseWindow();
printf("%lu\n", count);
return 0;
}
@ -189,6 +199,46 @@ void zero_velocity(Velocity *velocity, XOR256State *state) {
}
void update_positions(u8 *tags, Rect *rects, Velocity *velocities, u64 count, f32 delta) {
persistent pthread_t threads[THREAD_COUNT] = {0};
persistent PositionThreadArgs args[THREAD_COUNT] = {0};
u64 total_entities_count = count - ZONE_COUNT;
u64 thread_entities_count = (u64)(ceil((f64)total_entities_count / THREAD_COUNT));
i32 result;
u64 start = ZONE_COUNT;
u64 end, args_count;
for (u64 i = 0; i < THREAD_COUNT; ++i) {
if (total_entities_count > thread_entities_count) {
end = start + thread_entities_count;
total_entities_count -= thread_entities_count;
} else {
end = start + total_entities_count;
total_entities_count = 0;
}
args_count = end - start;
args[i].zones = &(rects[0]);
args[i].tags = &(tags[start]);
args[i].rects = &(rects[start]);
args[i].velocities = &(velocities[start]);
args[i].count = args_count;
args[i].delta = delta;
start += args_count;
result = pthread_create(&(threads[i]), NULL, (PThreadRoutine)update_position_thread, (void *)&(args[i]));
assert(result == 0);
}
for (u64 i = 0; i < THREAD_COUNT; ++i) {
pthread_join(threads[i], NULL);
}
}
void *update_position_thread(PositionThreadArgs *args) {
persistent f32 multipliers[2] = {1.0f, 0.5f};
persistent u64 inside_zone_mask = 0x7;
@ -196,33 +246,35 @@ void update_positions(u8 *tags, Rect *rects, Velocity *velocities, u64 count, f3
f32 pos_x, pos_y;
f32 max_x, max_y;
for (u64 i = ZONE_COUNT; i < count; ++i) {
tags[i] &= inside_zone_mask;
for (u64 i = 0; i < args->count; ++i) {
args->tags[i] &= inside_zone_mask;
for (u64 j = 0; j < ZONE_COUNT; ++j) {
tags[i] |= collides(&rects[i], &rects[j]) << INSIDE_ZONE_TAG_SHIFT;
args->tags[i] |= collides(&args->rects[i], &args->zones[j]) << INSIDE_ZONE_TAG_SHIFT;
}
index = (tags[i] & ENTITY_TAG_INSIDE_ZONE) >> INSIDE_ZONE_TAG_SHIFT;
index = (args->tags[i] & ENTITY_TAG_INSIDE_ZONE) >> INSIDE_ZONE_TAG_SHIFT;
max_x = WIDTH - rects[i].scale.width;
max_y = HEIGHT - rects[i].scale.height;
max_x = WIDTH - args->rects[i].scale.width;
max_y = HEIGHT - args->rects[i].scale.height;
pos_x = rects[i].position.x + velocities[i].x * multipliers[index] * delta;
pos_y = rects[i].position.y + velocities[i].y * multipliers[index] * delta;
pos_x = args->rects[i].position.x + args->velocities[i].x * multipliers[index] * args->delta;
pos_y = args->rects[i].position.y + args->velocities[i].y * multipliers[index] * args->delta;
if (pos_x < 0 || pos_x >= max_x) {
pos_x = min(max(pos_x, 0), max_x);
velocities[i].x *= -1;
args->velocities[i].x *= -1;
}
if (pos_y < 0 || pos_y >= max_y) {
pos_y = min(max(pos_y, 0), max_y);
velocities[i].y *= -1;
args->velocities[i].y *= -1;
}
rects[i].position.x = roundf(pos_x);
rects[i].position.y = roundf(pos_y);
args->rects[i].position.x = roundf(pos_x);
args->rects[i].position.y = roundf(pos_y);
}
return args;
}
void render_entities(const u8 *tags, const Rect *rects, u64 count) {

View File

@ -25,5 +25,5 @@ major-faults,\
minor-faults,\
task-clock"
# perf stat -e $EVENTS -r $REPEATS -o no_dod_stats ./no_dod
perf stat -e $EVENTS -r $REPEATS -o no_dod_stats ./no_dod
perf stat -e $EVENTS -r $REPEATS -o dod_stats ./dod