Compare commits

..

2 Commits

Author SHA1 Message Date
ba31dd9f8c Add cache testing functions 2024-05-06 22:55:44 +01:00
f355ab2d25 Update .gitignore 2024-05-06 22:55:32 +01:00
3 changed files with 754 additions and 349 deletions

View File

@ -4,6 +4,7 @@
compile_commands.json compile_commands.json
count_and_distances count_and_distances
pairs.json pairs.json
cache_test
main main
genhavr genhavr
prochavr prochavr

View File

@ -9,6 +9,9 @@
#define ARR_LEN(ARR) sizeof(ARR) / sizeof(*ARR) #define ARR_LEN(ARR) sizeof(ARR) / sizeof(*ARR)
u64 *g_cache_output = NULL;
u64 g_size = 1024 * 1024 * 1024 / 128 * sizeof(u64);
extern "C" void mov_all_bytes_asm(char *buffer, u64 size); extern "C" void mov_all_bytes_asm(char *buffer, u64 size);
extern "C" void nop_all_bytes_asm(u64 size); extern "C" void nop_all_bytes_asm(u64 size);
extern "C" void nop_1x3_all_bytes_asm(u64 size); extern "C" void nop_1x3_all_bytes_asm(u64 size);
@ -47,6 +50,7 @@ extern "C" void read_32x2_simd_offset(char *buffer, u64 size);
extern "C" void read_32x2_simd_no_offset(char *buffer, u64 size); extern "C" void read_32x2_simd_no_offset(char *buffer, u64 size);
extern "C" void read_16x4_simd(char *buffer, u64 size); extern "C" void read_16x4_simd(char *buffer, u64 size);
extern "C" void read_32x4_simd(char *buffer, u64 size); extern "C" void read_32x4_simd(char *buffer, u64 size);
extern "C" void cache_test(char *buffer, u64 size, u64 mask);
void test_fread(reptester *tester, alloc_type type); void test_fread(reptester *tester, alloc_type type);
void test_read(reptester *tester, alloc_type type); void test_read(reptester *tester, alloc_type type);
@ -89,6 +93,20 @@ void test_read_32x2_simd_offset(reptester *tester, alloc_type type);
void test_read_32x2_simd_no_offset(reptester *tester, alloc_type type); void test_read_32x2_simd_no_offset(reptester *tester, alloc_type type);
void test_read_16x4_simd(reptester *tester, alloc_type type); void test_read_16x4_simd(reptester *tester, alloc_type type);
void test_read_32x4_simd(reptester *tester, alloc_type type); void test_read_32x4_simd(reptester *tester, alloc_type type);
void test_cache_test_16k(reptester *tester, alloc_type type);
void test_cache_test_32k(reptester *tester, alloc_type type);
void test_cache_test_64k(reptester *tester, alloc_type type);
void test_cache_test_128k(reptester *tester, alloc_type type);
void test_cache_test_512k(reptester *tester, alloc_type type);
void test_cache_test_1m(reptester *tester, alloc_type type);
void test_cache_test_2m(reptester *tester, alloc_type type);
void test_cache_test_4m(reptester *tester, alloc_type type);
void test_cache_test_8m(reptester *tester, alloc_type type);
void test_cache_test_16m(reptester *tester, alloc_type type);
void test_cache_test_32m(reptester *tester, alloc_type type);
void test_cache_test_64m(reptester *tester, alloc_type type);
void test_cache_test_512m(reptester *tester, alloc_type type);
void test_cache_test_full(reptester *tester, alloc_type type);
u64 get_file_length(FILE *fp); u64 get_file_length(FILE *fp);
int main(int argc, char *argv[]) { int main(int argc, char *argv[]) {
@ -108,6 +126,8 @@ int main(int argc, char *argv[]) {
break; break;
} }
g_cache_output = (u64 *)calloc(1, g_size);
// clang-format off // clang-format off
reptester tester = { reptester tester = {
{filename, NULL, 0, 0}, // params {filename, NULL, 0, 0}, // params
@ -182,15 +202,36 @@ int main(int argc, char *argv[]) {
// {{"READ 2x2", "READ 2x2 WITH MALLOC"}, test_read_2x2}, // {{"READ 2x2", "READ 2x2 WITH MALLOC"}, test_read_2x2},
// {{"READ 4x2", "READ 4x2 WITH MALLOC"}, test_read_4x2}, // {{"READ 4x2", "READ 4x2 WITH MALLOC"}, test_read_4x2},
// {{"READ 8x2", "READ 8x2 WITH MALLOC"}, test_read_8x2}, // {{"READ 8x2", "READ 8x2 WITH MALLOC"}, test_read_8x2},
{{"READ 4x2_simd", "READ 4x2_simd WITH MALLOC"}, test_read_4x2_simd}, // {{"READ 4x2_simd", "READ 4x2_simd WITH MALLOC"}, test_read_4x2_simd},
{{"READ 8x2_simd", "READ 8x2_simd WITH MALLOC"}, test_read_8x2_simd}, // {{"READ 8x2_simd", "READ 8x2_simd WITH MALLOC"}, test_read_8x2_simd},
{{"READ 16x2_simd", "READ 16x2_simd WITH MALLOC"}, test_read_16x2_simd}, // {{"READ 16x2_simd", "READ 16x2_simd WITH MALLOC"},
{{"READ 32x2_simd_offset", "READ 32x2_simd_offset WITH MALLOC"}, // test_read_16x2_simd},
test_read_32x2_simd_offset}, // {{"READ 32x2_simd_offset", "READ 32x2_simd_offset WITH MALLOC"},
{{"READ 32x2_simd_no_offset", "READ 32x2_simd_no_offset WITH MALLOC"}, // test_read_32x2_simd_offset},
test_read_32x2_simd_no_offset}, // {{"READ 32x2_simd_no_offset", "READ 32x2_simd_no_offset WITH MALLOC"},
{{"READ 16x4_simd", "READ 16x4_simd WITH MALLOC"}, test_read_16x4_simd}, // test_read_32x2_simd_no_offset},
{{"READ 32x4_simd", "READ 32x4_simd WITH MALLOC"}, test_read_32x4_simd}, // {{"READ 16x4_simd", "READ 16x4_simd WITH MALLOC"},
// test_read_16x4_simd},
// {{"READ 32x4_simd", "READ 32x4_simd WITH MALLOC"},
// test_read_32x4_simd},
{{"CACHE TEST 16K", "CACHE TEST 16K WITH MALLOC"}, test_cache_test_16k},
{{"CACHE TEST 32K", "CACHE TEST 32K WITH MALLOC"}, test_cache_test_32k},
{{"CACHE TEST 64K", "CACHE TEST 64K WITH MALLOC"}, test_cache_test_64k},
{{"CACHE TEST 128K", "CACHE TEST 128K WITH MALLOC"},
test_cache_test_128k},
{{"CACHE TEST 512K", "CACHE TEST 512K WITH MALLOC"},
test_cache_test_512k},
{{"CACHE TEST 1M", "CACHE TEST 1M WITH MALLOC"}, test_cache_test_1m},
{{"CACHE TEST 2M", "CACHE TEST 2M WITH MALLOC"}, test_cache_test_2m},
{{"CACHE TEST 4M", "CACHE TEST 4M WITH MALLOC"}, test_cache_test_4m},
{{"CACHE TEST 8M", "CACHE TEST 8M WITH MALLOC"}, test_cache_test_8m},
{{"CACHE TEST 16M", "CACHE TEST 16M WITH MALLOC"}, test_cache_test_16m},
{{"CACHE TEST 32M", "CACHE TEST 32M WITH MALLOC"}, test_cache_test_32m},
{{"CACHE TEST 64M", "CACHE TEST 64M WITH MALLOC"}, test_cache_test_64m},
{{"CACHE TEST 512M", "CACHE TEST 512M WITH MALLOC"},
test_cache_test_512m},
{{"CACHE TEST FULL", "CACHE TEST FULL WITH MALLOC"},
test_cache_test_full},
}; };
tester.params.read_size = get_file_length(fp); tester.params.read_size = get_file_length(fp);
@ -586,7 +627,6 @@ void test_align63_loop(reptester *tester, alloc_type type) {
} }
void test_align75_loop(reptester *tester, alloc_type type) { void test_align75_loop(reptester *tester, alloc_type type) {
u64 start = read_cpu_timer(); u64 start = read_cpu_timer();
u64 fault_count_start = page_fault_count(); u64 fault_count_start = page_fault_count();
@ -612,7 +652,6 @@ void test_align75_loop(reptester *tester, alloc_type type) {
} }
void test_align90_loop(reptester *tester, alloc_type type) { void test_align90_loop(reptester *tester, alloc_type type) {
u64 start = read_cpu_timer(); u64 start = read_cpu_timer();
u64 fault_count_start = page_fault_count(); u64 fault_count_start = page_fault_count();
@ -1112,7 +1151,6 @@ void test_read_4x2_simd(reptester *tester, alloc_type type) {
} }
void test_read_8x2_simd(reptester *tester, alloc_type type) { void test_read_8x2_simd(reptester *tester, alloc_type type) {
u64 start = read_cpu_timer(); u64 start = read_cpu_timer();
u64 fault_count_start = page_fault_count(); u64 fault_count_start = page_fault_count();
@ -1138,7 +1176,6 @@ void test_read_8x2_simd(reptester *tester, alloc_type type) {
} }
void test_read_16x2_simd(reptester *tester, alloc_type type) { void test_read_16x2_simd(reptester *tester, alloc_type type) {
u64 start = read_cpu_timer(); u64 start = read_cpu_timer();
u64 fault_count_start = page_fault_count(); u64 fault_count_start = page_fault_count();
@ -1264,6 +1301,356 @@ void test_read_32x4_simd(reptester *tester, alloc_type type) {
handle_free(tester, type); handle_free(tester, type);
} }
void test_cache_test_16k(reptester *tester, alloc_type type) {
u64 start = read_cpu_timer();
u64 fault_count_start = page_fault_count();
handle_alloc(tester, type);
u64 total_size = tester->params.read_size * tester->params.read_count;
cache_test(tester->params.buffer, total_size, 0x3fff);
u64 fault_count_end = page_fault_count();
u64 end = read_cpu_timer();
u64 read_time = end - start;
u64 page_faults = fault_count_end - fault_count_start;
tester->results = {
total_size,
read_time,
page_faults,
};
handle_free(tester, type);
}
void test_cache_test_32k(reptester *tester, alloc_type type) {
u64 start = read_cpu_timer();
u64 fault_count_start = page_fault_count();
handle_alloc(tester, type);
u64 total_size = tester->params.read_size * tester->params.read_count;
cache_test(tester->params.buffer, total_size, 0x7fff);
u64 fault_count_end = page_fault_count();
u64 end = read_cpu_timer();
u64 read_time = end - start;
u64 page_faults = fault_count_end - fault_count_start;
tester->results = {
total_size,
read_time,
page_faults,
};
handle_free(tester, type);
}
void test_cache_test_64k(reptester *tester, alloc_type type) {
u64 start = read_cpu_timer();
u64 fault_count_start = page_fault_count();
handle_alloc(tester, type);
u64 total_size = tester->params.read_size * tester->params.read_count;
cache_test(tester->params.buffer, total_size, 0xffff);
u64 fault_count_end = page_fault_count();
u64 end = read_cpu_timer();
u64 read_time = end - start;
u64 page_faults = fault_count_end - fault_count_start;
tester->results = {
total_size,
read_time,
page_faults,
};
handle_free(tester, type);
}
void test_cache_test_128k(reptester *tester, alloc_type type) {
u64 start = read_cpu_timer();
u64 fault_count_start = page_fault_count();
handle_alloc(tester, type);
u64 total_size = tester->params.read_size * tester->params.read_count;
cache_test(tester->params.buffer, total_size, 0x1ffff);
u64 fault_count_end = page_fault_count();
u64 end = read_cpu_timer();
u64 read_time = end - start;
u64 page_faults = fault_count_end - fault_count_start;
tester->results = {
total_size,
read_time,
page_faults,
};
handle_free(tester, type);
}
void test_cache_test_512k(reptester *tester, alloc_type type) {
u64 start = read_cpu_timer();
u64 fault_count_start = page_fault_count();
handle_alloc(tester, type);
u64 total_size = tester->params.read_size * tester->params.read_count;
cache_test(tester->params.buffer, total_size, 0x7ffff);
u64 fault_count_end = page_fault_count();
u64 end = read_cpu_timer();
u64 read_time = end - start;
u64 page_faults = fault_count_end - fault_count_start;
tester->results = {
total_size,
read_time,
page_faults,
};
handle_free(tester, type);
}
void test_cache_test_1m(reptester *tester, alloc_type type) {
u64 start = read_cpu_timer();
u64 fault_count_start = page_fault_count();
handle_alloc(tester, type);
u64 total_size = tester->params.read_size * tester->params.read_count;
cache_test(tester->params.buffer, total_size, 0xfffff);
u64 fault_count_end = page_fault_count();
u64 end = read_cpu_timer();
u64 read_time = end - start;
u64 page_faults = fault_count_end - fault_count_start;
tester->results = {
total_size,
read_time,
page_faults,
};
handle_free(tester, type);
}
void test_cache_test_2m(reptester *tester, alloc_type type) {
u64 start = read_cpu_timer();
u64 fault_count_start = page_fault_count();
handle_alloc(tester, type);
u64 total_size = tester->params.read_size * tester->params.read_count;
cache_test(tester->params.buffer, total_size, 0x1fffff);
u64 fault_count_end = page_fault_count();
u64 end = read_cpu_timer();
u64 read_time = end - start;
u64 page_faults = fault_count_end - fault_count_start;
tester->results = {
total_size,
read_time,
page_faults,
};
handle_free(tester, type);
}
void test_cache_test_4m(reptester *tester, alloc_type type) {
u64 start = read_cpu_timer();
u64 fault_count_start = page_fault_count();
handle_alloc(tester, type);
u64 total_size = tester->params.read_size * tester->params.read_count;
cache_test(tester->params.buffer, total_size, 0x3fffff);
u64 fault_count_end = page_fault_count();
u64 end = read_cpu_timer();
u64 read_time = end - start;
u64 page_faults = fault_count_end - fault_count_start;
tester->results = {
total_size,
read_time,
page_faults,
};
handle_free(tester, type);
}
void test_cache_test_8m(reptester *tester, alloc_type type) {
u64 start = read_cpu_timer();
u64 fault_count_start = page_fault_count();
handle_alloc(tester, type);
u64 total_size = tester->params.read_size * tester->params.read_count;
cache_test(tester->params.buffer, total_size, 0x7fffff);
u64 fault_count_end = page_fault_count();
u64 end = read_cpu_timer();
u64 read_time = end - start;
u64 page_faults = fault_count_end - fault_count_start;
tester->results = {
total_size,
read_time,
page_faults,
};
handle_free(tester, type);
}
void test_cache_test_16m(reptester *tester, alloc_type type) {
u64 start = read_cpu_timer();
u64 fault_count_start = page_fault_count();
handle_alloc(tester, type);
u64 total_size = tester->params.read_size * tester->params.read_count;
cache_test(tester->params.buffer, total_size, 0xffffff);
u64 fault_count_end = page_fault_count();
u64 end = read_cpu_timer();
u64 read_time = end - start;
u64 page_faults = fault_count_end - fault_count_start;
tester->results = {
total_size,
read_time,
page_faults,
};
handle_free(tester, type);
}
void test_cache_test_32m(reptester *tester, alloc_type type) {
u64 start = read_cpu_timer();
u64 fault_count_start = page_fault_count();
handle_alloc(tester, type);
u64 total_size = tester->params.read_size * tester->params.read_count;
cache_test(tester->params.buffer, total_size, 0x1ffffff);
u64 fault_count_end = page_fault_count();
u64 end = read_cpu_timer();
u64 read_time = end - start;
u64 page_faults = fault_count_end - fault_count_start;
tester->results = {
total_size,
read_time,
page_faults,
};
handle_free(tester, type);
}
void test_cache_test_64m(reptester *tester, alloc_type type) {
u64 start = read_cpu_timer();
u64 fault_count_start = page_fault_count();
handle_alloc(tester, type);
u64 total_size = tester->params.read_size * tester->params.read_count;
cache_test(tester->params.buffer, total_size, 0x3ffffff);
u64 fault_count_end = page_fault_count();
u64 end = read_cpu_timer();
u64 read_time = end - start;
u64 page_faults = fault_count_end - fault_count_start;
tester->results = {
total_size,
read_time,
page_faults,
};
handle_free(tester, type);
}
void test_cache_test_512m(reptester *tester, alloc_type type) {
u64 start = read_cpu_timer();
u64 fault_count_start = page_fault_count();
handle_alloc(tester, type);
u64 total_size = tester->params.read_size * tester->params.read_count;
cache_test(tester->params.buffer, total_size, 0x1fffffff);
u64 fault_count_end = page_fault_count();
u64 end = read_cpu_timer();
u64 read_time = end - start;
u64 page_faults = fault_count_end - fault_count_start;
tester->results = {
total_size,
read_time,
page_faults,
};
handle_free(tester, type);
}
void test_cache_test_full(reptester *tester, alloc_type type) {
u64 start = read_cpu_timer();
u64 fault_count_start = page_fault_count();
handle_alloc(tester, type);
u64 total_size = tester->params.read_size * tester->params.read_count;
cache_test(tester->params.buffer, total_size, 0xffffffffffffffff);
u64 fault_count_end = page_fault_count();
u64 end = read_cpu_timer();
u64 read_time = end - start;
u64 page_faults = fault_count_end - fault_count_start;
tester->results = {
total_size,
read_time,
page_faults,
};
handle_free(tester, type);
}
u64 get_file_length(FILE *fp) { u64 get_file_length(FILE *fp) {
if (!fp) { if (!fp) {
return 0; return 0;

View File

@ -36,6 +36,7 @@ global read_32x2_simd_offset
global read_32x2_simd_no_offset global read_32x2_simd_no_offset
global read_16x4_simd global read_16x4_simd
global read_32x4_simd global read_32x4_simd
global cache_test ; Expects 3 inputs (pointer, read_count, mask)
mov_all_bytes_asm: mov_all_bytes_asm:
xor rax, rax xor rax, rax
@ -492,3 +493,19 @@ read_32x4_simd:
jb .loop jb .loop
ret ret
; cache_test(buffer, size, mask)
;   rdi = base pointer of the buffer to read from
;   rsi = number of bytes to read in total
;   rdx = mask ANDed onto the running offset after every 128-byte step
;
; Each iteration issues four unaligned 32-byte AVX loads (128 bytes) from
; buffer + offset, then advances the offset by 128 and wraps it with the mask.
; With a mask of (2^k - 1) the loads therefore keep cycling over the first
; 2^k bytes of the buffer; with an all-ones mask the offset just keeps growing.
;
; The loop body runs before the count is checked, so at least 128 bytes are
; always read, and a size that is not a multiple of 128 is effectively rounded
; up (the final iteration still loads a full 128 bytes).
;
; Clobbers: rdi is restored to its entry value each iteration; rsi, r10, r11,
; ymm0-ymm3 and flags are modified. Only caller-saved scratch registers are
; used: the previous version parked the base pointer in rbx, which is
; callee-saved under the System V AMD64 ABI and was never restored.
cache_test:
    xor r10, r10                ; Zero the running offset
    mov r11, rdi                ; Save original pointer (r11 is caller-saved)

.loop:
    add rdi, r10                ; Advance the pointer to base + current offset
    add r10, 128                ; Step the offset for the next iteration
    and r10, rdx                ; Wrap the offset with the mask

    vmovdqu ymm0, [rdi + 0]
    vmovdqu ymm1, [rdi + 32]
    vmovdqu ymm2, [rdi + 64]
    vmovdqu ymm3, [rdi + 96]

    mov rdi, r11                ; Restore original pointer
    sub rsi, 128                ; Decrement remaining byte count
    ja .loop                    ; Continue while the count stayed above zero (no borrow)

    ret