Compare commits

..

5 Commits

Author SHA1 Message Date
94167e05fd Remove unused variables 2024-06-23 21:52:58 +01:00
7177add4ce Switch to using mmap and test unaligned loads 2024-06-23 21:10:14 +01:00
37d3340df9 Rename C++ files 2024-06-22 13:54:45 +01:00
fcdaf41495 Write to test buffer before working with it 2024-05-19 00:32:39 +01:00
cec0662e68 Add 256k cache test function 2024-05-19 00:10:18 +01:00
25 changed files with 158 additions and 45 deletions

View File

@@ -1,2 +1,2 @@
all: all:
clang++ -g dasm.cpp -o dasm clang++ -g dasm.cc -o dasm

View File

@@ -1,2 +1,2 @@
all: all:
clang++ -g dasm.cpp -o dasm clang++ -g dasm.cc -o dasm

View File

@@ -1,2 +1,2 @@
all: all:
clang++ -g dasm.cpp -o dasm clang++ -g dasm.cc -o dasm

View File

@@ -1,7 +1,7 @@
CC=clang++ CC=clang++
CFLAGS=-g -O0 -Wall -Wextra CFLAGS=-g -O0 -Wall -Wextra
LIBS=-Wl,-rpath,./lib -L./lib -lsim86 LIBS=-Wl,-rpath,./lib -L./lib -lsim86
SRC=*.cpp SRC=*.cc
OUT=sim86 OUT=sim86
all: all:

View File

@@ -1,17 +1,17 @@
mk_haversine_fscanf: mk_haversine_fscanf:
clang++ -g cpp/fscanf.cpp cpp/haversine.cpp -o cpp/haverscan clang++ -g cpp/fscanf.cc cpp/haversine.cc -o cpp/haverscan
run_haversine_fscanf: run_haversine_fscanf:
cd ./cpp && ./haverscan cd ./cpp && ./haverscan
mk_haversine_strtok: mk_haversine_strtok:
clang++ -g cpp/strtok.cpp cpp/haversine.cpp -o cpp/haverstrtok clang++ -g cpp/strtok.cc cpp/haversine.cc -o cpp/haverstrtok
run_haversine_strtok: run_haversine_strtok:
cd ./cpp && ./haverstrtok cd ./cpp && ./haverstrtok
mk_test: mk_test:
clang++ -g -lpthread cpp/test.cpp cpp/haversine.cpp -o cpp/test clang++ -g -lpthread cpp/test.cc cpp/haversine.cc -o cpp/test
run_test: run_test:
cd ./cpp && ./test cd ./cpp && ./test

View File

@@ -43,11 +43,11 @@ fi
# GENERATOR # GENERATOR
GENSRC="./src/generator/gen_argparser.cpp \ GENSRC="./src/generator/gen_argparser.cc \
./src/generator/generator.cpp \ ./src/generator/generator.cc \
./src/haversine.cpp \ ./src/haversine.cc \
./src/point_types.cpp \ ./src/point_types.cc \
./src/generator/main.cpp" ./src/generator/main.cc"
GENOUT=genhavr GENOUT=genhavr
(set -x ; $CXX $CFLAGS $GENSRC -o $GENOUT) (set -x ; $CXX $CFLAGS $GENSRC -o $GENOUT)
@@ -64,10 +64,10 @@ JSONFLAGS="-c "
JSON_BUILD_DIR=json_build JSON_BUILD_DIR=json_build
PROCSRC="./$JSON_BUILD_DIR/*.o \ PROCSRC="./$JSON_BUILD_DIR/*.o \
./src/haversine.cpp \ ./src/haversine.cc \
./src/point_types.cpp \ ./src/point_types.cc \
./src/processor/proc_argparser.cpp \ ./src/processor/proc_argparser.cc \
./src/processor/main.cpp " ./src/processor/main.cc "
PROCOUT=prochavr PROCOUT=prochavr
# MEMTESTER # MEMTESTER
@@ -113,7 +113,7 @@ if [[ $BASIC_PROFILING == true ]] || [[ $FULL_PROFILING == true ]]; then
cd ../ cd ../
# REPETITION TESTING # REPETITION TESTING
REPTESTSRC="./src/repetition_testing/*.cpp ./$PROF_BUILD_DIR/*.o $ASM_LIB" REPTESTSRC="./src/repetition_testing/*.cc ./$PROF_BUILD_DIR/*.o $ASM_LIB"
REPTESTOUT=reptest REPTESTOUT=reptest
(set -x ; $CXX $CFLAGS $REPTESTFLAGS $REPTESTSRC -o $REPTESTOUT) (set -x ; $CXX $CFLAGS $REPTESTFLAGS $REPTESTSRC -o $REPTESTOUT)

View File

@@ -5,13 +5,11 @@
#include <stdio.h> #include <stdio.h>
#include <stdlib.h> #include <stdlib.h>
#include <string.h> #include <string.h>
#include <sys/mman.h>
#include <unistd.h> #include <unistd.h>
#define ARR_LEN(ARR) sizeof(ARR) / sizeof(*ARR) #define ARR_LEN(ARR) sizeof(ARR) / sizeof(*ARR)
u64 *g_cache_output = NULL;
u64 g_size = 1024 * 1024 * 1024 / 128 * sizeof(u64);
extern "C" void mov_all_bytes_asm(char *buffer, u64 size); extern "C" void mov_all_bytes_asm(char *buffer, u64 size);
extern "C" void nop_all_bytes_asm(u64 size); extern "C" void nop_all_bytes_asm(u64 size);
extern "C" void nop_1x3_all_bytes_asm(u64 size); extern "C" void nop_1x3_all_bytes_asm(u64 size);
@@ -51,6 +49,7 @@ extern "C" void read_32x2_simd_no_offset(char *buffer, u64 size);
extern "C" void read_16x4_simd(char *buffer, u64 size); extern "C" void read_16x4_simd(char *buffer, u64 size);
extern "C" void read_32x4_simd(char *buffer, u64 size); extern "C" void read_32x4_simd(char *buffer, u64 size);
extern "C" void cache_test(char *buffer, u64 size, u64 mask); extern "C" void cache_test(char *buffer, u64 size, u64 mask);
extern "C" void cache_test_unaligned(char *buffer, u64 size, u64 mask);
void test_fread(reptester *tester, alloc_type type); void test_fread(reptester *tester, alloc_type type);
void test_read(reptester *tester, alloc_type type); void test_read(reptester *tester, alloc_type type);
@@ -94,9 +93,12 @@ void test_read_32x2_simd_no_offset(reptester *tester, alloc_type type);
void test_read_16x4_simd(reptester *tester, alloc_type type); void test_read_16x4_simd(reptester *tester, alloc_type type);
void test_read_32x4_simd(reptester *tester, alloc_type type); void test_read_32x4_simd(reptester *tester, alloc_type type);
void test_cache_test_16k(reptester *tester, alloc_type type); void test_cache_test_16k(reptester *tester, alloc_type type);
void test_cache_test_16k_unaligned(reptester *tester, alloc_type type);
void test_cache_test_32k(reptester *tester, alloc_type type); void test_cache_test_32k(reptester *tester, alloc_type type);
void test_cache_test_32k_unaligned(reptester *tester, alloc_type type);
void test_cache_test_64k(reptester *tester, alloc_type type); void test_cache_test_64k(reptester *tester, alloc_type type);
void test_cache_test_128k(reptester *tester, alloc_type type); void test_cache_test_128k(reptester *tester, alloc_type type);
void test_cache_test_256k(reptester *tester, alloc_type type);
void test_cache_test_512k(reptester *tester, alloc_type type); void test_cache_test_512k(reptester *tester, alloc_type type);
void test_cache_test_1m(reptester *tester, alloc_type type); void test_cache_test_1m(reptester *tester, alloc_type type);
void test_cache_test_2m(reptester *tester, alloc_type type); void test_cache_test_2m(reptester *tester, alloc_type type);
@@ -126,8 +128,6 @@ int main(int argc, char *argv[]) {
break; break;
} }
g_cache_output = (u64 *)calloc(1, g_size);
// clang-format off // clang-format off
reptester tester = { reptester tester = {
{filename, NULL, 0, 0}, // params {filename, NULL, 0, 0}, // params
@@ -215,30 +215,45 @@ int main(int argc, char *argv[]) {
// {{"READ 32x4_simd", "READ 32x4_simd WITH MALLOC"}, // {{"READ 32x4_simd", "READ 32x4_simd WITH MALLOC"},
// test_read_32x4_simd}, // test_read_32x4_simd},
{{"CACHE TEST 16K", "CACHE TEST 16K WITH MALLOC"}, test_cache_test_16k}, {{"CACHE TEST 16K", "CACHE TEST 16K WITH MALLOC"}, test_cache_test_16k},
{{"CACHE TEST 32K", "CACHE TEST 32K WITH MALLOC"}, test_cache_test_32k}, {{"CACHE TEST 16K UNALIGNED", "CACHE TEST 16K UNALIGNED WITH MALLOC"},
{{"CACHE TEST 64K", "CACHE TEST 64K WITH MALLOC"}, test_cache_test_64k}, test_cache_test_16k_unaligned},
{{"CACHE TEST 128K", "CACHE TEST 128K WITH MALLOC"}, // {{"CACHE TEST 32K", "CACHE TEST 32K WITH MALLOC"},
test_cache_test_128k}, // test_cache_test_32k},
{{"CACHE TEST 512K", "CACHE TEST 512K WITH MALLOC"}, // {{"CACHE TEST 64K", "CACHE TEST 64K WITH MALLOC"},
test_cache_test_512k}, // test_cache_test_64k},
{{"CACHE TEST 1M", "CACHE TEST 1M WITH MALLOC"}, test_cache_test_1m}, // {{"CACHE TEST 128K", "CACHE TEST 128K WITH MALLOC"},
{{"CACHE TEST 2M", "CACHE TEST 2M WITH MALLOC"}, test_cache_test_2m}, // test_cache_test_128k},
{{"CACHE TEST 4M", "CACHE TEST 4M WITH MALLOC"}, test_cache_test_4m}, // {{"CACHE TEST 256K", "CACHE TEST 256K WITH MALLOC"},
{{"CACHE TEST 8M", "CACHE TEST 8M WITH MALLOC"}, test_cache_test_8m}, // test_cache_test_256k},
{{"CACHE TEST 16M", "CACHE TEST 16M WITH MALLOC"}, test_cache_test_16m}, // {{"CACHE TEST 512K", "CACHE TEST 512K WITH MALLOC"},
{{"CACHE TEST 32M", "CACHE TEST 32M WITH MALLOC"}, test_cache_test_32m}, // test_cache_test_512k},
{{"CACHE TEST 64M", "CACHE TEST 64M WITH MALLOC"}, test_cache_test_64m}, // {{"CACHE TEST 1M", "CACHE TEST 1M WITH MALLOC"}, test_cache_test_1m},
{{"CACHE TEST 512M", "CACHE TEST 512M WITH MALLOC"}, // {{"CACHE TEST 2M", "CACHE TEST 2M WITH MALLOC"}, test_cache_test_2m},
test_cache_test_512m}, // {{"CACHE TEST 4M", "CACHE TEST 4M WITH MALLOC"}, test_cache_test_4m},
{{"CACHE TEST FULL", "CACHE TEST FULL WITH MALLOC"}, // {{"CACHE TEST 8M", "CACHE TEST 8M WITH MALLOC"}, test_cache_test_8m},
test_cache_test_full}, // {{"CACHE TEST 16M", "CACHE TEST 16M WITH MALLOC"},
// test_cache_test_16m},
// {{"CACHE TEST 32M", "CACHE TEST 32M WITH MALLOC"},
// test_cache_test_32m},
// {{"CACHE TEST 64M", "CACHE TEST 64M WITH MALLOC"},
// test_cache_test_64m},
// {{"CACHE TEST 512M", "CACHE TEST 512M WITH MALLOC"},
// test_cache_test_512m},
// {{"CACHE TEST FULL", "CACHE TEST FULL WITH MALLOC"},
// test_cache_test_full},
}; };
tester.params.read_size = get_file_length(fp); tester.params.read_size = get_file_length(fp);
tester.params.read_count = 1; tester.params.read_count = 1;
tester.params.buffer = (char *)malloc(tester.params.read_size + 1); tester.params.buffer =
(char *)mmap(NULL, tester.params.read_size + 1, PROT_READ | PROT_WRITE,
MAP_ANON | MAP_SHARED | MAP_NORESERVE, -1, 0);
memset(tester.params.buffer, 0, tester.params.read_size + 1); memset(tester.params.buffer, 0, tester.params.read_size + 1);
for (u64 i = 0; i < tester.params.read_size; ++i) {
tester.params.buffer[i] = (char)i;
}
for (u64 i = 0; i < waves; ++i) { for (u64 i = 0; i < waves; ++i) {
for (u64 j = 0; j < ARR_LEN(funcs); ++j) { for (u64 j = 0; j < ARR_LEN(funcs); ++j) {
for (u64 k = 0; k < ALLOC_TYPE_WITH_MALLOC; ++k) { for (u64 k = 0; k < ALLOC_TYPE_WITH_MALLOC; ++k) {
@@ -249,7 +264,7 @@ int main(int argc, char *argv[]) {
fclose(fp); fclose(fp);
free(tester.params.buffer); munmap(tester.params.buffer, tester.params.read_size + 1);
return 0; return 0;
} }
@@ -1326,6 +1341,31 @@ void test_cache_test_16k(reptester *tester, alloc_type type) {
handle_free(tester, type); handle_free(tester, type);
} }
// Repetition test: runs cache_test_unaligned over the tester's buffer with a
// 0x3fff (16K - 1) offset mask.
//
// The timer and page-fault samples bracket handle_alloc() as well as the read
// loop, so any allocation done for `type` is included in the measurement.
// The byte count, timer delta and page-fault delta are stored in
// tester->results before the buffer is handed to handle_free().
void test_cache_test_16k_unaligned(reptester *tester, alloc_type type) {
const u64 timer_begin = read_cpu_timer();
const u64 faults_begin = page_fault_count();

handle_alloc(tester, type);

// Total number of bytes the assembly routine is asked to walk through.
const u64 byte_count = tester->params.read_size * tester->params.read_count;
cache_test_unaligned(tester->params.buffer, byte_count, 0x3fff);

// Sample the fault counter first, then the timer, mirroring the start order.
const u64 faults_end = page_fault_count();
const u64 timer_end = read_cpu_timer();

tester->results = {
byte_count,
timer_end - timer_begin,
faults_end - faults_begin,
};

handle_free(tester, type);
}
void test_cache_test_32k(reptester *tester, alloc_type type) { void test_cache_test_32k(reptester *tester, alloc_type type) {
u64 start = read_cpu_timer(); u64 start = read_cpu_timer();
u64 fault_count_start = page_fault_count(); u64 fault_count_start = page_fault_count();
@@ -1351,6 +1391,31 @@ void test_cache_test_32k(reptester *tester, alloc_type type) {
handle_free(tester, type); handle_free(tester, type);
} }
// Repetition test: runs cache_test_unaligned over the tester's buffer with a
// 0x7fff (32K - 1) offset mask.
//
// Measurement starts before handle_alloc(), so allocation work performed for
// `type` is part of the reported timer delta and page-fault delta. Results
// (byte count, timer delta, page-fault delta) are written to tester->results,
// after which handle_free() is called.
void test_cache_test_32k_unaligned(reptester *tester, alloc_type type) {
const u64 timer_begin = read_cpu_timer();
const u64 faults_begin = page_fault_count();

handle_alloc(tester, type);

// Total number of bytes the assembly routine is asked to walk through.
const u64 byte_count = tester->params.read_size * tester->params.read_count;
cache_test_unaligned(tester->params.buffer, byte_count, 0x7fff);

// Sample the fault counter first, then the timer, mirroring the start order.
const u64 faults_end = page_fault_count();
const u64 timer_end = read_cpu_timer();

tester->results = {
byte_count,
timer_end - timer_begin,
faults_end - faults_begin,
};

handle_free(tester, type);
}
void test_cache_test_64k(reptester *tester, alloc_type type) { void test_cache_test_64k(reptester *tester, alloc_type type) {
u64 start = read_cpu_timer(); u64 start = read_cpu_timer();
u64 fault_count_start = page_fault_count(); u64 fault_count_start = page_fault_count();
@@ -1401,6 +1466,31 @@ void test_cache_test_128k(reptester *tester, alloc_type type) {
handle_free(tester, type); handle_free(tester, type);
} }
// Repetition test: runs cache_test over the tester's buffer with a
// 0x3ffff (256K - 1) offset mask.
//
// Measurement starts before handle_alloc(), so allocation work performed for
// `type` is part of the reported timer delta and page-fault delta. Results
// (byte count, timer delta, page-fault delta) are written to tester->results,
// after which handle_free() is called.
void test_cache_test_256k(reptester *tester, alloc_type type) {
const u64 timer_begin = read_cpu_timer();
const u64 faults_begin = page_fault_count();

handle_alloc(tester, type);

// Total number of bytes the assembly routine is asked to walk through.
const u64 byte_count = tester->params.read_size * tester->params.read_count;
cache_test(tester->params.buffer, byte_count, 0x3ffff);

// Sample the fault counter first, then the timer, mirroring the start order.
const u64 faults_end = page_fault_count();
const u64 timer_end = read_cpu_timer();

tester->results = {
byte_count,
timer_end - timer_begin,
faults_end - faults_begin,
};

handle_free(tester, type);
}
void test_cache_test_512k(reptester *tester, alloc_type type) { void test_cache_test_512k(reptester *tester, alloc_type type) {
u64 start = read_cpu_timer(); u64 start = read_cpu_timer();
u64 fault_count_start = page_fault_count(); u64 fault_count_start = page_fault_count();

View File

@@ -37,6 +37,7 @@ global read_32x2_simd_no_offset
global read_16x4_simd global read_16x4_simd
global read_32x4_simd global read_32x4_simd
global cache_test ; Expects 3 inputs (pointer, read_count, mask) global cache_test ; Expects 3 inputs (pointer, read_count, mask)
global cache_test_unaligned ; Expects 3 inputs (pointer, read_count, mask)
mov_all_bytes_asm: mov_all_bytes_asm:
xor rax, rax xor rax, rax
@@ -509,3 +510,20 @@ cache_test:
sub rsi, 128 ; Decrement count sub rsi, 128 ; Decrement count
ja .loop ja .loop
ret ret
; cache_test_unaligned(pointer, read_count, mask)
;   rdi = buffer pointer, rsi = number of bytes to read, rdx = offset mask
;
; Repeatedly loads 128 bytes (four 32-byte vmovdqu loads) from the buffer,
; starting 5 bytes past the pointer passed in. After each iteration the offset
; advances by 128 and is ANDed with the mask, so the loads keep cycling over
; the same masked window. The loop runs until read_count, decremented by 128
; per iteration, reaches zero or borrows.
;
; With a mask of the form 2^k - 1 (k >= 7) the highest byte touched is at
; buffer + mask + 5, so the buffer must be at least mask + 6 bytes long.
; NOTE(review): confirm every caller's buffer satisfies this.
;
; Clobbers: rdi, rsi, r10, r11, ymm0-ymm3, flags. The base pointer is kept in
; r11 rather than rbx because rbx is callee-saved under the System V AMD64
; ABI and must not be modified without being preserved.
cache_test_unaligned:
xor r10, r10 ; Zero loop counter
add rdi, 5 ; Unalign pointer
mov r11, rdi ; Save original pointer (caller-saved scratch register)
.loop:
add rdi, r10 ; Advance the pointer
add r10, 128 ; Increment loop counter
and r10, rdx ; Mask offset
vmovdqu ymm0, [rdi + 0]
vmovdqu ymm1, [rdi + 32]
vmovdqu ymm2, [rdi + 64]
vmovdqu ymm3, [rdi + 96]
mov rdi, r11 ; Restore original pointer
sub rsi, 128 ; Decrement count
ja .loop
ret

View File

@@ -3,6 +3,7 @@
#include <stdio.h> #include <stdio.h>
#include <stdlib.h> #include <stdlib.h>
#include <string.h> #include <string.h>
#include <sys/mman.h>
#include <sys/resource.h> #include <sys/resource.h>
#include <sys/time.h> #include <sys/time.h>
@@ -10,7 +11,9 @@ void handle_alloc(reptester *tester, alloc_type type) {
switch (type) { switch (type) {
case ALLOC_TYPE_WITH_MALLOC: case ALLOC_TYPE_WITH_MALLOC:
if (!(tester->params.buffer)) { if (!(tester->params.buffer)) {
tester->params.buffer = (char *)malloc(tester->params.read_size + 1); tester->params.buffer = (char *)mmap(
NULL, tester->params.read_size + 1, PROT_READ | PROT_WRITE,
MAP_ANON | MAP_SHARED | MAP_NORESERVE, -1, 0);
memset(tester->params.buffer, 0, tester->params.read_size + 1); memset(tester->params.buffer, 0, tester->params.read_size + 1);
} }
@@ -24,7 +27,7 @@ void handle_free(reptester *tester, alloc_type type) {
switch (type) { switch (type) {
case ALLOC_TYPE_WITH_MALLOC: case ALLOC_TYPE_WITH_MALLOC:
if (tester->params.buffer) { if (tester->params.buffer) {
free(tester->params.buffer); munmap(tester->params.buffer, tester->params.read_size + 1);
tester->params.buffer = NULL; tester->params.buffer = NULL;
} }
@@ -58,7 +61,9 @@ void run_func_test(reptester *tester, reptest_func func, const char *func_name,
if (type == ALLOC_TYPE_WITH_MALLOC) { if (type == ALLOC_TYPE_WITH_MALLOC) {
buffer = tester->params.buffer; buffer = tester->params.buffer;
tester->params.buffer = (char *)malloc(tester->params.read_size + 1); tester->params.buffer =
(char *)mmap(NULL, tester->params.read_size + 1, PROT_READ | PROT_WRITE,
MAP_ANON | MAP_SHARED | MAP_NORESERVE, -1, 0);
memset(tester->params.buffer, 0, tester->params.read_size + 1); memset(tester->params.buffer, 0, tester->params.read_size + 1);
} }
@@ -100,7 +105,7 @@ void run_func_test(reptester *tester, reptest_func func, const char *func_name,
} }
if (type == ALLOC_TYPE_WITH_MALLOC) { if (type == ALLOC_TYPE_WITH_MALLOC) {
free(tester->params.buffer); munmap(tester->params.buffer, tester->params.read_size + 1);
tester->params.buffer = buffer; tester->params.buffer = buffer;
} }