Compare commits

..

13 Commits

27 changed files with 2293 additions and 363 deletions

View File

@@ -1,2 +1,2 @@
all:
clang++ -g dasm.cpp -o dasm
clang++ -g dasm.cc -o dasm

View File

@@ -1,2 +1,2 @@
all:
clang++ -g dasm.cpp -o dasm
clang++ -g dasm.cc -o dasm

View File

@@ -1,2 +1,2 @@
all:
clang++ -g dasm.cpp -o dasm
clang++ -g dasm.cc -o dasm

View File

@@ -1,7 +1,7 @@
CC=clang++
CFLAGS=-g -O0 -Wall -Wextra
LIBS=-Wl,-rpath,./lib -L./lib -lsim86
SRC=*.cpp
SRC=*.cc
OUT=sim86
all:

View File

@@ -1,17 +1,17 @@
mk_haversine_fscanf:
clang++ -g cpp/fscanf.cpp cpp/haversine.cpp -o cpp/haverscan
clang++ -g cpp/fscanf.cc cpp/haversine.cc -o cpp/haverscan
run_haversine_fscanf:
cd ./cpp && ./haverscan
mk_haversine_strtok:
clang++ -g cpp/strtok.cpp cpp/haversine.cpp -o cpp/haverstrtok
clang++ -g cpp/strtok.cc cpp/haversine.cc -o cpp/haverstrtok
run_haversine_strtok:
cd ./cpp && ./haverstrtok
mk_test:
clang++ -g -lpthread cpp/test.cpp cpp/haversine.cpp -o cpp/test
clang++ -g -lpthread cpp/test.cc cpp/haversine.cc -o cpp/test
run_test:
cd ./cpp && ./test

View File

@@ -4,6 +4,7 @@
compile_commands.json
count_and_distances
pairs.json
cache_test
main
genhavr
prochavr

View File

@@ -43,11 +43,11 @@ fi
# GENERATOR
GENSRC="./src/generator/gen_argparser.cpp \
./src/generator/generator.cpp \
./src/haversine.cpp \
./src/point_types.cpp \
./src/generator/main.cpp"
GENSRC="./src/generator/gen_argparser.cc \
./src/generator/generator.cc \
./src/haversine.cc \
./src/point_types.cc \
./src/generator/main.cc"
GENOUT=genhavr
(set -x ; $CXX $CFLAGS $GENSRC -o $GENOUT)
@@ -64,10 +64,10 @@ JSONFLAGS="-c "
JSON_BUILD_DIR=json_build
PROCSRC="./$JSON_BUILD_DIR/*.o \
./src/haversine.cpp \
./src/point_types.cpp \
./src/processor/proc_argparser.cpp \
./src/processor/main.cpp "
./src/haversine.cc \
./src/point_types.cc \
./src/processor/proc_argparser.cc \
./src/processor/main.cc "
PROCOUT=prochavr
# MEMTESTER
@@ -113,7 +113,7 @@ if [[ $BASIC_PROFILING == true ]] || [[ $FULL_PROFILING == true ]]; then
cd ../
# REPETITION TESTING
REPTESTSRC="./src/repetition_testing/*.cpp ./$PROF_BUILD_DIR/*.o $ASM_LIB"
REPTESTSRC="./src/repetition_testing/*.cc ./$PROF_BUILD_DIR/*.o $ASM_LIB"
REPTESTOUT=reptest
(set -x ; $CXX $CFLAGS $REPTESTFLAGS $REPTESTSRC -o $REPTESTOUT)

File diff suppressed because it is too large Load Diff

View File

@@ -1,318 +0,0 @@
#include "aliases.h"
#include "profiler/timer.h"
#include "repetition_testing/reptester.h"
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
// Element count of a true array (not a pointer or a decayed parameter).
// The argument and the whole expansion are parenthesized so the macro stays
// correct inside larger expressions such as `total / ARR_LEN(a)`.
#define ARR_LEN(ARR) (sizeof(ARR) / sizeof(*(ARR)))
extern "C" void mov_all_bytes_asm(char *buffer, u64 size);
extern "C" void nop_all_bytes_asm(u64 size);
extern "C" void inc_all_bytes_asm(u64 size);
extern "C" void dec_all_bytes_asm(u64 size);
void test_fread(reptester *tester, alloc_type type);
void test_read(reptester *tester, alloc_type type);
void test_write(reptester *tester, alloc_type type);
void test_write_mov_all_bytes_asm(reptester *tester, alloc_type type);
void test_write_nop_all_bytes_asm(reptester *tester, alloc_type type);
void test_write_inc_all_bytes_asm(reptester *tester, alloc_type type);
void test_write_dec_all_bytes_asm(reptester *tester, alloc_type type);
u64 get_file_length(FILE *fp);
/*
 * Usage: reptest FILENAME [WAVE_COUNT]
 *
 * Sizes a scratch buffer to the length of FILENAME, then runs every entry of
 * the `funcs` table once per allocation type, repeating the whole table
 * WAVE_COUNT times (default 1).
 *
 * Returns 0 on success and -1 on a usage error, a negative WAVE_COUNT, an
 * unopenable file, or a failed buffer allocation.
 */
int main(int argc, char *argv[]) {
  if (argc != 2 && argc != 3) {
    printf("Usage: reptest FILENAME [WAVE_COUNT]\n");
    return -1;
  }

  const char *filename = argv[1];
  u64 waves = 1;
  if (argc == 3) {
    // A negative count converted to u64 would turn into an effectively
    // endless run, so reject it up front.
    long requested_waves = atol(argv[2]);
    if (requested_waves < 0) {
      printf("Usage: reptest FILENAME [WAVE_COUNT]\n");
      return -1;
    }
    waves = (u64)requested_waves;
  }

  // clang-format off
  reptester tester = {
    {filename, NULL, 0, 0}, // params
    get_cpu_freq(500), // cpu_freq
    10.0, // wait_time_secs
    0.0, // test_time_secs
    read_cpu_timer(), // test_start_time
    1, // current_run
    {
      UINT64_MAX, // min_time
      0, // max_time
      0, // avg_time
      0, // total_time
    },
    {
      0, // min_faults
      0, // max_faults
      0, // avg_faults
      0, // total_bytes
      0, // total_faults
    },
    {}, // results
  };
  // clang-format on

  FILE *fp = fopen(tester.params.filename, "rb");
  if (!fp) {
    fprintf(stderr, "reptest: cannot open '%s'\n", tester.params.filename);
    return -1;
  }

  func_data funcs[] = {
      {{"WRITE", "WRITE WITH MALLOC"}, test_write},
      {{"WRITE MOV ASM", "WRITE MOV ASM WITH MALLOC"},
       test_write_mov_all_bytes_asm},
      {{"WRITE NOP ASM", "WRITE NOP ASM WITH MALLOC"},
       test_write_nop_all_bytes_asm},
      {{"WRITE INC ASM", "WRITE INC ASM WITH MALLOC"},
       test_write_inc_all_bytes_asm},
      {{"WRITE DEC ASM", "WRITE DEC ASM WITH MALLOC"},
       test_write_dec_all_bytes_asm},
      // {{"READ", "READ WITH MALLOC"}, test_read},
      // {{"FREAD", "FREAD WITH MALLOC"}, test_fread},
  };

  // The handle is only needed to size the buffer; the read tests open the
  // file themselves.
  tester.params.read_size = get_file_length(fp);
  fclose(fp);
  tester.params.read_count = 1;

  tester.params.buffer = (char *)malloc(tester.params.read_size + 1);
  if (!tester.params.buffer) {
    fprintf(stderr, "reptest: cannot allocate test buffer\n");
    return -1;
  }
  // Kept as malloc + memset (not calloc) so every byte of the buffer is
  // actually written before the first test runs.
  memset(tester.params.buffer, 0, tester.params.read_size + 1);

  for (u64 i = 0; i < waves; ++i) {
    for (u64 j = 0; j < ARR_LEN(funcs); ++j) {
      for (u64 k = 0; k < COUNT_ALLOC_TYPE; ++k) {
        run_func_test(&tester, funcs[j].func, funcs[j].names[k], (alloc_type)k);
      }
    }
  }

  free(tester.params.buffer);
  return 0;
}
/*
 * Repetition-test body: reads the test file with a single fread() call.
 *
 * The timer and page-fault samples bracket both handle_alloc() and the
 * fread(), so any allocation cost is part of the measurement. The result
 * triple stored in tester->results is (bytes read, elapsed timer ticks,
 * page faults). If the file cannot be opened the function returns without
 * touching tester->results.
 */
void test_fread(reptester *tester, alloc_type type) {
  FILE *input = fopen(tester->params.filename, "rb");
  if (input == NULL) {
    return;
  }

  const u64 ticks_before = read_cpu_timer();
  const u64 faults_before = page_fault_count();

  handle_alloc(tester, type);
  const u64 items_read = fread(tester->params.buffer, tester->params.read_size,
                               tester->params.read_count, input);

  const u64 faults_after = page_fault_count();
  const u64 ticks_after = read_cpu_timer();

  // fread() reports whole items, so scale back up to bytes.
  const u64 byte_total = items_read * tester->params.read_size;
  const u64 elapsed = ticks_after - ticks_before;
  const u64 fault_delta = faults_after - faults_before;
  tester->results = {
      byte_total,
      elapsed,
      fault_delta,
  };

  handle_free(tester, type);
  fclose(input);
}
/*
 * Repetition-test body: reads the test file with a single read() call on the
 * descriptor underlying a freshly opened FILE.
 *
 * The timer and page-fault samples bracket both handle_alloc() and the
 * read(), so any allocation cost is part of the measurement. The result
 * triple stored in tester->results is (bytes read, elapsed timer ticks,
 * page faults). If the file cannot be opened the function returns without
 * touching tester->results.
 *
 * read() returns -1 on failure; that is recorded as 0 bytes rather than being
 * converted to an enormous unsigned byte count. A short read is reported
 * as-is (the call is deliberately not retried, since one call is what is
 * being timed).
 */
void test_read(reptester *tester, alloc_type type) {
  FILE *fp = fopen(tester->params.filename, "rb");
  if (!fp) {
    return;
  }

  u64 start = read_cpu_timer();
  u64 fault_count_start = page_fault_count();

  handle_alloc(tester, type);
  i32 fd = fileno(fp);
  ssize_t read_result =
      read(fd, tester->params.buffer,
           tester->params.read_size * tester->params.read_count);

  u64 fault_count_end = page_fault_count();
  u64 end = read_cpu_timer();

  u64 bytes_read = read_result < 0 ? 0 : (u64)read_result;
  u64 read_time = end - start;
  u64 page_faults = fault_count_end - fault_count_start;
  tester->results = {
      bytes_read,
      read_time,
      page_faults,
  };

  handle_free(tester, type);
  fclose(fp);
}
void write_to_all_bytes(char *buffer, u64 size) {
for (u64 i = 0; i < size; ++i) {
buffer[i] = i;
}
}
/*
 * Repetition-test body: fills the test buffer with write_to_all_bytes().
 *
 * The timer and page-fault samples bracket both handle_alloc() and the write
 * loop. tester->results receives (bytes covered, elapsed timer ticks,
 * page faults), where bytes covered is read_size * read_count.
 */
void test_write(reptester *tester, alloc_type type) {
  const u64 ticks_before = read_cpu_timer();
  const u64 faults_before = page_fault_count();

  handle_alloc(tester, type);
  const u64 byte_count = tester->params.read_size * tester->params.read_count;
  write_to_all_bytes(tester->params.buffer, byte_count);

  const u64 faults_after = page_fault_count();
  const u64 ticks_after = read_cpu_timer();

  const u64 elapsed = ticks_after - ticks_before;
  const u64 fault_delta = faults_after - faults_before;
  tester->results = {
      byte_count,
      elapsed,
      fault_delta,
  };

  handle_free(tester, type);
}
/*
 * Repetition-test body: fills the test buffer via the assembly routine
 * mov_all_bytes_asm().
 *
 * The timer and page-fault samples bracket both handle_alloc() and the
 * assembly call. tester->results receives (bytes covered, elapsed timer
 * ticks, page faults), where bytes covered is read_size * read_count.
 */
void test_write_mov_all_bytes_asm(reptester *tester, alloc_type type) {
  const u64 ticks_before = read_cpu_timer();
  const u64 faults_before = page_fault_count();

  handle_alloc(tester, type);
  const u64 byte_count = tester->params.read_size * tester->params.read_count;
  mov_all_bytes_asm(tester->params.buffer, byte_count);

  const u64 faults_after = page_fault_count();
  const u64 ticks_after = read_cpu_timer();

  const u64 elapsed = ticks_after - ticks_before;
  const u64 fault_delta = faults_after - faults_before;
  tester->results = {
      byte_count,
      elapsed,
      fault_delta,
  };

  handle_free(tester, type);
}
/*
 * Repetition-test body: runs the assembly routine nop_all_bytes_asm() for
 * read_size * read_count iterations. Only the count is passed to the routine;
 * the buffer pointer is not.
 *
 * The timer and page-fault samples bracket both handle_alloc() and the
 * assembly call. tester->results receives (iteration count, elapsed timer
 * ticks, page faults).
 */
void test_write_nop_all_bytes_asm(reptester *tester, alloc_type type) {
  const u64 ticks_before = read_cpu_timer();
  const u64 faults_before = page_fault_count();

  handle_alloc(tester, type);
  const u64 byte_count = tester->params.read_size * tester->params.read_count;
  nop_all_bytes_asm(byte_count);

  const u64 faults_after = page_fault_count();
  const u64 ticks_after = read_cpu_timer();

  const u64 elapsed = ticks_after - ticks_before;
  const u64 fault_delta = faults_after - faults_before;
  tester->results = {
      byte_count,
      elapsed,
      fault_delta,
  };

  handle_free(tester, type);
}
/*
 * Repetition-test body: runs the assembly routine inc_all_bytes_asm() for
 * read_size * read_count iterations. Only the count is passed to the routine;
 * the buffer pointer is not.
 *
 * The timer and page-fault samples bracket both handle_alloc() and the
 * assembly call. tester->results receives (iteration count, elapsed timer
 * ticks, page faults).
 */
void test_write_inc_all_bytes_asm(reptester *tester, alloc_type type) {
  const u64 ticks_before = read_cpu_timer();
  const u64 faults_before = page_fault_count();

  handle_alloc(tester, type);
  const u64 byte_count = tester->params.read_size * tester->params.read_count;
  inc_all_bytes_asm(byte_count);

  const u64 faults_after = page_fault_count();
  const u64 ticks_after = read_cpu_timer();

  const u64 elapsed = ticks_after - ticks_before;
  const u64 fault_delta = faults_after - faults_before;
  tester->results = {
      byte_count,
      elapsed,
      fault_delta,
  };

  handle_free(tester, type);
}
/*
 * Repetition-test body: runs the assembly routine dec_all_bytes_asm() for
 * read_size * read_count iterations. Only the count is passed to the routine;
 * the buffer pointer is not.
 *
 * The timer and page-fault samples bracket both handle_alloc() and the
 * assembly call. tester->results receives (iteration count, elapsed timer
 * ticks, page faults).
 */
void test_write_dec_all_bytes_asm(reptester *tester, alloc_type type) {
  const u64 ticks_before = read_cpu_timer();
  const u64 faults_before = page_fault_count();

  handle_alloc(tester, type);
  const u64 byte_count = tester->params.read_size * tester->params.read_count;
  dec_all_bytes_asm(byte_count);

  const u64 faults_after = page_fault_count();
  const u64 ticks_after = read_cpu_timer();

  const u64 elapsed = ticks_after - ticks_before;
  const u64 fault_delta = faults_after - faults_before;
  tester->results = {
      byte_count,
      elapsed,
      fault_delta,
  };

  handle_free(tester, type);
}
/*
 * Returns the length of the stream in bytes, or 0 when `fp` is NULL or the
 * length cannot be determined (seek or tell failure).
 *
 * The stream is always repositioned to the start afterwards; the caller's
 * previous position is not preserved.
 */
u64 get_file_length(FILE *fp) {
  if (!fp) {
    return 0;
  }

  // ftell() reports failure as -1, which must not be converted to u64.
  long end_offset = -1;
  if (fseek(fp, 0, SEEK_END) == 0) {
    end_offset = ftell(fp);
  }
  fseek(fp, 0, SEEK_SET);

  return end_offset < 0 ? 0 : (u64)end_offset;
}

View File

@@ -1,43 +1,529 @@
global mov_all_bytes_asm
global nop_all_bytes_asm
global nop_1x3_all_bytes_asm
global nop_1x9_all_bytes_asm
global inc_all_bytes_asm
global dec_all_bytes_asm
global align64_loop
global align1_loop
global align15_loop
global align31_loop
global align63_loop
global align75_loop
global align90_loop
global align112_loop
global rat_add
global rat_mov_add
global read_1
global read_2
global read_3
global read_4
global read_8
global write_1
global write_2
global write_3
global write_4
global write_8
global read_1x2_low
global read_1x2_high
global read_2x2
global read_4x2
global read_8x2
global read_4x2_simd
global read_8x2_simd
global read_16x2_simd
global read_32x2_simd_offset
global read_32x2_simd_no_offset
global read_16x4_simd
global read_32x4_simd
global cache_test ; Expects 3 inputs (pointer, read_count, mask)
global cache_test_unaligned ; Expects 3 inputs (pointer, read_count, mask)
; void mov_all_bytes_asm(char *buffer, u64 size)
;   rdi = buffer, rsi = size
; Stores the low byte of the running index into each of the `size` bytes of
; `buffer`, i.e. buffer[i] = (u8)i.
; The exit test follows the store and increment, so size == 0 is not
; special-cased: the counter would have to wrap before it equals rsi again.
; Returns with rax == size.
mov_all_bytes_asm:
    xor rax, rax                    ; index = 0
.loop:
    mov BYTE [rdi + rax * 1], al    ; buffer[index] = low byte of index
    inc rax
    cmp rsi, rax
    jne .loop
    ret
; void nop_all_bytes_asm(u64 size)
;   rdi = size (iteration count)
; Counting loop whose body is one 3-byte NOP (bytes 0F 1F 00) followed by the
; increment/compare/branch. No memory is touched.
; The exit test follows the increment, so size == 0 is not special-cased.
; Returns with rax == size.
nop_all_bytes_asm:
    xor rax, rax                    ; counter = 0
.loop:
    db 0x0f, 0x1f, 0x00             ; hand-encoded 3-byte NOP
    inc rax
    cmp rdi, rax
    jne .loop
    ret
; nop_1x3_all_bytes_asm
;   rdi = iteration count
; Counting loop whose body is three separate one-byte `nop` instructions
; followed by the increment/compare/branch. No memory is touched.
; The exit test follows the increment, so a count of 0 is not special-cased.
; Returns with rax == rdi.
nop_1x3_all_bytes_asm:
xor rax, rax
.loop:
nop
nop
nop
inc rax
cmp rdi, rax
jne .loop
ret
; nop_1x9_all_bytes_asm
;   rdi = iteration count
; Counting loop whose body is nine separate one-byte `nop` instructions
; followed by the increment/compare/branch. No memory is touched.
; The exit test follows the increment, so a count of 0 is not special-cased.
; Returns with rax == rdi.
nop_1x9_all_bytes_asm:
xor rax, rax
.loop:
nop
nop
nop
nop
nop
nop
nop
nop
nop
inc rax
cmp rdi, rax
jne .loop
ret
; void inc_all_bytes_asm(u64 size)
;   rdi = size (iteration count)
; Empty counting loop: increments rax from 0 until it equals rdi. No memory is
; touched.
; The exit test follows the increment, so size == 0 is not special-cased.
; Returns with rax == size.
inc_all_bytes_asm:
    xor rax, rax                    ; counter = 0
.loop:
    inc rax
    cmp rdi, rax
    jne .loop
    ret
; void dec_all_bytes_asm(u64 size)
;   rdi = size (iteration count)
; Empty countdown loop: decrements rdi until it reaches zero, using the flags
; from `dec` directly instead of a separate compare. No memory is touched.
; The decrement happens before the test, so size == 0 is not special-cased.
dec_all_bytes_asm:
.loop:
    dec rdi
    jnz .loop
    ret
; align64_loop
;   rdi = iteration count
; Empty counting loop whose `.loop` label sits exactly on a 64-byte boundary.
; The exit test follows the increment, so a count of 0 is not special-cased.
; Returns with rax == rdi.
; NOTE(review): presumably the aligned baseline for the other alignN_loop
; variants — confirm against the caller.
align64_loop:
xor rax, rax
align 64
.loop:
inc rax
cmp rdi, rax
jne .loop
ret
; align1_loop
;   rdi = iteration count
; Empty counting loop whose `.loop` label sits 1 byte past a 64-byte boundary:
; a single one-byte nop follows the `align 64`. The nop runs once, before the
; loop is entered.
; Returns with rax == rdi.
align1_loop:
xor rax, rax
align 64
nop
.loop:
inc rax
cmp rdi, rax
jne .loop
ret
; align15_loop
;   rdi = iteration count
; Empty counting loop whose `.loop` label sits 15 bytes past a 64-byte
; boundary: 15 one-byte nops follow the `align 64`. The nops run once, before
; the loop is entered.
; Returns with rax == rdi.
align15_loop:
xor rax, rax
align 64
%rep 15
nop
%endrep
.loop:
inc rax
cmp rdi, rax
jne .loop
ret
; align31_loop
;   rdi = iteration count
; Empty counting loop whose `.loop` label sits 31 bytes past a 64-byte
; boundary: 31 one-byte nops follow the `align 64`. The nops run once, before
; the loop is entered.
; Returns with rax == rdi.
align31_loop:
xor rax, rax
align 64
%rep 31
nop
%endrep
.loop:
inc rax
cmp rdi, rax
jne .loop
ret
; align63_loop
;   rdi = iteration count
; Empty counting loop whose `.loop` label sits 63 bytes past a 64-byte
; boundary (one byte short of the next one): 63 one-byte nops follow the
; `align 64`. The nops run once, before the loop is entered.
; Returns with rax == rdi.
align63_loop:
xor rax, rax
align 64
%rep 63
nop
%endrep
.loop:
inc rax
cmp rdi, rax
jne .loop
ret
; align75_loop
;   rdi = iteration count
; Empty counting loop whose `.loop` label sits 75 bytes past a 64-byte
; boundary, i.e. 11 bytes into the following 64-byte block: 75 one-byte nops
; follow the `align 64`. The nops run once, before the loop is entered.
; Returns with rax == rdi.
align75_loop:
xor rax, rax
align 64
%rep 75
nop
%endrep
.loop:
inc rax
cmp rdi, rax
jne .loop
ret
; align90_loop
;   rdi = iteration count
; Empty counting loop whose `.loop` label sits 90 bytes past a 64-byte
; boundary, i.e. 26 bytes into the following 64-byte block: 90 one-byte nops
; follow the `align 64`. The nops run once, before the loop is entered.
; Returns with rax == rdi.
align90_loop:
xor rax, rax
align 64
%rep 90
nop
%endrep
.loop:
inc rax
cmp rdi, rax
jne .loop
ret
; align112_loop
;   rdi = iteration count
; Empty counting loop whose `.loop` label sits 112 bytes past a 64-byte
; boundary, i.e. 48 bytes into the following 64-byte block: 112 one-byte nops
; follow the `align 64`. The nops run once, before the loop is entered.
; Returns with rax == rdi.
align112_loop:
xor rax, rax
align 64
%rep 112
nop
%endrep
.loop:
inc rax
cmp rdi, rax
jne .loop
ret
; rat_add
;   rdi = iteration count
; Countdown loop whose body is two back-to-back `add rcx, 1` instructions, so
; each add depends on the result of the previous one. rcx is never
; initialized here; it starts from whatever value the caller left in it.
; The decrement precedes the test, so a count of 0 is not special-cased.
; Returns with rax == 0.
; NOTE(review): "rat" presumably refers to the register allocation/rename
; table — confirm with the author.
rat_add:
mov rax, rdi
.loop:
add rcx, 1
add rcx, 1
dec rax
jnz .loop
ret
; rat_mov_add
;   rdi = iteration count
; Countdown loop like rat_add, except that each `add rcx, 1` is preceded by
; `mov rcx, rax`, which overwrites rcx and so removes the dependency on the
; previous add's result.
; The decrement precedes the test, so a count of 0 is not special-cased.
; Returns with rax == 0.
rat_mov_add:
mov rax, rdi
.loop:
mov rcx, rax
add rcx, 1
mov rcx, rax
add rcx, 1
dec rax
jnz .loop
ret
; read_1
;   rdi = address to read, rsi = remaining count (compared as signed)
; Each iteration does one 8-byte load from [rdi] into rax and subtracts 1 from
; rsi; the loop repeats while the result is greater than zero (jnle). rdi never
; advances, so the same qword is read every time.
; The `align 64` comes after the entry label, so a call runs through the
; alignment padding once before reaching `.loop`.
read_1:
align 64
.loop:
mov rax, [rdi]
sub rsi, 1
jnle .loop
ret
; read_2
;   rdi = address to read, rsi = remaining count (compared as signed)
; Each iteration does two 8-byte loads from [rdi] into rax and subtracts 2
; from rsi; the loop repeats while the result is greater than zero (jnle).
; rdi never advances, so the same qword is read every time.
; The `align 64` comes after the entry label, so a call runs through the
; alignment padding once before reaching `.loop`.
read_2:
align 64
.loop:
%rep 2
mov rax, [rdi]
%endrep
sub rsi, 2
jnle .loop
ret
; read_3
;   rdi = address to read, rsi = remaining count (compared as signed)
; Each iteration does three 8-byte loads from [rdi] into rax and subtracts 3
; from rsi; the loop repeats while the result is greater than zero (jnle).
; rdi never advances, so the same qword is read every time.
; The `align 64` comes after the entry label, so a call runs through the
; alignment padding once before reaching `.loop`.
read_3:
align 64
.loop:
%rep 3
mov rax, [rdi]
%endrep
sub rsi, 3
jnle .loop
ret
; read_4
;   rdi = address to read, rsi = remaining count (compared as signed)
; Each iteration does four 8-byte loads from [rdi] into rax and subtracts 4
; from rsi; the loop repeats while the result is greater than zero (jnle).
; rdi never advances, so the same qword is read every time.
; The `align 64` comes after the entry label, so a call runs through the
; alignment padding once before reaching `.loop`.
read_4:
align 64
.loop:
%rep 4
mov rax, [rdi]
%endrep
sub rsi, 4
jnle .loop
ret
; read_8
;   rdi = address to read, rsi = remaining count (compared as signed)
; Each iteration does eight 8-byte loads from [rdi] into rax and subtracts 8
; from rsi; the loop repeats while the result is greater than zero (jnle).
; rdi never advances, so the same qword is read every time.
; The `align 64` comes after the entry label, so a call runs through the
; alignment padding once before reaching `.loop`.
read_8:
align 64
.loop:
%rep 8
mov rax, [rdi]
%endrep
sub rsi, 8
jnle .loop
ret
; write_1
;   rdi = address to write, rsi = remaining count (compared as signed)
; Each iteration stores one 8-byte zero to [rdi] and subtracts 1 from rsi; the
; loop repeats while the result is greater than zero (jnle). rdi never
; advances, so the same qword is overwritten every time.
; The `align 64` comes after the entry label, so a call runs through the
; alignment padding once before reaching `.loop`.
write_1:
align 64
.loop:
mov QWORD [rdi], 0
sub rsi, 1
jnle .loop
ret
; write_2
;   rdi = address to write, rsi = remaining count (compared as signed)
; Each iteration stores two 8-byte zeros to [rdi] and subtracts 2 from rsi;
; the loop repeats while the result is greater than zero (jnle). rdi never
; advances, so the same qword is overwritten every time.
; The `align 64` comes after the entry label, so a call runs through the
; alignment padding once before reaching `.loop`.
write_2:
align 64
.loop:
%rep 2
mov QWORD [rdi], 0
%endrep
sub rsi, 2
jnle .loop
ret
; write_3
;   rdi = address to write, rsi = remaining count (compared as signed)
; Each iteration stores three 8-byte zeros to [rdi] and subtracts 3 from rsi;
; the loop repeats while the result is greater than zero (jnle). rdi never
; advances, so the same qword is overwritten every time.
; The `align 64` comes after the entry label, so a call runs through the
; alignment padding once before reaching `.loop`.
write_3:
align 64
.loop:
%rep 3
mov QWORD [rdi], 0
%endrep
sub rsi, 3
jnle .loop
ret
; write_4
;   rdi = address to write, rsi = remaining count (compared as signed)
; Each iteration stores four 8-byte zeros to [rdi] and subtracts 4 from rsi;
; the loop repeats while the result is greater than zero (jnle). rdi never
; advances, so the same qword is overwritten every time.
; The `align 64` comes after the entry label, so a call runs through the
; alignment padding once before reaching `.loop`.
write_4:
align 64
.loop:
%rep 4
mov QWORD [rdi], 0
%endrep
sub rsi, 4
jnle .loop
ret
; write_8
;   rdi = address to write, rsi = remaining count (compared as signed)
; Each iteration stores eight 8-byte zeros to [rdi] and subtracts 8 from rsi;
; the loop repeats while the result is greater than zero (jnle). rdi never
; advances, so the same qword is overwritten every time.
; The `align 64` comes after the entry label, so a call runs through the
; alignment padding once before reaching `.loop`.
write_8:
align 64
.loop:
%rep 8
mov QWORD [rdi], 0
%endrep
sub rsi, 8
jnle .loop
ret
; read_1x2_low
;   rdi = address to read, rsi = remaining count (compared as signed)
; Each iteration does two 1-byte loads from [rdi] into al (the low byte of
; rax) and subtracts 2 from rsi; the loop repeats while the result is greater
; than zero (jnle). rdi never advances.
; The `align 64` comes after the entry label, so a call runs through the
; alignment padding once before reaching `.loop`.
read_1x2_low:
align 64
.loop:
%rep 2
mov al, [rdi]
%endrep
sub rsi, 2
jnle .loop
ret
; read_1x2_high
;   rdi = address to read, rsi = remaining count (compared as signed)
; Same as read_1x2_low except that the two 1-byte loads target ah (bits 8-15
; of rax) instead of al. Subtracts 2 from rsi per iteration and repeats while
; the result is greater than zero (jnle). rdi never advances.
; The `align 64` comes after the entry label, so a call runs through the
; alignment padding once before reaching `.loop`.
read_1x2_high:
align 64
.loop:
%rep 2
mov ah, [rdi]
%endrep
sub rsi, 2
jnle .loop
ret
; read_2x2
;   rdi = address to read, rsi = remaining count (compared as signed)
; Each iteration does two 2-byte loads from [rdi] into ax and subtracts 2 from
; rsi; the loop repeats while the result is greater than zero (jnle). rdi
; never advances.
; The `align 64` comes after the entry label, so a call runs through the
; alignment padding once before reaching `.loop`.
read_2x2:
align 64
.loop:
%rep 2
mov ax, [rdi]
%endrep
sub rsi, 2
jnle .loop
ret
; read_4x2
;   rdi = address to read, rsi = remaining count (compared as signed)
; Each iteration does two 4-byte loads from [rdi] into eax and subtracts 2
; from rsi; the loop repeats while the result is greater than zero (jnle). rdi
; never advances.
; The `align 64` comes after the entry label, so a call runs through the
; alignment padding once before reaching `.loop`.
read_4x2:
align 64
.loop:
%rep 2
mov eax, [rdi]
%endrep
sub rsi, 2
jnle .loop
ret
; read_8x2
;   rdi = address to read, rsi = remaining count (compared as signed)
; Each iteration does two 8-byte loads from [rdi] into rax and subtracts 2
; from rsi; the loop repeats while the result is greater than zero (jnle). rdi
; never advances. The instruction sequence is the same as read_2.
; The `align 64` comes after the entry label, so a call runs through the
; alignment padding once before reaching `.loop`.
read_8x2:
align 64
.loop:
%rep 2
mov rax, [rdi]
%endrep
sub rsi, 2
jnle .loop
ret
; read_4x2_simd
;   rdi = address to read, rsi = byte total (compared as unsigned)
; Each iteration does two 4-byte loads, from [rdi] and [rdi + 4], into r8d and
; adds 8 to the running total in rax; the loop repeats while rax < rsi (jb).
; rdi never advances, so the same 8 bytes are read every time. The body runs
; at least once, even when rsi is 0.
; Despite the name, the loads use a general-purpose register, not a vector
; register.
read_4x2_simd:
xor rax, rax
align 64
.loop:
mov r8d, [rdi]
mov r8d, [rdi + 4]
add rax, 8
cmp rax, rsi
jb .loop
ret
; read_8x2_simd
;   rdi = address to read, rsi = byte total (compared as unsigned)
; Each iteration does two 8-byte loads, from [rdi] and [rdi + 8], into r8 and
; adds 16 to the running total in rax; the loop repeats while rax < rsi (jb).
; rdi never advances, so the same 16 bytes are read every time. The body runs
; at least once, even when rsi is 0.
; Despite the name, the loads use a general-purpose register, not a vector
; register.
read_8x2_simd:
xor rax, rax
align 64
.loop:
mov r8, [rdi]
mov r8, [rdi + 8]
add rax, 16
cmp rax, rsi
jb .loop
ret
; read_16x2_simd
;   rdi = address to read, rsi = byte total (compared as unsigned)
; Each iteration does two unaligned 16-byte vector loads, from [rdi] and
; [rdi + 16], into xmm0 and adds 32 to the running total in rax; the loop
; repeats while rax < rsi (jb). rdi never advances, so the same 32 bytes are
; read every time. The body runs at least once, even when rsi is 0.
read_16x2_simd:
xor rax, rax
align 64
.loop:
vmovdqu xmm0, [rdi]
vmovdqu xmm0, [rdi + 16]
add rax, 32
cmp rax, rsi
jb .loop
ret
; read_32x2_simd_offset
;   rdi = address to read, rsi = byte total (compared as unsigned)
; Each iteration does two unaligned 32-byte vector loads, from [rdi] and
; [rdi + 32], into ymm0 and adds 64 to the running total in rax; the loop
; repeats while rax < rsi (jb). rdi never advances, so the same 64 bytes are
; read every time. The body runs at least once, even when rsi is 0.
read_32x2_simd_offset:
xor rax, rax
align 64
.loop:
vmovdqu ymm0, [rdi]
vmovdqu ymm0, [rdi + 32]
add rax, 64
cmp rax, rsi
jb .loop
ret
; read_32x2_simd_no_offset
;   rdi = address to read, rsi = byte total (compared as unsigned)
; Same as read_32x2_simd_offset except that both 32-byte loads read from
; [rdi], so only 32 distinct bytes are touched while 64 are added to the
; running total in rax per iteration. The loop repeats while rax < rsi (jb)
; and the body runs at least once, even when rsi is 0.
read_32x2_simd_no_offset:
xor rax, rax
align 64
.loop:
vmovdqu ymm0, [rdi]
vmovdqu ymm0, [rdi]
add rax, 64
cmp rax, rsi
jb .loop
ret
; read_16x4_simd
;   rdi = address to read, rsi = byte total (compared as unsigned)
; Each iteration does four unaligned 16-byte vector loads into xmm0: the pair
; [rdi], [rdi + 16] repeated twice. Only 32 distinct bytes are touched while
; 64 are added to the running total in rax per iteration. The loop repeats
; while rax < rsi (jb) and the body runs at least once, even when rsi is 0.
read_16x4_simd:
xor rax, rax
align 64
.loop:
%rep 2
vmovdqu xmm0, [rdi]
vmovdqu xmm0, [rdi + 16]
%endrep
add rax, 64
cmp rax, rsi
jb .loop
ret
; read_32x4_simd
;   rdi = address to read, rsi = byte total (compared as unsigned)
; Each iteration does four unaligned 32-byte vector loads into ymm0, all from
; [rdi]. Only 32 distinct bytes are touched while 128 are added to the running
; total in rax per iteration. The loop repeats while rax < rsi (jb) and the
; body runs at least once, even when rsi is 0.
; NOTE(review): read_16x4_simd alternates between two offsets while this
; routine reads the same address four times — confirm that is intended.
read_32x4_simd:
xor rax, rax
align 64
.loop:
%rep 2
vmovdqu ymm0, [rdi]
vmovdqu ymm0, [rdi]
%endrep
add rax, 128
cmp rax, rsi
jb .loop
ret
; cache_test(pointer, read_count, mask)
;   rdi = base pointer, rsi = remaining byte count, rdx = offset mask
; Each iteration reads 128 bytes (four unaligned 32-byte loads) starting at
; base + offset, then advances the offset by 128 and ANDs it with the mask so
; the reads stay inside a window chosen by the caller. rsi is reduced by 128
; per iteration and the loop repeats while the result is above zero, unsigned
; (ja); the body runs at least once.
;
; The base pointer is kept in r11, a caller-saved scratch register, rather
; than rbx: rbx is callee-saved and was previously overwritten without being
; preserved.
cache_test:
    xor r10, r10                ; Zero loop counter
    mov r11, rdi                ; Save original pointer
.loop:
    add rdi, r10                ; Advance the pointer
    add r10, 128                ; Increment loop counter
    and r10, rdx                ; Mask offset
    vmovdqu ymm0, [rdi + 0]
    vmovdqu ymm1, [rdi + 32]
    vmovdqu ymm2, [rdi + 64]
    vmovdqu ymm3, [rdi + 96]
    mov rdi, r11                ; Restore original pointer
    sub rsi, 128                ; Decrement count
    ja .loop
    ret
; cache_test_unaligned(pointer, read_count, mask)
;   rdi = base pointer, rsi = remaining byte count, rdx = offset mask
; Identical to cache_test except that the base pointer is first moved forward
; by 5 bytes, so every 32-byte load starts 5 bytes past where cache_test would
; read. Each iteration reads 128 bytes starting at base + 5 + offset, then
; advances the offset by 128 and ANDs it with the mask. rsi is reduced by 128
; per iteration and the loop repeats while the result is above zero, unsigned
; (ja); the body runs at least once.
; The last load of an iteration reaches 5 bytes further than in cache_test, so
; the caller's buffer must extend at least that far past the masked window.
;
; The base pointer is kept in r11, a caller-saved scratch register, rather
; than rbx: rbx is callee-saved and was previously overwritten without being
; preserved.
cache_test_unaligned:
    xor r10, r10                ; Zero loop counter
    add rdi, 5                  ; Unalign pointer
    mov r11, rdi                ; Save original pointer
.loop:
    add rdi, r10                ; Advance the pointer
    add r10, 128                ; Increment loop counter
    and r10, rdx                ; Mask offset
    vmovdqu ymm0, [rdi + 0]
    vmovdqu ymm1, [rdi + 32]
    vmovdqu ymm2, [rdi + 64]
    vmovdqu ymm3, [rdi + 96]
    mov rdi, r11                ; Restore original pointer
    sub rsi, 128                ; Decrement count
    ja .loop
    ret

View File

@@ -3,6 +3,7 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/resource.h>
#include <sys/time.h>
@@ -10,7 +11,9 @@ void handle_alloc(reptester *tester, alloc_type type) {
switch (type) {
case ALLOC_TYPE_WITH_MALLOC:
if (!(tester->params.buffer)) {
tester->params.buffer = (char *)malloc(tester->params.read_size + 1);
tester->params.buffer = (char *)mmap(
NULL, tester->params.read_size + 1, PROT_READ | PROT_WRITE,
MAP_ANON | MAP_SHARED | MAP_NORESERVE, -1, 0);
memset(tester->params.buffer, 0, tester->params.read_size + 1);
}
@@ -24,7 +27,7 @@ void handle_free(reptester *tester, alloc_type type) {
switch (type) {
case ALLOC_TYPE_WITH_MALLOC:
if (tester->params.buffer) {
free(tester->params.buffer);
munmap(tester->params.buffer, tester->params.read_size + 1);
tester->params.buffer = NULL;
}
@@ -58,7 +61,9 @@ void run_func_test(reptester *tester, reptest_func func, const char *func_name,
if (type == ALLOC_TYPE_WITH_MALLOC) {
buffer = tester->params.buffer;
tester->params.buffer = (char *)malloc(tester->params.read_size + 1);
tester->params.buffer =
(char *)mmap(NULL, tester->params.read_size + 1, PROT_READ | PROT_WRITE,
MAP_ANON | MAP_SHARED | MAP_NORESERVE, -1, 0);
memset(tester->params.buffer, 0, tester->params.read_size + 1);
}
@@ -100,7 +105,7 @@ void run_func_test(reptester *tester, reptest_func func, const char *func_name,
}
if (type == ALLOC_TYPE_WITH_MALLOC) {
free(tester->params.buffer);
munmap(tester->params.buffer, tester->params.read_size + 1);
tester->params.buffer = buffer;
}