Compare commits
7 Commits
46ee06406f
...
main
| Author | SHA1 | Date | |
|---|---|---|---|
| 94167e05fd | |||
| 7177add4ce | |||
| 37d3340df9 | |||
| fcdaf41495 | |||
| cec0662e68 | |||
| ba31dd9f8c | |||
| f355ab2d25 |
@@ -1,2 +1,2 @@
|
|||||||
all:
|
all:
|
||||||
clang++ -g dasm.cpp -o dasm
|
clang++ -g dasm.cc -o dasm
|
||||||
|
|||||||
@@ -1,2 +1,2 @@
|
|||||||
all:
|
all:
|
||||||
clang++ -g dasm.cpp -o dasm
|
clang++ -g dasm.cc -o dasm
|
||||||
|
|||||||
@@ -1,2 +1,2 @@
|
|||||||
all:
|
all:
|
||||||
clang++ -g dasm.cpp -o dasm
|
clang++ -g dasm.cc -o dasm
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
CC=clang++
|
CC=clang++
|
||||||
CFLAGS=-g -O0 -Wall -Wextra
|
CFLAGS=-g -O0 -Wall -Wextra
|
||||||
LIBS=-Wl,-rpath,./lib -L./lib -lsim86
|
LIBS=-Wl,-rpath,./lib -L./lib -lsim86
|
||||||
SRC=*.cpp
|
SRC=*.cc
|
||||||
OUT=sim86
|
OUT=sim86
|
||||||
|
|
||||||
all:
|
all:
|
||||||
|
|||||||
@@ -1,17 +1,17 @@
|
|||||||
mk_haversine_fscanf:
|
mk_haversine_fscanf:
|
||||||
clang++ -g cpp/fscanf.cpp cpp/haversine.cpp -o cpp/haverscan
|
clang++ -g cpp/fscanf.cc cpp/haversine.cc -o cpp/haverscan
|
||||||
|
|
||||||
run_haversine_fscanf:
|
run_haversine_fscanf:
|
||||||
cd ./cpp && ./haverscan
|
cd ./cpp && ./haverscan
|
||||||
|
|
||||||
mk_haversine_strtok:
|
mk_haversine_strtok:
|
||||||
clang++ -g cpp/strtok.cpp cpp/haversine.cpp -o cpp/haverstrtok
|
clang++ -g cpp/strtok.cc cpp/haversine.cc -o cpp/haverstrtok
|
||||||
|
|
||||||
run_haversine_strtok:
|
run_haversine_strtok:
|
||||||
cd ./cpp && ./haverstrtok
|
cd ./cpp && ./haverstrtok
|
||||||
|
|
||||||
mk_test:
|
mk_test:
|
||||||
clang++ -g -lpthread cpp/test.cpp cpp/haversine.cpp -o cpp/test
|
clang++ -g -lpthread cpp/test.cc cpp/haversine.cc -o cpp/test
|
||||||
|
|
||||||
run_test:
|
run_test:
|
||||||
cd ./cpp && ./test
|
cd ./cpp && ./test
|
||||||
|
|||||||
1
haversine_02/.gitignore
vendored
1
haversine_02/.gitignore
vendored
@@ -4,6 +4,7 @@
|
|||||||
compile_commands.json
|
compile_commands.json
|
||||||
count_and_distances
|
count_and_distances
|
||||||
pairs.json
|
pairs.json
|
||||||
|
cache_test
|
||||||
main
|
main
|
||||||
genhavr
|
genhavr
|
||||||
prochavr
|
prochavr
|
||||||
|
|||||||
@@ -43,11 +43,11 @@ fi
|
|||||||
|
|
||||||
|
|
||||||
# GENERATOR
|
# GENERATOR
|
||||||
GENSRC="./src/generator/gen_argparser.cpp \
|
GENSRC="./src/generator/gen_argparser.cc \
|
||||||
./src/generator/generator.cpp \
|
./src/generator/generator.cc \
|
||||||
./src/haversine.cpp \
|
./src/haversine.cc \
|
||||||
./src/point_types.cpp \
|
./src/point_types.cc \
|
||||||
./src/generator/main.cpp"
|
./src/generator/main.cc"
|
||||||
GENOUT=genhavr
|
GENOUT=genhavr
|
||||||
|
|
||||||
(set -x ; $CXX $CFLAGS $GENSRC -o $GENOUT)
|
(set -x ; $CXX $CFLAGS $GENSRC -o $GENOUT)
|
||||||
@@ -64,10 +64,10 @@ JSONFLAGS="-c "
|
|||||||
JSON_BUILD_DIR=json_build
|
JSON_BUILD_DIR=json_build
|
||||||
|
|
||||||
PROCSRC="./$JSON_BUILD_DIR/*.o \
|
PROCSRC="./$JSON_BUILD_DIR/*.o \
|
||||||
./src/haversine.cpp \
|
./src/haversine.cc \
|
||||||
./src/point_types.cpp \
|
./src/point_types.cc \
|
||||||
./src/processor/proc_argparser.cpp \
|
./src/processor/proc_argparser.cc \
|
||||||
./src/processor/main.cpp "
|
./src/processor/main.cc "
|
||||||
PROCOUT=prochavr
|
PROCOUT=prochavr
|
||||||
|
|
||||||
# MEMTESTER
|
# MEMTESTER
|
||||||
@@ -113,7 +113,7 @@ if [[ $BASIC_PROFILING == true ]] || [[ $FULL_PROFILING == true ]]; then
|
|||||||
cd ../
|
cd ../
|
||||||
|
|
||||||
# REPETITION TESTING
|
# REPETITION TESTING
|
||||||
REPTESTSRC="./src/repetition_testing/*.cpp ./$PROF_BUILD_DIR/*.o $ASM_LIB"
|
REPTESTSRC="./src/repetition_testing/*.cc ./$PROF_BUILD_DIR/*.o $ASM_LIB"
|
||||||
REPTESTOUT=reptest
|
REPTESTOUT=reptest
|
||||||
|
|
||||||
(set -x ; $CXX $CFLAGS $REPTESTFLAGS $REPTESTSRC -o $REPTESTOUT)
|
(set -x ; $CXX $CFLAGS $REPTESTFLAGS $REPTESTSRC -o $REPTESTOUT)
|
||||||
|
|||||||
@@ -5,6 +5,7 @@
|
|||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
|
#include <sys/mman.h>
|
||||||
#include <unistd.h>
|
#include <unistd.h>
|
||||||
|
|
||||||
#define ARR_LEN(ARR) sizeof(ARR) / sizeof(*ARR)
|
#define ARR_LEN(ARR) sizeof(ARR) / sizeof(*ARR)
|
||||||
@@ -47,6 +48,8 @@ extern "C" void read_32x2_simd_offset(char *buffer, u64 size);
|
|||||||
extern "C" void read_32x2_simd_no_offset(char *buffer, u64 size);
|
extern "C" void read_32x2_simd_no_offset(char *buffer, u64 size);
|
||||||
extern "C" void read_16x4_simd(char *buffer, u64 size);
|
extern "C" void read_16x4_simd(char *buffer, u64 size);
|
||||||
extern "C" void read_32x4_simd(char *buffer, u64 size);
|
extern "C" void read_32x4_simd(char *buffer, u64 size);
|
||||||
|
extern "C" void cache_test(char *buffer, u64 size, u64 mask);
|
||||||
|
extern "C" void cache_test_unaligned(char *buffer, u64 size, u64 mask);
|
||||||
|
|
||||||
void test_fread(reptester *tester, alloc_type type);
|
void test_fread(reptester *tester, alloc_type type);
|
||||||
void test_read(reptester *tester, alloc_type type);
|
void test_read(reptester *tester, alloc_type type);
|
||||||
@@ -89,6 +92,23 @@ void test_read_32x2_simd_offset(reptester *tester, alloc_type type);
|
|||||||
void test_read_32x2_simd_no_offset(reptester *tester, alloc_type type);
|
void test_read_32x2_simd_no_offset(reptester *tester, alloc_type type);
|
||||||
void test_read_16x4_simd(reptester *tester, alloc_type type);
|
void test_read_16x4_simd(reptester *tester, alloc_type type);
|
||||||
void test_read_32x4_simd(reptester *tester, alloc_type type);
|
void test_read_32x4_simd(reptester *tester, alloc_type type);
|
||||||
|
void test_cache_test_16k(reptester *tester, alloc_type type);
|
||||||
|
void test_cache_test_16k_unaligned(reptester *tester, alloc_type type);
|
||||||
|
void test_cache_test_32k(reptester *tester, alloc_type type);
|
||||||
|
void test_cache_test_32k_unaligned(reptester *tester, alloc_type type);
|
||||||
|
void test_cache_test_64k(reptester *tester, alloc_type type);
|
||||||
|
void test_cache_test_128k(reptester *tester, alloc_type type);
|
||||||
|
void test_cache_test_256k(reptester *tester, alloc_type type);
|
||||||
|
void test_cache_test_512k(reptester *tester, alloc_type type);
|
||||||
|
void test_cache_test_1m(reptester *tester, alloc_type type);
|
||||||
|
void test_cache_test_2m(reptester *tester, alloc_type type);
|
||||||
|
void test_cache_test_4m(reptester *tester, alloc_type type);
|
||||||
|
void test_cache_test_8m(reptester *tester, alloc_type type);
|
||||||
|
void test_cache_test_16m(reptester *tester, alloc_type type);
|
||||||
|
void test_cache_test_32m(reptester *tester, alloc_type type);
|
||||||
|
void test_cache_test_64m(reptester *tester, alloc_type type);
|
||||||
|
void test_cache_test_512m(reptester *tester, alloc_type type);
|
||||||
|
void test_cache_test_full(reptester *tester, alloc_type type);
|
||||||
u64 get_file_length(FILE *fp);
|
u64 get_file_length(FILE *fp);
|
||||||
|
|
||||||
int main(int argc, char *argv[]) {
|
int main(int argc, char *argv[]) {
|
||||||
@@ -182,22 +202,58 @@ int main(int argc, char *argv[]) {
|
|||||||
// {{"READ 2x2", "READ 2x2 WITH MALLOC"}, test_read_2x2},
|
// {{"READ 2x2", "READ 2x2 WITH MALLOC"}, test_read_2x2},
|
||||||
// {{"READ 4x2", "READ 4x2 WITH MALLOC"}, test_read_4x2},
|
// {{"READ 4x2", "READ 4x2 WITH MALLOC"}, test_read_4x2},
|
||||||
// {{"READ 8x2", "READ 8x2 WITH MALLOC"}, test_read_8x2},
|
// {{"READ 8x2", "READ 8x2 WITH MALLOC"}, test_read_8x2},
|
||||||
{{"READ 4x2_simd", "READ 4x2_simd WITH MALLOC"}, test_read_4x2_simd},
|
// {{"READ 4x2_simd", "READ 4x2_simd WITH MALLOC"}, test_read_4x2_simd},
|
||||||
{{"READ 8x2_simd", "READ 8x2_simd WITH MALLOC"}, test_read_8x2_simd},
|
// {{"READ 8x2_simd", "READ 8x2_simd WITH MALLOC"}, test_read_8x2_simd},
|
||||||
{{"READ 16x2_simd", "READ 16x2_simd WITH MALLOC"}, test_read_16x2_simd},
|
// {{"READ 16x2_simd", "READ 16x2_simd WITH MALLOC"},
|
||||||
{{"READ 32x2_simd_offset", "READ 32x2_simd_offset WITH MALLOC"},
|
// test_read_16x2_simd},
|
||||||
test_read_32x2_simd_offset},
|
// {{"READ 32x2_simd_offset", "READ 32x2_simd_offset WITH MALLOC"},
|
||||||
{{"READ 32x2_simd_no_offset", "READ 32x2_simd_no_offset WITH MALLOC"},
|
// test_read_32x2_simd_offset},
|
||||||
test_read_32x2_simd_no_offset},
|
// {{"READ 32x2_simd_no_offset", "READ 32x2_simd_no_offset WITH MALLOC"},
|
||||||
{{"READ 16x4_simd", "READ 16x4_simd WITH MALLOC"}, test_read_16x4_simd},
|
// test_read_32x2_simd_no_offset},
|
||||||
{{"READ 32x4_simd", "READ 32x4_simd WITH MALLOC"}, test_read_32x4_simd},
|
// {{"READ 16x4_simd", "READ 16x4_simd WITH MALLOC"},
|
||||||
|
// test_read_16x4_simd},
|
||||||
|
// {{"READ 32x4_simd", "READ 32x4_simd WITH MALLOC"},
|
||||||
|
// test_read_32x4_simd},
|
||||||
|
{{"CACHE TEST 16K", "CACHE TEST 16K WITH MALLOC"}, test_cache_test_16k},
|
||||||
|
{{"CACHE TEST 16K UNALIGNED", "CACHE TEST 16K UNALIGNED WITH MALLOC"},
|
||||||
|
test_cache_test_16k_unaligned},
|
||||||
|
// {{"CACHE TEST 32K", "CACHE TEST 32K WITH MALLOC"},
|
||||||
|
// test_cache_test_32k},
|
||||||
|
// {{"CACHE TEST 64K", "CACHE TEST 64K WITH MALLOC"},
|
||||||
|
// test_cache_test_64k},
|
||||||
|
// {{"CACHE TEST 128K", "CACHE TEST 128K WITH MALLOC"},
|
||||||
|
// test_cache_test_128k},
|
||||||
|
// {{"CACHE TEST 256K", "CACHE TEST 256K WITH MALLOC"},
|
||||||
|
// test_cache_test_256k},
|
||||||
|
// {{"CACHE TEST 512K", "CACHE TEST 512K WITH MALLOC"},
|
||||||
|
// test_cache_test_512k},
|
||||||
|
// {{"CACHE TEST 1M", "CACHE TEST 1M WITH MALLOC"}, test_cache_test_1m},
|
||||||
|
// {{"CACHE TEST 2M", "CACHE TEST 2M WITH MALLOC"}, test_cache_test_2m},
|
||||||
|
// {{"CACHE TEST 4M", "CACHE TEST 4M WITH MALLOC"}, test_cache_test_4m},
|
||||||
|
// {{"CACHE TEST 8M", "CACHE TEST 8M WITH MALLOC"}, test_cache_test_8m},
|
||||||
|
// {{"CACHE TEST 16M", "CACHE TEST 16M WITH MALLOC"},
|
||||||
|
// test_cache_test_16m},
|
||||||
|
// {{"CACHE TEST 32M", "CACHE TEST 32M WITH MALLOC"},
|
||||||
|
// test_cache_test_32m},
|
||||||
|
// {{"CACHE TEST 64M", "CACHE TEST 64M WITH MALLOC"},
|
||||||
|
// test_cache_test_64m},
|
||||||
|
// {{"CACHE TEST 512M", "CACHE TEST 512M WITH MALLOC"},
|
||||||
|
// test_cache_test_512m},
|
||||||
|
// {{"CACHE TEST FULL", "CACHE TEST FULL WITH MALLOC"},
|
||||||
|
// test_cache_test_full},
|
||||||
};
|
};
|
||||||
|
|
||||||
tester.params.read_size = get_file_length(fp);
|
tester.params.read_size = get_file_length(fp);
|
||||||
tester.params.read_count = 1;
|
tester.params.read_count = 1;
|
||||||
tester.params.buffer = (char *)malloc(tester.params.read_size + 1);
|
tester.params.buffer =
|
||||||
|
(char *)mmap(NULL, tester.params.read_size + 1, PROT_READ | PROT_WRITE,
|
||||||
|
MAP_ANON | MAP_SHARED | MAP_NORESERVE, -1, 0);
|
||||||
memset(tester.params.buffer, 0, tester.params.read_size + 1);
|
memset(tester.params.buffer, 0, tester.params.read_size + 1);
|
||||||
|
|
||||||
|
for (u64 i = 0; i < tester.params.read_size; ++i) {
|
||||||
|
tester.params.buffer[i] = (char)i;
|
||||||
|
}
|
||||||
|
|
||||||
for (u64 i = 0; i < waves; ++i) {
|
for (u64 i = 0; i < waves; ++i) {
|
||||||
for (u64 j = 0; j < ARR_LEN(funcs); ++j) {
|
for (u64 j = 0; j < ARR_LEN(funcs); ++j) {
|
||||||
for (u64 k = 0; k < ALLOC_TYPE_WITH_MALLOC; ++k) {
|
for (u64 k = 0; k < ALLOC_TYPE_WITH_MALLOC; ++k) {
|
||||||
@@ -208,7 +264,7 @@ int main(int argc, char *argv[]) {
|
|||||||
|
|
||||||
fclose(fp);
|
fclose(fp);
|
||||||
|
|
||||||
free(tester.params.buffer);
|
munmap(tester.params.buffer, tester.params.read_size + 1);
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
@@ -586,7 +642,6 @@ void test_align63_loop(reptester *tester, alloc_type type) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
void test_align75_loop(reptester *tester, alloc_type type) {
|
void test_align75_loop(reptester *tester, alloc_type type) {
|
||||||
|
|
||||||
u64 start = read_cpu_timer();
|
u64 start = read_cpu_timer();
|
||||||
u64 fault_count_start = page_fault_count();
|
u64 fault_count_start = page_fault_count();
|
||||||
|
|
||||||
@@ -612,7 +667,6 @@ void test_align75_loop(reptester *tester, alloc_type type) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
void test_align90_loop(reptester *tester, alloc_type type) {
|
void test_align90_loop(reptester *tester, alloc_type type) {
|
||||||
|
|
||||||
u64 start = read_cpu_timer();
|
u64 start = read_cpu_timer();
|
||||||
u64 fault_count_start = page_fault_count();
|
u64 fault_count_start = page_fault_count();
|
||||||
|
|
||||||
@@ -1112,7 +1166,6 @@ void test_read_4x2_simd(reptester *tester, alloc_type type) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
void test_read_8x2_simd(reptester *tester, alloc_type type) {
|
void test_read_8x2_simd(reptester *tester, alloc_type type) {
|
||||||
|
|
||||||
u64 start = read_cpu_timer();
|
u64 start = read_cpu_timer();
|
||||||
u64 fault_count_start = page_fault_count();
|
u64 fault_count_start = page_fault_count();
|
||||||
|
|
||||||
@@ -1138,7 +1191,6 @@ void test_read_8x2_simd(reptester *tester, alloc_type type) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
void test_read_16x2_simd(reptester *tester, alloc_type type) {
|
void test_read_16x2_simd(reptester *tester, alloc_type type) {
|
||||||
|
|
||||||
u64 start = read_cpu_timer();
|
u64 start = read_cpu_timer();
|
||||||
u64 fault_count_start = page_fault_count();
|
u64 fault_count_start = page_fault_count();
|
||||||
|
|
||||||
@@ -1264,6 +1316,431 @@ void test_read_32x4_simd(reptester *tester, alloc_type type) {
|
|||||||
handle_free(tester, type);
|
handle_free(tester, type);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void test_cache_test_16k(reptester *tester, alloc_type type) {
|
||||||
|
u64 start = read_cpu_timer();
|
||||||
|
u64 fault_count_start = page_fault_count();
|
||||||
|
|
||||||
|
handle_alloc(tester, type);
|
||||||
|
|
||||||
|
u64 total_size = tester->params.read_size * tester->params.read_count;
|
||||||
|
|
||||||
|
cache_test(tester->params.buffer, total_size, 0x3fff);
|
||||||
|
|
||||||
|
u64 fault_count_end = page_fault_count();
|
||||||
|
u64 end = read_cpu_timer();
|
||||||
|
|
||||||
|
u64 read_time = end - start;
|
||||||
|
u64 page_faults = fault_count_end - fault_count_start;
|
||||||
|
|
||||||
|
tester->results = {
|
||||||
|
total_size,
|
||||||
|
read_time,
|
||||||
|
page_faults,
|
||||||
|
};
|
||||||
|
|
||||||
|
handle_free(tester, type);
|
||||||
|
}
|
||||||
|
|
||||||
|
void test_cache_test_16k_unaligned(reptester *tester, alloc_type type) {
|
||||||
|
u64 start = read_cpu_timer();
|
||||||
|
u64 fault_count_start = page_fault_count();
|
||||||
|
|
||||||
|
handle_alloc(tester, type);
|
||||||
|
|
||||||
|
u64 total_size = tester->params.read_size * tester->params.read_count;
|
||||||
|
|
||||||
|
cache_test_unaligned(tester->params.buffer, total_size, 0x3fff);
|
||||||
|
|
||||||
|
u64 fault_count_end = page_fault_count();
|
||||||
|
u64 end = read_cpu_timer();
|
||||||
|
|
||||||
|
u64 read_time = end - start;
|
||||||
|
u64 page_faults = fault_count_end - fault_count_start;
|
||||||
|
|
||||||
|
tester->results = {
|
||||||
|
total_size,
|
||||||
|
read_time,
|
||||||
|
page_faults,
|
||||||
|
};
|
||||||
|
|
||||||
|
handle_free(tester, type);
|
||||||
|
}
|
||||||
|
|
||||||
|
void test_cache_test_32k(reptester *tester, alloc_type type) {
|
||||||
|
u64 start = read_cpu_timer();
|
||||||
|
u64 fault_count_start = page_fault_count();
|
||||||
|
|
||||||
|
handle_alloc(tester, type);
|
||||||
|
|
||||||
|
u64 total_size = tester->params.read_size * tester->params.read_count;
|
||||||
|
|
||||||
|
cache_test(tester->params.buffer, total_size, 0x7fff);
|
||||||
|
|
||||||
|
u64 fault_count_end = page_fault_count();
|
||||||
|
u64 end = read_cpu_timer();
|
||||||
|
|
||||||
|
u64 read_time = end - start;
|
||||||
|
u64 page_faults = fault_count_end - fault_count_start;
|
||||||
|
|
||||||
|
tester->results = {
|
||||||
|
total_size,
|
||||||
|
read_time,
|
||||||
|
page_faults,
|
||||||
|
};
|
||||||
|
|
||||||
|
handle_free(tester, type);
|
||||||
|
}
|
||||||
|
|
||||||
|
void test_cache_test_32k_unaligned(reptester *tester, alloc_type type) {
|
||||||
|
u64 start = read_cpu_timer();
|
||||||
|
u64 fault_count_start = page_fault_count();
|
||||||
|
|
||||||
|
handle_alloc(tester, type);
|
||||||
|
|
||||||
|
u64 total_size = tester->params.read_size * tester->params.read_count;
|
||||||
|
|
||||||
|
cache_test_unaligned(tester->params.buffer, total_size, 0x7fff);
|
||||||
|
|
||||||
|
u64 fault_count_end = page_fault_count();
|
||||||
|
u64 end = read_cpu_timer();
|
||||||
|
|
||||||
|
u64 read_time = end - start;
|
||||||
|
u64 page_faults = fault_count_end - fault_count_start;
|
||||||
|
|
||||||
|
tester->results = {
|
||||||
|
total_size,
|
||||||
|
read_time,
|
||||||
|
page_faults,
|
||||||
|
};
|
||||||
|
|
||||||
|
handle_free(tester, type);
|
||||||
|
}
|
||||||
|
|
||||||
|
void test_cache_test_64k(reptester *tester, alloc_type type) {
|
||||||
|
u64 start = read_cpu_timer();
|
||||||
|
u64 fault_count_start = page_fault_count();
|
||||||
|
|
||||||
|
handle_alloc(tester, type);
|
||||||
|
|
||||||
|
u64 total_size = tester->params.read_size * tester->params.read_count;
|
||||||
|
|
||||||
|
cache_test(tester->params.buffer, total_size, 0xffff);
|
||||||
|
|
||||||
|
u64 fault_count_end = page_fault_count();
|
||||||
|
u64 end = read_cpu_timer();
|
||||||
|
|
||||||
|
u64 read_time = end - start;
|
||||||
|
u64 page_faults = fault_count_end - fault_count_start;
|
||||||
|
|
||||||
|
tester->results = {
|
||||||
|
total_size,
|
||||||
|
read_time,
|
||||||
|
page_faults,
|
||||||
|
};
|
||||||
|
|
||||||
|
handle_free(tester, type);
|
||||||
|
}
|
||||||
|
|
||||||
|
void test_cache_test_128k(reptester *tester, alloc_type type) {
|
||||||
|
u64 start = read_cpu_timer();
|
||||||
|
u64 fault_count_start = page_fault_count();
|
||||||
|
|
||||||
|
handle_alloc(tester, type);
|
||||||
|
|
||||||
|
u64 total_size = tester->params.read_size * tester->params.read_count;
|
||||||
|
|
||||||
|
cache_test(tester->params.buffer, total_size, 0x1ffff);
|
||||||
|
|
||||||
|
u64 fault_count_end = page_fault_count();
|
||||||
|
u64 end = read_cpu_timer();
|
||||||
|
|
||||||
|
u64 read_time = end - start;
|
||||||
|
u64 page_faults = fault_count_end - fault_count_start;
|
||||||
|
|
||||||
|
tester->results = {
|
||||||
|
total_size,
|
||||||
|
read_time,
|
||||||
|
page_faults,
|
||||||
|
};
|
||||||
|
|
||||||
|
handle_free(tester, type);
|
||||||
|
}
|
||||||
|
|
||||||
|
void test_cache_test_256k(reptester *tester, alloc_type type) {
|
||||||
|
u64 start = read_cpu_timer();
|
||||||
|
u64 fault_count_start = page_fault_count();
|
||||||
|
|
||||||
|
handle_alloc(tester, type);
|
||||||
|
|
||||||
|
u64 total_size = tester->params.read_size * tester->params.read_count;
|
||||||
|
|
||||||
|
cache_test(tester->params.buffer, total_size, 0x3ffff);
|
||||||
|
|
||||||
|
u64 fault_count_end = page_fault_count();
|
||||||
|
u64 end = read_cpu_timer();
|
||||||
|
|
||||||
|
u64 read_time = end - start;
|
||||||
|
u64 page_faults = fault_count_end - fault_count_start;
|
||||||
|
|
||||||
|
tester->results = {
|
||||||
|
total_size,
|
||||||
|
read_time,
|
||||||
|
page_faults,
|
||||||
|
};
|
||||||
|
|
||||||
|
handle_free(tester, type);
|
||||||
|
}
|
||||||
|
|
||||||
|
void test_cache_test_512k(reptester *tester, alloc_type type) {
|
||||||
|
u64 start = read_cpu_timer();
|
||||||
|
u64 fault_count_start = page_fault_count();
|
||||||
|
|
||||||
|
handle_alloc(tester, type);
|
||||||
|
|
||||||
|
u64 total_size = tester->params.read_size * tester->params.read_count;
|
||||||
|
|
||||||
|
cache_test(tester->params.buffer, total_size, 0x7ffff);
|
||||||
|
|
||||||
|
u64 fault_count_end = page_fault_count();
|
||||||
|
u64 end = read_cpu_timer();
|
||||||
|
|
||||||
|
u64 read_time = end - start;
|
||||||
|
u64 page_faults = fault_count_end - fault_count_start;
|
||||||
|
|
||||||
|
tester->results = {
|
||||||
|
total_size,
|
||||||
|
read_time,
|
||||||
|
page_faults,
|
||||||
|
};
|
||||||
|
|
||||||
|
handle_free(tester, type);
|
||||||
|
}
|
||||||
|
|
||||||
|
void test_cache_test_1m(reptester *tester, alloc_type type) {
|
||||||
|
u64 start = read_cpu_timer();
|
||||||
|
u64 fault_count_start = page_fault_count();
|
||||||
|
|
||||||
|
handle_alloc(tester, type);
|
||||||
|
|
||||||
|
u64 total_size = tester->params.read_size * tester->params.read_count;
|
||||||
|
|
||||||
|
cache_test(tester->params.buffer, total_size, 0xfffff);
|
||||||
|
|
||||||
|
u64 fault_count_end = page_fault_count();
|
||||||
|
u64 end = read_cpu_timer();
|
||||||
|
|
||||||
|
u64 read_time = end - start;
|
||||||
|
u64 page_faults = fault_count_end - fault_count_start;
|
||||||
|
|
||||||
|
tester->results = {
|
||||||
|
total_size,
|
||||||
|
read_time,
|
||||||
|
page_faults,
|
||||||
|
};
|
||||||
|
|
||||||
|
handle_free(tester, type);
|
||||||
|
}
|
||||||
|
|
||||||
|
void test_cache_test_2m(reptester *tester, alloc_type type) {
|
||||||
|
u64 start = read_cpu_timer();
|
||||||
|
u64 fault_count_start = page_fault_count();
|
||||||
|
|
||||||
|
handle_alloc(tester, type);
|
||||||
|
|
||||||
|
u64 total_size = tester->params.read_size * tester->params.read_count;
|
||||||
|
|
||||||
|
cache_test(tester->params.buffer, total_size, 0x1fffff);
|
||||||
|
|
||||||
|
u64 fault_count_end = page_fault_count();
|
||||||
|
u64 end = read_cpu_timer();
|
||||||
|
|
||||||
|
u64 read_time = end - start;
|
||||||
|
u64 page_faults = fault_count_end - fault_count_start;
|
||||||
|
|
||||||
|
tester->results = {
|
||||||
|
total_size,
|
||||||
|
read_time,
|
||||||
|
page_faults,
|
||||||
|
};
|
||||||
|
|
||||||
|
handle_free(tester, type);
|
||||||
|
}
|
||||||
|
|
||||||
|
void test_cache_test_4m(reptester *tester, alloc_type type) {
|
||||||
|
u64 start = read_cpu_timer();
|
||||||
|
u64 fault_count_start = page_fault_count();
|
||||||
|
|
||||||
|
handle_alloc(tester, type);
|
||||||
|
|
||||||
|
u64 total_size = tester->params.read_size * tester->params.read_count;
|
||||||
|
|
||||||
|
cache_test(tester->params.buffer, total_size, 0x3fffff);
|
||||||
|
|
||||||
|
u64 fault_count_end = page_fault_count();
|
||||||
|
u64 end = read_cpu_timer();
|
||||||
|
|
||||||
|
u64 read_time = end - start;
|
||||||
|
u64 page_faults = fault_count_end - fault_count_start;
|
||||||
|
|
||||||
|
tester->results = {
|
||||||
|
total_size,
|
||||||
|
read_time,
|
||||||
|
page_faults,
|
||||||
|
};
|
||||||
|
|
||||||
|
handle_free(tester, type);
|
||||||
|
}
|
||||||
|
|
||||||
|
void test_cache_test_8m(reptester *tester, alloc_type type) {
|
||||||
|
u64 start = read_cpu_timer();
|
||||||
|
u64 fault_count_start = page_fault_count();
|
||||||
|
|
||||||
|
handle_alloc(tester, type);
|
||||||
|
|
||||||
|
u64 total_size = tester->params.read_size * tester->params.read_count;
|
||||||
|
|
||||||
|
cache_test(tester->params.buffer, total_size, 0x7fffff);
|
||||||
|
|
||||||
|
u64 fault_count_end = page_fault_count();
|
||||||
|
u64 end = read_cpu_timer();
|
||||||
|
|
||||||
|
u64 read_time = end - start;
|
||||||
|
u64 page_faults = fault_count_end - fault_count_start;
|
||||||
|
|
||||||
|
tester->results = {
|
||||||
|
total_size,
|
||||||
|
read_time,
|
||||||
|
page_faults,
|
||||||
|
};
|
||||||
|
|
||||||
|
handle_free(tester, type);
|
||||||
|
}
|
||||||
|
|
||||||
|
void test_cache_test_16m(reptester *tester, alloc_type type) {
|
||||||
|
u64 start = read_cpu_timer();
|
||||||
|
u64 fault_count_start = page_fault_count();
|
||||||
|
|
||||||
|
handle_alloc(tester, type);
|
||||||
|
|
||||||
|
u64 total_size = tester->params.read_size * tester->params.read_count;
|
||||||
|
|
||||||
|
cache_test(tester->params.buffer, total_size, 0xffffff);
|
||||||
|
|
||||||
|
u64 fault_count_end = page_fault_count();
|
||||||
|
u64 end = read_cpu_timer();
|
||||||
|
|
||||||
|
u64 read_time = end - start;
|
||||||
|
u64 page_faults = fault_count_end - fault_count_start;
|
||||||
|
|
||||||
|
tester->results = {
|
||||||
|
total_size,
|
||||||
|
read_time,
|
||||||
|
page_faults,
|
||||||
|
};
|
||||||
|
|
||||||
|
handle_free(tester, type);
|
||||||
|
}
|
||||||
|
|
||||||
|
void test_cache_test_32m(reptester *tester, alloc_type type) {
|
||||||
|
u64 start = read_cpu_timer();
|
||||||
|
u64 fault_count_start = page_fault_count();
|
||||||
|
|
||||||
|
handle_alloc(tester, type);
|
||||||
|
|
||||||
|
u64 total_size = tester->params.read_size * tester->params.read_count;
|
||||||
|
|
||||||
|
cache_test(tester->params.buffer, total_size, 0x1ffffff);
|
||||||
|
|
||||||
|
u64 fault_count_end = page_fault_count();
|
||||||
|
u64 end = read_cpu_timer();
|
||||||
|
|
||||||
|
u64 read_time = end - start;
|
||||||
|
u64 page_faults = fault_count_end - fault_count_start;
|
||||||
|
|
||||||
|
tester->results = {
|
||||||
|
total_size,
|
||||||
|
read_time,
|
||||||
|
page_faults,
|
||||||
|
};
|
||||||
|
|
||||||
|
handle_free(tester, type);
|
||||||
|
}
|
||||||
|
|
||||||
|
void test_cache_test_64m(reptester *tester, alloc_type type) {
|
||||||
|
u64 start = read_cpu_timer();
|
||||||
|
u64 fault_count_start = page_fault_count();
|
||||||
|
|
||||||
|
handle_alloc(tester, type);
|
||||||
|
|
||||||
|
u64 total_size = tester->params.read_size * tester->params.read_count;
|
||||||
|
|
||||||
|
cache_test(tester->params.buffer, total_size, 0x3ffffff);
|
||||||
|
|
||||||
|
u64 fault_count_end = page_fault_count();
|
||||||
|
u64 end = read_cpu_timer();
|
||||||
|
|
||||||
|
u64 read_time = end - start;
|
||||||
|
u64 page_faults = fault_count_end - fault_count_start;
|
||||||
|
|
||||||
|
tester->results = {
|
||||||
|
total_size,
|
||||||
|
read_time,
|
||||||
|
page_faults,
|
||||||
|
};
|
||||||
|
|
||||||
|
handle_free(tester, type);
|
||||||
|
}
|
||||||
|
|
||||||
|
void test_cache_test_512m(reptester *tester, alloc_type type) {
|
||||||
|
u64 start = read_cpu_timer();
|
||||||
|
u64 fault_count_start = page_fault_count();
|
||||||
|
|
||||||
|
handle_alloc(tester, type);
|
||||||
|
|
||||||
|
u64 total_size = tester->params.read_size * tester->params.read_count;
|
||||||
|
|
||||||
|
cache_test(tester->params.buffer, total_size, 0x1fffffff);
|
||||||
|
|
||||||
|
u64 fault_count_end = page_fault_count();
|
||||||
|
u64 end = read_cpu_timer();
|
||||||
|
|
||||||
|
u64 read_time = end - start;
|
||||||
|
u64 page_faults = fault_count_end - fault_count_start;
|
||||||
|
|
||||||
|
tester->results = {
|
||||||
|
total_size,
|
||||||
|
read_time,
|
||||||
|
page_faults,
|
||||||
|
};
|
||||||
|
|
||||||
|
handle_free(tester, type);
|
||||||
|
}
|
||||||
|
|
||||||
|
void test_cache_test_full(reptester *tester, alloc_type type) {
|
||||||
|
u64 start = read_cpu_timer();
|
||||||
|
u64 fault_count_start = page_fault_count();
|
||||||
|
|
||||||
|
handle_alloc(tester, type);
|
||||||
|
|
||||||
|
u64 total_size = tester->params.read_size * tester->params.read_count;
|
||||||
|
|
||||||
|
cache_test(tester->params.buffer, total_size, 0xffffffffffffffff);
|
||||||
|
|
||||||
|
u64 fault_count_end = page_fault_count();
|
||||||
|
u64 end = read_cpu_timer();
|
||||||
|
|
||||||
|
u64 read_time = end - start;
|
||||||
|
u64 page_faults = fault_count_end - fault_count_start;
|
||||||
|
|
||||||
|
tester->results = {
|
||||||
|
total_size,
|
||||||
|
read_time,
|
||||||
|
page_faults,
|
||||||
|
};
|
||||||
|
|
||||||
|
handle_free(tester, type);
|
||||||
|
}
|
||||||
|
|
||||||
u64 get_file_length(FILE *fp) {
|
u64 get_file_length(FILE *fp) {
|
||||||
if (!fp) {
|
if (!fp) {
|
||||||
return 0;
|
return 0;
|
||||||
@@ -36,6 +36,8 @@ global read_32x2_simd_offset
|
|||||||
global read_32x2_simd_no_offset
|
global read_32x2_simd_no_offset
|
||||||
global read_16x4_simd
|
global read_16x4_simd
|
||||||
global read_32x4_simd
|
global read_32x4_simd
|
||||||
|
global cache_test ; Expects 3 inputs (pointer, read_count, mask)
|
||||||
|
global cache_test_unaligned ; Expects 3 inputs (pointer, read_count, mask)
|
||||||
|
|
||||||
mov_all_bytes_asm:
|
mov_all_bytes_asm:
|
||||||
xor rax, rax
|
xor rax, rax
|
||||||
@@ -492,3 +494,36 @@ read_32x4_simd:
|
|||||||
jb .loop
|
jb .loop
|
||||||
|
|
||||||
ret
|
ret
|
||||||
|
|
||||||
|
; cache_test(pointer, read_count, mask)
;   rdi = base pointer of the buffer to read from
;   rsi = number of bytes to read; the loop consumes 128 per iteration
;   rdx = mask ANDed with the running offset so reads wrap inside a window
;
; Each iteration loads 4 x 32 bytes (ymm0-ymm3) starting at base + offset,
; where offset advances by 128 and is then wrapped with the mask.
;
; The base pointer is kept in r11 (caller-saved) rather than rbx: rbx is
; callee-saved in the System V AMD64 calling convention and was previously
; overwritten here without being preserved for the caller.
cache_test:
    xor r10, r10              ; Zero running offset
    mov r11, rdi              ; Save original pointer (scratch register)
.loop:
    add rdi, r10              ; Advance the pointer by the current offset
    add r10, 128              ; Next offset
    and r10, rdx              ; Wrap offset with the mask
    vmovdqu ymm0, [rdi + 0]
    vmovdqu ymm1, [rdi + 32]
    vmovdqu ymm2, [rdi + 64]
    vmovdqu ymm3, [rdi + 96]
    mov rdi, r11              ; Restore original pointer
    sub rsi, 128              ; Decrement remaining byte count
    ja .loop                  ; Continue while the count stayed above zero
    ret
|
||||||
|
|
||||||
|
; cache_test_unaligned(pointer, read_count, mask)
;   rdi = base pointer of the buffer to read from
;   rsi = number of bytes to read; the loop consumes 128 per iteration
;   rdx = mask ANDed with the running offset so reads wrap inside a window
;
; Same loop as cache_test, except the base pointer is first advanced by
; 5 bytes so every 32-byte load starts 5 bytes past where cache_test's would.
;
; The base pointer is kept in r11 (caller-saved) rather than rbx: rbx is
; callee-saved in the System V AMD64 calling convention and was previously
; overwritten here without being preserved for the caller.
cache_test_unaligned:
    xor r10, r10              ; Zero running offset
    add rdi, 5                ; Unalign pointer
    mov r11, rdi              ; Save unaligned base pointer (scratch register)
.loop:
    add rdi, r10              ; Advance the pointer by the current offset
    add r10, 128              ; Next offset
    and r10, rdx              ; Wrap offset with the mask
    vmovdqu ymm0, [rdi + 0]
    vmovdqu ymm1, [rdi + 32]
    vmovdqu ymm2, [rdi + 64]
    vmovdqu ymm3, [rdi + 96]
    mov rdi, r11              ; Restore unaligned base pointer
    sub rsi, 128              ; Decrement remaining byte count
    ja .loop                  ; Continue while the count stayed above zero
    ret
|
||||||
|
|||||||
@@ -3,6 +3,7 @@
|
|||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
|
#include <sys/mman.h>
|
||||||
#include <sys/resource.h>
|
#include <sys/resource.h>
|
||||||
#include <sys/time.h>
|
#include <sys/time.h>
|
||||||
|
|
||||||
@@ -10,7 +11,9 @@ void handle_alloc(reptester *tester, alloc_type type) {
|
|||||||
switch (type) {
|
switch (type) {
|
||||||
case ALLOC_TYPE_WITH_MALLOC:
|
case ALLOC_TYPE_WITH_MALLOC:
|
||||||
if (!(tester->params.buffer)) {
|
if (!(tester->params.buffer)) {
|
||||||
tester->params.buffer = (char *)malloc(tester->params.read_size + 1);
|
tester->params.buffer = (char *)mmap(
|
||||||
|
NULL, tester->params.read_size + 1, PROT_READ | PROT_WRITE,
|
||||||
|
MAP_ANON | MAP_SHARED | MAP_NORESERVE, -1, 0);
|
||||||
memset(tester->params.buffer, 0, tester->params.read_size + 1);
|
memset(tester->params.buffer, 0, tester->params.read_size + 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -24,7 +27,7 @@ void handle_free(reptester *tester, alloc_type type) {
|
|||||||
switch (type) {
|
switch (type) {
|
||||||
case ALLOC_TYPE_WITH_MALLOC:
|
case ALLOC_TYPE_WITH_MALLOC:
|
||||||
if (tester->params.buffer) {
|
if (tester->params.buffer) {
|
||||||
free(tester->params.buffer);
|
munmap(tester->params.buffer, tester->params.read_size + 1);
|
||||||
tester->params.buffer = NULL;
|
tester->params.buffer = NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -58,7 +61,9 @@ void run_func_test(reptester *tester, reptest_func func, const char *func_name,
|
|||||||
|
|
||||||
if (type == ALLOC_TYPE_WITH_MALLOC) {
|
if (type == ALLOC_TYPE_WITH_MALLOC) {
|
||||||
buffer = tester->params.buffer;
|
buffer = tester->params.buffer;
|
||||||
tester->params.buffer = (char *)malloc(tester->params.read_size + 1);
|
tester->params.buffer =
|
||||||
|
(char *)mmap(NULL, tester->params.read_size + 1, PROT_READ | PROT_WRITE,
|
||||||
|
MAP_ANON | MAP_SHARED | MAP_NORESERVE, -1, 0);
|
||||||
memset(tester->params.buffer, 0, tester->params.read_size + 1);
|
memset(tester->params.buffer, 0, tester->params.read_size + 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -100,7 +105,7 @@ void run_func_test(reptester *tester, reptest_func func, const char *func_name,
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (type == ALLOC_TYPE_WITH_MALLOC) {
|
if (type == ALLOC_TYPE_WITH_MALLOC) {
|
||||||
free(tester->params.buffer);
|
munmap(tester->params.buffer, tester->params.read_size + 1);
|
||||||
tester->params.buffer = buffer;
|
tester->params.buffer = buffer;
|
||||||
}
|
}
|
||||||
|
|
||||||
Reference in New Issue
Block a user