Compare commits

...

43 Commits

Author SHA1 Message Date
94167e05fd Remove unused variables 2024-06-23 21:52:58 +01:00
7177add4ce Switch to using mmap and test unaligned loads 2024-06-23 21:10:14 +01:00
37d3340df9 Rename C++ files 2024-06-22 13:54:45 +01:00
fcdaf41495 Write to test buffer before working with it 2024-05-19 00:32:39 +01:00
cec0662e68 Add 256k cache test function 2024-05-19 00:10:18 +01:00
ba31dd9f8c Add cache testing functions 2024-05-06 22:55:44 +01:00
f355ab2d25 Update .gitignore 2024-05-06 22:55:32 +01:00
46ee06406f SIMD homework 2024-04-13 21:31:00 +01:00
5c97a99839 Execution ports homework 2024-04-13 13:56:29 +01:00
2cb6f1beb6 Complete homework for execution ports video 2024-02-11 00:42:14 +00:00
4945a298ac Add more assembly functions to repetition_testing 2024-02-10 17:54:42 +00:00
063183e46c Code alignment testing 2024-01-13 19:09:42 +00:00
12f25cfe51 Additional assembly loops 2023-12-03 23:24:52 +00:00
57acc5e16f Add assembly loops 2023-12-02 22:18:57 +00:00
43718ff047 Update release build flag 2023-12-02 20:28:04 +00:00
297d9c53f3 Fix repetition tester 2023-11-25 18:25:53 +00:00
b2cb252822 Remove .vscode and update .gitignore 2023-11-25 18:25:33 +00:00
a7d977210e Remove extraneous Windows functions 2023-09-24 19:11:10 +01:00
96ae35912f Wrap page fault count functions in os-agnostic function 2023-09-24 19:00:25 +01:00
dd512f8304 Fix Windows errors 2023-09-24 18:48:51 +01:00
389a494bfc Ensure clang-format doesn't change the include order of Windows headers 2023-09-24 18:46:14 +01:00
f18ecde7bc Add support for probing page fault behaviour on Windows 2023-09-24 17:35:23 +01:00
9104a41e2d Start probing page fault behaviour 2023-09-24 17:15:22 +01:00
7ce7101240 Add page fault stats to repetition tester 2023-09-23 23:55:46 +01:00
dca94a0edf Restructure of repetition tester 2023-09-23 23:06:38 +01:00
b7d33de2d7 Update comment 2023-09-23 22:44:13 +01:00
b1b90bc6f5 Change the testing function so it doesn't reallocate the main buffer 2023-09-18 22:28:48 +01:00
3a0917ed58 Test reading with and without malloc 2023-09-10 00:39:49 +01:00
4b905a56a5 Update .gitignore 2023-09-09 21:26:45 +01:00
967b1524d7 Update compile script 2023-09-09 21:26:35 +01:00
ab99d4b003 Update parser.c 2023-09-09 21:26:22 +01:00
22466ea56f Add time_in_seconds function 2023-09-09 21:25:57 +01:00
9ddb991b94 Basic repetition testing implementation 2023-09-09 21:25:32 +01:00
1bfc162845 Update profiler output 2023-09-03 00:35:58 +01:00
e461de30c0 Add data throughput calculation 2023-09-03 00:26:30 +01:00
19c02b4e99 Update the profiler to allow for different level of profiling 2023-07-23 16:36:21 +01:00
0e973feb38 Include the IDs from the update location 2023-07-23 16:36:07 +01:00
3af3a72472 Move the IDs to the processor 2023-07-23 16:35:35 +01:00
8e17765774 Update the timer to properly handle recursion and deep call stacks 2023-07-23 14:21:47 +01:00
f8cd7d253e Update .gitignore 2023-07-23 14:21:26 +01:00
46bc7e03a4 Add debug config for timer_test 2023-07-23 14:21:11 +01:00
419a7c8534 Ensure compile is executable 2023-07-23 11:33:07 +01:00
2d74f02138 Use the updated free_json and profile it 2023-07-12 00:45:29 +01:00
34 changed files with 2938 additions and 175 deletions

View File

@@ -1,2 +1,2 @@
all: all:
clang++ -g dasm.cpp -o dasm clang++ -g dasm.cc -o dasm

View File

@@ -1,2 +1,2 @@
all: all:
clang++ -g dasm.cpp -o dasm clang++ -g dasm.cc -o dasm

View File

@@ -1,2 +1,2 @@
all: all:
clang++ -g dasm.cpp -o dasm clang++ -g dasm.cc -o dasm

View File

@@ -1,7 +1,7 @@
CC=clang++ CC=clang++
CFLAGS=-g -O0 -Wall -Wextra CFLAGS=-g -O0 -Wall -Wextra
LIBS=-Wl,-rpath,./lib -L./lib -lsim86 LIBS=-Wl,-rpath,./lib -L./lib -lsim86
SRC=*.cpp SRC=*.cc
OUT=sim86 OUT=sim86
all: all:

View File

@@ -1,17 +1,17 @@
mk_haversine_fscanf: mk_haversine_fscanf:
clang++ -g cpp/fscanf.cpp cpp/haversine.cpp -o cpp/haverscan clang++ -g cpp/fscanf.cc cpp/haversine.cc -o cpp/haverscan
run_haversine_fscanf: run_haversine_fscanf:
cd ./cpp && ./haverscan cd ./cpp && ./haverscan
mk_haversine_strtok: mk_haversine_strtok:
clang++ -g cpp/strtok.cpp cpp/haversine.cpp -o cpp/haverstrtok clang++ -g cpp/strtok.cc cpp/haversine.cc -o cpp/haverstrtok
run_haversine_strtok: run_haversine_strtok:
cd ./cpp && ./haverstrtok cd ./cpp && ./haverstrtok
mk_test: mk_test:
clang++ -g -lpthread cpp/test.cpp cpp/haversine.cpp -o cpp/test clang++ -g -lpthread cpp/test.cc cpp/haversine.cc -o cpp/test
run_test: run_test:
cd ./cpp && ./test cd ./cpp && ./test

View File

@@ -1,7 +1,13 @@
.cache .cache
.vscode
.idea
compile_commands.json compile_commands.json
count_and_distances count_and_distances
pairs.json pairs.json
cache_test
main main
genhavr genhavr
prochavr prochavr
reptest
memtest
timer_test*

View File

@@ -1,35 +0,0 @@
{
"configurations": [
{
"name": "Debug clustered generator",
"type": "cppdbg",
"request": "launch",
"cwd": "${workspaceFolder}",
"program": "${workspaceFolder}/genhavr",
"args": [
"--cluster",
"10"
]
},
{
"name": "Debug uniform generator",
"type": "cppdbg",
"request": "launch",
"cwd": "${workspaceFolder}",
"program": "${workspaceFolder}/genhavr",
"args": [
"10"
]
},
{
"name": "Debug processor",
"type": "cppdbg",
"request": "launch",
"cwd": "${workspaceFolder}",
"program": "${workspaceFolder}/prochavr",
"args": [
"${workspaceFolder}/pairs.json"
]
}
]
}

86
haversine_02/compile Normal file → Executable file
View File

@@ -4,6 +4,12 @@ CC=clang
CXX=clang++ CXX=clang++
CFLAGS="-Wall -Wextra -I$(realpath ./include) " CFLAGS="-Wall -Wextra -I$(realpath ./include) "
ASM=nasm
ASM_FLAGS="-f elf64 "
AR=ar
AR_FLAGS="rcs"
# PARSE ARGUMENTS # PARSE ARGUMENTS
# From this StackOverflow answer https://stackoverflow.com/a/14203146 # From this StackOverflow answer https://stackoverflow.com/a/14203146
while [[ $# > 0 ]];do while [[ $# > 0 ]];do
@@ -12,8 +18,12 @@ while [[ $# > 0 ]];do
RELEASE=true RELEASE=true
shift shift
;; ;;
--enable-profiling) --basic-profiling)
ENABLE_PROFILING=true BASIC_PROFILING=true
shift
;;
--full-profiling)
FULL_PROFILING=true
shift shift
;; ;;
*|-*|--*) *|-*|--*)
@@ -26,45 +36,73 @@ done
# BUILD TYPE # BUILD TYPE
if [[ $RELEASE == true ]]; then if [[ $RELEASE == true ]]; then
CFLAGS+="-O3" CFLAGS+="-g -O1"
else else
CFLAGS+="-g" CFLAGS+="-g"
fi fi
# GENERATOR # GENERATOR
GENSRC="./src/generator/gen_argparser.cpp \ GENSRC="./src/generator/gen_argparser.cc \
./src/generator/generator.cpp \ ./src/generator/generator.cc \
./src/haversine.cpp \ ./src/haversine.cc \
./src/point_types.cpp \ ./src/point_types.cc \
./src/generator/main.cpp" ./src/generator/main.cc"
GENOUT=genhavr GENOUT=genhavr
(set -x ; $CXX $CFLAGS $GENSRC -o $GENOUT) (set -x ; $CXX $CFLAGS $GENSRC -o $GENOUT)
echo echo
# PROFILER # PROFILER
PROFSRC="../src/profiler/timer.c" PROFSRC="../src/profiler/timer.c"
PROFFLAGS="-c" PROFFLAGS="-c "
PROF_BUILD_DIR=prof_build PROF_BUILD_DIR=prof_build
# PROCESSOR # PROCESSOR
JSONSRC="../src/json/*.c" JSONSRC="../src/json/*.c "
JSONFLAGS="-c " JSONFLAGS="-c "
JSON_BUILD_DIR=json_build JSON_BUILD_DIR=json_build
PROCSRC="./$JSON_BUILD_DIR/*.o \ PROCSRC="./$JSON_BUILD_DIR/*.o \
./src/haversine.cpp \ ./src/haversine.cc \
./src/point_types.cpp \ ./src/point_types.cc \
./src/processor/proc_argparser.cpp \ ./src/processor/proc_argparser.cc \
./src/processor/main.cpp " ./src/processor/main.cc "
PROCOUT=prochavr PROCOUT=prochavr
if [[ $ENABLE_PROFILING == true ]]; then # MEMTESTER
JSONFLAGS+="-DENABLE_PROFILING" MEMTESTSRC="./src/memtester/*.c"
PROCSRC+="./$PROF_BUILD_DIR/*.o" MEMTESTOUT=memtest
PROCFLAGS="-DENABLE_PROFILING"
(set -x ; $CC $CFLAGS $MEMTESTSRC -o $MEMTESTOUT)
echo
# REPTEST ASSEMBLY
ASM_BUILD_DIR=reptest_build
ASM_SRC="./src/repetition_testing/reptest_functions.asm"
ASM_OBJ="./$ASM_BUILD_DIR/funcs.o"
ASM_LIB="./$ASM_BUILD_DIR/libfuncs.a"
mkdir $ASM_BUILD_DIR
(set -x ; $ASM $ASM_FLAGS $ASM_SRC -o $ASM_OBJ)
(set -x ; $AR $AR_FLAGS $ASM_LIB $ASM_OBJ)
echo
if [[ $BASIC_PROFILING == true ]] || [[ $FULL_PROFILING == true ]]; then
if [[ $FULL_PROFILING == true ]]; then
JSONFLAGS+="-DFULL_PROFILING"
PROCFLAGS="-DFULL_PROFILING"
REPTESTFLAGS="-DFULL_PROFILING"
PROFFLAGS+="-DFULL_PROFILING"
elif [[ $BASIC_PROFILING == true ]]; then
JSONFLAGS+="-DBASIC_PROFILING"
PROCFLAGS="-DBASIC_PROFILING"
REPTESTFLAGS="-DBASIC_PROFILING"
PROFFLAGS+="-DBASIC_PROFILING"
fi
PROCSRC+=./$PROF_BUILD_DIR/*.o
mkdir $PROF_BUILD_DIR mkdir $PROF_BUILD_DIR
cd $PROF_BUILD_DIR cd $PROF_BUILD_DIR
@@ -73,6 +111,13 @@ if [[ $ENABLE_PROFILING == true ]]; then
echo echo
cd ../ cd ../
# REPETITION TESTING
REPTESTSRC="./src/repetition_testing/*.cc ./$PROF_BUILD_DIR/*.o $ASM_LIB"
REPTESTOUT=reptest
(set -x ; $CXX $CFLAGS $REPTESTFLAGS $REPTESTSRC -o $REPTESTOUT)
echo
fi fi
mkdir $JSON_BUILD_DIR mkdir $JSON_BUILD_DIR
@@ -86,6 +131,5 @@ cd ../
(set -x ; $CXX $CFLAGS $PROCFLAGS $PROCSRC -o $PROCOUT) (set -x ; $CXX $CFLAGS $PROCFLAGS $PROCSRC -o $PROCOUT)
echo echo
# CLEAR BUILD FILES # CLEAR BUILD FILES
rm -rvf $JSON_BUILD_DIR $PROF_BUILD_DIR rm -rvf $JSON_BUILD_DIR $PROF_BUILD_DIR $ASM_BUILD_DIR

View File

@@ -6,15 +6,14 @@ enum profiler_ids {
PROFILER_ID_JSON_PARSE, PROFILER_ID_JSON_PARSE,
PROFILER_ID_READ_JSON_FILE, PROFILER_ID_READ_JSON_FILE,
PROFILER_ID_PARSER_SETUP, PROFILER_ID_PARSER_SETUP,
PROFILER_ID_LEX_GET_TOKEN, PROFILER_ID_PARSER_PARSE_TOKENS,
PROFILER_ID_PARSE_TOKEN,
PROFILER_ID_PARSER_TEAR_DOWN, PROFILER_ID_PARSER_TEAR_DOWN,
PROFILER_ID_LOAD_JSON_PAIRS, PROFILER_ID_LOAD_JSON_PAIRS,
PROFILER_ID_READ_BINARY, PROFILER_ID_READ_BINARY,
PROFILER_ID_HAVERSINE_SUM, PROFILER_ID_HAVERSINE_SUM,
PROFILER_ID_HAVERSINE_DISTANCE,
PROFILER_ID_HAVERSINE_AVG, PROFILER_ID_HAVERSINE_AVG,
PROFILER_ID_TEAR_DOWN, PROFILER_ID_TEAR_DOWN,
PROFILER_ID_FREE_JSON,
COUNT_PROFILER_IDS, COUNT_PROFILER_IDS,
}; };

View File

@@ -7,17 +7,31 @@
#define MAX_PROFILE_SAMPLES 1024 #define MAX_PROFILE_SAMPLES 1024
#endif // !MAX_PROFILE_SAMPLES #endif // !MAX_PROFILE_SAMPLES
#ifdef ENABLE_PROFILING #ifdef FULL_PROFILING
#define SAMPLE_START(ID, TITLE) sample_start(ID, TITLE)
#define SAMPLE_END(ID, BYTES) sample_end(ID, BYTES)
#define SAMPLE_END_DEFAULT(ID) sample_end(ID, 0)
#ifdef __cplusplus
extern "C" {
#endif
void sample_start(u64 id, const char *title);
void sample_end(u64 id, u64 byte_count);
#ifdef __cplusplus
}
#endif
#else
#define SAMPLE_START(ID, TITLE)
#define SAMPLE_END(ID, BYTES)
#define SAMPLE_END_DEFAULT(ID)
#endif // FULL_PROFILING
#if defined(BASIC_PROFILING) || defined(FULL_PROFILING)
#define PROFILE_START(COUNT) profile_start(COUNT) #define PROFILE_START(COUNT) profile_start(COUNT)
#define PROFILE_END profile_end() #define PROFILE_END profile_end()
#define SAMPLE_START(ID, TITLE) sample_start(ID, TITLE)
#define SAMPLE_END(ID) sample_end(ID)
#else
#define PROFILE_START(COUNT)
#define PROFILE_END
#define SAMPLE_START(ID, TITLE)
#define SAMPLE_END(ID)
#endif // ENABLE_PROFILING
#ifdef __cplusplus #ifdef __cplusplus
extern "C" { extern "C" {
@@ -27,10 +41,12 @@ typedef struct sample profiler_sample_t;
struct sample { struct sample {
const char *title; const char *title;
u64 first_start;
u64 start; u64 start;
u64 duration; u64 exclusive_time;
u64 children_duration; u64 children_time;
u64 hit_count; u64 hit_count;
u64 byte_count;
profiler_sample_t *parent; profiler_sample_t *parent;
}; };
@@ -45,14 +61,18 @@ u64 read_cpu_timer(void);
// CPU frequency in hz/sec // CPU frequency in hz/sec
u64 get_cpu_freq(u64 milliseconds); u64 get_cpu_freq(u64 milliseconds);
f64 time_in_seconds(u64 cpu_time, u64 cpu_freq);
void profile_start(u64 count); void profile_start(u64 count);
void profile_end(); void profile_end();
void sample_start(u64 id, const char *title);
void sample_end(u64 id);
#ifdef __cplusplus #ifdef __cplusplus
} }
#endif #endif
#else
#define PROFILE_START(COUNT)
#define PROFILE_END
#endif // BASIC_PROFILING || FULL_PROFILING
#endif // !TIMER_H #endif // !TIMER_H

View File

@@ -0,0 +1,71 @@
#ifndef REPTESTER_H
#define REPTESTER_H
#include "aliases.h"
// Inputs handed to a repetition test: a file name, a buffer pointer and two
// size/count values.
// NOTE(review): the per-field notes are inferred from the field names only -
// confirm against the test functions that consume this struct.
struct reptest_params {
const char *filename; // presumably the path of the file the tests read
char *buffer; // presumably the memory the tests read into / operate on
u64 read_size; // presumably a size in bytes - TODO confirm
u64 read_count; // presumably the number of reads to perform - TODO confirm
};
// Measurements collected for a test run.
// NOTE(review): units are not visible here; read_time is presumably in CPU
// timer ticks (the profiler exposes time_in_seconds(cpu_time, cpu_freq)) -
// confirm.
struct reptest_results {
u64 bytes_read;
u64 read_time;
u64 page_faults;
};
// Aggregated timing statistics: minimum, maximum, average and total.
// NOTE(review): presumably accumulated across the runs of one test and
// expressed in CPU timer ticks - confirm against the tester implementation.
struct time_stats {
u64 min_time;
u64 max_time;
u64 avg_time;
u64 total_time;
};
// Aggregated memory statistics: minimum, maximum, average and total fault
// counts plus a total byte count.
// NOTE(review): presumably "faults" are page faults as returned by
// page_fault_count() - confirm against the tester implementation.
struct mem_stats {
u64 min_faults;
u64 max_faults;
u64 avg_faults;
u64 total_bytes;
u64 total_faults;
};
// Complete state of a repetition tester: the test parameters, timing
// configuration, progress counters, running statistics and a results record.
// NOTE(review): the per-field notes marked "presumably" are inferred from the
// names only - confirm against the code that drives the tester.
struct reptester {
reptest_params params;
const u64 cpu_freq; // const member: must be given its value when the struct is initialised
f64 wait_time_secs; // presumably how long to keep testing without a new best time
f64 test_time_secs; // presumably the elapsed time of the current test, in seconds
u64 test_start_time; // presumably a CPU timer reading taken when the test started
u64 current_run;
time_stats tstats;
mem_stats mstats;
reptest_results results;
};
// Selects the allocation mode a test function is run with.
// COUNT_ALLOC_TYPE is not a mode: as the last enumerator it equals the number
// of modes and is used to size per-mode arrays (see func_data::names).
enum alloc_type {
ALLOC_TYPE_WITHOUT_MALLOC,
ALLOC_TYPE_WITH_MALLOC,
COUNT_ALLOC_TYPE,
};
// Signature shared by every test function: it receives the tester state and
// the allocation mode to run with, and returns nothing.
typedef void (*reptest_func)(reptester *tester, alloc_type type);
// Pairs a test function with one name string per allocation mode; names is
// sized by COUNT_ALLOC_TYPE so it can be indexed with an alloc_type value.
struct func_data {
const char *names[COUNT_ALLOC_TYPE];
reptest_func func;
};
void handle_alloc(reptester *tester, alloc_type type);
void handle_free(reptester *tester, alloc_type type);
void run_func_test(reptester *tester, reptest_func func, const char *func_name,
alloc_type type);
u64 page_fault_count();
void print_results(reptester *tester, const char *name);
#endif // !REPTESTER_H

View File

@@ -1,5 +1,7 @@
#include "json/json_entities.h" #include "json/json_entities.h"
#include "aliases.h" #include "aliases.h"
#include "processor/ids.h"
#include "profiler/timer.h"
#include "json/dstring.h" #include "json/dstring.h"
#include <stdio.h> #include <stdio.h>
#include <stdlib.h> #include <stdlib.h>
@@ -110,58 +112,90 @@ void print_json(const jentity_t *entity, u32 indent) {
} }
} }
void free_json(jentity_t **entity) { void free_json(jentity_t **root) {
if (!(*entity)) { if (!(*root)) {
return; return;
} }
jentity_t *entt_ptr = *entity; SAMPLE_START(PROFILER_ID_FREE_JSON, "FREE JSON");
jentity_t *current = *root;
jentity_t *temp = NULL;
dstr_t *key = NULL; dstr_t *key = NULL;
const jval_t *value = NULL; jval_t *value = NULL;
if (entt_ptr->type == JENTITY_SINGLE) { while (current) {
value = &(entt_ptr->value); if (current->parent) {
} else { // Move the beginning pointer of the collection to the next child
key = entt_ptr->pair.key;
value = &(entt_ptr->pair.value);
}
if (key) { // TODO (Abdelrahman): This part gets repeated for some elements. Try to
dstr_free(&(entt_ptr->pair.key)); // avoid that repetition
}
if (!value) { jentity_t *parent = current->parent;
return; jcoll_t *collection = NULL;
}
switch (value->type) { if (parent->type == JENTITY_SINGLE) {
case JVAL_COLLECTION: collection = parent->value.collection;
if (!(value->collection)) { } else {
collection = parent->pair.value.collection;
}
if (collection) {
collection->begin = current->next;
}
}
if (current->type == JENTITY_SINGLE) {
key = NULL;
value = &(current->value);
} else {
key = current->pair.key;
value = &(current->pair.value);
}
if (key) {
dstr_free(&(current->pair.key));
}
if (!value) {
break; break;
} }
if (value->collection->begin) { if (value->type == JVAL_COLLECTION) {
free_json(&(value->collection->begin)); if (!(value->collection->begin)) {
// Once all children of the collection has been freed, free the memory
// allocated to the collection and the entity that holds it
free(value->collection);
temp = current;
current = current->next != NULL ? current->next : current->parent;
free(temp);
temp = NULL;
continue;
}
current = value->collection->begin;
} else {
if (value->type == JVAL_STRING) {
dstr_free(&(value->string));
}
temp = current;
current = current->next != NULL ? current->next : current->parent;
free(temp);
temp = NULL;
} }
free(value->collection);
break;
case JVAL_STRING:
dstr_free(&(entt_ptr->pair.value.string));
break;
default:
break;
} }
if (entt_ptr->next) { *root = NULL;
free_json(&(entt_ptr->next));
}
free(*entity); SAMPLE_END_DEFAULT(PROFILER_ID_FREE_JSON);
*entity = NULL;
} }
jcoll_t *get_collection_from_entity(const jentity_t *entity) { jcoll_t *get_collection_from_entity(const jentity_t *entity) {

View File

@@ -1,6 +1,6 @@
#include "json/parser.h" #include "json/parser.h"
#include "aliases.h" #include "aliases.h"
#include "profiler/ids.h" #include "processor/ids.h"
#include "profiler/timer.h" #include "profiler/timer.h"
#include "json/dstring.h" #include "json/dstring.h"
#include "json/json_entities.h" #include "json/json_entities.h"
@@ -23,8 +23,6 @@ INTERNAL jentity_t *add_value(parser_t *parser);
INTERNAL void add_collection(parser_t *parser); INTERNAL void add_collection(parser_t *parser);
jentity_t *load_json(const char *filepath) { jentity_t *load_json(const char *filepath) {
SAMPLE_START(PROFILER_ID_READ_JSON_FILE, "READ JSON FILE");
FILE *fp = fopen(filepath, "r"); FILE *fp = fopen(filepath, "r");
if (!fp) { if (!fp) {
@@ -40,11 +38,13 @@ jentity_t *load_json(const char *filepath) {
char *json = (char *)malloc(sizeof(char) * (length + 1)); char *json = (char *)malloc(sizeof(char) * (length + 1));
memset(json, 0, length + 1); memset(json, 0, length + 1);
SAMPLE_START(PROFILER_ID_READ_JSON_FILE, "READ JSON FILE");
fread(json, sizeof(char), length, fp); fread(json, sizeof(char), length, fp);
fclose(fp); SAMPLE_END(PROFILER_ID_READ_JSON_FILE, length);
SAMPLE_END(PROFILER_ID_READ_JSON_FILE); fclose(fp);
SAMPLE_START(PROFILER_ID_PARSER_SETUP, "JSON PARSER SETUP"); SAMPLE_START(PROFILER_ID_PARSER_SETUP, "JSON PARSER SETUP");
@@ -63,23 +63,19 @@ jentity_t *load_json(const char *filepath) {
return NULL; return NULL;
} }
SAMPLE_END(PROFILER_ID_PARSER_SETUP); SAMPLE_END_DEFAULT(PROFILER_ID_PARSER_SETUP);
SAMPLE_START(PROFILER_ID_PARSER_PARSE_TOKENS, "PARSE TOKENS");
SAMPLE_START(PROFILER_ID_LEX_GET_TOKEN, "GET NEXT TOKEN");
lex_result_t result = get_next_token(lexer, json); lex_result_t result = get_next_token(lexer, json);
SAMPLE_END(PROFILER_ID_LEX_GET_TOKEN);
if (result.error.errno) { if (result.error.errno) {
printf("%s\n", result.error.msg); printf("%s\n", result.error.msg);
} else { } else {
while (result.token.type != TK_NO_TOKEN) { while (result.token.type != TK_NO_TOKEN) {
SAMPLE_START(PROFILER_ID_PARSE_TOKEN, "PARSE TOKEN");
parse_token(parser, result.token); parse_token(parser, result.token);
SAMPLE_END(PROFILER_ID_PARSE_TOKEN);
SAMPLE_START(PROFILER_ID_LEX_GET_TOKEN, "GET NEXT TOKEN");
result = get_next_token(lexer, NULL); result = get_next_token(lexer, NULL);
SAMPLE_END(PROFILER_ID_LEX_GET_TOKEN);
if (result.error.errno) { if (result.error.errno) {
printf("%s\n", result.error.msg); printf("%s\n", result.error.msg);
@@ -88,6 +84,8 @@ jentity_t *load_json(const char *filepath) {
} }
} }
SAMPLE_END_DEFAULT(PROFILER_ID_PARSER_PARSE_TOKENS);
jentity_t *root = parser->root; jentity_t *root = parser->root;
SAMPLE_START(PROFILER_ID_PARSER_TEAR_DOWN, "PARSER TEAR DOWN"); SAMPLE_START(PROFILER_ID_PARSER_TEAR_DOWN, "PARSER TEAR DOWN");
@@ -96,7 +94,7 @@ jentity_t *load_json(const char *filepath) {
lexer_free(&lexer); lexer_free(&lexer);
free(json); free(json);
SAMPLE_END(PROFILER_ID_PARSER_TEAR_DOWN); SAMPLE_END_DEFAULT(PROFILER_ID_PARSER_TEAR_DOWN);
return root; return root;
} }

View File

@@ -0,0 +1,125 @@
#include "aliases.h"
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#define PAGESIZE 4096
#if _WIN32
// clang-format off
#include <intrin.h>
#include <windows.h>
#include <psapi.h>
// clang-format on
// State needed by the Windows page-fault query.
typedef struct {
bool Initialized; // set to true by InitializeOSMetrics on its first call
HANDLE ProcessHandle; // handle returned by OpenProcess for the current process id
} os_metrics;
static os_metrics GlobalMetrics;
// Returns the PageFaultCount that GetProcessMemoryInfo reports for the process
// handle stored in GlobalMetrics.
// The result of GetProcessMemoryInfo is not checked; the counters struct is
// zero-initialised (apart from cb) before the call.
static u64 ReadWindowsPageFaultCount(void) {
  PROCESS_MEMORY_COUNTERS_EX counters = {0};
  counters.cb = sizeof(counters);

  GetProcessMemoryInfo(GlobalMetrics.ProcessHandle,
                       (PROCESS_MEMORY_COUNTERS *)&counters, sizeof(counters));

  return counters.PageFaultCount;
}
// Opens a query handle to the current process and stores it in GlobalMetrics.
// Only the first call does any work; later calls return immediately.
static void InitializeOSMetrics(void) {
  if (GlobalMetrics.Initialized) {
    return;
  }

  // The flag is raised before the handle is opened, exactly once per process.
  GlobalMetrics.Initialized = true;
  GlobalMetrics.ProcessHandle =
      OpenProcess(PROCESS_QUERY_INFORMATION | PROCESS_VM_READ, FALSE,
                  GetCurrentProcessId());
}
#else // _WIN32
#include <sys/mman.h>
#include <sys/resource.h>
#include <sys/time.h>
typedef struct rusage rusage_t;
u64 nix_page_fault_count() {
rusage_t usage;
getrusage(RUSAGE_SELF, &usage);
return usage.ru_minflt + usage.ru_majflt;
}
#endif // _WIN32
// OS-agnostic page fault counter: forwards to the Windows implementation when
// _WIN32 is set and to the getrusage-based implementation otherwise.
u64 page_fault_count() {
  u64 faults = 0;

#if _WIN32
  faults = ReadWindowsPageFaultCount();
#else
  faults = nix_page_fault_count();
#endif

  return faults;
}
int main(int argc, char *argv[]) {
if (argc < 2 || argc > 2) {
printf("Usage: %s [NUMBER OF PAGES TO ALLOCATE]\n", argv[0]);
return EXIT_FAILURE;
}
#if _WIN32
InitializeOSMetrics();
#endif
u64 page_count = atol(argv[1]);
u64 alloc_size = page_count * PAGESIZE;
u64 touch_size = 0;
printf("Page Count,Touch Count,Fault Count,Extra Faults\n");
for (u64 touch_count = 0; touch_count <= page_count; ++touch_count) {
touch_size = touch_count * PAGESIZE;
#if _WIN32
u8 *data = (u8 *)VirtualAlloc(0, alloc_size, MEM_RESERVE | MEM_COMMIT,
PAGE_READWRITE);
#else
u8 *data = (u8 *)mmap(NULL, alloc_size, PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
#endif
if (!data) {
printf("Failed to allocate memory\n");
return EXIT_FAILURE;
}
u64 fault_start = page_fault_count();
for (u64 i = 0; i < touch_size; ++i) {
data[i] = (u8)i;
}
u64 fault_end = page_fault_count();
u64 faults = fault_end - fault_start;
printf("%llu,%llu,%llu,%lld\n", (unsigned long long)page_count,
(unsigned long long)touch_count, (unsigned long long)faults,
((long long)faults - touch_count));
#if _WIN32
VirtualFree(data, 0, MEM_RELEASE);
#else
munmap((void *)data, alloc_size);
#endif
}
return EXIT_SUCCESS;
}

View File

@@ -1,7 +1,7 @@
#include "haversine.h" #include "haversine.h"
#include "point_types.h" #include "point_types.h"
#include "processor/ids.h"
#include "processor/proc_argparser.h" #include "processor/proc_argparser.h"
#include "profiler/ids.h"
#include "profiler/timer.h" #include "profiler/timer.h"
#include "json/dstring.h" #include "json/dstring.h"
#include "json/json_entities.h" #include "json/json_entities.h"
@@ -19,7 +19,7 @@ int main(int argc, char *argv[]) {
SAMPLE_START(PROFILER_ID_CLI_PARSE, "CLI PARSING"); SAMPLE_START(PROFILER_ID_CLI_PARSE, "CLI PARSING");
ProcessorArgs args = parse_args(argc, argv); ProcessorArgs args = parse_args(argc, argv);
SAMPLE_END(PROFILER_ID_CLI_PARSE); SAMPLE_END_DEFAULT(PROFILER_ID_CLI_PARSE);
SAMPLE_START(PROFILER_ID_JSON_PARSE, "JSON PARSING"); SAMPLE_START(PROFILER_ID_JSON_PARSE, "JSON PARSING");
@@ -27,7 +27,7 @@ int main(int argc, char *argv[]) {
assert(root->type == JENTITY_SINGLE && root->value.type == JVAL_COLLECTION); assert(root->type == JENTITY_SINGLE && root->value.type == JVAL_COLLECTION);
SAMPLE_END(PROFILER_ID_JSON_PARSE); SAMPLE_END_DEFAULT(PROFILER_ID_JSON_PARSE);
SAMPLE_START(PROFILER_ID_LOAD_JSON_PAIRS, "LOAD JSON PAIRS"); SAMPLE_START(PROFILER_ID_LOAD_JSON_PAIRS, "LOAD JSON PAIRS");
@@ -62,7 +62,7 @@ int main(int argc, char *argv[]) {
point_pairs[index++] = p; point_pairs[index++] = p;
} }
SAMPLE_END(PROFILER_ID_LOAD_JSON_PAIRS); SAMPLE_END_DEFAULT(PROFILER_ID_LOAD_JSON_PAIRS);
SAMPLE_START(PROFILER_ID_READ_BINARY, "BINARY READ"); SAMPLE_START(PROFILER_ID_READ_BINARY, "BINARY READ");
@@ -76,7 +76,7 @@ int main(int argc, char *argv[]) {
fseek(fp, sizeof(u64), SEEK_SET); fseek(fp, sizeof(u64), SEEK_SET);
} }
SAMPLE_END(PROFILER_ID_READ_BINARY); SAMPLE_END_DEFAULT(PROFILER_ID_READ_BINARY);
SAMPLE_START(PROFILER_ID_HAVERSINE_SUM, "HAVERSINE SUM"); SAMPLE_START(PROFILER_ID_HAVERSINE_SUM, "HAVERSINE SUM");
@@ -84,9 +84,7 @@ int main(int argc, char *argv[]) {
f64 distance = 0.0; f64 distance = 0.0;
f64 saved_distance = 0.0; f64 saved_distance = 0.0;
for (u64 i = 0; i < pair_count; ++i) { for (u64 i = 0; i < pair_count; ++i) {
SAMPLE_START(PROFILER_ID_HAVERSINE_DISTANCE, "HAVERSINE DISTANCE");
distance = haversine_of_degrees(point_pairs[i], EARTH_RADIUS_KM); distance = haversine_of_degrees(point_pairs[i], EARTH_RADIUS_KM);
SAMPLE_END(PROFILER_ID_HAVERSINE_DISTANCE);
if (fp) { if (fp) {
fread(&saved_distance, sizeof(f64), 1, fp); fread(&saved_distance, sizeof(f64), 1, fp);
@@ -100,11 +98,11 @@ int main(int argc, char *argv[]) {
sum += distance; sum += distance;
} }
SAMPLE_END(PROFILER_ID_HAVERSINE_SUM); SAMPLE_END(PROFILER_ID_HAVERSINE_SUM, sizeof(f64) * pair_count);
SAMPLE_START(PROFILER_ID_HAVERSINE_AVG, "HAVERSINE AVERAGE"); SAMPLE_START(PROFILER_ID_HAVERSINE_AVG, "HAVERSINE AVERAGE");
printf("\nAVERAGE DISTANCE: %f\n", sum / pair_count); printf("\nAVERAGE DISTANCE: %f\n", sum / pair_count);
SAMPLE_END(PROFILER_ID_HAVERSINE_AVG); SAMPLE_END_DEFAULT(PROFILER_ID_HAVERSINE_AVG);
SAMPLE_START(PROFILER_ID_TEAR_DOWN, "TEAR DOWN"); SAMPLE_START(PROFILER_ID_TEAR_DOWN, "TEAR DOWN");
@@ -114,7 +112,9 @@ int main(int argc, char *argv[]) {
free(point_pairs); free(point_pairs);
SAMPLE_END(PROFILER_ID_TEAR_DOWN); free_json(&root);
SAMPLE_END_DEFAULT(PROFILER_ID_TEAR_DOWN);
PROFILE_END; PROFILE_END;

View File

@@ -1,11 +1,13 @@
#include "profiler/timer.h" #include "profiler/timer.h"
#include "aliases.h" #include "aliases.h"
#include <stdbool.h>
#include <stdio.h> #include <stdio.h>
#include <string.h> #include <string.h>
#include <time.h> #include <time.h>
#include <x86intrin.h> #include <x86intrin.h>
#if defined(BASIC_PROFILING) || defined(FULL_PROFILING)
typedef struct { typedef struct {
profiler_sample_t samples[MAX_PROFILE_SAMPLES]; profiler_sample_t samples[MAX_PROFILE_SAMPLES];
u64 cpu_freq; u64 cpu_freq;
@@ -36,12 +38,13 @@ u64 read_cpu_timer(void) { return __rdtsc(); }
u64 get_cpu_freq(u64 milliseconds) { u64 get_cpu_freq(u64 milliseconds) {
u64 os_freq = get_os_frequency(); u64 os_freq = get_os_frequency();
u64 os_start = get_os_time();
u64 cpu_start = read_cpu_timer();
u64 os_end = 0; u64 os_end = 0;
u64 os_elapsed = 0; u64 os_elapsed = 0;
u64 os_wait_time = os_freq * milliseconds / 1000; u64 os_wait_time = os_freq * milliseconds / 1000;
u64 os_start = get_os_time();
u64 cpu_start = read_cpu_timer();
while (os_elapsed < os_wait_time) { while (os_elapsed < os_wait_time) {
os_end = get_os_time(); os_end = get_os_time();
os_elapsed = os_end - os_start; os_elapsed = os_end - os_start;
@@ -59,8 +62,12 @@ u64 get_cpu_freq(u64 milliseconds) {
return cpu_freq; return cpu_freq;
} }
// Converts a CPU timer tick count into seconds by dividing it by the timer
// frequency (ticks per second). The division is done in floating point.
f64 time_in_seconds(u64 cpu_time, u64 cpu_freq) {
  f64 ticks = (f64)cpu_time;
  f64 ticks_per_second = (f64)cpu_freq;

  return ticks / ticks_per_second;
}
void profile_start(u64 count) { void profile_start(u64 count) {
profiler.cpu_freq = get_cpu_freq(500); profiler.cpu_freq = get_cpu_freq(1000);
profiler.start = read_cpu_timer(); profiler.start = read_cpu_timer();
profiler.max_title_length = 0; profiler.max_title_length = 0;
profiler.size = count; profiler.size = count;
@@ -81,20 +88,27 @@ void profile_end() {
u16 time_precision = 16; u16 time_precision = 16;
u16 time_char_count = 20; u16 time_char_count = 20;
// clang-format off
printf("\n============================================================PROFILING============================================================\n");
// clang-format on
if (profiler.cpu_freq) {
printf("Total: %*.*f seconds, %zu (CPU frequency: %llu hz/sec)\n\n",
time_char_count, time_precision, (f64)total / profiler.cpu_freq,
total, (unsigned long long)profiler.cpu_freq);
}
#ifdef FULL_PROFILING
f64 byte_to_mb = 1.0 / (1024.0 * 1024.0);
f64 mb_to_gb = 1.0 / 1024.0;
u16 duration_char_count = 22; u16 duration_char_count = 22;
u16 hits_char_count = 10; u16 hits_char_count = 10;
u16 percentage_precision = 8; u16 percentage_precision = 8;
u16 percentage_char_count = 12; u16 percentage_char_count = 12;
u16 throughput_precision = 24;
// clang-format off u16 throughput_char_count = 32;
printf("\n==============================PROFILING==============================\n");
// clang-format on
if (profiler.cpu_freq) {
printf("Total: %*.*f seconds (CPU frequency: %llu hz/sec)\n\n",
time_char_count, time_precision, (f64)total / profiler.cpu_freq,
(unsigned long long)profiler.cpu_freq);
}
profiler_sample_t *sample = NULL; profiler_sample_t *sample = NULL;
@@ -105,27 +119,38 @@ void profile_end() {
continue; continue;
} }
u64 duration = 0;
if (sample->duration >= sample->children_duration) {
duration = sample->duration - sample->children_duration;
}
printf("%*s (hits: %*lld): %*lld (%*.*f %%", (i32)profiler.max_title_length, printf("%*s (hits: %*lld): %*lld (%*.*f %%", (i32)profiler.max_title_length,
sample->title, hits_char_count, sample->title, hits_char_count,
(unsigned long long)sample->hit_count, duration_char_count, (unsigned long long)sample->hit_count, duration_char_count,
(unsigned long long)duration, percentage_char_count, (unsigned long long)sample->exclusive_time, percentage_char_count,
percentage_precision, (f64)duration / total * 100.0); percentage_precision, (f64)(sample->exclusive_time) / total * 100.0);
if (sample->children_duration > 0) { if (sample->children_time > 0) {
printf(", w/ children: %*.*f %%", percentage_char_count, printf(", w/ children: %*.*f %%", percentage_char_count,
percentage_precision, (f64)sample->duration / total * 100.0); percentage_precision,
(f64)(sample->exclusive_time + sample->children_time) / total *
100.0);
}
if (sample->byte_count > 0) {
f64 data_read = (f64)(sample->byte_count) * byte_to_mb;
f64 sample_time_in_seconds =
(f64)(sample->exclusive_time + sample->children_time) /
profiler.cpu_freq;
printf(", Data read: %*.*f MB, Throughput: %*.*f GB/s",
throughput_char_count, throughput_precision, data_read,
throughput_char_count, throughput_precision,
data_read * mb_to_gb / sample_time_in_seconds);
} }
printf(")\n"); printf(")\n");
} }
#endif // FULL_PROFILING
} }
#endif // BASIC_PROFILING || FULL_PROFILING
#ifdef FULL_PROFILING
void sample_start(u64 id, const char *title) { void sample_start(u64 id, const char *title) {
if (id >= MAX_PROFILE_SAMPLES) { if (id >= MAX_PROFILE_SAMPLES) {
return; return;
@@ -135,10 +160,12 @@ void sample_start(u64 id, const char *title) {
if (!(sample->title) || strcmp(title, sample->title) != 0) { if (!(sample->title) || strcmp(title, sample->title) != 0) {
sample->title = title; sample->title = title;
sample->first_start = 0;
sample->start = 0; sample->start = 0;
sample->duration = 0; sample->exclusive_time = 0;
sample->children_duration = 0; sample->children_time = 0;
sample->hit_count = 0; sample->hit_count = 0;
sample->byte_count = 0;
sample->parent = NULL; sample->parent = NULL;
u64 length = strlen(sample->title); u64 length = strlen(sample->title);
@@ -149,9 +176,20 @@ void sample_start(u64 id, const char *title) {
} }
sample->start = read_cpu_timer(); sample->start = read_cpu_timer();
if (sample->hit_count == 0) {
sample->first_start = sample->start;
}
++(sample->hit_count); ++(sample->hit_count);
if (sample != profiler.active) { if (profiler.active) {
u64 duration = sample->start - (profiler.active->start);
profiler.active->exclusive_time += duration;
}
if (!(profiler.active) || sample != profiler.active) {
// This handles recursive functions by changing the parent only when a // This handles recursive functions by changing the parent only when a
// function isn't calling itself // function isn't calling itself
sample->parent = profiler.active; sample->parent = profiler.active;
@@ -160,7 +198,7 @@ void sample_start(u64 id, const char *title) {
profiler.active = sample; profiler.active = sample;
} }
void sample_end(u64 id) { void sample_end(u64 id, u64 byte_count) {
if (id >= MAX_PROFILE_SAMPLES) { if (id >= MAX_PROFILE_SAMPLES) {
return; return;
} }
@@ -169,17 +207,27 @@ void sample_end(u64 id) {
u64 duration = read_cpu_timer() - sample->start; u64 duration = read_cpu_timer() - sample->start;
if (!(sample->parent) || sample != sample->parent) { sample->exclusive_time += duration;
// This handles recursive functions by adding to the duration only when a sample->byte_count += byte_count;
// function isn't calling itself
sample->duration += duration;
}
if (sample->parent && sample != sample->parent) { u64 now = read_cpu_timer();
// This handles recursive functions by adding to the children_duration only
// when a function isn't calling itself // Reset the start time at the end of the sample to handle recursion
sample->parent->children_duration += duration; sample->start = now;
profiler_sample_t *parent = sample->parent;
if (parent) {
// Add sample duration to all parents. This handles deep call stacks
while (parent) {
parent->children_time += duration;
parent = parent->parent;
}
sample->parent->start = now;
} }
profiler.active = sample->parent; profiler.active = sample->parent;
} }
#endif // FULL_PROFILING

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,529 @@
global mov_all_bytes_asm
global nop_all_bytes_asm
global nop_1x3_all_bytes_asm
global nop_1x9_all_bytes_asm
global inc_all_bytes_asm
global dec_all_bytes_asm
global align64_loop
global align1_loop
global align15_loop
global align31_loop
global align63_loop
global align75_loop
global align90_loop
global align112_loop
global rat_add
global rat_mov_add
global read_1
global read_2
global read_3
global read_4
global read_8
global write_1
global write_2
global write_3
global write_4
global write_8
global read_1x2_low
global read_1x2_high
global read_2x2
global read_4x2
global read_8x2
global read_4x2_simd
global read_8x2_simd
global read_16x2_simd
global read_32x2_simd_offset
global read_32x2_simd_no_offset
global read_16x4_simd
global read_32x4_simd
global cache_test ; Expects 3 inputs (pointer, read_count, mask)
global cache_test_unaligned ; Expects 3 inputs (pointer, read_count, mask)
; mov_all_bytes_asm: store one byte per iteration into a buffer.
;   rdi = destination base address
;   rsi = number of bytes to write
; Writes the low byte of the running index (al) to [rdi + index] for
; index = 0 .. rsi-1. The exit test runs after the body, so the loop always
; executes at least once; rsi == 0 would not terminate until rax wraps around.
mov_all_bytes_asm:
xor rax, rax
.loop:
mov BYTE [rdi + rax * 1], al
inc rax
cmp rsi, rax
jne .loop
ret
; nop_all_bytes_asm: counting loop whose body is one 3-byte NOP.
;   rdi = iteration count
; The NOP is emitted as raw bytes so the assembler cannot pick a different
; encoding. rax counts up from 0 until it equals rdi.
nop_all_bytes_asm:
xor rax, rax
.loop:
db 0x0f, 0x1f, 0x00 ; 3-byte NOP (encoding of `nop dword [rax]`)
inc rax
cmp rdi, rax
jne .loop
ret
; nop_1x3_all_bytes_asm: counting loop whose body is three separate NOP
; instructions (compare with nop_all_bytes_asm, which uses one 3-byte NOP).
;   rdi = iteration count
nop_1x3_all_bytes_asm:
xor rax, rax
.loop:
nop
nop
nop
inc rax
cmp rdi, rax
jne .loop
ret
; nop_1x9_all_bytes_asm: counting loop whose body is nine separate NOP
; instructions.
;   rdi = iteration count
nop_1x9_all_bytes_asm:
xor rax, rax
.loop:
nop
nop
nop
nop
nop
nop
nop
nop
nop
inc rax
cmp rdi, rax
jne .loop
ret
; inc_all_bytes_asm: empty counting loop (increment / compare / branch only).
;   rdi = iteration count
; rax counts up from 0 until it equals rdi.
inc_all_bytes_asm:
xor rax, rax
.loop:
inc rax
cmp rdi, rax
jne .loop
ret
; dec_all_bytes_asm: empty loop that counts rdi down to zero.
;   rdi = iteration count
; `dec` sets ZF itself, so no separate compare is needed before the branch.
dec_all_bytes_asm:
.loop:
dec rdi
jnz .loop
ret
; align64_loop: counting loop whose first instruction sits exactly on a
; 64-byte boundary.
;   rdi = iteration count
align64_loop:
xor rax, rax
align 64 ; pad so that .loop starts on a 64-byte boundary
.loop:
inc rax
cmp rdi, rax
jne .loop
ret
; align1_loop: counting loop placed 1 byte past a 64-byte boundary.
;   rdi = iteration count
align1_loop:
xor rax, rax
align 64
nop ; one filler byte pushes .loop to offset 1 from the boundary
.loop:
inc rax
cmp rdi, rax
jne .loop
ret
; align15_loop: counting loop placed 15 bytes past a 64-byte boundary.
;   rdi = iteration count
align15_loop:
xor rax, rax
align 64
; 15 single-byte NOPs push .loop to offset 15 from the boundary
%rep 15
nop
%endrep
.loop:
inc rax
cmp rdi, rax
jne .loop
ret
; align31_loop: counting loop placed 31 bytes past a 64-byte boundary.
;   rdi = iteration count
align31_loop:
xor rax, rax
align 64
; 31 single-byte NOPs push .loop to offset 31 from the boundary
%rep 31
nop
%endrep
.loop:
inc rax
cmp rdi, rax
jne .loop
ret
; align63_loop: counting loop placed 63 bytes past a 64-byte boundary, so the
; loop body starts on the last byte before the next boundary.
;   rdi = iteration count
align63_loop:
xor rax, rax
align 64
; 63 single-byte NOPs push .loop to offset 63 from the boundary
%rep 63
nop
%endrep
.loop:
inc rax
cmp rdi, rax
jne .loop
ret
; align75_loop: counting loop placed 75 bytes past a 64-byte boundary
; (i.e. 11 bytes past the following boundary).
;   rdi = iteration count
align75_loop:
xor rax, rax
align 64
; 75 single-byte NOPs of padding before the loop
%rep 75
nop
%endrep
.loop:
inc rax
cmp rdi, rax
jne .loop
ret
; align90_loop: counting loop placed 90 bytes past a 64-byte boundary
; (i.e. 26 bytes past the following boundary).
;   rdi = iteration count
align90_loop:
xor rax, rax
align 64
; 90 single-byte NOPs of padding before the loop
%rep 90
nop
%endrep
.loop:
inc rax
cmp rdi, rax
jne .loop
ret
; align112_loop: counting loop placed 112 bytes past a 64-byte boundary
; (i.e. 48 bytes past the following boundary).
;   rdi = iteration count
align112_loop:
xor rax, rax
align 64
; 112 single-byte NOPs of padding before the loop
%rep 112
nop
%endrep
.loop:
inc rax
cmp rdi, rax
jne .loop
ret
; rat_add: loop with two back-to-back `add rcx, 1`, each depending on the
; result of the previous one.
;   rdi = iteration count (copied to rax and counted down to zero)
; rcx is never initialised; only the dependency chain matters, not its value.
; NOTE(review): "rat" presumably refers to the register allocation table -- confirm.
rat_add:
mov rax, rdi
.loop:
add rcx, 1
add rcx, 1
dec rax
jnz .loop
ret
; rat_mov_add: variant of rat_add where every `add rcx, 1` is preceded by
; `mov rcx, rax`. The mov overwrites rcx, so each add no longer depends on the
; previous add's result.
;   rdi = iteration count (copied to rax and counted down to zero)
rat_mov_add:
mov rax, rdi
.loop:
mov rcx, rax
add rcx, 1
mov rcx, rax
add rcx, 1
dec rax
jnz .loop
ret
; read_1: one 8-byte load from [rdi] per loop iteration.
;   rdi = address to read (the same address is read every time)
;   rsi = counter, decremented by 1 per iteration
; The loop repeats while the decremented counter is > 0 as a signed value.
; `align 64` follows the entry label, so a call runs through the padding
; before reaching the 64-byte-aligned .loop.
read_1:
align 64
.loop:
mov rax, [rdi]
sub rsi, 1
jnle .loop
ret
; read_2: two 8-byte loads from [rdi] per loop iteration.
;   rdi = address to read (the same address is read every time)
;   rsi = counter, decremented by 2 per iteration
; The loop repeats while the decremented counter is > 0 as a signed value.
read_2:
align 64
.loop:
%rep 2
mov rax, [rdi]
%endrep
sub rsi, 2
jnle .loop
ret
; read_3: three 8-byte loads from [rdi] per loop iteration.
;   rdi = address to read (the same address is read every time)
;   rsi = counter, decremented by 3 per iteration
; The loop repeats while the decremented counter is > 0 as a signed value.
read_3:
align 64
.loop:
%rep 3
mov rax, [rdi]
%endrep
sub rsi, 3
jnle .loop
ret
; read_4: four 8-byte loads from [rdi] per loop iteration.
;   rdi = address to read (the same address is read every time)
;   rsi = counter, decremented by 4 per iteration
; The loop repeats while the decremented counter is > 0 as a signed value.
read_4:
align 64
.loop:
%rep 4
mov rax, [rdi]
%endrep
sub rsi, 4
jnle .loop
ret
; read_8: eight 8-byte loads from [rdi] per loop iteration.
;   rdi = address to read (the same address is read every time)
;   rsi = counter, decremented by 8 per iteration
; The loop repeats while the decremented counter is > 0 as a signed value.
read_8:
align 64
.loop:
%rep 8
mov rax, [rdi]
%endrep
sub rsi, 8
jnle .loop
ret
; write_1: one 8-byte store of zero to [rdi] per loop iteration.
;   rdi = address to write (the same address is written every time)
;   rsi = counter, decremented by 1 per iteration
; The loop repeats while the decremented counter is > 0 as a signed value.
write_1:
align 64
.loop:
mov QWORD [rdi], 0
sub rsi, 1
jnle .loop
ret
; write_2: two 8-byte stores of zero to [rdi] per loop iteration.
;   rdi = address to write (the same address is written every time)
;   rsi = counter, decremented by 2 per iteration
; The loop repeats while the decremented counter is > 0 as a signed value.
write_2:
align 64
.loop:
%rep 2
mov QWORD [rdi], 0
%endrep
sub rsi, 2
jnle .loop
ret
; write_3: three 8-byte stores of zero to [rdi] per loop iteration.
;   rdi = address to write (the same address is written every time)
;   rsi = counter, decremented by 3 per iteration
; The loop repeats while the decremented counter is > 0 as a signed value.
write_3:
align 64
.loop:
%rep 3
mov QWORD [rdi], 0
%endrep
sub rsi, 3
jnle .loop
ret
; write_4: four 8-byte stores of zero to [rdi] per loop iteration.
;   rdi = address to write (the same address is written every time)
;   rsi = counter, decremented by 4 per iteration
; The loop repeats while the decremented counter is > 0 as a signed value.
write_4:
align 64
.loop:
%rep 4
mov QWORD [rdi], 0
%endrep
sub rsi, 4
jnle .loop
ret
; write_8: eight 8-byte stores of zero to [rdi] per loop iteration.
;   rdi = address to write (the same address is written every time)
;   rsi = counter, decremented by 8 per iteration
; The loop repeats while the decremented counter is > 0 as a signed value.
write_8:
align 64
.loop:
%rep 8
mov QWORD [rdi], 0
%endrep
sub rsi, 8
jnle .loop
ret
; read_1x2_low: two 1-byte loads from [rdi] per iteration, into the low byte
; register al.
;   rdi = address to read
;   rsi = counter, decremented by 2 per iteration
read_1x2_low:
align 64
.loop:
%rep 2
mov al, [rdi]
%endrep
sub rsi, 2
jnle .loop
ret
; read_1x2_high: two 1-byte loads from [rdi] per iteration, into the high byte
; register ah (compare with read_1x2_low, which targets al).
;   rdi = address to read
;   rsi = counter, decremented by 2 per iteration
read_1x2_high:
align 64
.loop:
%rep 2
mov ah, [rdi]
%endrep
sub rsi, 2
jnle .loop
ret
; read_2x2: two 2-byte loads from [rdi] per iteration, into ax.
;   rdi = address to read
;   rsi = counter, decremented by 2 per iteration
read_2x2:
align 64
.loop:
%rep 2
mov ax, [rdi]
%endrep
sub rsi, 2
jnle .loop
ret
; read_4x2: two 4-byte loads from [rdi] per iteration, into eax.
;   rdi = address to read
;   rsi = counter, decremented by 2 per iteration
read_4x2:
align 64
.loop:
%rep 2
mov eax, [rdi]
%endrep
sub rsi, 2
jnle .loop
ret
; read_8x2: two 8-byte loads from [rdi] per iteration, into rax.
;   rdi = address to read
;   rsi = counter, decremented by 2 per iteration
read_8x2:
align 64
.loop:
%rep 2
mov rax, [rdi]
%endrep
sub rsi, 2
jnle .loop
ret
; read_4x2_simd: two 4-byte loads per iteration, from [rdi] and [rdi + 4].
; Despite the name, this variant uses the general-purpose register r8d.
;   rdi = address to read
;   rsi = byte total; rax advances by 8 (2 x 4 bytes) per iteration and the
;         loop runs while rax < rsi (unsigned)
read_4x2_simd:
xor rax, rax
align 64
.loop:
mov r8d, [rdi]
mov r8d, [rdi + 4]
add rax, 8
cmp rax, rsi
jb .loop
ret
; read_8x2_simd: two 8-byte loads per iteration, from [rdi] and [rdi + 8].
; Despite the name, this variant uses the general-purpose register r8.
;   rdi = address to read
;   rsi = byte total; rax advances by 16 (2 x 8 bytes) per iteration and the
;         loop runs while rax < rsi (unsigned)
read_8x2_simd:
xor rax, rax
align 64
.loop:
mov r8, [rdi]
mov r8, [rdi + 8]
add rax, 16
cmp rax, rsi
jb .loop
ret
; read_16x2_simd: two 16-byte unaligned vector loads per iteration into xmm0,
; from [rdi] and [rdi + 16].
;   rdi = address to read
;   rsi = byte total; rax advances by 32 (2 x 16 bytes) per iteration and the
;         loop runs while rax < rsi (unsigned)
read_16x2_simd:
xor rax, rax
align 64
.loop:
vmovdqu xmm0, [rdi]
vmovdqu xmm0, [rdi + 16]
add rax, 32
cmp rax, rsi
jb .loop
ret
; read_32x2_simd_offset: two 32-byte unaligned vector loads per iteration into
; ymm0, from two adjacent locations: [rdi] and [rdi + 32].
;   rdi = address to read
;   rsi = byte total; rax advances by 64 (2 x 32 bytes) per iteration and the
;         loop runs while rax < rsi (unsigned)
read_32x2_simd_offset:
xor rax, rax
align 64
.loop:
vmovdqu ymm0, [rdi]
vmovdqu ymm0, [rdi + 32]
add rax, 64
cmp rax, rsi
jb .loop
ret
; read_32x2_simd_no_offset: two 32-byte unaligned vector loads per iteration
; into ymm0, both from the same location [rdi] (compare with
; read_32x2_simd_offset, which reads [rdi] and [rdi + 32]).
;   rdi = address to read
;   rsi = byte total; rax advances by 64 per iteration and the loop runs
;         while rax < rsi (unsigned)
read_32x2_simd_no_offset:
xor rax, rax
align 64
.loop:
vmovdqu ymm0, [rdi]
vmovdqu ymm0, [rdi]
add rax, 64
cmp rax, rsi
jb .loop
ret
; read_16x4_simd: four 16-byte unaligned vector loads per iteration into xmm0,
; alternating between [rdi] and [rdi + 16] (the pair is repeated twice).
;   rdi = address to read
;   rsi = byte total; rax advances by 64 (4 x 16 bytes) per iteration and the
;         loop runs while rax < rsi (unsigned)
read_16x4_simd:
xor rax, rax
align 64
.loop:
%rep 2
vmovdqu xmm0, [rdi]
vmovdqu xmm0, [rdi + 16]
%endrep
add rax, 64
cmp rax, rsi
jb .loop
ret
; read_32x4_simd: four 32-byte unaligned vector loads per iteration into ymm0.
;   rdi = address to read
;   rsi = byte total; rax advances by 128 (4 x 32 bytes) per iteration and the
;         loop runs while rax < rsi (unsigned)
; NOTE(review): all four loads read [rdi]; unlike read_16x4_simd there is no
; second offset ([rdi + 32]). Confirm whether that is intentional.
read_32x4_simd:
xor rax, rax
align 64
.loop:
%rep 2
vmovdqu ymm0, [rdi]
vmovdqu ymm0, [rdi]
%endrep
add rax, 128
cmp rax, rsi
jb .loop
ret
; cache_test(pointer, read_count, mask)
;   rdi = base address of the buffer
;   rsi = total number of bytes to read
;   rdx = mask applied to the running offset after every 128-byte step
; Each iteration issues four 32-byte unaligned loads (128 bytes) starting at
; base + offset, then advances the offset by 128 and masks it with rdx. The
; loop repeats while the remaining count is still above zero (unsigned) after
; subtracting 128.
;
; Only caller-saved registers are used as scratch (r10, r11). The base pointer
; used to be parked in rbx, which is callee-saved under the System V AMD64 ABI
; and was being destroyed for the caller.
cache_test:
xor r10, r10 ; Offset into the buffer starts at 0
mov r11, rdi ; Save original pointer (r11 is caller-saved)
.loop:
add rdi, r10 ; Advance the pointer by the current offset
add r10, 128 ; Step the offset for the next iteration
and r10, rdx ; Mask offset
vmovdqu ymm0, [rdi + 0]
vmovdqu ymm1, [rdi + 32]
vmovdqu ymm2, [rdi + 64]
vmovdqu ymm3, [rdi + 96]
mov rdi, r11 ; Restore original pointer
sub rsi, 128 ; Decrement count
ja .loop
ret
; cache_test_unaligned(pointer, read_count, mask)
;   rdi = base address of the buffer
;   rsi = total number of bytes to read
;   rdx = mask applied to the running offset after every 128-byte step
; Same loop as cache_test, except the base pointer is first moved forward by
; 5 bytes so every 32-byte load is misaligned. Because of that shift, the
; loads touch up to 5 bytes beyond the range cache_test would touch.
;
; Only caller-saved registers are used as scratch (r10, r11). The base pointer
; used to be parked in rbx, which is callee-saved under the System V AMD64 ABI
; and was being destroyed for the caller.
cache_test_unaligned:
xor r10, r10 ; Offset into the buffer starts at 0
add rdi, 5 ; Unalign pointer
mov r11, rdi ; Save unaligned base pointer (r11 is caller-saved)
.loop:
add rdi, r10 ; Advance the pointer by the current offset
add r10, 128 ; Step the offset for the next iteration
and r10, rdx ; Mask offset
vmovdqu ymm0, [rdi + 0]
vmovdqu ymm1, [rdi + 32]
vmovdqu ymm2, [rdi + 64]
vmovdqu ymm3, [rdi + 96]
mov rdi, r11 ; Restore unaligned base pointer
sub rsi, 128 ; Decrement count
ja .loop
ret

View File

@@ -0,0 +1,168 @@
#include "repetition_testing/reptester.h"
#include "profiler/timer.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/resource.h>
#include <sys/time.h>
// Attaches a working buffer to the tester when the allocation type asks for it.
//
// For ALLOC_TYPE_WITH_MALLOC (which, despite the name, is backed by mmap) and
// only when no buffer is attached yet, maps read_size + 1 bytes of anonymous
// memory and zero-fills it so that every page has been written before use.
// If the mapping fails, the error is reported and the buffer stays NULL;
// MAP_FAILED is never handed to memset or stored as the buffer.
// Every other allocation type is a no-op.
void handle_alloc(reptester *tester, alloc_type type) {
  switch (type) {
  case ALLOC_TYPE_WITH_MALLOC:
    if (!(tester->params.buffer)) {
      size_t length = tester->params.read_size + 1;
      void *mapping = mmap(NULL, length, PROT_READ | PROT_WRITE,
                           MAP_ANON | MAP_SHARED | MAP_NORESERVE, -1, 0);
      if (mapping == MAP_FAILED) {
        perror("mmap");
        break;
      }

      memset(mapping, 0, length);
      tester->params.buffer = (char *)mapping;
    }
    break;
  default:
    break;
  }
}
// Releases the tester's working buffer, if there is one.
//
// Only ALLOC_TYPE_WITH_MALLOC owns a mapping; for that type the read_size + 1
// byte region is unmapped and the pointer is cleared so a later handle_alloc
// can map a fresh one. Any other type, or an already-empty buffer, is a no-op.
void handle_free(reptester *tester, alloc_type type) {
  if (type != ALLOC_TYPE_WITH_MALLOC) {
    return;
  }

  if (!tester->params.buffer) {
    return;
  }

  munmap(tester->params.buffer, tester->params.read_size + 1);
  tester->params.buffer = NULL;
}
// Repeatedly runs `func` and accumulates timing and page-fault statistics.
//
// tester    - test state; its stats, results and run counter are reset here.
// func      - the function under test, invoked as func(tester, type).
// func_name - label passed on to print_results.
// type      - allocation mode. For ALLOC_TYPE_WITH_MALLOC a private,
//             zero-filled mapping of read_size + 1 bytes replaces
//             tester->params.buffer for the duration of the test; the caller's
//             buffer pointer is restored before returning.
//
// The loop keeps going until wait_time_secs have elapsed since the last new
// minimum time was observed. If a run reads fewer bytes than expected, an
// error is printed and no results are reported; the temporary mapping is still
// released and the caller's buffer restored on that path.
void run_func_test(reptester *tester, reptest_func func, const char *func_name,
                   alloc_type type) {
  tester->test_start_time = read_cpu_timer();
  tester->test_time_secs = 0.0;
  tester->current_run = 1;
  tester->tstats = {
      UINT64_MAX, // min_time
      0,          // max_time
      0,          // avg_time
      0,          // total_time
  };
  tester->mstats = {
      UINT64_MAX, // min_faults
      0,          // max_faults
      0,          // avg_faults
      0,          // total_bytes
      0,          // total_faults
  };
  tester->results = {};

  char *saved_buffer = NULL;

  if (type == ALLOC_TYPE_WITH_MALLOC) {
    size_t length = tester->params.read_size + 1;
    void *mapping = mmap(NULL, length, PROT_READ | PROT_WRITE,
                         MAP_ANON | MAP_SHARED | MAP_NORESERVE, -1, 0);
    if (mapping == MAP_FAILED) {
      // Leave the caller's buffer untouched; there is nothing to test with.
      perror("mmap");
      return;
    }

    memset(mapping, 0, length);

    saved_buffer = tester->params.buffer;
    tester->params.buffer = (char *)mapping;
  }

  bool completed = true;

  while (tester->test_time_secs <= tester->wait_time_secs) {
    func(tester, type);

    if (tester->results.bytes_read <
        tester->params.read_size * tester->params.read_count) {
      printf("Failed to read the entire file (Total size: %lu, Bytes read: "
             "%lu)\n",
             tester->params.read_size, tester->results.bytes_read);

      // Fall through to the cleanup below instead of returning directly.
      completed = false;
      break;
    }

    tester->tstats.total_time += tester->results.read_time;
    tester->mstats.total_bytes += tester->results.bytes_read;
    tester->mstats.total_faults += tester->results.page_faults;

    // Min and max are checked independently: a single run (notably the first
    // one) can be both the slowest and the fastest seen so far.
    // NOTE(review): the fault counts stored here belong to the slowest/fastest
    // run and may then be overridden by the true fault extremes below.
    if (tester->results.read_time > tester->tstats.max_time) {
      tester->tstats.max_time = tester->results.read_time;
      tester->mstats.max_faults = tester->results.page_faults;
    }

    if (tester->results.read_time < tester->tstats.min_time) {
      // A new minimum restarts the wait window.
      tester->test_start_time = read_cpu_timer();
      tester->tstats.min_time = tester->results.read_time;
      tester->mstats.min_faults = tester->results.page_faults;
    }

    if (tester->results.page_faults > tester->mstats.max_faults) {
      tester->mstats.max_faults = tester->results.page_faults;
    }

    if (tester->results.page_faults < tester->mstats.min_faults) {
      tester->mstats.min_faults = tester->results.page_faults;
    }

    tester->test_time_secs = time_in_seconds(
        read_cpu_timer() - tester->test_start_time, tester->cpu_freq);
    ++(tester->current_run);
  }

  if (type == ALLOC_TYPE_WITH_MALLOC) {
    munmap(tester->params.buffer, tester->params.read_size + 1);
    tester->params.buffer = saved_buffer;
  }

  if (completed) {
    print_results(tester, func_name);
  }
}
// Returns the number of page faults (minor + major) the current process has
// incurred so far, as reported by getrusage(RUSAGE_SELF).
//
// The struct is zero-initialised and the call's result is checked, so a failed
// getrusage yields 0 rather than whatever happened to be on the stack.
u64 page_fault_count() {
  struct rusage usage = {};

  if (getrusage(RUSAGE_SELF, &usage) != 0) {
    return 0;
  }

  return usage.ru_minflt + usage.ru_majflt;
}
void print_results(reptester *tester, const char *name) {
f64 kb = 1024.0;
f64 gb = kb * kb * kb;
f64 size_in_kb =
(f64)(tester->params.read_size * tester->params.read_count) / kb;
f64 size_in_gb =
(f64)(tester->params.read_size * tester->params.read_count) / gb;
u64 run_count = tester->current_run - 1;
tester->tstats.avg_time = tester->tstats.total_time / run_count;
tester->mstats.avg_faults = tester->mstats.total_faults / run_count;
printf("\n%s: %lu runs\n", name, run_count);
printf("MIN: %lu (%fGB/s)", tester->tstats.min_time,
size_in_gb /
time_in_seconds(tester->tstats.min_time, tester->cpu_freq));
if (tester->mstats.min_faults > 0) {
printf(", FAULTS: %lu (%fK/fault)\n", tester->mstats.min_faults,
size_in_kb / tester->mstats.min_faults);
} else {
printf("\n");
}
printf("MAX: %lu (%fGB/s)", tester->tstats.max_time,
size_in_gb /
time_in_seconds(tester->tstats.max_time, tester->cpu_freq));
if (tester->mstats.max_faults > 0) {
printf(", FAULTS: %lu (%fK/fault)\n", tester->mstats.max_faults,
size_in_kb / tester->mstats.max_faults);
} else {
printf("\n");
}
printf("AVG: %lu (%fGB/s)", tester->tstats.avg_time,
size_in_gb /
time_in_seconds(tester->tstats.avg_time, tester->cpu_freq));
if (tester->mstats.avg_faults > 0) {
printf(", FAULTS: %lu (%fK/fault)\n", tester->mstats.avg_faults,
tester->mstats.total_bytes / kb / tester->mstats.avg_faults);
} else {
printf("\n");
}
}