Compare commits

...

74 Commits

Author SHA1 Message Date
94167e05fd Remove unused variables 2024-06-23 21:52:58 +01:00
7177add4ce Switch to using mmap and test unaligned loads 2024-06-23 21:10:14 +01:00
37d3340df9 Rename C++ files 2024-06-22 13:54:45 +01:00
fcdaf41495 Write to test buffer before working with it 2024-05-19 00:32:39 +01:00
cec0662e68 Add 256k cache test function 2024-05-19 00:10:18 +01:00
ba31dd9f8c Add cache testing functions 2024-05-06 22:55:44 +01:00
f355ab2d25 Update .gitignore 2024-05-06 22:55:32 +01:00
46ee06406f SIMD homework 2024-04-13 21:31:00 +01:00
5c97a99839 Execution ports homework 2024-04-13 13:56:29 +01:00
2cb6f1beb6 Complete homework for execution ports video 2024-02-11 00:42:14 +00:00
4945a298ac Add more assembly functions to repetition_testing 2024-02-10 17:54:42 +00:00
063183e46c Code alignment testing 2024-01-13 19:09:42 +00:00
12f25cfe51 Additional assembly loops 2023-12-03 23:24:52 +00:00
57acc5e16f Add assembly loops 2023-12-02 22:18:57 +00:00
43718ff047 Update release build flag 2023-12-02 20:28:04 +00:00
297d9c53f3 Fix repetition tester 2023-11-25 18:25:53 +00:00
b2cb252822 Remove .vscode and update .gitignore 2023-11-25 18:25:33 +00:00
a7d977210e Remove extraneous Windows functions 2023-09-24 19:11:10 +01:00
96ae35912f Wrap page fault count functions in os-agnostic function 2023-09-24 19:00:25 +01:00
dd512f8304 Fix Windows errors 2023-09-24 18:48:51 +01:00
389a494bfc Ensure clang-format doesn't change the include order of Windows headers 2023-09-24 18:46:14 +01:00
f18ecde7bc Add support for probing page fault behaviour on Windows 2023-09-24 17:35:23 +01:00
9104a41e2d Start probing page fault behaviour 2023-09-24 17:15:22 +01:00
7ce7101240 Add page fault stats to repetition tester 2023-09-23 23:55:46 +01:00
dca94a0edf Restructure of repetition tester 2023-09-23 23:06:38 +01:00
b7d33de2d7 Update comment 2023-09-23 22:44:13 +01:00
b1b90bc6f5 Change the testing function so it doesn't reallocate the main buffer 2023-09-18 22:28:48 +01:00
3a0917ed58 Test reading with and without malloc 2023-09-10 00:39:49 +01:00
4b905a56a5 Update .gitignore 2023-09-09 21:26:45 +01:00
967b1524d7 Update compile script 2023-09-09 21:26:35 +01:00
ab99d4b003 Update parser.c 2023-09-09 21:26:22 +01:00
22466ea56f Add time_in_seconds function 2023-09-09 21:25:57 +01:00
9ddb991b94 Basic repetition testing implementation 2023-09-09 21:25:32 +01:00
1bfc162845 Update profiler output 2023-09-03 00:35:58 +01:00
e461de30c0 Add data throughput calculation 2023-09-03 00:26:30 +01:00
19c02b4e99 Update the profiler to allow for different level of profiling 2023-07-23 16:36:21 +01:00
0e973feb38 Include the IDs from the update location 2023-07-23 16:36:07 +01:00
3af3a72472 Move the IDs to the processor 2023-07-23 16:35:35 +01:00
8e17765774 Update the timer to properly handle recursion and deep call stacks 2023-07-23 14:21:47 +01:00
f8cd7d253e Update .gitignore 2023-07-23 14:21:26 +01:00
46bc7e03a4 Add debug config for timer_test 2023-07-23 14:21:11 +01:00
419a7c8534 Ensure compile is executable 2023-07-23 11:33:07 +01:00
2d74f02138 Use the updated free_json and profile it 2023-07-12 00:45:29 +01:00
0360a2da35 Compile json code with profiler when profiling is enabled 2023-07-09 22:16:16 +01:00
43ec97378c Use numerical IDs for the profiler 2023-07-09 22:15:53 +01:00
e17ca4d3d2 Update debug config 2023-07-09 22:15:31 +01:00
6b9a7ab8a5 Update the timer code to support profiling loops and recursive functions 2023-07-09 22:13:06 +01:00
a118afaffb Add profiling code to the json parser 2023-07-09 22:12:32 +01:00
f11d4481a2 Add extra checks for NULL pointers 2023-07-09 22:11:32 +01:00
5e84e270bc Added macros for profiling functions and made it possible to compile the
profiling code out
2023-07-09 04:01:56 +01:00
c053d20a8f Use sample_start and sample_end pairs instead of the PROFILER_SAMPLE
macro
2023-07-09 01:52:16 +01:00
0073114723 Refactor profiling code 2023-07-09 00:55:22 +01:00
2e9c2dc6d5 Use profiling functions to time the haversine processor 2023-07-03 22:46:24 +01:00
f0380ce638 Added profiling functions 2023-07-03 22:46:10 +01:00
25d38c24b1 Update build files 2023-07-02 21:57:05 +01:00
99063fc700 Create the processor 2023-07-02 20:56:25 +01:00
6b27c86a18 Make sum a double instead of integer 2023-07-02 20:56:02 +01:00
1bb16971b2 Read the json file into a dynamically-allocated array to avoid stack
overflow
2023-07-02 20:55:31 +01:00
09ed32e41a Write the pairs to json with better precision 2023-07-02 20:55:08 +01:00
31d02aff8c Move EARTH_RADIUS_KM to haversine.h 2023-07-02 19:58:47 +01:00
d284a7edc8 Update .gitignore 2023-07-02 19:29:44 +01:00
9a686a2692 Add the json parser 2023-07-02 19:29:32 +01:00
10f1d5686e Update the compilation process 2023-07-02 19:29:05 +01:00
50f881c655 Reorganise the project 2023-07-02 19:28:48 +01:00
301ea44759 Remove parser binary from version control 2023-06-11 02:15:43 +01:00
c75e51aa3e Reorganise and start working on the json parser 2023-06-11 02:14:41 +01:00
5ff93f61d7 Switch to using argp 2023-05-29 19:49:49 +01:00
5c742fe1f7 Implement haversine generator 2023-05-28 23:29:56 +01:00
66aa3500d6 Rename haversine to haversine_01 2023-05-28 23:29:56 +01:00
2900e508a1 Completed homework for 8086 simulator 2023-04-16 14:29:54 -07:00
60538301d5 Defined mem_start 2023-04-15 23:11:10 -07:00
e9d7a608f0 Removed the binary from source control 2023-04-15 23:10:08 -07:00
d980153185 Read the instructions into simulated memory instead of separate buffer 2023-04-15 23:08:14 -07:00
90128fae8f Added sim86 binary to .gitignore 2023-04-15 23:07:08 -07:00
58 changed files with 6243 additions and 207 deletions

View File

@@ -1,2 +1,2 @@
all:
clang++ -g dasm.cpp -o dasm
clang++ -g dasm.cc -o dasm

View File

@@ -1,2 +1,2 @@
all:
clang++ -g dasm.cpp -o dasm
clang++ -g dasm.cc -o dasm

View File

@@ -1,2 +1,2 @@
all:
clang++ -g dasm.cpp -o dasm
clang++ -g dasm.cc -o dasm

8
8086_sim/.gitignore vendored
View File

@@ -1,3 +1,4 @@
sim86
listing_0043_immediate_movs
listing_0044_register_movs
listing_0045_challenge_register_movs
@@ -6,3 +7,10 @@ listing_0047_challenge_flags
listing_0048_ip_register
listing_0049_conditional_jumps
listing_0050_challenge_jumps
listing_0051_memory_mov
listing_0052_memory_add_loop
listing_0053_add_loop_challenge
listing_0054_draw_rectangle
test.asm
test
image.data

View File

@@ -1,7 +1,7 @@
CC=clang++
CFLAGS=-g -O0 -Wall -Wextra
LIBS=-Wl,-rpath,./lib -L./lib -lsim86
SRC=*.cpp
SRC=*.cc
OUT=sim86
all:

View File

@@ -0,0 +1,30 @@
; ========================================================================
;
; (C) Copyright 2023 by Molly Rocket, Inc., All Rights Reserved.
;
; This software is provided 'as-is', without any express or implied
; warranty. In no event will the authors be held liable for any damages
; arising from the use of this software.
;
; Please see https://computerenhance.com for further information
;
; ========================================================================
; ========================================================================
; LISTING 51
; ========================================================================
; Assemble as 16-bit code.
bits 16
; Store the words 1, 2, 3 and 4 at absolute addresses 1000, 1002, 1004, 1006.
mov word [1000], 1
mov word [1002], 2
mov word [1004], 3
mov word [1006], 4
; Overwrite the word at bx + 4 = 1004 (previously 3) with 10, exercising
; register + displacement addressing.
mov bx, 1000
mov word [bx + 4], 10
; Read the four words back: bx = 1, cx = 2, dx = 10, bp = 4.
mov bx, word [1000]
mov cx, word [1002]
mov dx, word [1004]
mov bp, word [1006]

View File

@@ -0,0 +1,36 @@
; ========================================================================
;
; (C) Copyright 2023 by Molly Rocket, Inc., All Rights Reserved.
;
; This software is provided 'as-is', without any express or implied
; warranty. In no event will the authors be held liable for any damages
; arising from the use of this software.
;
; Please see https://computerenhance.com for further information
;
; ========================================================================
; ========================================================================
; LISTING 52
; ========================================================================
; Assemble as 16-bit code.
bits 16
; dx = number of bytes to process (three words), bp = base address of the data.
mov dx, 6
mov bp, 1000
; Initialisation loop: store the byte offset si at [bp + si] for si = 0, 2, 4.
mov si, 0
init_loop_start:
mov word [bp + si], si
add si, 2
cmp si, dx
jnz init_loop_start
; Summation loop: walk the same three words upwards and accumulate them in bx
; (0 + 2 + 4 = 6).
mov bx, 0
mov si, 0
add_loop_start:
mov cx, word [bp + si]
add bx, cx
add si, 2
cmp si, dx
jnz add_loop_start

View File

@@ -0,0 +1,35 @@
; ========================================================================
;
; (C) Copyright 2023 by Molly Rocket, Inc., All Rights Reserved.
;
; This software is provided 'as-is', without any express or implied
; warranty. In no event will the authors be held liable for any damages
; arising from the use of this software.
;
; Please see https://computerenhance.com for further information
;
; ========================================================================
; ========================================================================
; LISTING 53
; ========================================================================
; Assemble as 16-bit code.
bits 16
; dx = number of bytes to process (three words), bp = base address of the data.
mov dx, 6
mov bp, 1000
; Initialisation loop: store the byte offset si at [bp + si] for si = 0, 2, 4.
mov si, 0
init_loop_start:
mov word [bp + si], si
add si, 2
cmp si, dx
jnz init_loop_start
; Summation loop, counting down. bp is lowered by 2 so that si = 6, 4, 2
; addresses the words at 1004, 1002 and 1000.
mov bx, 0
mov si, dx
sub bp, 2
add_loop_start:
add bx, word [bp + si]
; The sub itself sets the zero flag, so no separate cmp is needed before jnz.
sub si, 2
jnz add_loop_start

View File

@@ -0,0 +1,43 @@
; ========================================================================
;
; (C) Copyright 2023 by Molly Rocket, Inc., All Rights Reserved.
;
; This software is provided 'as-is', without any express or implied
; warranty. In no event will the authors be held liable for any damages
; arising from the use of this software.
;
; Please see https://computerenhance.com for further information
;
; ========================================================================
; ========================================================================
; LISTING 54
; ========================================================================
; Assemble as 16-bit code.
bits 16
; Start image after one row, to avoid overwriting our code!
; (64 * 4 = 256 bytes: one row of 64 pixels at 4 bytes per pixel.)
mov bp, 64*4
; dx is the Y coordinate, cx is the X coordinate.
mov dx, 0
y_loop_start:
mov cx, 0
x_loop_start:
; Fill pixel
; Each word store writes two bytes: cx covers [bp + 0] and [bp + 1], dx covers
; [bp + 2] and [bp + 3]; the byte store then replaces [bp + 3] with 255.
mov word [bp + 0], cx ; Red
mov word [bp + 2], dx ; Blue
mov byte [bp + 3], 255 ; Alpha
; Advance pixel location
add bp, 4
; Advance X coordinate and loop
add cx, 1
cmp cx, 64
jnz x_loop_start
; Advance Y coordinate and loop
add dx, 1
cmp dx, 64
jnz y_loop_start

Binary file not shown.

357
8086_sim/sim86.cc Normal file
View File

@@ -0,0 +1,357 @@
#include "include/aliases.h"
#include "include/flag_access.h"
#include "include/reg_access.h"
#include "include/sim86_instruction.h"
#include "include/sim86_lib.h"
#include <bits/types/FILE.h>
#include <stdio.h>
#include <string.h>
#define MEM_SIZE (1 << 16)
#define BITS_PER_BYTE 8
// Fixed-size text buffer that get_operand_string returns by value, so the
// formatted operand needs no heap allocation or caller-provided storage.
struct basic_string {
char str[4096];
};
// Simulated 8086 memory.
struct membuf {
// Backing store of MEM_SIZE bytes; main reads the program binary into its
// start.
u8 buffer[MEM_SIZE];
// Offset added to every index by get_mem_value and set_mem_value; main sets
// it to (program size + 1) once the binary has been loaded.
u64 mem_start;
};
// Outcome of a simulated memory read (get_mem_value) or write (set_mem_value).
struct mem_access_result {
// The value read from memory, or the value that was written.
u16 value;
// 0 on success; set to 1 when the address is outside the memory buffer.
u32 error;
};
// Returns the current value of a register, memory or immediate operand
// (0 for any other operand type or a failed memory read).
u16 get_operand_value(instruction_operand operand, bool wide);
// Formats an operand as text for the disassembly listing.
basic_string get_operand_string(instruction_operand operand, bool wide);
// Prints one decoded instruction as "<mnemonic> <dest>, <source>".
void print_instruction(instruction inst);
// Executes a mov whose destination is a register.
void mov_to_register(const register_access &reg,
const instruction_operand &source, bool wide);
// Executes a mov whose destination is a memory location.
void mov_to_memory(const effective_address_expression &addrexp,
const instruction_operand &source, bool wide);
// Reads a byte (or a little-endian word when `wide`) from simulated memory.
mem_access_result get_mem_value(const effective_address_expression &addrexp,
bool wide);
// Writes a byte (or a little-endian word when `wide`) to simulated memory.
mem_access_result set_mem_value(const effective_address_expression &addrexp,
u16 value, bool wide);
// Computes displacement + register terms of an effective address.
u16 get_mem_index(const effective_address_expression &addrexp);
// The single simulated memory shared by the whole program.
static membuf memory;
/**
 * Entry point of the 8086 simulator.
 *
 * Loads the binary named by argv[1] into the start of the simulated memory,
 * then decodes, prints and executes it one instruction at a time (mov, add,
 * sub, cmp and jne are simulated). Finally prints every register that was the
 * destination of a mov, the instruction pointer and the flags.
 *
 * Returns 0 on success and 1 when the arguments are missing, the file cannot
 * be opened/read/fit into memory, or an instruction cannot be decoded.
 */
int main(int argc, char *argv[]) {
  if (argc < 2) {
    printf("Usage: sim86 BINARY_FILE\n");
    return 1;
  }

  memset((void *)memory.buffer, 0, MEM_SIZE);
  memory.mem_start = 0;

  const char *filename = argv[1];
  printf("Filename: %s\n", filename);

  FILE *fp = fopen(filename, "rb");
  if (!fp) {
    printf("Failed to open file %s\n", filename);
    // Nothing to simulate; continuing would pass NULL to fseek/fread.
    return 1;
  }

  fseek(fp, 0, SEEK_END);
  long file_size = ftell(fp);
  fseek(fp, 0, SEEK_SET);

  // ftell reports failure as -1, and a program larger than the simulated
  // memory would make fread write past the end of memory.buffer.
  if (file_size < 0 || file_size > MEM_SIZE) {
    printf("File %s does not fit into %d bytes of memory\n", filename,
           MEM_SIZE);
    fclose(fp);
    return 1;
  }

  u32 size = (u32)file_size;
  size_t bytes_read = fread((void *)memory.buffer, sizeof(u8), size, fp);
  fclose(fp);

  if (bytes_read != size) {
    printf("Failed to read file %s\n", filename);
    return 1;
  }

  // Data accesses made by the simulated program are placed after the code.
  memory.mem_start = size + 1;

  instruction_table table;
  Sim86_Get8086InstructionTable(&table);

  int status = 0;
  u32 offset = 0;
  bool accessed_registers[REGISTER_COUNT] = {false};

  printf("\nDisassembly:\n");

  while (offset < size) {
    instruction decoded;
    Sim86_Decode8086Instruction(size - offset, memory.buffer + offset,
                                &decoded);

    if (!decoded.Op) {
      // offset only advances after a successful decode, so carrying on here
      // would spin forever on the same bytes.
      printf("Failed to decode instruction at offset %u\n", offset);
      status = 1;
      break;
    }

    offset += decoded.Size;

    bool wide = (decoded.Flags & Inst_Wide) == Inst_Wide;

    print_instruction(decoded);

    instruction_operand dest = decoded.Operands[0];
    instruction_operand source = decoded.Operands[1];

    switch (decoded.Op) {
    case Op_mov: {
      if (dest.Type == Operand_Register) {
        mov_to_register(dest.Register, source, wide);
        accessed_registers[dest.Register.Index] = true;
      } else if (dest.Type == Operand_Memory) {
        mov_to_memory(dest.Address, source, wide);
      }
      break;
    }
    case Op_add: {
      if (dest.Type == Operand_Register) {
        u16 value = get_register(dest.Register);
        value += get_operand_value(source, wide);
        set_flags(value);
        set_register(dest.Register, value);
      }
      break;
    }
    case Op_sub:
    case Op_cmp: {
      if (dest.Type == Operand_Register) {
        u16 value = get_register(dest.Register);
        value -= get_operand_value(source, wide);
        set_flags(value);
        // cmp only updates the flags; sub also stores the result.
        if (decoded.Op == Op_sub) {
          set_register(dest.Register, value);
        }
      }
      break;
    }
    case Op_jne: {
      if (!get_flag(FLAG_ZERO)) {
        // The jump distance is signed and relative to the next instruction.
        i16 inst_offset = get_operand_value(dest, wide);
        offset += inst_offset;
      }
      break;
    }
    default:
      break;
    }
  }

  printf("\nFinal registers:\n");
  for (u32 i = 0; i < REGISTER_COUNT; ++i) {
    if (accessed_registers[i]) {
      register_access reg = {i, 0, 2};
      u16 value = get_register(reg);
      printf("\t%s: 0x%04x (%d)\n", get_register_name(reg), value, value);
    }
  }

  // Print the instruction pointer register
  printf("\tip: 0x%04x (%d)\n", offset, offset);

  printf("\nFinal flags:\n");
  print_flags();

#if 0 // Only needed (and working) for listing 0054
#define SIZE 64
#define BYTES SIZE * 4 * SIZE
u8 image[BYTES];
mempcpy(image, &(memory.buffer[memory.mem_start + (SIZE * 4)]), BYTES);
FILE *out = fopen("image.data", "wb");
fwrite(image, sizeof(u8), BYTES, out);
fclose(out);
#endif

  return status;
}
/**
 * Resolves an operand to the 16-bit value it currently holds.
 *
 * Registers are read from the register file, immediates are returned as-is
 * and memory operands are loaded from simulated memory (`wide` selects a word
 * instead of a byte). Any other operand type, or a failed memory read,
 * yields 0.
 */
u16 get_operand_value(instruction_operand operand, bool wide) {
  if (operand.Type == Operand_Register) {
    return get_register(operand.Register);
  }

  if (operand.Type == Operand_Immediate) {
    return operand.Immediate.Value;
  }

  if (operand.Type == Operand_Memory) {
    mem_access_result loaded = get_mem_value(operand.Address, wide);
    return loaded.error ? 0 : loaded.value;
  }

  return 0;
}
/**
 * Formats an operand for the disassembly listing.
 *
 * - register:  its name, e.g. "bx"
 * - memory:    "word [...]" or "byte [...]" depending on `wide`, where the
 *              brackets hold the address registers and/or the displacement
 * - immediate: the value in decimal
 * - anything else: an empty string
 *
 * All formatting is bounded by the destination buffer sizes.
 */
basic_string get_operand_string(instruction_operand operand, bool wide) {
  basic_string output = {""};

  switch (operand.Type) {
  case Operand_Register:
    snprintf(output.str, sizeof(output.str), "%s",
             get_register_name(operand.Register));
    break;
  case Operand_Memory: {
    char mem_string[1024] = {0};

    register_access reg1 = operand.Address.Terms[0].Register;
    if (reg1.Index != 0) {
      snprintf(mem_string, sizeof(mem_string), "%s + ",
               get_register_name(reg1));
    }

    size_t length = strlen(mem_string);

    register_access reg2 = operand.Address.Terms[1].Register;
    if (reg2.Index != 0) {
      snprintf(mem_string + length, sizeof(mem_string) - length, "%s",
               get_register_name(reg2));

      // A second register does not rule out a displacement
      // (e.g. [bp + si + 4]); it used to be dropped from the listing.
      if (operand.Address.Displacement != 0) {
        length = strlen(mem_string);
        snprintf(mem_string + length, sizeof(mem_string) - length, " + %d",
                 operand.Address.Displacement);
      }
    } else {
      snprintf(mem_string + length, sizeof(mem_string) - length, "%d",
               operand.Address.Displacement);
    }

    snprintf(output.str, sizeof(output.str), "%s [%s]",
             wide ? "word" : "byte", mem_string);
    break;
  }
  case Operand_Immediate:
    snprintf(output.str, sizeof(output.str), "%d", operand.Immediate.Value);
    break;
  default:
    break;
  }

  return output;
}
/**
 * Prints one decoded instruction as a tab-indented line of the form
 * "<mnemonic> <destination>, <source>".
 */
void print_instruction(instruction inst) {
  const bool is_wide = (inst.Flags & Inst_Wide) == Inst_Wide;

  // Operand 0 is the destination, operand 1 the source.
  basic_string dest_text = get_operand_string(inst.Operands[0], is_wide);
  basic_string source_text = get_operand_string(inst.Operands[1], is_wide);

  printf("\t%s %s, %s\n", Sim86_MnemonicFromOperationType(inst.Op),
         dest_text.str, source_text.str);
}
void mov_to_register(const register_access &reg,
const instruction_operand &source, bool wide) {
switch (source.Type) {
case Operand_Immediate:
set_register(reg, source.Immediate.Value);
break;
case Operand_Register:
set_register(reg, get_register(source.Register));
break;
case Operand_Memory: {
mem_access_result result = get_mem_value(source.Address, wide);
if (!result.error) {
set_register(reg, result.value);
}
break;
}
default:
break;
}
}
void mov_to_memory(const effective_address_expression &addrexp,
const instruction_operand &source, bool wide) {
switch (source.Type) {
case Operand_Immediate:
set_mem_value(addrexp, source.Immediate.Value, wide);
break;
case Operand_Register:
set_mem_value(addrexp, get_register(source.Register), wide);
break;
case Operand_Memory: {
mem_access_result result = get_mem_value(source.Address, wide);
if (!result.error) {
set_mem_value(addrexp, result.value, wide);
}
break;
}
default:
break;
}
}
/**
 * Reads from simulated memory at the effective address `addrexp`, offset by
 * memory.mem_start.
 *
 * Reads one byte, or a little-endian 16-bit word when `wide` is set.
 *
 * Returns {value, 0} on success. Returns {0, 1} when any byte of the access
 * lies outside the memory buffer; for a wide access that includes the second
 * byte, which the previous bounds check did not cover.
 */
mem_access_result get_mem_value(const effective_address_expression &addrexp,
                                bool wide) {
  mem_access_result result = {0, 0};

  const u64 first = memory.mem_start + get_mem_index(addrexp);
  const u64 last = wide ? first + 1 : first;

  if (last >= MEM_SIZE) {
    result.error = 1;
    return result;
  }

  result.value = memory.buffer[first];
  if (wide) {
    // High byte of the little-endian word.
    result.value |= (u16)(memory.buffer[last] << BITS_PER_BYTE);
  }

  return result;
}
/**
 * Writes `value` to simulated memory at the effective address `addrexp`,
 * offset by memory.mem_start.
 *
 * Writes the low byte only, or both bytes in little-endian order when `wide`
 * is set.
 *
 * Returns {value, 0} on success. Returns {0, 1}, without writing anything,
 * when any byte of the access lies outside the memory buffer; for a wide
 * access that includes the second byte, which the previous bounds check did
 * not cover.
 */
mem_access_result set_mem_value(const effective_address_expression &addrexp,
                                u16 value, bool wide) {
  mem_access_result result = {0, 0};

  const u64 first = memory.mem_start + get_mem_index(addrexp);
  const u64 last = wide ? first + 1 : first;

  if (last >= MEM_SIZE) {
    result.error = 1;
    return result;
  }

  memory.buffer[first] = (u8)value;
  if (wide) {
    // High byte of the little-endian word.
    memory.buffer[last] = (u8)(value >> BITS_PER_BYTE);
  }

  result.value = value;
  return result;
}
/**
 * Evaluates an effective address expression to a 16-bit memory index:
 * the displacement plus the current value of each of the (up to two)
 * register terms. A term whose register index is 0 is treated as absent.
 * The sum is truncated to 16 bits.
 */
u16 get_mem_index(const effective_address_expression &addrexp) {
  u16 address = addrexp.Displacement;

  const register_access &first_term = addrexp.Terms[0].Register;
  if (first_term.Index != 0) {
    address += get_register(first_term);
  }

  const register_access &second_term = addrexp.Terms[1].Register;
  if (second_term.Index != 0) {
    address += get_register(second_term);
  }

  return address;
}

View File

@@ -1,200 +0,0 @@
#include "include/aliases.h"
#include "include/flag_access.h"
#include "include/reg_access.h"
#include "include/sim86_lib.h"
#include <bits/types/FILE.h>
#include <stdio.h>
#include <string.h>
// Fixed-size text buffer that get_operand_string returns by value.
struct basic_string {
char str[4096];
};
// Returns the current value of a register or immediate operand (0 otherwise).
u16 get_operand_value(instruction_operand operand);
// Formats an operand as text for the disassembly listing.
basic_string get_operand_string(instruction_operand operand);
// Prints one decoded instruction as "<mnemonic> <dest>, <source>".
void print_instruction(instruction inst);
// Executes a mov whose destination is a register.
void mov_to_register(const register_access &reg,
const instruction_operand &source);
int main(int argc, char *argv[]) {
if (argc < 2) {
printf("Usage: sim86 BINARY_FILE\n");
return 1;
}
const char *filename = argv[1];
printf("Filename: %s\n", filename);
FILE *fp = fopen(filename, "rb");
if (!fp) {
printf("Failed to open file %s\n", filename);
}
fseek(fp, 0, SEEK_END);
u32 size = ftell(fp);
fseek(fp, 0, SEEK_SET);
u8 buffer[size + 1];
memset((void *)buffer, 0, size + 1);
fread((void *)buffer, sizeof(u8), size, fp);
fclose(fp);
instruction_table table;
Sim86_Get8086InstructionTable(&table);
u32 offset = 0;
bool accessed_registers[REGISTER_COUNT] = {false};
printf("\nDisassembly:\n");
while (offset < size) {
instruction decoded;
Sim86_Decode8086Instruction(size - offset, buffer + offset, &decoded);
if (decoded.Op) {
offset += decoded.Size;
print_instruction(decoded);
instruction_operand dest = decoded.Operands[0];
instruction_operand source = decoded.Operands[1];
switch (decoded.Op) {
case Op_mov: {
if (dest.Type == Operand_Register) {
mov_to_register(dest.Register, source);
accessed_registers[dest.Register.Index] = true;
}
break;
}
case Op_add: {
if (dest.Type == Operand_Register) {
u16 value = get_register(dest.Register);
value += get_operand_value(source);
set_flags(value);
set_register(dest.Register, value);
}
break;
}
case Op_sub:
case Op_cmp: {
if (dest.Type == Operand_Register) {
u16 value = get_register(dest.Register);
value -= get_operand_value(source);
set_flags(value);
if (decoded.Op == Op_sub) {
set_register(dest.Register, value);
}
}
break;
}
case Op_jne: {
if (!get_flag(FLAG_ZERO)) {
i16 inst_offset = get_operand_value(dest);
offset += inst_offset;
}
}
default:
break;
}
}
}
printf("\nFinal registers:\n");
for (u32 i = 0; i < REGISTER_COUNT; ++i) {
if (accessed_registers[i]) {
register_access reg = {i, 0, 2};
u16 value = get_register(reg);
printf("\t%s: 0x%04x (%d)\n", get_register_name(reg), value, value);
}
}
// Print the instruction pointer register
printf("\tip: 0x%04x (%d)\n", offset, offset);
printf("\nFinal flags:\n");
print_flags();
return 0;
}
/**
 * Resolves an operand to the 16-bit value it currently holds: the register
 * contents for a register operand, the literal for an immediate, and 0 for
 * memory operands (not simulated in this version) or any other type.
 */
u16 get_operand_value(instruction_operand operand) {
  u16 value = 0;

  if (operand.Type == Operand_Register) {
    value = get_register(operand.Register);
  } else if (operand.Type == Operand_Immediate) {
    value = operand.Immediate.Value;
  }

  return value;
}
/**
 * Formats an operand for the disassembly listing: the register name, the
 * placeholder "MEM OPERAND" for memory operands, the decimal value of an
 * immediate, or an empty string for any other operand type.
 */
basic_string get_operand_string(instruction_operand operand) {
  basic_string text = {""};

  if (operand.Type == Operand_Register) {
    sprintf(text.str, "%s", get_register_name(operand.Register));
  } else if (operand.Type == Operand_Memory) {
    sprintf(text.str, "%s", "MEM OPERAND");
  } else if (operand.Type == Operand_Immediate) {
    sprintf(text.str, "%d", operand.Immediate.Value);
  }

  return text;
}
/**
 * Prints one decoded instruction as a tab-indented line of the form
 * "<mnemonic> <destination>, <source>".
 */
void print_instruction(instruction inst) {
  // Operand 0 is the destination, operand 1 the source.
  basic_string dest_text = get_operand_string(inst.Operands[0]);
  basic_string source_text = get_operand_string(inst.Operands[1]);

  printf("\t%s %s, %s\n", Sim86_MnemonicFromOperationType(inst.Op),
         dest_text.str, source_text.str);
}
/**
 * Executes a mov into the register `reg` from an immediate or another
 * register. Any other source type leaves the register untouched.
 */
void mov_to_register(const register_access &reg,
                     const instruction_operand &source) {
  if (source.Type == Operand_Immediate) {
    set_register(reg, source.Immediate.Value);
  } else if (source.Type == Operand_Register) {
    set_register(reg, get_register(source.Register));
  }
}

View File

@@ -1,17 +1,17 @@
mk_haversine_fscanf:
clang++ -g cpp/fscanf.cpp cpp/haversine.cpp -o cpp/haverscan
clang++ -g cpp/fscanf.cc cpp/haversine.cc -o cpp/haverscan
run_haversine_fscanf:
cd ./cpp && ./haverscan
mk_haversine_strtok:
clang++ -g cpp/strtok.cpp cpp/haversine.cpp -o cpp/haverstrtok
clang++ -g cpp/strtok.cc cpp/haversine.cc -o cpp/haverstrtok
run_haversine_strtok:
cd ./cpp && ./haverstrtok
mk_test:
clang++ -g -lpthread cpp/test.cpp cpp/haversine.cpp -o cpp/test
clang++ -g -lpthread cpp/test.cc cpp/haversine.cc -o cpp/test
run_test:
cd ./cpp && ./test

13
haversine_02/.gitignore vendored Normal file
View File

@@ -0,0 +1,13 @@
.cache
.vscode
.idea
compile_commands.json
count_and_distances
pairs.json
cache_test
main
genhavr
prochavr
reptest
memtest
timer_test*

3
haversine_02/build Executable file
View File

@@ -0,0 +1,3 @@
#!/bin/bash

# Runs ./compile under bear, forwarding every argument unchanged.
# "$@" is quoted so that arguments are passed through exactly as given instead
# of being re-split on whitespace or glob-expanded.
bear -- ./compile "$@"

135
haversine_02/compile Executable file
View File

@@ -0,0 +1,135 @@
#!/bin/bash

# Toolchain and base flags shared by every target.
# The trailing spaces let later sections append flags with +=.
CC=clang
CXX=clang++
CFLAGS="-Wall -Wextra -I$(realpath ./include) "
ASM=nasm
ASM_FLAGS="-f elf64 "
AR=ar
AR_FLAGS="rcs"

# PARSE ARGUMENTS
# From this StackOverflow answer https://stackoverflow.com/a/14203146
# -gt compares numerically; inside [[ ]] the > operator compares strings.
while [[ $# -gt 0 ]]; do
	case $1 in
		--release)
			RELEASE=true
			shift
			;;
		--basic-profiling)
			BASIC_PROFILING=true
			shift
			;;
		--full-profiling)
			FULL_PROFILING=true
			shift
			;;
		*)
			# Anything else, option or positional, is rejected.
			echo "Unknown option $1"
			exit 1
			;;
	esac
done

# BUILD TYPE
if [[ $RELEASE == true ]]; then
	CFLAGS+="-g -O1"
else
	CFLAGS+="-g"
fi
# GENERATOR
GENSRC="./src/generator/gen_argparser.cc \
	./src/generator/generator.cc \
	./src/haversine.cc \
	./src/point_types.cc \
	./src/generator/main.cc"
GENOUT=genhavr

(set -x ; $CXX $CFLAGS $GENSRC -o $GENOUT)
echo

# PROFILER
# Source path is relative to $PROF_BUILD_DIR, where the object file is built.
PROFSRC="../src/profiler/timer.c"
PROFFLAGS="-c "
PROF_BUILD_DIR=prof_build

# PROCESSOR
# Source path is relative to $JSON_BUILD_DIR, where the object files are built.
JSONSRC="../src/json/*.c "
JSONFLAGS="-c "
JSON_BUILD_DIR=json_build

PROCSRC="./$JSON_BUILD_DIR/*.o \
	./src/haversine.cc \
	./src/point_types.cc \
	./src/processor/proc_argparser.cc \
	./src/processor/main.cc "
PROCOUT=prochavr

# MEMTESTER
MEMTESTSRC="./src/memtester/*.c"
MEMTESTOUT=memtest

(set -x ; $CC $CFLAGS $MEMTESTSRC -o $MEMTESTOUT)
echo

# REPTEST ASSEMBLY
ASM_BUILD_DIR=reptest_build
ASM_SRC="./src/repetition_testing/reptest_functions.asm"
ASM_OBJ="./$ASM_BUILD_DIR/funcs.o"
ASM_LIB="./$ASM_BUILD_DIR/libfuncs.a"

# -p: do not fail when an interrupted earlier run left the directory behind.
mkdir -p $ASM_BUILD_DIR

(set -x ; $ASM $ASM_FLAGS $ASM_SRC -o $ASM_OBJ)
(set -x ; $AR $AR_FLAGS $ASM_LIB $ASM_OBJ)
echo
# The profiler object and the repetition tester are only built when one of
# the profiling modes was requested; full profiling takes precedence.
if [[ $BASIC_PROFILING == true ]] || [[ $FULL_PROFILING == true ]]; then
	if [[ $FULL_PROFILING == true ]]; then
		JSONFLAGS+="-DFULL_PROFILING"
		PROCFLAGS="-DFULL_PROFILING"
		REPTESTFLAGS="-DFULL_PROFILING"
		PROFFLAGS+="-DFULL_PROFILING"
	elif [[ $BASIC_PROFILING == true ]]; then
		JSONFLAGS+="-DBASIC_PROFILING"
		PROCFLAGS="-DBASIC_PROFILING"
		REPTESTFLAGS="-DBASIC_PROFILING"
		PROFFLAGS+="-DBASIC_PROFILING"
	fi

	PROCSRC+=./$PROF_BUILD_DIR/*.o

	# -p: do not fail when an earlier run left the directory behind.
	mkdir -p $PROF_BUILD_DIR
	# Stop rather than compile (and later cd ../) from the wrong directory.
	cd $PROF_BUILD_DIR || exit 1

	(set -x ; $CC $CFLAGS $PROFFLAGS $PROFSRC)
	echo

	cd ../

	# REPETITION TESTING
	REPTESTSRC="./src/repetition_testing/*.cc ./$PROF_BUILD_DIR/*.o $ASM_LIB"
	REPTESTOUT=reptest

	(set -x ; $CXX $CFLAGS $REPTESTFLAGS $REPTESTSRC -o $REPTESTOUT)
	echo
fi

# The JSON objects are compiled inside their own build directory and then
# linked into the processor from the project root.
mkdir -p $JSON_BUILD_DIR
cd $JSON_BUILD_DIR || exit 1

(set -x ; $CC $CFLAGS $JSONFLAGS $JSONSRC)
echo

cd ../

(set -x ; $CXX $CFLAGS $PROCFLAGS $PROCSRC -o $PROCOUT)
echo

# CLEAR BUILD FILES
rm -rvf $JSON_BUILD_DIR $PROF_BUILD_DIR $ASM_BUILD_DIR

View File

@@ -0,0 +1,26 @@
#ifndef ALIASES_H
#define ALIASES_H

#include <stdint.h>

/*
 * Short names for the fixed-width integer and floating point types.
 *
 * These are typedefs rather than object-like macros so that they obey normal
 * scoping rules, show up in debuggers and diagnostics, and cannot silently
 * rewrite an unrelated identifier that happens to be spelled u8, i32, ...
 */
typedef uint8_t u8;
typedef uint16_t u16;
typedef uint32_t u32;
typedef uint64_t u64;

typedef int8_t i8;
typedef int16_t i16;
typedef int32_t i32;
typedef int64_t i64;

typedef float f32;
typedef double f64;

/* Names for the different roles of the `static` keyword. */
#define INTERNAL static   /* file-local function or variable */
#define PERSISTENT static /* local variable that keeps its value across calls */

#ifdef __cplusplus
#define CLASS_MEMBER static /* static class member */
#endif // __cplusplus

#endif // !ALIASES_H

View File

@@ -0,0 +1,15 @@
#ifndef GEN_ARGPARSER_H
#define GEN_ARGPARSER_H
#include "aliases.h"
#include <argp.h>
// Command-line options of the generator, as produced by parse_args.
struct GeneratorArgs {
// Value given with --seed.
u32 seed;
// Set when the --cluster flag is given.
bool clustered;
// The positional COUNT argument.
u64 count;
};
// Parses the generator's command line into a GeneratorArgs.
GeneratorArgs parse_args(i32 argc, char *argv[]);
#endif // !GEN_ARGPARSER_H

View File

@@ -0,0 +1,8 @@
#ifndef GENERATOR_H
#define GENERATOR_H
#include "point_types.h"
// Fills the pair array `pairs`.
// NOTE(review): presumably writes pairs->count generated pairs and uses
// `clustered` to choose between uniform and clustered points; the
// implementation is not visible here, confirm against generator.cc.
void fill_pairs_array(PairArray *pairs, bool clustered);
#endif // !GENERATOR_H

View File

@@ -0,0 +1,11 @@
#ifndef HAVERSINE_H
#define HAVERSINE_H
#include "aliases.h"
#include "point_types.h"
// Radius constant in kilometres, intended as the `radius` argument of
// haversine_of_degrees.
#define EARTH_RADIUS_KM 6371.0
// Haversine distance between the two points of `pair` on a sphere of the
// given radius.
// NOTE(review): the name suggests coordinates are in degrees and the result
// is in the unit of `radius`; confirm against haversine.cc.
f64 haversine_of_degrees(const PointPair &pair, f64 radius);
#endif // !HAVERSINE_H

View File

@@ -0,0 +1,30 @@
#ifndef DSTRING_H
#define DSTRING_H
#include "aliases.h"
#ifdef __cplusplus
extern "C" {
#endif
// Opaque dynamic string handle; struct dstring is defined in the
// implementation file, so callers only ever hold pointers to it.
typedef struct dstring dstr_t;
// Constructors.
// NOTE(review): presumably heap-allocated and released with dstr_free;
// confirm against the implementation.
dstr_t *dstr_with_capacity(u64 capacity);
dstr_t *dstr_from_string(const char *str);
// Functions taking dstr_t ** receive the address of the caller's handle.
// NOTE(review): presumably so the handle can be reallocated or reset;
// confirm against the implementation.
void dstr_update(dstr_t **dst, const char *src);
void dstr_free(dstr_t **str);
void dstr_concat(dstr_t **dst, const char *src);
void dstr_append(dstr_t **dst, char c);
void dstr_resize(dstr_t **str);
// Operations and queries on an existing string.
void dstr_clear(dstr_t *str);
void dstr_print(const dstr_t *str);
// NOTE(review): signed return type suggests a negative "not found" value;
// confirm against the implementation.
i64 dstr_find(const dstr_t *str, const char *substr);
u64 dstr_length(const dstr_t *str);
u64 dstr_capacity(const dstr_t *str);
const char *dstr_to_cstr(const dstr_t *str);
#ifdef __cplusplus
}
#endif
#endif // !DSTRING_H

View File

@@ -0,0 +1,82 @@
#ifndef JSON_ENTITIES_H
#define JSON_ENTITIES_H
#include "aliases.h"
#include "dstring.h"
#include <stdbool.h>
#ifdef __cplusplus
extern "C" {
#endif
typedef struct json_entity jentity_t;
typedef struct json_collection jcoll_t;
typedef struct json_value jval_t;
typedef struct json_pair jpair_t;
// Discriminator for the union inside struct json_value.
typedef enum {
JVAL_EMPTY,
JVAL_COLLECTION,
JVAL_STRING,
JVAL_INTEGER,
JVAL_DOUBLE,
JVAL_BOOLEAN,
JVAL_NULL,
} jval_type;
// A JSON value: `type` is expected to tell which union member is valid.
struct json_value {
jval_type type;
union {
void *null_val;
jcoll_t *collection;
dstr_t *string;
i64 num_int;
f64 num_dbl;
bool boolean;
};
};
// A key together with its value.
struct json_pair {
dstr_t *key;
jval_t value;
};
// Discriminator for the union inside struct json_entity.
typedef enum {
JENTITY_SINGLE,
JENTITY_PAIR,
} jentity_type;
// A node of the JSON tree: either a bare value or a key/value pair, linked
// to its parent node and to the next node.
struct json_entity {
jentity_type type;
union {
jval_t value;
jpair_t pair;
};
jentity_t *parent;
jentity_t *next;
};
// Kind of a collection: JSON object or JSON array.
typedef enum {
JCOLL_OBJECT,
JCOLL_ARRAY,
} jcoll_type;
// A collection of entities, referenced through its first and last entity.
// NOTE(review): presumably `size` counts the entities chained via `next`
// from `begin` to `end`; confirm against the implementation.
struct json_collection {
u64 size;
jcoll_type type;
jentity_t *begin;
jentity_t *end;
};
// Prints the tree rooted at `entity`; `indent` is the starting indentation.
void print_json(const jentity_t *entity, u32 indent);
// Releases the tree. Takes the address of the caller's pointer.
// NOTE(review): presumably resets *entity to NULL; confirm.
void free_json(jentity_t **entity);
// Returns the collection held by `entity`.
// NOTE(review): behaviour for non-collection entities is not visible here.
jcoll_t *get_collection_from_entity(const jentity_t *entity);
// Constructors for the two entity kinds.
jentity_t *create_new_single_entity(const jval_t value, jentity_t *parent);
jentity_t *create_new_pair_entity(dstr_t *key, const jval_t value,
jentity_t *parent);
#ifdef __cplusplus
}
#endif
#endif // !JSON_ENTITIES_H

View File

@@ -0,0 +1,72 @@
#ifndef LEXER_STATES_H
#define LEXER_STATES_H
#include "aliases.h"
#include <stdbool.h>
#define VALID_JSON true
#define INVALID_JSON false
#ifdef __cplusplus
extern "C" {
#endif
// Non-owning pointer to character data.
typedef const char *str_view_t;
// Kinds of token the lexer can produce; TK_NO_TOKEN is the "nothing" value.
typedef enum {
TK_NO_TOKEN,
TK_L_BRACE,
TK_R_BRACE,
TK_L_BRACKET,
TK_R_BRACKET,
TK_NULL,
TK_BOOL,
TK_STR_KEY,
TK_STR_VAL,
TK_INTEGER,
TK_DOUBLE,
} token_type;
// Payload of a token; which member is meaningful depends on the token type.
typedef union {
void *no_val;
i64 num_int;
f64 num_frac;
str_view_t string;
bool boolean;
} token_value_t;
// A token with its source position.
typedef struct {
u64 line;
u64 column;
token_type type;
token_value_t value;
} token_t;
typedef enum {
LEX_ERR_NONE,
LEX_ERR_INVALID,
} lex_err_type;
// Lexer error: an error code plus a message.
typedef struct {
// NOTE(review): `errno` is also the name of the macro provided by <errno.h>.
// A translation unit that includes <errno.h> before this header will have
// this member name macro-expanded. Renaming it would change the public
// interface, so it is only flagged here.
lex_err_type errno;
str_view_t msg;
} lex_err_t;
// Result of get_next_token: an error and a token.
typedef struct {
lex_err_t error;
token_t token;
} lex_result_t;
// Opaque lexer state; struct lexer_s is defined in the implementation file.
typedef struct lexer_s lexer_t;
// Create and destroy a lexer through the address of the caller's pointer.
void lexer_init(lexer_t **lexer);
void lexer_free(lexer_t **lexer);
// Produces the next token.
// NOTE(review): how `text` is consumed across calls is not visible here.
lex_result_t get_next_token(lexer_t *lexer, const char *text);
void print_token(token_t token);
#ifdef __cplusplus
}
#endif
#endif // !LEXER_STATES_H

View File

@@ -0,0 +1,19 @@
#ifndef PARSER_H
#define PARSER_H
#include "json_entities.h"
#include "lexer.h"
#ifdef __cplusplus
extern "C" {
#endif
// Opaque parser state; struct parser_s is defined in the implementation file.
typedef struct parser_s parser_t;
// Loads the JSON file at `filepath` and returns the resulting entity tree.
// NOTE(review): presumably returns NULL on failure and the tree must be
// released with free_json; confirm against parser.c.
jentity_t *load_json(const char *filepath);
#ifdef __cplusplus
}
#endif
#endif // !PARSER_H

View File

@@ -0,0 +1,27 @@
#ifndef POINT_H
#define POINT_H
#include "aliases.h"
#include <stdio.h>
// A single point with two coordinates.
struct Point {
f64 x;
f64 y;
};
// Two points.
struct PointPair {
Point p1;
Point p2;
};
// A counted array of point pairs.
// NOTE(review): ownership of `pairs` is not visible here; confirm who
// allocates and frees it.
struct PairArray {
u64 count;
PointPair *pairs;
};
// Serialisation helpers; `filename` is the file to write to or read from.
void write_pairs_to_binary(const PairArray &arr, const char *filename);
void read_pairs_from_binary(PairArray &arr, const char *filename);
void write_pairs_to_json(const PairArray &arr, const char *filename);
// Compares two pair arrays.
// NOTE(review): presumably true means equal; confirm against point_types.cc.
bool compare_pair_array(const PairArray &arr1, const PairArray &arr2);
#endif // !POINT_H

View File

@@ -0,0 +1,21 @@
#ifndef PROFILER_IDS_H
#define PROFILER_IDS_H
// Numeric IDs for the profiled regions. The values start at 0 and are
// consecutive, so COUNT_PROFILER_IDS (the last enumerator) equals the number
// of IDs.
enum profiler_ids {
PROFILER_ID_CLI_PARSE,
PROFILER_ID_JSON_PARSE,
PROFILER_ID_READ_JSON_FILE,
PROFILER_ID_PARSER_SETUP,
PROFILER_ID_PARSER_PARSE_TOKENS,
PROFILER_ID_PARSER_TEAR_DOWN,
PROFILER_ID_LOAD_JSON_PAIRS,
PROFILER_ID_READ_BINARY,
PROFILER_ID_HAVERSINE_SUM,
PROFILER_ID_HAVERSINE_AVG,
PROFILER_ID_TEAR_DOWN,
PROFILER_ID_FREE_JSON,
COUNT_PROFILER_IDS,
};
#endif // !PROFILER_IDS_H

View File

@@ -0,0 +1,12 @@
#ifndef PROC_ARGPARSER_H
#define PROC_ARGPARSER_H
#include "aliases.h"
// Command-line options of the processor, as produced by parse_args.
struct ProcessorArgs {
// Path of the input file to process.
const char *filepath;
};
// Parses the processor's command line into a ProcessorArgs.
ProcessorArgs parse_args(i32 argc, char *argv[]);
#endif // !PROC_ARGPARSER_H

View File

@@ -0,0 +1,78 @@
#ifndef TIMER_H
#define TIMER_H

#include "aliases.h"

#ifndef MAX_PROFILE_SAMPLES
#define MAX_PROFILE_SAMPLES 1024
#endif // !MAX_PROFILE_SAMPLES

// Per-region sampling is only compiled in with FULL_PROFILING; otherwise the
// SAMPLE_* macros expand to nothing and cost nothing.
#ifdef FULL_PROFILING

#define SAMPLE_START(ID, TITLE) sample_start(ID, TITLE)
#define SAMPLE_END(ID, BYTES) sample_end(ID, BYTES)
#define SAMPLE_END_DEFAULT(ID) sample_end(ID, 0)

#ifdef __cplusplus
extern "C" {
#endif

// Mark the start and the end of the profiled region `id`. `byte_count` is the
// amount of data the region processed (SAMPLE_END_DEFAULT passes 0).
void sample_start(u64 id, const char *title);
void sample_end(u64 id, u64 byte_count);

#ifdef __cplusplus
}
#endif

#else

#define SAMPLE_START(ID, TITLE)
#define SAMPLE_END(ID, BYTES)
#define SAMPLE_END_DEFAULT(ID)

#endif // FULL_PROFILING

// The timer functions and whole-program profiling are compiled in with either
// profiling level; otherwise the PROFILE_* macros expand to nothing.
#if defined(BASIC_PROFILING) || defined(FULL_PROFILING)

#define PROFILE_START(COUNT) profile_start(COUNT)
#define PROFILE_END profile_end()

#ifdef __cplusplus
extern "C" {
#endif

typedef struct sample profiler_sample_t;

// Accumulated timing data of one profiled region.
// NOTE(review): presumably the time fields hold CPU timer ticks and `parent`
// points at the enclosing region's sample; confirm against timer.c.
struct sample {
  const char *title;
  u64 first_start;
  u64 start;
  u64 exclusive_time;
  u64 children_time;
  u64 hit_count;
  u64 byte_count;
  profiler_sample_t *parent;
};

// Parameterless functions are declared with (void): in C an empty parameter
// list leaves the parameters unspecified instead of declaring "no arguments".
u64 get_os_frequency(void);

// Time in nanoseconds
u64 get_os_time(void);

// CPU timer using rdtsc
u64 read_cpu_timer(void);

// CPU frequency in Hz
// NOTE(review): `milliseconds` is presumably the measurement window used to
// estimate the frequency; confirm against timer.c.
u64 get_cpu_freq(u64 milliseconds);

// Converts a CPU timer value to seconds, given the CPU frequency.
f64 time_in_seconds(u64 cpu_time, u64 cpu_freq);

void profile_start(u64 count);
void profile_end(void);

#ifdef __cplusplus
}
#endif

#else

#define PROFILE_START(COUNT)
#define PROFILE_END

#endif // BASIC_PROFILING || FULL_PROFILING

#endif // !TIMER_H

View File

@@ -0,0 +1,71 @@
#ifndef REPTESTER_H
#define REPTESTER_H
#include "aliases.h"
struct reptest_params {
const char *filename;
char *buffer;
u64 read_size;
u64 read_count;
};
struct reptest_results {
u64 bytes_read;
u64 read_time;
u64 page_faults;
};
struct time_stats {
u64 min_time;
u64 max_time;
u64 avg_time;
u64 total_time;
};
struct mem_stats {
u64 min_faults;
u64 max_faults;
u64 avg_faults;
u64 total_bytes;
u64 total_faults;
};
// State of a repetition tester: the test inputs, timing configuration,
// running statistics and the results slot filled by each run.
struct reptester {
reptest_params params;
// const member: it can only be set when the struct is initialised.
const u64 cpu_freq;
// NOTE(review): presumably how long to keep testing without improvement,
// in seconds - confirm against the implementation.
f64 wait_time_secs;
f64 test_time_secs;
u64 test_start_time;
u64 current_run;
time_stats tstats;
mem_stats mstats;
reptest_results results;
};
// Allocation mode passed to every test function and to handle_alloc /
// handle_free.
enum alloc_type {
ALLOC_TYPE_WITHOUT_MALLOC,
ALLOC_TYPE_WITH_MALLOC,
// Number of real modes above; used as an array length, not a mode itself.
COUNT_ALLOC_TYPE,
};
// Signature of a function under test: it receives the tester state and the
// allocation mode to run with.
typedef void (*reptest_func)(reptester *tester, alloc_type type);
// A test function together with one display name per allocation mode
// (names is indexed by alloc_type).
struct func_data {
const char *names[COUNT_ALLOC_TYPE];
reptest_func func;
};
void handle_alloc(reptester *tester, alloc_type type);
void handle_free(reptester *tester, alloc_type type);
void run_func_test(reptester *tester, reptest_func func, const char *func_name,
alloc_type type);
u64 page_fault_count();
void print_results(reptester *tester, const char *name);
#endif // !REPTESTER_H

View File

@@ -0,0 +1,66 @@
#include "generator/gen_argparser.h"
#include "aliases.h"
#include <argp.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
// Per-argument callback handed to argp (defined below).
INTERNAL error_t argp_parser(i32 key, char *arg, argp_state *state);
// argp descriptor; parse_args() fills in its fields before use.
INTERNAL argp parser = {};
// Recognised options: --seed/-s takes a SEED value, --cluster/-c is a flag.
// The all-zero entry terminates the list.
INTERNAL argp_option options[] = {
{.name = "seed", .key = 's', .arg = "SEED"},
{.name = "cluster", .key = 'c'},
{0, 0, 0, 0, 0, 0},
};
// Parses the generator's command line (COUNT plus the optional --seed and
// --cluster options) and returns the collected arguments.
GeneratorArgs parse_args(i32 argc, char *argv[]) {
  // Wire up the file-level argp descriptor before handing it to argp_parse.
  parser.args_doc = "COUNT";
  parser.parser = argp_parser;
  parser.options = options;

  GeneratorArgs parsed = {};
  argp_parse(&parser, argc, argv, 0, 0, &parsed);

  // A zero seed (the default when none is given) means "seed from the clock".
  if (!parsed.seed) {
    parsed.seed = time(NULL);
  }

  return parsed;
}
// argp callback for the generator.
//   -s SEED   stores the seed (unsigned decimal)
//   -c        enables clustered generation
//   COUNT     exactly one positional argument: the number of pairs
// Numeric arguments that are not entirely decimal digits are rejected with
// argp_error() instead of silently becoming 0.
// Returns 0 when the key was handled, ARGP_ERR_UNKNOWN otherwise.
error_t argp_parser(i32 key, char *arg, argp_state *state) {
  GeneratorArgs *args = (GeneratorArgs *)state->input;
  // Declared before the switch so no case label jumps over an initialisation.
  char *end = NULL;

  switch (key) {
  case 's':
    args->seed = strtoul(arg, &end, 10);
    if (end == arg || *end != '\0') {
      argp_error(state, "invalid SEED '%s': expected an unsigned integer",
                 arg);
    }
    break;
  case 'c':
    args->clustered = true;
    break;
  case ARGP_KEY_ARG:
    if (state->arg_num >= 1) {
      argp_usage(state);
    }

    args->count = strtoull(arg, &end, 10);
    if (end == arg || *end != '\0') {
      argp_error(state, "invalid COUNT '%s': expected an unsigned integer",
                 arg);
    }
    break;
  case ARGP_KEY_END:
    if (state->arg_num < 1) {
      argp_usage(state);
    }
    break;
  default:
    return ARGP_ERR_UNKNOWN;
  }

  return 0;
}

View File

@@ -0,0 +1,115 @@
#include "generator/generator.h"
#include "aliases.h"
#include "point_types.h"
#include <math.h>
#include <stdlib.h>
// Coordinate ranges in degrees. Y is the coordinate whose cosine is taken by
// haversine_of_degrees, i.e. the latitude; X is the longitude.
// The negative values are not parenthesised, so avoid writing `a - X_MIN`.
#define X_MIN -180.0
#define X_MAX 180.0
#define Y_MIN -90.0
#define Y_MAX 90.0
f64 generate_random_double(f64 min, f64 max);
Point generate_random_point(f64 x_min, f64 x_max, f64 y_min, f64 y_max);
PointPair generate_random_pair(f64 x_min, f64 x_max, f64 y_min, f64 y_max);
// Fills every element of pairs->pairs with random point pairs.
//
// Uniform mode (clustered == false): each pair is drawn from the full
// [X_MIN, X_MAX] x [Y_MIN, Y_MAX] range.
//
// Clustered mode: the number of clusters is derived from the number of
// decimal digits in the pair count (digits * 8 when there are more than two
// digits, otherwise the digit count). Each cluster gets a random centre and
// radius, its bounding box is clamped to the global range, and pairs are
// drawn from that box. The last cluster also receives the remainder of the
// integer division so that all pairs->count elements are initialised.
//
// Does nothing when pairs is NULL, has no storage, or has a count of zero.
void fill_pairs_array(PairArray *pairs, bool clustered) {
  if (!pairs || !pairs->pairs || pairs->count == 0) {
    return;
  }

  if (!clustered) {
    for (u64 i = 0; i < pairs->count; ++i) {
      pairs->pairs[i] = generate_random_pair(X_MIN, X_MAX, Y_MIN, Y_MAX);
    }

    return;
  }

  // Count decimal digits with integer arithmetic; count is non-zero here.
  u64 digit_count = 0;
  for (u64 remaining = pairs->count; remaining > 0; remaining /= 10) {
    ++digit_count;
  }

  u64 cluster_count = digit_count > 2 ? digit_count * 8 : digit_count;
  u64 pairs_per_cluster = pairs->count / cluster_count;
  u64 generated_pairs = 0;

  for (u64 cluster = 0; cluster < cluster_count; ++cluster) {
    // The order of these three draws is kept so a given seed still produces
    // the same cluster layout.
    f64 center_x = generate_random_double(X_MIN, X_MAX);
    f64 center_y = generate_random_double(Y_MIN, Y_MAX);
    f64 radius = generate_random_double(0.0, (digit_count - 1) * 40.0);

    f64 cluster_x_min = center_x - radius;
    if (cluster_x_min < X_MIN) {
      cluster_x_min = X_MIN;
    }

    f64 cluster_x_max = center_x + radius;
    if (cluster_x_max > X_MAX) {
      cluster_x_max = X_MAX;
    }

    f64 cluster_y_min = center_y - radius;
    if (cluster_y_min < Y_MIN) {
      cluster_y_min = Y_MIN;
    }

    f64 cluster_y_max = center_y + radius;
    if (cluster_y_max > Y_MAX) {
      cluster_y_max = Y_MAX;
    }

    // The final cluster absorbs whatever the integer division left over.
    u64 pairs_to_generate = pairs_per_cluster;
    if (cluster + 1 == cluster_count) {
      pairs_to_generate = pairs->count - generated_pairs;
    }

    for (u64 j = 0; j < pairs_to_generate; ++j) {
      // clang-format off
      pairs->pairs[generated_pairs + j] = generate_random_pair(
        cluster_x_min,
        cluster_x_max,
        cluster_y_min,
        cluster_y_max
      );
      // clang-format on
    }

    generated_pairs += pairs_to_generate;
  }
}
// Returns a pseudo-random value in [min, max] by scaling rand() linearly.
f64 generate_random_double(f64 min, f64 max) {
  u32 raw = rand();
  // Position of the draw within [0, 1].
  f64 fraction = (f64)raw / (f64)RAND_MAX;

  return min + (fraction * (max - min));
}
// Draws a random point inside the given rectangle. x is drawn before y.
Point generate_random_point(f64 x_min, f64 x_max, f64 y_min, f64 y_max) {
  f64 x = generate_random_double(x_min, x_max);
  f64 y = generate_random_double(y_min, y_max);

  Point result = {x, y};

  return result;
}
// Draws two independent random points from the same rectangle; the first
// point is drawn before the second.
PointPair generate_random_pair(f64 x_min, f64 x_max, f64 y_min, f64 y_max) {
  Point first = generate_random_point(x_min, x_max, y_min, y_max);
  Point second = generate_random_point(x_min, x_max, y_min, y_max);

  PointPair result = {first, second};

  return result;
}

View File

@@ -0,0 +1,42 @@
#include "aliases.h"
#include "generator/gen_argparser.h"
#include "generator/generator.h"
#include "haversine.h"
#include "point_types.h"
#include <stdio.h>
#include <stdlib.h>
// Generator entry point.
// Produces args.count random point pairs, writes them to "pairs.json", and
// writes the pair count followed by each haversine distance (raw f64) to the
// binary file "count_and_distances". Prints the average distance.
// Returns EXIT_FAILURE when memory or the output file cannot be obtained.
i32 main(i32 argc, char *argv[]) {
  GeneratorArgs args = parse_args(argc, argv);

  srand(args.seed);

  PairArray arr = {args.count, NULL};

  // Reject counts whose byte size would overflow the allocation size.
  if (arr.count > ((size_t)-1) / sizeof(PointPair)) {
    fprintf(stderr, "Pair count is too large\n");
    return EXIT_FAILURE;
  }

  arr.pairs = (PointPair *)malloc(arr.count * sizeof(PointPair));
  if (!arr.pairs && arr.count > 0) {
    fprintf(stderr, "Failed to allocate memory for the pairs\n");
    return EXIT_FAILURE;
  }

  fill_pairs_array(&arr, args.clustered);

  write_pairs_to_json(arr, "pairs.json");

  // "wb": the file holds raw binary values, so avoid text-mode translation.
  FILE *fp = fopen("count_and_distances", "wb");
  if (!fp) {
    fprintf(stderr, "Failed to open count_and_distances for writing\n");
    free(arr.pairs);
    return EXIT_FAILURE;
  }

  bool write_ok = fwrite(&(arr.count), sizeof(arr.count), 1, fp) == 1;

  f64 sum = 0.0;

  for (u64 i = 0; i < arr.count; ++i) {
    f64 distance = haversine_of_degrees(arr.pairs[i], EARTH_RADIUS_KM);

    if (fwrite(&distance, sizeof(f64), 1, fp) != 1) {
      write_ok = false;
    }

    sum += distance;
  }

  // Avoid printing NaN when no pairs were requested.
  f64 average = arr.count > 0 ? sum / arr.count : 0.0;
  printf("\nAVERAGE DISTANCE: %f\n", average);

  // fclose flushes buffered data, so its result matters for a written file.
  if (fclose(fp) != 0) {
    write_ok = false;
  }

  free(arr.pairs);

  if (!write_ok) {
    fprintf(stderr, "Failed to write count_and_distances\n");
    return EXIT_FAILURE;
  }

  return 0;
}

View File

@@ -0,0 +1,30 @@
#include "haversine.h"
#include "aliases.h"
#include "point_types.h"
#include <math.h>
// Pi written with more digits than an f64 can hold; the trailing digits
// (pi continues ...23846...) lie beyond double precision, so the compiled
// constant is the nearest double to pi.
#define PI 3.14159265358979323845
// Squares X. X is expanded twice, so it must not have side effects.
#define SQUARE(X) ((X) * (X))
f64 radians(f64 degrees);
// Great-circle distance between the two points of `pair` on a sphere of the
// given radius, using the haversine formula. Coordinates are in degrees with
// x used as longitude and y as latitude; the result is in the radius' unit.
f64 haversine_of_degrees(const PointPair &pair, f64 radius) {
  // Differences are taken in degrees first, then converted.
  f64 lat_delta = radians(pair.p2.y - pair.p1.y);
  f64 lon_delta = radians(pair.p2.x - pair.p1.x);
  f64 lat0 = radians(pair.p1.y);
  f64 lat1 = radians(pair.p2.y);

  f64 sin_half_lat = sin(lat_delta / 2.0);
  f64 sin_half_lon = sin(lon_delta / 2.0);

  f64 root_term = (sin_half_lat * sin_half_lat) +
                  cos(lat0) * cos(lat1) * (sin_half_lon * sin_half_lon);

  return 2.0 * radius * asin(sqrt(root_term));
}
// Converts an angle from degrees to radians.
f64 radians(f64 degrees) {
  f64 scaled = degrees * PI;

  return scaled / 180.0;
}

View File

@@ -0,0 +1,219 @@
#include "json/dstring.h"
#include "aliases.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
// Use this scalar to allocate extra memory in order to avoid having to
// constantly reallocate
#define CAPACITY_SCALAR 8
// Heap-allocated dynamic string: a small header followed by the characters.
struct dstring {
// Characters the buffer can hold, not counting the extra terminator byte
// that every allocation adds.
u64 capacity;
// Current string length in characters.
u64 size;
// Flexible array member holding capacity + 1 bytes.
char buf[];
};
// Allocates an empty string able to hold `capacity` characters plus the
// terminator, with the whole buffer zeroed. Returns NULL on allocation
// failure. Release with dstr_free.
dstr_t *dstr_with_capacity(u64 capacity) {
  u64 total_bytes = sizeof(dstr_t) + capacity + 1;

  // calloc provides the zeroed buffer directly.
  dstr_t *created = (dstr_t *)calloc(1, total_bytes);

  if (created) {
    created->capacity = capacity;
    created->size = 0;
  }

  return created;
}
// Creates a dynamic string holding a copy of `str`, over-allocated by
// CAPACITY_SCALAR to leave room for growth. Returns NULL when `str` is NULL
// or the allocation fails.
dstr_t *dstr_from_string(const char *str) {
  if (str == NULL) {
    return NULL;
  }

  u64 text_length = strlen(str);

  dstr_t *created = dstr_with_capacity(text_length * CAPACITY_SCALAR);

  if (created != NULL) {
    created->size = text_length;
    // The buffer arrives zeroed, so the copied text is already terminated.
    memcpy(created->buf, str, text_length);
  }

  return created;
}
// Replaces the contents of *dst with a copy of `src`.
// When `src` does not fit, the string is reallocated with room for
// strlen(src) * CAPACITY_SCALAR characters and *dst is updated. If that
// reallocation fails the string is left untouched.
// Does nothing when dst, *dst or src is NULL.
// After the call the buffer is NUL-terminated and every byte past the text
// (including the extra terminator byte) is zero.
void dstr_update(dstr_t **dst, const char *src) {
  if (!dst || !(*dst) || !src) {
    return;
  }

  u64 length = strlen(src);

  dstr_t *str = *dst;

  if (length > str->capacity) {
    u64 capacity = length * CAPACITY_SCALAR;

    dstr_t *tmp = (dstr_t *)realloc(str, sizeof(dstr_t) + capacity + 1);

    if (!tmp) {
      return;
    }

    tmp->capacity = capacity;

    *dst = tmp;
    str = tmp;
  }

  // memmove keeps the in-place case correct even if src points into the
  // string's own buffer.
  memmove(str->buf, src, length);

  // realloc does not clear new memory, so terminate and zero the tail here.
  memset(str->buf + length, 0, str->capacity + 1 - length);

  str->size = length;
}
// Frees *str and resets it to NULL. Safe to call with NULL or with a pointer
// to NULL.
void dstr_free(dstr_t **str) {
  if (str && *str) {
    free(*str);
    *str = NULL;
  }
}
// Appends `src` to *dst.
// The text is appended in place when it fits. Otherwise a new string with
// room for new_length * CAPACITY_SCALAR characters is allocated, both parts
// are copied into it, and *dst is replaced. If that allocation fails the
// string is left untouched.
// Does nothing when dst, *dst or src is NULL, or when src is empty.
// `src` may point into *dst's own buffer.
void dstr_concat(dstr_t **dst, const char *src) {
  if (!dst || !(*dst) || !src) {
    return;
  }

  u64 src_length = strlen(src);

  if (src_length == 0) {
    return;
  }

  dstr_t *str = *dst;
  u64 old_length = str->size;
  u64 new_length = old_length + src_length;

  if (new_length <= str->capacity) {
    // memmove: src may alias the existing text, whose terminator is about to
    // be overwritten.
    memmove(str->buf + old_length, src, src_length);
    str->buf[new_length] = '\0';
    str->size = new_length;

    return;
  }

  u64 capacity = new_length * CAPACITY_SCALAR;

  // Allocate a separate block instead of realloc so that src stays valid
  // while it is copied, even if it points into the old buffer.
  dstr_t *grown = (dstr_t *)malloc(sizeof(dstr_t) + capacity + 1);

  if (!grown) {
    return;
  }

  memcpy(grown->buf, str->buf, old_length);
  memcpy(grown->buf + old_length, src, src_length);
  memset(grown->buf + new_length, 0, capacity + 1 - new_length);

  grown->capacity = capacity;
  grown->size = new_length;

  free(str);

  *dst = grown;
}
// Appends the single character `c` to *dst, growing the string if needed.
// Does nothing when dst or *dst is NULL; appending '\0' leaves the text
// unchanged.
void dstr_append(dstr_t **dst, char c) {
  if (!dst || !(*dst)) {
    return;
  }

  // Express the character as a one-character C string and reuse the
  // concatenation path.
  char single[2] = {c, '\0'};

  dstr_concat(dst, single);
}
// Shrinks the allocation so that capacity equals the current size (plus the
// terminator byte). Leaves the string untouched if reallocation fails or if
// str / *str is NULL.
void dstr_resize(dstr_t **str) {
  if (!str || !(*str)) {
    return;
  }

  u64 exact_capacity = (*str)->size;
  u64 total_bytes = sizeof(dstr_t) + exact_capacity + 1;

  dstr_t *shrunk = (dstr_t *)realloc(*str, total_bytes);

  if (shrunk) {
    shrunk->capacity = exact_capacity;
    *str = shrunk;
  }
}
// Empties the string: zeroes `capacity` bytes of the buffer and resets the
// size. The allocation itself is kept. No-op for NULL or already-empty
// strings.
void dstr_clear(dstr_t *str) {
  if (str && str->size != 0) {
    memset(str->buf, 0, str->capacity);
    str->size = 0;
  }
}
// Prints the string followed by a newline to stdout. No-op for NULL.
void dstr_print(const dstr_t *str) {
  if (str) {
    printf("%s\n", str->buf);
  }
}
// Returns the index of the first occurrence of `substr` in `str`, or -1 when
// it does not occur. Also returns -1 when either argument is NULL, when
// `substr` is empty, or when it is longer than the string.
// Matches that end exactly at the end of the string (including a match of
// the whole string) are found.
i64 dstr_find(const dstr_t *str, const char *substr) {
  if (!str || !substr) {
    return -1;
  }

  u64 substr_length = strlen(substr);

  if (substr_length == 0 || substr_length > str->size) {
    return -1;
  }

  // Last index at which a full-length match can still start.
  u64 last_start = str->size - substr_length;

  for (u64 i = 0; i <= last_start; ++i) {
    if (memcmp(str->buf + i, substr, substr_length) == 0) {
      return (i64)i;
    }
  }

  return -1;
}
// Length of the string in characters; 0 for NULL.
u64 dstr_length(const dstr_t *str) {
  return str ? str->size : 0;
}
// Number of characters the string can hold without reallocating; 0 for NULL.
u64 dstr_capacity(const dstr_t *str) {
  return str ? str->capacity : 0;
}
// Read-only view of the string's internal buffer; "" for NULL. The pointer
// refers to the string's own storage, so it is only valid while that storage
// is neither freed nor reallocated.
const char *dstr_to_cstr(const dstr_t *str) {
  return str ? str->buf : "";
}

View File

@@ -0,0 +1,236 @@
#include "json/json_entities.h"
#include "aliases.h"
#include "processor/ids.h"
#include "profiler/timer.h"
#include "json/dstring.h"
#include <stdio.h>
#include <stdlib.h>
void print_json(const jentity_t *entity, u32 indent) {
PERSISTENT i32 indentation = 0;
dstr_t *key = NULL;
const jval_t *value = NULL;
if (entity->type == JENTITY_SINGLE) {
value = &(entity->value);
} else {
key = entity->pair.key;
value = &(entity->pair.value);
}
if (key) {
printf("%*s\"%s\": ", indentation * indent, "", dstr_to_cstr(key));
}
switch (value->type) {
case JVAL_COLLECTION: {
const char *open = "";
const char *close = "";
if (value->collection->type == JCOLL_OBJECT) {
open = "{";
close = "}";
} else {
open = "[";
close = "]";
}
if (key) {
printf("%s\n", open);
} else {
printf("%*s%s\n", indentation * indent, "", open);
}
++indentation;
if (value->collection->begin) {
print_json(value->collection->begin, indent);
}
--indentation;
printf("\n%*s%s", indentation * indent, "", close);
break;
}
case JVAL_STRING:
if (key) {
printf("\"%s\"", dstr_to_cstr(value->string));
} else {
printf("%*s\"%s\"", indentation * indent, "",
dstr_to_cstr(value->string));
}
break;
case JVAL_INTEGER:
if (key) {
printf("%llu", (unsigned long long)value->num_int);
} else {
printf("%*s%llu", indentation * indent, "",
(unsigned long long)value->num_int);
}
break;
case JVAL_DOUBLE:
if (key) {
printf("%f", value->num_dbl);
} else {
printf("%*s%f", indentation * indent, "", value->num_dbl);
}
break;
case JVAL_BOOLEAN:
if (key) {
printf("%s", value->boolean ? "true" : "false");
} else {
printf("%*s%s", indentation * indent, "",
value->boolean ? "true" : "false");
}
break;
case JVAL_NULL:
if (key) {
printf("%s", "null");
} else {
printf("%*s%s", indentation * indent, "", "null");
}
break;
case JVAL_EMPTY:
break;
}
if (entity->next) {
printf(",\n");
print_json(entity->next, indent);
}
// Add newline after printing the entire json tree
if (indentation == 0 && entity->parent == NULL && entity->next == NULL) {
printf("\n");
}
}
// Frees a whole JSON tree (entities, keys, string values and collections)
// and sets *root to NULL.
// The walk is iterative: it descends into the first child of a collection,
// frees leaves as it meets them, and moves to the next sibling or back up to
// the parent when a level is exhausted. A collection is freed once its
// `begin` pointer has advanced past the last child.
// `root` itself must not be NULL; a NULL *root is a no-op.
void free_json(jentity_t **root) {
if (!(*root)) {
return;
}
SAMPLE_START(PROFILER_ID_FREE_JSON, "FREE JSON");
jentity_t *current = *root;
jentity_t *temp = NULL;
dstr_t *key = NULL;
jval_t *value = NULL;
while (current) {
if (current->parent) {
// Move the beginning pointer of the collection to the next child, so the
// parent no longer refers to the entity that is about to be freed.
// TODO (Abdelrahman): This part gets repeated for some elements. Try to
// avoid that repetition
jentity_t *parent = current->parent;
jcoll_t *collection = NULL;
if (parent->type == JENTITY_SINGLE) {
collection = parent->value.collection;
} else {
collection = parent->pair.value.collection;
}
if (collection) {
collection->begin = current->next;
}
}
if (current->type == JENTITY_SINGLE) {
key = NULL;
value = &(current->value);
} else {
key = current->pair.key;
value = &(current->pair.value);
}
// dstr_free resets the key to NULL, so revisiting this entity after its
// children have been freed does not free the key twice.
if (key) {
dstr_free(&(current->pair.key));
}
if (!value) {
break;
}
if (value->type == JVAL_COLLECTION) {
if (!(value->collection->begin)) {
// Once all children of the collection have been freed, free the memory
// allocated to the collection and the entity that holds it
free(value->collection);
temp = current;
current = current->next != NULL ? current->next : current->parent;
free(temp);
temp = NULL;
continue;
}
// Children remain: descend into the first one.
current = value->collection->begin;
} else {
if (value->type == JVAL_STRING) {
dstr_free(&(value->string));
}
// Leaf: step to the next sibling, or back to the parent after the last one.
temp = current;
current = current->next != NULL ? current->next : current->parent;
free(temp);
temp = NULL;
}
}
*root = NULL;
SAMPLE_END_DEFAULT(PROFILER_ID_FREE_JSON);
}
// Returns the collection pointer stored in the entity's value, whether the
// entity is a bare value or a key/value pair. The value's type tag is not
// checked, so the caller must know the entity holds a collection.
jcoll_t *get_collection_from_entity(const jentity_t *entity) {
  if (entity->type == JENTITY_SINGLE) {
    return entity->value.collection;
  }

  return entity->pair.value.collection;
}
// Allocates a key-less entity holding `value`, attached to `parent` and with
// no next sibling. Returns NULL on allocation failure.
jentity_t *create_new_single_entity(const jval_t value, jentity_t *parent) {
  jentity_t *created = (jentity_t *)malloc(sizeof(jentity_t));

  if (created) {
    created->type = JENTITY_SINGLE;
    created->value = value;
    created->parent = parent;
    created->next = NULL;
  }

  return created;
}
// Allocates a key/value entity attached to `parent` with no next sibling.
// The entity stores the `key` pointer itself (no copy is made).
// Returns NULL on allocation failure, in which case `key` is untouched.
jentity_t *create_new_pair_entity(dstr_t *key, const jval_t value,
                                  jentity_t *parent) {
  jentity_t *created = (jentity_t *)malloc(sizeof(jentity_t));

  if (created) {
    created->type = JENTITY_PAIR;
    created->pair.key = key;
    created->pair.value = value;
    created->parent = parent;
    created->next = NULL;
  }

  return created;
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,275 @@
#include "json/parser.h"
#include "aliases.h"
#include "processor/ids.h"
#include "profiler/timer.h"
#include "json/dstring.h"
#include "json/json_entities.h"
#include "json/lexer.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
// Parser state while the JSON tree is being built.
struct parser_s {
// First entity created; becomes the result of load_json.
jentity_t *root;
// Entity whose collection currently receives new keys and values.
jentity_t *current;
// Value built from the token being processed, about to be inserted.
jval_t value;
};
INTERNAL void parser_free(parser_t **parser);
INTERNAL void parser_init(parser_t **parser);
INTERNAL void parse_token(parser_t *parser, token_t token);
INTERNAL void add_key(parser_t *parser, dstr_t *key);
INTERNAL jentity_t *add_value(parser_t *parser);
INTERNAL void add_collection(parser_t *parser);
// Reads the file at `filepath`, tokenises it and builds the JSON tree.
// Returns the root entity (to be released with free_json), or NULL when the
// file cannot be opened, sized or read, when memory runs out, or when no
// root was produced. Lexer errors are printed and stop parsing; whatever was
// built up to that point is still returned.
jentity_t *load_json(const char *filepath) {
  FILE *fp = fopen(filepath, "r");

  if (!fp) {
    return NULL;
  }

  // Size the file; ftell reports failure as a negative value.
  long file_size = -1;

  if (fseek(fp, 0, SEEK_END) == 0) {
    file_size = ftell(fp);
  }

  if (file_size < 0 || fseek(fp, 0, SEEK_SET) != 0) {
    fclose(fp);
    return NULL;
  }

  u64 length = (u64)file_size;

  char *json = (char *)malloc(sizeof(char) * (length + 1));

  if (!json) {
    fclose(fp);
    return NULL;
  }

  // Zeroing the buffer guarantees termination even if fewer bytes are read.
  memset(json, 0, length + 1);

  SAMPLE_START(PROFILER_ID_READ_JSON_FILE, "READ JSON FILE");
  u64 bytes_read = fread(json, sizeof(char), length, fp);
  SAMPLE_END(PROFILER_ID_READ_JSON_FILE, length);

  // A short read is only an error when the stream reports one.
  bool read_failed = bytes_read < length && ferror(fp);

  fclose(fp);

  if (read_failed) {
    free(json);
    return NULL;
  }

  SAMPLE_START(PROFILER_ID_PARSER_SETUP, "JSON PARSER SETUP");

  lexer_t *lexer = NULL;
  parser_t *parser = NULL;

  lexer_init(&lexer);

  if (lexer) {
    parser_init(&parser);
  }

  // Close the sample before any early return so the profiler stays balanced.
  SAMPLE_END_DEFAULT(PROFILER_ID_PARSER_SETUP);

  if (!lexer || !parser) {
    if (lexer) {
      lexer_free(&lexer);
    }

    free(json);

    return NULL;
  }

  SAMPLE_START(PROFILER_ID_PARSER_PARSE_TOKENS, "PARSE TOKENS");

  lex_result_t result = get_next_token(lexer, json);

  if (result.error.errno) {
    printf("%s\n", result.error.msg);
  } else {
    while (result.token.type != TK_NO_TOKEN) {
      parse_token(parser, result.token);

      result = get_next_token(lexer, NULL);

      if (result.error.errno) {
        printf("%s\n", result.error.msg);
        break;
      }
    }
  }

  SAMPLE_END_DEFAULT(PROFILER_ID_PARSER_PARSE_TOKENS);

  jentity_t *root = parser->root;

  SAMPLE_START(PROFILER_ID_PARSER_TEAR_DOWN, "PARSER TEAR DOWN");

  parser_free(&parser);
  lexer_free(&lexer);
  free(json);

  SAMPLE_END_DEFAULT(PROFILER_ID_PARSER_TEAR_DOWN);

  return root;
}
// (Re)creates the parser behind *parser: any existing parser is released
// first, then a fresh one is allocated with no root, no current entity and a
// zeroed pending value. *parser is NULL afterwards if allocation failed.
void parser_init(parser_t **parser) {
  if (*parser) {
    parser_free(parser);
  }

  parser_t *fresh = (parser_t *)malloc(sizeof(parser_t));

  *parser = fresh;

  if (fresh) {
    fresh->root = NULL;
    fresh->current = NULL;
    fresh->value = (jval_t){0};
  }
}
// Releases the parser object and resets *parser to NULL. The JSON tree that
// the parser pointed at is not freed. No-op when *parser is already NULL.
void parser_free(parser_t **parser) {
  parser_t *doomed = *parser;

  if (doomed) {
    doomed->root = NULL;
    doomed->current = NULL;

    free(doomed);

    *parser = NULL;
  }
}
// Feeds one lexer token into the tree under construction.
//   '{' / '['   allocate a collection and make it the current container
//   '}' / ']'   return to the enclosing container, if there is one
//   key         append a pair whose value is filled in by the next value
//   scalars     append the value (or complete the pending pair)
// A closing token seen before any container was opened is ignored. If the
// collection for an opening token cannot be allocated the token is dropped;
// NOTE(review): the tree is then incomplete, but the parser does not crash.
void parse_token(parser_t *parser, token_t token) {
  switch (token.type) {
  case TK_L_BRACE:
  case TK_L_BRACKET: {
    jcoll_t *collection = (jcoll_t *)malloc(sizeof(jcoll_t));

    if (!collection) {
      break;
    }

    collection->type = token.type == TK_L_BRACE ? JCOLL_OBJECT : JCOLL_ARRAY;
    collection->size = 0;
    collection->begin = NULL;
    collection->end = NULL;

    parser->value = (jval_t){
        .type = JVAL_COLLECTION,
        .collection = collection,
    };

    add_collection(parser);

    break;
  }
  case TK_R_BRACE:
  case TK_R_BRACKET:
    // The root has no parent, so closing it leaves `current` on the root.
    if (parser->current && parser->current->parent) {
      parser->current = parser->current->parent;
    }

    break;
  case TK_STR_KEY: {
    // JVAL_EMPTY marks the pair as waiting for its value.
    parser->value = (jval_t){.type = JVAL_EMPTY, .null_val = NULL};

    add_key(parser, dstr_from_string(token.value.string));

    break;
  }
  case TK_NULL: {
    parser->value = (jval_t){.type = JVAL_NULL, .null_val = NULL};

    add_value(parser);

    break;
  }
  case TK_BOOL: {
    parser->value =
        (jval_t){.type = JVAL_BOOLEAN, .boolean = token.value.boolean};

    add_value(parser);

    break;
  }
  case TK_STR_VAL: {
    parser->value = (jval_t){.type = JVAL_STRING,
                             .string = dstr_from_string(token.value.string)};

    add_value(parser);

    break;
  }
  case TK_INTEGER: {
    parser->value =
        (jval_t){.type = JVAL_INTEGER, .num_int = token.value.num_int};

    add_value(parser);

    break;
  }
  case TK_DOUBLE: {
    parser->value =
        (jval_t){.type = JVAL_DOUBLE, .num_dbl = token.value.num_frac};

    add_value(parser);

    break;
  }
  case TK_NO_TOKEN:
    break;
  }
}
// Appends a new key/value pair (carrying parser->value) to the collection of
// the current entity. On success the pair stores `key`. If there is no
// current collection or the pair cannot be allocated, `key` is freed here
// (nothing else would own it) and the collection is left unchanged.
void add_key(parser_t *parser, dstr_t *key) {
  jcoll_t *collection = NULL;

  if (parser->current) {
    collection = get_collection_from_entity(parser->current);
  }

  jentity_t *new_entity = NULL;

  if (collection) {
    new_entity = create_new_pair_entity(key, parser->value, parser->current);
  }

  if (!new_entity) {
    dstr_free(&key);
    return;
  }

  if (!(collection->end)) {
    collection->begin = collection->end = new_entity;
    collection->size = 1;
  } else {
    collection->end->next = new_entity;
    collection->end = new_entity;

    ++(collection->size);
  }
}
// Inserts parser->value into the collection of the current entity.
// If the collection's last entity is a pair still waiting for its value
// (JVAL_EMPTY), the value completes that pair; otherwise a new key-less
// entity is appended.
// Returns the entity that now holds the value, or NULL when there is no
// current collection or the new entity cannot be allocated (the collection
// is left unchanged in that case).
// NOTE(review): on failure a heap payload referenced by parser->value
// (string or collection) is not released here.
jentity_t *add_value(parser_t *parser) {
  if (!(parser->current)) {
    return NULL;
  }

  jcoll_t *collection = get_collection_from_entity(parser->current);

  if (!collection) {
    return NULL;
  }

  jentity_t *last = collection->end;

  if (last && last->type == JENTITY_PAIR &&
      last->pair.value.type == JVAL_EMPTY) {
    last->pair.value = parser->value;

    return last;
  }

  jentity_t *new_entity =
      create_new_single_entity(parser->value, parser->current);

  if (!new_entity) {
    return NULL;
  }

  if (!last) {
    collection->begin = collection->end = new_entity;
    collection->size = 1;
  } else {
    last->next = new_entity;
    collection->end = new_entity;

    ++(collection->size);
  }

  return new_entity;
}
// Inserts the collection held in parser->value and makes it the current
// container. The first collection ever seen becomes the root of the tree;
// later ones are added to the current container like any other value.
void add_collection(parser_t *parser) {
  if (parser->root) {
    parser->current = add_value(parser);
    return;
  }

  jentity_t *root_entity = create_new_single_entity(parser->value, NULL);

  parser->current = root_entity;
  parser->root = root_entity;
}

View File

@@ -0,0 +1,125 @@
#include "aliases.h"
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#define PAGESIZE 4096
#if _WIN32
// clang-format off
#include <intrin.h>
#include <windows.h>
#include <psapi.h>
// clang-format on
// Windows-only state needed to query the process' page-fault counter.
typedef struct {
// Set once InitializeOSMetrics has run.
bool Initialized;
// Handle to the current process, opened by InitializeOSMetrics.
HANDLE ProcessHandle;
} os_metrics;
// Single file-level instance; zero-initialised as a static.
static os_metrics GlobalMetrics;
// Returns the page-fault count reported for the process handle stored in
// GlobalMetrics. The result of the query call is not checked; the counters
// start zeroed, so an unfilled structure reads as 0.
static u64 ReadWindowsPageFaultCount(void) {
  PROCESS_MEMORY_COUNTERS_EX Counters = {0};
  Counters.cb = sizeof(Counters);

  GetProcessMemoryInfo(GlobalMetrics.ProcessHandle,
                       (PROCESS_MEMORY_COUNTERS *)&Counters,
                       sizeof(Counters));

  return Counters.PageFaultCount;
}
// Opens a query handle to the current process the first time it is called;
// later calls do nothing.
static void InitializeOSMetrics(void) {
  if (GlobalMetrics.Initialized) {
    return;
  }

  GlobalMetrics.Initialized = true;
  GlobalMetrics.ProcessHandle =
      OpenProcess(PROCESS_QUERY_INFORMATION | PROCESS_VM_READ, FALSE,
                  GetCurrentProcessId());
}
#else // _WIN32
#include <sys/mman.h>
#include <sys/resource.h>
#include <sys/time.h>
typedef struct rusage rusage_t;
u64 nix_page_fault_count() {
rusage_t usage;
getrusage(RUSAGE_SELF, &usage);
return usage.ru_minflt + usage.ru_majflt;
}
#endif // _WIN32
// OS-agnostic page-fault counter: forwards to the Windows or the *nix
// implementation selected at compile time.
u64 page_fault_count() {
#if _WIN32
return ReadWindowsPageFaultCount();
#else
return nix_page_fault_count();
#endif
}
int main(int argc, char *argv[]) {
if (argc < 2 || argc > 2) {
printf("Usage: %s [NUMBER OF PAGES TO ALLOCATE]\n", argv[0]);
return EXIT_FAILURE;
}
#if _WIN32
InitializeOSMetrics();
#endif
u64 page_count = atol(argv[1]);
u64 alloc_size = page_count * PAGESIZE;
u64 touch_size = 0;
printf("Page Count,Touch Count,Fault Count,Extra Faults\n");
for (u64 touch_count = 0; touch_count <= page_count; ++touch_count) {
touch_size = touch_count * PAGESIZE;
#if _WIN32
u8 *data = (u8 *)VirtualAlloc(0, alloc_size, MEM_RESERVE | MEM_COMMIT,
PAGE_READWRITE);
#else
u8 *data = (u8 *)mmap(NULL, alloc_size, PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
#endif
if (!data) {
printf("Failed to allocate memory\n");
return EXIT_FAILURE;
}
u64 fault_start = page_fault_count();
for (u64 i = 0; i < touch_size; ++i) {
data[i] = (u8)i;
}
u64 fault_end = page_fault_count();
u64 faults = fault_end - fault_start;
printf("%llu,%llu,%llu,%lld\n", (unsigned long long)page_count,
(unsigned long long)touch_count, (unsigned long long)faults,
((long long)faults - touch_count));
#if _WIN32
VirtualFree(data, 0, MEM_RELEASE);
#else
munmap((void *)data, alloc_size);
#endif
}
return EXIT_SUCCESS;
}

View File

@@ -0,0 +1,77 @@
#include "point_types.h"
#include <stdio.h>
#include <stdlib.h>
bool compare_point(const Point &p1, const Point &p2);
bool compare_pair(const PointPair &pair1, const PointPair &pair2);
// Writes the array to `filename` in raw binary form: the pair count followed
// by the pairs themselves. Does nothing if the file cannot be opened.
void write_pairs_to_binary(const PairArray &arr, const char *filename) {
  // "wb": raw binary data must not go through text-mode translation.
  FILE *fp = fopen(filename, "wb");

  if (!fp) {
    return;
  }

  // Write the count member itself rather than relying on it being the first
  // member of the struct.
  if (fwrite(&(arr.count), sizeof(arr.count), 1, fp) == 1 && arr.count > 0) {
    fwrite(arr.pairs, sizeof(*(arr.pairs)), arr.count, fp);
  }

  fclose(fp);
}
// Reads a pair array previously written by write_pairs_to_binary.
// On success arr.count holds the number of pairs read and arr.pairs points
// to freshly malloc'ed storage owned by the caller (NULL when there are no
// pairs). If the file cannot be opened `arr` is left untouched. If the
// header is unreadable, the size is implausible or memory runs out, arr
// becomes {0, NULL}. A truncated file yields the pairs that were present.
void read_pairs_from_binary(PairArray &arr, const char *filename) {
  // "rb": raw binary data must not go through text-mode translation.
  FILE *fp = fopen(filename, "rb");

  if (!fp) {
    return;
  }

  arr.pairs = NULL;

  if (fread(&(arr.count), sizeof(arr.count), 1, fp) != 1 ||
      arr.count > ((size_t)-1) / sizeof(PointPair)) {
    arr.count = 0;
  }

  if (arr.count > 0) {
    arr.pairs = (PointPair *)malloc(arr.count * sizeof(PointPair));

    if (!arr.pairs) {
      arr.count = 0;
    } else {
      // Keep only as many pairs as the file actually contained.
      arr.count = fread(arr.pairs, sizeof(PointPair), arr.count, fp);
    }
  }

  fclose(fp);
}
void write_pairs_to_json(const PairArray &arr, const char *filename) {
FILE *fp = fopen(filename, "w");
if (fp) {
fprintf(fp, "{\n\t\"pairs\": [\n");
for (u64 i = 0; i < arr.count; ++i) {
PointPair pair = arr.pairs[i];
fprintf(fp,
"\t\t{\"x0\": %.16f, \"y0\": %.16f, \"x1\": %.16f, \"y1\": "
"%.16f}%s\n",
pair.p1.x, pair.p1.y, pair.p2.x, pair.p2.y,
i + 1 < arr.count ? "," : "");
}
fprintf(fp, "\t]\n}\n");
fclose(fp);
}
}
bool compare_pair_array(const PairArray &arr1, const PairArray &arr2) {
if (arr1.count != arr2.count) {
return false;
}
for (u64 i = 0; i < arr1.count; ++i) {
if (!compare_pair(arr1.pairs[i], arr2.pairs[i])) {
return false;
}
}
return true;
}
// Exact (bitwise-value) equality of both coordinates; no tolerance applied.
bool compare_point(const Point &p1, const Point &p2) {
  if (p1.x != p2.x) {
    return false;
  }

  return p1.y == p2.y;
}
// True when both points of the two pairs are equal, compared position by
// position (p1 with p1, p2 with p2).
bool compare_pair(const PointPair &pair1, const PointPair &pair2) {
  if (!compare_point(pair1.p1, pair2.p1)) {
    return false;
  }

  return compare_point(pair1.p2, pair2.p2);
}

View File

@@ -0,0 +1,122 @@
#include "haversine.h"
#include "point_types.h"
#include "processor/ids.h"
#include "processor/proc_argparser.h"
#include "profiler/timer.h"
#include "json/dstring.h"
#include "json/json_entities.h"
#include "json/parser.h"
#include <aliases.h>
#include <assert.h>
#include <float.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
int main(int argc, char *argv[]) {
PROFILE_START(COUNT_PROFILER_IDS);
SAMPLE_START(PROFILER_ID_CLI_PARSE, "CLI PARSING");
ProcessorArgs args = parse_args(argc, argv);
SAMPLE_END_DEFAULT(PROFILER_ID_CLI_PARSE);
SAMPLE_START(PROFILER_ID_JSON_PARSE, "JSON PARSING");
jentity_t *root = load_json(args.filepath);
assert(root->type == JENTITY_SINGLE && root->value.type == JVAL_COLLECTION);
SAMPLE_END_DEFAULT(PROFILER_ID_JSON_PARSE);
SAMPLE_START(PROFILER_ID_LOAD_JSON_PAIRS, "LOAD JSON PAIRS");
jentity_t *pairs = root->value.collection->begin;
assert(pairs->type == JENTITY_PAIR &&
pairs->pair.value.type == JVAL_COLLECTION);
u64 pair_count = pairs->pair.value.collection->size;
PointPair *point_pairs = (PointPair *)malloc(sizeof(PointPair) * pair_count);
memset(point_pairs, 0, pair_count);
u64 index = 0;
for (jentity_t *pair = pairs->pair.value.collection->begin; pair != NULL;
pair = pair->next) {
assert(index < pair_count && pair->type == JENTITY_SINGLE &&
pair->value.type == JVAL_COLLECTION &&
pair->value.collection->size == 4);
jentity_t *x0 = pair->value.collection->begin;
jentity_t *y0 = x0->next;
jentity_t *x1 = y0->next;
jentity_t *y1 = x1->next;
PointPair p = ((PointPair){
{x0->pair.value.num_dbl, y0->pair.value.num_dbl},
{x1->pair.value.num_dbl, y1->pair.value.num_dbl},
});
point_pairs[index++] = p;
}
SAMPLE_END_DEFAULT(PROFILER_ID_LOAD_JSON_PAIRS);
SAMPLE_START(PROFILER_ID_READ_BINARY, "BINARY READ");
const char *filename = "count_and_distances";
FILE *fp = fopen(filename, "r");
if (!fp) {
printf("Failed to open the %s file", filename);
} else {
// Skip the count
fseek(fp, sizeof(u64), SEEK_SET);
}
SAMPLE_END_DEFAULT(PROFILER_ID_READ_BINARY);
SAMPLE_START(PROFILER_ID_HAVERSINE_SUM, "HAVERSINE SUM");
f64 sum = 0.0;
f64 distance = 0.0;
f64 saved_distance = 0.0;
for (u64 i = 0; i < pair_count; ++i) {
distance = haversine_of_degrees(point_pairs[i], EARTH_RADIUS_KM);
if (fp) {
fread(&saved_distance, sizeof(f64), 1, fp);
if (fabs(distance - saved_distance) > FLT_EPSILON) {
printf("%llu: %.16f does not equal %.16f\n", (unsigned long long)i,
distance, saved_distance);
}
}
sum += distance;
}
SAMPLE_END(PROFILER_ID_HAVERSINE_SUM, sizeof(f64) * pair_count);
SAMPLE_START(PROFILER_ID_HAVERSINE_AVG, "HAVERSINE AVERAGE");
printf("\nAVERAGE DISTANCE: %f\n", sum / pair_count);
SAMPLE_END_DEFAULT(PROFILER_ID_HAVERSINE_AVG);
SAMPLE_START(PROFILER_ID_TEAR_DOWN, "TEAR DOWN");
if (fp) {
fclose(fp);
}
free(point_pairs);
free_json(&root);
SAMPLE_END_DEFAULT(PROFILER_ID_TEAR_DOWN);
PROFILE_END;
return 0;
}

View File

@@ -0,0 +1,44 @@
#include "processor/proc_argparser.h"
#include "aliases.h"
#include <argp.h>
INTERNAL error_t argp_parser(i32 key, char *arg, argp_state *state);
INTERNAL argp parser = {};
// Parses the processor's command line, which consists of exactly one
// positional argument (JSON_FILEPATH), and returns the collected arguments.
ProcessorArgs parse_args(i32 argc, char *argv[]) {
  // Configure the file-level argp descriptor; there are no options.
  parser.args_doc = "JSON_FILEPATH";
  parser.parser = argp_parser;
  parser.options = {};

  ProcessorArgs parsed = {};

  argp_parse(&parser, argc, argv, 0, 0, &parsed);

  return parsed;
}
// argp callback for the processor. Accepts exactly one positional argument
// and stores it as the JSON file path; too many or too few arguments
// trigger the usage message. Returns 0 for handled keys and
// ARGP_ERR_UNKNOWN for everything else.
error_t argp_parser(i32 key, char *arg, argp_state *state) {
  ProcessorArgs *args = (ProcessorArgs *)state->input;

  if (key == ARGP_KEY_ARG) {
    if (state->arg_num >= 1) {
      argp_usage(state);
    }

    args->filepath = arg;

    return 0;
  }

  if (key == ARGP_KEY_END) {
    if (state->arg_num < 1) {
      argp_usage(state);
    }

    return 0;
  }

  return ARGP_ERR_UNKNOWN;
}

View File

@@ -0,0 +1,233 @@
#include "profiler/timer.h"
#include "aliases.h"
#include <stdbool.h>
#include <stdio.h>
#include <string.h>
#include <time.h>
#include <x86intrin.h>
#if defined(BASIC_PROFILING) || defined(FULL_PROFILING)
// Global profiler state.
typedef struct {
// Sample table indexed by the id given to sample_start / sample_end.
profiler_sample_t samples[MAX_PROFILE_SAMPLES];
// CPU timer frequency measured in profile_start.
u64 cpu_freq;
// Number of sample slots walked when the report is printed.
u64 size;
// CPU timer values at profile_start and profile_end.
u64 start;
u64 end;
// Longest sample title seen so far; used as the title column width.
u64 max_title_length;
// Sample that is currently running, or NULL.
profiler_sample_t *active;
} profiler_t;
// Single file-level profiler instance.
INTERNAL profiler_t profiler = {0};
typedef struct timespec timespec_t;
// OS timer ticks per second. get_os_time reports nanoseconds, hence 10^9.
u64 get_os_frequency() {
  const u64 ticks_per_second = 1000000000;

  return ticks_per_second;
}
// Current CLOCK_MONOTONIC_RAW time in nanoseconds, or 0 when the clock
// cannot be read.
u64 get_os_time(void) {
  timespec_t now = {0};

  int status = clock_gettime(CLOCK_MONOTONIC_RAW, &now);

  if (status == 0) {
    return now.tv_sec * get_os_frequency() + now.tv_nsec;
  }

  return 0;
}
// Reads the CPU timestamp counter via the rdtsc intrinsic.
u64 read_cpu_timer(void) {
  u64 ticks = __rdtsc();

  return ticks;
}
// Estimates the CPU timer frequency (ticks per second) by busy-waiting for
// `milliseconds` of OS time and scaling the CPU ticks that elapsed.
// Returns 0 when no OS time elapsed (for example when milliseconds is 0).
u64 get_cpu_freq(u64 milliseconds) {
  const u64 os_freq = get_os_frequency();
  const u64 target_os_ticks = os_freq * milliseconds / 1000;

  const u64 os_begin = get_os_time();
  const u64 cpu_begin = read_cpu_timer();

  u64 os_elapsed = 0;

  // Spin until the requested amount of OS time has passed.
  while (os_elapsed < target_os_ticks) {
    os_elapsed = get_os_time() - os_begin;
  }

  const u64 cpu_elapsed = read_cpu_timer() - cpu_begin;

  return os_elapsed ? cpu_elapsed * os_freq / os_elapsed : 0;
}
// Converts a CPU tick count to seconds given the tick frequency.
f64 time_in_seconds(u64 cpu_time, u64 cpu_freq) {
  f64 ticks = (f64)cpu_time;

  return ticks / cpu_freq;
}
// Starts a profiling session: measures the CPU timer frequency over one
// second, records the start timestamp and remembers how many sample slots
// the report should cover. `count` is clamped to MAX_PROFILE_SAMPLES so the
// report never reads past the sample table.
void profile_start(u64 count) {
  profiler.cpu_freq = get_cpu_freq(1000);
  profiler.start = read_cpu_timer();
  profiler.max_title_length = 0;
  profiler.size = count < MAX_PROFILE_SAMPLES ? count : MAX_PROFILE_SAMPLES;
}
// Ends the profiling session and prints the report to stdout: the total
// elapsed time and, with FULL_PROFILING, one line per sample that was hit
// (hit count, exclusive ticks, percentage of the total, percentage including
// children, and data volume / throughput when bytes were recorded).
// Does nothing if profile_start was never called or reported zero slots.
// All printf conversions match the unsigned 64-bit values passed to them.
void profile_end() {
  if (!profiler.start || !profiler.size) {
    return;
  }

  profiler.end = read_cpu_timer();

  u64 total = 0;
  if (profiler.end >= profiler.start) {
    total = profiler.end - profiler.start;
  }

  // Field widths and precisions are ints because printf's '*' expects int.
  int time_precision = 16;
  int time_char_count = 20;

  // clang-format off
  printf("\n============================================================PROFILING============================================================\n");
  // clang-format on

  if (profiler.cpu_freq) {
    printf("Total: %*.*f seconds, %llu (CPU frequency: %llu hz/sec)\n\n",
           time_char_count, time_precision, (f64)total / profiler.cpu_freq,
           (unsigned long long)total, (unsigned long long)profiler.cpu_freq);
  }

#ifdef FULL_PROFILING
  f64 byte_to_mb = 1.0 / (1024.0 * 1024.0);
  f64 mb_to_gb = 1.0 / 1024.0;

  int duration_char_count = 22;
  int hits_char_count = 10;
  int percentage_precision = 8;
  int percentage_char_count = 12;
  int throughput_precision = 24;
  int throughput_char_count = 32;

  profiler_sample_t *sample = NULL;

  for (u64 i = 0; i < profiler.size; ++i) {
    sample = &(profiler.samples[i]);

    // Slots that were never started have nothing to report.
    if (sample->hit_count == 0) {
      continue;
    }

    printf("%*s (hits: %*llu): %*llu (%*.*f %%", (i32)profiler.max_title_length,
           sample->title, hits_char_count,
           (unsigned long long)sample->hit_count, duration_char_count,
           (unsigned long long)sample->exclusive_time, percentage_char_count,
           percentage_precision, (f64)(sample->exclusive_time) / total * 100.0);

    if (sample->children_time > 0) {
      printf(", w/ children: %*.*f %%", percentage_char_count,
             percentage_precision,
             (f64)(sample->exclusive_time + sample->children_time) / total *
                 100.0);
    }

    if (sample->byte_count > 0) {
      f64 data_read = (f64)(sample->byte_count) * byte_to_mb;
      f64 sample_time_in_seconds =
          (f64)(sample->exclusive_time + sample->children_time) /
          profiler.cpu_freq;

      printf(", Data read: %*.*f MB, Throughput: %*.*f GB/s",
             throughput_char_count, throughput_precision, data_read,
             throughput_char_count, throughput_precision,
             data_read * mb_to_gb / sample_time_in_seconds);
    }

    printf(")\n");
  }
#endif // FULL_PROFILING
}
#endif // BASIC_PROFILING || FULL_PROFILING
#ifdef FULL_PROFILING
// Starts (or restarts) timing of sample slot `id` and makes it the active
// sample. Ids outside the sample table are ignored.
void sample_start(u64 id, const char *title) {
if (id >= MAX_PROFILE_SAMPLES) {
return;
}
profiler_sample_t *sample = &(profiler.samples[id]);
// First use of the slot, or the slot is being reused under a different
// title: reset all of its counters.
if (!(sample->title) || strcmp(title, sample->title) != 0) {
sample->title = title;
sample->first_start = 0;
sample->start = 0;
sample->exclusive_time = 0;
sample->children_time = 0;
sample->hit_count = 0;
sample->byte_count = 0;
sample->parent = NULL;
// Track the longest title so the report can align its first column.
u64 length = strlen(sample->title);
if (length > profiler.max_title_length) {
profiler.max_title_length = length;
}
}
sample->start = read_cpu_timer();
if (sample->hit_count == 0) {
sample->first_start = sample->start;
}
++(sample->hit_count);
// Charge the time since the active sample last (re)started to that sample
// before switching away from it.
// NOTE(review): when the active sample is this same sample (recursion), its
// start was just overwritten above, so the charged duration is 0 - confirm
// that this is intended.
if (profiler.active) {
u64 duration = sample->start - (profiler.active->start);
profiler.active->exclusive_time += duration;
}
if (!(profiler.active) || sample != profiler.active) {
// This handles recursive functions by changing the parent only when a
// function isn't calling itself
sample->parent = profiler.active;
}
profiler.active = sample;
}
// Stops timing of sample slot `id`, credits `byte_count` bytes to it and
// makes its parent the active sample again. Ids outside the sample table are
// ignored. There is no check that `id` is the currently active sample.
void sample_end(u64 id, u64 byte_count) {
if (id >= MAX_PROFILE_SAMPLES) {
return;
}
profiler_sample_t *sample = &(profiler.samples[id]);
// Ticks since this sample last (re)started.
u64 duration = read_cpu_timer() - sample->start;
sample->exclusive_time += duration;
sample->byte_count += byte_count;
// The timer is read a second time here; `now` is therefore slightly later
// than the reading used for `duration`.
u64 now = read_cpu_timer();
// Reset the start time at the end of the sample to handle recursion
sample->start = now;
profiler_sample_t *parent = sample->parent;
if (parent) {
// Add sample duration to all parents. This handles deep call stacks
while (parent) {
parent->children_time += duration;
parent = parent->parent;
}
// Restart the direct parent's clock so the time spent in this sample is
// not also counted as the parent's exclusive time.
sample->parent->start = now;
}
profiler.active = sample->parent;
}
#endif // FULL_PROFILING

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,529 @@
global mov_all_bytes_asm
global nop_all_bytes_asm
global nop_1x3_all_bytes_asm
global nop_1x9_all_bytes_asm
global inc_all_bytes_asm
global dec_all_bytes_asm
global align64_loop
global align1_loop
global align15_loop
global align31_loop
global align63_loop
global align75_loop
global align90_loop
global align112_loop
global rat_add
global rat_mov_add
global read_1
global read_2
global read_3
global read_4
global read_8
global write_1
global write_2
global write_3
global write_4
global write_8
global read_1x2_low
global read_1x2_high
global read_2x2
global read_4x2
global read_8x2
global read_4x2_simd
global read_8x2_simd
global read_16x2_simd
global read_32x2_simd_offset
global read_32x2_simd_no_offset
global read_16x4_simd
global read_32x4_simd
global cache_test ; Expects 3 inputs (pointer, read_count, mask)
global cache_test_unaligned ; Expects 3 inputs (pointer, read_count, mask)
; mov_all_bytes_asm: store one byte per iteration to [rdi + rax], rax = 0, 1, 2, ...
; In: rdi = destination base address, rsi = iteration (byte) count. These look
;     like System V AMD64 arguments 1 and 2 -- confirm against the C prototype.
; The stored value is al, the low byte of the running index.
; The count is compared only after the body has run, so rsi = 0 does not exit
; on the first pass.
mov_all_bytes_asm:
xor rax, rax
.loop:
mov BYTE [rdi + rax * 1], al
inc rax
cmp rsi, rax
jne .loop
ret
; nop_all_bytes_asm: counting loop whose body is a single 3-byte NOP.
; In: rdi = iteration count (presumably System V AMD64 argument 1 -- confirm).
; No memory is accessed.
nop_all_bytes_asm:
xor rax, rax
.loop:
db 0x0f, 0x1f, 0x00 ; hand-encoded 3-byte NOP (0F 1F /0, i.e. nop dword [rax])
inc rax
cmp rdi, rax
jne .loop
ret
; nop_1x3_all_bytes_asm: counting loop whose body is three separate 1-byte NOPs
; (same number of NOP bytes as one 3-byte NOP, but three instructions).
; In: rdi = iteration count (presumably System V AMD64 argument 1 -- confirm).
nop_1x3_all_bytes_asm:
xor rax, rax
.loop:
nop
nop
nop
inc rax
cmp rdi, rax
jne .loop
ret
; nop_1x9_all_bytes_asm: counting loop whose body is nine separate 1-byte NOPs.
; In: rdi = iteration count (presumably System V AMD64 argument 1 -- confirm).
nop_1x9_all_bytes_asm:
xor rax, rax
.loop:
nop
nop
nop
nop
nop
nop
nop
nop
nop
inc rax
cmp rdi, rax
jne .loop
ret
; inc_all_bytes_asm: bare counting loop -- increment, compare, branch -- with no
; other work in the body. rax counts up from 0 until it equals rdi.
; In: rdi = iteration count (presumably System V AMD64 argument 1 -- confirm).
inc_all_bytes_asm:
xor rax, rax
.loop:
inc rax
cmp rdi, rax
jne .loop
ret
; dec_all_bytes_asm: counts rdi down to zero. `dec` sets ZF itself, so the loop
; needs no separate compare instruction.
; In: rdi = iteration count (presumably System V AMD64 argument 1 -- confirm).
dec_all_bytes_asm:
.loop:
dec rdi
jnz .loop
ret
; align64_loop: counting loop whose first instruction sits exactly on a 64-byte
; boundary. Execution falls from the xor through the alignment padding into
; .loop.
; In: rdi = iteration count (presumably System V AMD64 argument 1 -- confirm).
align64_loop:
xor rax, rax
align 64
.loop:
inc rax
cmp rdi, rax
jne .loop
ret
; align1_loop: same counting loop as align64_loop, but one 1-byte NOP after the
; 64-byte alignment puts .loop 1 byte past the boundary. The padding and the
; NOP are executed once on entry.
; In: rdi = iteration count (presumably System V AMD64 argument 1 -- confirm).
align1_loop:
xor rax, rax
align 64
nop
.loop:
inc rax
cmp rdi, rax
jne .loop
ret
; align15_loop: counting loop placed 15 bytes past a 64-byte boundary
; (alignment followed by fifteen 1-byte NOPs, executed once on entry).
; In: rdi = iteration count (presumably System V AMD64 argument 1 -- confirm).
align15_loop:
xor rax, rax
align 64
%rep 15
nop
%endrep
.loop:
inc rax
cmp rdi, rax
jne .loop
ret
; align31_loop: counting loop placed 31 bytes past a 64-byte boundary
; (alignment followed by thirty-one 1-byte NOPs, executed once on entry).
; In: rdi = iteration count (presumably System V AMD64 argument 1 -- confirm).
align31_loop:
xor rax, rax
align 64
%rep 31
nop
%endrep
.loop:
inc rax
cmp rdi, rax
jne .loop
ret
; align63_loop: counting loop placed 63 bytes past a 64-byte boundary, i.e. its
; first instruction starts on the last byte of a 64-byte block
; (alignment followed by sixty-three 1-byte NOPs, executed once on entry).
; In: rdi = iteration count (presumably System V AMD64 argument 1 -- confirm).
align63_loop:
xor rax, rax
align 64
%rep 63
nop
%endrep
.loop:
inc rax
cmp rdi, rax
jne .loop
ret
; align75_loop: counting loop placed 75 bytes past a 64-byte boundary, which is
; 11 bytes into the following 64-byte block
; (alignment followed by seventy-five 1-byte NOPs, executed once on entry).
; In: rdi = iteration count (presumably System V AMD64 argument 1 -- confirm).
align75_loop:
xor rax, rax
align 64
%rep 75
nop
%endrep
.loop:
inc rax
cmp rdi, rax
jne .loop
ret
; align90_loop: counting loop placed 90 bytes past a 64-byte boundary, which is
; 26 bytes into the following 64-byte block
; (alignment followed by ninety 1-byte NOPs, executed once on entry).
; In: rdi = iteration count (presumably System V AMD64 argument 1 -- confirm).
align90_loop:
xor rax, rax
align 64
%rep 90
nop
%endrep
.loop:
inc rax
cmp rdi, rax
jne .loop
ret
; align112_loop: counting loop placed 112 bytes past a 64-byte boundary, which
; is 48 bytes into the following 64-byte block
; (alignment followed by 112 1-byte NOPs, executed once on entry).
; In: rdi = iteration count (presumably System V AMD64 argument 1 -- confirm).
align112_loop:
xor rax, rax
align 64
%rep 112
nop
%endrep
.loop:
inc rax
cmp rdi, rax
jne .loop
ret
; rat_add: two back-to-back `add rcx, 1` per iteration; each add reads the
; result of the previous one, forming a serial dependency chain on rcx.
; ("rat" presumably stands for register allocation table -- confirm.)
; In: rdi = iteration count (presumably System V AMD64 argument 1 -- confirm).
; rcx is not initialised here and is never stored anywhere by this routine.
rat_add:
mov rax, rdi
.loop:
add rcx, 1
add rcx, 1
dec rax
jnz .loop
ret
; rat_mov_add: variant of rat_add in which rcx is overwritten from rax before
; each add, so an add no longer depends on the result of the previous add.
; In: rdi = iteration count (presumably System V AMD64 argument 1 -- confirm).
rat_mov_add:
mov rax, rdi
.loop:
mov rcx, rax
add rcx, 1
mov rcx, rax
add rcx, 1
dec rax
jnz .loop
ret
; read_1: one 8-byte load from [rdi] per loop iteration.
; In: rdi = address to read, rsi = count, decreased by 1 per iteration
;     (presumably System V AMD64 arguments 1 and 2 -- confirm).
; rdi is never advanced, so every load reads the same address. The loop repeats
; while rsi is still greater than zero after the subtraction (signed, jnle).
; `align 64` follows the entry label: execution falls through the padding so
; that .loop starts on a 64-byte boundary.
read_1:
align 64
.loop:
mov rax, [rdi]
sub rsi, 1
jnle .loop
ret
; read_2: two 8-byte loads from [rdi] per loop iteration.
; In: rdi = address to read, rsi = count, decreased by 2 per iteration
;     (presumably System V AMD64 arguments 1 and 2 -- confirm).
; rdi is never advanced; the loop repeats while rsi > 0 after the subtraction
; (signed, jnle). .loop is aligned to a 64-byte boundary.
read_2:
align 64
.loop:
%rep 2
mov rax, [rdi]
%endrep
sub rsi, 2
jnle .loop
ret
; read_3: three 8-byte loads from [rdi] per loop iteration.
; In: rdi = address to read, rsi = count, decreased by 3 per iteration
;     (presumably System V AMD64 arguments 1 and 2 -- confirm).
; rdi is never advanced; the loop repeats while rsi > 0 after the subtraction
; (signed, jnle). .loop is aligned to a 64-byte boundary.
read_3:
align 64
.loop:
%rep 3
mov rax, [rdi]
%endrep
sub rsi, 3
jnle .loop
ret
; read_4: four 8-byte loads from [rdi] per loop iteration.
; In: rdi = address to read, rsi = count, decreased by 4 per iteration
;     (presumably System V AMD64 arguments 1 and 2 -- confirm).
; rdi is never advanced; the loop repeats while rsi > 0 after the subtraction
; (signed, jnle). .loop is aligned to a 64-byte boundary.
read_4:
align 64
.loop:
%rep 4
mov rax, [rdi]
%endrep
sub rsi, 4
jnle .loop
ret
; read_8: eight 8-byte loads from [rdi] per loop iteration.
; In: rdi = address to read, rsi = count, decreased by 8 per iteration
;     (presumably System V AMD64 arguments 1 and 2 -- confirm).
; rdi is never advanced; the loop repeats while rsi > 0 after the subtraction
; (signed, jnle). .loop is aligned to a 64-byte boundary.
read_8:
align 64
.loop:
%rep 8
mov rax, [rdi]
%endrep
sub rsi, 8
jnle .loop
ret
; write_1: one 8-byte store of zero to [rdi] per loop iteration.
; In: rdi = address to write, rsi = count, decreased by 1 per iteration
;     (presumably System V AMD64 arguments 1 and 2 -- confirm).
; rdi is never advanced, so every store hits the same 8 bytes. The loop repeats
; while rsi > 0 after the subtraction (signed, jnle). .loop is aligned to a
; 64-byte boundary.
write_1:
align 64
.loop:
mov QWORD [rdi], 0
sub rsi, 1
jnle .loop
ret
; write_2: two 8-byte stores of zero to [rdi] per loop iteration.
; In: rdi = address to write, rsi = count, decreased by 2 per iteration
;     (presumably System V AMD64 arguments 1 and 2 -- confirm).
; rdi is never advanced; the loop repeats while rsi > 0 after the subtraction
; (signed, jnle). .loop is aligned to a 64-byte boundary.
write_2:
align 64
.loop:
%rep 2
mov QWORD [rdi], 0
%endrep
sub rsi, 2
jnle .loop
ret
; write_3: three 8-byte stores of zero to [rdi] per loop iteration.
; In: rdi = address to write, rsi = count, decreased by 3 per iteration
;     (presumably System V AMD64 arguments 1 and 2 -- confirm).
; rdi is never advanced; the loop repeats while rsi > 0 after the subtraction
; (signed, jnle). .loop is aligned to a 64-byte boundary.
write_3:
align 64
.loop:
%rep 3
mov QWORD [rdi], 0
%endrep
sub rsi, 3
jnle .loop
ret
; write_4: four 8-byte stores of zero to [rdi] per loop iteration.
; In: rdi = address to write, rsi = count, decreased by 4 per iteration
;     (presumably System V AMD64 arguments 1 and 2 -- confirm).
; rdi is never advanced; the loop repeats while rsi > 0 after the subtraction
; (signed, jnle). .loop is aligned to a 64-byte boundary.
write_4:
align 64
.loop:
%rep 4
mov QWORD [rdi], 0
%endrep
sub rsi, 4
jnle .loop
ret
; write_8: eight 8-byte stores of zero to [rdi] per loop iteration.
; In: rdi = address to write, rsi = count, decreased by 8 per iteration
;     (presumably System V AMD64 arguments 1 and 2 -- confirm).
; rdi is never advanced; the loop repeats while rsi > 0 after the subtraction
; (signed, jnle). .loop is aligned to a 64-byte boundary.
write_8:
align 64
.loop:
%rep 8
mov QWORD [rdi], 0
%endrep
sub rsi, 8
jnle .loop
ret
; read_1x2_low: two 1-byte loads from [rdi] into al (low byte of rax) per
; iteration.
; In: rdi = address to read, rsi = count, decreased by 2 per iteration
;     (presumably System V AMD64 arguments 1 and 2 -- confirm).
; rdi is never advanced; the loop repeats while rsi > 0 after the subtraction
; (signed, jnle). .loop is aligned to a 64-byte boundary.
read_1x2_low:
align 64
.loop:
%rep 2
mov al, [rdi]
%endrep
sub rsi, 2
jnle .loop
ret
; read_1x2_high: two 1-byte loads from [rdi] into ah (bits 8-15 of rax) per
; iteration; counterpart of read_1x2_low with a different destination register.
; In: rdi = address to read, rsi = count, decreased by 2 per iteration
;     (presumably System V AMD64 arguments 1 and 2 -- confirm).
; rdi is never advanced; the loop repeats while rsi > 0 after the subtraction
; (signed, jnle). .loop is aligned to a 64-byte boundary.
read_1x2_high:
align 64
.loop:
%rep 2
mov ah, [rdi]
%endrep
sub rsi, 2
jnle .loop
ret
; read_2x2: two 2-byte loads from [rdi] into ax per iteration.
; In: rdi = address to read, rsi = count, decreased by 2 per iteration
;     (presumably System V AMD64 arguments 1 and 2 -- confirm).
; rdi is never advanced; the loop repeats while rsi > 0 after the subtraction
; (signed, jnle). .loop is aligned to a 64-byte boundary.
read_2x2:
align 64
.loop:
%rep 2
mov ax, [rdi]
%endrep
sub rsi, 2
jnle .loop
ret
; read_4x2: two 4-byte loads from [rdi] into eax per iteration.
; In: rdi = address to read, rsi = count, decreased by 2 per iteration
;     (presumably System V AMD64 arguments 1 and 2 -- confirm).
; rdi is never advanced; the loop repeats while rsi > 0 after the subtraction
; (signed, jnle). .loop is aligned to a 64-byte boundary.
read_4x2:
align 64
.loop:
%rep 2
mov eax, [rdi]
%endrep
sub rsi, 2
jnle .loop
ret
; read_8x2: two 8-byte loads from [rdi] into rax per iteration (the instruction
; sequence is the same as read_2).
; In: rdi = address to read, rsi = count, decreased by 2 per iteration
;     (presumably System V AMD64 arguments 1 and 2 -- confirm).
; rdi is never advanced; the loop repeats while rsi > 0 after the subtraction
; (signed, jnle). .loop is aligned to a 64-byte boundary.
read_8x2:
align 64
.loop:
%rep 2
mov rax, [rdi]
%endrep
sub rsi, 2
jnle .loop
ret
; read_4x2_simd: two 4-byte loads per iteration, from [rdi] and [rdi + 4].
; Despite the name, the loads go to the general-purpose register r8d, not to a
; vector register.
; In: rdi = address to read, rsi = total byte count
;     (presumably System V AMD64 arguments 1 and 2 -- confirm).
; rax counts bytes in steps of 8 and the loop repeats while rax < rsi
; (unsigned, jb). rdi is never advanced. .loop is aligned to 64 bytes.
read_4x2_simd:
xor rax, rax
align 64
.loop:
mov r8d, [rdi]
mov r8d, [rdi + 4]
add rax, 8
cmp rax, rsi
jb .loop
ret
; read_8x2_simd: two 8-byte loads per iteration, from [rdi] and [rdi + 8].
; Despite the name, the loads go to the general-purpose register r8, not to a
; vector register.
; In: rdi = address to read, rsi = total byte count
;     (presumably System V AMD64 arguments 1 and 2 -- confirm).
; rax counts bytes in steps of 16 and the loop repeats while rax < rsi
; (unsigned, jb). rdi is never advanced. .loop is aligned to 64 bytes.
read_8x2_simd:
xor rax, rax
align 64
.loop:
mov r8, [rdi]
mov r8, [rdi + 8]
add rax, 16
cmp rax, rsi
jb .loop
ret
; read_16x2_simd: two 16-byte vector loads (vmovdqu into xmm0) per iteration,
; from [rdi] and [rdi + 16].
; In: rdi = address to read, rsi = total byte count
;     (presumably System V AMD64 arguments 1 and 2 -- confirm).
; rax counts bytes in steps of 32 and the loop repeats while rax < rsi
; (unsigned, jb). rdi is never advanced. .loop is aligned to 64 bytes.
read_16x2_simd:
xor rax, rax
align 64
.loop:
vmovdqu xmm0, [rdi]
vmovdqu xmm0, [rdi + 16]
add rax, 32
cmp rax, rsi
jb .loop
ret
; read_32x2_simd_offset: two 32-byte vector loads (vmovdqu into ymm0) per
; iteration, from two different addresses: [rdi] and [rdi + 32].
; In: rdi = address to read, rsi = total byte count
;     (presumably System V AMD64 arguments 1 and 2 -- confirm).
; rax counts bytes in steps of 64 and the loop repeats while rax < rsi
; (unsigned, jb). rdi is never advanced. .loop is aligned to 64 bytes.
read_32x2_simd_offset:
xor rax, rax
align 64
.loop:
vmovdqu ymm0, [rdi]
vmovdqu ymm0, [rdi + 32]
add rax, 64
cmp rax, rsi
jb .loop
ret
; read_32x2_simd_no_offset: two 32-byte vector loads (vmovdqu into ymm0) per
; iteration, both from the same address [rdi]; counterpart of
; read_32x2_simd_offset.
; In: rdi = address to read, rsi = total byte count
;     (presumably System V AMD64 arguments 1 and 2 -- confirm).
; rax counts bytes in steps of 64 and the loop repeats while rax < rsi
; (unsigned, jb). rdi is never advanced. .loop is aligned to 64 bytes.
read_32x2_simd_no_offset:
xor rax, rax
align 64
.loop:
vmovdqu ymm0, [rdi]
vmovdqu ymm0, [rdi]
add rax, 64
cmp rax, rsi
jb .loop
ret
; read_16x4_simd: four 16-byte vector loads (vmovdqu into xmm0) per iteration:
; the pair [rdi], [rdi + 16] is emitted twice by %rep.
; In: rdi = address to read, rsi = total byte count
;     (presumably System V AMD64 arguments 1 and 2 -- confirm).
; rax counts bytes in steps of 64 and the loop repeats while rax < rsi
; (unsigned, jb). rdi is never advanced. .loop is aligned to 64 bytes.
read_16x4_simd:
xor rax, rax
align 64
.loop:
%rep 2
vmovdqu xmm0, [rdi]
vmovdqu xmm0, [rdi + 16]
%endrep
add rax, 64
cmp rax, rsi
jb .loop
ret
; read_32x4_simd: four 32-byte vector loads (vmovdqu into ymm0) per iteration,
; all from the same address [rdi].
; NOTE(review): read_16x4_simd alternates between [rdi] and [rdi + 16], whereas
; here every load uses [rdi] -- confirm the missing offset is intentional.
; In: rdi = address to read, rsi = total byte count
;     (presumably System V AMD64 arguments 1 and 2 -- confirm).
; rax counts bytes in steps of 128 and the loop repeats while rax < rsi
; (unsigned, jb). rdi is never advanced. .loop is aligned to 64 bytes.
read_32x4_simd:
xor rax, rax
align 64
.loop:
%rep 2
vmovdqu ymm0, [rdi]
vmovdqu ymm0, [rdi]
%endrep
add rax, 128
cmp rax, rsi
jb .loop
ret
; cache_test: read 128 bytes (four 32-byte vmovdqu loads) per iteration from a
; window of the buffer whose extent is limited by a mask.
; In:  rdi = buffer pointer, rsi = number of bytes to read in total,
;      rdx = mask applied to the running offset
;      (pointer, read_count, mask -- as listed next to the `global` declaration).
; Each iteration reads at base + offset and then advances
; offset = (offset + 128) & mask, so the reads wrap around inside the masked
; range. rsi drops by 128 per iteration; the loop repeats while the result is
; still above zero (unsigned, ja).
; Scratch: r10 (offset), r11 (copy of the base pointer), ymm0-ymm3.
; The base pointer is kept in r11 rather than rbx: rbx is a callee-saved
; register, so overwriting it without saving and restoring it would corrupt
; the caller's state.
cache_test:
xor r10, r10 ; Zero loop counter
mov r11, rdi ; Save original pointer (caller-saved scratch register)
.loop:
add rdi, r10 ; Advance the pointer
add r10, 128 ; Increment loop counter
and r10, rdx ; Mask offset
vmovdqu ymm0, [rdi + 0]
vmovdqu ymm1, [rdi + 32]
vmovdqu ymm2, [rdi + 64]
vmovdqu ymm3, [rdi + 96]
mov rdi, r11 ; Restore original pointer
sub rsi, 128 ; Decrement count
ja .loop
ret
; cache_test_unaligned: same access pattern as cache_test, but the base pointer
; is first moved forward by 5 bytes so that every 32-byte load starts 5 bytes
; past where the aligned variant would read.
; In:  rdi = buffer pointer, rsi = number of bytes to read in total,
;      rdx = mask applied to the running offset
;      (pointer, read_count, mask -- as listed next to the `global` declaration).
; Each iteration reads 128 bytes at base + 5 + offset and then advances
; offset = (offset + 128) & mask. rsi drops by 128 per iteration; the loop
; repeats while the result is still above zero (unsigned, ja).
; Scratch: r10 (offset), r11 (copy of the base pointer), ymm0-ymm3.
; The base pointer is kept in r11 rather than rbx: rbx is a callee-saved
; register, so overwriting it without saving and restoring it would corrupt
; the caller's state.
cache_test_unaligned:
xor r10, r10 ; Zero loop counter
add rdi, 5 ; Unalign pointer
mov r11, rdi ; Save original pointer (caller-saved scratch register)
.loop:
add rdi, r10 ; Advance the pointer
add r10, 128 ; Increment loop counter
and r10, rdx ; Mask offset
vmovdqu ymm0, [rdi + 0]
vmovdqu ymm1, [rdi + 32]
vmovdqu ymm2, [rdi + 64]
vmovdqu ymm3, [rdi + 96]
mov rdi, r11 ; Restore original pointer
sub rsi, 128 ; Decrement count
ja .loop
ret

View File

@@ -0,0 +1,168 @@
#include "repetition_testing/reptester.h"
#include "profiler/timer.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/resource.h>
#include <sys/time.h>
/**
 * Ensure the tester has a zero-filled read buffer for the given allocation
 * mode.
 *
 * For ALLOC_TYPE_WITH_MALLOC, and only when no buffer is currently attached,
 * an anonymous shared mapping of read_size + 1 bytes is created and every byte
 * of it is written with zero. All other allocation types are left untouched.
 *
 * If the mapping cannot be created, the error is reported on stderr and
 * params.buffer stays NULL (so handle_free has nothing to unmap).
 *
 * @param tester Tester whose params.buffer / params.read_size are used.
 * @param type   Allocation mode selected for the current test.
 */
void handle_alloc(reptester *tester, alloc_type type) {
  switch (type) {
  case ALLOC_TYPE_WITH_MALLOC:
    if (!(tester->params.buffer)) {
      void *mapping =
          mmap(NULL, tester->params.read_size + 1, PROT_READ | PROT_WRITE,
               MAP_ANON | MAP_SHARED | MAP_NORESERVE, -1, 0);
      if (mapping == MAP_FAILED) {
        // mmap signals failure with MAP_FAILED, not NULL; writing through it
        // would crash, so report the error and leave the buffer unset.
        perror("handle_alloc: mmap");
        break;
      }
      tester->params.buffer = (char *)mapping;
      memset(tester->params.buffer, 0, tester->params.read_size + 1);
    }
    break;
  default:
    break;
  }
}
/**
 * Release the tester's read buffer when the allocation mode owns one.
 *
 * Only ALLOC_TYPE_WITH_MALLOC with a non-NULL buffer does anything: the
 * read_size + 1 byte mapping is unmapped and the pointer is cleared.
 *
 * @param tester Tester whose params.buffer may be released.
 * @param type   Allocation mode selected for the current test.
 */
void handle_free(reptester *tester, alloc_type type) {
  if (type != ALLOC_TYPE_WITH_MALLOC) {
    return;
  }
  if (!tester->params.buffer) {
    return; // Nothing is mapped
  }

  munmap(tester->params.buffer, tester->params.read_size + 1);
  tester->params.buffer = NULL;
}
/**
 * Run `func` repeatedly and print timing / page-fault statistics for it.
 *
 * The statistics are reset, then `func` is called until
 * tester->wait_time_secs have passed without a new fastest run (every new
 * minimum restarts the wait). For ALLOC_TYPE_WITH_MALLOC the tester's buffer
 * is replaced by a fresh zero-filled mapping for the duration of the test and
 * the previous buffer pointer is put back afterwards.
 *
 * Nothing is printed by print_results if a run reads fewer bytes than
 * read_size * read_count or if the temporary mapping cannot be created; a
 * diagnostic is printed instead.
 *
 * @param tester    Tester state: parameters in, statistics out.
 * @param func      Function under test; called as func(tester, type) and
 *                  expected to fill tester->results.
 * @param func_name Label used when printing the results.
 * @param type      Allocation mode forwarded to `func`.
 */
void run_func_test(reptester *tester, reptest_func func, const char *func_name,
                   alloc_type type) {
  tester->test_start_time = read_cpu_timer();
  tester->test_time_secs = 0.0;
  tester->current_run = 1;
  tester->tstats = {
      UINT64_MAX, // min_time
      0,          // max_time
      0,          // avg_time
      0,          // total_time
  };
  tester->mstats = {
      UINT64_MAX, // min_faults
      0,          // max_faults
      0,          // avg_faults
      0,          // total_bytes
      0,          // total_faults
  };
  tester->results = {};

  const bool swap_buffer = (type == ALLOC_TYPE_WITH_MALLOC);
  char *saved_buffer = NULL;
  if (swap_buffer) {
    void *mapping =
        mmap(NULL, tester->params.read_size + 1, PROT_READ | PROT_WRITE,
             MAP_ANON | MAP_SHARED | MAP_NORESERVE, -1, 0);
    if (mapping == MAP_FAILED) {
      // mmap reports failure with MAP_FAILED, not NULL. Bail out before the
      // tester's buffer pointer has been touched.
      perror("run_func_test: mmap");
      return;
    }
    saved_buffer = tester->params.buffer;
    tester->params.buffer = (char *)mapping;
    memset(tester->params.buffer, 0, tester->params.read_size + 1);
  }

  bool completed = true;
  while (tester->test_time_secs <= tester->wait_time_secs) {
    func(tester, type);

    if (tester->results.bytes_read <
        tester->params.read_size * tester->params.read_count) {
      printf("Failed to read the entire file (Total size: %lu, Bytes read: "
             "%lu)\n",
             tester->params.read_size, tester->results.bytes_read);
      // Leave through the common cleanup below so that the temporary mapping
      // is released and the caller's buffer pointer is restored.
      completed = false;
      break;
    }

    tester->tstats.total_time += tester->results.read_time;
    tester->mstats.total_bytes += tester->results.bytes_read;
    tester->mstats.total_faults += tester->results.page_faults;

    // The min and max checks are independent: a run can be both (the very
    // first run always is). Chaining them with `else` would leave min_time at
    // UINT64_MAX whenever only one run is made.
    if (tester->results.read_time > tester->tstats.max_time) {
      tester->tstats.max_time = tester->results.read_time;
      tester->mstats.max_faults = tester->results.page_faults;
    }
    if (tester->results.read_time < tester->tstats.min_time) {
      // A new fastest run restarts the wait period
      tester->test_start_time = read_cpu_timer();
      tester->tstats.min_time = tester->results.read_time;
      tester->mstats.min_faults = tester->results.page_faults;
    }

    if (tester->results.page_faults > tester->mstats.max_faults) {
      tester->mstats.max_faults = tester->results.page_faults;
    }
    if (tester->results.page_faults < tester->mstats.min_faults) {
      tester->mstats.min_faults = tester->results.page_faults;
    }

    tester->test_time_secs = time_in_seconds(
        read_cpu_timer() - tester->test_start_time, tester->cpu_freq);
    ++(tester->current_run);
  }

  if (swap_buffer) {
    // Unmap whatever buffer is attached now (not necessarily the mapping made
    // above, should `func` have replaced it), then restore the caller's one.
    munmap(tester->params.buffer, tester->params.read_size + 1);
    tester->params.buffer = saved_buffer;
  }

  if (completed) {
    print_results(tester, func_name);
  }
}
/**
 * Number of page faults taken by the calling process so far.
 *
 * @return Sum of the minor (ru_minflt) and major (ru_majflt) fault counters
 *         that getrusage reports for RUSAGE_SELF.
 */
u64 page_fault_count() {
  rusage stats;
  getrusage(RUSAGE_SELF, &stats);

  u64 minor_faults = stats.ru_minflt;
  u64 major_faults = stats.ru_majflt;
  return minor_faults + major_faults;
}
/**
 * Print the min / max / average timings gathered by a test run.
 *
 * Each line shows the raw timer value and the resulting throughput in GB/s;
 * when the matching page-fault count is non-zero, the fault count and the
 * kilobytes read per fault are appended. As a side effect the averages
 * (tstats.avg_time, mstats.avg_faults) are computed and stored in `tester`.
 *
 * @param tester Tester holding the accumulated statistics; current_run - 1 is
 *               taken as the number of completed runs.
 * @param name   Label printed in the header line.
 */
void print_results(reptester *tester, const char *name) {
  const f64 kb = 1024.0;
  const f64 gb = kb * kb * kb;
  f64 size_in_kb =
      (f64)(tester->params.read_size * tester->params.read_count) / kb;
  f64 size_in_gb =
      (f64)(tester->params.read_size * tester->params.read_count) / gb;

  u64 run_count = tester->current_run - 1;
  printf("\n%s: %lu runs\n", name, run_count);
  if (run_count == 0) {
    // No completed run: there is nothing to average, and dividing by the run
    // count below would be an integer division by zero.
    return;
  }

  tester->tstats.avg_time = tester->tstats.total_time / run_count;
  tester->mstats.avg_faults = tester->mstats.total_faults / run_count;

  printf("MIN: %lu (%fGB/s)", tester->tstats.min_time,
         size_in_gb /
             time_in_seconds(tester->tstats.min_time, tester->cpu_freq));
  if (tester->mstats.min_faults > 0) {
    printf(", FAULTS: %lu (%fK/fault)\n", tester->mstats.min_faults,
           size_in_kb / tester->mstats.min_faults);
  } else {
    printf("\n");
  }

  printf("MAX: %lu (%fGB/s)", tester->tstats.max_time,
         size_in_gb /
             time_in_seconds(tester->tstats.max_time, tester->cpu_freq));
  if (tester->mstats.max_faults > 0) {
    printf(", FAULTS: %lu (%fK/fault)\n", tester->mstats.max_faults,
           size_in_kb / tester->mstats.max_faults);
  } else {
    printf("\n");
  }

  printf("AVG: %lu (%fGB/s)", tester->tstats.avg_time,
         size_in_gb /
             time_in_seconds(tester->tstats.avg_time, tester->cpu_freq));
  if (tester->mstats.avg_faults > 0) {
    // Bytes and faults must cover the same set of runs: total bytes over
    // total faults equals average bytes per run over average faults per run.
    // Dividing the total bytes by the per-run average fault count would
    // inflate the figure by the number of runs.
    printf(", FAULTS: %lu (%fK/fault)\n", tester->mstats.avg_faults,
           (f64)tester->mstats.total_bytes / kb /
               (f64)tester->mstats.total_faults);
  } else {
    printf("\n");
  }
}