Compare commits

..

86 Commits

Author SHA1 Message Date
94167e05fd Remove unused variables 2024-06-23 21:52:58 +01:00
7177add4ce Switch to using mmap and test unaligned loads 2024-06-23 21:10:14 +01:00
37d3340df9 Rename C++ files 2024-06-22 13:54:45 +01:00
fcdaf41495 Write to test buffer before working with it 2024-05-19 00:32:39 +01:00
cec0662e68 Add 256k cache test function 2024-05-19 00:10:18 +01:00
ba31dd9f8c Add cache testing functions 2024-05-06 22:55:44 +01:00
f355ab2d25 Update .gitignore 2024-05-06 22:55:32 +01:00
46ee06406f SIMD homework 2024-04-13 21:31:00 +01:00
5c97a99839 Execution ports homework 2024-04-13 13:56:29 +01:00
2cb6f1beb6 Complete homework for execution ports video 2024-02-11 00:42:14 +00:00
4945a298ac Add more assembly functions to repetition_testing 2024-02-10 17:54:42 +00:00
063183e46c Code alignment testing 2024-01-13 19:09:42 +00:00
12f25cfe51 Additional assembly loops 2023-12-03 23:24:52 +00:00
57acc5e16f Add assembly loops 2023-12-02 22:18:57 +00:00
43718ff047 Update release build flag 2023-12-02 20:28:04 +00:00
297d9c53f3 Fix repetition tester 2023-11-25 18:25:53 +00:00
b2cb252822 Remove .vscode and update .gitignore 2023-11-25 18:25:33 +00:00
a7d977210e Remove extraneous Windows functions 2023-09-24 19:11:10 +01:00
96ae35912f Wrap page fault count functions in os-agnostic function 2023-09-24 19:00:25 +01:00
dd512f8304 Fix Windows errors 2023-09-24 18:48:51 +01:00
389a494bfc Ensure clang-format doesn't change the include order of Windows headers 2023-09-24 18:46:14 +01:00
f18ecde7bc Add support for probing page fault behaviour on Windows 2023-09-24 17:35:23 +01:00
9104a41e2d Start probing page fault behaviour 2023-09-24 17:15:22 +01:00
7ce7101240 Add page fault stats to repetition tester 2023-09-23 23:55:46 +01:00
dca94a0edf Restructure of repetition tester 2023-09-23 23:06:38 +01:00
b7d33de2d7 Update comment 2023-09-23 22:44:13 +01:00
b1b90bc6f5 Change the testing function so it doesn't reallocate the main buffer 2023-09-18 22:28:48 +01:00
3a0917ed58 Test reading with and without malloc 2023-09-10 00:39:49 +01:00
4b905a56a5 Update .gitignore 2023-09-09 21:26:45 +01:00
967b1524d7 Update compile script 2023-09-09 21:26:35 +01:00
ab99d4b003 Update parser.c 2023-09-09 21:26:22 +01:00
22466ea56f Add time_in_seconds function 2023-09-09 21:25:57 +01:00
9ddb991b94 Basic repetition testing implementation 2023-09-09 21:25:32 +01:00
1bfc162845 Update profiler output 2023-09-03 00:35:58 +01:00
e461de30c0 Add data throughput calculation 2023-09-03 00:26:30 +01:00
19c02b4e99 Update the profiler to allow for different level of profiling 2023-07-23 16:36:21 +01:00
0e973feb38 Include the IDs from the update location 2023-07-23 16:36:07 +01:00
3af3a72472 Move the IDs to the processor 2023-07-23 16:35:35 +01:00
8e17765774 Update the timer to properly handle recursion and deep call stacks 2023-07-23 14:21:47 +01:00
f8cd7d253e Update .gitignore 2023-07-23 14:21:26 +01:00
46bc7e03a4 Add debug config for timer_test 2023-07-23 14:21:11 +01:00
419a7c8534 Ensure compile is executable 2023-07-23 11:33:07 +01:00
2d74f02138 Use the updated free_json and profile it 2023-07-12 00:45:29 +01:00
0360a2da35 Compile json code with profiler when profiling is enabled 2023-07-09 22:16:16 +01:00
43ec97378c Use numerical IDs for the profiler 2023-07-09 22:15:53 +01:00
e17ca4d3d2 Update debug config 2023-07-09 22:15:31 +01:00
6b9a7ab8a5 Update the timer code to support profiling loops and recursive functions 2023-07-09 22:13:06 +01:00
a118afaffb Add profiling code to the json parser 2023-07-09 22:12:32 +01:00
f11d4481a2 Add extra checks for NULL pointers 2023-07-09 22:11:32 +01:00
5e84e270bc Added macros for profiling functions and made it possible to compile the
profiling code out
2023-07-09 04:01:56 +01:00
c053d20a8f Use sample_start and sample_end pairs instead of the PROFILER_SAMPLE
macro
2023-07-09 01:52:16 +01:00
0073114723 Refactor profiling code 2023-07-09 00:55:22 +01:00
2e9c2dc6d5 Use profiling functions to time the haversine processor 2023-07-03 22:46:24 +01:00
f0380ce638 Added profiling functions 2023-07-03 22:46:10 +01:00
25d38c24b1 Update build files 2023-07-02 21:57:05 +01:00
99063fc700 Create the processor 2023-07-02 20:56:25 +01:00
6b27c86a18 Make sum a double instead of integer 2023-07-02 20:56:02 +01:00
1bb16971b2 Read the json file into a dynamically-allocated array to avoid stack
overflow
2023-07-02 20:55:31 +01:00
09ed32e41a Write the pairs to json with better precision 2023-07-02 20:55:08 +01:00
31d02aff8c Move EARTH_RADIUS_KM to haversine.h 2023-07-02 19:58:47 +01:00
d284a7edc8 Update .gitignore 2023-07-02 19:29:44 +01:00
9a686a2692 Add the json parser 2023-07-02 19:29:32 +01:00
10f1d5686e Update the compilation process 2023-07-02 19:29:05 +01:00
50f881c655 Reorganise the project 2023-07-02 19:28:48 +01:00
301ea44759 Remove parser binary from version control 2023-06-11 02:15:43 +01:00
c75e51aa3e Reorganise and start working on the json parser 2023-06-11 02:14:41 +01:00
5ff93f61d7 Switch to using argp 2023-05-29 19:49:49 +01:00
5c742fe1f7 Implement haversine generator 2023-05-28 23:29:56 +01:00
66aa3500d6 Rename haversine to haversine_01 2023-05-28 23:29:56 +01:00
2900e508a1 Completed homework for 8086 simulator 2023-04-16 14:29:54 -07:00
60538301d5 Defined mem_start 2023-04-15 23:11:10 -07:00
e9d7a608f0 Removed the binary from source control 2023-04-15 23:10:08 -07:00
d980153185 Read the instructions into simulated memory instead of separate buffer 2023-04-15 23:08:14 -07:00
90128fae8f Added sim86 binary to .gitignore 2023-04-15 23:07:08 -07:00
c320371432 Completed add, sub, cmp and jump simulations 2023-04-08 19:26:31 +01:00
43c748e0c5 Clean the repo of unnecessary files 2023-03-29 22:56:04 +01:00
904c398911 Completed the first 8086 simulation homework 2023-03-29 22:47:59 +01:00
416df979cf Removed the binaries 2023-03-29 22:47:23 +01:00
96d1381303 Started implementing the 8086 simulation 2023-03-29 01:28:37 +01:00
732aa2803f Update 2023-03-19 23:22:18 +00:00
b3eb9b99c7 Completed homework for decoding add, sub, cmp and jump instructions 2023-03-12 17:29:49 +00:00
620f9e0daa Completed decoding the challenging mov instructions 2023-03-11 19:50:09 +00:00
e223314349 Decode more variations of mov instruction 2023-03-08 23:48:03 +00:00
0efaa69327 Added constant ROW_WIDTH to decode_register 2023-03-05 14:02:42 +00:00
d4733b7be7 Updated decode_register to use flat array instead of branches 2023-03-05 13:58:12 +00:00
d683951f67 Moved .gitignore to haversine directory 2023-03-05 00:41:09 +00:00
86 changed files with 8518 additions and 69 deletions

5
.gitignore vendored
View File

@@ -1,5 +0,0 @@
haversine/python/__pycache__/**/*
haversine/cpp/haverscan
haversine/cpp/haverstrtok
haversine/cpp/test*
haversine/data_10000000_flex.json

View File

@@ -1,2 +1,2 @@
all:
clang++ -g dasm.cpp -o dasm
clang++ -g dasm.cc -o dasm

View File

@@ -58,7 +58,6 @@ int main(int argc, char *argv[]) {
(inst & (uint16_t)FLAGS::REG_DEST) == (uint16_t)FLAGS::REG_DEST;
bool word = (inst & (uint16_t)FLAGS::WORD) == (uint16_t)FLAGS::WORD;
// NOTE: Using right shift will only work on little-endian CPUs
uint16_t operands_info = inst >> 8;
if (decode_mode(operands_info) == (uint16_t)MODE::REG) {
@@ -96,64 +95,25 @@ uint16_t decode_mode(uint16_t instruction) {
}
void decode_register(uint16_t instruction, bool word, char *dest) {
uint16_t reg_mask = 0x07;
static uint16_t reg_mask = 0x07;
switch (instruction & reg_mask) {
case 0x00:
if (word) {
strcpy(dest, "ax");
} else {
strcpy(dest, "al");
}
break;
case 0x01:
if (word) {
strcpy(dest, "cx");
} else {
strcpy(dest, "cl");
}
break;
case 0x02:
if (word) {
strcpy(dest, "dx");
} else {
strcpy(dest, "dl");
}
break;
case 0x03:
if (word) {
strcpy(dest, "bx");
} else {
strcpy(dest, "bl");
}
break;
case 0x04:
if (word) {
strcpy(dest, "sp");
} else {
strcpy(dest, "ah");
}
break;
case 0x05:
if (word) {
strcpy(dest, "bp");
} else {
strcpy(dest, "ch");
}
break;
case 0x06:
if (word) {
strcpy(dest, "si");
} else {
strcpy(dest, "dh");
}
break;
case 0x07:
if (word) {
strcpy(dest, "di");
} else {
strcpy(dest, "bh");
}
break;
}
// clang-format off
static const char *table[16] = {
"al", "ax",
"cl", "cx",
"dl", "dx",
"bl", "bx",
"ah", "sp",
"ch", "bp",
"dh", "si",
"bh", "di"
};
// clang-format on
static const uint16_t ROW_WIDTH = 2;
uint16_t offset = instruction & reg_mask;
// Multiply offset by 2 since each row has 2 columns
strcpy(dest, table[offset * ROW_WIDTH + (uint16_t)word]);
}

5
8086_assembly_02/.gitignore vendored Normal file
View File

@@ -0,0 +1,5 @@
dasm
listing_0039_more_movs
listing_0039_more_movs_out
listing_0040_challenge_movs
listing_0040_challenge_movs_out

View File

@@ -0,0 +1,2 @@
all:
clang++ -g dasm.cc -o dasm

285
8086_assembly_02/dasm.cc Normal file
View File

@@ -0,0 +1,285 @@
#include <bits/types/FILE.h>
#include <math.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
// Opcode bit patterns for the `mov` encodings this disassembler recognises.
// Each value is compared against the first instruction byte after that byte
// has been masked with the corresponding INST_MASKS entry.
enum class INST_BITS {
  REG_MEM_REG    = 0x88, // 1000 10dw
  MEM_TO_ACC     = 0xA0, // 1010 000w
  ACC_TO_MEM     = 0xA2, // 1010 001w
  IMM_TO_REG     = 0xB0, // 1011 wreg
  IMM_TO_REG_MEM = 0xC6, // 1100 011w
};
// Masks applied to the first instruction byte before it is compared with an
// INST_BITS pattern; the cleared bits are the per-instruction flag/reg bits.
enum class INST_MASKS {
  REG_MEM_REG    = 0xFC, // ignore the d and w bits
  IMM_TO_REG     = 0xF0, // ignore the w bit and the reg field
  IMM_TO_REG_MEM = 0xFE, // ignore the w bit
  ACCUMULATOR    = 0xFE, // ignore the w bit
};
// Values of the two-bit `mod` field, kept in place in the top two bits of the
// operand byte (mod | reg | r/m).
enum class MODE {
  MEM   = 0x00, // mod = 00
  MEM8  = 0x40, // mod = 01
  MEM16 = 0x80, // mod = 10
  REG   = 0xC0, // mod = 11
};
// Returns true when the bits of `instruction` selected by `mask` equal
// `inst_bits`.
bool mask_instruction(uint8_t instruction, uint8_t inst_bits, uint8_t mask);
// Copies the register name selected by the low three bits of `instruction`
// (byte or word variant, chosen by `word`) into `dest`.
void decode_register(uint8_t instruction, bool word, char *dest);
// Copies the effective-address base expression selected by the low three bits
// of `instruction` (e.g. "bx + si") into `dest`.
void decode_rm(uint8_t instruction, char *dest);
// Builds the effective-address text for the operand byte `operands` into `rm`
// (capacity `buff_size`), reading any displacement bytes from `fp`.
void stringify_rm_and_disp(FILE *fp, uint8_t operands, char *rm,
uint32_t buff_size);
// Reads the address that follows an accumulator `mov` opcode from `fp` and
// formats the whole instruction into `dest`; `reg_dest` selects whether the
// accumulator is the destination or the source.
void handle_accumulator_mov_instructions(FILE *fp, uint8_t inst, bool reg_dest,
char *dest);
/**
 * Disassembles the 8086 `mov` encodings in the binary file named by argv[1]
 * and writes NASM-style text to "<input>_out.asm".
 *
 * Returns 0 on success. Returns 1 when no input file is given, when either
 * file cannot be opened, when the output name does not fit its buffer, when
 * the input ends in the middle of an instruction, or when the output cannot be
 * flushed. Opcode bytes that are not a recognised `mov` are reported on stdout
 * and skipped.
 *
 * Multi-byte immediates are read straight into an int16_t, so the host is
 * assumed to be little-endian like the 8086 itself.
 */
int main(int argc, char *argv[]) {
  if (argc < 2) {
    printf("Please provide a file to disassemble\n");
    return 1;
  }

  const char *filename = argv[1];

  // argv[1] is caller-controlled, so format with a bound and reject names
  // that would not fit rather than overrunning the buffer.
  char out_filename[4096] = {0};
  int name_len =
      snprintf(out_filename, sizeof(out_filename), "%s_out.asm", filename);
  if (name_len < 0 || (size_t)name_len >= sizeof(out_filename)) {
    printf("Output file name is too long\n");
    return 1;
  }

  FILE *fp = fopen(filename, "rb");
  if (!fp) {
    printf("Failed to open the selected file\n");
    return 1;
  }

  FILE *out = fopen(out_filename, "w");
  if (!out) {
    printf("Failed to open output file\n");
    fclose(fp);
    return 1;
  }

  fprintf(out, "; Disassembled by DASM\n\nbits 16\n\n");

  bool truncated = false; // set when the input ends inside an instruction
  uint8_t inst = 0;
  const char *op = "";
  while (fread(&inst, sizeof(inst), 1, fp) == 1) {
    if (mask_instruction(inst, (uint8_t)INST_BITS::REG_MEM_REG,
                         (uint8_t)INST_MASKS::REG_MEM_REG)) {
      op = "mov";
      uint8_t operands = 0;
      if (fread(&operands, sizeof(operands), 1, fp) != 1) {
        truncated = true;
        break;
      }

      // Instruction bit pattern is:
      //   7   6   5   4   3   2   1   0
      // | 1 | 0 | 0 | 0 | 1 | 0 | d | w |
      //
      // Operands bit pattern is:
      //   7   6   5   4   3   2   1   0
      // |  mod  |    reg    |    r/m    |
      bool reg_dest = mask_instruction(inst, 0x02, 0x02);
      bool word = mask_instruction(inst, 0x01, 0x01);

      char reg[3] = {0};
      decode_register(operands >> 3, word, reg);

      if (mask_instruction(operands, (uint8_t)MODE::REG,
                           (uint8_t)MODE::REG)) {
        char rm[3] = {0};
        decode_register(operands, word, rm);
        fprintf(out, "%s %s, %s\n", op, reg_dest ? reg : rm,
                reg_dest ? rm : reg);
      } else {
        char rm[20] = {0};
        stringify_rm_and_disp(fp, operands, rm, 20);
        if (reg_dest) {
          fprintf(out, "%s %s, [%s]\n", op, reg, rm);
        } else {
          fprintf(out, "%s [%s], %s\n", op, rm, reg);
        }
      }
    } else if (mask_instruction(inst, (uint8_t)INST_BITS::IMM_TO_REG,
                                (uint8_t)INST_MASKS::IMM_TO_REG)) {
      op = "mov";
      // Bit pattern is:
      //   7   6   5   4   3   2   1   0
      // | 1 | 0 | 1 | 1 | w |    reg    |
      //
      // The w flag is bit 3 (mask 0x08) of the opcode byte.
      bool word = mask_instruction(inst, 0x08, 0x08);
      uint8_t next_bytes = word ? 2 : 1;

      char reg[3] = {0};
      decode_register(inst, word, reg);

      int16_t data = 0;
      if (fread(&data, 1, next_bytes, fp) != next_bytes) {
        truncated = true;
        break;
      }
      fprintf(out, "%s %s, %d\n", op, reg, word ? data : (int8_t)data);
    } else if (mask_instruction(inst, (uint8_t)INST_BITS::IMM_TO_REG_MEM,
                                (uint8_t)INST_MASKS::IMM_TO_REG_MEM)) {
      op = "mov";
      uint8_t operands = 0;
      if (fread(&operands, sizeof(operands), 1, fp) != 1) {
        truncated = true;
        break;
      }

      // Instruction bit pattern is:
      //   7   6   5   4   3   2   1   0
      // | 1 | 1 | 0 | 0 | 0 | 1 | 1 | w |
      //
      // Operands bit pattern is:
      //   7   6   5   4   3   2   1   0
      // |  mod  |    000    |    r/m    |
      bool word = mask_instruction(inst, 0x01, 0x01);

      // mod == 11 names a register and has no displacement, so it must not
      // be routed through the memory-operand formatter.
      bool reg_mode = mask_instruction(operands, (uint8_t)MODE::REG,
                                       (uint8_t)MODE::REG);
      char rm[20] = {0};
      if (reg_mode) {
        decode_register(operands, word, rm);
      } else {
        stringify_rm_and_disp(fp, operands, rm, 20);
      }

      uint8_t next_bytes = word ? 2 : 1;
      int16_t data = 0;
      if (fread(&data, 1, next_bytes, fp) != next_bytes) {
        truncated = true;
        break;
      }

      if (reg_mode) {
        fprintf(out, "%s %s, %d\n", op, rm, word ? data : (int8_t)data);
      } else {
        fprintf(out, "%s [%s], %s %d\n", op, rm, word ? "word" : "byte",
                word ? data : (int8_t)data);
      }
    } else if (mask_instruction(inst, (uint8_t)INST_BITS::MEM_TO_ACC,
                                (uint8_t)INST_MASKS::ACCUMULATOR)) {
      char inst_out[256] = {0};
      handle_accumulator_mov_instructions(fp, inst, true, inst_out);
      fprintf(out, "%s\n", inst_out);
    } else if (mask_instruction(inst, (uint8_t)INST_BITS::ACC_TO_MEM,
                                (uint8_t)INST_MASKS::ACCUMULATOR)) {
      char inst_out[256] = {0};
      handle_accumulator_mov_instructions(fp, inst, false, inst_out);
      fprintf(out, "%s\n", inst_out);
    } else {
      printf("It's not a mov operation\n");
    }
  }

  int status = 0;
  if (truncated) {
    printf("Unexpected end of file inside an instruction\n");
    status = 1;
  }
  // fclose flushes buffered output, so a failure here means lost data.
  if (fclose(out) != 0) {
    printf("Failed to write output file\n");
    status = 1;
  }
  fclose(fp);
  return status;
}
// Returns true when the bits of `instruction` selected by `mask` are exactly
// `inst_bits`.
bool mask_instruction(uint8_t instruction, uint8_t inst_bits, uint8_t mask) {
  const int selected_bits = instruction & mask;
  if (selected_bits != inst_bits) {
    return false;
  }
  return true;
}
// Writes the name of the register encoded in the low three bits of
// `instruction` into `dest` (two characters plus the terminator). `word`
// picks the 16-bit name; otherwise the 8-bit name is used.
void decode_register(uint8_t instruction, bool word, char *dest) {
  // One row per 3-bit register code: {byte-sized name, word-sized name}.
  static const char *const names[8][2] = {
      {"al", "ax"}, {"cl", "cx"}, {"dl", "dx"}, {"bl", "bx"},
      {"ah", "sp"}, {"ch", "bp"}, {"dh", "si"}, {"bh", "di"},
  };

  const unsigned reg_code = instruction & 0x07u;
  const unsigned column = word ? 1u : 0u;
  strcpy(dest, names[reg_code][column]);
}
// Writes the effective-address base expression selected by the low three bits
// (the r/m field) of `instruction` into `dest`. The longest result is
// "bx + si", so `dest` needs room for at least 8 bytes.
void decode_rm(uint8_t instruction, char *dest) {
  const char *expression = "";
  switch (instruction & 0x07) {
  case 0: expression = "bx + si"; break;
  case 1: expression = "bx + di"; break;
  case 2: expression = "bp + si"; break;
  case 3: expression = "bp + di"; break;
  case 4: expression = "si"; break;
  case 5: expression = "di"; break;
  case 6: expression = "bp"; break;
  case 7: expression = "bx"; break;
  }
  strcpy(dest, expression);
}
/**
 * Builds the text of a memory operand (without the surrounding brackets) for
 * the mod/reg/rm byte `operands`, reading any displacement bytes from `fp`.
 *
 * @param fp        stream positioned just after the mod/reg/rm byte
 * @param operands  the mod/reg/rm byte
 * @param rm        output buffer; must hold at least 8 bytes because
 *                  decode_rm() writes the base expression unbounded
 * @param buff_size capacity of `rm` in bytes
 *
 * Displacement size by mod: 00 -> none (except r/m == 110, which is a 16-bit
 * direct address), 01 -> 8-bit signed, 10 -> 16-bit signed. mod == 11 names a
 * register, not memory; callers are expected to handle it themselves, and no
 * displacement bytes are consumed for it here.
 *
 * If the stream ends early the missing displacement bytes are treated as 0.
 */
void stringify_rm_and_disp(FILE *fp, uint8_t operands, char *rm,
                           uint32_t buff_size) {
  const uint8_t mod = operands >> 6;

  // Direct addressing is exactly MOD == 00 with R/M == 110. All three r/m
  // bits must be compared: testing only 0x06 also matches R/M == 111 ([bx]).
  const bool direct_address = (mod == 0) && ((operands & 0x07) == 0x06);

  size_t next_bytes = 0;
  if (direct_address || mod == 2) {
    next_bytes = 2;
  } else if (mod == 1) {
    next_bytes = 1;
  }

  // Assemble the little-endian value byte by byte so the result does not
  // depend on host endianness and can never write past a 2-byte object.
  uint8_t raw[2] = {0, 0};
  if (next_bytes > 0) {
    (void)fread(raw, 1, next_bytes, fp);
  }
  const unsigned wide = (unsigned)raw[0] | ((unsigned)raw[1] << 8);

  if (direct_address) {
    // A direct address is an unsigned 16-bit offset and is printed even
    // when it is 0 (otherwise it would be rendered as "[bp]").
    snprintf(rm, buff_size, "%u", wide);
    return;
  }

  decode_rm(operands, rm);

  const int disp = (next_bytes == 2) ? (int)(int16_t)wide : (int)(int8_t)raw[0];
  if (disp != 0) {
    const size_t used = strlen(rm);
    if (used < buff_size) {
      snprintf(rm + used, buff_size - used, " %c %d", disp > 0 ? '+' : '-',
               disp > 0 ? disp : -disp);
    }
  }
}
/**
 * Formats an accumulator <-> memory `mov` (opcodes 1010 000w / 1010 001w)
 * into `dest`.
 *
 * @param fp       stream positioned just after the opcode byte
 * @param inst     the opcode byte; bit 0 (w) selects `ax` (1) or `al` (0)
 * @param reg_dest true for "mov acc, [addr]", false for "mov [addr], acc"
 * @param dest     output buffer; the longest result is "mov [65535], ax"
 *
 * These encodings always carry a full 16-bit address (addr-lo, addr-hi); the
 * w bit only chooses the accumulator width, so two bytes are consumed
 * regardless of w. If the stream ends early the missing bytes read as 0.
 */
void handle_accumulator_mov_instructions(FILE *fp, uint8_t inst, bool reg_dest,
                                         char *dest) {
  const bool word = (inst & 0x01) != 0;
  const char *accumulator = word ? "ax" : "al";

  uint8_t raw[2] = {0, 0};
  (void)fread(raw, 1, sizeof(raw), fp);
  const unsigned addr = (unsigned)raw[0] | ((unsigned)raw[1] << 8);

  if (reg_dest) {
    sprintf(dest, "mov %s, [%u]", accumulator, addr);
  } else {
    sprintf(dest, "mov [%u], %s", addr, accumulator);
  }
}

View File

@@ -0,0 +1,47 @@
; ========================================================================
;
; (C) Copyright 2023 by Molly Rocket, Inc., All Rights Reserved.
;
; This software is provided 'as-is', without any express or implied
; warranty. In no event will the authors be held liable for any damages
; arising from the use of this software.
;
; Please see https://computerenhance.com for further information
;
; ========================================================================
; ========================================================================
; LISTING 39
; ========================================================================
bits 16
; Register-to-register
mov si, bx
mov dh, al
; 8-bit immediate-to-register
mov cl, 12
mov ch, -12
; 16-bit immediate-to-register
mov cx, 12
mov cx, -12
mov dx, 3948
mov dx, -3948
; Source address calculation
mov al, [bx + si]
mov bx, [bp + di]
mov dx, [bp]
; Source address calculation plus 8-bit displacement
mov ah, [bx + si + 4]
; Source address calculation plus 16-bit displacement
mov al, [bx + si + 4999]
; Dest address calculation
mov [bx + di], cx
mov [bp + si], cl
mov [bp], ch

View File

@@ -0,0 +1,20 @@
; Disassembled by DASM
bits 16
mov si, bx
mov dh, al
mov cl, 12
mov ch, -12
mov cx, 12
mov cx, -12
mov dx, 3948
mov dx, -3948
mov al, [bx + si]
mov bx, [bp + di]
mov dx, [bp]
mov ah, [bx + si + 4]
mov al, [bx + si + 4999]
mov [bx + di], cx
mov [bp + si], cl
mov [bp], ch

View File

@@ -0,0 +1,38 @@
; ========================================================================
;
; (C) Copyright 2023 by Molly Rocket, Inc., All Rights Reserved.
;
; This software is provided 'as-is', without any express or implied
; warranty. In no event will the authors be held liable for any damages
; arising from the use of this software.
;
; Please see https://computerenhance.com for further information
;
; ========================================================================
; ========================================================================
; LISTING 40
; ========================================================================
bits 16
; Signed displacements
mov ax, [bx + di - 37]
mov [si - 300], cx
mov dx, [bx - 32]
; Explicit sizes
mov [bp + di], byte 7
mov [di + 901], word 347
; Direct address
mov bp, [5]
mov bx, [3458]
; Memory-to-accumulator test
mov ax, [2555]
mov ax, [16]
; Accumulator-to-memory test
mov [2554], ax
mov [15], ax

View File

@@ -0,0 +1,13 @@
bits 16
mov ax, [bx + di - 37]
mov [si - 300], cx
mov dx, [bx - 32]
mov [bp + di], byte 7
mov [di + 901], word 347
mov bp, [5]
mov bx, [3458]
mov ax, [2555]
mov ax, [16]
mov [2554], ax
mov [15], ax

7
8086_assembly_03/.gitignore vendored Normal file
View File

@@ -0,0 +1,7 @@
dasm
listing_0039_more_movs
listing_0039_more_movs_out
listing_0040_challenge_movs
listing_0040_challenge_movs_out
listing_0041_add_sub_cmp_jnz
listing_0041_add_sub_cmp_jnz_out

View File

@@ -0,0 +1,2 @@
all:
clang++ -g dasm.cc -o dasm

557
8086_assembly_03/dasm.cc Normal file
View File

@@ -0,0 +1,557 @@
#include <bits/types/FILE.h>
#include <math.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
// Element count of a true array. ARR must be an array, not a pointer (a
// pointer would yield sizeof(pointer) / sizeof(element)). The argument is
// parenthesised so that any expression naming an array expands correctly.
#define ARRAY_LEN(ARR) (sizeof(ARR) / sizeof((ARR)[0]))
// Opcode bit patterns recognised by the disassembler. Each value is compared
// with the first instruction byte after that byte has been masked with the
// INST_MASKS entry for its instruction family.
enum class INST_BITS {
  // Register/memory with register (d and w bits masked off).
  ADD_REG_MEM_REG = 0x00,
  SUB_REG_MEM_REG = 0x28,
  CMP_REG_MEM_REG = 0x38,
  MOV_REG_MEM_REG = 0x88,

  // Immediate with accumulator (w bit masked off).
  ADD_IMM_TO_ACC   = 0x04,
  SUB_IMM_FROM_ACC = 0x2C,
  CMP_IMM_WITH_ACC = 0x3C,

  // Remaining mov forms.
  MOV_MEM_TO_ACC     = 0xA0,
  MOV_ACC_TO_MEM     = 0xA2,
  MOV_IMM_TO_REG     = 0xB0,
  MOV_IMM_TO_REG_MEM = 0xC6,

  // Immediate with register/memory; the operation is chosen by the operand
  // byte (see ARITHMETIC).
  ARITHMETIC_IMM_TO_REG_MEM = 0x80,

  // Conditional jumps (matched on the whole opcode byte).
  JE      = 0x74,
  JL      = 0x7C,
  JLE     = 0x7E,
  JB      = 0x72,
  JBE     = 0x76,
  JP      = 0x7A,
  JO      = 0x70,
  JS      = 0x78,
  JNE_JNZ = 0x75,
  JNL     = 0x7D,
  JG      = 0x7F,
  JNB     = 0x73,
  JA      = 0x77,
  JNP     = 0x7B,
  JNO     = 0x71,
  JNS     = 0x79,

  // Loop family and jcxz (matched on the whole opcode byte).
  LOOP   = 0xE2,
  LOOPZ  = 0xE1,
  LOOPNZ = 0xE0,
  JCXZ   = 0xE3,
};
// Opcode patterns that share the "register/memory with register" layout
// (opcode | d | w, followed by a mod/reg/rm byte).
uint8_t reg_mem_reg_insts[] = {
    static_cast<uint8_t>(INST_BITS::ADD_REG_MEM_REG),
    static_cast<uint8_t>(INST_BITS::SUB_REG_MEM_REG),
    static_cast<uint8_t>(INST_BITS::CMP_REG_MEM_REG),
    static_cast<uint8_t>(INST_BITS::MOV_REG_MEM_REG),
};

// Opcode patterns whose destination is the accumulator.
uint8_t to_accumulator_insts[] = {
    static_cast<uint8_t>(INST_BITS::MOV_MEM_TO_ACC),
    static_cast<uint8_t>(INST_BITS::ADD_IMM_TO_ACC),
    static_cast<uint8_t>(INST_BITS::SUB_IMM_FROM_ACC),
    static_cast<uint8_t>(INST_BITS::CMP_IMM_WITH_ACC),
};

// Opcodes that are followed by a single signed 8-bit jump offset.
uint8_t jump_insts[] = {
    static_cast<uint8_t>(INST_BITS::JE),
    static_cast<uint8_t>(INST_BITS::JL),
    static_cast<uint8_t>(INST_BITS::JLE),
    static_cast<uint8_t>(INST_BITS::JB),
    static_cast<uint8_t>(INST_BITS::JBE),
    static_cast<uint8_t>(INST_BITS::JP),
    static_cast<uint8_t>(INST_BITS::JO),
    static_cast<uint8_t>(INST_BITS::JS),
    static_cast<uint8_t>(INST_BITS::JNE_JNZ),
    static_cast<uint8_t>(INST_BITS::JNL),
    static_cast<uint8_t>(INST_BITS::JG),
    static_cast<uint8_t>(INST_BITS::JNB),
    static_cast<uint8_t>(INST_BITS::JA),
    static_cast<uint8_t>(INST_BITS::JNP),
    static_cast<uint8_t>(INST_BITS::JNO),
    static_cast<uint8_t>(INST_BITS::JNS),
    static_cast<uint8_t>(INST_BITS::LOOP),
    static_cast<uint8_t>(INST_BITS::LOOPZ),
    static_cast<uint8_t>(INST_BITS::LOOPNZ),
    static_cast<uint8_t>(INST_BITS::JCXZ),
};
// Operation selector for the immediate-to-register/memory arithmetic group.
// Values are the 3-bit field at bits 3-5 of the operand byte, left in place
// so they can be compared after masking with INST_MASKS::ARITHMETIC.
enum class ARITHMETIC {
  ADD = 0x00, // field = 000
  SUB = 0x28, // field = 101
  CMP = 0x38, // field = 111
};
// Masks applied to a byte before it is compared with an INST_BITS or
// ARITHMETIC pattern.
enum class INST_MASKS {
  REG_MEM_REG               = 0xFC, // opcode byte without d and w
  IMM_TO_REG                = 0xF0, // opcode byte without w and reg
  MOV_IMM_TO_REG_MEM        = 0xFE, // opcode byte without w
  ARITHMETIC_IMM_TO_REG_MEM = 0xFC, // opcode byte without s and w
  ACCUMULATOR               = 0xFE, // opcode byte without w
  ARITHMETIC                = 0x38, // bits 3-5 of the operand byte
  JUMPS                     = 0xFF, // whole opcode byte
};
// Values of the two-bit `mod` field, kept in place in the top two bits of the
// operand byte (mod | reg | r/m).
enum class MODE {
  MEM   = 0x00, // mod = 00
  MEM8  = 0x40, // mod = 01
  MEM16 = 0x80, // mod = 10
  REG   = 0xC0, // mod = 11
};
// Returns true when the bits of `instruction` selected by `mask` equal
// `inst_bits`.
bool mask_instruction(uint8_t instruction, uint8_t inst_bits, uint8_t mask);
// Returns true when `inst`, masked with `mask`, matches any of the first
// `arr_size` entries of `instructions`.
bool instruction_in_array(uint8_t inst, uint8_t *instructions, size_t arr_size,
uint8_t mask);
// Returns the first entry of `instructions` that `inst` matches under `mask`,
// or `mask` itself when nothing matches.
uint8_t get_instruction_from_array(uint8_t inst, uint8_t *instructions,
size_t arr_size, uint8_t mask);
// Copies the register name selected by the low three bits of `instruction`
// (byte or word variant, chosen by `word`) into `dest`.
void decode_register(uint8_t instruction, bool word, char *dest);
// Copies the effective-address base expression selected by the low three bits
// of `instruction` (e.g. "bx + si") into `dest`.
void decode_rm(uint8_t instruction, char *dest);
// Builds the effective-address text for the operand byte `operands` into `rm`
// (capacity `buff_size`), reading any displacement bytes from `fp`.
void stringify_rm_and_disp(FILE *fp, uint8_t operands, char *rm,
uint32_t buff_size);
// Reads the address that follows an accumulator `mov` opcode from `fp` and
// formats the whole instruction into `dest`; `reg_dest` selects whether the
// accumulator is the destination or the source.
void handle_accumulator_mov_instructions(FILE *fp, uint8_t inst, bool reg_dest,
char *dest);
// Reads the immediate that follows an add/sub/cmp-with-accumulator opcode
// from `fp` and formats the whole instruction into `dest`.
void handle_accumulator_arithmetic_instructions(FILE *fp, uint8_t inst,
char *dest);
int main(int argc, char *argv[]) {
if (argc < 2) {
printf("Please provide a file to disassemble\n");
return 1;
}
const char *filename = argv[1];
FILE *fp = fopen(filename, "rb");
if (fp) {
uint8_t inst = 0;
const char *op = "";
char out_filename[4096] = {0};
sprintf(out_filename, "%s_out.asm", filename);
FILE *out = fopen(out_filename, "w");
if (out) {
fprintf(out, "; Disassembled by DASM\n\nbits 16\n\n");
while (fread(&inst, sizeof(inst), 1, fp)) {
if (instruction_in_array(inst, reg_mem_reg_insts,
ARRAY_LEN(reg_mem_reg_insts),
(uint8_t)INST_MASKS::REG_MEM_REG)) {
switch (get_instruction_from_array(
inst, reg_mem_reg_insts, ARRAY_LEN(reg_mem_reg_insts),
(uint8_t)INST_MASKS::REG_MEM_REG)) {
case (uint8_t)INST_BITS::MOV_REG_MEM_REG:
op = "mov";
break;
case (uint8_t)INST_BITS::ADD_REG_MEM_REG:
op = "add";
break;
case (uint8_t)INST_BITS::SUB_REG_MEM_REG:
op = "sub";
break;
case (uint8_t)INST_BITS::CMP_REG_MEM_REG:
op = "cmp";
break;
}
uint8_t operands = 0;
fread(&operands, sizeof(operands), 1, fp);
bool reg_dest = mask_instruction(inst, 0x02, 0x02);
bool word = mask_instruction(inst, 0x01, 0x01);
char reg[3] = {0};
decode_register(operands >> 3, word, reg);
if (mask_instruction(operands, (uint8_t)MODE::REG,
(uint8_t)MODE::REG)) {
char rm[3] = {0};
decode_register(operands, word, rm);
fprintf(out, "%s %s, %s\n", op, reg_dest ? reg : rm,
reg_dest ? rm : reg);
} else {
char rm[20] = {0};
stringify_rm_and_disp(fp, operands, rm, 20);
fprintf(out, reg_dest ? "%s %s, [%s]\n" : "%s [%s], %s\n", op,
reg_dest ? reg : rm, reg_dest ? rm : reg);
}
} else if (mask_instruction(inst, (uint8_t)INST_BITS::MOV_IMM_TO_REG,
(uint8_t)INST_MASKS::IMM_TO_REG)) {
op = "mov";
// Bit pattern is:
// 7 6 5 4 3 2 1 0
// -------------------------------
// | 1 | 0 | 1 | 1 | w | reg |
// -------------------------------
//
// So, we need to mask the fourth bit to check the w flag
bool word = mask_instruction(inst, 0x08, 0x08);
uint8_t next_bytes = word ? 2 : 1;
char reg[3] = {0};
decode_register(inst, word, reg);
int16_t data = 0;
fread(&data, sizeof(next_bytes), next_bytes, fp);
fprintf(out, "%s %s, %d\n", op, reg, word ? data : (int8_t)data);
} else if (mask_instruction(inst,
(uint8_t)INST_BITS::MOV_IMM_TO_REG_MEM,
(uint8_t)INST_MASKS::MOV_IMM_TO_REG_MEM)) {
op = "mov";
uint8_t operands = 0;
fread(&operands, sizeof(operands), 1, fp);
// Instruction bit pattern is:
// 7 6 5 4 3 2 1 0
// -------------------------------
// | 1 | 1 | 0 | 0 | 0 | 1 | 1 | w |
// -------------------------------
//
// Operands bit pattern is:
// 7 6 5 4 3 2 1 0
// -------------------------------
// | mod | 000 | r/m |
// -------------------------------
bool word = mask_instruction(inst, 0x01, 0x01);
char rm[20] = {0};
stringify_rm_and_disp(fp, operands, rm, 20);
uint8_t next_bytes = word ? 2 : 1;
int16_t data = 0;
fread(&data, sizeof(next_bytes), next_bytes, fp);
fprintf(out, "%s [%s], %s %d\n", op, rm, word ? "word" : "byte",
word ? data : (int8_t)data);
} else if (mask_instruction(
inst, (uint8_t)INST_BITS::ARITHMETIC_IMM_TO_REG_MEM,
(uint8_t)INST_MASKS::ARITHMETIC_IMM_TO_REG_MEM)) {
uint8_t operands = 0;
fread(&operands, sizeof(operands), 1, fp);
if (mask_instruction(operands, (uint8_t)ARITHMETIC::ADD,
(uint8_t)INST_MASKS::ARITHMETIC)) {
op = "add";
} else if (mask_instruction(operands, (uint8_t)ARITHMETIC::SUB,
(uint8_t)INST_MASKS::ARITHMETIC)) {
op = "sub";
} else if (mask_instruction(operands, (uint8_t)ARITHMETIC::CMP,
(uint8_t)INST_MASKS::ARITHMETIC)) {
op = "cmp";
}
bool word = mask_instruction(inst, 0x01, 0x01);
bool sign = mask_instruction(inst, 0x02, 0x02);
if (mask_instruction(operands, (uint8_t)MODE::REG,
(uint8_t)MODE::REG)) {
char rm[3] = {0};
decode_register(operands, word, rm);
uint8_t next_bytes = 0;
if ((!word && !sign) || (word && sign)) {
next_bytes = 1;
} else if (word && !sign) {
next_bytes = 2;
}
int16_t data = 0;
fread(&data, sizeof(next_bytes), next_bytes, fp);
fprintf(out, "%s %s, %d\n", op, rm,
next_bytes == 1 ? (int8_t)data : data);
} else {
char rm[20] = {0};
stringify_rm_and_disp(fp, operands, rm, 20);
uint8_t next_bytes = 0;
if ((!word && !sign) || (word && sign)) {
next_bytes = 1;
} else if (word && !sign) {
next_bytes = 2;
}
int16_t data = 0;
fread(&data, sizeof(next_bytes), next_bytes, fp);
fprintf(out, "%s %s [%s], %d\n", op, word ? "word" : "byte", rm,
next_bytes == 1 ? (int8_t)data : data);
}
} else if (instruction_in_array(inst, to_accumulator_insts,
ARRAY_LEN(to_accumulator_insts),
(uint8_t)INST_MASKS::ACCUMULATOR)) {
char inst_out[256] = {0};
switch (get_instruction_from_array(
inst, to_accumulator_insts, ARRAY_LEN(to_accumulator_insts),
(uint8_t)INST_MASKS::ACCUMULATOR)) {
case (uint8_t)INST_BITS::MOV_MEM_TO_ACC:
handle_accumulator_mov_instructions(fp, inst, true, inst_out);
break;
case (uint8_t)INST_BITS::ADD_IMM_TO_ACC:
handle_accumulator_arithmetic_instructions(fp, inst, inst_out);
break;
case (uint8_t)INST_BITS::SUB_IMM_FROM_ACC:
handle_accumulator_arithmetic_instructions(fp, inst, inst_out);
break;
case (uint8_t)INST_BITS::CMP_IMM_WITH_ACC:
handle_accumulator_arithmetic_instructions(fp, inst, inst_out);
break;
}
fprintf(out, "%s\n", inst_out);
} else if (mask_instruction(inst, (uint8_t)INST_BITS::MOV_ACC_TO_MEM,
(uint8_t)INST_MASKS::ACCUMULATOR)) {
char inst_out[256] = {0};
handle_accumulator_mov_instructions(fp, inst, false, inst_out);
fprintf(out, "%s\n", inst_out);
} else if (instruction_in_array(inst, jump_insts, ARRAY_LEN(jump_insts),
(uint8_t)INST_MASKS::JUMPS)) {
switch (get_instruction_from_array(inst, jump_insts,
ARRAY_LEN(jump_insts),
(uint8_t)INST_MASKS::JUMPS)) {
case (uint8_t)INST_BITS::JE:
op = "je";
break;
case (uint8_t)INST_BITS::JL:
op = "jl";
break;
case (uint8_t)INST_BITS::JLE:
op = "jle";
break;
case (uint8_t)INST_BITS::JB:
op = "jb";
break;
case (uint8_t)INST_BITS::JBE:
op = "jbe";
break;
case (uint8_t)INST_BITS::JP:
op = "jp";
break;
case (uint8_t)INST_BITS::JO:
op = "jo";
break;
case (uint8_t)INST_BITS::JS:
op = "js";
break;
case (uint8_t)INST_BITS::JNE_JNZ:
op = "jnz";
break;
case (uint8_t)INST_BITS::JNL:
op = "jnl";
break;
case (uint8_t)INST_BITS::JG:
op = "jg";
break;
case (uint8_t)INST_BITS::JNB:
op = "jnb";
break;
case (uint8_t)INST_BITS::JA:
op = "ja";
break;
case (uint8_t)INST_BITS::JNP:
op = "jnp";
break;
case (uint8_t)INST_BITS::JNO:
op = "jno";
break;
case (uint8_t)INST_BITS::JNS:
op = "jns";
break;
case (uint8_t)INST_BITS::LOOP:
op = "loop";
break;
case (uint8_t)INST_BITS::LOOPZ:
op = "loopz";
break;
case (uint8_t)INST_BITS::LOOPNZ:
op = "loopnz";
break;
case (uint8_t)INST_BITS::JCXZ:
op = "jcxz";
break;
}
int8_t inc = 0;
fread(&inc, sizeof(int8_t), 1, fp);
fprintf(out, "%s %d\n", op, inc);
} else {
printf("Invalid instruction\n");
}
}
fclose(out);
} else {
printf("Failed to open output file\n");
}
fclose(fp);
} else {
printf("Failed to open the selected file\n");
}
return 0;
}
// Returns true when the bits of `instruction` selected by `mask` are exactly
// `inst_bits`.
bool mask_instruction(uint8_t instruction, uint8_t inst_bits, uint8_t mask) {
  const int selected_bits = instruction & mask;
  if (selected_bits != inst_bits) {
    return false;
  }
  return true;
}
// Reports whether `inst`, once masked with `mask`, equals any of the first
// `arr_size` patterns stored in `instructions`.
bool instruction_in_array(uint8_t inst, uint8_t *instructions, size_t arr_size,
                          uint8_t mask) {
  const uint8_t *const end = instructions + arr_size;
  for (const uint8_t *pattern = instructions; pattern != end; ++pattern) {
    if (mask_instruction(inst, *pattern, mask)) {
      return true;
    }
  }
  return false;
}
// Returns the first pattern in `instructions` (of `arr_size` entries) that
// `inst` matches under `mask`. When nothing matches, `mask` itself is
// returned as the "not found" value.
uint8_t get_instruction_from_array(uint8_t inst, uint8_t *instructions,
                                   size_t arr_size, uint8_t mask) {
  size_t idx = 0;
  while (idx < arr_size && !mask_instruction(inst, instructions[idx], mask)) {
    ++idx;
  }
  if (idx == arr_size) {
    return mask;
  }
  return instructions[idx];
}
// Writes the name of the register encoded in the low three bits of
// `instruction` into `dest` (two characters plus the terminator). `word`
// picks the 16-bit name; otherwise the 8-bit name is used.
void decode_register(uint8_t instruction, bool word, char *dest) {
  // Indexed by the 3-bit register code.
  static const char *const byte_names[8] = {"al", "cl", "dl", "bl",
                                            "ah", "ch", "dh", "bh"};
  static const char *const word_names[8] = {"ax", "cx", "dx", "bx",
                                            "sp", "bp", "si", "di"};

  const unsigned reg_code = instruction & 0x07u;
  if (word) {
    strcpy(dest, word_names[reg_code]);
  } else {
    strcpy(dest, byte_names[reg_code]);
  }
}
// Writes the effective-address base expression selected by the low three bits
// (the r/m field) of `instruction` into `dest`. The longest result is
// "bx + si", so `dest` needs room for at least 8 bytes.
void decode_rm(uint8_t instruction, char *dest) {
  // Indexed by the 3-bit r/m code.
  static const char *const bases[8] = {
      "bx + si", "bx + di", "bp + si", "bp + di", "si", "di", "bp", "bx",
  };

  const char *const base = bases[instruction & 0x07u];
  strcpy(dest, base);
}
/**
 * Builds the text of a memory operand (without the surrounding brackets) for
 * the mod/reg/rm byte `operands`, reading any displacement bytes from `fp`.
 *
 * @param fp        stream positioned just after the mod/reg/rm byte
 * @param operands  the mod/reg/rm byte
 * @param rm        output buffer; must hold at least 8 bytes because
 *                  decode_rm() writes the base expression unbounded
 * @param buff_size capacity of `rm` in bytes
 *
 * Displacement size by mod: 00 -> none (except r/m == 110, which is a 16-bit
 * direct address), 01 -> 8-bit signed, 10 -> 16-bit signed. mod == 11 names a
 * register, not memory; callers are expected to handle it themselves, and no
 * displacement bytes are consumed for it here.
 *
 * If the stream ends early the missing displacement bytes are treated as 0.
 */
void stringify_rm_and_disp(FILE *fp, uint8_t operands, char *rm,
                           uint32_t buff_size) {
  const uint8_t mod = operands >> 6;

  // Direct addressing is exactly MOD == 00 with R/M == 110.
  const bool direct_address = (mod == 0) && ((operands & 0x07) == 0x06);

  size_t next_bytes = 0;
  if (direct_address || mod == 2) {
    next_bytes = 2;
  } else if (mod == 1) {
    next_bytes = 1;
  }

  // Assemble the little-endian value byte by byte so the result does not
  // depend on host endianness and can never write past a 2-byte object.
  uint8_t raw[2] = {0, 0};
  if (next_bytes > 0) {
    (void)fread(raw, 1, next_bytes, fp);
  }
  const unsigned wide = (unsigned)raw[0] | ((unsigned)raw[1] << 8);

  if (direct_address) {
    // A direct address is an unsigned 16-bit offset and is printed even
    // when it is 0 (otherwise it would be rendered as "[bp]").
    snprintf(rm, buff_size, "%u", wide);
    return;
  }

  decode_rm(operands, rm);

  const int disp = (next_bytes == 2) ? (int)(int16_t)wide : (int)(int8_t)raw[0];
  if (disp != 0) {
    const size_t used = strlen(rm);
    if (used < buff_size) {
      snprintf(rm + used, buff_size - used, " %c %d", disp > 0 ? '+' : '-',
               disp > 0 ? disp : -disp);
    }
  }
}
/**
 * Formats an accumulator <-> memory `mov` (opcodes 1010 000w / 1010 001w)
 * into `dest`.
 *
 * @param fp       stream positioned just after the opcode byte
 * @param inst     the opcode byte; bit 0 (w) selects `ax` (1) or `al` (0)
 * @param reg_dest true for "mov acc, [addr]", false for "mov [addr], acc"
 * @param dest     output buffer; the longest result is "mov [65535], ax"
 *
 * These encodings always carry a full 16-bit address (addr-lo, addr-hi); the
 * w bit only chooses the accumulator width, so two bytes are consumed
 * regardless of w. If the stream ends early the missing bytes read as 0.
 */
void handle_accumulator_mov_instructions(FILE *fp, uint8_t inst, bool reg_dest,
                                         char *dest) {
  const bool word = (inst & 0x01) != 0;
  const char *accumulator = word ? "ax" : "al";

  uint8_t raw[2] = {0, 0};
  (void)fread(raw, 1, sizeof(raw), fp);
  const unsigned addr = (unsigned)raw[0] | ((unsigned)raw[1] << 8);

  if (reg_dest) {
    sprintf(dest, "mov %s, [%u]", accumulator, addr);
  } else {
    sprintf(dest, "mov [%u], %s", addr, accumulator);
  }
}
/**
 * Formats an add/sub/cmp "immediate with accumulator" instruction into
 * `dest`.
 *
 * @param fp   stream positioned just after the opcode byte
 * @param inst the opcode byte; bit 0 (w) selects a 16-bit immediate with `ax`
 *             (1) or an 8-bit immediate with `al` (0)
 * @param dest output buffer; the longest result is "cmp ax, -32768"
 *
 * The immediate is printed as a signed value for both widths, matching how
 * the other immediate forms in this file are printed. If the opcode is not
 * one of the three handled patterns the mnemonic is left empty. If the stream
 * ends early the missing immediate bytes read as 0.
 */
void handle_accumulator_arithmetic_instructions(FILE *fp, uint8_t inst,
                                                char *dest) {
  // Masking off the w bit leaves exactly the pattern stored in INST_BITS.
  const char *op = "";
  switch (inst & (uint8_t)INST_MASKS::ACCUMULATOR) {
  case (uint8_t)INST_BITS::ADD_IMM_TO_ACC:
    op = "add";
    break;
  case (uint8_t)INST_BITS::SUB_IMM_FROM_ACC:
    op = "sub";
    break;
  case (uint8_t)INST_BITS::CMP_IMM_WITH_ACC:
    op = "cmp";
    break;
  default:
    break;
  }

  const bool word = (inst & 0x01) != 0;
  const size_t next_bytes = word ? 2 : 1;

  // Assemble the little-endian immediate byte by byte so the result does not
  // depend on host endianness.
  uint8_t raw[2] = {0, 0};
  (void)fread(raw, 1, next_bytes, fp);
  const unsigned wide = (unsigned)raw[0] | ((unsigned)raw[1] << 8);
  const int value = word ? (int)(int16_t)wide : (int)(int8_t)raw[0];

  sprintf(dest, "%s %s, %d", op, word ? "ax" : "al", value);
}

View File

@@ -0,0 +1,47 @@
; ========================================================================
;
; (C) Copyright 2023 by Molly Rocket, Inc., All Rights Reserved.
;
; This software is provided 'as-is', without any express or implied
; warranty. In no event will the authors be held liable for any damages
; arising from the use of this software.
;
; Please see https://computerenhance.com for further information
;
; ========================================================================
; ========================================================================
; LISTING 39
; ========================================================================
bits 16
; Register-to-register
mov si, bx
mov dh, al
; 8-bit immediate-to-register
mov cl, 12
mov ch, -12
; 16-bit immediate-to-register
mov cx, 12
mov cx, -12
mov dx, 3948
mov dx, -3948
; Source address calculation
mov al, [bx + si]
mov bx, [bp + di]
mov dx, [bp]
; Source address calculation plus 8-bit displacement
mov ah, [bx + si + 4]
; Source address calculation plus 16-bit displacement
mov al, [bx + si + 4999]
; Dest address calculation
mov [bx + di], cx
mov [bp + si], cl
mov [bp], ch

View File

@@ -0,0 +1,20 @@
; Disassembled by DASM
bits 16
mov si, bx
mov dh, al
mov cl, 12
mov ch, -12
mov cx, 12
mov cx, -12
mov dx, 3948
mov dx, -3948
mov al, [bx + si]
mov bx, [bp + di]
mov dx, [bp]
mov ah, [bx + si + 4]
mov al, [bx + si + 4999]
mov [bx + di], cx
mov [bp + si], cl
mov [bp], ch

View File

@@ -0,0 +1,38 @@
; ========================================================================
;
; (C) Copyright 2023 by Molly Rocket, Inc., All Rights Reserved.
;
; This software is provided 'as-is', without any express or implied
; warranty. In no event will the authors be held liable for any damages
; arising from the use of this software.
;
; Please see https://computerenhance.com for further information
;
; ========================================================================
; ========================================================================
; LISTING 40
; ========================================================================
bits 16
; Signed displacements
mov ax, [bx + di - 37]
mov [si - 300], cx
mov dx, [bx - 32]
; Explicit sizes
mov [bp + di], byte 7
mov [di + 901], word 347
; Direct address
mov bp, [5]
mov bx, [3458]
; Memory-to-accumulator test
mov ax, [2555]
mov ax, [16]
; Accumulator-to-memory test
mov [2554], ax
mov [15], ax

View File

@@ -0,0 +1,13 @@
bits 16
mov ax, [bx + di - 37]
mov [si - 300], cx
mov dx, [bx - 32]
mov [bp + di], byte 7
mov [di + 901], word 347
mov bp, [5]
mov bx, [3458]
mov ax, [2555]
mov ax, [16]
mov [2554], ax
mov [15], ax

View File

@@ -0,0 +1,122 @@
; ========================================================================
;
; (C) Copyright 2023 by Molly Rocket, Inc., All Rights Reserved.
;
; This software is provided 'as-is', without any express or implied
; warranty. In no event will the authors be held liable for any damages
; arising from the use of this software.
;
; Please see https://computerenhance.com for further information
;
; ========================================================================
; ========================================================================
; LISTING 41
; ========================================================================
bits 16
add bx, [bx+si]
add bx, [bp]
add si, 2
add bp, 2
add cx, 8
add bx, [bp + 0]
add cx, [bx + 2]
add bh, [bp + si + 4]
add di, [bp + di + 6]
add [bx+si], bx
add [bp], bx
add [bp + 0], bx
add [bx + 2], cx
add [bp + si + 4], bh
add [bp + di + 6], di
add byte [bx], 34
add word [bp + si + 1000], 29
add word [bp + si + 1000], 520
add ax, [bp]
add al, [bx + si]
add ax, bx
add al, ah
add ax, 1000
add al, -30
add al, 9
sub bx, [bx+si]
sub bx, [bp]
sub si, 2
sub bp, 2
sub cx, 8
sub bx, [bp + 0]
sub cx, [bx + 2]
sub bh, [bp + si + 4]
sub di, [bp + di + 6]
sub [bx+si], bx
sub [bp], bx
sub [bp + 0], bx
sub [bx + 2], cx
sub [bp + si + 4], bh
sub [bp + di + 6], di
sub byte [bx], 34
sub word [bx + di], 29
sub ax, [bp]
sub al, [bx + si]
sub ax, bx
sub al, ah
sub ax, 1000
sub al, -30
sub al, 9
cmp bx, [bx+si]
cmp bx, [bp]
cmp si, 2
cmp bp, 2
cmp cx, 8
cmp bx, [bp + 0]
cmp cx, [bx + 2]
cmp bh, [bp + si + 4]
cmp di, [bp + di + 6]
cmp [bx+si], bx
cmp [bp], bx
cmp [bp + 0], bx
cmp [bx + 2], cx
cmp [bp + si + 4], bh
cmp [bp + di + 6], di
cmp byte [bx], 34
cmp word [4834], 29
cmp ax, [bp]
cmp al, [bx + si]
cmp ax, bx
cmp al, ah
cmp ax, 1000
cmp al, -30
cmp al, 9
test_label0:
jnz test_label1
jnz test_label0
test_label1:
jnz test_label0
jnz test_label1
label:
je label
jl label
jle label
jb label
jbe label
jp label
jo label
js label
jne label
jnl label
jg label
jnb label
ja label
jnp label
jno label
jns label
loop label
loopz label
loopnz label
jcxz label

View File

@@ -0,0 +1,101 @@
; Disassembled by DASM
bits 16
add bx, [bx + si]
add bx, [bp]
add si, 2
add bp, 2
add cx, 8
add bx, [bp]
add cx, [bx + 2]
add bh, [bp + si + 4]
add di, [bp + di + 6]
add [bx + si], bx
add [bp], bx
add [bp], bx
add [bx + 2], cx
add [bp + si + 4], bh
add [bp + di + 6], di
add byte [bx], 34
add word [bp + si + 1000], 29
add word [bp + si + 1000], 520
add ax, [bp]
add al, [bx + si]
add ax, bx
add al, ah
add ax, 1000
add al, -30
add al, 9
sub bx, [bx + si]
sub bx, [bp]
sub si, 2
sub bp, 2
sub cx, 8
sub bx, [bp]
sub cx, [bx + 2]
sub bh, [bp + si + 4]
sub di, [bp + di + 6]
sub [bx + si], bx
sub [bp], bx
sub [bp], bx
sub [bx + 2], cx
sub [bp + si + 4], bh
sub [bp + di + 6], di
sub byte [bx], 34
sub word [bx + di], 29
sub ax, [bp]
sub al, [bx + si]
sub ax, bx
sub al, ah
sub ax, 1000
sub al, -30
sub al, 9
cmp bx, [bx + si]
cmp bx, [bp]
cmp si, 2
cmp bp, 2
cmp cx, 8
cmp bx, [bp]
cmp cx, [bx + 2]
cmp bh, [bp + si + 4]
cmp di, [bp + di + 6]
cmp [bx + si], bx
cmp [bp], bx
cmp [bp], bx
cmp [bx + 2], cx
cmp [bp + si + 4], bh
cmp [bp + di + 6], di
cmp byte [bx], 34
cmp word [4834], 29
cmp ax, [bp]
cmp al, [bx + si]
cmp ax, bx
cmp al, ah
cmp ax, 1000
cmp al, -30
cmp al, 9
jnz 2
jnz -4
jnz -6
jnz -4
je -2
jl -4
jle -6
jb -8
jbe -10
jp -12
jo -14
js -16
jnz -18
jnl -20
jg -22
jnb -24
ja -26
jnp -28
jno -30
jns -32
loop -34
loopz -36
loopnz -38
jcxz -40

16
8086_sim/.gitignore vendored Normal file
View File

@@ -0,0 +1,16 @@
sim86
listing_0043_immediate_movs
listing_0044_register_movs
listing_0045_challenge_register_movs
listing_0046_add_sub_cmp
listing_0047_challenge_flags
listing_0048_ip_register
listing_0049_conditional_jumps
listing_0050_challenge_jumps
listing_0051_memory_mov
listing_0052_memory_add_loop
listing_0053_add_loop_challenge
listing_0054_draw_rectangle
test.asm
test
image.data

8
8086_sim/Makefile Normal file
View File

@@ -0,0 +1,8 @@
# Builds the simulator executable from every .cc file in this directory and
# links it against the prebuilt decoder library in ./lib.
CC=clang++
CFLAGS=-g -O0 -Wall -Wextra
# -rpath lets the produced binary find libsim86.so in ./lib at run time.
LIBS=-Wl,-rpath,./lib -L./lib -lsim86
SRC=*.cc
OUT=sim86

# `all` names an action, not a file; without this a file called "all" would
# stop the build from running.
.PHONY: all

# Libraries are listed after the sources: linkers resolve symbols from left to
# right, and with --as-needed a shared library named before the code that uses
# it can be dropped from the link.
all:
	$(CC) $(CFLAGS) $(SRC) -o $(OUT) $(LIBS)

57
8086_sim/flag_access.cc Normal file
View File

@@ -0,0 +1,57 @@
#include "include/flag_access.h"
#include "include/aliases.h"
#include <stdio.h>
// Forward declaration; the definition is at the end of this file and it is
// called by print_flags().
const char *get_flag_string(flag_access flag);
// Current state of each flag, indexed by flag_access value. Every flag starts
// cleared.
static bool flags[FLAG_COUNT] = {false};
// Reports whether `flag` is currently set. A value that is not below
// FLAG_COUNT does not name a flag and always reads as false.
bool get_flag(flag_access flag) {
  return flag < FLAG_COUNT && flags[flag];
}
// Recomputes both flags from a 16-bit result:
//   FLAG_ZERO is set exactly when `value` is 0;
//   FLAG_SIGN is set exactly when bit 15 (0x8000) of `value` is set.
void set_flags(u16 value) {
  const bool is_zero = (value == 0);
  const bool top_bit_set = ((value & 0x8000) != 0);

  flags[FLAG_ZERO] = is_zero;
  flags[FLAG_SIGN] = top_bit_set;
}
void print_flags() {
printf("\t");
for (u32 i = 0; i < FLAG_COUNT; ++i) {
if (flags[i]) {
printf("%s", get_flag_string((flag_access)i));
}
}
printf("\n");
}
// Maps a flag to the letter used when printing it: "Z" for FLAG_ZERO, "S"
// for FLAG_SIGN, and an empty string for any other value.
const char *get_flag_string(flag_access flag) {
  if (flag == FLAG_ZERO) {
    return "Z";
  }
  if (flag == FLAG_SIGN) {
    return "S";
  }
  return "";
}

View File

@@ -0,0 +1,46 @@
#ifndef ALIASES_H
#define ALIASES_H
// Short names for the fixed-width integer types from <stdint.h> and for the
// built-in floating-point types. Every alias is a preprocessor macro wrapped
// in its own #ifndef, so it is only defined when no macro of that name
// already exists.
#include <stdint.h>
// Unsigned integers: 8, 16, 32 and 64 bits.
#ifndef u8
#define u8 uint8_t
#endif // !u8
#ifndef u16
#define u16 uint16_t
#endif // !u16
#ifndef u32
#define u32 uint32_t
#endif // !u32
#ifndef u64
#define u64 uint64_t
#endif // !u64
// Signed integers: 8, 16, 32 and 64 bits.
#ifndef i8
#define i8 int8_t
#endif // !i8
#ifndef i16
#define i16 int16_t
#endif // !i16
#ifndef i32
#define i32 int32_t
#endif // !i32
#ifndef i64
#define i64 int64_t
#endif // !i64
// Floating point: f32 is float, f64 is double.
#ifndef f32
#define f32 float
#endif // !f32
#ifndef f64
#define f64 double
#endif // !f64
#endif // !ALIASES_H

View File

@@ -0,0 +1,17 @@
#ifndef FLAG_ACCESS_H
#define FLAG_ACCESS_H
#include "aliases.h"
// Identifiers of the status flags the simulator tracks. FLAG_COUNT is not a
// flag; it is the number of flags and is used to size and bound the flag
// storage.
enum flag_access : u8 {
FLAG_ZERO,
FLAG_SIGN,
FLAG_COUNT
};
// Returns the current state of `flag`, or false when `flag` is not below
// FLAG_COUNT.
bool get_flag(flag_access flag);
// Recomputes both flags from a 16-bit value: FLAG_ZERO is set when the value
// is 0, FLAG_SIGN when bit 15 (0x8000) is set; otherwise each is cleared.
void set_flags(u16 value);
// Prints a tab, the letter of every set flag ("Z", "S") and a newline to
// stdout.
void print_flags();
#endif // !FLAG_ACCESS_H

View File

@@ -0,0 +1,13 @@
#ifndef REG_ACCESS_H
#define REG_ACCESS_H
#include "aliases.h"
#include "sim86_instruction.h"
// Number of 16-bit slots in the simulated register file; register_access
// Index values are used directly as indices into it.
#define REGISTER_COUNT 13
// Stores `new_value` in the register described by `reg`: the whole 16-bit
// slot when reg.Count is 2, otherwise the single byte selected by reg.Offset
// (0 = low byte, 1 = high byte).
void set_register(register_access reg, u16 new_value);
// Returns the value of the register described by `reg`: the whole 16-bit slot
// when reg.Count is 2, otherwise the byte selected by reg.Offset.
u16 get_register(register_access reg);
// Returns the textual name of the register described by `reg`.
const char *get_register_name(register_access reg);
#endif // !REG_ACCESS_H

54
8086_sim/include/sim86.h Normal file
View File

@@ -0,0 +1,54 @@
/* ========================================================================
(C) Copyright 2023 by Molly Rocket, Inc., All Rights Reserved.
This software is provided 'as-is', without any express or implied
warranty. In no event will the authors be held liable for any damages
arising from the use of this software.
Please see https://computerenhance.com for more information
======================================================================== */
#ifndef SIM86_H
#define SIM86_H
#ifndef u8
typedef char unsigned u8;
#endif // !u8
#ifndef u16
typedef short unsigned u16;
#endif // !u16
#ifndef u32
typedef int unsigned u32;
#endif // !u32
#ifndef u64
typedef long long unsigned u64;
#endif // u64
#ifndef s8
typedef char s8;
#endif // !s8
#ifndef s16
typedef short s16;
#endif // !s16
#ifndef s32
typedef int s32;
#endif // !s32
#ifndef s64
typedef long long s64;
#endif // !s64
typedef s32 b32;
#define ArrayCount(Array) (sizeof(Array) / sizeof((Array)[0]))
static u32 const SIM86_VERSION = 3;
#endif // !SIM86_H

View File

@@ -0,0 +1,92 @@
/* ========================================================================
(C) Copyright 2023 by Molly Rocket, Inc., All Rights Reserved.
This software is provided 'as-is', without any express or implied
warranty. In no event will the authors be held liable for any damages
arising from the use of this software.
Please see https://computerenhance.com for more information
======================================================================== */
#ifndef SIM86_INST_H
#define SIM86_INST_H
#include "sim86.h"
// Identifier for every mnemonic in the instruction table. The enumerators are
// generated by including the table with INST redefined to emit
// Op_<Mnemonic>, and INSTALT (used in the table for further encodings of a
// mnemonic already introduced with INST) redefined to emit nothing, so each
// mnemonic appears exactly once. The table #undefs both macros at its end.
// Op_None is 0 and Op_Count follows the last generated enumerator.
enum operation_type : u32 {
Op_None,
#define INST(Mnemonic, ...) Op_##Mnemonic,
#define INSTALT(...)
#include "sim86_instruction_table.inl"
Op_Count,
};
enum instruction_flag : u32 {
Inst_Lock = 0x1,
Inst_Rep = 0x2,
Inst_Segment = 0x4,
Inst_Wide = 0x8,
Inst_Far = 0x10,
};
struct register_access {
u32 Index; // Index in the register table
u32 Offset; // High vs Low bits
u32 Count; // How many bytes are accessed
};
struct effective_address_term {
register_access Register;
s32 Scale;
};
enum effective_address_flag : u32 {
Address_ExplicitSegment = 0x1,
};
struct effective_address_expression {
effective_address_term Terms[2];
u32 ExplicitSegment;
s32 Displacement;
u32 Flags;
};
enum immediate_flag : u32 {
Immediate_RelativeJumpDisplacement = 0x1,
};
struct immediate {
s32 Value;
u32 Flags;
};
enum operand_type : u32 {
Operand_None,
Operand_Register,
Operand_Memory,
Operand_Immediate,
};
struct instruction_operand {
operand_type Type;
union {
effective_address_expression Address;
register_access Register;
immediate Immediate;
};
};
struct instruction {
u32 Address;
u32 Size;
operation_type Op;
u32 Flags;
instruction_operand Operands[2];
u32 SegmentOverride;
};
#endif // !SIM86_INST_H

View File

@@ -0,0 +1,65 @@
/* ========================================================================
(C) Copyright 2023 by Molly Rocket, Inc., All Rights Reserved.
This software is provided 'as-is', without any express or implied
warranty. In no event will the authors be held liable for any damages
arising from the use of this software.
Please see https://computerenhance.com for more information
======================================================================== */
#include "sim86.h"
#include "sim86_instruction.h"
enum instruction_bits_usage : u8 {
Bits_End, // NOTE(casey): The 0 value, indicating the end of the instruction
// encoding array
Bits_Literal, // NOTE(casey): These are opcode bits that identify instructions
// NOTE(casey): These bits correspond directly to the 8086 instruction manual
Bits_D,
Bits_S,
Bits_W,
Bits_V,
Bits_Z,
Bits_MOD,
Bits_REG,
Bits_RM,
Bits_SR,
Bits_Disp,
Bits_Data,
Bits_DispAlwaysW, // NOTE(casey): Tag for instructions where the displacement
// is always 16 bits
Bits_WMakesDataW, // NOTE(casey): Tag for instructions where SW=01 makes the
// data field become 16 bits
Bits_RMRegAlwaysW, // NOTE(casey): Tag for instructions where the register
// encoded in RM is always 16-bit width
Bits_RelJMPDisp, // NOTE(casey): Tag for instructions that require address
// adjustment to go through NASM properly
Bits_Far, // NOTE(casey): Tag for instructions that require a "far" keyword in
// their ASM to select the right opcode
Bits_Count,
};
// One field of an instruction encoding. The instruction table's macros fill
// these in positionally, so the member order must not change.
struct instruction_bits {
// Which kind of field this is (literal opcode bits, D, W, MOD, REG, ...).
instruction_bits_usage Usage;
// Width of the field in bits. The table's implicit (Imp*) and flag macros
// use 0 here and carry a hard-coded value in `Value` instead.
u8 BitCount;
// NOTE(review): only the table's XXX (0) and YYY (3) macros set this to a
// non-default value; presumably a bit position within the data they
// contribute to - confirm against the decoder.
u8 Shift;
// For Bits_Literal, the bit pattern to match; for implicit fields, the
// supplied value.
u8 Value;
};
struct instruction_encoding {
operation_type Op;
instruction_bits Bits[16];
};
struct instruction_table {
instruction_encoding *Encodings;
u32 EncodingCount;
u32 MaxInstructionByteCount;
};

View File

@@ -0,0 +1,250 @@
/* ========================================================================
(C) Copyright 2023 by Molly Rocket, Inc., All Rights Reserved.
This software is provided 'as-is', without any express or implied
warranty. In no event will the authors be held liable for any damages
arising from the use of this software.
Please see https://computerenhance.com for more information
======================================================================== */
/*
NOTE(casey): This instruction table is a direct translation of table 4-12 in the Intel 8086 manual.
The macros are designed to allow direct transcription, without changing the order or manner
of specification in the table in any way. Additional "implicit" versions of the macros are provided
so that hard-coded fields can be supplied uniformly.
The table is also designed to allow you to include it multiple times to "pull out" other things
from the table, such as opcode mnemonics as strings or enums, etc.
*/
#ifndef INST
#define INST(Mnemonic, Encoding, ...) {Op_##Mnemonic, Encoding, __VA_ARGS__},
#endif
#ifndef INSTALT
#define INSTALT INST
#endif
#define B(Bits) {Bits_Literal, sizeof(#Bits)-1, 0, 0b##Bits}
#define D {Bits_D, 1}
#define S {Bits_S, 1}
#define W {Bits_W, 1}
#define V {Bits_V, 1}
#define Z {Bits_Z, 1}
#define XXX {Bits_Data, 3, 0}
#define YYY {Bits_Data, 3, 3}
#define RM {Bits_RM, 3}
#define MOD {Bits_MOD, 2}
#define REG {Bits_REG, 3}
#define SR {Bits_SR, 2}
#define ImpW(Value) {Bits_W, 0, 0, Value}
#define ImpREG(Value) {Bits_REG, 0, 0, Value}
#define ImpMOD(Value) {Bits_MOD, 0, 0, Value}
#define ImpRM(Value) {Bits_RM, 0, 0, Value}
#define ImpD(Value) {Bits_D, 0, 0, Value}
#define ImpS(Value) {Bits_S, 0, 0, Value}
#define DISP {Bits_Disp, 0, 0, 0}
#define ADDR {Bits_Disp, 0, 0, 0}, {Bits_DispAlwaysW, 0, 0, 1}
#define DATA {Bits_Data, 0, 0, 0}
#define DATA_IF_W {Bits_WMakesDataW, 0, 0, 1}
#define Flags(F) {F, 0, 0, 1}
INST(mov, {B(100010), D, W, MOD, REG, RM})
INSTALT(mov, {B(1100011), W, MOD, B(000), RM, DATA, DATA_IF_W, ImpD(0)})
INSTALT(mov, {B(1011), W, REG, DATA, DATA_IF_W, ImpD(1)})
INSTALT(mov, {B(1010000), W, ADDR, ImpREG(0), ImpMOD(0), ImpRM(0b110), ImpD(1)})
INSTALT(mov, {B(1010001), W, ADDR, ImpREG(0), ImpMOD(0), ImpRM(0b110), ImpD(0)})
INSTALT(mov, {B(100011), D, B(0), MOD, B(0), SR, RM, ImpW(1)}) // NOTE(casey): This collapses 2 entries in the 8086 table by adding an explicit D bit
INST(push, {B(11111111), MOD, B(110), RM, ImpW(1)})
INSTALT(push, {B(01010), REG, ImpW(1)})
INSTALT(push, {B(000), SR, B(110), ImpW(1)})
INST(pop, {B(10001111), MOD, B(000), RM, ImpW(1)})
INSTALT(pop, {B(01011), REG, ImpW(1)})
INSTALT(pop, {B(000), SR, B(111), ImpW(1)})
INST(xchg, {B(1000011), W, MOD, REG, RM, ImpD(1)})
INSTALT(xchg, {B(10010), REG, ImpMOD(0b11), ImpW(1), ImpRM(0)})
INST(in, {B(1110010), W, DATA, ImpREG(0), ImpD(1)})
INSTALT(in, {B(1110110), W, ImpREG(0), ImpD(1), ImpMOD(0b11), ImpRM(2), Flags(Bits_RMRegAlwaysW)})
INST(out, {B(1110011), W, DATA, ImpREG(0), ImpD(0)})
INSTALT(out, {B(1110111), W, ImpREG(0), ImpD(0), ImpMOD(0b11), ImpRM(2), Flags(Bits_RMRegAlwaysW)})
INST(xlat, {B(11010111)})
INST(lea, {B(10001101), MOD, REG, RM, ImpD(1), ImpW(1)})
INST(lds, {B(11000101), MOD, REG, RM, ImpD(1), ImpW(1)})
INST(les, {B(11000100), MOD, REG, RM, ImpD(1), ImpW(1)})
INST(lahf, {B(10011111)})
INST(sahf, {B(10011110)})
INST(pushf, {B(10011100)})
INST(popf, {B(10011101)})
INST(add, {B(000000), D, W, MOD, REG, RM})
INSTALT(add, {B(100000), S, W, MOD, B(000), RM, DATA, DATA_IF_W})
INSTALT(add, {B(0000010), W, DATA, DATA_IF_W, ImpREG(0), ImpD(1)})
INST(adc, {B(000100), D, W, MOD, REG, RM})
INSTALT(adc, {B(100000), S, W, MOD, B(010), RM, DATA, DATA_IF_W})
INSTALT(adc, {B(0001010), W, DATA, DATA_IF_W, ImpREG(0), ImpD(1)})
INST(inc, {B(1111111), W, MOD, B(000), RM})
INSTALT(inc, {B(01000), REG, ImpW(1)})
INST(aaa, {B(00110111)})
INST(daa, {B(00100111)})
INST(sub, {B(001010), D, W, MOD, REG, RM})
INSTALT(sub, {B(100000), S, W, MOD, B(101), RM, DATA, DATA_IF_W})
INSTALT(sub, {B(0010110), W, DATA, DATA_IF_W, ImpREG(0), ImpD(1)})
INST(sbb, {B(000110), D, W, MOD, REG, RM})
INSTALT(sbb, {B(100000), S, W, MOD, B(011), RM, DATA, DATA_IF_W})
INSTALT(sbb, {B(0001110), W, DATA, DATA_IF_W, ImpREG(0), ImpD(1)})
INST(dec, {B(1111111), W, MOD, B(001), RM})
INSTALT(dec, {B(01001), REG, ImpW(1)})
INST(neg, {B(1111011), W, MOD, B(011), RM})
INST(cmp, {B(001110), D, W, MOD, REG, RM})
INSTALT(cmp, {B(100000), S, W, MOD, B(111), RM, DATA, DATA_IF_W})
INSTALT(cmp, {B(0011110), W, DATA, DATA_IF_W, ImpREG(0), ImpD(1)}) // NOTE(casey): The manual table suggests this data is only 8-bit, but wouldn't it be 16 as well?
INST(aas, {B(00111111)})
INST(das, {B(00101111)})
INST(mul, {B(1111011), W, MOD, B(100), RM, ImpS(0)})
INST(imul, {B(1111011), W, MOD, B(101), RM, ImpS(1)})
INST(aam, {B(11010100), B(00001010)}) // NOTE(casey): The manual says this has a DISP... but how could it? What for??
INST(div, {B(1111011), W, MOD, B(110), RM, ImpS(0)})
INST(idiv, {B(1111011), W, MOD, B(111), RM, ImpS(1)})
INST(aad, {B(11010101), B(00001010)})
INST(cbw, {B(10011000)})
INST(cwd, {B(10011001)})
INST(not, {B(1111011), W, MOD, B(010), RM})
INST(shl, {B(110100), V, W, MOD, B(100), RM})
INST(shr, {B(110100), V, W, MOD, B(101), RM})
INST(sar, {B(110100), V, W, MOD, B(111), RM})
INST(rol, {B(110100), V, W, MOD, B(000), RM})
INST(ror, {B(110100), V, W, MOD, B(001), RM})
INST(rcl, {B(110100), V, W, MOD, B(010), RM})
INST(rcr, {B(110100), V, W, MOD, B(011), RM})
INST(and, {B(001000), D, W, MOD, REG, RM})
INSTALT(and, {B(1000000), W, MOD, B(100), RM, DATA, DATA_IF_W})
INSTALT(and, {B(0010010), W, DATA, DATA_IF_W, ImpREG(0), ImpD(1)})
INST(test, {B(1000010), W, MOD, REG, RM}) // NOTE(casey): The manual suggests there is a D flag here, but it doesn't appear to be true (it would conflict with xchg if it did)
INSTALT(test, {B(1111011), W, MOD, B(000), RM, DATA, DATA_IF_W})
INSTALT(test, {B(1010100), W, DATA, DATA_IF_W, ImpREG(0), ImpD(1)}) // NOTE(casey): The manual table suggests this data is only 8-bit, but it seems like it could be 16 too?
INST(or, {B(000010), D, W, MOD, REG, RM})
INSTALT(or, {B(1000000), W, MOD, B(001), RM, DATA, DATA_IF_W})
INSTALT(or, {B(0000110), W, DATA, DATA_IF_W, ImpREG(0), ImpD(1)})
INST(xor, {B(001100), D, W, MOD, REG, RM})
INSTALT(xor, {B(1000000), W, MOD, B(110), RM, DATA, DATA_IF_W}) // NOTE(casey): The manual has conflicting information about this encoding, but I believe this is the correct binary pattern.
INSTALT(xor, {B(0011010), W, DATA, DATA_IF_W, ImpREG(0), ImpD(1)})
INST(rep, {B(1111001), Z})
INST(movs, {B(1010010), W})
INST(cmps, {B(1010011), W})
INST(scas, {B(1010111), W})
INST(lods, {B(1010110), W})
INST(stos, {B(1010101), W})
INST(call, {B(11101000), ADDR, Flags(Bits_RelJMPDisp)})
INSTALT(call, {B(11111111), MOD, B(010), RM, ImpW(1)})
INSTALT(call, {B(10011010), ADDR, DATA, DATA_IF_W, ImpW(1)})
INSTALT(call, {B(11111111), MOD, B(011), RM, ImpW(1), Flags(Bits_Far)})
INST(jmp, {B(11101001), ADDR, Flags(Bits_RelJMPDisp)})
INSTALT(jmp, {B(11101011), DISP, Flags(Bits_RelJMPDisp)})
INSTALT(jmp, {B(11111111), MOD, B(100), RM, ImpW(1)})
INSTALT(jmp, {B(11101010), ADDR, DATA, DATA_IF_W, ImpW(1)})
INSTALT(jmp, {B(11111111), MOD, B(101), RM, ImpW(1), Flags(Bits_Far)})
// NOTE(casey): The actual Intel manual does not distinguish mnemonics RET and RETF,
// but NASM needs this to reassemble properly, so we do.
INST(ret, {B(11000011)})
INSTALT(ret, {B(11000010), DATA, DATA_IF_W, ImpW(1)})
INST(retf, {B(11001011)})
INSTALT(retf, {B(11001010), DATA, DATA_IF_W, ImpW(1)})
INST(je, {B(01110100), DISP, Flags(Bits_RelJMPDisp)})
INST(jl, {B(01111100), DISP, Flags(Bits_RelJMPDisp)})
INST(jle, {B(01111110), DISP, Flags(Bits_RelJMPDisp)})
INST(jb, {B(01110010), DISP, Flags(Bits_RelJMPDisp)})
INST(jbe, {B(01110110), DISP, Flags(Bits_RelJMPDisp)})
INST(jp, {B(01111010), DISP, Flags(Bits_RelJMPDisp)})
INST(jo, {B(01110000), DISP, Flags(Bits_RelJMPDisp)})
INST(js, {B(01111000), DISP, Flags(Bits_RelJMPDisp)})
INST(jne, {B(01110101), DISP, Flags(Bits_RelJMPDisp)})
INST(jnl, {B(01111101), DISP, Flags(Bits_RelJMPDisp)})
INST(jg, {B(01111111), DISP, Flags(Bits_RelJMPDisp)})
INST(jnb, {B(01110011), DISP, Flags(Bits_RelJMPDisp)})
INST(ja, {B(01110111), DISP, Flags(Bits_RelJMPDisp)})
INST(jnp, {B(01111011), DISP, Flags(Bits_RelJMPDisp)})
INST(jno, {B(01110001), DISP, Flags(Bits_RelJMPDisp)})
INST(jns, {B(01111001), DISP, Flags(Bits_RelJMPDisp)})
INST(loop, {B(11100010), DISP, Flags(Bits_RelJMPDisp)})
INST(loopz, {B(11100001), DISP, Flags(Bits_RelJMPDisp)})
INST(loopnz, {B(11100000), DISP, Flags(Bits_RelJMPDisp)})
INST(jcxz, {B(11100011), DISP, Flags(Bits_RelJMPDisp)})
INST(int, {B(11001101), DATA})
INST(int3, {B(11001100)}) // TODO(casey): The manual does not suggest that this intrinsic has an "int3" mnemonic, but NASM thinks so
INST(into, {B(11001110)})
INST(iret, {B(11001111)})
INST(clc, {B(11111000)})
INST(cmc, {B(11110101)})
INST(stc, {B(11111001)})
INST(cld, {B(11111100)})
INST(std, {B(11111101)})
INST(cli, {B(11111010)})
INST(sti, {B(11111011)})
INST(hlt, {B(11110100)})
INST(wait, {B(10011011)})
INST(esc, {B(11011), XXX, MOD, YYY, RM})
INST(lock, {B(11110000)})
INST(segment, {B(001), SR, B(110)})
#undef INST
#undef INSTALT
#undef B
#undef D
#undef S
#undef W
#undef V
#undef Z
#undef XXX
#undef YYY
#undef RM
#undef MOD
#undef REG
#undef SR
#undef ImpW
#undef ImpREG
#undef ImpMOD
#undef ImpRM
#undef ImpD
#undef ImpS
#undef DISP
#undef ADDR
#undef DATA
#undef DATA_IF_W
#undef Flags

View File

@@ -0,0 +1,21 @@
/* ========================================================================
(C) Copyright 2023 by Molly Rocket, Inc., All Rights Reserved.
This software is provided 'as-is', without any express or implied
warranty. In no event will the authors be held liable for any damages
arising from the use of this software.
Please see https://computerenhance.com for more information
======================================================================== */
#include "sim86.h"
#include "sim86_instruction.h"
#include "sim86_instruction_table.h"
extern "C" u32 Sim86_GetVersion(void);
extern "C" void Sim86_Decode8086Instruction(u32 SourceSize, u8 *Source, instruction *Dest);
extern "C" char const *Sim86_RegisterNameFromOperand(register_access *RegAccess);
extern "C" char const *Sim86_MnemonicFromOperationType(operation_type Type);
extern "C" void Sim86_Get8086InstructionTable(instruction_table *Dest);

BIN
8086_sim/lib/libsim86.so Normal file

Binary file not shown.

View File

@@ -0,0 +1,27 @@
; ========================================================================
;
; (C) Copyright 2023 by Molly Rocket, Inc., All Rights Reserved.
;
; This software is provided 'as-is', without any express or implied
; warranty. In no event will the authors be held liable for any damages
; arising from the use of this software.
;
; Please see https://computerenhance.com for further information
;
; ========================================================================
; ========================================================================
; LISTING 43
; ========================================================================
bits 16
mov ax, 1
mov bx, 2
mov cx, 3
mov dx, 4
mov sp, 5
mov bp, 6
mov si, 7
mov di, 8

View File

@@ -0,0 +1,32 @@
; ========================================================================
;
; (C) Copyright 2023 by Molly Rocket, Inc., All Rights Reserved.
;
; This software is provided 'as-is', without any express or implied
; warranty. In no event will the authors be held liable for any damages
; arising from the use of this software.
;
; Please see https://computerenhance.com for further information
;
; ========================================================================
; ========================================================================
; LISTING 44
; ========================================================================
bits 16
mov ax, 1
mov bx, 2
mov cx, 3
mov dx, 4
mov sp, ax
mov bp, bx
mov si, cx
mov di, dx
mov dx, sp
mov cx, bp
mov bx, si
mov ax, di

View File

@@ -0,0 +1,43 @@
; ========================================================================
;
; (C) Copyright 2023 by Molly Rocket, Inc., All Rights Reserved.
;
; This software is provided 'as-is', without any express or implied
; warranty. In no event will the authors be held liable for any damages
; arising from the use of this software.
;
; Please see https://computerenhance.com for further information
;
; ========================================================================
; ========================================================================
; LISTING 45
; ========================================================================
bits 16
mov ax, 0x2222
mov bx, 0x4444
mov cx, 0x6666
mov dx, 0x8888
mov ss, ax
mov ds, bx
mov es, cx
mov al, 0x11
mov bh, 0x33
mov cl, 0x55
mov dh, 0x77
mov ah, bl
mov cl, dh
mov ss, ax
mov ds, bx
mov es, cx
mov sp, ss
mov bp, ds
mov si, es
mov di, dx

View File

@@ -0,0 +1,28 @@
; ========================================================================
;
; (C) Copyright 2023 by Molly Rocket, Inc., All Rights Reserved.
;
; This software is provided 'as-is', without any express or implied
; warranty. In no event will the authors be held liable for any damages
; arising from the use of this software.
;
; Please see https://computerenhance.com for further information
;
; ========================================================================
; ========================================================================
; LISTING 46
; ========================================================================
bits 16
mov bx, -4093
mov cx, 3841
sub bx, cx
mov sp, 998
mov bp, 999
cmp bp, sp
add bp, 1027
sub bp, 2026

View File

@@ -0,0 +1,36 @@
; ========================================================================
;
; (C) Copyright 2023 by Molly Rocket, Inc., All Rights Reserved.
;
; This software is provided 'as-is', without any express or implied
; warranty. In no event will the authors be held liable for any damages
; arising from the use of this software.
;
; Please see https://computerenhance.com for further information
;
; ========================================================================
; ========================================================================
; LISTING 47
; ========================================================================
bits 16
add bx, 30000
add bx, 10000
sub bx, 5000
sub bx, 5000
mov bx, 1
mov cx, 100
add bx, cx
mov dx, 10
sub cx, dx
add bx, 40000
add cx, -90
mov sp, 99
mov bp, 98
cmp bp, sp

View File

@@ -0,0 +1,23 @@
; ========================================================================
;
; (C) Copyright 2023 by Molly Rocket, Inc., All Rights Reserved.
;
; This software is provided 'as-is', without any express or implied
; warranty. In no event will the authors be held liable for any damages
; arising from the use of this software.
;
; Please see https://computerenhance.com for further information
;
; ========================================================================
; ========================================================================
; LISTING 48
; ========================================================================
bits 16
mov cx, 200
mov bx, cx
add cx, 1000
mov bx, 2000
sub cx, bx

View File

@@ -0,0 +1,24 @@
; ========================================================================
;
; (C) Copyright 2023 by Molly Rocket, Inc., All Rights Reserved.
;
; This software is provided 'as-is', without any express or implied
; warranty. In no event will the authors be held liable for any damages
; arising from the use of this software.
;
; Please see https://computerenhance.com for further information
;
; ========================================================================
; ========================================================================
; LISTING 49
; ========================================================================
bits 16
mov cx, 3
mov bx, 1000
loop_start:
add bx, 10
sub cx, 1
jnz loop_start

View File

@@ -0,0 +1,38 @@
; ========================================================================
;
; (C) Copyright 2023 by Molly Rocket, Inc., All Rights Reserved.
;
; This software is provided 'as-is', without any express or implied
; warranty. In no event will the authors be held liable for any damages
; arising from the use of this software.
;
; Please see https://computerenhance.com for further information
;
; ========================================================================
; ========================================================================
; LISTING 50
; ========================================================================
bits 16
mov ax, 10
mov bx, 10
mov cx, 10
label_0:
cmp bx, cx
je label_1
add ax, 1
jp label_2
label_1:
sub bx, 5
jb label_3
label_2:
sub cx, 2
label_3:
loopnz label_0

View File

@@ -0,0 +1,30 @@
; ========================================================================
;
; (C) Copyright 2023 by Molly Rocket, Inc., All Rights Reserved.
;
; This software is provided 'as-is', without any express or implied
; warranty. In no event will the authors be held liable for any damages
; arising from the use of this software.
;
; Please see https://computerenhance.com for further information
;
; ========================================================================
; ========================================================================
; LISTING 51
; ========================================================================
bits 16
mov word [1000], 1
mov word [1002], 2
mov word [1004], 3
mov word [1006], 4
mov bx, 1000
mov word [bx + 4], 10
mov bx, word [1000]
mov cx, word [1002]
mov dx, word [1004]
mov bp, word [1006]

View File

@@ -0,0 +1,36 @@
; ========================================================================
;
; (C) Copyright 2023 by Molly Rocket, Inc., All Rights Reserved.
;
; This software is provided 'as-is', without any express or implied
; warranty. In no event will the authors be held liable for any damages
; arising from the use of this software.
;
; Please see https://computerenhance.com for further information
;
; ========================================================================
; ========================================================================
; LISTING 52
; ========================================================================
bits 16
mov dx, 6
mov bp, 1000
mov si, 0
init_loop_start:
mov word [bp + si], si
add si, 2
cmp si, dx
jnz init_loop_start
mov bx, 0
mov si, 0
add_loop_start:
mov cx, word [bp + si]
add bx, cx
add si, 2
cmp si, dx
jnz add_loop_start

View File

@@ -0,0 +1,35 @@
; ========================================================================
;
; (C) Copyright 2023 by Molly Rocket, Inc., All Rights Reserved.
;
; This software is provided 'as-is', without any express or implied
; warranty. In no event will the authors be held liable for any damages
; arising from the use of this software.
;
; Please see https://computerenhance.com for further information
;
; ========================================================================
; ========================================================================
; LISTING 53
; ========================================================================
bits 16
mov dx, 6
mov bp, 1000
mov si, 0
init_loop_start:
mov word [bp + si], si
add si, 2
cmp si, dx
jnz init_loop_start
mov bx, 0
mov si, dx
sub bp, 2
add_loop_start:
add bx, word [bp + si]
sub si, 2
jnz add_loop_start

View File

@@ -0,0 +1,43 @@
; ========================================================================
;
; (C) Copyright 2023 by Molly Rocket, Inc., All Rights Reserved.
;
; This software is provided 'as-is', without any express or implied
; warranty. In no event will the authors be held liable for any damages
; arising from the use of this software.
;
; Please see https://computerenhance.com for further information
;
; ========================================================================
; ========================================================================
; LISTING 54
; ========================================================================
; Writes a 64x64 image, 4 bytes per pixel, into memory using nested loops.
bits 16
; Start image after one row, to avoid overwriting our code!
; (64*4 = one row of 64 pixels at 4 bytes per pixel)
mov bp, 64*4
; dx = Y coordinate
mov dx, 0
y_loop_start:
; cx = X coordinate, reset at the start of every row
mov cx, 0
x_loop_start:
; Fill pixel
; Each word store writes two bytes: [bp+0..1] from cx and [bp+2..3] from dx.
; The byte store to [bp+3] then overwrites the upper half of the second word.
mov word [bp + 0], cx ; Red
mov word [bp + 2], dx ; Blue
mov byte [bp + 3], 255 ; Alpha
; Advance pixel location
add bp, 4
; Advance X coordinate and loop
add cx, 1
cmp cx, 64
jnz x_loop_start
; Advance Y coordinate and loop
add dx, 1
cmp dx, 64
jnz y_loop_start

54
8086_sim/reg_access.cc Normal file
View File

@@ -0,0 +1,54 @@
#include "include/reg_access.h"
#include "include/aliases.h"
#include <stdio.h>
static u16 registers[REGISTER_COUNT] = {0};
#define BIT_PER_BYTE 8
// Stores `new_value` into the register described by `reg`.
//
// When `reg.Count == 2` the whole 16-bit register is replaced. Otherwise a
// single byte is written: `reg.Offset` selects the byte (0 = low, 1 = high)
// and only the low 8 bits of `new_value` are used. The other byte of the
// register is left untouched.
void set_register(register_access reg, u16 new_value) {
  if (reg.Count == 2) {
    registers[reg.Index] = new_value;
    return;
  }

  u16 shift = (u16)(reg.Offset) * BIT_PER_BYTE;
  // Mask covering exactly the byte being written
  u16 byte_mask = (u16)(0x00ff << shift);

  // Masking the shifted value keeps bits 8..15 of `new_value` from leaking
  // into the neighbouring byte (e.g. an 8-bit add that overflowed).
  registers[reg.Index] = (u16)((registers[reg.Index] & ~byte_mask) |
                               ((new_value << shift) & byte_mask));
}
// Reads the register described by `reg`: the full 16-bit value when
// `reg.Count == 2`, otherwise the single byte selected by `reg.Offset`.
u16 get_register(register_access reg) {
  const u16 stored = registers[reg.Index];

  if (reg.Count != 2) {
    const u16 shift = (u16)(reg.Offset) * BIT_PER_BYTE;
    return (stored >> shift) & 0x00ff;
  }

  return stored;
}
// Returns the assembly name for the register access `reg`.
//
// Each table row holds {low byte name, high byte name, full register name}.
// Full-width accesses (`reg.Count == 2`) use the last column; byte accesses
// use column 0 or 1 depending on `reg.Offset`.
const char *get_register_name(register_access reg) {
  // clang-format off
  static const char *register_names[REGISTER_COUNT][3] = {
      {"NONE", "NONE", "NONE"},
      {"al",   "ah",   "ax"},
      {"bl",   "bh",   "bx"},
      {"cl",   "ch",   "cx"},
      {"dl",   "dh",   "dx"},
      {"sp",   "sp",   "sp"},
      {"bp",   "bp",   "bp"},
      {"si",   "si",   "si"},
      {"di",   "di",   "di"},
      {"es",   "es",   "es"},
      {"cs",   "cs",   "cs"},
      {"ss",   "ss",   "ss"},
      {"ds",   "ds",   "ds"},
  };
  // clang-format on

  u32 column = 2;
  if (reg.Count != 2) {
    column = reg.Offset & reg.Count;
  }

  return register_names[reg.Index][column];
}

357
8086_sim/sim86.cc Normal file
View File

@@ -0,0 +1,357 @@
#include "include/aliases.h"
#include "include/flag_access.h"
#include "include/reg_access.h"
#include "include/sim86_instruction.h"
#include "include/sim86_lib.h"
#include <bits/types/FILE.h>
#include <stdio.h>
#include <string.h>
#define MEM_SIZE (1 << 16)
#define BITS_PER_BYTE 8
// Fixed-size string buffer that can be returned by value from a function.
struct basic_string {
char str[4096];
};
// Backing storage for the simulated machine.
struct membuf {
// Raw bytes; the program binary is loaded at the start of this buffer
u8 buffer[MEM_SIZE];
// Offset added to every simulated memory address before indexing `buffer`
u64 mem_start;
};
// Outcome of a simulated memory read or write.
struct mem_access_result {
// Value that was read or written (left at 0 when `error` is set)
u16 value;
// Non-zero when the access fell outside the memory buffer
u32 error;
};
// Forward declarations of the helpers defined after main().

// Resolves an operand (register, memory or immediate) to its current value
u16 get_operand_value(instruction_operand operand, bool wide);
// Formats an operand as assembly-like text
basic_string get_operand_string(instruction_operand operand, bool wide);
// Prints the mnemonic and both operands of a decoded instruction
void print_instruction(instruction inst);
// Executes the register-destination form of mov
void mov_to_register(const register_access &reg,
const instruction_operand &source, bool wide);
// Executes the memory-destination form of mov
void mov_to_memory(const effective_address_expression &addrexp,
const instruction_operand &source, bool wide);
// Reads a byte (or word when `wide`) from simulated memory
mem_access_result get_mem_value(const effective_address_expression &addrexp,
bool wide);
// Writes a byte (or word when `wide`) to simulated memory
mem_access_result set_mem_value(const effective_address_expression &addrexp,
u16 value, bool wide);
// Computes displacement + register terms of an effective address
u16 get_mem_index(const effective_address_expression &addrexp);
// The single simulated memory image shared by all helpers in this file
static membuf memory;
// Entry point: loads an 8086 binary into the simulated memory, then decodes,
// prints and executes it one instruction at a time (mov, add, sub, cmp and
// jne are simulated; every other instruction is only printed). Afterwards
// the registers written by mov, the instruction pointer and the flags are
// printed.
//
// Returns 1 on a usage error or when the binary cannot be loaded, 0 otherwise.
int main(int argc, char *argv[]) {
  if (argc < 2) {
    printf("Usage: sim86 BINARY_FILE\n");
    return 1;
  }

  memset((void *)memory.buffer, 0, MEM_SIZE);
  memory.mem_start = 0;

  const char *filename = argv[1];

  printf("Filename: %s\n", filename);

  FILE *fp = fopen(filename, "rb");
  if (!fp) {
    printf("Failed to open file %s\n", filename);
    // Nothing can be loaded; continuing would pass NULL to fseek/fread
    return 1;
  }

  fseek(fp, 0, SEEK_END);
  long file_size = ftell(fp);
  fseek(fp, 0, SEEK_SET);

  // Refuse anything that cannot be measured or would overflow the buffer
  if (file_size < 0 || file_size > MEM_SIZE) {
    printf("File %s does not fit in the %d byte memory buffer\n", filename,
           MEM_SIZE);
    fclose(fp);
    return 1;
  }

  // Use the number of bytes actually read as the program size
  u32 size =
      (u32)fread((void *)memory.buffer, sizeof(u8), (size_t)file_size, fp);

  memory.mem_start = size + 1;

  fclose(fp);

  instruction_table table;
  Sim86_Get8086InstructionTable(&table);

  u32 offset = 0;

  bool accessed_registers[REGISTER_COUNT] = {false};

  printf("\nDisassembly:\n");

  while (offset < size) {
    instruction decoded;
    Sim86_Decode8086Instruction(size - offset, memory.buffer + offset,
                                &decoded);

    if (!decoded.Op) {
      // `offset` only advances on a successful decode, so without this exit
      // an undecodable byte sequence would spin forever.
      printf("Unrecognized instruction at offset %u\n", offset);
      break;
    }

    offset += decoded.Size;

    bool wide = (decoded.Flags & Inst_Wide) == Inst_Wide;

    print_instruction(decoded);

    instruction_operand dest = decoded.Operands[0];
    instruction_operand source = decoded.Operands[1];

    switch (decoded.Op) {
    case Op_mov: {
      if (dest.Type == Operand_Register) {
        mov_to_register(dest.Register, source, wide);

        accessed_registers[dest.Register.Index] = true;
      } else if (dest.Type == Operand_Memory) {
        mov_to_memory(dest.Address, source, wide);
      }

      break;
    }
    case Op_add: {
      if (dest.Type == Operand_Register) {
        u16 value = get_register(dest.Register);
        value += get_operand_value(source, wide);
        set_flags(value);
        set_register(dest.Register, value);
      }

      break;
    }
    case Op_sub:
    case Op_cmp: {
      if (dest.Type == Operand_Register) {
        u16 value = get_register(dest.Register);
        value -= get_operand_value(source, wide);
        set_flags(value);

        // cmp only updates the flags; sub also stores the result
        if (decoded.Op == Op_sub) {
          set_register(dest.Register, value);
        }
      }

      break;
    }
    case Op_jne: {
      if (!get_flag(FLAG_ZERO)) {
        // The jump target is relative to the already-advanced offset; the
        // signed value wraps correctly when added to the unsigned offset.
        i16 inst_offset = get_operand_value(dest, wide);
        offset += inst_offset;
      }

      break;
    }
    default:
      break;
    }
  }

  printf("\nFinal registers:\n");

  for (u32 i = 0; i < REGISTER_COUNT; ++i) {
    if (accessed_registers[i]) {
      register_access reg = {i, 0, 2};

      u16 value = get_register(reg);

      printf("\t%s: 0x%04x (%d)\n", get_register_name(reg), value, value);
    }
  }

  // Print the instruction pointer register
  printf("\tip: 0x%04x (%d)\n", offset, offset);

  printf("\nFinal flags:\n");
  print_flags();

#if 0 // Only needed (and working) for listing 0054
#define SIZE 64
#define BYTES SIZE * 4 * SIZE
  u8 image[BYTES];
  mempcpy(image, &(memory.buffer[memory.mem_start + (SIZE * 4)]), BYTES);
  FILE *out = fopen("image.data", "wb");
  fwrite(image, sizeof(u8), BYTES, out);
  fclose(out);
#endif

  return 0;
}
// Resolves `operand` to a 16-bit value: the register contents, the memory
// contents (byte or word depending on `wide`) or the immediate itself.
// Returns 0 for any other operand type and for a failed memory read.
u16 get_operand_value(instruction_operand operand, bool wide) {
  if (operand.Type == Operand_Register) {
    return get_register(operand.Register);
  }

  if (operand.Type == Operand_Immediate) {
    return operand.Immediate.Value;
  }

  if (operand.Type == Operand_Memory) {
    mem_access_result loaded = get_mem_value(operand.Address, wide);
    return loaded.error ? 0 : loaded.value;
  }

  return 0;
}
// Formats `operand` as assembly-like text.
//
//   register  -> "bx"
//   immediate -> "12"
//   memory    -> "word [bp + si + 4]" / "byte [1000]" (size prefix from `wide`)
//
// Any other operand type yields an empty string. The displacement is printed
// whenever it is the only term next to at most one register, and also after
// two register terms when it is non-zero, so the text matches the address
// that get_mem_index() actually computes.
basic_string get_operand_string(instruction_operand operand, bool wide) {
  basic_string output = {""};

  switch (operand.Type) {
  case Operand_Register:
    snprintf(output.str, sizeof(output.str), "%s",
             get_register_name(operand.Register));
    break;
  case Operand_Memory: {
    char mem_string[1024] = {0};

    register_access reg1 = operand.Address.Terms[0].Register;
    register_access reg2 = operand.Address.Terms[1].Register;

    if (reg1.Index != 0) {
      snprintf(mem_string, sizeof(mem_string), "%s + ",
               get_register_name(reg1));
    }

    size_t length = strlen(mem_string);

    if (reg2.Index != 0) {
      snprintf(mem_string + length, sizeof(mem_string) - length, "%s",
               get_register_name(reg2));

      if (operand.Address.Displacement != 0) {
        length = strlen(mem_string);
        snprintf(mem_string + length, sizeof(mem_string) - length, " + %d",
                 operand.Address.Displacement);
      }
    } else {
      snprintf(mem_string + length, sizeof(mem_string) - length, "%d",
               operand.Address.Displacement);
    }

    snprintf(output.str, sizeof(output.str), "%s [%s]",
             wide ? "word" : "byte", mem_string);

    break;
  }
  case Operand_Immediate:
    snprintf(output.str, sizeof(output.str), "%d", operand.Immediate.Value);
    break;
  default:
    break;
  }

  return output;
}
// Prints one decoded instruction as "\t<mnemonic> <dest>, <source>".
void print_instruction(instruction inst) {
  const bool wide = (inst.Flags & Inst_Wide) == Inst_Wide;

  const char *mnemonic = Sim86_MnemonicFromOperationType(inst.Op);
  basic_string dest_text = get_operand_string(inst.Operands[0], wide);
  basic_string source_text = get_operand_string(inst.Operands[1], wide);

  printf("\t%s %s, %s\n", mnemonic, dest_text.str, source_text.str);
}
void mov_to_register(const register_access &reg,
const instruction_operand &source, bool wide) {
switch (source.Type) {
case Operand_Immediate:
set_register(reg, source.Immediate.Value);
break;
case Operand_Register:
set_register(reg, get_register(source.Register));
break;
case Operand_Memory: {
mem_access_result result = get_mem_value(source.Address, wide);
if (!result.error) {
set_register(reg, result.value);
}
break;
}
default:
break;
}
}
void mov_to_memory(const effective_address_expression &addrexp,
const instruction_operand &source, bool wide) {
switch (source.Type) {
case Operand_Immediate:
set_mem_value(addrexp, source.Immediate.Value, wide);
break;
case Operand_Register:
set_mem_value(addrexp, get_register(source.Register), wide);
break;
case Operand_Memory: {
mem_access_result result = get_mem_value(source.Address, wide);
if (!result.error) {
set_mem_value(addrexp, result.value, wide);
}
break;
}
default:
break;
}
}
// Reads one byte (or a little-endian word when `wide`) from simulated memory
// at the effective address `addrexp`.
//
// Returns {value, 0} on success. If any byte of the access lies outside the
// memory buffer, returns {0, 1} without touching memory.
mem_access_result get_mem_value(const effective_address_expression &addrexp,
                                bool wide) {
  mem_access_result result = {0, 0};

  u64 first = memory.mem_start + get_mem_index(addrexp);
  // A word access also touches the following byte, which must be in range too
  u64 last = first + (wide ? 1 : 0);

  if (last >= MEM_SIZE) {
    result.error = 1;
    return result;
  }

  result.value = memory.buffer[first];

  if (wide) {
    result.value |= (u16)(memory.buffer[last] << BITS_PER_BYTE);
  }

  return result;
}
// Writes the low byte of `value` (or the whole little-endian word when
// `wide`) to simulated memory at the effective address `addrexp`.
//
// Returns {value, 0} on success. If any byte of the access lies outside the
// memory buffer, returns {0, 1} and writes nothing.
mem_access_result set_mem_value(const effective_address_expression &addrexp,
                                u16 value, bool wide) {
  mem_access_result result = {0, 0};

  u64 first = memory.mem_start + get_mem_index(addrexp);
  // A word access also touches the following byte, which must be in range too
  u64 last = first + (wide ? 1 : 0);

  if (last >= MEM_SIZE) {
    result.error = 1;
    return result;
  }

  memory.buffer[first] = (u8)value;

  if (wide) {
    memory.buffer[last] = (u8)(value >> BITS_PER_BYTE);
  }

  result.value = value;

  return result;
}
// Computes the 16-bit effective address of `addrexp`: the displacement plus
// the current value of each register term that is present (a register index
// of 0 marks an unused term). The sum wraps at 16 bits.
u16 get_mem_index(const effective_address_expression &addrexp) {
  u16 address = addrexp.Displacement;

  const register_access first = addrexp.Terms[0].Register;
  if (first.Index != 0) {
    address += get_register(first);
  }

  const register_access second = addrexp.Terms[1].Register;
  if (second.Index != 0) {
    address += get_register(second);
  }

  return address;
}

5
haversine_01/.gitignore vendored Normal file
View File

@@ -0,0 +1,5 @@
python/__pycache__/**/*
cpp/haverscan
cpp/haverstrtok
cpp/test*
data_10000000_flex.json

View File

@@ -1,17 +1,17 @@
mk_haversine_fscanf:
clang++ -g cpp/fscanf.cpp cpp/haversine.cpp -o cpp/haverscan
clang++ -g cpp/fscanf.cc cpp/haversine.cc -o cpp/haverscan
run_haversine_fscanf:
cd ./cpp && ./haverscan
mk_haversine_strtok:
clang++ -g cpp/strtok.cpp cpp/haversine.cpp -o cpp/haverstrtok
clang++ -g cpp/strtok.cc cpp/haversine.cc -o cpp/haverstrtok
run_haversine_strtok:
cd ./cpp && ./haverstrtok
mk_test:
clang++ -g -lpthread cpp/test.cpp cpp/haversine.cpp -o cpp/test
clang++ -g -lpthread cpp/test.cc cpp/haversine.cc -o cpp/test
run_test:
cd ./cpp && ./test

13
haversine_02/.gitignore vendored Normal file
View File

@@ -0,0 +1,13 @@
.cache
.vscode
.idea
compile_commands.json
count_and_distances
pairs.json
cache_test
main
genhavr
prochavr
reptest
memtest
timer_test*

3
haversine_02/build Executable file
View File

@@ -0,0 +1,3 @@
#!/bin/bash
# Run the compile script under bear so the compiler invocations get recorded.
# "$@" is quoted so every argument is forwarded exactly as it was given.
bear -- ./compile "$@"

135
haversine_02/compile Executable file
View File

@@ -0,0 +1,135 @@
#!/bin/bash
# Toolchain configuration shared by every build step below.
# C and C++ compilers
CC=clang
CXX=clang++
# Common compiler flags; the include path is made absolute so it keeps working
# after the script changes into a build directory
CFLAGS="-Wall -Wextra -I$(realpath ./include) "
# Assembler and its output format
ASM=nasm
ASM_FLAGS="-f elf64 "
# Archiver used to wrap the assembled object in a static library
AR=ar
AR_FLAGS="rcs"
# PARSE ARGUMENTS
# From this StackOverflow answer https://stackoverflow.com/a/14203146
# Consume the command-line flags one at a time. Each recognised flag sets a
# variable that later sections test; anything else aborts the build.
# (( )) is used so the argument count is compared numerically.
while (( $# > 0 )); do
    case "$1" in
        --release)
            RELEASE=true
            shift
            ;;
        --basic-profiling)
            BASIC_PROFILING=true
            shift
            ;;
        --full-profiling)
            FULL_PROFILING=true
            shift
            ;;
        *)
            echo "Unknown option $1"
            exit 1
            ;;
    esac
done
# BUILD TYPE
# Debug info is always emitted; a release build additionally enables -O1.
CFLAGS+="-g"
if [[ $RELEASE == true ]]; then
    CFLAGS+=" -O1"
fi
# GENERATOR
# Sources and output name of the generator executable, built immediately
GENSRC="./src/generator/gen_argparser.cc \
./src/generator/generator.cc \
./src/haversine.cc \
./src/point_types.cc \
./src/generator/main.cc"
GENOUT=genhavr
(set -x ; $CXX $CFLAGS $GENSRC -o $GENOUT)
echo
# PROFILER
# Path is relative to the build directory the profiler is compiled from
PROFSRC="../src/profiler/timer.c"
PROFFLAGS="-c "
PROF_BUILD_DIR=prof_build
# PROCESSOR
# JSON sources are compiled to objects inside their own build directory
JSONSRC="../src/json/*.c "
JSONFLAGS="-c "
JSON_BUILD_DIR=json_build
# The processor links the JSON objects together with its own sources
PROCSRC="./$JSON_BUILD_DIR/*.o \
./src/haversine.cc \
./src/point_types.cc \
./src/processor/proc_argparser.cc \
./src/processor/main.cc "
PROCOUT=prochavr
# MEMTESTER
MEMTESTSRC="./src/memtester/*.c"
MEMTESTOUT=memtest
(set -x ; $CC $CFLAGS $MEMTESTSRC -o $MEMTESTOUT)
echo
# REPTEST ASSEMBLY
# Assemble the hand-written test routines and wrap them in a static library
# that the repetition tester links against.
ASM_BUILD_DIR=reptest_build
ASM_SRC="./src/repetition_testing/reptest_functions.asm"
ASM_OBJ="./$ASM_BUILD_DIR/funcs.o"
ASM_LIB="./$ASM_BUILD_DIR/libfuncs.a"

# -p: do not fail when the directory is left over from an interrupted build
mkdir -p $ASM_BUILD_DIR

(set -x ; $ASM $ASM_FLAGS $ASM_SRC -o $ASM_OBJ)
(set -x ; $AR $AR_FLAGS $ASM_LIB $ASM_OBJ)
echo
# Profiling builds: pass the matching -D define to every component, compile
# the profiler, and build the repetition tester (which needs the profiler
# objects, so it is only built when profiling is enabled).
if [[ $BASIC_PROFILING == true ]] || [[ $FULL_PROFILING == true ]]; then
    # Full profiling takes precedence when both flags were given
    if [[ $FULL_PROFILING == true ]]; then
        JSONFLAGS+="-DFULL_PROFILING"
        PROCFLAGS="-DFULL_PROFILING"
        REPTESTFLAGS="-DFULL_PROFILING"
        PROFFLAGS+="-DFULL_PROFILING"
    elif [[ $BASIC_PROFILING == true ]]; then
        JSONFLAGS+="-DBASIC_PROFILING"
        PROCFLAGS="-DBASIC_PROFILING"
        REPTESTFLAGS="-DBASIC_PROFILING"
        PROFFLAGS+="-DBASIC_PROFILING"
    fi

    PROCSRC+=./$PROF_BUILD_DIR/*.o

    mkdir -p $PROF_BUILD_DIR
    # Stop if the directory cannot be entered: the following `cd ../` and the
    # final cleanup would otherwise run from the wrong location.
    cd $PROF_BUILD_DIR || exit 1
    (set -x ; $CC $CFLAGS $PROFFLAGS $PROFSRC)
    echo
    cd ../

    # REPETITION TESTING
    REPTESTSRC="./src/repetition_testing/*.cc ./$PROF_BUILD_DIR/*.o $ASM_LIB"
    REPTESTOUT=reptest
    (set -x ; $CXX $CFLAGS $REPTESTFLAGS $REPTESTSRC -o $REPTESTOUT)
    echo
fi

# Compile the JSON library to objects inside its own directory
mkdir -p $JSON_BUILD_DIR
cd $JSON_BUILD_DIR || exit 1
(set -x ; $CC $CFLAGS $JSONFLAGS $JSONSRC)
echo
cd ../

# Link the processor against the JSON (and, if built, profiler) objects
(set -x ; $CXX $CFLAGS $PROCFLAGS $PROCSRC -o $PROCOUT)
echo

# CLEAR BUILD FILES
rm -rvf $JSON_BUILD_DIR $PROF_BUILD_DIR $ASM_BUILD_DIR

View File

@@ -0,0 +1,26 @@
#ifndef ALIASES_H
#define ALIASES_H
#include <stdint.h>
// Short names for the fixed-width unsigned integer types
#define u8 uint8_t
#define u16 uint16_t
#define u32 uint32_t
#define u64 uint64_t
// Short names for the fixed-width signed integer types
#define i8 int8_t
#define i16 int16_t
#define i32 int32_t
#define i64 int64_t
// Short names for the floating point types
#define f32 float
#define f64 double
// Aliases for `static` that name the intent at the point of use:
// INTERNAL for file-local functions/variables, PERSISTENT for function-local
// variables that keep their value between calls
#define INTERNAL static
#define PERSISTENT static
#ifdef __cplusplus
// C++ only: alias for `static` intended for class members
#define CLASS_MEMBER static
#endif // __cplusplus
#endif // !ALIASES_H

View File

@@ -0,0 +1,15 @@
#ifndef GEN_ARGPARSER_H
#define GEN_ARGPARSER_H
#include "aliases.h"
#include <argp.h>
// Command-line options of the generator executable.
struct GeneratorArgs {
// Seed for the random number generator
u32 seed;
// Whether the generated points should be grouped in clusters
bool clustered;
// Number of point pairs to generate
u64 count;
};
// Parses the command line into a GeneratorArgs value
GeneratorArgs parse_args(i32 argc, char *argv[]);
#endif // !GEN_ARGPARSER_H

View File

@@ -0,0 +1,8 @@
#ifndef GENERATOR_H
#define GENERATOR_H
#include "point_types.h"
// Fills `pairs->pairs` with `pairs->count` randomly generated point pairs.
// `clustered` selects clustered instead of uniformly distributed generation.
void fill_pairs_array(PairArray *pairs, bool clustered);
#endif // !GENERATOR_H

View File

@@ -0,0 +1,11 @@
#ifndef HAVERSINE_H
#define HAVERSINE_H
#include "aliases.h"
#include "point_types.h"
// Sphere radius, in kilometres, intended to be passed as `radius` below
#define EARTH_RADIUS_KM 6371.0
// Haversine distance between the two points of `pair`, whose coordinates are
// given in degrees, on a sphere of the given `radius`. The result is in the
// same unit as `radius`.
f64 haversine_of_degrees(const PointPair &pair, f64 radius);
#endif // !HAVERSINE_H

View File

@@ -0,0 +1,30 @@
#ifndef DSTRING_H
#define DSTRING_H
#include "aliases.h"
#ifdef __cplusplus
extern "C" {
#endif
// Opaque, heap-allocated dynamic string. Functions that may reallocate the
// string take a `dstr_t **` so they can update the caller's pointer.
typedef struct dstring dstr_t;
// Creates an empty string able to hold `capacity` characters
dstr_t *dstr_with_capacity(u64 capacity);
// Creates a string holding a copy of the C string `str`
dstr_t *dstr_from_string(const char *str);
// Replaces the contents of `*dst` with `src`
void dstr_update(dstr_t **dst, const char *src);
// Releases `*str` and sets it to NULL
void dstr_free(dstr_t **str);
// Appends the C string `src` to `*dst`
void dstr_concat(dstr_t **dst, const char *src);
// Appends the single character `c` to `*dst`
void dstr_append(dstr_t **dst, char c);
// Shrinks the allocation of `*str` to fit its current contents
void dstr_resize(dstr_t **str);
// Empties the string without releasing its memory
void dstr_clear(dstr_t *str);
// Prints the string followed by a newline
void dstr_print(const dstr_t *str);
// Index of the first occurrence of `substr`, or -1 when there is none
i64 dstr_find(const dstr_t *str, const char *substr);
// Number of characters currently stored
u64 dstr_length(const dstr_t *str);
// Number of characters that fit without reallocating
u64 dstr_capacity(const dstr_t *str);
// Read-only view of the contents as a C string
const char *dstr_to_cstr(const dstr_t *str);
#ifdef __cplusplus
}
#endif
#endif // !DSTRING_H

View File

@@ -0,0 +1,82 @@
#ifndef JSON_ENTITIES_H
#define JSON_ENTITIES_H
#include "aliases.h"
#include "dstring.h"
#include <stdbool.h>
#ifdef __cplusplus
extern "C" {
#endif
// In-memory representation of a parsed JSON document.
typedef struct json_entity jentity_t;
typedef struct json_collection jcoll_t;
typedef struct json_value jval_t;
typedef struct json_pair jpair_t;
// Discriminator for the union inside json_value
typedef enum {
JVAL_EMPTY,
JVAL_COLLECTION,
JVAL_STRING,
JVAL_INTEGER,
JVAL_DOUBLE,
JVAL_BOOLEAN,
JVAL_NULL,
} jval_type;
// A single JSON value; `type` selects which union member is meaningful
struct json_value {
jval_type type;
union {
void *null_val;
jcoll_t *collection;
dstr_t *string;
i64 num_int;
f64 num_dbl;
bool boolean;
};
};
// A key together with its value
struct json_pair {
dstr_t *key;
jval_t value;
};
// Discriminator for the union inside json_entity
typedef enum {
JENTITY_SINGLE,
JENTITY_PAIR,
} jentity_type;
// Node of the document tree: either a bare value or a key/value pair
struct json_entity {
jentity_type type;
union {
jval_t value;
jpair_t pair;
};
// Entity that holds the collection this node belongs to (NULL at the root)
jentity_t *parent;
// Next sibling inside the same collection (NULL for the last one)
jentity_t *next;
};
// Kind of collection: JSON object or JSON array
typedef enum {
JCOLL_OBJECT,
JCOLL_ARRAY,
} jcoll_type;
// Linked list of child entities
struct json_collection {
u64 size;
jcoll_type type;
// First and last child of the collection
jentity_t *begin;
jentity_t *end;
};
// Prints `entity` and its following siblings, using `indent` spaces per level
void print_json(const jentity_t *entity, u32 indent);
// Frees the tree rooted at `*entity` and sets `*entity` to NULL
void free_json(jentity_t **entity);
// Returns the collection pointer stored in `entity`'s value
jcoll_t *get_collection_from_entity(const jentity_t *entity);
// Allocates a bare-value entity; returns NULL when allocation fails
jentity_t *create_new_single_entity(const jval_t value, jentity_t *parent);
// Allocates a key/value entity; returns NULL when allocation fails
jentity_t *create_new_pair_entity(dstr_t *key, const jval_t value,
jentity_t *parent);
#ifdef __cplusplus
}
#endif
#endif // !JSON_ENTITIES_H

View File

@@ -0,0 +1,72 @@
#ifndef LEXER_STATES_H
#define LEXER_STATES_H
#include "aliases.h"
#include <stdbool.h>
#define VALID_JSON true
#define INVALID_JSON false
#ifdef __cplusplus
extern "C" {
#endif
// Non-owning pointer to a C string
typedef const char *str_view_t;
// Kinds of token the lexer can produce
typedef enum {
TK_NO_TOKEN,
TK_L_BRACE,
TK_R_BRACE,
TK_L_BRACKET,
TK_R_BRACKET,
TK_NULL,
TK_BOOL,
TK_STR_KEY,
TK_STR_VAL,
TK_INTEGER,
TK_DOUBLE,
} token_type;
// Payload of a token; which member is meaningful depends on the token type
typedef union {
void *no_val;
i64 num_int;
f64 num_frac;
str_view_t string;
bool boolean;
} token_value_t;
// A token together with the position it was found at
typedef struct {
u64 line;
u64 column;
token_type type;
token_value_t value;
} token_t;
// Error categories reported by the lexer
typedef enum {
LEX_ERR_NONE,
LEX_ERR_INVALID,
} lex_err_type;
// Error code plus a human-readable message
typedef struct {
// NOTE(review): `errno` is also the name of a macro defined by <errno.h>;
// a translation unit that includes <errno.h> before this header would have
// this member name replaced by the macro expansion.
lex_err_type errno;
str_view_t msg;
} lex_err_t;
// Result of requesting a token: an error status and the token itself
typedef struct {
lex_err_t error;
token_t token;
} lex_result_t;
// Opaque lexer state
typedef struct lexer_s lexer_t;
// Creates a lexer and stores it in `*lexer`
void lexer_init(lexer_t **lexer);
// Destroys the lexer stored in `*lexer`
void lexer_free(lexer_t **lexer);
// Produces the next token.
// NOTE(review): how `text` is expected to be passed on repeated calls is not
// visible in this header - confirm against the implementation.
lex_result_t get_next_token(lexer_t *lexer, const char *text);
// Prints a token
void print_token(token_t token);
#ifdef __cplusplus
}
#endif
#endif // !LEXER_STATES_H

View File

@@ -0,0 +1,19 @@
#ifndef PARSER_H
#define PARSER_H
#include "json_entities.h"
#include "lexer.h"
#ifdef __cplusplus
extern "C" {
#endif
// Opaque parser state
typedef struct parser_s parser_t;
// Loads the JSON file at `filepath` and returns the root of the resulting
// entity tree.
// NOTE(review): the failure return value is not visible here - presumably
// NULL; confirm against the implementation.
jentity_t *load_json(const char *filepath);
#ifdef __cplusplus
}
#endif
#endif // !PARSER_H

View File

@@ -0,0 +1,27 @@
#ifndef POINT_H
#define POINT_H
#include "aliases.h"
#include <stdio.h>
// A 2D point
struct Point {
f64 x;
f64 y;
};
// Two points whose distance is to be computed
struct PointPair {
Point p1;
Point p2;
};
// A counted array of point pairs
struct PairArray {
// Number of elements in `pairs`
u64 count;
PointPair *pairs;
};
// Writes `arr` to `filename` in binary form
void write_pairs_to_binary(const PairArray &arr, const char *filename);
// Reads a binary file into `arr`
void read_pairs_from_binary(PairArray &arr, const char *filename);
// Writes `arr` to `filename` as JSON
void write_pairs_to_json(const PairArray &arr, const char *filename);
// Compares two pair arrays
bool compare_pair_array(const PairArray &arr1, const PairArray &arr2);
#endif // !POINT_H

View File

@@ -0,0 +1,21 @@
#ifndef PROFILER_IDS_H
#define PROFILER_IDS_H
// Identifiers for the profiled sections of the processor; each value is used
// as the `ID` argument of the profiler sampling macros.
enum profiler_ids {
PROFILER_ID_CLI_PARSE,
PROFILER_ID_JSON_PARSE,
PROFILER_ID_READ_JSON_FILE,
PROFILER_ID_PARSER_SETUP,
PROFILER_ID_PARSER_PARSE_TOKENS,
PROFILER_ID_PARSER_TEAR_DOWN,
PROFILER_ID_LOAD_JSON_PAIRS,
PROFILER_ID_READ_BINARY,
PROFILER_ID_HAVERSINE_SUM,
PROFILER_ID_HAVERSINE_AVG,
PROFILER_ID_TEAR_DOWN,
PROFILER_ID_FREE_JSON,
// Number of ids above; keep this entry last
COUNT_PROFILER_IDS,
};
#endif // !PROFILER_IDS_H

View File

@@ -0,0 +1,12 @@
#ifndef PROC_ARGPARSER_H
#define PROC_ARGPARSER_H
#include "aliases.h"
// Command-line options of the processor executable.
struct ProcessorArgs {
// Path of the input file to process
const char *filepath;
};
// Parses the command line into a ProcessorArgs value
ProcessorArgs parse_args(i32 argc, char *argv[]);
#endif // !PROC_ARGPARSER_H

View File

@@ -0,0 +1,78 @@
#ifndef TIMER_H
#define TIMER_H
#include "aliases.h"
// Upper bound on the number of samples; can be overridden before including
// this header
#ifndef MAX_PROFILE_SAMPLES
#define MAX_PROFILE_SAMPLES 1024
#endif // !MAX_PROFILE_SAMPLES
// Per-section sampling is only compiled in with FULL_PROFILING; otherwise the
// SAMPLE_* macros expand to nothing.
#ifdef FULL_PROFILING
#define SAMPLE_START(ID, TITLE) sample_start(ID, TITLE)
#define SAMPLE_END(ID, BYTES) sample_end(ID, BYTES)
// Ends a sample without attributing any bytes to it
#define SAMPLE_END_DEFAULT(ID) sample_end(ID, 0)
#ifdef __cplusplus
extern "C" {
#endif
// Begins the sample identified by `id`, labelled with `title`
void sample_start(u64 id, const char *title);
// Ends the sample identified by `id`, recording `byte_count` bytes for it
void sample_end(u64 id, u64 byte_count);
#ifdef __cplusplus
}
#endif
#else
#define SAMPLE_START(ID, TITLE)
#define SAMPLE_END(ID, BYTES)
#define SAMPLE_END_DEFAULT(ID)
#endif // FULL_PROFILING
// Whole-run profiling is compiled in with either profiling level; otherwise
// the PROFILE_* macros expand to nothing.
#if defined(BASIC_PROFILING) || defined(FULL_PROFILING)
#define PROFILE_START(COUNT) profile_start(COUNT)
#define PROFILE_END profile_end()
#ifdef __cplusplus
extern "C" {
#endif
typedef struct sample profiler_sample_t;
// Bookkeeping for one profiled section.
// NOTE(review): field meanings below are inferred from their names - confirm
// against timer.c.
struct sample {
const char *title;
u64 first_start;
u64 start;
// Presumably time spent in the section itself, excluding nested sections
u64 exclusive_time;
// Presumably time spent in sections nested inside this one
u64 children_time;
u64 hit_count;
u64 byte_count;
profiler_sample_t *parent;
};
u64 get_os_frequency();
// Time in nanoseconds
u64 get_os_time(void);
// CPU timer using rdtsc
u64 read_cpu_timer(void);
// CPU timer frequency in Hz.
// NOTE(review): `milliseconds` is presumably the measurement window - confirm.
u64 get_cpu_freq(u64 milliseconds);
// Converts a CPU timer delta to seconds using the given frequency
f64 time_in_seconds(u64 cpu_time, u64 cpu_freq);
void profile_start(u64 count);
void profile_end();
#ifdef __cplusplus
}
#endif
#else
#define PROFILE_START(COUNT)
#define PROFILE_END
#endif // BASIC_PROFILING || FULL_PROFILING
#endif // !TIMER_H

View File

@@ -0,0 +1,71 @@
#ifndef REPTESTER_H
#define REPTESTER_H
#include "aliases.h"
// Inputs of a single test function: the file to read and the buffer to use.
// NOTE(review): the exact meaning of read_size/read_count is not visible in
// this header - confirm against the test functions.
struct reptest_params {
const char *filename;
char *buffer;
u64 read_size;
u64 read_count;
};
// Measurements produced by one run of a test function
struct reptest_results {
u64 bytes_read;
u64 read_time;
u64 page_faults;
};
// Timing statistics accumulated over all runs of a test
struct time_stats {
u64 min_time;
u64 max_time;
u64 avg_time;
u64 total_time;
};
// Page fault statistics accumulated over all runs of a test
struct mem_stats {
u64 min_faults;
u64 max_faults;
u64 avg_faults;
u64 total_bytes;
u64 total_faults;
};
// State of the repetition tester: parameters, timing configuration, the
// accumulated statistics and the results of the most recent run
struct reptester {
reptest_params params;
const u64 cpu_freq;
f64 wait_time_secs;
f64 test_time_secs;
u64 test_start_time;
u64 current_run;
time_stats tstats;
mem_stats mstats;
reptest_results results;
};
// Whether a test function allocates its own buffer on every run
enum alloc_type {
ALLOC_TYPE_WITHOUT_MALLOC,
ALLOC_TYPE_WITH_MALLOC,
// Number of allocation types above; keep this entry last
COUNT_ALLOC_TYPE,
};
// Signature shared by all test functions
typedef void (*reptest_func)(reptester *tester, alloc_type type);
// A test function together with its display name for each allocation type
struct func_data {
const char *names[COUNT_ALLOC_TYPE];
reptest_func func;
};
// Allocation/release hooks called by test functions; behaviour depends on
// `type`
void handle_alloc(reptester *tester, alloc_type type);
void handle_free(reptester *tester, alloc_type type);
// Repeatedly runs `func` and accumulates its statistics in `tester`
void run_func_test(reptester *tester, reptest_func func, const char *func_name,
alloc_type type);
// Current page fault count of the process
u64 page_fault_count();
// Prints the statistics accumulated in `tester` under the heading `name`
void print_results(reptester *tester, const char *name);
#endif // !REPTESTER_H

View File

@@ -0,0 +1,66 @@
#include "generator/gen_argparser.h"
#include "aliases.h"
#include <argp.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
// Callback invoked by argp for every option and positional argument
INTERNAL error_t argp_parser(i32 key, char *arg, argp_state *state);
// argp parser description; its fields are filled in by parse_args()
INTERNAL argp parser = {};
// Supported options: -s/--seed SEED and -c/--cluster
INTERNAL argp_option options[] = {
{.name = "seed", .key = 's', .arg = "SEED"},
{.name = "cluster", .key = 'c'},
// All-zero entry terminates the list
{0, 0, 0, 0, 0, 0},
};
// Parses the generator's command line (-s/--seed SEED, -c/--cluster and the
// positional COUNT). When no seed was given, or the given seed is 0, the
// current time is used as the seed instead.
GeneratorArgs parse_args(i32 argc, char *argv[]) {
  parser.options = options;
  parser.parser = argp_parser;
  parser.args_doc = "COUNT";

  GeneratorArgs parsed = {};
  argp_parse(&parser, argc, argv, 0, 0, &parsed);

  // A seed of 0 doubles as "not provided"
  if (parsed.seed == 0) {
    parsed.seed = time(NULL);
  }

  return parsed;
}
// argp callback: stores each recognised option in the GeneratorArgs passed
// through `state->input`.
//
//   -s SEED   decimal seed
//   -c        enable clustered generation
//   COUNT     exactly one positional argument, a positive decimal integer
//
// Malformed numbers are reported through argp_error() instead of silently
// becoming 0. Returns ARGP_ERR_UNKNOWN for keys this parser does not handle.
error_t argp_parser(i32 key, char *arg, argp_state *state) {
  GeneratorArgs *args = (GeneratorArgs *)state->input;
  char *end = NULL;

  switch (key) {
  case 's':
    args->seed = strtoul(arg, &end, 10);
    // Reject empty input and trailing garbage such as "12abc"
    if (end == arg || *end != '\0') {
      argp_error(state, "invalid SEED '%s'", arg);
    }
    break;
  case 'c':
    args->clustered = true;
    break;
  case ARGP_KEY_ARG:
    if (state->arg_num >= 1) {
      argp_usage(state);
    }

    args->count = strtoull(arg, &end, 10);
    // A count of 0 is rejected as well: there would be nothing to generate
    if (end == arg || *end != '\0' || args->count == 0) {
      argp_error(state, "COUNT must be a positive integer, got '%s'", arg);
    }
    break;
  case ARGP_KEY_END:
    if (state->arg_num < 1) {
      argp_usage(state);
    }
    break;
  default:
    return ARGP_ERR_UNKNOWN;
  }

  return 0;
}

View File

@@ -0,0 +1,115 @@
#include "generator/generator.h"
#include "aliases.h"
#include "point_types.h"
#include <math.h>
#include <stdlib.h>
#define X_MIN -180.0
#define X_MAX 180.0
#define Y_MIN -90.0
#define Y_MAX 90.0
// Random value scaled into [min, max]
f64 generate_random_double(f64 min, f64 max);
// Random point inside the given rectangle
Point generate_random_point(f64 x_min, f64 x_max, f64 y_min, f64 y_max);
// Two random points inside the given rectangle
PointPair generate_random_pair(f64 x_min, f64 x_max, f64 y_min, f64 y_max);
// Fills every element of `pairs->pairs` with a random point pair.
//
// Without `clustered`, points are spread over the whole coordinate range.
// With `clustered`, a number of clusters is derived from the number of
// decimal digits of `pairs->count`; each cluster gets a random centre and
// radius, and an equal share of the pairs is generated inside its bounding
// square (clamped to the coordinate range). The last cluster additionally
// receives the remainder of the division, so no element is left unwritten.
//
// Does nothing when `pairs` is NULL, has no storage, or has a count of 0.
void fill_pairs_array(PairArray *pairs, bool clustered) {
  if (!pairs || !pairs->pairs || pairs->count == 0) {
    return;
  }

  if (!clustered) {
    for (u64 i = 0; i < pairs->count; ++i) {
      pairs->pairs[i] = generate_random_pair(X_MIN, X_MAX, Y_MIN, Y_MAX);
    }

    return;
  }

  u64 digit_count = (u64)log10(pairs->count) + 1;
  u64 cluster_count = digit_count > 2 ? digit_count * 8 : digit_count;

  u64 pairs_per_cluster = pairs->count / cluster_count;
  u64 generated_pairs = 0;

  for (u64 cluster = 0; cluster < cluster_count; ++cluster) {
    // Braced initialisation evaluates left to right: x first, then y
    Point center = {
        generate_random_double(X_MIN, X_MAX),
        generate_random_double(Y_MIN, Y_MAX),
    };

    f64 radius = generate_random_double(0.0, (digit_count - 1) * 40.0);

    // Bounding square of the cluster, clamped to the valid coordinate range
    f64 cluster_x_min = center.x - radius;
    if (cluster_x_min < X_MIN) {
      cluster_x_min = X_MIN;
    }

    f64 cluster_x_max = center.x + radius;
    if (cluster_x_max > X_MAX) {
      cluster_x_max = X_MAX;
    }

    f64 cluster_y_min = center.y - radius;
    if (cluster_y_min < Y_MIN) {
      cluster_y_min = Y_MIN;
    }

    f64 cluster_y_max = center.y + radius;
    if (cluster_y_max > Y_MAX) {
      cluster_y_max = Y_MAX;
    }

    // The last cluster takes whatever is left, which covers the remainder
    // when the count is not a multiple of the cluster count
    u64 pairs_to_generate = pairs_per_cluster;
    if (cluster + 1 == cluster_count) {
      pairs_to_generate = pairs->count - generated_pairs;
    }

    for (u64 i = 0; i < pairs_to_generate; ++i) {
      pairs->pairs[generated_pairs + i] = generate_random_pair(
          cluster_x_min, cluster_x_max, cluster_y_min, cluster_y_max);
    }

    generated_pairs += pairs_to_generate;
  }
}
// Draws one value from rand() and scales it linearly into [min, max].
f64 generate_random_double(f64 min, f64 max) {
  const u32 sample = rand();
  const f64 unit = (f64)sample / (f64)RAND_MAX;

  return min + (unit * (max - min));
}
// Builds a point with a random x in [x_min, x_max] and a random y in
// [y_min, y_max]; x is drawn before y.
Point generate_random_point(f64 x_min, f64 x_max, f64 y_min, f64 y_max) {
  Point point;

  point.x = generate_random_double(x_min, x_max);
  point.y = generate_random_double(y_min, y_max);

  return point;
}
// Builds a pair of random points that both lie inside the given rectangle;
// the first point is drawn before the second.
PointPair generate_random_pair(f64 x_min, f64 x_max, f64 y_min, f64 y_max) {
  PointPair result;

  result.p1 = generate_random_point(x_min, x_max, y_min, y_max);
  result.p2 = generate_random_point(x_min, x_max, y_min, y_max);

  return result;
}

View File

@@ -0,0 +1,42 @@
#include "aliases.h"
#include "generator/gen_argparser.h"
#include "generator/generator.h"
#include "haversine.h"
#include "point_types.h"
#include <stdio.h>
#include <stdlib.h>
// Generator entry point: creates the requested number of random point pairs,
// writes them to "pairs.json", and writes the pair count followed by every
// haversine distance to the binary file "count_and_distances". The average
// distance is printed as a reference answer.
//
// Returns 1 when the pair array cannot be allocated, 0 otherwise.
i32 main(i32 argc, char *argv[]) {
  GeneratorArgs args = parse_args(argc, argv);

  srand(args.seed);

  PairArray arr = {args.count, NULL};
  arr.pairs = (PointPair *)malloc(arr.count * sizeof(PointPair));
  if (!arr.pairs) {
    fprintf(stderr, "Failed to allocate memory for %llu pairs\n",
            (unsigned long long)arr.count);
    return 1;
  }

  fill_pairs_array(&arr, args.clustered);

  write_pairs_to_json(arr, "pairs.json");

  // The file holds raw integers and doubles, so open it in binary mode
  FILE *fp = fopen("count_and_distances", "wb");

  if (fp) {
    fwrite(&(arr.count), sizeof(arr.count), 1, fp);

    f64 sum = 0.0;

    for (u64 i = 0; i < arr.count; ++i) {
      f64 distance = haversine_of_degrees(arr.pairs[i], EARTH_RADIUS_KM);

      fwrite(&distance, sizeof(f64), 1, fp);

      sum += distance;
    }

    printf("\nAVERAGE DISTANCE: %f\n", sum / arr.count);

    fclose(fp);
  }

  free(arr.pairs);

  return 0;
}

View File

@@ -0,0 +1,30 @@
#include "haversine.h"
#include "aliases.h"
#include "point_types.h"
#include <math.h>
// Pi; the digits beyond double precision are kept correct for reference
#define PI 3.14159265358979323846
// Squares an expression. The argument is evaluated twice, so it must not
// have side effects.
#define SQUARE(X) ((X) * (X))
f64 radians(f64 degrees);
// Haversine distance between the two points of `pair` on a sphere of the
// given `radius`. Coordinates are in degrees; the y coordinates are the ones
// fed to the cosine terms of the formula.
f64 haversine_of_degrees(const PointPair &pair, f64 radius) {
  const f64 dy = radians(pair.p2.y - pair.p1.y);
  const f64 dx = radians(pair.p2.x - pair.p1.x);

  const f64 y0 = radians(pair.p1.y);
  const f64 y1 = radians(pair.p2.y);

  const f64 sin_half_dy = sin(dy / 2.0);
  const f64 sin_half_dx = sin(dx / 2.0);

  const f64 root_term = (sin_half_dy * sin_half_dy) +
                        cos(y0) * cos(y1) * (sin_half_dx * sin_half_dx);

  return 2.0 * radius * asin(sqrt(root_term));
}
// Converts an angle from degrees to radians.
f64 radians(f64 degrees) {
  const f64 scaled = degrees * PI;

  return scaled / 180.0;
}

View File

@@ -0,0 +1,219 @@
#include "json/dstring.h"
#include "aliases.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
// Use this scalar to allocate extra memory in order to avoid having to
// constantly reallocate
#define CAPACITY_SCALAR 8
// Dynamic string stored as a single allocation: header followed by the
// character data.
struct dstring {
// Number of characters `buf` can hold, not counting the terminating NUL
u64 capacity;
// Number of characters currently stored
u64 size;
// Character data (flexible array member); allocated with capacity + 1 bytes
char buf[];
};
// Allocates an empty string with room for `capacity` characters plus the
// terminating NUL. The whole buffer is zero-filled.
// Returns NULL when the allocation fails.
dstr_t *dstr_with_capacity(u64 capacity) {
  const u64 buf_bytes = capacity + 1;

  dstr_t *str = (dstr_t *)malloc(sizeof(dstr_t) + buf_bytes);
  if (str != NULL) {
    str->capacity = capacity;
    str->size = 0;
    memset(str->buf, 0, buf_bytes);
  }

  return str;
}
// Creates a dynamic string holding a copy of `str`, over-allocated by
// CAPACITY_SCALAR so later growth needs fewer reallocations.
// Returns NULL when `str` is NULL or the allocation fails.
dstr_t *dstr_from_string(const char *str) {
  if (str == NULL) {
    return NULL;
  }

  const u64 length = strlen(str);

  dstr_t *created = dstr_with_capacity(length * CAPACITY_SCALAR);
  if (created != NULL) {
    // The buffer is already zero-filled, so the copy stays NUL-terminated
    memcpy(created->buf, str, length);
    created->size = length;
  }

  return created;
}
// Replaces the contents of `*dst` with a copy of `src`.
//
// When `src` does not fit, the string is reallocated with a capacity of
// strlen(src) * CAPACITY_SCALAR and `*dst` is updated to the new address.
// The unused part of the buffer is always zero-filled, so the contents stay
// NUL-terminated after growing as well.
//
// Does nothing when `dst`, `*dst` or `src` is NULL, or when reallocation
// fails (the original string is then left unchanged).
void dstr_update(dstr_t **dst, const char *src) {
  if (!dst || !(*dst) || !src) {
    return;
  }

  u64 length = strlen(src);

  dstr_t *str = *dst;

  if (length > str->capacity) {
    u64 capacity = length * CAPACITY_SCALAR;

    dstr_t *tmp = (dstr_t *)realloc(*dst, sizeof(dstr_t) + capacity + 1);
    if (!tmp) {
      return;
    }

    tmp->capacity = capacity;

    str = tmp;
    *dst = tmp;
  }

  // memmove tolerates `src` pointing into the buffer when no growth happened
  memmove(str->buf, src, length);
  // Clear everything after the new contents, including the terminator slot
  memset(str->buf + length, 0, str->capacity + 1 - length);

  str->size = length;
}
// Releases the string and resets the caller's pointer to NULL.
// Safe to call with NULL or with a pointer to NULL.
void dstr_free(dstr_t **str) {
  if (str != NULL && *str != NULL) {
    free(*str);
    *str = NULL;
  }
}
// Appends the C string `src` to `*dst`.
//
// When the result fits in the current capacity it is appended in place.
// Otherwise a new block with a capacity of new_length * CAPACITY_SCALAR is
// allocated, filled, and swapped in; the old block is only freed afterwards,
// so `src` may point into the string being extended.
//
// Does nothing when `dst`, `*dst` or `src` is NULL, when `src` is empty, or
// when allocation fails (the original string is then left unchanged).
void dstr_concat(dstr_t **dst, const char *src) {
  if (!dst || !(*dst) || !src) {
    return;
  }

  u64 src_length = strlen(src);
  if (src_length == 0) {
    return;
  }

  dstr_t *str = *dst;
  u64 new_length = str->size + src_length;

  if (new_length <= str->capacity) {
    memcpy(str->buf + str->size, src, src_length);
    str->buf[new_length] = '\0';
    str->size = new_length;

    return;
  }

  u64 capacity = new_length * CAPACITY_SCALAR;

  dstr_t *grown = (dstr_t *)malloc(sizeof(dstr_t) + capacity + 1);
  if (!grown) {
    return;
  }

  grown->capacity = capacity;
  grown->size = new_length;

  memcpy(grown->buf, str->buf, str->size);
  memcpy(grown->buf + str->size, src, src_length);
  // Zero the unused tail, which also NUL-terminates the contents
  memset(grown->buf + new_length, 0, capacity + 1 - new_length);

  free(str);
  *dst = grown;
}
// Appends the character `c` to `*dst`.
//
// The character is written in place when there is room; otherwise the string
// is first reallocated with a capacity of new_length * CAPACITY_SCALAR and
// `*dst` is updated to the new address.
//
// Does nothing when `dst` or `*dst` is NULL, when `c` is the NUL character
// (appending it would not change the visible contents), or when reallocation
// fails (the original string is then left unchanged).
void dstr_append(dstr_t **dst, char c) {
  if (!dst || !(*dst) || c == '\0') {
    return;
  }

  dstr_t *str = *dst;
  u64 new_length = str->size + 1;

  if (new_length > str->capacity) {
    u64 capacity = new_length * CAPACITY_SCALAR;

    dstr_t *tmp = (dstr_t *)realloc(str, sizeof(dstr_t) + capacity + 1);
    if (!tmp) {
      return;
    }

    // Zero everything after the existing contents in the grown buffer
    memset(tmp->buf + tmp->size, 0, capacity + 1 - tmp->size);
    tmp->capacity = capacity;

    str = tmp;
    *dst = tmp;
  }

  str->buf[str->size] = c;
  str->buf[new_length] = '\0';
  str->size = new_length;
}
// Shrinks the allocation so the capacity equals the current length.
// On reallocation failure the string is left untouched.
void dstr_resize(dstr_t **str) {
  if (str == NULL || *str == NULL) {
    return;
  }

  const u64 fitted = (*str)->size;

  dstr_t *shrunk = (dstr_t *)realloc(*str, sizeof(dstr_t) + fitted + 1);
  if (shrunk != NULL) {
    shrunk->capacity = fitted;
    *str = shrunk;
  }
}
// Empties the string while keeping its allocation: the first `capacity`
// bytes of the buffer are zeroed and the length is reset to 0.
// NULL and already-empty strings are left alone.
void dstr_clear(dstr_t *str) {
  if (str != NULL && str->size != 0) {
    memset(str->buf, 0, str->capacity);
    str->size = 0;
  }
}
// Writes the string followed by a newline to stdout; NULL prints nothing.
void dstr_print(const dstr_t *str) {
  if (str != NULL) {
    printf("%s\n", str->buf);
  }
}
i64 dstr_find(const dstr_t *str, const char *substr) {
if (!str || !substr) {
return -1;
}
u64 substr_length = strlen(substr);
if (substr_length == 0 || substr_length > str->size) {
return -1;
}
char buf[substr_length + 1];
memset(buf, 0, substr_length + 1);
for (u64 i = 0; i < str->size; ++i) {
if (i + substr_length >= str->size) {
break;
}
for (u64 j = 0; j < substr_length; ++j) {
buf[j] = str->buf[i + j];
}
if (strcmp(buf, substr) == 0) {
return i;
}
}
return -1;
}
// Number of characters stored in the string; 0 for NULL.
u64 dstr_length(const dstr_t *str) {
  return str != NULL ? str->size : 0;
}
// Number of characters the string can hold without growing; 0 for NULL.
u64 dstr_capacity(const dstr_t *str) {
  return str != NULL ? str->capacity : 0;
}
// Read-only view of the contents as a C string; "" for NULL. The returned
// pointer refers to the string's own buffer.
const char *dstr_to_cstr(const dstr_t *str) {
  return str != NULL ? str->buf : "";
}

View File

@@ -0,0 +1,236 @@
#include "json/json_entities.h"
#include "aliases.h"
#include "processor/ids.h"
#include "profiler/timer.h"
#include "json/dstring.h"
#include <stdio.h>
#include <stdlib.h>
// Pretty-prints `entity` and all of its following siblings to stdout.
//
// `indent` is the number of spaces per nesting level. Nested collections are
// printed recursively, while siblings are walked in a loop, so the stack
// depth depends on the nesting depth and not on the number of elements.
// Values that belong to a key are printed right after it; values without a
// key are preceded by the indentation of the current level. A final newline
// is printed after the last top-level entity.
//
// Does nothing when `entity` is NULL.
void print_json(const jentity_t *entity, u32 indent) {
  // Current nesting level, shared by the recursive calls
  PERSISTENT i32 indentation = 0;

  for (; entity != NULL; entity = entity->next) {
    dstr_t *key = NULL;
    const jval_t *value = NULL;

    if (entity->type == JENTITY_SINGLE) {
      value = &(entity->value);
    } else {
      key = entity->pair.key;
      value = &(entity->pair.value);
    }

    // printf's `*` width expects an int
    const int pad = (int)(indentation * indent);

    if (key) {
      printf("%*s\"%s\": ", pad, "", dstr_to_cstr(key));
    } else if (value->type != JVAL_EMPTY) {
      printf("%*s", pad, "");
    }

    switch (value->type) {
    case JVAL_COLLECTION: {
      const bool is_object = value->collection->type == JCOLL_OBJECT;

      printf("%s\n", is_object ? "{" : "[");

      ++indentation;

      if (value->collection->begin) {
        print_json(value->collection->begin, indent);
      }

      --indentation;

      printf("\n%*s%s", pad, "", is_object ? "}" : "]");

      break;
    }
    case JVAL_STRING:
      printf("\"%s\"", dstr_to_cstr(value->string));
      break;
    case JVAL_INTEGER:
      // num_int is signed, so it must be printed with a signed conversion
      printf("%lld", (long long)value->num_int);
      break;
    case JVAL_DOUBLE:
      printf("%f", value->num_dbl);
      break;
    case JVAL_BOOLEAN:
      printf("%s", value->boolean ? "true" : "false");
      break;
    case JVAL_NULL:
      printf("%s", "null");
      break;
    case JVAL_EMPTY:
      break;
    }

    if (entity->next) {
      printf(",\n");
    } else if (indentation == 0 && entity->parent == NULL) {
      // Add newline after printing the entire json tree
      printf("\n");
    }
  }
}
// Frees every entity, key, string and collection reachable from `*root` and
// sets `*root` to NULL.
//
// The tree is walked iteratively: the walk descends into a collection through
// its `begin` pointer, frees leaves while moving along `next`, and climbs
// back through `parent` once the last child is gone. Each visited child
// advances its parent's `begin` pointer past itself, so a collection whose
// `begin` is NULL has no children left and can be freed.
//
// NOTE(review): `root` itself is dereferenced without a NULL check.
void free_json(jentity_t **root) {
if (!(*root)) {
return;
}
SAMPLE_START(PROFILER_ID_FREE_JSON, "FREE JSON");
jentity_t *current = *root;
jentity_t *temp = NULL;
dstr_t *key = NULL;
jval_t *value = NULL;
while (current) {
if (current->parent) {
// Move the beginning pointer of the collection to the next child
// TODO (Abdelrahman): This part gets repeated for some elements. Try to
// avoid that repetition
jentity_t *parent = current->parent;
jcoll_t *collection = NULL;
if (parent->type == JENTITY_SINGLE) {
collection = parent->value.collection;
} else {
collection = parent->pair.value.collection;
}
if (collection) {
collection->begin = current->next;
}
}
if (current->type == JENTITY_SINGLE) {
key = NULL;
value = &(current->value);
} else {
key = current->pair.key;
value = &(current->pair.value);
}
// dstr_free() resets the key to NULL, so revisiting this entity after its
// children were freed does not free the key twice
if (key) {
dstr_free(&(current->pair.key));
}
if (!value) {
break;
}
if (value->type == JVAL_COLLECTION) {
if (!(value->collection->begin)) {
// Once all children of the collection have been freed, free the memory
// allocated to the collection and the entity that holds it
free(value->collection);
temp = current;
// Continue with the next sibling, or climb to the parent after the last one
current = current->next != NULL ? current->next : current->parent;
free(temp);
temp = NULL;
continue;
}
// Children remain: descend into the first one
current = value->collection->begin;
} else {
// Leaf value: release its string (if any), then the entity itself
if (value->type == JVAL_STRING) {
dstr_free(&(value->string));
}
temp = current;
current = current->next != NULL ? current->next : current->parent;
free(temp);
temp = NULL;
}
}
*root = NULL;
SAMPLE_END_DEFAULT(PROFILER_ID_FREE_JSON);
}
// Returns the collection pointer held by the entity's value: taken from
// `value` for single entities and from `pair.value` for key/value pairs.
jcoll_t *get_collection_from_entity(const jentity_t *entity) {
  if (entity->type == JENTITY_SINGLE) {
    return entity->value.collection;
  }
  return entity->pair.value.collection;
}
// Allocates a keyless entity holding `value` and linked to `parent`.
// Returns NULL when the allocation fails. The new entity has no sibling.
jentity_t *create_new_single_entity(const jval_t value, jentity_t *parent) {
  jentity_t *node = (jentity_t *)malloc(sizeof(*node));
  if (node == NULL) {
    return NULL;
  }

  node->type = JENTITY_SINGLE;
  node->value = value;
  node->next = NULL;
  node->parent = parent;

  return node;
}
// Allocates a key/value entity storing `key` and `value`, linked to `parent`.
// Returns NULL when the allocation fails. The new entity has no sibling.
jentity_t *create_new_pair_entity(dstr_t *key, const jval_t value,
                                  jentity_t *parent) {
  jentity_t *node = (jentity_t *)malloc(sizeof(*node));
  if (node == NULL) {
    return NULL;
  }

  node->type = JENTITY_PAIR;
  node->pair.value = value;
  node->pair.key = key;
  node->next = NULL;
  node->parent = parent;

  return node;
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,275 @@
#include "json/parser.h"
#include "aliases.h"
#include "processor/ids.h"
#include "profiler/timer.h"
#include "json/dstring.h"
#include "json/json_entities.h"
#include "json/lexer.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
// State carried by the parser while the entity tree is being built
struct parser_s {
jentity_t *root; // first entity created; load_json returns it to the caller
jentity_t *current; // entity whose collection receives new keys and values
jval_t value; // value built from the token currently being handled
};
// Forward declarations of the file-local helpers defined below
INTERNAL void parser_free(parser_t **parser); // releases the parser, NULLs *parser
INTERNAL void parser_init(parser_t **parser); // allocates a zeroed parser
INTERNAL void parse_token(parser_t *parser, token_t token); // one token -> tree
INTERNAL void add_key(parser_t *parser, dstr_t *key); // appends a pair entity
INTERNAL jentity_t *add_value(parser_t *parser); // stores parser->value
INTERNAL void add_collection(parser_t *parser); // opens a nested collection
jentity_t *load_json(const char *filepath) {
FILE *fp = fopen(filepath, "r");
if (!fp) {
return NULL;
}
fseek(fp, 0, SEEK_END);
u64 length = ftell(fp);
fseek(fp, 0, SEEK_SET);
char *json = (char *)malloc(sizeof(char) * (length + 1));
memset(json, 0, length + 1);
SAMPLE_START(PROFILER_ID_READ_JSON_FILE, "READ JSON FILE");
fread(json, sizeof(char), length, fp);
SAMPLE_END(PROFILER_ID_READ_JSON_FILE, length);
fclose(fp);
SAMPLE_START(PROFILER_ID_PARSER_SETUP, "JSON PARSER SETUP");
lexer_t *lexer = NULL;
parser_t *parser = NULL;
lexer_init(&lexer);
if (!lexer) {
return NULL;
}
parser_init(&parser);
if (!parser) {
lexer_free(&lexer);
return NULL;
}
SAMPLE_END_DEFAULT(PROFILER_ID_PARSER_SETUP);
SAMPLE_START(PROFILER_ID_PARSER_PARSE_TOKENS, "PARSE TOKENS");
lex_result_t result = get_next_token(lexer, json);
if (result.error.errno) {
printf("%s\n", result.error.msg);
} else {
while (result.token.type != TK_NO_TOKEN) {
parse_token(parser, result.token);
result = get_next_token(lexer, NULL);
if (result.error.errno) {
printf("%s\n", result.error.msg);
break;
}
}
}
SAMPLE_END_DEFAULT(PROFILER_ID_PARSER_PARSE_TOKENS);
jentity_t *root = parser->root;
SAMPLE_START(PROFILER_ID_PARSER_TEAR_DOWN, "PARSER TEAR DOWN");
parser_free(&parser);
lexer_free(&lexer);
free(json);
SAMPLE_END_DEFAULT(PROFILER_ID_PARSER_TEAR_DOWN);
return root;
}
// Replaces *parser with a freshly allocated parser whose fields are all
// cleared. An existing parser is released first. On allocation failure
// *parser is left NULL.
void parser_init(parser_t **parser) {
  if (*parser != NULL) {
    parser_free(parser);
  }

  parser_t *fresh = (parser_t *)malloc(sizeof(parser_t));
  *parser = fresh;
  if (fresh == NULL) {
    return;
  }

  fresh->value = (jval_t){0};
  fresh->current = NULL;
  fresh->root = NULL;
}
// Releases the parser object and sets *parser to NULL. The entity tree the
// parser pointed at is not freed here; only the pointers to it are cleared.
void parser_free(parser_t **parser) {
  parser_t *doomed = *parser;
  if (doomed == NULL) {
    return;
  }

  doomed->current = NULL;
  doomed->root = NULL;
  free(doomed);

  *parser = NULL;
}
// Converts one lexer token into a change to the entity tree.
//
// Opening braces/brackets create a new collection and make it current,
// closing ones move back to the parent, keys append an empty pair, and scalar
// tokens store their value through add_value.
void parse_token(parser_t *parser, token_t token) {
  switch (token.type) {
  case TK_L_BRACE:
  case TK_L_BRACKET: {
    jcoll_t *collection = (jcoll_t *)malloc(sizeof(jcoll_t));
    if (!collection) {
      // Out of memory: drop the token rather than write through NULL
      break;
    }

    collection->type = token.type == TK_L_BRACE ? JCOLL_OBJECT : JCOLL_ARRAY;
    collection->size = 0;
    collection->begin = NULL;
    collection->end = NULL;

    parser->value = (jval_t){
        .type = JVAL_COLLECTION,
        .collection = collection,
    };

    add_collection(parser);

    break;
  }
  case TK_R_BRACE:
  case TK_R_BRACKET:
    // current is NULL when a closing token arrives before any collection was
    // opened; the root has no parent, so it stays current
    if (parser->current && parser->current->parent) {
      parser->current = parser->current->parent;
    }

    break;
  case TK_STR_KEY: {
    // The pair starts empty; add_value fills it in when the value arrives
    parser->value = (jval_t){.type = JVAL_EMPTY, .null_val = NULL};

    add_key(parser, dstr_from_string(token.value.string));

    break;
  }
  case TK_NULL: {
    parser->value = (jval_t){.type = JVAL_NULL, .null_val = NULL};

    add_value(parser);

    break;
  }
  case TK_BOOL: {
    parser->value =
        (jval_t){.type = JVAL_BOOLEAN, .boolean = token.value.boolean};

    add_value(parser);

    break;
  }
  case TK_STR_VAL: {
    parser->value = (jval_t){.type = JVAL_STRING,
                             .string = dstr_from_string(token.value.string)};

    add_value(parser);

    break;
  }
  case TK_INTEGER: {
    parser->value =
        (jval_t){.type = JVAL_INTEGER, .num_int = token.value.num_int};

    add_value(parser);

    break;
  }
  case TK_DOUBLE: {
    parser->value =
        (jval_t){.type = JVAL_DOUBLE, .num_dbl = token.value.num_frac};

    add_value(parser);

    break;
  }
  case TK_NO_TOKEN:
    break;
  }
}
// Appends a key/value pair entity, holding `key` and parser->value, to the
// collection of the current entity.
//
// Takes ownership of `key`: when the pair cannot be added (no current
// collection, or allocation failure) the key is released here and the
// collection is left unchanged.
void add_key(parser_t *parser, dstr_t *key) {
  jcoll_t *collection = NULL;
  if (parser->current) {
    collection = get_collection_from_entity(parser->current);
  }

  jentity_t *new_entity = NULL;
  if (collection) {
    new_entity = create_new_pair_entity(key, parser->value, parser->current);
  }

  if (!new_entity) {
    if (key) {
      dstr_free(&key);
    }

    return;
  }

  if (!(collection->end)) {
    collection->begin = collection->end = new_entity;
    collection->size = 1;
  } else {
    collection->end->next = new_entity;
    collection->end = new_entity;

    ++(collection->size);
  }
}
// Stores parser->value in the collection of the current entity.
//
// When the last entity of the collection is a pair still waiting for its
// value (JVAL_EMPTY), the value is written into that pair. Otherwise a new
// single entity is appended.
//
// Returns the entity that now holds the value, or NULL when there is no
// current collection or the new entity cannot be allocated; in the failure
// case the collection is left unchanged.
jentity_t *add_value(parser_t *parser) {
  if (!parser->current) {
    return NULL;
  }

  jcoll_t *collection = get_collection_from_entity(parser->current);
  if (!collection) {
    return NULL;
  }

  jentity_t *last = collection->end;

  if (last && last->type == JENTITY_PAIR &&
      last->pair.value.type == JVAL_EMPTY) {
    last->pair.value = parser->value;

    return last;
  }

  jentity_t *new_entity =
      create_new_single_entity(parser->value, parser->current);
  if (!new_entity) {
    return NULL;
  }

  if (!last) {
    collection->begin = collection->end = new_entity;
    collection->size = 1;
  } else {
    last->next = new_entity;
    collection->end = new_entity;

    ++(collection->size);
  }

  return new_entity;
}
// Makes the collection in parser->value the current entity. The first
// collection becomes the root of the tree; later ones are stored through
// add_value and the entity it returns becomes current.
void add_collection(parser_t *parser) {
  if (parser->root) {
    parser->current = add_value(parser);
    return;
  }

  jentity_t *top = create_new_single_entity(parser->value, NULL);
  parser->current = top;
  parser->root = top;
}

View File

@@ -0,0 +1,125 @@
#include "aliases.h"
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
// Number of bytes counted as one page when turning page counts into sizes
#define PAGESIZE 4096
#if _WIN32
// clang-format off
#include <intrin.h>
#include <windows.h>
#include <psapi.h>
// clang-format on
// Process-wide state needed to query memory counters on Windows
typedef struct {
bool Initialized; // set once InitializeOSMetrics has run
HANDLE ProcessHandle; // handle of the current process, used for the queries
} os_metrics;
// Single shared instance, filled in by InitializeOSMetrics
static os_metrics GlobalMetrics;
// Returns the PageFaultCount reported by GetProcessMemoryInfo for the handle
// stored in GlobalMetrics.
static u64 ReadWindowsPageFaultCount(void) {
  PROCESS_MEMORY_COUNTERS_EX Counters = {0};
  Counters.cb = sizeof(Counters);

  GetProcessMemoryInfo(GlobalMetrics.ProcessHandle,
                       (PROCESS_MEMORY_COUNTERS *)&Counters, sizeof(Counters));

  return Counters.PageFaultCount;
}
// Opens a query handle to the current process the first time it is called;
// later calls do nothing.
static void InitializeOSMetrics(void) {
  if (GlobalMetrics.Initialized) {
    return;
  }

  GlobalMetrics.Initialized = true;
  GlobalMetrics.ProcessHandle =
      OpenProcess(PROCESS_QUERY_INFORMATION | PROCESS_VM_READ, FALSE,
                  GetCurrentProcessId());
}
#else // _WIN32
#include <sys/mman.h>
#include <sys/resource.h>
#include <sys/time.h>
typedef struct rusage rusage_t;

// Returns the sum of the minor and major page fault counters that getrusage
// reports for this process.
u64 nix_page_fault_count() {
  rusage_t stats;
  getrusage(RUSAGE_SELF, &stats);

  u64 total_faults = stats.ru_minflt + stats.ru_majflt;
  return total_faults;
}
#endif // _WIN32
// Returns the page fault count of the current process using whichever
// platform helper was compiled in.
u64 page_fault_count() {
  u64 faults = 0;
#if _WIN32
  faults = ReadWindowsPageFaultCount();
#else
  faults = nix_page_fault_count();
#endif
  return faults;
}
int main(int argc, char *argv[]) {
if (argc < 2 || argc > 2) {
printf("Usage: %s [NUMBER OF PAGES TO ALLOCATE]\n", argv[0]);
return EXIT_FAILURE;
}
#if _WIN32
InitializeOSMetrics();
#endif
u64 page_count = atol(argv[1]);
u64 alloc_size = page_count * PAGESIZE;
u64 touch_size = 0;
printf("Page Count,Touch Count,Fault Count,Extra Faults\n");
for (u64 touch_count = 0; touch_count <= page_count; ++touch_count) {
touch_size = touch_count * PAGESIZE;
#if _WIN32
u8 *data = (u8 *)VirtualAlloc(0, alloc_size, MEM_RESERVE | MEM_COMMIT,
PAGE_READWRITE);
#else
u8 *data = (u8 *)mmap(NULL, alloc_size, PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
#endif
if (!data) {
printf("Failed to allocate memory\n");
return EXIT_FAILURE;
}
u64 fault_start = page_fault_count();
for (u64 i = 0; i < touch_size; ++i) {
data[i] = (u8)i;
}
u64 fault_end = page_fault_count();
u64 faults = fault_end - fault_start;
printf("%llu,%llu,%llu,%lld\n", (unsigned long long)page_count,
(unsigned long long)touch_count, (unsigned long long)faults,
((long long)faults - touch_count));
#if _WIN32
VirtualFree(data, 0, MEM_RELEASE);
#else
munmap((void *)data, alloc_size);
#endif
}
return EXIT_SUCCESS;
}

View File

@@ -0,0 +1,77 @@
#include "point_types.h"
#include <stdio.h>
#include <stdlib.h>
// Forward declarations of the comparison helpers defined at the end of the file
bool compare_point(const Point &p1, const Point &p2);
bool compare_pair(const PointPair &pair1, const PointPair &pair2);
// Writes the pair count followed by the raw pair data to `filename`.
// Nothing is written when the file cannot be opened.
void write_pairs_to_binary(const PairArray &arr, const char *filename) {
  // Binary mode so no newline translation touches the raw bytes
  FILE *fp = fopen(filename, "wb");
  if (!fp) {
    return;
  }

  // Write the count member itself rather than whatever sits at the start of
  // the struct
  fwrite(&(arr.count), sizeof(arr.count), 1, fp);

  if (arr.count > 0) {
    fwrite(arr.pairs, sizeof(*(arr.pairs)), arr.count, fp);
  }

  fclose(fp);
}
// Reads a pair count followed by that many pairs from `filename` into `arr`.
//
// arr.pairs is allocated with malloc and is owned by the caller. `arr` is left
// untouched when the file cannot be opened. When the count cannot be read or
// the allocation fails, arr is set to an empty array; when the file holds
// fewer pairs than its count claims, arr.count is reduced to the number read.
void read_pairs_from_binary(PairArray &arr, const char *filename) {
  // Binary mode so no newline translation touches the raw bytes
  FILE *fp = fopen(filename, "rb");
  if (!fp) {
    return;
  }

  if (fread(&(arr.count), sizeof(arr.count), 1, fp) != 1) {
    arr.count = 0;
    arr.pairs = NULL;
    fclose(fp);
    return;
  }

  arr.pairs = (PointPair *)malloc(arr.count * sizeof(PointPair));
  if (!arr.pairs) {
    arr.count = 0;
    fclose(fp);
    return;
  }

  size_t loaded = fread(arr.pairs, sizeof(PointPair), arr.count, fp);
  if (loaded < arr.count) {
    // Truncated file: expose only the pairs that were actually read
    arr.count = loaded;
  }

  fclose(fp);
}
void write_pairs_to_json(const PairArray &arr, const char *filename) {
FILE *fp = fopen(filename, "w");
if (fp) {
fprintf(fp, "{\n\t\"pairs\": [\n");
for (u64 i = 0; i < arr.count; ++i) {
PointPair pair = arr.pairs[i];
fprintf(fp,
"\t\t{\"x0\": %.16f, \"y0\": %.16f, \"x1\": %.16f, \"y1\": "
"%.16f}%s\n",
pair.p1.x, pair.p1.y, pair.p2.x, pair.p2.y,
i + 1 < arr.count ? "," : "");
}
fprintf(fp, "\t]\n}\n");
fclose(fp);
}
}
bool compare_pair_array(const PairArray &arr1, const PairArray &arr2) {
if (arr1.count != arr2.count) {
return false;
}
for (u64 i = 0; i < arr1.count; ++i) {
if (!compare_pair(arr1.pairs[i], arr2.pairs[i])) {
return false;
}
}
return true;
}
// Returns true when both coordinates of the two points are exactly equal.
bool compare_point(const Point &p1, const Point &p2) {
  if (!(p1.x == p2.x)) {
    return false;
  }
  return p1.y == p2.y;
}
// Returns true when the first points match and the second points match.
bool compare_pair(const PointPair &pair1, const PointPair &pair2) {
  if (!compare_point(pair1.p1, pair2.p1)) {
    return false;
  }
  return compare_point(pair1.p2, pair2.p2);
}

View File

@@ -0,0 +1,122 @@
#include "haversine.h"
#include "point_types.h"
#include "processor/ids.h"
#include "processor/proc_argparser.h"
#include "profiler/timer.h"
#include "json/dstring.h"
#include "json/json_entities.h"
#include "json/parser.h"
#include <aliases.h>
#include <assert.h>
#include <float.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
int main(int argc, char *argv[]) {
PROFILE_START(COUNT_PROFILER_IDS);
SAMPLE_START(PROFILER_ID_CLI_PARSE, "CLI PARSING");
ProcessorArgs args = parse_args(argc, argv);
SAMPLE_END_DEFAULT(PROFILER_ID_CLI_PARSE);
SAMPLE_START(PROFILER_ID_JSON_PARSE, "JSON PARSING");
jentity_t *root = load_json(args.filepath);
assert(root->type == JENTITY_SINGLE && root->value.type == JVAL_COLLECTION);
SAMPLE_END_DEFAULT(PROFILER_ID_JSON_PARSE);
SAMPLE_START(PROFILER_ID_LOAD_JSON_PAIRS, "LOAD JSON PAIRS");
jentity_t *pairs = root->value.collection->begin;
assert(pairs->type == JENTITY_PAIR &&
pairs->pair.value.type == JVAL_COLLECTION);
u64 pair_count = pairs->pair.value.collection->size;
PointPair *point_pairs = (PointPair *)malloc(sizeof(PointPair) * pair_count);
memset(point_pairs, 0, pair_count);
u64 index = 0;
for (jentity_t *pair = pairs->pair.value.collection->begin; pair != NULL;
pair = pair->next) {
assert(index < pair_count && pair->type == JENTITY_SINGLE &&
pair->value.type == JVAL_COLLECTION &&
pair->value.collection->size == 4);
jentity_t *x0 = pair->value.collection->begin;
jentity_t *y0 = x0->next;
jentity_t *x1 = y0->next;
jentity_t *y1 = x1->next;
PointPair p = ((PointPair){
{x0->pair.value.num_dbl, y0->pair.value.num_dbl},
{x1->pair.value.num_dbl, y1->pair.value.num_dbl},
});
point_pairs[index++] = p;
}
SAMPLE_END_DEFAULT(PROFILER_ID_LOAD_JSON_PAIRS);
SAMPLE_START(PROFILER_ID_READ_BINARY, "BINARY READ");
const char *filename = "count_and_distances";
FILE *fp = fopen(filename, "r");
if (!fp) {
printf("Failed to open the %s file", filename);
} else {
// Skip the count
fseek(fp, sizeof(u64), SEEK_SET);
}
SAMPLE_END_DEFAULT(PROFILER_ID_READ_BINARY);
SAMPLE_START(PROFILER_ID_HAVERSINE_SUM, "HAVERSINE SUM");
f64 sum = 0.0;
f64 distance = 0.0;
f64 saved_distance = 0.0;
for (u64 i = 0; i < pair_count; ++i) {
distance = haversine_of_degrees(point_pairs[i], EARTH_RADIUS_KM);
if (fp) {
fread(&saved_distance, sizeof(f64), 1, fp);
if (fabs(distance - saved_distance) > FLT_EPSILON) {
printf("%llu: %.16f does not equal %.16f\n", (unsigned long long)i,
distance, saved_distance);
}
}
sum += distance;
}
SAMPLE_END(PROFILER_ID_HAVERSINE_SUM, sizeof(f64) * pair_count);
SAMPLE_START(PROFILER_ID_HAVERSINE_AVG, "HAVERSINE AVERAGE");
printf("\nAVERAGE DISTANCE: %f\n", sum / pair_count);
SAMPLE_END_DEFAULT(PROFILER_ID_HAVERSINE_AVG);
SAMPLE_START(PROFILER_ID_TEAR_DOWN, "TEAR DOWN");
if (fp) {
fclose(fp);
}
free(point_pairs);
free_json(&root);
SAMPLE_END_DEFAULT(PROFILER_ID_TEAR_DOWN);
PROFILE_END;
return 0;
}

View File

@@ -0,0 +1,44 @@
#include "processor/proc_argparser.h"
#include "aliases.h"
#include <argp.h>
// Callback handed to argp; defined at the end of the file
INTERNAL error_t argp_parser(i32 key, char *arg, argp_state *state);
// argp configuration shared by every parse_args call; filled in by parse_args
INTERNAL argp parser = {};
// Parses the command line with argp and returns the collected arguments.
// The single positional argument is documented as JSON_FILEPATH.
ProcessorArgs parse_args(i32 argc, char *argv[]) {
  parser.args_doc = "JSON_FILEPATH";
  parser.parser = argp_parser;
  parser.options = {};

  ProcessorArgs parsed = {};
  argp_parse(&parser, argc, argv, 0, 0, &parsed);

  return parsed;
}
// argp callback: stores the single positional argument as the file path and
// prints usage when there are more or fewer positional arguments than one.
// Returns ARGP_ERR_UNKNOWN for every key it does not handle.
error_t argp_parser(i32 key, char *arg, argp_state *state) {
  ProcessorArgs *out = (ProcessorArgs *)state->input;

  if (key == ARGP_KEY_ARG) {
    // A positional argument after the first one is one too many
    if (state->arg_num >= 1) {
      argp_usage(state);
    }

    out->filepath = arg;
    return 0;
  }

  if (key == ARGP_KEY_END) {
    // Parsing finished without the required positional argument
    if (state->arg_num < 1) {
      argp_usage(state);
    }

    return 0;
  }

  return ARGP_ERR_UNKNOWN;
}

View File

@@ -0,0 +1,233 @@
#include "profiler/timer.h"
#include "aliases.h"
#include <stdbool.h>
#include <stdio.h>
#include <string.h>
#include <time.h>
#include <x86intrin.h>
#if defined(BASIC_PROFILING) || defined(FULL_PROFILING)
// State of the single, file-local profiler
typedef struct {
profiler_sample_t samples[MAX_PROFILE_SAMPLES]; // one slot per sample id
u64 cpu_freq; // estimated CPU timer frequency, measured in profile_start
u64 size; // number of sample slots reported by profile_end
u64 start; // CPU timer value taken in profile_start
u64 end; // CPU timer value taken in profile_end
u64 max_title_length; // longest sample title seen, used as column width
profiler_sample_t *active; // sample currently being timed, if any
} profiler_t;
// The profiler instance used by every function in this file
INTERNAL profiler_t profiler = {0};
typedef struct timespec timespec_t;
// Returns the number of OS timer units per second. get_os_time counts in
// nanoseconds, hence one billion.
u64 get_os_frequency() {
  const u64 units_per_second = 1000000000;
  return units_per_second;
}
// Returns the CLOCK_MONOTONIC_RAW time in get_os_frequency units, or 0 when
// the clock cannot be read.
u64 get_os_time(void) {
  timespec_t now = {0};

  int status = clock_gettime(CLOCK_MONOTONIC_RAW, &now);
  if (status == 0) {
    return now.tv_sec * get_os_frequency() + now.tv_nsec;
  }

  return 0;
}
// Returns the current value of the CPU time stamp counter.
u64 read_cpu_timer(void) {
  u64 ticks = __rdtsc();
  return ticks;
}
// Estimates the CPU timer frequency by spinning for `milliseconds` of OS time
// and scaling the number of CPU timer ticks that elapsed meanwhile.
// Returns 0 when no OS time elapsed (for example when `milliseconds` is 0).
u64 get_cpu_freq(u64 milliseconds) {
  const u64 units_per_sec = get_os_frequency();
  const u64 target = units_per_sec * milliseconds / 1000;

  const u64 os_begin = get_os_time();
  const u64 tsc_begin = read_cpu_timer();

  // Busy-wait until the requested amount of OS time has passed
  u64 waited = 0;
  while (waited < target) {
    waited = get_os_time() - os_begin;
  }

  const u64 tsc_delta = read_cpu_timer() - tsc_begin;

  if (waited == 0) {
    return 0;
  }

  return tsc_delta * units_per_sec / waited;
}
// Converts a CPU timer tick count to seconds using the given frequency.
f64 time_in_seconds(u64 cpu_time, u64 cpu_freq) {
  f64 ticks = (f64)cpu_time;
  return ticks / cpu_freq;
}
// Starts a profiling session for `count` sample slots. The CPU frequency is
// measured first (a one second calibration), then the start time is taken.
void profile_start(u64 count) {
  profiler.size = count;
  profiler.max_title_length = 0;

  // Calibrate before taking the start time so the wait is not measured
  profiler.cpu_freq = get_cpu_freq(1000);
  profiler.start = read_cpu_timer();
}
// Ends the profiling session and prints the report: the total run time and,
// with FULL_PROFILING, one line per sample that was hit at least once.
// Does nothing when profile_start was not called or no slots were requested.
void profile_end() {
  if (!profiler.start || !profiler.size) {
    return;
  }

  profiler.end = read_cpu_timer();

  u64 total = 0;

  if (profiler.end >= profiler.start) {
    total = profiler.end - profiler.start;
  }

  u16 time_precision = 16;
  u16 time_char_count = 20;

  // clang-format off
  printf("\n============================================================PROFILING============================================================\n");
  // clang-format on

  if (profiler.cpu_freq) {
    // Counters are cast to unsigned long long so they match %llu regardless
    // of how u64 is defined
    printf("Total: %*.*f seconds, %llu (CPU frequency: %llu hz/sec)\n\n",
           time_char_count, time_precision, (f64)total / profiler.cpu_freq,
           (unsigned long long)total, (unsigned long long)profiler.cpu_freq);
  }

#ifdef FULL_PROFILING
  f64 byte_to_mb = 1.0 / (1024.0 * 1024.0);
  f64 mb_to_gb = 1.0 / 1024.0;

  u16 duration_char_count = 22;
  u16 hits_char_count = 10;
  u16 percentage_precision = 8;
  u16 percentage_char_count = 12;
  u16 throughput_precision = 24;
  u16 throughput_char_count = 32;

  profiler_sample_t *sample = NULL;

  for (u64 i = 0; i < profiler.size; ++i) {
    sample = &(profiler.samples[i]);

    // Slots that were never started have nothing to report
    if (sample->hit_count == 0) {
      continue;
    }

    printf("%*s (hits: %*llu): %*llu (%*.*f %%", (i32)profiler.max_title_length,
           sample->title, hits_char_count,
           (unsigned long long)sample->hit_count, duration_char_count,
           (unsigned long long)sample->exclusive_time, percentage_char_count,
           percentage_precision, (f64)(sample->exclusive_time) / total * 100.0);

    if (sample->children_time > 0) {
      printf(", w/ children: %*.*f %%", percentage_char_count,
             percentage_precision,
             (f64)(sample->exclusive_time + sample->children_time) / total *
                 100.0);
    }

    if (sample->byte_count > 0) {
      f64 data_read = (f64)(sample->byte_count) * byte_to_mb;
      f64 sample_time_in_seconds =
          (f64)(sample->exclusive_time + sample->children_time) /
          profiler.cpu_freq;

      printf(", Data read: %*.*f MB, Throughput: %*.*f GB/s",
             throughput_char_count, throughput_precision, data_read,
             throughput_char_count, throughput_precision,
             data_read * mb_to_gb / sample_time_in_seconds);
    }

    printf(")\n");
  }
#endif // FULL_PROFILING
}
#endif // BASIC_PROFILING || FULL_PROFILING
#ifdef FULL_PROFILING
// Starts (or restarts) timing of the sample stored in slot `id`.
// Ids outside the sample table are ignored. The sample that was active until
// now is charged the time since its own start and becomes this sample's
// parent.
void sample_start(u64 id, const char *title) {
if (id >= MAX_PROFILE_SAMPLES) {
return;
}
profiler_sample_t *sample = &(profiler.samples[id]);
// First use of the slot, or the slot is being reused under another title:
// reset all of its statistics
if (!(sample->title) || strcmp(title, sample->title) != 0) {
sample->title = title;
sample->first_start = 0;
sample->start = 0;
sample->exclusive_time = 0;
sample->children_time = 0;
sample->hit_count = 0;
sample->byte_count = 0;
sample->parent = NULL;
u64 length = strlen(sample->title);
if (length > profiler.max_title_length) {
profiler.max_title_length = length;
}
}
sample->start = read_cpu_timer();
if (sample->hit_count == 0) {
sample->first_start = sample->start;
}
++(sample->hit_count);
if (profiler.active) {
// Charge the previously active sample for the time it ran before this one
// started
u64 duration = sample->start - (profiler.active->start);
profiler.active->exclusive_time += duration;
}
if (!(profiler.active) || sample != profiler.active) {
// This handles recursive functions by changing the parent only when a
// function isn't calling itself
sample->parent = profiler.active;
}
profiler.active = sample;
}
// Stops timing of the sample stored in slot `id` and adds `byte_count` to the
// bytes attributed to it. Ids outside the sample table are ignored.
// The elapsed time is added to the sample's own time and to the children time
// of every ancestor; the parent then becomes the active sample again.
void sample_end(u64 id, u64 byte_count) {
if (id >= MAX_PROFILE_SAMPLES) {
return;
}
profiler_sample_t *sample = &(profiler.samples[id]);
// Time since the sample was started, or since a child of it last ended
u64 duration = read_cpu_timer() - sample->start;
sample->exclusive_time += duration;
sample->byte_count += byte_count;
u64 now = read_cpu_timer();
// Reset the start time at the end of the sample to handle recursion
sample->start = now;
profiler_sample_t *parent = sample->parent;
if (parent) {
// Add sample duration to all parents. This handles deep call stacks
while (parent) {
parent->children_time += duration;
parent = parent->parent;
}
// Restart the direct parent's clock so the time spent in this sample is
// not counted again as the parent's own time
sample->parent->start = now;
}
profiler.active = sample->parent;
}
#endif // FULL_PROFILING

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,529 @@
; Symbols exported from this file. Each one is a small loop used as a
; micro-benchmark; the routines read their inputs from rdi, rsi and rdx.
global mov_all_bytes_asm
global nop_all_bytes_asm
global nop_1x3_all_bytes_asm
global nop_1x9_all_bytes_asm
global inc_all_bytes_asm
global dec_all_bytes_asm
global align64_loop
global align1_loop
global align15_loop
global align31_loop
global align63_loop
global align75_loop
global align90_loop
global align112_loop
global rat_add
global rat_mov_add
global read_1
global read_2
global read_3
global read_4
global read_8
global write_1
global write_2
global write_3
global write_4
global write_8
global read_1x2_low
global read_1x2_high
global read_2x2
global read_4x2
global read_8x2
global read_4x2_simd
global read_8x2_simd
global read_16x2_simd
global read_32x2_simd_offset
global read_32x2_simd_no_offset
global read_16x4_simd
global read_32x4_simd
global cache_test ; Expects 3 inputs (pointer, read_count, mask)
global cache_test_unaligned ; Expects 3 inputs (pointer, read_count, mask)
; Writes the low byte of the running index to every byte of a buffer.
; rdi = buffer address, rsi = byte count (assumed to be the first two
; arguments of the C declaration - confirm)
; The store runs before the compare, so the body executes at least once.
mov_all_bytes_asm:
xor rax, rax ; index = 0
.loop:
mov BYTE [rdi + rax * 1], al ; buffer[index] = low byte of index
inc rax
cmp rsi, rax
jne .loop ; repeat until index == count
ret
; Same counting loop as mov_all_bytes_asm with the store replaced by a single
; 3-byte NOP. rdi = iteration count.
nop_all_bytes_asm:
xor rax, rax ; index = 0
.loop:
db 0x0f, 0x1f, 0x00 ; raw encoding of the 3-byte NOP (nop dword [rax])
inc rax
cmp rdi, rax
jne .loop ; repeat until index == count
ret
; Counting loop whose body holds three separate 1-byte NOPs.
; rdi = iteration count.
nop_1x3_all_bytes_asm:
xor rax, rax ; index = 0
.loop:
nop
nop
nop
inc rax
cmp rdi, rax
jne .loop ; repeat until index == count
ret
; Counting loop whose body holds nine separate 1-byte NOPs.
; rdi = iteration count.
nop_1x9_all_bytes_asm:
xor rax, rax ; index = 0
.loop:
nop
nop
nop
nop
nop
nop
nop
nop
nop
inc rax
cmp rdi, rax
jne .loop ; repeat until index == count
ret
; Bare counting loop: increments rax from 0 until it equals rdi.
; rdi = iteration count.
inc_all_bytes_asm:
xor rax, rax ; index = 0
.loop:
inc rax
cmp rdi, rax
jne .loop ; repeat until index == count
ret
; Counts rdi down to zero; the dec sets the flags, so no compare is needed.
; rdi = iteration count. A count of 0 wraps around on the first dec.
dec_all_bytes_asm:
.loop:
dec rdi
jnz .loop ; repeat until the counter reaches zero
ret
; Counting loop whose first instruction starts exactly on a 64-byte boundary.
; rdi = iteration count.
align64_loop:
xor rax, rax ; index = 0
align 64 ; pad so that .loop begins on a 64-byte boundary
.loop:
inc rax
cmp rdi, rax
jne .loop ; repeat until index == count
ret
; Counting loop that starts 1 byte past a 64-byte boundary.
; rdi = iteration count.
align1_loop:
xor rax, rax ; index = 0
align 64
nop ; one 1-byte NOP moves .loop to offset 1
.loop:
inc rax
cmp rdi, rax
jne .loop ; repeat until index == count
ret
; Counting loop that starts 15 bytes past a 64-byte boundary.
; rdi = iteration count.
align15_loop:
xor rax, rax ; index = 0
align 64
%rep 15
nop
%endrep
.loop:
inc rax
cmp rdi, rax
jne .loop ; repeat until index == count
ret
; Counting loop that starts 31 bytes past a 64-byte boundary.
; rdi = iteration count.
align31_loop:
xor rax, rax ; index = 0
align 64
%rep 31
nop
%endrep
.loop:
inc rax
cmp rdi, rax
jne .loop ; repeat until index == count
ret
; Counting loop that starts 63 bytes past a 64-byte boundary, i.e. on the last
; byte before the next boundary. rdi = iteration count.
align63_loop:
xor rax, rax ; index = 0
align 64
%rep 63
nop
%endrep
.loop:
inc rax
cmp rdi, rax
jne .loop ; repeat until index == count
ret
; Counting loop that starts 75 bytes past a 64-byte boundary, which is 11
; bytes into the following 64-byte block. rdi = iteration count.
align75_loop:
xor rax, rax ; index = 0
align 64
%rep 75
nop
%endrep
.loop:
inc rax
cmp rdi, rax
jne .loop ; repeat until index == count
ret
; Counting loop that starts 90 bytes past a 64-byte boundary, which is 26
; bytes into the following 64-byte block. rdi = iteration count.
align90_loop:
xor rax, rax ; index = 0
align 64
%rep 90
nop
%endrep
.loop:
inc rax
cmp rdi, rax
jne .loop ; repeat until index == count
ret
; Counting loop that starts 112 bytes past a 64-byte boundary, which is 48
; bytes into the following 64-byte block. rdi = iteration count.
align112_loop:
xor rax, rax ; index = 0
align 64
%rep 112
nop
%endrep
.loop:
inc rax
cmp rdi, rax
jne .loop ; repeat until index == count
ret
; Loop with two adds to rcx per iteration, each depending on the previous
; value of rcx. rdi = iteration count. rcx is not initialised; only the
; dependency between the adds matters here.
rat_add:
mov rax, rdi ; remaining iterations
.loop:
add rcx, 1
add rcx, 1
dec rax
jnz .loop ; repeat until the counter reaches zero
ret
; Variant of rat_add in which rcx is overwritten from rax before each add, so
; an add no longer depends on the result of the previous add.
; rdi = iteration count.
rat_mov_add:
mov rax, rdi ; remaining iterations
.loop:
mov rcx, rax
add rcx, 1
mov rcx, rax
add rcx, 1
dec rax
jnz .loop ; repeat until the counter reaches zero
ret
; One 8-byte load from [rdi] per iteration; the address never changes.
; rdi = address to read, rsi = count, reduced by 1 per iteration.
read_1:
align 64
.loop:
mov rax, [rdi]
sub rsi, 1
jnle .loop ; continue while the remaining count is > 0 (signed)
ret
; Two 8-byte loads from [rdi] per iteration; the address never changes.
; rdi = address to read, rsi = count, reduced by 2 per iteration.
read_2:
align 64
.loop:
%rep 2
mov rax, [rdi]
%endrep
sub rsi, 2
jnle .loop ; continue while the remaining count is > 0 (signed)
ret
; Three 8-byte loads from [rdi] per iteration; the address never changes.
; rdi = address to read, rsi = count, reduced by 3 per iteration.
read_3:
align 64
.loop:
%rep 3
mov rax, [rdi]
%endrep
sub rsi, 3
jnle .loop ; continue while the remaining count is > 0 (signed)
ret
; Four 8-byte loads from [rdi] per iteration; the address never changes.
; rdi = address to read, rsi = count, reduced by 4 per iteration.
read_4:
align 64
.loop:
%rep 4
mov rax, [rdi]
%endrep
sub rsi, 4
jnle .loop ; continue while the remaining count is > 0 (signed)
ret
; Eight 8-byte loads from [rdi] per iteration; the address never changes.
; rdi = address to read, rsi = count, reduced by 8 per iteration.
read_8:
align 64
.loop:
%rep 8
mov rax, [rdi]
%endrep
sub rsi, 8
jnle .loop ; continue while the remaining count is > 0 (signed)
ret
; One 8-byte store of zero to [rdi] per iteration; the address never changes.
; rdi = address to write, rsi = count, reduced by 1 per iteration.
write_1:
align 64
.loop:
mov QWORD [rdi], 0
sub rsi, 1
jnle .loop ; continue while the remaining count is > 0 (signed)
ret
; Two 8-byte stores of zero to [rdi] per iteration; the address never changes.
; rdi = address to write, rsi = count, reduced by 2 per iteration.
write_2:
align 64
.loop:
%rep 2
mov QWORD [rdi], 0
%endrep
sub rsi, 2
jnle .loop ; continue while the remaining count is > 0 (signed)
ret
; Three 8-byte stores of zero to [rdi] per iteration; the address never
; changes. rdi = address to write, rsi = count, reduced by 3 per iteration.
write_3:
align 64
.loop:
%rep 3
mov QWORD [rdi], 0
%endrep
sub rsi, 3
jnle .loop ; continue while the remaining count is > 0 (signed)
ret
; Four 8-byte stores of zero to [rdi] per iteration; the address never
; changes. rdi = address to write, rsi = count, reduced by 4 per iteration.
write_4:
align 64
.loop:
%rep 4
mov QWORD [rdi], 0
%endrep
sub rsi, 4
jnle .loop ; continue while the remaining count is > 0 (signed)
ret
; Eight 8-byte stores of zero to [rdi] per iteration; the address never
; changes. rdi = address to write, rsi = count, reduced by 8 per iteration.
write_8:
align 64
.loop:
%rep 8
mov QWORD [rdi], 0
%endrep
sub rsi, 8
jnle .loop ; continue while the remaining count is > 0 (signed)
ret
; Two 1-byte loads from [rdi] into al (the low byte of rax) per iteration.
; rdi = address to read, rsi = count, reduced by 2 per iteration.
read_1x2_low:
align 64
.loop:
%rep 2
mov al, [rdi]
%endrep
sub rsi, 2
jnle .loop ; continue while the remaining count is > 0 (signed)
ret
; Two 1-byte loads from [rdi] into ah (bits 8-15 of rax) per iteration.
; rdi = address to read, rsi = count, reduced by 2 per iteration.
read_1x2_high:
align 64
.loop:
%rep 2
mov ah, [rdi]
%endrep
sub rsi, 2
jnle .loop ; continue while the remaining count is > 0 (signed)
ret
; Two 2-byte loads from [rdi] into ax per iteration.
; rdi = address to read, rsi = count, reduced by 2 per iteration.
read_2x2:
align 64
.loop:
%rep 2
mov ax, [rdi]
%endrep
sub rsi, 2
jnle .loop ; continue while the remaining count is > 0 (signed)
ret
; Two 4-byte loads from [rdi] into eax per iteration.
; rdi = address to read, rsi = count, reduced by 2 per iteration.
read_4x2:
align 64
.loop:
%rep 2
mov eax, [rdi]
%endrep
sub rsi, 2
jnle .loop ; continue while the remaining count is > 0 (signed)
ret
; Two 8-byte loads from [rdi] into rax per iteration.
; rdi = address to read, rsi = count, reduced by 2 per iteration.
read_8x2:
align 64
.loop:
%rep 2
mov rax, [rdi]
%endrep
sub rsi, 2
jnle .loop ; continue while the remaining count is > 0 (signed)
ret
; Two 4-byte loads per iteration, from [rdi] and [rdi + 4]. Despite the name
; the loads go through the general-purpose register r8d.
; rdi = address to read, rsi = byte count; rax counts 8 bytes per iteration.
read_4x2_simd:
xor rax, rax ; bytes counted so far
align 64
.loop:
mov r8d, [rdi]
mov r8d, [rdi + 4]
add rax, 8
cmp rax, rsi
jb .loop ; continue while counted < byte count (unsigned)
ret
; Two 8-byte loads per iteration, from [rdi] and [rdi + 8]. Despite the name
; the loads go through the general-purpose register r8.
; rdi = address to read, rsi = byte count; rax counts 16 bytes per iteration.
read_8x2_simd:
xor rax, rax ; bytes counted so far
align 64
.loop:
mov r8, [rdi]
mov r8, [rdi + 8]
add rax, 16
cmp rax, rsi
jb .loop ; continue while counted < byte count (unsigned)
ret
; Two 16-byte unaligned vector loads per iteration, from [rdi] and [rdi + 16].
; rdi = address to read, rsi = byte count; rax counts 32 bytes per iteration.
read_16x2_simd:
xor rax, rax ; bytes counted so far
align 64
.loop:
vmovdqu xmm0, [rdi]
vmovdqu xmm0, [rdi + 16]
add rax, 32
cmp rax, rsi
jb .loop ; continue while counted < byte count (unsigned)
ret
; Two 32-byte unaligned vector loads per iteration, from [rdi] and
; [rdi + 32], so the two loads cover adjacent memory.
; rdi = address to read, rsi = byte count; rax counts 64 bytes per iteration.
read_32x2_simd_offset:
xor rax, rax ; bytes counted so far
align 64
.loop:
vmovdqu ymm0, [rdi]
vmovdqu ymm0, [rdi + 32]
add rax, 64
cmp rax, rsi
jb .loop ; continue while counted < byte count (unsigned)
ret
; Two 32-byte unaligned vector loads per iteration, both from the same
; address [rdi].
; rdi = address to read, rsi = byte count; rax counts 64 bytes per iteration.
read_32x2_simd_no_offset:
xor rax, rax ; bytes counted so far
align 64
.loop:
vmovdqu ymm0, [rdi]
vmovdqu ymm0, [rdi]
add rax, 64
cmp rax, rsi
jb .loop ; continue while counted < byte count (unsigned)
ret
; Four 16-byte unaligned vector loads per iteration: the pair [rdi] and
; [rdi + 16] is issued twice.
; rdi = address to read, rsi = byte count; rax counts 64 bytes per iteration.
read_16x4_simd:
xor rax, rax ; bytes counted so far
align 64
.loop:
%rep 2
vmovdqu xmm0, [rdi]
vmovdqu xmm0, [rdi + 16]
%endrep
add rax, 64
cmp rax, rsi
jb .loop ; continue while counted < byte count (unsigned)
ret
; Four 32-byte unaligned vector loads per iteration, all from the same
; address [rdi].
; rdi = address to read, rsi = byte count; rax counts 128 bytes per iteration.
read_32x4_simd:
xor rax, rax ; bytes counted so far
align 64
.loop:
%rep 2
vmovdqu ymm0, [rdi]
vmovdqu ymm0, [rdi]
%endrep
add rax, 128
cmp rax, rsi
jb .loop ; continue while counted < byte count (unsigned)
ret
; Reads 128 bytes (four 32-byte unaligned vector loads) per iteration from a
; buffer, at an offset that advances by 128 and is wrapped with a mask, so
; only a window of the buffer is touched repeatedly.
; rdi = pointer, rsi = read_count (bytes, reduced by 128 per iteration),
; rdx = mask applied to the offset.
; The base pointer is kept in r11, which the System V AMD64 calling convention
; lets a function overwrite; rbx must be preserved for the caller and is
; therefore not used.
cache_test:
    xor r10, r10 ; Zero loop counter
    mov r11, rdi ; Save original pointer
.loop:
    add rdi, r10 ; Advance the pointer
    add r10, 128 ; Increment loop counter
    and r10, rdx ; Mask offset
    vmovdqu ymm0, [rdi + 0]
    vmovdqu ymm1, [rdi + 32]
    vmovdqu ymm2, [rdi + 64]
    vmovdqu ymm3, [rdi + 96]
    mov rdi, r11 ; Restore original pointer
    sub rsi, 128 ; Decrement count
    ja .loop ; Continue while the count did not borrow and is non-zero
    ret
; Same as cache_test, but the base pointer is first moved forward by 5 bytes
; so that every load starts at an address 5 bytes past the one passed in.
; rdi = pointer, rsi = read_count (bytes, reduced by 128 per iteration),
; rdx = mask applied to the offset.
; The base pointer is kept in r11, which the System V AMD64 calling convention
; lets a function overwrite; rbx must be preserved for the caller and is
; therefore not used.
cache_test_unaligned:
    xor r10, r10 ; Zero loop counter
    add rdi, 5 ; Unalign pointer
    mov r11, rdi ; Save original pointer
.loop:
    add rdi, r10 ; Advance the pointer
    add r10, 128 ; Increment loop counter
    and r10, rdx ; Mask offset
    vmovdqu ymm0, [rdi + 0]
    vmovdqu ymm1, [rdi + 32]
    vmovdqu ymm2, [rdi + 64]
    vmovdqu ymm3, [rdi + 96]
    mov rdi, r11 ; Restore original pointer
    sub rsi, 128 ; Decrement count
    ja .loop ; Continue while the count did not borrow and is non-zero
    ret

View File

@@ -0,0 +1,168 @@
#include "repetition_testing/reptester.h"
#include "profiler/timer.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/resource.h>
#include <sys/time.h>
// Maps and zeroes the tester's buffer when `type` asks for a per-test
// allocation and no buffer is set yet. Other allocation types do nothing.
// When the mapping fails the buffer stays NULL.
void handle_alloc(reptester *tester, alloc_type type) {
  switch (type) {
  case ALLOC_TYPE_WITH_MALLOC:
    if (!(tester->params.buffer)) {
      void *mapping =
          mmap(NULL, tester->params.read_size + 1, PROT_READ | PROT_WRITE,
               MAP_ANON | MAP_SHARED | MAP_NORESERVE, -1, 0);

      // mmap reports failure with MAP_FAILED, which must not be stored or
      // written to
      if (mapping == MAP_FAILED) {
        break;
      }

      tester->params.buffer = (char *)mapping;

      memset(tester->params.buffer, 0, tester->params.read_size + 1);
    }

    break;
  default:
    break;
  }
}
// Unmaps the tester's buffer and clears the pointer when `type` asks for a
// per-test allocation and a buffer is set. Other allocation types do nothing.
void handle_free(reptester *tester, alloc_type type) {
  if (type != ALLOC_TYPE_WITH_MALLOC) {
    return;
  }

  char *mapped = tester->params.buffer;
  if (!mapped) {
    return;
  }

  munmap(mapped, tester->params.read_size + 1);
  tester->params.buffer = NULL;
}
// Runs `func` repeatedly and prints timing and page fault statistics under
// `func_name`.
//
// The test keeps running until `wait_time_secs` have passed without a new
// minimum time being found. For ALLOC_TYPE_WITH_MALLOC a temporary buffer is
// mapped for the duration of the test and the tester's own buffer is put back
// afterwards. When the buffer cannot be mapped, or a run reads fewer bytes
// than expected, a message is printed and no results are reported.
void run_func_test(reptester *tester, reptest_func func, const char *func_name,
                   alloc_type type) {
  tester->test_start_time = read_cpu_timer();
  tester->test_time_secs = 0.0;
  tester->current_run = 1;
  tester->tstats = {
      UINT64_MAX, // min_time
      0,          // max_time
      0,          // avg_time
      0,          // total_time
  };
  tester->mstats = {
      UINT64_MAX, // min_faults
      0,          // max_faults
      0,          // avg_faults
      0,          // total_bytes
      0,          // total_faults
  };
  tester->results = {};

  char *buffer = NULL;

  if (type == ALLOC_TYPE_WITH_MALLOC) {
    void *mapping =
        mmap(NULL, tester->params.read_size + 1, PROT_READ | PROT_WRITE,
             MAP_ANON | MAP_SHARED | MAP_NORESERVE, -1, 0);

    // mmap reports failure with MAP_FAILED; the tester's buffer is untouched
    if (mapping == MAP_FAILED) {
      printf("Failed to map the test buffer for %s\n", func_name);
      return;
    }

    buffer = tester->params.buffer;
    tester->params.buffer = (char *)mapping;

    memset(tester->params.buffer, 0, tester->params.read_size + 1);
  }

  bool read_failed = false;

  while (tester->test_time_secs <= tester->wait_time_secs) {
    func(tester, type);

    if (tester->results.bytes_read <
        tester->params.read_size * tester->params.read_count) {
      printf("Failed to read the entire file (Total size: %lu, Bytes read: "
             "%lu)\n",
             tester->params.read_size, tester->results.bytes_read);

      // Leave through the common exit so the temporary buffer is released
      read_failed = true;
      break;
    }

    tester->tstats.total_time += tester->results.read_time;
    tester->mstats.total_bytes += tester->results.bytes_read;
    tester->mstats.total_faults += tester->results.page_faults;

    // Checked independently: a single run can be both the slowest and the
    // fastest seen so far (the first run always is)
    if (tester->results.read_time > tester->tstats.max_time) {
      tester->tstats.max_time = tester->results.read_time;
      tester->mstats.max_faults = tester->results.page_faults;
    }

    if (tester->results.read_time < tester->tstats.min_time) {
      // A new minimum restarts the waiting period
      tester->test_start_time = read_cpu_timer();
      tester->tstats.min_time = tester->results.read_time;
      tester->mstats.min_faults = tester->results.page_faults;
    }

    if (tester->results.page_faults > tester->mstats.max_faults) {
      tester->mstats.max_faults = tester->results.page_faults;
    } else if (tester->results.page_faults < tester->mstats.min_faults) {
      tester->mstats.min_faults = tester->results.page_faults;
    }

    tester->test_time_secs = time_in_seconds(
        read_cpu_timer() - tester->test_start_time, tester->cpu_freq);

    ++(tester->current_run);
  }

  if (type == ALLOC_TYPE_WITH_MALLOC) {
    munmap(tester->params.buffer, tester->params.read_size + 1);

    tester->params.buffer = buffer;
  }

  if (!read_failed) {
    print_results(tester, func_name);
  }
}
// Returns the sum of the minor and major page fault counters that getrusage
// reports for this process.
u64 page_fault_count() {
  rusage stats;
  getrusage(RUSAGE_SELF, &stats);

  u64 total_faults = stats.ru_minflt + stats.ru_majflt;
  return total_faults;
}
// Prints the minimum, maximum and average run time of a finished test
// together with the matching throughput and page fault figures. The averages
// are also stored back into the tester's statistics.
// Only the run count is printed when no run completed.
void print_results(reptester *tester, const char *name) {
  f64 kb = 1024.0;
  f64 gb = kb * kb * kb;

  f64 size_in_kb =
      (f64)(tester->params.read_size * tester->params.read_count) / kb;
  f64 size_in_gb =
      (f64)(tester->params.read_size * tester->params.read_count) / gb;

  // current_run is the number of the next run, so one less has completed
  u64 run_count = tester->current_run - 1;

  if (run_count == 0) {
    // Nothing to average; avoid dividing by zero below
    printf("\n%s: %lu runs\n", name, run_count);
    return;
  }

  tester->tstats.avg_time = tester->tstats.total_time / run_count;
  tester->mstats.avg_faults = tester->mstats.total_faults / run_count;

  printf("\n%s: %lu runs\n", name, run_count);

  printf("MIN: %lu (%fGB/s)", tester->tstats.min_time,
         size_in_gb /
             time_in_seconds(tester->tstats.min_time, tester->cpu_freq));
  if (tester->mstats.min_faults > 0) {
    printf(", FAULTS: %lu (%fK/fault)\n", tester->mstats.min_faults,
           size_in_kb / tester->mstats.min_faults);
  } else {
    printf("\n");
  }

  printf("MAX: %lu (%fGB/s)", tester->tstats.max_time,
         size_in_gb /
             time_in_seconds(tester->tstats.max_time, tester->cpu_freq));
  if (tester->mstats.max_faults > 0) {
    printf(", FAULTS: %lu (%fK/fault)\n", tester->mstats.max_faults,
           size_in_kb / tester->mstats.max_faults);
  } else {
    printf("\n");
  }

  printf("AVG: %lu (%fGB/s)", tester->tstats.avg_time,
         size_in_gb /
             time_in_seconds(tester->tstats.avg_time, tester->cpu_freq));
  if (tester->mstats.avg_faults > 0) {
    printf(", FAULTS: %lu (%fK/fault)\n", tester->mstats.avg_faults,
           tester->mstats.total_bytes / kb / tester->mstats.avg_faults);
  } else {
    printf("\n");
  }
}