performance-aware-programming/8086_assembly_02/dasm.cc
2024-06-22 13:54:45 +01:00

286 lines
7.8 KiB
C++

#include <bits/types/FILE.h>
#include <math.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
// Fixed opcode bits of each mov encoding this disassembler recognises.
// An opcode byte is matched by masking it with the corresponding INST_MASKS
// entry and comparing the result against one of these values.
enum class INST_BITS {
REG_MEM_REG = 0x88, // 100010dw: register/memory to/from register
MEM_TO_ACC = 0xa0, // 1010000w: memory to accumulator
ACC_TO_MEM = 0xa2, // 1010001w: accumulator to memory
IMM_TO_REG = 0xb0, // 1011wreg: immediate to register
IMM_TO_REG_MEM = 0xc6, // 1100011w: immediate to register/memory
};
// Masks selecting the fixed opcode bits of each encoding; the bits cleared in
// a mask are the per-instruction fields (d, w, reg) that vary between uses.
enum class INST_MASKS {
REG_MEM_REG = 0xfc, // ignores the low two bits (d and w)
IMM_TO_REG = 0xf0, // ignores the low four bits (w and reg)
IMM_TO_REG_MEM = 0xfe, // ignores the low bit (w)
ACCUMULATOR = 0xfe, // ignores the low bit (w); shared by MEM_TO_ACC and ACC_TO_MEM
};
// Values of the two-bit mod field, left in place in the top two bits of the
// operands byte (so they can be compared after masking with 0xc0).
enum class MODE {
MEM = 0x00, // mod = 00
MEM8 = 0x40, // mod = 01
MEM16 = 0x80, // mod = 10
REG = 0xc0, // mod = 11: main decodes r/m as a register for this value
};
// Returns true when (instruction & mask) == inst_bits.
bool mask_instruction(uint8_t instruction, uint8_t inst_bits, uint8_t mask);
// Copies into dest the name of the register selected by the low three bits of
// instruction; word selects the 16-bit name instead of the 8-bit one.
void decode_register(uint8_t instruction, bool word, char *dest);
// Copies into dest the address expression (e.g. "bx + si") selected by the
// low three bits of instruction.
void decode_rm(uint8_t instruction, char *dest);
// Writes the memory-operand text for the operands byte into rm (without the
// surrounding brackets), reading any displacement bytes from fp.
void stringify_rm_and_disp(FILE *fp, uint8_t operands, char *rm,
uint32_t buff_size);
// Reads the address following an accumulator mov opcode from fp and writes
// the formatted instruction text into dest; reg_dest puts the register first.
void handle_accumulator_mov_instructions(FILE *fp, uint8_t inst, bool reg_dest,
char *dest);
int main(int argc, char *argv[]) {
if (argc < 2) {
printf("Please provide a file to disassemble\n");
return 1;
}
const char *filename = argv[1];
FILE *fp = fopen(filename, "rb");
if (fp) {
uint8_t inst = 0;
const char *op = "";
char out_filename[4096] = {0};
sprintf(out_filename, "%s_out.asm", filename);
FILE *out = fopen(out_filename, "w");
if (out) {
fprintf(out, "; Disassembled by DASM\n\nbits 16\n\n");
while (fread(&inst, sizeof(inst), 1, fp)) {
if (mask_instruction(inst, (uint8_t)INST_BITS::REG_MEM_REG,
(uint8_t)INST_MASKS::REG_MEM_REG)) {
op = "mov";
uint8_t operands = 0;
fread(&operands, sizeof(operands), 1, fp);
// Instruction bit pattern is:
// 7 6 5 4 3 2 1 0
// -------------------------------
// | 1 | 0 | 0 | 0 | 1 | 0 | d | w |
// -------------------------------
//
// Operands bit pattern is:
// 7 6 5 4 3 2 1 0
// -------------------------------
// | mod | reg | r/m |
// -------------------------------
bool reg_dest = mask_instruction(inst, 0x02, 0x02);
bool word = mask_instruction(inst, 0x01, 0x01);
char reg[3] = {0};
decode_register(operands >> 3, word, reg);
if (mask_instruction(operands, (uint8_t)MODE::REG,
(uint8_t)MODE::REG)) {
char rm[3] = {0};
decode_register(operands, word, rm);
fprintf(out, "%s %s, %s\n", op, reg_dest ? reg : rm,
reg_dest ? rm : reg);
} else {
char rm[20] = {0};
stringify_rm_and_disp(fp, operands, rm, 20);
fprintf(out, reg_dest ? "%s %s, [%s]\n" : "%s [%s], %s\n", op,
reg_dest ? reg : rm, reg_dest ? rm : reg);
}
} else if (mask_instruction(inst, (uint8_t)INST_BITS::IMM_TO_REG,
(uint8_t)INST_MASKS::IMM_TO_REG)) {
op = "mov";
// Bit pattern is:
// 7 6 5 4 3 2 1 0
// -------------------------------
// | 1 | 0 | 1 | 1 | w | reg |
// -------------------------------
//
// So, we need to mask the fourth bit to check the w flag
bool word = mask_instruction(inst, 0x08, 0x08);
uint8_t next_bytes = word ? 2 : 1;
char reg[3] = {0};
decode_register(inst, word, reg);
int16_t data = 0;
fread(&data, sizeof(next_bytes), next_bytes, fp);
fprintf(out, "%s %s, %d\n", op, reg, word ? data : (int8_t)data);
} else if (mask_instruction(inst, (uint8_t)INST_BITS::IMM_TO_REG_MEM,
(uint8_t)INST_MASKS::IMM_TO_REG_MEM)) {
op = "mov";
uint8_t operands = 0;
fread(&operands, sizeof(operands), 1, fp);
// Instruction bit pattern is:
// 7 6 5 4 3 2 1 0
// -------------------------------
// | 1 | 1 | 0 | 0 | 0 | 1 | 1 | w |
// -------------------------------
//
// Operands bit pattern is:
// 7 6 5 4 3 2 1 0
// -------------------------------
// | mod | 000 | r/m |
// -------------------------------
bool word = mask_instruction(inst, 0x01, 0x01);
char rm[20] = {0};
stringify_rm_and_disp(fp, operands, rm, 20);
uint8_t next_bytes = word ? 2 : 1;
int16_t data = 0;
fread(&data, sizeof(next_bytes), next_bytes, fp);
fprintf(out, "%s [%s], %s %d\n", op, rm, word ? "word" : "byte",
word ? data : (int8_t)data);
} else if (mask_instruction(inst, (uint8_t)INST_BITS::MEM_TO_ACC,
(uint8_t)INST_MASKS::ACCUMULATOR)) {
char inst_out[256] = {0};
handle_accumulator_mov_instructions(fp, inst, true, inst_out);
fprintf(out, "%s\n", inst_out);
} else if (mask_instruction(inst, (uint8_t)INST_BITS::ACC_TO_MEM,
(uint8_t)INST_MASKS::ACCUMULATOR)) {
char inst_out[256] = {0};
handle_accumulator_mov_instructions(fp, inst, false, inst_out);
fprintf(out, "%s\n", inst_out);
} else {
printf("It's not a mov operation\n");
}
}
fclose(out);
} else {
printf("Failed to open output file\n");
}
fclose(fp);
} else {
printf("Failed to open the selected file\n");
}
return 0;
}
// Tests whether the bits of `instruction` picked out by `mask` are exactly
// `inst_bits`. Bits outside the mask are ignored.
bool mask_instruction(uint8_t instruction, uint8_t inst_bits, uint8_t mask) {
  const uint8_t selected = (uint8_t)(instruction & mask);
  if (selected != inst_bits) {
    return false;
  }
  return true;
}
// Writes the name of an 8086 register into `dest`.
//
// The register number is taken from the low three bits of `instruction`;
// `word` selects the 16-bit name (ax, cx, ...) over the 8-bit one (al, cl, ...).
// `dest` must have room for three characters (two letters plus terminator).
void decode_register(uint8_t instruction, bool word, char *dest) {
  static const char *const byte_names[8] = {"al", "cl", "dl", "bl",
                                            "ah", "ch", "dh", "bh"};
  static const char *const word_names[8] = {"ax", "cx", "dx", "bx",
                                            "sp", "bp", "si", "di"};
  const unsigned reg_number = instruction & 0x07u;
  const char *name = byte_names[reg_number];
  if (word) {
    name = word_names[reg_number];
  }
  strcpy(dest, name);
}
// Writes into `dest` the address expression selected by the r/m field, i.e.
// the low three bits of `instruction`. The longest expression is "bx + si",
// so `dest` needs at least eight bytes.
void decode_rm(uint8_t instruction, char *dest) {
  const char *expression;
  switch (instruction & 0x07) {
  case 0:
    expression = "bx + si";
    break;
  case 1:
    expression = "bx + di";
    break;
  case 2:
    expression = "bp + si";
    break;
  case 3:
    expression = "bp + di";
    break;
  case 4:
    expression = "si";
    break;
  case 5:
    expression = "di";
    break;
  case 6:
    expression = "bp";
    break;
  default: // 7 is the only remaining value of a three-bit field
    expression = "bx";
    break;
  }
  strcpy(dest, expression);
}
// Builds the text of a memory operand (without the surrounding brackets).
//
// fp        stream positioned just after the operands byte; any displacement
//           or direct-address bytes are consumed from it.
// operands  the mod/reg/r-m byte: mod in bits 7-6, r/m in bits 2-0.
// rm        output buffer; receives e.g. "bx + si", "bp - 12" or "4660".
// buff_size capacity of rm in bytes. It must hold the longest base
//           expression ("bx + si" plus terminator); the displacement suffix
//           is truncated rather than written past the end.
//
// Number of bytes consumed, by mod field:
//   00 -> none, except r/m == 110 which is a 16-bit direct address
//   01 -> one signed displacement byte
//   10 -> two displacement bytes (little-endian, printed as signed)
//   11 -> none (register operand; callers are expected to handle it)
// A zero displacement is omitted from the text. If the stream ends early
// the missing bytes are treated as zero, since there is no error channel.
void stringify_rm_and_disp(FILE *fp, uint8_t operands, char *rm,
                           uint32_t buff_size) {
  decode_rm(operands, rm);

  const unsigned mod = operands >> 6;
  // All three r/m bits must be compared: testing only 0x06 would also match
  // r/m == 111 ([bx]) and wrongly swallow two bytes.
  const bool direct_address =
      mod == 0 && mask_instruction(operands, 0x06, 0x07);

  size_t disp_bytes = 0;
  if (direct_address || mod == 2) {
    disp_bytes = 2;
  } else if (mod == 1) {
    disp_bytes = 1;
  }

  uint8_t raw[2] = {0, 0};
  if (disp_bytes > 0 && fread(raw, 1, disp_bytes, fp) != disp_bytes) {
    raw[0] = 0;
    raw[1] = 0;
  }

  if (direct_address) {
    // An address is unsigned, and address 0 is still a direct address, so
    // always replace the "bp" placeholder that decode_rm produced.
    const unsigned address = (unsigned)raw[0] | ((unsigned)raw[1] << 8);
    snprintf(rm, buff_size, "%u", address);
    return;
  }

  // Assemble and sign-extend by hand so the result does not depend on the
  // endianness of the host.
  int disp = raw[0];
  if (disp_bytes == 2) {
    disp |= raw[1] << 8;
    if (disp >= 0x8000) {
      disp -= 0x10000;
    }
  } else if (disp >= 0x80) {
    disp -= 0x100;
  }
  if (disp == 0) {
    return;
  }

  const size_t used = strlen(rm);
  if (used < buff_size) {
    snprintf(rm + used, buff_size - used, " %c %d", disp < 0 ? '-' : '+',
             disp < 0 ? -disp : disp);
  }
}
// Formats a "mov accumulator <-> memory" instruction (opcodes 0xa0-0xa3).
//
// fp        stream positioned just after the opcode byte; the two address
//           bytes (low byte first) are consumed from it.
// inst      the opcode byte; bit 0 (w) selects ax (set) or al (clear).
// reg_dest  true for memory-to-accumulator ("mov ax, [addr]"), false for
//           accumulator-to-memory ("mov [addr], ax").
// dest      output buffer; needs at least 16 bytes ("mov [65535], ax").
//
// The address is always 16 bits wide: the w bit only changes the width of
// the data moved, not the size of the address that follows the opcode.
// If the stream ends early the missing address bytes are treated as zero.
void handle_accumulator_mov_instructions(FILE *fp, uint8_t inst, bool reg_dest,
                                         char *dest) {
  const bool word = (inst & 0x01) != 0;
  const char *accumulator = word ? "ax" : "al";

  uint8_t raw[2] = {0, 0};
  const size_t got = fread(raw, 1, sizeof(raw), fp);
  for (size_t i = got; i < sizeof(raw); i++) {
    raw[i] = 0;
  }
  // Assemble byte by byte so the result does not depend on host endianness.
  const unsigned addr = (unsigned)raw[0] | ((unsigned)raw[1] << 8);

  if (reg_dest) {
    sprintf(dest, "mov %s, [%u]", accumulator, addr);
  } else {
    sprintf(dest, "mov [%u], %s", addr, accumulator);
  }
}