#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

// Opcode bit patterns (operand bits zeroed) of the MOV encodings this
// disassembler understands.
enum class INST_MASKS {
  IMM_TO_REG = 0xb0,   // 1 0 1 1 w reg       : mov reg, immediate
  REG_MEM_REG = 0x88,  // 1 0 0 0 1 0 d w     : mov reg/mem <-> reg
};

// Values of the MOD field (top two bits) of the operands byte.
enum class MODE {
  MEM = 0x00,    // memory operand, no displacement bytes follow
  MEM8 = 0x40,   // memory operand, one displacement byte follows
  MEM16 = 0x80,  // memory operand, two displacement bytes follow
  REG = 0xc0,    // r/m names a register
};

// Bits of the first instruction byte that identify each opcode. The remaining
// low bits carry w/reg (immediate form) or d/w (reg/mem form) and must be
// ignored when matching, otherwise unrelated opcodes are mistaken for mov.
static const uint8_t IMM_TO_REG_OPCODE_BITS = 0xf0;
static const uint8_t REG_MEM_REG_OPCODE_BITS = 0xfc;

// The r/m field is the low three bits of the operands byte; with MOD == 00 the
// value 110 means "16-bit direct address" instead of a base register.
static const uint8_t RM_BITS = 0x07;
static const uint8_t RM_DIRECT_ADDRESS = 0x06;

bool mask_instruction(uint8_t instruction, uint8_t mask);
void decode_register(uint8_t instruction, bool word, char *dest);
void decode_rm(uint8_t instruction, char *dest);

// Returns true when the opcode bits of `instruction` equal `opcode`.
static bool is_opcode(uint8_t instruction, INST_MASKS opcode,
                      uint8_t opcode_bits) {
  return (instruction & opcode_bits) == (uint8_t)opcode;
}

// Reads `count` (0, 1 or 2) bytes from `fp` as a little-endian value.
// Returns false if `count` is out of range or the stream ends early.
static bool read_le16(FILE *fp, uint8_t count, uint16_t *value) {
  uint8_t bytes[2] = {0, 0};
  if (count > sizeof(bytes) || fread(bytes, 1, count, fp) != count) {
    return false;
  }
  *value = (uint16_t)(bytes[0] | (bytes[1] << 8));
  return true;
}

// Interprets `raw` as a signed 16-bit value (word) or a sign-extended 8-bit
// value (byte).
static int to_signed(uint16_t raw, bool word) {
  return word ? (int)(int16_t)raw : (int)(int8_t)(uint8_t)raw;
}

// Decodes "mov reg, immediate" whose first byte is `inst`, reading the
// immediate from `in` and writing one assembly line to `out`.
// Returns false if the input ends before the immediate is complete.
static bool disassemble_imm_to_reg(uint8_t inst, FILE *in, FILE *out) {
  // Bit pattern is:
  //   7   6   5   4   3   2   1   0
  // -------------------------------
  // | 1 | 0 | 1 | 1 | w |   reg   |
  // -------------------------------
  // so bit 3 is the w flag.
  bool word = mask_instruction(inst, 0x08);

  char reg[3] = {0};
  decode_register(inst, word, reg);

  uint16_t raw = 0;
  if (!read_le16(in, word ? 2 : 1, &raw)) {
    return false;
  }

  fprintf(out, "mov %s, %d\n", reg, to_signed(raw, word));
  return true;
}

// Decodes "mov reg/mem, reg" or "mov reg, reg/mem" whose first byte is `inst`,
// reading the operands byte and any displacement from `in` and writing one
// assembly line to `out`.
// Returns false if the input ends in the middle of the instruction.
static bool disassemble_reg_mem_reg(uint8_t inst, FILE *in, FILE *out) {
  // Instruction bit pattern is:
  //   7   6   5   4   3   2   1   0
  // -------------------------------
  // | 1 | 0 | 0 | 0 | 1 | 0 | d | w |
  // -------------------------------
  //
  // Operands bit pattern is:
  //   7   6   5   4   3   2   1   0
  // -------------------------------
  // |  mod  |    reg    |    r/m    |
  // -------------------------------
  uint8_t operands = 0;
  if (fread(&operands, sizeof(operands), 1, in) != 1) {
    return false;
  }

  bool reg_dest = mask_instruction(inst, 0x02);
  bool word = mask_instruction(inst, 0x01);

  char reg[3] = {0};
  decode_register(operands >> 3, word, reg);

  // Large enough for the longest base ("bp + di") plus " - 32768".
  char rm[20] = {0};

  if (mask_instruction(operands, (uint8_t)MODE::REG)) {
    decode_register(operands, word, rm);
    fprintf(out, "mov %s, %s\n", reg_dest ? reg : rm, reg_dest ? rm : reg);
    return true;
  }

  // For the memory modes the MOD value equals the number of displacement
  // bytes, except for the direct-address special case which always has two.
  uint8_t mod = operands >> 6;
  bool direct_address =
      mod == 0 && (operands & RM_BITS) == RM_DIRECT_ADDRESS;
  uint8_t disp_bytes = direct_address ? 2 : mod;

  uint16_t raw = 0;
  if (!read_le16(in, disp_bytes, &raw)) {
    return false;
  }

  if (direct_address) {
    // An address is unsigned; print it even when it is zero.
    snprintf(rm, sizeof(rm), "%u", (unsigned)raw);
  } else {
    decode_rm(operands, rm);
    int disp = to_signed(raw, disp_bytes == 2);
    if (disp != 0) {
      size_t used = strlen(rm);
      snprintf(rm + used, sizeof(rm) - used, " %c %d", disp < 0 ? '-' : '+',
               disp < 0 ? -disp : disp);
    }
  }

  if (reg_dest) {
    fprintf(out, "mov %s, [%s]\n", reg, rm);
  } else {
    fprintf(out, "mov [%s], %s\n", rm, reg);
  }
  return true;
}

// Disassembles the binary file named by argv[1] into "<name>_out.asm".
// Only the two MOV encodings listed in INST_MASKS are decoded; any other byte
// is reported on stdout and skipped.
// Returns 0 on success and 1 on a usage, file or truncated-input error.
int main(int argc, char *argv[]) {
  if (argc < 2) {
    printf("Please provide a file to disassemble\n");
    return 1;
  }

  const char *filename = argv[1];

  char out_filename[4096] = {0};
  int written =
      snprintf(out_filename, sizeof(out_filename), "%s_out.asm", filename);
  if (written < 0 || (size_t)written >= sizeof(out_filename)) {
    printf("Failed to open output file\n");
    return 1;
  }

  FILE *fp = fopen(filename, "rb");
  if (!fp) {
    printf("Failed to open the selected file\n");
    return 1;
  }

  FILE *out = fopen(out_filename, "w");
  if (!out) {
    printf("Failed to open output file\n");
    fclose(fp);
    return 1;
  }

  int status = 0;
  fprintf(out, "; Disassembled by DASM\n\nbits 16\n\n");

  uint8_t inst = 0;
  while (fread(&inst, sizeof(inst), 1, fp) == 1) {
    bool complete = true;

    if (is_opcode(inst, INST_MASKS::IMM_TO_REG, IMM_TO_REG_OPCODE_BITS)) {
      complete = disassemble_imm_to_reg(inst, fp, out);
    } else if (is_opcode(inst, INST_MASKS::REG_MEM_REG,
                         REG_MEM_REG_OPCODE_BITS)) {
      complete = disassemble_reg_mem_reg(inst, fp, out);
    } else {
      printf("It's not a mov operation\n");
    }

    if (!complete) {
      printf("Unexpected end of file inside an instruction\n");
      status = 1;
      break;
    }
  }

  // fclose flushes buffered output, so a failure here means lost data.
  if (fclose(out) != 0) {
    printf("Failed to write output file\n");
    status = 1;
  }
  fclose(fp);

  return status;
}

// Returns true when every bit set in `mask` is also set in `instruction`.
bool mask_instruction(uint8_t instruction, uint8_t mask) {
  return (instruction & mask) == mask;
}

// Copies the name of the register selected by the low three bits of
// `instruction` into `dest`: the 16-bit name when `word` is true, otherwise
// the 8-bit name. `dest` must have room for at least 3 bytes.
void decode_register(uint8_t instruction, bool word, char *dest) {
  static const uint8_t reg_mask = 0x07;
  // Each row is one register code: column 0 is the byte register, column 1
  // the word register.
  // clang-format off
  static const char *const table[16] = {
    "al", "ax",
    "cl", "cx",
    "dl", "dx",
    "bl", "bx",
    "ah", "sp",
    "ch", "bp",
    "dh", "si",
    "bh", "di"
  };
  // clang-format on
  static const uint8_t ROW_WIDTH = 2;

  uint8_t offset = instruction & reg_mask;
  // Multiply offset by 2 since each row has 2 columns
  strcpy(dest, table[offset * ROW_WIDTH + (uint8_t)word]);
}

// Copies the effective-address base expression selected by the low three bits
// of `instruction` (the r/m field) into `dest`. `dest` must have room for at
// least 8 bytes. Code 110 yields "bp"; the caller handles the direct-address
// special case itself.
void decode_rm(uint8_t instruction, char *dest) {
  static const uint8_t rm_mask = 0x07;
  // clang-format off
  static const char *const table[8] = {
    "bx + si",
    "bx + di",
    "bp + si",
    "bp + di",
    "si",
    "di",
    "bp",
    "bx"
  };
  // clang-format on

  uint8_t index = instruction & rm_mask;
  strcpy(dest, table[index]);
}