From e223314349aed5c11c034399ecd51c37f89dddbe Mon Sep 17 00:00:00 2001
From: Abdelrahman
Date: Wed, 8 Mar 2023 23:48:03 +0000
Subject: [PATCH] Decode more variations of mov instruction

---
 8086_assembly_02/.gitignore                   |   3 +
 8086_assembly_02/Makefile                     |   2 +
 8086_assembly_02/dasm.cpp                     | 285 ++++++++++++++++++
 8086_assembly_02/listing_0039_more_movs.asm   |  47 +++
 .../listing_0039_more_movs_out.asm            |  20 ++
 5 files changed, 357 insertions(+)
 create mode 100644 8086_assembly_02/.gitignore
 create mode 100644 8086_assembly_02/Makefile
 create mode 100644 8086_assembly_02/dasm.cpp
 create mode 100644 8086_assembly_02/listing_0039_more_movs.asm
 create mode 100644 8086_assembly_02/listing_0039_more_movs_out.asm

diff --git a/8086_assembly_02/.gitignore b/8086_assembly_02/.gitignore
new file mode 100644
index 0000000..8812d96
--- /dev/null
+++ b/8086_assembly_02/.gitignore
@@ -0,0 +1,3 @@
+dasm
+listing_0039_more_movs
+listing_0039_more_movs_out
diff --git a/8086_assembly_02/Makefile b/8086_assembly_02/Makefile
new file mode 100644
index 0000000..46770a1
--- /dev/null
+++ b/8086_assembly_02/Makefile
@@ -0,0 +1,2 @@
+all:
+	clang++ -g dasm.cpp -o dasm
diff --git a/8086_assembly_02/dasm.cpp b/8086_assembly_02/dasm.cpp
new file mode 100644
--- /dev/null
+++ b/8086_assembly_02/dasm.cpp
@@ -0,0 +1,285 @@
+#include <cstdint>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+
+// Opcode bit patterns of the supported MOV encodings, with the flag and
+// register bits cleared. Match them with is_opcode(), which also checks the
+// opcode bits that must be zero.
+enum class INST_MASKS {
+  IMM_TO_REG = 0xb0,   // 1011 w reg
+  REG_MEM_REG = 0x88,  // 1000 10 d w
+};
+
+// Values of the MOD field (top two bits of the operands byte), unshifted.
+enum class MODE {
+  MEM = 0x00,
+  MEM8 = 0x40,
+  MEM16 = 0x80,
+  REG = 0xc0,
+};
+
+bool mask_instruction(uint8_t instruction, uint8_t mask);
+void decode_register(uint8_t instruction, bool word, char *dest);
+void decode_rm(uint8_t instruction, char *dest);
+
+// Returns true when the bits of `instruction` selected by `mask` are exactly
+// `pattern`, so opcode bits that must be 0 are checked as well.
+static bool is_opcode(uint8_t instruction, uint8_t mask, INST_MASKS pattern) {
+  return (instruction & mask) == static_cast<uint8_t>(pattern);
+}
+
+// Reads `count` (0, 1 or 2) bytes from `fp` and assembles them, low byte
+// first, into `*value`. Returns false when fewer than `count` bytes are left.
+static bool read_le(FILE *fp, uint8_t count, uint16_t *value) {
+  uint8_t bytes[2] = {0, 0};
+
+  if (fread(bytes, 1, count, fp) != count) {
+    return false;
+  }
+
+  *value = static_cast<uint16_t>(bytes[0] | (bytes[1] << 8));
+  return true;
+}
+
+// Interprets a value read by read_le() as signed: a one-byte value is
+// sign-extended from 8 bits, anything else is taken as a 16-bit number.
+static int to_signed(uint16_t raw, uint8_t count) {
+  if (count == 1) {
+    return static_cast<int8_t>(raw);
+  }
+
+  return static_cast<int16_t>(raw);
+}
+
+// Decodes an immediate-to-register mov whose opcode byte is `inst` and writes
+// it to `out`. Returns false when the immediate is missing from `fp`.
+static bool decode_imm_to_reg(uint8_t inst, FILE *fp, FILE *out) {
+  // Bit pattern is:
+  //   7   6   5   4   3   2   1   0
+  // ---------------------------------
+  // | 1 | 0 | 1 | 1 | w |    reg    |
+  // ---------------------------------
+  //
+  // So, we need to mask bit 3 to check the w flag
+  const bool word = mask_instruction(inst, 0x08);
+  const uint8_t next_bytes = word ? 2 : 1;
+
+  char reg[3] = {0};
+  decode_register(inst, word, reg);
+
+  uint16_t data = 0;
+
+  if (!read_le(fp, next_bytes, &data)) {
+    return false;
+  }
+
+  fprintf(out, "mov %s, %d\n", reg, to_signed(data, next_bytes));
+  return true;
+}
+
+// Decodes a register/memory to/from register mov whose opcode byte is `inst`
+// and writes it to `out`. Returns false when `fp` ends before the operands
+// byte or the displacement has been read.
+static bool decode_reg_mem_reg(uint8_t inst, FILE *fp, FILE *out) {
+  uint8_t operands = 0;
+
+  if (fread(&operands, sizeof(operands), 1, fp) != 1) {
+    return false;
+  }
+
+  // Instruction bit pattern is:
+  //   7   6   5   4   3   2   1   0
+  // ---------------------------------
+  // | 1 | 0 | 0 | 0 | 1 | 0 | d | w |
+  // ---------------------------------
+  //
+  // Operands bit pattern is:
+  //   7   6   5   4   3   2   1   0
+  // ---------------------------------
+  // |  mod  |    reg    |    r/m    |
+  // ---------------------------------
+
+  const bool reg_dest = mask_instruction(inst, 0x02);
+  const bool word = mask_instruction(inst, 0x01);
+
+  char reg[3] = {0};
+  decode_register(operands >> 3, word, reg);
+
+  if (mask_instruction(operands, static_cast<uint8_t>(MODE::REG))) {
+    char rm[3] = {0};
+    decode_register(operands, word, rm);
+
+    fprintf(out, "mov %s, %s\n", reg_dest ? reg : rm, reg_dest ? rm : reg);
+    return true;
+  }
+
+  // MOD doubles as the displacement size: 00 none, 01 one byte, 10 two bytes.
+  uint8_t next_bytes = operands >> 6;
+
+  // MOD == 00 with R/M == 110 is the exception: no base register, just a
+  // 16-bit direct address. All three R/M bits are compared so that
+  // R/M == 111 ([bx]) is not mistaken for it.
+  const bool direct_address = next_bytes == 0 && (operands & 0x07) == 0x06;
+
+  if (direct_address) {
+    next_bytes = 2;
+  }
+
+  uint16_t raw = 0;
+
+  if (!read_le(fp, next_bytes, &raw)) {
+    return false;
+  }
+
+  char rm[32] = {0};
+
+  if (direct_address) {
+    snprintf(rm, sizeof(rm), "%u", static_cast<unsigned>(raw));
+  } else {
+    decode_rm(operands, rm);
+
+    // Displacements are signed, so print "- n" for negative ones.
+    const int disp = to_signed(raw, next_bytes);
+
+    if (disp != 0) {
+      const size_t used = strlen(rm);
+
+      snprintf(rm + used, sizeof(rm) - used, disp < 0 ? " - %d" : " + %d",
+               disp < 0 ? -disp : disp);
+    }
+  }
+
+  fprintf(out, reg_dest ? "mov %s, [%s]\n" : "mov [%s], %s\n",
+          reg_dest ? reg : rm, reg_dest ? rm : reg);
+  return true;
+}
+
+// Decodes instructions from `fp` until end of file, writing one line of
+// assembly per instruction to `out`. Stops and returns false at the first
+// unsupported opcode or truncated instruction: instruction lengths vary, so
+// the byte stream cannot be resynchronised after a decoding failure.
+static bool disassemble(FILE *fp, FILE *out) {
+  uint8_t inst = 0;
+
+  while (fread(&inst, sizeof(inst), 1, fp) == 1) {
+    bool complete = false;
+
+    if (is_opcode(inst, 0xf0, INST_MASKS::IMM_TO_REG)) {
+      complete = decode_imm_to_reg(inst, fp, out);
+    } else if (is_opcode(inst, 0xfc, INST_MASKS::REG_MEM_REG)) {
+      complete = decode_reg_mem_reg(inst, fp, out);
+    } else {
+      printf("It's not a mov operation\n");
+      return false;
+    }
+
+    if (!complete) {
+      printf("Unexpected end of file inside an instruction\n");
+      return false;
+    }
+  }
+
+  return true;
+}
+
+int main(int argc, char *argv[]) {
+  if (argc < 2) {
+    printf("Please provide a file to disassemble\n");
+    return EXIT_FAILURE;
+  }
+
+  const char *filename = argv[1];
+
+  FILE *fp = fopen(filename, "rb");
+
+  if (!fp) {
+    printf("Failed to open the selected file\n");
+    return EXIT_FAILURE;
+  }
+
+  char out_filename[4096] = {0};
+
+  // snprintf (not sprintf) so that a long path cannot overflow the buffer.
+  const int length =
+      snprintf(out_filename, sizeof(out_filename), "%s_out.asm", filename);
+
+  if (length < 0 || static_cast<size_t>(length) >= sizeof(out_filename)) {
+    printf("The selected file name is too long\n");
+    fclose(fp);
+    return EXIT_FAILURE;
+  }
+
+  FILE *out = fopen(out_filename, "w");
+
+  if (!out) {
+    printf("Failed to open output file\n");
+    fclose(fp);
+    return EXIT_FAILURE;
+  }
+
+  fprintf(out, "; Disassembled by DASM\n\nbits 16\n\n");
+
+  const bool ok = disassemble(fp, out);
+
+  fclose(out);
+  fclose(fp);
+
+  return ok ? EXIT_SUCCESS : EXIT_FAILURE;
+}
+
+// Returns true when every bit set in `mask` is also set in `instruction`.
+bool mask_instruction(uint8_t instruction, uint8_t mask) {
+  return (instruction & mask) == mask;
+}
+
+// Copies the name of the register selected by the low three bits of
+// `instruction` into `dest`, which must hold at least 3 chars: the 16-bit
+// name when `word` is true, the 8-bit name otherwise.
+void decode_register(uint8_t instruction, bool word, char *dest) {
+  static const uint8_t reg_mask = 0x07;
+
+  // clang-format off
+  static const char *table[16] = {
+    "al", "ax",
+    "cl", "cx",
+    "dl", "dx",
+    "bl", "bx",
+    "ah", "sp",
+    "ch", "bp",
+    "dh", "si",
+    "bh", "di"
+  };
+  // clang-format on
+
+  static const uint8_t ROW_WIDTH = 2;
+
+  uint8_t offset = instruction & reg_mask;
+
+  // Multiply offset by 2 since each row has 2 columns
+  strcpy(dest, table[offset * ROW_WIDTH + static_cast<uint8_t>(word)]);
+}
+
+// Copies the address expression selected by the low three bits (R/M) of
+// `instruction` into `dest`, without brackets or displacement. The longest
+// entry needs 8 chars including the terminator.
+void decode_rm(uint8_t instruction, char *dest) {
+  static const uint8_t rm_mask = 0x07;
+
+  // clang-format off
+  static const char *table[8] = {
+    "bx + si",
+    "bx + di",
+    "bp + si",
+    "bp + di",
+    "si",
+    "di",
+    "bp",
+    "bx"
+  };
+  // clang-format on
+
+  uint8_t index = instruction & rm_mask;
+
+  strcpy(dest, table[index]);
+}
diff --git a/8086_assembly_02/listing_0039_more_movs.asm b/8086_assembly_02/listing_0039_more_movs.asm
new file mode 100644
index 0000000..854fcb4
--- /dev/null
+++ b/8086_assembly_02/listing_0039_more_movs.asm
@@ -0,0 +1,47 @@
+; ========================================================================
+;
+; (C) Copyright 2023 by Molly Rocket, Inc., All Rights Reserved.
+;
+; This software is provided 'as-is', without any express or implied
+; warranty. In no event will the authors be held liable for any damages
+; arising from the use of this software.
+;
+; Please see https://computerenhance.com for further information
+;
+; ========================================================================
+
+; ========================================================================
+; LISTING 39
+; ========================================================================
+
+bits 16
+
+; Register-to-register
+mov si, bx
+mov dh, al
+
+; 8-bit immediate-to-register
+mov cl, 12
+mov ch, -12
+
+; 16-bit immediate-to-register
+mov cx, 12
+mov cx, -12
+mov dx, 3948
+mov dx, -3948
+
+; Source address calculation
+mov al, [bx + si]
+mov bx, [bp + di]
+mov dx, [bp]
+
+; Source address calculation plus 8-bit displacement
+mov ah, [bx + si + 4]
+
+; Source address calculation plus 16-bit displacement
+mov al, [bx + si + 4999]
+
+; Dest address calculation
+mov [bx + di], cx
+mov [bp + si], cl
+mov [bp], ch
diff --git a/8086_assembly_02/listing_0039_more_movs_out.asm b/8086_assembly_02/listing_0039_more_movs_out.asm
new file mode 100644
index 0000000..dc57f26
--- /dev/null
+++ b/8086_assembly_02/listing_0039_more_movs_out.asm
@@ -0,0 +1,20 @@
+; Disassembled by DASM
+
+bits 16
+
+mov si, bx
+mov dh, al
+mov cl, 12
+mov ch, -12
+mov cx, 12
+mov cx, -12
+mov dx, 3948
+mov dx, -3948
+mov al, [bx + si]
+mov bx, [bp + di]
+mov dx, [bp]
+mov ah, [bx + si + 4]
+mov al, [bx + si + 4999]
+mov [bx + di], cx
+mov [bp + si], cl
+mov [bp], ch