From 4481fc10d4ab58a9b3c98180589b3b176fdc9670 Mon Sep 17 00:00:00 2001 From: n loewen Date: Mon, 21 Aug 2023 16:08:58 +0100 Subject: [PATCH] assembler - WIP - Start adding a 'set the initial IP to assembler to' feature + Start adding a 'return debug data as well as machine code' feature --- assembler.js | 124 ++++++++++++++++-- scratch.js | 71 ++++++++++ ...encing-program-counter-during-assembly.asm | 2 + 3 files changed, 189 insertions(+), 8 deletions(-) create mode 100644 scratch.js diff --git a/assembler.js b/assembler.js index ff9e116..715f26e 100644 --- a/assembler.js +++ b/assembler.js @@ -20,7 +20,7 @@ exports.assemble = (str, debug = false) => { } // Configure pseudo-ops: -const POINTER_TO_CURRENT_ADDR_PSEUDO_OPERAND = '*'; +const POINTER_TO_IP_OP = '*'; const CONSTANT_PREFIX = '#'; const LABEL_PREFIX = '@'; @@ -39,22 +39,123 @@ const mnemonics2opcodes = { }; /** - * @param {String} line - One line of assembly to decode + * @param {string} s + * @returns {boolean} **/ -function decodeInstructions(line) { - let lines = line.split(/\n/); // returns an array of lines +function startsWithPointerToIP(s) { + return stripWhitespaceFromEnds(s).startsWith(POINTER_TO_IP_OP); +} - let machineCode = new Array(INITIAL_IP_ADDRESS).fill(0); + +/** + * @param {string[]} lines + */ +function getFirstCodeLine(lines) { + function isCode(line) { + line = stripComments(stripWhitespaceFromEnds(line)); + if (line.length === 0) { + return false; + } + return true; + }; + lines = lines.filter(isCode); + if (lines.length > 0) { + return lines[0]; + } + return false; +} + +/** + * @param {string} source; + **/ + // * @returns {{number: number, source: string, type: 'code'|'comment'|'blank'}} + // TODO: https://stackoverflow.com/questions/32295263/how-to-document-an-array-of-objects-in-jsdoc +function splitCodeFromComments(source) { + let lines = source.split(/\n/); // returns an array of lines + + const isLineBlank = (l) => { return l.length === 0 ? true : false }; + const isLineComment = (l) => { return stripWhitespaceFromEnds(l).startsWith(';') }; + const getLineType = (l) => { + console.log('get type for ', l); + if (isLineBlank(l)) return 'blank'; + if (isLineComment(l)) return 'comment'; + return 'code'; + } + + return lines.map((line, index) => { + return { + number: index, + source: line, + type: getLineType(line) + }; + }); +} + + +/** + * Assemble source code. + * + * If the source doesn't explicitly set an address to assemble to, + * it will be assembled to the default intial value of the IP, + * as specified in `machine.config.js`. + * @param {string} source - Assembly source to decode + * @return TODO + **/ +function decodeInstructions(source) { + // let lines = source.split(/\n/); // returns an array of lines + + + // WIP: everything broken + // - just finished writing `splitCodeFromComments` + // - plan: + // - use that to pre-load debugInfo array + // - and to check if the first code-line is `*` + // so currently i'm implementing 2 entangled features: + // 1. check if first line * and set IP + // 2. return debug data along with machine code + + console.log(splitCodeFromComments('foo \n\n; bar')); + + + + // Figure out where to start assembly... + + /** @type {number} IP - The address where the next line of code will be assembled **/ + let IP; + + // Check if the source code explicitly sets an address to assemble at + // by including a `* [addr]` as the first (non-blank, non-comment) line + const sourceIPdefinition = lines.findIndex(startsWithPointerToIP); + if (sourceSetsIP) { + let op_arg_array = lines[.split(/\s+/); // split line into an array of [op, arg] + IP = explicitIP; + } else { + IP = INITIAL_IP_ADDRESS; + } + + // Initialize arrays to collect assembled code + + /** @type {array} - Assembled source code, as an array of bytes **/ + let machineCode = new Array(IP).fill(0); + + /** @type {{lineNumber: number, source: string, lineType: 'code'|'comment'}} **/ + let debugData = new Array(); + + // Initialize memory-mapped IO -- TODO this should probably be in the CPU, not here machineCode[POINTER_TO_DISPLAY] = DISPLAY_ADDR; machineCode[POINTER_TO_KEYPAD] = KEYPAD_ADDR; + // Initialize arrays that collect code references that + // have to be revisited after our first pass through the source let labels = {}; let constants = {}; - let IP = INITIAL_IP_ADDRESS; + + // Decode line by line for (let i = 0; i < lines.length; i++) { dbg(2, ''); dbgGroup(1, `Input line ${i}, IP ${num2hex(IP)}`); dbg(3, `> ${lines[i]}`); + // TODO - update debugData let line = stripWhitespaceFromEnds(stripComments(lines[i])); // Handle blank lines @@ -93,12 +194,19 @@ function decodeInstructions(line) { let arg_num = null; let addressingMode = 'direct'; // Must be "direct" or "indirect" + // Handle setting value of IP + if (startsWithPointerToIP(line)) { + dbg(3, 'CHANGING IP'); + IP = arg_str.parseInt(); + continue; + } + // Handle constant definitions if (opName.startsWith(CONSTANT_PREFIX)) { // FIXME - a quick hack to get around problems caused by another use of lower-casing to sanitize input: let constantName = opName.substring(1).toLowerCase(); // strip '>' let constantValue = arg_str; - if (constantValue.toLowerCase() === POINTER_TO_CURRENT_ADDR_PSEUDO_OPERAND) { + if (constantValue.toLowerCase() === POINTER_TO_IP_OP) { constantValue = IP.toString(); } constants[constantName] = constantValue; @@ -136,7 +244,7 @@ function decodeInstructions(line) { dbg(2, `bytesToReplace: ${labels[label].bytesToReplace}`); // Handle references to the Instruction Pointer - } else if (arg_str.toLowerCase() === POINTER_TO_CURRENT_ADDR_PSEUDO_OPERAND) { + } else if (arg_str.toLowerCase() === POINTER_TO_IP_OP) { dbg(2, `operand references current address`); arg_num = IP; dbg(2, `arg_num: ${num2hex(arg_num)}`); diff --git a/scratch.js b/scratch.js new file mode 100644 index 0000000..d944b99 --- /dev/null +++ b/scratch.js @@ -0,0 +1,71 @@ +/* +function getFirstCodeLine(lines) { + function isCode(line) { + line = stripComments(stripWhitespaceFromEnds(line)); + if (line.length === 0) { + return false; + } + return true; + }; + + let isLineCode + + lines = lines.filter(isCode); + if (lines.length > 0) { + return lines[0]; + } + return false; +} +*/ + +/** + * @param {string} source; + **/ + // * @returns {{number: number, source: string, type: 'code'|'comment'|'blank'}} + // TODO: https://stackoverflow.com/questions/32295263/how-to-document-an-array-of-objects-in-jsdoc +function splitCodeFromComments(source) { + let lines = source.split(/\n/); // returns an array of lines + + const isLineBlank = (l) => { return l.length === 0 ? true : false }; + const isLineComment = (l) => { return stripWhitespaceFromEnds(l).startsWith(';') }; + const getLineType = (l) => { + console.log('get type for ', l); + if (isLineBlank(l)) return 'blank'; + if (isLineComment(l)) return 'comment'; + return 'code'; + } + + return lines.map((line, index) => { + return { + number: index, + source: line, + type: getLineType(line) + }; + }); +} + +console.log(splitCodeFromComments('foo \n\n; bar')); + + +const logArrayElements = (element, index /*, array */) => { + console.log(`a[${index}] = ${element}`); + return index; +}; + +// Notice that index 2 is skipped, since there is no item at +// that position in the array. +console.log([2, 5, , 9].map(logArrayElements)); + +const l = ['', '', '; foo', 'bar']; + +// console.log(getFirstCodeLine(l)); + +function stripComments(line) { + return line.replace(/;.+/,""); +} + +function stripWhitespaceFromEnds(line) { + line = line.replace(/^\s+/,""); + line = line.replace(/\s+$/,""); + return line; +} \ No newline at end of file diff --git a/test-programs/referencing-program-counter-during-assembly.asm b/test-programs/referencing-program-counter-during-assembly.asm index 2ab9518..1a43688 100644 --- a/test-programs/referencing-program-counter-during-assembly.asm +++ b/test-programs/referencing-program-counter-during-assembly.asm @@ -1,5 +1,7 @@ ;; Test referencing address of line being assembled +* 30 + NOP ; Push the const below to a later address #initAddr *