diff --git a/sketches/assembler.js b/sketches/assembler.js
index 9452a3d..c8861fe 100644
--- a/sketches/assembler.js
+++ b/sketches/assembler.js
@@ -1,9 +1,22 @@
 // Syntax:
-// ADD $01 ; comments follow a `;`
-// ADD $FF ; this is direct addressing
-// ADD ($CC) ; this is indirect addressing
-// END ; END, CFC, and CHP don't require arguments
-// ; (a default value of 0 will be used as their operand)
+//   ADD $01     ; comments follow a `;`
+//   ADD $FF     ; this is direct addressing
+//   ADD ($CC)   ; this is indirect addressing
+//   END         ; END, CFC, and CHP don't require arguments
+//               ; (a default value of 0 will be used as their operand)
+//
+//   @label      ; create a label
+//   JMP @label  ; reference a label
+
+const printMemory = require('./print-memory.js');
+
+const { debug } = require("console");
+
+// 0 = silent
+// 1 = verbose
+// 2 = what i'm currently focusing on
+// 3 = always print
+const DEBUG = 2;
 
 const mnemonicsWithOptionalArgs = ['end', 'cfc', 'chp'];
 const mnemonics2opcodes = {
@@ -18,59 +31,158 @@ const mnemonics2opcodes = {
   chp: { direct: 14, indirect: 14 },
 };
 
-function decodeMultipleInstructions(str) {
+/**
+ * Assemble a program into machine code, two bytes (opcode, operand) per
+ * instruction.
+ *
+ * A line of the form `@name` anchors a label at the current byte offset.
+ * An operand of the form `@name` is emitted as 0 and overwritten with the
+ * label's offset after the last line is read, so a label may be referenced
+ * before it is defined.
+ *
+ * @param {string} str - Assembly source, one statement per line
+ * @returns {Uint8Array} The assembled machine code
+ * @throws {Error} If an operand is missing or unparseable, a mnemonic is
+ *   unknown, or a referenced label is never defined
+ */
+function decodeInstructions(str) {
   let lines = str.split(/\n/); // returns an array of lines
-  let output = [];
-  lines.forEach( (l) => {
-    let decoded = decodeInstruction(l);
-    if (decoded) {
-      output.push(decoded.op);
-      output.push(decoded.arg);
+  let machineCode = [];
+  // No prototype, so a label named e.g. "constructor" is not matched by `in`
+  let labels = Object.create(null);
+  let byteCursor = 0;
+  for (let i = 0; i < lines.length; i++) {
+    console.log();
+    console.group(`Input line ${i}, cursor ${byteCursor}`);
+    dbg(3, `> ${lines[i]}`);
+    let line = stripWhitespaceFromEnds(stripComments(lines[i]));
+    // console.log();
+    // console.log(`> ${line}`);
+
+    // Handle blank lines
+    if (line.length === 0) {
+      dbg(3, `cursor: ${byteCursor}, new code: none`);
+      dbg(1, 'blank');
+      console.groupEnd('Input line');
+      continue;
     }
-  });
-  return new Uint8Array(output);
+
+    // Handle labels -- anchors
+    if (line.startsWith('@')) {
+      // TODO: validate label // validateLabel(line);
+      const label = line.substring(1); // strip '@'
+
+      if (label in labels) {
+        labels[label].pointsToByte = byteCursor;
+      } else {
+        labels[label] = {
+          pointsToByte: byteCursor,
+          bytesToReplace: [],
+        };
+      }
+
+      dbg(2, '');
+      dbg(2, `@label anchor: ${label}`);
+      dbg(2, labels);
+      dbg(2, '');
+      dbg(3, `cursor: ${byteCursor}, new code: none`);
+      console.groupEnd('Input line');
+      continue;
+    }
+
+    // Split on any run of whitespace so extra spaces or tabs are tolerated
+    let op_arg_array = line.split(/\s+/); // split line into an array of [op, arg]
+    let opName = op_arg_array[0].toLowerCase();
+    let addressingMode = 'direct'; // Must be "direct" or "indirect"
+    let arg_str = op_arg_array[1];
+    let arg_num = null;
+
+    if (typeof arg_str === 'undefined') {
+      // Handle mnemonics without arguments (eg END) ...
+      if (mnemonicsWithOptionalArgs.indexOf(opName) < 0) {
+        console.error(`Missing opcode: ${line}`);
+        throw new Error("Missing opcode");
+      }
+      arg_num = 0;
+    } else if (arg_str.startsWith('@')) {
+      // Handle mnemonics with pointers to labels
+      // TODO: validate label // validateLabel(line);
+      const label = arg_str.substring(1); // strip '@'
+      arg_num = 0; // placeholder, overwritten by the backfill pass below
+      dbg(2, '');
+      dbg(2, `@label reference: ${label}`);
+
+      if (label in labels) {
+        dbg(2, '');
+        dbg(1, `'${label}' already in labels object`);
+        labels[label].bytesToReplace.push(byteCursor + 1);
+      } else {
+        dbg(1, `'${label}' NOT in labels object`);
+        // Forward reference: the target is unknown until the anchor is read
+        labels[label] = {
+          pointsToByte: null,
+          bytesToReplace: [byteCursor + 1],
+        };
+      }
+      // dbg(2, labels);
+      dbg(2, `label pointsToByte: ${labels[label].pointsToByte}`);
+      dbg(2, `label bytesToReplace: ${labels[label].bytesToReplace}`);
+
+      dbg(2, '');
+    } else {
+      if (arg_str.startsWith("(")) {
+        // Handle indirect expressions
+        addressingMode = "indirect";
+        arg_str = arg_str.replace("(", "");
+        arg_str = arg_str.replace(")", "");
+      }
+      // Parse the operand after unwrapping, so `($CC)` is still read as hex
+      if (arg_str.startsWith("$")) {
+        // Handle hex operands
+        arg_str = arg_str.replace("$", "");
+        arg_num = hex2num(arg_str);
+      } else {
+        // Accept decimal i guess
+        arg_num = parseInt(arg_str);
+      }
+      if (Number.isNaN(arg_num)) {
+        console.error(`Invalid operand: ${line}`);
+        throw new Error("Invalid operand");
+      }
+    }
+
+    // Decode!
+    const opcodes = mnemonics2opcodes[opName];
+    if (typeof opcodes === 'undefined') {
+      console.error(`Unknown mnemonic: ${line}`);
+      throw new Error("Unknown mnemonic");
+    }
+    const op = opcodes[addressingMode];
+
+    machineCode.push(op);
+    machineCode.push(arg_num);
+    byteCursor += 2;
+    dbg(3, `cursor: ${byteCursor}, new code: ${op}, ${arg_num}`);
+    console.groupEnd('Input line');
+  }
+
+  printMemory.printTable(machineCode);
+
+  // Backfill label pointers
+  for (let k of Object.keys(labels)) {
+    let label = labels[k];
+    dbg(2, label);
+    if (label.pointsToByte === null) {
+      throw new Error(`Undefined label: @${k}`);
+    }
+    for (let j = 0; j < label.bytesToReplace.length; j++) {
+      machineCode[label.bytesToReplace[j]] = label.pointsToByte;
+    }
+  }
+
+  return new Uint8Array(machineCode);
 }
 
-/**
- * @param {string} line - A line of assembly code
- * @returns {(object|false)} Either {op: machineOp, arg: 0}, or false if the line was blank
- */
-function decodeInstruction(line) {
-  line = stripWhitespaceFromEnds(stripComments(line));
-  let op_arg_array = line.split(" "); // split line into an array of [op, arg]
-  let opName = op_arg_array[0].toLowerCase();
-  let addressingMode = 'direct'; // Must be "direct" or "indirect"
-
-  // Handle blank lines and mnemonics without arguments (eg END)
-  if (op_arg_array.length < 2) { // No argument
-    // handle blank lines, or lines that just contain a comment:
-    if (line.length === 0) { return false; }
-
-    // handle mnemonics that aren't paired with an argument:
-    if (mnemonicsWithOptionalArgs.indexOf(opName) < 0) {
-      console.error(`Missing opcode: ${line}`);
-      throw new Error("Missing opcode");
-    }
-    let machineOp = mnemonics2opcodes[opName][addressingMode];
-    return { op: machineOp, arg: 0 };
-  }
-
-  // Handle mnemonics with arguments (eg ADD $FF)
-  let arg_str = op_arg_array[1];
-  if (arg_str.startsWith("(")) {
-    addressingMode = "indirect";
-    arg_str = arg_str.replace("(", "");
-    arg_str = arg_str.replace(")", "");
-  }
-  if (arg_str.startsWith("$")) {
-    arg_str = arg_str.replace("$", "");
-    arg_num = hex2num(arg_str);
-  } else {
-    arg_num = parseInt(arg_str);
-  }
-  let machineOp = mnemonics2opcodes[opName][addressingMode];
-  return { op: machineOp, arg: arg_num };
-}
 
 function stripComments(line) {
   return line.replace(/;.+/,"");
@@ -84,9 +196,18 @@ function stripWhitespaceFromEnds(line) {
 
 
 function hex2num(hex) { return parseInt(hex, 16) };
+/**
+ * Log a debug message unless its level is below the DEBUG threshold.
+ *
+ * @param {number} debugLevel - Level of this message; printed when >= DEBUG
+ * @param {*} string - Value passed to console.log (callers also pass objects)
+ */
+function dbg(debugLevel, string) {
+  if (debugLevel >= DEBUG) console.log(string);
+}
 
 
 // RUN IT
 exports.assemble = (str) => {
-  return decodeMultipleInstructions(str);
+  return decodeInstructions(str);
 }
\ No newline at end of file