const { logMemory, num2hex } = require('./logging.js'); const { INITIAL_IP_ADDRESS, DISPLAY_ADDR, KEYPAD_ADDR, POINTER_TO_DISPLAY, POINTER_TO_KEYPAD } = require('./machine.config.js'); // 1 = verbose // 2 = what i'm currently focusing on // 3 = always print // 4 = silent const DEBUG_LEVEL = 2; let DEBUG = false; // Turn debugging on/off -- set by assemble() exports.assemble = (str, debug = false) => { DEBUG = debug; return decodeInstructions(str); } // Configure pseudo-ops: const ASM_IP_LABEL = '*'; const ASM_CONSTANT_PREFIX = '#'; const ASM_LABEL_PREFIX = '@'; const mnemonicsWithOptionalArgs = ['end', 'nop']; const mnemonics2opcodes = { end: { direct: 0, indirect: 0 }, sto: { direct: 1, indirect: 2 }, lda: { direct: 3, indirect: 4 }, add: { direct: 5, indirect: 6 }, sub: { direct: 7, indirect: 8 }, hop: { direct: 9, indirect: 10 }, jmp: { direct: 11, indirect: 12 }, ftg: { direct: 13, indirect: 13 }, fhp: { direct: 14, indirect: 14 }, nop: { direct: 15, indirect: 15 }, }; /** * @param {string} s * @returns {boolean} **/ function startsWithPointerToIP(s) { return stripWhitespaceFromEnds(s).startsWith(ASM_IP_LABEL); } /** * @typedef {('code'|'comment'|'blank')} SourceLineType **/ /** * @typedef {Object} SourceLineInfo * @property {number} number - line number * @property {string} source - source text * @property {string} sanitized - source text, with comments and whitespace removed * @property {SourceLineType} type - line type * @property {string} [operation] - For code: the first non-whitespace chunk * @property {string} [argument] - For code: the second non-whitespace chunk, if there is one **/ /** * @param {string} source * @returns {Array} **/ function preparseSourceCode(source) { let lines = source.split(/\n/); // returns an array of lines const isLineBlank = (l) => { return l.length === 0 ? true : false }; const isLineComment = (l) => { return stripWhitespaceFromEnds(l).startsWith(';') }; /** * @param {string} l * @returns {SourceLineType} **/ const getLineType = (l) => { if (isLineBlank(l)) return 'blank'; if (isLineComment(l)) return 'comment'; return 'code'; } return lines.map((line, index) => { console.log('pre-parsing ', line); let info = { number: index, source: line, sanitized: stripWhitespaceFromEnds(stripComments(line)), type: getLineType(line), }; if (info.type === 'code') { const op_arg_array = line.split(/\s+/); // split line into an array of [op, arg] if (op_arg_array[0] !== 'undefined') { info.operation = op_arg_array[0]; } if (op_arg_array[1] !== 'undefined') { info.argument = op_arg_array[1]; } } return info; }); } /** * @param {string} arg * @returns {number} **/ function parseNumericOperand(arg) { if (arg.startsWith("$")) return hex2num(arg.replace("$", "")); return parseInt(arg); } // DECODE! const op = mnemonics2opcodes[opName][addressingMode]; // FIXME rename machineCode.push(op); machineCode.push(arg_num); dbg(3, `IP: $${num2hex(IP)}, new code: $${num2hex(op)} $${num2hex(arg_num)}`); IP += 2; dbgGroupEnd(1, 'Input line'); /** * Assemble source code. * * If the source doesn't explicitly set an address to assemble to, * it will be assembled to the default intial value of the IP, * as specified in `machine.config.js`. * @param {string} source - Assembly source to decode * @return TODO **/ function decodeInstructions(source) { // WIP: everything broken // - just finished writing `splitCodeFromComments` // - plan: // - use that to pre-load debugInfo array // - and to check if the first code-line is `*` // so currently i'm implementing 2 entangled features: // 1. check if first line * and set IP // 2. return debug data along with machine code let lines = preparseSourceCode(source); console.log(lines); // Figure out where to start assembly... /** @type {number} IP - Destination addr for the next line **/ let IP; // Check if the source code explicitly sets an address to assemble at // by including a `* [addr]` as the first (non-blank, non-comment) line let idOfFirstLineWithCode = lines.findIndex((el) => el.type === 'code'); if (lines[idOfFirstLineWithCode].operation.startsWith(ASM_IP_LABEL)) { IP = parseInt(lines[idOfFirstLineWithCode].argument); } else { IP = INITIAL_IP_ADDRESS; } // Initialize arrays to collect assembled code /** @type {array} - Assembled source code, as an array of bytes **/ let machineCode = new Array(IP).fill(0); // Initialize memory-mapped IO -- TODO this should probably be in the CPU, not here machineCode[POINTER_TO_DISPLAY] = DISPLAY_ADDR; machineCode[POINTER_TO_KEYPAD] = KEYPAD_ADDR; // Initialize arrays that collect code references that // have to be revisited after our first pass through the source let labels = {}; let constants = {}; // Decode line by line for (let i = 0; i < lines.length; i++) { let line = lines[0]; if (line.type === 'code') { const op = line.operation; const arg = null; if (typeof line.argument != 'undefined') { const arg = line.argument; } /** @type {{op: (number | null), arg: (number|null)}} **/ let assembledLine = { op: null, arg: null }; /** @type {'direct'|'indirect'} **/ let addressingMode = 'direct'; // Opcodes - Handle label definitions if (op.startsWith(ASM_LABEL_PREFIX)) { let label = op.substring(1); // strip label prefix if (label in labels) { labels[label].pointsToByte = IP; } else { labels[label] = { pointsToByte: IP, bytesToReplace: [], }; } dbg(2, `pointsToByte: ${labels[label].pointsToByte}`); dbg(2, `bytesToReplace: ${labels[label].bytesToReplace}`); dbg(3, `IP: $${num2hex(IP)}, new code: none`); dbgGroupEnd(1, 'Input line'); continue; } // let op_arg_array = line.split(/\s+/); // split line into an array of [op, arg] // let opName = op_arg_array[0].toLowerCase(); // let arg_str = op_arg_array[1]; // let arg_num = null; // Opcodes - Handle setting value of IP if (startsWithPointerToIP(op)) { dbg(3, 'CHANGING IP'); IP = parseInt(arg); continue; } // Opcodes - Handle constant definitions if (op.startsWith(ASM_CONSTANT_PREFIX)) { let constantName = op.substring(1); // strip '>' let constantValue = arg; if (constantValue === ASM_IP_LABEL) { constantValue = IP.toString(); } constants[constantName] = constantValue; dbg(2, `constants:`); dbg(2, constants); continue; } // Opcodes - Handle mnemonics without operands (eg END) ... if (arg === null) { if (mnemonicsWithOptionalArgs.indexOf(op) < 0) { console.error(`Missing opcode for line ${line.number}: ${line.source}`); throw new Error("Missing opcode"); } assembledLine.arg = 0; // Operands - Handle references to labels } else if (arg.startsWith(ASM_LABEL_PREFIX)) { let label = arg.substring(1); // strip label prefix assembledLine.arg = 0; if (label in labels) { dbg(1, `'${label}' already in labels object`); labels[label].bytesToReplace.push(IP + 1); } else { dbg(1, `'${label}' NOT in labels object`); labels[label] = { bytesToReplace: [IP + 1], }; } dbg(2, `pointsToByte: ${labels[label].pointsToByte}`); dbg(2, `bytesToReplace: ${labels[label].bytesToReplace}`); // Operands - Handle references to the Instruction Pointer } else if (arg.toLowerCase() === ASM_IP_LABEL) { dbg(2, `operand references current address`); assembledLine.arg = IP; dbg(2, `arg_num: ${num2hex(assembledLine.arg)}`); // Operands - Handle references to constants } else if (arg.startsWith(ASM_CONSTANT_PREFIX)) { dbg(2, `operand references '${arg}'`); assembledLine.arg = constants[arg.substring(1)]; // substring(1) strips '>' // Operands - Handle references to constants in indirect mode } else if (arg.startsWith(`(${ASM_CONSTANT_PREFIX}`)) { addressingMode = "indirect"; dbg(2, `IND - operand references '${arg}'`); let constTemp = arg.replace(`(${ASM_CONSTANT_PREFIX}`, "").replace(")", ""); assembledLine.arg = constants[constTemp]; // Operands - Handle indirect expressions } else if (arg.startsWith("(")) { addressingMode = "indirect"; assembledLine.arg = arg.replace("(", "").replace(")", ""); } // Operands - Handle numeric operands if (arg_num === null) { if (arg_str.startsWith("$")) { // Handle hex arg_str = arg_str.replace("$", ""); arg_num = hex2num(arg_str); } else { // Accept decimal i guess arg_num = parseInt(arg_str); } } }; } dbg(1, ''); dbgGroup(1, 'Memory before filling in label constants'); dbgExec(1, () => logMemory(new Uint8Array(machineCode))); dbgGroupEnd(1, 'Memory before filling in label constants'); // Backfill label references for (let k of Object.keys(labels)) { dbgGroup(2, `${ASM_LABEL_PREFIX}${k}`); let label = labels[k]; dbg(2, `pointsToByte: ${label.pointsToByte}`); dbg(2, `bytesToReplace: ${label.bytesToReplace}`); dbgGroupEnd(2, `label`); for (let j = 0; j < label.bytesToReplace.length; j++) { machineCode[label.bytesToReplace[j]] = label.pointsToByte; } } return new Uint8Array(machineCode); } /** * @param {string} line * @returns {string} **/ function stripComments(line) { return line.replace(/;.+/,""); } /** * @param {string} line * @returns {string} **/ function stripWhitespaceFromEnds(line) { line = line.replace(/^\s+/,""); line = line.replace(/\s+$/,""); return line; } function hex2num(hex) { return parseInt(hex, 16) }; // Debug helpers const dbg = (lvl, s) => { if (DEBUG && (lvl >= DEBUG_LEVEL)) console.log(s) }; const dbgGroup = (lvl, s) => { if (DEBUG && (lvl >= DEBUG_LEVEL)) console.group(s) }; const dbgGroupEnd = (lvl, s) => { if (DEBUG && (lvl >= DEBUG_LEVEL)) console.groupEnd() }; const dbgExec = (lvl, func) => { if (DEBUG && (lvl >= DEBUG_LEVEL)) func(); }