const { logMemory, num2hex } = require('./logging.js'); const { INITIAL_IP_ADDRESS, DISPLAY_ADDR, POINTER_TO_DISPLAY, } = require('./machine.config.js'); // 1 = verbose // 2 = what i'm currently focusing on // 3 = always print // 4 = silent const DEBUG_LEVEL = 2; let DEBUG; // Turn debugging on/off -- set by assemble() /** * @param {string} assemblyCode * @param {Boolean} [debug = false] **/ exports.assemble = (assemblyCode, debug = false) => { DEBUG = debug; return decodeInstructions(assemblyCode); } // Configure pseudo-ops: const ASM_IP_LABEL = '*'; const ASM_CONSTANT_PREFIX = '#'; const ASM_LABEL_PREFIX = '@'; const mnemonicsWithOptionalArgs = ['end', 'nop']; const mnemonics2opcodes = { end: { direct: 0, indirect: 0 }, sto: { direct: 1, indirect: 2 }, lda: { direct: 3, indirect: 4 }, add: { direct: 5, indirect: 6 }, sub: { direct: 7, indirect: 8 }, hop: { direct: 9, indirect: 10 }, jmp: { direct: 11, indirect: 12 }, ftg: { direct: 13, indirect: 13 }, fhp: { direct: 14, indirect: 14 }, nop: { direct: 15, indirect: 15 }, }; /** * @typedef {('code'|'comment'|'blank')} SourceLineType **/ /** * @typedef {Object} SourceLineInfo * @property {number} number - line number * @property {string} source - source text * @property {string} sanitized - source text, with comments and whitespace removed * @property {SourceLineType} type - line type * @property {string} [operation] - For code: the first non-whitespace chunk * @property {string} [argument] - For code: the second non-whitespace chunk, if there is one * @property {string} [extraArgument] - For code: the third non-whitespace chunk, if there is one **/ /** * @param {string} source * @returns {Array} **/ function preparseSourceCode(source) { let lines = source.split(/\n/); // returns an array of lines const isLineBlank = (l) => { return l.length === 0 ? true : false }; const isLineComment = (l) => { return stripWhitespaceFromEnds(l).startsWith(';') }; /** * @param {string} l * @returns {SourceLineType} **/ const getLineType = (l) => { if (isLineBlank(l)) return 'blank'; if (isLineComment(l)) return 'comment'; return 'code'; } return lines.map((line, index) => { dbg(1, ` in: ${line}`); let info = { number: index + 1, source: line, sanitized: stripWhitespaceFromEnds(stripComments(line)), type: getLineType(line), }; dbg(1, ` → ${info.number} - ${info.type}: ${info.sanitized}`); dbg(1, ``); if (info.type === 'code') { const op_arg_array = info.sanitized.split(/\s+/); // split line into an array of [op, arg, extra_arg] if (op_arg_array[0] !== 'undefined') { info.operation = op_arg_array[0]; } if (op_arg_array.length === 2) { info.argument = op_arg_array[1]; } if (op_arg_array.length === 3) { info.argument = op_arg_array[1]; info.extraArgument = op_arg_array[2]; } // If there's too many arguments, throw an error // NB. there's a special case: // lines with the ASM_IP_LABEL can take an extra argument let maxArgs = 2; if (op_arg_array.length > 2 && op_arg_array[1].startsWith(ASM_IP_LABEL)) { maxArgs = 3; } if (op_arg_array.length > maxArgs) { console.error(); console.error(`Error: Too many arguments`); console.error(` at line ${info.number}`); process.exit(); } } return info; }); } /** * @param {string} arg * @returns {number} **/ function decodeNumericOp(arg) { if (arg.startsWith("$")) return hex2num(arg.replace("$", "")); return parseInt(arg); } /** * @param {string} op * @param {object} labels // TODO document better * @param {number} IP * @returns {Array} - array of labels **/ function handleLabelDefinition(op, IP, labels) { let label = op.substring(1); // strip label prefix if (label in labels) { labels[label].pointsToByte = IP; } else { labels[label] = { pointsToByte: IP, bytesToReplace: [], }; } dbg(1, ` Label definition:`); dbg(1, ` Points to byte: ${labels[label].pointsToByte}`); dbg(1, ` Bytes to replace: ${labels[label].bytesToReplace}`); dbg(1, ` IP: $${num2hex(IP)}, new code: none`); dbgGroupEnd(1, 'Input line'); return labels; } /** * @param {string} op * @param {string} arg * @param {number} IP * @returns {Array} - array of constants **/ function handleConstantDefinitions(op, arg, IP, constants) { let constantName = op.substring(1); // strip '>' let constantValue = arg; if (constantValue === ASM_IP_LABEL) { constantValue = IP.toString(); } constants[constantName] = constantValue; dbg(1, ''); dbg(1, `Constants:`); dbg(1, constants); dbg(1, ''); return constants; } /** * Assemble source code. * * If the source doesn't explicitly set an address to assemble to, * it will be assembled to the default intial value of the IP, * as specified in `machine.config.js`. * @param {string} source - Assembly source to decode * @return {{ debugInfo: Object, machineCode: Uint8Array }}; **/ function decodeInstructions(source) { dbg(1, 'Pre-parsing...'); let lines = preparseSourceCode(source); dbg(1, ''); dbg(1, 'Done pre-parsing.'); dbg(1, ''); dbg(1, 'Assembling...'); // Figure out where to start assembly... /** @type {number} IP - Destination addr for the next line **/ let IP; // Check if the source code explicitly sets an address to assemble at // by including a `* [addr]` as the first (non-blank, non-comment) line let idOfFirstLineWithCode = lines.findIndex((el) => el.type === 'code'); if (lines[idOfFirstLineWithCode].operation.startsWith(ASM_IP_LABEL)) { IP = parseInt(lines[idOfFirstLineWithCode].argument); } else { IP = INITIAL_IP_ADDRESS; } // Initialize arrays to collect assembled code /** @type {Array} - Assembled source code, as an array of bytes **/ let machineCode = new Array(IP).fill(0); let debugInfo = {}; // Initialize memory-mapped IO -- TODO this should probably be in the CPU, not here machineCode[POINTER_TO_DISPLAY] = DISPLAY_ADDR; // Initialize arrays that collect code references that // have to be revisited after our first pass through the source let labels = {}; let constants = {}; // Decode line by line... for (let i = 0; i < lines.length; i++) { let line = lines[i]; // dbg(2, `line info:`); // dbg(2, line); if (line.type === 'code') { const op = line.operation; if (typeof line.argument === 'undefined') { // If this isn't a label definition, // or one of the ops with optional arguments, // then it's an error if (!line.operation.startsWith('@')) { if (mnemonicsWithOptionalArgs.indexOf(line.operation.toLowerCase()) < 0) { console.error(''); console.error(`Error: Missing operand ${line.source}`); console.error(` at line ${line.number}`); process.exit(); } else { // It *is* one of the special optional-arg ops // So let's fill in the implicit operand with $00 line.argument = '0'; } } } // *** Decode special operations *** // Opcodes - Handle label definitions if (op.startsWith(ASM_LABEL_PREFIX)) { labels = handleLabelDefinition(op, IP, labels); continue; } // Opcodes - Handle constant definitions if (op.startsWith(ASM_CONSTANT_PREFIX)) { constants = handleConstantDefinitions(op, line.argument, IP, constants); continue; } // Opcodes - Handle setting value of IP if (op.startsWith(ASM_IP_LABEL)) { IP = parseInt(line.argument); continue; } // *** Decode regular operations *** /** @type {number|null} decodedOp **/ let decodedOp = null; /** @type {number|null} decodedArg **/ let decodedArg = null; /** @typedef {'direct'|'indirect'} AddressingMode **/ let addressingMode = 'direct'; // Now that it can't be a label or a constant, normalize the opcode line.operation = line.operation.toLowerCase(); // Operands - Handle references to labels if (line.argument.startsWith(ASM_LABEL_PREFIX)) { let label = line.argument.substring(1); // strip label prefix if (label in labels) { dbg(1, `'${label}' already in labels object`); labels[label].bytesToReplace.push(IP + 1); } else { dbg(1, `'${label}' NOT in labels object`); labels[label] = { bytesToReplace: [IP + 1], }; } dbg(1, `Label reference:`); dbg(1, ` Points to byte: ${labels[label].pointsToByte}`); dbg(1, ` Bytes to replace: ${labels[label].bytesToReplace}`); decodedArg = 0; // Return 0 for operand for now -- we'll replace it later } // Operands - Handle references to the Instruction Pointer if (line.argument === ASM_IP_LABEL) { dbg(1, ` References current IP - ${IP}`); if (typeof line.extraArgument === 'undefined') { decodedArg = IP; } else { decodedArg = IP + decodeNumericOp(line.extraArgument); } } // Operands - Handle references to constants if (line.argument.startsWith(ASM_CONSTANT_PREFIX)) { dbg(1, `References '${line.argument}'`); if (typeof constants[line.argument.substring(1)] === 'undefined') { console.error(); console.error(`Error: Undefined constant '${line.argument}'`); console.error(` at line ${line.number}`); process.exit(); } decodedArg = decodeNumericOp(constants[line.argument.substring(1)]); // substring(1) strips '>' } // Operands - Handle references to constants in indirect mode if (line.argument.startsWith(`(${ASM_CONSTANT_PREFIX}`)) { addressingMode = "indirect"; dbg(1, `(Indirectly) References '${line.argument}'`); let constName = line.argument.replace(`(${ASM_CONSTANT_PREFIX}`, ""); constName = constName.replace(")", ""); decodedArg = decodeNumericOp(constants[constName]); } // Operands - Handle indirect expressions if (decodedArg === null && line.argument.startsWith("(")) { addressingMode = "indirect"; let indyTemp = line.argument.replace("(", "").replace(")", ""); decodedArg = decodeNumericOp(indyTemp); } // Decode regular opcodes if (decodedOp === null) { decodedOp = mnemonics2opcodes[line.operation][addressingMode]; } // Decode regular operands if (decodedArg === null) { decodedArg = decodeNumericOp(line.argument); } machineCode[IP] = decodedOp; machineCode[IP + 1] = decodedArg; debugInfo[IP] = { lineNumber: line.number, source: line.source, address: IP, machine: [decodedOp, decodedArg] }; dbg(3, ``); dbg(3, `Line ${line.number}: ${line.source}`); if (line.argument) { dbg(3, ` Asm operation: ${line.operation.toUpperCase()} ${line.argument}`); } else if (line.operation) { dbg(3, ` Asm operation: ${line.operation.toUpperCase()}`); } dbg(3, ` Machine code: $${num2hex(decodedOp)} $${num2hex(decodedArg)}`); dbg(3, ` IP: $${num2hex(IP)}`); IP += 2; }; } dbg(1, ''); dbgGroup(1, 'Memory before filling in label constants'); dbgExec(1, () => logMemory(new Uint8Array(machineCode))); dbgGroupEnd(1); // Backfill label references for (let k of Object.keys(labels)) { dbgGroup(1, `${ASM_LABEL_PREFIX}${k}`); let label = labels[k]; dbg(1, `Points to byte: ${label.pointsToByte}`); dbg(1, `Bytes to replace: ${label.bytesToReplace}`); dbgGroupEnd(1); for (let j = 0; j < label.bytesToReplace.length; j++) { machineCode[label.bytesToReplace[j]] = label.pointsToByte; } } return { 'debugInfo': debugInfo, 'machineCode': new Uint8Array(machineCode) }; } /** * @param {string} line * @returns {string} **/ function stripComments(line) { return line.replace(/;.+/,""); } /** * @param {string} line * @returns {string} **/ function stripWhitespaceFromEnds(line) { line = line.replace(/^\s+/,""); line = line.replace(/\s+$/,""); return line; } function hex2num(hex) { return parseInt(hex, 16) }; // Debug helpers const dbg = (lvl, s) => { if (DEBUG && (lvl >= DEBUG_LEVEL)) console.log(s) }; const dbgGroup = (lvl, s) => { if (DEBUG && (lvl >= DEBUG_LEVEL)) console.group(s) }; const dbgGroupEnd = (lvl, s) => { if (DEBUG && (lvl >= DEBUG_LEVEL)) console.groupEnd() }; const dbgExec = (lvl, func) => { if (DEBUG && (lvl >= DEBUG_LEVEL)) func(); }