#!/usr/bin/env node

const fs = require('fs');

const Opter = require('./opter/opter.js');
const { logMemory } = require('./logging.js');
const { num2hex, hex2num, bin2num } = require('./conversions.js');
const DBG = require('./dbg.js');
const CFG = require('./machine.config.js');

/** Configure pseudo-ops **/
const ASM_IP_LABEL = '*';
const ASM_CONSTANT_PREFIX = '#';
const ASM_LABEL_PREFIX = '@';

/** Configure mnemonics **/
const mnemonicsWithOptionalArgs = ['end', 'nop'];
const mnemonics2opcodes = {
  end: { direct: 0, indirect: 0 },
  sto: { direct: 1, indirect: 2 },
  lda: { direct: 3, indirect: 4 },
  add: { direct: 5, indirect: 6 },
  sub: { direct: 7, indirect: 8 },
  hop: { direct: 9, indirect: 10 },
  jmp: { direct: 11, indirect: 12 },
  ftg: { direct: 13, indirect: 13 },
  fhp: { direct: 14, indirect: 14 },
  nop: { direct: 15, indirect: 15 },
};

/**
 * @typedef {('code'|'comment'|'blank')} SourceLineType
 **/

/**
 * @typedef {Object} SourceLineInfo
 * @property {number} number - line number
 * @property {string} source - source text
 * @property {string} sanitized - source text, with comments and whitespace removed
 * @property {SourceLineType} type - line type
 * @property {string} [operation] - For code: the first non-whitespace chunk
 * @property {string} [argument] - For code: the second non-whitespace chunk, if there is one
 * @property {string} [extraArgument] - For code: the third non-whitespace chunk, if there is one
 **/

/**
 * Report an assembly error on stderr and terminate the process
 * with a non-zero exit status, so that callers (shells, build scripts)
 * can tell that assembly failed.
 * @param {string} message - Description of the problem (without the `Error: ` prefix)
 * @param {number} lineNumber - Source line the problem was found at
 **/
function exitWithError(message, lineNumber) {
  console.error();
  console.error(`Error: ${message}`);
  console.error(` at line ${lineNumber}`);
  process.exit(1);
}

/**
 * Split the source into lines and classify each one as
 * code, comment, or blank. Code lines are additionally split
 * into an operation and up to two arguments.
 * Exits with an error if a code line has too many arguments.
 * @param {string} source
 * @returns {Array<SourceLineInfo>}
 **/
function preparseSourceCode(source) {
  // Accept both Unix and Windows line endings,
  // so that a stray '\r' never ends up in the stored source text
  const lines = source.split(/\r?\n/);

  const isLineBlank = (l) => stripWhitespaceFromEnds(l).length === 0;
  const isLineComment = (l) => stripWhitespaceFromEnds(l).startsWith(';');

  /**
   * @param {string} l
   * @returns {SourceLineType}
   **/
  const getLineType = (l) => {
    if (isLineBlank(l)) return 'blank';
    if (isLineComment(l)) return 'comment';
    return 'code';
  };

  return lines.map((line, index) => {
    dbg.nit(` in: ${line}`);
    const info = {
      number: index + 1,
      source: line,
      sanitized: stripWhitespaceFromEnds(stripComments(line)),
      type: getLineType(line),
    };
    dbg.nit(` → ${info.number} - ${info.type}: ${info.sanitized}`);
    dbg.nit(``);

    if (info.type === 'code') {
      // Split line into an array of [op, arg, extra_arg].
      // A code line always has at least one non-whitespace chunk,
      // so the operation is always present.
      const tokens = info.sanitized.split(/\s+/);
      const [operation, argument, extraArgument] = tokens;
      info.operation = operation;
      if (typeof argument !== 'undefined') {
        info.argument = argument;
      }
      if (typeof extraArgument !== 'undefined') {
        info.extraArgument = extraArgument;
      }

      // If there's too many arguments, throw an error
      // NB. there's a special case:
      // lines with the ASM_IP_LABEL can take an extra argument
      let maxArgs = 2;
      if (tokens.length > 2 && tokens[1].startsWith(ASM_IP_LABEL)) {
        maxArgs = 3;
      }
      if (tokens.length > maxArgs) {
        exitWithError('Too many arguments', info.number);
      }
    }

    return info;
  });
}

/**
 * Decode a numeric literal.
 * Supports hexadecimal (`$ff`, `0xff`), binary (`0b1010`), and decimal.
 * @param {string} arg
 * @returns {number} NaN if a decimal literal cannot be parsed
 **/
function decodeNumericOp(arg) {
  if (arg.startsWith('$')) return hex2num(arg.slice(1));
  if (/^0x/i.test(arg)) return hex2num(arg.slice(2));
  if (/^0b/i.test(arg)) return bin2num(arg.slice(2));
  return Number.parseInt(arg, 10);
}

/**
 * Decode the argument of an `ASM_IP_LABEL` directive into an address.
 * Exits with an error if the result isn't a non-negative integer.
 * @param {string} arg
 * @param {number} lineNumber - Source line of the directive, for error reporting
 * @returns {number}
 **/
function decodeAddress(arg, lineNumber) {
  const address = decodeNumericOp(arg);
  if (!Number.isInteger(address) || address < 0) {
    exitWithError(`Invalid address '${arg}'`, lineNumber);
  }
  return address;
}

/**
 * Record that a label points at the current IP.
 * If the label has already been referenced (or defined),
 * its existing entry is updated; otherwise a new entry is created.
 * @param {string} op - The label definition, including its prefix
 * @param {number} IP
 * @param {Object} labels - Maps label name to `{ pointsToByte, bytesToReplace }`
 * @returns {Object} - the (mutated) labels object
 **/
function handleLabelDefinition(op, IP, labels) {
  const label = op.substring(1); // strip label prefix

  if (label in labels) {
    labels[label].pointsToByte = IP;
  } else {
    labels[label] = {
      pointsToByte: IP,
      bytesToReplace: [],
    };
  }

  dbg.nit(` Label definition:`);
  dbg.nit(` Points to byte: ${labels[label].pointsToByte}`);
  dbg.nit(` Bytes to replace: ${labels[label].bytesToReplace}`);
  dbg.nit(` IP: $${num2hex(IP)}, new code: none`);
  dbg.nitGroupEnd('Input line');

  return labels;
}

/**
 * Record the value of a constant.
 * A value of `ASM_IP_LABEL` is replaced by the current IP (as a decimal string).
 * @param {string} op - The constant definition, including its prefix
 * @param {string} arg - The (not yet decoded) value of the constant
 * @param {number} IP
 * @param {Object} constants - Maps constant name to its undecoded value
 * @returns {Object} - the (mutated) constants object
 **/
function handleConstantDefinitions(op, arg, IP, constants) {
  const constantName = op.substring(1); // strip constant prefix
  let constantValue = arg;
  if (constantValue === ASM_IP_LABEL) {
    constantValue = IP.toString();
  }
  constants[constantName] = constantValue;

  dbg.nit('');
  dbg.nit(`Constants:`);
  dbg.nit(constants);
  dbg.nit('');

  return constants;
}

/**
 * Assemble source code.
 *
 * If the source doesn't explicitly set an address to assemble to,
 * it will be assembled to the default initial value of the IP,
 * as specified in `machine.config.js`.
 *
 * Exits with an error on a missing or invalid operand, an unknown mnemonic,
 * an undefined constant, or a label that is referenced but never defined.
 * @param {string} source - Assembly source to decode
 * @return {{ sourceAnnotations: Object, machineCode: Array<number> }}
 **/
// TODO rename?
function decodeInstructions(source) {
  dbg.nit('Pre-parsing...');
  const lines = preparseSourceCode(source);
  dbg.nit('');
  dbg.nit('Done pre-parsing.');
  dbg.nit('');
  dbg.nit('Assembling...');

  // Figure out where to start assembly...

  /** @type {number} IP - Destination addr for the next line **/
  let IP = CFG.initialIP;

  // Check if the source code explicitly sets an address to assemble at
  // by including a `* [addr]` as the first (non-blank, non-comment) line.
  // (There may be no code at all, in which case we keep the default.
  // A `*` without an address is reported by the main loop below.)
  const firstCodeLine = lines.find((el) => el.type === 'code');
  if (
    typeof firstCodeLine !== 'undefined'
    && firstCodeLine.operation.startsWith(ASM_IP_LABEL)
    && typeof firstCodeLine.argument !== 'undefined'
  ) {
    IP = decodeAddress(firstCodeLine.argument, firstCodeLine.number);
  }

  // Initialize arrays to collect assembled code

  /** @type {Array<number>} - Assembled source code, as an array of bytes **/
  const machineCode = new Array(IP).fill(0);

  const sourceAnnotations = {};

  // Initialize memory-mapped IO -- TODO this should probably be in the CPU, not here
  machineCode[CFG.pointerToDisplay] = CFG.displayAddr;

  // Initialize the collections of code references that
  // have to be revisited after our first pass through the source.
  // They have no prototype, so that names like `constructor`
  // can't be mistaken for already-defined labels/constants.
  let labels = Object.create(null);
  let constants = Object.create(null);

  // Decode line by line...
  for (const line of lines) {
    if (line.type !== 'code') continue;

    const op = line.operation;
    let argument = line.argument;

    if (typeof argument === 'undefined' && !op.startsWith(ASM_LABEL_PREFIX)) {
      // If this isn't a label definition,
      // or one of the ops with optional arguments,
      // then it's an error
      if (!mnemonicsWithOptionalArgs.includes(op.toLowerCase())) {
        exitWithError(`Missing operand ${line.source}`, line.number);
      }
      // It *is* one of the special optional-arg ops
      // So let's fill in the implicit operand with $00
      argument = '0';
    }

    // *** Decode special operations ***

    // Opcodes - Handle label definitions
    if (op.startsWith(ASM_LABEL_PREFIX)) {
      labels = handleLabelDefinition(op, IP, labels);
      continue;
    }

    // Opcodes - Handle constant definitions
    if (op.startsWith(ASM_CONSTANT_PREFIX)) {
      constants = handleConstantDefinitions(op, argument, IP, constants);
      continue;
    }

    // Opcodes - Handle setting value of IP
    if (op.startsWith(ASM_IP_LABEL)) {
      IP = decodeAddress(argument, line.number);
      continue;
    }

    // *** Decode regular operations ***

    /** @type {number|null} decodedArg **/
    let decodedArg = null;

    /** @type {'direct'|'indirect'} addressingMode **/
    let addressingMode = 'direct';

    // Now that it can't be a label or a constant, normalize the opcode
    const mnemonic = op.toLowerCase();
    if (!Object.prototype.hasOwnProperty.call(mnemonics2opcodes, mnemonic)) {
      exitWithError(`Unknown mnemonic '${op}'`, line.number);
    }

    if (argument.startsWith(ASM_LABEL_PREFIX)) {
      // Operands - Handle references to labels
      const label = argument.substring(1); // strip label prefix
      if (label in labels) {
        dbg.nit(`'${label}' already in labels object`);
        labels[label].bytesToReplace.push(IP + 1);
      } else {
        dbg.nit(`'${label}' NOT in labels object`);
        labels[label] = {
          bytesToReplace: [IP + 1],
        };
      }
      dbg.nit(`Label reference:`);
      dbg.nit(` Points to byte: ${labels[label].pointsToByte}`);
      dbg.nit(` Bytes to replace: ${labels[label].bytesToReplace}`);
      decodedArg = 0; // Return 0 for operand for now -- we'll replace it later
    } else if (argument === ASM_IP_LABEL) {
      // Operands - Handle references to the Instruction Pointer
      dbg.nit(` References current IP - ${IP}`);
      if (typeof line.extraArgument === 'undefined') {
        decodedArg = IP;
      } else {
        decodedArg = IP + decodeNumericOp(line.extraArgument);
      }
    } else if (argument.startsWith(ASM_CONSTANT_PREFIX)) {
      // Operands - Handle references to constants
      dbg.nit(`References '${argument}'`);
      const constName = argument.substring(1); // strip constant prefix
      if (typeof constants[constName] === 'undefined') {
        exitWithError(`Undefined constant '${argument}'`, line.number);
      }
      decodedArg = decodeNumericOp(constants[constName]);
    } else if (argument.startsWith(`(${ASM_CONSTANT_PREFIX}`)) {
      // Operands - Handle references to constants in indirect mode
      addressingMode = 'indirect';
      dbg.nit(`(Indirectly) References '${argument}'`);
      const constName = argument
        .replace(`(${ASM_CONSTANT_PREFIX}`, '')
        .replace(')', '');
      if (typeof constants[constName] === 'undefined') {
        exitWithError(
          `Undefined constant '${ASM_CONSTANT_PREFIX}${constName}'`,
          line.number,
        );
      }
      decodedArg = decodeNumericOp(constants[constName]);
    } else if (argument.startsWith('(')) {
      // Operands - Handle indirect expressions
      addressingMode = 'indirect';
      decodedArg = decodeNumericOp(argument.replace('(', '').replace(')', ''));
    } else {
      // Decode regular operands
      decodedArg = decodeNumericOp(argument);
    }

    // Don't let an undecodable operand silently end up in the output
    if (Number.isNaN(decodedArg)) {
      exitWithError(`Invalid operand '${argument}'`, line.number);
    }

    // Decode regular opcodes
    const decodedOp = mnemonics2opcodes[mnemonic][addressingMode];

    machineCode[IP] = decodedOp;
    machineCode[IP + 1] = decodedArg;

    sourceAnnotations[IP] = {
      lineNumber: line.number,
      source: line.source,
      address: IP,
      machine: [decodedOp, decodedArg],
    };

    dbg.i();
    dbg.i(`Line ${line.number}: ${line.source}`);
    if (argument) {
      dbg.i(` Asm operation: ${mnemonic.toUpperCase()} ${argument}`);
    } else if (mnemonic) {
      dbg.i(` Asm operation: ${mnemonic.toUpperCase()}`);
    }
    dbg.i(` Machine code: $${num2hex(decodedOp)} $${num2hex(decodedArg)}`);
    dbg.i(` IP: $${num2hex(IP)}`);

    IP += 2;
  }

  dbg.nit('');
  dbg.nitGroup('Memory before filling in label constants');
  dbg.nitExec(() => logMemory(new Uint8Array(machineCode)));
  dbg.nitGroupEnd();

  // Backfill label references
  for (const name of Object.keys(labels)) {
    dbg.nitGroup(`${ASM_LABEL_PREFIX}${name}`);
    const label = labels[name];
    dbg.nit(`Points to byte: ${label.pointsToByte}`);
    dbg.nit(`Bytes to replace: ${label.bytesToReplace}`);
    dbg.nitGroupEnd();

    if (typeof label.pointsToByte === 'undefined') {
      // The label was referenced, but never defined.
      // Each byte to replace is the operand (IP + 1) of an annotated
      // instruction, which lets us report where the first reference was.
      const firstReference = sourceAnnotations[label.bytesToReplace[0] - 1];
      exitWithError(
        `Undefined label '${ASM_LABEL_PREFIX}${name}'`,
        firstReference.lineNumber,
      );
    }

    for (const byteToReplace of label.bytesToReplace) {
      machineCode[byteToReplace] = label.pointsToByte;
    }
  }

  // Moving the IP forward leaves holes in the array.
  // Fill them with 0, like the memory in front of the initial IP,
  // so they aren't written out as empty strings (or `null` in JSON).
  for (let addr = 0; addr < machineCode.length; addr++) {
    if (typeof machineCode[addr] === 'undefined') {
      machineCode[addr] = 0;
    }
  }

  return { machineCode, sourceAnnotations };
}

/**
 * Remove a trailing `;` comment (if any) from a line.
 * @param {string} line
 * @returns {string}
 **/
function stripComments(line) {
  // `.*` rather than `.+`, so that a bare `;` at the end of a line is removed too
  return line.replace(/;.*/, '');
}

/**
 * Remove leading and trailing whitespace from a line.
 * @param {string} line
 * @returns {string}
 **/
function stripWhitespaceFromEnds(line) {
  return line.trim();
}

/**
 * Assemble source code into machine code.
 * If 'includeMetadata' is true, a JSON object containing
 * both machine code and metadata is written to the output file.
 * Otherwise, a string of decimal numbers is written.
 * @arg {string} inputFilename File containing code to assemble
 * @arg {boolean} outputToFile If false, output is on stdout
 * @arg {boolean} includeMetadata Include metadata when writing output to a file? (for use when debugging using the simulator)
 * @arg {string} [outputFilename] Output file for machine code (and optional metadata)
 **/
function assemble(inputFilename, outputToFile, includeMetadata, outputFilename = null) {
  const sourceCode = fs.readFileSync(inputFilename, 'utf8');
  const out = decodeInstructions(sourceCode);

  const output = includeMetadata
    ? JSON.stringify(out)
    : out.machineCode.toString().replace(/,/g, ' ');

  if (outputToFile) {
    fs.writeFileSync(outputFilename, output);
  } else {
    console.log(output);
  }
}

/** MAIN **/

// Initialize debugger...
// (The functions above refer to `dbg`; they are only called after this point.)
const dbg = new DBG('nitpick');

// Handle command-line options...
const opter = new Opter();
opter.addOption('-a', '--annotate');
opter.addOption('-i', '--in', true, true, 1);
opter.addOption('-o', '--out', false, true, 1);
const opts = opter.parse(process.argv);

const inputFilename = opts.in[0];
const outputWithAnnotations = 'annotate' in opts;

// Assemble...!
if ('out' in opts) {
  const outputFilename = opts.out[0];
  assemble(inputFilename, true, outputWithAnnotations, outputFilename);
} else {
  // Debug output would get mixed up with the machine code on stdout
  dbg.setLevel('none');
  assemble(inputFilename, false, outputWithAnnotations);
}