const { logMemory, num2hex } = require('./logging.js'); const { INITIAL_IP_ADDRESS, DISPLAY_ADDR, KEYPAD_ADDR, POINTER_TO_DISPLAY, POINTER_TO_KEYPAD } = require('./machine.config.js'); // 1 = verbose // 2 = what i'm currently focusing on // 3 = always print // 4 = silent const DEBUG_LEVEL = 2; let DEBUG = false; // Turn debugging on/off -- set by assemble() exports.assemble = (str, debug = false) => { DEBUG = debug; return decodeInstructions(str); } // Configure pseudo-ops: const POINTER_TO_IP_OP = '*'; const CONSTANT_PREFIX = '#'; const LABEL_PREFIX = '@'; const mnemonicsWithOptionalArgs = ['end', 'nop']; const mnemonics2opcodes = { end: { direct: 0, indirect: 0 }, sto: { direct: 1, indirect: 2 }, lda: { direct: 3, indirect: 4 }, add: { direct: 5, indirect: 6 }, sub: { direct: 7, indirect: 8 }, hop: { direct: 9, indirect: 10 }, jmp: { direct: 11, indirect: 12 }, ftg: { direct: 13, indirect: 13 }, fhp: { direct: 14, indirect: 14 }, nop: { direct: 15, indirect: 15 }, }; /** * @param {string} s * @returns {boolean} **/ function startsWithPointerToIP(s) { return stripWhitespaceFromEnds(s).startsWith(POINTER_TO_IP_OP); } /** * @param {string[]} lines */ function getFirstCodeLine(lines) { function isCode(line) { line = stripComments(stripWhitespaceFromEnds(line)); if (line.length === 0) { return false; } return true; }; lines = lines.filter(isCode); if (lines.length > 0) { return lines[0]; } return false; } /** * @param {string} source; **/ // * @returns {{number: number, source: string, type: 'code'|'comment'|'blank'}} // TODO: https://stackoverflow.com/questions/32295263/how-to-document-an-array-of-objects-in-jsdoc function splitCodeFromComments(source) { let lines = source.split(/\n/); // returns an array of lines const isLineBlank = (l) => { return l.length === 0 ? true : false }; const isLineComment = (l) => { return stripWhitespaceFromEnds(l).startsWith(';') }; const getLineType = (l) => { console.log('get type for ', l); if (isLineBlank(l)) return 'blank'; if (isLineComment(l)) return 'comment'; return 'code'; } return lines.map((line, index) => { return { number: index, source: line, type: getLineType(line) }; }); } /** * Assemble source code. * * If the source doesn't explicitly set an address to assemble to, * it will be assembled to the default intial value of the IP, * as specified in `machine.config.js`. * @param {string} source - Assembly source to decode * @return TODO **/ function decodeInstructions(source) { // let lines = source.split(/\n/); // returns an array of lines // WIP: everything broken // - just finished writing `splitCodeFromComments` // - plan: // - use that to pre-load debugInfo array // - and to check if the first code-line is `*` // so currently i'm implementing 2 entangled features: // 1. check if first line * and set IP // 2. return debug data along with machine code console.log(splitCodeFromComments('foo \n\n; bar')); // Figure out where to start assembly... /** @type {number} IP - The address where the next line of code will be assembled **/ let IP; // Check if the source code explicitly sets an address to assemble at // by including a `* [addr]` as the first (non-blank, non-comment) line const sourceIPdefinition = lines.findIndex(startsWithPointerToIP); if (sourceSetsIP) { let op_arg_array = lines[.split(/\s+/); // split line into an array of [op, arg] IP = explicitIP; } else { IP = INITIAL_IP_ADDRESS; } // Initialize arrays to collect assembled code /** @type {array} - Assembled source code, as an array of bytes **/ let machineCode = new Array(IP).fill(0); /** @type {{lineNumber: number, source: string, lineType: 'code'|'comment'}} **/ let debugData = new Array(); // Initialize memory-mapped IO -- TODO this should probably be in the CPU, not here machineCode[POINTER_TO_DISPLAY] = DISPLAY_ADDR; machineCode[POINTER_TO_KEYPAD] = KEYPAD_ADDR; // Initialize arrays that collect code references that // have to be revisited after our first pass through the source let labels = {}; let constants = {}; // Decode line by line for (let i = 0; i < lines.length; i++) { dbg(2, ''); dbgGroup(1, `Input line ${i}, IP ${num2hex(IP)}`); dbg(3, `> ${lines[i]}`); // TODO - update debugData let line = stripWhitespaceFromEnds(stripComments(lines[i])); // Handle blank lines if (line.length === 0) { dbg(3, `IP: $${num2hex(IP)}, new code: none`); dbg(1, 'blank'); dbgGroupEnd(1, 'Input line'); continue; } // HANDLE OPS // Handle label definitions if (line.startsWith(LABEL_PREFIX)) { let label = line.substring(1); // strip label prefix if (label in labels) { labels[label].pointsToByte = IP; } else { labels[label] = { pointsToByte: IP, bytesToReplace: [], }; } dbg(2, `pointsToByte: ${labels[label].pointsToByte}`); dbg(2, `bytesToReplace: ${labels[label].bytesToReplace}`); dbg(3, `IP: $${num2hex(IP)}, new code: none`); dbgGroupEnd(1, 'Input line'); continue; } let op_arg_array = line.split(/\s+/); // split line into an array of [op, arg] let opName = op_arg_array[0].toLowerCase(); let arg_str = op_arg_array[1]; let arg_num = null; let addressingMode = 'direct'; // Must be "direct" or "indirect" // Handle setting value of IP if (startsWithPointerToIP(line)) { dbg(3, 'CHANGING IP'); IP = arg_str.parseInt(); continue; } // Handle constant definitions if (opName.startsWith(CONSTANT_PREFIX)) { // FIXME - a quick hack to get around problems caused by another use of lower-casing to sanitize input: let constantName = opName.substring(1).toLowerCase(); // strip '>' let constantValue = arg_str; if (constantValue.toLowerCase() === POINTER_TO_IP_OP) { constantValue = IP.toString(); } constants[constantName] = constantValue; dbg(2, `constants:`); dbg(2, constants); continue; } // Handle mnemonics without operands (eg END) ... if (typeof arg_str === 'undefined') { if (mnemonicsWithOptionalArgs.indexOf(opName) < 0) { console.error(`Missing opcode: ${line}`); throw new Error("Missing opcode"); } arg_num = 0; // HANDLE OPERANDS // Handle references to labels } else if (arg_str.startsWith(LABEL_PREFIX)) { let label = arg_str.substring(1); // strip label prefix arg_num = 0; if (label in labels) { dbg(1, `'${label}' already in labels object`); labels[label].bytesToReplace.push(IP + 1); } else { dbg(1, `'${label}' NOT in labels object`); labels[label] = { bytesToReplace: [IP + 1], }; } dbg(2, `pointsToByte: ${labels[label].pointsToByte}`); dbg(2, `bytesToReplace: ${labels[label].bytesToReplace}`); // Handle references to the Instruction Pointer } else if (arg_str.toLowerCase() === POINTER_TO_IP_OP) { dbg(2, `operand references current address`); arg_num = IP; dbg(2, `arg_num: ${num2hex(arg_num)}`); // Handle references to constants } else if (arg_str.startsWith(CONSTANT_PREFIX)) { // FIXME - a quick hack to get around problems caused by another use of lower-casing to sanitize input: arg_str = arg_str.substring(1).toLowerCase(); // strip '>' dbg(2, `operand references '${arg_str}'`); arg_str = constants[arg_str]; dbg(2, `arg_str from '${arg_str}'`); // Handle references to constants in indirect mode } else if (arg_str.startsWith(`(${CONSTANT_PREFIX}`)) { addressingMode = "indirect"; arg_str = arg_str.replace(`(${CONSTANT_PREFIX}`, ""); arg_str = arg_str.replace(")", ""); // FIXME - a quick hack to get around problems caused by another use of lower-casing to sanitize input: arg_str = arg_str.toLowerCase(); dbg(2, `INDY - operand references '${arg_str}'`); arg_str = constants[arg_str]; // Handle indirect expressions } else if (arg_str.startsWith("(")) { addressingMode = "indirect"; arg_str = arg_str.replace("(", ""); arg_str = arg_str.replace(")", ""); } // Handle numeric operands if (arg_num === null) { if (arg_str.startsWith("$")) { // Handle hex arg_str = arg_str.replace("$", ""); arg_num = hex2num(arg_str); } else { // Accept decimal i guess arg_num = parseInt(arg_str); } } // DECODE! const op = mnemonics2opcodes[opName][addressingMode]; machineCode.push(op); machineCode.push(arg_num); dbg(3, `IP: $${num2hex(IP)}, new code: $${num2hex(op)} $${num2hex(arg_num)}`); IP += 2; dbgGroupEnd(1, 'Input line'); }; dbg(1, ''); dbgGroup(1, 'Memory before filling in label constants'); dbgExec(1, () => logMemory(new Uint8Array(machineCode))); dbgGroupEnd(1, 'Memory before filling in label constants'); // Backfill label references for (let k of Object.keys(labels)) { dbgGroup(2, `${LABEL_PREFIX}${k}`); let label = labels[k]; dbg(2, `pointsToByte: ${label.pointsToByte}`); dbg(2, `bytesToReplace: ${label.bytesToReplace}`); dbgGroupEnd(2, `label`); for (let j = 0; j < label.bytesToReplace.length; j++) { machineCode[label.bytesToReplace[j]] = label.pointsToByte; } } return new Uint8Array(machineCode); } function stripComments(line) { return line.replace(/;.+/,""); } function stripWhitespaceFromEnds(line) { line = line.replace(/^\s+/,""); line = line.replace(/\s+$/,""); return line; } function hex2num(hex) { return parseInt(hex, 16) }; // Debug helpers const dbg = (lvl, s) => { if (DEBUG && (lvl >= DEBUG_LEVEL)) console.log(s) }; const dbgGroup = (lvl, s) => { if (DEBUG && (lvl >= DEBUG_LEVEL)) console.group(s) }; const dbgGroupEnd = (lvl, s) => { if (DEBUG && (lvl >= DEBUG_LEVEL)) console.groupEnd() }; const dbgExec = (lvl, func) => { if (DEBUG && (lvl >= DEBUG_LEVEL)) func(); }