diff --git a/assembler.js b/assembler.js index 715f26e..394b610 100644 --- a/assembler.js +++ b/assembler.js @@ -20,9 +20,9 @@ exports.assemble = (str, debug = false) => { } // Configure pseudo-ops: -const POINTER_TO_IP_OP = '*'; -const CONSTANT_PREFIX = '#'; -const LABEL_PREFIX = '@'; +const ASM_IP_LABEL = '*'; +const ASM_CONSTANT_PREFIX = '#'; +const ASM_LABEL_PREFIX = '@'; const mnemonicsWithOptionalArgs = ['end', 'nop']; const mnemonics2opcodes = { @@ -43,55 +43,87 @@ const mnemonics2opcodes = { * @returns {boolean} **/ function startsWithPointerToIP(s) { - return stripWhitespaceFromEnds(s).startsWith(POINTER_TO_IP_OP); + return stripWhitespaceFromEnds(s).startsWith(ASM_IP_LABEL); } /** - * @param {string[]} lines - */ -function getFirstCodeLine(lines) { - function isCode(line) { - line = stripComments(stripWhitespaceFromEnds(line)); - if (line.length === 0) { - return false; - } - return true; - }; - lines = lines.filter(isCode); - if (lines.length > 0) { - return lines[0]; - } - return false; -} - -/** - * @param {string} source; + * @typedef {('code'|'comment'|'blank')} SourceLineType **/ - // * @returns {{number: number, source: string, type: 'code'|'comment'|'blank'}} - // TODO: https://stackoverflow.com/questions/32295263/how-to-document-an-array-of-objects-in-jsdoc -function splitCodeFromComments(source) { + +/** + * @typedef {Object} SourceLineInfo + * @property {number} number - line number + * @property {string} source - source text + * @property {string} sanitized - source text, with comments and whitespace removed + * @property {SourceLineType} type - line type + * @property {string} [operation] - For code: the first non-whitespace chunk + * @property {string} [argument] - For code: the second non-whitespace chunk, if there is one + **/ + +/** + * @param {string} source + * @returns {Array} + **/ +function preparseSourceCode(source) { let lines = source.split(/\n/); // returns an array of lines const isLineBlank = (l) => { return l.length === 0 ? true : false }; const isLineComment = (l) => { return stripWhitespaceFromEnds(l).startsWith(';') }; + + /** + * @param {string} l + * @returns {SourceLineType} + **/ const getLineType = (l) => { - console.log('get type for ', l); if (isLineBlank(l)) return 'blank'; if (isLineComment(l)) return 'comment'; return 'code'; } return lines.map((line, index) => { - return { + console.log('pre-parsing ', line); + let info = { number: index, source: line, - type: getLineType(line) + sanitized: stripWhitespaceFromEnds(stripComments(line)), + type: getLineType(line), }; + + if (info.type === 'code') { + const op_arg_array = line.split(/\s+/); // split line into an array of [op, arg] + if (op_arg_array[0] !== 'undefined') { + info.operation = op_arg_array[0]; + } + if (op_arg_array[1] !== 'undefined') { + info.argument = op_arg_array[1]; + } + } + return info; }); } + +/** + * @param {string} arg + * @returns {number} + **/ +function parseNumericOperand(arg) { + if (arg.startsWith("$")) return hex2num(arg.replace("$", "")); + return parseInt(arg); +} + + // DECODE! + const op = mnemonics2opcodes[opName][addressingMode]; // FIXME rename + + machineCode.push(op); + machineCode.push(arg_num); + dbg(3, `IP: $${num2hex(IP)}, new code: $${num2hex(op)} $${num2hex(arg_num)}`); + IP += 2; + dbgGroupEnd(1, 'Input line'); + + /** * Assemble source code. * @@ -102,9 +134,6 @@ function splitCodeFromComments(source) { * @return TODO **/ function decodeInstructions(source) { - // let lines = source.split(/\n/); // returns an array of lines - - // WIP: everything broken // - just finished writing `splitCodeFromComments` // - plan: @@ -114,21 +143,20 @@ function decodeInstructions(source) { // 1. check if first line * and set IP // 2. return debug data along with machine code - console.log(splitCodeFromComments('foo \n\n; bar')); - + let lines = preparseSourceCode(source); + console.log(lines); // Figure out where to start assembly... - /** @type {number} IP - The address where the next line of code will be assembled **/ + /** @type {number} IP - Destination addr for the next line **/ let IP; // Check if the source code explicitly sets an address to assemble at // by including a `* [addr]` as the first (non-blank, non-comment) line - const sourceIPdefinition = lines.findIndex(startsWithPointerToIP); - if (sourceSetsIP) { - let op_arg_array = lines[.split(/\s+/); // split line into an array of [op, arg] - IP = explicitIP; + let idOfFirstLineWithCode = lines.findIndex((el) => el.type === 'code'); + if (lines[idOfFirstLineWithCode].operation.startsWith(ASM_IP_LABEL)) { + IP = parseInt(lines[idOfFirstLineWithCode].argument); } else { IP = INITIAL_IP_ADDRESS; } @@ -138,9 +166,6 @@ function decodeInstructions(source) { /** @type {array} - Assembled source code, as an array of bytes **/ let machineCode = new Array(IP).fill(0); - /** @type {{lineNumber: number, source: string, lineType: 'code'|'comment'}} **/ - let debugData = new Array(); - // Initialize memory-mapped IO -- TODO this should probably be in the CPU, not here machineCode[POINTER_TO_DISPLAY] = DISPLAY_ADDR; machineCode[POINTER_TO_KEYPAD] = KEYPAD_ADDR; @@ -152,149 +177,131 @@ function decodeInstructions(source) { // Decode line by line for (let i = 0; i < lines.length; i++) { - dbg(2, ''); - dbgGroup(1, `Input line ${i}, IP ${num2hex(IP)}`); - dbg(3, `> ${lines[i]}`); - // TODO - update debugData - let line = stripWhitespaceFromEnds(stripComments(lines[i])); - - // Handle blank lines - if (line.length === 0) { - dbg(3, `IP: $${num2hex(IP)}, new code: none`); - dbg(1, 'blank'); - dbgGroupEnd(1, 'Input line'); - continue; - } - - - // HANDLE OPS - - // Handle label definitions - if (line.startsWith(LABEL_PREFIX)) { - let label = line.substring(1); // strip label prefix - - if (label in labels) { - labels[label].pointsToByte = IP; - } else { - labels[label] = { - pointsToByte: IP, - bytesToReplace: [], - }; + let line = lines[0]; + if (line.type === 'code') { + const op = line.operation; + const arg = null; + if (typeof line.argument != 'undefined') { + const arg = line.argument; } - dbg(2, `pointsToByte: ${labels[label].pointsToByte}`); - dbg(2, `bytesToReplace: ${labels[label].bytesToReplace}`); - dbg(3, `IP: $${num2hex(IP)}, new code: none`); - dbgGroupEnd(1, 'Input line'); - continue; - } - let op_arg_array = line.split(/\s+/); // split line into an array of [op, arg] - let opName = op_arg_array[0].toLowerCase(); - let arg_str = op_arg_array[1]; - let arg_num = null; - let addressingMode = 'direct'; // Must be "direct" or "indirect" + /** @type {{op: (number | null), arg: (number|null)}} **/ + let assembledLine = { + op: null, + arg: null + }; - // Handle setting value of IP - if (startsWithPointerToIP(line)) { - dbg(3, 'CHANGING IP'); - IP = arg_str.parseInt(); - continue; - } + /** @type {'direct'|'indirect'} **/ + let addressingMode = 'direct'; - // Handle constant definitions - if (opName.startsWith(CONSTANT_PREFIX)) { - // FIXME - a quick hack to get around problems caused by another use of lower-casing to sanitize input: - let constantName = opName.substring(1).toLowerCase(); // strip '>' - let constantValue = arg_str; - if (constantValue.toLowerCase() === POINTER_TO_IP_OP) { - constantValue = IP.toString(); + // Opcodes - Handle label definitions + if (op.startsWith(ASM_LABEL_PREFIX)) { + let label = op.substring(1); // strip label prefix + + if (label in labels) { + labels[label].pointsToByte = IP; + } else { + labels[label] = { + pointsToByte: IP, + bytesToReplace: [], + }; + } + dbg(2, `pointsToByte: ${labels[label].pointsToByte}`); + dbg(2, `bytesToReplace: ${labels[label].bytesToReplace}`); + dbg(3, `IP: $${num2hex(IP)}, new code: none`); + dbgGroupEnd(1, 'Input line'); + continue; } - constants[constantName] = constantValue; - dbg(2, `constants:`); - dbg(2, constants); - continue; - } - // Handle mnemonics without operands (eg END) ... - if (typeof arg_str === 'undefined') { - if (mnemonicsWithOptionalArgs.indexOf(opName) < 0) { - console.error(`Missing opcode: ${line}`); - throw new Error("Missing opcode"); + // let op_arg_array = line.split(/\s+/); // split line into an array of [op, arg] + // let opName = op_arg_array[0].toLowerCase(); + // let arg_str = op_arg_array[1]; + // let arg_num = null; + + // Opcodes - Handle setting value of IP + if (startsWithPointerToIP(op)) { + dbg(3, 'CHANGING IP'); + IP = parseInt(arg); + continue; } - arg_num = 0; - - // HANDLE OPERANDS - - // Handle references to labels - } else if (arg_str.startsWith(LABEL_PREFIX)) { - let label = arg_str.substring(1); // strip label prefix - arg_num = 0; - - if (label in labels) { - dbg(1, `'${label}' already in labels object`); - labels[label].bytesToReplace.push(IP + 1); - } else { - dbg(1, `'${label}' NOT in labels object`); - labels[label] = { - bytesToReplace: [IP + 1], - }; + // Opcodes - Handle constant definitions + if (op.startsWith(ASM_CONSTANT_PREFIX)) { + let constantName = op.substring(1); // strip '>' + let constantValue = arg; + if (constantValue === ASM_IP_LABEL) { + constantValue = IP.toString(); + } + constants[constantName] = constantValue; + dbg(2, `constants:`); + dbg(2, constants); + continue; } - dbg(2, `pointsToByte: ${labels[label].pointsToByte}`); - dbg(2, `bytesToReplace: ${labels[label].bytesToReplace}`); - - // Handle references to the Instruction Pointer - } else if (arg_str.toLowerCase() === POINTER_TO_IP_OP) { - dbg(2, `operand references current address`); - arg_num = IP; - dbg(2, `arg_num: ${num2hex(arg_num)}`); - // Handle references to constants - } else if (arg_str.startsWith(CONSTANT_PREFIX)) { - // FIXME - a quick hack to get around problems caused by another use of lower-casing to sanitize input: - arg_str = arg_str.substring(1).toLowerCase(); // strip '>' - dbg(2, `operand references '${arg_str}'`); - arg_str = constants[arg_str]; - dbg(2, `arg_str from '${arg_str}'`); - - // Handle references to constants in indirect mode - } else if (arg_str.startsWith(`(${CONSTANT_PREFIX}`)) { - addressingMode = "indirect"; - arg_str = arg_str.replace(`(${CONSTANT_PREFIX}`, ""); - arg_str = arg_str.replace(")", ""); - // FIXME - a quick hack to get around problems caused by another use of lower-casing to sanitize input: - arg_str = arg_str.toLowerCase(); - dbg(2, `INDY - operand references '${arg_str}'`); - arg_str = constants[arg_str]; + // Opcodes - Handle mnemonics without operands (eg END) ... + if (arg === null) { + if (mnemonicsWithOptionalArgs.indexOf(op) < 0) { + console.error(`Missing opcode for line ${line.number}: ${line.source}`); + throw new Error("Missing opcode"); + } + assembledLine.arg = 0; - // Handle indirect expressions - } else if (arg_str.startsWith("(")) { - addressingMode = "indirect"; - arg_str = arg_str.replace("(", ""); - arg_str = arg_str.replace(")", ""); - } - // Handle numeric operands - if (arg_num === null) { - if (arg_str.startsWith("$")) { - // Handle hex - arg_str = arg_str.replace("$", ""); - arg_num = hex2num(arg_str); - } else { - // Accept decimal i guess - arg_num = parseInt(arg_str); + // Operands - Handle references to labels + } else if (arg.startsWith(ASM_LABEL_PREFIX)) { + let label = arg.substring(1); // strip label prefix + assembledLine.arg = 0; + + if (label in labels) { + dbg(1, `'${label}' already in labels object`); + labels[label].bytesToReplace.push(IP + 1); + } else { + dbg(1, `'${label}' NOT in labels object`); + labels[label] = { + bytesToReplace: [IP + 1], + }; + } + dbg(2, `pointsToByte: ${labels[label].pointsToByte}`); + dbg(2, `bytesToReplace: ${labels[label].bytesToReplace}`); + + // Operands - Handle references to the Instruction Pointer + } else if (arg.toLowerCase() === ASM_IP_LABEL) { + dbg(2, `operand references current address`); + assembledLine.arg = IP; + dbg(2, `arg_num: ${num2hex(assembledLine.arg)}`); + + // Operands - Handle references to constants + } else if (arg.startsWith(ASM_CONSTANT_PREFIX)) { + dbg(2, `operand references '${arg}'`); + assembledLine.arg = constants[arg.substring(1)]; // substring(1) strips '>' + + // Operands - Handle references to constants in indirect mode + } else if (arg.startsWith(`(${ASM_CONSTANT_PREFIX}`)) { + addressingMode = "indirect"; + dbg(2, `IND - operand references '${arg}'`); + let constTemp = arg.replace(`(${ASM_CONSTANT_PREFIX}`, "").replace(")", ""); + assembledLine.arg = constants[constTemp]; + + // Operands - Handle indirect expressions + } else if (arg.startsWith("(")) { + addressingMode = "indirect"; + assembledLine.arg = arg.replace("(", "").replace(")", ""); } - } - // DECODE! - const op = mnemonics2opcodes[opName][addressingMode]; + // Operands - Handle numeric operands + if (arg_num === null) { + if (arg_str.startsWith("$")) { + // Handle hex + arg_str = arg_str.replace("$", ""); + arg_num = hex2num(arg_str); + } else { + // Accept decimal i guess + arg_num = parseInt(arg_str); + } + } - machineCode.push(op); - machineCode.push(arg_num); - dbg(3, `IP: $${num2hex(IP)}, new code: $${num2hex(op)} $${num2hex(arg_num)}`); - IP += 2; - dbgGroupEnd(1, 'Input line'); - }; + }; + } dbg(1, ''); dbgGroup(1, 'Memory before filling in label constants'); @@ -303,7 +310,7 @@ function decodeInstructions(source) { // Backfill label references for (let k of Object.keys(labels)) { - dbgGroup(2, `${LABEL_PREFIX}${k}`); + dbgGroup(2, `${ASM_LABEL_PREFIX}${k}`); let label = labels[k]; dbg(2, `pointsToByte: ${label.pointsToByte}`); dbg(2, `bytesToReplace: ${label.bytesToReplace}`); @@ -317,10 +324,18 @@ function decodeInstructions(source) { } +/** + * @param {string} line + * @returns {string} + **/ function stripComments(line) { return line.replace(/;.+/,""); } +/** + * @param {string} line + * @returns {string} + **/ function stripWhitespaceFromEnds(line) { line = line.replace(/^\s+/,""); line = line.replace(/\s+$/,"");