cardiograph-computer/assembler.js

422 lines
13 KiB
JavaScript

const { logMemory, num2hex } = require('./logging.js');
const {
INITIAL_IP_ADDRESS,
DISPLAY_ADDR,
POINTER_TO_DISPLAY,
} = require('./machine.config.js');
// Debug verbosity threshold. The dbg* helpers at the bottom of this file only
// emit a message when DEBUG is truthy and the message's level is >= DEBUG_LEVEL:
// 1 = verbose
// 2 = what I'm currently focusing on
// 3 = always print
// 4 = silent
const DEBUG_LEVEL = 2;
let DEBUG; // Turn debugging on/off -- set by assemble()
/**
* @param {string} assemblyCode
* @param {Boolean} [debug = false]
**/
exports.assemble = (assemblyCode, debug = false) => {
DEBUG = debug;
return decodeInstructions(assemblyCode);
}
// Configure pseudo-ops:
// As an operation, ASM_IP_LABEL sets the address to assemble at;
// as an operand, it stands for the current value of that address.
const ASM_IP_LABEL = '*';
// Prefix that marks a constant, both where it is defined (as the operation)
// and where it is referenced (as the operand).
const ASM_CONSTANT_PREFIX = '#';
// Prefix that marks a label, both where it is defined (as the operation)
// and where it is referenced (as the operand).
const ASM_LABEL_PREFIX = '@';
// Mnemonics that may be written without an operand;
// decodeInstructions() fills in '0' for them.
const mnemonicsWithOptionalArgs = ['end', 'nop'];
// Opcode emitted for each (lower-cased) mnemonic, per addressing mode.
// Where both modes list the same number, the addressing mode makes no
// difference to the opcode that is emitted.
const mnemonics2opcodes = {
end: { direct: 0, indirect: 0 },
sto: { direct: 1, indirect: 2 },
lda: { direct: 3, indirect: 4 },
add: { direct: 5, indirect: 6 },
sub: { direct: 7, indirect: 8 },
hop: { direct: 9, indirect: 10 },
jmp: { direct: 11, indirect: 12 },
ftg: { direct: 13, indirect: 13 },
fhp: { direct: 14, indirect: 14 },
nop: { direct: 15, indirect: 15 },
};
/**
* @typedef {('code'|'comment'|'blank')} SourceLineType
**/
/**
* @typedef {Object} SourceLineInfo
* @property {number} number - line number
* @property {string} source - source text
* @property {string} sanitized - source text, with comments removed and leading/trailing whitespace trimmed
* @property {SourceLineType} type - line type
* @property {string} [operation] - For code: the first non-whitespace chunk
* @property {string} [argument] - For code: the second non-whitespace chunk, if there is one
* @property {string} [extraArgument] - For code: the third non-whitespace chunk, if there is one
**/
/**
 * Split assembly source into lines, classify each line, and tokenize the
 * code lines.
 *
 * Every input line yields exactly one SourceLineInfo, so `number` is the
 * line's 1-based position in the original text. Both LF and CRLF line
 * endings are accepted.
 *
 * If a code line has too many arguments, an error is printed with
 * console.error and the process exits with status 1.
 * @param {string} source
 * @returns {Array<SourceLineInfo>}
 **/
function preparseSourceCode(source) {
  const lines = source.split(/\r?\n/); // returns an array of lines

  /**
   * @param {string} l
   * @returns {SourceLineType}
   **/
  const getLineType = (l) => {
    const trimmed = stripWhitespaceFromEnds(l);
    // Whitespace-only lines count as blank; otherwise they would be treated
    // as code with an empty operation and rejected later on.
    if (trimmed.length === 0) return 'blank';
    if (trimmed.startsWith(';')) return 'comment';
    return 'code';
  };

  return lines.map((line, index) => {
    dbg(1, ` in: ${line}`);
    const info = {
      number: index + 1,
      source: line,
      sanitized: stripWhitespaceFromEnds(stripComments(line)),
      type: getLineType(line),
    };
    dbg(1, `${info.number} - ${info.type}: ${info.sanitized}`);
    dbg(1, ``);

    if (info.type !== 'code') return info;

    // Split line into an array of [op, arg, extra_arg]
    const tokens = info.sanitized.split(/\s+/);

    // If there's too many arguments, throw an error
    // NB. there's a special case:
    // lines with the ASM_IP_LABEL as argument can take an extra argument
    let maxTokens = 2;
    if (tokens.length > 2 && tokens[1].startsWith(ASM_IP_LABEL)) {
      maxTokens = 3;
    }
    if (tokens.length > maxTokens) {
      console.error();
      console.error(`Error: Too many arguments`);
      console.error(` at line ${info.number}`);
      process.exit(1); // non-zero, so callers can tell that assembly failed
    }

    // split() always yields at least one token, so the operation is always set
    info.operation = tokens[0];
    if (tokens.length >= 2) info.argument = tokens[1];
    if (tokens.length >= 3) info.extraArgument = tokens[2];
    return info;
  });
}
/**
 * Turn a numeric operand, as written in the source, into a number.
 *
 * A leading `$` marks a hexadecimal value, which is handed to hex2num();
 * anything else goes through `parseInt` with no explicit radix.
 * @param {string} arg - operand text
 * @returns {number} the decoded value (NaN if the text is not numeric)
 **/
function decodeNumericOp(arg) {
  const isHex = arg.startsWith("$");
  if (!isHex) {
    return parseInt(arg);
  }
  const hexDigits = arg.replace("$", "");
  return hex2num(hexDigits);
}
/**
 * Record the definition of a label at the given address.
 *
 * If the label was already referenced earlier in the source, its existing
 * entry (and the list of bytes waiting to be patched) is kept and only the
 * target address is filled in; otherwise a fresh entry is created.
 * @param {string} op - the label definition token, including its one-character prefix
 * @param {number} IP - address the label points to
 * @param {object} labels - label table keyed by label name; each entry is
 *   `{ pointsToByte, bytesToReplace }`. Mutated in place.
 * @returns {object} the same `labels` object that was passed in
 **/
function handleLabelDefinition(op, IP, labels) {
  const label = op.substring(1); // strip label prefix

  // Own-property check: `in` would also match names inherited from
  // Object.prototype (e.g. "toString"), and the definition would then be
  // written onto that shared built-in instead of into the table.
  if (Object.prototype.hasOwnProperty.call(labels, label)) {
    labels[label].pointsToByte = IP;
  } else {
    labels[label] = {
      pointsToByte: IP,
      bytesToReplace: [],
    };
  }

  dbg(1, ` Label definition:`);
  dbg(1, ` Points to byte: ${labels[label].pointsToByte}`);
  dbg(1, ` Bytes to replace: ${labels[label].bytesToReplace}`);
  dbg(1, ` IP: $${num2hex(IP)}, new code: none`);
  // NOTE(review): no matching dbgGroup(..., 'Input line') is visible in this
  // file -- confirm whether this call is still needed.
  dbgGroupEnd(1, 'Input line');
  return labels;
}
/**
 * Record the definition of a constant.
 *
 * The value is stored as written in the source (a string), except that a
 * value of ASM_IP_LABEL is replaced by the current address as a decimal
 * string. Redefining a constant overwrites its previous value.
 * @param {string} op - the constant definition token, including its one-character prefix
 * @param {string} arg - the constant's value, as written in the source
 * @param {number} IP - current assembly address
 * @param {object} constants - constant table keyed by name. Mutated in place.
 * @returns {object} the same `constants` object that was passed in
 **/
function handleConstantDefinitions(op, arg, IP, constants) {
  const name = op.substring(1); // strip the constant prefix
  const refersToIP = (arg === ASM_IP_LABEL);
  constants[name] = refersToIP ? IP.toString() : arg;

  dbg(1, '');
  dbg(1, `Constants:`);
  dbg(1, constants);
  dbg(1, '');
  return constants;
}
/**
 * Assemble source code.
 *
 * If the source doesn't explicitly set an address to assemble to,
 * it will be assembled to the default initial value of the IP,
 * as specified in `machine.config.js`.
 *
 * Assembly is a single pass over the pre-parsed lines. Label operands are
 * emitted as 0 and patched after the pass, so a label may be referenced
 * before it is defined.
 *
 * On a source error (missing operand, invalid address, unknown operation,
 * undefined constant, undefined label) a message is printed with
 * console.error and the process exits with status 1.
 *
 * @param {string} source - Assembly source to decode
 * @return {{ debugInfo: Object, machineCode: Uint8Array }}
 *   `machineCode` is the assembled memory image. `debugInfo` maps the address
 *   of each assembled instruction to
 *   `{ lineNumber, source, address, machine: [opcode, operand] }`; for label
 *   operands, `machine` holds the 0 placeholder rather than the patched value.
 **/
function decodeInstructions(source) {
  /**
   * Report a source error and stop the process.
   * @param {string} message
   * @param {number} [lineNumber]
   **/
  const fail = (message, lineNumber) => {
    console.error();
    console.error(`Error: ${message}`);
    if (typeof lineNumber !== 'undefined') {
      console.error(` at line ${lineNumber}`);
    }
    process.exit(1); // non-zero, so callers can tell that assembly failed
  };

  /**
   * Decode the operand of an ASM_IP_LABEL directive line.
   * Accepts the same numeric forms as any other operand (including `$` hex).
   * @param {SourceLineInfo} line
   * @returns {number}
   **/
  const decodeAddress = (line) => {
    if (typeof line.argument === 'undefined') {
      fail(`Missing operand ${line.source}`, line.number);
    }
    const address = decodeNumericOp(line.argument);
    if (!Number.isInteger(address) || address < 0) {
      fail(`Invalid address '${line.argument}'`, line.number);
    }
    return address;
  };

  dbg(1, 'Pre-parsing...');
  const lines = preparseSourceCode(source);
  dbg(1, '');
  dbg(1, 'Done pre-parsing.');
  dbg(1, '');
  dbg(1, 'Assembling...');

  // Figure out where to start assembly...
  /** @type {number} IP - Destination addr for the next line **/
  let IP = INITIAL_IP_ADDRESS;
  // Check if the source code explicitly sets an address to assemble at
  // by including a `* [addr]` as the first (non-blank, non-comment) line.
  // (There may be no code line at all, e.g. for empty source.)
  const firstCodeLine = lines.find((el) => el.type === 'code');
  if (typeof firstCodeLine !== 'undefined' && firstCodeLine.operation.startsWith(ASM_IP_LABEL)) {
    IP = decodeAddress(firstCodeLine);
  }

  // Initialize arrays to collect assembled code
  /** @type {Array<number>} - Assembled source code, as an array of bytes **/
  const machineCode = new Array(IP).fill(0);
  const debugInfo = {};

  // Initialize memory-mapped IO -- TODO this should probably be in the CPU, not here
  machineCode[POINTER_TO_DISPLAY] = DISPLAY_ADDR;

  // Initialize tables that collect code references that
  // have to be revisited after our first pass through the source.
  // They have no prototype, so user-chosen names such as "toString" or
  // "constructor" cannot collide with inherited Object.prototype members.
  let labels = Object.create(null);
  let constants = Object.create(null);
  // Line number of the first reference to each label, for error messages
  const firstReferenceLine = Object.create(null);

  // Decode line by line...
  for (const line of lines) {
    if (line.type !== 'code') continue;

    const op = line.operation;

    if (typeof line.argument === 'undefined' && !op.startsWith(ASM_LABEL_PREFIX)) {
      // If this isn't a label definition,
      // or one of the ops with optional arguments,
      // then it's an error
      if (mnemonicsWithOptionalArgs.indexOf(op.toLowerCase()) < 0) {
        fail(`Missing operand ${line.source}`, line.number);
      }
      // It *is* one of the special optional-arg ops
      // So let's fill in the implicit operand with $00
      line.argument = '0';
    }

    // *** Decode special operations ***

    // Opcodes - Handle label definitions
    if (op.startsWith(ASM_LABEL_PREFIX)) {
      labels = handleLabelDefinition(op, IP, labels);
      continue;
    }

    // Opcodes - Handle constant definitions
    if (op.startsWith(ASM_CONSTANT_PREFIX)) {
      constants = handleConstantDefinitions(op, line.argument, IP, constants);
      continue;
    }

    // Opcodes - Handle setting value of IP
    if (op.startsWith(ASM_IP_LABEL)) {
      IP = decodeAddress(line);
      continue;
    }

    // *** Decode regular operations ***

    /** @type {number|null} decodedArg **/
    let decodedArg = null;
    /** @typedef {'direct'|'indirect'} AddressingMode **/
    let addressingMode = 'direct';

    // Now that it can't be a label or a constant, normalize the opcode
    line.operation = op.toLowerCase();
    if (!Object.prototype.hasOwnProperty.call(mnemonics2opcodes, line.operation)) {
      fail(`Unknown operation '${op}'`, line.number);
    }

    // Operands - Handle references to labels
    if (line.argument.startsWith(ASM_LABEL_PREFIX)) {
      const label = line.argument.substring(1); // strip label prefix
      if (!(label in firstReferenceLine)) {
        firstReferenceLine[label] = line.number;
      }
      if (label in labels) {
        dbg(1, `'${label}' already in labels object`);
        labels[label].bytesToReplace.push(IP + 1);
      } else {
        dbg(1, `'${label}' NOT in labels object`);
        labels[label] = {
          bytesToReplace: [IP + 1],
        };
      }
      dbg(1, `Label reference:`);
      dbg(1, ` Points to byte: ${labels[label].pointsToByte}`);
      dbg(1, ` Bytes to replace: ${labels[label].bytesToReplace}`);
      decodedArg = 0; // Return 0 for operand for now -- we'll replace it later
    }

    // Operands - Handle references to the Instruction Pointer
    if (line.argument === ASM_IP_LABEL) {
      dbg(1, ` References current IP - ${IP}`);
      if (typeof line.extraArgument === 'undefined') {
        decodedArg = IP;
      } else {
        decodedArg = IP + decodeNumericOp(line.extraArgument);
      }
    }

    // Operands - Handle references to constants
    if (line.argument.startsWith(ASM_CONSTANT_PREFIX)) {
      dbg(1, `References '${line.argument}'`);
      const constName = line.argument.substring(1); // strip constant prefix
      if (typeof constants[constName] === 'undefined') {
        fail(`Undefined constant '${line.argument}'`, line.number);
      }
      decodedArg = decodeNumericOp(constants[constName]);
    }

    // Operands - Handle references to constants in indirect mode
    if (line.argument.startsWith(`(${ASM_CONSTANT_PREFIX}`)) {
      addressingMode = "indirect";
      dbg(1, `(Indirectly) References '${line.argument}'`);
      const constName = line.argument
        .replace(`(${ASM_CONSTANT_PREFIX}`, "")
        .replace(")", "");
      if (typeof constants[constName] === 'undefined') {
        fail(`Undefined constant '${line.argument}'`, line.number);
      }
      decodedArg = decodeNumericOp(constants[constName]);
    }

    // Operands - Handle indirect expressions
    if (decodedArg === null && line.argument.startsWith("(")) {
      addressingMode = "indirect";
      const inner = line.argument.replace("(", "").replace(")", "");
      decodedArg = decodeNumericOp(inner);
    }

    // Decode regular opcodes
    const decodedOp = mnemonics2opcodes[line.operation][addressingMode];

    // Decode regular operands
    if (decodedArg === null) {
      decodedArg = decodeNumericOp(line.argument);
    }

    machineCode[IP] = decodedOp;
    machineCode[IP + 1] = decodedArg;
    debugInfo[IP] = {
      lineNumber: line.number,
      source: line.source,
      address: IP,
      machine: [decodedOp, decodedArg]
    };

    dbg(3, ``);
    dbg(3, `Line ${line.number}: ${line.source}`);
    // line.argument is always set by now (an implicit '0' is filled in above)
    dbg(3, ` Asm operation: ${line.operation.toUpperCase()} ${line.argument}`);
    dbg(3, ` Machine code: $${num2hex(decodedOp)} $${num2hex(decodedArg)}`);
    dbg(3, ` IP: $${num2hex(IP)}`);

    IP += 2;
  }

  dbg(1, '');
  dbgGroup(1, 'Memory before filling in label constants');
  dbgExec(1, () => logMemory(new Uint8Array(machineCode)));
  dbgGroupEnd(1);

  // Backfill label references
  for (const name of Object.keys(labels)) {
    dbgGroup(1, `${ASM_LABEL_PREFIX}${name}`);
    const label = labels[name];
    dbg(1, `Points to byte: ${label.pointsToByte}`);
    dbg(1, `Bytes to replace: ${label.bytesToReplace}`);
    dbgGroupEnd(1);

    // An entry without a target was only ever referenced, never defined.
    // Patching it in would silently assemble the operand as 0.
    if (typeof label.pointsToByte === 'undefined') {
      fail(`Undefined label '${ASM_LABEL_PREFIX}${name}'`, firstReferenceLine[name]);
    }
    for (const byteAddress of label.bytesToReplace) {
      machineCode[byteAddress] = label.pointsToByte;
    }
  }

  return { 'debugInfo': debugInfo, 'machineCode': new Uint8Array(machineCode) };
}
/**
 * Remove a trailing `;` comment from a single source line.
 *
 * Everything from the first `;` to the end of the line is dropped, including
 * a bare `;` with nothing after it. Whitespace before the `;` is kept.
 * @param {string} line
 * @returns {string} the line without its comment
 **/
function stripComments(line) {
  // `.*` rather than `.+`: an empty comment (a lone ';') must be stripped too,
  // otherwise the ';' survives as a spurious extra token.
  return line.replace(/;.*/, "");
}
/**
 * Remove leading and trailing whitespace from a line.
 * Whitespace inside the line is left alone.
 * @param {string} line
 * @returns {string} the trimmed line
 **/
function stripWhitespaceFromEnds(line) {
  return line.trim();
}
/**
 * Parse a string of hexadecimal digits.
 * @param {string} hex - hex digits
 * @returns {number} the parsed value (NaN if `hex` does not start with a hex digit)
 **/
function hex2num(hex) {
  const HEX_RADIX = 16;
  return Number.parseInt(hex, HEX_RADIX);
}
// Debug helpers
// Each helper does nothing unless DEBUG is truthy and the given level
// is at least DEBUG_LEVEL.

/** Log `s` with console.log. */
const dbg = (lvl, s) => {
  if (!DEBUG || !(lvl >= DEBUG_LEVEL)) return;
  console.log(s);
};

/** Open a console group labelled `s`. */
const dbgGroup = (lvl, s) => {
  if (!DEBUG || !(lvl >= DEBUG_LEVEL)) return;
  console.group(s);
};

/** Close the current console group (`s` is accepted but not used). */
const dbgGroupEnd = (lvl, s) => {
  if (!DEBUG || !(lvl >= DEBUG_LEVEL)) return;
  console.groupEnd();
};

/** Run `func`, so that costly debug work only happens when it will be shown. */
const dbgExec = (lvl, func) => {
  if (!DEBUG || !(lvl >= DEBUG_LEVEL)) return;
  func();
};