351 lines
10 KiB
JavaScript
351 lines
10 KiB
JavaScript
const { logMemory, num2hex } = require('./logging.js');
|
|
const {
|
|
INITIAL_IP_ADDRESS,
|
|
DISPLAY_ADDR,
|
|
KEYPAD_ADDR,
|
|
POINTER_TO_DISPLAY,
|
|
POINTER_TO_KEYPAD
|
|
} = require('./machine.config.js');
|
|
|
|
// Debug verbosity threshold. A debug message is printed only when debugging
// is switched on AND the message's level is >= DEBUG_LEVEL:
//   1 = verbose
//   2 = what i'm currently focusing on
//   3 = always print
//   4 = silent
const DEBUG_LEVEL = 2;

// Master switch for debug output -- overwritten by assemble() on every call.
let DEBUG = false;
|
|
|
|
exports.assemble = (str, debug = false) => {
|
|
DEBUG = debug;
|
|
return decodeInstructions(str);
|
|
}
|
|
|
|
// Configure pseudo-ops:
const ASM_IP_LABEL = '*'; // refers to / sets the instruction pointer
const ASM_CONSTANT_PREFIX = '#'; // prefix for constant definitions and references
const ASM_LABEL_PREFIX = '@'; // prefix for label definitions and references

// Mnemonics that are allowed to appear without an operand.
const mnemonicsWithOptionalArgs = ['end', 'nop'];

// Opcode emitted for each mnemonic, keyed by addressing mode.
// Mnemonics whose two entries are equal assemble to the same opcode in both modes.
const mnemonics2opcodes = {
  end: { direct: 0, indirect: 0 },
  sto: { direct: 1, indirect: 2 },
  lda: { direct: 3, indirect: 4 },
  add: { direct: 5, indirect: 6 },
  sub: { direct: 7, indirect: 8 },
  hop: { direct: 9, indirect: 10 },
  jmp: { direct: 11, indirect: 12 },
  ftg: { direct: 13, indirect: 13 },
  fhp: { direct: 14, indirect: 14 },
  nop: { direct: 15, indirect: 15 },
};
|
|
|
|
/**
 * Report whether a piece of source text begins with the instruction-pointer
 * pseudo-op (`ASM_IP_LABEL`), ignoring leading and trailing whitespace.
 *
 * @param {string} s
 * @returns {boolean}
 **/
function startsWithPointerToIP(s) {
  return stripWhitespaceFromEnds(s).startsWith(ASM_IP_LABEL);
}
|
|
|
|
|
|
/**
|
|
* @typedef {('code'|'comment'|'blank')} SourceLineType
|
|
**/
|
|
|
|
/**
|
|
* @typedef {Object} SourceLineInfo
|
|
* @property {number} number - zero-based index of the line within the source
|
|
* @property {string} source - source text
|
|
* @property {string} sanitized - source text, with comments removed and leading/trailing whitespace trimmed
|
|
* @property {SourceLineType} type - line type
|
|
* @property {string} [operation] - For code: the first non-whitespace chunk
|
|
* @property {string} [argument] - For code: the second non-whitespace chunk, if there is one
|
|
**/
|
|
|
|
/**
 * Split assembly source into lines and classify each one.
 *
 * Every line is reported (blank and comment lines included), so `number`
 * is always the zero-based index of the line within `source`.
 * For code lines, `operation` and `argument` are taken from the sanitized
 * text, so indentation and trailing comments never end up in them.
 * `argument` is only set when the line actually has a second chunk.
 *
 * @param {string} source
 * @returns {Array<SourceLineInfo>}
 **/
function preparseSourceCode(source) {
  // Accept both LF and CRLF line endings.
  return source.split(/\r?\n/).map((line, index) => {
    dbg(1, `pre-parsing ${line}`);

    const trimmed = stripWhitespaceFromEnds(line);
    const sanitized = stripWhitespaceFromEnds(stripComments(line));

    /** @type {SourceLineType} **/
    let type = 'code';
    if (trimmed.length === 0) {
      type = 'blank'; // empty or whitespace-only
    } else if (trimmed.startsWith(';')) {
      type = 'comment';
    }

    const info = {
      number: index,
      source: line,
      sanitized,
      type,
    };

    if (type === 'code') {
      // A code line always has a non-empty sanitized text, so `operation`
      // is always present; `argument` may legitimately be missing.
      const [operation, argument] = sanitized.split(/\s+/);
      info.operation = operation;
      if (argument !== undefined) {
        info.argument = argument;
      }
    }

    return info;
  });
}
|
|
|
|
|
|
|
|
/**
 * Parse a numeric operand.
 *
 * A leading `$` marks a hexadecimal number; anything else is read as decimal.
 * The radix is passed explicitly so that the result never depends on
 * `parseInt`'s prefix guessing.
 *
 * @param {string} arg
 * @returns {number} the parsed value, or NaN when `arg` is not a number
 **/
function parseNumericOperand(arg) {
  if (arg.startsWith('$')) {
    return hex2num(arg.slice(1)); // drop the '$'
  }
  return Number.parseInt(arg, 10);
}
|
|
|
|
/**
 * Assemble source code.
 *
 * If the source doesn't explicitly set an address to assemble to,
 * it will be assembled to the default initial value of the IP,
 * as specified in `machine.config.js`.
 *
 * Works in two passes: the first pass emits an opcode/operand byte pair for
 * every instruction and records where label references occur; the second
 * pass back-fills those references once every label's address is known.
 *
 * @param {string} source - Assembly source to decode
 * @returns {Uint8Array} Memory image: bytes below the start address are zero
 *   (apart from the memory-mapped IO pointers), followed by the assembled code
 * @throws {Error} On a missing or non-numeric operand, an unknown mnemonic,
 *   or a reference to an undefined constant or label
 **/
function decodeInstructions(source) {
  // TODO: return debug data (per-line source info) along with the machine code.
  const lines = preparseSourceCode(source);
  dbg(1, lines);

  /** Build an Error that names the offending source line. */
  const errorAt = (line, message) =>
    new Error(`${message} for line ${line.number}: ${line.source}`);

  /** The line's argument, or null when it has none. */
  const argumentOf = (line) =>
    (typeof line.argument === 'string' && line.argument !== '' ? line.argument : null);

  /** Parse a numeric operand, refusing missing or non-numeric text. */
  const parseOperand = (text, line) => {
    if (text === null) throw errorAt(line, 'Missing operand');
    const value = parseNumericOperand(text);
    if (Number.isNaN(value)) throw errorAt(line, `Invalid operand '${text}'`);
    return value;
  };

  /** Parse an operand that is used as a memory address. */
  const parseAddress = (text, line) => {
    const address = parseOperand(text, line);
    if (address < 0) throw errorAt(line, `Invalid address '${text}'`);
    return address;
  };

  // Figure out where to start assembly...

  /** @type {number} IP - Destination addr for the next line **/
  let IP = INITIAL_IP_ADDRESS;

  // Check if the source code explicitly sets an address to assemble at
  // by including a `* [addr]` as the first (non-blank, non-comment) line
  const firstCodeLine = lines.find((el) => el.type === 'code');
  if (
    firstCodeLine !== undefined
    && typeof firstCodeLine.operation === 'string'
    && startsWithPointerToIP(firstCodeLine.operation)
  ) {
    IP = parseAddress(argumentOf(firstCodeLine), firstCodeLine);
  }

  /** @type {Array<number>} - Assembled source code, as an array of bytes **/
  const machineCode = new Array(IP).fill(0);

  // Initialize memory-mapped IO -- TODO this should probably be in the CPU, not here
  machineCode[POINTER_TO_DISPLAY] = DISPLAY_ADDR;
  machineCode[POINTER_TO_KEYPAD] = KEYPAD_ADDR;

  // Code references that have to be revisited after the first pass.
  // Prototype-less objects, so that names such as `constructor` are
  // ordinary keys rather than inherited properties.
  const labels = Object.create(null);
  const constants = Object.create(null);

  /** Assemble one code line, updating IP, machineCode, labels and constants. */
  const assembleLine = (line) => {
    const op = typeof line.operation === 'string' ? line.operation : '';
    const arg = argumentOf(line);

    // Opcodes - Handle label definitions
    if (op.startsWith(ASM_LABEL_PREFIX)) {
      const label = op.substring(1); // strip label prefix
      if (label in labels) {
        // Already referenced by an earlier line: just record the address.
        labels[label].pointsToByte = IP;
      } else {
        labels[label] = {
          pointsToByte: IP,
          bytesToReplace: [],
        };
      }
      dbg(2, `pointsToByte: ${labels[label].pointsToByte}`);
      dbg(2, `bytesToReplace: ${labels[label].bytesToReplace}`);
      dbg(3, `IP: $${num2hex(IP)}, new code: none`);
      return;
    }

    // Opcodes - Handle setting value of IP
    if (startsWithPointerToIP(op)) {
      dbg(3, 'CHANGING IP');
      IP = parseAddress(arg, line);
      // Zero-fill any gap so that array indices keep matching addresses.
      while (machineCode.length < IP) {
        machineCode.push(0);
      }
      return;
    }

    // Opcodes - Handle constant definitions
    if (op.startsWith(ASM_CONSTANT_PREFIX)) {
      if (arg === null) throw errorAt(line, 'Missing operand');
      const constantName = op.substring(1); // strip constant prefix
      // Constants are stored as text and parsed where they are used.
      constants[constantName] = arg === ASM_IP_LABEL ? IP.toString() : arg;
      dbg(2, `constants:`);
      dbg(2, constants);
      return;
    }

    // Everything else has to be a real instruction.
    const opName = op.toLowerCase();
    if (!Object.prototype.hasOwnProperty.call(mnemonics2opcodes, opName)) {
      throw errorAt(line, `Unknown mnemonic '${op}'`);
    }

    /** @type {'direct'|'indirect'} **/
    let addressingMode = 'direct';
    /** @type {number} **/
    let operand;

    if (arg === null) {
      // Operands - Handle mnemonics without operands (eg END) ...
      if (!mnemonicsWithOptionalArgs.includes(opName)) {
        throw errorAt(line, 'Missing operand');
      }
      operand = 0;
    } else if (arg.startsWith(ASM_LABEL_PREFIX)) {
      // Operands - Handle references to labels
      const label = arg.substring(1); // strip label prefix
      operand = 0; // placeholder, back-filled after the first pass
      if (label in labels) {
        dbg(1, `'${label}' already in labels object`);
        labels[label].bytesToReplace.push(IP + 1);
      } else {
        dbg(1, `'${label}' NOT in labels object`);
        labels[label] = {
          bytesToReplace: [IP + 1],
        };
      }
      dbg(2, `pointsToByte: ${labels[label].pointsToByte}`);
      dbg(2, `bytesToReplace: ${labels[label].bytesToReplace}`);
    } else if (arg === ASM_IP_LABEL) {
      // Operands - Handle references to the Instruction Pointer
      dbg(2, `operand references current address`);
      operand = IP;
      dbg(2, `arg_num: ${num2hex(operand)}`);
    } else {
      let text = arg;

      // Operands - Handle indirect expressions: `(...)`
      if (text.startsWith('(')) {
        addressingMode = 'indirect';
        text = text.replace('(', '').replace(')', '');
      }

      // Operands - Handle references to constants (direct or indirect)
      if (text.startsWith(ASM_CONSTANT_PREFIX)) {
        dbg(2, `operand references '${arg}'`);
        const constantName = text.substring(1); // strip constant prefix
        if (!(constantName in constants)) {
          throw errorAt(line, `Undefined constant '${text}'`);
        }
        text = constants[constantName];
      }

      // Operands - Handle numeric operands
      operand = parseOperand(text, line);
    }

    // DECODE!
    const opcode = mnemonics2opcodes[opName][addressingMode];

    // Write by address rather than appending: label back-filling below
    // treats array indices as addresses.
    machineCode[IP] = opcode;
    machineCode[IP + 1] = operand;
    dbg(3, `IP: $${num2hex(IP)}, new code: $${num2hex(opcode)} $${num2hex(operand)}`);
    IP += 2;
  };

  // Decode line by line
  for (const line of lines) {
    if (line.type !== 'code' || line.sanitized === '') continue;

    dbgGroup(1, `Input line: ${line.source}`);
    try {
      assembleLine(line);
    } finally {
      dbgGroupEnd(1, 'Input line');
    }
  }

  dbg(1, '');
  dbgGroup(1, 'Memory before filling in label constants');
  dbgExec(1, () => logMemory(new Uint8Array(machineCode)));
  dbgGroupEnd(1, 'Memory before filling in label constants');

  // Backfill label references
  for (const k of Object.keys(labels)) {
    dbgGroup(2, `${ASM_LABEL_PREFIX}${k}`);
    const label = labels[k];
    dbg(2, `pointsToByte: ${label.pointsToByte}`);
    dbg(2, `bytesToReplace: ${label.bytesToReplace}`);
    dbgGroupEnd(2, `label`);

    if (label.pointsToByte === undefined) {
      // Referenced somewhere, but never defined.
      throw new Error(`Undefined label '${ASM_LABEL_PREFIX}${k}'`);
    }
    for (const address of label.bytesToReplace) {
      machineCode[address] = label.pointsToByte;
    }
  }

  return new Uint8Array(machineCode);
}
|
|
|
|
|
|
/**
 * Remove a trailing comment from a line of source.
 *
 * Everything from the first `;` to the end of the line is dropped,
 * including a bare `;` with nothing after it. Whitespace in front of
 * the `;` is left in place.
 *
 * @param {string} line
 * @returns {string}
 **/
function stripComments(line) {
  // `.*` rather than `.+`, so that an empty comment (a lone `;`) is removed too.
  return line.replace(/;.*/, '');
}
|
|
|
|
/**
 * Remove whitespace from both ends of a line.
 *
 * @param {string} line
 * @returns {string}
 **/
function stripWhitespaceFromEnds(line) {
  // String.prototype.trim strips the same characters that `\s` matches.
  return line.trim();
}
|
|
|
|
/**
 * Read a string as a base-16 number.
 *
 * @param {string} hex
 * @returns {number} the parsed value, or NaN when `hex` does not start with a hex digit
 **/
function hex2num(hex) {
  const value = Number.parseInt(hex, 16);
  return value;
}
|
|
|
|
// Debug helpers
//
// Each helper does nothing unless debugging is switched on (DEBUG) and the
// given level reaches the configured threshold (DEBUG_LEVEL).

/** @param {number} lvl - level of the message being considered */
const isDebugLevelEnabled = (lvl) => DEBUG && lvl >= DEBUG_LEVEL;

/** Log `s` to the console. */
const dbg = (lvl, s) => {
  if (isDebugLevelEnabled(lvl)) console.log(s);
};

/** Open a console group labelled `s`. */
const dbgGroup = (lvl, s) => {
  if (isDebugLevelEnabled(lvl)) console.group(s);
};

/** Close the current console group. `s` is accepted only so that calls mirror dbgGroup. */
const dbgGroupEnd = (lvl, s) => {
  if (isDebugLevelEnabled(lvl)) console.groupEnd();
};

/** Run `func` (used for debug output that is expensive to produce). */
const dbgExec = (lvl, func) => {
  if (isDebugLevelEnabled(lvl)) func();
};