assembler - WIP - Continue refactoring everything, in support of 'set the initial IP to assemble to' feature, etc. (Everything is broken...)

This commit is contained in:
n loewen 2023-08-21 18:34:15 +01:00
parent 4481fc10d4
commit f0e8664ab8
1 changed file with 186 additions and 171 deletions

View File

@ -20,9 +20,9 @@ exports.assemble = (str, debug = false) => {
} }
// Configure pseudo-ops: // Configure pseudo-ops:
const POINTER_TO_IP_OP = '*'; const ASM_IP_LABEL = '*';
const CONSTANT_PREFIX = '#'; const ASM_CONSTANT_PREFIX = '#';
const LABEL_PREFIX = '@'; const ASM_LABEL_PREFIX = '@';
const mnemonicsWithOptionalArgs = ['end', 'nop']; const mnemonicsWithOptionalArgs = ['end', 'nop'];
const mnemonics2opcodes = { const mnemonics2opcodes = {
@ -43,55 +43,87 @@ const mnemonics2opcodes = {
* @returns {boolean} * @returns {boolean}
**/ **/
function startsWithPointerToIP(s) { function startsWithPointerToIP(s) {
return stripWhitespaceFromEnds(s).startsWith(POINTER_TO_IP_OP); return stripWhitespaceFromEnds(s).startsWith(ASM_IP_LABEL);
} }
/** /**
* @param {string[]} lines * @typedef {('code'|'comment'|'blank')} SourceLineType
*/
/**
 * Find the first line that still has content once surrounding whitespace
 * and any comment have been removed.
 *
 * @param {string[]} lines
 * @returns {string|false} the matching line exactly as given, or `false` when no line qualifies
 */
function getFirstCodeLine(lines) {
  const hasCode = (text) => stripComments(stripWhitespaceFromEnds(text)).length !== 0;
  for (const candidate of lines) {
    if (hasCode(candidate)) {
      return candidate;
    }
  }
  return false;
}
/**
* @param {string} source;
**/ **/
// * @returns {{number: number, source: string, type: 'code'|'comment'|'blank'}}
// TODO: https://stackoverflow.com/questions/32295263/how-to-document-an-array-of-objects-in-jsdoc /**
function splitCodeFromComments(source) { * @typedef {Object} SourceLineInfo
* @property {number} number - line number
* @property {string} source - source text
* @property {string} sanitized - source text, with comments and whitespace removed
* @property {SourceLineType} type - line type
* @property {string} [operation] - For code: the first non-whitespace chunk
* @property {string} [argument] - For code: the second non-whitespace chunk, if there is one
**/
/**
 * Split assembly source into one record per line.
 *
 * Every record keeps the raw text plus a sanitized copy (comment and
 * surrounding whitespace removed). For code lines the sanitized text is split
 * on whitespace: the first chunk becomes `operation` and the second chunk,
 * when there is one, becomes `argument`.
 *
 * @param {string} source - complete source text; lines are separated by `\n`
 * @returns {Array<SourceLineInfo>} one record per input line; `number` is the 0-based line index
 **/
function preparseSourceCode(source) {
  /**
   * @param {string} trimmed - a line with surrounding whitespace already removed
   * @returns {SourceLineType}
   **/
  const getLineType = (trimmed) => {
    // Classify on the trimmed text so whitespace-only lines count as blank
    // instead of falling through to 'code'.
    if (trimmed.length === 0) return 'blank';
    if (trimmed.startsWith(';')) return 'comment';
    return 'code';
  };

  return source.split(/\n/).map((line, index) => {
    const info = {
      number: index,
      source: line,
      sanitized: stripWhitespaceFromEnds(stripComments(line)),
      type: getLineType(stripWhitespaceFromEnds(line)),
    };
    if (info.type === 'code') {
      // Split the sanitized text rather than the raw line: leading whitespace
      // would otherwise produce an empty first chunk, and a trailing comment
      // would leak into the argument.
      const [operation, argument] = info.sanitized.split(/\s+/);
      info.operation = operation;
      // Compare against the value `undefined`; the optional property is left
      // off entirely when the line has no second chunk.
      if (argument !== undefined) {
        info.argument = argument;
      }
    }
    return info;
  });
}
/**
 * Turn the text of a numeric operand into a number.
 *
 * Text starting with `$` has its first `$` removed and is handed to
 * `hex2num`; any other text goes through `parseInt`.
 *
 * @param {string} arg - operand text
 * @returns {number}
 **/
function parseNumericOperand(arg) {
  const isHex = arg.startsWith("$");
  if (!isHex) {
    return parseInt(arg);
  }
  const digits = arg.replace("$", "");
  return hex2num(digits);
}
// DECODE!
// NOTE(review): these statements sit outside any function visible here, and
// `opName`, `addressingMode`, `machineCode`, `arg_num` and `IP` are not
// declared in the visible scope. Presumably this is the emit step parked here
// during the refactor; confirm where it belongs before relying on it.

// Look up the opcode for the mnemonic under the selected addressing mode.
const op = mnemonics2opcodes[opName][addressingMode]; // FIXME rename
// Append the opcode, then its operand, to the assembled output.
machineCode.push(op);
machineCode.push(arg_num);
dbg(3, `IP: $${num2hex(IP)}, new code: $${num2hex(op)} $${num2hex(arg_num)}`);
// Two values were pushed above, so step the IP by two.
IP += 2;
dbgGroupEnd(1, 'Input line');
/** /**
* Assemble source code. * Assemble source code.
* *
@ -102,9 +134,6 @@ function splitCodeFromComments(source) {
* @return TODO * @return TODO
**/ **/
function decodeInstructions(source) { function decodeInstructions(source) {
// let lines = source.split(/\n/); // returns an array of lines
// WIP: everything broken // WIP: everything broken
// - just finished writing `splitCodeFromComments` // - just finished writing `splitCodeFromComments`
// - plan: // - plan:
@ -114,21 +143,20 @@ function decodeInstructions(source) {
// 1. check if first line * and set IP // 1. check if first line * and set IP
// 2. return debug data along with machine code // 2. return debug data along with machine code
console.log(splitCodeFromComments('foo \n\n; bar')); let lines = preparseSourceCode(source);
console.log(lines);
// Figure out where to start assembly... // Figure out where to start assembly...
/** @type {number} IP - The address where the next line of code will be assembled **/ /** @type {number} IP - Destination addr for the next line **/
let IP; let IP;
// Check if the source code explicitly sets an address to assemble at // Check if the source code explicitly sets an address to assemble at
// by including a `* [addr]` as the first (non-blank, non-comment) line // by including a `* [addr]` as the first (non-blank, non-comment) line
const sourceIPdefinition = lines.findIndex(startsWithPointerToIP); let idOfFirstLineWithCode = lines.findIndex((el) => el.type === 'code');
if (sourceSetsIP) { if (lines[idOfFirstLineWithCode].operation.startsWith(ASM_IP_LABEL)) {
let op_arg_array = lines[.split(/\s+/); // split line into an array of [op, arg] IP = parseInt(lines[idOfFirstLineWithCode].argument);
IP = explicitIP;
} else { } else {
IP = INITIAL_IP_ADDRESS; IP = INITIAL_IP_ADDRESS;
} }
@ -138,9 +166,6 @@ function decodeInstructions(source) {
/** @type {array} - Assembled source code, as an array of bytes **/ /** @type {array} - Assembled source code, as an array of bytes **/
let machineCode = new Array(IP).fill(0); let machineCode = new Array(IP).fill(0);
/** @type {{lineNumber: number, source: string, lineType: 'code'|'comment'}} **/
let debugData = new Array();
// Initialize memory-mapped IO -- TODO this should probably be in the CPU, not here // Initialize memory-mapped IO -- TODO this should probably be in the CPU, not here
machineCode[POINTER_TO_DISPLAY] = DISPLAY_ADDR; machineCode[POINTER_TO_DISPLAY] = DISPLAY_ADDR;
machineCode[POINTER_TO_KEYPAD] = KEYPAD_ADDR; machineCode[POINTER_TO_KEYPAD] = KEYPAD_ADDR;
@ -152,149 +177,131 @@ function decodeInstructions(source) {
// Decode line by line // Decode line by line
for (let i = 0; i < lines.length; i++) { for (let i = 0; i < lines.length; i++) {
dbg(2, ''); let line = lines[0];
dbgGroup(1, `Input line ${i}, IP ${num2hex(IP)}`); if (line.type === 'code') {
dbg(3, `> ${lines[i]}`); const op = line.operation;
// TODO - update debugData const arg = null;
let line = stripWhitespaceFromEnds(stripComments(lines[i])); if (typeof line.argument != 'undefined') {
const arg = line.argument;
// Handle blank lines
if (line.length === 0) {
dbg(3, `IP: $${num2hex(IP)}, new code: none`);
dbg(1, 'blank');
dbgGroupEnd(1, 'Input line');
continue;
}
// HANDLE OPS
// Handle label definitions
if (line.startsWith(LABEL_PREFIX)) {
let label = line.substring(1); // strip label prefix
if (label in labels) {
labels[label].pointsToByte = IP;
} else {
labels[label] = {
pointsToByte: IP,
bytesToReplace: [],
};
} }
dbg(2, `pointsToByte: ${labels[label].pointsToByte}`);
dbg(2, `bytesToReplace: ${labels[label].bytesToReplace}`);
dbg(3, `IP: $${num2hex(IP)}, new code: none`);
dbgGroupEnd(1, 'Input line');
continue;
}
let op_arg_array = line.split(/\s+/); // split line into an array of [op, arg] /** @type {{op: (number | null), arg: (number|null)}} **/
let opName = op_arg_array[0].toLowerCase(); let assembledLine = {
let arg_str = op_arg_array[1]; op: null,
let arg_num = null; arg: null
let addressingMode = 'direct'; // Must be "direct" or "indirect" };
// Handle setting value of IP /** @type {'direct'|'indirect'} **/
if (startsWithPointerToIP(line)) { let addressingMode = 'direct';
dbg(3, 'CHANGING IP');
IP = arg_str.parseInt();
continue;
}
// Handle constant definitions // Opcodes - Handle label definitions
if (opName.startsWith(CONSTANT_PREFIX)) { if (op.startsWith(ASM_LABEL_PREFIX)) {
// FIXME - a quick hack to get around problems caused by another use of lower-casing to sanitize input: let label = op.substring(1); // strip label prefix
let constantName = opName.substring(1).toLowerCase(); // strip '>'
let constantValue = arg_str; if (label in labels) {
if (constantValue.toLowerCase() === POINTER_TO_IP_OP) { labels[label].pointsToByte = IP;
constantValue = IP.toString(); } else {
labels[label] = {
pointsToByte: IP,
bytesToReplace: [],
};
}
dbg(2, `pointsToByte: ${labels[label].pointsToByte}`);
dbg(2, `bytesToReplace: ${labels[label].bytesToReplace}`);
dbg(3, `IP: $${num2hex(IP)}, new code: none`);
dbgGroupEnd(1, 'Input line');
continue;
} }
constants[constantName] = constantValue;
dbg(2, `constants:`);
dbg(2, constants);
continue;
}
// Handle mnemonics without operands (eg END) ... // let op_arg_array = line.split(/\s+/); // split line into an array of [op, arg]
if (typeof arg_str === 'undefined') { // let opName = op_arg_array[0].toLowerCase();
if (mnemonicsWithOptionalArgs.indexOf(opName) < 0) { // let arg_str = op_arg_array[1];
console.error(`Missing opcode: ${line}`); // let arg_num = null;
throw new Error("Missing opcode");
// Opcodes - Handle setting value of IP
if (startsWithPointerToIP(op)) {
dbg(3, 'CHANGING IP');
IP = parseInt(arg);
continue;
} }
arg_num = 0;
// Opcodes - Handle constant definitions
// HANDLE OPERANDS if (op.startsWith(ASM_CONSTANT_PREFIX)) {
let constantName = op.substring(1); // strip '>'
// Handle references to labels let constantValue = arg;
} else if (arg_str.startsWith(LABEL_PREFIX)) { if (constantValue === ASM_IP_LABEL) {
let label = arg_str.substring(1); // strip label prefix constantValue = IP.toString();
arg_num = 0; }
constants[constantName] = constantValue;
if (label in labels) { dbg(2, `constants:`);
dbg(1, `'${label}' already in labels object`); dbg(2, constants);
labels[label].bytesToReplace.push(IP + 1); continue;
} else {
dbg(1, `'${label}' NOT in labels object`);
labels[label] = {
bytesToReplace: [IP + 1],
};
} }
dbg(2, `pointsToByte: ${labels[label].pointsToByte}`);
dbg(2, `bytesToReplace: ${labels[label].bytesToReplace}`);
// Handle references to the Instruction Pointer
} else if (arg_str.toLowerCase() === POINTER_TO_IP_OP) {
dbg(2, `operand references current address`);
arg_num = IP;
dbg(2, `arg_num: ${num2hex(arg_num)}`);
// Handle references to constants // Opcodes - Handle mnemonics without operands (eg END) ...
} else if (arg_str.startsWith(CONSTANT_PREFIX)) { if (arg === null) {
// FIXME - a quick hack to get around problems caused by another use of lower-casing to sanitize input: if (mnemonicsWithOptionalArgs.indexOf(op) < 0) {
arg_str = arg_str.substring(1).toLowerCase(); // strip '>' console.error(`Missing opcode for line ${line.number}: ${line.source}`);
dbg(2, `operand references '${arg_str}'`); throw new Error("Missing opcode");
arg_str = constants[arg_str]; }
dbg(2, `arg_str from '${arg_str}'`); assembledLine.arg = 0;
// Handle references to constants in indirect mode
} else if (arg_str.startsWith(`(${CONSTANT_PREFIX}`)) {
addressingMode = "indirect";
arg_str = arg_str.replace(`(${CONSTANT_PREFIX}`, "");
arg_str = arg_str.replace(")", "");
// FIXME - a quick hack to get around problems caused by another use of lower-casing to sanitize input:
arg_str = arg_str.toLowerCase();
dbg(2, `INDY - operand references '${arg_str}'`);
arg_str = constants[arg_str];
// Handle indirect expressions
} else if (arg_str.startsWith("(")) {
addressingMode = "indirect";
arg_str = arg_str.replace("(", "");
arg_str = arg_str.replace(")", "");
}
// Handle numeric operands // Operands - Handle references to labels
if (arg_num === null) { } else if (arg.startsWith(ASM_LABEL_PREFIX)) {
if (arg_str.startsWith("$")) { let label = arg.substring(1); // strip label prefix
// Handle hex assembledLine.arg = 0;
arg_str = arg_str.replace("$", "");
arg_num = hex2num(arg_str); if (label in labels) {
} else { dbg(1, `'${label}' already in labels object`);
// Accept decimal i guess labels[label].bytesToReplace.push(IP + 1);
arg_num = parseInt(arg_str); } else {
dbg(1, `'${label}' NOT in labels object`);
labels[label] = {
bytesToReplace: [IP + 1],
};
}
dbg(2, `pointsToByte: ${labels[label].pointsToByte}`);
dbg(2, `bytesToReplace: ${labels[label].bytesToReplace}`);
// Operands - Handle references to the Instruction Pointer
} else if (arg.toLowerCase() === ASM_IP_LABEL) {
dbg(2, `operand references current address`);
assembledLine.arg = IP;
dbg(2, `arg_num: ${num2hex(assembledLine.arg)}`);
// Operands - Handle references to constants
} else if (arg.startsWith(ASM_CONSTANT_PREFIX)) {
dbg(2, `operand references '${arg}'`);
assembledLine.arg = constants[arg.substring(1)]; // substring(1) strips '>'
// Operands - Handle references to constants in indirect mode
} else if (arg.startsWith(`(${ASM_CONSTANT_PREFIX}`)) {
addressingMode = "indirect";
dbg(2, `IND - operand references '${arg}'`);
let constTemp = arg.replace(`(${ASM_CONSTANT_PREFIX}`, "").replace(")", "");
assembledLine.arg = constants[constTemp];
// Operands - Handle indirect expressions
} else if (arg.startsWith("(")) {
addressingMode = "indirect";
assembledLine.arg = arg.replace("(", "").replace(")", "");
} }
}
// DECODE! // Operands - Handle numeric operands
const op = mnemonics2opcodes[opName][addressingMode]; if (arg_num === null) {
if (arg_str.startsWith("$")) {
// Handle hex
arg_str = arg_str.replace("$", "");
arg_num = hex2num(arg_str);
} else {
// Accept decimal i guess
arg_num = parseInt(arg_str);
}
}
machineCode.push(op); };
machineCode.push(arg_num); }
dbg(3, `IP: $${num2hex(IP)}, new code: $${num2hex(op)} $${num2hex(arg_num)}`);
IP += 2;
dbgGroupEnd(1, 'Input line');
};
dbg(1, ''); dbg(1, '');
dbgGroup(1, 'Memory before filling in label constants'); dbgGroup(1, 'Memory before filling in label constants');
@ -303,7 +310,7 @@ function decodeInstructions(source) {
// Backfill label references // Backfill label references
for (let k of Object.keys(labels)) { for (let k of Object.keys(labels)) {
dbgGroup(2, `${LABEL_PREFIX}${k}`); dbgGroup(2, `${ASM_LABEL_PREFIX}${k}`);
let label = labels[k]; let label = labels[k];
dbg(2, `pointsToByte: ${label.pointsToByte}`); dbg(2, `pointsToByte: ${label.pointsToByte}`);
dbg(2, `bytesToReplace: ${label.bytesToReplace}`); dbg(2, `bytesToReplace: ${label.bytesToReplace}`);
@ -317,10 +324,18 @@ function decodeInstructions(source) {
} }
/**
 * Remove a `;` comment from a line of source.
 *
 * Everything from the first `;` to the end of the line is dropped, including
 * a bare `;` with nothing after it. Whitespace before the `;` is kept.
 *
 * @param {string} line
 * @returns {string} the line without its comment
 **/
function stripComments(line) {
  // `.*` rather than `.+`: the latter needs at least one character after the
  // `;`, so a line ending in a lone `;` would keep it.
  return line.replace(/;.*/, "");
}
/**
* @param {string} line
* @returns {string}
**/
function stripWhitespaceFromEnds(line) { function stripWhitespaceFromEnds(line) {
line = line.replace(/^\s+/,""); line = line.replace(/^\s+/,"");
line = line.replace(/\s+$/,""); line = line.replace(/\s+$/,"");