assembler - WIP - Continue refactoring everything, in support of 'set the initial IP to assemble to' feature, etc. (Everything is probably broken... but it's much closer)

This commit is contained in:
n loewen 2023-08-21 19:37:32 +01:00
parent f0e8664ab8
commit 12273a6389
1 changed files with 110 additions and 99 deletions

View File

@ -38,14 +38,6 @@ const mnemonics2opcodes = {
nop: { direct: 15, indirect: 15 }, nop: { direct: 15, indirect: 15 },
}; };
/**
 * Report whether a piece of source text begins with the IP marker
 * (`ASM_IP_LABEL`) after it has been passed through
 * `stripWhitespaceFromEnds`.
 *
 * @param {string} s - text to inspect
 * @returns {boolean} true when the stripped text starts with `ASM_IP_LABEL`
 **/
function startsWithPointerToIP(s) {
  const stripped = stripWhitespaceFromEnds(s);
  const beginsWithMarker = stripped.startsWith(ASM_IP_LABEL);
  return beginsWithMarker;
}
/** /**
* @typedef {('code'|'comment'|'blank')} SourceLineType * @typedef {('code'|'comment'|'blank')} SourceLineType
@ -91,7 +83,7 @@ function preparseSourceCode(source) {
}; };
if (info.type === 'code') { if (info.type === 'code') {
const op_arg_array = line.split(/\s+/); // split line into an array of [op, arg] const op_arg_array = info.sanitized.split(/\s+/); // split line into an array of [op, arg]
if (op_arg_array[0] !== 'undefined') { if (op_arg_array[0] !== 'undefined') {
info.operation = op_arg_array[0]; info.operation = op_arg_array[0];
} }
@ -109,19 +101,67 @@ function preparseSourceCode(source) {
* @param {string} arg * @param {string} arg
* @returns {number} * @returns {number}
**/ **/
function parseNumericOperand(arg) { function decodeNumericOp(arg) {
if (arg.startsWith("$")) return hex2num(arg.replace("$", "")); if (arg.startsWith("$")) return hex2num(arg.replace("$", ""));
return parseInt(arg); return parseInt(arg);
} }
// DECODE!
const op = mnemonics2opcodes[opName][addressingMode]; // FIXME rename
machineCode.push(op); /**
machineCode.push(arg_num); * @param {string} op
dbg(3, `IP: $${num2hex(IP)}, new code: $${num2hex(op)} $${num2hex(arg_num)}`); * @param {object} labels // TODO
IP += 2; * @param {number} IP
dbgGroupEnd(1, 'Input line'); * @returns {Array<string>} - array of labels
**/
function handleLabelDefinition(op, IP, labels) {
  // Registers the label defined by `op` as pointing at byte `IP`.
  // `labels` is mutated in place and also returned, so callers can either
  // reassign or ignore the result.
  const label = op.substring(1); // strip label prefix

  // Own-property check instead of `in`: `in` also matches names inherited from
  // Object.prototype, so a label called `constructor` or `toString` would be
  // treated as already defined and `pointsToByte` would be written onto the
  // inherited built-in instead of creating an entry for the label.
  const alreadyKnown = Object.prototype.hasOwnProperty.call(labels, label);

  if (alreadyKnown) {
    // Label was referenced earlier; keep its pending `bytesToReplace` list
    // and just record where it points.
    labels[label].pointsToByte = IP;
  } else {
    labels[label] = {
      pointsToByte: IP,
      bytesToReplace: [],
    };
  }

  dbg(2, `pointsToByte: ${labels[label].pointsToByte}`);
  dbg(2, `bytesToReplace: ${labels[label].bytesToReplace}`);
  dbg(3, `IP: $${num2hex(IP)}, new code: none`);
  dbgGroupEnd(1, 'Input line');
  return labels;
}
/**
 * Record a constant definition in the `constants` table.
 *
 * @param {string} op - definition token; its first character is dropped to obtain the constant's name
 * @param {string} arg - value text; when it equals `ASM_IP_LABEL`, the current IP (as a string) is stored instead
 * @param {number} IP - current instruction pointer
 * @param {Object} constants - table of constants; mutated in place
 * @returns {Object} the same `constants` object that was passed in
 **/
function handleConstantDefinitions(op, arg, IP, constants) {
  const name = op.substring(1); // strip '>'
  const value = arg === ASM_IP_LABEL ? IP.toString() : arg;
  constants[name] = value;

  dbg(2, `constants:`);
  dbg(2, constants);
  return constants;
}
/**
 * Assemble a line whose mnemonic is allowed to appear without an operand.
 *
 * The opcode is looked up in `mnemonics2opcodes` using direct addressing.
 * A missing operand assembles to 0; a present one is decoded with
 * `decodeNumericOp`.
 *
 * @param {SourceLineInfo} line - pre-parsed source line; reads `operation`, `argument`, `number` and `source`
 * @param {Object} [assemblerState] - currently unused
 * @returns {Array<number>} two-element array `[opcode, operand]`
 * @throws {Error} "Missing opcode" when the mnemonic is not listed in `mnemonicsWithOptionalArgs`
 * @throws {Error} when the mnemonic has no entry in `mnemonics2opcodes`
 **/
function assembleMnemonicsWithOptionalArgs(line, assemblerState) {
  if (mnemonicsWithOptionalArgs.indexOf(line.operation) < 0) {
    console.error(`Missing opcode for line ${line.number}: ${line.source}`);
    throw new Error("Missing opcode");
  }

  // The operation is a mnemonic, not a number: parsing it numerically would
  // yield NaN, so translate it through the opcode table instead.
  const hasEntry = Object.prototype.hasOwnProperty.call(mnemonics2opcodes, line.operation);
  if (!hasEntry) {
    throw new Error(`Unknown mnemonic '${line.operation}' on line ${line.number}`);
  }
  const opcode = mnemonics2opcodes[line.operation].direct;

  const operand = line.argument !== null ? decodeNumericOp(line.argument) : 0;
  return [opcode, operand];
}
/** /**
@ -134,18 +174,7 @@ function parseNumericOperand(arg) {
* @return TODO * @return TODO
**/ **/
function decodeInstructions(source) { function decodeInstructions(source) {
// WIP: everything broken
// - just finished writing `splitCodeFromComments`
// - plan:
// - use that to pre-load debugInfo array
// - and to check if the first code-line is `*`
// so currently i'm implementing 2 entangled features:
// 1. check if first line * and set IP
// 2. return debug data along with machine code
let lines = preparseSourceCode(source); let lines = preparseSourceCode(source);
console.log(lines);
// Figure out where to start assembly... // Figure out where to start assembly...
@ -175,7 +204,7 @@ function decodeInstructions(source) {
let labels = {}; let labels = {};
let constants = {}; let constants = {};
// Decode line by line // Decode line by line...
for (let i = 0; i < lines.length; i++) { for (let i = 0; i < lines.length; i++) {
let line = lines[0]; let line = lines[0];
if (line.type === 'code') { if (line.type === 'code') {
@ -185,73 +214,42 @@ function decodeInstructions(source) {
const arg = line.argument; const arg = line.argument;
} }
/** @type {{op: (number | null), arg: (number|null)}} **/
let assembledLine = {
op: null,
arg: null
};
/** @type {'direct'|'indirect'} **/ // *** Decode special operations ***
let addressingMode = 'direct';
// Opcodes - Handle label definitions // Opcodes - Handle label definitions
if (op.startsWith(ASM_LABEL_PREFIX)) { if (op.startsWith(ASM_LABEL_PREFIX)) {
let label = op.substring(1); // strip label prefix labels = handleLabelDefinition(op, IP, labels);
if (label in labels) {
labels[label].pointsToByte = IP;
} else {
labels[label] = {
pointsToByte: IP,
bytesToReplace: [],
};
}
dbg(2, `pointsToByte: ${labels[label].pointsToByte}`);
dbg(2, `bytesToReplace: ${labels[label].bytesToReplace}`);
dbg(3, `IP: $${num2hex(IP)}, new code: none`);
dbgGroupEnd(1, 'Input line');
continue;
}
// let op_arg_array = line.split(/\s+/); // split line into an array of [op, arg]
// let opName = op_arg_array[0].toLowerCase();
// let arg_str = op_arg_array[1];
// let arg_num = null;
// Opcodes - Handle setting value of IP
if (startsWithPointerToIP(op)) {
dbg(3, 'CHANGING IP');
IP = parseInt(arg);
continue; continue;
} }
// Opcodes - Handle constant definitions // Opcodes - Handle constant definitions
if (op.startsWith(ASM_CONSTANT_PREFIX)) { if (op.startsWith(ASM_CONSTANT_PREFIX)) {
let constantName = op.substring(1); // strip '>' constants = handleConstantDefinitions(op, arg, IP, constants);
let constantValue = arg;
if (constantValue === ASM_IP_LABEL) {
constantValue = IP.toString();
}
constants[constantName] = constantValue;
dbg(2, `constants:`);
dbg(2, constants);
continue; continue;
} }
// Opcodes - Handle mnemonics without operands (eg END) ... // Opcodes - Handle setting value of IP
if (arg === null) { if (op.startsWith(ASM_IP_LABEL)) {
if (mnemonicsWithOptionalArgs.indexOf(op) < 0) { IP = parseInt(arg);
console.error(`Missing opcode for line ${line.number}: ${line.source}`); continue;
throw new Error("Missing opcode"); }
}
assembledLine.arg = 0; // *** Decode regular operations ***
/** @type {number|null} decodedOp **/
let decodedOp = null;
/** @type {number|null} decodedArg **/
let decodedArg = null;
/** @typedef {'direct'|'indirect'} AddressingMode **/
let addressingMode = 'direct';
// Operands - Handle references to labels // Operands - Handle references to labels
} else if (arg.startsWith(ASM_LABEL_PREFIX)) { if (arg !== null && arg.startsWith(ASM_LABEL_PREFIX)) {
let label = arg.substring(1); // strip label prefix let label = line.argument.substring(1); // strip label prefix
assembledLine.arg = 0;
if (label in labels) { if (label in labels) {
dbg(1, `'${label}' already in labels object`); dbg(1, `'${label}' already in labels object`);
labels[label].bytesToReplace.push(IP + 1); labels[label].bytesToReplace.push(IP + 1);
@ -263,43 +261,56 @@ function decodeInstructions(source) {
} }
dbg(2, `pointsToByte: ${labels[label].pointsToByte}`); dbg(2, `pointsToByte: ${labels[label].pointsToByte}`);
dbg(2, `bytesToReplace: ${labels[label].bytesToReplace}`); dbg(2, `bytesToReplace: ${labels[label].bytesToReplace}`);
let code = [decodeNumericOp(line.operation), 0] // Return 0 for operand for now -- we'll replace it later
}
// Operands - Handle references to the Instruction Pointer // Operands - Handle references to the Instruction Pointer
} else if (arg.toLowerCase() === ASM_IP_LABEL) { if (arg !== null && arg === ASM_IP_LABEL) {
dbg(2, `operand references current address`); dbg(2, `operand references current address`);
assembledLine.arg = IP; decodedArg = IP;
dbg(2, `arg_num: ${num2hex(assembledLine.arg)}`); dbg(2, `arg_num: ${num2hex(decodedArg)}`);
}
// Operands - Handle references to constants // Operands - Handle references to constants
} else if (arg.startsWith(ASM_CONSTANT_PREFIX)) { if (arg !== null && arg.startsWith(ASM_CONSTANT_PREFIX)) {
dbg(2, `operand references '${arg}'`); dbg(2, `operand references '${arg}'`);
assembledLine.arg = constants[arg.substring(1)]; // substring(1) strips '>' decodedArg = constants[arg.substring(1)]; // substring(1) strips '>'
}
// Operands - Handle references to constants in indirect mode // Operands - Handle references to constants in indirect mode
} else if (arg.startsWith(`(${ASM_CONSTANT_PREFIX}`)) { if (arg !== null && arg.startsWith(`(${ASM_CONSTANT_PREFIX}`)) {
addressingMode = "indirect"; addressingMode = "indirect";
dbg(2, `IND - operand references '${arg}'`); dbg(2, `IND - operand references '${arg}'`);
let constTemp = arg.replace(`(${ASM_CONSTANT_PREFIX}`, "").replace(")", ""); let constTemp = arg.replace(`(${ASM_CONSTANT_PREFIX}`, "").replace(")", "");
assembledLine.arg = constants[constTemp]; decodedArg = constants[constTemp];
}
// Operands - Handle indirect expressions // Operands - Handle indirect expressions
} else if (arg.startsWith("(")) { if (arg !== null && arg.startsWith("(")) {
addressingMode = "indirect"; addressingMode = "indirect";
assembledLine.arg = arg.replace("(", "").replace(")", ""); let indyTemp = arg.replace("(", "").replace(")", "");
decodedArg = decodeNumericOp(indyTemp);
} }
// Operands - Handle numeric operands // Opcodes - Handle mnemonics without operands (eg END)
if (arg_num === null) { if (arg === null) {
if (arg_str.startsWith("$")) { if (mnemonicsWithOptionalArgs.indexOf(line.operation) < 0) {
// Handle hex console.error(`Missing opcode for line ${line.number}: ${line.source}`);
arg_str = arg_str.replace("$", ""); throw new Error("Missing opcode");
arg_num = hex2num(arg_str);
} else {
// Accept decimal i guess
arg_num = parseInt(arg_str);
} }
decodedOp = decodeNumericOp(line.operation);
decodedArg = line.argument !== null ? decodeNumericOp(line.argument) : 0;
} }
if (decodedOp === null) {
decodedOp = mnemonics2opcodes[line.operation][addressingMode];
}
machineCode.push(decodedOp);
machineCode.push(decodedArg);
dbg(3, `IP: $${num2hex(IP)}, new code: $${num2hex(decodedOp)} $${num2hex(decodedArg)}`);
IP += 2;
dbgGroupEnd(1, 'Input line');
}; };
} }