feat: first_pass, parser functions, check command types, add to symbol table

This commit is contained in:
2026-02-19 09:22:27 +01:00
parent 4bbe67d3d6
commit 056464d85a
6 changed files with 121 additions and 55 deletions

View File

@@ -3,10 +3,11 @@
// by Nisan and Schocken, MIT Press.
// Computes R0 = 2 + 3 (R0 refers to RAM[0])
(lab)
@2
D=A
@3
D=D+A
(test)
@0
M=D

View File

@@ -1,5 +1,6 @@
from pathlib import Path
from assembler.constants.command_types import CommandType
from assembler.parser import Parser
from .symbol_table import SymbolTable
@@ -31,27 +32,68 @@ class Assembler:
# Counters
self.next_variable_address = 16
self.max_variable_address = 16383
# Will be created later
self.parser: Parser
self.lines: list[str] = []
self.cleaned_lines: list[str] = []
# Opens file and cleans it for whitespace and comments.
with open(input_file, "r") as f:
self.lines = f.readlines()
raise NotImplementedError("Not implemented")
self.cleaned_lines = self._trim_comments_and_whitespace(f.readlines())
def assemble(self) -> None:
    """Run the assembly process; this is the public method the CLI calls.

    At this stage only the first pass is executed. Afterwards the
    symbol table contents are printed to stdout for inspection.
    """
    self._first_pass()
    # One print call with a newline separator emits the heading and the
    # table on two consecutive lines.
    print("Symbol Table After First Pass", self.symbol_table.symbols, sep="\n")
def _trim_comments_and_whitespace(self, lines: list[str]) -> list[str]:
"""
Remove comments and whitespace from assembly lines.
Returns:
list[str]: Cleaned lines (no empty lines, no comments, no extra whitespace)
"""
processed_lines: list[str] = []
for line in lines:
# Remove leading/trailing whitespace
line = line.strip()
# Remove comments (everything after //)
if "//" in line:
line = line[: line.index("//")].strip()
# Skip empty lines
if not line:
continue
# Add cleaned line to result
processed_lines.append(line)
return processed_lines
def _first_pass(self) -> None:
    """First pass: build the symbol table with label addresses.

    Scans the cleaned source lines once. Every ``(LABEL)`` declaration is
    stored in the symbol table with the address of the next instruction;
    every other command (A- or C-command) advances the instruction
    address by one.

    Raises:
        ValueError: Propagated from ``Parser.command_type`` when a label
            declaration is malformed.
    """
    parser = Parser(self.cleaned_lines)
    instruction_address = 0

    while parser.has_more_commands():
        parser.advance()
        if parser.command_type() == CommandType.L_COMMAND:
            # A label does not advance the instruction address, so it is
            # recorded with the address of the command that follows it.
            self.symbol_table.add_entry(parser.symbol(), instruction_address)
        else:
            instruction_address += 1
def _second_pass(self) -> None:
"""
@@ -78,7 +120,7 @@ class Assembler:
"""Write binary instructions to output file."""
with open(self.output_file, "w") as f:
f.write(f"// Assembled from {self.input_file.name}\n")
f.write(f"// {len(self.lines)} lines processed\n")
f.write(f"// {len(self.cleaned_lines)} lines processed\n")
raise NotImplementedError("Not implemented")
def _assembled_success(self) -> None:

View File

@@ -26,9 +26,18 @@ def main():
# Assemble
try:
# Assemble
assembler = Assembler(input_path, output_path)
assembler.assemble()
except Exception as e:
print(f"Error during assembly: {e}", file=sys.stderr)
print(f"Successfully assembled {input_path} -> {output_path}")
except ValueError as e:
# Handle validation errors (empty labels, invalid syntax, etc.)
print(f"Assembly Error: {e}", file=sys.stderr)
sys.exit(1)
except FileNotFoundError as e:
# Handle missing files
print(f"File Error: {e}", file=sys.stderr)
sys.exit(1)
except Exception as e:
# Catch any other unexpected errors
print(f"Unexpected Error: {e}", file=sys.stderr)
sys.exit(1)

View File

@@ -0,0 +1,7 @@
from enum import Enum
class CommandType(Enum):
    """Closed set of command kinds; each member's value equals its name."""

    # Address command.
    A_COMMAND = "A_COMMAND"
    # Compute command.
    C_COMMAND = "C_COMMAND"
    # Label declaration.
    L_COMMAND = "L_COMMAND"

View File

@@ -1,3 +1,6 @@
from .constants.command_types import CommandType
class Parser:
def __init__(self, file_lines):
"""
@@ -7,57 +10,63 @@ class Parser:
file_lines: List of strings (lines from the .asm file)
"""
self.lines = file_lines
# Store the file lines
# Initialize current_command index to -1 (before first command)
# No processing happens in constructor for now
self.current_index = -1 # Before first command
self.current_command = ""
def has_more_commands(self) -> bool:
    """Check whether there are more commands in the input.

    Returns:
        bool: True if at least one line follows the current position,
        False once the last line has been reached (or when there are no
        lines at all).
    """
    # current_index starts at -1, so "+ 1" is the index advance() would
    # move to next.
    return self.current_index + 1 < len(self.lines)
def advance(self) -> None:
    """Read the next command and make it the current command.

    Intended to be called only while ``has_more_commands()`` is true.
    When no commands remain the call is a no-op: the index and the
    current command are left unchanged.
    """
    if not self.has_more_commands():
        return
    self.current_index += 1
    self.current_command = self.lines[self.current_index]
def command_type(self) -> CommandType:
    """Return the type of the current command.

    A command wrapped in parentheses is a label declaration and is
    validated before being accepted. A command starting with ``@`` is an
    A-command. Anything else is treated as a C-command.

    Returns:
        CommandType: ``L_COMMAND``, ``A_COMMAND`` or ``C_COMMAND``.

    Raises:
        ValueError: If a label declaration is empty, contains a space, or
            does not start with a letter or underscore.
    """
    command = self.current_command

    if command.startswith("(") and command.endswith(")"):
        label_name = command[1:-1]

        # Check 1: the label must not be empty.
        if not label_name:
            raise ValueError("Invalid label: empty label name '()' is not allowed")

        # Check 2: the label must not contain spaces.
        if " " in label_name:
            raise ValueError(
                f"Invalid label: label name cannot contain spaces: '{self.current_command}'"
            )

        # Check 3: the first character must be a letter or an underscore
        # (this rejects labels that start with a digit, among others).
        if not label_name[0].isalpha() and label_name[0] != "_":
            raise ValueError(
                f"Invalid label: label must start with letter or underscore: '{self.current_command}'"
            )

        return CommandType.L_COMMAND

    if command.startswith("@"):
        return CommandType.A_COMMAND

    return CommandType.C_COMMAND
def symbol(self) -> str:
    """Return the symbol or decimal value of the current command.

    Should only be called when ``command_type()`` is ``A_COMMAND`` or
    ``L_COMMAND``.

    - For ``@Xxx``, returns ``"Xxx"``.
    - For ``(Xxx)``, returns ``"Xxx"``.

    Returns:
        str: The symbol/decimal text, or an empty string when the current
        command is neither of the two forms above.
    """
    command = self.current_command
    if command.startswith("@"):
        return command[1:]  # @LOOP -> LOOP
    # Require the closing parenthesis too, matching the test used by
    # command_type() to recognise a label declaration.
    if command.startswith("(") and command.endswith(")"):
        return command[1:-1]  # (LOOP) -> LOOP
    return ""
def dest(self) -> str:
"""

View File

@@ -30,10 +30,8 @@ class SymbolTable:
}
def add_entry(self, symbol: str, address: int) -> None:
    """Add a symbol-address pair to the table.

    An existing entry for the same symbol is overwritten.

    Args:
        symbol: Name to register.
        address: Address the symbol maps to.
    """
    self.symbols[symbol] = address
def contains(self, symbol: str) -> bool:
print(symbol)