feat: first_pass, parser functions, check command types, add to symbol table
This commit is contained in:
@@ -3,10 +3,11 @@
|
|||||||
// by Nisan and Schocken, MIT Press.
|
// by Nisan and Schocken, MIT Press.
|
||||||
|
|
||||||
// Computes R0 = 2 + 3 (R0 refers to RAM[0])
|
// Computes R0 = 2 + 3 (R0 refers to RAM[0])
|
||||||
|
(lab)
|
||||||
@2
|
@2
|
||||||
D=A
|
D=A
|
||||||
@3
|
@3
|
||||||
D=D+A
|
D=D+A
|
||||||
|
(test)
|
||||||
@0
|
@0
|
||||||
M=D
|
M=D
|
||||||
|
|||||||
@@ -1,5 +1,6 @@
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
|
from assembler.constants.command_types import CommandType
|
||||||
from assembler.parser import Parser
|
from assembler.parser import Parser
|
||||||
|
|
||||||
from .symbol_table import SymbolTable
|
from .symbol_table import SymbolTable
|
||||||
@@ -31,27 +32,68 @@ class Assembler:
|
|||||||
|
|
||||||
# Counters
|
# Counters
|
||||||
self.next_variable_address = 16
|
self.next_variable_address = 16
|
||||||
|
self.max_variable_address = 16383
|
||||||
|
|
||||||
# Will be created later
|
# Will be created later
|
||||||
self.parser: Parser
|
self.parser: Parser
|
||||||
self.lines: list[str] = []
|
self.cleaned_lines: list[str] = []
|
||||||
|
|
||||||
|
# Opens file and cleans it for whitespace and comments.
|
||||||
with open(input_file, "r") as f:
|
with open(input_file, "r") as f:
|
||||||
self.lines = f.readlines()
|
self.cleaned_lines = self._trim_comments_and_whitespace(f.readlines())
|
||||||
raise NotImplementedError("Not implemented")
|
|
||||||
|
|
||||||
def assemble(self) -> None:
    """
    Run the assembly process; public entry point called by the CLI.

    At this stage only the first pass is executed, after which the
    resulting symbol table is echoed to stdout.
    """
    self._first_pass()

    # Show what the first pass collected.
    banner = "Symbol Table After First Pass"
    table = self.symbol_table.symbols
    print(banner)
    print(table)
|
def _trim_comments_and_whitespace(self, lines: list[str]) -> list[str]:
|
||||||
|
"""
|
||||||
|
Remove comments and whitespace from assembly lines.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
list[str]: Cleaned lines (no empty lines, no comments, no extra whitespace)
|
||||||
|
"""
|
||||||
|
|
||||||
|
processed_lines: list[str] = []
|
||||||
|
|
||||||
|
for line in lines:
|
||||||
|
# Remove leading/trailing whitespace
|
||||||
|
line = line.strip()
|
||||||
|
|
||||||
|
# Remove comments (everything after //)
|
||||||
|
if "//" in line:
|
||||||
|
line = line[: line.index("//")].strip()
|
||||||
|
|
||||||
|
# Skip empty lines
|
||||||
|
if not line:
|
||||||
|
continue
|
||||||
|
|
||||||
|
# Add cleaned line to result
|
||||||
|
processed_lines.append(line)
|
||||||
|
return processed_lines
|
||||||
|
|
||||||
def _first_pass(self) -> None:
    """
    First pass: record the instruction address of every (LABEL) declaration.

    Walks the cleaned source lines with a fresh Parser. A label declaration
    is stored in the symbol table under the current instruction address and
    does not consume an address itself; every other command (A- or
    C-command) advances the instruction address by one.

    Raises:
        ValueError: If the same label is declared more than once during this
            pass, or if the parser rejects a malformed label while
            classifying a command.
    """
    parser = Parser(self.cleaned_lines)
    instruction_address = 0
    # Labels declared during this pass, so a redefinition is reported
    # instead of silently overwriting the earlier address.
    declared: dict[str, int] = {}

    while parser.has_more_commands():
        parser.advance()

        if parser.command_type() != CommandType.L_COMMAND:
            # A-command or C-command: occupies one instruction slot.
            instruction_address += 1
            continue

        label = parser.symbol()
        if label in declared:
            raise ValueError(
                f"Duplicate label: '{label}' is already defined "
                f"at instruction address {declared[label]}"
            )
        declared[label] = instruction_address
        # Labels do not occupy an instruction slot, so the address is
        # deliberately left unchanged here.
        self.symbol_table.add_entry(label, instruction_address)
def _second_pass(self) -> None:
|
def _second_pass(self) -> None:
|
||||||
"""
|
"""
|
||||||
@@ -78,7 +120,7 @@ class Assembler:
|
|||||||
"""Write binary instructions to output file."""
|
"""Write binary instructions to output file."""
|
||||||
with open(self.output_file, "w") as f:
|
with open(self.output_file, "w") as f:
|
||||||
f.write(f"// Assembled from {self.input_file.name}\n")
|
f.write(f"// Assembled from {self.input_file.name}\n")
|
||||||
f.write(f"// {len(self.lines)} lines processed\n")
|
f.write(f"// {len(self.cleaned_lines)} lines processed\n")
|
||||||
raise NotImplementedError("Not implemented")
|
raise NotImplementedError("Not implemented")
|
||||||
|
|
||||||
def _assembled_success(self) -> None:
|
def _assembled_success(self) -> None:
|
||||||
|
|||||||
@@ -26,9 +26,18 @@ def main():
|
|||||||
|
|
||||||
# Assemble
|
# Assemble
|
||||||
try:
|
try:
|
||||||
# Assemble
|
|
||||||
assembler = Assembler(input_path, output_path)
|
assembler = Assembler(input_path, output_path)
|
||||||
assembler.assemble()
|
assembler.assemble()
|
||||||
except Exception as e:
|
print(f"Successfully assembled {input_path} -> {output_path}")
|
||||||
print(f"Error during assembly: {e}", file=sys.stderr)
|
except ValueError as e:
|
||||||
|
# Handle validation errors (empty labels, invalid syntax, etc.)
|
||||||
|
print(f"Assembly Error: {e}", file=sys.stderr)
|
||||||
|
sys.exit(1)
|
||||||
|
except FileNotFoundError as e:
|
||||||
|
# Handle missing files
|
||||||
|
print(f"File Error: {e}", file=sys.stderr)
|
||||||
|
sys.exit(1)
|
||||||
|
except Exception as e:
|
||||||
|
# Catch any other unexpected errors
|
||||||
|
print(f"Unexpected Error: {e}", file=sys.stderr)
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
|||||||
@@ -0,0 +1,7 @@
|
|||||||
|
from enum import Enum
|
||||||
|
|
||||||
|
|
||||||
|
class CommandType(Enum):
    """Kinds of command the parser distinguishes in an assembly source line."""

    # Line beginning with "@".
    A_COMMAND = "A_COMMAND"
    # Any line that is neither an A-command nor a label declaration.
    C_COMMAND = "C_COMMAND"
    # Label declaration wrapped in parentheses, e.g. "(LOOP)".
    L_COMMAND = "L_COMMAND"
|
||||||
@@ -1,3 +1,6 @@
|
|||||||
|
from .constants.command_types import CommandType
|
||||||
|
|
||||||
|
|
||||||
class Parser:
|
class Parser:
|
||||||
def __init__(self, file_lines):
|
def __init__(self, file_lines):
|
||||||
"""
|
"""
|
||||||
@@ -7,57 +10,63 @@ class Parser:
|
|||||||
file_lines: List of strings (lines from the .asm file)
|
file_lines: List of strings (lines from the .asm file)
|
||||||
"""
|
"""
|
||||||
self.lines = file_lines
|
self.lines = file_lines
|
||||||
# Store the file lines
|
self.current_index = -1 # Before first command
|
||||||
# Initialize current_command index to -1 (before first command)
|
self.current_command = ""
|
||||||
# No processing happens in constructor for now
|
|
||||||
|
|
||||||
def has_more_commands(self) -> bool:
    """Return True while at least one line remains after the current index."""
    upcoming = self.current_index + 1
    return upcoming < len(self.lines)
def advance(self) -> None:
    """
    Step to the next line and make it the current command.

    Does nothing when no further commands remain.
    """
    if not self.has_more_commands():
        return

    position = self.current_index + 1
    self.current_index = position
    self.current_command = self.lines[position]
def command_type(self) -> CommandType:
    """
    Classify the current command and validate label declarations.

    Returns:
        CommandType: L_COMMAND for a "(LABEL)" declaration, A_COMMAND for a
        line starting with "@", and C_COMMAND for anything else.

    Raises:
        ValueError: If a line opens a label with "(" but does not end with
            ")", or if the label name is empty, contains a space, or does
            not start with a letter or underscore.
    """
    command = self.current_command

    if command.startswith("("):
        # No A- or C-command starts with "(", so a line that opens a label
        # but never closes it is a syntax error. Treating it as a C-command
        # would count it as an instruction and shift later label addresses.
        if not command.endswith(")"):
            raise ValueError(f"Invalid label: missing closing ')': '{command}'")

        label_name = command[1:-1]

        # Check 1: not empty
        if not label_name:
            raise ValueError("Invalid label: empty label name '()' is not allowed")

        # Check 2: no spaces
        if " " in label_name:
            raise ValueError(
                f"Invalid label: label name cannot contain spaces: '{self.current_command}'"
            )

        # Check 3: first character must be a letter or an underscore
        if not label_name[0].isalpha() and label_name[0] != "_":
            raise ValueError(
                f"Invalid label: label must start with letter or underscore: '{self.current_command}'"
            )

        return CommandType.L_COMMAND

    if command.startswith("@"):
        return CommandType.A_COMMAND

    return CommandType.C_COMMAND
def symbol(self) -> str:
    """
    Return the symbol carried by the current A-command or label declaration.

    - For "@Xxx", returns "Xxx".
    - For "(Xxx)", returns "Xxx" (first and last characters removed).
    - For any other command, returns an empty string.
    """
    command = self.current_command
    marker = command[:1]

    if marker == "@":
        return command[1:]
    if marker == "(":
        return command[1:-1]
    return ""
def dest(self) -> str:
|
def dest(self) -> str:
|
||||||
"""
|
"""
|
||||||
|
|||||||
@@ -30,10 +30,8 @@ class SymbolTable:
|
|||||||
}
|
}
|
||||||
|
|
||||||
def add_entry(self, symbol: str, address: int) -> None:
    """Bind ``symbol`` to ``address``, replacing any existing binding."""
    table = self.symbols
    table[symbol] = address
|
|
||||||
def contains(self, symbol: str) -> bool:
|
def contains(self, symbol: str) -> bool:
|
||||||
print(symbol)
|
print(symbol)
|
||||||
|
|||||||
Reference in New Issue
Block a user