168 lines
5.6 KiB
Python
168 lines
5.6 KiB
Python
from pathlib import Path
|
|
|
|
from assembler.code import Code
|
|
from assembler.constants.command_types import CommandType
|
|
from assembler.parser import Parser
|
|
|
|
from .symbol_table import SymbolTable
|
|
|
|
|
|
class Assembler:
    """
    Main assembler that coordinates the two-pass assembly process.

    The first pass records label addresses in the symbol table; the second
    pass translates every A- and C-command to a 16-character binary string.

    Attributes:
        input_file: Path to the .asm input file
        output_file: Path to the .hack output file
        parser: Parser instance for reading commands (created by the first pass)
        symbol_table: SymbolTable instance for managing symbols
        binary_instructions: List of translated binary instructions
        cleaned_lines: Input lines with comments, surrounding whitespace and
            empty lines removed
        next_variable_address: Address given to the next new variable symbol
        max_variable_address: Highest address a variable may be allocated at
    """

    # Largest value that fits in the 15 value bits of an A-instruction
    # (format: 0xxxxxxxxxxxxxxx).
    _MAX_A_VALUE = 0x7FFF

    def __init__(self, input_file: Path, output_file: Path):
        """
        Initialize assembler with input/output paths.

        The input file is opened and read here, so a missing or unreadable
        file fails at construction time rather than in assemble().

        Args:
            input_file: Path to the .asm input file
            output_file: Path to the .hack output file
        """
        # File paths
        self.input_file: Path = input_file
        self.output_file: Path = output_file

        # Components (create now)
        self.symbol_table: SymbolTable = SymbolTable()

        # Data structures (empty, will fill later)
        self.binary_instructions: list[str] = []

        # Counters
        self.next_variable_address = 16
        self.max_variable_address = 16383

        # Will be created later (annotation only; assigned in _first_pass)
        self.parser: Parser
        self.cleaned_lines: list[str] = []

        # Opens file and cleans it for whitespace and comments.
        with open(input_file, "r") as f:
            self.cleaned_lines = self._trim_comments_and_whitespace(f.readlines())

    def assemble(self) -> None:
        """
        Main assembly process - coordinates first and second pass.

        Public API method that CLI will call. Runs the first pass, the second
        pass, writes the output file and prints progress messages along the way.
        """
        print("[ ] - Starting First Pass..")
        self._first_pass()
        print("[x] - First Pass Done..")

        print("[ ] - Starting Second Pass..")
        self._second_pass()
        print("[x] - Second pass done..")

        print("[ ] - Writing output file..")
        self._write_output()
        print("[x] - Output file written..")

        self._assembled_success()

    def _trim_comments_and_whitespace(self, lines: list[str]) -> list[str]:
        """
        Remove comments and surrounding whitespace from assembly lines.

        Args:
            lines: Raw lines as read from the input file

        Returns:
            list[str]: Cleaned lines (no empty lines, no comments, no
            leading/trailing whitespace). Whitespace inside a line is kept.
        """
        processed_lines: list[str] = []

        for line in lines:
            # Keep only the part before the first "//" (if any), then drop
            # leading/trailing whitespace including the line terminator.
            code = line.partition("//")[0].strip()

            # Skip lines that were empty or contained only a comment
            if code:
                processed_lines.append(code)

        return processed_lines

    def _first_pass(self) -> None:
        """
        Build symbol table with labels.

        Creates the parser over the cleaned lines, maps every label to the
        address of the instruction that follows it, and rewinds the parser so
        the second pass can iterate the same commands again.
        """
        self.parser = Parser(self.cleaned_lines)
        instruction_address = 0

        while self.parser.has_more_commands():
            self.parser.advance()  # Move to next command

            if self.parser.command_type() == CommandType.L_COMMAND:
                # A label emits no instruction, so the address is not advanced
                label = self.parser.symbol()
                self.symbol_table.add_entry(label, instruction_address)
            else:
                # A-command or C-command occupies one instruction slot
                instruction_address += 1

        self.parser.reset_index()

    def _second_pass(self) -> None:
        """
        Second pass: Translate instructions to binary.

        Handles A-commands, C-commands, and resolves symbols. Labels are
        skipped because they were already recorded by the first pass.
        Must run after _first_pass(), which creates self.parser.
        """
        # Start from an empty list so that running the pass again does not
        # append to the output of an earlier run.
        self.binary_instructions = []

        while self.parser.has_more_commands():
            self.parser.advance()  # Move to next command

            cmd_type = self.parser.command_type()
            if cmd_type == CommandType.L_COMMAND:
                continue
            elif cmd_type == CommandType.A_COMMAND:
                binary = self._translate_a_command(self.parser.symbol())
                self.binary_instructions.append(binary)
            elif cmd_type == CommandType.C_COMMAND:
                binary = self._translate_c_command()
                self.binary_instructions.append(binary)

    def _translate_a_command(self, symbol: str) -> str:
        """
        Translate the symbol of an A-command to a 16-bit binary string.

        The symbol is resolved in this order: numeric constant, existing
        symbol-table entry, otherwise a new variable that is allocated at
        next_variable_address and added to the symbol table.

        Args:
            symbol: The text of the A-command after the "@"

        Returns:
            str: 16-character binary string (format: 0xxxxxxxxxxxxxxx)

        Raises:
            ValueError: If a new variable would be allocated above
                max_variable_address, or if the resolved address does not
                fit in the 15 value bits of an A-instruction.
        """
        # Case 1: Check if symbol is a numeric constant (e.g., "2", "100")
        if symbol.isdigit():
            address = int(symbol)

        # Case 2: Check if symbol exists in symbol table (predefined or label)
        elif self.symbol_table.contains(symbol):
            address = self.symbol_table.get_address(symbol)

        # Case 3: It's a new variable - allocate next available address
        else:
            if self.next_variable_address > self.max_variable_address:
                raise ValueError(
                    f"Cannot allocate variable '{symbol}': variable addresses "
                    f"are limited to {self.max_variable_address}"
                )
            address = self.next_variable_address
            self.symbol_table.add_entry(symbol, address)
            self.next_variable_address += 1

        # A value outside 0..32767 would set the leading bit (or produce a
        # sign character) and no longer be a valid A-instruction.
        if not 0 <= address <= self._MAX_A_VALUE:
            raise ValueError(
                f"A-command value '{symbol}' resolves to {address}, which is "
                f"outside the range 0..{self._MAX_A_VALUE}"
            )

        # Convert address to 16-bit binary string (format: 0xxxxxxxxxxxxxxx)
        return format(address, "016b")

    def _translate_c_command(self) -> str:
        """
        Translate C-command to 16-bit binary.

        Reads the comp, dest and jump fields of the parser's current command
        and concatenates their codes after the "111" prefix.

        Format: 111 a c1c2c3c4c5c6 d1d2d3 j1j2j3
        """
        comp = Code.comp(self.parser.comp())
        dest = Code.dest(self.parser.dest())
        jump = Code.jump(self.parser.jump())

        return f"111{comp}{dest}{jump}"

    def _write_output(self) -> None:
        """Write binary instructions to output file, one per line."""
        with open(self.output_file, "w") as f:
            f.write("\n".join(self.binary_instructions))

    def _assembled_success(self) -> None:
        """Print the success message naming the input and output files."""
        print(f"Successfully assembled {self.input_file} -> {self.output_file}")
|