From 056464d85aa2c75e36ffa34d17392548427a1e3b Mon Sep 17 00:00:00 2001 From: Jonas Date: Thu, 19 Feb 2026 09:22:27 +0100 Subject: [PATCH] feat: first_pass, parser functions, check command types, add to symbol table --- .../assembler/provided-asm-files/Add.asm | 5 +- .../assembler/src/assembler/assembler.py | 60 +++++++++++-- .../assembler/src/assembler/cli.py | 15 +++- .../src/assembler/constants/command_types.py | 7 ++ .../assembler/src/assembler/parser.py | 85 ++++++++++--------- .../assembler/src/assembler/symbol_table.py | 4 +- 6 files changed, 121 insertions(+), 55 deletions(-) create mode 100644 project-6-assembler/assembler/src/assembler/constants/command_types.py diff --git a/project-6-assembler/assembler/provided-asm-files/Add.asm b/project-6-assembler/assembler/provided-asm-files/Add.asm index 8c3ae23..bf43fbc 100644 --- a/project-6-assembler/assembler/provided-asm-files/Add.asm +++ b/project-6-assembler/assembler/provided-asm-files/Add.asm @@ -3,10 +3,11 @@ // by Nisan and Schocken, MIT Press. // Computes R0 = 2 + 3 (R0 refers to RAM[0]) - +(lab) @2 D=A @3 D=D+A +(test) @0 -M=D \ No newline at end of file +M=D diff --git a/project-6-assembler/assembler/src/assembler/assembler.py b/project-6-assembler/assembler/src/assembler/assembler.py index 5bfb52b..78b7d10 100644 --- a/project-6-assembler/assembler/src/assembler/assembler.py +++ b/project-6-assembler/assembler/src/assembler/assembler.py @@ -1,5 +1,6 @@ from pathlib import Path +from assembler.constants.command_types import CommandType from assembler.parser import Parser from .symbol_table import SymbolTable @@ -31,27 +32,68 @@ class Assembler: # Counters self.next_variable_address = 16 + self.max_variable_address = 16383 # Will be created later self.parser: Parser - self.lines: list[str] = [] + self.cleaned_lines: list[str] = [] + # Opens file and cleans it for whitespace and comments. with open(input_file, "r") as f: - self.lines = f.readlines() - raise NotImplementedError("Not implemented") + self.cleaned_lines = self._trim_comments_and_whitespace(f.readlines()) def assemble(self) -> None: """ Main assembly process - coordinates first and second pass. Public API method that CLI will call. """ + self._first_pass() + print("Symbol Table After First Pass") + print(self.symbol_table.symbols) + + def _trim_comments_and_whitespace(self, lines: list[str]) -> list[str]: + """ + Remove comments and whitespace from assembly lines. + + Returns: + list[str]: Cleaned lines (no empty lines, no comments, no extra whitespace) + """ + + processed_lines: list[str] = [] + + for line in lines: + # Remove leading/trailing whitespace + line = line.strip() + + # Remove comments (everything after //) + if "//" in line: + line = line[: line.index("//")].strip() + + # Skip empty lines + if not line: + continue + + # Add cleaned line to result + processed_lines.append(line) + return processed_lines def _first_pass(self) -> None: - """ - First pass: Build symbol table with label addresses. - Scans through code to find all (LABEL) declarations. - """ - raise NotImplementedError("Not implemented") + """Build symbol table with labels""" + parser = Parser(self.cleaned_lines) + instruction_address = 0 + + while parser.has_more_commands(): + parser.advance() # Move to next command + + cmd_type = parser.command_type() + + if cmd_type == CommandType.L_COMMAND: + label = parser.symbol() # Extract label name + self.symbol_table.add_entry(label, instruction_address) + # Don't increment address + else: + # A-command or C-command + instruction_address += 1 def _second_pass(self) -> None: """ @@ -78,7 +120,7 @@ class Assembler: """Write binary instructions to output file.""" with open(self.output_file, "w") as f: f.write(f"// Assembled from {self.input_file.name}\n") - f.write(f"// {len(self.lines)} lines processed\n") + f.write(f"// {len(self.cleaned_lines)} lines processed\n") raise NotImplementedError("Not implemented") def _assembled_success(self) -> None: diff --git a/project-6-assembler/assembler/src/assembler/cli.py b/project-6-assembler/assembler/src/assembler/cli.py index a411a0d..bd735d3 100644 --- a/project-6-assembler/assembler/src/assembler/cli.py +++ b/project-6-assembler/assembler/src/assembler/cli.py @@ -26,9 +26,18 @@ def main(): # Assemble try: - # Assemble assembler = Assembler(input_path, output_path) assembler.assemble() - except Exception as e: - print(f"Error during assembly: {e}", file=sys.stderr) + print(f"Successfully assembled {input_path} -> {output_path}") + except ValueError as e: + # Handle validation errors (empty labels, invalid syntax, etc.) + print(f"Assembly Error: {e}", file=sys.stderr) + sys.exit(1) + except FileNotFoundError as e: + # Handle missing files + print(f"File Error: {e}", file=sys.stderr) + sys.exit(1) + except Exception as e: + # Catch any other unexpected errors + print(f"Unexpected Error: {e}", file=sys.stderr) sys.exit(1) diff --git a/project-6-assembler/assembler/src/assembler/constants/command_types.py b/project-6-assembler/assembler/src/assembler/constants/command_types.py new file mode 100644 index 0000000..455c7d0 --- /dev/null +++ b/project-6-assembler/assembler/src/assembler/constants/command_types.py @@ -0,0 +1,7 @@ +from enum import Enum + + +class CommandType(Enum): + A_COMMAND = "A_COMMAND" + C_COMMAND = "C_COMMAND" + L_COMMAND = "L_COMMAND" diff --git a/project-6-assembler/assembler/src/assembler/parser.py b/project-6-assembler/assembler/src/assembler/parser.py index 5c0867e..a6402b4 100644 --- a/project-6-assembler/assembler/src/assembler/parser.py +++ b/project-6-assembler/assembler/src/assembler/parser.py @@ -1,3 +1,6 @@ +from .constants.command_types import CommandType + + class Parser: def __init__(self, file_lines): """ @@ -7,57 +10,63 @@ class Parser: file_lines: List of strings (lines from the .asm file) """ self.lines = file_lines - # Store the file lines - # Initialize current_command index to -1 (before first command) - # No processing happens in constructor for now + self.current_index = -1 # Before first command + self.current_command = "" def has_more_commands(self) -> bool: - """ - Check if there are more commands in the input. + """Are there more commands to process?""" + return self.current_index + 1 < len(self.lines) - Returns: - bool: True if more commands exist, False otherwise - """ - raise NotImplementedError("hasMoreCommands not yet implemented") + def advance(self) -> None: + """Move to next command""" + if self.has_more_commands(): + self.current_index += 1 + self.current_command = self.lines[self.current_index] - def advance(self): - """ - Reads the next command and makes it the current command. - Should only be called if hasMoreCommands() is true. - - For now: does nothing (stub) - Later: will skip whitespace/comments and advance to next valid command - """ - print(self.lines) - pass # Does nothing for now - - def command_type(self) -> str: + def command_type( + self, + ) -> CommandType: """ Returns the type of the current command. Returns: str: "A_COMMAND", "C_COMMAND", or "L_COMMAND" - Raises: - NotImplementedError: Not yet implemented + Determine command type """ - raise NotImplementedError("command_type not yet implemented") + if self.current_command.startswith("(") and self.current_command.endswith(")"): + label_name = self.current_command[1:-1] + + # Check 1: Not empty + if len(label_name) == 0: + raise ValueError("Invalid label: empty label name '()' is not allowed") + + # Check 2: No spaces (optional) + if " " in label_name: + raise ValueError( + f"Invalid label: label name cannot contain spaces: '{self.current_command}'" + ) + + # Check 3: Valid identifier (optional - letters, + # numbers, underscore, no start with digit) + if not label_name[0].isalpha() and label_name[0] != "_": + raise ValueError( + f"Invalid label: label must start with letter or underscore: '{self.current_command}'" + ) + + return CommandType.L_COMMAND + elif self.current_command.startswith("@"): + return CommandType.A_COMMAND + else: + return CommandType.C_COMMAND def symbol(self) -> str: - """ - Returns the symbol or decimal value of the current command. - - Should only be called when commandType() is A_COMMAND or L_COMMAND. - - For @Xxx, returns "Xxx" - - For (Xxx), returns "Xxx" - - Returns: - str: The symbol/decimal value - - Raises: - NotImplementedError: Not yet implemented - """ - raise NotImplementedError("symbol not yet implemented") + """Get symbol from A-command or L-command""" + if self.current_command.startswith("@"): + return self.current_command[1:] # @LOOP → LOOP + elif self.current_command.startswith("("): + return self.current_command[1:-1] # (LOOP) → LOOP + return "" def dest(self) -> str: """ diff --git a/project-6-assembler/assembler/src/assembler/symbol_table.py b/project-6-assembler/assembler/src/assembler/symbol_table.py index d43ec6d..e8e7053 100644 --- a/project-6-assembler/assembler/src/assembler/symbol_table.py +++ b/project-6-assembler/assembler/src/assembler/symbol_table.py @@ -30,10 +30,8 @@ class SymbolTable: } def add_entry(self, symbol: str, address: int) -> None: - print(symbol) - print(address) # Add symbol-address pair to dictionary - raise NotImplementedError("add_entry not yet implemented") + self.symbols[symbol] = address def contains(self, symbol: str) -> bool: print(symbol)