The lexing portion is complete. It has been tested manually, but not yet in an automated fashion.
This commit is contained in:
commit
5f9cd99fe0
|
@ -0,0 +1,2 @@
|
|||
zig-cache/
|
||||
zig-out/
|
|
@ -0,0 +1,7 @@
|
|||
# ZLoxI
|
||||
|
||||
ZLoxI is a Tree-Walk Lox Interpreter written in Zig.
|
||||
|
||||
At present, only lexing of the language is implemented.
|
||||
|
||||
See the book Crafting Interpreters for more information.
|
|
@ -0,0 +1,34 @@
|
|||
const std = @import("std");
|
||||
|
||||
/// Standard zig-build entry point for the zloxi executable.
pub fn build(b: *std.build.Builder) void {
    // Let the invoker pick a cross-compilation target. Nothing is
    // overridden here, so any target is allowed and native is the default.
    const build_target = b.standardTargetOptions(.{});

    // Let the invoker pick Debug, ReleaseSafe, ReleaseFast, or ReleaseSmall.
    const build_mode = b.standardReleaseOptions();

    // The main interpreter binary.
    const zloxi = b.addExecutable("zloxi", "src/main.zig");
    zloxi.setTarget(build_target);
    zloxi.setBuildMode(build_mode);
    zloxi.install();

    // `zig build run [-- args...]` installs the binary, then runs it,
    // forwarding any arguments after `--` to the interpreter.
    const run_command = zloxi.run();
    run_command.step.dependOn(b.getInstallStep());
    if (b.args) |forwarded_args| {
        run_command.addArgs(forwarded_args);
    }
    const run_step = b.step("run", "Run the app");
    run_step.dependOn(&run_command.step);

    // `zig build test` compiles and runs the unit tests in main.zig.
    const unit_tests = b.addTest("src/main.zig");
    unit_tests.setTarget(build_target);
    unit_tests.setBuildMode(build_mode);
    const test_step = b.step("test", "Run unit tests");
    test_step.dependOn(&unit_tests.step);
}
|
|
@ -0,0 +1,12 @@
|
|||
const std = @import("std");
|
||||
|
||||
// Sticky flag set whenever any error has been reported; main() inspects it
// to decide the process exit code, and the REPL resets it between lines.
pub var hasError: bool = false;

/// Report an error at `line` with no location detail.
pub fn err(line: u64, message: []const u8) void {
    report(line, "", message);
}

/// Print a formatted error to stderr and record that an error occurred.
/// `where` must be `[]const u8` (not `[]u8`): callers pass string literals,
/// e.g. the `""` in `err` above, and literals are const — a mutable slice
/// parameter cannot accept them.
pub fn report(line: u64, where: []const u8, message: []const u8) void {
    std.debug.print("[line {}] Error{s}: {s}\n", .{ line, where, message });
    hasError = true;
}
|
|
@ -0,0 +1,63 @@
|
|||
const std = @import("std");
|
||||
|
||||
const scanner = @import("scanner.zig");
|
||||
const err = @import("error.zig");
|
||||
|
||||
/// Entry point: with no script argument start a REPL, with exactly one
/// argument run that file as a script, otherwise print usage and exit.
pub fn main() !void {
    var gpa = std.heap.GeneralPurposeAllocator(.{}){};
    defer _ = gpa.deinit();
    const allocator = gpa.allocator();

    const args = try std.process.argsAlloc(allocator);
    defer std.process.argsFree(allocator, args);

    switch (args.len) {
        // One argument beyond argv[0]: treat it as a script path.
        2 => {
            try runFile(allocator, args[1]);
            // Exit code 65 signals a data error in the input script.
            if (err.hasError) {
                std.process.exit(65);
            }
        },
        // No script argument: interactive prompt.
        0, 1 => try runPrompt(allocator),
        // Anything else is a usage error; exit code 64.
        else => {
            std.debug.print("Usage: zloxi [script].", .{});
            std.process.exit(64);
        },
    }
}
|
||||
|
||||
/// Read the file at `file_name` (capped at 1 MB) and lex its contents.
fn runFile(alloc: std.mem.Allocator, file_name: []u8) !void {
    const source = try std.fs.cwd().readFileAlloc(alloc, file_name, 1_000_000);
    defer alloc.free(source);
    try run(alloc, source);
}
|
||||
|
||||
/// Interactive REPL: read a line, lex it, repeat until EOF (Ctrl-D).
fn runPrompt(alloc: std.mem.Allocator) !void {
    const stdout_file = std.io.getStdOut().writer();
    const stdin_file = std.io.getStdIn().reader();
    var bw = std.io.bufferedWriter(stdout_file);
    const stdout = bw.writer();

    while (true) {
        try stdout.print("> ", .{});
        try bw.flush();

        const maybe_line = try stdin_file.readUntilDelimiterOrEofAlloc(alloc, '\n', 1000);
        const line = maybe_line orelse {
            // EOF: finish the prompt line neatly and leave the REPL.
            try stdout.print("\n", .{});
            break;
        };
        defer alloc.free(line);

        try run(alloc, line);
        // A bad line must not poison subsequent REPL input.
        err.hasError = false;
    }

    // Make sure any buffered output reaches the terminal before returning.
    try bw.flush();
}
|
||||
|
||||
/// Lex `bytes` and dump the resulting token list to stderr.
fn run(allocator: std.mem.Allocator, bytes: []u8) !void {
    var lexer = scanner.Scanner.init(allocator, bytes);
    defer lexer.deinit();
    const tokens = lexer.scanTokens();
    std.debug.print("{any}\n", .{tokens});
}
|
||||
|
||||
// Error reporting lives in error.zig (imported above as `err`);
// the earlier TODO to move it out of this file is complete.
|
|
@ -0,0 +1,214 @@
|
|||
const err = @import("error.zig");
|
||||
const token = @import("token.zig");
|
||||
const std = @import("std");
|
||||
|
||||
/// Hand-written lexer for Lox, following Crafting Interpreters chapter 4.
/// Token lexemes are slices into `source`; the caller keeps `source` alive
/// for as long as the tokens are used.
pub const Scanner = struct {
    allocator: std.mem.Allocator,
    /// Full source text being scanned.
    source: []u8,
    /// Accumulated tokens; owned by this scanner until deinit().
    tokens: std.ArrayList(token.Token),
    /// Index of the first byte of the lexeme currently being scanned.
    start: u64 = 0,
    /// Index of the next byte to be consumed.
    current: u64 = 0,
    /// 1-based line number, used for error reporting.
    line: u64 = 1,

    /// Reserved words, resolved at compile time. Declared at container
    /// scope so it is built once, not re-declared on every identifier()
    /// call.
    const keywords = std.ComptimeStringMap(token.TokenType, .{
        .{ "and", token.TokenType.AND },
        .{ "class", token.TokenType.CLASS },
        .{ "else", token.TokenType.ELSE },
        .{ "false", token.TokenType.FALSE },
        .{ "for", token.TokenType.FOR },
        .{ "fun", token.TokenType.FUN },
        .{ "if", token.TokenType.IF },
        .{ "nil", token.TokenType.NIL },
        .{ "or", token.TokenType.OR },
        .{ "print", token.TokenType.PRINT },
        .{ "return", token.TokenType.RETURN },
        .{ "super", token.TokenType.SUPER },
        .{ "this", token.TokenType.THIS },
        .{ "true", token.TokenType.TRUE },
        .{ "var", token.TokenType.VAR },
        .{ "while", token.TokenType.WHILE },
    });

    pub fn init(alloc: std.mem.Allocator, source: []u8) Scanner {
        return Scanner{
            .allocator = alloc,
            .source = source,
            .tokens = std.ArrayList(token.Token).init(alloc),
        };
    }

    pub fn deinit(self: Scanner) void {
        self.tokens.deinit();
    }

    fn isAtEnd(self: Scanner) bool {
        return self.current >= self.source.len;
    }

    /// Scan the whole source and return the token list (still owned by this
    /// scanner — freed by deinit). The list always ends with an EOF token.
    pub fn scanTokens(self: *Scanner) std.ArrayList(token.Token) {
        while (!self.isAtEnd()) {
            self.start = self.current;
            self.scanToken();
        }

        self.tokens.append(token.Token{ .token_type = token.TokenType.EOF, .lexeme = "", .line = self.line }) catch |e| {
            // Allocation failure: surface it rather than dropping it silently.
            std.debug.print("could not append EOF token: {s}\n", .{@errorName(e)});
        };
        return self.tokens;
    }

    /// Consume one lexeme starting at `start` and emit the matching token
    /// (or report an error for an unexpected character).
    fn scanToken(self: *Scanner) void {
        const c: u8 = self.advance();
        switch (c) {
            '(' => self.addToken(token.TokenType.LEFT_PAREN),
            ')' => self.addToken(token.TokenType.RIGHT_PAREN),
            '{' => self.addToken(token.TokenType.LEFT_BRACE),
            '}' => self.addToken(token.TokenType.RIGHT_BRACE),
            ',' => self.addToken(token.TokenType.COMMA),
            '.' => self.addToken(token.TokenType.DOT),
            '-' => self.addToken(token.TokenType.MINUS),
            '+' => self.addToken(token.TokenType.PLUS),
            ';' => self.addToken(token.TokenType.SEMICOLON),
            '*' => self.addToken(token.TokenType.STAR),
            // One- or two-character operators: a trailing '=' lengthens them.
            '!' => {
                if (self.match('=')) {
                    self.addToken(token.TokenType.BANG_EQUAL);
                } else {
                    self.addToken(token.TokenType.BANG);
                }
            },
            '=' => {
                if (self.match('=')) {
                    self.addToken(token.TokenType.EQUAL_EQUAL);
                } else {
                    self.addToken(token.TokenType.EQUAL);
                }
            },
            '>' => {
                if (self.match('=')) {
                    self.addToken(token.TokenType.GREATER_EQUAL);
                } else {
                    self.addToken(token.TokenType.GREATER);
                }
            },
            '<' => {
                if (self.match('=')) {
                    self.addToken(token.TokenType.LESS_EQUAL);
                } else {
                    self.addToken(token.TokenType.LESS);
                }
            },
            '/' => {
                if (self.match('/')) {
                    // A line comment runs to end-of-line; discard it.
                    while (self.peek() != '\n' and !self.isAtEnd()) _ = self.advance();
                } else {
                    self.addToken(token.TokenType.SLASH);
                }
            },
            // Insignificant whitespace.
            ' ', '\r', '\t' => {},
            '\n' => self.line += 1,
            '"' => self.string(),
            '0'...'9' => self.number(),
            'a'...'z', 'A'...'Z', '_' => self.identifier(),
            else => {
                err.err(self.line, "Unexpected character.");
            },
        }
    }

    /// Consume and return the next byte.
    fn advance(self: *Scanner) u8 {
        const current = self.current;
        self.current += 1;
        return self.source[current];
    }

    /// Consume the next byte only if it equals `char`.
    fn match(self: *Scanner, char: u8) bool {
        if (self.isAtEnd()) {
            return false;
        }

        if (self.source[self.current] != char) {
            return false;
        }

        self.current += 1;

        return true;
    }

    /// Look at the next byte without consuming it; NUL at end of input.
    fn peek(self: *Scanner) u8 {
        if (self.isAtEnd()) {
            return '\x00';
        }
        return self.source[self.current];
    }

    /// Look two bytes ahead without consuming; NUL past end of input.
    fn peekNext(self: *Scanner) u8 {
        if (self.current + 1 >= self.source.len) {
            return '\x00';
        }
        return self.source[self.current + 1];
    }

    /// Scan a string literal. Lox strings may span multiple lines.
    fn string(self: *Scanner) void {
        while (self.peek() != '"' and !self.isAtEnd()) {
            // Keep the line counter accurate across multi-line strings so
            // later error reports point at the right line.
            if (self.peek() == '\n') self.line += 1;
            _ = self.advance();
        }

        if (self.isAtEnd()) {
            err.err(self.line, "Unterminated string.");
            return;
        }

        // Closing ".
        _ = self.advance();

        self.addToken(token.TokenType.STRING);
    }

    /// Scan an integer or decimal number literal.
    fn number(self: *Scanner) void {
        while (isDigit(self.peek())) _ = self.advance();

        // A '.' is part of the number only when followed by another digit.
        if (self.peek() == '.' and isDigit(self.peekNext())) {
            _ = self.advance();

            while (isDigit(self.peek())) _ = self.advance();
        }

        self.addToken(token.TokenType.NUMBER);
    }

    /// Scan an identifier, classifying reserved words via `keywords`.
    fn identifier(self: *Scanner) void {
        while (isAlphaNumeric(self.peek())) _ = self.advance();

        if (keywords.get(self.source[self.start..self.current])) |token_type| {
            self.addToken(token_type);
        } else {
            self.addToken(token.TokenType.IDENTIFIER);
        }
    }

    fn isDigit(char: u8) bool {
        return switch (char) {
            '0'...'9' => true,
            else => false,
        };
    }

    fn isAlpha(char: u8) bool {
        return switch (char) {
            'a'...'z', 'A'...'Z', '_' => true,
            else => false,
        };
    }

    fn isAlphaNumeric(char: u8) bool {
        return isDigit(char) or isAlpha(char);
    }

    /// Append a token whose lexeme is source[start..current].
    fn addToken(self: *Scanner, token_type: token.TokenType) void {
        self.tokens.append(token.Token{ .token_type = token_type, .lexeme = self.source[self.start..self.current], .line = self.line }) catch |e| {
            // Allocation failure: surface it rather than dropping it silently.
            std.debug.print("could not append token: {s}\n", .{@errorName(e)});
        };
    }
};
|
|
@ -0,0 +1,61 @@
|
|||
const std = @import("std");
|
||||
|
||||
/// Every kind of lexeme the Lox scanner can produce.
pub const TokenType = enum {
    // Single-character tokens.
    LEFT_PAREN,
    RIGHT_PAREN,
    LEFT_BRACE,
    RIGHT_BRACE,
    COMMA,
    DOT,
    MINUS,
    PLUS,
    SEMICOLON,
    SLASH,
    STAR,

    // One or two character tokens.
    BANG,
    BANG_EQUAL,
    EQUAL,
    EQUAL_EQUAL,
    GREATER,
    GREATER_EQUAL,
    LESS,
    LESS_EQUAL,

    // Literals.
    IDENTIFIER,
    STRING,
    NUMBER,

    // Keywords.
    AND,
    CLASS,
    ELSE,
    FALSE,
    FUN,
    FOR,
    IF,
    NIL,
    OR,
    PRINT,
    RETURN,
    SUPER,
    THIS,
    TRUE,
    VAR,
    WHILE,

    EOF,
};

/// One scanned token. The lexeme is a view into the scanner's source
/// buffer (or a string literal such as "" for EOF) — it is not owned here.
pub const Token = struct {
    token_type: TokenType,
    // Must be []const u8, not []u8: the scanner appends an EOF token with
    // the const string literal "" as its lexeme, which cannot coerce to a
    // mutable slice.
    lexeme: []const u8,
    line: u64,

    /// Render "TYPE lexeme line"; caller owns (and must free) the result.
    fn toString(self: *Token, alloc: std.mem.Allocator) ![]u8 {
        // Byte slices need the {s} specifier; {} on a slice of u8 is a
        // compile error when this function is instantiated.
        return std.fmt.allocPrint(alloc, "{} {s} {}", .{ self.token_type, self.lexeme, self.line });
    }
};
|
Loading…
Reference in New Issue