From 5f9cd99fe065ef5fcf821cb8daf4df0f970ed003 Mon Sep 17 00:00:00 2001
From: Drew Galbraith
Date: Mon, 28 Aug 2023 21:03:29 -0700
Subject: [PATCH] Lexing portion is complete.

Tested manually but not in an automated fashion.
---
 .gitignore      |   2 +
 README.md       |   7 ++
 build.zig       |  34 ++++++++
 src/error.zig   |  17 ++++
 src/main.zig    |  66 +++++++++++++++
 src/scanner.zig | 212 ++++++++++++++++++++++++++++++++++++++++++++++++
 src/token.zig   |  65 +++++++++++++++
 test.lox        |   1 +
 8 files changed, 404 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 README.md
 create mode 100644 build.zig
 create mode 100644 src/error.zig
 create mode 100644 src/main.zig
 create mode 100644 src/scanner.zig
 create mode 100644 src/token.zig
 create mode 100644 test.lox

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..e73c965
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,2 @@
+zig-cache/
+zig-out/
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..7c9c3a1
--- /dev/null
+++ b/README.md
@@ -0,0 +1,7 @@
+# ZLoxI
+
+ZLoxI is a Tree-Walk Lox Interpreter written in Zig.
+
+Currently only the lexing (scanning) stage is implemented.
+
+See the book Crafting Interpreters for more information.
diff --git a/build.zig b/build.zig
new file mode 100644
index 0000000..8ec2140
--- /dev/null
+++ b/build.zig
@@ -0,0 +1,34 @@
+const std = @import("std");
+
+pub fn build(b: *std.build.Builder) void {
+    // Standard target options allows the person running `zig build` to choose
+    // what target to build for. Here we do not override the defaults, which
+    // means any target is allowed, and the default is native. Other options
+    // for restricting supported target set are available.
+    const target = b.standardTargetOptions(.{});
+
+    // Standard release options allow the person running `zig build` to select
+    // between Debug, ReleaseSafe, ReleaseFast, and ReleaseSmall.
+    const mode = b.standardReleaseOptions();
+
+    const exe = b.addExecutable("zloxi", "src/main.zig");
+    exe.setTarget(target);
+    exe.setBuildMode(mode);
+    exe.install();
+
+    const run_cmd = exe.run();
+    run_cmd.step.dependOn(b.getInstallStep());
+    if (b.args) |args| {
+        run_cmd.addArgs(args);
+    }
+
+    const run_step = b.step("run", "Run the app");
+    run_step.dependOn(&run_cmd.step);
+
+    const exe_tests = b.addTest("src/main.zig");
+    exe_tests.setTarget(target);
+    exe_tests.setBuildMode(mode);
+
+    const test_step = b.step("test", "Run unit tests");
+    test_step.dependOn(&exe_tests.step);
+}
diff --git a/src/error.zig b/src/error.zig
new file mode 100644
index 0000000..46ee5ad
--- /dev/null
+++ b/src/error.zig
@@ -0,0 +1,17 @@
+const std = @import("std");
+
+/// Set to true once any error has been reported; callers reset it as needed.
+pub var hasError: bool = false;
+
+/// Reports `message` for source line `line` with no location detail.
+pub fn err(line: u64, message: []const u8) void {
+    report(line, "", message);
+}
+
+/// Prints "[line N] Error<where>: <message>" to stderr and sets `hasError`.
+/// `where` is `[]const u8` so string literals (such as the "" passed by
+/// `err`) are accepted.
+pub fn report(line: u64, where: []const u8, message: []const u8) void {
+    std.debug.print("[line {}] Error{s}: {s}\n", .{ line, where, message });
+    hasError = true;
+}
diff --git a/src/main.zig b/src/main.zig
new file mode 100644
index 0000000..5492ee6
--- /dev/null
+++ b/src/main.zig
@@ -0,0 +1,66 @@
+const std = @import("std");
+
+const scanner = @import("scanner.zig");
+const err = @import("error.zig");
+
+/// Entry point: runs the script named by the single optional argument, or an
+/// interactive prompt when no argument is given.
+pub fn main() !void {
+    var gpa = std.heap.GeneralPurposeAllocator(.{}){};
+    defer _ = gpa.deinit();
+    const allocator = gpa.allocator();
+
+    const args = try std.process.argsAlloc(allocator);
+    defer std.process.argsFree(allocator, args);
+
+    if (args.len > 2) {
+        std.debug.print("Usage: zloxi [script].", .{});
+        std.process.exit(64);
+    } else if (args.len == 2) {
+        try runFile(allocator, args[1]);
+        if (err.hasError) {
+            std.process.exit(65);
+        }
+    } else {
+        try runPrompt(allocator);
+    }
+}
+
+/// Reads at most 1_000_000 bytes of `file_name` (relative to the current
+/// directory) into memory and scans them.
+fn runFile(alloc: std.mem.Allocator, file_name: []const u8) !void {
+    const bytes = try std.fs.cwd().readFileAlloc(alloc, file_name, 1_000_000);
+    defer alloc.free(bytes);
+    try run(alloc, bytes);
+}
+
+/// Prompts with "> " and scans one line at a time until end of input.
+fn runPrompt(alloc: std.mem.Allocator) !void {
+    const stdout_file = std.io.getStdOut().writer();
+    const stdin_file = std.io.getStdIn().reader();
+    var bw = std.io.bufferedWriter(stdout_file);
+    const stdout = bw.writer();
+
+    while (true) {
+        try stdout.print("> ", .{});
+        try bw.flush();
+
+        const line = (try stdin_file.readUntilDelimiterOrEofAlloc(alloc, '\n', 1000)) orelse {
+            try stdout.print("\n", .{});
+            break;
+        };
+        defer alloc.free(line);
+        try run(alloc, line);
+        // An error on one line must not poison the rest of the session.
+        err.hasError = false;
+    }
+
+    try bw.flush(); // don't forget to flush!
+}
+
+/// Scans `bytes` and prints the resulting token list to stderr.
+fn run(allocator: std.mem.Allocator, bytes: []const u8) !void {
+    var scan = scanner.Scanner.init(allocator, bytes);
+    defer scan.deinit();
+    std.debug.print("{any}\n", .{scan.scanTokens()});
+}
diff --git a/src/scanner.zig b/src/scanner.zig
new file mode 100644
index 0000000..0363b78
--- /dev/null
+++ b/src/scanner.zig
@@ -0,0 +1,212 @@
+const err = @import("error.zig");
+const token = @import("token.zig");
+const std = @import("std");
+
+/// Converts Lox source text into a list of tokens.
+///
+/// Token lexemes are slices of `source`, so `source` must outlive the tokens.
+/// Lexing problems are reported through `err.err` rather than returned.
+pub const Scanner = struct {
+    allocator: std.mem.Allocator,
+    source: []const u8,
+    tokens: std.ArrayList(token.Token),
+    /// Offset in `source` of the first character of the current lexeme.
+    start: usize = 0,
+    /// Offset in `source` of the next character to consume.
+    current: usize = 0,
+    /// Line number attached to tokens and error reports (starts at 1).
+    line: u64 = 1,
+
+    /// Creates a scanner over `source`; pair with `deinit`.
+    pub fn init(alloc: std.mem.Allocator, source: []const u8) Scanner {
+        return Scanner{
+            .allocator = alloc,
+            .source = source,
+            .tokens = std.ArrayList(token.Token).init(alloc),
+        };
+    }
+
+    /// Frees the token storage, which is shared with the list returned by `scanTokens`.
+    pub fn deinit(self: Scanner) void {
+        self.tokens.deinit();
+    }
+
+    fn isAtEnd(self: Scanner) bool {
+        return self.current >= self.source.len;
+    }
+
+    /// Scans the remaining source and returns the token list followed by an
+    /// EOF token. The returned list shares its storage with the scanner.
+    pub fn scanTokens(self: *Scanner) std.ArrayList(token.Token) {
+        while (!self.isAtEnd()) {
+            self.start = self.current;
+            self.scanToken();
+        }
+
+        self.appendToken(token.Token{ .token_type = token.TokenType.EOF, .lexeme = "", .line = self.line });
+        return self.tokens;
+    }
+
+    /// Consumes one lexeme starting at `current` and records its token, if any.
+    fn scanToken(self: *Scanner) void {
+        const c: u8 = self.advance();
+        switch (c) {
+            '(' => self.addToken(token.TokenType.LEFT_PAREN),
+            ')' => self.addToken(token.TokenType.RIGHT_PAREN),
+            '{' => self.addToken(token.TokenType.LEFT_BRACE),
+            '}' => self.addToken(token.TokenType.RIGHT_BRACE),
+            ',' => self.addToken(token.TokenType.COMMA),
+            '.' => self.addToken(token.TokenType.DOT),
+            '-' => self.addToken(token.TokenType.MINUS),
+            '+' => self.addToken(token.TokenType.PLUS),
+            ';' => self.addToken(token.TokenType.SEMICOLON),
+            '*' => self.addToken(token.TokenType.STAR),
+            // One or two character operators: a following '=' is consumed too.
+            '!' => self.addToken(if (self.match('=')) token.TokenType.BANG_EQUAL else token.TokenType.BANG),
+            '=' => self.addToken(if (self.match('=')) token.TokenType.EQUAL_EQUAL else token.TokenType.EQUAL),
+            '>' => self.addToken(if (self.match('=')) token.TokenType.GREATER_EQUAL else token.TokenType.GREATER),
+            '<' => self.addToken(if (self.match('=')) token.TokenType.LESS_EQUAL else token.TokenType.LESS),
+            '/' => {
+                if (self.match('/')) {
+                    // Ignore comment.
+                    while (self.peek() != '\n' and !self.isAtEnd()) _ = self.advance();
+                } else {
+                    self.addToken(token.TokenType.SLASH);
+                }
+            },
+            ' ', '\r', '\t' => {},
+            '\n' => self.line += 1,
+            '"' => self.string(),
+            '0'...'9' => self.number(),
+            'a'...'z', 'A'...'Z', '_' => self.identifier(),
+            else => err.err(self.line, "Unexpected character."),
+        }
+    }
+
+    /// Consumes and returns the next character. Callers must ensure the
+    /// scanner is not at the end of `source`.
+    fn advance(self: *Scanner) u8 {
+        const current = self.current;
+        self.current += 1;
+        return self.source[current];
+    }
+
+    /// Consumes the next character only if it equals `char`.
+    fn match(self: *Scanner, char: u8) bool {
+        if (self.isAtEnd() or self.source[self.current] != char) {
+            return false;
+        }
+
+        self.current += 1;
+        return true;
+    }
+
+    /// Returns the next character without consuming it, or 0 at the end.
+    fn peek(self: *Scanner) u8 {
+        if (self.isAtEnd()) {
+            return '\x00';
+        }
+        return self.source[self.current];
+    }
+
+    /// Returns the character after the next one, or 0 if there is none.
+    fn peekNext(self: *Scanner) u8 {
+        if (self.current + 1 >= self.source.len) {
+            return '\x00';
+        }
+        return self.source[self.current + 1];
+    }
+
+    /// Scans a string literal; the opening quote has already been consumed.
+    /// The lexeme of the resulting STRING token includes both quotes.
+    fn string(self: *Scanner) void {
+        while (self.peek() != '"' and !self.isAtEnd()) {
+            // Strings may span lines, so keep the line counter in sync.
+            if (self.peek() == '\n') self.line += 1;
+            _ = self.advance();
+        }
+
+        if (self.isAtEnd()) {
+            err.err(self.line, "Unterminated string.");
+            return;
+        }
+
+        // Closing ".
+        _ = self.advance();
+
+        self.addToken(token.TokenType.STRING);
+    }
+
+    /// Scans an integer or decimal number literal.
+    fn number(self: *Scanner) void {
+        while (isDigit(self.peek())) _ = self.advance();
+
+        // A '.' is part of the number only when a digit follows it.
+        if (self.peek() == '.' and isDigit(self.peekNext())) {
+            _ = self.advance();
+
+            while (isDigit(self.peek())) _ = self.advance();
+        }
+
+        self.addToken(token.TokenType.NUMBER);
+    }
+
+    /// Scans an identifier, emitting the keyword token when it is reserved.
+    fn identifier(self: *Scanner) void {
+        while (isAlphaNumeric(self.peek())) _ = self.advance();
+
+        const keywordmap = std.ComptimeStringMap(token.TokenType, .{
+            .{ "and", token.TokenType.AND },
+            .{ "class", token.TokenType.CLASS },
+            .{ "else", token.TokenType.ELSE },
+            .{ "false", token.TokenType.FALSE },
+            .{ "for", token.TokenType.FOR },
+            .{ "fun", token.TokenType.FUN },
+            .{ "if", token.TokenType.IF },
+            .{ "nil", token.TokenType.NIL },
+            .{ "or", token.TokenType.OR },
+            .{ "print", token.TokenType.PRINT },
+            .{ "return", token.TokenType.RETURN },
+            .{ "super", token.TokenType.SUPER },
+            .{ "this", token.TokenType.THIS },
+            .{ "true", token.TokenType.TRUE },
+            .{ "var", token.TokenType.VAR },
+            .{ "while", token.TokenType.WHILE },
+        });
+
+        const lexeme = self.source[self.start..self.current];
+        self.addToken(keywordmap.get(lexeme) orelse token.TokenType.IDENTIFIER);
+    }
+
+    fn isDigit(char: u8) bool {
+        return switch (char) {
+            '0'...'9' => true,
+            else => false,
+        };
+    }
+
+    fn isAlpha(char: u8) bool {
+        return switch (char) {
+            'a'...'z', 'A'...'Z', '_' => true,
+            else => false,
+        };
+    }
+
+    fn isAlphaNumeric(char: u8) bool {
+        return isDigit(char) or isAlpha(char);
+    }
+
+    /// Records a token of `token_type` whose lexeme is `source[start..current]`.
+    fn addToken(self: *Scanner, token_type: token.TokenType) void {
+        self.appendToken(token.Token{ .token_type = token_type, .lexeme = self.source[self.start..self.current], .line = self.line });
+    }
+
+    /// Appends `tok` to the token list. An allocation failure is reported
+    /// through `err.err` (setting `err.hasError`) instead of being dropped
+    /// silently.
+    fn appendToken(self: *Scanner, tok: token.Token) void {
+        self.tokens.append(tok) catch {
+            err.err(self.line, "Out of memory while storing token.");
+        };
+    }
+};
diff --git a/src/token.zig b/src/token.zig
new file mode 100644
index 0000000..3e17dcb
--- /dev/null
+++ b/src/token.zig
@@ -0,0 +1,65 @@
+const std = @import("std");
+
+/// Every kind of token the scanner can produce.
+pub const TokenType = enum {
+    // Single-character tokens.
+    LEFT_PAREN,
+    RIGHT_PAREN,
+    LEFT_BRACE,
+    RIGHT_BRACE,
+    COMMA,
+    DOT,
+    MINUS,
+    PLUS,
+    SEMICOLON,
+    SLASH,
+    STAR,
+
+    // One or two character tokens.
+    BANG,
+    BANG_EQUAL,
+    EQUAL,
+    EQUAL_EQUAL,
+    GREATER,
+    GREATER_EQUAL,
+    LESS,
+    LESS_EQUAL,
+
+    // Literals.
+    IDENTIFIER,
+    STRING,
+    NUMBER,
+
+    // Keywords.
+    AND,
+    CLASS,
+    ELSE,
+    FALSE,
+    FUN,
+    FOR,
+    IF,
+    NIL,
+    OR,
+    PRINT,
+    RETURN,
+    SUPER,
+    THIS,
+    TRUE,
+    VAR,
+    WHILE,
+
+    EOF,
+};
+
+/// A scanned lexeme together with its type and source line.
+pub const Token = struct {
+    token_type: TokenType,
+    /// Slice of the scanned source text (empty for EOF); not owned.
+    lexeme: []const u8,
+    line: u64,
+
+    /// Formats the token as "<type> <lexeme> <line>". Caller owns the result.
+    fn toString(self: Token, alloc: std.mem.Allocator) ![]u8 {
+        return std.fmt.allocPrint(alloc, "{} {s} {}", .{ self.token_type, self.lexeme, self.line });
+    }
+};
diff --git a/test.lox b/test.lox
new file mode 100644
index 0000000..0ecbd83
--- /dev/null
+++ b/test.lox
@@ -0,0 +1 @@
+print "hello world!";