From 7236c0b43a5a48adf632536f769ab1c19bb6ed9b Mon Sep 17 00:00:00 2001
From: Drew Galbraith
Date: Tue, 29 Aug 2023 15:17:49 -0700
Subject: [PATCH] First pass of parser complete.

---
 src/expr.zig    |   7 +-
 src/main.zig    |  19 +++--
 src/parser.zig  | 211 ++++++++++++++++++++++++++++++++++++++++++++++++
 src/scanner.zig |  18 +++--
 src/token.zig   |   6 ++
 test.lox        |   2 +-
 6 files changed, 251 insertions(+), 12 deletions(-)
 create mode 100644 src/parser.zig

diff --git a/src/expr.zig b/src/expr.zig
index 11fecec..b13c1a1 100644
--- a/src/expr.zig
+++ b/src/expr.zig
@@ -24,10 +24,11 @@ pub const LiteralTag = enum {
 };
 
 pub const LiteralExpr = union(LiteralTag) {
-    number: u64,
-    string: []u8,
+    number: f64,
+    string: []const u8,
     boolean: bool,
-    nil: void,
+    // FIXME: See if there is a way to make this void.
+    nil: bool,
 };
 
 pub const UnaryExpr = struct {
diff --git a/src/main.zig b/src/main.zig
index 5492ee6..22a7a2c 100644
--- a/src/main.zig
+++ b/src/main.zig
@@ -1,6 +1,8 @@
 const std = @import("std");
 
 const scanner = @import("scanner.zig");
+const expr = @import("expr.zig");
+const parser = @import("parser.zig");
 const err = @import("error.zig");
 
 pub fn main() !void {
@@ -56,8 +58,17 @@ fn runPrompt(alloc: std.mem.Allocator) !void {
 fn run(allocator: std.mem.Allocator, bytes: []u8) !void {
     var scan = scanner.Scanner.init(allocator, bytes);
     defer scan.deinit();
-    std.debug.print("{any}\n", .{scan.scanTokens()});
-}
 
-// Error reporting
-// TODO: Move to a separate file.
+    var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
+    defer arena.deinit();
+    const alloc = arena.allocator();
+    var parse = parser.Parser{
+        .tokens = scan.scanTokens(),
+        .allocator = alloc,
+    };
+
+    const expression = try parse.expression();
+    // std.debug.print("AST: {}", .{expression.*});
+    expr.AstPrint(expression.*);
+    std.debug.print("\n", .{});
+}
diff --git a/src/parser.zig b/src/parser.zig
new file mode 100644
index 0000000..849baa2
--- /dev/null
+++ b/src/parser.zig
@@ -0,0 +1,211 @@
+//! Recursive-descent expression parser: turns the scanner's tokens into an
+//! `Expr` tree. Precedence climbs from `equality` (loosest) to `primary`.
+
+const std = @import("std");
+
+const expr_zig = @import("expr.zig");
+const Expr = expr_zig.Expr;
+const BinaryExpr = expr_zig.BinaryExpr;
+const UnaryExpr = expr_zig.UnaryExpr;
+const LiteralExpr = expr_zig.LiteralExpr;
+const GroupingExpr = expr_zig.GroupingExpr;
+
+const token_zig = @import("token.zig");
+const TokenType = token_zig.TokenType;
+const Token = token_zig.Token;
+
+const errors_zig = @import("error.zig");
+const err = errors_zig.err;
+
+/// Parses a token list into `Expr` nodes allocated from `allocator`.
+/// Nodes are never freed individually here; the caller owns their lifetime.
+pub const Parser = struct {
+    const Self = @This();
+    /// Tokens to parse. `peek` indexes this without a bounds check, and
+    /// `advance` only stops at an EOF token, so the list must end with one.
+    tokens: std.ArrayList(Token),
+    /// Allocator used for every `Expr` node.
+    allocator: std.mem.Allocator,
+    /// Index of the next token to consume.
+    current: u64 = 0,
+
+    /// Parses one expression starting at the current token.
+    pub fn expression(self: *Self) !*Expr {
+        return self.equality();
+    }
+
+    /// equality -> comparison ( ( "!=" | "==" ) comparison )*
+    /// Left-associative: each operator wraps the tree built so far.
+    pub fn equality(self: *Self) !*Expr {
+        var expr = try self.comparison();
+
+        while (true) {
+            switch (self.peekType()) {
+                TokenType.BANG_EQUAL, TokenType.EQUAL_EQUAL => {
+                    const old_expr = expr;
+                    expr = try self.allocator.create(Expr);
+                    expr.* = Expr{
+                        .binary = BinaryExpr{
+                            .operator = self.advance(),
+                            .left = old_expr,
+                            .right = try self.comparison(),
+                        },
+                    };
+                },
+                else => break,
+            }
+        }
+        return expr;
+    }
+
+    /// comparison -> term ( ( ">" | ">=" | "<" | "<=" ) term )*
+    fn comparison(self: *Self) !*Expr {
+        var expr = try self.term();
+
+        while (true) {
+            switch (self.peekType()) {
+                TokenType.GREATER, TokenType.GREATER_EQUAL, TokenType.LESS, TokenType.LESS_EQUAL => {
+                    const old_expr = expr;
+                    expr = try self.allocator.create(Expr);
+                    expr.* = Expr{
+                        .binary = BinaryExpr{
+                            .operator = self.advance(),
+                            .left = old_expr,
+                            .right = try self.term(),
+                        },
+                    };
+                },
+                else => break,
+            }
+        }
+        return expr;
+    }
+
+    /// term -> factor ( ( "+" | "-" ) factor )*
+    fn term(self: *Self) !*Expr {
+        var expr = try self.factor();
+
+        while (true) {
+            switch (self.peekType()) {
+                TokenType.PLUS, TokenType.MINUS => {
+                    const old_expr = expr;
+                    expr = try self.allocator.create(Expr);
+                    expr.* = Expr{
+                        .binary = BinaryExpr{
+                            .operator = self.advance(),
+                            .left = old_expr,
+                            .right = try self.factor(),
+                        },
+                    };
+                },
+                else => break,
+            }
+        }
+        return expr;
+    }
+
+    /// factor -> unary ( ( "*" | "/" ) unary )*
+    fn factor(self: *Self) !*Expr {
+        var expr = try self.unary();
+
+        while (true) {
+            switch (self.peekType()) {
+                TokenType.STAR, TokenType.SLASH => {
+                    const old_expr = expr;
+                    expr = try self.allocator.create(Expr);
+                    expr.* = Expr{
+                        .binary = BinaryExpr{
+                            .operator = self.advance(),
+                            .left = old_expr,
+                            .right = try self.unary(),
+                        },
+                    };
+                },
+                else => break,
+            }
+        }
+        return expr;
+    }
+
+    /// unary -> ( "!" | "-" ) unary | primary
+    fn unary(self: *Self) !*Expr {
+        switch (self.peekType()) {
+            TokenType.BANG, TokenType.MINUS => {
+                const expr = try self.allocator.create(Expr);
+                expr.* = Expr{
+                    .unary = UnaryExpr{
+                        .operator = self.advance(),
+                        .right = try self.unary(),
+                    },
+                };
+                return expr;
+            },
+            else => return self.primary(),
+        }
+    }
+
+    /// primary -> "false" | "true" | "nil" | NUMBER | STRING | "(" expression ")"
+    ///
+    /// Always consumes one token (unless at EOF). On an unexpected token it
+    /// reports via `err` and yields a nil literal so parsing can continue.
+    /// The error set is spelled out rather than inferred, presumably because
+    /// `primary` and `expression` are mutually recursive.
+    fn primary(self: *Self) error{OutOfMemory}!*Expr {
+        const expr = try self.allocator.create(Expr);
+        const token = self.advance();
+        switch (token.token_type) {
+            TokenType.FALSE => expr.* = Expr{ .literal = LiteralExpr{ .boolean = false } },
+            TokenType.TRUE => expr.* = Expr{ .literal = LiteralExpr{ .boolean = true } },
+            TokenType.NIL => expr.* = Expr{ .literal = LiteralExpr{ .nil = false } },
+            // NOTE(review): `.?` asserts the scanner attached a value to the
+            // token. Confirm the scanner does so for STRING as well as NUMBER.
+            TokenType.NUMBER => expr.* = Expr{ .literal = LiteralExpr{ .number = token.value.?.number } },
+            TokenType.STRING => expr.* = Expr{ .literal = LiteralExpr{ .string = token.value.?.string } },
+            TokenType.LEFT_PAREN => {
+                expr.* = Expr{
+                    .grouping = GroupingExpr{ .expr = try self.expression() },
+                };
+                const next_token = self.advance();
+                if (next_token.token_type != TokenType.RIGHT_PAREN) {
+                    err(next_token.line, "Unclosed left paren.");
+                }
+            },
+            else => {
+                err(token.line, "Unexpected primary token type.");
+                expr.* = Expr{ .literal = LiteralExpr{ .nil = false } };
+            },
+        }
+        return expr;
+    }
+
+    /// Type of the token at `current`, without consuming it.
+    fn peekType(self: Self) TokenType {
+        return self.peek().token_type;
+    }
+
+    /// Token at `current`, without consuming it.
+    fn peek(self: Self) Token {
+        return self.tokens.items[self.current];
+    }
+
+    /// Most recently consumed token.
+    fn previous(self: Self) Token {
+        // FIXME: Bounds check. `current` must be > 0 here.
+        return self.tokens.items[self.current - 1];
+    }
+
+    /// True once the current token is EOF.
+    fn isAtEnd(self: Self) bool {
+        return self.peekType() == TokenType.EOF;
+    }
+
+    /// Consumes and returns the current token. At EOF nothing is consumed and
+    /// the EOF token is returned again.
+    fn advance(self: *Self) Token {
+        if (self.isAtEnd()) {
+            return self.peek();
+        }
+        self.current += 1;
+        return self.previous();
+    }
+};
diff --git a/src/scanner.zig b/src/scanner.zig
index 0363b78..9a5b768 100644
--- a/src/scanner.zig
+++ b/src/scanner.zig
@@ -33,7 +33,7 @@ pub const Scanner = struct {
         }
 
         // FIXME: Handle error.
-        self.tokens.append(token.Token{ .token_type = token.TokenType.EOF, .lexeme = "", .line = self.line }) catch {};
+        self.tokens.append(token.Token{ .token_type = token.TokenType.EOF, .lexeme = "", .line = self.line, .value = null }) catch {};
         return self.tokens;
     }
 
@@ -156,7 +156,7 @@ pub const Scanner = struct {
             while (isDigit(self.peek())) _ = self.advance();
         }
 
-        self.addToken(token.TokenType.NUMBER);
+        self.addNumber();
     }
 
     fn identifier(self: *Scanner) void {
@@ -207,8 +207,18 @@ pub const Scanner = struct {
         return isDigit(char) or isAlpha(char);
     }
 
-    fn addToken(self: *Scanner, token_type: token.TokenType) void {
+    fn addTokenInternal(self: *Scanner, token_type: token.TokenType, token_value: ?token.Token.Value) void {
         // FIXME: Handle error.
-        self.tokens.append(token.Token{ .token_type = token_type, .lexeme = self.source[self.start..self.current], .line = self.line }) catch {};
+        self.tokens.append(token.Token{ .token_type = token_type, .lexeme = self.source[self.start..self.current], .line = self.line, .value = token_value }) catch {};
+    }
+
+    fn addToken(self: *Scanner, token_type: token.TokenType) void {
+        self.addTokenInternal(token_type, null);
+    }
+
+    fn addNumber(self: *Scanner) void {
+        // FIXME: Handle errors.
+        const float = std.fmt.parseFloat(f64, self.source[self.start..self.current]) catch 0;
+        self.addTokenInternal(token.TokenType.NUMBER, token.Token.Value{ .number = float });
     }
 };
diff --git a/src/token.zig b/src/token.zig
index 537a301..5f0cf80 100644
--- a/src/token.zig
+++ b/src/token.zig
@@ -54,6 +54,12 @@ pub const Token = struct {
     token_type: TokenType,
     lexeme: []const u8,
     line: u64,
+    value: ?Value,
+
+    pub const Value = union {
+        number: f64,
+        string: []const u8,
+    };
 
     fn toString(self: *Token, alloc: std.mem.Allocator) ![]u8 {
         return std.fmt.allocPrint(alloc, "{} {} {}", .{ self.token_type, self.lexeme, self.line });
diff --git a/test.lox b/test.lox
index 0ecbd83..b9f88a1 100644
--- a/test.lox
+++ b/test.lox
@@ -1 +1 @@
-print "hello world!";
+1 + 5 / 3 == -9