# acadia/yunq/parser.py
#
# 350 lines
# 11 KiB
# Python

from enum import Enum
import sys
class LexemeType(Enum):
    """Kinds of token produced by the lexer."""

    # Sentinel values.
    NONE = 0
    EOF = 1

    # Identifiers and keywords share a single kind.
    NAME = 2

    # Punctuation.
    LEFT_BRACE = 3
    RIGHT_BRACE = 4
    LEFT_PAREN = 5
    RIGHT_PAREN = 6
    ARROW = 7
    SEMICOLON = 8
    DOT = 9
    EQUALS = 10
class Lexeme:
    """A single token: its kind plus an optional payload.

    Attributes:
        type: the LexemeType of this token.
        value: payload for the token (the lexer sets it to the identifier
            text for NAME tokens); None when not supplied.
    """

    def __init__(self, lextype: LexemeType, value = None):
        self.type = lextype
        self.value = value

    def __str__(self):
        # A missing (or otherwise falsy) payload is left out of the rendering.
        if not self.value:
            return "(%s)" % self.type
        return "(%s, %s)" % (self.type, self.value)

    def __repr__(self):
        # Same text as __str__ so lists of lexemes print readably.
        return self.__str__()
def lexer(program: str):
    """Split source text into a flat list of lexemes.

    Spaces, tabs, carriage returns, newlines and ``//`` line comments are
    skipped. A NAME starts with an alphabetic character and continues with
    alphanumerics or underscores.

    Args:
        program: the full source text to tokenize.

    Returns:
        The lexemes in source order, always terminated by one EOF lexeme.

    Exits:
        Calls ``sys.exit`` with a message that includes the current line
        number when an unexpected character is found or when ``-`` is not
        followed by ``>``.
    """
    line = 1
    current = 0
    end = len(program)
    tokens: list[Lexeme] = []
    while current < end:
        # Scan next token.
        start = current
        curr = program[current]
        if curr == '\n':
            line += 1
        elif curr == '\t' or curr == ' ' or curr == '\r':
            pass
        elif curr == '{':
            tokens.append(Lexeme(LexemeType.LEFT_BRACE))
        elif curr == '}':
            tokens.append(Lexeme(LexemeType.RIGHT_BRACE))
        elif curr == '(':
            tokens.append(Lexeme(LexemeType.LEFT_PAREN))
        elif curr == ')':
            tokens.append(Lexeme(LexemeType.RIGHT_PAREN))
        elif curr == ';':
            tokens.append(Lexeme(LexemeType.SEMICOLON))
        elif curr == '.':
            tokens.append(Lexeme(LexemeType.DOT))
        elif curr == '-':
            current += 1
            # A '-' as the very last character has nothing to pair with;
            # report it instead of indexing past the end of the input.
            if current >= end:
                sys.exit("Expected > after - got end of input on line %d" % line)
            if program[current] != '>':
                sys.exit("Expected > after - got '%s' on line %d" % (program[current], line))
            tokens.append(Lexeme(LexemeType.ARROW))
        elif curr.isalpha():
            # Extend over the identifier, stopping at the end of the input
            # so a name that is the final token does not raise IndexError.
            while current + 1 < end and (
                    program[current + 1].isalnum() or program[current + 1] == '_'):
                current += 1
            tokens.append(Lexeme(LexemeType.NAME, program[start:current + 1]))
        elif curr == '/' and program[current + 1:current + 2] == '/':
            # Slicing (rather than indexing) keeps a trailing lone '/' from
            # raising. Stop on the last comment character so the newline
            # itself is seen by the next iteration and counted in `line`.
            while current + 1 < end and program[current + 1] != '\n':
                current += 1
        else:
            sys.exit("Got unexpected token %s on line %s." % (curr, line))
        current += 1
    tokens.append(Lexeme(LexemeType.EOF))
    return tokens
class Package:
    """A package declaration, stored as its list of name components."""

    def __init__(self, names: list[str]):
        self.names = names

    def cpp_namespace(self):
        """Return the components joined with ``::``."""
        separator = "::"
        return separator.join(self.names)
class Method:
    """One method of an interface.

    Attributes:
        name: the method's identifier.
        request: name of the request type; the parser passes None when the
            parameter list is empty.
        response: name of the response type; None when the result list is
            empty.
    """

    def __init__(self, name: str, request: str, response: str):
        self.name, self.request, self.response = name, request, response
class Interface:
    """A named interface together with the methods declared in it."""

    def __init__(self, name: str, methods: list[Method]):
        self.name, self.methods = name, methods
class Type(Enum):
    """Kinds of type a message field can have."""

    NONE = 0

    # Builtin types.
    U64 = 1
    I64 = 2
    STRING = 3
    BYTES = 4
    CAPABILITY = 5

    # Any type name that is not one of the builtins.
    MESSAGE = 6
# Builtin type names as written in source text, mapped to their Type member.
# A name missing from this table is treated as a message type by Field.
type_str_dict = {
    "u64": Type.U64,
    "i64": Type.I64,
    "string": Type.STRING,
    "bytes": Type.BYTES,
    "capability": Type.CAPABILITY,
}

# C++ spelling for each builtin Type. Type.MESSAGE has no entry because
# Field.cpp_type returns the message's own name for it.
type_to_cppstr = {
    Type.U64: "uint64_t",
    Type.I64: "int64_t",
    Type.STRING: "glcr::String",
    Type.CAPABILITY: "z_cap_t",
    Type.BYTES: "glcr::Vector<uint8_t>",
}
class Field:
    """A single field of a message.

    Attributes:
        type: the Type member for the field; Type.MESSAGE when the type name
            is not one of the builtin names in ``type_str_dict``.
        type_str: the type name exactly as it was passed in.
        name: the field's identifier.
        repeated: whether the field was declared ``repeated``.
    """

    def __init__(self, field_type_str: str, name: str, repeated = False):
        # Any name that is not a builtin is taken to be a message type here;
        # whether that message exists is checked later by type_check.
        self.type = type_str_dict.get(field_type_str, Type.MESSAGE)
        # Always record the written type name so the attribute exists for
        # every field, not only for message-typed ones.
        self.type_str = field_type_str
        self.name = name
        self.repeated = repeated

    def cpp_type(self):
        """Return the C++ type name used for this field's element type."""
        if self.type == Type.MESSAGE:
            # Message types use their declared name as-is.
            return self.type_str
        return type_to_cppstr[self.type]
class Message:
    """A named message together with its fields, in declaration order."""

    def __init__(self, name: str, fields: list[Field]):
        self.name, self.fields = name, fields
# A declaration that can be looked up by name: an interface or a message.
# NOTE(review): Parser.decl() can also return a Package, which this alias
# does not include — confirm whether Package should be added.
Decl = Interface | Message
# Module-level registry of the interfaces and messages parsed so far, keyed
# by declared name. Parser.interface() and Parser.message() insert into it
# and type_check() reads it to resolve type references.
name_dict: dict[str, Decl] = {}
class Parser:
    """Recursive-descent parser that turns a lexeme list into declarations.

    Interfaces and messages are registered in the module-level ``name_dict``
    as they are parsed, and reusing a name is a fatal error. Every error is
    reported by terminating the process through ``sys.exit``.
    """

    def __init__(self, tokens: list[Lexeme]):
        self.tokens = tokens
        # Index of the next token that has not been consumed yet.
        self.current = 0

    def peektype(self) -> LexemeType:
        """Return the type of the next token without consuming it."""
        return self.tokens[self.current].type

    def peekvalue(self) -> str:
        """Return the value of the next token without consuming it."""
        return self.tokens[self.current].value

    def consume(self) -> Lexeme:
        """Advance past the next token and return it."""
        self.current += 1
        return self.tokens[self.current - 1]

    def consume_identifier(self) -> str:
        """Consume a NAME token and return its text; exit on any other type."""
        tok = self.consume()
        if tok.type != LexemeType.NAME:
            sys.exit("Expected identifier got %s" % tok.type)
        return tok.value

    def consume_check(self, lex_type: LexemeType):
        """Consume one token and exit unless it has type ``lex_type``."""
        tok = self.consume()
        if tok.type != lex_type:
            sys.exit("Expected %s got %s" % (lex_type, tok.type))

    def consume_check_identifier(self, name: str):
        """Consume one token and exit unless it is the NAME ``name``."""
        tok = self.consume()
        if tok.type != LexemeType.NAME:
            sys.exit("Expected '%s' got a %s" % (name, tok.type))
        if tok.value != name:
            sys.exit("Expected '%s' got '%s'" % (name, tok.value))

    def parse(self) -> list[Decl]:
        """Parse declarations until EOF and return them in source order."""
        decls = []
        while self.peektype() != LexemeType.EOF:
            decls.append(self.decl())
        return decls

    def decl(self) -> Decl:
        """Parse one top-level declaration, chosen by its leading keyword."""
        token = self.consume()
        if token.type != LexemeType.NAME:
            # sys.exit takes a single argument, so the message has to be
            # formatted before the call rather than passed as two arguments.
            sys.exit("Unexpected token: %s" % token)
        if token.value == "package":
            # TODO: Enforce that package decl comes before all messages and interface.
            return self.package()
        if token.value == "message":
            return self.message()
        if token.value == "interface":
            return self.interface()
        sys.exit("Unexpected identifier '%s', expected package, message, interface" % token.value)

    def package(self):
        """Parse ``name(.name)* ;`` after the ``package`` keyword."""
        names = [self.consume_identifier()]
        while self.peektype() == LexemeType.DOT:
            self.consume_check(LexemeType.DOT)
            names.append(self.consume_identifier())
        self.consume_check(LexemeType.SEMICOLON)
        return Package(names)

    def interface(self):
        """Parse ``name { method* }`` and register the interface by name.

        Each method gets a ``number`` attribute equal to its zero-based
        position in the interface.
        """
        # "interface" consumed by decl.
        name = self.consume_identifier()
        if name in name_dict:
            sys.exit("Name '%s' already exists." % name)
        self.consume_check(LexemeType.LEFT_BRACE)
        methods: list[Method] = []
        method_names = set()
        next_method_number = 0
        while self.peektype() != LexemeType.RIGHT_BRACE:
            m = self.method()
            m.number = next_method_number
            next_method_number += 1
            if m.name in method_names:
                sys.exit("Method %s declared twice on %s" % (m.name, name))
            method_names.add(m.name)
            methods.append(m)
        self.consume_check(LexemeType.RIGHT_BRACE)
        i = Interface(name, methods)
        name_dict[name] = i
        return i

    def method(self):
        """Parse ``method name ( [request] ) -> ( [response] ) ;``.

        A missing request or response is recorded as None.
        """
        self.consume_check_identifier("method")
        name = self.consume_identifier()
        self.consume_check(LexemeType.LEFT_PAREN)
        request = None
        # FIXME: Fix error handling here (and for response). We want to show
        # "expected rparen or identifier" if type is wrong rather than just
        # "expected rparen".
        if self.peektype() == LexemeType.NAME:
            request = self.consume_identifier()
        self.consume_check(LexemeType.RIGHT_PAREN)
        self.consume_check(LexemeType.ARROW)
        self.consume_check(LexemeType.LEFT_PAREN)
        response = None
        if self.peektype() == LexemeType.NAME:
            response = self.consume_identifier()
        self.consume_check(LexemeType.RIGHT_PAREN)
        self.consume_check(LexemeType.SEMICOLON)
        return Method(name, request, response)

    def message(self):
        """Parse ``name { field* }`` and register the message by name.

        Each field gets a ``number`` attribute equal to its zero-based
        position in the message.
        """
        # "message" consumed by decl.
        name = self.consume_identifier()
        if name in name_dict:
            sys.exit("Name '%s' already exists." % name)
        self.consume_check(LexemeType.LEFT_BRACE)
        fields: list[Field] = []
        field_names = set()
        next_field_num = 0
        while self.peektype() != LexemeType.RIGHT_BRACE:
            f = self.field()
            f.number = next_field_num
            next_field_num += 1
            if f.name in field_names:
                sys.exit("Field %s declared twice on %s" % (f.name, name))
            field_names.add(f.name)
            fields.append(f)
        self.consume_check(LexemeType.RIGHT_BRACE)
        m = Message(name, fields)
        name_dict[name] = m
        return m

    def field(self):
        """Parse ``[repeated] type name ;`` inside a message body."""
        repeated = False
        field_type_str = self.consume_identifier()
        if field_type_str == "repeated":
            # "repeated" is a modifier; the actual type name follows it.
            repeated = True
            field_type_str = self.consume_identifier()
        name = self.consume_identifier()
        self.consume_check(LexemeType.SEMICOLON)
        return Field(field_type_str, name, repeated)
def type_check(decls: list[Decl]):
    """Validate parsed declarations, exiting on the first problem found.

    Checks performed:
      * at most one package declaration is present;
      * every interface method has a request or a response (or both);
      * each request/response name is registered in ``name_dict`` and refers
        to a Message;
      * every message-typed field names a type registered in ``name_dict``.

    Args:
        decls: the declarations returned by ``Parser.parse``.
    """
    package_decls = [d for d in decls if type(d) is Package]
    if len(package_decls) > 1:
        sys.exit("Cannot have more than one package declaration")

    for decl in decls:
        kind = type(decl)
        if kind is Interface:
            for method in decl.methods:
                req = method.request
                resp = method.response
                if req is None and resp is None:
                    sys.exit("Method '%s.%s' cannot have empty request and response" % (decl.name, method.name))
                if req is not None:
                    if req not in name_dict:
                        sys.exit("Request type '%s' for '%s.%s' does not exist" % (req, decl.name, method.name))
                    if type(name_dict[req]) is not Message:
                        sys.exit("Request type '%s' for '%s.%s' should be a message" % (req, decl.name, method.name))
                if resp is not None:
                    if resp not in name_dict:
                        sys.exit("Response type '%s' for '%s.%s' does not exist" % (resp, decl.name, method.name))
                    if type(name_dict[resp]) is not Message:
                        sys.exit("Response type '%s' for '%s.%s' should be a message" % (resp, decl.name, method.name))
        elif kind is Message:
            for field in decl.fields:
                # Builtin-typed fields have nothing to resolve.
                if field.type != Type.MESSAGE:
                    continue
                if field.type_str not in name_dict:
                    sys.exit("Field type '%s' for field '%s' in message '%s' not found." %
                             (field.type_str, field.name, decl.name))
def print_ast(decls: list[Decl]):
    """Print a human-readable outline of the parsed declarations.

    Each declaration gets one line; methods and fields follow their owner,
    indented with a tab. Anything that is not a Package, Interface or
    Message is reported as ``unknown type``.
    """
    for decl in decls:
        kind = type(decl)
        if kind is Package:
            print("%s (Package)" % decl.cpp_namespace())
            continue
        if kind is Interface:
            print("%s (Interface)" % decl.name)
            for method in decl.methods:
                print("\t%s (%s -> %s)" % (method.name, method.request, method.response))
            continue
        if kind is Message:
            print("%s (Message)" % decl.name)
            for field in decl.fields:
                print("\t%s %s" % (field.type.name, field.name))
            continue
        print("unknown type")