350 lines
11 KiB
Python
350 lines
11 KiB
Python
from enum import Enum
|
|
import sys
|
|
|
|
class LexemeType(Enum):
|
|
NONE = 0
|
|
|
|
EOF = 1
|
|
|
|
# Identifiers and Keywords
|
|
NAME = 2
|
|
|
|
# Symbols
|
|
LEFT_BRACE = 3
|
|
RIGHT_BRACE = 4
|
|
LEFT_PAREN = 5
|
|
RIGHT_PAREN = 6
|
|
ARROW = 7
|
|
SEMICOLON = 8
|
|
DOT = 9
|
|
EQUALS = 10
|
|
|
|
|
|
class Lexeme():
|
|
def __init__(self, lextype: LexemeType, value = None):
|
|
self.type = lextype
|
|
self.value = value
|
|
|
|
def __str__(self):
|
|
if self.value:
|
|
return "(%s, %s)" % (self.type, self.value)
|
|
return "(%s)" % self.type
|
|
|
|
def __repr__(self):
|
|
return self.__str__()
|
|
|
|
|
|
def lexer(program: str):
|
|
line = 1
|
|
start = 0
|
|
current = 0
|
|
tokens: list[Lexeme] = []
|
|
while current < len(program):
|
|
# Scan next token.
|
|
start = current
|
|
curr = program[current]
|
|
if curr == '\n':
|
|
line += 1
|
|
elif curr == '\t' or curr == ' ' or curr == '\r':
|
|
pass
|
|
elif curr == '{':
|
|
tokens.append(Lexeme(LexemeType.LEFT_BRACE))
|
|
elif curr == '}':
|
|
tokens.append(Lexeme(LexemeType.RIGHT_BRACE))
|
|
elif curr == '(':
|
|
tokens.append(Lexeme(LexemeType.LEFT_PAREN))
|
|
elif curr == ')':
|
|
tokens.append(Lexeme(LexemeType.RIGHT_PAREN))
|
|
elif curr == ';':
|
|
tokens.append(Lexeme(LexemeType.SEMICOLON))
|
|
elif curr == '.':
|
|
tokens.append(Lexeme(LexemeType.DOT))
|
|
elif curr == '-':
|
|
current += 1
|
|
if program[current] == '>':
|
|
tokens.append(Lexeme(LexemeType.ARROW))
|
|
else:
|
|
sys.exit("Expected > after - got '%s' on line %d" % (program[current], line))
|
|
elif curr.isalpha():
|
|
while program[current + 1].isalnum() or program[current + 1] == '_':
|
|
current += 1
|
|
tokens.append(Lexeme(LexemeType.NAME, program[start:current + 1]))
|
|
elif curr == '/' and program[current + 1] == '/':
|
|
while program[current] != '\n':
|
|
current += 1
|
|
else:
|
|
sys.exit("Got unexpected token %s on line %s." % (curr, line))
|
|
|
|
current += 1
|
|
|
|
tokens.append(Lexeme(LexemeType.EOF))
|
|
|
|
return tokens
|
|
|
|
class Package():
|
|
def __init__(self, names: list[str]):
|
|
self.names = names
|
|
|
|
def cpp_namespace(self):
|
|
return "::".join(self.names)
|
|
|
|
class Method():
|
|
def __init__(self, name: str, request: str, response: str):
|
|
self.name = name
|
|
self.request = request
|
|
self.response = response
|
|
|
|
class Interface():
|
|
def __init__(self, name: str, methods: list[Method]):
|
|
self.name = name
|
|
self.methods = methods
|
|
|
|
class Type(Enum):
|
|
NONE = 0
|
|
U64 = 1
|
|
I64 = 2
|
|
STRING = 3
|
|
BYTES = 4
|
|
CAPABILITY = 5
|
|
MESSAGE = 6
|
|
|
|
type_str_dict = {
|
|
"u64": Type.U64,
|
|
"i64": Type.I64,
|
|
"string": Type.STRING,
|
|
"bytes": Type.BYTES,
|
|
"capability": Type.CAPABILITY,
|
|
}
|
|
|
|
type_to_cppstr = {
|
|
Type.U64: "uint64_t",
|
|
Type.I64: "int64_t",
|
|
Type.STRING: "glcr::String",
|
|
Type.CAPABILITY: "z_cap_t",
|
|
Type.BYTES: "glcr::Vector<uint8_t>"
|
|
}
|
|
|
|
class Field():
|
|
def __init__(self, field_type_str: str, name: str, repeated = False):
|
|
if field_type_str in type_str_dict.keys():
|
|
self.type = type_str_dict[field_type_str]
|
|
else:
|
|
self.type = Type.MESSAGE
|
|
self.type_str = field_type_str
|
|
self.name = name
|
|
self.repeated = repeated
|
|
|
|
def cpp_type(self):
|
|
if self.type == Type.MESSAGE:
|
|
return self.type_str
|
|
return type_to_cppstr[self.type]
|
|
|
|
class Message():
|
|
def __init__(self, name: str, fields: list[Field]):
|
|
self.name = name
|
|
self.fields = fields
|
|
|
|
Decl = Interface | Message
|
|
|
|
name_dict: dict[str, Decl] = {}
|
|
|
|
class Parser():
|
|
def __init__(self, tokens: list[Lexeme]):
|
|
self.tokens = tokens
|
|
self.current = 0
|
|
|
|
def peektype(self) -> LexemeType:
|
|
return self.tokens[self.current].type
|
|
|
|
def peekvalue(self) -> str:
|
|
return self.tokens[self.current].value
|
|
|
|
def consume(self) -> Lexeme:
|
|
self.current += 1
|
|
return self.tokens[self.current - 1]
|
|
|
|
def consume_identifier(self) -> str:
|
|
tok = self.consume()
|
|
if tok.type != LexemeType.NAME:
|
|
sys.exit("Expected identifier got %s" % tok.type)
|
|
return tok.value
|
|
|
|
def consume_check(self, lex_type: LexemeType):
|
|
tok = self.consume()
|
|
if tok.type != lex_type:
|
|
sys.exit("Expected %s got %s" % (lex_type, tok.type))
|
|
|
|
def consume_check_identifier(self, name: str):
|
|
tok = self.consume()
|
|
if tok.type != LexemeType.NAME:
|
|
sys.exit("Expected '%s' got a %s" % (name, tok.type))
|
|
if tok.value != name:
|
|
sys.exit("Expected '%s' got '%s'" % (name, tok.value))
|
|
|
|
def parse(self) -> list[Decl]:
|
|
decls = []
|
|
while self.peektype() != LexemeType.EOF:
|
|
decls.append(self.decl())
|
|
return decls
|
|
|
|
def decl(self) -> Decl:
|
|
token = self.consume()
|
|
if token.type != LexemeType.NAME:
|
|
sys.exit("Unexpected token: %s", token)
|
|
if token.value == "package":
|
|
# TODO: Enforce that package decl comes before all messages and interface.
|
|
return self.package()
|
|
if token.value == "message":
|
|
return self.message()
|
|
elif token.value == "interface":
|
|
return self.interface()
|
|
sys.exit("Unexpected identifier '%s', expected package, message, interface" % token.value)
|
|
|
|
def package(self):
|
|
names = [self.consume_identifier()]
|
|
|
|
while self.peektype() == LexemeType.DOT:
|
|
self.consume_check(LexemeType.DOT)
|
|
names += [self.consume_identifier()]
|
|
|
|
self.consume_check(LexemeType.SEMICOLON)
|
|
|
|
return Package(names)
|
|
|
|
def interface(self):
|
|
# "interface" consumed by decl.
|
|
name = self.consume_identifier()
|
|
|
|
if name in name_dict.keys():
|
|
sys.exit("Name '%s' already exists." % name)
|
|
|
|
self.consume_check(LexemeType.LEFT_BRACE)
|
|
|
|
methods: list[Method] = []
|
|
method_names = set()
|
|
next_method_number = 0
|
|
while self.peektype() != LexemeType.RIGHT_BRACE:
|
|
m = self.method()
|
|
m.number = next_method_number
|
|
next_method_number += 1
|
|
if m.name in method_names:
|
|
sys.exit("Method %s declared twice on %s" % (m.name, name))
|
|
method_names.add(m.name)
|
|
methods.append(m)
|
|
|
|
self.consume_check(LexemeType.RIGHT_BRACE)
|
|
|
|
i = Interface(name, methods)
|
|
name_dict[name] = i
|
|
return i
|
|
|
|
def method(self):
|
|
self.consume_check_identifier("method")
|
|
|
|
name = self.consume_identifier()
|
|
|
|
self.consume_check(LexemeType.LEFT_PAREN)
|
|
|
|
request = None
|
|
# FIXME: Fix error handling here (and for response). We want to show
|
|
# "expected rparen or identifier" if type is wrong rather than just
|
|
# "expected rparen".
|
|
if self.peektype() == LexemeType.NAME:
|
|
request = self.consume_identifier()
|
|
|
|
self.consume_check(LexemeType.RIGHT_PAREN)
|
|
self.consume_check(LexemeType.ARROW)
|
|
self.consume_check(LexemeType.LEFT_PAREN)
|
|
|
|
response = None
|
|
if self.peektype() == LexemeType.NAME:
|
|
response = self.consume_identifier()
|
|
|
|
self.consume_check(LexemeType.RIGHT_PAREN)
|
|
self.consume_check(LexemeType.SEMICOLON)
|
|
|
|
return Method(name, request, response)
|
|
|
|
def message(self):
|
|
# "message" consumed by decl.
|
|
|
|
name = self.consume_identifier()
|
|
|
|
if name in name_dict:
|
|
sys.exit("Name '%s' already exists." % name)
|
|
|
|
self.consume_check(LexemeType.LEFT_BRACE)
|
|
|
|
fields: list[Field] = []
|
|
field_names = set()
|
|
next_field_num = 0
|
|
while self.peektype() != LexemeType.RIGHT_BRACE:
|
|
f = self.field()
|
|
f.number = next_field_num
|
|
next_field_num += 1
|
|
if f.name in field_names:
|
|
sys.exit("Field %s declared twice on %s" % (f.name, name))
|
|
field_names.add(f.name)
|
|
fields.append(f)
|
|
|
|
self.consume_check(LexemeType.RIGHT_BRACE)
|
|
|
|
m = Message(name, fields)
|
|
name_dict[name] = m
|
|
return m
|
|
|
|
def field(self):
|
|
|
|
repeated = False
|
|
field_type_str = self.consume_identifier()
|
|
if field_type_str == "repeated":
|
|
repeated = True
|
|
field_type_str = self.consume_identifier()
|
|
|
|
name = self.consume_identifier()
|
|
self.consume_check(LexemeType.SEMICOLON)
|
|
return Field(field_type_str, name, repeated)
|
|
|
|
def type_check(decls: list[Decl]):
|
|
if sum(1 for decl in decls if type(decl) is Package) > 1:
|
|
sys.exit("Cannot have more than one package declaration")
|
|
for decl in decls:
|
|
if type(decl) is Interface:
|
|
for method in decl.methods:
|
|
if method.request is None and method.response is None:
|
|
sys.exit("Method '%s.%s' cannot have empty request and response" % (decl.name, method.name))
|
|
if method.request is not None:
|
|
if method.request not in name_dict.keys():
|
|
sys.exit("Request type '%s' for '%s.%s' does not exist" % (method.request, decl.name, method.name))
|
|
if type(name_dict[method.request]) is not Message:
|
|
sys.exit("Request type '%s' for '%s.%s' should be a message" % (method.request, decl.name, method.name))
|
|
if method.response is not None:
|
|
if method.response not in name_dict.keys():
|
|
sys.exit("Response type '%s' for '%s.%s' does not exist" % (method.response, decl.name, method.name))
|
|
if type(name_dict[method.response]) is not Message:
|
|
sys.exit("Response type '%s' for '%s.%s' should be a message" % (method.response, decl.name, method.name))
|
|
elif type(decl) is Message:
|
|
for field in decl.fields:
|
|
if field.type == Type.MESSAGE:
|
|
if field.type_str not in name_dict.keys():
|
|
sys.exit("Field type '%s' for field '%s' in message '%s' not found." %
|
|
(field.type_str, field.name, decl.name))
|
|
|
|
def print_ast(decls: list[Decl]):
|
|
for decl in decls:
|
|
if type(decl) is Package:
|
|
print("%s (Package)" % decl.cpp_namespace())
|
|
elif type(decl) is Interface:
|
|
print("%s (Interface)" % decl.name)
|
|
for method in decl.methods:
|
|
print("\t%s (%s -> %s)" % (method.name, method.request, method.response))
|
|
elif type(decl) is Message:
|
|
print("%s (Message)" % decl.name)
|
|
for field in decl.fields:
|
|
print("\t%s %s" % (field.type.name, field.name))
|
|
else:
|
|
print("unknown type")
|
|
|
|
|
|
|