[Yunq] First pass at codegen for message headers and parsing.

This commit is contained in:
Drew Galbraith 2023-10-13 00:15:31 -07:00
parent 3323826f3b
commit 28a6d543ec
4 changed files with 424 additions and 272 deletions

1
.gitignore vendored
View File

@ -1,2 +1,3 @@
builddbg/
__pycache__/
compile_commands.json

145
yunq/codegen_message.py Normal file
View File

@ -0,0 +1,145 @@
from parser import *
# Templates used to emit the C++ header for yunq messages.
#
# All templates except HEADER_PRELUDE, MESSAGE_CLASS_PRIVATE and
# MESSAGE_CLASS_SUFFIX are run through str.format, so literal C++ braces in
# those are doubled ("{{" / "}}").

# Emitted once at the top of every generated header.
HEADER_PRELUDE = """
// Generated file - DO NOT MODIFY
#pragma once
#include <glacier/string/string.h>
"""

# Opens the class for a message; format args: name.
MESSAGE_CLASS_PREFIX = """
class {name} {{
public:
{name}() {{}}
// Delete copy and move until implemented.
{name}(const {name}&) = delete;
{name}({name}&&) = delete;
void ParseFromBytes(const glcr::ByteBuffer&);
glcr::ByteBuffer SerializeToBytes();
"""

# Public getter/setter pair for one field; format args: type, name.
MESSAGE_CLASS_SET_GET = """
{type} {name}() {{ return {name}_; }}
void set_{name}({type} value) {{
{name}_ = value;
}}
"""

# Switches the class to its private section (used verbatim, not formatted).
MESSAGE_CLASS_PRIVATE = """
private:
"""

# Backing member for one field; format args: type, name.
MESSAGE_CLASS_FIELD = """
{type} {name}_;
"""

# Closes the class (used verbatim, not formatted). A C++ class definition
# must be terminated by a semicolon; without it the generated header does
# not compile.
MESSAGE_CLASS_SUFFIX = """
};
"""
# C++ type emitted for each yunq field type.
type_to_str = {
    Type.U64: "uint64_t",
    Type.I64: "int64_t",
    Type.STRING: "glcr::String",
    Type.CAPABILITY: "zcap_t",
    Type.BYTES: "glcr::Vector<uint8_t>",
}


def _type_str(field_type: Type) -> str:
    """Return the C++ type name for ``field_type``.

    Raises:
        KeyError: if ``field_type`` has no entry in ``type_to_str``.
    """
    cpp_name = type_to_str[field_type]
    return cpp_name
def _generate_message_class(message: Message) -> str:
    """Render the C++ class declaration for one message.

    The output is, in order: the class prefix, one getter/setter pair per
    field, the ``private:`` marker, one backing member per field, and the
    class suffix.
    """
    typed_fields = [(f.name, _type_str(f.type)) for f in message.fields]

    pieces = [MESSAGE_CLASS_PREFIX.format(name=message.name)]
    pieces.extend(
        MESSAGE_CLASS_SET_GET.format(name=fname, type=ftype)
        for fname, ftype in typed_fields
    )
    pieces.append(MESSAGE_CLASS_PRIVATE)
    pieces.extend(
        MESSAGE_CLASS_FIELD.format(name=fname, type=ftype)
        for fname, ftype in typed_fields
    )
    pieces.append(MESSAGE_CLASS_SUFFIX)
    return "".join(pieces)
def generate_message_header(ast: list[Decl]) -> str:
    """Build the full C++ header text for every message in ``ast``.

    Declarations whose exact type is not ``Message`` are ignored.
    """
    class_decls = [_generate_message_class(d) for d in ast if type(d) == Message]
    return HEADER_PRELUDE + "".join(class_decls)
# Templates used to emit the C++ implementation file for yunq messages.
#
# Every template except IMPL_PARSE_DEF_END is run through str.format, so
# literal C++ braces in those are doubled ("{{" / "}}").

# Emitted once at the top of the implementation; format args: file (the
# header's base name). The struct definition must end with ';' or the
# generated C++ does not compile.
IMPL_PRELUDE = """
#include "{file}.h"
namespace {{
const uint64_t header_size = 24; // 4x uint32, 1x uint64
struct ExtPointer {{
uint32_t offset;
uint32_t length;
}};
}} // namespace
"""

# Opens ParseFromBytes for a message; format args: name.
IMPL_PARSE_DEF = """
void {name}::ParseFromBytes(const glcr::ByteBuffer& bytes) {{
CheckHeader();
"""

# Per-field parse statements; format args: name, offset (the field's index,
# each field occupying one 8-byte slot after the header).
IMPL_PARSE_U64 = """
set_{name}(bytes.At<uint64_t>(header_size + (8 * {offset})));
"""
IMPL_PARSE_I64 = """
set_{name}(bytes.At<int64_t>(header_size + (8 * {offset})));
"""
IMPL_PARSE_STRING = """
auto {name}_pointer = bytes.At<ExtPointer>(header_size + (8 * {offset}));
set_{name}(bytes.StringAt({name}_pointer.offset, {name}_pointer.length));
"""

# Closes ParseFromBytes (used verbatim, not formatted).
IMPL_PARSE_DEF_END = """
}
"""
def _generate_message_class_impl(message: Message) -> str:
    """Render the C++ ``ParseFromBytes`` definition for one message.

    Each field is parsed from the slot matching its index in
    ``message.fields``. Field types without a parse template produce a
    literal "<TYPE> unimplemented" line in the output.
    """
    parse_templates = {
        Type.U64: IMPL_PARSE_U64,
        Type.I64: IMPL_PARSE_I64,
        Type.STRING: IMPL_PARSE_STRING,
    }

    chunks = [IMPL_PARSE_DEF.format(name=message.name)]
    for slot, member in enumerate(message.fields):
        template = parse_templates.get(member.type)
        if template is None:
            chunks.append("\n{} unimplemented\n".format(member.type.name))
        else:
            chunks.append(template.format(name=member.name, offset=slot))
    chunks.append(IMPL_PARSE_DEF_END)
    return "".join(chunks)
def generate_message_impl(file: str, ast: list[Decl]) -> str:
    """Build the full C++ implementation text for every message in ``ast``.

    ``file`` is substituted into the ``#include "<file>.h"`` line of the
    prelude. Declarations whose exact type is not ``Message`` are ignored.
    """
    pieces = [IMPL_PRELUDE.format(file=file)]
    pieces += [_generate_message_class_impl(d) for d in ast if type(d) == Message]
    return "".join(pieces)

274
yunq/parser.py Normal file
View File

@ -0,0 +1,274 @@
import sys
from enum import Enum
class LexemeType(Enum):
    """Kinds of token produced by the lexer."""

    NONE = 0
    EOF = 1

    # Identifiers and Keywords
    NAME = 2

    # Symbols
    LEFT_BRACE = 3
    RIGHT_BRACE = 4
    LEFT_PAREN = 5
    RIGHT_PAREN = 6
    ARROW = 7
    SEMICOLON = 8
class Lexeme():
    """A single token: its type plus an optional value (the identifier text)."""

    def __init__(self, lextype: LexemeType, value = None):
        self.type = lextype
        self.value = value

    def __repr__(self):
        return str(self)

    def __str__(self):
        # A falsy value (None or "") is left out of the rendering.
        if not self.value:
            return "(%s)" % self.type
        return "(%s, %s)" % (self.type, self.value)
def lexer(program: str) -> list[Lexeme]:
    """Split yunq source text into a list of lexemes ending with EOF.

    Recognised tokens are the single characters ``{ } ( ) ;``, the arrow
    ``->`` and names (an alphabetic character followed by any number of
    alphanumerics or underscores). Spaces, tabs, carriage returns and
    newlines are skipped; newlines advance the line counter used in error
    messages.

    Any other input terminates the process through ``sys.exit`` with a
    diagnostic, including a ``-`` that is the last character of the input.
    """
    single_char_tokens = {
        '{': LexemeType.LEFT_BRACE,
        '}': LexemeType.RIGHT_BRACE,
        '(': LexemeType.LEFT_PAREN,
        ')': LexemeType.RIGHT_PAREN,
        ';': LexemeType.SEMICOLON,
    }
    line = 1
    current = 0
    end = len(program)
    tokens: list[Lexeme] = []
    while current < end:
        # Scan next token.
        start = current
        curr = program[current]
        if curr == '\n':
            line += 1
        elif curr == '\t' or curr == ' ' or curr == '\r':
            pass
        elif curr in single_char_tokens:
            tokens.append(Lexeme(single_char_tokens[curr]))
        elif curr == '-':
            current += 1
            # A trailing '-' used to index past the end of the input.
            if current >= end:
                sys.exit("Expected > after - got end of input on line %d" % line)
            if program[current] != '>':
                sys.exit("Expected > after - got '%s' on line %d" % (program[current], line))
            tokens.append(Lexeme(LexemeType.ARROW))
        elif curr.isalpha():
            # Bounds-check the lookahead so a name at the very end of the
            # input is tokenised instead of raising IndexError.
            while current + 1 < end and (
                    program[current + 1].isalnum() or program[current + 1] == '_'):
                current += 1
            tokens.append(Lexeme(LexemeType.NAME, program[start:current + 1]))
        else:
            sys.exit("Got unexpected token %s on line %s." % (curr, line))
        current += 1
    tokens.append(Lexeme(LexemeType.EOF))
    return tokens
class Method():
    """One interface method: its name and request/response type names."""

    def __init__(self, name: str, request: str, response: str):
        self.name, self.request, self.response = name, request, response
class Interface():
    """A named interface declaration and the methods it contains."""

    def __init__(self, name: str, methods: list[Method]):
        self.methods = methods
        self.name = name
class Type(Enum):
    """Field types a message member can have."""

    NONE = 0
    U64 = 1
    I64 = 2
    STRING = 3
    BYTES = 4
    CAPABILITY = 5


# Maps the type keyword as written in a yunq file to its Type member.
type_str_dict = {
    "u64": Type.U64,
    "i64": Type.I64,
    "string": Type.STRING,
    "bytes": Type.BYTES,
    "capability": Type.CAPABILITY,
}
class Field():
    """One member of a message: a field type and a name."""

    def __init__(self, fieldtype: Type, name: str):
        self.name = name
        self.type = fieldtype
class Message():
    """A named message declaration and its fields, in declaration order."""

    def __init__(self, name: str, fields: list[Field]):
        self.fields = fields
        self.name = name
# A top-level declaration in a yunq file: either an interface or a message.
Decl = Interface | Message
# Module-level registry of every declaration parsed so far, keyed by name.
# Parser.interface() and Parser.message() add to it and use it to reject
# duplicate names; type_check() reads it to resolve request/response types.
name_dict: dict[str, Decl] = {}
class Parser():
    """Recursive-descent parser that turns a lexeme list into declarations.

    Parsed interfaces and messages are also registered in the module-level
    ``name_dict``. Every syntax error terminates the process through
    ``sys.exit`` with a diagnostic message.
    """

    def __init__(self, tokens: list[Lexeme]):
        self.tokens = tokens
        # Index of the next token to be consumed.
        self.current = 0

    def peektype(self) -> LexemeType:
        """Return the type of the next token without consuming it."""
        return self.tokens[self.current].type

    def peekvalue(self) -> str:
        """Return the value of the next token without consuming it."""
        return self.tokens[self.current].value

    def consume(self) -> Lexeme:
        """Advance past the next token and return it."""
        self.current += 1
        return self.tokens[self.current - 1]

    def consume_identifier(self) -> str:
        """Consume a NAME token and return its text; exit on anything else."""
        tok = self.consume()
        if tok.type != LexemeType.NAME:
            sys.exit("Expected identifier got %s" % tok.type)
        return tok.value

    def consume_check(self, lex_type: LexemeType):
        """Consume one token and exit unless its type is ``lex_type``."""
        tok = self.consume()
        if tok.type != lex_type:
            # The message previously used the undefined name `tok_type`,
            # which raised NameError instead of reporting the syntax error.
            sys.exit("Expected %s got %s" % (lex_type, tok.type))

    def consume_check_identifier(self, name: str):
        """Consume one token and exit unless it is the NAME ``name``."""
        tok = self.consume()
        if tok.type != LexemeType.NAME:
            sys.exit("Expected '%s' got a %s" % (name, tok.type))
        if tok.value != name:
            sys.exit("Expected '%s' got '%s'" % (name, tok.value))

    def parse(self) -> list[Decl]:
        """Parse declarations until EOF and return them in source order."""
        decls = []
        while self.peektype() != LexemeType.EOF:
            decls.append(self.decl())
        return decls

    def decl(self) -> Decl:
        """Parse one top-level ``message`` or ``interface`` declaration."""
        token = self.consume()
        if token.type != LexemeType.NAME:
            # sys.exit accepts a single argument, so the message has to be
            # formatted with % rather than passed as (format, value).
            sys.exit("Unexpected token: %s" % token)
        if token.value == "message":
            return self.message()
        elif token.value == "interface":
            return self.interface()
        sys.exit("Unexpected identifier '%s', expected message or interface" % token.value)

    def interface(self):
        """Parse ``<name> { method... }`` and register the interface."""
        # "interface" consumed by decl.
        name = self.consume_identifier()
        if name in name_dict:
            sys.exit("Name '%s' already exists." % name)

        self.consume_check(LexemeType.LEFT_BRACE)

        methods: list[Method] = []
        method_names = set()
        while self.peektype() != LexemeType.RIGHT_BRACE:
            m = self.method()
            if m.name in method_names:
                sys.exit("Method %s declared twice on %s" % (m.name, name))
            method_names.add(m.name)
            methods.append(m)

        self.consume_check(LexemeType.RIGHT_BRACE)

        i = Interface(name, methods)
        name_dict[name] = i
        return i

    def method(self):
        """Parse ``method <name> (<request>) -> (<response>);``."""
        self.consume_check_identifier("method")

        name = self.consume_identifier()

        self.consume_check(LexemeType.LEFT_PAREN)
        request = self.consume_identifier()
        self.consume_check(LexemeType.RIGHT_PAREN)

        self.consume_check(LexemeType.ARROW)

        self.consume_check(LexemeType.LEFT_PAREN)
        response = self.consume_identifier()
        self.consume_check(LexemeType.RIGHT_PAREN)

        self.consume_check(LexemeType.SEMICOLON)

        return Method(name, request, response)

    def message(self):
        """Parse ``<name> { field... }`` and register the message."""
        # "message" consumed by decl.
        name = self.consume_identifier()
        if name in name_dict:
            sys.exit("Name '%s' already exists." % name)

        self.consume_check(LexemeType.LEFT_BRACE)

        fields: list[Field] = []
        field_names = set()
        while self.peektype() != LexemeType.RIGHT_BRACE:
            f = self.field()
            if f.name in field_names:
                sys.exit("Field %s declared twice on %s" % (f.name, name))
            field_names.add(f.name)
            fields.append(f)

        self.consume_check(LexemeType.RIGHT_BRACE)

        m = Message(name, fields)
        name_dict[name] = m
        return m

    def field(self):
        """Parse ``<type> <name>;`` where type is a key of ``type_str_dict``."""
        field_type_str = self.consume_identifier()
        if field_type_str not in type_str_dict:
            sys.exit("Expected type got '%s'" % field_type_str)
        field_type = type_str_dict[field_type_str]

        name = self.consume_identifier()
        self.consume_check(LexemeType.SEMICOLON)
        return Field(field_type, name)
def type_check(decls: list[Decl]):
    """Verify that every method's request and response name a message.

    Only declarations whose exact type is ``Interface`` are inspected. Names
    are resolved through ``name_dict``; the first violation terminates the
    process through ``sys.exit``.
    """
    interfaces = (d for d in decls if type(d) is Interface)
    for iface in interfaces:
        for method in iface.methods:
            req, resp = method.request, method.response
            if req not in name_dict:
                sys.exit("Request type '%s' for '%s.%s' does not exist" % (req, iface.name, method.name))
            if type(name_dict[req]) is not Message:
                sys.exit("Request type '%s' for '%s.%s' should be a message" % (req, iface.name, method.name))
            if resp not in name_dict:
                sys.exit("Response type '%s' for '%s.%s' does not exist" % (resp, iface.name, method.name))
            if type(name_dict[resp]) is not Message:
                sys.exit("Response type '%s' for '%s.%s' should be a message" % (resp, iface.name, method.name))
def print_ast(decls: list[Decl]):
    """Print a human-readable outline of ``decls`` to stdout.

    Each declaration gets a header line followed by one tab-indented line
    per method (interfaces) or field (messages).
    """
    for node in decls:
        kind = type(node)
        if kind is Message:
            print("%s (Message)" % node.name)
            for member in node.fields:
                print("\t%s %s" % (member.type.name, member.name))
        elif kind is Interface:
            print("%s (Interface)" % node.name)
            for method in node.methods:
                print("\t%s (%s -> %s)" % (method.name, method.request, method.response))
        else:
            print("unknown type")

View File

@ -1,277 +1,8 @@
from enum import Enum
import os
import sys
class LexemeType(Enum):
    """Kinds of token produced by the lexer."""

    NONE = 0
    EOF = 1

    # Identifiers and Keywords
    NAME = 2

    # Symbols
    LEFT_BRACE = 3
    RIGHT_BRACE = 4
    LEFT_PAREN = 5
    RIGHT_PAREN = 6
    ARROW = 7
    SEMICOLON = 8
class Lexeme():
    """A single token: its type plus an optional value (the identifier text)."""

    def __init__(self, lextype: LexemeType, value = None):
        self.type = lextype
        self.value = value

    def __repr__(self):
        return str(self)

    def __str__(self):
        # A falsy value (None or "") is left out of the rendering.
        if not self.value:
            return "(%s)" % self.type
        return "(%s, %s)" % (self.type, self.value)
def lexer(program: str) -> list[Lexeme]:
    """Split yunq source text into a list of lexemes ending with EOF.

    Recognised tokens are the single characters ``{ } ( ) ;``, the arrow
    ``->`` and names (an alphabetic character followed by any number of
    alphanumerics or underscores). Spaces, tabs, carriage returns and
    newlines are skipped; newlines advance the line counter used in error
    messages.

    Any other input terminates the process through ``sys.exit`` with a
    diagnostic, including a ``-`` that is the last character of the input.
    """
    single_char_tokens = {
        '{': LexemeType.LEFT_BRACE,
        '}': LexemeType.RIGHT_BRACE,
        '(': LexemeType.LEFT_PAREN,
        ')': LexemeType.RIGHT_PAREN,
        ';': LexemeType.SEMICOLON,
    }
    line = 1
    current = 0
    end = len(program)
    tokens: list[Lexeme] = []
    while current < end:
        # Scan next token.
        start = current
        curr = program[current]
        if curr == '\n':
            line += 1
        elif curr == '\t' or curr == ' ' or curr == '\r':
            pass
        elif curr in single_char_tokens:
            tokens.append(Lexeme(single_char_tokens[curr]))
        elif curr == '-':
            current += 1
            # A trailing '-' used to index past the end of the input.
            if current >= end:
                sys.exit("Expected > after - got end of input on line %d" % line)
            if program[current] != '>':
                sys.exit("Expected > after - got '%s' on line %d" % (program[current], line))
            tokens.append(Lexeme(LexemeType.ARROW))
        elif curr.isalpha():
            # Bounds-check the lookahead so a name at the very end of the
            # input is tokenised instead of raising IndexError.
            while current + 1 < end and (
                    program[current + 1].isalnum() or program[current + 1] == '_'):
                current += 1
            tokens.append(Lexeme(LexemeType.NAME, program[start:current + 1]))
        else:
            sys.exit("Got unexpected token %s on line %s." % (curr, line))
        current += 1
    tokens.append(Lexeme(LexemeType.EOF))
    return tokens
class Method():
    """One interface method: its name and request/response type names."""

    def __init__(self, name: str, request: str, response: str):
        self.name, self.request, self.response = name, request, response
class Interface():
    """A named interface declaration and the methods it contains."""

    def __init__(self, name: str, methods: list[Method]):
        self.methods = methods
        self.name = name
class Type(Enum):
    """Field types a message member can have."""

    NONE = 0
    U64 = 1
    I64 = 2
    STRING = 3
    BYTES = 4
    CAPABILITY = 5


# Maps the type keyword as written in a yunq file to its Type member.
type_str_dict = {
    "u64": Type.U64,
    "i64": Type.I64,
    "string": Type.STRING,
    "bytes": Type.BYTES,
    "capability": Type.CAPABILITY,
}
class Field():
    """One member of a message: a field type and a name."""

    def __init__(self, fieldtype: Type, name: str):
        self.name = name
        self.type = fieldtype
class Message():
    """A named message declaration and its fields, in declaration order."""

    def __init__(self, name: str, fields: list[Field]):
        self.fields = fields
        self.name = name
# A top-level declaration in a yunq file: either an interface or a message.
Decl = Interface | Message
# Module-level registry of every declaration parsed so far, keyed by name.
# Parser.interface() and Parser.message() add to it and use it to reject
# duplicate names; type_check() reads it to resolve request/response types.
name_dict: dict[str, Decl] = {}
class Parser():
    """Recursive-descent parser that turns a lexeme list into declarations.

    Parsed interfaces and messages are also registered in the module-level
    ``name_dict``. Every syntax error terminates the process through
    ``sys.exit`` with a diagnostic message.
    """

    def __init__(self, tokens: list[Lexeme]):
        self.tokens = tokens
        # Index of the next token to be consumed.
        self.current = 0

    def peektype(self) -> LexemeType:
        """Return the type of the next token without consuming it."""
        return self.tokens[self.current].type

    def peekvalue(self) -> str:
        """Return the value of the next token without consuming it."""
        return self.tokens[self.current].value

    def consume(self) -> Lexeme:
        """Advance past the next token and return it."""
        self.current += 1
        return self.tokens[self.current - 1]

    def consume_identifier(self) -> str:
        """Consume a NAME token and return its text; exit on anything else."""
        tok = self.consume()
        if tok.type != LexemeType.NAME:
            sys.exit("Expected identifier got %s" % tok.type)
        return tok.value

    def consume_check(self, lex_type: LexemeType):
        """Consume one token and exit unless its type is ``lex_type``."""
        tok = self.consume()
        if tok.type != lex_type:
            # The message previously used the undefined name `tok_type`,
            # which raised NameError instead of reporting the syntax error.
            sys.exit("Expected %s got %s" % (lex_type, tok.type))

    def consume_check_identifier(self, name: str):
        """Consume one token and exit unless it is the NAME ``name``."""
        tok = self.consume()
        if tok.type != LexemeType.NAME:
            sys.exit("Expected '%s' got a %s" % (name, tok.type))
        if tok.value != name:
            sys.exit("Expected '%s' got '%s'" % (name, tok.value))

    def parse(self) -> list[Decl]:
        """Parse declarations until EOF and return them in source order."""
        decls = []
        while self.peektype() != LexemeType.EOF:
            decls.append(self.decl())
        return decls

    def decl(self) -> Decl:
        """Parse one top-level ``message`` or ``interface`` declaration."""
        token = self.consume()
        if token.type != LexemeType.NAME:
            # sys.exit accepts a single argument, so the message has to be
            # formatted with % rather than passed as (format, value).
            sys.exit("Unexpected token: %s" % token)
        if token.value == "message":
            return self.message()
        elif token.value == "interface":
            return self.interface()
        sys.exit("Unexpected identifier '%s', expected message or interface" % token.value)

    def interface(self):
        """Parse ``<name> { method... }`` and register the interface."""
        # "interface" consumed by decl.
        name = self.consume_identifier()
        if name in name_dict:
            sys.exit("Name '%s' already exists." % name)

        self.consume_check(LexemeType.LEFT_BRACE)

        methods: list[Method] = []
        method_names = set()
        while self.peektype() != LexemeType.RIGHT_BRACE:
            m = self.method()
            if m.name in method_names:
                sys.exit("Method %s declared twice on %s" % (m.name, name))
            method_names.add(m.name)
            methods.append(m)

        self.consume_check(LexemeType.RIGHT_BRACE)

        i = Interface(name, methods)
        name_dict[name] = i
        return i

    def method(self):
        """Parse ``method <name> (<request>) -> (<response>);``."""
        self.consume_check_identifier("method")

        name = self.consume_identifier()

        self.consume_check(LexemeType.LEFT_PAREN)
        request = self.consume_identifier()
        self.consume_check(LexemeType.RIGHT_PAREN)

        self.consume_check(LexemeType.ARROW)

        self.consume_check(LexemeType.LEFT_PAREN)
        response = self.consume_identifier()
        self.consume_check(LexemeType.RIGHT_PAREN)

        self.consume_check(LexemeType.SEMICOLON)

        return Method(name, request, response)

    def message(self):
        """Parse ``<name> { field... }`` and register the message."""
        # "message" consumed by decl.
        name = self.consume_identifier()
        if name in name_dict:
            sys.exit("Name '%s' already exists." % name)

        self.consume_check(LexemeType.LEFT_BRACE)

        fields: list[Field] = []
        field_names = set()
        while self.peektype() != LexemeType.RIGHT_BRACE:
            f = self.field()
            if f.name in field_names:
                sys.exit("Field %s declared twice on %s" % (f.name, name))
            field_names.add(f.name)
            fields.append(f)

        self.consume_check(LexemeType.RIGHT_BRACE)

        m = Message(name, fields)
        name_dict[name] = m
        return m

    def field(self):
        """Parse ``<type> <name>;`` where type is a key of ``type_str_dict``."""
        field_type_str = self.consume_identifier()
        if field_type_str not in type_str_dict:
            sys.exit("Expected type got '%s'" % field_type_str)
        field_type = type_str_dict[field_type_str]

        name = self.consume_identifier()
        self.consume_check(LexemeType.SEMICOLON)
        return Field(field_type, name)
def type_check(decls: list[Decl]):
    """Verify that every method's request and response name a message.

    Only declarations whose exact type is ``Interface`` are inspected. Names
    are resolved through ``name_dict``; the first violation terminates the
    process through ``sys.exit``.
    """
    interfaces = (d for d in decls if type(d) is Interface)
    for iface in interfaces:
        for method in iface.methods:
            req, resp = method.request, method.response
            if req not in name_dict:
                sys.exit("Request type '%s' for '%s.%s' does not exist" % (req, iface.name, method.name))
            if type(name_dict[req]) is not Message:
                sys.exit("Request type '%s' for '%s.%s' should be a message" % (req, iface.name, method.name))
            if resp not in name_dict:
                sys.exit("Response type '%s' for '%s.%s' does not exist" % (resp, iface.name, method.name))
            if type(name_dict[resp]) is not Message:
                sys.exit("Response type '%s' for '%s.%s' should be a message" % (resp, iface.name, method.name))
def print_ast(decls: list[Decl]):
    """Print a human-readable outline of ``decls`` to stdout.

    Each declaration gets a header line followed by one tab-indented line
    per method (interfaces) or field (messages).
    """
    for node in decls:
        kind = type(node)
        if kind is Message:
            print("%s (Message)" % node.name)
            for member in node.fields:
                print("\t%s %s" % (member.type.name, member.name))
        elif kind is Interface:
            print("%s (Interface)" % node.name)
            for method in node.methods:
                print("\t%s (%s -> %s)" % (method.name, method.request, method.response))
        else:
            print("unknown type")
from codegen_message import *
from parser import *
def main():
if len(sys.argv) != 2:
@ -286,7 +17,8 @@ def main():
parser = Parser(lexemes)
ast = parser.parse()
type_check(ast)
print_ast(ast)
print(generate_message_impl(filename, ast))
if __name__ == "__main__":
main()