diff --git a/.gitignore b/.gitignore
index c9ae45f..ced1c21 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,2 +1,3 @@
 builddbg/
+__pycache__/
 compile_commands.json
diff --git a/yunq/codegen_message.py b/yunq/codegen_message.py
new file mode 100644
index 0000000..15e27f4
--- /dev/null
+++ b/yunq/codegen_message.py
@@ -0,0 +1,145 @@
+
+from parser import *
+
+HEADER_PRELUDE = """
+// Generated file - DO NOT MODIFY
+#pragma once
+
+#include
+
+"""
+
+MESSAGE_CLASS_PREFIX = """
+class {name} {{
+ public:
+  {name}() {{}}
+  // Delete copy and move until implemented.
+  {name}(const {name}&) = delete;
+  {name}({name}&&) = delete;
+
+  void ParseFromBytes(const glcr::ByteBuffer&);
+  glcr::ByteBuffer SerializeToBytes();
+"""
+
+MESSAGE_CLASS_SET_GET = """
+  {type} {name}() {{ return {name}_; }}
+  void set_{name}({type} value) {{
+    {name}_ = value;
+  }}
+"""
+
+MESSAGE_CLASS_PRIVATE = """
+ private:
+"""
+
+MESSAGE_CLASS_FIELD = """
+  {type} {name}_;
+"""
+
+MESSAGE_CLASS_SUFFIX = """
+};
+"""
+
+type_to_str = {
+    Type.U64: "uint64_t",
+    Type.I64: "int64_t",
+    Type.STRING: "glcr::String",
+    Type.CAPABILITY: "zcap_t",
+    Type.BYTES: "glcr::Vector<uint8_t>"
+    }
+
+def _type_str(field_type: Type) -> str:
+    return type_to_str[field_type]
+
+
+def _generate_message_class(message: Message) -> str:
+    class_decl = MESSAGE_CLASS_PREFIX.format(name = message.name)
+
+    for field in message.fields:
+        class_decl += MESSAGE_CLASS_SET_GET.format(name = field.name, type = _type_str(field.type))
+
+    class_decl += MESSAGE_CLASS_PRIVATE
+
+    for field in message.fields:
+        class_decl += MESSAGE_CLASS_FIELD.format(name = field.name, type = _type_str(field.type))
+
+    class_decl += MESSAGE_CLASS_SUFFIX
+    return class_decl
+
+
+def generate_message_header(ast: list[Decl]) -> str:
+    header = HEADER_PRELUDE
+
+    for decl in ast:
+        if type(decl) != Message:
+            continue
+        header += _generate_message_class(decl)
+
+    return header
+
+IMPL_PRELUDE = """
+#include "{file}.h"
+
+namespace {{
+
+const uint64_t header_size = 24; // 4x uint32, 1x uint64
+
+struct ExtPointer {{
+  uint32_t offset;
+  uint32_t length;
+}};
+
+}} // namespace
+"""
+
+IMPL_PARSE_DEF = """
+void {name}::ParseFromBytes(const glcr::ByteBuffer& bytes) {{
+  CheckHeader();
+"""
+
+IMPL_PARSE_U64 = """
+  set_{name}(bytes.At<uint64_t>(header_size + (8 * {offset})));
+"""
+
+IMPL_PARSE_I64 = """
+  set_{name}(bytes.At<int64_t>(header_size + (8 * {offset})));
+"""
+
+IMPL_PARSE_STRING = """
+  auto {name}_pointer = bytes.At<ExtPointer>(header_size + (8 * {offset}));
+
+  set_{name}(bytes.StringAt({name}_pointer.offset, {name}_pointer.length));
+"""
+
+IMPL_PARSE_DEF_END = """
+}
+"""
+
+def _generate_message_class_impl(message: Message) -> str:
+    impl = IMPL_PARSE_DEF.format(name=message.name)
+
+    for offset, field in enumerate(message.fields):
+        if field.type == Type.U64:
+            impl += IMPL_PARSE_U64.format(name = field.name, offset = offset)
+        elif field.type == Type.I64:
+            impl += IMPL_PARSE_I64.format(name = field.name, offset = offset)
+        elif field.type == Type.STRING:
+            impl += IMPL_PARSE_STRING.format(name = field.name, offset = offset)
+        else:
+            impl += "\n{} unimplemented\n".format(field.type.name)
+
+    impl += IMPL_PARSE_DEF_END
+
+    return impl
+
+
+def generate_message_impl(file: str, ast: list[Decl]) -> str:
+    impl = IMPL_PRELUDE.format(file=file)
+
+
+    for decl in ast:
+        if type(decl) != Message:
+            continue
+        impl += _generate_message_class_impl(decl)
+
+    return impl
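For reference, a rough sketch of how the new codegen entry points can be driven directly. The `OpenFile` message, its fields, and the `open_file` output stem are illustrative only, not part of this change:

```python
# Hand-built AST equivalent to a hypothetical schema:
#   message OpenFile {
#     string path;
#     u64 flags;
#   }
from parser import Field, Message, Type
from codegen_message import generate_message_header, generate_message_impl

ast = [Message("OpenFile", [Field(Type.STRING, "path"), Field(Type.U64, "flags")])]

# Emits the C++ class declaration (getters/setters plus ParseFromBytes/SerializeToBytes).
print(generate_message_header(ast))

# Emits the ParseFromBytes() definition; "open_file" only controls the generated #include line.
print(generate_message_impl("open_file", ast))
```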
diff --git a/yunq/parser.py b/yunq/parser.py
new file mode 100644
index 0000000..a11f20f
--- /dev/null
+++ b/yunq/parser.py
@@ -0,0 +1,275 @@
+from enum import Enum
+import sys
+
+class LexemeType(Enum):
+    NONE = 0
+
+    EOF = 1
+
+    # Identifiers and Keywords
+    NAME = 2
+
+    # Symbols
+    LEFT_BRACE = 3
+    RIGHT_BRACE = 4
+    LEFT_PAREN = 5
+    RIGHT_PAREN = 6
+    ARROW = 7
+    SEMICOLON = 8
+
+
+class Lexeme():
+    def __init__(self, lextype: LexemeType, value = None):
+        self.type = lextype
+        self.value = value
+
+    def __str__(self):
+        if self.value:
+            return "(%s, %s)" % (self.type, self.value)
+        return "(%s)" % self.type
+
+    def __repr__(self):
+        return self.__str__()
+
+
+def lexer(program: str):
+    line = 1
+    start = 0
+    current = 0
+    tokens: list[Lexeme] = []
+    while current < len(program):
+        # Scan next token.
+        start = current
+        curr = program[current]
+        if curr == '\n':
+            line += 1
+        elif curr == '\t' or curr == ' ' or curr == '\r':
+            pass
+        elif curr == '{':
+            tokens.append(Lexeme(LexemeType.LEFT_BRACE))
+        elif curr == '}':
+            tokens.append(Lexeme(LexemeType.RIGHT_BRACE))
+        elif curr == '(':
+            tokens.append(Lexeme(LexemeType.LEFT_PAREN))
+        elif curr == ')':
+            tokens.append(Lexeme(LexemeType.RIGHT_PAREN))
+        elif curr == ';':
+            tokens.append(Lexeme(LexemeType.SEMICOLON))
+        elif curr == '-':
+            current += 1
+            if program[current] == '>':
+                tokens.append(Lexeme(LexemeType.ARROW))
+            else:
+                sys.exit("Expected > after - got '%s' on line %d" % (program[current], line))
+        elif curr.isalpha():
+            while program[current + 1].isalnum() or program[current + 1] == '_':
+                current += 1
+            tokens.append(Lexeme(LexemeType.NAME, program[start:current + 1]))
+        else:
+            sys.exit("Got unexpected token %s on line %s." % (curr, line))
+
+        current += 1
+
+    tokens.append(Lexeme(LexemeType.EOF))
+
+    return tokens
+
+
+class Method():
+    def __init__(self, name: str, request: str, response: str):
+        self.name = name
+        self.request = request
+        self.response = response
+
+class Interface():
+    def __init__(self, name: str, methods: list[Method]):
+        self.name = name
+        self.methods = methods
+
+class Type(Enum):
+    NONE = 0
+    U64 = 1
+    I64 = 2
+    STRING = 3
+    BYTES = 4
+    CAPABILITY = 5
+
+type_str_dict = {
+    "u64": Type.U64,
+    "i64": Type.I64,
+    "string": Type.STRING,
+    "bytes": Type.BYTES,
+    "capability": Type.CAPABILITY,
+    }
+
+class Field():
+    def __init__(self, fieldtype: Type, name: str):
+        self.type = fieldtype
+        self.name = name
+
+class Message():
+    def __init__(self, name: str, fields: list[Field]):
+        self.name = name
+        self.fields = fields
+
+Decl = Interface | Message
+
+name_dict: dict[str, Decl] = {}
+
+class Parser():
+    def __init__(self, tokens: list[Lexeme]):
+        self.tokens = tokens
+        self.current = 0
+
+    def peektype(self) -> LexemeType:
+        return self.tokens[self.current].type
+
+    def peekvalue(self) -> str:
+        return self.tokens[self.current].value
+
+    def consume(self) -> Lexeme:
+        self.current += 1
+        return self.tokens[self.current - 1]
+
+    def consume_identifier(self) -> str:
+        tok = self.consume()
+        if tok.type != LexemeType.NAME:
+            sys.exit("Expected identifier got %s" % tok.type)
+        return tok.value
+
+    def consume_check(self, lex_type: LexemeType):
+        tok = self.consume()
+        if tok.type != lex_type:
+            sys.exit("Expected %s got %s" % (lex_type, tok.type))
+
+    def consume_check_identifier(self, name: str):
+        tok = self.consume()
+        if tok.type != LexemeType.NAME:
+            sys.exit("Expected '%s' got a %s" % (name, tok.type))
+        if tok.value != name:
+            sys.exit("Expected '%s' got '%s'" % (name, tok.value))
+
+    def parse(self) -> list[Decl]:
+        decls = []
+        while self.peektype() != LexemeType.EOF:
+            decls.append(self.decl())
+        return decls
+
+    def decl(self) -> Decl:
+        token = self.consume()
+        if token.type != LexemeType.NAME:
+            sys.exit("Unexpected token: %s" % token)
+        if token.value == "message":
+            return self.message()
+        elif token.value == "interface":
+            return self.interface()
+        sys.exit("Unexpected identifier '%s', expected message or interface" % token.value)
+
+    def interface(self):
+        # "interface" consumed by decl.
+        name = self.consume_identifier()
+
+        if name in name_dict.keys():
+            sys.exit("Name '%s' already exists." % name)
+
+        self.consume_check(LexemeType.LEFT_BRACE)
+
+        methods: list[Method] = []
+        method_names = set()
+        while self.peektype() != LexemeType.RIGHT_BRACE:
+            m = self.method()
+            if m.name in method_names:
+                sys.exit("Method %s declared twice on %s" % (m.name, name))
+            method_names.add(m.name)
+            methods.append(m)
+
+        self.consume_check(LexemeType.RIGHT_BRACE)
+
+        i = Interface(name, methods)
+        name_dict[name] = i
+        return i
+
+    def method(self):
+        self.consume_check_identifier("method")
+
+        name = self.consume_identifier()
+
+        self.consume_check(LexemeType.LEFT_PAREN)
+
+        request = self.consume_identifier()
+
+        self.consume_check(LexemeType.RIGHT_PAREN)
+        self.consume_check(LexemeType.ARROW)
+        self.consume_check(LexemeType.LEFT_PAREN)
+
+        response = self.consume_identifier()
+
+        self.consume_check(LexemeType.RIGHT_PAREN)
+        self.consume_check(LexemeType.SEMICOLON)
+
+        return Method(name, request, response)
+
+    def message(self):
+        # "message" consumed by decl.
+
+        name = self.consume_identifier()
+
+        if name in name_dict:
+            sys.exit("Name '%s' already exists." % name)
+
+        self.consume_check(LexemeType.LEFT_BRACE)
+
+        fields: list[Field] = []
+        field_names = set()
+        while self.peektype() != LexemeType.RIGHT_BRACE:
+            f = self.field()
+            if f.name in field_names:
+                sys.exit("Field %s declared twice on %s" % (f.name, name))
+            field_names.add(f.name)
+            fields.append(f)
+
+        self.consume_check(LexemeType.RIGHT_BRACE)
+
+        m = Message(name, fields)
+        name_dict[name] = m
+        return m
+
+    def field(self):
+
+        field_type_str = self.consume_identifier()
+        if field_type_str not in type_str_dict.keys():
+            sys.exit("Expected type got '%s'" % field_type_str)
+        field_type = type_str_dict[field_type_str]
+
+        name = self.consume_identifier()
+        self.consume_check(LexemeType.SEMICOLON)
+        return Field(field_type, name)
+
+def type_check(decls: list[Decl]):
+    for decl in decls:
+        if type(decl) is Interface:
+            for method in decl.methods:
+                if method.request not in name_dict.keys():
+                    sys.exit("Request type '%s' for '%s.%s' does not exist" % (method.request, decl.name, method.name))
+                if type(name_dict[method.request]) is not Message:
+                    sys.exit("Request type '%s' for '%s.%s' should be a message" % (method.request, decl.name, method.name))
+                if method.response not in name_dict.keys():
+                    sys.exit("Response type '%s' for '%s.%s' does not exist" % (method.response, decl.name, method.name))
+                if type(name_dict[method.response]) is not Message:
+                    sys.exit("Response type '%s' for '%s.%s' should be a message" % (method.response, decl.name, method.name))
+
+def print_ast(decls: list[Decl]):
+    for decl in decls:
+        if type(decl) is Interface:
+            print("%s (Interface)" % decl.name)
+            for method in decl.methods:
+                print("\t%s (%s -> %s)" % (method.name, method.request, method.response))
+        elif type(decl) is Message:
+            print("%s (Message)" % decl.name)
+            for field in decl.fields:
+                print("\t%s %s" % (field.type.name, field.name))
+        else:
+            print("unknown type")
+
+
+
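A quick end-to-end check of the new lexer and parser; the schema text below is illustrative, but it sticks to the grammar `parser.py` accepts (`message`, `interface`, and `method` declarations):

```python
from parser import lexer, Parser, type_check, print_ast

schema = """
message OpenFileRequest {
  string path;
}

message OpenFileResponse {
  u64 size;
  bytes data;
}

interface VFS {
  method OpenFile(OpenFileRequest) -> (OpenFileResponse);
}
"""

tokens = lexer(schema)        # Lexeme stream ending in EOF.
ast = Parser(tokens).parse()  # list[Decl] of Messages and Interfaces.
type_check(ast)               # Request/response types must be declared messages.
print_ast(ast)
```

Note that `message`, `interface`, and `method` are lexed as ordinary NAME lexemes; the parser decides keyword-ness by value, which keeps the lexer trivial.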
diff --git a/yunq/yunq.py b/yunq/yunq.py
index f4b7afb..7ca1311 100644
--- a/yunq/yunq.py
+++ b/yunq/yunq.py
@@ -1,277 +1,8 @@
-from enum import Enum
 import os
 import sys
 
-class LexemeType(Enum):
-    NONE = 0
-
-    EOF = 1
-
-    # Identifiers and Keywords
-    NAME = 2
-
-    # Symbols
-    LEFT_BRACE = 3
-    RIGHT_BRACE = 4
-    LEFT_PAREN = 5
-    RIGHT_PAREN = 6
-    ARROW = 7
-    SEMICOLON = 8
-
-
-class Lexeme():
-    def __init__(self, lextype: LexemeType, value = None):
-        self.type = lextype
-        self.value = value
-
-    def __str__(self):
-        if self.value:
-            return "(%s, %s)" % (self.type, self.value)
-        return "(%s)" % self.type
-
-    def __repr__(self):
-        return self.__str__()
-
-
-def lexer(program: str):
-    line = 1
-    start = 0
-    current = 0
-    tokens: list[Lexeme] = []
-    while current < len(program):
-        # Scan next token.
-        start = current
-        curr = program[current]
-        if curr == '\n':
-            line += 1
-        elif curr == '\t' or curr == ' ' or curr == '\r':
-            pass
-        elif curr == '{':
-            tokens.append(Lexeme(LexemeType.LEFT_BRACE))
-        elif curr == '}':
-            tokens.append(Lexeme(LexemeType.RIGHT_BRACE))
-        elif curr == '(':
-            tokens.append(Lexeme(LexemeType.LEFT_PAREN))
-        elif curr == ')':
-            tokens.append(Lexeme(LexemeType.RIGHT_PAREN))
-        elif curr == ';':
-            tokens.append(Lexeme(LexemeType.SEMICOLON))
-        elif curr == '-':
-            current += 1
-            if program[current] == '>':
-                tokens.append(Lexeme(LexemeType.ARROW))
-            else:
-                sys.exit("Expected > after - got '%s' on line %d" % (program[current], line))
-        elif curr.isalpha():
-            while program[current + 1].isalnum() or program[current + 1] == '_':
-                current += 1
-            tokens.append(Lexeme(LexemeType.NAME, program[start:current + 1]))
-        else:
-            sys.exit("Got unexpected token %s on line %s." % (curr, line))
-
-        current += 1
-
-    tokens.append(Lexeme(LexemeType.EOF))
-
-    return tokens
-
-
-class Method():
-    def __init__(self, name: str, request: str, response: str):
-        self.name = name
-        self.request = request
-        self.response = response
-
-class Interface():
-    def __init__(self, name: str, methods: list[Method]):
-        self.name = name
-        self.methods = methods
-
-class Type(Enum):
-    NONE = 0
-    U64 = 1
-    I64 = 2
-    STRING = 3
-    BYTES = 4
-    CAPABILITY = 5
-
-type_str_dict = {
-    "u64": Type.U64,
-    "i64": Type.I64,
-    "string": Type.STRING,
-    "bytes": Type.BYTES,
-    "capability": Type.CAPABILITY,
-    }
-
-class Field():
-    def __init__(self, fieldtype: Type, name: str):
-        self.type = fieldtype
-        self.name = name
-
-class Message():
-    def __init__(self, name: str, fields: list[Field]):
-        self.name = name
-        self.fields = fields
-
-Decl = Interface | Message
-
-name_dict: dict[str, Decl] = {}
-
-class Parser():
-    def __init__(self, tokens: list[Lexeme]):
-        self.tokens = tokens
-        self.current = 0
-
-    def peektype(self) -> LexemeType:
-        return self.tokens[self.current].type
-
-    def peekvalue(self) -> str:
-        return self.tokens[self.current].value
-
-    def consume(self) -> Lexeme:
-        self.current += 1
-        return self.tokens[self.current - 1]
-
-    def consume_identifier(self) -> str:
-        tok = self.consume()
-        if tok.type != LexemeType.NAME:
-            sys.exit("Expected identifier got %s" % tok.type)
-        return tok.value
-
-    def consume_check(self, lex_type: LexemeType):
-        tok = self.consume()
-        if tok.type != lex_type:
-            sys.exit("Expected %s got %s" % (lex_type, tok_type))
-
-    def consume_check_identifier(self, name: str):
-        tok = self.consume()
-        if tok.type != LexemeType.NAME:
-            sys.exit("Expected '%s' got a %s" % (name, tok.type))
-        if tok.value != name:
-            sys.exit("Expected '%s' got '%s'" % (name, tok.value))
-
-    def parse(self) -> list[Decl]:
-        decls = []
-        while self.peektype() != LexemeType.EOF:
-            decls.append(self.decl())
-        return decls
-
-    def decl(self) -> Decl:
-        token = self.consume()
-        if token.type != LexemeType.NAME:
-            sys.exit("Unexpected token: %s", token)
-        if token.value == "message":
-            return self.message()
-        elif token.value == "interface":
-            return self.interface()
-        sys.exit("Unexpected identifier '%s', expected message or interface" % token.value)
-
-    def interface(self):
-        # "interface" consumed by decl.
-        name = self.consume_identifier()
-
-        if name in name_dict.keys():
-            sys.exit("Name '%s' already exists." % name)
-
-        self.consume_check(LexemeType.LEFT_BRACE)
-
-        methods: list[Method] = []
-        method_names = set()
-        while self.peektype() != LexemeType.RIGHT_BRACE:
-            m = self.method()
-            if m.name in method_names:
-                sys.exit("Method %s declared twice on %s" % (m.name, name))
-            method_names.add(m.name)
-            methods.append(m)
-
-        self.consume_check(LexemeType.RIGHT_BRACE)
-
-        i = Interface(name, methods)
-        name_dict[name] = i
-        return i
-
-    def method(self):
-        self.consume_check_identifier("method")
-
-        name = self.consume_identifier()
-
-        self.consume_check(LexemeType.LEFT_PAREN)
-
-        request = self.consume_identifier()
-
-        self.consume_check(LexemeType.RIGHT_PAREN)
-        self.consume_check(LexemeType.ARROW)
-        self.consume_check(LexemeType.LEFT_PAREN)
-
-        response = self.consume_identifier()
-
-        self.consume_check(LexemeType.RIGHT_PAREN)
-        self.consume_check(LexemeType.SEMICOLON)
-
-        return Method(name, request, response)
-
-    def message(self):
-        # "message" consumed by decl.
-
-        name = self.consume_identifier()
-
-        if name in name_dict:
-            sys.exit("Name '%s' already exists." % name)
-
-        self.consume_check(LexemeType.LEFT_BRACE)
-
-        fields: list[Field] = []
-        field_names = set()
-        while self.peektype() != LexemeType.RIGHT_BRACE:
-            f = self.field()
-            if f.name in field_names:
-                sys.exit("Field %s declared twice on %s" % (f.name, name))
-            field_names.add(f.name)
-            fields.append(f)
-
-        self.consume_check(LexemeType.RIGHT_BRACE)
-
-        m = Message(name, fields)
-        name_dict[name] = m
-        return m
-
-    def field(self):
-
-        field_type_str = self.consume_identifier()
-        if field_type_str not in type_str_dict.keys():
-            sys.exit("Expected type got '%s'" % field_type_str)
-        field_type = type_str_dict[field_type_str]
-
-        name = self.consume_identifier()
-        self.consume_check(LexemeType.SEMICOLON)
-        return Field(field_type, name)
-
-def type_check(decls: list[Decl]):
-    for decl in decls:
-        if type(decl) is Interface:
-            for method in decl.methods:
-                if method.request not in name_dict.keys():
-                    sys.exit("Request type '%s' for '%s.%s' does not exist" % (method.request, decl.name, method.name))
-                if type(name_dict[method.request]) is not Message:
-                    sys.exit("Request type '%s' for '%s.%s' should be a message" % (method.request, decl.name, method.name))
-                if method.response not in name_dict.keys():
-                    sys.exit("Response type '%s' for '%s.%s' does not exist" % (method.response, decl.name, method.name))
-                if type(name_dict[method.response]) is not Message:
-                    sys.exit("Response type '%s' for '%s.%s' should be a message" % (method.response, decl.name, method.name))
-
-def print_ast(decls: list[Decl]):
-    for decl in decls:
-        if type(decl) is Interface:
-            print("%s (Interface)" % decl.name)
-            for method in decl.methods:
-                print("\t%s (%s -> %s)" % (method.name, method.request, method.response))
-        elif type(decl) is Message:
-            print("%s (Message)" % decl.name)
-            for field in decl.fields:
-                print("\t%s %s" % (field.type.name, field.name))
-        else:
-            print("unknown type")
-
+from codegen_message import *
+from parser import *
 
 def main():
     if len(sys.argv) != 2:
@@ -286,7 +17,8 @@ def main():
     parser = Parser(lexemes)
     ast = parser.parse()
     type_check(ast)
-    print_ast(ast)
+
+    print(generate_message_impl(filename, ast))
 
 if __name__ == "__main__":
     main()
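The generated `ParseFromBytes()` implies a wire layout: a 24-byte header, one 8-byte slot per field in declaration order at `header_size + 8 * offset`, and string data stored out of line behind an `ExtPointer` (offset, length). Below is a minimal sketch of a buffer in that shape, assuming little-endian encoding, absolute offsets into the buffer, and unspecified header contents; the field names are hypothetical:

```python
import struct

def serialize_example(flags: int, path: str) -> bytes:
    header = struct.pack("<IIIIQ", 0, 0, 0, 0, 0)  # 24-byte header; real field values unspecified here.
    path_bytes = path.encode()
    core_size = len(header) + 8 * 2                 # Header plus two 8-byte field slots.
    slot_flags = struct.pack("<Q", flags)           # u64 field stored inline in its slot.
    slot_path = struct.pack("<II", core_size, len(path_bytes))  # ExtPointer {offset, length}.
    return header + slot_flags + slot_path + path_bytes

buf = serialize_example(0o644, "/init.bin")
assert len(buf) == 24 + 16 + len("/init.bin")
```

Under those assumptions, the u64 getter would read its inline slot and the string getter would follow the ExtPointer into the tail of the buffer, matching the IMPL_PARSE_U64 and IMPL_PARSE_STRING templates above.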