diff --git a/.gitignore b/.gitignore index e8346ca..9e8c8ee 100644 --- a/.gitignore +++ b/.gitignore @@ -8,3 +8,4 @@ sysroot/usr rust/target yunq/venv +yunq/rust/target diff --git a/yunq/rust/Cargo.lock b/yunq/rust/Cargo.lock new file mode 100644 index 0000000..7d49b6b --- /dev/null +++ b/yunq/rust/Cargo.lock @@ -0,0 +1,237 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 3 + +[[package]] +name = "anstream" +version = "0.6.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "418c75fa768af9c03be99d17643f93f79bbba589895012a80e3452a19ddda15b" +dependencies = [ + "anstyle", + "anstyle-parse", + "anstyle-query", + "anstyle-wincon", + "colorchoice", + "is_terminal_polyfill", + "utf8parse", +] + +[[package]] +name = "anstyle" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "038dfcf04a5feb68e9c60b21c9625a54c2c0616e79b72b0fd87075a056ae1d1b" + +[[package]] +name = "anstyle-parse" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c03a11a9034d92058ceb6ee011ce58af4a9bf61491aa7e1e59ecd24bd40d22d4" +dependencies = [ + "utf8parse", +] + +[[package]] +name = "anstyle-query" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ad186efb764318d35165f1758e7dcef3b10628e26d41a44bc5550652e6804391" +dependencies = [ + "windows-sys", +] + +[[package]] +name = "anstyle-wincon" +version = "3.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "61a38449feb7068f52bb06c12759005cf459ee52bb4adc1d5a7c4322d716fb19" +dependencies = [ + "anstyle", + "windows-sys", +] + +[[package]] +name = "clap" +version = "4.5.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5db83dced34638ad474f39f250d7fea9598bdd239eaced1bdf45d597da0f433f" +dependencies = [ + "clap_builder", + "clap_derive", +] + +[[package]] +name = 
"clap_builder" +version = "4.5.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f7e204572485eb3fbf28f871612191521df159bc3e15a9f5064c66dba3a8c05f" +dependencies = [ + "anstream", + "anstyle", + "clap_lex", + "strsim", +] + +[[package]] +name = "clap_derive" +version = "4.5.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c780290ccf4fb26629baa7a1081e68ced113f1d3ec302fa5948f1c381ebf06c6" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "clap_lex" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4b82cf0babdbd58558212896d1a4272303a57bdb245c2bf1147185fb45640e70" + +[[package]] +name = "colorchoice" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b6a852b24ab71dffc585bcb46eaf7959d175cb865a7152e35b348d1b2960422" + +[[package]] +name = "heck" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" + +[[package]] +name = "is_terminal_polyfill" +version = "1.70.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8478577c03552c21db0e2724ffb8986a5ce7af88107e6be5d2ee6e158c12800" + +[[package]] +name = "proc-macro2" +version = "1.0.85" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22244ce15aa966053a896d1accb3a6e68469b97c7f33f284b99f0d576879fc23" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.36" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fa76aaf39101c457836aec0ce2316dbdc3ab723cdda1c6bd4e6ad4208acaca7" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "strsim" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" 
+ +[[package]] +name = "syn" +version = "2.0.66" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c42f3f41a2de00b01c0aaad383c5a45241efc8b2d1eda5661812fda5f3cdcff5" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "unicode-ident" +version = "1.0.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" + +[[package]] +name = "utf8parse" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" + +[[package]] +name = "windows-sys" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" +dependencies = [ + "windows-targets", +] + +[[package]] +name = "windows-targets" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6f0713a46559409d202e70e28227288446bf7841d3211583a4b53e3f6d96e7eb" +dependencies = [ + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_gnullvm", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7088eed71e8b8dda258ecc8bac5fb1153c5cffaf2578fc8ff5d61e23578d3263" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9985fd1504e250c615ca5f281c3f7a6da76213ebd5ccc9561496568a2752afb6" + +[[package]] +name = "windows_i686_gnu" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "88ba073cf16d5372720ec942a8ccbf61626074c6d4dd2e745299726ce8b89670" + +[[package]] +name = 
/// The kinds of token produced by [`lex_input`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum TokenType {
    EndOfFile,
    Name,
    LeftBrace,
    RightBrace,
    LeftParen,
    RightParen,
    Arrow,
    Semicolon,
    Dot,
    Equals,
}

/// A single lexed token together with where it was found in the input.
///
/// `line` is 1-based. `start` is the 1-based column (counted in characters)
/// of the token's first character and `end` is one past its last column.
/// `chars` is the exact source text of the token, or `"EOF"` for the
/// end-of-file marker.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Token {
    pub token_type: TokenType,
    line: usize,
    start: usize,
    end: usize,
    pub chars: String,
}

impl std::fmt::Display for Token {
    /// Formats the token as its text followed by its line and start column,
    /// for use inside error messages.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(
            f,
            "'{}' at line {} position {} ",
            self.chars, self.line, self.start
        )
    }
}

/// Splits `input` into tokens.
///
/// Whitespace and `//` line comments are skipped. Names start with an ASCII
/// letter and continue with ASCII letters, digits, or `_`. The returned
/// vector always ends with a single [`TokenType::EndOfFile`] token.
///
/// # Errors
///
/// Returns a message naming the line and column when a character that cannot
/// start a token is found, when `/` is not followed by a second `/`, or when
/// `-` is not followed by `>`.
pub fn lex_input(input: &str) -> Result<Vec<Token>, String> {
    let mut tokens = Vec::new();
    let mut line = 1;
    let mut position = 0;

    // `char_indices` yields byte offsets, so slicing `input` stays correct
    // even when a comment contains multi-byte characters.
    let mut chars = input.char_indices().peekable();

    while let Some((offset, c)) = chars.next() {
        position += 1;
        let token_type = match c {
            '\n' => {
                position = 0;
                line += 1;
                continue;
            }
            ' ' | '\t' | '\r' => continue,
            '/' => {
                if !matches!(chars.next(), Some((_, '/'))) {
                    return Err(format!(
                        "Unexpected token '/' at line {} position {}",
                        line, position
                    ));
                }
                position += 1;
                // Skip the rest of the comment. The newline itself is left
                // for the main loop so line accounting lives in one place.
                while chars.next_if(|&(_, ch)| ch != '\n').is_some() {
                    position += 1;
                }
                continue;
            }
            '-' => match chars.next() {
                Some((_, '>')) => {
                    position += 1;
                    tokens.push(Token {
                        token_type: TokenType::Arrow,
                        line,
                        start: position - 1,
                        end: position + 1,
                        chars: "->".to_string(),
                    });
                    continue;
                }
                Some((_, other)) => {
                    return Err(format!(
                        "Expected > to follow - (to form arrow '->') on line {} at position {}. But got '{}'",
                        line,
                        position + 1,
                        other
                    ))
                }
                None => {
                    return Err(format!(
                        "Expected > to follow - but got end of input instead on line {} position {}",
                        line, position
                    ))
                }
            },
            'a'..='z' | 'A'..='Z' => {
                let start = position;
                let mut end_offset = offset + c.len_utf8();
                // Only consume characters that belong to the name; whatever
                // follows (including a newline) is handled by the main loop.
                while let Some((next_offset, next)) =
                    chars.next_if(|&(_, ch)| ch.is_ascii_alphanumeric() || ch == '_')
                {
                    position += 1;
                    end_offset = next_offset + next.len_utf8();
                }
                tokens.push(Token {
                    token_type: TokenType::Name,
                    line,
                    start,
                    end: position + 1,
                    chars: input[offset..end_offset].to_string(),
                });
                continue;
            }
            '{' => TokenType::LeftBrace,
            '}' => TokenType::RightBrace,
            '(' => TokenType::LeftParen,
            ')' => TokenType::RightParen,
            ';' => TokenType::Semicolon,
            '.' => TokenType::Dot,
            '=' => TokenType::Equals,
            _ => {
                return Err(format!(
                    "Unexpected token on line {} character {}: {}",
                    line, position, c
                ))
            }
        };

        // Every arm that reaches this point matched a one-character token.
        tokens.push(Token {
            token_type,
            line,
            start: position,
            end: position + 1,
            chars: c.to_string(),
        });
    }

    tokens.push(Token {
        token_type: TokenType::EndOfFile,
        line,
        start: position,
        end: position,
        chars: "EOF".to_string(),
    });

    Ok(tokens)
}
+} diff --git a/yunq/rust/src/main.rs b/yunq/rust/src/main.rs new file mode 100644 index 0000000..0fb7c97 --- /dev/null +++ b/yunq/rust/src/main.rs @@ -0,0 +1,30 @@ +mod lexer; +mod parser; + +use std::error::Error; +use std::fs::read_to_string; + +use clap::Parser; + +#[derive(Parser)] +#[command(about)] +struct Args { + // The .yunq file to parse + #[arg(short, long)] + input_path: String, +} + +fn main() -> Result<(), Box> { + let args = Args::parse(); + let input = read_to_string(args.input_path)?; + let tokens = lexer::lex_input(&input)?; + + let mut ast_parser = parser::Parser::new(&tokens); + ast_parser.parse_ast()?; + + for decl in ast_parser.ast() { + println!("{:?}", decl); + } + + Ok(()) +} diff --git a/yunq/rust/src/parser.rs b/yunq/rust/src/parser.rs new file mode 100644 index 0000000..d2ad3d2 --- /dev/null +++ b/yunq/rust/src/parser.rs @@ -0,0 +1,293 @@ +use std::fmt::Debug; +use std::fmt::Display; + +use crate::lexer::Token; +use crate::lexer::TokenType; + +pub enum Type { + U64, + I64, + Message(String), +} + +impl Display for Type { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!( + f, + "{}", + match self { + Type::U64 => "u64", + Type::I64 => "i64", + Type::Message(s) => s, + } + ) + } +} + +impl TryFrom<&String> for Type { + type Error = String; + + fn try_from(value: &String) -> Result { + match value.as_str() { + "u64" => Ok(Type::U64), + "i64" => Ok(Type::I64), + _ => Ok(Type::Message(value.clone())), + } + } +} + +pub struct Field { + field_type: Type, + name: String, + number: u64, + repeated: bool, +} + +pub struct Message { + name: String, + fields: Vec, +} + +pub struct Method { + name: String, + number: u64, + request: Option, + response: Option, +} + +impl Debug for Method { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let request_str = match &self.request { + Some(s) => format!("({})", s), + None => "()".to_string(), + }; + let response_str = match &self.response { + Some(s) 
=> format!("({})", s), + None => "()".to_string(), + }; + + write!( + f, + "{}: {}: {} -> {}", + self.number, self.name, request_str, response_str + ) + } +} + +pub struct Interface { + name: String, + methods: Vec, +} + +pub enum Decl { + Message(Message), + Interface(Interface), + Package(Vec), +} + +impl Debug for Decl { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Decl::Message(m) => { + writeln!(f, "Message {}", m.name)?; + for field in &m.fields { + let typestr = if field.repeated { + format!("repeated {}", field.field_type) + } else { + field.field_type.to_string() + }; + writeln!(f, "\t{}: {} ({})", field.number, field.name, typestr)?; + } + } + Decl::Interface(i) => { + writeln!(f, "Interface {}", i.name)?; + for method in &i.methods { + writeln!(f, "\t{:?}", method)?; + } + } + Decl::Package(p) => { + writeln!(f, "Package {};", p.join("."))?; + } + } + Ok(()) + } +} + +pub struct Parser<'a> { + tokens: &'a Vec, + current_index: usize, + ast: Vec, +} + +impl<'a> Parser<'a> { + pub fn new(tokens: &'a Vec) -> Self { + Self { + tokens, + current_index: 0, + ast: Vec::new(), + } + } + + fn next_type_is(&self, tok_type: TokenType) -> bool { + self.current_index < self.tokens.len() + && self.tokens[self.current_index].token_type == tok_type + } + + fn consume_token<'b>(&'b mut self) -> &'b Token { + if self.current_index >= self.tokens.len() { + panic!("Consumed tokens past end of input.") + } + + let t = &self.tokens[self.current_index]; + self.current_index += 1; + t + } + + fn consume_token_type<'b>(&'b mut self, t: TokenType) -> Result<&'b Token, String> { + let token = self.consume_token(); + if token.token_type == t { + Ok(token) + } else { + Err(format!("Expected {:?} but found {}", t, token)) + } + } + + fn consume_identifier<'b>(&'b mut self) -> Result<&'b Token, String> { + self.consume_token_type(TokenType::Name) + } + + fn consume_keyword<'b>(&'b mut self, keyword: &str) -> Result<&'b Token, String> { + let 
token = self.consume_token_type(TokenType::Name)?; + if token.chars != keyword { + Err(format!("Expected '{}' but got {}", keyword, token)) + } else { + Ok(token) + } + } + + fn package(&mut self) -> Result { + // 'package' was consumed by decl. + let mut package_names = vec![self.consume_identifier()?.chars.clone()]; + while self.next_type_is(TokenType::Dot) { + self.consume_token_type(TokenType::Dot)?; + package_names.push(self.consume_identifier()?.chars.clone()); + } + self.consume_token_type(TokenType::Semicolon)?; + Ok(Decl::Package(package_names)) + } + + fn field(&mut self, number: u64) -> Result { + let mut type_identifier = self.consume_identifier()?; + let mut repeated = false; + if type_identifier.chars == "repeated" { + repeated = true; + type_identifier = self.consume_identifier()?; + } + + let parsed_type = Type::try_from(&type_identifier.chars) + .map_err(|err| format!("{}: {}", err, type_identifier))?; + let name_identifier = self.consume_identifier()?.chars.clone(); + + self.consume_token_type(TokenType::Semicolon)?; + + Ok(Field { + field_type: parsed_type, + name: name_identifier, + number, + repeated, + }) + } + + fn message(&mut self) -> Result { + // 'message' was consumed by decl. 
+ let name = self.consume_identifier()?.chars.clone(); + + self.consume_token_type(TokenType::LeftBrace)?; + + let mut fields = Vec::new(); + let mut next_field_number = 0; + while !self.next_type_is(TokenType::RightBrace) { + fields.push(self.field(next_field_number)?); + next_field_number += 1; + } + + self.consume_token_type(TokenType::RightBrace)?; + + Ok(Decl::Message(Message { name, fields })) + } + + fn method(&mut self, number: u64) -> Result { + self.consume_keyword("method")?; + + let name = self.consume_identifier()?.chars.clone(); + + self.consume_token_type(TokenType::LeftParen)?; + let request = if self.next_type_is(TokenType::Name) { + Some(self.consume_identifier()?.chars.clone()) + } else { + None + }; + self.consume_token_type(TokenType::RightParen)?; + + self.consume_token_type(TokenType::Arrow)?; + + self.consume_token_type(TokenType::LeftParen)?; + let response = if self.next_type_is(TokenType::Name) { + Some(self.consume_identifier()?.chars.clone()) + } else { + None + }; + self.consume_token_type(TokenType::RightParen)?; + + self.consume_token_type(TokenType::Semicolon)?; + + Ok(Method { + name, + request, + response, + number, + }) + } + + fn interface(&mut self) -> Result { + let name = self.consume_identifier()?.chars.clone(); + + self.consume_token_type(TokenType::LeftBrace)?; + + let mut methods = Vec::new(); + let mut next_method_number = 0; + while !self.next_type_is(TokenType::RightBrace) { + methods.push(self.method(next_method_number)?); + next_method_number += 1; + } + + self.consume_token_type(TokenType::RightBrace)?; + + Ok(Decl::Interface(Interface { name, methods })) + } + + fn decl(&mut self) -> Result { + let token = self.consume_identifier()?; + match token.chars.as_str() { + "package" => self.package(), + "message" => self.message(), + "interface" => self.interface(), + _ => Err(format!( + "Expected one of 'package', 'message', or 'interface' but got {}", + token + )), + } + } + + pub fn parse_ast(&mut self) -> Result<(), 
String> { + while !self.next_type_is(TokenType::EndOfFile) { + let decl = self.decl()?; + self.ast.push(decl); + } + Ok(()) + } + + pub fn ast(&'a mut self) -> &'a Vec { + &self.ast + } +}