Create a first pass at a Rust parser for the yunq language.

This commit is contained in:
Drew Galbraith 2024-06-11 13:01:58 -07:00
parent 5b1debde54
commit 1cda053758
6 changed files with 793 additions and 0 deletions

1
.gitignore vendored
View File

@ -8,3 +8,4 @@ sysroot/usr
rust/target
yunq/venv
yunq/rust/target

237
yunq/rust/Cargo.lock generated Normal file
View File

@ -0,0 +1,237 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 3
[[package]]
name = "anstream"
version = "0.6.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "418c75fa768af9c03be99d17643f93f79bbba589895012a80e3452a19ddda15b"
dependencies = [
"anstyle",
"anstyle-parse",
"anstyle-query",
"anstyle-wincon",
"colorchoice",
"is_terminal_polyfill",
"utf8parse",
]
[[package]]
name = "anstyle"
version = "1.0.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "038dfcf04a5feb68e9c60b21c9625a54c2c0616e79b72b0fd87075a056ae1d1b"
[[package]]
name = "anstyle-parse"
version = "0.2.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c03a11a9034d92058ceb6ee011ce58af4a9bf61491aa7e1e59ecd24bd40d22d4"
dependencies = [
"utf8parse",
]
[[package]]
name = "anstyle-query"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ad186efb764318d35165f1758e7dcef3b10628e26d41a44bc5550652e6804391"
dependencies = [
"windows-sys",
]
[[package]]
name = "anstyle-wincon"
version = "3.0.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "61a38449feb7068f52bb06c12759005cf459ee52bb4adc1d5a7c4322d716fb19"
dependencies = [
"anstyle",
"windows-sys",
]
[[package]]
name = "clap"
version = "4.5.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5db83dced34638ad474f39f250d7fea9598bdd239eaced1bdf45d597da0f433f"
dependencies = [
"clap_builder",
"clap_derive",
]
[[package]]
name = "clap_builder"
version = "4.5.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f7e204572485eb3fbf28f871612191521df159bc3e15a9f5064c66dba3a8c05f"
dependencies = [
"anstream",
"anstyle",
"clap_lex",
"strsim",
]
[[package]]
name = "clap_derive"
version = "4.5.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c780290ccf4fb26629baa7a1081e68ced113f1d3ec302fa5948f1c381ebf06c6"
dependencies = [
"heck",
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "clap_lex"
version = "0.7.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4b82cf0babdbd58558212896d1a4272303a57bdb245c2bf1147185fb45640e70"
[[package]]
name = "colorchoice"
version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0b6a852b24ab71dffc585bcb46eaf7959d175cb865a7152e35b348d1b2960422"
[[package]]
name = "heck"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea"
[[package]]
name = "is_terminal_polyfill"
version = "1.70.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f8478577c03552c21db0e2724ffb8986a5ce7af88107e6be5d2ee6e158c12800"
[[package]]
name = "proc-macro2"
version = "1.0.85"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "22244ce15aa966053a896d1accb3a6e68469b97c7f33f284b99f0d576879fc23"
dependencies = [
"unicode-ident",
]
[[package]]
name = "quote"
version = "1.0.36"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0fa76aaf39101c457836aec0ce2316dbdc3ab723cdda1c6bd4e6ad4208acaca7"
dependencies = [
"proc-macro2",
]
[[package]]
name = "strsim"
version = "0.11.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f"
[[package]]
name = "syn"
version = "2.0.66"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c42f3f41a2de00b01c0aaad383c5a45241efc8b2d1eda5661812fda5f3cdcff5"
dependencies = [
"proc-macro2",
"quote",
"unicode-ident",
]
[[package]]
name = "unicode-ident"
version = "1.0.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b"
[[package]]
name = "utf8parse"
version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821"
[[package]]
name = "windows-sys"
version = "0.52.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d"
dependencies = [
"windows-targets",
]
[[package]]
name = "windows-targets"
version = "0.52.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6f0713a46559409d202e70e28227288446bf7841d3211583a4b53e3f6d96e7eb"
dependencies = [
"windows_aarch64_gnullvm",
"windows_aarch64_msvc",
"windows_i686_gnu",
"windows_i686_gnullvm",
"windows_i686_msvc",
"windows_x86_64_gnu",
"windows_x86_64_gnullvm",
"windows_x86_64_msvc",
]
[[package]]
name = "windows_aarch64_gnullvm"
version = "0.52.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7088eed71e8b8dda258ecc8bac5fb1153c5cffaf2578fc8ff5d61e23578d3263"
[[package]]
name = "windows_aarch64_msvc"
version = "0.52.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9985fd1504e250c615ca5f281c3f7a6da76213ebd5ccc9561496568a2752afb6"
[[package]]
name = "windows_i686_gnu"
version = "0.52.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "88ba073cf16d5372720ec942a8ccbf61626074c6d4dd2e745299726ce8b89670"
[[package]]
name = "windows_i686_gnullvm"
version = "0.52.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "87f4261229030a858f36b459e748ae97545d6f1ec60e5e0d6a3d32e0dc232ee9"
[[package]]
name = "windows_i686_msvc"
version = "0.52.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "db3c2bf3d13d5b658be73463284eaf12830ac9a26a90c717b7f771dfe97487bf"
[[package]]
name = "windows_x86_64_gnu"
version = "0.52.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4e4246f76bdeff09eb48875a0fd3e2af6aada79d409d33011886d3e1581517d9"
[[package]]
name = "windows_x86_64_gnullvm"
version = "0.52.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "852298e482cd67c356ddd9570386e2862b5673c85bd5f88df9ab6802b334c596"
[[package]]
name = "windows_x86_64_msvc"
version = "0.52.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bec47e5bfd1bff0eeaf6d8b485cc1074891a197ab4225d504cb7a1ab88b02bf0"
[[package]]
name = "yunq"
version = "0.1.0"
dependencies = [
"clap",
]

7
yunq/rust/Cargo.toml Normal file
View File

@ -0,0 +1,7 @@
[package]
name = "yunq"
version = "0.1.0"
edition = "2021"
[dependencies]
clap = { version = "4.5.7", features = ["derive"] }

225
yunq/rust/src/lexer.rs Normal file
View File

@ -0,0 +1,225 @@
/// The kinds of token that `lex_input` produces.
#[derive(Debug, PartialEq)]
pub enum TokenType {
/// Marks the end of the input; `lex_input` pushes exactly one, as the last token.
EndOfFile,
/// An ASCII letter followed by any number of ASCII letters, digits or `_`.
Name,
/// `{`
LeftBrace,
/// `}`
RightBrace,
/// `(`
LeftParen,
/// `)`
RightParen,
/// `->`
Arrow,
/// `;`
Semicolon,
/// `.`
Dot,
/// `=`
Equals,
}
/// A single lexed token together with where it was found in the input.
#[derive(Debug)]
pub struct Token {
/// The kind of token.
pub token_type: TokenType,
/// Line the token was found on; `lex_input` starts counting lines at 1.
line: usize,
/// Column of the token's first character. `lex_input` counts a line's first
/// character as column 1; the `EndOfFile` token records the lexer's current column.
start: usize,
/// For every token except `EndOfFile`, one more than the column of the token's
/// last character; for `EndOfFile` it is equal to `start`.
end: usize,
/// Text of the token as taken from the input by `lex_input`; the literal
/// string "EOF" for the `EndOfFile` token.
pub chars: String,
}
impl std::fmt::Display for Token {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(
f,
"'{}' at line {} position {} ",
self.chars, self.line, self.start
)
}
}
/// Splits yunq source text into a list of [`Token`]s ending with an `EndOfFile` token.
///
/// Recognised input:
/// * spaces, tabs, carriage returns and newlines are skipped (newlines advance the line count);
/// * `//` starts a comment that runs to the end of the line;
/// * `{`, `}`, `(`, `)`, `;`, `.` and `=` are single-character tokens;
/// * `->` is the arrow token;
/// * an ASCII letter followed by ASCII letters, digits or `_` is a `Name`.
///
/// Lines are numbered from 1 and columns from 1. Each token records the column of
/// its first character in `start` and one past the column of its last character in
/// `end`; the `EndOfFile` token records the current column in both.
///
/// # Errors
///
/// Returns a message naming the line and column when a `/` is not followed by a
/// second `/`, when a `-` is not followed by `>`, or when any other character that
/// cannot start a token is found.
pub fn lex_input(input: &str) -> Result<Vec<Token>, String> {
    let mut tokens = Vec::new();
    let mut line = 1;
    // Column (1-based) of the character most recently consumed on the current line.
    let mut position = 0;
    // `char_indices` yields real byte offsets, so slicing `input` stays correct even
    // when the source contains multi-byte characters (e.g. inside comments).
    let mut chars = input.char_indices().peekable();

    while let Some((index, c)) = chars.next() {
        position += 1;
        let token_type = match c {
            '\n' => {
                position = 0;
                line += 1;
                continue;
            }
            ' ' | '\t' | '\r' => continue,
            '/' => match chars.next() {
                Some((_, '/')) => {
                    // Line comment: discard everything up to and including the newline.
                    for (_, skipped) in chars.by_ref() {
                        if skipped == '\n' {
                            position = 0;
                            line += 1;
                            break;
                        }
                    }
                    continue;
                }
                _ => {
                    return Err(format!(
                        "Unexpected token '/' at line {} position {}",
                        line, position
                    ));
                }
            },
            '{' => TokenType::LeftBrace,
            '}' => TokenType::RightBrace,
            '(' => TokenType::LeftParen,
            ')' => TokenType::RightParen,
            ';' => TokenType::Semicolon,
            '.' => TokenType::Dot,
            '=' => TokenType::Equals,
            '-' => match chars.next() {
                Some((_, '>')) => {
                    position += 1;
                    tokens.push(Token {
                        token_type: TokenType::Arrow,
                        line,
                        start: position - 1,
                        end: position + 1,
                        // Both characters are ASCII, so the arrow spans exactly two bytes.
                        chars: input[index..index + 2].to_string(),
                    });
                    continue;
                }
                Some((_, other)) => {
                    return Err(format!(
                        "Expected > to follow - (to form arrow '->') on line {} at position {}. But got '{}'",
                        line,
                        position + 1,
                        other
                    ));
                }
                None => {
                    return Err(format!(
                        "Expected > to follow - but got end of input instead on line {} position {}",
                        line, position
                    ));
                }
            },
            'a'..='z' | 'A'..='Z' => {
                let start = position;
                // Byte offset one past the last character of the name.
                let mut name_end = index + c.len_utf8();
                // Only peek: the character that ends the name (newline, punctuation,
                // whitespace, ...) is left for the main loop to handle.
                while let Some(&(next_index, next)) = chars.peek() {
                    if !(next.is_ascii_alphanumeric() || next == '_') {
                        break;
                    }
                    chars.next();
                    position += 1;
                    name_end = next_index + next.len_utf8();
                }
                tokens.push(Token {
                    token_type: TokenType::Name,
                    line,
                    start,
                    end: position + 1,
                    chars: input[index..name_end].to_string(),
                });
                continue;
            }
            _ => {
                return Err(format!(
                    "Unexpected token on line {} character {}: {}",
                    line, position, c
                ));
            }
        };

        // Every arm that falls through to here matched a single-character token.
        tokens.push(Token {
            token_type,
            line,
            start: position,
            end: position + 1,
            chars: c.to_string(),
        });
    }

    tokens.push(Token {
        token_type: TokenType::EndOfFile,
        line,
        start: position,
        end: position,
        chars: "EOF".to_string(),
    });
    Ok(tokens)
}

30
yunq/rust/src/main.rs Normal file
View File

@ -0,0 +1,30 @@
mod lexer;
mod parser;
use std::error::Error;
use std::fs::read_to_string;
use clap::Parser;
// Command-line arguments accepted by the yunq parser binary.
// (Kept as a plain comment so that it does not compete with `#[command(about)]`.)
#[derive(Parser)]
#[command(about)]
struct Args {
    /// The .yunq file to parse
    // The line above must be a `///` doc comment: clap's derive macro turns doc
    // comments on fields into the help text shown for the argument.
    #[arg(short, long)]
    input_path: String,
}
fn main() -> Result<(), Box<dyn Error>> {
let args = Args::parse();
let input = read_to_string(args.input_path)?;
let tokens = lexer::lex_input(&input)?;
let mut ast_parser = parser::Parser::new(&tokens);
ast_parser.parse_ast()?;
for decl in ast_parser.ast() {
println!("{:?}", decl);
}
Ok(())
}

293
yunq/rust/src/parser.rs Normal file
View File

@ -0,0 +1,293 @@
use std::fmt::Debug;
use std::fmt::Display;
use crate::lexer::Token;
use crate::lexer::TokenType;
/// The type of a message field.
pub enum Type {
/// Spelled `u64` in source.
U64,
/// Spelled `i64` in source.
I64,
/// Any other type name; holds the name exactly as written.
Message(String),
}
/// Prints a type the way it is spelled in source: `u64`, `i64`, or the message name.
impl Display for Type {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let spelled: &str = match self {
            Type::U64 => "u64",
            Type::I64 => "i64",
            Type::Message(name) => name.as_str(),
        };
        f.write_str(spelled)
    }
}
/// Maps a type name taken from source onto a [`Type`].
///
/// `u64` and `i64` map to the built-in variants; every other name is kept as a
/// `Type::Message`. The conversion as written never returns `Err`.
impl TryFrom<&String> for Type {
    type Error = String;

    fn try_from(value: &String) -> Result<Self, Self::Error> {
        let spelled = value.as_str();
        let parsed = if spelled == "u64" {
            Type::U64
        } else if spelled == "i64" {
            Type::I64
        } else {
            Type::Message(value.to_owned())
        };
        Ok(parsed)
    }
}
/// One field of a `Message`.
pub struct Field {
/// Declared type of the field.
field_type: Type,
/// Name of the field.
name: String,
/// Number assigned by the parser: fields are numbered from 0 in declaration order.
number: u64,
/// True when the type was preceded by the `repeated` keyword.
repeated: bool,
}
/// A parsed `message` declaration.
pub struct Message {
/// Name that follows the `message` keyword.
name: String,
/// Fields in declaration order.
fields: Vec<Field>,
}
/// One `method` entry of an `Interface`.
pub struct Method {
/// Name of the method.
name: String,
/// Number assigned by the parser: methods are numbered from 0 in declaration order.
number: u64,
/// Name written inside the first pair of parentheses, or `None` when they are empty.
request: Option<String>,
/// Name written inside the parentheses after `->`, or `None` when they are empty.
response: Option<String>,
}
/// Prints a method as `<number>: <name>: (<request>) -> (<response>)`, with empty
/// parentheses standing in for an absent request or response.
impl Debug for Method {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // `None` renders as "()" because the name defaults to the empty string.
        let parenthesize =
            |name: &Option<String>| format!("({})", name.as_deref().unwrap_or(""));

        write!(
            f,
            "{}: {}: {} -> {}",
            self.number,
            self.name,
            parenthesize(&self.request),
            parenthesize(&self.response)
        )
    }
}
/// A parsed `interface` declaration.
pub struct Interface {
/// Name that follows the `interface` keyword.
name: String,
/// Methods in declaration order.
methods: Vec<Method>,
}
/// A top-level declaration in a yunq file.
pub enum Decl {
/// A `message` declaration.
Message(Message),
/// An `interface` declaration.
Interface(Interface),
/// A `package` declaration; holds the dot-separated name components in order.
Package(Vec<String>),
}
/// Prints a declaration as a header line followed by one tab-indented line per
/// field or method. Every line, including the last, ends with a newline.
impl Debug for Decl {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Decl::Message(message) => {
                writeln!(f, "Message {}", message.name)?;
                message.fields.iter().try_for_each(|field| {
                    // Repeated fields get the keyword in front of their type.
                    let prefix = if field.repeated { "repeated " } else { "" };
                    writeln!(
                        f,
                        "\t{}: {} ({}{})",
                        field.number, field.name, prefix, field.field_type
                    )
                })
            }
            Decl::Interface(interface) => {
                writeln!(f, "Interface {}", interface.name)?;
                interface
                    .methods
                    .iter()
                    .try_for_each(|method| writeln!(f, "\t{:?}", method))
            }
            Decl::Package(components) => writeln!(f, "Package {};", components.join(".")),
        }
    }
}
/// Turns a borrowed list of tokens into a list of `Decl`s.
pub struct Parser<'a> {
/// The tokens being parsed.
tokens: &'a Vec<Token>,
/// Index into `tokens` of the next token to consume.
current_index: usize,
/// Declarations parsed so far, in the order they were parsed.
ast: Vec<Decl>,
}
impl<'a> Parser<'a> {
    /// Creates a parser positioned at the first token of `tokens`, with an empty AST.
    pub fn new(tokens: &'a Vec<Token>) -> Self {
        Self {
            tokens,
            current_index: 0,
            ast: Vec::new(),
        }
    }

    /// Returns true when there is a next token and its type is `tok_type`.
    fn next_type_is(&self, tok_type: TokenType) -> bool {
        self.tokens
            .get(self.current_index)
            .map_or(false, |token| token.token_type == tok_type)
    }

    /// Returns the next token and advances past it.
    ///
    /// The reference borrows from the token list (lifetime `'a`), not from the
    /// parser, so callers can hold on to it while they keep parsing.
    ///
    /// # Errors
    ///
    /// Returns an error instead of panicking when every token has already been
    /// consumed, which happens if the list is not terminated by `EndOfFile`.
    fn consume_token(&mut self) -> Result<&'a Token, String> {
        let tokens: &'a Vec<Token> = self.tokens;
        let token = tokens
            .get(self.current_index)
            .ok_or_else(|| "Consumed tokens past end of input.".to_string())?;
        self.current_index += 1;
        Ok(token)
    }

    /// Consumes the next token and checks that it has type `t`.
    fn consume_token_type(&mut self, t: TokenType) -> Result<&'a Token, String> {
        let token = self.consume_token()?;
        if token.token_type == t {
            Ok(token)
        } else {
            Err(format!("Expected {:?} but found {}", t, token))
        }
    }

    /// Consumes the next token, which must be a `Name`.
    fn consume_identifier(&mut self) -> Result<&'a Token, String> {
        self.consume_token_type(TokenType::Name)
    }

    /// Consumes the next token, which must be a `Name` spelled exactly `keyword`.
    fn consume_keyword(&mut self, keyword: &str) -> Result<&'a Token, String> {
        let token = self.consume_token_type(TokenType::Name)?;
        if token.chars != keyword {
            Err(format!("Expected '{}' but got {}", keyword, token))
        } else {
            Ok(token)
        }
    }

    /// Parses `name ('.' name)* ';'` following the `package` keyword.
    fn package(&mut self) -> Result<Decl, String> {
        // 'package' was consumed by decl.
        let mut package_names = vec![self.consume_identifier()?.chars.clone()];
        while self.next_type_is(TokenType::Dot) {
            self.consume_token_type(TokenType::Dot)?;
            package_names.push(self.consume_identifier()?.chars.clone());
        }
        self.consume_token_type(TokenType::Semicolon)?;
        Ok(Decl::Package(package_names))
    }

    /// Parses one field, `['repeated'] type name ';'`, and gives it `number`.
    fn field(&mut self, number: u64) -> Result<Field, String> {
        let first = self.consume_identifier()?;
        let repeated = first.chars == "repeated";
        // When the first name was the `repeated` keyword the type is the next name.
        let type_identifier = if repeated {
            self.consume_identifier()?
        } else {
            first
        };
        let field_type = Type::try_from(&type_identifier.chars)
            .map_err(|err| format!("{}: {}", err, type_identifier))?;
        let name = self.consume_identifier()?.chars.clone();
        self.consume_token_type(TokenType::Semicolon)?;
        Ok(Field {
            field_type,
            name,
            number,
            repeated,
        })
    }

    /// Parses `name '{' field* '}'` following the `message` keyword.
    ///
    /// Fields are numbered from 0 in the order they appear.
    fn message(&mut self) -> Result<Decl, String> {
        // 'message' was consumed by decl.
        let name = self.consume_identifier()?.chars.clone();
        self.consume_token_type(TokenType::LeftBrace)?;
        let mut fields = Vec::new();
        let mut next_field_number = 0;
        while !self.next_type_is(TokenType::RightBrace) {
            fields.push(self.field(next_field_number)?);
            next_field_number += 1;
        }
        self.consume_token_type(TokenType::RightBrace)?;
        Ok(Decl::Message(Message { name, fields }))
    }

    /// Parses `'method' name '(' [name] ')' '->' '(' [name] ')' ';'` and gives it `number`.
    fn method(&mut self, number: u64) -> Result<Method, String> {
        self.consume_keyword("method")?;
        let name = self.consume_identifier()?.chars.clone();
        self.consume_token_type(TokenType::LeftParen)?;
        let request = if self.next_type_is(TokenType::Name) {
            Some(self.consume_identifier()?.chars.clone())
        } else {
            None
        };
        self.consume_token_type(TokenType::RightParen)?;
        self.consume_token_type(TokenType::Arrow)?;
        self.consume_token_type(TokenType::LeftParen)?;
        let response = if self.next_type_is(TokenType::Name) {
            Some(self.consume_identifier()?.chars.clone())
        } else {
            None
        };
        self.consume_token_type(TokenType::RightParen)?;
        self.consume_token_type(TokenType::Semicolon)?;
        Ok(Method {
            name,
            request,
            response,
            number,
        })
    }

    /// Parses `name '{' method* '}'` following the `interface` keyword.
    ///
    /// Methods are numbered from 0 in the order they appear.
    fn interface(&mut self) -> Result<Decl, String> {
        // 'interface' was consumed by decl.
        let name = self.consume_identifier()?.chars.clone();
        self.consume_token_type(TokenType::LeftBrace)?;
        let mut methods = Vec::new();
        let mut next_method_number = 0;
        while !self.next_type_is(TokenType::RightBrace) {
            methods.push(self.method(next_method_number)?);
            next_method_number += 1;
        }
        self.consume_token_type(TokenType::RightBrace)?;
        Ok(Decl::Interface(Interface { name, methods }))
    }

    /// Parses one top-level declaration, chosen by its leading keyword.
    fn decl(&mut self) -> Result<Decl, String> {
        let token = self.consume_identifier()?;
        match token.chars.as_str() {
            "package" => self.package(),
            "message" => self.message(),
            "interface" => self.interface(),
            _ => Err(format!(
                "Expected one of 'package', 'message', or 'interface' but got {}",
                token
            )),
        }
    }

    /// Parses declarations until the `EndOfFile` token (or the end of the token
    /// list) and appends them to the AST.
    ///
    /// # Errors
    ///
    /// Returns the first parse error; declarations parsed before it stay in the AST.
    pub fn parse_ast(&mut self) -> Result<(), String> {
        while self.current_index < self.tokens.len()
            && !self.next_type_is(TokenType::EndOfFile)
        {
            let decl = self.decl()?;
            self.ast.push(decl);
        }
        Ok(())
    }

    /// Returns the declarations parsed so far.
    ///
    /// Takes `&self` so the parser stays usable afterwards; requiring
    /// `&'a mut self` would keep it mutably borrowed for its entire lifetime.
    pub fn ast(&self) -> &Vec<Decl> {
        &self.ast
    }
}