diff --git a/crates/prometeu-compiler/src/compiler.rs b/crates/prometeu-compiler/src/compiler.rs
index 87feaf3c..badac1c6 100644
--- a/crates/prometeu-compiler/src/compiler.rs
+++ b/crates/prometeu-compiler/src/compiler.rs
@@ -63,7 +63,7 @@ pub fn compile(project_dir: &Path) -> Result {
     // 1. Select Frontend
     // The _frontend is responsible for parsing source code and producing the IR.
     let _frontend: Box<dyn Frontend> = match config.script_fe.as_str() {
-        "pbs" => anyhow::bail!("Frontend 'pbs' not yet implemented"),
+        "pbs" => Box::new(crate::frontends::pbs::PbsFrontend),
         _ => anyhow::bail!("Invalid frontend: {}", config.script_fe),
     };
@@ -76,7 +76,13 @@ pub fn compile(project_dir: &Path) -> Result {
     // This step abstracts away source-specific syntax (like TypeScript) into a
     // generic set of instructions that the backend can understand.
     let ir_module = _frontend.compile_to_ir(&entry, &mut file_manager)
-        .map_err(|bundle| anyhow::anyhow!("Compilation failed with {} errors", bundle.diagnostics.len()))?;
+        .map_err(|bundle| {
+            // Surface the first diagnostic's message, but keep the total count
+            // visible so multi-error compilations are not under-reported.
+            let total = bundle.diagnostics.len();
+            match bundle.diagnostics.first() {
+                Some(diag) if total > 1 => {
+                    anyhow::anyhow!("{} (and {} more errors)", diag.message, total - 1)
+                }
+                Some(diag) => anyhow::anyhow!("{}", diag.message),
+                None => anyhow::anyhow!("Compilation failed with {} errors", total),
+            }
+        })?;

    // 3.
IR Validation
+    // Ensures the generated IR is sound and doesn't violate any VM constraints
@@ -119,21 +125,4 @@ mod tests {
         assert!(result.unwrap_err().to_string().contains("Invalid frontend: invalid"));
     }
 
-    #[test]
-    fn test_frontend_pbs_not_implemented() {
-        let dir = tempdir().unwrap();
-        let config_path = dir.path().join("prometeu.json");
-        fs::write(
-            config_path,
-            r#"{
-    "script_fe": "pbs",
-    "entry": "main.pbs"
-}"#,
-        )
-        .unwrap();
-
-        let result = compile(dir.path());
-        assert!(result.is_err());
-        assert!(result.unwrap_err().to_string().contains("Frontend 'pbs' not yet implemented"));
-    }
 }
diff --git a/crates/prometeu-compiler/src/frontends/mod.rs b/crates/prometeu-compiler/src/frontends/mod.rs
index 154c1142..856dda5c 100644
--- a/crates/prometeu-compiler/src/frontends/mod.rs
+++ b/crates/prometeu-compiler/src/frontends/mod.rs
@@ -4,6 +4,8 @@ use std::path::Path;
 
 use crate::common::files::FileManager;
 
+pub mod pbs;
+
 pub trait Frontend {
     fn language(&self) -> &'static str;
 
diff --git a/crates/prometeu-compiler/src/frontends/pbs/lexer.rs b/crates/prometeu-compiler/src/frontends/pbs/lexer.rs
new file mode 100644
index 00000000..0a2ea519
--- /dev/null
+++ b/crates/prometeu-compiler/src/frontends/pbs/lexer.rs
@@ -0,0 +1,267 @@
+use crate::common::spans::Span;
+use super::token::{Token, TokenKind};
+use std::str::Chars;
+use std::iter::Peekable;
+
+/// Hand-written scanner for the PBS language.
+///
+/// Call `next_token` repeatedly; it yields `TokenKind::Eof` once the input is
+/// exhausted. Spans are byte offsets into the source, tagged with `file_id`.
+pub struct Lexer<'a> {
+    chars: Peekable<Chars<'a>>,
+    file_id: usize,
+    // Current byte offset; advances by each char's UTF-8 byte length.
+    pos: u32,
+}
+
+impl<'a> Lexer<'a> {
+    pub fn new(source: &'a str, file_id: usize) -> Self {
+        Self {
+            chars: source.chars().peekable(),
+            file_id,
+            pos: 0,
+        }
+    }
+
+    /// Current character without consuming it.
+    fn peek(&mut self) -> Option<char> {
+        self.chars.peek().copied()
+    }
+
+    /// Consume one character and advance `pos` by its UTF-8 byte length.
+    fn next(&mut self) -> Option<char> {
+        let c = self.chars.next();
+        if let Some(c) = c {
+            self.pos += c.len_utf8() as u32;
+        }
+        c
+    }
+
+    /// Skip whitespace and `//` line comments; stops at any other character.
+    fn skip_whitespace(&mut self) {
+        while let Some(c) = self.peek() {
+            if c.is_whitespace() {
+                self.next();
+            } else if c == '/' {
+                if self.peek_next() == Some('/') {
+                    // Line comment: consume up to (not including) the '\n';
+                    // the newline itself is eaten as whitespace next pass.
+                    self.next(); // /
+                    self.next(); // /
+                    while let Some(c) = self.peek() {
+                        if c == '\n' {
+                            break;
+                        }
+                        self.next();
+                    }
+                } else {
+                    // A lone '/' is the Slash operator; let next_token lex it.
+                    break;
+                }
+            } else {
+                break;
+            }
+        }
+    }
+
+    /// Character one past the current one (clones the char iterator, which is
+    /// cheap — it is just a pointer pair over the source).
+    fn peek_next(&self) -> Option<char> {
+        let mut cloned = self.chars.clone();
+        cloned.next();
+        cloned.peek().copied()
+    }
+
+    /// Produce the next token; returns `Eof` (with an empty span) at end of input.
+    pub fn next_token(&mut self) -> Token {
+        self.skip_whitespace();
+
+        let start = self.pos;
+        let c = match self.next() {
+            Some(c) => c,
+            None => return Token::new(TokenKind::Eof, Span::new(self.file_id, start, start)),
+        };
+
+        let kind = match c {
+            '(' => TokenKind::OpenParen,
+            ')' => TokenKind::CloseParen,
+            '{' => TokenKind::OpenBrace,
+            '}' => TokenKind::CloseBrace,
+            '[' => TokenKind::OpenBracket,
+            ']' => TokenKind::CloseBracket,
+            ',' => TokenKind::Comma,
+            '.' => TokenKind::Dot,
+            ':' => TokenKind::Colon,
+            ';' => TokenKind::Semicolon,
+            '=' => {
+                if self.peek() == Some('=') {
+                    self.next();
+                    TokenKind::Eq
+                } else {
+                    TokenKind::Assign
+                }
+            }
+            '+' => TokenKind::Plus,
+            '-' => {
+                if self.peek() == Some('>') {
+                    self.next();
+                    TokenKind::Arrow
+                } else {
+                    TokenKind::Minus
+                }
+            }
+            '*' => TokenKind::Star,
+            '/' => TokenKind::Slash,
+            '%' => TokenKind::Percent,
+            '!' => {
+                if self.peek() == Some('=') {
+                    self.next();
+                    TokenKind::Neq
+                } else {
+                    TokenKind::Not
+                }
+            }
+            '<' => {
+                if self.peek() == Some('=') {
+                    self.next();
+                    TokenKind::Lte
+                } else {
+                    TokenKind::Lt
+                }
+            }
+            '>' => {
+                if self.peek() == Some('=') {
+                    self.next();
+                    TokenKind::Gte
+                } else {
+                    TokenKind::Gt
+                }
+            }
+            // '&' and '|' are only valid doubled; a single one is an error token.
+            '&' => {
+                if self.peek() == Some('&') {
+                    self.next();
+                    TokenKind::And
+                } else {
+                    TokenKind::Invalid("&".to_string())
+                }
+            }
+            '|' => {
+                if self.peek() == Some('|') {
+                    self.next();
+                    TokenKind::Or
+                } else {
+                    TokenKind::Invalid("|".to_string())
+                }
+            }
+            '"' => self.lex_string(),
+            '0'..='9' => self.lex_number(c),
+            c if is_identifier_start(c) => self.lex_identifier(c),
+            _ => TokenKind::Invalid(c.to_string()),
+        };
+
+        Token::new(kind, Span::new(self.file_id, start, self.pos))
+    }
+
+    /// Lex the remainder of a string literal (opening '"' already consumed).
+    /// Strings may not span lines; escape sequences are not yet supported.
+    fn lex_string(&mut self) -> TokenKind {
+        let mut s = String::new();
+        while let Some(c) = self.peek() {
+            if c == '"' {
+                self.next();
+                return TokenKind::StringLit(s);
+            }
+            if c == '\n' {
+                break; // Unterminated string
+            }
+            s.push(self.next().unwrap());
+        }
+        TokenKind::Invalid("Unterminated string".to_string())
+    }
+
+    /// Lex an integer, float, or `Nb` bounded literal (first digit consumed).
+    fn lex_number(&mut self, first: char) -> TokenKind {
+        let mut s = String::new();
+        s.push(first);
+        let mut is_float = false;
+
+        while let Some(c) = self.peek() {
+            if c.is_ascii_digit() {
+                s.push(self.next().unwrap());
+            } else if c == '.' && !is_float {
+                // Only treat '.' as a decimal point when a digit follows, so
+                // `1.foo` lexes as IntLit(1), Dot, Identifier("foo").
+                if let Some(next_c) = self.peek_next() {
+                    if next_c.is_ascii_digit() {
+                        is_float = true;
+                        s.push(self.next().unwrap()); // .
+                        s.push(self.next().unwrap()); // next digit
+                    } else {
+                        break;
+                    }
+                } else {
+                    break;
+                }
+            } else {
+                break;
+            }
+        }
+
+        if self.peek() == Some('b') && !is_float {
+            self.next(); // consume 'b'
+            // FIX: on u32 overflow the old code fell through and returned
+            // IntLit, silently dropping the already-consumed 'b' suffix
+            // (e.g. `5000000000b` lexed as IntLit(5000000000)). A bounded
+            // literal that doesn't fit u32 is an error token instead.
+            return match s.parse::<u32>() {
+                Ok(val) => TokenKind::BoundedLit(val),
+                Err(_) => TokenKind::Invalid(format!("{}b", s)),
+            };
+        }
+
+        if is_float {
+            if let Ok(val) = s.parse::<f64>() {
+                return TokenKind::FloatLit(val);
+            }
+        } else if let Ok(val) = s.parse::<i64>() {
+            return TokenKind::IntLit(val);
+        }
+
+        // Out-of-range numeric literals (e.g. > i64::MAX) land here.
+        TokenKind::Invalid(s)
+    }
+
+    /// Lex an identifier or keyword (first character already consumed).
+    fn lex_identifier(&mut self, first: char) -> TokenKind {
+        let mut s = String::new();
+        s.push(first);
+        while let Some(c) = self.peek() {
+            if is_identifier_part(c) {
+                s.push(self.next().unwrap());
+            } else {
+                break;
+            }
+        }
+
+        match s.as_str() {
+            "import" => TokenKind::Import,
+            "pub" => TokenKind::Pub,
+            "mod" => TokenKind::Mod,
+            "service" => TokenKind::Service,
+            "fn" => TokenKind::Fn,
+            "let" => TokenKind::Let,
+            "mut" => TokenKind::Mut,
+            "declare" => TokenKind::Declare,
+            "struct" => TokenKind::Struct,
+            "contract" => TokenKind::Contract,
+            "host" => TokenKind::Host,
+            "error" => TokenKind::Error,
+            "optional" => TokenKind::Optional,
+            "result" => TokenKind::Result,
+            "some" => TokenKind::Some,
+            "none" => TokenKind::None,
+            "ok" => TokenKind::Ok,
+            "err" => TokenKind::Err,
+            "if" => TokenKind::If,
+            "else" => TokenKind::Else,
+            "when" => TokenKind::When,
+            "for" => TokenKind::For,
+            "in" => TokenKind::In,
+            "return" => TokenKind::Return,
+            "handle" => TokenKind::Handle,
+            "borrow" => TokenKind::Borrow,
+            "mutate" => TokenKind::Mutate,
+            "peek" => TokenKind::Peek,
+            "take" => TokenKind::Take,
+            "alloc" => TokenKind::Alloc,
+            "weak" => TokenKind::Weak,
+            "as" => TokenKind::As,
+            _ => TokenKind::Identifier(s),
+        }
+    }
+}
+
+fn is_identifier_start(c: char) -> bool {
+    c.is_alphabetic() || c == '_'
+}
+
+fn is_identifier_part(c: char) -> bool {
+    c.is_alphanumeric() || c == '_'
+}
diff --git a/crates/prometeu-compiler/src/frontends/pbs/mod.rs
b/crates/prometeu-compiler/src/frontends/pbs/mod.rs
new file mode 100644
index 00000000..dbb46688
--- /dev/null
+++ b/crates/prometeu-compiler/src/frontends/pbs/mod.rs
@@ -0,0 +1,28 @@
+//! PBS frontend: token definitions and the hand-written lexer.
+//! Parsing and IR generation are stubbed until a later PR.
+
+pub mod token;
+pub mod lexer;
+
+pub use lexer::Lexer;
+pub use token::{Token, TokenKind};
+
+use crate::common::diagnostics::DiagnosticBundle;
+use crate::common::files::FileManager;
+use crate::frontends::Frontend;
+use crate::ir;
+use std::path::Path;
+
+/// Zero-sized `Frontend` implementation for the "pbs" language.
+pub struct PbsFrontend;
+
+impl Frontend for PbsFrontend {
+    fn language(&self) -> &'static str {
+        "pbs"
+    }
+
+    fn compile_to_ir(
+        &self,
+        _entry: &Path,
+        _file_manager: &mut FileManager,
+    // NOTE(review): the return type's generic arguments were lost in this
+    // paste; presumably Result<ir::Module, DiagnosticBundle> — confirm
+    // against the Frontend trait declaration in frontends/mod.rs.
+    ) -> Result {
+        // Parsing and full compilation will be implemented in future PRs.
+        Err(DiagnosticBundle::error("Frontend 'pbs' not yet implemented".to_string(), None))
+    }
+}
diff --git a/crates/prometeu-compiler/src/frontends/pbs/token.rs b/crates/prometeu-compiler/src/frontends/pbs/token.rs
new file mode 100644
index 00000000..2cb9042c
--- /dev/null
+++ b/crates/prometeu-compiler/src/frontends/pbs/token.rs
@@ -0,0 +1,92 @@
+use crate::common::spans::Span;
+use serde::{Deserialize, Serialize};
+
+/// Every lexical token the PBS lexer can produce.
+#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
+pub enum TokenKind {
+    // Keywords
+    Import,
+    Pub,
+    Mod,
+    Service,
+    Fn,
+    Let,
+    Mut,
+    Declare,
+    Struct,
+    Contract,
+    Host,
+    Error,
+    Optional,
+    Result,
+    Some,
+    None,
+    Ok,
+    Err,
+    If,
+    Else,
+    When,
+    For,
+    In,
+    Return,
+    Handle,
+    Borrow,
+    Mutate,
+    Peek,
+    Take,
+    Alloc,
+    Weak,
+    As,
+
+    // Identifiers and Literals
+    Identifier(String),
+    IntLit(i64),
+    // f64 makes the derived PartialEq non-reflexive for NaN, but the lexer
+    // never produces NaN, so comparisons are safe in practice.
+    FloatLit(f64),
+    BoundedLit(u32),
+    StringLit(String),
+
+    // Punctuation
+    OpenParen,    // (
+    CloseParen,   // )
+    OpenBrace,    // {
+    CloseBrace,   // }
+    OpenBracket,  // [
+    CloseBracket, // ]
+    Comma,        // ,
+    Dot,          // .
+    Colon,     // :
+    Semicolon, // ;
+    Arrow,     // ->
+
+    // Operators
+    Assign,  // =
+    Plus,    // +
+    Minus,   // -
+    Star,    // *
+    Slash,   // /
+    Percent, // %
+    Eq,      // ==
+    Neq,     // !=
+    Lt,      // <
+    Gt,      // >
+    Lte,     // <=
+    Gte,     // >=
+    And,     // &&
+    Or,      // ||
+    Not,     // !
+
+    // Special
+    Eof,
+    Invalid(String),
+}
+
+/// A token paired with the source span it was lexed from.
+#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
+pub struct Token {
+    pub kind: TokenKind,
+    pub span: Span,
+}
+
+impl Token {
+    pub fn new(kind: TokenKind, span: Span) -> Self {
+        Self { kind, span }
+    }
+}
diff --git a/crates/prometeu-compiler/tests/lexer_tests.rs b/crates/prometeu-compiler/tests/lexer_tests.rs
new file mode 100644
index 00000000..c1b0f9c9
--- /dev/null
+++ b/crates/prometeu-compiler/tests/lexer_tests.rs
@@ -0,0 +1,156 @@
+use prometeu_compiler::frontends::pbs::lexer::Lexer;
+use prometeu_compiler::frontends::pbs::token::TokenKind;
+
+// All punctuation and operator tokens, terminated by Eof.
+#[test]
+fn test_lex_basic_tokens() {
+    let source = "( ) { } [ ] , . : ; -> = == + - * / % ! != < > <= >= && ||";
+    let mut lexer = Lexer::new(source, 0);
+
+    let expected = vec![
+        TokenKind::OpenParen, TokenKind::CloseParen,
+        TokenKind::OpenBrace, TokenKind::CloseBrace,
+        TokenKind::OpenBracket, TokenKind::CloseBracket,
+        TokenKind::Comma, TokenKind::Dot, TokenKind::Colon, TokenKind::Semicolon,
+        TokenKind::Arrow, TokenKind::Assign, TokenKind::Eq,
+        TokenKind::Plus, TokenKind::Minus, TokenKind::Star, TokenKind::Slash, TokenKind::Percent,
+        TokenKind::Not, TokenKind::Neq,
+        TokenKind::Lt, TokenKind::Gt, TokenKind::Lte, TokenKind::Gte,
+        TokenKind::And, TokenKind::Or,
+        TokenKind::Eof,
+    ];
+
+    for kind in expected {
+        assert_eq!(lexer.next_token().kind, kind);
+    }
+}
+
+// Every reserved word maps to its dedicated keyword token.
+#[test]
+fn test_lex_keywords() {
+    let source = "import pub mod service fn let mut declare struct contract host error optional result some none ok err if else when for in return handle borrow mutate peek take alloc weak as";
+    let mut lexer = Lexer::new(source, 0);
+
+    let expected = vec![
+        TokenKind::Import, TokenKind::Pub, TokenKind::Mod, TokenKind::Service,
+        TokenKind::Fn, TokenKind::Let, TokenKind::Mut, TokenKind::Declare,
+        TokenKind::Struct, TokenKind::Contract, TokenKind::Host, TokenKind::Error,
+        TokenKind::Optional, TokenKind::Result, TokenKind::Some, TokenKind::None,
+        TokenKind::Ok, TokenKind::Err, TokenKind::If, TokenKind::Else,
+        TokenKind::When, TokenKind::For, TokenKind::In, TokenKind::Return,
+        TokenKind::Handle, TokenKind::Borrow, TokenKind::Mutate, TokenKind::Peek,
+        TokenKind::Take, TokenKind::Alloc, TokenKind::Weak, TokenKind::As,
+        TokenKind::Eof,
+    ];
+
+    for kind in expected {
+        assert_eq!(lexer.next_token().kind, kind);
+    }
+}
+
+// Non-keyword names, including leading underscore and trailing digits.
+#[test]
+fn test_lex_identifiers() {
+    let source = "foo bar _baz qux123";
+    let mut lexer = Lexer::new(source, 0);
+
+    let expected = vec![
+        TokenKind::Identifier("foo".to_string()),
+        TokenKind::Identifier("bar".to_string()),
+        TokenKind::Identifier("_baz".to_string()),
+        TokenKind::Identifier("qux123".to_string()),
+        TokenKind::Eof,
+    ];
+
+    for kind in expected {
+        assert_eq!(lexer.next_token().kind, kind);
+    }
+}
+
+// Int, float, `Nb` bounded, and string literals.
+#[test]
+fn test_lex_literals() {
+    let source = "123 3.14 255b \"hello world\"";
+    let mut lexer = Lexer::new(source, 0);
+
+    let expected = vec![
+        TokenKind::IntLit(123),
+        TokenKind::FloatLit(3.14),
+        TokenKind::BoundedLit(255),
+        TokenKind::StringLit("hello world".to_string()),
+        TokenKind::Eof,
+    ];
+
+    for kind in expected {
+        assert_eq!(lexer.next_token().kind, kind);
+    }
+}
+
+// A `//` comment is skipped through end-of-line; lexing resumes after it.
+#[test]
+fn test_lex_comments() {
+    let source = "let x = 10; // this is a comment\nlet y = 20;";
+    let mut lexer = Lexer::new(source, 0);
+
+    let expected = vec![
+        TokenKind::Let,
+        TokenKind::Identifier("x".to_string()),
+        TokenKind::Assign,
+        TokenKind::IntLit(10),
+        TokenKind::Semicolon,
+        TokenKind::Let,
+        TokenKind::Identifier("y".to_string()),
+        TokenKind::Assign,
+        TokenKind::IntLit(20),
+        TokenKind::Semicolon,
+        TokenKind::Eof,
+    ];
+
+    for kind in expected {
+        assert_eq!(lexer.next_token().kind, kind);
+    }
+}
+
+// Spans are byte offsets: start inclusive, end exclusive.
+#[test]
+fn test_lex_spans() {
+    let source = "let x = 10;";
+    let mut lexer = Lexer::new(source, 0);
+
+    let tok = lexer.next_token(); // "let"
+    assert_eq!((tok.span.start, tok.span.end), (0, 3));
+
+    let tok = lexer.next_token(); // "x"
+    assert_eq!((tok.span.start, tok.span.end), (4, 5));
+
+    let tok = lexer.next_token(); // "="
+    assert_eq!((tok.span.start, tok.span.end), (6, 7));
+
+    let tok = lexer.next_token(); // "10"
+    assert_eq!((tok.span.start, tok.span.end), (8, 10));
+
+    let tok = lexer.next_token(); // ";"
+    assert_eq!((tok.span.start, tok.span.end), (10, 11));
+}
+
+// Characters outside the grammar become Invalid tokens, then Eof.
+#[test]
+fn test_lex_invalid_tokens() {
+    let source = "@ #";
+    let mut lexer = Lexer::new(source, 0);
+
+    assert!(matches!(lexer.next_token().kind, TokenKind::Invalid(_)));
+    assert!(matches!(lexer.next_token().kind, TokenKind::Invalid(_)));
+    assert_eq!(lexer.next_token().kind, TokenKind::Eof);
+}
+
+// A string with no closing quote yields an Invalid token rather than hanging.
+#[test]
+fn test_lex_unterminated_string() {
+    let source = "\"hello";
+    let mut lexer = Lexer::new(source, 0);
+
+    assert!(matches!(lexer.next_token().kind, TokenKind::Invalid(_)));
+}