dev/pbs #8
@ -63,7 +63,7 @@ pub fn compile(project_dir: &Path) -> Result<CompilationUnit> {
|
||||
// 1. Select Frontend
|
||||
// The _frontend is responsible for parsing source code and producing the IR.
|
||||
let _frontend: Box<dyn Frontend> = match config.script_fe.as_str() {
|
||||
"pbs" => anyhow::bail!("Frontend 'pbs' not yet implemented"),
|
||||
"pbs" => Box::new(crate::frontends::pbs::PbsFrontend),
|
||||
_ => anyhow::bail!("Invalid frontend: {}", config.script_fe),
|
||||
};
|
||||
|
||||
@ -76,7 +76,13 @@ pub fn compile(project_dir: &Path) -> Result<CompilationUnit> {
|
||||
// This step abstracts away source-specific syntax (like TypeScript) into a
|
||||
// generic set of instructions that the backend can understand.
|
||||
let ir_module = _frontend.compile_to_ir(&entry, &mut file_manager)
|
||||
.map_err(|bundle| anyhow::anyhow!("Compilation failed with {} errors", bundle.diagnostics.len()))?;
|
||||
.map_err(|bundle| {
|
||||
if let Some(diag) = bundle.diagnostics.first() {
|
||||
anyhow::anyhow!("{}", diag.message)
|
||||
} else {
|
||||
anyhow::anyhow!("Compilation failed with {} errors", bundle.diagnostics.len())
|
||||
}
|
||||
})?;
|
||||
|
||||
// 3. IR Validation
|
||||
// Ensures the generated IR is sound and doesn't violate any VM constraints
|
||||
@ -119,21 +125,4 @@ mod tests {
|
||||
assert!(result.unwrap_err().to_string().contains("Invalid frontend: invalid"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_frontend_pbs_not_implemented() {
|
||||
let dir = tempdir().unwrap();
|
||||
let config_path = dir.path().join("prometeu.json");
|
||||
fs::write(
|
||||
config_path,
|
||||
r#"{
|
||||
"script_fe": "pbs",
|
||||
"entry": "main.pbs"
|
||||
}"#,
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
let result = compile(dir.path());
|
||||
assert!(result.is_err());
|
||||
assert!(result.unwrap_err().to_string().contains("Frontend 'pbs' not yet implemented"));
|
||||
}
|
||||
}
|
||||
|
||||
@ -4,6 +4,8 @@ use std::path::Path;
|
||||
|
||||
use crate::common::files::FileManager;
|
||||
|
||||
pub mod pbs;
|
||||
|
||||
pub trait Frontend {
|
||||
fn language(&self) -> &'static str;
|
||||
|
||||
|
||||
267
crates/prometeu-compiler/src/frontends/pbs/lexer.rs
Normal file
267
crates/prometeu-compiler/src/frontends/pbs/lexer.rs
Normal file
@ -0,0 +1,267 @@
|
||||
use crate::common::spans::Span;
|
||||
use super::token::{Token, TokenKind};
|
||||
use std::str::Chars;
|
||||
use std::iter::Peekable;
|
||||
|
||||
/// Hand-written single-pass lexer for PBS source code.
///
/// Tokens are produced one at a time via [`Lexer::next_token`]. The lexer
/// tracks a byte offset into the source so every token carries a `Span`.
pub struct Lexer<'a> {
    /// Remaining source characters, with one-character lookahead.
    chars: Peekable<Chars<'a>>,
    /// Id of the file being lexed; copied into every emitted span.
    file_id: usize,
    /// Current byte offset into the source (advances by UTF-8 length).
    pos: u32,
}
|
||||
|
||||
impl<'a> Lexer<'a> {
|
||||
pub fn new(source: &'a str, file_id: usize) -> Self {
|
||||
Self {
|
||||
chars: source.chars().peekable(),
|
||||
file_id,
|
||||
pos: 0,
|
||||
}
|
||||
}
|
||||
|
||||
fn peek(&mut self) -> Option<char> {
|
||||
self.chars.peek().copied()
|
||||
}
|
||||
|
||||
fn next(&mut self) -> Option<char> {
|
||||
let c = self.chars.next();
|
||||
if let Some(c) = c {
|
||||
self.pos += c.len_utf8() as u32;
|
||||
}
|
||||
c
|
||||
}
|
||||
|
||||
fn skip_whitespace(&mut self) {
|
||||
while let Some(c) = self.peek() {
|
||||
if c.is_whitespace() {
|
||||
self.next();
|
||||
} else if c == '/' {
|
||||
if self.peek_next() == Some('/') {
|
||||
// Line comment
|
||||
self.next(); // /
|
||||
self.next(); // /
|
||||
while let Some(c) = self.peek() {
|
||||
if c == '\n' {
|
||||
break;
|
||||
}
|
||||
self.next();
|
||||
}
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn peek_next(&self) -> Option<char> {
|
||||
let mut cloned = self.chars.clone();
|
||||
cloned.next();
|
||||
cloned.peek().copied()
|
||||
}
|
||||
|
||||
pub fn next_token(&mut self) -> Token {
|
||||
self.skip_whitespace();
|
||||
|
||||
let start = self.pos;
|
||||
let c = match self.next() {
|
||||
Some(c) => c,
|
||||
None => return Token::new(TokenKind::Eof, Span::new(self.file_id, start, start)),
|
||||
};
|
||||
|
||||
let kind = match c {
|
||||
'(' => TokenKind::OpenParen,
|
||||
')' => TokenKind::CloseParen,
|
||||
'{' => TokenKind::OpenBrace,
|
||||
'}' => TokenKind::CloseBrace,
|
||||
'[' => TokenKind::OpenBracket,
|
||||
']' => TokenKind::CloseBracket,
|
||||
',' => TokenKind::Comma,
|
||||
'.' => TokenKind::Dot,
|
||||
':' => TokenKind::Colon,
|
||||
';' => TokenKind::Semicolon,
|
||||
'=' => {
|
||||
if self.peek() == Some('=') {
|
||||
self.next();
|
||||
TokenKind::Eq
|
||||
} else {
|
||||
TokenKind::Assign
|
||||
}
|
||||
}
|
||||
'+' => TokenKind::Plus,
|
||||
'-' => {
|
||||
if self.peek() == Some('>') {
|
||||
self.next();
|
||||
TokenKind::Arrow
|
||||
} else {
|
||||
TokenKind::Minus
|
||||
}
|
||||
}
|
||||
'*' => TokenKind::Star,
|
||||
'/' => TokenKind::Slash,
|
||||
'%' => TokenKind::Percent,
|
||||
'!' => {
|
||||
if self.peek() == Some('=') {
|
||||
self.next();
|
||||
TokenKind::Neq
|
||||
} else {
|
||||
TokenKind::Not
|
||||
}
|
||||
}
|
||||
'<' => {
|
||||
if self.peek() == Some('=') {
|
||||
self.next();
|
||||
TokenKind::Lte
|
||||
} else {
|
||||
TokenKind::Lt
|
||||
}
|
||||
}
|
||||
'>' => {
|
||||
if self.peek() == Some('=') {
|
||||
self.next();
|
||||
TokenKind::Gte
|
||||
} else {
|
||||
TokenKind::Gt
|
||||
}
|
||||
}
|
||||
'&' => {
|
||||
if self.peek() == Some('&') {
|
||||
self.next();
|
||||
TokenKind::And
|
||||
} else {
|
||||
TokenKind::Invalid("&".to_string())
|
||||
}
|
||||
}
|
||||
'|' => {
|
||||
if self.peek() == Some('|') {
|
||||
self.next();
|
||||
TokenKind::Or
|
||||
} else {
|
||||
TokenKind::Invalid("|".to_string())
|
||||
}
|
||||
}
|
||||
'"' => self.lex_string(),
|
||||
'0'..='9' => self.lex_number(c),
|
||||
c if is_identifier_start(c) => self.lex_identifier(c),
|
||||
_ => TokenKind::Invalid(c.to_string()),
|
||||
};
|
||||
|
||||
Token::new(kind, Span::new(self.file_id, start, self.pos))
|
||||
}
|
||||
|
||||
fn lex_string(&mut self) -> TokenKind {
|
||||
let mut s = String::new();
|
||||
while let Some(c) = self.peek() {
|
||||
if c == '"' {
|
||||
self.next();
|
||||
return TokenKind::StringLit(s);
|
||||
}
|
||||
if c == '\n' {
|
||||
break; // Unterminated string
|
||||
}
|
||||
s.push(self.next().unwrap());
|
||||
}
|
||||
TokenKind::Invalid("Unterminated string".to_string())
|
||||
}
|
||||
|
||||
fn lex_number(&mut self, first: char) -> TokenKind {
|
||||
let mut s = String::new();
|
||||
s.push(first);
|
||||
let mut is_float = false;
|
||||
|
||||
while let Some(c) = self.peek() {
|
||||
if c.is_ascii_digit() {
|
||||
s.push(self.next().unwrap());
|
||||
} else if c == '.' && !is_float {
|
||||
if let Some(next_c) = self.peek_next() {
|
||||
if next_c.is_ascii_digit() {
|
||||
is_float = true;
|
||||
s.push(self.next().unwrap()); // .
|
||||
s.push(self.next().unwrap()); // next digit
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if self.peek() == Some('b') && !is_float {
|
||||
self.next(); // consume 'b'
|
||||
if let Ok(val) = s.parse::<u32>() {
|
||||
return TokenKind::BoundedLit(val);
|
||||
}
|
||||
}
|
||||
|
||||
if is_float {
|
||||
if let Ok(val) = s.parse::<f64>() {
|
||||
return TokenKind::FloatLit(val);
|
||||
}
|
||||
} else {
|
||||
if let Ok(val) = s.parse::<i64>() {
|
||||
return TokenKind::IntLit(val);
|
||||
}
|
||||
}
|
||||
|
||||
TokenKind::Invalid(s)
|
||||
}
|
||||
|
||||
fn lex_identifier(&mut self, first: char) -> TokenKind {
|
||||
let mut s = String::new();
|
||||
s.push(first);
|
||||
while let Some(c) = self.peek() {
|
||||
if is_identifier_part(c) {
|
||||
s.push(self.next().unwrap());
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
match s.as_str() {
|
||||
"import" => TokenKind::Import,
|
||||
"pub" => TokenKind::Pub,
|
||||
"mod" => TokenKind::Mod,
|
||||
"service" => TokenKind::Service,
|
||||
"fn" => TokenKind::Fn,
|
||||
"let" => TokenKind::Let,
|
||||
"mut" => TokenKind::Mut,
|
||||
"declare" => TokenKind::Declare,
|
||||
"struct" => TokenKind::Struct,
|
||||
"contract" => TokenKind::Contract,
|
||||
"host" => TokenKind::Host,
|
||||
"error" => TokenKind::Error,
|
||||
"optional" => TokenKind::Optional,
|
||||
"result" => TokenKind::Result,
|
||||
"some" => TokenKind::Some,
|
||||
"none" => TokenKind::None,
|
||||
"ok" => TokenKind::Ok,
|
||||
"err" => TokenKind::Err,
|
||||
"if" => TokenKind::If,
|
||||
"else" => TokenKind::Else,
|
||||
"when" => TokenKind::When,
|
||||
"for" => TokenKind::For,
|
||||
"in" => TokenKind::In,
|
||||
"return" => TokenKind::Return,
|
||||
"handle" => TokenKind::Handle,
|
||||
"borrow" => TokenKind::Borrow,
|
||||
"mutate" => TokenKind::Mutate,
|
||||
"peek" => TokenKind::Peek,
|
||||
"take" => TokenKind::Take,
|
||||
"alloc" => TokenKind::Alloc,
|
||||
"weak" => TokenKind::Weak,
|
||||
"as" => TokenKind::As,
|
||||
_ => TokenKind::Identifier(s),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns true when `c` may begin an identifier: `_` or any Unicode
/// alphabetic character.
fn is_identifier_start(c: char) -> bool {
    c == '_' || c.is_alphabetic()
}
|
||||
|
||||
/// Returns true when `c` may continue an identifier: `_` or any Unicode
/// alphanumeric character.
fn is_identifier_part(c: char) -> bool {
    c == '_' || c.is_alphanumeric()
}
|
||||
28
crates/prometeu-compiler/src/frontends/pbs/mod.rs
Normal file
28
crates/prometeu-compiler/src/frontends/pbs/mod.rs
Normal file
@ -0,0 +1,28 @@
|
||||
pub mod token;
|
||||
pub mod lexer;
|
||||
|
||||
pub use lexer::Lexer;
|
||||
pub use token::{Token, TokenKind};
|
||||
|
||||
use crate::common::diagnostics::DiagnosticBundle;
|
||||
use crate::common::files::FileManager;
|
||||
use crate::frontends::Frontend;
|
||||
use crate::ir;
|
||||
use std::path::Path;
|
||||
|
||||
pub struct PbsFrontend;
|
||||
|
||||
impl Frontend for PbsFrontend {
|
||||
fn language(&self) -> &'static str {
|
||||
"pbs"
|
||||
}
|
||||
|
||||
fn compile_to_ir(
|
||||
&self,
|
||||
_entry: &Path,
|
||||
_file_manager: &mut FileManager,
|
||||
) -> Result<ir::Module, DiagnosticBundle> {
|
||||
// Parsing and full compilation will be implemented in future PRs.
|
||||
Err(DiagnosticBundle::error("Frontend 'pbs' not yet implemented".to_string(), None))
|
||||
}
|
||||
}
|
||||
92
crates/prometeu-compiler/src/frontends/pbs/token.rs
Normal file
92
crates/prometeu-compiler/src/frontends/pbs/token.rs
Normal file
@ -0,0 +1,92 @@
|
||||
use crate::common::spans::Span;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
|
||||
pub enum TokenKind {
|
||||
// Keywords
|
||||
Import,
|
||||
Pub,
|
||||
Mod,
|
||||
Service,
|
||||
Fn,
|
||||
Let,
|
||||
Mut,
|
||||
Declare,
|
||||
Struct,
|
||||
Contract,
|
||||
Host,
|
||||
Error,
|
||||
Optional,
|
||||
Result,
|
||||
Some,
|
||||
None,
|
||||
Ok,
|
||||
Err,
|
||||
If,
|
||||
Else,
|
||||
When,
|
||||
For,
|
||||
In,
|
||||
Return,
|
||||
Handle,
|
||||
Borrow,
|
||||
Mutate,
|
||||
Peek,
|
||||
Take,
|
||||
Alloc,
|
||||
Weak,
|
||||
As,
|
||||
|
||||
// Identifiers and Literals
|
||||
Identifier(String),
|
||||
IntLit(i64),
|
||||
FloatLit(f64),
|
||||
BoundedLit(u32),
|
||||
StringLit(String),
|
||||
|
||||
// Punctuation
|
||||
OpenParen, // (
|
||||
CloseParen, // )
|
||||
OpenBrace, // {
|
||||
CloseBrace, // }
|
||||
OpenBracket, // [
|
||||
CloseBracket, // ]
|
||||
Comma, // ,
|
||||
Dot, // .
|
||||
Colon, // :
|
||||
Semicolon, // ;
|
||||
Arrow, // ->
|
||||
|
||||
// Operators
|
||||
Assign, // =
|
||||
Plus, // +
|
||||
Minus, // -
|
||||
Star, // *
|
||||
Slash, // /
|
||||
Percent, // %
|
||||
Eq, // ==
|
||||
Neq, // !=
|
||||
Lt, // <
|
||||
Gt, // >
|
||||
Lte, // <=
|
||||
Gte, // >=
|
||||
And, // &&
|
||||
Or, // ||
|
||||
Not, // !
|
||||
|
||||
// Special
|
||||
Eof,
|
||||
Invalid(String),
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
|
||||
pub struct Token {
|
||||
pub kind: TokenKind,
|
||||
pub span: Span,
|
||||
}
|
||||
|
||||
impl Token {
|
||||
pub fn new(kind: TokenKind, span: Span) -> Self {
|
||||
Self { kind, span }
|
||||
}
|
||||
}
|
||||
156
crates/prometeu-compiler/tests/lexer_tests.rs
Normal file
156
crates/prometeu-compiler/tests/lexer_tests.rs
Normal file
@ -0,0 +1,156 @@
|
||||
use prometeu_compiler::frontends::pbs::lexer::Lexer;
|
||||
use prometeu_compiler::frontends::pbs::token::TokenKind;
|
||||
|
||||
#[test]
fn test_lex_basic_tokens() {
    use TokenKind::*;

    // Every punctuation mark and operator, space-separated.
    let mut lx = Lexer::new(
        "( ) { } [ ] , . : ; -> = == + - * / % ! != < > <= >= && ||",
        0,
    );

    let expected = vec![
        OpenParen, CloseParen, OpenBrace, CloseBrace, OpenBracket, CloseBracket,
        Comma, Dot, Colon, Semicolon, Arrow, Assign, Eq,
        Plus, Minus, Star, Slash, Percent, Not, Neq,
        Lt, Gt, Lte, Gte, And, Or,
        Eof,
    ];

    for want in expected {
        assert_eq!(lx.next_token().kind, want);
    }
}
|
||||
|
||||
#[test]
fn test_lex_keywords() {
    use TokenKind::*;

    // All reserved words, in declaration order.
    let source = "import pub mod service fn let mut declare struct contract host error optional result some none ok err if else when for in return handle borrow mutate peek take alloc weak as";
    let mut lx = Lexer::new(source, 0);

    let expected = vec![
        Import, Pub, Mod, Service, Fn, Let, Mut, Declare,
        Struct, Contract, Host, Error, Optional, Result, Some, None,
        Ok, Err, If, Else, When, For, In, Return,
        Handle, Borrow, Mutate, Peek, Take, Alloc, Weak, As,
        Eof,
    ];

    for want in expected {
        assert_eq!(lx.next_token().kind, want);
    }
}
|
||||
|
||||
#[test]
fn test_lex_identifiers() {
    // Leading underscore and trailing digits are both valid identifier shapes.
    let mut lx = Lexer::new("foo bar _baz qux123", 0);

    for name in vec!["foo", "bar", "_baz", "qux123"] {
        assert_eq!(lx.next_token().kind, TokenKind::Identifier(name.to_string()));
    }
    assert_eq!(lx.next_token().kind, TokenKind::Eof);
}
|
||||
|
||||
#[test]
fn test_lex_literals() {
    // One of each literal kind: int, float, bounded (`b` suffix), string.
    let mut lx = Lexer::new("123 3.14 255b \"hello world\"", 0);

    assert_eq!(lx.next_token().kind, TokenKind::IntLit(123));
    assert_eq!(lx.next_token().kind, TokenKind::FloatLit(3.14));
    assert_eq!(lx.next_token().kind, TokenKind::BoundedLit(255));
    assert_eq!(
        lx.next_token().kind,
        TokenKind::StringLit("hello world".to_string())
    );
    assert_eq!(lx.next_token().kind, TokenKind::Eof);
}
|
||||
|
||||
#[test]
fn test_lex_comments() {
    use TokenKind::*;

    // The `//` comment and its text must be skipped entirely; lexing
    // resumes after the newline.
    let mut lx = Lexer::new("let x = 10; // this is a comment\nlet y = 20;", 0);

    let expected = vec![
        Let, Identifier("x".to_string()), Assign, IntLit(10), Semicolon,
        Let, Identifier("y".to_string()), Assign, IntLit(20), Semicolon,
        Eof,
    ];

    for want in expected {
        assert_eq!(lx.next_token().kind, want);
    }
}
|
||||
|
||||
#[test]
fn test_lex_spans() {
    let mut lx = Lexer::new("let x = 10;", 0);

    // Expected (start, end) byte offsets for: `let`, `x`, `=`, `10`, `;`.
    for (start, end) in vec![(0, 3), (4, 5), (6, 7), (8, 10), (10, 11)] {
        let tok = lx.next_token();
        assert_eq!(tok.span.start, start);
        assert_eq!(tok.span.end, end);
    }
}
|
||||
|
||||
#[test]
fn test_lex_invalid_tokens() {
    // `@` and `#` are not part of the language; each becomes Invalid.
    let mut lx = Lexer::new("@ #", 0);

    for _ in 0..2 {
        assert!(matches!(lx.next_token().kind, TokenKind::Invalid(_)));
    }
    assert_eq!(lx.next_token().kind, TokenKind::Eof);
}
|
||||
|
||||
#[test]
fn test_lex_unterminated_string() {
    // End-of-input before the closing quote yields an Invalid token
    // instead of panicking or looping.
    let mut lx = Lexer::new("\"hello", 0);
    assert!(matches!(lx.next_token().kind, TokenKind::Invalid(_)));
}
|
||||
Loading…
x
Reference in New Issue
Block a user