dev/pbs #8

Merged
bquarkz merged 74 commits from dev/pbs into master 2026-02-03 15:28:31 +00:00
6 changed files with 553 additions and 19 deletions
Showing only changes of commit 3509eada8b - Show all commits

View File

@ -63,7 +63,7 @@ pub fn compile(project_dir: &Path) -> Result<CompilationUnit> {
// 1. Select Frontend
// The _frontend is responsible for parsing source code and producing the IR.
let _frontend: Box<dyn Frontend> = match config.script_fe.as_str() {
"pbs" => anyhow::bail!("Frontend 'pbs' not yet implemented"),
"pbs" => Box::new(crate::frontends::pbs::PbsFrontend),
_ => anyhow::bail!("Invalid frontend: {}", config.script_fe),
};
@ -76,7 +76,13 @@ pub fn compile(project_dir: &Path) -> Result<CompilationUnit> {
// This step abstracts away source-specific syntax (like TypeScript) into a
// generic set of instructions that the backend can understand.
let ir_module = _frontend.compile_to_ir(&entry, &mut file_manager)
.map_err(|bundle| anyhow::anyhow!("Compilation failed with {} errors", bundle.diagnostics.len()))?;
.map_err(|bundle| {
if let Some(diag) = bundle.diagnostics.first() {
anyhow::anyhow!("{}", diag.message)
} else {
anyhow::anyhow!("Compilation failed with {} errors", bundle.diagnostics.len())
}
})?;
// 3. IR Validation
// Ensures the generated IR is sound and doesn't violate any VM constraints
@ -119,21 +125,4 @@ mod tests {
assert!(result.unwrap_err().to_string().contains("Invalid frontend: invalid"));
}
#[test]
fn test_frontend_pbs_not_implemented() {
    // Selecting the "pbs" frontend must fail cleanly with a clear
    // "not yet implemented" error rather than panicking.
    let dir = tempdir().unwrap();
    let config_path = dir.path().join("prometeu.json");
    // Minimal project config selecting the pbs frontend; the entry file is
    // never read because compilation fails before it is opened.
    fs::write(
        config_path,
        r#"{
"script_fe": "pbs",
"entry": "main.pbs"
}"#,
    )
    .unwrap();
    let result = compile(dir.path());
    assert!(result.is_err());
    assert!(result.unwrap_err().to_string().contains("Frontend 'pbs' not yet implemented"));
}
}

View File

@ -4,6 +4,8 @@ use std::path::Path;
use crate::common::files::FileManager;
pub mod pbs;
pub trait Frontend {
fn language(&self) -> &'static str;

View File

@ -0,0 +1,267 @@
use crate::common::spans::Span;
use super::token::{Token, TokenKind};
use std::str::Chars;
use std::iter::Peekable;
/// Streaming tokenizer for PBS source code.
///
/// Produces one [`Token`] at a time via `next_token`; the current byte offset
/// is tracked in `pos` so every token carries an accurate [`Span`].
pub struct Lexer<'a> {
    // One-character-lookahead stream over the source text.
    chars: Peekable<Chars<'a>>,
    // Identifier of the file being lexed; copied into every emitted Span.
    file_id: usize,
    // Current byte offset into the source, advanced by each char's UTF-8 length.
    pos: u32,
}
impl<'a> Lexer<'a> {
    /// Creates a lexer over `source`; `file_id` is stamped into every span.
    pub fn new(source: &'a str, file_id: usize) -> Self {
        Self {
            chars: source.chars().peekable(),
            file_id,
            pos: 0,
        }
    }

    /// Looks at the next character without consuming it.
    fn peek(&mut self) -> Option<char> {
        self.chars.peek().copied()
    }

    /// Consumes one character, advancing `pos` by its UTF-8 byte length so
    /// spans stay correct even for multi-byte characters.
    fn next(&mut self) -> Option<char> {
        let c = self.chars.next();
        if let Some(c) = c {
            self.pos += c.len_utf8() as u32;
        }
        c
    }

    /// Skips whitespace and `//` line comments, stopping at the first
    /// significant character. A lone '/' is left in place so `next_token`
    /// can lex it as the division operator.
    fn skip_whitespace(&mut self) {
        while let Some(c) = self.peek() {
            if c.is_whitespace() {
                self.next();
            } else if c == '/' {
                if self.peek_next() == Some('/') {
                    // Line comment: consume both slashes, then everything up
                    // to (but not including) the terminating newline.
                    self.next(); // /
                    self.next(); // /
                    while let Some(c) = self.peek() {
                        if c == '\n' {
                            break;
                        }
                        self.next();
                    }
                } else {
                    // Single '/': division operator, not a comment.
                    break;
                }
            } else {
                break;
            }
        }
    }

    /// Two characters of lookahead (the character after `peek`), implemented
    /// by cloning the underlying iterator so nothing is consumed.
    fn peek_next(&self) -> Option<char> {
        let mut cloned = self.chars.clone();
        cloned.next();
        cloned.peek().copied()
    }

    /// Returns the next token in the stream.
    ///
    /// At end of input this returns an `Eof` token (and keeps returning it
    /// if called again). Unrecognized characters and malformed literals are
    /// reported as `TokenKind::Invalid` rather than panicking, so callers
    /// can produce diagnostics with the token's span.
    pub fn next_token(&mut self) -> Token {
        self.skip_whitespace();
        let start = self.pos;
        let c = match self.next() {
            Some(c) => c,
            None => return Token::new(TokenKind::Eof, Span::new(self.file_id, start, start)),
        };
        let kind = match c {
            '(' => TokenKind::OpenParen,
            ')' => TokenKind::CloseParen,
            '{' => TokenKind::OpenBrace,
            '}' => TokenKind::CloseBrace,
            '[' => TokenKind::OpenBracket,
            ']' => TokenKind::CloseBracket,
            ',' => TokenKind::Comma,
            '.' => TokenKind::Dot,
            ':' => TokenKind::Colon,
            ';' => TokenKind::Semicolon,
            '=' => {
                if self.peek() == Some('=') {
                    self.next();
                    TokenKind::Eq
                } else {
                    TokenKind::Assign
                }
            }
            '+' => TokenKind::Plus,
            '-' => {
                if self.peek() == Some('>') {
                    self.next();
                    TokenKind::Arrow
                } else {
                    TokenKind::Minus
                }
            }
            '*' => TokenKind::Star,
            '/' => TokenKind::Slash,
            '%' => TokenKind::Percent,
            '!' => {
                if self.peek() == Some('=') {
                    self.next();
                    TokenKind::Neq
                } else {
                    TokenKind::Not
                }
            }
            '<' => {
                if self.peek() == Some('=') {
                    self.next();
                    TokenKind::Lte
                } else {
                    TokenKind::Lt
                }
            }
            '>' => {
                if self.peek() == Some('=') {
                    self.next();
                    TokenKind::Gte
                } else {
                    TokenKind::Gt
                }
            }
            '&' => {
                if self.peek() == Some('&') {
                    self.next();
                    TokenKind::And
                } else {
                    // '&' alone is not an operator in PBS.
                    TokenKind::Invalid("&".to_string())
                }
            }
            '|' => {
                if self.peek() == Some('|') {
                    self.next();
                    TokenKind::Or
                } else {
                    // '|' alone is not an operator in PBS.
                    TokenKind::Invalid("|".to_string())
                }
            }
            '"' => self.lex_string(),
            '0'..='9' => self.lex_number(c),
            c if is_identifier_start(c) => self.lex_identifier(c),
            _ => TokenKind::Invalid(c.to_string()),
        };
        Token::new(kind, Span::new(self.file_id, start, self.pos))
    }

    /// Lexes a string literal; the opening '"' has already been consumed.
    ///
    /// Strings may not span lines: hitting a newline or end of input before
    /// the closing quote produces an `Invalid` token. No escape sequences
    /// are processed.
    fn lex_string(&mut self) -> TokenKind {
        let mut s = String::new();
        while let Some(c) = self.peek() {
            if c == '"' {
                self.next();
                return TokenKind::StringLit(s);
            }
            if c == '\n' {
                break; // Unterminated string
            }
            s.push(self.next().unwrap());
        }
        TokenKind::Invalid("Unterminated string".to_string())
    }

    /// Lexes a numeric literal whose first digit `first` is already consumed.
    ///
    /// Supports integers (`123`), floats (`3.14`), and bounded integers
    /// written with a `b` suffix (`255b`, parsed as u32). A '.' only becomes
    /// a decimal point when followed by a digit, so `1.foo` lexes as
    /// `1` `.` `foo`.
    fn lex_number(&mut self, first: char) -> TokenKind {
        let mut s = String::new();
        s.push(first);
        let mut is_float = false;
        while let Some(c) = self.peek() {
            if c.is_ascii_digit() {
                s.push(self.next().unwrap());
            } else if c == '.' && !is_float {
                if let Some(next_c) = self.peek_next() {
                    if next_c.is_ascii_digit() {
                        is_float = true;
                        s.push(self.next().unwrap()); // the '.'
                        s.push(self.next().unwrap()); // first fractional digit
                    } else {
                        break;
                    }
                } else {
                    break;
                }
            } else {
                break;
            }
        }
        if self.peek() == Some('b') && !is_float {
            self.next(); // consume the 'b' suffix
            return match s.parse::<u32>() {
                Ok(val) => TokenKind::BoundedLit(val),
                // The suffix is already consumed; previously this fell through
                // to the i64 branch and silently lexed e.g. `5000000000b` as
                // IntLit(5000000000), dropping the 'b'. Report the whole
                // literal as invalid instead.
                Err(_) => TokenKind::Invalid(format!("{}b", s)),
            };
        }
        if is_float {
            if let Ok(val) = s.parse::<f64>() {
                return TokenKind::FloatLit(val);
            }
        } else if let Ok(val) = s.parse::<i64>() {
            return TokenKind::IntLit(val);
        }
        // Digits that overflow i64 (floats here can always be parsed).
        TokenKind::Invalid(s)
    }

    /// Lexes an identifier or keyword; `first` is already consumed.
    /// Reserved words are mapped to their dedicated token kinds.
    fn lex_identifier(&mut self, first: char) -> TokenKind {
        let mut s = String::new();
        s.push(first);
        while let Some(c) = self.peek() {
            if is_identifier_part(c) {
                s.push(self.next().unwrap());
            } else {
                break;
            }
        }
        match s.as_str() {
            "import" => TokenKind::Import,
            "pub" => TokenKind::Pub,
            "mod" => TokenKind::Mod,
            "service" => TokenKind::Service,
            "fn" => TokenKind::Fn,
            "let" => TokenKind::Let,
            "mut" => TokenKind::Mut,
            "declare" => TokenKind::Declare,
            "struct" => TokenKind::Struct,
            "contract" => TokenKind::Contract,
            "host" => TokenKind::Host,
            "error" => TokenKind::Error,
            "optional" => TokenKind::Optional,
            "result" => TokenKind::Result,
            "some" => TokenKind::Some,
            "none" => TokenKind::None,
            "ok" => TokenKind::Ok,
            "err" => TokenKind::Err,
            "if" => TokenKind::If,
            "else" => TokenKind::Else,
            "when" => TokenKind::When,
            "for" => TokenKind::For,
            "in" => TokenKind::In,
            "return" => TokenKind::Return,
            "handle" => TokenKind::Handle,
            "borrow" => TokenKind::Borrow,
            "mutate" => TokenKind::Mutate,
            "peek" => TokenKind::Peek,
            "take" => TokenKind::Take,
            "alloc" => TokenKind::Alloc,
            "weak" => TokenKind::Weak,
            "as" => TokenKind::As,
            _ => TokenKind::Identifier(s),
        }
    }
}
/// True for characters that may begin an identifier: '_' or any
/// Unicode letter.
fn is_identifier_start(c: char) -> bool {
    c == '_' || c.is_alphabetic()
}
/// True for characters allowed after the first identifier character:
/// '_' or any Unicode alphanumeric.
fn is_identifier_part(c: char) -> bool {
    c == '_' || c.is_alphanumeric()
}

View File

@ -0,0 +1,28 @@
pub mod token;
pub mod lexer;
pub use lexer::Lexer;
pub use token::{Token, TokenKind};
use crate::common::diagnostics::DiagnosticBundle;
use crate::common::files::FileManager;
use crate::frontends::Frontend;
use crate::ir;
use std::path::Path;
/// Frontend for the PBS language. Stateless: all per-compilation state lives
/// in the lexer (and, eventually, the parser).
pub struct PbsFrontend;

impl Frontend for PbsFrontend {
    fn language(&self) -> &'static str {
        "pbs"
    }

    /// Compiles the entry file into an IR module.
    ///
    /// Currently always fails with a single-diagnostic bundle: the lexer
    /// exists, but parsing and IR lowering are not implemented yet.
    fn compile_to_ir(
        &self,
        _entry: &Path,
        _file_manager: &mut FileManager,
    ) -> Result<ir::Module, DiagnosticBundle> {
        // Parsing and full compilation will be implemented in future PRs.
        Err(DiagnosticBundle::error("Frontend 'pbs' not yet implemented".to_string(), None))
    }
}

View File

@ -0,0 +1,92 @@
use crate::common::spans::Span;
use serde::{Deserialize, Serialize};
/// Every lexical token kind produced by the PBS lexer.
///
/// Literal variants carry their parsed values; `BoundedLit` is an integer
/// written with a `b` suffix (e.g. `255b`, parsed as u32). `Invalid` carries
/// the offending source text so diagnostics can quote it — the lexer never
/// panics on bad input.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub enum TokenKind {
    // Keywords
    Import,
    Pub,
    Mod,
    Service,
    Fn,
    Let,
    Mut,
    Declare,
    Struct,
    Contract,
    Host,
    Error,
    Optional,
    Result,
    Some,
    None,
    Ok,
    Err,
    If,
    Else,
    When,
    For,
    In,
    Return,
    Handle,
    Borrow,
    Mutate,
    Peek,
    Take,
    Alloc,
    Weak,
    As,
    // Identifiers and Literals
    Identifier(String),
    IntLit(i64),
    FloatLit(f64),
    BoundedLit(u32), // integer literal with a `b` suffix, e.g. `255b`
    StringLit(String),
    // Punctuation
    OpenParen,    // (
    CloseParen,   // )
    OpenBrace,    // {
    CloseBrace,   // }
    OpenBracket,  // [
    CloseBracket, // ]
    Comma,        // ,
    Dot,          // .
    Colon,        // :
    Semicolon,    // ;
    Arrow,        // ->
    // Operators
    Assign,  // =
    Plus,    // +
    Minus,   // -
    Star,    // *
    Slash,   // /
    Percent, // %
    Eq,      // ==
    Neq,     // !=
    Lt,      // <
    Gt,      // >
    Lte,     // <=
    Gte,     // >=
    And,     // &&
    Or,      // ||
    Not,     // !
    // Special
    Eof,             // end of input; emitted repeatedly once reached
    Invalid(String), // unrecognized or malformed input, with the source text
}
/// A single lexed token: its kind plus the source span it was read from.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct Token {
    pub kind: TokenKind,
    pub span: Span,
}

impl Token {
    /// Bundles a kind with its span; performs no validation.
    pub fn new(kind: TokenKind, span: Span) -> Self {
        Self { kind, span }
    }
}

View File

@ -0,0 +1,156 @@
use prometeu_compiler::frontends::pbs::lexer::Lexer;
use prometeu_compiler::frontends::pbs::token::TokenKind;
#[test]
fn test_lex_basic_tokens() {
    // All punctuation and operators, including the two-character forms.
    let source = "( ) { } [ ] , . : ; -> = == + - * / % ! != < > <= >= && ||";
    let mut lexer = Lexer::new(source, 0);
    let expected = [
        TokenKind::OpenParen, TokenKind::CloseParen, TokenKind::OpenBrace,
        TokenKind::CloseBrace, TokenKind::OpenBracket, TokenKind::CloseBracket,
        TokenKind::Comma, TokenKind::Dot, TokenKind::Colon, TokenKind::Semicolon,
        TokenKind::Arrow, TokenKind::Assign, TokenKind::Eq, TokenKind::Plus,
        TokenKind::Minus, TokenKind::Star, TokenKind::Slash, TokenKind::Percent,
        TokenKind::Not, TokenKind::Neq, TokenKind::Lt, TokenKind::Gt,
        TokenKind::Lte, TokenKind::Gte, TokenKind::And, TokenKind::Or,
    ];
    for punct in expected {
        assert_eq!(lexer.next_token().kind, punct);
    }
    // Nothing should remain after the last operator.
    assert_eq!(lexer.next_token().kind, TokenKind::Eof);
}
#[test]
fn test_lex_keywords() {
    // One source string containing every reserved word.
    let source = "import pub mod service fn let mut declare struct contract host error optional result some none ok err if else when for in return handle borrow mutate peek take alloc weak as";
    let mut lexer = Lexer::new(source, 0);
    let expected = [
        TokenKind::Import, TokenKind::Pub, TokenKind::Mod, TokenKind::Service,
        TokenKind::Fn, TokenKind::Let, TokenKind::Mut, TokenKind::Declare,
        TokenKind::Struct, TokenKind::Contract, TokenKind::Host, TokenKind::Error,
        TokenKind::Optional, TokenKind::Result, TokenKind::Some, TokenKind::None,
        TokenKind::Ok, TokenKind::Err, TokenKind::If, TokenKind::Else,
        TokenKind::When, TokenKind::For, TokenKind::In, TokenKind::Return,
        TokenKind::Handle, TokenKind::Borrow, TokenKind::Mutate, TokenKind::Peek,
        TokenKind::Take, TokenKind::Alloc, TokenKind::Weak, TokenKind::As,
    ];
    for keyword in expected {
        assert_eq!(lexer.next_token().kind, keyword);
    }
    // The stream must be exhausted after the last keyword.
    assert_eq!(lexer.next_token().kind, TokenKind::Eof);
}
#[test]
fn test_lex_identifiers() {
    // Identifiers may start with a letter or '_' and continue with
    // alphanumerics; non-keyword words lex as Identifier.
    let mut lexer = Lexer::new("foo bar _baz qux123", 0);
    for name in ["foo", "bar", "_baz", "qux123"] {
        assert_eq!(lexer.next_token().kind, TokenKind::Identifier(name.to_string()));
    }
    assert_eq!(lexer.next_token().kind, TokenKind::Eof);
}
#[test]
fn test_lex_literals() {
    // One of each literal kind: int, float, bounded (`b` suffix), string.
    let mut lexer = Lexer::new("123 3.14 255b \"hello world\"", 0);
    assert_eq!(lexer.next_token().kind, TokenKind::IntLit(123));
    assert_eq!(lexer.next_token().kind, TokenKind::FloatLit(3.14));
    assert_eq!(lexer.next_token().kind, TokenKind::BoundedLit(255));
    assert_eq!(lexer.next_token().kind, TokenKind::StringLit("hello world".to_string()));
    assert_eq!(lexer.next_token().kind, TokenKind::Eof);
}
#[test]
fn test_lex_comments() {
    // The `//` comment must be skipped through the end of its line; both
    // `let` statements around it should lex normally.
    let source = "let x = 10; // this is a comment\nlet y = 20;";
    let mut lexer = Lexer::new(source, 0);
    // Collect every token kind up to and including Eof.
    let mut kinds = Vec::new();
    loop {
        let kind = lexer.next_token().kind;
        let at_end = kind == TokenKind::Eof;
        kinds.push(kind);
        if at_end {
            break;
        }
    }
    assert_eq!(
        kinds,
        vec![
            TokenKind::Let, TokenKind::Identifier("x".to_string()),
            TokenKind::Assign, TokenKind::IntLit(10), TokenKind::Semicolon,
            TokenKind::Let, TokenKind::Identifier("y".to_string()),
            TokenKind::Assign, TokenKind::IntLit(20), TokenKind::Semicolon,
            TokenKind::Eof,
        ]
    );
}
#[test]
fn test_lex_spans() {
    let mut lexer = Lexer::new("let x = 10;", 0);
    // Expected (start, end) byte offsets for: `let`, `x`, `=`, `10`, `;`.
    for (start, end) in [(0, 3), (4, 5), (6, 7), (8, 10), (10, 11)] {
        let token = lexer.next_token();
        assert_eq!(token.span.start, start);
        assert_eq!(token.span.end, end);
    }
}
#[test]
fn test_lex_invalid_tokens() {
    // '@' and '#' are not part of the language; each lexes as Invalid
    // rather than panicking.
    let mut lexer = Lexer::new("@ #", 0);
    for _ in 0..2 {
        assert!(matches!(lexer.next_token().kind, TokenKind::Invalid(_)));
    }
    assert_eq!(lexer.next_token().kind, TokenKind::Eof);
}
#[test]
fn test_lex_unterminated_string() {
    // A string that reaches end of input before its closing quote must
    // produce an Invalid token.
    let mut lexer = Lexer::new("\"hello", 0);
    let token = lexer.next_token();
    assert!(matches!(token.kind, TokenKind::Invalid(_)));
}