From 14da21fe9f7e81a755afff8f4b139329f0720f3e Mon Sep 17 00:00:00 2001 From: bQUARKz Date: Thu, 26 Feb 2026 19:28:14 +0000 Subject: [PATCH] added first steps on pbs --- .../PBS - Language Syntax Specification v0.md | 6 +- .../p/studio/compiler/pbs/ast/PbsAst.java | 2 - .../p/studio/compiler/pbs/lexer/PbsLexer.java | 56 ++++++++++++++----- .../compiler/pbs/lexer/PbsTokenKind.java | 50 +++++++++++++++-- .../studio/compiler/pbs/parser/PbsParser.java | 4 -- 5 files changed, 90 insertions(+), 28 deletions(-) diff --git a/docs/specs/pbs/PBS - Language Syntax Specification v0.md b/docs/specs/pbs/PBS - Language Syntax Specification v0.md index d5dc6fab..a18c0bfc 100644 --- a/docs/specs/pbs/PBS - Language Syntax Specification v0.md +++ b/docs/specs/pbs/PBS - Language Syntax Specification v0.md @@ -73,7 +73,7 @@ Active keywords in `.pbs` files (v0 Core): - `import`, `from`, `as` - `service`, `fn` - `declare`, `struct`, `contract`, `error` -- `let`, `mut` +- `let` - `if`, `else`, `when`, `for`, `in`, `return` - `true`, `false` @@ -221,7 +221,7 @@ ServiceMember ::= 'fn' Identifier ParamList ReturnType? Block ```ebnf FunctionDecl ::= 'fn' Identifier ParamList ReturnType? ElseFallback? Block ParamList ::= '(' Param (',' Param)* ')' -Param ::= 'mut'? Identifier ':' TypeRef +Param ::= Identifier ':' TypeRef ReturnType ::= ':' TypeRef ElseFallback ::= 'else' Expr ``` @@ -248,7 +248,7 @@ Block ::= '{' Stmt* TailExpr? '}' Stmt ::= LetStmt | ReturnStmt | IfStmt | ForStmt | ExprStmt TailExpr ::= Expr -LetStmt ::= 'let' 'mut'? Identifier (':' TypeRef)? '=' Expr ';' +LetStmt ::= 'let' Identifier (':' TypeRef)? '=' Expr ';' ReturnStmt ::= 'return' Expr? ';' ExprStmt ::= Expr ';' diff --git a/prometeu-compiler/frontends/prometeu-frontend-pbs/src/main/java/p/studio/compiler/pbs/ast/PbsAst.java b/prometeu-compiler/frontends/prometeu-frontend-pbs/src/main/java/p/studio/compiler/pbs/ast/PbsAst.java index b07579a7..d8f13806 100644 --- a/prometeu-compiler/frontends/prometeu-frontend-pbs/src/main/java/p/studio/compiler/pbs/ast/PbsAst.java +++ b/prometeu-compiler/frontends/prometeu-frontend-pbs/src/main/java/p/studio/compiler/pbs/ast/PbsAst.java @@ -23,7 +23,6 @@ public final class PbsAst { public record Parameter( String name, - boolean mutable, TypeRef typeRef, Span span) { } @@ -44,7 +43,6 @@ public final class PbsAst { public record LetStatement( String name, - boolean mutable, TypeRef explicitType, Expression initializer, Span span) implements Statement { diff --git a/prometeu-compiler/frontends/prometeu-frontend-pbs/src/main/java/p/studio/compiler/pbs/lexer/PbsLexer.java b/prometeu-compiler/frontends/prometeu-frontend-pbs/src/main/java/p/studio/compiler/pbs/lexer/PbsLexer.java index b8f2fa5f..60f44a1c 100644 --- a/prometeu-compiler/frontends/prometeu-frontend-pbs/src/main/java/p/studio/compiler/pbs/lexer/PbsLexer.java +++ b/prometeu-compiler/frontends/prometeu-frontend-pbs/src/main/java/p/studio/compiler/pbs/lexer/PbsLexer.java @@ -10,6 +10,14 @@ import java.util.Map; public final class PbsLexer { private static final Map KEYWORDS = buildKeywords(); + private enum LexerState { + DEFAULT, + IDENTIFIER, + NUMBER, + STRING, + LINE_COMMENT + } + private final String source; private final String sourceLabel; private final BuildingIssueSink issues; @@ -17,6 +25,7 @@ public final class PbsLexer { private int start; private int current; + private LexerState state = LexerState.DEFAULT; private PbsLexer(final String source, final String sourceLabel, final BuildingIssueSink issues) { this.source = source == null ? "" : source; @@ -33,15 +42,24 @@ public final class PbsLexer { } private ReadOnlyList lexInternal() { - while (!isAtEnd()) { - start = current; - scanToken(); + while (!isAtEnd() || state != LexerState.DEFAULT) { + switch (state) { + case DEFAULT -> scanDefaultState(); + case IDENTIFIER -> scanIdentifierState(); + case NUMBER -> scanNumberState(); + case STRING -> scanStringState(); + case LINE_COMMENT -> scanLineCommentState(); + } } tokens.add(new PbsToken(PbsTokenKind.EOF, "", current, current)); return ReadOnlyList.wrap(tokens); } - private void scanToken() { + private void scanDefaultState() { + if (isAtEnd()) { + return; + } + start = current; final char c = advance(); switch (c) { case ' ', '\r', '\t', '\n' -> { @@ -88,22 +106,19 @@ public final class PbsLexer { } case '/' -> { if (match('/')) { - // Line comment - while (!isAtEnd() && peek() != '\n') { - advance(); - } + state = LexerState.LINE_COMMENT; return; } addToken(PbsTokenKind.SLASH); } - case '"' -> string(); + case '"' -> state = LexerState.STRING; default -> { if (isDigit(c)) { - number(); + state = LexerState.NUMBER; return; } if (isIdentifierStart(c)) { - identifier(); + state = LexerState.IDENTIFIER; return; } report("E_LEX_INVALID_CHAR", "Invalid character: '%s'".formatted(c)); @@ -111,7 +126,7 @@ public final class PbsLexer { } } - private void identifier() { + private void scanIdentifierState() { while (!isAtEnd() && isIdentifierPart(peek())) { advance(); } @@ -119,9 +134,10 @@ public final class PbsLexer { final String text = source.substring(start, current); final PbsTokenKind kind = KEYWORDS.getOrDefault(text, PbsTokenKind.IDENTIFIER); addToken(kind); + state = LexerState.DEFAULT; } - private void number() { + private void scanNumberState() { while (!isAtEnd() && isDigit(peek())) { advance(); } @@ -138,13 +154,15 @@ public final class PbsLexer { if (!isFloat && !isAtEnd() && peek() == 'b') { advance(); addToken(PbsTokenKind.BOUNDED_LITERAL); + state = LexerState.DEFAULT; return; } addToken(isFloat ? PbsTokenKind.FLOAT_LITERAL : PbsTokenKind.INT_LITERAL); + state = LexerState.DEFAULT; } - private void string() { + private void scanStringState() { while (!isAtEnd() && peek() != '"') { if (peek() == '\\' && !isAtEnd()) { advance(); @@ -158,12 +176,21 @@ public final class PbsLexer { if (isAtEnd()) { report("E_LEX_UNTERMINATED_STRING", "Unterminated string literal"); + state = LexerState.DEFAULT; return; } // Closing quote. advance(); addToken(PbsTokenKind.STRING_LITERAL); + state = LexerState.DEFAULT; + } + + private void scanLineCommentState() { + while (!isAtEnd() && peek() != '\n') { + advance(); + } + state = LexerState.DEFAULT; } private void addToken(final PbsTokenKind kind) { @@ -224,7 +251,6 @@ public final class PbsLexer { map.put("service", PbsTokenKind.SERVICE); map.put("fn", PbsTokenKind.FN); map.put("let", PbsTokenKind.LET); - map.put("mut", PbsTokenKind.MUT); map.put("declare", PbsTokenKind.DECLARE); map.put("struct", PbsTokenKind.STRUCT); map.put("contract", PbsTokenKind.CONTRACT); diff --git a/prometeu-compiler/frontends/prometeu-frontend-pbs/src/main/java/p/studio/compiler/pbs/lexer/PbsTokenKind.java b/prometeu-compiler/frontends/prometeu-frontend-pbs/src/main/java/p/studio/compiler/pbs/lexer/PbsTokenKind.java index 63f41159..09c94203 100644 --- a/prometeu-compiler/frontends/prometeu-frontend-pbs/src/main/java/p/studio/compiler/pbs/lexer/PbsTokenKind.java +++ b/prometeu-compiler/frontends/prometeu-frontend-pbs/src/main/java/p/studio/compiler/pbs/lexer/PbsTokenKind.java @@ -1,37 +1,70 @@ package p.studio.compiler.pbs.lexer; +/** + * Token kinds produced by the PBS lexer. + */ public enum PbsTokenKind { + // End of file marker. EOF, + + // User-defined names. + // Example: `sum`, `Vector`, `input_state` IDENTIFIER, + + // Literal values. + // Example: `42`, `3.14`, `255b`, `"hello"` INT_LITERAL, FLOAT_LITERAL, BOUNDED_LITERAL, STRING_LITERAL, - // Keywords + // Declaration and import keywords. + // Example: + // `import { Vec2 as V2 } from @core:math;` IMPORT, FROM, AS, + + // Barrel visibility keywords (reserved in source parser for now). + // Example in barrel: + // `pub fn sum;` PUB, MOD, + + // Top-level declaration keywords. + // Example: + // `service Audio { fn play(): int { return 1; } }` + // `declare struct Point(x: int, y: int);` SERVICE, FN, - LET, - MUT, DECLARE, + + // Statement and type declaration keywords. + // Example: + // `let x: int = 1;` + LET, STRUCT, CONTRACT, ERROR, + + // Control-flow keywords. + // Example: + // `if cond { return 1; } else { return 0; }` IF, ELSE, WHEN, FOR, IN, RETURN, + + // Boolean literals. + // Example: `true && false` TRUE, FALSE, - // Punctuation / operators + // Delimiters and separators. + // Example: + // `fn f(a: int) { return a; }` LEFT_PAREN, RIGHT_PAREN, LEFT_BRACE, @@ -44,12 +77,18 @@ public enum PbsTokenKind { AT, DOT_DOT, + // Arithmetic and unary operators. + // Example: `-a + b * 2` PLUS, MINUS, STAR, SLASH, PERCENT, BANG, + + // Comparison and assignment operators. + // Example: + // `a == b`, `x <= y`, `value = 10` BANG_EQUAL, EQUAL, EQUAL_EQUAL, @@ -57,6 +96,9 @@ public enum PbsTokenKind { LESS_EQUAL, GREATER, GREATER_EQUAL, + + // Logical operators. + // Example: `a && b || c` AND_AND, OR_OR } diff --git a/prometeu-compiler/frontends/prometeu-frontend-pbs/src/main/java/p/studio/compiler/pbs/parser/PbsParser.java b/prometeu-compiler/frontends/prometeu-frontend-pbs/src/main/java/p/studio/compiler/pbs/parser/PbsParser.java index 8efc19c7..86ded9a7 100644 --- a/prometeu-compiler/frontends/prometeu-frontend-pbs/src/main/java/p/studio/compiler/pbs/parser/PbsParser.java +++ b/prometeu-compiler/frontends/prometeu-frontend-pbs/src/main/java/p/studio/compiler/pbs/parser/PbsParser.java @@ -115,13 +115,11 @@ public final class PbsParser { if (!check(PbsTokenKind.RIGHT_PAREN)) { do { final var pStart = peek(); - final boolean mutable = match(PbsTokenKind.MUT); final var pName = consume(PbsTokenKind.IDENTIFIER, "E_PARSE_EXPECTED_TOKEN", "Expected parameter name"); consume(PbsTokenKind.COLON, "E_PARSE_EXPECTED_TOKEN", "Expected ':' after parameter name"); final var typeRef = parseTypeRef(); parameters.add(new PbsAst.Parameter( pName.lexeme(), - mutable, typeRef, span(pStart.start(), typeRef.span().getEnd()))); } while (match(PbsTokenKind.COMMA)); @@ -174,7 +172,6 @@ public final class PbsParser { } private PbsAst.Statement parseLetStatement(final PbsToken letToken) { - final boolean mutable = match(PbsTokenKind.MUT); final var name = consume(PbsTokenKind.IDENTIFIER, "E_PARSE_EXPECTED_TOKEN", "Expected variable name"); PbsAst.TypeRef explicitType = null; @@ -188,7 +185,6 @@ public final class PbsParser { return new PbsAst.LetStatement( name.lexeme(), - mutable, explicitType, initializer, span(letToken.start(), semicolon.end()));