From 833c6ec049e61610d7f7ab55d51b0bf4d8a35ceb Mon Sep 17 00:00:00 2001 From: bQUARKz Date: Fri, 27 Feb 2026 04:46:07 +0000 Subject: [PATCH] add lex and parser errors + comments --- .../compiler/pbs/PbsFrontendCompiler.java | 21 +- .../studio/compiler/pbs/lexer/LexErrors.java | 6 + .../p/studio/compiler/pbs/lexer/PbsLexer.java | 36 +-- .../compiler/pbs/parser/ParseErrors.java | 8 + .../studio/compiler/pbs/parser/PbsParser.java | 266 ++++++++++++++---- .../services/PBSFrontendPhaseService.java | 31 +- .../compiler/pbs/PbsFrontendCompilerTest.java | 14 +- .../compiler/pbs/lexer/PbsLexerTest.java | 9 +- .../compiler/pbs/parser/PbsParserTest.java | 11 +- 9 files changed, 297 insertions(+), 105 deletions(-) create mode 100644 prometeu-compiler/frontends/prometeu-frontend-pbs/src/main/java/p/studio/compiler/pbs/lexer/LexErrors.java create mode 100644 prometeu-compiler/frontends/prometeu-frontend-pbs/src/main/java/p/studio/compiler/pbs/parser/ParseErrors.java diff --git a/prometeu-compiler/frontends/prometeu-frontend-pbs/src/main/java/p/studio/compiler/pbs/PbsFrontendCompiler.java b/prometeu-compiler/frontends/prometeu-frontend-pbs/src/main/java/p/studio/compiler/pbs/PbsFrontendCompiler.java index a90b36d2..fb09390a 100644 --- a/prometeu-compiler/frontends/prometeu-frontend-pbs/src/main/java/p/studio/compiler/pbs/PbsFrontendCompiler.java +++ b/prometeu-compiler/frontends/prometeu-frontend-pbs/src/main/java/p/studio/compiler/pbs/PbsFrontendCompiler.java @@ -1,10 +1,10 @@ package p.studio.compiler.pbs; -import p.studio.compiler.messages.BuildingIssueSink; import p.studio.compiler.models.IRFunction; import p.studio.compiler.pbs.ast.PbsAst; import p.studio.compiler.pbs.lexer.PbsLexer; import p.studio.compiler.pbs.parser.PbsParser; +import p.studio.compiler.source.diagnostics.DiagnosticSink; import p.studio.compiler.source.identifiers.FileId; import p.studio.utilities.structures.ReadOnlyList; @@ -16,27 +16,24 @@ public final class PbsFrontendCompiler { public ReadOnlyList 
compileFile( final FileId fileId, final String source, - final String sourceLabel, - final BuildingIssueSink issues) { - final var tokens = PbsLexer.lex(source, sourceLabel, issues); - final var ast = PbsParser.parse(tokens, fileId, sourceLabel, issues); - validateFunctionNames(ast, sourceLabel, issues); + final DiagnosticSink diagnostics) { + final var tokens = PbsLexer.lex(source, fileId, diagnostics); + final var ast = PbsParser.parse(tokens, fileId, diagnostics); + validateFunctionNames(ast, diagnostics); return lowerFunctions(fileId, ast); } private void validateFunctionNames( final PbsAst.File ast, - final String sourceLabel, - final BuildingIssueSink issues) { + final DiagnosticSink diagnostics) { final Set names = new HashSet<>(); for (final var fn : ast.functions()) { if (names.add(fn.name())) { continue; } - issues.report(builder -> builder - .error(true) - .message("[PBS:E_RESOLVE_DUPLICATE_SYMBOL] Duplicate function '%s' at %s:[%d,%d)" - .formatted(fn.name(), sourceLabel, fn.span().getStart(), fn.span().getEnd()))); + diagnostics.error("E_RESOLVE_DUPLICATE_SYMBOL", + "Duplicate function '%s'".formatted(fn.name()), + fn.span()); } } diff --git a/prometeu-compiler/frontends/prometeu-frontend-pbs/src/main/java/p/studio/compiler/pbs/lexer/LexErrors.java b/prometeu-compiler/frontends/prometeu-frontend-pbs/src/main/java/p/studio/compiler/pbs/lexer/LexErrors.java new file mode 100644 index 00000000..b620ad62 --- /dev/null +++ b/prometeu-compiler/frontends/prometeu-frontend-pbs/src/main/java/p/studio/compiler/pbs/lexer/LexErrors.java @@ -0,0 +1,6 @@ +package p.studio.compiler.pbs.lexer; + +public enum LexErrors { + E_LEX_INVALID_CHAR, + E_LEX_UNTERMINATED_STRING, +} diff --git a/prometeu-compiler/frontends/prometeu-frontend-pbs/src/main/java/p/studio/compiler/pbs/lexer/PbsLexer.java b/prometeu-compiler/frontends/prometeu-frontend-pbs/src/main/java/p/studio/compiler/pbs/lexer/PbsLexer.java index 60f44a1c..d63520d3 100644 --- 
a/prometeu-compiler/frontends/prometeu-frontend-pbs/src/main/java/p/studio/compiler/pbs/lexer/PbsLexer.java +++ b/prometeu-compiler/frontends/prometeu-frontend-pbs/src/main/java/p/studio/compiler/pbs/lexer/PbsLexer.java @@ -1,6 +1,8 @@ package p.studio.compiler.pbs.lexer; -import p.studio.compiler.messages.BuildingIssueSink; +import p.studio.compiler.source.Span; +import p.studio.compiler.source.diagnostics.DiagnosticSink; +import p.studio.compiler.source.identifiers.FileId; import p.studio.utilities.structures.ReadOnlyList; import java.util.ArrayList; @@ -19,25 +21,25 @@ public final class PbsLexer { } private final String source; - private final String sourceLabel; - private final BuildingIssueSink issues; + private final FileId fileId; + private final DiagnosticSink diagnostics; private final ArrayList tokens = new ArrayList<>(); private int start; private int current; private LexerState state = LexerState.DEFAULT; - private PbsLexer(final String source, final String sourceLabel, final BuildingIssueSink issues) { + private PbsLexer(final String source, final FileId fileId, final DiagnosticSink diagnostics) { this.source = source == null ? "" : source; - this.sourceLabel = sourceLabel == null ? "" : sourceLabel; - this.issues = issues; + this.fileId = fileId; + this.diagnostics = diagnostics; } public static ReadOnlyList lex( final String source, - final String sourceLabel, - final BuildingIssueSink issues) { - final var lexer = new PbsLexer(source, sourceLabel, issues); + final FileId fileId, + final DiagnosticSink diagnostics) { + final var lexer = new PbsLexer(source, fileId, diagnostics); return lexer.lexInternal(); } @@ -84,7 +86,7 @@ public final class PbsLexer { addToken(PbsTokenKind.DOT_DOT); return; } - report("E_LEX_INVALID_CHAR", "Unexpected '.'"); + report(LexErrors.E_LEX_INVALID_CHAR, "Unexpected '.'"); } case '!' -> addToken(match('=') ? PbsTokenKind.BANG_EQUAL : PbsTokenKind.BANG); case '=' -> addToken(match('=') ? 
PbsTokenKind.EQUAL_EQUAL : PbsTokenKind.EQUAL); @@ -95,14 +97,14 @@ public final class PbsLexer { addToken(PbsTokenKind.AND_AND); return; } - report("E_LEX_INVALID_CHAR", "Unexpected '&'"); + report(LexErrors.E_LEX_INVALID_CHAR, "Unexpected '&'"); } case '|' -> { if (match('|')) { addToken(PbsTokenKind.OR_OR); return; } - report("E_LEX_INVALID_CHAR", "Unexpected '|'"); + report(LexErrors.E_LEX_INVALID_CHAR, "Unexpected '|'"); } case '/' -> { if (match('/')) { @@ -121,7 +123,7 @@ public final class PbsLexer { state = LexerState.IDENTIFIER; return; } - report("E_LEX_INVALID_CHAR", "Invalid character: '%s'".formatted(c)); + report(LexErrors.E_LEX_INVALID_CHAR, "Invalid character: '%s'".formatted(c)); } } } @@ -175,7 +177,7 @@ public final class PbsLexer { } if (isAtEnd()) { - report("E_LEX_UNTERMINATED_STRING", "Unterminated string literal"); + report(LexErrors.E_LEX_UNTERMINATED_STRING, "Unterminated string literal"); state = LexerState.DEFAULT; return; } @@ -235,10 +237,8 @@ public final class PbsLexer { return c == '_' || Character.isAlphabetic(c) || Character.isDigit(c); } - private void report(final String code, final String message) { - issues.report(builder -> builder - .error(true) - .message("[PBS:%s] %s at %s:[%d,%d)".formatted(code, message, sourceLabel, start, current))); + private void report(final LexErrors lexErrors, final String message) { + diagnostics.error(lexErrors.name(), message, new Span(fileId, start, current)); } private static Map buildKeywords() { diff --git a/prometeu-compiler/frontends/prometeu-frontend-pbs/src/main/java/p/studio/compiler/pbs/parser/ParseErrors.java b/prometeu-compiler/frontends/prometeu-frontend-pbs/src/main/java/p/studio/compiler/pbs/parser/ParseErrors.java new file mode 100644 index 00000000..e9fd9c3f --- /dev/null +++ b/prometeu-compiler/frontends/prometeu-frontend-pbs/src/main/java/p/studio/compiler/pbs/parser/ParseErrors.java @@ -0,0 +1,8 @@ +package p.studio.compiler.pbs.parser; + +public enum ParseErrors { + 
E_PARSE_EXPECTED_TOKEN, + E_PARSE_UNEXPECTED_TOKEN, + E_PARSE_VISIBILITY_IN_SOURCE, + E_PARSE_NON_ASSOC, +} diff --git a/prometeu-compiler/frontends/prometeu-frontend-pbs/src/main/java/p/studio/compiler/pbs/parser/PbsParser.java b/prometeu-compiler/frontends/prometeu-frontend-pbs/src/main/java/p/studio/compiler/pbs/parser/PbsParser.java index 86ded9a7..b4badd2f 100644 --- a/prometeu-compiler/frontends/prometeu-frontend-pbs/src/main/java/p/studio/compiler/pbs/parser/PbsParser.java +++ b/prometeu-compiler/frontends/prometeu-frontend-pbs/src/main/java/p/studio/compiler/pbs/parser/PbsParser.java @@ -1,10 +1,10 @@ package p.studio.compiler.pbs.parser; -import p.studio.compiler.messages.BuildingIssueSink; import p.studio.compiler.pbs.ast.PbsAst; import p.studio.compiler.pbs.lexer.PbsToken; import p.studio.compiler.pbs.lexer.PbsTokenKind; import p.studio.compiler.source.Span; +import p.studio.compiler.source.diagnostics.DiagnosticSink; import p.studio.compiler.source.identifiers.FileId; import p.studio.utilities.structures.ReadOnlyList; @@ -13,32 +13,43 @@ import java.util.ArrayList; public final class PbsParser { private final ArrayList tokens; private final FileId fileId; - private final String sourceLabel; - private final BuildingIssueSink issues; + private final DiagnosticSink diagnostics; private int current; private PbsParser( final ReadOnlyList tokens, final FileId fileId, - final String sourceLabel, - final BuildingIssueSink issues) { + final DiagnosticSink diagnostics) { this.tokens = new ArrayList<>(); for (final var token : tokens) { this.tokens.add(token); } this.fileId = fileId; - this.sourceLabel = sourceLabel == null ? "" : sourceLabel; - this.issues = issues; + this.diagnostics = diagnostics; } + /** + * Parses a token stream into a PBS file AST. + * + *

<p>Example:
+     * <pre>{@code
+     * fn sum(a: int, b: int): int {
+     *   return a + b;
+     * }
+     * }</pre>
+ */ public static PbsAst.File parse( final ReadOnlyList tokens, final FileId fileId, - final String sourceLabel, - final BuildingIssueSink issues) { - return new PbsParser(tokens, fileId, sourceLabel, issues).parseFile(); + final DiagnosticSink diagnostics) { + return new PbsParser(tokens, fileId, diagnostics).parseFile(); } + /** + * Parses a full file as a sequence of imports and top-level declarations. + * + *

The current slice only stores top-level functions in the AST. + */ private PbsAst.File parseFile() { final var functions = new ArrayList(); @@ -54,7 +65,7 @@ public final class PbsParser { } if (match(PbsTokenKind.MOD, PbsTokenKind.PUB)) { - report(previous(), "E_PARSE_VISIBILITY_IN_SOURCE", + report(previous(), ParseErrors.E_PARSE_VISIBILITY_IN_SOURCE, "Visibility modifiers are barrel-only and cannot appear in .pbs declarations"); synchronizeTopLevel(); continue; @@ -64,7 +75,7 @@ public final class PbsParser { break; } - report(peek(), "E_PARSE_UNEXPECTED_TOKEN", "Expected top-level declaration ('fn') or import"); + report(peek(), ParseErrors.E_PARSE_UNEXPECTED_TOKEN, "Expected top-level declaration ('fn') or import"); synchronizeTopLevel(); } @@ -72,6 +83,15 @@ public final class PbsParser { return new PbsAst.File(ReadOnlyList.wrap(functions), span(0, eof.end())); } + /** + * Parses import syntax for validation and recovery, but does not store imports yet. + * + *

<p>Supported forms:
+     * <pre>{@code
+     * import @core:math;
+     * import { Vector, Matrix as Mat } from @core:math;
+     * }</pre>
+ */ private void parseAndDiscardImport() { // Supports both forms: // import @core:math; @@ -80,43 +100,56 @@ public final class PbsParser { while (!check(PbsTokenKind.RIGHT_BRACE) && !isAtEnd()) { if (match(PbsTokenKind.IDENTIFIER)) { if (match(PbsTokenKind.AS)) { - consume(PbsTokenKind.IDENTIFIER, "E_PARSE_EXPECTED_TOKEN", + consume(PbsTokenKind.IDENTIFIER, "Expected alias identifier after 'as'"); } match(PbsTokenKind.COMMA); continue; } - report(peek(), "E_PARSE_UNEXPECTED_TOKEN", "Invalid import item"); + report(peek(), ParseErrors.E_PARSE_UNEXPECTED_TOKEN, "Invalid import item"); advance(); } - consume(PbsTokenKind.RIGHT_BRACE, "E_PARSE_EXPECTED_TOKEN", "Expected '}' in import list"); - consume(PbsTokenKind.FROM, "E_PARSE_EXPECTED_TOKEN", "Expected 'from' in named import"); + consume(PbsTokenKind.RIGHT_BRACE, "Expected '}' in import list"); + consume(PbsTokenKind.FROM, "Expected 'from' in named import"); } parseModuleRef(); - consume(PbsTokenKind.SEMICOLON, "E_PARSE_EXPECTED_TOKEN", "Expected ';' after import"); + consume(PbsTokenKind.SEMICOLON, "Expected ';' after import"); } + /** + * Parses a module reference such as {@code @core:math/tools}. 
+ */ private void parseModuleRef() { - consume(PbsTokenKind.AT, "E_PARSE_EXPECTED_TOKEN", "Expected '@' in module reference"); - consume(PbsTokenKind.IDENTIFIER, "E_PARSE_EXPECTED_TOKEN", "Expected project identifier in module reference"); - consume(PbsTokenKind.COLON, "E_PARSE_EXPECTED_TOKEN", "Expected ':' in module reference"); - consume(PbsTokenKind.IDENTIFIER, "E_PARSE_EXPECTED_TOKEN", "Expected module identifier"); + consume(PbsTokenKind.AT, "Expected '@' in module reference"); + consume(PbsTokenKind.IDENTIFIER, "Expected project identifier in module reference"); + consume(PbsTokenKind.COLON, "Expected ':' in module reference"); + consume(PbsTokenKind.IDENTIFIER, "Expected module identifier"); while (match(PbsTokenKind.SLASH)) { - consume(PbsTokenKind.IDENTIFIER, "E_PARSE_EXPECTED_TOKEN", "Expected module path segment after '/'"); + consume(PbsTokenKind.IDENTIFIER, "Expected module path segment after '/'"); } } + /** + * Parses a top-level function declaration. + * + *

<p>Example:
+     * <pre>{@code
+     * fn sum(a: int, b: int): int {
+     *   return a + b;
+     * }
+     * }</pre>
+ */ private PbsAst.FunctionDecl parseFunction(final PbsToken fnToken) { - final var name = consume(PbsTokenKind.IDENTIFIER, "E_PARSE_EXPECTED_TOKEN", "Expected function name"); - consume(PbsTokenKind.LEFT_PAREN, "E_PARSE_EXPECTED_TOKEN", "Expected '(' after function name"); + final var name = consume(PbsTokenKind.IDENTIFIER, "Expected function name"); + consume(PbsTokenKind.LEFT_PAREN, "Expected '(' after function name"); final var parameters = new ArrayList(); if (!check(PbsTokenKind.RIGHT_PAREN)) { do { final var pStart = peek(); - final var pName = consume(PbsTokenKind.IDENTIFIER, "E_PARSE_EXPECTED_TOKEN", "Expected parameter name"); - consume(PbsTokenKind.COLON, "E_PARSE_EXPECTED_TOKEN", "Expected ':' after parameter name"); + final var pName = consume(PbsTokenKind.IDENTIFIER, "Expected parameter name"); + consume(PbsTokenKind.COLON, "Expected ':' after parameter name"); final var typeRef = parseTypeRef(); parameters.add(new PbsAst.Parameter( pName.lexeme(), @@ -124,7 +157,7 @@ public final class PbsParser { span(pStart.start(), typeRef.span().getEnd()))); } while (match(PbsTokenKind.COMMA)); } - consume(PbsTokenKind.RIGHT_PAREN, "E_PARSE_EXPECTED_TOKEN", "Expected ')' after parameter list"); + consume(PbsTokenKind.RIGHT_PAREN, "Expected ')' after parameter list"); PbsAst.TypeRef returnType = null; if (match(PbsTokenKind.COLON)) { @@ -146,21 +179,40 @@ public final class PbsParser { span(fnToken.start(), body.span().getEnd())); } + /** + * Parses a simple identifier-based type reference such as {@code int} or {@code Vector}. + */ private PbsAst.TypeRef parseTypeRef() { - final var identifier = consume(PbsTokenKind.IDENTIFIER, "E_PARSE_EXPECTED_TOKEN", "Expected type name"); + final var identifier = consume(PbsTokenKind.IDENTIFIER, "Expected type name"); return new PbsAst.TypeRef(identifier.lexeme(), span(identifier.start(), identifier.end())); } + /** + * Parses a brace-delimited block. + * + *

<p>Example:
+     * <pre>{@code
+     * {
+     *   let x = 1;
+     *   return x;
+     * }
+     * }</pre>
+ */ private PbsAst.Block parseBlock() { - final var leftBrace = consume(PbsTokenKind.LEFT_BRACE, "E_PARSE_EXPECTED_TOKEN", "Expected '{' to start block"); + final var leftBrace = consume(PbsTokenKind.LEFT_BRACE, "Expected '{' to start block"); final var statements = new ArrayList(); while (!check(PbsTokenKind.RIGHT_BRACE) && !isAtEnd()) { statements.add(parseStatement()); } - final var rightBrace = consume(PbsTokenKind.RIGHT_BRACE, "E_PARSE_EXPECTED_TOKEN", "Expected '}' to end block"); + final var rightBrace = consume(PbsTokenKind.RIGHT_BRACE, "Expected '}' to end block"); return new PbsAst.Block(ReadOnlyList.wrap(statements), span(leftBrace.start(), rightBrace.end())); } + /** + * Parses one statement inside a block. + * + *

The current slice supports `let`, `return`, and expression statements. + */ private PbsAst.Statement parseStatement() { if (match(PbsTokenKind.LET)) { return parseLetStatement(previous()); @@ -171,17 +223,26 @@ public final class PbsParser { return parseExpressionStatement(); } + /** + * Parses a local binding statement. + * + *

<p>Examples:
+     * <pre>{@code
+     * let x = 1;
+     * let y: int = x + 1;
+     * }</pre>
+ */ private PbsAst.Statement parseLetStatement(final PbsToken letToken) { - final var name = consume(PbsTokenKind.IDENTIFIER, "E_PARSE_EXPECTED_TOKEN", "Expected variable name"); + final var name = consume(PbsTokenKind.IDENTIFIER, "Expected variable name"); PbsAst.TypeRef explicitType = null; if (match(PbsTokenKind.COLON)) { explicitType = parseTypeRef(); } - consume(PbsTokenKind.EQUAL, "E_PARSE_EXPECTED_TOKEN", "Expected '=' in let statement"); + consume(PbsTokenKind.EQUAL, "Expected '=' in let statement"); final var initializer = parseExpression(); - final var semicolon = consume(PbsTokenKind.SEMICOLON, "E_PARSE_EXPECTED_TOKEN", "Expected ';' after let statement"); + final var semicolon = consume(PbsTokenKind.SEMICOLON, "Expected ';' after let statement"); return new PbsAst.LetStatement( name.lexeme(), @@ -190,25 +251,39 @@ public final class PbsParser { span(letToken.start(), semicolon.end())); } + /** + * Parses a return statement with an optional returned value. + */ private PbsAst.Statement parseReturnStatement(final PbsToken returnToken) { PbsAst.Expression value = null; if (!check(PbsTokenKind.SEMICOLON)) { value = parseExpression(); } - final var semicolon = consume(PbsTokenKind.SEMICOLON, "E_PARSE_EXPECTED_TOKEN", "Expected ';' after return"); + final var semicolon = consume(PbsTokenKind.SEMICOLON, "Expected ';' after return"); return new PbsAst.ReturnStatement(value, span(returnToken.start(), semicolon.end())); } + /** + * Parses an expression statement terminated by a semicolon. + * + *

Example: {@code log(value);} + */ private PbsAst.Statement parseExpressionStatement() { final var expression = parseExpression(); - final var semicolon = consume(PbsTokenKind.SEMICOLON, "E_PARSE_EXPECTED_TOKEN", "Expected ';' after expression"); + final var semicolon = consume(PbsTokenKind.SEMICOLON, "Expected ';' after expression"); return new PbsAst.ExpressionStatement(expression, span(expression.span().getStart(), semicolon.end())); } + /** + * Entry point for expression parsing. + */ private PbsAst.Expression parseExpression() { return parseOr(); } + /** + * Parses left-associative logical-or expressions such as {@code a || b || c}. + */ private PbsAst.Expression parseOr() { var expression = parseAnd(); while (match(PbsTokenKind.OR_OR)) { @@ -220,6 +295,9 @@ public final class PbsParser { return expression; } + /** + * Parses left-associative logical-and expressions such as {@code a && b && c}. + */ private PbsAst.Expression parseAnd() { var expression = parseEquality(); while (match(PbsTokenKind.AND_AND)) { @@ -231,6 +309,12 @@ public final class PbsParser { return expression; } + /** + * Parses equality expressions and rejects chained non-associative forms. + * + *

<p>Accepted: {@code a == b} + *

Rejected: {@code a == b == c} + */ private PbsAst.Expression parseEquality() { var expression = parseComparison(); if (match(PbsTokenKind.EQUAL_EQUAL, PbsTokenKind.BANG_EQUAL)) { @@ -239,7 +323,7 @@ public final class PbsParser { expression = new PbsAst.BinaryExpr(operator.lexeme(), expression, right, span(expression.span().getStart(), right.span().getEnd())); if (check(PbsTokenKind.EQUAL_EQUAL) || check(PbsTokenKind.BANG_EQUAL)) { - report(peek(), "E_PARSE_NON_ASSOC", "Chained equality is not allowed"); + report(peek(), ParseErrors.E_PARSE_NON_ASSOC, "Chained equality is not allowed"); while (match(PbsTokenKind.EQUAL_EQUAL, PbsTokenKind.BANG_EQUAL)) { parseComparison(); } @@ -248,6 +332,12 @@ public final class PbsParser { return expression; } + /** + * Parses comparison expressions and rejects chained non-associative forms. + * + *

<p>Accepted: {@code a < b} + *

Rejected: {@code a < b < c} + */ private PbsAst.Expression parseComparison() { var expression = parseTerm(); if (match(PbsTokenKind.LESS, PbsTokenKind.LESS_EQUAL, PbsTokenKind.GREATER, PbsTokenKind.GREATER_EQUAL)) { @@ -257,7 +347,7 @@ public final class PbsParser { span(expression.span().getStart(), right.span().getEnd())); if (check(PbsTokenKind.LESS) || check(PbsTokenKind.LESS_EQUAL) || check(PbsTokenKind.GREATER) || check(PbsTokenKind.GREATER_EQUAL)) { - report(peek(), "E_PARSE_NON_ASSOC", "Chained comparison is not allowed"); + report(peek(), ParseErrors.E_PARSE_NON_ASSOC, "Chained comparison is not allowed"); while (match(PbsTokenKind.LESS, PbsTokenKind.LESS_EQUAL, PbsTokenKind.GREATER, PbsTokenKind.GREATER_EQUAL)) { parseTerm(); } @@ -266,6 +356,9 @@ public final class PbsParser { return expression; } + /** + * Parses additive expressions such as {@code a + b - c}. + */ private PbsAst.Expression parseTerm() { var expression = parseFactor(); while (match(PbsTokenKind.PLUS, PbsTokenKind.MINUS)) { @@ -277,6 +370,9 @@ public final class PbsParser { return expression; } + /** + * Parses multiplicative expressions such as {@code a * b / c % d}. + */ private PbsAst.Expression parseFactor() { var expression = parseUnary(); while (match(PbsTokenKind.STAR, PbsTokenKind.SLASH, PbsTokenKind.PERCENT)) { @@ -288,6 +384,9 @@ public final class PbsParser { return expression; } + /** + * Parses unary prefix operators such as {@code -x} and {@code !ready}. + */ private PbsAst.Expression parseUnary() { if (match(PbsTokenKind.BANG, PbsTokenKind.MINUS)) { final var operator = previous(); @@ -300,6 +399,16 @@ public final class PbsParser { return parseCall(); } + /** + * Parses call chains after a primary expression. + * + *

<p>Examples:
+     * <pre>{@code
+     * f()
+     * sum(a, b)
+     * factory()(1)
+     * }</pre>
+ */ private PbsAst.Expression parseCall() { var expression = parsePrimary(); @@ -311,7 +420,7 @@ public final class PbsParser { arguments.add(parseExpression()); } while (match(PbsTokenKind.COMMA)); } - final var close = consume(PbsTokenKind.RIGHT_PAREN, "E_PARSE_EXPECTED_TOKEN", "Expected ')' after arguments"); + final var close = consume(PbsTokenKind.RIGHT_PAREN, "Expected ')' after arguments"); expression = new PbsAst.CallExpr( expression, ReadOnlyList.wrap(arguments), @@ -326,6 +435,9 @@ public final class PbsParser { return expression; } + /** + * Parses primary expressions: literals, identifiers, and grouped expressions. + */ private PbsAst.Expression parsePrimary() { if (match(PbsTokenKind.TRUE)) { final var token = previous(); @@ -337,16 +449,16 @@ public final class PbsParser { } if (match(PbsTokenKind.INT_LITERAL)) { final var token = previous(); - return new PbsAst.IntLiteralExpr(parseLongOrDefault(token.lexeme(), 0L), span(token.start(), token.end())); + return new PbsAst.IntLiteralExpr(parseLongOrDefault(token.lexeme()), span(token.start(), token.end())); } if (match(PbsTokenKind.FLOAT_LITERAL)) { final var token = previous(); - return new PbsAst.FloatLiteralExpr(parseDoubleOrDefault(token.lexeme(), 0.0d), span(token.start(), token.end())); + return new PbsAst.FloatLiteralExpr(parseDoubleOrDefault(token.lexeme()), span(token.start(), token.end())); } if (match(PbsTokenKind.BOUNDED_LITERAL)) { final var token = previous(); final var raw = token.lexeme().substring(0, Math.max(token.lexeme().length() - 1, 0)); - return new PbsAst.BoundedLiteralExpr(parseIntOrDefault(raw, 0), span(token.start(), token.end())); + return new PbsAst.BoundedLiteralExpr(parseIntOrDefault(raw), span(token.start(), token.end())); } if (match(PbsTokenKind.STRING_LITERAL)) { final var token = previous(); @@ -359,16 +471,21 @@ public final class PbsParser { if (match(PbsTokenKind.LEFT_PAREN)) { final var open = previous(); final var expression = parseExpression(); - final var close 
= consume(PbsTokenKind.RIGHT_PAREN, "E_PARSE_EXPECTED_TOKEN", "Expected ')' after grouped expression"); + final var close = consume(PbsTokenKind.RIGHT_PAREN, "Expected ')' after grouped expression"); return new PbsAst.GroupExpr(expression, span(open.start(), close.end())); } final var token = peek(); - report(token, "E_PARSE_UNEXPECTED_TOKEN", "Unexpected token in expression: " + token.kind()); + report(token, ParseErrors.E_PARSE_UNEXPECTED_TOKEN, "Unexpected token in expression: " + token.kind()); advance(); return new PbsAst.IntLiteralExpr(0L, span(token.start(), token.end())); } + /** + * Skips tokens until a safe top-level restart point is reached. + * + *

This allows the parser to continue reporting more than one diagnostic per file. + */ private void synchronizeTopLevel() { while (!isAtEnd()) { if (check(PbsTokenKind.FN) || check(PbsTokenKind.IMPORT)) { @@ -381,6 +498,9 @@ public final class PbsParser { } } + /** + * Consumes the next token if it matches any provided kind. + */ private boolean match(final PbsTokenKind... kinds) { for (final var kind : kinds) { if (check(kind)) { @@ -391,74 +511,110 @@ public final class PbsParser { return false; } - private PbsToken consume(final PbsTokenKind kind, final String code, final String message) { + /** + * Consumes a required token and reports an error if it is missing. + * + *

The parser advances on failure when possible so recovery can continue. + */ + private PbsToken consume(final PbsTokenKind kind, final String message) { if (check(kind)) { return advance(); } final var token = peek(); - report(token, code, message + ", found " + token.kind()); + report(token, ParseErrors.E_PARSE_EXPECTED_TOKEN, message + ", found " + token.kind()); if (!isAtEnd()) { return advance(); } return token; } + /** + * Returns whether the current token matches the expected kind. + */ private boolean check(final PbsTokenKind kind) { if (isAtEnd()) return kind == PbsTokenKind.EOF; return peek().kind() == kind; } + /** + * Advances to the next token and returns the previously current token. + */ private PbsToken advance() { if (!isAtEnd()) current++; return previous(); } + /** + * Returns whether the parser reached the synthetic EOF token. + */ private boolean isAtEnd() { return peek().kind() == PbsTokenKind.EOF; } + /** + * Returns the current token without consuming it. + */ private PbsToken peek() { return tokens.get(current); } + /** + * Returns the most recently consumed token. + */ private PbsToken previous() { return tokens.get(Math.max(current - 1, 0)); } + /** + * Builds a source span for the current file. + */ private Span span(final long start, final long end) { return new Span(fileId, start, end); } - private void report(final PbsToken token, final String code, final String message) { - issues.report(builder -> builder - .error(true) - .message("[PBS:%s] %s at %s:[%d,%d)".formatted(code, message, sourceLabel, token.start(), token.end()))); + /** + * Reports a parser diagnostic at the given token span. 
+ */ + private void report(final PbsToken token, final ParseErrors parseErrors, final String message) { + diagnostics.error(parseErrors.name(), message, new Span(fileId, token.start(), token.end())); } - private long parseLongOrDefault(final String text, final long fallback) { + /** + * Parses an integer literal for AST construction and falls back to zero on malformed input. + */ + private long parseLongOrDefault(final String text) { try { return Long.parseLong(text); } catch (NumberFormatException ignored) { - return fallback; + return 0L; // fallback } } - private int parseIntOrDefault(final String text, final int fallback) { + /** + * Parses a bounded literal payload and falls back to zero on malformed input. + */ + private int parseIntOrDefault(final String text) { try { return Integer.parseInt(text); } catch (NumberFormatException ignored) { - return fallback; + return 0; // fallback } } - private double parseDoubleOrDefault(final String text, final double fallback) { + /** + * Parses a floating-point literal for AST construction and falls back to zero on malformed input. + */ + private double parseDoubleOrDefault(final String text) { try { return Double.parseDouble(text); } catch (NumberFormatException ignored) { - return fallback; + return 0.0; // fallback } } + /** + * Converts a quoted token lexeme such as {@code "\"hello\\n\""} into its unescaped runtime text. 
+ */ private String unescapeString(final String lexeme) { if (lexeme.length() < 2) { return ""; diff --git a/prometeu-compiler/frontends/prometeu-frontend-pbs/src/main/java/p/studio/compiler/services/PBSFrontendPhaseService.java b/prometeu-compiler/frontends/prometeu-frontend-pbs/src/main/java/p/studio/compiler/services/PBSFrontendPhaseService.java index 785ebbfb..68513fe1 100644 --- a/prometeu-compiler/frontends/prometeu-frontend-pbs/src/main/java/p/studio/compiler/services/PBSFrontendPhaseService.java +++ b/prometeu-compiler/frontends/prometeu-frontend-pbs/src/main/java/p/studio/compiler/services/PBSFrontendPhaseService.java @@ -6,6 +6,7 @@ import p.studio.compiler.messages.FrontendPhaseContext; import p.studio.compiler.models.IRBackend; import p.studio.compiler.models.IRFunction; import p.studio.compiler.pbs.PbsFrontendCompiler; +import p.studio.compiler.source.diagnostics.DiagnosticSink; import p.studio.utilities.logs.LogAggregator; import p.studio.utilities.structures.ReadOnlyList; @@ -34,10 +35,14 @@ public class PBSFrontendPhaseService implements FrontendPhaseService { } sourceHandle.readUtf8().ifPresentOrElse( - utf8Content -> functions.addAll(frontendCompiler - .compileFile(fId, utf8Content, sourceHandle.getCanonPath().toString(), issues) - .stream() - .toList()), + utf8Content -> { + final var diagnostics = DiagnosticSink.empty(); + functions.addAll(frontendCompiler + .compileFile(fId, utf8Content, diagnostics) + .stream() + .toList()); + adaptDiagnostics(sourceHandle.getCanonPath().toString(), diagnostics, issues); + }, () -> issues.report(builder -> builder .error(true) .message("Failed to read file content: %s".formatted(sourceHandle.toString())))); @@ -47,4 +52,22 @@ public class PBSFrontendPhaseService implements FrontendPhaseService { logs.using(log).debug("PBS frontend lowered %d function(s) to IR".formatted(functions.size())); return new IRBackend(ReadOnlyList.wrap(functions)); } + + private void adaptDiagnostics( + final String sourceLabel, + 
final DiagnosticSink diagnostics, + final BuildingIssueSink issues) { + for (final var diagnostic : diagnostics) { + final var span = diagnostic.getSpan(); + issues.report(builder -> builder + .error(diagnostic.getSeverity().isError()) + .message("[%s] %s at %s:[%d,%d)" + .formatted( + diagnostic.getCode(), + diagnostic.getMessage(), + sourceLabel, + span.getStart(), + span.getEnd()))); + } + } } diff --git a/prometeu-compiler/frontends/prometeu-frontend-pbs/src/test/java/p/studio/compiler/pbs/PbsFrontendCompilerTest.java b/prometeu-compiler/frontends/prometeu-frontend-pbs/src/test/java/p/studio/compiler/pbs/PbsFrontendCompilerTest.java index 0fd6ccb9..a5038bce 100644 --- a/prometeu-compiler/frontends/prometeu-frontend-pbs/src/test/java/p/studio/compiler/pbs/PbsFrontendCompilerTest.java +++ b/prometeu-compiler/frontends/prometeu-frontend-pbs/src/test/java/p/studio/compiler/pbs/PbsFrontendCompilerTest.java @@ -1,7 +1,7 @@ package p.studio.compiler.pbs; import org.junit.jupiter.api.Test; -import p.studio.compiler.messages.BuildingIssueSink; +import p.studio.compiler.source.diagnostics.DiagnosticSink; import p.studio.compiler.source.identifiers.FileId; import static org.junit.jupiter.api.Assertions.assertEquals; @@ -21,11 +21,11 @@ class PbsFrontendCompilerTest { } """; - final var issues = BuildingIssueSink.empty(); + final var diagnostics = DiagnosticSink.empty(); final var compiler = new PbsFrontendCompiler(); - final var functions = compiler.compileFile(new FileId(0), source, "compile-test.pbs", issues); + final var functions = compiler.compileFile(new FileId(0), source, diagnostics); - assertTrue(issues.isEmpty(), "Valid program should not report issues"); + assertTrue(diagnostics.isEmpty(), "Valid program should not report diagnostics"); assertEquals(2, functions.size()); assertEquals("a", functions.get(0).name()); assertEquals(0, functions.get(0).parameterCount()); @@ -40,10 +40,10 @@ class PbsFrontendCompilerTest { fn a(): int { return 2; } """; - final var 
issues = BuildingIssueSink.empty(); + final var diagnostics = DiagnosticSink.empty(); final var compiler = new PbsFrontendCompiler(); - compiler.compileFile(new FileId(0), source, "compile-test.pbs", issues); + compiler.compileFile(new FileId(0), source, diagnostics); - assertTrue(issues.hasErrors()); + assertTrue(diagnostics.hasErrors()); } } diff --git a/prometeu-compiler/frontends/prometeu-frontend-pbs/src/test/java/p/studio/compiler/pbs/lexer/PbsLexerTest.java b/prometeu-compiler/frontends/prometeu-frontend-pbs/src/test/java/p/studio/compiler/pbs/lexer/PbsLexerTest.java index 47df71a5..bd82e346 100644 --- a/prometeu-compiler/frontends/prometeu-frontend-pbs/src/test/java/p/studio/compiler/pbs/lexer/PbsLexerTest.java +++ b/prometeu-compiler/frontends/prometeu-frontend-pbs/src/test/java/p/studio/compiler/pbs/lexer/PbsLexerTest.java @@ -1,7 +1,8 @@ package p.studio.compiler.pbs.lexer; import org.junit.jupiter.api.Test; -import p.studio.compiler.messages.BuildingIssueSink; +import p.studio.compiler.source.diagnostics.DiagnosticSink; +import p.studio.compiler.source.identifiers.FileId; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertTrue; @@ -11,9 +12,9 @@ class PbsLexerTest { @Test void shouldLexFunctionTokens() { final var source = "fn sum(a: int, b: int): int { return a + b; }"; - final var issues = BuildingIssueSink.empty(); + final var diagnostics = DiagnosticSink.empty(); - final var tokens = PbsLexer.lex(source, "test.pbs", issues); + final var tokens = PbsLexer.lex(source, new FileId(0), diagnostics); assertEquals(PbsTokenKind.FN, tokens.get(0).kind()); assertEquals(PbsTokenKind.IDENTIFIER, tokens.get(1).kind()); @@ -24,6 +25,6 @@ class PbsLexerTest { .orElseThrow() .kind()); assertEquals(PbsTokenKind.EOF, tokens.getLast().kind()); - assertTrue(issues.isEmpty(), "Lexer should not report issues for valid input"); + assertTrue(diagnostics.isEmpty(), "Lexer should not report diagnostics for valid 
input"); } } diff --git a/prometeu-compiler/frontends/prometeu-frontend-pbs/src/test/java/p/studio/compiler/pbs/parser/PbsParserTest.java b/prometeu-compiler/frontends/prometeu-frontend-pbs/src/test/java/p/studio/compiler/pbs/parser/PbsParserTest.java index 50c1efe2..5689f830 100644 --- a/prometeu-compiler/frontends/prometeu-frontend-pbs/src/test/java/p/studio/compiler/pbs/parser/PbsParserTest.java +++ b/prometeu-compiler/frontends/prometeu-frontend-pbs/src/test/java/p/studio/compiler/pbs/parser/PbsParserTest.java @@ -1,9 +1,9 @@ package p.studio.compiler.pbs.parser; import org.junit.jupiter.api.Test; -import p.studio.compiler.messages.BuildingIssueSink; import p.studio.compiler.pbs.ast.PbsAst; import p.studio.compiler.pbs.lexer.PbsLexer; +import p.studio.compiler.source.diagnostics.DiagnosticSink; import p.studio.compiler.source.identifiers.FileId; import static org.junit.jupiter.api.Assertions.assertEquals; @@ -19,12 +19,13 @@ class PbsParserTest { return a + b; } """; - final var issues = BuildingIssueSink.empty(); - final var tokens = PbsLexer.lex(source, "parser-test.pbs", issues); + final var diagnostics = DiagnosticSink.empty(); + final var fileId = new FileId(0); + final var tokens = PbsLexer.lex(source, fileId, diagnostics); - final PbsAst.File ast = PbsParser.parse(tokens, new FileId(0), "parser-test.pbs", issues); + final PbsAst.File ast = PbsParser.parse(tokens, fileId, diagnostics); - assertTrue(issues.isEmpty(), "Parser should not report issues for valid function"); + assertTrue(diagnostics.isEmpty(), "Parser should not report diagnostics for valid function"); assertEquals(1, ast.functions().size()); final var fn = ast.functions().getFirst();