add lex and parser errors + comments

This commit is contained in:
bQUARKz 2026-02-27 04:46:07 +00:00
parent 14da21fe9f
commit 833c6ec049
Signed by: bquarkz
SSH Key Fingerprint: SHA256:Z7dgqoglWwoK6j6u4QC87OveEq74WOhFN+gitsxtkf8
9 changed files with 297 additions and 105 deletions

View File

@ -1,10 +1,10 @@
package p.studio.compiler.pbs;
import p.studio.compiler.messages.BuildingIssueSink;
import p.studio.compiler.models.IRFunction;
import p.studio.compiler.pbs.ast.PbsAst;
import p.studio.compiler.pbs.lexer.PbsLexer;
import p.studio.compiler.pbs.parser.PbsParser;
import p.studio.compiler.source.diagnostics.DiagnosticSink;
import p.studio.compiler.source.identifiers.FileId;
import p.studio.utilities.structures.ReadOnlyList;
@ -16,27 +16,24 @@ public final class PbsFrontendCompiler {
public ReadOnlyList<IRFunction> compileFile(
final FileId fileId,
final String source,
final String sourceLabel,
final BuildingIssueSink issues) {
final var tokens = PbsLexer.lex(source, sourceLabel, issues);
final var ast = PbsParser.parse(tokens, fileId, sourceLabel, issues);
validateFunctionNames(ast, sourceLabel, issues);
final DiagnosticSink diagnostics) {
final var tokens = PbsLexer.lex(source, fileId, diagnostics);
final var ast = PbsParser.parse(tokens, fileId, diagnostics);
validateFunctionNames(ast, diagnostics);
return lowerFunctions(fileId, ast);
}
private void validateFunctionNames(
final PbsAst.File ast,
final String sourceLabel,
final BuildingIssueSink issues) {
final DiagnosticSink diagnostics) {
final Set<String> names = new HashSet<>();
for (final var fn : ast.functions()) {
if (names.add(fn.name())) {
continue;
}
issues.report(builder -> builder
.error(true)
.message("[PBS:E_RESOLVE_DUPLICATE_SYMBOL] Duplicate function '%s' at %s:[%d,%d)"
.formatted(fn.name(), sourceLabel, fn.span().getStart(), fn.span().getEnd())));
diagnostics.error("E_RESOLVE_DUPLICATE_SYMBOL",
"Duplicate function '%s'".formatted(fn.name()),
fn.span());
}
}

View File

@ -0,0 +1,6 @@
package p.studio.compiler.pbs.lexer;
/**
 * Diagnostic codes emitted by the PBS lexer.
 *
 * <p>The constant name is used verbatim as the diagnostic code, so renaming a constant
 * changes the reported code.
 */
public enum LexErrors {
    /** A character, or an incomplete operator such as a lone {@code &}, that starts no token. */
    E_LEX_INVALID_CHAR,

    /** A string literal still open when the end of input is reached. */
    E_LEX_UNTERMINATED_STRING
}

View File

@ -1,6 +1,8 @@
package p.studio.compiler.pbs.lexer;
import p.studio.compiler.messages.BuildingIssueSink;
import p.studio.compiler.source.Span;
import p.studio.compiler.source.diagnostics.DiagnosticSink;
import p.studio.compiler.source.identifiers.FileId;
import p.studio.utilities.structures.ReadOnlyList;
import java.util.ArrayList;
@ -19,25 +21,25 @@ public final class PbsLexer {
}
// Text being scanned; never null (see constructor).
private final String source;
// File identity stamped on every diagnostic span.
private final FileId fileId;
// Receiver of lexical errors.
private final DiagnosticSink diagnostics;
// Tokens produced so far.
private final ArrayList<PbsToken> tokens = new ArrayList<>();
// Offsets into source; report() uses them as the [start, current) span of a diagnostic.
private int start;
private int current;
private LexerState state = LexerState.DEFAULT;

/**
 * Creates a lexer over {@code source}; a null source is treated as empty text.
 */
private PbsLexer(final String source, final FileId fileId, final DiagnosticSink diagnostics) {
    this.source = source == null ? "" : source;
    this.fileId = fileId;
    this.diagnostics = diagnostics;
}
public static ReadOnlyList<PbsToken> lex(
final String source,
final String sourceLabel,
final BuildingIssueSink issues) {
final var lexer = new PbsLexer(source, sourceLabel, issues);
final FileId fileId,
final DiagnosticSink diagnostics) {
final var lexer = new PbsLexer(source, fileId, diagnostics);
return lexer.lexInternal();
}
@ -84,7 +86,7 @@ public final class PbsLexer {
addToken(PbsTokenKind.DOT_DOT);
return;
}
report("E_LEX_INVALID_CHAR", "Unexpected '.'");
report(LexErrors.E_LEX_INVALID_CHAR, "Unexpected '.'");
}
case '!' -> addToken(match('=') ? PbsTokenKind.BANG_EQUAL : PbsTokenKind.BANG);
case '=' -> addToken(match('=') ? PbsTokenKind.EQUAL_EQUAL : PbsTokenKind.EQUAL);
@ -95,14 +97,14 @@ public final class PbsLexer {
addToken(PbsTokenKind.AND_AND);
return;
}
report("E_LEX_INVALID_CHAR", "Unexpected '&'");
report(LexErrors.E_LEX_INVALID_CHAR, "Unexpected '&'");
}
case '|' -> {
if (match('|')) {
addToken(PbsTokenKind.OR_OR);
return;
}
report("E_LEX_INVALID_CHAR", "Unexpected '|'");
report(LexErrors.E_LEX_INVALID_CHAR, "Unexpected '|'");
}
case '/' -> {
if (match('/')) {
@ -121,7 +123,7 @@ public final class PbsLexer {
state = LexerState.IDENTIFIER;
return;
}
report("E_LEX_INVALID_CHAR", "Invalid character: '%s'".formatted(c));
report(LexErrors.E_LEX_INVALID_CHAR, "Invalid character: '%s'".formatted(c));
}
}
}
@ -175,7 +177,7 @@ public final class PbsLexer {
}
if (isAtEnd()) {
report("E_LEX_UNTERMINATED_STRING", "Unterminated string literal");
report(LexErrors.E_LEX_UNTERMINATED_STRING, "Unterminated string literal");
state = LexerState.DEFAULT;
return;
}
@ -235,10 +237,8 @@ public final class PbsLexer {
return c == '_' || Character.isAlphabetic(c) || Character.isDigit(c);
}
private void report(final String code, final String message) {
issues.report(builder -> builder
.error(true)
.message("[PBS:%s] %s at %s:[%d,%d)".formatted(code, message, sourceLabel, start, current)));
private void report(final LexErrors lexErrors, final String message) {
diagnostics.error(lexErrors.name(), message, new Span(fileId, start, current));
}
private static Map<String, PbsTokenKind> buildKeywords() {

View File

@ -0,0 +1,8 @@
package p.studio.compiler.pbs.parser;
/**
 * Diagnostic codes emitted by the PBS parser.
 *
 * <p>The constant name is used verbatim as the diagnostic code, so renaming a constant
 * changes the reported code.
 */
public enum ParseErrors {
    /** A specific required token was missing. */
    E_PARSE_EXPECTED_TOKEN,

    /** A token appeared where no declaration, import item, or expression can start. */
    E_PARSE_UNEXPECTED_TOKEN,

    /** A visibility modifier was written in a {@code .pbs} source declaration. */
    E_PARSE_VISIBILITY_IN_SOURCE,

    /** A non-associative operator (equality or comparison) was chained. */
    E_PARSE_NON_ASSOC
}

View File

@ -1,10 +1,10 @@
package p.studio.compiler.pbs.parser;
import p.studio.compiler.messages.BuildingIssueSink;
import p.studio.compiler.pbs.ast.PbsAst;
import p.studio.compiler.pbs.lexer.PbsToken;
import p.studio.compiler.pbs.lexer.PbsTokenKind;
import p.studio.compiler.source.Span;
import p.studio.compiler.source.diagnostics.DiagnosticSink;
import p.studio.compiler.source.identifiers.FileId;
import p.studio.utilities.structures.ReadOnlyList;
@ -13,32 +13,43 @@ import java.util.ArrayList;
public final class PbsParser {
// Private copy of the token stream.
private final ArrayList<PbsToken> tokens;
// File identity stamped on every span the parser builds.
private final FileId fileId;
// Receiver of parse errors.
private final DiagnosticSink diagnostics;
// Index of the next token to consume.
private int current;

/**
 * Creates a parser over a copy of {@code tokens}.
 */
private PbsParser(
        final ReadOnlyList<PbsToken> tokens,
        final FileId fileId,
        final DiagnosticSink diagnostics) {
    this.tokens = new ArrayList<>();
    for (final var token : tokens) {
        this.tokens.add(token);
    }
    this.fileId = fileId;
    this.diagnostics = diagnostics;
}
/**
* Parses a token stream into a PBS file AST.
*
* <p>Example:
* <pre>{@code
* fn sum(a: int, b: int): int {
* return a + b;
* }
* }</pre>
*/
public static PbsAst.File parse(
final ReadOnlyList<PbsToken> tokens,
final FileId fileId,
final String sourceLabel,
final BuildingIssueSink issues) {
return new PbsParser(tokens, fileId, sourceLabel, issues).parseFile();
final DiagnosticSink diagnostics) {
return new PbsParser(tokens, fileId, diagnostics).parseFile();
}
/**
* Parses a full file as a sequence of imports and top-level declarations.
*
* <p>The current slice only stores top-level functions in the AST.
*/
private PbsAst.File parseFile() {
final var functions = new ArrayList<PbsAst.FunctionDecl>();
@ -54,7 +65,7 @@ public final class PbsParser {
}
if (match(PbsTokenKind.MOD, PbsTokenKind.PUB)) {
report(previous(), "E_PARSE_VISIBILITY_IN_SOURCE",
report(previous(), ParseErrors.E_PARSE_VISIBILITY_IN_SOURCE,
"Visibility modifiers are barrel-only and cannot appear in .pbs declarations");
synchronizeTopLevel();
continue;
@ -64,7 +75,7 @@ public final class PbsParser {
break;
}
report(peek(), "E_PARSE_UNEXPECTED_TOKEN", "Expected top-level declaration ('fn') or import");
report(peek(), ParseErrors.E_PARSE_UNEXPECTED_TOKEN, "Expected top-level declaration ('fn') or import");
synchronizeTopLevel();
}
@ -72,6 +83,15 @@ public final class PbsParser {
return new PbsAst.File(ReadOnlyList.wrap(functions), span(0, eof.end()));
}
/**
* Parses import syntax for validation and recovery, but does not store imports yet.
*
* <p>Supported forms:
* <pre>{@code
* import @core:math;
* import { Vector, Matrix as Mat } from @core:math;
* }</pre>
*/
private void parseAndDiscardImport() {
// Supports both forms:
// import @core:math;
@ -80,43 +100,56 @@ public final class PbsParser {
while (!check(PbsTokenKind.RIGHT_BRACE) && !isAtEnd()) {
if (match(PbsTokenKind.IDENTIFIER)) {
if (match(PbsTokenKind.AS)) {
consume(PbsTokenKind.IDENTIFIER, "E_PARSE_EXPECTED_TOKEN",
consume(PbsTokenKind.IDENTIFIER,
"Expected alias identifier after 'as'");
}
match(PbsTokenKind.COMMA);
continue;
}
report(peek(), "E_PARSE_UNEXPECTED_TOKEN", "Invalid import item");
report(peek(), ParseErrors.E_PARSE_UNEXPECTED_TOKEN, "Invalid import item");
advance();
}
consume(PbsTokenKind.RIGHT_BRACE, "E_PARSE_EXPECTED_TOKEN", "Expected '}' in import list");
consume(PbsTokenKind.FROM, "E_PARSE_EXPECTED_TOKEN", "Expected 'from' in named import");
consume(PbsTokenKind.RIGHT_BRACE, "Expected '}' in import list");
consume(PbsTokenKind.FROM, "Expected 'from' in named import");
}
parseModuleRef();
consume(PbsTokenKind.SEMICOLON, "E_PARSE_EXPECTED_TOKEN", "Expected ';' after import");
consume(PbsTokenKind.SEMICOLON, "Expected ';' after import");
}
/**
 * Parses a module reference such as {@code @core:math/tools}.
 *
 * <p>The shape is {@code '@' project ':' module ('/' segment)*}. Nothing is stored;
 * missing pieces are reported through {@code consume}.
 */
private void parseModuleRef() {
    consume(PbsTokenKind.AT, "Expected '@' in module reference");
    consume(PbsTokenKind.IDENTIFIER, "Expected project identifier in module reference");
    consume(PbsTokenKind.COLON, "Expected ':' in module reference");
    consume(PbsTokenKind.IDENTIFIER, "Expected module identifier");
    while (match(PbsTokenKind.SLASH)) {
        consume(PbsTokenKind.IDENTIFIER, "Expected module path segment after '/'");
    }
}
/**
* Parses a top-level function declaration.
*
* <p>Example:
* <pre>{@code
* fn sum(a: int, b: int): int {
* return a + b;
* }
* }</pre>
*/
private PbsAst.FunctionDecl parseFunction(final PbsToken fnToken) {
final var name = consume(PbsTokenKind.IDENTIFIER, "E_PARSE_EXPECTED_TOKEN", "Expected function name");
consume(PbsTokenKind.LEFT_PAREN, "E_PARSE_EXPECTED_TOKEN", "Expected '(' after function name");
final var name = consume(PbsTokenKind.IDENTIFIER, "Expected function name");
consume(PbsTokenKind.LEFT_PAREN, "Expected '(' after function name");
final var parameters = new ArrayList<PbsAst.Parameter>();
if (!check(PbsTokenKind.RIGHT_PAREN)) {
do {
final var pStart = peek();
final var pName = consume(PbsTokenKind.IDENTIFIER, "E_PARSE_EXPECTED_TOKEN", "Expected parameter name");
consume(PbsTokenKind.COLON, "E_PARSE_EXPECTED_TOKEN", "Expected ':' after parameter name");
final var pName = consume(PbsTokenKind.IDENTIFIER, "Expected parameter name");
consume(PbsTokenKind.COLON, "Expected ':' after parameter name");
final var typeRef = parseTypeRef();
parameters.add(new PbsAst.Parameter(
pName.lexeme(),
@ -124,7 +157,7 @@ public final class PbsParser {
span(pStart.start(), typeRef.span().getEnd())));
} while (match(PbsTokenKind.COMMA));
}
consume(PbsTokenKind.RIGHT_PAREN, "E_PARSE_EXPECTED_TOKEN", "Expected ')' after parameter list");
consume(PbsTokenKind.RIGHT_PAREN, "Expected ')' after parameter list");
PbsAst.TypeRef returnType = null;
if (match(PbsTokenKind.COLON)) {
@ -146,21 +179,40 @@ public final class PbsParser {
span(fnToken.start(), body.span().getEnd()));
}
/**
 * Parses a simple identifier-based type reference such as {@code int} or {@code Vector}.
 *
 * @return a type reference named after the consumed token and spanning it
 */
private PbsAst.TypeRef parseTypeRef() {
    final var identifier = consume(PbsTokenKind.IDENTIFIER, "Expected type name");
    return new PbsAst.TypeRef(identifier.lexeme(), span(identifier.start(), identifier.end()));
}
/**
 * Parses a brace-delimited block.
 *
 * <p>Example:
 * <pre>{@code
 * {
 *     let x = 1;
 *     return x;
 * }
 * }</pre>
 *
 * @return the block, spanning from the opening brace to the closing brace
 */
private PbsAst.Block parseBlock() {
    final var leftBrace = consume(PbsTokenKind.LEFT_BRACE, "Expected '{' to start block");
    final var statements = new ArrayList<PbsAst.Statement>();
    // Stop at end of input as well, so an unclosed block cannot loop forever.
    while (!check(PbsTokenKind.RIGHT_BRACE) && !isAtEnd()) {
        statements.add(parseStatement());
    }
    final var rightBrace = consume(PbsTokenKind.RIGHT_BRACE, "Expected '}' to end block");
    return new PbsAst.Block(ReadOnlyList.wrap(statements), span(leftBrace.start(), rightBrace.end()));
}
/**
* Parses one statement inside a block.
*
* <p>The current slice supports {@code let}, {@code return}, and expression statements.
*/
private PbsAst.Statement parseStatement() {
if (match(PbsTokenKind.LET)) {
return parseLetStatement(previous());
@ -171,17 +223,26 @@ public final class PbsParser {
return parseExpressionStatement();
}
/**
* Parses a local binding statement.
*
* <p>Examples:
* <pre>{@code
* let x = 1;
* let y: int = x + 1;
* }</pre>
*/
private PbsAst.Statement parseLetStatement(final PbsToken letToken) {
final var name = consume(PbsTokenKind.IDENTIFIER, "E_PARSE_EXPECTED_TOKEN", "Expected variable name");
final var name = consume(PbsTokenKind.IDENTIFIER, "Expected variable name");
PbsAst.TypeRef explicitType = null;
if (match(PbsTokenKind.COLON)) {
explicitType = parseTypeRef();
}
consume(PbsTokenKind.EQUAL, "E_PARSE_EXPECTED_TOKEN", "Expected '=' in let statement");
consume(PbsTokenKind.EQUAL, "Expected '=' in let statement");
final var initializer = parseExpression();
final var semicolon = consume(PbsTokenKind.SEMICOLON, "E_PARSE_EXPECTED_TOKEN", "Expected ';' after let statement");
final var semicolon = consume(PbsTokenKind.SEMICOLON, "Expected ';' after let statement");
return new PbsAst.LetStatement(
name.lexeme(),
@ -190,25 +251,39 @@ public final class PbsParser {
span(letToken.start(), semicolon.end()));
}
/**
 * Parses a return statement with an optional returned value.
 *
 * @param returnToken the already-consumed {@code return} keyword; its start opens the span
 * @return the statement; its value is {@code null} when {@code ;} follows the keyword directly
 */
private PbsAst.Statement parseReturnStatement(final PbsToken returnToken) {
    PbsAst.Expression value = null;
    if (!check(PbsTokenKind.SEMICOLON)) {
        value = parseExpression();
    }
    final var semicolon = consume(PbsTokenKind.SEMICOLON, "Expected ';' after return");
    return new PbsAst.ReturnStatement(value, span(returnToken.start(), semicolon.end()));
}
/**
 * Parses an expression statement terminated by a semicolon.
 *
 * <p>Example: {@code log(value);}
 *
 * @return the statement, spanning from the expression start to the semicolon
 */
private PbsAst.Statement parseExpressionStatement() {
    final var expression = parseExpression();
    final var semicolon = consume(PbsTokenKind.SEMICOLON, "Expected ';' after expression");
    return new PbsAst.ExpressionStatement(expression, span(expression.span().getStart(), semicolon.end()));
}
/**
 * Parses one expression by delegating to {@code parseOr}.
 */
private PbsAst.Expression parseExpression() {
    final var expression = parseOr();
    return expression;
}
/**
* Parses left-associative logical-or expressions such as {@code a || b || c}.
*/
private PbsAst.Expression parseOr() {
var expression = parseAnd();
while (match(PbsTokenKind.OR_OR)) {
@ -220,6 +295,9 @@ public final class PbsParser {
return expression;
}
/**
* Parses left-associative logical-and expressions such as {@code a && b && c}.
*/
private PbsAst.Expression parseAnd() {
var expression = parseEquality();
while (match(PbsTokenKind.AND_AND)) {
@ -231,6 +309,12 @@ public final class PbsParser {
return expression;
}
/**
* Parses equality expressions and rejects chained non-associative forms.
*
* <p>Accepted: {@code a == b}
* <p>Rejected: {@code a == b == c}
*/
private PbsAst.Expression parseEquality() {
var expression = parseComparison();
if (match(PbsTokenKind.EQUAL_EQUAL, PbsTokenKind.BANG_EQUAL)) {
@ -239,7 +323,7 @@ public final class PbsParser {
expression = new PbsAst.BinaryExpr(operator.lexeme(), expression, right,
span(expression.span().getStart(), right.span().getEnd()));
if (check(PbsTokenKind.EQUAL_EQUAL) || check(PbsTokenKind.BANG_EQUAL)) {
report(peek(), "E_PARSE_NON_ASSOC", "Chained equality is not allowed");
report(peek(), ParseErrors.E_PARSE_NON_ASSOC, "Chained equality is not allowed");
while (match(PbsTokenKind.EQUAL_EQUAL, PbsTokenKind.BANG_EQUAL)) {
parseComparison();
}
@ -248,6 +332,12 @@ public final class PbsParser {
return expression;
}
/**
* Parses comparison expressions and rejects chained non-associative forms.
*
* <p>Accepted: {@code a < b}
* <p>Rejected: {@code a < b < c}
*/
private PbsAst.Expression parseComparison() {
var expression = parseTerm();
if (match(PbsTokenKind.LESS, PbsTokenKind.LESS_EQUAL, PbsTokenKind.GREATER, PbsTokenKind.GREATER_EQUAL)) {
@ -257,7 +347,7 @@ public final class PbsParser {
span(expression.span().getStart(), right.span().getEnd()));
if (check(PbsTokenKind.LESS) || check(PbsTokenKind.LESS_EQUAL)
|| check(PbsTokenKind.GREATER) || check(PbsTokenKind.GREATER_EQUAL)) {
report(peek(), "E_PARSE_NON_ASSOC", "Chained comparison is not allowed");
report(peek(), ParseErrors.E_PARSE_NON_ASSOC, "Chained comparison is not allowed");
while (match(PbsTokenKind.LESS, PbsTokenKind.LESS_EQUAL, PbsTokenKind.GREATER, PbsTokenKind.GREATER_EQUAL)) {
parseTerm();
}
@ -266,6 +356,9 @@ public final class PbsParser {
return expression;
}
/**
* Parses additive expressions such as {@code a + b - c}.
*/
private PbsAst.Expression parseTerm() {
var expression = parseFactor();
while (match(PbsTokenKind.PLUS, PbsTokenKind.MINUS)) {
@ -277,6 +370,9 @@ public final class PbsParser {
return expression;
}
/**
* Parses multiplicative expressions such as {@code a * b / c % d}.
*/
private PbsAst.Expression parseFactor() {
var expression = parseUnary();
while (match(PbsTokenKind.STAR, PbsTokenKind.SLASH, PbsTokenKind.PERCENT)) {
@ -288,6 +384,9 @@ public final class PbsParser {
return expression;
}
/**
* Parses unary prefix operators such as {@code -x} and {@code !ready}.
*/
private PbsAst.Expression parseUnary() {
if (match(PbsTokenKind.BANG, PbsTokenKind.MINUS)) {
final var operator = previous();
@ -300,6 +399,16 @@ public final class PbsParser {
return parseCall();
}
/**
* Parses call chains after a primary expression.
*
* <p>Examples:
* <pre>{@code
* f()
* sum(a, b)
* factory()(1)
* }</pre>
*/
private PbsAst.Expression parseCall() {
var expression = parsePrimary();
@ -311,7 +420,7 @@ public final class PbsParser {
arguments.add(parseExpression());
} while (match(PbsTokenKind.COMMA));
}
final var close = consume(PbsTokenKind.RIGHT_PAREN, "E_PARSE_EXPECTED_TOKEN", "Expected ')' after arguments");
final var close = consume(PbsTokenKind.RIGHT_PAREN, "Expected ')' after arguments");
expression = new PbsAst.CallExpr(
expression,
ReadOnlyList.wrap(arguments),
@ -326,6 +435,9 @@ public final class PbsParser {
return expression;
}
/**
* Parses primary expressions: literals, identifiers, and grouped expressions.
*/
private PbsAst.Expression parsePrimary() {
if (match(PbsTokenKind.TRUE)) {
final var token = previous();
@ -337,16 +449,16 @@ public final class PbsParser {
}
if (match(PbsTokenKind.INT_LITERAL)) {
final var token = previous();
return new PbsAst.IntLiteralExpr(parseLongOrDefault(token.lexeme(), 0L), span(token.start(), token.end()));
return new PbsAst.IntLiteralExpr(parseLongOrDefault(token.lexeme()), span(token.start(), token.end()));
}
if (match(PbsTokenKind.FLOAT_LITERAL)) {
final var token = previous();
return new PbsAst.FloatLiteralExpr(parseDoubleOrDefault(token.lexeme(), 0.0d), span(token.start(), token.end()));
return new PbsAst.FloatLiteralExpr(parseDoubleOrDefault(token.lexeme()), span(token.start(), token.end()));
}
if (match(PbsTokenKind.BOUNDED_LITERAL)) {
final var token = previous();
final var raw = token.lexeme().substring(0, Math.max(token.lexeme().length() - 1, 0));
return new PbsAst.BoundedLiteralExpr(parseIntOrDefault(raw, 0), span(token.start(), token.end()));
return new PbsAst.BoundedLiteralExpr(parseIntOrDefault(raw), span(token.start(), token.end()));
}
if (match(PbsTokenKind.STRING_LITERAL)) {
final var token = previous();
@ -359,16 +471,21 @@ public final class PbsParser {
if (match(PbsTokenKind.LEFT_PAREN)) {
final var open = previous();
final var expression = parseExpression();
final var close = consume(PbsTokenKind.RIGHT_PAREN, "E_PARSE_EXPECTED_TOKEN", "Expected ')' after grouped expression");
final var close = consume(PbsTokenKind.RIGHT_PAREN, "Expected ')' after grouped expression");
return new PbsAst.GroupExpr(expression, span(open.start(), close.end()));
}
final var token = peek();
report(token, "E_PARSE_UNEXPECTED_TOKEN", "Unexpected token in expression: " + token.kind());
report(token, ParseErrors.E_PARSE_UNEXPECTED_TOKEN, "Unexpected token in expression: " + token.kind());
advance();
return new PbsAst.IntLiteralExpr(0L, span(token.start(), token.end()));
}
/**
* Skips tokens until a safe top-level restart point is reached.
*
* <p>This allows the parser to continue reporting more than one diagnostic per file.
*/
private void synchronizeTopLevel() {
while (!isAtEnd()) {
if (check(PbsTokenKind.FN) || check(PbsTokenKind.IMPORT)) {
@ -381,6 +498,9 @@ public final class PbsParser {
}
}
/**
* Consumes the next token if it matches any provided kind.
*/
private boolean match(final PbsTokenKind... kinds) {
for (final var kind : kinds) {
if (check(kind)) {
@ -391,74 +511,110 @@ public final class PbsParser {
return false;
}
private PbsToken consume(final PbsTokenKind kind, final String code, final String message) {
/**
* Consumes a required token and reports an error if it is missing.
*
* <p>The parser advances on failure when possible so recovery can continue.
*/
private PbsToken consume(final PbsTokenKind kind, final String message) {
if (check(kind)) {
return advance();
}
final var token = peek();
report(token, code, message + ", found " + token.kind());
report(token, ParseErrors.E_PARSE_EXPECTED_TOKEN, message + ", found " + token.kind());
if (!isAtEnd()) {
return advance();
}
return token;
}
/**
 * Tests the current token against {@code kind} without consuming it.
 *
 * <p>At end of input only {@code EOF} matches.
 */
private boolean check(final PbsTokenKind kind) {
    return isAtEnd()
            ? kind == PbsTokenKind.EOF
            : peek().kind() == kind;
}
/**
 * Moves past the current token, unless already at end of input, and returns
 * whatever {@code previous()} yields afterwards.
 */
private PbsToken advance() {
    final boolean exhausted = isAtEnd();
    if (!exhausted) {
        current += 1;
    }
    return previous();
}
/**
 * Tells whether the current token is the end-of-file token.
 */
private boolean isAtEnd() {
    final var currentKind = peek().kind();
    return currentKind == PbsTokenKind.EOF;
}
/**
 * Looks at the token at the current position; the position is left unchanged.
 */
private PbsToken peek() {
    final var upcoming = this.tokens.get(this.current);
    return upcoming;
}
/**
 * Returns the token just before the current position, or the first token
 * when nothing has been consumed yet.
 */
private PbsToken previous() {
    final int index = Math.max(0, current - 1);
    return tokens.get(index);
}
/**
 * Creates a span over {@code [start, end)} tagged with this parser's file id.
 */
private Span span(final long start, final long end) {
    final var result = new Span(this.fileId, start, end);
    return result;
}
private void report(final PbsToken token, final String code, final String message) {
issues.report(builder -> builder
.error(true)
.message("[PBS:%s] %s at %s:[%d,%d)".formatted(code, message, sourceLabel, token.start(), token.end())));
/**
* Reports a parser diagnostic at the given token span.
*/
private void report(final PbsToken token, final ParseErrors parseErrors, final String message) {
diagnostics.error(parseErrors.name(), message, new Span(fileId, token.start(), token.end()));
}
private long parseLongOrDefault(final String text, final long fallback) {
/**
* Parses an integer literal for AST construction and falls back to zero on malformed input.
*/
private long parseLongOrDefault(final String text) {
try {
return Long.parseLong(text);
} catch (NumberFormatException ignored) {
return fallback;
return 0L; // fallback
}
}
private int parseIntOrDefault(final String text, final int fallback) {
/**
* Parses a bounded literal payload and falls back to zero on malformed input.
*/
private int parseIntOrDefault(final String text) {
try {
return Integer.parseInt(text);
} catch (NumberFormatException ignored) {
return fallback;
return 0; // fallback
}
}
private double parseDoubleOrDefault(final String text, final double fallback) {
/**
* Parses a floating-point literal for AST construction and falls back to zero on malformed input.
*/
private double parseDoubleOrDefault(final String text) {
try {
return Double.parseDouble(text);
} catch (NumberFormatException ignored) {
return fallback;
return 0.0; // fallback
}
}
/**
* Converts a quoted token lexeme such as {@code "\"hello\\n\""} into its unescaped runtime text.
*/
private String unescapeString(final String lexeme) {
if (lexeme.length() < 2) {
return "";

View File

@ -6,6 +6,7 @@ import p.studio.compiler.messages.FrontendPhaseContext;
import p.studio.compiler.models.IRBackend;
import p.studio.compiler.models.IRFunction;
import p.studio.compiler.pbs.PbsFrontendCompiler;
import p.studio.compiler.source.diagnostics.DiagnosticSink;
import p.studio.utilities.logs.LogAggregator;
import p.studio.utilities.structures.ReadOnlyList;
@ -34,10 +35,14 @@ public class PBSFrontendPhaseService implements FrontendPhaseService {
}
sourceHandle.readUtf8().ifPresentOrElse(
utf8Content -> functions.addAll(frontendCompiler
.compileFile(fId, utf8Content, sourceHandle.getCanonPath().toString(), issues)
.stream()
.toList()),
utf8Content -> {
final var diagnostics = DiagnosticSink.empty();
functions.addAll(frontendCompiler
.compileFile(fId, utf8Content, diagnostics)
.stream()
.toList());
adaptDiagnostics(sourceHandle.getCanonPath().toString(), diagnostics, issues);
},
() -> issues.report(builder -> builder
.error(true)
.message("Failed to read file content: %s".formatted(sourceHandle.toString()))));
@ -47,4 +52,22 @@ public class PBSFrontendPhaseService implements FrontendPhaseService {
logs.using(log).debug("PBS frontend lowered %d function(s) to IR".formatted(functions.size()));
return new IRBackend(ReadOnlyList.wrap(functions));
}
/**
 * Forwards each collected diagnostic to the build issue sink.
 *
 * <p>Every diagnostic becomes one issue whose message reads
 * {@code [code] message at sourceLabel:[start,end)} and whose error flag
 * follows the diagnostic's severity.
 *
 * @param sourceLabel label of the file, embedded in each message
 * @param diagnostics diagnostics gathered while compiling the file
 * @param issues destination sink
 */
private void adaptDiagnostics(
        final String sourceLabel,
        final DiagnosticSink diagnostics,
        final BuildingIssueSink issues) {
    for (final var entry : diagnostics) {
        final var location = entry.getSpan();
        final var isError = entry.getSeverity().isError();
        final var text = "[%s] %s at %s:[%d,%d)".formatted(
                entry.getCode(),
                entry.getMessage(),
                sourceLabel,
                location.getStart(),
                location.getEnd());
        issues.report(builder -> builder.error(isError).message(text));
    }
}
}

View File

@ -1,7 +1,7 @@
package p.studio.compiler.pbs;
import org.junit.jupiter.api.Test;
import p.studio.compiler.messages.BuildingIssueSink;
import p.studio.compiler.source.diagnostics.DiagnosticSink;
import p.studio.compiler.source.identifiers.FileId;
import static org.junit.jupiter.api.Assertions.assertEquals;
@ -21,11 +21,11 @@ class PbsFrontendCompilerTest {
}
""";
final var issues = BuildingIssueSink.empty();
final var diagnostics = DiagnosticSink.empty();
final var compiler = new PbsFrontendCompiler();
final var functions = compiler.compileFile(new FileId(0), source, "compile-test.pbs", issues);
final var functions = compiler.compileFile(new FileId(0), source, diagnostics);
assertTrue(issues.isEmpty(), "Valid program should not report issues");
assertTrue(diagnostics.isEmpty(), "Valid program should not report diagnostics");
assertEquals(2, functions.size());
assertEquals("a", functions.get(0).name());
assertEquals(0, functions.get(0).parameterCount());
@ -40,10 +40,10 @@ class PbsFrontendCompilerTest {
fn a(): int { return 2; }
""";
final var issues = BuildingIssueSink.empty();
final var diagnostics = DiagnosticSink.empty();
final var compiler = new PbsFrontendCompiler();
compiler.compileFile(new FileId(0), source, "compile-test.pbs", issues);
compiler.compileFile(new FileId(0), source, diagnostics);
assertTrue(issues.hasErrors());
assertTrue(diagnostics.hasErrors());
}
}

View File

@ -1,7 +1,8 @@
package p.studio.compiler.pbs.lexer;
import org.junit.jupiter.api.Test;
import p.studio.compiler.messages.BuildingIssueSink;
import p.studio.compiler.source.diagnostics.DiagnosticSink;
import p.studio.compiler.source.identifiers.FileId;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertTrue;
@ -11,9 +12,9 @@ class PbsLexerTest {
@Test
void shouldLexFunctionTokens() {
final var source = "fn sum(a: int, b: int): int { return a + b; }";
final var issues = BuildingIssueSink.empty();
final var diagnostics = DiagnosticSink.empty();
final var tokens = PbsLexer.lex(source, "test.pbs", issues);
final var tokens = PbsLexer.lex(source, new FileId(0), diagnostics);
assertEquals(PbsTokenKind.FN, tokens.get(0).kind());
assertEquals(PbsTokenKind.IDENTIFIER, tokens.get(1).kind());
@ -24,6 +25,6 @@ class PbsLexerTest {
.orElseThrow()
.kind());
assertEquals(PbsTokenKind.EOF, tokens.getLast().kind());
assertTrue(issues.isEmpty(), "Lexer should not report issues for valid input");
assertTrue(diagnostics.isEmpty(), "Lexer should not report diagnostics for valid input");
}
}

View File

@ -1,9 +1,9 @@
package p.studio.compiler.pbs.parser;
import org.junit.jupiter.api.Test;
import p.studio.compiler.messages.BuildingIssueSink;
import p.studio.compiler.pbs.ast.PbsAst;
import p.studio.compiler.pbs.lexer.PbsLexer;
import p.studio.compiler.source.diagnostics.DiagnosticSink;
import p.studio.compiler.source.identifiers.FileId;
import static org.junit.jupiter.api.Assertions.assertEquals;
@ -19,12 +19,13 @@ class PbsParserTest {
return a + b;
}
""";
final var issues = BuildingIssueSink.empty();
final var tokens = PbsLexer.lex(source, "parser-test.pbs", issues);
final var diagnostics = DiagnosticSink.empty();
final var fileId = new FileId(0);
final var tokens = PbsLexer.lex(source, fileId, diagnostics);
final PbsAst.File ast = PbsParser.parse(tokens, new FileId(0), "parser-test.pbs", issues);
final PbsAst.File ast = PbsParser.parse(tokens, fileId, diagnostics);
assertTrue(issues.isEmpty(), "Parser should not report issues for valid function");
assertTrue(diagnostics.isEmpty(), "Parser should not report diagnostics for valid function");
assertEquals(1, ast.functions().size());
final var fn = ast.functions().getFirst();