diff --git a/prometeu-compiler/frontends/prometeu-frontend-pbs/src/main/java/p/studio/compiler/pbs/parser/PbsExprControlFlowParser.java b/prometeu-compiler/frontends/prometeu-frontend-pbs/src/main/java/p/studio/compiler/pbs/parser/PbsExprControlFlowParser.java
new file mode 100644
index 00000000..093e52cf
--- /dev/null
+++ b/prometeu-compiler/frontends/prometeu-frontend-pbs/src/main/java/p/studio/compiler/pbs/parser/PbsExprControlFlowParser.java
@@ -0,0 +1,371 @@
+package p.studio.compiler.pbs.parser;
+
+import p.studio.compiler.pbs.ast.PbsAst;
+import p.studio.compiler.pbs.lexer.PbsToken;
+import p.studio.compiler.pbs.lexer.PbsTokenKind;
+import p.studio.utilities.structures.ReadOnlyList;
+
+import java.util.ArrayList;
+
+/**
+ * Parses the control-flow expression forms of PBS: {@code handle}, {@code else},
+ * {@code if} and {@code switch}, plus the patterns and error paths they use.
+ *
+ * <p>Anything that is not one of these forms is obtained through the two delegates
+ * passed to the constructor.
+ */
+final class PbsExprControlFlowParser {
+    /** Callback that parses and returns one expression. */
+    @FunctionalInterface
+    interface ExpressionParserDelegate {
+        PbsAst.Expression parse();
+    }
+
+    private final PbsExprParserContext context;
+    private final PbsTokenCursor cursor;
+    private final ExpressionParserDelegate expressionParserDelegate;
+    private final ExpressionParserDelegate precedenceParserDelegate;
+
+    /**
+     * @param context parser context; its cursor supplies the tokens consumed here
+     * @param expressionParserDelegate parses {@code if} conditions and {@code switch} selectors
+     * @param precedenceParserDelegate called when the next token starts none of the forms
+     *                                 handled by this class
+     */
+    PbsExprControlFlowParser(
+            final PbsExprParserContext context,
+            final ExpressionParserDelegate expressionParserDelegate,
+            final ExpressionParserDelegate precedenceParserDelegate) {
+        this.context = context;
+        this.cursor = context.cursor();
+        this.expressionParserDelegate = expressionParserDelegate;
+        this.precedenceParserDelegate = precedenceParserDelegate;
+    }
+
+    /**
+     * Parses {@code handle VALUE { PATTERN -> TARGET, ... }} where TARGET is an error path or a block.
+     *
+     * <p>Falls through to {@link #parseElse()} when the next token is not {@code handle}.
+     * Arms are separated by commas and a trailing comma before the closing brace is accepted.
+     *
+     * @return the handle expression, or whatever {@link #parseElse()} produced
+     */
+    PbsAst.Expression parseHandle() {
+        if (!cursor.match(PbsTokenKind.HANDLE)) {
+            return parseElse();
+        }
+
+        final var handleToken = cursor.previous();
+        final var value = parseElse();
+        context.consume(PbsTokenKind.LEFT_BRACE, "Expected '{' after handle expression");
+
+        final var arms = new ArrayList<PbsAst.HandleArm>();
+        while (!cursor.check(PbsTokenKind.RIGHT_BRACE) && !cursor.isAtEnd()) {
+            final var pattern = parseHandlePattern();
+            context.consume(PbsTokenKind.ARROW, "Expected '->' in handle arm");
+
+            // Each arm is assigned either a block or a remap target; the other stays null.
+            PbsAst.ErrorPath remapTarget = null;
+            PbsAst.Block block = null;
+            long armEnd;
+            if (cursor.check(PbsTokenKind.LEFT_BRACE)) {
+                block = context.parseSurfaceBlock("Expected handle arm block");
+                armEnd = block.span().getEnd();
+            } else {
+                remapTarget = parseErrorPath();
+                armEnd = remapTarget.span().getEnd();
+            }
+
+            arms.add(new PbsAst.HandleArm(
+                    pattern,
+                    remapTarget,
+                    block,
+                    context.span(pattern.span().getStart(), armEnd)));
+
+            if (!cursor.match(PbsTokenKind.COMMA)) {
+                break;
+            }
+        }
+
+        final var close = context.consume(PbsTokenKind.RIGHT_BRACE, "Expected '}' to close handle map");
+        return new PbsAst.HandleExpr(value, ReadOnlyList.wrap(arms), context.span(handleToken.start(), close.end()));
+    }
+
+    /**
+     * Parses right-associative else extraction: {@code a else (b else c)}.
+     */
+    PbsAst.Expression parseElse() {
+        var expression = parseIfExpression();
+        if (cursor.match(PbsTokenKind.ELSE)) {
+            final var fallback = parseElse();
+            expression = new PbsAst.ElseExpr(
+                    expression,
+                    fallback,
+                    context.span(expression.span().getStart(), fallback.span().getEnd()));
+        }
+        return expression;
+    }
+
+    /**
+     * Parses an {@code if} expression, or falls through to {@link #parseSwitchExpression()}
+     * when the next token is not {@code if}.
+     */
+    PbsAst.Expression parseIfExpression() {
+        if (!cursor.match(PbsTokenKind.IF)) {
+            return parseSwitchExpression();
+        }
+        return parseIfExpressionFromToken(cursor.previous());
+    }
+
+    /**
+     * Parses the remainder of an {@code if} expression whose {@code if} keyword was already consumed.
+     *
+     * <p>A missing {@code else} is reported as an error and replaced by a zero-width
+     * {@link PbsAst.UnitExpr} placed at the end of the then-block, so a node is always returned.
+     *
+     * @param ifToken the consumed {@code if} token; its start becomes the start of the span
+     * @return the parsed if expression
+     */
+    PbsAst.IfExpr parseIfExpressionFromToken(final PbsToken ifToken) {
+        final var condition = expressionParserDelegate.parse();
+        final var thenBlock = context.parseSurfaceBlock("Expected '{' after if condition");
+
+        if (!cursor.match(PbsTokenKind.ELSE)) {
+            context.report(cursor.peek(), ParseErrors.E_PARSE_UNEXPECTED_TOKEN,
+                    "If expression requires an else branch");
+            final var unit = new PbsAst.UnitExpr(context.span(thenBlock.span().getEnd(), thenBlock.span().getEnd()));
+            return new PbsAst.IfExpr(condition, thenBlock, unit, context.span(ifToken.start(), unit.span().getEnd()));
+        }
+
+        final PbsAst.Expression elseExpression;
+        if (cursor.check(PbsTokenKind.IF)) {
+            // "else if": recurse with the nested 'if' token.
+            elseExpression = parseIfExpressionFromToken(cursor.advance());
+        } else {
+            final var elseBlock = context.parseSurfaceBlock("Expected '{' after else");
+            elseExpression = new PbsAst.BlockExpr(elseBlock, elseBlock.span());
+        }
+
+        return new PbsAst.IfExpr(
+                condition,
+                thenBlock,
+                elseExpression,
+                context.span(ifToken.start(), elseExpression.span().getEnd()));
+    }
+
+    /**
+     * Parses {@code switch SELECTOR { PATTERN: BLOCK, ... }}.
+     *
+     * <p>Falls through to the precedence delegate when the next token is not {@code switch}.
+     * Arms are separated by commas and a trailing comma before the closing brace is accepted.
+     */
+    PbsAst.Expression parseSwitchExpression() {
+        if (!cursor.match(PbsTokenKind.SWITCH)) {
+            return precedenceParserDelegate.parse();
+        }
+
+        final var switchToken = cursor.previous();
+        final var selector = expressionParserDelegate.parse();
+        context.consume(PbsTokenKind.LEFT_BRACE, "Expected '{' after switch selector");
+
+        final var arms = new ArrayList<PbsAst.SwitchArm>();
+        while (!cursor.check(PbsTokenKind.RIGHT_BRACE) && !cursor.isAtEnd()) {
+            final var pattern = parseSwitchPattern();
+            context.consume(PbsTokenKind.COLON, "Expected ':' after switch pattern");
+            final var block = context.parseSurfaceBlock("Expected switch arm block");
+            arms.add(new PbsAst.SwitchArm(pattern, block, context.span(pattern.span().getStart(), block.span().getEnd())));
+
+            if (!cursor.match(PbsTokenKind.COMMA)) {
+                break;
+            }
+        }
+
+        final var close = context.consume(PbsTokenKind.RIGHT_BRACE, "Expected '}' to close switch expression");
+        return new PbsAst.SwitchExpr(selector, ReadOnlyList.wrap(arms), context.span(switchToken.start(), close.end()));
+    }
+
+    /**
+     * Parses one switch pattern: {@code default}, {@code _}, a dotted enum case path, or a literal.
+     *
+     * <p>Anything else is reported as {@code E_PARSE_INVALID_SWITCH_FORM} and turned into a
+     * {@code RECOVERY} wildcard covering the offending token.
+     */
+    PbsAst.SwitchPattern parseSwitchPattern() {
+        if (cursor.match(PbsTokenKind.DEFAULT)) {
+            return new PbsAst.WildcardSwitchPattern(
+                    PbsAst.WildcardSwitchKind.DEFAULT,
+                    context.span(cursor.previous().start(), cursor.previous().end()));
+        }
+
+        if (cursor.check(PbsTokenKind.IDENTIFIER) && "_".equals(cursor.peek().lexeme())) {
+            final var wildcard = cursor.advance();
+            return new PbsAst.WildcardSwitchPattern(
+                    PbsAst.WildcardSwitchKind.UNDERSCORE,
+                    context.span(wildcard.start(), wildcard.end()));
+        }
+
+        // Only IDENT '.' IDENT starts an enum case; a bare identifier is not a valid pattern.
+        if (cursor.check(PbsTokenKind.IDENTIFIER)
+                && cursor.peek(1).kind() == PbsTokenKind.DOT
+                && cursor.peek(2).kind() == PbsTokenKind.IDENTIFIER) {
+            final var path = parseErrorPath();
+            return new PbsAst.EnumCaseSwitchPattern(path, path.span());
+        }
+
+        final var literal = parseLiteralPatternExpression();
+        if (literal != null) {
+            return new PbsAst.LiteralSwitchPattern(literal, literal.span());
+        }
+
+        final var token = cursor.peek();
+        context.report(token, ParseErrors.E_PARSE_INVALID_SWITCH_FORM, "Invalid switch pattern");
+        // Skip the offending token to guarantee progress, but never step past end of input
+        // (same guard as parseHandlePattern).
+        if (!cursor.isAtEnd()) {
+            cursor.advance();
+        }
+        return new PbsAst.WildcardSwitchPattern(
+                PbsAst.WildcardSwitchKind.RECOVERY,
+                context.span(token.start(), token.end()));
+    }
+
+    /**
+     * Parses a literal usable as a switch pattern: boolean, int, float, bounded or string.
+     *
+     * @return the literal expression, or {@code null} when the next token
+     *         is not one of those literal kinds
+     */
+    PbsAst.Expression parseLiteralPatternExpression() {
+        if (cursor.match(PbsTokenKind.TRUE)) {
+            final var token = cursor.previous();
+            return new PbsAst.BoolLiteralExpr(true, context.span(token.start(), token.end()));
+        }
+        if (cursor.match(PbsTokenKind.FALSE)) {
+            final var token = cursor.previous();
+            return new PbsAst.BoolLiteralExpr(false, context.span(token.start(), token.end()));
+        }
+        if (cursor.match(PbsTokenKind.INT_LITERAL)) {
+            final var token = cursor.previous();
+            return new PbsAst.IntLiteralExpr(parseLongOrDefault(token.lexeme()), context.span(token.start(), token.end()));
+        }
+        if (cursor.match(PbsTokenKind.FLOAT_LITERAL)) {
+            final var token = cursor.previous();
+            return new PbsAst.FloatLiteralExpr(parseDoubleOrDefault(token.lexeme()), context.span(token.start(), token.end()));
+        }
+        if (cursor.match(PbsTokenKind.BOUNDED_LITERAL)) {
+            final var token = cursor.previous();
+            // Drop the last character of the lexeme before parsing the digits
+            // (presumably the bounded-literal suffix -- confirm against the lexer).
+            final var raw = token.lexeme().substring(0, Math.max(token.lexeme().length() - 1, 0));
+            return new PbsAst.BoundedLiteralExpr(parseIntOrDefault(raw), context.span(token.start(), token.end()));
+        }
+        if (cursor.match(PbsTokenKind.STRING_LITERAL)) {
+            final var token = cursor.previous();
+            return new PbsAst.StringLiteralExpr(unescapeString(token.lexeme()), context.span(token.start(), token.end()));
+        }
+        return null;
+    }
+
+    /**
+     * Parses one handle pattern: the {@code _} wildcard or an error path.
+     *
+     * <p>Anything else is reported as {@code E_PARSE_INVALID_HANDLE_FORM} and replaced by a
+     * wildcard pattern covering the offending token.
+     */
+    PbsAst.HandlePattern parseHandlePattern() {
+        if (cursor.check(PbsTokenKind.IDENTIFIER) && "_".equals(cursor.peek().lexeme())) {
+            final var wildcard = cursor.advance();
+            return new PbsAst.WildcardHandlePattern(context.span(wildcard.start(), wildcard.end()));
+        }
+
+        if (cursor.check(PbsTokenKind.IDENTIFIER)) {
+            final var path = parseErrorPath();
+            return new PbsAst.ErrorPathHandlePattern(path, path.span());
+        }
+
+        final var token = cursor.peek();
+        context.report(token, ParseErrors.E_PARSE_INVALID_HANDLE_FORM, "Invalid handle pattern");
+        if (!cursor.isAtEnd()) {
+            cursor.advance();
+        }
+        return new PbsAst.WildcardHandlePattern(context.span(token.start(), token.end()));
+    }
+
+    /**
+     * Parses a dotted path of identifiers such as {@code a.b.c}.
+     *
+     * @return the path, with one segment per identifier and a span from the first
+     *         identifier to the last one consumed
+     */
+    PbsAst.ErrorPath parseErrorPath() {
+        final var first = context.consume(PbsTokenKind.IDENTIFIER, "Expected identifier in error path");
+        final var segments = new ArrayList<String>();
+        segments.add(first.lexeme());
+        var end = first.end();
+        while (cursor.match(PbsTokenKind.DOT)) {
+            final var segment = context.consume(PbsTokenKind.IDENTIFIER, "Expected identifier after '.' in error path");
+            segments.add(segment.lexeme());
+            end = segment.end();
+        }
+        return new PbsAst.ErrorPath(ReadOnlyList.wrap(segments), context.span(first.start(), end));
+    }
+
+    /** Parses {@code text} as a {@code long}; malformed or out-of-range text yields {@code 0}. */
+    private long parseLongOrDefault(final String text) {
+        try {
+            return Long.parseLong(text);
+        } catch (NumberFormatException ignored) {
+            return 0L;
+        }
+    }
+
+    /** Parses {@code text} as an {@code int}; malformed or out-of-range text yields {@code 0}. */
+    private int parseIntOrDefault(final String text) {
+        try {
+            return Integer.parseInt(text);
+        } catch (NumberFormatException ignored) {
+            return 0;
+        }
+    }
+
+    /** Parses {@code text} as a {@code double}; malformed text yields {@code 0.0}. */
+    private double parseDoubleOrDefault(final String text) {
+        try {
+            return Double.parseDouble(text);
+        } catch (NumberFormatException ignored) {
+            return 0.0;
+        }
+    }
+
+    /**
+     * Strips the first and last character of {@code lexeme} and decodes the escapes
+     * {@code \n}, {@code \r}, {@code \t}, {@code \"} and {@code \\}.
+     *
+     * <p>Unknown escapes and a lone trailing backslash are kept verbatim. A lexeme shorter
+     * than two characters yields the empty string.
+     */
+    private String unescapeString(final String lexeme) {
+        if (lexeme.length() < 2) {
+            return "";
+        }
+        final var raw = lexeme.substring(1, lexeme.length() - 1);
+        final var sb = new StringBuilder(raw.length());
+        for (int i = 0; i < raw.length(); i++) {
+            final char c = raw.charAt(i);
+            if (c != '\\' || i + 1 >= raw.length()) {
+                sb.append(c);
+                continue;
+            }
+            final char next = raw.charAt(++i);
+            switch (next) {
+                case 'n' -> sb.append('\n');
+                case 'r' -> sb.append('\r');
+                case 't' -> sb.append('\t');
+                case '"' -> sb.append('"');
+                case '\\' -> sb.append('\\');
+                default -> sb.append('\\').append(next);
+            }
+        }
+        return sb.toString();
+    }
+}
diff --git a/prometeu-compiler/frontends/prometeu-frontend-pbs/src/main/java/p/studio/compiler/pbs/parser/PbsExprParser.java b/prometeu-compiler/frontends/prometeu-frontend-pbs/src/main/java/p/studio/compiler/pbs/parser/PbsExprParser.java
index 85c925f2..4fb7ae3c 100644
--- a/prometeu-compiler/frontends/prometeu-frontend-pbs/src/main/java/p/studio/compiler/pbs/parser/PbsExprParser.java
+++ b/prometeu-compiler/frontends/prometeu-frontend-pbs/src/main/java/p/studio/compiler/pbs/parser/PbsExprParser.java
@@ -21,19 +21,21 @@ final class PbsExprParser {
 
     private final PbsExprParserContext context;
     private final PbsTokenCursor cursor;
+    private final PbsExprControlFlowParser controlFlowParser;
 
     PbsExprParser(
             final PbsParserContext parserContext,
             final BlockParserDelegate blockParserDelegate) {
         this.context = new PbsExprParserContext(parserContext, blockParserDelegate);
         this.cursor = context.cursor();
+        this.controlFlowParser = new PbsExprControlFlowParser(context, this::parseExpression, this::parsePrecedenceExpression);
     }
 
     /**
      * Entry point for expression parsing.
*/ PbsAst.Expression parseExpression() { - return parseHandle(); + return controlFlowParser.parseHandle(); } private PbsAst.Expression parsePrecedenceExpression() { @@ -41,116 +43,26 @@ final class PbsExprParser { } private PbsAst.Expression parseHandle() { - if (!cursor.match(PbsTokenKind.HANDLE)) { - return parseElse(); - } - - final var handleToken = cursor.previous(); - final var value = parseElse(); - consume(PbsTokenKind.LEFT_BRACE, "Expected '{' after handle expression"); - - final var arms = new ArrayList(); - while (!cursor.check(PbsTokenKind.RIGHT_BRACE) && !cursor.isAtEnd()) { - final var pattern = parseHandlePattern(); - consume(PbsTokenKind.ARROW, "Expected '->' in handle arm"); - - PbsAst.ErrorPath remapTarget = null; - PbsAst.Block block = null; - long armEnd; - if (cursor.check(PbsTokenKind.LEFT_BRACE)) { - block = parseSurfaceBlock("Expected handle arm block"); - armEnd = block.span().getEnd(); - } else { - remapTarget = parseErrorPath(); - armEnd = remapTarget.span().getEnd(); - } - - arms.add(new PbsAst.HandleArm( - pattern, - remapTarget, - block, - span(pattern.span().getStart(), armEnd))); - - if (!cursor.match(PbsTokenKind.COMMA)) { - break; - } - } - - final var close = consume(PbsTokenKind.RIGHT_BRACE, "Expected '}' to close handle map"); - return new PbsAst.HandleExpr(value, ReadOnlyList.wrap(arms), span(handleToken.start(), close.end())); + return controlFlowParser.parseHandle(); } /** * Parses right-associative else extraction: {@code a else (b else c)}. 
*/ private PbsAst.Expression parseElse() { - var expression = parseIfExpression(); - if (cursor.match(PbsTokenKind.ELSE)) { - final var fallback = parseElse(); - expression = new PbsAst.ElseExpr( - expression, - fallback, - span(expression.span().getStart(), fallback.span().getEnd())); - } - return expression; + return controlFlowParser.parseElse(); } private PbsAst.Expression parseIfExpression() { - if (!cursor.match(PbsTokenKind.IF)) { - return parseSwitchExpression(); - } - return parseIfExpressionFromToken(cursor.previous()); + return controlFlowParser.parseIfExpression(); } private PbsAst.IfExpr parseIfExpressionFromToken(final PbsToken ifToken) { - final var condition = parseExpression(); - final var thenBlock = parseSurfaceBlock("Expected '{' after if condition"); - - if (!cursor.match(PbsTokenKind.ELSE)) { - report(cursor.peek(), ParseErrors.E_PARSE_UNEXPECTED_TOKEN, - "If expression requires an else branch"); - final var unit = new PbsAst.UnitExpr(span(thenBlock.span().getEnd(), thenBlock.span().getEnd())); - return new PbsAst.IfExpr(condition, thenBlock, unit, span(ifToken.start(), unit.span().getEnd())); - } - - final PbsAst.Expression elseExpression; - if (cursor.check(PbsTokenKind.IF)) { - elseExpression = parseIfExpressionFromToken(cursor.advance()); - } else { - final var elseBlock = parseSurfaceBlock("Expected '{' after else"); - elseExpression = new PbsAst.BlockExpr(elseBlock, elseBlock.span()); - } - - return new PbsAst.IfExpr( - condition, - thenBlock, - elseExpression, - span(ifToken.start(), elseExpression.span().getEnd())); + return controlFlowParser.parseIfExpressionFromToken(ifToken); } private PbsAst.Expression parseSwitchExpression() { - if (!cursor.match(PbsTokenKind.SWITCH)) { - return parsePrecedenceExpression(); - } - - final var switchToken = cursor.previous(); - final var selector = parseExpression(); - consume(PbsTokenKind.LEFT_BRACE, "Expected '{' after switch selector"); - - final var arms = new ArrayList(); - while 
(!cursor.check(PbsTokenKind.RIGHT_BRACE) && !cursor.isAtEnd()) { - final var pattern = parseSwitchPattern(); - consume(PbsTokenKind.COLON, "Expected ':' after switch pattern"); - final var block = parseSurfaceBlock("Expected switch arm block"); - arms.add(new PbsAst.SwitchArm(pattern, block, span(pattern.span().getStart(), block.span().getEnd()))); - - if (!cursor.match(PbsTokenKind.COMMA)) { - break; - } - } - - final var close = consume(PbsTokenKind.RIGHT_BRACE, "Expected '}' to close switch expression"); - return new PbsAst.SwitchExpr(selector, ReadOnlyList.wrap(arms), span(switchToken.start(), close.end())); + return controlFlowParser.parseSwitchExpression(); } /** @@ -517,98 +429,19 @@ final class PbsExprParser { } private PbsAst.SwitchPattern parseSwitchPattern() { - if (cursor.match(PbsTokenKind.DEFAULT)) { - return new PbsAst.WildcardSwitchPattern( - PbsAst.WildcardSwitchKind.DEFAULT, - span(cursor.previous().start(), cursor.previous().end())); - } - - if (cursor.check(PbsTokenKind.IDENTIFIER) && "_".equals(cursor.peek().lexeme())) { - final var wildcard = cursor.advance(); - return new PbsAst.WildcardSwitchPattern( - PbsAst.WildcardSwitchKind.UNDERSCORE, - span(wildcard.start(), wildcard.end())); - } - - if (cursor.check(PbsTokenKind.IDENTIFIER) - && cursor.peek(1).kind() == PbsTokenKind.DOT - && cursor.peek(2).kind() == PbsTokenKind.IDENTIFIER) { - final var path = parseErrorPath(); - return new PbsAst.EnumCaseSwitchPattern(path, path.span()); - } - - final var literal = parseLiteralPatternExpression(); - if (literal != null) { - return new PbsAst.LiteralSwitchPattern(literal, literal.span()); - } - - final var token = cursor.peek(); - report(token, ParseErrors.E_PARSE_INVALID_SWITCH_FORM, "Invalid switch pattern"); - cursor.advance(); - return new PbsAst.WildcardSwitchPattern( - PbsAst.WildcardSwitchKind.RECOVERY, - span(token.start(), token.end())); + return controlFlowParser.parseSwitchPattern(); } private PbsAst.Expression 
parseLiteralPatternExpression() { - if (cursor.match(PbsTokenKind.TRUE)) { - final var token = cursor.previous(); - return new PbsAst.BoolLiteralExpr(true, span(token.start(), token.end())); - } - if (cursor.match(PbsTokenKind.FALSE)) { - final var token = cursor.previous(); - return new PbsAst.BoolLiteralExpr(false, span(token.start(), token.end())); - } - if (cursor.match(PbsTokenKind.INT_LITERAL)) { - final var token = cursor.previous(); - return new PbsAst.IntLiteralExpr(parseLongOrDefault(token.lexeme()), span(token.start(), token.end())); - } - if (cursor.match(PbsTokenKind.FLOAT_LITERAL)) { - final var token = cursor.previous(); - return new PbsAst.FloatLiteralExpr(parseDoubleOrDefault(token.lexeme()), span(token.start(), token.end())); - } - if (cursor.match(PbsTokenKind.BOUNDED_LITERAL)) { - final var token = cursor.previous(); - final var raw = token.lexeme().substring(0, Math.max(token.lexeme().length() - 1, 0)); - return new PbsAst.BoundedLiteralExpr(parseIntOrDefault(raw), span(token.start(), token.end())); - } - if (cursor.match(PbsTokenKind.STRING_LITERAL)) { - final var token = cursor.previous(); - return new PbsAst.StringLiteralExpr(unescapeString(token.lexeme()), span(token.start(), token.end())); - } - return null; + return controlFlowParser.parseLiteralPatternExpression(); } private PbsAst.HandlePattern parseHandlePattern() { - if (cursor.check(PbsTokenKind.IDENTIFIER) && "_".equals(cursor.peek().lexeme())) { - final var wildcard = cursor.advance(); - return new PbsAst.WildcardHandlePattern(span(wildcard.start(), wildcard.end())); - } - - if (cursor.check(PbsTokenKind.IDENTIFIER)) { - final var path = parseErrorPath(); - return new PbsAst.ErrorPathHandlePattern(path, path.span()); - } - - final var token = cursor.peek(); - report(token, ParseErrors.E_PARSE_INVALID_HANDLE_FORM, "Invalid handle pattern"); - if (!cursor.isAtEnd()) { - cursor.advance(); - } - return new PbsAst.WildcardHandlePattern(span(token.start(), token.end())); + return 
controlFlowParser.parseHandlePattern(); } private PbsAst.ErrorPath parseErrorPath() { - final var first = consume(PbsTokenKind.IDENTIFIER, "Expected identifier in error path"); - final var segments = new ArrayList(); - segments.add(first.lexeme()); - var end = first.end(); - while (cursor.match(PbsTokenKind.DOT)) { - final var segment = consume(PbsTokenKind.IDENTIFIER, "Expected identifier after '.' in error path"); - segments.add(segment.lexeme()); - end = segment.end(); - } - return new PbsAst.ErrorPath(ReadOnlyList.wrap(segments), span(first.start(), end)); + return controlFlowParser.parseErrorPath(); } private PbsAst.Block parseSurfaceBlock(final String message) {