implements PR006.3
This commit is contained in:
parent
42392f3d02
commit
e94f76db56
@ -1,42 +0,0 @@
|
||||
# PR-006.3 - PBS Syntax Completeness and Module Hygiene
|
||||
|
||||
## Briefing
|
||||
Depois do hardening sintatico principal, ainda restam lacunas de completude do contrato de sintaxe/modulo que afetam determinismo diagnostico e aderencia fina ao spec.
|
||||
Este PR fecha essas lacunas com foco em regras formais e higiene de modulo.
|
||||
|
||||
## Target
|
||||
- Specs:
|
||||
- `docs/pbs/specs/3. Core Syntax Specification.md` (secoes 5.1, 6.1.1, 8, 12)
|
||||
- `docs/pbs/specs/12. Diagnostics Specification.md` (phase = syntax/linking)
|
||||
- Codigo:
|
||||
- `prometeu-compiler/frontends/prometeu-frontend-pbs/src/main/java/p/studio/compiler/pbs/parser/PbsParser.java`
|
||||
- `.../pbs/parser/PbsExprParser.java`
|
||||
- `.../pbs/lexer/PbsLexer.java`
|
||||
- `.../pbs/linking/PbsModuleVisibilityValidator.java`
|
||||
|
||||
## Method
|
||||
1. Atributos (`AttrList`) em `.pbs`:
|
||||
- introduzir parse minimo de atributo no frontend;
|
||||
- em modulo ordinario, rejeitar com diagnostico especifico e recuperacao estavel.
|
||||
2. Regras de modulo:
|
||||
- validar erro quando modulo possui `mod.barrel` mas zero arquivos `.pbs`.
|
||||
3. Ajustes de forma sintatica:
|
||||
- aceitar trailing comma em `StructFieldList`;
|
||||
- aplicar limites de aridade: tupla tipo (1..6) e tupla literal (2..6).
|
||||
4. Lexer/string:
|
||||
- diagnosticar escape de string invalido de forma deterministica (sem aceitar silenciosamente).
|
||||
|
||||
## Acceptance Criteria
|
||||
- Uso de atributos em modulo ordinario gera erro deterministico com span primario no atributo.
|
||||
- Modulo sem `.pbs` e com `mod.barrel` nao passa silenciosamente.
|
||||
- `declare struct S(a: int,);` passa no parser.
|
||||
- Tupla tipo com mais de 6 campos falha deterministicamente.
|
||||
- Tupla literal com mais de 6 itens falha deterministicamente.
|
||||
- Escape invalido em string gera erro lexico dedicado.
|
||||
|
||||
## Tests
|
||||
- Novo teste de parser para atributos em `.pbs` com recuperacao e codigo estavel.
|
||||
- Novo teste de linking para modulo sem `.pbs`.
|
||||
- Testes de parser para trailing comma em struct fields.
|
||||
- Testes de parser para limites de aridade de tupla tipo/tupla literal.
|
||||
- Teste de lexer para escape invalido em string.
|
||||
@ -3,4 +3,5 @@ package p.studio.compiler.pbs.lexer;
|
||||
/**
 * Diagnostic codes raised by the PBS lexer.
 *
 * <p>The lexer passes each constant's {@code name()} to the diagnostic sink as the
 * error code, so renaming a constant changes the reported code.
 */
public enum LexErrors {
    /** A character that the lexer does not accept. */
    E_LEX_INVALID_CHAR,

    /** A string literal that is never closed. */
    E_LEX_UNTERMINATED_STRING,

    /** A backslash escape inside a string literal that is not one of the accepted escapes. */
    E_LEX_INVALID_STRING_ESCAPE,
}
|
||||
|
||||
@ -173,10 +173,18 @@ public final class PbsLexer {
|
||||
|
||||
private void scanStringState() {
|
||||
while (!isAtEnd() && peek() != '"') {
|
||||
if (peek() == '\\' && !isAtEnd()) {
|
||||
if (peek() == '\\') {
|
||||
final var escapeStart = current;
|
||||
advance();
|
||||
if (!isAtEnd()) {
|
||||
advance();
|
||||
final var escaped = advance();
|
||||
if (!isValidStringEscape(escaped)) {
|
||||
report(
|
||||
LexErrors.E_LEX_INVALID_STRING_ESCAPE,
|
||||
"Invalid string escape '\\%s'".formatted(escaped),
|
||||
escapeStart,
|
||||
current);
|
||||
}
|
||||
}
|
||||
continue;
|
||||
}
|
||||
@ -245,8 +253,24 @@ public final class PbsLexer {
|
||||
return c == '_' || Character.isAlphabetic(c) || Character.isDigit(c);
|
||||
}
|
||||
|
||||
private boolean isValidStringEscape(final char escaped) {
|
||||
return escaped == '\\'
|
||||
|| escaped == '"'
|
||||
|| escaped == 'n'
|
||||
|| escaped == 'r'
|
||||
|| escaped == 't';
|
||||
}
|
||||
|
||||
private void report(final LexErrors lexErrors, final String message) {
|
||||
diagnostics.error(lexErrors.name(), message, new Span(fileId, start, current));
|
||||
report(lexErrors, message, start, current);
|
||||
}
|
||||
|
||||
private void report(
|
||||
final LexErrors lexErrors,
|
||||
final String message,
|
||||
final long spanStart,
|
||||
final long spanEnd) {
|
||||
diagnostics.error(lexErrors.name(), message, new Span(fileId, spanStart, spanEnd));
|
||||
}
|
||||
|
||||
private static Map<String, PbsTokenKind> buildKeywords() {
|
||||
|
||||
@ -2,6 +2,7 @@ package p.studio.compiler.pbs.linking;
|
||||
|
||||
public enum PbsLinkErrors {
|
||||
E_LINK_MISSING_BARREL,
|
||||
E_LINK_BARREL_WITHOUT_SOURCE,
|
||||
E_LINK_INVALID_BARREL_FILENAME,
|
||||
E_LINK_DUPLICATE_BARREL_FILE,
|
||||
E_LINK_DUPLICATE_BARREL_ENTRY,
|
||||
|
||||
@ -42,6 +42,12 @@ public final class PbsModuleVisibilityValidator {
|
||||
final var exports = new ModuleExports();
|
||||
|
||||
if (module.sourceFiles().isEmpty()) {
|
||||
if (!module.barrelFiles().isEmpty()) {
|
||||
diagnostics.error(
|
||||
PbsLinkErrors.E_LINK_BARREL_WITHOUT_SOURCE.name(),
|
||||
"Module %s has mod.barrel but no .pbs source files".formatted(displayModule(module.coordinates())),
|
||||
module.barrelFiles().getFirst().ast().span());
|
||||
}
|
||||
return exports;
|
||||
}
|
||||
|
||||
|
||||
@ -19,4 +19,5 @@ public enum ParseErrors {
|
||||
E_PARSE_INVALID_HANDLE_FORM,
|
||||
E_PARSE_INVALID_TUPLE_LITERAL,
|
||||
E_PARSE_INVALID_PROPAGATE_OPERATOR,
|
||||
E_PARSE_ATTRIBUTES_NOT_ALLOWED,
|
||||
}
|
||||
|
||||
@ -14,6 +14,8 @@ import java.util.ArrayList;
|
||||
* Parser for `mod.barrel` files.
|
||||
*/
|
||||
public final class PbsBarrelParser {
|
||||
private static final int MAX_NAMED_TUPLE_ARITY = 6;
|
||||
|
||||
private final PbsTokenCursor cursor;
|
||||
private final FileId fileId;
|
||||
private final DiagnosticSink diagnostics;
|
||||
@ -242,6 +244,10 @@ public final class PbsBarrelParser {
|
||||
consume(PbsTokenKind.COLON, "Expected ':' after tuple field label");
|
||||
final var type = parseTypeRef();
|
||||
fields.add(new PbsAst.NamedTypeField(label.lexeme(), type, span(label.start(), type.span().getEnd())));
|
||||
if (fields.size() == MAX_NAMED_TUPLE_ARITY + 1) {
|
||||
report(label, ParseErrors.E_PARSE_INVALID_TYPE_SURFACE,
|
||||
"Named tuple type arity must be between 1 and 6 fields");
|
||||
}
|
||||
} while (cursor.match(PbsTokenKind.COMMA) && !cursor.check(PbsTokenKind.RIGHT_PAREN));
|
||||
|
||||
final var close = consume(PbsTokenKind.RIGHT_PAREN, "Expected ')' after named tuple type");
|
||||
|
||||
@ -14,6 +14,8 @@ import java.util.ArrayList;
|
||||
* Dedicated expression parser for PBS.
|
||||
*/
|
||||
final class PbsExprParser {
|
||||
private static final int MAX_TUPLE_LITERAL_ARITY = 6;
|
||||
|
||||
@FunctionalInterface
|
||||
interface BlockParserDelegate {
|
||||
PbsAst.Block parse(String message);
|
||||
@ -388,6 +390,10 @@ final class PbsExprParser {
|
||||
report(close, ParseErrors.E_PARSE_INVALID_TUPLE_LITERAL,
|
||||
"Single-slot tuple literal is not allowed in PBS core syntax");
|
||||
}
|
||||
if (items.size() > MAX_TUPLE_LITERAL_ARITY) {
|
||||
report(close, ParseErrors.E_PARSE_INVALID_TUPLE_LITERAL,
|
||||
"Tuple literal arity must be between 2 and 6 items");
|
||||
}
|
||||
return new PbsAst.TupleExpr(ReadOnlyList.wrap(items), span(open.start(), close.end()));
|
||||
}
|
||||
|
||||
@ -417,6 +423,10 @@ final class PbsExprParser {
|
||||
}
|
||||
|
||||
final var close = consume(PbsTokenKind.RIGHT_PAREN, "Expected ')' after tuple literal");
|
||||
if (items.size() > MAX_TUPLE_LITERAL_ARITY) {
|
||||
report(close, ParseErrors.E_PARSE_INVALID_TUPLE_LITERAL,
|
||||
"Tuple literal arity must be between 2 and 6 items");
|
||||
}
|
||||
if (hasLabels) {
|
||||
for (final var item : items) {
|
||||
if (item.label() == null) {
|
||||
@ -666,7 +676,7 @@ final class PbsExprParser {
|
||||
case 't' -> sb.append('\t');
|
||||
case '"' -> sb.append('"');
|
||||
case '\\' -> sb.append('\\');
|
||||
default -> sb.append(next);
|
||||
default -> sb.append('\\').append(next);
|
||||
}
|
||||
}
|
||||
return sb.toString();
|
||||
|
||||
@ -19,6 +19,8 @@ import java.util.HashSet;
|
||||
* navigation is delegated to {@link PbsTokenCursor}.
|
||||
*/
|
||||
public final class PbsParser {
|
||||
private static final int MAX_NAMED_TUPLE_ARITY = 6;
|
||||
|
||||
private final PbsTokenCursor cursor;
|
||||
private final PbsExprParser exprParser;
|
||||
private final FileId fileId;
|
||||
@ -53,6 +55,10 @@ public final class PbsParser {
|
||||
final var topDecls = new ArrayList<PbsAst.TopDecl>();
|
||||
|
||||
while (!cursor.isAtEnd()) {
|
||||
if (cursor.check(PbsTokenKind.LEFT_BRACKET)) {
|
||||
parseRejectedAttributeList();
|
||||
}
|
||||
|
||||
if (cursor.match(PbsTokenKind.IMPORT)) {
|
||||
imports.add(parseImport(cursor.previous()));
|
||||
continue;
|
||||
@ -265,11 +271,98 @@ public final class PbsParser {
|
||||
isPublic,
|
||||
isMutable,
|
||||
span(start.start(), typeRef.span().getEnd())));
|
||||
} while (cursor.match(PbsTokenKind.COMMA));
|
||||
} while (cursor.match(PbsTokenKind.COMMA) && !cursor.check(PbsTokenKind.RIGHT_PAREN));
|
||||
|
||||
return fields;
|
||||
}
|
||||
|
||||
private void parseRejectedAttributeList() {
|
||||
while (cursor.check(PbsTokenKind.LEFT_BRACKET)) {
|
||||
final var attributeSpan = parseAttribute();
|
||||
diagnostics.error(
|
||||
ParseErrors.E_PARSE_ATTRIBUTES_NOT_ALLOWED.name(),
|
||||
"Attributes are not allowed in ordinary .pbs source modules",
|
||||
attributeSpan);
|
||||
}
|
||||
}
|
||||
|
||||
private Span parseAttribute() {
|
||||
final var leftBracket = consume(PbsTokenKind.LEFT_BRACKET, "Expected '[' to start attribute");
|
||||
final var name = consume(PbsTokenKind.IDENTIFIER, "Expected attribute identifier");
|
||||
if (cursor.match(PbsTokenKind.LEFT_PAREN)) {
|
||||
parseAttributeArguments();
|
||||
}
|
||||
|
||||
long end = Math.max(leftBracket.end(), name.end());
|
||||
if (cursor.match(PbsTokenKind.RIGHT_BRACKET)) {
|
||||
end = cursor.previous().end();
|
||||
} else {
|
||||
report(cursor.peek(), ParseErrors.E_PARSE_EXPECTED_TOKEN, "Expected ']' to close attribute");
|
||||
end = recoverUntilAttributeCloseOrTopLevel(end);
|
||||
}
|
||||
return span(leftBracket.start(), end);
|
||||
}
|
||||
|
||||
private void parseAttributeArguments() {
|
||||
if (!cursor.check(PbsTokenKind.RIGHT_PAREN)) {
|
||||
do {
|
||||
parseAttributeArgument();
|
||||
} while (cursor.match(PbsTokenKind.COMMA) && !cursor.check(PbsTokenKind.RIGHT_PAREN));
|
||||
}
|
||||
|
||||
if (cursor.match(PbsTokenKind.RIGHT_PAREN)) {
|
||||
return;
|
||||
}
|
||||
report(cursor.peek(), ParseErrors.E_PARSE_EXPECTED_TOKEN, "Expected ')' after attribute arguments");
|
||||
recoverUntilAttributeArgumentClose();
|
||||
}
|
||||
|
||||
private void parseAttributeArgument() {
|
||||
consume(PbsTokenKind.IDENTIFIER, "Expected attribute argument name");
|
||||
consume(PbsTokenKind.EQUAL, "Expected '=' in attribute argument");
|
||||
parseAttributeValue();
|
||||
}
|
||||
|
||||
private void parseAttributeValue() {
|
||||
if (cursor.match(PbsTokenKind.STRING_LITERAL, PbsTokenKind.INT_LITERAL, PbsTokenKind.TRUE, PbsTokenKind.FALSE)) {
|
||||
return;
|
||||
}
|
||||
|
||||
report(cursor.peek(), ParseErrors.E_PARSE_EXPECTED_TOKEN, "Expected attribute value (string, int, or bool)");
|
||||
if (!cursor.isAtEnd()
|
||||
&& !cursor.check(PbsTokenKind.COMMA)
|
||||
&& !cursor.check(PbsTokenKind.RIGHT_PAREN)
|
||||
&& !cursor.check(PbsTokenKind.RIGHT_BRACKET)) {
|
||||
cursor.advance();
|
||||
}
|
||||
}
|
||||
|
||||
private long recoverUntilAttributeCloseOrTopLevel(final long fallbackEnd) {
|
||||
long end = fallbackEnd;
|
||||
while (!cursor.isAtEnd()) {
|
||||
if (cursor.match(PbsTokenKind.RIGHT_BRACKET)) {
|
||||
return cursor.previous().end();
|
||||
}
|
||||
if (isTopLevelRestartToken(cursor.peek().kind())) {
|
||||
return end;
|
||||
}
|
||||
end = cursor.advance().end();
|
||||
}
|
||||
return end;
|
||||
}
|
||||
|
||||
private void recoverUntilAttributeArgumentClose() {
|
||||
while (!cursor.isAtEnd()) {
|
||||
if (cursor.match(PbsTokenKind.RIGHT_PAREN)) {
|
||||
return;
|
||||
}
|
||||
if (cursor.check(PbsTokenKind.RIGHT_BRACKET) || isTopLevelRestartToken(cursor.peek().kind())) {
|
||||
return;
|
||||
}
|
||||
cursor.advance();
|
||||
}
|
||||
}
|
||||
|
||||
private StructBodyParse parseStructBodyAndConsumeRightBrace(final PbsToken leftBrace) {
|
||||
final var methods = new ArrayList<PbsAst.FunctionDecl>();
|
||||
final var ctors = new ArrayList<PbsAst.CtorDecl>();
|
||||
@ -654,6 +747,10 @@ public final class PbsParser {
|
||||
label.lexeme(),
|
||||
typeRef,
|
||||
span(label.start(), typeRef.span().getEnd())));
|
||||
if (fields.size() == MAX_NAMED_TUPLE_ARITY + 1) {
|
||||
report(label, ParseErrors.E_PARSE_INVALID_TYPE_SURFACE,
|
||||
"Named tuple type arity must be between 1 and 6 fields");
|
||||
}
|
||||
} while (cursor.match(PbsTokenKind.COMMA) && !cursor.check(PbsTokenKind.RIGHT_PAREN));
|
||||
return fields;
|
||||
}
|
||||
@ -1013,10 +1110,7 @@ public final class PbsParser {
|
||||
*/
|
||||
private void synchronizeTopLevel() {
|
||||
while (!cursor.isAtEnd()) {
|
||||
if (cursor.check(PbsTokenKind.FN)
|
||||
|| cursor.check(PbsTokenKind.IMPORT)
|
||||
|| cursor.check(PbsTokenKind.DECLARE)
|
||||
|| cursor.check(PbsTokenKind.IMPLEMENTS)) {
|
||||
if (isTopLevelRestartToken(cursor.peek().kind())) {
|
||||
return;
|
||||
}
|
||||
if (cursor.match(PbsTokenKind.SEMICOLON)) {
|
||||
@ -1026,6 +1120,14 @@ public final class PbsParser {
|
||||
}
|
||||
}
|
||||
|
||||
private boolean isTopLevelRestartToken(final PbsTokenKind kind) {
|
||||
return kind == PbsTokenKind.FN
|
||||
|| kind == PbsTokenKind.IMPORT
|
||||
|| kind == PbsTokenKind.DECLARE
|
||||
|| kind == PbsTokenKind.IMPLEMENTS
|
||||
|| kind == PbsTokenKind.LEFT_BRACKET;
|
||||
}
|
||||
|
||||
/**
|
||||
* Consumes a required token and reports an error if it is missing.
|
||||
*/
|
||||
|
||||
@ -112,6 +112,17 @@ class PbsLexerTest {
|
||||
diagnostics.stream().findFirst().orElseThrow().getCode());
|
||||
}
|
||||
|
||||
@Test
|
||||
void shouldReportInvalidStringEscape() {
|
||||
final var source = "\"bad\\q\"";
|
||||
final var diagnostics = DiagnosticSink.empty();
|
||||
|
||||
PbsLexer.lex(source, new FileId(0), diagnostics);
|
||||
|
||||
assertTrue(diagnostics.hasErrors(), "Lexer should report invalid string escapes");
|
||||
assertTrue(diagnostics.stream().anyMatch(d -> d.getCode().equals(LexErrors.E_LEX_INVALID_STRING_ESCAPE.name())));
|
||||
}
|
||||
|
||||
@Test
|
||||
void shouldReportInvalidCharacter() {
|
||||
final var source = "fn a() { ~ }";
|
||||
|
||||
@ -33,6 +33,19 @@ class PbsModuleVisibilityTest {
|
||||
assertTrue(diagnostics.stream().anyMatch(d -> d.getCode().equals(PbsLinkErrors.E_LINK_MISSING_BARREL.name())));
|
||||
}
|
||||
|
||||
@Test
|
||||
void shouldReportBarrelWithoutSourceFiles() {
|
||||
final var diagnostics = DiagnosticSink.empty();
|
||||
final var nextFileId = new AtomicInteger(0);
|
||||
final var module = module("core", "math", List.of(), """
|
||||
pub fn run() -> int;
|
||||
""", nextFileId, diagnostics);
|
||||
|
||||
new PbsModuleVisibilityValidator().validate(ReadOnlyList.wrap(List.of(module)), diagnostics);
|
||||
|
||||
assertTrue(diagnostics.stream().anyMatch(d -> d.getCode().equals(PbsLinkErrors.E_LINK_BARREL_WITHOUT_SOURCE.name())));
|
||||
}
|
||||
|
||||
@Test
|
||||
void shouldReportDuplicateBarrelEntriesByKindAndSignature() {
|
||||
final var diagnostics = DiagnosticSink.empty();
|
||||
|
||||
@ -166,6 +166,7 @@ class PbsExprParserTest {
|
||||
value?;
|
||||
(a: 1);
|
||||
(a: 1, 2);
|
||||
(1, 2, 3, 4, 5, 6, 7);
|
||||
if a { 1; };
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -121,6 +121,38 @@ class PbsParserTest {
|
||||
assertEquals(1, implementsDecl.methods().size());
|
||||
}
|
||||
|
||||
@Test
|
||||
void shouldRejectAttributesInOrdinarySourceAndRecover() {
|
||||
final var source = """
|
||||
[Host(module = "gfx", name = "draw", version = 1)]
|
||||
fn run() -> int { return 1; }
|
||||
declare struct Point(x: int,);
|
||||
""";
|
||||
final var diagnostics = DiagnosticSink.empty();
|
||||
final var fileId = new FileId(0);
|
||||
|
||||
final PbsAst.File ast = PbsParser.parse(PbsLexer.lex(source, fileId, diagnostics), fileId, diagnostics);
|
||||
|
||||
assertEquals(2, ast.topDecls().size());
|
||||
assertInstanceOf(PbsAst.FunctionDecl.class, ast.topDecls().get(0));
|
||||
assertInstanceOf(PbsAst.StructDecl.class, ast.topDecls().get(1));
|
||||
assertTrue(diagnostics.stream().anyMatch(d -> d.getCode().equals(ParseErrors.E_PARSE_ATTRIBUTES_NOT_ALLOWED.name())));
|
||||
}
|
||||
|
||||
@Test
|
||||
void shouldAcceptStructFieldTrailingComma() {
|
||||
final var source = "declare struct S(a: int,);";
|
||||
final var diagnostics = DiagnosticSink.empty();
|
||||
final var fileId = new FileId(0);
|
||||
|
||||
final PbsAst.File ast = PbsParser.parse(PbsLexer.lex(source, fileId, diagnostics), fileId, diagnostics);
|
||||
|
||||
assertTrue(diagnostics.isEmpty(), "Struct field trailing comma should be accepted");
|
||||
final var decl = assertInstanceOf(PbsAst.StructDecl.class, ast.topDecls().getFirst());
|
||||
assertEquals(1, decl.fields().size());
|
||||
assertEquals("a", decl.fields().getFirst().name());
|
||||
}
|
||||
|
||||
@Test
|
||||
void shouldParseOptionalAndNamedTupleTypes() {
|
||||
final var source = """
|
||||
@ -148,6 +180,21 @@ class PbsParserTest {
|
||||
assertEquals(PbsAst.TypeRefKind.NAMED_TUPLE, returnType.kind());
|
||||
}
|
||||
|
||||
@Test
|
||||
void shouldRejectNamedTupleTypeWithArityAboveSix() {
|
||||
final var source = """
|
||||
fn tooWide(v: (a: int, b: int, c: int, d: int, e: int, f: int, g: int)) -> int {
|
||||
return 0;
|
||||
}
|
||||
""";
|
||||
final var diagnostics = DiagnosticSink.empty();
|
||||
final var fileId = new FileId(0);
|
||||
|
||||
PbsParser.parse(PbsLexer.lex(source, fileId, diagnostics), fileId, diagnostics);
|
||||
|
||||
assertTrue(diagnostics.stream().anyMatch(d -> d.getCode().equals(ParseErrors.E_PARSE_INVALID_TYPE_SURFACE.name())));
|
||||
}
|
||||
|
||||
@Test
|
||||
void shouldRejectReservedDeclareHostAndBuiltinType() {
|
||||
final var source = """
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user