From 04f65f46b3b2debc4486172b16359f0aa19792b9 Mon Sep 17 00:00:00 2001 From: bQUARKz Date: Thu, 5 Mar 2026 09:49:19 +0000 Subject: [PATCH] implements PR001 --- .../p/studio/compiler/pbs/lexer/PbsLexer.java | 67 ++++++++++++-- .../compiler/pbs/lexer/PbsTokenKind.java | 73 +++++++++++---- .../compiler/pbs/parser/PbsTokenCursor.java | 3 + .../compiler/pbs/lexer/PbsLexerTest.java | 91 +++++++++++++++++++ 4 files changed, 204 insertions(+), 30 deletions(-) diff --git a/prometeu-compiler/frontends/prometeu-frontend-pbs/src/main/java/p/studio/compiler/pbs/lexer/PbsLexer.java b/prometeu-compiler/frontends/prometeu-frontend-pbs/src/main/java/p/studio/compiler/pbs/lexer/PbsLexer.java index d63520d3..8f7fe2b0 100644 --- a/prometeu-compiler/frontends/prometeu-frontend-pbs/src/main/java/p/studio/compiler/pbs/lexer/PbsLexer.java +++ b/prometeu-compiler/frontends/prometeu-frontend-pbs/src/main/java/p/studio/compiler/pbs/lexer/PbsLexer.java @@ -77,16 +77,23 @@ public final class PbsLexer { case ':' -> addToken(PbsTokenKind.COLON); case ';' -> addToken(PbsTokenKind.SEMICOLON); case '@' -> addToken(PbsTokenKind.AT); - case '+' -> addToken(PbsTokenKind.PLUS); - case '-' -> addToken(PbsTokenKind.MINUS); - case '*' -> addToken(PbsTokenKind.STAR); - case '%' -> addToken(PbsTokenKind.PERCENT); + case '?' -> addToken(PbsTokenKind.QUESTION); + case '+' -> addToken(match('=') ? PbsTokenKind.PLUS_EQUAL : PbsTokenKind.PLUS); + case '-' -> { + if (match('>')) { + addToken(PbsTokenKind.ARROW); + return; + } + addToken(match('=') ? PbsTokenKind.MINUS_EQUAL : PbsTokenKind.MINUS); + } + case '*' -> addToken(match('=') ? PbsTokenKind.STAR_EQUAL : PbsTokenKind.STAR); + case '%' -> addToken(match('=') ? PbsTokenKind.PERCENT_EQUAL : PbsTokenKind.PERCENT); case '.' -> { if (match('.')) { addToken(PbsTokenKind.DOT_DOT); return; } - report(LexErrors.E_LEX_INVALID_CHAR, "Unexpected '.'"); + addToken(PbsTokenKind.DOT); } case '!' -> addToken(match('=') ? PbsTokenKind.BANG_EQUAL : PbsTokenKind.BANG); case '=' -> addToken(match('=') ? PbsTokenKind.EQUAL_EQUAL : PbsTokenKind.EQUAL); @@ -111,7 +118,7 @@ public final class PbsLexer { state = LexerState.LINE_COMMENT; return; } - addToken(PbsTokenKind.SLASH); + addToken(match('=') ? PbsTokenKind.SLASH_EQUAL : PbsTokenKind.SLASH); } case '"' -> state = LexerState.STRING; default -> { @@ -192,6 +199,7 @@ public final class PbsLexer { while (!isAtEnd() && peek() != '\n') { advance(); } + addToken(PbsTokenKind.COMMENT); state = LexerState.DEFAULT; } @@ -246,23 +254,62 @@ public final class PbsLexer { map.put("import", PbsTokenKind.IMPORT); map.put("from", PbsTokenKind.FROM); map.put("as", PbsTokenKind.AS); - map.put("pub", PbsTokenKind.PUB); - map.put("mod", PbsTokenKind.MOD); + map.put("service", PbsTokenKind.SERVICE); + map.put("host", PbsTokenKind.HOST); map.put("fn", PbsTokenKind.FN); + map.put("apply", PbsTokenKind.APPLY); + map.put("bind", PbsTokenKind.BIND); + map.put("new", PbsTokenKind.NEW); + map.put("implements", PbsTokenKind.IMPLEMENTS); + map.put("using", PbsTokenKind.USING); + map.put("ctor", PbsTokenKind.CTOR); map.put("let", PbsTokenKind.LET); + map.put("const", PbsTokenKind.CONST); map.put("declare", PbsTokenKind.DECLARE); map.put("struct", PbsTokenKind.STRUCT); map.put("contract", PbsTokenKind.CONTRACT); map.put("error", PbsTokenKind.ERROR); + map.put("enum", PbsTokenKind.ENUM); + map.put("callback", PbsTokenKind.CALLBACK); + map.put("builtin", PbsTokenKind.BUILTIN); + map.put("Self", PbsTokenKind.SELF); + map.put("this", PbsTokenKind.THIS); + map.put("pub", PbsTokenKind.PUB); + map.put("mut", PbsTokenKind.MUT); + map.put("mod", PbsTokenKind.MOD); + map.put("type", PbsTokenKind.TYPE); + map.put("if", PbsTokenKind.IF); map.put("else", PbsTokenKind.ELSE); - map.put("when", PbsTokenKind.WHEN); + map.put("switch", PbsTokenKind.SWITCH); + map.put("default", PbsTokenKind.DEFAULT); map.put("for", PbsTokenKind.FOR); - map.put("in", PbsTokenKind.IN); + map.put("until", PbsTokenKind.UNTIL); + map.put("step", PbsTokenKind.STEP); + map.put("while", PbsTokenKind.WHILE); + map.put("break", PbsTokenKind.BREAK); + map.put("continue", PbsTokenKind.CONTINUE); map.put("return", PbsTokenKind.RETURN); + map.put("void", PbsTokenKind.VOID); + map.put("optional", PbsTokenKind.OPTIONAL); + map.put("result", PbsTokenKind.RESULT); + map.put("some", PbsTokenKind.SOME); + map.put("none", PbsTokenKind.NONE); + map.put("ok", PbsTokenKind.OK); + map.put("err", PbsTokenKind.ERR); + map.put("handle", PbsTokenKind.HANDLE); + map.put("true", PbsTokenKind.TRUE); map.put("false", PbsTokenKind.FALSE); + map.put("and", PbsTokenKind.AND); + map.put("or", PbsTokenKind.OR); + map.put("not", PbsTokenKind.NOT); + + map.put("spawn", PbsTokenKind.SPAWN); + map.put("yield", PbsTokenKind.YIELD); + map.put("sleep", PbsTokenKind.SLEEP); + map.put("match", PbsTokenKind.MATCH); return map; } } diff --git a/prometeu-compiler/frontends/prometeu-frontend-pbs/src/main/java/p/studio/compiler/pbs/lexer/PbsTokenKind.java b/prometeu-compiler/frontends/prometeu-frontend-pbs/src/main/java/p/studio/compiler/pbs/lexer/PbsTokenKind.java index 09c94203..4f6790bd 100644 --- a/prometeu-compiler/frontends/prometeu-frontend-pbs/src/main/java/p/studio/compiler/pbs/lexer/PbsTokenKind.java +++ b/prometeu-compiler/frontends/prometeu-frontend-pbs/src/main/java/p/studio/compiler/pbs/lexer/PbsTokenKind.java @@ -17,50 +17,75 @@ public enum PbsTokenKind { FLOAT_LITERAL, BOUNDED_LITERAL, STRING_LITERAL, + COMMENT, - // Declaration and import keywords. + // Import keywords. // Example: // `import { Vec2 as V2 } from @core:math;` IMPORT, FROM, AS, - // Barrel visibility keywords (reserved in source parser for now). - // Example in barrel: - // `pub fn sum;` - PUB, - MOD, - - // Top-level declaration keywords. - // Example: - // `service Audio { fn play(): int { return 1; } }` - // `declare struct Point(x: int, y: int);` + // Declaration and callable keywords. SERVICE, + HOST, FN, + APPLY, + BIND, + NEW, + IMPLEMENTS, + USING, + CTOR, DECLARE, - - // Statement and type declaration keywords. - // Example: - // `let x: int = 1;` LET, + CONST, STRUCT, CONTRACT, ERROR, + ENUM, + CALLBACK, + BUILTIN, + SELF, + THIS, + PUB, + MUT, + MOD, + TYPE, // Control-flow keywords. - // Example: - // `if cond { return 1; } else { return 0; }` IF, ELSE, - WHEN, + SWITCH, + DEFAULT, FOR, - IN, + UNTIL, + STEP, + WHILE, + BREAK, + CONTINUE, RETURN, + VOID, + OPTIONAL, + RESULT, + SOME, + NONE, + OK, + ERR, + HANDLE, // Boolean literals. - // Example: `true && false` + // Example: `true and false` TRUE, FALSE, + AND, + OR, + NOT, + + // Reserved keywords (not active syntax in v1). + SPAWN, + YIELD, + SLEEP, + MATCH, // Delimiters and separators. // Example: @@ -75,7 +100,10 @@ public enum PbsTokenKind { COLON, SEMICOLON, AT, + DOT, DOT_DOT, + ARROW, + QUESTION, // Arithmetic and unary operators. // Example: `-a + b * 2` @@ -85,6 +113,11 @@ public enum PbsTokenKind { SLASH, PERCENT, BANG, + PLUS_EQUAL, + MINUS_EQUAL, + STAR_EQUAL, + SLASH_EQUAL, + PERCENT_EQUAL, // Comparison and assignment operators. // Example: diff --git a/prometeu-compiler/frontends/prometeu-frontend-pbs/src/main/java/p/studio/compiler/pbs/parser/PbsTokenCursor.java b/prometeu-compiler/frontends/prometeu-frontend-pbs/src/main/java/p/studio/compiler/pbs/parser/PbsTokenCursor.java index df331330..dd143d31 100644 --- a/prometeu-compiler/frontends/prometeu-frontend-pbs/src/main/java/p/studio/compiler/pbs/parser/PbsTokenCursor.java +++ b/prometeu-compiler/frontends/prometeu-frontend-pbs/src/main/java/p/studio/compiler/pbs/parser/PbsTokenCursor.java @@ -18,6 +18,9 @@ final class PbsTokenCursor { PbsTokenCursor(final ReadOnlyList tokens) { for (final var token : tokens) { + if (token.kind() == PbsTokenKind.COMMENT) { + continue; + } this.tokens.add(token); } } diff --git a/prometeu-compiler/frontends/prometeu-frontend-pbs/src/test/java/p/studio/compiler/pbs/lexer/PbsLexerTest.java b/prometeu-compiler/frontends/prometeu-frontend-pbs/src/test/java/p/studio/compiler/pbs/lexer/PbsLexerTest.java index 3aa2b625..c8de3386 100644 --- a/prometeu-compiler/frontends/prometeu-frontend-pbs/src/test/java/p/studio/compiler/pbs/lexer/PbsLexerTest.java +++ b/prometeu-compiler/frontends/prometeu-frontend-pbs/src/test/java/p/studio/compiler/pbs/lexer/PbsLexerTest.java @@ -4,6 +4,8 @@ import org.junit.jupiter.api.Test; import p.studio.compiler.source.diagnostics.DiagnosticSink; import p.studio.compiler.source.identifiers.FileId; +import java.util.List; + import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertTrue; @@ -32,4 +34,93 @@ class PbsLexerTest { assertEquals(PbsTokenKind.EOF, tokens.getLast().kind()); assertTrue(diagnostics.isEmpty(), "Lexer should not report diagnostics for valid input"); } + + @Test + void shouldLexCoreKeywordsOperatorsAndCommentToken() { + final var source = """ + import { Item as Alias } from @core:math; + declare const PI: float = 3.14; + fn run(a: int) -> int { + let const x: int = 1; + x += 1; + if true and false || !false { + return x; + } + return x; + } + // trailing comment + """; + final var diagnostics = DiagnosticSink.empty(); + + final var tokens = PbsLexer.lex(source, new FileId(0), diagnostics); + final List kinds = tokens.stream().map(PbsToken::kind).toList(); + + assertTrue(kinds.contains(PbsTokenKind.IMPORT)); + assertTrue(kinds.contains(PbsTokenKind.AS)); + assertTrue(kinds.contains(PbsTokenKind.DECLARE)); + assertTrue(kinds.contains(PbsTokenKind.CONST)); + assertTrue(kinds.contains(PbsTokenKind.ARROW)); + assertTrue(kinds.contains(PbsTokenKind.PLUS_EQUAL)); + assertTrue(kinds.contains(PbsTokenKind.AND)); + assertTrue(kinds.contains(PbsTokenKind.OR_OR)); + assertTrue(kinds.contains(PbsTokenKind.COMMENT)); + assertEquals(PbsTokenKind.EOF, tokens.getLast().kind()); + assertTrue(diagnostics.isEmpty(), "Lexer should not report diagnostics for valid input"); + } + + @Test + void shouldTokenizeReservedKeywordsAndMemberOperators() { + final var source = """ + apply bind new implements using ctor host enum callback Self this mut const + switch default until step while break continue return void optional result + some none ok err handle and or not spawn yield sleep match type mod a.b a..b ? + """; + final var diagnostics = DiagnosticSink.empty(); + + final var tokens = PbsLexer.lex(source, new FileId(0), diagnostics); + final List kinds = tokens.stream().map(PbsToken::kind).toList(); + + assertTrue(kinds.contains(PbsTokenKind.APPLY)); + assertTrue(kinds.contains(PbsTokenKind.BIND)); + assertTrue(kinds.contains(PbsTokenKind.NEW)); + assertTrue(kinds.contains(PbsTokenKind.IMPLEMENTS)); + assertTrue(kinds.contains(PbsTokenKind.USING)); + assertTrue(kinds.contains(PbsTokenKind.CTOR)); + assertTrue(kinds.contains(PbsTokenKind.HOST)); + assertTrue(kinds.contains(PbsTokenKind.ENUM)); + assertTrue(kinds.contains(PbsTokenKind.CALLBACK)); + assertTrue(kinds.contains(PbsTokenKind.SELF)); + assertTrue(kinds.contains(PbsTokenKind.THIS)); + assertTrue(kinds.contains(PbsTokenKind.SPAWN)); + assertTrue(kinds.contains(PbsTokenKind.MATCH)); + assertTrue(kinds.contains(PbsTokenKind.DOT)); + assertTrue(kinds.contains(PbsTokenKind.DOT_DOT)); + assertTrue(kinds.contains(PbsTokenKind.QUESTION)); + assertEquals(PbsTokenKind.EOF, tokens.getLast().kind()); + assertTrue(diagnostics.isEmpty(), "Lexer should not report diagnostics for valid input"); + } + + @Test + void shouldReportUnterminatedString() { + final var source = "\"unterminated"; + final var diagnostics = DiagnosticSink.empty(); + + PbsLexer.lex(source, new FileId(0), diagnostics); + + assertTrue(diagnostics.hasErrors(), "Lexer should report unterminated strings"); + assertEquals(LexErrors.E_LEX_UNTERMINATED_STRING.name(), + diagnostics.stream().findFirst().orElseThrow().getCode()); + } + + @Test + void shouldReportInvalidCharacter() { + final var source = "fn a() { ~ }"; + final var diagnostics = DiagnosticSink.empty(); + + PbsLexer.lex(source, new FileId(0), diagnostics); + + assertTrue(diagnostics.hasErrors(), "Lexer should report invalid characters"); + assertEquals(LexErrors.E_LEX_INVALID_CHAR.name(), + diagnostics.stream().findFirst().orElseThrow().getCode()); + } }