diff --git a/prometeu-compiler/prometeu-build-pipeline/src/main/java/p/studio/compiler/backend/bytecode/BytecodeMarshalingErrorCode.java b/prometeu-compiler/prometeu-build-pipeline/src/main/java/p/studio/compiler/backend/bytecode/BytecodeMarshalingErrorCode.java
new file mode 100644
--- /dev/null
+++ b/prometeu-compiler/prometeu-build-pipeline/src/main/java/p/studio/compiler/backend/bytecode/BytecodeMarshalingErrorCode.java
@@ -0,0 +1,14 @@
+package p.studio.compiler.backend.bytecode;
+
+/**
+ * Error codes carried by {@link BytecodeMarshalingException}.
+ */
+public enum BytecodeMarshalingErrorCode {
+    /** A constant string has more than {@code Integer.MAX_VALUE / 4} chars. */
+    MARSHAL_FORMAT_STRING_TOO_LONG,
+    /** A syscall module name is longer than 0xFFFF UTF-8 bytes. */
+    MARSHAL_FORMAT_SYSC_MODULE_TOO_LONG,
+    /** A syscall name is longer than 0xFFFF UTF-8 bytes. */
+    MARSHAL_FORMAT_SYSC_NAME_TOO_LONG,
+}
+
diff --git a/prometeu-compiler/prometeu-build-pipeline/src/main/java/p/studio/compiler/backend/bytecode/BytecodeMarshalingException.java b/prometeu-compiler/prometeu-build-pipeline/src/main/java/p/studio/compiler/backend/bytecode/BytecodeMarshalingException.java
new file mode 100644
--- /dev/null
+++ b/prometeu-compiler/prometeu-build-pipeline/src/main/java/p/studio/compiler/backend/bytecode/BytecodeMarshalingException.java
@@ -0,0 +1,26 @@
+package p.studio.compiler.backend.bytecode;
+
+/**
+ * Unchecked exception thrown when bytecode data cannot be marshaled.
+ * Carries a {@link BytecodeMarshalingErrorCode} identifying the failure.
+ */
+public class BytecodeMarshalingException extends RuntimeException {
+    private final BytecodeMarshalingErrorCode code;
+
+    /**
+     * @param code    error code identifying the failure
+     * @param message human-readable description of the failure
+     */
+    public BytecodeMarshalingException(
+            final BytecodeMarshalingErrorCode code,
+            final String message) {
+        super(message);
+        this.code = code;
+    }
+
+    /** @return the error code supplied at construction */
+    public BytecodeMarshalingErrorCode code() {
+        return code;
+    }
+}
+
diff --git a/prometeu-compiler/prometeu-build-pipeline/src/main/java/p/studio/compiler/backend/bytecode/BytecodeModule.java b/prometeu-compiler/prometeu-build-pipeline/src/main/java/p/studio/compiler/backend/bytecode/BytecodeModule.java
new file mode 100644
--- /dev/null
+++ b/prometeu-compiler/prometeu-build-pipeline/src/main/java/p/studio/compiler/backend/bytecode/BytecodeModule.java
@@ -0,0 +1,436 @@
+package p.studio.compiler.backend.bytecode;
+
+import p.studio.utilities.structures.ReadOnlyList;
+
+import java.io.ByteArrayOutputStream;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+import java.nio.charset.StandardCharsets;
+import java.util.ArrayList;
+import java.util.Objects;
+
+/**
+ * In-memory bytecode module that serializes to a sectioned binary image.
+ *
+ * Null list components are replaced with empty lists and {@code code} is cloned on
+ * construction; {@code debugInfo} may remain {@code null}.
+ */
+public record BytecodeModule(
+        int version,
+        ReadOnlyList<ConstantPoolEntry> constPool,
+        ReadOnlyList<FunctionMeta> functions,
+        byte[] code,
+        DebugInfo debugInfo,
+        ReadOnlyList<Export> exports,
+        ReadOnlyList<SyscallDecl> syscalls) {
+
+    private static final int SECTION_KIND_CONST_POOL = 0;
+    private static final int SECTION_KIND_FUNCTIONS = 1;
+    private static final int SECTION_KIND_CODE = 2;
+    private static final int SECTION_KIND_DEBUG = 3;
+    private static final int SECTION_KIND_EXPORTS = 4;
+    private static final int SECTION_KIND_SYSCALLS = 5;
+
+    /** Replaces null lists with empty ones and stores a private copy of {@code code}. */
+    public BytecodeModule {
+        constPool = constPool == null ? ReadOnlyList.empty() : constPool;
+        functions = functions == null ? ReadOnlyList.empty() : functions;
+        code = code == null ? new byte[0] : code.clone();
+        exports = exports == null ? ReadOnlyList.empty() : exports;
+        syscalls = syscalls == null ? ReadOnlyList.empty() : syscalls;
+    }
+
+    /** @return a version-0 module whose lists and code are empty and whose debug info is null */
+    public static BytecodeModule empty() {
+        return new BytecodeModule(
+                0,
+                ReadOnlyList.empty(),
+                ReadOnlyList.empty(),
+                new byte[0],
+                null,
+                ReadOnlyList.empty(),
+                ReadOnlyList.empty());
+    }
+
+    /**
+     * Serializes this module to its binary form.
+     *
+     * Layout: a 32-byte header (magic 'P','B','S',0, u16 version, two zero bytes, u32
+     * section count, 20 zero bytes), a table of 12-byte entries (kind, offset, length),
+     * then the section payloads in table order. All integers are little-endian. Empty
+     * sections are omitted, except the syscall section, which is always written.
+     *
+     * @return the serialized module
+     * @throws BytecodeMarshalingException if a syscall module or name exceeds 0xFFFF UTF-8 bytes
+     */
+    public byte[] serialize() {
+        final var constPoolData = serializeConstPool();
+        final var functionData = serializeFunctions();
+        final var codeData = code();
+        final var debugData = serializeDebug();
+        final var exportData = serializeExports();
+        final var syscallData = serializeSyscalls();
+
+        final var sections = new ArrayList<SectionData>();
+        if (constPoolData.length > 0) {
+            sections.add(new SectionData(SECTION_KIND_CONST_POOL, constPoolData));
+        }
+        if (functionData.length > 0) {
+            sections.add(new SectionData(SECTION_KIND_FUNCTIONS, functionData));
+        }
+        if (codeData.length > 0) {
+            sections.add(new SectionData(SECTION_KIND_CODE, codeData));
+        }
+        if (debugData.length > 0) {
+            sections.add(new SectionData(SECTION_KIND_DEBUG, debugData));
+        }
+        if (exportData.length > 0) {
+            sections.add(new SectionData(SECTION_KIND_EXPORTS, exportData));
+        }
+        // SYSC section is mandatory, even when empty.
+        sections.add(new SectionData(SECTION_KIND_SYSCALLS, syscallData));
+
+        final var out = new ByteArrayOutputStream();
+
+        out.writeBytes(new byte[] { 'P', 'B', 'S', 0 });
+        writeU16(out, version & 0xFFFF);
+        out.write(0);
+        out.write(0);
+        writeU32(out, sections.size());
+        out.writeBytes(new byte[20]);
+
+        // Payloads start right after the 32-byte header and the 12-byte-per-entry table.
+        var currentOffset = 32 + (sections.size() * 12);
+        for (final var section : sections) {
+            writeU32(out, section.kind);
+            writeU32(out, currentOffset);
+            writeU32(out, section.data.length);
+            currentOffset += section.data.length;
+        }
+
+        for (final var section : sections) {
+            out.writeBytes(section.data);
+        }
+
+        return out.toByteArray();
+    }
+
+    /**
+     * Encodes the constant pool as a u32 entry count followed by one tagged entry each
+     * (0 null, 1 i64, 2 f64, 3 boolean, 4 u32-length-prefixed UTF-8 string, 5 i32).
+     *
+     * @return the section payload, or an empty array when the pool is empty
+     */
+    private byte[] serializeConstPool() {
+        if (constPool.isEmpty()) {
+            return new byte[0];
+        }
+        final var out = new ByteArrayOutputStream();
+        writeU32(out, constPool.size());
+        for (final var entry : constPool) {
+            switch (entry) {
+                case NullConstant ignored -> out.write(0);
+                case Int64Constant value -> {
+                    out.write(1);
+                    writeI64(out, value.value());
+                }
+                case Float64Constant value -> {
+                    out.write(2);
+                    writeF64(out, value.value());
+                }
+                case BooleanConstant value -> {
+                    out.write(3);
+                    out.write(value.value() ? 1 : 0);
+                }
+                case StringConstant value -> {
+                    out.write(4);
+                    // Encode once; the same bytes provide both the length prefix and the payload.
+                    final var utf8 = value.utf8();
+                    writeU32(out, utf8.length);
+                    out.writeBytes(utf8);
+                }
+                case Int32Constant value -> {
+                    out.write(5);
+                    writeI32(out, value.value());
+                }
+            }
+        }
+        return out.toByteArray();
+    }
+
+    /**
+     * Encodes the function table as a u32 count followed by, per function, u32 code offset,
+     * u32 code length and four u16 slot counts (param, local, return, max stack).
+     *
+     * @return the section payload, or an empty array when there are no functions
+     */
+    private byte[] serializeFunctions() {
+        if (functions.isEmpty()) {
+            return new byte[0];
+        }
+        final var out = new ByteArrayOutputStream();
+        writeU32(out, functions.size());
+        for (final var function : functions) {
+            writeU32(out, function.codeOffset());
+            writeU32(out, function.codeLen());
+            writeU16(out, function.paramSlots());
+            writeU16(out, function.localSlots());
+            writeU16(out, function.returnSlots());
+            writeU16(out, function.maxStackSlots());
+        }
+        return out.toByteArray();
+    }
+
+    /**
+     * Encodes debug info as the pc-to-span table (u32 count, then pc, file id, start, end as
+     * u32 each) followed by the function-name table (u32 count, then u32 function index and
+     * a u32-length-prefixed UTF-8 name each).
+     *
+     * @return the section payload, or an empty array when there is no debug info to write
+     */
+    private byte[] serializeDebug() {
+        if (debugInfo == null || (debugInfo.pcToSpan().isEmpty() && debugInfo.functionNames().isEmpty())) {
+            return new byte[0];
+        }
+        final var out = new ByteArrayOutputStream();
+        writeU32(out, debugInfo.pcToSpan().size());
+        for (final var entry : debugInfo.pcToSpan()) {
+            writeU32(out, entry.pc());
+            writeU32(out, entry.span().fileId());
+            writeU32(out, entry.span().start());
+            writeU32(out, entry.span().end());
+        }
+        writeU32(out, debugInfo.functionNames().size());
+        for (final var fn : debugInfo.functionNames()) {
+            final var utf8Name = fn.utf8Name();
+            writeU32(out, fn.funcIdx());
+            writeU32(out, utf8Name.length);
+            out.writeBytes(utf8Name);
+        }
+        return out.toByteArray();
+    }
+
+    /**
+     * Encodes exports as a u32 count followed by, per export, the u32 function index and a
+     * u32-length-prefixed UTF-8 symbol.
+     *
+     * @return the section payload, or an empty array when there are no exports
+     */
+    private byte[] serializeExports() {
+        if (exports.isEmpty()) {
+            return new byte[0];
+        }
+        final var out = new ByteArrayOutputStream();
+        writeU32(out, exports.size());
+        for (final var export : exports) {
+            final var utf8Symbol = export.utf8Symbol();
+            writeU32(out, export.funcIdx());
+            writeU32(out, utf8Symbol.length);
+            out.writeBytes(utf8Symbol);
+        }
+        return out.toByteArray();
+    }
+
+    /**
+     * Encodes syscall declarations as a u32 count followed by, per declaration, the
+     * u16-length-prefixed UTF-8 module and name, then u16 version, arg slots and ret slots.
+     * Unlike the other sections, the count is written even when the list is empty.
+     *
+     * @return the section payload (never empty)
+     * @throws BytecodeMarshalingException if a module or name exceeds 0xFFFF UTF-8 bytes
+     */
+    private byte[] serializeSyscalls() {
+        final var out = new ByteArrayOutputStream();
+        writeU32(out, syscalls.size());
+        for (final var syscall : syscalls) {
+            // Encode each string once instead of once per length check and write.
+            final var utf8Module = syscall.utf8Module();
+            if (utf8Module.length > 0xFFFF) {
+                throw new BytecodeMarshalingException(
+                        BytecodeMarshalingErrorCode.MARSHAL_FORMAT_SYSC_MODULE_TOO_LONG,
+                        "syscall module name exceeds u16: " + syscall.module());
+            }
+            final var utf8Name = syscall.utf8Name();
+            if (utf8Name.length > 0xFFFF) {
+                throw new BytecodeMarshalingException(
+                        BytecodeMarshalingErrorCode.MARSHAL_FORMAT_SYSC_NAME_TOO_LONG,
+                        "syscall name exceeds u16: " + syscall.name());
+            }
+            writeU16(out, utf8Module.length);
+            out.writeBytes(utf8Module);
+            writeU16(out, utf8Name.length);
+            out.writeBytes(utf8Name);
+            writeU16(out, syscall.version());
+            writeU16(out, syscall.argSlots());
+            writeU16(out, syscall.retSlots());
+        }
+        return out.toByteArray();
+    }
+
+    /** Writes the low 16 bits of {@code value} in little-endian order. */
+    private static void writeU16(final ByteArrayOutputStream out, final int value) {
+        out.writeBytes(ByteBuffer.allocate(2).order(ByteOrder.LITTLE_ENDIAN).putShort((short) (value & 0xFFFF)).array());
+    }
+
+    /** Writes {@code value} as four little-endian bytes. */
+    private static void writeU32(final ByteArrayOutputStream out, final int value) {
+        out.writeBytes(ByteBuffer.allocate(4).order(ByteOrder.LITTLE_ENDIAN).putInt(value).array());
+    }
+
+    /** Writes {@code value} as four little-endian bytes. */
+    private static void writeI32(final ByteArrayOutputStream out, final int value) {
+        out.writeBytes(ByteBuffer.allocate(4).order(ByteOrder.LITTLE_ENDIAN).putInt(value).array());
+    }
+
+    /** Writes {@code value} as eight little-endian bytes. */
+    private static void writeI64(final ByteArrayOutputStream out, final long value) {
+        out.writeBytes(ByteBuffer.allocate(8).order(ByteOrder.LITTLE_ENDIAN).putLong(value).array());
+    }
+
+    /** Writes {@code value} as eight little-endian bytes. */
+    private static void writeF64(final ByteArrayOutputStream out, final double value) {
+        out.writeBytes(ByteBuffer.allocate(8).order(ByteOrder.LITTLE_ENDIAN).putDouble(value).array());
+    }
+
+    /** A section's kind tag paired with its already-encoded payload. */
+    private record SectionData(
+            int kind,
+            byte[] data) {
+    }
+
+    /** Closed set of constant pool entry kinds. */
+    public sealed interface ConstantPoolEntry permits NullConstant, Int64Constant, Float64Constant, BooleanConstant, StringConstant, Int32Constant {
+    }
+
+    /** Null constant (serialized with tag 0). */
+    public record NullConstant() implements ConstantPoolEntry {
+    }
+
+    /** 64-bit integer constant (serialized with tag 1). */
+    public record Int64Constant(
+            long value) implements ConstantPoolEntry {
+    }
+
+    /** 64-bit floating-point constant (serialized with tag 2). */
+    public record Float64Constant(
+            double value) implements ConstantPoolEntry {
+    }
+
+    /** Boolean constant (serialized with tag 3). */
+    public record BooleanConstant(
+            boolean value) implements ConstantPoolEntry {
+    }
+
+    /** String constant (serialized with tag 4 as UTF-8). */
+    public record StringConstant(
+            String value) implements ConstantPoolEntry {
+        /**
+         * @throws NullPointerException if {@code value} is null
+         * @throws BytecodeMarshalingException if {@code value} has more than
+         *         {@code Integer.MAX_VALUE / 4} chars
+         */
+        public StringConstant {
+            Objects.requireNonNull(value, "value");
+            if (value.length() > Integer.MAX_VALUE / 4) {
+                throw new BytecodeMarshalingException(
+                        BytecodeMarshalingErrorCode.MARSHAL_FORMAT_STRING_TOO_LONG,
+                        "constant string is too large");
+            }
+        }
+
+        /** @return a freshly encoded UTF-8 copy of the value */
+        public byte[] utf8() {
+            return value.getBytes(StandardCharsets.UTF_8);
+        }
+    }
+
+    /** 32-bit integer constant (serialized with tag 5). */
+    public record Int32Constant(
+            int value) implements ConstantPoolEntry {
+    }
+
+    /** Function table entry; the four slot counts are serialized as u16. */
+    public record FunctionMeta(
+            int codeOffset,
+            int codeLen,
+            int paramSlots,
+            int localSlots,
+            int returnSlots,
+            int maxStackSlots) {
+    }
+
+    /** Source span described by a file id plus start and end values. */
+    public record SourceSpan(
+            int fileId,
+            int start,
+            int end) {
+    }
+
+    /** Maps a {@code pc} value to a source span. */
+    public record PcToSpan(
+            int pc,
+            SourceSpan span) {
+    }
+
+    /** Debug name for the function at {@code funcIdx}; the name must not be null. */
+    public record FunctionName(
+            int funcIdx,
+            String name) {
+        public FunctionName {
+            Objects.requireNonNull(name, "name");
+        }
+
+        /** @return a freshly encoded UTF-8 copy of the name */
+        public byte[] utf8Name() {
+            return name.getBytes(StandardCharsets.UTF_8);
+        }
+    }
+
+    /** Debug tables; null lists are replaced with empty ones. */
+    public record DebugInfo(
+            ReadOnlyList<PcToSpan> pcToSpan,
+            ReadOnlyList<FunctionName> functionNames) {
+        public DebugInfo {
+            pcToSpan = pcToSpan == null ? ReadOnlyList.empty() : pcToSpan;
+            functionNames = functionNames == null ? ReadOnlyList.empty() : functionNames;
+        }
+    }
+
+    /** Exported symbol bound to the function at {@code funcIdx}; the symbol must not be null. */
+    public record Export(
+            String symbol,
+            int funcIdx) {
+        public Export {
+            Objects.requireNonNull(symbol, "symbol");
+        }
+
+        /** @return a freshly encoded UTF-8 copy of the symbol */
+        public byte[] utf8Symbol() {
+            return symbol.getBytes(StandardCharsets.UTF_8);
+        }
+    }
+
+    /** Syscall declaration; module and name must each fit in 0xFFFF UTF-8 bytes to serialize. */
+    public record SyscallDecl(
+            String module,
+            String name,
+            int version,
+            int argSlots,
+            int retSlots) {
+        public SyscallDecl {
+            Objects.requireNonNull(module, "module");
+            Objects.requireNonNull(name, "name");
+        }
+
+        /** @return a freshly encoded UTF-8 copy of the module name */
+        public byte[] utf8Module() {
+            return module.getBytes(StandardCharsets.UTF_8);
+        }
+
+        /** @return a freshly encoded UTF-8 copy of the name */
+        public byte[] utf8Name() {
+            return name.getBytes(StandardCharsets.UTF_8);
+        }
+    }
+}
+
diff --git a/prometeu-compiler/prometeu-build-pipeline/src/test/java/p/studio/compiler/backend/bytecode/BytecodeModuleTest.java b/prometeu-compiler/prometeu-build-pipeline/src/test/java/p/studio/compiler/backend/bytecode/BytecodeModuleTest.java
new file mode 100644
index 00000000..cc76c6c3
--- /dev/null
+++ b/prometeu-compiler/prometeu-build-pipeline/src/test/java/p/studio/compiler/backend/bytecode/BytecodeModuleTest.java
@@ -0,0 +1,82 @@
+package p.studio.compiler.backend.bytecode;
+
+import org.junit.jupiter.api.Test;
+import p.studio.utilities.structures.ReadOnlyList;
+
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+
+import static org.junit.jupiter.api.Assertions.assertArrayEquals;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertThrows;
+
+class BytecodeModuleTest {
+
+    @Test
+    void serializeMustAlwaysEmitSyscallsSection() {
+        final var module = BytecodeModule.empty();
+
+        final var bytes = module.serialize();
+
+        assertEquals('P', bytes[0]);
+        assertEquals('B', bytes[1]);
+        assertEquals('S', bytes[2]);
+        assertEquals(0, bytes[3]);
+
+        final var sectionCount = readU32(bytes, 8);
+        assertEquals(1, sectionCount);
+
+        final var sectionKind = readU32(bytes, 32);
+        final var sectionOffset = readU32(bytes, 36);
+        final var sectionLength = readU32(bytes, 40);
+        assertEquals(5, sectionKind);
+        assertEquals(44, sectionOffset);
+        assertEquals(4, sectionLength);
+
+        final var syscCount = readU32(bytes, sectionOffset);
+        assertEquals(0, syscCount);
+    }
+
+    @Test
+    void serializeMustBeDeterministicForSameInput() {
+        final var module = new BytecodeModule(
+                0,
+                ReadOnlyList.from(
+                        new BytecodeModule.NullConstant(),
+                        new BytecodeModule.Int32Constant(7),
+                        new BytecodeModule.StringConstant("x")),
+                ReadOnlyList.from(
+                        new BytecodeModule.FunctionMeta(0, 6, 0, 0, 0, 0)),
+                new byte[] { 1, 2, 3, 4, 5, 6 },
+                null,
+                ReadOnlyList.from(
+                        new BytecodeModule.Export("main", 0)),
+                ReadOnlyList.empty());
+
+        final var first = module.serialize();
+        final var second = module.serialize();
+
+        assertArrayEquals(first, second);
+    }
+
+    @Test
+    void serializeMustRejectSyscallModuleNameAboveU16() {
+        final var tooLong = "x".repeat(70_000);
+        final var module = new BytecodeModule(
+                0,
+                ReadOnlyList.empty(),
+                ReadOnlyList.empty(),
+                new byte[0],
+                null,
+                ReadOnlyList.empty(),
+                ReadOnlyList.from(new BytecodeModule.SyscallDecl(tooLong, "draw", 1, 1, 0)));
+
+        final var thrown = assertThrows(BytecodeMarshalingException.class, module::serialize);
+        assertEquals(BytecodeMarshalingErrorCode.MARSHAL_FORMAT_SYSC_MODULE_TOO_LONG, thrown.code());
+    }
+
+    private static int readU32(final byte[] bytes, final int offset) {
+        return ByteBuffer.wrap(bytes, offset, 4).order(ByteOrder.LITTLE_ENDIAN).getInt();
+    }
+}
+