implements PR-032

This commit is contained in:
bQUARKz 2026-03-07 16:16:58 +00:00
parent f561cf3227
commit 29efbe05bb
Signed by: bquarkz
SSH Key Fingerprint: SHA256:Z7dgqoglWwoK6j6u4QC87OveEq74WOhFN+gitsxtkf8
4 changed files with 453 additions and 0 deletions

View File

@ -0,0 +1,8 @@
package p.studio.compiler.backend.bytecode;
/**
 * Machine-readable reasons for a {@link BytecodeMarshalingException}.
 *
 * <p>Each constant identifies one size limit of the bytecode module format
 * that a value failed to respect.
 */
public enum BytecodeMarshalingErrorCode {

    /** A string constant has more characters than the constant pool accepts. */
    MARSHAL_FORMAT_STRING_TOO_LONG,

    /** The UTF-8 encoded module name of a syscall does not fit in a u16 length prefix. */
    MARSHAL_FORMAT_SYSC_MODULE_TOO_LONG,

    /** The UTF-8 encoded name of a syscall does not fit in a u16 length prefix. */
    MARSHAL_FORMAT_SYSC_NAME_TOO_LONG
}

View File

@ -0,0 +1,17 @@
package p.studio.compiler.backend.bytecode;
/**
 * Unchecked exception raised when a value cannot be encoded in the bytecode
 * module format. It carries a {@link BytecodeMarshalingErrorCode} so callers
 * can react to the failure without parsing the message.
 */
public class BytecodeMarshalingException extends RuntimeException {

    /** Machine-readable reason for the failure, exactly as given to the constructor. */
    private final BytecodeMarshalingErrorCode code;

    /**
     * Creates the exception.
     *
     * @param code    machine-readable reason for the failure
     * @param message human-readable description, forwarded to {@link RuntimeException}
     */
    public BytecodeMarshalingException(final BytecodeMarshalingErrorCode code, final String message) {
        super(message);
        this.code = code;
    }

    /**
     * Returns the machine-readable reason for the failure.
     *
     * @return the code passed to the constructor
     */
    public BytecodeMarshalingErrorCode code() {
        return this.code;
    }
}

View File

@ -0,0 +1,346 @@
package p.studio.compiler.backend.bytecode;
import p.studio.utilities.structures.ReadOnlyList;
import java.io.ByteArrayOutputStream;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Objects;
/**
 * In-memory model of a compiled bytecode module together with its binary
 * serializer.
 *
 * <p>{@link #serialize()} emits, in order:
 * <ol>
 *   <li>a 32-byte header: the magic bytes {@code 'P','B','S',0}, the version as
 *       u16, two zero bytes, the section count as u32 and 20 zero bytes;</li>
 *   <li>a section table with one 12-byte entry per section (kind, absolute
 *       offset and length, each u32);</li>
 *   <li>the section payloads, in table order.</li>
 * </ol>
 * All multi-byte values are little-endian.
 *
 * <p>Null list components are replaced by empty lists and the {@code code}
 * array is defensively copied both on construction and in {@link #code()}.
 *
 * @param version   format version; only the low 16 bits are written
 * @param constPool constant pool entries
 * @param functions function table entries
 * @param code      raw bytes of the code section
 * @param debugInfo optional debug information, may be {@code null}
 * @param exports   exported symbols
 * @param syscalls  syscall declarations
 */
public record BytecodeModule(
        int version,
        ReadOnlyList<ConstantPoolEntry> constPool,
        ReadOnlyList<FunctionMeta> functions,
        byte[] code,
        DebugInfo debugInfo,
        ReadOnlyList<Export> exports,
        ReadOnlyList<SyscallDecl> syscalls) {

    private static final int SECTION_KIND_CONST_POOL = 0;
    private static final int SECTION_KIND_FUNCTIONS = 1;
    private static final int SECTION_KIND_CODE = 2;
    private static final int SECTION_KIND_DEBUG = 3;
    private static final int SECTION_KIND_EXPORTS = 4;
    private static final int SECTION_KIND_SYSCALLS = 5;

    /** Magic (4) + version (2) + two zero bytes (2) + section count (4) + reserved (20). */
    private static final int HEADER_SIZE = 32;
    /** Zero bytes that pad the header up to {@link #HEADER_SIZE}. */
    private static final int HEADER_RESERVED_BYTES = 20;
    /** Kind, offset and length, each encoded as u32. */
    private static final int SECTION_TABLE_ENTRY_SIZE = 12;
    /** Largest length representable by a u16 length prefix. */
    private static final int U16_MAX = 0xFFFF;

    /**
     * Normalizes the components: null lists become empty lists and
     * {@code code} is copied (or replaced by an empty array when null) so
     * later changes to the caller's array do not affect this module.
     */
    public BytecodeModule {
        constPool = constPool == null ? ReadOnlyList.empty() : constPool;
        functions = functions == null ? ReadOnlyList.empty() : functions;
        code = code == null ? new byte[0] : code.clone();
        exports = exports == null ? ReadOnlyList.empty() : exports;
        syscalls = syscalls == null ? ReadOnlyList.empty() : syscalls;
    }

    /**
     * Returns a copy of the code bytes.
     *
     * <p>The generated accessor would hand out the internal array and let
     * callers mutate the module after construction; returning a copy keeps
     * the record immutable, mirroring the copy made by the constructor.
     *
     * @return a fresh array holding the code section bytes, never {@code null}
     */
    @Override
    public byte[] code() {
        return code.clone();
    }

    /**
     * Creates a module with version 0, no debug info and every collection empty.
     *
     * @return an empty module
     */
    public static BytecodeModule empty() {
        return new BytecodeModule(
                0,
                ReadOnlyList.empty(),
                ReadOnlyList.empty(),
                new byte[0],
                null,
                ReadOnlyList.empty(),
                ReadOnlyList.empty());
    }

    /**
     * Serializes this module into its binary representation.
     *
     * <p>Sections are emitted in the order const pool, functions, code, debug,
     * exports, syscalls. A section whose payload is empty is omitted, except
     * the syscalls section, which is always present.
     *
     * @return the serialized module
     * @throws BytecodeMarshalingException if a syscall module name or syscall
     *         name is longer than 65535 bytes once UTF-8 encoded
     */
    public byte[] serialize() {
        final var constPoolData = serializeConstPool();
        final var functionData = serializeFunctions();
        // Read the field directly: the payload is only copied into the output
        // stream, so the defensive copy made by code() is not needed here.
        final var codeData = code;
        final var debugData = serializeDebug();
        final var exportData = serializeExports();
        final var syscallData = serializeSyscalls();

        final var sections = new ArrayList<SectionData>();
        if (constPoolData.length > 0) {
            sections.add(new SectionData(SECTION_KIND_CONST_POOL, constPoolData));
        }
        if (functionData.length > 0) {
            sections.add(new SectionData(SECTION_KIND_FUNCTIONS, functionData));
        }
        if (codeData.length > 0) {
            sections.add(new SectionData(SECTION_KIND_CODE, codeData));
        }
        if (debugData.length > 0) {
            sections.add(new SectionData(SECTION_KIND_DEBUG, debugData));
        }
        if (exportData.length > 0) {
            sections.add(new SectionData(SECTION_KIND_EXPORTS, exportData));
        }
        // SYSC section is mandatory, even when empty.
        sections.add(new SectionData(SECTION_KIND_SYSCALLS, syscallData));

        final var out = new ByteArrayOutputStream();
        out.writeBytes(new byte[] { 'P', 'B', 'S', 0 });
        writeU16(out, version); // writeU16 keeps only the low 16 bits
        out.write(0);
        out.write(0);
        writeU32(out, sections.size());
        out.writeBytes(new byte[HEADER_RESERVED_BYTES]);

        // Payloads start right after the header and the complete section table.
        var currentOffset = HEADER_SIZE + (sections.size() * SECTION_TABLE_ENTRY_SIZE);
        for (final var section : sections) {
            writeU32(out, section.kind());
            writeU32(out, currentOffset);
            writeU32(out, section.data().length);
            currentOffset += section.data().length;
        }
        for (final var section : sections) {
            out.writeBytes(section.data());
        }
        return out.toByteArray();
    }

    /**
     * Encodes the constant pool: a u32 entry count followed by one tagged
     * entry per constant (0 null, 1 int64, 2 float64, 3 boolean, 4 string,
     * 5 int32). Strings are written as a u32 byte length plus UTF-8 bytes.
     *
     * @return the section payload, or an empty array when the pool is empty
     */
    private byte[] serializeConstPool() {
        if (constPool.isEmpty()) {
            return new byte[0];
        }
        final var out = new ByteArrayOutputStream();
        writeU32(out, constPool.size());
        for (final var entry : constPool) {
            switch (entry) {
                case NullConstant ignored -> out.write(0);
                case Int64Constant value -> {
                    out.write(1);
                    writeI64(out, value.value());
                }
                case Float64Constant value -> {
                    out.write(2);
                    writeF64(out, value.value());
                }
                case BooleanConstant value -> {
                    out.write(3);
                    out.write(value.value() ? 1 : 0);
                }
                case StringConstant value -> {
                    // utf8() re-encodes on every call, so encode once.
                    final var utf8 = value.utf8();
                    out.write(4);
                    writeU32(out, utf8.length);
                    out.writeBytes(utf8);
                }
                case Int32Constant value -> {
                    out.write(5);
                    writeI32(out, value.value());
                }
            }
        }
        return out.toByteArray();
    }

    /**
     * Encodes the function table: a u32 count followed, per function, by code
     * offset and code length as u32 and the four slot counts as u16.
     *
     * <p>NOTE(review): slot counts above 65535 are silently reduced to their
     * low 16 bits by {@code writeU16}; confirm whether that should be rejected.
     *
     * @return the section payload, or an empty array when there are no functions
     */
    private byte[] serializeFunctions() {
        if (functions.isEmpty()) {
            return new byte[0];
        }
        final var out = new ByteArrayOutputStream();
        writeU32(out, functions.size());
        for (final var function : functions) {
            writeU32(out, function.codeOffset());
            writeU32(out, function.codeLen());
            writeU16(out, function.paramSlots());
            writeU16(out, function.localSlots());
            writeU16(out, function.returnSlots());
            writeU16(out, function.maxStackSlots());
        }
        return out.toByteArray();
    }

    /**
     * Encodes the debug section: the pc-to-span table (u32 count, then pc,
     * file id, start and end as u32 each) followed by the function name table
     * (u32 count, then function index, u32 byte length and UTF-8 bytes).
     *
     * @return the section payload, or an empty array when there is no debug
     *         info or both of its tables are empty
     */
    private byte[] serializeDebug() {
        if (debugInfo == null || (debugInfo.pcToSpan().isEmpty() && debugInfo.functionNames().isEmpty())) {
            return new byte[0];
        }
        final var out = new ByteArrayOutputStream();
        writeU32(out, debugInfo.pcToSpan().size());
        for (final var entry : debugInfo.pcToSpan()) {
            final var span = entry.span();
            writeU32(out, entry.pc());
            writeU32(out, span.fileId());
            writeU32(out, span.start());
            writeU32(out, span.end());
        }
        writeU32(out, debugInfo.functionNames().size());
        for (final var fn : debugInfo.functionNames()) {
            final var utf8Name = fn.utf8Name();
            writeU32(out, fn.funcIdx());
            writeU32(out, utf8Name.length);
            out.writeBytes(utf8Name);
        }
        return out.toByteArray();
    }

    /**
     * Encodes the export table: a u32 count followed, per export, by the
     * function index, the u32 byte length of the symbol and its UTF-8 bytes.
     *
     * @return the section payload, or an empty array when there are no exports
     */
    private byte[] serializeExports() {
        if (exports.isEmpty()) {
            return new byte[0];
        }
        final var out = new ByteArrayOutputStream();
        writeU32(out, exports.size());
        for (final var export : exports) {
            final var utf8Symbol = export.utf8Symbol();
            writeU32(out, export.funcIdx());
            writeU32(out, utf8Symbol.length);
            out.writeBytes(utf8Symbol);
        }
        return out.toByteArray();
    }

    /**
     * Encodes the syscall table: a u32 count followed, per syscall, by the
     * module name and the syscall name (each a u16 byte length plus UTF-8
     * bytes) and then version, argument slots and return slots as u16.
     *
     * <p>Unlike the other sections the payload is never empty: with no
     * syscalls it still contains the zero count.
     *
     * <p>NOTE(review): version and slot counts above 65535 are silently
     * reduced to their low 16 bits by {@code writeU16}; confirm whether that
     * should be rejected.
     *
     * @return the section payload
     * @throws BytecodeMarshalingException if an encoded module name or syscall
     *         name is longer than 65535 bytes
     */
    private byte[] serializeSyscalls() {
        final var out = new ByteArrayOutputStream();
        writeU32(out, syscalls.size());
        for (final var syscall : syscalls) {
            // Encode each name once; the accessors re-encode on every call.
            final var moduleBytes = syscall.utf8Module();
            final var nameBytes = syscall.utf8Name();
            if (moduleBytes.length > U16_MAX) {
                throw new BytecodeMarshalingException(
                        BytecodeMarshalingErrorCode.MARSHAL_FORMAT_SYSC_MODULE_TOO_LONG,
                        "syscall module name exceeds u16: " + syscall.module());
            }
            if (nameBytes.length > U16_MAX) {
                throw new BytecodeMarshalingException(
                        BytecodeMarshalingErrorCode.MARSHAL_FORMAT_SYSC_NAME_TOO_LONG,
                        "syscall name exceeds u16: " + syscall.name());
            }
            writeU16(out, moduleBytes.length);
            out.writeBytes(moduleBytes);
            writeU16(out, nameBytes.length);
            out.writeBytes(nameBytes);
            writeU16(out, syscall.version());
            writeU16(out, syscall.argSlots());
            writeU16(out, syscall.retSlots());
        }
        return out.toByteArray();
    }

    /** Writes the low 16 bits of {@code value} in little-endian order. */
    private static void writeU16(final ByteArrayOutputStream out, final int value) {
        out.writeBytes(ByteBuffer.allocate(2).order(ByteOrder.LITTLE_ENDIAN).putShort((short) (value & 0xFFFF)).array());
    }

    /** Writes the 32 bits of {@code value} in little-endian order. */
    private static void writeU32(final ByteArrayOutputStream out, final int value) {
        out.writeBytes(ByteBuffer.allocate(4).order(ByteOrder.LITTLE_ENDIAN).putInt(value).array());
    }

    /** Writes {@code value} as a 32-bit little-endian integer. */
    private static void writeI32(final ByteArrayOutputStream out, final int value) {
        out.writeBytes(ByteBuffer.allocate(4).order(ByteOrder.LITTLE_ENDIAN).putInt(value).array());
    }

    /** Writes {@code value} as a 64-bit little-endian integer. */
    private static void writeI64(final ByteArrayOutputStream out, final long value) {
        out.writeBytes(ByteBuffer.allocate(8).order(ByteOrder.LITTLE_ENDIAN).putLong(value).array());
    }

    /** Writes {@code value} as a 64-bit little-endian double. */
    private static void writeF64(final ByteArrayOutputStream out, final double value) {
        out.writeBytes(ByteBuffer.allocate(8).order(ByteOrder.LITTLE_ENDIAN).putDouble(value).array());
    }

    /**
     * A section payload paired with the kind written to the section table.
     *
     * @param kind section kind identifier
     * @param data section payload
     */
    private record SectionData(
            int kind,
            byte[] data) {
    }

    /** Closed set of value kinds that can be stored in the constant pool. */
    public sealed interface ConstantPoolEntry permits NullConstant, Int64Constant, Float64Constant, BooleanConstant, StringConstant, Int32Constant {
    }

    /** Null constant; serialized as the tag byte 0 with no payload. */
    public record NullConstant() implements ConstantPoolEntry {
    }

    /**
     * 64-bit integer constant; serialized as tag 1 plus 8 bytes.
     *
     * @param value the constant value
     */
    public record Int64Constant(
            long value) implements ConstantPoolEntry {
    }

    /**
     * 64-bit floating point constant; serialized as tag 2 plus 8 bytes.
     *
     * @param value the constant value
     */
    public record Float64Constant(
            double value) implements ConstantPoolEntry {
    }

    /**
     * Boolean constant; serialized as tag 3 plus one byte (1 for true, 0 for false).
     *
     * @param value the constant value
     */
    public record BooleanConstant(
            boolean value) implements ConstantPoolEntry {
    }

    /**
     * String constant; serialized as tag 4, a u32 byte length and the UTF-8 bytes.
     *
     * @param value the constant text, never {@code null}
     */
    public record StringConstant(
            String value) implements ConstantPoolEntry {

        /**
         * Validates the text.
         *
         * @throws NullPointerException if {@code value} is null
         * @throws BytecodeMarshalingException if {@code value} has more than
         *         {@code Integer.MAX_VALUE / 4} characters
         */
        public StringConstant {
            Objects.requireNonNull(value, "value");
            // Presumably keeps the UTF-8 encoded size within int range - TODO confirm.
            if (value.length() > Integer.MAX_VALUE / 4) {
                throw new BytecodeMarshalingException(
                        BytecodeMarshalingErrorCode.MARSHAL_FORMAT_STRING_TOO_LONG,
                        "constant string is too large");
            }
        }

        /**
         * Encodes the text as UTF-8. A new array is produced on every call.
         *
         * @return the UTF-8 bytes of {@code value}
         */
        public byte[] utf8() {
            return value.getBytes(StandardCharsets.UTF_8);
        }
    }

    /**
     * 32-bit integer constant; serialized as tag 5 plus 4 bytes.
     *
     * @param value the constant value
     */
    public record Int32Constant(
            int value) implements ConstantPoolEntry {
    }

    /**
     * Function table entry.
     *
     * @param codeOffset    written as u32
     * @param codeLen       written as u32
     * @param paramSlots    written as u16
     * @param localSlots    written as u16
     * @param returnSlots   written as u16
     * @param maxStackSlots written as u16
     */
    public record FunctionMeta(
            int codeOffset,
            int codeLen,
            int paramSlots,
            int localSlots,
            int returnSlots,
            int maxStackSlots) {
    }

    /**
     * Source range referenced by debug information.
     *
     * @param fileId written as u32
     * @param start  written as u32
     * @param end    written as u32
     */
    public record SourceSpan(
            int fileId,
            int start,
            int end) {
    }

    /**
     * Associates a program counter with a source span.
     *
     * @param pc   program counter, written as u32
     * @param span source span, never {@code null}
     */
    public record PcToSpan(
            int pc,
            SourceSpan span) {

        /**
         * Rejects a null span up front, like the other records do for their
         * reference components, instead of failing later during serialization.
         *
         * @throws NullPointerException if {@code span} is null
         */
        public PcToSpan {
            Objects.requireNonNull(span, "span");
        }
    }

    /**
     * Debug name of a function.
     *
     * @param funcIdx function index, written as u32
     * @param name    function name, never {@code null}
     */
    public record FunctionName(
            int funcIdx,
            String name) {

        /**
         * @throws NullPointerException if {@code name} is null
         */
        public FunctionName {
            Objects.requireNonNull(name, "name");
        }

        /**
         * Encodes the name as UTF-8. A new array is produced on every call.
         *
         * @return the UTF-8 bytes of {@code name}
         */
        public byte[] utf8Name() {
            return name.getBytes(StandardCharsets.UTF_8);
        }
    }

    /**
     * Optional debug information of a module. Null lists are replaced by
     * empty lists.
     *
     * @param pcToSpan      program counter to source span table
     * @param functionNames function name table
     */
    public record DebugInfo(
            ReadOnlyList<PcToSpan> pcToSpan,
            ReadOnlyList<FunctionName> functionNames) {

        public DebugInfo {
            pcToSpan = pcToSpan == null ? ReadOnlyList.empty() : pcToSpan;
            functionNames = functionNames == null ? ReadOnlyList.empty() : functionNames;
        }
    }

    /**
     * Exported symbol.
     *
     * @param symbol  exported name, never {@code null}
     * @param funcIdx function index, written as u32
     */
    public record Export(
            String symbol,
            int funcIdx) {

        /**
         * @throws NullPointerException if {@code symbol} is null
         */
        public Export {
            Objects.requireNonNull(symbol, "symbol");
        }

        /**
         * Encodes the symbol as UTF-8. A new array is produced on every call.
         *
         * @return the UTF-8 bytes of {@code symbol}
         */
        public byte[] utf8Symbol() {
            return symbol.getBytes(StandardCharsets.UTF_8);
        }
    }

    /**
     * Syscall declaration.
     *
     * @param module   module name, never {@code null}
     * @param name     syscall name, never {@code null}
     * @param version  written as u16
     * @param argSlots written as u16
     * @param retSlots written as u16
     */
    public record SyscallDecl(
            String module,
            String name,
            int version,
            int argSlots,
            int retSlots) {

        /**
         * @throws NullPointerException if {@code module} or {@code name} is null
         */
        public SyscallDecl {
            Objects.requireNonNull(module, "module");
            Objects.requireNonNull(name, "name");
        }

        /**
         * Encodes the module name as UTF-8. A new array is produced on every call.
         *
         * @return the UTF-8 bytes of {@code module}
         */
        public byte[] utf8Module() {
            return module.getBytes(StandardCharsets.UTF_8);
        }

        /**
         * Encodes the syscall name as UTF-8. A new array is produced on every call.
         *
         * @return the UTF-8 bytes of {@code name}
         */
        public byte[] utf8Name() {
            return name.getBytes(StandardCharsets.UTF_8);
        }
    }
}

View File

@ -0,0 +1,82 @@
package p.studio.compiler.backend.bytecode;
import org.junit.jupiter.api.Test;
import p.studio.utilities.structures.ReadOnlyList;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import static org.junit.jupiter.api.Assertions.assertArrayEquals;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertThrows;
/**
 * Tests for the binary output of {@link BytecodeModule#serialize()}.
 */
class BytecodeModuleTest {

    /** Byte offset of the u32 section count inside the header. */
    private static final int SECTION_COUNT_OFFSET = 8;
    /** Byte offset of the first section table entry, right after the 32-byte header. */
    private static final int SECTION_TABLE_OFFSET = 32;

    /** An empty module still serializes a single syscalls section holding a zero count. */
    @Test
    void serializeMustAlwaysEmitSyscallsSection() {
        final byte[] image = BytecodeModule.empty().serialize();

        // Magic bytes.
        assertEquals('P', image[0]);
        assertEquals('B', image[1]);
        assertEquals('S', image[2]);
        assertEquals(0, image[3]);

        final int sections = readU32(image, SECTION_COUNT_OFFSET);
        assertEquals(1, sections);

        // The only table entry: kind, offset, length.
        final int kind = readU32(image, SECTION_TABLE_OFFSET);
        final int payloadOffset = readU32(image, SECTION_TABLE_OFFSET + 4);
        final int payloadLength = readU32(image, SECTION_TABLE_OFFSET + 8);
        assertEquals(5, kind);
        assertEquals(44, payloadOffset);
        assertEquals(4, payloadLength);

        final int declaredSyscalls = readU32(image, payloadOffset);
        assertEquals(0, declaredSyscalls);
    }

    /** Serializing the same module twice yields identical bytes. */
    @Test
    void serializeMustBeDeterministicForSameInput() {
        final ReadOnlyList<BytecodeModule.ConstantPoolEntry> constants = ReadOnlyList.from(
                new BytecodeModule.NullConstant(),
                new BytecodeModule.Int32Constant(7),
                new BytecodeModule.StringConstant("x"));
        final ReadOnlyList<BytecodeModule.FunctionMeta> functions = ReadOnlyList.from(
                new BytecodeModule.FunctionMeta(0, 6, 0, 0, 0, 0));
        final ReadOnlyList<BytecodeModule.Export> exports = ReadOnlyList.from(
                new BytecodeModule.Export("main", 0));

        final var module = new BytecodeModule(
                0,
                constants,
                functions,
                new byte[] { 1, 2, 3, 4, 5, 6 },
                null,
                exports,
                ReadOnlyList.empty());

        assertArrayEquals(module.serialize(), module.serialize());
    }

    /** A syscall module name longer than 65535 bytes is rejected with the dedicated code. */
    @Test
    void serializeMustRejectSyscallModuleNameAboveU16() {
        final String oversizedModule = "x".repeat(70_000);
        final var declaration = new BytecodeModule.SyscallDecl(oversizedModule, "draw", 1, 1, 0);
        final var module = new BytecodeModule(
                0,
                ReadOnlyList.empty(),
                ReadOnlyList.empty(),
                new byte[0],
                null,
                ReadOnlyList.empty(),
                ReadOnlyList.from(declaration));

        final BytecodeMarshalingException failure =
                assertThrows(BytecodeMarshalingException.class, module::serialize);

        assertEquals(BytecodeMarshalingErrorCode.MARSHAL_FORMAT_SYSC_MODULE_TOO_LONG, failure.code());
    }

    /** Reads a little-endian 32-bit integer starting at {@code offset}. */
    private static int readU32(final byte[] bytes, final int offset) {
        return ByteBuffer.wrap(bytes).order(ByteOrder.LITTLE_ENDIAN).getInt(offset);
    }
}