diff --git a/prometeu-compiler/prometeu-build-pipeline/src/main/java/p/studio/compiler/backend/irvm/IRVMProgram.java b/prometeu-compiler/prometeu-build-pipeline/src/main/java/p/studio/compiler/backend/irvm/IRVMProgram.java
index 45cd457f..318982d6 100644
--- a/prometeu-compiler/prometeu-build-pipeline/src/main/java/p/studio/compiler/backend/irvm/IRVMProgram.java
+++ b/prometeu-compiler/prometeu-build-pipeline/src/main/java/p/studio/compiler/backend/irvm/IRVMProgram.java
@@ -1,8 +1,11 @@
 package p.studio.compiler.backend.irvm;

 import p.studio.compiler.backend.bytecode.BytecodeEmitter;
+import p.studio.compiler.backend.bytecode.BytecodeModule;
 import p.studio.utilities.structures.ReadOnlyList;

+import java.util.ArrayList;
+
 public record IRVMProgram(
         IRVMModule module,
         BytecodeEmitter.EmissionPlan emissionPlan) {
@@ -10,6 +13,9 @@ public record IRVMProgram(
     public IRVMProgram {
         module = module == null ? IRVMModule.empty() : module;
         emissionPlan = emissionPlan == null ? BytecodeEmitter.EmissionPlan.empty() : emissionPlan;
+        if (!emissionPlan.functions().isEmpty()) {
+            validateCoherence(module, emissionPlan);
+        }
     }

     public IRVMProgram(final IRVMModule module) {
@@ -19,23 +25,32 @@ public record IRVMProgram(
     public IRVMProgram(
             final boolean hasInternalOpcodes,
             final BytecodeEmitter.EmissionPlan emissionPlan) {
-        this(new IRVMModule(
-                        "core-v1",
-                        ReadOnlyList.from(new IRVMFunction(
-                                "__synthetic__",
-                                0,
-                                0,
-                                0,
-                                1,
-                                ReadOnlyList.from(
-                                        new IRVMInstruction(hasInternalOpcodes ? IRVMOp.INTERNAL_EXT : IRVMOp.HALT, null))))),
-                emissionPlan);
+        this(moduleFromPlan(hasInternalOpcodes, emissionPlan), emissionPlan);
     }

     public static IRVMProgram empty() {
         return new IRVMProgram(IRVMModule.empty(), BytecodeEmitter.EmissionPlan.empty());
     }

+    /**
+     * Returns an emission plan that lines up with {@link #module()}.
+     *
+     * <p>When the stored plan has no functions, a plan is derived from the module's
+     * instructions; otherwise the stored plan is re-validated against the module and
+     * returned unchanged.
+     *
+     * @return the stored or derived emission plan
+     * @throws IllegalArgumentException if the stored plan does not match the module, or if
+     *         the module holds an opcode that has no derivable operation
+     */
+    public BytecodeEmitter.EmissionPlan coherentEmissionPlan() {
+        if (emissionPlan.functions().isEmpty()) {
+            return deriveEmissionPlan(module);
+        }
+        validateCoherence(module, emissionPlan);
+        return emissionPlan;
+    }
+
     public boolean hasInternalOpcodes() {
         for (final var function : module.functions()) {
             for (final var instruction : function.instructions()) {
@@ -46,4 +61,189 @@ public record IRVMProgram(
         }
         return false;
     }
+
+    /**
+     * Checks that {@code emissionPlan} mirrors {@code module} function by function and
+     * instruction by instruction.
+     *
+     * <p>Static on purpose: it is invoked from the compact constructor, before the record's
+     * fields are assigned, and only ever reads its arguments.
+     *
+     * @throws IllegalArgumentException on the first size, header or opcode mismatch found
+     */
+    private static void validateCoherence(
+            final IRVMModule module,
+            final BytecodeEmitter.EmissionPlan emissionPlan) {
+        if (module.functions().size() != emissionPlan.functions().size()) {
+            throw new IllegalArgumentException("irvm module/function-plan size mismatch");
+        }
+        for (var fnIndex = 0; fnIndex < module.functions().size(); fnIndex++) {
+            final var irvmFunction = module.functions().get(fnIndex);
+            final var planFunction = emissionPlan.functions().get(fnIndex);
+            if (!irvmFunction.name().equals(planFunction.name())
+                    || irvmFunction.paramSlots() != planFunction.paramSlots()
+                    || irvmFunction.localSlots() != planFunction.localSlots()
+                    || irvmFunction.returnSlots() != planFunction.returnSlots()
+                    || irvmFunction.maxStackSlots() != planFunction.maxStackSlots()) {
+                throw new IllegalArgumentException("irvm/emission plan function header mismatch at index " + fnIndex);
+            }
+            if (irvmFunction.instructions().size() != planFunction.operations().size()) {
+                throw new IllegalArgumentException("irvm/emission plan operation count mismatch at index " + fnIndex);
+            }
+            for (var opIndex = 0; opIndex < irvmFunction.instructions().size(); opIndex++) {
+                final var instruction = irvmFunction.instructions().get(opIndex);
+                final var operation = planFunction.operations().get(opIndex);
+                if (!coherentPair(instruction, operation)) {
+                    throw new IllegalArgumentException(
+                            "irvm/emission plan opcode mismatch at function=%d op=%d".formatted(fnIndex, opIndex));
+                }
+            }
+        }
+    }
+
+    /**
+     * Tells whether one IRVM instruction and one planned operation describe the same step.
+     *
+     * <p>HALT, RET and HOSTCALL are matched on kind alone; the other opcodes must also agree
+     * on the immediate, with a missing instruction immediate compared as {@code 0}. Opcodes
+     * not listed in the switch are never coherent.
+     */
+    private static boolean coherentPair(
+            final IRVMInstruction instruction,
+            final BytecodeEmitter.Operation operation) {
+        final var immediate = instruction.immediate() == null ? 0 : instruction.immediate();
+        return switch (instruction.op().opcode()) {
+            case 0x01 -> operation.kind() == BytecodeEmitter.OperationKind.HALT;
+            case 0x51 -> operation.kind() == BytecodeEmitter.OperationKind.RET;
+            case 0x50 -> operation.kind() == BytecodeEmitter.OperationKind.CALL_FUNC && operation.immediate() == immediate;
+            case 0x02 -> operation.kind() == BytecodeEmitter.OperationKind.JMP && operation.immediate() == immediate;
+            case 0x04 -> operation.kind() == BytecodeEmitter.OperationKind.JMP_IF_TRUE && operation.immediate() == immediate;
+            case 0x03 -> operation.kind() == BytecodeEmitter.OperationKind.JMP_IF_FALSE && operation.immediate() == immediate;
+            case 0x71 -> operation.kind() == BytecodeEmitter.OperationKind.HOSTCALL;
+            case 0x72 -> operation.kind() == BytecodeEmitter.OperationKind.INTRINSIC && operation.immediate() == immediate;
+            default -> false;
+        };
+    }
+
+    /**
+     * Builds an emission plan from {@code module} alone, one function plan per IRVM function.
+     *
+     * <p>The first three plan components are filled with {@code 0} and two empty lists.
+     *
+     * @throws IllegalArgumentException if an instruction has no derivable operation
+     */
+    private static BytecodeEmitter.EmissionPlan deriveEmissionPlan(final IRVMModule module) {
+        final var functions = new ArrayList<BytecodeEmitter.FunctionPlan>(module.functions().size());
+        for (final var function : module.functions()) {
+            final var operations = new ArrayList<BytecodeEmitter.Operation>(function.instructions().size());
+            for (final var instruction : function.instructions()) {
+                operations.add(deriveOperation(instruction));
+            }
+            functions.add(new BytecodeEmitter.FunctionPlan(
+                    function.name(),
+                    function.paramSlots(),
+                    function.localSlots(),
+                    function.returnSlots(),
+                    function.maxStackSlots(),
+                    ReadOnlyList.wrap(operations)));
+        }
+        return new BytecodeEmitter.EmissionPlan(
+                0,
+                ReadOnlyList.empty(),
+                ReadOnlyList.empty(),
+                ReadOnlyList.wrap(functions));
+    }
+
+    /**
+     * Maps a single IRVM instruction to its emission operation.
+     *
+     * <p>NOTE(review): HOSTCALL is derived with a placeholder {@code "__unknown__"} syscall
+     * declaration and {@code null} for the remaining arguments -- confirm the emitter accepts that.
+     *
+     * @throws IllegalArgumentException if the opcode has no operation equivalent
+     */
+    private static BytecodeEmitter.Operation deriveOperation(final IRVMInstruction instruction) {
+        final var immediate = instruction.immediate() == null ? 0 : instruction.immediate();
+        return switch (instruction.op().opcode()) {
+            case 0x01 -> BytecodeEmitter.Operation.halt();
+            case 0x51 -> BytecodeEmitter.Operation.ret();
+            case 0x50 -> BytecodeEmitter.Operation.callFunc(immediate);
+            case 0x02 -> BytecodeEmitter.Operation.jmp(immediate, null);
+            case 0x04 -> BytecodeEmitter.Operation.jmpIfTrue(immediate, null);
+            case 0x03 -> BytecodeEmitter.Operation.jmpIfFalse(immediate, null);
+            case 0x71 -> BytecodeEmitter.Operation.hostcall(
+                    new BytecodeModule.SyscallDecl("__unknown__", "__unknown__", 0, 0, 0),
+                    null,
+                    null);
+            case 0x72 -> BytecodeEmitter.Operation.intrinsic(immediate);
+            default -> throw new IllegalArgumentException("cannot derive emission op for irvm opcode: " + instruction.op().opcode());
+        };
+    }
+
+    /**
+     * Reconstructs an IRVM module from a plan for the {@code (boolean, EmissionPlan)} constructor.
+     *
+     * <p>A {@code null} or function-less plan yields a single synthetic function holding one
+     * INTERNAL_EXT or HALT instruction, depending on {@code hasInternalOpcodes}.
+     *
+     * @throws IllegalArgumentException if the plan contains a RAW_SYSCALL operation
+     */
+    private static IRVMModule moduleFromPlan(
+            final boolean hasInternalOpcodes,
+            final BytecodeEmitter.EmissionPlan emissionPlan) {
+        final var inputPlan = emissionPlan == null ? BytecodeEmitter.EmissionPlan.empty() : emissionPlan;
+        if (inputPlan.functions().isEmpty()) {
+            return new IRVMModule(
+                    "core-v1",
+                    ReadOnlyList.from(new IRVMFunction(
+                            "__synthetic__",
+                            0,
+                            0,
+                            0,
+                            1,
+                            ReadOnlyList.from(
+                                    new IRVMInstruction(hasInternalOpcodes ? IRVMOp.INTERNAL_EXT : IRVMOp.HALT, null)))));
+        }
+        final var functions = new ArrayList<IRVMFunction>(inputPlan.functions().size());
+        for (final var functionPlan : inputPlan.functions()) {
+            final var instructions = new ArrayList<IRVMInstruction>(functionPlan.operations().size());
+            for (final var operation : functionPlan.operations()) {
+                instructions.add(operationToInstruction(operation));
+            }
+            // NOTE(review): the extra instruction makes this function one longer than its plan,
+            // so the canonical constructor's coherence check rejects the result -- confirm intent.
+            if (hasInternalOpcodes) {
+                instructions.add(0, new IRVMInstruction(IRVMOp.INTERNAL_EXT, null));
+            }
+            functions.add(new IRVMFunction(
+                    functionPlan.name(),
+                    functionPlan.paramSlots(),
+                    functionPlan.localSlots(),
+                    functionPlan.returnSlots(),
+                    functionPlan.maxStackSlots(),
+                    ReadOnlyList.wrap(instructions)));
+        }
+        return new IRVMModule("core-v1", ReadOnlyList.wrap(functions));
+    }
+
+    /**
+     * Maps a planned operation back to the IRVM instruction that represents it.
+     *
+     * <p>HOSTCALL always gets immediate {@code 0}; HALT and RET carry no immediate.
+     *
+     * @throws IllegalArgumentException for RAW_SYSCALL, which has no IRVM representation here
+     */
+    private static IRVMInstruction operationToInstruction(final BytecodeEmitter.Operation operation) {
+        return switch (operation.kind()) {
+            case HALT -> new IRVMInstruction(IRVMOp.HALT, null);
+            case RET -> new IRVMInstruction(IRVMOp.RET, null);
+            case CALL_FUNC -> new IRVMInstruction(IRVMOp.CALL, operation.immediate());
+            case JMP -> new IRVMInstruction(IRVMOp.JMP, operation.immediate());
+            case JMP_IF_TRUE -> new IRVMInstruction(IRVMOp.JMP_IF_TRUE, operation.immediate());
+            case JMP_IF_FALSE -> new IRVMInstruction(IRVMOp.JMP_IF_FALSE, operation.immediate());
+            case HOSTCALL -> new IRVMInstruction(IRVMOp.HOSTCALL, 0);
+            case INTRINSIC -> new IRVMInstruction(IRVMOp.INTRINSIC, operation.immediate());
+            case RAW_SYSCALL -> throw new IllegalArgumentException("raw syscall is not representable in IRVM preload");
+        };
+    }
 }
diff --git a/prometeu-compiler/prometeu-build-pipeline/src/main/java/p/studio/compiler/workspaces/stages/EmitBytecodePipelineStage.java b/prometeu-compiler/prometeu-build-pipeline/src/main/java/p/studio/compiler/workspaces/stages/EmitBytecodePipelineStage.java
index 494cd7ac..60e049fa 100644
--- a/prometeu-compiler/prometeu-build-pipeline/src/main/java/p/studio/compiler/workspaces/stages/EmitBytecodePipelineStage.java
+++ b/prometeu-compiler/prometeu-build-pipeline/src/main/java/p/studio/compiler/workspaces/stages/EmitBytecodePipelineStage.java
@@ -39,8 +39,17 @@ public class EmitBytecodePipelineStage implements PipelineStage {
                             .message("[BUILD]: optimized IRVM still contains internal opcodes"));
         }
         try {
-            ctx.bytecodeModule = emitter.emit(ctx.optimizedIrvm.emissionPlan());
+            ctx.bytecodeModule = emitter.emit(ctx.optimizedIrvm.coherentEmissionPlan());
             ctx.bytecodeBytes = ctx.bytecodeModule.serialize();
+        } catch (IllegalArgumentException e) {
+            // NOTE(review): also catches IllegalArgumentException raised inside emit()/serialize().
+            return BuildingIssueSink.empty()
+                    .report(builder -> builder
+                            .error(true)
+                            .phase("BACKEND_EMIT_BYTECODE")
+                            .code("MARSHAL_VERIFY_PRECHECK_IRVM_PROGRAM_COHERENCE")
+                            .message("[BUILD]: irvm program coherence failed: " + e.getMessage())
+                            .exception(e));
         } catch (BytecodeMarshalingException e) {
             return BuildingIssueSink.empty()
                     .report(builder -> builder
diff --git a/prometeu-compiler/prometeu-build-pipeline/src/test/java/p/studio/compiler/backend/irvm/IRVMProgramTest.java b/prometeu-compiler/prometeu-build-pipeline/src/test/java/p/studio/compiler/backend/irvm/IRVMProgramTest.java
new file mode 100644
index 00000000..c5604a92
--- /dev/null
+++ b/prometeu-compiler/prometeu-build-pipeline/src/test/java/p/studio/compiler/backend/irvm/IRVMProgramTest.java
@@ -0,0 +1,55 @@
+package p.studio.compiler.backend.irvm;
+
+import org.junit.jupiter.api.Test;
+import p.studio.compiler.backend.bytecode.BytecodeEmitter;
+import p.studio.utilities.structures.ReadOnlyList;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertThrows;
+
+class IRVMProgramTest {
+
+    @Test
+    void constructorMustRejectModuleAndEmissionPlanMismatch() {
+        final var module = new IRVMModule(
+                "core-v1",
+                ReadOnlyList.from(new IRVMFunction(
+                        "main",
+                        0,
+                        0,
+                        0,
+                        1,
+                        ReadOnlyList.from(new IRVMInstruction(IRVMOp.HALT, null)))));
+        final var mismatchedPlan = new BytecodeEmitter.EmissionPlan(
+                0,
+                ReadOnlyList.empty(),
+                ReadOnlyList.empty(),
+                ReadOnlyList.from(new BytecodeEmitter.FunctionPlan(
+                        "main",
+                        0,
+                        0,
+                        0,
+                        1,
+                        ReadOnlyList.from(BytecodeEmitter.Operation.ret()))));
+
+        assertThrows(IllegalArgumentException.class, () -> new IRVMProgram(module, mismatchedPlan));
+    }
+
+    @Test
+    void coherentEmissionPlanMayBeDerivedFromModuleWhenPlanIsEmpty() {
+        final var module = new IRVMModule(
+                "core-v1",
+                ReadOnlyList.from(new IRVMFunction(
+                        "main",
+                        0,
+                        0,
+                        0,
+                        1,
+                        ReadOnlyList.from(new IRVMInstruction(IRVMOp.HALT, null)))));
+        final var program = new IRVMProgram(module, BytecodeEmitter.EmissionPlan.empty());
+
+        final var plan = program.coherentEmissionPlan();
+        assertEquals(1, plan.functions().size());
+        assertEquals(BytecodeEmitter.OperationKind.HALT, plan.functions().getFirst().operations().getFirst().kind());
+    }
+}