Skip to content

Commit

Permalink
v0.8.5+luau617
Browse files Browse the repository at this point in the history
  • Loading branch information
khvzak committed Mar 16, 2024
1 parent eca4447 commit aaa17ba
Show file tree
Hide file tree
Showing 21 changed files with 625 additions and 116 deletions.
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "luau0-src"
version = "0.8.4+luau616"
version = "0.8.5+luau617"
authors = ["Aleksandr Orlenko <[email protected]>"]
edition = "2021"
repository = "https://github.com/khvzak/luau-src-rs"
Expand Down
5 changes: 3 additions & 2 deletions luau/CodeGen/include/Luau/AssemblyBuilderA64.h
Original file line number Diff line number Diff line change
Expand Up @@ -125,12 +125,12 @@ class AssemblyBuilderA64
// Address of code (label)
void adr(RegisterA64 dst, Label& label);

// Floating-point scalar moves
// Floating-point scalar/vector moves
// Note: constant must be compatible with immediate floating point moves (see isFmovSupported)
void fmov(RegisterA64 dst, RegisterA64 src);
void fmov(RegisterA64 dst, double src);

// Floating-point scalar math
// Floating-point scalar/vector math
void fabs(RegisterA64 dst, RegisterA64 src);
void fadd(RegisterA64 dst, RegisterA64 src1, RegisterA64 src2);
void fdiv(RegisterA64 dst, RegisterA64 src1, RegisterA64 src2);
Expand All @@ -139,6 +139,7 @@ class AssemblyBuilderA64
void fsqrt(RegisterA64 dst, RegisterA64 src);
void fsub(RegisterA64 dst, RegisterA64 src1, RegisterA64 src2);

// Vector component manipulation
void ins_4s(RegisterA64 dst, RegisterA64 src, uint8_t index);
void ins_4s(RegisterA64 dst, uint8_t dstIndex, RegisterA64 src, uint8_t srcIndex);
void dup_4s(RegisterA64 dst, RegisterA64 src, uint8_t index);
Expand Down
6 changes: 6 additions & 0 deletions luau/CodeGen/include/Luau/CodeGen.h
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,12 @@ bool isSupported();
void create(lua_State* L, AllocationCallback* allocationCallback, void* allocationCallbackContext);
void create(lua_State* L);

// Check if native execution is enabled
[[nodiscard]] bool isNativeExecutionEnabled(lua_State* L);

// Enable or disable native execution according to `enabled` argument
void setNativeExecutionEnabled(lua_State* L, bool enabled);

// Builds target function and all inner functions
CodeGenCompilationResult compile(lua_State* L, int idx, unsigned int flags = 0, CompilationStats* stats = nullptr);

Expand Down
7 changes: 4 additions & 3 deletions luau/CodeGen/include/Luau/IrData.h
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,8 @@ enum class IrCmd : uint8_t

// Load a TValue from memory
// A: Rn or Kn or pointer (TValue)
// B: int (optional 'A' pointer offset)
// B: int/none (optional 'A' pointer offset)
// C: tag/none (tag of the value being loaded)
LOAD_TVALUE,

// Load current environment table
Expand Down Expand Up @@ -321,7 +322,7 @@ enum class IrCmd : uint8_t
ADJUST_STACK_TO_TOP,

// Execute fastcall builtin function in-place
// A: builtin
// A: unsigned int (builtin id)
// B: Rn (result start)
// C: Rn (argument start)
// D: Rn or Kn or undef (optional second argument)
Expand All @@ -330,7 +331,7 @@ enum class IrCmd : uint8_t
FASTCALL,

// Call the fastcall builtin function
// A: builtin
// A: unsigned int (builtin id)
// B: Rn (result start)
// C: Rn (argument start)
// D: Rn or Kn or undef (optional second argument)
Expand Down
4 changes: 2 additions & 2 deletions luau/CodeGen/include/Luau/IrVisitUseDef.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
#include "Luau/Common.h"
#include "Luau/IrData.h"

LUAU_FASTFLAG(LuauCodegenRemoveDeadStores2)
LUAU_FASTFLAG(LuauCodegenRemoveDeadStores3)

namespace Luau
{
Expand Down Expand Up @@ -188,7 +188,7 @@ static void visitVmRegDefsUses(T& visitor, IrFunction& function, const IrInst& i
visitor.def(inst.b);
break;
case IrCmd::FALLBACK_FORGPREP:
if (FFlag::LuauCodegenRemoveDeadStores2)
if (FFlag::LuauCodegenRemoveDeadStores3)
{
// This instruction doesn't always redefine Rn, Rn+1, Rn+2, so we have to mark it as implicit use
visitor.useRange(vmRegOp(inst.b), 3);
Expand Down
42 changes: 35 additions & 7 deletions luau/CodeGen/src/AssemblyBuilderA64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
#include <stdarg.h>
#include <stdio.h>

LUAU_FASTFLAG(LuauCodeGenOptVecA64)

namespace Luau
{
namespace CodeGen
Expand Down Expand Up @@ -557,16 +559,42 @@ void AssemblyBuilderA64::fmov(RegisterA64 dst, RegisterA64 src)

void AssemblyBuilderA64::fmov(RegisterA64 dst, double src)
{
CODEGEN_ASSERT(dst.kind == KindA64::d);
if (FFlag::LuauCodeGenOptVecA64)
{
CODEGEN_ASSERT(dst.kind == KindA64::d || dst.kind == KindA64::q);

int imm = getFmovImm(src);
CODEGEN_ASSERT(imm >= 0 && imm <= 256);
int imm = getFmovImm(src);
CODEGEN_ASSERT(imm >= 0 && imm <= 256);

// fmov can't encode 0, but movi can; movi is otherwise not useful for 64-bit fp immediates because it encodes repeating patterns
if (imm == 256)
placeFMOV("movi", dst, src, 0b001'0111100000'000'1110'01'00000);
// fmov can't encode 0, but movi can; movi is otherwise not useful for fp immediates because it encodes repeating patterns
if (dst.kind == KindA64::d)
{
if (imm == 256)
placeFMOV("movi", dst, src, 0b001'0111100000'000'1110'01'00000);
else
placeFMOV("fmov", dst, src, 0b000'11110'01'1'00000000'100'00000 | (imm << 8));
}
else
{
if (imm == 256)
placeFMOV("movi.4s", dst, src, 0b010'0111100000'000'0000'01'00000);
else
placeFMOV("fmov.4s", dst, src, 0b010'0111100000'000'1111'0'1'00000 | ((imm >> 5) << 11) | (imm & 31));
}
}
else
placeFMOV("fmov", dst, src, 0b000'11110'01'1'00000000'100'00000 | (imm << 8));
{
CODEGEN_ASSERT(dst.kind == KindA64::d);

int imm = getFmovImm(src);
CODEGEN_ASSERT(imm >= 0 && imm <= 256);

// fmov can't encode 0, but movi can; movi is otherwise not useful for 64-bit fp immediates because it encodes repeating patterns
if (imm == 256)
placeFMOV("movi", dst, src, 0b001'0111100000'000'1110'01'00000);
else
placeFMOV("fmov", dst, src, 0b000'11110'01'1'00000000'100'00000 | (imm << 8));
}
}

void AssemblyBuilderA64::fabs(RegisterA64 dst, RegisterA64 src)
Expand Down
151 changes: 137 additions & 14 deletions luau/CodeGen/src/CodeGen.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,8 @@ LUAU_FASTINTVARIABLE(CodegenHeuristicsBlockLimit, 32'768) // 32 K
// Current value is based on some member variables being limited to 16 bits
LUAU_FASTINTVARIABLE(CodegenHeuristicsBlockInstructionLimit, 65'536) // 64 K

LUAU_FASTFLAG(LuauCodegenHeapSizeReport)

namespace Luau
{
namespace CodeGen
Expand All @@ -74,25 +76,94 @@ struct NativeProto
uintptr_t exectarget;
};

static NativeProto createNativeProto(Proto* proto, const IrBuilder& ir)
// Additional data attached to Proto::execdata
// Guaranteed to be aligned to 16 bytes
// The record is located after the per-instruction offset table (see getExtraExecData)
struct ExtraExecData
{
// Size in bytes of the whole execdata allocation, as computed by calculateExecDataSize
size_t execDataSize;
// Size in bytes of the native code range attributed to the function; zero until compile() fills it in
size_t codeSize;
};

// Rounds 'value' up to the nearest multiple of 'align'
// 'align' must be a positive power of two
static int alignTo(int value, int align)
{
    CODEGEN_ASSERT(FFlag::LuauCodegenHeapSizeReport);
    CODEGEN_ASSERT(align > 0 && (align & (align - 1)) == 0);

    // For a power of two, (align - 1) is a mask of the low bits that have to be cleared after rounding up
    const int lowBitsMask = align - 1;
    const int bumped = value + lowBitsMask;

    return bumped & ~lowBitsMask;
}

// Returns the size of execdata required to store all code offsets and ExtraExecData structure at proper alignment
// Layout: one uint32_t offset per bytecode instruction, padding up to a 16-byte boundary, then ExtraExecData
// Always a multiple of 4 bytes
static int calculateExecDataSize(Proto* proto)
{
    CODEGEN_ASSERT(FFlag::LuauCodegenHeapSizeReport);

    // Per-instruction offset table
    int size = proto->sizecode * sizeof(uint32_t);

    // ExtraExecData has to start on a 16-byte boundary relative to the start of execdata
    size = alignTo(size, 16);
    size += sizeof(ExtraExecData);

    return size;
}

// Returns pointer to the ExtraExecData inside the Proto::execdata
// Even though 'execdata' is a field in Proto, we require it to support cases where it's not attached to Proto during construction
// 'proto' is only used for its instruction count, which determines where the record starts inside 'execdata'
ExtraExecData* getExtraExecData(Proto* proto, void* execdata)
{
    CODEGEN_ASSERT(FFlag::LuauCodegenHeapSizeReport);

    // The record follows the per-instruction offset table, rounded up to a 16-byte boundary
    int size = proto->sizecode * sizeof(uint32_t);
    size = alignTo(size, 16);

    return reinterpret_cast<ExtraExecData*>(reinterpret_cast<char*>(execdata) + size);
}

for (int i = 0; i < sizecode; i++)
static NativeProto createNativeProto(Proto* proto, const IrBuilder& ir)
{
if (FFlag::LuauCodegenHeapSizeReport)
{
CODEGEN_ASSERT(ir.function.bcMapping[i].asmLocation >= instTarget);
int execDataSize = calculateExecDataSize(proto);
CODEGEN_ASSERT(execDataSize % 4 == 0);

uint32_t* execData = new uint32_t[execDataSize / 4];
uint32_t instTarget = ir.function.entryLocation;

for (int i = 0; i < proto->sizecode; i++)
{
CODEGEN_ASSERT(ir.function.bcMapping[i].asmLocation >= instTarget);

execData[i] = ir.function.bcMapping[i].asmLocation - instTarget;
}

// Set first instruction offset to 0 so that entering this function still executes any generated entry code.
execData[0] = 0;

ExtraExecData* extra = getExtraExecData(proto, execData);
memset(extra, 0, sizeof(ExtraExecData));

extra->execDataSize = execDataSize;

instOffsets[i] = ir.function.bcMapping[i].asmLocation - instTarget;
// entry target will be relocated when assembly is finalized
return {proto, execData, instTarget};
}
else
{
int sizecode = proto->sizecode;

uint32_t* instOffsets = new uint32_t[sizecode];
uint32_t instTarget = ir.function.entryLocation;

// Set first instruction offset to 0 so that entering this function still executes any generated entry code.
instOffsets[0] = 0;
for (int i = 0; i < sizecode; i++)
{
CODEGEN_ASSERT(ir.function.bcMapping[i].asmLocation >= instTarget);

// entry target will be relocated when assembly is finalized
return {proto, instOffsets, instTarget};
instOffsets[i] = ir.function.bcMapping[i].asmLocation - instTarget;
}

// Set first instruction offset to 0 so that entering this function still executes any generated entry code.
instOffsets[0] = 0;

// entry target will be relocated when assembly is finalized
return {proto, instOffsets, instTarget};
}
}

static void destroyExecData(void* execdata)
Expand Down Expand Up @@ -168,6 +239,12 @@ static int onEnter(lua_State* L, Proto* proto)
return GateFn(data->context.gateEntry)(L, proto, target, &data->context);
}

// Entry callback installed by setNativeExecutionEnabled(L, false)
// Ignores both arguments and unconditionally returns 1, so no native code is ever entered through it
static int onEnterDisabled(lua_State* L, Proto* proto)
{
    (void)L;
    (void)proto;

    return 1;
}

void onDisable(lua_State* L, Proto* proto)
{
// do nothing if proto already uses bytecode
Expand Down Expand Up @@ -207,6 +284,17 @@ void onDisable(lua_State* L, Proto* proto)
});
}

// Reports the number of bytes of native-code related memory attributed to 'proto'
// The result is the sum of the execdata allocation size and the recorded native code size
size_t getMemorySize(lua_State* L, Proto* proto)
{
    CODEGEN_ASSERT(FFlag::LuauCodegenHeapSizeReport);

    const ExtraExecData* record = getExtraExecData(proto, proto->execdata);

    // execDataSize matches the allocation held by the 'execdata' field exactly
    const size_t execDataBytes = record->execDataSize;

    // codeSize is only an approximation: the code+data page is shared and owned by every Proto of a module,
    // and any one of them can keep the whole region alive, so freeing a single Proto does not release this much memory
    const size_t nativeCodeBytes = record->codeSize;

    return execDataBytes + nativeCodeBytes;
}

#if defined(__aarch64__)
unsigned int getCpuFeaturesA64()
{
Expand Down Expand Up @@ -301,13 +389,27 @@ void create(lua_State* L, AllocationCallback* allocationCallback, void* allocati
ecb->destroy = onDestroyFunction;
ecb->enter = onEnter;
ecb->disable = onDisable;

if (FFlag::LuauCodegenHeapSizeReport)
ecb->getmemorysize = getMemorySize;
}

// Convenience overload: forwards to the three-argument create() with a null allocation callback and a null callback context
void create(lua_State* L)
{
create(L, nullptr, nullptr);
}

// Check if native execution is enabled
// Returns false when 'L' has no native state; otherwise reports whether the regular entry callback is currently installed
[[nodiscard]] bool isNativeExecutionEnabled(lua_State* L)
{
    if (!getNativeState(L))
        return false;

    return L->global->ecb.enter == onEnter;
}

// Enable or disable native execution according to `enabled` argument
// Does nothing when 'L' has no native state
void setNativeExecutionEnabled(lua_State* L, bool enabled)
{
    if (!getNativeState(L))
        return;

    // Swapping the entry callback is all that is needed: the disabled variant never dispatches to native code
    if (enabled)
        L->global->ecb.enter = onEnter;
    else
        L->global->ecb.enter = onEnterDisabled;
}

CodeGenCompilationResult compile(lua_State* L, int idx, unsigned int flags, CompilationStats* stats)
{
CODEGEN_ASSERT(lua_isLfunction(L, idx));
Expand Down Expand Up @@ -401,17 +503,38 @@ CodeGenCompilationResult compile(lua_State* L, int idx, unsigned int flags, Comp
return CodeGenCompilationResult::AllocationFailed;
}

if (gPerfLogFn && results.size() > 0)
if (FFlag::LuauCodegenHeapSizeReport)
{
gPerfLogFn(gPerfLogContext, uintptr_t(codeStart), uint32_t(results[0].exectarget), "<luau helpers>");
if (gPerfLogFn && results.size() > 0)
gPerfLogFn(gPerfLogContext, uintptr_t(codeStart), uint32_t(results[0].exectarget), "<luau helpers>");

for (size_t i = 0; i < results.size(); ++i)
{
uint32_t begin = uint32_t(results[i].exectarget);
uint32_t end = i + 1 < results.size() ? uint32_t(results[i + 1].exectarget) : uint32_t(build.code.size() * sizeof(build.code[0]));
CODEGEN_ASSERT(begin < end);

logPerfFunction(results[i].p, uintptr_t(codeStart) + begin, end - begin);
if (gPerfLogFn)
logPerfFunction(results[i].p, uintptr_t(codeStart) + begin, end - begin);

ExtraExecData* extra = getExtraExecData(results[i].p, results[i].execdata);
extra->codeSize = end - begin;
}
}
else
{
if (gPerfLogFn && results.size() > 0)
{
gPerfLogFn(gPerfLogContext, uintptr_t(codeStart), uint32_t(results[0].exectarget), "<luau helpers>");

for (size_t i = 0; i < results.size(); ++i)
{
uint32_t begin = uint32_t(results[i].exectarget);
uint32_t end = i + 1 < results.size() ? uint32_t(results[i + 1].exectarget) : uint32_t(build.code.size() * sizeof(build.code[0]));
CODEGEN_ASSERT(begin < end);

logPerfFunction(results[i].p, uintptr_t(codeStart) + begin, end - begin);
}
}
}

Expand Down
4 changes: 2 additions & 2 deletions luau/CodeGen/src/CodeGenLower.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ LUAU_FASTFLAG(DebugCodegenSkipNumbering)
LUAU_FASTINT(CodegenHeuristicsInstructionLimit)
LUAU_FASTINT(CodegenHeuristicsBlockLimit)
LUAU_FASTINT(CodegenHeuristicsBlockInstructionLimit)
LUAU_FASTFLAG(LuauCodegenRemoveDeadStores2)
LUAU_FASTFLAG(LuauCodegenRemoveDeadStores3)

namespace Luau
{
Expand Down Expand Up @@ -312,7 +312,7 @@ inline bool lowerFunction(IrBuilder& ir, AssemblyBuilder& build, ModuleHelpers&
}
}

if (FFlag::LuauCodegenRemoveDeadStores2)
if (FFlag::LuauCodegenRemoveDeadStores3)
markDeadStoresInBlockChains(ir);
}

Expand Down
Loading

0 comments on commit aaa17ba

Please sign in to comment.