Skip to content

Commit

Permalink
Merge pull request #18121 from unknownbrackets/jit-ir-profiler
Browse files Browse the repository at this point in the history
IR: Add mini native jit MIPS block profiler
  • Loading branch information
hrydgard authored Sep 25, 2023
2 parents 4164f36 + 9b2fa46 commit 5145698
Show file tree
Hide file tree
Showing 17 changed files with 290 additions and 18 deletions.
24 changes: 20 additions & 4 deletions Core/MIPS/ARM64/Arm64IRAsm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -50,8 +50,18 @@ static void ShowPC(void *membase, void *jitbase) {
}

void Arm64JitBackend::GenerateFixedCode(MIPSState *mipsState) {
BeginWrite(GetMemoryProtectPageSize());
// This will be used as a writable scratch area, always 32-bit accessible.
const u8 *start = AlignCodePage();
if (DebugProfilerEnabled()) {
ProtectMemoryPages(start, GetMemoryProtectPageSize(), MEM_PROT_READ | MEM_PROT_WRITE);
hooks_.profilerPC = (uint32_t *)GetWritableCodePtr();
Write32(0);
hooks_.profilerStatus = (IRProfilerStatus *)GetWritableCodePtr();
Write32(0);
}

const u8 *disasmStart = AlignCodePage();
BeginWrite(GetMemoryProtectPageSize());

if (jo.useStaticAlloc) {
saveStaticRegisters_ = AlignCode16();
Expand All @@ -63,8 +73,6 @@ void Arm64JitBackend::GenerateFixedCode(MIPSState *mipsState) {
regs_.EmitLoadStaticRegisters();
LDR(INDEX_UNSIGNED, DOWNCOUNTREG, CTXREG, offsetof(MIPSState, downcount));
RET();

start = saveStaticRegisters_;
} else {
saveStaticRegisters_ = nullptr;
loadStaticRegisters_ = nullptr;
Expand Down Expand Up @@ -152,13 +160,17 @@ void Arm64JitBackend::GenerateFixedCode(MIPSState *mipsState) {
MOVI2R(JITBASEREG, (intptr_t)GetBasePtr() - MIPS_EMUHACK_OPCODE);

LoadStaticRegisters();
WriteDebugProfilerStatus(IRProfilerStatus::IN_JIT);
MovFromPC(SCRATCH1);
WriteDebugPC(SCRATCH1);
outerLoopPCInSCRATCH1_ = GetCodePtr();
MovToPC(SCRATCH1);
outerLoop_ = GetCodePtr();
SaveStaticRegisters(); // Advance can change the downcount, so must save/restore
RestoreRoundingMode(true);
WriteDebugProfilerStatus(IRProfilerStatus::TIMER_ADVANCE);
QuickCallFunction(SCRATCH1_64, &CoreTiming::Advance);
WriteDebugProfilerStatus(IRProfilerStatus::IN_JIT);
ApplyRoundingMode(true);
LoadStaticRegisters();

Expand Down Expand Up @@ -191,6 +203,7 @@ void Arm64JitBackend::GenerateFixedCode(MIPSState *mipsState) {
}

MovFromPC(SCRATCH1);
WriteDebugPC(SCRATCH1);
#ifdef MASKED_PSP_MEMORY
ANDI2R(SCRATCH1, SCRATCH1, Memory::MEMVIEW32_MASK);
#endif
Expand All @@ -206,7 +219,9 @@ void Arm64JitBackend::GenerateFixedCode(MIPSState *mipsState) {

// No block found, let's jit. We don't need to save static regs, they're all callee saved.
RestoreRoundingMode(true);
WriteDebugProfilerStatus(IRProfilerStatus::COMPILING);
QuickCallFunction(SCRATCH1_64, &MIPSComp::JitAt);
WriteDebugProfilerStatus(IRProfilerStatus::IN_JIT);
ApplyRoundingMode(true);

// Let's just dispatch again, we'll enter the block since we know it's there.
Expand All @@ -221,6 +236,7 @@ void Arm64JitBackend::GenerateFixedCode(MIPSState *mipsState) {
const uint8_t *quitLoop = GetCodePtr();
SetJumpTarget(badCoreState);

WriteDebugProfilerStatus(IRProfilerStatus::NOT_RUNNING);
SaveStaticRegisters();
RestoreRoundingMode(true);

Expand Down Expand Up @@ -251,7 +267,7 @@ void Arm64JitBackend::GenerateFixedCode(MIPSState *mipsState) {

// Leave this at the end, add more stuff above.
if (enableDisasm) {
std::vector<std::string> lines = DisassembleArm64(start, (int)(GetCodePtr() - start));
std::vector<std::string> lines = DisassembleArm64(disasmStart, (int)(GetCodePtr() - disasmStart));
for (auto s : lines) {
INFO_LOG(JIT, "%s", s.c_str());
}
Expand Down
4 changes: 4 additions & 0 deletions Core/MIPS/ARM64/Arm64IRCompFPU.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -508,6 +508,8 @@ void Arm64JitBackend::CompIR_FSpecial(IRInst inst) {

auto callFuncF_F = [&](float (*func)(float)) {
regs_.FlushBeforeCall();
WriteDebugProfilerStatus(IRProfilerStatus::MATH_HELPER);

// It might be in a non-volatile register.
// TODO: May have to handle a transfer if SIMD here.
if (regs_.IsFPRMapped(inst.src1)) {
Expand All @@ -527,6 +529,8 @@ void Arm64JitBackend::CompIR_FSpecial(IRInst inst) {
if (regs_.F(inst.dest) != S0) {
fp_.FMOV(regs_.F(inst.dest), S0);
}

WriteDebugProfilerStatus(IRProfilerStatus::IN_JIT);
};

switch (inst.op) {
Expand Down
4 changes: 4 additions & 0 deletions Core/MIPS/ARM64/Arm64IRCompSystem.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -210,6 +210,7 @@ void Arm64JitBackend::CompIR_System(IRInst inst) {
FlushAll();
SaveStaticRegisters();

WriteDebugProfilerStatus(IRProfilerStatus::SYSCALL);
#ifdef USE_PROFILER
// When profiling, we can't skip CallSyscall, since it times syscalls.
MOVI2R(W0, inst.constant);
Expand All @@ -229,14 +230,17 @@ void Arm64JitBackend::CompIR_System(IRInst inst) {
}
#endif

WriteDebugProfilerStatus(IRProfilerStatus::IN_JIT);
LoadStaticRegisters();
// This is always followed by an ExitToPC, where we check coreState.
break;

case IROp::CallReplacement:
FlushAll();
SaveStaticRegisters();
WriteDebugProfilerStatus(IRProfilerStatus::REPLACEMENT);
QuickCallFunction(SCRATCH2_64, GetReplacementFunc(inst.constant)->replaceFunc);
WriteDebugProfilerStatus(IRProfilerStatus::IN_JIT);
LoadStaticRegisters();
SUB(DOWNCOUNTREG, DOWNCOUNTREG, W0);
break;
Expand Down
34 changes: 34 additions & 0 deletions Core/MIPS/ARM64/Arm64IRJit.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,8 @@ bool Arm64JitBackend::CompileBlock(IRBlock *block, int block_num, bool preload)
SetBlockCheckedOffset(block_num, (int)GetOffset(GetCodePointer()));
wroteCheckedOffset = true;

WriteDebugPC(startPC);

// Check the sign bit to check if negative.
FixupBranch normalEntry = TBZ(DOWNCOUNTREG, 31);
MOVI2R(SCRATCH1, startPC);
Expand Down Expand Up @@ -129,6 +131,8 @@ bool Arm64JitBackend::CompileBlock(IRBlock *block, int block_num, bool preload)
}

if (jo.enableBlocklink && jo.useBackJump) {
WriteDebugPC(startPC);

// Small blocks are common, check if it's < 32KB long.
ptrdiff_t distance = blockStart - GetCodePointer();
if (distance >= -0x8000 && distance < 0x8000) {
Expand Down Expand Up @@ -229,8 +233,10 @@ void Arm64JitBackend::CompIR_Generic(IRInst inst) {

FlushAll();
SaveStaticRegisters();
WriteDebugProfilerStatus(IRProfilerStatus::IR_INTERPRET);
MOVI2R(X0, value);
QuickCallFunction(SCRATCH2_64, &DoIRInst);
WriteDebugProfilerStatus(IRProfilerStatus::IN_JIT);
LoadStaticRegisters();

// We only need to check the return value if it's a potential exit.
Expand All @@ -256,12 +262,14 @@ void Arm64JitBackend::CompIR_Interpret(IRInst inst) {
// IR protects us against this being a branching instruction (well, hopefully.)
FlushAll();
SaveStaticRegisters();
WriteDebugProfilerStatus(IRProfilerStatus::INTERPRET);
if (DebugStatsEnabled()) {
MOVP2R(X0, MIPSGetName(op));
QuickCallFunction(SCRATCH2_64, &NotifyMIPSInterpret);
}
MOVI2R(X0, inst.constant);
QuickCallFunction(SCRATCH2_64, MIPSGetInterpretFunc(op));
WriteDebugProfilerStatus(IRProfilerStatus::IN_JIT);
LoadStaticRegisters();
}

Expand Down Expand Up @@ -354,6 +362,32 @@ void Arm64JitBackend::MovToPC(ARM64Reg r) {
STR(INDEX_UNSIGNED, r, CTXREG, offsetof(MIPSState, pc));
}

void Arm64JitBackend::WriteDebugPC(uint32_t pc) {
if (hooks_.profilerPC) {
int offset = (int)((const u8 *)hooks_.profilerPC - GetBasePtr());
MOVI2R(SCRATCH2, MIPS_EMUHACK_OPCODE + offset);
MOVI2R(SCRATCH1, pc);
STR(SCRATCH1, JITBASEREG, SCRATCH2);
}
}

// Emits code that stores the value held in register r into the profiler's PC slot.
// Emits nothing when no profiler PC slot has been set up (hooks_.profilerPC is null).
// The emitted code overwrites SCRATCH2 before reading r, so r must not be SCRATCH2.
void Arm64JitBackend::WriteDebugPC(ARM64Reg r) {
	if (!hooks_.profilerPC)
		return;

	// Slot offset from the code base, biased by MIPS_EMUHACK_OPCODE to pair with JITBASEREG.
	const u8 *slot = (const u8 *)hooks_.profilerPC;
	const int slotOffset = (int)(slot - GetBasePtr());
	MOVI2R(SCRATCH2, MIPS_EMUHACK_OPCODE + slotOffset);
	STR(r, JITBASEREG, SCRATCH2);
}

// Emits code that stores the given status into the profiler's status slot.
// Emits nothing unless the profiler slots have been set up.
// The emitted code overwrites both SCRATCH1 and SCRATCH2.
void Arm64JitBackend::WriteDebugProfilerStatus(IRProfilerStatus status) {
	// profilerPC signals that the profiler is active, but the pointer actually
	// used below is profilerStatus - make sure that one is valid too.
	if (!hooks_.profilerPC || !hooks_.profilerStatus)
		return;

	// The slot is addressed relative to JITBASEREG, so convert its address to an
	// offset from the code base and add the same MIPS_EMUHACK_OPCODE bias here.
	int offset = (int)((const u8 *)hooks_.profilerStatus - GetBasePtr());
	MOVI2R(SCRATCH2, MIPS_EMUHACK_OPCODE + offset);
	MOVI2R(SCRATCH1, (int)status);
	STR(SCRATCH1, JITBASEREG, SCRATCH2);
}

void Arm64JitBackend::SaveStaticRegisters() {
if (jo.useStaticAlloc) {
QuickCallFunction(SCRATCH2_64, saveStaticRegisters_);
Expand Down
5 changes: 5 additions & 0 deletions Core/MIPS/ARM64/Arm64IRJit.h
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,11 @@ class Arm64JitBackend : public Arm64Gen::ARM64CodeBlock, public IRNativeBackend
void UpdateRoundingMode(bool force = false);
void MovFromPC(Arm64Gen::ARM64Reg r);
void MovToPC(Arm64Gen::ARM64Reg r);
// Emits a store of a constant pc to the profiler PC slot (nothing is emitted if the slot isn't set up.)
// Destroys SCRATCH1 and SCRATCH2.
void WriteDebugPC(uint32_t pc);
// Same, but stores the value in register r.  Destroys SCRATCH2, so r must not be SCRATCH2.
void WriteDebugPC(Arm64Gen::ARM64Reg r);
// Emits a store of status to the profiler status slot.
// Destroys SCRATCH1 and SCRATCH2.
void WriteDebugProfilerStatus(IRProfilerStatus status);

void SaveStaticRegisters();
void LoadStaticRegisters();
Expand Down
88 changes: 85 additions & 3 deletions Core/MIPS/IR/IRNativeCommon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,9 @@
// Official git repository and contact information can be found at
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.

#include <atomic>
#include <climits>
#include <thread>
#include "Common/Profiler/Profiler.h"
#include "Common/StringUtils.h"
#include "Common/TimeUtil.h"
Expand All @@ -31,18 +33,57 @@ namespace MIPSComp {

// Compile time flag to enable debug stats for not compiled ops.
static constexpr bool enableDebugStats = false;
// Compile time flag for enabling the simple IR jit profiler.
static constexpr bool enableDebugProfiler = false;

// Used only for debugging, when one of the enable flags above is true.
// Counts keyed by a uint8_t op value (presumably the IROp - confirm against where it is incremented.)
static std::map<uint8_t, int> debugSeenNotCompiledIR;
// Counts keyed by the name pointer passed to NotifyMIPSInterpret().
static std::map<const char *, int> debugSeenNotCompiled;
// Sample counts keyed by (pc, profiler status), incremented by the profiler thread.
// NOTE(review): the profiler thread writes this map while LogDebugStats() iterates and
// clears it, and no lock is visible here - confirm whether that race is acceptable.
static std::map<std::pair<uint32_t, IRProfilerStatus>, int> debugSeenPCUsage;
// time_now_d() value recorded the last time the debug stats were logged.
static double lastDebugStatsLog = 0.0;
// Minimum time_now_d() delta between debug stat logs (presumably seconds - confirm.)
static constexpr double debugStatsFrequency = 5.0;

// Thread that samples the profiler PC/status slots.
static std::thread debugProfilerThread;
// Set to true when the profiler thread is started; the thread loops until it becomes false.
std::atomic<bool> debugProfilerThreadStatus = false;

// Tracks the N entries with the highest counts offered so far.
// Slots are kept ordered from highest count (index 0) to lowest (index N - 1),
// and unused slots keep a count of 0.
template <int N>
class IRProfilerTopValues {
public:
	// Offers an entry with count c.  It is kept only if c beats one of the
	// current top N counts; otherwise it is ignored.
	void Add(const std::pair<uint32_t, IRProfilerStatus> &v, int c) {
		for (int i = 0; i < N; ++i) {
			if (c > counts[i]) {
				// Shift the lower-ranked entries down a slot (the last one drops off)
				// so the entry being displaced isn't simply overwritten and lost.
				for (int j = N - 1; j > i; --j) {
					counts[j] = counts[j - 1];
					values[j] = values[j - 1];
				}
				counts[i] = c;
				values[i] = v;
				return;
			}
		}
	}

	// Counts of the kept entries, highest first.
	int counts[N]{};
	// The (pc, status) pairs matching counts[] by index.
	std::pair<uint32_t, IRProfilerStatus> values[N]{};
};

// Returns a string literal naming the given profiler status, or "INVALID"
// when the value doesn't match any of the statuses handled here.
const char *IRProfilerStatusToString(IRProfilerStatus s) {
	const char *name = "INVALID";
	switch (s) {
	case IRProfilerStatus::NOT_RUNNING:
		name = "NOT_RUNNING";
		break;
	case IRProfilerStatus::IN_JIT:
		name = "IN_JIT";
		break;
	case IRProfilerStatus::TIMER_ADVANCE:
		name = "TIMER_ADVANCE";
		break;
	case IRProfilerStatus::COMPILING:
		name = "COMPILING";
		break;
	case IRProfilerStatus::MATH_HELPER:
		name = "MATH_HELPER";
		break;
	case IRProfilerStatus::REPLACEMENT:
		name = "REPLACEMENT";
		break;
	case IRProfilerStatus::SYSCALL:
		name = "SYSCALL";
		break;
	case IRProfilerStatus::INTERPRET:
		name = "INTERPRET";
		break;
	case IRProfilerStatus::IR_INTERPRET:
		name = "IR_INTERPRET";
		break;
	}
	return name;
}

static void LogDebugStats() {
if (!enableDebugStats)
if (!enableDebugStats && !enableDebugProfiler)
return;

double now = time_now_d();
if (now < lastDebugStatsLog + 1.0)
if (now < lastDebugStatsLog + debugStatsFrequency)
return;
lastDebugStatsLog = now;

Expand All @@ -66,16 +107,36 @@ static void LogDebugStats() {
}
debugSeenNotCompiled.clear();

IRProfilerTopValues<4> slowestPCs;
int64_t totalCount = 0;
for (auto it : debugSeenPCUsage) {
slowestPCs.Add(it.first, it.second);
totalCount += it.second;
}
debugSeenPCUsage.clear();

if (worstIROp != -1)
WARN_LOG(JIT, "Most not compiled IR op: %s (%d)", GetIRMeta((IROp)worstIROp)->name, worstIRVal);
if (worstName != nullptr)
WARN_LOG(JIT, "Most not compiled op: %s (%d)", worstName, worstVal);
if (slowestPCs.counts[0] != 0) {
for (int i = 0; i < 4; ++i) {
uint32_t pc = slowestPCs.values[i].first;
const char *status = IRProfilerStatusToString(slowestPCs.values[i].second);
const std::string label = g_symbolMap ? g_symbolMap->GetDescription(pc) : "";
WARN_LOG(JIT, "Slowest sampled PC #%d: %08x (%s)/%s (%f%%)", i, pc, label.c_str(), status, 100.0 * (double)slowestPCs.counts[i] / (double)totalCount);
}
}
}

// Reports the compile time enableDebugStats flag.
bool IRNativeBackend::DebugStatsEnabled() const {
return enableDebugStats;
}

// Reports the compile time enableDebugProfiler flag.
bool IRNativeBackend::DebugProfilerEnabled() const {
return enableDebugProfiler;
}

void IRNativeBackend::NotifyMIPSInterpret(const char *name) {
_assert_(enableDebugStats);
debugSeenNotCompiled[name]++;
Expand Down Expand Up @@ -120,6 +181,13 @@ int IRNativeBackend::ReportBadAddress(uint32_t addr, uint32_t alignment, uint32_

IRNativeBackend::IRNativeBackend(IRBlockCache &blocks) : blocks_(blocks) {}

// Stops the debug profiler thread, if it was marked as running.
IRNativeBackend::~IRNativeBackend() {
	// Nothing to tear down unless the profiler thread was flagged as started.
	if (!debugProfilerThreadStatus)
		return;

	// The thread loops while this flag is true, so clear it before waiting for it.
	debugProfilerThreadStatus = false;
	debugProfilerThread.join();
}

void IRNativeBackend::CompileIRInst(IRInst inst) {
switch (inst.op) {
case IROp::Nop:
Expand Down Expand Up @@ -421,6 +489,20 @@ void IRNativeJit::Init(IRNativeBackend &backend) {

// Wanted this to be a reference, but vtbls get in the way. Shouldn't change.
hooks_ = backend.GetNativeHooks();

if (enableDebugProfiler && hooks_.profilerPC) {
debugProfilerThreadStatus = true;
debugProfilerThread = std::thread([&] {
// Spin, spin spin... maybe could at least hook into sleeps.
while (debugProfilerThreadStatus) {
IRProfilerStatus stat = *hooks_.profilerStatus;
uint32_t pc = *hooks_.profilerPC;
if (stat != IRProfilerStatus::NOT_RUNNING && stat != IRProfilerStatus::SYSCALL) {
debugSeenPCUsage[std::make_pair(pc, stat)]++;
}
}
});
}
}

bool IRNativeJit::CompileTargetBlock(IRBlock *block, int block_num, bool preload) {
Expand All @@ -432,7 +514,7 @@ void IRNativeJit::FinalizeTargetBlock(IRBlock *block, int block_num) {
}

void IRNativeJit::RunLoopUntil(u64 globalticks) {
if constexpr (enableDebugStats) {
if constexpr (enableDebugStats || enableDebugProfiler) {
LogDebugStats();
}

Expand Down
Loading

0 comments on commit 5145698

Please sign in to comment.