From 202041fa79418524278bb36f52c9b6204a2271fc Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Mon, 14 Aug 2023 11:59:20 -0400 Subject: [PATCH 1/4] Fix Expr(:loopinfo) codegen We used a loop-marker intrinsic because the LoopID used to be dropped by optimization passes (this seems no longer true). got optimized by simplifycfg to the point where the loop-marker is now attached to the wrong back-edge. This PR drops the loop-marker and uses the LoopID metadata node directly. (cherry picked from commit 90494c279db4cb3b7a4d0e5c2c4ce4bf98569e57) --- src/codegen.cpp | 45 ++-- src/jl_exported_funcs.inc | 2 +- src/llvm-julia-passes.inc | 2 +- src/llvm-simdloop.cpp | 241 +++++++----------- src/passes.h | 10 +- src/pipeline.cpp | 2 +- test/llvmpasses/julia-simdloop-memoryssa.ll | 55 ++++ .../{simdloop.ll => julia-simdloop.ll} | 29 +-- test/llvmpasses/loopinfo.jl | 22 +- test/llvmpasses/parsing.ll | 2 +- 10 files changed, 208 insertions(+), 202 deletions(-) create mode 100644 test/llvmpasses/julia-simdloop-memoryssa.ll rename test/llvmpasses/{simdloop.ll => julia-simdloop.ll} (75%) diff --git a/src/codegen.cpp b/src/codegen.cpp index 384ee5caf9708..d28971b9a4caf 100644 --- a/src/codegen.cpp +++ b/src/codegen.cpp @@ -981,14 +981,7 @@ static const auto jl_typeof_func = new JuliaFunction<>{ Attributes(C, {Attribute::NonNull}), None); }, }; -static const auto jl_loopinfo_marker_func = new JuliaFunction<>{ - "julia.loopinfo_marker", - [](LLVMContext &C) { return FunctionType::get(getVoidTy(C), false); }, - [](LLVMContext &C) { return AttributeList::get(C, - Attributes(C, {Attribute::ReadOnly, Attribute::NoRecurse, Attribute::InaccessibleMemOnly}), - AttributeSet(), - None); }, -}; + static const auto jl_write_barrier_func = new JuliaFunction<>{ "julia.write_barrier", [](LLVMContext &C) { return FunctionType::get(getVoidTy(C), @@ -1600,6 +1593,7 @@ class jl_codectx_t { std::map &global_targets; std::map, GlobalVariable*> &external_calls; Function *f = NULL; + MDNode* LoopID = NULL; // local var info. globals are not in here. std::vector slots; std::map phic_slots; @@ -5766,16 +5760,22 @@ static jl_cgval_t emit_expr(jl_codectx_t &ctx, jl_value_t *expr, ssize_t ssaidx_ } else if (head == jl_loopinfo_sym) { // parse Expr(:loopinfo, "julia.simdloop", ("llvm.loop.vectorize.width", 4)) + // to LLVM LoopID SmallVector MDs; + + // Reserve first location for self reference to the LoopID metadata node. + TempMDTuple TempNode = MDNode::getTemporary(ctx.builder.getContext(), None); + MDs.push_back(TempNode.get()); + for (int i = 0, ie = nargs; i < ie; ++i) { Metadata *MD = to_md_tree(args[i], ctx.builder.getContext()); if (MD) MDs.push_back(MD); } - MDNode* MD = MDNode::get(ctx.builder.getContext(), MDs); - CallInst *I = ctx.builder.CreateCall(prepare_call(jl_loopinfo_marker_func)); - I->setMetadata("julia.loopinfo", MD); + ctx.LoopID = MDNode::getDistinct(ctx.builder.getContext(), MDs); + // Replace the temporary node with a self-reference. + ctx.LoopID->replaceOperandWith(0, ctx.LoopID); return jl_cgval_t(); } else if (head == jl_leave_sym || head == jl_coverageeffect_sym @@ -8045,6 +8045,7 @@ static jl_llvm_functions_t std::map BB; std::map come_from_bb; int cursor = 0; + int current_label = 0; auto find_next_stmt = [&] (int seq_next) { // new style ir is always in dominance order, but frontend IR might not be // `seq_next` is the next statement we want to emit @@ -8061,6 +8062,7 @@ static jl_llvm_functions_t workstack.pop_back(); auto nextbb = BB.find(item + 1); if (nextbb == BB.end()) { + // Not a BB cursor = item; return; } @@ -8071,8 +8073,10 @@ static jl_llvm_functions_t seq_next = -1; // if this BB is non-empty, we've visited it before so skip it if (!nextbb->second->getTerminator()) { + // New BB ctx.builder.SetInsertPoint(nextbb->second); cursor = item; + current_label = item; return; } } @@ -8319,7 +8323,12 @@ static jl_llvm_functions_t if (jl_is_gotonode(stmt)) { int lname = jl_gotonode_label(stmt); come_from_bb[cursor+1] = ctx.builder.GetInsertBlock(); - ctx.builder.CreateBr(BB[lname]); + auto br = ctx.builder.CreateBr(BB[lname]); + // Check if backwards branch + if (ctx.LoopID && lname <= current_label) { + br->setMetadata(LLVMContext::MD_loop, ctx.LoopID); + ctx.LoopID = NULL; + } find_next_stmt(lname - 1); continue; } @@ -8337,10 +8346,17 @@ static jl_llvm_functions_t workstack.push_back(lname - 1); BasicBlock *ifnot = BB[lname]; BasicBlock *ifso = BB[cursor+2]; + Instruction *br; if (ifnot == ifso) - ctx.builder.CreateBr(ifnot); + br = ctx.builder.CreateBr(ifnot); else - ctx.builder.CreateCondBr(isfalse, ifnot, ifso); + br = ctx.builder.CreateCondBr(isfalse, ifnot, ifso); + + // Check if backwards branch + if (ctx.LoopID && lname <= current_label) { + br->setMetadata(LLVMContext::MD_loop, ctx.LoopID); + ctx.LoopID = NULL; + } find_next_stmt(cursor + 1); continue; } @@ -9088,7 +9104,6 @@ static void init_jit_functions(void) add_named_global(jl_object_id__func, &jl_object_id_); add_named_global(jl_alloc_obj_func, (void*)NULL); add_named_global(jl_newbits_func, (void*)jl_new_bits); - add_named_global(jl_loopinfo_marker_func, (void*)NULL); add_named_global(jl_typeof_func, (void*)NULL); add_named_global(jl_write_barrier_func, (void*)NULL); add_named_global(jldlsym_func, &jl_load_and_lookup); diff --git a/src/jl_exported_funcs.inc b/src/jl_exported_funcs.inc index a7ffedd5cba10..745905b113622 100644 --- a/src/jl_exported_funcs.inc +++ b/src/jl_exported_funcs.inc @@ -580,7 +580,6 @@ YY(LLVMExtraAddCPUFeaturesPass) \ YY(LLVMExtraMPMAddCPUFeaturesPass) \ YY(LLVMExtraMPMAddRemoveNIPass) \ - YY(LLVMExtraMPMAddLowerSIMDLoopPass) \ YY(LLVMExtraMPMAddFinalLowerGCPass) \ YY(LLVMExtraMPMAddMultiVersioningPass) \ YY(LLVMExtraMPMAddRemoveJuliaAddrspacesPass) \ @@ -594,6 +593,7 @@ YY(LLVMExtraFPMAddLowerExcHandlersPass) \ YY(LLVMExtraFPMAddGCInvariantVerifierPass) \ YY(LLVMExtraLPMAddJuliaLICMPass) \ + YY(LLVMExtraLPMAddLowerSIMDLoopPass) \ YY(JLJITGetLLVMOrcExecutionSession) \ YY(JLJITGetJuliaOJIT) \ YY(JLJITGetExternalJITDylib) \ diff --git a/src/llvm-julia-passes.inc b/src/llvm-julia-passes.inc index 39030d60a44fc..d12370b1852c7 100644 --- a/src/llvm-julia-passes.inc +++ b/src/llvm-julia-passes.inc @@ -2,7 +2,6 @@ #ifdef MODULE_PASS MODULE_PASS("CPUFeatures", CPUFeaturesPass, CPUFeaturesPass()) MODULE_PASS("RemoveNI", RemoveNIPass, RemoveNIPass()) -MODULE_PASS("LowerSIMDLoop", LowerSIMDLoopPass, LowerSIMDLoopPass()) MODULE_PASS("FinalLowerGC", FinalLowerGCPass, FinalLowerGCPass()) MODULE_PASS("JuliaMultiVersioning", MultiVersioningPass, MultiVersioningPass()) MODULE_PASS("RemoveJuliaAddrspaces", RemoveJuliaAddrspacesPass, RemoveJuliaAddrspacesPass()) @@ -24,4 +23,5 @@ FUNCTION_PASS("GCInvariantVerifier", GCInvariantVerifierPass, GCInvariantVerifie //Loop passes #ifdef LOOP_PASS LOOP_PASS("JuliaLICM", JuliaLICMPass, JuliaLICMPass()) +LOOP_PASS("LowerSIMDLoop", LowerSIMDLoopPass, LowerSIMDLoopPass()) #endif diff --git a/src/llvm-simdloop.cpp b/src/llvm-simdloop.cpp index 21e2ec574d650..f6a1e3a501a98 100644 --- a/src/llvm-simdloop.cpp +++ b/src/llvm-simdloop.cpp @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -73,7 +74,7 @@ static unsigned getReduceOpcode(Instruction *J, Instruction *operand) JL_NOTSAFE /// If Phi is part of a reduction cycle of FAdd, FSub, FMul or FDiv, /// mark the ops as permitting reassociation/commuting. /// As of LLVM 4.0, FDiv is not handled by the loop vectorizer -static void enableUnsafeAlgebraIfReduction(PHINode *Phi, Loop *L, OptimizationRemarkEmitter &ORE) JL_NOTSAFEPOINT +static void enableUnsafeAlgebraIfReduction(PHINode *Phi, Loop &L, OptimizationRemarkEmitter &ORE, ScalarEvolution *SE) JL_NOTSAFEPOINT { typedef SmallVector chainVector; chainVector chain; @@ -84,7 +85,7 @@ static void enableUnsafeAlgebraIfReduction(PHINode *Phi, Loop *L, OptimizationRe // Find the user of instruction I that is within loop L. for (User *UI : I->users()) { /*}*/ Instruction *U = cast(UI); - if (L->contains(U)) { + if (L.contains(U)) { if (J) { LLVM_DEBUG(dbgs() << "LSL: not a reduction var because op has two internal uses: " << *I << "\n"); REMARK([&]() { @@ -151,128 +152,94 @@ static void enableUnsafeAlgebraIfReduction(PHINode *Phi, Loop *L, OptimizationRe }); (*K)->setHasAllowReassoc(true); (*K)->setHasAllowContract(true); + if (SE) + SE->forgetValue(*K); ++length; } ReductionChainLength += length; MaxChainLength.updateMax(length); } -static bool markLoopInfo(Module &M, Function *marker, function_ref GetLI) JL_NOTSAFEPOINT +static bool processLoop(Loop &L, OptimizationRemarkEmitter &ORE, ScalarEvolution *SE) JL_NOTSAFEPOINT { - bool Changed = false; - std::vector ToDelete; - for (User *U : marker->users()) { - ++TotalMarkedLoops; - Instruction *I = cast(U); - ToDelete.push_back(I); - - BasicBlock *B = I->getParent(); - OptimizationRemarkEmitter ORE(B->getParent()); - LoopInfo &LI = GetLI(*B->getParent()); - Loop *L = LI.getLoopFor(B); - if (!L) { - I->removeFromParent(); - continue; - } - - LLVM_DEBUG(dbgs() << "LSL: loopinfo marker found\n"); - bool simd = false; - bool ivdep = false; - SmallVector MDs; - - BasicBlock *Lh = L->getHeader(); - LLVM_DEBUG(dbgs() << "LSL: loop header: " << *Lh << "\n"); - - // Reserve first location for self reference to the LoopID metadata node. - TempMDTuple TempNode = MDNode::getTemporary(Lh->getContext(), None); - MDs.push_back(TempNode.get()); - - // Walk `julia.loopinfo` metadata and filter out `julia.simdloop` and `julia.ivdep` - if (I->hasMetadataOtherThanDebugLoc()) { - MDNode *JLMD= I->getMetadata("julia.loopinfo"); - if (JLMD) { - LLVM_DEBUG(dbgs() << "LSL: has julia.loopinfo metadata with " << JLMD->getNumOperands() <<" operands\n"); - for (unsigned i = 0, ie = JLMD->getNumOperands(); i < ie; ++i) { - Metadata *Op = JLMD->getOperand(i); - const MDString *S = dyn_cast(Op); - if (S) { - LLVM_DEBUG(dbgs() << "LSL: found " << S->getString() << "\n"); - if (S->getString().startswith("julia")) { - if (S->getString().equals("julia.simdloop")) - simd = true; - if (S->getString().equals("julia.ivdep")) - ivdep = true; - continue; - } - } - MDs.push_back(Op); - } - } - } - - LLVM_DEBUG(dbgs() << "LSL: simd: " << simd << " ivdep: " << ivdep << "\n"); - - REMARK([=]() { - return OptimizationRemarkAnalysis(DEBUG_TYPE, "Loop SIMD Flags", I->getDebugLoc(), B) - << "Loop marked for SIMD vectorization with flags { \"simd\": " << (simd ? "true" : "false") << ", \"ivdep\": " << (ivdep ? "true" : "false") << " }"; - }); - - MDNode *n = L->getLoopID(); - if (n) { - // Loop already has a LoopID so copy over Metadata - // original loop id is operand 0 - for (unsigned i = 1, ie = n->getNumOperands(); i < ie; ++i) { - Metadata *Op = n->getOperand(i); - MDs.push_back(Op); + MDNode *LoopID = L.getLoopID(); + if (!LoopID) + return false; + bool simd = false; + bool ivdep = false; + + BasicBlock *Lh = L.getHeader(); + LLVM_DEBUG(dbgs() << "LSL: loop header: " << *Lh << "\n"); + + SmallVector MDs(1); + // First Operand is self-reference + // Drop `julia.` prefixes + for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) { + Metadata *Op = LoopID->getOperand(i); + const MDString *S = dyn_cast(Op); + if (S) { + LLVM_DEBUG(dbgs() << "LSL: found " << S->getString() << "\n"); + if (S->getString().startswith("julia")) { + if (S->getString().equals("julia.simdloop")) + simd = true; + if (S->getString().equals("julia.ivdep")) + ivdep = true; + continue; } } - MDNode *LoopID = MDNode::getDistinct(Lh->getContext(), MDs); - // Replace the temporary node with a self-reference. - LoopID->replaceOperandWith(0, LoopID); - L->setLoopID(LoopID); - assert(L->getLoopID()); + MDs.push_back(Op); + } + LLVM_DEBUG(dbgs() << "LSL: simd: " << simd << " ivdep: " << ivdep << "\n"); + if (!simd && !ivdep) + return false; + + LLVMContext &Context = L.getHeader()->getContext(); + LoopID = MDNode::get(Context, MDs); + // Set operand 0 to refer to the loop id itself + LoopID->replaceOperandWith(0, LoopID); + L.setLoopID(LoopID); + + REMARK([&]() { + return OptimizationRemarkAnalysis(DEBUG_TYPE, "Loop SIMD Flags", L.getStartLoc(), L.getHeader()) + << "Loop marked for SIMD vectorization with flags { \"simd\": " << (simd ? "true" : "false") << ", \"ivdep\": " << (ivdep ? "true" : "false") << " }"; + }); + + // If ivdep is true we assume that there is no memory dependency between loop iterations + // This is a fairly strong assumption and does often not hold true for generic code. + if (ivdep) { + ++IVDepLoops; MDNode *m = MDNode::get(Lh->getContext(), ArrayRef(LoopID)); - - // If ivdep is true we assume that there is no memory dependency between loop iterations - // This is a fairly strong assumption and does often not hold true for generic code. - if (ivdep) { - ++IVDepLoops; - // Mark memory references so that Loop::isAnnotatedParallel will return true for this loop. - for (BasicBlock *BB : L->blocks()) { - for (Instruction &I : *BB) { - if (I.mayReadOrWriteMemory()) { - ++IVDepInstructions; - I.setMetadata(LLVMContext::MD_mem_parallel_loop_access, m); - } - } + // Mark memory references so that Loop::isAnnotatedParallel will return true for this loop. + for (BasicBlock *BB : L.blocks()) { + for (Instruction &I : *BB) { + if (I.mayReadOrWriteMemory()) { + ++IVDepInstructions; + I.setMetadata(LLVMContext::MD_mem_parallel_loop_access, m); + } } - assert(L->isAnnotatedParallel()); } + assert(L.isAnnotatedParallel()); + } - if (simd) { - ++SimdLoops; - // Mark floating-point reductions as okay to reassociate/commute. - for (BasicBlock::iterator I = Lh->begin(), E = Lh->end(); I != E; ++I) { - if (PHINode *Phi = dyn_cast(I)) - enableUnsafeAlgebraIfReduction(Phi, L, ORE); - else - break; - } + if (simd) { + ++SimdLoops; + // Mark floating-point reductions as okay to reassociate/commute. + for (BasicBlock::iterator I = Lh->begin(), E = Lh->end(); I != E; ++I) { + if (PHINode *Phi = dyn_cast(I)) + enableUnsafeAlgebraIfReduction(Phi, L, ORE, SE); + else + break; } - I->removeFromParent(); - - Changed = true; + if (SE) + SE->forgetLoopDispositions(&L); } - for (Instruction *I : ToDelete) - I->deleteValue(); - marker->eraseFromParent(); #ifdef JL_VERIFY_PASSES - assert(!verifyModule(M, &errs())); + assert(!verifyLLVMIR(L)); #endif - return Changed; + return true; } } // end anonymous namespace @@ -283,23 +250,19 @@ static bool markLoopInfo(Module &M, Function *marker, function_ref(M).getManager(); +PreservedAnalyses LowerSIMDLoopPass::run(Loop &L, LoopAnalysisManager &AM, + LoopStandardAnalysisResults &AR, LPMUpdater &U) - auto GetLI = [&FAM](Function &F) -> LoopInfo & { - return FAM.getResult(F); - }; - - if (markLoopInfo(M, loopinfo_marker, GetLI)) { - auto preserved = PreservedAnalyses::allInSet(); - preserved.preserve(); +{ + OptimizationRemarkEmitter ORE(L.getHeader()->getParent()); + if (processLoop(L, ORE, &AR.SE)) { +#ifdef JL_DEBUG_BUILD + if (AR.MSSA) + AR.MSSA->verifyMemorySSA(); +#endif + auto preserved = getLoopPassPreservedAnalyses(); + preserved.preserveSet(); + preserved.preserve(); return preserved; } @@ -307,37 +270,23 @@ PreservedAnalyses LowerSIMDLoopPass::run(Module &M, ModuleAnalysisManager &AM) } namespace { -class LowerSIMDLoopLegacy : public ModulePass { - //LowerSIMDLoop Impl; +class LowerSIMDLoopLegacy : public LoopPass { public: - static char ID; + static char ID; - LowerSIMDLoopLegacy() : ModulePass(ID) { - } - - bool runOnModule(Module &M) override { - bool Changed = false; - - Function *loopinfo_marker = M.getFunction("julia.loopinfo_marker"); - - auto GetLI = [this](Function &F) JL_NOTSAFEPOINT -> LoopInfo & { - return getAnalysis(F).getLoopInfo(); - }; - - if (loopinfo_marker) - Changed |= markLoopInfo(M, loopinfo_marker, GetLI); + LowerSIMDLoopLegacy() : LoopPass(ID) { + } - return Changed; - } + bool runOnLoop(Loop *L, LPPassManager &LPM) override + { + OptimizationRemarkEmitter ORE(L->getHeader()->getParent()); + return processLoop(*L, ORE, nullptr); + } - void getAnalysisUsage(AnalysisUsage &AU) const override - { - ModulePass::getAnalysisUsage(AU); - AU.addRequired(); - AU.addPreserved(); - AU.setPreservesCFG(); - } + void getAnalysisUsage(AnalysisUsage &AU) const override { + getLoopAnalysisUsage(AU); + } }; } // end anonymous namespace diff --git a/src/passes.h b/src/passes.h index 2bb33d6eec60d..030e5f1bfa677 100644 --- a/src/passes.h +++ b/src/passes.h @@ -57,11 +57,6 @@ struct RemoveNIPass : PassInfoMixin { static bool isRequired() { return true; } }; -struct LowerSIMDLoopPass : PassInfoMixin { - PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM) JL_NOTSAFEPOINT; - static bool isRequired() { return true; } -}; - struct FinalLowerGCPass : PassInfoMixin { PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM) JL_NOTSAFEPOINT; static bool isRequired() { return true; } @@ -103,6 +98,11 @@ struct JuliaLICMPass : PassInfoMixin { LoopStandardAnalysisResults &AR, LPMUpdater &U) JL_NOTSAFEPOINT; }; +struct LowerSIMDLoopPass : PassInfoMixin { + PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, + LoopStandardAnalysisResults &AR, LPMUpdater &U) JL_NOTSAFEPOINT; +}; + #define MODULE_MARKER_PASS(NAME) \ struct NAME##MarkerPass : PassInfoMixin { \ PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM) JL_NOTSAFEPOINT { return PreservedAnalyses::all(); } \ diff --git a/src/pipeline.cpp b/src/pipeline.cpp index 19cd085602594..88c2da5d729c9 100644 --- a/src/pipeline.cpp +++ b/src/pipeline.cpp @@ -402,6 +402,7 @@ static void buildLoopOptimizerPipeline(FunctionPassManager &FPM, PassBuilder *PB FPM.addPass(BeforeLoopOptimizationMarkerPass()); { LoopPassManager LPM; + LPM.addPass(LowerSIMDLoopPass()); if (O.getSpeedupLevel() >= 2) { LPM.addPass(LoopRotatePass()); } @@ -560,7 +561,6 @@ static void buildPipeline(ModulePassManager &MPM, PassBuilder *PB, OptimizationL buildEarlySimplificationPipeline(MPM, PB, O, options); MPM.addPass(AlwaysInlinerPass()); buildEarlyOptimizerPipeline(MPM, PB, O, options); - MPM.addPass(LowerSIMDLoopPass()); { FunctionPassManager FPM; buildLoopOptimizerPipeline(FPM, PB, O, options); diff --git a/test/llvmpasses/julia-simdloop-memoryssa.ll b/test/llvmpasses/julia-simdloop-memoryssa.ll new file mode 100644 index 0000000000000..0c1c4ac021996 --- /dev/null +++ b/test/llvmpasses/julia-simdloop-memoryssa.ll @@ -0,0 +1,55 @@ +; COM: NewPM-only test, tests that memoryssa is preserved correctly + +; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(loop-mssa(LowerSIMDLoop),print)' -S -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=CHECK + +; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(loop-mssa(LowerSIMDLoop),print)' -S -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=CHECK + +; CHECK-LABEL: MemorySSA for function: simd_test +; CHECK-LABEL: @simd_test( +define void @simd_test(double *%a, double *%b) { +; CHECK: top: +top: + br label %loop +; CHECK: loop: +loop: +; CHECK-NEXT: [[MPHI:[0-9]+]] = MemoryPhi({top,liveOnEntry},{loop,[[MSSA_USE:[0-9]+]]}) + %i = phi i64 [0, %top], [%nexti, %loop] + %aptr = getelementptr double, double *%a, i64 %i + %bptr = getelementptr double, double *%b, i64 %i +; CHECK: MemoryUse([[MPHI]]) MayAlias +; CHECK: llvm.mem.parallel_loop_access + %aval = load double, double *%aptr +; CHECK: MemoryUse([[MPHI]]) MayAlias + %bval = load double, double *%aptr + %cval = fadd double %aval, %bval +; CHECK: [[MSSA_USE]] = MemoryDef([[MPHI]]) + store double %cval, double *%bptr + %nexti = add i64 %i, 1 + %done = icmp sgt i64 %nexti, 500 + br i1 %done, label %loopdone, label %loop, !llvm.loop !1 +loopdone: + ret void +} + +; CHECK-LABEL: MemorySSA for function: simd_test_sub2 +; CHECK-LABEL: @simd_test_sub2( +define double @simd_test_sub2(double *%a) { +top: + br label %loop +loop: + %i = phi i64 [0, %top], [%nexti, %loop] + %v = phi double [0.000000e+00, %top], [%nextv, %loop] + %aptr = getelementptr double, double *%a, i64 %i +; CHECK: MemoryUse(liveOnEntry) MayAlias + %aval = load double, double *%aptr + %nextv = fsub double %v, %aval +; CHECK: fsub reassoc contract double %v, %aval + %nexti = add i64 %i, 1 + %done = icmp sgt i64 %nexti, 500 + br i1 %done, label %loopdone, label %loop, !llvm.loop !0 +loopdone: + ret double %nextv +} + +!0 = distinct !{!0, !"julia.simdloop"} +!1 = distinct !{!1, !"julia.simdloop", !"julia.ivdep"} \ No newline at end of file diff --git a/test/llvmpasses/simdloop.ll b/test/llvmpasses/julia-simdloop.ll similarity index 75% rename from test/llvmpasses/simdloop.ll rename to test/llvmpasses/julia-simdloop.ll index 929fbeea2c3f5..ad3e125893068 100644 --- a/test/llvmpasses/simdloop.ll +++ b/test/llvmpasses/julia-simdloop.ll @@ -1,12 +1,10 @@ ; This file is a part of Julia. License is MIT: https://julialang.org/license ; RUN: opt -enable-new-pm=0 --opaque-pointers=0 -load libjulia-codegen%shlibext -LowerSIMDLoop -S %s | FileCheck %s -; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='LowerSIMDLoop' -S %s | FileCheck %s +; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='loop(LowerSIMDLoop)' -S %s | FileCheck %s ; RUN: opt -enable-new-pm=0 --opaque-pointers=1 -load libjulia-codegen%shlibext -LowerSIMDLoop -S %s | FileCheck %s -; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='LowerSIMDLoop' -S %s | FileCheck %s - -declare void @julia.loopinfo_marker() +; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='loop(LowerSIMDLoop)' -S %s | FileCheck %s ; CHECK-LABEL: @simd_test( define void @simd_test(double *%a, double *%b) { @@ -22,9 +20,8 @@ loop: %cval = fadd double %aval, %bval store double %cval, double *%bptr %nexti = add i64 %i, 1 - call void @julia.loopinfo_marker(), !julia.loopinfo !3 %done = icmp sgt i64 %nexti, 500 - br i1 %done, label %loopdone, label %loop + br i1 %done, label %loopdone, label %loop, !llvm.loop !1 loopdone: ret void } @@ -42,9 +39,8 @@ loop: %nextv = fsub double %v, %aval ; CHECK: fsub reassoc contract double %v, %aval %nexti = add i64 %i, 1 - call void @julia.loopinfo_marker(), !julia.loopinfo !3 %done = icmp sgt i64 %nexti, 500 - br i1 %done, label %loopdone, label %loop + br i1 %done, label %loopdone, label %loop, !llvm.loop !1 loopdone: ret double %nextv } @@ -61,9 +57,8 @@ loop: %nextv = fsub double %v, %aval ; CHECK: fsub reassoc contract double %v, %aval %nexti = add i64 %i, 1 - call void @julia.loopinfo_marker(), !julia.loopinfo !2 %done = icmp sgt i64 %nexti, 500 - br i1 %done, label %loopdone, label %loop + br i1 %done, label %loopdone, label %loop, !llvm.loop !0 loopdone: ret double %nextv } @@ -82,20 +77,16 @@ for.body: ; preds = %for.body, %entry %arrayidx2 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv store i32 %add, i32* %arrayidx2, align 4 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 - call void @julia.loopinfo_marker(), !julia.loopinfo !4 %exitcond = icmp eq i64 %indvars.iv.next, 48 ; CHECK: br {{.*}} !llvm.loop [[LOOP:![0-9]+]] - br i1 %exitcond, label %for.end, label %for.body + br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !2 for.end: ; preds = %for.body %1 = load i32, i32* %a, align 4 ret i32 %1 } -!1 = !{} -!2 = !{!"julia.simdloop"} -!3 = !{!"julia.simdloop", !"julia.ivdep"} -!4 = !{!"julia.simdloop", !"julia.ivdep", !5} -!5 = !{!"llvm.loop.vectorize.disable", i1 0} -; CHECK: [[LOOP]] = distinct !{[[LOOP]], [[LOOP_DISABLE:![0-9]+]]} -; CHECK-NEXT: [[LOOP_DISABLE]] = !{!"llvm.loop.vectorize.disable", i1 false} +!0 = distinct !{!0, !"julia.simdloop"} +!1 = distinct !{!1, !"julia.simdloop", !"julia.ivdep"} +!2 = distinct !{!2, !"julia.simdloop", !"julia.ivdep", !3} +!3 = !{!"llvm.loop.vectorize.disable", i1 0} diff --git a/test/llvmpasses/loopinfo.jl b/test/llvmpasses/loopinfo.jl index b9b388c73d0c5..559793b70b27f 100644 --- a/test/llvmpasses/loopinfo.jl +++ b/test/llvmpasses/loopinfo.jl @@ -3,7 +3,7 @@ # RUN: julia --startup-file=no %s %t && llvm-link -S %t/* -o %t/module.ll # RUN: cat %t/module.ll | FileCheck %s # RUN: cat %t/module.ll | opt -enable-new-pm=0 -load libjulia-codegen%shlibext -LowerSIMDLoop -S - | FileCheck %s -check-prefix=LOWER -# RUN: cat %t/module.ll | opt -enable-new-pm=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='LowerSIMDLoop' -S - | FileCheck %s -check-prefix=LOWER +# RUN: cat %t/module.ll | opt -enable-new-pm=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='loop(LowerSIMDLoop)' -S - | FileCheck %s -check-prefix=LOWER # RUN: julia --startup-file=no %s %t -O && llvm-link -S %t/* -o %t/module.ll # RUN: cat %t/module.ll | FileCheck %s -check-prefix=FINAL @@ -27,10 +27,9 @@ function simdf(X) acc = zero(eltype(X)) @simd for x in X acc += x -# CHECK: call void @julia.loopinfo_marker(), {{.*}}, !julia.loopinfo [[LOOPINFO:![0-9]+]] +# CHECK: br {{.*}}, !llvm.loop [[LOOPID:![0-9]+]] # LOWER-NOT: llvm.mem.parallel_loop_access # LOWER: fadd reassoc contract double -# LOWER-NOT: call void @julia.loopinfo_marker() # LOWER: br {{.*}}, !llvm.loop [[LOOPID:![0-9]+]] # FINAL: fadd reassoc contract <{{(vscale x )?}}{{[0-9]+}} x double> end @@ -43,9 +42,8 @@ function simdf2(X) acc = zero(eltype(X)) @simd ivdep for x in X acc += x -# CHECK: call void @julia.loopinfo_marker(), {{.*}}, !julia.loopinfo [[LOOPINFO2:![0-9]+]] +# CHECK: br {{.*}}, !llvm.loop [[LOOPID2:![0-9]+]] # LOWER: llvm.mem.parallel_loop_access -# LOWER-NOT: call void @julia.loopinfo_marker() # LOWER: fadd reassoc contract double # LOWER: br {{.*}}, !llvm.loop [[LOOPID2:![0-9]+]] end @@ -61,8 +59,7 @@ end for i in 1:N iteration(i) $(Expr(:loopinfo, (Symbol("llvm.loop.unroll.count"), 3))) -# CHECK: call void @julia.loopinfo_marker(), {{.*}}, !julia.loopinfo [[LOOPINFO3:![0-9]+]] -# LOWER-NOT: call void @julia.loopinfo_marker() +# CHECK: br {{.*}}, !llvm.loop [[LOOPID3:![0-9]+]] # LOWER: br {{.*}}, !llvm.loop [[LOOPID3:![0-9]+]] # FINAL: call {{(swiftcc )?}}void @j_iteration # FINAL: call {{(swiftcc )?}}void @j_iteration @@ -87,8 +84,7 @@ end iteration(i) end $(Expr(:loopinfo, (Symbol("llvm.loop.unroll.full"),))) -# CHECK: call void @julia.loopinfo_marker(), {{.*}}, !julia.loopinfo [[LOOPINFO4:![0-9]+]] -# LOWER-NOT: call void @julia.loopinfo_marker() +# CHECK: br {{.*}}, !llvm.loop [[LOOPID4:![0-9]+]] # LOWER: br {{.*}}, !llvm.loop [[LOOPID4:![0-9]+]] # FINAL: call {{(swiftcc )?}}void @j_iteration # FINAL: call {{(swiftcc )?}}void @j_iteration @@ -119,11 +115,11 @@ end end ## Check all the MD nodes -# CHECK: [[LOOPINFO]] = !{!"julia.simdloop"} -# CHECK: [[LOOPINFO2]] = !{!"julia.simdloop", !"julia.ivdep"} -# CHECK: [[LOOPINFO3]] = !{[[LOOPUNROLL:![0-9]+]]} +# CHECK: [[LOOPID]] = distinct !{[[LOOPID]], !"julia.simdloop"} +# CHECK: [[LOOPID2]] = distinct !{[[LOOPID2]], !"julia.simdloop", !"julia.ivdep"} +# CHECK: [[LOOPID3]] = distinct !{[[LOOPID3]], [[LOOPUNROLL:![0-9]+]]} # CHECK: [[LOOPUNROLL]] = !{!"llvm.loop.unroll.count", i64 3} -# CHECK: [[LOOPINFO4]] = !{[[LOOPUNROLL2:![0-9]+]]} +# CHECK: [[LOOPID4]] = distinct !{[[LOOPID4]], [[LOOPUNROLL2:![0-9]+]]} # CHECK: [[LOOPUNROLL2]] = !{!"llvm.loop.unroll.full"} # LOWER: [[LOOPID]] = distinct !{[[LOOPID]]} # LOWER: [[LOOPID2]] = distinct !{[[LOOPID2]]} diff --git a/test/llvmpasses/parsing.ll b/test/llvmpasses/parsing.ll index 434ffbb26c95f..dadf7db62db57 100644 --- a/test/llvmpasses/parsing.ll +++ b/test/llvmpasses/parsing.ll @@ -1,6 +1,6 @@ ; COM: NewPM-only test, tests for ability to parse Julia passes -; RUN: opt --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='module(CPUFeatures,RemoveNI,LowerSIMDLoop,FinalLowerGC,JuliaMultiVersioning,RemoveJuliaAddrspaces,LowerPTLSPass,function(DemoteFloat16,CombineMulAdd,LateLowerGCFrame,AllocOpt,PropagateJuliaAddrspaces,LowerExcHandlers,GCInvariantVerifier,loop(JuliaLICM),GCInvariantVerifier,GCInvariantVerifier),LowerPTLSPass,LowerPTLSPass,JuliaMultiVersioning,JuliaMultiVersioning)' -S %s -o /dev/null +; RUN: opt --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='module(CPUFeatures,RemoveNI,FinalLowerGC,JuliaMultiVersioning,RemoveJuliaAddrspaces,LowerPTLSPass,function(DemoteFloat16,CombineMulAdd,LateLowerGCFrame,AllocOpt,PropagateJuliaAddrspaces,LowerExcHandlers,GCInvariantVerifier,loop(LowerSIMDLoop,JuliaLICM),GCInvariantVerifier,GCInvariantVerifier),LowerPTLSPass,LowerPTLSPass,JuliaMultiVersioning,JuliaMultiVersioning)' -S %s -o /dev/null define void @test() { ret void From bb8deca94f7a546c9488d9340b4fd244b7eea482 Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Thu, 26 Oct 2023 17:06:20 -0400 Subject: [PATCH 2/4] fix verify --- src/llvm-simdloop.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/llvm-simdloop.cpp b/src/llvm-simdloop.cpp index f6a1e3a501a98..9c1e6799f7c9e 100644 --- a/src/llvm-simdloop.cpp +++ b/src/llvm-simdloop.cpp @@ -237,7 +237,7 @@ static bool processLoop(Loop &L, OptimizationRemarkEmitter &ORE, ScalarEvolution } #ifdef JL_VERIFY_PASSES - assert(!verifyLLVMIR(L)); + assert(!verifyFunction(*L->getHeader()->getParent(), &errs())); #endif return true; } From 4fa22e236bce2dbad3b2d698a23c9ddfc7c8e81b Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Fri, 27 Oct 2023 11:58:31 -0400 Subject: [PATCH 3/4] fixup! fix verify --- src/llvm-simdloop.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/llvm-simdloop.cpp b/src/llvm-simdloop.cpp index 9c1e6799f7c9e..da3f10b1fa6d2 100644 --- a/src/llvm-simdloop.cpp +++ b/src/llvm-simdloop.cpp @@ -194,6 +194,7 @@ static bool processLoop(Loop &L, OptimizationRemarkEmitter &ORE, ScalarEvolution if (!simd && !ivdep) return false; + ++TotalMarkedLoops; LLVMContext &Context = L.getHeader()->getContext(); LoopID = MDNode::get(Context, MDs); // Set operand 0 to refer to the loop id itself @@ -237,7 +238,7 @@ static bool processLoop(Loop &L, OptimizationRemarkEmitter &ORE, ScalarEvolution } #ifdef JL_VERIFY_PASSES - assert(!verifyFunction(*L->getHeader()->getParent(), &errs())); + assert(!verifyFunction(L.getHeader()->getParent(), &errs())); #endif return true; } From 170ad06b23e481d5988cc4515c3ed095a61b023a Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Fri, 27 Oct 2023 21:24:23 -0400 Subject: [PATCH 4/4] fixup! fixup! fix verify --- src/llvm-simdloop.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/llvm-simdloop.cpp b/src/llvm-simdloop.cpp index da3f10b1fa6d2..ad17e71aff3f5 100644 --- a/src/llvm-simdloop.cpp +++ b/src/llvm-simdloop.cpp @@ -238,7 +238,7 @@ static bool processLoop(Loop &L, OptimizationRemarkEmitter &ORE, ScalarEvolution } #ifdef JL_VERIFY_PASSES - assert(!verifyFunction(L.getHeader()->getParent(), &errs())); + assert(!verifyFunction(*L.getHeader()->getParent(), &errs())); #endif return true; }