From 4e4359207d71bd35b08fbb0af38a50498d794871 Mon Sep 17 00:00:00 2001 From: Yichao Yu Date: Tue, 9 Jun 2015 00:15:46 -0400 Subject: [PATCH 1/6] Revert "Revert "Remove unnecessary entries in jl_gcinfo_t. Update invalid instructions in finalize_gc_frame", which seems to cause windows CI hanging." This reverts commit 7b1895a64355e0d7eaf6ce9e8422e9d09a2f4d7b. --- src/codegen.cpp | 31 ++++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/src/codegen.cpp b/src/codegen.cpp index 1eefa0d47f3d1..7273781b65e05 100644 --- a/src/codegen.cpp +++ b/src/codegen.cpp @@ -521,13 +521,12 @@ struct jl_gcinfo_t { int argDepth; int maxDepth; int argSpaceOffs; + Instruction *gcframe; /* This should always be the first instruction to + * create the GC frame */ #ifdef JL_GC_MARKSWEEP - Instruction *gcframe; - Instruction *argSpaceInits; StoreInst *storeFrameSize; #endif - BasicBlock::iterator first_gcframe_inst; - BasicBlock::iterator last_gcframe_inst; + Instruction *last_gcframe_inst; std::vector gc_frame_pops; }; @@ -3448,8 +3447,7 @@ static void allocate_gc_frame(size_t n_roots, BasicBlock *b0, jl_codectx_t *ctx) // in finalize_gc_frame. // (Add back first_gcframe_inst if this is not true anymore) gc->gcframe = (Instruction*)builder.CreateAlloca( - jl_pvalue_llvmt, ConstantInt::get(T_int32,n_roots + 2)); - gc->first_gcframe_inst = BasicBlock::iterator(gc->gcframe); + jl_pvalue_llvmt, ConstantInt::get(T_int32, n_roots + 2)); gc->argTemp = (Instruction*)builder.CreateConstGEP1_32(gc->gcframe, 2); gc->storeFrameSize = builder.CreateStore(ConstantInt::get(T_size, n_roots<<1), @@ -3457,13 +3455,12 @@ static void allocate_gc_frame(size_t n_roots, BasicBlock *b0, jl_codectx_t *ctx) builder.CreateStore(builder.CreateLoad(prepare_global(jlpgcstack_var), false), builder.CreateBitCast(builder.CreateConstGEP1_32(gc->gcframe, 1), PointerType::get(jl_ppvalue_llvmt,0))); Instruction *linst = builder.CreateStore(gc->gcframe, prepare_global(jlpgcstack_var), false); - gc->argSpaceInits = &b0->back(); #else // gc->gcframe is assumed to be the first instruction creating the gc frame // in finalize_gc_frame gc->argTemp = builder.CreateAlloca(jl_pvalue_llvmt, ConstantInt::get(T_int32, n_roots)); - gc->first_gcframe_inst = BasicBlock::iterator(gc->argTemp); + gc->gcframe = gc->argTemp; Instruction *linst = gc->argTemp; #endif // initialize local variable stack roots to null @@ -3471,19 +3468,20 @@ static void allocate_gc_frame(size_t n_roots, BasicBlock *b0, jl_codectx_t *ctx) Value *varSlot = emit_local_slot(i, ctx); linst = builder.CreateStore(V_null, varSlot); } - gc->last_gcframe_inst = BasicBlock::iterator(linst); + gc->last_gcframe_inst = linst; } static void clear_gc_frame(jl_gcinfo_t *gc) { // replace instruction uses with Undef first to avoid LLVM assertion failures - BasicBlock::iterator bbi = gc->first_gcframe_inst; + BasicBlock::iterator bbi(gc->gcframe); + BasicBlock::iterator last_gcframe_inst(gc->last_gcframe_inst); while (1) { Instruction &iii = *bbi; Type *ty = iii.getType(); if (ty != T_void) iii.replaceAllUsesWith(UndefValue::get(ty)); - if (bbi == gc->last_gcframe_inst) break; + if (bbi == last_gcframe_inst) break; bbi++; } for (size_t i=0; i < gc->gc_frame_pops.size(); i++) { @@ -3501,9 +3499,9 @@ static void clear_gc_frame(jl_gcinfo_t *gc) // Remove GC frame creation // (instructions from gc->gcframe to gc->last_gcframe_inst) BasicBlock::InstListType &il = gc->gcframe->getParent()->getInstList(); - il.erase(gc->first_gcframe_inst, gc->last_gcframe_inst); + il.erase(BasicBlock::iterator(gc->gcframe), last_gcframe_inst); // erase() erases up *to* the end point; erase last inst too - il.erase(gc->last_gcframe_inst); + il.erase(last_gcframe_inst); // Remove GC pops // (4 instructions from each element in the gc->gc_frame_pops) for (size_t i=0; i < gc->gc_frame_pops.size(); i++) { @@ -3534,6 +3532,7 @@ static void finalize_gc_frame(jl_codectx_t *ctx) gc->maxDepth + 2))); ReplaceInstWithInst(gc->gcframe->getParent()->getInstList(), bbi, newgcframe); + gc->gcframe = newgcframe; BasicBlock::iterator bbi2(gc->storeFrameSize); StoreInst *newFrameSize = @@ -3542,9 +3541,10 @@ static void finalize_gc_frame(jl_codectx_t *ctx) gc->storeFrameSize->getPointerOperand()); ReplaceInstWithInst(gc->storeFrameSize->getParent()->getInstList(), bbi2, newFrameSize); + gc->storeFrameSize = newFrameSize; - BasicBlock::InstListType &instList = gc->argSpaceInits->getParent()->getInstList(); - Instruction *after = gc->argSpaceInits; + Instruction *after = gc->last_gcframe_inst; + BasicBlock::InstListType &instList = after->getParent()->getInstList(); // Initialize the slots for temporary variables to NULL for (int i = 0;i < gc->maxDepth;i++) { @@ -3553,6 +3553,7 @@ static void finalize_gc_frame(jl_codectx_t *ctx) after = new StoreInst(V_null, argTempi); instList.insertAfter(argTempi, after); } + gc->last_gcframe_inst = after; } #else if (gc->maxDepth != 0) { From 9a8701d373987b126c6fd85be230af6d5734a578 Mon Sep 17 00:00:00 2001 From: Yichao Yu Date: Sat, 30 May 2015 21:48:38 -0400 Subject: [PATCH 2/6] Create local gc frame for throw --- src/codegen.cpp | 58 ++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 57 insertions(+), 1 deletion(-) diff --git a/src/codegen.cpp b/src/codegen.cpp index 7273781b65e05..5ddf989ea3f53 100644 --- a/src/codegen.cpp +++ b/src/codegen.cpp @@ -530,6 +530,26 @@ struct jl_gcinfo_t { std::vector gc_frame_pops; }; +namespace std { + +template<> +void +swap(jl_gcinfo_t &lhs, jl_gcinfo_t &rhs) +{ + swap(lhs.argTemp, rhs.argTemp); + swap(lhs.argDepth, rhs.argDepth); + swap(lhs.maxDepth, rhs.maxDepth); + swap(lhs.argSpaceOffs, rhs.argSpaceOffs); + swap(lhs.gcframe, rhs.gcframe); +#ifdef JL_GC_MARKSWEEP + swap(lhs.storeFrameSize, rhs.storeFrameSize); +#endif + swap(lhs.last_gcframe_inst, rhs.last_gcframe_inst); + swap(lhs.gc_frame_pops, rhs.gc_frame_pops); +} + +} + // information about the context of a piece of code: its enclosing // function and module, and visible local variables and labels. typedef struct { @@ -588,6 +608,40 @@ static void emit_gcpop(jl_codectx_t *ctx); static void allocate_gc_frame(size_t n_roots, BasicBlock *b0, jl_codectx_t *ctx); static void finalize_gc_frame(jl_codectx_t *ctx); +class LocalGCFrame { + jl_codectx_t *ctx; + jl_gcinfo_t gc; + bool frame_swapped; + + LocalGCFrame(const LocalGCFrame&); // = delete; +public: + LocalGCFrame(jl_codectx_t *_ctx, bool create=true) + : ctx(_ctx), + gc(), + frame_swapped(false) + { + if (create && ctx->gc.argSpaceOffs + ctx->gc.maxDepth == 0) { + std::swap(gc, ctx->gc); + allocate_gc_frame(0, builder.GetInsertBlock(), ctx); + frame_swapped = true; + } + } + void + restore() + { + if (frame_swapped) { + emit_gcpop(ctx); + finalize_gc_frame(ctx); + std::swap(gc, ctx->gc); + frame_swapped = false; + } + } + ~LocalGCFrame() + { + restore(); + } +}; + // NoopType static Type *NoopType; @@ -2139,9 +2193,11 @@ static Value *emit_known_call(jl_value_t *ff, jl_value_t **args, size_t nargs, } } else if (f->fptr == &jl_f_throw && nargs==1) { + LocalGCFrame local_gc_frame(ctx); Value *arg1 = boxed(emit_expr(args[1], ctx), ctx); - JL_GC_POP(); raise_exception_unless(ConstantInt::get(T_int1,0), arg1, ctx); + local_gc_frame.restore(); + JL_GC_POP(); return V_null; } else if (f->fptr == &jl_f_arraylen && nargs==1) { From 3ceac27d4acd68a2f490c7ab2f7b741ff470fce5 Mon Sep 17 00:00:00 2001 From: Yichao Yu Date: Wed, 3 Jun 2015 23:15:40 -0400 Subject: [PATCH 3/6] Working GC frame merge --- src/codegen.cpp | 54 +++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 50 insertions(+), 4 deletions(-) diff --git a/src/codegen.cpp b/src/codegen.cpp index 5ddf989ea3f53..bfff64c80e529 100644 --- a/src/codegen.cpp +++ b/src/codegen.cpp @@ -91,6 +91,7 @@ #include #include #include +#include using namespace llvm; #if LLVM37 using namespace llvm::legacy; @@ -528,6 +529,8 @@ struct jl_gcinfo_t { #endif Instruction *last_gcframe_inst; std::vector gc_frame_pops; + std::vector > gc_frame_geps; + std::vector > child_frames; }; namespace std { @@ -546,6 +549,8 @@ swap(jl_gcinfo_t &lhs, jl_gcinfo_t &rhs) #endif swap(lhs.last_gcframe_inst, rhs.last_gcframe_inst); swap(lhs.gc_frame_pops, rhs.gc_frame_pops); + swap(lhs.gc_frame_geps, rhs.gc_frame_geps); + swap(lhs.child_frames, rhs.child_frames); } } @@ -633,6 +638,15 @@ class LocalGCFrame { emit_gcpop(ctx); finalize_gc_frame(ctx); std::swap(gc, ctx->gc); + if (gc.argSpaceOffs + gc.maxDepth != 0) { + ctx->gc.child_frames.push_back(std::make_pair(jl_gcinfo_t(), 0)); + std::swap(ctx->gc.child_frames.back().first, gc); + } else if (gc.child_frames.size()) { + ctx->gc.child_frames.reserve(gc.child_frames.size()); + ctx->gc.child_frames.insert(ctx->gc.child_frames.end(), + gc.child_frames.begin(), + gc.child_frames.end()); + } frame_swapped = false; } } @@ -1586,9 +1600,15 @@ static void simple_escape_analysis(jl_value_t *expr, bool esc, jl_codectx_t *ctx // Emit GEP for the @slot-th slot in the GC frame static Value* -emit_local_slot(int slot, jl_codectx_t *ctx) +emit_local_slot(int slot, jl_codectx_t *ctx, bool record=true) { - return builder.CreateConstGEP1_32(ctx->gc.argTemp, slot); + jl_gcinfo_t *gc = &ctx->gc; + Instruction *ins = + (Instruction*)builder.CreateConstGEP1_32(gc->argTemp, slot); + if (record) { + gc->gc_frame_geps.push_back(std::make_pair(ins, slot)); + } + return ins; } // Emit GEP for the @slot-th temporary variable in the GC frame. @@ -1602,7 +1622,7 @@ emit_temp_slot(int slot, jl_codectx_t *ctx) // Create a GEP instruction for the @slot-th slot in the GC frame. // (without inserting it) static Instruction* -create_local_slot(int slot, jl_codectx_t *ctx) +create_local_slot(int slot, jl_codectx_t *ctx, bool record=true) { jl_gcinfo_t *gc = &ctx->gc; #ifdef LLVM37 @@ -1613,6 +1633,9 @@ create_local_slot(int slot, jl_codectx_t *ctx) Instruction *ins = GetElementPtrInst::Create(gc->argTemp, ConstantInt::get(T_int32, slot)); #endif + if (record) { + gc->gc_frame_geps.push_back(std::make_pair(ins, slot)); + } return ins; } @@ -3520,7 +3543,7 @@ static void allocate_gc_frame(size_t n_roots, BasicBlock *b0, jl_codectx_t *ctx) Instruction *linst = gc->argTemp; #endif // initialize local variable stack roots to null - for(size_t i=0; i < (size_t)gc->argSpaceOffs; i++) { + for (int i = 0; i < gc->argSpaceOffs;i++) { Value *varSlot = emit_local_slot(i, ctx); linst = builder.CreateStore(V_null, varSlot); } @@ -3570,6 +3593,22 @@ static void clear_gc_frame(jl_gcinfo_t *gc) } } +static void merge_gc_frame(jl_codectx_t *ctx, jl_gcinfo_t *from, int offset) +{ + for (size_t i = 0;i < from->gc_frame_geps.size();i++) { + Instruction *gep = from->gc_frame_geps[i].first; + BasicBlock::iterator bbi(gep); + int slot = from->gc_frame_geps[i].second; + Instruction *new_gep = create_local_slot(slot + offset, ctx); + ReplaceInstWithInst(gep->getParent()->getInstList(), bbi, new_gep); + } + from->gc_frame_geps.clear(); + if (ctx->gc.maxDepth < offset + from->maxDepth) { + ctx->gc.maxDepth = offset + from->maxDepth; + } + clear_gc_frame(from); +} + static void finalize_gc_frame(jl_codectx_t *ctx) { jl_gcinfo_t *gc = &ctx->gc; @@ -3579,6 +3618,13 @@ static void finalize_gc_frame(jl_codectx_t *ctx) clear_gc_frame(gc); } else { + // Merge child frames if necessary + for (size_t i = 0;i < gc->child_frames.size();i++) { + jl_gcinfo_t &cld = gc->child_frames[i].first; + int offset = gc->child_frames[i].second; + merge_gc_frame(ctx, &cld, offset); + } + gc->child_frames.clear(); // n_frames++; // Fix the size of the GC frame created BasicBlock::iterator bbi(gc->gcframe); From 7be57a43c38a8f9b86a5a668341eb5f4eb196c50 Mon Sep 17 00:00:00 2001 From: Yichao Yu Date: Sun, 31 May 2015 15:37:05 -0400 Subject: [PATCH 4/6] Clean up, restore gcframe depth if the parent is not empty --- src/codegen.cpp | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/src/codegen.cpp b/src/codegen.cpp index bfff64c80e529..e9561e16147f9 100644 --- a/src/codegen.cpp +++ b/src/codegen.cpp @@ -530,7 +530,7 @@ struct jl_gcinfo_t { Instruction *last_gcframe_inst; std::vector gc_frame_pops; std::vector > gc_frame_geps; - std::vector > child_frames; + std::vector child_frames; }; namespace std { @@ -616,19 +616,25 @@ static void finalize_gc_frame(jl_codectx_t *ctx); class LocalGCFrame { jl_codectx_t *ctx; jl_gcinfo_t gc; + int depth; bool frame_swapped; LocalGCFrame(const LocalGCFrame&); // = delete; public: - LocalGCFrame(jl_codectx_t *_ctx, bool create=true) + LocalGCFrame(jl_codectx_t *_ctx, bool enabled=true) : ctx(_ctx), gc(), + depth(-1), frame_swapped(false) { - if (create && ctx->gc.argSpaceOffs + ctx->gc.maxDepth == 0) { + if (!enabled) + return; + if (ctx->gc.argSpaceOffs + ctx->gc.maxDepth == 0) { std::swap(gc, ctx->gc); allocate_gc_frame(0, builder.GetInsertBlock(), ctx); frame_swapped = true; + } else { + depth = ctx->gc.argDepth; } } void @@ -639,8 +645,8 @@ class LocalGCFrame { finalize_gc_frame(ctx); std::swap(gc, ctx->gc); if (gc.argSpaceOffs + gc.maxDepth != 0) { - ctx->gc.child_frames.push_back(std::make_pair(jl_gcinfo_t(), 0)); - std::swap(ctx->gc.child_frames.back().first, gc); + ctx->gc.child_frames.push_back(jl_gcinfo_t()); + std::swap(ctx->gc.child_frames.back(), gc); } else if (gc.child_frames.size()) { ctx->gc.child_frames.reserve(gc.child_frames.size()); ctx->gc.child_frames.insert(ctx->gc.child_frames.end(), @@ -648,6 +654,9 @@ class LocalGCFrame { gc.child_frames.end()); } frame_swapped = false; + } else if (depth >= 0) { + ctx->gc.argDepth = depth; + depth = -1; } } ~LocalGCFrame() @@ -3620,9 +3629,8 @@ static void finalize_gc_frame(jl_codectx_t *ctx) else { // Merge child frames if necessary for (size_t i = 0;i < gc->child_frames.size();i++) { - jl_gcinfo_t &cld = gc->child_frames[i].first; - int offset = gc->child_frames[i].second; - merge_gc_frame(ctx, &cld, offset); + jl_gcinfo_t &cld = gc->child_frames[i]; + merge_gc_frame(ctx, &cld, 0); } gc->child_frames.clear(); // n_frames++; From aeb49024d5052b68a8d423aaa6b325767b60f965 Mon Sep 17 00:00:00 2001 From: Yichao Yu Date: Thu, 4 Jun 2015 12:25:06 -0400 Subject: [PATCH 5/6] Do not inline throw at julia level --- base/inference.jl | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/base/inference.jl b/base/inference.jl index ceace446355a2..5c98a223c7a71 100644 --- a/base/inference.jl +++ b/base/inference.jl @@ -2798,6 +2798,10 @@ function inlining_pass(e::Expr, sv, ast) elseif is_known_call(e, Core.Intrinsics.llvmcall, sv) i0 = 5 isccall = false + elseif is_known_call(e, Core.throw, sv) + # Do not inline throw (or it's arguments). They should not be + # in the hot path and they can cause code inflation + return (e, ()) else i0 = 1 isccall = false From 566aff94cdf6e2e1d44125b64d10bff30e37a432 Mon Sep 17 00:00:00 2001 From: Yichao Yu Date: Thu, 4 Jun 2015 21:34:21 -0400 Subject: [PATCH 6/6] Make the code after throwing an error unreachable and do not even emit GC pop after throw --- src/codegen.cpp | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/codegen.cpp b/src/codegen.cpp index e9561e16147f9..cdd844505b897 100644 --- a/src/codegen.cpp +++ b/src/codegen.cpp @@ -638,10 +638,11 @@ class LocalGCFrame { } } void - restore() + restore(bool unreachable=false) { if (frame_swapped) { - emit_gcpop(ctx); + if (!unreachable) + emit_gcpop(ctx); finalize_gc_frame(ctx); std::swap(gc, ctx->gc); if (gc.argSpaceOffs + gc.maxDepth != 0) { @@ -2228,7 +2229,9 @@ static Value *emit_known_call(jl_value_t *ff, jl_value_t **args, size_t nargs, LocalGCFrame local_gc_frame(ctx); Value *arg1 = boxed(emit_expr(args[1], ctx), ctx); raise_exception_unless(ConstantInt::get(T_int1,0), arg1, ctx); - local_gc_frame.restore(); + // Update GC frame creation. The GC pop instructions should be removed + // by LLVM anyway but not emitting those instructions save us some work + local_gc_frame.restore(true); // unreachable=true JL_GC_POP(); return V_null; }