-
Notifications
You must be signed in to change notification settings - Fork 43
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Error with Julia 1.10 #274
Comments
Julia 1.10:
source_filename = "start"
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64-v96:128:128-v128:128:128-v192:256:256-v256:256:256-v512:512:512-v1024:1024:1024-n8:16:32"
target triple = "air64-apple-macosx14.2.1"
; Function Attrs: cold noreturn nounwind
declare void @llvm.trap() #0
define internal void @gpu_report_exception() unnamed_addr {
top:
ret void
}
define internal void @gpu_signal_exception() unnamed_addr {
top:
ret void
}
define void @_Z4vadd14MtlDeviceArrayI5Int64Li1ELi1EES_IS0_Li1ELi1EES_IS0_Li1ELi1EE({ i8 addrspace(1)*, [1 x i64] } addrspace(1)* %0, { i8 addrspace(1)*, [1 x i64] } addrspace(1)* %1, { i8 addrspace(1)*, [1 x i64] } addrspace(1)* %2, i32 %thread_position_in_grid) local_unnamed_addr {
conversion:
%3 = getelementptr inbounds { i8 addrspace(1)*, [1 x i64] }, { i8 addrspace(1)*, [1 x i64] } addrspace(1)* %0, i64 0, i32 1, i64 0
%.unpack10.unpack = load i64, i64 addrspace(1)* %3, align 8
%4 = bitcast { i8 addrspace(1)*, [1 x i64] } addrspace(1)* %1 to i64 addrspace(1)* addrspace(1)*
%.unpack16 = load i64 addrspace(1)*, i64 addrspace(1)* addrspace(1)* %4, align 8
%5 = bitcast { i8 addrspace(1)*, [1 x i64] } addrspace(1)* %2 to i64 addrspace(1)* addrspace(1)*
%.unpack20 = load i64 addrspace(1)*, i64 addrspace(1)* addrspace(1)* %5, align 8
%6 = getelementptr inbounds { i8 addrspace(1)*, [1 x i64] }, { i8 addrspace(1)*, [1 x i64] } addrspace(1)* %2, i64 0, i32 1, i64 0
%.unpack18.unpack = load i64, i64 addrspace(1)* %6, align 8
%7 = add i32 %thread_position_in_grid, 1
%8 = call i64 @air.max.s64(i64 %.unpack10.unpack, i64 0)
%9 = icmp eq i32 %7, 0
%10 = zext i32 %7 to i64
%11 = icmp ult i64 %8, %10
%12 = or i1 %9, %11
br i1 %12, label %L20, label %L23
L20: ; preds = %conversion
call void @gpu_report_exception()
call void @gpu_signal_exception()
call void @llvm.trap()
unreachable
L23: ; preds = %conversion
%13 = getelementptr inbounds { i8 addrspace(1)*, [1 x i64] }, { i8 addrspace(1)*, [1 x i64] } addrspace(1)* %1, i64 0, i32 1, i64 0
%.unpack14.unpack = load i64, i64 addrspace(1)* %13, align 8
%14 = bitcast { i8 addrspace(1)*, [1 x i64] } addrspace(1)* %0 to i64 addrspace(1)* addrspace(1)*
%.unpack12 = load i64 addrspace(1)*, i64 addrspace(1)* addrspace(1)* %14, align 8
%15 = sext i32 %thread_position_in_grid to i64
%16 = getelementptr inbounds i64, i64 addrspace(1)* %.unpack12, i64 %15
%17 = load i64, i64 addrspace(1)* %16, align 8, !tbaa !20
%18 = call i64 @air.max.s64(i64 %.unpack14.unpack, i64 0)
%.not = icmp ult i64 %18, %10
br i1 %.not, label %L46, label %L49
L46: ; preds = %L23
call void @gpu_report_exception()
call void @gpu_signal_exception()
call void @llvm.trap()
unreachable
L49: ; preds = %L23
%19 = call i64 @air.max.s64(i64 %.unpack18.unpack, i64 0)
%20 = icmp ult i64 %19, %10
br i1 %20, label %L73, label %L76
L73: ; preds = %L49
call void @gpu_report_exception()
call void @gpu_signal_exception()
call void @llvm.trap()
unreachable
L76: ; preds = %L49
%21 = getelementptr inbounds i64, i64 addrspace(1)* %.unpack16, i64 %15
%22 = load i64, i64 addrspace(1)* %21, align 8, !tbaa !20
%23 = add i64 %22, %17
%24 = getelementptr inbounds i64, i64 addrspace(1)* %.unpack20, i64 %15
store i64 %23, i64 addrspace(1)* %24, align 8, !tbaa !20
ret void
}
declare i64 @air.max.s64(i64, i64) local_unnamed_addr
attributes #0 = { cold noreturn nounwind }
!llvm.module.flags = !{!0, !1, !2, !3, !4, !5, !6, !7, !8}
!julia.kernel = !{!9}
!air.kernel = !{!10}
!llvm.ident = !{!17}
!air.version = !{!18}
!air.language_version = !{!19}
!0 = !{i32 2, !"Dwarf Version", i32 4}
!1 = !{i32 2, !"Debug Info Version", i32 3}
!2 = !{i32 7, !"air.max_device_buffers", i32 31}
!3 = !{i32 7, !"air.max_constant_buffers", i32 31}
!4 = !{i32 7, !"air.max_threadgroup_buffers", i32 31}
!5 = !{i32 7, !"air.max_textures", i32 128}
!6 = !{i32 7, !"air.max_read_write_textures", i32 8}
!7 = !{i32 7, !"air.max_samplers", i32 16}
!8 = !{i32 2, !"SDK Version", [3 x i32] [i32 14, i32 2, i32 1]}
!9 = !{void ({ i8 addrspace(1)*, [1 x i64] } addrspace(1)*, { i8 addrspace(1)*, [1 x i64] } addrspace(1)*, { i8 addrspace(1)*, [1 x i64] } addrspace(1)*, i32)* @_Z4vadd14MtlDeviceArrayI5Int64Li1ELi1EES_IS0_Li1ELi1EES_IS0_Li1ELi1EE}
!10 = !{void ({ i8 addrspace(1)*, [1 x i64] } addrspace(1)*, { i8 addrspace(1)*, [1 x i64] } addrspace(1)*, { i8 addrspace(1)*, [1 x i64] } addrspace(1)*, i32)* @_Z4vadd14MtlDeviceArrayI5Int64Li1ELi1EES_IS0_Li1ELi1EES_IS0_Li1ELi1EE, !11, !12}
!11 = !{}
!12 = !{!13, !14, !15, !16}
!13 = !{i32 0, !"air.buffer", !"air.location_index", i32 0, i32 1, !"air.read_write", !"air.address_space", i32 1, !"air.arg_type_size", i32 16, !"air.arg_type_align_size", i32 8, !"air.arg_type_name", !"MtlDeviceVector{Int64, 1}", !"air.arg_name", !"a"}
!14 = !{i32 1, !"air.buffer", !"air.location_index", i32 1, i32 1, !"air.read_write", !"air.address_space", i32 1, !"air.arg_type_size", i32 16, !"air.arg_type_align_size", i32 8, !"air.arg_type_name", !"MtlDeviceVector{Int64, 1}", !"air.arg_name", !"b"}
!15 = !{i32 2, !"air.buffer", !"air.location_index", i32 2, i32 1, !"air.read_write", !"air.address_space", i32 1, !"air.arg_type_size", i32 16, !"air.arg_type_align_size", i32 8, !"air.arg_type_name", !"MtlDeviceVector{Int64, 1}", !"air.arg_name", !"c"}
!16 = !{i32 3, !"air.thread_position_in_grid", !"air.arg_type_name", !"uint"}
!17 = !{!"Julia 1.10.0 with Metal.jl"}
!18 = !{i32 3, i32 0, i32 0}
!19 = !{!"Metal", i32 3, i32 0, i32 0}
!20 = !{!21, !21, i64 0, i64 0}
!21 = !{!"custom_tbaa_addrspace(1)", !22, i64 0}
!22 = !{!"custom_tbaa"}
Julia 1.9:
source_filename = "start"
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64-v96:128:128-v128:128:128-v192:256:256-v256:256:256-v512:512:512-v1024:1024:1024-n8:16:32"
target triple = "air64-apple-macosx14.2.1"
; Function Attrs: cold noreturn nounwind
declare void @llvm.trap() #0
define internal void @gpu_report_exception() unnamed_addr {
top:
ret void
}
define internal void @gpu_signal_exception() unnamed_addr {
top:
ret void
}
define void @_Z4vadd14MtlDeviceArrayI5Int64Li1ELi1EES_IS0_Li1ELi1EES_IS0_Li1ELi1EE({ i8 addrspace(1)*, [1 x i64] } addrspace(1)* %0, { i8 addrspace(1)*, [1 x i64] } addrspace(1)* %1, { i8 addrspace(1)*, [1 x i64] } addrspace(1)* %2, i32 %thread_position_in_grid) local_unnamed_addr {
conversion:
%3 = getelementptr inbounds { i8 addrspace(1)*, [1 x i64] }, { i8 addrspace(1)*, [1 x i64] } addrspace(1)* %0, i64 0, i32 1, i64 0
%.unpack13.unpack = load i64, i64 addrspace(1)* %3, align 8
%4 = bitcast { i8 addrspace(1)*, [1 x i64] } addrspace(1)* %1 to i64 addrspace(1)* addrspace(1)*
%.unpack19 = load i64 addrspace(1)*, i64 addrspace(1)* addrspace(1)* %4, align 8
%5 = getelementptr inbounds { i8 addrspace(1)*, [1 x i64] }, { i8 addrspace(1)*, [1 x i64] } addrspace(1)* %1, i64 0, i32 1, i64 0
%.unpack17.unpack = load i64, i64 addrspace(1)* %5, align 8
%6 = bitcast { i8 addrspace(1)*, [1 x i64] } addrspace(1)* %2 to i64 addrspace(1)* addrspace(1)*
%.unpack23 = load i64 addrspace(1)*, i64 addrspace(1)* addrspace(1)* %6, align 8
%7 = getelementptr inbounds { i8 addrspace(1)*, [1 x i64] }, { i8 addrspace(1)*, [1 x i64] } addrspace(1)* %2, i64 0, i32 1, i64 0
%.unpack21.unpack = load i64, i64 addrspace(1)* %7, align 8
%8 = add i32 %thread_position_in_grid, 1
%9 = icmp sgt i64 %.unpack13.unpack, 0
%10 = select i1 %9, i64 %.unpack13.unpack, i64 0
%11 = icmp eq i32 %8, 0
%12 = zext i32 %8 to i64
%13 = icmp ult i64 %10, %12
%14 = or i1 %11, %13
br i1 %14, label %L20, label %L23
L20: ; preds = %conversion
call void @gpu_report_exception()
call void @gpu_signal_exception()
call void @llvm.trap()
unreachable
L23: ; preds = %conversion
%15 = bitcast { i8 addrspace(1)*, [1 x i64] } addrspace(1)* %0 to i64 addrspace(1)* addrspace(1)*
%.unpack15 = load i64 addrspace(1)*, i64 addrspace(1)* addrspace(1)* %15, align 8
%16 = sext i32 %thread_position_in_grid to i64
%17 = getelementptr inbounds i64, i64 addrspace(1)* %.unpack15, i64 %16
%18 = load i64, i64 addrspace(1)* %17, align 8, !tbaa !20
%19 = icmp sgt i64 %.unpack17.unpack, 0
%20 = select i1 %19, i64 %.unpack17.unpack, i64 0
%.not = icmp ult i64 %20, %12
br i1 %.not, label %L46, label %L49
L46: ; preds = %L23
call void @gpu_report_exception()
call void @gpu_signal_exception()
call void @llvm.trap()
unreachable
L49: ; preds = %L23
%21 = icmp sgt i64 %.unpack21.unpack, 0
%22 = select i1 %21, i64 %.unpack21.unpack, i64 0
%23 = icmp ult i64 %22, %12
br i1 %23, label %L73, label %L76
L73: ; preds = %L49
call void @gpu_report_exception()
call void @gpu_signal_exception()
call void @llvm.trap()
unreachable
L76: ; preds = %L49
%24 = getelementptr inbounds i64, i64 addrspace(1)* %.unpack19, i64 %16
%25 = load i64, i64 addrspace(1)* %24, align 8, !tbaa !20
%26 = add i64 %25, %18
%27 = getelementptr inbounds i64, i64 addrspace(1)* %.unpack23, i64 %16
store i64 %26, i64 addrspace(1)* %27, align 8, !tbaa !20
ret void
}
attributes #0 = { cold noreturn nounwind }
!llvm.module.flags = !{!0, !1, !2, !3, !4, !5, !6, !7, !8}
!julia.kernel = !{!9}
!air.kernel = !{!10}
!llvm.ident = !{!17}
!air.version = !{!18}
!air.language_version = !{!19}
!0 = !{i32 2, !"Dwarf Version", i32 4}
!1 = !{i32 2, !"Debug Info Version", i32 3}
!2 = !{i32 7, !"air.max_device_buffers", i32 31}
!3 = !{i32 7, !"air.max_constant_buffers", i32 31}
!4 = !{i32 7, !"air.max_threadgroup_buffers", i32 31}
!5 = !{i32 7, !"air.max_textures", i32 128}
!6 = !{i32 7, !"air.max_read_write_textures", i32 8}
!7 = !{i32 7, !"air.max_samplers", i32 16}
!8 = !{i32 2, !"SDK Version", [3 x i32] [i32 14, i32 2, i32 1]}
!9 = !{void ({ i8 addrspace(1)*, [1 x i64] } addrspace(1)*, { i8 addrspace(1)*, [1 x i64] } addrspace(1)*, { i8 addrspace(1)*, [1 x i64] } addrspace(1)*, i32)* @_Z4vadd14MtlDeviceArrayI5Int64Li1ELi1EES_IS0_Li1ELi1EES_IS0_Li1ELi1EE}
!10 = !{void ({ i8 addrspace(1)*, [1 x i64] } addrspace(1)*, { i8 addrspace(1)*, [1 x i64] } addrspace(1)*, { i8 addrspace(1)*, [1 x i64] } addrspace(1)*, i32)* @_Z4vadd14MtlDeviceArrayI5Int64Li1ELi1EES_IS0_Li1ELi1EES_IS0_Li1ELi1EE, !11, !12}
!11 = !{}
!12 = !{!13, !14, !15, !16}
!13 = !{i32 0, !"air.buffer", !"air.location_index", i32 0, i32 1, !"air.read_write", !"air.address_space", i32 1, !"air.arg_type_size", i32 16, !"air.arg_type_align_size", i32 8, !"air.arg_type_name", !"MtlDeviceVector{Int64, 1}", !"air.arg_name", !"a"}
!14 = !{i32 1, !"air.buffer", !"air.location_index", i32 1, i32 1, !"air.read_write", !"air.address_space", i32 1, !"air.arg_type_size", i32 16, !"air.arg_type_align_size", i32 8, !"air.arg_type_name", !"MtlDeviceVector{Int64, 1}", !"air.arg_name", !"b"}
!15 = !{i32 2, !"air.buffer", !"air.location_index", i32 2, i32 1, !"air.read_write", !"air.address_space", i32 1, !"air.arg_type_size", i32 16, !"air.arg_type_align_size", i32 8, !"air.arg_type_name", !"MtlDeviceVector{Int64, 1}", !"air.arg_name", !"c"}
!16 = !{i32 3, !"air.thread_position_in_grid", !"air.arg_type_name", !"uint"}
!17 = !{!"Julia 1.9.4 with Metal.jl"}
!18 = !{i32 3, i32 0, i32 0}
!19 = !{!"Metal", i32 3, i32 0, i32 0}
!20 = !{!21, !21, i64 0, i64 0}
!21 = !{!"custom_tbaa_addrspace(1)", !22, i64 0}
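!22 = !{!"custom_tbaa"}
The main difference is a call to `@air.max.s64` in the Julia 1.10 IR, where the Julia 1.9 IR has an `icmp sgt` + `select` pair.
=> Bug seems to be inside of the intrinsic lowering logic in GPUCompiler.jl.
Most interestingly the [… remainder of this sentence is truncated in the source]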
I am following the example in the README. I am using an Intel MacBook Pro (unsupported, I know). The example works with Julia 1.9 but breaks with Julia 1.10. The backtrace is
jl_uPobCnqvOh.metallib.gz
I'd like to help out. Do you have a pointer to where I could start looking? Could it e.g. be an incompatibility between LLVM versions?
The text was updated successfully, but these errors were encountered: