[llvm] [Attributor] Fix an issue that could potentially cause `AccessList` and `OffsetBins` out of sync (PR #106187)
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Wed Aug 28 09:07:40 PDT 2024
arsenm wrote:
Your reducer script runs all of clang. You should start reducing from the IR extracted before the failing pass and run only that one pass. When I do that, I get this:
```
; RUN: opt -passes=amdgpu-attributor %s
target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9"
target triple = "amdgcn-amd-amdhsa"
; Named struct types used by the two functions below (numeric suffixes are part
; of the type names as written).
; ShaderClosure is the element type of the 64-entry array that is the last member
; of ShaderData, and of the array type indexed by the final GEP in @svm_eval_nodes.
%struct.ShaderClosure.1472.2484.3440.3732.5208.6220.7176.7468 = type { <3 x float>, i32, float, <3 x float>, [10 x float], [8 x i8] }
; ShaderData is the innermost element type of the alloca in @kernel_ocl_displace.
%struct.ShaderData.1475.2487.3443.3735.5211.6223.7179.7471 = type { <3 x float>, <3 x float>, <3 x float>, <3 x float>, i32, i32, i32, i32, i32, float, float, i32, i32, float, float, %struct.differential3.1473.2485.3441.3733.5209.6221.7177.7469, %struct.differential3.1473.2485.3441.3733.5209.6221.7177.7469, %struct.differential.1474.2486.3442.3734.5210.6222.7178.7470, %struct.differential.1474.2486.3442.3734.5210.6222.7178.7470, <3 x float>, <3 x float>, <3 x float>, %struct.differential3.1473.2485.3441.3733.5209.6221.7177.7469, i32, i32, i32, float, <3 x float>, <3 x float>, <3 x float>, [64 x %struct.ShaderClosure.1472.2484.3440.3732.5208.6220.7176.7468] }
; Pair of floats; appears as a member of ShaderData.
%struct.differential.1474.2486.3442.3734.5210.6222.7178.7470 = type { float, float }
; Pair of <3 x float> vectors; appears as a member of ShaderData.
%struct.differential3.1473.2485.3441.3733.5209.6221.7177.7469 = type { <3 x float>, <3 x float> }
; Internal helper taking a single generic pointer %kg. It contains no `ret`:
; every block ends in an unconditional branch to %while.cond.
; All memory accesses are made relative to %kg:
;   - i32 loads at offset 0 (%while.cond, %sw.bb92)
;   - a float store at a variable index off %kg (%cond.true.i.i.i34)
;   - i32 load/store at byte offset 276 (%sw.bb92)
;   - a <4 x float> store at byte offset 336 plus a variable array index (%sw.bb92)
define internal fastcc void @svm_eval_nodes(ptr %kg) {
entry:
; Two fixed byte offsets from %kg; both results are only used in %sw.bb92.
%closure.i25.i = getelementptr i8, ptr %kg, i64 336
%num_closure.i26.i = getelementptr i8, ptr %kg, i64 276
br label %while.cond
while.cond: ; preds = %sw.bb92, %cond.true.i.i.i34, %while.cond, %entry
; Loads an i32 from offset 0 and zero-extends it, then branches straight back
; to itself. %idxprom.i is consumed only by %cond.true.i.i.i34.
%0 = load i32, ptr %kg, align 4
%idxprom.i = zext i32 %0 to i64
br label %while.cond
cond.true.i.i.i34: ; No predecessors!
; No branch targets this block. It stores 0.0 to the float slot at index
; %idxprom.i off %kg, i.e. at an offset that is not a compile-time constant.
%arrayidx.i27.i = getelementptr float, ptr %kg, i64 %idxprom.i
store float 0.000000e+00, ptr %arrayidx.i27.i, align 4
br label %while.cond
sw.bb92: ; No predecessors!
; No branch targets this block either.
; Re-load the i32 at offset 0 (this time with align 8) and place it in lane 0
; of a <3 x i32>, then reinterpret the bits as <3 x float>.
%1 = load i32, ptr %kg, align 8
%2 = insertelement <3 x i32> zeroinitializer, i32 %1, i64 0
%splat.splatinsert.i = bitcast <3 x i32> %2 to <3 x float>
; All-zero shuffle mask with four entries: lane 0 is replicated into a <4 x float>.
%3 = shufflevector <3 x float> %splat.splatinsert.i, <3 x float> zeroinitializer, <4 x i32> zeroinitializer
; Read the i32 at byte offset 276 and use it (sign-extended) as the index into
; the [64 x ShaderClosure] array type based at byte offset 336.
%4 = load i32, ptr %num_closure.i26.i, align 4
%idxprom.i27.i = sext i32 %4 to i64
%arrayidx.i28.i = getelementptr [64 x %struct.ShaderClosure.1472.2484.3440.3732.5208.6220.7176.7468], ptr %closure.i25.i, i64 0, i64 %idxprom.i27.i
store <4 x float> %3, ptr %arrayidx.i28.i, align 16
; The value read from offset 276 is OR-ed with 1 (not added) and written back
; to the same location.
%inc.i30.i = or i32 %4, 1
store i32 %inc.i30.i, ptr %num_closure.i26.i, align 4
br label %while.cond
}
; Function Attrs: norecurse
; Kernel entry point (amdgpu_kernel calling convention, attribute group #0).
; It creates a stack object, zeroes 8 bytes at byte offset 276 from it, and
; passes the object (cast to a generic pointer) to @svm_eval_nodes.
define amdgpu_kernel void @kernel_ocl_displace() #0 {
entry:
; Alloca in addrspace(5), the alloca address space named by `A5` in the
; datalayout. The allocated type is ten nested zero-length arrays of ShaderData
; and the element count is `i32 0`, so the allocation itself is zero-sized.
; NOTE(review): presumably an artifact of test-case reduction - confirm.
%sd.i11111111111 = alloca [0 x [0 x [0 x [0 x [0 x [0 x [0 x [0 x [0 x [0 x %struct.ShaderData.1475.2487.3443.3735.5211.6223.7179.7471]]]]]]]]]], i32 0, align 16, addrspace(5)
; Generic-address-space view of the same object; this is the %kg argument of
; @svm_eval_nodes.
%kglobals.ascast1 = addrspacecast ptr addrspace(5) %sd.i11111111111 to ptr
; Byte offset 276 from the alloca, the same offset @svm_eval_nodes reads and
; writes as an i32 through %kg.
%num_closure.i.i = getelementptr i8, ptr addrspace(5) %sd.i11111111111, i32 276
; This store is <2 x i32> (8 bytes), whereas the callee's accesses at offset
; 276 are i32 (4 bytes).
; NOTE(review): presumably this same-offset, different-size access pattern is
; what exercises the Attributor's access tracking - confirm against the PR.
store <2 x i32> zeroinitializer, ptr addrspace(5) %num_closure.i.i, align 4
call fastcc void @svm_eval_nodes(ptr %kglobals.ascast1)
ret void
}
attributes #0 = { norecurse }
```
https://github.com/llvm/llvm-project/pull/106187
More information about the llvm-commits
mailing list