[llvm] [Attributor] Fix an issue that could potentially cause `AccessList` and `OffsetBins` out of sync (PR #106187)

Wed Aug 28 09:07:40 PDT 2024

arsenm wrote:

Your reducer script runs all of clang; you should instead start reducing from the IR extracted just before the failing pass and run only that one pass. When I do that, I get this:

```
; Reduced reproducer: the lit command below runs opt with only the
; amdgpu-attributor pass over this file.
; RUN: opt -passes=amdgpu-attributor %s

; Target description for amdgcn-amd-amdhsa. The "A5" component of the data
; layout matches the addrspace(5) alloca in @kernel_ocl_displace.
target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9"
target triple = "amdgcn-amd-amdhsa"

; Named struct types. The functions below name only two of them directly:
; ShaderClosure (element type of the [64 x ...] array indexed in
; @svm_eval_nodes) and ShaderData (innermost element type of the alloca in
; @kernel_ocl_displace). The two differential types appear only as members
; of ShaderData.
%struct.ShaderClosure.1472.2484.3440.3732.5208.6220.7176.7468 = type { <3 x float>, i32, float, <3 x float>, [10 x float], [8 x i8] }
%struct.ShaderData.1475.2487.3443.3735.5211.6223.7179.7471 = type { <3 x float>, <3 x float>, <3 x float>, <3 x float>, i32, i32, i32, i32, i32, float, float, i32, i32, float, float, %struct.differential3.1473.2485.3441.3733.5209.6221.7177.7469, %struct.differential3.1473.2485.3441.3733.5209.6221.7177.7469, %struct.differential.1474.2486.3442.3734.5210.6222.7178.7470, %struct.differential.1474.2486.3442.3734.5210.6222.7178.7470, <3 x float>, <3 x float>, <3 x float>, %struct.differential3.1473.2485.3441.3733.5209.6221.7177.7469, i32, i32, i32, float, <3 x float>, <3 x float>, <3 x float>, [64 x %struct.ShaderClosure.1472.2484.3440.3732.5208.6220.7176.7468] }
%struct.differential.1474.2486.3442.3734.5210.6222.7178.7470 = type { float, float }
%struct.differential3.1473.2485.3441.3733.5209.6221.7177.7469 = type { <3 x float>, <3 x float> }

; Internal fastcc callee taking a single generic pointer %kg.
; As written the function contains no ret: control enters while.cond from
; entry and while.cond branches unconditionally back to itself. The other two
; blocks are annotated as having no predecessors, yet they still contain
; loads and stores through pointers derived from %kg.
define internal fastcc void @svm_eval_nodes(ptr %kg) {
entry:
  ; Byte-offset pointers derived from %kg (+336 and +276). Both are used only
  ; in sw.bb92.
  %closure.i25.i = getelementptr i8, ptr %kg, i64 336
  %num_closure.i26.i = getelementptr i8, ptr %kg, i64 276
  br label %while.cond

while.cond:                                       ; preds = %sw.bb92, %cond.true.i.i.i34, %while.cond, %entry
  ; Loads an i32 from offset 0 of %kg and zero-extends it; the extended value
  ; is consumed only by cond.true.i.i.i34. The block then loops to itself.
  %0 = load i32, ptr %kg, align 4
  %idxprom.i = zext i32 %0 to i64
  br label %while.cond

cond.true.i.i.i34:                                ; No predecessors!
  ; Stores 0.0 at a variable float-sized index from %kg, i.e. a write at an
  ; offset that is not a compile-time constant.
  %arrayidx.i27.i = getelementptr float, ptr %kg, i64 %idxprom.i
  store float 0.000000e+00, ptr %arrayidx.i27.i, align 4
  br label %while.cond

sw.bb92:                                          ; No predecessors!
  ; Re-loads the i32 at offset 0 of %kg (here with align 8, versus align 4 in
  ; while.cond) and builds a <4 x float> whose lanes all hold those bits: the
  ; all-zero shuffle mask selects lane 0 of the bitcast vector for every lane.
  %1 = load i32, ptr %kg, align 8
  %2 = insertelement <3 x i32> zeroinitializer, i32 %1, i64 0
  %splat.splatinsert.i = bitcast <3 x i32> %2 to <3 x float>
  %3 = shufflevector <3 x float> %splat.splatinsert.i, <3 x float> zeroinitializer, <4 x i32> zeroinitializer
  ; Reads the i32 at %kg+276 and uses it (sign-extended) to index the
  ; [64 x ShaderClosure] array based at %kg+336; the 16-byte vector is stored
  ; at the start of the selected element.
  %4 = load i32, ptr %num_closure.i26.i, align 4
  %idxprom.i27.i = sext i32 %4 to i64
  %arrayidx.i28.i = getelementptr [64 x %struct.ShaderClosure.1472.2484.3440.3732.5208.6220.7176.7468], ptr %closure.i25.i, i64 0, i64 %idxprom.i27.i
  store <4 x float> %3, ptr %arrayidx.i28.i, align 16
  ; Writes the loaded value OR'ed with 1 back to %kg+276 (note: an `or`, not
  ; an `add`, despite the value's "inc" name).
  %inc.i30.i = or i32 %4, 1
  store i32 %inc.i30.i, ptr %num_closure.i26.i, align 4
  br label %while.cond
}

; Function Attrs: norecurse
; amdgpu_kernel entry point and the only caller of @svm_eval_nodes in this
; module. It allocates an addrspace(5) object, zero-stores 8 bytes at byte
; offset 276 from it, and passes the object to the callee as a generic
; pointer. The callee also accesses byte offset 276 of its argument.
define amdgpu_kernel void @kernel_ocl_displace() #0 {
entry:
  ; The allocated type is a nest of zero-length arrays around ShaderData and
  ; the element count is i32 0.
  ; NOTE(review): this appears to reserve no storage, so the accesses at
  ; offset 276 would lie outside the allocation. Presumably an artifact of
  ; test reduction; confirm it is not required to trigger the bug.
  %sd.i11111111111 = alloca [0 x [0 x [0 x [0 x [0 x [0 x [0 x [0 x [0 x [0 x %struct.ShaderData.1475.2487.3443.3735.5211.6223.7179.7471]]]]]]]]]], i32 0, align 16, addrspace(5)
  ; Cast from addrspace(5) to the default address space for the call below.
  %kglobals.ascast1 = addrspacecast ptr addrspace(5) %sd.i11111111111 to ptr
  ; The <2 x i32> store covers bytes 276..283 of the object, so it overlaps
  ; the callee's 4-byte accesses at 276 but is wider than them.
  %num_closure.i.i = getelementptr i8, ptr addrspace(5) %sd.i11111111111, i32 276
  store <2 x i32> zeroinitializer, ptr addrspace(5) %num_closure.i.i, align 4
  call fastcc void @svm_eval_nodes(ptr %kglobals.ascast1)
  ret void
}

; Attribute group referenced by @kernel_ocl_displace.
attributes #0 = { norecurse }

```

https://github.com/llvm/llvm-project/pull/106187