[llvm] AMDGPU: Allow operand folding between loop body and its preheader (PR #137022)

Wed Apr 23 11:47:34 PDT 2025

================
@@ -0,0 +1,114 @@
+; NOTE: Do not autogenerate
+; RUN: llc -mtriple=amdgcn -mcpu=gfx942 -verify-machineinstrs < %s | FileCheck --check-prefix=GCN %s
+
+; ModuleID = '<stdin>'
+source_filename = "add.cpp"
+target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9"
+target triple = "amdgcn-amd-amdhsa"
+
+$main = comdat any
+
+; Function Attrs: convergent mustprogress nofree norecurse nounwind
+define protected amdgpu_kernel void @main(ptr addrspace(1) noundef %args.coerce, ptr addrspace(1) noundef %args.coerce2, ptr addrspace(1) noundef %args.coerce4, i32 noundef %args10, i32 noundef %args12) local_unnamed_addr #0 comdat {
+; GCN-LABEL: main:
+; check that non-redundant readfirstlanes are not removed
+; GCN:      v_readfirstlane_b32
+; check that all redundant readfirstlanes are removed
+; GCN-NOT:  v_readfirstlane_b32
+; GCN:      s_endpgm
+entry:
+    %0 = tail call noundef range(i32 0, 1024) i32 @llvm.amdgcn.workitem.id.x()
----------------
arsenm wrote:

Drop the callsite attributes. Also use named values in tests.

https://github.com/llvm/llvm-project/pull/137022