[PATCH] D141895: [AMDGPU] Add missing physical register check in SIFoldOperands::tryFoldLoad

Yashwant Singh via Phabricator via llvm-commits llvm-commits at lists.llvm.org
Mon Jan 16 23:07:37 PST 2023


yassingh created this revision.
yassingh added reviewers: rampitec, arsenm.
Herald added subscribers: kosarev, foad, kerbowa, hiraditya, tpr, dstuttard, yaxunl, jvesely, kzhuravl.
Herald added a project: All.
yassingh requested review of this revision.
Herald added subscribers: llvm-commits, wdng.
Herald added a project: LLVM.

tryFoldLoad() is not meant to operate on physical registers. Moreover,
calling use_nodbg_instructions(reg) with a physical register causes the
compiler to misbehave.

Fix for SWDEV-373493


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D141895

Files:
  llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
  llvm/test/CodeGen/AMDGPU/swdev373493.ll


Index: llvm/test/CodeGen/AMDGPU/swdev373493.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AMDGPU/swdev373493.ll
@@ -0,0 +1,35 @@
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -O3 < %s
+@a0000000000000000000000000000000004040 = external protected addrspace(4) externally_initialized global [4096 x i64], align 16
+
+; Function Attrs: noinline
+define hidden fastcc void @func1(ptr %a015194, ptr %a015195, ptr %a14919, ptr %a14642, ptr %a11358, ptr %a10670) unnamed_addr #0 align 2 {
+entry:
+  switch i32 undef, label %sw.epilog [
+    i32 3, label %sw.bb8
+    i32 1, label %sw.bb2
+  ]
+
+sw.bb2:                                           ; preds = %entry
+  %a015194.val = load ptr, ptr undef, align 8
+  tail call fastcc void @func2(ptr noundef addrspacecast (ptr addrspace(4) getelementptr inbounds ([4096 x i64], ptr addrspace(4) @a0000000000000000000000000000000004040, i64 0, i64 243) to ptr), ptr %a015194.val, ptr undef, ptr noundef nonnull align 8 dereferenceable(24) %a015195, ptr noundef %a14919, ptr noundef %a14642, ptr noundef %a11358) #2
+  br label %sw.epilog
+
+sw.bb8:                                           ; preds = %entry
+  tail call fastcc void @func3(ptr noundef nonnull align 8 dereferenceable(24) %a015194, ptr noundef nonnull align 8 dereferenceable(24) %a015195, ptr noundef %a14919, ptr noundef %a14642, ptr noundef %a11358, ptr noundef nonnull align 8 dereferenceable(8) %a10670) #2
+  br label %sw.epilog
+
+sw.epilog:                                        ; preds = %sw.bb8, %sw.bb2, %entry
+  ret void
+}
+
+declare dso_local fastcc void @func2(ptr, ptr, ptr, ptr, ptr, ptr, ptr) unnamed_addr #1 align 2
+
+declare dso_local fastcc void @func3(ptr, ptr, ptr, ptr, ptr, ptr) unnamed_addr #1 align 2
+
+attributes #0 = { noinline }
+attributes #1 = { "target-features"="+16-bit-insts,+add-no-carry-insts,+aperture-regs,+atomic-fadd-no-rtn-insts,+atomic-fadd-rtn-insts,+atomic-pk-fadd-no-rtn-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+dpp-64bit,+enable-ds128,+enable-prt-strict-null,+fast-denormal-f32,+fast-fmaf,+flat-address-space,+flat-for-global,+flat-global-insts,+flat-inst-offsets,+flat-scratch-insts,+fma-mix-insts,+fp64,+full-rate-64-ops,+gcn3-encoding,+gfx7-gfx8-gfx9-insts,+gfx8-insts,+gfx9,+gfx9-insts,+gfx90a-insts,+image-insts,+int-clamp-insts,+inv-2pi-inline-imm,+ldsbankcount32,+load-store-opt,+localmemorysize65536,+mad-mac-f32-insts,+mai-insts,+negative-scratch-offset-bug,+packed-fp32-ops,+packed-tid,+pk-fmac-f16-inst,+promote-alloca,+r128-a16,+s-memrealtime,+s-memtime-inst,+scalar-atomics,+scalar-flat-scratch-insts,+scalar-stores,+sdwa,+sdwa-omod,+sdwa-scalar,+sdwa-sdst,+sramecc-support,+trap-handler,+unaligned-access-mode,+unaligned-buffer-access,+unaligned-ds-access,+vgpr-index-mode,+vop3p,-wavefrontsize16,-wavefrontsize32,+wavefrontsize64,+xnack-support" }
+attributes #2 = { convergent nounwind }
+
+!llvm.ident = !{!0}
+
+!0 = !{!"AMD clang version 15.0.0 (ssh://gerritgit/lightning/ec/llvm-project amd-mainline-open 22343 2c8ca8227af55c93a857bb3b4d8b118c917fca7c)"}
Index: llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -1709,6 +1709,9 @@
     if (!I->isCopy() && !I->isRegSequence())
       return false;
     Register DstReg = I->getOperand(0).getReg();
+    // Physical registers may have more than one defining instruction.
+    if (DstReg.isPhysical())
+      return false;
     if (TRI->isAGPR(*MRI, DstReg))
       continue;
     MoveRegs.push_back(DstReg);


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D141895.489706.patch
Type: text/x-patch
Size: 3733 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20230117/491b7259/attachment.bin>


More information about the llvm-commits mailing list