[llvm] [DAGISel] Fix MMRA Handling in copyExtraInfo (PR #124730)

Pierre van Houtryve via llvm-commits llvm-commits@lists.llvm.org
Tue Jan 28 03:13:11 PST 2025


https://github.com/Pierre-vh created https://github.com/llvm/llvm-project/pull/124730

#78569 did not implement this correctly: an edge case breaks it by triggering the assertion failure `!Leafs.empty()`.

Fixes SWDEV-507698
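
For context, the new `atomicrmw_rel_deepcopy` test below reduces the failure to an MMRA-annotated atomic preceded by a call whose large vector return value needs legalization. A minimal sketch of that shape (the contents of `!0` are illustrative, as the diff below does not show the test's metadata block):

```llvm
; Sketch of the failing pattern; mirrors the shape of the new test.
; The MMRA tag strings here are placeholders: any !mmra annotation on
; the atomicrmw routes its nodes through SelectionDAG::copyExtraInfo.
declare <32 x half> @f()

define void @repro(ptr %ptr) {
  %v = call <32 x half> @f()
  %old = atomicrmw add ptr %ptr, i8 0 release, align 1, !mmra !0
  ret void
}

!0 = !{!"foo", !"bar"}
```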

From 4e1c208371b65de292c51124474d5b0971fdabe9 Mon Sep 17 00:00:00 2001
From: pvanhout <pierre.vanhoutryve@amd.com>
Date: Tue, 28 Jan 2025 12:11:23 +0100
Subject: [PATCH] [DAGISel] Fix MMRA Handling in copyExtraInfo

#78569 did not implement this correctly: I didn't understand
the function well enough, and an edge case came up that breaks it.
---
 .../lib/CodeGen/SelectionDAG/SelectionDAG.cpp |   2 +-
 llvm/test/CodeGen/AMDGPU/mmra.ll              | 105 +++++++++++++++++-
 2 files changed, 103 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 0f9790a10a1397..b416c0efbbc4fc 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -13635,7 +13635,7 @@ void SelectionDAG::copyExtraInfo(SDNode *From, SDNode *To) {
   // Use of operator[] on the DenseMap may cause an insertion, which invalidates
   // the iterator, hence the need to make a copy to prevent a use-after-free.
   NodeExtraInfo NEI = I->second;
-  if (LLVM_LIKELY(!NEI.PCSections) && LLVM_LIKELY(!NEI.MMRA)) {
+  if (LLVM_LIKELY(!NEI.PCSections)) {
     // No deep copy required for the types of extra info set.
     //
     // FIXME: Investigate if other types of extra info also need deep copy. This
diff --git a/llvm/test/CodeGen/AMDGPU/mmra.ll b/llvm/test/CodeGen/AMDGPU/mmra.ll
index 39650f4295c76e..d0696bf329af8d 100644
--- a/llvm/test/CodeGen/AMDGPU/mmra.ll
+++ b/llvm/test/CodeGen/AMDGPU/mmra.ll
@@ -17,7 +17,7 @@ define void @fence_loads(ptr %ptr) {
   ; CHECK-NEXT:   ATOMIC_FENCE 5, 1, mmra !0
   ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]], mmra !1
   ; CHECK-NEXT:   [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY2]], 0, 0, implicit $exec, implicit $flat_scr, mmra !1 :: (load acquire (s8) from %ir.ptr, align 4)
-  ; CHECK-NEXT:   [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec, mmra !2
+  ; CHECK-NEXT:   [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
   ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]], mmra !2
   ; CHECK-NEXT:   FLAT_STORE_BYTE [[COPY3]], killed [[V_MOV_B32_e32_]], 0, 0, implicit $exec, implicit $flat_scr, mmra !2 :: (store release (s8) into %ir.ptr, align 4)
   ; CHECK-NEXT:   SI_RETURN
@@ -82,7 +82,7 @@ define void @atomicrmw_rel(ptr %ptr) {
   ; CHECK-NEXT:   [[V_AND_B32_e64_2:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PHI1]], killed [[V_OR_B32_e64_]], implicit $exec
   ; CHECK-NEXT:   [[DEF4:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
   ; CHECK-NEXT:   [[DEF5:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
-  ; CHECK-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_AND_B32_e64_2]], %subreg.sub0, [[PHI1]], %subreg.sub1, mmra !2
+  ; CHECK-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_AND_B32_e64_2]], %subreg.sub0, [[PHI1]], %subreg.sub1
   ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE2]], mmra !2
   ; CHECK-NEXT:   [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY4]], killed [[COPY6]], 0, 1, implicit $exec, implicit $flat_scr, mmra !2 :: (load store release monotonic (s32) on %ir.AlignedAddr)
   ; CHECK-NEXT:   [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 [[FLAT_ATOMIC_CMPSWAP_RTN]], [[PHI1]], implicit $exec, mmra !2
@@ -140,7 +140,7 @@ define void @cmpxchg(ptr %ptr) {
   ; CHECK-NEXT:   [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[PHI2]], [[V_LSHLREV_B32_e64_2]], implicit $exec
   ; CHECK-NEXT:   [[DEF5:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
   ; CHECK-NEXT:   [[DEF6:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
-  ; CHECK-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_OR_B32_e64_]], %subreg.sub0, [[PHI2]], %subreg.sub1, mmra !1
+  ; CHECK-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_OR_B32_e64_]], %subreg.sub0, [[PHI2]], %subreg.sub1
   ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE2]], mmra !1
   ; CHECK-NEXT:   [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY4]], killed [[COPY6]], 0, 1, implicit $exec, implicit $flat_scr, mmra !1 :: (load store acquire acquire (s32) on %ir.AlignedAddr)
   ; CHECK-NEXT:   [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_NE_U32_e64 [[FLAT_ATOMIC_CMPSWAP_RTN]], [[PHI2]], implicit $exec
@@ -180,6 +180,105 @@ define void @cmpxchg(ptr %ptr) {
   ret void
 }
 
+declare <32 x half> @f()
+
+; Variant of atomicrmw_rel that provoked a crash in SelectionDAG::copyExtraInfo
+define void @atomicrmw_rel_deepcopy(ptr %ptr) {
+  ; CHECK-LABEL: name: atomicrmw_rel_deepcopy
+  ; CHECK: bb.0 (%ir-block.0):
+  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
+  ; CHECK-NEXT:   liveins: $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr0, $vgpr1, $vgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr31
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
+  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
+  ; CHECK-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
+  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
+  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
+  ; CHECK-NEXT:   [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @f, target-flags(amdgpu-gotprel32-hi) @f, implicit-def dead $scc
+  ; CHECK-NEXT:   [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM killed [[SI_PC_ADD_REL_OFFSET]], 0, 0 :: (dereferenceable invariant load (s64) from got, addrspace 4)
+  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]]
+  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[COPY9]]
+  ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[COPY8]]
+  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY7]]
+  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY6]]
+  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY5]]
+  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY4]]
+  ; CHECK-NEXT:   $sgpr15 = COPY [[COPY3]]
+  ; CHECK-NEXT:   $vgpr31 = COPY [[COPY]]
+  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY12]]
+  ; CHECK-NEXT:   $sgpr30_sgpr31 = SI_CALL killed [[S_LOAD_DWORDX2_IMM]], @f, csr_amdgpu, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3, implicit-def $vgpr4, implicit-def $vgpr5, implicit-def $vgpr6, implicit-def $vgpr7, implicit-def $vgpr8, implicit-def $vgpr9, implicit-def $vgpr10, implicit-def $vgpr11, implicit-def $vgpr12, implicit-def $vgpr13, implicit-def $vgpr14, implicit-def $vgpr15
+  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
+  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:vgpr_32 = COPY $vgpr4
+  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:vgpr_32 = COPY $vgpr5
+  ; CHECK-NEXT:   [[COPY19:%[0-9]+]]:vgpr_32 = COPY $vgpr6
+  ; CHECK-NEXT:   [[COPY20:%[0-9]+]]:vgpr_32 = COPY $vgpr7
+  ; CHECK-NEXT:   [[COPY21:%[0-9]+]]:vgpr_32 = COPY $vgpr8
+  ; CHECK-NEXT:   [[COPY22:%[0-9]+]]:vgpr_32 = COPY $vgpr9
+  ; CHECK-NEXT:   [[COPY23:%[0-9]+]]:vgpr_32 = COPY $vgpr10
+  ; CHECK-NEXT:   [[COPY24:%[0-9]+]]:vgpr_32 = COPY $vgpr11
+  ; CHECK-NEXT:   [[COPY25:%[0-9]+]]:vgpr_32 = COPY $vgpr12
+  ; CHECK-NEXT:   [[COPY26:%[0-9]+]]:vgpr_32 = COPY $vgpr13
+  ; CHECK-NEXT:   [[COPY27:%[0-9]+]]:vgpr_32 = COPY $vgpr14
+  ; CHECK-NEXT:   [[COPY28:%[0-9]+]]:vgpr_32 = COPY $vgpr15
+  ; CHECK-NEXT:   [[COPY29:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
+  ; CHECK-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -4
+  ; CHECK-NEXT:   [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY29]], killed [[S_MOV_B32_]], implicit $exec
+  ; CHECK-NEXT:   [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
+  ; CHECK-NEXT:   [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
+  ; CHECK-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[COPY11]], %subreg.sub1
+  ; CHECK-NEXT:   [[COPY30:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]]
+  ; CHECK-NEXT:   [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 3
+  ; CHECK-NEXT:   [[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY29]], [[S_MOV_B32_1]], implicit $exec
+  ; CHECK-NEXT:   [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[S_MOV_B32_1]], killed [[V_AND_B32_e64_1]], implicit $exec
+  ; CHECK-NEXT:   [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 255
+  ; CHECK-NEXT:   [[V_LSHLREV_B32_e64_1:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 killed [[V_LSHLREV_B32_e64_]], killed [[S_MOV_B32_2]], implicit $exec
+  ; CHECK-NEXT:   [[V_NOT_B32_e32_:%[0-9]+]]:vgpr_32 = V_NOT_B32_e32 [[V_LSHLREV_B32_e64_1]], implicit $exec
+  ; CHECK-NEXT:   [[COPY31:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]], mmra !0
+  ; CHECK-NEXT:   [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY31]], 0, 0, implicit $exec, implicit $flat_scr, mmra !0 :: (load (s32) from %ir.AlignedAddr)
+  ; CHECK-NEXT:   [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1.atomicrmw.start:
+  ; CHECK-NEXT:   successors: %bb.2(0x04000000), %bb.1(0x7c000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[PHI:%[0-9]+]]:sreg_64 = PHI [[S_MOV_B64_]], %bb.0, %7, %bb.1
+  ; CHECK-NEXT:   [[PHI1:%[0-9]+]]:vgpr_32 = PHI [[FLAT_LOAD_DWORD]], %bb.0, %6, %bb.1
+  ; CHECK-NEXT:   [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_NOT_B32_e32_]], [[V_LSHLREV_B32_e64_1]], implicit $exec
+  ; CHECK-NEXT:   [[V_AND_B32_e64_2:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PHI1]], killed [[V_OR_B32_e64_]], implicit $exec
+  ; CHECK-NEXT:   [[DEF4:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
+  ; CHECK-NEXT:   [[DEF5:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
+  ; CHECK-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_AND_B32_e64_2]], %subreg.sub0, [[PHI1]], %subreg.sub1
+  ; CHECK-NEXT:   [[COPY32:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE2]], mmra !0
+  ; CHECK-NEXT:   [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY30]], killed [[COPY32]], 0, 1, implicit $exec, implicit $flat_scr, mmra !0 :: (load store release monotonic (s32) on %ir.AlignedAddr)
+  ; CHECK-NEXT:   [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 [[FLAT_ATOMIC_CMPSWAP_RTN]], [[PHI1]], implicit $exec, mmra !0
+  ; CHECK-NEXT:   [[SI_IF_BREAK:%[0-9]+]]:sreg_64 = SI_IF_BREAK killed [[V_CMP_EQ_U32_e64_]], [[PHI]], implicit-def dead $scc
+  ; CHECK-NEXT:   SI_LOOP [[SI_IF_BREAK]], %bb.1, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+  ; CHECK-NEXT:   S_BRANCH %bb.2
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2.atomicrmw.end:
+  ; CHECK-NEXT:   [[PHI2:%[0-9]+]]:sreg_64 = PHI [[SI_IF_BREAK]], %bb.1
+  ; CHECK-NEXT:   SI_END_CF [[PHI2]], implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+  ; CHECK-NEXT:   SI_RETURN
+  %C = call <32 x half> @f()
+  %old.2 = atomicrmw add ptr %ptr, i8 0 release, align 1, !mmra !0
+  ret void
+}
+
 attributes #0 = { memory(read) }
 attributes #1 = { memory(write) }
 
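The CHECK lines above are auto-generated MIR assertions, so anyone needing to regenerate them after rebasing can use the usual step (assuming a local build with `llc` on `PATH`): run `llvm/utils/update_mir_test_checks.py llvm/test/CodeGen/AMDGPU/mmra.ll`.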


