[llvm] 2a832d0 - [AMDGPU] Add missing physical register check in SIFoldOperands::tryFoldLoad
Yashwant Singh via llvm-commits
llvm-commits@lists.llvm.org
Tue Jan 24 00:54:57 PST 2023
Author: Yashwant Singh
Date: 2023-01-24T14:24:41+05:30
New Revision: 2a832d0f09f73faf46aae54ff73cdcd99a7bacf3
URL: https://github.com/llvm/llvm-project/commit/2a832d0f09f73faf46aae54ff73cdcd99a7bacf3
DIFF: https://github.com/llvm/llvm-project/commit/2a832d0f09f73faf46aae54ff73cdcd99a7bacf3.diff
LOG: [AMDGPU] Add missing physical register check in SIFoldOperands::tryFoldLoad
tryFoldLoad() is not meant to work on physical registers; moreover,
use_nodbg_instructions(reg) makes the compiler buggy when called with a
physical reg
Fix for SWDEV-373493
Reviewed By: arsenm
Differential Revision: https://reviews.llvm.org/D141895
Added:
llvm/test/CodeGen/AMDGPU/fold-vgpr-phyreg.mir
llvm/test/CodeGen/AMDGPU/swdev373493.ll
Modified:
llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
index a003a3d41b1a3..9c0c665a318c3 100644
--- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -1706,6 +1706,9 @@ bool SIFoldOperands::tryFoldLoad(MachineInstr &MI) {
if (!I->isCopy() && !I->isRegSequence())
return false;
Register DstReg = I->getOperand(0).getReg();
+ // Physical registers may have more than one instruction definitions
+ if (DstReg.isPhysical())
+ return false;
if (TRI->isAGPR(*MRI, DstReg))
continue;
MoveRegs.push_back(DstReg);
diff --git a/llvm/test/CodeGen/AMDGPU/fold-vgpr-phyreg.mir b/llvm/test/CodeGen/AMDGPU/fold-vgpr-phyreg.mir
new file mode 100644
index 0000000000000..2d5909e3297d2
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/fold-vgpr-phyreg.mir
@@ -0,0 +1,44 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -o - %s -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -run-pass=si-fold-operands -verify-machineinstrs | FileCheck %s
+
+---
+name: lshl_add_u64_gep
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+
+ ; CHECK-LABEL: name: lshl_add_u64_gep
+ ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY]], %subreg.sub1
+ ; CHECK-NEXT: [[V_LSHLREV_B64_e64_:%[0-9]+]]:vreg_64_align2 = V_LSHLREV_B64_e64 2, [[REG_SEQUENCE]], implicit $exec
+ ; CHECK-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY3]], [[V_LSHLREV_B64_e64_]].sub0, 0, implicit $exec
+ ; CHECK-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY2]], [[V_LSHLREV_B64_e64_]].sub1, killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
+ ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
+ ; CHECK-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD killed [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr
+ ; CHECK-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
+ ; CHECK-NEXT: SI_RETURN implicit $vgpr0
+ %0:vgpr_32 = COPY $vgpr3
+ %1:vgpr_32 = COPY $vgpr2
+ %2:vgpr_32 = COPY $vgpr1
+ %3:vgpr_32 = COPY $vgpr0
+ %4:vreg_64_align2 = REG_SEQUENCE %1, %subreg.sub0, %0, %subreg.sub1
+ %5:sreg_32 = S_MOV_B32 2
+ %6:vreg_64_align2 = V_LSHLREV_B64_e64 killed %5, %4, implicit $exec
+ %7:vgpr_32 = COPY %3
+ %8:vgpr_32 = COPY %6.sub0
+ %9:vgpr_32 = COPY %2
+ %10:vgpr_32 = COPY %6.sub1
+ %11:vgpr_32, %12:sreg_64_xexec = V_ADD_CO_U32_e64 %7, %8, 0, implicit $exec
+ %13:vgpr_32, dead %14:sreg_64_xexec = V_ADDC_U32_e64 %9, %10, killed %12, 0, implicit $exec
+ %15:vreg_64_align2 = REG_SEQUENCE %11, %subreg.sub0, %13, %subreg.sub1
+ %16:vgpr_32 = FLAT_LOAD_DWORD killed %15, 0, 0, implicit $exec, implicit $flat_scr
+ $vgpr0 = COPY %16
+ SI_RETURN implicit $vgpr0
+
+...
diff --git a/llvm/test/CodeGen/AMDGPU/swdev373493.ll b/llvm/test/CodeGen/AMDGPU/swdev373493.ll
new file mode 100644
index 0000000000000..dea192aad80a7
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/swdev373493.ll
@@ -0,0 +1,78 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -o - %s -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -verify-machineinstrs | FileCheck %s
+
+@global = external protected addrspace(4) externally_initialized global [4096 x i64], align 16
+
+define hidden fastcc void @bar(i32 %arg, ptr %arg1, ptr %arg2, ptr %arg3, ptr %arg4, ptr %arg5, ptr %arg6) unnamed_addr align 2 {
+; CHECK-LABEL: bar:
+; CHECK: ; %bb.0: ; %bb
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: v_mov_b32_e32 v15, v12
+; CHECK-NEXT: v_mov_b32_e32 v14, v11
+; CHECK-NEXT: v_mov_b32_e32 v13, v10
+; CHECK-NEXT: v_mov_b32_e32 v12, v9
+; CHECK-NEXT: v_mov_b32_e32 v11, v8
+; CHECK-NEXT: v_mov_b32_e32 v10, v7
+; CHECK-NEXT: v_mov_b32_e32 v9, v6
+; CHECK-NEXT: v_mov_b32_e32 v8, v5
+; CHECK-NEXT: v_mov_b32_e32 v7, v4
+; CHECK-NEXT: v_mov_b32_e32 v6, v3
+; CHECK-NEXT: s_cmp_lt_i32 s4, 3
+; CHECK-NEXT: s_cbranch_scc0 .LBB0_3
+; CHECK-NEXT: ; %bb.1: ; %LeafBlock
+; CHECK-NEXT: s_cbranch_scc1 .LBB0_5
+; CHECK-NEXT: ; %bb.2: ; %bb7
+; CHECK-NEXT: flat_load_dwordx2 v[2:3], v[0:1]
+; CHECK-NEXT: s_getpc_b64 s[16:17]
+; CHECK-NEXT: s_add_u32 s16, s16, global@rel32@lo+1948
+; CHECK-NEXT: s_addc_u32 s17, s17, global@rel32@hi+1956
+; CHECK-NEXT: v_mov_b32_e32 v5, 0
+; CHECK-NEXT: v_mov_b32_e32 v0, s16
+; CHECK-NEXT: v_mov_b32_e32 v1, s17
+; CHECK-NEXT: s_getpc_b64 s[18:19]
+; CHECK-NEXT: s_add_u32 s18, s18, eggs@rel32@lo+4
+; CHECK-NEXT: s_addc_u32 s19, s19, eggs@rel32@hi+12
+; CHECK-NEXT: s_setpc_b64 s[18:19]
+; CHECK-NEXT: .LBB0_3: ; %LeafBlock1
+; CHECK-NEXT: s_cbranch_scc0 .LBB0_5
+; CHECK-NEXT: ; %bb.4: ; %bb8
+; CHECK-NEXT: v_mov_b32_e32 v0, v1
+; CHECK-NEXT: v_mov_b32_e32 v1, v2
+; CHECK-NEXT: v_mov_b32_e32 v2, v6
+; CHECK-NEXT: v_mov_b32_e32 v3, v7
+; CHECK-NEXT: v_mov_b32_e32 v4, v8
+; CHECK-NEXT: v_mov_b32_e32 v5, v9
+; CHECK-NEXT: v_mov_b32_e32 v6, v10
+; CHECK-NEXT: v_mov_b32_e32 v7, v11
+; CHECK-NEXT: v_mov_b32_e32 v8, v12
+; CHECK-NEXT: v_mov_b32_e32 v9, v13
+; CHECK-NEXT: v_mov_b32_e32 v10, v14
+; CHECK-NEXT: v_mov_b32_e32 v11, v15
+; CHECK-NEXT: s_getpc_b64 s[16:17]
+; CHECK-NEXT: s_add_u32 s16, s16, quux at rel32@lo+4
+; CHECK-NEXT: s_addc_u32 s17, s17, quux at rel32@hi+12
+; CHECK-NEXT: s_setpc_b64 s[16:17]
+; CHECK-NEXT: .LBB0_5: ; %bb9
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+bb:
+ switch i32 undef, label %bb9 [
+ i32 3, label %bb8
+ i32 1, label %bb7
+ ]
+
+bb7: ; preds = %bb
+ %tmp = load ptr, ptr undef, align 8
+ tail call fastcc void @eggs(ptr noundef addrspacecast (ptr addrspace(4) getelementptr inbounds ([4096 x i64], ptr addrspace(4) @global, i64 0, i64 243) to ptr), ptr %tmp, ptr undef, ptr noundef nonnull align 8 dereferenceable(24) %arg2, ptr noundef %arg3, ptr noundef %arg4, ptr noundef %arg5)
+ br label %bb9
+
+bb8: ; preds = %bb
+ tail call fastcc void @quux(ptr noundef nonnull align 8 dereferenceable(24) %arg1, ptr noundef nonnull align 8 dereferenceable(24) %arg2, ptr noundef %arg3, ptr noundef %arg4, ptr noundef %arg5, ptr noundef nonnull align 8 dereferenceable(8) %arg6)
+ br label %bb9
+
+bb9: ; preds = %bb8, %bb7, %bb
+ ret void
+}
+
+declare dso_local fastcc void @eggs(ptr, ptr, ptr, ptr, ptr, ptr, ptr) unnamed_addr align 2
+
+declare dso_local fastcc void @quux(ptr, ptr, ptr, ptr, ptr, ptr) unnamed_addr align 2
More information about the llvm-commits
mailing list