[llvm] d2edff8 - [AMDGPU] PeepholeSDWA: Don't assume inst srcs are registers (#69576)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Oct 19 03:13:50 PDT 2023
Author: Pierre van Houtryve
Date: 2023-10-19T12:13:45+02:00
New Revision: d2edff839d0da9cf84e0b78b1514d902abe1ab02
URL: https://github.com/llvm/llvm-project/commit/d2edff839d0da9cf84e0b78b1514d902abe1ab02
DIFF: https://github.com/llvm/llvm-project/commit/d2edff839d0da9cf84e0b78b1514d902abe1ab02.diff
LOG: [AMDGPU] PeepholeSDWA: Don't assume inst srcs are registers (#69576)
To fix that ticket we only needed to address the V_LSHLREV_B16 case, but
I did it for all insts just in case.
Fixes #66899
Added:
Modified:
llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
llvm/test/CodeGen/AMDGPU/sdwa-peephole.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp b/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
index 97b3161c7f98b13..53fc2c0686245f9 100644
--- a/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
+++ b/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
@@ -546,7 +546,8 @@ SIPeepholeSDWA::matchSDWAOperand(MachineInstr &MI) {
MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
- if (Src1->getReg().isPhysical() || Dst->getReg().isPhysical())
+ if (!Src1->isReg() || Src1->getReg().isPhysical() ||
+ Dst->getReg().isPhysical())
break;
if (Opcode == AMDGPU::V_LSHLREV_B32_e32 ||
@@ -584,7 +585,8 @@ SIPeepholeSDWA::matchSDWAOperand(MachineInstr &MI) {
MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
- if (Src1->getReg().isPhysical() || Dst->getReg().isPhysical())
+ if (!Src1->isReg() || Src1->getReg().isPhysical() ||
+ Dst->getReg().isPhysical())
break;
if (Opcode == AMDGPU::V_LSHLREV_B16_e32 ||
@@ -647,7 +649,8 @@ SIPeepholeSDWA::matchSDWAOperand(MachineInstr &MI) {
MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
- if (Src0->getReg().isPhysical() || Dst->getReg().isPhysical())
+ if (!Src0->isReg() || Src0->getReg().isPhysical() ||
+ Dst->getReg().isPhysical())
break;
return std::make_unique<SDWASrcOperand>(
@@ -675,7 +678,8 @@ SIPeepholeSDWA::matchSDWAOperand(MachineInstr &MI) {
MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
- if (ValSrc->getReg().isPhysical() || Dst->getReg().isPhysical())
+ if (!ValSrc->isReg() || ValSrc->getReg().isPhysical() ||
+ Dst->getReg().isPhysical())
break;
return std::make_unique<SDWASrcOperand>(
diff --git a/llvm/test/CodeGen/AMDGPU/sdwa-peephole.ll b/llvm/test/CodeGen/AMDGPU/sdwa-peephole.ll
index 79e96b45a901c4c..086fab3f5217589 100644
--- a/llvm/test/CodeGen/AMDGPU/sdwa-peephole.ll
+++ b/llvm/test/CodeGen/AMDGPU/sdwa-peephole.ll
@@ -2098,6 +2098,113 @@ bb11: ; preds = %bb10, %bb2
br label %bb1
}
+define void @crash_lshlrevb16_not_reg_op() {
+; NOSDWA-LABEL: crash_lshlrevb16_not_reg_op:
+; NOSDWA: ; %bb.0: ; %bb0
+; NOSDWA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; NOSDWA-NEXT: s_mov_b64 s[4:5], 0
+; NOSDWA-NEXT: s_and_b64 vcc, exec, -1
+; NOSDWA-NEXT: v_lshlrev_b16_e64 v3, 8, 1
+; NOSDWA-NEXT: .LBB22_1: ; %bb1
+; NOSDWA-NEXT: ; =>This Inner Loop Header: Depth=1
+; NOSDWA-NEXT: v_mov_b32_e32 v0, s4
+; NOSDWA-NEXT: v_mov_b32_e32 v2, 0xff
+; NOSDWA-NEXT: s_lshl_b32 s6, s4, 3
+; NOSDWA-NEXT: v_mov_b32_e32 v1, s5
+; NOSDWA-NEXT: s_mov_b64 s[4:5], 1
+; NOSDWA-NEXT: v_and_b32_e32 v2, s4, v2
+; NOSDWA-NEXT: v_or_b32_e32 v2, v2, v3
+; NOSDWA-NEXT: v_lshrrev_b16_e32 v2, s6, v2
+; NOSDWA-NEXT: flat_store_byte v[0:1], v2
+; NOSDWA-NEXT: s_mov_b64 vcc, vcc
+; NOSDWA-NEXT: s_cbranch_vccnz .LBB22_1
+; NOSDWA-NEXT: ; %bb.2: ; %DummyReturnBlock
+; NOSDWA-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; NOSDWA-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX89-LABEL: crash_lshlrevb16_not_reg_op:
+; GFX89: ; %bb.0: ; %bb0
+; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX89-NEXT: s_mov_b64 s[4:5], 0
+; GFX89-NEXT: s_and_b64 vcc, exec, -1
+; GFX89-NEXT: v_lshlrev_b16_e64 v0, 8, 1
+; GFX89-NEXT: .LBB22_1: ; %bb1
+; GFX89-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX89-NEXT: v_mov_b32_e32 v3, s4
+; GFX89-NEXT: s_lshl_b32 s6, s4, 3
+; GFX89-NEXT: v_mov_b32_e32 v1, s4
+; GFX89-NEXT: v_or_b32_sdwa v3, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
+; GFX89-NEXT: v_mov_b32_e32 v2, s5
+; GFX89-NEXT: s_mov_b64 s[4:5], 1
+; GFX89-NEXT: v_lshrrev_b16_e32 v3, s6, v3
+; GFX89-NEXT: flat_store_byte v[1:2], v3
+; GFX89-NEXT: s_mov_b64 vcc, vcc
+; GFX89-NEXT: s_cbranch_vccnz .LBB22_1
+; GFX89-NEXT: ; %bb.2: ; %DummyReturnBlock
+; GFX89-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX89-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: crash_lshlrevb16_not_reg_op:
+; GFX9: ; %bb.0: ; %bb0
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: s_mov_b64 s[4:5], 0
+; GFX9-NEXT: v_lshlrev_b16_e64 v0, 8, 1
+; GFX9-NEXT: s_and_b64 vcc, exec, -1
+; GFX9-NEXT: v_or_b32_sdwa v0, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
+; GFX9-NEXT: .LBB22_1: ; %bb1
+; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX9-NEXT: s_lshl_b32 s6, s4, 3
+; GFX9-NEXT: v_mov_b32_e32 v1, s4
+; GFX9-NEXT: v_mov_b32_e32 v2, s5
+; GFX9-NEXT: s_mov_b64 s[4:5], 1
+; GFX9-NEXT: v_lshrrev_b16_e32 v3, s6, v0
+; GFX9-NEXT: flat_store_byte v[1:2], v3
+; GFX9-NEXT: s_mov_b64 vcc, vcc
+; GFX9-NEXT: s_cbranch_vccnz .LBB22_1
+; GFX9-NEXT: ; %bb.2: ; %DummyReturnBlock
+; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: crash_lshlrevb16_not_reg_op:
+; GFX10: ; %bb.0: ; %bb0
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_lshlrev_b16 v0, 8, 1
+; GFX10-NEXT: s_mov_b32 vcc_lo, exec_lo
+; GFX10-NEXT: v_or_b32_sdwa v0, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
+; GFX10-NEXT: s_mov_b64 s[4:5], 0
+; GFX10-NEXT: .LBB22_1: ; %bb1
+; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX10-NEXT: s_lshl_b32 s6, s4, 3
+; GFX10-NEXT: v_mov_b32_e32 v1, s4
+; GFX10-NEXT: v_mov_b32_e32 v2, s5
+; GFX10-NEXT: v_lshrrev_b16 v3, s6, v0
+; GFX10-NEXT: s_mov_b64 s[4:5], 1
+; GFX10-NEXT: flat_store_byte v[1:2], v3
+; GFX10-NEXT: s_cbranch_vccnz .LBB22_1
+; GFX10-NEXT: ; %bb.2: ; %DummyReturnBlock
+; GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+ %1 = alloca [2 x i8], align 1, addrspace(5)
+ %2 = getelementptr [2 x i8], ptr addrspace(5) %1, i32 0, i32 1
+ br label %bb0
+
+bb0:
+ store i8 1, ptr addrspace(5) %2, align 1
+ br label %bb1
+
+bb1:
+ %3 = phi i64 [ 1, %bb1 ], [ 0, %bb0 ]
+ %4 = trunc i64 %3 to i32
+ %5 = getelementptr i8, ptr addrspace(5) %1, i32 %4
+ %6 = load i8, ptr addrspace(5) %5, align 1
+ %7 = getelementptr i8, ptr null, i64 %3
+ store i8 %6, ptr %7, align 1
+ br i1 false, label %bb2, label %bb1
+
+bb2:
+ br label %bb0
+}
+
declare i32 @llvm.amdgcn.workitem.id.x()
attributes #0 = { "denormal-fp-math"="preserve-sign,preserve-sign" }
More information about the llvm-commits
mailing list