[llvm] r363899 - AMDGPU: Undo sub x, c canonicalization for v2i16
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Wed Jun 19 16:37:43 PDT 2019
Author: arsenm
Date: Wed Jun 19 16:37:43 2019
New Revision: 363899
URL: http://llvm.org/viewvc/llvm-project?rev=363899&view=rev
Log:
AMDGPU: Undo sub x, c canonicalization for v2i16
Should avoid regression from D62341
Modified:
llvm/trunk/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td
llvm/trunk/lib/Target/AMDGPU/VOP3PInstructions.td
llvm/trunk/test/CodeGen/AMDGPU/add.v2i16.ll
llvm/trunk/test/CodeGen/AMDGPU/chain-hi-to-lo.ll
llvm/trunk/test/CodeGen/AMDGPU/shrink-add-sub-constant.ll
llvm/trunk/test/CodeGen/AMDGPU/sminmax.v2i16.ll
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp?rev=363899&r1=363898&r2=363899&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp Wed Jun 19 16:37:43 2019
@@ -67,6 +67,42 @@ class R600InstrInfo;
namespace {
+static bool getConstantValue(SDValue N, uint32_t &Out) {
+ if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N)) {
+ Out = C->getAPIntValue().getSExtValue();
+ return true;
+ }
+
+ if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N)) {
+ Out = C->getValueAPF().bitcastToAPInt().getSExtValue();
+ return true;
+ }
+
+ return false;
+}
+
+// TODO: Handle undef as zero
+static SDNode *packConstantV2I16(const SDNode *N, SelectionDAG &DAG,
+ bool Negate = false) {
+ assert(N->getOpcode() == ISD::BUILD_VECTOR && N->getNumOperands() == 2);
+ uint32_t LHSVal, RHSVal;
+ if (getConstantValue(N->getOperand(0), LHSVal) &&
+ getConstantValue(N->getOperand(1), RHSVal)) {
+ SDLoc SL(N);
+ uint32_t K = Negate ?
+ (-LHSVal & 0xffff) | (-RHSVal << 16) :
+ (LHSVal & 0xffff) | (RHSVal << 16);
+ return DAG.getMachineNode(AMDGPU::S_MOV_B32, SL, N->getValueType(0),
+ DAG.getTargetConstant(K, SL, MVT::i32));
+ }
+
+ return nullptr;
+}
+
+static SDNode *packNegConstantV2I16(const SDNode *N, SelectionDAG &DAG) {
+ return packConstantV2I16(N, DAG, true);
+}
+
/// AMDGPU specific code to select AMDGPU machine instructions for
/// SelectionDAG operations.
class AMDGPUDAGToDAGISel : public SelectionDAGISel {
@@ -104,7 +140,11 @@ protected:
private:
std::pair<SDValue, SDValue> foldFrameIndex(SDValue N) const;
bool isNoNanSrc(SDValue N) const;
- bool isInlineImmediate(const SDNode *N) const;
+ bool isInlineImmediate(const SDNode *N, bool Negated = false) const;
+ bool isNegInlineImmediate(const SDNode *N) const {
+ return isInlineImmediate(N, true);
+ }
+
bool isVGPRImm(const SDNode *N) const;
bool isUniformLoad(const SDNode *N) const;
bool isUniformBr(const SDNode *N) const;
@@ -437,14 +477,25 @@ bool AMDGPUDAGToDAGISel::isNoNanSrc(SDVa
return CurDAG->isKnownNeverNaN(N);
}
-bool AMDGPUDAGToDAGISel::isInlineImmediate(const SDNode *N) const {
- const SIInstrInfo *TII = Subtarget->getInstrInfo();
+bool AMDGPUDAGToDAGISel::isInlineImmediate(const SDNode *N,
+ bool Negated) const {
+ // TODO: Handle undef
- if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N))
- return TII->isInlineConstant(C->getAPIntValue());
+ const SIInstrInfo *TII = Subtarget->getInstrInfo();
+ if (Negated) {
+ if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N))
+ return TII->isInlineConstant(-C->getAPIntValue());
+
+ if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N))
+ return TII->isInlineConstant(-C->getValueAPF().bitcastToAPInt());
+
+ } else {
+ if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N))
+ return TII->isInlineConstant(C->getAPIntValue());
- if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N))
- return TII->isInlineConstant(C->getValueAPF().bitcastToAPInt());
+ if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N))
+ return TII->isInlineConstant(C->getValueAPF().bitcastToAPInt());
+ }
return false;
}
@@ -563,20 +614,6 @@ static unsigned selectSGPRVectorRegClass
llvm_unreachable("invalid vector size");
}
-static bool getConstantValue(SDValue N, uint32_t &Out) {
- if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N)) {
- Out = C->getAPIntValue().getZExtValue();
- return true;
- }
-
- if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N)) {
- Out = C->getValueAPF().bitcastToAPInt().getZExtValue();
- return true;
- }
-
- return false;
-}
-
void AMDGPUDAGToDAGISel::SelectBuildVector(SDNode *N, unsigned RegClassID) {
EVT VT = N->getValueType(0);
unsigned NumVectorElts = VT.getVectorNumElements();
@@ -685,12 +722,8 @@ void AMDGPUDAGToDAGISel::Select(SDNode *
unsigned NumVectorElts = VT.getVectorNumElements();
if (VT.getScalarSizeInBits() == 16) {
if (Opc == ISD::BUILD_VECTOR && NumVectorElts == 2) {
- uint32_t LHSVal, RHSVal;
- if (getConstantValue(N->getOperand(0), LHSVal) &&
- getConstantValue(N->getOperand(1), RHSVal)) {
- uint32_t K = LHSVal | (RHSVal << 16);
- CurDAG->SelectNodeTo(N, AMDGPU::S_MOV_B32, VT,
- CurDAG->getTargetConstant(K, SDLoc(N), MVT::i32));
+ if (SDNode *Packed = packConstantV2I16(N, *CurDAG)) {
+ ReplaceNode(N, Packed);
return;
}
}
Modified: llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td?rev=363899&r1=363898&r2=363899&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td Wed Jun 19 16:37:43 2019
@@ -606,6 +606,24 @@ def ShiftAmt32Imm : PatLeaf <(imm), [{
return N->getZExtValue() < 32;
}]>;
+def getNegV2I16Imm : SDNodeXForm<build_vector, [{
+ return SDValue(packNegConstantV2I16(N, *CurDAG), 0);
+}]>;
+
+
+// TODO: Handle undef as 0
+def NegSubInlineConstV216 : PatLeaf<(build_vector), [{
+ assert(N->getNumOperands() == 2);
+ assert(N->getOperand(0).getValueType().getSizeInBits() == 16);
+ SDValue Src0 = N->getOperand(0);
+ SDValue Src1 = N->getOperand(1);
+ if (Src0 == Src1)
+ return isNegInlineImmediate(Src0.getNode());
+
+ return (isNullConstant(Src0) && isNegInlineImmediate(Src1.getNode())) ||
+ (isNullConstant(Src1) && isNegInlineImmediate(Src0.getNode()));
+}], getNegV2I16Imm>;
+
//===----------------------------------------------------------------------===//
// Custom Operands
//===----------------------------------------------------------------------===//
Modified: llvm/trunk/lib/Target/AMDGPU/VOP3PInstructions.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/VOP3PInstructions.td?rev=363899&r1=363898&r2=363899&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/VOP3PInstructions.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/VOP3PInstructions.td Wed Jun 19 16:37:43 2019
@@ -69,6 +69,16 @@ def V_PK_LSHLREV_B16 : VOP3PInst<"v_pk_l
def V_PK_ASHRREV_I16 : VOP3PInst<"v_pk_ashrrev_i16", VOP3_Profile<VOP_V2I16_V2I16_V2I16>, ashr_rev>;
def V_PK_LSHRREV_B16 : VOP3PInst<"v_pk_lshrrev_b16", VOP3_Profile<VOP_V2I16_V2I16_V2I16>, lshr_rev>;
+
+// Undo sub x, c -> add x, -c canonicalization since c is more likely
+// an inline immediate than -c.
+// The constant will be emitted as a mov, and folded later.
+// TODO: We could directly encode the immediate now
+def : GCNPat<
+ (add (v2i16 (VOP3PMods0 v2i16:$src0, i32:$src0_modifiers, i1:$clamp)), NegSubInlineConstV216:$src1),
+ (V_PK_SUB_U16 $src0_modifiers, $src0, SRCMODS.OP_SEL_1, NegSubInlineConstV216:$src1, $clamp)
+>;
+
multiclass MadFmaMixPats<SDPatternOperator fma_like,
Instruction mix_inst,
Instruction mixlo_inst,
Modified: llvm/trunk/test/CodeGen/AMDGPU/add.v2i16.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/add.v2i16.ll?rev=363899&r1=363898&r2=363899&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/add.v2i16.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/add.v2i16.ll Wed Jun 19 16:37:43 2019
@@ -103,7 +103,7 @@ define amdgpu_kernel void @v_test_add_v2
}
; GCN-LABEL: {{^}}v_test_add_v2i16_inline_neg1:
-; GFX9: v_pk_add_u16 v{{[0-9]+}}, v{{[0-9]+}}, -1 op_sel_hi:[1,0]{{$}}
+; GFX9: v_pk_sub_u16 v{{[0-9]+}}, v{{[0-9]+}}, 1 op_sel_hi:[1,0]{{$}}
; VI: v_mov_b32_e32 v[[SCONST:[0-9]+]], -1
; VI: flat_load_dword [[LOAD:v[0-9]+]]
Modified: llvm/trunk/test/CodeGen/AMDGPU/chain-hi-to-lo.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/chain-hi-to-lo.ll?rev=363899&r1=363898&r2=363899&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/chain-hi-to-lo.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/chain-hi-to-lo.ll Wed Jun 19 16:37:43 2019
@@ -181,7 +181,7 @@ entry:
; GCN-LABEL: {{^}}chain_hi_to_lo_group_other_dep:
; GFX900: ds_read_u16_d16_hi v1, v0
; GFX900-NEXT: s_waitcnt lgkmcnt(0)
-; GFX900-NEXT: v_pk_add_u16 v1, v1, 12 op_sel_hi:[1,0]
+; GFX900-NEXT: v_pk_sub_u16 v1, v1, -12 op_sel_hi:[1,0]
; GFX900-NEXT: ds_read_u16_d16 v1, v0 offset:2
; GFX900-NEXT: s_waitcnt lgkmcnt(0)
; GFX900-NEXT: v_mov_b32_e32 v0, v1
@@ -204,7 +204,7 @@ bb:
; GFX900-NEXT: ds_read_u16_d16_hi v0, v0
; GFX900-NEXT: v_mov_b32_e32 [[MASK:v[0-9]+]], 0xffff
; GFX900-NEXT: s_waitcnt lgkmcnt(0)
-; GFX900-NEXT: v_pk_add_u16 v0, v0, 12 op_sel_hi:[1,0]
+; GFX900-NEXT: v_pk_sub_u16 v0, v0, -12 op_sel_hi:[1,0]
; GFX900-NEXT: v_bfi_b32 v0, [[MASK]], v1, v0
; GFX900-NEXT: s_setpc_b64
define <2 x i16> @chain_hi_to_lo_group_other_dep_multi_chain(i16 addrspace(3)* %ptr) {
@@ -222,7 +222,7 @@ bb:
; GCN-LABEL: {{^}}chain_hi_to_lo_private_other_dep:
; GFX900: buffer_load_short_d16_hi v1, v0, s[0:3], s4 offen
; GFX900-NEXT: s_waitcnt vmcnt(0)
-; GFX900-NEXT: v_pk_add_u16 v1, v1, 12 op_sel_hi:[1,0]
+; GFX900-NEXT: v_pk_sub_u16 v1, v1, -12 op_sel_hi:[1,0]
; GFX900-NEXT: buffer_load_short_d16 v1, v0, s[0:3], s4 offen offset:2
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: v_mov_b32_e32 v0, v1
@@ -244,7 +244,7 @@ bb:
; GFX900-NEXT: global_load_short_d16_hi v0, v[0:1], off
; GFX900-NEXT: v_mov_b32_e32 [[MASK:v[0-9]+]], 0xffff
; GFX900-NEXT: s_waitcnt vmcnt(0)
-; GFX900-NEXT: v_pk_add_u16 v0, v0, 12 op_sel_hi:[1,0]
+; GFX900-NEXT: v_pk_sub_u16 v0, v0, -12 op_sel_hi:[1,0]
; GFX900-NEXT: v_bfi_b32 v0, [[MASK]], v2, v0
; GFX900-NEXT: s_setpc_b64
define <2 x i16> @chain_hi_to_lo_global_other_dep(i16 addrspace(1)* %ptr) {
@@ -264,7 +264,7 @@ bb:
; GFX900-NEXT: flat_load_short_d16_hi v0, v[0:1]
; GFX900-NEXT: v_mov_b32_e32 [[MASK:v[0-9]+]], 0xffff
; GFX900-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_pk_add_u16 v0, v0, 12 op_sel_hi:[1,0]
+; GFX900-NEXT: v_pk_sub_u16 v0, v0, -12 op_sel_hi:[1,0]
; GFX900-NEXT: v_bfi_b32 v0, v1, v2, v0
; GFX900-NEXT: s_setpc_b64
define <2 x i16> @chain_hi_to_lo_flat_other_dep(i16 addrspace(0)* %ptr) {
Modified: llvm/trunk/test/CodeGen/AMDGPU/shrink-add-sub-constant.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/shrink-add-sub-constant.ll?rev=363899&r1=363898&r2=363899&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/shrink-add-sub-constant.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/shrink-add-sub-constant.ll Wed Jun 19 16:37:43 2019
@@ -1233,7 +1233,6 @@ define amdgpu_kernel void @v_test_v2i16_
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
; GFX9-NEXT: v_lshlrev_b32_e32 v2, 2, v0
-; GFX9-NEXT: s_movk_i32 s4, 0xffe0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v1, s3
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, s2, v2
@@ -1243,7 +1242,7 @@ define amdgpu_kernel void @v_test_v2i16_
; GFX9-NEXT: v_mov_b32_e32 v1, s1
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: v_pk_add_u16 v2, v3, s4 op_sel_hi:[1,0]
+; GFX9-NEXT: v_pk_sub_u16 v2, v3, 32 op_sel_hi:[1,0]
; GFX9-NEXT: global_store_dword v[0:1], v2, off
; GFX9-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
@@ -1296,7 +1295,6 @@ define amdgpu_kernel void @v_test_v2i16_
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
; GFX9-NEXT: v_lshlrev_b32_e32 v2, 2, v0
-; GFX9-NEXT: s_mov_b32 s4, 0xffe00000
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v1, s3
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, s2, v2
@@ -1306,7 +1304,7 @@ define amdgpu_kernel void @v_test_v2i16_
; GFX9-NEXT: v_mov_b32_e32 v1, s1
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: v_pk_add_u16 v2, v3, s4
+; GFX9-NEXT: v_pk_sub_u16 v2, v3, 32 op_sel:[0,1] op_sel_hi:[1,0]
; GFX9-NEXT: global_store_dword v[0:1], v2, off
; GFX9-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
@@ -1362,7 +1360,6 @@ define amdgpu_kernel void @v_test_v2i16_
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
; GFX9-NEXT: v_lshlrev_b32_e32 v2, 2, v0
-; GFX9-NEXT: s_mov_b32 s4, 0xffe0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v1, s3
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, s2, v2
@@ -1372,7 +1369,7 @@ define amdgpu_kernel void @v_test_v2i16_
; GFX9-NEXT: v_mov_b32_e32 v1, s1
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: v_pk_add_u16 v2, v3, s4
+; GFX9-NEXT: v_pk_sub_u16 v2, v3, 32
; GFX9-NEXT: global_store_dword v[0:1], v2, off
; GFX9-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
@@ -1440,7 +1437,7 @@ define amdgpu_kernel void @v_test_v2i16_
; GFX9-NEXT: v_mov_b32_e32 v1, s1
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: v_pk_add_u16 v2, v3, -16 op_sel_hi:[1,0]
+; GFX9-NEXT: v_pk_sub_u16 v2, v3, 16 op_sel_hi:[1,0]
; GFX9-NEXT: global_store_dword v[0:1], v2, off
; GFX9-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
@@ -1502,7 +1499,7 @@ define amdgpu_kernel void @v_test_v2i16_
; GFX9-NEXT: v_mov_b32_e32 v1, s1
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: v_pk_add_u16 v2, v3, -16 op_sel:[0,1] op_sel_hi:[1,0]
+; GFX9-NEXT: v_pk_sub_u16 v2, v3, 16 op_sel:[0,1] op_sel_hi:[1,0]
; GFX9-NEXT: global_store_dword v[0:1], v2, off
; GFX9-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
@@ -1567,7 +1564,7 @@ define amdgpu_kernel void @v_test_v2i16_
; GFX9-NEXT: v_mov_b32_e32 v1, s1
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: v_pk_add_u16 v2, v3, -16
+; GFX9-NEXT: v_pk_sub_u16 v2, v3, 16
; GFX9-NEXT: global_store_dword v[0:1], v2, off
; GFX9-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
@@ -1634,7 +1631,7 @@ define amdgpu_kernel void @v_test_v2i16_
; GFX9-NEXT: v_mov_b32_e32 v1, s1
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: v_pk_add_u16 v2, v3, -4.0 op_sel_hi:[1,0]
+; GFX9-NEXT: v_pk_sub_u16 v2, v3, 1.0 op_sel_hi:[1,0]
; GFX9-NEXT: global_store_dword v[0:1], v2, off
; GFX9-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
@@ -1701,7 +1698,7 @@ define amdgpu_kernel void @v_test_v2i16_
; GFX9-NEXT: v_mov_b32_e32 v1, s1
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: v_pk_add_u16 v2, v3, 4.0 op_sel_hi:[1,0]
+; GFX9-NEXT: v_pk_sub_u16 v2, v3, -1.0 op_sel_hi:[1,0]
; GFX9-NEXT: global_store_dword v[0:1], v2, off
; GFX9-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
@@ -1768,7 +1765,7 @@ define amdgpu_kernel void @v_test_v2i16_
; GFX9-NEXT: v_mov_b32_e32 v1, s1
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: v_pk_add_u16 v2, v3, 2.0 op_sel_hi:[1,0]
+; GFX9-NEXT: v_pk_sub_u16 v2, v3, -2.0 op_sel_hi:[1,0]
; GFX9-NEXT: global_store_dword v[0:1], v2, off
; GFX9-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
@@ -1835,7 +1832,7 @@ define amdgpu_kernel void @v_test_v2i16_
; GFX9-NEXT: v_mov_b32_e32 v1, s1
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: v_pk_add_u16 v2, v3, -2.0 op_sel_hi:[1,0]
+; GFX9-NEXT: v_pk_sub_u16 v2, v3, 2.0 op_sel_hi:[1,0]
; GFX9-NEXT: global_store_dword v[0:1], v2, off
; GFX9-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
Modified: llvm/trunk/test/CodeGen/AMDGPU/sminmax.v2i16.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/sminmax.v2i16.ll?rev=363899&r1=363898&r2=363899&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/sminmax.v2i16.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/sminmax.v2i16.ll Wed Jun 19 16:37:43 2019
@@ -6,7 +6,7 @@
; GFX9: s_load_dword [[VAL:s[0-9]+]]
; GFX9: v_pk_sub_i16 [[SUB:v[0-9]+]], 0, [[VAL]]
; GFX9: v_pk_max_i16 [[MAX:v[0-9]+]], [[VAL]], [[SUB]]
-; GFX9: v_pk_add_u16 [[ADD:v[0-9]+]], [[MAX]], 2
+; GFX9: v_pk_sub_u16 [[ADD:v[0-9]+]], [[MAX]], -2 op_sel_hi:[1,0]
; CIVI: s_lshr_b32 s{{[0-9]+}}, s{{[0-9]+}}, 16
; CIVI: s_sub_i32
@@ -30,7 +30,7 @@ define amdgpu_kernel void @s_abs_v2i16(<
; GFX9: global_load_dword [[VAL:v[0-9]+]]
; GFX9: v_pk_sub_i16 [[SUB:v[0-9]+]], 0, [[VAL]]
; GFX9: v_pk_max_i16 [[MAX:v[0-9]+]], [[VAL]], [[SUB]]
-; GFX9: v_pk_add_u16 [[ADD:v[0-9]+]], [[MAX]], 2
+; GFX9: v_pk_sub_u16 [[ADD:v[0-9]+]], [[MAX]], -2 op_sel_hi:[1,0]
; VI: v_mov_b32_e32 [[TWO:v[0-9]+]], 2
; VI: v_lshrrev_b32_e32 v{{[0-9]+}}, 16,
@@ -70,7 +70,7 @@ define amdgpu_kernel void @v_abs_v2i16(<
; GFX9: s_load_dword [[VAL:s[0-9]+]]
; GFX9: v_pk_sub_i16 [[SUB:v[0-9]+]], 0, [[VAL]]
; GFX9: v_pk_max_i16 [[MAX:v[0-9]+]], [[VAL]], [[SUB]]
-; GFX9: v_pk_add_u16 [[ADD:v[0-9]+]], [[MAX]], 2
+; GFX9: v_pk_sub_u16 [[ADD:v[0-9]+]], [[MAX]], -2 op_sel_hi:[1,0]
define amdgpu_kernel void @s_abs_v2i16_2(<2 x i16> addrspace(1)* %out, <2 x i16> %val) #0 {
%z0 = insertelement <2 x i16> undef, i16 0, i16 0
%z1 = insertelement <2 x i16> %z0, i16 0, i16 1
@@ -88,7 +88,7 @@ define amdgpu_kernel void @s_abs_v2i16_2
; GFX9: buffer_load_dword [[VAL:v[0-9]+]]
; GFX9: v_pk_sub_i16 [[SUB:v[0-9]+]], 0, [[VAL]]
; GFX9: v_pk_max_i16 [[MAX:v[0-9]+]], [[VAL]], [[SUB]]
-; GFX9: v_pk_add_u16 [[ADD:v[0-9]+]], [[MAX]], 2
+; GFX9: v_pk_sub_u16 [[ADD:v[0-9]+]], [[MAX]], -2 op_sel_hi:[1,0]
define amdgpu_kernel void @v_abs_v2i16_2(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %src) #0 {
%z0 = insertelement <2 x i16> undef, i16 0, i16 0
%z1 = insertelement <2 x i16> %z0, i16 0, i16 1
@@ -109,8 +109,8 @@ define amdgpu_kernel void @v_abs_v2i16_2
; GFX9-DAG: v_pk_sub_i16 [[SUB1:v[0-9]+]], 0, s[[VAL1]]
; GFX9-DAG: v_pk_max_i16 [[MAX0:v[0-9]+]], s[[VAL0]], [[SUB0]]
; GFX9-DAG: v_pk_max_i16 [[MAX1:v[0-9]+]], s[[VAL1]], [[SUB1]]
-; GFX9-DAG: v_pk_add_u16 [[ADD0:v[0-9]+]], [[MAX0]], 2 op_sel_hi:[1,0]
-; GFX9-DAG: v_pk_add_u16 [[ADD1:v[0-9]+]], [[MAX1]], 2 op_sel_hi:[1,0]
+; GFX9-DAG: v_pk_sub_u16 [[ADD0:v[0-9]+]], [[MAX0]], -2 op_sel_hi:[1,0]
+; GFX9-DAG: v_pk_sub_u16 [[ADD1:v[0-9]+]], [[MAX1]], -2 op_sel_hi:[1,0]
define amdgpu_kernel void @s_abs_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> %val) #0 {
%z0 = insertelement <4 x i16> undef, i16 0, i16 0
%z1 = insertelement <4 x i16> %z0, i16 0, i16 1
@@ -133,11 +133,11 @@ define amdgpu_kernel void @s_abs_v4i16(<
; GFX9-DAG: v_pk_sub_i16 [[SUB0:v[0-9]+]], 0, v[[VAL0]]
; GFX9-DAG: v_pk_max_i16 [[MAX0:v[0-9]+]], v[[VAL0]], [[SUB0]]
-; GFX9-DAG: v_pk_add_u16 [[ADD0:v[0-9]+]], [[MAX0]], 2
+; GFX9-DAG: v_pk_sub_u16 [[ADD0:v[0-9]+]], [[MAX0]], -2 op_sel_hi:[1,0]
; GFX9-DAG: v_pk_sub_i16 [[SUB1:v[0-9]+]], 0, v[[VAL1]]
; GFX9-DAG: v_pk_max_i16 [[MAX1:v[0-9]+]], v[[VAL1]], [[SUB1]]
-; GFX9-DAG: v_pk_add_u16 [[ADD1:v[0-9]+]], [[MAX1]], 2
+; GFX9-DAG: v_pk_sub_u16 [[ADD1:v[0-9]+]], [[MAX1]], -2 op_sel_hi:[1,0]
define amdgpu_kernel void @v_abs_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> addrspace(1)* %src) #0 {
%z0 = insertelement <4 x i16> undef, i16 0, i16 0
%z1 = insertelement <4 x i16> %z0, i16 0, i16 1
More information about the llvm-commits
mailing list