[llvm] r363899 - AMDGPU: Undo sub x, c canonicalization for v2i16

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Wed Jun 19 16:37:43 PDT 2019


Author: arsenm
Date: Wed Jun 19 16:37:43 2019
New Revision: 363899

URL: http://llvm.org/viewvc/llvm-project?rev=363899&view=rev
Log:
AMDGPU: Undo sub x, c canonicalization for v2i16

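Undo the "sub x, c -> add x, -c" canonicalization during instruction
selection for v2i16: when the negated constant is an inline immediate,
select v_pk_sub_u16 with that negated constant instead of v_pk_add_u16,
since c is more likely to be an inline immediate than -c.

This should avoid the regression from D62341.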

Modified:
    llvm/trunk/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
    llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td
    llvm/trunk/lib/Target/AMDGPU/VOP3PInstructions.td
    llvm/trunk/test/CodeGen/AMDGPU/add.v2i16.ll
    llvm/trunk/test/CodeGen/AMDGPU/chain-hi-to-lo.ll
    llvm/trunk/test/CodeGen/AMDGPU/shrink-add-sub-constant.ll
    llvm/trunk/test/CodeGen/AMDGPU/sminmax.v2i16.ll

Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp?rev=363899&r1=363898&r2=363899&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp Wed Jun 19 16:37:43 2019
@@ -67,6 +67,42 @@ class R600InstrInfo;
 
 namespace {
 
+static bool getConstantValue(SDValue N, uint32_t &Out) {
+  if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N)) {
+    Out = C->getAPIntValue().getSExtValue();
+    return true;
+  }
+
+  if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N)) {
+    Out = C->getValueAPF().bitcastToAPInt().getSExtValue();
+    return true;
+  }
+
+  return false;
+}
+
+// TODO: Handle undef as zero
+static SDNode *packConstantV2I16(const SDNode *N, SelectionDAG &DAG,
+                                 bool Negate = false) {
+  assert(N->getOpcode() == ISD::BUILD_VECTOR && N->getNumOperands() == 2);
+  uint32_t LHSVal, RHSVal;
+  if (getConstantValue(N->getOperand(0), LHSVal) &&
+      getConstantValue(N->getOperand(1), RHSVal)) {
+    SDLoc SL(N);
+    uint32_t K = Negate ?
+      (-LHSVal & 0xffff) | (-RHSVal << 16) :
+      (LHSVal & 0xffff) | (RHSVal << 16);
+    return DAG.getMachineNode(AMDGPU::S_MOV_B32, SL, N->getValueType(0),
+                              DAG.getTargetConstant(K, SL, MVT::i32));
+  }
+
+  return nullptr;
+}
+
+static SDNode *packNegConstantV2I16(const SDNode *N, SelectionDAG &DAG) {
+  return packConstantV2I16(N, DAG, true);
+}
+
 /// AMDGPU specific code to select AMDGPU machine instructions for
 /// SelectionDAG operations.
 class AMDGPUDAGToDAGISel : public SelectionDAGISel {
@@ -104,7 +140,11 @@ protected:
 private:
   std::pair<SDValue, SDValue> foldFrameIndex(SDValue N) const;
   bool isNoNanSrc(SDValue N) const;
-  bool isInlineImmediate(const SDNode *N) const;
+  bool isInlineImmediate(const SDNode *N, bool Negated = false) const;
+  bool isNegInlineImmediate(const SDNode *N) const {
+    return isInlineImmediate(N, true);
+  }
+
   bool isVGPRImm(const SDNode *N) const;
   bool isUniformLoad(const SDNode *N) const;
   bool isUniformBr(const SDNode *N) const;
@@ -437,14 +477,25 @@ bool AMDGPUDAGToDAGISel::isNoNanSrc(SDVa
   return CurDAG->isKnownNeverNaN(N);
 }
 
-bool AMDGPUDAGToDAGISel::isInlineImmediate(const SDNode *N) const {
-  const SIInstrInfo *TII = Subtarget->getInstrInfo();
+bool AMDGPUDAGToDAGISel::isInlineImmediate(const SDNode *N,
+                                           bool Negated) const {
+  // TODO: Handle undef
 
-  if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N))
-    return TII->isInlineConstant(C->getAPIntValue());
+  const SIInstrInfo *TII = Subtarget->getInstrInfo();
+  if (Negated) {
+    if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N))
+      return TII->isInlineConstant(-C->getAPIntValue());
+
+    if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N))
+      return TII->isInlineConstant(-C->getValueAPF().bitcastToAPInt());
+
+  } else {
+    if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N))
+      return TII->isInlineConstant(C->getAPIntValue());
 
-  if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N))
-    return TII->isInlineConstant(C->getValueAPF().bitcastToAPInt());
+    if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N))
+      return TII->isInlineConstant(C->getValueAPF().bitcastToAPInt());
+  }
 
   return false;
 }
@@ -563,20 +614,6 @@ static unsigned selectSGPRVectorRegClass
   llvm_unreachable("invalid vector size");
 }
 
-static bool getConstantValue(SDValue N, uint32_t &Out) {
-  if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N)) {
-    Out = C->getAPIntValue().getZExtValue();
-    return true;
-  }
-
-  if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N)) {
-    Out = C->getValueAPF().bitcastToAPInt().getZExtValue();
-    return true;
-  }
-
-  return false;
-}
-
 void AMDGPUDAGToDAGISel::SelectBuildVector(SDNode *N, unsigned RegClassID) {
   EVT VT = N->getValueType(0);
   unsigned NumVectorElts = VT.getVectorNumElements();
@@ -685,12 +722,8 @@ void AMDGPUDAGToDAGISel::Select(SDNode *
     unsigned NumVectorElts = VT.getVectorNumElements();
     if (VT.getScalarSizeInBits() == 16) {
       if (Opc == ISD::BUILD_VECTOR && NumVectorElts == 2) {
-        uint32_t LHSVal, RHSVal;
-        if (getConstantValue(N->getOperand(0), LHSVal) &&
-            getConstantValue(N->getOperand(1), RHSVal)) {
-          uint32_t K = LHSVal | (RHSVal << 16);
-          CurDAG->SelectNodeTo(N, AMDGPU::S_MOV_B32, VT,
-                               CurDAG->getTargetConstant(K, SDLoc(N), MVT::i32));
+        if (SDNode *Packed = packConstantV2I16(N, *CurDAG)) {
+          ReplaceNode(N, Packed);
           return;
         }
       }

Modified: llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td?rev=363899&r1=363898&r2=363899&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td Wed Jun 19 16:37:43 2019
@@ -606,6 +606,24 @@ def ShiftAmt32Imm : PatLeaf <(imm), [{
   return N->getZExtValue() < 32;
 }]>;
 
+def getNegV2I16Imm : SDNodeXForm<build_vector, [{
+  return SDValue(packNegConstantV2I16(N, *CurDAG), 0);
+}]>;
+
+
+// TODO: Handle undef as 0
+def NegSubInlineConstV216 : PatLeaf<(build_vector), [{
+  assert(N->getNumOperands() == 2);
+  assert(N->getOperand(0).getValueType().getSizeInBits() == 16);
+  SDValue Src0 = N->getOperand(0);
+  SDValue Src1 = N->getOperand(1);
+  if (Src0 == Src1)
+    return isNegInlineImmediate(Src0.getNode());
+
+  return (isNullConstant(Src0) && isNegInlineImmediate(Src1.getNode())) ||
+         (isNullConstant(Src1) && isNegInlineImmediate(Src0.getNode()));
+}], getNegV2I16Imm>;
+
 //===----------------------------------------------------------------------===//
 // Custom Operands
 //===----------------------------------------------------------------------===//

Modified: llvm/trunk/lib/Target/AMDGPU/VOP3PInstructions.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/VOP3PInstructions.td?rev=363899&r1=363898&r2=363899&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/VOP3PInstructions.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/VOP3PInstructions.td Wed Jun 19 16:37:43 2019
@@ -69,6 +69,16 @@ def V_PK_LSHLREV_B16 : VOP3PInst<"v_pk_l
 def V_PK_ASHRREV_I16 : VOP3PInst<"v_pk_ashrrev_i16", VOP3_Profile<VOP_V2I16_V2I16_V2I16>, ashr_rev>;
 def V_PK_LSHRREV_B16 : VOP3PInst<"v_pk_lshrrev_b16", VOP3_Profile<VOP_V2I16_V2I16_V2I16>, lshr_rev>;
 
+
+// Undo sub x, c -> add x, -c canonicalization since c is more likely
+// an inline immediate than -c.
+// The constant will be emitted as a mov, and folded later.
+// TODO: We could directly encode the immediate now
+def : GCNPat<
+  (add (v2i16 (VOP3PMods0 v2i16:$src0, i32:$src0_modifiers, i1:$clamp)), NegSubInlineConstV216:$src1),
+  (V_PK_SUB_U16 $src0_modifiers, $src0, SRCMODS.OP_SEL_1, NegSubInlineConstV216:$src1, $clamp)
+>;
+
 multiclass MadFmaMixPats<SDPatternOperator fma_like,
                          Instruction mix_inst,
                          Instruction mixlo_inst,

Modified: llvm/trunk/test/CodeGen/AMDGPU/add.v2i16.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/add.v2i16.ll?rev=363899&r1=363898&r2=363899&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/add.v2i16.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/add.v2i16.ll Wed Jun 19 16:37:43 2019
@@ -103,7 +103,7 @@ define amdgpu_kernel void @v_test_add_v2
 }
 
 ; GCN-LABEL: {{^}}v_test_add_v2i16_inline_neg1:
-; GFX9: v_pk_add_u16 v{{[0-9]+}}, v{{[0-9]+}}, -1 op_sel_hi:[1,0]{{$}}
+; GFX9: v_pk_sub_u16 v{{[0-9]+}}, v{{[0-9]+}}, 1 op_sel_hi:[1,0]{{$}}
 
 ; VI: v_mov_b32_e32 v[[SCONST:[0-9]+]], -1
 ; VI: flat_load_dword [[LOAD:v[0-9]+]]

Modified: llvm/trunk/test/CodeGen/AMDGPU/chain-hi-to-lo.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/chain-hi-to-lo.ll?rev=363899&r1=363898&r2=363899&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/chain-hi-to-lo.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/chain-hi-to-lo.ll Wed Jun 19 16:37:43 2019
@@ -181,7 +181,7 @@ entry:
 ; GCN-LABEL: {{^}}chain_hi_to_lo_group_other_dep:
 ; GFX900: ds_read_u16_d16_hi v1, v0
 ; GFX900-NEXT: s_waitcnt lgkmcnt(0)
-; GFX900-NEXT: v_pk_add_u16 v1, v1, 12 op_sel_hi:[1,0]
+; GFX900-NEXT: v_pk_sub_u16 v1, v1, -12 op_sel_hi:[1,0]
 ; GFX900-NEXT: ds_read_u16_d16 v1, v0 offset:2
 ; GFX900-NEXT: s_waitcnt lgkmcnt(0)
 ; GFX900-NEXT: v_mov_b32_e32 v0, v1
@@ -204,7 +204,7 @@ bb:
 ; GFX900-NEXT: ds_read_u16_d16_hi v0, v0
 ; GFX900-NEXT: v_mov_b32_e32 [[MASK:v[0-9]+]], 0xffff
 ; GFX900-NEXT: s_waitcnt lgkmcnt(0)
-; GFX900-NEXT: v_pk_add_u16 v0, v0, 12 op_sel_hi:[1,0]
+; GFX900-NEXT: v_pk_sub_u16 v0, v0, -12 op_sel_hi:[1,0]
 ; GFX900-NEXT: v_bfi_b32 v0, [[MASK]], v1, v0
 ; GFX900-NEXT: s_setpc_b64
 define <2 x i16> @chain_hi_to_lo_group_other_dep_multi_chain(i16 addrspace(3)* %ptr) {
@@ -222,7 +222,7 @@ bb:
 ; GCN-LABEL: {{^}}chain_hi_to_lo_private_other_dep:
 ; GFX900: buffer_load_short_d16_hi v1, v0, s[0:3], s4 offen
 ; GFX900-NEXT: s_waitcnt vmcnt(0)
-; GFX900-NEXT: v_pk_add_u16 v1, v1, 12 op_sel_hi:[1,0]
+; GFX900-NEXT: v_pk_sub_u16 v1, v1, -12 op_sel_hi:[1,0]
 ; GFX900-NEXT: buffer_load_short_d16 v1, v0, s[0:3], s4 offen offset:2
 ; GFX900-NEXT: s_waitcnt vmcnt(0)
 ; GFX900-NEXT: v_mov_b32_e32 v0, v1
@@ -244,7 +244,7 @@ bb:
 ; GFX900-NEXT: global_load_short_d16_hi v0, v[0:1], off
 ; GFX900-NEXT: v_mov_b32_e32 [[MASK:v[0-9]+]], 0xffff
 ; GFX900-NEXT: s_waitcnt vmcnt(0)
-; GFX900-NEXT: v_pk_add_u16 v0, v0, 12 op_sel_hi:[1,0]
+; GFX900-NEXT: v_pk_sub_u16 v0, v0, -12 op_sel_hi:[1,0]
 ; GFX900-NEXT: v_bfi_b32 v0, [[MASK]], v2, v0
 ; GFX900-NEXT: s_setpc_b64
 define <2 x i16> @chain_hi_to_lo_global_other_dep(i16 addrspace(1)* %ptr) {
@@ -264,7 +264,7 @@ bb:
 ; GFX900-NEXT: flat_load_short_d16_hi v0, v[0:1]
 ; GFX900-NEXT: v_mov_b32_e32 [[MASK:v[0-9]+]], 0xffff
 ; GFX900-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_pk_add_u16 v0, v0, 12 op_sel_hi:[1,0]
+; GFX900-NEXT: v_pk_sub_u16 v0, v0, -12 op_sel_hi:[1,0]
 ; GFX900-NEXT: v_bfi_b32 v0, v1, v2, v0
 ; GFX900-NEXT: s_setpc_b64
 define <2 x i16> @chain_hi_to_lo_flat_other_dep(i16 addrspace(0)* %ptr) {

Modified: llvm/trunk/test/CodeGen/AMDGPU/shrink-add-sub-constant.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/shrink-add-sub-constant.ll?rev=363899&r1=363898&r2=363899&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/shrink-add-sub-constant.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/shrink-add-sub-constant.ll Wed Jun 19 16:37:43 2019
@@ -1233,7 +1233,6 @@ define amdgpu_kernel void @v_test_v2i16_
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
 ; GFX9-NEXT:    v_lshlrev_b32_e32 v2, 2, v0
-; GFX9-NEXT:    s_movk_i32 s4, 0xffe0
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT:    v_mov_b32_e32 v1, s3
 ; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, s2, v2
@@ -1243,7 +1242,7 @@ define amdgpu_kernel void @v_test_v2i16_
 ; GFX9-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    v_pk_add_u16 v2, v3, s4 op_sel_hi:[1,0]
+; GFX9-NEXT:    v_pk_sub_u16 v2, v3, 32 op_sel_hi:[1,0]
 ; GFX9-NEXT:    global_store_dword v[0:1], v2, off
 ; GFX9-NEXT:    s_endpgm
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
@@ -1296,7 +1295,6 @@ define amdgpu_kernel void @v_test_v2i16_
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
 ; GFX9-NEXT:    v_lshlrev_b32_e32 v2, 2, v0
-; GFX9-NEXT:    s_mov_b32 s4, 0xffe00000
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT:    v_mov_b32_e32 v1, s3
 ; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, s2, v2
@@ -1306,7 +1304,7 @@ define amdgpu_kernel void @v_test_v2i16_
 ; GFX9-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    v_pk_add_u16 v2, v3, s4
+; GFX9-NEXT:    v_pk_sub_u16 v2, v3, 32 op_sel:[0,1] op_sel_hi:[1,0]
 ; GFX9-NEXT:    global_store_dword v[0:1], v2, off
 ; GFX9-NEXT:    s_endpgm
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
@@ -1362,7 +1360,6 @@ define amdgpu_kernel void @v_test_v2i16_
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
 ; GFX9-NEXT:    v_lshlrev_b32_e32 v2, 2, v0
-; GFX9-NEXT:    s_mov_b32 s4, 0xffe0
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT:    v_mov_b32_e32 v1, s3
 ; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, s2, v2
@@ -1372,7 +1369,7 @@ define amdgpu_kernel void @v_test_v2i16_
 ; GFX9-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    v_pk_add_u16 v2, v3, s4
+; GFX9-NEXT:    v_pk_sub_u16 v2, v3, 32
 ; GFX9-NEXT:    global_store_dword v[0:1], v2, off
 ; GFX9-NEXT:    s_endpgm
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
@@ -1440,7 +1437,7 @@ define amdgpu_kernel void @v_test_v2i16_
 ; GFX9-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    v_pk_add_u16 v2, v3, -16 op_sel_hi:[1,0]
+; GFX9-NEXT:    v_pk_sub_u16 v2, v3, 16 op_sel_hi:[1,0]
 ; GFX9-NEXT:    global_store_dword v[0:1], v2, off
 ; GFX9-NEXT:    s_endpgm
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
@@ -1502,7 +1499,7 @@ define amdgpu_kernel void @v_test_v2i16_
 ; GFX9-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    v_pk_add_u16 v2, v3, -16 op_sel:[0,1] op_sel_hi:[1,0]
+; GFX9-NEXT:    v_pk_sub_u16 v2, v3, 16 op_sel:[0,1] op_sel_hi:[1,0]
 ; GFX9-NEXT:    global_store_dword v[0:1], v2, off
 ; GFX9-NEXT:    s_endpgm
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
@@ -1567,7 +1564,7 @@ define amdgpu_kernel void @v_test_v2i16_
 ; GFX9-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    v_pk_add_u16 v2, v3, -16
+; GFX9-NEXT:    v_pk_sub_u16 v2, v3, 16
 ; GFX9-NEXT:    global_store_dword v[0:1], v2, off
 ; GFX9-NEXT:    s_endpgm
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
@@ -1634,7 +1631,7 @@ define amdgpu_kernel void @v_test_v2i16_
 ; GFX9-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    v_pk_add_u16 v2, v3, -4.0 op_sel_hi:[1,0]
+; GFX9-NEXT:    v_pk_sub_u16 v2, v3, 1.0 op_sel_hi:[1,0]
 ; GFX9-NEXT:    global_store_dword v[0:1], v2, off
 ; GFX9-NEXT:    s_endpgm
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
@@ -1701,7 +1698,7 @@ define amdgpu_kernel void @v_test_v2i16_
 ; GFX9-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    v_pk_add_u16 v2, v3, 4.0 op_sel_hi:[1,0]
+; GFX9-NEXT:    v_pk_sub_u16 v2, v3, -1.0 op_sel_hi:[1,0]
 ; GFX9-NEXT:    global_store_dword v[0:1], v2, off
 ; GFX9-NEXT:    s_endpgm
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
@@ -1768,7 +1765,7 @@ define amdgpu_kernel void @v_test_v2i16_
 ; GFX9-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    v_pk_add_u16 v2, v3, 2.0 op_sel_hi:[1,0]
+; GFX9-NEXT:    v_pk_sub_u16 v2, v3, -2.0 op_sel_hi:[1,0]
 ; GFX9-NEXT:    global_store_dword v[0:1], v2, off
 ; GFX9-NEXT:    s_endpgm
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
@@ -1835,7 +1832,7 @@ define amdgpu_kernel void @v_test_v2i16_
 ; GFX9-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    v_pk_add_u16 v2, v3, -2.0 op_sel_hi:[1,0]
+; GFX9-NEXT:    v_pk_sub_u16 v2, v3, 2.0 op_sel_hi:[1,0]
 ; GFX9-NEXT:    global_store_dword v[0:1], v2, off
 ; GFX9-NEXT:    s_endpgm
   %tid = call i32 @llvm.amdgcn.workitem.id.x()

Modified: llvm/trunk/test/CodeGen/AMDGPU/sminmax.v2i16.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/sminmax.v2i16.ll?rev=363899&r1=363898&r2=363899&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/sminmax.v2i16.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/sminmax.v2i16.ll Wed Jun 19 16:37:43 2019
@@ -6,7 +6,7 @@
 ; GFX9: s_load_dword [[VAL:s[0-9]+]]
 ; GFX9: v_pk_sub_i16 [[SUB:v[0-9]+]], 0, [[VAL]]
 ; GFX9: v_pk_max_i16 [[MAX:v[0-9]+]], [[VAL]], [[SUB]]
-; GFX9: v_pk_add_u16 [[ADD:v[0-9]+]], [[MAX]], 2
+; GFX9: v_pk_sub_u16 [[ADD:v[0-9]+]], [[MAX]], -2 op_sel_hi:[1,0]
 
 ; CIVI: s_lshr_b32 s{{[0-9]+}}, s{{[0-9]+}}, 16
 ; CIVI: s_sub_i32
@@ -30,7 +30,7 @@ define amdgpu_kernel void @s_abs_v2i16(<
 ; GFX9: global_load_dword [[VAL:v[0-9]+]]
 ; GFX9: v_pk_sub_i16 [[SUB:v[0-9]+]], 0, [[VAL]]
 ; GFX9: v_pk_max_i16 [[MAX:v[0-9]+]], [[VAL]], [[SUB]]
-; GFX9: v_pk_add_u16 [[ADD:v[0-9]+]], [[MAX]], 2
+; GFX9: v_pk_sub_u16 [[ADD:v[0-9]+]], [[MAX]], -2 op_sel_hi:[1,0]
 
 ; VI: v_mov_b32_e32 [[TWO:v[0-9]+]], 2
 ; VI: v_lshrrev_b32_e32 v{{[0-9]+}}, 16,
@@ -70,7 +70,7 @@ define amdgpu_kernel void @v_abs_v2i16(<
 ; GFX9: s_load_dword [[VAL:s[0-9]+]]
 ; GFX9: v_pk_sub_i16 [[SUB:v[0-9]+]], 0, [[VAL]]
 ; GFX9: v_pk_max_i16 [[MAX:v[0-9]+]], [[VAL]], [[SUB]]
-; GFX9: v_pk_add_u16 [[ADD:v[0-9]+]], [[MAX]], 2
+; GFX9: v_pk_sub_u16 [[ADD:v[0-9]+]], [[MAX]], -2 op_sel_hi:[1,0]
 define amdgpu_kernel void @s_abs_v2i16_2(<2 x i16> addrspace(1)* %out, <2 x i16> %val) #0 {
   %z0 = insertelement <2 x i16> undef, i16 0, i16 0
   %z1 = insertelement <2 x i16> %z0, i16 0, i16 1
@@ -88,7 +88,7 @@ define amdgpu_kernel void @s_abs_v2i16_2
 ; GFX9: buffer_load_dword [[VAL:v[0-9]+]]
 ; GFX9: v_pk_sub_i16 [[SUB:v[0-9]+]], 0, [[VAL]]
 ; GFX9: v_pk_max_i16 [[MAX:v[0-9]+]], [[VAL]], [[SUB]]
-; GFX9: v_pk_add_u16 [[ADD:v[0-9]+]], [[MAX]], 2
+; GFX9: v_pk_sub_u16 [[ADD:v[0-9]+]], [[MAX]], -2 op_sel_hi:[1,0]
 define amdgpu_kernel void @v_abs_v2i16_2(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %src) #0 {
   %z0 = insertelement <2 x i16> undef, i16 0, i16 0
   %z1 = insertelement <2 x i16> %z0, i16 0, i16 1
@@ -109,8 +109,8 @@ define amdgpu_kernel void @v_abs_v2i16_2
 ; GFX9-DAG: v_pk_sub_i16 [[SUB1:v[0-9]+]], 0, s[[VAL1]]
 ; GFX9-DAG: v_pk_max_i16 [[MAX0:v[0-9]+]], s[[VAL0]], [[SUB0]]
 ; GFX9-DAG: v_pk_max_i16 [[MAX1:v[0-9]+]], s[[VAL1]], [[SUB1]]
-; GFX9-DAG: v_pk_add_u16 [[ADD0:v[0-9]+]], [[MAX0]], 2 op_sel_hi:[1,0]
-; GFX9-DAG: v_pk_add_u16 [[ADD1:v[0-9]+]], [[MAX1]], 2 op_sel_hi:[1,0]
+; GFX9-DAG: v_pk_sub_u16 [[ADD0:v[0-9]+]], [[MAX0]], -2 op_sel_hi:[1,0]
+; GFX9-DAG: v_pk_sub_u16 [[ADD1:v[0-9]+]], [[MAX1]], -2 op_sel_hi:[1,0]
 define amdgpu_kernel void @s_abs_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> %val) #0 {
   %z0 = insertelement <4 x i16> undef, i16 0, i16 0
   %z1 = insertelement <4 x i16> %z0, i16 0, i16 1
@@ -133,11 +133,11 @@ define amdgpu_kernel void @s_abs_v4i16(<
 
 ; GFX9-DAG: v_pk_sub_i16 [[SUB0:v[0-9]+]], 0, v[[VAL0]]
 ; GFX9-DAG: v_pk_max_i16 [[MAX0:v[0-9]+]], v[[VAL0]], [[SUB0]]
-; GFX9-DAG: v_pk_add_u16 [[ADD0:v[0-9]+]], [[MAX0]], 2
+; GFX9-DAG: v_pk_sub_u16 [[ADD0:v[0-9]+]], [[MAX0]], -2 op_sel_hi:[1,0]
 
 ; GFX9-DAG: v_pk_sub_i16 [[SUB1:v[0-9]+]], 0, v[[VAL1]]
 ; GFX9-DAG: v_pk_max_i16 [[MAX1:v[0-9]+]], v[[VAL1]], [[SUB1]]
-; GFX9-DAG: v_pk_add_u16 [[ADD1:v[0-9]+]], [[MAX1]], 2
+; GFX9-DAG: v_pk_sub_u16 [[ADD1:v[0-9]+]], [[MAX1]], -2 op_sel_hi:[1,0]
 define amdgpu_kernel void @v_abs_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> addrspace(1)* %src) #0 {
   %z0 = insertelement <4 x i16> undef, i16 0, i16 0
   %z1 = insertelement <4 x i16> %z0, i16 0, i16 1




More information about the llvm-commits mailing list