[llvm] 40163f1 - [x86] add special-case lowering for usubsat for AVX512

Sanjay Patel via llvm-commits llvm-commits at lists.llvm.org
Wed Oct 20 13:41:19 PDT 2021


Author: Sanjay Patel
Date: 2021-10-20T16:41:13-04:00
New Revision: 40163f1df8c60f987e8adc0cb78edb289f73b771

URL: https://github.com/llvm/llvm-project/commit/40163f1df8c60f987e8adc0cb78edb289f73b771
DIFF: https://github.com/llvm/llvm-project/commit/40163f1df8c60f987e8adc0cb78edb289f73b771.diff

LOG: [x86] add special-case lowering for usubsat for AVX512

This is a small extension of D112095 to avoid another regression
seen with D112085.
In this case, we allow the same conversion from usubsat to ALU
ops if the target supports vpternlog.

That pattern will get converted later in X86DAGToDAGISel::tryVPTERNLOG().
This seems better than putting a magic immediate constant directly in
this code to create the exact vpternlog that we need. It's possible that
there are other special-cases along these lines, so we should try to
keep all of the vpternlog magic in one place.

Differential Revision: https://reviews.llvm.org/D112138

Added: 
    

Modified: 
    llvm/lib/Target/X86/X86ISelLowering.cpp
    llvm/test/CodeGen/X86/psubus.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index ba6cc40562087..75820c68b1600 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -28139,26 +28139,32 @@ static SDValue LowerADDSAT_SUBSAT(SDValue Op, SelectionDAG &DAG,
       TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
 
   unsigned BitWidth = VT.getScalarSizeInBits();
-  if (Opcode == ISD::USUBSAT && !TLI.isOperationLegal(ISD::UMAX, VT)) {
-    // Handle a special-case with a bit-hack instead of cmp+select:
-    // usubsat X, SMIN --> (X ^ SMIN) & (X s>> BW-1)
-    ConstantSDNode *C = isConstOrConstSplat(Y, true);
-    if (C && C->getAPIntValue().isSignMask()) {
-      SDValue SignMask = DAG.getConstant(C->getAPIntValue(), DL, VT);
-      SDValue ShiftAmt = DAG.getConstant(BitWidth - 1, DL, VT);
-      SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, X, SignMask);
-      SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, X, ShiftAmt);
-      return DAG.getNode(ISD::AND, DL, VT, Xor, Sra);
-    }
-
-    // usubsat X, Y --> (X >u Y) ? X - Y : 0
-    SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, X, Y);
-    SDValue Cmp = DAG.getSetCC(DL, SetCCResultType, X, Y, ISD::SETUGT);
-    // TODO: Move this to DAGCombiner?
-    if (SetCCResultType == VT &&
-        DAG.ComputeNumSignBits(Cmp) == VT.getScalarSizeInBits())
-      return DAG.getNode(ISD::AND, DL, VT, Cmp, Sub);
-    return DAG.getSelect(DL, VT, Cmp, Sub, DAG.getConstant(0, DL, VT));
+  if (Opcode == ISD::USUBSAT) {
+    if (!TLI.isOperationLegal(ISD::UMAX, VT) || useVPTERNLOG(Subtarget, VT)) {
+      // Handle a special-case with a bit-hack instead of cmp+select:
+      // usubsat X, SMIN --> (X ^ SMIN) & (X s>> BW-1)
+      // If the target can use VPTERNLOG, DAGToDAG will match this as
+      // "vpsra + vpternlog" which is better than "vpmax + vpsub" with a
+      // "broadcast" constant load.
+      ConstantSDNode *C = isConstOrConstSplat(Y, true);
+      if (C && C->getAPIntValue().isSignMask()) {
+        SDValue SignMask = DAG.getConstant(C->getAPIntValue(), DL, VT);
+        SDValue ShiftAmt = DAG.getConstant(BitWidth - 1, DL, VT);
+        SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, X, SignMask);
+        SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, X, ShiftAmt);
+        return DAG.getNode(ISD::AND, DL, VT, Xor, Sra);
+      }
+    }
+    if (!TLI.isOperationLegal(ISD::UMAX, VT)) {
+      // usubsat X, Y --> (X >u Y) ? X - Y : 0
+      SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, X, Y);
+      SDValue Cmp = DAG.getSetCC(DL, SetCCResultType, X, Y, ISD::SETUGT);
+      // TODO: Move this to DAGCombiner?
+      if (SetCCResultType == VT &&
+          DAG.ComputeNumSignBits(Cmp) == VT.getScalarSizeInBits())
+        return DAG.getNode(ISD::AND, DL, VT, Cmp, Sub);
+      return DAG.getSelect(DL, VT, Cmp, Sub, DAG.getConstant(0, DL, VT));
+    }
   }
 
   if ((Opcode == ISD::SADDSAT || Opcode == ISD::SSUBSAT) &&

diff  --git a/llvm/test/CodeGen/X86/psubus.ll b/llvm/test/CodeGen/X86/psubus.ll
index 9de480ab9d1df..046818c86716c 100644
--- a/llvm/test/CodeGen/X86/psubus.ll
+++ b/llvm/test/CodeGen/X86/psubus.ll
@@ -162,9 +162,8 @@ define <4 x i32> @usubsat_custom(<4 x i32> %x) nounwind {
 ;
 ; AVX512-LABEL: usubsat_custom:
 ; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [2147483648,2147483648,2147483648,2147483648]
-; AVX512-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0
-; AVX512-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
+; AVX512-NEXT:    vpsrad $31, %xmm0, %xmm1
+; AVX512-NEXT:    vpternlogd $72, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm0
 ; AVX512-NEXT:    retq
   %res = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> %x, <4 x i32> <i32 2147483648, i32 2147483648, i32 2147483648, i32 undef>)
   ret <4 x i32> %res


        


More information about the llvm-commits mailing list