[llvm] de2378b - [X86] Fix a KNL miscompile caused by combineSetCC swapping LHS/RHS variables before a later use.

Fri Dec 20 11:24:58 PST 2019

Author: Craig Topper
Date: 2019-12-20T11:24:45-08:00
New Revision: de2378b4f3c4c88eae412a30a8bfaec82f54d1b7

URL: https://github.com/llvm/llvm-project/commit/de2378b4f3c4c88eae412a30a8bfaec82f54d1b7
DIFF: https://github.com/llvm/llvm-project/commit/de2378b4f3c4c88eae412a30a8bfaec82f54d1b7.diff

LOG: [X86] Fix a KNL miscompile caused by combineSetCC swapping LHS/RHS variables before a later use.

The setcc operands are copied into LHS and RHS variables at the top of the function. We also capture the condition code.

A later piece of code swaps the operands and changing the CC variable as part of a canonicalization to make some other checks simpler. But we might not make the transform we canonicalized for. So we continue on through the function where we can use the swapped LHS/RHS variables and access the original condition code operand instead of the modified CC variable. This leads to a setcc being created with the original condition code, but with swapped operands.

To mitigate this, this patch does a couple things. The LHS/RHS/CC variables are made const to keep them from being modified like this again. The transform that needs the swap now uses temporary copies of the variables. And the transform that used the original condition code operand has been altered to use the CC variable we cached originally. Either of these changes are enough to fix the issue, but doing both to make this code very safe.

I also considered rewriting the swap code in some way to check both permutations without explicitly swapping or needing temporary variables, but held off on that.

Differential Revision: https://reviews.llvm.org/D71736

Added: 
    

Modified: 
    llvm/lib/Target/X86/X86ISelLowering.cpp
    llvm/test/CodeGen/X86/avx512-vec-cmp.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 911912883441..54a64052ebb6 100644

--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -43316,9 +43316,9 @@ static SDValue combineVectorSizedSetCCEquality(SDNode *SetCC, SelectionDAG &DAG,
 
 static SDValue combineSetCC(SDNode *N, SelectionDAG &DAG,
                             const X86Subtarget &Subtarget) {
-  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
-  SDValue LHS = N->getOperand(0);
-  SDValue RHS = N->getOperand(1);
+  const ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
+  const SDValue LHS = N->getOperand(0);
+  const SDValue RHS = N->getOperand(1);
   EVT VT = N->getValueType(0);
   EVT OpVT = LHS.getValueType();
   SDLoc DL(N);
@@ -43345,30 +43345,35 @@ static SDValue combineSetCC(SDNode *N, SelectionDAG &DAG,
 
   if (VT.isVector() && VT.getVectorElementType() == MVT::i1 &&
       (CC == ISD::SETNE || CC == ISD::SETEQ || ISD::isSignedIntSetCC(CC))) {
-    // Put build_vectors on the right.
-    if (LHS.getOpcode() == ISD::BUILD_VECTOR) {
-      std::swap(LHS, RHS);
-      CC = ISD::getSetCCSwappedOperands(CC);
+    // Using temporaries to avoid messing up operand ordering for later
+    // transformations if this doesn't work.
+    SDValue Op0 = LHS;
+    SDValue Op1 = RHS;
+    ISD::CondCode TmpCC = CC;
+    // Put build_vector on the right.
+    if (Op0.getOpcode() == ISD::BUILD_VECTOR) {
+      std::swap(Op0, Op1);
+      TmpCC = ISD::getSetCCSwappedOperands(TmpCC);
     }
 
     bool IsSEXT0 =
-        (LHS.getOpcode() == ISD::SIGN_EXTEND) &&
-        (LHS.getOperand(0).getValueType().getVectorElementType() == MVT::i1);
-    bool IsVZero1 = ISD::isBuildVectorAllZeros(RHS.getNode());
+        (Op0.getOpcode() == ISD::SIGN_EXTEND) &&
+        (Op0.getOperand(0).getValueType().getVectorElementType() == MVT::i1);
+    bool IsVZero1 = ISD::isBuildVectorAllZeros(Op1.getNode());
 
     if (IsSEXT0 && IsVZero1) {
-      assert(VT == LHS.getOperand(0).getValueType() &&
+      assert(VT == Op0.getOperand(0).getValueType() &&
              "Uexpected operand type");
-      if (CC == ISD::SETGT)
+      if (TmpCC == ISD::SETGT)
         return DAG.getConstant(0, DL, VT);
-      if (CC == ISD::SETLE)
+      if (TmpCC == ISD::SETLE)
         return DAG.getConstant(1, DL, VT);
-      if (CC == ISD::SETEQ || CC == ISD::SETGE)
-        return DAG.getNOT(DL, LHS.getOperand(0), VT);
+      if (TmpCC == ISD::SETEQ || TmpCC == ISD::SETGE)
+        return DAG.getNOT(DL, Op0.getOperand(0), VT);
 
-      assert((CC == ISD::SETNE || CC == ISD::SETLT) &&
+      assert((TmpCC == ISD::SETNE || TmpCC == ISD::SETLT) &&
              "Unexpected condition code!");
-      return LHS.getOperand(0);
+      return Op0.getOperand(0);
     }
   }
 
@@ -43381,8 +43386,7 @@ static SDValue combineSetCC(SDNode *N, SelectionDAG &DAG,
       VT.getVectorElementType() == MVT::i1 &&
       (OpVT.getVectorElementType() == MVT::i8 ||
        OpVT.getVectorElementType() == MVT::i16)) {
-    SDValue Setcc = DAG.getNode(ISD::SETCC, DL, OpVT, LHS, RHS,
-                                N->getOperand(2));
+    SDValue Setcc = DAG.getSetCC(DL, OpVT, LHS, RHS, CC);
     return DAG.getNode(ISD::TRUNCATE, DL, VT, Setcc);
   }
 

diff  --git a/llvm/test/CodeGen/X86/avx512-vec-cmp.ll b/llvm/test/CodeGen/X86/avx512-vec-cmp.ll
index bc5b396a421f..7abcabe9c055 100644
--- a/llvm/test/CodeGen/X86/avx512-vec-cmp.ll
+++ b/llvm/test/CodeGen/X86/avx512-vec-cmp.ll
@@ -1532,8 +1532,7 @@ entry:
   ret void
 }
 
-; FIXME: This test is currently miscompiled for KNL with the operands of the
-; vcmpgtb swapped.
+; This test used to end up with the vpcmpgtb on KNL having its operands in the wrong order.
 define <8 x i64> @cmp_swap_bug(<16 x i8>* %x, <8 x i64> %y, <8 x i64> %z) {
 ; KNL-LABEL: cmp_swap_bug:
 ; KNL:       ## %bb.0: ## %entry
@@ -1542,7 +1541,7 @@ define <8 x i64> @cmp_swap_bug(<16 x i8>* %x, <8 x i64> %y, <8 x i64> %z) {
 ; KNL-NEXT:    ## encoding: [0xc4,0xe2,0x69,0x00,0x15,A,A,A,A]
 ; KNL-NEXT:    ## fixup A - offset: 5, value: LCPI69_0-4, kind: reloc_riprel_4byte
 ; KNL-NEXT:    vpxor %xmm3, %xmm3, %xmm3 ## encoding: [0xc5,0xe1,0xef,0xdb]
-; KNL-NEXT:    vpcmpgtb %xmm3, %xmm2, %xmm2 ## encoding: [0xc5,0xe9,0x64,0xd3]
+; KNL-NEXT:    vpcmpgtb %xmm2, %xmm3, %xmm2 ## encoding: [0xc5,0xe1,0x64,0xd2]
 ; KNL-NEXT:    vpmovsxbd %xmm2, %zmm2 ## encoding: [0x62,0xf2,0x7d,0x48,0x21,0xd2]
 ; KNL-NEXT:    vptestmd %zmm2, %zmm2, %k1 ## encoding: [0x62,0xf2,0x6d,0x48,0x27,0xca]
 ; KNL-NEXT:    vpblendmq %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x64,0xc0]