[llvm] r291806 - [X86] Replace AND+IMM64 with SRL/SHL

Nikolai Bozhenov via llvm-commits llvm-commits at lists.llvm.org
Thu Jan 12 11:54:28 PST 2017


Author: n.bozhenov
Date: Thu Jan 12 13:54:27 2017
New Revision: 291806

URL: http://llvm.org/viewvc/llvm-project?rev=291806&view=rev
Log:
[X86] Replace AND+IMM64 with SRL/SHL

Emit SHRQ/SHLQ instead of ANDQ with a 64-bit constant mask if the result
is unused and the mask has only its high/low bits set. For example, with
this patch LLVM emits

  shrq $41, %rdi
  je

instead of

  movabsq $0xFFFFFE0000000000, %rcx
  testq   %rcx, %rdi
  je

This reduces the number of instructions, code size, and register pressure.
The transformation is applied only in cases where the mask cannot be
encoded as a sign-extended 32-bit immediate of a TESTQ instruction.
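
To illustrate why the rewrite is sound for zero checks, here is a small
standalone C++ sketch (not part of the patch; names are illustrative)
showing that AND-against-mask and shift agree whenever the mask is a
single run of bits anchored at the top or bottom of the word:

  #include <cassert>
  #include <cstdint>

  // For a mask whose set bits are exactly the high bits of the word,
  // (x & mask) == 0  iff  (x >> tz) == 0, where tz is the number of
  // trailing zero bits in the mask. Dually, for a mask of only low bits,
  // (x & mask) == 0  iff  (x << lz) == 0, where lz is the number of
  // leading zero bits. The AND/shift result is otherwise unused; only
  // the zero flag is consumed.
  static bool testViaAnd(uint64_t X, uint64_t Mask) { return (X & Mask) == 0; }
  static bool testViaShr(uint64_t X, unsigned Tz) { return (X >> Tz) == 0; }
  static bool testViaShl(uint64_t X, unsigned Lz) { return (X << Lz) == 0; }

  int main() {
    const uint64_t HighMask = 0xFFFFFE0000000000; // high 23 bits set, tz = 41
    const uint64_t LowMask = 0x0000FFFFFFFFFFFF;  // low 48 bits set, lz = 16
    const uint64_t Vals[] = {0, 1, 1ULL << 41, HighMask, LowMask, ~0ULL};
    for (uint64_t X : Vals) {
      assert(testViaAnd(X, HighMask) == testViaShr(X, 41)); // shrq $41 ; je
      assert(testViaAnd(X, LowMask) == testViaShl(X, 16));  // shlq $16 ; je
    }
    return 0;
  }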

Differential Revision: https://reviews.llvm.org/D28198

Modified:
    llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
    llvm/trunk/test/CodeGen/X86/bypass-slow-division-64.ll
    llvm/trunk/test/CodeGen/X86/bypass-slow-division-tune.ll
    llvm/trunk/test/CodeGen/X86/cmp.ll

Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=291806&r1=291805&r2=291806&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Thu Jan 12 13:54:27 2017
@@ -16018,6 +16018,12 @@ SDValue X86TargetLowering::EmitTest(SDVa
       }
   }
 
+  // Sometimes the flags can be set either with an AND or with an SRL/SHL
+  // instruction. The SRL/SHL variant should be preferred for masks wider
+  // than this number of bits.
+  const int ShiftToAndMaxMaskWidth = 32;
+  const bool ZeroCheck = (X86CC == X86::COND_E || X86CC == X86::COND_NE);
+
   // NOTICE: In the code below we use ArithOp to hold the arithmetic operation
   // which may be the result of a CAST.  We use the variable 'Op', which is the
   // non-casted variable when we check for possible users.
@@ -16066,7 +16072,7 @@ SDValue X86TargetLowering::EmitTest(SDVa
     // If we have a constant logical shift that's only used in a comparison
     // against zero turn it into an equivalent AND. This allows turning it into
     // a TEST instruction later.
-    if ((X86CC == X86::COND_E || X86CC == X86::COND_NE) && Op->hasOneUse() &&
+    if (ZeroCheck && Op->hasOneUse() &&
         isa<ConstantSDNode>(Op->getOperand(1)) && !hasNonFlagsUse(Op)) {
       EVT VT = Op.getValueType();
       unsigned BitWidth = VT.getSizeInBits();
@@ -16076,7 +16082,7 @@ SDValue X86TargetLowering::EmitTest(SDVa
       APInt Mask = ArithOp.getOpcode() == ISD::SRL
                        ? APInt::getHighBitsSet(BitWidth, BitWidth - ShAmt)
                        : APInt::getLowBitsSet(BitWidth, BitWidth - ShAmt);
-      if (!Mask.isSignedIntN(32)) // Avoid large immediates.
+      if (!Mask.isSignedIntN(ShiftToAndMaxMaskWidth))
         break;
       Op = DAG.getNode(ISD::AND, dl, VT, Op->getOperand(0),
                        DAG.getConstant(Mask, dl, VT));
@@ -16085,18 +16091,59 @@ SDValue X86TargetLowering::EmitTest(SDVa
 
   case ISD::AND:
     // If the primary 'and' result isn't used, don't bother using X86ISD::AND,
-    // because a TEST instruction will be better.
+    // because a TEST instruction will be better. However, AND should be
+    // preferred if the instruction can be combined into ANDN.
     if (!hasNonFlagsUse(Op)) {
       SDValue Op0 = ArithOp->getOperand(0);
       SDValue Op1 = ArithOp->getOperand(1);
       EVT VT = ArithOp.getValueType();
       bool isAndn = isBitwiseNot(Op0) || isBitwiseNot(Op1);
       bool isLegalAndnType = VT == MVT::i32 || VT == MVT::i64;
+      bool isProperAndn = isAndn && isLegalAndnType && Subtarget.hasBMI();
+
+      // If we cannot select an ANDN instruction, check if we can replace
+      // AND+IMM64 with a shift before giving up. This is possible for masks
+      // like 0xFF000000 or 0x00FFFFFF, provided only the zero flag is needed.
+      if (!isProperAndn) {
+        if (!ZeroCheck)
+          break;
+
+        assert(!isa<ConstantSDNode>(Op0) && "AND node isn't canonicalized");
+        auto *CN = dyn_cast<ConstantSDNode>(Op1);
+        if (!CN)
+          break;
+
+        const APInt &Mask = CN->getAPIntValue();
+        if (Mask.isSignedIntN(ShiftToAndMaxMaskWidth))
+          break; // Prefer TEST instruction.
+
+        unsigned BitWidth = Mask.getBitWidth();
+        unsigned LeadingOnes = Mask.countLeadingOnes();
+        unsigned TrailingZeros = Mask.countTrailingZeros();
+
+        if (LeadingOnes + TrailingZeros == BitWidth) {
+          assert(TrailingZeros < VT.getSizeInBits() &&
+                 "Shift amount should be less than the type width");
+          MVT ShTy = getScalarShiftAmountTy(DAG.getDataLayout(), VT);
+          SDValue ShAmt = DAG.getConstant(TrailingZeros, dl, ShTy);
+          Op = DAG.getNode(ISD::SRL, dl, VT, Op0, ShAmt);
+          break;
+        }
+
+        unsigned LeadingZeros = Mask.countLeadingZeros();
+        unsigned TrailingOnes = Mask.countTrailingOnes();
+
+        if (LeadingZeros + TrailingOnes == BitWidth) {
+          assert(LeadingZeros < VT.getSizeInBits() &&
+                 "Shift amount should be less than the type width");
+          MVT ShTy = getScalarShiftAmountTy(DAG.getDataLayout(), VT);
+          SDValue ShAmt = DAG.getConstant(LeadingZeros, dl, ShTy);
+          Op = DAG.getNode(ISD::SHL, dl, VT, Op0, ShAmt);
+          break;
+        }
 
-      // But if we can combine this into an ANDN operation, then create an AND
-      // now and allow it to be pattern matched into an ANDN.
-      if (!Subtarget.hasBMI() || !isAndn || !isLegalAndnType)
         break;
+      }
     }
     LLVM_FALLTHROUGH;
   case ISD::SUB:
@@ -16116,7 +16163,7 @@ SDValue X86TargetLowering::EmitTest(SDVa
     case ISD::XOR: Opcode = X86ISD::XOR; break;
     case ISD::AND: Opcode = X86ISD::AND; break;
     case ISD::OR: {
-      if (!NeedTruncation && (X86CC == X86::COND_E || X86CC == X86::COND_NE)) {
+      if (!NeedTruncation && ZeroCheck) {
         if (SDValue EFLAGS = LowerVectorAllZeroTest(Op, Subtarget, DAG))
           return EFLAGS;
       }

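As a self-contained model of the mask classification the hunk above adds
(a hedged sketch using plain integers and GCC/Clang builtins instead of
APInt; classifyMask is a hypothetical helper, not the in-tree API):

  #include <cstdint>
  #include <optional>

  enum class ShiftKind { Srl, Shl };
  struct ShiftRepl { ShiftKind Kind; unsigned Amount; };

  // Decide whether `test x, mask` under a zero check can become a shift,
  // mirroring the checks added to X86TargetLowering::EmitTest above.
  static std::optional<ShiftRepl> classifyMask(uint64_t Mask) {
    // Masks encodable as a sign-extended 32-bit immediate stay as TESTQ.
    int64_t Signed = static_cast<int64_t>(Mask);
    if (Signed == static_cast<int32_t>(Signed))
      return std::nullopt;
    // Past this point Mask is neither 0 nor ~0, so the builtins below
    // never see a zero argument.
    unsigned LeadingOnes = __builtin_clzll(~Mask);
    unsigned TrailingZeros = __builtin_ctzll(Mask);
    if (LeadingOnes + TrailingZeros == 64)
      return ShiftRepl{ShiftKind::Srl, TrailingZeros};
    unsigned LeadingZeros = __builtin_clzll(Mask);
    unsigned TrailingOnes = __builtin_ctzll(~Mask);
    if (LeadingZeros + TrailingOnes == 64)
      return ShiftRepl{ShiftKind::Shl, LeadingZeros};
    return std::nullopt;
  }

With this model, 0xFFFFFE0000000000 classifies as {Srl, 41} (the shrq $41
in test21 below), 0x0000FFFFFFFFFFFF as {Shl, 16} (test24), and
0xFFFFFFFFFFF00000 fits a sign-extended imm32 and stays a TESTQ (test23).
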
Modified: llvm/trunk/test/CodeGen/X86/bypass-slow-division-64.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/bypass-slow-division-64.ll?rev=291806&r1=291805&r2=291806&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/bypass-slow-division-64.ll (original)
+++ llvm/trunk/test/CodeGen/X86/bypass-slow-division-64.ll Thu Jan 12 13:54:27 2017
@@ -9,8 +9,7 @@ define i64 @Test_get_quotient(i64 %a, i6
 ; CHECK:       # BB#0:
 ; CHECK-NEXT:    movq %rdi, %rax
 ; CHECK-NEXT:    orq %rsi, %rax
-; CHECK-NEXT:    movabsq $-4294967296, %rcx # imm = 0xFFFFFFFF00000000
-; CHECK-NEXT:    testq %rcx, %rax
+; CHECK-NEXT:    shrq $32, %rax
 ; CHECK-NEXT:    je .LBB0_1
 ; CHECK-NEXT:  # BB#2:
 ; CHECK-NEXT:    movq %rdi, %rax
@@ -32,8 +31,7 @@ define i64 @Test_get_remainder(i64 %a, i
 ; CHECK:       # BB#0:
 ; CHECK-NEXT:    movq %rdi, %rax
 ; CHECK-NEXT:    orq %rsi, %rax
-; CHECK-NEXT:    movabsq $-4294967296, %rcx # imm = 0xFFFFFFFF00000000
-; CHECK-NEXT:    testq %rcx, %rax
+; CHECK-NEXT:    shrq $32, %rax
 ; CHECK-NEXT:    je .LBB1_1
 ; CHECK-NEXT:  # BB#2:
 ; CHECK-NEXT:    movq %rdi, %rax
@@ -57,8 +55,7 @@ define i64 @Test_get_quotient_and_remain
 ; CHECK:       # BB#0:
 ; CHECK-NEXT:    movq %rdi, %rax
 ; CHECK-NEXT:    orq %rsi, %rax
-; CHECK-NEXT:    movabsq $-4294967296, %rcx # imm = 0xFFFFFFFF00000000
-; CHECK-NEXT:    testq %rcx, %rax
+; CHECK-NEXT:    shrq $32, %rax
 ; CHECK-NEXT:    je .LBB2_1
 ; CHECK-NEXT:  # BB#2:
 ; CHECK-NEXT:    movq %rdi, %rax

Modified: llvm/trunk/test/CodeGen/X86/bypass-slow-division-tune.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/bypass-slow-division-tune.ll?rev=291806&r1=291805&r2=291806&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/bypass-slow-division-tune.ll (original)
+++ llvm/trunk/test/CodeGen/X86/bypass-slow-division-tune.ll Thu Jan 12 13:54:27 2017
@@ -22,9 +22,8 @@ entry:
 define i64 @div64(i64 %a, i64 %b) {
 entry:
 ; CHECK-LABEL: div64:
-; CHECK-DAG: movabsq $-4294967296, [[REGMSK:%[a-z]+]]
-; CHECK-DAG: orq     %{{.*}}, [[REG:%[a-z]+]]
-; CHECK:     testq   [[REGMSK]], [[REG]]
+; CHECK:     orq     %{{.*}}, [[REG:%[a-z]+]]
+; CHECK:     shrq    $32, [[REG]]
 ; CHECK:     divl
 ;
   %div = sdiv i64 %a, %b

Modified: llvm/trunk/test/CodeGen/X86/cmp.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/cmp.ll?rev=291806&r1=291805&r2=291806&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/cmp.ll (original)
+++ llvm/trunk/test/CodeGen/X86/cmp.ll Thu Jan 12 13:54:27 2017
@@ -281,4 +281,54 @@ define void @test20(i32 %bf.load, i8 %x1
 ; CHECK: setne
 ; CHECK: testl
 ; CHECK: setne
-}
\ No newline at end of file
+}
+
+define i32 @test21(i64 %val) {
+  %and = and i64 %val, -2199023255552 ; 0xFFFFFE0000000000
+  %cmp = icmp ne i64 %and, 0
+  %ret = zext i1 %cmp to i32
+  ret i32 %ret
+
+; CHECK-LABEL: test21
+; CHECK: shrq $41, %rdi
+; CHECK-NOT: test
+; CHECK: setne %al
+; CHECK: retq
+}
+
+; The AND-to-SHR transformation is enabled for eq/ne condition codes only.
+define i32 @test22(i64 %val) {
+  %and = and i64 %val, -2199023255552 ; 0xFFFFFE0000000000
+  %cmp = icmp ult i64 %and, 0
+  %ret = zext i1 %cmp to i32
+  ret i32 %ret
+
+; CHECK-LABEL: test22
+; CHECK-NOT: shrq $41
+; CHECK: retq
+}
+
+define i32 @test23(i64 %val) {
+  %and = and i64 %val, -1048576 ; 0xFFFFFFFFFFF00000
+  %cmp = icmp ne i64 %and, 0
+  %ret = zext i1 %cmp to i32
+  ret i32 %ret
+
+; CHECK-LABEL: test23
+; CHECK: testq $-1048576, %rdi
+; CHECK: setne %al
+; CHECK: retq
+}
+
+define i32 @test24(i64 %val) {
+  %and = and i64 %val, 281474976710655 ; 0x0000FFFFFFFFFFFF
+  %cmp = icmp ne i64 %and, 0
+  %ret = zext i1 %cmp to i32
+  ret i32 %ret
+
+; CHECK-LABEL: test24
+; CHECK: shlq $16, %rdi
+; CHECK-NOT: test
+; CHECK: setne %al
+; CHECK: retq
+}