[llvm] 842d0bf - [x86] Improve select lowering for smin(x, 0) & smax(x, 0)

Thu Apr 7 00:53:50 PDT 2022

Author: Wei Xiao
Date: 2022-04-07T15:53:24+08:00
New Revision: 842d0bf93176b9cb1e0d6894a2bbfb32ad33ebb8

URL: https://github.com/llvm/llvm-project/commit/842d0bf93176b9cb1e0d6894a2bbfb32ad33ebb8
DIFF: https://github.com/llvm/llvm-project/commit/842d0bf93176b9cb1e0d6894a2bbfb32ad33ebb8.diff

LOG: [x86] Improve select lowering for smin(x, 0) & smax(x, 0)

smin(x, 0):
  (select (x < 0), x, 0) -> ((x >> (size_in_bits(x)-1))) & x

smax(x, 0):
  (select (x > 0), x, 0) -> (~(x >> (size_in_bits(x)-1))) & x
  The comparison is testing for a positive value, we have to invert the sign
  bit mask, so only do that transform if the target has a bitwise 'and not'
  instruction (the invert is free).

The transform is performed only when CMP has a single user to avoid
increasing total instruction number.

https://alive2.llvm.org/ce/z/euUnNm
https://alive2.llvm.org/ce/z/37339J

Differential Revision: https://reviews.llvm.org/D123109

Added: 
    

Modified: 
    llvm/lib/Target/X86/X86ISelLowering.cpp
    llvm/test/CodeGen/X86/select-smin-smax.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 05187c4230237..ff67dcb27cb11 100644

--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -24699,6 +24699,8 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
   // (select (x != 0), -1, y) -> ~(sign_bit (x - 1)) | y
   // (select (and (x , 0x1) == 0), y, (z ^ y) ) -> (-(and (x , 0x1)) & z ) ^ y
   // (select (and (x , 0x1) == 0), y, (z | y) ) -> (-(and (x , 0x1)) & z ) | y
+  // (select (x > 0), x, 0) -> (~(x >> (size_in_bits(x)-1))) & x
+  // (select (x < 0), x, 0) -> ((x >> (size_in_bits(x)-1))) & x
   if (Cond.getOpcode() == X86ISD::SETCC &&
       Cond.getOperand(1).getOpcode() == X86ISD::CMP &&
       isNullConstant(Cond.getOperand(1).getOperand(1))) {
@@ -24779,6 +24781,22 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
         SDValue And = DAG.getNode(ISD::AND, DL, VT, Mask, Src1); // Mask & z
         return DAG.getNode(Op2.getOpcode(), DL, VT, And, Src2);  // And Op y
       }
+    } else if ((VT == MVT::i32 || VT == MVT::i64) && isNullConstant(Op2) &&
+               Cmp.getNode()->hasOneUse() && (CmpOp0 == Op1) &&
+               ((CondCode == X86::COND_S) ||                    // smin(x, 0)
+                (CondCode == X86::COND_G && hasAndNot(Op1)))) { // smax(x, 0)
+      // (select (x < 0), x, 0) -> ((x >> (size_in_bits(x)-1))) & x
+      //
+      // If the comparison is testing for a positive value, we have to invert
+      // the sign bit mask, so only do that transform if the target has a
+      // bitwise 'and not' instruction (the invert is free).
+      // (select (x > 0), x, 0) -> (~(x >> (size_in_bits(x)-1))) & x
+      unsigned ShCt = VT.getSizeInBits() - 1;
+      SDValue ShiftAmt = DAG.getConstant(ShCt, DL, VT);
+      SDValue Shift = DAG.getNode(ISD::SRA, DL, VT, Op1, ShiftAmt);
+      if (CondCode == X86::COND_G)
+        Shift = DAG.getNOT(DL, Shift, VT);
+      return DAG.getNode(ISD::AND, DL, VT, Shift, Op1);
     }
   }
 

diff  --git a/llvm/test/CodeGen/X86/select-smin-smax.ll b/llvm/test/CodeGen/X86/select-smin-smax.ll
index 8450dd0199af0..a7fb60f37ea0c 100644
--- a/llvm/test/CodeGen/X86/select-smin-smax.ll
+++ b/llvm/test/CodeGen/X86/select-smin-smax.ll
@@ -1,5 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi < %s | FileCheck %s
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=-bmi < %s | FileCheck %s --check-prefixes=CHECK,CHECK-NOBMI
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi < %s | FileCheck %s --check-prefixes=CHECK,CHECK-BMI
 
 declare i32 @llvm.smax.i32(i32, i32)
 declare i32 @llvm.smin.i32(i32, i32)
@@ -7,12 +8,19 @@ declare i64 @llvm.smax.i64(i64, i64)
 declare i64 @llvm.smin.i64(i64, i64)
 
 define i32 @test_i32_smax(i32 %a) nounwind {
-; CHECK-LABEL: test_i32_smax:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    xorl %eax, %eax
-; CHECK-NEXT:    testl %edi, %edi
-; CHECK-NEXT:    cmovgl %edi, %eax
-; CHECK-NEXT:    retq
+; CHECK-NOBMI-LABEL: test_i32_smax:
+; CHECK-NOBMI:       # %bb.0:
+; CHECK-NOBMI-NEXT:    xorl %eax, %eax
+; CHECK-NOBMI-NEXT:    testl %edi, %edi
+; CHECK-NOBMI-NEXT:    cmovgl %edi, %eax
+; CHECK-NOBMI-NEXT:    retq
+;
+; CHECK-BMI-LABEL: test_i32_smax:
+; CHECK-BMI:       # %bb.0:
+; CHECK-BMI-NEXT:    movl %edi, %eax
+; CHECK-BMI-NEXT:    sarl $31, %eax
+; CHECK-BMI-NEXT:    andnl %edi, %eax, %eax
+; CHECK-BMI-NEXT:    retq
   %r = call i32 @llvm.smax.i32(i32 %a, i32 0)
   ret i32 %r
 }
@@ -20,21 +28,28 @@ define i32 @test_i32_smax(i32 %a) nounwind {
 define i32 @test_i32_smin(i32 %a) nounwind {
 ; CHECK-LABEL: test_i32_smin:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xorl %eax, %eax
-; CHECK-NEXT:    testl %edi, %edi
-; CHECK-NEXT:    cmovsl %edi, %eax
+; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    sarl $31, %eax
+; CHECK-NEXT:    andl %edi, %eax
 ; CHECK-NEXT:    retq
   %r = call i32 @llvm.smin.i32(i32 %a, i32 0)
   ret i32 %r
 }
 
 define i64 @test_i64_smax(i64 %a) nounwind {
-; CHECK-LABEL: test_i64_smax:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    xorl %eax, %eax
-; CHECK-NEXT:    testq %rdi, %rdi
-; CHECK-NEXT:    cmovgq %rdi, %rax
-; CHECK-NEXT:    retq
+; CHECK-NOBMI-LABEL: test_i64_smax:
+; CHECK-NOBMI:       # %bb.0:
+; CHECK-NOBMI-NEXT:    xorl %eax, %eax
+; CHECK-NOBMI-NEXT:    testq %rdi, %rdi
+; CHECK-NOBMI-NEXT:    cmovgq %rdi, %rax
+; CHECK-NOBMI-NEXT:    retq
+;
+; CHECK-BMI-LABEL: test_i64_smax:
+; CHECK-BMI:       # %bb.0:
+; CHECK-BMI-NEXT:    movq %rdi, %rax
+; CHECK-BMI-NEXT:    sarq $63, %rax
+; CHECK-BMI-NEXT:    andnq %rdi, %rax, %rax
+; CHECK-BMI-NEXT:    retq
   %r = call i64 @llvm.smax.i64(i64 %a, i64 0)
   ret i64 %r
 }
@@ -42,9 +57,9 @@ define i64 @test_i64_smax(i64 %a) nounwind {
 define i64 @test_i64_smin(i64 %a) nounwind {
 ; CHECK-LABEL: test_i64_smin:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xorl %eax, %eax
-; CHECK-NEXT:    testq %rdi, %rdi
-; CHECK-NEXT:    cmovsq %rdi, %rax
+; CHECK-NEXT:    movq %rdi, %rax
+; CHECK-NEXT:    sarq $63, %rax
+; CHECK-NEXT:    andq %rdi, %rax
 ; CHECK-NEXT:    retq
   %r = call i64 @llvm.smin.i64(i64 %a, i64 0)
   ret i64 %r