[llvm] r326333 - [ARM] Lower lower saturate to 0 and lower saturate to -1 using bit-operations

Pablo Barrio via llvm-commits llvm-commits at lists.llvm.org
Wed Feb 28 09:13:07 PST 2018


Author: pabbar01
Date: Wed Feb 28 09:13:07 2018
New Revision: 326333

URL: http://llvm.org/viewvc/llvm-project?rev=326333&view=rev
Log:
[ARM] Lower lower saturate to 0 and lower saturate to -1 using bit-operations

Summary:
Expressions of the form x < 0 ? 0 : x and x < -1 ? -1 : x can be lowered using bit operations instead of branches or conditional moves.

In Thumb mode this results in a two-instruction sequence: a shift followed by a bic or orr. In ARM/Thumb-2 mode, which have a flexible second operand, the shift can be folded into a single bic/orr instruction. In most cases this results in smaller code and possibly fewer branches, and in no case is the code larger than before.

Patch by Martin Svanfeldt

Reviewers: fhahn, pbarrio, rogfer01

Reviewed By: pbarrio, rogfer01

Subscribers: chrib, yroux, eugenis, efriedma, rogfer01, aemerson, javed.absar, kristof.beyls, llvm-commits

Differential Revision: https://reviews.llvm.org/D42574

Added:
    llvm/trunk/test/CodeGen/ARM/sat-to-bitop.ll
Modified:
    llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp
    llvm/trunk/test/CodeGen/ARM/atomic-op.ll

Modified: llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp?rev=326333&r1=326332&r2=326333&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp Wed Feb 28 09:13:07 2018
@@ -4384,6 +4384,48 @@ static bool isSaturatingConditional(cons
   return false;
 }
 
+// Check if a conditional of the form x < k ? k : x can be converted into a
+// bit operation instead of conditional moves.
+// Currently this is allowed given:
+// - The compared operands and the selected values match up
+// - k is 0 or -1 (all ones)
+// This function does not check the last condition; that is up to the caller.
+// Returns true if the transformation can be made, with x in V and k in SatK.
+// Note that V may be overwritten even when false is returned.
+static bool isLowerSaturatingConditional(const SDValue &Op, SDValue &V,
+                                         SDValue &SatK)
+{
+  SDValue LHS = Op.getOperand(0);
+  SDValue RHS = Op.getOperand(1);
+  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
+  SDValue TrueVal = Op.getOperand(2);
+  SDValue FalseVal = Op.getOperand(3);
+
+  SDValue *K = isa<ConstantSDNode>(LHS) ? &LHS : isa<ConstantSDNode>(RHS)
+                                               ? &RHS
+                                               : nullptr;
+
+  // No constant operand in the comparison, early out
+  if (!K)
+    return false;
+
+  SDValue KTmp = isa<ConstantSDNode>(TrueVal) ? TrueVal : FalseVal;
+  V = (KTmp == TrueVal) ? FalseVal : TrueVal;
+  SDValue VTmp = (K && *K == LHS) ? RHS : LHS;
+
+  // If the comparison and the select do not use the same constant, or do not
+  // use the same variable, early out
+  if (*K != KTmp || V != VTmp)
+    return false;
+
+  if (isLowerSaturate(LHS, RHS, TrueVal, FalseVal, CC, *K)) {
+    SatK = *K;
+    return true;
+  }
+
+  return false;
+}
+
 SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
   EVT VT = Op.getValueType();
   SDLoc dl(Op);
@@ -4402,6 +4444,25 @@ SDValue ARMTargetLowering::LowerSELECT_C
                          DAG.getConstant(countTrailingOnes(SatConstant), dl, VT));
   }
 
+  // Try to convert expressions of the form x < k ? k : x (and similar forms)
+  // into more efficient bit operations, which is possible when k is 0 or -1.
+  // On ARM and Thumb-2, which have a flexible operand 2, this will result in
+  // a single instruction. On Thumb the shift and the bit operation will be two
+  // instructions.
+  // Only allow this transformation on full-width (32-bit) operations.
+  SDValue LowerSatConstant;
+  if (VT == MVT::i32 &&
+      isLowerSaturatingConditional(Op, SatValue, LowerSatConstant)) {
+    SDValue ShiftV = DAG.getNode(ISD::SRA, dl, VT, SatValue,
+                                 DAG.getConstant(31, dl, VT));
+    if (isNullConstant(LowerSatConstant)) {
+      SDValue NotShiftV = DAG.getNode(ISD::XOR, dl, VT, ShiftV,
+                                      DAG.getAllOnesConstant(dl, VT));
+      return DAG.getNode(ISD::AND, dl, VT, SatValue, NotShiftV);
+    } else if (isAllOnesConstant(LowerSatConstant))
+      return DAG.getNode(ISD::OR, dl, VT, SatValue, ShiftV);
+  }
+
   SDValue LHS = Op.getOperand(0);
   SDValue RHS = Op.getOperand(1);
   ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();

Modified: llvm/trunk/test/CodeGen/ARM/atomic-op.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/atomic-op.ll?rev=326333&r1=326332&r2=326333&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/atomic-op.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/atomic-op.ll Wed Feb 28 09:13:07 2018
@@ -129,11 +129,12 @@ entry:
 	store i32 %9, i32* %old
 	call void asm sideeffect "", "~{memory},~{dirflag},~{fpsr},~{flags}"()
   ; CHECK: ldrex
-  ; CHECK: cmp
+  ; CHECK: bic
+  ; CHECK-NOT: cmp
   ; CHECK: strex
   ; CHECK-T1: bl ___sync_fetch_and_max_4
   ; CHECK-T1-M0: bl ___sync_fetch_and_max_4
-  ; CHECK-BAREMETAL: cmp
+  ; CHECK-BAREMETAL: bic
   ; CHECK-BAREMETAL-NOT: __sync
   %10 = atomicrmw max i32* %val2, i32 0 monotonic
 	store i32 %10, i32* %old

Added: llvm/trunk/test/CodeGen/ARM/sat-to-bitop.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/sat-to-bitop.ll?rev=326333&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/sat-to-bitop.ll (added)
+++ llvm/trunk/test/CodeGen/ARM/sat-to-bitop.ll Wed Feb 28 09:13:07 2018
@@ -0,0 +1,157 @@
+; RUN: llc -mtriple=arm %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-ARM --check-prefix=CHECK-CMP
+; RUN: llc -mtriple=thumb-eabi %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-T --check-prefix=CHECK-CMP
+; RUN: llc -mtriple=thumb-eabi -mcpu=arm1156t2-s -mattr=+thumb2 %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-T2 --check-prefix=CHECK-CMP
+
+
+; Check for clipping against 0 that should result in bic
+;
+; Base tests with different bit widths
+;
+
+; x < 0 ? 0 : x
+; 32-bit base test
+define i32 @sat0_base_32bit(i32 %x) #0 {
+; CHECK-LABEL: sat0_base_32bit:
+; CHECK-CMP-NOT: cmp
+; CHECK-ARM: bic {{r[0-9]}}, [[INPUT:r[0-9]]], [[INPUT]], asr #31
+; CHECK-T2: bic.w {{r[0-9]}}, [[INPUT:r[0-9]]], [[INPUT]], asr #31
+; CHECK-T: asrs [[IM:r[0-9]]], {{r[0-9]}}, #31
+; CHECK-T-NEXT: bics {{r[0-9]}}, [[IM]]
+entry:
+  %cmpLow = icmp slt i32 %x, 0
+  %saturateLow = select i1 %cmpLow, i32 0, i32 %x
+  ret i32 %saturateLow
+}
+
+; x < 0 ? 0 : x
+; 16-bit base test
+define i16 @sat0_base_16bit(i16 %x) #0 {
+; CHECK-LABEL: sat0_base_16bit:
+; CHECK-CMP: cmp
+; CHECK-ARM-NOT: bic
+; CHECK-T2-NOT: bic.w
+; CHECK-T-NOT: bics
+entry:
+  %cmpLow = icmp slt i16 %x, 0
+  %saturateLow = select i1 %cmpLow, i16 0, i16 %x
+  ret i16 %saturateLow
+}
+
+; x < 0 ? 0 : x
+; 8-bit base test
+define i8 @sat0_base_8bit(i8 %x) #0 {
+; CHECK-LABEL: sat0_base_8bit:
+; CHECK-CMP: cmp
+; CHECK-ARM-NOT: bic
+; CHECK-T2-NOT: bic.w
+entry:
+  %cmpLow = icmp slt i8 %x, 0
+  %saturateLow = select i1 %cmpLow, i8 0, i8 %x
+  ret i8 %saturateLow
+}
+
+; Test where the conditional is formed in a different way
+
+; x > 0 ? x : 0
+define i32 @sat0_lower_1(i32 %x) #0 {
+; CHECK-LABEL: sat0_lower_1:
+; CHECK-CMP-NOT: cmp
+; CHECK-ARM: bic {{r[0-9]}}, [[INPUT:r[0-9]]], [[INPUT]], asr #31
+; CHECK-T2: bic.w {{r[0-9]}}, [[INPUT:r[0-9]]], [[INPUT]], asr #31
+; CHECK-T: asrs [[IM:r[0-9]]], {{r[0-9]}}, #31
+; CHECK-T-NEXT: bics {{r[0-9]}}, [[IM]]
+entry:
+  %cmpGt = icmp sgt i32 %x, 0
+  %saturateLow = select i1 %cmpGt, i32 %x, i32 0
+  ret i32 %saturateLow
+}
+
+
+; Check for clipping against -1 that should result in orr
+;
+; Base tests with different bit widths
+;
+
+; x < -1 ? -1 : x
+; 32-bit base test
+define i32 @sat1_base_32bit(i32 %x) #0 {
+; CHECK-LABEL: sat1_base_32bit:
+; CHECK-CMP-NOT: cmp
+; CHECK-ARM: orr {{r[0-9]}}, [[INPUT:r[0-9]]], [[INPUT]], asr #31
+; CHECK-T2: orr.w {{r[0-9]}}, [[INPUT:r[0-9]]], [[INPUT]], asr #31
+; CHECK-T: asrs [[IM:r[0-9]]], {{r[0-9]}}, #31
+; CHECK-T-NEXT: orrs {{r[0-9]}}, [[IM]]
+entry:
+  %cmpLow = icmp slt i32 %x, -1
+  %saturateLow = select i1 %cmpLow, i32 -1, i32 %x
+  ret i32 %saturateLow
+}
+
+; x < -1 ? -1 : x
+; 16-bit base test
+define i16 @sat1_base_16bit(i16 %x) #0 {
+; CHECK-LABEL: sat1_base_16bit:
+; CHECK-ARM: cmn
+; CHECK-T2: cmp
+; CHECK-T: cmp
+entry:
+  %cmpLow = icmp slt i16 %x, -1
+  %saturateLow = select i1 %cmpLow, i16 -1, i16 %x
+  ret i16 %saturateLow
+}
+
+; x < -1 ? -1 : x
+; 8-bit base test
+define i8 @sat1_base_8bit(i8 %x) #0 {
+; CHECK-LABEL: sat1_base_8bit:
+; CHECK-ARM: cmn
+; CHECK-T2: cmp
+; CHECK-T: cmp
+entry:
+  %cmpLow = icmp slt i8 %x, -1
+  %saturateLow = select i1 %cmpLow, i8 -1, i8 %x
+  ret i8 %saturateLow
+}
+
+; Test where the conditional is formed in a different way
+
+; x > -1 ? x : -1
+define i32 @sat1_lower_1(i32 %x) #0 {
+; CHECK-LABEL: sat1_lower_1:
+; CHECK-ARM: orr {{r[0-9]}}, [[INPUT:r[0-9]]], [[INPUT]], asr #31
+; CHECK-T2: orr.w {{r[0-9]}}, [[INPUT:r[0-9]]], [[INPUT]], asr #31
+; CHECK-T: asrs [[IM:r[0-9]]], {{r[0-9]}}, #31
+; CHECK-T-NEXT: orrs {{r[0-9]}}, [[IM]]
+; CHECK-CMP-NOT: cmp
+entry:
+  %cmpGt = icmp sgt i32 %x, -1
+  %saturateLow = select i1 %cmpGt, i32 %x, i32 -1
+  ret i32 %saturateLow
+}
+
+; The following tests cover patterns that should not be transformed into bitops
+; but that are similar enough that they could confuse the selector.
+
+; x < 0 ? 0 : y where x and y do not match
+define i32 @no_sat0_incorrect_variable(i32 %x, i32 %y) #0 {
+; CHECK-LABEL: no_sat0_incorrect_variable:
+; CHECK-NOT: bic
+; CHECK-NOT: asrs
+; CHECK-CMP: cmp
+entry:
+  %cmpLow = icmp slt i32 %x, 0
+  %saturateLow = select i1 %cmpLow, i32 0, i32 %y
+  ret i32 %saturateLow
+}
+
+; x < 0 ? -1 : x
+define i32 @no_sat0_incorrect_constant(i32 %x) #0 {
+; CHECK-LABEL: no_sat0_incorrect_constant:
+; CHECK-NOT: bic
+; CHECK-NOT: asrs
+; CHECK-CMP: cmp
+entry:
+  %cmpLow = icmp slt i32 %x, 0
+  %saturateLow = select i1 %cmpLow, i32 -1, i32 %x
+  ret i32 %saturateLow
+}




More information about the llvm-commits mailing list