[llvm] r326333 - [ARM] Lower lower saturate to 0 and lower saturate to -1 using bit-operations
Pablo Barrio via llvm-commits
llvm-commits at lists.llvm.org
Wed Feb 28 09:13:07 PST 2018
Author: pabbar01
Date: Wed Feb 28 09:13:07 2018
New Revision: 326333
URL: http://llvm.org/viewvc/llvm-project?rev=326333&view=rev
Log:
[ARM] Lower lower saturate to 0 and lower saturate to -1 using bit-operations
Summary:
Expressions of the form x < 0 ? 0 : x and x < -1 ? -1 : x can be lowered using bit operations instead of branching or conditional moves.
In Thumb mode this results in a two-instruction sequence: a shift followed by a bic or orr. In ARM/Thumb-2 mode, which has a flexible second operand, the shift can be folded into a single bic/orr instruction. In most cases this results in smaller code and possibly fewer branches, and in no case is the code larger than before.
Patch by Martin Svanfeldt
Reviewers: fhahn, pbarrio, rogfer01
Reviewed By: pbarrio, rogfer01
Subscribers: chrib, yroux, eugenis, efriedma, rogfer01, aemerson, javed.absar, kristof.beyls, llvm-commits
Differential Revision: https://reviews.llvm.org/D42574
Added:
llvm/trunk/test/CodeGen/ARM/sat-to-bitop.ll
Modified:
llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp
llvm/trunk/test/CodeGen/ARM/atomic-op.ll
Modified: llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp?rev=326333&r1=326332&r2=326333&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp Wed Feb 28 09:13:07 2018
@@ -4384,6 +4384,48 @@ static bool isSaturatingConditional(cons
return false;
}
+// Check if a select of the form x < k ? k : x can be converted into a
+// bit operation instead of conditional moves.
+// Currently this is allowed given:
+// - The comparison operands and the selected values match up
+// - k is 0 or -1 (all ones)
+// This function does not check the last condition; that is up to the caller.
+// It returns true if the transformation can be made, and in that case
+// returns x in V and k in SatK. V may be overwritten even on failure.
+static bool isLowerSaturatingConditional(const SDValue &Op, SDValue &V,
+ SDValue &SatK)
+{
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
+ SDValue TrueVal = Op.getOperand(2);
+ SDValue FalseVal = Op.getOperand(3);
+
+ SDValue *K = isa<ConstantSDNode>(LHS) ? &LHS : isa<ConstantSDNode>(RHS)
+ ? &RHS
+ : nullptr;
+
+ // No constant operand in the comparison, early out
+ if (!K)
+ return false;
+
+ SDValue KTmp = isa<ConstantSDNode>(TrueVal) ? TrueVal : FalseVal;
+ V = (KTmp == TrueVal) ? FalseVal : TrueVal;
+ SDValue VTmp = (K && *K == LHS) ? RHS : LHS;
+
+ // If the constant in the comparison and in the select, or the variable in
+ // both, do not match, early out
+ if (*K != KTmp || V != VTmp)
+ return false;
+
+ if (isLowerSaturate(LHS, RHS, TrueVal, FalseVal, CC, *K)) {
+ SatK = *K;
+ return true;
+ }
+
+ return false;
+}
+
SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
SDLoc dl(Op);
@@ -4402,6 +4444,25 @@ SDValue ARMTargetLowering::LowerSELECT_C
DAG.getConstant(countTrailingOnes(SatConstant), dl, VT));
}
+ // Try to convert expressions of the form x < k ? k : x (and similar forms)
+ // into more efficient bit operations, which is possible when k is 0 or -1
+ // On ARM and Thumb-2 which have flexible operand 2 this will result in
+ // single instructions. On Thumb the shift and the bit operation will be two
+ // instructions.
+ // Only allow this transformation on full-width (32-bit) operations
+ SDValue LowerSatConstant;
+ if (VT == MVT::i32 &&
+ isLowerSaturatingConditional(Op, SatValue, LowerSatConstant)) {
+ SDValue ShiftV = DAG.getNode(ISD::SRA, dl, VT, SatValue,
+ DAG.getConstant(31, dl, VT));
+ if (isNullConstant(LowerSatConstant)) {
+ SDValue NotShiftV = DAG.getNode(ISD::XOR, dl, VT, ShiftV,
+ DAG.getAllOnesConstant(dl, VT));
+ return DAG.getNode(ISD::AND, dl, VT, SatValue, NotShiftV);
+ } else if (isAllOnesConstant(LowerSatConstant))
+ return DAG.getNode(ISD::OR, dl, VT, SatValue, ShiftV);
+ }
+
SDValue LHS = Op.getOperand(0);
SDValue RHS = Op.getOperand(1);
ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
Modified: llvm/trunk/test/CodeGen/ARM/atomic-op.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/atomic-op.ll?rev=326333&r1=326332&r2=326333&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/atomic-op.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/atomic-op.ll Wed Feb 28 09:13:07 2018
@@ -129,11 +129,12 @@ entry:
store i32 %9, i32* %old
call void asm sideeffect "", "~{memory},~{dirflag},~{fpsr},~{flags}"()
; CHECK: ldrex
- ; CHECK: cmp
+ ; CHECK: bic
+ ; CHECK-NOT: cmp
; CHECK: strex
; CHECK-T1: bl ___sync_fetch_and_max_4
; CHECK-T1-M0: bl ___sync_fetch_and_max_4
- ; CHECK-BAREMETAL: cmp
+ ; CHECK-BAREMETAL: bic
; CHECK-BAREMETAL-NOT: __sync
%10 = atomicrmw max i32* %val2, i32 0 monotonic
store i32 %10, i32* %old
Added: llvm/trunk/test/CodeGen/ARM/sat-to-bitop.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/sat-to-bitop.ll?rev=326333&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/sat-to-bitop.ll (added)
+++ llvm/trunk/test/CodeGen/ARM/sat-to-bitop.ll Wed Feb 28 09:13:07 2018
@@ -0,0 +1,157 @@
+; RUN: llc -mtriple=arm %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-ARM --check-prefix=CHECK-CMP
+; RUN: llc -mtriple=thumb-eabi %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-T --check-prefix=CHECK-CMP
+; RUN: llc -mtriple=thumb-eabi -mcpu=arm1156t2-s -mattr=+thumb2 %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-T2 --check-prefix=CHECK-CMP
+
+
+; Check for clipping against 0 that should result in bic
+;
+; Base tests with different bit widths
+;
+
+; x < 0 ? 0 : x
+; 32-bit base test
+define i32 @sat0_base_32bit(i32 %x) #0 {
+; CHECK-LABEL: sat0_base_32bit:
+; CHECK-CMP-NOT: cmp
+; CHECK-ARM: bic {{r[0-9]}}, [[INPUT:r[0-9]]], [[INPUT]], asr #31
+; CHECK-T2: bic.w {{r[0-9]}}, [[INPUT:r[0-9]]], [[INPUT]], asr #31
+; CHECK-T: asrs [[IM:r[0-9]]], {{r[0-9]}}, #31
+; CHECK-T-NEXT: bics {{r[0-9]}}, [[IM]]
+entry:
+ %cmpLow = icmp slt i32 %x, 0
+ %saturateLow = select i1 %cmpLow, i32 0, i32 %x
+ ret i32 %saturateLow
+}
+
+; x < 0 ? 0 : x
+; 16-bit base test
+define i16 @sat0_base_16bit(i16 %x) #0 {
+; CHECK-LABEL: sat0_base_16bit:
+; CHECK-CMP: cmp
+; CHECK-ARM-NOT: bic
+; CHECK-T2-NOT: bic.w
+; CHECK-T-NOT: bics
+entry:
+ %cmpLow = icmp slt i16 %x, 0
+ %saturateLow = select i1 %cmpLow, i16 0, i16 %x
+ ret i16 %saturateLow
+}
+
+; x < 0 ? 0 : x
+; 8-bit base test
+define i8 @sat0_base_8bit(i8 %x) #0 {
+; CHECK-LABEL: sat0_base_8bit:
+; CHECK-CMP: cmp
+; CHECK-ARM-NOT: bic
+; CHECK-T2-NOT: bic.w
+entry:
+ %cmpLow = icmp slt i8 %x, 0
+ %saturateLow = select i1 %cmpLow, i8 0, i8 %x
+ ret i8 %saturateLow
+}
+
+; Test where the conditional is formed in a different way
+
+; x > 0 ? x : 0
+define i32 @sat0_lower_1(i32 %x) #0 {
+; CHECK-LABEL: sat0_lower_1:
+; CHECK-CMP-NOT: cmp
+; CHECK-ARM: bic {{r[0-9]}}, [[INPUT:r[0-9]]], [[INPUT]], asr #31
+; CHECK-T2: bic.w {{r[0-9]}}, [[INPUT:r[0-9]]], [[INPUT]], asr #31
+; CHECK-T: asrs [[IM:r[0-9]]], {{r[0-9]}}, #31
+; CHECK-T-NEXT: bics {{r[0-9]}}, [[IM]]
+entry:
+ %cmpGt = icmp sgt i32 %x, 0
+ %saturateLow = select i1 %cmpGt, i32 %x, i32 0
+ ret i32 %saturateLow
+}
+
+
+; Check for clipping against -1 that should result in orr
+;
+; Base tests with different bit widths
+;
+
+; x < -1 ? -1 : x
+; 32-bit base test
+define i32 @sat1_base_32bit(i32 %x) #0 {
+; CHECK-LABEL: sat1_base_32bit:
+; CHECK-CMP-NOT: cmp
+; CHECK-ARM: orr {{r[0-9]}}, [[INPUT:r[0-9]]], [[INPUT]], asr #31
+; CHECK-T2: orr.w {{r[0-9]}}, [[INPUT:r[0-9]]], [[INPUT]], asr #31
+; CHECK-T: asrs [[IM:r[0-9]]], {{r[0-9]}}, #31
+; CHECK-T-NEXT: orrs {{r[0-9]}}, [[IM]]
+entry:
+ %cmpLow = icmp slt i32 %x, -1
+ %saturateLow = select i1 %cmpLow, i32 -1, i32 %x
+ ret i32 %saturateLow
+}
+
+; x < -1 ? -1 : x
+; 16-bit base test
+define i16 @sat1_base_16bit(i16 %x) #0 {
+; CHECK-LABEL: sat1_base_16bit:
+; CHECK-ARM: cmn
+; CHECK-T2: cmp
+; CHECK-T: cmp
+entry:
+ %cmpLow = icmp slt i16 %x, -1
+ %saturateLow = select i1 %cmpLow, i16 -1, i16 %x
+ ret i16 %saturateLow
+}
+
+; x < -1 ? -1 : x
+; 8-bit base test
+define i8 @sat1_base_8bit(i8 %x) #0 {
+; CHECK-LABEL: sat1_base_8bit:
+; CHECK-ARM: cmn
+; CHECK-T2: cmp
+; CHECK-T: cmp
+entry:
+ %cmpLow = icmp slt i8 %x, -1
+ %saturateLow = select i1 %cmpLow, i8 -1, i8 %x
+ ret i8 %saturateLow
+}
+
+; Test where the conditional is formed in a different way
+
+; x > -1 ? x : -1
+define i32 @sat1_lower_1(i32 %x) #0 {
+; CHECK-LABEL: sat1_lower_1:
+; CHECK-ARM: orr {{r[0-9]}}, [[INPUT:r[0-9]]], [[INPUT]], asr #31
+; CHECK-T2: orr.w {{r[0-9]}}, [[INPUT:r[0-9]]], [[INPUT]], asr #31
+; CHECK-T: asrs [[IM:r[0-9]]], {{r[0-9]}}, #31
+; CHECK-T-NEXT: orrs {{r[0-9]}}, [[IM]]
+; CHECK-CMP-NOT: cmp
+entry:
+ %cmpGt = icmp sgt i32 %x, -1
+ %saturateLow = select i1 %cmpGt, i32 %x, i32 -1
+ ret i32 %saturateLow
+}
+
+; The following tests are for patterns that should not transform into bitops
+; but that are similar enough that they could confuse the selector.
+
+; x < 0 ? 0 : y where x and y do not properly match
+define i32 @no_sat0_incorrect_variable(i32 %x, i32 %y) #0 {
+; CHECK-LABEL: no_sat0_incorrect_variable:
+; CHECK-NOT: bic
+; CHECK-NOT: asrs
+; CHECK-CMP: cmp
+entry:
+ %cmpLow = icmp slt i32 %x, 0
+ %saturateLow = select i1 %cmpLow, i32 0, i32 %y
+ ret i32 %saturateLow
+}
+
+; x < 0 ? -1 : x
+define i32 @no_sat0_incorrect_constant(i32 %x) #0 {
+; CHECK-LABEL: no_sat0_incorrect_constant:
+; CHECK-NOT: bic
+; CHECK-NOT: asrs
+; CHECK-CMP: cmp
+entry:
+ %cmpLow = icmp slt i32 %x, 0
+ %saturateLow = select i1 %cmpLow, i32 -1, i32 %x
+ ret i32 %saturateLow
+}
More information about the llvm-commits
mailing list