[clang] 1a60ae0 - [InstCombine] fold mask-with-signbit-splat to icmp+select
Sanjay Patel via cfe-commits
cfe-commits at lists.llvm.org
Tue Dec 14 13:02:02 PST 2021
Author: Sanjay Patel
Date: 2021-12-14T16:00:42-05:00
New Revision: 1a60ae02c65d26981017f59bc5918d3c2e363bfd
URL: https://github.com/llvm/llvm-project/commit/1a60ae02c65d26981017f59bc5918d3c2e363bfd
DIFF: https://github.com/llvm/llvm-project/commit/1a60ae02c65d26981017f59bc5918d3c2e363bfd.diff
LOG: [InstCombine] fold mask-with-signbit-splat to icmp+select
~(iN X s>> (N-1)) & Y --> (X s< 0) ? 0 : Y
https://alive2.llvm.org/ce/z/JKlQ9x
This is similar to D111410 / 727e642e970d028049d,
but it includes a 'not' of the sign bit, so it
saves an instruction in the basic pattern.
DAGCombiner or target-specific folds can expand
this back into bit-hacks.
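For reference, a minimal scalar sketch of the basic pattern (it mirrors the
not_ashr_bitwidth_mask test updated below; the @src/@tgt names are only
illustrative, following the usual alive2 convention):

  ; before: splat the sign bit, invert the splat, use it as a mask
  define i8 @src(i8 %x, i8 %y) {
    %sign = ashr i8 %x, 7     ; 0 if x >= 0, -1 if x < 0
    %not = xor i8 %sign, -1   ; invert the splat
    %r = and i8 %not, %y      ; y if x >= 0, 0 otherwise
    ret i8 %r
  }

  ; after: one compare feeding a select
  define i8 @tgt(i8 %x, i8 %y) {
    %isneg = icmp slt i8 %x, 0
    %r = select i1 %isneg, i8 0, i8 %y
    ret i8 %r
  }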
The diffs in the logical-select tests are not true
regressions: running early-cse and another round
of instcombine is expected in a normal opt pipeline,
and that reduces the IR back to a minimal form, as shown
in the duplicated PhaseOrdering test and sketched below.
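As a rough illustration (the exact invocation is an assumption, not part of
this commit), running something like
opt -passes='instcombine,early-cse,instcombine' -S on the allSignBits_vec
pattern collapses the duplicated icmp/select/or sequence back into a single
compare and select, matching the form checked in the PhaseOrdering test:

  %isneg1 = icmp slt <4 x i8> %cond, zeroinitializer
  %sel = select <4 x i1> %isneg1, <4 x i8> %tval, <4 x i8> %fval
  ret <4 x i8> %sel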
I do not understand the SystemZ diffs, so
I made the minimal edits suggested by FileCheck to
make that test pass again. That whole test file is
set up wrong, though: it runs the entire optimizer (-O2)
to check IR, and on top of that it also runs
codegen and checks asm. It needs to be split up.
Fixes #52631
Added:
Modified:
clang/test/CodeGen/SystemZ/builtins-systemz-zvector.c
llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
llvm/test/Transforms/InstCombine/and.ll
llvm/test/Transforms/InstCombine/logical-select-inseltpoison.ll
llvm/test/Transforms/InstCombine/logical-select.ll
llvm/test/Transforms/InstCombine/vec_sext.ll
llvm/test/Transforms/PhaseOrdering/vector-select.ll
Removed:
################################################################################
diff --git a/clang/test/CodeGen/SystemZ/builtins-systemz-zvector.c b/clang/test/CodeGen/SystemZ/builtins-systemz-zvector.c
index 7cd4a951741f0..38f0c2908825a 100644
--- a/clang/test/CodeGen/SystemZ/builtins-systemz-zvector.c
+++ b/clang/test/CodeGen/SystemZ/builtins-systemz-zvector.c
@@ -3289,13 +3289,13 @@ void test_integer(void) {
// CHECK-ASM: vsrlb
vsc = vec_abs(vsc);
- // CHECK-ASM: vlpb
+ // CHECK-ASM: vlcb
vss = vec_abs(vss);
- // CHECK-ASM: vlph
+ // CHECK-ASM: vlch
vsi = vec_abs(vsi);
- // CHECK-ASM: vlpf
+ // CHECK-ASM: vlcf
vsl = vec_abs(vsl);
- // CHECK-ASM: vlpg
+ // CHECK-ASM: vlcg
vsc = vec_max(vsc, vsc);
// CHECK-ASM: vmxb
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index 9023619b14280..08cd1a7f97e60 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -2133,6 +2133,15 @@ Instruction *InstCombinerImpl::visitAnd(BinaryOperator &I) {
Value *Cmp = Builder.CreateICmpSLT(X, Zero, "isneg");
return SelectInst::Create(Cmp, Y, Zero);
}
+ // If there's a 'not' of the shifted value, swap the select operands:
+ // ~(iN X s>> (N-1)) & Y --> (X s< 0) ? 0 : Y
+ if (match(&I, m_c_And(m_OneUse(m_Not(
+ m_AShr(m_Value(X), m_SpecificInt(FullShift)))),
+ m_Value(Y)))) {
+ Constant *Zero = ConstantInt::getNullValue(Ty);
+ Value *Cmp = Builder.CreateICmpSLT(X, Zero, "isneg");
+ return SelectInst::Create(Cmp, Zero, Y);
+ }
// (~x) & y --> ~(x | (~y)) iff that gets rid of inversions
if (sinkNotIntoOtherHandOfAndOrOr(I))
diff --git a/llvm/test/Transforms/InstCombine/and.ll b/llvm/test/Transforms/InstCombine/and.ll
index edaef78b631d8..53c7f09189ff5 100644
--- a/llvm/test/Transforms/InstCombine/and.ll
+++ b/llvm/test/Transforms/InstCombine/and.ll
@@ -1463,9 +1463,8 @@ define i8 @lshr_bitwidth_mask(i8 %x, i8 %y) {
define i8 @not_ashr_bitwidth_mask(i8 %x, i8 %y) {
; CHECK-LABEL: @not_ashr_bitwidth_mask(
-; CHECK-NEXT: [[SIGN:%.*]] = ashr i8 [[X:%.*]], 7
-; CHECK-NEXT: [[NOT:%.*]] = xor i8 [[SIGN]], -1
-; CHECK-NEXT: [[POS_OR_ZERO:%.*]] = and i8 [[NOT]], [[Y:%.*]]
+; CHECK-NEXT: [[ISNEG:%.*]] = icmp slt i8 [[X:%.*]], 0
+; CHECK-NEXT: [[POS_OR_ZERO:%.*]] = select i1 [[ISNEG]], i8 0, i8 [[Y:%.*]]
; CHECK-NEXT: ret i8 [[POS_OR_ZERO]]
;
%sign = ashr i8 %x, 7
@@ -1477,9 +1476,8 @@ define i8 @not_ashr_bitwidth_mask(i8 %x, i8 %y) {
define <2 x i8> @not_ashr_bitwidth_mask_vec_commute(<2 x i8> %x, <2 x i8> %py) {
; CHECK-LABEL: @not_ashr_bitwidth_mask_vec_commute(
; CHECK-NEXT: [[Y:%.*]] = mul <2 x i8> [[PY:%.*]], <i8 42, i8 2>
-; CHECK-NEXT: [[SIGN:%.*]] = ashr <2 x i8> [[X:%.*]], <i8 7, i8 7>
-; CHECK-NEXT: [[NOT:%.*]] = xor <2 x i8> [[SIGN]], <i8 -1, i8 -1>
-; CHECK-NEXT: [[POS_OR_ZERO:%.*]] = and <2 x i8> [[Y]], [[NOT]]
+; CHECK-NEXT: [[ISNEG:%.*]] = icmp slt <2 x i8> [[X:%.*]], zeroinitializer
+; CHECK-NEXT: [[POS_OR_ZERO:%.*]] = select <2 x i1> [[ISNEG]], <2 x i8> zeroinitializer, <2 x i8> [[Y]]
; CHECK-NEXT: ret <2 x i8> [[POS_OR_ZERO]]
;
%y = mul <2 x i8> %py, <i8 42, i8 2> ; thwart complexity-based ordering
@@ -1489,12 +1487,14 @@ define <2 x i8> @not_ashr_bitwidth_mask_vec_commute(<2 x i8> %x, <2 x i8> %py) {
ret <2 x i8> %pos_or_zero
}
+; extra use of shift is ok
+
define i8 @not_ashr_bitwidth_mask_use1(i8 %x, i8 %y) {
; CHECK-LABEL: @not_ashr_bitwidth_mask_use1(
; CHECK-NEXT: [[SIGN:%.*]] = ashr i8 [[X:%.*]], 7
; CHECK-NEXT: call void @use8(i8 [[SIGN]])
-; CHECK-NEXT: [[NOT:%.*]] = xor i8 [[SIGN]], -1
-; CHECK-NEXT: [[R:%.*]] = and i8 [[NOT]], [[Y:%.*]]
+; CHECK-NEXT: [[ISNEG:%.*]] = icmp slt i8 [[X]], 0
+; CHECK-NEXT: [[R:%.*]] = select i1 [[ISNEG]], i8 0, i8 [[Y:%.*]]
; CHECK-NEXT: ret i8 [[R]]
;
%sign = ashr i8 %x, 7
@@ -1504,6 +1504,8 @@ define i8 @not_ashr_bitwidth_mask_use1(i8 %x, i8 %y) {
ret i8 %r
}
+; negative test - extra use
+
define i8 @not_ashr_bitwidth_mask_use2(i8 %x, i8 %y) {
; CHECK-LABEL: @not_ashr_bitwidth_mask_use2(
; CHECK-NEXT: [[SIGN:%.*]] = ashr i8 [[X:%.*]], 7
@@ -1519,6 +1521,8 @@ define i8 @not_ashr_bitwidth_mask_use2(i8 %x, i8 %y) {
ret i8 %r
}
+; negative test - wrong shift amount
+
define i8 @not_ashr_not_bitwidth_mask(i8 %x, i8 %y) {
; CHECK-LABEL: @not_ashr_not_bitwidth_mask(
; CHECK-NEXT: [[SIGN:%.*]] = ashr i8 [[X:%.*]], 6
@@ -1532,6 +1536,8 @@ define i8 @not_ashr_not_bitwidth_mask(i8 %x, i8 %y) {
ret i8 %r
}
+; negative test - wrong shift opcode
+
define i8 @not_lshr_bitwidth_mask(i8 %x, i8 %y) {
; CHECK-LABEL: @not_lshr_bitwidth_mask(
; CHECK-NEXT: [[SIGN:%.*]] = lshr i8 [[X:%.*]], 7
diff --git a/llvm/test/Transforms/InstCombine/logical-select-inseltpoison.ll b/llvm/test/Transforms/InstCombine/logical-select-inseltpoison.ll
index 43e97cca05bab..2d05c264db825 100644
--- a/llvm/test/Transforms/InstCombine/logical-select-inseltpoison.ll
+++ b/llvm/test/Transforms/InstCombine/logical-select-inseltpoison.ll
@@ -582,9 +582,12 @@ define <4 x i32> @vec_sel_xor_multi_use(<4 x i32> %a, <4 x i32> %b, <4 x i1> %c)
define i32 @allSignBits(i32 %cond, i32 %tval, i32 %fval) {
; CHECK-LABEL: @allSignBits(
-; CHECK-NEXT: [[DOTNOT:%.*]] = icmp sgt i32 [[COND:%.*]], -1
-; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[DOTNOT]], i32 [[FVAL:%.*]], i32 [[TVAL:%.*]]
-; CHECK-NEXT: ret i32 [[TMP1]]
+; CHECK-NEXT: [[ISNEG1:%.*]] = icmp slt i32 [[COND:%.*]], 0
+; CHECK-NEXT: [[A1:%.*]] = select i1 [[ISNEG1]], i32 [[TVAL:%.*]], i32 0
+; CHECK-NEXT: [[ISNEG:%.*]] = icmp slt i32 [[COND]], 0
+; CHECK-NEXT: [[A2:%.*]] = select i1 [[ISNEG]], i32 0, i32 [[FVAL:%.*]]
+; CHECK-NEXT: [[SEL:%.*]] = or i32 [[A1]], [[A2]]
+; CHECK-NEXT: ret i32 [[SEL]]
;
%bitmask = ashr i32 %cond, 31
%not_bitmask = xor i32 %bitmask, -1
@@ -596,9 +599,12 @@ define i32 @allSignBits(i32 %cond, i32 %tval, i32 %fval) {
define <4 x i8> @allSignBits_vec(<4 x i8> %cond, <4 x i8> %tval, <4 x i8> %fval) {
; CHECK-LABEL: @allSignBits_vec(
-; CHECK-NEXT: [[DOTNOT:%.*]] = icmp sgt <4 x i8> [[COND:%.*]], <i8 -1, i8 -1, i8 -1, i8 -1>
-; CHECK-NEXT: [[TMP1:%.*]] = select <4 x i1> [[DOTNOT]], <4 x i8> [[FVAL:%.*]], <4 x i8> [[TVAL:%.*]]
-; CHECK-NEXT: ret <4 x i8> [[TMP1]]
+; CHECK-NEXT: [[ISNEG1:%.*]] = icmp slt <4 x i8> [[COND:%.*]], zeroinitializer
+; CHECK-NEXT: [[A1:%.*]] = select <4 x i1> [[ISNEG1]], <4 x i8> [[TVAL:%.*]], <4 x i8> zeroinitializer
+; CHECK-NEXT: [[ISNEG:%.*]] = icmp slt <4 x i8> [[COND]], zeroinitializer
+; CHECK-NEXT: [[A2:%.*]] = select <4 x i1> [[ISNEG]], <4 x i8> zeroinitializer, <4 x i8> [[FVAL:%.*]]
+; CHECK-NEXT: [[SEL:%.*]] = or <4 x i8> [[A2]], [[A1]]
+; CHECK-NEXT: ret <4 x i8> [[SEL]]
;
%bitmask = ashr <4 x i8> %cond, <i8 7, i8 7, i8 7, i8 7>
%not_bitmask = xor <4 x i8> %bitmask, <i8 -1, i8 -1, i8 -1, i8 -1>
diff --git a/llvm/test/Transforms/InstCombine/logical-select.ll b/llvm/test/Transforms/InstCombine/logical-select.ll
index 5093f20cba0c4..c23f851bf8da7 100644
--- a/llvm/test/Transforms/InstCombine/logical-select.ll
+++ b/llvm/test/Transforms/InstCombine/logical-select.ll
@@ -583,9 +583,12 @@ define <4 x i32> @vec_sel_xor_multi_use(<4 x i32> %a, <4 x i32> %b, <4 x i1> %c)
define i32 @allSignBits(i32 %cond, i32 %tval, i32 %fval) {
; CHECK-LABEL: @allSignBits(
-; CHECK-NEXT: [[DOTNOT:%.*]] = icmp sgt i32 [[COND:%.*]], -1
-; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[DOTNOT]], i32 [[FVAL:%.*]], i32 [[TVAL:%.*]]
-; CHECK-NEXT: ret i32 [[TMP1]]
+; CHECK-NEXT: [[ISNEG1:%.*]] = icmp slt i32 [[COND:%.*]], 0
+; CHECK-NEXT: [[A1:%.*]] = select i1 [[ISNEG1]], i32 [[TVAL:%.*]], i32 0
+; CHECK-NEXT: [[ISNEG:%.*]] = icmp slt i32 [[COND]], 0
+; CHECK-NEXT: [[A2:%.*]] = select i1 [[ISNEG]], i32 0, i32 [[FVAL:%.*]]
+; CHECK-NEXT: [[SEL:%.*]] = or i32 [[A1]], [[A2]]
+; CHECK-NEXT: ret i32 [[SEL]]
;
%bitmask = ashr i32 %cond, 31
%not_bitmask = xor i32 %bitmask, -1
@@ -597,9 +600,12 @@ define i32 @allSignBits(i32 %cond, i32 %tval, i32 %fval) {
define <4 x i8> @allSignBits_vec(<4 x i8> %cond, <4 x i8> %tval, <4 x i8> %fval) {
; CHECK-LABEL: @allSignBits_vec(
-; CHECK-NEXT: [[DOTNOT:%.*]] = icmp sgt <4 x i8> [[COND:%.*]], <i8 -1, i8 -1, i8 -1, i8 -1>
-; CHECK-NEXT: [[TMP1:%.*]] = select <4 x i1> [[DOTNOT]], <4 x i8> [[FVAL:%.*]], <4 x i8> [[TVAL:%.*]]
-; CHECK-NEXT: ret <4 x i8> [[TMP1]]
+; CHECK-NEXT: [[ISNEG1:%.*]] = icmp slt <4 x i8> [[COND:%.*]], zeroinitializer
+; CHECK-NEXT: [[A1:%.*]] = select <4 x i1> [[ISNEG1]], <4 x i8> [[TVAL:%.*]], <4 x i8> zeroinitializer
+; CHECK-NEXT: [[ISNEG:%.*]] = icmp slt <4 x i8> [[COND]], zeroinitializer
+; CHECK-NEXT: [[A2:%.*]] = select <4 x i1> [[ISNEG]], <4 x i8> zeroinitializer, <4 x i8> [[FVAL:%.*]]
+; CHECK-NEXT: [[SEL:%.*]] = or <4 x i8> [[A2]], [[A1]]
+; CHECK-NEXT: ret <4 x i8> [[SEL]]
;
%bitmask = ashr <4 x i8> %cond, <i8 7, i8 7, i8 7, i8 7>
%not_bitmask = xor <4 x i8> %bitmask, <i8 -1, i8 -1, i8 -1, i8 -1>
diff --git a/llvm/test/Transforms/InstCombine/vec_sext.ll b/llvm/test/Transforms/InstCombine/vec_sext.ll
index 39bd40874160b..93107e38365ee 100644
--- a/llvm/test/Transforms/InstCombine/vec_sext.ll
+++ b/llvm/test/Transforms/InstCombine/vec_sext.ll
@@ -4,9 +4,12 @@
define <4 x i32> @vec_select(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: @vec_select(
; CHECK-NEXT: [[SUB:%.*]] = sub nsw <4 x i32> zeroinitializer, [[A:%.*]]
-; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <4 x i32> [[B:%.*]], <i32 -1, i32 -1, i32 -1, i32 -1>
-; CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[A]], <4 x i32> [[SUB]]
-; CHECK-NEXT: ret <4 x i32> [[TMP2]]
+; CHECK-NEXT: [[ISNEG:%.*]] = icmp slt <4 x i32> [[B:%.*]], zeroinitializer
+; CHECK-NEXT: [[T2:%.*]] = select <4 x i1> [[ISNEG]], <4 x i32> zeroinitializer, <4 x i32> [[A]]
+; CHECK-NEXT: [[ISNEG1:%.*]] = icmp slt <4 x i32> [[B]], zeroinitializer
+; CHECK-NEXT: [[T3:%.*]] = select <4 x i1> [[ISNEG1]], <4 x i32> [[SUB]], <4 x i32> zeroinitializer
+; CHECK-NEXT: [[COND:%.*]] = or <4 x i32> [[T2]], [[T3]]
+; CHECK-NEXT: ret <4 x i32> [[COND]]
;
%cmp = icmp slt <4 x i32> %b, zeroinitializer
%sext = sext <4 x i1> %cmp to <4 x i32>
@@ -23,9 +26,12 @@ define <4 x i32> @vec_select(<4 x i32> %a, <4 x i32> %b) {
define <4 x i32> @vec_select_alternate_sign_bit_test(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: @vec_select_alternate_sign_bit_test(
; CHECK-NEXT: [[SUB:%.*]] = sub nsw <4 x i32> zeroinitializer, [[A:%.*]]
-; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <4 x i32> [[B:%.*]], <i32 -1, i32 -1, i32 -1, i32 -1>
-; CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[SUB]], <4 x i32> [[A]]
-; CHECK-NEXT: ret <4 x i32> [[TMP2]]
+; CHECK-NEXT: [[ISNEG1:%.*]] = icmp slt <4 x i32> [[B:%.*]], zeroinitializer
+; CHECK-NEXT: [[T2:%.*]] = select <4 x i1> [[ISNEG1]], <4 x i32> [[A]], <4 x i32> zeroinitializer
+; CHECK-NEXT: [[ISNEG:%.*]] = icmp slt <4 x i32> [[B]], zeroinitializer
+; CHECK-NEXT: [[T3:%.*]] = select <4 x i1> [[ISNEG]], <4 x i32> zeroinitializer, <4 x i32> [[SUB]]
+; CHECK-NEXT: [[COND:%.*]] = or <4 x i32> [[T2]], [[T3]]
+; CHECK-NEXT: ret <4 x i32> [[COND]]
;
%cmp = icmp sgt <4 x i32> %b, <i32 -1, i32 -1, i32 -1, i32 -1>
%sext = sext <4 x i1> %cmp to <4 x i32>
diff --git a/llvm/test/Transforms/PhaseOrdering/vector-select.ll b/llvm/test/Transforms/PhaseOrdering/vector-select.ll
index 3533c9e846a94..84876b24c5d9b 100644
--- a/llvm/test/Transforms/PhaseOrdering/vector-select.ll
+++ b/llvm/test/Transforms/PhaseOrdering/vector-select.ll
@@ -3,16 +3,9 @@
define <3 x float> @PR52631(<3 x float> %a, <3 x float> %b, <3 x i32> %c) {
; CHECK-LABEL: @PR52631(
-; CHECK-NEXT: [[ASTYPE:%.*]] = bitcast <3 x float> [[B:%.*]] to <3 x i32>
-; CHECK-NEXT: [[ISNEG:%.*]] = icmp slt <3 x i32> [[C:%.*]], zeroinitializer
-; CHECK-NEXT: [[AND:%.*]] = select <3 x i1> [[ISNEG]], <3 x i32> [[ASTYPE]], <3 x i32> zeroinitializer
-; CHECK-NEXT: [[C_LOBIT2:%.*]] = ashr <3 x i32> [[C]], <i32 31, i32 31, i32 31>
-; CHECK-NEXT: [[C_LOBIT2_NOT:%.*]] = xor <3 x i32> [[C_LOBIT2]], <i32 -1, i32 -1, i32 -1>
-; CHECK-NEXT: [[ASTYPE28:%.*]] = bitcast <3 x float> [[A:%.*]] to <3 x i32>
-; CHECK-NEXT: [[AND29:%.*]] = and <3 x i32> [[C_LOBIT2_NOT]], [[ASTYPE28]]
-; CHECK-NEXT: [[OR:%.*]] = or <3 x i32> [[AND29]], [[AND]]
-; CHECK-NEXT: [[ASTYPE33:%.*]] = bitcast <3 x i32> [[OR]] to <3 x float>
-; CHECK-NEXT: ret <3 x float> [[ASTYPE33]]
+; CHECK-NEXT: [[ISNEG3:%.*]] = icmp slt <3 x i32> [[C:%.*]], zeroinitializer
+; CHECK-NEXT: [[OR_V:%.*]] = select <3 x i1> [[ISNEG3]], <3 x float> [[B:%.*]], <3 x float> [[A:%.*]]
+; CHECK-NEXT: ret <3 x float> [[OR_V]]
;
%a.addr = alloca <3 x float>, align 16
%b.addr = alloca <3 x float>, align 16
@@ -85,9 +78,9 @@ define <3 x float> @PR52631(<3 x float> %a, <3 x float> %b, <3 x i32> %c) {
define <4 x i8> @allSignBits_vec(<4 x i8> %cond, <4 x i8> %tval, <4 x i8> %fval) {
; CHECK-LABEL: @allSignBits_vec(
-; CHECK-NEXT: [[DOTNOT:%.*]] = icmp sgt <4 x i8> [[COND:%.*]], <i8 -1, i8 -1, i8 -1, i8 -1>
-; CHECK-NEXT: [[TMP1:%.*]] = select <4 x i1> [[DOTNOT]], <4 x i8> [[FVAL:%.*]], <4 x i8> [[TVAL:%.*]]
-; CHECK-NEXT: ret <4 x i8> [[TMP1]]
+; CHECK-NEXT: [[ISNEG1:%.*]] = icmp slt <4 x i8> [[COND:%.*]], zeroinitializer
+; CHECK-NEXT: [[SEL:%.*]] = select <4 x i1> [[ISNEG1]], <4 x i8> [[TVAL:%.*]], <4 x i8> [[FVAL:%.*]]
+; CHECK-NEXT: ret <4 x i8> [[SEL]]
;
%bitmask = ashr <4 x i8> %cond, <i8 7, i8 7, i8 7, i8 7>
%not_bitmask = xor <4 x i8> %bitmask, <i8 -1, i8 -1, i8 -1, i8 -1>