[clang] 1a60ae0 - [InstCombine] fold mask-with-signbit-splat to icmp+select
Sanjay Patel via cfe-commits
cfe-commits at lists.llvm.org
Tue Dec 14 13:02:02 PST 2021
Author: Sanjay Patel
Date: 2021-12-14T16:00:42-05:00
New Revision: 1a60ae02c65d26981017f59bc5918d3c2e363bfd
URL: https://github.com/llvm/llvm-project/commit/1a60ae02c65d26981017f59bc5918d3c2e363bfd
DIFF: https://github.com/llvm/llvm-project/commit/1a60ae02c65d26981017f59bc5918d3c2e363bfd.diff
LOG: [InstCombine] fold mask-with-signbit-splat to icmp+select
~(iN X s>> (N-1)) & Y --> (X s< 0) ? 0 : Y
https://alive2.llvm.org/ce/z/JKlQ9x
This is similar to D111410 / 727e642e970d028049d,
but it includes a 'not' of the sign bit, so it
saves an instruction in the basic pattern.
DAGCombiner or target-specific folds can expand
this back into bit-hacks.
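For reference, a minimal scalar sketch of the basic pattern (it mirrors the
not_ashr_bitwidth_mask test updated below; the @src/@tgt names are only
illustrative, following the usual alive2 convention):

  ; before: splat the sign bit, invert the splat, use it as a mask
  define i8 @src(i8 %x, i8 %y) {
    %sign = ashr i8 %x, 7     ; 0 if x >= 0, -1 if x < 0
    %not = xor i8 %sign, -1   ; invert the splat
    %r = and i8 %not, %y      ; y if x >= 0, 0 otherwise
    ret i8 %r
  }

  ; after: one compare feeding a select
  define i8 @tgt(i8 %x, i8 %y) {
    %isneg = icmp slt i8 %x, 0
    %r = select i1 %isneg, i8 0, i8 %y
    ret i8 %r
  }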
The diffs in the logical-select tests are not true
regressions: running early-cse and another round
of instcombine is expected in a normal opt pipeline,
and that reduces the IR back to a minimal form, as shown
in the duplicated PhaseOrdering test and sketched below.
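As a rough illustration (the exact invocation is an assumption, not part of
this commit), running something like
opt -passes='instcombine,early-cse,instcombine' -S on the allSignBits_vec
pattern collapses the duplicated icmp/select/or sequence back into a single
compare and select, matching the form checked in the PhaseOrdering test:

  %isneg1 = icmp slt <4 x i8> %cond, zeroinitializer
  %sel = select <4 x i1> %isneg1, <4 x i8> %tval, <4 x i8> %fval
  ret <4 x i8> %sel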
I do not understand the SystemZ diffs, so
I made the minimal edits suggested by FileCheck to
make that test pass again. That whole test file is
set up wrong, though: it runs the entire optimizer (-O2)
to check IR, and on top of that it also runs
codegen and checks asm. It needs to be split up.
Fixes #52631
Added:
Modified:
clang/test/CodeGen/SystemZ/builtins-systemz-zvector.c
llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
llvm/test/Transforms/InstCombine/and.ll
llvm/test/Transforms/InstCombine/logical-select-inseltpoison.ll
llvm/test/Transforms/InstCombine/logical-select.ll
llvm/test/Transforms/InstCombine/vec_sext.ll
llvm/test/Transforms/PhaseOrdering/vector-select.ll
Removed:
################################################################################
diff --git a/clang/test/CodeGen/SystemZ/builtins-systemz-zvector.c b/clang/test/CodeGen/SystemZ/builtins-systemz-zvector.c
index 7cd4a951741f0..38f0c2908825a 100644
--- a/clang/test/CodeGen/SystemZ/builtins-systemz-zvector.c
+++ b/clang/test/CodeGen/SystemZ/builtins-systemz-zvector.c
@@ -3289,13 +3289,13 @@ void test_integer(void) {
// CHECK-ASM: vsrlb
vsc = vec_abs(vsc);
- // CHECK-ASM: vlpb
+ // CHECK-ASM: vlcb
vss = vec_abs(vss);
- // CHECK-ASM: vlph
+ // CHECK-ASM: vlch
vsi = vec_abs(vsi);
- // CHECK-ASM: vlpf
+ // CHECK-ASM: vlcf
vsl = vec_abs(vsl);
- // CHECK-ASM: vlpg
+ // CHECK-ASM: vlcg
vsc = vec_max(vsc, vsc);
// CHECK-ASM: vmxb
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index 9023619b14280..08cd1a7f97e60 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -2133,6 +2133,15 @@ Instruction *InstCombinerImpl::visitAnd(BinaryOperator &I) {
Value *Cmp = Builder.CreateICmpSLT(X, Zero, "isneg");
return SelectInst::Create(Cmp, Y, Zero);
}
+ // If there's a 'not' of the shifted value, swap the select operands:
+ // ~(iN X s>> (N-1)) & Y --> (X s< 0) ? 0 : Y
+ if (match(&I, m_c_And(m_OneUse(m_Not(
+ m_AShr(m_Value(X), m_SpecificInt(FullShift)))),
+ m_Value(Y)))) {
+ Constant *Zero = ConstantInt::getNullValue(Ty);
+ Value *Cmp = Builder.CreateICmpSLT(X, Zero, "isneg");
+ return SelectInst::Create(Cmp, Zero, Y);
+ }
// (~x) & y --> ~(x | (~y)) iff that gets rid of inversions
if (sinkNotIntoOtherHandOfAndOrOr(I))
diff --git a/llvm/test/Transforms/InstCombine/and.ll b/llvm/test/Transforms/InstCombine/and.ll
index edaef78b631d8..53c7f09189ff5 100644
--- a/llvm/test/Transforms/InstCombine/and.ll
+++ b/llvm/test/Transforms/InstCombine/and.ll
@@ -1463,9 +1463,8 @@ define i8 @lshr_bitwidth_mask(i8 %x, i8 %y) {
define i8 @not_ashr_bitwidth_mask(i8 %x, i8 %y) {
; CHECK-LABEL: @not_ashr_bitwidth_mask(
-; CHECK-NEXT: [[SIGN:%.*]] = ashr i8 [[X:%.*]], 7
-; CHECK-NEXT: [[NOT:%.*]] = xor i8 [[SIGN]], -1
-; CHECK-NEXT: [[POS_OR_ZERO:%.*]] = and i8 [[NOT]], [[Y:%.*]]
+; CHECK-NEXT: [[ISNEG:%.*]] = icmp slt i8 [[X:%.*]], 0
+; CHECK-NEXT: [[POS_OR_ZERO:%.*]] = select i1 [[ISNEG]], i8 0, i8 [[Y:%.*]]
; CHECK-NEXT: ret i8 [[POS_OR_ZERO]]
;
%sign = ashr i8 %x, 7
@@ -1477,9 +1476,8 @@ define i8 @not_ashr_bitwidth_mask(i8 %x, i8 %y) {
define <2 x i8> @not_ashr_bitwidth_mask_vec_commute(<2 x i8> %x, <2 x i8> %py) {
; CHECK-LABEL: @not_ashr_bitwidth_mask_vec_commute(
; CHECK-NEXT: [[Y:%.*]] = mul <2 x i8> [[PY:%.*]], <i8 42, i8 2>
-; CHECK-NEXT: [[SIGN:%.*]] = ashr <2 x i8> [[X:%.*]], <i8 7, i8 7>
-; CHECK-NEXT: [[NOT:%.*]] = xor <2 x i8> [[SIGN]], <i8 -1, i8 -1>
-; CHECK-NEXT: [[POS_OR_ZERO:%.*]] = and <2 x i8> [[Y]], [[NOT]]
+; CHECK-NEXT: [[ISNEG:%.*]] = icmp slt <2 x i8> [[X:%.*]], zeroinitializer
+; CHECK-NEXT: [[POS_OR_ZERO:%.*]] = select <2 x i1> [[ISNEG]], <2 x i8> zeroinitializer, <2 x i8> [[Y]]
; CHECK-NEXT: ret <2 x i8> [[POS_OR_ZERO]]
;
%y = mul <2 x i8> %py, <i8 42, i8 2> ; thwart complexity-based ordering
@@ -1489,12 +1487,14 @@ define <2 x i8> @not_ashr_bitwidth_mask_vec_commute(<2 x i8> %x, <2 x i8> %py) {
ret <2 x i8> %pos_or_zero
}
+; extra use of shift is ok
+
define i8 @not_ashr_bitwidth_mask_use1(i8 %x, i8 %y) {
; CHECK-LABEL: @not_ashr_bitwidth_mask_use1(
; CHECK-NEXT: [[SIGN:%.*]] = ashr i8 [[X:%.*]], 7
; CHECK-NEXT: call void @use8(i8 [[SIGN]])
-; CHECK-NEXT: [[NOT:%.*]] = xor i8 [[SIGN]], -1
-; CHECK-NEXT: [[R:%.*]] = and i8 [[NOT]], [[Y:%.*]]
+; CHECK-NEXT: [[ISNEG:%.*]] = icmp slt i8 [[X]], 0
+; CHECK-NEXT: [[R:%.*]] = select i1 [[ISNEG]], i8 0, i8 [[Y:%.*]]
; CHECK-NEXT: ret i8 [[R]]
;
%sign = ashr i8 %x, 7
@@ -1504,6 +1504,8 @@ define i8 @not_ashr_bitwidth_mask_use1(i8 %x, i8 %y) {
ret i8 %r
}
+; negative test - extra use
+
define i8 @not_ashr_bitwidth_mask_use2(i8 %x, i8 %y) {
; CHECK-LABEL: @not_ashr_bitwidth_mask_use2(
; CHECK-NEXT: [[SIGN:%.*]] = ashr i8 [[X:%.*]], 7
@@ -1519,6 +1521,8 @@ define i8 @not_ashr_bitwidth_mask_use2(i8 %x, i8 %y) {
ret i8 %r
}
+; negative test - wrong shift amount
+
define i8 @not_ashr_not_bitwidth_mask(i8 %x, i8 %y) {
; CHECK-LABEL: @not_ashr_not_bitwidth_mask(
; CHECK-NEXT: [[SIGN:%.*]] = ashr i8 [[X:%.*]], 6
@@ -1532,6 +1536,8 @@ define i8 @not_ashr_not_bitwidth_mask(i8 %x, i8 %y) {
ret i8 %r
}
+; negative test - wrong shift opcode
+
define i8 @not_lshr_bitwidth_mask(i8 %x, i8 %y) {
; CHECK-LABEL: @not_lshr_bitwidth_mask(
; CHECK-NEXT: [[SIGN:%.*]] = lshr i8 [[X:%.*]], 7
diff --git a/llvm/test/Transforms/InstCombine/logical-select-inseltpoison.ll b/llvm/test/Transforms/InstCombine/logical-select-inseltpoison.ll
index 43e97cca05bab..2d05c264db825 100644
--- a/llvm/test/Transforms/InstCombine/logical-select-inseltpoison.ll
+++ b/llvm/test/Transforms/InstCombine/logical-select-inseltpoison.ll
@@ -582,9 +582,12 @@ define <4 x i32> @vec_sel_xor_multi_use(<4 x i32> %a, <4 x i32> %b, <4 x i1> %c)
define i32 @allSignBits(i32 %cond, i32 %tval, i32 %fval) {
; CHECK-LABEL: @allSignBits(
-; CHECK-NEXT: [[DOTNOT:%.*]] = icmp sgt i32 [[COND:%.*]], -1
-; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[DOTNOT]], i32 [[FVAL:%.*]], i32 [[TVAL:%.*]]
-; CHECK-NEXT: ret i32 [[TMP1]]
+; CHECK-NEXT: [[ISNEG1:%.*]] = icmp slt i32 [[COND:%.*]], 0
+; CHECK-NEXT: [[A1:%.*]] = select i1 [[ISNEG1]], i32 [[TVAL:%.*]], i32 0
+; CHECK-NEXT: [[ISNEG:%.*]] = icmp slt i32 [[COND]], 0
+; CHECK-NEXT: [[A2:%.*]] = select i1 [[ISNEG]], i32 0, i32 [[FVAL:%.*]]
+; CHECK-NEXT: [[SEL:%.*]] = or i32 [[A1]], [[A2]]
+; CHECK-NEXT: ret i32 [[SEL]]
;
%bitmask = ashr i32 %cond, 31
%not_bitmask = xor i32 %bitmask, -1
@@ -596,9 +599,12 @@ define i32 @allSignBits(i32 %cond, i32 %tval, i32 %fval) {
define <4 x i8> @allSignBits_vec(<4 x i8> %cond, <4 x i8> %tval, <4 x i8> %fval) {
; CHECK-LABEL: @allSignBits_vec(
-; CHECK-NEXT: [[DOTNOT:%.*]] = icmp sgt <4 x i8> [[COND:%.*]], <i8 -1, i8 -1, i8 -1, i8 -1>
-; CHECK-NEXT: [[TMP1:%.*]] = select <4 x i1> [[DOTNOT]], <4 x i8> [[FVAL:%.*]], <4 x i8> [[TVAL:%.*]]
-; CHECK-NEXT: ret <4 x i8> [[TMP1]]
+; CHECK-NEXT: [[ISNEG1:%.*]] = icmp slt <4 x i8> [[COND:%.*]], zeroinitializer
+; CHECK-NEXT: [[A1:%.*]] = select <4 x i1> [[ISNEG1]], <4 x i8> [[TVAL:%.*]], <4 x i8> zeroinitializer
+; CHECK-NEXT: [[ISNEG:%.*]] = icmp slt <4 x i8> [[COND]], zeroinitializer
+; CHECK-NEXT: [[A2:%.*]] = select <4 x i1> [[ISNEG]], <4 x i8> zeroinitializer, <4 x i8> [[FVAL:%.*]]
+; CHECK-NEXT: [[SEL:%.*]] = or <4 x i8> [[A2]], [[A1]]
+; CHECK-NEXT: ret <4 x i8> [[SEL]]
;
%bitmask = ashr <4 x i8> %cond, <i8 7, i8 7, i8 7, i8 7>
%not_bitmask = xor <4 x i8> %bitmask, <i8 -1, i8 -1, i8 -1, i8 -1>
diff --git a/llvm/test/Transforms/InstCombine/logical-select.ll b/llvm/test/Transforms/InstCombine/logical-select.ll
index 5093f20cba0c4..c23f851bf8da7 100644
--- a/llvm/test/Transforms/InstCombine/logical-select.ll
+++ b/llvm/test/Transforms/InstCombine/logical-select.ll
@@ -583,9 +583,12 @@ define <4 x i32> @vec_sel_xor_multi_use(<4 x i32> %a, <4 x i32> %b, <4 x i1> %c)
define i32 @allSignBits(i32 %cond, i32 %tval, i32 %fval) {
; CHECK-LABEL: @allSignBits(
-; CHECK-NEXT: [[DOTNOT:%.*]] = icmp sgt i32 [[COND:%.*]], -1
-; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[DOTNOT]], i32 [[FVAL:%.*]], i32 [[TVAL:%.*]]
-; CHECK-NEXT: ret i32 [[TMP1]]
+; CHECK-NEXT: [[ISNEG1:%.*]] = icmp slt i32 [[COND:%.*]], 0
+; CHECK-NEXT: [[A1:%.*]] = select i1 [[ISNEG1]], i32 [[TVAL:%.*]], i32 0
+; CHECK-NEXT: [[ISNEG:%.*]] = icmp slt i32 [[COND]], 0
+; CHECK-NEXT: [[A2:%.*]] = select i1 [[ISNEG]], i32 0, i32 [[FVAL:%.*]]
+; CHECK-NEXT: [[SEL:%.*]] = or i32 [[A1]], [[A2]]
+; CHECK-NEXT: ret i32 [[SEL]]
;
%bitmask = ashr i32 %cond, 31
%not_bitmask = xor i32 %bitmask, -1
@@ -597,9 +600,12 @@ define i32 @allSignBits(i32 %cond, i32 %tval, i32 %fval) {
define <4 x i8> @allSignBits_vec(<4 x i8> %cond, <4 x i8> %tval, <4 x i8> %fval) {
; CHECK-LABEL: @allSignBits_vec(
-; CHECK-NEXT: [[DOTNOT:%.*]] = icmp sgt <4 x i8> [[COND:%.*]], <i8 -1, i8 -1, i8 -1, i8 -1>
-; CHECK-NEXT: [[TMP1:%.*]] = select <4 x i1> [[DOTNOT]], <4 x i8> [[FVAL:%.*]], <4 x i8> [[TVAL:%.*]]
-; CHECK-NEXT: ret <4 x i8> [[TMP1]]
+; CHECK-NEXT: [[ISNEG1:%.*]] = icmp slt <4 x i8> [[COND:%.*]], zeroinitializer
+; CHECK-NEXT: [[A1:%.*]] = select <4 x i1> [[ISNEG1]], <4 x i8> [[TVAL:%.*]], <4 x i8> zeroinitializer
+; CHECK-NEXT: [[ISNEG:%.*]] = icmp slt <4 x i8> [[COND]], zeroinitializer
+; CHECK-NEXT: [[A2:%.*]] = select <4 x i1> [[ISNEG]], <4 x i8> zeroinitializer, <4 x i8> [[FVAL:%.*]]
+; CHECK-NEXT: [[SEL:%.*]] = or <4 x i8> [[A2]], [[A1]]
+; CHECK-NEXT: ret <4 x i8> [[SEL]]
;
%bitmask = ashr <4 x i8> %cond, <i8 7, i8 7, i8 7, i8 7>
%not_bitmask = xor <4 x i8> %bitmask, <i8 -1, i8 -1, i8 -1, i8 -1>
diff --git a/llvm/test/Transforms/InstCombine/vec_sext.ll b/llvm/test/Transforms/InstCombine/vec_sext.ll
index 39bd40874160b..93107e38365ee 100644
--- a/llvm/test/Transforms/InstCombine/vec_sext.ll
+++ b/llvm/test/Transforms/InstCombine/vec_sext.ll
@@ -4,9 +4,12 @@
define <4 x i32> @vec_select(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: @vec_select(
; CHECK-NEXT: [[SUB:%.*]] = sub nsw <4 x i32> zeroinitializer, [[A:%.*]]
-; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <4 x i32> [[B:%.*]], <i32 -1, i32 -1, i32 -1, i32 -1>
-; CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[A]], <4 x i32> [[SUB]]
-; CHECK-NEXT: ret <4 x i32> [[TMP2]]
+; CHECK-NEXT: [[ISNEG:%.*]] = icmp slt <4 x i32> [[B:%.*]], zeroinitializer
+; CHECK-NEXT: [[T2:%.*]] = select <4 x i1> [[ISNEG]], <4 x i32> zeroinitializer, <4 x i32> [[A]]
+; CHECK-NEXT: [[ISNEG1:%.*]] = icmp slt <4 x i32> [[B]], zeroinitializer
+; CHECK-NEXT: [[T3:%.*]] = select <4 x i1> [[ISNEG1]], <4 x i32> [[SUB]], <4 x i32> zeroinitializer
+; CHECK-NEXT: [[COND:%.*]] = or <4 x i32> [[T2]], [[T3]]
+; CHECK-NEXT: ret <4 x i32> [[COND]]
;
%cmp = icmp slt <4 x i32> %b, zeroinitializer
%sext = sext <4 x i1> %cmp to <4 x i32>
@@ -23,9 +26,12 @@ define <4 x i32> @vec_select(<4 x i32> %a, <4 x i32> %b) {
define <4 x i32> @vec_select_alternate_sign_bit_test(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: @vec_select_alternate_sign_bit_test(
; CHECK-NEXT: [[SUB:%.*]] = sub nsw <4 x i32> zeroinitializer, [[A:%.*]]
-; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <4 x i32> [[B:%.*]], <i32 -1, i32 -1, i32 -1, i32 -1>
-; CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[SUB]], <4 x i32> [[A]]
-; CHECK-NEXT: ret <4 x i32> [[TMP2]]
+; CHECK-NEXT: [[ISNEG1:%.*]] = icmp slt <4 x i32> [[B:%.*]], zeroinitializer
+; CHECK-NEXT: [[T2:%.*]] = select <4 x i1> [[ISNEG1]], <4 x i32> [[A]], <4 x i32> zeroinitializer
+; CHECK-NEXT: [[ISNEG:%.*]] = icmp slt <4 x i32> [[B]], zeroinitializer
+; CHECK-NEXT: [[T3:%.*]] = select <4 x i1> [[ISNEG]], <4 x i32> zeroinitializer, <4 x i32> [[SUB]]
+; CHECK-NEXT: [[COND:%.*]] = or <4 x i32> [[T2]], [[T3]]
+; CHECK-NEXT: ret <4 x i32> [[COND]]
;
%cmp = icmp sgt <4 x i32> %b, <i32 -1, i32 -1, i32 -1, i32 -1>
%sext = sext <4 x i1> %cmp to <4 x i32>
diff --git a/llvm/test/Transforms/PhaseOrdering/vector-select.ll b/llvm/test/Transforms/PhaseOrdering/vector-select.ll
index 3533c9e846a94..84876b24c5d9b 100644
--- a/llvm/test/Transforms/PhaseOrdering/vector-select.ll
+++ b/llvm/test/Transforms/PhaseOrdering/vector-select.ll
@@ -3,16 +3,9 @@
define <3 x float> @PR52631(<3 x float> %a, <3 x float> %b, <3 x i32> %c) {
; CHECK-LABEL: @PR52631(
-; CHECK-NEXT: [[ASTYPE:%.*]] = bitcast <3 x float> [[B:%.*]] to <3 x i32>
-; CHECK-NEXT: [[ISNEG:%.*]] = icmp slt <3 x i32> [[C:%.*]], zeroinitializer
-; CHECK-NEXT: [[AND:%.*]] = select <3 x i1> [[ISNEG]], <3 x i32> [[ASTYPE]], <3 x i32> zeroinitializer
-; CHECK-NEXT: [[C_LOBIT2:%.*]] = ashr <3 x i32> [[C]], <i32 31, i32 31, i32 31>
-; CHECK-NEXT: [[C_LOBIT2_NOT:%.*]] = xor <3 x i32> [[C_LOBIT2]], <i32 -1, i32 -1, i32 -1>
-; CHECK-NEXT: [[ASTYPE28:%.*]] = bitcast <3 x float> [[A:%.*]] to <3 x i32>
-; CHECK-NEXT: [[AND29:%.*]] = and <3 x i32> [[C_LOBIT2_NOT]], [[ASTYPE28]]
-; CHECK-NEXT: [[OR:%.*]] = or <3 x i32> [[AND29]], [[AND]]
-; CHECK-NEXT: [[ASTYPE33:%.*]] = bitcast <3 x i32> [[OR]] to <3 x float>
-; CHECK-NEXT: ret <3 x float> [[ASTYPE33]]
+; CHECK-NEXT: [[ISNEG3:%.*]] = icmp slt <3 x i32> [[C:%.*]], zeroinitializer
+; CHECK-NEXT: [[OR_V:%.*]] = select <3 x i1> [[ISNEG3]], <3 x float> [[B:%.*]], <3 x float> [[A:%.*]]
+; CHECK-NEXT: ret <3 x float> [[OR_V]]
;
%a.addr = alloca <3 x float>, align 16
%b.addr = alloca <3 x float>, align 16
@@ -85,9 +78,9 @@ define <3 x float> @PR52631(<3 x float> %a, <3 x float> %b, <3 x i32> %c) {
define <4 x i8> @allSignBits_vec(<4 x i8> %cond, <4 x i8> %tval, <4 x i8> %fval) {
; CHECK-LABEL: @allSignBits_vec(
-; CHECK-NEXT: [[DOTNOT:%.*]] = icmp sgt <4 x i8> [[COND:%.*]], <i8 -1, i8 -1, i8 -1, i8 -1>
-; CHECK-NEXT: [[TMP1:%.*]] = select <4 x i1> [[DOTNOT]], <4 x i8> [[FVAL:%.*]], <4 x i8> [[TVAL:%.*]]
-; CHECK-NEXT: ret <4 x i8> [[TMP1]]
+; CHECK-NEXT: [[ISNEG1:%.*]] = icmp slt <4 x i8> [[COND:%.*]], zeroinitializer
+; CHECK-NEXT: [[SEL:%.*]] = select <4 x i1> [[ISNEG1]], <4 x i8> [[TVAL:%.*]], <4 x i8> [[FVAL:%.*]]
+; CHECK-NEXT: ret <4 x i8> [[SEL]]
;
%bitmask = ashr <4 x i8> %cond, <i8 7, i8 7, i8 7, i8 7>
%not_bitmask = xor <4 x i8> %bitmask, <i8 -1, i8 -1, i8 -1, i8 -1>