[llvm] [ARM] hasAndNot in ARM supports vectors. (PR #156488)

Tue Sep 2 10:52:59 PDT 2025

https://github.com/AZero13 updated https://github.com/llvm/llvm-project/pull/156488

>From 284ef3f868a4956fab774ff0c09932a944344e00 Mon Sep 17 00:00:00 2001
From: AZero13 <gfunni234 at gmail.com>
Date: Tue, 2 Sep 2025 12:40:27 -0400
Subject: [PATCH] [ARM] hasAndNot in ARM supports vectors.

---
 llvm/lib/Target/ARM/ARMISelLowering.cpp        | 12 ++++++++++++
 llvm/lib/Target/ARM/ARMISelLowering.h          |  2 ++
 .../CodeGen/Thumb2/mve-vselect-constants.ll    | 18 ++++++------------
 3 files changed, 20 insertions(+), 12 deletions(-)

diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index b5c01eafcf108..221d5ae0054ec 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -16125,6 +16125,18 @@ static bool TryCombineBaseUpdate(struct BaseUpdateTarget &Target,
   return true;
 }
 
+bool ARMTargetLowering::hasAndNot(SDValue Y) const {
+  EVT VT = Y.getValueType();
+  // Non-vector values get the same answer as hasAndNotCompare().
+  if (!VT.isVector())
+    return hasAndNotCompare(Y);
+  // With NEON or MVE integer ops, vectors of at least 64 bits qualify.
+  if (Subtarget->hasNEON() || Subtarget->hasMVEIntegerOps())
+    return VT.getFixedSizeInBits() >= 64; // vector 'bic'
+  // Neither NEON nor MVE integer ops: report no and-not for vectors.
+  return false;
+}
+
 // If (opcode ptr inc) is and ADD-like instruction, return the
 // increment value. Otherwise return 0.
 static unsigned getPointerConstIncrement(unsigned Opcode, SDValue Ptr,
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.h b/llvm/lib/Target/ARM/ARMISelLowering.h
index 196ecb1b9f678..c36be449b1a4a 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.h
+++ b/llvm/lib/Target/ARM/ARMISelLowering.h
@@ -611,6 +611,8 @@ class VectorType;
 
     bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override;
 
+    bool hasAndNot(SDValue Y) const override;
+
     bool hasAndNotCompare(SDValue V) const override {
       // We can use bics for any scalar.
       return V.getValueType().isScalarInteger();
diff --git a/llvm/test/CodeGen/Thumb2/mve-vselect-constants.ll b/llvm/test/CodeGen/Thumb2/mve-vselect-constants.ll
index 024de2b36667b..529e907537ba4 100644
--- a/llvm/test/CodeGen/Thumb2/mve-vselect-constants.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-vselect-constants.ll
@@ -240,10 +240,8 @@ define arm_aapcs_vfpcc <2 x i64> @signbit_setmask_v2i64(<2 x i64> %a, <2 x i64>
 define arm_aapcs_vfpcc <16 x i8> @not_signbit_mask_v16i8(<16 x i8> %a, <16 x i8> %b) {
 ; CHECK-LABEL: not_signbit_mask_v16i8:
 ; CHECK:       @ %bb.0:
-; CHECK-NEXT:    vmov.i8 q2, #0xff
-; CHECK-NEXT:    vmov.i32 q3, #0x0
-; CHECK-NEXT:    vcmp.s8 gt, q0, q2
-; CHECK-NEXT:    vpsel q0, q1, q3
+; CHECK-NEXT:    vshr.s8 q0, q0, #7
+; CHECK-NEXT:    vbic q0, q1, q0
 ; CHECK-NEXT:    bx lr
   %cond = icmp sgt <16 x i8> %a, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
   %r = select <16 x i1> %cond, <16 x i8> %b, <16 x i8> zeroinitializer
@@ -253,10 +251,8 @@ define arm_aapcs_vfpcc <16 x i8> @not_signbit_mask_v16i8(<16 x i8> %a, <16 x i8>
 define arm_aapcs_vfpcc <8 x i16> @not_signbit_mask_v8i16(<8 x i16> %a, <8 x i16> %b) {
 ; CHECK-LABEL: not_signbit_mask_v8i16:
 ; CHECK:       @ %bb.0:
-; CHECK-NEXT:    vmov.i8 q2, #0xff
-; CHECK-NEXT:    vmov.i32 q3, #0x0
-; CHECK-NEXT:    vcmp.s16 gt, q0, q2
-; CHECK-NEXT:    vpsel q0, q1, q3
+; CHECK-NEXT:    vshr.s16 q0, q0, #15
+; CHECK-NEXT:    vbic q0, q1, q0
 ; CHECK-NEXT:    bx lr
   %cond = icmp sgt <8 x i16> %a, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
   %r = select <8 x i1> %cond, <8 x i16> %b, <8 x i16> zeroinitializer
@@ -266,10 +262,8 @@ define arm_aapcs_vfpcc <8 x i16> @not_signbit_mask_v8i16(<8 x i16> %a, <8 x i16>
 define arm_aapcs_vfpcc <4 x i32> @not_signbit_mask_v4i32(<4 x i32> %a, <4 x i32> %b) {
 ; CHECK-LABEL: not_signbit_mask_v4i32:
 ; CHECK:       @ %bb.0:
-; CHECK-NEXT:    vmov.i8 q2, #0xff
-; CHECK-NEXT:    vmov.i32 q3, #0x0
-; CHECK-NEXT:    vcmp.s32 gt, q0, q2
-; CHECK-NEXT:    vpsel q0, q1, q3
+; CHECK-NEXT:    vshr.s32 q0, q0, #31
+; CHECK-NEXT:    vbic q0, q1, q0
 ; CHECK-NEXT:    bx lr
   %cond = icmp sgt <4 x i32> %a, <i32 -1, i32 -1, i32 -1, i32 -1>
   %r = select <4 x i1> %cond, <4 x i32> %b, <4 x i32> zeroinitializer