[llvm] 6f26867 - [AArch64][SVE2] Generate SVE2 BSL instruction in LLVM for add/sub. (#88413)

via llvm-commits llvm-commits at lists.llvm.org
Fri Apr 19 02:48:31 PDT 2024


Author: Dinar Temirbulatov
Date: 2024-04-19T10:48:27+01:00
New Revision: 6f26867cfa4c1333e69a17f8a2fc8297a4ab6d45

URL: https://github.com/llvm/llvm-project/commit/6f26867cfa4c1333e69a17f8a2fc8297a4ab6d45
DIFF: https://github.com/llvm/llvm-project/commit/6f26867cfa4c1333e69a17f8a2fc8297a4ab6d45.diff

LOG: [AArch64][SVE2] Generate SVE2 BSL instruction in LLVM for add/sub. (#88413)

Allow folding of the or/and-and pattern into the BSL instruction for scalable vectors.

Added: 
    

Modified: 
    llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
    llvm/test/CodeGen/AArch64/sve2-bsl.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 7947d73f9a4dd0..3d1453e3beb9a1 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -17927,11 +17927,11 @@ static SDValue tryCombineToBSL(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
       } else
         continue;
 
-      if (!ISD::isBuildVectorAllZeros(Sub.getOperand(0).getNode()))
+      if (!ISD::isConstantSplatVectorAllZeros(Sub.getOperand(0).getNode()))
         continue;
 
       // Constant ones is always righthand operand of the Add.
-      if (!ISD::isBuildVectorAllOnes(Add.getOperand(1).getNode()))
+      if (!ISD::isConstantSplatVectorAllOnes(Add.getOperand(1).getNode()))
         continue;
 
       if (Sub.getOperand(1) != Add.getOperand(0))

diff  --git a/llvm/test/CodeGen/AArch64/sve2-bsl.ll b/llvm/test/CodeGen/AArch64/sve2-bsl.ll
index 11f67634a3fb2c..23b2622f5f5863 100644
--- a/llvm/test/CodeGen/AArch64/sve2-bsl.ll
+++ b/llvm/test/CodeGen/AArch64/sve2-bsl.ll
@@ -13,6 +13,21 @@ define <vscale x 4 x i32> @bsl(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
   ret <vscale x 4 x i32> %c
 }
 
+define <vscale x 4 x i32> @bsl_add_sub(<vscale x 4 x i32> %pre_cond, <vscale x 4 x i32> %left, <vscale x 4 x i32> %right) #0 {
+; CHECK-LABEL: bsl_add_sub:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    subr z0.s, z0.s, #0 // =0x0
+; CHECK-NEXT:    bsl z1.d, z1.d, z2.d, z0.d
+; CHECK-NEXT:    mov z0.d, z1.d
+; CHECK-NEXT:    ret
+  %neg_cond = sub <vscale x 4 x i32> zeroinitializer, %pre_cond
+  %min_cond = add <vscale x 4 x i32> %pre_cond, splat(i32 -1)
+  %left_bits_0 = and <vscale x 4 x i32> %neg_cond, %left
+  %right_bits_0 = and <vscale x 4 x i32> %min_cond, %right
+  %bsl0000 = or <vscale x 4 x i32> %right_bits_0, %left_bits_0
+  ret <vscale x 4 x i32> %bsl0000
+}
+
 ; we are not expecting bsl instruction here. the constants do not match to fold to bsl.
 define <vscale x 4 x i32> @no_bsl_fold(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: no_bsl_fold:


        


More information about the llvm-commits mailing list