[llvm] [LLVM][SelectionDAG] Remove scalable vector restriction from poison analysis. (PR #102504)

Fri Aug 9 05:35:21 PDT 2024

https://github.com/paulwalker-arm updated https://github.com/llvm/llvm-project/pull/102504

>From 9e6f2b40baac75d3c555cd59efd06b33a4fe6edd Mon Sep 17 00:00:00 2001
From: Paul Walker <paul.walker at arm.com>
Date: Thu, 8 Aug 2024 17:24:39 +0100
Subject: [PATCH 1/3] Add test to show effect of missing freeze removal.

---
 llvm/test/CodeGen/AArch64/sve-fcmp.ll | 128 ++++++++++++++++++++++++++
 1 file changed, 128 insertions(+)

diff --git a/llvm/test/CodeGen/AArch64/sve-fcmp.ll b/llvm/test/CodeGen/AArch64/sve-fcmp.ll
index 35cbe65c6a8b86..56898206964cfd 100644
--- a/llvm/test/CodeGen/AArch64/sve-fcmp.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fcmp.ll
@@ -544,3 +544,131 @@ define %svboolx2 @and_of_multiuse_fcmp_olt_zero(<vscale x 4 x i1> %pg, <vscale x
   %ins.2 = insertvalue %svboolx2 %ins.1, <vscale x 4 x i1> %cmp, 1
   ret %svboolx2 %ins.2
 }
+
+define <vscale x 8 x i1> @logical_and_oeq_zero_pred(<vscale x 8 x i1> %pg, <vscale x 8 x half> %x) {
+; CHECK-LABEL: logical_and_oeq_zero_pred:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p1.h
+; CHECK-NEXT:    fcmeq p1.h, p1/z, z0.h, #0.0
+; CHECK-NEXT:    and p0.b, p0/z, p0.b, p1.b
+; CHECK-NEXT:    ret
+  %y = fcmp oeq <vscale x 8 x half> %x, zeroinitializer
+  %z = select <vscale x 8 x i1> %pg, <vscale x 8 x i1> %y, <vscale x 8 x i1> zeroinitializer
+ ret <vscale x 8 x i1> %z
+}
+
+define <vscale x 4 x i1> @logical_and_ogt_zero_pred(<vscale x 4 x i1> %pg, <vscale x 4 x half> %x) {
+; CHECK-LABEL: logical_and_ogt_zero_pred:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p1.s
+; CHECK-NEXT:    fcmgt p1.h, p1/z, z0.h, #0.0
+; CHECK-NEXT:    and p0.b, p0/z, p0.b, p1.b
+; CHECK-NEXT:    ret
+  %y = fcmp ogt <vscale x 4 x half> %x, zeroinitializer
+  %z = select <vscale x 4 x i1> %pg, <vscale x 4 x i1> %y, <vscale x 4 x i1> zeroinitializer
+  ret <vscale x 4 x i1> %z
+}
+
+define <vscale x 2 x i1> @logical_and_oge_zero_pred(<vscale x 2 x i1> %pg, <vscale x 2 x half> %x) {
+; CHECK-LABEL: logical_and_oge_zero_pred:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p1.d
+; CHECK-NEXT:    fcmge p1.h, p1/z, z0.h, #0.0
+; CHECK-NEXT:    and p0.b, p0/z, p0.b, p1.b
+; CHECK-NEXT:    ret
+  %y = fcmp oge <vscale x 2 x half> %x, zeroinitializer
+  %z = select <vscale x 2 x i1> %pg, <vscale x 2 x i1> %y, <vscale x 2 x i1> zeroinitializer
+  ret <vscale x 2 x i1> %z
+}
+
+define <vscale x 4 x i1> @logical_and_olt_zero_pred(<vscale x 4 x i1> %pg, <vscale x 4 x float> %x) {
+; CHECK-LABEL: logical_and_olt_zero_pred:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p1.s
+; CHECK-NEXT:    fcmlt p1.s, p1/z, z0.s, #0.0
+; CHECK-NEXT:    and p0.b, p0/z, p0.b, p1.b
+; CHECK-NEXT:    ret
+  %y = fcmp olt <vscale x 4 x float> %x, zeroinitializer
+  %z = select <vscale x 4 x i1> %pg, <vscale x 4 x i1> %y, <vscale x 4 x i1> zeroinitializer
+  ret <vscale x 4 x i1> %z
+}
+
+define <vscale x 2 x i1> @logical_and_ole_zero_pred(<vscale x 2 x i1> %pg, <vscale x 2 x float> %x) {
+; CHECK-LABEL: logical_and_ole_zero_pred:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p1.d
+; CHECK-NEXT:    fcmle p1.s, p1/z, z0.s, #0.0
+; CHECK-NEXT:    and p0.b, p0/z, p0.b, p1.b
+; CHECK-NEXT:    ret
+  %y = fcmp ole <vscale x 2 x float> %x, zeroinitializer
+  %z = select <vscale x 2 x i1> %pg, <vscale x 2 x i1> %y, <vscale x 2 x i1> zeroinitializer
+  ret <vscale x 2 x i1> %z
+}
+
+define <vscale x 2 x i1> @logical_and_une_zero_pred(<vscale x 2 x i1> %pg, <vscale x 2 x double> %x) {
+; CHECK-LABEL: logical_and_une_zero_pred:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p1.d
+; CHECK-NEXT:    fcmne p1.d, p1/z, z0.d, #0.0
+; CHECK-NEXT:    and p0.b, p0/z, p0.b, p1.b
+; CHECK-NEXT:    ret
+  %y = fcmp une <vscale x 2 x double> %x, zeroinitializer
+  %z = select <vscale x 2 x i1> %pg, <vscale x 2 x i1> %y, <vscale x 2 x i1> zeroinitializer
+  ret <vscale x 2 x i1> %z
+}
+
+define %svboolx2 @logical_and_of_multiuse_fcmp_ogt(<vscale x 4 x i1> %pg, <vscale x 4 x float> %x, <vscale x 4 x float> %y) {
+; CHECK-LABEL: logical_and_of_multiuse_fcmp_ogt:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p1.s
+; CHECK-NEXT:    fcmgt p1.s, p1/z, z0.s, z1.s
+; CHECK-NEXT:    and p0.b, p0/z, p0.b, p1.b
+; CHECK-NEXT:    ret
+  %cmp = fcmp ogt <vscale x 4 x float> %x, %y
+  %and = select <vscale x 4 x i1> %pg, <vscale x 4 x i1> %cmp, <vscale x 4 x i1> zeroinitializer
+  %ins.1 = insertvalue %svboolx2 poison, <vscale x 4 x i1> %and, 0
+  %ins.2 = insertvalue %svboolx2 %ins.1, <vscale x 4 x i1> %cmp, 1
+  ret %svboolx2 %ins.2
+}
+
+define %svboolx2 @logical_and_of_multiuse_fcmp_ogt_zero(<vscale x 4 x i1> %pg, <vscale x 4 x float> %x) {
+; CHECK-LABEL: logical_and_of_multiuse_fcmp_ogt_zero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p1.s
+; CHECK-NEXT:    fcmgt p1.s, p1/z, z0.s, #0.0
+; CHECK-NEXT:    and p0.b, p0/z, p0.b, p1.b
+; CHECK-NEXT:    ret
+  %cmp = fcmp ogt <vscale x 4 x float> %x, zeroinitializer
+  %and = select <vscale x 4 x i1> %pg, <vscale x 4 x i1> %cmp, <vscale x 4 x i1> zeroinitializer
+  %ins.1 = insertvalue %svboolx2 poison, <vscale x 4 x i1> %and, 0
+  %ins.2 = insertvalue %svboolx2 %ins.1, <vscale x 4 x i1> %cmp, 1
+  ret %svboolx2 %ins.2
+}
+
+define %svboolx2 @logical_and_of_multiuse_fcmp_olt(<vscale x 4 x i1> %pg, <vscale x 4 x float> %x, <vscale x 4 x float> %y) {
+; CHECK-LABEL: logical_and_of_multiuse_fcmp_olt:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p1.s
+; CHECK-NEXT:    fcmgt p1.s, p1/z, z1.s, z0.s
+; CHECK-NEXT:    and p0.b, p0/z, p0.b, p1.b
+; CHECK-NEXT:    ret
+  %cmp = fcmp olt <vscale x 4 x float> %x, %y
+  %and = select <vscale x 4 x i1> %pg, <vscale x 4 x i1> %cmp, <vscale x 4 x i1> zeroinitializer
+  %ins.1 = insertvalue %svboolx2 poison, <vscale x 4 x i1> %and, 0
+  %ins.2 = insertvalue %svboolx2 %ins.1, <vscale x 4 x i1> %cmp, 1
+  ret %svboolx2 %ins.2
+}
+
+define %svboolx2 @logical_and_of_multiuse_fcmp_olt_zero(<vscale x 4 x i1> %pg, <vscale x 4 x float> %x) {
+; CHECK-LABEL: logical_and_of_multiuse_fcmp_olt_zero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p1.s
+; CHECK-NEXT:    fcmlt p1.s, p1/z, z0.s, #0.0
+; CHECK-NEXT:    and p0.b, p0/z, p0.b, p1.b
+; CHECK-NEXT:    ret
+  %cmp = fcmp olt <vscale x 4 x float> %x, zeroinitializer
+  %and = select <vscale x 4 x i1> %pg, <vscale x 4 x i1> %cmp, <vscale x 4 x i1> zeroinitializer
+  %ins.1 = insertvalue %svboolx2 poison, <vscale x 4 x i1> %and, 0
+  %ins.2 = insertvalue %svboolx2 %ins.1, <vscale x 4 x i1> %cmp, 1
+  ret %svboolx2 %ins.2
+}

>From d4e9e6b1d3a6e83d43c9c1fc484f761059e3b07a Mon Sep 17 00:00:00 2001
From: Paul Walker <paul.walker at arm.com>
Date: Thu, 8 Aug 2024 16:59:20 +0100
Subject: [PATCH 2/3] [LLVM][SelectionDAG] Remove scalable vector restriction
 from poison analysis.

The following functions have an early exit for scalable vectors:
  SelectionDAG::canCreateUndefOrPoison
  SelectionDAG:isGuaranteedNotToBeUndefOrPoison

The implementations of these don't look to be sensitive to the
vector types other than some uses of demanded elts analysis that
doesn't fully support scalable types.  That said the initial
calculation demans all elements and so I've followed the same
scheme as used by TargetLowering::SimplifyDemandedBits.
---
 .../lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 35 +++++++++----------
 ...rleaving-reductions-predicated-scalable.ll |  4 +--
 .../AArch64/intrinsic-cttz-elts-sve.ll        |  3 +-
 llvm/test/CodeGen/AArch64/sve-fcmp.ll         | 24 ++++---------
 .../CodeGen/AArch64/sve-fp-int-min-max.ll     |  3 +-
 llvm/test/CodeGen/RISCV/rvv/vwadd-sdnode.ll   | 26 +++++++++-----
 6 files changed, 43 insertions(+), 52 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index c3a7df5361cd45..3900b95f7ea1d0 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -5140,14 +5140,11 @@ bool SelectionDAG::isGuaranteedNotToBeUndefOrPoison(SDValue Op, bool PoisonOnly,
   if (Op.getOpcode() == ISD::FREEZE)
     return true;
 
-  // TODO: Assume we don't know anything for now.
   EVT VT = Op.getValueType();
-  if (VT.isScalableVector())
-    return false;
-
-  APInt DemandedElts = VT.isVector()
-                           ? APInt::getAllOnes(VT.getVectorNumElements())
-                           : APInt(1, 1);
+  APInt DemandedElts =
+      VT.isVector() ? APInt::getAllOnes(
+                          VT.isScalableVector() ? 1 : VT.getVectorNumElements())
+                    : APInt(1, 1);
   return isGuaranteedNotToBeUndefOrPoison(Op, DemandedElts, PoisonOnly, Depth);
 }
 
@@ -5190,6 +5187,10 @@ bool SelectionDAG::isGuaranteedNotToBeUndefOrPoison(SDValue Op,
     }
     return true;
 
+  case ISD::SPLAT_VECTOR:
+    return isGuaranteedNotToBeUndefOrPoison(Op.getOperand(0), PoisonOnly,
+                                            Depth + 1);
+
   case ISD::VECTOR_SHUFFLE: {
     APInt DemandedLHS, DemandedRHS;
     auto *SVN = cast<ShuffleVectorSDNode>(Op);
@@ -5236,14 +5237,14 @@ bool SelectionDAG::isGuaranteedNotToBeUndefOrPoison(SDValue Op,
 bool SelectionDAG::canCreateUndefOrPoison(SDValue Op, bool PoisonOnly,
                                           bool ConsiderFlags,
                                           unsigned Depth) const {
-  // TODO: Assume we don't know anything for now.
+  // Since the number of lanes in a scalable vector is unknown at compile time,
+  // we track one bit which is implicitly broadcast to all lanes.  This means
+  // that all lanes in a scalable vector are considered demanded.
   EVT VT = Op.getValueType();
-  if (VT.isScalableVector())
-    return true;
-
-  APInt DemandedElts = VT.isVector()
-                           ? APInt::getAllOnes(VT.getVectorNumElements())
-                           : APInt(1, 1);
+  APInt DemandedElts =
+      VT.isVector() ? APInt::getAllOnes(
+                          VT.isScalableVector() ? 1 : VT.getVectorNumElements())
+                    : APInt(1, 1);
   return canCreateUndefOrPoison(Op, DemandedElts, PoisonOnly, ConsiderFlags,
                                 Depth);
 }
@@ -5251,11 +5252,6 @@ bool SelectionDAG::canCreateUndefOrPoison(SDValue Op, bool PoisonOnly,
 bool SelectionDAG::canCreateUndefOrPoison(SDValue Op, const APInt &DemandedElts,
                                           bool PoisonOnly, bool ConsiderFlags,
                                           unsigned Depth) const {
-  // TODO: Assume we don't know anything for now.
-  EVT VT = Op.getValueType();
-  if (VT.isScalableVector())
-    return true;
-
   if (ConsiderFlags && Op->hasPoisonGeneratingFlags())
     return true;
 
@@ -5292,6 +5288,7 @@ bool SelectionDAG::canCreateUndefOrPoison(SDValue Op, const APInt &DemandedElts,
   case ISD::BITCAST:
   case ISD::BUILD_VECTOR:
   case ISD::BUILD_PAIR:
+  case ISD::SPLAT_VECTOR:
     return false;
 
   case ISD::SELECT_CC:
diff --git a/llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions-predicated-scalable.ll b/llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions-predicated-scalable.ll
index 1c8a8d635274ec..dcc11609ca2316 100644
--- a/llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions-predicated-scalable.ll
+++ b/llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions-predicated-scalable.ll
@@ -229,8 +229,8 @@ define %"class.std::complex" @complex_mul_predicated_x2_v2f64(ptr %a, ptr %b, pt
 ; CHECK-NEXT:    mov z6.d, z1.d
 ; CHECK-NEXT:    mov z7.d, z0.d
 ; CHECK-NEXT:    add x2, x2, x11
-; CHECK-NEXT:    cmpne p2.d, p0/z, z2.d, #0
-; CHECK-NEXT:    and p1.b, p1/z, p1.b, p2.b
+; CHECK-NEXT:    and z2.d, z2.d, #0xffffffff
+; CHECK-NEXT:    cmpne p1.d, p1/z, z2.d, #0
 ; CHECK-NEXT:    zip2 p2.d, p1.d, p1.d
 ; CHECK-NEXT:    zip1 p1.d, p1.d, p1.d
 ; CHECK-NEXT:    ld1d { z2.d }, p2/z, [x0, #1, mul vl]
diff --git a/llvm/test/CodeGen/AArch64/intrinsic-cttz-elts-sve.ll b/llvm/test/CodeGen/AArch64/intrinsic-cttz-elts-sve.ll
index 66d670d0b796bf..cdf2a962f93226 100644
--- a/llvm/test/CodeGen/AArch64/intrinsic-cttz-elts-sve.ll
+++ b/llvm/test/CodeGen/AArch64/intrinsic-cttz-elts-sve.ll
@@ -319,9 +319,8 @@ define i32 @ctz_nxv16i1_poison(<vscale x 16 x i1> %a) {
 define i32 @ctz_and_nxv16i1(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
 ; CHECK-LABEL: ctz_and_nxv16i1:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    cmpne p0.b, p0/z, z0.b, z1.b
 ; CHECK-NEXT:    ptrue p1.b
-; CHECK-NEXT:    cmpne p2.b, p1/z, z0.b, z1.b
-; CHECK-NEXT:    and p0.b, p0/z, p0.b, p2.b
 ; CHECK-NEXT:    brkb p0.b, p1/z, p0.b
 ; CHECK-NEXT:    cntp x0, p0, p0.b
 ; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
diff --git a/llvm/test/CodeGen/AArch64/sve-fcmp.ll b/llvm/test/CodeGen/AArch64/sve-fcmp.ll
index 56898206964cfd..fc5e640aed4aea 100644
--- a/llvm/test/CodeGen/AArch64/sve-fcmp.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fcmp.ll
@@ -548,9 +548,7 @@ define %svboolx2 @and_of_multiuse_fcmp_olt_zero(<vscale x 4 x i1> %pg, <vscale x
 define <vscale x 8 x i1> @logical_and_oeq_zero_pred(<vscale x 8 x i1> %pg, <vscale x 8 x half> %x) {
 ; CHECK-LABEL: logical_and_oeq_zero_pred:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ptrue p1.h
-; CHECK-NEXT:    fcmeq p1.h, p1/z, z0.h, #0.0
-; CHECK-NEXT:    and p0.b, p0/z, p0.b, p1.b
+; CHECK-NEXT:    fcmeq p0.h, p0/z, z0.h, #0.0
 ; CHECK-NEXT:    ret
   %y = fcmp oeq <vscale x 8 x half> %x, zeroinitializer
   %z = select <vscale x 8 x i1> %pg, <vscale x 8 x i1> %y, <vscale x 8 x i1> zeroinitializer
@@ -560,9 +558,7 @@ define <vscale x 8 x i1> @logical_and_oeq_zero_pred(<vscale x 8 x i1> %pg, <vsca
 define <vscale x 4 x i1> @logical_and_ogt_zero_pred(<vscale x 4 x i1> %pg, <vscale x 4 x half> %x) {
 ; CHECK-LABEL: logical_and_ogt_zero_pred:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ptrue p1.s
-; CHECK-NEXT:    fcmgt p1.h, p1/z, z0.h, #0.0
-; CHECK-NEXT:    and p0.b, p0/z, p0.b, p1.b
+; CHECK-NEXT:    fcmgt p0.h, p0/z, z0.h, #0.0
 ; CHECK-NEXT:    ret
   %y = fcmp ogt <vscale x 4 x half> %x, zeroinitializer
   %z = select <vscale x 4 x i1> %pg, <vscale x 4 x i1> %y, <vscale x 4 x i1> zeroinitializer
@@ -572,9 +568,7 @@ define <vscale x 4 x i1> @logical_and_ogt_zero_pred(<vscale x 4 x i1> %pg, <vsca
 define <vscale x 2 x i1> @logical_and_oge_zero_pred(<vscale x 2 x i1> %pg, <vscale x 2 x half> %x) {
 ; CHECK-LABEL: logical_and_oge_zero_pred:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ptrue p1.d
-; CHECK-NEXT:    fcmge p1.h, p1/z, z0.h, #0.0
-; CHECK-NEXT:    and p0.b, p0/z, p0.b, p1.b
+; CHECK-NEXT:    fcmge p0.h, p0/z, z0.h, #0.0
 ; CHECK-NEXT:    ret
   %y = fcmp oge <vscale x 2 x half> %x, zeroinitializer
   %z = select <vscale x 2 x i1> %pg, <vscale x 2 x i1> %y, <vscale x 2 x i1> zeroinitializer
@@ -584,9 +578,7 @@ define <vscale x 2 x i1> @logical_and_oge_zero_pred(<vscale x 2 x i1> %pg, <vsca
 define <vscale x 4 x i1> @logical_and_olt_zero_pred(<vscale x 4 x i1> %pg, <vscale x 4 x float> %x) {
 ; CHECK-LABEL: logical_and_olt_zero_pred:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ptrue p1.s
-; CHECK-NEXT:    fcmlt p1.s, p1/z, z0.s, #0.0
-; CHECK-NEXT:    and p0.b, p0/z, p0.b, p1.b
+; CHECK-NEXT:    fcmlt p0.s, p0/z, z0.s, #0.0
 ; CHECK-NEXT:    ret
   %y = fcmp olt <vscale x 4 x float> %x, zeroinitializer
   %z = select <vscale x 4 x i1> %pg, <vscale x 4 x i1> %y, <vscale x 4 x i1> zeroinitializer
@@ -596,9 +588,7 @@ define <vscale x 4 x i1> @logical_and_olt_zero_pred(<vscale x 4 x i1> %pg, <vsca
 define <vscale x 2 x i1> @logical_and_ole_zero_pred(<vscale x 2 x i1> %pg, <vscale x 2 x float> %x) {
 ; CHECK-LABEL: logical_and_ole_zero_pred:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ptrue p1.d
-; CHECK-NEXT:    fcmle p1.s, p1/z, z0.s, #0.0
-; CHECK-NEXT:    and p0.b, p0/z, p0.b, p1.b
+; CHECK-NEXT:    fcmle p0.s, p0/z, z0.s, #0.0
 ; CHECK-NEXT:    ret
   %y = fcmp ole <vscale x 2 x float> %x, zeroinitializer
   %z = select <vscale x 2 x i1> %pg, <vscale x 2 x i1> %y, <vscale x 2 x i1> zeroinitializer
@@ -608,9 +598,7 @@ define <vscale x 2 x i1> @logical_and_ole_zero_pred(<vscale x 2 x i1> %pg, <vsca
 define <vscale x 2 x i1> @logical_and_une_zero_pred(<vscale x 2 x i1> %pg, <vscale x 2 x double> %x) {
 ; CHECK-LABEL: logical_and_une_zero_pred:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ptrue p1.d
-; CHECK-NEXT:    fcmne p1.d, p1/z, z0.d, #0.0
-; CHECK-NEXT:    and p0.b, p0/z, p0.b, p1.b
+; CHECK-NEXT:    fcmne p0.d, p0/z, z0.d, #0.0
 ; CHECK-NEXT:    ret
   %y = fcmp une <vscale x 2 x double> %x, zeroinitializer
   %z = select <vscale x 2 x i1> %pg, <vscale x 2 x i1> %y, <vscale x 2 x i1> zeroinitializer
diff --git a/llvm/test/CodeGen/AArch64/sve-fp-int-min-max.ll b/llvm/test/CodeGen/AArch64/sve-fp-int-min-max.ll
index 0d7f2300626508..afe13851f0b953 100644
--- a/llvm/test/CodeGen/AArch64/sve-fp-int-min-max.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fp-int-min-max.ll
@@ -24,8 +24,7 @@ define i64 @scalable_int_min_max(ptr %arg, ptr %arg1, <vscale x 2 x ptr> %i37, <
 ; CHECK-NEXT:    fadd z0.s, p0/m, z0.s, z4.s
 ; CHECK-NEXT:    fcmge p2.s, p0/z, z0.s, z3.s
 ; CHECK-NEXT:    add z0.d, z2.d, z1.d
-; CHECK-NEXT:    not p2.b, p0/z, p2.b
-; CHECK-NEXT:    and p2.b, p1/z, p1.b, p2.b
+; CHECK-NEXT:    bic p2.b, p1/z, p1.b, p2.b
 ; CHECK-NEXT:    mov z0.d, p2/m, z2.d
 ; CHECK-NEXT:    sel z0.d, p1, z0.d, z2.d
 ; CHECK-NEXT:    uaddv d0, p0, z0.d
diff --git a/llvm/test/CodeGen/RISCV/rvv/vwadd-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vwadd-sdnode.ll
index 06b31657e0eca0..a4d58985b75de5 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vwadd-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vwadd-sdnode.ll
@@ -1501,18 +1501,23 @@ define <vscale x 8 x i32> @vwadd_vx_splat_zext_i1(<vscale x 8 x i1> %va, i16 %b)
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    slli a0, a0, 16
 ; RV32-NEXT:    srli a0, a0, 16
-; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
+; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
 ; RV32-NEXT:    vmv.v.x v8, a0
-; RV32-NEXT:    vadd.vi v8, v8, 1, v0.t
+; RV32-NEXT:    addi a0, a0, 1
+; RV32-NEXT:    vmerge.vxm v8, v8, a0, v0
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vwadd_vx_splat_zext_i1:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    slli a0, a0, 48
 ; RV64-NEXT:    srli a0, a0, 48
-; RV64-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
+; RV64-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
+; RV64-NEXT:    vmv.v.x v12, a0
+; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
 ; RV64-NEXT:    vmv.v.x v8, a0
-; RV64-NEXT:    vadd.vi v8, v8, 1, v0.t
+; RV64-NEXT:    li a0, 1
+; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
+; RV64-NEXT:    vwaddu.vx v8, v12, a0, v0.t
 ; RV64-NEXT:    ret
   %zb = zext i16 %b to i32
   %head = insertelement <vscale x 8 x i32> poison, i32 %zb, i32 0
@@ -1570,20 +1575,23 @@ define <vscale x 8 x i32> @vwadd_vx_splat_sext_i1(<vscale x 8 x i1> %va, i16 %b)
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    slli a0, a0, 16
 ; RV32-NEXT:    srai a0, a0, 16
-; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
+; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
 ; RV32-NEXT:    vmv.v.x v8, a0
-; RV32-NEXT:    li a0, 1
-; RV32-NEXT:    vsub.vx v8, v8, a0, v0.t
+; RV32-NEXT:    addi a0, a0, -1
+; RV32-NEXT:    vmerge.vxm v8, v8, a0, v0
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vwadd_vx_splat_sext_i1:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    slli a0, a0, 48
 ; RV64-NEXT:    srai a0, a0, 48
-; RV64-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
+; RV64-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
+; RV64-NEXT:    vmv.v.x v12, a0
+; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
 ; RV64-NEXT:    vmv.v.x v8, a0
 ; RV64-NEXT:    li a0, 1
-; RV64-NEXT:    vsub.vx v8, v8, a0, v0.t
+; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
+; RV64-NEXT:    vwsub.vx v8, v12, a0, v0.t
 ; RV64-NEXT:    ret
   %sb = sext i16 %b to i32
   %head = insertelement <vscale x 8 x i32> poison, i32 %sb, i32 0

>From 2e819a1c1d8467d1862d25bd5269d6ea9ffb1e77 Mon Sep 17 00:00:00 2001
From: Paul Walker <paul.walker at arm.com>
Date: Fri, 9 Aug 2024 11:10:10 +0100
Subject: [PATCH 3/3] Use nicer syntax to compute DemandedElt.

---
 llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 17 ++++++-----------
 1 file changed, 6 insertions(+), 11 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 3900b95f7ea1d0..6ed77fc8d8f179 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -5141,10 +5141,9 @@ bool SelectionDAG::isGuaranteedNotToBeUndefOrPoison(SDValue Op, bool PoisonOnly,
     return true;
 
   EVT VT = Op.getValueType();
-  APInt DemandedElts =
-      VT.isVector() ? APInt::getAllOnes(
-                          VT.isScalableVector() ? 1 : VT.getVectorNumElements())
-                    : APInt(1, 1);
+  APInt DemandedElts = VT.isFixedLengthVector()
+                           ? APInt::getAllOnes(VT.getVectorNumElements())
+                           : APInt(1, 1);
   return isGuaranteedNotToBeUndefOrPoison(Op, DemandedElts, PoisonOnly, Depth);
 }
 
@@ -5237,14 +5236,10 @@ bool SelectionDAG::isGuaranteedNotToBeUndefOrPoison(SDValue Op,
 bool SelectionDAG::canCreateUndefOrPoison(SDValue Op, bool PoisonOnly,
                                           bool ConsiderFlags,
                                           unsigned Depth) const {
-  // Since the number of lanes in a scalable vector is unknown at compile time,
-  // we track one bit which is implicitly broadcast to all lanes.  This means
-  // that all lanes in a scalable vector are considered demanded.
   EVT VT = Op.getValueType();
-  APInt DemandedElts =
-      VT.isVector() ? APInt::getAllOnes(
-                          VT.isScalableVector() ? 1 : VT.getVectorNumElements())
-                    : APInt(1, 1);
+  APInt DemandedElts = VT.isFixedLengthVector()
+                           ? APInt::getAllOnes(VT.getVectorNumElements())
+                           : APInt(1, 1);
   return canCreateUndefOrPoison(Op, DemandedElts, PoisonOnly, ConsiderFlags,
                                 Depth);
 }