[llvm] 03647e2 - [AArch64] Handle scalable vectors in combineFMulOrFDivWithIntPow2.

Tue Sep 26 07:34:40 PDT 2023

Author: David Green
Date: 2023-09-26T15:34:34+01:00
New Revision: 03647e2e4b68056851a4410d2f129ebc28162de9

URL: https://github.com/llvm/llvm-project/commit/03647e2e4b68056851a4410d2f129ebc28162de9
DIFF: https://github.com/llvm/llvm-project/commit/03647e2e4b68056851a4410d2f129ebc28162de9.diff

LOG: [AArch64] Handle scalable vectors in combineFMulOrFDivWithIntPow2.

The transform will still not trigger, because takeInexpensiveLog2 bails out for
any scalable vector, but this guards against a scalable type-size error.

Added: 
    

Modified: 
    llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
    llvm/test/CodeGen/AArch64/fold-int-pow2-with-fmul-or-fdiv.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 7d5dc96bd0e2c94..0d34ebb117667aa 100644

--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -16418,7 +16418,7 @@ SDValue DAGCombiner::combineFMulOrFDivWithIntPow2(SDNode *N) {
   EVT NewIntVT = EVT::getIntegerVT(*DAG.getContext(), VT.getScalarSizeInBits());
   if (VT.isVector())
     NewIntVT = EVT::getVectorVT(*DAG.getContext(), NewIntVT,
-                                VT.getVectorNumElements());
+                                VT.getVectorElementCount());
 
   SDValue Log2 = BuildLogBase2(Pow2Op, DL, DAG.isKnownNeverZero(Pow2Op),
                                /*InexpensiveOnly*/ true, NewIntVT);

diff  --git a/llvm/test/CodeGen/AArch64/fold-int-pow2-with-fmul-or-fdiv.ll b/llvm/test/CodeGen/AArch64/fold-int-pow2-with-fmul-or-fdiv.ll
index 9c74aa59db3aa72..c30c53835cb44fb 100644
--- a/llvm/test/CodeGen/AArch64/fold-int-pow2-with-fmul-or-fdiv.ll
+++ b/llvm/test/CodeGen/AArch64/fold-int-pow2-with-fmul-or-fdiv.ll
@@ -601,3 +601,32 @@ define fastcc i1 @quantum_hadamard(i32 %0) {
   %7 = fcmp olt float 0.000000e+00, %6
   ret i1 %7
 }
+
+define <vscale x 4 x float> @fdiv_pow2_nx4xfloat(<vscale x 4 x i32> %i) "target-features"="+sve" {
+; CHECK-LABEL: fdiv_pow2_nx4xfloat:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    mov z1.s, #1 // =0x1
+; CHECK-NEXT:    lslr z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    fmov z1.s, #9.00000000
+; CHECK-NEXT:    ucvtf z0.s, p0/m, z0.s
+; CHECK-NEXT:    fdivr z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    ret
+  %p2 = shl <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer), %i
+  %p2_f = uitofp <vscale x 4 x i32> %p2 to <vscale x 4 x float>
+  %r = fdiv <vscale x 4 x float> shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float 9.000000e+00, i64 0), <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer), %p2_f
+  ret <vscale x 4 x float> %r
+}
+
+define <vscale x 2 x double> @scalable2(<vscale x 2 x i64> %0) "target-features"="+sve" {
+; CHECK-LABEL: scalable2:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    fmov z1.d, #1.00000000
+; CHECK-NEXT:    ucvtf z0.d, p0/m, z0.d
+; CHECK-NEXT:    fdivr z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    ret
+  %2 = uitofp <vscale x 2 x i64> %0 to <vscale x 2 x double>
+  %3 = fdiv <vscale x 2 x double> shufflevector (<vscale x 2 x double> insertelement (<vscale x 2 x double> poison, double 1.000000e+00, i64 0), <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer), %2
+  ret <vscale x 2 x double> %3
+}