[llvm] ee4d6c8 - [VectorCombine] Enable scalarizeBinopOrCmp for scalable vectors

Wed Nov 23 05:17:54 PST 2022

Author: Matt Devereau
Date: 2022-11-23T13:17:21Z
New Revision: ee4d6c8bf05d97238453d128770d1febb9a62216

URL: https://github.com/llvm/llvm-project/commit/ee4d6c8bf05d97238453d128770d1febb9a62216
DIFF: https://github.com/llvm/llvm-project/commit/ee4d6c8bf05d97238453d128770d1febb9a62216.diff

LOG: [VectorCombine] Enable scalarizeBinopOrCmp for scalable vectors

This reverts an earlier change that excluded scalable vectors from
scalarizeBinopOrCmp in VectorCombine; that exclusion caused poor codegen
for binops on scalable vectors.

Differential Revision: https://reviews.llvm.org/D138545

Added: 
    llvm/test/Transforms/VectorCombine/AArch64/scalarize-scalable.ll

Modified: 
    llvm/lib/Transforms/Vectorize/VectorCombine.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index 4d0ebd4d0599c..9dec820c94bd1 100644

--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -1717,10 +1717,15 @@ bool VectorCombine::run() {
         MadeChange |= scalarizeLoadExtract(I);
         break;
       default:
-        MadeChange |= scalarizeBinopOrCmp(I);
         break;
       }
     }
+
+    // This transform works with scalable and fixed vectors
+    // TODO: Identify and allow other scalable transforms
+    if (isa<VectorType>(I.getType()))
+      MadeChange |= scalarizeBinopOrCmp(I);
+
     if (Opcode == Instruction::Store)
       MadeChange |= foldSingleElementStore(I);
 

diff  --git a/llvm/test/Transforms/VectorCombine/AArch64/scalarize-scalable.ll b/llvm/test/Transforms/VectorCombine/AArch64/scalarize-scalable.ll
new file mode 100644
index 0000000000000..ac7bc91fa3f25
--- /dev/null
+++ b/llvm/test/Transforms/VectorCombine/AArch64/scalarize-scalable.ll
@@ -0,0 +1,48 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -passes=vector-combine -S %s | FileCheck %s
+
+define <vscale x 4 x float> @scalarize_scalable(float %0, float %1, float %2, float %3) {
+; CHECK-LABEL: @scalarize_scalable(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[DOTSCALAR:%.*]] = fadd fast float [[TMP1:%.*]], [[TMP0:%.*]]
+; CHECK-NEXT:    [[DOTSCALAR1:%.*]] = fadd fast float [[TMP2:%.*]], [[DOTSCALAR]]
+; CHECK-NEXT:    [[DOTSCALAR2:%.*]] = fadd fast float [[TMP3:%.*]], [[DOTSCALAR1]]
+; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <vscale x 4 x float> poison, float [[DOTSCALAR2]], i64 0
+; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <vscale x 4 x float> [[TMP4]], <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
+; CHECK-NEXT:    ret <vscale x 4 x float> [[TMP5]]
+;
+entry:
+  %broadcast.splatinsert = insertelement <vscale x 4 x float> poison, float %0, i64 0
+  %broadcast.splatinsert28 = insertelement <vscale x 4 x float> poison, float %1, i64 0
+  %broadcast.splatinsert30 = insertelement <vscale x 4 x float> poison, float %2, i64 0
+  %broadcast.splatinsert32 = insertelement <vscale x 4 x float> poison, float %3, i64 0
+  %4 = fadd fast <vscale x 4 x float> %broadcast.splatinsert28, %broadcast.splatinsert
+  %5 = fadd fast <vscale x 4 x float> %broadcast.splatinsert30, %4
+  %6 = fadd fast <vscale x 4 x float> %broadcast.splatinsert32, %5
+  %7 = shufflevector <vscale x 4 x float> %6, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
+  ret <vscale x 4 x float> %7
+}
+
+define <vscale x 4 x i32> @scalarize_scalable_udiv(i32 %x, i32 %y) {
+; CHECK-LABEL: @scalarize_scalable_udiv(
+; CHECK-NEXT:    [[R_SCALAR:%.*]] = udiv i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[R:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[R_SCALAR]], i64 0
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[R]]
+;
+  %splatx = insertelement <vscale x 4 x i32> poison, i32 %x, i64 0
+  %splaty = insertelement <vscale x 4 x i32> poison, i32 %y, i64 0
+  %r = udiv <vscale x 4 x i32> %splatx, %splaty
+  ret <vscale x 4 x i32> %r
+}
+
+define <vscale x 4 x i1> @scalarize_scalable_icmp(i32 %x, i32 %y) {
+; CHECK-LABEL: @scalarize_scalable_icmp(
+; CHECK-NEXT:    [[R_SCALAR:%.*]] = icmp sgt i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[R:%.*]] = insertelement <vscale x 4 x i1> poison, i1 [[R_SCALAR]], i64 0
+; CHECK-NEXT:    ret <vscale x 4 x i1> [[R]]
+;
+  %splatx = insertelement <vscale x 4 x i32> poison, i32 %x, i64 0
+  %splaty = insertelement <vscale x 4 x i32> poison, i32 %y, i64 0
+  %r = icmp sgt <vscale x 4 x i32> %splatx, %splaty
+  ret <vscale x 4 x i1> %r
+}