[llvm] 954ed05 - [VPlan] Simplify MUL operands at recipe construction.

Mon Sep 2 13:26:03 PDT 2024

Author: Florian Hahn
Date: 2024-09-02T21:25:31+01:00
New Revision: 954ed05c1001489802ae43d4bea403181570a799

URL: https://github.com/llvm/llvm-project/commit/954ed05c1001489802ae43d4bea403181570a799
DIFF: https://github.com/llvm/llvm-project/commit/954ed05c1001489802ae43d4bea403181570a799.diff

LOG: [VPlan] Simplify MUL operands at recipe construction.

This moves the logic to create simplified operands using SCEV to MUL
recipe creation. This is needed to match the behavior of the legacy's cost
model. TODOs are to extend to other opcodes and move to a transform.

Note that this also restricts the number of SCEV simplifications we
apply to more precisely match the cases handled by the legacy cost
model.

Fixes https://github.com/llvm/llvm-project/issues/107015.

Added: 
    llvm/test/Transforms/LoopVectorize/AArch64/mul-simplification.ll

Modified: 
    llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
    llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index a8722db654f5c9..a64848c1fdcebd 100644

--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -8393,6 +8393,20 @@ VPWidenRecipe *VPRecipeBuilder::tryToWiden(Instruction *I,
   case Instruction::Sub:
   case Instruction::Xor:
   case Instruction::Freeze:
+    if (I->getOpcode() == Instruction::Mul) {
+      // Simplify operands of multiplications using SCEV. This is needed at the
+      // moment to match the behavior of the legacy cost-model.
+      // TODO: Generalize to any opcode and move to VPlan transformation.
+      SmallVector<VPValue *> NewOps(Operands);
+      ScalarEvolution &SE = *PSE.getSE();
+      for (unsigned I = 0; I < Operands.size(); ++I) {
+        Value *V = NewOps[I]->getUnderlyingValue();
+        if (!isa<Constant>(V) && SE.isSCEVable(V->getType()))
+          if (auto *C = dyn_cast<SCEVConstant>(PSE.getSE()->getSCEV(V)))
+            NewOps[I] = Plan.getOrAddLiveIn(C->getValue());
+      }
+      return new VPWidenRecipe(*I, make_range(NewOps.begin(), NewOps.end()));
+    }
     return new VPWidenRecipe(*I, make_range(Operands.begin(), Operands.end()));
   };
 }

diff  --git a/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h b/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h
index 0fbdeeddaf4a17..25b8bf3e089e54 100644
--- a/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h
+++ b/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h
@@ -174,11 +174,6 @@ class VPRecipeBuilder {
       if (auto *R = Ingredient2Recipe.lookup(I))
         return R->getVPSingleValue();
     }
-    ScalarEvolution &SE = *PSE.getSE();
-    if (!isa<Constant>(V) && SE.isSCEVable(V->getType()))
-      if (auto *C = dyn_cast<SCEVConstant>(PSE.getSE()->getSCEV(V)))
-        return Plan.getOrAddLiveIn(C->getValue());
-
     return Plan.getOrAddLiveIn(V);
   }
 };

diff  --git a/llvm/test/Transforms/LoopVectorize/AArch64/mul-simplification.ll b/llvm/test/Transforms/LoopVectorize/AArch64/mul-simplification.ll
new file mode 100644
index 00000000000000..771dd00034c707
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/mul-simplification.ll
@@ -0,0 +1,64 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -p loop-vectorize -S %s | FileCheck %s
+
+target triple = "arm64-apple-macosx"
+
+; Test case for https://github.com/llvm/llvm-project/issues/107015.
+define i64 @mul_select_operand_known_1_via_scev() {
+; CHECK-LABEL: define i64 @mul_select_operand_known_1_via_scev() {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK:       [[VECTOR_PH]]:
+; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
+; CHECK:       [[VECTOR_BODY]]:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <2 x i64> [ <i64 12, i64 1>, %[[VECTOR_PH]] ], [ [[VEC_PHI]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
+; CHECK-NEXT:    br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK:       [[MIDDLE_BLOCK]]:
+; CHECK-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vector.reduce.mul.v2i64(<2 x i64> [[VEC_PHI]])
+; CHECK-NEXT:    br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]]
+; CHECK:       [[SCALAR_PH]]:
+; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ 2, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i64 [ [[TMP0]], %[[MIDDLE_BLOCK]] ], [ 12, %[[ENTRY]] ]
+; CHECK-NEXT:    br label %[[LOOP:.*]]
+; CHECK:       [[LOOP]]:
+; CHECK-NEXT:    [[RED:%.*]] = phi i64 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[RED_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[IV]], 1
+; CHECK-NEXT:    [[CMP1_I:%.*]] = icmp eq i32 [[TMP1]], 0
+; CHECK-NEXT:    [[NARROW_I:%.*]] = select i1 [[CMP1_I]], i32 1, i32 [[IV]]
+; CHECK-NEXT:    [[MUL:%.*]] = zext nneg i32 [[NARROW_I]] to i64
+; CHECK-NEXT:    [[RED_NEXT]] = mul nsw i64 [[RED]], [[MUL]]
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1
+; CHECK-NEXT:    [[EC:%.*]] = icmp eq i32 [[IV]], 1
+; CHECK-NEXT:    br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP3:![0-9]+]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    [[RES:%.*]] = phi i64 [ [[RED_NEXT]], %[[LOOP]] ], [ [[TMP0]], %[[MIDDLE_BLOCK]] ]
+; CHECK-NEXT:    ret i64 [[RES]]
+;
+entry:
+  br label %loop
+
+loop:
+  %red = phi i64 [ 12, %entry ], [ %red.next, %loop ]
+  %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
+  %0 = and i32 %iv, 1
+  %cmp1.i = icmp eq i32 %0, 0
+  %narrow.i = select i1 %cmp1.i, i32 1, i32 %iv
+  %mul = zext nneg i32 %narrow.i to i64
+  %red.next = mul nsw i64 %red, %mul
+  %iv.next = add nuw nsw i32 %iv, 1
+  %ec = icmp eq i32 %iv, 1
+  br i1 %ec, label %exit, label %loop
+
+exit:
+  %res = phi i64 [ %red.next, %loop ]
+  ret i64 %res
+}
+;.
+; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
+; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
+; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
+; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
+;.