[llvm] 2ab5c47 - [VPlan] Don't replace scalarizing recipe with VPWidenCastRecipe.

Florian Hahn via llvm-commits <llvm-commits at lists.llvm.org>
Thu Jan 4 12:40:18 PST 2024


Author: Florian Hahn
Date: 2024-01-04T20:39:44Z
New Revision: 2ab5c47c8752b444885d6bfaf6f570a482fb4cdf

URL: https://github.com/llvm/llvm-project/commit/2ab5c47c8752b444885d6bfaf6f570a482fb4cdf
DIFF: https://github.com/llvm/llvm-project/commit/2ab5c47c8752b444885d6bfaf6f570a482fb4cdf.diff

LOG: [VPlan] Don't replace scalarizing recipe with VPWidenCastRecipe.

Don't replace a scalarizing recipe with a VPWidenCastRecipe, as this
would introduce wide (vectorizing) recipes into plans that only
interleave and do not vectorize.

Fixes https://github.com/llvm/llvm-project/issues/76986
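
For context, the simplification folds a truncate of an extend,
trunc(ext(A)), into a single cast of A to the truncated type. A minimal,
hypothetical IR sketch of that shape (names are illustrative only; it
mirrors the test added below):

  define i16 @sketch(ptr %p) {
    %l = load i8, ptr %p
    %sext = sext i8 %l to i32        ; A = %l, ATy = i8
    %trunc = trunc i32 %sext to i16  ; TruncTy = i16
    ; folded form: a single "sext i8 %l to i16"
    ret i16 %trunc
  }

In a plan that only interleaves (VF = 1) all recipes must stay scalar,
so the folded cast must not be emitted as a widened VPWidenCastRecipe;
the patch skips the rewrite when the truncate is a VPReplicateRecipe.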

Added: 
    

Modified: 
    llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
    llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 33132880d5a444..5c430620a2dcdd 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -829,15 +829,20 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
     Type *ATy = TypeInfo.inferScalarType(A);
     if (TruncTy == ATy) {
       Trunc->replaceAllUsesWith(A);
-    } else if (ATy->getScalarSizeInBits() < TruncTy->getScalarSizeInBits()) {
-      auto *VPC =
-          new VPWidenCastRecipe(Instruction::CastOps(ExtOpcode), A, TruncTy);
-      VPC->insertBefore(&R);
-      Trunc->replaceAllUsesWith(VPC);
-    } else if (ATy->getScalarSizeInBits() > TruncTy->getScalarSizeInBits()) {
-      auto *VPC = new VPWidenCastRecipe(Instruction::Trunc, A, TruncTy);
-      VPC->insertBefore(&R);
-      Trunc->replaceAllUsesWith(VPC);
+    } else {
+      // Don't replace a scalarizing recipe with a widened cast.
+      if (isa<VPReplicateRecipe>(&R))
+        break;
+      if (ATy->getScalarSizeInBits() < TruncTy->getScalarSizeInBits()) {
+        auto *VPC =
+            new VPWidenCastRecipe(Instruction::CastOps(ExtOpcode), A, TruncTy);
+        VPC->insertBefore(&R);
+        Trunc->replaceAllUsesWith(VPC);
+      } else if (ATy->getScalarSizeInBits() > TruncTy->getScalarSizeInBits()) {
+        auto *VPC = new VPWidenCastRecipe(Instruction::Trunc, A, TruncTy);
+        VPC->insertBefore(&R);
+        Trunc->replaceAllUsesWith(VPC);
+      }
     }
 #ifndef NDEBUG
     // Verify that the cached type info is for both A and its users is still

diff  --git a/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll b/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll
index 7b9d7f7986396b..297cd2a7c12f9a 100644
--- a/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll
+++ b/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll
@@ -310,3 +310,48 @@ loop:
 exit:
   ret void
 }
+
+define void @pr76986_trunc_sext_interleaving_only(i16 %arg, ptr noalias %src, ptr noalias %dst) {
+; CHECK-LABEL: define void @pr76986_trunc_sext_interleaving_only(
+; CHECK:       vector.body:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %vector.body ]
+; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 1
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i8, ptr %src, i64 [[TMP0]]
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8, ptr %src, i64 [[TMP1]]
+; CHECK-NEXT:    [[TMP4:%.*]] = load i8, ptr [[TMP2]], align 1
+; CHECK-NEXT:    [[TMP5:%.*]] = load i8, ptr [[TMP3]], align 1
+; CHECK-NEXT:    [[TMP6:%.*]] = sext i8 [[TMP4]] to i32
+; CHECK-NEXT:    [[TMP7:%.*]] = sext i8 [[TMP5]] to i32
+; CHECK-NEXT:    [[TMP8:%.*]] = trunc i32 [[TMP6]] to i16
+; CHECK-NEXT:    [[TMP9:%.*]] = trunc i32 [[TMP7]] to i16
+; CHECK-NEXT:    [[TMP10:%.*]] = sdiv i16 [[TMP8]], %arg
+; CHECK-NEXT:    [[TMP11:%.*]] = sdiv i16 [[TMP9]], %arg
+; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i16, ptr %dst, i64 [[TMP0]]
+; CHECK-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i16, ptr %dst, i64 [[TMP1]]
+; CHECK-NEXT:    store i16 [[TMP10]], ptr [[TMP12]], align 2
+; CHECK-NEXT:    store i16 [[TMP11]], ptr [[TMP13]], align 2
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
+; CHECK-NEXT:    [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], 14934
+; CHECK-NEXT:    br i1 [[TMP14]], label %middle.block, label %vector.body
+;
+bb:
+  br label %loop
+
+loop:
+  %iv = phi i64 [ 0, %bb ], [ %iv.next, %loop ]
+  %gep.src = getelementptr inbounds i8, ptr %src, i64 %iv
+  %l = load i8, ptr %gep.src
+  %sext = sext i8 %l to i32
+  %trunc = trunc i32 %sext to i16
+  %sdiv = sdiv i16 %trunc, %arg
+  %gep.dst = getelementptr inbounds i16, ptr %dst, i64 %iv
+  store i16 %sdiv, ptr %gep.dst
+  %iv.next = add i64 %iv, 1
+  %icmp = icmp ult i64 %iv, 14933
+  br i1 %icmp, label %loop, label %exit
+
+exit:
+  ret void
+}
+


        

