[llvm] 9a82bda - [RISCV] Fix assertion of getShuffleCost

via llvm-commits llvm-commits at lists.llvm.org
Wed Aug 23 05:12:45 PDT 2023


Author: wangpc
Date: 2023-08-23T20:10:50+08:00
New Revision: 9a82bda9dedff03348c906979da9b95c0934f902

URL: https://github.com/llvm/llvm-project/commit/9a82bda9dedff03348c906979da9b95c0934f902
DIFF: https://github.com/llvm/llvm-project/commit/9a82bda9dedff03348c906979da9b95c0934f902.diff

LOG: [RISCV] Fix assertion of getShuffleCost

This assertion was introduced by D157425.

We should calculate the cost of the split shuffle only if `Mask` is not empty.

Fixes #64901

Reviewed By: ABataev

Differential Revision: https://reviews.llvm.org/D158590

Added: 
    

Modified: 
    llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
    llvm/test/Transforms/SLPVectorizer/RISCV/reductions.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index 0b116c1bea514d..ecfcf64c7991d1 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -361,7 +361,7 @@ InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
       // We are going to permute multiple sources and the result will be in
       // multiple destinations. Providing an accurate cost only for splits where
       // the element type remains the same.
-      if (LT.first.isValid() && LT.first != 1 &&
+      if (!Mask.empty() && LT.first.isValid() && LT.first != 1 &&
           LT.second.isFixedLengthVector() &&
           LT.second.getVectorElementType().getSizeInBits() ==
               Tp->getElementType()->getPrimitiveSizeInBits() &&

diff  --git a/llvm/test/Transforms/SLPVectorizer/RISCV/reductions.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/reductions.ll
index 5eb95ccc5eeb36..d6921f6a6eca95 100644
--- a/llvm/test/Transforms/SLPVectorizer/RISCV/reductions.ll
+++ b/llvm/test/Transforms/SLPVectorizer/RISCV/reductions.ll
@@ -210,8 +210,8 @@ entry:
   ret i8 %and13.7
 }
 
-define i8 @reduce_or(ptr %a, ptr %b) {
-; CHECK-LABEL: @reduce_or(
+define i8 @reduce_or_1(ptr %a, ptr %b) {
+; CHECK-LABEL: @reduce_or_1(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_BUF:%.*]], ptr [[A:%.*]], i64 0, i32 0, i64 0
 ; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds [[STRUCT_BUF]], ptr [[B:%.*]], i64 0, i32 0, i64 0
@@ -273,6 +273,95 @@ entry:
   ret i8 %or13.7
 }
 
+define void @reduce_or_2() {
+; CHECK-LABEL: @reduce_or_2(
+; CHECK-NEXT:    [[TMP1:%.*]] = shl i64 0, 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <16 x i64> <i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 poison>, i64 [[TMP1]], i32 15
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ult <16 x i64> [[TMP2]], zeroinitializer
+; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <16 x i64> <i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 poison, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0>, i64 [[TMP1]], i32 6
+; CHECK-NEXT:    [[TMP5:%.*]] = icmp ult <16 x i64> [[TMP4]], zeroinitializer
+; CHECK-NEXT:    [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> [[TMP3]])
+; CHECK-NEXT:    [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> [[TMP5]])
+; CHECK-NEXT:    [[OP_RDX:%.*]] = or i1 [[TMP6]], [[TMP7]]
+; CHECK-NEXT:    br i1 [[OP_RDX]], label [[TMP9:%.*]], label [[TMP8:%.*]]
+; CHECK:       8:
+; CHECK-NEXT:    ret void
+; CHECK:       9:
+; CHECK-NEXT:    ret void
+;
+  %1 = shl i64 0, 0
+  %2 = icmp ult i64 0, 0
+  %3 = icmp ult i64 0, 0
+  %4 = or i1 %2, %3
+  %5 = icmp ult i64 0, 0
+  %6 = or i1 %4, %5
+  %7 = icmp ult i64 0, 0
+  %8 = or i1 %6, %7
+  %9 = icmp ult i64 0, 0
+  %10 = or i1 %8, %9
+  %11 = icmp ult i64 0, 0
+  %12 = or i1 %10, %11
+  %13 = icmp ult i64 0, 0
+  %14 = or i1 %12, %13
+  %15 = icmp ult i64 0, 0
+  %16 = or i1 %14, %15
+  %17 = icmp ult i64 0, 0
+  %18 = or i1 %16, %17
+  %19 = icmp ult i64 0, 0
+  %20 = or i1 %18, %19
+  %21 = icmp ult i64 0, 0
+  %22 = or i1 %20, %21
+  %23 = icmp ult i64 0, 0
+  %24 = or i1 %22, %23
+  %25 = icmp ult i64 0, 0
+  %26 = or i1 %24, %25
+  %27 = icmp ult i64 0, 0
+  %28 = or i1 %26, %27
+  %29 = icmp ult i64 0, 0
+  %30 = or i1 %28, %29
+  %31 = icmp ult i64 %1, 0
+  %32 = or i1 %30, %31
+  %33 = icmp ult i64 0, 0
+  %34 = or i1 %32, %33
+  %35 = icmp ult i64 0, 0
+  %36 = or i1 %34, %35
+  %37 = icmp ult i64 0, 0
+  %38 = or i1 %36, %37
+  %39 = icmp ult i64 0, 0
+  %40 = or i1 %38, %39
+  %41 = icmp ult i64 0, 0
+  %42 = or i1 %40, %41
+  %43 = icmp ult i64 0, 0
+  %44 = or i1 %42, %43
+  %45 = icmp ult i64 %1, 0
+  %46 = or i1 %44, %45
+  %47 = icmp ult i64 0, 0
+  %48 = or i1 %46, %47
+  %49 = icmp ult i64 0, 0
+  %50 = or i1 %48, %49
+  %51 = icmp ult i64 0, 0
+  %52 = or i1 %50, %51
+  %53 = icmp ult i64 0, 0
+  %54 = or i1 %52, %53
+  %55 = icmp ult i64 0, 0
+  %56 = or i1 %54, %55
+  %57 = icmp ult i64 0, 0
+  %58 = or i1 %56, %57
+  %59 = icmp ult i64 0, 0
+  %60 = or i1 %58, %59
+  %61 = icmp ult i64 0, 0
+  %62 = or i1 %60, %61
+  %63 = icmp ult i64 0, 0
+  %64 = or i1 %62, %63
+  br i1 %64, label %66, label %65
+
+65:                                               ; preds = %0
+  ret void
+
+66:                                               ; preds = %0
+  ret void
+}
+
 define i8 @reduce_xor(ptr %a, ptr %b) {
 ; CHECK-LABEL: @reduce_xor(
 ; CHECK-NEXT:  entry:


        


More information about the llvm-commits mailing list