[llvm] 40b408c - [RISCV] Enable SLP in RISC-V SLP reduction tests

Fri Mar 24 10:31:23 PDT 2023

Author: Luke Lau
Date: 2023-03-24T17:30:32Z
New Revision: 40b408cb0548bb679b491d2e074942561c629531

URL: https://github.com/llvm/llvm-project/commit/40b408cb0548bb679b491d2e074942561c629531
DIFF: https://github.com/llvm/llvm-project/commit/40b408cb0548bb679b491d2e074942561c629531.diff

LOG: [RISCV] Enable SLP in RISC-V SLP reduction tests

Horizontal reduction can still kick in even when the max VF is set to 0,
but strange stuff can happen as it affects the cost model.
Enable it for these tests as eventually the goal will be to have SLP
enabled.

Added: 
    

Modified: 
    llvm/test/Transforms/SLPVectorizer/RISCV/reductions.ll

Removed: 
    


################################################################################
diff  --git a/llvm/test/Transforms/SLPVectorizer/RISCV/reductions.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/reductions.ll
index 10f9c04892972..1fe6cf04a341a 100644

--- a/llvm/test/Transforms/SLPVectorizer/RISCV/reductions.ll
+++ b/llvm/test/Transforms/SLPVectorizer/RISCV/reductions.ll
@@ -1,10 +1,13 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt < %s -passes=slp-vectorizer -mtriple=riscv64 -mattr=+v \
-; RUN: -riscv-v-vector-bits-min=128 -S | FileCheck %s --check-prefixes=CHECK
+; RUN: -riscv-v-vector-bits-min=128 -riscv-v-slp-max-vf=0 -S \
+; RUN: | FileCheck %s --check-prefixes=CHECK
 ; RUN: opt < %s -passes=slp-vectorizer -mtriple=riscv64 -mattr=+v \
-; RUN: -riscv-v-vector-bits-min=256 -S | FileCheck %s --check-prefixes=CHECK
+; RUN: -riscv-v-vector-bits-min=256 -riscv-v-slp-max-vf=0 -S \
+; RUN: | FileCheck %s --check-prefixes=CHECK
 ; RUN: opt < %s -passes=slp-vectorizer -mtriple=riscv64 -mattr=+v \
-; RUN: -riscv-v-vector-bits-min=512 -S | FileCheck %s --check-prefixes=CHECK
+; RUN: -riscv-v-vector-bits-min=512 -riscv-v-slp-max-vf=0 -S \
+; RUN: | FileCheck %s --check-prefixes=CHECK
 
 target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n64-S128"
 target triple = "riscv64"
@@ -855,29 +858,23 @@ declare i32 @llvm.abs.i32(i32, i1)
 ; }
 define i32 @stride_sum_abs_diff(ptr %p, ptr %q, i64 %stride) {
 ; CHECK-LABEL: @stride_sum_abs_diff(
-; CHECK-NEXT:    [[P_1:%.*]] = getelementptr inbounds i32, ptr [[P:%.*]], i64 1
-; CHECK-NEXT:    [[Q_1:%.*]] = getelementptr inbounds i32, ptr [[Q:%.*]], i64 1
-; CHECK-NEXT:    [[P_2:%.*]] = getelementptr inbounds i32, ptr [[P]], i64 [[STRIDE:%.*]]
-; CHECK-NEXT:    [[Q_2:%.*]] = getelementptr inbounds i32, ptr [[Q]], i64 [[STRIDE]]
+; CHECK-NEXT:    [[P_2:%.*]] = getelementptr inbounds i32, ptr [[P:%.*]], i64 [[STRIDE:%.*]]
+; CHECK-NEXT:    [[Q_2:%.*]] = getelementptr inbounds i32, ptr [[Q:%.*]], i64 [[STRIDE]]
 ; CHECK-NEXT:    [[P_3:%.*]] = getelementptr inbounds i32, ptr [[P_2]], i64 1
 ; CHECK-NEXT:    [[Q_3:%.*]] = getelementptr inbounds i32, ptr [[Q_2]], i64 1
-; CHECK-NEXT:    [[X_0:%.*]] = load i32, ptr [[P]], align 4
-; CHECK-NEXT:    [[Y_0:%.*]] = load i32, ptr [[Q]], align 4
-; CHECK-NEXT:    [[X_1:%.*]] = load i32, ptr [[P_1]], align 4
-; CHECK-NEXT:    [[Y_1:%.*]] = load i32, ptr [[Q_1]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i32>, ptr [[P]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i32>, ptr [[Q]], align 4
 ; CHECK-NEXT:    [[X_2:%.*]] = load i32, ptr [[P_2]], align 4
 ; CHECK-NEXT:    [[Y_2:%.*]] = load i32, ptr [[Q_2]], align 4
 ; CHECK-NEXT:    [[X_3:%.*]] = load i32, ptr [[P_3]], align 4
 ; CHECK-NEXT:    [[Y_3:%.*]] = load i32, ptr [[Q_3]], align 4
-; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x i32> poison, i32 [[X_0]], i32 0
-; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[X_1]], i32 1
-; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[X_2]], i32 2
-; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <4 x i32> [[TMP3]], i32 [[X_3]], i32 3
-; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <4 x i32> poison, i32 [[Y_0]], i32 0
-; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[Y_1]], i32 1
+; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <4 x i32> [[TMP3]], i32 [[X_2]], i32 2
+; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <4 x i32> [[TMP4]], i32 [[X_3]], i32 3
+; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
 ; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <4 x i32> [[TMP6]], i32 [[Y_2]], i32 2
 ; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <4 x i32> [[TMP7]], i32 [[Y_3]], i32 3
-; CHECK-NEXT:    [[TMP9:%.*]] = sub <4 x i32> [[TMP4]], [[TMP8]]
+; CHECK-NEXT:    [[TMP9:%.*]] = sub <4 x i32> [[TMP5]], [[TMP8]]
 ; CHECK-NEXT:    [[TMP10:%.*]] = call <4 x i32> @llvm.abs.v4i32(<4 x i32> [[TMP9]], i1 true)
 ; CHECK-NEXT:    [[TMP11:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP10]])
 ; CHECK-NEXT:    ret i32 [[TMP11]]