[llvm] a849940 - [RISCV] Use TU vmv.v.v instead of vslideup.vi N, M, 0 for insert vector_shuffles
Luke Lau via llvm-commits
llvm-commits at lists.llvm.org
Mon Jun 12 13:08:58 PDT 2023
Author: Luke Lau
Date: 2023-06-12T21:08:49+01:00
New Revision: a84994071c8e59e30933cf1482031856ca4c77d4
URL: https://github.com/llvm/llvm-project/commit/a84994071c8e59e30933cf1482031856ca4c77d4
DIFF: https://github.com/llvm/llvm-project/commit/a84994071c8e59e30933cf1482031856ca4c77d4.diff
LOG: [RISCV] Use TU vmv.v.v instead of vslideup.vi N,M,0 for insert vector_shuffles
This applies the same technique used in https://reviews.llvm.org/D152565 to
vslideups when lowering vector_shuffles whose mask contains an insert
pattern.
Reviewed By: reames
Differential Revision: https://reviews.llvm.org/D152724
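For illustration, the merge_start_into_start test updated below is the
simplest case: the shuffle mask inserts the low four elements of %w at
index 0 of %v, so the previous vslideup.vi with a slide amount of 0 can be
replaced by a tail-undisturbed vmv.v.v with the same VL:

define <8 x i8> @merge_start_into_start(<8 x i8> %v, <8 x i8> %w) {
  ; Mask <8,9,10,11,4,5,6,7> takes w[0..3] followed by v[4..7], i.e. it
  ; inserts the low 4 elements of %w at index 0 of %v.
  %res = shufflevector <8 x i8> %v, <8 x i8> %w, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i8> %res
}

; Before this change (VL = 4, tail undisturbed, slide amount 0):
;   vsetivli zero, 4, e8, mf2, tu, ma
;   vslideup.vi v8, v9, 0
; After:
;   vsetivli zero, 4, e8, mf2, tu, ma
;   vmv.v.v v8, v9

Because the vsetivli requests tail-undisturbed (tu) behaviour, the vmv.v.v
copies only the first four elements of v9 into v8 and leaves the remaining
elements of v8 untouched, matching the old vslideup.vi result.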
Added:
Modified:
llvm/lib/Target/RISCV/RISCVISelLowering.cpp
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 4903fb8f8ef8f..3dc25cf43cf0c 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -3766,14 +3766,21 @@ static SDValue lowerVECTOR_SHUFFLEAsVSlideup(const SDLoc &DL, MVT VT,
// up to the very end of it, then we don't actually care about the tail.
if (NumSubElts + Index >= (int)NumElts)
Policy |= RISCVII::TAIL_AGNOSTIC;
- SDValue Slideup = getVSlideup(
- DAG, Subtarget, DL, ContainerVT,
- convertToScalableVector(ContainerVT, InPlace, DAG, Subtarget),
- convertToScalableVector(ContainerVT, ToInsert, DAG, Subtarget),
- DAG.getConstant(Index, DL, XLenVT), TrueMask,
- DAG.getConstant(NumSubElts + Index, DL, XLenVT),
- Policy);
- return convertFromScalableVector(VT, Slideup, DAG, Subtarget);
+
+ InPlace = convertToScalableVector(ContainerVT, InPlace, DAG, Subtarget);
+ ToInsert = convertToScalableVector(ContainerVT, ToInsert, DAG, Subtarget);
+ SDValue VL = DAG.getConstant(NumSubElts + Index, DL, XLenVT);
+
+ SDValue Res;
+ // If we're inserting into the lowest elements, use a tail undisturbed
+ // vmv.v.v.
+ if (Index == 0)
+ Res = DAG.getNode(RISCVISD::VMV_V_V_VL, DL, ContainerVT, InPlace, ToInsert,
+ VL);
+ else
+ Res = getVSlideup(DAG, Subtarget, DL, ContainerVT, InPlace, ToInsert,
+ DAG.getConstant(Index, DL, XLenVT), TrueMask, VL, Policy);
+ return convertFromScalableVector(VT, Res, DAG, Subtarget);
}
/// Match v(f)slide1up/down idioms. These operations involve sliding
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll
index 3418f8660202e..13ac5fd3d4860 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll
@@ -43,7 +43,7 @@ define <4 x float> @hang_when_merging_stores_after_legalization(<8 x float> %x,
; LMULMAX1-NEXT: vsetivli zero, 3, e32, m1, tu, ma
; LMULMAX1-NEXT: vslideup.vi v11, v10, 2
; LMULMAX1-NEXT: vsetivli zero, 2, e32, m1, tu, ma
-; LMULMAX1-NEXT: vslideup.vi v11, v12, 0
+; LMULMAX1-NEXT: vmv.v.v v11, v12
; LMULMAX1-NEXT: vmv1r.v v8, v11
; LMULMAX1-NEXT: ret
;
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll
index 79b84f94f5d69..e9fce03485aac 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll
@@ -690,7 +690,7 @@ define <8 x i8> @merge_end_into_end(<8 x i8> %v, <8 x i8> %w) {
; CHECK-LABEL: merge_end_into_end:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e8, mf2, tu, ma
-; CHECK-NEXT: vslideup.vi v9, v8, 0
+; CHECK-NEXT: vmv.v.v v9, v8
; CHECK-NEXT: vmv1r.v v8, v9
; CHECK-NEXT: ret
%res = shufflevector <8 x i8> %v, <8 x i8> %w, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
@@ -711,7 +711,7 @@ define <8 x i8> @merge_start_into_start(<8 x i8> %v, <8 x i8> %w) {
; CHECK-LABEL: merge_start_into_start:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e8, mf2, tu, ma
-; CHECK-NEXT: vslideup.vi v8, v9, 0
+; CHECK-NEXT: vmv.v.v v8, v9
; CHECK-NEXT: ret
%res = shufflevector <8 x i8> %v, <8 x i8> %w, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7>
ret <8 x i8> %res
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll
index f0539e7566b67..494e06faf9f83 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll
@@ -262,7 +262,7 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
; RV32-NEXT: vl4r.v v8, (a1) # Unknown-size Folded Reload
-; RV32-NEXT: vslideup.vi v8, v24, 0
+; RV32-NEXT: vmv.v.v v8, v24
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: li a3, 41
; RV32-NEXT: mul a1, a1, a3
@@ -335,7 +335,7 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
; RV32-NEXT: vrgather.vv v24, v8, v16, v0.t
; RV32-NEXT: vsetivli zero, 12, e32, m4, tu, ma
-; RV32-NEXT: vslideup.vi v4, v24, 0
+; RV32-NEXT: vmv.v.v v4, v24
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: li a3, 25
; RV32-NEXT: mul a1, a1, a3
@@ -419,7 +419,7 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
; RV32-NEXT: vl4r.v v12, (a1) # Unknown-size Folded Reload
-; RV32-NEXT: vslideup.vi v12, v8, 0
+; RV32-NEXT: vmv.v.v v12, v8
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: li a3, 37
; RV32-NEXT: mul a1, a1, a3
@@ -501,7 +501,7 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
; RV32-NEXT: vl4r.v v12, (a1) # Unknown-size Folded Reload
-; RV32-NEXT: vslideup.vi v12, v8, 0
+; RV32-NEXT: vmv.v.v v12, v8
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: slli a3, a1, 5
; RV32-NEXT: add a1, a3, a1
@@ -572,7 +572,7 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
; RV32-NEXT: vrgather.vv v8, v16, v24, v0.t
; RV32-NEXT: vsetivli zero, 10, e32, m4, tu, ma
-; RV32-NEXT: vslideup.vi v4, v8, 0
+; RV32-NEXT: vmv.v.v v4, v8
; RV32-NEXT: lui a1, %hi(.LCPI6_13)
; RV32-NEXT: addi a1, a1, %lo(.LCPI6_13)
; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, mu
@@ -638,7 +638,7 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
; RV32-NEXT: vl4r.v v12, (a1) # Unknown-size Folded Reload
-; RV32-NEXT: vslideup.vi v12, v8, 0
+; RV32-NEXT: vmv.v.v v12, v8
; RV32-NEXT: addi a1, a0, 320
; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma
; RV32-NEXT: vse32.v v12, (a1)
@@ -768,7 +768,7 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
-; RV64-NEXT: vslideup.vi v8, v16, 0
+; RV64-NEXT: vmv.v.v v8, v16
; RV64-NEXT: csrr a1, vlenb
; RV64-NEXT: li a2, 29
; RV64-NEXT: mul a1, a1, a2
@@ -826,7 +826,7 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
; RV64-NEXT: vrgather.vi v8, v16, 3, v0.t
; RV64-NEXT: vsetivli zero, 6, e64, m4, tu, ma
-; RV64-NEXT: vslideup.vi v8, v24, 0
+; RV64-NEXT: vmv.v.v v8, v24
; RV64-NEXT: csrr a1, vlenb
; RV64-NEXT: li a2, 25
; RV64-NEXT: mul a1, a1, a2
@@ -899,7 +899,7 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
; RV64-NEXT: vl4r.v v8, (a1) # Unknown-size Folded Reload
-; RV64-NEXT: vslideup.vi v8, v24, 0
+; RV64-NEXT: vmv.v.v v8, v24
; RV64-NEXT: csrr a1, vlenb
; RV64-NEXT: li a2, 45
; RV64-NEXT: mul a1, a1, a2
@@ -991,7 +991,7 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
; RV64-NEXT: vl4r.v v8, (a1) # Unknown-size Folded Reload
-; RV64-NEXT: vslideup.vi v8, v16, 0
+; RV64-NEXT: vmv.v.v v8, v16
; RV64-NEXT: csrr a1, vlenb
; RV64-NEXT: li a2, 41
; RV64-NEXT: mul a1, a1, a2
@@ -1085,7 +1085,7 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
; RV64-NEXT: vl4r.v v8, (a1) # Unknown-size Folded Reload
-; RV64-NEXT: vslideup.vi v8, v16, 0
+; RV64-NEXT: vmv.v.v v8, v16
; RV64-NEXT: csrr a1, vlenb
; RV64-NEXT: slli a2, a1, 5
; RV64-NEXT: add a1, a2, a1
@@ -1156,7 +1156,7 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
; RV64-NEXT: vrgather.vv v8, v24, v16, v0.t
; RV64-NEXT: vsetivli zero, 5, e64, m4, tu, ma
-; RV64-NEXT: vslideup.vi v4, v8, 0
+; RV64-NEXT: vmv.v.v v4, v8
; RV64-NEXT: addi a1, a0, 320
; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT: vse64.v v4, (a1)