[llvm] [RISCV] Improve performCONCAT_VECTORSCombine stride matching (PR #68726)
Michael Maitland via llvm-commits
llvm-commits at lists.llvm.org
Mon Oct 16 11:15:53 PDT 2023
https://github.com/michaelmaitland updated https://github.com/llvm/llvm-project/pull/68726
From 165657974d31a547a79f1f1c8263dcefbd24f667 Mon Sep 17 00:00:00 2001
From: Michael Maitland <michaeltmaitland at gmail.com>
Date: Sun, 15 Oct 2023 08:49:46 -0700
Subject: [PATCH 1/3] [RISCV] Pre-commit concat-vectors-constant-stride.ll
This patch adds tests that can be optimized by improving
performCONCAT_VECTORSCombine to do a better job of decomposing the base
pointer and recognizing a constant offset.
---
.../rvv/concat-vectors-constant-stride.ll | 231 ++++++++++++++++++
1 file changed, 231 insertions(+)
create mode 100644 llvm/test/CodeGen/RISCV/rvv/concat-vectors-constant-stride.ll
diff --git a/llvm/test/CodeGen/RISCV/rvv/concat-vectors-constant-stride.ll b/llvm/test/CodeGen/RISCV/rvv/concat-vectors-constant-stride.ll
new file mode 100644
index 000000000000000..611270ab98ebdaf
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/concat-vectors-constant-stride.ll
@@ -0,0 +1,231 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+v,+unaligned-vector-mem -target-abi=ilp32 \
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
+; RUN: llc -mtriple=riscv64 -mattr=+v,+unaligned-vector-mem -target-abi=lp64 \
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
+
+define void @constant_forward_stride(ptr %s, ptr %d) {
+; CHECK-LABEL: constant_forward_stride:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a2, a0, 16
+; CHECK-NEXT: addi a3, a0, 32
+; CHECK-NEXT: addi a4, a0, 48
+; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
+; CHECK-NEXT: vle8.v v8, (a0)
+; CHECK-NEXT: vle8.v v9, (a2)
+; CHECK-NEXT: vle8.v v10, (a3)
+; CHECK-NEXT: vle8.v v11, (a4)
+; CHECK-NEXT: vsetivli zero, 4, e8, mf2, tu, ma
+; CHECK-NEXT: vslideup.vi v8, v9, 2
+; CHECK-NEXT: vsetivli zero, 6, e8, mf2, tu, ma
+; CHECK-NEXT: vslideup.vi v8, v10, 4
+; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; CHECK-NEXT: vslideup.vi v8, v11, 6
+; CHECK-NEXT: vse8.v v8, (a1)
+; CHECK-NEXT: ret
+ %1 = getelementptr inbounds i8, ptr %s, i64 16
+ %2 = getelementptr inbounds i8, ptr %s, i64 32
+ %3 = getelementptr inbounds i8, ptr %s, i64 48
+ %4 = load <2 x i8>, ptr %s, align 1
+ %5 = load <2 x i8>, ptr %1, align 1
+ %6 = load <2 x i8>, ptr %2, align 1
+ %7 = load <2 x i8>, ptr %3, align 1
+ %8 = shufflevector <2 x i8> %4, <2 x i8> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %9 = shufflevector <2 x i8> %6, <2 x i8> %7, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %10 = shufflevector <4 x i8> %8, <4 x i8> %9, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ store <8 x i8> %10, ptr %d, align 1
+ ret void
+}
+
+define void @constant_forward_stride2(ptr %s, ptr %d) {
+; CHECK-LABEL: constant_forward_stride2:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a2, a0, -16
+; CHECK-NEXT: addi a3, a0, -32
+; CHECK-NEXT: addi a4, a0, -48
+; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
+; CHECK-NEXT: vle8.v v8, (a4)
+; CHECK-NEXT: vle8.v v9, (a3)
+; CHECK-NEXT: vle8.v v10, (a2)
+; CHECK-NEXT: vle8.v v11, (a0)
+; CHECK-NEXT: vsetivli zero, 4, e8, mf2, tu, ma
+; CHECK-NEXT: vslideup.vi v8, v9, 2
+; CHECK-NEXT: vsetivli zero, 6, e8, mf2, tu, ma
+; CHECK-NEXT: vslideup.vi v8, v10, 4
+; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; CHECK-NEXT: vslideup.vi v8, v11, 6
+; CHECK-NEXT: vse8.v v8, (a1)
+; CHECK-NEXT: ret
+ %1 = getelementptr inbounds i8, ptr %s, i64 -16
+ %2 = getelementptr inbounds i8, ptr %s, i64 -32
+ %3 = getelementptr inbounds i8, ptr %s, i64 -48
+ %4 = load <2 x i8>, ptr %3, align 1
+ %5 = load <2 x i8>, ptr %2, align 1
+ %6 = load <2 x i8>, ptr %1, align 1
+ %7 = load <2 x i8>, ptr %s, align 1
+ %8 = shufflevector <2 x i8> %4, <2 x i8> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %9 = shufflevector <2 x i8> %6, <2 x i8> %7, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %10 = shufflevector <4 x i8> %8, <4 x i8> %9, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ store <8 x i8> %10, ptr %d, align 1
+ ret void
+}
+
+define void @constant_forward_stride3(ptr %s, ptr %d) {
+; CHECK-LABEL: constant_forward_stride3:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a2, a0, 16
+; CHECK-NEXT: addi a3, a0, 32
+; CHECK-NEXT: addi a4, a0, 48
+; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
+; CHECK-NEXT: vle8.v v8, (a0)
+; CHECK-NEXT: vle8.v v9, (a2)
+; CHECK-NEXT: vle8.v v10, (a3)
+; CHECK-NEXT: vle8.v v11, (a4)
+; CHECK-NEXT: vsetivli zero, 4, e8, mf2, tu, ma
+; CHECK-NEXT: vslideup.vi v8, v9, 2
+; CHECK-NEXT: vsetivli zero, 6, e8, mf2, tu, ma
+; CHECK-NEXT: vslideup.vi v8, v10, 4
+; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; CHECK-NEXT: vslideup.vi v8, v11, 6
+; CHECK-NEXT: vse8.v v8, (a1)
+; CHECK-NEXT: ret
+ %1 = getelementptr inbounds i8, ptr %s, i64 16
+ %2 = getelementptr inbounds i8, ptr %s, i64 32
+ %3 = getelementptr inbounds i8, ptr %s, i64 48
+ %4 = getelementptr inbounds i8, ptr %1, i64 0
+ %5 = getelementptr inbounds i8, ptr %2, i64 0
+ %6 = getelementptr inbounds i8, ptr %3, i64 0
+ %7 = load <2 x i8>, ptr %s, align 1
+ %8 = load <2 x i8>, ptr %4, align 1
+ %9 = load <2 x i8>, ptr %5, align 1
+ %10 = load <2 x i8>, ptr %6, align 1
+ %11 = shufflevector <2 x i8> %7, <2 x i8> %8, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %12 = shufflevector <2 x i8> %9, <2 x i8> %10, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %13 = shufflevector <4 x i8> %11, <4 x i8> %12, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ store <8 x i8> %13, ptr %d, align 1
+ ret void
+}
+
+define void @constant_back_stride(ptr %s, ptr %d) {
+; CHECK-LABEL: constant_back_stride:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a2, a0, -16
+; CHECK-NEXT: addi a3, a0, -32
+; CHECK-NEXT: addi a4, a0, -48
+; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
+; CHECK-NEXT: vle8.v v8, (a0)
+; CHECK-NEXT: vle8.v v9, (a2)
+; CHECK-NEXT: vle8.v v10, (a3)
+; CHECK-NEXT: vle8.v v11, (a4)
+; CHECK-NEXT: vsetivli zero, 4, e8, mf2, tu, ma
+; CHECK-NEXT: vslideup.vi v8, v9, 2
+; CHECK-NEXT: vsetivli zero, 6, e8, mf2, tu, ma
+; CHECK-NEXT: vslideup.vi v8, v10, 4
+; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; CHECK-NEXT: vslideup.vi v8, v11, 6
+; CHECK-NEXT: vse8.v v8, (a1)
+; CHECK-NEXT: ret
+ %1 = getelementptr inbounds i8, ptr %s, i64 -16
+ %2 = getelementptr inbounds i8, ptr %s, i64 -32
+ %3 = getelementptr inbounds i8, ptr %s, i64 -48
+ %4 = load <2 x i8>, ptr %s, align 1
+ %5 = load <2 x i8>, ptr %1, align 1
+ %6 = load <2 x i8>, ptr %2, align 1
+ %7 = load <2 x i8>, ptr %3, align 1
+ %8 = shufflevector <2 x i8> %4, <2 x i8> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %9 = shufflevector <2 x i8> %6, <2 x i8> %7, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %10 = shufflevector <4 x i8> %8, <4 x i8> %9, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ store <8 x i8> %10, ptr %d, align 1
+ ret void
+}
+
+define void @constant_back_stride2(ptr %s, ptr %d) {
+; CHECK-LABEL: constant_back_stride2:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a2, a0, 16
+; CHECK-NEXT: addi a3, a0, 32
+; CHECK-NEXT: addi a4, a0, 48
+; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
+; CHECK-NEXT: vle8.v v8, (a4)
+; CHECK-NEXT: vle8.v v9, (a3)
+; CHECK-NEXT: vle8.v v10, (a2)
+; CHECK-NEXT: vle8.v v11, (a0)
+; CHECK-NEXT: vsetivli zero, 4, e8, mf2, tu, ma
+; CHECK-NEXT: vslideup.vi v8, v9, 2
+; CHECK-NEXT: vsetivli zero, 6, e8, mf2, tu, ma
+; CHECK-NEXT: vslideup.vi v8, v10, 4
+; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; CHECK-NEXT: vslideup.vi v8, v11, 6
+; CHECK-NEXT: vse8.v v8, (a1)
+; CHECK-NEXT: ret
+ %1 = getelementptr inbounds i8, ptr %s, i64 16
+ %2 = getelementptr inbounds i8, ptr %s, i64 32
+ %3 = getelementptr inbounds i8, ptr %s, i64 48
+ %4 = load <2 x i8>, ptr %3, align 1
+ %5 = load <2 x i8>, ptr %2, align 1
+ %6 = load <2 x i8>, ptr %1, align 1
+ %7 = load <2 x i8>, ptr %s, align 1
+ %8 = shufflevector <2 x i8> %4, <2 x i8> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %9 = shufflevector <2 x i8> %6, <2 x i8> %7, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %10 = shufflevector <4 x i8> %8, <4 x i8> %9, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ store <8 x i8> %10, ptr %d, align 1
+ ret void
+}
+
+define void @constant_back_stride3(ptr %s, ptr %d) {
+; CHECK-LABEL: constant_back_stride3:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a2, a0, -16
+; CHECK-NEXT: addi a3, a0, -32
+; CHECK-NEXT: addi a4, a0, -48
+; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
+; CHECK-NEXT: vle8.v v8, (a0)
+; CHECK-NEXT: vle8.v v9, (a2)
+; CHECK-NEXT: vle8.v v10, (a3)
+; CHECK-NEXT: vle8.v v11, (a4)
+; CHECK-NEXT: vsetivli zero, 4, e8, mf2, tu, ma
+; CHECK-NEXT: vslideup.vi v8, v9, 2
+; CHECK-NEXT: vsetivli zero, 6, e8, mf2, tu, ma
+; CHECK-NEXT: vslideup.vi v8, v10, 4
+; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; CHECK-NEXT: vslideup.vi v8, v11, 6
+; CHECK-NEXT: vse8.v v8, (a1)
+; CHECK-NEXT: ret
+ %1 = getelementptr inbounds i8, ptr %s, i64 -16
+ %2 = getelementptr inbounds i8, ptr %s, i64 -32
+ %3 = getelementptr inbounds i8, ptr %s, i64 -48
+ %4 = getelementptr inbounds i8, ptr %1, i64 0
+ %5 = getelementptr inbounds i8, ptr %2, i64 0
+ %6 = getelementptr inbounds i8, ptr %3, i64 0
+ %7 = load <2 x i8>, ptr %s, align 1
+ %8 = load <2 x i8>, ptr %4, align 1
+ %9 = load <2 x i8>, ptr %5, align 1
+ %10 = load <2 x i8>, ptr %6, align 1
+ %11 = shufflevector <2 x i8> %7, <2 x i8> %8, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %12 = shufflevector <2 x i8> %9, <2 x i8> %10, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %13 = shufflevector <4 x i8> %11, <4 x i8> %12, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ store <8 x i8> %13, ptr %d, align 1
+ ret void
+}
+
+define void @constant_zero_stride(ptr %s, ptr %d) {
+; CHECK-LABEL: constant_zero_stride:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
+; CHECK-NEXT: vle8.v v8, (a0)
+; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
+; CHECK-NEXT: vmv1r.v v9, v8
+; CHECK-NEXT: vslideup.vi v9, v8, 2
+; CHECK-NEXT: vse8.v v9, (a1)
+; CHECK-NEXT: ret
+ %1 = getelementptr inbounds i8, ptr %s, i64 0
+ %2 = load <2 x i8>, ptr %s, align 1
+ %3 = load <2 x i8>, ptr %1, align 1
+ %4 = shufflevector <2 x i8> %2, <2 x i8> %3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ store <4 x i8> %4, ptr %d, align 1
+ ret void
+}
+
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; RV32: {{.*}}
+; RV64: {{.*}}
From 23687a3056f19aa047f9a2295489f0d29196561f Mon Sep 17 00:00:00 2001
From: Michael Maitland <michaeltmaitland at gmail.com>
Date: Sun, 15 Oct 2023 09:00:04 -0700
Subject: [PATCH 2/3] [RISCV] Improve performCONCAT_VECTORSCombine stride
 matching
If the load pointers can be decomposed into a common (Base + Index) plus
constant offsets, then return the difference of those offsets as the
constant stride.
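To make this concrete with the constant_forward_stride test from patch 1:
the pointers %s, %s+16, %s+32 and %s+48 all decompose to the common base
%s with offsets 0, 16, 32 and 48, so each consecutive pair of loads is 16
bytes apart. Below is a minimal standalone sketch of that check; it is
plain C++ with no LLVM dependencies, and BaseOffset and
matchConstantStride are hypothetical stand-ins for the (base, offset)
decomposition that BaseIndexOffset::match performs per load:

  #include <cstdint>
  #include <optional>
  #include <vector>

  // (Base, constant offset) pair, standing in for the decomposition
  // that BaseIndexOffset::match performs on each load's pointer.
  struct BaseOffset {
    const void *Base;
    int64_t Offset;
  };

  // Return the common constant stride if all pointers share a base and
  // their offsets are evenly spaced, e.g. {0, 16, 32, 48} -> 16.
  std::optional<int64_t>
  matchConstantStride(const std::vector<BaseOffset> &Ptrs) {
    if (Ptrs.size() < 2)
      return std::nullopt;
    int64_t Stride = Ptrs[1].Offset - Ptrs[0].Offset;
    for (size_t I = 1; I < Ptrs.size(); ++I)
      if (Ptrs[I].Base != Ptrs[0].Base ||
          Ptrs[I].Offset - Ptrs[I - 1].Offset != Stride)
        return std::nullopt;
    return Stride; // may be negative (backward stride) or zero
  }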
---
llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 15 ++-
.../rvv/concat-vectors-constant-stride.ll | 116 ++++--------------
2 files changed, 39 insertions(+), 92 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 6eb253cc5146635..a4a11ab5c50e3ef 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -27,6 +27,7 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAGAddressAnalysis.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/DiagnosticInfo.h"
@@ -13804,8 +13805,18 @@ static SDValue performCONCAT_VECTORSCombine(SDNode *N, SelectionDAG &DAG,
}
using PtrDiff = std::pair<SDValue, bool>;
- auto GetPtrDiff = [](LoadSDNode *Ld1,
- LoadSDNode *Ld2) -> std::optional<PtrDiff> {
+ auto GetPtrDiff = [&DAG, &DL](LoadSDNode *Ld1,
+ LoadSDNode *Ld2) -> std::optional<PtrDiff> {
+ // If the load ptrs can be decomposed into a common (Base + Index) with a
+ // common constant stride, then return the constant stride.
+ BaseIndexOffset BIO1 = BaseIndexOffset::match(Ld1, DAG);
+ BaseIndexOffset BIO2 = BaseIndexOffset::match(Ld2, DAG);
+ if (BIO1.equalBaseIndex(BIO2, DAG))
+ return {{DAG.getConstant(BIO2.getOffset() - BIO1.getOffset(), DL,
+ Ld1->getOffset().getValueType()),
+ false}};
+
+ // Otherwise try to match (add LastPtr, Stride) or (add NextPtr, Stride)
SDValue P1 = Ld1->getBasePtr();
SDValue P2 = Ld2->getBasePtr();
if (P2.getOpcode() == ISD::ADD && P2.getOperand(0) == P1)
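For context: beyond the first match shown above, the pre-existing
fallback also handles the symmetric case where P1 is the ADD, and the
bool in PtrDiff records whether the stride must be negated in that case
(as patch 3's use of DAG.getNegative below confirms). A standalone model
of that shape, with a hypothetical Expr type and no LLVM dependencies:

  #include <optional>
  #include <utility>

  struct Expr {
    enum Kind { Leaf, Add } K;
    const Expr *Ptr = nullptr;    // for Add: the pointer operand
    const Expr *Stride = nullptr; // for Add: the stride operand
  };

  // Returns (stride, mustNegate), mirroring the PtrDiff pair above.
  std::optional<std::pair<const Expr *, bool>>
  matchPtrChain(const Expr *P1, const Expr *P2) {
    if (P2->K == Expr::Add && P2->Ptr == P1)
      return {{P2->Stride, /*MustNegate=*/false}}; // P2 = P1 + Stride
    if (P1->K == Expr::Add && P1->Ptr == P2)
      return {{P1->Stride, /*MustNegate=*/true}};  // P1 = P2 + Stride
    return std::nullopt;
  }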
diff --git a/llvm/test/CodeGen/RISCV/rvv/concat-vectors-constant-stride.ll b/llvm/test/CodeGen/RISCV/rvv/concat-vectors-constant-stride.ll
index 611270ab98ebdaf..ff35043dbd7e75e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/concat-vectors-constant-stride.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/concat-vectors-constant-stride.ll
@@ -7,21 +7,10 @@
define void @constant_forward_stride(ptr %s, ptr %d) {
; CHECK-LABEL: constant_forward_stride:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi a2, a0, 16
-; CHECK-NEXT: addi a3, a0, 32
-; CHECK-NEXT: addi a4, a0, 48
-; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
-; CHECK-NEXT: vle8.v v8, (a0)
-; CHECK-NEXT: vle8.v v9, (a2)
-; CHECK-NEXT: vle8.v v10, (a3)
-; CHECK-NEXT: vle8.v v11, (a4)
-; CHECK-NEXT: vsetivli zero, 4, e8, mf2, tu, ma
-; CHECK-NEXT: vslideup.vi v8, v9, 2
-; CHECK-NEXT: vsetivli zero, 6, e8, mf2, tu, ma
-; CHECK-NEXT: vslideup.vi v8, v10, 4
-; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
-; CHECK-NEXT: vslideup.vi v8, v11, 6
-; CHECK-NEXT: vse8.v v8, (a1)
+; CHECK-NEXT: li a2, 16
+; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; CHECK-NEXT: vlse16.v v8, (a0), a2
+; CHECK-NEXT: vse16.v v8, (a1)
; CHECK-NEXT: ret
%1 = getelementptr inbounds i8, ptr %s, i64 16
%2 = getelementptr inbounds i8, ptr %s, i64 32
@@ -40,21 +29,11 @@ define void @constant_forward_stride(ptr %s, ptr %d) {
define void @constant_forward_stride2(ptr %s, ptr %d) {
; CHECK-LABEL: constant_forward_stride2:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi a2, a0, -16
-; CHECK-NEXT: addi a3, a0, -32
-; CHECK-NEXT: addi a4, a0, -48
-; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
-; CHECK-NEXT: vle8.v v8, (a4)
-; CHECK-NEXT: vle8.v v9, (a3)
-; CHECK-NEXT: vle8.v v10, (a2)
-; CHECK-NEXT: vle8.v v11, (a0)
-; CHECK-NEXT: vsetivli zero, 4, e8, mf2, tu, ma
-; CHECK-NEXT: vslideup.vi v8, v9, 2
-; CHECK-NEXT: vsetivli zero, 6, e8, mf2, tu, ma
-; CHECK-NEXT: vslideup.vi v8, v10, 4
-; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
-; CHECK-NEXT: vslideup.vi v8, v11, 6
-; CHECK-NEXT: vse8.v v8, (a1)
+; CHECK-NEXT: addi a0, a0, -48
+; CHECK-NEXT: li a2, 16
+; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; CHECK-NEXT: vlse16.v v8, (a0), a2
+; CHECK-NEXT: vse16.v v8, (a1)
; CHECK-NEXT: ret
%1 = getelementptr inbounds i8, ptr %s, i64 -16
%2 = getelementptr inbounds i8, ptr %s, i64 -32
@@ -73,21 +52,10 @@ define void @constant_forward_stride2(ptr %s, ptr %d) {
define void @constant_forward_stride3(ptr %s, ptr %d) {
; CHECK-LABEL: constant_forward_stride3:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi a2, a0, 16
-; CHECK-NEXT: addi a3, a0, 32
-; CHECK-NEXT: addi a4, a0, 48
-; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
-; CHECK-NEXT: vle8.v v8, (a0)
-; CHECK-NEXT: vle8.v v9, (a2)
-; CHECK-NEXT: vle8.v v10, (a3)
-; CHECK-NEXT: vle8.v v11, (a4)
-; CHECK-NEXT: vsetivli zero, 4, e8, mf2, tu, ma
-; CHECK-NEXT: vslideup.vi v8, v9, 2
-; CHECK-NEXT: vsetivli zero, 6, e8, mf2, tu, ma
-; CHECK-NEXT: vslideup.vi v8, v10, 4
-; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
-; CHECK-NEXT: vslideup.vi v8, v11, 6
-; CHECK-NEXT: vse8.v v8, (a1)
+; CHECK-NEXT: li a2, 16
+; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; CHECK-NEXT: vlse16.v v8, (a0), a2
+; CHECK-NEXT: vse16.v v8, (a1)
; CHECK-NEXT: ret
%1 = getelementptr inbounds i8, ptr %s, i64 16
%2 = getelementptr inbounds i8, ptr %s, i64 32
@@ -109,21 +77,10 @@ define void @constant_forward_stride3(ptr %s, ptr %d) {
define void @constant_back_stride(ptr %s, ptr %d) {
; CHECK-LABEL: constant_back_stride:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi a2, a0, -16
-; CHECK-NEXT: addi a3, a0, -32
-; CHECK-NEXT: addi a4, a0, -48
-; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
-; CHECK-NEXT: vle8.v v8, (a0)
-; CHECK-NEXT: vle8.v v9, (a2)
-; CHECK-NEXT: vle8.v v10, (a3)
-; CHECK-NEXT: vle8.v v11, (a4)
-; CHECK-NEXT: vsetivli zero, 4, e8, mf2, tu, ma
-; CHECK-NEXT: vslideup.vi v8, v9, 2
-; CHECK-NEXT: vsetivli zero, 6, e8, mf2, tu, ma
-; CHECK-NEXT: vslideup.vi v8, v10, 4
-; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
-; CHECK-NEXT: vslideup.vi v8, v11, 6
-; CHECK-NEXT: vse8.v v8, (a1)
+; CHECK-NEXT: li a2, -16
+; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; CHECK-NEXT: vlse16.v v8, (a0), a2
+; CHECK-NEXT: vse16.v v8, (a1)
; CHECK-NEXT: ret
%1 = getelementptr inbounds i8, ptr %s, i64 -16
%2 = getelementptr inbounds i8, ptr %s, i64 -32
@@ -142,21 +99,11 @@ define void @constant_back_stride(ptr %s, ptr %d) {
define void @constant_back_stride2(ptr %s, ptr %d) {
; CHECK-LABEL: constant_back_stride2:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi a2, a0, 16
-; CHECK-NEXT: addi a3, a0, 32
-; CHECK-NEXT: addi a4, a0, 48
-; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
-; CHECK-NEXT: vle8.v v8, (a4)
-; CHECK-NEXT: vle8.v v9, (a3)
-; CHECK-NEXT: vle8.v v10, (a2)
-; CHECK-NEXT: vle8.v v11, (a0)
-; CHECK-NEXT: vsetivli zero, 4, e8, mf2, tu, ma
-; CHECK-NEXT: vslideup.vi v8, v9, 2
-; CHECK-NEXT: vsetivli zero, 6, e8, mf2, tu, ma
-; CHECK-NEXT: vslideup.vi v8, v10, 4
-; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
-; CHECK-NEXT: vslideup.vi v8, v11, 6
-; CHECK-NEXT: vse8.v v8, (a1)
+; CHECK-NEXT: addi a0, a0, 48
+; CHECK-NEXT: li a2, -16
+; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; CHECK-NEXT: vlse16.v v8, (a0), a2
+; CHECK-NEXT: vse16.v v8, (a1)
; CHECK-NEXT: ret
%1 = getelementptr inbounds i8, ptr %s, i64 16
%2 = getelementptr inbounds i8, ptr %s, i64 32
@@ -175,21 +122,10 @@ define void @constant_back_stride2(ptr %s, ptr %d) {
define void @constant_back_stride3(ptr %s, ptr %d) {
; CHECK-LABEL: constant_back_stride3:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi a2, a0, -16
-; CHECK-NEXT: addi a3, a0, -32
-; CHECK-NEXT: addi a4, a0, -48
-; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
-; CHECK-NEXT: vle8.v v8, (a0)
-; CHECK-NEXT: vle8.v v9, (a2)
-; CHECK-NEXT: vle8.v v10, (a3)
-; CHECK-NEXT: vle8.v v11, (a4)
-; CHECK-NEXT: vsetivli zero, 4, e8, mf2, tu, ma
-; CHECK-NEXT: vslideup.vi v8, v9, 2
-; CHECK-NEXT: vsetivli zero, 6, e8, mf2, tu, ma
-; CHECK-NEXT: vslideup.vi v8, v10, 4
-; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
-; CHECK-NEXT: vslideup.vi v8, v11, 6
-; CHECK-NEXT: vse8.v v8, (a1)
+; CHECK-NEXT: li a2, -16
+; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; CHECK-NEXT: vlse16.v v8, (a0), a2
+; CHECK-NEXT: vse16.v v8, (a1)
; CHECK-NEXT: ret
%1 = getelementptr inbounds i8, ptr %s, i64 -16
%2 = getelementptr inbounds i8, ptr %s, i64 -32
From 46f67254d4d80a7466cc97e27c638a5e1cd8c42b Mon Sep 17 00:00:00 2001
From: Michael Maitland <michaeltmaitland at gmail.com>
Date: Mon, 16 Oct 2023 11:15:31 -0700
Subject: [PATCH 3/3] Defer construction of constant SDNode
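Patch 2 called DAG.getConstant inside GetPtrDiff, so a constant node was
built even on paths where the combine later bails out. Carrying a
std::variant<int64_t, SDValue> instead defers node creation until the
strided load is actually formed. A minimal standalone sketch of the
idiom, in plain C++ with a stand-in Node type (hypothetical names, not
the LLVM API):

  #include <cstdint>
  #include <string>
  #include <variant>

  struct Node { std::string Repr; };  // stand-in for SDValue
  using StrideVariant = std::variant<int64_t, Node>;

  // Materialize a node only once the transform is known to fire,
  // mirroring the holds_alternative/get logic in the patch below.
  Node materializeStride(const StrideVariant &SV) {
    if (std::holds_alternative<Node>(SV))
      return std::get<Node>(SV);
    // Stand-in for DAG.getConstant(Value, DL, VT).
    return Node{"const:" + std::to_string(std::get<int64_t>(SV))};
  }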
---
llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 16 +++++++++-------
1 file changed, 9 insertions(+), 7 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index a4a11ab5c50e3ef..114bba69ca62268 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -13804,17 +13804,15 @@ static SDValue performCONCAT_VECTORSCombine(SDNode *N, SelectionDAG &DAG,
Align = std::min(Align, Ld->getAlign());
}
- using PtrDiff = std::pair<SDValue, bool>;
- auto GetPtrDiff = [&DAG, &DL](LoadSDNode *Ld1,
- LoadSDNode *Ld2) -> std::optional<PtrDiff> {
+ using PtrDiff = std::pair<std::variant<int64_t, SDValue>, bool>;
+ auto GetPtrDiff = [&DAG](LoadSDNode *Ld1,
+ LoadSDNode *Ld2) -> std::optional<PtrDiff> {
// If the load ptrs can be decomposed into a common (Base + Index) with a
// common constant stride, then return the constant stride.
BaseIndexOffset BIO1 = BaseIndexOffset::match(Ld1, DAG);
BaseIndexOffset BIO2 = BaseIndexOffset::match(Ld2, DAG);
if (BIO1.equalBaseIndex(BIO2, DAG))
- return {{DAG.getConstant(BIO2.getOffset() - BIO1.getOffset(), DL,
- Ld1->getOffset().getValueType()),
- false}};
+ return {{BIO2.getOffset() - BIO1.getOffset(), false}};
// Otherwise try to match (add LastPtr, Stride) or (add NextPtr, Stride)
SDValue P1 = Ld1->getBasePtr();
@@ -13855,7 +13853,11 @@ static SDValue performCONCAT_VECTORSCombine(SDNode *N, SelectionDAG &DAG,
if (!TLI.isLegalStridedLoadStore(WideVecVT, Align))
return SDValue();
- auto [Stride, MustNegateStride] = *BaseDiff;
+ auto [StrideVariant, MustNegateStride] = *BaseDiff;
+ SDValue Stride = std::holds_alternative<SDValue>(StrideVariant)
+ ? std::get<SDValue>(StrideVariant)
+ : DAG.getConstant(std::get<int64_t>(StrideVariant), DL,
+ Lds[0]->getOffset().getValueType());
if (MustNegateStride)
Stride = DAG.getNegative(Stride, DL, Stride.getValueType());