[llvm] [RISCV] Prefer alt opcode vectorization if unaligned vector mem accesses (PR #154153)
Mikhail Gudim via llvm-commits
llvm-commits at lists.llvm.org
Mon Aug 18 13:00:37 PDT 2025
https://github.com/mgudim updated https://github.com/llvm/llvm-project/pull/154153
>From fdb931f8dfb12dbeca3aa00749777b14be2af7ca Mon Sep 17 00:00:00 2001
From: Mikhail Gudim <mgudim at ventanamicro.com>
Date: Mon, 18 Aug 2025 09:28:29 -0700
Subject: [PATCH] [RISCV] Unaligned vec mem => prefer alt opc vec
Return `true` in `RISCVTTIImpl::preferAlternateOpcodeVectorization` if
subtarget supports unaligned memory accesses.
---
.../Target/RISCV/RISCVTargetTransformInfo.cpp | 4 +
.../Target/RISCV/RISCVTargetTransformInfo.h | 2 +-
.../RISCV/alt-opc-vectorization.ll | 82 +++++++++++++++++++
3 files changed, 87 insertions(+), 1 deletion(-)
create mode 100644 llvm/test/Transforms/SLPVectorizer/RISCV/alt-opc-vectorization.ll
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index 85b3059d87da7..a0763e3d42991 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -2713,6 +2713,10 @@ unsigned RISCVTTIImpl::getMinTripCountTailFoldingThreshold() const {
return RVVMinTripCount;
}
+bool RISCVTTIImpl::preferAlternateOpcodeVectorization() const {
+ return ST->enableUnalignedVectorMem();
+}
+
TTI::AddressingModeKind
RISCVTTIImpl::getPreferredAddressingMode(const Loop *L,
ScalarEvolution *SE) const {
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
index 6a1f4b3e3bedf..254908f97186c 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
@@ -132,7 +132,7 @@ class RISCVTTIImpl final : public BasicTTIImplBase<RISCVTTIImpl> {
unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const override;
- bool preferAlternateOpcodeVectorization() const override { return false; }
+ bool preferAlternateOpcodeVectorization() const override;
bool preferEpilogueVectorization() const override {
// Epilogue vectorization is usually unprofitable - tail folding or
diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/alt-opc-vectorization.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/alt-opc-vectorization.ll
new file mode 100644
index 0000000000000..bd4d512705f20
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/RISCV/alt-opc-vectorization.ll
@@ -0,0 +1,82 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -mtriple=riscv64 -mattr=+v,+unaligned-vector-mem \
+; RUN: -passes=slp-vectorizer -S \
+; RUN: < %s | FileCheck %s --check-prefixes=UNALIGNED_VEC_MEM
+
+; RUN: opt -mtriple=riscv64 -mattr=+v \
+; RUN: -passes=slp-vectorizer -S \
+; RUN: < %s | FileCheck %s --check-prefixes=NO_UNALIGNED_VEC_MEM
+
+define void @alternate_opcodes(ptr %pl, ptr %ps, i8 %x) {
+; UNALIGNED_VEC_MEM-LABEL: define void @alternate_opcodes(
+; UNALIGNED_VEC_MEM-SAME: ptr [[PL:%.*]], ptr [[PS:%.*]], i8 [[X:%.*]]) #[[ATTR0:[0-9]+]] {
+; UNALIGNED_VEC_MEM-NEXT: [[GEP_L0:%.*]] = getelementptr inbounds i8, ptr [[PL]], i64 0
+; UNALIGNED_VEC_MEM-NEXT: [[GEP_S0:%.*]] = getelementptr inbounds i8, ptr [[PS]], i64 0
+; UNALIGNED_VEC_MEM-NEXT: [[TMP1:%.*]] = call <8 x i8> @llvm.experimental.vp.strided.load.v8i8.p0.i64(ptr align 1 [[GEP_L0]], i64 20, <8 x i1> splat (i1 true), i32 8)
+; UNALIGNED_VEC_MEM-NEXT: [[TMP6:%.*]] = insertelement <8 x i8> poison, i8 [[X]], i32 0
+; UNALIGNED_VEC_MEM-NEXT: [[TMP3:%.*]] = shufflevector <8 x i8> [[TMP6]], <8 x i8> poison, <8 x i32> zeroinitializer
+; UNALIGNED_VEC_MEM-NEXT: [[TMP4:%.*]] = add <8 x i8> [[TMP1]], [[TMP3]]
+; UNALIGNED_VEC_MEM-NEXT: [[TMP5:%.*]] = sub <8 x i8> [[TMP1]], [[TMP3]]
+; UNALIGNED_VEC_MEM-NEXT: [[TMP2:%.*]] = shufflevector <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
+; UNALIGNED_VEC_MEM-NEXT: store <8 x i8> [[TMP2]], ptr [[GEP_S0]], align 1
+; UNALIGNED_VEC_MEM-NEXT: ret void
+;
+; NO_UNALIGNED_VEC_MEM-LABEL: define void @alternate_opcodes(
+; NO_UNALIGNED_VEC_MEM-SAME: ptr [[PL:%.*]], ptr [[PS:%.*]], i8 [[X:%.*]]) #[[ATTR0:[0-9]+]] {
+; NO_UNALIGNED_VEC_MEM-NEXT: [[GEP_L0:%.*]] = getelementptr inbounds i8, ptr [[PL]], i64 0
+; NO_UNALIGNED_VEC_MEM-NEXT: [[GEP_S0:%.*]] = getelementptr inbounds i8, ptr [[PS]], i64 0
+; NO_UNALIGNED_VEC_MEM-NEXT: [[TMP1:%.*]] = call <8 x i8> @llvm.experimental.vp.strided.load.v8i8.p0.i64(ptr align 1 [[GEP_L0]], i64 20, <8 x i1> splat (i1 true), i32 8)
+; NO_UNALIGNED_VEC_MEM-NEXT: [[TMP6:%.*]] = insertelement <8 x i8> poison, i8 [[X]], i32 0
+; NO_UNALIGNED_VEC_MEM-NEXT: [[TMP3:%.*]] = shufflevector <8 x i8> [[TMP6]], <8 x i8> poison, <8 x i32> zeroinitializer
+; NO_UNALIGNED_VEC_MEM-NEXT: [[TMP4:%.*]] = add <8 x i8> [[TMP1]], [[TMP3]]
+; NO_UNALIGNED_VEC_MEM-NEXT: [[TMP5:%.*]] = sub <8 x i8> [[TMP1]], [[TMP3]]
+; NO_UNALIGNED_VEC_MEM-NEXT: [[TMP2:%.*]] = shufflevector <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
+; NO_UNALIGNED_VEC_MEM-NEXT: store <8 x i8> [[TMP2]], ptr [[GEP_S0]], align 1
+; NO_UNALIGNED_VEC_MEM-NEXT: ret void
+;
+ %gep_l0 = getelementptr inbounds i8, ptr %pl, i64 0
+ %gep_l1 = getelementptr inbounds i8, ptr %pl, i64 20
+ %gep_l2 = getelementptr inbounds i8, ptr %pl, i64 40
+ %gep_l3 = getelementptr inbounds i8, ptr %pl, i64 60
+ %gep_l4 = getelementptr inbounds i8, ptr %pl, i64 80
+ %gep_l5 = getelementptr inbounds i8, ptr %pl, i64 100
+ %gep_l6 = getelementptr inbounds i8, ptr %pl, i64 120
+ %gep_l7 = getelementptr inbounds i8, ptr %pl, i64 140
+
+ %load0 = load i8, ptr %gep_l0
+ %load1 = load i8, ptr %gep_l1
+ %load2 = load i8, ptr %gep_l2
+ %load3 = load i8, ptr %gep_l3
+ %load4 = load i8, ptr %gep_l4
+ %load5 = load i8, ptr %gep_l5
+ %load6 = load i8, ptr %gep_l6
+ %load7 = load i8, ptr %gep_l7
+
+ %add0 = add i8 %load0, %x
+ %add1 = add i8 %load1, %x
+ %add2 = add i8 %load2, %x
+ %add3 = add i8 %load3, %x
+ %sub0 = sub i8 %load4, %x
+ %sub1 = sub i8 %load5, %x
+ %sub2 = sub i8 %load6, %x
+ %sub3 = sub i8 %load7, %x
+
+ %gep_s0 = getelementptr inbounds i8, ptr %ps, i64 0
+ %gep_s1 = getelementptr inbounds i8, ptr %ps, i64 1
+ %gep_s2 = getelementptr inbounds i8, ptr %ps, i64 2
+ %gep_s3 = getelementptr inbounds i8, ptr %ps, i64 3
+ %gep_s4 = getelementptr inbounds i8, ptr %ps, i64 4
+ %gep_s5 = getelementptr inbounds i8, ptr %ps, i64 5
+ %gep_s6 = getelementptr inbounds i8, ptr %ps, i64 6
+ %gep_s7 = getelementptr inbounds i8, ptr %ps, i64 7
+
+ store i8 %add0, ptr %gep_s0
+ store i8 %add1, ptr %gep_s1
+ store i8 %add2, ptr %gep_s2
+ store i8 %add3, ptr %gep_s3
+ store i8 %sub0, ptr %gep_s4
+ store i8 %sub1, ptr %gep_s5
+ store i8 %sub2, ptr %gep_s6
+ store i8 %sub3, ptr %gep_s7
+ ret void
+}
More information about the llvm-commits
mailing list