[llvm] [SLP]Improve analysis of copyables operands for commutative main instruction (PR #185320)
Alexey Bataev via llvm-commits
llvm-commits at lists.llvm.org
Sun Mar 8 13:32:44 PDT 2026
https://github.com/alexey-bataev created https://github.com/llvm/llvm-project/pull/185320
For commutative copyables, instruction operands are always placed on the LHS
and the others on the RHS. But if some instruction is the main instruction and
has 2 instruction operands, and its RHS operand is more compatible with the
other LHS operands than its LHS operand is, such operands need to be swapped
for better analysis.
>From 74b009c29527161bc821b42a6e387bd5f9cb2cb3 Mon Sep 17 00:00:00 2001
From: Alexey Bataev <a.bataev at outlook.com>
Date: Sun, 8 Mar 2026 13:32:34 -0700
Subject: [PATCH] =?UTF-8?q?[=F0=9D=98=80=F0=9D=97=BD=F0=9D=97=BF]=20initia?=
=?UTF-8?q?l=20version?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Created using spr 1.3.7
---
.../Transforms/Vectorize/SLPVectorizer.cpp | 17 +++++++++++
.../X86/bswap-i64-by-i32-chunks.ll | 30 ++++++++++++-------
2 files changed, 36 insertions(+), 11 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 75bfa34093d47..51a4f9841e331 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -11503,18 +11503,35 @@ class InstructionsCompatibilityAnalysis {
// Check profitability if number of copyables > VL.size() / 2.
// 1. Reorder operands for better matching.
if (isCommutative(MainOp)) {
+ Value *BestFrontOp = nullptr;
for (auto [OpL, OpR] : zip(Operands.front(), Operands.back())) {
// Make instructions the first operands.
if (!isa<Instruction>(OpL) && isa<Instruction>(OpR)) {
+ BestFrontOp = OpR;
std::swap(OpL, OpR);
continue;
}
// Make constants the second operands.
if ((isa<Constant>(OpL) && !match(OpR, m_Zero())) ||
match(OpL, m_Zero())) {
+ BestFrontOp = OpR;
std::swap(OpL, OpR);
continue;
}
+ if (isa<Instruction>(OpL))
+ BestFrontOp = OpL;
+ }
+ // If some of the RHS operands better match most of LHS - swap such
+ // operands to increase matching rate.
+ if (auto *BestLHS = dyn_cast_if_present<Instruction>(BestFrontOp)) {
+ const unsigned BestOpcode = BestLHS->getOpcode();
+ for (auto [OpL, OpR] : zip(Operands.front(), Operands.back())) {
+ auto *OpRI = dyn_cast<Instruction>(OpR);
+ if (!OpRI)
+ continue;
+ if (OpRI->getOpcode() == BestOpcode)
+ std::swap(OpL, OpR);
+ }
}
}
// 2. Check, if operands can be vectorized.
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/bswap-i64-by-i32-chunks.ll b/llvm/test/Transforms/SLPVectorizer/X86/bswap-i64-by-i32-chunks.ll
index 1ce883cb293be..754bab4107bee 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/bswap-i64-by-i32-chunks.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/bswap-i64-by-i32-chunks.ll
@@ -5,19 +5,27 @@ define i64 @test(ptr %buf) {
; CHECK-LABEL: define i64 @test(
; CHECK-SAME: ptr [[BUF:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
-; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[BUF]], align 1
-; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP0]])
-; CHECK-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
-; CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds nuw i8, ptr [[BUF]], i64 4
-; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i8>, ptr [[ADD_PTR]], align 1
-; CHECK-NEXT: [[ARRAYIDX22:%.*]] = getelementptr inbounds nuw i8, ptr [[BUF]], i64 6
+; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[BUF]], align 1
+; CHECK-NEXT: [[CONV:%.*]] = zext i8 [[TMP0]] to i64
+; CHECK-NEXT: [[ARRAYIDX22:%.*]] = getelementptr inbounds nuw i8, ptr [[BUF]], i64 1
; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr [[ARRAYIDX22]], align 1
; CHECK-NEXT: [[CONV23:%.*]] = zext i8 [[TMP4]] to i64
-; CHECK-NEXT: [[TMP5:%.*]] = zext <2 x i8> [[TMP3]] to <2 x i64>
-; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x i64> [[TMP5]], <2 x i64> poison, <4 x i32> <i32 1, i32 0, i32 poison, i32 poison>
-; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i64> [[TMP6]], i64 [[TMP2]], i32 2
-; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x i64> [[TMP7]], i64 [[CONV23]], i32 3
-; CHECK-NEXT: [[TMP9:%.*]] = shl nuw <4 x i64> [[TMP8]], <i64 16, i64 24, i64 32, i64 8>
+; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds nuw i8, ptr [[BUF]], i64 2
+; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr [[ARRAYIDX4]], align 1
+; CHECK-NEXT: [[CONV5:%.*]] = zext i8 [[TMP2]] to i64
+; CHECK-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds nuw i8, ptr [[BUF]], i64 3
+; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i64> <i64 poison, i64 0, i64 0, i64 0>, i64 [[CONV]], i32 0
+; CHECK-NEXT: [[TMP14:%.*]] = shl nuw nsw <4 x i64> [[TMP3]], <i64 24, i64 0, i64 0, i64 0>
+; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i64> <i64 poison, i64 0, i64 0, i64 0>, i64 [[CONV23]], i32 0
+; CHECK-NEXT: [[TMP6:%.*]] = shl nuw nsw <4 x i64> [[TMP5]], <i64 16, i64 0, i64 0, i64 0>
+; CHECK-NEXT: [[TMP7:%.*]] = or disjoint <4 x i64> [[TMP6]], [[TMP14]]
+; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x i64> <i64 poison, i64 0, i64 0, i64 0>, i64 [[CONV5]], i32 0
+; CHECK-NEXT: [[TMP15:%.*]] = shl nuw nsw <4 x i64> [[TMP8]], <i64 8, i64 0, i64 0, i64 0>
+; CHECK-NEXT: [[TMP16:%.*]] = or disjoint <4 x i64> [[TMP7]], [[TMP15]]
+; CHECK-NEXT: [[TMP17:%.*]] = load <4 x i8>, ptr [[ARRAYIDX8]], align 1
+; CHECK-NEXT: [[TMP12:%.*]] = zext <4 x i8> [[TMP17]] to <4 x i64>
+; CHECK-NEXT: [[TMP13:%.*]] = or disjoint <4 x i64> [[TMP16]], [[TMP12]]
+; CHECK-NEXT: [[TMP9:%.*]] = shl nuw <4 x i64> [[TMP13]], <i64 32, i64 24, i64 16, i64 8>
; CHECK-NEXT: [[ARRAYIDX27:%.*]] = getelementptr inbounds nuw i8, ptr [[BUF]], i64 7
; CHECK-NEXT: [[TMP10:%.*]] = load i8, ptr [[ARRAYIDX27]], align 1
; CHECK-NEXT: [[CONV28:%.*]] = zext i8 [[TMP10]] to i64
More information about the llvm-commits
mailing list