[llvm] c064244 - [X86] X86FixupInstTunings - attempt to convert VPERMQri to VINSERTI128rri (#177327)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Jan 28 02:04:00 PST 2026
Author: Julian Pokrovsky
Date: 2026-01-28T10:03:55Z
New Revision: c064244fa28ba15d37e22ff1f1485ce114935a83
URL: https://github.com/llvm/llvm-project/commit/c064244fa28ba15d37e22ff1f1485ce114935a83
DIFF: https://github.com/llvm/llvm-project/commit/c064244fa28ba15d37e22ff1f1485ce114935a83.diff
LOG: [X86] X86FixupInstTunings - attempt to convert VPERMQri to VINSERTI128rri (#177327)
When the immediate is 0x44, VPERMQ/VPERMPD duplicates the lower 128-bit
lane to both lanes. This is equivalent to inserting the lower 128 bits
into the upper 128-bit position, which VINSERTI128/VINSERTF128 can
potentially do more efficiently on some targets.
This patch enables X86FixupInstTuning to convert:
VPERMQ ymm, ymm, 0x44 -> VINSERTI128 ymm, ymm, xmm, 1
VPERMPD ymm, ymm, 0x44 -> VINSERTF128 ymm, ymm, xmm, 1
Resolves #159410
Added:
llvm/test/CodeGen/X86/fixup-vpermq-to-vinsert.mir
Modified:
llvm/lib/Target/X86/X86FixupInstTuning.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86FixupInstTuning.cpp b/llvm/lib/Target/X86/X86FixupInstTuning.cpp
index 2ea6dec91e717..4747977e3f43b 100644
--- a/llvm/lib/Target/X86/X86FixupInstTuning.cpp
+++ b/llvm/lib/Target/X86/X86FixupInstTuning.cpp
@@ -23,6 +23,7 @@
#include "X86.h"
#include "X86InstrInfo.h"
+#include "X86RegisterInfo.h"
#include "X86Subtarget.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineFunctionAnalysisManager.h"
@@ -49,6 +50,7 @@ class X86FixupInstTuningImpl {
const X86InstrInfo *TII = nullptr;
const X86Subtarget *ST = nullptr;
const MCSchedModel *SM = nullptr;
+ const X86RegisterInfo *TRI = nullptr;
};
class X86FixupInstTuningLegacy : public MachineFunctionPass {
@@ -304,6 +306,40 @@ bool X86FixupInstTuningImpl::processInstruction(
return false;
};
+ // `vpermq ymm, ymm, 0x44` -> `vinserti128 ymm, ymm, xmm, 1`
+ // `vpermpd ymm, ymm, 0x44` -> `vinsertf128 ymm, ymm, xmm, 1`
+ // When the immediate is 0x44, VPERMQ/VPERMPD duplicates the lower 128-bit
+ // lane to both lanes. 0x44 = 0b01_00_01_00 means qwords[3:0] = {src[1],
+ // src[0], src[1], src[0]}. This is equivalent to inserting the lower 128 bits
+ // into the upper 128-bit position.
+ auto ProcessVPERMQToVINSERT128 = [&](unsigned NewOpc) -> bool {
+ if (MI.getOperand(NumOperands - 1).getImm() != 0x44)
+ return false;
+ if (!NewOpcPreferable(NewOpc, /*ReplaceInTie*/ false))
+ return false;
+
+ // Get the XMM subregister of the source YMM register.
+ Register SrcReg = MI.getOperand(1).getReg();
+ Register XmmReg = TRI->getSubReg(SrcReg, X86::sub_xmm);
+
+ LLVM_DEBUG(dbgs() << "Replacing: " << MI);
+ {
+ // Transform: VPERMQ $dst, $src, $0x44
+ // Into: VINSERTI128 $dst, $src, $xmm_src, $1
+ MI.setDesc(TII->get(NewOpc));
+ // Remove the immediate operand.
+ MI.removeOperand(NumOperands - 1);
+ // Add the XMM subregister operand.
+ MI.addOperand(MachineOperand::CreateReg(XmmReg, /*isDef=*/false,
+ /*isImp=*/false,
+ /*isKill=*/false));
+ // Add the immediate (1 = insert into high 128-bits).
+ MI.addOperand(MachineOperand::CreateImm(1));
+ }
+ LLVM_DEBUG(dbgs() << " With: " << MI);
+ return true;
+ };
+
switch (Opc) {
case X86::BLENDPDrri:
return ProcessBLENDToMOV(X86::MOVSDrr, 0x3, 0x1);
@@ -392,7 +428,10 @@ bool X86FixupInstTuningImpl::processInstruction(
return ProcessVPERMILPSmi(X86::VPSHUFDZ256mik);
case X86::VPERMILPSZmik:
return ProcessVPERMILPSmi(X86::VPSHUFDZmik);
-
+ case X86::VPERMQYri:
+ return ProcessVPERMQToVINSERT128(X86::VINSERTI128rri);
+ case X86::VPERMPDYri:
+ return ProcessVPERMQToVINSERT128(X86::VINSERTF128rri);
case X86::MOVLHPSrr:
case X86::UNPCKLPDrr:
return ProcessUNPCKLPDrr(X86::PUNPCKLQDQrr, X86::SHUFPDrri);
@@ -638,6 +677,7 @@ bool X86FixupInstTuningImpl::runOnMachineFunction(MachineFunction &MF) {
bool Changed = false;
ST = &MF.getSubtarget<X86Subtarget>();
TII = ST->getInstrInfo();
+ TRI = ST->getRegisterInfo();
SM = &ST->getSchedModel();
for (MachineBasicBlock &MBB : MF) {
diff --git a/llvm/test/CodeGen/X86/fixup-vpermq-to-vinsert.mir b/llvm/test/CodeGen/X86/fixup-vpermq-to-vinsert.mir
new file mode 100644
index 0000000000000..6613e380eefa3
--- /dev/null
+++ b/llvm/test/CodeGen/X86/fixup-vpermq-to-vinsert.mir
@@ -0,0 +1,71 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=x86_64-- -run-pass x86-fixup-inst-tuning -mcpu=znver1 -o - %s | FileCheck %s --check-prefix=ZNVER
+# RUN: llc -mtriple=x86_64-- -run-pass x86-fixup-inst-tuning -mcpu=znver3 -o - %s | FileCheck %s --check-prefix=ZNVER
+# RUN: llc -mtriple=x86_64-- -run-pass x86-fixup-inst-tuning -mcpu=haswell -o - %s | FileCheck %s --check-prefix=INTEL
+# RUN: llc -mtriple=x86_64-- -run-pass x86-fixup-inst-tuning -mcpu=alderlake -o - %s | FileCheck %s --check-prefix=INTEL
+
+--- |
+ define void @test_vpermq_0x44() { ret void }
+ define void @test_vpermpd_0x44() { ret void }
+ define void @test_vpermq_other_imm() { ret void }
+...
+---
+name: test_vpermq_0x44
+body: |
+ bb.0:
+ liveins: $ymm0
+ ; ZNVER-LABEL: name: test_vpermq_0x44
+ ; ZNVER: liveins: $ymm0
+ ; ZNVER-NEXT: {{ $}}
+ ; ZNVER-NEXT: $ymm0 = VINSERTI128rri $ymm0, $xmm0, 1
+ ; ZNVER-NEXT: RET64 $ymm0
+ ;
+ ; INTEL-LABEL: name: test_vpermq_0x44
+ ; INTEL: liveins: $ymm0
+ ; INTEL-NEXT: {{ $}}
+ ; INTEL-NEXT: $ymm0 = VPERMQYri $ymm0, 68
+ ; INTEL-NEXT: RET64 $ymm0
+ $ymm0 = VPERMQYri $ymm0, 68
+ RET64 $ymm0
+...
+---
+name: test_vpermpd_0x44
+body: |
+ bb.0:
+ liveins: $ymm0
+ ; ZNVER-LABEL: name: test_vpermpd_0x44
+ ; ZNVER: liveins: $ymm0
+ ; ZNVER-NEXT: {{ $}}
+ ; ZNVER-NEXT: $ymm0 = VINSERTF128rri $ymm0, $xmm0, 1
+ ; ZNVER-NEXT: RET64 $ymm0
+ ;
+ ; INTEL-LABEL: name: test_vpermpd_0x44
+ ; INTEL: liveins: $ymm0
+ ; INTEL-NEXT: {{ $}}
+ ; INTEL-NEXT: $ymm0 = VPERMPDYri $ymm0, 68
+ ; INTEL-NEXT: RET64 $ymm0
+ $ymm0 = VPERMPDYri $ymm0, 68
+ RET64 $ymm0
+...
+---
+name: test_vpermq_other_imm
+body: |
+ bb.0:
+ liveins: $ymm0, $ymm1
+ ; ZNVER-LABEL: name: test_vpermq_other_imm
+ ; ZNVER: liveins: $ymm0, $ymm1
+ ; ZNVER-NEXT: {{ $}}
+ ; ZNVER-NEXT: $ymm0 = VPERMQYri $ymm0, 228
+ ; ZNVER-NEXT: $ymm1 = VPERMQYri $ymm1, 0
+ ; ZNVER-NEXT: RET64 $ymm0, $ymm1
+ ;
+ ; INTEL-LABEL: name: test_vpermq_other_imm
+ ; INTEL: liveins: $ymm0, $ymm1
+ ; INTEL-NEXT: {{ $}}
+ ; INTEL-NEXT: $ymm0 = VPERMQYri $ymm0, 228
+ ; INTEL-NEXT: $ymm1 = VPERMQYri $ymm1, 0
+ ; INTEL-NEXT: RET64 $ymm0, $ymm1
+ $ymm0 = VPERMQYri $ymm0, 228
+ $ymm1 = VPERMQYri $ymm1, 0
+ RET64 $ymm0, $ymm1
+...
More information about the llvm-commits
mailing list