[llvm] c064244 - [X86] X86FixupInstTunings - attempt to convert VPERMQri to VINSERTI128rri (#177327)

via llvm-commits llvm-commits at lists.llvm.org
Wed Jan 28 02:04:00 PST 2026


Author: Julian Pokrovsky
Date: 2026-01-28T10:03:55Z
New Revision: c064244fa28ba15d37e22ff1f1485ce114935a83

URL: https://github.com/llvm/llvm-project/commit/c064244fa28ba15d37e22ff1f1485ce114935a83
DIFF: https://github.com/llvm/llvm-project/commit/c064244fa28ba15d37e22ff1f1485ce114935a83.diff

LOG: [X86] X86FixupInstTunings - attempt to convert VPERMQri to VINSERTI128rri (#177327)

When the immediate is 0x44, VPERMQ/VPERMPD duplicates the lower 128-bit
lane to both lanes. This is equivalent to inserting the lower 128-bits
into the upper 128-bit position, which VINSERTI128/VINSERTF128 can
potentially do more efficiently on some targets.

This patch enables X86FixupInstTuning to convert:
  VPERMQ  ymm, ymm, 0x44 -> VINSERTI128 ymm, ymm, xmm, 1
  VPERMPD ymm, ymm, 0x44 -> VINSERTF128 ymm, ymm, xmm, 1

Resolves #159410

Added: 
    llvm/test/CodeGen/X86/fixup-vpermq-to-vinsert.mir

Modified: 
    llvm/lib/Target/X86/X86FixupInstTuning.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/X86/X86FixupInstTuning.cpp b/llvm/lib/Target/X86/X86FixupInstTuning.cpp
index 2ea6dec91e717..4747977e3f43b 100644
--- a/llvm/lib/Target/X86/X86FixupInstTuning.cpp
+++ b/llvm/lib/Target/X86/X86FixupInstTuning.cpp
@@ -23,6 +23,7 @@
 
 #include "X86.h"
 #include "X86InstrInfo.h"
+#include "X86RegisterInfo.h"
 #include "X86Subtarget.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/CodeGen/MachineFunctionAnalysisManager.h"
@@ -49,6 +50,7 @@ class X86FixupInstTuningImpl {
   const X86InstrInfo *TII = nullptr;
   const X86Subtarget *ST = nullptr;
   const MCSchedModel *SM = nullptr;
+  const X86RegisterInfo *TRI = nullptr;
 };
 
 class X86FixupInstTuningLegacy : public MachineFunctionPass {
@@ -304,6 +306,40 @@ bool X86FixupInstTuningImpl::processInstruction(
     return false;
   };
 
+  // `vpermq ymm, ymm, 0x44` -> `vinserti128 ymm, ymm, xmm, 1`
+  // `vpermpd ymm, ymm, 0x44` -> `vinsertf128 ymm, ymm, xmm, 1`
+  // When the immediate is 0x44, VPERMQ/VPERMPD duplicates the lower 128-bit
+  // lane to both lanes: 0x44 = 0b01_00_01_00 means qwords[3:0] = {src[1],
+  // src[0], src[1], src[0]}. This is equivalent to inserting the lower 128 bits
+  // into the upper 128-bit position.
+  auto ProcessVPERMQToVINSERT128 = [&](unsigned NewOpc) -> bool {
+    if (MI.getOperand(NumOperands - 1).getImm() != 0x44)
+      return false;
+    if (!NewOpcPreferable(NewOpc, /*ReplaceInTie*/ false))
+      return false;
+
+    // Operand 1 is the source YMM register; look up its XMM subregister.
+    Register SrcReg = MI.getOperand(1).getReg();
+    Register XmmReg = TRI->getSubReg(SrcReg, X86::sub_xmm);
+
+    LLVM_DEBUG(dbgs() << "Replacing: " << MI);
+    {
+      // Transform: VPERMQ/VPERMPD $dst, $src, 0x44
+      // Into:      VINSERTI128/VINSERTF128 $dst, $src, $xmm_src, 1
+      MI.setDesc(TII->get(NewOpc));
+      // Drop the trailing 0x44 shuffle immediate.
+      MI.removeOperand(NumOperands - 1);
+      // Append the XMM subregister as a plain (non-def, non-kill) use.
+      MI.addOperand(MachineOperand::CreateReg(XmmReg, /*isDef=*/false,
+                                              /*isImp=*/false,
+                                              /*isKill=*/false));
+      // Append the lane immediate (1 = insert into the high 128 bits).
+      MI.addOperand(MachineOperand::CreateImm(1));
+    }
+    LLVM_DEBUG(dbgs() << "     With: " << MI);
+    return true;
+  };
+
   switch (Opc) {
   case X86::BLENDPDrri:
     return ProcessBLENDToMOV(X86::MOVSDrr, 0x3, 0x1);
@@ -392,7 +428,10 @@ bool X86FixupInstTuningImpl::processInstruction(
     return ProcessVPERMILPSmi(X86::VPSHUFDZ256mik);
   case X86::VPERMILPSZmik:
     return ProcessVPERMILPSmi(X86::VPSHUFDZmik);
-
+  case X86::VPERMQYri:
+    return ProcessVPERMQToVINSERT128(X86::VINSERTI128rri);
+  case X86::VPERMPDYri:
+    return ProcessVPERMQToVINSERT128(X86::VINSERTF128rri);
   case X86::MOVLHPSrr:
   case X86::UNPCKLPDrr:
     return ProcessUNPCKLPDrr(X86::PUNPCKLQDQrr, X86::SHUFPDrri);
@@ -638,6 +677,7 @@ bool X86FixupInstTuningImpl::runOnMachineFunction(MachineFunction &MF) {
   bool Changed = false;
   ST = &MF.getSubtarget<X86Subtarget>();
   TII = ST->getInstrInfo();
+  TRI = ST->getRegisterInfo();
   SM = &ST->getSchedModel();
 
   for (MachineBasicBlock &MBB : MF) {

diff  --git a/llvm/test/CodeGen/X86/fixup-vpermq-to-vinsert.mir b/llvm/test/CodeGen/X86/fixup-vpermq-to-vinsert.mir
new file mode 100644
index 0000000000000..6613e380eefa3
--- /dev/null
+++ b/llvm/test/CodeGen/X86/fixup-vpermq-to-vinsert.mir
@@ -0,0 +1,71 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=x86_64-- -run-pass x86-fixup-inst-tuning -mcpu=znver1 -o - %s | FileCheck %s --check-prefix=ZNVER
+# RUN: llc -mtriple=x86_64-- -run-pass x86-fixup-inst-tuning -mcpu=znver3 -o - %s | FileCheck %s --check-prefix=ZNVER
+# RUN: llc -mtriple=x86_64-- -run-pass x86-fixup-inst-tuning -mcpu=haswell -o - %s | FileCheck %s --check-prefix=INTEL
+# RUN: llc -mtriple=x86_64-- -run-pass x86-fixup-inst-tuning -mcpu=alderlake -o - %s | FileCheck %s --check-prefix=INTEL
+
+--- |
+  define void @test_vpermq_0x44() { ret void }
+  define void @test_vpermpd_0x44() { ret void }
+  define void @test_vpermq_other_imm() { ret void }
+...
+---
+name:            test_vpermq_0x44
+body:             |
+  bb.0:
+    liveins: $ymm0
+    ; ZNVER-LABEL: name: test_vpermq_0x44
+    ; ZNVER: liveins: $ymm0
+    ; ZNVER-NEXT: {{  $}}
+    ; ZNVER-NEXT: $ymm0 = VINSERTI128rri $ymm0, $xmm0, 1
+    ; ZNVER-NEXT: RET64 $ymm0
+    ;
+    ; INTEL-LABEL: name: test_vpermq_0x44
+    ; INTEL: liveins: $ymm0
+    ; INTEL-NEXT: {{  $}}
+    ; INTEL-NEXT: $ymm0 = VPERMQYri $ymm0, 68
+    ; INTEL-NEXT: RET64 $ymm0
+    $ymm0 = VPERMQYri $ymm0, 68
+    RET64 $ymm0
+...
+---
+name:            test_vpermpd_0x44
+body:             |
+  bb.0:
+    liveins: $ymm0
+    ; ZNVER-LABEL: name: test_vpermpd_0x44
+    ; ZNVER: liveins: $ymm0
+    ; ZNVER-NEXT: {{  $}}
+    ; ZNVER-NEXT: $ymm0 = VINSERTF128rri $ymm0, $xmm0, 1
+    ; ZNVER-NEXT: RET64 $ymm0
+    ;
+    ; INTEL-LABEL: name: test_vpermpd_0x44
+    ; INTEL: liveins: $ymm0
+    ; INTEL-NEXT: {{  $}}
+    ; INTEL-NEXT: $ymm0 = VPERMPDYri $ymm0, 68
+    ; INTEL-NEXT: RET64 $ymm0
+    $ymm0 = VPERMPDYri $ymm0, 68
+    RET64 $ymm0
+...
+---
+name:            test_vpermq_other_imm
+body:             |
+  bb.0:
+    liveins: $ymm0, $ymm1
+    ; ZNVER-LABEL: name: test_vpermq_other_imm
+    ; ZNVER: liveins: $ymm0, $ymm1
+    ; ZNVER-NEXT: {{  $}}
+    ; ZNVER-NEXT: $ymm0 = VPERMQYri $ymm0, 228
+    ; ZNVER-NEXT: $ymm1 = VPERMQYri $ymm1, 0
+    ; ZNVER-NEXT: RET64 $ymm0, $ymm1
+    ;
+    ; INTEL-LABEL: name: test_vpermq_other_imm
+    ; INTEL: liveins: $ymm0, $ymm1
+    ; INTEL-NEXT: {{  $}}
+    ; INTEL-NEXT: $ymm0 = VPERMQYri $ymm0, 228
+    ; INTEL-NEXT: $ymm1 = VPERMQYri $ymm1, 0
+    ; INTEL-NEXT: RET64 $ymm0, $ymm1
+    $ymm0 = VPERMQYri $ymm0, 228
+    $ymm1 = VPERMQYri $ymm1, 0
+    RET64 $ymm0, $ymm1
+...


        


More information about the llvm-commits mailing list