[llvm] [X86][GISel] Fix crash on bitcasting i16 <-> half with gisel enabled. (PR #168456)

via llvm-commits llvm-commits at lists.llvm.org
Wed Nov 19 02:25:50 PST 2025


https://github.com/GrumpyPigSkin updated https://github.com/llvm/llvm-project/pull/168456

>From d750cc2e41361752535d74084cead772da1d36cf Mon Sep 17 00:00:00 2001
From: GrumpyPigSkin <oliver61 at live.co.uk>
Date: Mon, 17 Nov 2025 22:32:26 +0000
Subject: [PATCH 1/7] [X86][GISel] Fix crash on casting i16 <-> half.

---
 llvm/lib/Target/X86/X86InstrInfo.cpp          | 40 ++++++++++++++--
 .../test/CodeGen/X86/GlobalISel/fp-bitcast.ll | 46 +++++++++++++++++++
 2 files changed, 83 insertions(+), 3 deletions(-)
 create mode 100644 llvm/test/CodeGen/X86/GlobalISel/fp-bitcast.ll

diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index cb0208a4a5f32..30c2e535a9a35 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -4294,10 +4294,28 @@ static unsigned CopyToFromAsymmetricReg(Register DestReg, Register SrcReg,
 
   if (X86::VR128XRegClass.contains(DestReg) &&
       X86::GR32RegClass.contains(SrcReg))
-    // Copy from a VR128 register to a VR128 register.
+    // Copy from a GR32 register to a VR128 register.
     return HasAVX512 ? X86::VMOVDI2PDIZrr
            : HasAVX  ? X86::VMOVDI2PDIrr
                      : X86::MOVDI2PDIrr;
+
+  // SrcReg(VR128) -> DestReg(GR16)
+  // SrcReg(GR16)  -> DestReg(VR128)
+
+  if (X86::GR16RegClass.contains(DestReg) &&
+      X86::VR128XRegClass.contains(SrcReg))
+    // Copy from a VR128 register to a GR16 register.
+    return HasAVX512 ? X86::VPEXTRWZrri
+           : HasAVX  ? X86::VPEXTRWrri
+                     : X86::PEXTRWrri;
+
+  if (X86::VR128XRegClass.contains(DestReg) &&
+      X86::GR16RegClass.contains(SrcReg))
+    // Copy from a GR16 register to a VR128 register.
+    return HasAVX512 ? X86::VPINSRWZrri
+           : HasAVX  ? X86::VPINSRWrri
+                     : X86::PINSRWrri;
+
   return 0;
 }
 
@@ -4370,8 +4388,24 @@ void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
     Opc = CopyToFromAsymmetricReg(DestReg, SrcReg, Subtarget);
 
   if (Opc) {
-    BuildMI(MBB, MI, DL, get(Opc), DestReg)
-        .addReg(SrcReg, getKillRegState(KillSrc));
+    auto MIB = BuildMI(MBB, MI, DL, get(Opc), DestReg);
+    switch (Opc) {
+    case X86::VPINSRWZrri:
+    case X86::VPINSRWrri:
+    case X86::PINSRWrri:
+      MIB.addReg(DestReg, RegState::Undef)
+          .addReg(SrcReg, getKillRegState(KillSrc))
+          .addImm(0);
+      break;
+    case X86::VPEXTRWZrri:
+    case X86::VPEXTRWrri:
+    case X86::PEXTRWrri:
+      MIB.addReg(SrcReg, getKillRegState(KillSrc)).addImm(0);
+      break;
+    default:
+      MIB.addReg(SrcReg, getKillRegState(KillSrc));
+      break;
+    }
     return;
   }
 
diff --git a/llvm/test/CodeGen/X86/GlobalISel/fp-bitcast.ll b/llvm/test/CodeGen/X86/GlobalISel/fp-bitcast.ll
new file mode 100644
index 0000000000000..1d2bd50dbc368
--- /dev/null
+++ b/llvm/test/CodeGen/X86/GlobalISel/fp-bitcast.ll
@@ -0,0 +1,46 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -global-isel | FileCheck %s --check-prefixes=CHECK,SSE2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -global-isel -mattr=+avx | FileCheck %s --check-prefixes=CHECK,AVX
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -global-isel -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,AVX512
+
+define dso_local noundef half @bar(i16 %0) {
+; SSE2-LABEL: bar:
+; SSE2:       # %bb.0: # %entry
+; SSE2-NEXT:    pinsrw $0, %di, %xmm0
+; SSE2-NEXT:    retq
+;
+; AVX-LABEL: bar:
+; AVX:       # %bb.0: # %entry
+; AVX-NEXT:    vpinsrw $0, %di, %xmm0, %xmm0
+; AVX-NEXT:    retq
+;
+; AVX512-LABEL: bar:
+; AVX512:       # %bb.0: # %entry
+; AVX512-NEXT:    vpinsrw $0, %di, %xmm0, %xmm0
+; AVX512-NEXT:    retq
+entry:
+  %2 = bitcast i16 %0 to half
+  ret half %2
+}
+
+define dso_local noundef i16 @test_half_to_i16(half %0) {
+; SSE2-LABEL: test_half_to_i16:
+; SSE2:       # %bb.0: # %entry
+; SSE2-NEXT:    pextrw $0, %xmm0, %ax
+; SSE2-NEXT:    retq
+;
+; AVX-LABEL: test_half_to_i16:
+; AVX:       # %bb.0: # %entry
+; AVX-NEXT:    vpextrw $0, %xmm0, %ax
+; AVX-NEXT:    retq
+;
+; AVX512-LABEL: test_half_to_i16:
+; AVX512:       # %bb.0: # %entry
+; AVX512-NEXT:    vpextrw $0, %xmm0, %ax
+; AVX512-NEXT:    retq
+entry:
+  %2 = bitcast half %0 to i16
+  ret i16 %2
+}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; CHECK: {{.*}}

>From 84f4536a41fa7a1258695f2a50dc90505cc2d1e5 Mon Sep 17 00:00:00 2001
From: GrumpyPigSkin <oliver61 at live.co.uk>
Date: Mon, 17 Nov 2025 22:58:11 +0000
Subject: [PATCH 2/7] [X86][GISel] Updated fp-bitcast test name

---
 llvm/test/CodeGen/X86/GlobalISel/fp-bitcast.ll | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/llvm/test/CodeGen/X86/GlobalISel/fp-bitcast.ll b/llvm/test/CodeGen/X86/GlobalISel/fp-bitcast.ll
index 1d2bd50dbc368..7d44841df1cf9 100644
--- a/llvm/test/CodeGen/X86/GlobalISel/fp-bitcast.ll
+++ b/llvm/test/CodeGen/X86/GlobalISel/fp-bitcast.ll
@@ -3,18 +3,18 @@
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -global-isel -mattr=+avx | FileCheck %s --check-prefixes=CHECK,AVX
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -global-isel -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,AVX512
 
-define dso_local noundef half @bar(i16 %0) {
-; SSE2-LABEL: bar:
+define dso_local noundef half @test_i16_to_half(i16 %0) {
+; SSE2-LABEL: test_i16_to_half:
 ; SSE2:       # %bb.0: # %entry
 ; SSE2-NEXT:    pinsrw $0, %di, %xmm0
 ; SSE2-NEXT:    retq
 ;
-; AVX-LABEL: bar:
+; AVX-LABEL: test_i16_to_half:
 ; AVX:       # %bb.0: # %entry
 ; AVX-NEXT:    vpinsrw $0, %di, %xmm0, %xmm0
 ; AVX-NEXT:    retq
 ;
-; AVX512-LABEL: bar:
+; AVX512-LABEL: test_i16_to_half:
 ; AVX512:       # %bb.0: # %entry
 ; AVX512-NEXT:    vpinsrw $0, %di, %xmm0, %xmm0
 ; AVX512-NEXT:    retq

>From 4d6d6e90933da209c2fc294a55677de2765aa2e6 Mon Sep 17 00:00:00 2001
From: GrumpyPigSkin <oliver61 at live.co.uk>
Date: Mon, 17 Nov 2025 23:05:11 +0000
Subject: [PATCH 3/7] [X86][GISel] Removed redundant check from tests.

---
 llvm/test/CodeGen/X86/GlobalISel/fp-bitcast.ll | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/llvm/test/CodeGen/X86/GlobalISel/fp-bitcast.ll b/llvm/test/CodeGen/X86/GlobalISel/fp-bitcast.ll
index 7d44841df1cf9..c41f2cd596ff1 100644
--- a/llvm/test/CodeGen/X86/GlobalISel/fp-bitcast.ll
+++ b/llvm/test/CodeGen/X86/GlobalISel/fp-bitcast.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -global-isel | FileCheck %s --check-prefixes=CHECK,SSE2
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -global-isel -mattr=+avx | FileCheck %s --check-prefixes=CHECK,AVX
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -global-isel -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,AVX512
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -global-isel | FileCheck %s --check-prefixes=SSE2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -global-isel -mattr=+avx | FileCheck %s --check-prefixes=AVX
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -global-isel -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512
 
 define dso_local noundef half @test_i16_to_half(i16 %0) {
 ; SSE2-LABEL: test_i16_to_half:
@@ -42,5 +42,3 @@ entry:
   %2 = bitcast half %0 to i16
   ret i16 %2
 }
-;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; CHECK: {{.*}}

>From 7860e6e12e7b556fdafc04dc734d41b9a43e673f Mon Sep 17 00:00:00 2001
From: GrumpyPigSkin <oliver61 at live.co.uk>
Date: Tue, 18 Nov 2025 19:54:30 +0000
Subject: [PATCH 4/7] [X86][GISel] Changed insert/extract functions to use movd
 instructions

---
 llvm/lib/Target/X86/X86InstrInfo.cpp          | 80 +++++++++++--------
 .../test/CodeGen/X86/GlobalISel/fp-bitcast.ll | 19 +++--
 2 files changed, 56 insertions(+), 43 deletions(-)

diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index 30c2e535a9a35..20363917045cd 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -4299,23 +4299,6 @@ static unsigned CopyToFromAsymmetricReg(Register DestReg, Register SrcReg,
            : HasAVX  ? X86::VMOVDI2PDIrr
                      : X86::MOVDI2PDIrr;
 
-  // SrcReg(VR128) -> DestReg(GR16)
-  // SrcReg(GR16)  -> DestReg(VR128)
-
-  if (X86::GR16RegClass.contains(DestReg) &&
-      X86::VR128XRegClass.contains(SrcReg))
-    // Copy from a VR128 register to a GR16 register.
-    return HasAVX512 ? X86::VPEXTRWZrri
-           : HasAVX  ? X86::VPEXTRWrri
-                     : X86::PEXTRWrri;
-
-  if (X86::VR128XRegClass.contains(DestReg) &&
-      X86::GR16RegClass.contains(SrcReg))
-    // Copy from a GR16 register to a VR128 register.
-    return HasAVX512 ? X86::VPINSRWZrri
-           : HasAVX  ? X86::VPINSRWrri
-                     : X86::PINSRWrri;
-
   return 0;
 }
 
@@ -4384,28 +4367,55 @@ void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
   else if (X86::VK16RegClass.contains(DestReg, SrcReg))
     Opc = Subtarget.hasBWI() ? (HasEGPR ? X86::KMOVQkk_EVEX : X86::KMOVQkk)
                              : (HasEGPR ? X86::KMOVQkk_EVEX : X86::KMOVWkk);
+
+  else if (X86::GR16RegClass.contains(DestReg) &&
+           X86::VR128XRegClass.contains(SrcReg)) {
+    // Special case for moving xmm to GPR16 registers, get super reg and fall
+    // use CopyToFromAsymmetricReg
+    const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
+    DestReg =
+        TRI->getMatchingSuperReg(DestReg, X86::sub_16bit, &X86::GR32RegClass);
+  } else if (X86::VR128XRegClass.contains(DestReg) &&
+             X86::GR16RegClass.contains(SrcReg)) {
+
+    const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
+
+    // Zero extend GPR16 register to GPR32
+    Register Src32 =
+        TRI->getMatchingSuperReg(SrcReg, X86::sub_16bit, &X86::GR32RegClass);
+
+    BuildMI(MBB, MI, DL, get(X86::MOVZX32rr16), Src32)
+        .addReg(SrcReg, getKillRegState(KillSrc));
+
+    // Assign Src32 to SrcReg and use CopyToFromAsymmetricReg
+    SrcReg = Src32;
+  }
+
   if (!Opc)
     Opc = CopyToFromAsymmetricReg(DestReg, SrcReg, Subtarget);
 
   if (Opc) {
-    auto MIB = BuildMI(MBB, MI, DL, get(Opc), DestReg);
-    switch (Opc) {
-    case X86::VPINSRWZrri:
-    case X86::VPINSRWrri:
-    case X86::PINSRWrri:
-      MIB.addReg(DestReg, RegState::Undef)
-          .addReg(SrcReg, getKillRegState(KillSrc))
-          .addImm(0);
-      break;
-    case X86::VPEXTRWZrri:
-    case X86::VPEXTRWrri:
-    case X86::PEXTRWrri:
-      MIB.addReg(SrcReg, getKillRegState(KillSrc)).addImm(0);
-      break;
-    default:
-      MIB.addReg(SrcReg, getKillRegState(KillSrc));
-      break;
-    }
+    BuildMI(MBB, MI, DL, get(Opc), DestReg)
+        .addReg(SrcReg, getKillRegState(KillSrc));
+    return;
+  }
+
+  // Special case for moving GPR16 to xmm registers
+  if (X86::VR128XRegClass.contains(DestReg) &&
+      X86::GR16RegClass.contains(SrcReg)) {
+
+    const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
+
+    // Zero extend GPR16 register to GPR32
+    Register Src32 =
+        TRI->getMatchingSuperReg(SrcReg, X86::sub_16bit, &X86::GR32RegClass);
+
+    BuildMI(MBB, MI, DL, get(X86::MOVZX32rr16), Src32)
+        .addReg(SrcReg, getKillRegState(KillSrc));
+
+    unsigned Opc = CopyToFromAsymmetricReg(DestReg, Src32, Subtarget);
+    BuildMI(MBB, MI, DL, get(Opc), DestReg).addReg(Src32, RegState::Kill);
+
     return;
   }
 
diff --git a/llvm/test/CodeGen/X86/GlobalISel/fp-bitcast.ll b/llvm/test/CodeGen/X86/GlobalISel/fp-bitcast.ll
index c41f2cd596ff1..96ee93fd18762 100644
--- a/llvm/test/CodeGen/X86/GlobalISel/fp-bitcast.ll
+++ b/llvm/test/CodeGen/X86/GlobalISel/fp-bitcast.ll
@@ -3,40 +3,43 @@
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -global-isel -mattr=+avx | FileCheck %s --check-prefixes=AVX
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -global-isel -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512
 
-define dso_local noundef half @test_i16_to_half(i16 %0) {
+define half @test_i16_to_half(i16 %0) {
 ; SSE2-LABEL: test_i16_to_half:
 ; SSE2:       # %bb.0: # %entry
-; SSE2-NEXT:    pinsrw $0, %di, %xmm0
+; SSE2-NEXT:    movzwl %di, %edi
+; SSE2-NEXT:    movd %edi, %xmm0
 ; SSE2-NEXT:    retq
 ;
 ; AVX-LABEL: test_i16_to_half:
 ; AVX:       # %bb.0: # %entry
-; AVX-NEXT:    vpinsrw $0, %di, %xmm0, %xmm0
+; AVX-NEXT:    movzwl %di, %edi
+; AVX-NEXT:    vmovd %edi, %xmm0
 ; AVX-NEXT:    retq
 ;
 ; AVX512-LABEL: test_i16_to_half:
 ; AVX512:       # %bb.0: # %entry
-; AVX512-NEXT:    vpinsrw $0, %di, %xmm0, %xmm0
+; AVX512-NEXT:    movzwl %di, %edi
+; AVX512-NEXT:    vmovd %edi, %xmm0
 ; AVX512-NEXT:    retq
 entry:
   %2 = bitcast i16 %0 to half
   ret half %2
 }
 
-define dso_local noundef i16 @test_half_to_i16(half %0) {
+define i16 @test_half_to_i16(half %0) {
 ; SSE2-LABEL: test_half_to_i16:
 ; SSE2:       # %bb.0: # %entry
-; SSE2-NEXT:    pextrw $0, %xmm0, %ax
+; SSE2-NEXT:    movd %xmm0, %eax
 ; SSE2-NEXT:    retq
 ;
 ; AVX-LABEL: test_half_to_i16:
 ; AVX:       # %bb.0: # %entry
-; AVX-NEXT:    vpextrw $0, %xmm0, %ax
+; AVX-NEXT:    vmovd %xmm0, %eax
 ; AVX-NEXT:    retq
 ;
 ; AVX512-LABEL: test_half_to_i16:
 ; AVX512:       # %bb.0: # %entry
-; AVX512-NEXT:    vpextrw $0, %xmm0, %ax
+; AVX512-NEXT:    vmovd %xmm0, %eax
 ; AVX512-NEXT:    retq
 entry:
   %2 = bitcast half %0 to i16

>From 29941cb87e7124a48ad5837f235d3033eac8015e Mon Sep 17 00:00:00 2001
From: GrumpyPigSkin <oliver61 at live.co.uk>
Date: Tue, 18 Nov 2025 20:27:56 +0000
Subject: [PATCH 5/7] [X86][GISel] Replace variable TRI with RI

---
 llvm/lib/Target/X86/X86InstrInfo.cpp | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index 20363917045cd..11bfcedd37e11 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -4372,17 +4372,14 @@ void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
            X86::VR128XRegClass.contains(SrcReg)) {
     // Special case for moving xmm to GPR16 registers, get super reg and fall
     // use CopyToFromAsymmetricReg
-    const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
     DestReg =
-        TRI->getMatchingSuperReg(DestReg, X86::sub_16bit, &X86::GR32RegClass);
+        RI.getMatchingSuperReg(DestReg, X86::sub_16bit, &X86::GR32RegClass);
   } else if (X86::VR128XRegClass.contains(DestReg) &&
              X86::GR16RegClass.contains(SrcReg)) {
 
-    const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
-
     // Zero extend GPR16 register to GPR32
     Register Src32 =
-        TRI->getMatchingSuperReg(SrcReg, X86::sub_16bit, &X86::GR32RegClass);
+        RI.getMatchingSuperReg(SrcReg, X86::sub_16bit, &X86::GR32RegClass);
 
     BuildMI(MBB, MI, DL, get(X86::MOVZX32rr16), Src32)
         .addReg(SrcReg, getKillRegState(KillSrc));

>From e35ca3c44d9e5081dc6ea294822471ac496bed79 Mon Sep 17 00:00:00 2001
From: GrumpyPigSkin <oliver61 at live.co.uk>
Date: Wed, 19 Nov 2025 00:16:19 +0000
Subject: [PATCH 6/7] [X86][GISel] Moved special GPR16 <-> XMM case to 
 X86InstructionSelector::selectCopy

---
 .../X86/GISel/X86InstructionSelector.cpp      | 46 +++++++++++++++++++
 llvm/lib/Target/X86/X86InstrInfo.cpp          | 20 --------
 .../test/CodeGen/X86/GlobalISel/fp-bitcast.ll |  3 ++
 3 files changed, 49 insertions(+), 20 deletions(-)

diff --git a/llvm/lib/Target/X86/GISel/X86InstructionSelector.cpp b/llvm/lib/Target/X86/GISel/X86InstructionSelector.cpp
index 53ec7125a6490..a2a1644677ccb 100644
--- a/llvm/lib/Target/X86/GISel/X86InstructionSelector.cpp
+++ b/llvm/lib/Target/X86/GISel/X86InstructionSelector.cpp
@@ -310,6 +310,52 @@ bool X86InstructionSelector::selectCopy(MachineInstr &I,
 
         I.getOperand(1).setReg(ExtSrc);
       }
+
+      const int RegBankSize = 16;
+
+      // Special case GPR16 -> XMM
+      if (SrcSize == RegBankSize && SrcRegBank.getID() == X86::GPRRegBankID &&
+          (SrcRegBank.getID() == X86::VECRRegBankID)) {
+
+        const DebugLoc &DL = I.getDebugLoc();
+
+        // Zero extend GP16 -> GP32
+        Register ExtReg = MRI.createVirtualRegister(&X86::GR32RegClass);
+        BuildMI(*I.getParent(), I, DL, TII.get(TargetOpcode::COPY), ExtReg)
+            .addReg(SrcReg);
+
+        // Copy GPR32 -> XMM
+        BuildMI(*I.getParent(), I, DL, TII.get(TargetOpcode::COPY), DstReg)
+            .addReg(ExtReg);
+
+        I.eraseFromParent();
+      }
+
+      // Special case XMM -> GPR16
+      if (DstSize == RegBankSize && DstRegBank.getID() == X86::GPRRegBankID &&
+          (SrcRegBank.getID() == X86::VECRRegBankID)) {
+
+        const DebugLoc &DL = I.getDebugLoc();
+
+        // Move XMM to GPR32 register.
+        Register Temp32 = MRI.createVirtualRegister(&X86::GR32RegClass);
+        BuildMI(*I.getParent(), I, DL, TII.get(TargetOpcode::COPY), Temp32)
+            .addReg(SrcReg);
+
+        // Extract the lower 16 bits
+        if (Register Dst32 = TRI.getMatchingSuperReg(DstReg, X86::sub_16bit,
+                                                     &X86::GR32RegClass)) {
+          // Optimization for Physical Dst (e.g. AX): Copy to EAX directly.
+          BuildMI(*I.getParent(), I, DL, TII.get(TargetOpcode::COPY), Dst32)
+              .addReg(Temp32);
+        } else {
+          // Handle if there is no super.
+          BuildMI(*I.getParent(), I, DL, TII.get(TargetOpcode::COPY), DstReg)
+              .addReg(Temp32, 0, X86::sub_16bit);
+        }
+
+        I.eraseFromParent();
+      }
     }
 
     return true;
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index 11bfcedd37e11..7e8823ee8761e 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -4368,26 +4368,6 @@ void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
     Opc = Subtarget.hasBWI() ? (HasEGPR ? X86::KMOVQkk_EVEX : X86::KMOVQkk)
                              : (HasEGPR ? X86::KMOVQkk_EVEX : X86::KMOVWkk);
 
-  else if (X86::GR16RegClass.contains(DestReg) &&
-           X86::VR128XRegClass.contains(SrcReg)) {
-    // Special case for moving xmm to GPR16 registers, get super reg and fall
-    // use CopyToFromAsymmetricReg
-    DestReg =
-        RI.getMatchingSuperReg(DestReg, X86::sub_16bit, &X86::GR32RegClass);
-  } else if (X86::VR128XRegClass.contains(DestReg) &&
-             X86::GR16RegClass.contains(SrcReg)) {
-
-    // Zero extend GPR16 register to GPR32
-    Register Src32 =
-        RI.getMatchingSuperReg(SrcReg, X86::sub_16bit, &X86::GR32RegClass);
-
-    BuildMI(MBB, MI, DL, get(X86::MOVZX32rr16), Src32)
-        .addReg(SrcReg, getKillRegState(KillSrc));
-
-    // Assign Src32 to SrcReg and use CopyToFromAsymmetricReg
-    SrcReg = Src32;
-  }
-
   if (!Opc)
     Opc = CopyToFromAsymmetricReg(DestReg, SrcReg, Subtarget);
 
diff --git a/llvm/test/CodeGen/X86/GlobalISel/fp-bitcast.ll b/llvm/test/CodeGen/X86/GlobalISel/fp-bitcast.ll
index 96ee93fd18762..a5d3c02b29a92 100644
--- a/llvm/test/CodeGen/X86/GlobalISel/fp-bitcast.ll
+++ b/llvm/test/CodeGen/X86/GlobalISel/fp-bitcast.ll
@@ -30,16 +30,19 @@ define i16 @test_half_to_i16(half %0) {
 ; SSE2-LABEL: test_half_to_i16:
 ; SSE2:       # %bb.0: # %entry
 ; SSE2-NEXT:    movd %xmm0, %eax
+; SSE2-NEXT:    # kill: def $eax killed $eax def $ax
 ; SSE2-NEXT:    retq
 ;
 ; AVX-LABEL: test_half_to_i16:
 ; AVX:       # %bb.0: # %entry
 ; AVX-NEXT:    vmovd %xmm0, %eax
+; AVX-NEXT:    # kill: def $eax killed $eax def $ax
 ; AVX-NEXT:    retq
 ;
 ; AVX512-LABEL: test_half_to_i16:
 ; AVX512:       # %bb.0: # %entry
 ; AVX512-NEXT:    vmovd %xmm0, %eax
+; AVX512-NEXT:    # kill: def $eax killed $eax def $ax
 ; AVX512-NEXT:    retq
 entry:
   %2 = bitcast half %0 to i16

>From 3455c83ecd1694247ab99a2fe32c6e94fdc21b73 Mon Sep 17 00:00:00 2001
From: GrumpyPigSkin <oliver61 at live.co.uk>
Date: Wed, 19 Nov 2025 10:25:24 +0000
Subject: [PATCH 7/7] [X86][GISel] Removed missed code and updated test.

---
 .../X86/GISel/X86InstructionSelector.cpp      | 76 +++++++++----------
 llvm/lib/Target/X86/X86InstrInfo.cpp          | 19 -----
 .../test/CodeGen/X86/GlobalISel/fp-bitcast.ll | 12 +--
 3 files changed, 44 insertions(+), 63 deletions(-)

diff --git a/llvm/lib/Target/X86/GISel/X86InstructionSelector.cpp b/llvm/lib/Target/X86/GISel/X86InstructionSelector.cpp
index a2a1644677ccb..7cddef10146c7 100644
--- a/llvm/lib/Target/X86/GISel/X86InstructionSelector.cpp
+++ b/llvm/lib/Target/X86/GISel/X86InstructionSelector.cpp
@@ -310,52 +310,52 @@ bool X86InstructionSelector::selectCopy(MachineInstr &I,
 
         I.getOperand(1).setReg(ExtSrc);
       }
+    }
 
-      const int RegBankSize = 16;
+    const int RegBankSize = 16;
 
-      // Special case GPR16 -> XMM
-      if (SrcSize == RegBankSize && SrcRegBank.getID() == X86::GPRRegBankID &&
-          (SrcRegBank.getID() == X86::VECRRegBankID)) {
+    // Special case GPR16 -> XMM
+    if (SrcSize == RegBankSize && SrcRegBank.getID() == X86::GPRRegBankID &&
+        (DstRegBank.getID() == X86::VECRRegBankID)) {
 
-        const DebugLoc &DL = I.getDebugLoc();
+      const DebugLoc &DL = I.getDebugLoc();
 
-        // Zero extend GP16 -> GP32
-        Register ExtReg = MRI.createVirtualRegister(&X86::GR32RegClass);
-        BuildMI(*I.getParent(), I, DL, TII.get(TargetOpcode::COPY), ExtReg)
-            .addReg(SrcReg);
+      // Zero extend GPR16 -> GPR32
+      Register ExtReg = MRI.createVirtualRegister(&X86::GR32RegClass);
+      BuildMI(*I.getParent(), I, DL, TII.get(X86::MOVZX32rr16), ExtReg)
+          .addReg(SrcReg);
 
-        // Copy GPR32 -> XMM
-        BuildMI(*I.getParent(), I, DL, TII.get(TargetOpcode::COPY), DstReg)
-            .addReg(ExtReg);
+      // Copy to GPR32 -> XMM
+      BuildMI(*I.getParent(), I, DL, TII.get(TargetOpcode::COPY), DstReg)
+          .addReg(ExtReg);
 
-        I.eraseFromParent();
-      }
+      I.eraseFromParent();
+    }
 
-      // Special case XMM -> GPR16
-      if (DstSize == RegBankSize && DstRegBank.getID() == X86::GPRRegBankID &&
-          (SrcRegBank.getID() == X86::VECRRegBankID)) {
-
-        const DebugLoc &DL = I.getDebugLoc();
-
-        // Move XMM to GPR32 register.
-        Register Temp32 = MRI.createVirtualRegister(&X86::GR32RegClass);
-        BuildMI(*I.getParent(), I, DL, TII.get(TargetOpcode::COPY), Temp32)
-            .addReg(SrcReg);
-
-        // Extract the lower 16 bits
-        if (Register Dst32 = TRI.getMatchingSuperReg(DstReg, X86::sub_16bit,
-                                                     &X86::GR32RegClass)) {
-          // Optimization for Physical Dst (e.g. AX): Copy to EAX directly.
-          BuildMI(*I.getParent(), I, DL, TII.get(TargetOpcode::COPY), Dst32)
-              .addReg(Temp32);
-        } else {
-          // Handle if there is no super.
-          BuildMI(*I.getParent(), I, DL, TII.get(TargetOpcode::COPY), DstReg)
-              .addReg(Temp32, 0, X86::sub_16bit);
-        }
-
-        I.eraseFromParent();
+    // Special case XMM -> GPR16
+    if (DstSize == RegBankSize && DstRegBank.getID() == X86::GPRRegBankID &&
+        (SrcRegBank.getID() == X86::VECRRegBankID)) {
+
+      const DebugLoc &DL = I.getDebugLoc();
+
+      // Move XMM to GPR32 register.
+      Register Temp32 = MRI.createVirtualRegister(&X86::GR32RegClass);
+      BuildMI(*I.getParent(), I, DL, TII.get(TargetOpcode::COPY), Temp32)
+          .addReg(SrcReg);
+
+      // Extract the lower 16 bits
+      if (Register Dst32 = TRI.getMatchingSuperReg(DstReg, X86::sub_16bit,
+                                                   &X86::GR32RegClass)) {
+        // Optimization for Physical Dst (e.g. AX): Copy to EAX directly.
+        BuildMI(*I.getParent(), I, DL, TII.get(TargetOpcode::COPY), Dst32)
+            .addReg(Temp32);
+      } else {
+        // Handle if there is no super.
+        BuildMI(*I.getParent(), I, DL, TII.get(TargetOpcode::COPY), DstReg)
+            .addReg(Temp32, 0, X86::sub_16bit);
       }
+
+      I.eraseFromParent();
     }
 
     return true;
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index 7e8823ee8761e..e03b3ae68a7df 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -4377,25 +4377,6 @@ void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
     return;
   }
 
-  // Special case for moving GPR16 to xmm registers
-  if (X86::VR128XRegClass.contains(DestReg) &&
-      X86::GR16RegClass.contains(SrcReg)) {
-
-    const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
-
-    // Zero extend GPR16 register to GPR32
-    Register Src32 =
-        TRI->getMatchingSuperReg(SrcReg, X86::sub_16bit, &X86::GR32RegClass);
-
-    BuildMI(MBB, MI, DL, get(X86::MOVZX32rr16), Src32)
-        .addReg(SrcReg, getKillRegState(KillSrc));
-
-    unsigned Opc = CopyToFromAsymmetricReg(DestReg, Src32, Subtarget);
-    BuildMI(MBB, MI, DL, get(Opc), DestReg).addReg(Src32, RegState::Kill);
-
-    return;
-  }
-
   if (SrcReg == X86::EFLAGS || DestReg == X86::EFLAGS) {
     // FIXME: We use a fatal error here because historically LLVM has tried
     // lower some of these physreg copies and we want to ensure we get
diff --git a/llvm/test/CodeGen/X86/GlobalISel/fp-bitcast.ll b/llvm/test/CodeGen/X86/GlobalISel/fp-bitcast.ll
index a5d3c02b29a92..12728bf82f55e 100644
--- a/llvm/test/CodeGen/X86/GlobalISel/fp-bitcast.ll
+++ b/llvm/test/CodeGen/X86/GlobalISel/fp-bitcast.ll
@@ -6,20 +6,20 @@
 define half @test_i16_to_half(i16 %0) {
 ; SSE2-LABEL: test_i16_to_half:
 ; SSE2:       # %bb.0: # %entry
-; SSE2-NEXT:    movzwl %di, %edi
-; SSE2-NEXT:    movd %edi, %xmm0
+; SSE2-NEXT:    movzwl %di, %eax
+; SSE2-NEXT:    movd %eax, %xmm0
 ; SSE2-NEXT:    retq
 ;
 ; AVX-LABEL: test_i16_to_half:
 ; AVX:       # %bb.0: # %entry
-; AVX-NEXT:    movzwl %di, %edi
-; AVX-NEXT:    vmovd %edi, %xmm0
+; AVX-NEXT:    movzwl %di, %eax
+; AVX-NEXT:    vmovd %eax, %xmm0
 ; AVX-NEXT:    retq
 ;
 ; AVX512-LABEL: test_i16_to_half:
 ; AVX512:       # %bb.0: # %entry
-; AVX512-NEXT:    movzwl %di, %edi
-; AVX512-NEXT:    vmovd %edi, %xmm0
+; AVX512-NEXT:    movzwl %di, %eax
+; AVX512-NEXT:    vmovd %eax, %xmm0
 ; AVX512-NEXT:    retq
 entry:
   %2 = bitcast i16 %0 to half



More information about the llvm-commits mailing list