[llvm] [X86][GISel] Fix crash on bitcasting i16 <-> half with gisel enabled. (PR #168456)

via llvm-commits llvm-commits at lists.llvm.org
Tue Nov 18 11:55:17 PST 2025


https://github.com/GrumpyPigSkin updated https://github.com/llvm/llvm-project/pull/168456

>From d750cc2e41361752535d74084cead772da1d36cf Mon Sep 17 00:00:00 2001
From: GrumpyPigSkin <oliver61 at live.co.uk>
Date: Mon, 17 Nov 2025 22:32:26 +0000
Subject: [PATCH 1/4] [X86][GISel] Fix crash on casting i16 <-> half.

---
 llvm/lib/Target/X86/X86InstrInfo.cpp          | 40 ++++++++++++++--
 .../test/CodeGen/X86/GlobalISel/fp-bitcast.ll | 46 +++++++++++++++++++
 2 files changed, 83 insertions(+), 3 deletions(-)
 create mode 100644 llvm/test/CodeGen/X86/GlobalISel/fp-bitcast.ll

diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index cb0208a4a5f32..30c2e535a9a35 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -4294,10 +4294,28 @@ static unsigned CopyToFromAsymmetricReg(Register DestReg, Register SrcReg,
 
   if (X86::VR128XRegClass.contains(DestReg) &&
       X86::GR32RegClass.contains(SrcReg))
-    // Copy from a VR128 register to a VR128 register.
+    // Copy from a GR32 register to a VR128 register.
     return HasAVX512 ? X86::VMOVDI2PDIZrr
            : HasAVX  ? X86::VMOVDI2PDIrr
                      : X86::MOVDI2PDIrr;
+
+  // SrcReg(VR128) -> DestReg(GR16)
+  // SrcReg(GR16)  -> DestReg(VR128)
+
+  if (X86::GR16RegClass.contains(DestReg) &&
+      X86::VR128XRegClass.contains(SrcReg))
+    // Copy from a VR128 register to a GR16 register.
+    return HasAVX512 ? X86::VPEXTRWZrri
+           : HasAVX  ? X86::VPEXTRWrri
+                     : X86::PEXTRWrri;
+
+  if (X86::VR128XRegClass.contains(DestReg) &&
+      X86::GR16RegClass.contains(SrcReg))
+    // Copy from a GR16 register to a VR128 register.
+    return HasAVX512 ? X86::VPINSRWZrri
+           : HasAVX  ? X86::VPINSRWrri
+                     : X86::PINSRWrri;
+
   return 0;
 }
 
@@ -4370,8 +4388,24 @@ void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
     Opc = CopyToFromAsymmetricReg(DestReg, SrcReg, Subtarget);
 
   if (Opc) {
-    BuildMI(MBB, MI, DL, get(Opc), DestReg)
-        .addReg(SrcReg, getKillRegState(KillSrc));
+    auto MIB = BuildMI(MBB, MI, DL, get(Opc), DestReg);
+    switch (Opc) {
+    case X86::VPINSRWZrri:
+    case X86::VPINSRWrri:
+    case X86::PINSRWrri:
+      MIB.addReg(DestReg, RegState::Undef)
+          .addReg(SrcReg, getKillRegState(KillSrc))
+          .addImm(0);
+      break;
+    case X86::VPEXTRWZrri:
+    case X86::VPEXTRWrri:
+    case X86::PEXTRWrri:
+      MIB.addReg(SrcReg, getKillRegState(KillSrc)).addImm(0);
+      break;
+    default:
+      MIB.addReg(SrcReg, getKillRegState(KillSrc));
+      break;
+    }
     return;
   }
 
diff --git a/llvm/test/CodeGen/X86/GlobalISel/fp-bitcast.ll b/llvm/test/CodeGen/X86/GlobalISel/fp-bitcast.ll
new file mode 100644
index 0000000000000..1d2bd50dbc368
--- /dev/null
+++ b/llvm/test/CodeGen/X86/GlobalISel/fp-bitcast.ll
@@ -0,0 +1,46 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -global-isel | FileCheck %s --check-prefixes=CHECK,SSE2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -global-isel -mattr=+avx | FileCheck %s --check-prefixes=CHECK,AVX
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -global-isel -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,AVX512
+
+define dso_local noundef half @bar(i16 %0) {
+; SSE2-LABEL: bar:
+; SSE2:       # %bb.0: # %entry
+; SSE2-NEXT:    pinsrw $0, %di, %xmm0
+; SSE2-NEXT:    retq
+;
+; AVX-LABEL: bar:
+; AVX:       # %bb.0: # %entry
+; AVX-NEXT:    vpinsrw $0, %di, %xmm0, %xmm0
+; AVX-NEXT:    retq
+;
+; AVX512-LABEL: bar:
+; AVX512:       # %bb.0: # %entry
+; AVX512-NEXT:    vpinsrw $0, %di, %xmm0, %xmm0
+; AVX512-NEXT:    retq
+entry:
+  %2 = bitcast i16 %0 to half
+  ret half %2
+}
+
+define dso_local noundef i16 @test_half_to_i16(half %0) {
+; SSE2-LABEL: test_half_to_i16:
+; SSE2:       # %bb.0: # %entry
+; SSE2-NEXT:    pextrw $0, %xmm0, %ax
+; SSE2-NEXT:    retq
+;
+; AVX-LABEL: test_half_to_i16:
+; AVX:       # %bb.0: # %entry
+; AVX-NEXT:    vpextrw $0, %xmm0, %ax
+; AVX-NEXT:    retq
+;
+; AVX512-LABEL: test_half_to_i16:
+; AVX512:       # %bb.0: # %entry
+; AVX512-NEXT:    vpextrw $0, %xmm0, %ax
+; AVX512-NEXT:    retq
+entry:
+  %2 = bitcast half %0 to i16
+  ret i16 %2
+}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; CHECK: {{.*}}

>From 84f4536a41fa7a1258695f2a50dc90505cc2d1e5 Mon Sep 17 00:00:00 2001
From: GrumpyPigSkin <oliver61 at live.co.uk>
Date: Mon, 17 Nov 2025 22:58:11 +0000
Subject: [PATCH 2/4] [X86][GISel] Updated fp-bitcast test name

---
 llvm/test/CodeGen/X86/GlobalISel/fp-bitcast.ll | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/llvm/test/CodeGen/X86/GlobalISel/fp-bitcast.ll b/llvm/test/CodeGen/X86/GlobalISel/fp-bitcast.ll
index 1d2bd50dbc368..7d44841df1cf9 100644
--- a/llvm/test/CodeGen/X86/GlobalISel/fp-bitcast.ll
+++ b/llvm/test/CodeGen/X86/GlobalISel/fp-bitcast.ll
@@ -3,18 +3,18 @@
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -global-isel -mattr=+avx | FileCheck %s --check-prefixes=CHECK,AVX
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -global-isel -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,AVX512
 
-define dso_local noundef half @bar(i16 %0) {
-; SSE2-LABEL: bar:
+define dso_local noundef half @test_i16_to_half(i16 %0) {
+; SSE2-LABEL: test_i16_to_half:
 ; SSE2:       # %bb.0: # %entry
 ; SSE2-NEXT:    pinsrw $0, %di, %xmm0
 ; SSE2-NEXT:    retq
 ;
-; AVX-LABEL: bar:
+; AVX-LABEL: test_i16_to_half:
 ; AVX:       # %bb.0: # %entry
 ; AVX-NEXT:    vpinsrw $0, %di, %xmm0, %xmm0
 ; AVX-NEXT:    retq
 ;
-; AVX512-LABEL: bar:
+; AVX512-LABEL: test_i16_to_half:
 ; AVX512:       # %bb.0: # %entry
 ; AVX512-NEXT:    vpinsrw $0, %di, %xmm0, %xmm0
 ; AVX512-NEXT:    retq

>From 4d6d6e90933da209c2fc294a55677de2765aa2e6 Mon Sep 17 00:00:00 2001
From: GrumpyPigSkin <oliver61 at live.co.uk>
Date: Mon, 17 Nov 2025 23:05:11 +0000
Subject: [PATCH 3/4] [X86][GISel] Removed redundant check from tests.

---
 llvm/test/CodeGen/X86/GlobalISel/fp-bitcast.ll | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/llvm/test/CodeGen/X86/GlobalISel/fp-bitcast.ll b/llvm/test/CodeGen/X86/GlobalISel/fp-bitcast.ll
index 7d44841df1cf9..c41f2cd596ff1 100644
--- a/llvm/test/CodeGen/X86/GlobalISel/fp-bitcast.ll
+++ b/llvm/test/CodeGen/X86/GlobalISel/fp-bitcast.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -global-isel | FileCheck %s --check-prefixes=CHECK,SSE2
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -global-isel -mattr=+avx | FileCheck %s --check-prefixes=CHECK,AVX
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -global-isel -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,AVX512
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -global-isel | FileCheck %s --check-prefixes=SSE2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -global-isel -mattr=+avx | FileCheck %s --check-prefixes=AVX
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -global-isel -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512
 
 define dso_local noundef half @test_i16_to_half(i16 %0) {
 ; SSE2-LABEL: test_i16_to_half:
@@ -42,5 +42,3 @@ entry:
   %2 = bitcast half %0 to i16
   ret i16 %2
 }
-;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; CHECK: {{.*}}

>From 7860e6e12e7b556fdafc04dc734d41b9a43e673f Mon Sep 17 00:00:00 2001
From: GrumpyPigSkin <oliver61 at live.co.uk>
Date: Tue, 18 Nov 2025 19:54:30 +0000
Subject: [PATCH 4/4] [X86][GISel] Changed insert/extract functions to use movd
 instructions

---
 llvm/lib/Target/X86/X86InstrInfo.cpp          | 80 +++++++++++--------
 .../test/CodeGen/X86/GlobalISel/fp-bitcast.ll | 19 +++--
 2 files changed, 56 insertions(+), 43 deletions(-)

diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index 30c2e535a9a35..20363917045cd 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -4299,23 +4299,6 @@ static unsigned CopyToFromAsymmetricReg(Register DestReg, Register SrcReg,
            : HasAVX  ? X86::VMOVDI2PDIrr
                      : X86::MOVDI2PDIrr;
 
-  // SrcReg(VR128) -> DestReg(GR16)
-  // SrcReg(GR16)  -> DestReg(VR128)
-
-  if (X86::GR16RegClass.contains(DestReg) &&
-      X86::VR128XRegClass.contains(SrcReg))
-    // Copy from a VR128 register to a GR16 register.
-    return HasAVX512 ? X86::VPEXTRWZrri
-           : HasAVX  ? X86::VPEXTRWrri
-                     : X86::PEXTRWrri;
-
-  if (X86::VR128XRegClass.contains(DestReg) &&
-      X86::GR16RegClass.contains(SrcReg))
-    // Copy from a GR16 register to a VR128 register.
-    return HasAVX512 ? X86::VPINSRWZrri
-           : HasAVX  ? X86::VPINSRWrri
-                     : X86::PINSRWrri;
-
   return 0;
 }
 
@@ -4384,28 +4367,55 @@ void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
   else if (X86::VK16RegClass.contains(DestReg, SrcReg))
     Opc = Subtarget.hasBWI() ? (HasEGPR ? X86::KMOVQkk_EVEX : X86::KMOVQkk)
                              : (HasEGPR ? X86::KMOVQkk_EVEX : X86::KMOVWkk);
+
+  else if (X86::GR16RegClass.contains(DestReg) &&
+           X86::VR128XRegClass.contains(SrcReg)) {
+    // Special case for moving xmm to GPR16 registers: widen the destination to
+    // its 32-bit super-register and fall through to CopyToFromAsymmetricReg.
+    const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
+    DestReg =
+        TRI->getMatchingSuperReg(DestReg, X86::sub_16bit, &X86::GR32RegClass);
+  } else if (X86::VR128XRegClass.contains(DestReg) &&
+             X86::GR16RegClass.contains(SrcReg)) {
+
+    const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
+
+    // Zero extend GPR16 register to GPR32
+    Register Src32 =
+        TRI->getMatchingSuperReg(SrcReg, X86::sub_16bit, &X86::GR32RegClass);
+
+    BuildMI(MBB, MI, DL, get(X86::MOVZX32rr16), Src32)
+        .addReg(SrcReg, getKillRegState(KillSrc));
+
+    // Assign Src32 to SrcReg and use CopyToFromAsymmetricReg
+    SrcReg = Src32;
+  }
+
   if (!Opc)
     Opc = CopyToFromAsymmetricReg(DestReg, SrcReg, Subtarget);
 
   if (Opc) {
-    auto MIB = BuildMI(MBB, MI, DL, get(Opc), DestReg);
-    switch (Opc) {
-    case X86::VPINSRWZrri:
-    case X86::VPINSRWrri:
-    case X86::PINSRWrri:
-      MIB.addReg(DestReg, RegState::Undef)
-          .addReg(SrcReg, getKillRegState(KillSrc))
-          .addImm(0);
-      break;
-    case X86::VPEXTRWZrri:
-    case X86::VPEXTRWrri:
-    case X86::PEXTRWrri:
-      MIB.addReg(SrcReg, getKillRegState(KillSrc)).addImm(0);
-      break;
-    default:
-      MIB.addReg(SrcReg, getKillRegState(KillSrc));
-      break;
-    }
+    BuildMI(MBB, MI, DL, get(Opc), DestReg)
+        .addReg(SrcReg, getKillRegState(KillSrc));
+    return;
+  }
+
+  // Special case for moving GPR16 to xmm registers
+  if (X86::VR128XRegClass.contains(DestReg) &&
+      X86::GR16RegClass.contains(SrcReg)) {
+
+    const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
+
+    // Zero extend GPR16 register to GPR32
+    Register Src32 =
+        TRI->getMatchingSuperReg(SrcReg, X86::sub_16bit, &X86::GR32RegClass);
+
+    BuildMI(MBB, MI, DL, get(X86::MOVZX32rr16), Src32)
+        .addReg(SrcReg, getKillRegState(KillSrc));
+
+    unsigned Opc = CopyToFromAsymmetricReg(DestReg, Src32, Subtarget);
+    BuildMI(MBB, MI, DL, get(Opc), DestReg).addReg(Src32, RegState::Kill);
+
     return;
   }
 
diff --git a/llvm/test/CodeGen/X86/GlobalISel/fp-bitcast.ll b/llvm/test/CodeGen/X86/GlobalISel/fp-bitcast.ll
index c41f2cd596ff1..96ee93fd18762 100644
--- a/llvm/test/CodeGen/X86/GlobalISel/fp-bitcast.ll
+++ b/llvm/test/CodeGen/X86/GlobalISel/fp-bitcast.ll
@@ -3,40 +3,43 @@
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -global-isel -mattr=+avx | FileCheck %s --check-prefixes=AVX
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -global-isel -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512
 
-define dso_local noundef half @test_i16_to_half(i16 %0) {
+define half @test_i16_to_half(i16 %0) {
 ; SSE2-LABEL: test_i16_to_half:
 ; SSE2:       # %bb.0: # %entry
-; SSE2-NEXT:    pinsrw $0, %di, %xmm0
+; SSE2-NEXT:    movzwl %di, %edi
+; SSE2-NEXT:    movd %edi, %xmm0
 ; SSE2-NEXT:    retq
 ;
 ; AVX-LABEL: test_i16_to_half:
 ; AVX:       # %bb.0: # %entry
-; AVX-NEXT:    vpinsrw $0, %di, %xmm0, %xmm0
+; AVX-NEXT:    movzwl %di, %edi
+; AVX-NEXT:    vmovd %edi, %xmm0
 ; AVX-NEXT:    retq
 ;
 ; AVX512-LABEL: test_i16_to_half:
 ; AVX512:       # %bb.0: # %entry
-; AVX512-NEXT:    vpinsrw $0, %di, %xmm0, %xmm0
+; AVX512-NEXT:    movzwl %di, %edi
+; AVX512-NEXT:    vmovd %edi, %xmm0
 ; AVX512-NEXT:    retq
 entry:
   %2 = bitcast i16 %0 to half
   ret half %2
 }
 
-define dso_local noundef i16 @test_half_to_i16(half %0) {
+define i16 @test_half_to_i16(half %0) {
 ; SSE2-LABEL: test_half_to_i16:
 ; SSE2:       # %bb.0: # %entry
-; SSE2-NEXT:    pextrw $0, %xmm0, %ax
+; SSE2-NEXT:    movd %xmm0, %eax
 ; SSE2-NEXT:    retq
 ;
 ; AVX-LABEL: test_half_to_i16:
 ; AVX:       # %bb.0: # %entry
-; AVX-NEXT:    vpextrw $0, %xmm0, %ax
+; AVX-NEXT:    vmovd %xmm0, %eax
 ; AVX-NEXT:    retq
 ;
 ; AVX512-LABEL: test_half_to_i16:
 ; AVX512:       # %bb.0: # %entry
-; AVX512-NEXT:    vpextrw $0, %xmm0, %ax
+; AVX512-NEXT:    vmovd %xmm0, %eax
 ; AVX512-NEXT:    retq
 entry:
   %2 = bitcast half %0 to i16



More information about the llvm-commits mailing list