[llvm] [X86][GISel] Fix crash on bitcasting i16 <-> half with gisel enabled. (PR #168456)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Nov 19 02:25:50 PST 2025
https://github.com/GrumpyPigSkin updated https://github.com/llvm/llvm-project/pull/168456
>From d750cc2e41361752535d74084cead772da1d36cf Mon Sep 17 00:00:00 2001
From: GrumpyPigSkin <oliver61 at live.co.uk>
Date: Mon, 17 Nov 2025 22:32:26 +0000
Subject: [PATCH 1/7] [X86][GISel] Fix crash on casting i16 <-> half.
---
llvm/lib/Target/X86/X86InstrInfo.cpp | 40 ++++++++++++++--
.../test/CodeGen/X86/GlobalISel/fp-bitcast.ll | 46 +++++++++++++++++++
2 files changed, 83 insertions(+), 3 deletions(-)
create mode 100644 llvm/test/CodeGen/X86/GlobalISel/fp-bitcast.ll
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index cb0208a4a5f32..30c2e535a9a35 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -4294,10 +4294,28 @@ static unsigned CopyToFromAsymmetricReg(Register DestReg, Register SrcReg,
if (X86::VR128XRegClass.contains(DestReg) &&
X86::GR32RegClass.contains(SrcReg))
- // Copy from a VR128 register to a VR128 register.
+ // Copy from a GR32 register to a VR128 register.
return HasAVX512 ? X86::VMOVDI2PDIZrr
: HasAVX ? X86::VMOVDI2PDIrr
: X86::MOVDI2PDIrr;
+
+ // SrcReg(VR128) -> DestReg(GR16)
+ // SrcReg(GR16) -> DestReg(VR128)
+
+ if (X86::GR16RegClass.contains(DestReg) &&
+ X86::VR128XRegClass.contains(SrcReg))
+ // Copy from a VR128 register to a GR16 register.
+ return HasAVX512 ? X86::VPEXTRWZrri
+ : HasAVX ? X86::VPEXTRWrri
+ : X86::PEXTRWrri;
+
+ if (X86::VR128XRegClass.contains(DestReg) &&
+ X86::GR16RegClass.contains(SrcReg))
+ // Copy from a GR16 register to a VR128 register.
+ return HasAVX512 ? X86::VPINSRWZrri
+ : HasAVX ? X86::VPINSRWrri
+ : X86::PINSRWrri;
+
return 0;
}
@@ -4370,8 +4388,24 @@ void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
Opc = CopyToFromAsymmetricReg(DestReg, SrcReg, Subtarget);
if (Opc) {
- BuildMI(MBB, MI, DL, get(Opc), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc));
+ auto MIB = BuildMI(MBB, MI, DL, get(Opc), DestReg);
+ switch (Opc) {
+ case X86::VPINSRWZrri:
+ case X86::VPINSRWrri:
+ case X86::PINSRWrri:
+ MIB.addReg(DestReg, RegState::Undef)
+ .addReg(SrcReg, getKillRegState(KillSrc))
+ .addImm(0);
+ break;
+ case X86::VPEXTRWZrri:
+ case X86::VPEXTRWrri:
+ case X86::PEXTRWrri:
+ MIB.addReg(SrcReg, getKillRegState(KillSrc)).addImm(0);
+ break;
+ default:
+ MIB.addReg(SrcReg, getKillRegState(KillSrc));
+ break;
+ }
return;
}
diff --git a/llvm/test/CodeGen/X86/GlobalISel/fp-bitcast.ll b/llvm/test/CodeGen/X86/GlobalISel/fp-bitcast.ll
new file mode 100644
index 0000000000000..1d2bd50dbc368
--- /dev/null
+++ b/llvm/test/CodeGen/X86/GlobalISel/fp-bitcast.ll
@@ -0,0 +1,46 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -global-isel | FileCheck %s --check-prefixes=CHECK,SSE2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -global-isel -mattr=+avx | FileCheck %s --check-prefixes=CHECK,AVX
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -global-isel -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,AVX512
+
+define dso_local noundef half @bar(i16 %0) {
+; SSE2-LABEL: bar:
+; SSE2: # %bb.0: # %entry
+; SSE2-NEXT: pinsrw $0, %di, %xmm0
+; SSE2-NEXT: retq
+;
+; AVX-LABEL: bar:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: vpinsrw $0, %di, %xmm0, %xmm0
+; AVX-NEXT: retq
+;
+; AVX512-LABEL: bar:
+; AVX512: # %bb.0: # %entry
+; AVX512-NEXT: vpinsrw $0, %di, %xmm0, %xmm0
+; AVX512-NEXT: retq
+entry:
+ %2 = bitcast i16 %0 to half
+ ret half %2
+}
+
+define dso_local noundef i16 @test_half_to_i16(half %0) {
+; SSE2-LABEL: test_half_to_i16:
+; SSE2: # %bb.0: # %entry
+; SSE2-NEXT: pextrw $0, %xmm0, %ax
+; SSE2-NEXT: retq
+;
+; AVX-LABEL: test_half_to_i16:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: vpextrw $0, %xmm0, %ax
+; AVX-NEXT: retq
+;
+; AVX512-LABEL: test_half_to_i16:
+; AVX512: # %bb.0: # %entry
+; AVX512-NEXT: vpextrw $0, %xmm0, %ax
+; AVX512-NEXT: retq
+entry:
+ %2 = bitcast half %0 to i16
+ ret i16 %2
+}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; CHECK: {{.*}}
>From 84f4536a41fa7a1258695f2a50dc90505cc2d1e5 Mon Sep 17 00:00:00 2001
From: GrumpyPigSkin <oliver61 at live.co.uk>
Date: Mon, 17 Nov 2025 22:58:11 +0000
Subject: [PATCH 2/7] [X86][GISel] Updated fp-bitcast test name
---
llvm/test/CodeGen/X86/GlobalISel/fp-bitcast.ll | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/llvm/test/CodeGen/X86/GlobalISel/fp-bitcast.ll b/llvm/test/CodeGen/X86/GlobalISel/fp-bitcast.ll
index 1d2bd50dbc368..7d44841df1cf9 100644
--- a/llvm/test/CodeGen/X86/GlobalISel/fp-bitcast.ll
+++ b/llvm/test/CodeGen/X86/GlobalISel/fp-bitcast.ll
@@ -3,18 +3,18 @@
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -global-isel -mattr=+avx | FileCheck %s --check-prefixes=CHECK,AVX
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -global-isel -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,AVX512
-define dso_local noundef half @bar(i16 %0) {
-; SSE2-LABEL: bar:
+define dso_local noundef half @test_i16_to_half(i16 %0) {
+; SSE2-LABEL: test_i16_to_half:
; SSE2: # %bb.0: # %entry
; SSE2-NEXT: pinsrw $0, %di, %xmm0
; SSE2-NEXT: retq
;
-; AVX-LABEL: bar:
+; AVX-LABEL: test_i16_to_half:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vpinsrw $0, %di, %xmm0, %xmm0
; AVX-NEXT: retq
;
-; AVX512-LABEL: bar:
+; AVX512-LABEL: test_i16_to_half:
; AVX512: # %bb.0: # %entry
; AVX512-NEXT: vpinsrw $0, %di, %xmm0, %xmm0
; AVX512-NEXT: retq
>From 4d6d6e90933da209c2fc294a55677de2765aa2e6 Mon Sep 17 00:00:00 2001
From: GrumpyPigSkin <oliver61 at live.co.uk>
Date: Mon, 17 Nov 2025 23:05:11 +0000
Subject: [PATCH 3/7] [X86][GISel] Removed redundant check from tests.
---
llvm/test/CodeGen/X86/GlobalISel/fp-bitcast.ll | 8 +++-----
1 file changed, 3 insertions(+), 5 deletions(-)
diff --git a/llvm/test/CodeGen/X86/GlobalISel/fp-bitcast.ll b/llvm/test/CodeGen/X86/GlobalISel/fp-bitcast.ll
index 7d44841df1cf9..c41f2cd596ff1 100644
--- a/llvm/test/CodeGen/X86/GlobalISel/fp-bitcast.ll
+++ b/llvm/test/CodeGen/X86/GlobalISel/fp-bitcast.ll
@@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -global-isel | FileCheck %s --check-prefixes=CHECK,SSE2
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -global-isel -mattr=+avx | FileCheck %s --check-prefixes=CHECK,AVX
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -global-isel -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,AVX512
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -global-isel | FileCheck %s --check-prefixes=SSE2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -global-isel -mattr=+avx | FileCheck %s --check-prefixes=AVX
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -global-isel -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512
define dso_local noundef half @test_i16_to_half(i16 %0) {
; SSE2-LABEL: test_i16_to_half:
@@ -42,5 +42,3 @@ entry:
%2 = bitcast half %0 to i16
ret i16 %2
}
-;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; CHECK: {{.*}}
>From 7860e6e12e7b556fdafc04dc734d41b9a43e673f Mon Sep 17 00:00:00 2001
From: GrumpyPigSkin <oliver61 at live.co.uk>
Date: Tue, 18 Nov 2025 19:54:30 +0000
Subject: [PATCH 4/7] [X86][GISel] Changed insert/extract functions to use movd
instructions
---
llvm/lib/Target/X86/X86InstrInfo.cpp | 80 +++++++++++--------
.../test/CodeGen/X86/GlobalISel/fp-bitcast.ll | 19 +++--
2 files changed, 56 insertions(+), 43 deletions(-)
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index 30c2e535a9a35..20363917045cd 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -4299,23 +4299,6 @@ static unsigned CopyToFromAsymmetricReg(Register DestReg, Register SrcReg,
: HasAVX ? X86::VMOVDI2PDIrr
: X86::MOVDI2PDIrr;
- // SrcReg(VR128) -> DestReg(GR16)
- // SrcReg(GR16) -> DestReg(VR128)
-
- if (X86::GR16RegClass.contains(DestReg) &&
- X86::VR128XRegClass.contains(SrcReg))
- // Copy from a VR128 register to a GR16 register.
- return HasAVX512 ? X86::VPEXTRWZrri
- : HasAVX ? X86::VPEXTRWrri
- : X86::PEXTRWrri;
-
- if (X86::VR128XRegClass.contains(DestReg) &&
- X86::GR16RegClass.contains(SrcReg))
- // Copy from a GR16 register to a VR128 register.
- return HasAVX512 ? X86::VPINSRWZrri
- : HasAVX ? X86::VPINSRWrri
- : X86::PINSRWrri;
-
return 0;
}
@@ -4384,28 +4367,55 @@ void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
else if (X86::VK16RegClass.contains(DestReg, SrcReg))
Opc = Subtarget.hasBWI() ? (HasEGPR ? X86::KMOVQkk_EVEX : X86::KMOVQkk)
: (HasEGPR ? X86::KMOVQkk_EVEX : X86::KMOVWkk);
+
+ else if (X86::GR16RegClass.contains(DestReg) &&
+ X86::VR128XRegClass.contains(SrcReg)) {
+ // Special case for moving xmm to GPR16 registers, get super reg and fall
+ // use CopyToFromAsymmetricReg
+ const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
+ DestReg =
+ TRI->getMatchingSuperReg(DestReg, X86::sub_16bit, &X86::GR32RegClass);
+ } else if (X86::VR128XRegClass.contains(DestReg) &&
+ X86::GR16RegClass.contains(SrcReg)) {
+
+ const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
+
+ // Zero extend GPR16 register to GPR32
+ Register Src32 =
+ TRI->getMatchingSuperReg(SrcReg, X86::sub_16bit, &X86::GR32RegClass);
+
+ BuildMI(MBB, MI, DL, get(X86::MOVZX32rr16), Src32)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+
+ // Assign Src32 to SrcReg and use CopyToFromAsymmetricReg
+ SrcReg = Src32;
+ }
+
if (!Opc)
Opc = CopyToFromAsymmetricReg(DestReg, SrcReg, Subtarget);
if (Opc) {
- auto MIB = BuildMI(MBB, MI, DL, get(Opc), DestReg);
- switch (Opc) {
- case X86::VPINSRWZrri:
- case X86::VPINSRWrri:
- case X86::PINSRWrri:
- MIB.addReg(DestReg, RegState::Undef)
- .addReg(SrcReg, getKillRegState(KillSrc))
- .addImm(0);
- break;
- case X86::VPEXTRWZrri:
- case X86::VPEXTRWrri:
- case X86::PEXTRWrri:
- MIB.addReg(SrcReg, getKillRegState(KillSrc)).addImm(0);
- break;
- default:
- MIB.addReg(SrcReg, getKillRegState(KillSrc));
- break;
- }
+ BuildMI(MBB, MI, DL, get(Opc), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ return;
+ }
+
+ // Special case for moving GPR16 to xmm registers
+ if (X86::VR128XRegClass.contains(DestReg) &&
+ X86::GR16RegClass.contains(SrcReg)) {
+
+ const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
+
+ // Zero extend GPR16 register to GPR32
+ Register Src32 =
+ TRI->getMatchingSuperReg(SrcReg, X86::sub_16bit, &X86::GR32RegClass);
+
+ BuildMI(MBB, MI, DL, get(X86::MOVZX32rr16), Src32)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+
+ unsigned Opc = CopyToFromAsymmetricReg(DestReg, Src32, Subtarget);
+ BuildMI(MBB, MI, DL, get(Opc), DestReg).addReg(Src32, RegState::Kill);
+
return;
}
diff --git a/llvm/test/CodeGen/X86/GlobalISel/fp-bitcast.ll b/llvm/test/CodeGen/X86/GlobalISel/fp-bitcast.ll
index c41f2cd596ff1..96ee93fd18762 100644
--- a/llvm/test/CodeGen/X86/GlobalISel/fp-bitcast.ll
+++ b/llvm/test/CodeGen/X86/GlobalISel/fp-bitcast.ll
@@ -3,40 +3,43 @@
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -global-isel -mattr=+avx | FileCheck %s --check-prefixes=AVX
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -global-isel -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512
-define dso_local noundef half @test_i16_to_half(i16 %0) {
+define half @test_i16_to_half(i16 %0) {
; SSE2-LABEL: test_i16_to_half:
; SSE2: # %bb.0: # %entry
-; SSE2-NEXT: pinsrw $0, %di, %xmm0
+; SSE2-NEXT: movzwl %di, %edi
+; SSE2-NEXT: movd %edi, %xmm0
; SSE2-NEXT: retq
;
; AVX-LABEL: test_i16_to_half:
; AVX: # %bb.0: # %entry
-; AVX-NEXT: vpinsrw $0, %di, %xmm0, %xmm0
+; AVX-NEXT: movzwl %di, %edi
+; AVX-NEXT: vmovd %edi, %xmm0
; AVX-NEXT: retq
;
; AVX512-LABEL: test_i16_to_half:
; AVX512: # %bb.0: # %entry
-; AVX512-NEXT: vpinsrw $0, %di, %xmm0, %xmm0
+; AVX512-NEXT: movzwl %di, %edi
+; AVX512-NEXT: vmovd %edi, %xmm0
; AVX512-NEXT: retq
entry:
%2 = bitcast i16 %0 to half
ret half %2
}
-define dso_local noundef i16 @test_half_to_i16(half %0) {
+define i16 @test_half_to_i16(half %0) {
; SSE2-LABEL: test_half_to_i16:
; SSE2: # %bb.0: # %entry
-; SSE2-NEXT: pextrw $0, %xmm0, %ax
+; SSE2-NEXT: movd %xmm0, %eax
; SSE2-NEXT: retq
;
; AVX-LABEL: test_half_to_i16:
; AVX: # %bb.0: # %entry
-; AVX-NEXT: vpextrw $0, %xmm0, %ax
+; AVX-NEXT: vmovd %xmm0, %eax
; AVX-NEXT: retq
;
; AVX512-LABEL: test_half_to_i16:
; AVX512: # %bb.0: # %entry
-; AVX512-NEXT: vpextrw $0, %xmm0, %ax
+; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: retq
entry:
%2 = bitcast half %0 to i16
>From 29941cb87e7124a48ad5837f235d3033eac8015e Mon Sep 17 00:00:00 2001
From: GrumpyPigSkin <oliver61 at live.co.uk>
Date: Tue, 18 Nov 2025 20:27:56 +0000
Subject: [PATCH 5/7] [X86][GISel] Replace variable TRI with RI
---
llvm/lib/Target/X86/X86InstrInfo.cpp | 7 ++-----
1 file changed, 2 insertions(+), 5 deletions(-)
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index 20363917045cd..11bfcedd37e11 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -4372,17 +4372,14 @@ void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
X86::VR128XRegClass.contains(SrcReg)) {
// Special case for moving xmm to GPR16 registers, get super reg and fall
// use CopyToFromAsymmetricReg
- const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
DestReg =
- TRI->getMatchingSuperReg(DestReg, X86::sub_16bit, &X86::GR32RegClass);
+ RI.getMatchingSuperReg(DestReg, X86::sub_16bit, &X86::GR32RegClass);
} else if (X86::VR128XRegClass.contains(DestReg) &&
X86::GR16RegClass.contains(SrcReg)) {
- const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
-
// Zero extend GPR16 register to GPR32
Register Src32 =
- TRI->getMatchingSuperReg(SrcReg, X86::sub_16bit, &X86::GR32RegClass);
+ RI.getMatchingSuperReg(SrcReg, X86::sub_16bit, &X86::GR32RegClass);
BuildMI(MBB, MI, DL, get(X86::MOVZX32rr16), Src32)
.addReg(SrcReg, getKillRegState(KillSrc));
>From e35ca3c44d9e5081dc6ea294822471ac496bed79 Mon Sep 17 00:00:00 2001
From: GrumpyPigSkin <oliver61 at live.co.uk>
Date: Wed, 19 Nov 2025 00:16:19 +0000
Subject: [PATCH 6/7] [X86][GISel] Moved special GPR16 <-> XMM case to
X86InstructionSelector::selectCopy
---
.../X86/GISel/X86InstructionSelector.cpp | 46 +++++++++++++++++++
llvm/lib/Target/X86/X86InstrInfo.cpp | 20 --------
.../test/CodeGen/X86/GlobalISel/fp-bitcast.ll | 3 ++
3 files changed, 49 insertions(+), 20 deletions(-)
diff --git a/llvm/lib/Target/X86/GISel/X86InstructionSelector.cpp b/llvm/lib/Target/X86/GISel/X86InstructionSelector.cpp
index 53ec7125a6490..a2a1644677ccb 100644
--- a/llvm/lib/Target/X86/GISel/X86InstructionSelector.cpp
+++ b/llvm/lib/Target/X86/GISel/X86InstructionSelector.cpp
@@ -310,6 +310,52 @@ bool X86InstructionSelector::selectCopy(MachineInstr &I,
I.getOperand(1).setReg(ExtSrc);
}
+
+ const int RegBankSize = 16;
+
+ // Special case GPR16 -> XMM
+ if (SrcSize == RegBankSize && SrcRegBank.getID() == X86::GPRRegBankID &&
+ (SrcRegBank.getID() == X86::VECRRegBankID)) {
+
+ const DebugLoc &DL = I.getDebugLoc();
+
+ // Zero extend GP16 -> GP32
+ Register ExtReg = MRI.createVirtualRegister(&X86::GR32RegClass);
+ BuildMI(*I.getParent(), I, DL, TII.get(TargetOpcode::COPY), ExtReg)
+ .addReg(SrcReg);
+
+ // Copy GPR32 -> XMM
+ BuildMI(*I.getParent(), I, DL, TII.get(TargetOpcode::COPY), DstReg)
+ .addReg(ExtReg);
+
+ I.eraseFromParent();
+ }
+
+ // Special case XMM -> GPR16
+ if (DstSize == RegBankSize && DstRegBank.getID() == X86::GPRRegBankID &&
+ (SrcRegBank.getID() == X86::VECRRegBankID)) {
+
+ const DebugLoc &DL = I.getDebugLoc();
+
+ // Move XMM to GPR32 register.
+ Register Temp32 = MRI.createVirtualRegister(&X86::GR32RegClass);
+ BuildMI(*I.getParent(), I, DL, TII.get(TargetOpcode::COPY), Temp32)
+ .addReg(SrcReg);
+
+ // Extract the lower 16 bits
+ if (Register Dst32 = TRI.getMatchingSuperReg(DstReg, X86::sub_16bit,
+ &X86::GR32RegClass)) {
+ // Optimization for Physical Dst (e.g. AX): Copy to EAX directly.
+ BuildMI(*I.getParent(), I, DL, TII.get(TargetOpcode::COPY), Dst32)
+ .addReg(Temp32);
+ } else {
+ // Handle if there is no super.
+ BuildMI(*I.getParent(), I, DL, TII.get(TargetOpcode::COPY), DstReg)
+ .addReg(Temp32, 0, X86::sub_16bit);
+ }
+
+ I.eraseFromParent();
+ }
}
return true;
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index 11bfcedd37e11..7e8823ee8761e 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -4368,26 +4368,6 @@ void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
Opc = Subtarget.hasBWI() ? (HasEGPR ? X86::KMOVQkk_EVEX : X86::KMOVQkk)
: (HasEGPR ? X86::KMOVQkk_EVEX : X86::KMOVWkk);
- else if (X86::GR16RegClass.contains(DestReg) &&
- X86::VR128XRegClass.contains(SrcReg)) {
- // Special case for moving xmm to GPR16 registers, get super reg and fall
- // use CopyToFromAsymmetricReg
- DestReg =
- RI.getMatchingSuperReg(DestReg, X86::sub_16bit, &X86::GR32RegClass);
- } else if (X86::VR128XRegClass.contains(DestReg) &&
- X86::GR16RegClass.contains(SrcReg)) {
-
- // Zero extend GPR16 register to GPR32
- Register Src32 =
- RI.getMatchingSuperReg(SrcReg, X86::sub_16bit, &X86::GR32RegClass);
-
- BuildMI(MBB, MI, DL, get(X86::MOVZX32rr16), Src32)
- .addReg(SrcReg, getKillRegState(KillSrc));
-
- // Assign Src32 to SrcReg and use CopyToFromAsymmetricReg
- SrcReg = Src32;
- }
-
if (!Opc)
Opc = CopyToFromAsymmetricReg(DestReg, SrcReg, Subtarget);
diff --git a/llvm/test/CodeGen/X86/GlobalISel/fp-bitcast.ll b/llvm/test/CodeGen/X86/GlobalISel/fp-bitcast.ll
index 96ee93fd18762..a5d3c02b29a92 100644
--- a/llvm/test/CodeGen/X86/GlobalISel/fp-bitcast.ll
+++ b/llvm/test/CodeGen/X86/GlobalISel/fp-bitcast.ll
@@ -30,16 +30,19 @@ define i16 @test_half_to_i16(half %0) {
; SSE2-LABEL: test_half_to_i16:
; SSE2: # %bb.0: # %entry
; SSE2-NEXT: movd %xmm0, %eax
+; SSE2-NEXT: # kill: def $eax killed $eax def $ax
; SSE2-NEXT: retq
;
; AVX-LABEL: test_half_to_i16:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vmovd %xmm0, %eax
+; AVX-NEXT: # kill: def $eax killed $eax def $ax
; AVX-NEXT: retq
;
; AVX512-LABEL: test_half_to_i16:
; AVX512: # %bb.0: # %entry
; AVX512-NEXT: vmovd %xmm0, %eax
+; AVX512-NEXT: # kill: def $eax killed $eax def $ax
; AVX512-NEXT: retq
entry:
%2 = bitcast half %0 to i16
>From 3455c83ecd1694247ab99a2fe32c6e94fdc21b73 Mon Sep 17 00:00:00 2001
From: GrumpyPigSkin <oliver61 at live.co.uk>
Date: Wed, 19 Nov 2025 10:25:24 +0000
Subject: [PATCH 7/7] [X86][GISel] Removed missed code and updated test.
---
.../X86/GISel/X86InstructionSelector.cpp | 76 +++++++++----------
llvm/lib/Target/X86/X86InstrInfo.cpp | 19 -----
.../test/CodeGen/X86/GlobalISel/fp-bitcast.ll | 12 +--
3 files changed, 44 insertions(+), 63 deletions(-)
diff --git a/llvm/lib/Target/X86/GISel/X86InstructionSelector.cpp b/llvm/lib/Target/X86/GISel/X86InstructionSelector.cpp
index a2a1644677ccb..7cddef10146c7 100644
--- a/llvm/lib/Target/X86/GISel/X86InstructionSelector.cpp
+++ b/llvm/lib/Target/X86/GISel/X86InstructionSelector.cpp
@@ -310,52 +310,52 @@ bool X86InstructionSelector::selectCopy(MachineInstr &I,
I.getOperand(1).setReg(ExtSrc);
}
+ }
- const int RegBankSize = 16;
+ const int RegBankSize = 16;
- // Special case GPR16 -> XMM
- if (SrcSize == RegBankSize && SrcRegBank.getID() == X86::GPRRegBankID &&
- (SrcRegBank.getID() == X86::VECRRegBankID)) {
+ // Special case GPR16 -> XMM
+ if (SrcSize == RegBankSize && SrcRegBank.getID() == X86::GPRRegBankID &&
+ (DstRegBank.getID() == X86::VECRRegBankID)) {
- const DebugLoc &DL = I.getDebugLoc();
+ const DebugLoc &DL = I.getDebugLoc();
- // Zero extend GP16 -> GP32
- Register ExtReg = MRI.createVirtualRegister(&X86::GR32RegClass);
- BuildMI(*I.getParent(), I, DL, TII.get(TargetOpcode::COPY), ExtReg)
- .addReg(SrcReg);
+ // Zero extend GPR16 -> GPR32
+ Register ExtReg = MRI.createVirtualRegister(&X86::GR32RegClass);
+ BuildMI(*I.getParent(), I, DL, TII.get(X86::MOVZX32rr16), ExtReg)
+ .addReg(SrcReg);
- // Copy GPR32 -> XMM
- BuildMI(*I.getParent(), I, DL, TII.get(TargetOpcode::COPY), DstReg)
- .addReg(ExtReg);
+ // Copy to GPR32 -> XMM
+ BuildMI(*I.getParent(), I, DL, TII.get(TargetOpcode::COPY), DstReg)
+ .addReg(ExtReg);
- I.eraseFromParent();
- }
+ I.eraseFromParent();
+ }
- // Special case XMM -> GPR16
- if (DstSize == RegBankSize && DstRegBank.getID() == X86::GPRRegBankID &&
- (SrcRegBank.getID() == X86::VECRRegBankID)) {
-
- const DebugLoc &DL = I.getDebugLoc();
-
- // Move XMM to GPR32 register.
- Register Temp32 = MRI.createVirtualRegister(&X86::GR32RegClass);
- BuildMI(*I.getParent(), I, DL, TII.get(TargetOpcode::COPY), Temp32)
- .addReg(SrcReg);
-
- // Extract the lower 16 bits
- if (Register Dst32 = TRI.getMatchingSuperReg(DstReg, X86::sub_16bit,
- &X86::GR32RegClass)) {
- // Optimization for Physical Dst (e.g. AX): Copy to EAX directly.
- BuildMI(*I.getParent(), I, DL, TII.get(TargetOpcode::COPY), Dst32)
- .addReg(Temp32);
- } else {
- // Handle if there is no super.
- BuildMI(*I.getParent(), I, DL, TII.get(TargetOpcode::COPY), DstReg)
- .addReg(Temp32, 0, X86::sub_16bit);
- }
-
- I.eraseFromParent();
+ // Special case XMM -> GPR16
+ if (DstSize == RegBankSize && DstRegBank.getID() == X86::GPRRegBankID &&
+ (SrcRegBank.getID() == X86::VECRRegBankID)) {
+
+ const DebugLoc &DL = I.getDebugLoc();
+
+ // Move XMM to GPR32 register.
+ Register Temp32 = MRI.createVirtualRegister(&X86::GR32RegClass);
+ BuildMI(*I.getParent(), I, DL, TII.get(TargetOpcode::COPY), Temp32)
+ .addReg(SrcReg);
+
+ // Extract the lower 16 bits
+ if (Register Dst32 = TRI.getMatchingSuperReg(DstReg, X86::sub_16bit,
+ &X86::GR32RegClass)) {
+ // Optimization for Physical Dst (e.g. AX): Copy to EAX directly.
+ BuildMI(*I.getParent(), I, DL, TII.get(TargetOpcode::COPY), Dst32)
+ .addReg(Temp32);
+ } else {
+ // Handle if there is no super.
+ BuildMI(*I.getParent(), I, DL, TII.get(TargetOpcode::COPY), DstReg)
+ .addReg(Temp32, 0, X86::sub_16bit);
}
+
+ I.eraseFromParent();
}
return true;
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index 7e8823ee8761e..e03b3ae68a7df 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -4377,25 +4377,6 @@ void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
return;
}
- // Special case for moving GPR16 to xmm registers
- if (X86::VR128XRegClass.contains(DestReg) &&
- X86::GR16RegClass.contains(SrcReg)) {
-
- const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
-
- // Zero extend GPR16 register to GPR32
- Register Src32 =
- TRI->getMatchingSuperReg(SrcReg, X86::sub_16bit, &X86::GR32RegClass);
-
- BuildMI(MBB, MI, DL, get(X86::MOVZX32rr16), Src32)
- .addReg(SrcReg, getKillRegState(KillSrc));
-
- unsigned Opc = CopyToFromAsymmetricReg(DestReg, Src32, Subtarget);
- BuildMI(MBB, MI, DL, get(Opc), DestReg).addReg(Src32, RegState::Kill);
-
- return;
- }
-
if (SrcReg == X86::EFLAGS || DestReg == X86::EFLAGS) {
// FIXME: We use a fatal error here because historically LLVM has tried
// lower some of these physreg copies and we want to ensure we get
diff --git a/llvm/test/CodeGen/X86/GlobalISel/fp-bitcast.ll b/llvm/test/CodeGen/X86/GlobalISel/fp-bitcast.ll
index a5d3c02b29a92..12728bf82f55e 100644
--- a/llvm/test/CodeGen/X86/GlobalISel/fp-bitcast.ll
+++ b/llvm/test/CodeGen/X86/GlobalISel/fp-bitcast.ll
@@ -6,20 +6,20 @@
define half @test_i16_to_half(i16 %0) {
; SSE2-LABEL: test_i16_to_half:
; SSE2: # %bb.0: # %entry
-; SSE2-NEXT: movzwl %di, %edi
-; SSE2-NEXT: movd %edi, %xmm0
+; SSE2-NEXT: movzwl %di, %eax
+; SSE2-NEXT: movd %eax, %xmm0
; SSE2-NEXT: retq
;
; AVX-LABEL: test_i16_to_half:
; AVX: # %bb.0: # %entry
-; AVX-NEXT: movzwl %di, %edi
-; AVX-NEXT: vmovd %edi, %xmm0
+; AVX-NEXT: movzwl %di, %eax
+; AVX-NEXT: vmovd %eax, %xmm0
; AVX-NEXT: retq
;
; AVX512-LABEL: test_i16_to_half:
; AVX512: # %bb.0: # %entry
-; AVX512-NEXT: movzwl %di, %edi
-; AVX512-NEXT: vmovd %edi, %xmm0
+; AVX512-NEXT: movzwl %di, %eax
+; AVX512-NEXT: vmovd %eax, %xmm0
; AVX512-NEXT: retq
entry:
%2 = bitcast i16 %0 to half
More information about the llvm-commits mailing list