[llvm] [X86][GlobalISel] Support G_SELECT for x86_fp80 (PR #157377)
via llvm-commits
llvm-commits at lists.llvm.org
Sun Sep 7 18:22:53 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-globalisel
Author: Evgenii Kudriashov (e-kud)
<details>
<summary>Changes</summary>
G_SELECT for x86_fp80 is required to implement G_FPTOUI and G_UITOFP.
Legalize the predicate operand first to prevent clamping of fp80 type. Since every G_SELECT initially uses s8 for the predicate type, we were always clamping the input. To maintain this behavior, we add additional conditions for legal types, such as Is64Bit and HasCMOV.
---
Full diff: https://github.com/llvm/llvm-project/pull/157377.diff
6 Files Affected:
- (modified) llvm/lib/Target/X86/GISel/X86InstructionSelector.cpp (+27-21)
- (modified) llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp (+6-3)
- (modified) llvm/test/CodeGen/X86/GlobalISel/legalize-leading-zeros.mir (+4-2)
- (modified) llvm/test/CodeGen/X86/GlobalISel/legalize-select.mir (+2-2)
- (removed) llvm/test/CodeGen/X86/fcmove.ll (-15)
- (added) llvm/test/CodeGen/X86/isel-select-fcmov.ll (+41)
``````````diff
diff --git a/llvm/lib/Target/X86/GISel/X86InstructionSelector.cpp b/llvm/lib/Target/X86/GISel/X86InstructionSelector.cpp
index 3090ad313b90d..7561a7df5c138 100644
--- a/llvm/lib/Target/X86/GISel/X86InstructionSelector.cpp
+++ b/llvm/lib/Target/X86/GISel/X86InstructionSelector.cpp
@@ -1877,28 +1877,34 @@ bool X86InstructionSelector::selectSelect(MachineInstr &I,
unsigned OpCmp;
LLT Ty = MRI.getType(DstReg);
- switch (Ty.getSizeInBits()) {
- default:
- return false;
- case 8:
- OpCmp = X86::CMOV_GR8;
- break;
- case 16:
- OpCmp = STI.canUseCMOV() ? X86::CMOV16rr : X86::CMOV_GR16;
- break;
- case 32:
- OpCmp = STI.canUseCMOV() ? X86::CMOV32rr : X86::CMOV_GR32;
- break;
- case 64:
- assert(STI.is64Bit() && STI.canUseCMOV());
- OpCmp = X86::CMOV64rr;
- break;
+ if (Ty.getSizeInBits() == 80) {
+ BuildMI(*Sel.getParent(), Sel, Sel.getDebugLoc(), TII.get(X86::CMOVE_Fp80),
+ DstReg)
+ .addReg(Sel.getTrueReg())
+ .addReg(Sel.getFalseReg());
+ } else {
+ switch (Ty.getSizeInBits()) {
+ default:
+ return false;
+ case 8:
+ OpCmp = X86::CMOV_GR8;
+ break;
+ case 16:
+ OpCmp = STI.canUseCMOV() ? X86::CMOV16rr : X86::CMOV_GR16;
+ break;
+ case 32:
+ OpCmp = STI.canUseCMOV() ? X86::CMOV32rr : X86::CMOV_GR32;
+ break;
+ case 64:
+ assert(STI.is64Bit() && STI.canUseCMOV());
+ OpCmp = X86::CMOV64rr;
+ break;
+ }
+ BuildMI(*Sel.getParent(), Sel, Sel.getDebugLoc(), TII.get(OpCmp), DstReg)
+ .addReg(Sel.getTrueReg())
+ .addReg(Sel.getFalseReg())
+ .addImm(X86::COND_E);
}
- BuildMI(*Sel.getParent(), Sel, Sel.getDebugLoc(), TII.get(OpCmp), DstReg)
- .addReg(Sel.getTrueReg())
- .addReg(Sel.getFalseReg())
- .addImm(X86::COND_E);
-
const TargetRegisterClass *DstRC = getRegClass(Ty, DstReg, MRI);
if (!RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
LLVM_DEBUG(dbgs() << "Failed to constrain CMOV\n");
diff --git a/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp b/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp
index 7fe58539cd4ec..b01aed448ab22 100644
--- a/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp
+++ b/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp
@@ -568,10 +568,13 @@ X86LegalizerInfo::X86LegalizerInfo(const X86Subtarget &STI,
// todo: vectors and address spaces
getActionDefinitionsBuilder(G_SELECT)
- .legalFor({{s8, s32}, {s16, s32}, {s32, s32}, {s64, s32}, {p0, s32}})
+ .legalFor({{s16, s32}, {s32, s32}, {p0, s32}})
+ .legalFor(!HasCMOV, {{s8, s32}})
+ .legalFor(Is64Bit, {{s64, s32}})
+ .legalFor(UseX87, {{s80, s32}})
+ .clampScalar(1, s32, s32)
.widenScalarToNextPow2(0, /*Min=*/8)
- .clampScalar(0, HasCMOV ? s16 : s8, sMaxScalar)
- .clampScalar(1, s32, s32);
+ .clampScalar(0, HasCMOV ? s16 : s8, sMaxScalar);
// memory intrinsics
getActionDefinitionsBuilder({G_MEMCPY, G_MEMMOVE, G_MEMSET}).libcall();
diff --git a/llvm/test/CodeGen/X86/GlobalISel/legalize-leading-zeros.mir b/llvm/test/CodeGen/X86/GlobalISel/legalize-leading-zeros.mir
index 19fe5b84c73ce..85f7c77cbdb60 100644
--- a/llvm/test/CodeGen/X86/GlobalISel/legalize-leading-zeros.mir
+++ b/llvm/test/CodeGen/X86/GlobalISel/legalize-leading-zeros.mir
@@ -25,6 +25,7 @@ body: |
; X64-NEXT: [[SUB:%[0-9]+]]:_(s64) = G_SUB [[CTLZ]], [[C1]]
; X64-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[SUB]], [[C]]
; X64-NEXT: RET 0, implicit [[AND1]](s64)
+ ;
; X86-LABEL: name: test_ctlz35
; X86: [[COPY:%[0-9]+]]:_(s64) = COPY $rdx
; X86-NEXT: [[TRUNC:%[0-9]+]]:_(s35) = G_TRUNC [[COPY]](s64)
@@ -36,9 +37,9 @@ body: |
; X86-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
; X86-NEXT: [[ADD:%[0-9]+]]:_(s64) = G_ADD [[CTLZ]], [[C1]]
; X86-NEXT: [[CTLZ_ZERO_UNDEF:%[0-9]+]]:_(s64) = G_CTLZ_ZERO_UNDEF [[UV1]](s32)
+ ; X86-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP]](s1)
; X86-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ADD]](s64)
; X86-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[CTLZ_ZERO_UNDEF]](s64)
- ; X86-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP]](s1)
; X86-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ZEXT1]](s32), [[UV2]], [[UV4]]
; X86-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ZEXT1]](s32), [[UV3]], [[UV5]]
; X86-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[SELECT]](s32), [[SELECT1]](s32)
@@ -97,6 +98,7 @@ body: |
; X64-NEXT: [[CTLZ:%[0-9]+]]:_(s64) = G_CTLZ [[DEF]](s64)
; X64-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY [[CTLZ]](s64)
; X64-NEXT: RET 0, implicit [[COPY]](s64)
+ ;
; X86-LABEL: name: test_ctlz64
; X86: [[DEF:%[0-9]+]]:_(s64) = IMPLICIT_DEF
; X86-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](s64)
@@ -106,9 +108,9 @@ body: |
; X86-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
; X86-NEXT: [[ADD:%[0-9]+]]:_(s64) = G_ADD [[CTLZ]], [[C1]]
; X86-NEXT: [[CTLZ_ZERO_UNDEF:%[0-9]+]]:_(s64) = G_CTLZ_ZERO_UNDEF [[UV1]](s32)
+ ; X86-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP]](s1)
; X86-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ADD]](s64)
; X86-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[CTLZ_ZERO_UNDEF]](s64)
- ; X86-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP]](s1)
; X86-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ZEXT]](s32), [[UV2]], [[UV4]]
; X86-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ZEXT]](s32), [[UV3]], [[UV5]]
; X86-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[SELECT]](s32), [[SELECT1]](s32)
diff --git a/llvm/test/CodeGen/X86/GlobalISel/legalize-select.mir b/llvm/test/CodeGen/X86/GlobalISel/legalize-select.mir
index a7cbb35e3f74c..6ab424eeaa780 100644
--- a/llvm/test/CodeGen/X86/GlobalISel/legalize-select.mir
+++ b/llvm/test/CodeGen/X86/GlobalISel/legalize-select.mir
@@ -33,9 +33,9 @@ body: |
; X86: [[DEF:%[0-9]+]]:_(s64) = IMPLICIT_DEF
; X86-NEXT: [[DEF1:%[0-9]+]]:_(s64) = IMPLICIT_DEF
; X86-NEXT: [[DEF2:%[0-9]+]]:_(s1) = IMPLICIT_DEF
+ ; X86-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[DEF2]](s1)
; X86-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](s64)
; X86-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](s64)
- ; X86-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[DEF2]](s1)
; X86-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ZEXT]](s32), [[UV]], [[UV2]]
; X86-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ZEXT]](s32), [[UV1]], [[UV3]]
; X86-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[SELECT]](s32), [[SELECT1]](s32)
@@ -115,9 +115,9 @@ body: |
; X64: [[DEF:%[0-9]+]]:_(s8) = IMPLICIT_DEF
; X64-NEXT: [[DEF1:%[0-9]+]]:_(s8) = IMPLICIT_DEF
; X64-NEXT: [[DEF2:%[0-9]+]]:_(s1) = IMPLICIT_DEF
+ ; X64-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[DEF2]](s1)
; X64-NEXT: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[DEF1]](s8)
; X64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[DEF]](s8)
- ; X64-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[DEF2]](s1)
; X64-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ZEXT]](s32), [[ANYEXT]], [[ANYEXT1]]
; X64-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[SELECT]](s16)
; X64-NEXT: [[COPY:%[0-9]+]]:_(s8) = COPY [[TRUNC]](s8)
diff --git a/llvm/test/CodeGen/X86/fcmove.ll b/llvm/test/CodeGen/X86/fcmove.ll
deleted file mode 100644
index 6bb014858d048..0000000000000
--- a/llvm/test/CodeGen/X86/fcmove.ll
+++ /dev/null
@@ -1,15 +0,0 @@
-; RUN: llc %s -o - -verify-machineinstrs | FileCheck %s
-
-target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
-target triple = "x86_64-unknown-unknown"
-
-; Test that we can generate an fcmove, and also that it passes verification.
-
-; CHECK-LABEL: cmove_f
-; CHECK: fcmove %st({{[0-7]}}), %st
-define x86_fp80 @cmove_f(x86_fp80 %a, x86_fp80 %b, i32 %c) {
- %test = icmp eq i32 %c, 0
- %add = fadd x86_fp80 %a, %b
- %ret = select i1 %test, x86_fp80 %add, x86_fp80 %b
- ret x86_fp80 %ret
-}
diff --git a/llvm/test/CodeGen/X86/isel-select-fcmov.ll b/llvm/test/CodeGen/X86/isel-select-fcmov.ll
new file mode 100644
index 0000000000000..f50abf4e49ea1
--- /dev/null
+++ b/llvm/test/CodeGen/X86/isel-select-fcmov.ll
@@ -0,0 +1,41 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -fast-isel -fast-isel-abort=1 -verify-machineinstrs | FileCheck %s
+; RUN: llc < %s -global-isel -global-isel-abort=1 -verify-machineinstrs | FileCheck %s --check-prefix=GISEL
+; RUN: llc < %s -fast-isel=0 -global-isel=0 -verify-machineinstrs | FileCheck %s
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-unknown"
+
+; Test that we can generate an fcmove, and also that it passes verification.
+
+define x86_fp80 @cmove_f(x86_fp80 %a, x86_fp80 %b, i32 %c) {
+; CHECK-LABEL: cmove_f:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT: fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT: testl %edi, %edi
+; CHECK-NEXT: fadd %st(1), %st
+; CHECK-NEXT: fxch %st(1)
+; CHECK-NEXT: fcmove %st(1), %st
+; CHECK-NEXT: fstp %st(1)
+; CHECK-NEXT: retq
+;
+; GISEL-LABEL: cmove_f:
+; GISEL: # %bb.0:
+; GISEL-NEXT: fldt {{[0-9]+}}(%rsp)
+; GISEL-NEXT: fldt {{[0-9]+}}(%rsp)
+; GISEL-NEXT: xorl %eax, %eax
+; GISEL-NEXT: cmpl $0, %edi
+; GISEL-NEXT: sete %al
+; GISEL-NEXT: fadd %st, %st(1)
+; GISEL-NEXT: andl $1, %eax
+; GISEL-NEXT: testl %eax, %eax
+; GISEL-NEXT: fxch %st(1)
+; GISEL-NEXT: fcmove %st(1), %st
+; GISEL-NEXT: fstp %st(1)
+; GISEL-NEXT: retq
+ %test = icmp eq i32 %c, 0
+ %add = fadd x86_fp80 %a, %b
+ %ret = select i1 %test, x86_fp80 %add, x86_fp80 %b
+ ret x86_fp80 %ret
+}
``````````
</details>
https://github.com/llvm/llvm-project/pull/157377
More information about the llvm-commits
mailing list