[llvm] [X86] Fix expand-fp on optnone functions (PR #156900)
Frederik Harwath via llvm-commits
llvm-commits at lists.llvm.org
Thu Sep 4 07:52:46 PDT 2025
https://github.com/frederik-h created https://github.com/llvm/llvm-project/pull/156900
As observed by @mikaelholmen, PR #130988 ("[AMDGPU] Implement IR expansion for frem instruction")
introduced a regression on x86: its changes caused the pass to be skipped on functions with the optnone attribute. @bjope also noted that a check concerning the optnone handling is wrong.
This patch fixes both issues, which in conjunction fix the regression.
>From 1a778ec3c89a890f821816f168568bcd0f1adb30 Mon Sep 17 00:00:00 2001
From: Frederik Harwath <fharwath at amd.com>
Date: Thu, 4 Sep 2025 10:36:45 -0400
Subject: [PATCH] [X86] Fix expand-fp on optnone functions
As observed by @mikaelholmen, PR #130988
"[AMDGPU] Implement IR expansion for frem instruction" introduced
a regression on x86. Its changes led to the pass being skipped on
functions with the optnone attribute. @bjope also noted that
a check concerning the optnone handling is wrong.
This patch fixes both issues which in conjunction fix the regression.
---
llvm/lib/CodeGen/ExpandFp.cpp | 5 +-
.../X86/expand-large-fp-optnone.ll | 252 ++++++++++++++++++
2 files changed, 253 insertions(+), 4 deletions(-)
create mode 100644 llvm/test/Transforms/ExpandLargeFpConvert/X86/expand-large-fp-optnone.ll
diff --git a/llvm/lib/CodeGen/ExpandFp.cpp b/llvm/lib/CodeGen/ExpandFp.cpp
index adebd704791ee..1fc2277c71693 100644
--- a/llvm/lib/CodeGen/ExpandFp.cpp
+++ b/llvm/lib/CodeGen/ExpandFp.cpp
@@ -1108,14 +1108,11 @@ class ExpandFpLegacyPass : public FunctionPass {
ExpandFpLegacyPass() : ExpandFpLegacyPass(CodeGenOptLevel::None) {};
bool runOnFunction(Function &F) override {
- if (skipFunction(F))
- return false;
-
auto *TM = &getAnalysis<TargetPassConfig>().getTM<TargetMachine>();
auto *TLI = TM->getSubtargetImpl(F)->getTargetLowering();
AssumptionCache *AC = nullptr;
- if (OptLevel != CodeGenOptLevel::None || F.hasOptNone())
+ if (OptLevel != CodeGenOptLevel::None && !F.hasOptNone())
AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
return runImpl(F, *TLI, AC);
}
diff --git a/llvm/test/Transforms/ExpandLargeFpConvert/X86/expand-large-fp-optnone.ll b/llvm/test/Transforms/ExpandLargeFpConvert/X86/expand-large-fp-optnone.ll
new file mode 100644
index 0000000000000..f6be7a97b7cef
--- /dev/null
+++ b/llvm/test/Transforms/ExpandLargeFpConvert/X86/expand-large-fp-optnone.ll
@@ -0,0 +1,252 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=x86_64 < %s | FileCheck %s
+
+; expand-fp must also run with optnone
+
+; Function Attrs: noinline optnone
+define double @main(i224 %0) #0 {
+; CHECK-LABEL: main:
+; CHECK: # %bb.0: # %entryitofp-entry
+; CHECK-NEXT: pushq %rbp
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: pushq %r15
+; CHECK-NEXT: .cfi_def_cfa_offset 24
+; CHECK-NEXT: pushq %r14
+; CHECK-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NEXT: pushq %r13
+; CHECK-NEXT: .cfi_def_cfa_offset 40
+; CHECK-NEXT: pushq %r12
+; CHECK-NEXT: .cfi_def_cfa_offset 48
+; CHECK-NEXT: pushq %rbx
+; CHECK-NEXT: .cfi_def_cfa_offset 56
+; CHECK-NEXT: subq $88, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 144
+; CHECK-NEXT: .cfi_offset %rbx, -56
+; CHECK-NEXT: .cfi_offset %r12, -48
+; CHECK-NEXT: .cfi_offset %r13, -40
+; CHECK-NEXT: .cfi_offset %r14, -32
+; CHECK-NEXT: .cfi_offset %r15, -24
+; CHECK-NEXT: .cfi_offset %rbp, -16
+; CHECK-NEXT: movq %rdi, %rax
+; CHECK-NEXT: orq %rdx, %rax
+; CHECK-NEXT: movl %ecx, %r8d
+; CHECK-NEXT: movq %rsi, %r9
+; CHECK-NEXT: orq %r8, %r9
+; CHECK-NEXT: xorps %xmm0, %xmm0
+; CHECK-NEXT: orq %r9, %rax
+; CHECK-NEXT: je .LBB0_10
+; CHECK-NEXT: jmp .LBB0_1
+; CHECK-NEXT: .LBB0_1: # %itofp-if-end
+; CHECK-NEXT: movslq %ecx, %rax
+; CHECK-NEXT: movq %rax, %r9
+; CHECK-NEXT: sarq $31, %r9
+; CHECK-NEXT: sarq $63, %rax
+; CHECK-NEXT: xorq %rax, %rcx
+; CHECK-NEXT: xorq %rax, %rdx
+; CHECK-NEXT: xorq %rax, %rsi
+; CHECK-NEXT: xorq %r9, %rdi
+; CHECK-NEXT: subq %r9, %rdi
+; CHECK-NEXT: sbbq %rax, %rsi
+; CHECK-NEXT: sbbq %rax, %rdx
+; CHECK-NEXT: sbbq %rax, %rcx
+; CHECK-NEXT: movq %rcx, %r8
+; CHECK-NEXT: shldq $32, %rdx, %r8
+; CHECK-NEXT: bsrq %r8, %rax
+; CHECK-NEXT: xorl $63, %eax
+; CHECK-NEXT: movq %rdx, %r10
+; CHECK-NEXT: shldq $32, %rsi, %r10
+; CHECK-NEXT: bsrq %r10, %r11
+; CHECK-NEXT: xorl $63, %r11d
+; CHECK-NEXT: orl $64, %r11d
+; CHECK-NEXT: testq %r8, %r8
+; CHECK-NEXT: cmovnel %eax, %r11d
+; CHECK-NEXT: movq %rsi, %rbx
+; CHECK-NEXT: shldq $32, %rdi, %rbx
+; CHECK-NEXT: bsrq %rbx, %r14
+; CHECK-NEXT: xorl $63, %r14d
+; CHECK-NEXT: movq %rdi, %rax
+; CHECK-NEXT: shlq $32, %rax
+; CHECK-NEXT: bsrq %rax, %rax
+; CHECK-NEXT: xorl $63, %eax
+; CHECK-NEXT: orl $64, %eax
+; CHECK-NEXT: testq %rbx, %rbx
+; CHECK-NEXT: cmovnel %r14d, %eax
+; CHECK-NEXT: subl $-128, %eax
+; CHECK-NEXT: orq %r8, %r10
+; CHECK-NEXT: cmovnel %r11d, %eax
+; CHECK-NEXT: movl $224, %r11d
+; CHECK-NEXT: subl %eax, %r11d
+; CHECK-NEXT: movl $223, %r10d
+; CHECK-NEXT: subl %eax, %r10d
+; CHECK-NEXT: cmpl $53, %r11d
+; CHECK-NEXT: jle .LBB0_8
+; CHECK-NEXT: # %bb.2: # %itofp-if-then4
+; CHECK-NEXT: movl %r11d, %r8d
+; CHECK-NEXT: subl $54, %r8d
+; CHECK-NEXT: je .LBB0_4
+; CHECK-NEXT: jmp .LBB0_3
+; CHECK-NEXT: .LBB0_3: # %itofp-if-then4
+; CHECK-NEXT: movl %r11d, %r8d
+; CHECK-NEXT: subl $55, %r8d
+; CHECK-NEXT: jne .LBB0_5
+; CHECK-NEXT: # %bb.11:
+; CHECK-NEXT: jmp .LBB0_6
+; CHECK-NEXT: .LBB0_4: # %itofp-sw-bb
+; CHECK-NEXT: movq %rsi, %rax
+; CHECK-NEXT: shldq $1, %rdi, %rax
+; CHECK-NEXT: movq %rdx, %r8
+; CHECK-NEXT: shldq $1, %rsi, %r8
+; CHECK-NEXT: shldq $1, %rdx, %rcx
+; CHECK-NEXT: addq %rdi, %rdi
+; CHECK-NEXT: movq %rax, %rsi
+; CHECK-NEXT: movq %r8, %rdx
+; CHECK-NEXT: jmp .LBB0_6
+; CHECK-NEXT: .LBB0_5: # %itofp-sw-default
+; CHECK-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT: movq %rsi, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT: movq %rdx, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT: movl %ecx, %r8d
+; CHECK-NEXT: movq %r8, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT: movb $-87, %r8b
+; CHECK-NEXT: subb %al, %r8b
+; CHECK-NEXT: movb %r8b, %bl
+; CHECK-NEXT: shrb $6, %bl
+; CHECK-NEXT: movzbl %bl, %r12d
+; CHECK-NEXT: movq $0, {{[0-9]+}}(%rsp)
+; CHECK-NEXT: movq $0, (%rsp)
+; CHECK-NEXT: movq $0, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT: movq $0, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT: movq -24(%rsp,%r12,8), %rbx
+; CHECK-NEXT: movq -32(%rsp,%r12,8), %r13
+; CHECK-NEXT: movq %rcx, %rbp
+; CHECK-NEXT: movb %r8b, %cl
+; CHECK-NEXT: movq %r13, %r14
+; CHECK-NEXT: shrdq %cl, %rbx, %r14
+; CHECK-NEXT: movq %r14, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT: movq -48(%rsp,%r12,8), %r15
+; CHECK-NEXT: movq -40(%rsp,%r12,8), %r12
+; CHECK-NEXT: movb %r8b, %cl
+; CHECK-NEXT: movq %r12, %r14
+; CHECK-NEXT: shrdq %cl, %r13, %r14
+; CHECK-NEXT: movb %r8b, %cl
+; CHECK-NEXT: shrq %cl, %rbx
+; CHECK-NEXT: movb %r8b, %cl
+; CHECK-NEXT: shrdq %cl, %r12, %r15
+; CHECK-NEXT: addb $55, %al
+; CHECK-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT: movq %rsi, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT: movq %rdx, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT: movq %rbp, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT: movq $0, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT: movq $0, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT: movq $0, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT: movq $0, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT: movb %al, %cl
+; CHECK-NEXT: shrb $3, %cl
+; CHECK-NEXT: andb $24, %cl
+; CHECK-NEXT: negb %cl
+; CHECK-NEXT: movsbq %cl, %rdx
+; CHECK-NEXT: movq -80(%rsp,%rdx), %rsi
+; CHECK-NEXT: movq -72(%rsp,%rdx), %rdi
+; CHECK-NEXT: movq -64(%rsp,%rdx), %r8
+; CHECK-NEXT: movb %al, %cl
+; CHECK-NEXT: movq %r8, %r12
+; CHECK-NEXT: shldq %cl, %rdi, %r12
+; CHECK-NEXT: movb %al, %cl
+; CHECK-NEXT: movq %rsi, %r13
+; CHECK-NEXT: shlq %cl, %r13
+; CHECK-NEXT: orq %r12, %r13
+; CHECK-NEXT: movq -56(%rsp,%rdx), %rdx
+; CHECK-NEXT: movb %al, %cl
+; CHECK-NEXT: shldq %cl, %r8, %rdx
+; CHECK-NEXT: movl %edx, %edx
+; CHECK-NEXT: movb %al, %cl
+; CHECK-NEXT: shldq %cl, %rsi, %rdi
+; CHECK-NEXT: orq %rdx, %rdi
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: orq %rdi, %r13
+; CHECK-NEXT: setne %al
+; CHECK-NEXT: orq %rax, %r15
+; CHECK-NEXT: movq %r15, %rdi
+; CHECK-NEXT: movq %r14, %rsi
+; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
+; CHECK-NEXT: movq %rbx, %rcx
+; CHECK-NEXT: jmp .LBB0_6
+; CHECK-NEXT: .LBB0_6: # %itofp-sw-epilog
+; CHECK-NEXT: movl %edi, %eax
+; CHECK-NEXT: shrl $2, %eax
+; CHECK-NEXT: andl $1, %eax
+; CHECK-NEXT: orq %rax, %rdi
+; CHECK-NEXT: addq $1, %rdi
+; CHECK-NEXT: adcq $0, %rsi
+; CHECK-NEXT: adcq $0, %rdx
+; CHECK-NEXT: adcq $0, %rcx
+; CHECK-NEXT: movq %rsi, %rdx
+; CHECK-NEXT: shldq $62, %rdi, %rdx
+; CHECK-NEXT: movq %rdx, %rax
+; CHECK-NEXT: shrq $32, %rax
+; CHECK-NEXT: btq $55, %rdi
+; CHECK-NEXT: jae .LBB0_9
+; CHECK-NEXT: jmp .LBB0_7
+; CHECK-NEXT: .LBB0_7: # %itofp-if-then20
+; CHECK-NEXT: shldq $61, %rdi, %rsi
+; CHECK-NEXT: movq %rsi, %rax
+; CHECK-NEXT: shrq $32, %rax
+; CHECK-NEXT: movq %rsi, %rdx
+; CHECK-NEXT: movl %r11d, %r10d
+; CHECK-NEXT: jmp .LBB0_9
+; CHECK-NEXT: .LBB0_8: # %itofp-if-else
+; CHECK-NEXT: movq %rdi, {{[0-9]+}}(%rsp)
+; CHECK-NEXT: movq %rsi, {{[0-9]+}}(%rsp)
+; CHECK-NEXT: movq %rdx, {{[0-9]+}}(%rsp)
+; CHECK-NEXT: movq %rcx, {{[0-9]+}}(%rsp)
+; CHECK-NEXT: movq $0, {{[0-9]+}}(%rsp)
+; CHECK-NEXT: movq $0, {{[0-9]+}}(%rsp)
+; CHECK-NEXT: movq $0, {{[0-9]+}}(%rsp)
+; CHECK-NEXT: movq $0, {{[0-9]+}}(%rsp)
+; CHECK-NEXT: addb $85, %al
+; CHECK-NEXT: movb %al, %cl
+; CHECK-NEXT: shrb $3, %cl
+; CHECK-NEXT: andb $24, %cl
+; CHECK-NEXT: negb %cl
+; CHECK-NEXT: movsbq %cl, %rcx
+; CHECK-NEXT: movq 48(%rsp,%rcx), %rdx
+; CHECK-NEXT: movb %al, %cl
+; CHECK-NEXT: shlq %cl, %rdx
+; CHECK-NEXT: movq %rdx, %rax
+; CHECK-NEXT: shrq $32, %rax
+; CHECK-NEXT: .LBB0_9: # %itofp-if-end26
+; CHECK-NEXT: andl $-2147483648, %r9d # imm = 0x80000000
+; CHECK-NEXT: shll $20, %r10d
+; CHECK-NEXT: addl $1072693248, %r10d # imm = 0x3FF00000
+; CHECK-NEXT: andl $1048575, %eax # imm = 0xFFFFF
+; CHECK-NEXT: orl %r9d, %eax
+; CHECK-NEXT: orl %r10d, %eax
+; CHECK-NEXT: movl %eax, %eax
+; CHECK-NEXT: shlq $32, %rax
+; CHECK-NEXT: movabsq $4294967295, %rcx # imm = 0xFFFFFFFF
+; CHECK-NEXT: andq %rcx, %rdx
+; CHECK-NEXT: orq %rdx, %rax
+; CHECK-NEXT: movq %rax, %xmm0
+; CHECK-NEXT: .LBB0_10: # %itofp-return
+; CHECK-NEXT: addq $88, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 56
+; CHECK-NEXT: popq %rbx
+; CHECK-NEXT: .cfi_def_cfa_offset 48
+; CHECK-NEXT: popq %r12
+; CHECK-NEXT: .cfi_def_cfa_offset 40
+; CHECK-NEXT: popq %r13
+; CHECK-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NEXT: popq %r14
+; CHECK-NEXT: .cfi_def_cfa_offset 24
+; CHECK-NEXT: popq %r15
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: popq %rbp
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
+entry:
+ %x = sitofp i224 %0 to double
+ ret double %x
+}
+
+attributes #0 = { noinline optnone }
More information about the llvm-commits
mailing list