[llvm] [X86] Combine LRINT/LLRINT and TRUNC when nuw/nsw (PR #126217)

via llvm-commits llvm-commits at lists.llvm.org
Fri Feb 7 02:04:30 PST 2025


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-x86

Author: Phoebe Wang (phoebewang)

<details>
<summary>Changes</summary>

Try to improve performance after #<!-- -->125848

---
Full diff: https://github.com/llvm/llvm-project/pull/126217.diff


3 Files Affected:

- (modified) llvm/lib/Target/X86/X86ISelLowering.cpp (+5) 
- (modified) llvm/test/CodeGen/X86/llrint-conv.ll (+88) 
- (modified) llvm/test/CodeGen/X86/lrint-conv-i64.ll (+32) 


``````````diff
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 744e4e740cb2102..182cdd90c9d680a 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -53919,6 +53919,11 @@ static SDValue combineTruncate(SDNode *N, SelectionDAG &DAG,
       return DAG.getNode(X86ISD::MMX_MOVD2W, DL, MVT::i32, BCSrc);
   }
 
+  if ((N->getFlags().hasNoUnsignedWrap() || N->getFlags().hasNoSignedWrap()) &&
+      (Src.getOpcode() == ISD::LRINT || Src.getOpcode() == ISD::LLRINT) &&
+      VT.getScalarType() == MVT::i32 && Src.hasOneUse())
+    return DAG.getNode(ISD::LRINT, DL, VT, Src.getOperand(0));
+
   return SDValue();
 }
 
diff --git a/llvm/test/CodeGen/X86/llrint-conv.ll b/llvm/test/CodeGen/X86/llrint-conv.ll
index 402daf80a15e873..2f69824c5f61573 100644
--- a/llvm/test/CodeGen/X86/llrint-conv.ll
+++ b/llvm/test/CodeGen/X86/llrint-conv.ll
@@ -183,6 +183,94 @@ entry:
   ret i64 %0
 }
 
+define i32 @combine_f32_trunc(float %x) nounwind {
+; SSE-LABEL: combine_trunc:
+; SSE:       # %bb.0: # %entry
+; SSE-NEXT:    cvtss2si %xmm0, %eax
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: combine_trunc:
+; AVX:       # %bb.0: # %entry
+; AVX-NEXT:    vcvtss2si %xmm0, %eax
+; AVX-NEXT:    retq
+; X86-NOSSE-LABEL: combine_f32_trunc:
+; X86-NOSSE:       # %bb.0: # %entry
+; X86-NOSSE-NEXT:    pushl %eax
+; X86-NOSSE-NEXT:    flds {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT:    fistpl (%esp)
+; X86-NOSSE-NEXT:    movl (%esp), %eax
+; X86-NOSSE-NEXT:    popl %ecx
+; X86-NOSSE-NEXT:    retl
+;
+; X86-SSE2-LABEL: combine_f32_trunc:
+; X86-SSE2:       # %bb.0: # %entry
+; X86-SSE2-NEXT:    cvtss2si {{[0-9]+}}(%esp), %eax
+; X86-SSE2-NEXT:    retl
+;
+; X86-AVX-LABEL: combine_f32_trunc:
+; X86-AVX:       # %bb.0: # %entry
+; X86-AVX-NEXT:    vcvtss2si {{[0-9]+}}(%esp), %eax
+; X86-AVX-NEXT:    retl
+;
+; X64-SSE-LABEL: combine_f32_trunc:
+; X64-SSE:       # %bb.0: # %entry
+; X64-SSE-NEXT:    cvtss2si %xmm0, %eax
+; X64-SSE-NEXT:    retq
+;
+; X64-AVX-LABEL: combine_f32_trunc:
+; X64-AVX:       # %bb.0: # %entry
+; X64-AVX-NEXT:    vcvtss2si %xmm0, %eax
+; X64-AVX-NEXT:    retq
+entry:
+  %0 = tail call i64 @llvm.llrint.f32(float %x)
+  %1 = trunc nsw i64 %0 to i32
+  ret i32 %1
+}
+
+define i32 @combine_f64_trunc(double %x) nounwind {
+; SSE-LABEL: combine_trunc:
+; SSE:       # %bb.0: # %entry
+; SSE-NEXT:    cvtss2si %xmm0, %eax
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: combine_trunc:
+; AVX:       # %bb.0: # %entry
+; AVX-NEXT:    vcvtss2si %xmm0, %eax
+; AVX-NEXT:    retq
+; X86-NOSSE-LABEL: combine_f64_trunc:
+; X86-NOSSE:       # %bb.0: # %entry
+; X86-NOSSE-NEXT:    pushl %eax
+; X86-NOSSE-NEXT:    fldl {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT:    fistpl (%esp)
+; X86-NOSSE-NEXT:    movl (%esp), %eax
+; X86-NOSSE-NEXT:    popl %ecx
+; X86-NOSSE-NEXT:    retl
+;
+; X86-SSE2-LABEL: combine_f64_trunc:
+; X86-SSE2:       # %bb.0: # %entry
+; X86-SSE2-NEXT:    cvtsd2si {{[0-9]+}}(%esp), %eax
+; X86-SSE2-NEXT:    retl
+;
+; X86-AVX-LABEL: combine_f64_trunc:
+; X86-AVX:       # %bb.0: # %entry
+; X86-AVX-NEXT:    vcvtsd2si {{[0-9]+}}(%esp), %eax
+; X86-AVX-NEXT:    retl
+;
+; X64-SSE-LABEL: combine_f64_trunc:
+; X64-SSE:       # %bb.0: # %entry
+; X64-SSE-NEXT:    cvtsd2si %xmm0, %eax
+; X64-SSE-NEXT:    retq
+;
+; X64-AVX-LABEL: combine_f64_trunc:
+; X64-AVX:       # %bb.0: # %entry
+; X64-AVX-NEXT:    vcvtsd2si %xmm0, %eax
+; X64-AVX-NEXT:    retq
+entry:
+  %0 = tail call i64 @llvm.llrint.f64(double %x)
+  %1 = trunc nuw i64 %0 to i32
+  ret i32 %1
+}
+
 declare i64 @llvm.llrint.f32(float) nounwind readnone
 declare i64 @llvm.llrint.f64(double) nounwind readnone
 declare i64 @llvm.llrint.f80(x86_fp80) nounwind readnone
diff --git a/llvm/test/CodeGen/X86/lrint-conv-i64.ll b/llvm/test/CodeGen/X86/lrint-conv-i64.ll
index 38fa09085e1898d..6b9acc02ad7c983 100644
--- a/llvm/test/CodeGen/X86/lrint-conv-i64.ll
+++ b/llvm/test/CodeGen/X86/lrint-conv-i64.ll
@@ -63,6 +63,38 @@ entry:
   ret i32 %1
 }
 
+define i32 @combine_f32_trunc(float %x) {
+; SSE-LABEL: combine_f32_trunc:
+; SSE:       # %bb.0: # %entry
+; SSE-NEXT:    cvtss2si %xmm0, %eax
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: combine_f32_trunc:
+; AVX:       # %bb.0: # %entry
+; AVX-NEXT:    vcvtss2si %xmm0, %eax
+; AVX-NEXT:    retq
+entry:
+  %0 = tail call i64 @llvm.lrint.i64.f32(float %x)
+  %1 = trunc nuw i64 %0 to i32
+  ret i32 %1
+}
+
+define i32 @combine_f64_trunc(double %x) {
+; SSE-LABEL: combine_f64_trunc:
+; SSE:       # %bb.0: # %entry
+; SSE-NEXT:    cvtsd2si %xmm0, %eax
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: combine_f64_trunc:
+; AVX:       # %bb.0: # %entry
+; AVX-NEXT:    vcvtsd2si %xmm0, %eax
+; AVX-NEXT:    retq
+entry:
+  %0 = tail call i64 @llvm.lrint.i64.f64(double %x)
+  %1 = trunc nsw i64 %0 to i32
+  ret i32 %1
+}
+
 declare i64 @llvm.lrint.i64.f32(float) nounwind readnone
 declare i64 @llvm.lrint.i64.f64(double) nounwind readnone
 declare i64 @llvm.lrint.i64.f80(x86_fp80) nounwind readnone

``````````

</details>


https://github.com/llvm/llvm-project/pull/126217


More information about the llvm-commits mailing list