[PATCH] D37464: [X86] Limit store merge size when noimplicitfloat is enabled (PR34421)
Simon Pilgrim via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Tue Sep 5 03:58:35 PDT 2017
RKSimon created this revision.
As suggested by @niravd : https://bugs.llvm.org/show_bug.cgi?id=34421#c2
Repository:
rL LLVM
https://reviews.llvm.org/D37464
Files:
lib/Target/X86/X86ISelLowering.cpp
lib/Target/X86/X86ISelLowering.h
test/CodeGen/X86/pr34421.ll
Index: test/CodeGen/X86/pr34421.ll
===================================================================
--- test/CodeGen/X86/pr34421.ll
+++ test/CodeGen/X86/pr34421.ll
@@ -0,0 +1,46 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=i686-apple-macosx10.13.0 | FileCheck %s --check-prefix=X86
+; RUN: llc < %s -mtriple=x86_64-apple-macosx10.13.0 | FileCheck %s --check-prefix=X64
+
+; Function Attrs: noimplicitfloat noredzone nounwind
+define void @thread_selfcounts() local_unnamed_addr #0 {
+; X86-LABEL: thread_selfcounts:
+; X86: ## BB#0: ## %entry
+; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
+; X86-NEXT: subl $56, %esp
+; X86-NEXT: movl -56(%ebp), %eax
+; X86-NEXT: movl -52(%ebp), %ecx
+; X86-NEXT: movl %ecx, -20(%ebp)
+; X86-NEXT: movl %eax, -24(%ebp)
+; X86-NEXT: movl -48(%ebp), %eax
+; X86-NEXT: movl -44(%ebp), %ecx
+; X86-NEXT: movl %ecx, -12(%ebp)
+; X86-NEXT: movl %eax, -16(%ebp)
+; X86-NEXT: ## -- End function
+;
+; X64-LABEL: thread_selfcounts:
+; X64: ## BB#0: ## %entry
+; X64-NEXT: pushq %rbp
+; X64-NEXT: movq %rsp, %rbp
+; X64-NEXT: subq $48, %rsp
+; X64-NEXT: movq -48(%rbp), %rax
+; X64-NEXT: movq -40(%rbp), %rcx
+; X64-NEXT: movq %rax, -16(%rbp)
+; X64-NEXT: movq %rcx, -8(%rbp)
+; X64-NEXT: ## -- End function
+entry:
+ %counts = alloca [2 x i64], align 16
+ %thread_counts = alloca [3 x i64], align 16
+ %arraydecay = getelementptr inbounds [3 x i64], [3 x i64]* %thread_counts, i64 0, i64 0
+ %0 = load i64, i64* %arraydecay, align 16
+ %arrayidx3 = getelementptr inbounds [2 x i64], [2 x i64]* %counts, i64 0, i64 0
+ store i64 %0, i64* %arrayidx3, align 16
+ %arrayidx6 = getelementptr inbounds [3 x i64], [3 x i64]* %thread_counts, i64 0, i64 1
+ %1 = load i64, i64* %arrayidx6, align 8
+ %arrayidx10 = getelementptr inbounds [2 x i64], [2 x i64]* %counts, i64 0, i64 1
+ store i64 %1, i64* %arrayidx10, align 8
+ unreachable
+}
+
+attributes #0 = { noimplicitfloat noredzone nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+fxsr,+mmx,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
Index: lib/Target/X86/X86ISelLowering.h
===================================================================
--- lib/Target/X86/X86ISelLowering.h
+++ lib/Target/X86/X86ISelLowering.h
@@ -814,6 +814,9 @@
bool mergeStoresAfterLegalization() const override { return true; }
+ bool canMergeStoresTo(unsigned AddressSpace, EVT MemVT,
+ const SelectionDAG &DAG) const override;
+
bool isCheapToSpeculateCttz() const override;
bool isCheapToSpeculateCtlz() const override;
Index: lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- lib/Target/X86/X86ISelLowering.cpp
+++ lib/Target/X86/X86ISelLowering.cpp
@@ -4611,6 +4611,20 @@
return Subtarget.hasLZCNT();
}
+bool X86TargetLowering::canMergeStoresTo(unsigned AddressSpace, EVT MemVT,
+ const SelectionDAG &DAG) const {
+ // Do not merge to float value size (128 bits) if the NoImplicitFloat
+ // attribute is set.
+ bool NoFloat = DAG.getMachineFunction().getFunction()->hasFnAttribute(
+ Attribute::NoImplicitFloat);
+
+ if (NoFloat) {
+ unsigned MaxIntSize = Subtarget.is64Bit() ? 64 : 32;
+ return (MemVT.getSizeInBits() <= MaxIntSize);
+ }
+ return true;
+}
+
bool X86TargetLowering::isCtlzFast() const {
return Subtarget.hasFastLZCNT();
}
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D37464.113828.patch
Type: text/x-patch
Size: 3992 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20170905/ab749cf1/attachment.bin>
More information about the llvm-commits
mailing list