[llvm] r260828 - [x86-64] allow mfence even with -mno-sse (PR23203)
Sanjay Patel via llvm-commits
llvm-commits at lists.llvm.org
Sat Feb 13 09:26:29 PST 2016
Author: spatel
Date: Sat Feb 13 11:26:29 2016
New Revision: 260828
URL: http://llvm.org/viewvc/llvm-project?rev=260828&view=rev
Log:
[x86-64] allow mfence even with -mno-sse (PR23203)
As shown in:
https://llvm.org/bugs/show_bug.cgi?id=23203
...we currently die because lowering believes that mfence is allowed without SSE2 on x86-64,
but the instruction def doesn't know that.
I don't know if allowing mfence without SSE is right, but if not, at least now it's consistently wrong. :)
Differential Revision: http://reviews.llvm.org/D17219
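For reference, a minimal reproducer along the lines of the new RUN line in
test/CodeGen/X86/mfence.ll (a sketch only; the exact failure mode depends on
the pre-patch revision, where lowering selected X86ISD::MFENCE but the MFENCE
instruction def still required HasSSE2):

  ; repro.ll - a seq_cst fence should still select mfence on x86-64
  define void @test() {
    fence seq_cst
    ret void
  }

  $ llc repro.ll -mtriple=x86_64-unknown-unknown -mattr=-sse2

With this change, llc emits mfence for the seq_cst fence even with SSE2
disabled, matching what the lowering code already assumed.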
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/lib/Target/X86/X86InstrInfo.td
llvm/trunk/lib/Target/X86/X86InstrSSE.td
llvm/trunk/lib/Target/X86/X86Subtarget.h
llvm/trunk/test/CodeGen/X86/mfence.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=260828&r1=260827&r2=260828&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Sat Feb 13 11:26:29 2016
@@ -19717,13 +19717,6 @@ X86TargetLowering::shouldExpandAtomicRMW
}
}
-static bool hasMFENCE(const X86Subtarget &Subtarget) {
- // Use mfence if we have SSE2 or we're on x86-64 (even if we asked for
- // no-sse2). There isn't any reason to disable it if the target processor
- // supports it.
- return Subtarget.hasSSE2() || Subtarget.is64Bit();
-}
-
LoadInst *
X86TargetLowering::lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const {
unsigned NativeWidth = Subtarget.is64Bit() ? 64 : 32;
@@ -19763,7 +19756,7 @@ X86TargetLowering::lowerIdempotentRMWInt
// the IR level, so we must wrap it in an intrinsic.
return nullptr;
- if (!hasMFENCE(Subtarget))
+ if (!Subtarget.hasMFence())
// FIXME: it might make sense to use a locked operation here but on a
// different cache-line to prevent cache-line bouncing. In practice it
// is probably a small win, and x86 processors without mfence are rare
@@ -19794,7 +19787,7 @@ static SDValue LowerATOMIC_FENCE(SDValue
// The only fence that needs an instruction is a sequentially-consistent
// cross-thread fence.
if (FenceOrdering == SequentiallyConsistent && FenceScope == CrossThread) {
- if (hasMFENCE(Subtarget))
+ if (Subtarget.hasMFence())
return DAG.getNode(X86ISD::MFENCE, dl, MVT::Other, Op.getOperand(0));
SDValue Chain = Op.getOperand(0);
Modified: llvm/trunk/lib/Target/X86/X86InstrInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrInfo.td?rev=260828&r1=260827&r2=260828&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrInfo.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrInfo.td Sat Feb 13 11:26:29 2016
@@ -845,6 +845,7 @@ def CallImmAddr : Predicate<"Subtarget-
def FavorMemIndirectCall : Predicate<"!Subtarget->callRegIndirect()">;
def NotSlowIncDec : Predicate<"!Subtarget->slowIncDec()">;
def HasFastMem32 : Predicate<"!Subtarget->isUnalignedMem32Slow()">;
+def HasMFence : Predicate<"Subtarget->hasMFence()">;
//===----------------------------------------------------------------------===//
// X86 Instruction Format Definitions.
Modified: llvm/trunk/lib/Target/X86/X86InstrSSE.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrSSE.td?rev=260828&r1=260827&r2=260828&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrSSE.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrSSE.td Sat Feb 13 11:26:29 2016
@@ -3762,6 +3762,8 @@ def PAUSE : I<0x90, RawFrm, (outs), (ins
let SchedRW = [WriteFence] in {
// Load, store, and memory fence
+// TODO: As with mfence, we may want to ease the availability of sfence/lfence
+// to include any 64-bit target.
def SFENCE : I<0xAE, MRM_F8, (outs), (ins),
"sfence", [(int_x86_sse_sfence)], IIC_SSE_SFENCE>,
PS, Requires<[HasSSE1]>;
@@ -3770,7 +3772,7 @@ def LFENCE : I<0xAE, MRM_E8, (outs), (in
TB, Requires<[HasSSE2]>;
def MFENCE : I<0xAE, MRM_F0, (outs), (ins),
"mfence", [(int_x86_sse2_mfence)], IIC_SSE_MFENCE>,
- TB, Requires<[HasSSE2]>;
+ TB, Requires<[HasMFence]>;
} // SchedRW
def : Pat<(X86SFence), (SFENCE)>;
Modified: llvm/trunk/lib/Target/X86/X86Subtarget.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86Subtarget.h?rev=260828&r1=260827&r2=260828&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86Subtarget.h (original)
+++ llvm/trunk/lib/Target/X86/X86Subtarget.h Sat Feb 13 11:26:29 2016
@@ -446,6 +446,11 @@ public:
bool isSLM() const { return X86ProcFamily == IntelSLM; }
bool useSoftFloat() const { return UseSoftFloat; }
+ /// Use mfence if we have SSE2 or we're on x86-64 (even if we asked for
+ /// no-sse2). There isn't any reason to disable it if the target processor
+ /// supports it.
+ bool hasMFence() const { return hasSSE2() || is64Bit(); }
+
const Triple &getTargetTriple() const { return TargetTriple; }
bool isTargetDarwin() const { return TargetTriple.isOSDarwin(); }
Modified: llvm/trunk/test/CodeGen/X86/mfence.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/mfence.ll?rev=260828&r1=260827&r2=260828&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/mfence.ll (original)
+++ llvm/trunk/test/CodeGen/X86/mfence.ll Sat Feb 13 11:26:29 2016
@@ -1,11 +1,37 @@
-; RUN: llc < %s -mtriple=i386-unknown-unknown -mattr=+sse2 | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=i386-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X32
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=-sse2 | FileCheck %s --check-prefix=X64
+
+; It doesn't matter if an x86-64 target has specified "no-sse2"; we still can use mfence.
define void @test() {
-; CHECK-LABEL: test:
-; CHECK: # BB#0:
-; CHECK-NEXT: mfence
-; CHECK-NEXT: retl
+; X32-LABEL: test:
+; X32: # BB#0:
+; X32-NEXT: mfence
+; X32-NEXT: retl
+;
+; X64-LABEL: test:
+; X64: # BB#0:
+; X64-NEXT: mfence
+; X64-NEXT: retq
fence seq_cst
ret void
}
+define i32 @fence(i32* %ptr) {
+; X32-LABEL: fence:
+; X32: # BB#0:
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: mfence
+; X32-NEXT: movl (%eax), %eax
+; X32-NEXT: retl
+;
+; X64-LABEL: fence:
+; X64: # BB#0:
+; X64-NEXT: mfence
+; X64-NEXT: movl (%rdi), %eax
+; X64-NEXT: retq
+ %atomic = atomicrmw add i32* %ptr, i32 0 seq_cst
+ ret i32 %atomic
+}
+