[llvm-commits] [llvm] r109078 - in /llvm/trunk: lib/Target/X86/README.txt lib/Target/X86/X86ISelLowering.cpp lib/Target/X86/X86ISelLowering.h lib/Target/X86/X86InstrInfo.td lib/Target/X86/X86InstrSSE.td test/CodeGen/X86/barrier-sse.ll test/CodeGen/X86/barrier.ll
Eric Christopher
echristo at apple.com
Wed Jul 21 19:48:34 PDT 2010
Author: echristo
Date: Wed Jul 21 21:48:34 2010
New Revision: 109078
URL: http://llvm.org/viewvc/llvm-project?rev=109078&view=rev
Log:
Custom lower the memory barrier instructions and add support
for lowering without sse2. Add a couple of new testcases.
Fixes a few libgomp tests and latent bugs. Remove a few todos.
Added:
llvm/trunk/test/CodeGen/X86/barrier-sse.ll
llvm/trunk/test/CodeGen/X86/barrier.ll
Modified:
llvm/trunk/lib/Target/X86/README.txt
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/lib/Target/X86/X86ISelLowering.h
llvm/trunk/lib/Target/X86/X86InstrInfo.td
llvm/trunk/lib/Target/X86/X86InstrSSE.td
Modified: llvm/trunk/lib/Target/X86/README.txt
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/README.txt?rev=109078&r1=109077&r2=109078&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/README.txt (original)
+++ llvm/trunk/lib/Target/X86/README.txt Wed Jul 21 21:48:34 2010
@@ -1135,13 +1135,6 @@
//===---------------------------------------------------------------------===//
-handling llvm.memory.barrier on pre SSE2 cpus
-
-should generate:
-lock ; mov %esp, %esp
-
-//===---------------------------------------------------------------------===//
-
The generated code on x86 for checking for signed overflow on a multiply the
obvious way is much longer than it needs to be.
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=109078&r1=109077&r2=109078&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Wed Jul 21 21:48:34 2010
@@ -343,8 +343,9 @@
if (Subtarget->hasSSE1())
setOperationAction(ISD::PREFETCH , MVT::Other, Legal);
- if (!Subtarget->hasSSE2())
- setOperationAction(ISD::MEMBARRIER , MVT::Other, Expand);
+ // We may not have a libcall for MEMBARRIER so we should lower this.
+ setOperationAction(ISD::MEMBARRIER , MVT::Other, Custom);
+
// On X86 and X86-64, atomic operations are lowered to locked instructions.
// Locked instructions, in turn, have implicit fence semantics (all memory
// operations are flushed before issuing the locked instruction, and they
@@ -7509,6 +7510,36 @@
return Sum;
}
+SDValue X86TargetLowering::LowerMEMBARRIER(SDValue Op, SelectionDAG &DAG) const{
+ DebugLoc dl = Op.getDebugLoc();
+
+ if (!Subtarget->hasSSE2())
+ return DAG.getNode(X86ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0),
+ DAG.getConstant(0, MVT::i32));
+
+ unsigned isDev = cast<ConstantSDNode>(Op.getOperand(5))->getZExtValue();
+ if(!isDev)
+ return DAG.getNode(X86ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0));
+ else {
+ unsigned Op1 = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
+ unsigned Op2 = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
+ unsigned Op3 = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
+ unsigned Op4 = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();
+
+ // def : Pat<(membarrier (i8 0), (i8 0), (i8 0), (i8 1), (i8 1)), (SFENCE)>;
+ if (!Op1 && !Op2 && !Op3 && Op4)
+ return DAG.getNode(X86ISD::SFENCE, dl, MVT::Other, Op.getOperand(0));
+
+ // def : Pat<(membarrier (i8 1), (i8 0), (i8 0), (i8 0), (i8 1)), (LFENCE)>;
+ if (Op1 && !Op2 && !Op3 && !Op4)
+ return DAG.getNode(X86ISD::LFENCE, dl, MVT::Other, Op.getOperand(0));
+
+ // def : Pat<(membarrier (i8 imm), (i8 imm), (i8 imm), (i8 imm), (i8 1)),
+ // (MFENCE)>;
+ return DAG.getNode(X86ISD::MFENCE, dl, MVT::Other, Op.getOperand(0));
+ }
+}
+
SDValue X86TargetLowering::LowerCMP_SWAP(SDValue Op, SelectionDAG &DAG) const {
EVT T = Op.getValueType();
DebugLoc dl = Op.getDebugLoc();
@@ -7598,6 +7629,7 @@
SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
switch (Op.getOpcode()) {
default: llvm_unreachable("Should not custom lower this!");
+ case ISD::MEMBARRIER: return LowerMEMBARRIER(Op,DAG);
case ISD::ATOMIC_CMP_SWAP: return LowerCMP_SWAP(Op,DAG);
case ISD::ATOMIC_LOAD_SUB: return LowerLOAD_SUB(Op,DAG);
case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG);
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.h?rev=109078&r1=109077&r2=109078&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.h (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.h Wed Jul 21 21:48:34 2010
@@ -265,7 +265,13 @@
ATOMXOR64_DAG,
ATOMAND64_DAG,
ATOMNAND64_DAG,
- ATOMSWAP64_DAG
+ ATOMSWAP64_DAG,
+
+ // Memory barrier
+ MEMBARRIER,
+ MFENCE,
+ SFENCE,
+ LFENCE
// WARNING: Do not add anything in the end unless you want the node to
// have memop! In fact, starting from ATOMADD64_DAG all opcodes will be
@@ -715,6 +721,7 @@
SDValue LowerCMP_SWAP(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerLOAD_SUB(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerREADCYCLECOUNTER(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerMEMBARRIER(SDValue Op, SelectionDAG &DAG) const;
virtual SDValue
LowerFormalArguments(SDValue Chain,
Modified: llvm/trunk/lib/Target/X86/X86InstrInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrInfo.td?rev=109078&r1=109077&r2=109078&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrInfo.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrInfo.td Wed Jul 21 21:48:34 2010
@@ -80,6 +80,21 @@
def SDT_X86TCRET : SDTypeProfile<0, 2, [SDTCisPtrTy<0>, SDTCisVT<1, i32>]>;
+def SDT_X86MEMBARRIER : SDTypeProfile<0, 0, []>;
+def SDT_X86MEMBARRIERNoSSE : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
+
+def X86MemBarrier : SDNode<"X86ISD::MEMBARRIER", SDT_X86MEMBARRIER,
+ [SDNPHasChain]>;
+def X86MemBarrierNoSSE : SDNode<"X86ISD::MEMBARRIER", SDT_X86MEMBARRIERNoSSE,
+ [SDNPHasChain]>;
+def X86MFence : SDNode<"X86ISD::MFENCE", SDT_X86MEMBARRIER,
+ [SDNPHasChain]>;
+def X86SFence : SDNode<"X86ISD::SFENCE", SDT_X86MEMBARRIER,
+ [SDNPHasChain]>;
+def X86LFence : SDNode<"X86ISD::LFENCE", SDT_X86MEMBARRIER,
+ [SDNPHasChain]>;
+
+
def X86bsf : SDNode<"X86ISD::BSF", SDTUnaryArithWithFlags>;
def X86bsr : SDNode<"X86ISD::BSR", SDTUnaryArithWithFlags>;
def X86shld : SDNode<"X86ISD::SHLD", SDTIntShiftDOp>;
@@ -3906,6 +3921,20 @@
// Atomic support
//
+// Memory barriers
+let hasSideEffects = 1 in {
+def Int_MemBarrier : I<0, Pseudo, (outs), (ins),
+ "#MEMBARRIER",
+ [(X86MemBarrier)]>, Requires<[HasSSE2]>;
+
+// TODO: Get this to fold the constant into the instruction.
+let Uses = [ESP] in
+def Int_MemBarrierNoSSE : I<0x0B, Pseudo, (outs), (ins GR32:$zero),
+ "lock\n\t"
+ "or{l}\t{$zero, (%esp)|(%esp), $zero}",
+ [(X86MemBarrierNoSSE GR32:$zero)]>, LOCK;
+}
+
// Atomic swap. These are just normal xchg instructions. But since a memory
// operand is referenced, the atomicity is ensured.
let Constraints = "$val = $dst" in {
Modified: llvm/trunk/lib/Target/X86/X86InstrSSE.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrSSE.td?rev=109078&r1=109077&r2=109078&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrSSE.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrSSE.td Wed Jul 21 21:48:34 2010
@@ -2001,6 +2001,7 @@
// Load, store, and memory fence
def SFENCE : I<0xAE, MRM_F8, (outs), (ins), "sfence", [(int_x86_sse_sfence)]>,
TB, Requires<[HasSSE1]>;
+def : Pat<(X86SFence), (SFENCE)>;
// Alias instructions that map zero vector to pxor / xorp* for sse.
// We set canFoldAsLoad because this can be converted to a constant-pool
@@ -3024,19 +3025,14 @@
"lfence", [(int_x86_sse2_lfence)]>, TB, Requires<[HasSSE2]>;
def MFENCE : I<0xAE, MRM_F0, (outs), (ins),
"mfence", [(int_x86_sse2_mfence)]>, TB, Requires<[HasSSE2]>;
+def : Pat<(X86LFence), (LFENCE)>;
+def : Pat<(X86MFence), (MFENCE)>;
+
// Pause. This "instruction" is encoded as "rep; nop", so even though it
// was introduced with SSE2, it's backward compatible.
def PAUSE : I<0x90, RawFrm, (outs), (ins), "pause", []>, REP;
-//TODO: custom lower this so as to never even generate the noop
-def : Pat<(membarrier (i8 imm), (i8 imm), (i8 imm), (i8 imm),
- (i8 0)), (NOOP)>;
-def : Pat<(membarrier (i8 0), (i8 0), (i8 0), (i8 1), (i8 1)), (SFENCE)>;
-def : Pat<(membarrier (i8 1), (i8 0), (i8 0), (i8 0), (i8 1)), (LFENCE)>;
-def : Pat<(membarrier (i8 imm), (i8 imm), (i8 imm), (i8 imm),
- (i8 1)), (MFENCE)>;
-
// Alias instructions that map zero vector to pxor / xorp* for sse.
// We set canFoldAsLoad because this can be converted to a constant-pool
// load of an all-ones value if folding it would be beneficial.
Added: llvm/trunk/test/CodeGen/X86/barrier-sse.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/barrier-sse.ll?rev=109078&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/X86/barrier-sse.ll (added)
+++ llvm/trunk/test/CodeGen/X86/barrier-sse.ll Wed Jul 21 21:48:34 2010
@@ -0,0 +1,21 @@
+; RUN: llc < %s -march=x86 -mattr=+sse2 | not grep sfence
+; RUN: llc < %s -march=x86 -mattr=+sse2 | not grep lfence
+; RUN: llc < %s -march=x86 -mattr=+sse2 | not grep mfence
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep MEMBARRIER
+
+
+declare void @llvm.memory.barrier( i1 , i1 , i1 , i1 , i1)
+
+define void @test() {
+ call void @llvm.memory.barrier( i1 true, i1 true, i1 false, i1 false, i1 false)
+ call void @llvm.memory.barrier( i1 true, i1 false, i1 true, i1 false, i1 false)
+ call void @llvm.memory.barrier( i1 true, i1 false, i1 false, i1 true, i1 false)
+
+ call void @llvm.memory.barrier( i1 true, i1 true, i1 true, i1 false, i1 false)
+ call void @llvm.memory.barrier( i1 true, i1 true, i1 false, i1 true, i1 false)
+ call void @llvm.memory.barrier( i1 true, i1 false, i1 true, i1 true, i1 false)
+
+ call void @llvm.memory.barrier( i1 true, i1 true, i1 true, i1 true , i1 false)
+ call void @llvm.memory.barrier( i1 false, i1 false, i1 false, i1 false , i1 false)
+ ret void
+}
Added: llvm/trunk/test/CodeGen/X86/barrier.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/barrier.ll?rev=109078&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/X86/barrier.ll (added)
+++ llvm/trunk/test/CodeGen/X86/barrier.ll Wed Jul 21 21:48:34 2010
@@ -0,0 +1,7 @@
+; RUN: llc < %s -march=x86 -mattr=-sse2 | grep lock
+declare void @llvm.memory.barrier( i1 , i1 , i1 , i1 , i1)
+
+define void @test() {
+ call void @llvm.memory.barrier( i1 true, i1 true, i1 false, i1 false, i1 false)
+ ret void
+}
\ No newline at end of file
More information about the llvm-commits
mailing list