[PATCH][X86] Add support for Read Time Stamp Counter intrinsics ('__rdtsc' and '__rdtscp').

Andrea Di Biagio andrea.dibiagio at gmail.com
Thu Apr 24 08:00:30 PDT 2014


ping.

On Thu, Apr 17, 2014 at 4:55 PM, Andrea Di Biagio
<andrea.dibiagio at gmail.com> wrote:
> Hi,
>
> this patch adds the support for two new x86 GCCBuiltin intrinsics:
>  - int_x86_rdtsc;
>  - int_x86_rdtscp.
>
> -- BACKGROUND --
> From the Intel manuals:
>
> Instruction
> RDTSC (Read Time-Stamp Counter)
>   Loads the current value of the processor’s time-stamp counter (a
> 64-bit MSR) into the EDX:EAX registers. The EDX register is loaded
> with the high-order 32 bits of the MSR and the EAX register is loaded
> with the low-order 32 bits.
>
> Instruction
> RDTSCP (Read Time-Stamp Counter and Processor ID)
>   Loads the current value of the processor’s time-stamp counter (a
> 64-bit MSR) into the EDX:EAX registers and also loads the IA32_TSC_AUX
> MSR (address C000_0103H) into the ECX register. The EDX register is
> loaded with the
> high-order 32 bits of the IA32_TSC MSR; the EAX register is loaded
> with the low-order 32 bits of the IA32_TSC MSR; and the ECX register
> is loaded with the low-order 32-bits of IA32_TSC_AUX MSR.
>
> Intrinsic __rdtsc
>  - returns the content of the time stamp counter as a 64bit value.
>
> Intrinsic __rdtscp
>  - Takes in input a pointer to int (unsigned int* __A). Returns the
> content of the time stamp counter as a 64bit value and stores the
> processor ID at the address pointed by the input (__A).
>
> -- WHAT WE HAVE
> Clang only provides a definition for intrinsic __rdtsc in ia32intrin.h.
> That means, ia32intrin.h does not define
> unsigned long long __rdtscp(unsigned int *__A);
>
> Intrinsic `__rdtsc` is currently implemented using inline assembly.
> On X86, ISD::READCYCLECOUNTER dag nodes have the same semantic as
> intrinsic __rdtsc (i.e. they both emit instruction RDTSC and return
> the value as a 'i64').
>
> -- CHANGES INTRODUCED BY THIS PATCH --
> This patch:
>  1. Adds two new X86 intrinsics named 'int_x86_rdtsc' and 'int_x86_rdtscp';
>  2. Teaches the backend how to lower the two new builtins;
>  3. Reuses a common function to lower 'int_x86_rdtsc',
> 'int_x86_rdtscp' and 'ISD::READCYCLECOUNTER' dag nodes.
>  4. Modernizes and extends the existing test 'rdtsc.ll' to verify that
> READCYCLECOUNTER and the two new intrinsics work for 64bit Subtargets
> and non-64bit subtargets.
> //
>
> A Clang patch would follow that:
>  1. Adds a definition for two new Builtinsx86:
>     __builtin_ia32_rdtsc;
>     __builtin_ia32_rdtscp;
>  2. Replaces the definition of intrinsic __rdtsc (in ia32intrin.h)
> with a simple call to the GCC builtin __builtin_ia32_rdtsc();
>  3. Adds a definition for the new intrinsic __rdtscp in file
> ia32intrin.h. Intrinsic __rdtscp would be implemented with a simple
> call to the GCC builtin __builtin_ia32_rdtscp();
>
> Please let me know if ok to submit.
>
> Thanks,
> Andrea Di Biagio
> SN Systems - Sony Computer Entertainment Group
-------------- next part --------------
Index: lib/Target/X86/X86ISelLowering.h
===================================================================
--- lib/Target/X86/X86ISelLowering.h	(revision 207118)
+++ lib/Target/X86/X86ISelLowering.h	(working copy)
@@ -83,6 +83,9 @@
       /// readcyclecounter
       RDTSC_DAG,
 
+      /// X86 Read Time-Stamp Counter and Processor ID.
+      RDTSCP_DAG,
+
       /// X86 compare and logical compare instructions.
       CMP, COMI, UCOMI,
 
Index: lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- lib/Target/X86/X86ISelLowering.cpp	(revision 207118)
+++ lib/Target/X86/X86ISelLowering.cpp	(working copy)
@@ -1472,6 +1472,8 @@
   setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
   setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
   setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
+  if (!Subtarget->is64Bit())
+    setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom);
 
   // Only custom-lower 64-bit SADDO and friends on 64-bit because we don't
   // handle type legalization for these operations here.
@@ -12261,6 +12263,71 @@
   return SDValue(Res, 1);
 }
 
+// getReadTimeStampCounter - Handles the lowering of builtin intrinsics that
+// read the time stamp counter (x86_rdtsc and x86_rdtscp). This function is
+// also used to custom lower READCYCLECOUNTER nodes.
+static void getReadTimeStampCounter(SDNode *N, SDLoc DL, unsigned Opcode,
+                              SelectionDAG &DAG, const X86Subtarget *Subtarget,
+                              SmallVectorImpl<SDValue> &Results) {
+  SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue);
+  SDValue TheChain = N->getOperand(0);
+  SDValue rd = DAG.getNode(Opcode, DL, Tys, &TheChain, 1);
+  SDValue LO, HI;
+
+  // The processor's time-stamp counter (a 64-bit MSR) is stored into the
+  // EDX:EAX registers. EDX is loaded with the high-order 32 bits of the MSR
+  // and the EAX register is loaded with the low-order 32 bits.
+  if (Subtarget->is64Bit()) {
+    LO = DAG.getCopyFromReg(rd, DL, X86::RAX, MVT::i64, rd.getValue(1));
+    HI = DAG.getCopyFromReg(LO.getValue(1), DL, X86::RDX, MVT::i64,
+                            LO.getValue(2));
+  } else {
+    LO = DAG.getCopyFromReg(rd, DL, X86::EAX, MVT::i32, rd.getValue(1));
+    HI = DAG.getCopyFromReg(LO.getValue(1), DL, X86::EDX, MVT::i32,
+                            LO.getValue(2));
+  }
+  SDValue Chain = HI.getValue(1);
+
+  if (Opcode == X86ISD::RDTSCP_DAG) {
+    assert(N->getNumOperands() == 3 && "Unexpected number of operands!");
+
+    // Instruction RDTSCP loads the IA32:TSC_AUX_MSR (address C000_0103H) into
+    // the ECX register. Add 'ecx' explicitly to the chain.
+    SDValue ecx = DAG.getCopyFromReg(Chain, DL, X86::ECX, MVT::i32,
+                                     HI.getValue(2));
+    // Explicitly store the content of ECX at the location passed in input
+    // to the 'rdtscp' intrinsic.
+    Chain = DAG.getStore(ecx.getValue(1), DL, ecx, N->getOperand(2),
+                         MachinePointerInfo(), false, false, 0);
+  }
+
+  if (Subtarget->is64Bit()) {
+    // The EDX register is loaded with the high-order 32 bits of the MSR, and
+    // the EAX register is loaded with the low-order 32 bits.
+    SDValue Tmp = DAG.getNode(ISD::SHL, DL, MVT::i64, HI,
+                              DAG.getConstant(32, MVT::i8));
+    Results.push_back(DAG.getNode(ISD::OR, DL, MVT::i64, LO, Tmp));
+    Results.push_back(Chain);
+    return;
+  }
+
+  // Use a buildpair to merge the two 32-bit values into a 64-bit one.
+  SDValue Ops[] = { LO, HI };
+  SDValue Pair = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Ops,
+                             array_lengthof(Ops));
+  Results.push_back(Pair);
+  Results.push_back(Chain);
+}
+
+static SDValue LowerREADCYCLECOUNTER(SDValue Op, const X86Subtarget *Subtarget,
+                                     SelectionDAG &DAG) {
+  SmallVector<SDValue, 2> Results;
+  SDLoc DL(Op);
+  getReadTimeStampCounter(Op.getNode(), DL, X86ISD::RDTSC_DAG, DAG, Subtarget,
+                          Results);
+  return DAG.getMergeValues(&Results[0], Results.size(), DL);
+}
+
 static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget *Subtarget,
                                       SelectionDAG &DAG) {
   SDLoc dl(Op);
@@ -12435,6 +12502,22 @@
     SDValue Scale = Op.getOperand(6);
     return getMScatterNode(Opc, Op, DAG, Src, Mask, Base, Index, Scale, Chain);
   }
+  // Read Time Stamp Counter (RDTSC).
+  case Intrinsic::x86_rdtsc:
+  // Read Time Stamp Counter and Processor ID (RDTSCP).
+  case Intrinsic::x86_rdtscp: {
+    unsigned Opc;
+    switch (IntNo) {
+    default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
+    case Intrinsic::x86_rdtsc:
+      Opc = X86ISD::RDTSC_DAG; break;
+    case Intrinsic::x86_rdtscp:
+      Opc = X86ISD::RDTSCP_DAG; break;
+    }
+    SmallVector<SDValue, 2> Results;
+    getReadTimeStampCounter(Op.getNode(), dl, Opc, DAG, Subtarget, Results);
+    return DAG.getMergeValues(&Results[0], Results.size(), dl);
+  }
   // XTEST intrinsics.
   case Intrinsic::x86_xtest: {
     SDVTList VTs = DAG.getVTList(Op->getValueType(0), MVT::Other);
@@ -13805,25 +13888,6 @@
   return cpOut;
 }
 
-static SDValue LowerREADCYCLECOUNTER(SDValue Op, const X86Subtarget *Subtarget,
-                                     SelectionDAG &DAG) {
-  assert(Subtarget->is64Bit() && "Result not type legalized?");
-  SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue);
-  SDValue TheChain = Op.getOperand(0);
-  SDLoc dl(Op);
-  SDValue rd = DAG.getNode(X86ISD::RDTSC_DAG, dl, Tys, &TheChain, 1);
-  SDValue rax = DAG.getCopyFromReg(rd, dl, X86::RAX, MVT::i64, rd.getValue(1));
-  SDValue rdx = DAG.getCopyFromReg(rax.getValue(1), dl, X86::RDX, MVT::i64,
-                                   rax.getValue(2));
-  SDValue Tmp = DAG.getNode(ISD::SHL, dl, MVT::i64, rdx,
-                            DAG.getConstant(32, MVT::i8));
-  SDValue Ops[] = {
-    DAG.getNode(ISD::OR, dl, MVT::i64, rax, Tmp),
-    rdx.getValue(1)
-  };
-  return DAG.getMergeValues(Ops, array_lengthof(Ops), dl);
-}
-
 static SDValue LowerBITCAST(SDValue Op, const X86Subtarget *Subtarget,
                             SelectionDAG &DAG) {
   MVT SrcVT = Op.getOperand(0).getSimpleValueType();
@@ -14158,20 +14222,22 @@
     Results.push_back(V);
     return;
   }
+  case ISD::INTRINSIC_W_CHAIN: {
+    unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
+    switch (IntNo) {
+    default : llvm_unreachable("Do not know how to custom type "
+                               "legalize this intrinsic operation!");
+    case Intrinsic::x86_rdtsc:
+      return getReadTimeStampCounter(N, dl, X86ISD::RDTSC_DAG, DAG, Subtarget,
+                                     Results);
+    case Intrinsic::x86_rdtscp:
+      return getReadTimeStampCounter(N, dl, X86ISD::RDTSCP_DAG, DAG, Subtarget,
+                                     Results);
+    }
+  }
   case ISD::READCYCLECOUNTER: {
-    SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue);
-    SDValue TheChain = N->getOperand(0);
-    SDValue rd = DAG.getNode(X86ISD::RDTSC_DAG, dl, Tys, &TheChain, 1);
-    SDValue eax = DAG.getCopyFromReg(rd, dl, X86::EAX, MVT::i32,
-                                     rd.getValue(1));
-    SDValue edx = DAG.getCopyFromReg(eax.getValue(1), dl, X86::EDX, MVT::i32,
-                                     eax.getValue(2));
-    // Use a buildpair to merge the two 32-bit values into a 64-bit one.
-    SDValue Ops[] = { eax, edx };
-    Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Ops,
-                                  array_lengthof(Ops)));
-    Results.push_back(edx.getValue(1));
-    return;
+    return getReadTimeStampCounter(N, dl, X86ISD::RDTSC_DAG, DAG, Subtarget,
+                                   Results);
   }
   case ISD::ATOMIC_CMP_SWAP: {
     EVT T = N->getValueType(0);
Index: lib/Target/X86/X86InstrSystem.td
===================================================================
--- lib/Target/X86/X86InstrSystem.td	(revision 207118)
+++ lib/Target/X86/X86InstrSystem.td	(working copy)
@@ -19,7 +19,7 @@
               TB;
 
 let Defs = [RAX, RCX, RDX] in
-  def RDTSCP : I<0x01, MRM_F9, (outs), (ins), "rdtscp", []>, TB;
+  def RDTSCP : I<0x01, MRM_F9, (outs), (ins), "rdtscp", [(X86rdtscp)]>, TB;
 
 // CPU flow control instructions
 
Index: lib/Target/X86/X86InstrInfo.td
===================================================================
--- lib/Target/X86/X86InstrInfo.td	(revision 207118)
+++ lib/Target/X86/X86InstrInfo.td	(working copy)
@@ -206,6 +206,8 @@
 
 def X86rdtsc   : SDNode<"X86ISD::RDTSC_DAG", SDTX86Void,
                         [SDNPHasChain, SDNPOutGlue, SDNPSideEffect]>;
+def X86rdtscp  : SDNode<"X86ISD::RDTSCP_DAG", SDTX86Void,
+                        [SDNPHasChain, SDNPOutGlue, SDNPSideEffect]>;
 
 def X86Wrapper    : SDNode<"X86ISD::Wrapper",     SDTX86Wrapper>;
 def X86WrapperRIP : SDNode<"X86ISD::WrapperRIP",  SDTX86Wrapper>;
Index: test/CodeGen/X86/rdtsc.ll
===================================================================
--- test/CodeGen/X86/rdtsc.ll	(revision 207118)
+++ test/CodeGen/X86/rdtsc.ll	(working copy)
@@ -1,8 +1,49 @@
-; RUN: llc < %s -march=x86 | grep rdtsc
-; RUN: llc < %s -march=x86-64 | grep rdtsc
+; RUN: llc < %s -march=x86-64 -mcpu=generic | FileCheck %s
+; RUN: llc < %s -march=x86 -mcpu=generic | FileCheck %s --check-prefix=CHECK --check-prefix=X86
+
+; Verify that we correctly lower ISD::READCYCLECOUNTER.
+
+
+define i64 @test_builtin_readcyclecounter() {
+  %1 = tail call i64 @llvm.readcyclecounter()
+  ret i64 %1
+}
+; CHECK-LABEL: test_builtin_readcyclecounter
+; CHECK: rdtsc
+; X86-NOT: shlq
+; X86-NOT: or
+; CHECK-NOT: mov
+; CHECK: ret
+
+
+; Verify that we correctly lower the Read Cycle Counter GCC x86 builtins
+; (i.e. RDTSC and RDTSCP).
+
+define i64 @test_builtin_rdtsc() {
+  %1 = tail call i64 @llvm.x86.rdtsc()
+  ret i64 %1
+}
+; CHECK-LABEL: test_builtin_rdtsc
+; CHECK: rdtsc
+; X86-NOT: shlq
+; X86-NOT: or
+; CHECK-NOT: mov
+; CHECK: ret
+
+
+define i64 @test_builtin_rdtscp(i8* %A) {
+  %1 = tail call i64 @llvm.x86.rdtscp(i8* %A)
+  ret i64 %1
+}
+; CHECK-LABEL: test_builtin_rdtscp
+; CHECK: rdtscp
+; X86-NOT: shlq
+; CHECK:   movl	%ecx, (%{{[a-z]+}})
+; X86-NOT: shlq
+; CHECK: ret
+
+
 declare i64 @llvm.readcyclecounter()
+declare i64 @llvm.x86.rdtscp(i8*)
+declare i64 @llvm.x86.rdtsc()
 
-define i64 @foo() {
-	%tmp.1 = call i64 @llvm.readcyclecounter( )		; <i64> [#uses=1]
-	ret i64 %tmp.1
-}
Index: include/llvm/IR/IntrinsicsX86.td
===================================================================
--- include/llvm/IR/IntrinsicsX86.td	(revision 207118)
+++ include/llvm/IR/IntrinsicsX86.td	(working copy)
@@ -18,6 +18,15 @@
 }
 
 //===----------------------------------------------------------------------===//
+// Read Time Stamp Counter.
+let TargetPrefix = "x86" in {
+  def int_x86_rdtsc : GCCBuiltin<"__builtin_ia32_rdtsc">,
+              Intrinsic<[llvm_i64_ty], [], []>;
+  def int_x86_rdtscp : GCCBuiltin<"__builtin_ia32_rdtscp">,
+              Intrinsic<[llvm_i64_ty], [llvm_ptr_ty], [IntrReadWriteArgMem]>;
+}
+
+//===----------------------------------------------------------------------===//
 // 3DNow!
 
 let TargetPrefix = "x86" in {


More information about the llvm-commits mailing list