[PATCH][X86] Add support for builtin 'rdpmc' to read performance monitoring counters.

Andrea Di Biagio andrea.dibiagio at gmail.com
Mon Jun 30 09:00:25 PDT 2014


ping.


On Mon, Jun 23, 2014 at 8:07 PM, Andrea Di Biagio
<andrea.dibiagio at gmail.com> wrote:
> Hi,
>
> This patch adds support for a new builtin instruction called
> '__builtin_ia32_rdpmc'.
> Builtin '__builtin_ia32_rdpmc' is defined as a "GCC builtin"; On X86,
> it can be used to read performance monitoring counters. It takes as
> input the index of the performance counter to read, and returns the
> value of the specified performance counter (a 64-bit quantity).
>
> Calls to this new builtin will map to instruction RDPMC.
> The index in input to the builtin call is moved to register %ECX. The
> result of the builtin call is the value of the specified performance
> counter (a 64-bit quantity - instruction RDPMC will return that
> quantity in registers RDX:RAX).
>
> This patch:
>  1) Adds builtin int_x86_rdpmc as a new GCCBuiltin;
>  2) Adds a new X86 DAG node called RDPMC_DAG;
>  3) Teaches how to lower this new builtin;
>  4) Adds an ISel pattern to instruction RDPMC;
>  5) Fixes the definition of instruction RDPMC adding %RAX and %RDX as
> implicit definitions, and adding %ECX as implicit-use;
>  6) Adds an LLVM test to verify that we correctly select the new intrinsic.
>
> A clang patch will follow that:
>  - Adds a definition for the new x86 builtin '__builtin_ia32_rdpmc';
>  - Adds an intrinsic function named '__rdpmc' to <ia32intrin.h>
>
> Please let me know if ok to commit.
>
> Thanks,
> Andrea Di Biagio.
-------------- next part --------------
Index: lib/Target/X86/X86ISelLowering.h
===================================================================
--- lib/Target/X86/X86ISelLowering.h	(revision 212046)
+++ lib/Target/X86/X86ISelLowering.h	(working copy)
@@ -86,6 +86,9 @@
       /// X86 Read Time-Stamp Counter and Processor ID.
       RDTSCP_DAG,
 
+      /// X86 Read Performance Monitoring Counters.
+      RDPMC_DAG,
+
       /// X86 compare and logical compare instructions.
       CMP, COMI, UCOMI,
 
Index: lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- lib/Target/X86/X86ISelLowering.cpp	(revision 212046)
+++ lib/Target/X86/X86ISelLowering.cpp	(working copy)
@@ -14225,6 +14225,51 @@
   return SDValue(Res, 0);
 }
 
+// getReadPerformanceCounter - Handles the lowering of builtin intrinsics that
+// read performance monitor counters (x86_rdpmc).
+static void getReadPerformanceCounter(SDNode *N, SDLoc DL,
+                              SelectionDAG &DAG, const X86Subtarget *Subtarget,
+                              SmallVectorImpl<SDValue> &Results) {
+  assert(N->getNumOperands() == 3 && "Unexpected number of operands!");
+  SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue);
+  SDValue LO, HI;
+
+  // The ECX register is used to select the index of the performance counter
+  // to read.
+  SDValue Chain = DAG.getCopyToReg(N->getOperand(0), DL, X86::ECX,
+                                   N->getOperand(2));
+  SDValue rd = DAG.getNode(X86ISD::RDPMC_DAG, DL, Tys, Chain);
+
+  // Reads the content of a 64-bit performance counter and returns it in the
+  // registers EDX:EAX.
+  if (Subtarget->is64Bit()) {
+    LO = DAG.getCopyFromReg(rd, DL, X86::RAX, MVT::i64, rd.getValue(1));
+    HI = DAG.getCopyFromReg(LO.getValue(1), DL, X86::RDX, MVT::i64,
+                            LO.getValue(2));
+  } else {
+    LO = DAG.getCopyFromReg(rd, DL, X86::EAX, MVT::i32, rd.getValue(1));
+    HI = DAG.getCopyFromReg(LO.getValue(1), DL, X86::EDX, MVT::i32,
+                            LO.getValue(2));
+  }
+  Chain = HI.getValue(1);
+
+  if (Subtarget->is64Bit()) {
+    // The EAX register is loaded with the low-order 32 bits. The EDX register
+    // is loaded with the supported high-order bits of the counter.
+    SDValue Tmp = DAG.getNode(ISD::SHL, DL, MVT::i64, HI,
+                              DAG.getConstant(32, MVT::i8));
+    Results.push_back(DAG.getNode(ISD::OR, DL, MVT::i64, LO, Tmp));
+    Results.push_back(Chain);
+    return;
+  }
+
+  // Use a buildpair to merge the two 32-bit values into a 64-bit one.
+  SDValue Ops[] = { LO, HI };
+  SDValue Pair = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Ops);
+  Results.push_back(Pair);
+  Results.push_back(Chain);
+}
+
 // getReadTimeStampCounter - Handles the lowering of builtin intrinsics that
 // read the time stamp counter (x86_rdtsc and x86_rdtscp). This function is
 // also used to custom lower READCYCLECOUNTER nodes.
@@ -14289,7 +14334,7 @@
 }
 
 enum IntrinsicType {
-  GATHER, SCATTER, PREFETCH, RDSEED, RDRAND, RDTSC, XTEST
+  GATHER, SCATTER, PREFETCH, RDSEED, RDRAND, RDPMC, RDTSC, XTEST
 };
 
 struct IntrinsicData {
@@ -14383,6 +14428,8 @@
                                 IntrinsicData(RDTSC,  X86ISD::RDTSC_DAG, 0)));
   IntrMap.insert(std::make_pair(Intrinsic::x86_rdtscp,
                                 IntrinsicData(RDTSC,  X86ISD::RDTSCP_DAG, 0)));
+  IntrMap.insert(std::make_pair(Intrinsic::x86_rdpmc,
+                                IntrinsicData(RDPMC,  X86ISD::RDPMC_DAG, 0)));
   Initialized = true;
 }
 
@@ -14458,6 +14505,12 @@
     getReadTimeStampCounter(Op.getNode(), dl, Intr.Opc0, DAG, Subtarget, Results);
     return DAG.getMergeValues(Results, dl);
   }
+  // Read Performance Monitoring Counters.
+  case RDPMC: {
+    SmallVector<SDValue, 2> Results;
+    getReadPerformanceCounter(Op.getNode(), dl, DAG, Subtarget, Results);
+    return DAG.getMergeValues(Results, dl);
+  }
   // XTEST intrinsics.
   case XTEST: {
     SDVTList VTs = DAG.getVTList(Op->getValueType(0), MVT::Other);
@@ -16282,6 +16335,8 @@
     case Intrinsic::x86_rdtscp:
       return getReadTimeStampCounter(N, dl, X86ISD::RDTSCP_DAG, DAG, Subtarget,
                                      Results);
+    case Intrinsic::x86_rdpmc:
+      return getReadPerformanceCounter(N, dl, DAG, Subtarget, Results);
     }
   }
   case ISD::READCYCLECOUNTER: {
@@ -16446,6 +16501,7 @@
   case X86ISD::CALL:               return "X86ISD::CALL";
   case X86ISD::RDTSC_DAG:          return "X86ISD::RDTSC_DAG";
   case X86ISD::RDTSCP_DAG:         return "X86ISD::RDTSCP_DAG";
+  case X86ISD::RDPMC_DAG:          return "X86ISD::RDPMC_DAG";
   case X86ISD::BT:                 return "X86ISD::BT";
   case X86ISD::CMP:                return "X86ISD::CMP";
   case X86ISD::COMI:               return "X86ISD::COMI";
Index: lib/Target/X86/X86InstrSystem.td
===================================================================
--- lib/Target/X86/X86InstrSystem.td	(revision 212046)
+++ lib/Target/X86/X86InstrSystem.td	(working copy)
@@ -439,8 +439,11 @@
 let SchedRW = [WriteSystem] in {
 def WRMSR : I<0x30, RawFrm, (outs), (ins), "wrmsr", [], IIC_WRMSR>, TB;
 def RDMSR : I<0x32, RawFrm, (outs), (ins), "rdmsr", [], IIC_RDMSR>, TB;
-def RDPMC : I<0x33, RawFrm, (outs), (ins), "rdpmc", [], IIC_RDPMC>, TB;
 
+let Defs = [RAX, RDX], Uses = [ECX] in
+  def RDPMC : I<0x33, RawFrm, (outs), (ins), "rdpmc", [(X86rdpmc)], IIC_RDPMC>,
+              TB;
+
 def SMSW16r : I<0x01, MRM4r, (outs GR16:$dst), (ins), 
                 "smsw{w}\t$dst", [], IIC_SMSW>, OpSize16, TB;
 def SMSW32r : I<0x01, MRM4r, (outs GR32:$dst), (ins), 
Index: lib/Target/X86/X86InstrInfo.td
===================================================================
--- lib/Target/X86/X86InstrInfo.td	(revision 212046)
+++ lib/Target/X86/X86InstrInfo.td	(working copy)
@@ -208,6 +208,8 @@
                         [SDNPHasChain, SDNPOutGlue, SDNPSideEffect]>;
 def X86rdtscp  : SDNode<"X86ISD::RDTSCP_DAG", SDTX86Void,
                         [SDNPHasChain, SDNPOutGlue, SDNPSideEffect]>;
+def X86rdpmc   : SDNode<"X86ISD::RDPMC_DAG", SDTX86Void,
+                        [SDNPHasChain, SDNPOutGlue, SDNPSideEffect]>; 
 
 def X86Wrapper    : SDNode<"X86ISD::Wrapper",     SDTX86Wrapper>;
 def X86WrapperRIP : SDNode<"X86ISD::WrapperRIP",  SDTX86Wrapper>;
Index: test/CodeGen/X86/rdpmc.ll
===================================================================
--- test/CodeGen/X86/rdpmc.ll	(revision 0)
+++ test/CodeGen/X86/rdpmc.ll	(working copy)
@@ -0,0 +1,22 @@
+; RUN: llc < %s -march=x86-64 -mcpu=generic | FileCheck %s --check-prefix=CHECK --check-prefix=X86-64
+; RUN: llc < %s -march=x86 -mcpu=generic | FileCheck %s --check-prefix=CHECK --check-prefix=X86
+
+; Verify that we correctly lower the "Read Performance-Monitoring Counters"
+; x86 builtin.
+
+
+define i64 @test_builtin_read_pmc(i32 %ID) {
+  %1 = tail call i64 @llvm.x86.rdpmc(i32 %ID)
+  ret i64 %1
+}
+; CHECK-LABEL: test_builtin_read_pmc
+; CHECK: rdpmc
+; X86-NOT: shlq
+; X86-NOT: or
+; X86-64: shlq
+; X86-64: or
+; CHECK-NOT: mov
+; CHECK: ret
+
+declare i64 @llvm.x86.rdpmc(i32 %ID)
+
Index: include/llvm/IR/IntrinsicsX86.td
===================================================================
--- include/llvm/IR/IntrinsicsX86.td	(revision 212046)
+++ include/llvm/IR/IntrinsicsX86.td	(working copy)
@@ -26,6 +26,12 @@
               Intrinsic<[llvm_i64_ty], [llvm_ptr_ty], [IntrReadWriteArgMem]>;
 }
 
+// Read Performance-Monitoring Counter.
+let TargetPrefix = "x86" in {
+  def int_x86_rdpmc : GCCBuiltin<"__builtin_ia32_rdpmc">,
+              Intrinsic<[llvm_i64_ty], [llvm_i32_ty], []>;
+}
+
 //===----------------------------------------------------------------------===//
 // 3DNow!
 


More information about the llvm-commits mailing list