[PATCH] [PowerPC] Add Hardware Transaction Memory builtins support

Adhemerval Zanella azanella at linux.vnet.ibm.com
Fri Feb 20 05:22:08 PST 2015


This patch adds support for the Hardware Transactional Memory (HTM)
facility provided by ISA 2.07 (POWER8).  The intrinsic support is based on
the GCC one [1], but currently only the 'PowerPC HTM Low Level Built-in
Functions' are implemented.

The HTM instructions follow the RC ones, and the transaction initiation
result is set in CR0 (with the exception of tcheck).  The current approach
is to create a register copy from CR0 to a GPR and compare it.  Although
this is suboptimal, since the branch could be taken directly by comparing
the CR0 value, it generates correct code both when the result is tested and
branched on and when it is just returned.  A possible future optimization
would be to eliminate the MFCR instruction and branch directly.

Using HTM requires a recent kernel with PPC HTM support enabled.
Tested on powerpc64 and powerpc64le.

This is sent along with a clang patch that enables the builtins and the
option switch.

[1] https://gcc.gnu.org/onlinedocs/gcc/PowerPC-Hardware-Transactional-Memory-Built-in-Functions.html
---
 include/llvm/IR/IntrinsicsPowerPC.td               |  60 +++++++
 lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp      |   2 +
 .../PowerPC/Disassembler/PPCDisassembler.cpp       |   6 +
 lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp  |   7 +
 lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h    |   1 +
 lib/Target/PowerPC/PPC.td                          |   4 +-
 lib/Target/PowerPC/PPCISelLowering.cpp             |   2 +-
 lib/Target/PowerPC/PPCInstr64Bit.td                |   6 +
 lib/Target/PowerPC/PPCInstrFormats.td              |  54 ++++++
 lib/Target/PowerPC/PPCInstrHTM.td                  | 183 +++++++++++++++++++++
 lib/Target/PowerPC/PPCInstrInfo.cpp                |  53 ++++++
 lib/Target/PowerPC/PPCInstrInfo.td                 |  22 +++
 lib/Target/PowerPC/PPCRegisterInfo.cpp             |  31 ----
 lib/Target/PowerPC/PPCRegisterInfo.h               |  33 ++++
 lib/Target/PowerPC/PPCRegisterInfo.td              |   2 +
 lib/Target/PowerPC/PPCSubtarget.cpp                |   1 +
 lib/Target/PowerPC/PPCSubtarget.h                  |   2 +
 test/CodeGen/PowerPC/htm.ll                        | 127 ++++++++++++++
 test/MC/PowerPC/htm.s                              |  53 ++++++
 19 files changed, 616 insertions(+), 33 deletions(-)
 create mode 100644 lib/Target/PowerPC/PPCInstrHTM.td
 create mode 100644 test/CodeGen/PowerPC/htm.ll
 create mode 100644 test/MC/PowerPC/htm.s

diff --git a/include/llvm/IR/IntrinsicsPowerPC.td b/include/llvm/IR/IntrinsicsPowerPC.td
index 5cdabde..d825460 100644
--- a/include/llvm/IR/IntrinsicsPowerPC.td
+++ b/include/llvm/IR/IntrinsicsPowerPC.td
@@ -542,3 +542,63 @@ def int_ppc_vsx_xsmindp : PowerPC_VSX_Sca_DDD_Intrinsic<"xsmindp">;
 def int_ppc_vsx_xvdivdp : PowerPC_VSX_Vec_DDD_Intrinsic<"xvdivdp">;
 def int_ppc_vsx_xvdivsp : PowerPC_VSX_Vec_FFF_Intrinsic<"xvdivsp">;
 }
+
+//===----------------------------------------------------------------------===//
+// PowerPC HTM Intrinsic Definitions.
+
+let TargetPrefix = "ppc" in {  // All intrinsics start with "llvm.ppc.".
+
+def int_ppc_tbegin :
+      Intrinsic<[llvm_i32_ty], [llvm_i32_ty], []>;
+def int_ppc_tend :
+      Intrinsic<[llvm_i32_ty], [llvm_i32_ty], []>;
+
+def int_ppc_tabort : GCCBuiltin<"__builtin_tabort">,
+      Intrinsic<[llvm_i32_ty], [llvm_i32_ty], []>;
+def int_ppc_tabortwc :
+      Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>;
+def int_ppc_tabortwci :
+      Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>;
+def int_ppc_tabortdc :
+      Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>;
+def int_ppc_tabortdci :
+      Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>;
+
+def int_ppc_tcheck : GCCBuiltin<"__builtin_tcheck">,
+      Intrinsic<[llvm_i32_ty], [llvm_i32_ty], []>;
+def int_ppc_treclaim : GCCBuiltin<"__builtin_treclaim">,
+      Intrinsic<[llvm_i32_ty], [llvm_i32_ty], []>;
+def int_ppc_trechkpt : GCCBuiltin<"__builtin_trechkpt">,
+      Intrinsic<[llvm_i32_ty], [], []>;
+def int_ppc_tsr :
+      Intrinsic<[llvm_i32_ty], [llvm_i32_ty], []>;
+
+def int_ppc_get_texasr : GCCBuiltin<"__builtin_get_texasr">,
+      Intrinsic<[llvm_i64_ty], [], []>;
+def int_ppc_get_texasru : GCCBuiltin<"__builtin_get_texasru">,
+      Intrinsic<[llvm_i64_ty], [], []>;
+def int_ppc_get_tfhar : GCCBuiltin<"__builtin_get_tfhar">,
+      Intrinsic<[llvm_i64_ty], [], []>;
+def int_ppc_get_tfiar : GCCBuiltin<"__builtin_get_tfiar">,
+      Intrinsic<[llvm_i64_ty], [], []>;
+
+def int_ppc_set_texasr : GCCBuiltin<"__builtin_set_texasr">,
+      Intrinsic<[], [llvm_i64_ty], []>;
+def int_ppc_set_texasru : GCCBuiltin<"__builtin_set_texasru">,
+      Intrinsic<[], [llvm_i64_ty], []>;
+def int_ppc_set_tfhar : GCCBuiltin<"__builtin_set_tfhar">,
+      Intrinsic<[], [llvm_i64_ty], []>;
+def int_ppc_set_tfiar : GCCBuiltin<"__builtin_set_tfiar">,
+      Intrinsic<[], [llvm_i64_ty], []>;
+
+// Extended mnemonics
+def int_ppc_tendall : GCCBuiltin<"__builtin_tendall">,
+      Intrinsic<[llvm_i32_ty], [], []>;
+def int_ppc_tresume : GCCBuiltin<"__builtin_tresume">,
+      Intrinsic<[llvm_i32_ty], [], []>;
+def int_ppc_tsuspend : GCCBuiltin<"__builtin_tsuspend">,
+      Intrinsic<[llvm_i32_ty], [], []>;
+
+def int_ppc_ttest : GCCBuiltin<"__builtin_ttest">,
+      Intrinsic<[llvm_i64_ty], [], []>;
+}
diff --git a/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp b/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
index cd36e58..b0d3a57 100644
--- a/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
+++ b/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
@@ -415,7 +415,9 @@ public:
 
   bool isToken() const override { return Kind == Token; }
   bool isImm() const override { return Kind == Immediate || Kind == Expression; }
+  bool isU1Imm() const { return Kind == Immediate && isUInt<1>(getImm()); }
   bool isU2Imm() const { return Kind == Immediate && isUInt<2>(getImm()); }
+  bool isU3Imm() const { return Kind == Immediate && isUInt<3>(getImm()); }
   bool isU4Imm() const { return Kind == Immediate && isUInt<4>(getImm()); }
   bool isU5Imm() const { return Kind == Immediate && isUInt<5>(getImm()); }
   bool isS5Imm() const { return Kind == Immediate && isInt<5>(getImm()); }
diff --git a/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp b/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp
index 5251b60..b0b3b84 100644
--- a/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp
+++ b/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp
@@ -178,6 +178,12 @@ static DecodeStatus DecodeCRRCRegisterClass(MCInst &Inst, uint64_t RegNo,
   return decodeRegisterClass(Inst, RegNo, CRRegs);
 }
 
+static DecodeStatus DecodeCRRC0RegisterClass(MCInst &Inst, uint64_t RegNo,
+                                            uint64_t Address,
+                                            const void *Decoder) {
+  return decodeRegisterClass(Inst, RegNo, CRRegs);
+}
+
 static DecodeStatus DecodeCRBITRCRegisterClass(MCInst &Inst, uint64_t RegNo,
                                             uint64_t Address,
                                             const void *Decoder) {
diff --git a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
index 670c40a..1726b8a 100644
--- a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
+++ b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
@@ -208,6 +208,13 @@ void PPCInstPrinter::printU2ImmOperand(const MCInst *MI, unsigned OpNo,
   O << (unsigned int)Value;
 }
 
+void PPCInstPrinter::printU3ImmOperand(const MCInst *MI, unsigned OpNo,
+                                       raw_ostream &O) { // prints operand OpNo
+  unsigned int Value = MI->getOperand(OpNo).getImm();
+  assert(Value <= 7 && "Invalid u3imm argument!"); // 3 unsigned bits: 0..7
+  O << (unsigned int)Value;
+}
+
 void PPCInstPrinter::printU4ImmOperand(const MCInst *MI, unsigned OpNo,
                                        raw_ostream &O) {
   unsigned int Value = MI->getOperand(OpNo).getImm();
diff --git a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h
index b21aa22..2ecacb6 100644
--- a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h
+++ b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h
@@ -44,6 +44,7 @@ public:
                              raw_ostream &O, const char *Modifier = nullptr);
 
   void printU2ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+  void printU3ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
   void printU4ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
   void printS5ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
   void printU5ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
diff --git a/lib/Target/PowerPC/PPC.td b/lib/Target/PowerPC/PPC.td
index f53add5..622f142 100644
--- a/lib/Target/PowerPC/PPC.td
+++ b/lib/Target/PowerPC/PPC.td
@@ -115,6 +115,8 @@ def FeatureP8Altivec : SubtargetFeature<"power8-altivec", "HasP8Altivec", "true"
 def FeatureP8Vector  : SubtargetFeature<"power8-vector", "HasP8Vector", "true",
                                         "Enable POWER8 vector instructions",
                                         [FeatureVSX, FeatureP8Altivec]>;
+def FeatureHTM : SubtargetFeature<"htm", "HasHTM", "true",
+                                  "Enable Hardware Transaction Memory instructions">;
 
 def FeatureInvariantFunctionDescriptors :
   SubtargetFeature<"invariant-function-descriptors",
@@ -256,7 +258,7 @@ def ProcessorFeatures {
         [DirectivePwr8, FeatureAltivec, FeatureP8Altivec, FeatureVSX, 
         FeatureP8Vector, FeatureMFOCRF, FeatureFCPSGN, FeatureFSqrt, 
         FeatureFRE, FeatureFRES, FeatureFRSQRTE, FeatureFRSQRTES,
-        FeatureRecipPrec, FeatureSTFIWX, FeatureLFIWAX,
+        FeatureRecipPrec, FeatureSTFIWX, FeatureLFIWAX, FeatureHTM,
         FeatureFPRND, FeatureFPCVT, FeatureISEL,
         FeaturePOPCNTD, FeatureCMPB, FeatureLDBRX,
         Feature64Bit /*, Feature64BitRegs */, FeatureICBT,
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index 1258d96..e9300e7 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -7848,7 +7848,7 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
     BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY),
             MI->getOperand(0).getReg())
       .addReg(isEQ ? PPC::CR0EQ : PPC::CR0GT);
-  } else {
+  } else  {
     llvm_unreachable("Unexpected instr type to insert");
   }
 
diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td
index 449c8e3..8bfe4d8 100644
--- a/lib/Target/PowerPC/PPCInstr64Bit.td
+++ b/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -325,6 +325,12 @@ let hasSideEffects = 1, isBarrier = 1, usesCustomInserter = 1 in {
                           Requires<[In64BitMode]>;
 }
 
+def MFSPR8 : XFXForm_1<31, 339, (outs g8rc:$RT), (ins i32imm:$SPR),
+                       "mfspr $RT, $SPR", IIC_SprMFSPR>;
+def MTSPR8 : XFXForm_1<31, 467, (outs), (ins i32imm:$SPR, g8rc:$RT),
+                       "mtspr $SPR, $RT", IIC_SprMTSPR>;
+
+
 //===----------------------------------------------------------------------===//
 // 64-bit SPR manipulation instrs.
 
diff --git a/lib/Target/PowerPC/PPCInstrFormats.td b/lib/Target/PowerPC/PPCInstrFormats.td
index 0410b1c..4d2298a 100644
--- a/lib/Target/PowerPC/PPCInstrFormats.td
+++ b/lib/Target/PowerPC/PPCInstrFormats.td
@@ -652,6 +652,60 @@ class XForm_16b<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
   let A = 0;
 }
 
+class XForm_htm0<bits<6> opcode, bits<10> xo, dag OOL, dag IOL,
+                 string asmstr, InstrItinClass itin, list<dag> pattern>
+  : I<opcode, OOL, IOL, asmstr, itin> {
+  bit R;
+
+  bit RC = 1;
+
+  let Inst{6-9}   = 0;
+  let Inst{10}    = R;
+  let Inst{11-20} = 0;
+  let Inst{21-30} = xo;
+  let Inst{31}    = RC;
+}
+
+class XForm_htm1<bits<6> opcode, bits<10> xo, dag OOL, dag IOL,
+                 string asmstr, InstrItinClass itin, list<dag> pattern>
+  : I<opcode, OOL, IOL, asmstr, itin> {
+  bit A;
+
+  bit RC = 1;
+
+  let Inst{6}     = A;
+  let Inst{7-20}  = 0;
+  let Inst{21-30} = xo;
+  let Inst{31}    = RC;
+}
+
+class XForm_htm2<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+              InstrItinClass itin, list<dag> pattern> // pattern is not forwarded
+  : I<opcode, OOL, IOL, asmstr, itin> {
+  bit L;         // single operand bit, encoded at instruction bit 10
+  // X-form HTM encoding with one operand bit; all remaining fields are zero.
+  bit RC = 0;    // set by isDOT
+  // Zero bits 6-9 as a group (as XForm_htm0 does) so bit 6 is not left unset.
+  let Inst{6-9}   = 0;
+  let Inst{10}    = L;
+  let Inst{11-20} = 0;
+  let Inst{21-30} = xo;
+  let Inst{31}    = RC;
+}
+
+class XForm_htm3<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+              InstrItinClass itin, list<dag> pattern>
+  : I<opcode, OOL, IOL, asmstr, itin> {
+  bits<3> BF;
+
+  bit RC = 0;
+
+  let Inst{6-8}   = BF;
+  let Inst{9-20}  = 0;
+  let Inst{21-30} = xo;
+  let Inst{31}    = RC;
+}
+
 // XX*-Form (VSX)
 class XX1Form<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, 
               InstrItinClass itin, list<dag> pattern>
diff --git a/lib/Target/PowerPC/PPCInstrHTM.td b/lib/Target/PowerPC/PPCInstrHTM.td
new file mode 100644
index 0000000..e403c66
--- /dev/null
+++ b/lib/Target/PowerPC/PPCInstrHTM.td
@@ -0,0 +1,183 @@
+//===-- PPCInstrHTM.td - The PowerPC Hardware Transactional Memory  -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the Hardware Transactional Memory extension to the
+// PowerPC instruction set.
+//
+//===----------------------------------------------------------------------===//
+
+
+
+def HasHTM : Predicate<"PPCSubTarget->hasHTM()">;
+
+def HTM_get_imm : SDNodeXForm<imm, [{
+  return getI32Imm (N->getZExtValue());
+}]>;
+
+let Predicates = [HasHTM] in {
+
+def TBEGIN : XForm_htm0 <31, 654,
+                         (outs crrc0:$ret), (ins u1imm:$R), "tbegin. $R", IIC_SprMTSPR, []>;
+
+def TEND : XForm_htm1 <31, 686,
+                       (outs crrc0:$ret), (ins u1imm:$A), "tend. $A", IIC_SprMTSPR, []>;
+
+def TABORT : XForm_base_r3xo <31, 910,
+                              (outs crrc0:$ret), (ins gprc:$A), "tabort. $A", IIC_SprMTSPR,
+                              []>, isDOT {
+  let RST = 0;
+  let B = 0;
+}
+
+def TABORTWC : XForm_base_r3xo <31, 782,
+                                (outs crrc0:$ret), (ins u5imm:$RTS, gprc:$A, gprc:$B),
+                                "tabortwc. $RTS, $A, $B", IIC_SprMTSPR, []>,
+                                isDOT;
+
+def TABORTWCI : XForm_base_r3xo <31, 846,
+                                 (outs crrc0:$ret), (ins u5imm:$RTS, gprc:$A, u5imm:$B),
+                                 "tabortwci. $RTS, $A, $B", IIC_SprMTSPR, []>,
+                                 isDOT;
+
+def TABORTDC : XForm_base_r3xo <31, 814,
+                                (outs crrc0:$ret), (ins u5imm:$RTS, gprc:$A, gprc:$B),
+                                "tabortdc. $RTS, $A, $B", IIC_SprMTSPR, []>,
+                                isDOT;
+
+def TABORTDCI : XForm_base_r3xo <31, 878,
+                                 (outs crrc0:$ret), (ins u5imm:$RTS, gprc:$A, u5imm:$B),
+                                 "tabortdci. $RTS, $A, $B", IIC_SprMTSPR, []>,
+                                 isDOT;
+
+def TSR : XForm_htm2 <31, 750,
+                      (outs crrc0:$ret), (ins u1imm:$L), "tsr. $L", IIC_SprMTSPR, []>,
+                      isDOT;
+
+def TCHECK : XForm_htm3 <31, 718,
+                        (outs crrc:$ret), (ins u3imm:$BF), "tcheck $BF", IIC_SprMTSPR, []>;
+
+
+def TRECLAIM : XForm_base_r3xo <31, 942,
+                                (outs crrc:$ret), (ins gprc:$A), "treclaim. $A",
+                                IIC_SprMTSPR, []>,
+                                isDOT {
+  let RST = 0;
+  let B = 0;
+}
+
+def TRECHKPT : XForm_base_r3xo <31, 1006,
+                                (outs crrc:$ret), (ins), "trechkpt.", IIC_SprMTSPR, []>,
+                                isDOT {
+  let RST = 0;
+  let A = 0;
+  let B = 0;
+}
+
+// Builtins
+
+// All HTM instructions, with the exception of tcheck, set the EQ bit in CR0 to 0
+// on success, so the XORI in these patterns flips the bit to return 1 on success.
+def : Pat<(int_ppc_tbegin i32:$R),
+           (XORI
+             (EXTRACT_SUBREG (
+               TBEGIN (HTM_get_imm imm:$R)), sub_eq),
+            1)>;
+
+def : Pat<(int_ppc_tend i32:$R),
+           (XORI
+             (EXTRACT_SUBREG (
+               TEND (HTM_get_imm imm:$R)), sub_eq),
+            1)>;
+
+
+def : Pat<(int_ppc_tabort i32:$R),
+           (XORI
+            (EXTRACT_SUBREG (
+              TABORT $R), sub_eq),
+           1)>;
+
+def : Pat<(int_ppc_tabortwc i32:$TO, i32:$RA, i32:$RB),
+           (XORI
+            (EXTRACT_SUBREG (
+              TABORTWC (HTM_get_imm imm:$TO), $RA, $RB), sub_eq),
+           1)>;
+
+def : Pat<(int_ppc_tabortwci i32:$TO, i32:$RA, i32:$SI),
+           (XORI
+            (EXTRACT_SUBREG (
+              TABORTWCI (HTM_get_imm imm:$TO), $RA, (HTM_get_imm imm:$SI)),
+             sub_eq),
+           1)>;
+
+def : Pat<(int_ppc_tabortdc i32:$TO, i32:$RA, i32:$RB),
+           (XORI
+            (EXTRACT_SUBREG (
+              TABORTDC (HTM_get_imm imm:$TO), $RA, $RB),
+             sub_eq),
+           1)>;
+
+def : Pat<(int_ppc_tabortdci i32:$TO, i32:$RA, i32:$SI),
+           (XORI
+            (EXTRACT_SUBREG (
+              TABORTDCI (HTM_get_imm imm:$TO), $RA, (HTM_get_imm imm:$SI)),
+             sub_eq),
+           1)>;
+
+def : Pat<(int_ppc_tcheck i32:$BF),
+          (TCHECK (HTM_get_imm imm:$BF))>;
+
+def : Pat<(int_ppc_treclaim i32:$RA),
+          (TRECLAIM $RA)>;
+
+def : Pat<(int_ppc_trechkpt),
+          (TRECHKPT)>;
+
+def : Pat<(int_ppc_tsr i32:$L),
+          (TSR (HTM_get_imm imm:$L))>;
+
+def : Pat<(int_ppc_get_texasr),
+          (MFSPR8 130)>;
+
+def : Pat<(int_ppc_get_texasru),
+          (MFSPR8 131)>;
+
+def : Pat<(int_ppc_get_tfhar),
+          (MFSPR8 128)>;
+
+def : Pat<(int_ppc_get_tfiar),
+          (MFSPR8 129)>;
+
+
+def : Pat<(int_ppc_set_texasr i64:$V),
+          (MTSPR8 130, $V)>;
+
+def : Pat<(int_ppc_set_texasru i64:$V),
+          (MTSPR8 131, $V)>;
+
+def : Pat<(int_ppc_set_tfhar i64:$V),
+          (MTSPR8 128, $V)>;
+
+def : Pat<(int_ppc_set_tfiar i64:$V),
+          (MTSPR8 129, $V)>;
+
+
+// Extended mnemonics
+def : Pat<(int_ppc_tendall),
+          (TEND 1)>;
+
+def : Pat<(int_ppc_tresume),
+          (TSR 1)>;
+
+def : Pat<(int_ppc_tsuspend),
+          (TSR 0)>;
+
+def : Pat<(i64 (int_ppc_ttest)),
+          (RLDICL (i64 (COPY (TABORTWCI 0, ZERO, 0))), 36, 28)>;
+
+} // [HasHTM]
diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp
index d1c60a2..7e72d17 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -691,6 +691,33 @@ void PPCInstrInfo::insertSelect(MachineBasicBlock &MBB,
     .addReg(Cond[1].getReg(), 0, SubIdx);
 }
 
+static unsigned getCRBitValue(unsigned CRBit) {
+  // Maps a CR bit register to LT=3, GT=2, EQ=1, UN=0; 4 means "no match".
+  unsigned BitIdx = 4;
+  if (CRBit == PPC::CR0LT || CRBit == PPC::CR1LT ||
+      CRBit == PPC::CR2LT || CRBit == PPC::CR3LT ||
+      CRBit == PPC::CR4LT || CRBit == PPC::CR5LT ||
+      CRBit == PPC::CR6LT || CRBit == PPC::CR7LT)
+    BitIdx = 3;
+  else if (CRBit == PPC::CR0GT || CRBit == PPC::CR1GT ||
+           CRBit == PPC::CR2GT || CRBit == PPC::CR3GT ||
+           CRBit == PPC::CR4GT || CRBit == PPC::CR5GT ||
+           CRBit == PPC::CR6GT || CRBit == PPC::CR7GT)
+    BitIdx = 2;
+  else if (CRBit == PPC::CR0EQ || CRBit == PPC::CR1EQ ||
+           CRBit == PPC::CR2EQ || CRBit == PPC::CR3EQ ||
+           CRBit == PPC::CR4EQ || CRBit == PPC::CR5EQ ||
+           CRBit == PPC::CR6EQ || CRBit == PPC::CR7EQ)
+    BitIdx = 1;
+  else if (CRBit == PPC::CR0UN || CRBit == PPC::CR1UN ||
+           CRBit == PPC::CR2UN || CRBit == PPC::CR3UN ||
+           CRBit == PPC::CR4UN || CRBit == PPC::CR5UN ||
+           CRBit == PPC::CR6UN || CRBit == PPC::CR7UN)
+    BitIdx = 0;
+  assert(BitIdx != 4 && "Invalid CR bit register");
+  return BitIdx;
+}
+
 void PPCInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
                                MachineBasicBlock::iterator I, DebugLoc DL,
                                unsigned DestReg, unsigned SrcReg,
@@ -736,6 +763,32 @@ void PPCInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
     SrcReg = SuperReg;
   }
 
+  // Different class register copy: CR field or CR bit into a GPR.
+  if (PPC::CRBITRCRegClass.contains(SrcReg) &&
+      PPC::GPRCRegClass.contains(DestReg)) {
+    // KillSrc describes only the bit, so the enclosing CR field is not killed.
+    unsigned CRReg = getCRFromCRBit(SrcReg);
+    BuildMI(MBB, I, DL, get(PPC::MFOCRF), DestReg).addReg(CRReg);
+    // Rotate the CR bit in the CR fields to be the least significant bit and
+    // then mask with 0x1 (MB = ME = 31).
+    BuildMI(MBB, I, DL, get(PPC::RLWINM), DestReg)
+       .addReg(DestReg, RegState::Kill)
+       .addImm(TRI->getEncodingValue(CRReg) * 4 + (4 - getCRBitValue(SrcReg)))
+       .addImm(31)
+       .addImm(31);
+    return;
+  } else if (PPC::CRRCRegClass.contains(SrcReg) &&
+      PPC::G8RCRegClass.contains(DestReg)) {
+    BuildMI(MBB, I, DL, get(PPC::MFOCRF8), DestReg)
+       .addReg(SrcReg, getKillRegState(KillSrc)); // flag passed, not discarded
+    return;
+  } else if (PPC::CRRCRegClass.contains(SrcReg) &&
+      PPC::GPRCRegClass.contains(DestReg)) {
+    BuildMI(MBB, I, DL, get(PPC::MFOCRF), DestReg)
+       .addReg(SrcReg, getKillRegState(KillSrc)); // flag passed, not discarded
+    return;
+  }
+
   unsigned Opc;
   if (PPC::GPRCRegClass.contains(DestReg, SrcReg))
     Opc = PPC::OR;
diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td
index 4e3980d..bd9e5c1 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/lib/Target/PowerPC/PPCInstrInfo.td
@@ -419,6 +419,18 @@ def PPCRegCRRCAsmOperand : AsmOperandClass {
 def crrc : RegisterOperand<CRRC> {
   let ParserMatchClass = PPCRegCRRCAsmOperand;
 }
+def crrc0 : RegisterOperand<CRRC0> {
+  let ParserMatchClass = PPCRegCRRCAsmOperand;
+}
+
+def PPCU1ImmAsmOperand : AsmOperandClass {
+  let Name = "U1Imm"; let PredicateMethod = "isU1Imm";
+  let RenderMethod = "addImmOperands";
+}
+def u1imm   : Operand<i32> { // i32 operand matched through PPCU1ImmAsmOperand
+  let PrintMethod = "printU2ImmOperand"; // NOTE(review): u2imm printer reused for a 1-bit operand; confirm intended
+  let ParserMatchClass = PPCU1ImmAsmOperand;
+}
 
 def PPCU2ImmAsmOperand : AsmOperandClass {
   let Name = "U2Imm"; let PredicateMethod = "isU2Imm";
@@ -429,6 +441,15 @@ def u2imm   : Operand<i32> {
   let ParserMatchClass = PPCU2ImmAsmOperand;
 }
 
+def PPCU3ImmAsmOperand : AsmOperandClass {
+  let Name = "U3Imm"; let PredicateMethod = "isU3Imm";
+  let RenderMethod = "addImmOperands";
+}
+def u3imm   : Operand<i32> {
+  let PrintMethod = "printU3ImmOperand";
+  let ParserMatchClass = PPCU3ImmAsmOperand;
+}
+
 def PPCU4ImmAsmOperand : AsmOperandClass {
   let Name = "U4Imm"; let PredicateMethod = "isU4Imm";
   let RenderMethod = "addImmOperands";
@@ -2643,6 +2664,7 @@ include "PPCInstrAltivec.td"
 include "PPCInstrSPE.td"
 include "PPCInstr64Bit.td"
 include "PPCInstrVSX.td"
+include "PPCInstrHTM.td"
 
 def crnot : OutPatFrag<(ops node:$in),
                        (CRNOR $in, $in)>;
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp
index 41bb11f..85118b3 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -519,37 +519,6 @@ void PPCRegisterInfo::lowerCRRestore(MachineBasicBlock::iterator II,
   MBB.erase(II);
 }
 
-static unsigned getCRFromCRBit(unsigned SrcReg) {
-  unsigned Reg = 0;
-  if (SrcReg == PPC::CR0LT || SrcReg == PPC::CR0GT ||
-      SrcReg == PPC::CR0EQ || SrcReg == PPC::CR0UN)
-    Reg = PPC::CR0;
-  else if (SrcReg == PPC::CR1LT || SrcReg == PPC::CR1GT ||
-           SrcReg == PPC::CR1EQ || SrcReg == PPC::CR1UN)
-    Reg = PPC::CR1;
-  else if (SrcReg == PPC::CR2LT || SrcReg == PPC::CR2GT ||
-           SrcReg == PPC::CR2EQ || SrcReg == PPC::CR2UN)
-    Reg = PPC::CR2;
-  else if (SrcReg == PPC::CR3LT || SrcReg == PPC::CR3GT ||
-           SrcReg == PPC::CR3EQ || SrcReg == PPC::CR3UN)
-    Reg = PPC::CR3;
-  else if (SrcReg == PPC::CR4LT || SrcReg == PPC::CR4GT ||
-           SrcReg == PPC::CR4EQ || SrcReg == PPC::CR4UN)
-    Reg = PPC::CR4;
-  else if (SrcReg == PPC::CR5LT || SrcReg == PPC::CR5GT ||
-           SrcReg == PPC::CR5EQ || SrcReg == PPC::CR5UN)
-    Reg = PPC::CR5;
-  else if (SrcReg == PPC::CR6LT || SrcReg == PPC::CR6GT ||
-           SrcReg == PPC::CR6EQ || SrcReg == PPC::CR6UN)
-    Reg = PPC::CR6;
-  else if (SrcReg == PPC::CR7LT || SrcReg == PPC::CR7GT ||
-           SrcReg == PPC::CR7EQ || SrcReg == PPC::CR7UN)
-    Reg = PPC::CR7;
-
-  assert(Reg != 0 && "Invalid CR bit register");
-  return Reg;
-}
-
 void PPCRegisterInfo::lowerCRBitSpilling(MachineBasicBlock::iterator II,
                                          unsigned FrameIndex) const {
   // Get the instruction.
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.h b/lib/Target/PowerPC/PPCRegisterInfo.h
index 4c2ef90..7f840f0 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.h
+++ b/lib/Target/PowerPC/PPCRegisterInfo.h
@@ -26,6 +26,39 @@ class PPCSubtarget;
 class TargetInstrInfo;
 class Type;
 
+
+inline static unsigned getCRFromCRBit(unsigned SrcReg) {
+  unsigned Reg = 0;
+  if (SrcReg == PPC::CR0LT || SrcReg == PPC::CR0GT ||
+      SrcReg == PPC::CR0EQ || SrcReg == PPC::CR0UN)
+    Reg = PPC::CR0;
+  else if (SrcReg == PPC::CR1LT || SrcReg == PPC::CR1GT ||
+           SrcReg == PPC::CR1EQ || SrcReg == PPC::CR1UN)
+    Reg = PPC::CR1;
+  else if (SrcReg == PPC::CR2LT || SrcReg == PPC::CR2GT ||
+           SrcReg == PPC::CR2EQ || SrcReg == PPC::CR2UN)
+    Reg = PPC::CR2;
+  else if (SrcReg == PPC::CR3LT || SrcReg == PPC::CR3GT ||
+           SrcReg == PPC::CR3EQ || SrcReg == PPC::CR3UN)
+    Reg = PPC::CR3;
+  else if (SrcReg == PPC::CR4LT || SrcReg == PPC::CR4GT ||
+           SrcReg == PPC::CR4EQ || SrcReg == PPC::CR4UN)
+    Reg = PPC::CR4;
+  else if (SrcReg == PPC::CR5LT || SrcReg == PPC::CR5GT ||
+           SrcReg == PPC::CR5EQ || SrcReg == PPC::CR5UN)
+    Reg = PPC::CR5;
+  else if (SrcReg == PPC::CR6LT || SrcReg == PPC::CR6GT ||
+           SrcReg == PPC::CR6EQ || SrcReg == PPC::CR6UN)
+    Reg = PPC::CR6;
+  else if (SrcReg == PPC::CR7LT || SrcReg == PPC::CR7GT ||
+           SrcReg == PPC::CR7EQ || SrcReg == PPC::CR7UN)
+    Reg = PPC::CR7;
+
+  assert(Reg != 0 && "Invalid CR bit register");
+  return Reg;
+}
+
+
 class PPCRegisterInfo : public PPCGenRegisterInfo {
   DenseMap<unsigned, unsigned> ImmToIdxMap;
   const PPCSubtarget &Subtarget;
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.td b/lib/Target/PowerPC/PPCRegisterInfo.td
index 572d7c8..cd2b079 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.td
+++ b/lib/Target/PowerPC/PPCRegisterInfo.td
@@ -318,6 +318,8 @@ def CRBITRC : RegisterClass<"PPC", [i1], 32,
 def CRRC : RegisterClass<"PPC", [i32], 32, (add CR0, CR1, CR5, CR6,
                                                 CR7, CR2, CR3, CR4)>;
 
+def CRRC0 : RegisterClass<"PPC", [i32], 32, (add CR0)>;
+
 // The CTR registers are not allocatable because they're used by the
 // decrement-and-branch instructions, and thus need to stay live across
 // multiple basic blocks.
diff --git a/lib/Target/PowerPC/PPCSubtarget.cpp b/lib/Target/PowerPC/PPCSubtarget.cpp
index 8d3d5c4..b6cdace 100644
--- a/lib/Target/PowerPC/PPCSubtarget.cpp
+++ b/lib/Target/PowerPC/PPCSubtarget.cpp
@@ -90,6 +90,7 @@ void PPCSubtarget::initializeEnvironment() {
   HasLazyResolverStubs = false;
   HasICBT = false;
   HasInvariantFunctionDescriptors = false;
+  HasHTM = false;
 }
 
 void PPCSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
diff --git a/lib/Target/PowerPC/PPCSubtarget.h b/lib/Target/PowerPC/PPCSubtarget.h
index 704a226..5abb026 100644
--- a/lib/Target/PowerPC/PPCSubtarget.h
+++ b/lib/Target/PowerPC/PPCSubtarget.h
@@ -113,6 +113,7 @@ protected:
   bool IsLittleEndian;
   bool HasICBT;
   bool HasInvariantFunctionDescriptors;
+  bool HasHTM;
 
   const PPCTargetMachine &TM;
   PPCFrameLowering FrameLowering;
@@ -229,6 +230,7 @@ public:
   bool hasInvariantFunctionDescriptors() const {
     return HasInvariantFunctionDescriptors;
   }
+  bool hasHTM () const { return HasHTM; }
 
   const Triple &getTargetTriple() const { return TargetTriple; }
 
diff --git a/test/CodeGen/PowerPC/htm.ll b/test/CodeGen/PowerPC/htm.ll
new file mode 100644
index 0000000..2ecf837
--- /dev/null
+++ b/test/CodeGen/PowerPC/htm.ll
@@ -0,0 +1,127 @@
+; RUN: llc -mcpu=pwr8 -mattr=+htm < %s | FileCheck %s
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+define zeroext i32 @test1() {
+entry:
+  %0 = tail call i32 @llvm.ppc.tbegin(i32 0)
+  ret i32 %0
+
+; CHECK-LABEL: @test1
+; CHECK: tbegin. 0
+; CHECK: mfocrf  [[REGISTER:[0-9]+]], 128
+; CHECK: rlwinm  [[REGISTER]], [[REGISTER]], 3, 31, 31
+; CHECK: xori    [[REGISTER]], [[REGISTER]], 1
+}
+
+declare i32 @llvm.ppc.tbegin(i32) #1
+
+
+define zeroext i32 @test2() {
+entry:
+  %0 = tail call i32 @llvm.ppc.tend(i32 0)
+  ret i32 %0
+; CHECK-LABEL: @test2
+; CHECK: tend. 0
+; CHECK: mfocrf  [[REGISTER:[0-9]+]], 128
+; CHECK: rlwinm  [[REGISTER]], [[REGISTER]], 3, 31, 31
+; CHECK: xori    [[REGISTER]], [[REGISTER]], 1
+}
+
+declare i32 @llvm.ppc.tend(i32)
+
+
+define void @test3() {
+entry:
+  %0 = tail call i32 @llvm.ppc.tabort(i32 0)
+  %1 = tail call i32 @llvm.ppc.tabortdc(i32 0, i32 1, i32 2)
+  %2 = tail call i32 @llvm.ppc.tabortdci(i32 0, i32 1, i32 2)
+  %3 = tail call i32 @llvm.ppc.tabortwc(i32 0, i32 1, i32 2)
+  %4 = tail call i32 @llvm.ppc.tabortwci(i32 0, i32 1, i32 2)
+  ret void
+; CHECK-LABEL: @test3
+; CHECK: tabort.    [[REG1:[0-9]+]]
+; CHECK: tabortdc.  0, [[REG2:[0-9]+]], [[REG3:[0-9]+]]
+; CHECK: tabortdci. 0, [[REG2]], 2
+; CHECK: tabortwc.  0, [[REG2]], [[REG3]]
+; CHECK: tabortwci. 0, [[REG2]], 2
+}
+
+declare i32 @llvm.ppc.tabort(i32)
+declare i32 @llvm.ppc.tabortdc(i32, i32, i32)
+declare i32 @llvm.ppc.tabortdci(i32, i32, i32)
+declare i32 @llvm.ppc.tabortwc(i32, i32, i32)
+declare i32 @llvm.ppc.tabortwci(i32, i32, i32)
+
+
+define void @test4() {
+entry:
+  %0 = tail call i32 @llvm.ppc.tendall()
+  %1 = tail call i32 @llvm.ppc.tresume()
+  %2 = tail call i32 @llvm.ppc.tsuspend()
+  ret void
+; CHECK-LABEL: @test4
+; CHECK: tend. 1
+; CHECK: tsr.  1
+; CHECK: tsr.  0
+}
+
+declare i32 @llvm.ppc.tendall()
+declare i32 @llvm.ppc.tresume()
+declare i32 @llvm.ppc.tsuspend()
+
+
+define void @test5(i64 %v) {
+entry:
+  tail call void @llvm.ppc.set.texasr(i64 %v)
+  tail call void @llvm.ppc.set.texasru(i64 %v)
+  tail call void @llvm.ppc.set.tfhar(i64 %v)
+  tail call void @llvm.ppc.set.tfiar(i64 %v)
+  ret void
+; CHECK-LABEL: @test5
+; CHECK: mtspr 130, [[REG1:[0-9]+]]
+; CHECK: mtspr 131, [[REG2:[0-9]+]]
+; CHECK: mtspr 128, [[REG3:[0-9]+]]
+; CHECK: mtspr 129, [[REG4:[0-9]+]]
+}
+
+define i64 @test6() {
+entry:
+  %0 = tail call i64 @llvm.ppc.get.texasr()
+  ret i64 %0
+; CHECK-LABEL: @test6
+; CHECK: mfspr [[REG1:[0-9]+]], 130
+}
+
+define i64 @test7() {
+entry:
+  %0 = tail call i64 @llvm.ppc.get.texasru()
+  ret i64 %0
+; CHECK-LABEL: @test7
+; CHECK: mfspr [[REG1:[0-9]+]], 131
+}
+
+define i64 @test8() {
+entry:
+  %0 = tail call i64 @llvm.ppc.get.tfhar()
+  ret i64 %0
+; CHECK-LABEL: @test8
+; CHECK: mfspr [[REG1:[0-9]+]], 128
+}
+
+define i64 @test9() {
+entry:
+  %0 = tail call i64 @llvm.ppc.get.tfiar()
+  ret i64 %0
+; CHECK-LABEL: @test9
+; CHECK: mfspr [[REG1:[0-9]+]], 129
+}
+
+declare void @llvm.ppc.set.texasr(i64)
+declare void @llvm.ppc.set.texasru(i64)
+declare void @llvm.ppc.set.tfhar(i64)
+declare void @llvm.ppc.set.tfiar(i64)
+declare i64 @llvm.ppc.get.texasr()
+declare i64 @llvm.ppc.get.texasru()
+declare i64 @llvm.ppc.get.tfhar()
+declare i64 @llvm.ppc.get.tfiar()
diff --git a/test/MC/PowerPC/htm.s b/test/MC/PowerPC/htm.s
new file mode 100644
index 0000000..f99ff3c
--- /dev/null
+++ b/test/MC/PowerPC/htm.s
@@ -0,0 +1,53 @@
+# RUN: llvm-mc -triple powerpc64-unknown-linux-gnu --show-encoding %s | FileCheck -check-prefix=CHECK-BE %s
+# RUN: llvm-mc -triple powerpc64le-unknown-linux-gnu --show-encoding %s | FileCheck -check-prefix=CHECK-LE %s
+
+# CHECK-BE: tbegin. 0                      # encoding: [0x7c,0x00,0x05,0x1d]
+# CHECK-LE: tbegin. 0                      # encoding: [0x1d,0x05,0x00,0x7c]
+            tbegin. 0
+# CHECK-BE: tbegin. 1                      # encoding: [0x7c,0x20,0x05,0x1d]
+# CHECK-LE: tbegin. 1                      # encoding: [0x1d,0x05,0x20,0x7c]
+            tbegin. 1
+
+# CHECK-BE: tend. 0                        # encoding: [0x7c,0x00,0x05,0x5d]
+# CHECK-LE: tend. 0                        # encoding: [0x5d,0x05,0x00,0x7c]
+            tend. 0
+# CHECK-BE: tend. 1                        # encoding: [0x7e,0x00,0x05,0x5d]
+# CHECK-LE: tend. 1                        # encoding: [0x5d,0x05,0x00,0x7e]
+            tend. 1
+
+# CHECK-BE: tabort. 9                      # encoding: [0x7c,0x09,0x07,0x1d]
+# CHECK-LE: tabort. 9                      # encoding: [0x1d,0x07,0x09,0x7c]
+            tabort. 9
+# CHECK-BE: tabortdc. 0, 9, 9              # encoding: [0x7c,0x09,0x4e,0x5d]
+# CHECK-LE: tabortdc. 0, 9, 9              # encoding: [0x5d,0x4e,0x09,0x7c]
+            tabortdc. 0, 9, 9
+# CHECK-BE: tabortdci. 0, 9, 0             # encoding: [0x7c,0x09,0x06,0xdd]
+# CHECK-LE: tabortdci. 0, 9, 0             # encoding: [0xdd,0x06,0x09,0x7c]
+            tabortdci. 0, 9, 0
+# CHECK-BE: tabortwc. 0, 9, 9              # encoding: [0x7c,0x09,0x4e,0x1d]
+# CHECK-LE: tabortwc. 0, 9, 9              # encoding: [0x1d,0x4e,0x09,0x7c]
+            tabortwc. 0, 9, 9
+# CHECK-BE: tabortwci. 0, 9, 0             # encoding: [0x7c,0x09,0x06,0x9d]
+# CHECK-LE: tabortwci. 0, 9, 0             # encoding: [0x9d,0x06,0x09,0x7c]
+            tabortwci. 0, 9, 0
+
+# CHECK-BE: tsr. 0                         # encoding: [0x7c,0x00,0x05,0xdd]
+# CHECK-LE: tsr. 0                         # encoding: [0xdd,0x05,0x00,0x7c]
+            tsr. 0
+# CHECK-BE: tsr. 1                         # encoding: [0x7c,0x20,0x05,0xdd]
+# CHECK-LE: tsr. 1                         # encoding: [0xdd,0x05,0x20,0x7c]
+            tsr. 1
+
+# CHECK-BE: tcheck 0                       # encoding: [0x7c,0x00,0x05,0x9c]
+# CHECK-LE: tcheck 0                       # encoding: [0x9c,0x05,0x00,0x7c]
+            tcheck 0
+# CHECK-BE: tcheck 3                       # encoding: [0x7d,0x80,0x05,0x9c]
+# CHECK-LE: tcheck 3                       # encoding: [0x9c,0x05,0x80,0x7d]
+            tcheck 3
+
+# CHECK-BE: treclaim. 9                    # encoding: [0x7c,0x09,0x07,0x5d]
+# CHECK-LE: treclaim. 9                    # encoding: [0x5d,0x07,0x09,0x7c]
+            treclaim. 9
+# CHECK-BE: trechkpt.                      # encoding: [0x7c,0x00,0x07,0xdd]
+# CHECK-LE: trechkpt.                      # encoding: [0xdd,0x07,0x00,0x7c]
+            trechkpt.




More information about the llvm-commits mailing list