[llvm] r345053 - [Power9] Add __float128 support in the backend for bitcast to a i128

Tue Oct 23 10:11:36 PDT 2018

Author: stefanp
Date: Tue Oct 23 10:11:36 2018
New Revision: 345053

URL: http://llvm.org/viewvc/llvm-project?rev=345053&view=rev
Log:
[Power9] Add __float128 support in the backend for bitcast to a i128

Add support to allow bit-casting from f128 to i128 and then
extracting 64 bits from the result.

Differential Revision: https://reviews.llvm.org/D49507

Added:
    llvm/trunk/test/CodeGen/PowerPC/f128-bitcast.ll
Modified:
    llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp
    llvm/trunk/lib/Target/PowerPC/PPCISelLowering.h
    llvm/trunk/lib/Target/PowerPC/PPCInstrVSX.td

Modified: llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp?rev=345053&r1=345052&r2=345053&view=diff
==============================================================================

--- llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp Tue Oct 23 10:11:36 2018
@@ -1070,6 +1070,8 @@ PPCTargetLowering::PPCTargetLowering(con
   setTargetDAGCombine(ISD::ZERO_EXTEND);
   setTargetDAGCombine(ISD::ANY_EXTEND);
 
+  setTargetDAGCombine(ISD::TRUNCATE);
+
   if (Subtarget.useCRBits()) {
     setTargetDAGCombine(ISD::TRUNCATE);
     setTargetDAGCombine(ISD::SETCC);
@@ -9634,6 +9636,9 @@ void PPCTargetLowering::ReplaceNodeResul
       return;
     Results.push_back(LowerFP_TO_INT(SDValue(N, 0), DAG, dl));
     return;
+  case ISD::BITCAST:
+    // Don't handle bitcast here.
+    return;
   }
 }
 
@@ -12479,6 +12484,7 @@ SDValue PPCTargetLowering::PerformDAGCom
   case ISD::ANY_EXTEND:
     return DAGCombineExtBoolTrunc(N, DCI);
   case ISD::TRUNCATE:
+    return combineTRUNCATE(N, DCI);
   case ISD::SETCC:
   case ISD::SELECT_CC:
     return DAGCombineTruncBoolExt(N, DCI);
@@ -14253,6 +14259,58 @@ SDValue PPCTargetLowering::combineADD(SD
   return SDValue();
 }
 
+// Detect TRUNCATE operations on bitcasts of float128 values.
+// What we are looking for here is the situtation where we extract a subset
+// of bits from a 128 bit float.
+// This can be of two forms:
+// 1) BITCAST of f128 feeding TRUNCATE
+// 2) BITCAST of f128 feeding SRL (a shift) feeding TRUNCATE
+// The reason this is required is because we do not have a legal i128 type
+// and so we want to prevent having to store the f128 and then reload part
+// of it.
+SDValue PPCTargetLowering::combineTRUNCATE(SDNode *N,
+                                           DAGCombinerInfo &DCI) const {
+  // If we are using CRBits then try that first.
+  if (Subtarget.useCRBits()) {
+    // Check if CRBits did anything and return that if it did.
+    if (SDValue CRTruncValue = DAGCombineTruncBoolExt(N, DCI))
+      return CRTruncValue;
+  }
+
+  SDLoc dl(N);
+  SDValue Op0 = N->getOperand(0);
+
+  // Looking for a truncate of i128 to i64.
+  if (Op0.getValueType() != MVT::i128 || N->getValueType(0) != MVT::i64)
+    return SDValue();
+
+  int EltToExtract = DCI.DAG.getDataLayout().isBigEndian() ? 1 : 0;
+
+  // SRL feeding TRUNCATE.
+  if (Op0.getOpcode() == ISD::SRL) {
+    ConstantSDNode *ConstNode = dyn_cast<ConstantSDNode>(Op0.getOperand(1));
+    // The right shift has to be by 64 bits.
+    if (!ConstNode || ConstNode->getZExtValue() != 64)
+      return SDValue();
+
+    // Switch the element number to extract.
+    EltToExtract = EltToExtract ? 0 : 1;
+    // Update Op0 past the SRL.
+    Op0 = Op0.getOperand(0);
+  }
+
+  // BITCAST feeding a TRUNCATE possibly via SRL.
+  if (Op0.getOpcode() == ISD::BITCAST &&
+      Op0.getValueType() == MVT::i128 &&
+      Op0.getOperand(0).getValueType() == MVT::f128) {
+    SDValue Bitcast = DCI.DAG.getBitcast(MVT::v2i64, Op0.getOperand(0));
+    return DCI.DAG.getNode(
+        ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Bitcast,
+        DCI.DAG.getTargetConstant(EltToExtract, dl, MVT::i32));
+  }
+  return SDValue();
+}
+
 bool PPCTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
   // Only duplicate to increase tail-calls for the 64bit SysV ABIs.
   if (!Subtarget.isSVR4ABI() || !Subtarget.isPPC64())

Modified: llvm/trunk/lib/Target/PowerPC/PPCISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCISelLowering.h?rev=345053&r1=345052&r2=345053&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCISelLowering.h (original)
+++ llvm/trunk/lib/Target/PowerPC/PPCISelLowering.h Tue Oct 23 10:11:36 2018
@@ -1093,6 +1093,7 @@ namespace llvm {
     SDValue combineSRA(SDNode *N, DAGCombinerInfo &DCI) const;
     SDValue combineSRL(SDNode *N, DAGCombinerInfo &DCI) const;
     SDValue combineADD(SDNode *N, DAGCombinerInfo &DCI) const;
+    SDValue combineTRUNCATE(SDNode *N, DAGCombinerInfo &DCI) const;
 
     /// ConvertSETCCToSubtract - looks at SETCC that compares ints. It replaces
     /// SETCC with integer subtraction when (1) there is a legal way of doing it

Modified: llvm/trunk/lib/Target/PowerPC/PPCInstrVSX.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCInstrVSX.td?rev=345053&r1=345052&r2=345053&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCInstrVSX.td (original)
+++ llvm/trunk/lib/Target/PowerPC/PPCInstrVSX.td Tue Oct 23 10:11:36 2018
@@ -1040,6 +1040,15 @@ def : Pat<(v2f64 (bitconvert v1i128:$A))
 def : Pat<(v1i128 (bitconvert v2f64:$A)),
           (COPY_TO_REGCLASS $A, VRRC)>;
 
+def : Pat<(v2i64 (bitconvert f128:$A)),
+          (COPY_TO_REGCLASS $A, VRRC)>;
+def : Pat<(v4i32 (bitconvert f128:$A)),
+          (COPY_TO_REGCLASS $A, VRRC)>;
+def : Pat<(v8i16 (bitconvert f128:$A)),
+          (COPY_TO_REGCLASS $A, VRRC)>;
+def : Pat<(v16i8 (bitconvert f128:$A)),
+          (COPY_TO_REGCLASS $A, VRRC)>;
+
 def : Pat<(v2f64 (PPCsvec2fp v4i32:$C, 0)),
           (v2f64 (XVCVSXWDP (v2i64 (XXMRGHW $C, $C))))>;
 def : Pat<(v2f64 (PPCsvec2fp v4i32:$C, 1)),

Added: llvm/trunk/test/CodeGen/PowerPC/f128-bitcast.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/f128-bitcast.ll?rev=345053&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/f128-bitcast.ll (added)
+++ llvm/trunk/test/CodeGen/PowerPC/f128-bitcast.ll Tue Oct 23 10:11:36 2018
@@ -0,0 +1,53 @@
+; RUN: llc -mcpu=pwr9 -mtriple=powerpc64le-unknown-unknown \
+; RUN:   -enable-ppc-quad-precision -verify-machineinstrs \
+; RUN:   -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | FileCheck %s
+; RUN: llc -mcpu=pwr9 -mtriple=powerpc64-unknown-unknown \
+; RUN:   -enable-ppc-quad-precision -verify-machineinstrs \
+; RUN:   -ppc-asm-full-reg-names \
+; RUN:   -ppc-vsr-nums-as-vr < %s | FileCheck %s --check-prefix=CHECK-BE
+
+; Function Attrs: norecurse nounwind readnone
+define i64 @getPart1(fp128 %in) local_unnamed_addr {
+entry:
+  %0 = bitcast fp128 %in to i128
+  %a.sroa.0.0.extract.trunc = trunc i128 %0 to i64
+  ret i64 %a.sroa.0.0.extract.trunc
+; CHECK-LABEL: getPart1
+; CHECK:       mfvsrld r3, v2
+; CHECK-NEXT:  blr
+; CHECK-BE-LABEL: getPart1
+; CHECK-BE:       mfvsrld r3, v2
+; CHECK-BE-NEXT:  blr
+}
+
+; Function Attrs: norecurse nounwind readnone
+define i64 @getPart2(fp128 %in) local_unnamed_addr {
+entry:
+  %0 = bitcast fp128 %in to i128
+  %a.sroa.0.8.extract.shift = lshr i128 %0, 64
+  %a.sroa.0.8.extract.trunc = trunc i128 %a.sroa.0.8.extract.shift to i64
+  ret i64 %a.sroa.0.8.extract.trunc
+; CHECK-LABEL: getPart2
+; CHECK:       mfvsrd r3, v2
+; CHECK-NEXT:  blr
+; CHECK-BE-LABEL: getPart2
+; CHECK-BE:       mfvsrd r3, v2
+; CHECK-BE-NEXT:  blr
+}
+
+; Function Attrs: norecurse nounwind readnone
+define i64 @checkBitcast(fp128 %in, <2 x i64> %in2, <2 x i64> *%out) local_unnamed_addr {
+entry:
+  %0 = bitcast fp128 %in to <2 x i64>
+  %1 = extractelement <2 x i64> %0, i64 0
+  %2 = add <2 x i64> %0, %in2
+  store <2 x i64> %2, <2 x i64> *%out, align 16
+  ret i64 %1
+; CHECK-LABEL: checkBitcast
+; CHECK:       mfvsrld r3, v2
+; CHECK:       blr
+; CHECK-BE-LABEL: checkBitcast
+; CHECK-BE:       mfvsrd r3, v2
+; CHECK-BE:       blr
+}
+