[llvm] 0bb9a27 - [FPEnv][AArch64] Add lowering and instruction selection for strict conversions

John Brawn via llvm-commits llvm-commits at lists.llvm.org
Thu Jan 30 05:50:45 PST 2020


Author: John Brawn
Date: 2020-01-30T13:50:06Z
New Revision: 0bb9a27c9895c0fbc3f55f56ad7f1e1927398fce

URL: https://github.com/llvm/llvm-project/commit/0bb9a27c9895c0fbc3f55f56ad7f1e1927398fce
DIFF: https://github.com/llvm/llvm-project/commit/0bb9a27c9895c0fbc3f55f56ad7f1e1927398fce.diff

LOG: [FPEnv][AArch64] Add lowering and instruction selection for strict conversions

Strict fp-to-int and int-to-fp conversions can be handled in the same way as
the non-strict versions: select the appropriate instruction when one exists,
and lower to a library call when none does.
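
For example, after this change a strict conversion selects the same instruction
as its non-strict counterpart. Illustrative IR (the function name below is made
up for this example; the intrinsic and the expected "fcvtzs w0, s0" match the
tests updated in this commit):

  define i32 @example_fptosi(float %x) strictfp {
    %val = call i32 @llvm.experimental.constrained.fptosi.i32.f32(float %x, metadata !"fpexcept.strict") strictfp
    ret i32 %val
  }
  declare i32 @llvm.experimental.constrained.fptosi.i32.f32(float, metadata)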

Differential Revision: https://reviews.llvm.org/D73625

Added: 
    

Modified: 
    llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
    llvm/lib/Target/AArch64/AArch64InstrInfo.td
    llvm/test/CodeGen/AArch64/fp-intrinsics.ll
    llvm/test/CodeGen/AArch64/fpconv-vector-op-scalarize-strict.ll

Removed: 
    


################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 205e65a3d47c..400eb9ad5ba7 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -282,15 +282,27 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
   setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
   setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
   setOperationAction(ISD::FP_TO_SINT, MVT::i128, Custom);
+  setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom);
+  setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i64, Custom);
+  setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i128, Custom);
   setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
   setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
   setOperationAction(ISD::FP_TO_UINT, MVT::i128, Custom);
+  setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom);
+  setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i64, Custom);
+  setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i128, Custom);
   setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
   setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
   setOperationAction(ISD::SINT_TO_FP, MVT::i128, Custom);
+  setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i32, Custom);
+  setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i64, Custom);
+  setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i128, Custom);
   setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
   setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
   setOperationAction(ISD::UINT_TO_FP, MVT::i128, Custom);
+  setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i32, Custom);
+  setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i64, Custom);
+  setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i128, Custom);
   setOperationAction(ISD::FP_ROUND, MVT::f32, Custom);
   setOperationAction(ISD::FP_ROUND, MVT::f64, Custom);
   setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Custom);
@@ -2324,9 +2336,16 @@ getAArch64XALUOOp(AArch64CC::CondCode &CC, SDValue Op, SelectionDAG &DAG) {
 
 SDValue AArch64TargetLowering::LowerF128Call(SDValue Op, SelectionDAG &DAG,
                                              RTLIB::Libcall Call) const {
-  SmallVector<SDValue, 2> Ops(Op->op_begin(), Op->op_end());
+  bool IsStrict = Op->isStrictFPOpcode();
+  unsigned Offset = IsStrict ? 1 : 0;
+  SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
+  SmallVector<SDValue, 2> Ops(Op->op_begin() + Offset, Op->op_end());
   MakeLibCallOptions CallOptions;
-  return makeLibCall(DAG, Call, MVT::f128, Ops, CallOptions, SDLoc(Op)).first;
+  SDValue Result;
+  SDLoc dl(Op);
+  std::tie(Result, Chain) = makeLibCall(DAG, Call, Op.getValueType(), Ops,
+                                        CallOptions, dl, Chain);
+  return IsStrict ? DAG.getMergeValues({Result, Chain}, dl) : Result;
 }
 
 // Returns true if the given Op is the overflow flag result of an overflow
@@ -2587,32 +2606,34 @@ SDValue AArch64TargetLowering::LowerVectorFP_TO_INT(SDValue Op,
 
 SDValue AArch64TargetLowering::LowerFP_TO_INT(SDValue Op,
                                               SelectionDAG &DAG) const {
-  if (Op.getOperand(0).getValueType().isVector())
+  bool IsStrict = Op->isStrictFPOpcode();
+  SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);
+
+  if (SrcVal.getValueType().isVector())
     return LowerVectorFP_TO_INT(Op, DAG);
 
   // f16 conversions are promoted to f32 when full fp16 is not supported.
-  if (Op.getOperand(0).getValueType() == MVT::f16 &&
-      !Subtarget->hasFullFP16()) {
+  if (SrcVal.getValueType() == MVT::f16 && !Subtarget->hasFullFP16()) {
+    assert(!IsStrict && "Lowering of strict fp16 not yet implemented");
     SDLoc dl(Op);
     return DAG.getNode(
         Op.getOpcode(), dl, Op.getValueType(),
-        DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, Op.getOperand(0)));
+        DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, SrcVal));
   }
 
-  if (Op.getOperand(0).getValueType() != MVT::f128) {
+  if (SrcVal.getValueType() != MVT::f128) {
     // It's legal except when f128 is involved
     return Op;
   }
 
   RTLIB::Libcall LC;
-  if (Op.getOpcode() == ISD::FP_TO_SINT)
-    LC = RTLIB::getFPTOSINT(Op.getOperand(0).getValueType(), Op.getValueType());
+  if (Op.getOpcode() == ISD::FP_TO_SINT ||
+      Op.getOpcode() == ISD::STRICT_FP_TO_SINT)
+    LC = RTLIB::getFPTOSINT(SrcVal.getValueType(), Op.getValueType());
   else
-    LC = RTLIB::getFPTOUINT(Op.getOperand(0).getValueType(), Op.getValueType());
+    LC = RTLIB::getFPTOUINT(SrcVal.getValueType(), Op.getValueType());
 
-  SmallVector<SDValue, 2> Ops(Op->op_begin(), Op->op_end());
-  MakeLibCallOptions CallOptions;
-  return makeLibCall(DAG, LC, Op.getValueType(), Ops, CallOptions, SDLoc(Op)).first;
+  return LowerF128Call(Op, DAG, LC);
 }
 
 static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
@@ -2648,18 +2669,22 @@ SDValue AArch64TargetLowering::LowerINT_TO_FP(SDValue Op,
   if (Op.getValueType().isVector())
     return LowerVectorINT_TO_FP(Op, DAG);
 
+  bool IsStrict = Op->isStrictFPOpcode();
+  SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);
+
   // f16 conversions are promoted to f32 when full fp16 is not supported.
   if (Op.getValueType() == MVT::f16 &&
       !Subtarget->hasFullFP16()) {
+    assert(!IsStrict && "Lowering of strict fp16 not yet implemented");
     SDLoc dl(Op);
     return DAG.getNode(
         ISD::FP_ROUND, dl, MVT::f16,
-        DAG.getNode(Op.getOpcode(), dl, MVT::f32, Op.getOperand(0)),
+        DAG.getNode(Op.getOpcode(), dl, MVT::f32, SrcVal),
         DAG.getIntPtrConstant(0, dl));
   }
 
   // i128 conversions are libcalls.
-  if (Op.getOperand(0).getValueType() == MVT::i128)
+  if (SrcVal.getValueType() == MVT::i128)
     return SDValue();
 
   // Other conversions are legal, unless it's to the completely software-based
@@ -2668,10 +2693,11 @@ SDValue AArch64TargetLowering::LowerINT_TO_FP(SDValue Op,
     return Op;
 
   RTLIB::Libcall LC;
-  if (Op.getOpcode() == ISD::SINT_TO_FP)
-    LC = RTLIB::getSINTTOFP(Op.getOperand(0).getValueType(), Op.getValueType());
+  if (Op.getOpcode() == ISD::SINT_TO_FP ||
+      Op.getOpcode() == ISD::STRICT_SINT_TO_FP)
+    LC = RTLIB::getSINTTOFP(SrcVal.getValueType(), Op.getValueType());
   else
-    LC = RTLIB::getUINTTOFP(Op.getOperand(0).getValueType(), Op.getValueType());
+    LC = RTLIB::getUINTTOFP(SrcVal.getValueType(), Op.getValueType());
 
   return LowerF128Call(Op, DAG, LC);
 }
@@ -3262,9 +3288,13 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
     return LowerPREFETCH(Op, DAG);
   case ISD::SINT_TO_FP:
   case ISD::UINT_TO_FP:
+  case ISD::STRICT_SINT_TO_FP:
+  case ISD::STRICT_UINT_TO_FP:
     return LowerINT_TO_FP(Op, DAG);
   case ISD::FP_TO_SINT:
   case ISD::FP_TO_UINT:
+  case ISD::STRICT_FP_TO_SINT:
+  case ISD::STRICT_FP_TO_UINT:
     return LowerFP_TO_INT(Op, DAG);
   case ISD::FSINCOS:
     return LowerFSINCOS(Op, DAG);

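The libcall path is taken when fp128 is involved: AArch64 has no fp128
conversion instructions, so LowerFP_TO_INT picks the RTLIB entry and defers to
LowerF128Call, which now also threads the strict node's chain into and out of
makeLibCall. A minimal sketch (hypothetical function name; the __fixtfsi call
matches the tests added in this commit):

  define i32 @example_fptosi_f128(fp128 %x) strictfp {
    ; no conversion instruction exists for fp128, so this becomes bl __fixtfsi
    %val = call i32 @llvm.experimental.constrained.fptosi.i32.f128(fp128 %x, metadata !"fpexcept.strict") strictfp
    ret i32 %val
  }
  declare i32 @llvm.experimental.constrained.fptosi.i32.f128(fp128, metadata)
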
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index dbe3a4948cc2..1938c6fbb912 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -3325,10 +3325,10 @@ defm FCVTNS : FPToIntegerUnscaled<0b00, 0b000, "fcvtns", int_aarch64_neon_fcvtns
 defm FCVTNU : FPToIntegerUnscaled<0b00, 0b001, "fcvtnu", int_aarch64_neon_fcvtnu>;
 defm FCVTPS : FPToIntegerUnscaled<0b01, 0b000, "fcvtps", int_aarch64_neon_fcvtps>;
 defm FCVTPU : FPToIntegerUnscaled<0b01, 0b001, "fcvtpu", int_aarch64_neon_fcvtpu>;
-defm FCVTZS : FPToIntegerUnscaled<0b11, 0b000, "fcvtzs", fp_to_sint>;
-defm FCVTZU : FPToIntegerUnscaled<0b11, 0b001, "fcvtzu", fp_to_uint>;
-defm FCVTZS : FPToIntegerScaled<0b11, 0b000, "fcvtzs", fp_to_sint>;
-defm FCVTZU : FPToIntegerScaled<0b11, 0b001, "fcvtzu", fp_to_uint>;
+defm FCVTZS : FPToIntegerUnscaled<0b11, 0b000, "fcvtzs", any_fp_to_sint>;
+defm FCVTZU : FPToIntegerUnscaled<0b11, 0b001, "fcvtzu", any_fp_to_uint>;
+defm FCVTZS : FPToIntegerScaled<0b11, 0b000, "fcvtzs", any_fp_to_sint>;
+defm FCVTZU : FPToIntegerScaled<0b11, 0b001, "fcvtzu", any_fp_to_uint>;
 
 multiclass FPToIntegerIntPats<Intrinsic round, string INST> {
   def : Pat<(i32 (round f16:$Rn)), (!cast<Instruction>(INST # UWHr) $Rn)>;
@@ -3400,8 +3400,8 @@ def : Pat<(i64 (llround f64:$Rn)),
 // Scaled integer to floating point conversion instructions.
 //===----------------------------------------------------------------------===//
 
-defm SCVTF : IntegerToFP<0, "scvtf", sint_to_fp>;
-defm UCVTF : IntegerToFP<1, "ucvtf", uint_to_fp>;
+defm SCVTF : IntegerToFP<0, "scvtf", any_sint_to_fp>;
+defm UCVTF : IntegerToFP<1, "ucvtf", any_uint_to_fp>;
 
 //===----------------------------------------------------------------------===//
 // Unscaled integer to floating point conversion instruction.
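
The any_* selectors above are PatFrags that match both the plain and the strict
form of a node (for example, any_fp_to_sint covers both fp_to_sint and
strict_fp_to_sint), so a single set of FCVTZS/FCVTZU and SCVTF/UCVTF patterns
now selects for both kinds of conversion.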

diff --git a/llvm/test/CodeGen/AArch64/fp-intrinsics.ll b/llvm/test/CodeGen/AArch64/fp-intrinsics.ll
index 88cb78841c19..d3e17dcf5ff9 100644
--- a/llvm/test/CodeGen/AArch64/fp-intrinsics.ll
+++ b/llvm/test/CodeGen/AArch64/fp-intrinsics.ll
@@ -2,8 +2,6 @@
 
 ; Check that constrained fp intrinsics are correctly lowered.
 
-; FIXME: We're not generating the right instructions for some of these
-; operations (see further FIXMEs down below).
 
 ; Single-precision intrinsics
 
@@ -57,7 +55,7 @@ define i32 @fptosi_i32_f32(float %x) #0 {
 }
 
 ; CHECK-LABEL: fptoui_i32_f32:
-; FIXME-CHECK: fcvtzu w0, s0
+; CHECK: fcvtzu w0, s0
 define i32 @fptoui_i32_f32(float %x) #0 {
   %val = call i32 @llvm.experimental.constrained.fptoui.i32.f32(float %x, metadata !"fpexcept.strict") #0
   ret i32 %val
@@ -71,29 +69,53 @@ define i64 @fptosi_i64_f32(float %x) #0 {
 }
 
 ; CHECK-LABEL: fptoui_i64_f32:
-; FIXME-CHECK: fcvtzu x0, s0
+; CHECK: fcvtzu x0, s0
 define i64 @fptoui_i64_f32(float %x) #0 {
   %val = call i64 @llvm.experimental.constrained.fptoui.i64.f32(float %x, metadata !"fpexcept.strict") #0
   ret i64 %val
 }
 
 ; CHECK-LABEL: sitofp_f32_i32:
-; FIXME-CHECK: scvtf s0, w0
+; CHECK: scvtf s0, w0
 define float @sitofp_f32_i32(i32 %x) #0 {
   %val = call float @llvm.experimental.constrained.sitofp.f32.i32(i32 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
   ret float %val
 }
 
 ; CHECK-LABEL: uitofp_f32_i32:
-; FIXME-CHECK: ucvtf s0, w0
+; CHECK: ucvtf s0, w0
 define float @uitofp_f32_i32(i32 %x) #0 {
   %val = call float @llvm.experimental.constrained.uitofp.f32.i32(i32 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
   ret float %val
 }
 
-; TODO: sitofp_f32_i64 (missing STRICT_SINT_TO_FP handling)
+; CHECK-LABEL: sitofp_f32_i64:
+; CHECK: scvtf s0, x0
+define float @sitofp_f32_i64(i64 %x) #0 {
+  %val = call float @llvm.experimental.constrained.sitofp.f32.i64(i64 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+  ret float %val
+}
+
+; CHECK-LABEL: uitofp_f32_i64:
+; CHECK: ucvtf s0, x0
+define float @uitofp_f32_i64(i64 %x) #0 {
+  %val = call float @llvm.experimental.constrained.uitofp.f32.i64(i64 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+  ret float %val
+}
 
-; TODO: uitofp_f32_i64 (missing STRICT_SINT_TO_FP handling)
+; CHECK-LABEL: sitofp_f32_i128:
+; CHECK: bl __floattisf
+define float @sitofp_f32_i128(i128 %x) #0 {
+  %val = call float @llvm.experimental.constrained.sitofp.f32.i128(i128 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+  ret float %val
+}
+
+; CHECK-LABEL: uitofp_f32_i128:
+; CHECK: bl __floatuntisf
+define float @uitofp_f32_i128(i128 %x) #0 {
+  %val = call float @llvm.experimental.constrained.uitofp.f32.i128(i128 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+  ret float %val
+}
 
 ; CHECK-LABEL: sqrt_f32:
 ; CHECK: fsqrt s0, s0
@@ -496,7 +518,7 @@ define i32 @fptosi_i32_f64(double %x) #0 {
 }
 
 ; CHECK-LABEL: fptoui_i32_f64:
-; FIXME-CHECK: fcvtzu w0, d0
+; CHECK: fcvtzu w0, d0
 define i32 @fptoui_i32_f64(double %x) #0 {
   %val = call i32 @llvm.experimental.constrained.fptoui.i32.f64(double %x, metadata !"fpexcept.strict") #0
   ret i32 %val
@@ -510,35 +532,54 @@ define i64 @fptosi_i64_f64(double %x) #0 {
 }
 
 ; CHECK-LABEL: fptoui_i64_f64:
-; FIXME-CHECK: fcvtzu x0, d0
+; CHECK: fcvtzu x0, d0
 define i64 @fptoui_i64_f64(double %x) #0 {
   %val = call i64 @llvm.experimental.constrained.fptoui.i64.f64(double %x, metadata !"fpexcept.strict") #0
   ret i64 %val
 }
 
 ; CHECK-LABEL: sitofp_f64_i32:
-; FIXME-CHECK: scvtf d0, w0
+; CHECK: scvtf d0, w0
 define double @sitofp_f64_i32(i32 %x) #0 {
   %val = call double @llvm.experimental.constrained.sitofp.f64.i32(i32 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
   ret double %val
 }
 
 ; CHECK-LABEL: uitofp_f64_i32:
-; FIXME-CHECK: ucvtf d0, w0
+; CHECK: ucvtf d0, w0
 define double @uitofp_f64_i32(i32 %x) #0 {
   %val = call double @llvm.experimental.constrained.uitofp.f64.i32(i32 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
   ret double %val
 }
 
-; TODO sitofp_f64_i64 (missing STRICT_SINT_TO_FP handling)
+; CHECK-LABEL: sitofp_f64_i64:
+; CHECK: scvtf d0, x0
+define double @sitofp_f64_i64(i64 %x) #0 {
+  %val = call double @llvm.experimental.constrained.sitofp.f64.i64(i64 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+  ret double %val
+}
 
 ; CHECK-LABEL: uitofp_f64_i64:
-; FIXME-CHECK: ucvtf d0, x0
+; CHECK: ucvtf d0, x0
 define double @uitofp_f64_i64(i64 %x) #0 {
   %val = call double @llvm.experimental.constrained.uitofp.f64.i64(i64 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
   ret double %val
 }
 
+; CHECK-LABEL: sitofp_f64_i128:
+; CHECK: bl __floattidf
+define double @sitofp_f64_i128(i128 %x) #0 {
+  %val = call double @llvm.experimental.constrained.sitofp.f64.i128(i128 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+  ret double %val
+}
+
+; CHECK-LABEL: uitofp_f64_i128:
+; CHECK: bl __floatuntidf
+define double @uitofp_f64_i128(i128 %x) #0 {
+  %val = call double @llvm.experimental.constrained.uitofp.f64.i128(i128 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+  ret double %val
+}
+
 ; CHECK-LABEL: sqrt_f64:
 ; CHECK: fsqrt d0, d0
 define double @sqrt_f64(double %x) #0 {
@@ -888,22 +929,321 @@ define i32 @fcmps_une_f64(double %a, double %b) #0 {
 }
 
 
-; Single/Double conversion intrinsics
+; Long-double-precision intrinsics
+
+; CHECK-LABEL: add_f128:
+; CHECK: bl __addtf3
+define fp128 @add_f128(fp128 %x, fp128 %y) #0 {
+  %val = call fp128 @llvm.experimental.constrained.fadd.f128(fp128 %x, fp128 %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+  ret fp128 %val
+}
+
+; CHECK-LABEL: sub_f128:
+; CHECK: bl __subtf3
+define fp128 @sub_f128(fp128 %x, fp128 %y) #0 {
+  %val = call fp128 @llvm.experimental.constrained.fsub.f128(fp128 %x, fp128 %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+  ret fp128 %val
+}
+
+; CHECK-LABEL: mul_f128:
+; CHECK: bl __multf3
+define fp128 @mul_f128(fp128 %x, fp128 %y) #0 {
+  %val = call fp128 @llvm.experimental.constrained.fmul.f128(fp128 %x, fp128 %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+  ret fp128 %val
+}
+
+; CHECK-LABEL: div_f128:
+; CHECK: bl __divtf3
+define fp128 @div_f128(fp128 %x, fp128 %y) #0 {
+  %val = call fp128 @llvm.experimental.constrained.fdiv.f128(fp128 %x, fp128 %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+  ret fp128 %val
+}
+
+; CHECK-LABEL: frem_f128:
+; CHECK: bl fmodl
+define fp128 @frem_f128(fp128 %x, fp128 %y) #0 {
+  %val = call fp128 @llvm.experimental.constrained.frem.f128(fp128 %x, fp128 %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+  ret fp128 %val
+}
+
+; CHECK-LABEL: fma_f128:
+; CHECK: fmal
+define fp128 @fma_f128(fp128 %x, fp128 %y, fp128 %z) #0 {
+  %val = call fp128 @llvm.experimental.constrained.fma.f128(fp128 %x, fp128 %y, fp128 %z, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+  ret fp128 %val
+}
+
+; CHECK-LABEL: fptosi_i32_f128:
+; CHECK: bl __fixtfsi
+define i32 @fptosi_i32_f128(fp128 %x) #0 {
+  %val = call i32 @llvm.experimental.constrained.fptosi.i32.f128(fp128 %x, metadata !"fpexcept.strict") #0
+  ret i32 %val
+}
+
+; CHECK-LABEL: fptoui_i32_f128:
+; CHECK: bl __fixunstfsi
+define i32 @fptoui_i32_f128(fp128 %x) #0 {
+  %val = call i32 @llvm.experimental.constrained.fptoui.i32.f128(fp128 %x, metadata !"fpexcept.strict") #0
+  ret i32 %val
+}
+
+; CHECK-LABEL: fptosi_i64_f128:
+; CHECK: bl __fixtfdi
+define i64 @fptosi_i64_f128(fp128 %x) #0 {
+  %val = call i64 @llvm.experimental.constrained.fptosi.i64.f128(fp128 %x, metadata !"fpexcept.strict") #0
+  ret i64 %val
+}
+
+; CHECK-LABEL: fptoui_i64_f128:
+; CHECK: bl __fixunstfdi
+define i64 @fptoui_i64_f128(fp128 %x) #0 {
+  %val = call i64 @llvm.experimental.constrained.fptoui.i64.f128(fp128 %x, metadata !"fpexcept.strict") #0
+  ret i64 %val
+}
+
+; CHECK-LABEL: sitofp_f128_i32:
+; CHECK: bl __floatsitf
+define fp128 @sitofp_f128_i32(i32 %x) #0 {
+  %val = call fp128 @llvm.experimental.constrained.sitofp.f128.i32(i32 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+  ret fp128 %val
+}
+
+; CHECK-LABEL: uitofp_f128_i32:
+; CHECK: bl __floatunsitf
+define fp128 @uitofp_f128_i32(i32 %x) #0 {
+  %val = call fp128 @llvm.experimental.constrained.uitofp.f128.i32(i32 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+  ret fp128 %val
+}
+
+; CHECK-LABEL: sitofp_f128_i64:
+; CHECK: bl __floatditf
+define fp128 @sitofp_f128_i64(i64 %x) #0 {
+  %val = call fp128 @llvm.experimental.constrained.sitofp.f128.i64(i64 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+  ret fp128 %val
+}
+
+; CHECK-LABEL: uitofp_f128_i64:
+; CHECK: bl __floatunditf
+define fp128 @uitofp_f128_i64(i64 %x) #0 {
+  %val = call fp128 @llvm.experimental.constrained.uitofp.f128.i64(i64 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+  ret fp128 %val
+}
+
+; CHECK-LABEL: sitofp_f128_i128:
+; CHECK: bl __floattitf
+define fp128 @sitofp_f128_i128(i128 %x) #0 {
+  %val = call fp128 @llvm.experimental.constrained.sitofp.f128.i128(i128 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+  ret fp128 %val
+}
+
+; CHECK-LABEL: uitofp_f128_i128:
+; CHECK: bl __floatuntitf
+define fp128 @uitofp_f128_i128(i128 %x) #0 {
+  %val = call fp128 @llvm.experimental.constrained.uitofp.f128.i128(i128 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+  ret fp128 %val
+}
+
+; CHECK-LABEL: sqrt_f128:
+; CHECK: bl sqrtl
+define fp128 @sqrt_f128(fp128 %x) #0 {
+  %val = call fp128 @llvm.experimental.constrained.sqrt.f128(fp128 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+  ret fp128 %val
+}
+
+; CHECK-LABEL: powi_f128:
+; CHECK: bl __powitf2
+define fp128 @powi_f128(fp128 %x, i32 %y) #0 {
+  %val = call fp128 @llvm.experimental.constrained.powi.f128(fp128 %x, i32 %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+  ret fp128 %val
+}
+
+; CHECK-LABEL: sin_f128:
+; CHECK: bl sinl
+define fp128 @sin_f128(fp128 %x) #0 {
+  %val = call fp128 @llvm.experimental.constrained.sin.f128(fp128 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+  ret fp128 %val
+}
+
+; CHECK-LABEL: cos_f128:
+; CHECK: bl cosl
+define fp128 @cos_f128(fp128 %x) #0 {
+  %val = call fp128 @llvm.experimental.constrained.cos.f128(fp128 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+  ret fp128 %val
+}
+
+; CHECK-LABEL: pow_f128:
+; CHECK: bl powl
+define fp128 @pow_f128(fp128 %x, fp128 %y) #0 {
+  %val = call fp128 @llvm.experimental.constrained.pow.f128(fp128 %x, fp128 %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+  ret fp128 %val
+}
+
+; CHECK-LABEL: log_f128:
+; CHECK: bl logl
+define fp128 @log_f128(fp128 %x) #0 {
+  %val = call fp128 @llvm.experimental.constrained.log.f128(fp128 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+  ret fp128 %val
+}
+
+; CHECK-LABEL: log10_f128:
+; CHECK: bl log10l
+define fp128 @log10_f128(fp128 %x) #0 {
+  %val = call fp128 @llvm.experimental.constrained.log10.f128(fp128 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+  ret fp128 %val
+}
+
+; CHECK-LABEL: log2_f128:
+; CHECK: bl log2l
+define fp128 @log2_f128(fp128 %x) #0 {
+  %val = call fp128 @llvm.experimental.constrained.log2.f128(fp128 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+  ret fp128 %val
+}
+
+; CHECK-LABEL: exp_f128:
+; CHECK: bl expl
+define fp128 @exp_f128(fp128 %x) #0 {
+  %val = call fp128 @llvm.experimental.constrained.exp.f128(fp128 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+  ret fp128 %val
+}
+
+; CHECK-LABEL: exp2_f128:
+; CHECK: bl exp2l
+define fp128 @exp2_f128(fp128 %x) #0 {
+  %val = call fp128 @llvm.experimental.constrained.exp2.f128(fp128 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+  ret fp128 %val
+}
+
+; CHECK-LABEL: rint_f128:
+; CHECK: bl rintl
+define fp128 @rint_f128(fp128 %x) #0 {
+  %val = call fp128 @llvm.experimental.constrained.rint.f128(fp128 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+  ret fp128 %val
+}
+
+; CHECK-LABEL: nearbyint_f128:
+; CHECK: bl nearbyintl
+define fp128 @nearbyint_f128(fp128 %x) #0 {
+  %val = call fp128 @llvm.experimental.constrained.nearbyint.f128(fp128 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+  ret fp128 %val
+}
+
+; CHECK-LABEL: lrint_f128:
+; CHECK: bl lrintl
+define i32 @lrint_f128(fp128 %x) #0 {
+  %val = call i32 @llvm.experimental.constrained.lrint.f128(fp128 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+  ret i32 %val
+}
+
+; CHECK-LABEL: llrint_f128:
+; CHECK: bl llrintl
+define i64 @llrint_f128(fp128 %x) #0 {
+  %val = call i64 @llvm.experimental.constrained.llrint.f128(fp128 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+  ret i64 %val
+}
+
+; CHECK-LABEL: maxnum_f128:
+; CHECK: bl fmaxl
+define fp128 @maxnum_f128(fp128 %x, fp128 %y) #0 {
+  %val = call fp128 @llvm.experimental.constrained.maxnum.f128(fp128 %x, fp128 %y, metadata !"fpexcept.strict") #0
+  ret fp128 %val
+}
+
+; CHECK-LABEL: minnum_f128:
+; CHECK: bl fminl
+define fp128 @minnum_f128(fp128 %x, fp128 %y) #0 {
+  %val = call fp128 @llvm.experimental.constrained.minnum.f128(fp128 %x, fp128 %y, metadata !"fpexcept.strict") #0
+  ret fp128 %val
+}
+
+; CHECK-LABEL: ceil_f128:
+; CHECK: bl ceill
+define fp128 @ceil_f128(fp128 %x) #0 {
+  %val = call fp128 @llvm.experimental.constrained.ceil.f128(fp128 %x, metadata !"fpexcept.strict") #0
+  ret fp128 %val
+}
+
+; CHECK-LABEL: floor_f128:
+; CHECK: bl floorl
+define fp128 @floor_f128(fp128 %x) #0 {
+  %val = call fp128 @llvm.experimental.constrained.floor.f128(fp128 %x, metadata !"fpexcept.strict") #0
+  ret fp128 %val
+}
+
+; CHECK-LABEL: lround_f128:
+; CHECK: bl lroundl
+define i32 @lround_f128(fp128 %x) #0 {
+  %val = call i32 @llvm.experimental.constrained.lround.f128(fp128 %x, metadata !"fpexcept.strict") #0
+  ret i32 %val
+}
+
+; CHECK-LABEL: llround_f128:
+; CHECK: bl llroundl
+define i64 @llround_f128(fp128 %x) #0 {
+  %val = call i64 @llvm.experimental.constrained.llround.f128(fp128 %x, metadata !"fpexcept.strict") #0
+  ret i64 %val
+}
+
+; CHECK-LABEL: round_f128:
+; CHECK: bl roundl
+define fp128 @round_f128(fp128 %x) #0 {
+  %val = call fp128 @llvm.experimental.constrained.round.f128(fp128 %x, metadata !"fpexcept.strict") #0
+  ret fp128 %val
+}
+
+; CHECK-LABEL: trunc_f128:
+; CHECK: bl truncl
+define fp128 @trunc_f128(fp128 %x) #0 {
+  %val = call fp128 @llvm.experimental.constrained.trunc.f128(fp128 %x, metadata !"fpexcept.strict") #0
+  ret fp128 %val
+}
 
-; CHECK-LABEL: fptrunc_f32:
+; TODO: fcmp (missing STRICT_FSETCC handling)
+
+
+; Intrinsics to convert between floating-point types
+
+; CHECK-LABEL: fptrunc_f32_f64:
 ; CHECK: fcvt s0, d0
-define float @fptrunc_f32(double %x) #0 {
+define float @fptrunc_f32_f64(double %x) #0 {
   %val = call float @llvm.experimental.constrained.fptrunc.f32.f64(double %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
   ret float %val
 }
 
-; CHECK-LABEL: fpext_f32:
+; CHECK-LABEL: fptrunc_f32_f128:
+; CHECK: bl __trunctfsf2
+define float @fptrunc_f32_f128(fp128 %x) #0 {
+  %val = call float @llvm.experimental.constrained.fptrunc.f32.f128(fp128 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+  ret float %val
+}
+
+; CHECK-LABEL: fptrunc_f64_f128:
+; CHECK: bl __trunctfdf2
+define double @fptrunc_f64_f128(fp128 %x) #0 {
+  %val = call double @llvm.experimental.constrained.fptrunc.f64.f128(fp128 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+  ret double %val
+}
+
+; CHECK-LABEL: fpext_f64_f32:
 ; CHECK: fcvt d0, s0
-define double @fpext_f32(float %x) #0 {
+define double @fpext_f64_f32(float %x) #0 {
   %val = call double @llvm.experimental.constrained.fpext.f64.f32(float %x, metadata !"fpexcept.strict") #0
   ret double %val
 }
 
+; CHECK-LABEL: fpext_f128_f32:
+; CHECK: bl __extendsftf2
+define fp128 @fpext_f128_f32(float %x) #0 {
+  %val = call fp128 @llvm.experimental.constrained.fpext.f128.f32(float %x, metadata !"fpexcept.strict") #0
+  ret fp128 %val
+}
+
+; CHECK-LABEL: fpext_f128_f64:
+; CHECK: bl __extenddftf2
+define fp128 @fpext_f128_f64(double %x) #0 {
+  %val = call fp128 @llvm.experimental.constrained.fpext.f128.f64(double %x, metadata !"fpexcept.strict") #0
+  ret fp128 %val
+}
+
 
 attributes #0 = { strictfp }
 
@@ -921,6 +1261,8 @@ declare float @llvm.experimental.constrained.sitofp.f32.i32(i32, metadata, metad
 declare float @llvm.experimental.constrained.uitofp.f32.i32(i32, metadata, metadata)
 declare float @llvm.experimental.constrained.sitofp.f32.i64(i64, metadata, metadata)
 declare float @llvm.experimental.constrained.uitofp.f32.i64(i64, metadata, metadata)
+declare float @llvm.experimental.constrained.sitofp.f32.i128(i128, metadata, metadata)
+declare float @llvm.experimental.constrained.uitofp.f32.i128(i128, metadata, metadata)
 declare float @llvm.experimental.constrained.sqrt.f32(float, metadata, metadata)
 declare float @llvm.experimental.constrained.powi.f32(float, i32, metadata, metadata)
 declare float @llvm.experimental.constrained.sin.f32(float, metadata, metadata)
@@ -960,6 +1302,8 @@ declare double @llvm.experimental.constrained.sitofp.f64.i32(i32, metadata, meta
 declare double @llvm.experimental.constrained.uitofp.f64.i32(i32, metadata, metadata)
 declare double @llvm.experimental.constrained.sitofp.f64.i64(i64, metadata, metadata)
 declare double @llvm.experimental.constrained.uitofp.f64.i64(i64, metadata, metadata)
+declare double @llvm.experimental.constrained.sitofp.f64.i128(i128, metadata, metadata)
+declare double @llvm.experimental.constrained.uitofp.f64.i128(i128, metadata, metadata)
 declare double @llvm.experimental.constrained.sqrt.f64(double, metadata, metadata)
 declare double @llvm.experimental.constrained.powi.f64(double, i32, metadata, metadata)
 declare double @llvm.experimental.constrained.sin.f64(double, metadata, metadata)
@@ -985,5 +1329,50 @@ declare double @llvm.experimental.constrained.trunc.f64(double, metadata)
 declare i1 @llvm.experimental.constrained.fcmps.f64(double, double, metadata, metadata)
 declare i1 @llvm.experimental.constrained.fcmp.f64(double, double, metadata, metadata)
 
+declare fp128 @llvm.experimental.constrained.fadd.f128(fp128, fp128, metadata, metadata)
+declare fp128 @llvm.experimental.constrained.fsub.f128(fp128, fp128, metadata, metadata)
+declare fp128 @llvm.experimental.constrained.fmul.f128(fp128, fp128, metadata, metadata)
+declare fp128 @llvm.experimental.constrained.fdiv.f128(fp128, fp128, metadata, metadata)
+declare fp128 @llvm.experimental.constrained.frem.f128(fp128, fp128, metadata, metadata)
+declare fp128 @llvm.experimental.constrained.fma.f128(fp128, fp128, fp128, metadata, metadata)
+declare i32 @llvm.experimental.constrained.fptosi.i32.f128(fp128, metadata)
+declare i32 @llvm.experimental.constrained.fptoui.i32.f128(fp128, metadata)
+declare i64 @llvm.experimental.constrained.fptosi.i64.f128(fp128, metadata)
+declare i64 @llvm.experimental.constrained.fptoui.i64.f128(fp128, metadata)
+declare fp128 @llvm.experimental.constrained.sitofp.f128.i32(i32, metadata, metadata)
+declare fp128 @llvm.experimental.constrained.uitofp.f128.i32(i32, metadata, metadata)
+declare fp128 @llvm.experimental.constrained.sitofp.f128.i64(i64, metadata, metadata)
+declare fp128 @llvm.experimental.constrained.uitofp.f128.i64(i64, metadata, metadata)
+declare fp128 @llvm.experimental.constrained.sitofp.f128.i128(i128, metadata, metadata)
+declare fp128 @llvm.experimental.constrained.uitofp.f128.i128(i128, metadata, metadata)
+declare fp128 @llvm.experimental.constrained.sqrt.f128(fp128, metadata, metadata)
+declare fp128 @llvm.experimental.constrained.powi.f128(fp128, i32, metadata, metadata)
+declare fp128 @llvm.experimental.constrained.sin.f128(fp128, metadata, metadata)
+declare fp128 @llvm.experimental.constrained.cos.f128(fp128, metadata, metadata)
+declare fp128 @llvm.experimental.constrained.pow.f128(fp128, fp128, metadata, metadata)
+declare fp128 @llvm.experimental.constrained.log.f128(fp128, metadata, metadata)
+declare fp128 @llvm.experimental.constrained.log10.f128(fp128, metadata, metadata)
+declare fp128 @llvm.experimental.constrained.log2.f128(fp128, metadata, metadata)
+declare fp128 @llvm.experimental.constrained.exp.f128(fp128, metadata, metadata)
+declare fp128 @llvm.experimental.constrained.exp2.f128(fp128, metadata, metadata)
+declare fp128 @llvm.experimental.constrained.rint.f128(fp128, metadata, metadata)
+declare fp128 @llvm.experimental.constrained.nearbyint.f128(fp128, metadata, metadata)
+declare i32 @llvm.experimental.constrained.lrint.f128(fp128, metadata, metadata)
+declare i64 @llvm.experimental.constrained.llrint.f128(fp128, metadata, metadata)
+declare fp128 @llvm.experimental.constrained.maxnum.f128(fp128, fp128, metadata)
+declare fp128 @llvm.experimental.constrained.minnum.f128(fp128, fp128, metadata)
+declare fp128 @llvm.experimental.constrained.ceil.f128(fp128, metadata)
+declare fp128 @llvm.experimental.constrained.floor.f128(fp128, metadata)
+declare i32 @llvm.experimental.constrained.lround.f128(fp128, metadata)
+declare i64 @llvm.experimental.constrained.llround.f128(fp128, metadata)
+declare fp128 @llvm.experimental.constrained.round.f128(fp128, metadata)
+declare fp128 @llvm.experimental.constrained.trunc.f128(fp128, metadata)
+declare i1 @llvm.experimental.constrained.fcmps.f128(fp128, fp128, metadata, metadata)
+declare i1 @llvm.experimental.constrained.fcmp.f128(fp128, fp128, metadata, metadata)
+
 declare float @llvm.experimental.constrained.fptrunc.f32.f64(double, metadata, metadata)
+declare float @llvm.experimental.constrained.fptrunc.f32.f128(fp128, metadata, metadata)
+declare double @llvm.experimental.constrained.fptrunc.f64.f128(fp128, metadata, metadata)
 declare double @llvm.experimental.constrained.fpext.f64.f32(float, metadata)
+declare fp128 @llvm.experimental.constrained.fpext.f128.f32(float, metadata)
+declare fp128 @llvm.experimental.constrained.fpext.f128.f64(double, metadata)

diff --git a/llvm/test/CodeGen/AArch64/fpconv-vector-op-scalarize-strict.ll b/llvm/test/CodeGen/AArch64/fpconv-vector-op-scalarize-strict.ll
index 30ab3bd1077d..eba7fa88dc5d 100644
--- a/llvm/test/CodeGen/AArch64/fpconv-vector-op-scalarize-strict.ll
+++ b/llvm/test/CodeGen/AArch64/fpconv-vector-op-scalarize-strict.ll
@@ -8,18 +8,8 @@
 define <1 x double> @test_sitofp(<1 x i1> %in) #0 {
 ; CHECK-LABEL: test_sitofp:
 ; CHECK:       ; %bb.0: ; %entry
-; CHECK-NEXT:    sub sp, sp, #16 ; =16
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    sbfx w8, w0, #0, #1
-; CHECK-NEXT:    mov w9, #1127219200
-; CHECK-NEXT:    eor w8, w8, #0x80000000
-; CHECK-NEXT:    stp w8, w9, [sp, #8]
-; CHECK-NEXT:    ldr d0, [sp, #8]
-; CHECK-NEXT:    mov x8, #2147483648
-; CHECK-NEXT:    movk x8, #17200, lsl #48
-; CHECK-NEXT:    fmov d1, x8
-; CHECK-NEXT:    fsub d0, d0, d1
-; CHECK-NEXT:    add sp, sp, #16 ; =16
+; CHECK-NEXT:    scvtf d0, w8
 ; CHECK-NEXT:    ret
 entry:
   %0 = call <1 x double> @llvm.experimental.constrained.sitofp.v1f64.v1i1(<1 x i1> %in, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
@@ -29,16 +19,8 @@ entry:
 define <1 x double> @test_uitofp(<1 x i1> %in) #0 {
 ; CHECK-LABEL: test_uitofp:
 ; CHECK:       ; %bb.0: ; %entry
-; CHECK-NEXT:    sub sp, sp, #16 ; =16
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    and w8, w0, #0x1
-; CHECK-NEXT:    mov w9, #1127219200
-; CHECK-NEXT:    stp w8, w9, [sp, #8]
-; CHECK-NEXT:    ldr d0, [sp, #8]
-; CHECK-NEXT:    mov x8, #4841369599423283200
-; CHECK-NEXT:    fmov d1, x8
-; CHECK-NEXT:    fsub d0, d0, d1
-; CHECK-NEXT:    add sp, sp, #16 ; =16
+; CHECK-NEXT:    ucvtf d0, w8
 ; CHECK-NEXT:    ret
 entry:
   %0 = call <1 x double> @llvm.experimental.constrained.uitofp.v1f64.v1i1(<1 x i1> %in, metadata !"round.dynamic", metadata !"fpexcept.strict") #0

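This last test change shows the end-to-end effect: with strict scvtf/ucvtf
selection in place, the scalarized <1 x i1> conversions select a single
conversion instruction instead of falling back to the stack-based
integer-to-double bit-manipulation expansion.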

        

