[llvm] d7c97e9 - [RISCV] Support llvm.lround intrinsics with i32 return type on RV64.

Wed Apr 12 13:23:07 PDT 2023

Author: Craig Topper
Date: 2023-04-12T13:15:59-07:00
New Revision: d7c97e9129096048f08d4232e5a76ac5853ce74e

URL: https://github.com/llvm/llvm-project/commit/d7c97e9129096048f08d4232e5a76ac5853ce74e
DIFF: https://github.com/llvm/llvm-project/commit/d7c97e9129096048f08d4232e5a76ac5853ce74e.diff

LOG: [RISCV] Support llvm.lround intrinsics with i32 return type on RV64.

It seems that flang uses this for "nint" and expects this i32
to work. On the C side we think lround should only work for "long"
which is i64 on rv64.

It's easy for us to support i32 when we have native FP instructions.
I fell back to i64 and truncated the result otherwise. The
documentation for lround says it returns an unspecified value if the
result doesn't fit in the integer type. I have no idea what flang is
expecting. I really only did the libcall to avoid forking a test.

Reviewed By: asb

Differential Revision: https://reviews.llvm.org/D147195

Added: 
    

Modified: 
    llvm/lib/Target/RISCV/RISCVISelLowering.cpp
    llvm/test/CodeGen/RISCV/double-intrinsics.ll
    llvm/test/CodeGen/RISCV/float-intrinsics.ll
    llvm/test/CodeGen/RISCV/zfh-half-intrinsics.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 70ca7a6d7e275..c62d8d71002dd 100644

--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -452,10 +452,12 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
     setTruncStoreAction(MVT::f64, MVT::f16, Expand);
   }
 
-  if (Subtarget.is64Bit())
+  if (Subtarget.is64Bit()) {
     setOperationAction({ISD::FP_TO_UINT, ISD::FP_TO_SINT,
                         ISD::STRICT_FP_TO_UINT, ISD::STRICT_FP_TO_SINT},
                        MVT::i32, Custom);
+    setOperationAction(ISD::LROUND, MVT::i32, Custom);
+  }
 
   if (Subtarget.hasStdExtF()) {
     setOperationAction({ISD::FP_TO_UINT_SAT, ISD::FP_TO_SINT_SAT}, XLenVT,
@@ -8491,6 +8493,37 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
       Results.push_back(Chain);
     break;
   }
+  case ISD::LROUND: {
+    SDValue Op0 = N->getOperand(0);
+    EVT Op0VT = Op0.getValueType();
+    if (getTypeAction(*DAG.getContext(), Op0.getValueType()) !=
+        TargetLowering::TypeSoftenFloat) {
+      if (!isTypeLegal(Op0VT))
+        return;
+
+      // In absence of Zfh, promote f16 to f32, then convert.
+      if (Op0.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfh())
+        Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op0);
+
+      SDValue Res =
+          DAG.getNode(RISCVISD::FCVT_W_RV64, DL, MVT::i64, Op0,
+                      DAG.getTargetConstant(RISCVFPRndMode::RMM, DL, MVT::i64));
+      Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
+      return;
+    }
+    // If the FP type needs to be softened, emit a library call to lround. We'll
+    // need to truncate the result. We assume any value that doesn't fit in i32
+    // is allowed to return an unspecified value.
+    RTLIB::Libcall LC =
+        Op0.getValueType() == MVT::f64 ? RTLIB::LROUND_F64 : RTLIB::LROUND_F32;
+    MakeLibCallOptions CallOptions;
+    EVT OpVT = Op0.getValueType();
+    CallOptions.setTypeListBeforeSoften(OpVT, MVT::i64, true);
+    SDValue Result = makeLibCall(DAG, LC, MVT::i64, Op0, CallOptions, DL).first;
+    Result = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Result);
+    Results.push_back(Result);
+    break;
+  }
   case ISD::READCYCLECOUNTER: {
     assert(!Subtarget.is64Bit() &&
            "READCYCLECOUNTER only has custom type legalization on riscv32");

diff  --git a/llvm/test/CodeGen/RISCV/double-intrinsics.ll b/llvm/test/CodeGen/RISCV/double-intrinsics.ll
index 0980ef6b8d9f9..74cd579e2c4f3 100644
--- a/llvm/test/CodeGen/RISCV/double-intrinsics.ll
+++ b/llvm/test/CodeGen/RISCV/double-intrinsics.ll
@@ -907,7 +907,8 @@ define iXLen @lrint_f64(double %a) nounwind {
   ret iXLen %1
 }
 
-declare iXLen @llvm.lround.iXLen.f64(double)
+declare i32 @llvm.lround.i32.f64(double)
+declare i64 @llvm.lround.i64.f64(double)
 
 define iXLen @lround_f64(double %a) nounwind {
 ; RV32IFD-LABEL: lround_f64:
@@ -941,6 +942,33 @@ define iXLen @lround_f64(double %a) nounwind {
   ret iXLen %1
 }
 
+define i32 @lround_i32_f64(double %a) nounwind {
+; CHECKIFD-LABEL: lround_i32_f64:
+; CHECKIFD:       # %bb.0:
+; CHECKIFD-NEXT:    fcvt.w.d a0, fa0, rmm
+; CHECKIFD-NEXT:    ret
+;
+; RV32I-LABEL: lround_i32_f64:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi sp, sp, -16
+; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    call lround@plt
+; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    addi sp, sp, 16
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: lround_i32_f64:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    call lround@plt
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+  %1 = call i32 @llvm.lround.i32.f64(double %a)
+  ret i32 %1
+}
+
 declare i64 @llvm.llrint.i64.f64(double)
 
 define i64 @llrint_f64(double %a) nounwind {

diff  --git a/llvm/test/CodeGen/RISCV/float-intrinsics.ll b/llvm/test/CodeGen/RISCV/float-intrinsics.ll
index 84f5c26acabd8..8ae7ce4256d29 100644
--- a/llvm/test/CodeGen/RISCV/float-intrinsics.ll
+++ b/llvm/test/CodeGen/RISCV/float-intrinsics.ll
@@ -1013,7 +1013,8 @@ define iXLen @lrint_f32(float %a) nounwind {
   ret iXLen %1
 }
 
-declare iXLen @llvm.lround.iXLen.f32(float)
+declare i32 @llvm.lround.i32.f32(float)
+declare i64 @llvm.lround.i64.f32(float)
 
 define iXLen @lround_f32(float %a) nounwind {
 ; RV32IF-LABEL: lround_f32:
@@ -1047,6 +1048,40 @@ define iXLen @lround_f32(float %a) nounwind {
   ret iXLen %1
 }
 
+; We support i32 lround on RV64 even though long isn't 32 bits. This is needed
+; by flang.
+define i32 @lround_i32_f32(float %a) nounwind {
+; RV32IF-LABEL: lround_i32_f32:
+; RV32IF:       # %bb.0:
+; RV32IF-NEXT:    fcvt.w.s a0, fa0, rmm
+; RV32IF-NEXT:    ret
+;
+; RV64IF-LABEL: lround_i32_f32:
+; RV64IF:       # %bb.0:
+; RV64IF-NEXT:    fcvt.w.s a0, fa0, rmm
+; RV64IF-NEXT:    ret
+;
+; RV32I-LABEL: lround_i32_f32:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi sp, sp, -16
+; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    call lroundf@plt
+; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    addi sp, sp, 16
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: lround_i32_f32:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    call lroundf@plt
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+  %1 = call i32 @llvm.lround.i32.f32(float %a)
+  ret i32 %1
+}
+
 declare i64 @llvm.llrint.i64.f32(float)
 
 define i64 @llrint_f32(float %a) nounwind {

diff  --git a/llvm/test/CodeGen/RISCV/zfh-half-intrinsics.ll b/llvm/test/CodeGen/RISCV/zfh-half-intrinsics.ll
index f0ef52f9dc45e..1667bf19b97de 100644
--- a/llvm/test/CodeGen/RISCV/zfh-half-intrinsics.ll
+++ b/llvm/test/CodeGen/RISCV/zfh-half-intrinsics.ll
@@ -40,7 +40,8 @@ define iXLen @lrint_f16(half %a) nounwind {
   ret iXLen %1
 }
 
-declare iXLen @llvm.lround.iXLen.f16(half)
+declare i32 @llvm.lround.i32.f16(half)
+declare i64 @llvm.lround.i64.f16(half)
 
 define iXLen @lround_f16(half %a) nounwind {
 ; RV32IZFH-LABEL: lround_f16:
@@ -65,3 +66,27 @@ define iXLen @lround_f16(half %a) nounwind {
   %1 = call iXLen @llvm.lround.iXLen.f16(half %a)
   ret iXLen %1
 }
+
+define i32 @lround_i32_f16(half %a) nounwind {
+; RV32IZFH-LABEL: lround_i32_f16:
+; RV32IZFH:       # %bb.0:
+; RV32IZFH-NEXT:    fcvt.w.h a0, fa0, rmm
+; RV32IZFH-NEXT:    ret
+;
+; RV64IZFH-LABEL: lround_i32_f16:
+; RV64IZFH:       # %bb.0:
+; RV64IZFH-NEXT:    fcvt.w.h a0, fa0, rmm
+; RV64IZFH-NEXT:    ret
+;
+; RV32IDZFH-LABEL: lround_i32_f16:
+; RV32IDZFH:       # %bb.0:
+; RV32IDZFH-NEXT:    fcvt.w.h a0, fa0, rmm
+; RV32IDZFH-NEXT:    ret
+;
+; RV64IDZFH-LABEL: lround_i32_f16:
+; RV64IDZFH:       # %bb.0:
+; RV64IDZFH-NEXT:    fcvt.w.h a0, fa0, rmm
+; RV64IDZFH-NEXT:    ret
+  %1 = call i32 @llvm.lround.i32.f16(half %a)
+  ret i32 %1
+}