[llvm] LegalizeIntegerTypes: implement PromoteIntRes for xrint (PR #71055)

via llvm-commits llvm-commits at lists.llvm.org
Thu Nov 2 05:25:13 PDT 2023


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-llvm-selectiondag

Author: Ramkumar Ramachandra (artagnon)

<details>
<summary>Changes</summary>

Recently, 98c90a1 (ISel: introduce vector ISD::LRINT, ISD::LLRINT; custom RISCV lowering) introduced vector variants of llvm.lrint, llvm.llrint, and bundled several tests along with the code change. However, it forgot to test lrint and llrint on fixed vectors on RISC-V, and it turns out that that fixed-vectors-lrint.ll requires PromoteIntRes_XRINT to be implemented. Implement it, and add tests for fixed-vector lrint, llrint.

---

Patch is 60.94 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/71055.diff


4 Files Affected:

- (modified) llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp (+11) 
- (modified) llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h (+1) 
- (added) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-llrint.ll (+885) 
- (added) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-lrint.ll (+645) 


``````````diff
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index 5bd04e2360679d4..cd0d9081d61be17 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -301,6 +301,11 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
   case ISD::FFREXP:
     Res = PromoteIntRes_FFREXP(N);
     break;
+
+  case ISD::LRINT:
+  case ISD::LLRINT:
+    Res = PromoteIntRes_XRINT(N);
+    break;
   }
 
   // If the result is null then the sub-method took care of registering it.
@@ -783,6 +788,12 @@ SDValue DAGTypeLegalizer::PromoteIntRes_FP_TO_FP16_BF16(SDNode *N) {
   return DAG.getNode(N->getOpcode(), dl, NVT, N->getOperand(0));
 }
 
+SDValue DAGTypeLegalizer::PromoteIntRes_XRINT(SDNode *N) {
+  EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+  SDLoc dl(N);
+  return DAG.getNode(N->getOpcode(), dl, NVT, N->getOperand(0));
+}
+
 SDValue DAGTypeLegalizer::PromoteIntRes_GET_ROUNDING(SDNode *N) {
   EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
   SDLoc dl(N);
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index ee4278ceb729b61..c48d6c4adf61517 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -326,6 +326,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
   SDValue PromoteIntRes_FP_TO_XINT(SDNode *N);
   SDValue PromoteIntRes_FP_TO_XINT_SAT(SDNode *N);
   SDValue PromoteIntRes_FP_TO_FP16_BF16(SDNode *N);
+  SDValue PromoteIntRes_XRINT(SDNode *N);
   SDValue PromoteIntRes_FREEZE(SDNode *N);
   SDValue PromoteIntRes_INT_EXTEND(SDNode *N);
   SDValue PromoteIntRes_LOAD(LoadSDNode *N);
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-llrint.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-llrint.ll
new file mode 100644
index 000000000000000..cba375c9c8d2850
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-llrint.ll
@@ -0,0 +1,885 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+v,+f,+d -target-abi=ilp32d \
+; RUN:     -verify-machineinstrs < %s | FileCheck %s --check-prefix=RV32
+; RUN: llc -mtriple=riscv64 -mattr=+v,+f,+d -target-abi=lp64d \
+; RUN:     -verify-machineinstrs < %s | FileCheck %s --check-prefix=RV64
+
+define <1 x i64> @llrint_v1i64_v1f32(<1 x float> %x) {
+; RV32-LABEL: llrint_v1i64_v1f32:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32-NEXT:    .cfi_offset ra, -4
+; RV32-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
+; RV32-NEXT:    vfmv.f.s fa0, v8
+; RV32-NEXT:    call llrintf at plt
+; RV32-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
+; RV32-NEXT:    vslide1down.vx v8, v8, a0
+; RV32-NEXT:    vslide1down.vx v8, v8, a1
+; RV32-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: llrint_v1i64_v1f32:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
+; RV64-NEXT:    vfmv.f.s fa5, v8
+; RV64-NEXT:    fcvt.l.s a0, fa5
+; RV64-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT:    vmv.s.x v8, a0
+; RV64-NEXT:    ret
+  %a = call <1 x i64> @llvm.llrint.v1i64.v1f32(<1 x float> %x)
+  ret <1 x i64> %a
+}
+declare <1 x i64> @llvm.llrint.v1i64.v1f32(<1 x float>)
+
+define <2 x i64> @llrint_v2i64_v2f32(<2 x float> %x) {
+; RV32-LABEL: llrint_v2i64_v2f32:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -32
+; RV32-NEXT:    .cfi_def_cfa_offset 32
+; RV32-NEXT:    sw ra, 28(sp) # 4-byte Folded Spill
+; RV32-NEXT:    .cfi_offset ra, -4
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    sub sp, sp, a0
+; RV32-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x20, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 32 + 2 * vlenb
+; RV32-NEXT:    addi a0, sp, 16
+; RV32-NEXT:    vs1r.v v8, (a0) # Unknown-size Folded Spill
+; RV32-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
+; RV32-NEXT:    vfmv.f.s fa0, v8
+; RV32-NEXT:    call llrintf at plt
+; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; RV32-NEXT:    vslide1down.vx v8, v8, a0
+; RV32-NEXT:    vslide1down.vx v8, v8, a1
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vs1r.v v8, (a0) # Unknown-size Folded Spill
+; RV32-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
+; RV32-NEXT:    addi a0, sp, 16
+; RV32-NEXT:    vl1r.v v8, (a0) # Unknown-size Folded Reload
+; RV32-NEXT:    vslidedown.vi v8, v8, 1
+; RV32-NEXT:    vfmv.f.s fa0, v8
+; RV32-NEXT:    call llrintf at plt
+; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; RV32-NEXT:    csrr a2, vlenb
+; RV32-NEXT:    add a2, sp, a2
+; RV32-NEXT:    addi a2, a2, 16
+; RV32-NEXT:    vl1r.v v8, (a2) # Unknown-size Folded Reload
+; RV32-NEXT:    vslide1down.vx v8, v8, a0
+; RV32-NEXT:    vslide1down.vx v8, v8, a1
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add sp, sp, a0
+; RV32-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32-NEXT:    addi sp, sp, 32
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: llrint_v2i64_v2f32:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
+; RV64-NEXT:    vfmv.f.s fa5, v8
+; RV64-NEXT:    fcvt.l.s a0, fa5
+; RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; RV64-NEXT:    vslide1down.vx v9, v8, a0
+; RV64-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
+; RV64-NEXT:    vslidedown.vi v8, v8, 1
+; RV64-NEXT:    vfmv.f.s fa5, v8
+; RV64-NEXT:    fcvt.l.s a0, fa5
+; RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; RV64-NEXT:    vslide1down.vx v8, v9, a0
+; RV64-NEXT:    ret
+  %a = call <2 x i64> @llvm.llrint.v2i64.v2f32(<2 x float> %x)
+  ret <2 x i64> %a
+}
+declare <2 x i64> @llvm.llrint.v2i64.v2f32(<2 x float>)
+
+define <4 x i64> @llrint_v4i64_v4f32(<4 x float> %x) {
+; RV32-LABEL: llrint_v4i64_v4f32:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -32
+; RV32-NEXT:    .cfi_def_cfa_offset 32
+; RV32-NEXT:    sw ra, 28(sp) # 4-byte Folded Spill
+; RV32-NEXT:    .cfi_offset ra, -4
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    sub sp, sp, a0
+; RV32-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x20, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 32 + 4 * vlenb
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vs1r.v v8, (a0) # Unknown-size Folded Spill
+; RV32-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
+; RV32-NEXT:    vfmv.f.s fa0, v8
+; RV32-NEXT:    call llrintf at plt
+; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; RV32-NEXT:    vslide1down.vx v8, v8, a0
+; RV32-NEXT:    vslide1down.vx v8, v8, a1
+; RV32-NEXT:    addi a0, sp, 16
+; RV32-NEXT:    vs2r.v v8, (a0) # Unknown-size Folded Spill
+; RV32-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vl1r.v v8, (a0) # Unknown-size Folded Reload
+; RV32-NEXT:    vslidedown.vi v8, v8, 1
+; RV32-NEXT:    vfmv.f.s fa0, v8
+; RV32-NEXT:    call llrintf at plt
+; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; RV32-NEXT:    addi a2, sp, 16
+; RV32-NEXT:    vl2r.v v8, (a2) # Unknown-size Folded Reload
+; RV32-NEXT:    vslide1down.vx v8, v8, a0
+; RV32-NEXT:    vslide1down.vx v8, v8, a1
+; RV32-NEXT:    addi a0, sp, 16
+; RV32-NEXT:    vs2r.v v8, (a0) # Unknown-size Folded Spill
+; RV32-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vl1r.v v8, (a0) # Unknown-size Folded Reload
+; RV32-NEXT:    vslidedown.vi v8, v8, 2
+; RV32-NEXT:    vfmv.f.s fa0, v8
+; RV32-NEXT:    call llrintf at plt
+; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; RV32-NEXT:    addi a2, sp, 16
+; RV32-NEXT:    vl2r.v v8, (a2) # Unknown-size Folded Reload
+; RV32-NEXT:    vslide1down.vx v8, v8, a0
+; RV32-NEXT:    vslide1down.vx v8, v8, a1
+; RV32-NEXT:    addi a0, sp, 16
+; RV32-NEXT:    vs2r.v v8, (a0) # Unknown-size Folded Spill
+; RV32-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vl1r.v v8, (a0) # Unknown-size Folded Reload
+; RV32-NEXT:    vslidedown.vi v8, v8, 3
+; RV32-NEXT:    vfmv.f.s fa0, v8
+; RV32-NEXT:    call llrintf at plt
+; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; RV32-NEXT:    addi a2, sp, 16
+; RV32-NEXT:    vl2r.v v8, (a2) # Unknown-size Folded Reload
+; RV32-NEXT:    vslide1down.vx v8, v8, a0
+; RV32-NEXT:    vslide1down.vx v8, v8, a1
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    add sp, sp, a0
+; RV32-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32-NEXT:    addi sp, sp, 32
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: llrint_v4i64_v4f32:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
+; RV64-NEXT:    vfmv.f.s fa5, v8
+; RV64-NEXT:    fcvt.l.s a0, fa5
+; RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; RV64-NEXT:    vslide1down.vx v10, v8, a0
+; RV64-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
+; RV64-NEXT:    vslidedown.vi v9, v8, 1
+; RV64-NEXT:    vfmv.f.s fa5, v9
+; RV64-NEXT:    fcvt.l.s a0, fa5
+; RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; RV64-NEXT:    vslide1down.vx v10, v10, a0
+; RV64-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
+; RV64-NEXT:    vslidedown.vi v9, v8, 2
+; RV64-NEXT:    vfmv.f.s fa5, v9
+; RV64-NEXT:    fcvt.l.s a0, fa5
+; RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; RV64-NEXT:    vslide1down.vx v10, v10, a0
+; RV64-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
+; RV64-NEXT:    vslidedown.vi v8, v8, 3
+; RV64-NEXT:    vfmv.f.s fa5, v8
+; RV64-NEXT:    fcvt.l.s a0, fa5
+; RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; RV64-NEXT:    vslide1down.vx v8, v10, a0
+; RV64-NEXT:    ret
+  %a = call <4 x i64> @llvm.llrint.v4i64.v4f32(<4 x float> %x)
+  ret <4 x i64> %a
+}
+declare <4 x i64> @llvm.llrint.v4i64.v4f32(<4 x float>)
+
+define <8 x i64> @llrint_v8i64_v8f32(<8 x float> %x) {
+; RV32-LABEL: llrint_v8i64_v8f32:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -208
+; RV32-NEXT:    .cfi_def_cfa_offset 208
+; RV32-NEXT:    sw ra, 204(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s0, 200(sp) # 4-byte Folded Spill
+; RV32-NEXT:    .cfi_offset ra, -4
+; RV32-NEXT:    .cfi_offset s0, -8
+; RV32-NEXT:    addi s0, sp, 208
+; RV32-NEXT:    .cfi_def_cfa s0, 0
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    sub sp, sp, a0
+; RV32-NEXT:    andi sp, sp, -64
+; RV32-NEXT:    addi a0, sp, 192
+; RV32-NEXT:    vs2r.v v8, (a0) # Unknown-size Folded Spill
+; RV32-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
+; RV32-NEXT:    vfmv.f.s fa0, v8
+; RV32-NEXT:    call llrintf at plt
+; RV32-NEXT:    sw a1, 68(sp)
+; RV32-NEXT:    sw a0, 64(sp)
+; RV32-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
+; RV32-NEXT:    addi a0, sp, 192
+; RV32-NEXT:    vl2r.v v8, (a0) # Unknown-size Folded Reload
+; RV32-NEXT:    vslidedown.vi v8, v8, 7
+; RV32-NEXT:    vfmv.f.s fa0, v8
+; RV32-NEXT:    call llrintf at plt
+; RV32-NEXT:    sw a1, 124(sp)
+; RV32-NEXT:    sw a0, 120(sp)
+; RV32-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
+; RV32-NEXT:    addi a0, sp, 192
+; RV32-NEXT:    vl2r.v v8, (a0) # Unknown-size Folded Reload
+; RV32-NEXT:    vslidedown.vi v8, v8, 6
+; RV32-NEXT:    vfmv.f.s fa0, v8
+; RV32-NEXT:    call llrintf at plt
+; RV32-NEXT:    sw a1, 116(sp)
+; RV32-NEXT:    sw a0, 112(sp)
+; RV32-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
+; RV32-NEXT:    addi a0, sp, 192
+; RV32-NEXT:    vl2r.v v8, (a0) # Unknown-size Folded Reload
+; RV32-NEXT:    vslidedown.vi v8, v8, 5
+; RV32-NEXT:    vfmv.f.s fa0, v8
+; RV32-NEXT:    call llrintf at plt
+; RV32-NEXT:    sw a1, 108(sp)
+; RV32-NEXT:    sw a0, 104(sp)
+; RV32-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
+; RV32-NEXT:    addi a0, sp, 192
+; RV32-NEXT:    vl2r.v v8, (a0) # Unknown-size Folded Reload
+; RV32-NEXT:    vslidedown.vi v8, v8, 4
+; RV32-NEXT:    vfmv.f.s fa0, v8
+; RV32-NEXT:    call llrintf at plt
+; RV32-NEXT:    sw a1, 100(sp)
+; RV32-NEXT:    sw a0, 96(sp)
+; RV32-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
+; RV32-NEXT:    addi a0, sp, 192
+; RV32-NEXT:    vl2r.v v8, (a0) # Unknown-size Folded Reload
+; RV32-NEXT:    vslidedown.vi v8, v8, 3
+; RV32-NEXT:    vfmv.f.s fa0, v8
+; RV32-NEXT:    call llrintf at plt
+; RV32-NEXT:    sw a1, 92(sp)
+; RV32-NEXT:    sw a0, 88(sp)
+; RV32-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
+; RV32-NEXT:    addi a0, sp, 192
+; RV32-NEXT:    vl2r.v v8, (a0) # Unknown-size Folded Reload
+; RV32-NEXT:    vslidedown.vi v8, v8, 2
+; RV32-NEXT:    vfmv.f.s fa0, v8
+; RV32-NEXT:    call llrintf at plt
+; RV32-NEXT:    sw a1, 84(sp)
+; RV32-NEXT:    sw a0, 80(sp)
+; RV32-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
+; RV32-NEXT:    addi a0, sp, 192
+; RV32-NEXT:    vl2r.v v8, (a0) # Unknown-size Folded Reload
+; RV32-NEXT:    vslidedown.vi v8, v8, 1
+; RV32-NEXT:    vfmv.f.s fa0, v8
+; RV32-NEXT:    call llrintf at plt
+; RV32-NEXT:    sw a1, 76(sp)
+; RV32-NEXT:    sw a0, 72(sp)
+; RV32-NEXT:    addi a0, sp, 64
+; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
+; RV32-NEXT:    vle32.v v8, (a0)
+; RV32-NEXT:    addi sp, s0, -208
+; RV32-NEXT:    lw ra, 204(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s0, 200(sp) # 4-byte Folded Reload
+; RV32-NEXT:    addi sp, sp, 208
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: llrint_v8i64_v8f32:
+; RV64:       # %bb.0:
+; RV64-NEXT:    addi sp, sp, -128
+; RV64-NEXT:    .cfi_def_cfa_offset 128
+; RV64-NEXT:    sd ra, 120(sp) # 8-byte Folded Spill
+; RV64-NEXT:    sd s0, 112(sp) # 8-byte Folded Spill
+; RV64-NEXT:    .cfi_offset ra, -8
+; RV64-NEXT:    .cfi_offset s0, -16
+; RV64-NEXT:    addi s0, sp, 128
+; RV64-NEXT:    .cfi_def_cfa s0, 0
+; RV64-NEXT:    andi sp, sp, -64
+; RV64-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
+; RV64-NEXT:    vfmv.f.s fa5, v8
+; RV64-NEXT:    fcvt.l.s a0, fa5
+; RV64-NEXT:    sd a0, 0(sp)
+; RV64-NEXT:    vslidedown.vi v10, v8, 7
+; RV64-NEXT:    vfmv.f.s fa5, v10
+; RV64-NEXT:    fcvt.l.s a0, fa5
+; RV64-NEXT:    sd a0, 56(sp)
+; RV64-NEXT:    vslidedown.vi v10, v8, 6
+; RV64-NEXT:    vfmv.f.s fa5, v10
+; RV64-NEXT:    fcvt.l.s a0, fa5
+; RV64-NEXT:    sd a0, 48(sp)
+; RV64-NEXT:    vslidedown.vi v10, v8, 5
+; RV64-NEXT:    vfmv.f.s fa5, v10
+; RV64-NEXT:    fcvt.l.s a0, fa5
+; RV64-NEXT:    sd a0, 40(sp)
+; RV64-NEXT:    vslidedown.vi v10, v8, 4
+; RV64-NEXT:    vfmv.f.s fa5, v10
+; RV64-NEXT:    fcvt.l.s a0, fa5
+; RV64-NEXT:    sd a0, 32(sp)
+; RV64-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
+; RV64-NEXT:    vslidedown.vi v9, v8, 3
+; RV64-NEXT:    vfmv.f.s fa5, v9
+; RV64-NEXT:    fcvt.l.s a0, fa5
+; RV64-NEXT:    sd a0, 24(sp)
+; RV64-NEXT:    vslidedown.vi v9, v8, 2
+; RV64-NEXT:    vfmv.f.s fa5, v9
+; RV64-NEXT:    fcvt.l.s a0, fa5
+; RV64-NEXT:    sd a0, 16(sp)
+; RV64-NEXT:    vslidedown.vi v8, v8, 1
+; RV64-NEXT:    vfmv.f.s fa5, v8
+; RV64-NEXT:    fcvt.l.s a0, fa5
+; RV64-NEXT:    sd a0, 8(sp)
+; RV64-NEXT:    mv a0, sp
+; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
+; RV64-NEXT:    vle64.v v8, (a0)
+; RV64-NEXT:    addi sp, s0, -128
+; RV64-NEXT:    ld ra, 120(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s0, 112(sp) # 8-byte Folded Reload
+; RV64-NEXT:    addi sp, sp, 128
+; RV64-NEXT:    ret
+  %a = call <8 x i64> @llvm.llrint.v8i64.v8f32(<8 x float> %x)
+  ret <8 x i64> %a
+}
+declare <8 x i64> @llvm.llrint.v8i64.v8f32(<8 x float>)
+
+define <16 x i64> @llrint_v16i64_v16f32(<16 x float> %x) {
+; RV32-LABEL: llrint_v16i64_v16f32:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -400
+; RV32-NEXT:    .cfi_def_cfa_offset 400
+; RV32-NEXT:    sw ra, 396(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s0, 392(sp) # 4-byte Folded Spill
+; RV32-NEXT:    .cfi_offset ra, -4
+; RV32-NEXT:    .cfi_offset s0, -8
+; RV32-NEXT:    addi s0, sp, 400
+; RV32-NEXT:    .cfi_def_cfa s0, 0
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    sub sp, sp, a0
+; RV32-NEXT:    andi sp, sp, -128
+; RV32-NEXT:    addi a0, sp, 384
+; RV32-NEXT:    vs4r.v v8, (a0) # Unknown-size Folded Spill
+; RV32-NEXT:    addi a0, sp, 64
+; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
+; RV32-NEXT:    vse32.v v8, (a0)
+; RV32-NEXT:    flw fa0, 124(sp)
+; RV32-NEXT:    call llrintf at plt
+; RV32-NEXT:    sw a1, 252(sp)
+; RV32-NEXT:    sw a0, 248(sp)
+; RV32-NEXT:    flw fa0, 120(sp)
+; RV32-NEXT:    call llrintf at plt
+; RV32-NEXT:    sw a1, 244(sp)
+; RV32-NEXT:    sw a0, 240(sp)
+; RV32-NEXT:    flw fa0, 116(sp)
+; RV32-NEXT:    call llrintf at plt
+; RV32-NEXT:    sw a1, 236(sp)
+; RV32-NEXT:    sw a0, 232(sp)
+; RV32-NEXT:    flw fa0, 112(sp)
+; RV32-NEXT:    call llrintf at plt
+; RV32-NEXT:    sw a1, 228(sp)
+; RV32-NEXT:    sw a0, 224(sp)
+; RV32-NEXT:    flw fa0, 108(sp)
+; RV32-NEXT:    call llrintf at plt
+; RV32-NEXT:    sw a1, 220(sp)
+; RV32-NEXT:    sw a0, 216(sp)
+; RV32-NEXT:    flw fa0, 104(sp)
+; RV32-NEXT:    call llrintf at plt
+; RV32-NEXT:    sw a1, 212(sp)
+; RV32-NEXT:    sw a0, 208(sp)
+; RV32-NEXT:    flw fa0, 100(sp)
+; RV32-NEXT:    call llrintf at plt
+; RV32-NEXT:    sw a1, 204(sp)
+; RV32-NEXT:    sw a0, 200(sp)
+; RV32-NEXT:    flw fa0, 96(sp)
+; RV32-NEXT:    call llrintf at plt
+; RV32-NEXT:    sw a1, 196(sp)
+; RV32-NEXT:    sw a0, 192(sp)
+; RV32-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
+; RV32-NEXT:    addi a0, sp, 384
+; RV32-NEXT:    vl4r.v v8, (a0) # Unknown-size Folded Reload
+; RV32-NEXT:    vfmv.f.s fa0, v8
+; RV32-NEXT:    call llrintf at plt
+; RV32-NEXT:    sw a1, 132(sp)
+; RV32-NEXT:    sw a0, 128(sp)
+; RV32-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
+; RV32-NEXT:    addi a0, sp, 384
+; RV32-NEXT:    vl4r.v v8, (a0) # Unknown-size Folded Reload
+; RV32-NEXT:    vslidedown.vi v8, v8, 3
+; RV32-NEXT:    vfmv.f.s fa0, v8
+; RV32-NEXT:    call llrintf at plt
+; RV32-NEXT:    sw a1, 156(sp)
+; RV32-NEXT:    sw a0, 152(sp)
+; RV32-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
+; RV32-NEXT:    addi a0, sp, 384
+; RV32-NEXT:    vl4r.v v8, (a0) # Unknown-size Folded Reload
+; RV32-NEXT:    vslidedown.vi v8, v8, 2
+; RV32-NEXT:    vfmv.f.s fa0, v8
+; RV32-NEXT:    call llrintf at plt
+; RV32-NEXT:    sw a1, 148(sp)
+; RV32-NEXT:    sw a0, 144(sp)
+; RV32-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
+; RV32-NEXT:    addi a0, sp, 384
+; RV32-NEXT:    vl4r.v v8, (a0) # Unknown-size Folded Reload
+; RV32-NEXT:    vslidedown.vi v8, v8, 1
+; RV32-NEXT:    vfmv.f.s fa0, v8
+; RV32-NEXT:    call llrintf at plt
+; RV32-NEXT:    sw a1, 140(sp)
+; RV32-NEXT:    sw a0, 136(sp)
+; RV32-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
+; RV32-NEXT:    addi a0, sp, 384
+; RV32-NEXT:    vl4r.v v8, (a0) # Unknown-size Folded Reload
+; RV32-NEXT:    vslidedown.vi v8, v8, 7
+; RV32-NEXT:    vfmv.f.s fa0, v8
+; RV32-NEXT:    call llrintf at plt
+; RV32-NEXT:    sw a1, 188(sp)
+; RV32-NEXT:    sw a0, 184(sp)
+; RV32-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
+; RV32-NEXT:    addi a0, sp, 384
+; RV32-NEXT:    vl4r.v v8, (a0) # Unknown-size Folded Reload
+; RV32-NEXT:    vslidedown.vi v8, v8, 6
+; RV32-NEXT:    vfmv.f.s fa0, v8
+; RV32-NEXT:    call llrintf at plt
+; RV32-NEXT:    sw a1, 180(sp)
+; RV32-NEXT:    sw a0, 176(sp)
+; RV32-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
+; RV32-NEXT:    addi a0, sp, 384
+; RV32-NEXT:    vl4r.v v8, (a0) # Unknown-size Folded Reload
+; RV32-NEXT:    vslidedown.vi v8, v8, 5
+; RV32-NEXT:    vfmv.f.s fa0, v8
+; RV32-NEXT:    call llrintf at plt
+; RV32-NEXT:    sw a1, 172(sp)
+; RV32-NEXT:    sw a0, 168(sp)
+; RV32-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
+; RV32-NEXT:    addi a0, sp, 384
+; RV32-NEXT:    vl4r.v v8, (a0) # Unknown-size Folded Reload
+; RV32-NEXT:    vslidedown.vi v8, v8, 4
+; RV32-NEXT:    vfmv.f.s fa0, v8
+; RV32-NEXT:    call llrintf at plt
+; RV32-NEXT:    sw a1, 164(sp)
+; RV32-NEXT:    sw a0, 160(sp)
+; RV32-NEXT:    li a0, 32
+; RV32-NEXT:    addi a1, sp, 128
+; RV32-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
+; RV32-NEXT:    vle32.v v8, (a1)
+; RV32-NEXT:    addi sp, s0, -400
+; RV32-NEXT:    lw ra, 396(sp) # 4-byte F...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/71055


More information about the llvm-commits mailing list