[llvm] 2b826df - [RISCV][rvv] setcc-fp-vp.ll - regenerate with missing riscv32/64 checks
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Mon Jul 21 05:57:34 PDT 2025
Author: Simon Pilgrim
Date: 2025-07-21T13:57:23+01:00
New Revision: 2b826dff01c93517f837bc665a30c21ac9308c8a
URL: https://github.com/llvm/llvm-project/commit/2b826dff01c93517f837bc665a30c21ac9308c8a
DIFF: https://github.com/llvm/llvm-project/commit/2b826dff01c93517f837bc665a30c21ac9308c8a.diff
LOG: [RISCV][rvv] setcc-fp-vp.ll - regenerate with missing riscv32/64 checks
Added:
Modified:
llvm/test/CodeGen/RISCV/rvv/setcc-fp-vp.ll
Removed:
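
For reference, checks like these are typically regenerated with utils/update_llc_test_checks.py, which re-runs every RUN line in the test file and rewrites the autogenerated CHECK bodies. A minimal sketch of the invocation (the 'build' directory name is an assumption, not part of this commit):

    # Hypothetical in-tree build directory named 'build'; the script reads the
    # RUN lines from the test and regenerates all autogenerated CHECK lines.
    python3 llvm/utils/update_llc_test_checks.py \
        --llc-binary build/bin/llc \
        llvm/test/CodeGen/RISCV/rvv/setcc-fp-vp.ll
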
################################################################################
diff --git a/llvm/test/CodeGen/RISCV/rvv/setcc-fp-vp.ll b/llvm/test/CodeGen/RISCV/rvv/setcc-fp-vp.ll
index 8495dfe350729..32892bca84747 100644
--- a/llvm/test/CodeGen/RISCV/rvv/setcc-fp-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/setcc-fp-vp.ll
@@ -1,16 +1,16 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+zfbfmin,+zvfbfmin,+v \
; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \
-; RUN: --check-prefixes=CHECK,ZVFH
+; RUN: --check-prefixes=CHECK,CHECK32,ZVFH
; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+zfbfmin,+zvfbfmin,+v \
; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \
-; RUN: --check-prefixes=CHECK,ZVFH
+; RUN: --check-prefixes=CHECK,CHECK64,ZVFH
; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \
; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \
-; RUN: --check-prefixes=CHECK,ZVFHMIN
+; RUN: --check-prefixes=CHECK,CHECK32,ZVFHMIN
; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \
; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \
-; RUN: --check-prefixes=CHECK,ZVFHMIN
+; RUN: --check-prefixes=CHECK,CHECK64,ZVFHMIN
declare <vscale x 1 x i1> @llvm.vp.fcmp.nxv1bf16(<vscale x 1 x bfloat>, <vscale x 1 x bfloat>, metadata, <vscale x 1 x i1>, i32)
@@ -4820,6 +4820,427 @@ define <vscale x 8 x i1> @fcmp_uno_vf_swap_nxv8f64(<vscale x 8 x double> %va, do
declare <vscale x 32 x i1> @llvm.vp.fcmp.nxv32f64(<vscale x 32 x double>, <vscale x 32 x double>, metadata, <vscale x 32 x i1>, i32)
define <vscale x 32 x i1> @fcmp_oeq_vv_nxv32f64(<vscale x 32 x double> %va, <vscale x 32 x double> %vb, <vscale x 32 x i1> %m, i32 zeroext %evl) {
+; CHECK32-LABEL: fcmp_oeq_vv_nxv32f64:
+; CHECK32: # %bb.0:
+; CHECK32-NEXT: addi sp, sp, -48
+; CHECK32-NEXT: .cfi_def_cfa_offset 48
+; CHECK32-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
+; CHECK32-NEXT: sw s0, 40(sp) # 4-byte Folded Spill
+; CHECK32-NEXT: sw s1, 36(sp) # 4-byte Folded Spill
+; CHECK32-NEXT: sw s2, 32(sp) # 4-byte Folded Spill
+; CHECK32-NEXT: sw s3, 28(sp) # 4-byte Folded Spill
+; CHECK32-NEXT: sw s4, 24(sp) # 4-byte Folded Spill
+; CHECK32-NEXT: .cfi_offset ra, -4
+; CHECK32-NEXT: .cfi_offset s0, -8
+; CHECK32-NEXT: .cfi_offset s1, -12
+; CHECK32-NEXT: .cfi_offset s2, -16
+; CHECK32-NEXT: .cfi_offset s3, -20
+; CHECK32-NEXT: .cfi_offset s4, -24
+; CHECK32-NEXT: csrr a1, vlenb
+; CHECK32-NEXT: slli a1, a1, 1
+; CHECK32-NEXT: mv a3, a1
+; CHECK32-NEXT: slli a1, a1, 2
+; CHECK32-NEXT: add a3, a3, a1
+; CHECK32-NEXT: slli a1, a1, 1
+; CHECK32-NEXT: add a1, a1, a3
+; CHECK32-NEXT: sub sp, sp, a1
+; CHECK32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x1a, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 26 * vlenb
+; CHECK32-NEXT: mv s1, a6
+; CHECK32-NEXT: csrr a1, vlenb
+; CHECK32-NEXT: add a1, sp, a1
+; CHECK32-NEXT: addi a1, a1, 16
+; CHECK32-NEXT: vs1r.v v0, (a1) # vscale x 8-byte Folded Spill
+; CHECK32-NEXT: mv s3, a2
+; CHECK32-NEXT: mv s2, a0
+; CHECK32-NEXT: csrr a0, vlenb
+; CHECK32-NEXT: slli a1, a0, 3
+; CHECK32-NEXT: add a0, a1, a0
+; CHECK32-NEXT: add a0, sp, a0
+; CHECK32-NEXT: addi a0, a0, 16
+; CHECK32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; CHECK32-NEXT: csrr a0, vlenb
+; CHECK32-NEXT: slli a0, a0, 1
+; CHECK32-NEXT: mv a1, a0
+; CHECK32-NEXT: slli a0, a0, 3
+; CHECK32-NEXT: add a0, a0, a1
+; CHECK32-NEXT: add a0, sp, a0
+; CHECK32-NEXT: addi a0, a0, 16
+; CHECK32-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
+; CHECK32-NEXT: csrr s0, vlenb
+; CHECK32-NEXT: li a1, 24
+; CHECK32-NEXT: mv a0, s0
+; CHECK32-NEXT: call __mulsi3
+; CHECK32-NEXT: csrr a1, vlenb
+; CHECK32-NEXT: add a1, sp, a1
+; CHECK32-NEXT: addi a1, a1, 16
+; CHECK32-NEXT: vl1r.v v6, (a1) # vscale x 8-byte Folded Reload
+; CHECK32-NEXT: mv a1, a0
+; CHECK32-NEXT: slli a4, s0, 3
+; CHECK32-NEXT: srli s4, s0, 2
+; CHECK32-NEXT: srli a0, s0, 3
+; CHECK32-NEXT: vsetvli a2, zero, e8, mf2, ta, ma
+; CHECK32-NEXT: vslidedown.vx v7, v6, s4
+; CHECK32-NEXT: add a2, s3, a4
+; CHECK32-NEXT: vl8re64.v v16, (a2)
+; CHECK32-NEXT: slli a6, s0, 4
+; CHECK32-NEXT: slli a2, s0, 1
+; CHECK32-NEXT: vsetvli a3, zero, e8, mf4, ta, ma
+; CHECK32-NEXT: vslidedown.vx v0, v6, a0
+; CHECK32-NEXT: mv a3, s1
+; CHECK32-NEXT: bltu s1, a2, .LBB257_2
+; CHECK32-NEXT: # %bb.1:
+; CHECK32-NEXT: mv a3, a2
+; CHECK32-NEXT: .LBB257_2:
+; CHECK32-NEXT: add a5, s3, a1
+; CHECK32-NEXT: add a1, s2, a4
+; CHECK32-NEXT: vslidedown.vx v9, v7, a0
+; CHECK32-NEXT: csrr a4, vlenb
+; CHECK32-NEXT: slli a7, a4, 4
+; CHECK32-NEXT: add a4, a7, a4
+; CHECK32-NEXT: add a4, sp, a4
+; CHECK32-NEXT: addi a4, a4, 16
+; CHECK32-NEXT: vs1r.v v9, (a4) # vscale x 8-byte Folded Spill
+; CHECK32-NEXT: add a4, s3, a6
+; CHECK32-NEXT: vl8re64.v v24, (s3)
+; CHECK32-NEXT: sub a6, a3, s0
+; CHECK32-NEXT: sltu a7, a3, a6
+; CHECK32-NEXT: addi a7, a7, -1
+; CHECK32-NEXT: and a6, a7, a6
+; CHECK32-NEXT: csrr a7, vlenb
+; CHECK32-NEXT: slli t0, a7, 3
+; CHECK32-NEXT: add a7, t0, a7
+; CHECK32-NEXT: add a7, sp, a7
+; CHECK32-NEXT: addi a7, a7, 16
+; CHECK32-NEXT: vl8r.v v8, (a7) # vscale x 64-byte Folded Reload
+; CHECK32-NEXT: vsetvli zero, a6, e64, m8, ta, ma
+; CHECK32-NEXT: vmfeq.vv v5, v8, v16, v0.t
+; CHECK32-NEXT: bltu a3, s0, .LBB257_4
+; CHECK32-NEXT: # %bb.3:
+; CHECK32-NEXT: mv a3, s0
+; CHECK32-NEXT: .LBB257_4:
+; CHECK32-NEXT: vmv1r.v v0, v6
+; CHECK32-NEXT: vl8re64.v v8, (a5)
+; CHECK32-NEXT: csrr a5, vlenb
+; CHECK32-NEXT: slli a6, a5, 3
+; CHECK32-NEXT: add a5, a6, a5
+; CHECK32-NEXT: add a5, sp, a5
+; CHECK32-NEXT: addi a5, a5, 16
+; CHECK32-NEXT: vs8r.v v8, (a5) # vscale x 64-byte Folded Spill
+; CHECK32-NEXT: csrr a5, vlenb
+; CHECK32-NEXT: slli a5, a5, 1
+; CHECK32-NEXT: mv a6, a5
+; CHECK32-NEXT: slli a5, a5, 3
+; CHECK32-NEXT: add a5, a5, a6
+; CHECK32-NEXT: add a5, sp, a5
+; CHECK32-NEXT: addi a5, a5, 16
+; CHECK32-NEXT: vl8r.v v16, (a5) # vscale x 64-byte Folded Reload
+; CHECK32-NEXT: vsetvli zero, a3, e64, m8, ta, ma
+; CHECK32-NEXT: vmfeq.vv v8, v16, v24, v0.t
+; CHECK32-NEXT: vl8re64.v v16, (a1)
+; CHECK32-NEXT: csrr a1, vlenb
+; CHECK32-NEXT: add a1, sp, a1
+; CHECK32-NEXT: addi a1, a1, 16
+; CHECK32-NEXT: vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
+; CHECK32-NEXT: vl8re64.v v16, (a4)
+; CHECK32-NEXT: sub a1, s1, a2
+; CHECK32-NEXT: sltu a2, s1, a1
+; CHECK32-NEXT: vl8re64.v v24, (s2)
+; CHECK32-NEXT: addi a2, a2, -1
+; CHECK32-NEXT: and s1, a2, a1
+; CHECK32-NEXT: vsetvli zero, s4, e8, mf2, tu, ma
+; CHECK32-NEXT: vslideup.vx v8, v5, a0
+; CHECK32-NEXT: csrr a1, vlenb
+; CHECK32-NEXT: slli a1, a1, 1
+; CHECK32-NEXT: mv a2, a1
+; CHECK32-NEXT: slli a1, a1, 3
+; CHECK32-NEXT: add a1, a1, a2
+; CHECK32-NEXT: add a1, sp, a1
+; CHECK32-NEXT: addi a1, a1, 16
+; CHECK32-NEXT: vs1r.v v8, (a1) # vscale x 8-byte Folded Spill
+; CHECK32-NEXT: mv a1, s1
+; CHECK32-NEXT: bltu s1, s0, .LBB257_6
+; CHECK32-NEXT: # %bb.5:
+; CHECK32-NEXT: mv a1, s0
+; CHECK32-NEXT: .LBB257_6:
+; CHECK32-NEXT: vmv1r.v v0, v7
+; CHECK32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; CHECK32-NEXT: vmfeq.vv v8, v24, v16, v0.t
+; CHECK32-NEXT: addi a1, sp, 16
+; CHECK32-NEXT: vs1r.v v8, (a1) # vscale x 8-byte Folded Spill
+; CHECK32-NEXT: li a1, 3
+; CHECK32-NEXT: call __mulsi3
+; CHECK32-NEXT: csrr a1, vlenb
+; CHECK32-NEXT: slli a2, a1, 4
+; CHECK32-NEXT: add a1, a2, a1
+; CHECK32-NEXT: add a1, sp, a1
+; CHECK32-NEXT: addi a1, a1, 16
+; CHECK32-NEXT: vl1r.v v0, (a1) # vscale x 8-byte Folded Reload
+; CHECK32-NEXT: csrr a1, vlenb
+; CHECK32-NEXT: slli a1, a1, 1
+; CHECK32-NEXT: mv a2, a1
+; CHECK32-NEXT: slli a1, a1, 3
+; CHECK32-NEXT: add a1, a1, a2
+; CHECK32-NEXT: add a1, sp, a1
+; CHECK32-NEXT: addi a1, a1, 16
+; CHECK32-NEXT: vl1r.v v9, (a1) # vscale x 8-byte Folded Reload
+; CHECK32-NEXT: addi a1, sp, 16
+; CHECK32-NEXT: vl1r.v v8, (a1) # vscale x 8-byte Folded Reload
+; CHECK32-NEXT: vsetvli zero, a0, e8, mf2, tu, ma
+; CHECK32-NEXT: vslideup.vx v9, v8, s4
+; CHECK32-NEXT: sub a1, s1, s0
+; CHECK32-NEXT: sltu a2, s1, a1
+; CHECK32-NEXT: addi a2, a2, -1
+; CHECK32-NEXT: and a1, a2, a1
+; CHECK32-NEXT: csrr a2, vlenb
+; CHECK32-NEXT: slli a3, a2, 3
+; CHECK32-NEXT: add a2, a3, a2
+; CHECK32-NEXT: add a2, sp, a2
+; CHECK32-NEXT: addi a2, a2, 16
+; CHECK32-NEXT: vl8r.v v16, (a2) # vscale x 64-byte Folded Reload
+; CHECK32-NEXT: csrr a2, vlenb
+; CHECK32-NEXT: add a2, sp, a2
+; CHECK32-NEXT: addi a2, a2, 16
+; CHECK32-NEXT: vl8r.v v24, (a2) # vscale x 64-byte Folded Reload
+; CHECK32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; CHECK32-NEXT: vmfeq.vv v8, v24, v16, v0.t
+; CHECK32-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
+; CHECK32-NEXT: vslideup.vx v9, v8, a0
+; CHECK32-NEXT: vmv1r.v v0, v9
+; CHECK32-NEXT: csrr a0, vlenb
+; CHECK32-NEXT: slli a0, a0, 1
+; CHECK32-NEXT: mv a1, a0
+; CHECK32-NEXT: slli a0, a0, 2
+; CHECK32-NEXT: add a1, a1, a0
+; CHECK32-NEXT: slli a0, a0, 1
+; CHECK32-NEXT: add a0, a0, a1
+; CHECK32-NEXT: add sp, sp, a0
+; CHECK32-NEXT: .cfi_def_cfa sp, 48
+; CHECK32-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
+; CHECK32-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
+; CHECK32-NEXT: lw s1, 36(sp) # 4-byte Folded Reload
+; CHECK32-NEXT: lw s2, 32(sp) # 4-byte Folded Reload
+; CHECK32-NEXT: lw s3, 28(sp) # 4-byte Folded Reload
+; CHECK32-NEXT: lw s4, 24(sp) # 4-byte Folded Reload
+; CHECK32-NEXT: .cfi_restore ra
+; CHECK32-NEXT: .cfi_restore s0
+; CHECK32-NEXT: .cfi_restore s1
+; CHECK32-NEXT: .cfi_restore s2
+; CHECK32-NEXT: .cfi_restore s3
+; CHECK32-NEXT: .cfi_restore s4
+; CHECK32-NEXT: addi sp, sp, 48
+; CHECK32-NEXT: .cfi_def_cfa_offset 0
+; CHECK32-NEXT: ret
+;
+; CHECK64-LABEL: fcmp_oeq_vv_nxv32f64:
+; CHECK64: # %bb.0:
+; CHECK64-NEXT: addi sp, sp, -64
+; CHECK64-NEXT: .cfi_def_cfa_offset 64
+; CHECK64-NEXT: sd ra, 56(sp) # 8-byte Folded Spill
+; CHECK64-NEXT: sd s0, 48(sp) # 8-byte Folded Spill
+; CHECK64-NEXT: sd s1, 40(sp) # 8-byte Folded Spill
+; CHECK64-NEXT: sd s2, 32(sp) # 8-byte Folded Spill
+; CHECK64-NEXT: sd s3, 24(sp) # 8-byte Folded Spill
+; CHECK64-NEXT: sd s4, 16(sp) # 8-byte Folded Spill
+; CHECK64-NEXT: .cfi_offset ra, -8
+; CHECK64-NEXT: .cfi_offset s0, -16
+; CHECK64-NEXT: .cfi_offset s1, -24
+; CHECK64-NEXT: .cfi_offset s2, -32
+; CHECK64-NEXT: .cfi_offset s3, -40
+; CHECK64-NEXT: .cfi_offset s4, -48
+; CHECK64-NEXT: csrr a1, vlenb
+; CHECK64-NEXT: slli a1, a1, 1
+; CHECK64-NEXT: mv a3, a1
+; CHECK64-NEXT: slli a1, a1, 2
+; CHECK64-NEXT: add a3, a3, a1
+; CHECK64-NEXT: slli a1, a1, 1
+; CHECK64-NEXT: add a1, a1, a3
+; CHECK64-NEXT: sub sp, sp, a1
+; CHECK64-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xc0, 0x00, 0x22, 0x11, 0x1a, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 64 + 26 * vlenb
+; CHECK64-NEXT: mv s1, a6
+; CHECK64-NEXT: csrr a1, vlenb
+; CHECK64-NEXT: add a1, sp, a1
+; CHECK64-NEXT: addi a1, a1, 16
+; CHECK64-NEXT: vs1r.v v0, (a1) # vscale x 8-byte Folded Spill
+; CHECK64-NEXT: mv s3, a2
+; CHECK64-NEXT: mv s2, a0
+; CHECK64-NEXT: csrr a0, vlenb
+; CHECK64-NEXT: slli a1, a0, 3
+; CHECK64-NEXT: add a0, a1, a0
+; CHECK64-NEXT: add a0, sp, a0
+; CHECK64-NEXT: addi a0, a0, 16
+; CHECK64-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; CHECK64-NEXT: csrr a0, vlenb
+; CHECK64-NEXT: slli a0, a0, 1
+; CHECK64-NEXT: mv a1, a0
+; CHECK64-NEXT: slli a0, a0, 3
+; CHECK64-NEXT: add a0, a0, a1
+; CHECK64-NEXT: add a0, sp, a0
+; CHECK64-NEXT: addi a0, a0, 16
+; CHECK64-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
+; CHECK64-NEXT: csrr s0, vlenb
+; CHECK64-NEXT: li a1, 24
+; CHECK64-NEXT: mv a0, s0
+; CHECK64-NEXT: call __muldi3
+; CHECK64-NEXT: csrr a1, vlenb
+; CHECK64-NEXT: add a1, sp, a1
+; CHECK64-NEXT: addi a1, a1, 16
+; CHECK64-NEXT: vl1r.v v6, (a1) # vscale x 8-byte Folded Reload
+; CHECK64-NEXT: mv a1, a0
+; CHECK64-NEXT: slli a4, s0, 3
+; CHECK64-NEXT: srli s4, s0, 2
+; CHECK64-NEXT: srli a0, s0, 3
+; CHECK64-NEXT: vsetvli a2, zero, e8, mf2, ta, ma
+; CHECK64-NEXT: vslidedown.vx v7, v6, s4
+; CHECK64-NEXT: add a2, s3, a4
+; CHECK64-NEXT: vl8re64.v v16, (a2)
+; CHECK64-NEXT: slli a6, s0, 4
+; CHECK64-NEXT: slli a2, s0, 1
+; CHECK64-NEXT: vsetvli a3, zero, e8, mf4, ta, ma
+; CHECK64-NEXT: vslidedown.vx v0, v6, a0
+; CHECK64-NEXT: mv a3, s1
+; CHECK64-NEXT: bltu s1, a2, .LBB257_2
+; CHECK64-NEXT: # %bb.1:
+; CHECK64-NEXT: mv a3, a2
+; CHECK64-NEXT: .LBB257_2:
+; CHECK64-NEXT: add a5, s3, a1
+; CHECK64-NEXT: add a1, s2, a4
+; CHECK64-NEXT: vslidedown.vx v9, v7, a0
+; CHECK64-NEXT: csrr a4, vlenb
+; CHECK64-NEXT: slli a7, a4, 4
+; CHECK64-NEXT: add a4, a7, a4
+; CHECK64-NEXT: add a4, sp, a4
+; CHECK64-NEXT: addi a4, a4, 16
+; CHECK64-NEXT: vs1r.v v9, (a4) # vscale x 8-byte Folded Spill
+; CHECK64-NEXT: add a4, s3, a6
+; CHECK64-NEXT: vl8re64.v v24, (s3)
+; CHECK64-NEXT: sub a6, a3, s0
+; CHECK64-NEXT: sltu a7, a3, a6
+; CHECK64-NEXT: addi a7, a7, -1
+; CHECK64-NEXT: and a6, a7, a6
+; CHECK64-NEXT: csrr a7, vlenb
+; CHECK64-NEXT: slli t0, a7, 3
+; CHECK64-NEXT: add a7, t0, a7
+; CHECK64-NEXT: add a7, sp, a7
+; CHECK64-NEXT: addi a7, a7, 16
+; CHECK64-NEXT: vl8r.v v8, (a7) # vscale x 64-byte Folded Reload
+; CHECK64-NEXT: vsetvli zero, a6, e64, m8, ta, ma
+; CHECK64-NEXT: vmfeq.vv v5, v8, v16, v0.t
+; CHECK64-NEXT: bltu a3, s0, .LBB257_4
+; CHECK64-NEXT: # %bb.3:
+; CHECK64-NEXT: mv a3, s0
+; CHECK64-NEXT: .LBB257_4:
+; CHECK64-NEXT: vmv1r.v v0, v6
+; CHECK64-NEXT: vl8re64.v v8, (a5)
+; CHECK64-NEXT: csrr a5, vlenb
+; CHECK64-NEXT: slli a6, a5, 3
+; CHECK64-NEXT: add a5, a6, a5
+; CHECK64-NEXT: add a5, sp, a5
+; CHECK64-NEXT: addi a5, a5, 16
+; CHECK64-NEXT: vs8r.v v8, (a5) # vscale x 64-byte Folded Spill
+; CHECK64-NEXT: csrr a5, vlenb
+; CHECK64-NEXT: slli a5, a5, 1
+; CHECK64-NEXT: mv a6, a5
+; CHECK64-NEXT: slli a5, a5, 3
+; CHECK64-NEXT: add a5, a5, a6
+; CHECK64-NEXT: add a5, sp, a5
+; CHECK64-NEXT: addi a5, a5, 16
+; CHECK64-NEXT: vl8r.v v16, (a5) # vscale x 64-byte Folded Reload
+; CHECK64-NEXT: vsetvli zero, a3, e64, m8, ta, ma
+; CHECK64-NEXT: vmfeq.vv v8, v16, v24, v0.t
+; CHECK64-NEXT: vl8re64.v v16, (a1)
+; CHECK64-NEXT: csrr a1, vlenb
+; CHECK64-NEXT: add a1, sp, a1
+; CHECK64-NEXT: addi a1, a1, 16
+; CHECK64-NEXT: vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
+; CHECK64-NEXT: vl8re64.v v16, (a4)
+; CHECK64-NEXT: sub a1, s1, a2
+; CHECK64-NEXT: sltu a2, s1, a1
+; CHECK64-NEXT: vl8re64.v v24, (s2)
+; CHECK64-NEXT: addi a2, a2, -1
+; CHECK64-NEXT: and s1, a2, a1
+; CHECK64-NEXT: vsetvli zero, s4, e8, mf2, tu, ma
+; CHECK64-NEXT: vslideup.vx v8, v5, a0
+; CHECK64-NEXT: csrr a1, vlenb
+; CHECK64-NEXT: slli a1, a1, 1
+; CHECK64-NEXT: mv a2, a1
+; CHECK64-NEXT: slli a1, a1, 3
+; CHECK64-NEXT: add a1, a1, a2
+; CHECK64-NEXT: add a1, sp, a1
+; CHECK64-NEXT: addi a1, a1, 16
+; CHECK64-NEXT: vs1r.v v8, (a1) # vscale x 8-byte Folded Spill
+; CHECK64-NEXT: mv a1, s1
+; CHECK64-NEXT: bltu s1, s0, .LBB257_6
+; CHECK64-NEXT: # %bb.5:
+; CHECK64-NEXT: mv a1, s0
+; CHECK64-NEXT: .LBB257_6:
+; CHECK64-NEXT: vmv1r.v v0, v7
+; CHECK64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; CHECK64-NEXT: vmfeq.vv v8, v24, v16, v0.t
+; CHECK64-NEXT: addi a1, sp, 16
+; CHECK64-NEXT: vs1r.v v8, (a1) # vscale x 8-byte Folded Spill
+; CHECK64-NEXT: li a1, 3
+; CHECK64-NEXT: call __muldi3
+; CHECK64-NEXT: csrr a1, vlenb
+; CHECK64-NEXT: slli a2, a1, 4
+; CHECK64-NEXT: add a1, a2, a1
+; CHECK64-NEXT: add a1, sp, a1
+; CHECK64-NEXT: addi a1, a1, 16
+; CHECK64-NEXT: vl1r.v v0, (a1) # vscale x 8-byte Folded Reload
+; CHECK64-NEXT: csrr a1, vlenb
+; CHECK64-NEXT: slli a1, a1, 1
+; CHECK64-NEXT: mv a2, a1
+; CHECK64-NEXT: slli a1, a1, 3
+; CHECK64-NEXT: add a1, a1, a2
+; CHECK64-NEXT: add a1, sp, a1
+; CHECK64-NEXT: addi a1, a1, 16
+; CHECK64-NEXT: vl1r.v v9, (a1) # vscale x 8-byte Folded Reload
+; CHECK64-NEXT: addi a1, sp, 16
+; CHECK64-NEXT: vl1r.v v8, (a1) # vscale x 8-byte Folded Reload
+; CHECK64-NEXT: vsetvli zero, a0, e8, mf2, tu, ma
+; CHECK64-NEXT: vslideup.vx v9, v8, s4
+; CHECK64-NEXT: sub a1, s1, s0
+; CHECK64-NEXT: sltu a2, s1, a1
+; CHECK64-NEXT: addi a2, a2, -1
+; CHECK64-NEXT: and a1, a2, a1
+; CHECK64-NEXT: csrr a2, vlenb
+; CHECK64-NEXT: slli a3, a2, 3
+; CHECK64-NEXT: add a2, a3, a2
+; CHECK64-NEXT: add a2, sp, a2
+; CHECK64-NEXT: addi a2, a2, 16
+; CHECK64-NEXT: vl8r.v v16, (a2) # vscale x 64-byte Folded Reload
+; CHECK64-NEXT: csrr a2, vlenb
+; CHECK64-NEXT: add a2, sp, a2
+; CHECK64-NEXT: addi a2, a2, 16
+; CHECK64-NEXT: vl8r.v v24, (a2) # vscale x 64-byte Folded Reload
+; CHECK64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; CHECK64-NEXT: vmfeq.vv v8, v24, v16, v0.t
+; CHECK64-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
+; CHECK64-NEXT: vslideup.vx v9, v8, a0
+; CHECK64-NEXT: vmv1r.v v0, v9
+; CHECK64-NEXT: csrr a0, vlenb
+; CHECK64-NEXT: slli a0, a0, 1
+; CHECK64-NEXT: mv a1, a0
+; CHECK64-NEXT: slli a0, a0, 2
+; CHECK64-NEXT: add a1, a1, a0
+; CHECK64-NEXT: slli a0, a0, 1
+; CHECK64-NEXT: add a0, a0, a1
+; CHECK64-NEXT: add sp, sp, a0
+; CHECK64-NEXT: .cfi_def_cfa sp, 64
+; CHECK64-NEXT: ld ra, 56(sp) # 8-byte Folded Reload
+; CHECK64-NEXT: ld s0, 48(sp) # 8-byte Folded Reload
+; CHECK64-NEXT: ld s1, 40(sp) # 8-byte Folded Reload
+; CHECK64-NEXT: ld s2, 32(sp) # 8-byte Folded Reload
+; CHECK64-NEXT: ld s3, 24(sp) # 8-byte Folded Reload
+; CHECK64-NEXT: ld s4, 16(sp) # 8-byte Folded Reload
+; CHECK64-NEXT: .cfi_restore ra
+; CHECK64-NEXT: .cfi_restore s0
+; CHECK64-NEXT: .cfi_restore s1
+; CHECK64-NEXT: .cfi_restore s2
+; CHECK64-NEXT: .cfi_restore s3
+; CHECK64-NEXT: .cfi_restore s4
+; CHECK64-NEXT: addi sp, sp, 64
+; CHECK64-NEXT: .cfi_def_cfa_offset 0
+; CHECK64-NEXT: ret
%v = call <vscale x 32 x i1> @llvm.vp.fcmp.nxv32f64(<vscale x 32 x double> %va, <vscale x 32 x double> %vb, metadata !"oeq", <vscale x 32 x i1> %m, i32 %evl)
ret <vscale x 32 x i1> %v
}
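
As a side note on the RUN-line change at the top of the diff: the new CHECK32 and CHECK64 prefixes let a single test body carry rv32- and rv64-specific output (such as the fcmp_oeq_vv_nxv32f64 body above) alongside lines shared under the common CHECK prefix. Below is a minimal hand-written sketch of that scheme, not part of this commit; the function and the exact instructions are illustrative, and in practice update_llc_test_checks.py generates the full per-prefix bodies:

    ; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s | FileCheck %s \
    ; RUN:   --check-prefixes=CHECK,CHECK32
    ; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s | FileCheck %s \
    ; RUN:   --check-prefixes=CHECK,CHECK64

    define i64 @zext_i32_to_i64(i32 %x) {
    ; CHECK-LABEL: zext_i32_to_i64:
    ; CHECK32:       li a1, 0
    ; CHECK64:       slli a0, a0, 32
    ; CHECK64:       srli a0, a0, 32
    ; CHECK:         ret
      %e = zext i32 %x to i64
      ret i64 %e
    }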