[llvm] [RISCV] Lower mgather/mscatter for zvfhmin/zvfbfmin (PR #114945)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Nov 4 23:31:41 PST 2024
llvmbot wrote:
@llvm/pr-subscribers-backend-risc-v
Author: Luke Lau (lukel97)
In preparation for allowing zvfhmin and zvfbfmin in isLegalElementTypeForRVV, this patch lowers masked gathers and scatters for those extensions.
We need to mark f16 and bf16 as legal in isLegalMaskedGatherScatter; otherwise ScalarizeMaskedMemIntrin will simply scalarize them. This check can be moved back into isLegalElementTypeForRVV afterwards.
The scalarized codegen required #114938, #114927, and #114915 in order not to crash.
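For illustration, here is a reduced IR example (not taken from the patch itself; the test files below exercise the analogous bf16 variants) of the kind of masked gather this change affects. With +zvfhmin, a gather like this can now be custom-lowered to a vector indexed load (vluxei*) instead of being scalarized by ScalarizeMaskedMemIntrin:

```llvm
; Hypothetical reduced example, assuming a target such as
; riscv64 with +m,+d,+zfhmin,+zvfhmin,+v.
declare <4 x half> @llvm.masked.gather.v4f16.v4p0(<4 x ptr>, i32, <4 x i1>, <4 x half>)

define <4 x half> @gather_f16(<4 x ptr> %ptrs, <4 x i1> %m, <4 x half> %passthru) {
  ; Masked gather of four f16 elements; with zvfhmin this is now legal to
  ; lower directly rather than being expanded into per-element branches.
  %v = call <4 x half> @llvm.masked.gather.v4f16.v4p0(<4 x ptr> %ptrs, i32 2, <4 x i1> %m, <4 x half> %passthru)
  ret <4 x half> %v
}
```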
---
Patch is 691.34 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/114945.diff
4 Files Affected:
- (modified) llvm/lib/Target/RISCV/RISCVISelLowering.cpp (+4-4)
- (modified) llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h (+6-1)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll (+2861-1223)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll (+3471-1804)
``````````diff
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 3b6dd0c11bbf90..613cb01667d044 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -1339,9 +1339,10 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
ISD::VECTOR_SHUFFLE, ISD::VECTOR_COMPRESS},
VT, Custom);
- // FIXME: mload, mstore, mgather, mscatter, vp_gather/scatter can be
+ // FIXME: mload, mstore, vp_gather/scatter can be
// hoisted to here.
- setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom);
+ setOperationAction({ISD::LOAD, ISD::STORE, ISD::MGATHER, ISD::MSCATTER},
+ VT, Custom);
setOperationAction({ISD::VP_LOAD, ISD::VP_STORE,
ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
ISD::EXPERIMENTAL_VP_STRIDED_STORE},
@@ -1408,8 +1409,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction({ISD::BUILD_VECTOR, ISD::SCALAR_TO_VECTOR}, VT,
Custom);
- setOperationAction(
- {ISD::MLOAD, ISD::MSTORE, ISD::MGATHER, ISD::MSCATTER}, VT, Custom);
+ setOperationAction({ISD::MLOAD, ISD::MSTORE}, VT, Custom);
setOperationAction({ISD::VP_GATHER, ISD::VP_SCATTER}, VT, Custom);
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
index 29a6c68a6c585a..7d1e9007adc0df 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
@@ -270,7 +270,12 @@ class RISCVTTIImpl : public BasicTTIImplBase<RISCVTTIImpl> {
if (!ST->enableUnalignedVectorMem() && Alignment < ElemType.getStoreSize())
return false;
- return TLI->isLegalElementTypeForRVV(ElemType);
+ // TODO: Move bf16/f16 support into isLegalElementTypeForRVV
+ return TLI->isLegalElementTypeForRVV(ElemType) ||
+ (DataTypeVT.getVectorElementType() == MVT::bf16 &&
+ ST->hasVInstructionsBF16Minimal()) ||
+ (DataTypeVT.getVectorElementType() == MVT::f16 &&
+ ST->hasVInstructionsF16Minimal());
}
bool isLegalMaskedGather(Type *DataType, Align Alignment) {
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
index 5802f45d311b37..5a7b512e4ea5f5 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
@@ -1,12 +1,21 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zvfh,+v -target-abi=ilp32d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32V
-; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zvfh,+v -target-abi=lp64d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64V
-; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zvfh,+zve32f,+zvl128b -target-abi=ilp32d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32ZVE32F
-; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zvfh,+zve32f,+zvl128b -target-abi=lp64d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64ZVE32F
+; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zvfh,+zfbfmin,+zvfbfmin,+v -target-abi=ilp32d \
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32V,RV32V-ZVFH
+; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zvfh,+zfbfmin,+zvfbfmin,+v -target-abi=lp64d \
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64V,RV64V-ZVFH
+; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v -target-abi=ilp32d \
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32V,RV32V-ZVFHMIN
+; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v -target-abi=lp64d \
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64V,RV64V-ZVFHMIN
+
+; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zvfh,+zfbfmin,+zvfbfmin,+zve32f,+zvl128b -target-abi=ilp32d \
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32ZVE32F,RV32ZVE32F-ZVFH
+; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zvfh,+zfbfmin,+zvfbfmin,+zve32f,+zvl128b -target-abi=lp64d \
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64ZVE32F,RV64ZVE32F-ZVFH
+; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+zve32f,+zvl128b -target-abi=ilp32d \
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32ZVE32F,RV32ZVE32F-ZVFHMIN
+; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+zve32f,+zvl128b -target-abi=lp64d \
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64ZVE32F,RV64ZVE32F-ZVFHMIN
declare <1 x i8> @llvm.masked.gather.v1i8.v1p0(<1 x ptr>, i32, <1 x i1>, <1 x i8>)
@@ -6874,31 +6883,31 @@ define <8 x i64> @mgather_baseidx_v8i64(ptr %base, <8 x i64> %idxs, <8 x i1> %m,
ret <8 x i64> %v
}
-declare <1 x half> @llvm.masked.gather.v1f16.v1p0(<1 x ptr>, i32, <1 x i1>, <1 x half>)
+declare <1 x bfloat> @llvm.masked.gather.v1bf16.v1p0(<1 x ptr>, i32, <1 x i1>, <1 x bfloat>)
-define <1 x half> @mgather_v1f16(<1 x ptr> %ptrs, <1 x i1> %m, <1 x half> %passthru) {
-; RV32V-LABEL: mgather_v1f16:
+define <1 x bfloat> @mgather_v1bf16(<1 x ptr> %ptrs, <1 x i1> %m, <1 x bfloat> %passthru) {
+; RV32V-LABEL: mgather_v1bf16:
; RV32V: # %bb.0:
; RV32V-NEXT: vsetivli zero, 1, e16, mf4, ta, mu
; RV32V-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32V-NEXT: vmv1r.v v8, v9
; RV32V-NEXT: ret
;
-; RV64V-LABEL: mgather_v1f16:
+; RV64V-LABEL: mgather_v1bf16:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 1, e16, mf4, ta, mu
; RV64V-NEXT: vluxei64.v v9, (zero), v8, v0.t
; RV64V-NEXT: vmv1r.v v8, v9
; RV64V-NEXT: ret
;
-; RV32ZVE32F-LABEL: mgather_v1f16:
+; RV32ZVE32F-LABEL: mgather_v1bf16:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, mu
; RV32ZVE32F-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32ZVE32F-NEXT: vmv1r.v v8, v9
; RV32ZVE32F-NEXT: ret
;
-; RV64ZVE32F-LABEL: mgather_v1f16:
+; RV64ZVE32F-LABEL: mgather_v1bf16:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vfirst.m a1, v0
@@ -6908,35 +6917,35 @@ define <1 x half> @mgather_v1f16(<1 x ptr> %ptrs, <1 x i1> %m, <1 x half> %passt
; RV64ZVE32F-NEXT: vle16.v v8, (a0)
; RV64ZVE32F-NEXT: .LBB58_2: # %else
; RV64ZVE32F-NEXT: ret
- %v = call <1 x half> @llvm.masked.gather.v1f16.v1p0(<1 x ptr> %ptrs, i32 2, <1 x i1> %m, <1 x half> %passthru)
- ret <1 x half> %v
+ %v = call <1 x bfloat> @llvm.masked.gather.v1bf16.v1p0(<1 x ptr> %ptrs, i32 2, <1 x i1> %m, <1 x bfloat> %passthru)
+ ret <1 x bfloat> %v
}
-declare <2 x half> @llvm.masked.gather.v2f16.v2p0(<2 x ptr>, i32, <2 x i1>, <2 x half>)
+declare <2 x bfloat> @llvm.masked.gather.v2bf16.v2p0(<2 x ptr>, i32, <2 x i1>, <2 x bfloat>)
-define <2 x half> @mgather_v2f16(<2 x ptr> %ptrs, <2 x i1> %m, <2 x half> %passthru) {
-; RV32V-LABEL: mgather_v2f16:
+define <2 x bfloat> @mgather_v2bf16(<2 x ptr> %ptrs, <2 x i1> %m, <2 x bfloat> %passthru) {
+; RV32V-LABEL: mgather_v2bf16:
; RV32V: # %bb.0:
; RV32V-NEXT: vsetivli zero, 2, e16, mf4, ta, mu
; RV32V-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32V-NEXT: vmv1r.v v8, v9
; RV32V-NEXT: ret
;
-; RV64V-LABEL: mgather_v2f16:
+; RV64V-LABEL: mgather_v2bf16:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 2, e16, mf4, ta, mu
; RV64V-NEXT: vluxei64.v v9, (zero), v8, v0.t
; RV64V-NEXT: vmv1r.v v8, v9
; RV64V-NEXT: ret
;
-; RV32ZVE32F-LABEL: mgather_v2f16:
+; RV32ZVE32F-LABEL: mgather_v2bf16:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, mu
; RV32ZVE32F-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32ZVE32F-NEXT: vmv1r.v v8, v9
; RV32ZVE32F-NEXT: ret
;
-; RV64ZVE32F-LABEL: mgather_v2f16:
+; RV64ZVE32F-LABEL: mgather_v2bf16:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v0
@@ -6948,40 +6957,40 @@ define <2 x half> @mgather_v2f16(<2 x ptr> %ptrs, <2 x i1> %m, <2 x half> %passt
; RV64ZVE32F-NEXT: .LBB59_2: # %else2
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB59_3: # %cond.load
-; RV64ZVE32F-NEXT: flh fa5, 0(a0)
+; RV64ZVE32F-NEXT: lh a0, 0(a0)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, tu, ma
-; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
+; RV64ZVE32F-NEXT: vmv.s.x v8, a0
; RV64ZVE32F-NEXT: andi a2, a2, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB59_2
; RV64ZVE32F-NEXT: .LBB59_4: # %cond.load1
-; RV64ZVE32F-NEXT: flh fa5, 0(a1)
+; RV64ZVE32F-NEXT: lh a0, 0(a1)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5
+; RV64ZVE32F-NEXT: vmv.s.x v9, a0
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
; RV64ZVE32F-NEXT: ret
- %v = call <2 x half> @llvm.masked.gather.v2f16.v2p0(<2 x ptr> %ptrs, i32 2, <2 x i1> %m, <2 x half> %passthru)
- ret <2 x half> %v
+ %v = call <2 x bfloat> @llvm.masked.gather.v2bf16.v2p0(<2 x ptr> %ptrs, i32 2, <2 x i1> %m, <2 x bfloat> %passthru)
+ ret <2 x bfloat> %v
}
-declare <4 x half> @llvm.masked.gather.v4f16.v4p0(<4 x ptr>, i32, <4 x i1>, <4 x half>)
+declare <4 x bfloat> @llvm.masked.gather.v4bf16.v4p0(<4 x ptr>, i32, <4 x i1>, <4 x bfloat>)
-define <4 x half> @mgather_v4f16(<4 x ptr> %ptrs, <4 x i1> %m, <4 x half> %passthru) {
-; RV32-LABEL: mgather_v4f16:
+define <4 x bfloat> @mgather_v4bf16(<4 x ptr> %ptrs, <4 x i1> %m, <4 x bfloat> %passthru) {
+; RV32-LABEL: mgather_v4bf16:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, mu
; RV32-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT: vmv1r.v v8, v9
; RV32-NEXT: ret
;
-; RV64V-LABEL: mgather_v4f16:
+; RV64V-LABEL: mgather_v4bf16:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 4, e16, mf2, ta, mu
; RV64V-NEXT: vluxei64.v v10, (zero), v8, v0.t
; RV64V-NEXT: vmv1r.v v8, v10
; RV64V-NEXT: ret
;
-; RV64ZVE32F-LABEL: mgather_v4f16:
+; RV64ZVE32F-LABEL: mgather_v4bf16:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a1, v0
@@ -7000,110 +7009,110 @@ define <4 x half> @mgather_v4f16(<4 x ptr> %ptrs, <4 x i1> %m, <4 x half> %passt
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB60_5: # %cond.load
; RV64ZVE32F-NEXT: ld a2, 0(a0)
-; RV64ZVE32F-NEXT: flh fa5, 0(a2)
+; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, tu, ma
-; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
+; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB60_2
; RV64ZVE32F-NEXT: .LBB60_6: # %cond.load1
; RV64ZVE32F-NEXT: ld a2, 8(a0)
-; RV64ZVE32F-NEXT: flh fa5, 0(a2)
+; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5
+; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: beqz a2, .LBB60_3
; RV64ZVE32F-NEXT: .LBB60_7: # %cond.load4
; RV64ZVE32F-NEXT: ld a2, 16(a0)
-; RV64ZVE32F-NEXT: flh fa5, 0(a2)
+; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, mf2, tu, ma
-; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5
+; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 2
; RV64ZVE32F-NEXT: andi a1, a1, 8
; RV64ZVE32F-NEXT: beqz a1, .LBB60_4
; RV64ZVE32F-NEXT: .LBB60_8: # %cond.load7
; RV64ZVE32F-NEXT: ld a0, 24(a0)
-; RV64ZVE32F-NEXT: flh fa5, 0(a0)
+; RV64ZVE32F-NEXT: lh a0, 0(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5
+; RV64ZVE32F-NEXT: vmv.s.x v9, a0
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 3
; RV64ZVE32F-NEXT: ret
- %v = call <4 x half> @llvm.masked.gather.v4f16.v4p0(<4 x ptr> %ptrs, i32 2, <4 x i1> %m, <4 x half> %passthru)
- ret <4 x half> %v
+ %v = call <4 x bfloat> @llvm.masked.gather.v4bf16.v4p0(<4 x ptr> %ptrs, i32 2, <4 x i1> %m, <4 x bfloat> %passthru)
+ ret <4 x bfloat> %v
}
-define <4 x half> @mgather_truemask_v4f16(<4 x ptr> %ptrs, <4 x half> %passthru) {
-; RV32-LABEL: mgather_truemask_v4f16:
+define <4 x bfloat> @mgather_truemask_v4bf16(<4 x ptr> %ptrs, <4 x bfloat> %passthru) {
+; RV32-LABEL: mgather_truemask_v4bf16:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; RV32-NEXT: vluxei32.v v9, (zero), v8
; RV32-NEXT: vmv1r.v v8, v9
; RV32-NEXT: ret
;
-; RV64V-LABEL: mgather_truemask_v4f16:
+; RV64V-LABEL: mgather_truemask_v4bf16:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; RV64V-NEXT: vluxei64.v v10, (zero), v8
; RV64V-NEXT: vmv1r.v v8, v10
; RV64V-NEXT: ret
;
-; RV64ZVE32F-LABEL: mgather_truemask_v4f16:
+; RV64ZVE32F-LABEL: mgather_truemask_v4bf16:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: ld a1, 0(a0)
; RV64ZVE32F-NEXT: ld a2, 8(a0)
; RV64ZVE32F-NEXT: ld a3, 16(a0)
; RV64ZVE32F-NEXT: ld a0, 24(a0)
-; RV64ZVE32F-NEXT: flh fa5, 0(a1)
-; RV64ZVE32F-NEXT: flh fa4, 0(a2)
-; RV64ZVE32F-NEXT: flh fa3, 0(a3)
-; RV64ZVE32F-NEXT: flh fa2, 0(a0)
+; RV64ZVE32F-NEXT: lh a1, 0(a1)
+; RV64ZVE32F-NEXT: lh a2, 0(a2)
+; RV64ZVE32F-NEXT: lh a3, 0(a3)
+; RV64ZVE32F-NEXT: lh a0, 0(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; RV64ZVE32F-NEXT: vfmv.v.f v8, fa5
-; RV64ZVE32F-NEXT: vfslide1down.vf v8, v8, fa4
-; RV64ZVE32F-NEXT: vfslide1down.vf v8, v8, fa3
-; RV64ZVE32F-NEXT: vfslide1down.vf v8, v8, fa2
+; RV64ZVE32F-NEXT: vmv.v.x v8, a1
+; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a2
+; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a3
+; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a0
; RV64ZVE32F-NEXT: ret
- %v = call <4 x half> @llvm.masked.gather.v4f16.v4p0(<4 x ptr> %ptrs, i32 2, <4 x i1> splat (i1 1), <4 x half> %passthru)
- ret <4 x half> %v
+ %v = call <4 x bfloat> @llvm.masked.gather.v4bf16.v4p0(<4 x ptr> %ptrs, i32 2, <4 x i1> splat (i1 1), <4 x bfloat> %passthru)
+ ret <4 x bfloat> %v
}
-define <4 x half> @mgather_falsemask_v4f16(<4 x ptr> %ptrs, <4 x half> %passthru) {
-; RV32-LABEL: mgather_falsemask_v4f16:
+define <4 x bfloat> @mgather_falsemask_v4bf16(<4 x ptr> %ptrs, <4 x bfloat> %passthru) {
+; RV32-LABEL: mgather_falsemask_v4bf16:
; RV32: # %bb.0:
; RV32-NEXT: vmv1r.v v8, v9
; RV32-NEXT: ret
;
-; RV64V-LABEL: mgather_falsemask_v4f16:
+; RV64V-LABEL: mgather_falsemask_v4bf16:
; RV64V: # %bb.0:
; RV64V-NEXT: vmv1r.v v8, v10
; RV64V-NEXT: ret
;
-; RV64ZVE32F-LABEL: mgather_falsemask_v4f16:
+; RV64ZVE32F-LABEL: mgather_falsemask_v4bf16:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: ret
- %v = call <4 x half> @llvm.masked.gather.v4f16.v4p0(<4 x ptr> %ptrs, i32 2, <4 x i1> zeroinitializer, <4 x half> %passthru)
- ret <4 x half> %v
+ %v = call <4 x bfloat> @llvm.masked.gather.v4bf16.v4p0(<4 x ptr> %ptrs, i32 2, <4 x i1> zeroinitializer, <4 x bfloat> %passthru)
+ ret <4 x bfloat> %v
}
-declare <8 x half> @llvm.masked.gather.v8f16.v8p0(<8 x ptr>, i32, <8 x i1>, <8 x half>)
+declare <8 x bfloat> @llvm.masked.gather.v8bf16.v8p0(<8 x ptr>, i32, <8 x i1>, <8 x bfloat>)
-define <8 x half> @mgather_v8f16(<8 x ptr> %ptrs, <8 x i1> %m, <8 x half> %passthru) {
-; RV32-LABEL: mgather_v8f16:
+define <8 x bfloat> @mgather_v8bf16(<8 x ptr> %ptrs, <8 x i1> %m, <8 x bfloat> %passthru) {
+; RV32-LABEL: mgather_v8bf16:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, mu
; RV32-NEXT: vluxei32.v v10, (zero), v8, v0.t
; RV32-NEXT: vmv.v.v v8, v10
; RV32-NEXT: ret
;
-; RV64V-LABEL: mgather_v8f16:
+; RV64V-LABEL: mgather_v8bf16:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 8, e16, m1, ta, mu
; RV64V-NEXT: vluxei64.v v12, (zero), v8, v0.t
; RV64V-NEXT: vmv.v.v v8, v12
; RV64V-NEXT: ret
;
-; RV64ZVE32F-LABEL: mgather_v8f16:
+; RV64ZVE32F-LABEL: mgather_v8bf16:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a1, v0
@@ -7134,73 +7143,73 @@ define <8 x half> @mgather_v8f16(<8 x ptr> %ptrs, <8 x i1> %m, <8 x half> %passt
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB63_9: # %cond.load
; RV64ZVE32F-NEXT: ld a2, 0(a0)
-; RV64ZVE32F-NEXT: flh fa5, 0(a2)
+; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, tu, ma
-; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
+; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB63_2
; RV64ZVE32F-NEXT: .LBB63_10: # %cond.load1
; RV64ZVE32F-NEXT: ld a2, 8(a0)
-; RV64ZVE32F-NEXT: flh fa5, 0(a2)
+; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5
+; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: beqz a2, .LBB63_3
; RV64ZVE32F-NEXT: .LBB63_11: # %cond.load4
; RV64ZVE32F-NEXT: ld a2, 16(a0)
-; RV64ZVE32F-NEXT: flh fa5, 0(a2)
+; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, m1, tu, ma
-; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5
+; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 2
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: beqz a2, .LBB63_4
; RV64ZVE32F-NEXT: .LBB63_12: # %cond.load7
; RV64ZVE32F-NEXT: ld a2, 24(a0)
-; RV64ZVE32F-NEXT: flh fa5, 0(a2)
+; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, tu, ma
-; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5
+; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 3
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB63_5
; RV64ZVE32F-NEXT: .LBB63_13: # %cond.load10
; RV64ZVE32F-NEXT: ld a2, 32(a0)
-; RV64ZVE32F-NEXT: flh fa5, 0(a2)
+; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 5, e16, m1, tu, ma
-; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5
+; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 4
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: beqz a2, .LBB63_6
; RV64ZVE32F-NEXT: .LBB63_14: # %cond.load13
; RV64ZVE32F-NEXT: ld a2, 40(a0)
-; RV64ZVE32F-NEXT: flh fa5, 0(a2)
+; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 6, e16, m1, tu, ma
-; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5
+; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 5
; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: beqz a2, .LBB63_7
; RV64ZVE32F-NEXT: .LBB63_15: # %cond.load16
; RV64ZVE32F-NEXT: ld a2, 48(a0)
-; RV64ZVE32F-NEXT: flh fa5, 0(a2)
+; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 7, e16, m1, tu, ma
-; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5
+; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 6
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: beqz a1, .LBB63_8
; RV64ZVE32F-NEXT: .LBB63_16: # %cond.load19
; RV64ZVE32F-NEXT: ld a0, 56(a0)
-; RV64ZVE32F-NEXT: flh fa5, 0(a0)
+; RV64ZVE32F-NEXT: lh a0, 0(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5
+; RV64ZVE32F-NEXT: vmv.s.x v9, a0
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 7
; RV64ZVE32F-NEXT: ret
- %v = call <8 x half> @llvm.masked.gather.v8f16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> %m, <8 x half> %passthru)
- ret <8 x half> %v
+ %v = call <8 x bfloat> @llvm.masked.gather.v8bf16.v8p0(<8 x ptr> %ptrs, i32 ...
[truncated]
``````````
https://github.com/llvm/llvm-project/pull/114945