[llvm-branch-commits] [llvm] release/22.x: [RISCV] Fix i64 gather/scatter cost on rv32 (#176105) (PR #176899)

via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Tue Jan 20 03:27:59 PST 2026


llvmbot wrote:



@llvm/pr-subscribers-llvm-analysis

Author: None (llvmbot)

Changes:

Backport of 3ad6d350c44f54482a86a7eb488732093eaed372 and 0c1257cd46456513016b106d964dc5ad47c6289b

Requested by: @lukel97

---

Patch is 119.71 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/176899.diff


5 Files Affected:

- (modified) llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp (+2-5) 
- (added) llvm/test/Analysis/CostModel/RISCV/scalable-gather-zve32f.ll (+112) 
- (modified) llvm/test/Analysis/CostModel/RISCV/scalable-gather.ll (+97-114) 
- (added) llvm/test/Analysis/CostModel/RISCV/scalable-scatter-zve32f.ll (+125) 
- (modified) llvm/test/Analysis/CostModel/RISCV/scalable-scatter.ll (+100-113) 


``````````diff
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index e812d092c3ea0..c0119325285c7 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -1184,7 +1184,6 @@ RISCVTTIImpl::getGatherScatterOpCost(const MemIntrinsicCostAttributes &MICA,
   unsigned Opcode = IsLoad ? Instruction::Load : Instruction::Store;
   Type *DataTy = MICA.getDataType();
   Align Alignment = MICA.getAlignment();
-  const Instruction *I = MICA.getInst();
   if (CostKind != TTI::TCK_RecipThroughput)
     return BaseT::getMemIntrinsicInstrCost(MICA, CostKind);
 
@@ -1198,11 +1197,8 @@ RISCVTTIImpl::getGatherScatterOpCost(const MemIntrinsicCostAttributes &MICA,
   // scalable vectors, we use an estimate on that number since we don't
   // know exactly what VL will be.
   auto &VTy = *cast<VectorType>(DataTy);
-  InstructionCost MemOpCost =
-      getMemoryOpCost(Opcode, VTy.getElementType(), Alignment, 0, CostKind,
-                      {TTI::OK_AnyValue, TTI::OP_None}, I);
   unsigned NumLoads = getEstimatedVLFor(&VTy);
-  return NumLoads * MemOpCost;
+  return NumLoads * TTI::TCC_Basic;
 }
 
 InstructionCost RISCVTTIImpl::getExpandCompressMemoryOpCost(
@@ -1269,6 +1265,7 @@ RISCVTTIImpl::getStridedMemoryOpCost(const MemIntrinsicCostAttributes &MICA,
   // Cost is proportional to the number of memory operations implied.  For
   // scalable vectors, we use an estimate on that number since we don't
   // know exactly what VL will be.
+  // FIXME: This will overcost for i64 on rv32 with +zve64x.
   auto &VTy = *cast<VectorType>(DataTy);
   InstructionCost MemOpCost =
       getMemoryOpCost(Opcode, VTy.getElementType(), Alignment, 0, CostKind,
diff --git a/llvm/test/Analysis/CostModel/RISCV/scalable-gather-zve32f.ll b/llvm/test/Analysis/CostModel/RISCV/scalable-gather-zve32f.ll
new file mode 100644
index 0000000000000..20749d07c44fa
--- /dev/null
+++ b/llvm/test/Analysis/CostModel/RISCV/scalable-gather-zve32f.ll
@@ -0,0 +1,112 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 6
+; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=riscv32 -mattr=+zve32f,+zvl128b < %s | FileCheck %s --check-prefixes=RV32
+; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=riscv64 -mattr=+zve32f,+zvl128b < %s | FileCheck %s --check-prefixes=RV64
+
+define void @masked_gather() {
+; RV32-LABEL: 'masked_gather'
+; RV32-NEXT:  Cost Model: Invalid cost for instruction: %V8F64 = call <vscale x 8 x double> @llvm.masked.gather.nxv8f64.nxv8p0(<vscale x 8 x ptr> align 8 undef, <vscale x 8 x i1> undef, <vscale x 8 x double> undef)
+; RV32-NEXT:  Cost Model: Invalid cost for instruction: %V4F64 = call <vscale x 4 x double> @llvm.masked.gather.nxv4f64.nxv4p0(<vscale x 4 x ptr> align 8 undef, <vscale x 4 x i1> undef, <vscale x 4 x double> undef)
+; RV32-NEXT:  Cost Model: Invalid cost for instruction: %V2F64 = call <vscale x 2 x double> @llvm.masked.gather.nxv2f64.nxv2p0(<vscale x 2 x ptr> align 8 undef, <vscale x 2 x i1> undef, <vscale x 2 x double> undef)
+; RV32-NEXT:  Cost Model: Invalid cost for instruction: %V1F64 = call <vscale x 1 x double> @llvm.masked.gather.nxv1f64.nxv1p0(<vscale x 1 x ptr> align 8 undef, <vscale x 1 x i1> undef, <vscale x 1 x double> undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16F32 = call <vscale x 16 x float> @llvm.masked.gather.nxv16f32.nxv16p0(<vscale x 16 x ptr> align 4 undef, <vscale x 16 x i1> undef, <vscale x 16 x float> undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8F32 = call <vscale x 8 x float> @llvm.masked.gather.nxv8f32.nxv8p0(<vscale x 8 x ptr> align 4 undef, <vscale x 8 x i1> undef, <vscale x 8 x float> undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4F32 = call <vscale x 4 x float> @llvm.masked.gather.nxv4f32.nxv4p0(<vscale x 4 x ptr> align 4 undef, <vscale x 4 x i1> undef, <vscale x 4 x float> undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2F32 = call <vscale x 2 x float> @llvm.masked.gather.nxv2f32.nxv2p0(<vscale x 2 x ptr> align 4 undef, <vscale x 2 x i1> undef, <vscale x 2 x float> undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1F32 = call <vscale x 1 x float> @llvm.masked.gather.nxv1f32.nxv1p0(<vscale x 1 x ptr> align 4 undef, <vscale x 1 x i1> undef, <vscale x 1 x float> undef)
+; RV32-NEXT:  Cost Model: Invalid cost for instruction: %V8I64 = call <vscale x 8 x i64> @llvm.masked.gather.nxv8i64.nxv8p0(<vscale x 8 x ptr> align 8 undef, <vscale x 8 x i1> undef, <vscale x 8 x i64> undef)
+; RV32-NEXT:  Cost Model: Invalid cost for instruction: %V4I64 = call <vscale x 4 x i64> @llvm.masked.gather.nxv4i64.nxv4p0(<vscale x 4 x ptr> align 8 undef, <vscale x 4 x i1> undef, <vscale x 4 x i64> undef)
+; RV32-NEXT:  Cost Model: Invalid cost for instruction: %V2I64 = call <vscale x 2 x i64> @llvm.masked.gather.nxv2i64.nxv2p0(<vscale x 2 x ptr> align 8 undef, <vscale x 2 x i1> undef, <vscale x 2 x i64> undef)
+; RV32-NEXT:  Cost Model: Invalid cost for instruction: %V1I64 = call <vscale x 1 x i64> @llvm.masked.gather.nxv1i64.nxv1p0(<vscale x 1 x ptr> align 8 undef, <vscale x 1 x i1> undef, <vscale x 1 x i64> undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16I32 = call <vscale x 16 x i32> @llvm.masked.gather.nxv16i32.nxv16p0(<vscale x 16 x ptr> align 4 undef, <vscale x 16 x i1> undef, <vscale x 16 x i32> undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <vscale x 8 x i32> @llvm.masked.gather.nxv8i32.nxv8p0(<vscale x 8 x ptr> align 4 undef, <vscale x 8 x i1> undef, <vscale x 8 x i32> undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> align 4 undef, <vscale x 4 x i1> undef, <vscale x 4 x i32> undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <vscale x 2 x i32> @llvm.masked.gather.nxv2i32.nxv2p0(<vscale x 2 x ptr> align 4 undef, <vscale x 2 x i1> undef, <vscale x 2 x i32> undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1I32 = call <vscale x 1 x i32> @llvm.masked.gather.nxv1i32.nxv1p0(<vscale x 1 x ptr> align 4 undef, <vscale x 1 x i1> undef, <vscale x 1 x i32> undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V32I16 = call <vscale x 32 x i16> @llvm.masked.gather.nxv32i16.nxv32p0(<vscale x 32 x ptr> align 2 undef, <vscale x 32 x i1> undef, <vscale x 32 x i16> undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16I16 = call <vscale x 16 x i16> @llvm.masked.gather.nxv16i16.nxv16p0(<vscale x 16 x ptr> align 2 undef, <vscale x 16 x i1> undef, <vscale x 16 x i16> undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8I16 = call <vscale x 8 x i16> @llvm.masked.gather.nxv8i16.nxv8p0(<vscale x 8 x ptr> align 2 undef, <vscale x 8 x i1> undef, <vscale x 8 x i16> undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4I16 = call <vscale x 4 x i16> @llvm.masked.gather.nxv4i16.nxv4p0(<vscale x 4 x ptr> align 2 undef, <vscale x 4 x i1> undef, <vscale x 4 x i16> undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I16 = call <vscale x 2 x i16> @llvm.masked.gather.nxv2i16.nxv2p0(<vscale x 2 x ptr> align 2 undef, <vscale x 2 x i1> undef, <vscale x 2 x i16> undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1I16 = call <vscale x 1 x i16> @llvm.masked.gather.nxv1i16.nxv1p0(<vscale x 1 x ptr> align 2 undef, <vscale x 1 x i1> undef, <vscale x 1 x i16> undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %V64I8 = call <vscale x 64 x i8> @llvm.masked.gather.nxv64i8.nxv64p0(<vscale x 64 x ptr> align 1 undef, <vscale x 64 x i1> undef, <vscale x 64 x i8> undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V32I8 = call <vscale x 32 x i8> @llvm.masked.gather.nxv32i8.nxv32p0(<vscale x 32 x ptr> align 1 undef, <vscale x 32 x i1> undef, <vscale x 32 x i8> undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16I8 = call <vscale x 16 x i8> @llvm.masked.gather.nxv16i8.nxv16p0(<vscale x 16 x ptr> align 1 undef, <vscale x 16 x i1> undef, <vscale x 16 x i8> undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8I8 = call <vscale x 8 x i8> @llvm.masked.gather.nxv8i8.nxv8p0(<vscale x 8 x ptr> align 1 undef, <vscale x 8 x i1> undef, <vscale x 8 x i8> undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4I8 = call <vscale x 4 x i8> @llvm.masked.gather.nxv4i8.nxv4p0(<vscale x 4 x ptr> align 1 undef, <vscale x 4 x i1> undef, <vscale x 4 x i8> undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I8 = call <vscale x 2 x i8> @llvm.masked.gather.nxv2i8.nxv2p0(<vscale x 2 x ptr> align 1 undef, <vscale x 2 x i1> undef, <vscale x 2 x i8> undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1I8 = call <vscale x 1 x i8> @llvm.masked.gather.nxv1i8.nxv1p0(<vscale x 1 x ptr> align 1 undef, <vscale x 1 x i1> undef, <vscale x 1 x i8> undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; RV64-LABEL: 'masked_gather'
+; RV64-NEXT:  Cost Model: Invalid cost for instruction: %V8F64 = call <vscale x 8 x double> @llvm.masked.gather.nxv8f64.nxv8p0(<vscale x 8 x ptr> align 8 undef, <vscale x 8 x i1> undef, <vscale x 8 x double> undef)
+; RV64-NEXT:  Cost Model: Invalid cost for instruction: %V4F64 = call <vscale x 4 x double> @llvm.masked.gather.nxv4f64.nxv4p0(<vscale x 4 x ptr> align 8 undef, <vscale x 4 x i1> undef, <vscale x 4 x double> undef)
+; RV64-NEXT:  Cost Model: Invalid cost for instruction: %V2F64 = call <vscale x 2 x double> @llvm.masked.gather.nxv2f64.nxv2p0(<vscale x 2 x ptr> align 8 undef, <vscale x 2 x i1> undef, <vscale x 2 x double> undef)
+; RV64-NEXT:  Cost Model: Invalid cost for instruction: %V1F64 = call <vscale x 1 x double> @llvm.masked.gather.nxv1f64.nxv1p0(<vscale x 1 x ptr> align 8 undef, <vscale x 1 x i1> undef, <vscale x 1 x double> undef)
+; RV64-NEXT:  Cost Model: Invalid cost for instruction: %V16F32 = call <vscale x 16 x float> @llvm.masked.gather.nxv16f32.nxv16p0(<vscale x 16 x ptr> align 4 undef, <vscale x 16 x i1> undef, <vscale x 16 x float> undef)
+; RV64-NEXT:  Cost Model: Invalid cost for instruction: %V8F32 = call <vscale x 8 x float> @llvm.masked.gather.nxv8f32.nxv8p0(<vscale x 8 x ptr> align 4 undef, <vscale x 8 x i1> undef, <vscale x 8 x float> undef)
+; RV64-NEXT:  Cost Model: Invalid cost for instruction: %V4F32 = call <vscale x 4 x float> @llvm.masked.gather.nxv4f32.nxv4p0(<vscale x 4 x ptr> align 4 undef, <vscale x 4 x i1> undef, <vscale x 4 x float> undef)
+; RV64-NEXT:  Cost Model: Invalid cost for instruction: %V2F32 = call <vscale x 2 x float> @llvm.masked.gather.nxv2f32.nxv2p0(<vscale x 2 x ptr> align 4 undef, <vscale x 2 x i1> undef, <vscale x 2 x float> undef)
+; RV64-NEXT:  Cost Model: Invalid cost for instruction: %V1F32 = call <vscale x 1 x float> @llvm.masked.gather.nxv1f32.nxv1p0(<vscale x 1 x ptr> align 4 undef, <vscale x 1 x i1> undef, <vscale x 1 x float> undef)
+; RV64-NEXT:  Cost Model: Invalid cost for instruction: %V8I64 = call <vscale x 8 x i64> @llvm.masked.gather.nxv8i64.nxv8p0(<vscale x 8 x ptr> align 8 undef, <vscale x 8 x i1> undef, <vscale x 8 x i64> undef)
+; RV64-NEXT:  Cost Model: Invalid cost for instruction: %V4I64 = call <vscale x 4 x i64> @llvm.masked.gather.nxv4i64.nxv4p0(<vscale x 4 x ptr> align 8 undef, <vscale x 4 x i1> undef, <vscale x 4 x i64> undef)
+; RV64-NEXT:  Cost Model: Invalid cost for instruction: %V2I64 = call <vscale x 2 x i64> @llvm.masked.gather.nxv2i64.nxv2p0(<vscale x 2 x ptr> align 8 undef, <vscale x 2 x i1> undef, <vscale x 2 x i64> undef)
+; RV64-NEXT:  Cost Model: Invalid cost for instruction: %V1I64 = call <vscale x 1 x i64> @llvm.masked.gather.nxv1i64.nxv1p0(<vscale x 1 x ptr> align 8 undef, <vscale x 1 x i1> undef, <vscale x 1 x i64> undef)
+; RV64-NEXT:  Cost Model: Invalid cost for instruction: %V16I32 = call <vscale x 16 x i32> @llvm.masked.gather.nxv16i32.nxv16p0(<vscale x 16 x ptr> align 4 undef, <vscale x 16 x i1> undef, <vscale x 16 x i32> undef)
+; RV64-NEXT:  Cost Model: Invalid cost for instruction: %V8I32 = call <vscale x 8 x i32> @llvm.masked.gather.nxv8i32.nxv8p0(<vscale x 8 x ptr> align 4 undef, <vscale x 8 x i1> undef, <vscale x 8 x i32> undef)
+; RV64-NEXT:  Cost Model: Invalid cost for instruction: %V4I32 = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> align 4 undef, <vscale x 4 x i1> undef, <vscale x 4 x i32> undef)
+; RV64-NEXT:  Cost Model: Invalid cost for instruction: %V2I32 = call <vscale x 2 x i32> @llvm.masked.gather.nxv2i32.nxv2p0(<vscale x 2 x ptr> align 4 undef, <vscale x 2 x i1> undef, <vscale x 2 x i32> undef)
+; RV64-NEXT:  Cost Model: Invalid cost for instruction: %V1I32 = call <vscale x 1 x i32> @llvm.masked.gather.nxv1i32.nxv1p0(<vscale x 1 x ptr> align 4 undef, <vscale x 1 x i1> undef, <vscale x 1 x i32> undef)
+; RV64-NEXT:  Cost Model: Invalid cost for instruction: %V32I16 = call <vscale x 32 x i16> @llvm.masked.gather.nxv32i16.nxv32p0(<vscale x 32 x ptr> align 2 undef, <vscale x 32 x i1> undef, <vscale x 32 x i16> undef)
+; RV64-NEXT:  Cost Model: Invalid cost for instruction: %V16I16 = call <vscale x 16 x i16> @llvm.masked.gather.nxv16i16.nxv16p0(<vscale x 16 x ptr> align 2 undef, <vscale x 16 x i1> undef, <vscale x 16 x i16> undef)
+; RV64-NEXT:  Cost Model: Invalid cost for instruction: %V8I16 = call <vscale x 8 x i16> @llvm.masked.gather.nxv8i16.nxv8p0(<vscale x 8 x ptr> align 2 undef, <vscale x 8 x i1> undef, <vscale x 8 x i16> undef)
+; RV64-NEXT:  Cost Model: Invalid cost for instruction: %V4I16 = call <vscale x 4 x i16> @llvm.masked.gather.nxv4i16.nxv4p0(<vscale x 4 x ptr> align 2 undef, <vscale x 4 x i1> undef, <vscale x 4 x i16> undef)
+; RV64-NEXT:  Cost Model: Invalid cost for instruction: %V2I16 = call <vscale x 2 x i16> @llvm.masked.gather.nxv2i16.nxv2p0(<vscale x 2 x ptr> align 2 undef, <vscale x 2 x i1> undef, <vscale x 2 x i16> undef)
+; RV64-NEXT:  Cost Model: Invalid cost for instruction: %V1I16 = call <vscale x 1 x i16> @llvm.masked.gather.nxv1i16.nxv1p0(<vscale x 1 x ptr> align 2 undef, <vscale x 1 x i1> undef, <vscale x 1 x i16> undef)
+; RV64-NEXT:  Cost Model: Invalid cost for instruction: %V64I8 = call <vscale x 64 x i8> @llvm.masked.gather.nxv64i8.nxv64p0(<vscale x 64 x ptr> align 1 undef, <vscale x 64 x i1> undef, <vscale x 64 x i8> undef)
+; RV64-NEXT:  Cost Model: Invalid cost for instruction: %V32I8 = call <vscale x 32 x i8> @llvm.masked.gather.nxv32i8.nxv32p0(<vscale x 32 x ptr> align 1 undef, <vscale x 32 x i1> undef, <vscale x 32 x i8> undef)
+; RV64-NEXT:  Cost Model: Invalid cost for instruction: %V16I8 = call <vscale x 16 x i8> @llvm.masked.gather.nxv16i8.nxv16p0(<vscale x 16 x ptr> align 1 undef, <vscale x 16 x i1> undef, <vscale x 16 x i8> undef)
+; RV64-NEXT:  Cost Model: Invalid cost for instruction: %V8I8 = call <vscale x 8 x i8> @llvm.masked.gather.nxv8i8.nxv8p0(<vscale x 8 x ptr> align 1 undef, <vscale x 8 x i1> undef, <vscale x 8 x i8> undef)
+; RV64-NEXT:  Cost Model: Invalid cost for instruction: %V4I8 = call <vscale x 4 x i8> @llvm.masked.gather.nxv4i8.nxv4p0(<vscale x 4 x ptr> align 1 undef, <vscale x 4 x i1> undef, <vscale x 4 x i8> undef)
+; RV64-NEXT:  Cost Model: Invalid cost for instruction: %V2I8 = call <vscale x 2 x i8> @llvm.masked.gather.nxv2i8.nxv2p0(<vscale x 2 x ptr> align 1 undef, <vscale x 2 x i1> undef, <vscale x 2 x i8> undef)
+; RV64-NEXT:  Cost Model: Invalid cost for instruction: %V1I8 = call <vscale x 1 x i8> @llvm.masked.gather.nxv1i8.nxv1p0(<vscale x 1 x ptr> align 1 undef, <vscale x 1 x i1> undef, <vscale x 1 x i8> undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+  %V8F64 = call <vscale x 8 x double> @llvm.masked.gather.nxv8f64.nxv8p0(<vscale x 8 x ptr> undef, i32 8, <vscale x 8 x i1> undef, <vscale x 8 x double> undef)
+  %V4F64 = call <vscale x 4 x double> @llvm.masked.gather.nxv4f64.nxv4p0(<vscale x 4 x ptr> undef, i32 8, <vscale x 4 x i1> undef, <vscale x 4 x double> undef)
+  %V2F64 = call <vscale x 2 x double> @llvm.masked.gather.nxv2f64.nxv2p0(<vscale x 2 x ptr> undef, i32 8, <vscale x 2 x i1> undef, <vscale x 2 x double> undef)
+  %V1F64 = call <vscale x 1 x double> @llvm.masked.gather.nxv1f64.nxv1p0(<vscale x 1 x ptr> undef, i32 8, <vscale x 1 x i1> undef, <vscale x 1 x double> undef)
+
+  %V16F32 = call <vscale x 16 x float> @llvm.masked.gather.nxv16f32.nxv16p0(<vscale x 16 x ptr> undef, i32 4, <vscale x 16 x i1> undef, <vscale x 16 x float> undef)
+  %V8F32 = call <vscale x 8 x float> @llvm.masked.gather.nxv8f32.nxv8p0(<vscale x 8 x ptr> undef, i32 4, <vscale x 8 x i1> undef, <vscale x 8 x float> undef)
+  %V4F32 = call <vscale x 4 x float> @llvm.masked.gather.nxv4f32.nxv4p0(<vscale x 4 x ptr> undef, i32 4, <vscale x 4 x i1> undef, <vscale x 4 x float> undef)
+  %V2F32 = call <vscale x 2 x float> @llvm.masked.gather.nxv2f32.nxv2p0(<vscale x 2 x ptr> undef, i32 4, <vscale x 2 x i1> undef, <vscale x 2 x float> undef)
+  %V1F32 = call <vscale x 1 x float> @llvm.masked.gather.nxv1f32.nxv1p0(<vscale x 1 x ptr> undef, i32 4, <vscale x 1 x i1> undef, <vscale x 1 x float> undef)
+
+  %V8I64 = call <vscale x 8 x i64> @llvm.masked.gather.nxv8i64.nxv8p0(<vscale x 8 x ptr> undef, i32 8, <vscale x 8 x i1> undef, <vscale x 8 x i64> undef)
+  %V4I64 = call <vscale x 4 x i64> @llvm.masked.gather.nxv4i64.nxv4p0(<vscale x 4 x ptr> undef, i32 8, <vscale x 4 x i1> undef, <vscale x 4 x i64> undef)
+  %V2I64 = call <vscale x 2 x i64> @llvm.masked.gather.nxv2i64.nxv2p0(<vscale x 2 x ptr> undef, i32 8, <vscale x 2 x i1> undef, <vscale x 2 x i64> undef)
+  %V1I64 = call <vscale x 1 x i64> @llvm.masked.gather.nxv1i64.nxv1p0(<vscale x 1 x ptr> undef, i32 8, <vscale x 1 x i1> undef, <vscale x 1 x i64> undef)
+
+  %V16I32 = call <vscale x 16 x i32> @llvm.masked.gather.nxv16i32.nxv16p0(<vscale x 16 x ptr> undef, i32 4, <vscale x 16 x i1> undef, <vscale x 16 x i32> undef)
+  %V8I32 = call <vscale x 8 x i32> @llvm.masked.gather.nxv8i32.nxv8p0(<vscale x 8 x ptr> undef, i32 4, <vscale x 8 x i1> undef, <vscale x 8 x i32> undef)
+  %V4I32 = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> undef, i32 4, <vscale x 4 x i1> undef, <vscale x 4 x i32> undef)
+  %V2I32 = call <vscale x 2 x i32> @llvm.masked.gather.nxv2i32.nxv2p0(<vscale x 2 x ptr> undef, i32 4, <vscale x 2 x i1> undef, <vscale x 2 x i32> undef)
+  %V1I32 = call <vscale x 1 x i32> @llvm.masked.gather.nxv1i32.nxv1p0(<vscale x 1 x ptr> undef, i32 4, <vscale x 1 x i1> undef, <vscale x 1 x i32> undef)
+
+  %V32I16 = call <vscale x 32 x i16> @llvm.masked.gather.nxv32i16.nxv32p0(<vscale x 32 x ptr> undef, i32 2, <vscale x 32 x i1> undef, <vscale x 32 x i16> undef)
+  %V16I16 = call <vscale x 16 x i16> @llvm.masked.gather.nxv16i16.nxv16p0(<vscale x 16 x ptr> undef, i32 2, <vscale x 16 x i1> undef, <vscale x 16 x i16> undef)
+  %V8I16 = call <vscale x 8 x i16> @llvm.masked.gather.nxv8i16.nxv8p0(<vscale x 8 x ptr> undef, i32 2, <vscale x 8 x i1> undef, <vscale x 8 x i16> undef)
+  %V4I16 = call <vscale x 4 x i16> @llvm.masked.gather.nxv4i16.nxv4p0(<vscale x 4 x ptr> undef, i32 2, <vscale x 4 x i1> undef, <vscale x 4 x i16> undef)
+  %V2I16 = call <vscale x 2 x i16> @llvm.masked.gather.nxv2i16.nxv2p0(<vscale x 2 x ptr> undef, i32 2, <vscale x 2 x i1> undef, <vscale x 2 x i16> undef)
+  %V1I16 = call <vscale x 1 x i16> @llvm.masked.gather.nxv1i16.nxv1p0(<vscale x 1 x ptr> undef, i32 2, <vscale x 1 x i1> undef, <vscale x 1 x i16> undef)
+
+  %V64I8 = call <vscale x 64 x i8> @llvm.masked.gather.nxv64i8.nxv64p0(<vscale x 64 x ptr> undef, i32 1, <vscale x 64 x i1> undef, <vscale x 64 x i8> undef)
+  %V32I8 = call <vscale x 32 x i8> @llvm.masked.gather.nxv32i8.nxv32p0(<vscale x 32 x ptr> undef, i32 1, <vscale x 32 x i1> undef, <vscale x 32 x i8> undef)
+  %V16I8 = call <vscale x 16 x i8> @llvm.masked.gather.nxv16i8.nxv16p0(<vscale x 16 x ptr> undef, i32 1, <vs...
[truncated]

``````````
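For readers skimming the patch: the functional change is confined to the first hunk above, which stops querying `getMemoryOpCost` on the scalar element type and instead charges a flat `TTI::TCC_Basic` per estimated element. Below is a minimal standalone sketch of that before/after arithmetic; it is not the real LLVM API, and `estimatedVL`, `MinNumElts`, and `VScaleEstimate` are illustrative stand-ins for what `getEstimatedVLFor()` derives from the scalable vector type.

```cpp
// Standalone sketch of the gather/scatter costing change; illustrative only.
#include <cstdint>

constexpr uint64_t TCC_Basic = 1; // mirrors TargetTransformInfo::TCC_Basic

// Stand-in for getEstimatedVLFor(&VTy): an estimated element count, used
// because the exact VL of a scalable vector is unknown at compile time.
uint64_t estimatedVL(uint64_t MinNumElts, uint64_t VScaleEstimate) {
  return MinNumElts * VScaleEstimate;
}

// Old behavior (simplified): cost = estimated VL * getMemoryOpCost(elt type).
// On rv32 a scalar i64 load/store is not a legal operation and splits into
// two 32-bit accesses, so the per-element factor came back greater than one
// and i64 gathers/scatters were overcosted.
// New behavior: charge one basic unit per estimated element, independent of
// whether the element type is legal as a scalar.
uint64_t gatherScatterCost(uint64_t MinNumElts, uint64_t VScaleEstimate) {
  uint64_t NumLoads = estimatedVL(MinNumElts, VScaleEstimate);
  return NumLoads * TCC_Basic; // was: NumLoads * MemOpCost
}
```

Note that `getStridedMemoryOpCost` keeps the old formula; the FIXME added in the second hunk records that it still overcosts i64 on rv32 with +zve64x. The added tests pin the fixed numbers down via `opt -passes="print<cost-model>" -mtriple=riscv32 -mattr=+zve32f,+zvl128b`, as in the RUN lines above.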



https://github.com/llvm/llvm-project/pull/176899

