[clang] [llvm] [mlir] [IR] Replace alignment argument with attribute on masked intrinsics (PR #163802)
via cfe-commits
cfe-commits at lists.llvm.org
Fri Oct 17 01:15:56 PDT 2025
llvmbot wrote:
@llvm/pr-subscribers-backend-arm
Author: Nikita Popov (nikic)
<details>
<summary>Changes</summary>
The `masked.load`, `masked.store`, `masked.gather`, and `masked.scatter` intrinsics currently accept a separate alignment immarg. Replace this with an `align` attribute on the pointer (or vector-of-pointers) argument.
This is the standard representation for alignment information on intrinsics and is already used by all other memory intrinsics. (Intrinsics like `llvm.memcpy` used to have a separate alignment argument as well, but were migrated a long time ago.)
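For reference, this is the form the other memory intrinsics already use — a minimal illustrative call, with the types and alignment constants made up for the example:

```llvm
; llvm.memcpy carries alignment as attributes on its pointer arguments
; rather than as a separate argument.
call void @llvm.memcpy.p0.p0.i64(ptr align 4 %dst, ptr align 8 %src, i64 64, i1 false)
```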
It's worth noting that the `masked.gather` and `masked.scatter` intrinsics previously accepted a zero alignment to indicate the ABI type alignment of the element type. This special case is gone now: if the `align` attribute is omitted, the implied alignment is 1, as usual. If ABI alignment is desired, it needs to be emitted explicitly (which the IRBuilder API already requires anyway).
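To make the new representation concrete, here is a rough before/after sketch. The load lines mirror the updated SVE tests further down in the diff; the gather call is an analogous, hypothetical example (the vector types and the `align 4` value are illustrative only):

```llvm
; Before: alignment passed as an i32 immarg operand.
%old = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr %base, i32 1, <vscale x 16 x i1> %pg, <vscale x 16 x i8> zeroinitializer)

; After: alignment expressed as an align attribute on the pointer operand.
%new = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr align 1 %base, <vscale x 16 x i1> %pg, <vscale x 16 x i8> zeroinitializer)

; For gather/scatter the attribute goes on the vector-of-pointers operand;
; omitting it now implies align 1 instead of the element type's ABI alignment.
%g = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> align 4 %ptrs, <4 x i1> %mask, <4 x i32> poison)
```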
Based on top of https://github.com/llvm/llvm-project/pull/163790.
---
Patch is 2.79 MiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/163802.diff
302 Files Affected:
- (modified) clang/include/clang/Basic/arm_mve.td (+8-16)
- (modified) clang/include/clang/Basic/arm_mve_defs.td (+7)
- (modified) clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ld1.c (+52-52)
- (modified) clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ld1sb.c (+24-24)
- (modified) clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ld1sh.c (+16-16)
- (modified) clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ld1sw.c (+8-8)
- (modified) clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ld1ub.c (+24-24)
- (modified) clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ld1uh.c (+16-16)
- (modified) clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ld1uw.c (+8-8)
- (modified) clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_st1.c (+52-52)
- (modified) clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_st1b.c (+12-12)
- (modified) clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_st1h.c (+8-8)
- (modified) clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_st1w.c (+4-4)
- (modified) clang/test/CodeGen/X86/avx10_2bf16-builtins.c (+4-4)
- (modified) clang/test/CodeGen/X86/avx512bw-builtins.c (+6-6)
- (modified) clang/test/CodeGen/X86/avx512f-builtins.c (+26-26)
- (modified) clang/test/CodeGen/X86/avx512fp16-builtins.c (+3-3)
- (modified) clang/test/CodeGen/X86/avx512vl-builtins.c (+48-48)
- (modified) clang/test/CodeGen/X86/avx512vlbw-builtins.c (+12-12)
- (modified) clang/test/CodeGen/arm-mve-intrinsics/cplusplus.cpp (+1-1)
- (modified) clang/test/CodeGen/arm-mve-intrinsics/load-store.c (+44-44)
- (modified) clang/test/CodeGen/builtin-masked.c (+9-9)
- (modified) clang/utils/TableGen/MveEmitter.cpp (+2-1)
- (modified) llvm/docs/LangRef.rst (+37-30)
- (modified) llvm/include/llvm/CodeGen/BasicTTIImpl.h (+4-4)
- (modified) llvm/include/llvm/IR/Intrinsics.td (+8-10)
- (modified) llvm/include/llvm/IR/PatternMatch.h (+12-15)
- (modified) llvm/lib/Analysis/ConstantFolding.cpp (+2-2)
- (modified) llvm/lib/Analysis/InstructionSimplify.cpp (+2-2)
- (modified) llvm/lib/Analysis/MemoryLocation.cpp (+2-2)
- (modified) llvm/lib/CodeGen/InterleavedAccessPass.cpp (+2-4)
- (modified) llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp (+15-57)
- (modified) llvm/lib/IR/AutoUpgrade.cpp (+70)
- (modified) llvm/lib/IR/IRBuilder.cpp (+20-10)
- (modified) llvm/lib/IR/Intrinsics.cpp (+13)
- (modified) llvm/lib/IR/Verifier.cpp (+3-24)
- (modified) llvm/lib/Target/AMDGPU/AMDGPUAsanInstrumentation.cpp (+2-5)
- (modified) llvm/lib/Target/ARM/MVEGatherScatterLowering.cpp (+10-10)
- (modified) llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp (+5-12)
- (modified) llvm/lib/Target/RISCV/RISCVGatherScatterLowering.cpp (+11-9)
- (modified) llvm/lib/Target/RISCV/RISCVInterleavedAccess.cpp (+3-3)
- (modified) llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp (+10-13)
- (modified) llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp (+13-14)
- (modified) llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp (+2-2)
- (modified) llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp (+2-5)
- (modified) llvm/lib/Transforms/Instrumentation/MemProfInstrumentation.cpp (+1-1)
- (modified) llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp (+10-14)
- (modified) llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp (+1-1)
- (modified) llvm/lib/Transforms/Scalar/EarlyCSE.cpp (+3-3)
- (modified) llvm/lib/Transforms/Scalar/GVN.cpp (+4-4)
- (modified) llvm/lib/Transforms/Scalar/InferAlignment.cpp (+6-11)
- (modified) llvm/lib/Transforms/Scalar/ScalarizeMaskedMemIntrin.cpp (+16-27)
- (modified) llvm/test/Analysis/BasicAA/intrinsics.ll (+2-2)
- (modified) llvm/test/Analysis/BasicAA/ptr-vector.ll (+2-2)
- (modified) llvm/test/Analysis/BasicAA/scalable-dse-aa.ll (+47-47)
- (modified) llvm/test/Analysis/CostModel/AArch64/masked_ldst.ll (+70-70)
- (modified) llvm/test/Analysis/CostModel/AArch64/masked_ldst_vls.ll (+7-7)
- (modified) llvm/test/Analysis/CostModel/AArch64/mem-op-cost-model.ll (+48-48)
- (modified) llvm/test/Analysis/CostModel/AArch64/sve-gather.ll (+32-32)
- (modified) llvm/test/Analysis/CostModel/AArch64/sve-illegal-types.ll (+4-4)
- (modified) llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll (+24-24)
- (modified) llvm/test/Analysis/CostModel/AArch64/sve-scatter.ll (+45-45)
- (modified) llvm/test/Analysis/CostModel/ARM/cast_ldst.ll (+220-220)
- (modified) llvm/test/Analysis/CostModel/ARM/intrinsic-cost-kinds.ll (+8-8)
- (modified) llvm/test/Analysis/CostModel/ARM/mve-gather-scatter-cost.ll (+131-131)
- (modified) llvm/test/Analysis/CostModel/RISCV/fixed-vector-gather.ll (+79-79)
- (modified) llvm/test/Analysis/CostModel/RISCV/fixed-vector-scatter.ll (+79-79)
- (modified) llvm/test/Analysis/CostModel/RISCV/gep.ll (+8-8)
- (modified) llvm/test/Analysis/CostModel/RISCV/masked_ldst.ll (+37-37)
- (modified) llvm/test/Analysis/CostModel/RISCV/scalable-gather.ll (+116-116)
- (modified) llvm/test/Analysis/CostModel/RISCV/scalable-scatter.ll (+116-116)
- (modified) llvm/test/Analysis/CostModel/X86/intrinsic-cost-kinds.ll (+8-8)
- (modified) llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost-inseltpoison.ll (+1003-1003)
- (modified) llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost.ll (+1003-1003)
- (modified) llvm/test/Analysis/TypeBasedAliasAnalysis/intrinsics.ll (+2-2)
- (modified) llvm/test/Assembler/masked-load-store-intrinsics-attributes.ll (+4-4)
- (modified) llvm/test/Bitcode/upgrade-masked-keep-metadata.ll (+4-4)
- (modified) llvm/test/CodeGen/AArch64/sve-lsr-scaled-index-addressing-mode.ll (+2-2)
- (modified) llvm/test/CodeGen/AArch64/sve2-vscale-sinking.ll (+8-8)
- (modified) llvm/test/CodeGen/Hexagon/autohvx/vector-align-tbaa.ll (+6-6)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-store-negative.ll (+3-3)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-store.ll (+6-6)
- (modified) llvm/test/CodeGen/RISCV/rvv/strided-load-store.ll (+4-4)
- (modified) llvm/test/CodeGen/Thumb2/LowOverheadLoops/clear-maskedinsts.ll (+4-4)
- (modified) llvm/test/CodeGen/Thumb2/LowOverheadLoops/nested.ll (+4-4)
- (modified) llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-basic.ll (+39-39)
- (modified) llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-const.ll (+27-27)
- (modified) llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-reduce.ll (+6-6)
- (modified) llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-widen.ll (+10-10)
- (modified) llvm/test/CodeGen/Thumb2/LowOverheadLoops/vector-reduce-mve-tail.ll (+2-2)
- (modified) llvm/test/Instrumentation/AddressSanitizer/asan-masked-load-store.ll (+34-34)
- (modified) llvm/test/Instrumentation/HeapProfiler/masked-load-store.ll (+12-12)
- (modified) llvm/test/Instrumentation/MemorySanitizer/X86/avx512-intrinsics-upgrade.ll (+48-48)
- (modified) llvm/test/Instrumentation/MemorySanitizer/X86/avx512-intrinsics.ll (+2-2)
- (modified) llvm/test/Instrumentation/MemorySanitizer/X86/avx512bw-intrinsics-upgrade.ll (+12-12)
- (modified) llvm/test/Instrumentation/MemorySanitizer/masked-store-load.ll (+42-42)
- (modified) llvm/test/Transforms/Attributor/readattrs.ll (+48-48)
- (modified) llvm/test/Transforms/CodeGenPrepare/AArch64/dont-sink-scalable-vector-compare.ll (+2-2)
- (modified) llvm/test/Transforms/CodeGenPrepare/AArch64/gather-scatter-opt-inseltpoison.ll (+8-8)
- (modified) llvm/test/Transforms/CodeGenPrepare/AArch64/gather-scatter-opt.ll (+8-8)
- (modified) llvm/test/Transforms/CodeGenPrepare/AArch64/sink-gather-scatter-addressing.ll (+15-15)
- (modified) llvm/test/Transforms/CodeGenPrepare/X86/gather-scatter-opt-inseltpoison.ll (+13-13)
- (modified) llvm/test/Transforms/CodeGenPrepare/X86/gather-scatter-opt.ll (+8-8)
- (modified) llvm/test/Transforms/CodeGenPrepare/X86/masked-gather-struct-gep.ll (+1-1)
- (modified) llvm/test/Transforms/DeadStoreElimination/masked-dead-store-inseltpoison.ll (+6-6)
- (modified) llvm/test/Transforms/DeadStoreElimination/masked-dead-store.ll (+7-7)
- (modified) llvm/test/Transforms/EarlyCSE/masked-intrinsics-unequal-masks.ll (+16-16)
- (modified) llvm/test/Transforms/EarlyCSE/masked-intrinsics.ll (+3-3)
- (modified) llvm/test/Transforms/EarlyCSE/opaque-ptr.ll (+6-6)
- (modified) llvm/test/Transforms/FunctionAttrs/readattrs.ll (+24-24)
- (modified) llvm/test/Transforms/FunctionAttrs/vector-of-pointers-getunderlyingobject-crash.ll (+1-1)
- (modified) llvm/test/Transforms/GVN/2016-08-30-MaskedScatterGather-inseltpoison.ll (+6-6)
- (modified) llvm/test/Transforms/GVN/2016-08-30-MaskedScatterGather.ll (+6-6)
- (modified) llvm/test/Transforms/GVN/masked-load-store-no-mem-dep.ll (+7-7)
- (modified) llvm/test/Transforms/GVN/masked-load-store-vn-crash.ll (+1-1)
- (modified) llvm/test/Transforms/GVN/masked-load-store.ll (+34-34)
- (modified) llvm/test/Transforms/InferAddressSpaces/AMDGPU/masked-load-store.ll (+6-6)
- (modified) llvm/test/Transforms/InferAddressSpaces/masked-gather-scatter.ll (+2-2)
- (modified) llvm/test/Transforms/InferAlignment/masked.ll (+4-4)
- (modified) llvm/test/Transforms/Inline/pr50589.ll (+6-6)
- (modified) llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-gatherscatter.ll (+4-4)
- (modified) llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-loadstore.ll (+4-4)
- (modified) llvm/test/Transforms/InstCombine/X86/x86-masked-memops.ll (+22-22)
- (modified) llvm/test/Transforms/InstCombine/intrinsic-select.ll (+1-1)
- (modified) llvm/test/Transforms/InstCombine/load-store-masked-constant-array.ll (+2-2)
- (modified) llvm/test/Transforms/InstCombine/masked_intrinsics-inseltpoison.ll (+13-13)
- (modified) llvm/test/Transforms/InstCombine/masked_intrinsics.ll (+19-19)
- (modified) llvm/test/Transforms/InstCombine/pr83947.ll (+2-2)
- (modified) llvm/test/Transforms/InstCombine/select-masked_gather.ll (+9-9)
- (modified) llvm/test/Transforms/InstCombine/select-masked_load.ll (+13-13)
- (modified) llvm/test/Transforms/InterleavedAccess/AArch64/scalable-deinterleave-intrinsics.ll (+1-1)
- (modified) llvm/test/Transforms/LICM/licm-ci.ll (+1-1)
- (modified) llvm/test/Transforms/LoopIdiom/AArch64/byte-compare-index.ll (+18-18)
- (modified) llvm/test/Transforms/LoopIdiom/AArch64/find-first-byte.ll (+12-11)
- (modified) llvm/test/Transforms/LoopIdiom/RISCV/byte-compare-index.ll (+10-10)
- (modified) llvm/test/Transforms/LoopUnroll/ARM/mve-upperbound.ll (+2-2)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/clamped-trip-count.ll (+2-2)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll (+5-5)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll (+2-2)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/drop-poison-generating-flags.ll (+3-3)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/first-order-recurrence-fold-tail.ll (+1-1)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/first-order-recurrence.ll (+1-1)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/fixed-wide-lane-mask.ll (+5-5)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/gather-do-not-vectorize-addressing.ll (+1-1)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll (+2-2)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/interleave-with-gaps.ll (+2-2)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/invalid-costs.ll (+1-1)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/low_trip_count_predicates.ll (+2-2)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll (+47-47)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/multiple-result-intrinsics.ll (+2-2)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/optsize_minsize.ll (+3-3)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/outer_loop_prefer_scalable.ll (+3-3)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/outer_loop_test1_no_explicit_vect_width.ll (+4-4)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product.ll (+6-6)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce.ll (+3-3)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/reduction-recurrence-costs-sve.ll (+7-7)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/scalable-reduction-inloop-cond.ll (+2-2)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll (+28-28)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/scalable-struct-return.ll (+9-9)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/store-costs-sve.ll (+2-2)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/sve-cond-inv-loads.ll (+7-7)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/sve-gather-scatter.ll (+5-5)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/sve-inductions.ll (+2-2)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll (+34-34)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-masked-accesses.ll (+16-16)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/sve-inv-store.ll (+1-1)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/sve-large-strides.ll (+6-6)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/sve-low-trip-count.ll (+3-3)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/sve-masked-loadstore.ll (+4-4)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/sve-select-cmp.ll (+1-1)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-forced.ll (+1-1)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-optsize.ll (+3-3)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-overflow-checks.ll (+4-4)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-reductions.ll (+8-8)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-unroll.ll (+12-12)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding.ll (+21-21)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse-mask4.ll (+2-2)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/sve-vfabi.ll (+6-6)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/sve-wide-lane-mask.ll (+20-20)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll (+1-1)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt.ll (+1-1)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/tail-fold-uniform-memops.ll (+4-4)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/tail-folding-styles.ll (+4-4)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/uniform-args-call-variants.ll (+15-15)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/vector-reverse-mask4.ll (+4-4)
- (modified) llvm/test/Transforms/LoopVectorize/ARM/active-lane-mask.ll (+4-4)
- (modified) llvm/test/Transforms/LoopVectorize/ARM/mve-gather-scatter-tailpred.ll (+10-10)
- (modified) llvm/test/Transforms/LoopVectorize/ARM/mve-hoist-runtime-checks.ll (+2-2)
- (modified) llvm/test/Transforms/LoopVectorize/ARM/mve-reduction-predselect.ll (+15-15)
- (modified) llvm/test/Transforms/LoopVectorize/ARM/mve-reduction-types.ll (+11-11)
- (modified) llvm/test/Transforms/LoopVectorize/ARM/mve-reductions.ll (+23-23)
- (modified) llvm/test/Transforms/LoopVectorize/ARM/mve-reg-pressure-vmla.ll (+6-6)
- (modified) llvm/test/Transforms/LoopVectorize/ARM/mve-saddsatcost.ll (+2-6)
- (modified) llvm/test/Transforms/LoopVectorize/ARM/pointer_iv.ll (+14-14)
- (modified) llvm/test/Transforms/LoopVectorize/ARM/tail-folding-counting-down.ll (+3-3)
- (modified) llvm/test/Transforms/LoopVectorize/ARM/tail-folding-not-allowed.ll (+1-1)
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/dead-ops-cost.ll (+2-2)
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/interleaved-masked-access.ll (+9-9)
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/interleaved-store-with-gap.ll (+1-1)
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/safe-dep-distance.ll (+1-1)
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll (+22-22)
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-interleave.ll (+16-16)
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-masked-loadstore.ll (+2-2)
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-reverse-load-store.ll (+5-5)
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll (+4-4)
- (modified) llvm/test/Transforms/LoopVectorize/X86/consecutive-ptr-uniforms.ll (+1-1)
- (modified) llvm/test/Transforms/LoopVectorize/X86/cost-conditional-branches.ll (+4-4)
- (modified) llvm/test/Transforms/LoopVectorize/X86/divs-with-tail-folding.ll (+2-2)
- (modified) llvm/test/Transforms/LoopVectorize/X86/drop-inbounds-flags-for-reverse-vector-pointer.ll (+1-1)
- (modified) llvm/test/Transforms/LoopVectorize/X86/drop-poison-generating-flags.ll (+12-12)
- (modified) llvm/test/Transforms/LoopVectorize/X86/gather_scatter.ll (+30-30)
- (modified) llvm/test/Transforms/LoopVectorize/X86/gep-use-outside-loop.ll (+2-2)
- (modified) llvm/test/Transforms/LoopVectorize/X86/induction-costs.ll (+10-10)
- (modified) llvm/test/Transforms/LoopVectorize/X86/interleave-cost.ll (+1-1)
- (modified) llvm/test/Transforms/LoopVectorize/X86/interleaved-accesses-sink-store-across-load.ll (+2-2)
- (modified) llvm/test/Transforms/LoopVectorize/X86/invariant-load-gather.ll (+2-2)
- (modified) llvm/test/Transforms/LoopVectorize/X86/invariant-store-vectorization.ll (+5-5)
- (modified) llvm/test/Transforms/LoopVectorize/X86/iv-live-outs.ll (+4-4)
- (modified) llvm/test/Transforms/LoopVectorize/X86/load-deref-pred.ll (+28-28)
- (modified) llvm/test/Transforms/LoopVectorize/X86/masked-store-cost.ll (+8-8)
- (modified) llvm/test/Transforms/LoopVectorize/X86/masked_load_store.ll (+171-171)
- (modified) llvm/test/Transforms/LoopVectorize/X86/optsize.ll (+12-12)
- (modified) llvm/test/Transforms/LoopVectorize/X86/outer_loop_test1_no_explicit_vect_width.ll (+4-4)
- (modified) llvm/test/Transforms/LoopVectorize/X86/pr48340.ll (+2-2)
- (modified) llvm/test/Transforms/LoopVectorize/X86/pr54634.ll (+10-10)
- (modified) llvm/test/Transforms/LoopVectorize/X86/pr81872.ll (+1-1)
- (modified) llvm/test/Transforms/LoopVectorize/X86/predicate-switch.ll (+52-52)
- (modified) llvm/test/Transforms/LoopVectorize/X86/scatter_crash.ll (+40-40)
- (modified) llvm/test/Transforms/LoopVectorize/X86/scev-checks-unprofitable.ll (+1-1)
- (modified) llvm/test/Transforms/LoopVectorize/X86/tail_loop_folding.ll (+8-8)
- (modified) llvm/test/Transforms/LoopVectorize/X86/transform-narrow-interleave-to-widen-memory-gaps.ll (+3-3)
- (modified) llvm/test/Transforms/LoopVectorize/X86/vect.omp.force.small-tc.ll (+3-3)
- (modified) llvm/test/Transforms/LoopVectorize/X86/vectorize-force-tail-with-evl.ll (+6-6)
- (modified) llvm/test/Transforms/LoopVectorize/X86/vectorize-interleaved-accesses-gap.ll (+1-1)
- (modified) llvm/test/Transforms/LoopVectorize/X86/widened-value-used-as-scalar-and-first-lane.ll (+8-8)
- (modified) llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-accesses-masked-group.ll (+22-22)
- (modified) llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-store-accesses-with-gaps.ll (+6-6)
- (modified) llvm/test/Transforms/LoopVectorize/X86/x86-predication.ll (+2-2)
- (modified) llvm/test/Transforms/LoopVectorize/dbg-outer-loop-vect.ll (+4-4)
- (modified) llvm/test/Transforms/LoopVectorize/outer-loop-inner-latch-successors.ll (+10-10)
- (modified) llvm/test/Transforms/LoopVectorize/outer-loop-vec-phi-predecessor-order.ll (+2-2)
- (modified) llvm/test/Transforms/LoopVectorize/outer-loop-wide-phis.ll (+8-8)
- (modified) llvm/test/Transforms/LoopVectorize/outer_loop_hcfg_construction.ll (+4-4)
- (modified) llvm/test/Transforms/LoopVectorize/outer_loop_scalable.ll (+3-3)
- (modified) llvm/test/Transforms/LoopVectorize/outer_loop_test1.ll (+2-2)
- (modified) llvm/test/Transforms/LoopVectorize/outer_loop_test2.ll (+4-4)
- (modified) llvm/test/Transforms/LoopVectorize/vplan-vectorize-inner-loop-reduction.ll (+3-3)
- (modified) llvm/test/Transforms/LoopVectorize/vplan-widen-call-instruction.ll (+4-4)
- (modified) llvm/test/Transforms/LoopVectorize/vplan-widen-select-instruction.ll (+4-4)
- (modified) llvm/test/Transforms/MemCpyOpt/vscale-crashes.ll (+1-1)
- (modified) llvm/test/Transforms/NewGVN/2016-08-30-MaskedScatterGather-xfail.ll (+6-6)
``````````diff
diff --git a/clang/include/clang/Basic/arm_mve.td b/clang/include/clang/Basic/arm_mve.td
index 412ef9abac1bc..2e5e1d93be096 100644
--- a/clang/include/clang/Basic/arm_mve.td
+++ b/clang/include/clang/Basic/arm_mve.td
@@ -831,9 +831,8 @@ multiclass contiguous_load<string mnemonic, PrimitiveType memtype,
NameOverride<mnemonic>;
def: Intrinsic<Vector, (args CPtr<CopyKind<same_size[0], Scalar>>:$addr,
Predicate:$pred),
- (IRIntBase<"masked_load", [Vector, CPtr<Vector>]>
- (CPtr<Vector> $addr), !srl(memtype.size,3),
- $pred, (zeroinit Vector))>,
+ (masked_load Vector, (CPtr<Vector> $addr),
+ !srl(memtype.size,3), $pred, (zeroinit Vector))>,
NameOverride<mnemonic # "_z">;
}
@@ -846,9 +845,8 @@ multiclass contiguous_load<string mnemonic, PrimitiveType memtype,
NameOverride<"vld1q">;
def: Intrinsic<Vector, (args CPtr<CopyKind<same_size[0], Scalar>>:$addr,
Predicate:$pred),
- (IRIntBase<"masked_load", [Vector, CPtr<Vector>]>
- (CPtr<Vector> $addr), !srl(memtype.size,3),
- $pred, (zeroinit Vector))>,
+ (masked_load Vector, (CPtr<Vector> $addr),
+ !srl(memtype.size,3), $pred, (zeroinit Vector))>,
NameOverride<"vld1q_z">;
}
@@ -863,9 +861,7 @@ multiclass contiguous_load<string mnemonic, PrimitiveType memtype,
NameOverride<mnemonic>;
def: Intrinsic<Vector, (args CPtr<CopyKind<same_size[0], Scalar>>:$addr,
Predicate:$pred),
- (extend (IRIntBase<"masked_load",
- [NarrowedVecOf<memtype,Vector>,
- CPtr<NarrowedVecOf<memtype,Vector>>]>
+ (extend (masked_load NarrowedVecOf<memtype,Vector>,
(CPtr<NarrowedVecOf<memtype,Vector>> $addr),
!srl(memtype.size,3), $pred,
(zeroinit NarrowedVecOf<memtype,Vector>)),
@@ -890,8 +886,7 @@ multiclass contiguous_store<string mnemonic, PrimitiveType memtype,
NameOverride<mnemonic>;
def: Intrinsic<Void, (args Ptr<CopyKind<same_size[0], Scalar>>:$addr,
Vector:$value, Predicate:$pred),
- (IRIntBase<"masked_store", [Vector, Ptr<Vector>]>
- $value, (Ptr<Vector> $addr),
+ (masked_store $value, (Ptr<Vector> $addr),
!srl(memtype.size,3), $pred)>,
NameOverride<mnemonic # "_p">;
}
@@ -907,8 +902,7 @@ multiclass contiguous_store<string mnemonic, PrimitiveType memtype,
NameOverride<"vst1q">;
def: Intrinsic<Void, (args Ptr<CopyKind<same_size[0], Scalar>>:$addr,
Vector:$value, Predicate:$pred),
- (IRIntBase<"masked_store", [Vector, Ptr<Vector>]>
- $value, (Ptr<Vector> $addr),
+ (masked_store $value, (Ptr<Vector> $addr),
!srl(memtype.size,3), $pred)>,
NameOverride<"vst1q_p">;
}
@@ -925,9 +919,7 @@ multiclass contiguous_store<string mnemonic, PrimitiveType memtype,
NameOverride<mnemonic>;
def: Intrinsic<Void, (args Ptr<CopyKind<same_size[0], Scalar>>:$addr,
Vector:$value, Predicate:$pred),
- (IRIntBase<"masked_store",
- [NarrowedVecOf<memtype,Vector>,
- Ptr<NarrowedVecOf<memtype,Vector>>]>
+ (masked_store
(trunc $value, NarrowedVecOf<memtype,Vector>),
(Ptr<NarrowedVecOf<memtype,Vector>> $addr),
!srl(memtype.size,3), $pred)>,
diff --git a/clang/include/clang/Basic/arm_mve_defs.td b/clang/include/clang/Basic/arm_mve_defs.td
index 083d03a396ba3..c1562a0c1f04c 100644
--- a/clang/include/clang/Basic/arm_mve_defs.td
+++ b/clang/include/clang/Basic/arm_mve_defs.td
@@ -134,6 +134,13 @@ def unzip: CGHelperFn<"VectorUnzip"> {
}
def zip: CGHelperFn<"VectorZip">;
+def masked_load: IRBuilder<"CreateMaskedLoad"> {
+ let special_params = [IRBuilderIntParam<2, "Align">];
+}
+def masked_store: IRBuilder<"CreateMaskedStore"> {
+ let special_params = [IRBuilderIntParam<2, "Align">];
+}
+
// Trivial 'codegen' function that just returns its argument. Useful
// for wrapping up a variable name like $foo into a thing you can pass
// around as type 'dag'.
diff --git a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ld1.c b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ld1.c
index 19e5243c8a625..1ed59c6c80bdc 100644
--- a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ld1.c
+++ b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ld1.c
@@ -24,12 +24,12 @@
// CHECK-LABEL: @test_svld1_s8(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr [[BASE:%.*]], i32 1, <vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> zeroinitializer)
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr align 1 [[BASE:%.*]], <vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> zeroinitializer)
// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z13test_svld1_s8u10__SVBool_tPKa(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr [[BASE:%.*]], i32 1, <vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> zeroinitializer)
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr align 1 [[BASE:%.*]], <vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> zeroinitializer)
// CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
svint8_t test_svld1_s8(svbool_t pg, const int8_t *base) MODE_ATTR
@@ -40,13 +40,13 @@ svint8_t test_svld1_s8(svbool_t pg, const int8_t *base) MODE_ATTR
// CHECK-LABEL: @test_svld1_s16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.masked.load.nxv8i16.p0(ptr [[BASE:%.*]], i32 1, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> zeroinitializer)
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.masked.load.nxv8i16.p0(ptr align 1 [[BASE:%.*]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> zeroinitializer)
// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]]
//
// CPP-CHECK-LABEL: @_Z14test_svld1_s16u10__SVBool_tPKs(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.masked.load.nxv8i16.p0(ptr [[BASE:%.*]], i32 1, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> zeroinitializer)
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.masked.load.nxv8i16.p0(ptr align 1 [[BASE:%.*]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> zeroinitializer)
// CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]]
//
svint16_t test_svld1_s16(svbool_t pg, const int16_t *base) MODE_ATTR
@@ -57,13 +57,13 @@ svint16_t test_svld1_s16(svbool_t pg, const int16_t *base) MODE_ATTR
// CHECK-LABEL: @test_svld1_s32(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0(ptr [[BASE:%.*]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> zeroinitializer)
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0(ptr align 1 [[BASE:%.*]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> zeroinitializer)
// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP1]]
//
// CPP-CHECK-LABEL: @_Z14test_svld1_s32u10__SVBool_tPKi(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0(ptr [[BASE:%.*]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> zeroinitializer)
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0(ptr align 1 [[BASE:%.*]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> zeroinitializer)
// CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP1]]
//
svint32_t test_svld1_s32(svbool_t pg, const int32_t *base) MODE_ATTR
@@ -74,13 +74,13 @@ svint32_t test_svld1_s32(svbool_t pg, const int32_t *base) MODE_ATTR
// CHECK-LABEL: @test_svld1_s64(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.masked.load.nxv2i64.p0(ptr [[BASE:%.*]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> zeroinitializer)
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.masked.load.nxv2i64.p0(ptr align 1 [[BASE:%.*]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> zeroinitializer)
// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP1]]
//
// CPP-CHECK-LABEL: @_Z14test_svld1_s64u10__SVBool_tPKl(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.masked.load.nxv2i64.p0(ptr [[BASE:%.*]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> zeroinitializer)
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.masked.load.nxv2i64.p0(ptr align 1 [[BASE:%.*]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> zeroinitializer)
// CPP-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP1]]
//
svint64_t test_svld1_s64(svbool_t pg, const int64_t *base) MODE_ATTR
@@ -90,12 +90,12 @@ svint64_t test_svld1_s64(svbool_t pg, const int64_t *base) MODE_ATTR
// CHECK-LABEL: @test_svld1_u8(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr [[BASE:%.*]], i32 1, <vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> zeroinitializer)
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr align 1 [[BASE:%.*]], <vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> zeroinitializer)
// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z13test_svld1_u8u10__SVBool_tPKh(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr [[BASE:%.*]], i32 1, <vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> zeroinitializer)
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr align 1 [[BASE:%.*]], <vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> zeroinitializer)
// CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
svuint8_t test_svld1_u8(svbool_t pg, const uint8_t *base) MODE_ATTR
@@ -106,13 +106,13 @@ svuint8_t test_svld1_u8(svbool_t pg, const uint8_t *base) MODE_ATTR
// CHECK-LABEL: @test_svld1_u16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.masked.load.nxv8i16.p0(ptr [[BASE:%.*]], i32 1, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> zeroinitializer)
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.masked.load.nxv8i16.p0(ptr align 1 [[BASE:%.*]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> zeroinitializer)
// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]]
//
// CPP-CHECK-LABEL: @_Z14test_svld1_u16u10__SVBool_tPKt(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.masked.load.nxv8i16.p0(ptr [[BASE:%.*]], i32 1, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> zeroinitializer)
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.masked.load.nxv8i16.p0(ptr align 1 [[BASE:%.*]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> zeroinitializer)
// CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]]
//
svuint16_t test_svld1_u16(svbool_t pg, const uint16_t *base) MODE_ATTR
@@ -123,13 +123,13 @@ svuint16_t test_svld1_u16(svbool_t pg, const uint16_t *base) MODE_ATTR
// CHECK-LABEL: @test_svld1_u32(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0(ptr [[BASE:%.*]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> zeroinitializer)
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0(ptr align 1 [[BASE:%.*]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> zeroinitializer)
// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP1]]
//
// CPP-CHECK-LABEL: @_Z14test_svld1_u32u10__SVBool_tPKj(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0(ptr [[BASE:%.*]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> zeroinitializer)
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0(ptr align 1 [[BASE:%.*]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> zeroinitializer)
// CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP1]]
//
svuint32_t test_svld1_u32(svbool_t pg, const uint32_t *base) MODE_ATTR
@@ -140,13 +140,13 @@ svuint32_t test_svld1_u32(svbool_t pg, const uint32_t *base) MODE_ATTR
// CHECK-LABEL: @test_svld1_u64(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.masked.load.nxv2i64.p0(ptr [[BASE:%.*]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> zeroinitializer)
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.masked.load.nxv2i64.p0(ptr align 1 [[BASE:%.*]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> zeroinitializer)
// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP1]]
//
// CPP-CHECK-LABEL: @_Z14test_svld1_u64u10__SVBool_tPKm(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.masked.load.nxv2i64.p0(ptr [[BASE:%.*]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> zeroinitializer)
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.masked.load.nxv2i64.p0(ptr align 1 [[BASE:%.*]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> zeroinitializer)
// CPP-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP1]]
//
svuint64_t test_svld1_u64(svbool_t pg, const uint64_t *base) MODE_ATTR
@@ -157,13 +157,13 @@ svuint64_t test_svld1_u64(svbool_t pg, const uint64_t *base) MODE_ATTR
// CHECK-LABEL: @test_svld1_f16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.masked.load.nxv8f16.p0(ptr [[BASE:%.*]], i32 1, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> zeroinitializer)
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.masked.load.nxv8f16.p0(ptr align 1 [[BASE:%.*]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> zeroinitializer)
// CHECK-NEXT: ret <vscale x 8 x half> [[TMP1]]
//
// CPP-CHECK-LABEL: @_Z14test_svld1_f16u10__SVBool_tPKDh(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.masked.load.nxv8f16.p0(ptr [[BASE:%.*]], i32 1, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> zeroinitializer)
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.masked.load.nxv8f16.p0(ptr align 1 [[BASE:%.*]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> zeroinitializer)
// CPP-CHECK-NEXT: ret <vscale x 8 x half> [[TMP1]]
//
svfloat16_t test_svld1_f16(svbool_t pg, const float16_t *base) MODE_ATTR
@@ -174,13 +174,13 @@ svfloat16_t test_svld1_f16(svbool_t pg, const float16_t *base) MODE_ATTR
// CHECK-LABEL: @test_svld1_f32(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr [[BASE:%.*]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> zeroinitializer)
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr align 1 [[BASE:%.*]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> zeroinitializer)
// CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]]
//
// CPP-CHECK-LABEL: @_Z14test_svld1_f32u10__SVBool_tPKf(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr [[BASE:%.*]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> zeroinitializer)
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr align 1 [[BASE:%.*]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> zeroinitializer)
// CPP-CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]]
//
svfloat32_t test_svld1_f32(svbool_t pg, const float32_t *base) MODE_ATTR
@@ -191,13 +191,13 @@ svfloat32_t test_svld1_f32(svbool_t pg, const float32_t *base) MODE_ATTR
// CHECK-LABEL: @test_svld1_f64(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.masked.load.nxv2f64.p0(ptr [[BASE:%.*]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> zeroinitializer)
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.masked.load.nxv2f64.p0(ptr align 1 [[BASE:%.*]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> zeroinitializer)
// CHECK-NEXT: ret <vscale x 2 x double> [[TMP1]]
//
// CPP-CHECK-LABEL: @_Z14test_svld1_f64u10__SVBool_tPKd(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.masked.load.nxv2f64.p0(ptr [[BASE:%.*]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> zeroinitializer)
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.masked.load.nxv2f64.p0(ptr align 1 [[BASE:%.*]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> zeroinitializer)
// CPP-CHECK-NEXT: ret <vscale x 2 x double> [[TMP1]]
//
svfloat64_t test_svld1_f64(svbool_t pg, const float64_t *base) MODE_ATTR
@@ -207,12 +207,12 @@ svfloat64_t test_svld1_f64(svbool_t pg, const float64_t *base) MODE_ATTR
// CHECK-LABEL: @test_svld1_mf8(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/163802