[llvm] [SVE][InstCombine] Fold ld1d and splice into ld1ro (PR #69565)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Oct 19 05:43:41 PDT 2023
================
@@ -0,0 +1,32 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -mattr=+sve,+f64mm -passes=instcombine < %s | FileCheck %s
+
+target triple = "aarch64-unknown-linux-gnu"
+
+define <vscale x 2 x double> @combine_ld1ro_double(<vscale x 2 x i1> %pred, ptr %addr) {
+; CHECK-LABEL: @combine_ld1ro_double(
+; CHECK-NEXT: [[RES:%.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.ld1ro.nxv2f64(<vscale x 2 x i1> [[PRED:%.*]], ptr [[ADDR:%.*]])
----------------
vfdff wrote:
Thanks @david-arm for your comment,
When vscale=4 (-msve-vector-bits=512), the first half of the mask is all ones, and we'll also load exactly 256 bits. Can we then transform it into a `ld1ro` instruction?
```
define <vscale x 2 x double> @test(ptr %addr) nounwind {
%pred = call <vscale x 2 x i1> @llvm.aarch64.sve.whilelt.nxv2i1.i64(i64 0, i64 4) ; half = 512/bits(type double)/2 = 4
%a = call <vscale x 2 x double> @llvm.aarch64.sve.ld1.nxv2f64(<vscale x 2 x i1> %pred, ptr %addr)
%res = call <vscale x 2 x double> @llvm.aarch64.sve.splice.nxv2f64(<vscale x 2 x i1> %pred,
<vscale x 2 x double> %a,
<vscale x 2 x double> %a)
ret <vscale x 2 x double> %res
}
```
https://github.com/llvm/llvm-project/pull/69565
More information about the llvm-commits
mailing list