[llvm] [SVE][InstCombine] Fold ld1d and splice into ld1ro (PR #69565)

via llvm-commits llvm-commits at lists.llvm.org
Thu Oct 19 05:43:41 PDT 2023


================
@@ -0,0 +1,32 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -mattr=+sve,+f64mm -passes=instcombine < %s | FileCheck %s
+
+target triple = "aarch64-unknown-linux-gnu"
+
+define <vscale x 2 x double> @combine_ld1ro_double(<vscale x 2 x i1> %pred, ptr %addr) {
+; CHECK-LABEL: @combine_ld1ro_double(
+; CHECK-NEXT:    [[RES:%.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.ld1ro.nxv2f64(<vscale x 2 x i1> [[PRED:%.*]], ptr [[ADDR:%.*]])
----------------
vfdff wrote:

Thanks @david-arm for your comment.
When vscale=4 (i.e. -msve-vector-bits=512), the first half of the mask lanes are active, so we also load exactly 256 bits. Can we then transform this into a `ld1ro` instruction?
```llvm
declare <vscale x 2 x i1> @llvm.aarch64.sve.whilelt.nxv2i1.i64(i64, i64)
declare <vscale x 2 x double> @llvm.aarch64.sve.ld1.nxv2f64(<vscale x 2 x i1>, ptr)
declare <vscale x 2 x double> @llvm.aarch64.sve.splice.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)

define <vscale x 2 x double> @test(ptr %addr) nounwind {
  %pred = call <vscale x 2 x i1> @llvm.aarch64.sve.whilelt.nxv2i1.i64(i64 0, i64 4)   ; half = 512/bits(double)/2 = 4
  %a = call <vscale x 2 x double> @llvm.aarch64.sve.ld1.nxv2f64(<vscale x 2 x i1> %pred, ptr %addr)
  %res = call <vscale x 2 x double> @llvm.aarch64.sve.splice.nxv2f64(<vscale x 2 x i1> %pred,
                                                                     <vscale x 2 x double> %a,
                                                                     <vscale x 2 x double> %a)
  ret <vscale x 2 x double> %res
}
```
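
For reference, if such a fold were legal, a sketch of the combined form (an assumption mirroring the `ld1ro` intrinsic used in the test's CHECK lines, not the patch's actual output) could be:

```llvm
; Hypothetical folded form: the ld1 + splice pair collapses into a single
; 256-bit replicated load, keeping the same predicate.
declare <vscale x 2 x i1> @llvm.aarch64.sve.whilelt.nxv2i1.i64(i64, i64)
declare <vscale x 2 x double> @llvm.aarch64.sve.ld1ro.nxv2f64(<vscale x 2 x i1>, ptr)

define <vscale x 2 x double> @test_folded(ptr %addr) nounwind {
  %pred = call <vscale x 2 x i1> @llvm.aarch64.sve.whilelt.nxv2i1.i64(i64 0, i64 4)
  %res = call <vscale x 2 x double> @llvm.aarch64.sve.ld1ro.nxv2f64(<vscale x 2 x i1> %pred, ptr %addr)
  ret <vscale x 2 x double> %res
}
```

Whether the predicate from `whilelt` is acceptable here (versus requiring it to be provably all-true over the loaded 256 bits at compile time) is exactly the legality question being raised.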

https://github.com/llvm/llvm-project/pull/69565

