[clang] [llvm] [LLVM][PhaseOrdering] Run CSE after InstCombine has cleaned the result of vectorisation. (PR #120443)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Dec 18 07:55:48 PST 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-clang
@llvm/pr-subscribers-llvm-transforms
Author: Paul Walker (paulwalker-arm)
<details>
<summary>Changes</summary>
I revisited 58690 and was puzzled why the obvious combines do not fire. I believe the cause is combines that end with replaceOperand: they can introduce duplicate expression trees (albeit tiny ones), which in turn block other combines that compare expression pointers.
Perhaps adding the extra CSE run is too big a hammer, but I figured it exists as the first of the extra-vectorisation passes for a similar reason, and thus promoting it to the main vectorisation pipeline seems reasonable, given that these issues are becoming more prevalent as vectorisation capabilities increase?
---
Patch is 51.26 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/120443.diff
15 Files Affected:
- (modified) clang/test/CodeGen/AArch64/sme-intrinsics/acle_sme_ld1_vnum.c (+16-32)
- (modified) clang/test/CodeGen/AArch64/sme-intrinsics/acle_sme_st1_vnum.c (+16-32)
- (modified) llvm/lib/Passes/PassBuilderPipelines.cpp (+4-1)
- (modified) llvm/test/Other/new-pm-defaults.ll (+1)
- (modified) llvm/test/Other/new-pm-lto-defaults.ll (+1)
- (modified) llvm/test/Other/new-pm-thinlto-postlink-defaults.ll (+1)
- (modified) llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll (+1)
- (modified) llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll (+1)
- (modified) llvm/test/Other/opt-pipeline-vector-passes.ll (-1)
- (modified) llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll (+2-8)
- (modified) llvm/test/Transforms/PhaseOrdering/AArch64/hoisting-sinking-required-for-vectorization.ll (+1-2)
- (modified) llvm/test/Transforms/PhaseOrdering/AArch64/indvars-vectorization.ll (+1-7)
- (modified) llvm/test/Transforms/PhaseOrdering/AArch64/interleavevectorization.ll (+9-23)
- (added) llvm/test/Transforms/PhaseOrdering/AArch64/post-vectorisation-combines-with-cse.ll (+89)
- (modified) llvm/test/Transforms/PhaseOrdering/ARM/arm_mult_q15.ll (+4-8)
``````````diff
diff --git a/clang/test/CodeGen/AArch64/sme-intrinsics/acle_sme_ld1_vnum.c b/clang/test/CodeGen/AArch64/sme-intrinsics/acle_sme_ld1_vnum.c
index fb86690f07f1d8..517f481a640927 100644
--- a/clang/test/CodeGen/AArch64/sme-intrinsics/acle_sme_ld1_vnum.c
+++ b/clang/test/CodeGen/AArch64/sme-intrinsics/acle_sme_ld1_vnum.c
@@ -15,8 +15,7 @@
// CHECK-C-NEXT: [[TMP2:%.*]] = trunc i64 [[VNUM]] to i32
// CHECK-C-NEXT: [[TMP3:%.*]] = add i32 [[SLICE_BASE]], [[TMP2]]
// CHECK-C-NEXT: tail call void @llvm.aarch64.sme.ld1b.horiz(<vscale x 16 x i1> [[PG]], ptr [[TMP1]], i32 0, i32 [[TMP3]])
-// CHECK-C-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE]], [[TMP2]]
-// CHECK-C-NEXT: [[TMP4:%.*]] = add i32 [[ADD]], 15
+// CHECK-C-NEXT: [[TMP4:%.*]] = add i32 [[TMP3]], 15
// CHECK-C-NEXT: tail call void @llvm.aarch64.sme.ld1b.horiz(<vscale x 16 x i1> [[PG]], ptr [[TMP1]], i32 0, i32 [[TMP4]])
// CHECK-C-NEXT: ret void
//
@@ -29,8 +28,7 @@
// CHECK-CXX-NEXT: [[TMP2:%.*]] = trunc i64 [[VNUM]] to i32
// CHECK-CXX-NEXT: [[TMP3:%.*]] = add i32 [[SLICE_BASE]], [[TMP2]]
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1b.horiz(<vscale x 16 x i1> [[PG]], ptr [[TMP1]], i32 0, i32 [[TMP3]])
-// CHECK-CXX-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE]], [[TMP2]]
-// CHECK-CXX-NEXT: [[TMP4:%.*]] = add i32 [[ADD]], 15
+// CHECK-CXX-NEXT: [[TMP4:%.*]] = add i32 [[TMP3]], 15
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1b.horiz(<vscale x 16 x i1> [[PG]], ptr [[TMP1]], i32 0, i32 [[TMP4]])
// CHECK-CXX-NEXT: ret void
//
@@ -49,8 +47,7 @@ void test_svld1_hor_vnum_za8(uint32_t slice_base, svbool_t pg, const void *ptr,
// CHECK-C-NEXT: [[TMP3:%.*]] = trunc i64 [[VNUM]] to i32
// CHECK-C-NEXT: [[TMP4:%.*]] = add i32 [[SLICE_BASE]], [[TMP3]]
// CHECK-C-NEXT: tail call void @llvm.aarch64.sme.ld1h.horiz(<vscale x 8 x i1> [[TMP0]], ptr [[TMP2]], i32 0, i32 [[TMP4]])
-// CHECK-C-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE]], [[TMP3]]
-// CHECK-C-NEXT: [[TMP5:%.*]] = add i32 [[ADD]], 7
+// CHECK-C-NEXT: [[TMP5:%.*]] = add i32 [[TMP4]], 7
// CHECK-C-NEXT: tail call void @llvm.aarch64.sme.ld1h.horiz(<vscale x 8 x i1> [[TMP0]], ptr [[TMP2]], i32 1, i32 [[TMP5]])
// CHECK-C-NEXT: ret void
//
@@ -64,8 +61,7 @@ void test_svld1_hor_vnum_za8(uint32_t slice_base, svbool_t pg, const void *ptr,
// CHECK-CXX-NEXT: [[TMP3:%.*]] = trunc i64 [[VNUM]] to i32
// CHECK-CXX-NEXT: [[TMP4:%.*]] = add i32 [[SLICE_BASE]], [[TMP3]]
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1h.horiz(<vscale x 8 x i1> [[TMP0]], ptr [[TMP2]], i32 0, i32 [[TMP4]])
-// CHECK-CXX-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE]], [[TMP3]]
-// CHECK-CXX-NEXT: [[TMP5:%.*]] = add i32 [[ADD]], 7
+// CHECK-CXX-NEXT: [[TMP5:%.*]] = add i32 [[TMP4]], 7
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1h.horiz(<vscale x 8 x i1> [[TMP0]], ptr [[TMP2]], i32 1, i32 [[TMP5]])
// CHECK-CXX-NEXT: ret void
//
@@ -84,8 +80,7 @@ void test_svld1_hor_vnum_za16(uint32_t slice_base, svbool_t pg, const void *ptr,
// CHECK-C-NEXT: [[TMP3:%.*]] = trunc i64 [[VNUM]] to i32
// CHECK-C-NEXT: [[TMP4:%.*]] = add i32 [[SLICE_BASE]], [[TMP3]]
// CHECK-C-NEXT: tail call void @llvm.aarch64.sme.ld1w.horiz(<vscale x 4 x i1> [[TMP0]], ptr [[TMP2]], i32 0, i32 [[TMP4]])
-// CHECK-C-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE]], [[TMP3]]
-// CHECK-C-NEXT: [[TMP5:%.*]] = add i32 [[ADD]], 3
+// CHECK-C-NEXT: [[TMP5:%.*]] = add i32 [[TMP4]], 3
// CHECK-C-NEXT: tail call void @llvm.aarch64.sme.ld1w.horiz(<vscale x 4 x i1> [[TMP0]], ptr [[TMP2]], i32 3, i32 [[TMP5]])
// CHECK-C-NEXT: ret void
//
@@ -99,8 +94,7 @@ void test_svld1_hor_vnum_za16(uint32_t slice_base, svbool_t pg, const void *ptr,
// CHECK-CXX-NEXT: [[TMP3:%.*]] = trunc i64 [[VNUM]] to i32
// CHECK-CXX-NEXT: [[TMP4:%.*]] = add i32 [[SLICE_BASE]], [[TMP3]]
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1w.horiz(<vscale x 4 x i1> [[TMP0]], ptr [[TMP2]], i32 0, i32 [[TMP4]])
-// CHECK-CXX-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE]], [[TMP3]]
-// CHECK-CXX-NEXT: [[TMP5:%.*]] = add i32 [[ADD]], 3
+// CHECK-CXX-NEXT: [[TMP5:%.*]] = add i32 [[TMP4]], 3
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1w.horiz(<vscale x 4 x i1> [[TMP0]], ptr [[TMP2]], i32 3, i32 [[TMP5]])
// CHECK-CXX-NEXT: ret void
//
@@ -119,8 +113,7 @@ void test_svld1_hor_vnum_za32(uint32_t slice_base, svbool_t pg, const void *ptr,
// CHECK-C-NEXT: [[TMP3:%.*]] = trunc i64 [[VNUM]] to i32
// CHECK-C-NEXT: [[TMP4:%.*]] = add i32 [[SLICE_BASE]], [[TMP3]]
// CHECK-C-NEXT: tail call void @llvm.aarch64.sme.ld1d.horiz(<vscale x 2 x i1> [[TMP0]], ptr [[TMP2]], i32 0, i32 [[TMP4]])
-// CHECK-C-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE]], [[TMP3]]
-// CHECK-C-NEXT: [[TMP5:%.*]] = add i32 [[ADD]], 1
+// CHECK-C-NEXT: [[TMP5:%.*]] = add i32 [[TMP4]], 1
// CHECK-C-NEXT: tail call void @llvm.aarch64.sme.ld1d.horiz(<vscale x 2 x i1> [[TMP0]], ptr [[TMP2]], i32 7, i32 [[TMP5]])
// CHECK-C-NEXT: ret void
//
@@ -134,8 +127,7 @@ void test_svld1_hor_vnum_za32(uint32_t slice_base, svbool_t pg, const void *ptr,
// CHECK-CXX-NEXT: [[TMP3:%.*]] = trunc i64 [[VNUM]] to i32
// CHECK-CXX-NEXT: [[TMP4:%.*]] = add i32 [[SLICE_BASE]], [[TMP3]]
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1d.horiz(<vscale x 2 x i1> [[TMP0]], ptr [[TMP2]], i32 0, i32 [[TMP4]])
-// CHECK-CXX-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE]], [[TMP3]]
-// CHECK-CXX-NEXT: [[TMP5:%.*]] = add i32 [[ADD]], 1
+// CHECK-CXX-NEXT: [[TMP5:%.*]] = add i32 [[TMP4]], 1
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1d.horiz(<vscale x 2 x i1> [[TMP0]], ptr [[TMP2]], i32 7, i32 [[TMP5]])
// CHECK-CXX-NEXT: ret void
//
@@ -184,8 +176,7 @@ void test_svld1_hor_vnum_za128(uint32_t slice_base, svbool_t pg, const void *ptr
// CHECK-C-NEXT: [[TMP2:%.*]] = trunc i64 [[VNUM]] to i32
// CHECK-C-NEXT: [[TMP3:%.*]] = add i32 [[SLICE_BASE]], [[TMP2]]
// CHECK-C-NEXT: tail call void @llvm.aarch64.sme.ld1b.vert(<vscale x 16 x i1> [[PG]], ptr [[TMP1]], i32 0, i32 [[TMP3]])
-// CHECK-C-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE]], [[TMP2]]
-// CHECK-C-NEXT: [[TMP4:%.*]] = add i32 [[ADD]], 15
+// CHECK-C-NEXT: [[TMP4:%.*]] = add i32 [[TMP3]], 15
// CHECK-C-NEXT: tail call void @llvm.aarch64.sme.ld1b.vert(<vscale x 16 x i1> [[PG]], ptr [[TMP1]], i32 0, i32 [[TMP4]])
// CHECK-C-NEXT: ret void
//
@@ -198,8 +189,7 @@ void test_svld1_hor_vnum_za128(uint32_t slice_base, svbool_t pg, const void *ptr
// CHECK-CXX-NEXT: [[TMP2:%.*]] = trunc i64 [[VNUM]] to i32
// CHECK-CXX-NEXT: [[TMP3:%.*]] = add i32 [[SLICE_BASE]], [[TMP2]]
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1b.vert(<vscale x 16 x i1> [[PG]], ptr [[TMP1]], i32 0, i32 [[TMP3]])
-// CHECK-CXX-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE]], [[TMP2]]
-// CHECK-CXX-NEXT: [[TMP4:%.*]] = add i32 [[ADD]], 15
+// CHECK-CXX-NEXT: [[TMP4:%.*]] = add i32 [[TMP3]], 15
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1b.vert(<vscale x 16 x i1> [[PG]], ptr [[TMP1]], i32 0, i32 [[TMP4]])
// CHECK-CXX-NEXT: ret void
//
@@ -218,8 +208,7 @@ void test_svld1_ver_hor_za8(uint32_t slice_base, svbool_t pg, const void *ptr, i
// CHECK-C-NEXT: [[TMP3:%.*]] = trunc i64 [[VNUM]] to i32
// CHECK-C-NEXT: [[TMP4:%.*]] = add i32 [[SLICE_BASE]], [[TMP3]]
// CHECK-C-NEXT: tail call void @llvm.aarch64.sme.ld1h.vert(<vscale x 8 x i1> [[TMP0]], ptr [[TMP2]], i32 0, i32 [[TMP4]])
-// CHECK-C-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE]], [[TMP3]]
-// CHECK-C-NEXT: [[TMP5:%.*]] = add i32 [[ADD]], 7
+// CHECK-C-NEXT: [[TMP5:%.*]] = add i32 [[TMP4]], 7
// CHECK-C-NEXT: tail call void @llvm.aarch64.sme.ld1h.vert(<vscale x 8 x i1> [[TMP0]], ptr [[TMP2]], i32 1, i32 [[TMP5]])
// CHECK-C-NEXT: ret void
//
@@ -233,8 +222,7 @@ void test_svld1_ver_hor_za8(uint32_t slice_base, svbool_t pg, const void *ptr, i
// CHECK-CXX-NEXT: [[TMP3:%.*]] = trunc i64 [[VNUM]] to i32
// CHECK-CXX-NEXT: [[TMP4:%.*]] = add i32 [[SLICE_BASE]], [[TMP3]]
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1h.vert(<vscale x 8 x i1> [[TMP0]], ptr [[TMP2]], i32 0, i32 [[TMP4]])
-// CHECK-CXX-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE]], [[TMP3]]
-// CHECK-CXX-NEXT: [[TMP5:%.*]] = add i32 [[ADD]], 7
+// CHECK-CXX-NEXT: [[TMP5:%.*]] = add i32 [[TMP4]], 7
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1h.vert(<vscale x 8 x i1> [[TMP0]], ptr [[TMP2]], i32 1, i32 [[TMP5]])
// CHECK-CXX-NEXT: ret void
//
@@ -253,8 +241,7 @@ void test_svld1_ver_vnum_za16(uint32_t slice_base, svbool_t pg, const void *ptr,
// CHECK-C-NEXT: [[TMP3:%.*]] = trunc i64 [[VNUM]] to i32
// CHECK-C-NEXT: [[TMP4:%.*]] = add i32 [[SLICE_BASE]], [[TMP3]]
// CHECK-C-NEXT: tail call void @llvm.aarch64.sme.ld1w.vert(<vscale x 4 x i1> [[TMP0]], ptr [[TMP2]], i32 0, i32 [[TMP4]])
-// CHECK-C-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE]], [[TMP3]]
-// CHECK-C-NEXT: [[TMP5:%.*]] = add i32 [[ADD]], 3
+// CHECK-C-NEXT: [[TMP5:%.*]] = add i32 [[TMP4]], 3
// CHECK-C-NEXT: tail call void @llvm.aarch64.sme.ld1w.vert(<vscale x 4 x i1> [[TMP0]], ptr [[TMP2]], i32 3, i32 [[TMP5]])
// CHECK-C-NEXT: ret void
//
@@ -268,8 +255,7 @@ void test_svld1_ver_vnum_za16(uint32_t slice_base, svbool_t pg, const void *ptr,
// CHECK-CXX-NEXT: [[TMP3:%.*]] = trunc i64 [[VNUM]] to i32
// CHECK-CXX-NEXT: [[TMP4:%.*]] = add i32 [[SLICE_BASE]], [[TMP3]]
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1w.vert(<vscale x 4 x i1> [[TMP0]], ptr [[TMP2]], i32 0, i32 [[TMP4]])
-// CHECK-CXX-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE]], [[TMP3]]
-// CHECK-CXX-NEXT: [[TMP5:%.*]] = add i32 [[ADD]], 3
+// CHECK-CXX-NEXT: [[TMP5:%.*]] = add i32 [[TMP4]], 3
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1w.vert(<vscale x 4 x i1> [[TMP0]], ptr [[TMP2]], i32 3, i32 [[TMP5]])
// CHECK-CXX-NEXT: ret void
//
@@ -288,8 +274,7 @@ void test_svld1_ver_vnum_za32(uint32_t slice_base, svbool_t pg, const void *ptr,
// CHECK-C-NEXT: [[TMP3:%.*]] = trunc i64 [[VNUM]] to i32
// CHECK-C-NEXT: [[TMP4:%.*]] = add i32 [[SLICE_BASE]], [[TMP3]]
// CHECK-C-NEXT: tail call void @llvm.aarch64.sme.ld1d.vert(<vscale x 2 x i1> [[TMP0]], ptr [[TMP2]], i32 0, i32 [[TMP4]])
-// CHECK-C-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE]], [[TMP3]]
-// CHECK-C-NEXT: [[TMP5:%.*]] = add i32 [[ADD]], 1
+// CHECK-C-NEXT: [[TMP5:%.*]] = add i32 [[TMP4]], 1
// CHECK-C-NEXT: tail call void @llvm.aarch64.sme.ld1d.vert(<vscale x 2 x i1> [[TMP0]], ptr [[TMP2]], i32 7, i32 [[TMP5]])
// CHECK-C-NEXT: ret void
//
@@ -303,8 +288,7 @@ void test_svld1_ver_vnum_za32(uint32_t slice_base, svbool_t pg, const void *ptr,
// CHECK-CXX-NEXT: [[TMP3:%.*]] = trunc i64 [[VNUM]] to i32
// CHECK-CXX-NEXT: [[TMP4:%.*]] = add i32 [[SLICE_BASE]], [[TMP3]]
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1d.vert(<vscale x 2 x i1> [[TMP0]], ptr [[TMP2]], i32 0, i32 [[TMP4]])
-// CHECK-CXX-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE]], [[TMP3]]
-// CHECK-CXX-NEXT: [[TMP5:%.*]] = add i32 [[ADD]], 1
+// CHECK-CXX-NEXT: [[TMP5:%.*]] = add i32 [[TMP4]], 1
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1d.vert(<vscale x 2 x i1> [[TMP0]], ptr [[TMP2]], i32 7, i32 [[TMP5]])
// CHECK-CXX-NEXT: ret void
//
diff --git a/clang/test/CodeGen/AArch64/sme-intrinsics/acle_sme_st1_vnum.c b/clang/test/CodeGen/AArch64/sme-intrinsics/acle_sme_st1_vnum.c
index dafc3d61a05f1b..5ed28e653ec59c 100644
--- a/clang/test/CodeGen/AArch64/sme-intrinsics/acle_sme_st1_vnum.c
+++ b/clang/test/CodeGen/AArch64/sme-intrinsics/acle_sme_st1_vnum.c
@@ -15,8 +15,7 @@
// CHECK-C-NEXT: [[TMP2:%.*]] = trunc i64 [[VNUM]] to i32
// CHECK-C-NEXT: [[TMP3:%.*]] = add i32 [[SLICE_BASE]], [[TMP2]]
// CHECK-C-NEXT: tail call void @llvm.aarch64.sme.st1b.horiz(<vscale x 16 x i1> [[PG]], ptr [[TMP1]], i32 0, i32 [[TMP3]])
-// CHECK-C-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE]], [[TMP2]]
-// CHECK-C-NEXT: [[TMP4:%.*]] = add i32 [[ADD]], 15
+// CHECK-C-NEXT: [[TMP4:%.*]] = add i32 [[TMP3]], 15
// CHECK-C-NEXT: tail call void @llvm.aarch64.sme.st1b.horiz(<vscale x 16 x i1> [[PG]], ptr [[TMP1]], i32 0, i32 [[TMP4]])
// CHECK-C-NEXT: ret void
//
@@ -29,8 +28,7 @@
// CHECK-CXX-NEXT: [[TMP2:%.*]] = trunc i64 [[VNUM]] to i32
// CHECK-CXX-NEXT: [[TMP3:%.*]] = add i32 [[SLICE_BASE]], [[TMP2]]
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.st1b.horiz(<vscale x 16 x i1> [[PG]], ptr [[TMP1]], i32 0, i32 [[TMP3]])
-// CHECK-CXX-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE]], [[TMP2]]
-// CHECK-CXX-NEXT: [[TMP4:%.*]] = add i32 [[ADD]], 15
+// CHECK-CXX-NEXT: [[TMP4:%.*]] = add i32 [[TMP3]], 15
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.st1b.horiz(<vscale x 16 x i1> [[PG]], ptr [[TMP1]], i32 0, i32 [[TMP4]])
// CHECK-CXX-NEXT: ret void
//
@@ -49,8 +47,7 @@ void test_svst1_hor_vnum_za8(uint32_t slice_base, svbool_t pg, void *ptr, int64_
// CHECK-C-NEXT: [[TMP3:%.*]] = trunc i64 [[VNUM]] to i32
// CHECK-C-NEXT: [[TMP4:%.*]] = add i32 [[SLICE_BASE]], [[TMP3]]
// CHECK-C-NEXT: tail call void @llvm.aarch64.sme.st1h.horiz(<vscale x 8 x i1> [[TMP0]], ptr [[TMP2]], i32 0, i32 [[TMP4]])
-// CHECK-C-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE]], [[TMP3]]
-// CHECK-C-NEXT: [[TMP5:%.*]] = add i32 [[ADD]], 7
+// CHECK-C-NEXT: [[TMP5:%.*]] = add i32 [[TMP4]], 7
// CHECK-C-NEXT: tail call void @llvm.aarch64.sme.st1h.horiz(<vscale x 8 x i1> [[TMP0]], ptr [[TMP2]], i32 1, i32 [[TMP5]])
// CHECK-C-NEXT: ret void
//
@@ -64,8 +61,7 @@ void test_svst1_hor_vnum_za8(uint32_t slice_base, svbool_t pg, void *ptr, int64_
// CHECK-CXX-NEXT: [[TMP3:%.*]] = trunc i64 [[VNUM]] to i32
// CHECK-CXX-NEXT: [[TMP4:%.*]] = add i32 [[SLICE_BASE]], [[TMP3]]
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.st1h.horiz(<vscale x 8 x i1> [[TMP0]], ptr [[TMP2]], i32 0, i32 [[TMP4]])
-// CHECK-CXX-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE]], [[TMP3]]
-// CHECK-CXX-NEXT: [[TMP5:%.*]] = add i32 [[ADD]], 7
+// CHECK-CXX-NEXT: [[TMP5:%.*]] = add i32 [[TMP4]], 7
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.st1h.horiz(<vscale x 8 x i1> [[TMP0]], ptr [[TMP2]], i32 1, i32 [[TMP5]])
// CHECK-CXX-NEXT: ret void
//
@@ -84,8 +80,7 @@ void test_svst1_hor_vnum_za16(uint32_t slice_base, svbool_t pg, void *ptr, int64
// CHECK-C-NEXT: [[TMP3:%.*]] = trunc i64 [[VNUM]] to i32
// CHECK-C-NEXT: [[TMP4:%.*]] = add i32 [[SLICE_BASE]], [[TMP3]]
// CHECK-C-NEXT: tail call void @llvm.aarch64.sme.st1w.horiz(<vscale x 4 x i1> [[TMP0]], ptr [[TMP2]], i32 0, i32 [[TMP4]])
-// CHECK-C-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE]], [[TMP3]]
-// CHECK-C-NEXT: [[TMP5:%.*]] = add i32 [[ADD]], 3
+// CHECK-C-NEXT: [[TMP5:%.*]] = add i32 [[TMP4]], 3
// CHECK-C-NEXT: tail call void @llvm.aarch64.sme.st1w.horiz(<vscale x 4 x i1> [[TMP0]], ptr [[TMP2]], i32 3, i32 [[TMP5]])
// CHECK-C-NEXT: ret void
//
@@ -99,8 +94,7 @@ void test_svst1_hor_vnum_za16(uint32_t slice_base, svbool_t pg, void *ptr, int64
// CHECK-CXX-NEXT: [[TMP3:%.*]] = trunc i64 [[VNUM]] to i32
// CHECK-CXX-NEXT: [[TMP4:%.*]] = add i32 [[SLICE_BASE]], [[TMP3]]
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.st1w.horiz(<vscale x 4 x i1> [[TMP0]], ptr [[TMP2]], i32 0, i32 [[TMP4]])
-// CHECK-CXX-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE]], [[TMP3]]
-// CHECK-CXX-NEXT: [[TMP5:%.*]] = add i32 [[ADD]], 3
+// CHECK-CXX-NEXT: [[TMP5:%.*]] = add i32 [[TMP4]], 3
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.st1w.horiz(<vscale x 4 x i1> [[TMP0]], ptr [[TMP2]], i32 3, i32 [[TMP5]])
// CHECK-CXX-NEXT: ret void
//
@@ -119,8 +113,7 @@ void test_svst1_hor_vnum_za32(uint32_t slice_base, svbool_t pg, void *ptr, int64
// CHECK-C-NEXT: [[TMP3:%.*]] = trunc i64 [[VNUM]] to i32
// CHECK-C-NEXT: [[TMP4:%.*]] = add i32 [[SLICE_BASE]], [[TMP3]]
// CHECK-C-NEXT: tail call void @llvm.aarch64.sme.st1d.horiz(<vscale x 2 x i1> [[TMP0]], ptr [[TMP2]], i32 0, i32 [[TMP4]])
-// CHECK-C-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE]], [[TMP3]]
-// CHECK-C-NEXT: [[TMP5:%.*]] = add i32 [[ADD]], 1
+// CHECK-C-NEXT: [[TMP5:%.*]] = add i32 [[TMP4]], 1
// CHECK-C-NEXT: tail call void @llvm.aarch64.sme.st1d.horiz(<vscale x 2 x i1> [[TMP0]], ptr [[TMP2]], i32 7, i32 [[TMP5]])
// CHECK-C-NEXT: ret void
//
@@ -134,8 +127,7 @@ void test_svst1_hor_vnum_za32(uint32_t slice_base, svbool_t pg, void *ptr, int64
// CHECK-CXX-NEXT: [[TMP3:%.*]] = trunc i64 [[VNUM]] to i32
// CHECK-CXX-NEXT: [[TMP4:%.*]] = add i32 [[SLICE_BASE]], [[TMP3]]
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.st1d.horiz(<vscale x 2 x i1> [[TMP0]], ptr [[TMP2]], i32 0, i32 [[TMP4]])
-// CHECK-CXX-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE]], [[TMP3]]
-// CHECK-CXX-NEXT: [[TMP5:%.*]] = add i32 [[ADD]], 1
+// CHECK-CXX-NEXT: [[TMP5:%.*]] = add i32 [[TMP4]], 1
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.st1d.horiz(<vscale x 2 x i1> [[TMP0]], ptr [[TMP2]], i32 7, i32 [[TMP5]])
// CHECK-CXX-NEXT: ret void
//
@@ -184,8 +176,7 @@ void test_svst1_hor_vnum_za128(uint32_t slice_base, svbool_t pg, void *ptr, int6
// CHECK-C-NEXT: [[TMP2:%.*]] = trunc i64 [[VNUM]] to i32
// CHECK-C-NEXT: [[TMP3:%.*]] = add i32 [[SLICE_BASE]], [[TMP2]]
// CHECK-C-NEXT: tail call void @llvm.aarch64.sme.st1b.vert(<vscale x 16 x i1> [[PG]], ptr [[TMP1]], i32 0, i32 [[TMP3]])
-// CHECK-C-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE]], [[TMP2]]
-// CHECK-C-NEXT: [[TMP4:%.*]] = add i32 [[ADD]], 15
+// CHECK-C-NEXT: [[TMP4:%.*]] = add i32 [[TMP3]], 15
// CHECK-C-NEXT: tail call void @llvm.aarch64.sme.st1b.vert(<vscale x 16 x i1> [[PG]], ptr [[TMP1]], i32 0, i32 [[TMP4]])
// CHECK-C-NEXT: ret void
//
@@ -198,8 +189,7 @@ void test_svst1_hor_vnum_za128(uint32_t slice_base, svbool_t pg, void *ptr, int6
// CHECK-CXX-NEXT: [[TMP2:%.*]] = trunc i64 [[VNUM]] to i32
// CHECK-CXX-NEXT: [[TMP3:%.*]] = add i32 [[SLICE_BASE]], [[TMP2]]
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.st1b.vert(<vscale x 16 x i1> [[PG]], ptr [[TMP1]], i32 0, i32 [[TMP3]])
-// CHECK-CXX-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE]], [[TMP2]]
-// CHECK-CXX-NEXT: [[TMP4:%.*]] = add i32 [[ADD]], 15
+// CHECK-CXX-NEXT: [[TMP4:%.*]] = add i32 [[TMP3]], 15
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.st1b.vert(<vscale x 16 x i1> [[PG]], ptr [[TMP1]], i32 0, i32 [[TMP4]])
// CHECK-CXX-NEXT: ret void
//
@@ -218,8 +208,7 @@ void test_svst1_ver_vnum_za8(uint32_t slice_base, svbool_t pg, void *ptr, int64_
// CHECK-C-NEXT: [[TMP3:%.*]] = trunc i64 [[VNUM]] to i32
// CHECK-C-NEXT: [[TMP4:%.*]] = add i32 [[SLICE_BASE]], [[TMP3]]
// CHECK-C-NEXT: tail call void @llvm.aarch64.sme.st1h.vert(<vscale x 8 x i1> [[TMP0]], ptr [[TMP2]], i32 0, i32 [[TMP4]])
-// CHECK-C-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE]], [[TMP3]]
-// CHECK-C-NEXT: [[TMP5:%.*]] = add i32 [[ADD]], 7
+// CHECK-C-NEXT: [[TMP5:%.*]] = add i32 [[TMP4]], 7
// CHECK-C-NEXT: tail call void @llvm.aarch64.sme.st1h.vert(<vscale x 8 x i1> [[TMP0]], ptr [[TMP2]], i32 1, i32 [[TMP5]])
// CHECK-C-NEXT: ret void
//
@@ -233,8 +222,7 @@ void test_svst1_ver_vnum_za8(uint32_t slice_base, svbool_t pg, void *ptr, int64_
// CHECK-CXX-NEXT: [[TMP3:%.*]] = trunc i64 [[VNUM]] to i32
// CHECK-CXX-NEXT: [[TMP4:%.*]] = add i32 [[SLICE_BASE]], [[TMP3]]
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.st1h.vert(<vscale x 8 x i1> [[TMP0]], ptr [[TMP2]], i32 0, i32 [[TMP4]])
-// CHECK-CXX-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE]], [[TMP3]]
-// CHECK-CXX-NEXT: [[TMP5:%.*]] = add i32 [[ADD]], 7
+// CHECK-CXX-NEXT: [[TMP5:%.*]] = add i32 [[TMP4]], 7
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.st1h.vert(<vscale x 8 x i1> [[TMP0]], ptr [[TMP2]], i32 1, i32 [[TMP5]])
// CHECK-CXX-NEXT: ret void
//
@@ -253,8 +241,7 @@ void test_svst1_ver_vnum_za16(uint32_t slice_base, svbool_t pg, void *ptr, int64
// CHECK-C-NEXT: [[TMP3:%.*]] = trunc i64 [[VNUM]] to i32
// CHECK-C-NEXT: [[TMP4:%.*]] = add i32 [[SLICE_BASE]], [[TMP3]]
// CHECK-C-NEXT: tail call void @llvm.aarch64.sme.st1w.vert(<vscale x 4 x i1> [[TMP0]], ptr [[TMP2]], i32 0, i32 [[TMP4]])
-// C...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/120443
More information about the llvm-commits
mailing list