[llvm] [GlobalISel] Widen vector loads from aligned ptrs (PR #144309)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Jun 16 00:40:06 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-globalisel
Author: David Green (davemgreen)
<details>
<summary>Changes</summary>
If the pointer is aligned to more than the size of the vector, we can widen the load up to the next power-of-2 size, as SelectionDAG (SDAG) already does.
Some of the v3 tests currently produce worse code with this change - those regressions should be addressed in other issues.
---
Patch is 52.75 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/144309.diff
10 Files Affected:
- (modified) llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp (+13)
- (modified) llvm/test/CodeGen/AArch64/add.ll (+14-16)
- (modified) llvm/test/CodeGen/AArch64/andorxor.ll (+42-48)
- (modified) llvm/test/CodeGen/AArch64/ctlz.ll (+10-8)
- (modified) llvm/test/CodeGen/AArch64/ctpop.ll (+10-8)
- (modified) llvm/test/CodeGen/AArch64/cttz.ll (+19-19)
- (modified) llvm/test/CodeGen/AArch64/load.ll (+23-75)
- (modified) llvm/test/CodeGen/AArch64/mul.ll (+14-16)
- (modified) llvm/test/CodeGen/AArch64/neon-dotreduce.ll (+391-348)
- (modified) llvm/test/CodeGen/AArch64/sub.ll (+14-16)
``````````diff
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 028bffd1bf5a7..65cfa722dbd72 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -4072,6 +4072,19 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerLoad(GAnyLoad &LoadMI) {
if (MemTy != DstTy)
return UnableToLegalize;
+ Align Alignment = LoadMI.getAlign();
+ if (Alignment.value() * 8 > MemSizeInBits &&
+ isPowerOf2_64(DstTy.getScalarSizeInBits())) {
+ LLT MoreTy = LLT::fixed_vector(NextPowerOf2(DstTy.getNumElements()),
+ DstTy.getElementType());
+ MachineMemOperand *NewMMO = MF.getMachineMemOperand(&MMO, 0, MoreTy);
+ auto NewLoad = MIRBuilder.buildLoad(MoreTy, PtrReg, *NewMMO);
+ MIRBuilder.buildDeleteTrailingVectorElements(LoadMI.getReg(0),
+ NewLoad.getReg(0));
+ LoadMI.eraseFromParent();
+ return Legalized;
+ }
+
// TODO: We can do better than scalarizing the vector and at least split it
// in half.
return reduceLoadStoreWidth(LoadMI, 0, DstTy.getElementType());
diff --git a/llvm/test/CodeGen/AArch64/add.ll b/llvm/test/CodeGen/AArch64/add.ll
index d5bd1b712a2a6..96168cb80196f 100644
--- a/llvm/test/CodeGen/AArch64/add.ll
+++ b/llvm/test/CodeGen/AArch64/add.ll
@@ -110,16 +110,20 @@ define void @v3i8(ptr %p1, ptr %p2) {
;
; CHECK-GI-LABEL: v3i8:
; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: ldrb w8, [x0]
-; CHECK-GI-NEXT: ldrb w9, [x1]
-; CHECK-GI-NEXT: ldrb w10, [x0, #1]
-; CHECK-GI-NEXT: ldrb w11, [x1, #1]
+; CHECK-GI-NEXT: ldr w8, [x0]
+; CHECK-GI-NEXT: ldr w9, [x1]
; CHECK-GI-NEXT: fmov s0, w8
; CHECK-GI-NEXT: fmov s1, w9
-; CHECK-GI-NEXT: ldrb w8, [x0, #2]
-; CHECK-GI-NEXT: ldrb w9, [x1, #2]
-; CHECK-GI-NEXT: mov v0.h[1], w10
-; CHECK-GI-NEXT: mov v1.h[1], w11
+; CHECK-GI-NEXT: mov b2, v0.b[1]
+; CHECK-GI-NEXT: mov b3, v1.b[1]
+; CHECK-GI-NEXT: mov b4, v0.b[2]
+; CHECK-GI-NEXT: mov b5, v1.b[2]
+; CHECK-GI-NEXT: fmov w8, s2
+; CHECK-GI-NEXT: fmov w9, s3
+; CHECK-GI-NEXT: mov v0.h[1], w8
+; CHECK-GI-NEXT: mov v1.h[1], w9
+; CHECK-GI-NEXT: fmov w8, s4
+; CHECK-GI-NEXT: fmov w9, s5
; CHECK-GI-NEXT: mov v0.h[2], w8
; CHECK-GI-NEXT: mov v1.h[2], w9
; CHECK-GI-NEXT: add v0.4h, v0.4h, v1.4h
@@ -270,16 +274,10 @@ define void @v3i16(ptr %p1, ptr %p2) {
;
; CHECK-GI-LABEL: v3i16:
; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: ldr h0, [x0]
-; CHECK-GI-NEXT: ldr h1, [x1]
+; CHECK-GI-NEXT: ldr d0, [x0]
+; CHECK-GI-NEXT: ldr d1, [x1]
; CHECK-GI-NEXT: add x8, x0, #2
-; CHECK-GI-NEXT: add x9, x1, #2
-; CHECK-GI-NEXT: add x10, x1, #4
-; CHECK-GI-NEXT: ld1 { v0.h }[1], [x8]
-; CHECK-GI-NEXT: ld1 { v1.h }[1], [x9]
; CHECK-GI-NEXT: add x9, x0, #4
-; CHECK-GI-NEXT: ld1 { v0.h }[2], [x9]
-; CHECK-GI-NEXT: ld1 { v1.h }[2], [x10]
; CHECK-GI-NEXT: add v0.4h, v0.4h, v1.4h
; CHECK-GI-NEXT: str h0, [x0]
; CHECK-GI-NEXT: st1 { v0.h }[1], [x8]
diff --git a/llvm/test/CodeGen/AArch64/andorxor.ll b/llvm/test/CodeGen/AArch64/andorxor.ll
index f7df1092287bd..a7875dbebd0e6 100644
--- a/llvm/test/CodeGen/AArch64/andorxor.ll
+++ b/llvm/test/CodeGen/AArch64/andorxor.ll
@@ -302,16 +302,20 @@ define void @and_v3i8(ptr %p1, ptr %p2) {
;
; CHECK-GI-LABEL: and_v3i8:
; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: ldrb w8, [x0]
-; CHECK-GI-NEXT: ldrb w9, [x1]
-; CHECK-GI-NEXT: ldrb w10, [x0, #1]
-; CHECK-GI-NEXT: ldrb w11, [x1, #1]
+; CHECK-GI-NEXT: ldr w8, [x0]
+; CHECK-GI-NEXT: ldr w9, [x1]
; CHECK-GI-NEXT: fmov s0, w8
; CHECK-GI-NEXT: fmov s1, w9
-; CHECK-GI-NEXT: ldrb w8, [x0, #2]
-; CHECK-GI-NEXT: ldrb w9, [x1, #2]
-; CHECK-GI-NEXT: mov v0.h[1], w10
-; CHECK-GI-NEXT: mov v1.h[1], w11
+; CHECK-GI-NEXT: mov b2, v0.b[1]
+; CHECK-GI-NEXT: mov b3, v1.b[1]
+; CHECK-GI-NEXT: mov b4, v0.b[2]
+; CHECK-GI-NEXT: mov b5, v1.b[2]
+; CHECK-GI-NEXT: fmov w8, s2
+; CHECK-GI-NEXT: fmov w9, s3
+; CHECK-GI-NEXT: mov v0.h[1], w8
+; CHECK-GI-NEXT: mov v1.h[1], w9
+; CHECK-GI-NEXT: fmov w8, s4
+; CHECK-GI-NEXT: fmov w9, s5
; CHECK-GI-NEXT: mov v0.h[2], w8
; CHECK-GI-NEXT: mov v1.h[2], w9
; CHECK-GI-NEXT: and v0.8b, v0.8b, v1.8b
@@ -350,16 +354,20 @@ define void @or_v3i8(ptr %p1, ptr %p2) {
;
; CHECK-GI-LABEL: or_v3i8:
; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: ldrb w8, [x0]
-; CHECK-GI-NEXT: ldrb w9, [x1]
-; CHECK-GI-NEXT: ldrb w10, [x0, #1]
-; CHECK-GI-NEXT: ldrb w11, [x1, #1]
+; CHECK-GI-NEXT: ldr w8, [x0]
+; CHECK-GI-NEXT: ldr w9, [x1]
; CHECK-GI-NEXT: fmov s0, w8
; CHECK-GI-NEXT: fmov s1, w9
-; CHECK-GI-NEXT: ldrb w8, [x0, #2]
-; CHECK-GI-NEXT: ldrb w9, [x1, #2]
-; CHECK-GI-NEXT: mov v0.h[1], w10
-; CHECK-GI-NEXT: mov v1.h[1], w11
+; CHECK-GI-NEXT: mov b2, v0.b[1]
+; CHECK-GI-NEXT: mov b3, v1.b[1]
+; CHECK-GI-NEXT: mov b4, v0.b[2]
+; CHECK-GI-NEXT: mov b5, v1.b[2]
+; CHECK-GI-NEXT: fmov w8, s2
+; CHECK-GI-NEXT: fmov w9, s3
+; CHECK-GI-NEXT: mov v0.h[1], w8
+; CHECK-GI-NEXT: mov v1.h[1], w9
+; CHECK-GI-NEXT: fmov w8, s4
+; CHECK-GI-NEXT: fmov w9, s5
; CHECK-GI-NEXT: mov v0.h[2], w8
; CHECK-GI-NEXT: mov v1.h[2], w9
; CHECK-GI-NEXT: orr v0.8b, v0.8b, v1.8b
@@ -398,16 +406,20 @@ define void @xor_v3i8(ptr %p1, ptr %p2) {
;
; CHECK-GI-LABEL: xor_v3i8:
; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: ldrb w8, [x0]
-; CHECK-GI-NEXT: ldrb w9, [x1]
-; CHECK-GI-NEXT: ldrb w10, [x0, #1]
-; CHECK-GI-NEXT: ldrb w11, [x1, #1]
+; CHECK-GI-NEXT: ldr w8, [x0]
+; CHECK-GI-NEXT: ldr w9, [x1]
; CHECK-GI-NEXT: fmov s0, w8
; CHECK-GI-NEXT: fmov s1, w9
-; CHECK-GI-NEXT: ldrb w8, [x0, #2]
-; CHECK-GI-NEXT: ldrb w9, [x1, #2]
-; CHECK-GI-NEXT: mov v0.h[1], w10
-; CHECK-GI-NEXT: mov v1.h[1], w11
+; CHECK-GI-NEXT: mov b2, v0.b[1]
+; CHECK-GI-NEXT: mov b3, v1.b[1]
+; CHECK-GI-NEXT: mov b4, v0.b[2]
+; CHECK-GI-NEXT: mov b5, v1.b[2]
+; CHECK-GI-NEXT: fmov w8, s2
+; CHECK-GI-NEXT: fmov w9, s3
+; CHECK-GI-NEXT: mov v0.h[1], w8
+; CHECK-GI-NEXT: mov v1.h[1], w9
+; CHECK-GI-NEXT: fmov w8, s4
+; CHECK-GI-NEXT: fmov w9, s5
; CHECK-GI-NEXT: mov v0.h[2], w8
; CHECK-GI-NEXT: mov v1.h[2], w9
; CHECK-GI-NEXT: eor v0.8b, v0.8b, v1.8b
@@ -805,16 +817,10 @@ define void @and_v3i16(ptr %p1, ptr %p2) {
;
; CHECK-GI-LABEL: and_v3i16:
; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: ldr h0, [x0]
-; CHECK-GI-NEXT: ldr h1, [x1]
+; CHECK-GI-NEXT: ldr d0, [x0]
+; CHECK-GI-NEXT: ldr d1, [x1]
; CHECK-GI-NEXT: add x8, x0, #2
-; CHECK-GI-NEXT: add x9, x1, #2
-; CHECK-GI-NEXT: add x10, x1, #4
-; CHECK-GI-NEXT: ld1 { v0.h }[1], [x8]
-; CHECK-GI-NEXT: ld1 { v1.h }[1], [x9]
; CHECK-GI-NEXT: add x9, x0, #4
-; CHECK-GI-NEXT: ld1 { v0.h }[2], [x9]
-; CHECK-GI-NEXT: ld1 { v1.h }[2], [x10]
; CHECK-GI-NEXT: and v0.8b, v0.8b, v1.8b
; CHECK-GI-NEXT: str h0, [x0]
; CHECK-GI-NEXT: st1 { v0.h }[1], [x8]
@@ -842,16 +848,10 @@ define void @or_v3i16(ptr %p1, ptr %p2) {
;
; CHECK-GI-LABEL: or_v3i16:
; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: ldr h0, [x0]
-; CHECK-GI-NEXT: ldr h1, [x1]
+; CHECK-GI-NEXT: ldr d0, [x0]
+; CHECK-GI-NEXT: ldr d1, [x1]
; CHECK-GI-NEXT: add x8, x0, #2
-; CHECK-GI-NEXT: add x9, x1, #2
-; CHECK-GI-NEXT: add x10, x1, #4
-; CHECK-GI-NEXT: ld1 { v0.h }[1], [x8]
-; CHECK-GI-NEXT: ld1 { v1.h }[1], [x9]
; CHECK-GI-NEXT: add x9, x0, #4
-; CHECK-GI-NEXT: ld1 { v0.h }[2], [x9]
-; CHECK-GI-NEXT: ld1 { v1.h }[2], [x10]
; CHECK-GI-NEXT: orr v0.8b, v0.8b, v1.8b
; CHECK-GI-NEXT: str h0, [x0]
; CHECK-GI-NEXT: st1 { v0.h }[1], [x8]
@@ -879,16 +879,10 @@ define void @xor_v3i16(ptr %p1, ptr %p2) {
;
; CHECK-GI-LABEL: xor_v3i16:
; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: ldr h0, [x0]
-; CHECK-GI-NEXT: ldr h1, [x1]
+; CHECK-GI-NEXT: ldr d0, [x0]
+; CHECK-GI-NEXT: ldr d1, [x1]
; CHECK-GI-NEXT: add x8, x0, #2
-; CHECK-GI-NEXT: add x9, x1, #2
-; CHECK-GI-NEXT: add x10, x1, #4
-; CHECK-GI-NEXT: ld1 { v0.h }[1], [x8]
-; CHECK-GI-NEXT: ld1 { v1.h }[1], [x9]
; CHECK-GI-NEXT: add x9, x0, #4
-; CHECK-GI-NEXT: ld1 { v0.h }[2], [x9]
-; CHECK-GI-NEXT: ld1 { v1.h }[2], [x10]
; CHECK-GI-NEXT: eor v0.8b, v0.8b, v1.8b
; CHECK-GI-NEXT: str h0, [x0]
; CHECK-GI-NEXT: st1 { v0.h }[1], [x8]
diff --git a/llvm/test/CodeGen/AArch64/ctlz.ll b/llvm/test/CodeGen/AArch64/ctlz.ll
index b1c6e24c30a7d..04124609eec74 100644
--- a/llvm/test/CodeGen/AArch64/ctlz.ll
+++ b/llvm/test/CodeGen/AArch64/ctlz.ll
@@ -56,12 +56,16 @@ define void @v3i8(ptr %p1) {
;
; CHECK-GI-LABEL: v3i8:
; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: ldr b0, [x0]
-; CHECK-GI-NEXT: add x8, x0, #1
+; CHECK-GI-NEXT: ldr w8, [x0]
; CHECK-GI-NEXT: add x9, x0, #2
-; CHECK-GI-NEXT: ld1 { v0.b }[1], [x8]
-; CHECK-GI-NEXT: ld1 { v0.b }[2], [x9]
-; CHECK-GI-NEXT: clz v0.8b, v0.8b
+; CHECK-GI-NEXT: fmov s0, w8
+; CHECK-GI-NEXT: add x8, x0, #1
+; CHECK-GI-NEXT: mov b1, v0.b[1]
+; CHECK-GI-NEXT: mov v2.b[0], v0.b[0]
+; CHECK-GI-NEXT: mov b0, v0.b[2]
+; CHECK-GI-NEXT: mov v2.b[1], v1.b[0]
+; CHECK-GI-NEXT: mov v2.b[2], v0.b[0]
+; CHECK-GI-NEXT: clz v0.8b, v2.8b
; CHECK-GI-NEXT: st1 { v0.b }[0], [x0]
; CHECK-GI-NEXT: st1 { v0.b }[1], [x8]
; CHECK-GI-NEXT: st1 { v0.b }[2], [x9]
@@ -181,11 +185,9 @@ define void @v3i16(ptr %p1) {
;
; CHECK-GI-LABEL: v3i16:
; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: ldr h0, [x0]
+; CHECK-GI-NEXT: ldr d0, [x0]
; CHECK-GI-NEXT: add x8, x0, #2
; CHECK-GI-NEXT: add x9, x0, #4
-; CHECK-GI-NEXT: ld1 { v0.h }[1], [x8]
-; CHECK-GI-NEXT: ld1 { v0.h }[2], [x9]
; CHECK-GI-NEXT: clz v0.4h, v0.4h
; CHECK-GI-NEXT: str h0, [x0]
; CHECK-GI-NEXT: st1 { v0.h }[1], [x8]
diff --git a/llvm/test/CodeGen/AArch64/ctpop.ll b/llvm/test/CodeGen/AArch64/ctpop.ll
index 55f75b6bc3f27..c739be95cd243 100644
--- a/llvm/test/CodeGen/AArch64/ctpop.ll
+++ b/llvm/test/CodeGen/AArch64/ctpop.ll
@@ -55,12 +55,16 @@ define void @v3i8(ptr %p1) {
;
; CHECK-GI-LABEL: v3i8:
; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: ldr b0, [x0]
-; CHECK-GI-NEXT: add x8, x0, #1
+; CHECK-GI-NEXT: ldr w8, [x0]
; CHECK-GI-NEXT: add x9, x0, #2
-; CHECK-GI-NEXT: ld1 { v0.b }[1], [x8]
-; CHECK-GI-NEXT: ld1 { v0.b }[2], [x9]
-; CHECK-GI-NEXT: cnt v0.8b, v0.8b
+; CHECK-GI-NEXT: fmov s0, w8
+; CHECK-GI-NEXT: add x8, x0, #1
+; CHECK-GI-NEXT: mov b1, v0.b[1]
+; CHECK-GI-NEXT: mov v2.b[0], v0.b[0]
+; CHECK-GI-NEXT: mov b0, v0.b[2]
+; CHECK-GI-NEXT: mov v2.b[1], v1.b[0]
+; CHECK-GI-NEXT: mov v2.b[2], v0.b[0]
+; CHECK-GI-NEXT: cnt v0.8b, v2.8b
; CHECK-GI-NEXT: st1 { v0.b }[0], [x0]
; CHECK-GI-NEXT: st1 { v0.b }[1], [x8]
; CHECK-GI-NEXT: st1 { v0.b }[2], [x9]
@@ -181,11 +185,9 @@ define void @v3i16(ptr %p1) {
;
; CHECK-GI-LABEL: v3i16:
; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: ldr h0, [x0]
+; CHECK-GI-NEXT: ldr d0, [x0]
; CHECK-GI-NEXT: add x8, x0, #2
; CHECK-GI-NEXT: add x9, x0, #4
-; CHECK-GI-NEXT: ld1 { v0.h }[1], [x8]
-; CHECK-GI-NEXT: ld1 { v0.h }[2], [x9]
; CHECK-GI-NEXT: cnt v0.8b, v0.8b
; CHECK-GI-NEXT: uaddlp v0.4h, v0.8b
; CHECK-GI-NEXT: str h0, [x0]
diff --git a/llvm/test/CodeGen/AArch64/cttz.ll b/llvm/test/CodeGen/AArch64/cttz.ll
index 93ac97e20dabd..fc9bf2c0aca65 100644
--- a/llvm/test/CodeGen/AArch64/cttz.ll
+++ b/llvm/test/CodeGen/AArch64/cttz.ll
@@ -68,21 +68,23 @@ define void @v3i8(ptr %p1) {
;
; CHECK-GI-LABEL: v3i8:
; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: ldrb w9, [x0]
+; CHECK-GI-NEXT: ldr w9, [x0]
; CHECK-GI-NEXT: mov w8, #65535 // =0xffff
-; CHECK-GI-NEXT: ldrb w10, [x0, #1]
-; CHECK-GI-NEXT: fmov s0, w8
-; CHECK-GI-NEXT: fmov s1, w9
-; CHECK-GI-NEXT: ldrb w9, [x0, #2]
-; CHECK-GI-NEXT: mov v0.h[1], w8
-; CHECK-GI-NEXT: mov v1.h[1], w10
-; CHECK-GI-NEXT: mov v0.h[2], w8
+; CHECK-GI-NEXT: fmov s2, w8
+; CHECK-GI-NEXT: fmov s0, w9
+; CHECK-GI-NEXT: mov v2.h[1], w8
+; CHECK-GI-NEXT: mov b1, v0.b[1]
+; CHECK-GI-NEXT: mov v2.h[2], w8
; CHECK-GI-NEXT: add x8, x0, #1
-; CHECK-GI-NEXT: mov v1.h[2], w9
+; CHECK-GI-NEXT: fmov w9, s1
+; CHECK-GI-NEXT: mov b1, v0.b[2]
+; CHECK-GI-NEXT: mov v0.h[1], w9
+; CHECK-GI-NEXT: fmov w9, s1
+; CHECK-GI-NEXT: mov v0.h[2], w9
; CHECK-GI-NEXT: add x9, x0, #2
-; CHECK-GI-NEXT: eor v2.8b, v1.8b, v0.8b
-; CHECK-GI-NEXT: add v0.4h, v1.4h, v0.4h
-; CHECK-GI-NEXT: and v0.8b, v2.8b, v0.8b
+; CHECK-GI-NEXT: eor v1.8b, v0.8b, v2.8b
+; CHECK-GI-NEXT: add v0.4h, v0.4h, v2.4h
+; CHECK-GI-NEXT: and v0.8b, v1.8b, v0.8b
; CHECK-GI-NEXT: uzp1 v0.8b, v0.8b, v0.8b
; CHECK-GI-NEXT: cnt v0.8b, v0.8b
; CHECK-GI-NEXT: st1 { v0.b }[0], [x0]
@@ -275,22 +277,20 @@ define void @v3i16(ptr %p1) {
; CHECK-GI-LABEL: v3i16:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: mov w8, #65535 // =0xffff
-; CHECK-GI-NEXT: ldr h1, [x0]
-; CHECK-GI-NEXT: add x9, x0, #2
+; CHECK-GI-NEXT: ldr d1, [x0]
+; CHECK-GI-NEXT: add x9, x0, #4
; CHECK-GI-NEXT: fmov s0, w8
-; CHECK-GI-NEXT: add x10, x0, #4
-; CHECK-GI-NEXT: ld1 { v1.h }[1], [x9]
; CHECK-GI-NEXT: mov v0.h[1], w8
-; CHECK-GI-NEXT: ld1 { v1.h }[2], [x10]
; CHECK-GI-NEXT: mov v0.h[2], w8
+; CHECK-GI-NEXT: add x8, x0, #2
; CHECK-GI-NEXT: eor v2.8b, v1.8b, v0.8b
; CHECK-GI-NEXT: add v0.4h, v1.4h, v0.4h
; CHECK-GI-NEXT: and v0.8b, v2.8b, v0.8b
; CHECK-GI-NEXT: cnt v0.8b, v0.8b
; CHECK-GI-NEXT: uaddlp v0.4h, v0.8b
; CHECK-GI-NEXT: str h0, [x0]
-; CHECK-GI-NEXT: st1 { v0.h }[1], [x9]
-; CHECK-GI-NEXT: st1 { v0.h }[2], [x10]
+; CHECK-GI-NEXT: st1 { v0.h }[1], [x8]
+; CHECK-GI-NEXT: st1 { v0.h }[2], [x9]
; CHECK-GI-NEXT: ret
entry:
%d = load <3 x i16>, ptr %p1
diff --git a/llvm/test/CodeGen/AArch64/load.ll b/llvm/test/CodeGen/AArch64/load.ll
index 6b26ae98a4ed8..c4bb6e37d6eaf 100644
--- a/llvm/test/CodeGen/AArch64/load.ll
+++ b/llvm/test/CodeGen/AArch64/load.ll
@@ -335,102 +335,50 @@ define <3 x i8> @load_v3i8(ptr %ptr) {
;
; CHECK-GI-LABEL: load_v3i8:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: ldrb w8, [x0]
-; CHECK-GI-NEXT: ldrb w1, [x0, #1]
-; CHECK-GI-NEXT: ldrb w2, [x0, #2]
-; CHECK-GI-NEXT: mov w0, w8
+; CHECK-GI-NEXT: ldr w8, [x0]
+; CHECK-GI-NEXT: fmov s0, w8
+; CHECK-GI-NEXT: mov b1, v0.b[1]
+; CHECK-GI-NEXT: mov b2, v0.b[2]
+; CHECK-GI-NEXT: fmov w0, s0
+; CHECK-GI-NEXT: fmov w1, s1
+; CHECK-GI-NEXT: fmov w2, s2
; CHECK-GI-NEXT: ret
%a = load <3 x i8>, ptr %ptr
ret <3 x i8> %a
}
define <7 x i8> @load_v7i8(ptr %ptr) {
-; CHECK-SD-LABEL: load_v7i8:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: ldr d0, [x0]
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: load_v7i8:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: ldr b0, [x0]
-; CHECK-GI-NEXT: add x8, x0, #1
-; CHECK-GI-NEXT: mov v0.b[0], v0.b[0]
-; CHECK-GI-NEXT: ld1 { v0.b }[1], [x8]
-; CHECK-GI-NEXT: add x8, x0, #2
-; CHECK-GI-NEXT: ld1 { v0.b }[2], [x8]
-; CHECK-GI-NEXT: add x8, x0, #3
-; CHECK-GI-NEXT: ld1 { v0.b }[3], [x8]
-; CHECK-GI-NEXT: add x8, x0, #4
-; CHECK-GI-NEXT: ld1 { v0.b }[4], [x8]
-; CHECK-GI-NEXT: add x8, x0, #5
-; CHECK-GI-NEXT: ld1 { v0.b }[5], [x8]
-; CHECK-GI-NEXT: add x8, x0, #6
-; CHECK-GI-NEXT: ld1 { v0.b }[6], [x8]
-; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: load_v7i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr d0, [x0]
+; CHECK-NEXT: ret
%a = load <7 x i8>, ptr %ptr
ret <7 x i8> %a
}
define <3 x i16> @load_v3i16(ptr %ptr) {
-; CHECK-SD-LABEL: load_v3i16:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: ldr d0, [x0]
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: load_v3i16:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: ldr h0, [x0]
-; CHECK-GI-NEXT: add x8, x0, #2
-; CHECK-GI-NEXT: ld1 { v0.h }[1], [x8]
-; CHECK-GI-NEXT: add x8, x0, #4
-; CHECK-GI-NEXT: ld1 { v0.h }[2], [x8]
-; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: load_v3i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr d0, [x0]
+; CHECK-NEXT: ret
%a = load <3 x i16>, ptr %ptr
ret <3 x i16> %a
}
define <7 x i16> @load_v7i16(ptr %ptr) {
-; CHECK-SD-LABEL: load_v7i16:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: ldr q0, [x0]
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: load_v7i16:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: ldr h0, [x0]
-; CHECK-GI-NEXT: add x8, x0, #2
-; CHECK-GI-NEXT: ld1 { v0.h }[1], [x8]
-; CHECK-GI-NEXT: add x8, x0, #4
-; CHECK-GI-NEXT: ld1 { v0.h }[2], [x8]
-; CHECK-GI-NEXT: add x8, x0, #6
-; CHECK-GI-NEXT: ld1 { v0.h }[3], [x8]
-; CHECK-GI-NEXT: add x8, x0, #8
-; CHECK-GI-NEXT: ld1 { v0.h }[4], [x8]
-; CHECK-GI-NEXT: add x8, x0, #10
-; CHECK-GI-NEXT: ld1 { v0.h }[5], [x8]
-; CHECK-GI-NEXT: add x8, x0, #12
-; CHECK-GI-NEXT: ld1 { v0.h }[6], [x8]
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: load_v7i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr q0, [x0]
+; CHECK-NEXT: ret
%a = load <7 x i16>, ptr %ptr
ret <7 x i16> %a
}
define <3 x i32> @load_v3i32(ptr %ptr) {
-; CHECK-SD-LABEL: load_v3i32:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: ldr q0, [x0]
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: load_v3i32:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: ldr s0, [x0]
-; CHECK-GI-NEXT: add x8, x0, #4
-; CHECK-GI-NEXT: ld1 { v0.s }[1], [x8]
-; CHECK-GI-NEXT: add x8, x0, #8
-; CHECK-GI-NEXT: ld1 { v0.s }[2], [x8]
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: load_v3i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr q0, [x0]
+; CHECK-NEXT: ret
%a = load <3 x i32>, ptr %ptr
ret <3 x i32> %a
}
diff --git a/llvm/test/CodeGen/AArch64/mul.ll b/llvm/test/CodeGen/AArch64/mul.ll
index 1558043f7f40a..9c69a6f03b858 100644
--- a/llvm/test/CodeGen/AArch64/mul.ll
+++ b/llvm/test/CodeGen/AArch64/mul.ll
@@ -122,16 +122,20 @@ define void @v3i8(ptr %p1, ptr %p2) {
;
; CHECK-GI-LABEL: v3i8:
; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: ldrb w8, [x0]
-; CHECK-GI-NEXT: ldrb w9, [x1]
-; CHECK-GI-NEXT: ldrb w10, [x0, #1]
-; CHECK-GI-NEXT: ldrb w11, [x1, #1]
+; CHECK-GI-NEXT: ldr w8, [x0]
+; CHECK-GI-NEXT: ldr w9, [x1]
; CHECK-GI-NEXT: fmov s0, w8
; CHECK-GI-NEXT: fmov s1, w9
-; CHECK-GI-NEXT: ldrb w8, [x0, #2]
-; CHECK-GI-NEXT: ldrb w9, [x1, #2]
-; CHECK-GI-NEXT: mov v0.h[1], w10
-; CHECK-GI-NEXT: mov v1.h[1], w11
+; CHECK-GI-NEXT: mov b2, v0.b[1]
+; CHECK-GI-NEXT: mov b3, v1.b[1]
+; CHECK-GI-NEXT: mov b4, v0.b[2]
+; CHECK-GI-NEXT: mov b5, v1.b[2]
+; CHECK-GI-NEXT: fmov w8, s2
+; CHECK-GI-NEXT: fmov w9, s3
+; CHECK-GI-NEXT: mov v0.h[1], w8
+; CHECK-GI-NEXT: mov v1.h[1], w9
+; CHECK-GI-NEXT: fmov w8, s4
+; CHECK-GI-NEXT: fmov w9, s5
; CHECK-GI-NEXT: mov v0.h[2], w8
; CHECK-GI-NEXT: mov v1.h[2], w9
; CHECK-GI-NEXT: mul v0.4h, v0.4h, v1.4h
@@ -282,16 +286,10 @@ define void @v3i16(ptr %p1, ptr %p2) {
;
; CHECK-GI-LABEL: v3i16:
; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: ldr h0, [x0]
-; CHECK-GI-NEXT: ldr h1, [x1]
+; CHECK-GI-NEXT: ldr d0, [x0]
+; CHECK-GI-NEXT: ldr d1, [x1]
; CHECK-GI-NEXT: add x8, x0, #2
-; CHECK-GI-NEXT: add x9, x1, #2
-; CHECK-GI-NEXT: add x10, x1, #4
-; CHECK-GI-NEXT: ld1 { v0.h }[1], [x8]
-; CHECK-GI-NEXT: ld1 { v1.h }[1], [x9]
; CHECK-GI-NEXT: add x9, x0, #4
-; CHECK-GI-NEXT: ld1 { v0.h }[2], [x9]
-; CHECK-GI-NEXT: ld1 { v1.h }[2], [x10]
; CHECK-GI-NEXT: mul v0.4h, v0.4h, v1.4h
; CHECK-GI-NEXT: str h0, [x0]
; CHECK-GI-NEXT: st1 { v0.h }[1], [x8]
diff --git a/llvm/test/CodeGen/AArch64/neon-dotreduce.ll b/llvm/test/CodeGen/AArch64/neon-dotreduce.ll
index a534112b7c559..4f0c4080aa0ce 100644
--- a/llvm/test/CodeGen/AArch64/neon-dotreduce.ll
+++ b/llvm/test/CodeGen/AArch64/neon-dotreduce.ll
@@ -412,31 +412,33 @@ define i32 @test_udo...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/144309
More information about the llvm-commits
mailing list