[llvm] [AArch64] Do not generate ld1IndexPost when inserting into lane 0 of a zero vector (PR #145723)

via llvm-commits llvm-commits at lists.llvm.org
Wed Jun 25 08:46:10 PDT 2025


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-aarch64

Author: David Green (davemgreen)

<details>
<summary>Changes</summary>

If we are inserting into lane 0 of a zero vector, we can use the ldr instructions, which zero the upper lanes for free. Do not attempt to form post-increment operations in that case; avoiding them should result in fewer micro-ops overall.

---
Full diff: https://github.com/llvm/llvm-project/pull/145723.diff


2 Files Affected:

- (modified) llvm/lib/Target/AArch64/AArch64ISelLowering.cpp (+2) 
- (modified) llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll (+9-11) 


``````````diff
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 13835747c91e5..9759cc159d5a3 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -23395,6 +23395,8 @@ static SDValue performPostLD1Combine(SDNode *N,
     auto *LaneC = dyn_cast<ConstantSDNode>(Lane);
     if (!LaneC || LaneC->getZExtValue() >= VT.getVectorNumElements())
       return SDValue();
+    if (LaneC->getZExtValue() == 0 && isNullOrNullSplat(N->getOperand(0)))
+      return SDValue();
   }
 
   LoadSDNode *LoadSDN = cast<LoadSDNode>(LD);
diff --git a/llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll b/llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll
index 0779c75c345e3..2af7cd4370b10 100644
--- a/llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll
@@ -13340,9 +13340,9 @@ define <16 x i8> @test_v16i8_post_reg_ld1lane(ptr %bar, ptr %ptr, i64 %inc, <16
 define <16 x i8> @test_v16i8_post_reg_ld1lane_zero(ptr %bar, ptr %ptr, i64 %inc) {
 ; CHECK-SD-LABEL: test_v16i8_post_reg_ld1lane_zero:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    movi.2d v0, #0000000000000000
-; CHECK-SD-NEXT:    ld1.b { v0 }[0], [x0], x2
-; CHECK-SD-NEXT:    str x0, [x1]
+; CHECK-SD-NEXT:    ldr b0, [x0]
+; CHECK-SD-NEXT:    add x8, x0, x2
+; CHECK-SD-NEXT:    str x8, [x1]
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v16i8_post_reg_ld1lane_zero:
@@ -14133,17 +14133,15 @@ define i32 @load_single_extract_variable_index_masked2_i32(ptr %A, i32 %idx) {
 define void @chained_insert_zero(ptr noundef %fenc, ptr noundef %pred, ptr noundef %residual, i32 noundef %stride) {
 ; CHECK-SD-LABEL: chained_insert_zero:
 ; CHECK-SD:       ; %bb.0: ; %entry
-; CHECK-SD-NEXT:    movi.2d v0, #0000000000000000
-; CHECK-SD-NEXT:    movi.2d v1, #0000000000000000
+; CHECK-SD-NEXT:    ldr s0, [x1]
+; CHECK-SD-NEXT:    ldr s1, [x0]
 ; CHECK-SD-NEXT:    ; kill: def $w3 killed $w3 def $x3
 ; CHECK-SD-NEXT:    sxtw x8, w3
-; CHECK-SD-NEXT:    ld1.s { v0 }[0], [x0], x8
-; CHECK-SD-NEXT:    ld1.s { v1 }[0], [x1], x8
-; CHECK-SD-NEXT:    sbfiz x8, x3, #1, #32
-; CHECK-SD-NEXT:    usubl.8h v0, v0, v1
+; CHECK-SD-NEXT:    usubl.8h v0, v1, v0
 ; CHECK-SD-NEXT:    str d0, [x2]
-; CHECK-SD-NEXT:    ldr s0, [x0]
-; CHECK-SD-NEXT:    ldr s1, [x1]
+; CHECK-SD-NEXT:    ldr s0, [x0, x8]
+; CHECK-SD-NEXT:    ldr s1, [x1, x8]
+; CHECK-SD-NEXT:    sbfiz x8, x3, #1, #32
 ; CHECK-SD-NEXT:    usubl.8h v0, v0, v1
 ; CHECK-SD-NEXT:    str d0, [x2, x8]
 ; CHECK-SD-NEXT:    ret

``````````

</details>


https://github.com/llvm/llvm-project/pull/145723


More information about the llvm-commits mailing list