[llvm] [LLVM][DAGCombiner] Port calculateByteProvider to TypeSize. (PR #148425)
via llvm-commits
llvm-commits at lists.llvm.org
Sun Jul 13 05:10:14 PDT 2025
llvmbot wrote:
@llvm/pr-subscribers-backend-aarch64
Author: Paul Walker (paulwalker-arm)
Fixes https://github.com/llvm/llvm-project/issues/148387
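For context, the old code assigned `Op.getValueSizeInBits()` to `unsigned`, which presumably asserts for scalable vector types; the patch keeps the value as a `TypeSize` and uses its "known" comparison helpers instead. Below is a small standalone sketch (not part of the change, assuming an LLVM build that provides `llvm/Support/TypeSize.h`) showing how `isKnownMultipleOf`, `divideCoefficientBy`, `isKnownLT` and `isKnownGE` behave for fixed versus scalable quantities, and why the extra `!isKnownLT` bail-out is needed: for a scalable width, a fixed byte index can be neither provably in bounds nor provably out of bounds.

```cpp
// Illustrative sketch only; not part of the patch.
#include "llvm/Support/TypeSize.h"
#include <cassert>

using llvm::TypeSize;

int main() {
  TypeSize Fixed = TypeSize::getFixed(32);       // e.g. i32
  TypeSize Scalable = TypeSize::getScalable(32); // e.g. <vscale x 4 x i8>

  // Both widths are known multiples of 8 bits, so both can be expressed as a
  // (possibly scalable) number of bytes.
  assert(Fixed.isKnownMultipleOf(8) && Scalable.isKnownMultipleOf(8));
  TypeSize FixedBytes = Fixed.divideCoefficientBy(8);       // 4 bytes
  TypeSize ScalableBytes = Scalable.divideCoefficientBy(8); // vscale x 4 bytes

  // Comparisons are only "known" when they hold for every legal vscale.
  assert(TypeSize::isKnownLT(TypeSize::getFixed(3), FixedBytes));
  assert(TypeSize::isKnownLT(TypeSize::getFixed(3), ScalableBytes));

  // Index 5 vs. vscale x 4 bytes: neither relation can be proven, which is
  // why the patch adds an explicit "!isKnownLT -> return std::nullopt" check
  // after the isKnownGE test instead of assuming the index is in range.
  assert(!TypeSize::isKnownGE(TypeSize::getFixed(5), ScalableBytes));
  assert(!TypeSize::isKnownLT(TypeSize::getFixed(5), ScalableBytes));
  return 0;
}
```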
---
Full diff: https://github.com/llvm/llvm-project/pull/148425.diff
2 Files Affected:
- (modified) llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (+12-8)
- (modified) llvm/test/CodeGen/AArch64/load-combine.ll (+179)
``````````diff
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 231184587d682..410e862b6c0f1 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -9149,11 +9149,12 @@ calculateByteProvider(SDValue Op, unsigned Index, unsigned Depth,
if (Op.getOpcode() != ISD::LOAD && VectorIndex.has_value())
return std::nullopt;
- unsigned BitWidth = Op.getValueSizeInBits();
- if (BitWidth % 8 != 0)
+ TypeSize BitWidth = Op.getValueSizeInBits();
+ if (!BitWidth.isKnownMultipleOf(8))
return std::nullopt;
- unsigned ByteWidth = BitWidth / 8;
- assert(Index < ByteWidth && "invalid index requested");
+ TypeSize ByteWidth = BitWidth.divideCoefficientBy(8);
+ assert(TypeSize::isKnownLT(TypeSize::getFixed(Index), ByteWidth) &&
+ "invalid index requested");
(void) ByteWidth;
switch (Op.getOpcode()) {
@@ -9248,20 +9249,23 @@ calculateByteProvider(SDValue Op, unsigned Index, unsigned Depth,
if (!L->isSimple() || L->isIndexed())
return std::nullopt;
- unsigned NarrowBitWidth = L->getMemoryVT().getSizeInBits();
- if (NarrowBitWidth % 8 != 0)
+ TypeSize NarrowBitWidth = L->getMemoryVT().getSizeInBits();
+ if (!NarrowBitWidth.isKnownMultipleOf(8))
return std::nullopt;
- uint64_t NarrowByteWidth = NarrowBitWidth / 8;
+ TypeSize NarrowByteWidth = NarrowBitWidth.divideCoefficientBy(8);
// If the width of the load does not reach byte we are trying to provide for
// and it is not a ZEXTLOAD, then the load does not provide for the byte in
// question
- if (Index >= NarrowByteWidth)
+ if (TypeSize::isKnownGE(TypeSize::getFixed(Index), NarrowByteWidth))
return L->getExtensionType() == ISD::ZEXTLOAD
? std::optional<SDByteProvider>(
SDByteProvider::getConstantZero())
: std::nullopt;
+ if (!TypeSize::isKnownLT(TypeSize::getFixed(Index), NarrowByteWidth))
+ return std::nullopt;
+
unsigned BPVectorIndex = VectorIndex.value_or(0U);
return SDByteProvider::getSrc(L, Index, BPVectorIndex);
}
diff --git a/llvm/test/CodeGen/AArch64/load-combine.ll b/llvm/test/CodeGen/AArch64/load-combine.ll
index 57f61e5303ecf..0bf4fb1063025 100644
--- a/llvm/test/CodeGen/AArch64/load-combine.ll
+++ b/llvm/test/CodeGen/AArch64/load-combine.ll
@@ -713,3 +713,182 @@ define void @short_vector_to_i64(ptr %in, ptr %out, ptr %p) {
store i64 %i3, ptr %out
ret void
}
+
+; x1 = x0
+define void @scalable_vector_to_i32(ptr %in, ptr %out, ptr %p) #0 {
+; CHECK-LABEL: scalable_vector_to_i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr w8, [x0]
+; CHECK-NEXT: str w8, [x1]
+; CHECK-NEXT: ret
+ %ld = load <vscale x 4 x i8>, ptr %in, align 4
+
+ %e1 = extractelement <vscale x 4 x i8> %ld, i32 0
+ %e2 = extractelement <vscale x 4 x i8> %ld, i32 1
+ %e3 = extractelement <vscale x 4 x i8> %ld, i32 2
+ %e4 = extractelement <vscale x 4 x i8> %ld, i32 3
+
+ %z0 = zext i8 %e1 to i32
+ %z1 = zext i8 %e2 to i32
+ %z2 = zext i8 %e3 to i32
+ %z3 = zext i8 %e4 to i32
+
+ %s1 = shl nuw nsw i32 %z1, 8
+ %s2 = shl nuw nsw i32 %z2, 16
+ %s3 = shl nuw i32 %z3, 24
+
+ %i1 = or i32 %s1, %z0
+ %i2 = or i32 %i1, %s2
+ %i3 = or i32 %i2, %s3
+
+ store i32 %i3, ptr %out
+ ret void
+}
+
+define void @scalable_vector_to_i32_unused_low_i8(ptr %in, ptr %out, ptr %p) #0 {
+; CHECK-LABEL: scalable_vector_to_i32_unused_low_i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: ld1b { z0.s }, p0/z, [x0]
+; CHECK-NEXT: mov w8, v0.s[1]
+; CHECK-NEXT: mov w9, v0.s[2]
+; CHECK-NEXT: mov w10, v0.s[3]
+; CHECK-NEXT: lsl w8, w8, #8
+; CHECK-NEXT: orr w8, w8, w9, lsl #16
+; CHECK-NEXT: orr w8, w8, w10, lsl #24
+; CHECK-NEXT: str w8, [x1]
+; CHECK-NEXT: ret
+ %ld = load <vscale x 4 x i8>, ptr %in, align 4
+
+ %e2 = extractelement <vscale x 4 x i8> %ld, i32 1
+ %e3 = extractelement <vscale x 4 x i8> %ld, i32 2
+ %e4 = extractelement <vscale x 4 x i8> %ld, i32 3
+
+ %z1 = zext i8 %e2 to i32
+ %z2 = zext i8 %e3 to i32
+ %z3 = zext i8 %e4 to i32
+
+ %s1 = shl nuw nsw i32 %z1, 8
+ %s2 = shl nuw nsw i32 %z2, 16
+ %s3 = shl nuw i32 %z3, 24
+
+ %i2 = or i32 %s1, %s2
+ %i3 = or i32 %i2, %s3
+
+ store i32 %i3, ptr %out
+ ret void
+}
+
+define void @scalable_vector_to_i32_unused_high_i8(ptr %in, ptr %out, ptr %p) #0 {
+; CHECK-LABEL: scalable_vector_to_i32_unused_high_i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: ldrh w9, [x0]
+; CHECK-NEXT: ld1b { z0.s }, p0/z, [x0]
+; CHECK-NEXT: mov w8, v0.s[2]
+; CHECK-NEXT: orr w8, w9, w8, lsl #16
+; CHECK-NEXT: str w8, [x1]
+; CHECK-NEXT: ret
+ %ld = load <vscale x 4 x i8>, ptr %in, align 4
+
+ %e1 = extractelement <vscale x 4 x i8> %ld, i32 0
+ %e2 = extractelement <vscale x 4 x i8> %ld, i32 1
+ %e3 = extractelement <vscale x 4 x i8> %ld, i32 2
+
+ %z0 = zext i8 %e1 to i32
+ %z1 = zext i8 %e2 to i32
+ %z2 = zext i8 %e3 to i32
+
+ %s1 = shl nuw nsw i32 %z1, 8
+ %s2 = shl nuw nsw i32 %z2, 16
+
+ %i1 = or i32 %s1, %z0
+ %i2 = or i32 %i1, %s2
+
+ store i32 %i2, ptr %out
+ ret void
+}
+
+define void @scalable_vector_to_i32_unused_low_i16(ptr %in, ptr %out, ptr %p) #0 {
+; CHECK-LABEL: scalable_vector_to_i32_unused_low_i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: ld1b { z0.s }, p0/z, [x0]
+; CHECK-NEXT: mov w8, v0.s[2]
+; CHECK-NEXT: mov w9, v0.s[3]
+; CHECK-NEXT: lsl w8, w8, #16
+; CHECK-NEXT: orr w8, w8, w9, lsl #24
+; CHECK-NEXT: str w8, [x1]
+; CHECK-NEXT: ret
+ %ld = load <vscale x 4 x i8>, ptr %in, align 4
+
+ %e3 = extractelement <vscale x 4 x i8> %ld, i32 2
+ %e4 = extractelement <vscale x 4 x i8> %ld, i32 3
+
+ %z2 = zext i8 %e3 to i32
+ %z3 = zext i8 %e4 to i32
+
+ %s2 = shl nuw nsw i32 %z2, 16
+ %s3 = shl nuw i32 %z3, 24
+
+ %i3 = or i32 %s2, %s3
+
+ store i32 %i3, ptr %out
+ ret void
+}
+
+; x1 = x0[0:1]
+define void @scalable_vector_to_i32_unused_high_i16(ptr %in, ptr %out, ptr %p) #0 {
+; CHECK-LABEL: scalable_vector_to_i32_unused_high_i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldrh w8, [x0]
+; CHECK-NEXT: str w8, [x1]
+; CHECK-NEXT: ret
+ %ld = load <vscale x 4 x i8>, ptr %in, align 4
+
+ %e1 = extractelement <vscale x 4 x i8> %ld, i32 0
+ %e2 = extractelement <vscale x 4 x i8> %ld, i32 1
+
+ %z0 = zext i8 %e1 to i32
+ %z1 = zext i8 %e2 to i32
+
+ %s1 = shl nuw nsw i32 %z1, 8
+
+ %i1 = or i32 %s1, %z0
+
+ store i32 %i1, ptr %out
+ ret void
+}
+
+; x1 = x0
+define void @scalable_vector_to_i64(ptr %in, ptr %out, ptr %p) #0 {
+; CHECK-LABEL: scalable_vector_to_i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr w8, [x0]
+; CHECK-NEXT: str x8, [x1]
+; CHECK-NEXT: ret
+ %ld = load <vscale x 4 x i8>, ptr %in, align 4
+
+ %e1 = extractelement <vscale x 4 x i8> %ld, i32 0
+ %e2 = extractelement <vscale x 4 x i8> %ld, i32 1
+ %e3 = extractelement <vscale x 4 x i8> %ld, i32 2
+ %e4 = extractelement <vscale x 4 x i8> %ld, i32 3
+
+ %z0 = zext i8 %e1 to i64
+ %z1 = zext i8 %e2 to i64
+ %z2 = zext i8 %e3 to i64
+ %z3 = zext i8 %e4 to i64
+
+ %s1 = shl nuw nsw i64 %z1, 8
+ %s2 = shl nuw nsw i64 %z2, 16
+ %s3 = shl nuw i64 %z3, 24
+
+ %i1 = or i64 %s1, %z0
+ %i2 = or i64 %i1, %s2
+ %i3 = or i64 %i2, %s3
+
+ store i64 %i3, ptr %out
+ ret void
+}
+
+attributes #0 = { "target-features"="+sve" }
``````````
https://github.com/llvm/llvm-project/pull/148425