[llvm] [AMDGPU] Enable more consecutive load folding during aggressive-instcombine (PR #158036)
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Thu Sep 11 07:45:00 PDT 2025
================
@@ -0,0 +1,457 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -passes=sroa,instcombine,aggressive-instcombine %s -S -o - | FileCheck %s
+
+; Test: eight consecutive i8 loads from %arg (byte offsets 0 through 7) are
+; zero-extended to i64, shifted left by 8 * offset, and OR'ed together.
+; The CHECK lines below expect the whole sequence to become a single
+; `load i64` from %arg with align 1, whose result is returned directly.
+define i64 @quux(ptr %arg) {
+; CHECK-LABEL: define i64 @quux(
+; CHECK-SAME: ptr [[ARG:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[BB:.*:]]
+; CHECK-NEXT: [[LOAD:%.*]] = load i64, ptr [[ARG]], align 1
+; CHECK-NEXT: ret i64 [[LOAD]]
+;
+bb:
+ ; Load one byte from each of %arg+0 .. %arg+7 (every load is align 1).
+ %load = load i8, ptr %arg, align 1
+ %getelementptr = getelementptr inbounds nuw i8, ptr %arg, i64 1
+ %load1 = load i8, ptr %getelementptr, align 1
+ %getelementptr2 = getelementptr inbounds nuw i8, ptr %arg, i64 2
+ %load3 = load i8, ptr %getelementptr2, align 1
+ %getelementptr4 = getelementptr inbounds nuw i8, ptr %arg, i64 3
+ %load5 = load i8, ptr %getelementptr4, align 1
+ %getelementptr6 = getelementptr inbounds nuw i8, ptr %arg, i64 4
+ %load7 = load i8, ptr %getelementptr6, align 1
+ %getelementptr8 = getelementptr inbounds nuw i8, ptr %arg, i64 5
+ %load9 = load i8, ptr %getelementptr8, align 1
+ %getelementptr10 = getelementptr inbounds nuw i8, ptr %arg, i64 6
+ %load11 = load i8, ptr %getelementptr10, align 1
+ %getelementptr12 = getelementptr inbounds nuw i8, ptr %arg, i64 7
+ %load13 = load i8, ptr %getelementptr12, align 1
+ ; Assemble the i64 starting from the highest byte: the byte at offset 7 is
+ ; shifted by 56, offset 6 by 48, and so on down to offset 1 shifted by 8.
+ %zext = zext i8 %load13 to i64
+ %shl = shl nuw i64 %zext, 56
+ %zext14 = zext i8 %load11 to i64
+ %shl15 = shl nuw nsw i64 %zext14, 48
+ %or = or disjoint i64 %shl, %shl15
+ %zext16 = zext i8 %load9 to i64
+ %shl17 = shl nuw nsw i64 %zext16, 40
+ %or18 = or disjoint i64 %or, %shl17
+ %zext19 = zext i8 %load7 to i64
+ %shl20 = shl nuw nsw i64 %zext19, 32
+ %or21 = or disjoint i64 %or18, %shl20
+ %zext22 = zext i8 %load5 to i64
+ %shl23 = shl nuw nsw i64 %zext22, 24
+ %or24 = or disjoint i64 %or21, %shl23
+ ; The offset-2 and offset-1 shifts are both computed before either is OR'ed
+ ; in, so the shl/or pairs here are not strictly interleaved as they are above.
+ %zext25 = zext i8 %load3 to i64
+ %shl26 = shl nuw nsw i64 %zext25, 16
+ %zext27 = zext i8 %load1 to i64
+ %shl28 = shl nuw nsw i64 %zext27, 8
+ %or29 = or disjoint i64 %or24, %shl26
+ ; The byte at offset 0 is used unshifted. Note that the final two `or`
+ ; instructions carry no `disjoint` flag, unlike the earlier ones.
+ %zext30 = zext i8 %load to i64
+ %or31 = or i64 %or29, %shl28
+ %or32 = or i64 %or31, %zext30
+ ret i64 %or32
+}
+
+
+; The following test case reduced from a client kernel
+%struct.eggs = type { i8 }
+%struct.pluto = type { %struct.spam }
+%struct.spam = type { <32 x i8> }
+%struct.snork = type { i8 }
+%struct.quux = type { ptr, ptr, ptr, ptr, ptr, ptr, ptr, ptr }
+%struct.bar = type { ptr, ptr, ptr, ptr, ptr, ptr }
----------------
arsenm wrote:
Can you avoid so many type definitions?
https://github.com/llvm/llvm-project/pull/158036
More information about the llvm-commits
mailing list