[llvm] [AMDGPU] Enable more consecutive load folding during aggressive-instcombine (PR #158036)

via llvm-commits llvm-commits at lists.llvm.org
Thu Sep 11 03:50:51 PDT 2025


llvmbot wrote:



@llvm/pr-subscribers-backend-amdgpu

Author: None (macurtis-amd)

<details>
<summary>Changes</summary>

The original motivating example was provided by Jeff Byrnes: https://godbolt.org/z/8ebcTEjTs

Nikita Popov provided another example (https://godbolt.org/z/Gv1j4vjqE) as part of my original attempt to fix the issue (PR [#133301](https://github.com/llvm/llvm-project/pull/133301); see his [comment](https://github.com/llvm/llvm-project/pull/133301#issuecomment-2984905809)).

This changes the value of `IsFast` returned by `SITargetLowering::allowsMisalignedMemoryAccessesImpl` to be non-zero for private and flat addresses when the subtarget has unaligned scratch accesses enabled.
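Here `IsFast` is an out-parameter where zero means the access is not considered fast; aggressive-instcombine's load merging only fires when it is non-zero. The new hunk, annotated with my own comments (the full diff is below):

```c++
// With unaligned scratch access enabled, the access is always legal.
if (Subtarget->hasUnalignedScratchAccessEnabled()) {
  if (IsFast) {
    // Non-zero means "fast". Report the full access size when the access
    // is at least 4-byte aligned, and the minimum non-zero value otherwise.
    *IsFast = AlignedBy4 ? Size : 1;
  }
  return true;
}
```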

This enables aggressive-instcombine to do more folding of consecutive loads (see [here](https://github.com/llvm/llvm-project/blob/cbd496581fb6953a9a8d8387a010cc3a67d4654b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp#L811)).
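Concretely, the fold recognizes a chain of adjacent byte loads that are zero-extended, shifted, and or'ed into a wider integer, and replaces the whole chain with a single wide load. A minimal hand-written sketch of the before/after IR, in the spirit of the `quux` test added by this patch (names are illustrative):

```llvm
; Before: four adjacent i8 loads assembled into an i32 (little endian).
define i32 @assemble(ptr %p) {
  %b0 = load i8, ptr %p, align 1
  %g1 = getelementptr inbounds i8, ptr %p, i64 1
  %b1 = load i8, ptr %g1, align 1
  %g2 = getelementptr inbounds i8, ptr %p, i64 2
  %b2 = load i8, ptr %g2, align 1
  %g3 = getelementptr inbounds i8, ptr %p, i64 3
  %b3 = load i8, ptr %g3, align 1
  %z0 = zext i8 %b0 to i32
  %z1 = zext i8 %b1 to i32
  %z2 = zext i8 %b2 to i32
  %z3 = zext i8 %b3 to i32
  %s1 = shl i32 %z1, 8
  %s2 = shl i32 %z2, 16
  %s3 = shl i32 %z3, 24
  %o1 = or i32 %z0, %s1
  %o2 = or i32 %o1, %s2
  %r  = or i32 %o2, %s3
  ret i32 %r
}

; After aggressive-instcombine, with IsFast now non-zero for this address
; space, the chain becomes a single unaligned wide load:
define i32 @assemble.folded(ptr %p) {
  %r = load i32, ptr %p, align 1
  ret i32 %r
}
```

Previously the fold bailed out for flat and private pointers: the unaligned access was reported as legal, but `*IsFast` was set to zero, and the load merging requires a fast access.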

Summary of the performance impact on [composable_kernel](https://github.com/ROCm/composable_kernel):

| GPU    | Speedup (geomean) |
|--------|-------------------|
| MI300A | 1.11              |
| MI300X | 1.14              |
| MI350X | 1.03              |

---

Patch is 102.83 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/158036.diff


6 Files Affected:

- (modified) llvm/lib/Target/AMDGPU/SIISelLowering.cpp (+7-1) 
- (added) llvm/test/CodeGen/AMDGPU/fold-consecutive-loads.ll (+457) 
- (modified) llvm/test/CodeGen/AMDGPU/memcpy-fixed-align.ll (+9-7) 
- (modified) llvm/test/CodeGen/AMDGPU/memcpy-libcall.ll (+74-78) 
- (modified) llvm/test/CodeGen/AMDGPU/memcpy-param-combinations.ll (+234-324) 
- (modified) llvm/test/CodeGen/AMDGPU/memmove-param-combinations.ll (+34-30) 


``````````diff
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index edce4856f77b0..f8e5740880708 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -2090,10 +2090,16 @@ bool SITargetLowering::allowsMisalignedMemoryAccessesImpl(
   if (AddrSpace == AMDGPUAS::PRIVATE_ADDRESS ||
       AddrSpace == AMDGPUAS::FLAT_ADDRESS) {
     bool AlignedBy4 = Alignment >= Align(4);
+    if (Subtarget->hasUnalignedScratchAccessEnabled()) {
+      if (IsFast)
+        *IsFast = AlignedBy4 ? Size : 1;
+      return true;
+    }
+
     if (IsFast)
       *IsFast = AlignedBy4;
 
-    return AlignedBy4 || Subtarget->hasUnalignedScratchAccessEnabled();
+    return AlignedBy4;
   }
 
   // So long as they are correct, wide global memory operations perform better
diff --git a/llvm/test/CodeGen/AMDGPU/fold-consecutive-loads.ll b/llvm/test/CodeGen/AMDGPU/fold-consecutive-loads.ll
new file mode 100644
index 0000000000000..610760f788ea8
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/fold-consecutive-loads.ll
@@ -0,0 +1,457 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -passes=sroa,instcombine,aggressive-instcombine %s -S -o - | FileCheck %s
+
+define i64 @quux(ptr %arg) {
+; CHECK-LABEL: define i64 @quux(
+; CHECK-SAME: ptr [[ARG:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:  [[BB:.*:]]
+; CHECK-NEXT:    [[LOAD:%.*]] = load i64, ptr [[ARG]], align 1
+; CHECK-NEXT:    ret i64 [[LOAD]]
+;
+bb:
+  %load = load i8, ptr %arg, align 1
+  %getelementptr = getelementptr inbounds nuw i8, ptr %arg, i64 1
+  %load1 = load i8, ptr %getelementptr, align 1
+  %getelementptr2 = getelementptr inbounds nuw i8, ptr %arg, i64 2
+  %load3 = load i8, ptr %getelementptr2, align 1
+  %getelementptr4 = getelementptr inbounds nuw i8, ptr %arg, i64 3
+  %load5 = load i8, ptr %getelementptr4, align 1
+  %getelementptr6 = getelementptr inbounds nuw i8, ptr %arg, i64 4
+  %load7 = load i8, ptr %getelementptr6, align 1
+  %getelementptr8 = getelementptr inbounds nuw i8, ptr %arg, i64 5
+  %load9 = load i8, ptr %getelementptr8, align 1
+  %getelementptr10 = getelementptr inbounds nuw i8, ptr %arg, i64 6
+  %load11 = load i8, ptr %getelementptr10, align 1
+  %getelementptr12 = getelementptr inbounds nuw i8, ptr %arg, i64 7
+  %load13 = load i8, ptr %getelementptr12, align 1
+  %zext = zext i8 %load13 to i64
+  %shl = shl nuw i64 %zext, 56
+  %zext14 = zext i8 %load11 to i64
+  %shl15 = shl nuw nsw i64 %zext14, 48
+  %or = or disjoint i64 %shl, %shl15
+  %zext16 = zext i8 %load9 to i64
+  %shl17 = shl nuw nsw i64 %zext16, 40
+  %or18 = or disjoint i64 %or, %shl17
+  %zext19 = zext i8 %load7 to i64
+  %shl20 = shl nuw nsw i64 %zext19, 32
+  %or21 = or disjoint i64 %or18, %shl20
+  %zext22 = zext i8 %load5 to i64
+  %shl23 = shl nuw nsw i64 %zext22, 24
+  %or24 = or disjoint i64 %or21, %shl23
+  %zext25 = zext i8 %load3 to i64
+  %shl26 = shl nuw nsw i64 %zext25, 16
+  %zext27 = zext i8 %load1 to i64
+  %shl28 = shl nuw nsw i64 %zext27, 8
+  %or29 = or disjoint i64 %or24, %shl26
+  %zext30 = zext i8 %load to i64
+  %or31 = or i64 %or29, %shl28
+  %or32 = or i64 %or31, %zext30
+  ret i64 %or32
+}
+
+
+; The following test case was reduced from a client kernel.
+%struct.eggs = type { i8 }
+%struct.pluto = type { %struct.spam }
+%struct.spam = type { <32 x i8> }
+%struct.snork = type { i8 }
+%struct.quux = type { ptr, ptr, ptr, ptr, ptr, ptr, ptr, ptr }
+%struct.bar = type { ptr, ptr, ptr, ptr, ptr, ptr }
+
+define fastcc void @hoge(ptr noundef nonnull readonly align 8 captures(none) dereferenceable(48) %arg) {
+; CHECK-LABEL: define fastcc void @hoge(
+; CHECK-SAME: ptr noundef nonnull readonly align 8 captures(none) dereferenceable(48) [[ARG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:  [[BB:.*:]]
+; CHECK-NEXT:    [[LOAD:%.*]] = load ptr, ptr [[ARG]], align 8
+; CHECK-NEXT:    [[GETELEMENTPTR13:%.*]] = getelementptr inbounds nuw i8, ptr [[ARG]], i64 16
+; CHECK-NEXT:    [[LOAD14:%.*]] = load ptr, ptr [[GETELEMENTPTR13]], align 8
+; CHECK-NEXT:    [[LOAD28:%.*]] = load i64, ptr [[LOAD]], align 1
+; CHECK-NEXT:    [[LOAD29:%.*]] = load i64, ptr [[LOAD14]], align 1
+; CHECK-NEXT:    [[GETELEMENTPTR72:%.*]] = getelementptr inbounds nuw i8, ptr [[LOAD]], i64 8
+; CHECK-NEXT:    [[LOAD73:%.*]] = load i64, ptr [[GETELEMENTPTR72]], align 1
+; CHECK-NEXT:    [[GETELEMENTPTR75:%.*]] = getelementptr inbounds nuw i8, ptr [[LOAD14]], i64 8
+; CHECK-NEXT:    [[LOAD76:%.*]] = load i64, ptr [[GETELEMENTPTR75]], align 1
+; CHECK-NEXT:    [[GETELEMENTPTR120:%.*]] = getelementptr inbounds nuw i8, ptr [[LOAD]], i64 16
+; CHECK-NEXT:    [[LOAD121:%.*]] = load i64, ptr [[GETELEMENTPTR120]], align 1
+; CHECK-NEXT:    [[GETELEMENTPTR123:%.*]] = getelementptr inbounds nuw i8, ptr [[LOAD14]], i64 16
+; CHECK-NEXT:    [[LOAD124:%.*]] = load i64, ptr [[GETELEMENTPTR123]], align 1
+; CHECK-NEXT:    [[GETELEMENTPTR168:%.*]] = getelementptr inbounds nuw i8, ptr [[LOAD]], i64 24
+; CHECK-NEXT:    [[LOAD169:%.*]] = load i64, ptr [[GETELEMENTPTR168]], align 1
+; CHECK-NEXT:    [[GETELEMENTPTR171:%.*]] = getelementptr inbounds nuw i8, ptr [[LOAD14]], i64 24
+; CHECK-NEXT:    [[LOAD172:%.*]] = load i32, ptr [[GETELEMENTPTR171]], align 1
+; CHECK-NEXT:    [[TMP0:%.*]] = zext i32 [[LOAD172]] to i64
+; CHECK-NEXT:    [[GETELEMENTPTR195:%.*]] = getelementptr inbounds nuw i8, ptr [[LOAD14]], i64 28
+; CHECK-NEXT:    [[LOAD196:%.*]] = load i8, ptr [[GETELEMENTPTR195]], align 1
+; CHECK-NEXT:    [[ALLOCA2_SROA_30_28_INSERT_EXT:%.*]] = zext i8 [[LOAD196]] to i64
+; CHECK-NEXT:    [[ALLOCA2_SROA_30_28_INSERT_SHIFT:%.*]] = shl nuw nsw i64 [[ALLOCA2_SROA_30_28_INSERT_EXT]], 32
+; CHECK-NEXT:    [[GETELEMENTPTR201:%.*]] = getelementptr inbounds nuw i8, ptr [[LOAD14]], i64 29
+; CHECK-NEXT:    [[LOAD202:%.*]] = load i8, ptr [[GETELEMENTPTR201]], align 1
+; CHECK-NEXT:    [[ALLOCA2_SROA_30_29_INSERT_EXT:%.*]] = zext i8 [[LOAD202]] to i64
+; CHECK-NEXT:    [[ALLOCA2_SROA_30_29_INSERT_SHIFT:%.*]] = shl nuw nsw i64 [[ALLOCA2_SROA_30_29_INSERT_EXT]], 40
+; CHECK-NEXT:    [[ALLOCA2_SROA_30_29_INSERT_MASK:%.*]] = or disjoint i64 [[TMP0]], [[ALLOCA2_SROA_30_28_INSERT_SHIFT]]
+; CHECK-NEXT:    [[GETELEMENTPTR207:%.*]] = getelementptr inbounds nuw i8, ptr [[LOAD14]], i64 30
+; CHECK-NEXT:    [[LOAD208:%.*]] = load i8, ptr [[GETELEMENTPTR207]], align 1
+; CHECK-NEXT:    [[ALLOCA2_SROA_30_30_INSERT_EXT:%.*]] = zext i8 [[LOAD208]] to i64
+; CHECK-NEXT:    [[ALLOCA2_SROA_30_30_INSERT_SHIFT:%.*]] = shl nuw nsw i64 [[ALLOCA2_SROA_30_30_INSERT_EXT]], 48
+; CHECK-NEXT:    [[GETELEMENTPTR213:%.*]] = getelementptr inbounds nuw i8, ptr [[LOAD14]], i64 31
+; CHECK-NEXT:    [[LOAD214:%.*]] = load i8, ptr [[GETELEMENTPTR213]], align 1
+; CHECK-NEXT:    [[ALLOCA2_SROA_30_31_INSERT_EXT:%.*]] = zext i8 [[LOAD214]] to i64
+; CHECK-NEXT:    [[ALLOCA2_SROA_30_31_INSERT_SHIFT:%.*]] = shl nuw i64 [[ALLOCA2_SROA_30_31_INSERT_EXT]], 56
+; CHECK-NEXT:    [[ALLOCA2_SROA_30_30_INSERT_MASK_MASKED:%.*]] = or i64 [[ALLOCA2_SROA_30_29_INSERT_MASK]], [[ALLOCA2_SROA_30_29_INSERT_SHIFT]]
+; CHECK-NEXT:    [[ALLOCA2_SROA_30_31_INSERT_MASK:%.*]] = or i64 [[ALLOCA2_SROA_30_30_INSERT_MASK_MASKED]], [[ALLOCA2_SROA_30_30_INSERT_SHIFT]]
+; CHECK-NEXT:    [[ALLOCA2_SROA_30_31_INSERT_INSERT:%.*]] = or i64 [[ALLOCA2_SROA_30_31_INSERT_MASK]], [[ALLOCA2_SROA_30_31_INSERT_SHIFT]]
+; CHECK-NEXT:    [[GETELEMENTPTR216:%.*]] = getelementptr inbounds nuw i8, ptr [[ARG]], i64 40
+; CHECK-NEXT:    [[LOAD217:%.*]] = load ptr, ptr [[GETELEMENTPTR216]], align 8
+; CHECK-NEXT:    [[LOAD220:%.*]] = load <16 x float>, ptr [[LOAD217]], align 64
+; CHECK-NEXT:    [[CALL:%.*]] = call contract <16 x float> @llvm.amdgcn.mfma.f32.32x32x16.fp8.fp8(i64 [[LOAD28]], i64 [[LOAD29]], <16 x float> [[LOAD220]], i32 0, i32 0, i32 0)
+; CHECK-NEXT:    store <16 x float> [[CALL]], ptr [[LOAD217]], align 64
+; CHECK-NEXT:    [[CALL225:%.*]] = call contract <16 x float> @llvm.amdgcn.mfma.f32.32x32x16.fp8.fp8(i64 [[LOAD73]], i64 [[LOAD76]], <16 x float> [[CALL]], i32 0, i32 0, i32 0)
+; CHECK-NEXT:    store <16 x float> [[CALL225]], ptr [[LOAD217]], align 64
+; CHECK-NEXT:    [[CALL230:%.*]] = call contract <16 x float> @llvm.amdgcn.mfma.f32.32x32x16.fp8.fp8(i64 [[LOAD121]], i64 [[LOAD124]], <16 x float> [[CALL225]], i32 0, i32 0, i32 0)
+; CHECK-NEXT:    [[CALL235:%.*]] = call contract <16 x float> @llvm.amdgcn.mfma.f32.32x32x16.fp8.fp8(i64 [[LOAD169]], i64 [[ALLOCA2_SROA_30_31_INSERT_INSERT]], <16 x float> [[CALL230]], i32 0, i32 0, i32 0)
+; CHECK-NEXT:    store <16 x float> [[CALL235]], ptr [[LOAD217]], align 64
+; CHECK-NEXT:    ret void
+;
+bb:
+  %alloca = alloca %struct.eggs, align 1, addrspace(5)
+  %alloca1 = alloca %struct.pluto, align 32, addrspace(5)
+  %alloca2 = alloca %struct.pluto, align 32, addrspace(5)
+  %alloca3 = alloca %struct.snork, align 1, addrspace(5)
+  %alloca4 = alloca %struct.quux, align 8, addrspace(5)
+  %addrspacecast = addrspacecast ptr addrspace(5) %alloca to ptr
+  %addrspacecast5 = addrspacecast ptr addrspace(5) %alloca1 to ptr
+  %addrspacecast6 = addrspacecast ptr addrspace(5) %alloca2 to ptr
+  call void @llvm.lifetime.start.p5(i64 32, ptr addrspace(5) %alloca1)
+  call void @llvm.memset.p5.i64(ptr addrspace(5) align 32 %alloca1, i8 0, i64 32, i1 false)
+  call void @llvm.lifetime.start.p5(i64 32, ptr addrspace(5) %alloca2)
+  call void @llvm.memset.p5.i64(ptr addrspace(5) align 32 %alloca2, i8 0, i64 32, i1 false)
+  call void @llvm.lifetime.start.p5(i64 1, ptr addrspace(5) %alloca3)
+  store ptr %addrspacecast5, ptr addrspace(5) %alloca4, align 8
+  %getelementptr = getelementptr inbounds %struct.quux, ptr addrspace(5) %alloca4, i64 0, i32 1
+  %load = load ptr, ptr %arg, align 8
+  store ptr %load, ptr addrspace(5) %getelementptr, align 8
+  %getelementptr7 = getelementptr inbounds %struct.quux, ptr addrspace(5) %alloca4, i64 0, i32 2
+  %getelementptr8 = getelementptr inbounds %struct.bar, ptr %arg, i64 0, i32 1
+  %load9 = load ptr, ptr %getelementptr8, align 8
+  store ptr %load9, ptr addrspace(5) %getelementptr7, align 8
+  %getelementptr10 = getelementptr inbounds %struct.quux, ptr addrspace(5) %alloca4, i64 0, i32 3
+  store ptr %addrspacecast, ptr addrspace(5) %getelementptr10, align 8
+  %getelementptr11 = getelementptr inbounds %struct.quux, ptr addrspace(5) %alloca4, i64 0, i32 4
+  store ptr %addrspacecast6, ptr addrspace(5) %getelementptr11, align 8
+  %getelementptr12 = getelementptr inbounds %struct.quux, ptr addrspace(5) %alloca4, i64 0, i32 5
+  %getelementptr13 = getelementptr inbounds %struct.bar, ptr %arg, i64 0, i32 2
+  %load14 = load ptr, ptr %getelementptr13, align 8
+  store ptr %load14, ptr addrspace(5) %getelementptr12, align 8
+  %getelementptr15 = getelementptr inbounds %struct.quux, ptr addrspace(5) %alloca4, i64 0, i32 6
+  %getelementptr16 = getelementptr inbounds %struct.bar, ptr %arg, i64 0, i32 3
+  %load17 = load ptr, ptr %getelementptr16, align 8
+  store ptr %load17, ptr addrspace(5) %getelementptr15, align 8
+  %getelementptr18 = getelementptr inbounds %struct.quux, ptr addrspace(5) %alloca4, i64 0, i32 7
+  %getelementptr19 = getelementptr inbounds %struct.bar, ptr %arg, i64 0, i32 4
+  %load20 = load ptr, ptr %getelementptr19, align 8
+  store ptr %load20, ptr addrspace(5) %getelementptr18, align 8
+  %load21 = load ptr, ptr addrspace(5) %alloca4, align 8
+  %getelementptr22 = getelementptr inbounds i8, ptr addrspace(5) %alloca4, i32 8
+  %load23 = load ptr, ptr addrspace(5) %getelementptr22, align 8
+  %getelementptr24 = getelementptr inbounds i8, ptr addrspace(5) %alloca4, i32 32
+  %load25 = load ptr, ptr addrspace(5) %getelementptr24, align 8
+  %getelementptr26 = getelementptr inbounds i8, ptr addrspace(5) %alloca4, i32 40
+  %load27 = load ptr, ptr addrspace(5) %getelementptr26, align 8
+  %load28 = load i8, ptr %load23, align 1
+  store i8 %load28, ptr %load21, align 1
+  %load29 = load i8, ptr %load27, align 1
+  store i8 %load29, ptr %load25, align 1
+  %getelementptr30 = getelementptr inbounds i8, ptr %load23, i64 1
+  %load31 = load i8, ptr %getelementptr30, align 1
+  %getelementptr32 = getelementptr inbounds i8, ptr %load21, i64 1
+  store i8 %load31, ptr %getelementptr32, align 1
+  %getelementptr33 = getelementptr inbounds i8, ptr %load27, i64 1
+  %load34 = load i8, ptr %getelementptr33, align 1
+  %getelementptr35 = getelementptr inbounds i8, ptr %load25, i64 1
+  store i8 %load34, ptr %getelementptr35, align 1
+  %getelementptr36 = getelementptr inbounds i8, ptr %load23, i64 2
+  %load37 = load i8, ptr %getelementptr36, align 1
+  %getelementptr38 = getelementptr inbounds i8, ptr %load21, i64 2
+  store i8 %load37, ptr %getelementptr38, align 1
+  %getelementptr39 = getelementptr inbounds i8, ptr %load27, i64 2
+  %load40 = load i8, ptr %getelementptr39, align 1
+  %getelementptr41 = getelementptr inbounds i8, ptr %load25, i64 2
+  store i8 %load40, ptr %getelementptr41, align 1
+  %getelementptr42 = getelementptr inbounds i8, ptr %load23, i64 3
+  %load43 = load i8, ptr %getelementptr42, align 1
+  %getelementptr44 = getelementptr inbounds i8, ptr %load21, i64 3
+  store i8 %load43, ptr %getelementptr44, align 1
+  %getelementptr45 = getelementptr inbounds i8, ptr %load27, i64 3
+  %load46 = load i8, ptr %getelementptr45, align 1
+  %getelementptr47 = getelementptr inbounds i8, ptr %load25, i64 3
+  store i8 %load46, ptr %getelementptr47, align 1
+  %getelementptr48 = getelementptr inbounds i8, ptr %load23, i64 4
+  %load49 = load i8, ptr %getelementptr48, align 1
+  %getelementptr50 = getelementptr inbounds i8, ptr %load21, i64 4
+  store i8 %load49, ptr %getelementptr50, align 1
+  %getelementptr51 = getelementptr inbounds i8, ptr %load27, i64 4
+  %load52 = load i8, ptr %getelementptr51, align 1
+  %getelementptr53 = getelementptr inbounds i8, ptr %load25, i64 4
+  store i8 %load52, ptr %getelementptr53, align 1
+  %getelementptr54 = getelementptr inbounds i8, ptr %load23, i64 5
+  %load55 = load i8, ptr %getelementptr54, align 1
+  %getelementptr56 = getelementptr inbounds i8, ptr %load21, i64 5
+  store i8 %load55, ptr %getelementptr56, align 1
+  %getelementptr57 = getelementptr inbounds i8, ptr %load27, i64 5
+  %load58 = load i8, ptr %getelementptr57, align 1
+  %getelementptr59 = getelementptr inbounds i8, ptr %load25, i64 5
+  store i8 %load58, ptr %getelementptr59, align 1
+  %getelementptr60 = getelementptr inbounds i8, ptr %load23, i64 6
+  %load61 = load i8, ptr %getelementptr60, align 1
+  %getelementptr62 = getelementptr inbounds i8, ptr %load21, i64 6
+  store i8 %load61, ptr %getelementptr62, align 1
+  %getelementptr63 = getelementptr inbounds i8, ptr %load27, i64 6
+  %load64 = load i8, ptr %getelementptr63, align 1
+  %getelementptr65 = getelementptr inbounds i8, ptr %load25, i64 6
+  store i8 %load64, ptr %getelementptr65, align 1
+  %getelementptr66 = getelementptr inbounds i8, ptr %load23, i64 7
+  %load67 = load i8, ptr %getelementptr66, align 1
+  %getelementptr68 = getelementptr inbounds i8, ptr %load21, i64 7
+  store i8 %load67, ptr %getelementptr68, align 1
+  %getelementptr69 = getelementptr inbounds i8, ptr %load27, i64 7
+  %load70 = load i8, ptr %getelementptr69, align 1
+  %getelementptr71 = getelementptr inbounds i8, ptr %load25, i64 7
+  store i8 %load70, ptr %getelementptr71, align 1
+  %getelementptr72 = getelementptr inbounds i8, ptr %load23, i64 8
+  %load73 = load i8, ptr %getelementptr72, align 1
+  %getelementptr74 = getelementptr inbounds i8, ptr %load21, i64 8
+  store i8 %load73, ptr %getelementptr74, align 1
+  %getelementptr75 = getelementptr inbounds i8, ptr %load27, i64 8
+  %load76 = load i8, ptr %getelementptr75, align 1
+  %getelementptr77 = getelementptr inbounds i8, ptr %load25, i64 8
+  store i8 %load76, ptr %getelementptr77, align 1
+  %getelementptr78 = getelementptr inbounds i8, ptr %load23, i64 9
+  %load79 = load i8, ptr %getelementptr78, align 1
+  %getelementptr80 = getelementptr inbounds i8, ptr %load21, i64 9
+  store i8 %load79, ptr %getelementptr80, align 1
+  %getelementptr81 = getelementptr inbounds i8, ptr %load27, i64 9
+  %load82 = load i8, ptr %getelementptr81, align 1
+  %getelementptr83 = getelementptr inbounds i8, ptr %load25, i64 9
+  store i8 %load82, ptr %getelementptr83, align 1
+  %getelementptr84 = getelementptr inbounds i8, ptr %load23, i64 10
+  %load85 = load i8, ptr %getelementptr84, align 1
+  %getelementptr86 = getelementptr inbounds i8, ptr %load21, i64 10
+  store i8 %load85, ptr %getelementptr86, align 1
+  %getelementptr87 = getelementptr inbounds i8, ptr %load27, i64 10
+  %load88 = load i8, ptr %getelementptr87, align 1
+  %getelementptr89 = getelementptr inbounds i8, ptr %load25, i64 10
+  store i8 %load88, ptr %getelementptr89, align 1
+  %getelementptr90 = getelementptr inbounds i8, ptr %load23, i64 11
+  %load91 = load i8, ptr %getelementptr90, align 1
+  %getelementptr92 = getelementptr inbounds i8, ptr %load21, i64 11
+  store i8 %load91, ptr %getelementptr92, align 1
+  %getelementptr93 = getelementptr inbounds i8, ptr %load27, i64 11
+  %load94 = load i8, ptr %getelementptr93, align 1
+  %getelementptr95 = getelementptr inbounds i8, ptr %load25, i64 11
+  store i8 %load94, ptr %getelementptr95, align 1
+  %getelementptr96 = getelementptr inbounds i8, ptr %load23, i64 12
+  %load97 = load i8, ptr %getelementptr96, align 1
+  %getelementptr98 = getelementptr inbounds i8, ptr %load21, i64 12
+  store i8 %load97, ptr %getelementptr98, align 1
+  %getelementptr99 = getelementptr inbounds i8, ptr %load27, i64 12
+  %load100 = load i8, ptr %getelementptr99, align 1
+  %getelementptr101 = getelementptr inbounds i8, ptr %load25, i64 12
+  store i8 %load100, ptr %getelementptr101, align 1
+  %getelementptr102 = getelementptr inbounds i8, ptr %load23, i64 13
+  %load103 = load i8, ptr %getelementptr102, align 1
+  %getelementptr104 = getelementptr inbounds i8, ptr %load21, i64 13
+  store i8 %load103, ptr %getelementptr104, align 1
+  %getelementptr105 = getelementptr inbounds i8, ptr %load27, i64 13
+  %load106 = load i8, ptr %getelementptr105, align 1
+  %getelementptr107 = getelementptr inbounds i8, ptr %load25, i64 13
+  store i8 %load106, ptr %getelementptr107, align 1
+  %getelementptr108 = getelementptr inbounds i8, ptr %load23, i64 14
+  %load109 = load i8, ptr %getelementptr108, align 1
+  %getelementptr110 = getelementptr inbounds i8, ptr %load21, i64 14
+  store i8 %load109, ptr %getelementptr110, align 1
+  %getelementptr111 = getelementptr inbounds i8, ptr %load27, i64 14
+  %load112 = load i8, ptr %getelementptr111, align 1
+  %getelementptr113 = getelementptr inbounds i8, ptr %load25, i64 14
+  store i8 %load112, ptr %getelementptr113, align 1
+  %getelementptr114 = getelementptr inbounds i8, ptr %load23, i64 15
+  %load115 = load i8, ptr %getelementptr114, align 1
+  %getelementptr116 = getelementptr inbounds i8, ptr %load21, i64 15
+  store i8 %load115, ptr %getelementptr116, align 1
+  %getelementptr117 = getelementptr inbounds i8, ptr %load27, i64 15
+  %load118 = load i8, ptr %getelementptr117, align 1
+  %getelementptr119 = getelementptr inbounds i8, ptr %load25, i64 15
+  store i8 %load118, ptr %getelementptr119, align 1
+  %getelementptr120 = getelementptr inbounds i8, ptr %load23, i64 16
+  %load121 = load i8, ptr %getelementptr120, align 1
+  %getelementptr122 = getelementptr inbounds i8, ptr %load21, i64 16
+  store i8 %load121, ptr %getelementptr122, align 1
+  %getelementptr123 = getelementptr inbounds i8, ptr %load27, i64 16
+  %load124 = load i8, ptr %getelementptr123, align 1
+  %getelementptr125 = getelementptr inbounds i8, ptr %load25, i64 16
+  store i8 %load124, ptr %getelementptr125, align 1
+  %getelementptr126 = getelementptr inbounds i8, ptr %load23, i64 17
+  %load127 = load i8, ptr %getelementptr126, align 1
+  %getelementptr128 = getelementptr inbounds i8, ptr %load21, i64 17
+  store i8 %load127, ptr %getelementptr128, align 1
+  %getelementptr129 = getelementptr inbounds i8, ptr %load27, i64 17
+  %load130 = load i8, ptr %getelementptr129, align 1
+  %getelementptr131 = getelementptr inbounds i8, ptr %load25, i64 17
+  store i8 %load130, ptr %getelementptr131, align 1
+  %getelementptr132 = getelementptr inbounds i8, ptr %load23, i64 18
+  %load133 = load i8, ptr %getelementptr132, align 1
+  %getelementptr134 = getelementptr inbounds i8, ptr %load21, i64 18
+  store i8 %lo...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/158036

