[llvm] Add a pass "SinkGEPConstOffset" (PR #140657)
via llvm-commits
llvm-commits at lists.llvm.org
Sun May 25 23:43:58 PDT 2025
================
@@ -0,0 +1,106 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -mtriple=amdgcn-amd-amdhsa \
+; RUN: -passes=sink-gep-const-offset -S | FileCheck %s
+
+define void @kernel__0(ptr addrspace(5) noalias %pout, ptr addrspace(3) noalias %pin, i32 %num, i32 %ofst0, i32 %ofst1, i32 %ofst2, i32 %ofst3, i32 %ofst4) {
+; CHECK-LABEL: @kernel__0(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = srem i32 [[NUM:%.*]], 1024
+; CHECK-NEXT: [[TMP1:%.*]] = add nsw i32 [[TMP0]], 3
+; CHECK-NEXT: [[TMP2:%.*]] = add nsw i32 [[TMP0]], [[OFST0:%.*]]
+; CHECK-NEXT: [[TMP3:%.*]] = add nsw i32 [[TMP0]], [[OFST1:%.*]]
+; CHECK-NEXT: [[TMP4:%.*]] = add nsw i32 [[TMP0]], [[OFST2:%.*]]
+; CHECK-NEXT: [[TMP5:%.*]] = add nsw i32 [[TMP0]], [[OFST3:%.*]]
+; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds half, ptr addrspace(3) [[PIN:%.*]], i32 [[TMP2]]
+; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds half, ptr addrspace(3) [[TMP6]], i32 111
+; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds half, ptr addrspace(3) [[PIN]], i32 [[TMP3]]
+; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds half, ptr addrspace(3) [[TMP8]], i32 111
+; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds half, ptr addrspace(3) [[PIN]], i32 [[TMP2]]
+; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds half, ptr addrspace(3) [[PIN]], i32 [[TMP3]]
+; CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds half, ptr addrspace(3) [[TMP10]], i32 555
+; CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds half, ptr addrspace(3) [[TMP11]], i32 666
+; CHECK-NEXT: [[TMP12:%.*]] = mul nsw i32 [[TMP0]], [[OFST0]]
+; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds half, ptr addrspace(5) [[POUT:%.*]], i32 [[TMP12]]
+; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds half, ptr addrspace(5) [[TMP13]], i32 [[TMP4]]
+; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds half, ptr addrspace(5) [[TMP14]], i32 555
+; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds half, ptr addrspace(5) [[TMP13]], i32 [[TMP5]]
+; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds half, ptr addrspace(5) [[TMP16]], i32 555
+; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds half, ptr addrspace(5) [[POUT]], i32 [[TMP12]]
+; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds half, ptr addrspace(5) [[TMP18]], i32 [[TMP4]]
+; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds half, ptr addrspace(5) [[TMP18]], i32 [[TMP5]]
+; CHECK-NEXT: [[T2:%.*]] = getelementptr inbounds half, ptr addrspace(5) [[TMP19]], i32 1443
+; CHECK-NEXT: [[T3:%.*]] = getelementptr inbounds half, ptr addrspace(5) [[TMP20]], i32 1554
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[TMP21:%.*]] = load half, ptr addrspace(3) [[TMP7]], align 2
+; CHECK-NEXT: [[TMP22:%.*]] = load half, ptr addrspace(3) [[TMP9]], align 2
+; CHECK-NEXT: [[TMP23:%.*]] = load half, ptr addrspace(3) [[T0]], align 2
+; CHECK-NEXT: [[TMP24:%.*]] = load half, ptr addrspace(3) [[T1]], align 2
+; CHECK-NEXT: store half [[TMP21]], ptr addrspace(5) [[TMP15]], align 2
+; CHECK-NEXT: store half [[TMP22]], ptr addrspace(5) [[TMP17]], align 2
+; CHECK-NEXT: store half [[TMP23]], ptr addrspace(5) [[T2]], align 2
+; CHECK-NEXT: store half [[TMP24]], ptr addrspace(5) [[T3]], align 2
+; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i32, ptr addrspace(3) [[PIN]], i32 [[OFST4:%.*]]
+; CHECK-NEXT: [[TMP26:%.*]] = load i32, ptr addrspace(3) [[TMP25]], align 4
+; CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds i32, ptr addrspace(5) [[POUT]], i32 [[OFST4]]
+; CHECK-NEXT: [[TMP28:%.*]] = load i32, ptr addrspace(5) [[TMP27]], align 4
+; CHECK-NEXT: store i32 [[TMP28]], ptr addrspace(3) [[PIN]], align 4
+; CHECK-NEXT: [[COND:%.*]] = icmp eq i32 [[TMP26]], [[TMP28]]
+; CHECK-NEXT: br i1 [[COND]], label [[LOOP]], label [[EXIT:%.*]]
+; CHECK: exit:
+; CHECK-NEXT: ret void
+;
+entry:
+ %0 = srem i32 %num, 1024
----------------
StevenYangCC wrote:
I have made changes based on your suggestions; please verify the results.
https://github.com/llvm/llvm-project/pull/140657
More information about the llvm-commits mailing list