[llvm] Add a pass "SinkGEPConstOffset" (PR #140657)

via llvm-commits llvm-commits at lists.llvm.org
Sun May 25 23:43:58 PDT 2025


================
@@ -0,0 +1,106 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -mtriple=amdgcn-amd-amdhsa \
+; RUN: -passes=sink-gep-const-offset -S | FileCheck %s
+
+define void @kernel__0(ptr addrspace(5) noalias %pout, ptr addrspace(3) noalias %pin, i32 %num, i32 %ofst0, i32 %ofst1, i32 %ofst2, i32 %ofst3, i32 %ofst4) {
+; CHECK-LABEL: @kernel__0(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = srem i32 [[NUM:%.*]], 1024
+; CHECK-NEXT:    [[TMP1:%.*]] = add nsw i32 [[TMP0]], 3
+; CHECK-NEXT:    [[TMP2:%.*]] = add nsw i32 [[TMP0]], [[OFST0:%.*]]
+; CHECK-NEXT:    [[TMP3:%.*]] = add nsw i32 [[TMP0]], [[OFST1:%.*]]
+; CHECK-NEXT:    [[TMP4:%.*]] = add nsw i32 [[TMP0]], [[OFST2:%.*]]
+; CHECK-NEXT:    [[TMP5:%.*]] = add nsw i32 [[TMP0]], [[OFST3:%.*]]
+; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds half, ptr addrspace(3) [[PIN:%.*]], i32 [[TMP2]]
+; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds half, ptr addrspace(3) [[TMP6]], i32 111
+; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds half, ptr addrspace(3) [[PIN]], i32 [[TMP3]]
+; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds half, ptr addrspace(3) [[TMP8]], i32 111
+; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr inbounds half, ptr addrspace(3) [[PIN]], i32 [[TMP2]]
+; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr inbounds half, ptr addrspace(3) [[PIN]], i32 [[TMP3]]
+; CHECK-NEXT:    [[T0:%.*]] = getelementptr inbounds half, ptr addrspace(3) [[TMP10]], i32 555
+; CHECK-NEXT:    [[T1:%.*]] = getelementptr inbounds half, ptr addrspace(3) [[TMP11]], i32 666
+; CHECK-NEXT:    [[TMP12:%.*]] = mul nsw i32 [[TMP0]], [[OFST0]]
+; CHECK-NEXT:    [[TMP13:%.*]] = getelementptr inbounds half, ptr addrspace(5) [[POUT:%.*]], i32 [[TMP12]]
+; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr inbounds half, ptr addrspace(5) [[TMP13]], i32 [[TMP4]]
+; CHECK-NEXT:    [[TMP15:%.*]] = getelementptr inbounds half, ptr addrspace(5) [[TMP14]], i32 555
+; CHECK-NEXT:    [[TMP16:%.*]] = getelementptr inbounds half, ptr addrspace(5) [[TMP13]], i32 [[TMP5]]
+; CHECK-NEXT:    [[TMP17:%.*]] = getelementptr inbounds half, ptr addrspace(5) [[TMP16]], i32 555
+; CHECK-NEXT:    [[TMP18:%.*]] = getelementptr inbounds half, ptr addrspace(5) [[POUT]], i32 [[TMP12]]
+; CHECK-NEXT:    [[TMP19:%.*]] = getelementptr inbounds half, ptr addrspace(5) [[TMP18]], i32 [[TMP4]]
+; CHECK-NEXT:    [[TMP20:%.*]] = getelementptr inbounds half, ptr addrspace(5) [[TMP18]], i32 [[TMP5]]
+; CHECK-NEXT:    [[T2:%.*]] = getelementptr inbounds half, ptr addrspace(5) [[TMP19]], i32 1443
+; CHECK-NEXT:    [[T3:%.*]] = getelementptr inbounds half, ptr addrspace(5) [[TMP20]], i32 1554
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[TMP21:%.*]] = load half, ptr addrspace(3) [[TMP7]], align 2
+; CHECK-NEXT:    [[TMP22:%.*]] = load half, ptr addrspace(3) [[TMP9]], align 2
+; CHECK-NEXT:    [[TMP23:%.*]] = load half, ptr addrspace(3) [[T0]], align 2
+; CHECK-NEXT:    [[TMP24:%.*]] = load half, ptr addrspace(3) [[T1]], align 2
+; CHECK-NEXT:    store half [[TMP21]], ptr addrspace(5) [[TMP15]], align 2
+; CHECK-NEXT:    store half [[TMP22]], ptr addrspace(5) [[TMP17]], align 2
+; CHECK-NEXT:    store half [[TMP23]], ptr addrspace(5) [[T2]], align 2
+; CHECK-NEXT:    store half [[TMP24]], ptr addrspace(5) [[T3]], align 2
+; CHECK-NEXT:    [[TMP25:%.*]] = getelementptr inbounds i32, ptr addrspace(3) [[PIN]], i32 [[OFST4:%.*]]
+; CHECK-NEXT:    [[TMP26:%.*]] = load i32, ptr addrspace(3) [[TMP25]], align 4
+; CHECK-NEXT:    [[TMP27:%.*]] = getelementptr inbounds i32, ptr addrspace(5) [[POUT]], i32 [[OFST4]]
+; CHECK-NEXT:    [[TMP28:%.*]] = load i32, ptr addrspace(5) [[TMP27]], align 4
+; CHECK-NEXT:    store i32 [[TMP28]], ptr addrspace(3) [[PIN]], align 4
+; CHECK-NEXT:    [[COND:%.*]] = icmp eq i32 [[TMP26]], [[TMP28]]
+; CHECK-NEXT:    br i1 [[COND]], label [[LOOP]], label [[EXIT:%.*]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret void
+;
+entry:
+  %0 = srem i32 %num, 1024
----------------
StevenYangCC wrote:

I have made changes based on your suggestions; please verify the results.

https://github.com/llvm/llvm-project/pull/140657


More information about the llvm-commits mailing list