[llvm] Add a pass "SinkGEPConstOffset" (PR #140657)

via llvm-commits llvm-commits at lists.llvm.org
Mon May 26 20:36:48 PDT 2025


================
@@ -0,0 +1,155 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -mtriple=amdgcn-amd-amdhsa \
+; RUN: -passes="default<O3>,sink-gep-const-offset" -S | \
+; RUN: FileCheck %s --check-prefix=CHECK-O3
+; RUN: opt < %s -mtriple=amdgcn-amd-amdhsa \
+; RUN: -passes=sink-gep-const-offset -S | FileCheck %s --check-prefix=CHECK-SINK
+
+define void @kernel__0(ptr addrspace(5) noalias %pout, ptr addrspace(3) noalias %pin, i32 %num, i32 %ofst0, i32 %ofst1, i32 %ofst2, i32 %ofst3, i32 %ofst4) {
+; CHECK-O3-LABEL: @kernel__0(
+; CHECK-O3-NEXT:  entry:
+; CHECK-O3-NEXT:    [[SREM:%.*]] = srem i32 [[NUM:%.*]], 1024
+; CHECK-O3-NEXT:    [[ADD1:%.*]] = add nsw i32 [[SREM]], [[OFST0:%.*]]
+; CHECK-O3-NEXT:    [[ADD2:%.*]] = add nsw i32 [[OFST1:%.*]], [[SREM]]
+; CHECK-O3-NEXT:    [[ADD3:%.*]] = add nsw i32 [[OFST2:%.*]], [[SREM]]
+; CHECK-O3-NEXT:    [[ADD4:%.*]] = add nsw i32 [[OFST3:%.*]], [[SREM]]
+; CHECK-O3-NEXT:    [[GEP1:%.*]] = getelementptr inbounds half, ptr addrspace(3) [[GEP:%.*]], i32 [[ADD1]]
+; CHECK-O3-NEXT:    [[GEP3:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[GEP1]], i32 222
+; CHECK-O3-NEXT:    [[GEP2:%.*]] = getelementptr inbounds half, ptr addrspace(3) [[GEP]], i32 [[ADD2]]
+; CHECK-O3-NEXT:    [[GEP4:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[GEP2]], i32 222
+; CHECK-O3-NEXT:    [[TMP2:%.*]] = getelementptr inbounds half, ptr addrspace(3) [[GEP]], i32 [[ADD1]]
+; CHECK-O3-NEXT:    [[TMP3:%.*]] = getelementptr inbounds half, ptr addrspace(3) [[GEP]], i32 [[ADD2]]
+; CHECK-O3-NEXT:    [[GEP6:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[TMP2]], i32 1110
+; CHECK-O3-NEXT:    [[GEP7:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[TMP3]], i32 1332
+; CHECK-O3-NEXT:    [[MUL:%.*]] = mul nsw i32 [[SREM]], [[OFST0]]
+; CHECK-O3-NEXT:    [[GEP9:%.*]] = getelementptr inbounds half, ptr addrspace(5) [[GEP8:%.*]], i32 [[MUL]]
+; CHECK-O3-NEXT:    [[GEP10:%.*]] = getelementptr inbounds half, ptr addrspace(5) [[GEP9]], i32 [[ADD3]]
+; CHECK-O3-NEXT:    [[GEP12:%.*]] = getelementptr inbounds i8, ptr addrspace(5) [[GEP10]], i32 1110
+; CHECK-O3-NEXT:    [[GEP11:%.*]] = getelementptr inbounds half, ptr addrspace(5) [[GEP9]], i32 [[ADD4]]
+; CHECK-O3-NEXT:    [[GEP22:%.*]] = getelementptr inbounds i8, ptr addrspace(5) [[GEP11]], i32 1110
+; CHECK-O3-NEXT:    [[GEP13:%.*]] = getelementptr inbounds half, ptr addrspace(5) [[GEP8]], i32 [[MUL]]
+; CHECK-O3-NEXT:    [[GEP14:%.*]] = getelementptr inbounds half, ptr addrspace(5) [[GEP13]], i32 [[ADD3]]
+; CHECK-O3-NEXT:    [[GEP15:%.*]] = getelementptr inbounds half, ptr addrspace(5) [[GEP13]], i32 [[ADD4]]
+; CHECK-O3-NEXT:    [[GEP16:%.*]] = getelementptr inbounds i8, ptr addrspace(5) [[GEP14]], i32 2886
+; CHECK-O3-NEXT:    [[GEP17:%.*]] = getelementptr inbounds i8, ptr addrspace(5) [[GEP15]], i32 3108
+; CHECK-O3-NEXT:    [[GEP18:%.*]] = getelementptr inbounds i32, ptr addrspace(3) [[GEP]], i32 [[OFST4:%.*]]
+; CHECK-O3-NEXT:    [[GEP20:%.*]] = getelementptr inbounds i32, ptr addrspace(5) [[GEP8]], i32 [[OFST4]]
+; CHECK-O3-NEXT:    br label [[LOOP:%.*]]
+; CHECK-O3:       loop:
+; CHECK-O3-NEXT:    [[LOAD:%.*]] = load half, ptr addrspace(3) [[GEP3]], align 2
+; CHECK-O3-NEXT:    [[LOAD1:%.*]] = load half, ptr addrspace(3) [[GEP4]], align 2
+; CHECK-O3-NEXT:    [[LOAD2:%.*]] = load half, ptr addrspace(3) [[GEP6]], align 2
+; CHECK-O3-NEXT:    [[LOAD3:%.*]] = load half, ptr addrspace(3) [[GEP7]], align 2
+; CHECK-O3-NEXT:    store half [[LOAD]], ptr addrspace(5) [[GEP12]], align 2
+; CHECK-O3-NEXT:    store half [[LOAD1]], ptr addrspace(5) [[GEP22]], align 2
+; CHECK-O3-NEXT:    store half [[LOAD2]], ptr addrspace(5) [[GEP16]], align 2
+; CHECK-O3-NEXT:    store half [[LOAD3]], ptr addrspace(5) [[GEP17]], align 2
+; CHECK-O3-NEXT:    [[GEP19:%.*]] = load i32, ptr addrspace(3) [[GEP18]], align 4
+; CHECK-O3-NEXT:    [[GEP21:%.*]] = load i32, ptr addrspace(5) [[GEP20]], align 4
+; CHECK-O3-NEXT:    store i32 [[GEP21]], ptr addrspace(3) [[GEP]], align 4
+; CHECK-O3-NEXT:    [[ICMP:%.*]] = icmp eq i32 [[GEP19]], [[GEP21]]
+; CHECK-O3-NEXT:    br i1 [[ICMP]], label [[LOOP]], label [[EXIT:%.*]]
+; CHECK-O3:       exit:
+; CHECK-O3-NEXT:    ret void
+;
+; CHECK-SINK-LABEL: @kernel__0(
+; CHECK-SINK-NEXT:  entry:
+; CHECK-SINK-NEXT:    [[SREM:%.*]] = srem i32 [[NUM:%.*]], 1024
+; CHECK-SINK-NEXT:    [[ADD:%.*]] = add nsw i32 [[SREM]], 3
+; CHECK-SINK-NEXT:    [[ADD1:%.*]] = add nsw i32 [[SREM]], [[OFST0:%.*]]
+; CHECK-SINK-NEXT:    [[ADD2:%.*]] = add nsw i32 [[SREM]], [[OFST1:%.*]]
+; CHECK-SINK-NEXT:    [[ADD3:%.*]] = add nsw i32 [[SREM]], [[OFST2:%.*]]
+; CHECK-SINK-NEXT:    [[ADD4:%.*]] = add nsw i32 [[SREM]], [[OFST3:%.*]]
+; CHECK-SINK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds half, ptr addrspace(3) [[PIN:%.*]], i32 [[ADD1]]
+; CHECK-SINK-NEXT:    [[GEP1:%.*]] = getelementptr inbounds half, ptr addrspace(3) [[TMP0]], i32 111
+; CHECK-SINK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds half, ptr addrspace(3) [[PIN]], i32 [[ADD2]]
+; CHECK-SINK-NEXT:    [[GEP2:%.*]] = getelementptr inbounds half, ptr addrspace(3) [[TMP1]], i32 111
+; CHECK-SINK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds half, ptr addrspace(3) [[PIN]], i32 [[ADD1]]
+; CHECK-SINK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds half, ptr addrspace(3) [[PIN]], i32 [[ADD2]]
+; CHECK-SINK-NEXT:    [[GEP6:%.*]] = getelementptr inbounds half, ptr addrspace(3) [[TMP2]], i32 555
+; CHECK-SINK-NEXT:    [[GEP7:%.*]] = getelementptr inbounds half, ptr addrspace(3) [[TMP3]], i32 666
+; CHECK-SINK-NEXT:    [[MUL:%.*]] = mul nsw i32 [[SREM]], [[OFST0]]
+; CHECK-SINK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds half, ptr addrspace(5) [[POUT:%.*]], i32 [[MUL]]
+; CHECK-SINK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds half, ptr addrspace(5) [[TMP4]], i32 [[ADD3]]
+; CHECK-SINK-NEXT:    [[GEP10:%.*]] = getelementptr inbounds half, ptr addrspace(5) [[TMP5]], i32 555
+; CHECK-SINK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds half, ptr addrspace(5) [[TMP4]], i32 [[ADD4]]
+; CHECK-SINK-NEXT:    [[GEP11:%.*]] = getelementptr inbounds half, ptr addrspace(5) [[TMP6]], i32 555
+; CHECK-SINK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds half, ptr addrspace(5) [[POUT]], i32 [[MUL]]
+; CHECK-SINK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds half, ptr addrspace(5) [[TMP7]], i32 [[ADD3]]
+; CHECK-SINK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds half, ptr addrspace(5) [[TMP7]], i32 [[ADD4]]
+; CHECK-SINK-NEXT:    [[GEP16:%.*]] = getelementptr inbounds half, ptr addrspace(5) [[TMP8]], i32 1443
+; CHECK-SINK-NEXT:    [[GEP17:%.*]] = getelementptr inbounds half, ptr addrspace(5) [[TMP9]], i32 1554
+; CHECK-SINK-NEXT:    br label [[LOOP:%.*]]
+; CHECK-SINK:       loop:
+; CHECK-SINK-NEXT:    [[LOAD:%.*]] = load half, ptr addrspace(3) [[GEP1]], align 2
+; CHECK-SINK-NEXT:    [[LOAD1:%.*]] = load half, ptr addrspace(3) [[GEP2]], align 2
+; CHECK-SINK-NEXT:    [[LOAD2:%.*]] = load half, ptr addrspace(3) [[GEP6]], align 2
+; CHECK-SINK-NEXT:    [[LOAD3:%.*]] = load half, ptr addrspace(3) [[GEP7]], align 2
+; CHECK-SINK-NEXT:    store half [[LOAD]], ptr addrspace(5) [[GEP10]], align 2
+; CHECK-SINK-NEXT:    store half [[LOAD1]], ptr addrspace(5) [[GEP11]], align 2
+; CHECK-SINK-NEXT:    store half [[LOAD2]], ptr addrspace(5) [[GEP16]], align 2
+; CHECK-SINK-NEXT:    store half [[LOAD3]], ptr addrspace(5) [[GEP17]], align 2
+; CHECK-SINK-NEXT:    [[GEP18:%.*]] = getelementptr inbounds i32, ptr addrspace(3) [[PIN]], i32 [[OFST4:%.*]]
+; CHECK-SINK-NEXT:    [[GEP19:%.*]] = load i32, ptr addrspace(3) [[GEP18]], align 4
+; CHECK-SINK-NEXT:    [[GEP20:%.*]] = getelementptr inbounds i32, ptr addrspace(5) [[POUT]], i32 [[OFST4]]
+; CHECK-SINK-NEXT:    [[GEP21:%.*]] = load i32, ptr addrspace(5) [[GEP20]], align 4
+; CHECK-SINK-NEXT:    store i32 [[GEP21]], ptr addrspace(3) [[PIN]], align 4
+; CHECK-SINK-NEXT:    [[ICMP:%.*]] = icmp eq i32 [[GEP19]], [[GEP21]]
+; CHECK-SINK-NEXT:    br i1 [[ICMP]], label [[LOOP]], label [[EXIT:%.*]]
+; CHECK-SINK:       exit:
+; CHECK-SINK-NEXT:    ret void
+;
+entry:
+  %srem = srem i32 %num, 1024
+  %add = add nsw i32 %srem, 3
+  %add1 = add nsw i32 %srem, %ofst0
+  %add2 = add nsw i32 %srem, %ofst1
+  %add3 = add nsw i32 %srem, %ofst2
+  %add4 = add nsw i32 %srem, %ofst3
+
+  %gep = getelementptr inbounds half, ptr addrspace(3) %pin, i32 111
+  %gep1 = getelementptr inbounds half, ptr addrspace(3) %gep, i32 %add1
+  %gep2 = getelementptr inbounds half, ptr addrspace(3) %gep, i32 %add2
+  %gep3 = getelementptr inbounds half, ptr addrspace(3) %pin, i32 222
+  %gep4 = getelementptr inbounds half, ptr addrspace(3) %gep3, i32 %add1
+  %gep5 = getelementptr inbounds half, ptr addrspace(3) %gep3, i32 %add2
+  %gep6 = getelementptr inbounds half, ptr addrspace(3) %gep4, i32 333
+  %gep7 = getelementptr inbounds half, ptr addrspace(3) %gep5, i32 444
----------------
StevenYangCC wrote:

I have made changes based on your suggestions; please verify the results.

https://github.com/llvm/llvm-project/pull/140657


More information about the llvm-commits mailing list