[llvm] Add a pass "SinkGEPConstOffset" (PR #140657)
via llvm-commits
llvm-commits at lists.llvm.org
Mon May 26 20:36:48 PDT 2025
================
@@ -0,0 +1,155 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -mtriple=amdgcn-amd-amdhsa \
+; RUN: -passes="default<O3>,sink-gep-const-offset" -S | \
+; RUN: FileCheck %s --check-prefix=CHECK-O3
+; RUN: opt < %s -mtriple=amdgcn-amd-amdhsa \
+; RUN: -passes=sink-gep-const-offset -S | FileCheck %s --check-prefix=CHECK-SINK
+
+define void @kernel__0(ptr addrspace(5) noalias %pout, ptr addrspace(3) noalias %pin, i32 %num, i32 %ofst0, i32 %ofst1, i32 %ofst2, i32 %ofst3, i32 %ofst4) {
+; CHECK-O3-LABEL: @kernel__0(
+; CHECK-O3-NEXT: entry:
+; CHECK-O3-NEXT: [[SREM:%.*]] = srem i32 [[NUM:%.*]], 1024
+; CHECK-O3-NEXT: [[ADD1:%.*]] = add nsw i32 [[SREM]], [[OFST0:%.*]]
+; CHECK-O3-NEXT: [[ADD2:%.*]] = add nsw i32 [[OFST1:%.*]], [[SREM]]
+; CHECK-O3-NEXT: [[ADD3:%.*]] = add nsw i32 [[OFST2:%.*]], [[SREM]]
+; CHECK-O3-NEXT: [[ADD4:%.*]] = add nsw i32 [[OFST3:%.*]], [[SREM]]
+; CHECK-O3-NEXT: [[GEP1:%.*]] = getelementptr inbounds half, ptr addrspace(3) [[GEP:%.*]], i32 [[ADD1]]
+; CHECK-O3-NEXT: [[GEP3:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[GEP1]], i32 222
+; CHECK-O3-NEXT: [[GEP2:%.*]] = getelementptr inbounds half, ptr addrspace(3) [[GEP]], i32 [[ADD2]]
+; CHECK-O3-NEXT: [[GEP4:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[GEP2]], i32 222
+; CHECK-O3-NEXT: [[TMP2:%.*]] = getelementptr inbounds half, ptr addrspace(3) [[GEP]], i32 [[ADD1]]
+; CHECK-O3-NEXT: [[TMP3:%.*]] = getelementptr inbounds half, ptr addrspace(3) [[GEP]], i32 [[ADD2]]
+; CHECK-O3-NEXT: [[GEP6:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[TMP2]], i32 1110
+; CHECK-O3-NEXT: [[GEP7:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[TMP3]], i32 1332
+; CHECK-O3-NEXT: [[MUL:%.*]] = mul nsw i32 [[SREM]], [[OFST0]]
+; CHECK-O3-NEXT: [[GEP9:%.*]] = getelementptr inbounds half, ptr addrspace(5) [[GEP8:%.*]], i32 [[MUL]]
+; CHECK-O3-NEXT: [[GEP10:%.*]] = getelementptr inbounds half, ptr addrspace(5) [[GEP9]], i32 [[ADD3]]
+; CHECK-O3-NEXT: [[GEP12:%.*]] = getelementptr inbounds i8, ptr addrspace(5) [[GEP10]], i32 1110
+; CHECK-O3-NEXT: [[GEP11:%.*]] = getelementptr inbounds half, ptr addrspace(5) [[GEP9]], i32 [[ADD4]]
+; CHECK-O3-NEXT: [[GEP22:%.*]] = getelementptr inbounds i8, ptr addrspace(5) [[GEP11]], i32 1110
+; CHECK-O3-NEXT: [[GEP13:%.*]] = getelementptr inbounds half, ptr addrspace(5) [[GEP8]], i32 [[MUL]]
+; CHECK-O3-NEXT: [[GEP14:%.*]] = getelementptr inbounds half, ptr addrspace(5) [[GEP13]], i32 [[ADD3]]
+; CHECK-O3-NEXT: [[GEP15:%.*]] = getelementptr inbounds half, ptr addrspace(5) [[GEP13]], i32 [[ADD4]]
+; CHECK-O3-NEXT: [[GEP16:%.*]] = getelementptr inbounds i8, ptr addrspace(5) [[GEP14]], i32 2886
+; CHECK-O3-NEXT: [[GEP17:%.*]] = getelementptr inbounds i8, ptr addrspace(5) [[GEP15]], i32 3108
+; CHECK-O3-NEXT: [[GEP18:%.*]] = getelementptr inbounds i32, ptr addrspace(3) [[GEP]], i32 [[OFST4:%.*]]
+; CHECK-O3-NEXT: [[GEP20:%.*]] = getelementptr inbounds i32, ptr addrspace(5) [[GEP8]], i32 [[OFST4]]
+; CHECK-O3-NEXT: br label [[LOOP:%.*]]
+; CHECK-O3: loop:
+; CHECK-O3-NEXT: [[LOAD:%.*]] = load half, ptr addrspace(3) [[GEP3]], align 2
+; CHECK-O3-NEXT: [[LOAD1:%.*]] = load half, ptr addrspace(3) [[GEP4]], align 2
+; CHECK-O3-NEXT: [[LOAD2:%.*]] = load half, ptr addrspace(3) [[GEP6]], align 2
+; CHECK-O3-NEXT: [[LOAD3:%.*]] = load half, ptr addrspace(3) [[GEP7]], align 2
+; CHECK-O3-NEXT: store half [[LOAD]], ptr addrspace(5) [[GEP12]], align 2
+; CHECK-O3-NEXT: store half [[LOAD1]], ptr addrspace(5) [[GEP22]], align 2
+; CHECK-O3-NEXT: store half [[LOAD2]], ptr addrspace(5) [[GEP16]], align 2
+; CHECK-O3-NEXT: store half [[LOAD3]], ptr addrspace(5) [[GEP17]], align 2
+; CHECK-O3-NEXT: [[GEP19:%.*]] = load i32, ptr addrspace(3) [[GEP18]], align 4
+; CHECK-O3-NEXT: [[GEP21:%.*]] = load i32, ptr addrspace(5) [[GEP20]], align 4
+; CHECK-O3-NEXT: store i32 [[GEP21]], ptr addrspace(3) [[GEP]], align 4
+; CHECK-O3-NEXT: [[ICMP:%.*]] = icmp eq i32 [[GEP19]], [[GEP21]]
+; CHECK-O3-NEXT: br i1 [[ICMP]], label [[LOOP]], label [[EXIT:%.*]]
+; CHECK-O3: exit:
+; CHECK-O3-NEXT: ret void
+;
+; CHECK-SINK-LABEL: @kernel__0(
+; CHECK-SINK-NEXT: entry:
+; CHECK-SINK-NEXT: [[SREM:%.*]] = srem i32 [[NUM:%.*]], 1024
+; CHECK-SINK-NEXT: [[ADD:%.*]] = add nsw i32 [[SREM]], 3
+; CHECK-SINK-NEXT: [[ADD1:%.*]] = add nsw i32 [[SREM]], [[OFST0:%.*]]
+; CHECK-SINK-NEXT: [[ADD2:%.*]] = add nsw i32 [[SREM]], [[OFST1:%.*]]
+; CHECK-SINK-NEXT: [[ADD3:%.*]] = add nsw i32 [[SREM]], [[OFST2:%.*]]
+; CHECK-SINK-NEXT: [[ADD4:%.*]] = add nsw i32 [[SREM]], [[OFST3:%.*]]
+; CHECK-SINK-NEXT: [[TMP0:%.*]] = getelementptr inbounds half, ptr addrspace(3) [[PIN:%.*]], i32 [[ADD1]]
+; CHECK-SINK-NEXT: [[GEP1:%.*]] = getelementptr inbounds half, ptr addrspace(3) [[TMP0]], i32 111
+; CHECK-SINK-NEXT: [[TMP1:%.*]] = getelementptr inbounds half, ptr addrspace(3) [[PIN]], i32 [[ADD2]]
+; CHECK-SINK-NEXT: [[GEP2:%.*]] = getelementptr inbounds half, ptr addrspace(3) [[TMP1]], i32 111
+; CHECK-SINK-NEXT: [[TMP2:%.*]] = getelementptr inbounds half, ptr addrspace(3) [[PIN]], i32 [[ADD1]]
+; CHECK-SINK-NEXT: [[TMP3:%.*]] = getelementptr inbounds half, ptr addrspace(3) [[PIN]], i32 [[ADD2]]
+; CHECK-SINK-NEXT: [[GEP6:%.*]] = getelementptr inbounds half, ptr addrspace(3) [[TMP2]], i32 555
+; CHECK-SINK-NEXT: [[GEP7:%.*]] = getelementptr inbounds half, ptr addrspace(3) [[TMP3]], i32 666
+; CHECK-SINK-NEXT: [[MUL:%.*]] = mul nsw i32 [[SREM]], [[OFST0]]
+; CHECK-SINK-NEXT: [[TMP4:%.*]] = getelementptr inbounds half, ptr addrspace(5) [[POUT:%.*]], i32 [[MUL]]
+; CHECK-SINK-NEXT: [[TMP5:%.*]] = getelementptr inbounds half, ptr addrspace(5) [[TMP4]], i32 [[ADD3]]
+; CHECK-SINK-NEXT: [[GEP10:%.*]] = getelementptr inbounds half, ptr addrspace(5) [[TMP5]], i32 555
+; CHECK-SINK-NEXT: [[TMP6:%.*]] = getelementptr inbounds half, ptr addrspace(5) [[TMP4]], i32 [[ADD4]]
+; CHECK-SINK-NEXT: [[GEP11:%.*]] = getelementptr inbounds half, ptr addrspace(5) [[TMP6]], i32 555
+; CHECK-SINK-NEXT: [[TMP7:%.*]] = getelementptr inbounds half, ptr addrspace(5) [[POUT]], i32 [[MUL]]
+; CHECK-SINK-NEXT: [[TMP8:%.*]] = getelementptr inbounds half, ptr addrspace(5) [[TMP7]], i32 [[ADD3]]
+; CHECK-SINK-NEXT: [[TMP9:%.*]] = getelementptr inbounds half, ptr addrspace(5) [[TMP7]], i32 [[ADD4]]
+; CHECK-SINK-NEXT: [[GEP16:%.*]] = getelementptr inbounds half, ptr addrspace(5) [[TMP8]], i32 1443
+; CHECK-SINK-NEXT: [[GEP17:%.*]] = getelementptr inbounds half, ptr addrspace(5) [[TMP9]], i32 1554
+; CHECK-SINK-NEXT: br label [[LOOP:%.*]]
+; CHECK-SINK: loop:
+; CHECK-SINK-NEXT: [[LOAD:%.*]] = load half, ptr addrspace(3) [[GEP1]], align 2
+; CHECK-SINK-NEXT: [[LOAD1:%.*]] = load half, ptr addrspace(3) [[GEP2]], align 2
+; CHECK-SINK-NEXT: [[LOAD2:%.*]] = load half, ptr addrspace(3) [[GEP6]], align 2
+; CHECK-SINK-NEXT: [[LOAD3:%.*]] = load half, ptr addrspace(3) [[GEP7]], align 2
+; CHECK-SINK-NEXT: store half [[LOAD]], ptr addrspace(5) [[GEP10]], align 2
+; CHECK-SINK-NEXT: store half [[LOAD1]], ptr addrspace(5) [[GEP11]], align 2
+; CHECK-SINK-NEXT: store half [[LOAD2]], ptr addrspace(5) [[GEP16]], align 2
+; CHECK-SINK-NEXT: store half [[LOAD3]], ptr addrspace(5) [[GEP17]], align 2
+; CHECK-SINK-NEXT: [[GEP18:%.*]] = getelementptr inbounds i32, ptr addrspace(3) [[PIN]], i32 [[OFST4:%.*]]
+; CHECK-SINK-NEXT: [[GEP19:%.*]] = load i32, ptr addrspace(3) [[GEP18]], align 4
+; CHECK-SINK-NEXT: [[GEP20:%.*]] = getelementptr inbounds i32, ptr addrspace(5) [[POUT]], i32 [[OFST4]]
+; CHECK-SINK-NEXT: [[GEP21:%.*]] = load i32, ptr addrspace(5) [[GEP20]], align 4
+; CHECK-SINK-NEXT: store i32 [[GEP21]], ptr addrspace(3) [[PIN]], align 4
+; CHECK-SINK-NEXT: [[ICMP:%.*]] = icmp eq i32 [[GEP19]], [[GEP21]]
+; CHECK-SINK-NEXT: br i1 [[ICMP]], label [[LOOP]], label [[EXIT:%.*]]
+; CHECK-SINK: exit:
+; CHECK-SINK-NEXT: ret void
+;
+entry:
+ %srem = srem i32 %num, 1024
+ %add = add nsw i32 %srem, 3
+ %add1 = add nsw i32 %srem, %ofst0
+ %add2 = add nsw i32 %srem, %ofst1
+ %add3 = add nsw i32 %srem, %ofst2
+ %add4 = add nsw i32 %srem, %ofst3
+
+ %gep = getelementptr inbounds half, ptr addrspace(3) %pin, i32 111
+ %gep1 = getelementptr inbounds half, ptr addrspace(3) %gep, i32 %add1
+ %gep2 = getelementptr inbounds half, ptr addrspace(3) %gep, i32 %add2
+ %gep3 = getelementptr inbounds half, ptr addrspace(3) %pin, i32 222
+ %gep4 = getelementptr inbounds half, ptr addrspace(3) %gep3, i32 %add1
+ %gep5 = getelementptr inbounds half, ptr addrspace(3) %gep3, i32 %add2
+ %gep6 = getelementptr inbounds half, ptr addrspace(3) %gep4, i32 333
+ %gep7 = getelementptr inbounds half, ptr addrspace(3) %gep5, i32 444
----------------
StevenYangCC wrote:
I have made changes based on your suggestions; please verify the results.
https://github.com/llvm/llvm-project/pull/140657
More information about the llvm-commits mailing list