[llvm-branch-commits] [clang] 21fef82 - [SimplifyCFG] Pre-commit test for folding branches in simplify cfg
Zhang Xiang via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Mon Dec 4 00:34:25 PST 2023
Author: Zhang Xiang
Date: 2023-12-04T16:25:37+08:00
New Revision: 21fef821eb83d0d426a5e2de20469dcdd41598f1
URL: https://github.com/llvm/llvm-project/commit/21fef821eb83d0d426a5e2de20469dcdd41598f1
DIFF: https://github.com/llvm/llvm-project/commit/21fef821eb83d0d426a5e2de20469dcdd41598f1.diff
LOG: [SimplifyCFG] Pre-commit test for folding branches in simplify cfg
Added:
clang/test/CodeGenCUDA/simplify-cfg-unroll.cu
Modified:
Removed:
################################################################################
diff --git a/clang/test/CodeGenCUDA/simplify-cfg-unroll.cu b/clang/test/CodeGenCUDA/simplify-cfg-unroll.cu
new file mode 100644
index 0000000000000..b6502ce76c298
--- /dev/null
+++ b/clang/test/CodeGenCUDA/simplify-cfg-unroll.cu
@@ -0,0 +1,86 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4
+// REQUIRES: amdgpu-registered-target
+// REQUIRES: x86-registered-target
+// RUN: %clang_cc1 -O2 "-aux-triple" "x86_64-unknown-linux-gnu" "-triple" "amdgcn-amd-amdhsa" \
+// RUN: -fcuda-is-device "-aux-target-cpu" "x86-64" -emit-llvm -o - %s | FileCheck %s
+
+#include "Inputs/cuda.h"
+
+__device__ void bar();
+
+// CHECK-LABEL: define dso_local void @_Z4funciPPiiS_(
+// CHECK-SAME: i32 noundef [[IDX:%.*]], ptr nocapture noundef readonly [[ARR:%.*]], i32 noundef [[DIMS:%.*]], ptr nocapture noundef [[OUT:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[CMP127:%.*]] = icmp eq i32 [[DIMS]], 0
+// CHECK-NEXT: br i1 [[CMP127]], label [[CLEANUP:%.*]], label [[IF_END_PREHEADER:%.*]]
+// CHECK: if.end.preheader:
+// CHECK-NEXT: [[TMP0:%.*]] = add i32 [[DIMS]], -1
+// CHECK-NEXT: [[UMIN:%.*]] = tail call i32 @llvm.umin.i32(i32 [[TMP0]], i32 15)
+// CHECK-NEXT: [[TMP1:%.*]] = add nuw nsw i32 [[UMIN]], 1
+// CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext nneg i32 [[TMP1]] to i64
+// CHECK-NEXT: [[ARRAYIDX13_1:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 1
+// CHECK-NEXT: [[ARRAYIDX13_2:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 2
+// CHECK-NEXT: [[ARRAYIDX13_3:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 3
+// CHECK-NEXT: br label [[IF_END:%.*]]
+// CHECK: if.end:
+// CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[IF_END_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], [[IF_END]] ]
+// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[ARR]], i64 [[INDVARS_IV]]
+// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8, !tbaa [[TBAA3:![0-9]+]]
+// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4, !tbaa [[TBAA7:![0-9]+]]
+// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[OUT]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP4]], [[TMP3]]
+// CHECK-NEXT: store i32 [[ADD14]], ptr [[OUT]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT: tail call void @_Z3barv() #[[ATTR3:[0-9]+]]
+// CHECK-NEXT: [[TMP5:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT: [[ARRAYIDX11_1:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i64 1
+// CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[ARRAYIDX11_1]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[ARRAYIDX13_1]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT: [[ADD14_1:%.*]] = add nsw i32 [[TMP7]], [[TMP6]]
+// CHECK-NEXT: store i32 [[ADD14_1]], ptr [[ARRAYIDX13_1]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT: tail call void @_Z3barv() #[[ATTR3]]
+// CHECK-NEXT: [[TMP8:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT: [[ARRAYIDX11_2:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i64 2
+// CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[ARRAYIDX11_2]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[ARRAYIDX13_2]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT: [[ADD14_2:%.*]] = add nsw i32 [[TMP10]], [[TMP9]]
+// CHECK-NEXT: store i32 [[ADD14_2]], ptr [[ARRAYIDX13_2]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT: tail call void @_Z3barv() #[[ATTR3]]
+// CHECK-NEXT: [[TMP11:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT: [[ARRAYIDX11_3:%.*]] = getelementptr inbounds i32, ptr [[TMP11]], i64 3
+// CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[ARRAYIDX11_3]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[ARRAYIDX13_3]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT: [[ADD14_3:%.*]] = add nsw i32 [[TMP13]], [[TMP12]]
+// CHECK-NEXT: store i32 [[ADD14_3]], ptr [[ARRAYIDX13_3]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT: tail call void @_Z3barv() #[[ATTR3]]
+// CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+// CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
+// CHECK-NEXT: br i1 [[EXITCOND]], label [[CLEANUP]], label [[IF_END]], !llvm.loop [[LOOP9:![0-9]+]]
+// CHECK: cleanup:
+// CHECK-NEXT: ret void
+//
+__device__ void func(int Idx, int *Arr[], int Dims, int *Out) { // Test input: unroll-annotated 16-trip loop with an early exit, wrapping a 4-trip inner loop.
+ #pragma unroll
+ for (int Dim = 0; Dim < 16; ++Dim) {
+ if (Dim == Dims) { // Early exit once Dim reaches the caller-supplied Dims.
+ break;
+ }
+ int divmod = Arr[Dim][Idx]; // Read the element of row Dim selected by the running Idx.
+ Idx = divmod + 1; // The only later use of Idx is the load above on the next iteration.
+
+ for (int arg = 0; arg < 4; arg++) {
+ Out[arg] += Arr[Dim][arg]; // Accumulate the first four elements of row Dim into Out[0..3].
+ bar(); // Device function that is only declared in this file; called once per inner iteration.
+ }
+ }
+} // NOTE: the CHECK lines for this function are autogenerated (utils/update_cc_test_checks.py); regenerate them if this code changes.
+//.
+// CHECK: [[TBAA3]] = !{[[META4:![0-9]+]], [[META4]], i64 0}
+// CHECK: [[META4]] = !{!"any pointer", [[META5:![0-9]+]], i64 0}
+// CHECK: [[META5]] = !{!"omnipotent char", [[META6:![0-9]+]], i64 0}
+// CHECK: [[META6]] = !{!"Simple C++ TBAA"}
+// CHECK: [[TBAA7]] = !{[[META8:![0-9]+]], [[META8]], i64 0}
+// CHECK: [[META8]] = !{!"int", [[META5]], i64 0}
+// CHECK: [[LOOP9]] = distinct !{[[LOOP9]], [[META10:![0-9]+]], [[META11:![0-9]+]]}
+// CHECK: [[META10]] = !{!"llvm.loop.mustprogress"}
+// CHECK: [[META11]] = !{!"llvm.loop.unroll.enable"}
+//.
More information about the llvm-branch-commits mailing list