[llvm] 7128b12 - SimpleLoopUnswitch: Restore uniform unswitch test

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Fri Jul 7 06:53:16 PDT 2023

Author: Matt Arsenault
Date: 2023-07-07T09:53:12-04:00
New Revision: 7128b127c38dfae27b8b55e76ead51dfdda9b6ca

URL: https://github.com/llvm/llvm-project/commit/7128b127c38dfae27b8b55e76ead51dfdda9b6ca
DIFF: https://github.com/llvm/llvm-project/commit/7128b127c38dfae27b8b55e76ead51dfdda9b6ca.diff

LOG: SimpleLoopUnswitch: Restore uniform unswitch test

This was supposed to document the new PM limitation but
was deleted in fb4113ef0c8b2c5e5e2817e9ca14fb57a6d252be

Switch to generated checks since that's more reliable than XFAIL, and
just preserve the preferred results as comments.




diff  --git a/llvm/test/Transforms/SimpleLoopUnswitch/AMDGPU/uniform-unswitch.ll b/llvm/test/Transforms/SimpleLoopUnswitch/AMDGPU/uniform-unswitch.ll
new file mode 100644
index 00000000000000..8d1b0c7177be4b
--- /dev/null
+++ b/llvm/test/Transforms/SimpleLoopUnswitch/AMDGPU/uniform-unswitch.ll
@@ -0,0 +1,77 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3
+; RUN: opt -mtriple=amdgcn-- -passes='default<O3>' -S %s | FileCheck %s
+; Check that loop unswitch happened and condition hoisted out of the loop.
+; Condition is uniform so even targets with divergence should perform unswitching.
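+; This currently fails with the new pass manager:
+; https://bugs.llvm.org/show_bug.cgi?id=48819
+; The correct behaviour (allow uniform non-trivial branches to be
+; unswitched on all targets) requires access to the function-level
+; divergence analysis from a loop transform, which is currently not
+; supported in the new pass manager.
+; The SHOULDBE lines below only document the preferred output; the RUN line
+; enables just the default prefix, so FileCheck does not verify them. The
+; autogenerated assertions record the current output, in which the branch on
+; the uniform condition remains inside the loop.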
+; SHOULDBE-LABEL: {{^}}define amdgpu_kernel void @uniform_unswitch
+; SHOULDBE: entry:
+; SHOULDBE-NEXT: [[LOOP_COND:%[a-z0-9]+]] = icmp
+; SHOULDBE-NEXT: [[IF_COND:%[a-z0-9]+]] = icmp eq i32 %x, 123456
+; Kernel under test. When %n > 0 it iterates %i.07 over [0, %n) and stores the
+; index to %out[%i.07] only if %x == 123456. The compare on %x is computed once
+; in the preheader (for.body.lr.ph), so the conditional branch in for.body is
+; loop-invariant; that branch is the unswitch candidate this test is about.
+define amdgpu_kernel void @uniform_unswitch(ptr nocapture %out, i32 %n, i32 %x) {
+; CHECK-LABEL: define amdgpu_kernel void @uniform_unswitch
+; CHECK-SAME: (ptr nocapture writeonly [[OUT:%.*]], i32 [[N:%.*]], i32 [[X:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[OUT_GLOBAL:%.*]] = addrspacecast ptr [[OUT]] to ptr addrspace(1)
+; CHECK-NEXT:    [[CMP6:%.*]] = icmp sgt i32 [[N]], 0
+; CHECK-NEXT:    br i1 [[CMP6]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_COND_CLEANUP:%.*]]
+; CHECK:       for.body.lr.ph:
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp eq i32 [[X]], 123456
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.cond.cleanup:
+; CHECK-NEXT:    ret void
+; CHECK:       for.body:
+; CHECK-NEXT:    [[I_07:%.*]] = phi i32 [ 0, [[FOR_BODY_LR_PH]] ], [ [[INC:%.*]], [[FOR_INC:%.*]] ]
+; CHECK-NEXT:    br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]]
+; CHECK:       if.then:
+; CHECK-NEXT:    [[TMP0:%.*]] = zext i32 [[I_07]] to i64
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[OUT_GLOBAL]], i64 [[TMP0]]
+; CHECK-NEXT:    store i32 [[I_07]], ptr addrspace(1) [[ARRAYIDX]], align 4
+; CHECK-NEXT:    br label [[FOR_INC]]
+; CHECK:       for.inc:
+; CHECK-NEXT:    [[INC]] = add nuw nsw i32 [[I_07]], 1
+; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[N]]
+; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]]
+entry:
+  ; Loop guard: skip the loop entirely unless %n is positive.
+  %cmp6 = icmp sgt i32 %n, 0
+  br i1 %cmp6, label %for.body.lr.ph, label %for.cond.cleanup
+for.body.lr.ph:                                   ; preds = %entry
+  ; Loop-invariant condition, evaluated once before entering the loop.
+  %cmp1 = icmp eq i32 %x, 123456
+  br label %for.body
+for.cond.cleanup.loopexit:                        ; preds = %for.inc
+  br label %for.cond.cleanup
+for.cond.cleanup:                                 ; preds = %for.cond.cleanup.loopexit, %entry
+  ret void
+for.body:                                         ; preds = %for.inc, %for.body.lr.ph
+  %i.07 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.inc ]
+  ; Branch on the invariant condition inside the loop body.
+  br i1 %cmp1, label %if.then, label %for.inc
+if.then:                                          ; preds = %for.body
+  ; out[i] = i
+  %arrayidx = getelementptr inbounds i32, ptr %out, i32 %i.07
+  store i32 %i.07, ptr %arrayidx, align 4
+  br label %for.inc
+for.inc:                                          ; preds = %for.body, %if.then
+  ; Advance the induction variable and leave the loop once it reaches %n.
+  %inc = add nuw nsw i32 %i.07, 1
+  %exitcond = icmp eq i32 %inc, %n
+  br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.body
+}
+; Declares the llvm.amdgcn.workitem.id.x intrinsic. The @uniform_unswitch
+; kernel shown in this test does not call it.
+declare i32 @llvm.amdgcn.workitem.id.x() #0
+; Attribute group #0, referenced by the declaration above.
+attributes #0 = { nounwind readnone }


More information about the llvm-commits mailing list