[llvm-dev] Cleaning up ‘br i1 false’ cases in CodeGenPrepare

Bharathi Seshadri via llvm-dev llvm-dev at lists.llvm.org
Thu Jun 28 21:44:39 PDT 2018


Hi,

I have come across a couple of cases where the code generated after
CodeGenPrepare pass has "br i1 false .." with both true and false
conditions preserved and this propagates further and remains the same
in the final assembly code/executable.

In CodeGenPrepare::runOnFunction, ConstantFoldTerminator (which
handles the br i1 false condition) is called only once and if after
the transformation of code by ConstantFoldTerminator() and
DeleteDeadBlock() we end up with code like "br i1 false", there is no
further opportunity to clean them up. So calling this code under
(!DisableBranchOpts) in a loop until no more transformations are made
fixes this issue. Is this reasonable ?

My simple fix (without any indentation changes) is:

--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp

@@ -316,7 +316,9 @@ bool CodeGenPrepare::runOnFunction(Function &F) {

   SunkAddrs.clear();
   if (!DisableBranchOpts) {
+  MadeChange = true;
+  while (MadeChange) {
     MadeChange = false;
     SmallPtrSet<BasicBlock*, 8> WorkList;

     for (BasicBlock &BB : F) {

@@ -352,6 +354,7 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
     EverMadeChange |= MadeChange;
   }
+ }
   if (!DisableGCOpts) {
   SmallVector<Instruction *, 2> Statepoints;

Testing the patch, I got a regression with
llvm/test/CodeGen/AMDGPU/nested-loop-conditions.ll.  I am unsure if
this requires remastering the test case to adjust with the new results
or if this is a real issue.  I don't have any expertise with GPU and
so any inputs in this regard would be very helpful.

Attached:
testcase.ll : Original test case for this issue
base.s and new.s: llc output for the failing case
(nested-loop-conditions.ll) before and after applying the above patch.

Thanks,
Bharathi
-------------- next part --------------
; RUN: llc < %s | FileCheck %s

; CHECK-NOT: check_func

define void @copyfunc(i8* %py_src, i8* %py_grp) #0 {
entry:
  %group17 = alloca [16 x i8], align 16
  %source18 = alloca [16 x i8], align 16
  %source219 = alloca [16 x i8], align 16
  %source219.sub63 = bitcast [16 x i8]* %source219 to i8*
  %source18.sub64 = bitcast [16 x i8]* %source18 to i8*
  %group17.sub65 = bitcast [16 x i8]* %group17 to i8*
  call void @llvm.lifetime.start(i64 16, i8* nonnull %group17.sub65) #5
  call void @llvm.lifetime.start(i64 16, i8* nonnull %source18.sub64) #5
  call void @llvm.lifetime.start(i64 16, i8* nonnull %source219.sub63) #5
  %call = tail call i32 @getsize(i8* %py_grp) #5
  %tobool = icmp ne i8* %py_src, null
  %conv = sext i32 %call to i64
  br i1 %tobool, label %if.then, label %if.end


if.then:                                          ; preds = %entry
  br i1 false, label %cond.true5.i, label %if.end.thread53


cond.true5.i:                                     ; preds = %if.then
  %0 = trunc i64 %conv to i32
  %cmp6.i = icmp ugt i32 %0, 16
  br i1 %cmp6.i, label %cond.true5.i26.thread, label %cond.true5.i26.thread59


cond.true5.i26.thread59:                          ; preds = %cond.true5.i
  %1 = bitcast [16 x i8]* %source18 to i8*
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull %1, i8* nonnull %py_src, i64 %conv, i32 1, i1 false) #5
  br label %cond.true8.i27


cond.true5.i26.thread:                            ; preds = %cond.true5.i
  %call.i = tail call i32 @check_func() #5
  br label %cond.false.i29


if.end.thread53:                                  ; preds = %if.then
  %2 = bitcast [16 x i8]* %source18 to i8*
  %call10.i = call i8* @copy2(i64 16, i8* nonnull %2, i8* nonnull %py_src, i64 %conv) #5
  br label %cond.false9.i31


if.end:                                           ; preds = %entry
  br i1 false, label %cond.true5.i26, label %cond.false9.i31


cond.true5.i26:                                   ; preds = %if.end
  %3 = trunc i64 %conv to i32
  %cmp6.i25 = icmp ugt i32 %3, 16
  br i1 %cmp6.i25, label %cond.false.i29, label %cond.true8.i27


cond.true8.i27:                                   ; preds = %cond.true5.i26.thread59, %cond.true5.i26
  %4 = bitcast [16 x i8]* %group17 to i8*
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull %4, i8* %py_grp, i64 %conv, i32 1, i1 false) #5
  br label %copy.exit33


cond.false.i29:                                   ; preds = %cond.true5.i26.thread, %cond.true5.i26
  %call.i28 = call i32 @check_func() #5
  br label %copy.exit33


cond.false9.i31:                                  ; preds = %if.end.thread53, %if.end
  %5 = bitcast [16 x i8]* %group17 to i8*
  %call10.i30 = call i8* @copy2(i64 16, i8* nonnull %5, i8* %py_grp, i64 %conv) #5
  br label %copy.exit33


copy.exit33:                                ; preds = %cond.true8.i27, %cond.false.i29, %cond.false9.i31
  %.pre-phi49 = phi i1 [ true, %cond.true8.i27 ], [ true, %cond.false.i29 ], [ false, %cond.false9.i31 ]
  %6 = icmp ne i8* %py_src, null
  br i1 %6, label %cond.true.i38, label %if.end8


cond.true.i38:                                    ; preds = %copy.exit33
  br i1 %.pre-phi49, label %cond.true5.i40, label %cond.false9.i45


cond.true5.i40:                                   ; preds = %cond.true.i38
  %7 = trunc i64 %conv to i32
  %cmp6.i39 = icmp ugt i32 %7, 16
  br i1 %cmp6.i39, label %cond.false.i43, label %cond.true8.i41


cond.true8.i41:                                   ; preds = %cond.true5.i40
  %8 = bitcast [16 x i8]* %source219 to i8*
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull %8, i8* nonnull %py_src, i64 %conv, i32 1, i1 false) #5
  br label %if.end8


cond.false.i43:                                   ; preds = %cond.true5.i40
  %call.i42 = call i32 @check_func() #5
  br label %if.end8


cond.false9.i45:                                  ; preds = %cond.true.i38
  %9 = bitcast [16 x i8]* %source219 to i8*
  %call10.i44 = call i8* @copy2(i64 16, i8* nonnull %9, i8* nonnull %py_src, i64 %conv) #5
  br label %if.end8


if.end8:                                          ; preds = %cond.false9.i45, %cond.false.i43, %cond.true8.i41, %copy.exit33
  %10 = bitcast [16 x i8]* %group17 to i8*
  %11 = bitcast [16 x i8]* %source18 to i8*
  %12 = bitcast [16 x i8]* %source219 to i8*
  call void @llvm.lifetime.end(i64 16, i8* nonnull %12) #5
  call void @llvm.lifetime.end(i64 16, i8* nonnull %11) #5
  call void @llvm.lifetime.end(i64 16, i8* nonnull %10) #5
  ret void
}
declare i32 @getsize(i8*) #2
declare i32 @check_func() #5
declare i8* @copy2(i64, i8*, i8*, i64) #2
declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1) #1
declare void @llvm.lifetime.end(i64, i8* nocapture) #1
declare void @llvm.lifetime.start(i64, i8* nocapture) #1
-------------- next part --------------
A non-text attachment was scrubbed...
Name: base.s
Type: application/octet-stream
Size: 5634 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-dev/attachments/20180628/cd17689a/attachment.obj>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: new.s
Type: application/octet-stream
Size: 5228 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-dev/attachments/20180628/cd17689a/attachment-0001.obj>


More information about the llvm-dev mailing list