[llvm] [ConstraintElim] Decompose `sub nsw` (PR #118219)
Yingwei Zheng via llvm-commits
llvm-commits at lists.llvm.org
Mon Dec 2 04:20:32 PST 2024
dtcxzyw wrote:
Regression (reduced from https://github.com/dtcxzyw/llvm-opt-benchmark/pull/1781#discussion_r1865327138):
```
; bin/opt -O3 reduced.ll -S
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
target triple = "x86_64-pc-linux-gnu"
; Function Attrs: mustprogress
define void @_ZN2cv3hal12cpu_baselineL12fastAtan32f_EPKfS3_Pfib(i32 %0) #0 {
br label %2
2: ; preds = %4, %1
%.01 = phi i32 [ 0, %1 ], [ %6, %4 ]
%3 = icmp slt i32 %.01, %0
br i1 %3, label %4, label %7
4: ; preds = %2
%5 = load volatile float, ptr null, align 4
%6 = add i32 %.01, 1
br label %2
7: ; preds = %2
ret void
}
; Function Attrs: mustprogress
define void @_ZN2cv3hal12cpu_baseline11fastAtan64fEPKdS3_Pdib(ptr %0, i32 %1) #0 personality ptr null {
br label %3
3: ; preds = %10, %2
%.0 = phi i32 [ 0, %2 ], [ 128, %10 ]
%4 = icmp slt i32 %.0, %1
br i1 %4, label %5, label %11
5: ; preds = %3
%6 = sub i32 %1, %.0
br label %7
7: ; preds = %9, %5
%.09 = phi i32 [ 0, %5 ], [ 1, %9 ]
%8 = icmp slt i32 %.09, %6
br i1 %8, label %9, label %10
9: ; preds = %7
store float 0.000000e+00, ptr %0, align 4
br label %7
10: ; preds = %7
call void @_ZN2cv3hal12cpu_baselineL12fastAtan32f_EPKfS3_Pfib(i32 %6)
br label %3
11: ; preds = %3
ret void
}
attributes #0 = { mustprogress }
```
Before (22417ec6cca0ed8ccecb0c2b77011e591378fd2a):
```
; ModuleID = 'reduced.ll'
source_filename = "reduced.ll"
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
target triple = "x86_64-pc-linux-gnu"
; Function Attrs: mustprogress nofree norecurse nounwind
define void @_ZN2cv3hal12cpu_baselineL12fastAtan32f_EPKfS3_Pfib(i32 %0) local_unnamed_addr #0 {
%2 = icmp sgt i32 %0, 0
br i1 %2, label %.lr.ph, label %._crit_edge
.lr.ph: ; preds = %1, %.lr.ph
%.011 = phi i32 [ %4, %.lr.ph ], [ 0, %1 ]
%3 = load volatile float, ptr null, align 4294967296
%4 = add nuw nsw i32 %.011, 1
%exitcond.not = icmp eq i32 %4, %0
br i1 %exitcond.not, label %._crit_edge, label %.lr.ph
._crit_edge: ; preds = %.lr.ph, %1
ret void
}
; Function Attrs: mustprogress nofree norecurse nounwind
define void @_ZN2cv3hal12cpu_baseline11fastAtan64fEPKdS3_Pdib(ptr nocapture writeonly %0, i32 %1) local_unnamed_addr #0 personality ptr null {
%.fr7 = freeze i32 %1
%3 = icmp sgt i32 %.fr7, 0
br i1 %3, label %.lr.ph5, label %._crit_edge6
.lr.ph5: ; preds = %2
%4 = icmp samesign ugt i32 %.fr7, 128
%5 = xor i1 %4, true
call void @llvm.assume(i1 %5)
store float 0.000000e+00, ptr %0, align 4
%.not = icmp eq i32 %.fr7, 1
tail call void @llvm.assume(i1 %.not)
%6 = load volatile float, ptr null, align 4294967296
br label %._crit_edge6
._crit_edge6: ; preds = %.lr.ph5, %2
ret void
}
; Function Attrs: nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write)
declare void @llvm.assume(i1 noundef) #1
attributes #0 = { mustprogress nofree norecurse nounwind }
attributes #1 = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) }
```
After:
```
; ModuleID = 'reduced.ll'
source_filename = "reduced.ll"
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
target triple = "x86_64-pc-linux-gnu"
; Function Attrs: mustprogress nofree norecurse nounwind
define void @_ZN2cv3hal12cpu_baselineL12fastAtan32f_EPKfS3_Pfib(i32 %0) local_unnamed_addr #0 {
%2 = icmp sgt i32 %0, 0
br i1 %2, label %.lr.ph, label %._crit_edge
.lr.ph: ; preds = %1, %.lr.ph
%.011 = phi i32 [ %4, %.lr.ph ], [ 0, %1 ]
%3 = load volatile float, ptr null, align 4294967296
%4 = add nuw nsw i32 %.011, 1
%exitcond.not = icmp eq i32 %4, %0
br i1 %exitcond.not, label %._crit_edge, label %.lr.ph
._crit_edge: ; preds = %.lr.ph, %1
ret void
}
; Function Attrs: mustprogress nofree norecurse nounwind
define void @_ZN2cv3hal12cpu_baseline11fastAtan64fEPKdS3_Pdib(ptr nocapture writeonly %0, i32 %1) local_unnamed_addr #0 personality ptr null {
%.fr7 = freeze i32 %1
%3 = icmp sgt i32 %.fr7, 0
br i1 %3, label %.lr.ph5, label %._crit_edge6
.lr.ph5: ; preds = %2
%4 = icmp samesign ugt i32 %.fr7, 128
br i1 %4, label %.lr.ph5.split.us, label %.lr.ph
.lr.ph5.split.us: ; preds = %.lr.ph.i.us, %.lr.ph5
%.03.us = phi i32 [ 0, %.lr.ph5 ], [ 128, %.lr.ph.i.us ]
%5 = sub nsw i32 %.fr7, %.03.us
%6 = icmp sgt i32 %5, 0
br i1 %6, label %.lr.ph.us, label %.lr.ph.i.us.preheader
.lr.ph.i.us.preheader: ; preds = %.lr.ph.us, %.lr.ph5.split.us
br label %.lr.ph.i.us
.lr.ph.i.us: ; preds = %.lr.ph.i.us.preheader, %.lr.ph.i.us
%.011.i.us = phi i32 [ %8, %.lr.ph.i.us ], [ 0, %.lr.ph.i.us.preheader ]
%7 = load volatile float, ptr null, align 4294967296
%8 = add nuw nsw i32 %.011.i.us, 1
%exitcond.not.i.us = icmp eq i32 %8, %5
br i1 %exitcond.not.i.us, label %.lr.ph5.split.us, label %.lr.ph.i.us
.lr.ph.us: ; preds = %.lr.ph5.split.us
store float 0.000000e+00, ptr %0, align 4
%.not8 = icmp eq i32 %5, 1
tail call void @llvm.assume(i1 %.not8)
br label %.lr.ph.i.us.preheader
.lr.ph: ; preds = %.lr.ph5
store float 0.000000e+00, ptr %0, align 4
%.not = icmp eq i32 %.fr7, 1
tail call void @llvm.assume(i1 %.not)
%9 = load volatile float, ptr null, align 4294967296
br label %._crit_edge6
._crit_edge6: ; preds = %.lr.ph, %2
ret void
}
; Function Attrs: nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write)
declare void @llvm.assume(i1 noundef) #1
attributes #0 = { mustprogress nofree norecurse nounwind }
attributes #1 = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) }
```
https://github.com/llvm/llvm-project/pull/115893 may fix this problem.
https://github.com/llvm/llvm-project/pull/118219
More information about the llvm-commits mailing list