[llvm] [ConstraintElim] Decompose `sub nsw` (PR #118219)

Yingwei Zheng via llvm-commits llvm-commits at lists.llvm.org
Mon Dec 2 04:20:32 PST 2024


dtcxzyw wrote:

Regression (reduced from https://github.com/dtcxzyw/llvm-opt-benchmark/pull/1781#discussion_r1865327138):
```
; bin/opt -O3 reduced.ll -S
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
target triple = "x86_64-pc-linux-gnu"

; Function Attrs: mustprogress
define void @_ZN2cv3hal12cpu_baselineL12fastAtan32f_EPKfS3_Pfib(i32 %0) #0 {
  br label %2

2:                                                ; preds = %4, %1
  %.01 = phi i32 [ 0, %1 ], [ %6, %4 ]
  %3 = icmp slt i32 %.01, %0
  br i1 %3, label %4, label %7

4:                                                ; preds = %2
  %5 = load volatile float, ptr null, align 4
  %6 = add i32 %.01, 1
  br label %2

7:                                                ; preds = %2
  ret void
}

; Function Attrs: mustprogress
define void @_ZN2cv3hal12cpu_baseline11fastAtan64fEPKdS3_Pdib(ptr %0, i32 %1) #0 personality ptr null {
  br label %3

3:                                                ; preds = %10, %2
  %.0 = phi i32 [ 0, %2 ], [ 128, %10 ]
  %4 = icmp slt i32 %.0, %1
  br i1 %4, label %5, label %11

5:                                                ; preds = %3
  %6 = sub i32 %1, %.0
  br label %7

7:                                                ; preds = %9, %5
  %.09 = phi i32 [ 0, %5 ], [ 1, %9 ]
  %8 = icmp slt i32 %.09, %6
  br i1 %8, label %9, label %10

9:                                                ; preds = %7
  store float 0.000000e+00, ptr %0, align 4
  br label %7

10:                                               ; preds = %7
  call void @_ZN2cv3hal12cpu_baselineL12fastAtan32f_EPKfS3_Pfib(i32 %6)
  br label %3

11:                                               ; preds = %3
  ret void
}

attributes #0 = { mustprogress }
```
Before (22417ec6cca0ed8ccecb0c2b77011e591378fd2a):
```
; ModuleID = 'reduced.ll'
source_filename = "reduced.ll"
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
target triple = "x86_64-pc-linux-gnu"

; Function Attrs: mustprogress nofree norecurse nounwind
define void @_ZN2cv3hal12cpu_baselineL12fastAtan32f_EPKfS3_Pfib(i32 %0) local_unnamed_addr #0 {
  %2 = icmp sgt i32 %0, 0
  br i1 %2, label %.lr.ph, label %._crit_edge

.lr.ph:                                           ; preds = %1, %.lr.ph
  %.011 = phi i32 [ %4, %.lr.ph ], [ 0, %1 ]
  %3 = load volatile float, ptr null, align 4294967296
  %4 = add nuw nsw i32 %.011, 1
  %exitcond.not = icmp eq i32 %4, %0
  br i1 %exitcond.not, label %._crit_edge, label %.lr.ph

._crit_edge:                                      ; preds = %.lr.ph, %1
  ret void
}

; Function Attrs: mustprogress nofree norecurse nounwind
define void @_ZN2cv3hal12cpu_baseline11fastAtan64fEPKdS3_Pdib(ptr nocapture writeonly %0, i32 %1) local_unnamed_addr #0 personality ptr null {
  %.fr7 = freeze i32 %1
  %3 = icmp sgt i32 %.fr7, 0
  br i1 %3, label %.lr.ph5, label %._crit_edge6

.lr.ph5:                                          ; preds = %2
  %4 = icmp samesign ugt i32 %.fr7, 128
  %5 = xor i1 %4, true
  call void @llvm.assume(i1 %5)
  store float 0.000000e+00, ptr %0, align 4
  %.not = icmp eq i32 %.fr7, 1
  tail call void @llvm.assume(i1 %.not)
  %6 = load volatile float, ptr null, align 4294967296
  br label %._crit_edge6

._crit_edge6:                                     ; preds = %.lr.ph5, %2
  ret void
}

; Function Attrs: nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write)
declare void @llvm.assume(i1 noundef) #1

attributes #0 = { mustprogress nofree norecurse nounwind }
attributes #1 = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) }
```
After:
```
; ModuleID = 'reduced.ll'
source_filename = "reduced.ll"
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
target triple = "x86_64-pc-linux-gnu"

; Function Attrs: mustprogress nofree norecurse nounwind
define void @_ZN2cv3hal12cpu_baselineL12fastAtan32f_EPKfS3_Pfib(i32 %0) local_unnamed_addr #0 {
  %2 = icmp sgt i32 %0, 0
  br i1 %2, label %.lr.ph, label %._crit_edge

.lr.ph:                                           ; preds = %1, %.lr.ph
  %.011 = phi i32 [ %4, %.lr.ph ], [ 0, %1 ]
  %3 = load volatile float, ptr null, align 4294967296
  %4 = add nuw nsw i32 %.011, 1
  %exitcond.not = icmp eq i32 %4, %0
  br i1 %exitcond.not, label %._crit_edge, label %.lr.ph

._crit_edge:                                      ; preds = %.lr.ph, %1
  ret void
}

; Function Attrs: mustprogress nofree norecurse nounwind
define void @_ZN2cv3hal12cpu_baseline11fastAtan64fEPKdS3_Pdib(ptr nocapture writeonly %0, i32 %1) local_unnamed_addr #0 personality ptr null {
  %.fr7 = freeze i32 %1
  %3 = icmp sgt i32 %.fr7, 0
  br i1 %3, label %.lr.ph5, label %._crit_edge6

.lr.ph5:                                          ; preds = %2
  %4 = icmp samesign ugt i32 %.fr7, 128
  br i1 %4, label %.lr.ph5.split.us, label %.lr.ph

.lr.ph5.split.us:                                 ; preds = %.lr.ph.i.us, %.lr.ph5
  %.03.us = phi i32 [ 0, %.lr.ph5 ], [ 128, %.lr.ph.i.us ]
  %5 = sub nsw i32 %.fr7, %.03.us
  %6 = icmp sgt i32 %5, 0
  br i1 %6, label %.lr.ph.us, label %.lr.ph.i.us.preheader

.lr.ph.i.us.preheader:                            ; preds = %.lr.ph.us, %.lr.ph5.split.us
  br label %.lr.ph.i.us

.lr.ph.i.us:                                      ; preds = %.lr.ph.i.us.preheader, %.lr.ph.i.us
  %.011.i.us = phi i32 [ %8, %.lr.ph.i.us ], [ 0, %.lr.ph.i.us.preheader ]
  %7 = load volatile float, ptr null, align 4294967296
  %8 = add nuw nsw i32 %.011.i.us, 1
  %exitcond.not.i.us = icmp eq i32 %8, %5
  br i1 %exitcond.not.i.us, label %.lr.ph5.split.us, label %.lr.ph.i.us

.lr.ph.us:                                        ; preds = %.lr.ph5.split.us
  store float 0.000000e+00, ptr %0, align 4
  %.not8 = icmp eq i32 %5, 1
  tail call void @llvm.assume(i1 %.not8)
  br label %.lr.ph.i.us.preheader

.lr.ph:                                           ; preds = %.lr.ph5
  store float 0.000000e+00, ptr %0, align 4
  %.not = icmp eq i32 %.fr7, 1
  tail call void @llvm.assume(i1 %.not)
  %9 = load volatile float, ptr null, align 4294967296
  br label %._crit_edge6

._crit_edge6:                                     ; preds = %.lr.ph, %2
  ret void
}

; Function Attrs: nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write)
declare void @llvm.assume(i1 noundef) #1

attributes #0 = { mustprogress nofree norecurse nounwind }
attributes #1 = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) }
```
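In the "Before" output the second function is straight-line code guarded by `llvm.assume`; in the "After" output it keeps the `br i1 %4` branch and the `.lr.ph5.split.us` / `.lr.ph.i.us` loop blocks.
https://github.com/llvm/llvm-project/pull/115893 may fix this problem.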


https://github.com/llvm/llvm-project/pull/118219


More information about the llvm-commits mailing list