[llvm] [ValueTracking] Extend LHS/RHS with matching operand to work without constants. (PR #85557)

via llvm-commits llvm-commits at lists.llvm.org
Tue Mar 19 11:45:33 PDT 2024

goldsteinn wrote:

> This patch seems to block SROA: [dtcxzyw/llvm-opt-benchmark#419 (comment)](https://github.com/dtcxzyw/llvm-opt-benchmark/pull/419#discussion_r1527863982).

Seems to boil down to simplifications happening earlier.

A reduced form:

define void @fun0() {
  %first111 = alloca [0 x [0 x [0 x ptr]]], i32 0, align 8
  store i64 0, ptr %first111, align 8
  %last = getelementptr i8, ptr %first111, i64 8
  call void @fun3(ptr %first111, ptr %last)
  ret void

define void @fun3(ptr %first, ptr %last, ptr %p_in) {
  %sub.ptr.lhs.cast = ptrtoint ptr %last to i64
  %sub.ptr.rhs.cast = ptrtoint ptr %first to i64
  %sub.ptr.sub = sub i64 %sub.ptr.lhs.cast, %sub.ptr.rhs.cast
  %call = ashr exact i64 %sub.ptr.sub, 3
  %call2 = load volatile i64, ptr %p_in, align 8
  %cmp = icmp ugt i64 %call, %call2
  br i1 %cmp, label %common.ret, label %if.else

common.ret:                                       ; preds = %if.else29, %if.else, %entry
  ret void

if.else:                                          ; preds = %entry
  %c_load.cast0.i = ptrtoint ptr %last to i64
  %c_load.cast.div0.i = ashr exact i64 %c_load.cast0.i, 3
  %cmp24.not = icmp ult i64 %c_load.cast.div0.i, %call
  br i1 %cmp24.not, label %if.else29, label %common.ret

if.else29:                                        ; preds = %if.else
  %n_is_c = call i1 @llvm.is.constant.i64(i64 %c_load.cast.div0.i)
  %cmp2 = icmp eq i64 %c_load.cast.div0.i, -1
  %or.cond1 = and i1 %n_is_c, %cmp2
  %add.ptr = getelementptr i64, ptr %first, i64 %c_load.cast.div0.i
  %.pre = ptrtoint ptr %add.ptr to i64
  %ptr.lhs.pre-phi = select i1 %or.cond1, i64 0, i64 %.pre
  %ptr.sub = sub i64 %ptr.lhs.pre-phi, %sub.ptr.rhs.cast
  call void @llvm.memmove.p0.p0.i64(ptr null, ptr %first, i64 %ptr.sub, i1 false)
  br label %common.ret

; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: readwrite)
declare void @llvm.memmove.p0.p0.i64(ptr nocapture writeonly, ptr nocapture readonly, i64, i1 immarg) #0

; Function Attrs: convergent nocallback nofree nosync nounwind willreturn memory(none)
declare i1 @llvm.is.constant.i64(i64) #1

attributes #0 = { nocallback nofree nounwind willreturn memory(argmem: readwrite) }
attributes #1 = { convergent nocallback nofree nosync nounwind willreturn memory(none) }


Where we go awry is when we fold:
  %c_load.cast.div0.i = ashr exact i64 %c_load.cast0.i, 3
  %cmp24.not = icmp ult i64 %c_load.cast.div0.i, %call
into:
  %cmp24.not = icmp ugt i64 %sub.ptr.sub, %c_load.cast0.i

Which eventually results in the following diff after inlining:
  %c_load.cast.div0.i.i = ashr exact i64 %c_load.cast0.i.i, 3
  %cmp24.not.i = icmp ult i64 %c_load.cast.div0.i.i, 1
  %cmp24.not.i = icmp ugt i64 8, %c_load.cast0.i.i

Then finally, without this patch, we get:
  %cmp24.not.i = icmp eq ptr %c_load0.i.i, null
whereas with this patch we end up with:
  %cmp24.not.i = icmp ult ptr %c_load0.i.i, inttoptr (i64 8 to ptr)

Essentially, we throw away the information that the low 3 bits of the pointer are zero
before we have enough information to fully reduce the compare to something easy to
analyze.

Looking into a fix...


