[PATCH] D46760: [InstCombine] Enhance narrowUDivURem.

Sat May 12 20:36:40 PDT 2018

bixia added a comment.

Yes, you are right that correlated value propagation can handle the case. However, if the divisor is a power of 2 and instcombine is invoked before correlated value propagation (as in the opt passes), instcombine transforms the i64 udiv/urem into an i64 shift/and. My original motivating test case is shown below; the change here is required in order for opt to transform all the i64 arithmetic operations into i32 operations.

; Function Attrs: nounwind
; External declaration only (no body in this snippet): takes no arguments,
; returns an i32, and carries attribute group #0.
declare i32 @get_number() #0
; Motivating test case: a long chain of i64 arithmetic (mul/add/udiv/urem)
; whose only inputs are two i32 values zero-extended to i64. The goal stated
; in the accompanying comment is for opt to turn all of these i64 operations
; into i32 operations. The final i64 result is stored through %result.
define void @narrow_long_chain_with_udiv_urem(i64* %result) {

  ; Two i32 values from @get_number, each annotated with !range !0, widened
  ; to i64 with zext.
  %num1 = call i32 @get_number(), !range !0
  %block_id = zext i32 %num1 to i64
  %num2 = call i32 @get_number(), !range !0
  %thread_id = zext i32 %num2 to i64
  ; linear_index = block_id * 64 + thread_id (both ops flagged nuw nsw).
  %tmp = mul nuw nsw i64 %block_id, 64
  %linear_index = add nuw nsw i64 %tmp, %thread_id
  ; Division by 1, then reduction modulo 384.
  %tmp1 = udiv i64 %linear_index, 1
  %tmp2 = urem i64 %tmp1, 384
  ; Split %tmp2 by the power-of-2 divisor 32 into quotient and remainder.
  ; These are the udiv/urem forms the comment says instcombine rewrites into
  ; i64 shift/and when it runs before correlated value propagation.
  %warp_id = udiv i64 %tmp2, 32
  %lane_id = urem i64 %tmp2, 32
  ; tmp6 = lane_id + 32 * (7 + warp_id * 8)
  %tmp3 = mul nsw i64 %warp_id, 8
  %tmp4 = add nsw i64 7, %tmp3
  %tmp5 = mul nsw i64 32, %tmp4
  %tmp6 = add nsw i64 %lane_id, %tmp5
  store i64 %tmp6, i64* %result
  ret void

}
; Attribute group #0, referenced by the @get_number declaration.
attributes #0 = { nounwind }
; Metadata node referenced by the !range annotations on the @get_number calls.
; Per the LLVM LangRef, !range operand pairs describe a half-open interval,
; so this is [0, 9945).
!0 = !{i32 0, i32 9945}

Repository:
  rL LLVM

https://reviews.llvm.org/D46760