[llvm] [InstCombine] Make `(binop ({s|u}itofp),({s|u}itofp))` transform more flexible to mismatched signs (PR #84389)

Fri Mar 8 10:28:59 PST 2024

goldsteinn wrote:

> Could you please have a look at [dtcxzyw/llvm-opt-benchmark#336 (comment)](https://github.com/dtcxzyw/llvm-opt-benchmark/pull/336#discussion_r1517575694)?

So it seems this transform is enabling SLP vectorization in a case where it's not profitable:
```
;; Before
; *** IR Dump After SimplifyCFGPass on regress ***
; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none)
define float @regress(i16 %inp) local_unnamed_addr #0 {
  %r0 = urem i16 %inp, 20
  %ui0 = uitofp i16 %r0 to float
  %fadd0 = fadd float %ui0, -1.000000e+01
  %fdiv0 = fdiv float %fadd0, 0.000000e+00
  %ui1 = uitofp i16 %inp to float
  %fdiv1 = fdiv float %ui1, 0.000000e+00
  %r = fmul float %fdiv1, %fdiv0
  ret float %r
}
; *** IR Dump After SLPVectorizerPass on regress ***
; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none)
define float @regress(i16 %inp) local_unnamed_addr #0 {
  %r0 = urem i16 %inp, 20
  %ui0 = uitofp i16 %r0 to float
  %fadd0 = fadd float %ui0, -1.000000e+01
  %fdiv0 = fdiv float %fadd0, 0.000000e+00
  %ui1 = uitofp i16 %inp to float
  %fdiv1 = fdiv float %ui1, 0.000000e+00
  %r = fmul float %fdiv1, %fdiv0
  ret float %r
}
;; After
; *** IR Dump After SimplifyCFGPass on regress ***
; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none)
define float @regress(i16 %inp) local_unnamed_addr #0 {
  %r0 = urem i16 %inp, 20
  %1 = add nsw i16 %r0, -10
  %fadd0 = sitofp i16 %1 to float
  %fdiv0 = fdiv float %fadd0, 0.000000e+00
  %ui1 = uitofp i16 %inp to float
  %fdiv1 = fdiv float %ui1, 0.000000e+00
  %r = fmul float %fdiv1, %fdiv0
  ret float %r
}
; *** IR Dump After SLPVectorizerPass on regress ***
; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none)
define float @regress(i16 %inp) local_unnamed_addr #0 {
  %r0 = urem i16 %inp, 20
  %1 = add nsw i16 %r0, -10
  %2 = insertelement <2 x i16> poison, i16 %inp, i32 0
  %3 = insertelement <2 x i16> %2, i16 %1, i32 1
  %4 = uitofp <2 x i16> %3 to <2 x float>
  %5 = sitofp <2 x i16> %3 to <2 x float>
  %6 = shufflevector <2 x float> %4, <2 x float> %5, <2 x i32> <i32 0, i32 3>
  %7 = fdiv <2 x float> %6, zeroinitializer
  %8 = extractelement <2 x float> %7, i32 0
  %9 = extractelement <2 x float> %7, i32 1
  %r = fmul float %8, %9
  ret float %r
}
```
Looking into a fix.

https://github.com/llvm/llvm-project/pull/84389