[llvm] [AArch64] Lower extending sitofp using tbl (PR #92528)
Shu-Chun Weng via llvm-commits
llvm-commits at lists.llvm.org
Fri Jun 28 14:17:32 PDT 2024
scweng wrote:
I'm pretty confident that this is a pre-existing bug somewhere in instruction selection that was revealed by this commit.
Here is the relevant `-print-before-all` output at this commit, where `%0` through `%3` are all return values of `i32 @rand()` truncated (`trunc`) to `i8`:
```
*** IR Dump Before CodeGen Prepare (codegenprepare) ***
; Function Attrs: mustprogress uwtable
define linkonce_odr dso_local void @_ZN14cast_test_implIafLi5ELi1EE3runEv() local_unnamed_addr #6 comdat personality ptr @__gxx_personality_v0 {
...
%4 = insertelement <2 x i8> poison, i8 %0, i64 0
%5 = insertelement <2 x i8> %4, i8 %1, i64 1
%shuffle.i.i2.i.i.i.i.i.i.i.i.i.i.i.i = sitofp <2 x i8> %5 to <2 x float>
%6 = insertelement <2 x i8> poison, i8 %2, i64 0
%7 = insertelement <2 x i8> %6, i8 %3, i64 1
%shuffle.i.i2.i.i.i.i.i.i.i.i.i.i.i.i.i = sitofp <2 x i8> %7 to <2 x float>
%dst.sroa.0.0.vec.extract = extractelement <2 x float> %shuffle.i.i2.i.i.i.i.i.i.i.i.i.i.i.i, i64 0
%conv.i97 = fptosi float %dst.sroa.0.0.vec.extract to i32
%sext = shl i32 %call.i.i.i.i.i.i.i.i.i.i.i.i.i.i.i.i.i, 24
%conv1.i = ashr exact i32 %sext, 24
%cmp.i.i.i.i = icmp eq i32 %conv1.i, %conv.i97
br i1 %cmp.i.i.i.i, label %if.end, label %if.then
...
*** IR Dump Before Exception handling preparation (dwarf-eh-prepare) ***
; Function Attrs: mustprogress uwtable
define linkonce_odr dso_local void @_ZN14cast_test_implIafLi5ELi1EE3runEv() local_unnamed_addr #6 comdat personality ptr @__gxx_personality_v0 {
...
%4 = insertelement <2 x i8> poison, i8 %0, i64 0
%5 = insertelement <2 x i8> %4, i8 %1, i64 1
%6 = shufflevector <2 x i8> %5, <2 x i8> <i8 0, i8 poison>, <8 x i32> <i32 2, i32 2, i32 2, i32 0, i32 2, i32 2, i32 2, i32 1>
%7 = bitcast <8 x i8> %6 to <2 x i32>
%8 = ashr exact <2 x i32> %7, <i32 24, i32 24>
%9 = sitofp <2 x i32> %8 to <2 x float>
%10 = insertelement <2 x i8> poison, i8 %2, i64 0
%11 = insertelement <2 x i8> %10, i8 %3, i64 1
%12 = shufflevector <2 x i8> %11, <2 x i8> <i8 0, i8 poison>, <8 x i32> <i32 2, i32 2, i32 2, i32 0, i32 2, i32 2, i32 2, i32 1>
%13 = bitcast <8 x i8> %12 to <2 x i32>
%14 = ashr exact <2 x i32> %13, <i32 24, i32 24>
%15 = sitofp <2 x i32> %14 to <2 x float>
%dst.sroa.0.0.vec.extract = extractelement <2 x float> %9, i64 0
%conv.i97 = fptosi float %dst.sroa.0.0.vec.extract to i32
%sext = shl i32 %call.i.i.i.i.i.i.i.i.i.i.i.i.i.i.i.i.i, 24
%conv1.i = ashr exact i32 %sext, 24
%cmp.i.i.i.i = icmp eq i32 %conv1.i, %conv.i97
br i1 %cmp.i.i.i.i, label %if.end, label %if.then
```
The instruction generated after this commit, `%6 = shufflevector <2 x i8> %5, <2 x i8> <i8 0, i8 poison>, <8 x i32> <i32 2, i32 2, i32 2, i32 0, i32 2, i32 2, i32 2, i32 1>`, clearly requests byte lanes `v0.b[0, 1, 2, 4, 5, 6]` to be `i8 0`, but the emitted `mov v0.b[3], w21; mov v0.b[7], w22` only writes lanes 3 and 7 and never zeroes the other lanes.
Here's the repro: https://godbolt.org/z/686fbPjKz
```
define dso_local noundef <2 x float> @before(i8 noundef %0, i8 noundef %1) {
%3 = insertelement <2 x i8> poison, i8 %0, i64 0
%4 = insertelement <2 x i8> %3, i8 %1, i64 1
%5 = sitofp <2 x i8> %4 to <2 x float>
ret <2 x float> %5
}
define dso_local noundef <2 x float> @after(i8 noundef %0, i8 noundef %1) {
%3 = insertelement <2 x i8> poison, i8 %0, i64 0
%4 = insertelement <2 x i8> %3, i8 %1, i64 1
%5 = shufflevector <2 x i8> %4, <2 x i8> <i8 0, i8 poison>, <8 x i32> <i32 2, i32 2, i32 2, i32 0, i32 2, i32 2, i32 2, i32 1>
%6 = bitcast <8 x i8> %5 to <2 x i32>
%7 = ashr exact <2 x i32> %6, <i32 24, i32 24>
%8 = sitofp <2 x i32> %7 to <2 x float>
ret <2 x float> %8
}
```
results in
```
before:
fmov s0, w0
mov v0.s[1], w1
shl v0.2s, v0.2s, #24
sshr v0.2s, v0.2s, #24
scvtf v0.2s, v0.2s
ret
after:
mov v0.b[3], w0
mov v0.b[7], w1
scvtf v0.2s, v0.2s, #24
ret
```
https://github.com/llvm/llvm-project/pull/92528
More information about the llvm-commits
mailing list