[llvm-bugs] [Bug 49858] Failure to convert 'sub' reduction to negated 'add' reduction

via llvm-bugs llvm-bugs at lists.llvm.org
Tue Apr 6 06:06:38 PDT 2021


https://bugs.llvm.org/show_bug.cgi?id=49858

Roman Lebedev <lebedev.ri at gmail.com> changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
             Status|NEW                         |RESOLVED
 Fixed By Commit(s)|                            |31d219d2997fed1b7dc97e0adf1
                   |                            |70d5aaf65883e
         Resolution|---                         |FIXED

--- Comment #2 from Roman Lebedev <lebedev.ri at gmail.com> ---
Fixed by an InstCombine transform in 31d219d2997fed1b7dc97e0adf170d5aaf65883e.

We end up with the expected vectorization/assembly afterwards:

$ ./bin/opt -O3 /tmp/test.ll -S 
; ModuleID = '/tmp/test.ll'
source_filename = "./example.cpp"
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

; Function Attrs: norecurse nounwind readonly uwtable willreturn mustprogress
define dso_local i32 @_Z5sub32PKi(i32* nocapture readonly %0) local_unnamed_addr #0 {
  %2 = bitcast i32* %0 to <16 x i32>*
  %3 = load <16 x i32>, <16 x i32>* %2, align 4, !tbaa !2
  %4 = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %3)
  %5 = sub i32 0, %4
  ret i32 %5
}

; Function Attrs: norecurse nounwind readonly uwtable willreturn mustprogress
define dso_local i32 @_Z5sub32PKii(i32* nocapture readonly %0, i32 %1) local_unnamed_addr #0 {
  %3 = bitcast i32* %0 to <16 x i32>*
  %4 = load <16 x i32>, <16 x i32>* %3, align 4, !tbaa !2
  %5 = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %4)
  %6 = sub i32 %1, %5
  ret i32 %6
}

; Function Attrs: nofree nosync nounwind readnone willreturn
declare i32 @llvm.vector.reduce.add.v16i32(<16 x i32>) #1

attributes #0 = { norecurse nounwind readonly uwtable willreturn mustprogress "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="haswell" "target-features"="+avx,+avx2,+bmi,+bmi2,+cx16,+cx8,+f16c,+fma,+fsgsbase,+fxsr,+invpcid,+lzcnt,+mmx,+movbe,+pclmul,+popcnt,+rdrnd,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt" }
attributes #1 = { nofree nosync nounwind readnone willreturn }

!llvm.module.flags = !{!0}
!llvm.ident = !{!1}

!0 = !{i32 1, !"wchar_size", i32 4}
!1 = !{!"clang version 13.0.0 (https://github.com/llvm/llvm-project.git 30b3aab3299a1b6e4e262866e88f0aac0ecdee09)"}
!2 = !{!3, !3, i64 0}
!3 = !{!"int", !4, i64 0}
!4 = !{!"omnipotent char", !5, i64 0}
!5 = !{!"Simple C++ TBAA"}

$ ./bin/opt -O3 /tmp/test.ll -S | ./bin/llc -
        .text
        .file   "example.cpp"
        .globl  _Z5sub32PKi                     # -- Begin function _Z5sub32PKi
        .p2align        4, 0x90
        .type   _Z5sub32PKi,@function
_Z5sub32PKi:                            # @_Z5sub32PKi
        .cfi_startproc
# %bb.0:
        vmovdqu (%rdi), %ymm0
        vpaddd  32(%rdi), %ymm0, %ymm0
        vextracti128    $1, %ymm0, %xmm1
        vpaddd  %xmm1, %xmm0, %xmm0
        vpshufd $238, %xmm0, %xmm1              # xmm1 = xmm0[2,3,2,3]
        vpaddd  %xmm1, %xmm0, %xmm0
        vpshufd $85, %xmm0, %xmm1               # xmm1 = xmm0[1,1,1,1]
        vpaddd  %xmm1, %xmm0, %xmm0
        vmovd   %xmm0, %eax
        negl    %eax
        vzeroupper
        retq
.Lfunc_end0:
        .size   _Z5sub32PKi, .Lfunc_end0-_Z5sub32PKi
        .cfi_endproc
                                        # -- End function
        .globl  _Z5sub32PKii                    # -- Begin function _Z5sub32PKii
        .p2align        4, 0x90
        .type   _Z5sub32PKii,@function
_Z5sub32PKii:                           # @_Z5sub32PKii
        .cfi_startproc
# %bb.0:
        movl    %esi, %eax
        vmovdqu (%rdi), %ymm0
        vpaddd  32(%rdi), %ymm0, %ymm0
        vextracti128    $1, %ymm0, %xmm1
        vpaddd  %xmm1, %xmm0, %xmm0
        vpshufd $238, %xmm0, %xmm1              # xmm1 = xmm0[2,3,2,3]
        vpaddd  %xmm1, %xmm0, %xmm0
        vpshufd $85, %xmm0, %xmm1               # xmm1 = xmm0[1,1,1,1]
        vpaddd  %xmm1, %xmm0, %xmm0
        vmovd   %xmm0, %ecx
        subl    %ecx, %eax
        vzeroupper
        retq
.Lfunc_end1:
        .size   _Z5sub32PKii, .Lfunc_end1-_Z5sub32PKii
        .cfi_endproc
                                        # -- End function
        .ident  "clang version 13.0.0 (https://github.com/llvm/llvm-project.git 30b3aab3299a1b6e4e262866e88f0aac0ecdee09)"
        .section        ".note.GNU-stack","",@progbits

-- 
You are receiving this mail because:
You are on the CC list for the bug.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-bugs/attachments/20210406/1b245825/attachment-0001.html>


More information about the llvm-bugs mailing list