[all-commits] [llvm/llvm-project] bec726: [X86] optimize ssse3 horizontal saturating add/sub...

Folkert de Vries via All-commits all-commits at lists.llvm.org
Thu Nov 27 03:51:27 PST 2025


  Branch: refs/heads/main
  Home:   https://github.com/llvm/llvm-project
  Commit: bec726f6a6d37bdfb90d1330d4b5e947ce017046
      https://github.com/llvm/llvm-project/commit/bec726f6a6d37bdfb90d1330d4b5e947ce017046
  Author: Folkert de Vries <folkert at folkertdev.nl>
  Date:   2025-11-27 (Thu, 27 Nov 2025)

  Changed paths:
    M llvm/lib/Target/X86/X86ISelLowering.cpp
    M llvm/lib/Target/X86/X86ISelLowering.h
    M llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
    M llvm/lib/Target/X86/X86InstrSSE.td
    M llvm/lib/Target/X86/X86IntrinsicsInfo.h
    A llvm/test/CodeGen/X86/haddsubsat.ll

  Log Message:
  -----------
  [X86] optimize ssse3 horizontal saturating add/sub (#169591)

Currently LLVM fails to recognize a manual implementation of `phadd`

https://godbolt.org/z/zozrssaWb

```llvm
declare <8 x i16> @llvm.x86.ssse3.phadd.sw.128(<8 x i16>, <8 x i16>)

declare <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16>, <8 x i16>)

define <8 x i16> @phaddsw_v8i16_intrinsic(<8 x i16> %a, <8 x i16> %b) {
entry:
  %res = call <8 x i16> @llvm.x86.ssse3.phadd.sw.128(<8 x i16> %a, <8 x i16> %b)
  ret <8 x i16> %res
}

define <8 x i16> @phaddsw_v8i16_generic(<8 x i16> %a, <8 x i16> %b) {
entry:
  %even = shufflevector <8 x i16> %a, <8 x i16> %b,
    <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
  %odd  = shufflevector <8 x i16> %a, <8 x i16> %b,
    <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
  %sum = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> %even, <8 x i16> %odd)
  ret <8 x i16> %sum
}
```

```asm
phaddsw_v8i16_intrinsic:                # @phaddsw_v8i16_intrinsic
        phaddsw xmm0, xmm1
        ret

phaddsw_v8i16_generic:                  # @phaddsw_v8i16_generic
        movdqa  xmm2, xmmword ptr [rip + .LCPI1_0] # xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
        movdqa  xmm3, xmm1
        pshufb  xmm3, xmm2
        movdqa  xmm4, xmm0
        pshufb  xmm4, xmm2
        punpcklqdq      xmm4, xmm3              # xmm4 = xmm4[0],xmm3[0]
        psrad   xmm1, 16
        psrad   xmm0, 16
        packssdw        xmm0, xmm1
        paddsw  xmm0, xmm4
        ret
```

This PR does recognize the pattern.



To unsubscribe from these emails, change your notification settings at https://github.com/llvm/llvm-project/settings/notifications


More information about the All-commits mailing list