[llvm] [X86] Recognise VPMADD52L pattern with AVX512IFMA/AVXIFMA (#153787) (PR #156714)
Hongyu Chen via llvm-commits
llvm-commits at lists.llvm.org
Fri Sep 5 03:31:43 PDT 2025
================
@@ -0,0 +1,217 @@
+; RUN: llc < %s -O1 -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefixes=X64
+
+; 67108863 == (1 << 26) - 1
+; 4503599627370496 == (1 << 52)
+; 4503599627370495 == (1 << 52) - 1
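+; VPMADD52LUQ adds the low 52 bits of the 104-bit product of its sources'
+; low 52 bits to the 64-bit accumulator, so the mul+add pattern may only be
+; combined when the product is known to fit in 52 bits: two 26-bit operands
+; qualify, while two 52-bit operands can produce a 104-bit product and do not.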
+
+define dso_local <8 x i64> @test_512_combine_evex(<8 x i64> noundef %0, <8 x i64> noundef %1, <8 x i64> noundef %2) local_unnamed_addr #0 {
+; X64-LABEL: test_512_combine_evex:
+; X64: # %bb.0:
+; X64-NEXT: vpbroadcastq {{.*#+}} zmm3 = [67108863,67108863,67108863,67108863,67108863,67108863,67108863,67108863]
+; X64-NEXT: vpandq %zmm3, %zmm0, %zmm0
+; X64-NEXT: vpandq %zmm3, %zmm1, %zmm1
+; X64-NEXT: vpandq %zmm3, %zmm2, %zmm2
+; X64-NOT: vpmul
+; X64-NOT: vpadd
+; X64-NEXT: vpmadd52luq %zmm1, %zmm2, %zmm0
+; X64-NEXT: retq
+ %4 = and <8 x i64> %0, splat (i64 67108863)
+ %5 = and <8 x i64> %1, splat (i64 67108863)
+ %6 = and <8 x i64> %2, splat (i64 67108863)
+ %7 = mul nuw nsw <8 x i64> %5, %4
+ %8 = add nuw nsw <8 x i64> %7, %6
+ ret <8 x i64> %8
+}
+
+define dso_local <8 x i64> @test_512_no_combine_evex_v2(<8 x i64> noundef %0, <8 x i64> noundef %1, <8 x i64> noundef %2) local_unnamed_addr #0 {
+; X64-LABEL: test_512_no_combine_evex_v2:
+; X64-NOT: vpmadd52luq
+; X64: retq
+ %4 = and <8 x i64> %0, splat (i64 4503599627370495)
+ %5 = and <8 x i64> %1, splat (i64 4503599627370495)
+ %6 = and <8 x i64> %2, splat (i64 4503599627370495)
+ %7 = mul nuw nsw <8 x i64> %5, %4
+ %8 = add nuw nsw <8 x i64> %7, %6
+ ret <8 x i64> %8
+}
+
+define dso_local noundef <8 x i64> @test_512_no_combine_evex(<8 x i64> noundef %0, <8 x i64> noundef %1, <8 x i64> noundef %2) local_unnamed_addr #0 {
+; X64-LABEL: test_512_no_combine_evex:
+; X64: # %bb.0:
+; X64-NOT: vpmadd52
+; X64-NEXT: vpmullq %zmm0, %zmm1, %zmm0
+; X64-NEXT: vpaddq %zmm2, %zmm0, %zmm0
+; X64-NEXT: retq
+ %4 = mul <8 x i64> %1, %0
+ %5 = add <8 x i64> %4, %2
+ ret <8 x i64> %5
+}
+
+define dso_local <4 x i64> @test_256_combine_evex(<4 x i64> noundef %0, <4 x i64> noundef %1, <4 x i64> noundef %2) local_unnamed_addr #1 {
+; X64-LABEL: test_256_combine_evex:
+; X64: # %bb.0:
+; X64-NEXT: vpbroadcastq {{.*#+}} ymm3 = [67108863,67108863,67108863,67108863]
+; X64-NEXT: vpand %ymm3, %ymm0, %ymm0
+; X64-NEXT: vpand %ymm3, %ymm1, %ymm1
+; X64-NEXT: vpand %ymm3, %ymm2, %ymm2
+; X64-NOT: vpmul
+; X64-NOT: vpadd
+; X64-NEXT: vpmadd52luq %ymm1, %ymm2, %ymm0
+; X64-NEXT: retq
+ %4 = and <4 x i64> %0, <i64 67108863, i64 67108863, i64 67108863, i64 67108863>
+ %5 = and <4 x i64> %1, <i64 67108863, i64 67108863, i64 67108863, i64 67108863>
+ %6 = and <4 x i64> %2, <i64 67108863, i64 67108863, i64 67108863, i64 67108863>
+ %7 = mul nuw nsw <4 x i64> %5, %4
+ %8 = add nuw nsw <4 x i64> %7, %6
+ ret <4 x i64> %8
+}
+
+define dso_local noundef <4 x i64> @test_256_no_combine_evex(<4 x i64> noundef %0, <4 x i64> noundef %1, <4 x i64> noundef %2) local_unnamed_addr #1 {
+; X64-LABEL: test_256_no_combine_evex:
+; X64: # %bb.0:
+; X64-NOT: vpmadd52
+; X64-NEXT: vpmullq %ymm0, %ymm1, %ymm0
+; X64-NEXT: vpaddq %ymm2, %ymm0, %ymm0
+; X64-NEXT: retq
+ %4 = mul <4 x i64> %1, %0
+ %5 = add <4 x i64> %4, %2
+ ret <4 x i64> %5
+}
+
+define dso_local <4 x i64> @test_256_combine_vex(<4 x i64> noundef %0, <4 x i64> noundef %1, <4 x i64> noundef %2) local_unnamed_addr #2 {
+; X64-LABEL: test_256_combine_vex:
+; X64: # %bb.0:
+; X64-NEXT: vpbroadcastq {{.*#+}} ymm3 = [67108863,67108863,67108863,67108863]
+; X64-NEXT: vpand %ymm3, %ymm0, %ymm0
+; X64-NEXT: vpand %ymm3, %ymm1, %ymm1
+; X64-NEXT: vpand %ymm3, %ymm2, %ymm2
+; X64-NOT: vpmul
+; X64-NOT: vpadd
+; X64-NEXT: {vex} vpmadd52luq %ymm1, %ymm2, %ymm0
+; X64-NEXT: retq
+ %4 = and <4 x i64> %0, <i64 67108863, i64 67108863, i64 67108863, i64 67108863>
+ %5 = and <4 x i64> %1, <i64 67108863, i64 67108863, i64 67108863, i64 67108863>
+ %6 = and <4 x i64> %2, <i64 67108863, i64 67108863, i64 67108863, i64 67108863>
+ %7 = mul nuw nsw <4 x i64> %5, %4
+ %8 = add nuw nsw <4 x i64> %7, %6
+ ret <4 x i64> %8
+}
+
+define dso_local noundef <4 x i64> @test_256_no_combine_vex(<4 x i64> noundef %0, <4 x i64> noundef %1, <4 x i64> noundef %2) local_unnamed_addr #2 {
+; X64-LABEL: test_256_no_combine_vex:
+; X64: # %bb.0:
+; X64-NOT: vpmadd52
+; X64-NEXT: vpmullq %ymm0, %ymm1, %ymm0
+; X64-NEXT: vpaddq %ymm2, %ymm0, %ymm0
+; X64-NEXT: retq
+ %4 = mul <4 x i64> %1, %0
+ %5 = add <4 x i64> %4, %2
+ ret <4 x i64> %5
+}
+
+define i64 @scalar_no_ifma(i64 %a, i64 %b, i64 %acc) #0 {
+; X64-LABEL: scalar_no_ifma:
+; X64-NOT: vpmadd52
+; X64-NOT: vpmullq
+; X64: imulq
+; X64: ret
+entry:
+ %mul = mul i64 %a, %b
+ %res = add i64 %acc, %mul
+ ret i64 %res
+}
+
+define <8 x i64> @mixed_width_too_wide(<8 x i64> %a, <8 x i64> %b, <8 x i64> %acc) #0 {
+; X64-LABEL: mixed_width_too_wide:
+; X64-NOT: vpmadd52luq
+; X64: vpmullq
+; X64: ret
+entry:
+ ; 40-bit and 13-bit operands: the product fits in 53 bits but may exceed 2^52, so the combine must not fire
+ %a40 = and <8 x i64> %a, splat (i64 1099511627775)
+ %b13 = and <8 x i64> %b, splat (i64 8191)
+ %mul = mul <8 x i64> %a40, %b13
+ %res = add <8 x i64> %acc, %mul
+ ret <8 x i64> %res
+}
+
+define <8 x i64> @zext32_inputs_not_safe(<8 x i32> %ai32, <8 x i32> %bi32, <8 x i64> %acc) #0 {
+; X64-LABEL: zext32_inputs_not_safe:
+; X64: vpmul
+; X64-NOT: vpmadd52luq
+; X64: ret
+entry:
+ %a = zext <8 x i32> %ai32 to <8 x i64>
+ %b = zext <8 x i32> %bi32 to <8 x i64>
+ %mul = mul <8 x i64> %a, %b
+ %res = add <8 x i64> %acc, %mul
+ ret <8 x i64> %res
+}
+
+define <8 x i64> @const_2pow51_times_2(<8 x i64> %acc) #0 {
+; X64-LABEL: const_2pow51_times_2:
+; X64-NOT: vpmadd52luq
+; X64: vpaddq
+; X64: ret
+entry:
+ ; product = 2^52
+ %mul = mul <8 x i64> splat(i64 2251799813685248), splat(i64 2)
----------------
XChy wrote:
`%mul` constant folds early. It has no impact on this patch.
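
For illustration, a hand-written sketch (the function name `folded_sketch` is made up here, not taken from the test) of roughly what is left for the backend once that constant multiply folds:

```llvm
; A constant mul such as
;   %mul = mul <8 x i64> splat (i64 2251799813685248), splat (i64 2)
; folds to splat (i64 4503599627370496) (== 2^52) before the combine runs,
; so only a plain vector add of a constant remains to be selected.
define <8 x i64> @folded_sketch(<8 x i64> %acc) {
entry:
  %res = add <8 x i64> %acc, splat (i64 4503599627370496)
  ret <8 x i64> %res
}
```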
https://github.com/llvm/llvm-project/pull/156714