[llvm] [X86] Recognise VPMADD52L pattern with AVX512IFMA/AVXIFMA (#153787) (PR #156714)
Hongyu Chen via llvm-commits
llvm-commits at lists.llvm.org
Fri Sep 5 03:31:43 PDT 2025
================
@@ -0,0 +1,217 @@
+; RUN: llc < %s -O1 -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefixes=X64
+
+; 67108863 == (1 << 26) - 1
+; 4503599627370496 == (1 << 52)
+; 4503599627370495 == (1 << 52) - 1
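+; VPMADD52LUQ adds the low 52 bits of the 104-bit product of its sources'
+; low 52 bits to the 64-bit accumulator, so the mul+add pattern may only be
+; combined when the product is known to fit in 52 bits: two 26-bit operands
+; qualify, while two 52-bit operands can produce a 104-bit product and do not.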
+
+define dso_local <8 x i64> @test_512_combine_evex(<8 x i64> noundef %0, <8 x i64> noundef %1, <8 x i64> noundef %2) local_unnamed_addr #0 {
+; X64-LABEL: test_512_combine_evex:
+; X64: # %bb.0:
+; X64-NEXT: vpbroadcastq {{.*#+}} zmm3 = [67108863,67108863,67108863,67108863,67108863,67108863,67108863,67108863]
+; X64-NEXT: vpandq %zmm3, %zmm0, %zmm0
+; X64-NEXT: vpandq %zmm3, %zmm1, %zmm1
+; X64-NEXT: vpandq %zmm3, %zmm2, %zmm2
+; X64-NOT: vpmul
+; X64-NOT: vpadd
+; X64-NEXT: vpmadd52luq %zmm1, %zmm2, %zmm0
+; X64-NEXT: retq
+ %4 = and <8 x i64> %0, splat (i64 67108863)
+ %5 = and <8 x i64> %1, splat (i64 67108863)
+ %6 = and <8 x i64> %2, splat (i64 67108863)
+ %7 = mul nuw nsw <8 x i64> %5, %4
+ %8 = add nuw nsw <8 x i64> %7, %6
+ ret <8 x i64> %8
+}
+
+define dso_local <8 x i64> @test_512_no_combine_evex_v2(<8 x i64> noundef %0, <8 x i64> noundef %1, <8 x i64> noundef %2) local_unnamed_addr #0 {
+; X64-LABEL: test_512_no_combine_evex_v2:
+; X64-NOT: vpmadd52luq
+; X64: retq
+ %4 = and <8 x i64> %0, splat (i64 4503599627370495)
+ %5 = and <8 x i64> %1, splat (i64 4503599627370495)
+ %6 = and <8 x i64> %2, splat (i64 4503599627370495)
+ %7 = mul nuw nsw <8 x i64> %5, %4
+ %8 = add nuw nsw <8 x i64> %7, %6
+ ret <8 x i64> %8
+}
+
+define dso_local noundef <8 x i64> @test_512_no_combine_evex(<8 x i64> noundef %0, <8 x i64> noundef %1, <8 x i64> noundef %2) local_unnamed_addr #0 {
+; X64-LABEL: test_512_no_combine_evex:
+; X64: # %bb.0:
+; X64-NOT: vpmadd52
+; X64-NEXT: vpmullq %zmm0, %zmm1, %zmm0
+; X64-NEXT: vpaddq %zmm2, %zmm0, %zmm0
+; X64-NEXT: retq
+ %4 = mul <8 x i64> %1, %0
+ %5 = add <8 x i64> %4, %2
+ ret <8 x i64> %5
+}
+
+define dso_local <4 x i64> @test_256_combine_evex(<4 x i64> noundef %0, <4 x i64> noundef %1, <4 x i64> noundef %2) local_unnamed_addr #1 {
+; X64-LABEL: test_256_combine_evex:
+; X64: # %bb.0:
+; X64-NEXT: vpbroadcastq {{.*#+}} ymm3 = [67108863,67108863,67108863,67108863]
+; X64-NEXT: vpand %ymm3, %ymm0, %ymm0
+; X64-NEXT: vpand %ymm3, %ymm1, %ymm1
+; X64-NEXT: vpand %ymm3, %ymm2, %ymm2
+; X64-NOT: vpmul
+; X64-NOT: vpadd
+; X64-NEXT: vpmadd52luq %ymm1, %ymm2, %ymm0
+; X64-NEXT: retq
+ %4 = and <4 x i64> %0, <i64 67108863, i64 67108863, i64 67108863, i64 67108863>
+ %5 = and <4 x i64> %1, <i64 67108863, i64 67108863, i64 67108863, i64 67108863>
+ %6 = and <4 x i64> %2, <i64 67108863, i64 67108863, i64 67108863, i64 67108863>
+ %7 = mul nuw nsw <4 x i64> %5, %4
+ %8 = add nuw nsw <4 x i64> %7, %6
+ ret <4 x i64> %8
+}
+
+define dso_local noundef <4 x i64> @test_256_no_combine_evex(<4 x i64> noundef %0, <4 x i64> noundef %1, <4 x i64> noundef %2) local_unnamed_addr #1 {
+; X64-LABEL: test_256_no_combine_evex:
+; X64: # %bb.0:
+; X64-NOT: vpmadd52
+; X64-NEXT: vpmullq %ymm0, %ymm1, %ymm0
+; X64-NEXT: vpaddq %ymm2, %ymm0, %ymm0
+; X64-NEXT: retq
+ %4 = mul <4 x i64> %1, %0
+ %5 = add <4 x i64> %4, %2
+ ret <4 x i64> %5
+}
+
+define dso_local <4 x i64> @test_256_combine_vex(<4 x i64> noundef %0, <4 x i64> noundef %1, <4 x i64> noundef %2) local_unnamed_addr #2 {
+; X64-LABEL: test_256_combine_vex:
+; X64: # %bb.0:
+; X64-NEXT: vpbroadcastq {{.*#+}} ymm3 = [67108863,67108863,67108863,67108863]
+; X64-NEXT: vpand %ymm3, %ymm0, %ymm0
+; X64-NEXT: vpand %ymm3, %ymm1, %ymm1
+; X64-NEXT: vpand %ymm3, %ymm2, %ymm2
+; X64-NOT: vpmul
+; X64-NOT: vpadd
+; X64-NEXT: {vex} vpmadd52luq %ymm1, %ymm2, %ymm0
+; X64-NEXT: retq
+ %4 = and <4 x i64> %0, <i64 67108863, i64 67108863, i64 67108863, i64 67108863>
+ %5 = and <4 x i64> %1, <i64 67108863, i64 67108863, i64 67108863, i64 67108863>
+ %6 = and <4 x i64> %2, <i64 67108863, i64 67108863, i64 67108863, i64 67108863>
+ %7 = mul nuw nsw <4 x i64> %5, %4
+ %8 = add nuw nsw <4 x i64> %7, %6
+ ret <4 x i64> %8
+}
+
+define dso_local noundef <4 x i64> @test_256_no_combine_vex(<4 x i64> noundef %0, <4 x i64> noundef %1, <4 x i64> noundef %2) local_unnamed_addr #2 {
+; X64-LABEL: test_256_no_combine_vex:
+; X64: # %bb.0:
+; X64-NOT: vpmadd52
+; X64-NEXT: vpmullq %ymm0, %ymm1, %ymm0
+; X64-NEXT: vpaddq %ymm2, %ymm0, %ymm0
+; X64-NEXT: retq
+ %4 = mul <4 x i64> %1, %0
+ %5 = add <4 x i64> %4, %2
+ ret <4 x i64> %5
+}
+
+define i64 @scalar_no_ifma(i64 %a, i64 %b, i64 %acc) #0 {
+; X64-LABEL: scalar_no_ifma:
+; X64-NOT: vpmadd52
+; X64-NOT: vpmullq
+; X64: imulq
+; X64: ret
+entry:
+ %mul = mul i64 %a, %b
+ %res = add i64 %acc, %mul
+ ret i64 %res
+}
+
+define <8 x i64> @mixed_width_too_wide(<8 x i64> %a, <8 x i64> %b, <8 x i64> %acc) #0 {
+; X64-LABEL: mixed_width_too_wide:
+; X64-NOT: vpmadd52luq
+; X64: vpmullq
+; X64: ret
+entry:
+ ; 40-bit and 13-bit operands: the product fits in 53 bits but may exceed 2^52, so the combine must not fire
+ %a40 = and <8 x i64> %a, splat (i64 1099511627775)
+ %b13 = and <8 x i64> %b, splat (i64 8191)
+ %mul = mul <8 x i64> %a40, %b13
+ %res = add <8 x i64> %acc, %mul
+ ret <8 x i64> %res
+}
+
+define <8 x i64> @zext32_inputs_not_safe(<8 x i32> %ai32, <8 x i32> %bi32, <8 x i64> %acc) #0 {
+; X64-LABEL: zext32_inputs_not_safe:
+; X64: vpmul
+; X64-NOT: vpmadd52luq
+; X64: ret
+entry:
+ %a = zext <8 x i32> %ai32 to <8 x i64>
+ %b = zext <8 x i32> %bi32 to <8 x i64>
+ %mul = mul <8 x i64> %a, %b
+ %res = add <8 x i64> %acc, %mul
+ ret <8 x i64> %res
+}
+
+define <8 x i64> @const_2pow51_times_2(<8 x i64> %acc) #0 {
+; X64-LABEL: const_2pow51_times_2:
+; X64-NOT: vpmadd52luq
+; X64: vpaddq
+; X64: ret
+entry:
+ ; product = 2^52
+ %mul = mul <8 x i64> splat(i64 2251799813685248), splat(i64 2)
----------------
XChy wrote:
`%mul` constant folds early. It has no impact on this patch.
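
For illustration, a hand-written sketch (the function name `folded_sketch` is made up here, not taken from the test) of roughly what is left for the backend once that constant multiply folds:

```llvm
; A constant mul such as
;   %mul = mul <8 x i64> splat (i64 2251799813685248), splat (i64 2)
; folds to splat (i64 4503599627370496) (== 2^52) before the combine runs,
; so only a plain vector add of a constant remains to be selected.
define <8 x i64> @folded_sketch(<8 x i64> %acc) {
entry:
  %res = add <8 x i64> %acc, splat (i64 4503599627370496)
  ret <8 x i64> %res
}
```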
https://github.com/llvm/llvm-project/pull/156714