[llvm] [msan] Add tests for avx512-gfni-intrinsics (PR #149258)
Thurston Dang via llvm-commits
llvm-commits at lists.llvm.org
Wed Jul 16 23:00:05 PDT 2025
https://github.com/thurstond updated https://github.com/llvm/llvm-project/pull/149258
>From ec4efacd2fc387ab7d0cb67f94625ee4b6dddf4e Mon Sep 17 00:00:00 2001
From: Thurston Dang <thurston at google.com>
Date: Thu, 17 Jul 2025 05:45:17 +0000
Subject: [PATCH 1/2] [msan] Add tests for avx512-gfni-intrinsics
Adds MemorySanitizer test coverage for the AVX512 GFNI intrinsics:
the vgf2p8affineqb/vgf2p8affineinvqb variants (128/256/512-bit) are
handled strictly, and the vgf2p8mulb variants heuristically.
---
.../X86/avx512-gfni-intrinsics.ll | 670 ++++++++++++++++++
1 file changed, 670 insertions(+)
create mode 100644 llvm/test/Instrumentation/MemorySanitizer/X86/avx512-gfni-intrinsics.ll
diff --git a/llvm/test/Instrumentation/MemorySanitizer/X86/avx512-gfni-intrinsics.ll b/llvm/test/Instrumentation/MemorySanitizer/X86/avx512-gfni-intrinsics.ll
new file mode 100644
index 0000000000000..7b5782605ae56
--- /dev/null
+++ b/llvm/test/Instrumentation/MemorySanitizer/X86/avx512-gfni-intrinsics.ll
@@ -0,0 +1,670 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt %s -S -mattr=+avx512vl,+gfni,+avx512bw -passes=msan 2>&1 | FileCheck %s
+;
+; Forked from llvm/test/CodeGen/X86/avx512-gfni-intrinsics.ll
+;
+; Strictly handled:
+; - llvm.x86.vgf2p8affineinvqb.128
+; - llvm.x86.vgf2p8affineinvqb.256
+; - llvm.x86.vgf2p8affineinvqb.512
+; - llvm.x86.vgf2p8affineqb.128
+; - llvm.x86.vgf2p8affineqb.256
+; - llvm.x86.vgf2p8affineqb.512
+;
+; Heuristically handled:
+; - llvm.x86.vgf2p8mulb.128
+; - llvm.x86.vgf2p8mulb.256
+; - llvm.x86.vgf2p8mulb.512
+
+target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+declare <16 x i8> @llvm.x86.vgf2p8affineinvqb.128(<16 x i8>, <16 x i8>, i8)
+define { <16 x i8>, <16 x i8>, <16 x i8> } @test_vgf2p8affineinvqb_128(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %passthru, i16 %mask) #0 {
+; CHECK-LABEL: @test_vgf2p8affineinvqb_128(
+; CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast i16 [[TMP1]] to <16 x i1>
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP7]], 0
+; CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP3]] to i128
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP8]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP2]]
+; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF1:![0-9]+]]
+; CHECK: 9:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4:[0-9]+]]
+; CHECK-NEXT: unreachable
+; CHECK: 10:
+; CHECK-NEXT: [[TMP11:%.*]] = call <16 x i8> @llvm.x86.vgf2p8affineinvqb.128(<16 x i8> [[SRC1:%.*]], <16 x i8> [[SRC2:%.*]], i8 3)
+; CHECK-NEXT: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
+; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i128 [[TMP12]], 0
+; CHECK-NEXT: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP3]] to i128
+; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i128 [[TMP13]], 0
+; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSCMP3]], [[_MSCMP4]]
+; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP14:%.*]], label [[TMP15:%.*]], !prof [[PROF1]]
+; CHECK: 14:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]]
+; CHECK-NEXT: unreachable
+; CHECK: 15:
+; CHECK-NEXT: [[TMP16:%.*]] = call <16 x i8> @llvm.x86.vgf2p8affineinvqb.128(<16 x i8> [[SRC1]], <16 x i8> [[SRC2]], i8 4)
+; CHECK-NEXT: [[TMP17:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
+; CHECK-NEXT: [[_MSCMP6:%.*]] = icmp ne i128 [[TMP17]], 0
+; CHECK-NEXT: [[TMP18:%.*]] = bitcast <16 x i8> [[TMP3]] to i128
+; CHECK-NEXT: [[_MSCMP7:%.*]] = icmp ne i128 [[TMP18]], 0
+; CHECK-NEXT: [[_MSOR8:%.*]] = or i1 [[_MSCMP6]], [[_MSCMP7]]
+; CHECK-NEXT: br i1 [[_MSOR8]], label [[TMP19:%.*]], label [[TMP20:%.*]], !prof [[PROF1]]
+; CHECK: 19:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]]
+; CHECK-NEXT: unreachable
+; CHECK: 20:
+; CHECK-NEXT: [[TMP21:%.*]] = call <16 x i8> @llvm.x86.vgf2p8affineinvqb.128(<16 x i8> [[SRC1]], <16 x i8> [[SRC2]], i8 5)
+; CHECK-NEXT: [[TMP22:%.*]] = select <16 x i1> [[TMP6]], <16 x i8> zeroinitializer, <16 x i8> zeroinitializer
+; CHECK-NEXT: [[TMP23:%.*]] = xor <16 x i8> [[TMP16]], zeroinitializer
+; CHECK-NEXT: [[TMP24:%.*]] = or <16 x i8> [[TMP23]], zeroinitializer
+; CHECK-NEXT: [[TMP25:%.*]] = or <16 x i8> [[TMP24]], zeroinitializer
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP5]], <16 x i8> [[TMP25]], <16 x i8> [[TMP22]]
+; CHECK-NEXT: [[TMP26:%.*]] = select <16 x i1> [[TMP6]], <16 x i8> [[TMP16]], <16 x i8> zeroinitializer
+; CHECK-NEXT: [[TMP27:%.*]] = select <16 x i1> [[TMP6]], <16 x i8> zeroinitializer, <16 x i8> [[TMP4]]
+; CHECK-NEXT: [[TMP28:%.*]] = xor <16 x i8> [[TMP21]], [[PASSTHRU:%.*]]
+; CHECK-NEXT: [[TMP29:%.*]] = or <16 x i8> [[TMP28]], zeroinitializer
+; CHECK-NEXT: [[TMP30:%.*]] = or <16 x i8> [[TMP29]], [[TMP4]]
+; CHECK-NEXT: [[_MSPROP_SELECT1:%.*]] = select <16 x i1> [[TMP5]], <16 x i8> [[TMP30]], <16 x i8> [[TMP27]]
+; CHECK-NEXT: [[TMP31:%.*]] = select <16 x i1> [[TMP6]], <16 x i8> [[TMP21]], <16 x i8> [[PASSTHRU]]
+; CHECK-NEXT: [[TMP32:%.*]] = insertvalue { <16 x i8>, <16 x i8>, <16 x i8> } undef, <16 x i8> [[TMP11]], 0
+; CHECK-NEXT: [[TMP33:%.*]] = insertvalue { <16 x i8>, <16 x i8>, <16 x i8> } { <16 x i8> zeroinitializer, <16 x i8> splat (i8 -1), <16 x i8> splat (i8 -1) }, <16 x i8> [[_MSPROP_SELECT]], 1
+; CHECK-NEXT: [[TMP34:%.*]] = insertvalue { <16 x i8>, <16 x i8>, <16 x i8> } [[TMP32]], <16 x i8> [[TMP26]], 1
+; CHECK-NEXT: [[TMP35:%.*]] = insertvalue { <16 x i8>, <16 x i8>, <16 x i8> } [[TMP33]], <16 x i8> [[_MSPROP_SELECT1]], 2
+; CHECK-NEXT: [[TMP36:%.*]] = insertvalue { <16 x i8>, <16 x i8>, <16 x i8> } [[TMP34]], <16 x i8> [[TMP31]], 2
+; CHECK-NEXT: store { <16 x i8>, <16 x i8>, <16 x i8> } [[TMP35]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret { <16 x i8>, <16 x i8>, <16 x i8> } [[TMP36]]
+;
+ %1 = bitcast i16 %mask to <16 x i1>
+ %2 = call <16 x i8> @llvm.x86.vgf2p8affineinvqb.128(<16 x i8> %src1, <16 x i8> %src2, i8 3)
+ %3 = call <16 x i8> @llvm.x86.vgf2p8affineinvqb.128(<16 x i8> %src1, <16 x i8> %src2, i8 4)
+ %4 = call <16 x i8> @llvm.x86.vgf2p8affineinvqb.128(<16 x i8> %src1, <16 x i8> %src2, i8 5)
+ %5 = select <16 x i1> %1, <16 x i8> %3, <16 x i8> zeroinitializer
+ %6 = select <16 x i1> %1, <16 x i8> %4, <16 x i8> %passthru
+ %7 = insertvalue { <16 x i8>, <16 x i8>, <16 x i8> } undef, <16 x i8> %2, 0
+ %8 = insertvalue { <16 x i8>, <16 x i8>, <16 x i8> } %7, <16 x i8> %5, 1
+ %9 = insertvalue { <16 x i8>, <16 x i8>, <16 x i8> } %8, <16 x i8> %6, 2
+ ret { <16 x i8>, <16 x i8>, <16 x i8> } %9
+}
+
+declare <32 x i8> @llvm.x86.vgf2p8affineinvqb.256(<32 x i8>, <32 x i8>, i8)
+define { <32 x i8>, <32 x i8>, <32 x i8> } @test_vgf2p8affineinvqb_256(<32 x i8> %src1, <32 x i8> %src2, <32 x i8> %passthru, i32 %mask) #0 {
+; CHECK-LABEL: @test_vgf2p8affineinvqb_256(
+; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 96) to ptr), align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i8>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <32 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
+; CHECK-NEXT: [[TMP4:%.*]] = load <32 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32 [[TMP1]] to <32 x i1>
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1>
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast <32 x i8> [[TMP2]] to i256
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP7]], 0
+; CHECK-NEXT: [[TMP8:%.*]] = bitcast <32 x i8> [[TMP3]] to i256
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i256 [[TMP8]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP2]]
+; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF1]]
+; CHECK: 9:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]]
+; CHECK-NEXT: unreachable
+; CHECK: 10:
+; CHECK-NEXT: [[TMP11:%.*]] = call <32 x i8> @llvm.x86.vgf2p8affineinvqb.256(<32 x i8> [[SRC1:%.*]], <32 x i8> [[SRC2:%.*]], i8 3)
+; CHECK-NEXT: [[TMP12:%.*]] = bitcast <32 x i8> [[TMP2]] to i256
+; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i256 [[TMP12]], 0
+; CHECK-NEXT: [[TMP13:%.*]] = bitcast <32 x i8> [[TMP3]] to i256
+; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i256 [[TMP13]], 0
+; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSCMP3]], [[_MSCMP4]]
+; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP14:%.*]], label [[TMP15:%.*]], !prof [[PROF1]]
+; CHECK: 14:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]]
+; CHECK-NEXT: unreachable
+; CHECK: 15:
+; CHECK-NEXT: [[TMP16:%.*]] = call <32 x i8> @llvm.x86.vgf2p8affineinvqb.256(<32 x i8> [[SRC1]], <32 x i8> [[SRC2]], i8 4)
+; CHECK-NEXT: [[TMP17:%.*]] = bitcast <32 x i8> [[TMP2]] to i256
+; CHECK-NEXT: [[_MSCMP6:%.*]] = icmp ne i256 [[TMP17]], 0
+; CHECK-NEXT: [[TMP18:%.*]] = bitcast <32 x i8> [[TMP3]] to i256
+; CHECK-NEXT: [[_MSCMP7:%.*]] = icmp ne i256 [[TMP18]], 0
+; CHECK-NEXT: [[_MSOR8:%.*]] = or i1 [[_MSCMP6]], [[_MSCMP7]]
+; CHECK-NEXT: br i1 [[_MSOR8]], label [[TMP19:%.*]], label [[TMP20:%.*]], !prof [[PROF1]]
+; CHECK: 19:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]]
+; CHECK-NEXT: unreachable
+; CHECK: 20:
+; CHECK-NEXT: [[TMP21:%.*]] = call <32 x i8> @llvm.x86.vgf2p8affineinvqb.256(<32 x i8> [[SRC1]], <32 x i8> [[SRC2]], i8 5)
+; CHECK-NEXT: [[TMP22:%.*]] = select <32 x i1> [[TMP6]], <32 x i8> zeroinitializer, <32 x i8> zeroinitializer
+; CHECK-NEXT: [[TMP23:%.*]] = xor <32 x i8> [[TMP16]], zeroinitializer
+; CHECK-NEXT: [[TMP24:%.*]] = or <32 x i8> [[TMP23]], zeroinitializer
+; CHECK-NEXT: [[TMP25:%.*]] = or <32 x i8> [[TMP24]], zeroinitializer
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP5]], <32 x i8> [[TMP25]], <32 x i8> [[TMP22]]
+; CHECK-NEXT: [[TMP26:%.*]] = select <32 x i1> [[TMP6]], <32 x i8> [[TMP16]], <32 x i8> zeroinitializer
+; CHECK-NEXT: [[TMP27:%.*]] = select <32 x i1> [[TMP6]], <32 x i8> zeroinitializer, <32 x i8> [[TMP4]]
+; CHECK-NEXT: [[TMP28:%.*]] = xor <32 x i8> [[TMP21]], [[PASSTHRU:%.*]]
+; CHECK-NEXT: [[TMP29:%.*]] = or <32 x i8> [[TMP28]], zeroinitializer
+; CHECK-NEXT: [[TMP30:%.*]] = or <32 x i8> [[TMP29]], [[TMP4]]
+; CHECK-NEXT: [[_MSPROP_SELECT1:%.*]] = select <32 x i1> [[TMP5]], <32 x i8> [[TMP30]], <32 x i8> [[TMP27]]
+; CHECK-NEXT: [[TMP31:%.*]] = select <32 x i1> [[TMP6]], <32 x i8> [[TMP21]], <32 x i8> [[PASSTHRU]]
+; CHECK-NEXT: [[TMP32:%.*]] = insertvalue { <32 x i8>, <32 x i8>, <32 x i8> } undef, <32 x i8> [[TMP11]], 0
+; CHECK-NEXT: [[TMP33:%.*]] = insertvalue { <32 x i8>, <32 x i8>, <32 x i8> } { <32 x i8> zeroinitializer, <32 x i8> splat (i8 -1), <32 x i8> splat (i8 -1) }, <32 x i8> [[_MSPROP_SELECT]], 1
+; CHECK-NEXT: [[TMP34:%.*]] = insertvalue { <32 x i8>, <32 x i8>, <32 x i8> } [[TMP32]], <32 x i8> [[TMP26]], 1
+; CHECK-NEXT: [[TMP35:%.*]] = insertvalue { <32 x i8>, <32 x i8>, <32 x i8> } [[TMP33]], <32 x i8> [[_MSPROP_SELECT1]], 2
+; CHECK-NEXT: [[TMP36:%.*]] = insertvalue { <32 x i8>, <32 x i8>, <32 x i8> } [[TMP34]], <32 x i8> [[TMP31]], 2
+; CHECK-NEXT: store { <32 x i8>, <32 x i8>, <32 x i8> } [[TMP35]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret { <32 x i8>, <32 x i8>, <32 x i8> } [[TMP36]]
+;
+ %1 = bitcast i32 %mask to <32 x i1>
+ %2 = call <32 x i8> @llvm.x86.vgf2p8affineinvqb.256(<32 x i8> %src1, <32 x i8> %src2, i8 3)
+ %3 = call <32 x i8> @llvm.x86.vgf2p8affineinvqb.256(<32 x i8> %src1, <32 x i8> %src2, i8 4)
+ %4 = call <32 x i8> @llvm.x86.vgf2p8affineinvqb.256(<32 x i8> %src1, <32 x i8> %src2, i8 5)
+ %5 = select <32 x i1> %1, <32 x i8> %3, <32 x i8> zeroinitializer
+ %6 = select <32 x i1> %1, <32 x i8> %4, <32 x i8> %passthru
+ %7 = insertvalue { <32 x i8>, <32 x i8>, <32 x i8> } undef, <32 x i8> %2, 0
+ %8 = insertvalue { <32 x i8>, <32 x i8>, <32 x i8> } %7, <32 x i8> %5, 1
+ %9 = insertvalue { <32 x i8>, <32 x i8>, <32 x i8> } %8, <32 x i8> %6, 2
+ ret { <32 x i8>, <32 x i8>, <32 x i8> } %9
+}
+
+declare <64 x i8> @llvm.x86.vgf2p8affineinvqb.512(<64 x i8>, <64 x i8>, i8)
+define { <64 x i8>, <64 x i8>, <64 x i8> } @test_vgf2p8affineinvqb_512(<64 x i8> %src1, <64 x i8> %src2, <64 x i8> %passthru, i64 %mask) #0 {
+; CHECK-LABEL: @test_vgf2p8affineinvqb_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <64 x i8>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <64 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP4:%.*]] = load <64 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast i64 [[TMP1]] to <64 x i1>
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast i64 [[MASK:%.*]] to <64 x i1>
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast <64 x i8> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP7]], 0
+; CHECK-NEXT: [[TMP8:%.*]] = bitcast <64 x i8> [[TMP3]] to i512
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP8]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP2]]
+; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF1]]
+; CHECK: 9:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]]
+; CHECK-NEXT: unreachable
+; CHECK: 10:
+; CHECK-NEXT: [[TMP11:%.*]] = call <64 x i8> @llvm.x86.vgf2p8affineinvqb.512(<64 x i8> [[SRC1:%.*]], <64 x i8> [[SRC2:%.*]], i8 3)
+; CHECK-NEXT: [[TMP12:%.*]] = bitcast <64 x i8> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i512 [[TMP12]], 0
+; CHECK-NEXT: [[TMP13:%.*]] = bitcast <64 x i8> [[TMP3]] to i512
+; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i512 [[TMP13]], 0
+; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSCMP3]], [[_MSCMP4]]
+; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP14:%.*]], label [[TMP15:%.*]], !prof [[PROF1]]
+; CHECK: 14:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]]
+; CHECK-NEXT: unreachable
+; CHECK: 15:
+; CHECK-NEXT: [[TMP16:%.*]] = call <64 x i8> @llvm.x86.vgf2p8affineinvqb.512(<64 x i8> [[SRC1]], <64 x i8> [[SRC2]], i8 4)
+; CHECK-NEXT: [[TMP17:%.*]] = bitcast <64 x i8> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP6:%.*]] = icmp ne i512 [[TMP17]], 0
+; CHECK-NEXT: [[TMP18:%.*]] = bitcast <64 x i8> [[TMP3]] to i512
+; CHECK-NEXT: [[_MSCMP7:%.*]] = icmp ne i512 [[TMP18]], 0
+; CHECK-NEXT: [[_MSOR8:%.*]] = or i1 [[_MSCMP6]], [[_MSCMP7]]
+; CHECK-NEXT: br i1 [[_MSOR8]], label [[TMP19:%.*]], label [[TMP20:%.*]], !prof [[PROF1]]
+; CHECK: 19:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]]
+; CHECK-NEXT: unreachable
+; CHECK: 20:
+; CHECK-NEXT: [[TMP21:%.*]] = call <64 x i8> @llvm.x86.vgf2p8affineinvqb.512(<64 x i8> [[SRC1]], <64 x i8> [[SRC2]], i8 5)
+; CHECK-NEXT: [[TMP22:%.*]] = select <64 x i1> [[TMP6]], <64 x i8> zeroinitializer, <64 x i8> zeroinitializer
+; CHECK-NEXT: [[TMP23:%.*]] = xor <64 x i8> [[TMP16]], zeroinitializer
+; CHECK-NEXT: [[TMP24:%.*]] = or <64 x i8> [[TMP23]], zeroinitializer
+; CHECK-NEXT: [[TMP25:%.*]] = or <64 x i8> [[TMP24]], zeroinitializer
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <64 x i1> [[TMP5]], <64 x i8> [[TMP25]], <64 x i8> [[TMP22]]
+; CHECK-NEXT: [[TMP26:%.*]] = select <64 x i1> [[TMP6]], <64 x i8> [[TMP16]], <64 x i8> zeroinitializer
+; CHECK-NEXT: [[TMP27:%.*]] = select <64 x i1> [[TMP6]], <64 x i8> zeroinitializer, <64 x i8> [[TMP4]]
+; CHECK-NEXT: [[TMP28:%.*]] = xor <64 x i8> [[TMP21]], [[PASSTHRU:%.*]]
+; CHECK-NEXT: [[TMP29:%.*]] = or <64 x i8> [[TMP28]], zeroinitializer
+; CHECK-NEXT: [[TMP30:%.*]] = or <64 x i8> [[TMP29]], [[TMP4]]
+; CHECK-NEXT: [[_MSPROP_SELECT1:%.*]] = select <64 x i1> [[TMP5]], <64 x i8> [[TMP30]], <64 x i8> [[TMP27]]
+; CHECK-NEXT: [[TMP31:%.*]] = select <64 x i1> [[TMP6]], <64 x i8> [[TMP21]], <64 x i8> [[PASSTHRU]]
+; CHECK-NEXT: [[TMP32:%.*]] = insertvalue { <64 x i8>, <64 x i8>, <64 x i8> } undef, <64 x i8> [[TMP11]], 0
+; CHECK-NEXT: [[TMP33:%.*]] = insertvalue { <64 x i8>, <64 x i8>, <64 x i8> } { <64 x i8> zeroinitializer, <64 x i8> splat (i8 -1), <64 x i8> splat (i8 -1) }, <64 x i8> [[_MSPROP_SELECT]], 1
+; CHECK-NEXT: [[TMP34:%.*]] = insertvalue { <64 x i8>, <64 x i8>, <64 x i8> } [[TMP32]], <64 x i8> [[TMP26]], 1
+; CHECK-NEXT: [[TMP35:%.*]] = insertvalue { <64 x i8>, <64 x i8>, <64 x i8> } [[TMP33]], <64 x i8> [[_MSPROP_SELECT1]], 2
+; CHECK-NEXT: [[TMP36:%.*]] = insertvalue { <64 x i8>, <64 x i8>, <64 x i8> } [[TMP34]], <64 x i8> [[TMP31]], 2
+; CHECK-NEXT: store { <64 x i8>, <64 x i8>, <64 x i8> } [[TMP35]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret { <64 x i8>, <64 x i8>, <64 x i8> } [[TMP36]]
+;
+ %1 = bitcast i64 %mask to <64 x i1>
+ %2 = call <64 x i8> @llvm.x86.vgf2p8affineinvqb.512(<64 x i8> %src1, <64 x i8> %src2, i8 3)
+ %3 = call <64 x i8> @llvm.x86.vgf2p8affineinvqb.512(<64 x i8> %src1, <64 x i8> %src2, i8 4)
+ %4 = call <64 x i8> @llvm.x86.vgf2p8affineinvqb.512(<64 x i8> %src1, <64 x i8> %src2, i8 5)
+ %5 = select <64 x i1> %1, <64 x i8> %3, <64 x i8> zeroinitializer
+ %6 = select <64 x i1> %1, <64 x i8> %4, <64 x i8> %passthru
+ %7 = insertvalue { <64 x i8>, <64 x i8>, <64 x i8> } undef, <64 x i8> %2, 0
+ %8 = insertvalue { <64 x i8>, <64 x i8>, <64 x i8> } %7, <64 x i8> %5, 1
+ %9 = insertvalue { <64 x i8>, <64 x i8>, <64 x i8> } %8, <64 x i8> %6, 2
+ ret { <64 x i8>, <64 x i8>, <64 x i8> } %9
+}
+
+declare <16 x i8> @llvm.x86.vgf2p8affineqb.128(<16 x i8>, <16 x i8>, i8)
+define { <16 x i8>, <16 x i8>, <16 x i8> } @test_vgf2p8affineqb_128(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %passthru, i16 %mask) #0 {
+; CHECK-LABEL: @test_vgf2p8affineqb_128(
+; CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast i16 [[TMP1]] to <16 x i1>
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP7]], 0
+; CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP3]] to i128
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP8]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP2]]
+; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF1]]
+; CHECK: 9:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]]
+; CHECK-NEXT: unreachable
+; CHECK: 10:
+; CHECK-NEXT: [[TMP11:%.*]] = call <16 x i8> @llvm.x86.vgf2p8affineqb.128(<16 x i8> [[SRC1:%.*]], <16 x i8> [[SRC2:%.*]], i8 3)
+; CHECK-NEXT: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
+; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i128 [[TMP12]], 0
+; CHECK-NEXT: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP3]] to i128
+; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i128 [[TMP13]], 0
+; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSCMP3]], [[_MSCMP4]]
+; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP14:%.*]], label [[TMP15:%.*]], !prof [[PROF1]]
+; CHECK: 14:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]]
+; CHECK-NEXT: unreachable
+; CHECK: 15:
+; CHECK-NEXT: [[TMP16:%.*]] = call <16 x i8> @llvm.x86.vgf2p8affineqb.128(<16 x i8> [[SRC1]], <16 x i8> [[SRC2]], i8 4)
+; CHECK-NEXT: [[TMP17:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
+; CHECK-NEXT: [[_MSCMP6:%.*]] = icmp ne i128 [[TMP17]], 0
+; CHECK-NEXT: [[TMP18:%.*]] = bitcast <16 x i8> [[TMP3]] to i128
+; CHECK-NEXT: [[_MSCMP7:%.*]] = icmp ne i128 [[TMP18]], 0
+; CHECK-NEXT: [[_MSOR8:%.*]] = or i1 [[_MSCMP6]], [[_MSCMP7]]
+; CHECK-NEXT: br i1 [[_MSOR8]], label [[TMP19:%.*]], label [[TMP20:%.*]], !prof [[PROF1]]
+; CHECK: 19:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]]
+; CHECK-NEXT: unreachable
+; CHECK: 20:
+; CHECK-NEXT: [[TMP21:%.*]] = call <16 x i8> @llvm.x86.vgf2p8affineqb.128(<16 x i8> [[SRC1]], <16 x i8> [[SRC2]], i8 5)
+; CHECK-NEXT: [[TMP22:%.*]] = select <16 x i1> [[TMP6]], <16 x i8> zeroinitializer, <16 x i8> zeroinitializer
+; CHECK-NEXT: [[TMP23:%.*]] = xor <16 x i8> [[TMP16]], zeroinitializer
+; CHECK-NEXT: [[TMP24:%.*]] = or <16 x i8> [[TMP23]], zeroinitializer
+; CHECK-NEXT: [[TMP25:%.*]] = or <16 x i8> [[TMP24]], zeroinitializer
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP5]], <16 x i8> [[TMP25]], <16 x i8> [[TMP22]]
+; CHECK-NEXT: [[TMP26:%.*]] = select <16 x i1> [[TMP6]], <16 x i8> [[TMP16]], <16 x i8> zeroinitializer
+; CHECK-NEXT: [[TMP27:%.*]] = select <16 x i1> [[TMP6]], <16 x i8> zeroinitializer, <16 x i8> [[TMP4]]
+; CHECK-NEXT: [[TMP28:%.*]] = xor <16 x i8> [[TMP21]], [[PASSTHRU:%.*]]
+; CHECK-NEXT: [[TMP29:%.*]] = or <16 x i8> [[TMP28]], zeroinitializer
+; CHECK-NEXT: [[TMP30:%.*]] = or <16 x i8> [[TMP29]], [[TMP4]]
+; CHECK-NEXT: [[_MSPROP_SELECT1:%.*]] = select <16 x i1> [[TMP5]], <16 x i8> [[TMP30]], <16 x i8> [[TMP27]]
+; CHECK-NEXT: [[TMP31:%.*]] = select <16 x i1> [[TMP6]], <16 x i8> [[TMP21]], <16 x i8> [[PASSTHRU]]
+; CHECK-NEXT: [[TMP32:%.*]] = insertvalue { <16 x i8>, <16 x i8>, <16 x i8> } undef, <16 x i8> [[TMP11]], 0
+; CHECK-NEXT: [[TMP33:%.*]] = insertvalue { <16 x i8>, <16 x i8>, <16 x i8> } { <16 x i8> zeroinitializer, <16 x i8> splat (i8 -1), <16 x i8> splat (i8 -1) }, <16 x i8> [[_MSPROP_SELECT]], 1
+; CHECK-NEXT: [[TMP34:%.*]] = insertvalue { <16 x i8>, <16 x i8>, <16 x i8> } [[TMP32]], <16 x i8> [[TMP26]], 1
+; CHECK-NEXT: [[TMP35:%.*]] = insertvalue { <16 x i8>, <16 x i8>, <16 x i8> } [[TMP33]], <16 x i8> [[_MSPROP_SELECT1]], 2
+; CHECK-NEXT: [[TMP36:%.*]] = insertvalue { <16 x i8>, <16 x i8>, <16 x i8> } [[TMP34]], <16 x i8> [[TMP31]], 2
+; CHECK-NEXT: store { <16 x i8>, <16 x i8>, <16 x i8> } [[TMP35]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret { <16 x i8>, <16 x i8>, <16 x i8> } [[TMP36]]
+;
+ %1 = bitcast i16 %mask to <16 x i1>
+ %2 = call <16 x i8> @llvm.x86.vgf2p8affineqb.128(<16 x i8> %src1, <16 x i8> %src2, i8 3)
+ %3 = call <16 x i8> @llvm.x86.vgf2p8affineqb.128(<16 x i8> %src1, <16 x i8> %src2, i8 4)
+ %4 = call <16 x i8> @llvm.x86.vgf2p8affineqb.128(<16 x i8> %src1, <16 x i8> %src2, i8 5)
+ %5 = select <16 x i1> %1, <16 x i8> %3, <16 x i8> zeroinitializer
+ %6 = select <16 x i1> %1, <16 x i8> %4, <16 x i8> %passthru
+ %7 = insertvalue { <16 x i8>, <16 x i8>, <16 x i8> } undef, <16 x i8> %2, 0
+ %8 = insertvalue { <16 x i8>, <16 x i8>, <16 x i8> } %7, <16 x i8> %5, 1
+ %9 = insertvalue { <16 x i8>, <16 x i8>, <16 x i8> } %8, <16 x i8> %6, 2
+ ret { <16 x i8>, <16 x i8>, <16 x i8> } %9
+}
+
+declare <32 x i8> @llvm.x86.vgf2p8affineqb.256(<32 x i8>, <32 x i8>, i8)
+define { <32 x i8>, <32 x i8>, <32 x i8> } @test_vgf2p8affineqb_256(<32 x i8> %src1, <32 x i8> %src2, <32 x i8> %passthru, i32 %mask) #0 {
+; CHECK-LABEL: @test_vgf2p8affineqb_256(
+; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 96) to ptr), align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i8>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <32 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
+; CHECK-NEXT: [[TMP4:%.*]] = load <32 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32 [[TMP1]] to <32 x i1>
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1>
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast <32 x i8> [[TMP2]] to i256
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP7]], 0
+; CHECK-NEXT: [[TMP8:%.*]] = bitcast <32 x i8> [[TMP3]] to i256
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i256 [[TMP8]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP2]]
+; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF1]]
+; CHECK: 9:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]]
+; CHECK-NEXT: unreachable
+; CHECK: 10:
+; CHECK-NEXT: [[TMP11:%.*]] = call <32 x i8> @llvm.x86.vgf2p8affineqb.256(<32 x i8> [[SRC1:%.*]], <32 x i8> [[SRC2:%.*]], i8 3)
+; CHECK-NEXT: [[TMP12:%.*]] = bitcast <32 x i8> [[TMP2]] to i256
+; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i256 [[TMP12]], 0
+; CHECK-NEXT: [[TMP13:%.*]] = bitcast <32 x i8> [[TMP3]] to i256
+; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i256 [[TMP13]], 0
+; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSCMP3]], [[_MSCMP4]]
+; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP14:%.*]], label [[TMP15:%.*]], !prof [[PROF1]]
+; CHECK: 14:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]]
+; CHECK-NEXT: unreachable
+; CHECK: 15:
+; CHECK-NEXT: [[TMP16:%.*]] = call <32 x i8> @llvm.x86.vgf2p8affineqb.256(<32 x i8> [[SRC1]], <32 x i8> [[SRC2]], i8 4)
+; CHECK-NEXT: [[TMP17:%.*]] = bitcast <32 x i8> [[TMP2]] to i256
+; CHECK-NEXT: [[_MSCMP6:%.*]] = icmp ne i256 [[TMP17]], 0
+; CHECK-NEXT: [[TMP18:%.*]] = bitcast <32 x i8> [[TMP3]] to i256
+; CHECK-NEXT: [[_MSCMP7:%.*]] = icmp ne i256 [[TMP18]], 0
+; CHECK-NEXT: [[_MSOR8:%.*]] = or i1 [[_MSCMP6]], [[_MSCMP7]]
+; CHECK-NEXT: br i1 [[_MSOR8]], label [[TMP19:%.*]], label [[TMP20:%.*]], !prof [[PROF1]]
+; CHECK: 19:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]]
+; CHECK-NEXT: unreachable
+; CHECK: 20:
+; CHECK-NEXT: [[TMP21:%.*]] = call <32 x i8> @llvm.x86.vgf2p8affineqb.256(<32 x i8> [[SRC1]], <32 x i8> [[SRC2]], i8 5)
+; CHECK-NEXT: [[TMP22:%.*]] = select <32 x i1> [[TMP6]], <32 x i8> zeroinitializer, <32 x i8> zeroinitializer
+; CHECK-NEXT: [[TMP23:%.*]] = xor <32 x i8> [[TMP16]], zeroinitializer
+; CHECK-NEXT: [[TMP24:%.*]] = or <32 x i8> [[TMP23]], zeroinitializer
+; CHECK-NEXT: [[TMP25:%.*]] = or <32 x i8> [[TMP24]], zeroinitializer
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP5]], <32 x i8> [[TMP25]], <32 x i8> [[TMP22]]
+; CHECK-NEXT: [[TMP26:%.*]] = select <32 x i1> [[TMP6]], <32 x i8> [[TMP16]], <32 x i8> zeroinitializer
+; CHECK-NEXT: [[TMP27:%.*]] = select <32 x i1> [[TMP6]], <32 x i8> zeroinitializer, <32 x i8> [[TMP4]]
+; CHECK-NEXT: [[TMP28:%.*]] = xor <32 x i8> [[TMP21]], [[PASSTHRU:%.*]]
+; CHECK-NEXT: [[TMP29:%.*]] = or <32 x i8> [[TMP28]], zeroinitializer
+; CHECK-NEXT: [[TMP30:%.*]] = or <32 x i8> [[TMP29]], [[TMP4]]
+; CHECK-NEXT: [[_MSPROP_SELECT1:%.*]] = select <32 x i1> [[TMP5]], <32 x i8> [[TMP30]], <32 x i8> [[TMP27]]
+; CHECK-NEXT: [[TMP31:%.*]] = select <32 x i1> [[TMP6]], <32 x i8> [[TMP21]], <32 x i8> [[PASSTHRU]]
+; CHECK-NEXT: [[TMP32:%.*]] = insertvalue { <32 x i8>, <32 x i8>, <32 x i8> } undef, <32 x i8> [[TMP11]], 0
+; CHECK-NEXT: [[TMP33:%.*]] = insertvalue { <32 x i8>, <32 x i8>, <32 x i8> } { <32 x i8> zeroinitializer, <32 x i8> splat (i8 -1), <32 x i8> splat (i8 -1) }, <32 x i8> [[_MSPROP_SELECT]], 1
+; CHECK-NEXT: [[TMP34:%.*]] = insertvalue { <32 x i8>, <32 x i8>, <32 x i8> } [[TMP32]], <32 x i8> [[TMP26]], 1
+; CHECK-NEXT: [[TMP35:%.*]] = insertvalue { <32 x i8>, <32 x i8>, <32 x i8> } [[TMP33]], <32 x i8> [[_MSPROP_SELECT1]], 2
+; CHECK-NEXT: [[TMP36:%.*]] = insertvalue { <32 x i8>, <32 x i8>, <32 x i8> } [[TMP34]], <32 x i8> [[TMP31]], 2
+; CHECK-NEXT: store { <32 x i8>, <32 x i8>, <32 x i8> } [[TMP35]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret { <32 x i8>, <32 x i8>, <32 x i8> } [[TMP36]]
+;
+ %1 = bitcast i32 %mask to <32 x i1>
+ %2 = call <32 x i8> @llvm.x86.vgf2p8affineqb.256(<32 x i8> %src1, <32 x i8> %src2, i8 3)
+ %3 = call <32 x i8> @llvm.x86.vgf2p8affineqb.256(<32 x i8> %src1, <32 x i8> %src2, i8 4)
+ %4 = call <32 x i8> @llvm.x86.vgf2p8affineqb.256(<32 x i8> %src1, <32 x i8> %src2, i8 5)
+ %5 = select <32 x i1> %1, <32 x i8> %3, <32 x i8> zeroinitializer
+ %6 = select <32 x i1> %1, <32 x i8> %4, <32 x i8> %passthru
+ %7 = insertvalue { <32 x i8>, <32 x i8>, <32 x i8> } undef, <32 x i8> %2, 0
+ %8 = insertvalue { <32 x i8>, <32 x i8>, <32 x i8> } %7, <32 x i8> %5, 1
+ %9 = insertvalue { <32 x i8>, <32 x i8>, <32 x i8> } %8, <32 x i8> %6, 2
+ ret { <32 x i8>, <32 x i8>, <32 x i8> } %9
+}
+
+declare <64 x i8> @llvm.x86.vgf2p8affineqb.512(<64 x i8>, <64 x i8>, i8)
+define { <64 x i8>, <64 x i8>, <64 x i8> } @test_vgf2p8affineqb_512(<64 x i8> %src1, <64 x i8> %src2, <64 x i8> %passthru, i64 %mask) #0 {
+; CHECK-LABEL: @test_vgf2p8affineqb_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <64 x i8>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <64 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP4:%.*]] = load <64 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast i64 [[TMP1]] to <64 x i1>
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast i64 [[MASK:%.*]] to <64 x i1>
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast <64 x i8> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP7]], 0
+; CHECK-NEXT: [[TMP8:%.*]] = bitcast <64 x i8> [[TMP3]] to i512
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP8]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP2]]
+; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF1]]
+; CHECK: 9:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]]
+; CHECK-NEXT: unreachable
+; CHECK: 10:
+; CHECK-NEXT: [[TMP11:%.*]] = call <64 x i8> @llvm.x86.vgf2p8affineqb.512(<64 x i8> [[SRC1:%.*]], <64 x i8> [[SRC2:%.*]], i8 3)
+; CHECK-NEXT: [[TMP12:%.*]] = bitcast <64 x i8> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i512 [[TMP12]], 0
+; CHECK-NEXT: [[TMP13:%.*]] = bitcast <64 x i8> [[TMP3]] to i512
+; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i512 [[TMP13]], 0
+; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSCMP3]], [[_MSCMP4]]
+; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP14:%.*]], label [[TMP15:%.*]], !prof [[PROF1]]
+; CHECK: 14:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]]
+; CHECK-NEXT: unreachable
+; CHECK: 15:
+; CHECK-NEXT: [[TMP16:%.*]] = call <64 x i8> @llvm.x86.vgf2p8affineqb.512(<64 x i8> [[SRC1]], <64 x i8> [[SRC2]], i8 4)
+; CHECK-NEXT: [[TMP17:%.*]] = bitcast <64 x i8> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP6:%.*]] = icmp ne i512 [[TMP17]], 0
+; CHECK-NEXT: [[TMP18:%.*]] = bitcast <64 x i8> [[TMP3]] to i512
+; CHECK-NEXT: [[_MSCMP7:%.*]] = icmp ne i512 [[TMP18]], 0
+; CHECK-NEXT: [[_MSOR8:%.*]] = or i1 [[_MSCMP6]], [[_MSCMP7]]
+; CHECK-NEXT: br i1 [[_MSOR8]], label [[TMP19:%.*]], label [[TMP20:%.*]], !prof [[PROF1]]
+; CHECK: 19:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]]
+; CHECK-NEXT: unreachable
+; CHECK: 20:
+; CHECK-NEXT: [[TMP21:%.*]] = call <64 x i8> @llvm.x86.vgf2p8affineqb.512(<64 x i8> [[SRC1]], <64 x i8> [[SRC2]], i8 5)
+; CHECK-NEXT: [[TMP22:%.*]] = select <64 x i1> [[TMP6]], <64 x i8> zeroinitializer, <64 x i8> zeroinitializer
+; CHECK-NEXT: [[TMP23:%.*]] = xor <64 x i8> [[TMP16]], zeroinitializer
+; CHECK-NEXT: [[TMP24:%.*]] = or <64 x i8> [[TMP23]], zeroinitializer
+; CHECK-NEXT: [[TMP25:%.*]] = or <64 x i8> [[TMP24]], zeroinitializer
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <64 x i1> [[TMP5]], <64 x i8> [[TMP25]], <64 x i8> [[TMP22]]
+; CHECK-NEXT: [[TMP26:%.*]] = select <64 x i1> [[TMP6]], <64 x i8> [[TMP16]], <64 x i8> zeroinitializer
+; CHECK-NEXT: [[TMP27:%.*]] = select <64 x i1> [[TMP6]], <64 x i8> zeroinitializer, <64 x i8> [[TMP4]]
+; CHECK-NEXT: [[TMP28:%.*]] = xor <64 x i8> [[TMP21]], [[PASSTHRU:%.*]]
+; CHECK-NEXT: [[TMP29:%.*]] = or <64 x i8> [[TMP28]], zeroinitializer
+; CHECK-NEXT: [[TMP30:%.*]] = or <64 x i8> [[TMP29]], [[TMP4]]
+; CHECK-NEXT: [[_MSPROP_SELECT1:%.*]] = select <64 x i1> [[TMP5]], <64 x i8> [[TMP30]], <64 x i8> [[TMP27]]
+; CHECK-NEXT: [[TMP31:%.*]] = select <64 x i1> [[TMP6]], <64 x i8> [[TMP21]], <64 x i8> [[PASSTHRU]]
+; CHECK-NEXT: [[TMP32:%.*]] = insertvalue { <64 x i8>, <64 x i8>, <64 x i8> } undef, <64 x i8> [[TMP11]], 0
+; CHECK-NEXT: [[TMP33:%.*]] = insertvalue { <64 x i8>, <64 x i8>, <64 x i8> } { <64 x i8> zeroinitializer, <64 x i8> splat (i8 -1), <64 x i8> splat (i8 -1) }, <64 x i8> [[_MSPROP_SELECT]], 1
+; CHECK-NEXT: [[TMP34:%.*]] = insertvalue { <64 x i8>, <64 x i8>, <64 x i8> } [[TMP32]], <64 x i8> [[TMP26]], 1
+; CHECK-NEXT: [[TMP35:%.*]] = insertvalue { <64 x i8>, <64 x i8>, <64 x i8> } [[TMP33]], <64 x i8> [[_MSPROP_SELECT1]], 2
+; CHECK-NEXT: [[TMP36:%.*]] = insertvalue { <64 x i8>, <64 x i8>, <64 x i8> } [[TMP34]], <64 x i8> [[TMP31]], 2
+; CHECK-NEXT: store { <64 x i8>, <64 x i8>, <64 x i8> } [[TMP35]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret { <64 x i8>, <64 x i8>, <64 x i8> } [[TMP36]]
+;
+ %1 = bitcast i64 %mask to <64 x i1>
+ %2 = call <64 x i8> @llvm.x86.vgf2p8affineqb.512(<64 x i8> %src1, <64 x i8> %src2, i8 3)
+ %3 = call <64 x i8> @llvm.x86.vgf2p8affineqb.512(<64 x i8> %src1, <64 x i8> %src2, i8 4)
+ %4 = call <64 x i8> @llvm.x86.vgf2p8affineqb.512(<64 x i8> %src1, <64 x i8> %src2, i8 5)
+ %5 = select <64 x i1> %1, <64 x i8> %3, <64 x i8> zeroinitializer
+ %6 = select <64 x i1> %1, <64 x i8> %4, <64 x i8> %passthru
+ %7 = insertvalue { <64 x i8>, <64 x i8>, <64 x i8> } undef, <64 x i8> %2, 0
+ %8 = insertvalue { <64 x i8>, <64 x i8>, <64 x i8> } %7, <64 x i8> %5, 1
+ %9 = insertvalue { <64 x i8>, <64 x i8>, <64 x i8> } %8, <64 x i8> %6, 2
+ ret { <64 x i8>, <64 x i8>, <64 x i8> } %9
+}
+
+declare <16 x i8> @llvm.x86.vgf2p8mulb.128(<16 x i8>, <16 x i8>)
+define <16 x i8> @test_vgf2p8mulb_128(<16 x i8> %src1, <16 x i8> %src2) #0 {
+; CHECK-LABEL: @test_vgf2p8mulb_128(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i8> [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[TMP3:%.*]] = call <16 x i8> @llvm.x86.vgf2p8mulb.128(<16 x i8> [[SRC1:%.*]], <16 x i8> [[SRC2:%.*]])
+; CHECK-NEXT: store <16 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x i8> [[TMP3]]
+;
+ %1 = call <16 x i8> @llvm.x86.vgf2p8mulb.128(<16 x i8> %src1, <16 x i8> %src2)
+ ret <16 x i8> %1
+}
+
+define <16 x i8> @test_vgf2p8mulb_128_mask(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %passthru, i16 %mask) #0 {
+; CHECK-LABEL: @test_vgf2p8mulb_128_mask(
+; CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast i16 [[TMP1]] to <16 x i1>
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i8> [[TMP2]], [[TMP3]]
+; CHECK-NEXT: [[TMP7:%.*]] = call <16 x i8> @llvm.x86.vgf2p8mulb.128(<16 x i8> [[SRC1:%.*]], <16 x i8> [[SRC2:%.*]])
+; CHECK-NEXT: [[TMP8:%.*]] = select <16 x i1> [[TMP6]], <16 x i8> [[_MSPROP]], <16 x i8> [[TMP4]]
+; CHECK-NEXT: [[TMP9:%.*]] = xor <16 x i8> [[TMP7]], [[PASSTHRU:%.*]]
+; CHECK-NEXT: [[TMP10:%.*]] = or <16 x i8> [[TMP9]], [[_MSPROP]]
+; CHECK-NEXT: [[TMP11:%.*]] = or <16 x i8> [[TMP10]], [[TMP4]]
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP5]], <16 x i8> [[TMP11]], <16 x i8> [[TMP8]]
+; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP6]], <16 x i8> [[TMP7]], <16 x i8> [[PASSTHRU]]
+; CHECK-NEXT: store <16 x i8> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x i8> [[TMP12]]
+;
+ %1 = bitcast i16 %mask to <16 x i1>
+ %2 = call <16 x i8> @llvm.x86.vgf2p8mulb.128(<16 x i8> %src1, <16 x i8> %src2)
+ %3 = select <16 x i1> %1, <16 x i8> %2, <16 x i8> %passthru
+ ret <16 x i8> %3
+}
+
+define <16 x i8> @test_vgf2p8mulb_128_maskz(<16 x i8> %src1, <16 x i8> %src2, i16 %mask) #0 {
+; CHECK-LABEL: @test_vgf2p8mulb_128_maskz(
+; CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast i16 [[TMP1]] to <16 x i1>
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i8> [[TMP2]], [[TMP3]]
+; CHECK-NEXT: [[TMP6:%.*]] = call <16 x i8> @llvm.x86.vgf2p8mulb.128(<16 x i8> [[SRC1:%.*]], <16 x i8> [[SRC2:%.*]])
+; CHECK-NEXT: [[TMP7:%.*]] = select <16 x i1> [[TMP5]], <16 x i8> [[_MSPROP]], <16 x i8> zeroinitializer
+; CHECK-NEXT: [[TMP8:%.*]] = xor <16 x i8> [[TMP6]], zeroinitializer
+; CHECK-NEXT: [[TMP9:%.*]] = or <16 x i8> [[TMP8]], [[_MSPROP]]
+; CHECK-NEXT: [[TMP10:%.*]] = or <16 x i8> [[TMP9]], zeroinitializer
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP4]], <16 x i8> [[TMP10]], <16 x i8> [[TMP7]]
+; CHECK-NEXT: [[TMP11:%.*]] = select <16 x i1> [[TMP5]], <16 x i8> [[TMP6]], <16 x i8> zeroinitializer
+; CHECK-NEXT: store <16 x i8> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x i8> [[TMP11]]
+;
+ %1 = bitcast i16 %mask to <16 x i1>
+ %2 = call <16 x i8> @llvm.x86.vgf2p8mulb.128(<16 x i8> %src1, <16 x i8> %src2)
+ %3 = select <16 x i1> %1, <16 x i8> %2, <16 x i8> zeroinitializer
+ ret <16 x i8> %3
+}
+
+declare <32 x i8> @llvm.x86.vgf2p8mulb.256(<32 x i8>, <32 x i8>)
+define <32 x i8> @test_vgf2p8mulb_256(<32 x i8> %src1, <32 x i8> %src2) #0 {
+; CHECK-LABEL: @test_vgf2p8mulb_256(
+; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i8>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <32 x i8> [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[TMP3:%.*]] = call <32 x i8> @llvm.x86.vgf2p8mulb.256(<32 x i8> [[SRC1:%.*]], <32 x i8> [[SRC2:%.*]])
+; CHECK-NEXT: store <32 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <32 x i8> [[TMP3]]
+;
+ %1 = call <32 x i8> @llvm.x86.vgf2p8mulb.256(<32 x i8> %src1, <32 x i8> %src2)
+ ret <32 x i8> %1
+}
+
+define <32 x i8> @test_vgf2p8mulb_256_mask(<32 x i8> %src1, <32 x i8> %src2, <32 x i8> %passthru, i32 %mask) #0 {
+; CHECK-LABEL: @test_vgf2p8mulb_256_mask(
+; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 96) to ptr), align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i8>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <32 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
+; CHECK-NEXT: [[TMP4:%.*]] = load <32 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32 [[TMP1]] to <32 x i1>
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1>
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <32 x i8> [[TMP2]], [[TMP3]]
+; CHECK-NEXT: [[TMP7:%.*]] = call <32 x i8> @llvm.x86.vgf2p8mulb.256(<32 x i8> [[SRC1:%.*]], <32 x i8> [[SRC2:%.*]])
+; CHECK-NEXT: [[TMP8:%.*]] = select <32 x i1> [[TMP6]], <32 x i8> [[_MSPROP]], <32 x i8> [[TMP4]]
+; CHECK-NEXT: [[TMP9:%.*]] = xor <32 x i8> [[TMP7]], [[PASSTHRU:%.*]]
+; CHECK-NEXT: [[TMP10:%.*]] = or <32 x i8> [[TMP9]], [[_MSPROP]]
+; CHECK-NEXT: [[TMP11:%.*]] = or <32 x i8> [[TMP10]], [[TMP4]]
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP5]], <32 x i8> [[TMP11]], <32 x i8> [[TMP8]]
+; CHECK-NEXT: [[TMP12:%.*]] = select <32 x i1> [[TMP6]], <32 x i8> [[TMP7]], <32 x i8> [[PASSTHRU]]
+; CHECK-NEXT: store <32 x i8> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <32 x i8> [[TMP12]]
+;
+ %1 = bitcast i32 %mask to <32 x i1>
+ %2 = call <32 x i8> @llvm.x86.vgf2p8mulb.256(<32 x i8> %src1, <32 x i8> %src2)
+ %3 = select <32 x i1> %1, <32 x i8> %2, <32 x i8> %passthru
+ ret <32 x i8> %3
+}
+
+define <32 x i8> @test_vgf2p8mulb_256_maskz(<32 x i8> %src1, <32 x i8> %src2, i32 %mask) #0 {
+; CHECK-LABEL: @test_vgf2p8mulb_256_maskz(
+; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i8>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <32 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32 [[TMP1]] to <32 x i1>
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1>
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <32 x i8> [[TMP2]], [[TMP3]]
+; CHECK-NEXT: [[TMP6:%.*]] = call <32 x i8> @llvm.x86.vgf2p8mulb.256(<32 x i8> [[SRC1:%.*]], <32 x i8> [[SRC2:%.*]])
+; CHECK-NEXT: [[TMP7:%.*]] = select <32 x i1> [[TMP5]], <32 x i8> [[_MSPROP]], <32 x i8> zeroinitializer
+; CHECK-NEXT: [[TMP8:%.*]] = xor <32 x i8> [[TMP6]], zeroinitializer
+; CHECK-NEXT: [[TMP9:%.*]] = or <32 x i8> [[TMP8]], [[_MSPROP]]
+; CHECK-NEXT: [[TMP10:%.*]] = or <32 x i8> [[TMP9]], zeroinitializer
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP4]], <32 x i8> [[TMP10]], <32 x i8> [[TMP7]]
+; CHECK-NEXT: [[TMP11:%.*]] = select <32 x i1> [[TMP5]], <32 x i8> [[TMP6]], <32 x i8> zeroinitializer
+; CHECK-NEXT: store <32 x i8> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <32 x i8> [[TMP11]]
+;
+ %1 = bitcast i32 %mask to <32 x i1>
+ %2 = call <32 x i8> @llvm.x86.vgf2p8mulb.256(<32 x i8> %src1, <32 x i8> %src2)
+ %3 = select <32 x i1> %1, <32 x i8> %2, <32 x i8> zeroinitializer
+ ret <32 x i8> %3
+}
+
+declare <64 x i8> @llvm.x86.vgf2p8mulb.512(<64 x i8>, <64 x i8>)
+define <64 x i8> @test_vgf2p8mulb_512(<64 x i8> %src1, <64 x i8> %src2) #0 {
+; CHECK-LABEL: @test_vgf2p8mulb_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <64 x i8>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <64 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <64 x i8> [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[TMP3:%.*]] = call <64 x i8> @llvm.x86.vgf2p8mulb.512(<64 x i8> [[SRC1:%.*]], <64 x i8> [[SRC2:%.*]])
+; CHECK-NEXT: store <64 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <64 x i8> [[TMP3]]
+;
+ %1 = call <64 x i8> @llvm.x86.vgf2p8mulb.512(<64 x i8> %src1, <64 x i8> %src2)
+ ret <64 x i8> %1
+}
+
+define <64 x i8> @test_vgf2p8mulb_512_mask(<64 x i8> %src1, <64 x i8> %src2, <64 x i8> %passthru, i64 %mask) #0 {
+; CHECK-LABEL: @test_vgf2p8mulb_512_mask(
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <64 x i8>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <64 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP4:%.*]] = load <64 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast i64 [[TMP1]] to <64 x i1>
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast i64 [[MASK:%.*]] to <64 x i1>
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <64 x i8> [[TMP2]], [[TMP3]]
+; CHECK-NEXT: [[TMP7:%.*]] = call <64 x i8> @llvm.x86.vgf2p8mulb.512(<64 x i8> [[SRC1:%.*]], <64 x i8> [[SRC2:%.*]])
+; CHECK-NEXT: [[TMP8:%.*]] = select <64 x i1> [[TMP6]], <64 x i8> [[_MSPROP]], <64 x i8> [[TMP4]]
+; CHECK-NEXT: [[TMP9:%.*]] = xor <64 x i8> [[TMP7]], [[PASSTHRU:%.*]]
+; CHECK-NEXT: [[TMP10:%.*]] = or <64 x i8> [[TMP9]], [[_MSPROP]]
+; CHECK-NEXT: [[TMP11:%.*]] = or <64 x i8> [[TMP10]], [[TMP4]]
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <64 x i1> [[TMP5]], <64 x i8> [[TMP11]], <64 x i8> [[TMP8]]
+; CHECK-NEXT: [[TMP12:%.*]] = select <64 x i1> [[TMP6]], <64 x i8> [[TMP7]], <64 x i8> [[PASSTHRU]]
+; CHECK-NEXT: store <64 x i8> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <64 x i8> [[TMP12]]
+;
+ %1 = bitcast i64 %mask to <64 x i1>
+ %2 = call <64 x i8> @llvm.x86.vgf2p8mulb.512(<64 x i8> %src1, <64 x i8> %src2)
+ %3 = select <64 x i1> %1, <64 x i8> %2, <64 x i8> %passthru
+ ret <64 x i8> %3
+}
+
+define <64 x i8> @test_vgf2p8mulb_512_maskz(<64 x i8> %src1, <64 x i8> %src2, i64 %mask) #0 {
+; CHECK-LABEL: @test_vgf2p8mulb_512_maskz(
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <64 x i8>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <64 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast i64 [[TMP1]] to <64 x i1>
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast i64 [[MASK:%.*]] to <64 x i1>
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <64 x i8> [[TMP2]], [[TMP3]]
+; CHECK-NEXT: [[TMP6:%.*]] = call <64 x i8> @llvm.x86.vgf2p8mulb.512(<64 x i8> [[SRC1:%.*]], <64 x i8> [[SRC2:%.*]])
+; CHECK-NEXT: [[TMP7:%.*]] = select <64 x i1> [[TMP5]], <64 x i8> [[_MSPROP]], <64 x i8> zeroinitializer
+; CHECK-NEXT: [[TMP8:%.*]] = xor <64 x i8> [[TMP6]], zeroinitializer
+; CHECK-NEXT: [[TMP9:%.*]] = or <64 x i8> [[TMP8]], [[_MSPROP]]
+; CHECK-NEXT: [[TMP10:%.*]] = or <64 x i8> [[TMP9]], zeroinitializer
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <64 x i1> [[TMP4]], <64 x i8> [[TMP10]], <64 x i8> [[TMP7]]
+; CHECK-NEXT: [[TMP11:%.*]] = select <64 x i1> [[TMP5]], <64 x i8> [[TMP6]], <64 x i8> zeroinitializer
+; CHECK-NEXT: store <64 x i8> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <64 x i8> [[TMP11]]
+;
+ %1 = bitcast i64 %mask to <64 x i1>
+ %2 = call <64 x i8> @llvm.x86.vgf2p8mulb.512(<64 x i8> %src1, <64 x i8> %src2)
+ %3 = select <64 x i1> %1, <64 x i8> %2, <64 x i8> zeroinitializer
+ ret <64 x i8> %3
+}
+
+attributes #0 = { sanitize_memory }
>From fd9be3ba5393d7d3f0da10bafbc35bfdfe357f87 Mon Sep 17 00:00:00 2001
From: Thurston Dang <thurston at google.com>
Date: Thu, 17 Jul 2025 05:59:47 +0000
Subject: [PATCH 2/2] undef -> poison
---
.../X86/avx512-gfni-intrinsics.ll | 24 +++++++++----------
1 file changed, 12 insertions(+), 12 deletions(-)
diff --git a/llvm/test/Instrumentation/MemorySanitizer/X86/avx512-gfni-intrinsics.ll b/llvm/test/Instrumentation/MemorySanitizer/X86/avx512-gfni-intrinsics.ll
index 7b5782605ae56..e5e4371c525b2 100644
--- a/llvm/test/Instrumentation/MemorySanitizer/X86/avx512-gfni-intrinsics.ll
+++ b/llvm/test/Instrumentation/MemorySanitizer/X86/avx512-gfni-intrinsics.ll
@@ -74,7 +74,7 @@ define { <16 x i8>, <16 x i8>, <16 x i8> } @test_vgf2p8affineinvqb_128(<16 x i8>
; CHECK-NEXT: [[TMP30:%.*]] = or <16 x i8> [[TMP29]], [[TMP4]]
; CHECK-NEXT: [[_MSPROP_SELECT1:%.*]] = select <16 x i1> [[TMP5]], <16 x i8> [[TMP30]], <16 x i8> [[TMP27]]
; CHECK-NEXT: [[TMP31:%.*]] = select <16 x i1> [[TMP6]], <16 x i8> [[TMP21]], <16 x i8> [[PASSTHRU]]
-; CHECK-NEXT: [[TMP32:%.*]] = insertvalue { <16 x i8>, <16 x i8>, <16 x i8> } undef, <16 x i8> [[TMP11]], 0
+; CHECK-NEXT: [[TMP32:%.*]] = insertvalue { <16 x i8>, <16 x i8>, <16 x i8> } poison, <16 x i8> [[TMP11]], 0
; CHECK-NEXT: [[TMP33:%.*]] = insertvalue { <16 x i8>, <16 x i8>, <16 x i8> } { <16 x i8> zeroinitializer, <16 x i8> splat (i8 -1), <16 x i8> splat (i8 -1) }, <16 x i8> [[_MSPROP_SELECT]], 1
; CHECK-NEXT: [[TMP34:%.*]] = insertvalue { <16 x i8>, <16 x i8>, <16 x i8> } [[TMP32]], <16 x i8> [[TMP26]], 1
; CHECK-NEXT: [[TMP35:%.*]] = insertvalue { <16 x i8>, <16 x i8>, <16 x i8> } [[TMP33]], <16 x i8> [[_MSPROP_SELECT1]], 2
@@ -88,7 +88,7 @@ define { <16 x i8>, <16 x i8>, <16 x i8> } @test_vgf2p8affineinvqb_128(<16 x i8>
%4 = call <16 x i8> @llvm.x86.vgf2p8affineinvqb.128(<16 x i8> %src1, <16 x i8> %src2, i8 5)
%5 = select <16 x i1> %1, <16 x i8> %3, <16 x i8> zeroinitializer
%6 = select <16 x i1> %1, <16 x i8> %4, <16 x i8> %passthru
- %7 = insertvalue { <16 x i8>, <16 x i8>, <16 x i8> } undef, <16 x i8> %2, 0
+ %7 = insertvalue { <16 x i8>, <16 x i8>, <16 x i8> } poison, <16 x i8> %2, 0
%8 = insertvalue { <16 x i8>, <16 x i8>, <16 x i8> } %7, <16 x i8> %5, 1
%9 = insertvalue { <16 x i8>, <16 x i8>, <16 x i8> } %8, <16 x i8> %6, 2
ret { <16 x i8>, <16 x i8>, <16 x i8> } %9
@@ -149,7 +149,7 @@ define { <32 x i8>, <32 x i8>, <32 x i8> } @test_vgf2p8affineinvqb_256(<32 x i8>
; CHECK-NEXT: [[TMP30:%.*]] = or <32 x i8> [[TMP29]], [[TMP4]]
; CHECK-NEXT: [[_MSPROP_SELECT1:%.*]] = select <32 x i1> [[TMP5]], <32 x i8> [[TMP30]], <32 x i8> [[TMP27]]
; CHECK-NEXT: [[TMP31:%.*]] = select <32 x i1> [[TMP6]], <32 x i8> [[TMP21]], <32 x i8> [[PASSTHRU]]
-; CHECK-NEXT: [[TMP32:%.*]] = insertvalue { <32 x i8>, <32 x i8>, <32 x i8> } undef, <32 x i8> [[TMP11]], 0
+; CHECK-NEXT: [[TMP32:%.*]] = insertvalue { <32 x i8>, <32 x i8>, <32 x i8> } poison, <32 x i8> [[TMP11]], 0
; CHECK-NEXT: [[TMP33:%.*]] = insertvalue { <32 x i8>, <32 x i8>, <32 x i8> } { <32 x i8> zeroinitializer, <32 x i8> splat (i8 -1), <32 x i8> splat (i8 -1) }, <32 x i8> [[_MSPROP_SELECT]], 1
; CHECK-NEXT: [[TMP34:%.*]] = insertvalue { <32 x i8>, <32 x i8>, <32 x i8> } [[TMP32]], <32 x i8> [[TMP26]], 1
; CHECK-NEXT: [[TMP35:%.*]] = insertvalue { <32 x i8>, <32 x i8>, <32 x i8> } [[TMP33]], <32 x i8> [[_MSPROP_SELECT1]], 2
@@ -163,7 +163,7 @@ define { <32 x i8>, <32 x i8>, <32 x i8> } @test_vgf2p8affineinvqb_256(<32 x i8>
%4 = call <32 x i8> @llvm.x86.vgf2p8affineinvqb.256(<32 x i8> %src1, <32 x i8> %src2, i8 5)
%5 = select <32 x i1> %1, <32 x i8> %3, <32 x i8> zeroinitializer
%6 = select <32 x i1> %1, <32 x i8> %4, <32 x i8> %passthru
- %7 = insertvalue { <32 x i8>, <32 x i8>, <32 x i8> } undef, <32 x i8> %2, 0
+ %7 = insertvalue { <32 x i8>, <32 x i8>, <32 x i8> } poison, <32 x i8> %2, 0
%8 = insertvalue { <32 x i8>, <32 x i8>, <32 x i8> } %7, <32 x i8> %5, 1
%9 = insertvalue { <32 x i8>, <32 x i8>, <32 x i8> } %8, <32 x i8> %6, 2
ret { <32 x i8>, <32 x i8>, <32 x i8> } %9
@@ -224,7 +224,7 @@ define { <64 x i8>, <64 x i8>, <64 x i8> } @test_vgf2p8affineinvqb_512(<64 x i8>
; CHECK-NEXT: [[TMP30:%.*]] = or <64 x i8> [[TMP29]], [[TMP4]]
; CHECK-NEXT: [[_MSPROP_SELECT1:%.*]] = select <64 x i1> [[TMP5]], <64 x i8> [[TMP30]], <64 x i8> [[TMP27]]
; CHECK-NEXT: [[TMP31:%.*]] = select <64 x i1> [[TMP6]], <64 x i8> [[TMP21]], <64 x i8> [[PASSTHRU]]
-; CHECK-NEXT: [[TMP32:%.*]] = insertvalue { <64 x i8>, <64 x i8>, <64 x i8> } undef, <64 x i8> [[TMP11]], 0
+; CHECK-NEXT: [[TMP32:%.*]] = insertvalue { <64 x i8>, <64 x i8>, <64 x i8> } poison, <64 x i8> [[TMP11]], 0
; CHECK-NEXT: [[TMP33:%.*]] = insertvalue { <64 x i8>, <64 x i8>, <64 x i8> } { <64 x i8> zeroinitializer, <64 x i8> splat (i8 -1), <64 x i8> splat (i8 -1) }, <64 x i8> [[_MSPROP_SELECT]], 1
; CHECK-NEXT: [[TMP34:%.*]] = insertvalue { <64 x i8>, <64 x i8>, <64 x i8> } [[TMP32]], <64 x i8> [[TMP26]], 1
; CHECK-NEXT: [[TMP35:%.*]] = insertvalue { <64 x i8>, <64 x i8>, <64 x i8> } [[TMP33]], <64 x i8> [[_MSPROP_SELECT1]], 2
@@ -238,7 +238,7 @@ define { <64 x i8>, <64 x i8>, <64 x i8> } @test_vgf2p8affineinvqb_512(<64 x i8>
%4 = call <64 x i8> @llvm.x86.vgf2p8affineinvqb.512(<64 x i8> %src1, <64 x i8> %src2, i8 5)
%5 = select <64 x i1> %1, <64 x i8> %3, <64 x i8> zeroinitializer
%6 = select <64 x i1> %1, <64 x i8> %4, <64 x i8> %passthru
- %7 = insertvalue { <64 x i8>, <64 x i8>, <64 x i8> } undef, <64 x i8> %2, 0
+ %7 = insertvalue { <64 x i8>, <64 x i8>, <64 x i8> } poison, <64 x i8> %2, 0
%8 = insertvalue { <64 x i8>, <64 x i8>, <64 x i8> } %7, <64 x i8> %5, 1
%9 = insertvalue { <64 x i8>, <64 x i8>, <64 x i8> } %8, <64 x i8> %6, 2
ret { <64 x i8>, <64 x i8>, <64 x i8> } %9
@@ -299,7 +299,7 @@ define { <16 x i8>, <16 x i8>, <16 x i8> } @test_vgf2p8affineqb_128(<16 x i8> %s
; CHECK-NEXT: [[TMP30:%.*]] = or <16 x i8> [[TMP29]], [[TMP4]]
; CHECK-NEXT: [[_MSPROP_SELECT1:%.*]] = select <16 x i1> [[TMP5]], <16 x i8> [[TMP30]], <16 x i8> [[TMP27]]
; CHECK-NEXT: [[TMP31:%.*]] = select <16 x i1> [[TMP6]], <16 x i8> [[TMP21]], <16 x i8> [[PASSTHRU]]
-; CHECK-NEXT: [[TMP32:%.*]] = insertvalue { <16 x i8>, <16 x i8>, <16 x i8> } undef, <16 x i8> [[TMP11]], 0
+; CHECK-NEXT: [[TMP32:%.*]] = insertvalue { <16 x i8>, <16 x i8>, <16 x i8> } poison, <16 x i8> [[TMP11]], 0
; CHECK-NEXT: [[TMP33:%.*]] = insertvalue { <16 x i8>, <16 x i8>, <16 x i8> } { <16 x i8> zeroinitializer, <16 x i8> splat (i8 -1), <16 x i8> splat (i8 -1) }, <16 x i8> [[_MSPROP_SELECT]], 1
; CHECK-NEXT: [[TMP34:%.*]] = insertvalue { <16 x i8>, <16 x i8>, <16 x i8> } [[TMP32]], <16 x i8> [[TMP26]], 1
; CHECK-NEXT: [[TMP35:%.*]] = insertvalue { <16 x i8>, <16 x i8>, <16 x i8> } [[TMP33]], <16 x i8> [[_MSPROP_SELECT1]], 2
@@ -313,7 +313,7 @@ define { <16 x i8>, <16 x i8>, <16 x i8> } @test_vgf2p8affineqb_128(<16 x i8> %s
%4 = call <16 x i8> @llvm.x86.vgf2p8affineqb.128(<16 x i8> %src1, <16 x i8> %src2, i8 5)
%5 = select <16 x i1> %1, <16 x i8> %3, <16 x i8> zeroinitializer
%6 = select <16 x i1> %1, <16 x i8> %4, <16 x i8> %passthru
- %7 = insertvalue { <16 x i8>, <16 x i8>, <16 x i8> } undef, <16 x i8> %2, 0
+ %7 = insertvalue { <16 x i8>, <16 x i8>, <16 x i8> } poison, <16 x i8> %2, 0
%8 = insertvalue { <16 x i8>, <16 x i8>, <16 x i8> } %7, <16 x i8> %5, 1
%9 = insertvalue { <16 x i8>, <16 x i8>, <16 x i8> } %8, <16 x i8> %6, 2
ret { <16 x i8>, <16 x i8>, <16 x i8> } %9
@@ -374,7 +374,7 @@ define { <32 x i8>, <32 x i8>, <32 x i8> } @test_vgf2p8affineqb_256(<32 x i8> %s
; CHECK-NEXT: [[TMP30:%.*]] = or <32 x i8> [[TMP29]], [[TMP4]]
; CHECK-NEXT: [[_MSPROP_SELECT1:%.*]] = select <32 x i1> [[TMP5]], <32 x i8> [[TMP30]], <32 x i8> [[TMP27]]
; CHECK-NEXT: [[TMP31:%.*]] = select <32 x i1> [[TMP6]], <32 x i8> [[TMP21]], <32 x i8> [[PASSTHRU]]
-; CHECK-NEXT: [[TMP32:%.*]] = insertvalue { <32 x i8>, <32 x i8>, <32 x i8> } undef, <32 x i8> [[TMP11]], 0
+; CHECK-NEXT: [[TMP32:%.*]] = insertvalue { <32 x i8>, <32 x i8>, <32 x i8> } poison, <32 x i8> [[TMP11]], 0
; CHECK-NEXT: [[TMP33:%.*]] = insertvalue { <32 x i8>, <32 x i8>, <32 x i8> } { <32 x i8> zeroinitializer, <32 x i8> splat (i8 -1), <32 x i8> splat (i8 -1) }, <32 x i8> [[_MSPROP_SELECT]], 1
; CHECK-NEXT: [[TMP34:%.*]] = insertvalue { <32 x i8>, <32 x i8>, <32 x i8> } [[TMP32]], <32 x i8> [[TMP26]], 1
; CHECK-NEXT: [[TMP35:%.*]] = insertvalue { <32 x i8>, <32 x i8>, <32 x i8> } [[TMP33]], <32 x i8> [[_MSPROP_SELECT1]], 2
@@ -388,7 +388,7 @@ define { <32 x i8>, <32 x i8>, <32 x i8> } @test_vgf2p8affineqb_256(<32 x i8> %s
%4 = call <32 x i8> @llvm.x86.vgf2p8affineqb.256(<32 x i8> %src1, <32 x i8> %src2, i8 5)
%5 = select <32 x i1> %1, <32 x i8> %3, <32 x i8> zeroinitializer
%6 = select <32 x i1> %1, <32 x i8> %4, <32 x i8> %passthru
- %7 = insertvalue { <32 x i8>, <32 x i8>, <32 x i8> } undef, <32 x i8> %2, 0
+ %7 = insertvalue { <32 x i8>, <32 x i8>, <32 x i8> } poison, <32 x i8> %2, 0
%8 = insertvalue { <32 x i8>, <32 x i8>, <32 x i8> } %7, <32 x i8> %5, 1
%9 = insertvalue { <32 x i8>, <32 x i8>, <32 x i8> } %8, <32 x i8> %6, 2
ret { <32 x i8>, <32 x i8>, <32 x i8> } %9
@@ -449,7 +449,7 @@ define { <64 x i8>, <64 x i8>, <64 x i8> } @test_vgf2p8affineqb_512(<64 x i8> %s
; CHECK-NEXT: [[TMP30:%.*]] = or <64 x i8> [[TMP29]], [[TMP4]]
; CHECK-NEXT: [[_MSPROP_SELECT1:%.*]] = select <64 x i1> [[TMP5]], <64 x i8> [[TMP30]], <64 x i8> [[TMP27]]
; CHECK-NEXT: [[TMP31:%.*]] = select <64 x i1> [[TMP6]], <64 x i8> [[TMP21]], <64 x i8> [[PASSTHRU]]
-; CHECK-NEXT: [[TMP32:%.*]] = insertvalue { <64 x i8>, <64 x i8>, <64 x i8> } undef, <64 x i8> [[TMP11]], 0
+; CHECK-NEXT: [[TMP32:%.*]] = insertvalue { <64 x i8>, <64 x i8>, <64 x i8> } poison, <64 x i8> [[TMP11]], 0
; CHECK-NEXT: [[TMP33:%.*]] = insertvalue { <64 x i8>, <64 x i8>, <64 x i8> } { <64 x i8> zeroinitializer, <64 x i8> splat (i8 -1), <64 x i8> splat (i8 -1) }, <64 x i8> [[_MSPROP_SELECT]], 1
; CHECK-NEXT: [[TMP34:%.*]] = insertvalue { <64 x i8>, <64 x i8>, <64 x i8> } [[TMP32]], <64 x i8> [[TMP26]], 1
; CHECK-NEXT: [[TMP35:%.*]] = insertvalue { <64 x i8>, <64 x i8>, <64 x i8> } [[TMP33]], <64 x i8> [[_MSPROP_SELECT1]], 2
@@ -463,7 +463,7 @@ define { <64 x i8>, <64 x i8>, <64 x i8> } @test_vgf2p8affineqb_512(<64 x i8> %s
%4 = call <64 x i8> @llvm.x86.vgf2p8affineqb.512(<64 x i8> %src1, <64 x i8> %src2, i8 5)
%5 = select <64 x i1> %1, <64 x i8> %3, <64 x i8> zeroinitializer
%6 = select <64 x i1> %1, <64 x i8> %4, <64 x i8> %passthru
- %7 = insertvalue { <64 x i8>, <64 x i8>, <64 x i8> } undef, <64 x i8> %2, 0
+ %7 = insertvalue { <64 x i8>, <64 x i8>, <64 x i8> } poison, <64 x i8> %2, 0
%8 = insertvalue { <64 x i8>, <64 x i8>, <64 x i8> } %7, <64 x i8> %5, 1
%9 = insertvalue { <64 x i8>, <64 x i8>, <64 x i8> } %8, <64 x i8> %6, 2
ret { <64 x i8>, <64 x i8>, <64 x i8> } %9
More information about the llvm-commits
mailing list