[llvm] [CodeGen] Remove checks for vectors in unsigned division prior to computing leading zeros (PR #99524)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Jul 18 12:30:35 PDT 2024
https://github.com/AtariDreams updated https://github.com/llvm/llvm-project/pull/99524
From 599119ebc220a228b6673958bd677ab0c1297fbe Mon Sep 17 00:00:00 2001
From: Rose <gfunni234 at gmail.com>
Date: Thu, 18 Jul 2024 15:07:50 -0400
Subject: [PATCH 1/2] Pre-commit test (NFC)
---
llvm/test/CodeGen/X86/combine-udiv.ll | 82 +++++++++++++++++++++++++++
1 file changed, 82 insertions(+)
diff --git a/llvm/test/CodeGen/X86/combine-udiv.ll b/llvm/test/CodeGen/X86/combine-udiv.ll
index e429ac0c63c2d..9ae025f677ef2 100644
--- a/llvm/test/CodeGen/X86/combine-udiv.ll
+++ b/llvm/test/CodeGen/X86/combine-udiv.ll
@@ -755,3 +755,85 @@ define <4 x i1> @boolvec_udiv(<4 x i1> %x, <4 x i1> %y) {
%r = udiv <4 x i1> %x, %y
ret <4 x i1> %r
}
+
+define <4 x i32> @vector_div_leading_zeros(<4 x i32> %x) {
+; SSE2-LABEL: vector_div_leading_zeros:
+; SSE2: # %bb.0:
+; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [613566757,613566757,613566757,613566757]
+; SSE2-NEXT: movdqa %xmm0, %xmm2
+; SSE2-NEXT: pmuludq %xmm1, %xmm2
+; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
+; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
+; SSE2-NEXT: pmuludq %xmm1, %xmm3
+; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,3,2,3]
+; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; SSE2-NEXT: psubd %xmm2, %xmm0
+; SSE2-NEXT: psrld $1, %xmm0
+; SSE2-NEXT: paddd %xmm2, %xmm0
+; SSE2-NEXT: psrld $2, %xmm0
+; SSE2-NEXT: retq
+;
+; SSE41-LABEL: vector_div_leading_zeros:
+; SSE41: # %bb.0:
+; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
+; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [613566757,613566757,613566757,613566757]
+; SSE41-NEXT: pmuludq %xmm2, %xmm1
+; SSE41-NEXT: pmuludq %xmm0, %xmm2
+; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
+; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
+; SSE41-NEXT: psubd %xmm2, %xmm0
+; SSE41-NEXT: psrld $1, %xmm0
+; SSE41-NEXT: paddd %xmm2, %xmm0
+; SSE41-NEXT: psrld $2, %xmm0
+; SSE41-NEXT: retq
+;
+; AVX1-LABEL: vector_div_leading_zeros:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
+; AVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [613566757,613566757,613566757,613566757]
+; AVX1-NEXT: vpmuludq %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vpmuludq %xmm2, %xmm0, %xmm2
+; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
+; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
+; AVX1-NEXT: vpsubd %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vpsrld $1, %xmm0, %xmm0
+; AVX1-NEXT: vpaddd %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vpsrld $2, %xmm0, %xmm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: vector_div_leading_zeros:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
+; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [613566757,613566757,613566757,613566757]
+; AVX2-NEXT: vpmuludq %xmm2, %xmm1, %xmm1
+; AVX2-NEXT: vpmuludq %xmm2, %xmm0, %xmm2
+; AVX2-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
+; AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm2[0],xmm1[1],xmm2[2],xmm1[3]
+; AVX2-NEXT: vpsubd %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vpsrld $1, %xmm0, %xmm0
+; AVX2-NEXT: vpaddd %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vpsrld $2, %xmm0, %xmm0
+; AVX2-NEXT: retq
+;
+; XOP-LABEL: vector_div_leading_zeros:
+; XOP: # %bb.0:
+; XOP-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; XOP-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
+; XOP-NEXT: vbroadcastss {{.*#+}} xmm2 = [613566757,613566757,613566757,613566757]
+; XOP-NEXT: vpmuludq %xmm2, %xmm1, %xmm1
+; XOP-NEXT: vpmuludq %xmm2, %xmm0, %xmm2
+; XOP-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
+; XOP-NEXT: vpblendw {{.*#+}} xmm1 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
+; XOP-NEXT: vpsubd %xmm1, %xmm0, %xmm0
+; XOP-NEXT: vpsrld $1, %xmm0, %xmm0
+; XOP-NEXT: vpaddd %xmm1, %xmm0, %xmm0
+; XOP-NEXT: vpsrld $2, %xmm0, %xmm0
+; XOP-NEXT: retq
+ %a = and <4 x i32> %x, <i32 255, i32 255, i32 255, i32 255>
+ %b = udiv <4 x i32> %a, <i32 7, i32 7, i32 7, i32 7>
+ ret <4 x i32> %b
+}
From a795ccf9bd4e342734751c25a2e2c5d2900e4611 Mon Sep 17 00:00:00 2001
From: Rose <gfunni234 at gmail.com>
Date: Thu, 18 Jul 2024 12:23:13 -0400
Subject: [PATCH 2/2] [CodeGen] Remove checks for vectors in unsigned division
prior to computing leading zeros
It turns out we can safely use DAG.computeKnownBits(N0).countMinLeadingZeros() with vector types as well, so remove the scalar-only check and instead clamp the result against each divisor element's leading zeros when building the magic constants.
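For context (an illustration, not part of this patch): the reason the known leading zeros help is that a dividend confined to 8 bits can be divided by 7 with a single high-half multiply by ceil(2^32/7) = 613566757, the constant that now appears in the updated CHECK lines, with no subtract/add/shift fixup afterwards. A minimal standalone C++ sketch of that identity, assuming only standard headers:

#include <cassert>
#include <cstdint>

int main() {
  // ceil(2^32 / 7); the 613566757 multiplier seen in the updated CHECK
  // lines of vector_div_leading_zeros.
  const uint64_t Magic = 613566757;
  for (uint32_t X = 0; X <= 255; ++X) {
    // High 32 bits of the 32x32->64 multiply, i.e. what pmuludq plus the
    // odd-lane shuffle extract per element.
    uint32_t Quotient = static_cast<uint32_t>((X * Magic) >> 32);
    assert(Quotient == X / 7 && "magic multiply must match exact udiv");
  }
  return 0;
}

This mirrors the post-patch SSE2 sequence in the test: two pmuludq plus shuffles to collect the high halves, with the psubd/psrld/paddd/psrld fixup gone.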
---
.../lib/CodeGen/GlobalISel/CombinerHelper.cpp | 4 +-
.../CodeGen/SelectionDAG/TargetLowering.cpp | 13 +----
llvm/test/CodeGen/X86/combine-udiv.ll | 55 ++++++-------------
...of-two-or-zero-when-comparing-with-zero.ll | 9 +--
4 files changed, 26 insertions(+), 55 deletions(-)
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index dfc3d73e322b8..a510f40decb84 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -5180,7 +5180,9 @@ MachineInstr *CombinerHelper::buildUDivUsingMul(MachineInstr &MI) {
LLT ScalarShiftAmtTy = ShiftAmtTy.getScalarType();
unsigned KnownLeadingZeros =
- KB ? KB->getKnownBits(LHS).countMinLeadingZeros() : 0;
+ (!MI.getFlag(MachineInstr::MIFlag::IsExact) && KB)
+ ? KB->getKnownBits(LHS).countMinLeadingZeros()
+ : 0;
auto &MIB = Builder;
bool UseSRL = false;
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index c3a20b5044c5f..140c97ccd90ba 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -6483,15 +6483,7 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
// Try to use leading zeros of the dividend to reduce the multiplier and
// avoid expensive fixups.
- // TODO: Support vectors.
- unsigned LeadingZeros = 0;
- if (!VT.isVector() && isa<ConstantSDNode>(N1)) {
- assert(!isOneConstant(N1) && "Unexpected divisor");
- LeadingZeros = DAG.computeKnownBits(N0).countMinLeadingZeros();
- // UnsignedDivisionByConstantInfo doesn't work correctly if leading zeros in
- // the dividend exceeds the leading zeros for the divisor.
- LeadingZeros = std::min(LeadingZeros, N1->getAsAPIntVal().countl_zero());
- }
+ unsigned KnownLeadingZeros = DAG.computeKnownBits(N0).countMinLeadingZeros();
bool UseNPQ = false, UsePreShift = false, UsePostShift = false;
SmallVector<SDValue, 16> PreShifts, PostShifts, MagicFactors, NPQFactors;
@@ -6510,7 +6502,8 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
MagicFactor = NPQFactor = DAG.getUNDEF(SVT);
} else {
UnsignedDivisionByConstantInfo magics =
- UnsignedDivisionByConstantInfo::get(Divisor, LeadingZeros);
+ UnsignedDivisionByConstantInfo::get(
+ Divisor, std::min(KnownLeadingZeros, Divisor.countl_zero()));
MagicFactor = DAG.getConstant(magics.Magic, dl, SVT);
diff --git a/llvm/test/CodeGen/X86/combine-udiv.ll b/llvm/test/CodeGen/X86/combine-udiv.ll
index 9ae025f677ef2..d5a481549f851 100644
--- a/llvm/test/CodeGen/X86/combine-udiv.ll
+++ b/llvm/test/CodeGen/X86/combine-udiv.ll
@@ -761,17 +761,12 @@ define <4 x i32> @vector_div_leading_zeros(<4 x i32> %x) {
; SSE2: # %bb.0:
; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [613566757,613566757,613566757,613566757]
-; SSE2-NEXT: movdqa %xmm0, %xmm2
+; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
+; SSE2-NEXT: pmuludq %xmm1, %xmm0
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,3,2,3]
; SSE2-NEXT: pmuludq %xmm1, %xmm2
-; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
-; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
-; SSE2-NEXT: pmuludq %xmm1, %xmm3
-; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,3,2,3]
-; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
-; SSE2-NEXT: psubd %xmm2, %xmm0
-; SSE2-NEXT: psrld $1, %xmm0
-; SSE2-NEXT: paddd %xmm2, %xmm0
-; SSE2-NEXT: psrld $2, %xmm0
+; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,3,2,3]
+; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT: retq
;
; SSE41-LABEL: vector_div_leading_zeros:
@@ -780,13 +775,9 @@ define <4 x i32> @vector_div_leading_zeros(<4 x i32> %x) {
; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [613566757,613566757,613566757,613566757]
; SSE41-NEXT: pmuludq %xmm2, %xmm1
-; SSE41-NEXT: pmuludq %xmm0, %xmm2
-; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
-; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
-; SSE41-NEXT: psubd %xmm2, %xmm0
-; SSE41-NEXT: psrld $1, %xmm0
-; SSE41-NEXT: paddd %xmm2, %xmm0
-; SSE41-NEXT: psrld $2, %xmm0
+; SSE41-NEXT: pmuludq %xmm2, %xmm0
+; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
+; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
; SSE41-NEXT: retq
;
; AVX1-LABEL: vector_div_leading_zeros:
@@ -795,13 +786,9 @@ define <4 x i32> @vector_div_leading_zeros(<4 x i32> %x) {
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [613566757,613566757,613566757,613566757]
; AVX1-NEXT: vpmuludq %xmm2, %xmm1, %xmm1
-; AVX1-NEXT: vpmuludq %xmm2, %xmm0, %xmm2
-; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
-; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
-; AVX1-NEXT: vpsubd %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpsrld $1, %xmm0, %xmm0
-; AVX1-NEXT: vpaddd %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpsrld $2, %xmm0, %xmm0
+; AVX1-NEXT: vpmuludq %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
+; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
; AVX1-NEXT: retq
;
; AVX2-LABEL: vector_div_leading_zeros:
@@ -810,13 +797,9 @@ define <4 x i32> @vector_div_leading_zeros(<4 x i32> %x) {
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [613566757,613566757,613566757,613566757]
; AVX2-NEXT: vpmuludq %xmm2, %xmm1, %xmm1
-; AVX2-NEXT: vpmuludq %xmm2, %xmm0, %xmm2
-; AVX2-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
-; AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm2[0],xmm1[1],xmm2[2],xmm1[3]
-; AVX2-NEXT: vpsubd %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vpsrld $1, %xmm0, %xmm0
-; AVX2-NEXT: vpaddd %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vpsrld $2, %xmm0, %xmm0
+; AVX2-NEXT: vpmuludq %xmm2, %xmm0, %xmm0
+; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
+; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
; AVX2-NEXT: retq
;
; XOP-LABEL: vector_div_leading_zeros:
@@ -825,13 +808,9 @@ define <4 x i32> @vector_div_leading_zeros(<4 x i32> %x) {
; XOP-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; XOP-NEXT: vbroadcastss {{.*#+}} xmm2 = [613566757,613566757,613566757,613566757]
; XOP-NEXT: vpmuludq %xmm2, %xmm1, %xmm1
-; XOP-NEXT: vpmuludq %xmm2, %xmm0, %xmm2
-; XOP-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
-; XOP-NEXT: vpblendw {{.*#+}} xmm1 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
-; XOP-NEXT: vpsubd %xmm1, %xmm0, %xmm0
-; XOP-NEXT: vpsrld $1, %xmm0, %xmm0
-; XOP-NEXT: vpaddd %xmm1, %xmm0, %xmm0
-; XOP-NEXT: vpsrld $2, %xmm0, %xmm0
+; XOP-NEXT: vpmuludq %xmm2, %xmm0, %xmm0
+; XOP-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
+; XOP-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
; XOP-NEXT: retq
%a = and <4 x i32> %x, <i32 255, i32 255, i32 255, i32 255>
%b = udiv <4 x i32> %a, <i32 7, i32 7, i32 7, i32 7>
diff --git a/llvm/test/CodeGen/X86/omit-urem-of-power-of-two-or-zero-when-comparing-with-zero.ll b/llvm/test/CodeGen/X86/omit-urem-of-power-of-two-or-zero-when-comparing-with-zero.ll
index 886c3ae10324d..9e398096bfcc5 100644
--- a/llvm/test/CodeGen/X86/omit-urem-of-power-of-two-or-zero-when-comparing-with-zero.ll
+++ b/llvm/test/CodeGen/X86/omit-urem-of-power-of-two-or-zero-when-comparing-with-zero.ll
@@ -230,7 +230,7 @@ define <4 x i1> @p7_vector_urem_by_const__nonsplat_undef2(<4 x i32> %x, <4 x i32
; SSE2-LABEL: p7_vector_urem_by_const__nonsplat_undef2:
; SSE2: # %bb.0:
; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2863311531,2863311531,2863311531,2863311531]
+; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [715827883,715827883,715827883,715827883]
; SSE2-NEXT: movdqa %xmm0, %xmm2
; SSE2-NEXT: pmuludq %xmm1, %xmm2
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
@@ -238,7 +238,6 @@ define <4 x i1> @p7_vector_urem_by_const__nonsplat_undef2(<4 x i32> %x, <4 x i32
; SSE2-NEXT: pmuludq %xmm1, %xmm3
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,3,2,3]
; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
-; SSE2-NEXT: psrld $2, %xmm2
; SSE2-NEXT: pmaddwd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 # [6,0,6,0,6,0,6,0]
; SSE2-NEXT: psubd %xmm2, %xmm0
; SSE2-NEXT: pxor %xmm1, %xmm1
@@ -249,12 +248,11 @@ define <4 x i1> @p7_vector_urem_by_const__nonsplat_undef2(<4 x i32> %x, <4 x i32
; SSE4: # %bb.0:
; SSE4-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE4-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; SSE4-NEXT: movdqa {{.*#+}} xmm2 = [2863311531,2863311531,2863311531,2863311531]
+; SSE4-NEXT: movdqa {{.*#+}} xmm2 = [715827883,715827883,715827883,715827883]
; SSE4-NEXT: pmuludq %xmm2, %xmm1
; SSE4-NEXT: pmuludq %xmm0, %xmm2
; SSE4-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; SSE4-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
-; SSE4-NEXT: psrld $2, %xmm2
; SSE4-NEXT: pmaddwd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 # [6,0,6,0,6,0,6,0]
; SSE4-NEXT: psubd %xmm2, %xmm0
; SSE4-NEXT: pxor %xmm1, %xmm1
@@ -266,12 +264,11 @@ define <4 x i1> @p7_vector_urem_by_const__nonsplat_undef2(<4 x i32> %x, <4 x i32
; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [128,128,128,128]
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2863311531,2863311531,2863311531,2863311531]
+; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [715827883,715827883,715827883,715827883]
; AVX2-NEXT: vpmuludq %xmm2, %xmm1, %xmm1
; AVX2-NEXT: vpmuludq %xmm2, %xmm0, %xmm2
; AVX2-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm2[0],xmm1[1],xmm2[2],xmm1[3]
-; AVX2-NEXT: vpsrld $2, %xmm1, %xmm1
; AVX2-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # [6,0,6,0,6,0,6,0]
; AVX2-NEXT: vpsubd %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1