[llvm] cfb3ee2 - [DAG] Add non-uniform vector support to (shl (srl x, c1), c2) -> (and (shift x, c3))
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Wed Apr 13 03:37:43 PDT 2022
Author: Simon Pilgrim
Date: 2022-04-13T11:37:33+01:00
New Revision: cfb3ee21857da7c3d96f89afc3fecb0ec833dde9
URL: https://github.com/llvm/llvm-project/commit/cfb3ee21857da7c3d96f89afc3fecb0ec833dde9
DIFF: https://github.com/llvm/llvm-project/commit/cfb3ee21857da7c3d96f89afc3fecb0ec833dde9.diff
LOG: [DAG] Add non-uniform vector support to (shl (srl x, c1), c2) -> (and (shift x, c3))
Another part of D77804 yak shaving
Differential Revision: https://reviews.llvm.org/D123523
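Background on the fold, for readers landing here from the list: per lane, (shl (srl x, c1), c2) can always be rewritten as a single shift of x followed by an AND with a constant mask — shl by (c2 - c1) and mask with (ones << c2) when c2 >= c1, or srl by (c1 - c2) and mask with ((ones << c1) >> (c1 - c2)) otherwise. The patch replaces the old isConstOrConstSplat checks with ISD::matchBinaryPredicate so each vector lane may carry its own c1/c2. A minimal standalone sketch (plain C++, not LLVM code; all names are illustrative) that exhaustively checks this identity on 8-bit values:

// sketch.cpp -- illustrative only; mirrors the per-lane identity the combine relies on.
#include <cassert>
#include <cstdint>

static uint8_t reference(uint8_t x, unsigned c1, unsigned c2) {
  // The original pattern: srl then shl, truncated at each step.
  return static_cast<uint8_t>(static_cast<uint8_t>(x >> c1) << c2);
}

static uint8_t folded(uint8_t x, unsigned c1, unsigned c2) {
  const uint8_t ones = 0xFF;
  if (c2 >= c1) {
    // (shl (srl x, c1), c2) -> (and (shl x, c2-c1), ones << c2)
    uint8_t mask = static_cast<uint8_t>(ones << c2);
    return static_cast<uint8_t>(static_cast<uint8_t>(x << (c2 - c1)) & mask);
  }
  // (shl (srl x, c1), c2) -> (and (srl x, c1-c2), (ones << c1) >> (c1-c2))
  uint8_t mask =
      static_cast<uint8_t>(static_cast<uint8_t>(ones << c1) >> (c1 - c2));
  return static_cast<uint8_t>(static_cast<uint8_t>(x >> (c1 - c2)) & mask);
}

int main() {
  for (unsigned x = 0; x < 256; ++x)
    for (unsigned c1 = 0; c1 < 8; ++c1)
      for (unsigned c2 = 0; c2 < 8; ++c2)
        assert(folded(static_cast<uint8_t>(x), c1, c2) ==
               reference(static_cast<uint8_t>(x), c1, c2));
}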
Added:
Modified:
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
llvm/test/CodeGen/X86/combine-shl.ll
llvm/test/CodeGen/X86/rotate-extract-vector.ll
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 88f68338ffa22..700a66c68168b 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -8887,10 +8887,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
}
}
- // fold (shl (sr[la] exact X, C1), C2) -> (shl X, (C2-C1)) if C1 <= C2
- // fold (shl (sr[la] exact X, C1), C2) -> (sr[la] X, (C2-C1)) if C1 >= C2
- if ((N0.getOpcode() == ISD::SRL || N0.getOpcode() == ISD::SRA) &&
- N0->getFlags().hasExact()) {
+ if (N0.getOpcode() == ISD::SRL || N0.getOpcode() == ISD::SRA) {
auto MatchShiftAmount = [OpSizeInBits](ConstantSDNode *LHS,
ConstantSDNode *RHS) {
const APInt &LHSC = LHS->getAPIntValue();
@@ -8898,54 +8895,55 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
return LHSC.ult(OpSizeInBits) && RHSC.ult(OpSizeInBits) &&
LHSC.getZExtValue() <= RHSC.getZExtValue();
};
- if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchShiftAmount,
- /*AllowUndefs*/ false,
- /*AllowTypeMismatch*/ true)) {
- SDLoc DL(N);
- SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
- SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N1, N01);
- return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Diff);
- }
- if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchShiftAmount,
- /*AllowUndefs*/ false,
- /*AllowTypeMismatch*/ true)) {
- SDLoc DL(N);
- SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
- SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N01, N1);
- return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0), Diff);
- }
- }
- ConstantSDNode *N1C = isConstOrConstSplat(N1);
+ SDLoc DL(N);
- // fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1), MASK) or
- // (and (srl x, (sub c1, c2), MASK)
- // Only fold this if the inner shift has no other uses -- if it does, folding
- // this will increase the total number of instructions.
- // TODO - drop hasOneUse requirement if c1 == c2?
- // TODO - support non-uniform vector shift amounts.
- if (N1C && N0.getOpcode() == ISD::SRL && N0.hasOneUse() &&
- TLI.shouldFoldConstantShiftPairToMask(N, Level)) {
- if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
- if (N0C1->getAPIntValue().ult(OpSizeInBits)) {
- uint64_t c1 = N0C1->getZExtValue();
- uint64_t c2 = N1C->getZExtValue();
- APInt Mask = APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - c1);
- SDValue Shift;
- if (c2 > c1) {
- Mask <<= c2 - c1;
- SDLoc DL(N);
- Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
- DAG.getConstant(c2 - c1, DL, ShiftVT));
- } else {
- Mask.lshrInPlace(c1 - c2);
- SDLoc DL(N);
- Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0),
- DAG.getConstant(c1 - c2, DL, ShiftVT));
- }
- SDLoc DL(N0);
- return DAG.getNode(ISD::AND, DL, VT, Shift,
- DAG.getConstant(Mask, DL, VT));
+ // fold (shl (sr[la] exact X, C1), C2) -> (shl X, (C2-C1)) if C1 <= C2
+ // fold (shl (sr[la] exact X, C1), C2) -> (sr[la] X, (C2-C1)) if C1 >= C2
+ if (N0->getFlags().hasExact()) {
+ if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchShiftAmount,
+ /*AllowUndefs*/ false,
+ /*AllowTypeMismatch*/ true)) {
+ SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
+ SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N1, N01);
+ return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Diff);
+ }
+ if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchShiftAmount,
+ /*AllowUndefs*/ false,
+ /*AllowTypeMismatch*/ true)) {
+ SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
+ SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N01, N1);
+ return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0), Diff);
+ }
+ }
+
+ // fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1), MASK) or
+ // (and (srl x, (sub c1, c2), MASK)
+ // Only fold this if the inner shift has no other uses -- if it does,
+ // folding this will increase the total number of instructions.
+ // TODO - drop hasOneUse requirement if c1 == c2?
+ if (N0.getOpcode() == ISD::SRL && N0.hasOneUse() &&
+ TLI.shouldFoldConstantShiftPairToMask(N, Level)) {
+ if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchShiftAmount,
+ /*AllowUndefs*/ false,
+ /*AllowTypeMismatch*/ true)) {
+ SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
+ SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N01, N1);
+ SDValue Mask = DAG.getAllOnesConstant(DL, VT);
+ Mask = DAG.getNode(ISD::SHL, DL, VT, Mask, N01);
+ Mask = DAG.getNode(ISD::SRL, DL, VT, Mask, Diff);
+ SDValue Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Diff);
+ return DAG.getNode(ISD::AND, SDLoc(N0), VT, Shift, Mask);
+ }
+ if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchShiftAmount,
+ /*AllowUndefs*/ false,
+ /*AllowTypeMismatch*/ true)) {
+ SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
+ SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N1, N01);
+ SDValue Mask = DAG.getAllOnesConstant(DL, VT);
+ Mask = DAG.getNode(ISD::SHL, DL, VT, Mask, N1);
+ SDValue Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Diff);
+ return DAG.getNode(ISD::AND, SDLoc(N0), VT, Shift, Mask);
}
}
}
@@ -8984,6 +8982,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), Shl);
}
+ ConstantSDNode *N1C = isConstOrConstSplat(N1);
if (N1C && !N1C->isOpaque())
if (SDValue NewSHL = visitShiftByConstant(N))
return NewSHL;
diff --git a/llvm/test/CodeGen/X86/combine-shl.ll b/llvm/test/CodeGen/X86/combine-shl.ll
index 5d7d9730f1252..2fc75062bfe8d 100644
--- a/llvm/test/CodeGen/X86/combine-shl.ll
+++ b/llvm/test/CodeGen/X86/combine-shl.ll
@@ -368,9 +368,8 @@ define <8 x i32> @combine_vec_shl_zext_lshr1(<8 x i16> %x) {
; SSE2-LABEL: combine_vec_shl_zext_lshr1:
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa %xmm0, %xmm1
-; SSE2-NEXT: pmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE2-NEXT: pxor %xmm2, %xmm2
-; SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE2-NEXT: movdqa %xmm1, %xmm0
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
@@ -379,22 +378,20 @@ define <8 x i32> @combine_vec_shl_zext_lshr1(<8 x i16> %x) {
; SSE41-LABEL: combine_vec_shl_zext_lshr1:
; SSE41: # %bb.0:
; SSE41-NEXT: movdqa %xmm0, %xmm1
-; SSE41-NEXT: pmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE41-NEXT: pxor %xmm2, %xmm2
-; SSE41-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
; SSE41-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE41-NEXT: retq
;
; AVX-LABEL: combine_vec_shl_zext_lshr1:
; AVX: # %bb.0:
-; AVX-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; AVX-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX-NEXT: retq
- %1 = lshr <8 x i16> %x, <i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8>
+ %1 = lshr <8 x i16> %x, <i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 15>
%2 = zext <8 x i16> %1 to <8 x i32>
- %3 = shl <8 x i32> %2, <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>
+ %3 = shl <8 x i32> %2, <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 15>
ret <8 x i32> %3
}
@@ -509,8 +506,8 @@ define <4 x i32> @combine_vec_shl_gt_lshr0(<4 x i32> %x) {
;
; AVX-LABEL: combine_vec_shl_gt_lshr0:
; AVX: # %bb.0:
-; AVX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [4294967264,4294967264,4294967264,4294967264]
; AVX-NEXT: vpslld $2, %xmm0, %xmm0
+; AVX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [4294967264,4294967264,4294967264,4294967264]
; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
%1 = lshr <4 x i32> %x, <i32 3, i32 3, i32 3, i32 3>
@@ -519,46 +516,19 @@ define <4 x i32> @combine_vec_shl_gt_lshr0(<4 x i32> %x) {
}
define <4 x i32> @combine_vec_shl_gt_lshr1(<4 x i32> %x) {
-; SSE2-LABEL: combine_vec_shl_gt_lshr1:
-; SSE2: # %bb.0:
-; SSE2-NEXT: movdqa %xmm0, %xmm1
-; SSE2-NEXT: psrld $3, %xmm1
-; SSE2-NEXT: movdqa %xmm0, %xmm2
-; SSE2-NEXT: psrld $5, %xmm2
-; SSE2-NEXT: movsd {{.*#+}} xmm2 = xmm1[0],xmm2[1]
-; SSE2-NEXT: movdqa %xmm0, %xmm1
-; SSE2-NEXT: psrld $8, %xmm1
-; SSE2-NEXT: psrld $4, %xmm0
-; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1],xmm1[3,3]
-; SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,2,2,3]
-; SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,2,2,3]
-; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; SSE2-NEXT: retq
-;
-; SSE41-LABEL: combine_vec_shl_gt_lshr1:
-; SSE41: # %bb.0:
-; SSE41-NEXT: movdqa %xmm0, %xmm1
-; SSE41-NEXT: psrld $8, %xmm1
-; SSE41-NEXT: movdqa %xmm0, %xmm2
-; SSE41-NEXT: psrld $4, %xmm2
-; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm1[4,5,6,7]
-; SSE41-NEXT: movdqa %xmm0, %xmm1
-; SSE41-NEXT: psrld $5, %xmm1
-; SSE41-NEXT: psrld $3, %xmm0
-; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
-; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
-; SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; SSE41-NEXT: retq
+; SSE-LABEL: combine_vec_shl_gt_lshr1:
+; SSE: # %bb.0:
+; SSE-NEXT: pslld $2, %xmm0
+; SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_shl_gt_lshr1:
; AVX: # %bb.0:
-; AVX-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; AVX-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX-NEXT: vpslld $2, %xmm0, %xmm0
+; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX-NEXT: retq
- %1 = lshr <4 x i32> %x, <i32 3, i32 4, i32 5, i32 8>
- %2 = shl <4 x i32> %1, <i32 5, i32 6, i32 7, i32 8>
+ %1 = lshr <4 x i32> %x, <i32 3, i32 4, i32 5, i32 29>
+ %2 = shl <4 x i32> %1, <i32 5, i32 6, i32 7, i32 31>
ret <4 x i32> %2
}
@@ -572,8 +542,8 @@ define <4 x i32> @combine_vec_shl_le_lshr0(<4 x i32> %x) {
;
; AVX-LABEL: combine_vec_shl_le_lshr0:
; AVX: # %bb.0:
-; AVX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [1073741816,1073741816,1073741816,1073741816]
; AVX-NEXT: vpsrld $2, %xmm0, %xmm0
+; AVX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [1073741816,1073741816,1073741816,1073741816]
; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
%1 = lshr <4 x i32> %x, <i32 5, i32 5, i32 5, i32 5>
@@ -585,40 +555,25 @@ define <4 x i32> @combine_vec_shl_le_lshr1(<4 x i32> %x) {
; SSE2-LABEL: combine_vec_shl_le_lshr1:
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa %xmm0, %xmm1
-; SSE2-NEXT: psrld $5, %xmm1
-; SSE2-NEXT: movdqa %xmm0, %xmm2
-; SSE2-NEXT: psrld $7, %xmm2
-; SSE2-NEXT: movsd {{.*#+}} xmm2 = xmm1[0],xmm2[1]
-; SSE2-NEXT: movdqa %xmm0, %xmm1
-; SSE2-NEXT: psrld $8, %xmm1
-; SSE2-NEXT: psrld $6, %xmm0
-; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1],xmm1[3,3]
-; SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,2,2,3]
-; SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,2,2,3]
-; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE2-NEXT: psrld $2, %xmm1
+; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,0],xmm1[2,0]
+; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,0]
+; SSE2-NEXT: andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; SSE2-NEXT: movaps %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: combine_vec_shl_le_lshr1:
; SSE41: # %bb.0:
; SSE41-NEXT: movdqa %xmm0, %xmm1
-; SSE41-NEXT: psrld $8, %xmm1
-; SSE41-NEXT: movdqa %xmm0, %xmm2
-; SSE41-NEXT: psrld $6, %xmm2
-; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm1[4,5,6,7]
-; SSE41-NEXT: movdqa %xmm0, %xmm1
-; SSE41-NEXT: psrld $7, %xmm1
-; SSE41-NEXT: psrld $5, %xmm0
-; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
-; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
-; SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE41-NEXT: psrld $2, %xmm1
+; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,5],xmm0[6,7]
+; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: combine_vec_shl_le_lshr1:
; AVX: # %bb.0:
; AVX-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; AVX-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX-NEXT: retq
%1 = lshr <4 x i32> %x, <i32 5, i32 6, i32 7, i32 8>
%2 = shl <4 x i32> %1, <i32 3, i32 4, i32 5, i32 8>
diff --git a/llvm/test/CodeGen/X86/rotate-extract-vector.ll b/llvm/test/CodeGen/X86/rotate-extract-vector.ll
index 5f0d1500caf5a..df9be9a35e2eb 100644
--- a/llvm/test/CodeGen/X86/rotate-extract-vector.ll
+++ b/llvm/test/CodeGen/X86/rotate-extract-vector.ll
@@ -172,9 +172,9 @@ define <4 x i64> @no_extract_shl(<4 x i64> %i) nounwind {
define <4 x i32> @no_extract_shrl(<4 x i32> %i) nounwind {
; CHECK-LABEL: no_extract_shrl:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpbroadcastd {{.*#+}} xmm1 = [4026531840,4026531840,4026531840,4026531840]
-; CHECK-NEXT: vpslld $25, %xmm0, %xmm2
-; CHECK-NEXT: vpand %xmm1, %xmm2, %xmm1
+; CHECK-NEXT: vpslld $25, %xmm0, %xmm1
+; CHECK-NEXT: vpbroadcastd {{.*#+}} xmm2 = [4026531840,4026531840,4026531840,4026531840]
+; CHECK-NEXT: vpand %xmm2, %xmm1, %xmm1
; CHECK-NEXT: vpsrld $9, %xmm0, %xmm0
; CHECK-NEXT: vpor %xmm0, %xmm1, %xmm0
; CHECK-NEXT: ret{{[l|q]}}
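For reference, the new DAGCombiner path builds the mask by shifting an all-ones value with the (now per-lane) shift amounts and letting constant folding collapse it, which is what produces the single pand/vpand against a constant pool mask in the updated tests. A rough per-lane model of that mask construction for the c1 >= c2 case (plain C++, not LLVM code; the lane constants are taken from the combine_vec_shl_le_lshr1 test above):

// mask_model.cpp -- illustrative only; models the constant-folded masks.
#include <array>
#include <cstdint>
#include <cstdio>

int main() {
  std::array<uint32_t, 4> c1 = {5, 6, 7, 8}; // inner srl amounts
  std::array<uint32_t, 4> c2 = {3, 4, 5, 8}; // outer shl amounts
  for (int i = 0; i < 4; ++i) {
    uint32_t diff = c1[i] - c2[i]; // c1 >= c2 holds in every lane here
    // Mask = (all-ones << c1) >> (c1 - c2), computed lane by lane.
    uint32_t mask = (0xFFFFFFFFu << c1[i]) >> diff;
    std::printf("lane %d: srl by %u, and with 0x%08X\n", i, diff, mask);
  }
}

Each lane reduces to one srl (by 2, 2, 2, 0 here) plus one AND, matching the vpsrlvd + vpand pair in the AVX output above.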
More information about the llvm-commits mailing list