[llvm] [DAG] Peek through bitcasts when canonicalizing constants to the RHS on commutable instructions (PR #112682)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Oct 17 02:25:36 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-selectiondag
Author: Simon Pilgrim (RKSimon)
<details>
<summary>Changes</summary>
X86 is the beneficiary here, as it often rematerializes 0/-1 vector constants as vXi32 and then bitcasts them to the requested type
Minor cleanup that helps with #107423
---
Patch is 50.23 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/112682.diff
8 Files Affected:
- (modified) llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (+32-32)
- (modified) llvm/test/CodeGen/X86/avx2-arith.ll (+1-1)
- (modified) llvm/test/CodeGen/X86/midpoint-int-vec-128.ll (+25-25)
- (modified) llvm/test/CodeGen/X86/midpoint-int-vec-256.ll (+30-30)
- (modified) llvm/test/CodeGen/X86/min-legal-vector-width.ll (+9-9)
- (modified) llvm/test/CodeGen/X86/pmul.ll (+29-33)
- (modified) llvm/test/CodeGen/X86/prefer-avx256-wide-mul.ll (+1-1)
- (modified) llvm/test/CodeGen/X86/vector-shuffle-combining-sse41.ll (+3-3)
``````````diff
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index ca91d35573c3ec..b09d33eb20296a 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -2668,8 +2668,8 @@ SDValue DAGCombiner::visitADDLike(SDNode *N) {
return C;
// canonicalize constant to RHS
- if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
- !DAG.isConstantIntBuildVectorOrConstantInt(N1))
+ if (DAG.isConstantIntBuildVectorOrConstantInt(peekThroughBitcasts(N0)) &&
+ !DAG.isConstantIntBuildVectorOrConstantInt(peekThroughBitcasts(N1)))
return DAG.getNode(ISD::ADD, DL, VT, N1, N0);
if (areBitwiseNotOfEachother(N0, N1))
@@ -3048,8 +3048,8 @@ SDValue DAGCombiner::visitADDSAT(SDNode *N) {
return C;
// canonicalize constant to RHS
- if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
- !DAG.isConstantIntBuildVectorOrConstantInt(N1))
+ if (DAG.isConstantIntBuildVectorOrConstantInt(peekThroughBitcasts(N0)) &&
+ !DAG.isConstantIntBuildVectorOrConstantInt(peekThroughBitcasts(N1)))
return DAG.getNode(Opcode, DL, VT, N1, N0);
// fold vector ops
@@ -3306,8 +3306,8 @@ SDValue DAGCombiner::visitADDO(SDNode *N) {
DAG.getUNDEF(CarryVT));
// canonicalize constant to RHS.
- if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
- !DAG.isConstantIntBuildVectorOrConstantInt(N1))
+ if (DAG.isConstantIntBuildVectorOrConstantInt(peekThroughBitcasts(N0)) &&
+ !DAG.isConstantIntBuildVectorOrConstantInt(peekThroughBitcasts(N1)))
return DAG.getNode(N->getOpcode(), DL, N->getVTList(), N1, N0);
// fold (addo x, 0) -> x + no carry out
@@ -4381,8 +4381,8 @@ SDValue DAGCombiner::visitMULFIX(SDNode *N) {
return DAG.getConstant(0, SDLoc(N), VT);
// Canonicalize constant to RHS (vector doesn't have to splat)
- if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
- !DAG.isConstantIntBuildVectorOrConstantInt(N1))
+ if (DAG.isConstantIntBuildVectorOrConstantInt(peekThroughBitcasts(N0)) &&
+ !DAG.isConstantIntBuildVectorOrConstantInt(peekThroughBitcasts(N1)))
return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0, Scale);
// fold (mulfix x, 0, scale) -> 0
@@ -4410,8 +4410,8 @@ template <class MatchContextClass> SDValue DAGCombiner::visitMUL(SDNode *N) {
return C;
// canonicalize constant to RHS (vector doesn't have to splat)
- if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
- !DAG.isConstantIntBuildVectorOrConstantInt(N1))
+ if (DAG.isConstantIntBuildVectorOrConstantInt(peekThroughBitcasts(N0)) &&
+ !DAG.isConstantIntBuildVectorOrConstantInt(peekThroughBitcasts(N1)))
return Matcher.getNode(ISD::MUL, DL, VT, N1, N0);
bool N1IsConst = false;
@@ -5156,8 +5156,8 @@ SDValue DAGCombiner::visitMULHS(SDNode *N) {
return C;
// canonicalize constant to RHS.
- if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
- !DAG.isConstantIntBuildVectorOrConstantInt(N1))
+ if (DAG.isConstantIntBuildVectorOrConstantInt(peekThroughBitcasts(N0)) &&
+ !DAG.isConstantIntBuildVectorOrConstantInt(peekThroughBitcasts(N1)))
return DAG.getNode(ISD::MULHS, DL, N->getVTList(), N1, N0);
if (VT.isVector()) {
@@ -5215,8 +5215,8 @@ SDValue DAGCombiner::visitMULHU(SDNode *N) {
return C;
// canonicalize constant to RHS.
- if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
- !DAG.isConstantIntBuildVectorOrConstantInt(N1))
+ if (DAG.isConstantIntBuildVectorOrConstantInt(peekThroughBitcasts(N0)) &&
+ !DAG.isConstantIntBuildVectorOrConstantInt(peekThroughBitcasts(N1)))
return DAG.getNode(ISD::MULHU, DL, N->getVTList(), N1, N0);
if (VT.isVector()) {
@@ -5293,8 +5293,8 @@ SDValue DAGCombiner::visitAVG(SDNode *N) {
return C;
// canonicalize constant to RHS.
- if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
- !DAG.isConstantIntBuildVectorOrConstantInt(N1))
+ if (DAG.isConstantIntBuildVectorOrConstantInt(peekThroughBitcasts(N0)) &&
+ !DAG.isConstantIntBuildVectorOrConstantInt(peekThroughBitcasts(N1)))
return DAG.getNode(Opcode, DL, N->getVTList(), N1, N0);
if (VT.isVector())
@@ -5367,8 +5367,8 @@ SDValue DAGCombiner::visitABD(SDNode *N) {
return C;
// canonicalize constant to RHS.
- if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
- !DAG.isConstantIntBuildVectorOrConstantInt(N1))
+ if (DAG.isConstantIntBuildVectorOrConstantInt(peekThroughBitcasts(N0)) &&
+ !DAG.isConstantIntBuildVectorOrConstantInt(peekThroughBitcasts(N1)))
return DAG.getNode(Opcode, DL, N->getVTList(), N1, N0);
if (VT.isVector())
@@ -5465,8 +5465,8 @@ SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) {
return DAG.getNode(ISD::SMUL_LOHI, DL, N->getVTList(), N0, N1);
// canonicalize constant to RHS (vector doesn't have to splat)
- if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
- !DAG.isConstantIntBuildVectorOrConstantInt(N1))
+ if (DAG.isConstantIntBuildVectorOrConstantInt(peekThroughBitcasts(N0)) &&
+ !DAG.isConstantIntBuildVectorOrConstantInt(peekThroughBitcasts(N1)))
return DAG.getNode(ISD::SMUL_LOHI, DL, N->getVTList(), N1, N0);
// If the type is twice as wide is legal, transform the mulhu to a wider
@@ -5506,8 +5506,8 @@ SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
return DAG.getNode(ISD::UMUL_LOHI, DL, N->getVTList(), N0, N1);
// canonicalize constant to RHS (vector doesn't have to splat)
- if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
- !DAG.isConstantIntBuildVectorOrConstantInt(N1))
+ if (DAG.isConstantIntBuildVectorOrConstantInt(peekThroughBitcasts(N0)) &&
+ !DAG.isConstantIntBuildVectorOrConstantInt(peekThroughBitcasts(N1)))
return DAG.getNode(ISD::UMUL_LOHI, DL, N->getVTList(), N1, N0);
// (umul_lohi N0, 0) -> (0, 0)
@@ -5570,8 +5570,8 @@ SDValue DAGCombiner::visitMULO(SDNode *N) {
}
// canonicalize constant to RHS.
- if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
- !DAG.isConstantIntBuildVectorOrConstantInt(N1))
+ if (DAG.isConstantIntBuildVectorOrConstantInt(peekThroughBitcasts(N0)) &&
+ !DAG.isConstantIntBuildVectorOrConstantInt(peekThroughBitcasts(N1)))
return DAG.getNode(N->getOpcode(), DL, N->getVTList(), N1, N0);
// fold (mulo x, 0) -> 0 + no carry out
@@ -5784,8 +5784,8 @@ SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
return N0;
// canonicalize constant to RHS
- if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
- !DAG.isConstantIntBuildVectorOrConstantInt(N1))
+ if (DAG.isConstantIntBuildVectorOrConstantInt(peekThroughBitcasts(N0)) &&
+ !DAG.isConstantIntBuildVectorOrConstantInt(peekThroughBitcasts(N1)))
return DAG.getNode(Opcode, DL, VT, N1, N0);
// fold vector ops
@@ -7048,8 +7048,8 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
return C;
// canonicalize constant to RHS
- if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
- !DAG.isConstantIntBuildVectorOrConstantInt(N1))
+ if (DAG.isConstantIntBuildVectorOrConstantInt(peekThroughBitcasts(N0)) &&
+ !DAG.isConstantIntBuildVectorOrConstantInt(peekThroughBitcasts(N1)))
return DAG.getNode(ISD::AND, DL, VT, N1, N0);
if (areBitwiseNotOfEachother(N0, N1))
@@ -7945,8 +7945,8 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
return C;
// canonicalize constant to RHS
- if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
- !DAG.isConstantIntBuildVectorOrConstantInt(N1))
+ if (DAG.isConstantIntBuildVectorOrConstantInt(peekThroughBitcasts(N0)) &&
+ !DAG.isConstantIntBuildVectorOrConstantInt(peekThroughBitcasts(N1)))
return DAG.getNode(ISD::OR, DL, VT, N1, N0);
// fold vector ops
@@ -9501,8 +9501,8 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
return C;
// canonicalize constant to RHS
- if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
- !DAG.isConstantIntBuildVectorOrConstantInt(N1))
+ if (DAG.isConstantIntBuildVectorOrConstantInt(peekThroughBitcasts(N0)) &&
+ !DAG.isConstantIntBuildVectorOrConstantInt(peekThroughBitcasts(N1)))
return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
// fold vector ops
diff --git a/llvm/test/CodeGen/X86/avx2-arith.ll b/llvm/test/CodeGen/X86/avx2-arith.ll
index 90733dfb8465ef..44ab33ad67f272 100644
--- a/llvm/test/CodeGen/X86/avx2-arith.ll
+++ b/llvm/test/CodeGen/X86/avx2-arith.ll
@@ -122,7 +122,7 @@ define <32 x i8> @mul_v32i8(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
; CHECK-LABEL: mul_v32i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vpbroadcastw {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; CHECK-NEXT: vpand %ymm1, %ymm2, %ymm3
+; CHECK-NEXT: vpand %ymm2, %ymm1, %ymm3
; CHECK-NEXT: vpmaddubsw %ymm3, %ymm0, %ymm3
; CHECK-NEXT: vpand %ymm2, %ymm3, %ymm3
; CHECK-NEXT: vpandn %ymm1, %ymm2, %ymm1
diff --git a/llvm/test/CodeGen/X86/midpoint-int-vec-128.ll b/llvm/test/CodeGen/X86/midpoint-int-vec-128.ll
index 6fd3db3464decb..ee83a79b6dd550 100644
--- a/llvm/test/CodeGen/X86/midpoint-int-vec-128.ll
+++ b/llvm/test/CodeGen/X86/midpoint-int-vec-128.ll
@@ -2369,8 +2369,8 @@ define <16 x i8> @vec128_i8_signed_reg_reg(<16 x i8> %a1, <16 x i8> %a2) nounwin
; SSE41-NEXT: psrlw $1, %xmm1
; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE41-NEXT: pmovzxbw {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255]
-; SSE41-NEXT: movdqa %xmm3, %xmm4
-; SSE41-NEXT: pand %xmm2, %xmm4
+; SSE41-NEXT: movdqa %xmm2, %xmm4
+; SSE41-NEXT: pand %xmm3, %xmm4
; SSE41-NEXT: movdqa %xmm1, %xmm5
; SSE41-NEXT: pmaddubsw %xmm4, %xmm5
; SSE41-NEXT: pand %xmm3, %xmm5
@@ -2391,7 +2391,7 @@ define <16 x i8> @vec128_i8_signed_reg_reg(<16 x i8> %a1, <16 x i8> %a2) nounwin
; AVX1-NEXT: vpsrlw $1, %xmm1, %xmm1
; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX1-NEXT: vbroadcastss {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255]
-; AVX1-NEXT: vpand %xmm2, %xmm3, %xmm4
+; AVX1-NEXT: vpand %xmm3, %xmm2, %xmm4
; AVX1-NEXT: vpmaddubsw %xmm4, %xmm1, %xmm4
; AVX1-NEXT: vpand %xmm3, %xmm4, %xmm4
; AVX1-NEXT: vpandn %xmm2, %xmm3, %xmm2
@@ -2432,7 +2432,7 @@ define <16 x i8> @vec128_i8_signed_reg_reg(<16 x i8> %a1, <16 x i8> %a2) nounwin
; XOP-FALLBACK-NEXT: vbroadcastss {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255]
; XOP-FALLBACK-NEXT: vpandn %xmm2, %xmm3, %xmm4
; XOP-FALLBACK-NEXT: vpmaddubsw %xmm4, %xmm1, %xmm4
-; XOP-FALLBACK-NEXT: vpand %xmm2, %xmm3, %xmm2
+; XOP-FALLBACK-NEXT: vpand %xmm3, %xmm2, %xmm2
; XOP-FALLBACK-NEXT: vpmaddubsw %xmm2, %xmm1, %xmm1
; XOP-FALLBACK-NEXT: vpperm {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[2],xmm4[2],xmm1[4],xmm4[4],xmm1[6],xmm4[6],xmm1[8],xmm4[8],xmm1[10],xmm4[10],xmm1[12],xmm4[12],xmm1[14],xmm4[14]
; XOP-FALLBACK-NEXT: vpaddb %xmm0, %xmm1, %xmm0
@@ -2450,7 +2450,7 @@ define <16 x i8> @vec128_i8_signed_reg_reg(<16 x i8> %a1, <16 x i8> %a2) nounwin
; XOPAVX1-NEXT: vbroadcastss {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255]
; XOPAVX1-NEXT: vpandn %xmm2, %xmm3, %xmm4
; XOPAVX1-NEXT: vpmaddubsw %xmm4, %xmm1, %xmm4
-; XOPAVX1-NEXT: vpand %xmm2, %xmm3, %xmm2
+; XOPAVX1-NEXT: vpand %xmm3, %xmm2, %xmm2
; XOPAVX1-NEXT: vpmaddubsw %xmm2, %xmm1, %xmm1
; XOPAVX1-NEXT: vpperm {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[2],xmm4[2],xmm1[4],xmm4[4],xmm1[6],xmm4[6],xmm1[8],xmm4[8],xmm1[10],xmm4[10],xmm1[12],xmm4[12],xmm1[14],xmm4[14]
; XOPAVX1-NEXT: vpaddb %xmm0, %xmm1, %xmm0
@@ -2592,8 +2592,8 @@ define <16 x i8> @vec128_i8_unsigned_reg_reg(<16 x i8> %a1, <16 x i8> %a2) nounw
; SSE41-NEXT: psrlw $1, %xmm1
; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE41-NEXT: pmovzxbw {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
-; SSE41-NEXT: movdqa %xmm2, %xmm3
-; SSE41-NEXT: pand %xmm4, %xmm3
+; SSE41-NEXT: movdqa %xmm4, %xmm3
+; SSE41-NEXT: pand %xmm2, %xmm3
; SSE41-NEXT: movdqa %xmm1, %xmm5
; SSE41-NEXT: pmaddubsw %xmm3, %xmm5
; SSE41-NEXT: pand %xmm2, %xmm5
@@ -2616,7 +2616,7 @@ define <16 x i8> @vec128_i8_unsigned_reg_reg(<16 x i8> %a1, <16 x i8> %a2) nounw
; AVX1-NEXT: vpsrlw $1, %xmm1, %xmm1
; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
-; AVX1-NEXT: vpand %xmm3, %xmm2, %xmm4
+; AVX1-NEXT: vpand %xmm2, %xmm3, %xmm4
; AVX1-NEXT: vpmaddubsw %xmm4, %xmm1, %xmm4
; AVX1-NEXT: vpand %xmm2, %xmm4, %xmm4
; AVX1-NEXT: vpandn %xmm3, %xmm2, %xmm2
@@ -2659,7 +2659,7 @@ define <16 x i8> @vec128_i8_unsigned_reg_reg(<16 x i8> %a1, <16 x i8> %a2) nounw
; XOP-FALLBACK-NEXT: vbroadcastss {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255]
; XOP-FALLBACK-NEXT: vpandn %xmm2, %xmm3, %xmm4
; XOP-FALLBACK-NEXT: vpmaddubsw %xmm4, %xmm1, %xmm4
-; XOP-FALLBACK-NEXT: vpand %xmm2, %xmm3, %xmm2
+; XOP-FALLBACK-NEXT: vpand %xmm3, %xmm2, %xmm2
; XOP-FALLBACK-NEXT: vpmaddubsw %xmm2, %xmm1, %xmm1
; XOP-FALLBACK-NEXT: vpperm {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[2],xmm4[2],xmm1[4],xmm4[4],xmm1[6],xmm4[6],xmm1[8],xmm4[8],xmm1[10],xmm4[10],xmm1[12],xmm4[12],xmm1[14],xmm4[14]
; XOP-FALLBACK-NEXT: vpaddb %xmm0, %xmm1, %xmm0
@@ -2677,7 +2677,7 @@ define <16 x i8> @vec128_i8_unsigned_reg_reg(<16 x i8> %a1, <16 x i8> %a2) nounw
; XOPAVX1-NEXT: vbroadcastss {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255]
; XOPAVX1-NEXT: vpandn %xmm2, %xmm3, %xmm4
; XOPAVX1-NEXT: vpmaddubsw %xmm4, %xmm1, %xmm4
-; XOPAVX1-NEXT: vpand %xmm2, %xmm3, %xmm2
+; XOPAVX1-NEXT: vpand %xmm3, %xmm2, %xmm2
; XOPAVX1-NEXT: vpmaddubsw %xmm2, %xmm1, %xmm1
; XOPAVX1-NEXT: vpperm {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[2],xmm4[2],xmm1[4],xmm4[4],xmm1[6],xmm4[6],xmm1[8],xmm4[8],xmm1[10],xmm4[10],xmm1[12],xmm4[12],xmm1[14],xmm4[14]
; XOPAVX1-NEXT: vpaddb %xmm0, %xmm1, %xmm0
@@ -2823,8 +2823,8 @@ define <16 x i8> @vec128_i8_signed_mem_reg(ptr %a1_addr, <16 x i8> %a2) nounwind
; SSE41-NEXT: psrlw $1, %xmm0
; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE41-NEXT: pmovzxbw {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255]
-; SSE41-NEXT: movdqa %xmm3, %xmm4
-; SSE41-NEXT: pand %xmm2, %xmm4
+; SSE41-NEXT: movdqa %xmm2, %xmm4
+; SSE41-NEXT: pand %xmm3, %xmm4
; SSE41-NEXT: movdqa %xmm0, %xmm5
; SSE41-NEXT: pmaddubsw %xmm4, %xmm5
; SSE41-NEXT: pand %xmm3, %xmm5
@@ -2846,7 +2846,7 @@ define <16 x i8> @vec128_i8_signed_mem_reg(ptr %a1_addr, <16 x i8> %a2) nounwind
; AVX1-NEXT: vpsrlw $1, %xmm0, %xmm0
; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX1-NEXT: vbroadcastss {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255]
-; AVX1-NEXT: vpand %xmm2, %xmm3, %xmm4
+; AVX1-NEXT: vpand %xmm3, %xmm2, %xmm4
; AVX1-NEXT: vpmaddubsw %xmm4, %xmm0, %xmm4
; AVX1-NEXT: vpand %xmm3, %xmm4, %xmm4
; AVX1-NEXT: vpandn %xmm2, %xmm3, %xmm2
@@ -2889,7 +2889,7 @@ define <16 x i8> @vec128_i8_signed_mem_reg(ptr %a1_addr, <16 x i8> %a2) nounwind
; XOP-FALLBACK-NEXT: vbroadcastss {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255]
; XOP-FALLBACK-NEXT: vpandn %xmm2, %xmm3, %xmm4
; XOP-FALLBACK-NEXT: vpmaddubsw %xmm4, %xmm0, %xmm4
-; XOP-FALLBACK-NEXT: vpand %xmm2, %xmm3, %xmm2
+; XOP-FALLBACK-NEXT: vpand %xmm3, %xmm2, %xmm2
; XOP-FALLBACK-NEXT: vpmaddubsw %xmm2, %xmm0, %xmm0
; XOP-FALLBACK-NEXT: vpperm {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[2],xmm4[2],xmm0[4],xmm4[4],xmm0[6],xmm4[6],xmm0[8],xmm4[8],xmm0[10],xmm4[10],xmm0[12],xmm4[12],xmm0[14],xmm4[14]
; XOP-FALLBACK-NEXT: vpaddb %xmm1, %xmm0, %xmm0
@@ -2908,7 +2908,7 @@ define <16 x i8> @vec128_i8_signed_mem_reg(ptr %a1_addr, <16 x i8> %a2) nounwind
; XOPAVX1-NEXT: vbroadcastss {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255]
; XOPAVX1-NEXT: vpandn %xmm2, %xmm3, %xmm4
; XOPAVX1-NEXT: vpmaddubsw %xmm4, %xmm0, %xmm4
-; XOPAVX1-NEXT: vpand %xmm2, %xmm3, %xmm2
+; XOPAVX1-NEXT: vpand %xmm3, %xmm2, %xmm2
; XOPAVX1-NEXT: vpmaddubsw %xmm2, %xmm0, %xmm0
; XOPAVX1-NEXT: vpperm {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[2],xmm4[2],xmm0[4],xmm4[4],xmm0[6],xmm4[6],xmm0[8],xmm4[8],xmm0[10],xmm4[10],xmm0[12],xmm4[12],xmm0[14],xmm4[14]
; XOPAVX1-NEXT: vpaddb %xmm1, %xmm0, %xmm0
@@ -3054,8 +3054,8 @@ define <16 x i8> @vec128_i8_signed_reg_mem(<16 x i8> %a1, ptr %a2_addr) nounwind
; SSE41-NEXT: psrlw $1, %xmm1
; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE41-NEXT: pmovzxbw {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255]
-; SSE41-NEXT: movdqa %xmm3, %xmm4
-; SSE41-NEXT: pand %xmm2, %xmm4
+; SSE41-NEXT: movdqa %xmm2, %xmm4
+; SSE41-NEXT: pand %xmm3, %xmm4
; SSE41-NEXT: movdqa %xmm1, %xmm5
; SSE41-NEXT: pmaddubsw %xmm4, %xmm5
; SSE41-NEXT: pand %xmm3, %xmm5
@@ -3077,7 +3077,7 @@ define <16 x i8> @vec128_i8_signed_reg_mem(<16 x i8> %a1, ptr %a2_addr) nounwind
; AVX1-NEXT: vpsrlw $1, %xmm1, %xmm1
; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX1-NEXT: vbroadcastss {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255]
-; AVX1-NEXT: vpand %xmm2, %xmm3, %xmm4
+; AVX1-NEXT: vpand %xmm3, %xmm2, %xmm4
; AVX1-NEXT: vpmaddubsw %xmm4, %xmm1, %xmm4
; AVX1-NEXT: vpand %xmm3, %xmm4, %xmm4
; AVX1-NEXT: vpandn %xmm2, %xmm3, %xmm2
@@ -3120,7 +3120,7 @@ define <16 x i8> @vec128_i8_signed_reg_mem(<16 x i8> %a1, ptr %a2_addr) nounwind
; XOP-FALLBACK-NEXT: vbroadcastss {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255]
; XOP-FALLBACK-NEXT: vpandn %xmm2, %xmm3, %xmm4
; XOP-FALLBACK-NEXT: vpmaddubsw %xmm4, %xmm1, %xmm4
-; XOP-FALLBACK-NEXT: vpand %xmm2, %xmm3, %xmm2
+; XOP-FALLBACK-NEXT: vpand %xmm3, %xmm2, %xmm2
; XOP-FALLBACK-NEXT: vpmaddubsw %xmm2, %xmm1, %xmm1
; XOP-FALLBACK-NEXT: vpperm {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[2],xmm4[2],xmm1[4],xmm4[4],xmm1[6],xmm4[6],xmm1[8],xmm4[8],xmm1[10],xmm4[10],xmm1[12],xmm4[12],xmm1[14],xmm4[14]
; XOP-FALLBACK-NEXT: vpaddb %xmm0, %xmm1, %xmm0
@@ -3139,7 +3139,7 @@ define <16 x i8> @vec128_i8_signed_reg_mem(<16 x i8> %a1, ptr %a2_addr) nounwind
; XOPAVX1-NEXT: vbroadcastss {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255]
; XOPAVX1-NEXT: vpandn %xmm2, %xmm3, %xmm4
; XOPAVX1-NEXT: vpmaddubsw %xmm4, %xmm1, %xmm4
-; XOPAVX1-NEXT: vpand %xmm2, %xmm3, %xmm2
+; XOPAVX1-NEXT: vpand %xmm3, %xmm2, %xmm2
; XOPAVX1-NEXT: vpmaddubsw %xmm2, %xmm1, %xmm1
; XOPAVX1-NEXT: vpperm {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[2],xmm4[2],xmm1[4],xmm4[4],xmm1[6],xmm4[6],xmm1[8],xmm4[8],xmm1[10],xmm4[10],xmm1[12],xmm4[12],xmm1[14],xmm4[14]
; XOPAVX1-NEXT: vpaddb %xmm0, %xmm1, %xmm0
@@ -3287,8 +3287,8 @@ define <16 x i8> @vec128_i8_signed_mem_mem(ptr %a1_addr, ptr %a2_addr) nounwind
; SSE41-NEXT: psrlw $1, %xmm0
; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE41-NEXT: pmovzxbw {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255]
-; SSE41-NEXT: movdqa %xmm3, %xmm4
-; SSE41-NEXT: pand %xmm2, %xmm4
+; SSE41-NEXT: movdqa %xmm2, %xmm4
+; SSE41-NEXT: pand %xmm3, %xmm4
; SSE41-NEXT: movdqa %xmm0, %xmm5
; SSE41-NEXT: pmaddubsw %xmm4, %xmm5
; SSE41-NEXT: pand %xmm3, %xmm5
@@ -3311,7 +3311,7 @@ define <16 x i8> @vec128_i8_signed_mem_mem(ptr %a1_addr, ptr %a2_addr) nounwind
; AVX1-NEXT: vpsrlw $1, %xmm1, %xmm1
; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX1-NEXT: vbroadcastss {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255]
-; AVX1-NEXT: vpand %xmm2, %xmm3, %xmm4
+; AVX1-NEXT: vpand %xmm3, %xmm2, %xmm4
; AVX1-NEXT: vpmaddubsw %xmm4, %xmm1, %xmm4
; AVX1-NEXT: vpand %xmm3, %xmm4, %xmm4
; AVX1-NEXT: vpandn %xmm2, %xmm3, %xmm2
@@ -3356,7 +3356,7 @@ define <16 x i8> @vec128_i8_signed_mem_mem(ptr %a1_addr, ptr %a2_addr) nounwind
; XOP-FALLBACK-NEXT: vbroadcastss {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255]
; XOP-FALLBACK-NEXT: vpandn %xmm2, %xmm3, %xmm4
; XOP-FALLBACK-NEXT: vpmaddubsw %xmm4, %xmm1, %xmm4
-; XOP-FALLBACK-NEXT: vpand %xmm2, %xmm3, %xmm2
+; XOP-FALLBACK-NEXT: vpand %xmm3, %xmm2, %xmm2
; XOP-FALLBACK-NEXT: vpmaddubsw %xmm2, %xmm1, %xmm1
; XOP-FALLBACK-NEXT: vpperm {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[2],xmm4[2],xmm1[4],xmm4[4],xmm1[6],xmm4[6],xmm1[8],xmm4[8],xmm1[10],xmm4[10],xmm1[12],xmm4[12],xmm1[14],xmm4[14]
; XOP-FALLBACK-NEXT: vpaddb %xmm0, %xmm1, %xm...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/112682
More information about the llvm-commits
mailing list