[llvm] 9385996 - [X86] combineMOVMSK - fold movmsk(logic(X,C)) -> logic(movmsk(X),C)
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Wed Mar 29 04:22:26 PDT 2023
Author: Simon Pilgrim
Date: 2023-03-29T12:22:14+01:00
New Revision: 9385996ec6d18ec6c52991b40489bd3cd8766594
URL: https://github.com/llvm/llvm-project/commit/9385996ec6d18ec6c52991b40489bd3cd8766594
DIFF: https://github.com/llvm/llvm-project/commit/9385996ec6d18ec6c52991b40489bd3cd8766594.diff
LOG: [X86] combineMOVMSK - fold movmsk(logic(X,C)) -> logic(movmsk(X),C)
Avoid a vector constant-mask load for a result that is likely to fold into a scalar logic op plus an EFLAGS comparison
Added:
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/masked_store.ll
llvm/test/CodeGen/X86/vector-reduce-or-cmp.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 68af565195c5..8db9372f9ab4 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -54465,6 +54465,28 @@ static SDValue combineMOVMSK(SDNode *N, SelectionDAG &DAG,
}
}
+ // Fold movmsk(logic(X,C)) -> logic(movmsk(X),C)
+ if (N->isOnlyUserOf(Src.getNode())) {
+ SDValue SrcBC = peekThroughOneUseBitcasts(Src);
+ if (ISD::isBitwiseLogicOp(SrcBC.getOpcode())) {
+ APInt UndefElts;
+ SmallVector<APInt, 32> EltBits;
+ if (getTargetConstantBitsFromNode(SrcBC.getOperand(1), NumBitsPerElt,
+ UndefElts, EltBits)) {
+ APInt Mask = APInt::getZero(NumBits);
+ for (unsigned Idx = 0; Idx != NumElts; ++Idx) {
+ if (!UndefElts[Idx] && EltBits[Idx].isNegative())
+ Mask.setBit(Idx);
+ }
+ SDLoc DL(N);
+ SDValue NewSrc = DAG.getBitcast(SrcVT, SrcBC.getOperand(0));
+ SDValue NewMovMsk = DAG.getNode(X86ISD::MOVMSK, DL, VT, NewSrc);
+ return DAG.getNode(SrcBC.getOpcode(), DL, VT, NewMovMsk,
+ DAG.getConstant(Mask, DL, VT));
+ }
+ }
+ }
+
// Simplify the inputs.
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
APInt DemandedMask(APInt::getAllOnes(NumBits));
diff --git a/llvm/test/CodeGen/X86/masked_store.ll b/llvm/test/CodeGen/X86/masked_store.ll
index 6b1f2924aee5..d831a1b707ac 100644
--- a/llvm/test/CodeGen/X86/masked_store.ll
+++ b/llvm/test/CodeGen/X86/masked_store.ll
@@ -5647,10 +5647,10 @@ define void @store_v24i32_v24i32_stride6_vf4_only_even_numbered_elts(ptr %trigge
; SSE2-NEXT: pcmpgtd %xmm8, %xmm7
; SSE2-NEXT: pshuflw {{.*#+}} xmm7 = xmm7[0,2,2,3,4,5,6,7]
; SSE2-NEXT: punpckldq {{.*#+}} xmm7 = xmm7[0],xmm6[0],xmm7[1],xmm6[1]
-; SSE2-NEXT: andpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm9
; SSE2-NEXT: pmovmskb %xmm9, %r11d
-; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm7
+; SSE2-NEXT: andl $21845, %r11d ## imm = 0x5555
; SSE2-NEXT: pmovmskb %xmm7, %edi
+; SSE2-NEXT: andl $85, %edi
; SSE2-NEXT: shll $16, %edi
; SSE2-NEXT: orl %r11d, %edi
; SSE2-NEXT: testb $1, %dil
@@ -5720,10 +5720,19 @@ define void @store_v24i32_v24i32_stride6_vf4_only_even_numbered_elts(ptr %trigge
; SSE2-NEXT: jne LBB31_43
; SSE2-NEXT: LBB31_44: ## %else42
; SSE2-NEXT: testl $4194304, %edi ## imm = 0x400000
-; SSE2-NEXT: jne LBB31_45
+; SSE2-NEXT: je LBB31_46
+; SSE2-NEXT: LBB31_45: ## %cond.store43
+; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
+; SSE2-NEXT: movd %xmm1, %eax
+; SSE2-NEXT: movl %eax, 88(%rdx)
; SSE2-NEXT: LBB31_46: ## %else44
-; SSE2-NEXT: testl $8388608, %edi ## imm = 0x800000
-; SSE2-NEXT: jne LBB31_47
+; SSE2-NEXT: movb $1, %al
+; SSE2-NEXT: testb %al, %al
+; SSE2-NEXT: jne LBB31_48
+; SSE2-NEXT: ## %bb.47: ## %cond.store45
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,3,3,3]
+; SSE2-NEXT: movd %xmm0, %eax
+; SSE2-NEXT: movl %eax, 92(%rdx)
; SSE2-NEXT: LBB31_48: ## %else46
; SSE2-NEXT: retq
; SSE2-NEXT: LBB31_1: ## %cond.store
@@ -5846,18 +5855,8 @@ define void @store_v24i32_v24i32_stride6_vf4_only_even_numbered_elts(ptr %trigge
; SSE2-NEXT: movd %xmm1, %eax
; SSE2-NEXT: movl %eax, 84(%rdx)
; SSE2-NEXT: testl $4194304, %edi ## imm = 0x400000
-; SSE2-NEXT: je LBB31_46
-; SSE2-NEXT: LBB31_45: ## %cond.store43
-; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
-; SSE2-NEXT: movd %xmm1, %eax
-; SSE2-NEXT: movl %eax, 88(%rdx)
-; SSE2-NEXT: testl $8388608, %edi ## imm = 0x800000
-; SSE2-NEXT: je LBB31_48
-; SSE2-NEXT: LBB31_47: ## %cond.store45
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,3,3,3]
-; SSE2-NEXT: movd %xmm0, %eax
-; SSE2-NEXT: movl %eax, 92(%rdx)
-; SSE2-NEXT: retq
+; SSE2-NEXT: jne LBB31_45
+; SSE2-NEXT: jmp LBB31_46
;
; SSE4-LABEL: store_v24i32_v24i32_stride6_vf4_only_even_numbered_elts:
; SSE4: ## %bb.0:
@@ -5925,10 +5924,10 @@ define void @store_v24i32_v24i32_stride6_vf4_only_even_numbered_elts(ptr %trigge
; SSE4-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0],xmm0[1,2,3],xmm2[4],xmm0[5,6,7]
; SSE4-NEXT: packusdw %xmm1, %xmm2
; SSE4-NEXT: packusdw %xmm2, %xmm2
-; SSE4-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
; SSE4-NEXT: pmovmskb %xmm3, %eax
-; SSE4-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
+; SSE4-NEXT: andl $21845, %eax ## imm = 0x5555
; SSE4-NEXT: pmovmskb %xmm2, %edi
+; SSE4-NEXT: andl $85, %edi
; SSE4-NEXT: shll $16, %edi
; SSE4-NEXT: orl %eax, %edi
; SSE4-NEXT: movl 48(%rsi), %r13d
@@ -6010,11 +6009,15 @@ define void @store_v24i32_v24i32_stride6_vf4_only_even_numbered_elts(ptr %trigge
; SSE4-NEXT: jne LBB31_43
; SSE4-NEXT: LBB31_44: ## %else42
; SSE4-NEXT: testl $4194304, %edi ## imm = 0x400000
-; SSE4-NEXT: jne LBB31_45
+; SSE4-NEXT: je LBB31_46
+; SSE4-NEXT: LBB31_45: ## %cond.store43
+; SSE4-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax ## 4-byte Reload
+; SSE4-NEXT: movl %eax, 88(%rdx)
; SSE4-NEXT: LBB31_46: ## %else44
-; SSE4-NEXT: testl $8388608, %edi ## imm = 0x800000
-; SSE4-NEXT: je LBB31_48
-; SSE4-NEXT: LBB31_47: ## %cond.store45
+; SSE4-NEXT: movb $1, %al
+; SSE4-NEXT: testb %al, %al
+; SSE4-NEXT: jne LBB31_48
+; SSE4-NEXT: ## %bb.47: ## %cond.store45
; SSE4-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax ## 4-byte Reload
; SSE4-NEXT: movl %eax, 92(%rdx)
; SSE4-NEXT: LBB31_48: ## %else46
@@ -6122,13 +6125,8 @@ define void @store_v24i32_v24i32_stride6_vf4_only_even_numbered_elts(ptr %trigge
; SSE4-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax ## 4-byte Reload
; SSE4-NEXT: movl %eax, 84(%rdx)
; SSE4-NEXT: testl $4194304, %edi ## imm = 0x400000
-; SSE4-NEXT: je LBB31_46
-; SSE4-NEXT: LBB31_45: ## %cond.store43
-; SSE4-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax ## 4-byte Reload
-; SSE4-NEXT: movl %eax, 88(%rdx)
-; SSE4-NEXT: testl $8388608, %edi ## imm = 0x800000
-; SSE4-NEXT: jne LBB31_47
-; SSE4-NEXT: jmp LBB31_48
+; SSE4-NEXT: jne LBB31_45
+; SSE4-NEXT: jmp LBB31_46
;
; AVX1-LABEL: store_v24i32_v24i32_stride6_vf4_only_even_numbered_elts:
; AVX1: ## %bb.0:
diff --git a/llvm/test/CodeGen/X86/vector-reduce-or-cmp.ll b/llvm/test/CodeGen/X86/vector-reduce-or-cmp.ll
index f22d70506815..caa200c99984 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-or-cmp.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-or-cmp.ll
@@ -862,9 +862,8 @@ define i1 @mask_v8i32(<8 x i32> %a0) {
; SSE2-LABEL: mask_v8i32:
; SSE2: # %bb.0:
; SSE2-NEXT: por %xmm1, %xmm0
-; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE2-NEXT: pmovmskb %xmm0, %eax
-; SSE2-NEXT: testl %eax, %eax
+; SSE2-NEXT: testl $34952, %eax # imm = 0x8888
; SSE2-NEXT: sete %al
; SSE2-NEXT: retq
;
More information about the llvm-commits
mailing list