[llvm] [X86] ptest is commutable as long as only the Z flag is used. (PR #88969)

via llvm-commits llvm-commits at lists.llvm.org
Tue Apr 16 12:25:29 PDT 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-x86

Author: Craig Topper (topperc)

<details>
<summary>Changes</summary>

Fixes #88958.

---
Full diff: https://github.com/llvm/llvm-project/pull/88969.diff


2 Files Affected:

- (modified) llvm/lib/Target/X86/X86InstrSSE.td (+18) 
- (modified) llvm/test/CodeGen/X86/vector-reduce-or-cmp.ll (+19-13) 


``````````diff
diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td
index 5d799fc00df92b..385d65d0bcaaf6 100644
--- a/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/llvm/lib/Target/X86/X86InstrSSE.td
@@ -5688,6 +5688,13 @@ let Predicates = [UseSSE41, OptForSize] in {
 // SSE4.1 - Packed Bit Test
 //===----------------------------------------------------------------------===//
 
+// ptest is commutable if only the Z flag is used. If the C flag is used,
+// commuting would change which operand is inverted.
+def X86ptest_commutable : PatFrag<(ops node:$src1, node:$src2),
+                                  (X86ptest node:$src1, node:$src2), [{
+  return onlyUsesZeroFlag(SDValue(Node, 0));
+}]>;
+
 // ptest instruction we'll lower to this in X86ISelLowering primarily from
 // the intel intrinsic that corresponds to this.
 let Defs = [EFLAGS], Predicates = [HasAVX] in {
@@ -5723,6 +5730,17 @@ def PTESTrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2),
               Sched<[SchedWriteVecTest.XMM.Folded, SchedWriteVecTest.XMM.ReadAfterFold]>;
 }
 
+let Predicates = [HasAVX] in {
+  def : Pat<(X86ptest_commutable (loadv2i64 addr:$src2), VR128:$src1),
+            (VPTESTrm VR128:$src1, addr:$src2)>;
+  def : Pat<(X86ptest_commutable (loadv4i64 addr:$src2), VR256:$src1),
+            (VPTESTYrm VR256:$src1, addr:$src2)>;
+}
+let Predicates = [UseSSE41] in {
+  def : Pat<(X86ptest_commutable (memopv2i64 addr:$src2), VR128:$src1),
+            (PTESTrm VR128:$src1, addr:$src2)>;
+}
+
 // The bit test instructions below are AVX only
 multiclass avx_bittest<bits<8> opc, string OpcodeStr, RegisterClass RC,
                        X86MemOperand x86memop, PatFrag mem_frag, ValueType vt,
diff --git a/llvm/test/CodeGen/X86/vector-reduce-or-cmp.ll b/llvm/test/CodeGen/X86/vector-reduce-or-cmp.ll
index f8ba00b0332994..9cd0f4d12e15ab 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-or-cmp.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-or-cmp.ll
@@ -1018,32 +1018,38 @@ define zeroext i1 @PR44781(ptr %0) {
 ; SSE41-NEXT:    sete %al
 ; SSE41-NEXT:    retq
 ;
-; AVX1OR2-LABEL: PR44781:
-; AVX1OR2:       # %bb.0:
-; AVX1OR2-NEXT:    vmovdqu (%rdi), %xmm0
-; AVX1OR2-NEXT:    vptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; AVX1OR2-NEXT:    sete %al
-; AVX1OR2-NEXT:    retq
+; AVX1-LABEL: PR44781:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    vbroadcastss {{.*#+}} xmm0 = [15,15,15,15]
+; AVX1-NEXT:    vptest (%rdi), %xmm0
+; AVX1-NEXT:    sete %al
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: PR44781:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm0 = [15,15,15,15]
+; AVX2-NEXT:    vptest (%rdi), %xmm0
+; AVX2-NEXT:    sete %al
+; AVX2-NEXT:    retq
 ;
 ; AVX512F-LABEL: PR44781:
 ; AVX512F:       # %bb.0:
-; AVX512F-NEXT:    vmovdqu (%rdi), %xmm0
-; AVX512F-NEXT:    vptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; AVX512F-NEXT:    vpbroadcastd {{.*#+}} xmm0 = [15,15,15,15]
+; AVX512F-NEXT:    vptest (%rdi), %xmm0
 ; AVX512F-NEXT:    sete %al
 ; AVX512F-NEXT:    retq
 ;
 ; AVX512BW-LABEL: PR44781:
 ; AVX512BW:       # %bb.0:
-; AVX512BW-NEXT:    vmovdqu (%rdi), %xmm0
-; AVX512BW-NEXT:    vptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; AVX512BW-NEXT:    vpbroadcastd {{.*#+}} xmm0 = [15,15,15,15]
+; AVX512BW-NEXT:    vptest (%rdi), %xmm0
 ; AVX512BW-NEXT:    sete %al
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512BWVL-LABEL: PR44781:
 ; AVX512BWVL:       # %bb.0:
-; AVX512BWVL-NEXT:    vmovdqu (%rdi), %xmm0
-; AVX512BWVL-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [64424509455,64424509455]
-; AVX512BWVL-NEXT:    vptest %xmm1, %xmm0
+; AVX512BWVL-NEXT:    vpbroadcastq {{.*#+}} xmm0 = [64424509455,64424509455]
+; AVX512BWVL-NEXT:    vptest (%rdi), %xmm0
 ; AVX512BWVL-NEXT:    sete %al
 ; AVX512BWVL-NEXT:    retq
   %2 = load <4 x i32>, ptr %0, align 4

``````````

</details>


https://github.com/llvm/llvm-project/pull/88969


More information about the llvm-commits mailing list