[llvm] a290668 - [AArch64][SVE] Fix bad PTEST(X, X) optimization

Cullen Rhodes via llvm-commits llvm-commits at lists.llvm.org
Tue Nov 15 03:59:44 PST 2022


Author: Cullen Rhodes
Date: 2022-11-15T11:59:07Z
New Revision: a290668ec5478e26d765a01d254b10d13c2a1dbd

URL: https://github.com/llvm/llvm-project/commit/a290668ec5478e26d765a01d254b10d13c2a1dbd
DIFF: https://github.com/llvm/llvm-project/commit/a290668ec5478e26d765a01d254b10d13c2a1dbd.diff

LOG: [AArch64][SVE] Fix bad PTEST(X, X) optimization

AArch64InstrInfo::optimizePTestInstr attempts to remove a PTEST of a
predicate generating operation that identically sets flags (implictly).

When the mask is the same as the input predicate the PTEST is currently
removed. This is incorrect since the mask for the implicit PTEST
performed by the flag-setting instruction differs from the mask
specified to the explicit PTEST and could set different flags.

For example, consider

  PG=<1, 1, x, x>
  Z0=<1, 2, x, x>
  Z1=<2, 1, x, x>

  X=CMPLE(PG, Z0, Z1)
   =<0, 1, x, x>       NZCV=0xxx
  PTEST(X, X),         NZCV=1xxx

where the first active flag (bit 'N' in NZCV) is set by the explicit
PTEST, but not by the implicit PTEST as part of the compare. Given the
PTEST mask and source are the same however, first is equivalent to any,
so the PTEST could be removed if the condition is changed. The same
applies to last active. It is safe to remove the PTEST for any active,
but this information isn't available in the current optimization.

This patch fixes the bad optimization, a later patch will implement the
optimization proposed above and fix the any active case.

Reviewed By: bsmith

Differential Revision: https://reviews.llvm.org/D137717

Added: 
    

Modified: 
    llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
    llvm/test/CodeGen/AArch64/sve-ptest-removal-cmple.ll
    llvm/test/CodeGen/AArch64/sve-ptest-removal-whilegt.mir

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index 934cceb72c2b..b802358bcd95 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -1318,11 +1318,6 @@ bool AArch64InstrInfo::optimizePTestInstr(
         (Mask->getOperand(1).getImm() != 31))
       return false;
 
-    // Fallthough to simply remove the PTEST.
-  } else if ((Mask == Pred) && (PredIsPTestLike || PredIsWhileLike)) {
-    // For PTEST(PG, PG), PTEST is redundant when PG is the result of an
-    // instruction that sets the flags as PTEST would.
-
     // Fallthough to simply remove the PTEST.
   } else if (PredIsPTestLike) {
     // For PTEST(PG, PTEST_LIKE(PG, ...)), the PTEST is redundant since the

diff  --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmple.ll b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmple.ll
index d0e5a1fea839..d2b0be299f9e 100644
--- a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmple.ll
+++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmple.ll
@@ -192,6 +192,7 @@ define i1 @cmp8_ptest_first_xx(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <v
 ; CHECK-LABEL: cmp8_ptest_first_xx:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    cmpge p0.b, p0/z, z0.b, z1.b
+; CHECK-NEXT:    ptest p0, p0.b
 ; CHECK-NEXT:    cset w0, mi
 ; CHECK-NEXT:    ret
   %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpge.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
@@ -207,6 +208,7 @@ define i1 @cmp8_ptest_last_xx(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vs
 ; CHECK-LABEL: cmp8_ptest_last_xx:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    cmpge p0.b, p0/z, z0.b, z1.b
+; CHECK-NEXT:    ptest p0, p0.b
 ; CHECK-NEXT:    cset w0, lo
 ; CHECK-NEXT:    ret
   %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpge.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
@@ -221,6 +223,7 @@ define i1 @cmp8_ptest_any_xx(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vsc
 ; CHECK-LABEL: cmp8_ptest_any_xx:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    cmpge p0.b, p0/z, z0.b, z1.b
+; CHECK-NEXT:    ptest p0, p0.b
 ; CHECK-NEXT:    cset w0, ne
 ; CHECK-NEXT:    ret
   %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpge.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
@@ -236,6 +239,7 @@ define i1 @cmp32_ptest_first_xx(<vscale x 16 x i1> %pg, <vscale x 4 x i32> %a, <
 ; CHECK-LABEL: cmp32_ptest_first_xx:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    cmpge p0.s, p0/z, z0.s, z1.s
+; CHECK-NEXT:    ptest p0, p0.b
 ; CHECK-NEXT:    cset w0, mi
 ; CHECK-NEXT:    ret
   %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
@@ -252,6 +256,7 @@ define i1 @cmp32_ptest_last_xx(<vscale x 16 x i1> %pg, <vscale x 4 x i32> %a, <v
 ; CHECK-LABEL: cmp32_ptest_last_xx:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    cmpge p0.s, p0/z, z0.s, z1.s
+; CHECK-NEXT:    ptest p0, p0.b
 ; CHECK-NEXT:    cset w0, lo
 ; CHECK-NEXT:    ret
   %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
@@ -267,6 +272,7 @@ define i1 @cmp32_ptest_any_xx(<vscale x 16 x i1> %pg, <vscale x 4 x i32> %a, <vs
 ; CHECK-LABEL: cmp32_ptest_any_xx:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    cmpge p0.s, p0/z, z0.s, z1.s
+; CHECK-NEXT:    ptest p0, p0.b
 ; CHECK-NEXT:    cset w0, ne
 ; CHECK-NEXT:    ret
   %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)

diff  --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilegt.mir b/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilegt.mir
index d1ee89f68876..217d984560e3 100644
--- a/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilegt.mir
+++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilegt.mir
@@ -462,7 +462,7 @@ body:             |
     liveins: $w0, $w1
 
     ; CHECK-LABEL: name: whilegt_b8_s32_ptest_with_matching_operands
-    ; CHECK-NOT: PTEST
+    ; CHECK: PTEST
     %1:gpr32 = COPY $w1
     %0:gpr32 = COPY $w0
     %2:ppr = WHILEGT_PWW_B %0, %1, implicit-def dead $nzcv


        


More information about the llvm-commits mailing list