[llvm] 1674d9b - [PowerPC] Fix vector equality comparison for v2i64 pre-Power8

Nemanja Ivanovic via llvm-commits llvm-commits at lists.llvm.org
Tue Dec 21 12:29:21 PST 2021


Author: Nemanja Ivanovic
Date: 2021-12-21T14:28:41-06:00
New Revision: 1674d9b6b2da914619c7c197336bb74f7988cf38

URL: https://github.com/llvm/llvm-project/commit/1674d9b6b2da914619c7c197336bb74f7988cf38
DIFF: https://github.com/llvm/llvm-project/commit/1674d9b6b2da914619c7c197336bb74f7988cf38.diff

LOG: [PowerPC] Fix vector equality comparison for v2i64 pre-Power8

The current code makes the assumption that equality
comparison can be performed with a word comparison
instruction. While this is true if the entire 64-bit
results are used, it does not generally work. It is
possible that the low order words and high order
words produce different results and a user of only
one will get the wrong result.

This patch adds a bitwise AND of the two result words
in each doubleword so that each word has the result of
the comparison of the entire doubleword that contains it.

Differential revision: https://reviews.llvm.org/D115678

Added: 
    llvm/test/CodeGen/PowerPC/vec-icmpeq-v2i64-p7.ll

Modified: 
    llvm/lib/Target/PowerPC/PPCISelLowering.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index d6ee6a9d65253..8d6edf07bc539 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -3500,15 +3500,16 @@ SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
     if (LHS.getValueType() == MVT::v2i64) {
       // Equality can be handled by casting to the legal type for Altivec
       // comparisons, everything else needs to be expanded.
-      if (CC == ISD::SETEQ || CC == ISD::SETNE) {
-        return DAG.getNode(
-            ISD::BITCAST, dl, MVT::v2i64,
-            DAG.getSetCC(dl, MVT::v4i32,
-                         DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, LHS),
-                         DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, RHS), CC));
-      }
-
-      return SDValue();
+      if (CC != ISD::SETEQ && CC != ISD::SETNE)
+        return SDValue();
+      SDValue SetCC32 = DAG.getSetCC(
+          dl, MVT::v4i32, DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, LHS),
+          DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, RHS), CC);
+      int ShuffV[] = {1, 0, 3, 2};
+      SDValue Shuff =
+          DAG.getVectorShuffle(MVT::v4i32, dl, SetCC32, SetCC32, ShuffV);
+      return DAG.getBitcast(
+          MVT::v2i64, DAG.getNode(ISD::AND, dl, MVT::v4i32, Shuff, SetCC32));
     }
 
     // We handle most of these in the usual way.

diff  --git a/llvm/test/CodeGen/PowerPC/vec-icmpeq-v2i64-p7.ll b/llvm/test/CodeGen/PowerPC/vec-icmpeq-v2i64-p7.ll
new file mode 100644
index 0000000000000..b63b142793eec
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/vec-icmpeq-v2i64-p7.ll
@@ -0,0 +1,48 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mtriple=powerpc-aix- < %s | \
+; RUN:   FileCheck %s
+; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mtriple=powerpc64le-- < %s | \
+; RUN:   FileCheck %s --check-prefix=CHECK_LE
+; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mtriple=powerpc64le-- < %s | \
+; RUN:   FileCheck %s --check-prefix=CHECK_P8LE
+define i1 @foo(<2 x i64> %a) #0 {
+; CHECK-LABEL: foo:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xxswapd 35, 34
+; CHECK-NEXT:    lwz 3, L..C0(2) # %const.0
+; CHECK-NEXT:    vcmpequw 2, 2, 3
+; CHECK-NEXT:    lxvw4x 35, 0, 3
+; CHECK-NEXT:    addi 3, 1, -16
+; CHECK-NEXT:    vperm 3, 2, 2, 3
+; CHECK-NEXT:    xxland 0, 35, 34
+; CHECK-NEXT:    stxvw4x 0, 0, 3
+; CHECK-NEXT:    lwz 3, -12(1)
+; CHECK-NEXT:    blr
+;
+; CHECK_LE-LABEL: foo:
+; CHECK_LE:       # %bb.0: # %entry
+; CHECK_LE-NEXT:    xxswapd 35, 34
+; CHECK_LE-NEXT:    addis 3, 2, .LCPI0_0@toc@ha
+; CHECK_LE-NEXT:    addi 3, 3, .LCPI0_0@toc@l
+; CHECK_LE-NEXT:    vcmpequw 2, 2, 3
+; CHECK_LE-NEXT:    lvx 3, 0, 3
+; CHECK_LE-NEXT:    addi 3, 1, -16
+; CHECK_LE-NEXT:    vperm 3, 2, 2, 3
+; CHECK_LE-NEXT:    xxland 34, 35, 34
+; CHECK_LE-NEXT:    stvx 2, 0, 3
+; CHECK_LE-NEXT:    ld 3, -16(1)
+; CHECK_LE-NEXT:    blr
+;
+; CHECK_P8LE-LABEL: foo:
+; CHECK_P8LE:       # %bb.0: # %entry
+; CHECK_P8LE-NEXT:    xxswapd 35, 34
+; CHECK_P8LE-NEXT:    vcmpequd 2, 2, 3
+; CHECK_P8LE-NEXT:    xxswapd 0, 34
+; CHECK_P8LE-NEXT:    mffprd 3, 0
+; CHECK_P8LE-NEXT:    blr
+entry:
+  %0 = shufflevector <2 x i64> %a, <2 x i64> poison, <2 x i32> <i32 1, i32 undef>
+  %1 = icmp eq <2 x i64> %a, %0
+  %2 = extractelement <2 x i1> %1, i32 0
+  ret i1 %2
+}


        


More information about the llvm-commits mailing list