[llvm] 1150bfa - [PowerPC] Add missing negate for VPERMXOR on little endian subtargets

Nemanja Ivanovic via llvm-commits llvm-commits at lists.llvm.org
Mon Jan 25 10:23:58 PST 2021


Author: Nemanja Ivanovic
Date: 2021-01-25T12:23:33-06:00
New Revision: 1150bfa6bb099f9a85a140f66fde7b7f7aa54e60

URL: https://github.com/llvm/llvm-project/commit/1150bfa6bb099f9a85a140f66fde7b7f7aa54e60
DIFF: https://github.com/llvm/llvm-project/commit/1150bfa6bb099f9a85a140f66fde7b7f7aa54e60.diff

LOG: [PowerPC] Add missing negate for VPERMXOR on little endian subtargets

This intrinsic is supposed to have the permute control vector complemented on
little endian systems (as the ABI specifies and GCC implements). With the
current code gen, the result vector is byte-reversed.

Differential revision: https://reviews.llvm.org/D95004

Added: 
    

Modified: 
    llvm/lib/Target/PowerPC/PPCInstrAltivec.td
    llvm/lib/Target/PowerPC/PPCInstrVSX.td
    llvm/test/CodeGen/PowerPC/crypto_bifs.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/PowerPC/PPCInstrAltivec.td b/llvm/lib/Target/PowerPC/PPCInstrAltivec.td
index c029ecb63e72..1a34aa09315b 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrAltivec.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrAltivec.td
@@ -1327,8 +1327,8 @@ def VPMSUMW : VX1_Int_Ty<1160, "vpmsumw",
                          int_ppc_altivec_crypto_vpmsumw, v4i32>;
 def VPMSUMD : VX1_Int_Ty<1224, "vpmsumd",
                          int_ppc_altivec_crypto_vpmsumd, v2i64>;
-def VPERMXOR : VA1a_Int_Ty<45, "vpermxor",
-                         int_ppc_altivec_crypto_vpermxor, v16i8>;
+def VPERMXOR : VAForm_1<45, (outs vrrc:$VD), (ins vrrc:$VA, vrrc:$VB, vrrc:$VC),
+                        "vpermxor $VD, $VA, $VB, $VC", IIC_VecFP, []>;
 
 // Vector doubleword integer pack and unpack.
 let hasSideEffects = 1 in {

diff  --git a/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
index 2e45d731c953..db6e00c71b89 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrVSX.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
@@ -2408,6 +2408,8 @@ def MrgWords {
 // arbitrarily chosen to be Big, Little.
 //
 // Predicate combinations available:
+// [HasVSX, IsLittleEndian, HasP8Altivec] Altivec patterns using VSX instr.
+// [HasVSX, IsBigEndian, HasP8Altivec] Altivec patterns using VSX instr.
 // [HasVSX]
 // [HasVSX, IsBigEndian]
 // [HasVSX, IsLittleEndian]
@@ -2436,6 +2438,18 @@ def MrgWords {
 // [HasVSX, IsISA3_0, HasDirectMove, IsBigEndian, IsPPC64]
 // [HasVSX, IsISA3_0, HasDirectMove, IsLittleEndian]
 
+// These Altivec patterns are here because we need a VSX instruction to match
+// the intrinsic (but only for little endian systems).
+let Predicates = [HasVSX, IsLittleEndian, HasP8Altivec] in
+  def : Pat<(v16i8 (int_ppc_altivec_crypto_vpermxor v16i8:$a,
+                                                    v16i8:$b, v16i8:$c)),
+            (v16i8 (VPERMXOR $a, $b, (XXLNOR (COPY_TO_REGCLASS $c, VSRC),
+                                             (COPY_TO_REGCLASS $c, VSRC))))>;
+let Predicates = [HasVSX, IsBigEndian, HasP8Altivec] in
+  def : Pat<(v16i8 (int_ppc_altivec_crypto_vpermxor v16i8:$a,
+                                                    v16i8:$b, v16i8:$c)),
+            (v16i8 (VPERMXOR $a, $b, $c))>;
+
 let AddedComplexity = 400 in {
 // Valid for any VSX subtarget, regardless of endianness.
 let Predicates = [HasVSX] in {

diff  --git a/llvm/test/CodeGen/PowerPC/crypto_bifs.ll b/llvm/test/CodeGen/PowerPC/crypto_bifs.ll
index b34482abfcd0..e38fe90ecc57 100644
--- a/llvm/test/CodeGen/PowerPC/crypto_bifs.ll
+++ b/llvm/test/CodeGen/PowerPC/crypto_bifs.ll
@@ -1,7 +1,11 @@
-; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 < %s | FileCheck %s
-; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 < %s | FileCheck %s
-; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -mattr=+crypto < %s | FileCheck %s
-; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr9 < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
+; RUN:   -mcpu=pwr8 < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:   -mcpu=pwr8 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-LE
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
+; RUN:   -mcpu=pwr7 -mattr=+crypto < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:   -mcpu=pwr9 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-LE
 ; FIXME: llc -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 -mattr=-vsx < %s | FileCheck %s
 ; FIXME: The original intent was to add a check-next for the blr after every check.
 ; However, this currently fails since we don't eliminate stores of the unused
@@ -103,6 +107,7 @@ entry:
   %2 = load <16 x i8>,  <16 x i8>* %c, align 16
   %3 = call <16 x i8> @llvm.ppc.altivec.crypto.vpermxor(<16 x i8> %0, <16 x i8> %1, <16 x i8> %2)
   ret <16 x i8> %3
+; CHECK-LE: xxlnor
 ; CHECK: vpermxor 2,
 }
 
@@ -127,6 +132,7 @@ entry:
   %6 = call <16 x i8> @llvm.ppc.altivec.crypto.vpermxor(<16 x i8> %1, <16 x i8> %3, <16 x i8> %5)
   %7 = bitcast <16 x i8> %6 to <8 x i16>
   ret <8 x i16> %7
+; CHECK-LE: xxlnor
 ; CHECK: vpermxor 2,
 }
 
@@ -148,6 +154,7 @@ entry:
   %6 = call <16 x i8> @llvm.ppc.altivec.crypto.vpermxor(<16 x i8> %1, <16 x i8> %3, <16 x i8> %5)
   %7 = bitcast <16 x i8> %6 to <4 x i32>
   ret <4 x i32> %7
+; CHECK-LE: xxlnor
 ; CHECK: vpermxor 2,
 }
 
@@ -169,6 +176,7 @@ entry:
   %6 = call <16 x i8> @llvm.ppc.altivec.crypto.vpermxor(<16 x i8> %1, <16 x i8> %3, <16 x i8> %5)
   %7 = bitcast <16 x i8> %6 to <2 x i64>
   ret <2 x i64> %7
+; CHECK-LE: xxlnor
 ; CHECK: vpermxor 2,
 }
 


        


More information about the llvm-commits mailing list