[clang] 0850655 - Big-endian version of vpermxor

Wael Yehia via cfe-commits cfe-commits at lists.llvm.org
Tue Nov 30 14:50:32 PST 2021


Author: Tarique Islam
Date: 2021-11-30T22:49:55Z
New Revision: 0850655da69a700b7def4fe8d9a44d1c8d55877c

URL: https://github.com/llvm/llvm-project/commit/0850655da69a700b7def4fe8d9a44d1c8d55877c
DIFF: https://github.com/llvm/llvm-project/commit/0850655da69a700b7def4fe8d9a44d1c8d55877c.diff

LOG: Big-endian version of vpermxor

A big-endian version of vpermxor, named vpermxor_be, is added to LLVM
and Clang. vpermxor_be can be called directly on both the little-endian
and the big-endian platforms.

Reviewed By: nemanjai

Differential Revision: https://reviews.llvm.org/D114540

Added: 
    llvm/test/CodeGen/PowerPC/crypto_bifs_be.ll

Modified: 
    clang/include/clang/Basic/BuiltinsPPC.def
    clang/test/CodeGen/builtins-ppc-crypto.c
    llvm/include/llvm/IR/IntrinsicsPowerPC.td
    llvm/lib/Target/PowerPC/PPCInstrVSX.td

Removed: 
    


################################################################################
diff  --git a/clang/include/clang/Basic/BuiltinsPPC.def b/clang/include/clang/Basic/BuiltinsPPC.def
index 4ba23f8f3139b..70b0184f199f8 100644
--- a/clang/include/clang/Basic/BuiltinsPPC.def
+++ b/clang/include/clang/Basic/BuiltinsPPC.def
@@ -404,6 +404,7 @@ BUILTIN(__builtin_altivec_vbpermd, "V2ULLiV2ULLiV16Uc", "")
 // P8 Crypto built-ins.
 BUILTIN(__builtin_altivec_crypto_vsbox, "V2ULLiV2ULLi", "")
 BUILTIN(__builtin_altivec_crypto_vpermxor, "V16UcV16UcV16UcV16Uc", "")
+BUILTIN(__builtin_altivec_crypto_vpermxor_be, "V16UcV16UcV16UcV16Uc", "")
 BUILTIN(__builtin_altivec_crypto_vshasigmaw, "V4UiV4UiIiIi", "")
 BUILTIN(__builtin_altivec_crypto_vshasigmad, "V2ULLiV2ULLiIiIi", "")
 BUILTIN(__builtin_altivec_crypto_vcipher, "V2ULLiV2ULLiV2ULLi", "")

diff  --git a/clang/test/CodeGen/builtins-ppc-crypto.c b/clang/test/CodeGen/builtins-ppc-crypto.c
index 3ebccfd23efb2..136930523c8ec 100644
--- a/clang/test/CodeGen/builtins-ppc-crypto.c
+++ b/clang/test/CodeGen/builtins-ppc-crypto.c
@@ -132,6 +132,62 @@ vector unsigned char test_vpermxoruc(vector unsigned char a,
 // CHECK: @llvm.ppc.altivec.crypto.vpermxor
 }
 
+// CHECK-LABEL: define{{.*}} <16 x i8> @test_vpermxorb_be
+vector unsigned char test_vpermxorb_be(vector unsigned char a,
+                                       vector unsigned char b,
+                                       vector unsigned char c) {
+  return __builtin_altivec_crypto_vpermxor_be(a, b, c);
+// CHECK: @llvm.ppc.altivec.crypto.vpermxor.be
+}
+
+// CHECK-LABEL: define{{.*}} <8 x i16> @test_vpermxorh_be
+vector unsigned short test_vpermxorh_be(vector unsigned short a,
+                                        vector unsigned short b,
+                                        vector unsigned short c) {
+  return __builtin_altivec_crypto_vpermxor_be(a, b, c);
+// CHECK: @llvm.ppc.altivec.crypto.vpermxor.be
+}
+
+// CHECK-LABEL: define{{.*}} <4 x i32> @test_vpermxorw_be
+vector unsigned int test_vpermxorw_be(vector unsigned int a,
+                                      vector unsigned int b,
+                                      vector unsigned int c) {
+  return __builtin_altivec_crypto_vpermxor_be(a, b, c);
+// CHECK: @llvm.ppc.altivec.crypto.vpermxor.be
+}
+
+// CHECK-LABEL: define{{.*}} <2 x i64> @test_vpermxord_be
+vector unsigned long long test_vpermxord_be(vector unsigned long long a,
+                                            vector unsigned long long b,
+                                            vector unsigned long long c) {
+  return __builtin_altivec_crypto_vpermxor_be(a, b, c);
+// CHECK: @llvm.ppc.altivec.crypto.vpermxor.be
+}
+
+// CHECK-LABEL: define{{.*}} <16 x i8> @test_vpermxorbc_be
+vector bool char test_vpermxorbc_be(vector bool char a,
+                                    vector bool char b,
+                                    vector bool char c) {
+  return  __builtin_altivec_crypto_vpermxor_be(a, b, c);
+// CHECK: @llvm.ppc.altivec.crypto.vpermxor.be
+}
+
+// CHECK-LABEL: define{{.*}} <16 x i8> @test_vpermxorsc_be
+vector signed char test_vpermxorsc_be(vector signed char a,
+                                      vector signed char b,
+                                      vector signed char c) {
+  return  __builtin_altivec_crypto_vpermxor_be(a, b, c);
+// CHECK: @llvm.ppc.altivec.crypto.vpermxor.be
+}
+
+// CHECK-LABEL: define{{.*}} <16 x i8> @test_vpermxoruc_be
+vector unsigned char test_vpermxoruc_be(vector unsigned char a,
+                                        vector unsigned char b,
+                                        vector unsigned char c) {
+  return  __builtin_altivec_crypto_vpermxor_be(a, b, c);
+// CHECK: @llvm.ppc.altivec.crypto.vpermxor.be
+}
+
 // CHECK-LABEL: define{{.*}} <2 x i64> @test_vcipher
 vector unsigned long long test_vcipher(void)
 {

diff  --git a/llvm/include/llvm/IR/IntrinsicsPowerPC.td b/llvm/include/llvm/IR/IntrinsicsPowerPC.td
index 1d249310fc3f8..b01fa10763b83 100644
--- a/llvm/include/llvm/IR/IntrinsicsPowerPC.td
+++ b/llvm/include/llvm/IR/IntrinsicsPowerPC.td
@@ -1087,6 +1087,10 @@ let TargetPrefix = "ppc" in {  // All PPC intrinsics start with "llvm.ppc.".
               GCCBuiltin<"__builtin_altivec_crypto_vpermxor">,
               Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty,
                          llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
+  def int_ppc_altivec_crypto_vpermxor_be :
+              GCCBuiltin<"__builtin_altivec_crypto_vpermxor_be">,
+              Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty,
+                         llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
 
 def int_ppc_altivec_crypto_vshasigmad :
             GCCBuiltin<"__builtin_altivec_crypto_vshasigmad">,

diff  --git a/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
index d2d5ca92ca1c8..d92a10c5b2081 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrVSX.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
@@ -2471,6 +2471,7 @@ def DblwdCmp {
 // [HasVSX, HasP8Vector, IsLittleEndian]
 // [HasVSX, HasP8Vector, NoP9Vector, IsBigEndian, IsPPC64]
 // [HasVSX, HasP8Vector, NoP9Vector, IsLittleEndian]
+// [HasVSX, HasP8Altivec]
 // [HasVSX, HasDirectMove]
 // [HasVSX, HasDirectMove, IsBigEndian]
 // [HasVSX, HasDirectMove, IsLittleEndian]
@@ -2500,6 +2501,10 @@ let Predicates = [HasVSX, IsBigEndian, HasP8Altivec] in
   def : Pat<(v16i8 (int_ppc_altivec_crypto_vpermxor v16i8:$a,
                                                     v16i8:$b, v16i8:$c)),
             (v16i8 (VPERMXOR $a, $b, $c))>;
+let Predicates = [HasVSX, HasP8Altivec] in
+  def : Pat<(v16i8 (int_ppc_altivec_crypto_vpermxor_be v16i8:$a,
+                                                       v16i8:$b, v16i8:$c)),
+            (v16i8 (VPERMXOR $a, $b, $c))>;
 
 let AddedComplexity = 400 in {
 // Valid for any VSX subtarget, regardless of endianness.

diff  --git a/llvm/test/CodeGen/PowerPC/crypto_bifs_be.ll b/llvm/test/CodeGen/PowerPC/crypto_bifs_be.ll
new file mode 100644
index 0000000000000..153d0206a39c9
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/crypto_bifs_be.ll
@@ -0,0 +1,165 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:   -mcpu=pwr8 < %s | FileCheck %s --check-prefixes=CHECK-LE-P8
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:   -mcpu=pwr9 < %s | FileCheck %s --check-prefixes=CHECK-P9
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
+; RUN:   -mcpu=pwr8 < %s | FileCheck %s --check-prefixes=CHECK-BE-P8
+
+define <16 x i8> @test_vpermxorb() local_unnamed_addr {
+; CHECK-LE-P8-LABEL: test_vpermxorb:
+; CHECK-LE-P8:       # %bb.0: # %entry
+; CHECK-LE-P8-NEXT:    addis 3, 2, .LCPI0_0@toc@ha
+; CHECK-LE-P8-NEXT:    addis 4, 2, .LCPI0_1@toc@ha
+; CHECK-LE-P8-NEXT:    addi 3, 3, .LCPI0_0@toc@l
+; CHECK-LE-P8-NEXT:    lvx 2, 0, 3
+; CHECK-LE-P8-NEXT:    addi 3, 4, .LCPI0_1@toc@l
+; CHECK-LE-P8-NEXT:    lvx 3, 0, 3
+; CHECK-LE-P8-NEXT:    vpermxor 2, 3, 2, 2
+; CHECK-LE-P8-NEXT:    blr
+;
+; CHECK-P9-LABEL: test_vpermxorb:
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    addis 3, 2, .LCPI0_0@toc@ha
+; CHECK-P9-NEXT:    addi 3, 3, .LCPI0_0@toc@l
+; CHECK-P9-NEXT:    lxv 34, 0(3)
+; CHECK-P9-NEXT:    addis 3, 2, .LCPI0_1@toc@ha
+; CHECK-P9-NEXT:    addi 3, 3, .LCPI0_1@toc@l
+; CHECK-P9-NEXT:    lxv 35, 0(3)
+; CHECK-P9-NEXT:    vpermxor 2, 3, 2, 2
+; CHECK-P9-NEXT:    blr
+;
+; CHECK-BE-P8-LABEL: test_vpermxorb:
+; CHECK-BE-P8:       # %bb.0: # %entry
+; CHECK-BE-P8-NEXT:    addis 3, 2, .LCPI0_0@toc@ha
+; CHECK-BE-P8-NEXT:    addis 4, 2, .LCPI0_1@toc@ha
+; CHECK-BE-P8-NEXT:    addi 3, 3, .LCPI0_0@toc@l
+; CHECK-BE-P8-NEXT:    addi 4, 4, .LCPI0_1@toc@l
+; CHECK-BE-P8-NEXT:    lxvw4x 34, 0, 3
+; CHECK-BE-P8-NEXT:    lxvw4x 35, 0, 4
+; CHECK-BE-P8-NEXT:    vpermxor 2, 3, 2, 2
+; CHECK-BE-P8-NEXT:    blr
+entry:
+  %0 = tail call <16 x i8> @llvm.ppc.altivec.crypto.vpermxor.be(<16 x i8> <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16>, <16 x i8> <i8 113, i8 114, i8 115, i8 116, i8 117, i8 118, i8 119, i8 120, i8 121, i8 122, i8 123, i8 124, i8 125, i8 126, i8 127, i8 112>, <16 x i8> <i8 113, i8 114, i8 115, i8 116, i8 117, i8 118, i8 119, i8 120, i8 121, i8 122, i8 123, i8 124, i8 125, i8 126, i8 127, i8 112>)
+  ret <16 x i8> %0
+}
+
+declare <16 x i8> @llvm.ppc.altivec.crypto.vpermxor.be(<16 x i8>, <16 x i8>, <16 x i8>)
+
+define <8 x i16> @test_vpermxorh() local_unnamed_addr {
+; CHECK-LE-P8-LABEL: test_vpermxorh:
+; CHECK-LE-P8:       # %bb.0: # %entry
+; CHECK-LE-P8-NEXT:    addis 3, 2, .LCPI1_0@toc@ha
+; CHECK-LE-P8-NEXT:    addis 4, 2, .LCPI1_1@toc@ha
+; CHECK-LE-P8-NEXT:    addi 3, 3, .LCPI1_0@toc@l
+; CHECK-LE-P8-NEXT:    lvx 2, 0, 3
+; CHECK-LE-P8-NEXT:    addi 3, 4, .LCPI1_1@toc@l
+; CHECK-LE-P8-NEXT:    lvx 3, 0, 3
+; CHECK-LE-P8-NEXT:    vpermxor 2, 3, 2, 2
+; CHECK-LE-P8-NEXT:    blr
+;
+; CHECK-P9-LABEL: test_vpermxorh:
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    addis 3, 2, .LCPI1_0@toc@ha
+; CHECK-P9-NEXT:    addi 3, 3, .LCPI1_0@toc@l
+; CHECK-P9-NEXT:    lxv 34, 0(3)
+; CHECK-P9-NEXT:    addis 3, 2, .LCPI1_1@toc@ha
+; CHECK-P9-NEXT:    addi 3, 3, .LCPI1_1@toc@l
+; CHECK-P9-NEXT:    lxv 35, 0(3)
+; CHECK-P9-NEXT:    vpermxor 2, 3, 2, 2
+; CHECK-P9-NEXT:    blr
+;
+; CHECK-BE-P8-LABEL: test_vpermxorh:
+; CHECK-BE-P8:       # %bb.0: # %entry
+; CHECK-BE-P8-NEXT:    addis 3, 2, .LCPI1_0@toc@ha
+; CHECK-BE-P8-NEXT:    addis 4, 2, .LCPI1_1@toc@ha
+; CHECK-BE-P8-NEXT:    addi 3, 3, .LCPI1_0@toc@l
+; CHECK-BE-P8-NEXT:    addi 4, 4, .LCPI1_1@toc@l
+; CHECK-BE-P8-NEXT:    lxvw4x 34, 0, 3
+; CHECK-BE-P8-NEXT:    lxvw4x 35, 0, 4
+; CHECK-BE-P8-NEXT:    vpermxor 2, 3, 2, 2
+; CHECK-BE-P8-NEXT:    blr
+entry:
+  %0 = tail call <16 x i8> @llvm.ppc.altivec.crypto.vpermxor.be(<16 x i8> <i8 2, i8 1, i8 4, i8 3, i8 6, i8 5, i8 8, i8 7, i8 10, i8 9, i8 12, i8 11, i8 14, i8 13, i8 16, i8 15>, <16 x i8> <i8 114, i8 113, i8 116, i8 115, i8 118, i8 117, i8 120, i8 119, i8 122, i8 121, i8 124, i8 123, i8 126, i8 125, i8 112, i8 127>, <16 x i8> <i8 114, i8 113, i8 116, i8 115, i8 118, i8 117, i8 120, i8 119, i8 122, i8 121, i8 124, i8 123, i8 126, i8 125, i8 112, i8 127>)
+  %1 = bitcast <16 x i8> %0 to <8 x i16>
+  ret <8 x i16> %1
+}
+
+define <4 x i32> @test_vpermxorw() local_unnamed_addr {
+; CHECK-LE-P8-LABEL: test_vpermxorw:
+; CHECK-LE-P8:       # %bb.0: # %entry
+; CHECK-LE-P8-NEXT:    addis 3, 2, .LCPI2_0@toc@ha
+; CHECK-LE-P8-NEXT:    addis 4, 2, .LCPI2_1@toc@ha
+; CHECK-LE-P8-NEXT:    addi 3, 3, .LCPI2_0@toc@l
+; CHECK-LE-P8-NEXT:    lvx 2, 0, 3
+; CHECK-LE-P8-NEXT:    addi 3, 4, .LCPI2_1@toc@l
+; CHECK-LE-P8-NEXT:    lvx 3, 0, 3
+; CHECK-LE-P8-NEXT:    vpermxor 2, 3, 2, 2
+; CHECK-LE-P8-NEXT:    blr
+;
+; CHECK-P9-LABEL: test_vpermxorw:
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    addis 3, 2, .LCPI2_0@toc@ha
+; CHECK-P9-NEXT:    addi 3, 3, .LCPI2_0@toc@l
+; CHECK-P9-NEXT:    lxv 34, 0(3)
+; CHECK-P9-NEXT:    addis 3, 2, .LCPI2_1@toc@ha
+; CHECK-P9-NEXT:    addi 3, 3, .LCPI2_1@toc@l
+; CHECK-P9-NEXT:    lxv 35, 0(3)
+; CHECK-P9-NEXT:    vpermxor 2, 3, 2, 2
+; CHECK-P9-NEXT:    blr
+;
+; CHECK-BE-P8-LABEL: test_vpermxorw:
+; CHECK-BE-P8:       # %bb.0: # %entry
+; CHECK-BE-P8-NEXT:    addis 3, 2, .LCPI2_0@toc@ha
+; CHECK-BE-P8-NEXT:    addis 4, 2, .LCPI2_1@toc@ha
+; CHECK-BE-P8-NEXT:    addi 3, 3, .LCPI2_0@toc@l
+; CHECK-BE-P8-NEXT:    addi 4, 4, .LCPI2_1@toc@l
+; CHECK-BE-P8-NEXT:    lxvw4x 34, 0, 3
+; CHECK-BE-P8-NEXT:    lxvw4x 35, 0, 4
+; CHECK-BE-P8-NEXT:    vpermxor 2, 3, 2, 2
+; CHECK-BE-P8-NEXT:    blr
+entry:
+  %0 = tail call <16 x i8> @llvm.ppc.altivec.crypto.vpermxor.be(<16 x i8> <i8 4, i8 3, i8 2, i8 1, i8 8, i8 7, i8 6, i8 5, i8 12, i8 11, i8 10, i8 9, i8 16, i8 15, i8 14, i8 13>, <16 x i8> <i8 116, i8 115, i8 114, i8 113, i8 120, i8 119, i8 118, i8 117, i8 124, i8 123, i8 122, i8 121, i8 112, i8 127, i8 126, i8 125>, <16 x i8> <i8 116, i8 115, i8 114, i8 113, i8 120, i8 119, i8 118, i8 117, i8 124, i8 123, i8 122, i8 121, i8 112, i8 127, i8 126, i8 125>)
+  %1 = bitcast <16 x i8> %0 to <4 x i32>
+  ret <4 x i32> %1
+}
+
+define <2 x i64> @test_vpermxord() local_unnamed_addr {
+; CHECK-LE-P8-LABEL: test_vpermxord:
+; CHECK-LE-P8:       # %bb.0: # %entry
+; CHECK-LE-P8-NEXT:    addis 3, 2, .LCPI3_0@toc@ha
+; CHECK-LE-P8-NEXT:    addis 4, 2, .LCPI3_1@toc@ha
+; CHECK-LE-P8-NEXT:    addi 3, 3, .LCPI3_0@toc@l
+; CHECK-LE-P8-NEXT:    lvx 2, 0, 3
+; CHECK-LE-P8-NEXT:    addi 3, 4, .LCPI3_1@toc@l
+; CHECK-LE-P8-NEXT:    lvx 3, 0, 3
+; CHECK-LE-P8-NEXT:    vpermxor 2, 3, 2, 2
+; CHECK-LE-P8-NEXT:    blr
+;
+; CHECK-P9-LABEL: test_vpermxord:
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    addis 3, 2, .LCPI3_0@toc@ha
+; CHECK-P9-NEXT:    addi 3, 3, .LCPI3_0@toc@l
+; CHECK-P9-NEXT:    lxv 34, 0(3)
+; CHECK-P9-NEXT:    addis 3, 2, .LCPI3_1@toc@ha
+; CHECK-P9-NEXT:    addi 3, 3, .LCPI3_1@toc@l
+; CHECK-P9-NEXT:    lxv 35, 0(3)
+; CHECK-P9-NEXT:    vpermxor 2, 3, 2, 2
+; CHECK-P9-NEXT:    blr
+;
+; CHECK-BE-P8-LABEL: test_vpermxord:
+; CHECK-BE-P8:       # %bb.0: # %entry
+; CHECK-BE-P8-NEXT:    addis 3, 2, .LCPI3_0@toc@ha
+; CHECK-BE-P8-NEXT:    addis 4, 2, .LCPI3_1@toc@ha
+; CHECK-BE-P8-NEXT:    addi 3, 3, .LCPI3_0@toc@l
+; CHECK-BE-P8-NEXT:    addi 4, 4, .LCPI3_1@toc@l
+; CHECK-BE-P8-NEXT:    lxvw4x 34, 0, 3
+; CHECK-BE-P8-NEXT:    lxvw4x 35, 0, 4
+; CHECK-BE-P8-NEXT:    vpermxor 2, 3, 2, 2
+; CHECK-BE-P8-NEXT:    blr
+entry:
+  %0 = tail call <16 x i8> @llvm.ppc.altivec.crypto.vpermxor.be(<16 x i8> <i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 16, i8 15, i8 14, i8 13, i8 12, i8 11, i8 10, i8 9>, <16 x i8> <i8 120, i8 119, i8 118, i8 117, i8 116, i8 115, i8 114, i8 113, i8 112, i8 127, i8 126, i8 125, i8 124, i8 123, i8 122, i8 121>, <16 x i8> <i8 120, i8 119, i8 118, i8 117, i8 116, i8 115, i8 114, i8 113, i8 112, i8 127, i8 126, i8 125, i8 124, i8 123, i8 122, i8 121>)
+  %1 = bitcast <16 x i8> %0 to <2 x i64>
+  ret <2 x i64> %1
+}
+


        


More information about the cfe-commits mailing list