[llvm] 74790a5 - [PowerPC] Implement Truncate and Store VSX Vector Builtins

Amy Kwan via llvm-commits llvm-commits at lists.llvm.org
Fri Jul 24 17:23:10 PDT 2020


Author: Amy Kwan
Date: 2020-07-24T19:22:39-05:00
New Revision: 74790a5dde9ae01b7e96bea0b2596ef37b5325bd

URL: https://github.com/llvm/llvm-project/commit/74790a5dde9ae01b7e96bea0b2596ef37b5325bd
DIFF: https://github.com/llvm/llvm-project/commit/74790a5dde9ae01b7e96bea0b2596ef37b5325bd.diff

LOG: [PowerPC] Implement Truncate and Store VSX Vector Builtins

This patch implements the `vec_xst_trunc` function in altivec.h in order to
utilize the Store VSX Vector Rightmost [byte | half | word | doubleword] Indexed
instructions introduced in Power10.

Differential Revision: https://reviews.llvm.org/D82467

Added: 
    

Modified: 
    clang/lib/Headers/altivec.h
    clang/test/CodeGen/builtins-ppc-p10vector.c
    llvm/lib/Target/PowerPC/PPCInstrPrefix.td
    llvm/test/CodeGen/PowerPC/builtins-ppc-p10vsx.ll

Removed: 
    


################################################################################
diff  --git a/clang/lib/Headers/altivec.h b/clang/lib/Headers/altivec.h
index ac5f43836316..4e25ec118072 100644
--- a/clang/lib/Headers/altivec.h
+++ b/clang/lib/Headers/altivec.h
@@ -16597,6 +16597,58 @@ static inline __ATTRS_o_ai void vec_xst(vector unsigned __int128 __vec,
 }
 #endif
 
+/* vec_xst_trunc */
+
+#if defined(__POWER10_VECTOR__) && defined(__VSX__)
+static inline __ATTRS_o_ai void vec_xst_trunc(vector signed __int128 __vec,
+                                              signed long long __offset,
+                                              signed char *__ptr) {
+  *(__ptr + __offset) = (signed char)__vec[0];
+}
+
+static inline __ATTRS_o_ai void vec_xst_trunc(vector unsigned __int128 __vec,
+                                              signed long long __offset,
+                                              unsigned char *__ptr) {
+  *(__ptr + __offset) = (unsigned char)__vec[0];
+}
+
+static inline __ATTRS_o_ai void vec_xst_trunc(vector signed __int128 __vec,
+                                              signed long long __offset,
+                                              signed short *__ptr) {
+  *(__ptr + __offset) = (signed short)__vec[0];
+}
+
+static inline __ATTRS_o_ai void vec_xst_trunc(vector unsigned __int128 __vec,
+                                              signed long long __offset,
+                                              unsigned short *__ptr) {
+  *(__ptr + __offset) = (unsigned short)__vec[0];
+}
+
+static inline __ATTRS_o_ai void vec_xst_trunc(vector signed __int128 __vec,
+                                              signed long long __offset,
+                                              signed int *__ptr) {
+  *(__ptr + __offset) = (signed int)__vec[0];
+}
+
+static inline __ATTRS_o_ai void vec_xst_trunc(vector unsigned __int128 __vec,
+                                              signed long long __offset,
+                                              unsigned int *__ptr) {
+  *(__ptr + __offset) = (unsigned int)__vec[0];
+}
+
+static inline __ATTRS_o_ai void vec_xst_trunc(vector signed __int128 __vec,
+                                              signed long long __offset,
+                                              signed long long *__ptr) {
+  *(__ptr + __offset) = (signed long long)__vec[0];
+}
+
+static inline __ATTRS_o_ai void vec_xst_trunc(vector unsigned __int128 __vec,
+                                              signed long long __offset,
+                                              unsigned long long *__ptr) {
+  *(__ptr + __offset) = (unsigned long long)__vec[0];
+}
+#endif
+
 /* vec_xst_be */
 
 #ifdef __LITTLE_ENDIAN__

diff  --git a/clang/test/CodeGen/builtins-ppc-p10vector.c b/clang/test/CodeGen/builtins-ppc-p10vector.c
index 6f38ac77ee24..2182a19f2452 100644
--- a/clang/test/CodeGen/builtins-ppc-p10vector.c
+++ b/clang/test/CodeGen/builtins-ppc-p10vector.c
@@ -582,6 +582,54 @@ vector float test_vec_vec_splati_ins_f(void) {
   return vec_splati_ins(vfa, 0, 1.0f);
 }
 
+void test_vec_xst_trunc_sc(vector signed __int128 __a, signed long long __b,
+                           signed char *__c) {
+  // CHECK: store i8 %{{.+}}, i8* %{{.+}}, align 1
+  vec_xst_trunc(__a, __b, __c);
+}
+
+void test_vec_xst_trunc_uc(vector unsigned __int128 __a, signed long long __b,
+                           unsigned char *__c) {
+  // CHECK: store i8 %{{.+}}, i8* %{{.+}}, align 1
+  vec_xst_trunc(__a, __b, __c);
+}
+
+void test_vec_xst_trunc_ss(vector signed __int128 __a, signed long long __b,
+                           signed short *__c) {
+  // CHECK: store i16 %{{.+}}, i16* %{{.+}}, align 2
+  vec_xst_trunc(__a, __b, __c);
+}
+
+void test_vec_xst_trunc_us(vector unsigned __int128 __a, signed long long __b,
+                           unsigned short *__c) {
+  // CHECK: store i16 %{{.+}}, i16* %{{.+}}, align 2
+  vec_xst_trunc(__a, __b, __c);
+}
+
+void test_vec_xst_trunc_si(vector signed __int128 __a, signed long long __b,
+                           signed int *__c) {
+  // CHECK: store i32 %{{.+}}, i32* %{{.+}}, align 4
+  vec_xst_trunc(__a, __b, __c);
+}
+
+void test_vec_xst_trunc_ui(vector unsigned __int128 __a, signed long long __b,
+                           unsigned int *__c) {
+  // CHECK: store i32 %{{.+}}, i32* %{{.+}}, align 4
+  vec_xst_trunc(__a, __b, __c);
+}
+
+void test_vec_xst_trunc_sll(vector signed __int128 __a, signed long long __b,
+                            signed long long *__c) {
+  // CHECK: store i64 %{{.+}}, i64* %{{.+}}, align 8
+  vec_xst_trunc(__a, __b, __c);
+}
+
+void test_vec_xst_trunc_ull(vector unsigned __int128 __a, signed long long __b,
+                            unsigned long long *__c) {
+  // CHECK: store i64 %{{.+}}, i64* %{{.+}}, align 8
+  vec_xst_trunc(__a, __b, __c);
+}
+
 int test_vec_test_lsbb_all_ones(void) {
   // CHECK: @llvm.ppc.vsx.xvtlsbb(<16 x i8> %{{.+}}, i32 1
   // CHECK-NEXT: ret i32

diff  --git a/llvm/lib/Target/PowerPC/PPCInstrPrefix.td b/llvm/lib/Target/PowerPC/PPCInstrPrefix.td
index b468a8f318ee..4e048ee9930e 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrPrefix.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrPrefix.td
@@ -1100,6 +1100,17 @@ let Predicates = [IsISA3_1] in {
             (EXTRACT_SUBREG (XVTLSBB (COPY_TO_REGCLASS $XB, VSRC)), sub_eq)>;
 }
 
+let AddedComplexity = 400, Predicates = [IsISA3_1] in {
+  def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$rS, 0)), xoaddr:$src),
+            (STXVRBX (COPY_TO_REGCLASS $rS, VSRC), xoaddr:$src)>;
+  def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$rS, 0)), xoaddr:$src),
+            (STXVRHX (COPY_TO_REGCLASS $rS, VSRC), xoaddr:$src)>;
+  def : Pat<(store (i32 (vector_extract v4i32:$rS, 0)), xoaddr:$src),
+            (STXVRWX (COPY_TO_REGCLASS $rS, VSRC), xoaddr:$src)>;
+  def : Pat<(store (i64 (vector_extract v2i64:$rS, 0)), xoaddr:$src),
+            (STXVRDX (COPY_TO_REGCLASS $rS, VSRC), xoaddr:$src)>;
+}
+
 let AddedComplexity = 400, Predicates = [PrefixInstrs] in {
  def : Pat<(v4i32 (build_vector i32immNonAllOneNonZero:$A,
                                 i32immNonAllOneNonZero:$A,

diff  --git a/llvm/test/CodeGen/PowerPC/builtins-ppc-p10vsx.ll b/llvm/test/CodeGen/PowerPC/builtins-ppc-p10vsx.ll
index 2ac1b2b7514b..faddb5b4cc7f 100644
--- a/llvm/test/CodeGen/PowerPC/builtins-ppc-p10vsx.ll
+++ b/llvm/test/CodeGen/PowerPC/builtins-ppc-p10vsx.ll
@@ -2,9 +2,12 @@
 ; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
 ; RUN:   -mcpu=pwr10 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
 ; RUN:   FileCheck %s
-; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -O0 \
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
 ; RUN:   -mcpu=pwr10 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
 ; RUN:   FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -O0 \
+; RUN:   -mcpu=pwr10 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
+; RUN:   FileCheck %s --check-prefix=CHECK-O0
 
 ; These test cases aims to test the builtins for the Power10 VSX vector
 ; instructions introduced in ISA 3.1.
@@ -19,6 +22,14 @@ define signext i32 @test_vec_test_lsbb_all_ones(<16 x i8> %vuca) {
 ; CHECK-NEXT:    srwi r3, r3, 31
 ; CHECK-NEXT:    extsw r3, r3
 ; CHECK-NEXT:    blr
+;
+; CHECK-O0-LABEL: test_vec_test_lsbb_all_ones:
+; CHECK-O0:       # %bb.0: # %entry
+; CHECK-O0-NEXT:    xvtlsbb cr0, v2
+; CHECK-O0-NEXT:    mfocrf r3, 128
+; CHECK-O0-NEXT:    srwi r3, r3, 31
+; CHECK-O0-NEXT:    extsw r3, r3
+; CHECK-O0-NEXT:    blr
 entry:
   %0 = tail call i32 @llvm.ppc.vsx.xvtlsbb(<16 x i8> %vuca, i32 1)
   ret i32 %0
@@ -32,7 +43,199 @@ define signext i32 @test_vec_test_lsbb_all_zeros(<16 x i8> %vuca) {
 ; CHECK-NEXT:    rlwinm r3, r3, 3, 31, 31
 ; CHECK-NEXT:    extsw r3, r3
 ; CHECK-NEXT:    blr
+;
+; CHECK-O0-LABEL: test_vec_test_lsbb_all_zeros:
+; CHECK-O0:       # %bb.0: # %entry
+; CHECK-O0-NEXT:    xvtlsbb cr0, v2
+; CHECK-O0-NEXT:    mfocrf r3, 128
+; CHECK-O0-NEXT:    rlwinm r3, r3, 3, 31, 31
+; CHECK-O0-NEXT:    extsw r3, r3
+; CHECK-O0-NEXT:    blr
 entry:
   %0 = tail call i32 @llvm.ppc.vsx.xvtlsbb(<16 x i8> %vuca, i32 0)
   ret i32 %0
 }
+
+define void @vec_xst_trunc_sc(<1 x i128> %__vec, i64 %__offset, i8* nocapture %__ptr) {
+; CHECK-LABEL: vec_xst_trunc_sc:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    stxvrbx v2, r6, r5
+; CHECK-NEXT:    blr
+;
+; CHECK-O0-LABEL: vec_xst_trunc_sc:
+; CHECK-O0:       # %bb.0: # %entry
+; CHECK-O0-NEXT:    li r3, 0
+; CHECK-O0-NEXT:    vextubrx r3, r3, v2
+; CHECK-O0-NEXT:    # kill: def $r3 killed $r3 killed $x3
+; CHECK-O0-NEXT:    add r4, r6, r5
+; CHECK-O0-NEXT:    stb r3, 0(r4)
+; CHECK-O0-NEXT:    blr
+entry:
+  %0 = bitcast <1 x i128> %__vec to <16 x i8>
+  %conv = extractelement <16 x i8> %0, i32 0
+  %add.ptr = getelementptr inbounds i8, i8* %__ptr, i64 %__offset
+  store i8 %conv, i8* %add.ptr, align 1
+  ret void
+}
+
+define void @vec_xst_trunc_uc(<1 x i128> %__vec, i64 %__offset, i8* nocapture %__ptr) {
+; CHECK-LABEL: vec_xst_trunc_uc:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    stxvrbx v2, r6, r5
+; CHECK-NEXT:    blr
+;
+; CHECK-O0-LABEL: vec_xst_trunc_uc:
+; CHECK-O0:       # %bb.0: # %entry
+; CHECK-O0-NEXT:    li r3, 0
+; CHECK-O0-NEXT:    vextubrx r3, r3, v2
+; CHECK-O0-NEXT:    # kill: def $r3 killed $r3 killed $x3
+; CHECK-O0-NEXT:    add r4, r6, r5
+; CHECK-O0-NEXT:    stb r3, 0(r4)
+; CHECK-O0-NEXT:    blr
+entry:
+  %0 = bitcast <1 x i128> %__vec to <16 x i8>
+  %conv = extractelement <16 x i8> %0, i32 0
+  %add.ptr = getelementptr inbounds i8, i8* %__ptr, i64 %__offset
+  store i8 %conv, i8* %add.ptr, align 1
+  ret void
+}
+
+define void @vec_xst_trunc_ss(<1 x i128> %__vec, i64 %__offset, i16* nocapture %__ptr) {
+; CHECK-LABEL: vec_xst_trunc_ss:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    sldi r3, r5, 1
+; CHECK-NEXT:    stxvrhx v2, r6, r3
+; CHECK-NEXT:    blr
+;
+; CHECK-O0-LABEL: vec_xst_trunc_ss:
+; CHECK-O0:       # %bb.0: # %entry
+; CHECK-O0-NEXT:    li r3, 0
+; CHECK-O0-NEXT:    vextuhrx r3, r3, v2
+; CHECK-O0-NEXT:    # kill: def $r3 killed $r3 killed $x3
+; CHECK-O0-NEXT:    sldi r4, r5, 1
+; CHECK-O0-NEXT:    add r4, r6, r4
+; CHECK-O0-NEXT:    sth r3, 0(r4)
+; CHECK-O0-NEXT:    blr
+entry:
+  %0 = bitcast <1 x i128> %__vec to <8 x i16>
+  %conv = extractelement <8 x i16> %0, i32 0
+  %add.ptr = getelementptr inbounds i16, i16* %__ptr, i64 %__offset
+  store i16 %conv, i16* %add.ptr, align 2
+  ret void
+}
+
+define void @vec_xst_trunc_us(<1 x i128> %__vec, i64 %__offset, i16* nocapture %__ptr) {
+; CHECK-LABEL: vec_xst_trunc_us:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    sldi r3, r5, 1
+; CHECK-NEXT:    stxvrhx v2, r6, r3
+; CHECK-NEXT:    blr
+;
+; CHECK-O0-LABEL: vec_xst_trunc_us:
+; CHECK-O0:       # %bb.0: # %entry
+; CHECK-O0-NEXT:    li r3, 0
+; CHECK-O0-NEXT:    vextuhrx r3, r3, v2
+; CHECK-O0-NEXT:    # kill: def $r3 killed $r3 killed $x3
+; CHECK-O0-NEXT:    sldi r4, r5, 1
+; CHECK-O0-NEXT:    add r4, r6, r4
+; CHECK-O0-NEXT:    sth r3, 0(r4)
+; CHECK-O0-NEXT:    blr
+entry:
+  %0 = bitcast <1 x i128> %__vec to <8 x i16>
+  %conv = extractelement <8 x i16> %0, i32 0
+  %add.ptr = getelementptr inbounds i16, i16* %__ptr, i64 %__offset
+  store i16 %conv, i16* %add.ptr, align 2
+  ret void
+}
+
+define void @vec_xst_trunc_si(<1 x i128> %__vec, i64 %__offset, i32* nocapture %__ptr) {
+; CHECK-LABEL: vec_xst_trunc_si:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    sldi r3, r5, 2
+; CHECK-NEXT:    stxvrwx v2, r6, r3
+; CHECK-NEXT:    blr
+;
+; CHECK-O0-LABEL: vec_xst_trunc_si:
+; CHECK-O0:       # %bb.0: # %entry
+; CHECK-O0-NEXT:    li r3, 0
+; CHECK-O0-NEXT:    vextuwrx r3, r3, v2
+; CHECK-O0-NEXT:    # kill: def $r3 killed $r3 killed $x3
+; CHECK-O0-NEXT:    sldi r4, r5, 2
+; CHECK-O0-NEXT:    add r4, r6, r4
+; CHECK-O0-NEXT:    stw r3, 0(r4)
+; CHECK-O0-NEXT:    blr
+entry:
+  %0 = bitcast <1 x i128> %__vec to <4 x i32>
+  %conv = extractelement <4 x i32> %0, i32 0
+  %add.ptr = getelementptr inbounds i32, i32* %__ptr, i64 %__offset
+  store i32 %conv, i32* %add.ptr, align 4
+  ret void
+}
+
+define void @vec_xst_trunc_ui(<1 x i128> %__vec, i64 %__offset, i32* nocapture %__ptr) {
+; CHECK-LABEL: vec_xst_trunc_ui:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    sldi r3, r5, 2
+; CHECK-NEXT:    stxvrwx v2, r6, r3
+; CHECK-NEXT:    blr
+;
+; CHECK-O0-LABEL: vec_xst_trunc_ui:
+; CHECK-O0:       # %bb.0: # %entry
+; CHECK-O0-NEXT:    li r3, 0
+; CHECK-O0-NEXT:    vextuwrx r3, r3, v2
+; CHECK-O0-NEXT:    # kill: def $r3 killed $r3 killed $x3
+; CHECK-O0-NEXT:    sldi r4, r5, 2
+; CHECK-O0-NEXT:    add r4, r6, r4
+; CHECK-O0-NEXT:    stw r3, 0(r4)
+; CHECK-O0-NEXT:    blr
+entry:
+  %0 = bitcast <1 x i128> %__vec to <4 x i32>
+  %conv = extractelement <4 x i32> %0, i32 0
+  %add.ptr = getelementptr inbounds i32, i32* %__ptr, i64 %__offset
+  store i32 %conv, i32* %add.ptr, align 4
+  ret void
+}
+
+define void @vec_xst_trunc_sll(<1 x i128> %__vec, i64 %__offset, i64* nocapture %__ptr)  {
+; CHECK-LABEL: vec_xst_trunc_sll:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    sldi r3, r5, 3
+; CHECK-NEXT:    stxvrdx v2, r6, r3
+; CHECK-NEXT:    blr
+;
+; CHECK-O0-LABEL: vec_xst_trunc_sll:
+; CHECK-O0:       # %bb.0: # %entry
+; CHECK-O0-NEXT:    mfvsrld r3, v2
+; CHECK-O0-NEXT:    sldi r4, r5, 3
+; CHECK-O0-NEXT:    add r4, r6, r4
+; CHECK-O0-NEXT:    std r3, 0(r4)
+; CHECK-O0-NEXT:    blr
+entry:
+  %0 = bitcast <1 x i128> %__vec to <2 x i64>
+  %conv = extractelement <2 x i64> %0, i32 0
+  %add.ptr = getelementptr inbounds i64, i64* %__ptr, i64 %__offset
+  store i64 %conv, i64* %add.ptr, align 8
+  ret void
+}
+
+define void @vec_xst_trunc_ull(<1 x i128> %__vec, i64 %__offset, i64* nocapture %__ptr)  {
+; CHECK-LABEL: vec_xst_trunc_ull:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    sldi r3, r5, 3
+; CHECK-NEXT:    stxvrdx v2, r6, r3
+; CHECK-NEXT:    blr
+;
+; CHECK-O0-LABEL: vec_xst_trunc_ull:
+; CHECK-O0:       # %bb.0: # %entry
+; CHECK-O0-NEXT:    mfvsrld r3, v2
+; CHECK-O0-NEXT:    sldi r4, r5, 3
+; CHECK-O0-NEXT:    add r4, r6, r4
+; CHECK-O0-NEXT:    std r3, 0(r4)
+; CHECK-O0-NEXT:    blr
+entry:
+  %0 = bitcast <1 x i128> %__vec to <2 x i64>
+  %conv = extractelement <2 x i64> %0, i32 0
+  %add.ptr = getelementptr inbounds i64, i64* %__ptr, i64 %__offset
+  store i64 %conv, i64* %add.ptr, align 8
+  ret void
+}


        


More information about the llvm-commits mailing list