[llvm] 8a58f21 - [PowerPC][Power10] Exploit store rightmost vector element instructions

Albion Fung via llvm-commits llvm-commits at lists.llvm.org
Tue Dec 22 09:06:57 PST 2020


Author: Kamau Bridgeman
Date: 2020-12-22T12:06:43-05:00
New Revision: 8a58f21f5b6c228137a9b87906fe5b720c4d1dfb

URL: https://github.com/llvm/llvm-project/commit/8a58f21f5b6c228137a9b87906fe5b720c4d1dfb
DIFF: https://github.com/llvm/llvm-project/commit/8a58f21f5b6c228137a9b87906fe5b720c4d1dfb.diff

LOG: [PowerPC][Power10] Exploit store rightmost vector element instructions

Use the store rightmost vector element instructions to perform vector
element extraction and store. On little endian, the rightmost vector
element is the zeroth vector element; with these patterns, that element
can be extracted and stored in one instruction for all vector types.

Differential Revision: https://reviews.llvm.org/D89195

Added: 
    llvm/test/CodeGen/PowerPC/store-rightmost-vector-elt.ll

Modified: 
    llvm/lib/Target/PowerPC/PPCInstrPrefix.td
    llvm/test/CodeGen/PowerPC/builtins-ppc-p10vsx.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/PowerPC/PPCInstrPrefix.td b/llvm/lib/Target/PowerPC/PPCInstrPrefix.td
index 54e9adae40d7..e7fa2affb730 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrPrefix.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrPrefix.td
@@ -2554,16 +2554,21 @@ let Predicates = [IsISA3_1, HasVSX] in {
             (COPY_TO_REGCLASS (XVCVBF16SPN RCCp.AToVSRC), VRRC)>;
 }
 
-let AddedComplexity = 400, Predicates = [IsISA3_1] in {
-  def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$rS, 0)), xoaddr:$src),
-            (STXVRBX (COPY_TO_REGCLASS $rS, VSRC), xoaddr:$src)>;
-  def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$rS, 0)), xoaddr:$src),
-            (STXVRHX (COPY_TO_REGCLASS $rS, VSRC), xoaddr:$src)>;
-  def : Pat<(store (i32 (vector_extract v4i32:$rS, 0)), xoaddr:$src),
-            (STXVRWX (COPY_TO_REGCLASS $rS, VSRC), xoaddr:$src)>;
-  def : Pat<(store (i64 (vector_extract v2i64:$rS, 0)), xoaddr:$src),
-            (STXVRDX (COPY_TO_REGCLASS $rS, VSRC), xoaddr:$src)>;
-}
+let AddedComplexity = 400, Predicates = [IsISA3_1, IsLittleEndian] in {
+  // Store element 0 of a VSX register to memory
+  def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$src, 0)), xoaddr:$dst),
+            (STXVRBX (COPY_TO_REGCLASS v16i8:$src, VSRC), xoaddr:$dst)>;
+  def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$src, 0)), xoaddr:$dst),
+            (STXVRHX (COPY_TO_REGCLASS v8i16:$src, VSRC), xoaddr:$dst)>;
+  def : Pat<(store (i32 (extractelt v4i32:$src, 0)), xoaddr:$dst),
+            (STXVRWX (COPY_TO_REGCLASS v4i32:$src, VSRC), xoaddr:$dst)>;
+  def : Pat<(store (f32 (extractelt v4f32:$src, 0)), xoaddr:$dst),
+            (STXVRWX (COPY_TO_REGCLASS v4f32:$src, VSRC), xoaddr:$dst)>;
+  def : Pat<(store (i64 (extractelt v2i64:$src, 0)), xoaddr:$dst),
+            (STXVRDX (COPY_TO_REGCLASS v2i64:$src, VSRC), xoaddr:$dst)>;
+  def : Pat<(store (f64 (extractelt v2f64:$src, 0)), xoaddr:$dst),
+            (STXVRDX (COPY_TO_REGCLASS v2f64:$src, VSRC), xoaddr:$dst)>;
+ }
 
 class xxevalPattern <dag pattern, bits<8> imm> :
   Pat<(v4i32 pattern), (XXEVAL $vA, $vB, $vC, imm)> {}

diff  --git a/llvm/test/CodeGen/PowerPC/builtins-ppc-p10vsx.ll b/llvm/test/CodeGen/PowerPC/builtins-ppc-p10vsx.ll
index 17617e90a01f..9e8f8d073a1a 100644
--- a/llvm/test/CodeGen/PowerPC/builtins-ppc-p10vsx.ll
+++ b/llvm/test/CodeGen/PowerPC/builtins-ppc-p10vsx.ll
@@ -1,13 +1,13 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
 ; RUN:   -mcpu=pwr10 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
-; RUN:   FileCheck %s
+; RUN:   FileCheck %s --check-prefixes=CHECK,CHECK-LE
 ; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
 ; RUN:   -mcpu=pwr10 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
-; RUN:   FileCheck %s
+; RUN:   FileCheck %s --check-prefixes=CHECK,CHECK-BE
 ; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -O0 \
 ; RUN:   -mcpu=pwr10 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
-; RUN:   FileCheck %s --check-prefix=CHECK-O0
+; RUN:   FileCheck %s --check-prefixes=CHECK,CHECK-O0
 
 ; These test cases aims to test the builtins for the Power10 VSX vector
 ; instructions introduced in ISA 3.1.
@@ -22,14 +22,6 @@ define signext i32 @test_vec_test_lsbb_all_ones(<16 x i8> %vuca) {
 ; CHECK-NEXT:    srwi r3, r3, 31
 ; CHECK-NEXT:    extsw r3, r3
 ; CHECK-NEXT:    blr
-;
-; CHECK-O0-LABEL: test_vec_test_lsbb_all_ones:
-; CHECK-O0:       # %bb.0: # %entry
-; CHECK-O0-NEXT:    xvtlsbb cr0, v2
-; CHECK-O0-NEXT:    mfocrf r3, 128
-; CHECK-O0-NEXT:    srwi r3, r3, 31
-; CHECK-O0-NEXT:    extsw r3, r3
-; CHECK-O0-NEXT:    blr
 entry:
   %0 = tail call i32 @llvm.ppc.vsx.xvtlsbb(<16 x i8> %vuca, i32 1)
   ret i32 %0
@@ -43,24 +35,22 @@ define signext i32 @test_vec_test_lsbb_all_zeros(<16 x i8> %vuca) {
 ; CHECK-NEXT:    rlwinm r3, r3, 3, 31, 31
 ; CHECK-NEXT:    extsw r3, r3
 ; CHECK-NEXT:    blr
-;
-; CHECK-O0-LABEL: test_vec_test_lsbb_all_zeros:
-; CHECK-O0:       # %bb.0: # %entry
-; CHECK-O0-NEXT:    xvtlsbb cr0, v2
-; CHECK-O0-NEXT:    mfocrf r3, 128
-; CHECK-O0-NEXT:    rlwinm r3, r3, 3, 31, 31
-; CHECK-O0-NEXT:    extsw r3, r3
-; CHECK-O0-NEXT:    blr
 entry:
   %0 = tail call i32 @llvm.ppc.vsx.xvtlsbb(<16 x i8> %vuca, i32 0)
   ret i32 %0
 }
 
 define void @vec_xst_trunc_sc(<1 x i128> %__vec, i64 %__offset, i8* nocapture %__ptr) {
-; CHECK-LABEL: vec_xst_trunc_sc:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    stxvrbx v2, r6, r5
-; CHECK-NEXT:    blr
+; CHECK-LE-LABEL: vec_xst_trunc_sc:
+; CHECK-LE:       # %bb.0: # %entry
+; CHECK-LE-NEXT:    stxvrbx v2, r6, r5
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-BE-LABEL: vec_xst_trunc_sc:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    vsldoi v2, v2, v2, 9
+; CHECK-BE-NEXT:    stxsibx v2, r6, r5
+; CHECK-BE-NEXT:    blr
 ;
 ; CHECK-O0-LABEL: vec_xst_trunc_sc:
 ; CHECK-O0:       # %bb.0: # %entry
@@ -79,10 +69,16 @@ entry:
 }
 
 define void @vec_xst_trunc_uc(<1 x i128> %__vec, i64 %__offset, i8* nocapture %__ptr) {
-; CHECK-LABEL: vec_xst_trunc_uc:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    stxvrbx v2, r6, r5
-; CHECK-NEXT:    blr
+; CHECK-LE-LABEL: vec_xst_trunc_uc:
+; CHECK-LE:       # %bb.0: # %entry
+; CHECK-LE-NEXT:    stxvrbx v2, r6, r5
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-BE-LABEL: vec_xst_trunc_uc:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    vsldoi v2, v2, v2, 9
+; CHECK-BE-NEXT:    stxsibx v2, r6, r5
+; CHECK-BE-NEXT:    blr
 ;
 ; CHECK-O0-LABEL: vec_xst_trunc_uc:
 ; CHECK-O0:       # %bb.0: # %entry
@@ -101,11 +97,18 @@ entry:
 }
 
 define void @vec_xst_trunc_ss(<1 x i128> %__vec, i64 %__offset, i16* nocapture %__ptr) {
-; CHECK-LABEL: vec_xst_trunc_ss:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    sldi r3, r5, 1
-; CHECK-NEXT:    stxvrhx v2, r6, r3
-; CHECK-NEXT:    blr
+; CHECK-LE-LABEL: vec_xst_trunc_ss:
+; CHECK-LE:       # %bb.0: # %entry
+; CHECK-LE-NEXT:    sldi r3, r5, 1
+; CHECK-LE-NEXT:    stxvrhx v2, r6, r3
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-BE-LABEL: vec_xst_trunc_ss:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    vsldoi v2, v2, v2, 10
+; CHECK-BE-NEXT:    sldi r3, r5, 1
+; CHECK-BE-NEXT:    stxsihx v2, r6, r3
+; CHECK-BE-NEXT:    blr
 ;
 ; CHECK-O0-LABEL: vec_xst_trunc_ss:
 ; CHECK-O0:       # %bb.0: # %entry
@@ -125,11 +128,18 @@ entry:
 }
 
 define void @vec_xst_trunc_us(<1 x i128> %__vec, i64 %__offset, i16* nocapture %__ptr) {
-; CHECK-LABEL: vec_xst_trunc_us:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    sldi r3, r5, 1
-; CHECK-NEXT:    stxvrhx v2, r6, r3
-; CHECK-NEXT:    blr
+; CHECK-LE-LABEL: vec_xst_trunc_us:
+; CHECK-LE:       # %bb.0: # %entry
+; CHECK-LE-NEXT:    sldi r3, r5, 1
+; CHECK-LE-NEXT:    stxvrhx v2, r6, r3
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-BE-LABEL: vec_xst_trunc_us:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    vsldoi v2, v2, v2, 10
+; CHECK-BE-NEXT:    sldi r3, r5, 1
+; CHECK-BE-NEXT:    stxsihx v2, r6, r3
+; CHECK-BE-NEXT:    blr
 ;
 ; CHECK-O0-LABEL: vec_xst_trunc_us:
 ; CHECK-O0:       # %bb.0: # %entry
@@ -149,11 +159,18 @@ entry:
 }
 
 define void @vec_xst_trunc_si(<1 x i128> %__vec, i64 %__offset, i32* nocapture %__ptr) {
-; CHECK-LABEL: vec_xst_trunc_si:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    sldi r3, r5, 2
-; CHECK-NEXT:    stxvrwx v2, r6, r3
-; CHECK-NEXT:    blr
+; CHECK-LE-LABEL: vec_xst_trunc_si:
+; CHECK-LE:       # %bb.0: # %entry
+; CHECK-LE-NEXT:    sldi r3, r5, 2
+; CHECK-LE-NEXT:    stxvrwx v2, r6, r3
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-BE-LABEL: vec_xst_trunc_si:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    xxsldwi vs0, v2, v2, 3
+; CHECK-BE-NEXT:    sldi r3, r5, 2
+; CHECK-BE-NEXT:    stfiwx f0, r6, r3
+; CHECK-BE-NEXT:    blr
 ;
 ; CHECK-O0-LABEL: vec_xst_trunc_si:
 ; CHECK-O0:       # %bb.0: # %entry
@@ -173,11 +190,18 @@ entry:
 }
 
 define void @vec_xst_trunc_ui(<1 x i128> %__vec, i64 %__offset, i32* nocapture %__ptr) {
-; CHECK-LABEL: vec_xst_trunc_ui:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    sldi r3, r5, 2
-; CHECK-NEXT:    stxvrwx v2, r6, r3
-; CHECK-NEXT:    blr
+; CHECK-LE-LABEL: vec_xst_trunc_ui:
+; CHECK-LE:       # %bb.0: # %entry
+; CHECK-LE-NEXT:    sldi r3, r5, 2
+; CHECK-LE-NEXT:    stxvrwx v2, r6, r3
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-BE-LABEL: vec_xst_trunc_ui:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    xxsldwi vs0, v2, v2, 3
+; CHECK-BE-NEXT:    sldi r3, r5, 2
+; CHECK-BE-NEXT:    stfiwx f0, r6, r3
+; CHECK-BE-NEXT:    blr
 ;
 ; CHECK-O0-LABEL: vec_xst_trunc_ui:
 ; CHECK-O0:       # %bb.0: # %entry
@@ -197,11 +221,17 @@ entry:
 }
 
 define void @vec_xst_trunc_sll(<1 x i128> %__vec, i64 %__offset, i64* nocapture %__ptr)  {
-; CHECK-LABEL: vec_xst_trunc_sll:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    sldi r3, r5, 3
-; CHECK-NEXT:    stxvrdx v2, r6, r3
-; CHECK-NEXT:    blr
+; CHECK-LE-LABEL: vec_xst_trunc_sll:
+; CHECK-LE:       # %bb.0: # %entry
+; CHECK-LE-NEXT:    sldi r3, r5, 3
+; CHECK-LE-NEXT:    stxvrdx v2, r6, r3
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-BE-LABEL: vec_xst_trunc_sll:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    sldi r3, r5, 3
+; CHECK-BE-NEXT:    stxsdx v2, r6, r3
+; CHECK-BE-NEXT:    blr
 ;
 ; CHECK-O0-LABEL: vec_xst_trunc_sll:
 ; CHECK-O0:       # %bb.0: # %entry
@@ -219,11 +249,17 @@ entry:
 }
 
 define void @vec_xst_trunc_ull(<1 x i128> %__vec, i64 %__offset, i64* nocapture %__ptr)  {
-; CHECK-LABEL: vec_xst_trunc_ull:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    sldi r3, r5, 3
-; CHECK-NEXT:    stxvrdx v2, r6, r3
-; CHECK-NEXT:    blr
+; CHECK-LE-LABEL: vec_xst_trunc_ull:
+; CHECK-LE:       # %bb.0: # %entry
+; CHECK-LE-NEXT:    sldi r3, r5, 3
+; CHECK-LE-NEXT:    stxvrdx v2, r6, r3
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-BE-LABEL: vec_xst_trunc_ull:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    sldi r3, r5, 3
+; CHECK-BE-NEXT:    stxsdx v2, r6, r3
+; CHECK-BE-NEXT:    blr
 ;
 ; CHECK-O0-LABEL: vec_xst_trunc_ull:
 ; CHECK-O0:       # %bb.0: # %entry
@@ -245,11 +281,6 @@ define dso_local <1 x i128> @vec_xl_zext(i64 %__offset, i8* nocapture readonly %
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    lxvrbx v2, r4, r3
 ; CHECK-NEXT:    blr
-;
-; CHECK-O0-LABEL: vec_xl_zext:
-; CHECK-O0:       # %bb.0: # %entry
-; CHECK-O0-NEXT:    lxvrbx v2, r4, r3
-; CHECK-O0-NEXT:    blr
 entry:
   %add.ptr = getelementptr inbounds i8, i8* %__pointer, i64 %__offset
   %0 = load i8, i8* %add.ptr, align 1
@@ -264,12 +295,6 @@ define dso_local <1 x i128> @vec_xl_zext_short(i64 %__offset, i16* nocapture rea
 ; CHECK-NEXT:    sldi r3, r3, 1
 ; CHECK-NEXT:    lxvrhx v2, r4, r3
 ; CHECK-NEXT:    blr
-;
-; CHECK-O0-LABEL: vec_xl_zext_short:
-; CHECK-O0:       # %bb.0: # %entry
-; CHECK-O0-NEXT:    sldi r3, r3, 1
-; CHECK-O0-NEXT:    lxvrhx v2, r4, r3
-; CHECK-O0-NEXT:    blr
 entry:
   %add.ptr = getelementptr inbounds i16, i16* %__pointer, i64 %__offset
   %0 = load i16, i16* %add.ptr, align 2
@@ -284,12 +309,6 @@ define dso_local <1 x i128> @vec_xl_zext_word(i64 %__offset, i32* nocapture read
 ; CHECK-NEXT:    sldi r3, r3, 2
 ; CHECK-NEXT:    lxvrwx v2, r4, r3
 ; CHECK-NEXT:    blr
-;
-; CHECK-O0-LABEL: vec_xl_zext_word:
-; CHECK-O0:       # %bb.0: # %entry
-; CHECK-O0-NEXT:    sldi r3, r3, 2
-; CHECK-O0-NEXT:    lxvrwx v2, r4, r3
-; CHECK-O0-NEXT:    blr
 entry:
   %add.ptr = getelementptr inbounds i32, i32* %__pointer, i64 %__offset
   %0 = load i32, i32* %add.ptr, align 4
@@ -304,12 +323,6 @@ define dso_local <1 x i128> @vec_xl_zext_dw(i64 %__offset, i64* nocapture readon
 ; CHECK-NEXT:    sldi r3, r3, 3
 ; CHECK-NEXT:    lxvrdx v2, r4, r3
 ; CHECK-NEXT:    blr
-;
-; CHECK-O0-LABEL: vec_xl_zext_dw:
-; CHECK-O0:       # %bb.0: # %entry
-; CHECK-O0-NEXT:    sldi r3, r3, 3
-; CHECK-O0-NEXT:    lxvrdx v2, r4, r3
-; CHECK-O0-NEXT:    blr
 entry:
   %add.ptr = getelementptr inbounds i64, i64* %__pointer, i64 %__offset
   %0 = load i64, i64* %add.ptr, align 8
@@ -319,13 +332,21 @@ entry:
 }
 
 define dso_local <1 x i128> @vec_xl_sext_b(i64 %offset, i8* %p) {
-; CHECK-LABEL: vec_xl_sext_b:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    lbzx r3, r4, r3
-; CHECK-NEXT:    extsb r3, r3
-; CHECK-NEXT:    sradi r4, r3, 63
-; CHECK-NEXT:    mtvsrdd v2, r4, r3
-; CHECK-NEXT:    blr
+; CHECK-LE-LABEL: vec_xl_sext_b:
+; CHECK-LE:       # %bb.0: # %entry
+; CHECK-LE-NEXT:    lbzx r3, r4, r3
+; CHECK-LE-NEXT:    extsb r3, r3
+; CHECK-LE-NEXT:    sradi r4, r3, 63
+; CHECK-LE-NEXT:    mtvsrdd v2, r4, r3
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-BE-LABEL: vec_xl_sext_b:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    lbzx r3, r4, r3
+; CHECK-BE-NEXT:    extsb r3, r3
+; CHECK-BE-NEXT:    sradi r4, r3, 63
+; CHECK-BE-NEXT:    mtvsrdd v2, r4, r3
+; CHECK-BE-NEXT:    blr
 ;
 ; CHECK-O0-LABEL: vec_xl_sext_b:
 ; CHECK-O0:       # %bb.0: # %entry
@@ -343,13 +364,21 @@ entry:
 }
 
 define dso_local <1 x i128> @vec_xl_sext_h(i64 %offset, i16* %p) {
-; CHECK-LABEL: vec_xl_sext_h:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    sldi r3, r3, 1
-; CHECK-NEXT:    lhax r3, r4, r3
-; CHECK-NEXT:    sradi r4, r3, 63
-; CHECK-NEXT:    mtvsrdd v2, r4, r3
-; CHECK-NEXT:    blr
+; CHECK-LE-LABEL: vec_xl_sext_h:
+; CHECK-LE:       # %bb.0: # %entry
+; CHECK-LE-NEXT:    sldi r3, r3, 1
+; CHECK-LE-NEXT:    lhax r3, r4, r3
+; CHECK-LE-NEXT:    sradi r4, r3, 63
+; CHECK-LE-NEXT:    mtvsrdd v2, r4, r3
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-BE-LABEL: vec_xl_sext_h:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    sldi r3, r3, 1
+; CHECK-BE-NEXT:    lhax r3, r4, r3
+; CHECK-BE-NEXT:    sradi r4, r3, 63
+; CHECK-BE-NEXT:    mtvsrdd v2, r4, r3
+; CHECK-BE-NEXT:    blr
 ;
 ; CHECK-O0-LABEL: vec_xl_sext_h:
 ; CHECK-O0:       # %bb.0: # %entry
@@ -367,13 +396,21 @@ entry:
 }
 
 define dso_local <1 x i128> @vec_xl_sext_w(i64 %offset, i32* %p) {
-; CHECK-LABEL: vec_xl_sext_w:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    sldi r3, r3, 2
-; CHECK-NEXT:    lwax r3, r4, r3
-; CHECK-NEXT:    sradi r4, r3, 63
-; CHECK-NEXT:    mtvsrdd v2, r4, r3
-; CHECK-NEXT:    blr
+; CHECK-LE-LABEL: vec_xl_sext_w:
+; CHECK-LE:       # %bb.0: # %entry
+; CHECK-LE-NEXT:    sldi r3, r3, 2
+; CHECK-LE-NEXT:    lwax r3, r4, r3
+; CHECK-LE-NEXT:    sradi r4, r3, 63
+; CHECK-LE-NEXT:    mtvsrdd v2, r4, r3
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-BE-LABEL: vec_xl_sext_w:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    sldi r3, r3, 2
+; CHECK-BE-NEXT:    lwax r3, r4, r3
+; CHECK-BE-NEXT:    sradi r4, r3, 63
+; CHECK-BE-NEXT:    mtvsrdd v2, r4, r3
+; CHECK-BE-NEXT:    blr
 ;
 ; CHECK-O0-LABEL: vec_xl_sext_w:
 ; CHECK-O0:       # %bb.0: # %entry
@@ -391,13 +428,21 @@ entry:
 }
 
 define dso_local <1 x i128> @vec_xl_sext_d(i64 %offset, i64* %p) {
-; CHECK-LABEL: vec_xl_sext_d:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    sldi r3, r3, 3
-; CHECK-NEXT:    ldx r3, r4, r3
-; CHECK-NEXT:    sradi r4, r3, 63
-; CHECK-NEXT:    mtvsrdd v2, r4, r3
-; CHECK-NEXT:    blr
+; CHECK-LE-LABEL: vec_xl_sext_d:
+; CHECK-LE:       # %bb.0: # %entry
+; CHECK-LE-NEXT:    sldi r3, r3, 3
+; CHECK-LE-NEXT:    ldx r3, r4, r3
+; CHECK-LE-NEXT:    sradi r4, r3, 63
+; CHECK-LE-NEXT:    mtvsrdd v2, r4, r3
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-BE-LABEL: vec_xl_sext_d:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    sldi r3, r3, 3
+; CHECK-BE-NEXT:    ldx r3, r4, r3
+; CHECK-BE-NEXT:    sradi r4, r3, 63
+; CHECK-BE-NEXT:    mtvsrdd v2, r4, r3
+; CHECK-BE-NEXT:    blr
 ;
 ; CHECK-O0-LABEL: vec_xl_sext_d:
 ; CHECK-O0:       # %bb.0: # %entry

diff  --git a/llvm/test/CodeGen/PowerPC/store-rightmost-vector-elt.ll b/llvm/test/CodeGen/PowerPC/store-rightmost-vector-elt.ll
new file mode 100644
index 000000000000..5fbcafecfb3d
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/store-rightmost-vector-elt.ll
@@ -0,0 +1,109 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:     -mcpu=pwr10 -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \
+; RUN:     < %s | FileCheck %s --check-prefix=CHECK-LE
+
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
+; RUN:     -mcpu=pwr10 -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \
+; RUN:     < %s | FileCheck %s --check-prefix=CHECK-BE
+
+define void @test1(<4 x i32> %A, i32* %a) {
+; CHECK-LE-LABEL: test1:
+; CHECK-LE:       # %bb.0: # %entry
+; CHECK-LE-NEXT:    stxvrwx v2, 0, r5
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-BE-LABEL: test1:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    xxsldwi vs0, v2, v2, 3
+; CHECK-BE-NEXT:    stfiwx f0, 0, r5
+; CHECK-BE-NEXT:    blr
+entry:
+  %vecext = extractelement <4 x i32> %A, i32 0
+  store i32 %vecext, i32* %a, align 4
+  ret void
+}
+
+define void @test2(<4 x float> %A, float* %a) {
+; CHECK-LE-LABEL: test2:
+; CHECK-LE:       # %bb.0: # %entry
+; CHECK-LE-NEXT:    stxvrwx v2, 0, r5
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-BE-LABEL: test2:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    xxsldwi vs0, v2, v2, 3
+; CHECK-BE-NEXT:    stfiwx f0, 0, r5
+; CHECK-BE-NEXT:    blr
+entry:
+  %vecext = extractelement <4 x float> %A, i32 0
+  store float %vecext, float* %a, align 4
+  ret void
+}
+
+define void @test3(<2 x double> %A, double* %a) {
+; CHECK-LE-LABEL: test3:
+; CHECK-LE:       # %bb.0: # %entry
+; CHECK-LE-NEXT:    stxvrdx v2, 0, r5
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-BE-LABEL: test3:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    stxsd v2, 0(r5)
+; CHECK-BE-NEXT:    blr
+entry:
+  %vecext = extractelement <2 x double> %A, i32 0
+  store double %vecext, double* %a, align 8
+  ret void
+}
+
+define void @test4(<2 x i64> %A, i64* %a) {
+; CHECK-LE-LABEL: test4:
+; CHECK-LE:       # %bb.0: # %entry
+; CHECK-LE-NEXT:    stxvrdx v2, 0, r5
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-BE-LABEL: test4:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    stxsd v2, 0(r5)
+; CHECK-BE-NEXT:    blr
+entry:
+  %vecext = extractelement <2 x i64> %A, i32 0
+  store i64 %vecext, i64* %a, align 8
+  ret void
+}
+
+define void @test5(<8 x i16> %A, i16* %a) {
+; CHECK-LE-LABEL: test5:
+; CHECK-LE:       # %bb.0: # %entry
+; CHECK-LE-NEXT:    stxvrhx v2, 0, r5
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-BE-LABEL: test5:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    vsldoi v2, v2, v2, 10
+; CHECK-BE-NEXT:    stxsihx v2, 0, r5
+; CHECK-BE-NEXT:    blr
+entry:
+  %vecext = extractelement <8 x i16> %A, i32 0
+  store i16 %vecext, i16* %a, align 2
+  ret void
+}
+
+define void @test6(<16 x i8> %A, i8* %a) {
+; CHECK-LE-LABEL: test6:
+; CHECK-LE:       # %bb.0: # %entry
+; CHECK-LE-NEXT:    stxvrbx v2, 0, r5
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-BE-LABEL: test6:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    vsldoi v2, v2, v2, 9
+; CHECK-BE-NEXT:    stxsibx v2, 0, r5
+; CHECK-BE-NEXT:    blr
+entry:
+  %vecext = extractelement <16 x i8> %A, i32 0
+  store i8 %vecext, i8* %a, align 1
+  ret void
+}
+


        


More information about the llvm-commits mailing list