[llvm] r249822 - Vector element extraction without stack operations on Power 8
Nemanja Ivanovic via llvm-commits
llvm-commits at lists.llvm.org
Fri Oct 9 04:12:19 PDT 2015
Author: nemanjai
Date: Fri Oct 9 06:12:18 2015
New Revision: 249822
URL: http://llvm.org/viewvc/llvm-project?rev=249822&view=rev
Log:
Vector element extraction without stack operations on Power 8
This patch corresponds to review:
http://reviews.llvm.org/D12032
This patch builds on the patch that provided scalar to vector conversions
without stack operations (D11471).
Included in this patch:
- Vector element extraction for all vector types with constant element number
- Vector element extraction for v16i8 and v8i16 with variable element number
- Removal of some COPY_TO_REGCLASS operations that ended up needlessly
moving values back and forth between registers
Not included in this patch (will be in upcoming patch):
- Vector element extraction for v4i32, v4f32, v2i64 and v2f64 with
variable element number
- Vector element insertion for variable/constant element number
Tests are provided for all extractions; the tests for extractions that are
not yet implemented are just placeholders.
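
As a concrete illustration (this sketch is not from the patch), the kind of
code that benefits is a plain element read such as the one below; before this
patch, the extraction was done by spilling the vector to a stack slot and
reloading the scalar:

  /* Illustrative AltiVec-style C. With POWER8 direct moves, the return
     value is produced with mfvsrd plus a rotate/clear entirely in
     registers; previously the vector was stored to memory and the byte
     reloaded. */
  #include <altivec.h>

  signed char get_elem5(vector signed char v) {
    return v[5];
  }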
Modified:
llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp
llvm/trunk/lib/Target/PowerPC/PPCInstrVSX.td
llvm/trunk/lib/Target/PowerPC/PPCVSXCopy.cpp
llvm/trunk/test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll
Modified: llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp?rev=249822&r1=249821&r2=249822&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp Fri Oct 9 06:12:18 2015
@@ -543,14 +543,21 @@ PPCTargetLowering::PPCTargetLowering(con
if (Subtarget.hasVSX()) {
setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f64, Legal);
- if (Subtarget.hasP8Vector())
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Legal);
+ if (Subtarget.hasP8Vector()) {
setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Legal);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Legal);
+ }
if (Subtarget.hasDirectMove()) {
setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v16i8, Legal);
setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i16, Legal);
setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Legal);
// FIXME: this is causing bootstrap failures, disable temporarily
//setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2i64, Legal);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v16i8, Legal);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v8i16, Legal);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Legal);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Legal);
}
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Legal);
Modified: llvm/trunk/lib/Target/PowerPC/PPCInstrVSX.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCInstrVSX.td?rev=249822&r1=249821&r2=249822&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCInstrVSX.td (original)
+++ llvm/trunk/lib/Target/PowerPC/PPCInstrVSX.td Fri Oct 9 06:12:18 2015
@@ -1237,59 +1237,397 @@ let Predicates = [HasDirectMove, HasVSX]
[(set f64:$XT, (PPCmtvsrz i32:$rA))]>;
} // HasDirectMove, HasVSX
-/* Direct moves of various size entities from GPR's into VSR's. Each lines
+/* Direct moves of various widths from GPR's into VSR's. Each move lines
the value up into element 0 (both BE and LE). Namely, entities smaller than
a doubleword are shifted left and moved for BE. For LE, they're moved, then
swapped to go into the least significant element of the VSR.
*/
-def Moves {
- dag BE_BYTE_0 = (MTVSRD
- (RLDICR
- (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $A, sub_32), 56, 7));
- dag BE_HALF_0 = (MTVSRD
- (RLDICR
- (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $A, sub_32), 48, 15));
- dag BE_WORD_0 = (MTVSRD
- (RLDICR
- (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $A, sub_32), 32, 31));
+def MovesToVSR {
+ dag BE_BYTE_0 =
+ (MTVSRD
+ (RLDICR
+ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $A, sub_32), 56, 7));
+ dag BE_HALF_0 =
+ (MTVSRD
+ (RLDICR
+ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $A, sub_32), 48, 15));
+ dag BE_WORD_0 =
+ (MTVSRD
+ (RLDICR
+ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $A, sub_32), 32, 31));
dag BE_DWORD_0 = (MTVSRD $A);
dag LE_MTVSRW = (MTVSRD (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $A, sub_32));
- dag LE_WORD_1 = (v2i64 (COPY_TO_REGCLASS LE_MTVSRW, VSRC));
+ dag LE_WORD_1 = (v2i64 (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)),
+ LE_MTVSRW, sub_64));
dag LE_WORD_0 = (XXPERMDI LE_WORD_1, LE_WORD_1, 2);
- dag LE_DWORD_1 = (v2i64 (COPY_TO_REGCLASS BE_DWORD_0, VSRC));
+ dag LE_DWORD_1 = (v2i64 (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)),
+ BE_DWORD_0, sub_64));
dag LE_DWORD_0 = (XXPERMDI LE_DWORD_1, LE_DWORD_1, 2);
}
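
(Illustrative aside, not part of the patch: a rough C model of the RLDICR
idiom these DAGs use. RLDICR rotates the doubleword left and clears every
bit to the right of the mask-end bit, which is how a small value is lined
up into element 0 for BE.)

  #include <stdint.h>

  /* Model of RLDICR x, SH, ME: rotate x left by SH, then clear all bits to
     the right of bit ME (bits numbered 0..63 from the left, as in the ISA
     manual). Assumes 0 < SH < 64. */
  static uint64_t rldicr(uint64_t x, unsigned sh, unsigned me) {
    uint64_t rot = (x << sh) | (x >> (64 - sh));
    return rot & (~0ULL << (63 - me));
  }

  /* BE_BYTE_0: the low byte of the GPR becomes the most significant byte
     of the doubleword, i.e. vector byte element 0 once MTVSRD places the
     doubleword into the VSR. */
  uint64_t be_byte_0(uint32_t a) { return rldicr(a, 56, 7); }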
+/* Direct moves of various widths from VSR's to GPR's. Each move extracts
+ the respective element from the VSR and lines it up on the right side
+ of the GPR. In addition to extraction from positions
+ specified by a constant, a pattern for extracting from a variable position
+ is provided. This is useful when the element number is not known at
+ compile time.
+ The numbering for the DAG's is for LE, but when used on BE, the correct
+ LE element can just be used (i.e. LE_BYTE_2 == BE_BYTE_13).
+*/
+def MovesFromVSR {
+ // Doubleword extraction
+ dag LE_DWORD_0 =
+ (MFVSRD
+ (EXTRACT_SUBREG
+ (XXPERMDI (COPY_TO_REGCLASS $S, VSRC),
+ (COPY_TO_REGCLASS $S, VSRC), 2), sub_64));
+ dag LE_DWORD_1 = (MFVSRD
+ (EXTRACT_SUBREG
+ (v2i64 (COPY_TO_REGCLASS $S, VSRC)), sub_64));
+
+ // Word extraction
+ dag LE_WORD_0 = (MFVSRWZ (EXTRACT_SUBREG (XXSLDWI $S, $S, 2), sub_64));
+ dag LE_WORD_1 = (MFVSRWZ (EXTRACT_SUBREG (XXSLDWI $S, $S, 1), sub_64));
+ dag LE_WORD_2 = (MFVSRWZ (EXTRACT_SUBREG
+ (v2i64 (COPY_TO_REGCLASS $S, VSRC)), sub_64));
+ dag LE_WORD_3 = (MFVSRWZ (EXTRACT_SUBREG (XXSLDWI $S, $S, 3), sub_64));
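
(Illustrative aside, not from the patch: loosely, XXSLDWI $S, $S, k rotates
the four words of the register left by k positions, and MFVSRWZ reads word 1,
the low word of doubleword 0. A C sketch of the word selection, with the
register viewed as words w[0..3], w[0] most significant:)

  #include <stdint.h>

  /* LE element e lives in word 3 - e; XXSLDWI by k makes word (1 + k) & 3
     visible to MFVSRWZ, so LE_WORD_<e> uses k = (2 - e) & 3. LE_WORD_2
     needs no shift at all, which is why it reads the register directly. */
  uint32_t le_word(const uint32_t w[4], unsigned e) {
    unsigned k = (2u - e) & 3;
    return w[(1 + k) & 3];
  }

The doubleword DAGs above follow the same idea, with (XXPERMDI $S, $S, 2)
swapping the two doublewords so that either element can be read by MFVSRD.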
+
+ // Halfword extraction
+ dag LE_HALF_0 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_0, 0, 48), sub_32));
+ dag LE_HALF_1 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_0, 48, 48), sub_32));
+ dag LE_HALF_2 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_0, 32, 48), sub_32));
+ dag LE_HALF_3 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_0, 16, 48), sub_32));
+ dag LE_HALF_4 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_1, 0, 48), sub_32));
+ dag LE_HALF_5 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_1, 48, 48), sub_32));
+ dag LE_HALF_6 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_1, 32, 48), sub_32));
+ dag LE_HALF_7 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_1, 16, 48), sub_32));
+
+ // Byte extraction
+ dag LE_BYTE_0 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_0, 0, 56), sub_32));
+ dag LE_BYTE_1 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_0, 56, 56), sub_32));
+ dag LE_BYTE_2 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_0, 48, 56), sub_32));
+ dag LE_BYTE_3 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_0, 40, 56), sub_32));
+ dag LE_BYTE_4 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_0, 32, 56), sub_32));
+ dag LE_BYTE_5 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_0, 24, 56), sub_32));
+ dag LE_BYTE_6 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_0, 16, 56), sub_32));
+ dag LE_BYTE_7 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_0, 8, 56), sub_32));
+ dag LE_BYTE_8 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_1, 0, 56), sub_32));
+ dag LE_BYTE_9 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_1, 56, 56), sub_32));
+ dag LE_BYTE_10 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_1, 48, 56), sub_32));
+ dag LE_BYTE_11 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_1, 40, 56), sub_32));
+ dag LE_BYTE_12 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_1, 32, 56), sub_32));
+ dag LE_BYTE_13 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_1, 24, 56), sub_32));
+ dag LE_BYTE_14 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_1, 16, 56), sub_32));
+ dag LE_BYTE_15 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_1, 8, 56), sub_32));
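
(Illustrative aside, not from the patch: the halfword and byte DAGs above all
reduce to one idiom. RLDICL rotates the wanted element down to the low bits
and clears the high-order bits. A C model, using LE_BYTE_2, i.e.
(RLDICL LE_DWORD_0, 48, 56), as the example:)

  #include <stdint.h>

  /* Model of RLDICL x, SH, MB: rotate x left by SH, then clear the MB
     high-order bits, keeping the low 64 - MB bits. Assumes 0 < SH < 64. */
  static uint64_t rldicl(uint64_t x, unsigned sh, unsigned mb) {
    uint64_t rot = (x << sh) | (x >> (64 - sh));
    return rot & (~0ULL >> mb);
  }

  /* LE byte element 2 occupies bits 16..23 of the low doubleword; rotating
     left by 48 (equivalently, right by 16) brings it to bits 0..7, and
     mb = 56 keeps exactly those 8 bits. */
  uint8_t le_byte_2(uint64_t le_dword_0) {
    return (uint8_t)rldicl(le_dword_0, 48, 56);
  }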
+
+ /* Variable element number (BE and LE patterns must be specified separately)
+ This is a rather involved process.
+
+ Conceptually, this is how the move is accomplished:
+ 1. Identify which doubleword contains the element
+ 2. Shift in the VMX register so that the correct doubleword is lined
+ up for the MFVSRD
+ 3. Perform the move so that the element (along with some extra stuff)
+ is in the GPR
+ 4. Right shift within the GPR so that the element is right-justified
+
+ Of course, the index is an element number which has a different meaning
+ on LE/BE so the patterns have to be specified separately.
+
+ Note: The final result will be the element right-justified with high
+ order bits being arbitrarily defined (namely, whatever was in the
+ vector register to the left of the value originally).
+ */
+
+ /* LE variable byte
+ Number 1. above:
+ - For elements 0-7, we shift left by 8 bytes since they're on the right
+ - For elements 8-15, we need not shift (shift left by zero bytes)
+ This is accomplished by inverting the bits of the index and AND-ing
+ with 0x8 (i.e. clearing all bits of the index and inverting bit 60).
+ */
+ dag LE_VBYTE_PERM_VEC = (LVSL ZERO8, (ANDC8 (LI8 8), $Idx));
+
+ // Number 2. above:
+ // - Now that we set up the shift amount, we shift in the VMX register
+ dag LE_VBYTE_PERMUTE = (VPERM $S, $S, LE_VBYTE_PERM_VEC);
+
+ // Number 3. above:
+ // - The doubleword containing our element is moved to a GPR
+ dag LE_MV_VBYTE = (MFVSRD
+ (EXTRACT_SUBREG
+ (v2i64 (COPY_TO_REGCLASS LE_VBYTE_PERMUTE, VSRC)),
+ sub_64));
+
+ /* Number 4. above:
+ - Truncate the element number to the range 0-7 (8-15 are symmetrical
+ and out of range values are truncated accordingly)
+ - Multiply by 8 as we need to shift right by the number of bits, not bytes
+ - Shift right in the GPR by the calculated value
+ */
+ dag LE_VBYTE_SHIFT = (EXTRACT_SUBREG (RLDICR (AND8 (LI8 7), $Idx), 3, 60),
+ sub_32);
+ dag LE_VARIABLE_BYTE = (EXTRACT_SUBREG (SRD LE_MV_VBYTE, LE_VBYTE_SHIFT),
+ sub_32);
+
+ /* BE variable byte
+ The algorithm here is the same as the LE variable byte except:
+ - The shift in the VMX register is by 0/8 for opposite element numbers so
+ we simply AND the element number with 0x8
+ - The order of elements after the move to GPR is reversed, so we invert
+ the bits of the index prior to truncating to the range 0-7
+ */
+ dag BE_VBYTE_PERM_VEC = (LVSL ZERO8, (ANDIo8 $Idx, 8));
+ dag BE_VBYTE_PERMUTE = (VPERM $S, $S, BE_VBYTE_PERM_VEC);
+ dag BE_MV_VBYTE = (MFVSRD
+ (EXTRACT_SUBREG
+ (v2i64 (COPY_TO_REGCLASS BE_VBYTE_PERMUTE, VSRC)),
+ sub_64));
+ dag BE_VBYTE_SHIFT = (EXTRACT_SUBREG (RLDICR (ANDC8 (LI8 7), $Idx), 3, 60),
+ sub_32);
+ dag BE_VARIABLE_BYTE = (EXTRACT_SUBREG (SRD BE_MV_VBYTE, BE_VBYTE_SHIFT),
+ sub_32);
+
+ /* LE variable halfword
+ Number 1. above:
+ - For elements 0-3, we shift left by 8 bytes since they're on the right
+ - For elements 4-7, we need not shift (shift left by zero bytes)
+ Similarly to the byte pattern, we invert the bits of the index, but we
+ AND with 0x4 (i.e. clear all bits of the index and invert bit 61).
+ Of course, the shift is still by 8 bytes, so we must multiply by 2.
+ */
+ dag LE_VHALF_PERM_VEC = (LVSL ZERO8, (RLDICR (ANDC8 (LI8 4), $Idx), 1, 62));
+
+ // Number 2. above:
+ // - Now that we set up the shift amount, we shift in the VMX register
+ dag LE_VHALF_PERMUTE = (VPERM $S, $S, LE_VHALF_PERM_VEC);
+
+ // Number 3. above:
+ // - The doubleword containing our element is moved to a GPR
+ dag LE_MV_VHALF = (MFVSRD
+ (EXTRACT_SUBREG
+ (v2i64 (COPY_TO_REGCLASS LE_VHALF_PERMUTE, VSRC)),
+ sub_64));
+
+ /* Number 4. above:
+ - Truncate the element number to the range 0-3 (4-7 are symmetrical
+ and out of range values are truncated accordingly)
+ - Multiply by 16 as we need to shift right by the number of bits
+ - Shift right in the GPR by the calculated value
+ */
+ dag LE_VHALF_SHIFT = (EXTRACT_SUBREG (RLDICR (AND8 (LI8 3), $Idx), 4, 59),
+ sub_32);
+ dag LE_VARIABLE_HALF = (EXTRACT_SUBREG (SRD LE_MV_VHALF, LE_VHALF_SHIFT),
+ sub_32);
+
+ /* BE variable halfword
+ The algorithm here is the same as the LE variable halfword except:
+ - The shift in the VMX register is by 0/8 for opposite element numbers so
+ we simply AND the element number with 0x4 and multiply by 2
+ - The order of elements after the move to GPR is reversed, so we invert
+ the bits of the index prior to truncating to the range 0-3
+ */
+ dag BE_VHALF_PERM_VEC = (LVSL ZERO8, (RLDICR (ANDIo8 $Idx, 4), 1, 62));
+ dag BE_VHALF_PERMUTE = (VPERM $S, $S, BE_VHALF_PERM_VEC);
+ dag BE_MV_VHALF = (MFVSRD
+ (EXTRACT_SUBREG
+ (v2i64 (COPY_TO_REGCLASS BE_VHALF_PERMUTE, VSRC)),
+ sub_64));
+ dag BE_VHALF_SHIFT = (EXTRACT_SUBREG (RLDICR (ANDC8 (LI8 3), $Idx), 4, 60),
+ sub_32);
+ dag BE_VARIABLE_HALF = (EXTRACT_SUBREG (SRD BE_MV_VHALF, BE_VHALF_SHIFT),
+ sub_32);
+}
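
(Illustrative aside, not from the patch: pulling the four numbered steps
together, a rough C model of the LE variable-byte extraction. The vector is
modeled as two little-endian doublewords, and LVSL/VPERM are reduced to the
doubleword-select decision they implement here.)

  #include <stdint.h>

  /* Model of MovesFromVSR.LE_VARIABLE_BYTE: dw[0] holds elements 0-7 and
     dw[1] holds elements 8-15 (little-endian layout). */
  uint8_t le_variable_byte(const uint64_t dw[2], uint64_t idx) {
    /* Steps 1-3: (~idx & 8) selects the 8-byte shift for elements 0-7, so
       the permute leaves the doubleword containing the element where
       MFVSRD can read it. */
    uint64_t moved = (~idx & 8) ? dw[0] : dw[1];
    /* Step 4: idx & 7 truncates the index, * 8 converts bytes to bits,
       and the shift right-justifies the element; the high-order bits stay
       arbitrary, hence the extsb/clrldi in the tests below. */
    return (uint8_t)(moved >> ((idx & 7) * 8));
  }

The halfword variant is identical except that the index is masked with 3 and
scaled to 16-bit units, and the BE variants swap the roles of AND and ANDC
exactly as the comments in the DAGs describe.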
+
+// v4f32 scalar <-> vector conversions (BE)
let Predicates = [IsBigEndian, HasP8Vector] in {
def : Pat<(v4f32 (scalar_to_vector f32:$A)),
(v4f32 (XSCVDPSPN $A))>;
+ def : Pat<(f32 (vector_extract v4f32:$S, 0)),
+ (f32 (XSCVSPDPN $S))>;
+ def : Pat<(f32 (vector_extract v4f32:$S, 1)),
+ (f32 (XSCVSPDPN (XXSLDWI $S, $S, 1)))>;
+ def : Pat<(f32 (vector_extract v4f32:$S, 2)),
+ (f32 (XSCVSPDPN (XXSLDWI $S, $S, 2)))>;
+ def : Pat<(f32 (vector_extract v4f32:$S, 3)),
+ (f32 (XSCVSPDPN (XXSLDWI $S, $S, 3)))>;
} // IsBigEndian, HasP8Vector
let Predicates = [IsBigEndian, HasDirectMove] in {
+ // v16i8 scalar <-> vector conversions (BE)
def : Pat<(v16i8 (scalar_to_vector i32:$A)),
- (v16i8 (COPY_TO_REGCLASS Moves.BE_BYTE_0, VSRC))>;
+ (v16i8 (SUBREG_TO_REG (i64 1), MovesToVSR.BE_BYTE_0, sub_64))>;
def : Pat<(v8i16 (scalar_to_vector i32:$A)),
- (v8i16 (COPY_TO_REGCLASS Moves.BE_HALF_0, VSRC))>;
+ (v8i16 (SUBREG_TO_REG (i64 1), MovesToVSR.BE_HALF_0, sub_64))>;
def : Pat<(v4i32 (scalar_to_vector i32:$A)),
- (v4i32 (COPY_TO_REGCLASS Moves.BE_WORD_0, VSRC))>;
+ (v4i32 (SUBREG_TO_REG (i64 1), MovesToVSR.BE_WORD_0, sub_64))>;
def : Pat<(v2i64 (scalar_to_vector i64:$A)),
- (v2i64 (COPY_TO_REGCLASS Moves.BE_DWORD_0, VSRC))>;
+ (v2i64 (SUBREG_TO_REG (i64 1), MovesToVSR.BE_DWORD_0, sub_64))>;
+ def : Pat<(i32 (vector_extract v16i8:$S, 0)),
+ (i32 MovesFromVSR.LE_BYTE_15)>;
+ def : Pat<(i32 (vector_extract v16i8:$S, 1)),
+ (i32 MovesFromVSR.LE_BYTE_14)>;
+ def : Pat<(i32 (vector_extract v16i8:$S, 2)),
+ (i32 MovesFromVSR.LE_BYTE_13)>;
+ def : Pat<(i32 (vector_extract v16i8:$S, 3)),
+ (i32 MovesFromVSR.LE_BYTE_12)>;
+ def : Pat<(i32 (vector_extract v16i8:$S, 4)),
+ (i32 MovesFromVSR.LE_BYTE_11)>;
+ def : Pat<(i32 (vector_extract v16i8:$S, 5)),
+ (i32 MovesFromVSR.LE_BYTE_10)>;
+ def : Pat<(i32 (vector_extract v16i8:$S, 6)),
+ (i32 MovesFromVSR.LE_BYTE_9)>;
+ def : Pat<(i32 (vector_extract v16i8:$S, 7)),
+ (i32 MovesFromVSR.LE_BYTE_8)>;
+ def : Pat<(i32 (vector_extract v16i8:$S, 8)),
+ (i32 MovesFromVSR.LE_BYTE_7)>;
+ def : Pat<(i32 (vector_extract v16i8:$S, 9)),
+ (i32 MovesFromVSR.LE_BYTE_6)>;
+ def : Pat<(i32 (vector_extract v16i8:$S, 10)),
+ (i32 MovesFromVSR.LE_BYTE_5)>;
+ def : Pat<(i32 (vector_extract v16i8:$S, 11)),
+ (i32 MovesFromVSR.LE_BYTE_4)>;
+ def : Pat<(i32 (vector_extract v16i8:$S, 12)),
+ (i32 MovesFromVSR.LE_BYTE_3)>;
+ def : Pat<(i32 (vector_extract v16i8:$S, 13)),
+ (i32 MovesFromVSR.LE_BYTE_2)>;
+ def : Pat<(i32 (vector_extract v16i8:$S, 14)),
+ (i32 MovesFromVSR.LE_BYTE_1)>;
+ def : Pat<(i32 (vector_extract v16i8:$S, 15)),
+ (i32 MovesFromVSR.LE_BYTE_0)>;
+ def : Pat<(i32 (vector_extract v16i8:$S, i64:$Idx)),
+ (i32 MovesFromVSR.BE_VARIABLE_BYTE)>;
+
+ // v8i16 scalar <-> vector conversions (BE)
+ def : Pat<(i32 (vector_extract v8i16:$S, 0)),
+ (i32 MovesFromVSR.LE_HALF_7)>;
+ def : Pat<(i32 (vector_extract v8i16:$S, 1)),
+ (i32 MovesFromVSR.LE_HALF_6)>;
+ def : Pat<(i32 (vector_extract v8i16:$S, 2)),
+ (i32 MovesFromVSR.LE_HALF_5)>;
+ def : Pat<(i32 (vector_extract v8i16:$S, 3)),
+ (i32 MovesFromVSR.LE_HALF_4)>;
+ def : Pat<(i32 (vector_extract v8i16:$S, 4)),
+ (i32 MovesFromVSR.LE_HALF_3)>;
+ def : Pat<(i32 (vector_extract v8i16:$S, 5)),
+ (i32 MovesFromVSR.LE_HALF_2)>;
+ def : Pat<(i32 (vector_extract v8i16:$S, 6)),
+ (i32 MovesFromVSR.LE_HALF_1)>;
+ def : Pat<(i32 (vector_extract v8i16:$S, 7)),
+ (i32 MovesFromVSR.LE_HALF_0)>;
+ def : Pat<(i32 (vector_extract v8i16:$S, i64:$Idx)),
+ (i32 MovesFromVSR.BE_VARIABLE_HALF)>;
+
+ // v4i32 scalar <-> vector conversions (BE)
+ def : Pat<(i32 (vector_extract v4i32:$S, 0)),
+ (i32 MovesFromVSR.LE_WORD_3)>;
+ def : Pat<(i32 (vector_extract v4i32:$S, 1)),
+ (i32 MovesFromVSR.LE_WORD_2)>;
+ def : Pat<(i32 (vector_extract v4i32:$S, 2)),
+ (i32 MovesFromVSR.LE_WORD_1)>;
+ def : Pat<(i32 (vector_extract v4i32:$S, 3)),
+ (i32 MovesFromVSR.LE_WORD_0)>;
+
+ // v2i64 scalar <-> vector conversions (BE)
+ def : Pat<(i64 (vector_extract v2i64:$S, 0)),
+ (i64 MovesFromVSR.LE_DWORD_1)>;
+ def : Pat<(i64 (vector_extract v2i64:$S, 1)),
+ (i64 MovesFromVSR.LE_DWORD_0)>;
} // IsBigEndian, HasDirectMove
+// v4f32 scalar <-> vector conversions (LE)
let Predicates = [IsLittleEndian, HasP8Vector] in {
def : Pat<(v4f32 (scalar_to_vector f32:$A)),
(v4f32 (XXSLDWI (XSCVDPSPN $A), (XSCVDPSPN $A), 1))>;
+ def : Pat<(f32 (vector_extract v4f32:$S, 0)),
+ (f32 (XSCVSPDPN (XXSLDWI $S, $S, 3)))>;
+ def : Pat<(f32 (vector_extract v4f32:$S, 1)),
+ (f32 (XSCVSPDPN (XXSLDWI $S, $S, 2)))>;
+ def : Pat<(f32 (vector_extract v4f32:$S, 2)),
+ (f32 (XSCVSPDPN (XXSLDWI $S, $S, 1)))>;
+ def : Pat<(f32 (vector_extract v4f32:$S, 3)),
+ (f32 (XSCVSPDPN $S))>;
} // IsLittleEndian, HasP8Vector
let Predicates = [IsLittleEndian, HasDirectMove] in {
+ // v16i8 scalar <-> vector conversions (LE)
def : Pat<(v16i8 (scalar_to_vector i32:$A)),
- (v16i8 (COPY_TO_REGCLASS Moves.LE_WORD_0, VSRC))>;
+ (v16i8 (COPY_TO_REGCLASS MovesToVSR.LE_WORD_0, VSRC))>;
def : Pat<(v8i16 (scalar_to_vector i32:$A)),
- (v8i16 (COPY_TO_REGCLASS Moves.LE_WORD_0, VSRC))>;
+ (v8i16 (COPY_TO_REGCLASS MovesToVSR.LE_WORD_0, VSRC))>;
def : Pat<(v4i32 (scalar_to_vector i32:$A)),
- (v4i32 (COPY_TO_REGCLASS Moves.LE_WORD_0, VSRC))>;
+ (v4i32 MovesToVSR.LE_WORD_0)>;
def : Pat<(v2i64 (scalar_to_vector i64:$A)),
- (v2i64 Moves.LE_DWORD_0)>;
-} // IsLittleEndian, HasDirectMove
+ (v2i64 MovesToVSR.LE_DWORD_0)>;
+ def : Pat<(i32 (vector_extract v16i8:$S, 0)),
+ (i32 MovesFromVSR.LE_BYTE_0)>;
+ def : Pat<(i32 (vector_extract v16i8:$S, 1)),
+ (i32 MovesFromVSR.LE_BYTE_1)>;
+ def : Pat<(i32 (vector_extract v16i8:$S, 2)),
+ (i32 MovesFromVSR.LE_BYTE_2)>;
+ def : Pat<(i32 (vector_extract v16i8:$S, 3)),
+ (i32 MovesFromVSR.LE_BYTE_3)>;
+ def : Pat<(i32 (vector_extract v16i8:$S, 4)),
+ (i32 MovesFromVSR.LE_BYTE_4)>;
+ def : Pat<(i32 (vector_extract v16i8:$S, 5)),
+ (i32 MovesFromVSR.LE_BYTE_5)>;
+ def : Pat<(i32 (vector_extract v16i8:$S, 6)),
+ (i32 MovesFromVSR.LE_BYTE_6)>;
+ def : Pat<(i32 (vector_extract v16i8:$S, 7)),
+ (i32 MovesFromVSR.LE_BYTE_7)>;
+ def : Pat<(i32 (vector_extract v16i8:$S, 8)),
+ (i32 MovesFromVSR.LE_BYTE_8)>;
+ def : Pat<(i32 (vector_extract v16i8:$S, 9)),
+ (i32 MovesFromVSR.LE_BYTE_9)>;
+ def : Pat<(i32 (vector_extract v16i8:$S, 10)),
+ (i32 MovesFromVSR.LE_BYTE_10)>;
+ def : Pat<(i32 (vector_extract v16i8:$S, 11)),
+ (i32 MovesFromVSR.LE_BYTE_11)>;
+ def : Pat<(i32 (vector_extract v16i8:$S, 12)),
+ (i32 MovesFromVSR.LE_BYTE_12)>;
+ def : Pat<(i32 (vector_extract v16i8:$S, 13)),
+ (i32 MovesFromVSR.LE_BYTE_13)>;
+ def : Pat<(i32 (vector_extract v16i8:$S, 14)),
+ (i32 MovesFromVSR.LE_BYTE_14)>;
+ def : Pat<(i32 (vector_extract v16i8:$S, 15)),
+ (i32 MovesFromVSR.LE_BYTE_15)>;
+ def : Pat<(i32 (vector_extract v16i8:$S, i64:$Idx)),
+ (i32 MovesFromVSR.LE_VARIABLE_BYTE)>;
+ // v8i16 scalar <-> vector conversions (LE)
+ def : Pat<(i32 (vector_extract v8i16:$S, 0)),
+ (i32 MovesFromVSR.LE_HALF_0)>;
+ def : Pat<(i32 (vector_extract v8i16:$S, 1)),
+ (i32 MovesFromVSR.LE_HALF_1)>;
+ def : Pat<(i32 (vector_extract v8i16:$S, 2)),
+ (i32 MovesFromVSR.LE_HALF_2)>;
+ def : Pat<(i32 (vector_extract v8i16:$S, 3)),
+ (i32 MovesFromVSR.LE_HALF_3)>;
+ def : Pat<(i32 (vector_extract v8i16:$S, 4)),
+ (i32 MovesFromVSR.LE_HALF_4)>;
+ def : Pat<(i32 (vector_extract v8i16:$S, 5)),
+ (i32 MovesFromVSR.LE_HALF_5)>;
+ def : Pat<(i32 (vector_extract v8i16:$S, 6)),
+ (i32 MovesFromVSR.LE_HALF_6)>;
+ def : Pat<(i32 (vector_extract v8i16:$S, 7)),
+ (i32 MovesFromVSR.LE_HALF_7)>;
+ def : Pat<(i32 (vector_extract v8i16:$S, i64:$Idx)),
+ (i32 MovesFromVSR.LE_VARIABLE_HALF)>;
+
+ // v4i32 scalar <-> vector conversions (LE)
+ def : Pat<(i32 (vector_extract v4i32:$S, 0)),
+ (i32 MovesFromVSR.LE_WORD_0)>;
+ def : Pat<(i32 (vector_extract v4i32:$S, 1)),
+ (i32 MovesFromVSR.LE_WORD_1)>;
+ def : Pat<(i32 (vector_extract v4i32:$S, 2)),
+ (i32 MovesFromVSR.LE_WORD_2)>;
+ def : Pat<(i32 (vector_extract v4i32:$S, 3)),
+ (i32 MovesFromVSR.LE_WORD_3)>;
+
+ // v2i64 scalar <-> vector conversions (LE)
+ def : Pat<(i64 (vector_extract v2i64:$S, 0)),
+ (i64 MovesFromVSR.LE_DWORD_0)>;
+ def : Pat<(i64 (vector_extract v2i64:$S, 1)),
+ (i64 MovesFromVSR.LE_DWORD_1)>;
+} // IsLittleEndian, HasDirectMove
Modified: llvm/trunk/lib/Target/PowerPC/PPCVSXCopy.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCVSXCopy.cpp?rev=249822&r1=249821&r2=249822&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCVSXCopy.cpp (original)
+++ llvm/trunk/lib/Target/PowerPC/PPCVSXCopy.cpp Fri Oct 9 06:12:18 2015
@@ -128,6 +128,7 @@ protected:
IsVRReg(DstMO.getReg(), MRI) ? &PPC::VSHRCRegClass :
&PPC::VSLRCRegClass;
assert((IsF8Reg(DstMO.getReg(), MRI) ||
+ IsVSFReg(DstMO.getReg(), MRI) ||
IsVRReg(DstMO.getReg(), MRI)) &&
"Unknown destination for a VSX copy");
Modified: llvm/trunk/test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll?rev=249822&r1=249821&r2=249822&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll (original)
+++ llvm/trunk/test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll Fri Oct 9 06:12:18 2015
@@ -77,3 +77,1383 @@ entry:
; CHECK-LE: xscvdpspn [[REG1:[0-9]+]], 1
; CHECK-LE: xxsldwi {{[0-9]+}}, [[REG1]], [[REG1]], 1
}
+
+; Function Attrs: nounwind
+define signext i8 @getsc0(<16 x i8> %vsc) {
+entry:
+ %vsc.addr = alloca <16 x i8>, align 16
+ store <16 x i8> %vsc, <16 x i8>* %vsc.addr, align 16
+ %0 = load <16 x i8>, <16 x i8>* %vsc.addr, align 16
+ %vecext = extractelement <16 x i8> %0, i32 0
+ ret i8 %vecext
+; CHECK-LABEL: @getsc0
+; CHECK: mfvsrd 3, 34
+; CHECK: rldicl 3, 3, 8, 56
+; CHECK: extsb 3, 3
+; CHECK-LE-LABEL: @getsc0
+; CHECK-LE: mfvsrd 3,
+; CHECK-LE: clrldi 3, 3, 56
+; CHECK-LE: extsb 3, 3
+}
+
+; Function Attrs: nounwind
+define signext i8 @getsc1(<16 x i8> %vsc) {
+entry:
+ %vsc.addr = alloca <16 x i8>, align 16
+ store <16 x i8> %vsc, <16 x i8>* %vsc.addr, align 16
+ %0 = load <16 x i8>, <16 x i8>* %vsc.addr, align 16
+ %vecext = extractelement <16 x i8> %0, i32 1
+ ret i8 %vecext
+; CHECK-LABEL: @getsc1
+; CHECK: mfvsrd 3, 34
+; CHECK: rldicl 3, 3, 16, 56
+; CHECK: extsb 3, 3
+; CHECK-LE-LABEL: @getsc1
+; CHECK-LE: mfvsrd 3,
+; CHECK-LE: rldicl 3, 3, 56, 56
+; CHECK-LE: extsb 3, 3
+}
+
+; Function Attrs: nounwind
+define signext i8 @getsc2(<16 x i8> %vsc) {
+entry:
+ %vsc.addr = alloca <16 x i8>, align 16
+ store <16 x i8> %vsc, <16 x i8>* %vsc.addr, align 16
+ %0 = load <16 x i8>, <16 x i8>* %vsc.addr, align 16
+ %vecext = extractelement <16 x i8> %0, i32 2
+ ret i8 %vecext
+; CHECK-LABEL: @getsc2
+; CHECK: mfvsrd 3, 34
+; CHECK: rldicl 3, 3, 24, 56
+; CHECK: extsb 3, 3
+; CHECK-LE-LABEL: @getsc2
+; CHECK-LE: mfvsrd 3,
+; CHECK-LE: rldicl 3, 3, 48, 56
+; CHECK-LE: extsb 3, 3
+}
+
+; Function Attrs: nounwind
+define signext i8 @getsc3(<16 x i8> %vsc) {
+entry:
+ %vsc.addr = alloca <16 x i8>, align 16
+ store <16 x i8> %vsc, <16 x i8>* %vsc.addr, align 16
+ %0 = load <16 x i8>, <16 x i8>* %vsc.addr, align 16
+ %vecext = extractelement <16 x i8> %0, i32 3
+ ret i8 %vecext
+; CHECK-LABEL: @getsc3
+; CHECK: mfvsrd 3, 34
+; CHECK: rldicl 3, 3, 32, 56
+; CHECK: extsb 3, 3
+; CHECK-LE-LABEL: @getsc3
+; CHECK-LE: mfvsrd 3,
+; CHECK-LE: rldicl 3, 3, 40, 56
+; CHECK-LE: extsb 3, 3
+}
+
+; Function Attrs: nounwind
+define signext i8 @getsc4(<16 x i8> %vsc) {
+entry:
+ %vsc.addr = alloca <16 x i8>, align 16
+ store <16 x i8> %vsc, <16 x i8>* %vsc.addr, align 16
+ %0 = load <16 x i8>, <16 x i8>* %vsc.addr, align 16
+ %vecext = extractelement <16 x i8> %0, i32 4
+ ret i8 %vecext
+; CHECK-LABEL: @getsc4
+; CHECK: mfvsrd 3, 34
+; CHECK: rldicl 3, 3, 40, 56
+; CHECK: extsb 3, 3
+; CHECK-LE-LABEL: @getsc4
+; CHECK-LE: mfvsrd 3,
+; CHECK-LE: rldicl 3, 3, 32, 56
+; CHECK-LE: extsb 3, 3
+}
+
+; Function Attrs: nounwind
+define signext i8 @getsc5(<16 x i8> %vsc) {
+entry:
+ %vsc.addr = alloca <16 x i8>, align 16
+ store <16 x i8> %vsc, <16 x i8>* %vsc.addr, align 16
+ %0 = load <16 x i8>, <16 x i8>* %vsc.addr, align 16
+ %vecext = extractelement <16 x i8> %0, i32 5
+ ret i8 %vecext
+; CHECK-LABEL: @getsc5
+; CHECK: mfvsrd 3, 34
+; CHECK: rldicl 3, 3, 48, 56
+; CHECK: extsb 3, 3
+; CHECK-LE-LABEL: @getsc5
+; CHECK-LE: mfvsrd 3,
+; CHECK-LE: rldicl 3, 3, 24, 56
+; CHECK-LE: extsb 3, 3
+}
+
+; Function Attrs: nounwind
+define signext i8 @getsc6(<16 x i8> %vsc) {
+entry:
+ %vsc.addr = alloca <16 x i8>, align 16
+ store <16 x i8> %vsc, <16 x i8>* %vsc.addr, align 16
+ %0 = load <16 x i8>, <16 x i8>* %vsc.addr, align 16
+ %vecext = extractelement <16 x i8> %0, i32 6
+ ret i8 %vecext
+; CHECK-LABEL: @getsc6
+; CHECK: mfvsrd 3, 34
+; CHECK: rldicl 3, 3, 56, 56
+; CHECK: extsb 3, 3
+; CHECK-LE-LABEL: @getsc6
+; CHECK-LE: mfvsrd 3,
+; CHECK-LE: rldicl 3, 3, 16, 56
+; CHECK-LE: extsb 3, 3
+}
+
+; Function Attrs: nounwind
+define signext i8 @getsc7(<16 x i8> %vsc) {
+entry:
+ %vsc.addr = alloca <16 x i8>, align 16
+ store <16 x i8> %vsc, <16 x i8>* %vsc.addr, align 16
+ %0 = load <16 x i8>, <16 x i8>* %vsc.addr, align 16
+ %vecext = extractelement <16 x i8> %0, i32 7
+ ret i8 %vecext
+; CHECK-LABEL: @getsc7
+; CHECK: mfvsrd 3, 34
+; CHECK: clrldi 3, 3, 56
+; CHECK: extsb 3, 3
+; CHECK-LE-LABEL: @getsc7
+; CHECK-LE: mfvsrd 3,
+; CHECK-LE: rldicl 3, 3, 8, 56
+; CHECK-LE: extsb 3, 3
+}
+
+; Function Attrs: nounwind
+define signext i8 @getsc8(<16 x i8> %vsc) {
+entry:
+ %vsc.addr = alloca <16 x i8>, align 16
+ store <16 x i8> %vsc, <16 x i8>* %vsc.addr, align 16
+ %0 = load <16 x i8>, <16 x i8>* %vsc.addr, align 16
+ %vecext = extractelement <16 x i8> %0, i32 8
+ ret i8 %vecext
+; CHECK-LABEL: @getsc8
+; CHECK: mfvsrd 3,
+; CHECK: rldicl 3, 3, 8, 56
+; CHECK: extsb 3, 3
+; CHECK-LE-LABEL: @getsc8
+; CHECK-LE: mfvsrd 3, 34
+; CHECK-LE: clrldi 3, 3, 56
+; CHECK-LE: extsb 3, 3
+}
+
+; Function Attrs: nounwind
+define signext i8 @getsc9(<16 x i8> %vsc) {
+entry:
+ %vsc.addr = alloca <16 x i8>, align 16
+ store <16 x i8> %vsc, <16 x i8>* %vsc.addr, align 16
+ %0 = load <16 x i8>, <16 x i8>* %vsc.addr, align 16
+ %vecext = extractelement <16 x i8> %0, i32 9
+ ret i8 %vecext
+; CHECK-LABEL: @getsc9
+; CHECK: mfvsrd 3,
+; CHECK: rldicl 3, 3, 16, 56
+; CHECK: extsb 3, 3
+; CHECK-LE-LABEL: @getsc9
+; CHECK-LE: mfvsrd 3, 34
+; CHECK-LE: rldicl 3, 3, 56, 56
+; CHECK-LE: extsb 3, 3
+}
+
+; Function Attrs: nounwind
+define signext i8 @getsc10(<16 x i8> %vsc) {
+entry:
+ %vsc.addr = alloca <16 x i8>, align 16
+ store <16 x i8> %vsc, <16 x i8>* %vsc.addr, align 16
+ %0 = load <16 x i8>, <16 x i8>* %vsc.addr, align 16
+ %vecext = extractelement <16 x i8> %0, i32 10
+ ret i8 %vecext
+; CHECK-LABEL: @getsc10
+; CHECK: mfvsrd 3,
+; CHECK: rldicl 3, 3, 24, 56
+; CHECK: extsb 3, 3
+; CHECK-LE-LABEL: @getsc10
+; CHECK-LE: mfvsrd 3, 34
+; CHECK-LE: rldicl 3, 3, 48, 56
+; CHECK-LE: extsb 3, 3
+}
+
+; Function Attrs: nounwind
+define signext i8 @getsc11(<16 x i8> %vsc) {
+entry:
+ %vsc.addr = alloca <16 x i8>, align 16
+ store <16 x i8> %vsc, <16 x i8>* %vsc.addr, align 16
+ %0 = load <16 x i8>, <16 x i8>* %vsc.addr, align 16
+ %vecext = extractelement <16 x i8> %0, i32 11
+ ret i8 %vecext
+; CHECK-LABEL: @getsc11
+; CHECK: mfvsrd 3,
+; CHECK: rldicl 3, 3, 32, 56
+; CHECK: extsb 3, 3
+; CHECK-LE-LABEL: @getsc11
+; CHECK-LE: mfvsrd 3, 34
+; CHECK-LE: rldicl 3, 3, 40, 56
+; CHECK-LE: extsb 3, 3
+}
+
+; Function Attrs: nounwind
+define signext i8 @getsc12(<16 x i8> %vsc) {
+entry:
+ %vsc.addr = alloca <16 x i8>, align 16
+ store <16 x i8> %vsc, <16 x i8>* %vsc.addr, align 16
+ %0 = load <16 x i8>, <16 x i8>* %vsc.addr, align 16
+ %vecext = extractelement <16 x i8> %0, i32 12
+ ret i8 %vecext
+; CHECK-LABEL: @getsc12
+; CHECK: mfvsrd 3,
+; CHECK: rldicl 3, 3, 40, 56
+; CHECK: extsb 3, 3
+; CHECK-LE-LABEL: @getsc12
+; CHECK-LE: mfvsrd 3, 34
+; CHECK-LE: rldicl 3, 3, 32, 56
+; CHECK-LE: extsb 3, 3
+}
+
+; Function Attrs: nounwind
+define signext i8 @getsc13(<16 x i8> %vsc) {
+entry:
+ %vsc.addr = alloca <16 x i8>, align 16
+ store <16 x i8> %vsc, <16 x i8>* %vsc.addr, align 16
+ %0 = load <16 x i8>, <16 x i8>* %vsc.addr, align 16
+ %vecext = extractelement <16 x i8> %0, i32 13
+ ret i8 %vecext
+; CHECK-LABEL: @getsc13
+; CHECK: mfvsrd 3,
+; CHECK: rldicl 3, 3, 48, 56
+; CHECK: extsb 3, 3
+; CHECK-LE-LABEL: @getsc13
+; CHECK-LE: mfvsrd 3, 34
+; CHECK-LE: rldicl 3, 3, 24, 56
+; CHECK-LE: extsb 3, 3
+}
+
+; Function Attrs: nounwind
+define signext i8 @getsc14(<16 x i8> %vsc) {
+entry:
+ %vsc.addr = alloca <16 x i8>, align 16
+ store <16 x i8> %vsc, <16 x i8>* %vsc.addr, align 16
+ %0 = load <16 x i8>, <16 x i8>* %vsc.addr, align 16
+ %vecext = extractelement <16 x i8> %0, i32 14
+ ret i8 %vecext
+; CHECK-LABEL: @getsc14
+; CHECK: mfvsrd 3,
+; CHECK: rldicl 3, 3, 56, 56
+; CHECK: extsb 3, 3
+; CHECK-LE-LABEL: @getsc14
+; CHECK-LE: mfvsrd 3, 34
+; CHECK-LE: rldicl 3, 3, 16, 56
+; CHECK-LE: extsb 3, 3
+}
+
+; Function Attrs: nounwind
+define signext i8 @getsc15(<16 x i8> %vsc) {
+entry:
+ %vsc.addr = alloca <16 x i8>, align 16
+ store <16 x i8> %vsc, <16 x i8>* %vsc.addr, align 16
+ %0 = load <16 x i8>, <16 x i8>* %vsc.addr, align 16
+ %vecext = extractelement <16 x i8> %0, i32 15
+ ret i8 %vecext
+; CHECK-LABEL: @getsc15
+; CHECK: mfvsrd 3,
+; CHECK: extsb 3, 3
+; CHECK-LE-LABEL: @getsc15
+; CHECK-LE: mfvsrd 3, 34
+; CHECK-LE: rldicl 3, 3, 8, 56
+; CHECK-LE: extsb 3, 3
+}
+
+; Function Attrs: nounwind
+define zeroext i8 @getuc0(<16 x i8> %vuc) {
+entry:
+ %vuc.addr = alloca <16 x i8>, align 16
+ store <16 x i8> %vuc, <16 x i8>* %vuc.addr, align 16
+ %0 = load <16 x i8>, <16 x i8>* %vuc.addr, align 16
+ %vecext = extractelement <16 x i8> %0, i32 0
+ ret i8 %vecext
+; CHECK-LABEL: @getuc0
+; CHECK: mfvsrd 3, 34
+; CHECK: rldicl 3, 3, 8, 56
+; CHECK: clrldi 3, 3, 56
+; CHECK-LE-LABEL: @getuc0
+; CHECK-LE: mfvsrd 3,
+; CHECK-LE: clrldi 3, 3, 56
+}
+
+; Function Attrs: nounwind
+define zeroext i8 @getuc1(<16 x i8> %vuc) {
+entry:
+ %vuc.addr = alloca <16 x i8>, align 16
+ store <16 x i8> %vuc, <16 x i8>* %vuc.addr, align 16
+ %0 = load <16 x i8>, <16 x i8>* %vuc.addr, align 16
+ %vecext = extractelement <16 x i8> %0, i32 1
+ ret i8 %vecext
+; CHECK-LABEL: @getuc1
+; CHECK: mfvsrd 3, 34
+; CHECK: rldicl 3, 3, 16, 56
+; CHECK: clrldi 3, 3, 56
+; CHECK-LE-LABEL: @getuc1
+; CHECK-LE: mfvsrd 3,
+; CHECK-LE: rldicl 3, 3, 56, 56
+; CHECK-LE: clrldi 3, 3, 56
+}
+
+; Function Attrs: nounwind
+define zeroext i8 @getuc2(<16 x i8> %vuc) {
+entry:
+ %vuc.addr = alloca <16 x i8>, align 16
+ store <16 x i8> %vuc, <16 x i8>* %vuc.addr, align 16
+ %0 = load <16 x i8>, <16 x i8>* %vuc.addr, align 16
+ %vecext = extractelement <16 x i8> %0, i32 2
+ ret i8 %vecext
+; CHECK-LABEL: @getuc2
+; CHECK: mfvsrd 3, 34
+; CHECK: rldicl 3, 3, 24, 56
+; CHECK: clrldi 3, 3, 56
+; CHECK-LE-LABEL: @getuc2
+; CHECK-LE: mfvsrd 3,
+; CHECK-LE: rldicl 3, 3, 48, 56
+; CHECK-LE: clrldi 3, 3, 56
+}
+
+; Function Attrs: nounwind
+define zeroext i8 @getuc3(<16 x i8> %vuc) {
+entry:
+ %vuc.addr = alloca <16 x i8>, align 16
+ store <16 x i8> %vuc, <16 x i8>* %vuc.addr, align 16
+ %0 = load <16 x i8>, <16 x i8>* %vuc.addr, align 16
+ %vecext = extractelement <16 x i8> %0, i32 3
+ ret i8 %vecext
+; CHECK-LABEL: @getuc3
+; CHECK: mfvsrd 3, 34
+; CHECK: rldicl 3, 3, 32, 56
+; CHECK: clrldi 3, 3, 56
+; CHECK-LE-LABEL: @getuc3
+; CHECK-LE: mfvsrd 3,
+; CHECK-LE: rldicl 3, 3, 40, 56
+; CHECK-LE: clrldi 3, 3, 56
+}
+
+; Function Attrs: nounwind
+define zeroext i8 @getuc4(<16 x i8> %vuc) {
+entry:
+ %vuc.addr = alloca <16 x i8>, align 16
+ store <16 x i8> %vuc, <16 x i8>* %vuc.addr, align 16
+ %0 = load <16 x i8>, <16 x i8>* %vuc.addr, align 16
+ %vecext = extractelement <16 x i8> %0, i32 4
+ ret i8 %vecext
+; CHECK-LABEL: @getuc4
+; CHECK: mfvsrd 3, 34
+; CHECK: rldicl 3, 3, 40, 56
+; CHECK: clrldi 3, 3, 56
+; CHECK-LE-LABEL: @getuc4
+; CHECK-LE: mfvsrd 3,
+; CHECK-LE: rldicl 3, 3, 32, 56
+; CHECK-LE: clrldi 3, 3, 56
+}
+
+; Function Attrs: nounwind
+define zeroext i8 @getuc5(<16 x i8> %vuc) {
+entry:
+ %vuc.addr = alloca <16 x i8>, align 16
+ store <16 x i8> %vuc, <16 x i8>* %vuc.addr, align 16
+ %0 = load <16 x i8>, <16 x i8>* %vuc.addr, align 16
+ %vecext = extractelement <16 x i8> %0, i32 5
+ ret i8 %vecext
+; CHECK-LABEL: @getuc5
+; CHECK: mfvsrd 3, 34
+; CHECK: rldicl 3, 3, 48, 56
+; CHECK: clrldi 3, 3, 56
+; CHECK-LE-LABEL: @getuc5
+; CHECK-LE: mfvsrd 3,
+; CHECK-LE: rldicl 3, 3, 24, 56
+; CHECK-LE: clrldi 3, 3, 56
+}
+
+; Function Attrs: nounwind
+define zeroext i8 @getuc6(<16 x i8> %vuc) {
+entry:
+ %vuc.addr = alloca <16 x i8>, align 16
+ store <16 x i8> %vuc, <16 x i8>* %vuc.addr, align 16
+ %0 = load <16 x i8>, <16 x i8>* %vuc.addr, align 16
+ %vecext = extractelement <16 x i8> %0, i32 6
+ ret i8 %vecext
+; CHECK-LABEL: @getuc6
+; CHECK: mfvsrd 3, 34
+; CHECK: rldicl 3, 3, 56, 56
+; CHECK: clrldi 3, 3, 56
+; CHECK-LE-LABEL: @getuc6
+; CHECK-LE: mfvsrd 3,
+; CHECK-LE: rldicl 3, 3, 16, 56
+; CHECK-LE: clrldi 3, 3, 56
+}
+
+; Function Attrs: nounwind
+define zeroext i8 @getuc7(<16 x i8> %vuc) {
+entry:
+ %vuc.addr = alloca <16 x i8>, align 16
+ store <16 x i8> %vuc, <16 x i8>* %vuc.addr, align 16
+ %0 = load <16 x i8>, <16 x i8>* %vuc.addr, align 16
+ %vecext = extractelement <16 x i8> %0, i32 7
+ ret i8 %vecext
+; CHECK-LABEL: @getuc7
+; CHECK: mfvsrd 3, 34
+; CHECK: clrldi 3, 3, 56
+; CHECK-LE-LABEL: @getuc7
+; CHECK-LE: mfvsrd 3,
+; CHECK-LE: rldicl 3, 3, 8, 56
+; CHECK-LE: clrldi 3, 3, 56
+}
+
+; Function Attrs: nounwind
+define zeroext i8 @getuc8(<16 x i8> %vuc) {
+entry:
+ %vuc.addr = alloca <16 x i8>, align 16
+ store <16 x i8> %vuc, <16 x i8>* %vuc.addr, align 16
+ %0 = load <16 x i8>, <16 x i8>* %vuc.addr, align 16
+ %vecext = extractelement <16 x i8> %0, i32 8
+ ret i8 %vecext
+; CHECK-LABEL: @getuc8
+; CHECK: mfvsrd 3,
+; CHECK: rldicl 3, 3, 8, 56
+; CHECK: clrldi 3, 3, 56
+; CHECK-LE-LABEL: @getuc8
+; CHECK-LE: mfvsrd 3, 34
+; CHECK-LE: clrldi 3, 3, 56
+}
+
+; Function Attrs: nounwind
+define zeroext i8 @getuc9(<16 x i8> %vuc) {
+entry:
+ %vuc.addr = alloca <16 x i8>, align 16
+ store <16 x i8> %vuc, <16 x i8>* %vuc.addr, align 16
+ %0 = load <16 x i8>, <16 x i8>* %vuc.addr, align 16
+ %vecext = extractelement <16 x i8> %0, i32 9
+ ret i8 %vecext
+; CHECK-LABEL: @getuc9
+; CHECK: mfvsrd 3,
+; CHECK: rldicl 3, 3, 16, 56
+; CHECK: clrldi 3, 3, 56
+; CHECK-LE-LABEL: @getuc9
+; CHECK-LE: mfvsrd 3, 34
+; CHECK-LE: rldicl 3, 3, 56, 56
+; CHECK-LE: clrldi 3, 3, 56
+}
+
+; Function Attrs: nounwind
+define zeroext i8 @getuc10(<16 x i8> %vuc) {
+entry:
+ %vuc.addr = alloca <16 x i8>, align 16
+ store <16 x i8> %vuc, <16 x i8>* %vuc.addr, align 16
+ %0 = load <16 x i8>, <16 x i8>* %vuc.addr, align 16
+ %vecext = extractelement <16 x i8> %0, i32 10
+ ret i8 %vecext
+; CHECK-LABEL: @getuc10
+; CHECK: mfvsrd 3,
+; CHECK: rldicl 3, 3, 24, 56
+; CHECK: clrldi 3, 3, 56
+; CHECK-LE-LABEL: @getuc10
+; CHECK-LE: mfvsrd 3, 34
+; CHECK-LE: rldicl 3, 3, 48, 56
+; CHECK-LE: clrldi 3, 3, 56
+}
+
+; Function Attrs: nounwind
+define zeroext i8 @getuc11(<16 x i8> %vuc) {
+entry:
+ %vuc.addr = alloca <16 x i8>, align 16
+ store <16 x i8> %vuc, <16 x i8>* %vuc.addr, align 16
+ %0 = load <16 x i8>, <16 x i8>* %vuc.addr, align 16
+ %vecext = extractelement <16 x i8> %0, i32 11
+ ret i8 %vecext
+; CHECK-LABEL: @getuc11
+; CHECK: mfvsrd 3,
+; CHECK: rldicl 3, 3, 32, 56
+; CHECK: clrldi 3, 3, 56
+; CHECK-LE-LABEL: @getuc11
+; CHECK-LE: mfvsrd 3, 34
+; CHECK-LE: rldicl 3, 3, 40, 56
+; CHECK-LE: clrldi 3, 3, 56
+}
+
+; Function Attrs: nounwind
+define zeroext i8 @getuc12(<16 x i8> %vuc) {
+entry:
+ %vuc.addr = alloca <16 x i8>, align 16
+ store <16 x i8> %vuc, <16 x i8>* %vuc.addr, align 16
+ %0 = load <16 x i8>, <16 x i8>* %vuc.addr, align 16
+ %vecext = extractelement <16 x i8> %0, i32 12
+ ret i8 %vecext
+; CHECK-LABEL: @getuc12
+; CHECK: mfvsrd 3,
+; CHECK: rldicl 3, 3, 40, 56
+; CHECK: clrldi 3, 3, 56
+; CHECK-LE-LABEL: @getuc12
+; CHECK-LE: mfvsrd 3, 34
+; CHECK-LE: rldicl 3, 3, 32, 56
+; CHECK-LE: clrldi 3, 3, 56
+}
+
+; Function Attrs: nounwind
+define zeroext i8 @getuc13(<16 x i8> %vuc) {
+entry:
+ %vuc.addr = alloca <16 x i8>, align 16
+ store <16 x i8> %vuc, <16 x i8>* %vuc.addr, align 16
+ %0 = load <16 x i8>, <16 x i8>* %vuc.addr, align 16
+ %vecext = extractelement <16 x i8> %0, i32 13
+ ret i8 %vecext
+; CHECK-LABEL: @getuc13
+; CHECK: mfvsrd 3,
+; CHECK: rldicl 3, 3, 48, 56
+; CHECK: clrldi 3, 3, 56
+; CHECK-LE-LABEL: @getuc13
+; CHECK-LE: mfvsrd 3, 34
+; CHECK-LE: rldicl 3, 3, 24, 56
+; CHECK-LE: clrldi 3, 3, 56
+}
+
+; Function Attrs: nounwind
+define zeroext i8 @getuc14(<16 x i8> %vuc) {
+entry:
+ %vuc.addr = alloca <16 x i8>, align 16
+ store <16 x i8> %vuc, <16 x i8>* %vuc.addr, align 16
+ %0 = load <16 x i8>, <16 x i8>* %vuc.addr, align 16
+ %vecext = extractelement <16 x i8> %0, i32 14
+ ret i8 %vecext
+; CHECK-LABEL: @getuc14
+; CHECK: mfvsrd 3,
+; CHECK: rldicl 3, 3, 56, 56
+; CHECK: clrldi 3, 3, 56
+; CHECK-LE-LABEL: @getuc14
+; CHECK-LE: mfvsrd 3, 34
+; CHECK-LE: rldicl 3, 3, 16, 56
+; CHECK-LE: clrldi 3, 3, 56
+}
+
+; Function Attrs: nounwind
+define zeroext i8 @getuc15(<16 x i8> %vuc) {
+entry:
+ %vuc.addr = alloca <16 x i8>, align 16
+ store <16 x i8> %vuc, <16 x i8>* %vuc.addr, align 16
+ %0 = load <16 x i8>, <16 x i8>* %vuc.addr, align 16
+ %vecext = extractelement <16 x i8> %0, i32 15
+ ret i8 %vecext
+; CHECK-LABEL: @getuc15
+; CHECK: mfvsrd 3,
+; CHECK: clrldi 3, 3, 56
+; CHECK-LE-LABEL: @getuc15
+; CHECK-LE: mfvsrd 3, 34
+; CHECK-LE: rldicl 3, 3, 8, 56
+; CHECK-LE: clrldi 3, 3, 56
+}
+
+; Function Attrs: nounwind
+define signext i8 @getvelsc(<16 x i8> %vsc, i32 signext %i) {
+entry:
+ %vsc.addr = alloca <16 x i8>, align 16
+ %i.addr = alloca i32, align 4
+ store <16 x i8> %vsc, <16 x i8>* %vsc.addr, align 16
+ store i32 %i, i32* %i.addr, align 4
+ %0 = load <16 x i8>, <16 x i8>* %vsc.addr, align 16
+ %1 = load i32, i32* %i.addr, align 4
+ %vecext = extractelement <16 x i8> %0, i32 %1
+ ret i8 %vecext
+; CHECK-LABEL: @getvelsc
+; CHECK-DAG: andi. [[ANDI:[0-9]+]], {{[0-9]+}}, 8
+; CHECK-DAG: lvsl [[SHMSK:[0-9]+]], 0, [[ANDI]]
+; CHECK-DAG: vperm [[PERMD:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}, [[SHMSK]]
+; CHECK-DAG: mfvsrd [[MOV:[0-9]+]],
+; CHECK-DAG: li [[IMM7:[0-9]+]], 7
+; CHECK-DAG: andc [[ANDC:[0-9]+]], [[IMM7]]
+; CHECK-DAG: sldi [[SHL:[0-9]+]], [[ANDC]], 3
+; CHECK-DAG: srd 3, [[MOV]], [[SHL]]
+; CHECK-DAG: extsb 3, 3
+; CHECK-LE-LABEL: @getvelsc
+; CHECK-LE-DAG: li [[IMM8:[0-9]+]], 8
+; CHECK-LE-DAG: andc [[ANDC:[0-9]+]], [[IMM8]]
+; CHECK-LE-DAG: lvsl [[SHMSK:[0-9]+]], 0, [[ANDC]]
+; CHECK-LE-DAG: vperm [[PERMD:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}, [[SHMSK]]
+; CHECK-LE-DAG: mfvsrd [[MOV:[0-9]+]],
+; CHECK-LE-DAG: li [[IMM7:[0-9]+]], 7
+; CHECK-LE-DAG: and [[AND:[0-9]+]], [[IMM7]]
+; CHECK-LE-DAG: sldi [[SHL:[0-9]+]], [[AND]], 3
+; CHECK-LE-DAG: srd 3, [[MOV]], [[SHL]]
+; CHECK-LE-DAG: extsb 3, 3
+}
+
+; Function Attrs: nounwind
+define zeroext i8 @getveluc(<16 x i8> %vuc, i32 signext %i) {
+entry:
+ %vuc.addr = alloca <16 x i8>, align 16
+ %i.addr = alloca i32, align 4
+ store <16 x i8> %vuc, <16 x i8>* %vuc.addr, align 16
+ store i32 %i, i32* %i.addr, align 4
+ %0 = load <16 x i8>, <16 x i8>* %vuc.addr, align 16
+ %1 = load i32, i32* %i.addr, align 4
+ %vecext = extractelement <16 x i8> %0, i32 %1
+ ret i8 %vecext
+; CHECK-LABEL: @getveluc
+; CHECK-DAG: andi. [[ANDI:[0-9]+]], {{[0-9]+}}, 8
+; CHECK-DAG: lvsl [[SHMSK:[0-9]+]], 0, [[ANDI]]
+; CHECK-DAG: vperm [[PERMD:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}, [[SHMSK]]
+; CHECK-DAG: mfvsrd [[MOV:[0-9]+]],
+; CHECK-DAG: li [[IMM7:[0-9]+]], 7
+; CHECK-DAG: andc [[ANDC:[0-9]+]], [[IMM7]]
+; CHECK-DAG: sldi [[SHL:[0-9]+]], [[ANDC]], 3
+; CHECK-DAG: srd 3, [[MOV]], [[SHL]]
+; CHECK-DAG: clrldi 3, 3, 56
+; CHECK-LE-LABEL: @getveluc
+; CHECK-LE-DAG: li [[IMM8:[0-9]+]], 8
+; CHECK-LE-DAG: andc [[ANDC:[0-9]+]], [[IMM8]]
+; CHECK-LE-DAG: lvsl [[SHMSK:[0-9]+]], 0, [[ANDC]]
+; CHECK-LE-DAG: vperm [[PERMD:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}, [[SHMSK]]
+; CHECK-LE-DAG: mfvsrd [[MOV:[0-9]+]],
+; CHECK-LE-DAG: li [[IMM7:[0-9]+]], 7
+; CHECK-LE-DAG: and [[AND:[0-9]+]], [[IMM7]]
+; CHECK-LE-DAG: sldi [[SHL:[0-9]+]], [[AND]], 3
+; CHECK-LE-DAG: srd 3, [[MOV]], [[SHL]]
+; CHECK-LE-DAG: clrldi 3, 3, 56
+}
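
(Illustrative aside, not from the patch: the two variable-index tests above
were presumably generated from AltiVec-style C along these lines, a
hypothetical reconstruction. The check lines map onto the four steps in the
PPCInstrVSX.td comment: lvsl+vperm select the doubleword, mfvsrd moves it to
a GPR, li/and(c)+sldi+srd right-justify the byte, and extsb/clrldi produce
the signed/unsigned result.)

  #include <altivec.h>

  /* Hypothetical C analogues of @getvelsc and @getveluc. */
  signed char getvelsc(vector signed char vsc, int i) { return vsc[i]; }
  unsigned char getveluc(vector unsigned char vuc, int i) { return vuc[i]; }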
+
+; Function Attrs: nounwind
+define signext i16 @getss0(<8 x i16> %vss) {
+entry:
+ %vss.addr = alloca <8 x i16>, align 16
+ store <8 x i16> %vss, <8 x i16>* %vss.addr, align 16
+ %0 = load <8 x i16>, <8 x i16>* %vss.addr, align 16
+ %vecext = extractelement <8 x i16> %0, i32 0
+ ret i16 %vecext
+; CHECK-LABEL: @getss0
+; CHECK: mfvsrd 3, 34
+; CHECK: rldicl 3, 3, 16, 48
+; CHECK: extsh 3, 3
+; CHECK-LE-LABEL: @getss0
+; CHECK-LE: mfvsrd 3,
+; CHECK-LE: clrldi 3, 3, 48
+; CHECK-LE: extsh 3, 3
+}
+
+; Function Attrs: nounwind
+define signext i16 @getss1(<8 x i16> %vss) {
+entry:
+ %vss.addr = alloca <8 x i16>, align 16
+ store <8 x i16> %vss, <8 x i16>* %vss.addr, align 16
+ %0 = load <8 x i16>, <8 x i16>* %vss.addr, align 16
+ %vecext = extractelement <8 x i16> %0, i32 1
+ ret i16 %vecext
+; CHECK-LABEL: @getss1
+; CHECK: mfvsrd 3, 34
+; CHECK: rldicl 3, 3, 32, 48
+; CHECK: extsh 3, 3
+; CHECK-LE-LABEL: @getss1
+; CHECK-LE: mfvsrd 3,
+; CHECK-LE: rldicl 3, 3, 48, 48
+; CHECK-LE: extsh 3, 3
+}
+
+; Function Attrs: nounwind
+define signext i16 @getss2(<8 x i16> %vss) {
+entry:
+ %vss.addr = alloca <8 x i16>, align 16
+ store <8 x i16> %vss, <8 x i16>* %vss.addr, align 16
+ %0 = load <8 x i16>, <8 x i16>* %vss.addr, align 16
+ %vecext = extractelement <8 x i16> %0, i32 2
+ ret i16 %vecext
+; CHECK-LABEL: @getss2
+; CHECK: mfvsrd 3, 34
+; CHECK: rldicl 3, 3, 48, 48
+; CHECK: extsh 3, 3
+; CHECK-LE-LABEL: @getss2
+; CHECK-LE: mfvsrd 3,
+; CHECK-LE: rldicl 3, 3, 32, 48
+; CHECK-LE: extsh 3, 3
+}
+
+; Function Attrs: nounwind
+define signext i16 @getss3(<8 x i16> %vss) {
+entry:
+ %vss.addr = alloca <8 x i16>, align 16
+ store <8 x i16> %vss, <8 x i16>* %vss.addr, align 16
+ %0 = load <8 x i16>, <8 x i16>* %vss.addr, align 16
+ %vecext = extractelement <8 x i16> %0, i32 3
+ ret i16 %vecext
+; CHECK-LABEL: @getss3
+; CHECK: mfvsrd 3, 34
+; CHECK: clrldi 3, 3, 48
+; CHECK: extsh 3, 3
+; CHECK-LE-LABEL: @getss3
+; CHECK-LE: mfvsrd 3,
+; CHECK-LE: rldicl 3, 3, 16, 48
+; CHECK-LE: extsh 3, 3
+}
+
+; Function Attrs: nounwind
+define signext i16 @getss4(<8 x i16> %vss) {
+entry:
+ %vss.addr = alloca <8 x i16>, align 16
+ store <8 x i16> %vss, <8 x i16>* %vss.addr, align 16
+ %0 = load <8 x i16>, <8 x i16>* %vss.addr, align 16
+ %vecext = extractelement <8 x i16> %0, i32 4
+ ret i16 %vecext
+; CHECK-LABEL: @getss4
+; CHECK: mfvsrd 3,
+; CHECK: rldicl 3, 3, 16, 48
+; CHECK: extsh 3, 3
+; CHECK-LE-LABEL: @getss4
+; CHECK-LE: mfvsrd 3, 34
+; CHECK-LE: clrldi 3, 3, 48
+; CHECK-LE: extsh 3, 3
+}
+
+; Function Attrs: nounwind
+define signext i16 @getss5(<8 x i16> %vss) {
+entry:
+ %vss.addr = alloca <8 x i16>, align 16
+ store <8 x i16> %vss, <8 x i16>* %vss.addr, align 16
+ %0 = load <8 x i16>, <8 x i16>* %vss.addr, align 16
+ %vecext = extractelement <8 x i16> %0, i32 5
+ ret i16 %vecext
+; CHECK-LABEL: @getss5
+; CHECK: mfvsrd 3,
+; CHECK: rldicl 3, 3, 32, 48
+; CHECK: extsh 3, 3
+; CHECK-LE-LABEL: @getss5
+; CHECK-LE: mfvsrd 3, 34
+; CHECK-LE: rldicl 3, 3, 48, 48
+; CHECK-LE: extsh 3, 3
+}
+
+; Function Attrs: nounwind
+define signext i16 @getss6(<8 x i16> %vss) {
+entry:
+ %vss.addr = alloca <8 x i16>, align 16
+ store <8 x i16> %vss, <8 x i16>* %vss.addr, align 16
+ %0 = load <8 x i16>, <8 x i16>* %vss.addr, align 16
+ %vecext = extractelement <8 x i16> %0, i32 6
+ ret i16 %vecext
+; CHECK-LABEL: @getss6
+; CHECK: mfvsrd 3,
+; CHECK: rldicl 3, 3, 48, 48
+; CHECK: extsh 3, 3
+; CHECK-LE-LABEL: @getss6
+; CHECK-LE: mfvsrd 3, 34
+; CHECK-LE: rldicl 3, 3, 32, 48
+; CHECK-LE: extsh 3, 3
+}
+
+; Function Attrs: nounwind
+define signext i16 @getss7(<8 x i16> %vss) {
+entry:
+ %vss.addr = alloca <8 x i16>, align 16
+ store <8 x i16> %vss, <8 x i16>* %vss.addr, align 16
+ %0 = load <8 x i16>, <8 x i16>* %vss.addr, align 16
+ %vecext = extractelement <8 x i16> %0, i32 7
+ ret i16 %vecext
+; CHECK-LABEL: @getss7
+; CHECK: mfvsrd 3,
+; CHECK: extsh 3, 3
+; CHECK-LE-LABEL: @getss7
+; CHECK-LE: mfvsrd 3, 34
+; CHECK-LE: rldicl 3, 3, 16, 48
+; CHECK-LE: extsh 3, 3
+}
+
+; Function Attrs: nounwind
+define zeroext i16 @getus0(<8 x i16> %vus) {
+entry:
+ %vus.addr = alloca <8 x i16>, align 16
+ store <8 x i16> %vus, <8 x i16>* %vus.addr, align 16
+ %0 = load <8 x i16>, <8 x i16>* %vus.addr, align 16
+ %vecext = extractelement <8 x i16> %0, i32 0
+ ret i16 %vecext
+; CHECK-LABEL: @getus0
+; CHECK: mfvsrd 3, 34
+; CHECK: rldicl 3, 3, 16, 48
+; CHECK: clrldi 3, 3, 48
+; CHECK-LE-LABEL: @getus0
+; CHECK-LE: mfvsrd 3,
+; CHECK-LE: clrldi 3, 3, 48
+}
+
+; Function Attrs: nounwind
+define zeroext i16 @getus1(<8 x i16> %vus) {
+entry:
+ %vus.addr = alloca <8 x i16>, align 16
+ store <8 x i16> %vus, <8 x i16>* %vus.addr, align 16
+ %0 = load <8 x i16>, <8 x i16>* %vus.addr, align 16
+ %vecext = extractelement <8 x i16> %0, i32 1
+ ret i16 %vecext
+; CHECK-LABEL: @getus1
+; CHECK: mfvsrd 3, 34
+; CHECK: rldicl 3, 3, 32, 48
+; CHECK: clrldi 3, 3, 48
+; CHECK-LE-LABEL: @getus1
+; CHECK-LE: mfvsrd 3,
+; CHECK-LE: rldicl 3, 3, 48, 48
+; CHECK-LE: clrldi 3, 3, 48
+}
+
+; Function Attrs: nounwind
+define zeroext i16 @getus2(<8 x i16> %vus) {
+entry:
+ %vus.addr = alloca <8 x i16>, align 16
+ store <8 x i16> %vus, <8 x i16>* %vus.addr, align 16
+ %0 = load <8 x i16>, <8 x i16>* %vus.addr, align 16
+ %vecext = extractelement <8 x i16> %0, i32 2
+ ret i16 %vecext
+; CHECK-LABEL: @getus2
+; CHECK: mfvsrd 3, 34
+; CHECK: rldicl 3, 3, 48, 48
+; CHECK: clrldi 3, 3, 48
+; CHECK-LE-LABEL: @getus2
+; CHECK-LE: mfvsrd 3,
+; CHECK-LE: rldicl 3, 3, 32, 48
+; CHECK-LE: clrldi 3, 3, 48
+}
+
+; Function Attrs: nounwind
+define zeroext i16 @getus3(<8 x i16> %vus) {
+entry:
+ %vus.addr = alloca <8 x i16>, align 16
+ store <8 x i16> %vus, <8 x i16>* %vus.addr, align 16
+ %0 = load <8 x i16>, <8 x i16>* %vus.addr, align 16
+ %vecext = extractelement <8 x i16> %0, i32 3
+ ret i16 %vecext
+; CHECK-LABEL: @getus3
+; CHECK: mfvsrd 3, 34
+; CHECK: clrldi 3, 3, 48
+; CHECK-LE-LABEL: @getus3
+; CHECK-LE: mfvsrd 3,
+; CHECK-LE: rldicl 3, 3, 16, 48
+; CHECK-LE: clrldi 3, 3, 48
+}
+
+; Function Attrs: nounwind
+define zeroext i16 @getus4(<8 x i16> %vus) {
+entry:
+ %vus.addr = alloca <8 x i16>, align 16
+ store <8 x i16> %vus, <8 x i16>* %vus.addr, align 16
+ %0 = load <8 x i16>, <8 x i16>* %vus.addr, align 16
+ %vecext = extractelement <8 x i16> %0, i32 4
+ ret i16 %vecext
+; CHECK-LABEL: @getus4
+; CHECK: mfvsrd 3,
+; CHECK: rldicl 3, 3, 16, 48
+; CHECK: clrldi 3, 3, 48
+; CHECK-LE-LABEL: @getus4
+; CHECK-LE: mfvsrd 3, 34
+; CHECK-LE: clrldi 3, 3, 48
+}
+
+; Function Attrs: nounwind
+define zeroext i16 @getus5(<8 x i16> %vus) {
+entry:
+ %vus.addr = alloca <8 x i16>, align 16
+ store <8 x i16> %vus, <8 x i16>* %vus.addr, align 16
+ %0 = load <8 x i16>, <8 x i16>* %vus.addr, align 16
+ %vecext = extractelement <8 x i16> %0, i32 5
+ ret i16 %vecext
+; CHECK-LABEL: @getus5
+; CHECK: mfvsrd 3,
+; CHECK: rldicl 3, 3, 32, 48
+; CHECK: clrldi 3, 3, 48
+; CHECK-LE-LABEL: @getus5
+; CHECK-LE: mfvsrd 3, 34
+; CHECK-LE: rldicl 3, 3, 48, 48
+; CHECK-LE: clrldi 3, 3, 48
+}
+
+; Function Attrs: nounwind
+define zeroext i16 @getus6(<8 x i16> %vus) {
+entry:
+ %vus.addr = alloca <8 x i16>, align 16
+ store <8 x i16> %vus, <8 x i16>* %vus.addr, align 16
+ %0 = load <8 x i16>, <8 x i16>* %vus.addr, align 16
+ %vecext = extractelement <8 x i16> %0, i32 6
+ ret i16 %vecext
+; CHECK-LABEL: @getus6
+; CHECK: mfvsrd 3,
+; CHECK: rldicl 3, 3, 48, 48
+; CHECK: clrldi 3, 3, 48
+; CHECK-LE-LABEL: @getus6
+; CHECK-LE: mfvsrd 3, 34
+; CHECK-LE: rldicl 3, 3, 32, 48
+; CHECK-LE: clrldi 3, 3, 48
+}
+
+; Function Attrs: nounwind
+define zeroext i16 @getus7(<8 x i16> %vus) {
+entry:
+ %vus.addr = alloca <8 x i16>, align 16
+ store <8 x i16> %vus, <8 x i16>* %vus.addr, align 16
+ %0 = load <8 x i16>, <8 x i16>* %vus.addr, align 16
+ %vecext = extractelement <8 x i16> %0, i32 7
+ ret i16 %vecext
+; CHECK-LABEL: @getus7
+; CHECK: mfvsrd 3,
+; CHECK: clrldi 3, 3, 48
+; CHECK-LE-LABEL: @getus7
+; CHECK-LE: mfvsrd 3, 34
+; CHECK-LE: rldicl 3, 3, 16, 48
+; CHECK-LE: clrldi 3, 3, 48
+}
+
+; Function Attrs: nounwind
+define signext i16 @getvelss(<8 x i16> %vss, i32 signext %i) {
+entry:
+ %vss.addr = alloca <8 x i16>, align 16
+ %i.addr = alloca i32, align 4
+ store <8 x i16> %vss, <8 x i16>* %vss.addr, align 16
+ store i32 %i, i32* %i.addr, align 4
+ %0 = load <8 x i16>, <8 x i16>* %vss.addr, align 16
+ %1 = load i32, i32* %i.addr, align 4
+ %vecext = extractelement <8 x i16> %0, i32 %1
+ ret i16 %vecext
+; CHECK-LABEL: @getvelss
+; CHECK-DAG: andi. [[ANDI:[0-9]+]], {{[0-9]+}}, 4
+; CHECK-DAG: sldi [[MUL2:[0-9]+]], [[ANDI]], 1
+; CHECK-DAG: lvsl [[SHMSK:[0-9]+]], 0, [[MUL2]]
+; CHECK-DAG: vperm [[PERMD:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}, [[SHMSK]]
+; CHECK-DAG: mfvsrd [[MOV:[0-9]+]],
+; CHECK-DAG: li [[IMM3:[0-9]+]], 3
+; CHECK-DAG: andc [[ANDC:[0-9]+]], [[IMM3]]
+; CHECK-DAG: rldicr [[SHL:[0-9]+]], [[ANDC]], 4, 60
+; CHECK-DAG: srd 3, [[MOV]], [[SHL]]
+; CHECK-DAG: extsh 3, 3
+; CHECK-LE-LABEL: @getvelss
+; CHECK-LE-DAG: li [[IMM4:[0-9]+]], 4
+; CHECK-LE-DAG: andc [[ANDC:[0-9]+]], [[IMM4]]
+; CHECK-LE-DAG: sldi [[MUL2:[0-9]+]], [[ANDC]], 1
+; CHECK-LE-DAG: lvsl [[SHMSK:[0-9]+]], 0, [[MUL2]]
+; CHECK-LE-DAG: vperm [[PERMD:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}, [[SHMSK]]
+; CHECK-LE-DAG: mfvsrd [[MOV:[0-9]+]],
+; CHECK-LE-DAG: li [[IMM3:[0-9]+]], 3
+; CHECK-LE-DAG: and [[AND:[0-9]+]], [[IMM3]]
+; CHECK-LE-DAG: sldi [[SHL:[0-9]+]], [[AND]], 4
+; CHECK-LE-DAG: srd 3, [[MOV]], [[SHL]]
+; CHECK-LE-DAG: extsh 3, 3
+}
+
+; Function Attrs: nounwind
+define zeroext i16 @getvelus(<8 x i16> %vus, i32 signext %i) {
+entry:
+ %vus.addr = alloca <8 x i16>, align 16
+ %i.addr = alloca i32, align 4
+ store <8 x i16> %vus, <8 x i16>* %vus.addr, align 16
+ store i32 %i, i32* %i.addr, align 4
+ %0 = load <8 x i16>, <8 x i16>* %vus.addr, align 16
+ %1 = load i32, i32* %i.addr, align 4
+ %vecext = extractelement <8 x i16> %0, i32 %1
+ ret i16 %vecext
+; CHECK-LABEL: @getvelus
+; CHECK-DAG: andi. [[ANDI:[0-9]+]], {{[0-9]+}}, 4
+; CHECK-DAG: sldi [[MUL2:[0-9]+]], [[ANDI]], 1
+; CHECK-DAG: lvsl [[SHMSK:[0-9]+]], 0, [[MUL2]]
+; CHECK-DAG: vperm [[PERMD:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}, [[SHMSK]]
+; CHECK-DAG: mfvsrd [[MOV:[0-9]+]],
+; CHECK-DAG: li [[IMM3:[0-9]+]], 3
+; CHECK-DAG: andc [[ANDC:[0-9]+]], [[IMM3]]
+; CHECK-DAG: rldicr [[SHL:[0-9]+]], [[ANDC]], 4, 60
+; CHECK-DAG: srd 3, [[MOV]], [[SHL]]
+; CHECK-DAG: clrldi 3, 3, 48
+; CHECK-LE-LABEL: @getvelus
+; CHECK-LE-DAG: li [[IMM4:[0-9]+]], 4
+; CHECK-LE-DAG: andc [[ANDC:[0-9]+]], [[IMM4]]
+; CHECK-LE-DAG: sldi [[MUL2:[0-9]+]], [[ANDC]], 1
+; CHECK-LE-DAG: lvsl [[SHMSK:[0-9]+]], 0, [[MUL2]]
+; CHECK-LE-DAG: vperm [[PERMD:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}, [[SHMSK]]
+; CHECK-LE-DAG: mfvsrd [[MOV:[0-9]+]],
+; CHECK-LE-DAG: li [[IMM3:[0-9]+]], 3
+; CHECK-LE-DAG: and [[AND:[0-9]+]], [[IMM3]]
+; CHECK-LE-DAG: sldi [[SHL:[0-9]+]], [[AND]], 4
+; CHECK-LE-DAG: srd 3, [[MOV]], [[SHL]]
+; CHECK-LE-DAG: clrldi 3, 3, 48
+}
+
+; Function Attrs: nounwind
+define signext i32 @getsi0(<4 x i32> %vsi) {
+entry:
+ %vsi.addr = alloca <4 x i32>, align 16
+ store <4 x i32> %vsi, <4 x i32>* %vsi.addr, align 16
+ %0 = load <4 x i32>, <4 x i32>* %vsi.addr, align 16
+ %vecext = extractelement <4 x i32> %0, i32 0
+ ret i32 %vecext
+; CHECK-LABEL: @getsi0
+; CHECK: xxsldwi [[SHL:[0-9]+]], 34, 34, 3
+; CHECK: mfvsrwz 3, [[SHL]]
+; CHECK: extsw 3, 3
+; CHECK-LE-LABEL: @getsi0
+; CHECK-LE: xxsldwi [[SHL:[0-9]+]], 34, 34, 2
+; CHECK-LE: mfvsrwz 3, [[SHL]]
+; CHECK-LE: extsw 3, 3
+}
+
+; Function Attrs: nounwind
+define signext i32 @getsi1(<4 x i32> %vsi) {
+entry:
+ %vsi.addr = alloca <4 x i32>, align 16
+ store <4 x i32> %vsi, <4 x i32>* %vsi.addr, align 16
+ %0 = load <4 x i32>, <4 x i32>* %vsi.addr, align 16
+ %vecext = extractelement <4 x i32> %0, i32 1
+ ret i32 %vecext
+; CHECK-LABEL: @getsi1
+; CHECK: mfvsrwz 3, 34
+; CHECK: extsw 3, 3
+; CHECK-LE-LABEL: @getsi1
+; CHECK-LE: xxsldwi [[SHL:[0-9]+]], 34, 34, 1
+; CHECK-LE: mfvsrwz 3, [[SHL]]
+; CHECK-LE: extsw 3, 3
+}
+
+; Function Attrs: nounwind
+define signext i32 @getsi2(<4 x i32> %vsi) {
+entry:
+ %vsi.addr = alloca <4 x i32>, align 16
+ store <4 x i32> %vsi, <4 x i32>* %vsi.addr, align 16
+ %0 = load <4 x i32>, <4 x i32>* %vsi.addr, align 16
+ %vecext = extractelement <4 x i32> %0, i32 2
+ ret i32 %vecext
+; CHECK-LABEL: @getsi2
+; CHECK: xxsldwi [[SHL:[0-9]+]], 34, 34, 1
+; CHECK: mfvsrwz 3, [[SHL]]
+; CHECK: extsw 3, 3
+; CHECK-LE-LABEL: @getsi2
+; CHECK-LE: mfvsrwz 3, 34
+; CHECK-LE: extsw 3, 3
+}
+
+; Function Attrs: nounwind
+define signext i32 @getsi3(<4 x i32> %vsi) {
+entry:
+ %vsi.addr = alloca <4 x i32>, align 16
+ store <4 x i32> %vsi, <4 x i32>* %vsi.addr, align 16
+ %0 = load <4 x i32>, <4 x i32>* %vsi.addr, align 16
+ %vecext = extractelement <4 x i32> %0, i32 3
+ ret i32 %vecext
+; CHECK-LABEL: @getsi3
+; CHECK: xxsldwi [[SHL:[0-9]+]], 34, 34, 2
+; CHECK: mfvsrwz 3, [[SHL]]
+; CHECK: extsw 3, 3
+; CHECK-LE-LABEL: @getsi3
+; CHECK-LE: xxsldwi [[SHL:[0-9]+]], 34, 34, 3
+; CHECK-LE: mfvsrwz 3, [[SHL]]
+; CHECK-LE: extsw 3, 3
+}
+
+; Function Attrs: nounwind
+define zeroext i32 @getui0(<4 x i32> %vui) {
+entry:
+ %vui.addr = alloca <4 x i32>, align 16
+ store <4 x i32> %vui, <4 x i32>* %vui.addr, align 16
+ %0 = load <4 x i32>, <4 x i32>* %vui.addr, align 16
+ %vecext = extractelement <4 x i32> %0, i32 0
+ ret i32 %vecext
+; CHECK-LABEL: @getui0
+; CHECK: xxsldwi [[SHL:[0-9]+]], 34, 34, 3
+; CHECK: mfvsrwz 3, [[SHL]]
+; CHECK: clrldi 3, 3, 32
+; CHECK-LE-LABEL: @getui0
+; CHECK-LE: xxsldwi [[SHL:[0-9]+]], 34, 34, 2
+; CHECK-LE: mfvsrwz 3, [[SHL]]
+; CHECK-LE: clrldi 3, 3, 32
+}
+
+; Function Attrs: nounwind
+define zeroext i32 @getui1(<4 x i32> %vui) {
+entry:
+ %vui.addr = alloca <4 x i32>, align 16
+ store <4 x i32> %vui, <4 x i32>* %vui.addr, align 16
+ %0 = load <4 x i32>, <4 x i32>* %vui.addr, align 16
+ %vecext = extractelement <4 x i32> %0, i32 1
+ ret i32 %vecext
+; CHECK-LABEL: @getui1
+; CHECK: mfvsrwz 3, 34
+; CHECK: clrldi 3, 3, 32
+; CHECK-LE-LABEL: @getui1
+; CHECK-LE: xxsldwi [[SHL:[0-9]+]], 34, 34, 1
+; CHECK-LE: mfvsrwz 3, [[SHL]]
+; CHECK-LE: clrldi 3, 3, 32
+}
+
+; Function Attrs: nounwind
+define zeroext i32 @getui2(<4 x i32> %vui) {
+entry:
+ %vui.addr = alloca <4 x i32>, align 16
+ store <4 x i32> %vui, <4 x i32>* %vui.addr, align 16
+ %0 = load <4 x i32>, <4 x i32>* %vui.addr, align 16
+ %vecext = extractelement <4 x i32> %0, i32 2
+ ret i32 %vecext
+; CHECK-LABEL: @getui2
+; CHECK: xxsldwi [[SHL:[0-9]+]], 34, 34, 1
+; CHECK: mfvsrwz 3, [[SHL]]
+; CHECK: clrldi 3, 3, 32
+; CHECK-LE-LABEL: @getui2
+; CHECK-LE: mfvsrwz 3, 34
+; CHECK-LE: clrldi 3, 3, 32
+}
+
+; Function Attrs: nounwind
+define zeroext i32 @getui3(<4 x i32> %vui) {
+entry:
+ %vui.addr = alloca <4 x i32>, align 16
+ store <4 x i32> %vui, <4 x i32>* %vui.addr, align 16
+ %0 = load <4 x i32>, <4 x i32>* %vui.addr, align 16
+ %vecext = extractelement <4 x i32> %0, i32 3
+ ret i32 %vecext
+; CHECK-LABEL: @getui3
+; CHECK: xxsldwi [[SHL:[0-9]+]], 34, 34, 2
+; CHECK: mfvsrwz 3, [[SHL]]
+; CHECK: clrldi 3, 3, 32
+; CHECK-LE-LABEL: @getui3
+; CHECK-LE: xxsldwi [[SHL:[0-9]+]], 34, 34, 3
+; CHECK-LE: mfvsrwz 3, [[SHL]]
+; CHECK-LE: clrldi 3, 3, 32
+}
+
+; Function Attrs: nounwind
+define signext i32 @getvelsi(<4 x i32> %vsi, i32 signext %i) {
+entry:
+ %vsi.addr = alloca <4 x i32>, align 16
+ %i.addr = alloca i32, align 4
+ store <4 x i32> %vsi, <4 x i32>* %vsi.addr, align 16
+ store i32 %i, i32* %i.addr, align 4
+ %0 = load <4 x i32>, <4 x i32>* %vsi.addr, align 16
+ %1 = load i32, i32* %i.addr, align 4
+ %vecext = extractelement <4 x i32> %0, i32 %1
+ ret i32 %vecext
+; CHECK-LABEL: @getvelsi
+; CHECK-LE-LABEL: @getvelsi
+; FIXME: add check patterns when variable element extraction is implemented
+}
+
+; Function Attrs: nounwind
+define zeroext i32 @getvelui(<4 x i32> %vui, i32 signext %i) {
+entry:
+ %vui.addr = alloca <4 x i32>, align 16
+ %i.addr = alloca i32, align 4
+ store <4 x i32> %vui, <4 x i32>* %vui.addr, align 16
+ store i32 %i, i32* %i.addr, align 4
+ %0 = load <4 x i32>, <4 x i32>* %vui.addr, align 16
+ %1 = load i32, i32* %i.addr, align 4
+ %vecext = extractelement <4 x i32> %0, i32 %1
+ ret i32 %vecext
+; CHECK-LABEL: @getvelui
+; CHECK-LE-LABEL: @getvelui
+; FIXME: add check patterns when variable element extraction is implemented
+}
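
The two functions above check only their labels for now: with a variable
index there is no single rotate amount to select at isel time, so these stay
as placeholders (presumably still lowered through a stack temporary) until
variable-element extraction for the word and doubleword types lands. At the
source level they would look roughly like (illustrative names, vector_size
extension assumed):

  typedef int v4si __attribute__((vector_size(16)));

  /* Variable-index extract; check patterns to follow once the
     in-register lowering is implemented. */
  int getvelsi(v4si vsi, int i) { return vsi[i]; }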
+
+; Function Attrs: nounwind
+define i64 @getsl0(<2 x i64> %vsl) {
+entry:
+ %vsl.addr = alloca <2 x i64>, align 16
+ store <2 x i64> %vsl, <2 x i64>* %vsl.addr, align 16
+ %0 = load <2 x i64>, <2 x i64>* %vsl.addr, align 16
+ %vecext = extractelement <2 x i64> %0, i32 0
+ ret i64 %vecext
+; CHECK-LABEL: @getsl0
+; CHECK: mfvsrd 3, 34
+; CHECK-LE-LABEL: @getsl0
+; CHECK-LE: xxswapd [[SWP:[0-9]+]], 34
+; CHECK-LE: mfvsrd 3, [[SWP]]
+}
+
+; Function Attrs: nounwind
+define i64 @getsl1(<2 x i64> %vsl) {
+entry:
+ %vsl.addr = alloca <2 x i64>, align 16
+ store <2 x i64> %vsl, <2 x i64>* %vsl.addr, align 16
+ %0 = load <2 x i64>, <2 x i64>* %vsl.addr, align 16
+ %vecext = extractelement <2 x i64> %0, i32 1
+ ret i64 %vecext
+; CHECK-LABEL: @getsl1
+; CHECK: xxswapd [[SWP:[0-9]+]], 34
+; CHECK: mfvsrd 3, [[SWP]]
+; CHECK-LE-LABEL: @getsl1
+; CHECK-LE: mfvsrd 3, 34
+}
+
+; Function Attrs: nounwind
+define i64 @getul0(<2 x i64> %vul) {
+entry:
+ %vul.addr = alloca <2 x i64>, align 16
+ store <2 x i64> %vul, <2 x i64>* %vul.addr, align 16
+ %0 = load <2 x i64>, <2 x i64>* %vul.addr, align 16
+ %vecext = extractelement <2 x i64> %0, i32 0
+ ret i64 %vecext
+; CHECK-LABEL: @getul0
+; CHECK: mfvsrd 3, 34
+; CHECK-LE-LABEL: @getul0
+; CHECK-LE: xxswapd [[SWP:[0-9]+]], 34
+; CHECK-LE: mfvsrd 3, [[SWP]]
+}
+
+; Function Attrs: nounwind
+define i64 @getul1(<2 x i64> %vul) {
+entry:
+ %vul.addr = alloca <2 x i64>, align 16
+ store <2 x i64> %vul, <2 x i64>* %vul.addr, align 16
+ %0 = load <2 x i64>, <2 x i64>* %vul.addr, align 16
+ %vecext = extractelement <2 x i64> %0, i32 1
+ ret i64 %vecext
+; CHECK-LABEL: @getul1
+; CHECK: xxswapd [[SWP:[0-9]+]], 34
+; CHECK: mfvsrd 3, [[SWP]]
+; CHECK-LE-LABEL: @getul1
+; CHECK-LE: mfvsrd 3, 34
+}
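
Doubleword extraction is simpler: mfvsrd copies doubleword 0 of the VSR
straight into the GPR, so element 0 on big-endian (and element 1 on
little-endian, where the register layout is reversed) needs no setup at all,
while the other element is first brought into doubleword 0 with xxswapd. No
extend or clear is needed, since the element already fills the 64-bit GPR.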
+
+; Function Attrs: nounwind
+define i64 @getvelsl(<2 x i64> %vsl, i32 signext %i) {
+entry:
+ %vsl.addr = alloca <2 x i64>, align 16
+ %i.addr = alloca i32, align 4
+ store <2 x i64> %vsl, <2 x i64>* %vsl.addr, align 16
+ store i32 %i, i32* %i.addr, align 4
+ %0 = load <2 x i64>, <2 x i64>* %vsl.addr, align 16
+ %1 = load i32, i32* %i.addr, align 4
+ %vecext = extractelement <2 x i64> %0, i32 %1
+ ret i64 %vecext
+; CHECK-LABEL: @getvelsl
+; CHECK-LE-LABEL: @getvelsl
+; FIXME: add check patterns when variable element extraction is implemented
+}
+
+; Function Attrs: nounwind
+define i64 @getvelul(<2 x i64> %vul, i32 signext %i) {
+entry:
+ %vul.addr = alloca <2 x i64>, align 16
+ %i.addr = alloca i32, align 4
+ store <2 x i64> %vul, <2 x i64>* %vul.addr, align 16
+ store i32 %i, i32* %i.addr, align 4
+ %0 = load <2 x i64>, <2 x i64>* %vul.addr, align 16
+ %1 = load i32, i32* %i.addr, align 4
+ %vecext = extractelement <2 x i64> %0, i32 %1
+ ret i64 %vecext
+; CHECK-LABEL: @getvelul
+; CHECK-LE-LABEL: @getvelul
+; FIXME: add check patterns when variable element extraction is implemented
+}
+
+; Function Attrs: nounwind
+define float @getf0(<4 x float> %vf) {
+entry:
+ %vf.addr = alloca <4 x float>, align 16
+ store <4 x float> %vf, <4 x float>* %vf.addr, align 16
+ %0 = load <4 x float>, <4 x float>* %vf.addr, align 16
+ %vecext = extractelement <4 x float> %0, i32 0
+ ret float %vecext
+; CHECK-LABEL: @getf0
+; CHECK: xscvspdpn 1, 34
+; CHECK-LE-LABEL: @getf0
+; CHECK-LE: xxsldwi [[SHL:[0-9]+]], 34, 34, 3
+; CHECK-LE: xscvspdpn 1, [[SHL]]
+}
+
+; Function Attrs: nounwind
+define float @getf1(<4 x float> %vf) {
+entry:
+ %vf.addr = alloca <4 x float>, align 16
+ store <4 x float> %vf, <4 x float>* %vf.addr, align 16
+ %0 = load <4 x float>, <4 x float>* %vf.addr, align 16
+ %vecext = extractelement <4 x float> %0, i32 1
+ ret float %vecext
+; CHECK-LABEL: @getf1
+; CHECK: xxsldwi [[SHL:[0-9]+]], 34, 34, 1
+; CHECK: xscvspdpn 1, [[SHL]]
+; CHECK-LE-LABEL: @getf1
+; CHECK-LE: xxsldwi [[SHL:[0-9]+]], 34, 34, 2
+; CHECK-LE: xscvspdpn 1, [[SHL]]
+}
+
+; Function Attrs: nounwind
+define float @getf2(<4 x float> %vf) {
+entry:
+ %vf.addr = alloca <4 x float>, align 16
+ store <4 x float> %vf, <4 x float>* %vf.addr, align 16
+ %0 = load <4 x float>, <4 x float>* %vf.addr, align 16
+ %vecext = extractelement <4 x float> %0, i32 2
+ ret float %vecext
+; CHECK-LABEL: @getf2
+; CHECK: xxsldwi [[SHL:[0-9]+]], 34, 34, 2
+; CHECK: xscvspdpn 1, [[SHL]]
+; CHECK-LE-LABEL: @getf2
+; CHECK-LE: xxsldwi [[SHL:[0-9]+]], 34, 34, 1
+; CHECK-LE: xscvspdpn 1, [[SHL]]
+}
+
+; Function Attrs: nounwind
+define float @getf3(<4 x float> %vf) {
+entry:
+ %vf.addr = alloca <4 x float>, align 16
+ store <4 x float> %vf, <4 x float>* %vf.addr, align 16
+ %0 = load <4 x float>, <4 x float>* %vf.addr, align 16
+ %vecext = extractelement <4 x float> %0, i32 3
+ ret float %vecext
+; CHECK-LABEL: @getf3
+; CHECK: xxsldwi [[SHL:[0-9]+]], 34, 34, 3
+; CHECK: xscvspdpn 1, [[SHL]]
+; CHECK-LE-LABEL: @getf3
+; CHECK-LE: xscvspdpn 1, 34
+}
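
For v4f32 the target slot is word 0 rather than word 1: xxsldwi rotates the
requested element into word 0, and xscvspdpn (the non-signalling single- to
double-precision conversion) widens it into the scalar floating-point format
expected in the return register f1, so the whole extraction stays within the
VSX registers. Big-endian element i needs a rotate of i words (element 0 is
free); little-endian needs (3 - i) mod 4 (element 3 is free).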
+
+; Function Attrs: nounwind
+define float @getvelf(<4 x float> %vf, i32 signext %i) {
+entry:
+ %vf.addr = alloca <4 x float>, align 16
+ %i.addr = alloca i32, align 4
+ store <4 x float> %vf, <4 x float>* %vf.addr, align 16
+ store i32 %i, i32* %i.addr, align 4
+ %0 = load <4 x float>, <4 x float>* %vf.addr, align 16
+ %1 = load i32, i32* %i.addr, align 4
+ %vecext = extractelement <4 x float> %0, i32 %1
+ ret float %vecext
+; CHECK-LABEL: @getvelf
+; CHECK-LE-LABEL: @getvelf
+; FIXME: add check patterns when variable element extraction is implemented
+}
+
+; Function Attrs: nounwind
+define double @getd0(<2 x double> %vd) {
+entry:
+ %vd.addr = alloca <2 x double>, align 16
+ store <2 x double> %vd, <2 x double>* %vd.addr, align 16
+ %0 = load <2 x double>, <2 x double>* %vd.addr, align 16
+ %vecext = extractelement <2 x double> %0, i32 0
+ ret double %vecext
+; CHECK-LABEL: @getd0
+; CHECK: xxlor 1, 34, 34
+; CHECK-LE-LABEL: @getd0
+; CHECK-LE: xxswapd 1, 34
+}
+
+; Function Attrs: nounwind
+define double @getd1(<2 x double> %vd) {
+entry:
+ %vd.addr = alloca <2 x double>, align 16
+ store <2 x double> %vd, <2 x double>* %vd.addr, align 16
+ %0 = load <2 x double>, <2 x double>* %vd.addr, align 16
+ %vecext = extractelement <2 x double> %0, i32 1
+ ret double %vecext
+; CHECK-LABEL: @getd1
+; CHECK: xxswapd 1, 34
+; CHECK-LE-LABEL: @getd1
+; CHECK-LE: xxlor 1, 34, 34
+}
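
Extracting a double is just a register-to-register move: xxlor with identical
source operands copies the VSR into f1 (doubleword 0 already holds the
requested element), and xxswapd handles the element in doubleword 1. As with
the other types, the two cases are mirrored on little-endian.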
+
+; Function Attrs: nounwind
+define double @getveld(<2 x double> %vd, i32 signext %i) {
+entry:
+ %vd.addr = alloca <2 x double>, align 16
+ %i.addr = alloca i32, align 4
+ store <2 x double> %vd, <2 x double>* %vd.addr, align 16
+ store i32 %i, i32* %i.addr, align 4
+ %0 = load <2 x double>, <2 x double>* %vd.addr, align 16
+ %1 = load i32, i32* %i.addr, align 4
+ %vecext = extractelement <2 x double> %0, i32 %1
+ ret double %vecext
+; CHECK-LABEL: @getveld
+; CHECK-LE-LABEL: @getveld
+; FIXME: add check patterns when variable element extraction is implemented
+}