[llvm] r354461 - [MIPS MSA] Avoid some DAG combines for vector shifts
Petar Avramovic via llvm-commits
llvm-commits at lists.llvm.org
Wed Feb 20 05:42:44 PST 2019
Author: petar.avramovic
Date: Wed Feb 20 05:42:44 2019
New Revision: 354461
URL: http://llvm.org/viewvc/llvm-project?rev=354461&view=rev
Log:
[MIPS MSA] Avoid some DAG combines for vector shifts
The DAG combiner folds a pair of shifts into a single shift plus an
AND with a bitmask. Avoid such combines for vectors, since leaving
the two vector shifts as they are produces better end results.
Differential Revision: https://reviews.llvm.org/D58225
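
For readers unfamiliar with the combine in question, here is a standalone
illustration (plain C++, not LLVM code) of the scalar identity the DAG
combiner exploits: for c1 > c2, ((x >> c1) << c2) equals
((x >> (c1 - c2)) & mask), where the mask keeps exactly the bits the two
shifts would leave standing. On MSA the two vector shifts are single
instructions, while the mask would have to be materialized into a vector
register, which is why the combine is now disabled for vectors.

#include <cassert>
#include <cstdint>

int main() {
  // Shift amounts taken from the first test case below.
  const unsigned c1 = 52, c2 = 51;
  const uint64_t x = 0x123456789abcdef0ULL;

  // The two-shift form the MSA backend now keeps.
  uint64_t twoShifts = (x >> c1) << c2;

  // The shift-plus-mask form the DAG combiner produces for scalars:
  // mask = (c1 - c2) zeros | ones | c2 zeros.
  uint64_t mask = (~0ULL << c2) & (~0ULL >> (c1 - c2));
  uint64_t shiftPlusMask = (x >> (c1 - c2)) & mask;

  assert(twoShifts == shiftPlusMask);
  return 0;
}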
Added:
llvm/trunk/test/CodeGen/Mips/msa/avoid_vector_shift_combines.ll
Removed:
llvm/trunk/test/CodeGen/Mips/msa/vector_shift_combines.ll
Modified:
llvm/trunk/lib/Target/Mips/MipsISelLowering.cpp
llvm/trunk/lib/Target/Mips/MipsISelLowering.h
Modified: llvm/trunk/lib/Target/Mips/MipsISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/Mips/MipsISelLowering.cpp?rev=354461&r1=354460&r2=354461&view=diff
==============================================================================
--- llvm/trunk/lib/Target/Mips/MipsISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/Mips/MipsISelLowering.cpp Wed Feb 20 05:42:44 2019
@@ -1190,6 +1190,13 @@ bool MipsTargetLowering::isCheapToSpecul
return Subtarget.hasMips32();
}
+bool MipsTargetLowering::shouldFoldShiftPairToMask(const SDNode *N,
+                                                   CombineLevel Level) const {
+  if (N->getOperand(0).getValueType().isVector())
+    return false;
+  return true;
+}
+
 void
 MipsTargetLowering::LowerOperationWrapper(SDNode *N,
                                           SmallVectorImpl<SDValue> &Results,
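
As a rough mental model of where this hook takes effect, here is a hedged
sketch with stand-in types; this is not the actual DAGCombiner API, only
an illustration of the combiner consulting the target before building the
shift-plus-mask form.

// Stand-in for the real SelectionDAG node; illustration only.
struct ShiftPairNode {
  unsigned C1, C2; // srl amount, shl amount
  bool IsVector;   // models N->getOperand(0).getValueType().isVector()
};

// Mirrors the Mips override above: scalars may fold, vectors may not.
static bool shouldFoldShiftPairToMask(const ShiftPairNode &N) {
  return !N.IsVector;
}

// Returns true if (shl (srl x, C1), C2) was rewritten as shift + mask.
static bool tryFoldShiftPairToMask(const ShiftPairNode &N) {
  if (!shouldFoldShiftPairToMask(N))
    return false; // MSA path: keep the two vector shifts as they are
  // ... build (and (srl/shl x, |C1 - C2|), MASK) here ...
  return true;
}

int main() {
  ShiftPairNode Scalar{52, 51, false}, Vector{52, 51, true};
  return tryFoldShiftPairToMask(Scalar) && !tryFoldShiftPairToMask(Vector)
             ? 0
             : 1;
}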
Modified: llvm/trunk/lib/Target/Mips/MipsISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/Mips/MipsISelLowering.h?rev=354461&r1=354460&r2=354461&view=diff
==============================================================================
--- llvm/trunk/lib/Target/Mips/MipsISelLowering.h (original)
+++ llvm/trunk/lib/Target/Mips/MipsISelLowering.h Wed Feb 20 05:42:44 2019
@@ -284,6 +284,8 @@ class TargetRegisterClass;
     bool isCheapToSpeculateCttz() const override;
     bool isCheapToSpeculateCtlz() const override;
+    bool shouldFoldShiftPairToMask(const SDNode *N,
+                                   CombineLevel Level) const override;
 
     /// Return the register type for a given MVT, ensuring vectors are treated
     /// as a series of gpr sized integers.
Added: llvm/trunk/test/CodeGen/Mips/msa/avoid_vector_shift_combines.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Mips/msa/avoid_vector_shift_combines.ll?rev=354461&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/Mips/msa/avoid_vector_shift_combines.ll (added)
+++ llvm/trunk/test/CodeGen/Mips/msa/avoid_vector_shift_combines.ll Wed Feb 20 05:42:44 2019
@@ -0,0 +1,211 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=mips64el-linux-gnu -mcpu=mips64r6 -mattr=+msa,+fp64 < %s | FileCheck %s --check-prefixes=MIPSEL64R6
+; RUN: llc -mtriple=mipsel-linux-gnu -mcpu=mips32r5 -mattr=+msa,+fp64 < %s | FileCheck %s --check-prefixes=MIPSEL32R5
+
+declare <2 x i64> @llvm.mips.slli.d(<2 x i64>, i32)
+declare <2 x i64> @llvm.mips.srli.d(<2 x i64>, i32)
+
+declare <4 x i32> @llvm.mips.slli.w(<4 x i32>, i32)
+declare <4 x i32> @llvm.mips.srli.w(<4 x i32>, i32)
+
+; do not fold (shl (srl x, c1), c2) -> (and (srl x, (sub c1, c2)), MASK) if C1 > C2
+; MASK_TYPE1 = (C1-C2) 0s | 1s | ends with C2 0s
+define void @avoid_to_combine_shifts_to_shift_plus_and_mask_type1_i64(<2 x i64>* %a, <2 x i64>* %b) {
+; MIPSEL64R6-LABEL: avoid_to_combine_shifts_to_shift_plus_and_mask_type1_i64:
+; MIPSEL64R6: # %bb.0: # %entry
+; MIPSEL64R6-NEXT: ld.d $w0, 0($4)
+; MIPSEL64R6-NEXT: srli.d $w0, $w0, 52
+; MIPSEL64R6-NEXT: slli.d $w0, $w0, 51
+; MIPSEL64R6-NEXT: jr $ra
+; MIPSEL64R6-NEXT: st.d $w0, 0($5)
+;
+; MIPSEL32R5-LABEL: avoid_to_combine_shifts_to_shift_plus_and_mask_type1_i64:
+; MIPSEL32R5: # %bb.0: # %entry
+; MIPSEL32R5-NEXT: ld.d $w0, 0($4)
+; MIPSEL32R5-NEXT: srli.d $w0, $w0, 52
+; MIPSEL32R5-NEXT: slli.d $w0, $w0, 51
+; MIPSEL32R5-NEXT: jr $ra
+; MIPSEL32R5-NEXT: st.d $w0, 0($5)
+entry:
+ %0 = load <2 x i64>, <2 x i64>* %a
+ %1 = tail call <2 x i64> @llvm.mips.srli.d(<2 x i64> %0, i32 52)
+ %2 = tail call <2 x i64> @llvm.mips.slli.d(<2 x i64> %1, i32 51)
+ store <2 x i64> %2, <2 x i64>* %b
+ ret void
+}
+
+; do not fold (shl (srl x, c1), c2) -> (and (srl x, (sub c1, c2)), MASK) if C1 > C2
+define void @avoid_to_combine_shifts_to_shift_plus_and_mask_type1_i64_long(<2 x i64>* %a, <2 x i64>* %b) {
+; MIPSEL64R6-LABEL: avoid_to_combine_shifts_to_shift_plus_and_mask_type1_i64_long:
+; MIPSEL64R6: # %bb.0: # %entry
+; MIPSEL64R6-NEXT: ld.d $w0, 0($4)
+; MIPSEL64R6-NEXT: srli.d $w0, $w0, 6
+; MIPSEL64R6-NEXT: slli.d $w0, $w0, 4
+; MIPSEL64R6-NEXT: jr $ra
+; MIPSEL64R6-NEXT: st.d $w0, 0($5)
+;
+; MIPSEL32R5-LABEL: avoid_to_combine_shifts_to_shift_plus_and_mask_type1_i64_long:
+; MIPSEL32R5: # %bb.0: # %entry
+; MIPSEL32R5-NEXT: ld.d $w0, 0($4)
+; MIPSEL32R5-NEXT: srli.d $w0, $w0, 6
+; MIPSEL32R5-NEXT: slli.d $w0, $w0, 4
+; MIPSEL32R5-NEXT: jr $ra
+; MIPSEL32R5-NEXT: st.d $w0, 0($5)
+entry:
+ %0 = load <2 x i64>, <2 x i64>* %a
+ %1 = tail call <2 x i64> @llvm.mips.srli.d(<2 x i64> %0, i32 6)
+ %2 = tail call <2 x i64> @llvm.mips.slli.d(<2 x i64> %1, i32 4)
+ store <2 x i64> %2, <2 x i64>* %b
+ ret void
+}
+
+; do not fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1)), MASK) if C1 <= C2
+; MASK_TYPE2 = 1s | ends with C2 0s
+define void @avoid_to_combine_shifts_to_shift_plus_and_mask_type2_i32(<2 x i64>* %a, <2 x i64>* %b) {
+; MIPSEL64R6-LABEL: avoid_to_combine_shifts_to_shift_plus_and_mask_type2_i32:
+; MIPSEL64R6: # %bb.0: # %entry
+; MIPSEL64R6-NEXT: ld.d $w0, 0($4)
+; MIPSEL64R6-NEXT: srli.d $w0, $w0, 4
+; MIPSEL64R6-NEXT: slli.d $w0, $w0, 6
+; MIPSEL64R6-NEXT: jr $ra
+; MIPSEL64R6-NEXT: st.d $w0, 0($5)
+;
+; MIPSEL32R5-LABEL: avoid_to_combine_shifts_to_shift_plus_and_mask_type2_i32:
+; MIPSEL32R5: # %bb.0: # %entry
+; MIPSEL32R5-NEXT: ld.d $w0, 0($4)
+; MIPSEL32R5-NEXT: srli.d $w0, $w0, 4
+; MIPSEL32R5-NEXT: slli.d $w0, $w0, 6
+; MIPSEL32R5-NEXT: jr $ra
+; MIPSEL32R5-NEXT: st.d $w0, 0($5)
+entry:
+ %0 = load <2 x i64>, <2 x i64>* %a
+ %1 = tail call <2 x i64> @llvm.mips.srli.d(<2 x i64> %0, i32 4)
+ %2 = tail call <2 x i64> @llvm.mips.slli.d(<2 x i64> %1, i32 6)
+ store <2 x i64> %2, <2 x i64>* %b
+ ret void
+}
+
+; do not fold (shl (srl x, c1), c2) -> (and (srl x, (sub c1, c2)), MASK) if C1 > C2
+define void @avoid_to_combine_shifts_to_shift_plus_and_mask_type1_i32_long(<4 x i32>* %a, <4 x i32>* %b) {
+; MIPSEL64R6-LABEL: avoid_to_combine_shifts_to_shift_plus_and_mask_type1_i32_long:
+; MIPSEL64R6: # %bb.0: # %entry
+; MIPSEL64R6-NEXT: ld.w $w0, 0($4)
+; MIPSEL64R6-NEXT: srli.w $w0, $w0, 7
+; MIPSEL64R6-NEXT: slli.w $w0, $w0, 3
+; MIPSEL64R6-NEXT: jr $ra
+; MIPSEL64R6-NEXT: st.w $w0, 0($5)
+;
+; MIPSEL32R5-LABEL: avoid_to_combine_shifts_to_shift_plus_and_mask_type1_i32_long:
+; MIPSEL32R5: # %bb.0: # %entry
+; MIPSEL32R5-NEXT: ld.w $w0, 0($4)
+; MIPSEL32R5-NEXT: srli.w $w0, $w0, 7
+; MIPSEL32R5-NEXT: slli.w $w0, $w0, 3
+; MIPSEL32R5-NEXT: jr $ra
+; MIPSEL32R5-NEXT: st.w $w0, 0($5)
+entry:
+ %0 = load <4 x i32>, <4 x i32>* %a
+ %1 = tail call <4 x i32> @llvm.mips.srli.w(<4 x i32> %0, i32 7)
+ %2 = tail call <4 x i32> @llvm.mips.slli.w(<4 x i32> %1, i32 3)
+ store <4 x i32> %2, <4 x i32>* %b
+ ret void
+}
+
+; do not fold (shl (srl x, c1), c1) -> (and x, (shl -1, c1))
+define void @avoid_to_combine_shifts_to_and_mask_type2_i64_long(<2 x i64>* %a, <2 x i64>* %b) {
+; MIPSEL64R6-LABEL: avoid_to_combine_shifts_to_and_mask_type2_i64_long:
+; MIPSEL64R6: # %bb.0: # %entry
+; MIPSEL64R6-NEXT: ld.d $w0, 0($4)
+; MIPSEL64R6-NEXT: srli.d $w0, $w0, 38
+; MIPSEL64R6-NEXT: slli.d $w0, $w0, 38
+; MIPSEL64R6-NEXT: jr $ra
+; MIPSEL64R6-NEXT: st.d $w0, 0($5)
+;
+; MIPSEL32R5-LABEL: avoid_to_combine_shifts_to_and_mask_type2_i64_long:
+; MIPSEL32R5: # %bb.0: # %entry
+; MIPSEL32R5-NEXT: ld.d $w0, 0($4)
+; MIPSEL32R5-NEXT: srli.d $w0, $w0, 38
+; MIPSEL32R5-NEXT: slli.d $w0, $w0, 38
+; MIPSEL32R5-NEXT: jr $ra
+; MIPSEL32R5-NEXT: st.d $w0, 0($5)
+entry:
+ %0 = load <2 x i64>, <2 x i64>* %a
+ %1 = tail call <2 x i64> @llvm.mips.srli.d(<2 x i64> %0, i32 38)
+ %2 = tail call <2 x i64> @llvm.mips.slli.d(<2 x i64> %1, i32 38)
+ store <2 x i64> %2, <2 x i64>* %b
+ ret void
+}
+
+; do not fold (shl (srl x, c1), c1) -> (and x, (shl -1, c1))
+define void @avoid_to_combine_shifts_to_and_mask_type2_i64(<2 x i64>* %a, <2 x i64>* %b) {
+; MIPSEL64R6-LABEL: avoid_to_combine_shifts_to_and_mask_type2_i64:
+; MIPSEL64R6: # %bb.0: # %entry
+; MIPSEL64R6-NEXT: ld.d $w0, 0($4)
+; MIPSEL64R6-NEXT: srli.d $w0, $w0, 3
+; MIPSEL64R6-NEXT: slli.d $w0, $w0, 3
+; MIPSEL64R6-NEXT: jr $ra
+; MIPSEL64R6-NEXT: st.d $w0, 0($5)
+;
+; MIPSEL32R5-LABEL: avoid_to_combine_shifts_to_and_mask_type2_i64:
+; MIPSEL32R5: # %bb.0: # %entry
+; MIPSEL32R5-NEXT: ld.d $w0, 0($4)
+; MIPSEL32R5-NEXT: srli.d $w0, $w0, 3
+; MIPSEL32R5-NEXT: slli.d $w0, $w0, 3
+; MIPSEL32R5-NEXT: jr $ra
+; MIPSEL32R5-NEXT: st.d $w0, 0($5)
+entry:
+ %0 = load <2 x i64>, <2 x i64>* %a
+ %1 = tail call <2 x i64> @llvm.mips.srli.d(<2 x i64> %0, i32 3)
+ %2 = tail call <2 x i64> @llvm.mips.slli.d(<2 x i64> %1, i32 3)
+ store <2 x i64> %2, <2 x i64>* %b
+ ret void
+}
+
+; do not fold (shl (srl x, c1), c1) -> (and x, (shl -1, c1))
+define void @avoid_to_combine_shifts_to_and_mask_type1_long_i32_a(<4 x i32>* %a, <4 x i32>* %b) {
+; MIPSEL64R6-LABEL: avoid_to_combine_shifts_to_and_mask_type1_long_i32_a:
+; MIPSEL64R6: # %bb.0: # %entry
+; MIPSEL64R6-NEXT: ld.w $w0, 0($4)
+; MIPSEL64R6-NEXT: srli.w $w0, $w0, 5
+; MIPSEL64R6-NEXT: slli.w $w0, $w0, 5
+; MIPSEL64R6-NEXT: jr $ra
+; MIPSEL64R6-NEXT: st.w $w0, 0($5)
+;
+; MIPSEL32R5-LABEL: avoid_to_combine_shifts_to_and_mask_type1_long_i32_a:
+; MIPSEL32R5: # %bb.0: # %entry
+; MIPSEL32R5-NEXT: ld.w $w0, 0($4)
+; MIPSEL32R5-NEXT: srli.w $w0, $w0, 5
+; MIPSEL32R5-NEXT: slli.w $w0, $w0, 5
+; MIPSEL32R5-NEXT: jr $ra
+; MIPSEL32R5-NEXT: st.w $w0, 0($5)
+entry:
+ %0 = load <4 x i32>, <4 x i32>* %a
+ %1 = tail call <4 x i32> @llvm.mips.srli.w(<4 x i32> %0, i32 5)
+ %2 = tail call <4 x i32> @llvm.mips.slli.w(<4 x i32> %1, i32 5)
+ store <4 x i32> %2, <4 x i32>* %b
+ ret void
+}
+
+; do not fold (shl (srl x, c1), c1) -> (and x, (shl -1, c1))
+define void @avoid_to_combine_shifts_to_and_mask_type1_long_i32_b(<4 x i32>* %a, <4 x i32>* %b) {
+; MIPSEL64R6-LABEL: avoid_to_combine_shifts_to_and_mask_type1_long_i32_b:
+; MIPSEL64R6: # %bb.0: # %entry
+; MIPSEL64R6-NEXT: ld.w $w0, 0($4)
+; MIPSEL64R6-NEXT: srli.w $w0, $w0, 30
+; MIPSEL64R6-NEXT: slli.w $w0, $w0, 30
+; MIPSEL64R6-NEXT: jr $ra
+; MIPSEL64R6-NEXT: st.w $w0, 0($5)
+;
+; MIPSEL32R5-LABEL: avoid_to_combine_shifts_to_and_mask_type1_long_i32_b:
+; MIPSEL32R5: # %bb.0: # %entry
+; MIPSEL32R5-NEXT: ld.w $w0, 0($4)
+; MIPSEL32R5-NEXT: srli.w $w0, $w0, 30
+; MIPSEL32R5-NEXT: slli.w $w0, $w0, 30
+; MIPSEL32R5-NEXT: jr $ra
+; MIPSEL32R5-NEXT: st.w $w0, 0($5)
+entry:
+ %0 = load <4 x i32>, <4 x i32>* %a
+ %1 = tail call <4 x i32> @llvm.mips.srli.w(<4 x i32> %0, i32 30)
+ %2 = tail call <4 x i32> @llvm.mips.slli.w(<4 x i32> %1, i32 30)
+ store <4 x i32> %2, <4 x i32>* %b
+ ret void
+}
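
The removed test below captures the old behavior: the same IR was folded
into a shift plus an AND whose mask constant had to be materialized (e.g.
lui 32760 followed by dsll32 and fill.d in the first function). A quick
standalone check (plain C++, not LLVM code) that that materialized
constant is exactly the MASK_TYPE1 value for C1 = 52, C2 = 51:

#include <cassert>
#include <cstdint>

int main() {
  // lui $1, 32760 puts 0x7ff8 in the upper half of a 32-bit word;
  // dsll32 $1, $1, 0 then shifts it into the upper 32 bits, giving
  // 0x7ff8000000000000 (the value broadcast into $w1 by fill.d).
  uint64_t materialized = 0x7ff8ULL << 48;

  // MASK_TYPE1 for C1 = 52, C2 = 51: one leading zero, ones, 51 trailing zeros.
  uint64_t mask = (~0ULL << 51) & (~0ULL >> 1);

  assert(materialized == mask);
  return 0;
}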
Removed: llvm/trunk/test/CodeGen/Mips/msa/vector_shift_combines.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Mips/msa/vector_shift_combines.ll?rev=354460&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/Mips/msa/vector_shift_combines.ll (original)
+++ llvm/trunk/test/CodeGen/Mips/msa/vector_shift_combines.ll (removed)
@@ -1,232 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=mips64el-linux-gnu -mcpu=mips64r6 -mattr=+msa,+fp64 < %s | FileCheck %s --check-prefixes=MIPSEL64R6
-; RUN: llc -mtriple=mipsel-linux-gnu -mcpu=mips32r5 -mattr=+msa,+fp64 < %s | FileCheck %s --check-prefixes=MIPSEL32R5
-
-declare <2 x i64> @llvm.mips.slli.d(<2 x i64>, i32)
-declare <2 x i64> @llvm.mips.srli.d(<2 x i64>, i32)
-
-declare <4 x i32> @llvm.mips.slli.w(<4 x i32>, i32)
-declare <4 x i32> @llvm.mips.srli.w(<4 x i32>, i32)
-
-; fold (shl (srl x, c1), c2) -> (and (srl x, (sub c1, c2)), MASK) if C1 > C2
-; MASK_TYPE1 = (C1-C2) 0s | 1s | ends with C2 0s
-define void @combine_shifts_to_shift_plus_and_mask_type1_i64(<2 x i64>* %a, <2 x i64>* %b) {
-; MIPSEL64R6-LABEL: combine_shifts_to_shift_plus_and_mask_type1_i64:
-; MIPSEL64R6: # %bb.0: # %entry
-; MIPSEL64R6-NEXT: ld.d $w0, 0($4)
-; MIPSEL64R6-NEXT: srli.d $w0, $w0, 1
-; MIPSEL64R6-NEXT: lui $1, 32760
-; MIPSEL64R6-NEXT: dsll32 $1, $1, 0
-; MIPSEL64R6-NEXT: fill.d $w1, $1
-; MIPSEL64R6-NEXT: and.v $w0, $w0, $w1
-; MIPSEL64R6-NEXT: jr $ra
-; MIPSEL64R6-NEXT: st.d $w0, 0($5)
-;
-; MIPSEL32R5-LABEL: combine_shifts_to_shift_plus_and_mask_type1_i64:
-; MIPSEL32R5: # %bb.0: # %entry
-; MIPSEL32R5-NEXT: ld.d $w0, 0($4)
-; MIPSEL32R5-NEXT: srli.d $w0, $w0, 52
-; MIPSEL32R5-NEXT: slli.d $w0, $w0, 51
-; MIPSEL32R5-NEXT: jr $ra
-; MIPSEL32R5-NEXT: st.d $w0, 0($5)
-entry:
- %0 = load <2 x i64>, <2 x i64>* %a
- %1 = tail call <2 x i64> @llvm.mips.srli.d(<2 x i64> %0, i32 52)
- %2 = tail call <2 x i64> @llvm.mips.slli.d(<2 x i64> %1, i32 51)
- store <2 x i64> %2, <2 x i64>* %b
- ret void
-}
-
-; fold (shl (srl x, c1), c2) -> (and (srl x, (sub c1, c2)), MASK) if C1 > C2
-define void @combine_shifts_to_shift_plus_and_mask_type1_i64_long(<2 x i64>* %a, <2 x i64>* %b) {
-; MIPSEL64R6-LABEL: combine_shifts_to_shift_plus_and_mask_type1_i64_long:
-; MIPSEL64R6: # %bb.0: # %entry
-; MIPSEL64R6-NEXT: lui $1, 65535
-; MIPSEL64R6-NEXT: ori $1, $1, 65520
-; MIPSEL64R6-NEXT: lui $2, 16383
-; MIPSEL64R6-NEXT: ori $2, $2, 65535
-; MIPSEL64R6-NEXT: dinsu $1, $2, 32, 32
-; MIPSEL64R6-NEXT: ld.d $w0, 0($4)
-; MIPSEL64R6-NEXT: srli.d $w0, $w0, 2
-; MIPSEL64R6-NEXT: fill.d $w1, $1
-; MIPSEL64R6-NEXT: and.v $w0, $w0, $w1
-; MIPSEL64R6-NEXT: jr $ra
-; MIPSEL64R6-NEXT: st.d $w0, 0($5)
-;
-; MIPSEL32R5-LABEL: combine_shifts_to_shift_plus_and_mask_type1_i64_long:
-; MIPSEL32R5: # %bb.0: # %entry
-; MIPSEL32R5-NEXT: ld.d $w0, 0($4)
-; MIPSEL32R5-NEXT: srli.d $w0, $w0, 6
-; MIPSEL32R5-NEXT: slli.d $w0, $w0, 4
-; MIPSEL32R5-NEXT: jr $ra
-; MIPSEL32R5-NEXT: st.d $w0, 0($5)
-entry:
- %0 = load <2 x i64>, <2 x i64>* %a
- %1 = tail call <2 x i64> @llvm.mips.srli.d(<2 x i64> %0, i32 6)
- %2 = tail call <2 x i64> @llvm.mips.slli.d(<2 x i64> %1, i32 4)
- store <2 x i64> %2, <2 x i64>* %b
- ret void
-}
-
-; fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1)), MASK) if C1 <= C2
-; MASK_TYPE2 = 1s | ends with C2 0s
-define void @combine_shifts_to_shift_plus_and_mask_type2_i32(<2 x i64>* %a, <2 x i64>* %b) {
-; MIPSEL64R6-LABEL: combine_shifts_to_shift_plus_and_mask_type2_i32:
-; MIPSEL64R6: # %bb.0: # %entry
-; MIPSEL64R6-NEXT: ld.d $w0, 0($4)
-; MIPSEL64R6-NEXT: slli.d $w0, $w0, 2
-; MIPSEL64R6-NEXT: ldi.d $w1, -64
-; MIPSEL64R6-NEXT: and.v $w0, $w0, $w1
-; MIPSEL64R6-NEXT: jr $ra
-; MIPSEL64R6-NEXT: st.d $w0, 0($5)
-;
-; MIPSEL32R5-LABEL: combine_shifts_to_shift_plus_and_mask_type2_i32:
-; MIPSEL32R5: # %bb.0: # %entry
-; MIPSEL32R5-NEXT: ld.d $w0, 0($4)
-; MIPSEL32R5-NEXT: srli.d $w0, $w0, 4
-; MIPSEL32R5-NEXT: slli.d $w0, $w0, 6
-; MIPSEL32R5-NEXT: jr $ra
-; MIPSEL32R5-NEXT: st.d $w0, 0($5)
-entry:
- %0 = load <2 x i64>, <2 x i64>* %a
- %1 = tail call <2 x i64> @llvm.mips.srli.d(<2 x i64> %0, i32 4)
- %2 = tail call <2 x i64> @llvm.mips.slli.d(<2 x i64> %1, i32 6)
- store <2 x i64> %2, <2 x i64>* %b
- ret void
-}
-
-; fold (shl (srl x, c1), c2) -> (and (srl x, (sub c1, c2)), MASK) if C1 > C2
-define void @combine_shifts_to_shift_plus_and_mask_type1_i32_long(<4 x i32>* %a, <4 x i32>* %b) {
-; MIPSEL64R6-LABEL: combine_shifts_to_shift_plus_and_mask_type1_i32_long:
-; MIPSEL64R6: # %bb.0: # %entry
-; MIPSEL64R6-NEXT: ld.w $w0, 0($4)
-; MIPSEL64R6-NEXT: srli.w $w0, $w0, 4
-; MIPSEL64R6-NEXT: lui $1, 4095
-; MIPSEL64R6-NEXT: ori $1, $1, 65528
-; MIPSEL64R6-NEXT: fill.w $w1, $1
-; MIPSEL64R6-NEXT: and.v $w0, $w0, $w1
-; MIPSEL64R6-NEXT: jr $ra
-; MIPSEL64R6-NEXT: st.w $w0, 0($5)
-;
-; MIPSEL32R5-LABEL: combine_shifts_to_shift_plus_and_mask_type1_i32_long:
-; MIPSEL32R5: # %bb.0: # %entry
-; MIPSEL32R5-NEXT: ld.w $w0, 0($4)
-; MIPSEL32R5-NEXT: srli.w $w0, $w0, 4
-; MIPSEL32R5-NEXT: lui $1, 4095
-; MIPSEL32R5-NEXT: ori $1, $1, 65528
-; MIPSEL32R5-NEXT: fill.w $w1, $1
-; MIPSEL32R5-NEXT: and.v $w0, $w0, $w1
-; MIPSEL32R5-NEXT: jr $ra
-; MIPSEL32R5-NEXT: st.w $w0, 0($5)
-entry:
- %0 = load <4 x i32>, <4 x i32>* %a
- %1 = tail call <4 x i32> @llvm.mips.srli.w(<4 x i32> %0, i32 7)
- %2 = tail call <4 x i32> @llvm.mips.slli.w(<4 x i32> %1, i32 3)
- store <4 x i32> %2, <4 x i32>* %b
- ret void
-}
-
-; fold (shl (srl x, c1), c1) -> (and x, (shl -1, c1))
-define void @combine_shifts_to_and_mask_type2_i64_long(<2 x i64>* %a, <2 x i64>* %b) {
-; MIPSEL64R6-LABEL: combine_shifts_to_and_mask_type2_i64_long:
-; MIPSEL64R6: # %bb.0: # %entry
-; MIPSEL64R6-NEXT: lui $1, 65535
-; MIPSEL64R6-NEXT: ori $1, $1, 65472
-; MIPSEL64R6-NEXT: dsll32 $1, $1, 0
-; MIPSEL64R6-NEXT: ld.d $w0, 0($4)
-; MIPSEL64R6-NEXT: fill.d $w1, $1
-; MIPSEL64R6-NEXT: and.v $w0, $w0, $w1
-; MIPSEL64R6-NEXT: jr $ra
-; MIPSEL64R6-NEXT: st.d $w0, 0($5)
-;
-; MIPSEL32R5-LABEL: combine_shifts_to_and_mask_type2_i64_long:
-; MIPSEL32R5: # %bb.0: # %entry
-; MIPSEL32R5-NEXT: ld.d $w0, 0($4)
-; MIPSEL32R5-NEXT: srli.d $w0, $w0, 38
-; MIPSEL32R5-NEXT: slli.d $w0, $w0, 38
-; MIPSEL32R5-NEXT: jr $ra
-; MIPSEL32R5-NEXT: st.d $w0, 0($5)
-entry:
- %0 = load <2 x i64>, <2 x i64>* %a
- %1 = tail call <2 x i64> @llvm.mips.srli.d(<2 x i64> %0, i32 38)
- %2 = tail call <2 x i64> @llvm.mips.slli.d(<2 x i64> %1, i32 38)
- store <2 x i64> %2, <2 x i64>* %b
- ret void
-}
-
-; fold (shl (srl x, c1), c1) -> (and x, (shl -1, c1))
-define void @combine_shifts_to_and_mask_type2_i64(<2 x i64>* %a, <2 x i64>* %b) {
-; MIPSEL64R6-LABEL: combine_shifts_to_and_mask_type2_i64:
-; MIPSEL64R6: # %bb.0: # %entry
-; MIPSEL64R6-NEXT: ld.d $w0, 0($4)
-; MIPSEL64R6-NEXT: ldi.d $w1, -8
-; MIPSEL64R6-NEXT: and.v $w0, $w0, $w1
-; MIPSEL64R6-NEXT: jr $ra
-; MIPSEL64R6-NEXT: st.d $w0, 0($5)
-;
-; MIPSEL32R5-LABEL: combine_shifts_to_and_mask_type2_i64:
-; MIPSEL32R5: # %bb.0: # %entry
-; MIPSEL32R5-NEXT: ld.d $w0, 0($4)
-; MIPSEL32R5-NEXT: srli.d $w0, $w0, 3
-; MIPSEL32R5-NEXT: slli.d $w0, $w0, 3
-; MIPSEL32R5-NEXT: jr $ra
-; MIPSEL32R5-NEXT: st.d $w0, 0($5)
-entry:
- %0 = load <2 x i64>, <2 x i64>* %a
- %1 = tail call <2 x i64> @llvm.mips.srli.d(<2 x i64> %0, i32 3)
- %2 = tail call <2 x i64> @llvm.mips.slli.d(<2 x i64> %1, i32 3)
- store <2 x i64> %2, <2 x i64>* %b
- ret void
-}
-
-; fold (shl (srl x, c1), c1) -> (and x, (shl -1, c1))
-define void @combine_shifts_to_and_mask_type1_long_i32_a(<4 x i32>* %a, <4 x i32>* %b) {
-; MIPSEL64R6-LABEL: combine_shifts_to_and_mask_type1_long_i32_a:
-; MIPSEL64R6: # %bb.0: # %entry
-; MIPSEL64R6-NEXT: ld.w $w0, 0($4)
-; MIPSEL64R6-NEXT: ldi.w $w1, -32
-; MIPSEL64R6-NEXT: and.v $w0, $w0, $w1
-; MIPSEL64R6-NEXT: jr $ra
-; MIPSEL64R6-NEXT: st.w $w0, 0($5)
-;
-; MIPSEL32R5-LABEL: combine_shifts_to_and_mask_type1_long_i32_a:
-; MIPSEL32R5: # %bb.0: # %entry
-; MIPSEL32R5-NEXT: ld.w $w0, 0($4)
-; MIPSEL32R5-NEXT: ldi.w $w1, -32
-; MIPSEL32R5-NEXT: and.v $w0, $w0, $w1
-; MIPSEL32R5-NEXT: jr $ra
-; MIPSEL32R5-NEXT: st.w $w0, 0($5)
-entry:
- %0 = load <4 x i32>, <4 x i32>* %a
- %1 = tail call <4 x i32> @llvm.mips.srli.w(<4 x i32> %0, i32 5)
- %2 = tail call <4 x i32> @llvm.mips.slli.w(<4 x i32> %1, i32 5)
- store <4 x i32> %2, <4 x i32>* %b
- ret void
-}
-
-; fold (shl (srl x, c1), c1) -> (and x, (shl -1, c1))
-define void @combine_shifts_to_and_mask_type1_long_i32_b(<4 x i32>* %a, <4 x i32>* %b) {
-; MIPSEL64R6-LABEL: combine_shifts_to_and_mask_type1_long_i32_b:
-; MIPSEL64R6: # %bb.0: # %entry
-; MIPSEL64R6-NEXT: lui $1, 49152
-; MIPSEL64R6-NEXT: fill.w $w0, $1
-; MIPSEL64R6-NEXT: ld.w $w1, 0($4)
-; MIPSEL64R6-NEXT: and.v $w0, $w1, $w0
-; MIPSEL64R6-NEXT: jr $ra
-; MIPSEL64R6-NEXT: st.w $w0, 0($5)
-;
-; MIPSEL32R5-LABEL: combine_shifts_to_and_mask_type1_long_i32_b:
-; MIPSEL32R5: # %bb.0: # %entry
-; MIPSEL32R5-NEXT: lui $1, 49152
-; MIPSEL32R5-NEXT: fill.w $w0, $1
-; MIPSEL32R5-NEXT: ld.w $w1, 0($4)
-; MIPSEL32R5-NEXT: and.v $w0, $w1, $w0
-; MIPSEL32R5-NEXT: jr $ra
-; MIPSEL32R5-NEXT: st.w $w0, 0($5)
-entry:
- %0 = load <4 x i32>, <4 x i32>* %a
- %1 = tail call <4 x i32> @llvm.mips.srli.w(<4 x i32> %0, i32 30)
- %2 = tail call <4 x i32> @llvm.mips.slli.w(<4 x i32> %1, i32 30)
- store <4 x i32> %2, <4 x i32>* %b
- ret void
-}