[llvm] r207840 - AArch64/ARM64: add patterns for post-indexed ST1 ops.
Tim Northover
tnorthover at apple.com
Fri May 2 07:54:27 PDT 2014
Author: tnorthover
Date: Fri May 2 09:54:27 2014
New Revision: 207840
URL: http://llvm.org/viewvc/llvm-project?rev=207840&view=rev
Log:
AArch64/ARM64: add patterns for post-indexed ST1 ops.
Modified:
llvm/trunk/lib/Target/ARM64/ARM64InstrInfo.td
llvm/trunk/test/CodeGen/ARM64/indexed-vector-ldst.ll
Modified: llvm/trunk/lib/Target/ARM64/ARM64InstrInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM64/ARM64InstrInfo.td?rev=207840&r1=207839&r2=207840&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM64/ARM64InstrInfo.td (original)
+++ llvm/trunk/lib/Target/ARM64/ARM64InstrInfo.td Fri May 2 09:54:27 2014
@@ -4416,6 +4416,53 @@ def : St1Lane64Pat<truncstorei16, Vector
def : St1Lane64Pat<store, VectorIndexS, v2i32, i32, ST1i32>;
def : St1Lane64Pat<store, VectorIndexS, v2f32, f32, ST1i32>;
+multiclass St1LanePost64Pat<SDPatternOperator scalar_store, Operand VecIndex,
+ ValueType VTy, ValueType STy, Instruction ST1,
+ int offset> {
+ def : Pat<(scalar_store
+ (STy (vector_extract (VTy VecListOne64:$Vt), VecIndex:$idx)),
+ am_simdnoindex:$vaddr, offset),
+ (ST1 (SUBREG_TO_REG (i32 0), VecListOne64:$Vt, dsub),
+ VecIndex:$idx, am_simdnoindex:$vaddr, XZR)>;
+
+ def : Pat<(scalar_store
+ (STy (vector_extract (VTy VecListOne64:$Vt), VecIndex:$idx)),
+ am_simdnoindex:$vaddr, GPR64:$Rm),
+ (ST1 (SUBREG_TO_REG (i32 0), VecListOne64:$Vt, dsub),
+ VecIndex:$idx, am_simdnoindex:$vaddr, $Rm)>;
+}
+
+defm : St1LanePost64Pat<post_truncsti8, VectorIndexB, v8i8, i32, ST1i8_POST, 1>;
+defm : St1LanePost64Pat<post_truncsti16, VectorIndexH, v4i16, i32, ST1i16_POST,
+ 2>;
+defm : St1LanePost64Pat<post_store, VectorIndexS, v2i32, i32, ST1i32_POST, 4>;
+defm : St1LanePost64Pat<post_store, VectorIndexS, v2f32, f32, ST1i32_POST, 4>;
+defm : St1LanePost64Pat<post_store, VectorIndexD, v1i64, i64, ST1i64_POST, 8>;
+defm : St1LanePost64Pat<post_store, VectorIndexD, v1f64, f64, ST1i64_POST, 8>;
+
+multiclass St1LanePost128Pat<SDPatternOperator scalar_store, Operand VecIndex,
+ ValueType VTy, ValueType STy, Instruction ST1,
+ int offset> {
+ def : Pat<(scalar_store
+ (STy (vector_extract (VTy VecListOne128:$Vt), VecIndex:$idx)),
+ am_simdnoindex:$vaddr, offset),
+ (ST1 VecListOne128:$Vt, VecIndex:$idx, am_simdnoindex:$vaddr, XZR)>;
+
+ def : Pat<(scalar_store
+ (STy (vector_extract (VTy VecListOne128:$Vt), VecIndex:$idx)),
+ am_simdnoindex:$vaddr, GPR64:$Rm),
+ (ST1 VecListOne128:$Vt, VecIndex:$idx, am_simdnoindex:$vaddr, $Rm)>;
+}
+
+defm : St1LanePost128Pat<post_truncsti8, VectorIndexB, v16i8, i32, ST1i8_POST,
+ 1>;
+defm : St1LanePost128Pat<post_truncsti16, VectorIndexH, v8i16, i32, ST1i16_POST,
+ 2>;
+defm : St1LanePost128Pat<post_store, VectorIndexS, v4i32, i32, ST1i32_POST, 4>;
+defm : St1LanePost128Pat<post_store, VectorIndexS, v4f32, f32, ST1i32_POST, 4>;
+defm : St1LanePost128Pat<post_store, VectorIndexD, v2i64, i64, ST1i64_POST, 8>;
+defm : St1LanePost128Pat<post_store, VectorIndexD, v2f64, f64, ST1i64_POST, 8>;
+
let mayStore = 1, neverHasSideEffects = 1 in {
defm ST2 : SIMDStSingleB<1, 0b000, "st2", VecListTwob, GPR64pi2>;
defm ST2 : SIMDStSingleH<1, 0b010, 0, "st2", VecListTwoh, GPR64pi4>;
Modified: llvm/trunk/test/CodeGen/ARM64/indexed-vector-ldst.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/indexed-vector-ldst.ll?rev=207840&r1=207839&r2=207840&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/ARM64/indexed-vector-ldst.ll (original)
+++ llvm/trunk/test/CodeGen/ARM64/indexed-vector-ldst.ll Fri May 2 09:54:27 2014
@@ -400,3 +400,214 @@ define void @test_v2f64_post_store(<2 x
store <2 x double>* %newaddr, <2 x double>** bitcast(i8** @ptr to <2 x double>**)
ret void
}
+
+define i8* @test_v16i8_post_imm_st1_lane(<16 x i8> %in, i8* %addr) {
+; CHECK-LABEL: test_v16i8_post_imm_st1_lane:
+; CHECK: st1.b { v0 }[3], [x0], #1
+ %elt = extractelement <16 x i8> %in, i32 3
+ store i8 %elt, i8* %addr
+
+ %newaddr = getelementptr i8* %addr, i32 1
+ ret i8* %newaddr
+}
+
+define i8* @test_v16i8_post_reg_st1_lane(<16 x i8> %in, i8* %addr) {
+; CHECK-LABEL: test_v16i8_post_reg_st1_lane:
+; CHECK: orr w[[OFFSET:[0-9]+]], wzr, #0x2
+; CHECK: st1.b { v0 }[3], [x0], x[[OFFSET]]
+ %elt = extractelement <16 x i8> %in, i32 3
+ store i8 %elt, i8* %addr
+
+ %newaddr = getelementptr i8* %addr, i32 2
+ ret i8* %newaddr
+}
+
+
+define i16* @test_v8i16_post_imm_st1_lane(<8 x i16> %in, i16* %addr) {
+; CHECK-LABEL: test_v8i16_post_imm_st1_lane:
+; CHECK: st1.h { v0 }[3], [x0], #2
+ %elt = extractelement <8 x i16> %in, i32 3
+ store i16 %elt, i16* %addr
+
+ %newaddr = getelementptr i16* %addr, i32 1
+ ret i16* %newaddr
+}
+
+define i16* @test_v8i16_post_reg_st1_lane(<8 x i16> %in, i16* %addr) {
+; CHECK-LABEL: test_v8i16_post_reg_st1_lane:
+; CHECK: orr w[[OFFSET:[0-9]+]], wzr, #0x4
+; CHECK: st1.h { v0 }[3], [x0], x[[OFFSET]]
+ %elt = extractelement <8 x i16> %in, i32 3
+ store i16 %elt, i16* %addr
+
+ %newaddr = getelementptr i16* %addr, i32 2
+ ret i16* %newaddr
+}
+
+define i32* @test_v4i32_post_imm_st1_lane(<4 x i32> %in, i32* %addr) {
+; CHECK-LABEL: test_v4i32_post_imm_st1_lane:
+; CHECK: st1.s { v0 }[3], [x0], #4
+ %elt = extractelement <4 x i32> %in, i32 3
+ store i32 %elt, i32* %addr
+
+ %newaddr = getelementptr i32* %addr, i32 1
+ ret i32* %newaddr
+}
+
+define i32* @test_v4i32_post_reg_st1_lane(<4 x i32> %in, i32* %addr) {
+; CHECK-LABEL: test_v4i32_post_reg_st1_lane:
+; CHECK: orr w[[OFFSET:[0-9]+]], wzr, #0x8
+; CHECK: st1.s { v0 }[3], [x0], x[[OFFSET]]
+ %elt = extractelement <4 x i32> %in, i32 3
+ store i32 %elt, i32* %addr
+
+ %newaddr = getelementptr i32* %addr, i32 2
+ ret i32* %newaddr
+}
+
+define float* @test_v4f32_post_imm_st1_lane(<4 x float> %in, float* %addr) {
+; CHECK-LABEL: test_v4f32_post_imm_st1_lane:
+; CHECK: st1.s { v0 }[3], [x0], #4
+ %elt = extractelement <4 x float> %in, i32 3
+ store float %elt, float* %addr
+
+ %newaddr = getelementptr float* %addr, i32 1
+ ret float* %newaddr
+}
+
+define float* @test_v4f32_post_reg_st1_lane(<4 x float> %in, float* %addr) {
+; CHECK-LABEL: test_v4f32_post_reg_st1_lane:
+; CHECK: orr w[[OFFSET:[0-9]+]], wzr, #0x8
+; CHECK: st1.s { v0 }[3], [x0], x[[OFFSET]]
+ %elt = extractelement <4 x float> %in, i32 3
+ store float %elt, float* %addr
+
+ %newaddr = getelementptr float* %addr, i32 2
+ ret float* %newaddr
+}
+
+define i64* @test_v2i64_post_imm_st1_lane(<2 x i64> %in, i64* %addr) {
+; CHECK-LABEL: test_v2i64_post_imm_st1_lane:
+; CHECK: st1.d { v0 }[1], [x0], #8
+ %elt = extractelement <2 x i64> %in, i64 1
+ store i64 %elt, i64* %addr
+
+ %newaddr = getelementptr i64* %addr, i64 1
+ ret i64* %newaddr
+}
+
+define i64* @test_v2i64_post_reg_st1_lane(<2 x i64> %in, i64* %addr) {
+; CHECK-LABEL: test_v2i64_post_reg_st1_lane:
+; CHECK: orr w[[OFFSET:[0-9]+]], wzr, #0x10
+; CHECK: st1.d { v0 }[1], [x0], x[[OFFSET]]
+ %elt = extractelement <2 x i64> %in, i64 1
+ store i64 %elt, i64* %addr
+
+ %newaddr = getelementptr i64* %addr, i64 2
+ ret i64* %newaddr
+}
+
+define double* @test_v2f64_post_imm_st1_lane(<2 x double> %in, double* %addr) {
+; CHECK-LABEL: test_v2f64_post_imm_st1_lane:
+; CHECK: st1.d { v0 }[1], [x0], #8
+ %elt = extractelement <2 x double> %in, i32 1
+ store double %elt, double* %addr
+
+ %newaddr = getelementptr double* %addr, i32 1
+ ret double* %newaddr
+}
+
+define double* @test_v2f64_post_reg_st1_lane(<2 x double> %in, double* %addr) {
+; CHECK-LABEL: test_v2f64_post_reg_st1_lane:
+; CHECK: orr w[[OFFSET:[0-9]+]], wzr, #0x10
+; CHECK: st1.d { v0 }[1], [x0], x[[OFFSET]]
+ %elt = extractelement <2 x double> %in, i32 1
+ store double %elt, double* %addr
+
+ %newaddr = getelementptr double* %addr, i32 2
+ ret double* %newaddr
+}
+
+define i8* @test_v8i8_post_imm_st1_lane(<8 x i8> %in, i8* %addr) {
+; CHECK-LABEL: test_v8i8_post_imm_st1_lane:
+; CHECK: st1.b { v0 }[3], [x0], #1
+ %elt = extractelement <8 x i8> %in, i32 3
+ store i8 %elt, i8* %addr
+
+ %newaddr = getelementptr i8* %addr, i32 1
+ ret i8* %newaddr
+}
+
+define i8* @test_v8i8_post_reg_st1_lane(<8 x i8> %in, i8* %addr) {
+; CHECK-LABEL: test_v8i8_post_reg_st1_lane:
+; CHECK: orr w[[OFFSET:[0-9]+]], wzr, #0x2
+; CHECK: st1.b { v0 }[3], [x0], x[[OFFSET]]
+ %elt = extractelement <8 x i8> %in, i32 3
+ store i8 %elt, i8* %addr
+
+ %newaddr = getelementptr i8* %addr, i32 2
+ ret i8* %newaddr
+}
+
+define i16* @test_v4i16_post_imm_st1_lane(<4 x i16> %in, i16* %addr) {
+; CHECK-LABEL: test_v4i16_post_imm_st1_lane:
+; CHECK: st1.h { v0 }[3], [x0], #2
+ %elt = extractelement <4 x i16> %in, i32 3
+ store i16 %elt, i16* %addr
+
+ %newaddr = getelementptr i16* %addr, i32 1
+ ret i16* %newaddr
+}
+
+define i16* @test_v4i16_post_reg_st1_lane(<4 x i16> %in, i16* %addr) {
+; CHECK-LABEL: test_v4i16_post_reg_st1_lane:
+; CHECK: orr w[[OFFSET:[0-9]+]], wzr, #0x4
+; CHECK: st1.h { v0 }[3], [x0], x[[OFFSET]]
+ %elt = extractelement <4 x i16> %in, i32 3
+ store i16 %elt, i16* %addr
+
+ %newaddr = getelementptr i16* %addr, i32 2
+ ret i16* %newaddr
+}
+
+define i32* @test_v2i32_post_imm_st1_lane(<2 x i32> %in, i32* %addr) {
+; CHECK-LABEL: test_v2i32_post_imm_st1_lane:
+; CHECK: st1.s { v0 }[1], [x0], #4
+ %elt = extractelement <2 x i32> %in, i32 1
+ store i32 %elt, i32* %addr
+
+ %newaddr = getelementptr i32* %addr, i32 1
+ ret i32* %newaddr
+}
+
+define i32* @test_v2i32_post_reg_st1_lane(<2 x i32> %in, i32* %addr) {
+; CHECK-LABEL: test_v2i32_post_reg_st1_lane:
+; CHECK: orr w[[OFFSET:[0-9]+]], wzr, #0x8
+; CHECK: st1.s { v0 }[1], [x0], x[[OFFSET]]
+ %elt = extractelement <2 x i32> %in, i32 1
+ store i32 %elt, i32* %addr
+
+ %newaddr = getelementptr i32* %addr, i32 2
+ ret i32* %newaddr
+}
+
+define float* @test_v2f32_post_imm_st1_lane(<2 x float> %in, float* %addr) {
+; CHECK-LABEL: test_v2f32_post_imm_st1_lane:
+; CHECK: st1.s { v0 }[1], [x0], #4
+ %elt = extractelement <2 x float> %in, i32 1
+ store float %elt, float* %addr
+
+ %newaddr = getelementptr float* %addr, i32 1
+ ret float* %newaddr
+}
+
+define float* @test_v2f32_post_reg_st1_lane(<2 x float> %in, float* %addr) {
+; CHECK-LABEL: test_v2f32_post_reg_st1_lane:
+; CHECK: orr w[[OFFSET:[0-9]+]], wzr, #0x8
+; CHECK: st1.s { v0 }[1], [x0], x[[OFFSET]]
+ %elt = extractelement <2 x float> %in, i32 1
+ store float %elt, float* %addr
+
+ %newaddr = getelementptr float* %addr, i32 2
+ ret float* %newaddr
+}
More information about the llvm-commits
mailing list