[llvm-commits] [llvm] r145927 - in /llvm/trunk: lib/Target/X86/X86InstrSSE.td test/CodeGen/X86/avx-intrinsics-x86.ll
Craig Topper
craig.topper at gmail.com
Tue Dec 6 01:04:59 PST 2011
Author: ctopper
Date: Tue Dec 6 03:04:59 2011
New Revision: 145927
URL: http://llvm.org/viewvc/llvm-project?rev=145927&view=rev
Log:
Fix a bunch of SSE/AVX patterns to use v2i64/v4i64 loads since all other integer vector loads are promoted to those.
Modified:
llvm/trunk/lib/Target/X86/X86InstrSSE.td
llvm/trunk/test/CodeGen/X86/avx-intrinsics-x86.ll
Modified: llvm/trunk/lib/Target/X86/X86InstrSSE.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrSSE.td?rev=145927&r1=145926&r2=145927&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrSSE.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrSSE.td Tue Dec 6 03:04:59 2011
@@ -5092,7 +5092,7 @@
/// SS3I_unop_rm_int - Simple SSSE3 unary op whose type can be v*{i8,i16,i32}.
multiclass SS3I_unop_rm_int<bits<8> opc, string OpcodeStr,
- PatFrag mem_frag128, Intrinsic IntId128> {
+ Intrinsic IntId128> {
def rr128 : SS38I<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
@@ -5104,12 +5104,12 @@
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set VR128:$dst,
(IntId128
- (bitconvert (mem_frag128 addr:$src))))]>, OpSize;
+ (bitconvert (memopv2i64 addr:$src))))]>, OpSize;
}
/// SS3I_unop_rm_int_y - Simple SSSE3 unary op whose type can be v*{i8,i16,i32}.
multiclass SS3I_unop_rm_int_y<bits<8> opc, string OpcodeStr,
- PatFrag mem_frag256, Intrinsic IntId256> {
+ Intrinsic IntId256> {
def rr256 : SS38I<opc, MRMSrcReg, (outs VR256:$dst),
(ins VR256:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
@@ -5121,32 +5121,32 @@
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set VR256:$dst,
(IntId256
- (bitconvert (mem_frag256 addr:$src))))]>, OpSize;
+ (bitconvert (memopv4i64 addr:$src))))]>, OpSize;
}
let Predicates = [HasAVX] in {
- defm VPABSB : SS3I_unop_rm_int<0x1C, "vpabsb", memopv16i8,
+ defm VPABSB : SS3I_unop_rm_int<0x1C, "vpabsb",
int_x86_ssse3_pabs_b_128>, VEX;
- defm VPABSW : SS3I_unop_rm_int<0x1D, "vpabsw", memopv8i16,
+ defm VPABSW : SS3I_unop_rm_int<0x1D, "vpabsw",
int_x86_ssse3_pabs_w_128>, VEX;
- defm VPABSD : SS3I_unop_rm_int<0x1E, "vpabsd", memopv4i32,
+ defm VPABSD : SS3I_unop_rm_int<0x1E, "vpabsd",
int_x86_ssse3_pabs_d_128>, VEX;
}
let Predicates = [HasAVX2] in {
- defm VPABSB : SS3I_unop_rm_int_y<0x1C, "vpabsb", memopv32i8,
+ defm VPABSB : SS3I_unop_rm_int_y<0x1C, "vpabsb",
int_x86_avx2_pabs_b>, VEX;
- defm VPABSW : SS3I_unop_rm_int_y<0x1D, "vpabsw", memopv16i16,
+ defm VPABSW : SS3I_unop_rm_int_y<0x1D, "vpabsw",
int_x86_avx2_pabs_w>, VEX;
- defm VPABSD : SS3I_unop_rm_int_y<0x1E, "vpabsd", memopv8i32,
+ defm VPABSD : SS3I_unop_rm_int_y<0x1E, "vpabsd",
int_x86_avx2_pabs_d>, VEX;
}
-defm PABSB : SS3I_unop_rm_int<0x1C, "pabsb", memopv16i8,
+defm PABSB : SS3I_unop_rm_int<0x1C, "pabsb",
int_x86_ssse3_pabs_b_128>;
-defm PABSW : SS3I_unop_rm_int<0x1D, "pabsw", memopv8i16,
+defm PABSW : SS3I_unop_rm_int<0x1D, "pabsw",
int_x86_ssse3_pabs_w_128>;
-defm PABSD : SS3I_unop_rm_int<0x1E, "pabsd", memopv4i32,
+defm PABSD : SS3I_unop_rm_int<0x1E, "pabsd",
int_x86_ssse3_pabs_d_128>;
//===---------------------------------------------------------------------===//
@@ -5155,8 +5155,7 @@
/// SS3I_binop_rm_int - Simple SSSE3 bin op whose type can be v*{i8,i16,i32}.
multiclass SS3I_binop_rm_int<bits<8> opc, string OpcodeStr,
- PatFrag mem_frag128, Intrinsic IntId128,
- bit Is2Addr = 1> {
+ Intrinsic IntId128, bit Is2Addr = 1> {
let isCommutable = 1 in
def rr128 : SS38I<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2),
@@ -5172,11 +5171,11 @@
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set VR128:$dst,
(IntId128 VR128:$src1,
- (bitconvert (mem_frag128 addr:$src2))))]>, OpSize;
+ (bitconvert (memopv2i64 addr:$src2))))]>, OpSize;
}
multiclass SS3I_binop_rm_int_y<bits<8> opc, string OpcodeStr,
- PatFrag mem_frag256, Intrinsic IntId256> {
+ Intrinsic IntId256> {
let isCommutable = 1 in
def rr256 : SS38I<opc, MRMSrcReg, (outs VR256:$dst),
(ins VR256:$src1, VR256:$src2),
@@ -5188,94 +5187,94 @@
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR256:$dst,
(IntId256 VR256:$src1,
- (bitconvert (mem_frag256 addr:$src2))))]>, OpSize;
+ (bitconvert (memopv4i64 addr:$src2))))]>, OpSize;
}
let ImmT = NoImm, Predicates = [HasAVX] in {
let isCommutable = 0 in {
- defm VPHADDW : SS3I_binop_rm_int<0x01, "vphaddw", memopv8i16,
+ defm VPHADDW : SS3I_binop_rm_int<0x01, "vphaddw",
int_x86_ssse3_phadd_w_128, 0>, VEX_4V;
- defm VPHADDD : SS3I_binop_rm_int<0x02, "vphaddd", memopv4i32,
+ defm VPHADDD : SS3I_binop_rm_int<0x02, "vphaddd",
int_x86_ssse3_phadd_d_128, 0>, VEX_4V;
- defm VPHADDSW : SS3I_binop_rm_int<0x03, "vphaddsw", memopv8i16,
+ defm VPHADDSW : SS3I_binop_rm_int<0x03, "vphaddsw",
int_x86_ssse3_phadd_sw_128, 0>, VEX_4V;
- defm VPHSUBW : SS3I_binop_rm_int<0x05, "vphsubw", memopv8i16,
+ defm VPHSUBW : SS3I_binop_rm_int<0x05, "vphsubw",
int_x86_ssse3_phsub_w_128, 0>, VEX_4V;
- defm VPHSUBD : SS3I_binop_rm_int<0x06, "vphsubd", memopv4i32,
+ defm VPHSUBD : SS3I_binop_rm_int<0x06, "vphsubd",
int_x86_ssse3_phsub_d_128, 0>, VEX_4V;
- defm VPHSUBSW : SS3I_binop_rm_int<0x07, "vphsubsw", memopv8i16,
+ defm VPHSUBSW : SS3I_binop_rm_int<0x07, "vphsubsw",
int_x86_ssse3_phsub_sw_128, 0>, VEX_4V;
- defm VPMADDUBSW : SS3I_binop_rm_int<0x04, "vpmaddubsw", memopv16i8,
+ defm VPMADDUBSW : SS3I_binop_rm_int<0x04, "vpmaddubsw",
int_x86_ssse3_pmadd_ub_sw_128, 0>, VEX_4V;
- defm VPSHUFB : SS3I_binop_rm_int<0x00, "vpshufb", memopv16i8,
+ defm VPSHUFB : SS3I_binop_rm_int<0x00, "vpshufb",
int_x86_ssse3_pshuf_b_128, 0>, VEX_4V;
- defm VPSIGNB : SS3I_binop_rm_int<0x08, "vpsignb", memopv16i8,
+ defm VPSIGNB : SS3I_binop_rm_int<0x08, "vpsignb",
int_x86_ssse3_psign_b_128, 0>, VEX_4V;
- defm VPSIGNW : SS3I_binop_rm_int<0x09, "vpsignw", memopv8i16,
+ defm VPSIGNW : SS3I_binop_rm_int<0x09, "vpsignw",
int_x86_ssse3_psign_w_128, 0>, VEX_4V;
- defm VPSIGND : SS3I_binop_rm_int<0x0A, "vpsignd", memopv4i32,
+ defm VPSIGND : SS3I_binop_rm_int<0x0A, "vpsignd",
int_x86_ssse3_psign_d_128, 0>, VEX_4V;
}
-defm VPMULHRSW : SS3I_binop_rm_int<0x0B, "vpmulhrsw", memopv8i16,
+defm VPMULHRSW : SS3I_binop_rm_int<0x0B, "vpmulhrsw",
int_x86_ssse3_pmul_hr_sw_128, 0>, VEX_4V;
}
let ImmT = NoImm, Predicates = [HasAVX2] in {
let isCommutable = 0 in {
- defm VPHADDW : SS3I_binop_rm_int_y<0x01, "vphaddw", memopv16i16,
+ defm VPHADDW : SS3I_binop_rm_int_y<0x01, "vphaddw",
int_x86_avx2_phadd_w>, VEX_4V;
- defm VPHADDD : SS3I_binop_rm_int_y<0x02, "vphaddd", memopv8i32,
+ defm VPHADDD : SS3I_binop_rm_int_y<0x02, "vphaddd",
int_x86_avx2_phadd_d>, VEX_4V;
- defm VPHADDSW : SS3I_binop_rm_int_y<0x03, "vphaddsw", memopv16i16,
+ defm VPHADDSW : SS3I_binop_rm_int_y<0x03, "vphaddsw",
int_x86_avx2_phadd_sw>, VEX_4V;
- defm VPHSUBW : SS3I_binop_rm_int_y<0x05, "vphsubw", memopv16i16,
+ defm VPHSUBW : SS3I_binop_rm_int_y<0x05, "vphsubw",
int_x86_avx2_phsub_w>, VEX_4V;
- defm VPHSUBD : SS3I_binop_rm_int_y<0x06, "vphsubd", memopv8i32,
+ defm VPHSUBD : SS3I_binop_rm_int_y<0x06, "vphsubd",
int_x86_avx2_phsub_d>, VEX_4V;
- defm VPHSUBSW : SS3I_binop_rm_int_y<0x07, "vphsubsw", memopv16i16,
+ defm VPHSUBSW : SS3I_binop_rm_int_y<0x07, "vphsubsw",
int_x86_avx2_phsub_sw>, VEX_4V;
- defm VPMADDUBSW : SS3I_binop_rm_int_y<0x04, "vpmaddubsw", memopv32i8,
+ defm VPMADDUBSW : SS3I_binop_rm_int_y<0x04, "vpmaddubsw",
int_x86_avx2_pmadd_ub_sw>, VEX_4V;
- defm VPSHUFB : SS3I_binop_rm_int_y<0x00, "vpshufb", memopv32i8,
+ defm VPSHUFB : SS3I_binop_rm_int_y<0x00, "vpshufb",
int_x86_avx2_pshuf_b>, VEX_4V;
- defm VPSIGNB : SS3I_binop_rm_int_y<0x08, "vpsignb", memopv32i8,
+ defm VPSIGNB : SS3I_binop_rm_int_y<0x08, "vpsignb",
int_x86_avx2_psign_b>, VEX_4V;
- defm VPSIGNW : SS3I_binop_rm_int_y<0x09, "vpsignw", memopv16i16,
+ defm VPSIGNW : SS3I_binop_rm_int_y<0x09, "vpsignw",
int_x86_avx2_psign_w>, VEX_4V;
- defm VPSIGND : SS3I_binop_rm_int_y<0x0A, "vpsignd", memopv8i32,
+ defm VPSIGND : SS3I_binop_rm_int_y<0x0A, "vpsignd",
int_x86_avx2_psign_d>, VEX_4V;
}
-defm VPMULHRSW : SS3I_binop_rm_int_y<0x0B, "vpmulhrsw", memopv16i16,
+defm VPMULHRSW : SS3I_binop_rm_int_y<0x0B, "vpmulhrsw",
int_x86_avx2_pmul_hr_sw>, VEX_4V;
}
// None of these have i8 immediate fields.
let ImmT = NoImm, Constraints = "$src1 = $dst" in {
let isCommutable = 0 in {
- defm PHADDW : SS3I_binop_rm_int<0x01, "phaddw", memopv8i16,
+ defm PHADDW : SS3I_binop_rm_int<0x01, "phaddw",
int_x86_ssse3_phadd_w_128>;
- defm PHADDD : SS3I_binop_rm_int<0x02, "phaddd", memopv4i32,
+ defm PHADDD : SS3I_binop_rm_int<0x02, "phaddd",
int_x86_ssse3_phadd_d_128>;
- defm PHADDSW : SS3I_binop_rm_int<0x03, "phaddsw", memopv8i16,
+ defm PHADDSW : SS3I_binop_rm_int<0x03, "phaddsw",
int_x86_ssse3_phadd_sw_128>;
- defm PHSUBW : SS3I_binop_rm_int<0x05, "phsubw", memopv8i16,
+ defm PHSUBW : SS3I_binop_rm_int<0x05, "phsubw",
int_x86_ssse3_phsub_w_128>;
- defm PHSUBD : SS3I_binop_rm_int<0x06, "phsubd", memopv4i32,
+ defm PHSUBD : SS3I_binop_rm_int<0x06, "phsubd",
int_x86_ssse3_phsub_d_128>;
- defm PHSUBSW : SS3I_binop_rm_int<0x07, "phsubsw", memopv8i16,
+ defm PHSUBSW : SS3I_binop_rm_int<0x07, "phsubsw",
int_x86_ssse3_phsub_sw_128>;
- defm PMADDUBSW : SS3I_binop_rm_int<0x04, "pmaddubsw", memopv16i8,
+ defm PMADDUBSW : SS3I_binop_rm_int<0x04, "pmaddubsw",
int_x86_ssse3_pmadd_ub_sw_128>;
- defm PSHUFB : SS3I_binop_rm_int<0x00, "pshufb", memopv16i8,
+ defm PSHUFB : SS3I_binop_rm_int<0x00, "pshufb",
int_x86_ssse3_pshuf_b_128>;
- defm PSIGNB : SS3I_binop_rm_int<0x08, "psignb", memopv16i8,
+ defm PSIGNB : SS3I_binop_rm_int<0x08, "psignb",
int_x86_ssse3_psign_b_128>;
- defm PSIGNW : SS3I_binop_rm_int<0x09, "psignw", memopv8i16,
+ defm PSIGNW : SS3I_binop_rm_int<0x09, "psignw",
int_x86_ssse3_psign_w_128>;
- defm PSIGND : SS3I_binop_rm_int<0x0A, "psignd", memopv4i32,
+ defm PSIGND : SS3I_binop_rm_int<0x0A, "psignd",
int_x86_ssse3_psign_d_128>;
}
-defm PMULHRSW : SS3I_binop_rm_int<0x0B, "pmulhrsw", memopv8i16,
+defm PMULHRSW : SS3I_binop_rm_int<0x0B, "pmulhrsw",
int_x86_ssse3_pmul_hr_sw_128>;
}
@@ -6202,7 +6201,7 @@
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set VR128:$dst,
(IntId128
- (bitconvert (memopv8i16 addr:$src))))]>, OpSize;
+ (bitconvert (memopv2i64 addr:$src))))]>, OpSize;
}
let Predicates = [HasAVX] in
@@ -6228,7 +6227,7 @@
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set VR128:$dst,
(IntId128 VR128:$src1,
- (bitconvert (memopv16i8 addr:$src2))))]>, OpSize;
+ (bitconvert (memopv2i64 addr:$src2))))]>, OpSize;
}
/// SS41I_binop_rm_int - Simple SSE 4.1 binary operator
@@ -6244,7 +6243,7 @@
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR256:$dst,
(IntId256 VR256:$src1,
- (bitconvert (memopv32i8 addr:$src2))))]>, OpSize;
+ (bitconvert (memopv4i64 addr:$src2))))]>, OpSize;
}
let Predicates = [HasAVX] in {
@@ -7245,7 +7244,8 @@
def rm : AVX8I<opc_rm, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, x86memop_i:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set RC:$dst, (IntVar RC:$src1, (i_frag addr:$src2)))]>, VEX_4V;
+ [(set RC:$dst, (IntVar RC:$src1,
+ (bitconvert (i_frag addr:$src2))))]>, VEX_4V;
def ri : AVXAIi8<opc_rmi, MRMSrcReg, (outs RC:$dst),
(ins RC:$src1, i8imm:$src2),
@@ -7259,11 +7259,11 @@
let ExeDomain = SSEPackedSingle in {
defm VPERMILPS : avx_permil<0x0C, 0x04, "vpermilps", VR128, f128mem, i128mem,
- memopv4f32, memopv4i32,
+ memopv4f32, memopv2i64,
int_x86_avx_vpermilvar_ps,
int_x86_avx_vpermil_ps>;
defm VPERMILPSY : avx_permil<0x0C, 0x04, "vpermilps", VR256, f256mem, i256mem,
- memopv8f32, memopv8i32,
+ memopv8f32, memopv4i64,
int_x86_avx_vpermilvar_ps_256,
int_x86_avx_vpermil_ps_256>;
}
@@ -7494,11 +7494,12 @@
(ins VR256:$src1, i256mem:$src2),
!strconcat(OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set VR256:$dst, (Int VR256:$src1, (mem_frag addr:$src2)))]>,
+ [(set VR256:$dst, (Int VR256:$src1,
+ (bitconvert (mem_frag addr:$src2))))]>,
VEX_4V;
}
-defm VPERMD : avx2_perm<0x36, "vpermd", memopv8i32, int_x86_avx2_permd>;
+defm VPERMD : avx2_perm<0x36, "vpermd", memopv4i64, int_x86_avx2_permd>;
let ExeDomain = SSEPackedSingle in
defm VPERMPS : avx2_perm<0x16, "vpermps", memopv8f32, int_x86_avx2_permps>;
Modified: llvm/trunk/test/CodeGen/X86/avx-intrinsics-x86.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx-intrinsics-x86.ll?rev=145927&r1=145926&r2=145927&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx-intrinsics-x86.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx-intrinsics-x86.ll Tue Dec 6 03:04:59 2011
@@ -2333,6 +2333,12 @@
%res = call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %a0, <4 x i32> %a1) ; <<4 x float>> [#uses=1]
ret <4 x float> %res
}
+define <4 x float> @test_x86_avx_vpermilvar_ps_load(<4 x float> %a0, <4 x i32>* %a1) {
+ ; CHECK: vpermilps
+ %a2 = load <4 x i32>* %a1
+ %res = call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %a0, <4 x i32> %a2) ; <<4 x float>> [#uses=1]
+ ret <4 x float> %res
+}
declare <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float>, <4 x i32>) nounwind readnone
More information about the llvm-commits
mailing list