[llvm] r307671 - [Hexagon] Add support for nontemporal loads and stores on HVX
Krzysztof Parzyszek via llvm-commits
llvm-commits at lists.llvm.org
Tue Jul 11 09:39:34 PDT 2017
Author: kparzysz
Date: Tue Jul 11 09:39:33 2017
New Revision: 307671
URL: http://llvm.org/viewvc/llvm-project?rev=307671&view=rev
Log:
[Hexagon] Add support for nontemporal loads and stores on HVX
Patch by Michael Wu.
Differential Revision: https://reviews.llvm.org/D35104
Added:
llvm/trunk/test/CodeGen/Hexagon/hvx-nontemporal.ll
Modified:
llvm/trunk/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
llvm/trunk/lib/Target/Hexagon/HexagonInstrInfo.cpp
llvm/trunk/lib/Target/Hexagon/HexagonPatterns.td
llvm/trunk/lib/Target/Hexagon/HexagonPseudo.td
Modified: llvm/trunk/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp?rev=307671&r1=307670&r2=307671&view=diff
==============================================================================
--- llvm/trunk/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp (original)
+++ llvm/trunk/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp Tue Jul 11 09:39:33 2017
@@ -241,22 +241,31 @@ void HexagonDAGToDAGISel::SelectIndexedL
case MVT::v32i16:
case MVT::v16i32:
case MVT::v8i64:
- if (isAlignedMemNode(LD))
- Opcode = IsValidInc ? Hexagon::V6_vL32b_pi : Hexagon::V6_vL32b_ai;
- else
+ if (isAlignedMemNode(LD)) {
+ if (LD->isNonTemporal())
+ Opcode = IsValidInc ? Hexagon::V6_vL32b_nt_pi : Hexagon::V6_vL32b_nt_ai;
+ else
+ Opcode = IsValidInc ? Hexagon::V6_vL32b_pi : Hexagon::V6_vL32b_ai;
+ } else {
Opcode = IsValidInc ? Hexagon::V6_vL32Ub_pi : Hexagon::V6_vL32Ub_ai;
+ }
break;
// 128B
case MVT::v128i8:
case MVT::v64i16:
case MVT::v32i32:
case MVT::v16i64:
- if (isAlignedMemNode(LD))
- Opcode = IsValidInc ? Hexagon::V6_vL32b_pi_128B
- : Hexagon::V6_vL32b_ai_128B;
- else
+ if (isAlignedMemNode(LD)) {
+ if (LD->isNonTemporal())
+ Opcode = IsValidInc ? Hexagon::V6_vL32b_nt_pi_128B
+ : Hexagon::V6_vL32b_nt_ai_128B;
+ else
+ Opcode = IsValidInc ? Hexagon::V6_vL32b_pi_128B
+ : Hexagon::V6_vL32b_ai_128B;
+ } else {
Opcode = IsValidInc ? Hexagon::V6_vL32Ub_pi_128B
: Hexagon::V6_vL32Ub_ai_128B;
+ }
break;
default:
llvm_unreachable("Unexpected memory type in indexed load");
@@ -529,22 +538,31 @@ void HexagonDAGToDAGISel::SelectIndexedS
case MVT::v32i16:
case MVT::v16i32:
case MVT::v8i64:
- if (isAlignedMemNode(ST))
- Opcode = IsValidInc ? Hexagon::V6_vS32b_pi : Hexagon::V6_vS32b_ai;
- else
+ if (isAlignedMemNode(ST)) {
+ if (ST->isNonTemporal())
+ Opcode = IsValidInc ? Hexagon::V6_vS32b_nt_pi : Hexagon::V6_vS32b_nt_ai;
+ else
+ Opcode = IsValidInc ? Hexagon::V6_vS32b_pi : Hexagon::V6_vS32b_ai;
+ } else {
Opcode = IsValidInc ? Hexagon::V6_vS32Ub_pi : Hexagon::V6_vS32Ub_ai;
+ }
break;
// 128B
case MVT::v128i8:
case MVT::v64i16:
case MVT::v32i32:
case MVT::v16i64:
- if (isAlignedMemNode(ST))
- Opcode = IsValidInc ? Hexagon::V6_vS32b_pi_128B
- : Hexagon::V6_vS32b_ai_128B;
- else
+ if (isAlignedMemNode(ST)) {
+ if (ST->isNonTemporal())
+ Opcode = IsValidInc ? Hexagon::V6_vS32b_nt_pi_128B
+ : Hexagon::V6_vS32b_nt_ai_128B;
+ else
+ Opcode = IsValidInc ? Hexagon::V6_vS32b_pi_128B
+ : Hexagon::V6_vS32b_ai_128B;
+ } else {
Opcode = IsValidInc ? Hexagon::V6_vS32Ub_pi_128B
: Hexagon::V6_vS32Ub_ai_128B;
+ }
break;
default:
llvm_unreachable("Unexpected memory type in indexed store");
Modified: llvm/trunk/lib/Target/Hexagon/HexagonInstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/Hexagon/HexagonInstrInfo.cpp?rev=307671&r1=307670&r2=307671&view=diff
==============================================================================
--- llvm/trunk/lib/Target/Hexagon/HexagonInstrInfo.cpp (original)
+++ llvm/trunk/lib/Target/Hexagon/HexagonInstrInfo.cpp Tue Jul 11 09:39:33 2017
@@ -250,15 +250,19 @@ unsigned HexagonInstrInfo::isLoadFromSta
case Hexagon::L2_loadri_io:
case Hexagon::L2_loadrd_io:
case Hexagon::V6_vL32b_ai:
+ case Hexagon::V6_vL32b_nt_ai:
case Hexagon::V6_vL32b_ai_128B:
+ case Hexagon::V6_vL32b_nt_ai_128B:
case Hexagon::V6_vL32Ub_ai:
case Hexagon::V6_vL32Ub_ai_128B:
case Hexagon::LDriw_pred:
case Hexagon::LDriw_mod:
case Hexagon::PS_vloadrq_ai:
case Hexagon::PS_vloadrw_ai:
+ case Hexagon::PS_vloadrw_nt_ai:
case Hexagon::PS_vloadrq_ai_128B:
- case Hexagon::PS_vloadrw_ai_128B: {
+ case Hexagon::PS_vloadrw_ai_128B:
+ case Hexagon::PS_vloadrw_nt_ai_128B: {
const MachineOperand OpFI = MI.getOperand(1);
if (!OpFI.isFI())
return 0;
@@ -2473,20 +2477,28 @@ bool HexagonInstrInfo::isValidOffset(uns
switch (Opcode) {
case Hexagon::PS_vstorerq_ai:
case Hexagon::PS_vstorerw_ai:
+ case Hexagon::PS_vstorerw_nt_ai:
case Hexagon::PS_vloadrq_ai:
case Hexagon::PS_vloadrw_ai:
+ case Hexagon::PS_vloadrw_nt_ai:
case Hexagon::V6_vL32b_ai:
case Hexagon::V6_vS32b_ai:
+ case Hexagon::V6_vL32b_nt_ai:
+ case Hexagon::V6_vS32b_nt_ai:
case Hexagon::V6_vL32Ub_ai:
case Hexagon::V6_vS32Ub_ai:
return isShiftedInt<4,6>(Offset);
case Hexagon::PS_vstorerq_ai_128B:
case Hexagon::PS_vstorerw_ai_128B:
+ case Hexagon::PS_vstorerw_nt_ai_128B:
case Hexagon::PS_vloadrq_ai_128B:
case Hexagon::PS_vloadrw_ai_128B:
+ case Hexagon::PS_vloadrw_nt_ai_128B:
case Hexagon::V6_vL32b_ai_128B:
case Hexagon::V6_vS32b_ai_128B:
+ case Hexagon::V6_vL32b_nt_ai_128B:
+ case Hexagon::V6_vS32b_nt_ai_128B:
case Hexagon::V6_vL32Ub_ai_128B:
case Hexagon::V6_vS32Ub_ai_128B:
return isShiftedInt<4,7>(Offset);
@@ -3198,11 +3210,19 @@ int HexagonInstrInfo::getDotCurOp(const
return Hexagon::V6_vL32b_cur_pi;
case Hexagon::V6_vL32b_ai:
return Hexagon::V6_vL32b_cur_ai;
+ case Hexagon::V6_vL32b_nt_pi:
+ return Hexagon::V6_vL32b_nt_cur_pi;
+ case Hexagon::V6_vL32b_nt_ai:
+ return Hexagon::V6_vL32b_nt_cur_ai;
//128B
case Hexagon::V6_vL32b_pi_128B:
return Hexagon::V6_vL32b_cur_pi_128B;
case Hexagon::V6_vL32b_ai_128B:
return Hexagon::V6_vL32b_cur_ai_128B;
+ case Hexagon::V6_vL32b_nt_pi_128B:
+ return Hexagon::V6_vL32b_nt_cur_pi_128B;
+ case Hexagon::V6_vL32b_nt_ai_128B:
+ return Hexagon::V6_vL32b_nt_cur_ai_128B;
}
return 0;
}
@@ -3215,11 +3235,19 @@ int HexagonInstrInfo::getNonDotCurOp(con
return Hexagon::V6_vL32b_pi;
case Hexagon::V6_vL32b_cur_ai:
return Hexagon::V6_vL32b_ai;
+ case Hexagon::V6_vL32b_nt_cur_pi:
+ return Hexagon::V6_vL32b_nt_pi;
+ case Hexagon::V6_vL32b_nt_cur_ai:
+ return Hexagon::V6_vL32b_nt_ai;
//128B
case Hexagon::V6_vL32b_cur_pi_128B:
return Hexagon::V6_vL32b_pi_128B;
case Hexagon::V6_vL32b_cur_ai_128B:
return Hexagon::V6_vL32b_ai_128B;
+ case Hexagon::V6_vL32b_nt_cur_pi_128B:
+ return Hexagon::V6_vL32b_nt_pi_128B;
+ case Hexagon::V6_vL32b_nt_cur_ai_128B:
+ return Hexagon::V6_vL32b_nt_ai_128B;
}
return 0;
}
Modified: llvm/trunk/lib/Target/Hexagon/HexagonPatterns.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/Hexagon/HexagonPatterns.td?rev=307671&r1=307670&r2=307671&view=diff
==============================================================================
--- llvm/trunk/lib/Target/Hexagon/HexagonPatterns.td (original)
+++ llvm/trunk/lib/Target/Hexagon/HexagonPatterns.td Tue Jul 11 09:39:33 2017
@@ -2770,6 +2770,9 @@ def unalignedstore : PatFrag<(ops node:$
multiclass vS32b_ai_pats <ValueType VTSgl, ValueType VTDbl> {
// Aligned stores
+ def : Pat<(alignednontemporalstore (VTSgl VectorRegs:$src1), IntRegs:$addr),
+ (V6_vS32b_nt_ai IntRegs:$addr, 0, (VTSgl VectorRegs:$src1))>,
+ Requires<[UseHVXSgl]>;
def : Pat<(alignedstore (VTSgl VectorRegs:$src1), IntRegs:$addr),
(V6_vS32b_ai IntRegs:$addr, 0, (VTSgl VectorRegs:$src1))>,
Requires<[UseHVXSgl]>;
@@ -2778,6 +2781,9 @@ multiclass vS32b_ai_pats <ValueType VTSg
Requires<[UseHVXSgl]>;
// 128B Aligned stores
+ def : Pat<(alignednontemporalstore (VTDbl VectorRegs128B:$src1), IntRegs:$addr),
+ (V6_vS32b_nt_ai_128B IntRegs:$addr, 0, (VTDbl VectorRegs128B:$src1))>,
+ Requires<[UseHVXDbl]>;
def : Pat<(alignedstore (VTDbl VectorRegs128B:$src1), IntRegs:$addr),
(V6_vS32b_ai_128B IntRegs:$addr, 0, (VTDbl VectorRegs128B:$src1))>,
Requires<[UseHVXDbl]>;
@@ -2787,6 +2793,11 @@ multiclass vS32b_ai_pats <ValueType VTSg
// Fold Add R+OFF into vector store.
let AddedComplexity = 10 in {
+ def : Pat<(alignednontemporalstore (VTSgl VectorRegs:$src1),
+ (add IntRegs:$src2, Iss4_6:$offset)),
+ (V6_vS32b_nt_ai IntRegs:$src2, Iss4_6:$offset,
+ (VTSgl VectorRegs:$src1))>,
+ Requires<[UseHVXSgl]>;
def : Pat<(alignedstore (VTSgl VectorRegs:$src1),
(add IntRegs:$src2, Iss4_6:$offset)),
(V6_vS32b_ai IntRegs:$src2, Iss4_6:$offset,
@@ -2799,6 +2810,11 @@ multiclass vS32b_ai_pats <ValueType VTSg
Requires<[UseHVXSgl]>;
// Fold Add R+OFF into vector store 128B.
+ def : Pat<(alignednontemporalstore (VTDbl VectorRegs128B:$src1),
+ (add IntRegs:$src2, Iss4_7:$offset)),
+ (V6_vS32b_nt_ai_128B IntRegs:$src2, Iss4_7:$offset,
+ (VTDbl VectorRegs128B:$src1))>,
+ Requires<[UseHVXDbl]>;
def : Pat<(alignedstore (VTDbl VectorRegs128B:$src1),
(add IntRegs:$src2, Iss4_7:$offset)),
(V6_vS32b_ai_128B IntRegs:$src2, Iss4_7:$offset,
@@ -2820,6 +2836,9 @@ defm : vS32b_ai_pats <v8i64, v16i64>;
multiclass vL32b_ai_pats <ValueType VTSgl, ValueType VTDbl> {
// Aligned loads
+ def : Pat < (VTSgl (alignednontemporalload IntRegs:$addr)),
+ (V6_vL32b_nt_ai IntRegs:$addr, 0) >,
+ Requires<[UseHVXSgl]>;
def : Pat < (VTSgl (alignedload IntRegs:$addr)),
(V6_vL32b_ai IntRegs:$addr, 0) >,
Requires<[UseHVXSgl]>;
@@ -2828,6 +2847,9 @@ multiclass vL32b_ai_pats <ValueType VTSg
Requires<[UseHVXSgl]>;
// 128B Load
+ def : Pat < (VTDbl (alignednontemporalload IntRegs:$addr)),
+ (V6_vL32b_nt_ai_128B IntRegs:$addr, 0) >,
+ Requires<[UseHVXDbl]>;
def : Pat < (VTDbl (alignedload IntRegs:$addr)),
(V6_vL32b_ai_128B IntRegs:$addr, 0) >,
Requires<[UseHVXDbl]>;
@@ -2837,6 +2859,9 @@ multiclass vL32b_ai_pats <ValueType VTSg
// Fold Add R+OFF into vector load.
let AddedComplexity = 10 in {
+ def : Pat<(VTDbl (alignednontemporalload (add IntRegs:$src2, Iss4_7:$offset))),
+ (V6_vL32b_nt_ai_128B IntRegs:$src2, Iss4_7:$offset)>,
+ Requires<[UseHVXDbl]>;
def : Pat<(VTDbl (alignedload (add IntRegs:$src2, Iss4_7:$offset))),
(V6_vL32b_ai_128B IntRegs:$src2, Iss4_7:$offset)>,
Requires<[UseHVXDbl]>;
@@ -2844,6 +2869,9 @@ multiclass vL32b_ai_pats <ValueType VTSg
(V6_vL32Ub_ai_128B IntRegs:$src2, Iss4_7:$offset)>,
Requires<[UseHVXDbl]>;
+ def : Pat<(VTSgl (alignednontemporalload (add IntRegs:$src2, Iss4_6:$offset))),
+ (V6_vL32b_nt_ai IntRegs:$src2, Iss4_6:$offset)>,
+ Requires<[UseHVXSgl]>;
def : Pat<(VTSgl (alignedload (add IntRegs:$src2, Iss4_6:$offset))),
(V6_vL32b_ai IntRegs:$src2, Iss4_6:$offset)>,
Requires<[UseHVXSgl]>;
@@ -2859,6 +2887,9 @@ defm : vL32b_ai_pats <v16i32, v32i32>;
defm : vL32b_ai_pats <v8i64, v16i64>;
multiclass STrivv_pats <ValueType VTSgl, ValueType VTDbl> {
+ def : Pat<(alignednontemporalstore (VTSgl VecDblRegs:$src1), IntRegs:$addr),
+ (PS_vstorerw_nt_ai IntRegs:$addr, 0, (VTSgl VecDblRegs:$src1))>,
+ Requires<[UseHVXSgl]>;
def : Pat<(alignedstore (VTSgl VecDblRegs:$src1), IntRegs:$addr),
(PS_vstorerw_ai IntRegs:$addr, 0, (VTSgl VecDblRegs:$src1))>,
Requires<[UseHVXSgl]>;
@@ -2866,6 +2897,10 @@ multiclass STrivv_pats <ValueType VTSgl,
(PS_vstorerwu_ai IntRegs:$addr, 0, (VTSgl VecDblRegs:$src1))>,
Requires<[UseHVXSgl]>;
+ def : Pat<(alignednontemporalstore (VTDbl VecDblRegs128B:$src1), IntRegs:$addr),
+ (PS_vstorerw_nt_ai_128B IntRegs:$addr, 0,
+ (VTDbl VecDblRegs128B:$src1))>,
+ Requires<[UseHVXDbl]>;
def : Pat<(alignedstore (VTDbl VecDblRegs128B:$src1), IntRegs:$addr),
(PS_vstorerw_ai_128B IntRegs:$addr, 0,
(VTDbl VecDblRegs128B:$src1))>,
@@ -2882,6 +2917,9 @@ defm : STrivv_pats <v32i32, v64i32>;
defm : STrivv_pats <v16i64, v32i64>;
multiclass LDrivv_pats <ValueType VTSgl, ValueType VTDbl> {
+ def : Pat<(VTSgl (alignednontemporalload I32:$addr)),
+ (PS_vloadrw_nt_ai I32:$addr, 0)>,
+ Requires<[UseHVXSgl]>;
def : Pat<(VTSgl (alignedload I32:$addr)),
(PS_vloadrw_ai I32:$addr, 0)>,
Requires<[UseHVXSgl]>;
@@ -2889,6 +2927,9 @@ multiclass LDrivv_pats <ValueType VTSgl,
(PS_vloadrwu_ai I32:$addr, 0)>,
Requires<[UseHVXSgl]>;
+ def : Pat<(VTDbl (alignednontemporalload I32:$addr)),
+ (PS_vloadrw_nt_ai_128B I32:$addr, 0)>,
+ Requires<[UseHVXDbl]>;
def : Pat<(VTDbl (alignedload I32:$addr)),
(PS_vloadrw_ai_128B I32:$addr, 0)>,
Requires<[UseHVXDbl]>;
Modified: llvm/trunk/lib/Target/Hexagon/HexagonPseudo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/Hexagon/HexagonPseudo.td?rev=307671&r1=307670&r2=307671&view=diff
==============================================================================
--- llvm/trunk/lib/Target/Hexagon/HexagonPseudo.td (original)
+++ llvm/trunk/lib/Target/Hexagon/HexagonPseudo.td Tue Jul 11 09:39:33 2017
@@ -407,6 +407,11 @@ def PS_vstorerw_ai: STrivv_template<VecD
def PS_vstorerw_ai_128B: STrivv_template<VecDblRegs128B, V6_vS32b_ai_128B>,
Requires<[HasV60T,UseHVXDbl]>;
+def PS_vstorerw_nt_ai: STrivv_template<VecDblRegs, V6_vS32b_nt_ai>,
+ Requires<[HasV60T,UseHVXSgl]>;
+def PS_vstorerw_nt_ai_128B: STrivv_template<VecDblRegs128B, V6_vS32b_nt_ai_128B>,
+ Requires<[HasV60T,UseHVXDbl]>;
+
def PS_vstorerwu_ai: STrivv_template<VecDblRegs, V6_vS32Ub_ai>,
Requires<[HasV60T,UseHVXSgl]>;
def PS_vstorerwu_ai_128B: STrivv_template<VecDblRegs128B, V6_vS32Ub_ai_128B>,
@@ -433,6 +438,11 @@ def PS_vloadrw_ai: LDrivv_template<VecDb
def PS_vloadrw_ai_128B: LDrivv_template<VecDblRegs128B, V6_vL32b_ai_128B>,
Requires<[HasV60T,UseHVXDbl]>;
+def PS_vloadrw_nt_ai: LDrivv_template<VecDblRegs, V6_vL32b_nt_ai>,
+ Requires<[HasV60T,UseHVXSgl]>;
+def PS_vloadrw_nt_ai_128B: LDrivv_template<VecDblRegs128B, V6_vL32b_nt_ai_128B>,
+ Requires<[HasV60T,UseHVXDbl]>;
+
def PS_vloadrwu_ai: LDrivv_template<VecDblRegs, V6_vL32Ub_ai>,
Requires<[HasV60T,UseHVXSgl]>;
def PS_vloadrwu_ai_128B: LDrivv_template<VecDblRegs128B, V6_vL32Ub_ai_128B>,
Added: llvm/trunk/test/CodeGen/Hexagon/hvx-nontemporal.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Hexagon/hvx-nontemporal.ll?rev=307671&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/Hexagon/hvx-nontemporal.ll (added)
+++ llvm/trunk/test/CodeGen/Hexagon/hvx-nontemporal.ll Tue Jul 11 09:39:33 2017
@@ -0,0 +1,28 @@
+; RUN: llc -march=hexagon < %s | FileCheck %s
+target triple = "hexagon"
+
+; Function Attrs: norecurse nounwind
+define void @test(<32 x i32>* nocapture readonly %x, <32 x i32>* nocapture readnone %y, <32 x i32>* nocapture %a, <32 x i32>* nocapture %b) #0 {
+entry:
+; CHECK: v0 = vmem(r0+#7):nt
+ %add.ptr = getelementptr inbounds <32 x i32>, <32 x i32>* %x, i32 7
+ %0 = load <32 x i32>, <32 x i32>* %add.ptr, align 128, !tbaa !1, !nontemporal !4
+
+; CHECK: v1.cur = vmem(r2+#0):nt
+ %1 = load <32 x i32>, <32 x i32>* %a, align 128, !tbaa !1, !nontemporal !4
+
+; CHECK: vmem(r3+#3):nt = v1
+ %add.ptr2 = getelementptr inbounds <32 x i32>, <32 x i32>* %b, i32 3
+ store <32 x i32> %1, <32 x i32>* %add.ptr2, align 128, !tbaa !1, !nontemporal !4
+
+; CHECK: vmem(r2+#0):nt = v0
+ store <32 x i32> %0, <32 x i32>* %a, align 128, !tbaa !1, !nontemporal !4
+ ret void
+}
+
+attributes #0 = { norecurse nounwind "target-cpu"="hexagonv60" "target-features"="+hvx,+hvx-double" }
+
+!1 = !{!2, !2, i64 0}
+!2 = !{!"omnipotent char", !3, i64 0}
+!3 = !{!"Simple C/C++ TBAA"}
+!4 = !{i32 1}
More information about the llvm-commits
mailing list