[llvm] r308186 - [Hexagon] Remove custom lowering of loads of v4i16
Krzysztof Parzyszek via llvm-commits
llvm-commits at lists.llvm.org
Mon Jul 17 08:45:45 PDT 2017
Author: kparzysz
Date: Mon Jul 17 08:45:45 2017
New Revision: 308186
URL: http://llvm.org/viewvc/llvm-project?rev=308186&view=rev
Log:
[Hexagon] Remove custom lowering of loads of v4i16
The target-independent lowering works fine, except for concatenating the two
32-bit words. Add a pattern to generate A2_combinew instead of a 64-bit asl/or.
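
A minimal sketch of the case the new pattern targets: joining two 32-bit
words into a 64-bit value should now select a single combine rather than an
asl/or pair. The function name and the expected-output comment below are
illustrative only, not taken from the testsuite.

; RUN: llc -march=hexagon < %s
define i64 @join_words(i32 %a, i32 %b) {
  ; High word shifted into place, low word zero-extended, then OR'd together.
  %hi = zext i32 %a to i64
  %lo = zext i32 %b to i64
  %hs = shl i64 %hi, 32
  %r  = or i64 %hs, %lo
  ret i64 %r
}
; Expected (roughly): r1:0 = combine(r0,r1) instead of a 64-bit asl/or sequence.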
Added:
llvm/trunk/test/CodeGen/Hexagon/vect/vect-load-v4i16.ll
llvm/trunk/test/CodeGen/Hexagon/vect/vect-v4i16.ll
Removed:
llvm/trunk/test/CodeGen/Hexagon/vect/vect-loadv4i16.ll
Modified:
llvm/trunk/lib/Target/Hexagon/HexagonISelLowering.cpp
llvm/trunk/lib/Target/Hexagon/HexagonISelLowering.h
llvm/trunk/lib/Target/Hexagon/HexagonPatterns.td
Modified: llvm/trunk/lib/Target/Hexagon/HexagonISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/Hexagon/HexagonISelLowering.cpp?rev=308186&r1=308185&r2=308186&view=diff
==============================================================================
--- llvm/trunk/lib/Target/Hexagon/HexagonISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/Hexagon/HexagonISelLowering.cpp Mon Jul 17 08:45:45 2017
@@ -1364,79 +1364,6 @@ HexagonTargetLowering::LowerVSELECT(SDVa
return SDValue();
}
-// Handle only specific vector loads.
-SDValue HexagonTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
- EVT VT = Op.getValueType();
- SDLoc DL(Op);
- LoadSDNode *LoadNode = cast<LoadSDNode>(Op);
- SDValue Chain = LoadNode->getChain();
- SDValue Ptr = Op.getOperand(1);
- SDValue LoweredLoad;
- SDValue Result;
- SDValue Base = LoadNode->getBasePtr();
- ISD::LoadExtType Ext = LoadNode->getExtensionType();
- unsigned Alignment = LoadNode->getAlignment();
- SDValue LoadChain;
-
- if(Ext == ISD::NON_EXTLOAD)
- Ext = ISD::ZEXTLOAD;
-
- if (VT == MVT::v4i16) {
- if (Alignment == 2) {
- SDValue Loads[4];
- // Base load.
- Loads[0] = DAG.getExtLoad(Ext, DL, MVT::i32, Chain, Base,
- LoadNode->getPointerInfo(), MVT::i16, Alignment,
- LoadNode->getMemOperand()->getFlags());
- // Base+2 load.
- SDValue Increment = DAG.getConstant(2, DL, MVT::i32);
- Ptr = DAG.getNode(ISD::ADD, DL, Base.getValueType(), Base, Increment);
- Loads[1] = DAG.getExtLoad(Ext, DL, MVT::i32, Chain, Ptr,
- LoadNode->getPointerInfo(), MVT::i16, Alignment,
- LoadNode->getMemOperand()->getFlags());
- // SHL 16, then OR base and base+2.
- SDValue ShiftAmount = DAG.getConstant(16, DL, MVT::i32);
- SDValue Tmp1 = DAG.getNode(ISD::SHL, DL, MVT::i32, Loads[1], ShiftAmount);
- SDValue Tmp2 = DAG.getNode(ISD::OR, DL, MVT::i32, Tmp1, Loads[0]);
- // Base + 4.
- Increment = DAG.getConstant(4, DL, MVT::i32);
- Ptr = DAG.getNode(ISD::ADD, DL, Base.getValueType(), Base, Increment);
- Loads[2] = DAG.getExtLoad(Ext, DL, MVT::i32, Chain, Ptr,
- LoadNode->getPointerInfo(), MVT::i16, Alignment,
- LoadNode->getMemOperand()->getFlags());
- // Base + 6.
- Increment = DAG.getConstant(6, DL, MVT::i32);
- Ptr = DAG.getNode(ISD::ADD, DL, Base.getValueType(), Base, Increment);
- Loads[3] = DAG.getExtLoad(Ext, DL, MVT::i32, Chain, Ptr,
- LoadNode->getPointerInfo(), MVT::i16, Alignment,
- LoadNode->getMemOperand()->getFlags());
- // SHL 16, then OR base+4 and base+6.
- Tmp1 = DAG.getNode(ISD::SHL, DL, MVT::i32, Loads[3], ShiftAmount);
- SDValue Tmp4 = DAG.getNode(ISD::OR, DL, MVT::i32, Tmp1, Loads[2]);
- // Combine to i64. This could be optimised out later if we can
- // affect reg allocation of this code.
- Result = DAG.getNode(HexagonISD::COMBINE, DL, MVT::i64, Tmp4, Tmp2);
- LoadChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
- Loads[0].getValue(1), Loads[1].getValue(1),
- Loads[2].getValue(1), Loads[3].getValue(1));
- } else {
- // Perform default type expansion.
- Result = DAG.getLoad(MVT::i64, DL, Chain, Ptr, LoadNode->getPointerInfo(),
- LoadNode->getAlignment(),
- LoadNode->getMemOperand()->getFlags());
- LoadChain = Result.getValue(1);
- }
- } else
- llvm_unreachable("Custom lowering unsupported load");
-
- Result = DAG.getNode(ISD::BITCAST, DL, VT, Result);
- // Since we pretend to lower a load, we need the original chain
- // info attached to the result.
- SDValue Ops[] = { Result, LoadChain };
-
- return DAG.getMergeValues(Ops, DL);
-}
-
SDValue
HexagonTargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) const {
EVT ValTy = Op.getValueType();
@@ -1961,18 +1888,12 @@ HexagonTargetLowering::HexagonTargetLowe
// Handling of vector operations.
//
- // Custom lower v4i16 load only. Let v4i16 store to be
- // promoted for now.
promoteLdStType(MVT::v4i8, MVT::i32);
promoteLdStType(MVT::v2i16, MVT::i32);
promoteLdStType(MVT::v8i8, MVT::i64);
+ promoteLdStType(MVT::v4i16, MVT::i64);
promoteLdStType(MVT::v2i32, MVT::i64);
- setOperationAction(ISD::LOAD, MVT::v4i16, Custom);
- setOperationAction(ISD::STORE, MVT::v4i16, Promote);
- AddPromotedToType(ISD::LOAD, MVT::v4i16, MVT::i64);
- AddPromotedToType(ISD::STORE, MVT::v4i16, MVT::i64);
-
// Set the action for vector operations to "expand", then override it with
// either "custom" or "legal" for specific cases.
static const unsigned VectExpOps[] = {
@@ -2970,8 +2891,6 @@ HexagonTargetLowering::LowerOperation(SD
case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
case ISD::GLOBAL_OFFSET_TABLE: return LowerGLOBAL_OFFSET_TABLE(Op, DAG);
case ISD::VASTART: return LowerVASTART(Op, DAG);
- // Custom lower some vector loads.
- case ISD::LOAD: return LowerLOAD(Op, DAG);
case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
case ISD::SETCC: return LowerSETCC(Op, DAG);
case ISD::VSELECT: return LowerVSELECT(Op, DAG);
Modified: llvm/trunk/lib/Target/Hexagon/HexagonISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/Hexagon/HexagonISelLowering.h?rev=308186&r1=308185&r2=308186&view=diff
==============================================================================
--- llvm/trunk/lib/Target/Hexagon/HexagonISelLowering.h (original)
+++ llvm/trunk/lib/Target/Hexagon/HexagonISelLowering.h Mon Jul 17 08:45:45 2017
@@ -165,7 +165,6 @@ namespace HexagonISD {
SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG& DAG) const;
SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
bool CanLowerReturn(CallingConv::ID CallConv,
MachineFunction &MF, bool isVarArg,
Modified: llvm/trunk/lib/Target/Hexagon/HexagonPatterns.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/Hexagon/HexagonPatterns.td?rev=308186&r1=308185&r2=308186&view=diff
==============================================================================
--- llvm/trunk/lib/Target/Hexagon/HexagonPatterns.td (original)
+++ llvm/trunk/lib/Target/Hexagon/HexagonPatterns.td Mon Jul 17 08:45:45 2017
@@ -2250,6 +2250,12 @@ def: Storea_pat<SwapSt<atomic_store_16>,
def: Storea_pat<SwapSt<atomic_store_32>, I32, addrgp, PS_storeriabs>;
def: Storea_pat<SwapSt<atomic_store_64>, I64, addrgp, PS_storerdabs>;
+// Prefer this pattern to S2_asl_i_p_or for the special case of joining
+// two 32-bit words into a 64-bit word.
+let AddedComplexity = 200 in
+def: Pat<(or (shl (Aext64 I32:$a), (i32 32)), (Zext64 I32:$b)),
+ (A2_combinew I32:$a, I32:$b)>;
+
def: Pat<(or (or (or (shl (i64 (zext (and I32:$b, (i32 65535)))), (i32 16)),
(i64 (zext (i32 (and I32:$a, (i32 65535)))))),
(shl (i64 (anyext (and I32:$c, (i32 65535)))), (i32 32))),
Added: llvm/trunk/test/CodeGen/Hexagon/vect/vect-load-v4i16.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Hexagon/vect/vect-load-v4i16.ll?rev=308186&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/Hexagon/vect/vect-load-v4i16.ll (added)
+++ llvm/trunk/test/CodeGen/Hexagon/vect/vect-load-v4i16.ll Mon Jul 17 08:45:45 2017
@@ -0,0 +1,23 @@
+; RUN: llc -march=hexagon -O0 < %s | FileCheck %s
+
+; CHECK-LABEL: danny:
+; CHECK-DAG: [[T0:r[0-9]+]] = memuh(r0+#0)
+; CHECK-DAG: [[T1:r[0-9]+]] = memuh(r0+#2)
+; CHECK: [[T0]] |= asl([[T1]],#16)
+; CHECK-DAG: [[T2:r[0-9]+]] = memuh(r0+#4)
+; CHECK-DAG: [[T3:r[0-9]+]] = memuh(r0+#6)
+; CHECK: [[T2]] |= asl([[T3]],#16)
+; CHECK: combine([[T2]],[[T0]])
+define <4 x i16> @danny(<4 x i16>* %p) {
+ %t0 = load <4 x i16>, <4 x i16>* %p, align 2
+ ret <4 x i16> %t0
+}
+
+; CHECK-LABEL: sammy:
+; CHECK-DAG: [[T0:r[0-9]+]] = memw(r0+#0)
+; CHECK-DAG: [[T1:r[0-9]+]] = memw(r0+#4)
+; CHECK: combine([[T1]],[[T0]])
+define <4 x i16> @sammy(<4 x i16>* %p) {
+ %t0 = load <4 x i16>, <4 x i16>* %p, align 4
+ ret <4 x i16> %t0
+}
Removed: llvm/trunk/test/CodeGen/Hexagon/vect/vect-loadv4i16.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Hexagon/vect/vect-loadv4i16.ll?rev=308185&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/Hexagon/vect/vect-loadv4i16.ll (original)
+++ llvm/trunk/test/CodeGen/Hexagon/vect/vect-loadv4i16.ll (removed)
@@ -1,73 +0,0 @@
-; RUN: llc -march=hexagon -mcpu=hexagonv5 -disable-hsdr < %s | FileCheck %s
-
-; Check that store is post-incremented.
-; CHECK: memuh(r{{[0-9]+}}+#6)
-; CHECK: combine(r{{[0-9]+}},r{{[0-9]+}})
-; CHECK: vaddh
-
-target datalayout = "e-p:32:32:32-i64:64:64-i32:32:32-i16:16:16-i1:32:32-f64:64:64-f32:32:32-v64:64:64-v32:32:32-a0:0-n16:32"
-target triple = "hexagon"
-
-define void @matrix_add_const(i32 %N, i16* nocapture %A, i16 signext %val) #0 {
-entry:
- %cmp5 = icmp eq i32 %N, 0
- br i1 %cmp5, label %for.end, label %polly.cond
-
-for.end.loopexit: ; preds = %polly.stmt.for.body29
- br label %for.end
-
-for.end: ; preds = %for.end.loopexit, %polly.loop_header24.preheader, %entry
- ret void
-
-polly.cond: ; preds = %entry
- %0 = icmp sgt i32 %N, 3
- br i1 %0, label %polly.then, label %polly.loop_header24.preheader
-
-polly.then: ; preds = %polly.cond
- %1 = add i32 %N, -1
- %leftover_lb = and i32 %1, -4
- %2 = icmp sgt i32 %leftover_lb, 0
- br i1 %2, label %polly.loop_body.lr.ph, label %polly.loop_header24.preheader
-
-polly.loop_body.lr.ph: ; preds = %polly.then
- %3 = insertelement <4 x i16> undef, i16 %val, i32 0
- %4 = insertelement <4 x i16> %3, i16 %val, i32 1
- %5 = insertelement <4 x i16> %4, i16 %val, i32 2
- %6 = insertelement <4 x i16> %5, i16 %val, i32 3
- br label %polly.loop_body
-
-polly.loop_header24.preheader.loopexit: ; preds = %polly.loop_body
- br label %polly.loop_header24.preheader
-
-polly.loop_header24.preheader: ; preds = %polly.loop_header24.preheader.loopexit, %polly.then, %polly.cond
- %polly.loopiv27.ph = phi i32 [ 0, %polly.cond ], [ %leftover_lb, %polly.then ], [ %leftover_lb, %polly.loop_header24.preheader.loopexit ]
- %7 = icmp slt i32 %polly.loopiv27.ph, %N
- br i1 %7, label %polly.stmt.for.body29.preheader, label %for.end
-
-polly.stmt.for.body29.preheader: ; preds = %polly.loop_header24.preheader
- br label %polly.stmt.for.body29
-
-polly.loop_body: ; preds = %polly.loop_body.lr.ph, %polly.loop_body
- %p_arrayidx.phi = phi i16* [ %A, %polly.loop_body.lr.ph ], [ %p_arrayidx.inc, %polly.loop_body ]
- %polly.loopiv34 = phi i32 [ 0, %polly.loop_body.lr.ph ], [ %polly.next_loopiv, %polly.loop_body ]
- %polly.next_loopiv = add nsw i32 %polly.loopiv34, 4
- %vector_ptr = bitcast i16* %p_arrayidx.phi to <4 x i16>*
- %_p_vec_full = load <4 x i16>, <4 x i16>* %vector_ptr, align 2
- %addp_vec = add <4 x i16> %_p_vec_full, %6
- store <4 x i16> %addp_vec, <4 x i16>* %vector_ptr, align 2
- %8 = icmp slt i32 %polly.next_loopiv, %leftover_lb
- %p_arrayidx.inc = getelementptr i16, i16* %p_arrayidx.phi, i32 4
- br i1 %8, label %polly.loop_body, label %polly.loop_header24.preheader.loopexit
-
-polly.stmt.for.body29: ; preds = %polly.stmt.for.body29.preheader, %polly.stmt.for.body29
- %polly.loopiv2733 = phi i32 [ %polly.next_loopiv28, %polly.stmt.for.body29 ], [ %polly.loopiv27.ph, %polly.stmt.for.body29.preheader ]
- %polly.next_loopiv28 = add nsw i32 %polly.loopiv2733, 1
- %p_arrayidx30 = getelementptr i16, i16* %A, i32 %polly.loopiv2733
- %_p_scalar_ = load i16, i16* %p_arrayidx30, align 2
- %p_add = add i16 %_p_scalar_, %val
- store i16 %p_add, i16* %p_arrayidx30, align 2
- %exitcond = icmp eq i32 %polly.next_loopiv28, %N
- br i1 %exitcond, label %for.end.loopexit, label %polly.stmt.for.body29
-}
-
-attributes #0 = { nounwind "fp-contract-model"="standard" "no-frame-pointer-elim-non-leaf" "realign-stack" "relocation-model"="static" "ssp-buffers-size"="8" }
Added: llvm/trunk/test/CodeGen/Hexagon/vect/vect-v4i16.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Hexagon/vect/vect-v4i16.ll?rev=308186&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/Hexagon/vect/vect-v4i16.ll (added)
+++ llvm/trunk/test/CodeGen/Hexagon/vect/vect-v4i16.ll Mon Jul 17 08:45:45 2017
@@ -0,0 +1,73 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv5 -disable-hsdr < %s | FileCheck %s
+
+; Check that store is post-incremented.
+; CHECK: memuh(r{{[0-9]+}}+#6)
+; CHECK: combine(r{{[0-9]+}},r{{[0-9]+}})
+; CHECK: vaddh
+
+target datalayout = "e-p:32:32:32-i64:64:64-i32:32:32-i16:16:16-i1:32:32-f64:64:64-f32:32:32-v64:64:64-v32:32:32-a0:0-n16:32"
+target triple = "hexagon"
+
+define void @matrix_add_const(i32 %N, i16* nocapture %A, i16 signext %val) #0 {
+entry:
+ %cmp5 = icmp eq i32 %N, 0
+ br i1 %cmp5, label %for.end, label %polly.cond
+
+for.end.loopexit: ; preds = %polly.stmt.for.body29
+ br label %for.end
+
+for.end: ; preds = %for.end.loopexit, %polly.loop_header24.preheader, %entry
+ ret void
+
+polly.cond: ; preds = %entry
+ %0 = icmp sgt i32 %N, 3
+ br i1 %0, label %polly.then, label %polly.loop_header24.preheader
+
+polly.then: ; preds = %polly.cond
+ %1 = add i32 %N, -1
+ %leftover_lb = and i32 %1, -4
+ %2 = icmp sgt i32 %leftover_lb, 0
+ br i1 %2, label %polly.loop_body.lr.ph, label %polly.loop_header24.preheader
+
+polly.loop_body.lr.ph: ; preds = %polly.then
+ %3 = insertelement <4 x i16> undef, i16 %val, i32 0
+ %4 = insertelement <4 x i16> %3, i16 %val, i32 1
+ %5 = insertelement <4 x i16> %4, i16 %val, i32 2
+ %6 = insertelement <4 x i16> %5, i16 %val, i32 3
+ br label %polly.loop_body
+
+polly.loop_header24.preheader.loopexit: ; preds = %polly.loop_body
+ br label %polly.loop_header24.preheader
+
+polly.loop_header24.preheader: ; preds = %polly.loop_header24.preheader.loopexit, %polly.then, %polly.cond
+ %polly.loopiv27.ph = phi i32 [ 0, %polly.cond ], [ %leftover_lb, %polly.then ], [ %leftover_lb, %polly.loop_header24.preheader.loopexit ]
+ %7 = icmp slt i32 %polly.loopiv27.ph, %N
+ br i1 %7, label %polly.stmt.for.body29.preheader, label %for.end
+
+polly.stmt.for.body29.preheader: ; preds = %polly.loop_header24.preheader
+ br label %polly.stmt.for.body29
+
+polly.loop_body: ; preds = %polly.loop_body.lr.ph, %polly.loop_body
+ %p_arrayidx.phi = phi i16* [ %A, %polly.loop_body.lr.ph ], [ %p_arrayidx.inc, %polly.loop_body ]
+ %polly.loopiv34 = phi i32 [ 0, %polly.loop_body.lr.ph ], [ %polly.next_loopiv, %polly.loop_body ]
+ %polly.next_loopiv = add nsw i32 %polly.loopiv34, 4
+ %vector_ptr = bitcast i16* %p_arrayidx.phi to <4 x i16>*
+ %_p_vec_full = load <4 x i16>, <4 x i16>* %vector_ptr, align 2
+ %addp_vec = add <4 x i16> %_p_vec_full, %6
+ store <4 x i16> %addp_vec, <4 x i16>* %vector_ptr, align 2
+ %8 = icmp slt i32 %polly.next_loopiv, %leftover_lb
+ %p_arrayidx.inc = getelementptr i16, i16* %p_arrayidx.phi, i32 4
+ br i1 %8, label %polly.loop_body, label %polly.loop_header24.preheader.loopexit
+
+polly.stmt.for.body29: ; preds = %polly.stmt.for.body29.preheader, %polly.stmt.for.body29
+ %polly.loopiv2733 = phi i32 [ %polly.next_loopiv28, %polly.stmt.for.body29 ], [ %polly.loopiv27.ph, %polly.stmt.for.body29.preheader ]
+ %polly.next_loopiv28 = add nsw i32 %polly.loopiv2733, 1
+ %p_arrayidx30 = getelementptr i16, i16* %A, i32 %polly.loopiv2733
+ %_p_scalar_ = load i16, i16* %p_arrayidx30, align 2
+ %p_add = add i16 %_p_scalar_, %val
+ store i16 %p_add, i16* %p_arrayidx30, align 2
+ %exitcond = icmp eq i32 %polly.next_loopiv28, %N
+ br i1 %exitcond, label %for.end.loopexit, label %polly.stmt.for.body29
+}
+
+attributes #0 = { nounwind "fp-contract-model"="standard" "no-frame-pointer-elim-non-leaf" "realign-stack" "relocation-model"="static" "ssp-buffers-size"="8" }