[llvm] r268293 - AMDGPU: Make i64 loads/stores promote to v2i32
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Mon May 2 13:07:26 PDT 2016
Author: arsenm
Date: Mon May 2 15:07:26 2016
New Revision: 268293
URL: http://llvm.org/viewvc/llvm-project?rev=268293&view=rev
Log:
AMDGPU: Make i64 loads/stores promote to v2i32
Now that unaligned access expansion should not attempt
to produce i64 accesses, we can remove the hack in
PreprocessISelDAG where this is done.
This allows splitting i64 private accesses while
allowing the new add nodes indexing the vector components
can be folded with the base pointer arithmetic.
Modified:
llvm/trunk/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
llvm/trunk/test/CodeGen/AMDGPU/bitreverse-inline-immediates.ll
llvm/trunk/test/CodeGen/AMDGPU/fcanonicalize.ll
llvm/trunk/test/CodeGen/AMDGPU/imm.ll
llvm/trunk/test/CodeGen/AMDGPU/shift-and-i128-ubfe.ll
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp?rev=268293&r1=268292&r2=268293&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp Mon May 2 15:07:26 2016
@@ -1586,61 +1586,6 @@ void AMDGPUDAGToDAGISel::PreprocessISelD
}
}
}
-
- // XXX - Other targets seem to be able to do this without a worklist.
- SmallVector<LoadSDNode *, 8> LoadsToReplace;
- SmallVector<StoreSDNode *, 8> StoresToReplace;
-
- for (SDNode &Node : CurDAG->allnodes()) {
- if (LoadSDNode *LD = dyn_cast<LoadSDNode>(&Node)) {
- EVT VT = LD->getValueType(0);
- if (VT != MVT::i64 || LD->getExtensionType() != ISD::NON_EXTLOAD)
- continue;
-
- // To simplify the TableGen patters, we replace all i64 loads with v2i32
- // loads. Alternatively, we could promote i64 loads to v2i32 during DAG
- // legalization, however, so places (ExpandUnalignedLoad) in the DAG
- // legalizer assume that if i64 is legal, so doing this promotion early
- // can cause problems.
- LoadsToReplace.push_back(LD);
- } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(&Node)) {
- // Handle i64 stores here for the same reason mentioned above for loads.
- SDValue Value = ST->getValue();
- if (Value.getValueType() != MVT::i64 || ST->isTruncatingStore())
- continue;
- StoresToReplace.push_back(ST);
- }
- }
-
- for (LoadSDNode *LD : LoadsToReplace) {
- SDLoc SL(LD);
-
- SDValue NewLoad = CurDAG->getLoad(MVT::v2i32, SL, LD->getChain(),
- LD->getBasePtr(), LD->getMemOperand());
- SDValue BitCast = CurDAG->getNode(ISD::BITCAST, SL,
- MVT::i64, NewLoad);
- CurDAG->ReplaceAllUsesOfValueWith(SDValue(LD, 1), NewLoad.getValue(1));
- CurDAG->ReplaceAllUsesOfValueWith(SDValue(LD, 0), BitCast);
- Modified = true;
- }
-
- for (StoreSDNode *ST : StoresToReplace) {
- SDValue NewValue = CurDAG->getNode(ISD::BITCAST, SDLoc(ST),
- MVT::v2i32, ST->getValue());
- const SDValue StoreOps[] = {
- ST->getChain(),
- NewValue,
- ST->getBasePtr(),
- ST->getOffset()
- };
-
- CurDAG->UpdateNodeOperands(ST, StoreOps);
- Modified = true;
- }
-
- // XXX - Is this necessary?
- if (Modified)
- CurDAG->RemoveDeadNodes();
}
void AMDGPUDAGToDAGISel::PostprocessISelDAG() {
Modified: llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp?rev=268293&r1=268292&r2=268293&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp Mon May 2 15:07:26 2016
@@ -95,12 +95,24 @@ SITargetLowering::SITargetLowering(Targe
setOperationAction(ISD::LOAD, MVT::v8i32, Custom);
setOperationAction(ISD::LOAD, MVT::v16i32, Custom);
+ setOperationAction(ISD::LOAD, MVT::f64, Promote);
+ AddPromotedToType(ISD::LOAD, MVT::f64, MVT::v2i32);
+
+ setOperationAction(ISD::LOAD, MVT::i64, Promote);
+ AddPromotedToType(ISD::LOAD, MVT::i64, MVT::v2i32);
+
setOperationAction(ISD::STORE, MVT::v8i32, Custom);
setOperationAction(ISD::STORE, MVT::v16i32, Custom);
setOperationAction(ISD::STORE, MVT::i1, Custom);
setOperationAction(ISD::STORE, MVT::v4i32, Custom);
+ setOperationAction(ISD::STORE, MVT::f64, Promote);
+ AddPromotedToType(ISD::STORE, MVT::f64, MVT::v2i32);
+
+ setOperationAction(ISD::STORE, MVT::i64, Promote);
+ AddPromotedToType(ISD::STORE, MVT::i64, MVT::v2i32);
+
setOperationAction(ISD::SELECT, MVT::i64, Custom);
setOperationAction(ISD::SELECT, MVT::f64, Promote);
AddPromotedToType(ISD::SELECT, MVT::f64, MVT::i64);
Modified: llvm/trunk/test/CodeGen/AMDGPU/bitreverse-inline-immediates.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/bitreverse-inline-immediates.ll?rev=268293&r1=268292&r2=268293&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/bitreverse-inline-immediates.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/bitreverse-inline-immediates.ll Mon May 2 15:07:26 2016
@@ -14,7 +14,7 @@ define void @materialize_0_i32(i32 addrs
; GCN-LABEL: {{^}}materialize_0_i64:
; GCN: v_mov_b32_e32 v[[LOK:[0-9]+]], 0{{$}}
-; GCN: v_mov_b32_e32 v[[HIK:[0-9]+]], 0{{$}}
+; GCN: v_mov_b32_e32 v[[HIK:[0-9]+]], v[[LOK]]{{$}}
; GCN: buffer_store_dwordx2 v{{\[}}[[LOK]]:[[HIK]]{{\]}}
define void @materialize_0_i64(i64 addrspace(1)* %out) {
store i64 0, i64 addrspace(1)* %out
@@ -31,7 +31,7 @@ define void @materialize_neg1_i32(i32 ad
; GCN-LABEL: {{^}}materialize_neg1_i64:
; GCN: v_mov_b32_e32 v[[LOK:[0-9]+]], -1{{$}}
-; GCN: v_mov_b32_e32 v[[HIK:[0-9]+]], -1{{$}}
+; GCN: v_mov_b32_e32 v[[HIK:[0-9]+]], v[[LOK]]{{$}}
; GCN: buffer_store_dwordx2 v{{\[}}[[LOK]]:[[HIK]]{{\]}}
define void @materialize_neg1_i64(i64 addrspace(1)* %out) {
store i64 -1, i64 addrspace(1)* %out
Modified: llvm/trunk/test/CodeGen/AMDGPU/fcanonicalize.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/fcanonicalize.ll?rev=268293&r1=268292&r2=268293&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/fcanonicalize.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/fcanonicalize.ll Mon May 2 15:07:26 2016
@@ -172,7 +172,7 @@ define void @s_test_canonicalize_var_f64
; GCN-LABEL: {{^}}test_fold_canonicalize_p0_f64:
; GCN: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
-; GCN: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}
+; GCN: v_mov_b32_e32 v[[HI:[0-9]+]], v[[LO]]{{$}}
; GCN: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
define void @test_fold_canonicalize_p0_f64(double addrspace(1)* %out) #1 {
%canonicalized = call double @llvm.canonicalize.f64(double 0.0)
@@ -225,7 +225,7 @@ define void @test_fold_canonicalize_lite
; DENORM-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0xfffff{{$}}
; NODENORM: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
-; NODENORM: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}
+; NODENORM: v_mov_b32_e32 v[[HI:[0-9]+]], v[[LO]]{{$}}
; GCN: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
define void @test_fold_canonicalize_denormal0_f64(double addrspace(1)* %out) #1 {
%canonicalized = call double @llvm.canonicalize.f64(double bitcast (i64 4503599627370495 to double))
@@ -238,7 +238,7 @@ define void @test_fold_canonicalize_deno
; DENORM-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0x800fffff{{$}}
; NODENORM: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
-; NODENORM: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}
+; NODENORM: v_mov_b32_e32 v[[HI:[0-9]+]], v[[LO]]{{$}}
; GCN: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
define void @test_fold_canonicalize_denormal1_f64(double addrspace(1)* %out) #1 {
%canonicalized = call double @llvm.canonicalize.f64(double bitcast (i64 9227875636482146303 to double))
Modified: llvm/trunk/test/CodeGen/AMDGPU/imm.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/imm.ll?rev=268293&r1=268292&r2=268293&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/imm.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/imm.ll Mon May 2 15:07:26 2016
@@ -510,7 +510,7 @@ define void @add_inline_imm_64_f64(doubl
; CHECK-LABEL: {{^}}store_inline_imm_0.0_f64:
; CHECK: v_mov_b32_e32 v[[LO_VREG:[0-9]+]], 0
-; CHECK: v_mov_b32_e32 v[[HI_VREG:[0-9]+]], 0
+; CHECK: v_mov_b32_e32 v[[HI_VREG:[0-9]+]], v[[LO_VREG]]{{$}}
; CHECK: buffer_store_dwordx2 v{{\[}}[[LO_VREG]]:[[HI_VREG]]{{\]}}
define void @store_inline_imm_0.0_f64(double addrspace(1)* %out) {
store double 0.0, double addrspace(1)* %out
Modified: llvm/trunk/test/CodeGen/AMDGPU/shift-and-i128-ubfe.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/shift-and-i128-ubfe.ll?rev=268293&r1=268292&r2=268293&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/shift-and-i128-ubfe.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/shift-and-i128-ubfe.ll Mon May 2 15:07:26 2016
@@ -4,12 +4,11 @@
; GCN-LABEL: {{^}}v_uextract_bit_31_i128:
; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
-; GCN: v_mov_b32_e32 v[[ZERO1:[0-9]+]], 0{{$}}
; GCN: v_mov_b32_e32 v[[ZERO0:[0-9]+]], 0{{$}}
-; GCN: v_mov_b32_e32 v[[ZERO2:[0-9]+]], 0{{$}}
+; GCN: v_mov_b32_e32 v[[ZERO1:[0-9]+]], v[[ZERO0]]{{$}}
; GCN-DAG: v_lshrrev_b32_e32 v[[SHIFT:[0-9]+]], 31, [[VAL]]
-; GCN-DAG: buffer_store_dwordx2 v{{\[}}[[ZERO1]]:[[ZERO2]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
+; GCN-DAG: buffer_store_dwordx2 v{{\[}}[[ZERO0]]:[[ZERO1]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
; GCN-DAG: buffer_store_dwordx2 v{{\[}}[[SHIFT]]:[[ZERO0]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; GCN: s_endpgm
define void @v_uextract_bit_31_i128(i128 addrspace(1)* %out, i128 addrspace(1)* %in) #1 {
@@ -27,12 +26,11 @@ define void @v_uextract_bit_31_i128(i128
; GCN-LABEL: {{^}}v_uextract_bit_63_i128:
; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
-; GCN: v_mov_b32_e32 v[[ZERO1:[0-9]+]], 0{{$}}
-; GCN: v_mov_b32_e32 v[[ZERO2:[0-9]+]], 0{{$}}
; GCN: v_mov_b32_e32 v[[ZERO0:[0-9]+]], 0{{$}}
+; GCN: v_mov_b32_e32 v[[ZERO1:[0-9]+]], v[[ZERO0]]{{$}}
; GCN-DAG: v_lshrrev_b32_e32 v[[SHIFT:[0-9]+]], 31, [[VAL]]
-; GCN-DAG: buffer_store_dwordx2 v{{\[}}[[ZERO1]]:[[ZERO2]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
+; GCN-DAG: buffer_store_dwordx2 v{{\[}}[[ZERO0]]:[[ZERO1]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
; GCN-DAG: buffer_store_dwordx2 v{{\[}}[[SHIFT]]:[[ZERO0]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; GCN: s_endpgm
define void @v_uextract_bit_63_i128(i128 addrspace(1)* %out, i128 addrspace(1)* %in) #1 {
@@ -50,12 +48,11 @@ define void @v_uextract_bit_63_i128(i128
; GCN-LABEL: {{^}}v_uextract_bit_95_i128:
; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
-; GCN: v_mov_b32_e32 v[[ZERO1:[0-9]+]], 0{{$}}
-; GCN: v_mov_b32_e32 v[[ZERO2:[0-9]+]], 0{{$}}
; GCN: v_mov_b32_e32 v[[ZERO0:[0-9]+]], 0{{$}}
+; GCN: v_mov_b32_e32 v[[ZERO1:[0-9]+]], v[[ZERO0]]{{$}}
; GCN-DAG: v_lshrrev_b32_e32 v[[SHIFT:[0-9]+]], 31, [[VAL]]
-; GCN-DAG: buffer_store_dwordx2 v{{\[}}[[ZERO1]]:[[ZERO2]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
+; GCN-DAG: buffer_store_dwordx2 v{{\[}}[[ZERO0]]:[[ZERO1]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
; GCN-DAG: buffer_store_dwordx2 v{{\[}}[[SHIFT]]:[[ZERO0]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; GCN: s_endpgm
define void @v_uextract_bit_95_i128(i128 addrspace(1)* %out, i128 addrspace(1)* %in) #1 {
@@ -73,12 +70,11 @@ define void @v_uextract_bit_95_i128(i128
; GCN-LABEL: {{^}}v_uextract_bit_127_i128:
; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:12{{$}}
-; GCN: v_mov_b32_e32 v[[ZERO1:[0-9]+]], 0{{$}}
-; GCN: v_mov_b32_e32 v[[ZERO2:[0-9]+]], 0{{$}}
; GCN: v_mov_b32_e32 v[[ZERO0:[0-9]+]], 0{{$}}
+; GCN: v_mov_b32_e32 v[[ZERO1:[0-9]+]], v[[ZERO0]]{{$}}
; GCN-DAG: v_lshrrev_b32_e32 v[[SHIFT:[0-9]+]], 31, [[VAL]]
-; GCN-DAG: buffer_store_dwordx2 v{{\[}}[[ZERO1]]:[[ZERO2]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
+; GCN-DAG: buffer_store_dwordx2 v{{\[}}[[ZERO0]]:[[ZERO1]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
; GCN-DAG: buffer_store_dwordx2 v{{\[}}[[SHIFT]]:[[ZERO0]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; GCN: s_endpgm
define void @v_uextract_bit_127_i128(i128 addrspace(1)* %out, i128 addrspace(1)* %in) #1 {
More information about the llvm-commits
mailing list