[llvm] r318057 - [X86] Allow X86ISD::Wrapper to be folded into the base of gather/scatter address
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Mon Nov 13 09:53:59 PST 2017
Author: ctopper
Date: Mon Nov 13 09:53:59 2017
New Revision: 318057
URL: http://llvm.org/viewvc/llvm-project?rev=318057&view=rev
Log:
[X86] Allow X86ISD::Wrapper to be folded into the base of gather/scatter address
If the base of our gather/scatter corresponds to something contained in an X86ISD::Wrapper, we should be able to fold it into the address.
This patch refactors some of the address matching to more fully use the X86ISelAddressMode struct and the getAddressOperands helper. A new helper function matchVectorAddress is added to call matchWrapper or fall back to matchAddressBase.
We should also be able to support constant offsets from a wrapper, but I'll look into that in a future patch. We may even be able to completely reuse matchAddress here, but I wanted to start simple and work up to it.
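To make that concrete, here is a minimal IR sketch of the kind of input that benefits, modeled on the test_global_array case in the updated test file (the function name, mask, and alignment operand here are illustrative):

  ; The address of @glob_array becomes a TargetGlobalAddress inside an
  ; X86ISD::Wrapper during selection; with this patch it is folded into
  ; the gather's base/displacement instead of being materialized first.
  @glob_array = external global [16 x i32]

  declare <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*>, i32,
                                                      <8 x i1>, <8 x i32>)

  define <8 x i32> @gather_from_global(<8 x i64> %indxs) {
    %p = getelementptr inbounds [16 x i32], [16 x i32]* @glob_array,
                                i64 0, <8 x i64> %indxs
    %g = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(
             <8 x i32*> %p, i32 4,
             <8 x i1> <i1 true, i1 true, i1 true, i1 true,
                       i1 true, i1 true, i1 true, i1 true>,
             <8 x i32> undef)
    ret <8 x i32> %g
  }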
Differential Revision: https://reviews.llvm.org/D39927
Modified:
llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp
llvm/trunk/test/CodeGen/X86/masked_gather_scatter.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp?rev=318057&r1=318056&r2=318057&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp Mon Nov 13 09:53:59 2017
@@ -194,6 +194,7 @@ namespace {
bool matchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM);
bool matchWrapper(SDValue N, X86ISelAddressMode &AM);
bool matchAddress(SDValue N, X86ISelAddressMode &AM);
+ bool matchVectorAddress(SDValue N, X86ISelAddressMode &AM);
bool matchAdd(SDValue N, X86ISelAddressMode &AM, unsigned Depth);
bool matchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
unsigned Depth);
@@ -1502,22 +1503,34 @@ bool X86DAGToDAGISel::matchAddressBase(S
return false;
}
+/// Helper for selectVectorAddr. Handles things that can be folded into a
+/// gather/scatter address. The index register and scale should have already
+/// been handled.
+bool X86DAGToDAGISel::matchVectorAddress(SDValue N, X86ISelAddressMode &AM) {
+ // TODO: Support other operations.
+ switch (N.getOpcode()) {
+ case X86ISD::Wrapper:
+ if (!matchWrapper(N, AM))
+ return false;
+ break;
+ }
+
+ return matchAddressBase(N, AM);
+}
+
bool X86DAGToDAGISel::selectVectorAddr(SDNode *Parent, SDValue N, SDValue &Base,
SDValue &Scale, SDValue &Index,
SDValue &Disp, SDValue &Segment) {
- unsigned ScalarSize;
+ X86ISelAddressMode AM;
if (auto Mgs = dyn_cast<MaskedGatherScatterSDNode>(Parent)) {
- Base = Mgs->getBasePtr();
- Index = Mgs->getIndex();
- ScalarSize = Mgs->getValue().getScalarValueSizeInBits();
+ AM.IndexReg = Mgs->getIndex();
+ AM.Scale = Mgs->getValue().getScalarValueSizeInBits() / 8;
} else {
auto X86Gather = cast<X86MaskedGatherSDNode>(Parent);
- Base = X86Gather->getBasePtr();
- Index = X86Gather->getIndex();
- ScalarSize = X86Gather->getValue().getScalarValueSizeInBits();
+ AM.IndexReg = X86Gather->getIndex();
+ AM.Scale = X86Gather->getValue().getScalarValueSizeInBits() / 8;
}
- X86ISelAddressMode AM;
unsigned AddrSpace = cast<MemSDNode>(Parent)->getPointerInfo().getAddrSpace();
// AddrSpace 256 -> GS, 257 -> FS, 258 -> SS.
if (AddrSpace == 256)
@@ -1527,21 +1540,23 @@ bool X86DAGToDAGISel::selectVectorAddr(S
if (AddrSpace == 258)
AM.Segment = CurDAG->getRegister(X86::SS, MVT::i16);
- SDLoc DL(N);
- Scale = getI8Imm(ScalarSize/8, DL);
-
// If Base is 0, the whole address is in index and the Scale is 1
- if (isa<ConstantSDNode>(Base)) {
- assert(cast<ConstantSDNode>(Base)->isNullValue() &&
+ if (isa<ConstantSDNode>(N)) {
+ assert(cast<ConstantSDNode>(N)->isNullValue() &&
"Unexpected base in gather/scatter");
- Scale = getI8Imm(1, DL);
- Base = CurDAG->getRegister(0, MVT::i32);
+ AM.Scale = 1;
}
- if (AM.Segment.getNode())
- Segment = AM.Segment;
- else
- Segment = CurDAG->getRegister(0, MVT::i32);
- Disp = CurDAG->getTargetConstant(0, DL, MVT::i32);
+ // Otherwise, try to match into the base and displacement fields.
+ else if (matchVectorAddress(N, AM))
+ return false;
+
+ MVT VT = N.getSimpleValueType();
+ if (AM.BaseType == X86ISelAddressMode::RegBase) {
+ if (!AM.Base_Reg.getNode())
+ AM.Base_Reg = CurDAG->getRegister(0, VT);
+ }
+
+ getAddressOperands(AM, SDLoc(N), Base, Scale, Index, Disp, Segment);
return true;
}
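To make the new scale computation concrete: for an <8 x i32> gather the
scalar value is 32 bits wide, so AM.Scale becomes 32 / 8 = 4, and when the
base is a global matched through matchWrapper, getAddressOperands can emit
the entire address as a single memory operand, as in the test_global_array
checks below:

  vpgatherqd glob_array(,%zmm0,4), %ymm1 {%k1}

where the base previously had to be materialized with a separate
movl $glob_array, %eax.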
Modified: llvm/trunk/test/CodeGen/X86/masked_gather_scatter.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/masked_gather_scatter.ll?rev=318057&r1=318056&r2=318057&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/masked_gather_scatter.ll (original)
+++ llvm/trunk/test/CodeGen/X86/masked_gather_scatter.ll Mon Nov 13 09:53:59 2017
@@ -1,7 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f < %s | FileCheck %s --check-prefix=ALL --check-prefix=KNL_64
; RUN: llc -mtriple=i386-unknown-linux-gnu -mattr=+avx512f < %s | FileCheck %s --check-prefix=ALL --check-prefix=KNL_32
-; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512vl -mattr=+avx512dq < %s | FileCheck %s --check-prefix=ALL --check-prefix=SKX
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512vl -mattr=+avx512dq < %s | FileCheck %s --check-prefix=ALL --check-prefix=SKX --check-prefix=SKX_SMALL
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512vl -mattr=+avx512dq -code-model=large < %s | FileCheck %s --check-prefix=ALL --check-prefix=SKX --check-prefix=SKX_LARGE
; RUN: llc -mtriple=i386-unknown-linux-gnu -mattr=+avx512vl -mattr=+avx512dq < %s | FileCheck %s --check-prefix=ALL --check-prefix=SKX_32
; RUN: opt -mtriple=x86_64-apple-darwin -scalarize-masked-mem-intrin -mcpu=corei7-avx -S < %s | FileCheck %s -check-prefix=SCALAR
; RUN: llc -O0 -mtriple=x86_64-unknown-linux-gnu -mcpu=skx < %s -o /dev/null
@@ -491,18 +492,34 @@ define <8 x i32> @test9(%struct.ST* %bas
; KNL_32-NEXT: vpgatherqd (,%zmm1), %ymm0 {%k1}
; KNL_32-NEXT: retl
;
-; SKX-LABEL: test9:
-; SKX: # BB#0: # %entry
-; SKX-NEXT: vpbroadcastq %rdi, %zmm2
-; SKX-NEXT: vpmullq {{.*}}(%rip){1to8}, %zmm0, %zmm0
-; SKX-NEXT: vpmovsxdq %ymm1, %zmm1
-; SKX-NEXT: vpmullq {{.*}}(%rip){1to8}, %zmm1, %zmm1
-; SKX-NEXT: vpaddq %zmm1, %zmm0, %zmm0
-; SKX-NEXT: vpaddq %zmm0, %zmm2, %zmm0
-; SKX-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm1
-; SKX-NEXT: kxnorw %k0, %k0, %k1
-; SKX-NEXT: vpgatherqd (,%zmm1), %ymm0 {%k1}
-; SKX-NEXT: retq
+; SKX_SMALL-LABEL: test9:
+; SKX_SMALL: # BB#0: # %entry
+; SKX_SMALL-NEXT: vpbroadcastq %rdi, %zmm2
+; SKX_SMALL-NEXT: vpmullq {{.*}}(%rip){1to8}, %zmm0, %zmm0
+; SKX_SMALL-NEXT: vpmovsxdq %ymm1, %zmm1
+; SKX_SMALL-NEXT: vpmullq {{.*}}(%rip){1to8}, %zmm1, %zmm1
+; SKX_SMALL-NEXT: vpaddq %zmm1, %zmm0, %zmm0
+; SKX_SMALL-NEXT: vpaddq %zmm0, %zmm2, %zmm0
+; SKX_SMALL-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm1
+; SKX_SMALL-NEXT: kxnorw %k0, %k0, %k1
+; SKX_SMALL-NEXT: vpgatherqd (,%zmm1), %ymm0 {%k1}
+; SKX_SMALL-NEXT: retq
+;
+; SKX_LARGE-LABEL: test9:
+; SKX_LARGE: # BB#0: # %entry
+; SKX_LARGE-NEXT: vpbroadcastq %rdi, %zmm2
+; SKX_LARGE-NEXT: vpmovsxdq %ymm1, %zmm1
+; SKX_LARGE-NEXT: movabsq ${{\.LCPI.*}}, %rax
+; SKX_LARGE-NEXT: vpmullq (%rax){1to8}, %zmm1, %zmm1
+; SKX_LARGE-NEXT: movabsq ${{\.LCPI.*}}, %rax
+; SKX_LARGE-NEXT: vpmullq (%rax){1to8}, %zmm0, %zmm0
+; SKX_LARGE-NEXT: vpaddq %zmm1, %zmm0, %zmm0
+; SKX_LARGE-NEXT: vpaddq %zmm0, %zmm2, %zmm0
+; SKX_LARGE-NEXT: movabsq ${{\.LCPI.*}}, %rax
+; SKX_LARGE-NEXT: vpaddq (%rax){1to8}, %zmm0, %zmm1
+; SKX_LARGE-NEXT: kxnorw %k0, %k0, %k1
+; SKX_LARGE-NEXT: vpgatherqd (,%zmm1), %ymm0 {%k1}
+; SKX_LARGE-NEXT: retq
;
; SKX_32-LABEL: test9:
; SKX_32: # BB#0: # %entry
@@ -560,18 +577,34 @@ define <8 x i32> @test10(%struct.ST* %ba
; KNL_32-NEXT: vpgatherqd (,%zmm1), %ymm0 {%k1}
; KNL_32-NEXT: retl
;
-; SKX-LABEL: test10:
-; SKX: # BB#0: # %entry
-; SKX-NEXT: vpbroadcastq %rdi, %zmm2
-; SKX-NEXT: vpmullq {{.*}}(%rip){1to8}, %zmm0, %zmm0
-; SKX-NEXT: vpmovsxdq %ymm1, %zmm1
-; SKX-NEXT: vpmullq {{.*}}(%rip){1to8}, %zmm1, %zmm1
-; SKX-NEXT: vpaddq %zmm1, %zmm0, %zmm0
-; SKX-NEXT: vpaddq %zmm0, %zmm2, %zmm0
-; SKX-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm1
-; SKX-NEXT: kxnorw %k0, %k0, %k1
-; SKX-NEXT: vpgatherqd (,%zmm1), %ymm0 {%k1}
-; SKX-NEXT: retq
+; SKX_SMALL-LABEL: test10:
+; SKX_SMALL: # BB#0: # %entry
+; SKX_SMALL-NEXT: vpbroadcastq %rdi, %zmm2
+; SKX_SMALL-NEXT: vpmullq {{.*}}(%rip){1to8}, %zmm0, %zmm0
+; SKX_SMALL-NEXT: vpmovsxdq %ymm1, %zmm1
+; SKX_SMALL-NEXT: vpmullq {{.*}}(%rip){1to8}, %zmm1, %zmm1
+; SKX_SMALL-NEXT: vpaddq %zmm1, %zmm0, %zmm0
+; SKX_SMALL-NEXT: vpaddq %zmm0, %zmm2, %zmm0
+; SKX_SMALL-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm1
+; SKX_SMALL-NEXT: kxnorw %k0, %k0, %k1
+; SKX_SMALL-NEXT: vpgatherqd (,%zmm1), %ymm0 {%k1}
+; SKX_SMALL-NEXT: retq
+;
+; SKX_LARGE-LABEL: test10:
+; SKX_LARGE: # BB#0: # %entry
+; SKX_LARGE-NEXT: vpbroadcastq %rdi, %zmm2
+; SKX_LARGE-NEXT: vpmovsxdq %ymm1, %zmm1
+; SKX_LARGE-NEXT: movabsq ${{\.LCPI.*}}, %rax
+; SKX_LARGE-NEXT: vpmullq (%rax){1to8}, %zmm1, %zmm1
+; SKX_LARGE-NEXT: movabsq ${{\.LCPI.*}}, %rax
+; SKX_LARGE-NEXT: vpmullq (%rax){1to8}, %zmm0, %zmm0
+; SKX_LARGE-NEXT: vpaddq %zmm1, %zmm0, %zmm0
+; SKX_LARGE-NEXT: vpaddq %zmm0, %zmm2, %zmm0
+; SKX_LARGE-NEXT: movabsq ${{\.LCPI.*}}, %rax
+; SKX_LARGE-NEXT: vpaddq (%rax){1to8}, %zmm0, %zmm1
+; SKX_LARGE-NEXT: kxnorw %k0, %k0, %k1
+; SKX_LARGE-NEXT: vpgatherqd (,%zmm1), %ymm0 {%k1}
+; SKX_LARGE-NEXT: retq
;
; SKX_32-LABEL: test10:
; SKX_32: # BB#0: # %entry
@@ -2330,33 +2363,37 @@ declare <4 x i64> @llvm.masked.gather.v4
define <8 x i32> @test_global_array(<8 x i64> %indxs) {
; KNL_64-LABEL: test_global_array:
; KNL_64: # BB#0:
-; KNL_64-NEXT: movl $glob_array, %eax
; KNL_64-NEXT: kxnorw %k0, %k0, %k1
-; KNL_64-NEXT: vpgatherqd (%rax,%zmm0,4), %ymm1 {%k1}
+; KNL_64-NEXT: vpgatherqd glob_array(,%zmm0,4), %ymm1 {%k1}
; KNL_64-NEXT: vmovdqa %ymm1, %ymm0
; KNL_64-NEXT: retq
;
; KNL_32-LABEL: test_global_array:
; KNL_32: # BB#0:
-; KNL_32-NEXT: movl $glob_array, %eax
; KNL_32-NEXT: kxnorw %k0, %k0, %k1
-; KNL_32-NEXT: vpgatherqd (%eax,%zmm0,4), %ymm1 {%k1}
+; KNL_32-NEXT: vpgatherqd glob_array(,%zmm0,4), %ymm1 {%k1}
; KNL_32-NEXT: vmovdqa %ymm1, %ymm0
; KNL_32-NEXT: retl
;
-; SKX-LABEL: test_global_array:
-; SKX: # BB#0:
-; SKX-NEXT: movl $glob_array, %eax
-; SKX-NEXT: kxnorw %k0, %k0, %k1
-; SKX-NEXT: vpgatherqd (%rax,%zmm0,4), %ymm1 {%k1}
-; SKX-NEXT: vmovdqa %ymm1, %ymm0
-; SKX-NEXT: retq
+; SKX_SMALL-LABEL: test_global_array:
+; SKX_SMALL: # BB#0:
+; SKX_SMALL-NEXT: kxnorw %k0, %k0, %k1
+; SKX_SMALL-NEXT: vpgatherqd glob_array(,%zmm0,4), %ymm1 {%k1}
+; SKX_SMALL-NEXT: vmovdqa %ymm1, %ymm0
+; SKX_SMALL-NEXT: retq
+;
+; SKX_LARGE-LABEL: test_global_array:
+; SKX_LARGE: # BB#0:
+; SKX_LARGE-NEXT: movabsq $glob_array, %rax
+; SKX_LARGE-NEXT: kxnorw %k0, %k0, %k1
+; SKX_LARGE-NEXT: vpgatherqd (%rax,%zmm0,4), %ymm1 {%k1}
+; SKX_LARGE-NEXT: vmovdqa %ymm1, %ymm0
+; SKX_LARGE-NEXT: retq
;
; SKX_32-LABEL: test_global_array:
; SKX_32: # BB#0:
-; SKX_32-NEXT: movl $glob_array, %eax
; SKX_32-NEXT: kxnorw %k0, %k0, %k1
-; SKX_32-NEXT: vpgatherqd (%eax,%zmm0,4), %ymm1 {%k1}
+; SKX_32-NEXT: vpgatherqd glob_array(,%zmm0,4), %ymm1 {%k1}
; SKX_32-NEXT: vmovdqa %ymm1, %ymm0
; SKX_32-NEXT: retl
%p = getelementptr inbounds [16 x i32], [16 x i32]* @glob_array, i64 0, <8 x i64> %indxs
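Note the new -code-model=large RUN line: matchWrapper only folds a wrapped
address when it is known to fit in the 32-bit displacement field, which the
large code model cannot guarantee, so the SKX_LARGE checks still
materialize the base in a register first:

  movabsq $glob_array, %rax
  vpgatherqd (%rax,%zmm0,4), %ymm1 {%k1}

while the small-code-model configurations now fold the global directly.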