[llvm] 154c036 - [X86] combineX86GatherScatter - only fold scale if the index isn't extended

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Fri Oct 29 03:48:33 PDT 2021


Author: Simon Pilgrim
Date: 2021-10-29T11:48:05+01:00
New Revision: 154c036ebb9ba788f4025fa15c8896cb1f13e066

URL: https://github.com/llvm/llvm-project/commit/154c036ebb9ba788f4025fa15c8896cb1f13e066
DIFF: https://github.com/llvm/llvm-project/commit/154c036ebb9ba788f4025fa15c8896cb1f13e066.diff

LOG: [X86] combineX86GatherScatter - only fold scale if the index isn't extended

As mentioned on D108539, when the gather indices are smaller than the pointer size, they are sign-extended BEFORE the scale is applied, making the general fold unsafe.

If the index has sufficient sign bits then folding the scale could be safe - I'll investigate this.
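
For illustration only (not part of the patch), a minimal standalone C++ sketch of the arithmetic involved: with a 32-bit index and a 64-bit pointer, shifting in the narrow type and then sign-extending (the gather/scatter node's semantics) can wrap, while sign-extending first and applying the folded scale does not, so the two address contributions diverge.

  #include <cstdint>
  #include <cstdio>

  int main() {
    int32_t Idx = 0x40000000; // 32-bit index with a high bit set

    // Node semantics before the fold: shift in the narrow 32-bit type,
    // then sign-extend to pointer width, scale 1. The shift wraps to 0.
    int64_t Unfolded = (int64_t)(int32_t)((uint32_t)Idx << 2) * 1;

    // Semantics after the (unsafe) fold: sign-extend first, then scale by 4.
    int64_t Folded = (int64_t)Idx * 4;

    printf("unfolded=0x%llx folded=0x%llx\n",
           (unsigned long long)Unfolded, (unsigned long long)Folded);
    // Prints unfolded=0x0 folded=0x100000000 - different addresses.
    return 0;
  }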

Added: 
    

Modified: 
    llvm/lib/Target/X86/X86ISelLowering.cpp
    llvm/test/CodeGen/X86/masked_gather_scatter.ll

Removed: 
    


################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index e922cb356dfe2..7954290f06254 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -50230,16 +50230,20 @@ static SDValue combineX86GatherScatter(SDNode *N, SelectionDAG &DAG,
                                        TargetLowering::DAGCombinerInfo &DCI,
                                        const X86Subtarget &Subtarget) {
   auto *MemOp = cast<X86MaskedGatherScatterSDNode>(N);
+  SDValue BasePtr = MemOp->getBasePtr();
   SDValue Index = MemOp->getIndex();
   SDValue Scale = MemOp->getScale();
   SDValue Mask = MemOp->getMask();
 
   // Attempt to fold an index scale into the scale value directly.
+  // For smaller indices, implicit sext is performed BEFORE scale, preventing
+  // this fold under most circumstances.
   // TODO: Move this into X86DAGToDAGISel::matchVectorAddressRecursively?
   if ((Index.getOpcode() == X86ISD::VSHLI ||
        (Index.getOpcode() == ISD::ADD &&
         Index.getOperand(0) == Index.getOperand(1))) &&
-      isa<ConstantSDNode>(Scale)) {
+      isa<ConstantSDNode>(Scale) &&
+      BasePtr.getScalarValueSizeInBits() == Index.getScalarValueSizeInBits()) {
     unsigned ShiftAmt =
         Index.getOpcode() == ISD::ADD ? 1 : Index.getConstantOperandVal(1);
     uint64_t ScaleAmt = cast<ConstantSDNode>(Scale)->getZExtValue();
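
The guard added above disables the fold whenever the index is narrower than the base pointer. The log note about sufficient sign bits hints at a possible relaxation; a rough, untested sketch of that idea (not part of this commit), reusing the surrounding BasePtr/Index values, the ShiftAmt computed inside the block, and SelectionDAG::ComputeNumSignBits:

  // Safe if the widths already match, or if the pre-shift index has more
  // sign bits than the shift amount, so the narrow shift cannot change the
  // value after sign-extension to pointer width.
  bool SafeToFoldScale =
      BasePtr.getScalarValueSizeInBits() == Index.getScalarValueSizeInBits() ||
      DAG.ComputeNumSignBits(Index.getOperand(0)) > ShiftAmt;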

diff --git a/llvm/test/CodeGen/X86/masked_gather_scatter.ll b/llvm/test/CodeGen/X86/masked_gather_scatter.ll
index fbe02af64e3db..387513db012a0 100644
--- a/llvm/test/CodeGen/X86/masked_gather_scatter.ll
+++ b/llvm/test/CodeGen/X86/masked_gather_scatter.ll
@@ -4992,9 +4992,9 @@ define void @splat_ptr_scatter(i32* %ptr, <4 x i1> %mask, <4 x i32> %val) {
 define <8 x float> @scaleidx_x86gather(float* %base, <8 x i32> %index, <8 x i32> %imask) nounwind {
 ; KNL_64-LABEL: scaleidx_x86gather:
 ; KNL_64:       # %bb.0:
-; KNL_64-NEXT:    vxorps %xmm2, %xmm2, %xmm2
-; KNL_64-NEXT:    vgatherdps %ymm1, (%rdi,%ymm0,4), %ymm2
-; KNL_64-NEXT:    vmovaps %ymm2, %ymm0
+; KNL_64-NEXT:    vpslld $2, %ymm0, %ymm2
+; KNL_64-NEXT:    vpxor %xmm0, %xmm0, %xmm0
+; KNL_64-NEXT:    vgatherdps %ymm1, (%rdi,%ymm2), %ymm0
 ; KNL_64-NEXT:    retq
 ;
 ; KNL_32-LABEL: scaleidx_x86gather:
@@ -5007,9 +5007,9 @@ define <8 x float> @scaleidx_x86gather(float* %base, <8 x i32> %index, <8 x i32>
 ;
 ; SKX-LABEL: scaleidx_x86gather:
 ; SKX:       # %bb.0:
-; SKX-NEXT:    vxorps %xmm2, %xmm2, %xmm2
-; SKX-NEXT:    vgatherdps %ymm1, (%rdi,%ymm0,4), %ymm2
-; SKX-NEXT:    vmovaps %ymm2, %ymm0
+; SKX-NEXT:    vpslld $2, %ymm0, %ymm2
+; SKX-NEXT:    vpxor %xmm0, %xmm0, %xmm0
+; SKX-NEXT:    vgatherdps %ymm1, (%rdi,%ymm2), %ymm0
 ; SKX-NEXT:    retq
 ;
 ; SKX_32-LABEL: scaleidx_x86gather:
@@ -5068,7 +5068,8 @@ define void @scaleidx_x86scatter(<16 x float> %value, float* %base, <16 x i32> %
 ; KNL_64-LABEL: scaleidx_x86scatter:
 ; KNL_64:       # %bb.0:
 ; KNL_64-NEXT:    kmovw %esi, %k1
-; KNL_64-NEXT:    vscatterdps %zmm0, (%rdi,%zmm1,4) {%k1}
+; KNL_64-NEXT:    vpaddd %zmm1, %zmm1, %zmm1
+; KNL_64-NEXT:    vscatterdps %zmm0, (%rdi,%zmm1,2) {%k1}
 ; KNL_64-NEXT:    vzeroupper
 ; KNL_64-NEXT:    retq
 ;
@@ -5083,7 +5084,8 @@ define void @scaleidx_x86scatter(<16 x float> %value, float* %base, <16 x i32> %
 ; SKX-LABEL: scaleidx_x86scatter:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    kmovw %esi, %k1
-; SKX-NEXT:    vscatterdps %zmm0, (%rdi,%zmm1,4) {%k1}
+; SKX-NEXT:    vpaddd %zmm1, %zmm1, %zmm1
+; SKX-NEXT:    vscatterdps %zmm0, (%rdi,%zmm1,2) {%k1}
 ; SKX-NEXT:    vzeroupper
 ; SKX-NEXT:    retq
 ;
@@ -5129,8 +5131,9 @@ define void @scaleidx_scatter(<8 x float> %value, float* %base, <8 x i32> %index
 ;
 ; SKX-LABEL: scaleidx_scatter:
 ; SKX:       # %bb.0:
+; SKX-NEXT:    vpaddd %ymm1, %ymm1, %ymm1
 ; SKX-NEXT:    kmovw %esi, %k1
-; SKX-NEXT:    vscatterdps %ymm0, (%rdi,%ymm1,8) {%k1}
+; SKX-NEXT:    vscatterdps %ymm0, (%rdi,%ymm1,4) {%k1}
 ; SKX-NEXT:    vzeroupper
 ; SKX-NEXT:    retq
 ;


        

