[llvm] e6ead19 - Revert "Recommit "[SLP] Fix lookahead operand reordering for splat loads." attempt 2, fixed assertion crash."

Wed Mar 23 11:08:58 PDT 2022

This change has landed and been reverted several times, causing a decent 
amount of churn and some local merge conflicts for me.

When this is relanded, can I ask that we try to split the change in a 
way that reduces churn if it needs to be reverted again? Maybe add a 
temporary flag that is off by default, then enable it separately? Or, 
alternatively, it looks like some of this might be separable into an NFC 
API change.

Philip

On 3/23/22 10:59, Arthur Eubanks via llvm-commits wrote:
> Author: Arthur Eubanks
> Date: 2022-03-23T10:57:45-07:00
> New Revision: e6ead19b774718113007ecb1a4449d7af0cbcfeb
>
> URL: https://github.com/llvm/llvm-project/commit/e6ead19b774718113007ecb1a4449d7af0cbcfeb
> DIFF: https://github.com/llvm/llvm-project/commit/e6ead19b774718113007ecb1a4449d7af0cbcfeb.diff
>
> LOG: Revert "Recommit "[SLP] Fix lookahead operand reordering for splat loads." attempt 2, fixed assertion crash."
>
> This reverts commit 27bd8f94928201f87f6b659fc2228efd539e8245.
>
> Causes crashes, see comments in D121973
>
> Added:
>      
>
> Modified:
>      llvm/include/llvm/Analysis/TargetTransformInfo.h
>      llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
>      llvm/include/llvm/CodeGen/BasicTTIImpl.h
>      llvm/lib/Analysis/TargetTransformInfo.cpp
>      llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
>      llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
>      llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
>      llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
>      llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
>      llvm/lib/Target/ARM/ARMTargetTransformInfo.h
>      llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp
>      llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h
>      llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
>      llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
>      llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
>      llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
>      llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
>      llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h
>      llvm/lib/Target/X86/X86TargetTransformInfo.cpp
>      llvm/lib/Target/X86/X86TargetTransformInfo.h
>      llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
>      llvm/test/Transforms/SLPVectorizer/X86/lookahead.ll
>      llvm/test/Transforms/SLPVectorizer/X86/operandorder.ll
>
> Removed:
>      
>
>
> ################################################################################
> diff  --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
> index 31eb40e365b63..23a3faa4d9687 100644
> --- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
> +++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
> @@ -658,10 +658,6 @@ class TargetTransformInfo {
>     /// Return true if the target supports nontemporal load.
>     bool isLegalNTLoad(Type *DataType, Align Alignment) const;
>   
> -  /// \Returns true if the target supports broadcasting a load to a vector of
> -  /// type <NumElements x ElementTy>.
> -  bool isLegalBroadcastLoad(Type *ElementTy, unsigned NumElements) const;
> -
>     /// Return true if the target supports masked scatter.
>     bool isLegalMaskedScatter(Type *DataType, Align Alignment) const;
>     /// Return true if the target supports masked gather.
> @@ -1048,14 +1044,11 @@ class TargetTransformInfo {
>     /// The exact mask may be passed as Mask, or else the array will be empty.
>     /// The index and subtype parameters are used by the subvector insertion and
>     /// extraction shuffle kinds to show the insert/extract point and the type of
> -  /// the subvector being inserted/extracted. The operands of the shuffle can be
> -  /// passed through \p Args, which helps improve the cost estimation in some
> -  /// cases, like in broadcast loads.
> +  /// the subvector being inserted/extracted.
>     /// NOTE: For subvector extractions Tp represents the source type.
>     InstructionCost getShuffleCost(ShuffleKind Kind, VectorType *Tp,
>                                    ArrayRef<int> Mask = None, int Index = 0,
> -                                 VectorType *SubTp = nullptr,
> -                                 ArrayRef<Value *> Args = None) const;
> +                                 VectorType *SubTp = nullptr) const;
>   
>     /// Represents a hint about the context in which a cast is used.
>     ///
> @@ -1556,8 +1549,6 @@ class TargetTransformInfo::Concept {
>     virtual bool isLegalMaskedLoad(Type *DataType, Align Alignment) = 0;
>     virtual bool isLegalNTStore(Type *DataType, Align Alignment) = 0;
>     virtual bool isLegalNTLoad(Type *DataType, Align Alignment) = 0;
> -  virtual bool isLegalBroadcastLoad(Type *ElementTy,
> -                                    unsigned NumElements) const = 0;
>     virtual bool isLegalMaskedScatter(Type *DataType, Align Alignment) = 0;
>     virtual bool isLegalMaskedGather(Type *DataType, Align Alignment) = 0;
>     virtual bool forceScalarizeMaskedGather(VectorType *DataType,
> @@ -1668,8 +1659,7 @@ class TargetTransformInfo::Concept {
>         ArrayRef<const Value *> Args, const Instruction *CxtI = nullptr) = 0;
>     virtual InstructionCost getShuffleCost(ShuffleKind Kind, VectorType *Tp,
>                                            ArrayRef<int> Mask, int Index,
> -                                         VectorType *SubTp,
> -                                         ArrayRef<Value *> Args) = 0;
> +                                         VectorType *SubTp) = 0;
>     virtual InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst,
>                                              Type *Src, CastContextHint CCH,
>                                              TTI::TargetCostKind CostKind,
> @@ -1962,10 +1952,6 @@ class TargetTransformInfo::Model final : public TargetTransformInfo::Concept {
>     bool isLegalNTLoad(Type *DataType, Align Alignment) override {
>       return Impl.isLegalNTLoad(DataType, Alignment);
>     }
> -  bool isLegalBroadcastLoad(Type *ElementTy,
> -                            unsigned NumElements) const override {
> -    return Impl.isLegalBroadcastLoad(ElementTy, NumElements);
> -  }
>     bool isLegalMaskedScatter(Type *DataType, Align Alignment) override {
>       return Impl.isLegalMaskedScatter(DataType, Alignment);
>     }
> @@ -2193,9 +2179,8 @@ class TargetTransformInfo::Model final : public TargetTransformInfo::Concept {
>     }
>     InstructionCost getShuffleCost(ShuffleKind Kind, VectorType *Tp,
>                                    ArrayRef<int> Mask, int Index,
> -                                 VectorType *SubTp,
> -                                 ArrayRef<Value *> Args) override {
> -    return Impl.getShuffleCost(Kind, Tp, Mask, Index, SubTp, Args);
> +                                 VectorType *SubTp) override {
> +    return Impl.getShuffleCost(Kind, Tp, Mask, Index, SubTp);
>     }
>     InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
>                                      CastContextHint CCH,
>
> diff  --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
> index 806014ee54fdc..45990266fb30e 100644
> --- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
> +++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
> @@ -256,10 +256,6 @@ class TargetTransformInfoImplBase {
>       return Alignment >= DataSize && isPowerOf2_32(DataSize);
>     }
>   
> -  bool isLegalBroadcastLoad(Type *ElementTy, unsigned NumElements) const {
> -    return false;
> -  }
> -
>     bool isLegalMaskedScatter(Type *DataType, Align Alignment) const {
>       return false;
>     }
> @@ -492,8 +488,7 @@ class TargetTransformInfoImplBase {
>   
>     InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Ty,
>                                    ArrayRef<int> Mask, int Index,
> -                                 VectorType *SubTp,
> -                                 ArrayRef<Value *> Args = None) const {
> +                                 VectorType *SubTp) const {
>       return 1;
>     }
>   
>
> diff  --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
> index d3b2272090d4d..8e62dfff117b5 100644
> --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
> +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
> @@ -871,8 +871,7 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
>   
>     InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp,
>                                    ArrayRef<int> Mask, int Index,
> -                                 VectorType *SubTp,
> -                                 ArrayRef<Value *> Args = None) {
> +                                 VectorType *SubTp) {
>   
>       switch (improveShuffleKindFromMask(Kind, Mask)) {
>       case TTI::SK_Broadcast:
>
> diff  --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp
> index 804331ecfa14d..ae2221433eca7 100644
> --- a/llvm/lib/Analysis/TargetTransformInfo.cpp
> +++ b/llvm/lib/Analysis/TargetTransformInfo.cpp
> @@ -396,11 +396,6 @@ bool TargetTransformInfo::isLegalNTLoad(Type *DataType, Align Alignment) const {
>     return TTIImpl->isLegalNTLoad(DataType, Alignment);
>   }
>   
> -bool TargetTransformInfo::isLegalBroadcastLoad(Type *ElementTy,
> -                                               unsigned NumElements) const {
> -  return TTIImpl->isLegalBroadcastLoad(ElementTy, NumElements);
> -}
> -
>   bool TargetTransformInfo::isLegalMaskedGather(Type *DataType,
>                                                 Align Alignment) const {
>     return TTIImpl->isLegalMaskedGather(DataType, Alignment);
> @@ -745,11 +740,12 @@ InstructionCost TargetTransformInfo::getArithmeticInstrCost(
>     return Cost;
>   }
>   
> -InstructionCost TargetTransformInfo::getShuffleCost(
> -    ShuffleKind Kind, VectorType *Ty, ArrayRef<int> Mask, int Index,
> -    VectorType *SubTp, ArrayRef<Value *> Args) const {
> -  InstructionCost Cost =
> -      TTIImpl->getShuffleCost(Kind, Ty, Mask, Index, SubTp, Args);
> +InstructionCost TargetTransformInfo::getShuffleCost(ShuffleKind Kind,
> +                                                    VectorType *Ty,
> +                                                    ArrayRef<int> Mask,
> +                                                    int Index,
> +                                                    VectorType *SubTp) const {
> +  InstructionCost Cost = TTIImpl->getShuffleCost(Kind, Ty, Mask, Index, SubTp);
>     assert(Cost >= 0 && "TTI should not produce negative costs!");
>     return Cost;
>   }
>
> diff  --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
> index b9f6f49dd04db..2cd3c93167f14 100644
> --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
> +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
> @@ -2604,8 +2604,7 @@ InstructionCost AArch64TTIImpl::getSpliceCost(VectorType *Tp, int Index) {
>   InstructionCost AArch64TTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
>                                                  VectorType *Tp,
>                                                  ArrayRef<int> Mask, int Index,
> -                                               VectorType *SubTp,
> -                                               ArrayRef<Value *> Args) {
> +                                               VectorType *SubTp) {
>     Kind = improveShuffleKindFromMask(Kind, Mask);
>     if (Kind == TTI::SK_Broadcast || Kind == TTI::SK_Transpose ||
>         Kind == TTI::SK_Select || Kind == TTI::SK_PermuteSingleSrc ||
>
> diff  --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
> index 92005b3ba40c9..a6029b9f24456 100644
> --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
> +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
> @@ -330,8 +330,7 @@ class AArch64TTIImpl : public BasicTTIImplBase<AArch64TTIImpl> {
>   
>     InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp,
>                                    ArrayRef<int> Mask, int Index,
> -                                 VectorType *SubTp,
> -                                 ArrayRef<Value *> Args = None);
> +                                 VectorType *SubTp);
>     /// @}
>   };
>   
>
> diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
> index bdd22a4614f4f..a8df7789c8a19 100644
> --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
> +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
> @@ -1042,8 +1042,7 @@ Value *GCNTTIImpl::rewriteIntrinsicWithAddressSpace(IntrinsicInst *II,
>   
>   InstructionCost GCNTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
>                                              VectorType *VT, ArrayRef<int> Mask,
> -                                           int Index, VectorType *SubTp,
> -                                           ArrayRef<Value *> Args) {
> +                                           int Index, VectorType *SubTp) {
>     Kind = improveShuffleKindFromMask(Kind, Mask);
>     if (ST->hasVOP3PInsts()) {
>       if (cast<FixedVectorType>(VT)->getNumElements() == 2 &&
>
> diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
> index 4743042f5faea..e901b5c5747d6 100644
> --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
> +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
> @@ -201,8 +201,7 @@ class GCNTTIImpl final : public BasicTTIImplBase<GCNTTIImpl> {
>   
>     InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp,
>                                    ArrayRef<int> Mask, int Index,
> -                                 VectorType *SubTp,
> -                                 ArrayRef<Value *> Args = None);
> +                                 VectorType *SubTp);
>   
>     bool areInlineCompatible(const Function *Caller,
>                              const Function *Callee) const;
>
> diff  --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
> index 7e802238b362e..d9d563ead2605 100644
> --- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
> +++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
> @@ -1202,8 +1202,7 @@ InstructionCost ARMTTIImpl::getMemcpyCost(const Instruction *I) {
>   
>   InstructionCost ARMTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
>                                              VectorType *Tp, ArrayRef<int> Mask,
> -                                           int Index, VectorType *SubTp,
> -                                           ArrayRef<Value *> Args) {
> +                                           int Index, VectorType *SubTp) {
>     Kind = improveShuffleKindFromMask(Kind, Mask);
>     if (ST->hasNEON()) {
>       if (Kind == TTI::SK_Broadcast) {
>
> diff  --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
> index 3139c412aeb87..5bb84899e5ef0 100644
> --- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
> +++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
> @@ -213,8 +213,7 @@ class ARMTTIImpl : public BasicTTIImplBase<ARMTTIImpl> {
>   
>     InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp,
>                                    ArrayRef<int> Mask, int Index,
> -                                 VectorType *SubTp,
> -                                 ArrayRef<Value *> Args = None);
> +                                 VectorType *SubTp);
>   
>     bool preferInLoopReduction(unsigned Opcode, Type *Ty,
>                                TTI::ReductionFlags Flags) const;
>
> diff  --git a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp
> index 280d8f19725b4..1bdd8c3c513a1 100644
> --- a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp
> +++ b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp
> @@ -223,8 +223,7 @@ HexagonTTIImpl::getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
>   
>   InstructionCost HexagonTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp,
>                                                  ArrayRef<int> Mask, int Index,
> -                                               Type *SubTp,
> -                                               ArrayRef<Value *> Args) {
> +                                               Type *SubTp) {
>     return 1;
>   }
>   
>
> diff  --git a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h
> index 65eb9d9fb5bbe..9e637dfc3e161 100644
> --- a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h
> +++ b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h
> @@ -125,8 +125,7 @@ class HexagonTTIImpl : public BasicTTIImplBase<HexagonTTIImpl> {
>                                           Align Alignment, unsigned AddressSpace,
>                                           TTI::TargetCostKind CostKind);
>     InstructionCost getShuffleCost(TTI::ShuffleKind Kind, Type *Tp,
> -                                 ArrayRef<int> Mask, int Index, Type *SubTp,
> -                                 ArrayRef<Value *> Args = None);
> +                                 ArrayRef<int> Mask, int Index, Type *SubTp);
>     InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
>                                            const Value *Ptr, bool VariableMask,
>                                            Align Alignment,
>
> diff  --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
> index 1ee960fcdbc73..cc5738a5d7b63 100644
> --- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
> +++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
> @@ -1015,8 +1015,7 @@ InstructionCost PPCTTIImpl::getArithmeticInstrCost(
>   
>   InstructionCost PPCTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp,
>                                              ArrayRef<int> Mask, int Index,
> -                                           Type *SubTp,
> -                                           ArrayRef<Value *> Args) {
> +                                           Type *SubTp) {
>   
>     InstructionCost CostFactor =
>         vectorCostAdjustmentFactor(Instruction::ShuffleVector, Tp, nullptr);
>
> diff  --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
> index 61cb689c352fa..0af6f2a308d91 100644
> --- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
> +++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
> @@ -111,8 +111,7 @@ class PPCTTIImpl : public BasicTTIImplBase<PPCTTIImpl> {
>         ArrayRef<const Value *> Args = ArrayRef<const Value *>(),
>         const Instruction *CxtI = nullptr);
>     InstructionCost getShuffleCost(TTI::ShuffleKind Kind, Type *Tp,
> -                                 ArrayRef<int> Mask, int Index, Type *SubTp,
> -                                 ArrayRef<Value *> Args = None);
> +                                 ArrayRef<int> Mask, int Index, Type *SubTp);
>     InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
>                                      TTI::CastContextHint CCH,
>                                      TTI::TargetCostKind CostKind,
>
> diff  --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
> index 5f797f1dae7f8..6721a0a258d3d 100644
> --- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
> +++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
> @@ -175,8 +175,7 @@ InstructionCost RISCVTTIImpl::getSpliceCost(VectorType *Tp, int Index) {
>   
>   InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
>                                                VectorType *Tp, ArrayRef<int> Mask,
> -                                             int Index, VectorType *SubTp,
> -                                             ArrayRef<Value *> Args) {
> +                                             int Index, VectorType *SubTp) {
>     if (Kind == TTI::SK_Splice && isa<ScalableVectorType>(Tp))
>       return getSpliceCost(Tp, Index);
>     return BaseT::getShuffleCost(Kind, Tp, Mask, Index, SubTp);
>
> diff  --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
> index 9088d4847f3d9..63c7ed0bbdbb7 100644
> --- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
> +++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
> @@ -80,8 +80,7 @@ class RISCVTTIImpl : public BasicTTIImplBase<RISCVTTIImpl> {
>     InstructionCost getSpliceCost(VectorType *Tp, int Index);
>     InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp,
>                                    ArrayRef<int> Mask, int Index,
> -                                 VectorType *SubTp,
> -                                 ArrayRef<Value *> Args = None);
> +                                 VectorType *SubTp);
>   
>     InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
>                                            const Value *Ptr, bool VariableMask,
>
> diff  --git a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
> index 5e0c4805c5091..6d66ebfced05e 100644
> --- a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
> +++ b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
> @@ -559,8 +559,7 @@ InstructionCost SystemZTTIImpl::getArithmeticInstrCost(
>   InstructionCost SystemZTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
>                                                  VectorType *Tp,
>                                                  ArrayRef<int> Mask, int Index,
> -                                               VectorType *SubTp,
> -                                               ArrayRef<Value *> Args) {
> +                                               VectorType *SubTp) {
>     Kind = improveShuffleKindFromMask(Kind, Mask);
>     if (ST->hasVector()) {
>       unsigned NumVectors = getNumVectorRegs(Tp);
>
> diff  --git a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h
> index acc9aee0c8fe5..db4ec794b3e4a 100644
> --- a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h
> +++ b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h
> @@ -92,8 +92,7 @@ class SystemZTTIImpl : public BasicTTIImplBase<SystemZTTIImpl> {
>         const Instruction *CxtI = nullptr);
>     InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp,
>                                    ArrayRef<int> Mask, int Index,
> -                                 VectorType *SubTp,
> -                                 ArrayRef<Value *> Args = None);
> +                                 VectorType *SubTp);
>     unsigned getVectorTruncCost(Type *SrcTy, Type *DstTy);
>     unsigned getVectorBitmaskConversionCost(Type *SrcTy, Type *DstTy);
>     unsigned getBoolVecToIntConversionCost(unsigned Opcode, Type *Dst,
>
> diff  --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
> index ed3a7e2d5c238..32f9f56184d47 100644
> --- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
> +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
> @@ -1085,8 +1085,7 @@ InstructionCost X86TTIImpl::getArithmeticInstrCost(
>   InstructionCost X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
>                                              VectorType *BaseTp,
>                                              ArrayRef<int> Mask, int Index,
> -                                           VectorType *SubTp,
> -                                           ArrayRef<Value *> Args) {
> +                                           VectorType *SubTp) {
>     // 64-bit packed float vectors (v2f32) are widened to type v4f32.
>     // 64-bit packed integer vectors (v2i32) are widened to type v4i32.
>     std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, BaseTp);
> @@ -1546,27 +1545,9 @@ InstructionCost X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
>       { TTI::SK_PermuteTwoSrc,    MVT::v16i8, 13 }, // blend+permute
>     };
>   
> -  static const CostTblEntry SSE3BroadcastLoadTbl[] = {
> -      {TTI::SK_Broadcast, MVT::v2f64, 0}, // broadcast handled by movddup
> -  };
> -
> -  if (ST->hasSSE2()) {
> -    bool IsLoad = !Args.empty() && llvm::all_of(Args, [](const Value *V) {
> -      return isa<LoadInst>(V);
> -    });
> -    if (ST->hasSSE3() && IsLoad)
> -      if (const auto *Entry =
> -              CostTableLookup(SSE3BroadcastLoadTbl, Kind, LT.second)) {
> -        assert(isLegalBroadcastLoad(
> -                   BaseTp->getElementType(),
> -                   cast<FixedVectorType>(BaseTp)->getNumElements()) &&
> -               "Table entry missing from isLegalBroadcastLoad()");
> -        return LT.first * Entry->Cost;
> -      }
> -
> +  if (ST->hasSSE2())
>       if (const auto *Entry = CostTableLookup(SSE2ShuffleTbl, Kind, LT.second))
>         return LT.first * Entry->Cost;
> -  }
>   
>     static const CostTblEntry SSE1ShuffleTbl[] = {
>       { TTI::SK_Broadcast,        MVT::v4f32, 1 }, // shufps
> @@ -5137,13 +5118,6 @@ bool X86TTIImpl::isLegalNTStore(Type *DataType, Align Alignment) {
>     return true;
>   }
>   
> -bool X86TTIImpl::isLegalBroadcastLoad(Type *ElementTy,
> -                                      unsigned NumElements) const {
> -  // movddup
> -  return ST->hasSSE3() && NumElements == 2 &&
> -         ElementTy == Type::getDoubleTy(ElementTy->getContext());
> -}
> -
>   bool X86TTIImpl::isLegalMaskedExpandLoad(Type *DataTy) {
>     if (!isa<VectorType>(DataTy))
>       return false;
>
> diff  --git a/llvm/lib/Target/X86/X86TargetTransformInfo.h b/llvm/lib/Target/X86/X86TargetTransformInfo.h
> index d262835dd44a4..a8909ee6457de 100644
> --- a/llvm/lib/Target/X86/X86TargetTransformInfo.h
> +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.h
> @@ -131,8 +131,7 @@ class X86TTIImpl : public BasicTTIImplBase<X86TTIImpl> {
>         const Instruction *CxtI = nullptr);
>     InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp,
>                                    ArrayRef<int> Mask, int Index,
> -                                 VectorType *SubTp,
> -                                 ArrayRef<Value *> Args = None);
> +                                 VectorType *SubTp);
>     InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
>                                      TTI::CastContextHint CCH,
>                                      TTI::TargetCostKind CostKind,
> @@ -227,7 +226,6 @@ class X86TTIImpl : public BasicTTIImplBase<X86TTIImpl> {
>     bool isLegalMaskedStore(Type *DataType, Align Alignment);
>     bool isLegalNTLoad(Type *DataType, Align Alignment);
>     bool isLegalNTStore(Type *DataType, Align Alignment);
> -  bool isLegalBroadcastLoad(Type *ElementTy, unsigned NumElements) const;
>     bool forceScalarizeMaskedGather(VectorType *VTy, Align Alignment);
>     bool forceScalarizeMaskedScatter(VectorType *VTy, Align Alignment) {
>       return forceScalarizeMaskedGather(VTy, Alignment);
>
> diff  --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
> index 29e5903ab6abe..80bec962b9518 100644
> --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
> +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
> @@ -1138,11 +1138,6 @@ class BoUpSLP {
>   
>       /// Loads from consecutive memory addresses, e.g. load(A[i]), load(A[i+1]).
>       static const int ScoreConsecutiveLoads = 4;
> -    /// The same load multiple times. This should have a better score than
> -    /// `ScoreSplat` because it in x86 for a 2-lane vector we can represent it
> -    /// with `movddup (%reg), xmm0` which has a throughput of 0.5 versus 0.5 for
> -    /// a vector load and 1.0 for a broadcast.
> -    static const int ScoreSplatLoads = 3;
>       /// Loads from reversed memory addresses, e.g. load(A[i+1]), load(A[i]).
>       static const int ScoreReversedLoads = 3;
>       /// ExtractElementInst from same vector and consecutive indexes.
> @@ -1169,18 +1164,9 @@ class BoUpSLP {
>       /// MainAltOps.
>       static int getShallowScore(Value *V1, Value *V2, const DataLayout &DL,
>                                  ScalarEvolution &SE, int NumLanes,
> -                               ArrayRef<Value *> MainAltOps,
> -                               const TargetTransformInfo *TTI) {
> -      if (V1 == V2) {
> -        if (isa<LoadInst>(V1)) {
> -          // A broadcast of a load can be cheaper on some targets.
> -          // TODO: For now accept a broadcast load with no other internal uses.
> -          if (TTI->isLegalBroadcastLoad(V1->getType(), NumLanes) &&
> -              (int)V1->getNumUses() == NumLanes)
> -            return VLOperands::ScoreSplatLoads;
> -        }
> +                               ArrayRef<Value *> MainAltOps) {
> +      if (V1 == V2)
>           return VLOperands::ScoreSplat;
> -      }
>   
>         auto *LI1 = dyn_cast<LoadInst>(V1);
>         auto *LI2 = dyn_cast<LoadInst>(V2);
> @@ -1359,7 +1345,7 @@ class BoUpSLP {
>   
>         // Get the shallow score of V1 and V2.
>         int ShallowScoreAtThisLevel =
> -          getShallowScore(LHS, RHS, DL, SE, getNumLanes(), MainAltOps, R.TTI);
> +          getShallowScore(LHS, RHS, DL, SE, getNumLanes(), MainAltOps);
>   
>         // If reached MaxLevel,
>         //  or if V1 and V2 are not instructions,
> @@ -5253,9 +5239,7 @@ InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E,
>         // broadcast.
>         assert(VecTy == FinalVecTy &&
>                "No reused scalars expected for broadcast.");
> -      return TTI->getShuffleCost(TargetTransformInfo::SK_Broadcast, VecTy,
> -                                 /*Mask=*/None, /*Index=*/0,
> -                                 /*SubTp=*/nullptr, /*Args=*/VL);
> +      return TTI->getShuffleCost(TargetTransformInfo::SK_Broadcast, VecTy);
>       }
>       InstructionCost ReuseShuffleCost = 0;
>       if (NeedToShuffleReuses)
>
> diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/lookahead.ll b/llvm/test/Transforms/SLPVectorizer/X86/lookahead.ll
> index 0054520cff567..6c456bb77da6e 100644
> --- a/llvm/test/Transforms/SLPVectorizer/X86/lookahead.ll
> +++ b/llvm/test/Transforms/SLPVectorizer/X86/lookahead.ll
> @@ -643,62 +643,32 @@ define i1 @foo(float %a, float %b, float %c, <4 x float> %vec, i64 %idx2) {
>   
>   ; Same as @ChecksExtractScores, but the extratelement vector operands do not match.
>   define void @ChecksExtractScores_different_vectors(double* %storeArray, double* %array, <2 x double> *%vecPtr1, <2 x double>* %vecPtr2, <2 x double>* %vecPtr3, <2 x double>* %vecPtr4) {
> -; SSE-LABEL: @ChecksExtractScores_different_vectors(
> -; SSE-NEXT:    [[IDX0:%.*]] = getelementptr inbounds double, double* [[ARRAY:%.*]], i64 0
> -; SSE-NEXT:    [[IDX1:%.*]] = getelementptr inbounds double, double* [[ARRAY]], i64 1
> -; SSE-NEXT:    [[TMP1:%.*]] = bitcast double* [[IDX0]] to <2 x double>*
> -; SSE-NEXT:    [[TMP2:%.*]] = load <2 x double>, <2 x double>* [[TMP1]], align 4
> -; SSE-NEXT:    [[LOADVEC:%.*]] = load <2 x double>, <2 x double>* [[VECPTR1:%.*]], align 4
> -; SSE-NEXT:    [[LOADVEC2:%.*]] = load <2 x double>, <2 x double>* [[VECPTR2:%.*]], align 4
> -; SSE-NEXT:    [[EXTRA0:%.*]] = extractelement <2 x double> [[LOADVEC]], i32 0
> -; SSE-NEXT:    [[EXTRA1:%.*]] = extractelement <2 x double> [[LOADVEC2]], i32 1
> -; SSE-NEXT:    [[LOADVEC3:%.*]] = load <2 x double>, <2 x double>* [[VECPTR3:%.*]], align 4
> -; SSE-NEXT:    [[LOADVEC4:%.*]] = load <2 x double>, <2 x double>* [[VECPTR4:%.*]], align 4
> -; SSE-NEXT:    [[EXTRB0:%.*]] = extractelement <2 x double> [[LOADVEC3]], i32 0
> -; SSE-NEXT:    [[EXTRB1:%.*]] = extractelement <2 x double> [[LOADVEC4]], i32 1
> -; SSE-NEXT:    [[TMP3:%.*]] = insertelement <2 x double> poison, double [[EXTRA1]], i32 0
> -; SSE-NEXT:    [[TMP4:%.*]] = insertelement <2 x double> [[TMP3]], double [[EXTRB0]], i32 1
> -; SSE-NEXT:    [[TMP5:%.*]] = fmul <2 x double> [[TMP4]], [[TMP2]]
> -; SSE-NEXT:    [[SHUFFLE:%.*]] = shufflevector <2 x double> [[TMP5]], <2 x double> poison, <2 x i32> <i32 1, i32 0>
> -; SSE-NEXT:    [[TMP6:%.*]] = insertelement <2 x double> poison, double [[EXTRA0]], i32 0
> -; SSE-NEXT:    [[TMP7:%.*]] = insertelement <2 x double> [[TMP6]], double [[EXTRB1]], i32 1
> -; SSE-NEXT:    [[TMP8:%.*]] = fmul <2 x double> [[TMP7]], [[TMP2]]
> -; SSE-NEXT:    [[TMP9:%.*]] = fadd <2 x double> [[SHUFFLE]], [[TMP8]]
> -; SSE-NEXT:    [[SIDX0:%.*]] = getelementptr inbounds double, double* [[STOREARRAY:%.*]], i64 0
> -; SSE-NEXT:    [[SIDX1:%.*]] = getelementptr inbounds double, double* [[STOREARRAY]], i64 1
> -; SSE-NEXT:    [[TMP10:%.*]] = bitcast double* [[SIDX0]] to <2 x double>*
> -; SSE-NEXT:    store <2 x double> [[TMP9]], <2 x double>* [[TMP10]], align 8
> -; SSE-NEXT:    ret void
> -;
> -; AVX-LABEL: @ChecksExtractScores_different_vectors(
> -; AVX-NEXT:    [[IDX0:%.*]] = getelementptr inbounds double, double* [[ARRAY:%.*]], i64 0
> -; AVX-NEXT:    [[IDX1:%.*]] = getelementptr inbounds double, double* [[ARRAY]], i64 1
> -; AVX-NEXT:    [[LOADA0:%.*]] = load double, double* [[IDX0]], align 4
> -; AVX-NEXT:    [[LOADA1:%.*]] = load double, double* [[IDX1]], align 4
> -; AVX-NEXT:    [[LOADVEC:%.*]] = load <2 x double>, <2 x double>* [[VECPTR1:%.*]], align 4
> -; AVX-NEXT:    [[LOADVEC2:%.*]] = load <2 x double>, <2 x double>* [[VECPTR2:%.*]], align 4
> -; AVX-NEXT:    [[EXTRA0:%.*]] = extractelement <2 x double> [[LOADVEC]], i32 0
> -; AVX-NEXT:    [[EXTRA1:%.*]] = extractelement <2 x double> [[LOADVEC2]], i32 1
> -; AVX-NEXT:    [[LOADVEC3:%.*]] = load <2 x double>, <2 x double>* [[VECPTR3:%.*]], align 4
> -; AVX-NEXT:    [[LOADVEC4:%.*]] = load <2 x double>, <2 x double>* [[VECPTR4:%.*]], align 4
> -; AVX-NEXT:    [[EXTRB0:%.*]] = extractelement <2 x double> [[LOADVEC3]], i32 0
> -; AVX-NEXT:    [[EXTRB1:%.*]] = extractelement <2 x double> [[LOADVEC4]], i32 1
> -; AVX-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> poison, double [[EXTRA0]], i32 0
> -; AVX-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> [[TMP1]], double [[EXTRA1]], i32 1
> -; AVX-NEXT:    [[TMP3:%.*]] = insertelement <2 x double> poison, double [[LOADA0]], i32 0
> -; AVX-NEXT:    [[TMP4:%.*]] = insertelement <2 x double> [[TMP3]], double [[LOADA0]], i32 1
> -; AVX-NEXT:    [[TMP5:%.*]] = fmul <2 x double> [[TMP2]], [[TMP4]]
> -; AVX-NEXT:    [[TMP6:%.*]] = insertelement <2 x double> poison, double [[EXTRB0]], i32 0
> -; AVX-NEXT:    [[TMP7:%.*]] = insertelement <2 x double> [[TMP6]], double [[EXTRB1]], i32 1
> -; AVX-NEXT:    [[TMP8:%.*]] = insertelement <2 x double> poison, double [[LOADA1]], i32 0
> -; AVX-NEXT:    [[TMP9:%.*]] = insertelement <2 x double> [[TMP8]], double [[LOADA1]], i32 1
> -; AVX-NEXT:    [[TMP10:%.*]] = fmul <2 x double> [[TMP7]], [[TMP9]]
> -; AVX-NEXT:    [[TMP11:%.*]] = fadd <2 x double> [[TMP5]], [[TMP10]]
> -; AVX-NEXT:    [[SIDX0:%.*]] = getelementptr inbounds double, double* [[STOREARRAY:%.*]], i64 0
> -; AVX-NEXT:    [[SIDX1:%.*]] = getelementptr inbounds double, double* [[STOREARRAY]], i64 1
> -; AVX-NEXT:    [[TMP12:%.*]] = bitcast double* [[SIDX0]] to <2 x double>*
> -; AVX-NEXT:    store <2 x double> [[TMP11]], <2 x double>* [[TMP12]], align 8
> -; AVX-NEXT:    ret void
> +; CHECK-LABEL: @ChecksExtractScores_different_vectors(
> +; CHECK-NEXT:    [[IDX0:%.*]] = getelementptr inbounds double, double* [[ARRAY:%.*]], i64 0
> +; CHECK-NEXT:    [[IDX1:%.*]] = getelementptr inbounds double, double* [[ARRAY]], i64 1
> +; CHECK-NEXT:    [[TMP1:%.*]] = bitcast double* [[IDX0]] to <2 x double>*
> +; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x double>, <2 x double>* [[TMP1]], align 4
> +; CHECK-NEXT:    [[LOADVEC:%.*]] = load <2 x double>, <2 x double>* [[VECPTR1:%.*]], align 4
> +; CHECK-NEXT:    [[LOADVEC2:%.*]] = load <2 x double>, <2 x double>* [[VECPTR2:%.*]], align 4
> +; CHECK-NEXT:    [[EXTRA0:%.*]] = extractelement <2 x double> [[LOADVEC]], i32 0
> +; CHECK-NEXT:    [[EXTRA1:%.*]] = extractelement <2 x double> [[LOADVEC2]], i32 1
> +; CHECK-NEXT:    [[LOADVEC3:%.*]] = load <2 x double>, <2 x double>* [[VECPTR3:%.*]], align 4
> +; CHECK-NEXT:    [[LOADVEC4:%.*]] = load <2 x double>, <2 x double>* [[VECPTR4:%.*]], align 4
> +; CHECK-NEXT:    [[EXTRB0:%.*]] = extractelement <2 x double> [[LOADVEC3]], i32 0
> +; CHECK-NEXT:    [[EXTRB1:%.*]] = extractelement <2 x double> [[LOADVEC4]], i32 1
> +; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <2 x double> poison, double [[EXTRA1]], i32 0
> +; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <2 x double> [[TMP3]], double [[EXTRB0]], i32 1
> +; CHECK-NEXT:    [[TMP5:%.*]] = fmul <2 x double> [[TMP4]], [[TMP2]]
> +; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <2 x double> [[TMP5]], <2 x double> poison, <2 x i32> <i32 1, i32 0>
> +; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <2 x double> poison, double [[EXTRA0]], i32 0
> +; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <2 x double> [[TMP6]], double [[EXTRB1]], i32 1
> +; CHECK-NEXT:    [[TMP8:%.*]] = fmul <2 x double> [[TMP7]], [[TMP2]]
> +; CHECK-NEXT:    [[TMP9:%.*]] = fadd <2 x double> [[SHUFFLE]], [[TMP8]]
> +; CHECK-NEXT:    [[SIDX0:%.*]] = getelementptr inbounds double, double* [[STOREARRAY:%.*]], i64 0
> +; CHECK-NEXT:    [[SIDX1:%.*]] = getelementptr inbounds double, double* [[STOREARRAY]], i64 1
> +; CHECK-NEXT:    [[TMP10:%.*]] = bitcast double* [[SIDX0]] to <2 x double>*
> +; CHECK-NEXT:    store <2 x double> [[TMP9]], <2 x double>* [[TMP10]], align 8
> +; CHECK-NEXT:    ret void
>   ;
>     %idx0 = getelementptr inbounds double, double* %array, i64 0
>     %idx1 = getelementptr inbounds double, double* %array, i64 1
> @@ -731,50 +701,28 @@ define void @ChecksExtractScores_different_vectors(double* %storeArray, double*
>   ; This checks that we we prefer splats rather than reverse load vectors + shuffles.
>   ; 2-wide splat loads in x86 use a single instruction so they are quite cheap.
>   define double @splat_loads(double *%array1, double *%array2, double *%ptrA, double *%ptrB) {
> -; SSE-LABEL: @splat_loads(
> -; SSE-NEXT:  entry:
> -; SSE-NEXT:    [[GEP_1_0:%.*]] = getelementptr inbounds double, double* [[ARRAY1:%.*]], i64 0
> -; SSE-NEXT:    [[GEP_1_1:%.*]] = getelementptr inbounds double, double* [[ARRAY1]], i64 1
> -; SSE-NEXT:    [[TMP0:%.*]] = bitcast double* [[GEP_1_0]] to <2 x double>*
> -; SSE-NEXT:    [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 8
> -; SSE-NEXT:    [[GEP_2_0:%.*]] = getelementptr inbounds double, double* [[ARRAY2:%.*]], i64 0
> -; SSE-NEXT:    [[GEP_2_1:%.*]] = getelementptr inbounds double, double* [[ARRAY2]], i64 1
> -; SSE-NEXT:    [[TMP2:%.*]] = bitcast double* [[GEP_2_0]] to <2 x double>*
> -; SSE-NEXT:    [[TMP3:%.*]] = load <2 x double>, <2 x double>* [[TMP2]], align 8
> -; SSE-NEXT:    [[SHUFFLE:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <2 x i32> <i32 1, i32 0>
> -; SSE-NEXT:    [[TMP4:%.*]] = fmul <2 x double> [[TMP1]], [[SHUFFLE]]
> -; SSE-NEXT:    [[TMP5:%.*]] = extractelement <2 x double> [[SHUFFLE]], i32 1
> -; SSE-NEXT:    [[TMP6:%.*]] = insertelement <2 x double> poison, double [[TMP5]], i32 0
> -; SSE-NEXT:    [[TMP7:%.*]] = extractelement <2 x double> [[SHUFFLE]], i32 0
> -; SSE-NEXT:    [[TMP8:%.*]] = insertelement <2 x double> [[TMP6]], double [[TMP7]], i32 1
> -; SSE-NEXT:    [[TMP9:%.*]] = fmul <2 x double> [[TMP1]], [[TMP8]]
> -; SSE-NEXT:    [[TMP10:%.*]] = fadd <2 x double> [[TMP4]], [[TMP9]]
> -; SSE-NEXT:    [[TMP11:%.*]] = extractelement <2 x double> [[TMP10]], i32 0
> -; SSE-NEXT:    [[TMP12:%.*]] = extractelement <2 x double> [[TMP10]], i32 1
> -; SSE-NEXT:    [[ADD3:%.*]] = fadd double [[TMP11]], [[TMP12]]
> -; SSE-NEXT:    ret double [[ADD3]]
> -;
> -; AVX-LABEL: @splat_loads(
> -; AVX-NEXT:  entry:
> -; AVX-NEXT:    [[GEP_1_0:%.*]] = getelementptr inbounds double, double* [[ARRAY1:%.*]], i64 0
> -; AVX-NEXT:    [[GEP_1_1:%.*]] = getelementptr inbounds double, double* [[ARRAY1]], i64 1
> -; AVX-NEXT:    [[TMP0:%.*]] = bitcast double* [[GEP_1_0]] to <2 x double>*
> -; AVX-NEXT:    [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 8
> -; AVX-NEXT:    [[GEP_2_0:%.*]] = getelementptr inbounds double, double* [[ARRAY2:%.*]], i64 0
> -; AVX-NEXT:    [[GEP_2_1:%.*]] = getelementptr inbounds double, double* [[ARRAY2]], i64 1
> -; AVX-NEXT:    [[LD_2_0:%.*]] = load double, double* [[GEP_2_0]], align 8
> -; AVX-NEXT:    [[LD_2_1:%.*]] = load double, double* [[GEP_2_1]], align 8
> -; AVX-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> poison, double [[LD_2_0]], i32 0
> -; AVX-NEXT:    [[TMP3:%.*]] = insertelement <2 x double> [[TMP2]], double [[LD_2_0]], i32 1
> -; AVX-NEXT:    [[TMP4:%.*]] = fmul <2 x double> [[TMP1]], [[TMP3]]
> -; AVX-NEXT:    [[TMP5:%.*]] = insertelement <2 x double> poison, double [[LD_2_1]], i32 0
> -; AVX-NEXT:    [[TMP6:%.*]] = insertelement <2 x double> [[TMP5]], double [[LD_2_1]], i32 1
> -; AVX-NEXT:    [[TMP7:%.*]] = fmul <2 x double> [[TMP1]], [[TMP6]]
> -; AVX-NEXT:    [[TMP8:%.*]] = fadd <2 x double> [[TMP4]], [[TMP7]]
> -; AVX-NEXT:    [[TMP9:%.*]] = extractelement <2 x double> [[TMP8]], i32 0
> -; AVX-NEXT:    [[TMP10:%.*]] = extractelement <2 x double> [[TMP8]], i32 1
> -; AVX-NEXT:    [[ADD3:%.*]] = fadd double [[TMP9]], [[TMP10]]
> -; AVX-NEXT:    ret double [[ADD3]]
> +; CHECK-LABEL: @splat_loads(
> +; CHECK-NEXT:  entry:
> +; CHECK-NEXT:    [[GEP_1_0:%.*]] = getelementptr inbounds double, double* [[ARRAY1:%.*]], i64 0
> +; CHECK-NEXT:    [[GEP_1_1:%.*]] = getelementptr inbounds double, double* [[ARRAY1]], i64 1
> +; CHECK-NEXT:    [[TMP0:%.*]] = bitcast double* [[GEP_1_0]] to <2 x double>*
> +; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 8
> +; CHECK-NEXT:    [[GEP_2_0:%.*]] = getelementptr inbounds double, double* [[ARRAY2:%.*]], i64 0
> +; CHECK-NEXT:    [[GEP_2_1:%.*]] = getelementptr inbounds double, double* [[ARRAY2]], i64 1
> +; CHECK-NEXT:    [[TMP2:%.*]] = bitcast double* [[GEP_2_0]] to <2 x double>*
> +; CHECK-NEXT:    [[TMP3:%.*]] = load <2 x double>, <2 x double>* [[TMP2]], align 8
> +; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <2 x i32> <i32 1, i32 0>
> +; CHECK-NEXT:    [[TMP4:%.*]] = fmul <2 x double> [[TMP1]], [[SHUFFLE]]
> +; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <2 x double> [[SHUFFLE]], i32 1
> +; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <2 x double> poison, double [[TMP5]], i32 0
> +; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <2 x double> [[SHUFFLE]], i32 0
> +; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <2 x double> [[TMP6]], double [[TMP7]], i32 1
> +; CHECK-NEXT:    [[TMP9:%.*]] = fmul <2 x double> [[TMP1]], [[TMP8]]
> +; CHECK-NEXT:    [[TMP10:%.*]] = fadd <2 x double> [[TMP4]], [[TMP9]]
> +; CHECK-NEXT:    [[TMP11:%.*]] = extractelement <2 x double> [[TMP10]], i32 0
> +; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <2 x double> [[TMP10]], i32 1
> +; CHECK-NEXT:    [[ADD3:%.*]] = fadd double [[TMP11]], [[TMP12]]
> +; CHECK-NEXT:    ret double [[ADD3]]
>   ;
>   entry:
>     %gep_1_0 = getelementptr inbounds double, double* %array1, i64 0
>
> diff --git a/llvm/test/Transforms/SLPVectorizer/X86/operandorder.ll b/llvm/test/Transforms/SLPVectorizer/X86/operandorder.ll
> index 487751877f880..51d564f0bfacc 100644
> --- a/llvm/test/Transforms/SLPVectorizer/X86/operandorder.ll
> +++ b/llvm/test/Transforms/SLPVectorizer/X86/operandorder.ll
> @@ -253,16 +253,13 @@ define void @vecload_vs_broadcast4(double * noalias %from, double * noalias %to,
>   ; CHECK-NEXT:    br label [[LP:%.*]]
>   ; CHECK:       lp:
>   ; CHECK-NEXT:    [[P:%.*]] = phi double [ 1.000000e+00, [[LP]] ], [ 0.000000e+00, [[ENTRY:%.*]] ]
> -; CHECK-NEXT:    [[FROM_1:%.*]] = getelementptr double, double* [[FROM:%.*]], i32 1
> -; CHECK-NEXT:    [[V0_1:%.*]] = load double, double* [[FROM]], align 4
> -; CHECK-NEXT:    [[V0_2:%.*]] = load double, double* [[FROM_1]], align 4
> -; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <2 x double> poison, double [[V0_2]], i64 0
> -; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> [[TMP0]], double [[P]], i64 1
> -; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> poison, double [[V0_1]], i64 0
> -; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> poison, <2 x i32> zeroinitializer
> -; CHECK-NEXT:    [[TMP4:%.*]] = fadd <2 x double> [[TMP1]], [[TMP3]]
> -; CHECK-NEXT:    [[TMP5:%.*]] = bitcast double* [[TO:%.*]] to <2 x double>*
> -; CHECK-NEXT:    store <2 x double> [[TMP4]], <2 x double>* [[TMP5]], align 4
> +; CHECK-NEXT:    [[TMP0:%.*]] = bitcast double* [[FROM:%.*]] to <2 x double>*
> +; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 4
> +; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> poison, <2 x i32> <i32 1, i32 0>
> +; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> [[TMP1]], double [[P]], i64 1
> +; CHECK-NEXT:    [[TMP3:%.*]] = fadd <2 x double> [[TMP2]], [[SHUFFLE]]
> +; CHECK-NEXT:    [[TMP4:%.*]] = bitcast double* [[TO:%.*]] to <2 x double>*
> +; CHECK-NEXT:    store <2 x double> [[TMP3]], <2 x double>* [[TMP4]], align 4
>   ; CHECK-NEXT:    br i1 undef, label [[LP]], label [[EXT:%.*]]
>   ; CHECK:       ext:
>   ; CHECK-NEXT:    ret void
> @@ -309,16 +306,13 @@ define void @shuffle_nodes_match2(double * noalias %from, double * noalias %to,
>   ; CHECK-NEXT:    br label [[LP:%.*]]
>   ; CHECK:       lp:
>   ; CHECK-NEXT:    [[P:%.*]] = phi double [ 1.000000e+00, [[LP]] ], [ 0.000000e+00, [[ENTRY:%.*]] ]
> -; CHECK-NEXT:    [[FROM_1:%.*]] = getelementptr double, double* [[FROM:%.*]], i32 1
> -; CHECK-NEXT:    [[V0_1:%.*]] = load double, double* [[FROM]], align 4
> -; CHECK-NEXT:    [[V0_2:%.*]] = load double, double* [[FROM_1]], align 4
> -; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <2 x double> poison, double [[V0_1]], i64 0
> -; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <2 x double> [[TMP0]], <2 x double> poison, <2 x i32> zeroinitializer
> -; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> poison, double [[V0_2]], i64 0
> -; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <2 x double> [[TMP2]], double [[P]], i64 1
> -; CHECK-NEXT:    [[TMP4:%.*]] = fadd <2 x double> [[TMP1]], [[TMP3]]
> -; CHECK-NEXT:    [[TMP5:%.*]] = bitcast double* [[TO:%.*]] to <2 x double>*
> -; CHECK-NEXT:    store <2 x double> [[TMP4]], <2 x double>* [[TMP5]], align 4
> +; CHECK-NEXT:    [[TMP0:%.*]] = bitcast double* [[FROM:%.*]] to <2 x double>*
> +; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 4
> +; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> poison, <2 x i32> <i32 1, i32 0>
> +; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> [[TMP1]], double [[P]], i64 1
> +; CHECK-NEXT:    [[TMP3:%.*]] = fadd <2 x double> [[SHUFFLE]], [[TMP2]]
> +; CHECK-NEXT:    [[TMP4:%.*]] = bitcast double* [[TO:%.*]] to <2 x double>*
> +; CHECK-NEXT:    store <2 x double> [[TMP3]], <2 x double>* [[TMP4]], align 4
>   ; CHECK-NEXT:    br i1 undef, label [[LP]], label [[EXT:%.*]]
>   ; CHECK:       ext:
>   ; CHECK-NEXT:    ret void
>
>
>          
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at lists.llvm.org
> https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits