[PATCH] D52548: Stop instcombining propagating wider shufflevector arguments to predecessors.

Jan Vesely via llvm-commits llvm-commits at lists.llvm.org
Sat Sep 29 06:52:46 PDT 2018


On Fri, 2018-09-28 at 15:26 +0000, Sanjay Patel via Phabricator via
llvm-commits wrote:
> This revision was automatically updated to reflect the committed changes.
> Closed by commit rL343329: [InstCombine] don't propagate wider shufflevector arguments to predecessors (authored by spatel, committed by ).
> 
> Changed prior to commit:
>   https://reviews.llvm.org/D52548?vs=167472&id=167481#toc
> 
> Repository:
>   rL LLVM
> 
> https://reviews.llvm.org/D52548
> 
> Files:
>   llvm/trunk/include/llvm/IR/Instructions.h
>   llvm/trunk/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
>   llvm/trunk/test/Transforms/InstCombine/vec_shuffle.ll
> 
> 
> Index: llvm/trunk/test/Transforms/InstCombine/vec_shuffle.ll
> ===================================================================
> --- llvm/trunk/test/Transforms/InstCombine/vec_shuffle.ll
> +++ llvm/trunk/test/Transforms/InstCombine/vec_shuffle.ll
> @@ -184,27 +184,32 @@
>    ret <2 x i8> %D
>  }
>  
> -; TODO: Increasing length of vector ops is not a good canonicalization.
> - 
> +; Increasing length of vector ops is not a good canonicalization.
> +
>  define <3 x i32> @add_wider(i32 %y, i32 %z) {
> -; CHECK-LABEL: @add(
> -; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <3 x i32> undef, i32 [[Y:%.*]], i32 0
> -; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <3 x i32> [[TMP1]], i32 [[Z:%.*]], i32 1
> -; CHECK-NEXT:    [[TMP3:%.*]] = add <3 x i32> [[TMP2]], <i32 255, i32 255, i32 undef>
> -; CHECK-NEXT:    ret <3 x i32> [[TMP3]]
> +; CHECK-LABEL: @add_wider(
> +; CHECK-NEXT:    [[I0:%.*]] = insertelement <2 x i32> undef, i32 [[Y:%.*]], i32 0
> +; CHECK-NEXT:    [[I1:%.*]] = insertelement <2 x i32> [[I0]], i32 [[Z:%.*]], i32 1
> +; CHECK-NEXT:    [[A:%.*]] = add <2 x i32> [[I1]], <i32 255, i32 255>
> +; CHECK-NEXT:    [[EXT:%.*]] = shufflevector <2 x i32> [[A]], <2 x i32> undef, <3 x i32> <i32 0, i32 1, i32 undef>
> +; CHECK-NEXT:    ret <3 x i32> [[EXT]]
>  ;
>    %i0 = insertelement <2 x i32> undef, i32 %y, i32 0
>    %i1 = insertelement <2 x i32> %i0, i32 %z, i32 1
>    %a = add <2 x i32> %i1, <i32 255, i32 255>
>    %ext = shufflevector <2 x i32> %a, <2 x i32> undef, <3 x i32> <i32 0, i32 1, i32 undef>
>    ret <3 x i32> %ext
>  }
>  
> -; FIXME: Increasing length of vector ops must be safe from illegal undef propagation.
> +; Increasing length of vector ops must be safe from illegal undef propagation.
>  
>  define <3 x i32> @div_wider(i32 %y, i32 %z) {
> -; CHECK-LABEL: @div(
> -; CHECK-NEXT:    ret <3 x i32> undef
> +; CHECK-LABEL: @div_wider(
> +; CHECK-NEXT:    [[I0:%.*]] = insertelement <2 x i32> undef, i32 [[Y:%.*]], i32 0
> +; CHECK-NEXT:    [[I1:%.*]] = insertelement <2 x i32> [[I0]], i32 [[Z:%.*]], i32 1
> +; CHECK-NEXT:    [[A:%.*]] = sdiv <2 x i32> [[I1]], <i32 255, i32 255>
> +; CHECK-NEXT:    [[EXT:%.*]] = shufflevector <2 x i32> [[A]], <2 x i32> undef, <3 x i32> <i32 0, i32 1, i32 undef>
> +; CHECK-NEXT:    ret <3 x i32> [[EXT]]
>  ;
>    %i0 = insertelement <2 x i32> undef, i32 %y, i32 0
>    %i1 = insertelement <2 x i32> %i0, i32 %z, i32 1
> Index: llvm/trunk/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
> ===================================================================
> --- llvm/trunk/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
> +++ llvm/trunk/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
> @@ -1464,7 +1464,8 @@
>      if (isRHSID) return replaceInstUsesWith(SVI, RHS);
>    }
>  
> -  if (isa<UndefValue>(RHS) && CanEvaluateShuffled(LHS, Mask)) {
> +  if (isa<UndefValue>(RHS) && !SVI.increasesLength() &&
> +      CanEvaluateShuffled(LHS, Mask)) {
>      Value *V = EvaluateInDifferentElementOrder(LHS, Mask);
>      return replaceInstUsesWith(SVI, V);
>    }
> Index: llvm/trunk/include/llvm/IR/Instructions.h
> ===================================================================
> --- llvm/trunk/include/llvm/IR/Instructions.h
> +++ llvm/trunk/include/llvm/IR/Instructions.h
> @@ -2457,13 +2457,23 @@
>  
>    /// Return true if this shuffle returns a vector with a different number of
>    /// elements than its source vectors.
> -  /// Example: shufflevector <4 x n> A, <4 x n> B, <1,2>
> +  /// Examples: shufflevector <4 x n> A, <4 x n> B, <1,2,3>
> +  ///           shufflevector <4 x n> A, <4 x n> B, <1,2,3,4,5>
>    bool changesLength() const {
>      unsigned NumSourceElts = Op<0>()->getType()->getVectorNumElements();
>      unsigned NumMaskElts = getMask()->getType()->getVectorNumElements();
>      return NumSourceElts != NumMaskElts;
>    }
>  
> +  /// Return true if this shuffle returns a vector with a greater number of
> +  /// elements than its source vectors.
> +  /// Example: shufflevector <2 x n> A, <2 x n> B, <1,2,3>
> +  bool increasesLength() const {
> +    unsigned NumSourceElts = Op<0>()->getType()->getVectorNumElements();
> +    unsigned NumMaskElts = getMask()->getType()->getVectorNumElements();
> +    return NumSourceElts < NumMaskElts;
> +  }
> +
>    /// Return true if this shuffle mask chooses elements from exactly one source
>    /// vector.
>    /// Example: <7,5,undef,7>

Hi,

this change introduces failures when loading/storing char3 vectors in
OpenCL (amdgcn backend):
llc: /home/orome/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp:4159:
llvm::SDValue llvm::SelectionDAG::getNode(unsigned int, const
llvm::SDLoc&, llvm::EVT, llvm::SDValue, llvm::SDNodeFlags): Assertion
`VT.getSizeInBits() == Operand.getValueSizeInBits() && "Cannot BITCAST
between types of different sizes!"' failed.
Stack dump:
0.	Program arguments: /home/orome/.local/bin/llc -march=amdgcn 
1.	Running pass 'CallGraph Pass Manager' on module '<stdin>'.
2.	Running pass 'AMDGPU DAG->DAG Pattern Instruction Selection'
on function '@vload3_constant'

The patch changes how the char3 vector is constructed. Instead of starting
with <4 x i8> (edited):

  %3 = insertelement <4 x i8> undef, i8 %2, i32 0
  %5 = insertelement <4 x i8> %3, i8 %4, i32 1
  %7 = insertelement <4 x i8> %5, i8 %6, i32 2
  store <4 x i8> %7, <4 x i8> addrspace(1)* %storetmp, align 4, !tbaa !11

it creates a <3 x i8>, which is then widened by a shufflevector into a <4 x i8>, while the destination pointer is bitcast from <3 x i8>* to <4 x i8>*:

  %vecinit.i = insertelement <3 x i8> undef, i8 %2, i32 0
  %vecinit1.i = insertelement <3 x i8> %vecinit.i, i8 %3, i32 1
  %vecinit4.i = insertelement <3 x i8> %vecinit1.i, i8 %4, i32 2

  %extractVec = shufflevector <3 x i8> %vecinit4.i, <3 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
  ^^^ note: the shuffle result type here is already <4 x i8>, since the mask has four elements

  %storetmp = bitcast <3 x i8> addrspace(1)* %out to <4 x i8> addrspace(1)*
  ^^^ This one fails
  store <4 x i8> %extractVec, <4 x i8> addrspace(1)* %storetmp, align 4, !tbaa !11


I've attached the original .cl file (it will also need libclc to
compile) and both the good and bad .ll processed files.
Just running 
'llc -march=amdgcn < bad.link-0.ll'
reproduces the above assertion failure.
The problem appears only with char3 and not with other 3-element vector
types, nor with vectors of a different size.

thanks,
Jan

> 
> 
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at lists.llvm.org
> http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits

-- 
Jan Vesely <jan.vesely at rutgers.edu>
-------------- next part --------------
; ModuleID = 'link'
source_filename = "link"
target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5"
target triple = "amdgcn-mesa-mesa3d"

; Function Attrs: norecurse nounwind
; vload2 test: load two <2 x i8> vectors from byte offsets 0 and 1 of %in
; (unaligned, align 1) and store them to out[0] and out[1].
define amdgpu_kernel void @vload2_constant(i8 addrspace(4)* nocapture readonly %in, <2 x i8> addrspace(1)* nocapture %out) local_unnamed_addr #0 !kernel_arg_addr_space !6 !kernel_arg_access_qual !7 !kernel_arg_type !8 !kernel_arg_base_type !9 !kernel_arg_type_qual !10 {
entry:
  %0 = bitcast i8 addrspace(4)* %in to <2 x i8> addrspace(4)*
  %1 = load <2 x i8>, <2 x i8> addrspace(4)* %0, align 1, !tbaa !11
  store <2 x i8> %1, <2 x i8> addrspace(1)* %out, align 2, !tbaa !11
  ; Second (overlapping) load starts one byte into %in.
  %add.ptr = getelementptr inbounds i8, i8 addrspace(4)* %in, i64 1
  %2 = bitcast i8 addrspace(4)* %add.ptr to <2 x i8> addrspace(4)*
  %3 = load <2 x i8>, <2 x i8> addrspace(4)* %2, align 1, !tbaa !11
  %arrayidx2 = getelementptr inbounds <2 x i8>, <2 x i8> addrspace(1)* %out, i64 1
  store <2 x i8> %3, <2 x i8> addrspace(1)* %arrayidx2, align 2, !tbaa !11
  ret void
}

; Function Attrs: norecurse nounwind
; Same as @vload2_constant, but reading from byte offsets 2 and 3 of %in.
define amdgpu_kernel void @vload2_constant_offset(i8 addrspace(4)* nocapture readonly %in, <2 x i8> addrspace(1)* nocapture %out) local_unnamed_addr #0 !kernel_arg_addr_space !6 !kernel_arg_access_qual !7 !kernel_arg_type !8 !kernel_arg_base_type !9 !kernel_arg_type_qual !10 {
entry:
  %arrayidx.i = getelementptr inbounds i8, i8 addrspace(4)* %in, i64 2
  %0 = bitcast i8 addrspace(4)* %arrayidx.i to <2 x i8> addrspace(4)*
  %1 = load <2 x i8>, <2 x i8> addrspace(4)* %0, align 1, !tbaa !11
  store <2 x i8> %1, <2 x i8> addrspace(1)* %out, align 2, !tbaa !11
  %arrayidx.i5 = getelementptr inbounds i8, i8 addrspace(4)* %in, i64 3
  %2 = bitcast i8 addrspace(4)* %arrayidx.i5 to <2 x i8> addrspace(4)*
  %3 = load <2 x i8>, <2 x i8> addrspace(4)* %2, align 1, !tbaa !11
  %arrayidx2 = getelementptr inbounds <2 x i8>, <2 x i8> addrspace(1)* %out, i64 1
  store <2 x i8> %3, <2 x i8> addrspace(1)* %arrayidx2, align 2, !tbaa !11
  ret void
}

; Function Attrs: norecurse nounwind
; char3 load/store in the post-D52548 ("bad") form described in the message
; body: three bytes are inserted into a <3 x i8>, widened to <4 x i8> by a
; shufflevector (element 3 undef), and stored through a <3 x i8>* -> <4 x i8>*
; pointer bitcast. This is the function named in the llc assertion backtrace
; quoted above.
define amdgpu_kernel void @vload3_constant(i8 addrspace(4)* nocapture readonly %in, <3 x i8> addrspace(1)* nocapture %out) local_unnamed_addr #0 !kernel_arg_addr_space !6 !kernel_arg_access_qual !7 !kernel_arg_type !14 !kernel_arg_base_type !15 !kernel_arg_type_qual !10 {
entry:
  %0 = bitcast i8 addrspace(4)* %in to <2 x i8> addrspace(4)*
  %1 = load <2 x i8>, <2 x i8> addrspace(4)* %0, align 1, !tbaa !11
  %2 = extractelement <2 x i8> %1, i64 0
  %vecinit.i = insertelement <3 x i8> undef, i8 %2, i32 0
  %3 = extractelement <2 x i8> %1, i64 1
  %vecinit1.i = insertelement <3 x i8> %vecinit.i, i8 %3, i32 1
  %arrayidx3.i = getelementptr inbounds i8, i8 addrspace(4)* %in, i64 2
  %4 = load i8, i8 addrspace(4)* %arrayidx3.i, align 1, !tbaa !11
  %vecinit4.i = insertelement <3 x i8> %vecinit1.i, i8 %4, i32 2
  ; Widen <3 x i8> to <4 x i8>; the 4-element mask fixes the result type.
  %extractVec = shufflevector <3 x i8> %vecinit4.i, <3 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
  ; Store 4 bytes through a reinterpreted destination pointer.
  %storetmp = bitcast <3 x i8> addrspace(1)* %out to <4 x i8> addrspace(1)*
  store <4 x i8> %extractVec, <4 x i8> addrspace(1)* %storetmp, align 4, !tbaa !11
  ; Second copy, reading from byte offset 1 and writing to out[1].
  %add.ptr = getelementptr inbounds i8, i8 addrspace(4)* %in, i64 1
  %5 = bitcast i8 addrspace(4)* %add.ptr to <2 x i8> addrspace(4)*
  %6 = load <2 x i8>, <2 x i8> addrspace(4)* %5, align 1, !tbaa !11
  %7 = extractelement <2 x i8> %6, i64 0
  %vecinit.i7 = insertelement <3 x i8> undef, i8 %7, i32 0
  %8 = extractelement <2 x i8> %6, i64 1
  %vecinit1.i8 = insertelement <3 x i8> %vecinit.i7, i8 %8, i32 1
  %arrayidx3.i9 = getelementptr inbounds i8, i8 addrspace(4)* %in, i64 3
  %9 = load i8, i8 addrspace(4)* %arrayidx3.i9, align 1, !tbaa !11
  %vecinit4.i10 = insertelement <3 x i8> %vecinit1.i8, i8 %9, i32 2
  %arrayidx2 = getelementptr inbounds <3 x i8>, <3 x i8> addrspace(1)* %out, i64 1
  %extractVec3 = shufflevector <3 x i8> %vecinit4.i10, <3 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
  %storetmp4 = bitcast <3 x i8> addrspace(1)* %arrayidx2 to <4 x i8> addrspace(1)*
  store <4 x i8> %extractVec3, <4 x i8> addrspace(1)* %storetmp4, align 4, !tbaa !11
  ret void
}

; Function Attrs: norecurse nounwind
; Same <3 x i8> -> shufflevector -> <4 x i8> store pattern as @vload3_constant,
; but reading the two char3 values from byte offsets 3/5 and 4/6 of %in.
define amdgpu_kernel void @vload3_constant_offset(i8 addrspace(4)* nocapture readonly %in, <3 x i8> addrspace(1)* nocapture %out) local_unnamed_addr #0 !kernel_arg_addr_space !6 !kernel_arg_access_qual !7 !kernel_arg_type !14 !kernel_arg_base_type !15 !kernel_arg_type_qual !10 {
entry:
  %arrayidx.i = getelementptr inbounds i8, i8 addrspace(4)* %in, i64 3
  %0 = bitcast i8 addrspace(4)* %arrayidx.i to <2 x i8> addrspace(4)*
  %1 = load <2 x i8>, <2 x i8> addrspace(4)* %0, align 1, !tbaa !11
  %2 = extractelement <2 x i8> %1, i64 0
  %vecinit.i = insertelement <3 x i8> undef, i8 %2, i32 0
  %3 = extractelement <2 x i8> %1, i64 1
  %vecinit1.i = insertelement <3 x i8> %vecinit.i, i8 %3, i32 1
  %arrayidx3.i = getelementptr inbounds i8, i8 addrspace(4)* %in, i64 5
  %4 = load i8, i8 addrspace(4)* %arrayidx3.i, align 1, !tbaa !11
  %vecinit4.i = insertelement <3 x i8> %vecinit1.i, i8 %4, i32 2
  ; Widening shuffle: <3 x i8> source, <4 x i8> result (mask has 4 elements).
  %extractVec = shufflevector <3 x i8> %vecinit4.i, <3 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
  %storetmp = bitcast <3 x i8> addrspace(1)* %out to <4 x i8> addrspace(1)*
  store <4 x i8> %extractVec, <4 x i8> addrspace(1)* %storetmp, align 4, !tbaa !11
  %arrayidx.i7 = getelementptr inbounds i8, i8 addrspace(4)* %in, i64 4
  %5 = bitcast i8 addrspace(4)* %arrayidx.i7 to <2 x i8> addrspace(4)*
  %6 = load <2 x i8>, <2 x i8> addrspace(4)* %5, align 1, !tbaa !11
  %7 = extractelement <2 x i8> %6, i64 0
  %vecinit.i8 = insertelement <3 x i8> undef, i8 %7, i32 0
  %8 = extractelement <2 x i8> %6, i64 1
  %vecinit1.i9 = insertelement <3 x i8> %vecinit.i8, i8 %8, i32 1
  %arrayidx3.i10 = getelementptr inbounds i8, i8 addrspace(4)* %in, i64 6
  %9 = load i8, i8 addrspace(4)* %arrayidx3.i10, align 1, !tbaa !11
  %vecinit4.i11 = insertelement <3 x i8> %vecinit1.i9, i8 %9, i32 2
  %arrayidx2 = getelementptr inbounds <3 x i8>, <3 x i8> addrspace(1)* %out, i64 1
  %extractVec3 = shufflevector <3 x i8> %vecinit4.i11, <3 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
  %storetmp4 = bitcast <3 x i8> addrspace(1)* %arrayidx2 to <4 x i8> addrspace(1)*
  store <4 x i8> %extractVec3, <4 x i8> addrspace(1)* %storetmp4, align 4, !tbaa !11
  ret void
}

; Function Attrs: norecurse nounwind
; vload4 test: the 4-byte vectors are moved as scalar i32 loads/stores through
; pointer bitcasts, from byte offsets 0 and 1 of %in to out[0] and out[1].
define amdgpu_kernel void @vload4_constant(i8 addrspace(4)* nocapture readonly %in, <4 x i8> addrspace(1)* nocapture %out) local_unnamed_addr #0 !kernel_arg_addr_space !6 !kernel_arg_access_qual !7 !kernel_arg_type !16 !kernel_arg_base_type !17 !kernel_arg_type_qual !10 {
entry:
  %0 = bitcast i8 addrspace(4)* %in to i32 addrspace(4)*
  %1 = load i32, i32 addrspace(4)* %0, align 1, !tbaa !11
  %2 = bitcast <4 x i8> addrspace(1)* %out to i32 addrspace(1)*
  store i32 %1, i32 addrspace(1)* %2, align 4, !tbaa !11
  %add.ptr = getelementptr inbounds i8, i8 addrspace(4)* %in, i64 1
  %3 = bitcast i8 addrspace(4)* %add.ptr to i32 addrspace(4)*
  %4 = load i32, i32 addrspace(4)* %3, align 1, !tbaa !11
  %arrayidx2 = getelementptr inbounds <4 x i8>, <4 x i8> addrspace(1)* %out, i64 1
  %5 = bitcast <4 x i8> addrspace(1)* %arrayidx2 to i32 addrspace(1)*
  store i32 %4, i32 addrspace(1)* %5, align 4, !tbaa !11
  ret void
}

; Function Attrs: norecurse nounwind
; Same as @vload4_constant, but reading i32s from byte offsets 4 and 5 of %in.
define amdgpu_kernel void @vload4_constant_offset(i8 addrspace(4)* nocapture readonly %in, <4 x i8> addrspace(1)* nocapture %out) local_unnamed_addr #0 !kernel_arg_addr_space !6 !kernel_arg_access_qual !7 !kernel_arg_type !16 !kernel_arg_base_type !17 !kernel_arg_type_qual !10 {
entry:
  %arrayidx.i = getelementptr inbounds i8, i8 addrspace(4)* %in, i64 4
  %0 = bitcast i8 addrspace(4)* %arrayidx.i to i32 addrspace(4)*
  %1 = load i32, i32 addrspace(4)* %0, align 1, !tbaa !11
  %2 = bitcast <4 x i8> addrspace(1)* %out to i32 addrspace(1)*
  store i32 %1, i32 addrspace(1)* %2, align 4, !tbaa !11
  %arrayidx.i5 = getelementptr inbounds i8, i8 addrspace(4)* %in, i64 5
  %3 = bitcast i8 addrspace(4)* %arrayidx.i5 to i32 addrspace(4)*
  %4 = load i32, i32 addrspace(4)* %3, align 1, !tbaa !11
  %arrayidx2 = getelementptr inbounds <4 x i8>, <4 x i8> addrspace(1)* %out, i64 1
  %5 = bitcast <4 x i8> addrspace(1)* %arrayidx2 to i32 addrspace(1)*
  store i32 %4, i32 addrspace(1)* %5, align 4, !tbaa !11
  ret void
}

; Function Attrs: norecurse nounwind
; vload8 test: the 8-byte vectors are moved as scalar i64 loads/stores through
; pointer bitcasts, from byte offsets 0 and 1 of %in to out[0] and out[1].
define amdgpu_kernel void @vload8_constant(i8 addrspace(4)* nocapture readonly %in, <8 x i8> addrspace(1)* nocapture %out) local_unnamed_addr #0 !kernel_arg_addr_space !6 !kernel_arg_access_qual !7 !kernel_arg_type !18 !kernel_arg_base_type !19 !kernel_arg_type_qual !10 {
entry:
  %0 = bitcast i8 addrspace(4)* %in to i64 addrspace(4)*
  %1 = load i64, i64 addrspace(4)* %0, align 1, !tbaa !11
  %2 = bitcast <8 x i8> addrspace(1)* %out to i64 addrspace(1)*
  store i64 %1, i64 addrspace(1)* %2, align 8, !tbaa !11
  %add.ptr = getelementptr inbounds i8, i8 addrspace(4)* %in, i64 1
  %3 = bitcast i8 addrspace(4)* %add.ptr to i64 addrspace(4)*
  %4 = load i64, i64 addrspace(4)* %3, align 1, !tbaa !11
  %arrayidx2 = getelementptr inbounds <8 x i8>, <8 x i8> addrspace(1)* %out, i64 1
  %5 = bitcast <8 x i8> addrspace(1)* %arrayidx2 to i64 addrspace(1)*
  store i64 %4, i64 addrspace(1)* %5, align 8, !tbaa !11
  ret void
}

; Function Attrs: norecurse nounwind
; Same as @vload8_constant, but reading i64s from byte offsets 8 and 9 of %in.
define amdgpu_kernel void @vload8_constant_offset(i8 addrspace(4)* nocapture readonly %in, <8 x i8> addrspace(1)* nocapture %out) local_unnamed_addr #0 !kernel_arg_addr_space !6 !kernel_arg_access_qual !7 !kernel_arg_type !18 !kernel_arg_base_type !19 !kernel_arg_type_qual !10 {
entry:
  %arrayidx.i = getelementptr inbounds i8, i8 addrspace(4)* %in, i64 8
  %0 = bitcast i8 addrspace(4)* %arrayidx.i to i64 addrspace(4)*
  %1 = load i64, i64 addrspace(4)* %0, align 1, !tbaa !11
  %2 = bitcast <8 x i8> addrspace(1)* %out to i64 addrspace(1)*
  store i64 %1, i64 addrspace(1)* %2, align 8, !tbaa !11
  %arrayidx.i5 = getelementptr inbounds i8, i8 addrspace(4)* %in, i64 9
  %3 = bitcast i8 addrspace(4)* %arrayidx.i5 to i64 addrspace(4)*
  %4 = load i64, i64 addrspace(4)* %3, align 1, !tbaa !11
  %arrayidx2 = getelementptr inbounds <8 x i8>, <8 x i8> addrspace(1)* %out, i64 1
  %5 = bitcast <8 x i8> addrspace(1)* %arrayidx2 to i64 addrspace(1)*
  store i64 %4, i64 addrspace(1)* %5, align 8, !tbaa !11
  ret void
}

; Function Attrs: norecurse nounwind
; vload16 test: direct <16 x i8> loads from byte offsets 0 and 1 of %in,
; stored to out[0] and out[1] (no scalarization or pointer reinterpretation).
define amdgpu_kernel void @vload16_constant(i8 addrspace(4)* nocapture readonly %in, <16 x i8> addrspace(1)* nocapture %out) local_unnamed_addr #0 !kernel_arg_addr_space !6 !kernel_arg_access_qual !7 !kernel_arg_type !20 !kernel_arg_base_type !21 !kernel_arg_type_qual !10 {
entry:
  %0 = bitcast i8 addrspace(4)* %in to <16 x i8> addrspace(4)*
  %1 = load <16 x i8>, <16 x i8> addrspace(4)* %0, align 1, !tbaa !11
  store <16 x i8> %1, <16 x i8> addrspace(1)* %out, align 16, !tbaa !11
  %add.ptr = getelementptr inbounds i8, i8 addrspace(4)* %in, i64 1
  %2 = bitcast i8 addrspace(4)* %add.ptr to <16 x i8> addrspace(4)*
  %3 = load <16 x i8>, <16 x i8> addrspace(4)* %2, align 1, !tbaa !11
  %arrayidx2 = getelementptr inbounds <16 x i8>, <16 x i8> addrspace(1)* %out, i64 1
  store <16 x i8> %3, <16 x i8> addrspace(1)* %arrayidx2, align 16, !tbaa !11
  ret void
}

; Function Attrs: norecurse nounwind
; Same as @vload16_constant, but reading from byte offsets 16 and 17 of %in.
define amdgpu_kernel void @vload16_constant_offset(i8 addrspace(4)* nocapture readonly %in, <16 x i8> addrspace(1)* nocapture %out) local_unnamed_addr #0 !kernel_arg_addr_space !6 !kernel_arg_access_qual !7 !kernel_arg_type !20 !kernel_arg_base_type !21 !kernel_arg_type_qual !10 {
entry:
  %arrayidx.i = getelementptr inbounds i8, i8 addrspace(4)* %in, i64 16
  %0 = bitcast i8 addrspace(4)* %arrayidx.i to <16 x i8> addrspace(4)*
  %1 = load <16 x i8>, <16 x i8> addrspace(4)* %0, align 1, !tbaa !11
  store <16 x i8> %1, <16 x i8> addrspace(1)* %out, align 16, !tbaa !11
  %arrayidx.i5 = getelementptr inbounds i8, i8 addrspace(4)* %in, i64 17
  %2 = bitcast i8 addrspace(4)* %arrayidx.i5 to <16 x i8> addrspace(4)*
  %3 = load <16 x i8>, <16 x i8> addrspace(4)* %2, align 1, !tbaa !11
  %arrayidx2 = getelementptr inbounds <16 x i8>, <16 x i8> addrspace(1)* %out, i64 1
  store <16 x i8> %3, <16 x i8> addrspace(1)* %arrayidx2, align 16, !tbaa !11
  ret void
}

attributes #0 = { norecurse nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="gfx902" "target-features"="+16-bit-insts,+ci-insts,+dpp,+fp32-denormals,+fp64-fp16-denormals,+gfx9-insts,+s-memrealtime,+vi-insts" "uniform-work-group-size"="true" "unsafe-fp-math"="false" "use-soft-float"="false" }

!opencl.ocl.version = !{!0}
!llvm.ident = !{!1, !2, !3}
!llvm.module.flags = !{!4, !5}

!0 = !{i32 1, i32 1}
!1 = !{!"clang version 8.0.0 (https://git.llvm.org/git/clang.git 40ec5c4d12536fe608d41b4e7b8b6791c60d5e36) (https://git.llvm.org/git/llvm.git 331a5ec71329eb0a6d46104ff00af89598f4292a)"}
!2 = !{!"clang version 8.0.0 (https://git.llvm.org/git/clang.git 0a8a76da18b28bea9500c5f4539fe9c65075b4ac) (https://git.llvm.org/git/llvm.git a67d12007b6f06d9a47d21f545e199789b1652a2)"}
!3 = !{!"clang version 7.0.0 (https://git.llvm.org/git/clang.git 261aad80c8b0592f57fbda1244a44295ad2bfeb3) (https://git.llvm.org/git/llvm.git 639a95afd42e0a525108992263b94d0def6fdca7)"}
!4 = !{i32 1, !"wchar_size", i32 4}
!5 = !{i32 7, !"PIC Level", i32 1}
!6 = !{i32 2, i32 1}
!7 = !{!"none", !"none"}
!8 = !{!"char*", !"char2*"}
!9 = !{!"char*", !"char __attribute__((ext_vector_type(2)))*"}
!10 = !{!"const", !""}
!11 = !{!12, !12, i64 0}
!12 = !{!"omnipotent char", !13, i64 0}
!13 = !{!"Simple C/C++ TBAA"}
!14 = !{!"char*", !"char3*"}
!15 = !{!"char*", !"char __attribute__((ext_vector_type(3)))*"}
!16 = !{!"char*", !"char4*"}
!17 = !{!"char*", !"char __attribute__((ext_vector_type(4)))*"}
!18 = !{!"char*", !"char8*"}
!19 = !{!"char*", !"char __attribute__((ext_vector_type(8)))*"}
!20 = !{!"char*", !"char16*"}
!21 = !{!"char*", !"char __attribute__((ext_vector_type(16)))*"}
-------------- next part --------------
; ModuleID = 'link'
source_filename = "link"
target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5"
target triple = "amdgcn-mesa-mesa3d"

; Function Attrs: norecurse nounwind
; Second attachment. Identical to the first attachment's @vload2_constant:
; two <2 x i8> loads from byte offsets 0 and 1, stored to out[0] and out[1].
define amdgpu_kernel void @vload2_constant(i8 addrspace(4)* nocapture readonly %in, <2 x i8> addrspace(1)* nocapture %out) local_unnamed_addr #0 !kernel_arg_addr_space !6 !kernel_arg_access_qual !7 !kernel_arg_type !8 !kernel_arg_base_type !9 !kernel_arg_type_qual !10 {
entry:
  %0 = bitcast i8 addrspace(4)* %in to <2 x i8> addrspace(4)*
  %1 = load <2 x i8>, <2 x i8> addrspace(4)* %0, align 1, !tbaa !11
  store <2 x i8> %1, <2 x i8> addrspace(1)* %out, align 2, !tbaa !11
  %add.ptr = getelementptr inbounds i8, i8 addrspace(4)* %in, i64 1
  %2 = bitcast i8 addrspace(4)* %add.ptr to <2 x i8> addrspace(4)*
  %3 = load <2 x i8>, <2 x i8> addrspace(4)* %2, align 1, !tbaa !11
  %arrayidx2 = getelementptr inbounds <2 x i8>, <2 x i8> addrspace(1)* %out, i64 1
  store <2 x i8> %3, <2 x i8> addrspace(1)* %arrayidx2, align 2, !tbaa !11
  ret void
}

; Function Attrs: norecurse nounwind
; Identical to the first attachment's @vload2_constant_offset
; (byte offsets 2 and 3).
define amdgpu_kernel void @vload2_constant_offset(i8 addrspace(4)* nocapture readonly %in, <2 x i8> addrspace(1)* nocapture %out) local_unnamed_addr #0 !kernel_arg_addr_space !6 !kernel_arg_access_qual !7 !kernel_arg_type !8 !kernel_arg_base_type !9 !kernel_arg_type_qual !10 {
entry:
  %arrayidx.i = getelementptr inbounds i8, i8 addrspace(4)* %in, i64 2
  %0 = bitcast i8 addrspace(4)* %arrayidx.i to <2 x i8> addrspace(4)*
  %1 = load <2 x i8>, <2 x i8> addrspace(4)* %0, align 1, !tbaa !11
  store <2 x i8> %1, <2 x i8> addrspace(1)* %out, align 2, !tbaa !11
  %arrayidx.i5 = getelementptr inbounds i8, i8 addrspace(4)* %in, i64 3
  %2 = bitcast i8 addrspace(4)* %arrayidx.i5 to <2 x i8> addrspace(4)*
  %3 = load <2 x i8>, <2 x i8> addrspace(4)* %2, align 1, !tbaa !11
  %arrayidx2 = getelementptr inbounds <2 x i8>, <2 x i8> addrspace(1)* %out, i64 1
  store <2 x i8> %3, <2 x i8> addrspace(1)* %arrayidx2, align 2, !tbaa !11
  ret void
}

; Function Attrs: norecurse nounwind
; char3 load/store in the pre-patch ("good") form described in the message
; body: the three bytes are inserted directly into a <4 x i8> (element 3 left
; undef) and stored through the <3 x i8>* -> <4 x i8>* pointer bitcast; there
; is no widening shufflevector.
define amdgpu_kernel void @vload3_constant(i8 addrspace(4)* nocapture readonly %in, <3 x i8> addrspace(1)* nocapture %out) local_unnamed_addr #0 !kernel_arg_addr_space !6 !kernel_arg_access_qual !7 !kernel_arg_type !14 !kernel_arg_base_type !15 !kernel_arg_type_qual !10 {
entry:
  %0 = bitcast i8 addrspace(4)* %in to <2 x i8> addrspace(4)*
  %1 = load <2 x i8>, <2 x i8> addrspace(4)* %0, align 1, !tbaa !11
  %2 = extractelement <2 x i8> %1, i64 0
  %3 = insertelement <4 x i8> undef, i8 %2, i32 0
  %4 = extractelement <2 x i8> %1, i64 1
  %5 = insertelement <4 x i8> %3, i8 %4, i32 1
  %arrayidx3.i = getelementptr inbounds i8, i8 addrspace(4)* %in, i64 2
  %6 = load i8, i8 addrspace(4)* %arrayidx3.i, align 1, !tbaa !11
  %7 = insertelement <4 x i8> %5, i8 %6, i32 2
  ; The <4 x i8> store matches the <4 x i8>* produced by the pointer bitcast.
  %storetmp = bitcast <3 x i8> addrspace(1)* %out to <4 x i8> addrspace(1)*
  store <4 x i8> %7, <4 x i8> addrspace(1)* %storetmp, align 4, !tbaa !11
  ; Second copy, reading from byte offset 1 and writing to out[1].
  %add.ptr = getelementptr inbounds i8, i8 addrspace(4)* %in, i64 1
  %8 = bitcast i8 addrspace(4)* %add.ptr to <2 x i8> addrspace(4)*
  %9 = load <2 x i8>, <2 x i8> addrspace(4)* %8, align 1, !tbaa !11
  %10 = extractelement <2 x i8> %9, i64 0
  %11 = insertelement <4 x i8> undef, i8 %10, i32 0
  %12 = extractelement <2 x i8> %9, i64 1
  %13 = insertelement <4 x i8> %11, i8 %12, i32 1
  %arrayidx3.i9 = getelementptr inbounds i8, i8 addrspace(4)* %in, i64 3
  %14 = load i8, i8 addrspace(4)* %arrayidx3.i9, align 1, !tbaa !11
  %15 = insertelement <4 x i8> %13, i8 %14, i32 2
  %arrayidx2 = getelementptr inbounds <3 x i8>, <3 x i8> addrspace(1)* %out, i64 1
  %storetmp4 = bitcast <3 x i8> addrspace(1)* %arrayidx2 to <4 x i8> addrspace(1)*
  store <4 x i8> %15, <4 x i8> addrspace(1)* %storetmp4, align 4, !tbaa !11
  ret void
}

; Function Attrs: norecurse nounwind
; Same direct <4 x i8> construction as this attachment's @vload3_constant,
; but reading the two char3 values from byte offsets 3/5 and 4/6 of %in.
define amdgpu_kernel void @vload3_constant_offset(i8 addrspace(4)* nocapture readonly %in, <3 x i8> addrspace(1)* nocapture %out) local_unnamed_addr #0 !kernel_arg_addr_space !6 !kernel_arg_access_qual !7 !kernel_arg_type !14 !kernel_arg_base_type !15 !kernel_arg_type_qual !10 {
entry:
  %arrayidx.i = getelementptr inbounds i8, i8 addrspace(4)* %in, i64 3
  %0 = bitcast i8 addrspace(4)* %arrayidx.i to <2 x i8> addrspace(4)*
  %1 = load <2 x i8>, <2 x i8> addrspace(4)* %0, align 1, !tbaa !11
  %2 = extractelement <2 x i8> %1, i64 0
  %3 = insertelement <4 x i8> undef, i8 %2, i32 0
  %4 = extractelement <2 x i8> %1, i64 1
  %5 = insertelement <4 x i8> %3, i8 %4, i32 1
  %arrayidx3.i = getelementptr inbounds i8, i8 addrspace(4)* %in, i64 5
  %6 = load i8, i8 addrspace(4)* %arrayidx3.i, align 1, !tbaa !11
  %7 = insertelement <4 x i8> %5, i8 %6, i32 2
  %storetmp = bitcast <3 x i8> addrspace(1)* %out to <4 x i8> addrspace(1)*
  store <4 x i8> %7, <4 x i8> addrspace(1)* %storetmp, align 4, !tbaa !11
  %arrayidx.i7 = getelementptr inbounds i8, i8 addrspace(4)* %in, i64 4
  %8 = bitcast i8 addrspace(4)* %arrayidx.i7 to <2 x i8> addrspace(4)*
  %9 = load <2 x i8>, <2 x i8> addrspace(4)* %8, align 1, !tbaa !11
  %10 = extractelement <2 x i8> %9, i64 0
  %11 = insertelement <4 x i8> undef, i8 %10, i32 0
  %12 = extractelement <2 x i8> %9, i64 1
  %13 = insertelement <4 x i8> %11, i8 %12, i32 1
  %arrayidx3.i10 = getelementptr inbounds i8, i8 addrspace(4)* %in, i64 6
  %14 = load i8, i8 addrspace(4)* %arrayidx3.i10, align 1, !tbaa !11
  %15 = insertelement <4 x i8> %13, i8 %14, i32 2
  %arrayidx2 = getelementptr inbounds <3 x i8>, <3 x i8> addrspace(1)* %out, i64 1
  %storetmp4 = bitcast <3 x i8> addrspace(1)* %arrayidx2 to <4 x i8> addrspace(1)*
  store <4 x i8> %15, <4 x i8> addrspace(1)* %storetmp4, align 4, !tbaa !11
  ret void
}

; Function Attrs: norecurse nounwind
; Identical to the first attachment's @vload4_constant: scalar i32 copies
; from byte offsets 0 and 1 of %in through pointer bitcasts.
define amdgpu_kernel void @vload4_constant(i8 addrspace(4)* nocapture readonly %in, <4 x i8> addrspace(1)* nocapture %out) local_unnamed_addr #0 !kernel_arg_addr_space !6 !kernel_arg_access_qual !7 !kernel_arg_type !16 !kernel_arg_base_type !17 !kernel_arg_type_qual !10 {
entry:
  %0 = bitcast i8 addrspace(4)* %in to i32 addrspace(4)*
  %1 = load i32, i32 addrspace(4)* %0, align 1, !tbaa !11
  %2 = bitcast <4 x i8> addrspace(1)* %out to i32 addrspace(1)*
  store i32 %1, i32 addrspace(1)* %2, align 4, !tbaa !11
  %add.ptr = getelementptr inbounds i8, i8 addrspace(4)* %in, i64 1
  %3 = bitcast i8 addrspace(4)* %add.ptr to i32 addrspace(4)*
  %4 = load i32, i32 addrspace(4)* %3, align 1, !tbaa !11
  %arrayidx2 = getelementptr inbounds <4 x i8>, <4 x i8> addrspace(1)* %out, i64 1
  %5 = bitcast <4 x i8> addrspace(1)* %arrayidx2 to i32 addrspace(1)*
  store i32 %4, i32 addrspace(1)* %5, align 4, !tbaa !11
  ret void
}

; Function Attrs: norecurse nounwind
; Identical to the first attachment's @vload4_constant_offset
; (byte offsets 4 and 5).
define amdgpu_kernel void @vload4_constant_offset(i8 addrspace(4)* nocapture readonly %in, <4 x i8> addrspace(1)* nocapture %out) local_unnamed_addr #0 !kernel_arg_addr_space !6 !kernel_arg_access_qual !7 !kernel_arg_type !16 !kernel_arg_base_type !17 !kernel_arg_type_qual !10 {
entry:
  %arrayidx.i = getelementptr inbounds i8, i8 addrspace(4)* %in, i64 4
  %0 = bitcast i8 addrspace(4)* %arrayidx.i to i32 addrspace(4)*
  %1 = load i32, i32 addrspace(4)* %0, align 1, !tbaa !11
  %2 = bitcast <4 x i8> addrspace(1)* %out to i32 addrspace(1)*
  store i32 %1, i32 addrspace(1)* %2, align 4, !tbaa !11
  %arrayidx.i5 = getelementptr inbounds i8, i8 addrspace(4)* %in, i64 5
  %3 = bitcast i8 addrspace(4)* %arrayidx.i5 to i32 addrspace(4)*
  %4 = load i32, i32 addrspace(4)* %3, align 1, !tbaa !11
  %arrayidx2 = getelementptr inbounds <4 x i8>, <4 x i8> addrspace(1)* %out, i64 1
  %5 = bitcast <4 x i8> addrspace(1)* %arrayidx2 to i32 addrspace(1)*
  store i32 %4, i32 addrspace(1)* %5, align 4, !tbaa !11
  ret void
}

; Function Attrs: norecurse nounwind
; Identical to the first attachment's @vload8_constant: scalar i64 copies
; from byte offsets 0 and 1 of %in through pointer bitcasts.
define amdgpu_kernel void @vload8_constant(i8 addrspace(4)* nocapture readonly %in, <8 x i8> addrspace(1)* nocapture %out) local_unnamed_addr #0 !kernel_arg_addr_space !6 !kernel_arg_access_qual !7 !kernel_arg_type !18 !kernel_arg_base_type !19 !kernel_arg_type_qual !10 {
entry:
  %0 = bitcast i8 addrspace(4)* %in to i64 addrspace(4)*
  %1 = load i64, i64 addrspace(4)* %0, align 1, !tbaa !11
  %2 = bitcast <8 x i8> addrspace(1)* %out to i64 addrspace(1)*
  store i64 %1, i64 addrspace(1)* %2, align 8, !tbaa !11
  %add.ptr = getelementptr inbounds i8, i8 addrspace(4)* %in, i64 1
  %3 = bitcast i8 addrspace(4)* %add.ptr to i64 addrspace(4)*
  %4 = load i64, i64 addrspace(4)* %3, align 1, !tbaa !11
  %arrayidx2 = getelementptr inbounds <8 x i8>, <8 x i8> addrspace(1)* %out, i64 1
  %5 = bitcast <8 x i8> addrspace(1)* %arrayidx2 to i64 addrspace(1)*
  store i64 %4, i64 addrspace(1)* %5, align 8, !tbaa !11
  ret void
}

; Function Attrs: norecurse nounwind
; Identical to the first attachment's @vload8_constant_offset
; (byte offsets 8 and 9).
define amdgpu_kernel void @vload8_constant_offset(i8 addrspace(4)* nocapture readonly %in, <8 x i8> addrspace(1)* nocapture %out) local_unnamed_addr #0 !kernel_arg_addr_space !6 !kernel_arg_access_qual !7 !kernel_arg_type !18 !kernel_arg_base_type !19 !kernel_arg_type_qual !10 {
entry:
  %arrayidx.i = getelementptr inbounds i8, i8 addrspace(4)* %in, i64 8
  %0 = bitcast i8 addrspace(4)* %arrayidx.i to i64 addrspace(4)*
  %1 = load i64, i64 addrspace(4)* %0, align 1, !tbaa !11
  %2 = bitcast <8 x i8> addrspace(1)* %out to i64 addrspace(1)*
  store i64 %1, i64 addrspace(1)* %2, align 8, !tbaa !11
  %arrayidx.i5 = getelementptr inbounds i8, i8 addrspace(4)* %in, i64 9
  %3 = bitcast i8 addrspace(4)* %arrayidx.i5 to i64 addrspace(4)*
  %4 = load i64, i64 addrspace(4)* %3, align 1, !tbaa !11
  %arrayidx2 = getelementptr inbounds <8 x i8>, <8 x i8> addrspace(1)* %out, i64 1
  %5 = bitcast <8 x i8> addrspace(1)* %arrayidx2 to i64 addrspace(1)*
  store i64 %4, i64 addrspace(1)* %5, align 8, !tbaa !11
  ret void
}

; Function Attrs: norecurse nounwind
; Identical to the first attachment's @vload16_constant: direct <16 x i8>
; loads from byte offsets 0 and 1, stored to out[0] and out[1].
define amdgpu_kernel void @vload16_constant(i8 addrspace(4)* nocapture readonly %in, <16 x i8> addrspace(1)* nocapture %out) local_unnamed_addr #0 !kernel_arg_addr_space !6 !kernel_arg_access_qual !7 !kernel_arg_type !20 !kernel_arg_base_type !21 !kernel_arg_type_qual !10 {
entry:
  %0 = bitcast i8 addrspace(4)* %in to <16 x i8> addrspace(4)*
  %1 = load <16 x i8>, <16 x i8> addrspace(4)* %0, align 1, !tbaa !11
  store <16 x i8> %1, <16 x i8> addrspace(1)* %out, align 16, !tbaa !11
  %add.ptr = getelementptr inbounds i8, i8 addrspace(4)* %in, i64 1
  %2 = bitcast i8 addrspace(4)* %add.ptr to <16 x i8> addrspace(4)*
  %3 = load <16 x i8>, <16 x i8> addrspace(4)* %2, align 1, !tbaa !11
  %arrayidx2 = getelementptr inbounds <16 x i8>, <16 x i8> addrspace(1)* %out, i64 1
  store <16 x i8> %3, <16 x i8> addrspace(1)* %arrayidx2, align 16, !tbaa !11
  ret void
}

; Function Attrs: norecurse nounwind
; AMDGPU kernel: same shape as @vload16_constant, but the source offsets are
; baked in as constants 16 and 17 instead of 0 and 1 — it copies the 16-byte
; chunks at %in+16 and %in+17 into %out[0] and %out[1]. Reads overlap by
; 15 bytes; loads `align 1`, stores `align 16`, all accesses tagged with the
; "omnipotent char" TBAA node (!11).
define amdgpu_kernel void @vload16_constant_offset(i8 addrspace(4)* nocapture readonly %in, <16 x i8> addrspace(1)* nocapture %out) local_unnamed_addr #0 !kernel_arg_addr_space !6 !kernel_arg_access_qual !7 !kernel_arg_type !20 !kernel_arg_base_type !21 !kernel_arg_type_qual !10 {
entry:
  ; &in[16]: first 16-byte read at constant byte offset 16.
  %arrayidx.i = getelementptr inbounds i8, i8 addrspace(4)* %in, i64 16
  %0 = bitcast i8 addrspace(4)* %arrayidx.i to <16 x i8> addrspace(4)*
  %1 = load <16 x i8>, <16 x i8> addrspace(4)* %0, align 1, !tbaa !11
  store <16 x i8> %1, <16 x i8> addrspace(1)* %out, align 16, !tbaa !11
  ; &in[17]: second read shifted by one byte, overlapping the first.
  %arrayidx.i5 = getelementptr inbounds i8, i8 addrspace(4)* %in, i64 17
  %2 = bitcast i8 addrspace(4)* %arrayidx.i5 to <16 x i8> addrspace(4)*
  %3 = load <16 x i8>, <16 x i8> addrspace(4)* %2, align 1, !tbaa !11
  ; &out[1]: element stride is 16 bytes (<16 x i8>).
  %arrayidx2 = getelementptr inbounds <16 x i8>, <16 x i8> addrspace(1)* %out, i64 1
  store <16 x i8> %3, <16 x i8> addrspace(1)* %arrayidx2, align 16, !tbaa !11
  ret void
}

attributes #0 = { norecurse nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="gfx902" "target-features"="+16-bit-insts,+ci-insts,+dpp,+fp32-denormals,+fp64-fp16-denormals,+gfx9-insts,+s-memrealtime,+vi-insts" "uniform-work-group-size"="true" "unsafe-fp-math"="false" "use-soft-float"="false" }

!opencl.ocl.version = !{!0}
!llvm.ident = !{!1, !2, !3}
!llvm.module.flags = !{!4, !5}

!0 = !{i32 1, i32 1}
!1 = !{!"clang version 8.0.0 (https://git.llvm.org/git/clang.git 40ec5c4d12536fe608d41b4e7b8b6791c60d5e36) (https://git.llvm.org/git/llvm.git 67529047631348866b13473e3213659a1a1906c9)"}
!2 = !{!"clang version 8.0.0 (https://git.llvm.org/git/clang.git 0a8a76da18b28bea9500c5f4539fe9c65075b4ac) (https://git.llvm.org/git/llvm.git a67d12007b6f06d9a47d21f545e199789b1652a2)"}
!3 = !{!"clang version 7.0.0 (https://git.llvm.org/git/clang.git 261aad80c8b0592f57fbda1244a44295ad2bfeb3) (https://git.llvm.org/git/llvm.git 639a95afd42e0a525108992263b94d0def6fdca7)"}
!4 = !{i32 1, !"wchar_size", i32 4}
!5 = !{i32 7, !"PIC Level", i32 1}
!6 = !{i32 2, i32 1}
!7 = !{!"none", !"none"}
!8 = !{!"char*", !"char2*"}
!9 = !{!"char*", !"char __attribute__((ext_vector_type(2)))*"}
!10 = !{!"const", !""}
!11 = !{!12, !12, i64 0}
!12 = !{!"omnipotent char", !13, i64 0}
!13 = !{!"Simple C/C++ TBAA"}
!14 = !{!"char*", !"char3*"}
!15 = !{!"char*", !"char __attribute__((ext_vector_type(3)))*"}
!16 = !{!"char*", !"char4*"}
!17 = !{!"char*", !"char __attribute__((ext_vector_type(4)))*"}
!18 = !{!"char*", !"char8*"}
!19 = !{!"char*", !"char __attribute__((ext_vector_type(8)))*"}
!20 = !{!"char*", !"char16*"}
!21 = !{!"char*", !"char __attribute__((ext_vector_type(16)))*"}
-------------- next part --------------
A non-text attachment was scrubbed...
Name: vload-char-constant.cl
Type: text/x-opencl-src
Size: 3885 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20180929/8f0605db/attachment-0001.bin>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 833 bytes
Desc: This is a digitally signed message part
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20180929/8f0605db/attachment-0001.sig>


More information about the llvm-commits mailing list