[LLVMdev] i1* function argument on x86-64

Mon Jul 27 10:55:17 PDT 2015

I attach the original IR and the generated assembly, where one can see 
that the array elements are treated as 1 bit wide. Is this the intended 
behavior? I doubt it, because I am passing in C pointers, and those have 
byte granularity. (In C a bit cannot be addressed with a pointer.)

Frank

On 07/27/2015 01:44 PM, Frank Winter wrote:
> I am running into a problem with 'i1*' as a function's argument which 
> seems to have appeared since I switched to LLVM 3.6 (but can have 
> other source, of course). If I look at the assembler that the MCJIT 
> generates for an x86-64 target I see that the array 'i1*' is taken as 
> a sequence of 1 bit wide elements. (I guess that's correct). However, 
> I used to call the function from C passing in a 'bool*' which has 1 
> byte wide elements, I guess. (not sure if that's a compiler's choice) 
> Now, since I haven't changed my code on these parts but only made the 
> transition from LLVM 3.4/5 -> 3.6 I wonder if the element width has 
> changed when i1* is used as a function's argument..!?
>
> Thanks,
> Frank
>

-------------- next part --------------
; ModuleID = 'module'
target triple = "x86_64-unknown-linux-gnu"

; @main -- masked, in-place blend over 16 floats, fully unrolled into four
; 4-lane vector steps at element offsets 0, 4, 8 and 12:
;
;     arg0[i] = mask[i] ? arg1[i] : arg0[i]
;
; Parameters:
;   %lo, %hi  not referenced anywhere in the body.
;   %arg0     float array; loaded and then overwritten with the blended result.
;   %arg1     float array; only loaded.
;   %arg2     i1 array; loaded four lanes at a time as <4 x i1>.
;
; The blend is done on the integer bit patterns: the <4 x i1> mask is
; sign-extended to <4 x i32>, so a true lane becomes all-ones and a false
; lane all-zeros, then combined as (arg1 & mask) | (arg0 & ~mask).
;
; NOTE(review): every step offsets %arg2 by a multiple of 4 i1 elements and
; then loads a <4 x i1> through a bitcast pointer. In the x86-64 assembly
; attached to this post, that becomes a single byte load at byte offset
; 0/4/8/12 whose bits 0..3 supply the four lanes -- not four separate bytes.
; A caller passing a C 'bool*' (presumably one byte per element; confirm for
; the compiler in use) would therefore have only the first byte of each
; 4-byte group examined. This is the mismatch the post asks about.
define void @main(i64 %lo, i64 %hi, float* %arg0, float* %arg1, i1* %arg2) {
vectorized:
  ; ---- step 0: elements 0..3 ----
  ; Load the mask lanes and both float operands for this step.
  %0 = getelementptr i1* %arg2, i32 0
  %1 = bitcast i1* %0 to <4 x i1>*
  %2 = load <4 x i1>* %1
  %3 = getelementptr float* %arg1, i32 0
  %4 = bitcast float* %3 to <4 x float>*
  %5 = load <4 x float>* %4
  %6 = getelementptr float* %arg0, i32 0
  %7 = bitcast float* %6 to <4 x float>*
  %8 = load <4 x float>* %7
  ; Same address as %6, recomputed for the store below.
  %9 = getelementptr float* %arg0, i32 0
  ; Widen the mask: true -> 0xFFFFFFFF, false -> 0x00000000 per lane.
  %10 = sext <4 x i1> %2 to <4 x i32>
  ; Keep the arg1 bits in lanes where the mask is true.
  %11 = bitcast <4 x float> %5 to <4 x i32>
  %12 = and <4 x i32> %11, %10
  ; Invert the mask and keep the arg0 bits in lanes where it was false.
  %13 = xor <4 x i32> %10, <i32 -1, i32 -1, i32 -1, i32 -1>
  %14 = bitcast <4 x float> %8 to <4 x i32>
  %15 = and <4 x i32> %14, %13
  ; Merge the two halves and write the result back over arg0.
  %16 = or <4 x i32> %15, %12
  %17 = bitcast <4 x i32> %16 to <4 x float>
  %18 = bitcast float* %9 to <4 x float>*
  store <4 x float> %17, <4 x float>* %18
  ; ---- step 1: elements 4..7 (same sequence as step 0) ----
  %19 = getelementptr i1* %arg2, i32 4
  %20 = bitcast i1* %19 to <4 x i1>*
  %21 = load <4 x i1>* %20
  %22 = getelementptr float* %arg1, i32 4
  %23 = bitcast float* %22 to <4 x float>*
  %24 = load <4 x float>* %23
  %25 = getelementptr float* %arg0, i32 4
  %26 = bitcast float* %25 to <4 x float>*
  %27 = load <4 x float>* %26
  %28 = getelementptr float* %arg0, i32 4
  %29 = sext <4 x i1> %21 to <4 x i32>
  %30 = bitcast <4 x float> %24 to <4 x i32>
  %31 = and <4 x i32> %30, %29
  %32 = xor <4 x i32> %29, <i32 -1, i32 -1, i32 -1, i32 -1>
  %33 = bitcast <4 x float> %27 to <4 x i32>
  %34 = and <4 x i32> %33, %32
  %35 = or <4 x i32> %34, %31
  %36 = bitcast <4 x i32> %35 to <4 x float>
  %37 = bitcast float* %28 to <4 x float>*
  store <4 x float> %36, <4 x float>* %37
  ; ---- step 2: elements 8..11 (same sequence as step 0) ----
  %38 = getelementptr i1* %arg2, i32 8
  %39 = bitcast i1* %38 to <4 x i1>*
  %40 = load <4 x i1>* %39
  %41 = getelementptr float* %arg1, i32 8
  %42 = bitcast float* %41 to <4 x float>*
  %43 = load <4 x float>* %42
  %44 = getelementptr float* %arg0, i32 8
  %45 = bitcast float* %44 to <4 x float>*
  %46 = load <4 x float>* %45
  %47 = getelementptr float* %arg0, i32 8
  %48 = sext <4 x i1> %40 to <4 x i32>
  %49 = bitcast <4 x float> %43 to <4 x i32>
  %50 = and <4 x i32> %49, %48
  %51 = xor <4 x i32> %48, <i32 -1, i32 -1, i32 -1, i32 -1>
  %52 = bitcast <4 x float> %46 to <4 x i32>
  %53 = and <4 x i32> %52, %51
  %54 = or <4 x i32> %53, %50
  %55 = bitcast <4 x i32> %54 to <4 x float>
  %56 = bitcast float* %47 to <4 x float>*
  store <4 x float> %55, <4 x float>* %56
  ; ---- step 3: elements 12..15 (same sequence as step 0) ----
  %57 = getelementptr i1* %arg2, i32 12
  %58 = bitcast i1* %57 to <4 x i1>*
  %59 = load <4 x i1>* %58
  %60 = getelementptr float* %arg1, i32 12
  %61 = bitcast float* %60 to <4 x float>*
  %62 = load <4 x float>* %61
  %63 = getelementptr float* %arg0, i32 12
  %64 = bitcast float* %63 to <4 x float>*
  %65 = load <4 x float>* %64
  %66 = getelementptr float* %arg0, i32 12
  %67 = sext <4 x i1> %59 to <4 x i32>
  %68 = bitcast <4 x float> %62 to <4 x i32>
  %69 = and <4 x i32> %68, %67
  %70 = xor <4 x i32> %67, <i32 -1, i32 -1, i32 -1, i32 -1>
  %71 = bitcast <4 x float> %65 to <4 x i32>
  %72 = and <4 x i32> %71, %70
  %73 = or <4 x i32> %72, %69
  %74 = bitcast <4 x i32> %73 to <4 x float>
  %75 = bitcast float* %66 to <4 x float>*
  store <4 x float> %74, <4 x float>* %75
  br label %entrypoint

; Sole exit block; nothing is returned.
entrypoint:                                       ; preds = %vectorized
  ret void
}
-------------- next part --------------
	.text
	.file	"module"
#-----------------------------------------------------------------------
# void main(i64 lo, i64 hi, float *arg0, float *arg1, i1 *arg2)
#
# Syntax: GNU as, AT&T operand order (source first, destination last).
# ABI:    System V AMD64, ELF (x86_64-unknown-linux-gnu).
# In:     rdx = arg0 (read and written), rcx = arg1 (read), r8 = arg2 (read)
#         rdi = lo and rsi = hi are never read; both are reused as scratch.
# Effect: for each group g in 0..3 (four floats per group):
#             m = the single byte at arg2 + 4*g
#             for each lane k in 0..3:
#                 arg0[4*g+k] = (bit k of m) ? arg1[4*g+k] : arg0[4*g+k]
#         Only ONE mask byte is read per group and only its low four bits
#         are used; the bytes at arg2 + 4*g + 1..3 are never examined.
# Clobb:  rax, rsi, rdi, xmm0, xmm1, flags
# Stack:  untouched (leaf function, no calls).
# Notes:  movdqa and the memory operand of pandn require 16-byte aligned
#         addresses, so arg0 and arg1 must be 16-byte aligned.
#         pinsrd is an SSE4.1 instruction.
#-----------------------------------------------------------------------
	.globl	main
	.align	16, 0x90
	.type	main,@function
main:
	.cfi_startproc

# ---- group 0: floats 0..3, mask byte at arg2+0 ----
	movzbl	(%r8), %eax		# rax = mask byte, zero-extended
	movq	%rax, %rsi
	shlq	$62, %rsi		# bit 1 -> sign position
	sarq	$63, %rsi		# rsi = bit 1 ? -1 : 0
	movq	%rax, %rdi
	shlq	$63, %rdi		# bit 0 -> sign position
	sarq	$63, %rdi		# rdi = bit 0 ? -1 : 0
	movd	%edi, %xmm0		# lane 0 = bit 0 mask, upper lanes zeroed
	pinsrd	$1, %esi, %xmm0		# lane 1 = bit 1 mask
	movq	%rax, %rsi
	shlq	$61, %rsi		# bit 2 -> sign position
	sarq	$63, %rsi		# rsi = bit 2 ? -1 : 0
	pinsrd	$2, %esi, %xmm0		# lane 2 = bit 2 mask
	shlq	$60, %rax		# bit 3 -> sign position
	sarq	$63, %rax		# rax = bit 3 ? -1 : 0
	pinsrd	$3, %eax, %xmm0		# lane 3 = bit 3 mask; xmm0 = full lane mask
	movdqa	(%rcx), %xmm1		# xmm1 = arg1[0..3]
	pand	%xmm0, %xmm1		# xmm1 = arg1 & mask
	pandn	(%rdx), %xmm0		# xmm0 = arg0 & ~mask
	por	%xmm1, %xmm0		# xmm0 = blended result
	movdqa	%xmm0, (%rdx)		# arg0[0..3] = result

# ---- group 1: floats 4..7, mask byte at arg2+4 (same sequence) ----
	movzbl	4(%r8), %eax
	movq	%rax, %rsi
	shlq	$62, %rsi
	sarq	$63, %rsi
	movq	%rax, %rdi
	shlq	$63, %rdi
	sarq	$63, %rdi
	movd	%edi, %xmm0
	pinsrd	$1, %esi, %xmm0
	movq	%rax, %rsi
	shlq	$61, %rsi
	sarq	$63, %rsi
	pinsrd	$2, %esi, %xmm0
	shlq	$60, %rax
	sarq	$63, %rax
	pinsrd	$3, %eax, %xmm0
	movdqa	16(%rcx), %xmm1
	pand	%xmm0, %xmm1
	pandn	16(%rdx), %xmm0
	por	%xmm1, %xmm0
	movdqa	%xmm0, 16(%rdx)

# ---- group 2: floats 8..11, mask byte at arg2+8 (same sequence) ----
	movzbl	8(%r8), %eax
	movq	%rax, %rsi
	shlq	$62, %rsi
	sarq	$63, %rsi
	movq	%rax, %rdi
	shlq	$63, %rdi
	sarq	$63, %rdi
	movd	%edi, %xmm0
	pinsrd	$1, %esi, %xmm0
	movq	%rax, %rsi
	shlq	$61, %rsi
	sarq	$63, %rsi
	pinsrd	$2, %esi, %xmm0
	shlq	$60, %rax
	sarq	$63, %rax
	pinsrd	$3, %eax, %xmm0
	movdqa	32(%rcx), %xmm1
	pand	%xmm0, %xmm1
	pandn	32(%rdx), %xmm0
	por	%xmm1, %xmm0
	movdqa	%xmm0, 32(%rdx)

# ---- group 3: floats 12..15, mask byte at arg2+12 (same sequence) ----
	movzbl	12(%r8), %eax
	movq	%rax, %rsi
	shlq	$62, %rsi
	sarq	$63, %rsi
	movq	%rax, %rdi
	shlq	$63, %rdi
	sarq	$63, %rdi
	movd	%edi, %xmm0
	pinsrd	$1, %esi, %xmm0
	movq	%rax, %rsi
	shlq	$61, %rsi
	sarq	$63, %rsi
	pinsrd	$2, %esi, %xmm0
	shlq	$60, %rax
	sarq	$63, %rax
	pinsrd	$3, %eax, %xmm0
	movdqa	48(%rcx), %xmm1
	pand	%xmm0, %xmm1
	pandn	48(%rdx), %xmm0
	por	%xmm1, %xmm0
	movdqa	%xmm0, 48(%rdx)
	retq
.Ltmp0:
	.size	main, .Ltmp0-main
	.cfi_endproc

	.section	".note.GNU-stack","",@progbits