[llvm-branch-commits] [llvm-branch] r164637 - /llvm/branches/AMDILBackend/test/CodeGen/AMDIL/bit_opt.ll
Micah Villmow
villmow at gmail.com
Tue Sep 25 15:00:12 PDT 2012
Author: mvillmow
Date: Tue Sep 25 17:00:12 2012
New Revision: 164637
URL: http://llvm.org/viewvc/llvm-project?rev=164637&view=rev
Log:
Add a test that verifies that bit_opt is correctly selected, currently it needs a few more patterns to be matched.
Added:
llvm/branches/AMDILBackend/test/CodeGen/AMDIL/bit_opt.ll
Added: llvm/branches/AMDILBackend/test/CodeGen/AMDIL/bit_opt.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/test/CodeGen/AMDIL/bit_opt.ll?rev=164637&view=auto
==============================================================================
--- llvm/branches/AMDILBackend/test/CodeGen/AMDIL/bit_opt.ll (added)
+++ llvm/branches/AMDILBackend/test/CodeGen/AMDIL/bit_opt.ll Tue Sep 25 17:00:12 2012
@@ -0,0 +1,1111 @@
+; RUN: opt -std-compile-opts < %s | llc -march=amdil | FileCheck %s
+target triple = "amdil-pc-amdopencl"
+
+define i32 @get_global_id(i32) nounwind {
+return:
+ ret i32 1
+}
+
+define void @ubit_opt_case_0(i32 addrspace(1)* %a, <4 x i32> addrspace(1)* %b) nounwind {
+ ; CHECK: ubit_opt_case_0
+ ; CHECK: bfi
+entry:
+ %a.addr = alloca i32 addrspace(1)*, align 4
+ %b.addr = alloca <4 x i32> addrspace(1)*, align 4
+ %idx = alloca i32, align 4
+ %c = alloca <4 x i32>, align 16
+ %d = alloca i32, align 4
+ store i32 addrspace(1)* %a, i32 addrspace(1)** %a.addr, align 4
+ store <4 x i32> addrspace(1)* %b, <4 x i32> addrspace(1)** %b.addr, align 4
+ %call = call i32 @get_global_id(i32 0) nounwind
+ store i32 %call, i32* %idx, align 4
+ %tmp = load <4 x i32> addrspace(1)** %b.addr, align 4
+ %tmp1 = load i32* %idx, align 4
+ %arrayidx = getelementptr <4 x i32> addrspace(1)* %tmp, i32 %tmp1
+ %tmp2 = load <4 x i32> addrspace(1)* %arrayidx, align 16
+ store <4 x i32> %tmp2, <4 x i32>* %c, align 16
+ %tmp3 = load <4 x i32>* %c, align 16
+ %tmp4 = extractelement <4 x i32> %tmp3, i32 1
+ store i32 %tmp4, i32* %d, align 4
+ %tmp5 = load i32* %d, align 4
+ %tmp6 = and i32 %tmp5, 255
+ %tmp7 = shl i32 %tmp6, 16
+ %tmp8 = load <4 x i32>* %c, align 16
+ %tmp9 = extractelement <4 x i32> %tmp8, i32 0
+ %tmp10 = and i32 %tmp9, 255
+ %tmp11 = shl i32 %tmp10, 8
+ %tmp12 = or i32 %tmp7, %tmp11
+ store i32 %tmp12, i32* %d, align 4
+ %tmp13 = load i32 addrspace(1)** %a.addr, align 4
+ %tmp14 = load i32* %idx, align 4
+ %arrayidx15 = getelementptr i32 addrspace(1)* %tmp13, i32 %tmp14
+ %tmp16 = load i32* %d, align 4
+ store i32 %tmp16, i32 addrspace(1)* %arrayidx15, align 4
+ br label %return
+
+return: ; preds = %entry
+ ret void
+}
+
+
+define void @ubit_opt_case_1(i32 addrspace(1)* %a, <4 x i32> addrspace(1)* %b) nounwind {
+ ; CHECK: ubit_opt_case_1
+ ; CHECK: bfi
+entry:
+ %a.addr = alloca i32 addrspace(1)*, align 4
+ %b.addr = alloca <4 x i32> addrspace(1)*, align 4
+ %idx = alloca i32, align 4
+ %c = alloca <4 x i32>, align 16
+ %d = alloca i32, align 4
+ store i32 addrspace(1)* %a, i32 addrspace(1)** %a.addr, align 4
+ store <4 x i32> addrspace(1)* %b, <4 x i32> addrspace(1)** %b.addr, align 4
+ %call = call i32 @get_global_id(i32 0) nounwind
+ store i32 %call, i32* %idx, align 4
+ %tmp = load <4 x i32> addrspace(1)** %b.addr, align 4
+ %tmp1 = load i32* %idx, align 4
+ %arrayidx = getelementptr <4 x i32> addrspace(1)* %tmp, i32 %tmp1
+ %tmp2 = load <4 x i32> addrspace(1)* %arrayidx, align 16
+ store <4 x i32> %tmp2, <4 x i32>* %c, align 16
+ %tmp3 = load <4 x i32>* %c, align 16
+ %tmp4 = extractelement <4 x i32> %tmp3, i32 1
+ store i32 %tmp4, i32* %d, align 4
+ %tmp5 = load i32* %d, align 4
+ %tmp6 = and i32 %tmp5, 16711680
+ %tmp7 = load <4 x i32>* %c, align 16
+ %tmp8 = extractelement <4 x i32> %tmp7, i32 0
+ %tmp9 = and i32 %tmp8, 255
+ %tmp10 = shl i32 %tmp9, 8
+ %tmp11 = or i32 %tmp6, %tmp10
+ store i32 %tmp11, i32* %d, align 4
+ %tmp12 = load i32 addrspace(1)** %a.addr, align 4
+ %tmp13 = load i32* %idx, align 4
+ %arrayidx14 = getelementptr i32 addrspace(1)* %tmp12, i32 %tmp13
+ %tmp15 = load i32* %d, align 4
+ store i32 %tmp15, i32 addrspace(1)* %arrayidx14, align 4
+ br label %return
+
+return: ; preds = %entry
+ ret void
+}
+
+define void @ubit_opt_case_2(i32 addrspace(1)* %a, <4 x i32> addrspace(1)* %b) nounwind {
+ ; CHECK: ubit_opt_case_2
+ ; CHECK: bfi
+entry:
+ %a.addr = alloca i32 addrspace(1)*, align 4
+ %b.addr = alloca <4 x i32> addrspace(1)*, align 4
+ %idx = alloca i32, align 4
+ %c = alloca <4 x i32>, align 16
+ %d = alloca i32, align 4
+ store i32 addrspace(1)* %a, i32 addrspace(1)** %a.addr, align 4
+ store <4 x i32> addrspace(1)* %b, <4 x i32> addrspace(1)** %b.addr, align 4
+ %call = call i32 @get_global_id(i32 0) nounwind
+ store i32 %call, i32* %idx, align 4
+ %tmp = load <4 x i32> addrspace(1)** %b.addr, align 4
+ %tmp1 = load i32* %idx, align 4
+ %arrayidx = getelementptr <4 x i32> addrspace(1)* %tmp, i32 %tmp1
+ %tmp2 = load <4 x i32> addrspace(1)* %arrayidx, align 16
+ store <4 x i32> %tmp2, <4 x i32>* %c, align 16
+ %tmp3 = load <4 x i32>* %c, align 16
+ %tmp4 = extractelement <4 x i32> %tmp3, i32 1
+ store i32 %tmp4, i32* %d, align 4
+ %tmp5 = load i32* %d, align 4
+ %tmp6 = and i32 %tmp5, 255
+ %tmp7 = shl i32 %tmp6, 17
+ %tmp8 = load <4 x i32>* %c, align 16
+ %tmp9 = extractelement <4 x i32> %tmp8, i32 0
+ %tmp10 = and i32 %tmp9, 65280
+ %tmp11 = or i32 %tmp7, %tmp10
+ store i32 %tmp11, i32* %d, align 4
+ %tmp12 = load i32 addrspace(1)** %a.addr, align 4
+ %tmp13 = load i32* %idx, align 4
+ %arrayidx14 = getelementptr i32 addrspace(1)* %tmp12, i32 %tmp13
+ %tmp15 = load i32* %d, align 4
+ store i32 %tmp15, i32 addrspace(1)* %arrayidx14, align 4
+ br label %return
+
+return: ; preds = %entry
+ ret void
+}
+
+define void @ubit_opt_case_3(i32 addrspace(1)* %a, <4 x i32> addrspace(1)* %b) nounwind {
+ ; CHECK: ubit_opt_case_3
+ ; CHECK: bfi
+entry:
+ %a.addr = alloca i32 addrspace(1)*, align 4
+ %b.addr = alloca <4 x i32> addrspace(1)*, align 4
+ %idx = alloca i32, align 4
+ %c = alloca <4 x i32>, align 16
+ %d = alloca i32, align 4
+ store i32 addrspace(1)* %a, i32 addrspace(1)** %a.addr, align 4
+ store <4 x i32> addrspace(1)* %b, <4 x i32> addrspace(1)** %b.addr, align 4
+ %call = call i32 @get_global_id(i32 0) nounwind
+ store i32 %call, i32* %idx, align 4
+ %tmp = load <4 x i32> addrspace(1)** %b.addr, align 4
+ %tmp1 = load i32* %idx, align 4
+ %arrayidx = getelementptr <4 x i32> addrspace(1)* %tmp, i32 %tmp1
+ %tmp2 = load <4 x i32> addrspace(1)* %arrayidx, align 16
+ store <4 x i32> %tmp2, <4 x i32>* %c, align 16
+ %tmp3 = load <4 x i32>* %c, align 16
+ %tmp4 = extractelement <4 x i32> %tmp3, i32 1
+ store i32 %tmp4, i32* %d, align 4
+ %tmp5 = load i32* %d, align 4
+ %tmp6 = and i32 %tmp5, 255
+ %tmp7 = shl i32 %tmp6, 17
+ %tmp8 = load <4 x i32>* %c, align 16
+ %tmp9 = extractelement <4 x i32> %tmp8, i32 0
+ %tmp10 = shl i32 %tmp9, 25
+ %tmp11 = or i32 %tmp7, %tmp10
+ store i32 %tmp11, i32* %d, align 4
+ %tmp12 = load i32 addrspace(1)** %a.addr, align 4
+ %tmp13 = load i32* %idx, align 4
+ %arrayidx14 = getelementptr i32 addrspace(1)* %tmp12, i32 %tmp13
+ %tmp15 = load i32* %d, align 4
+ store i32 %tmp15, i32 addrspace(1)* %arrayidx14, align 4
+ br label %return
+
+return: ; preds = %entry
+ ret void
+}
+
+define void @ubit_opt_case_4(i32 addrspace(1)* %a, <4 x i32> addrspace(1)* %b) nounwind {
+ ; CHECK: ubit_opt_case_4
+ ; CHECK: bfi
+entry:
+ %a.addr = alloca i32 addrspace(1)*, align 4
+ %b.addr = alloca <4 x i32> addrspace(1)*, align 4
+ %idx = alloca i32, align 4
+ %c = alloca <4 x i32>, align 16
+ %d = alloca i32, align 4
+ store i32 addrspace(1)* %a, i32 addrspace(1)** %a.addr, align 4
+ store <4 x i32> addrspace(1)* %b, <4 x i32> addrspace(1)** %b.addr, align 4
+ %call = call i32 @get_global_id(i32 0) nounwind
+ store i32 %call, i32* %idx, align 4
+ %tmp = load <4 x i32> addrspace(1)** %b.addr, align 4
+ %tmp1 = load i32* %idx, align 4
+ %arrayidx = getelementptr <4 x i32> addrspace(1)* %tmp, i32 %tmp1
+ %tmp2 = load <4 x i32> addrspace(1)* %arrayidx, align 16
+ store <4 x i32> %tmp2, <4 x i32>* %c, align 16
+ %tmp3 = load <4 x i32>* %c, align 16
+ %tmp4 = extractelement <4 x i32> %tmp3, i32 1
+ store i32 %tmp4, i32* %d, align 4
+ %tmp5 = load i32* %d, align 4
+ %tmp6 = shl i32 %tmp5, 17
+ %tmp7 = load <4 x i32>* %c, align 16
+ %tmp8 = extractelement <4 x i32> %tmp7, i32 0
+ %tmp9 = and i32 %tmp8, 4080
+ %tmp10 = shl i32 %tmp9, 5
+ %tmp11 = or i32 %tmp6, %tmp10
+ store i32 %tmp11, i32* %d, align 4
+ %tmp12 = load i32 addrspace(1)** %a.addr, align 4
+ %tmp13 = load i32* %idx, align 4
+ %arrayidx14 = getelementptr i32 addrspace(1)* %tmp12, i32 %tmp13
+ %tmp15 = load i32* %d, align 4
+ store i32 %tmp15, i32 addrspace(1)* %arrayidx14, align 4
+ br label %return
+
+return: ; preds = %entry
+ ret void
+}
+
+define void @ubit_opt_case_5(i32 addrspace(1)* %a, <4 x i32> addrspace(1)* %b) nounwind {
+ ; CHECK: ubit_opt_case_5
+ ; CHECK: bfi
+entry:
+ %a.addr = alloca i32 addrspace(1)*, align 4
+ %b.addr = alloca <4 x i32> addrspace(1)*, align 4
+ %idx = alloca i32, align 4
+ %c = alloca <4 x i32>, align 16
+ %d = alloca i32, align 4
+ store i32 addrspace(1)* %a, i32 addrspace(1)** %a.addr, align 4
+ store <4 x i32> addrspace(1)* %b, <4 x i32> addrspace(1)** %b.addr, align 4
+ %call = call i32 @get_global_id(i32 0) nounwind
+ store i32 %call, i32* %idx, align 4
+ %tmp = load <4 x i32> addrspace(1)** %b.addr, align 4
+ %tmp1 = load i32* %idx, align 4
+ %arrayidx = getelementptr <4 x i32> addrspace(1)* %tmp, i32 %tmp1
+ %tmp2 = load <4 x i32> addrspace(1)* %arrayidx, align 16
+ store <4 x i32> %tmp2, <4 x i32>* %c, align 16
+ %tmp3 = load <4 x i32>* %c, align 16
+ %tmp4 = extractelement <4 x i32> %tmp3, i32 1
+ store i32 %tmp4, i32* %d, align 4
+ %tmp5 = load i32* %d, align 4
+ %tmp6 = and i32 %tmp5, 65280
+ %tmp7 = load <4 x i32>* %c, align 16
+ %tmp8 = extractelement <4 x i32> %tmp7, i32 0
+ %tmp9 = and i32 %tmp8, 255
+ %tmp10 = or i32 %tmp6, %tmp9
+ store i32 %tmp10, i32* %d, align 4
+ %tmp11 = load i32 addrspace(1)** %a.addr, align 4
+ %tmp12 = load i32* %idx, align 4
+ %arrayidx13 = getelementptr i32 addrspace(1)* %tmp11, i32 %tmp12
+ %tmp14 = load i32* %d, align 4
+ store i32 %tmp14, i32 addrspace(1)* %arrayidx13, align 4
+ br label %return
+
+return: ; preds = %entry
+ ret void
+}
+
+define void @ubit_opt_case_6(i32 addrspace(1)* %a, <4 x i32> addrspace(1)* %b) nounwind {
+ ; CHECK: ubit_opt_case_6
+ ; CHECK: bfi
+entry:
+ %a.addr = alloca i32 addrspace(1)*, align 4
+ %b.addr = alloca <4 x i32> addrspace(1)*, align 4
+ %idx = alloca i32, align 4
+ %c = alloca <4 x i32>, align 16
+ %d = alloca i32, align 4
+ store i32 addrspace(1)* %a, i32 addrspace(1)** %a.addr, align 4
+ store <4 x i32> addrspace(1)* %b, <4 x i32> addrspace(1)** %b.addr, align 4
+ %call = call i32 @get_global_id(i32 0) nounwind
+ store i32 %call, i32* %idx, align 4
+ %tmp = load <4 x i32> addrspace(1)** %b.addr, align 4
+ %tmp1 = load i32* %idx, align 4
+ %arrayidx = getelementptr <4 x i32> addrspace(1)* %tmp, i32 %tmp1
+ %tmp2 = load <4 x i32> addrspace(1)* %arrayidx, align 16
+ store <4 x i32> %tmp2, <4 x i32>* %c, align 16
+ %tmp3 = load <4 x i32>* %c, align 16
+ %tmp4 = extractelement <4 x i32> %tmp3, i32 1
+ store i32 %tmp4, i32* %d, align 4
+ %tmp5 = load i32* %d, align 4
+ %tmp6 = and i32 %tmp5, 65280
+ %tmp7 = load <4 x i32>* %c, align 16
+ %tmp8 = extractelement <4 x i32> %tmp7, i32 0
+ %tmp9 = shl i32 %tmp8, 16
+ %tmp10 = or i32 %tmp6, %tmp9
+ store i32 %tmp10, i32* %d, align 4
+ %tmp11 = load i32 addrspace(1)** %a.addr, align 4
+ %tmp12 = load i32* %idx, align 4
+ %arrayidx13 = getelementptr i32 addrspace(1)* %tmp11, i32 %tmp12
+ %tmp14 = load i32* %d, align 4
+ store i32 %tmp14, i32 addrspace(1)* %arrayidx13, align 4
+ br label %return
+
+return: ; preds = %entry
+ ret void
+}
+
+define void @ubit_opt_case_7(i32 addrspace(1)* %a, <4 x i32> addrspace(1)* %b) nounwind {
+ ; CHECK: ubit_opt_case_7
+ ; CHECK: bfi
+entry:
+ %a.addr = alloca i32 addrspace(1)*, align 4
+ %b.addr = alloca <4 x i32> addrspace(1)*, align 4
+ %idx = alloca i32, align 4
+ %c = alloca <4 x i32>, align 16
+ %d = alloca i32, align 4
+ store i32 addrspace(1)* %a, i32 addrspace(1)** %a.addr, align 4
+ store <4 x i32> addrspace(1)* %b, <4 x i32> addrspace(1)** %b.addr, align 4
+ %call = call i32 @get_global_id(i32 0) nounwind
+ store i32 %call, i32* %idx, align 4
+ %tmp = load <4 x i32> addrspace(1)** %b.addr, align 4
+ %tmp1 = load i32* %idx, align 4
+ %arrayidx = getelementptr <4 x i32> addrspace(1)* %tmp, i32 %tmp1
+ %tmp2 = load <4 x i32> addrspace(1)* %arrayidx, align 16
+ store <4 x i32> %tmp2, <4 x i32>* %c, align 16
+ %tmp3 = load <4 x i32>* %c, align 16
+ %tmp4 = extractelement <4 x i32> %tmp3, i32 1
+ store i32 %tmp4, i32* %d, align 4
+ %tmp5 = load i32* %d, align 4
+ %tmp6 = shl i32 %tmp5, 9
+ %tmp7 = load <4 x i32>* %c, align 16
+ %tmp8 = extractelement <4 x i32> %tmp7, i32 0
+ %tmp9 = and i32 %tmp8, 255
+ %tmp10 = or i32 %tmp6, %tmp9
+ store i32 %tmp10, i32* %d, align 4
+ %tmp11 = load i32 addrspace(1)** %a.addr, align 4
+ %tmp12 = load i32* %idx, align 4
+ %arrayidx13 = getelementptr i32 addrspace(1)* %tmp11, i32 %tmp12
+ %tmp14 = load i32* %d, align 4
+ store i32 %tmp14, i32 addrspace(1)* %arrayidx13, align 4
+ br label %return
+
+return: ; preds = %entry
+ ret void
+}
+
+define void @ubit_opt_case_8(i32 addrspace(1)* %a, <4 x i32> addrspace(1)* %b) nounwind {
+ ; CHECK: ubit_opt_case_8
+ ; CHECK: bfi
+entry:
+ %a.addr = alloca i32 addrspace(1)*, align 4
+ %b.addr = alloca <4 x i32> addrspace(1)*, align 4
+ %idx = alloca i32, align 4
+ %c = alloca <4 x i32>, align 16
+ %d = alloca i32, align 4
+ store i32 addrspace(1)* %a, i32 addrspace(1)** %a.addr, align 4
+ store <4 x i32> addrspace(1)* %b, <4 x i32> addrspace(1)** %b.addr, align 4
+ %call = call i32 @get_global_id(i32 0) nounwind
+ store i32 %call, i32* %idx, align 4
+ %tmp = load <4 x i32> addrspace(1)** %b.addr, align 4
+ %tmp1 = load i32* %idx, align 4
+ %arrayidx = getelementptr <4 x i32> addrspace(1)* %tmp, i32 %tmp1
+ %tmp2 = load <4 x i32> addrspace(1)* %arrayidx, align 16
+ store <4 x i32> %tmp2, <4 x i32>* %c, align 16
+ %tmp3 = load <4 x i32>* %c, align 16
+ %tmp4 = extractelement <4 x i32> %tmp3, i32 1
+ store i32 %tmp4, i32* %d, align 4
+ %tmp5 = load i32* %d, align 4
+ %tmp6 = and i32 %tmp5, 255
+ %tmp7 = shl i32 %tmp6, 8
+ %tmp8 = load <4 x i32>* %c, align 16
+ %tmp9 = extractelement <4 x i32> %tmp8, i32 0
+ %tmp10 = and i32 %tmp9, -65281
+ %tmp11 = or i32 %tmp7, %tmp10
+ store i32 %tmp11, i32* %d, align 4
+ %tmp12 = load i32 addrspace(1)** %a.addr, align 4
+ %tmp13 = load i32* %idx, align 4
+ %arrayidx14 = getelementptr i32 addrspace(1)* %tmp12, i32 %tmp13
+ %tmp15 = load i32* %d, align 4
+ store i32 %tmp15, i32 addrspace(1)* %arrayidx14, align 4
+ br label %return
+
+return: ; preds = %entry
+ ret void
+}
+
+define void @ubit_opt_case_9(i32 addrspace(1)* %greal, <4 x i32> addrspace(1)* %gimag) nounwind {
+ ; CHECK: ubit_opt_case_9
+ ; CHECK: bfi
+entry:
+ %greal.addr = alloca i32 addrspace(1)*, align 4
+ %gimag.addr = alloca <4 x i32> addrspace(1)*, align 4
+ %tmp = alloca <4 x i32>, align 16
+ %gid = alloca i32, align 4
+ store i32 addrspace(1)* %greal, i32 addrspace(1)** %greal.addr, align 4
+ store <4 x i32> addrspace(1)* %gimag, <4 x i32> addrspace(1)** %gimag.addr, align 4
+ %call = call i32 @get_global_id(i32 0) nounwind
+ store i32 %call, i32* %gid, align 4
+ %tmp1 = load <4 x i32> addrspace(1)** %gimag.addr, align 4
+ %tmp2 = load i32* %gid, align 4
+ %arrayidx = getelementptr <4 x i32> addrspace(1)* %tmp1, i32 %tmp2
+ %tmp3 = load i32* %gid, align 4
+ %tmp4 = lshr i32 %tmp3, 6
+ %tmp5 = mul i32 %tmp4, 1024
+ %tmp6 = load i32* %gid, align 4
+ %tmp7 = and i32 %tmp6, 252
+ %tmp8 = add i32 %tmp5, %tmp7
+ %conv = insertelement <4 x i32> undef, i32 %tmp8, i32 0
+ %conv9 = insertelement <4 x i32> %conv, i32 %tmp8, i32 1
+ %conv10 = insertelement <4 x i32> %conv9, i32 %tmp8, i32 2
+ %conv11 = insertelement <4 x i32> %conv10, i32 %tmp8, i32 3
+ store <4 x i32> %conv11, <4 x i32>* %tmp, align 16
+ %tmp12 = load <4 x i32>* %tmp, align 16
+ store <4 x i32> %tmp12, <4 x i32> addrspace(1)* %arrayidx, align 16
+ br label %return
+
+return: ; preds = %entry
+ ret void
+}
+
+define void @ubit_opt_case_10(i32 addrspace(1)* %a, <4 x i32> addrspace(1)* %b) nounwind {
+ ; CHECK: ubit_opt_case_10
+ ; CHECK: bfi
+entry:
+ %a.addr = alloca i32 addrspace(1)*, align 4
+ %b.addr = alloca <4 x i32> addrspace(1)*, align 4
+ %idx = alloca i32, align 4
+ %c = alloca <4 x i32>, align 16
+ %d = alloca i32, align 4
+ store i32 addrspace(1)* %a, i32 addrspace(1)** %a.addr, align 4
+ store <4 x i32> addrspace(1)* %b, <4 x i32> addrspace(1)** %b.addr, align 4
+ %call = call i32 @get_global_id(i32 0) nounwind
+ store i32 %call, i32* %idx, align 4
+ %tmp = load <4 x i32> addrspace(1)** %b.addr, align 4
+ %tmp1 = load i32* %idx, align 4
+ %arrayidx = getelementptr <4 x i32> addrspace(1)* %tmp, i32 %tmp1
+ %tmp2 = load <4 x i32> addrspace(1)* %arrayidx, align 16
+ store <4 x i32> %tmp2, <4 x i32>* %c, align 16
+ %tmp3 = load <4 x i32>* %c, align 16
+ %tmp4 = extractelement <4 x i32> %tmp3, i32 1
+ store i32 %tmp4, i32* %d, align 4
+ %tmp5 = load i32* %d, align 4
+ %tmp6 = shl i32 %tmp5, 10
+ %tmp7 = load <4 x i32>* %c, align 16
+ %tmp8 = extractelement <4 x i32> %tmp7, i32 0
+ %tmp9 = and i32 %tmp8, 63
+ %tmp10 = or i32 %tmp6, %tmp9
+ store i32 %tmp10, i32* %d, align 4
+ %tmp11 = load i32 addrspace(1)** %a.addr, align 4
+ %tmp12 = load i32* %idx, align 4
+ %arrayidx13 = getelementptr i32 addrspace(1)* %tmp11, i32 %tmp12
+ %tmp14 = load i32* %d, align 4
+ store i32 %tmp14, i32 addrspace(1)* %arrayidx13, align 4
+ br label %return
+
+return: ; preds = %entry
+ ret void
+}
+
+define void @ubit_opt_case_11(i32 addrspace(1)* %a, <4 x i32> addrspace(1)* %b) nounwind {
+ ; CHECK: ubit_opt_case_11
+ ; CHECK: bfi
+entry:
+ %a.addr = alloca i32 addrspace(1)*, align 4
+ %b.addr = alloca <4 x i32> addrspace(1)*, align 4
+ %idx = alloca i32, align 4
+ %c = alloca <4 x i32>, align 16
+ %d = alloca i32, align 4
+ store i32 addrspace(1)* %a, i32 addrspace(1)** %a.addr, align 4
+ store <4 x i32> addrspace(1)* %b, <4 x i32> addrspace(1)** %b.addr, align 4
+ %call = call i32 @get_global_id(i32 0) nounwind
+ store i32 %call, i32* %idx, align 4
+ %tmp = load <4 x i32> addrspace(1)** %b.addr, align 4
+ %tmp1 = load i32* %idx, align 4
+ %arrayidx = getelementptr <4 x i32> addrspace(1)* %tmp, i32 %tmp1
+ %tmp2 = load <4 x i32> addrspace(1)* %arrayidx, align 16
+ store <4 x i32> %tmp2, <4 x i32>* %c, align 16
+ %tmp3 = load <4 x i32>* %c, align 16
+ %tmp4 = extractelement <4 x i32> %tmp3, i32 1
+ store i32 %tmp4, i32* %d, align 4
+ %tmp5 = load i32* %d, align 4
+ %tmp6 = and i32 %tmp5, 896
+ %tmp7 = shl i32 %tmp6, 4
+ %tmp8 = load <4 x i32>* %c, align 16
+ %tmp9 = extractelement <4 x i32> %tmp8, i32 0
+ %tmp10 = and i32 %tmp9, 7
+ %tmp11 = or i32 %tmp7, %tmp10
+ store i32 %tmp11, i32* %d, align 4
+ %tmp12 = load i32 addrspace(1)** %a.addr, align 4
+ %tmp13 = load i32* %idx, align 4
+ %arrayidx14 = getelementptr i32 addrspace(1)* %tmp12, i32 %tmp13
+ %tmp15 = load i32* %d, align 4
+ store i32 %tmp15, i32 addrspace(1)* %arrayidx14, align 4
+ br label %return
+
+return: ; preds = %entry
+ ret void
+}
+
+define void @ubit_opt_case_12(i32 addrspace(1)* %a, <4 x i32> addrspace(1)* %b) nounwind {
+ ; CHECK: ubit_opt_case_12
+ ; CHECK: bfi
+entry:
+ %a.addr = alloca i32 addrspace(1)*, align 4
+ %b.addr = alloca <4 x i32> addrspace(1)*, align 4
+ %idx = alloca i32, align 4
+ %c = alloca <4 x i32>, align 16
+ %d = alloca i32, align 4
+ store i32 addrspace(1)* %a, i32 addrspace(1)** %a.addr, align 4
+ store <4 x i32> addrspace(1)* %b, <4 x i32> addrspace(1)** %b.addr, align 4
+ %call = call i32 @get_global_id(i32 0) nounwind
+ store i32 %call, i32* %idx, align 4
+ %tmp = load <4 x i32> addrspace(1)** %b.addr, align 4
+ %tmp1 = load i32* %idx, align 4
+ %arrayidx = getelementptr <4 x i32> addrspace(1)* %tmp, i32 %tmp1
+ %tmp2 = load <4 x i32> addrspace(1)* %arrayidx, align 16
+ store <4 x i32> %tmp2, <4 x i32>* %c, align 16
+ %tmp3 = load <4 x i32>* %c, align 16
+ %tmp4 = extractelement <4 x i32> %tmp3, i32 1
+ store i32 %tmp4, i32* %d, align 4
+ %tmp5 = load i32* %d, align 4
+ %tmp6 = and i32 %tmp5, 7
+ %tmp7 = shl i32 %tmp6, 7
+ %tmp8 = load <4 x i32>* %c, align 16
+ %tmp9 = extractelement <4 x i32> %tmp8, i32 0
+ %tmp10 = and i32 %tmp9, 56
+ %tmp11 = or i32 %tmp7, %tmp10
+ store i32 %tmp11, i32* %d, align 4
+ %tmp12 = load i32 addrspace(1)** %a.addr, align 4
+ %tmp13 = load i32* %idx, align 4
+ %arrayidx14 = getelementptr i32 addrspace(1)* %tmp12, i32 %tmp13
+ %tmp15 = load i32* %d, align 4
+ store i32 %tmp15, i32 addrspace(1)* %arrayidx14, align 4
+ br label %return
+
+return: ; preds = %entry
+ ret void
+}
+
+define void @ubit_opt_case_13(i32 addrspace(1)* %a, <4 x i32> addrspace(1)* %b) nounwind {
+ ; CHECK: ubit_opt_case_13
+ ; CHECK: bfi
+entry:
+ %a.addr = alloca i32 addrspace(1)*, align 4
+ %b.addr = alloca <4 x i32> addrspace(1)*, align 4
+ %idx = alloca i32, align 4
+ %c = alloca <4 x i32>, align 16
+ %d = alloca i32, align 4
+ store i32 addrspace(1)* %a, i32 addrspace(1)** %a.addr, align 4
+ store <4 x i32> addrspace(1)* %b, <4 x i32> addrspace(1)** %b.addr, align 4
+ %call = call i32 @get_global_id(i32 0) nounwind
+ store i32 %call, i32* %idx, align 4
+ %tmp = load <4 x i32> addrspace(1)** %b.addr, align 4
+ %tmp1 = load i32* %idx, align 4
+ %arrayidx = getelementptr <4 x i32> addrspace(1)* %tmp, i32 %tmp1
+ %tmp2 = load <4 x i32> addrspace(1)* %arrayidx, align 16
+ store <4 x i32> %tmp2, <4 x i32>* %c, align 16
+ %tmp3 = load <4 x i32>* %c, align 16
+ %tmp4 = extractelement <4 x i32> %tmp3, i32 1
+ store i32 %tmp4, i32* %d, align 4
+ %tmp5 = load i32* %d, align 4
+ %tmp6 = and i32 %tmp5, 63
+ %tmp7 = shl i32 %tmp6, 1
+ %tmp8 = load <4 x i32>* %c, align 16
+ %tmp9 = extractelement <4 x i32> %tmp8, i32 0
+ %tmp10 = shl i32 %tmp9, 10
+ %tmp11 = or i32 %tmp7, %tmp10
+ store i32 %tmp11, i32* %d, align 4
+ %tmp12 = load i32 addrspace(1)** %a.addr, align 4
+ %tmp13 = load i32* %idx, align 4
+ %arrayidx14 = getelementptr i32 addrspace(1)* %tmp12, i32 %tmp13
+ %tmp15 = load i32* %d, align 4
+ store i32 %tmp15, i32 addrspace(1)* %arrayidx14, align 4
+ br label %return
+
+return: ; preds = %entry
+ ret void
+}
+
+define void @ubit_opt_case_14(i32 addrspace(1)* %a, <4 x i32> addrspace(1)* %b) nounwind {
+ ; CHECK: ubit_opt_case_14
+ ; CHECK: bfi
+entry:
+ %a.addr = alloca i32 addrspace(1)*, align 4
+ %b.addr = alloca <4 x i32> addrspace(1)*, align 4
+ %idx = alloca i32, align 4
+ %c = alloca <4 x i32>, align 16
+ store i32 addrspace(1)* %a, i32 addrspace(1)** %a.addr, align 4
+ store <4 x i32> addrspace(1)* %b, <4 x i32> addrspace(1)** %b.addr, align 4
+ %call = call i32 @get_global_id(i32 0) nounwind
+ store i32 %call, i32* %idx, align 4
+ %tmp = load <4 x i32> addrspace(1)** %b.addr, align 4
+ %tmp1 = load i32* %idx, align 4
+ %arrayidx = getelementptr <4 x i32> addrspace(1)* %tmp, i32 %tmp1
+ %tmp2 = load <4 x i32> addrspace(1)* %arrayidx, align 16
+ store <4 x i32> %tmp2, <4 x i32>* %c, align 16
+ %tmp3 = load i32 addrspace(1)** %a.addr, align 4
+ %tmp4 = load i32* %idx, align 4
+ %arrayidx5 = getelementptr i32 addrspace(1)* %tmp3, i32 %tmp4
+ %tmp6 = load <4 x i32>* %c, align 16
+ %tmp7 = extractelement <4 x i32> %tmp6, i32 0
+ %tmp8 = load <4 x i32>* %c, align 16
+ %tmp9 = extractelement <4 x i32> %tmp8, i32 3
+ %tmp10 = and i32 %tmp7, %tmp9
+ %tmp11 = load <4 x i32>* %c, align 16
+ %tmp12 = extractelement <4 x i32> %tmp11, i32 1
+ %tmp13 = load <4 x i32>* %c, align 16
+ %tmp14 = extractelement <4 x i32> %tmp13, i32 3
+ %tmp15 = xor i32 %tmp14, -1
+ %tmp16 = and i32 %tmp12, %tmp15
+ %tmp17 = or i32 %tmp10, %tmp16
+ store i32 %tmp17, i32 addrspace(1)* %arrayidx5, align 4
+ br label %return
+
+return: ; preds = %entry
+ ret void
+}
+
+define void @ubit_opt_case_15(i32 addrspace(1)* %a, <4 x i32> addrspace(1)* %b) nounwind {
+ ; CHECK: ubit_opt_case_15
+ ; CHECK: bfi
+entry:
+ %a.addr = alloca i32 addrspace(1)*, align 4
+ %b.addr = alloca <4 x i32> addrspace(1)*, align 4
+ %idx = alloca i32, align 4
+ %c = alloca <4 x i32>, align 16
+ store i32 addrspace(1)* %a, i32 addrspace(1)** %a.addr, align 4
+ store <4 x i32> addrspace(1)* %b, <4 x i32> addrspace(1)** %b.addr, align 4
+ %call = call i32 @get_global_id(i32 0) nounwind
+ store i32 %call, i32* %idx, align 4
+ %tmp = load <4 x i32> addrspace(1)** %b.addr, align 4
+ %tmp1 = load i32* %idx, align 4
+ %arrayidx = getelementptr <4 x i32> addrspace(1)* %tmp, i32 %tmp1
+ %tmp2 = load <4 x i32> addrspace(1)* %arrayidx, align 16
+ store <4 x i32> %tmp2, <4 x i32>* %c, align 16
+ %tmp3 = load i32 addrspace(1)** %a.addr, align 4
+ %tmp4 = load i32* %idx, align 4
+ %arrayidx5 = getelementptr i32 addrspace(1)* %tmp3, i32 %tmp4
+ %tmp6 = load <4 x i32>* %c, align 16
+ %tmp7 = extractelement <4 x i32> %tmp6, i32 0
+ %tmp8 = load <4 x i32>* %c, align 16
+ %tmp9 = extractelement <4 x i32> %tmp8, i32 3
+ %tmp10 = and i32 %tmp7, %tmp9
+ %tmp11 = load <4 x i32>* %c, align 16
+ %tmp12 = extractelement <4 x i32> %tmp11, i32 1
+ %tmp13 = load <4 x i32>* %c, align 16
+ %tmp14 = extractelement <4 x i32> %tmp13, i32 3
+ %tmp15 = xor i32 %tmp14, -1
+ %tmp16 = and i32 %tmp12, %tmp15
+ %tmp17 = or i32 %tmp10, %tmp16
+ store i32 %tmp17, i32 addrspace(1)* %arrayidx5, align 4
+ br label %return
+
+return: ; preds = %entry
+ ret void
+}
+
+define void @ubit_opt_case_16(i32 addrspace(1)* %a, <4 x i32> addrspace(1)* %b) nounwind {
+ ; CHECK: ubit_opt_case_16
+ ; CHECK: bfi
+entry:
+ %a.addr = alloca i32 addrspace(1)*, align 4
+ %b.addr = alloca <4 x i32> addrspace(1)*, align 4
+ %idx = alloca i32, align 4
+ %c = alloca <4 x i32>, align 16
+ store i32 addrspace(1)* %a, i32 addrspace(1)** %a.addr, align 4
+ store <4 x i32> addrspace(1)* %b, <4 x i32> addrspace(1)** %b.addr, align 4
+ %call = call i32 @get_global_id(i32 0) nounwind
+ store i32 %call, i32* %idx, align 4
+ %tmp = load <4 x i32> addrspace(1)** %b.addr, align 4
+ %tmp1 = load i32* %idx, align 4
+ %arrayidx = getelementptr <4 x i32> addrspace(1)* %tmp, i32 %tmp1
+ %tmp2 = load <4 x i32> addrspace(1)* %arrayidx, align 16
+ store <4 x i32> %tmp2, <4 x i32>* %c, align 16
+ %tmp3 = load i32 addrspace(1)** %a.addr, align 4
+ %tmp4 = load i32* %idx, align 4
+ %arrayidx5 = getelementptr i32 addrspace(1)* %tmp3, i32 %tmp4
+ %tmp6 = load <4 x i32>* %c, align 16
+ %tmp7 = extractelement <4 x i32> %tmp6, i32 3
+ %tmp8 = load <4 x i32>* %c, align 16
+ %tmp9 = extractelement <4 x i32> %tmp8, i32 0
+ %tmp10 = load <4 x i32>* %c, align 16
+ %tmp11 = extractelement <4 x i32> %tmp10, i32 1
+ %tmp12 = load <4 x i32>* %c, align 16
+ %tmp13 = extractelement <4 x i32> %tmp12, i32 3
+ %tmp14 = xor i32 %tmp11, %tmp13
+ %tmp15 = and i32 %tmp9, %tmp14
+ %tmp16 = xor i32 %tmp7, %tmp15
+ store i32 %tmp16, i32 addrspace(1)* %arrayidx5, align 4
+ br label %return
+
+return: ; preds = %entry
+ ret void
+}
+
+define void @bfm_opt_case_0(i32 addrspace(1)* %a, <4 x i32> addrspace(1)* %b) nounwind {
+ ; CHECK: bfm_opt_case_0
+ ; CHECK: bfm
+entry:
+ %a.addr = alloca i32 addrspace(1)*, align 4
+ %b.addr = alloca <4 x i32> addrspace(1)*, align 4
+ %idx = alloca i32, align 4
+ %c = alloca <4 x i32>, align 16
+ store i32 addrspace(1)* %a, i32 addrspace(1)** %a.addr, align 4
+ store <4 x i32> addrspace(1)* %b, <4 x i32> addrspace(1)** %b.addr, align 4
+ %call = call i32 @get_global_id(i32 0) nounwind
+ store i32 %call, i32* %idx, align 4
+ %tmp = load <4 x i32> addrspace(1)** %b.addr, align 4
+ %tmp1 = load i32* %idx, align 4
+ %arrayidx = getelementptr <4 x i32> addrspace(1)* %tmp, i32 %tmp1
+ %tmp2 = load <4 x i32> addrspace(1)* %arrayidx, align 16
+ store <4 x i32> %tmp2, <4 x i32>* %c, align 16
+ %tmp3 = load i32 addrspace(1)** %a.addr, align 4
+ %tmp4 = load i32* %idx, align 4
+ %arrayidx5 = getelementptr i32 addrspace(1)* %tmp3, i32 %tmp4
+ %tmp6 = load <4 x i32>* %c, align 16
+ %tmp7 = extractelement <4 x i32> %tmp6, i32 0
+ %tmp8 = and i32 %tmp7, 31
+ %tmp9 = and i32 %tmp8, 31
+ %tmp10 = shl i32 1, %tmp9
+ %tmp11 = sub nsw i32 %tmp10, 1
+ %tmp12 = load <4 x i32>* %c, align 16
+ %tmp13 = extractelement <4 x i32> %tmp12, i32 1
+ %tmp14 = and i32 %tmp13, 31
+ %tmp15 = and i32 %tmp14, 31
+ %tmp16 = shl i32 %tmp11, %tmp15
+ store i32 %tmp16, i32 addrspace(1)* %arrayidx5, align 4
+ br label %return
+
+return: ; preds = %entry
+ ret void
+}
+
+define void @ubit_opt_case_17(i32 addrspace(1)* %a, <4 x i32> addrspace(1)* %b) nounwind {
+ ; CHECK: ubit_opt_case_17
+ ; CHECK: bfi
+entry:
+ %a.addr = alloca i32 addrspace(1)*, align 4
+ %b.addr = alloca <4 x i32> addrspace(1)*, align 4
+ %idx = alloca i32, align 4
+ %c = alloca <4 x i32>, align 16
+ store i32 addrspace(1)* %a, i32 addrspace(1)** %a.addr, align 4
+ store <4 x i32> addrspace(1)* %b, <4 x i32> addrspace(1)** %b.addr, align 4
+ %call = call i32 @get_global_id(i32 0) nounwind
+ store i32 %call, i32* %idx, align 4
+ %tmp = load <4 x i32> addrspace(1)** %b.addr, align 4
+ %tmp1 = load i32* %idx, align 4
+ %arrayidx = getelementptr <4 x i32> addrspace(1)* %tmp, i32 %tmp1
+ %tmp2 = load <4 x i32> addrspace(1)* %arrayidx, align 16
+ store <4 x i32> %tmp2, <4 x i32>* %c, align 16
+ %tmp3 = load i32 addrspace(1)** %a.addr, align 4
+ %tmp4 = load i32* %idx, align 4
+ %arrayidx5 = getelementptr i32 addrspace(1)* %tmp3, i32 %tmp4
+ %tmp6 = load <4 x i32>* %c, align 16
+ %tmp7 = extractelement <4 x i32> %tmp6, i32 0
+ %tmp8 = load <4 x i32>* %c, align 16
+ %tmp9 = extractelement <4 x i32> %tmp8, i32 0
+ %tmp10 = load <4 x i32>* %c, align 16
+ %tmp11 = extractelement <4 x i32> %tmp10, i32 1
+ %tmp12 = xor i32 %tmp9, %tmp11
+ %tmp13 = load <4 x i32>* %c, align 16
+ %tmp14 = extractelement <4 x i32> %tmp13, i32 2
+ %tmp15 = and i32 %tmp12, %tmp14
+ %tmp16 = xor i32 %tmp7, %tmp15
+ store i32 %tmp16, i32 addrspace(1)* %arrayidx5, align 4
+ br label %return
+
+return: ; preds = %entry
+ ret void
+}
+
+define void @ubit_noopt_case_0(i32 addrspace(1)* %a, <4 x i32> addrspace(1)* %b) nounwind {
+ ; CHECK: ubit_noopt_case_0
+ ; CHECK-NOT: bfi
+entry:
+ %a.addr = alloca i32 addrspace(1)*, align 4
+ %b.addr = alloca <4 x i32> addrspace(1)*, align 4
+ %idx = alloca i32, align 4
+ %c = alloca <4 x i32>, align 16
+ %d = alloca i32, align 4
+ store i32 addrspace(1)* %a, i32 addrspace(1)** %a.addr, align 4
+ store <4 x i32> addrspace(1)* %b, <4 x i32> addrspace(1)** %b.addr, align 4
+ %call = call i32 @get_global_id(i32 0) nounwind
+ store i32 %call, i32* %idx, align 4
+ %tmp = load <4 x i32> addrspace(1)** %b.addr, align 4
+ %tmp1 = load i32* %idx, align 4
+ %arrayidx = getelementptr <4 x i32> addrspace(1)* %tmp, i32 %tmp1
+ %tmp2 = load <4 x i32> addrspace(1)* %arrayidx, align 16
+ store <4 x i32> %tmp2, <4 x i32>* %c, align 16
+ %tmp3 = load <4 x i32>* %c, align 16
+ %tmp4 = extractelement <4 x i32> %tmp3, i32 1
+ store i32 %tmp4, i32* %d, align 4
+ %tmp5 = load i32* %d, align 4
+ %tmp6 = and i32 %tmp5, 255
+ %tmp7 = shl i32 %tmp6, 16
+ %tmp8 = load <4 x i32>* %c, align 16
+ %tmp9 = extractelement <4 x i32> %tmp8, i32 0
+ %tmp10 = and i32 %tmp9, 255
+ %tmp11 = shl i32 %tmp10, 9
+ %tmp12 = or i32 %tmp7, %tmp11
+ store i32 %tmp12, i32* %d, align 4
+ %tmp13 = load i32 addrspace(1)** %a.addr, align 4
+ %tmp14 = load i32* %idx, align 4
+ %arrayidx15 = getelementptr i32 addrspace(1)* %tmp13, i32 %tmp14
+ %tmp16 = load i32* %d, align 4
+ store i32 %tmp16, i32 addrspace(1)* %arrayidx15, align 4
+ br label %return
+
+return: ; preds = %entry
+ ret void
+}
+
+define void @ubit_noopt_case_1(i32 addrspace(1)* %a, <4 x i32> addrspace(1)* %b) nounwind {
+ ; CHECK: ubit_noopt_case_1
+ ; CHECK-NOT: bfi
+entry:
+ %a.addr = alloca i32 addrspace(1)*, align 4
+ %b.addr = alloca <4 x i32> addrspace(1)*, align 4
+ %idx = alloca i32, align 4
+ %c = alloca <4 x i32>, align 16
+ %d = alloca i32, align 4
+ store i32 addrspace(1)* %a, i32 addrspace(1)** %a.addr, align 4
+ store <4 x i32> addrspace(1)* %b, <4 x i32> addrspace(1)** %b.addr, align 4
+ %call = call i32 @get_global_id(i32 0) nounwind
+ store i32 %call, i32* %idx, align 4
+ %tmp = load <4 x i32> addrspace(1)** %b.addr, align 4
+ %tmp1 = load i32* %idx, align 4
+ %arrayidx = getelementptr <4 x i32> addrspace(1)* %tmp, i32 %tmp1
+ %tmp2 = load <4 x i32> addrspace(1)* %arrayidx, align 16
+ store <4 x i32> %tmp2, <4 x i32>* %c, align 16
+ %tmp3 = load <4 x i32>* %c, align 16
+ %tmp4 = extractelement <4 x i32> %tmp3, i32 1
+ store i32 %tmp4, i32* %d, align 4
+ %tmp5 = load i32* %d, align 4
+ %tmp6 = and i32 %tmp5, 16744448
+ %tmp7 = load <4 x i32>* %c, align 16
+ %tmp8 = extractelement <4 x i32> %tmp7, i32 0
+ %tmp9 = and i32 %tmp8, 255
+ %tmp10 = shl i32 %tmp9, 8
+ %tmp11 = or i32 %tmp6, %tmp10
+ store i32 %tmp11, i32* %d, align 4
+ %tmp12 = load i32 addrspace(1)** %a.addr, align 4
+ %tmp13 = load i32* %idx, align 4
+ %arrayidx14 = getelementptr i32 addrspace(1)* %tmp12, i32 %tmp13
+ %tmp15 = load i32* %d, align 4
+ store i32 %tmp15, i32 addrspace(1)* %arrayidx14, align 4
+ br label %return
+
+return: ; preds = %entry
+ ret void
+}
+
+define void @ubit_noopt_case_2(i32 addrspace(1)* %a, <4 x i32> addrspace(1)* %b) nounwind {
+ ; CHECK: ubit_noopt_case_2
+ ; CHECK-NOT: bfi
+entry:
+ %a.addr = alloca i32 addrspace(1)*, align 4
+ %b.addr = alloca <4 x i32> addrspace(1)*, align 4
+ %idx = alloca i32, align 4
+ %c = alloca <4 x i32>, align 16
+ %d = alloca i32, align 4
+ store i32 addrspace(1)* %a, i32 addrspace(1)** %a.addr, align 4
+ store <4 x i32> addrspace(1)* %b, <4 x i32> addrspace(1)** %b.addr, align 4
+ %call = call i32 @get_global_id(i32 0) nounwind
+ store i32 %call, i32* %idx, align 4
+ %tmp = load <4 x i32> addrspace(1)** %b.addr, align 4
+ %tmp1 = load i32* %idx, align 4
+ %arrayidx = getelementptr <4 x i32> addrspace(1)* %tmp, i32 %tmp1
+ %tmp2 = load <4 x i32> addrspace(1)* %arrayidx, align 16
+ store <4 x i32> %tmp2, <4 x i32>* %c, align 16
+ %tmp3 = load <4 x i32>* %c, align 16
+ %tmp4 = extractelement <4 x i32> %tmp3, i32 1
+ store i32 %tmp4, i32* %d, align 4
+ %tmp5 = load i32* %d, align 4
+ %tmp6 = and i32 %tmp5, 255
+ %tmp7 = shl i32 %tmp6, 15
+ %tmp8 = load <4 x i32>* %c, align 16
+ %tmp9 = extractelement <4 x i32> %tmp8, i32 0
+ %tmp10 = and i32 %tmp9, 65280
+ %tmp11 = or i32 %tmp7, %tmp10
+ store i32 %tmp11, i32* %d, align 4
+ %tmp12 = load i32 addrspace(1)** %a.addr, align 4
+ %tmp13 = load i32* %idx, align 4
+ %arrayidx14 = getelementptr i32 addrspace(1)* %tmp12, i32 %tmp13
+ %tmp15 = load i32* %d, align 4
+ store i32 %tmp15, i32 addrspace(1)* %arrayidx14, align 4
+ br label %return
+
+return: ; preds = %entry
+ ret void
+}
+
+define void @ubit_noopt_case_3(i32 addrspace(1)* %a, <4 x i32> addrspace(1)* %b) nounwind {
+ ; CHECK: ubit_noopt_case_3
+ ; CHECK-NOT: bfi
+entry:
+ %a.addr = alloca i32 addrspace(1)*, align 4
+ %b.addr = alloca <4 x i32> addrspace(1)*, align 4
+ %idx = alloca i32, align 4
+ %c = alloca <4 x i32>, align 16
+ %d = alloca i32, align 4
+ store i32 addrspace(1)* %a, i32 addrspace(1)** %a.addr, align 4
+ store <4 x i32> addrspace(1)* %b, <4 x i32> addrspace(1)** %b.addr, align 4
+ %call = call i32 @get_global_id(i32 0) nounwind
+ store i32 %call, i32* %idx, align 4
+ %tmp = load <4 x i32> addrspace(1)** %b.addr, align 4
+ %tmp1 = load i32* %idx, align 4
+ %arrayidx = getelementptr <4 x i32> addrspace(1)* %tmp, i32 %tmp1
+ %tmp2 = load <4 x i32> addrspace(1)* %arrayidx, align 16
+ store <4 x i32> %tmp2, <4 x i32>* %c, align 16
+ %tmp3 = load <4 x i32>* %c, align 16
+ %tmp4 = extractelement <4 x i32> %tmp3, i32 1
+ store i32 %tmp4, i32* %d, align 4
+ %tmp5 = load i32* %d, align 4
+ %tmp6 = and i32 %tmp5, 255
+ %tmp7 = shl i32 %tmp6, 17
+ %tmp8 = load <4 x i32>* %c, align 16
+ %tmp9 = extractelement <4 x i32> %tmp8, i32 0
+ %tmp10 = shl i32 %tmp9, 22
+ %tmp11 = or i32 %tmp7, %tmp10
+ store i32 %tmp11, i32* %d, align 4
+ %tmp12 = load i32 addrspace(1)** %a.addr, align 4
+ %tmp13 = load i32* %idx, align 4
+ %arrayidx14 = getelementptr i32 addrspace(1)* %tmp12, i32 %tmp13
+ %tmp15 = load i32* %d, align 4
+ store i32 %tmp15, i32 addrspace(1)* %arrayidx14, align 4
+ br label %return
+
+return: ; preds = %entry
+ ret void
+}
+
+define void @ubit_noopt_case_4(i32 addrspace(1)* %a, <4 x i32> addrspace(1)* %b) nounwind {
+ ; CHECK: ubit_noopt_case_4
+ ; CHECK-NOT: bfi
+entry:
+ %a.addr = alloca i32 addrspace(1)*, align 4
+ %b.addr = alloca <4 x i32> addrspace(1)*, align 4
+ %idx = alloca i32, align 4
+ %c = alloca <4 x i32>, align 16
+ %d = alloca i32, align 4
+ store i32 addrspace(1)* %a, i32 addrspace(1)** %a.addr, align 4
+ store <4 x i32> addrspace(1)* %b, <4 x i32> addrspace(1)** %b.addr, align 4
+ %call = call i32 @get_global_id(i32 0) nounwind
+ store i32 %call, i32* %idx, align 4
+ %tmp = load <4 x i32> addrspace(1)** %b.addr, align 4
+ %tmp1 = load i32* %idx, align 4
+ %arrayidx = getelementptr <4 x i32> addrspace(1)* %tmp, i32 %tmp1
+ %tmp2 = load <4 x i32> addrspace(1)* %arrayidx, align 16
+ store <4 x i32> %tmp2, <4 x i32>* %c, align 16
+ %tmp3 = load <4 x i32>* %c, align 16
+ %tmp4 = extractelement <4 x i32> %tmp3, i32 1
+ store i32 %tmp4, i32* %d, align 4
+ %tmp5 = load i32* %d, align 4
+ %tmp6 = shl i32 %tmp5, 15
+ %tmp7 = load <4 x i32>* %c, align 16
+ %tmp8 = extractelement <4 x i32> %tmp7, i32 0
+ %tmp9 = and i32 %tmp8, 4080
+ %tmp10 = shl i32 %tmp9, 5
+ %tmp11 = or i32 %tmp6, %tmp10
+ store i32 %tmp11, i32* %d, align 4
+ %tmp12 = load i32 addrspace(1)** %a.addr, align 4
+ %tmp13 = load i32* %idx, align 4
+ %arrayidx14 = getelementptr i32 addrspace(1)* %tmp12, i32 %tmp13
+ %tmp15 = load i32* %d, align 4
+ store i32 %tmp15, i32 addrspace(1)* %arrayidx14, align 4
+ br label %return
+
+return: ; preds = %entry
+ ret void
+}
+
+define void @ubit_noopt_case_5(i32 addrspace(1)* %a, <4 x i32> addrspace(1)* %b) nounwind {
+ ; CHECK: ubit_noopt_case_5
+ ; CHECK-NOT: bfi
+entry:
+ %a.addr = alloca i32 addrspace(1)*, align 4
+ %b.addr = alloca <4 x i32> addrspace(1)*, align 4
+ %idx = alloca i32, align 4
+ %c = alloca <4 x i32>, align 16
+ %d = alloca i32, align 4
+ store i32 addrspace(1)* %a, i32 addrspace(1)** %a.addr, align 4
+ store <4 x i32> addrspace(1)* %b, <4 x i32> addrspace(1)** %b.addr, align 4
+ %call = call i32 @get_global_id(i32 0) nounwind
+ store i32 %call, i32* %idx, align 4
+ %tmp = load <4 x i32> addrspace(1)** %b.addr, align 4
+ %tmp1 = load i32* %idx, align 4
+ %arrayidx = getelementptr <4 x i32> addrspace(1)* %tmp, i32 %tmp1
+ %tmp2 = load <4 x i32> addrspace(1)* %arrayidx, align 16
+ store <4 x i32> %tmp2, <4 x i32>* %c, align 16
+ %tmp3 = load <4 x i32>* %c, align 16
+ %tmp4 = extractelement <4 x i32> %tmp3, i32 1
+ store i32 %tmp4, i32* %d, align 4
+ %tmp5 = load i32* %d, align 4
+ %tmp6 = and i32 %tmp5, 61680
+ %tmp7 = load <4 x i32>* %c, align 16
+ %tmp8 = extractelement <4 x i32> %tmp7, i32 0
+ %tmp9 = and i32 %tmp8, 3855
+ %tmp10 = or i32 %tmp6, %tmp9
+ store i32 %tmp10, i32* %d, align 4
+ %tmp11 = load i32 addrspace(1)** %a.addr, align 4
+ %tmp12 = load i32* %idx, align 4
+ %arrayidx13 = getelementptr i32 addrspace(1)* %tmp11, i32 %tmp12
+ %tmp14 = load i32* %d, align 4
+ store i32 %tmp14, i32 addrspace(1)* %arrayidx13, align 4
+ br label %return
+
+return: ; preds = %entry
+ ret void
+}
+
+define void @ubit_noopt_case_6(i32 addrspace(1)* %a, <4 x i32> addrspace(1)* %b) nounwind {
+ ; CHECK: ubit_noopt_case_6
+ ; CHECK-NOT: bfi
+entry:
+ %a.addr = alloca i32 addrspace(1)*, align 4
+ %b.addr = alloca <4 x i32> addrspace(1)*, align 4
+ %idx = alloca i32, align 4
+ %c = alloca <4 x i32>, align 16
+ %d = alloca i32, align 4
+ store i32 addrspace(1)* %a, i32 addrspace(1)** %a.addr, align 4
+ store <4 x i32> addrspace(1)* %b, <4 x i32> addrspace(1)** %b.addr, align 4
+ %call = call i32 @get_global_id(i32 0) nounwind
+ store i32 %call, i32* %idx, align 4
+ %tmp = load <4 x i32> addrspace(1)** %b.addr, align 4
+ %tmp1 = load i32* %idx, align 4
+ %arrayidx = getelementptr <4 x i32> addrspace(1)* %tmp, i32 %tmp1
+ %tmp2 = load <4 x i32> addrspace(1)* %arrayidx, align 16
+ store <4 x i32> %tmp2, <4 x i32>* %c, align 16
+ %tmp3 = load <4 x i32>* %c, align 16
+ %tmp4 = extractelement <4 x i32> %tmp3, i32 1
+ store i32 %tmp4, i32* %d, align 4
+ %tmp5 = load i32* %d, align 4
+ %tmp6 = and i32 %tmp5, 65280
+ %tmp7 = load <4 x i32>* %c, align 16
+ %tmp8 = extractelement <4 x i32> %tmp7, i32 0
+ %tmp9 = shl i32 %tmp8, 12
+ %tmp10 = or i32 %tmp6, %tmp9
+ store i32 %tmp10, i32* %d, align 4
+ %tmp11 = load i32 addrspace(1)** %a.addr, align 4
+ %tmp12 = load i32* %idx, align 4
+ %arrayidx13 = getelementptr i32 addrspace(1)* %tmp11, i32 %tmp12
+ %tmp14 = load i32* %d, align 4
+ store i32 %tmp14, i32 addrspace(1)* %arrayidx13, align 4
+ br label %return
+
+return: ; preds = %entry
+ ret void
+}
+
+define void @ubit_noopt_case_7(i32 addrspace(1)* %a, <4 x i32> addrspace(1)* %b) nounwind {
+ ; CHECK: ubit_noopt_case_7
+ ; CHECK-NOT: bfi
+entry:
+ %a.addr = alloca i32 addrspace(1)*, align 4
+ %b.addr = alloca <4 x i32> addrspace(1)*, align 4
+ %idx = alloca i32, align 4
+ %c = alloca <4 x i32>, align 16
+ %d = alloca i32, align 4
+ store i32 addrspace(1)* %a, i32 addrspace(1)** %a.addr, align 4
+ store <4 x i32> addrspace(1)* %b, <4 x i32> addrspace(1)** %b.addr, align 4
+ %call = call i32 @get_global_id(i32 0) nounwind
+ store i32 %call, i32* %idx, align 4
+ %tmp = load <4 x i32> addrspace(1)** %b.addr, align 4
+ %tmp1 = load i32* %idx, align 4
+ %arrayidx = getelementptr <4 x i32> addrspace(1)* %tmp, i32 %tmp1
+ %tmp2 = load <4 x i32> addrspace(1)* %arrayidx, align 16
+ store <4 x i32> %tmp2, <4 x i32>* %c, align 16
+ %tmp3 = load <4 x i32>* %c, align 16
+ %tmp4 = extractelement <4 x i32> %tmp3, i32 1
+ store i32 %tmp4, i32* %d, align 4
+ %tmp5 = load i32* %d, align 4
+ %tmp6 = shl i32 %tmp5, 9
+ %tmp7 = load <4 x i32>* %c, align 16
+ %tmp8 = extractelement <4 x i32> %tmp7, i32 0
+ %tmp9 = and i32 %tmp8, 4088
+ %tmp10 = or i32 %tmp6, %tmp9
+ store i32 %tmp10, i32* %d, align 4
+ %tmp11 = load i32 addrspace(1)** %a.addr, align 4
+ %tmp12 = load i32* %idx, align 4
+ %arrayidx13 = getelementptr i32 addrspace(1)* %tmp11, i32 %tmp12
+ %tmp14 = load i32* %d, align 4
+ store i32 %tmp14, i32 addrspace(1)* %arrayidx13, align 4
+ br label %return
+
+return: ; preds = %entry
+ ret void
+}
+
+define void @ubit_noopt_case_8(i32 addrspace(1)* %a, <4 x i32> addrspace(1)* %b) nounwind {
+ ; CHECK: ubit_noopt_case_8
+ ; CHECK-NOT: bfi
+entry:
+ %a.addr = alloca i32 addrspace(1)*, align 4
+ %b.addr = alloca <4 x i32> addrspace(1)*, align 4
+ %idx = alloca i32, align 4
+ %c = alloca <4 x i32>, align 16
+ %d = alloca i32, align 4
+ store i32 addrspace(1)* %a, i32 addrspace(1)** %a.addr, align 4
+ store <4 x i32> addrspace(1)* %b, <4 x i32> addrspace(1)** %b.addr, align 4
+ %call = call i32 @get_global_id(i32 0) nounwind
+ store i32 %call, i32* %idx, align 4
+ %tmp = load <4 x i32> addrspace(1)** %b.addr, align 4
+ %tmp1 = load i32* %idx, align 4
+ %arrayidx = getelementptr <4 x i32> addrspace(1)* %tmp, i32 %tmp1
+ %tmp2 = load <4 x i32> addrspace(1)* %arrayidx, align 16
+ store <4 x i32> %tmp2, <4 x i32>* %c, align 16
+ %tmp3 = load <4 x i32>* %c, align 16
+ %tmp4 = extractelement <4 x i32> %tmp3, i32 1
+ store i32 %tmp4, i32* %d, align 4
+ %tmp5 = load i32* %d, align 4
+ %tmp6 = and i32 %tmp5, 43690
+ %tmp7 = shl i32 %tmp6, 9
+ %tmp8 = load <4 x i32>* %c, align 16
+ %tmp9 = extractelement <4 x i32> %tmp8, i32 0
+ %tmp10 = and i32 %tmp9, 13107
+ %tmp11 = shl i32 %tmp10, 9
+ %tmp12 = or i32 %tmp7, %tmp11
+ store i32 %tmp12, i32* %d, align 4
+ %tmp13 = load i32 addrspace(1)** %a.addr, align 4
+ %tmp14 = load i32* %idx, align 4
+ %arrayidx15 = getelementptr i32 addrspace(1)* %tmp13, i32 %tmp14
+ %tmp16 = load i32* %d, align 4
+ store i32 %tmp16, i32 addrspace(1)* %arrayidx15, align 4
+ br label %return
+
+return: ; preds = %entry
+ ret void
+}
+
More information about the llvm-branch-commits
mailing list