[llvm] r239657 - R600 -> AMDGPU rename
Tom Stellard
thomas.stellard at amd.com
Fri Jun 12 20:28:16 PDT 2015

Removed: llvm/trunk/test/CodeGen/R600/llvm.SI.gather4.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/llvm.SI.gather4.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/llvm.SI.gather4.ll (original)
+++ llvm/trunk/test/CodeGen/R600/llvm.SI.gather4.ll (removed)
@@ -1,509 +0,0 @@
-;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s
-;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s
-
-;CHECK-LABEL: {{^}}gather4_v2:
-;CHECK: image_gather4 {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
-define void @gather4_v2() #0 {
-main_body:
- %r = call <4 x float> @llvm.SI.gather4.v2i32(<2 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
- %r0 = extractelement <4 x float> %r, i32 0
- %r1 = extractelement <4 x float> %r, i32 1
- %r2 = extractelement <4 x float> %r, i32 2
- %r3 = extractelement <4 x float> %r, i32 3
- call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
- ret void
-}
-
-;CHECK-LABEL: {{^}}gather4:
-;CHECK: image_gather4 {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
-define void @gather4() #0 {
-main_body:
- %r = call <4 x float> @llvm.SI.gather4.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
- %r0 = extractelement <4 x float> %r, i32 0
- %r1 = extractelement <4 x float> %r, i32 1
- %r2 = extractelement <4 x float> %r, i32 2
- %r3 = extractelement <4 x float> %r, i32 3
- call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
- ret void
-}
-
-;CHECK-LABEL: {{^}}gather4_cl:
-;CHECK: image_gather4_cl {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
-define void @gather4_cl() #0 {
-main_body:
- %r = call <4 x float> @llvm.SI.gather4.cl.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
- %r0 = extractelement <4 x float> %r, i32 0
- %r1 = extractelement <4 x float> %r, i32 1
- %r2 = extractelement <4 x float> %r, i32 2
- %r3 = extractelement <4 x float> %r, i32 3
- call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
- ret void
-}
-
-;CHECK-LABEL: {{^}}gather4_l:
-;CHECK: image_gather4_l {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
-define void @gather4_l() #0 {
-main_body:
- %r = call <4 x float> @llvm.SI.gather4.l.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
- %r0 = extractelement <4 x float> %r, i32 0
- %r1 = extractelement <4 x float> %r, i32 1
- %r2 = extractelement <4 x float> %r, i32 2
- %r3 = extractelement <4 x float> %r, i32 3
- call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
- ret void
-}
-
-;CHECK-LABEL: {{^}}gather4_b:
-;CHECK: image_gather4_b {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
-define void @gather4_b() #0 {
-main_body:
- %r = call <4 x float> @llvm.SI.gather4.b.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
- %r0 = extractelement <4 x float> %r, i32 0
- %r1 = extractelement <4 x float> %r, i32 1
- %r2 = extractelement <4 x float> %r, i32 2
- %r3 = extractelement <4 x float> %r, i32 3
- call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
- ret void
-}
-
-;CHECK-LABEL: {{^}}gather4_b_cl:
-;CHECK: image_gather4_b_cl {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
-define void @gather4_b_cl() #0 {
-main_body:
- %r = call <4 x float> @llvm.SI.gather4.b.cl.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
- %r0 = extractelement <4 x float> %r, i32 0
- %r1 = extractelement <4 x float> %r, i32 1
- %r2 = extractelement <4 x float> %r, i32 2
- %r3 = extractelement <4 x float> %r, i32 3
- call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
- ret void
-}
-
-;CHECK-LABEL: {{^}}gather4_b_cl_v8:
-;CHECK: image_gather4_b_cl {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
-define void @gather4_b_cl_v8() #0 {
-main_body:
- %r = call <4 x float> @llvm.SI.gather4.b.cl.v8i32(<8 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
- %r0 = extractelement <4 x float> %r, i32 0
- %r1 = extractelement <4 x float> %r, i32 1
- %r2 = extractelement <4 x float> %r, i32 2
- %r3 = extractelement <4 x float> %r, i32 3
- call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
- ret void
-}
-
-;CHECK-LABEL: {{^}}gather4_lz_v2:
-;CHECK: image_gather4_lz {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
-define void @gather4_lz_v2() #0 {
-main_body:
- %r = call <4 x float> @llvm.SI.gather4.lz.v2i32(<2 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
- %r0 = extractelement <4 x float> %r, i32 0
- %r1 = extractelement <4 x float> %r, i32 1
- %r2 = extractelement <4 x float> %r, i32 2
- %r3 = extractelement <4 x float> %r, i32 3
- call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
- ret void
-}
-
-;CHECK-LABEL: {{^}}gather4_lz:
-;CHECK: image_gather4_lz {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
-define void @gather4_lz() #0 {
-main_body:
- %r = call <4 x float> @llvm.SI.gather4.lz.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
- %r0 = extractelement <4 x float> %r, i32 0
- %r1 = extractelement <4 x float> %r, i32 1
- %r2 = extractelement <4 x float> %r, i32 2
- %r3 = extractelement <4 x float> %r, i32 3
- call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
- ret void
-}
-
-
-
-;CHECK-LABEL: {{^}}gather4_o:
-;CHECK: image_gather4_o {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
-define void @gather4_o() #0 {
-main_body:
- %r = call <4 x float> @llvm.SI.gather4.o.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
- %r0 = extractelement <4 x float> %r, i32 0
- %r1 = extractelement <4 x float> %r, i32 1
- %r2 = extractelement <4 x float> %r, i32 2
- %r3 = extractelement <4 x float> %r, i32 3
- call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
- ret void
-}
-
-;CHECK-LABEL: {{^}}gather4_cl_o:
-;CHECK: image_gather4_cl_o {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
-define void @gather4_cl_o() #0 {
-main_body:
- %r = call <4 x float> @llvm.SI.gather4.cl.o.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
- %r0 = extractelement <4 x float> %r, i32 0
- %r1 = extractelement <4 x float> %r, i32 1
- %r2 = extractelement <4 x float> %r, i32 2
- %r3 = extractelement <4 x float> %r, i32 3
- call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
- ret void
-}
-
-;CHECK-LABEL: {{^}}gather4_cl_o_v8:
-;CHECK: image_gather4_cl_o {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
-define void @gather4_cl_o_v8() #0 {
-main_body:
- %r = call <4 x float> @llvm.SI.gather4.cl.o.v8i32(<8 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
- %r0 = extractelement <4 x float> %r, i32 0
- %r1 = extractelement <4 x float> %r, i32 1
- %r2 = extractelement <4 x float> %r, i32 2
- %r3 = extractelement <4 x float> %r, i32 3
- call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
- ret void
-}
-
-;CHECK-LABEL: {{^}}gather4_l_o:
-;CHECK: image_gather4_l_o {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
-define void @gather4_l_o() #0 {
-main_body:
- %r = call <4 x float> @llvm.SI.gather4.l.o.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
- %r0 = extractelement <4 x float> %r, i32 0
- %r1 = extractelement <4 x float> %r, i32 1
- %r2 = extractelement <4 x float> %r, i32 2
- %r3 = extractelement <4 x float> %r, i32 3
- call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
- ret void
-}
-
-;CHECK-LABEL: {{^}}gather4_l_o_v8:
-;CHECK: image_gather4_l_o {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
-define void @gather4_l_o_v8() #0 {
-main_body:
- %r = call <4 x float> @llvm.SI.gather4.l.o.v8i32(<8 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
- %r0 = extractelement <4 x float> %r, i32 0
- %r1 = extractelement <4 x float> %r, i32 1
- %r2 = extractelement <4 x float> %r, i32 2
- %r3 = extractelement <4 x float> %r, i32 3
- call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
- ret void
-}
-
-;CHECK-LABEL: {{^}}gather4_b_o:
-;CHECK: image_gather4_b_o {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
-define void @gather4_b_o() #0 {
-main_body:
- %r = call <4 x float> @llvm.SI.gather4.b.o.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
- %r0 = extractelement <4 x float> %r, i32 0
- %r1 = extractelement <4 x float> %r, i32 1
- %r2 = extractelement <4 x float> %r, i32 2
- %r3 = extractelement <4 x float> %r, i32 3
- call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
- ret void
-}
-
-;CHECK-LABEL: {{^}}gather4_b_o_v8:
-;CHECK: image_gather4_b_o {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
-define void @gather4_b_o_v8() #0 {
-main_body:
- %r = call <4 x float> @llvm.SI.gather4.b.o.v8i32(<8 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
- %r0 = extractelement <4 x float> %r, i32 0
- %r1 = extractelement <4 x float> %r, i32 1
- %r2 = extractelement <4 x float> %r, i32 2
- %r3 = extractelement <4 x float> %r, i32 3
- call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
- ret void
-}
-
-;CHECK-LABEL: {{^}}gather4_b_cl_o:
-;CHECK: image_gather4_b_cl_o {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
-define void @gather4_b_cl_o() #0 {
-main_body:
- %r = call <4 x float> @llvm.SI.gather4.b.cl.o.v8i32(<8 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
- %r0 = extractelement <4 x float> %r, i32 0
- %r1 = extractelement <4 x float> %r, i32 1
- %r2 = extractelement <4 x float> %r, i32 2
- %r3 = extractelement <4 x float> %r, i32 3
- call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
- ret void
-}
-
-;CHECK-LABEL: {{^}}gather4_lz_o:
-;CHECK: image_gather4_lz_o {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
-define void @gather4_lz_o() #0 {
-main_body:
- %r = call <4 x float> @llvm.SI.gather4.lz.o.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
- %r0 = extractelement <4 x float> %r, i32 0
- %r1 = extractelement <4 x float> %r, i32 1
- %r2 = extractelement <4 x float> %r, i32 2
- %r3 = extractelement <4 x float> %r, i32 3
- call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
- ret void
-}
-
-
-
-;CHECK-LABEL: {{^}}gather4_c:
-;CHECK: image_gather4_c {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
-define void @gather4_c() #0 {
-main_body:
- %r = call <4 x float> @llvm.SI.gather4.c.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
- %r0 = extractelement <4 x float> %r, i32 0
- %r1 = extractelement <4 x float> %r, i32 1
- %r2 = extractelement <4 x float> %r, i32 2
- %r3 = extractelement <4 x float> %r, i32 3
- call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
- ret void
-}
-
-;CHECK-LABEL: {{^}}gather4_c_cl:
-;CHECK: image_gather4_c_cl {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
-define void @gather4_c_cl() #0 {
-main_body:
- %r = call <4 x float> @llvm.SI.gather4.c.cl.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
- %r0 = extractelement <4 x float> %r, i32 0
- %r1 = extractelement <4 x float> %r, i32 1
- %r2 = extractelement <4 x float> %r, i32 2
- %r3 = extractelement <4 x float> %r, i32 3
- call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
- ret void
-}
-
-;CHECK-LABEL: {{^}}gather4_c_cl_v8:
-;CHECK: image_gather4_c_cl {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
-define void @gather4_c_cl_v8() #0 {
-main_body:
- %r = call <4 x float> @llvm.SI.gather4.c.cl.v8i32(<8 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
- %r0 = extractelement <4 x float> %r, i32 0
- %r1 = extractelement <4 x float> %r, i32 1
- %r2 = extractelement <4 x float> %r, i32 2
- %r3 = extractelement <4 x float> %r, i32 3
- call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
- ret void
-}
-
-;CHECK-LABEL: {{^}}gather4_c_l:
-;CHECK: image_gather4_c_l {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
-define void @gather4_c_l() #0 {
-main_body:
- %r = call <4 x float> @llvm.SI.gather4.c.l.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
- %r0 = extractelement <4 x float> %r, i32 0
- %r1 = extractelement <4 x float> %r, i32 1
- %r2 = extractelement <4 x float> %r, i32 2
- %r3 = extractelement <4 x float> %r, i32 3
- call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
- ret void
-}
-
-;CHECK-LABEL: {{^}}gather4_c_l_v8:
-;CHECK: image_gather4_c_l {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
-define void @gather4_c_l_v8() #0 {
-main_body:
- %r = call <4 x float> @llvm.SI.gather4.c.l.v8i32(<8 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
- %r0 = extractelement <4 x float> %r, i32 0
- %r1 = extractelement <4 x float> %r, i32 1
- %r2 = extractelement <4 x float> %r, i32 2
- %r3 = extractelement <4 x float> %r, i32 3
- call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
- ret void
-}
-
-;CHECK-LABEL: {{^}}gather4_c_b:
-;CHECK: image_gather4_c_b {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
-define void @gather4_c_b() #0 {
-main_body:
- %r = call <4 x float> @llvm.SI.gather4.c.b.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
- %r0 = extractelement <4 x float> %r, i32 0
- %r1 = extractelement <4 x float> %r, i32 1
- %r2 = extractelement <4 x float> %r, i32 2
- %r3 = extractelement <4 x float> %r, i32 3
- call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
- ret void
-}
-
-;CHECK-LABEL: {{^}}gather4_c_b_v8:
-;CHECK: image_gather4_c_b {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
-define void @gather4_c_b_v8() #0 {
-main_body:
- %r = call <4 x float> @llvm.SI.gather4.c.b.v8i32(<8 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
- %r0 = extractelement <4 x float> %r, i32 0
- %r1 = extractelement <4 x float> %r, i32 1
- %r2 = extractelement <4 x float> %r, i32 2
- %r3 = extractelement <4 x float> %r, i32 3
- call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
- ret void
-}
-
-;CHECK-LABEL: {{^}}gather4_c_b_cl:
-;CHECK: image_gather4_c_b_cl {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
-define void @gather4_c_b_cl() #0 {
-main_body:
- %r = call <4 x float> @llvm.SI.gather4.c.b.cl.v8i32(<8 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
- %r0 = extractelement <4 x float> %r, i32 0
- %r1 = extractelement <4 x float> %r, i32 1
- %r2 = extractelement <4 x float> %r, i32 2
- %r3 = extractelement <4 x float> %r, i32 3
- call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
- ret void
-}
-
-;CHECK-LABEL: {{^}}gather4_c_lz:
-;CHECK: image_gather4_c_lz {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
-define void @gather4_c_lz() #0 {
-main_body:
- %r = call <4 x float> @llvm.SI.gather4.c.lz.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
- %r0 = extractelement <4 x float> %r, i32 0
- %r1 = extractelement <4 x float> %r, i32 1
- %r2 = extractelement <4 x float> %r, i32 2
- %r3 = extractelement <4 x float> %r, i32 3
- call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
- ret void
-}
-
-
-
-;CHECK-LABEL: {{^}}gather4_c_o:
-;CHECK: image_gather4_c_o {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
-define void @gather4_c_o() #0 {
-main_body:
- %r = call <4 x float> @llvm.SI.gather4.c.o.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
- %r0 = extractelement <4 x float> %r, i32 0
- %r1 = extractelement <4 x float> %r, i32 1
- %r2 = extractelement <4 x float> %r, i32 2
- %r3 = extractelement <4 x float> %r, i32 3
- call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
- ret void
-}
-
-;CHECK-LABEL: {{^}}gather4_c_o_v8:
-;CHECK: image_gather4_c_o {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
-define void @gather4_c_o_v8() #0 {
-main_body:
- %r = call <4 x float> @llvm.SI.gather4.c.o.v8i32(<8 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
- %r0 = extractelement <4 x float> %r, i32 0
- %r1 = extractelement <4 x float> %r, i32 1
- %r2 = extractelement <4 x float> %r, i32 2
- %r3 = extractelement <4 x float> %r, i32 3
- call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
- ret void
-}
-
-;CHECK-LABEL: {{^}}gather4_c_cl_o:
-;CHECK: image_gather4_c_cl_o {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
-define void @gather4_c_cl_o() #0 {
-main_body:
- %r = call <4 x float> @llvm.SI.gather4.c.cl.o.v8i32(<8 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
- %r0 = extractelement <4 x float> %r, i32 0
- %r1 = extractelement <4 x float> %r, i32 1
- %r2 = extractelement <4 x float> %r, i32 2
- %r3 = extractelement <4 x float> %r, i32 3
- call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
- ret void
-}
-
-;CHECK-LABEL: {{^}}gather4_c_l_o:
-;CHECK: image_gather4_c_l_o {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
-define void @gather4_c_l_o() #0 {
-main_body:
- %r = call <4 x float> @llvm.SI.gather4.c.l.o.v8i32(<8 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
- %r0 = extractelement <4 x float> %r, i32 0
- %r1 = extractelement <4 x float> %r, i32 1
- %r2 = extractelement <4 x float> %r, i32 2
- %r3 = extractelement <4 x float> %r, i32 3
- call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
- ret void
-}
-
-;CHECK-LABEL: {{^}}gather4_c_b_o:
-;CHECK: image_gather4_c_b_o {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
-define void @gather4_c_b_o() #0 {
-main_body:
- %r = call <4 x float> @llvm.SI.gather4.c.b.o.v8i32(<8 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
- %r0 = extractelement <4 x float> %r, i32 0
- %r1 = extractelement <4 x float> %r, i32 1
- %r2 = extractelement <4 x float> %r, i32 2
- %r3 = extractelement <4 x float> %r, i32 3
- call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
- ret void
-}
-
-;CHECK-LABEL: {{^}}gather4_c_b_cl_o:
-;CHECK: image_gather4_c_b_cl_o {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
-define void @gather4_c_b_cl_o() #0 {
-main_body:
- %r = call <4 x float> @llvm.SI.gather4.c.b.cl.o.v8i32(<8 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
- %r0 = extractelement <4 x float> %r, i32 0
- %r1 = extractelement <4 x float> %r, i32 1
- %r2 = extractelement <4 x float> %r, i32 2
- %r3 = extractelement <4 x float> %r, i32 3
- call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
- ret void
-}
-
-;CHECK-LABEL: {{^}}gather4_c_lz_o:
-;CHECK: image_gather4_c_lz_o {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
-define void @gather4_c_lz_o() #0 {
-main_body:
- %r = call <4 x float> @llvm.SI.gather4.c.lz.o.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
- %r0 = extractelement <4 x float> %r, i32 0
- %r1 = extractelement <4 x float> %r, i32 1
- %r2 = extractelement <4 x float> %r, i32 2
- %r3 = extractelement <4 x float> %r, i32 3
- call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
- ret void
-}
-
-;CHECK-LABEL: {{^}}gather4_c_lz_o_v8:
-;CHECK: image_gather4_c_lz_o {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
-define void @gather4_c_lz_o_v8() #0 {
-main_body:
- %r = call <4 x float> @llvm.SI.gather4.c.lz.o.v8i32(<8 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
- %r0 = extractelement <4 x float> %r, i32 0
- %r1 = extractelement <4 x float> %r, i32 1
- %r2 = extractelement <4 x float> %r, i32 2
- %r3 = extractelement <4 x float> %r, i32 3
- call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
- ret void
-}
-
-
-
-declare <4 x float> @llvm.SI.gather4.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1
-declare <4 x float> @llvm.SI.gather4.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1
-declare <4 x float> @llvm.SI.gather4.cl.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1
-declare <4 x float> @llvm.SI.gather4.l.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1
-declare <4 x float> @llvm.SI.gather4.b.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1
-declare <4 x float> @llvm.SI.gather4.b.cl.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1
-declare <4 x float> @llvm.SI.gather4.b.cl.v8i32(<8 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1
-declare <4 x float> @llvm.SI.gather4.lz.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1
-declare <4 x float> @llvm.SI.gather4.lz.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1
-
-declare <4 x float> @llvm.SI.gather4.o.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1
-declare <4 x float> @llvm.SI.gather4.cl.o.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1
-declare <4 x float> @llvm.SI.gather4.cl.o.v8i32(<8 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1
-declare <4 x float> @llvm.SI.gather4.l.o.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1
-declare <4 x float> @llvm.SI.gather4.l.o.v8i32(<8 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1
-declare <4 x float> @llvm.SI.gather4.b.o.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1
-declare <4 x float> @llvm.SI.gather4.b.o.v8i32(<8 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1
-declare <4 x float> @llvm.SI.gather4.b.cl.o.v8i32(<8 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1
-declare <4 x float> @llvm.SI.gather4.lz.o.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1
-
-declare <4 x float> @llvm.SI.gather4.c.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1
-declare <4 x float> @llvm.SI.gather4.c.cl.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1
-declare <4 x float> @llvm.SI.gather4.c.cl.v8i32(<8 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1
-declare <4 x float> @llvm.SI.gather4.c.l.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1
-declare <4 x float> @llvm.SI.gather4.c.l.v8i32(<8 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1
-declare <4 x float> @llvm.SI.gather4.c.b.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1
-declare <4 x float> @llvm.SI.gather4.c.b.v8i32(<8 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1
-declare <4 x float> @llvm.SI.gather4.c.b.cl.v8i32(<8 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1
-declare <4 x float> @llvm.SI.gather4.c.lz.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1
-
-declare <4 x float> @llvm.SI.gather4.c.o.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1
-declare <4 x float> @llvm.SI.gather4.c.o.v8i32(<8 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1
-declare <4 x float> @llvm.SI.gather4.c.cl.o.v8i32(<8 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1
-declare <4 x float> @llvm.SI.gather4.c.l.o.v8i32(<8 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1
-declare <4 x float> @llvm.SI.gather4.c.b.o.v8i32(<8 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1
-declare <4 x float> @llvm.SI.gather4.c.b.cl.o.v8i32(<8 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1
-declare <4 x float> @llvm.SI.gather4.c.lz.o.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1
-declare <4 x float> @llvm.SI.gather4.c.lz.o.v8i32(<8 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1
-
-declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
-
-attributes #0 = { "ShaderType"="0" }
-attributes #1 = { nounwind readnone }

Removed: llvm/trunk/test/CodeGen/R600/llvm.SI.getlod.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/llvm.SI.getlod.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/llvm.SI.getlod.ll (original)
+++ llvm/trunk/test/CodeGen/R600/llvm.SI.getlod.ll (removed)
@@ -1,45 +0,0 @@
-;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s
-;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s
-
-;CHECK-LABEL: {{^}}getlod:
-;CHECK: image_get_lod {{v\[[0-9]+:[0-9]+\]}}, 3, 0, 0, -1, 0, 0, 0, 0, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
-define void @getlod() #0 {
-main_body:
- %r = call <4 x float> @llvm.SI.getlod.i32(i32 undef, <32 x i8> undef, <16 x i8> undef, i32 15, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
- %r0 = extractelement <4 x float> %r, i32 0
- %r1 = extractelement <4 x float> %r, i32 1
- call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r0, float %r1)
- ret void
-}
-
-;CHECK-LABEL: {{^}}getlod_v2:
-;CHECK: image_get_lod {{v\[[0-9]+:[0-9]+\]}}, 3, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
-define void @getlod_v2() #0 {
-main_body:
- %r = call <4 x float> @llvm.SI.getlod.v2i32(<2 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 15, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
- %r0 = extractelement <4 x float> %r, i32 0
- %r1 = extractelement <4 x float> %r, i32 1
- call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r0, float %r1)
- ret void
-}
-
-;CHECK-LABEL: {{^}}getlod_v4:
-;CHECK: image_get_lod {{v\[[0-9]+:[0-9]+\]}}, 3, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
-define void @getlod_v4() #0 {
-main_body:
- %r = call <4 x float> @llvm.SI.getlod.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 15, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
- %r0 = extractelement <4 x float> %r, i32 0
- %r1 = extractelement <4 x float> %r, i32 1
- call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r0, float %r1)
- ret void
-}
-
-
-declare <4 x float> @llvm.SI.getlod.i32(i32, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1
-declare <4 x float> @llvm.SI.getlod.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1
-declare <4 x float> @llvm.SI.getlod.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1
-
-declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
-
-attributes #0 = { "ShaderType"="0" }
-attributes #1 = { nounwind readnone }

Removed: llvm/trunk/test/CodeGen/R600/llvm.SI.image.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/llvm.SI.image.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/llvm.SI.image.ll (original)
+++ llvm/trunk/test/CodeGen/R600/llvm.SI.image.ll (removed)
@@ -1,50 +0,0 @@
-;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s
-;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s
-
-;CHECK-LABEL: {{^}}image_load:
-;CHECK: image_load {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
-define void @image_load() #0 {
-main_body:
- %r = call <4 x float> @llvm.SI.image.load.v4i32(<4 x i32> undef, <8 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
- %r0 = extractelement <4 x float> %r, i32 0
- %r1 = extractelement <4 x float> %r, i32 1
- %r2 = extractelement <4 x float> %r, i32 2
- %r3 = extractelement <4 x float> %r, i32 3
- call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
- ret void
-}
-
-;CHECK-LABEL: {{^}}image_load_mip:
-;CHECK: image_load_mip {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
-define void @image_load_mip() #0 {
-main_body:
- %r = call <4 x float> @llvm.SI.image.load.mip.v4i32(<4 x i32> undef, <8 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
- %r0 = extractelement <4 x float> %r, i32 0
- %r1 = extractelement <4 x float> %r, i32 1
- %r2 = extractelement <4 x float> %r, i32 2
- %r3 = extractelement <4 x float> %r, i32 3
- call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
- ret void
-}
-
-;CHECK-LABEL: {{^}}getresinfo:
-;CHECK: image_get_resinfo {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}
-define void @getresinfo() #0 {
-main_body:
- %r = call <4 x float> @llvm.SI.getresinfo.i32(i32 undef, <8 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
- %r0 = extractelement <4 x float> %r, i32 0
- %r1 = extractelement <4 x float> %r, i32 1
- %r2 = extractelement <4 x float> %r, i32 2
- %r3 = extractelement <4 x float> %r, i32 3
- call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
- ret void
-}
-
-declare <4 x float> @llvm.SI.image.load.v4i32(<4 x i32>, <8 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
-declare <4 x float> @llvm.SI.image.load.mip.v4i32(<4 x i32>, <8 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
-declare <4 x float> @llvm.SI.getresinfo.i32(i32, <8 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
-
-declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
-
-attributes #0 = { "ShaderType"="0" }
-attributes #1 = { nounwind readnone }

Removed: llvm/trunk/test/CodeGen/R600/llvm.SI.image.sample.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/llvm.SI.image.sample.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/llvm.SI.image.sample.ll (original)
+++ llvm/trunk/test/CodeGen/R600/llvm.SI.image.sample.ll (removed)
@@ -1,310 +0,0 @@
-;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s
-;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s
-
-;CHECK-LABEL: {{^}}sample:
-;CHECK: s_wqm
-;CHECK: image_sample {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
-define void @sample() #0 {
-main_body:
- %r = call <4 x float> @llvm.SI.image.sample.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
- %r0 = extractelement <4 x float> %r, i32 0
- %r1 = extractelement <4 x float> %r, i32 1
- %r2 = extractelement <4 x float> %r, i32 2
- %r3 = extractelement <4 x float> %r, i32 3
- call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
- ret void
-}
-
-;CHECK-LABEL: {{^}}sample_cl:
-;CHECK: s_wqm
-;CHECK: image_sample_cl {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
-define void @sample_cl() #0 {
-main_body:
- %r = call <4 x float> @llvm.SI.image.sample.cl.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
- %r0 = extractelement <4 x float> %r, i32 0
- %r1 = extractelement <4 x float> %r, i32 1
- %r2 = extractelement <4 x float> %r, i32 2
- %r3 = extractelement <4 x float> %r, i32 3
- call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
- ret void
-}
-
-;CHECK-LABEL: {{^}}sample_d:
-;CHECK-NOT: s_wqm
-;CHECK: image_sample_d {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
-define void @sample_d() #0 {
-main_body:
- %r = call <4 x float> @llvm.SI.image.sample.d.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
- %r0 = extractelement <4 x float> %r, i32 0
- %r1 = extractelement <4 x float> %r, i32 1
- %r2 = extractelement <4 x float> %r, i32 2
- %r3 = extractelement <4 x float> %r, i32 3
- call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
- ret void
-}
-
-;CHECK-LABEL: {{^}}sample_d_cl:
-;CHECK-NOT: s_wqm
-;CHECK: image_sample_d_cl {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
-define void @sample_d_cl() #0 {
-main_body:
- %r = call <4 x float> @llvm.SI.image.sample.d.cl.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
- %r0 = extractelement <4 x float> %r, i32 0
- %r1 = extractelement <4 x float> %r, i32 1
- %r2 = extractelement <4 x float> %r, i32 2
- %r3 = extractelement <4 x float> %r, i32 3
- call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
- ret void
-}
-
-;CHECK-LABEL: {{^}}sample_l:
-;CHECK-NOT: s_wqm
-;CHECK: image_sample_l {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
-define void @sample_l() #0 {
-main_body:
- %r = call <4 x float> @llvm.SI.image.sample.l.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
- %r0 = extractelement <4 x float> %r, i32 0
- %r1 = extractelement <4 x float> %r, i32 1
- %r2 = extractelement <4 x float> %r, i32 2
- %r3 = extractelement <4 x float> %r, i32 3
- call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
- ret void
-}
-
-;CHECK-LABEL: {{^}}sample_b:
-;CHECK: s_wqm
-;CHECK: image_sample_b {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
-define void @sample_b() #0 {
-main_body:
- %r = call <4 x float> @llvm.SI.image.sample.b.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
- %r0 = extractelement <4 x float> %r, i32 0
- %r1 = extractelement <4 x float> %r, i32 1
- %r2 = extractelement <4 x float> %r, i32 2
- %r3 = extractelement <4 x float> %r, i32 3
- call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
- ret void
-}
-
-;CHECK-LABEL: {{^}}sample_b_cl:
-;CHECK: s_wqm
-;CHECK: image_sample_b_cl {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
-define void @sample_b_cl() #0 {
-main_body:
- %r = call <4 x float> @llvm.SI.image.sample.b.cl.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
- %r0 = extractelement <4 x float> %r, i32 0
- %r1 = extractelement <4 x float> %r, i32 1
- %r2 = extractelement <4 x float> %r, i32 2
- %r3 = extractelement <4 x float> %r, i32 3
- call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
- ret void
-}
-
-;CHECK-LABEL: {{^}}sample_lz:
-;CHECK-NOT: s_wqm
-;CHECK: image_sample_lz {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
-define void @sample_lz() #0 {
-main_body:
- %r = call <4 x float> @llvm.SI.image.sample.lz.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
- %r0 = extractelement <4 x float> %r, i32 0
- %r1 = extractelement <4 x float> %r, i32 1
- %r2 = extractelement <4 x float> %r, i32 2
- %r3 = extractelement <4 x float> %r, i32 3
- call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
- ret void
-}
-
-;CHECK-LABEL: {{^}}sample_cd:
-;CHECK-NOT: s_wqm
-;CHECK: image_sample_cd {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
-define void @sample_cd() #0 {
-main_body:
- %r = call <4 x float> @llvm.SI.image.sample.cd.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
- %r0 = extractelement <4 x float> %r, i32 0
- %r1 = extractelement <4 x float> %r, i32 1
- %r2 = extractelement <4 x float> %r, i32 2
- %r3 = extractelement <4 x float> %r, i32 3
- call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
- ret void
-}
-
-;CHECK-LABEL: {{^}}sample_cd_cl:
-;CHECK-NOT: s_wqm
-;CHECK: image_sample_cd_cl {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
-define void @sample_cd_cl() #0 {
-main_body:
- %r = call <4 x float> @llvm.SI.image.sample.cd.cl.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
- %r0 = extractelement <4 x float> %r, i32 0
- %r1 = extractelement <4 x float> %r, i32 1
- %r2 = extractelement <4 x float> %r, i32 2
- %r3 = extractelement <4 x float> %r, i32 3
- call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
- ret void
-}
-
-;CHECK-LABEL: {{^}}sample_c:
-;CHECK: s_wqm
-;CHECK: image_sample_c {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
-define void @sample_c() #0 {
-main_body:
- %r = call <4 x float> @llvm.SI.image.sample.c.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
- %r0 = extractelement <4 x float> %r, i32 0
- %r1 = extractelement <4 x float> %r, i32 1
- %r2 = extractelement <4 x float> %r, i32 2
- %r3 = extractelement <4 x float> %r, i32 3
- call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
- ret void
-}
-
-;CHECK-LABEL: {{^}}sample_c_cl:
-;CHECK: s_wqm
-;CHECK: image_sample_c_cl {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
-define void @sample_c_cl() #0 {
-main_body:
- %r = call <4 x float> @llvm.SI.image.sample.c.cl.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
- %r0 = extractelement <4 x float> %r, i32 0
- %r1 = extractelement <4 x float> %r, i32 1
- %r2 = extractelement <4 x float> %r, i32 2
- %r3 = extractelement <4 x float> %r, i32 3
- call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
- ret void
-}
-
-;CHECK-LABEL: {{^}}sample_c_d:
-;CHECK-NOT: s_wqm
-;CHECK: image_sample_c_d {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
-define void @sample_c_d() #0 {
-main_body:
- %r = call <4 x float> @llvm.SI.image.sample.c.d.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
- %r0 = extractelement <4 x float> %r, i32 0
- %r1 = extractelement <4 x float> %r, i32 1
- %r2 = extractelement <4 x float> %r, i32 2
- %r3 = extractelement <4 x float> %r, i32 3
- call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
- ret void
-}
-
-;CHECK-LABEL: {{^}}sample_c_d_cl:
-;CHECK-NOT: s_wqm
-;CHECK: image_sample_c_d_cl {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
-define void @sample_c_d_cl() #0 {
-main_body:
- %r = call <4 x float> @llvm.SI.image.sample.c.d.cl.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
- %r0 = extractelement <4 x float> %r, i32 0
- %r1 = extractelement <4 x float> %r, i32 1
- %r2 = extractelement <4 x float> %r, i32 2
- %r3 = extractelement <4 x float> %r, i32 3
- call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
- ret void
-}
-
-;CHECK-LABEL: {{^}}sample_c_l:
-;CHECK-NOT: s_wqm
-;CHECK: image_sample_c_l {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
-define void @sample_c_l() #0 {
-main_body:
- %r = call <4 x float> @llvm.SI.image.sample.c.l.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
- %r0 = extractelement <4 x float> %r, i32 0
- %r1 = extractelement <4 x float> %r, i32 1
- %r2 = extractelement <4 x float> %r, i32 2
- %r3 = extractelement <4 x float> %r, i32 3
- call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
- ret void
-}
-
-;CHECK-LABEL: {{^}}sample_c_b:
-;CHECK: s_wqm
-;CHECK: image_sample_c_b {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
-define void @sample_c_b() #0 {
-main_body:
- %r = call <4 x float> @llvm.SI.image.sample.c.b.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
- %r0 = extractelement <4 x float> %r, i32 0
- %r1 = extractelement <4 x float> %r, i32 1
- %r2 = extractelement <4 x float> %r, i32 2
- %r3 = extractelement <4 x float> %r, i32 3
- call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
- ret void
-}
-
-;CHECK-LABEL: {{^}}sample_c_b_cl:
-;CHECK: s_wqm
-;CHECK: image_sample_c_b_cl {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
-define void @sample_c_b_cl() #0 {
-main_body:
- %r = call <4 x float> @llvm.SI.image.sample.c.b.cl.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
- %r0 = extractelement <4 x float> %r, i32 0
- %r1 = extractelement <4 x float> %r, i32 1
- %r2 = extractelement <4 x float> %r, i32 2
- %r3 = extractelement <4 x float> %r, i32 3
- call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
- ret void
-}
-
-;CHECK-LABEL: {{^}}sample_c_lz:
-;CHECK-NOT: s_wqm
-;CHECK: image_sample_c_lz {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
-define void @sample_c_lz() #0 {
-main_body:
- %r = call <4 x float> @llvm.SI.image.sample.c.lz.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
- %r0 = extractelement <4 x float> %r, i32 0
- %r1 = extractelement <4 x float> %r, i32 1
- %r2 = extractelement <4 x float> %r, i32 2
- %r3 = extractelement <4 x float> %r, i32 3
- call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
- ret void
-}
-
-;CHECK-LABEL: {{^}}sample_c_cd:
-;CHECK-NOT: s_wqm
-;CHECK: image_sample_c_cd {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
-define void @sample_c_cd() #0 {
-main_body:
- %r = call <4 x float> @llvm.SI.image.sample.c.cd.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
- %r0 = extractelement <4 x float> %r, i32 0
- %r1 = extractelement <4 x float> %r, i32 1
- %r2 = extractelement <4 x float> %r, i32 2
- %r3 = extractelement <4 x float> %r, i32 3
- call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
- ret void
-}
-
-;CHECK-LABEL: {{^}}sample_c_cd_cl:
-;CHECK-NOT: s_wqm
-;CHECK: image_sample_c_cd_cl {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
-define void @sample_c_cd_cl() #0 {
-main_body:
- %r = call <4 x float> @llvm.SI.image.sample.c.cd.cl.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
- %r0 = extractelement <4 x float> %r, i32 0
- %r1 = extractelement <4 x float> %r, i32 1
- %r2 = extractelement <4 x float> %r, i32 2
- %r3 = extractelement <4 x float> %r, i32 3
- call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
- ret void
-}
-
-
-declare <4 x float> @llvm.SI.image.sample.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
-declare <4 x float> @llvm.SI.image.sample.cl.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
-declare <4 x float> @llvm.SI.image.sample.d.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
-declare <4 x float> @llvm.SI.image.sample.d.cl.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
-declare <4 x float> @llvm.SI.image.sample.l.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
-declare <4 x float> @llvm.SI.image.sample.b.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
-declare <4 x float> @llvm.SI.image.sample.b.cl.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
-declare <4 x float> @llvm.SI.image.sample.lz.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
-declare <4 x float> @llvm.SI.image.sample.cd.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
-declare <4 x float> @llvm.SI.image.sample.cd.cl.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
-
-declare <4 x float> @llvm.SI.image.sample.c.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
-declare <4 x float> @llvm.SI.image.sample.c.cl.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
-declare <4 x float> @llvm.SI.image.sample.c.d.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
-declare <4 x float> @llvm.SI.image.sample.c.d.cl.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
-declare <4 x float> @llvm.SI.image.sample.c.l.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
-declare <4 x float> @llvm.SI.image.sample.c.b.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
-declare <4 x float> @llvm.SI.image.sample.c.b.cl.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
-declare <4 x float> @llvm.SI.image.sample.c.lz.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
-declare <4 x float> @llvm.SI.image.sample.c.cd.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
-declare <4 x float> @llvm.SI.image.sample.c.cd.cl.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
-
-declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
-
-attributes #0 = { "ShaderType"="0" }
-attributes #1 = { nounwind readnone }

Removed: llvm/trunk/test/CodeGen/R600/llvm.SI.image.sample.o.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/llvm.SI.image.sample.o.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/llvm.SI.image.sample.o.ll (original)
+++ llvm/trunk/test/CodeGen/R600/llvm.SI.image.sample.o.ll (removed)
@@ -1,310 +0,0 @@
-;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s
-;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s
-
-;CHECK-LABEL: {{^}}sample:
-;CHECK: s_wqm
-;CHECK: image_sample_o {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
-define void @sample() #0 {
-main_body:
- %r = call <4 x float> @llvm.SI.image.sample.o.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
- %r0 = extractelement <4 x float> %r, i32 0
- %r1 = extractelement <4 x float> %r, i32 1
- %r2 = extractelement <4 x float> %r, i32 2
- %r3 = extractelement <4 x float> %r, i32 3
- call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
- ret void
-}
-
-;CHECK-LABEL: {{^}}sample_cl:
-;CHECK: s_wqm
-;CHECK: image_sample_cl_o {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
-define void @sample_cl() #0 {
-main_body:
- %r = call <4 x float> @llvm.SI.image.sample.cl.o.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
- %r0 = extractelement <4 x float> %r, i32 0
- %r1 = extractelement <4 x float> %r, i32 1
- %r2 = extractelement <4 x float> %r, i32 2
- %r3 = extractelement <4 x float> %r, i32 3
- call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
- ret void
-}
-
-;CHECK-LABEL: {{^}}sample_d:
-;CHECK-NOT: s_wqm
-;CHECK: image_sample_d_o {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
-define void @sample_d() #0 {
-main_body:
- %r = call <4 x float> @llvm.SI.image.sample.d.o.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
- %r0 = extractelement <4 x float> %r, i32 0
- %r1 = extractelement <4 x float> %r, i32 1
- %r2 = extractelement <4 x float> %r, i32 2
- %r3 = extractelement <4 x float> %r, i32 3
- call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
- ret void
-}
-
-;CHECK-LABEL: {{^}}sample_d_cl:
-;CHECK-NOT: s_wqm
-;CHECK: image_sample_d_cl_o {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
-define void @sample_d_cl() #0 {
-main_body:
- %r = call <4 x float> @llvm.SI.image.sample.d.cl.o.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
- %r0 = extractelement <4 x float> %r, i32 0
- %r1 = extractelement <4 x float> %r, i32 1
- %r2 = extractelement <4 x float> %r, i32 2
- %r3 = extractelement <4 x float> %r, i32 3
- call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
- ret void
-}
-
-;CHECK-LABEL: {{^}}sample_l:
-;CHECK-NOT: s_wqm
-;CHECK: image_sample_l_o {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
-define void @sample_l() #0 {
-main_body:
- %r = call <4 x float> @llvm.SI.image.sample.l.o.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
- %r0 = extractelement <4 x float> %r, i32 0
- %r1 = extractelement <4 x float> %r, i32 1
- %r2 = extractelement <4 x float> %r, i32 2
- %r3 = extractelement <4 x float> %r, i32 3
- call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
- ret void
-}
-
-;CHECK-LABEL: {{^}}sample_b:
-;CHECK: s_wqm
-;CHECK: image_sample_b_o {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
-define void @sample_b() #0 {
-main_body:
- %r = call <4 x float> @llvm.SI.image.sample.b.o.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
- %r0 = extractelement <4 x float> %r, i32 0
- %r1 = extractelement <4 x float> %r, i32 1
- %r2 = extractelement <4 x float> %r, i32 2
- %r3 = extractelement <4 x float> %r, i32 3
- call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
- ret void
-}
-
-;CHECK-LABEL: {{^}}sample_b_cl:
-;CHECK: s_wqm
-;CHECK: image_sample_b_cl_o {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
-define void @sample_b_cl() #0 {
-main_body:
- %r = call <4 x float> @llvm.SI.image.sample.b.cl.o.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
- %r0 = extractelement <4 x float> %r, i32 0
- %r1 = extractelement <4 x float> %r, i32 1
- %r2 = extractelement <4 x float> %r, i32 2
- %r3 = extractelement <4 x float> %r, i32 3
- call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
- ret void
-}
-
-;CHECK-LABEL: {{^}}sample_lz:
-;CHECK-NOT: s_wqm
-;CHECK: image_sample_lz_o {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
-define void @sample_lz() #0 {
-main_body:
- %r = call <4 x float> @llvm.SI.image.sample.lz.o.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
- %r0 = extractelement <4 x float> %r, i32 0
- %r1 = extractelement <4 x float> %r, i32 1
- %r2 = extractelement <4 x float> %r, i32 2
- %r3 = extractelement <4 x float> %r, i32 3
- call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
- ret void
-}
-
-;CHECK-LABEL: {{^}}sample_cd:
-;CHECK-NOT: s_wqm
-;CHECK: image_sample_cd_o {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
-define void @sample_cd() #0 {
-main_body:
- %r = call <4 x float> @llvm.SI.image.sample.cd.o.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
- %r0 = extractelement <4 x float> %r, i32 0
- %r1 = extractelement <4 x float> %r, i32 1
- %r2 = extractelement <4 x float> %r, i32 2
- %r3 = extractelement <4 x float> %r, i32 3
- call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
- ret void
-}
-
-;CHECK-LABEL: {{^}}sample_cd_cl:
-;CHECK-NOT: s_wqm
-;CHECK: image_sample_cd_cl_o {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
-define void @sample_cd_cl() #0 {
-main_body:
- %r = call <4 x float> @llvm.SI.image.sample.cd.cl.o.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
- %r0 = extractelement <4 x float> %r, i32 0
- %r1 = extractelement <4 x float> %r, i32 1
- %r2 = extractelement <4 x float> %r, i32 2
- %r3 = extractelement <4 x float> %r, i32 3
- call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
- ret void
-}
-
-;CHECK-LABEL: {{^}}sample_c:
-;CHECK: s_wqm
-;CHECK: image_sample_c_o {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
-define void @sample_c() #0 {
-main_body:
- %r = call <4 x float> @llvm.SI.image.sample.c.o.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
- %r0 = extractelement <4 x float> %r, i32 0
- %r1 = extractelement <4 x float> %r, i32 1
- %r2 = extractelement <4 x float> %r, i32 2
- %r3 = extractelement <4 x float> %r, i32 3
- call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
- ret void
-}
-
-;CHECK-LABEL: {{^}}sample_c_cl:
-;CHECK: s_wqm
-;CHECK: image_sample_c_cl_o {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
-define void @sample_c_cl() #0 {
-main_body:
- %r = call <4 x float> @llvm.SI.image.sample.c.cl.o.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
- %r0 = extractelement <4 x float> %r, i32 0
- %r1 = extractelement <4 x float> %r, i32 1
- %r2 = extractelement <4 x float> %r, i32 2
- %r3 = extractelement <4 x float> %r, i32 3
- call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
- ret void
-}
-
-;CHECK-LABEL: {{^}}sample_c_d:
-;CHECK-NOT: s_wqm
-;CHECK: image_sample_c_d_o {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
-define void @sample_c_d() #0 {
-main_body:
- %r = call <4 x float> @llvm.SI.image.sample.c.d.o.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
- %r0 = extractelement <4 x float> %r, i32 0
- %r1 = extractelement <4 x float> %r, i32 1
- %r2 = extractelement <4 x float> %r, i32 2
- %r3 = extractelement <4 x float> %r, i32 3
- call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
- ret void
-}
-
-;CHECK-LABEL: {{^}}sample_c_d_cl:
-;CHECK-NOT: s_wqm
-;CHECK: image_sample_c_d_cl_o {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
-define void @sample_c_d_cl() #0 {
-main_body:
- %r = call <4 x float> @llvm.SI.image.sample.c.d.cl.o.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
- %r0 = extractelement <4 x float> %r, i32 0
- %r1 = extractelement <4 x float> %r, i32 1
- %r2 = extractelement <4 x float> %r, i32 2
- %r3 = extractelement <4 x float> %r, i32 3
- call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
- ret void
-}
-
-;CHECK-LABEL: {{^}}sample_c_l:
-;CHECK-NOT: s_wqm
-;CHECK: image_sample_c_l_o {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
-define void @sample_c_l() #0 {
-main_body:
- %r = call <4 x float> @llvm.SI.image.sample.c.l.o.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
- %r0 = extractelement <4 x float> %r, i32 0
- %r1 = extractelement <4 x float> %r, i32 1
- %r2 = extractelement <4 x float> %r, i32 2
- %r3 = extractelement <4 x float> %r, i32 3
- call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
- ret void
-}
-
-;CHECK-LABEL: {{^}}sample_c_b:
-;CHECK: s_wqm
-;CHECK: image_sample_c_b_o {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
-define void @sample_c_b() #0 {
-main_body:
- %r = call <4 x float> @llvm.SI.image.sample.c.b.o.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
- %r0 = extractelement <4 x float> %r, i32 0
- %r1 = extractelement <4 x float> %r, i32 1
- %r2 = extractelement <4 x float> %r, i32 2
- %r3 = extractelement <4 x float> %r, i32 3
- call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
- ret void
-}
-
-;CHECK-LABEL: {{^}}sample_c_b_cl:
-;CHECK: s_wqm
-;CHECK: image_sample_c_b_cl_o {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
-define void @sample_c_b_cl() #0 {
-main_body:
- %r = call <4 x float> @llvm.SI.image.sample.c.b.cl.o.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
- %r0 = extractelement <4 x float> %r, i32 0
- %r1 = extractelement <4 x float> %r, i32 1
- %r2 = extractelement <4 x float> %r, i32 2
- %r3 = extractelement <4 x float> %r, i32 3
- call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
- ret void
-}
-
-;CHECK-LABEL: {{^}}sample_c_lz:
-;CHECK-NOT: s_wqm
-;CHECK: image_sample_c_lz_o {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
-define void @sample_c_lz() #0 {
-main_body:
- %r = call <4 x float> @llvm.SI.image.sample.c.lz.o.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
- %r0 = extractelement <4 x float> %r, i32 0
- %r1 = extractelement <4 x float> %r, i32 1
- %r2 = extractelement <4 x float> %r, i32 2
- %r3 = extractelement <4 x float> %r, i32 3
- call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
- ret void
-}
-
-;CHECK-LABEL: {{^}}sample_c_cd:
-;CHECK-NOT: s_wqm
-;CHECK: image_sample_c_cd_o {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
-define void @sample_c_cd() #0 {
-main_body:
- %r = call <4 x float> @llvm.SI.image.sample.c.cd.o.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
- %r0 = extractelement <4 x float> %r, i32 0
- %r1 = extractelement <4 x float> %r, i32 1
- %r2 = extractelement <4 x float> %r, i32 2
- %r3 = extractelement <4 x float> %r, i32 3
- call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
- ret void
-}
-
-;CHECK-LABEL: {{^}}sample_c_cd_cl:
-;CHECK-NOT: s_wqm
-;CHECK: image_sample_c_cd_cl_o {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
-define void @sample_c_cd_cl() #0 {
-main_body:
- %r = call <4 x float> @llvm.SI.image.sample.c.cd.cl.o.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
- %r0 = extractelement <4 x float> %r, i32 0
- %r1 = extractelement <4 x float> %r, i32 1
- %r2 = extractelement <4 x float> %r, i32 2
- %r3 = extractelement <4 x float> %r, i32 3
- call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
- ret void
-}
-
-
-declare <4 x float> @llvm.SI.image.sample.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
-declare <4 x float> @llvm.SI.image.sample.cl.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
-declare <4 x float> @llvm.SI.image.sample.d.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
-declare <4 x float> @llvm.SI.image.sample.d.cl.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
-declare <4 x float> @llvm.SI.image.sample.l.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
-declare <4 x float> @llvm.SI.image.sample.b.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
-declare <4 x float> @llvm.SI.image.sample.b.cl.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
-declare <4 x float> @llvm.SI.image.sample.lz.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
-declare <4 x float> @llvm.SI.image.sample.cd.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
-declare <4 x float> @llvm.SI.image.sample.cd.cl.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
-
-declare <4 x float> @llvm.SI.image.sample.c.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
-declare <4 x float> @llvm.SI.image.sample.c.cl.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
-declare <4 x float> @llvm.SI.image.sample.c.d.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
-declare <4 x float> @llvm.SI.image.sample.c.d.cl.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
-declare <4 x float> @llvm.SI.image.sample.c.l.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
-declare <4 x float> @llvm.SI.image.sample.c.b.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
-declare <4 x float> @llvm.SI.image.sample.c.b.cl.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
-declare <4 x float> @llvm.SI.image.sample.c.lz.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
-declare <4 x float> @llvm.SI.image.sample.c.cd.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
-declare <4 x float> @llvm.SI.image.sample.c.cd.cl.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
-
-declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
-
-attributes #0 = { "ShaderType"="0" }
-attributes #1 = { nounwind readnone }
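
A note on the checks in this file: the s_wqm lines split along whether the sample
operation derives its LOD implicitly from neighbouring lanes (these need whole quad
mode) or takes an explicit LOD/derivative (these do not). The grouping below is read
straight off the CHECK / CHECK-NOT lines above rather than from the ISA documentation:

  s_wqm expected:   sample_cl, sample_b, sample_b_cl, sample_c, sample_c_cl,
                    sample_c_b, sample_c_b_cl (all in their _o offset forms)
  no s_wqm:         sample_d, sample_d_cl, sample_l, sample_lz, sample_cd,
                    sample_cd_cl and the matching _c_ compare forms
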
Removed: llvm/trunk/test/CodeGen/R600/llvm.SI.imageload.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/llvm.SI.imageload.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/llvm.SI.imageload.ll (original)
+++ llvm/trunk/test/CodeGen/R600/llvm.SI.imageload.ll (removed)
@@ -1,132 +0,0 @@
-;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s
-;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s
-
-;CHECK-DAG: image_load {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, -1
-;CHECK-DAG: image_load_mip {{v\[[0-9]+:[0-9]+\]}}, 3, 0, 0, 0
-;CHECK-DAG: image_load_mip {{v[0-9]+}}, 2, 0, 0, 0
-;CHECK-DAG: image_load_mip {{v[0-9]+}}, 1, 0, 0, 0
-;CHECK-DAG: image_load_mip {{v[0-9]+}}, 4, 0, 0, 0
-;CHECK-DAG: image_load_mip {{v[0-9]+}}, 8, 0, 0, 0
-;CHECK-DAG: image_load_mip {{v\[[0-9]+:[0-9]+\]}}, 5, 0, 0, 0
-;CHECK-DAG: image_load_mip {{v\[[0-9]+:[0-9]+\]}}, 12, 0, 0, -1
-;CHECK-DAG: image_load_mip {{v\[[0-9]+:[0-9]+\]}}, 7, 0, 0, 0
-;CHECK-DAG: image_load_mip {{v[0-9]+}}, 8, 0, 0, -1
-
-define void @test(i32 %a1, i32 %a2, i32 %a3, i32 %a4) {
- %v1 = insertelement <4 x i32> undef, i32 %a1, i32 0
- %v2 = insertelement <4 x i32> undef, i32 %a1, i32 1
- %v3 = insertelement <4 x i32> undef, i32 %a1, i32 2
- %v4 = insertelement <4 x i32> undef, i32 %a1, i32 3
- %v5 = insertelement <4 x i32> undef, i32 %a2, i32 0
- %v6 = insertelement <4 x i32> undef, i32 %a2, i32 1
- %v10 = insertelement <4 x i32> undef, i32 %a3, i32 1
- %v11 = insertelement <4 x i32> undef, i32 %a3, i32 2
- %v15 = insertelement <4 x i32> undef, i32 %a4, i32 2
- %v16 = insertelement <4 x i32> undef, i32 %a4, i32 3
- %res1 = call <4 x i32> @llvm.SI.imageload.(<4 x i32> %v1,
- <32 x i8> undef, i32 1)
- %res2 = call <4 x i32> @llvm.SI.imageload.(<4 x i32> %v2,
- <32 x i8> undef, i32 2)
- %res3 = call <4 x i32> @llvm.SI.imageload.(<4 x i32> %v3,
- <32 x i8> undef, i32 3)
- %res4 = call <4 x i32> @llvm.SI.imageload.(<4 x i32> %v4,
- <32 x i8> undef, i32 4)
- %res5 = call <4 x i32> @llvm.SI.imageload.(<4 x i32> %v5,
- <32 x i8> undef, i32 5)
- %res6 = call <4 x i32> @llvm.SI.imageload.(<4 x i32> %v6,
- <32 x i8> undef, i32 6)
- %res10 = call <4 x i32> @llvm.SI.imageload.(<4 x i32> %v10,
- <32 x i8> undef, i32 10)
- %res11 = call <4 x i32> @llvm.SI.imageload.(<4 x i32> %v11,
- <32 x i8> undef, i32 11)
- %res15 = call <4 x i32> @llvm.SI.imageload.(<4 x i32> %v15,
- <32 x i8> undef, i32 15)
- %res16 = call <4 x i32> @llvm.SI.imageload.(<4 x i32> %v16,
- <32 x i8> undef, i32 16)
- %e1 = extractelement <4 x i32> %res1, i32 0
- %e2 = extractelement <4 x i32> %res2, i32 1
- %e3 = extractelement <4 x i32> %res3, i32 2
- %e4 = extractelement <4 x i32> %res4, i32 3
- %t0 = extractelement <4 x i32> %res5, i32 0
- %t1 = extractelement <4 x i32> %res5, i32 1
- %e5 = add i32 %t0, %t1
- %t2 = extractelement <4 x i32> %res6, i32 0
- %t3 = extractelement <4 x i32> %res6, i32 2
- %e6 = add i32 %t2, %t3
- %t10 = extractelement <4 x i32> %res10, i32 2
- %t11 = extractelement <4 x i32> %res10, i32 3
- %e10 = add i32 %t10, %t11
- %t12 = extractelement <4 x i32> %res11, i32 0
- %t13 = extractelement <4 x i32> %res11, i32 1
- %t14 = extractelement <4 x i32> %res11, i32 2
- %t15 = add i32 %t12, %t13
- %e11 = add i32 %t14, %t15
- %t28 = extractelement <4 x i32> %res15, i32 0
- %t29 = extractelement <4 x i32> %res15, i32 1
- %t30 = extractelement <4 x i32> %res15, i32 2
- %t31 = extractelement <4 x i32> %res15, i32 3
- %t32 = add i32 %t28, %t29
- %t33 = add i32 %t30, %t31
- %e15 = add i32 %t32, %t33
- %e16 = extractelement <4 x i32> %res16, i32 3
- %s1 = add i32 %e1, %e2
- %s2 = add i32 %s1, %e3
- %s3 = add i32 %s2, %e4
- %s4 = add i32 %s3, %e5
- %s5 = add i32 %s4, %e6
- %s9 = add i32 %s5, %e10
- %s10 = add i32 %s9, %e11
- %s14 = add i32 %s10, %e15
- %s15 = add i32 %s14, %e16
- %s16 = bitcast i32 %s15 to float
- call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %s16, float %s16, float %s16, float %s16)
- ret void
-}
-
-; Test that coordinates are stored in vgprs and not sgprs
-; CHECK: vgpr_coords
-; CHECK: image_load_mip {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}
-define void @vgpr_coords(float addrspace(2)* addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
-main_body:
- %20 = getelementptr float addrspace(2)*, float addrspace(2)* addrspace(2)* %0, i32 0
- %21 = load float addrspace(2)*, float addrspace(2)* addrspace(2)* %20, !tbaa !2
- %22 = getelementptr float, float addrspace(2)* %21, i32 0
- %23 = load float, float addrspace(2)* %22, !tbaa !2, !invariant.load !1
- %24 = getelementptr float, float addrspace(2)* %21, i32 1
- %25 = load float, float addrspace(2)* %24, !tbaa !2, !invariant.load !1
- %26 = getelementptr float, float addrspace(2)* %21, i32 4
- %27 = load float, float addrspace(2)* %26, !tbaa !2, !invariant.load !1
- %28 = getelementptr <32 x i8>, <32 x i8> addrspace(2)* %2, i32 0
- %29 = load <32 x i8>, <32 x i8> addrspace(2)* %28, !tbaa !2
- %30 = bitcast float %27 to i32
- %31 = bitcast float %23 to i32
- %32 = bitcast float %25 to i32
- %33 = insertelement <4 x i32> undef, i32 %31, i32 0
- %34 = insertelement <4 x i32> %33, i32 %32, i32 1
- %35 = insertelement <4 x i32> %34, i32 %30, i32 2
- %36 = insertelement <4 x i32> %35, i32 undef, i32 3
- %37 = call <4 x i32> @llvm.SI.imageload.v4i32(<4 x i32> %36, <32 x i8> %29, i32 2)
- %38 = extractelement <4 x i32> %37, i32 0
- %39 = extractelement <4 x i32> %37, i32 1
- %40 = extractelement <4 x i32> %37, i32 2
- %41 = extractelement <4 x i32> %37, i32 3
- %42 = bitcast i32 %38 to float
- %43 = bitcast i32 %39 to float
- %44 = bitcast i32 %40 to float
- %45 = bitcast i32 %41 to float
- call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %42, float %43, float %44, float %45)
- ret void
-}
-
-declare <4 x i32> @llvm.SI.imageload.(<4 x i32>, <32 x i8>, i32) readnone
-; Function Attrs: nounwind readnone
-declare <4 x i32> @llvm.SI.imageload.v4i32(<4 x i32>, <32 x i8>, i32) #1
-
-declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
-
-attributes #0 = { "ShaderType"="0" }
-attributes #1 = { nounwind readnone }
-
-!0 = !{!"const", null}
-!1 = !{}
-!2 = !{!0, !0, i64 0, i32 1}
Removed: llvm/trunk/test/CodeGen/R600/llvm.SI.load.dword.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/llvm.SI.load.dword.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/llvm.SI.load.dword.ll (original)
+++ llvm/trunk/test/CodeGen/R600/llvm.SI.load.dword.ll (removed)
@@ -1,53 +0,0 @@
-; RUN: llc -march=amdgcn -mcpu=verde -show-mc-encoding -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -march=amdgcn -mcpu=tonga -show-mc-encoding -verify-machineinstrs < %s | FileCheck %s
-
-; Example of a simple geometry shader loading vertex attributes from the
-; ESGS ring buffer
-
-; FIXME: Out of bounds immediate offset crashes
-
-; CHECK-LABEL: {{^}}main:
-; CHECK: buffer_load_dword {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 glc slc
-; CHECK: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offen glc slc
-; CHECK: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 idxen glc slc
-; CHECK: buffer_load_dword {{v[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 idxen offen glc slc
-; CHECK: s_movk_i32 [[K:s[0-9]+]], 0x4d2 ; encoding
-; CHECK: buffer_load_dword {{v[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, [[K]] idxen offen offset:65535 glc slc
-
-define void @main([17 x <16 x i8>] addrspace(2)* byval %arg, [32 x <16 x i8>] addrspace(2)* byval %arg1, [16 x <32 x i8>] addrspace(2)* byval %arg2, [2 x <16 x i8>] addrspace(2)* byval %arg3, [17 x <16 x i8>] addrspace(2)* inreg %arg4, [17 x <16 x i8>] addrspace(2)* inreg %arg5, i32 %arg6, i32 %arg7, i32 %arg8, i32 %arg9) #0 {
-main_body:
- %tmp = getelementptr [2 x <16 x i8>], [2 x <16 x i8>] addrspace(2)* %arg3, i64 0, i32 1
- %tmp10 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp, !tbaa !0
- %tmp11 = shl i32 %arg6, 2
- %tmp12 = call i32 @llvm.SI.buffer.load.dword.i32.i32(<16 x i8> %tmp10, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 0)
- %tmp13 = bitcast i32 %tmp12 to float
- %tmp14 = call i32 @llvm.SI.buffer.load.dword.i32.i32(<16 x i8> %tmp10, i32 %tmp11, i32 0, i32 0, i32 1, i32 0, i32 1, i32 1, i32 0)
- %tmp15 = bitcast i32 %tmp14 to float
- %tmp16 = call i32 @llvm.SI.buffer.load.dword.i32.i32(<16 x i8> %tmp10, i32 %tmp11, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 0)
- %tmp17 = bitcast i32 %tmp16 to float
- %tmp18 = call i32 @llvm.SI.buffer.load.dword.i32.v2i32(<16 x i8> %tmp10, <2 x i32> zeroinitializer, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 0)
- %tmp19 = bitcast i32 %tmp18 to float
-
- %tmp20 = call i32 @llvm.SI.buffer.load.dword.i32.v2i32(<16 x i8> %tmp10, <2 x i32> zeroinitializer, i32 0, i32 123, i32 1, i32 1, i32 1, i32 1, i32 0)
- %tmp21 = bitcast i32 %tmp20 to float
-
- %tmp22 = call i32 @llvm.SI.buffer.load.dword.i32.v2i32(<16 x i8> %tmp10, <2 x i32> zeroinitializer, i32 1234, i32 65535, i32 1, i32 1, i32 1, i32 1, i32 0)
- %tmp23 = bitcast i32 %tmp22 to float
-
- call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %tmp13, float %tmp15, float %tmp17, float %tmp19)
- call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %tmp21, float %tmp23, float %tmp23, float %tmp23)
- ret void
-}
-
-; Function Attrs: nounwind readonly
-declare i32 @llvm.SI.buffer.load.dword.i32.i32(<16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1
-
-; Function Attrs: nounwind readonly
-declare i32 @llvm.SI.buffer.load.dword.i32.v2i32(<16 x i8>, <2 x i32>, i32, i32, i32, i32, i32, i32, i32) #1
-
-declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
-
-attributes #0 = { "ShaderType"="1" }
-attributes #1 = { nounwind readonly }
-
-!0 = !{!"const", null, i32 1}
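
For reference, the operands of the final @llvm.SI.buffer.load.dword call above line
up with its CHECK line as follows (inferred from this test itself, not from a spec):

  soffset 1234                          -> s_movk_i32 [[K:s[0-9]+]], 0x4d2
  immediate offset 65535                -> offset:65535
  offen/idxen/glc/slc/tfe = 1,1,1,1,0   -> idxen offen glc slc

i.e. the trailing i32 arguments select the addressing mode and cache bits.
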
Removed: llvm/trunk/test/CodeGen/R600/llvm.SI.resinfo.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/llvm.SI.resinfo.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/llvm.SI.resinfo.ll (original)
+++ llvm/trunk/test/CodeGen/R600/llvm.SI.resinfo.ll (removed)
@@ -1,111 +0,0 @@
-; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck %s
-
-; CHECK-DAG: image_get_resinfo {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, -1
-; CHECK-DAG: image_get_resinfo {{v\[[0-9]+:[0-9]+\]}}, 3, 0, 0, 0
-; CHECK-DAG: image_get_resinfo {{v[0-9]+}}, 2, 0, 0, 0
-; CHECK-DAG: image_get_resinfo {{v[0-9]+}}, 1, 0, 0, 0
-; CHECK-DAG: image_get_resinfo {{v[0-9]+}}, 4, 0, 0, 0
-; CHECK-DAG: image_get_resinfo {{v[0-9]+}}, 8, 0, 0, 0
-; CHECK-DAG: image_get_resinfo {{v\[[0-9]+:[0-9]+\]}}, 5, 0, 0, 0
-; CHECK-DAG: image_get_resinfo {{v\[[0-9]+:[0-9]+\]}}, 9, 0, 0, 0
-; CHECK-DAG: image_get_resinfo {{v\[[0-9]+:[0-9]+\]}}, 6, 0, 0, 0
-; CHECK-DAG: image_get_resinfo {{v\[[0-9]+:[0-9]+\]}}, 10, 0, 0, -1
-; CHECK-DAG: image_get_resinfo {{v\[[0-9]+:[0-9]+\]}}, 12, 0, 0, -1
-; CHECK-DAG: image_get_resinfo {{v\[[0-9]+:[0-9]+\]}}, 7, 0, 0, 0
-; CHECK-DAG: image_get_resinfo {{v\[[0-9]+:[0-9]+\]}}, 11, 0, 0, 0
-; CHECK-DAG: image_get_resinfo {{v\[[0-9]+:[0-9]+\]}}, 13, 0, 0, 0
-; CHECK-DAG: image_get_resinfo {{v\[[0-9]+:[0-9]+\]}}, 14, 0, 0, 0
-; CHECK-DAG: image_get_resinfo {{v[0-9]+}}, 8, 0, 0, -1
-
-define void @test(i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6, i32 %a7, i32 %a8,
- i32 %a9, i32 %a10, i32 %a11, i32 %a12, i32 %a13, i32 %a14, i32 %a15, i32 %a16) {
- %res1 = call <4 x i32> @llvm.SI.resinfo(i32 %a1, <32 x i8> undef, i32 1)
- %res2 = call <4 x i32> @llvm.SI.resinfo(i32 %a2, <32 x i8> undef, i32 2)
- %res3 = call <4 x i32> @llvm.SI.resinfo(i32 %a3, <32 x i8> undef, i32 3)
- %res4 = call <4 x i32> @llvm.SI.resinfo(i32 %a4, <32 x i8> undef, i32 4)
- %res5 = call <4 x i32> @llvm.SI.resinfo(i32 %a5, <32 x i8> undef, i32 5)
- %res6 = call <4 x i32> @llvm.SI.resinfo(i32 %a6, <32 x i8> undef, i32 6)
- %res7 = call <4 x i32> @llvm.SI.resinfo(i32 %a7, <32 x i8> undef, i32 7)
- %res8 = call <4 x i32> @llvm.SI.resinfo(i32 %a8, <32 x i8> undef, i32 8)
- %res9 = call <4 x i32> @llvm.SI.resinfo(i32 %a9, <32 x i8> undef, i32 9)
- %res10 = call <4 x i32> @llvm.SI.resinfo(i32 %a10, <32 x i8> undef, i32 10)
- %res11 = call <4 x i32> @llvm.SI.resinfo(i32 %a11, <32 x i8> undef, i32 11)
- %res12 = call <4 x i32> @llvm.SI.resinfo(i32 %a12, <32 x i8> undef, i32 12)
- %res13 = call <4 x i32> @llvm.SI.resinfo(i32 %a13, <32 x i8> undef, i32 13)
- %res14 = call <4 x i32> @llvm.SI.resinfo(i32 %a14, <32 x i8> undef, i32 14)
- %res15 = call <4 x i32> @llvm.SI.resinfo(i32 %a15, <32 x i8> undef, i32 15)
- %res16 = call <4 x i32> @llvm.SI.resinfo(i32 %a16, <32 x i8> undef, i32 16)
- %e1 = extractelement <4 x i32> %res1, i32 0
- %e2 = extractelement <4 x i32> %res2, i32 1
- %e3 = extractelement <4 x i32> %res3, i32 2
- %e4 = extractelement <4 x i32> %res4, i32 3
- %t0 = extractelement <4 x i32> %res5, i32 0
- %t1 = extractelement <4 x i32> %res5, i32 1
- %e5 = add i32 %t0, %t1
- %t2 = extractelement <4 x i32> %res6, i32 0
- %t3 = extractelement <4 x i32> %res6, i32 2
- %e6 = add i32 %t2, %t3
- %t4 = extractelement <4 x i32> %res7, i32 0
- %t5 = extractelement <4 x i32> %res7, i32 3
- %e7 = add i32 %t4, %t5
- %t6 = extractelement <4 x i32> %res8, i32 1
- %t7 = extractelement <4 x i32> %res8, i32 2
- %e8 = add i32 %t6, %t7
- %t8 = extractelement <4 x i32> %res9, i32 1
- %t9 = extractelement <4 x i32> %res9, i32 3
- %e9 = add i32 %t8, %t9
- %t10 = extractelement <4 x i32> %res10, i32 2
- %t11 = extractelement <4 x i32> %res10, i32 3
- %e10 = add i32 %t10, %t11
- %t12 = extractelement <4 x i32> %res11, i32 0
- %t13 = extractelement <4 x i32> %res11, i32 1
- %t14 = extractelement <4 x i32> %res11, i32 2
- %t15 = add i32 %t12, %t13
- %e11 = add i32 %t14, %t15
- %t16 = extractelement <4 x i32> %res12, i32 0
- %t17 = extractelement <4 x i32> %res12, i32 1
- %t18 = extractelement <4 x i32> %res12, i32 3
- %t19 = add i32 %t16, %t17
- %e12 = add i32 %t18, %t19
- %t20 = extractelement <4 x i32> %res13, i32 0
- %t21 = extractelement <4 x i32> %res13, i32 2
- %t22 = extractelement <4 x i32> %res13, i32 3
- %t23 = add i32 %t20, %t21
- %e13 = add i32 %t22, %t23
- %t24 = extractelement <4 x i32> %res14, i32 1
- %t25 = extractelement <4 x i32> %res14, i32 2
- %t26 = extractelement <4 x i32> %res14, i32 3
- %t27 = add i32 %t24, %t25
- %e14 = add i32 %t26, %t27
- %t28 = extractelement <4 x i32> %res15, i32 0
- %t29 = extractelement <4 x i32> %res15, i32 1
- %t30 = extractelement <4 x i32> %res15, i32 2
- %t31 = extractelement <4 x i32> %res15, i32 3
- %t32 = add i32 %t28, %t29
- %t33 = add i32 %t30, %t31
- %e15 = add i32 %t32, %t33
- %e16 = extractelement <4 x i32> %res16, i32 3
- %s1 = add i32 %e1, %e2
- %s2 = add i32 %s1, %e3
- %s3 = add i32 %s2, %e4
- %s4 = add i32 %s3, %e5
- %s5 = add i32 %s4, %e6
- %s6 = add i32 %s5, %e7
- %s7 = add i32 %s6, %e8
- %s8 = add i32 %s7, %e9
- %s9 = add i32 %s8, %e10
- %s10 = add i32 %s9, %e11
- %s11 = add i32 %s10, %e12
- %s12 = add i32 %s11, %e13
- %s13 = add i32 %s12, %e14
- %s14 = add i32 %s13, %e15
- %s15 = add i32 %s14, %e16
- %s16 = bitcast i32 %s15 to float
- call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %s16, float %s16, float %s16, float %s16)
- ret void
-}
-
-declare <4 x i32> @llvm.SI.resinfo(i32, <32 x i8>, i32) readnone
-
-declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
Removed: llvm/trunk/test/CodeGen/R600/llvm.SI.sample-masked.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/llvm.SI.sample-masked.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/llvm.SI.sample-masked.ll (original)
+++ llvm/trunk/test/CodeGen/R600/llvm.SI.sample-masked.ll (removed)
@@ -1,96 +0,0 @@
-;RUN: llc < %s -march=amdgcn -mcpu=verde | FileCheck %s
-;RUN: llc < %s -march=amdgcn -mcpu=tonga | FileCheck %s
-
-; CHECK-LABEL: {{^}}v1:
-; CHECK: image_sample {{v\[[0-9]+:[0-9]+\]}}, 13
-define void @v1(i32 %a1) #0 {
-entry:
- %0 = insertelement <1 x i32> undef, i32 %a1, i32 0
- %1 = call <4 x float> @llvm.SI.sample.v1i32(<1 x i32> %0, <32 x i8> undef, <16 x i8> undef, i32 0)
- %2 = extractelement <4 x float> %1, i32 0
- %3 = extractelement <4 x float> %1, i32 2
- %4 = extractelement <4 x float> %1, i32 3
- call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %2, float %3, float %4, float %4)
- ret void
-}
-
-; CHECK-LABEL: {{^}}v2:
-; CHECK: image_sample {{v\[[0-9]+:[0-9]+\]}}, 11
-define void @v2(i32 %a1) #0 {
-entry:
- %0 = insertelement <1 x i32> undef, i32 %a1, i32 0
- %1 = call <4 x float> @llvm.SI.sample.v1i32(<1 x i32> %0, <32 x i8> undef, <16 x i8> undef, i32 0)
- %2 = extractelement <4 x float> %1, i32 0
- %3 = extractelement <4 x float> %1, i32 1
- %4 = extractelement <4 x float> %1, i32 3
- call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %2, float %3, float %4, float %4)
- ret void
-}
-
-; CHECK-LABEL: {{^}}v3:
-; CHECK: image_sample {{v\[[0-9]+:[0-9]+\]}}, 14
-define void @v3(i32 %a1) #0 {
-entry:
- %0 = insertelement <1 x i32> undef, i32 %a1, i32 0
- %1 = call <4 x float> @llvm.SI.sample.v1i32(<1 x i32> %0, <32 x i8> undef, <16 x i8> undef, i32 0)
- %2 = extractelement <4 x float> %1, i32 1
- %3 = extractelement <4 x float> %1, i32 2
- %4 = extractelement <4 x float> %1, i32 3
- call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %2, float %3, float %4, float %4)
- ret void
-}
-
-; CHECK-LABEL: {{^}}v4:
-; CHECK: image_sample {{v\[[0-9]+:[0-9]+\]}}, 7
-define void @v4(i32 %a1) #0 {
-entry:
- %0 = insertelement <1 x i32> undef, i32 %a1, i32 0
- %1 = call <4 x float> @llvm.SI.sample.v1i32(<1 x i32> %0, <32 x i8> undef, <16 x i8> undef, i32 0)
- %2 = extractelement <4 x float> %1, i32 0
- %3 = extractelement <4 x float> %1, i32 1
- %4 = extractelement <4 x float> %1, i32 2
- call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %2, float %3, float %4, float %4)
- ret void
-}
-
-; CHECK-LABEL: {{^}}v5:
-; CHECK: image_sample {{v\[[0-9]+:[0-9]+\]}}, 10
-define void @v5(i32 %a1) #0 {
-entry:
- %0 = insertelement <1 x i32> undef, i32 %a1, i32 0
- %1 = call <4 x float> @llvm.SI.sample.v1i32(<1 x i32> %0, <32 x i8> undef, <16 x i8> undef, i32 0)
- %2 = extractelement <4 x float> %1, i32 1
- %3 = extractelement <4 x float> %1, i32 3
- call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %2, float %3, float %3, float %3)
- ret void
-}
-
-; CHECK-LABEL: {{^}}v6:
-; CHECK: image_sample {{v\[[0-9]+:[0-9]+\]}}, 6
-define void @v6(i32 %a1) #0 {
-entry:
- %0 = insertelement <1 x i32> undef, i32 %a1, i32 0
- %1 = call <4 x float> @llvm.SI.sample.v1i32(<1 x i32> %0, <32 x i8> undef, <16 x i8> undef, i32 0)
- %2 = extractelement <4 x float> %1, i32 1
- %3 = extractelement <4 x float> %1, i32 2
- call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %2, float %3, float %3, float %3)
- ret void
-}
-
-; CHECK-LABEL: {{^}}v7:
-; CHECK: image_sample {{v\[[0-9]+:[0-9]+\]}}, 9
-define void @v7(i32 %a1) #0 {
-entry:
- %0 = insertelement <1 x i32> undef, i32 %a1, i32 0
- %1 = call <4 x float> @llvm.SI.sample.v1i32(<1 x i32> %0, <32 x i8> undef, <16 x i8> undef, i32 0)
- %2 = extractelement <4 x float> %1, i32 0
- %3 = extractelement <4 x float> %1, i32 3
- call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %2, float %3, float %3, float %3)
- ret void
-}
-
-declare <4 x float> @llvm.SI.sample.v1i32(<1 x i32>, <32 x i8>, <16 x i8>, i32) readnone
-
-declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
-
-attributes #0 = { "ShaderType"="0" }
Removed: llvm/trunk/test/CodeGen/R600/llvm.SI.sample.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/llvm.SI.sample.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/llvm.SI.sample.ll (original)
+++ llvm/trunk/test/CodeGen/R600/llvm.SI.sample.ll (removed)
@@ -1,160 +0,0 @@
-;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s
-;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s
-
-;CHECK-DAG: image_sample {{v\[[0-9]+:[0-9]+\]}}, 15
-;CHECK-DAG: image_sample {{v\[[0-9]+:[0-9]+\]}}, 3
-;CHECK-DAG: image_sample {{v[0-9]+}}, 2
-;CHECK-DAG: image_sample {{v[0-9]+}}, 1
-;CHECK-DAG: image_sample {{v[0-9]+}}, 4
-;CHECK-DAG: image_sample {{v[0-9]+}}, 8
-;CHECK-DAG: image_sample_c {{v\[[0-9]+:[0-9]+\]}}, 5
-;CHECK-DAG: image_sample_c {{v\[[0-9]+:[0-9]+\]}}, 9
-;CHECK-DAG: image_sample_c {{v\[[0-9]+:[0-9]+\]}}, 6
-;CHECK-DAG: image_sample {{v\[[0-9]+:[0-9]+\]}}, 10
-;CHECK-DAG: image_sample {{v\[[0-9]+:[0-9]+\]}}, 12
-;CHECK-DAG: image_sample_c {{v\[[0-9]+:[0-9]+\]}}, 7
-;CHECK-DAG: image_sample_c {{v\[[0-9]+:[0-9]+\]}}, 11
-;CHECK-DAG: image_sample_c {{v\[[0-9]+:[0-9]+\]}}, 13
-;CHECK-DAG: image_sample {{v\[[0-9]+:[0-9]+\]}}, 14
-;CHECK-DAG: image_sample {{v[0-9]+}}, 8
-
-define void @test(i32 %a1, i32 %a2, i32 %a3, i32 %a4) #0 {
- %v1 = insertelement <4 x i32> undef, i32 %a1, i32 0
- %v2 = insertelement <4 x i32> undef, i32 %a1, i32 1
- %v3 = insertelement <4 x i32> undef, i32 %a1, i32 2
- %v4 = insertelement <4 x i32> undef, i32 %a1, i32 3
- %v5 = insertelement <4 x i32> undef, i32 %a2, i32 0
- %v6 = insertelement <4 x i32> undef, i32 %a2, i32 1
- %v7 = insertelement <4 x i32> undef, i32 %a2, i32 2
- %v8 = insertelement <4 x i32> undef, i32 %a2, i32 3
- %v9 = insertelement <4 x i32> undef, i32 %a3, i32 0
- %v10 = insertelement <4 x i32> undef, i32 %a3, i32 1
- %v11 = insertelement <4 x i32> undef, i32 %a3, i32 2
- %v12 = insertelement <4 x i32> undef, i32 %a3, i32 3
- %v13 = insertelement <4 x i32> undef, i32 %a4, i32 0
- %v14 = insertelement <4 x i32> undef, i32 %a4, i32 1
- %v15 = insertelement <4 x i32> undef, i32 %a4, i32 2
- %v16 = insertelement <4 x i32> undef, i32 %a4, i32 3
- %res1 = call <4 x float> @llvm.SI.sample.(<4 x i32> %v1,
- <32 x i8> undef, <16 x i8> undef, i32 1)
- %res2 = call <4 x float> @llvm.SI.sample.(<4 x i32> %v2,
- <32 x i8> undef, <16 x i8> undef, i32 2)
- %res3 = call <4 x float> @llvm.SI.sample.(<4 x i32> %v3,
- <32 x i8> undef, <16 x i8> undef, i32 3)
- %res4 = call <4 x float> @llvm.SI.sample.(<4 x i32> %v4,
- <32 x i8> undef, <16 x i8> undef, i32 4)
- %res5 = call <4 x float> @llvm.SI.sample.(<4 x i32> %v5,
- <32 x i8> undef, <16 x i8> undef, i32 5)
- %res6 = call <4 x float> @llvm.SI.sample.(<4 x i32> %v6,
- <32 x i8> undef, <16 x i8> undef, i32 6)
- %res7 = call <4 x float> @llvm.SI.sample.(<4 x i32> %v7,
- <32 x i8> undef, <16 x i8> undef, i32 7)
- %res8 = call <4 x float> @llvm.SI.sample.(<4 x i32> %v8,
- <32 x i8> undef, <16 x i8> undef, i32 8)
- %res9 = call <4 x float> @llvm.SI.sample.(<4 x i32> %v9,
- <32 x i8> undef, <16 x i8> undef, i32 9)
- %res10 = call <4 x float> @llvm.SI.sample.(<4 x i32> %v10,
- <32 x i8> undef, <16 x i8> undef, i32 10)
- %res11 = call <4 x float> @llvm.SI.sample.(<4 x i32> %v11,
- <32 x i8> undef, <16 x i8> undef, i32 11)
- %res12 = call <4 x float> @llvm.SI.sample.(<4 x i32> %v12,
- <32 x i8> undef, <16 x i8> undef, i32 12)
- %res13 = call <4 x float> @llvm.SI.sample.(<4 x i32> %v13,
- <32 x i8> undef, <16 x i8> undef, i32 13)
- %res14 = call <4 x float> @llvm.SI.sample.(<4 x i32> %v14,
- <32 x i8> undef, <16 x i8> undef, i32 14)
- %res15 = call <4 x float> @llvm.SI.sample.(<4 x i32> %v15,
- <32 x i8> undef, <16 x i8> undef, i32 15)
- %res16 = call <4 x float> @llvm.SI.sample.(<4 x i32> %v16,
- <32 x i8> undef, <16 x i8> undef, i32 16)
- %e1 = extractelement <4 x float> %res1, i32 0
- %e2 = extractelement <4 x float> %res2, i32 1
- %e3 = extractelement <4 x float> %res3, i32 2
- %e4 = extractelement <4 x float> %res4, i32 3
- %t0 = extractelement <4 x float> %res5, i32 0
- %t1 = extractelement <4 x float> %res5, i32 1
- %e5 = fadd float %t0, %t1
- %t2 = extractelement <4 x float> %res6, i32 0
- %t3 = extractelement <4 x float> %res6, i32 2
- %e6 = fadd float %t2, %t3
- %t4 = extractelement <4 x float> %res7, i32 0
- %t5 = extractelement <4 x float> %res7, i32 3
- %e7 = fadd float %t4, %t5
- %t6 = extractelement <4 x float> %res8, i32 1
- %t7 = extractelement <4 x float> %res8, i32 2
- %e8 = fadd float %t6, %t7
- %t8 = extractelement <4 x float> %res9, i32 1
- %t9 = extractelement <4 x float> %res9, i32 3
- %e9 = fadd float %t8, %t9
- %t10 = extractelement <4 x float> %res10, i32 2
- %t11 = extractelement <4 x float> %res10, i32 3
- %e10 = fadd float %t10, %t11
- %t12 = extractelement <4 x float> %res11, i32 0
- %t13 = extractelement <4 x float> %res11, i32 1
- %t14 = extractelement <4 x float> %res11, i32 2
- %t15 = fadd float %t12, %t13
- %e11 = fadd float %t14, %t15
- %t16 = extractelement <4 x float> %res12, i32 0
- %t17 = extractelement <4 x float> %res12, i32 1
- %t18 = extractelement <4 x float> %res12, i32 3
- %t19 = fadd float %t16, %t17
- %e12 = fadd float %t18, %t19
- %t20 = extractelement <4 x float> %res13, i32 0
- %t21 = extractelement <4 x float> %res13, i32 2
- %t22 = extractelement <4 x float> %res13, i32 3
- %t23 = fadd float %t20, %t21
- %e13 = fadd float %t22, %t23
- %t24 = extractelement <4 x float> %res14, i32 1
- %t25 = extractelement <4 x float> %res14, i32 2
- %t26 = extractelement <4 x float> %res14, i32 3
- %t27 = fadd float %t24, %t25
- %e14 = fadd float %t26, %t27
- %t28 = extractelement <4 x float> %res15, i32 0
- %t29 = extractelement <4 x float> %res15, i32 1
- %t30 = extractelement <4 x float> %res15, i32 2
- %t31 = extractelement <4 x float> %res15, i32 3
- %t32 = fadd float %t28, %t29
- %t33 = fadd float %t30, %t31
- %e15 = fadd float %t32, %t33
- %e16 = extractelement <4 x float> %res16, i32 3
- %s1 = fadd float %e1, %e2
- %s2 = fadd float %s1, %e3
- %s3 = fadd float %s2, %e4
- %s4 = fadd float %s3, %e5
- %s5 = fadd float %s4, %e6
- %s6 = fadd float %s5, %e7
- %s7 = fadd float %s6, %e8
- %s8 = fadd float %s7, %e9
- %s9 = fadd float %s8, %e10
- %s10 = fadd float %s9, %e11
- %s11 = fadd float %s10, %e12
- %s12 = fadd float %s11, %e13
- %s13 = fadd float %s12, %e14
- %s14 = fadd float %s13, %e15
- %s15 = fadd float %s14, %e16
- call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %s15, float %s15, float %s15, float %s15)
- ret void
-}
-
-; CHECK: {{^}}v1:
-; CHECK: image_sample {{v\[[0-9]+:[0-9]+\]}}, 15
-define void @v1(i32 %a1) #0 {
-entry:
- %0 = insertelement <1 x i32> undef, i32 %a1, i32 0
- %1 = call <4 x float> @llvm.SI.sample.v1i32(<1 x i32> %0, <32 x i8> undef, <16 x i8> undef, i32 0)
- %2 = extractelement <4 x float> %1, i32 0
- %3 = extractelement <4 x float> %1, i32 1
- %4 = extractelement <4 x float> %1, i32 2
- %5 = extractelement <4 x float> %1, i32 3
- call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %2, float %3, float %4, float %5)
- ret void
-}
-
-
-declare <4 x float> @llvm.SI.sample.v1i32(<1 x i32>, <32 x i8>, <16 x i8>, i32) readnone
-
-declare <4 x float> @llvm.SI.sample.(<4 x i32>, <32 x i8>, <16 x i8>, i32) readnone
-
-declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
-
-attributes #0 = { "ShaderType"="0" }
Removed: llvm/trunk/test/CodeGen/R600/llvm.SI.sampled.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/llvm.SI.sampled.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/llvm.SI.sampled.ll (original)
+++ llvm/trunk/test/CodeGen/R600/llvm.SI.sampled.ll (removed)
@@ -1,143 +0,0 @@
-;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s
-;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s
-
-;CHECK-DAG: image_sample_d {{v\[[0-9]+:[0-9]+\]}}, 15
-;CHECK-DAG: image_sample_d {{v\[[0-9]+:[0-9]+\]}}, 3
-;CHECK-DAG: image_sample_d {{v[0-9]+}}, 2
-;CHECK-DAG: image_sample_d {{v[0-9]+}}, 1
-;CHECK-DAG: image_sample_d {{v[0-9]+}}, 4
-;CHECK-DAG: image_sample_d {{v[0-9]+}}, 8
-;CHECK-DAG: image_sample_c_d {{v\[[0-9]+:[0-9]+\]}}, 5
-;CHECK-DAG: image_sample_c_d {{v\[[0-9]+:[0-9]+\]}}, 9
-;CHECK-DAG: image_sample_c_d {{v\[[0-9]+:[0-9]+\]}}, 6
-;CHECK-DAG: image_sample_d {{v\[[0-9]+:[0-9]+\]}}, 10
-;CHECK-DAG: image_sample_d {{v\[[0-9]+:[0-9]+\]}}, 12
-;CHECK-DAG: image_sample_c_d {{v\[[0-9]+:[0-9]+\]}}, 7
-;CHECK-DAG: image_sample_c_d {{v\[[0-9]+:[0-9]+\]}}, 11
-;CHECK-DAG: image_sample_c_d {{v\[[0-9]+:[0-9]+\]}}, 13
-;CHECK-DAG: image_sample_d {{v\[[0-9]+:[0-9]+\]}}, 14
-;CHECK-DAG: image_sample_d {{v[0-9]+}}, 8
-
-define void @test(i32 %a1, i32 %a2, i32 %a3, i32 %a4) #0 {
- %v1 = insertelement <4 x i32> undef, i32 %a1, i32 0
- %v2 = insertelement <4 x i32> undef, i32 %a1, i32 1
- %v3 = insertelement <4 x i32> undef, i32 %a1, i32 2
- %v4 = insertelement <4 x i32> undef, i32 %a1, i32 3
- %v5 = insertelement <4 x i32> undef, i32 %a2, i32 0
- %v6 = insertelement <4 x i32> undef, i32 %a2, i32 1
- %v7 = insertelement <4 x i32> undef, i32 %a2, i32 2
- %v8 = insertelement <4 x i32> undef, i32 %a2, i32 3
- %v9 = insertelement <4 x i32> undef, i32 %a3, i32 0
- %v10 = insertelement <4 x i32> undef, i32 %a3, i32 1
- %v11 = insertelement <4 x i32> undef, i32 %a3, i32 2
- %v12 = insertelement <4 x i32> undef, i32 %a3, i32 3
- %v13 = insertelement <4 x i32> undef, i32 %a4, i32 0
- %v14 = insertelement <4 x i32> undef, i32 %a4, i32 1
- %v15 = insertelement <4 x i32> undef, i32 %a4, i32 2
- %v16 = insertelement <4 x i32> undef, i32 %a4, i32 3
- %res1 = call <4 x float> @llvm.SI.sampled.(<4 x i32> %v1,
- <32 x i8> undef, <16 x i8> undef, i32 1)
- %res2 = call <4 x float> @llvm.SI.sampled.(<4 x i32> %v2,
- <32 x i8> undef, <16 x i8> undef, i32 2)
- %res3 = call <4 x float> @llvm.SI.sampled.(<4 x i32> %v3,
- <32 x i8> undef, <16 x i8> undef, i32 3)
- %res4 = call <4 x float> @llvm.SI.sampled.(<4 x i32> %v4,
- <32 x i8> undef, <16 x i8> undef, i32 4)
- %res5 = call <4 x float> @llvm.SI.sampled.(<4 x i32> %v5,
- <32 x i8> undef, <16 x i8> undef, i32 5)
- %res6 = call <4 x float> @llvm.SI.sampled.(<4 x i32> %v6,
- <32 x i8> undef, <16 x i8> undef, i32 6)
- %res7 = call <4 x float> @llvm.SI.sampled.(<4 x i32> %v7,
- <32 x i8> undef, <16 x i8> undef, i32 7)
- %res8 = call <4 x float> @llvm.SI.sampled.(<4 x i32> %v8,
- <32 x i8> undef, <16 x i8> undef, i32 8)
- %res9 = call <4 x float> @llvm.SI.sampled.(<4 x i32> %v9,
- <32 x i8> undef, <16 x i8> undef, i32 9)
- %res10 = call <4 x float> @llvm.SI.sampled.(<4 x i32> %v10,
- <32 x i8> undef, <16 x i8> undef, i32 10)
- %res11 = call <4 x float> @llvm.SI.sampled.(<4 x i32> %v11,
- <32 x i8> undef, <16 x i8> undef, i32 11)
- %res12 = call <4 x float> @llvm.SI.sampled.(<4 x i32> %v12,
- <32 x i8> undef, <16 x i8> undef, i32 12)
- %res13 = call <4 x float> @llvm.SI.sampled.(<4 x i32> %v13,
- <32 x i8> undef, <16 x i8> undef, i32 13)
- %res14 = call <4 x float> @llvm.SI.sampled.(<4 x i32> %v14,
- <32 x i8> undef, <16 x i8> undef, i32 14)
- %res15 = call <4 x float> @llvm.SI.sampled.(<4 x i32> %v15,
- <32 x i8> undef, <16 x i8> undef, i32 15)
- %res16 = call <4 x float> @llvm.SI.sampled.(<4 x i32> %v16,
- <32 x i8> undef, <16 x i8> undef, i32 16)
- %e1 = extractelement <4 x float> %res1, i32 0
- %e2 = extractelement <4 x float> %res2, i32 1
- %e3 = extractelement <4 x float> %res3, i32 2
- %e4 = extractelement <4 x float> %res4, i32 3
- %t0 = extractelement <4 x float> %res5, i32 0
- %t1 = extractelement <4 x float> %res5, i32 1
- %e5 = fadd float %t0, %t1
- %t2 = extractelement <4 x float> %res6, i32 0
- %t3 = extractelement <4 x float> %res6, i32 2
- %e6 = fadd float %t2, %t3
- %t4 = extractelement <4 x float> %res7, i32 0
- %t5 = extractelement <4 x float> %res7, i32 3
- %e7 = fadd float %t4, %t5
- %t6 = extractelement <4 x float> %res8, i32 1
- %t7 = extractelement <4 x float> %res8, i32 2
- %e8 = fadd float %t6, %t7
- %t8 = extractelement <4 x float> %res9, i32 1
- %t9 = extractelement <4 x float> %res9, i32 3
- %e9 = fadd float %t8, %t9
- %t10 = extractelement <4 x float> %res10, i32 2
- %t11 = extractelement <4 x float> %res10, i32 3
- %e10 = fadd float %t10, %t11
- %t12 = extractelement <4 x float> %res11, i32 0
- %t13 = extractelement <4 x float> %res11, i32 1
- %t14 = extractelement <4 x float> %res11, i32 2
- %t15 = fadd float %t12, %t13
- %e11 = fadd float %t14, %t15
- %t16 = extractelement <4 x float> %res12, i32 0
- %t17 = extractelement <4 x float> %res12, i32 1
- %t18 = extractelement <4 x float> %res12, i32 3
- %t19 = fadd float %t16, %t17
- %e12 = fadd float %t18, %t19
- %t20 = extractelement <4 x float> %res13, i32 0
- %t21 = extractelement <4 x float> %res13, i32 2
- %t22 = extractelement <4 x float> %res13, i32 3
- %t23 = fadd float %t20, %t21
- %e13 = fadd float %t22, %t23
- %t24 = extractelement <4 x float> %res14, i32 1
- %t25 = extractelement <4 x float> %res14, i32 2
- %t26 = extractelement <4 x float> %res14, i32 3
- %t27 = fadd float %t24, %t25
- %e14 = fadd float %t26, %t27
- %t28 = extractelement <4 x float> %res15, i32 0
- %t29 = extractelement <4 x float> %res15, i32 1
- %t30 = extractelement <4 x float> %res15, i32 2
- %t31 = extractelement <4 x float> %res15, i32 3
- %t32 = fadd float %t28, %t29
- %t33 = fadd float %t30, %t31
- %e15 = fadd float %t32, %t33
- %e16 = extractelement <4 x float> %res16, i32 3
- %s1 = fadd float %e1, %e2
- %s2 = fadd float %s1, %e3
- %s3 = fadd float %s2, %e4
- %s4 = fadd float %s3, %e5
- %s5 = fadd float %s4, %e6
- %s6 = fadd float %s5, %e7
- %s7 = fadd float %s6, %e8
- %s8 = fadd float %s7, %e9
- %s9 = fadd float %s8, %e10
- %s10 = fadd float %s9, %e11
- %s11 = fadd float %s10, %e12
- %s12 = fadd float %s11, %e13
- %s13 = fadd float %s12, %e14
- %s14 = fadd float %s13, %e15
- %s15 = fadd float %s14, %e16
- call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %s15, float %s15, float %s15, float %s15)
- ret void
-}
-
-declare <4 x float> @llvm.SI.sampled.(<4 x i32>, <32 x i8>, <16 x i8>, i32) readnone
-
-declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
-
-attributes #0 = { "ShaderType"="0" }
Removed: llvm/trunk/test/CodeGen/R600/llvm.SI.sendmsg-m0.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/llvm.SI.sendmsg-m0.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/llvm.SI.sendmsg-m0.ll (original)
+++ llvm/trunk/test/CodeGen/R600/llvm.SI.sendmsg-m0.ll (removed)
@@ -1,20 +0,0 @@
-;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI --check-prefix=BOTH %s
-;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck --check-prefix=VI --check-prefix=BOTH %s
-
-; BOTH-LABEL: {{^}}main:
-; BOTH: s_mov_b32 m0, s0
-; VI-NEXT: s_nop 0
-; BOTH-NEXT: s_sendmsg Gs_done(nop)
-; BOTH-NEXT: s_endpgm
-
-define void @main(i32 inreg %a) #0 {
-main_body:
- call void @llvm.SI.sendmsg(i32 3, i32 %a)
- ret void
-}
-
-; Function Attrs: nounwind
-declare void @llvm.SI.sendmsg(i32, i32) #1
-
-attributes #0 = { "ShaderType"="2" "unsafe-fp-math"="true" }
-attributes #1 = { nounwind }
Removed: llvm/trunk/test/CodeGen/R600/llvm.SI.sendmsg.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/llvm.SI.sendmsg.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/llvm.SI.sendmsg.ll (original)
+++ llvm/trunk/test/CodeGen/R600/llvm.SI.sendmsg.ll (removed)
@@ -1,24 +0,0 @@
-;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s
-;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s
-
-; CHECK-LABEL: {{^}}main:
-; CHECK: s_mov_b32 m0, 0
-; CHECK-NOT: s_mov_b32 m0
-; CHECK: s_sendmsg Gs(emit stream 0)
-; CHECK: s_sendmsg Gs(cut stream 1)
-; CHECK: s_sendmsg Gs(emit-cut stream 2)
-; CHECK: s_sendmsg Gs_done(nop)
-
-define void @main() {
-main_body:
- call void @llvm.SI.sendmsg(i32 34, i32 0);
- call void @llvm.SI.sendmsg(i32 274, i32 0);
- call void @llvm.SI.sendmsg(i32 562, i32 0);
- call void @llvm.SI.sendmsg(i32 3, i32 0);
- ret void
-}
-
-; Function Attrs: nounwind
-declare void @llvm.SI.sendmsg(i32, i32) #0
-
-attributes #0 = { nounwind }
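
A quick decode of the immediates in this test: they are consistent with bits [3:0]
being the message (2 = GS, 3 = GS_DONE), bits [6:4] the GS op (0 nop, 1 cut, 2 emit,
3 emit-cut) and bits [9:8] the stream id; this layout is inferred from the CHECK
lines rather than quoted from the ISA manual:

  34  = 2 | (2 << 4)             -> s_sendmsg Gs(emit stream 0)
  274 = 2 | (1 << 4) | (1 << 8)  -> s_sendmsg Gs(cut stream 1)
  562 = 2 | (3 << 4) | (2 << 8)  -> s_sendmsg Gs(emit-cut stream 2)
  3   = GS_DONE, op 0            -> s_sendmsg Gs_done(nop)
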
Removed: llvm/trunk/test/CodeGen/R600/llvm.SI.tbuffer.store.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/llvm.SI.tbuffer.store.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/llvm.SI.tbuffer.store.ll (original)
+++ llvm/trunk/test/CodeGen/R600/llvm.SI.tbuffer.store.ll (removed)
@@ -1,47 +0,0 @@
-;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s
-;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s
-
-;CHECK-LABEL: {{^}}test1:
-;CHECK: tbuffer_store_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, 0x20, -1, 0, -1, 0, 14, 4, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, -1, 0, 0
-define void @test1(i32 %a1, i32 %vaddr) #0 {
- %vdata = insertelement <4 x i32> undef, i32 %a1, i32 0
- call void @llvm.SI.tbuffer.store.v4i32(<16 x i8> undef, <4 x i32> %vdata,
- i32 4, i32 %vaddr, i32 0, i32 32, i32 14, i32 4, i32 1, i32 0, i32 1,
- i32 1, i32 0)
- ret void
-}
-
-;CHECK-LABEL: {{^}}test2:
-;CHECK: tbuffer_store_format_xyz {{v\[[0-9]+:[0-9]+\]}}, 0x18, -1, 0, -1, 0, 13, 4, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, -1, 0, 0
-define void @test2(i32 %a1, i32 %vaddr) #0 {
- %vdata = insertelement <4 x i32> undef, i32 %a1, i32 0
- call void @llvm.SI.tbuffer.store.v4i32(<16 x i8> undef, <4 x i32> %vdata,
- i32 3, i32 %vaddr, i32 0, i32 24, i32 13, i32 4, i32 1, i32 0, i32 1,
- i32 1, i32 0)
- ret void
-}
-
-;CHECK-LABEL: {{^}}test3:
-;CHECK: tbuffer_store_format_xy {{v\[[0-9]+:[0-9]+\]}}, 0x10, -1, 0, -1, 0, 11, 4, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, -1, 0, 0
-define void @test3(i32 %a1, i32 %vaddr) #0 {
- %vdata = insertelement <2 x i32> undef, i32 %a1, i32 0
- call void @llvm.SI.tbuffer.store.v2i32(<16 x i8> undef, <2 x i32> %vdata,
- i32 2, i32 %vaddr, i32 0, i32 16, i32 11, i32 4, i32 1, i32 0, i32 1,
- i32 1, i32 0)
- ret void
-}
-
-;CHECK-LABEL: {{^}}test4:
-;CHECK: tbuffer_store_format_x {{v[0-9]+}}, 0x8, -1, 0, -1, 0, 4, 4, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, -1, 0, 0
-define void @test4(i32 %vdata, i32 %vaddr) #0 {
- call void @llvm.SI.tbuffer.store.i32(<16 x i8> undef, i32 %vdata,
- i32 1, i32 %vaddr, i32 0, i32 8, i32 4, i32 4, i32 1, i32 0, i32 1,
- i32 1, i32 0)
- ret void
-}
-
-declare void @llvm.SI.tbuffer.store.i32(<16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32)
-declare void @llvm.SI.tbuffer.store.v2i32(<16 x i8>, <2 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32)
-declare void @llvm.SI.tbuffer.store.v4i32(<16 x i8>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32)
-
-attributes #0 = { "ShaderType"="1" }
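
Side note: the intrinsic operands in this file appear to pass straight through to the
encoded tbuffer fields checked above (again inferred from the test itself): the i32
after %vdata is the channel count picking the _x/_xy/_xyz/_xyzw form, the byte offset
prints in hex, and the dfmt/nfmt pair is emitted verbatim, e.g.:

  @test1: 4 channels, offset 32, dfmt 14, nfmt 4 -> tbuffer_store_format_xyzw ..., 0x20, ..., 14, 4, ...
  @test4: 1 channel,  offset 8,  dfmt 4,  nfmt 4 -> tbuffer_store_format_x ..., 0x8, ..., 4, 4, ...
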
Removed: llvm/trunk/test/CodeGen/R600/llvm.SI.tid.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/llvm.SI.tid.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/llvm.SI.tid.ll (original)
+++ llvm/trunk/test/CodeGen/R600/llvm.SI.tid.ll (removed)
@@ -1,18 +0,0 @@
-;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI --check-prefix=GCN %s
-;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck --check-prefix=VI --check-prefix=GCN %s
-
-;GCN: v_mbcnt_lo_u32_b32_e64
-;SI: v_mbcnt_hi_u32_b32_e32
-;VI: v_mbcnt_hi_u32_b32_e64
-
-define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg) "ShaderType"="0" {
-main_body:
- %4 = call i32 @llvm.SI.tid()
- %5 = bitcast i32 %4 to float
- call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %5, float %5, float %5, float %5)
- ret void
-}
-
-declare i32 @llvm.SI.tid() readnone
-
-declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
Removed: llvm/trunk/test/CodeGen/R600/llvm.amdgpu.dp4.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/llvm.amdgpu.dp4.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/llvm.amdgpu.dp4.ll (original)
+++ llvm/trunk/test/CodeGen/R600/llvm.amdgpu.dp4.ll (removed)
@@ -1,11 +0,0 @@
-; RUN: llc -march=r600 -mcpu=redwood -verify-machineinstrs < %s
-
-declare float @llvm.AMDGPU.dp4(<4 x float>, <4 x float>) nounwind readnone
-
-define void @test_dp4(float addrspace(1)* %out, <4 x float> addrspace(1)* %a, <4 x float> addrspace(1)* %b) nounwind {
- %src0 = load <4 x float>, <4 x float> addrspace(1)* %a, align 16
- %src1 = load <4 x float>, <4 x float> addrspace(1)* %b, align 16
- %dp4 = call float @llvm.AMDGPU.dp4(<4 x float> %src0, <4 x float> %src1) nounwind readnone
- store float %dp4, float addrspace(1)* %out, align 4
- ret void
-}
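
(For reference, @llvm.AMDGPU.dp4 is a four-component dot product,
dp4(a, b) = a.x*b.x + a.y*b.y + a.z*b.z + a.w*b.w, which is why this test carries no
CHECK lines: it only verifies that the expansion still compiles cleanly for redwood.)
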
Removed: llvm/trunk/test/CodeGen/R600/llvm.amdgpu.kilp.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/llvm.amdgpu.kilp.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/llvm.amdgpu.kilp.ll (original)
+++ llvm/trunk/test/CodeGen/R600/llvm.amdgpu.kilp.ll (removed)
@@ -1,21 +0,0 @@
-; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
-; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
-
-; SI-LABEL: {{^}}kilp_gs_const:
-; SI: s_mov_b64 exec, 0
-define void @kilp_gs_const() #0 {
-main_body:
- %0 = icmp ule i32 0, 3
- %1 = select i1 %0, float 1.000000e+00, float -1.000000e+00
- call void @llvm.AMDGPU.kilp(float %1)
- %2 = icmp ule i32 3, 0
- %3 = select i1 %2, float 1.000000e+00, float -1.000000e+00
- call void @llvm.AMDGPU.kilp(float %3)
- ret void
-}
-
-declare void @llvm.AMDGPU.kilp(float)
-
-attributes #0 = { "ShaderType"="2" }
-
-!0 = !{!"const", null, i32 1}
Removed: llvm/trunk/test/CodeGen/R600/llvm.amdgpu.lrp.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/llvm.amdgpu.lrp.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/llvm.amdgpu.lrp.ll (original)
+++ llvm/trunk/test/CodeGen/R600/llvm.amdgpu.lrp.ll (removed)
@@ -1,13 +0,0 @@
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-
-declare float @llvm.AMDGPU.lrp(float, float, float) nounwind readnone
-
-; FUNC-LABEL: {{^}}test_lrp:
-; SI: v_sub_f32
-; SI: v_mad_f32
-define void @test_lrp(float addrspace(1)* %out, float %src0, float %src1, float %src2) nounwind {
- %mad = call float @llvm.AMDGPU.lrp(float %src0, float %src1, float %src2) nounwind readnone
- store float %mad, float addrspace(1)* %out, align 4
- ret void
-}
Removed: llvm/trunk/test/CodeGen/R600/llvm.cos.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/llvm.cos.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/llvm.cos.ll (original)
+++ llvm/trunk/test/CodeGen/R600/llvm.cos.ll (removed)
@@ -1,41 +0,0 @@
-;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s -check-prefix=EG -check-prefix=FUNC
-;RUN: llc < %s -march=amdgcn -mcpu=SI | FileCheck %s -check-prefix=SI -check-prefix=FUNC
-;RUN: llc < %s -march=amdgcn -mcpu=tonga | FileCheck %s -check-prefix=SI -check-prefix=FUNC
-
-;FUNC-LABEL: test
-;EG: MULADD_IEEE *
-;EG: FRACT *
-;EG: ADD *
-;EG: COS * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
-;EG-NOT: COS
-;SI: v_cos_f32
-;SI-NOT: v_cos_f32
-
-define void @test(float addrspace(1)* %out, float %x) #1 {
- %cos = call float @llvm.cos.f32(float %x)
- store float %cos, float addrspace(1)* %out
- ret void
-}
-
-;FUNC-LABEL: testv
-;EG: COS * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
-;EG: COS * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
-;EG: COS * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
-;EG: COS * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
-;EG-NOT: COS
-;SI: v_cos_f32
-;SI: v_cos_f32
-;SI: v_cos_f32
-;SI: v_cos_f32
-;SI-NOT: v_cos_f32
-
-define void @testv(<4 x float> addrspace(1)* %out, <4 x float> inreg %vx) #1 {
- %cos = call <4 x float> @llvm.cos.v4f32(<4 x float> %vx)
- store <4 x float> %cos, <4 x float> addrspace(1)* %out
- ret void
-}
-
-declare float @llvm.cos.f32(float) readnone
-declare <4 x float> @llvm.cos.v4f32(<4 x float>) readnone
-
-attributes #0 = { "ShaderType"="0" }
Removed: llvm/trunk/test/CodeGen/R600/llvm.exp2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/llvm.exp2.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/llvm.exp2.ll (original)
+++ llvm/trunk/test/CodeGen/R600/llvm.exp2.ll (removed)
@@ -1,80 +0,0 @@
-;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=EG --check-prefix=FUNC
-;RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck %s --check-prefix=CM --check-prefix=FUNC
-;RUN: llc < %s -march=amdgcn -mcpu=SI | FileCheck %s --check-prefix=SI --check-prefix=FUNC
-;RUN: llc < %s -march=amdgcn -mcpu=tonga | FileCheck %s --check-prefix=SI --check-prefix=FUNC
-
-;FUNC-LABEL: {{^}}test:
-;EG: EXP_IEEE
-;CM-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}} (MASKED)
-;CM-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}} (MASKED)
-;CM-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}} (MASKED)
-;CM-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}}
-;SI: v_exp_f32
-
-define void @test(float addrspace(1)* %out, float %in) {
-entry:
- %0 = call float @llvm.exp2.f32(float %in)
- store float %0, float addrspace(1)* %out
- ret void
-}
-
-;FUNC-LABEL: {{^}}testv2:
-;EG: EXP_IEEE
-;EG: EXP_IEEE
-; FIXME: We should be able to merge these packets together on Cayman so we
-; have a maximum of 4 instructions.
-;CM-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}} (MASKED)
-;CM-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}} (MASKED)
-;CM-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}} (MASKED)
-;CM-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}} (MASKED)
-;CM-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}} (MASKED)
-;CM-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}} (MASKED)
-;CM-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}}
-;CM-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}}
-;SI: v_exp_f32
-;SI: v_exp_f32
-
-define void @testv2(<2 x float> addrspace(1)* %out, <2 x float> %in) {
-entry:
- %0 = call <2 x float> @llvm.exp2.v2f32(<2 x float> %in)
- store <2 x float> %0, <2 x float> addrspace(1)* %out
- ret void
-}
-
-;FUNC-LABEL: {{^}}testv4:
-;EG: EXP_IEEE
-;EG: EXP_IEEE
-;EG: EXP_IEEE
-;EG: EXP_IEEE
-; FIXME: We should be able to merge these packets together on Cayman so we
-; have a maximum of 4 instructions.
-;CM-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}} (MASKED)
-;CM-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}} (MASKED)
-;CM-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}} (MASKED)
-;CM-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}} (MASKED)
-;CM-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}} (MASKED)
-;CM-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}} (MASKED)
-;CM-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}} (MASKED)
-;CM-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}} (MASKED)
-;CM-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}} (MASKED)
-;CM-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}} (MASKED)
-;CM-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}} (MASKED)
-;CM-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}} (MASKED)
-;CM-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}}
-;CM-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}}
-;CM-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}}
-;CM-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}}
-;SI: v_exp_f32
-;SI: v_exp_f32
-;SI: v_exp_f32
-;SI: v_exp_f32
-define void @testv4(<4 x float> addrspace(1)* %out, <4 x float> %in) {
-entry:
- %0 = call <4 x float> @llvm.exp2.v4f32(<4 x float> %in)
- store <4 x float> %0, <4 x float> addrspace(1)* %out
- ret void
-}
-
-declare float @llvm.exp2.f32(float) readnone
-declare <2 x float> @llvm.exp2.v2f32(<2 x float>) readnone
-declare <4 x float> @llvm.exp2.v4f32(<4 x float>) readnone
Removed: llvm/trunk/test/CodeGen/R600/llvm.log2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/llvm.log2.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/llvm.log2.ll (original)
+++ llvm/trunk/test/CodeGen/R600/llvm.log2.ll (removed)
@@ -1,80 +0,0 @@
-;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=EG --check-prefix=FUNC
-;RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck %s --check-prefix=CM --check-prefix=FUNC
-;RUN: llc < %s -march=amdgcn -mcpu=SI | FileCheck %s --check-prefix=SI --check-prefix=FUNC
-;RUN: llc < %s -march=amdgcn -mcpu=tonga | FileCheck %s --check-prefix=SI --check-prefix=FUNC
-
-;FUNC-LABEL: {{^}}test:
-;EG: LOG_IEEE
-;CM-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} (MASKED)
-;CM-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} (MASKED)
-;CM-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} (MASKED)
-;CM-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}}
-;SI: v_log_f32
-
-define void @test(float addrspace(1)* %out, float %in) {
-entry:
- %0 = call float @llvm.log2.f32(float %in)
- store float %0, float addrspace(1)* %out
- ret void
-}
-
-;FUNC-LABEL: {{^}}testv2:
-;EG: LOG_IEEE
-;EG: LOG_IEEE
-; FIXME: We should be able to merge these packets together on Cayman so we
-; have a maximum of 4 instructions.
-;CM-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} (MASKED)
-;CM-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} (MASKED)
-;CM-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} (MASKED)
-;CM-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} (MASKED)
-;CM-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} (MASKED)
-;CM-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} (MASKED)
-;CM-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}}
-;CM-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}}
-;SI: v_log_f32
-;SI: v_log_f32
-
-define void @testv2(<2 x float> addrspace(1)* %out, <2 x float> %in) {
-entry:
- %0 = call <2 x float> @llvm.log2.v2f32(<2 x float> %in)
- store <2 x float> %0, <2 x float> addrspace(1)* %out
- ret void
-}
-
-;FUNC-LABEL: {{^}}testv4:
-;EG: LOG_IEEE
-;EG: LOG_IEEE
-;EG: LOG_IEEE
-;EG: LOG_IEEE
-; FIXME: We should be able to merge these packets together on Cayman so we
-; have a maximum of 4 instructions.
-;CM-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} (MASKED)
-;CM-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} (MASKED)
-;CM-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} (MASKED)
-;CM-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} (MASKED)
-;CM-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} (MASKED)
-;CM-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} (MASKED)
-;CM-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} (MASKED)
-;CM-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} (MASKED)
-;CM-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} (MASKED)
-;CM-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} (MASKED)
-;CM-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} (MASKED)
-;CM-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} (MASKED)
-;CM-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}}
-;CM-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}}
-;CM-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}}
-;CM-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}}
-;SI: v_log_f32
-;SI: v_log_f32
-;SI: v_log_f32
-;SI: v_log_f32
-define void @testv4(<4 x float> addrspace(1)* %out, <4 x float> %in) {
-entry:
- %0 = call <4 x float> @llvm.log2.v4f32(<4 x float> %in)
- store <4 x float> %0, <4 x float> addrspace(1)* %out
- ret void
-}
-
-declare float @llvm.log2.f32(float) readnone
-declare <2 x float> @llvm.log2.v2f32(<2 x float>) readnone
-declare <4 x float> @llvm.log2.v4f32(<4 x float>) readnone
Removed: llvm/trunk/test/CodeGen/R600/llvm.memcpy.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/llvm.memcpy.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/llvm.memcpy.ll (original)
+++ llvm/trunk/test/CodeGen/R600/llvm.memcpy.ll (removed)
@@ -1,365 +0,0 @@
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-
-declare void @llvm.memcpy.p3i8.p3i8.i32(i8 addrspace(3)* nocapture, i8 addrspace(3)* nocapture, i32, i32, i1) nounwind
-declare void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* nocapture, i8 addrspace(1)* nocapture, i64, i32, i1) nounwind
-
-
-; FUNC-LABEL: {{^}}test_small_memcpy_i64_lds_to_lds_align1:
-; SI: ds_read_u8
-; SI: ds_read_u8
-; SI: ds_read_u8
-; SI: ds_read_u8
-; SI: ds_read_u8
-; SI: ds_read_u8
-; SI: ds_read_u8
-; SI: ds_read_u8
-
-; SI: ds_read_u8
-; SI: ds_read_u8
-; SI: ds_read_u8
-; SI: ds_read_u8
-; SI: ds_read_u8
-; SI: ds_read_u8
-; SI: ds_read_u8
-; SI: ds_read_u8
-
-; SI: ds_read_u8
-; SI: ds_read_u8
-; SI: ds_read_u8
-; SI: ds_read_u8
-; SI: ds_read_u8
-; SI: ds_read_u8
-; SI: ds_read_u8
-; SI: ds_read_u8
-
-; SI: ds_read_u8
-; SI: ds_read_u8
-; SI: ds_read_u8
-; SI: ds_read_u8
-; SI: ds_read_u8
-; SI: ds_read_u8
-; SI: ds_read_u8
-; SI: ds_read_u8
-
-; SI: ds_write_b8
-; SI: ds_write_b8
-; SI: ds_write_b8
-; SI: ds_write_b8
-; SI: ds_write_b8
-; SI: ds_write_b8
-; SI: ds_write_b8
-; SI: ds_write_b8
-
-; SI: ds_write_b8
-; SI: ds_write_b8
-; SI: ds_write_b8
-; SI: ds_write_b8
-; SI: ds_write_b8
-; SI: ds_write_b8
-; SI: ds_write_b8
-; SI: ds_write_b8
-
-; SI: ds_write_b8
-; SI: ds_write_b8
-; SI: ds_write_b8
-; SI: ds_write_b8
-; SI: ds_write_b8
-; SI: ds_write_b8
-; SI: ds_write_b8
-; SI: ds_write_b8
-
-; SI: ds_write_b8
-; SI: ds_write_b8
-; SI: ds_write_b8
-; SI: ds_write_b8
-; SI: ds_write_b8
-; SI: ds_write_b8
-; SI: ds_write_b8
-; SI: ds_write_b8
-
-; SI: s_endpgm
-define void @test_small_memcpy_i64_lds_to_lds_align1(i64 addrspace(3)* noalias %out, i64 addrspace(3)* noalias %in) nounwind {
- %bcin = bitcast i64 addrspace(3)* %in to i8 addrspace(3)*
- %bcout = bitcast i64 addrspace(3)* %out to i8 addrspace(3)*
- call void @llvm.memcpy.p3i8.p3i8.i32(i8 addrspace(3)* %bcout, i8 addrspace(3)* %bcin, i32 32, i32 1, i1 false) nounwind
- ret void
-}
-
-; FUNC-LABEL: {{^}}test_small_memcpy_i64_lds_to_lds_align2:
-; SI: ds_read_u16
-; SI: ds_read_u16
-; SI: ds_read_u16
-; SI: ds_read_u16
-; SI: ds_read_u16
-; SI: ds_read_u16
-; SI: ds_read_u16
-; SI: ds_read_u16
-
-; SI: ds_read_u16
-; SI: ds_read_u16
-; SI: ds_read_u16
-; SI: ds_read_u16
-; SI: ds_read_u16
-; SI: ds_read_u16
-; SI: ds_read_u16
-; SI: ds_read_u16
-
-; SI: ds_write_b16
-; SI: ds_write_b16
-; SI: ds_write_b16
-; SI: ds_write_b16
-; SI: ds_write_b16
-; SI: ds_write_b16
-; SI: ds_write_b16
-; SI: ds_write_b16
-
-; SI: ds_write_b16
-; SI: ds_write_b16
-; SI: ds_write_b16
-; SI: ds_write_b16
-; SI: ds_write_b16
-; SI: ds_write_b16
-; SI: ds_write_b16
-; SI: ds_write_b16
-
-; SI: s_endpgm
-define void @test_small_memcpy_i64_lds_to_lds_align2(i64 addrspace(3)* noalias %out, i64 addrspace(3)* noalias %in) nounwind {
- %bcin = bitcast i64 addrspace(3)* %in to i8 addrspace(3)*
- %bcout = bitcast i64 addrspace(3)* %out to i8 addrspace(3)*
- call void @llvm.memcpy.p3i8.p3i8.i32(i8 addrspace(3)* %bcout, i8 addrspace(3)* %bcin, i32 32, i32 2, i1 false) nounwind
- ret void
-}
-
-; FUNC-LABEL: {{^}}test_small_memcpy_i64_lds_to_lds_align4:
-; SI-DAG: ds_read_b32
-; SI-DAG: ds_write_b32
-
-; SI-DAG: ds_read_b32
-; SI-DAG: ds_write_b32
-
-; SI-DAG: ds_read_b32
-; SI-DAG: ds_write_b32
-
-; SI-DAG: ds_read_b32
-; SI-DAG: ds_write_b32
-
-; SI-DAG: ds_read_b32
-; SI-DAG: ds_write_b32
-
-; SI-DAG: ds_read_b32
-; SI-DAG: ds_write_b32
-
-; SI-DAG: ds_read_b32
-; SI-DAG: ds_write_b32
-
-; SI-DAG: ds_read_b32
-; SI-DAG: ds_write_b32
-
-; SI-DAG: ds_read_b32
-; SI-DAG: ds_write_b32
-
-; SI: s_endpgm
-define void @test_small_memcpy_i64_lds_to_lds_align4(i64 addrspace(3)* noalias %out, i64 addrspace(3)* noalias %in) nounwind {
- %bcin = bitcast i64 addrspace(3)* %in to i8 addrspace(3)*
- %bcout = bitcast i64 addrspace(3)* %out to i8 addrspace(3)*
- call void @llvm.memcpy.p3i8.p3i8.i32(i8 addrspace(3)* %bcout, i8 addrspace(3)* %bcin, i32 32, i32 4, i1 false) nounwind
- ret void
-}
-
-; FIXME: Use 64-bit ops
-; FUNC-LABEL: {{^}}test_small_memcpy_i64_lds_to_lds_align8:
-
-; SI-DAG: ds_read_b32
-; SI-DAG: ds_write_b32
-
-; SI-DAG: ds_read_b32
-; SI-DAG: ds_write_b32
-
-; SI-DAG: ds_read_b32
-; SI-DAG: ds_write_b32
-
-; SI-DAG: ds_read_b32
-; SI-DAG: ds_write_b32
-
-; SI-DAG: ds_read_b32
-; SI-DAG: ds_write_b32
-
-; SI-DAG: ds_read_b32
-; SI-DAG: ds_write_b32
-
-; SI-DAG: ds_read_b32
-; SI-DAG: ds_write_b32
-
-; SI-DAG: ds_read_b32
-; SI-DAG: ds_write_b32
-
-; SI-DAG: ds_read_b32
-; SI-DAG: ds_write_b32
-
-; SI-DAG: s_endpgm
-define void @test_small_memcpy_i64_lds_to_lds_align8(i64 addrspace(3)* noalias %out, i64 addrspace(3)* noalias %in) nounwind {
- %bcin = bitcast i64 addrspace(3)* %in to i8 addrspace(3)*
- %bcout = bitcast i64 addrspace(3)* %out to i8 addrspace(3)*
- call void @llvm.memcpy.p3i8.p3i8.i32(i8 addrspace(3)* %bcout, i8 addrspace(3)* %bcin, i32 32, i32 8, i1 false) nounwind
- ret void
-}
-
-; FUNC-LABEL: {{^}}test_small_memcpy_i64_global_to_global_align1:
-; SI-DAG: buffer_load_ubyte
-; SI-DAG: buffer_store_byte
-; SI-DAG: buffer_load_ubyte
-; SI-DAG: buffer_store_byte
-; SI-DAG: buffer_load_ubyte
-; SI-DAG: buffer_store_byte
-; SI-DAG: buffer_load_ubyte
-; SI-DAG: buffer_store_byte
-; SI-DAG: buffer_load_ubyte
-; SI-DAG: buffer_store_byte
-; SI-DAG: buffer_load_ubyte
-; SI-DAG: buffer_store_byte
-; SI-DAG: buffer_load_ubyte
-; SI-DAG: buffer_store_byte
-; SI-DAG: buffer_load_ubyte
-; SI-DAG: buffer_store_byte
-
-; SI-DAG: buffer_load_ubyte
-; SI-DAG: buffer_store_byte
-; SI-DAG: buffer_load_ubyte
-; SI-DAG: buffer_store_byte
-; SI-DAG: buffer_load_ubyte
-; SI-DAG: buffer_store_byte
-; SI-DAG: buffer_load_ubyte
-; SI-DAG: buffer_store_byte
-; SI-DAG: buffer_load_ubyte
-; SI-DAG: buffer_store_byte
-; SI-DAG: buffer_load_ubyte
-; SI-DAG: buffer_store_byte
-; SI-DAG: buffer_load_ubyte
-; SI-DAG: buffer_store_byte
-; SI-DAG: buffer_load_ubyte
-; SI-DAG: buffer_store_byte
-
-; SI-DAG: buffer_load_ubyte
-; SI-DAG: buffer_store_byte
-; SI-DAG: buffer_load_ubyte
-; SI-DAG: buffer_store_byte
-; SI-DAG: buffer_load_ubyte
-; SI-DAG: buffer_store_byte
-; SI-DAG: buffer_load_ubyte
-; SI-DAG: buffer_store_byte
-; SI-DAG: buffer_load_ubyte
-; SI-DAG: buffer_store_byte
-; SI-DAG: buffer_load_ubyte
-; SI-DAG: buffer_store_byte
-; SI-DAG: buffer_load_ubyte
-; SI-DAG: buffer_store_byte
-; SI-DAG: buffer_load_ubyte
-; SI-DAG: buffer_store_byte
-
-; SI-DAG: buffer_load_ubyte
-; SI-DAG: buffer_store_byte
-; SI-DAG: buffer_load_ubyte
-; SI-DAG: buffer_store_byte
-; SI-DAG: buffer_load_ubyte
-; SI-DAG: buffer_store_byte
-; SI-DAG: buffer_load_ubyte
-; SI-DAG: buffer_store_byte
-; SI-DAG: buffer_load_ubyte
-; SI-DAG: buffer_store_byte
-; SI-DAG: buffer_load_ubyte
-; SI-DAG: buffer_store_byte
-; SI-DAG: buffer_load_ubyte
-; SI-DAG: buffer_store_byte
-; SI-DAG: buffer_load_ubyte
-; SI-DAG: buffer_store_byte
-
-; SI: s_endpgm
-define void @test_small_memcpy_i64_global_to_global_align1(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind {
- %bcin = bitcast i64 addrspace(1)* %in to i8 addrspace(1)*
- %bcout = bitcast i64 addrspace(1)* %out to i8 addrspace(1)*
- call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %bcout, i8 addrspace(1)* %bcin, i64 32, i32 1, i1 false) nounwind
- ret void
-}
-
-; FUNC-LABEL: {{^}}test_small_memcpy_i64_global_to_global_align2:
-; SI-DAG: buffer_load_ushort
-; SI-DAG: buffer_load_ushort
-; SI-DAG: buffer_load_ushort
-; SI-DAG: buffer_load_ushort
-; SI-DAG: buffer_load_ushort
-; SI-DAG: buffer_load_ushort
-; SI-DAG: buffer_load_ushort
-; SI-DAG: buffer_load_ushort
-; SI-DAG: buffer_load_ushort
-; SI-DAG: buffer_load_ushort
-; SI-DAG: buffer_load_ushort
-; SI-DAG: buffer_load_ushort
-; SI-DAG: buffer_load_ushort
-; SI-DAG: buffer_load_ushort
-; SI-DAG: buffer_load_ushort
-; SI-DAG: buffer_load_ushort
-
-; SI-DAG: buffer_store_short
-; SI-DAG: buffer_store_short
-; SI-DAG: buffer_store_short
-; SI-DAG: buffer_store_short
-; SI-DAG: buffer_store_short
-; SI-DAG: buffer_store_short
-; SI-DAG: buffer_store_short
-; SI-DAG: buffer_store_short
-; SI-DAG: buffer_store_short
-; SI-DAG: buffer_store_short
-; SI-DAG: buffer_store_short
-; SI-DAG: buffer_store_short
-; SI-DAG: buffer_store_short
-; SI-DAG: buffer_store_short
-; SI-DAG: buffer_store_short
-; SI-DAG: buffer_store_short
-
-; SI: s_endpgm
-define void @test_small_memcpy_i64_global_to_global_align2(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind {
- %bcin = bitcast i64 addrspace(1)* %in to i8 addrspace(1)*
- %bcout = bitcast i64 addrspace(1)* %out to i8 addrspace(1)*
- call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %bcout, i8 addrspace(1)* %bcin, i64 32, i32 2, i1 false) nounwind
- ret void
-}
-
-; FUNC-LABEL: {{^}}test_small_memcpy_i64_global_to_global_align4:
-; SI: buffer_load_dwordx4
-; SI: buffer_load_dwordx4
-; SI: buffer_store_dwordx4
-; SI: buffer_store_dwordx4
-; SI: s_endpgm
-define void @test_small_memcpy_i64_global_to_global_align4(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind {
- %bcin = bitcast i64 addrspace(1)* %in to i8 addrspace(1)*
- %bcout = bitcast i64 addrspace(1)* %out to i8 addrspace(1)*
- call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %bcout, i8 addrspace(1)* %bcin, i64 32, i32 4, i1 false) nounwind
- ret void
-}
-
-; FUNC-LABEL: {{^}}test_small_memcpy_i64_global_to_global_align8:
-; SI: buffer_load_dwordx4
-; SI: buffer_load_dwordx4
-; SI: buffer_store_dwordx4
-; SI: buffer_store_dwordx4
-; SI: s_endpgm
-define void @test_small_memcpy_i64_global_to_global_align8(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind {
- %bcin = bitcast i64 addrspace(1)* %in to i8 addrspace(1)*
- %bcout = bitcast i64 addrspace(1)* %out to i8 addrspace(1)*
- call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %bcout, i8 addrspace(1)* %bcin, i64 32, i32 8, i1 false) nounwind
- ret void
-}
-
-; FUNC-LABEL: {{^}}test_small_memcpy_i64_global_to_global_align16:
-; SI: buffer_load_dwordx4
-; SI: buffer_load_dwordx4
-; SI: buffer_store_dwordx4
-; SI: buffer_store_dwordx4
-; SI: s_endpgm
-define void @test_small_memcpy_i64_global_to_global_align16(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind {
- %bcin = bitcast i64 addrspace(1)* %in to i8 addrspace(1)*
- %bcout = bitcast i64 addrspace(1)* %out to i8 addrspace(1)*
- call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %bcout, i8 addrspace(1)* %bcin, i64 32, i32 16, i1 false) nounwind
- ret void
-}
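As a compact summary of what the FileCheck patterns in this removed test encode for the 32-byte copies (derived from the checks themselves, so descriptive rather than normative):

  align 1  : byte ops   - ds_read_u8/ds_write_b8 for LDS, buffer_load_ubyte/buffer_store_byte for global
  align 2  : 16-bit ops - ds_read_u16/ds_write_b16 for LDS, buffer_load_ushort/buffer_store_short for global
  align 4+ : 32-bit ds_read_b32/ds_write_b32 pairs for LDS, two buffer_load_dwordx4/buffer_store_dwordx4 pairs for global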
Removed: llvm/trunk/test/CodeGen/R600/llvm.pow.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/llvm.pow.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/llvm.pow.ll (original)
+++ llvm/trunk/test/CodeGen/R600/llvm.pow.ll (removed)
@@ -1,40 +0,0 @@
-;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
-
-;CHECK-LABEL: test1:
-;CHECK: LOG_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}},
-;CHECK-NEXT: MUL NON-IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], PS}},
-;CHECK-NEXT: EXP_IEEE * T{{[0-9]+\.[XYZW], PV\.[XYZW]}},
-
-define void @test1(<4 x float> inreg %reg0) #0 {
- %r0 = extractelement <4 x float> %reg0, i32 0
- %r1 = extractelement <4 x float> %reg0, i32 1
- %r2 = call float @llvm.pow.f32( float %r0, float %r1)
- %vec = insertelement <4 x float> undef, float %r2, i32 0
- call void @llvm.R600.store.swizzle(<4 x float> %vec, i32 0, i32 0)
- ret void
-}
-
-;CHECK-LABEL: test2:
-;CHECK: LOG_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}},
-;CHECK-NEXT: MUL NON-IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], PS}},
-;CHECK-NEXT: LOG_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}},
-;CHECK-NEXT: MUL NON-IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], PS}},
-;CHECK-NEXT: EXP_IEEE * T{{[0-9]+\.[XYZW], PV\.[XYZW]}},
-;CHECK-NEXT: EXP_IEEE * T{{[0-9]+\.[XYZW], PV\.[XYZW]}},
-;CHECK-NEXT: LOG_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}},
-;CHECK-NEXT: MUL NON-IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], PS}},
-;CHECK-NEXT: LOG_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}},
-;CHECK-NEXT: MUL NON-IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], PS}},
-;CHECK-NEXT: EXP_IEEE * T{{[0-9]+\.[XYZW], PV\.[XYZW]}},
-;CHECK-NEXT: EXP_IEEE * T{{[0-9]+\.[XYZW], PV\.[XYZW]}},
-define void @test2(<4 x float> inreg %reg0, <4 x float> inreg %reg1) #0 {
- %vec = call <4 x float> @llvm.pow.v4f32( <4 x float> %reg0, <4 x float> %reg1)
- call void @llvm.R600.store.swizzle(<4 x float> %vec, i32 0, i32 0)
- ret void
-}
-
-declare float @llvm.pow.f32(float, float) readonly
-declare <4 x float> @llvm.pow.v4f32(<4 x float>, <4 x float>) readonly
-declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
-
-attributes #0 = { "ShaderType"="0" }
Removed: llvm/trunk/test/CodeGen/R600/llvm.rint.f64.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/llvm.rint.f64.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/llvm.rint.f64.ll (original)
+++ llvm/trunk/test/CodeGen/R600/llvm.rint.f64.ll (removed)
@@ -1,46 +0,0 @@
-; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-
-; FUNC-LABEL: {{^}}rint_f64:
-; CI: v_rndne_f64_e32
-
-; SI-DAG: v_add_f64
-; SI-DAG: v_add_f64
-; SI-DAG: v_cmp_gt_f64_e64
-; SI: v_cndmask_b32
-; SI: v_cndmask_b32
-; SI: s_endpgm
-define void @rint_f64(double addrspace(1)* %out, double %in) {
-entry:
- %0 = call double @llvm.rint.f64(double %in)
- store double %0, double addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}rint_v2f64:
-; CI: v_rndne_f64_e32
-; CI: v_rndne_f64_e32
-define void @rint_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %in) {
-entry:
- %0 = call <2 x double> @llvm.rint.v2f64(<2 x double> %in)
- store <2 x double> %0, <2 x double> addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}rint_v4f64:
-; CI: v_rndne_f64_e32
-; CI: v_rndne_f64_e32
-; CI: v_rndne_f64_e32
-; CI: v_rndne_f64_e32
-define void @rint_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %in) {
-entry:
- %0 = call <4 x double> @llvm.rint.v4f64(<4 x double> %in)
- store <4 x double> %0, <4 x double> addrspace(1)* %out
- ret void
-}
-
-
-declare double @llvm.rint.f64(double) #0
-declare <2 x double> @llvm.rint.v2f64(<2 x double>) #0
-declare <4 x double> @llvm.rint.v4f64(<4 x double>) #0
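For context on the SI checks in rint_f64 above (SI lacks v_rndne_f64): round-to-nearest-even on doubles is commonly open-coded with the add-magic-number trick, and the two v_add_f64 plus compare/select lines are consistent with an expansion of roughly this shape (a sketch under that assumption, not the verbatim lowering):

  ; |x| <  2^52 : rint(x) = (x + copysign(2^52, x)) - copysign(2^52, x)
  ; |x| >= 2^52 : x is already an integer, so rint(x) = x  (hence the v_cndmask selects)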
Removed: llvm/trunk/test/CodeGen/R600/llvm.rint.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/llvm.rint.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/llvm.rint.ll (original)
+++ llvm/trunk/test/CodeGen/R600/llvm.rint.ll (removed)
@@ -1,62 +0,0 @@
-; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck %s -check-prefix=R600 -check-prefix=FUNC
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-
-; FUNC-LABEL: {{^}}rint_f32:
-; R600: RNDNE
-
-; SI: v_rndne_f32_e32
-define void @rint_f32(float addrspace(1)* %out, float %in) {
-entry:
- %0 = call float @llvm.rint.f32(float %in) #0
- store float %0, float addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}rint_v2f32:
-; R600: RNDNE
-; R600: RNDNE
-
-; SI: v_rndne_f32_e32
-; SI: v_rndne_f32_e32
-define void @rint_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %in) {
-entry:
- %0 = call <2 x float> @llvm.rint.v2f32(<2 x float> %in) #0
- store <2 x float> %0, <2 x float> addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}rint_v4f32:
-; R600: RNDNE
-; R600: RNDNE
-; R600: RNDNE
-; R600: RNDNE
-
-; SI: v_rndne_f32_e32
-; SI: v_rndne_f32_e32
-; SI: v_rndne_f32_e32
-; SI: v_rndne_f32_e32
-define void @rint_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %in) {
-entry:
- %0 = call <4 x float> @llvm.rint.v4f32(<4 x float> %in) #0
- store <4 x float> %0, <4 x float> addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}legacy_amdil_round_nearest_f32:
-; R600: RNDNE
-
-; SI: v_rndne_f32_e32
-define void @legacy_amdil_round_nearest_f32(float addrspace(1)* %out, float %in) {
-entry:
- %0 = call float @llvm.AMDIL.round.nearest.f32(float %in) #0
- store float %0, float addrspace(1)* %out
- ret void
-}
-
-declare float @llvm.AMDIL.round.nearest.f32(float) #0
-declare float @llvm.rint.f32(float) #0
-declare <2 x float> @llvm.rint.v2f32(<2 x float>) #0
-declare <4 x float> @llvm.rint.v4f32(<4 x float>) #0
-
-attributes #0 = { nounwind readnone }
Removed: llvm/trunk/test/CodeGen/R600/llvm.round.f64.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/llvm.round.f64.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/llvm.round.f64.ll (original)
+++ llvm/trunk/test/CodeGen/R600/llvm.round.f64.ll (removed)
@@ -1,74 +0,0 @@
-; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-
-; FUNC-LABEL: {{^}}round_f64:
-; SI: s_endpgm
-define void @round_f64(double addrspace(1)* %out, double %x) #0 {
- %result = call double @llvm.round.f64(double %x) #1
- store double %result, double addrspace(1)* %out
- ret void
-}
-
-; This is a pretty large function, so just test a few of the
-; instructions that are necessary.
-
-; FUNC-LABEL: {{^}}v_round_f64:
-; SI: buffer_load_dwordx2
-; SI: v_bfe_u32 [[EXP:v[0-9]+]], v{{[0-9]+}}, 20, 11
-
-; SI-DAG: v_not_b32_e32
-; SI-DAG: v_not_b32_e32
-
-; SI-DAG: v_cmp_eq_i32
-
-; SI-DAG: s_mov_b32 [[BFIMASK:s[0-9]+]], 0x7fffffff
-; SI-DAG: v_cmp_gt_i32_e64
-; SI-DAG: v_bfi_b32 [[COPYSIGN:v[0-9]+]], [[BFIMASK]]
-
-; SI-DAG: v_cmp_gt_i32_e64
-
-
-; SI: buffer_store_dwordx2
-; SI: s_endpgm
-define void @v_round_f64(double addrspace(1)* %out, double addrspace(1)* %in) #0 {
- %tid = call i32 @llvm.r600.read.tidig.x() #1
- %gep = getelementptr double, double addrspace(1)* %in, i32 %tid
- %out.gep = getelementptr double, double addrspace(1)* %out, i32 %tid
- %x = load double, double addrspace(1)* %gep
- %result = call double @llvm.round.f64(double %x) #1
- store double %result, double addrspace(1)* %out.gep
- ret void
-}
-
-; FUNC-LABEL: {{^}}round_v2f64:
-; SI: s_endpgm
-define void @round_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %in) #0 {
- %result = call <2 x double> @llvm.round.v2f64(<2 x double> %in) #1
- store <2 x double> %result, <2 x double> addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}round_v4f64:
-; SI: s_endpgm
-define void @round_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %in) #0 {
- %result = call <4 x double> @llvm.round.v4f64(<4 x double> %in) #1
- store <4 x double> %result, <4 x double> addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}round_v8f64:
-; SI: s_endpgm
-define void @round_v8f64(<8 x double> addrspace(1)* %out, <8 x double> %in) #0 {
- %result = call <8 x double> @llvm.round.v8f64(<8 x double> %in) #1
- store <8 x double> %result, <8 x double> addrspace(1)* %out
- ret void
-}
-
-declare i32 @llvm.r600.read.tidig.x() #1
-
-declare double @llvm.round.f64(double) #1
-declare <2 x double> @llvm.round.v2f64(<2 x double>) #1
-declare <4 x double> @llvm.round.v4f64(<4 x double>) #1
-declare <8 x double> @llvm.round.v8f64(<8 x double>) #1
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
Removed: llvm/trunk/test/CodeGen/R600/llvm.round.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/llvm.round.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/llvm.round.ll (original)
+++ llvm/trunk/test/CodeGen/R600/llvm.round.ll (removed)
@@ -1,67 +0,0 @@
-; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s
-
-; FUNC-LABEL: {{^}}round_f32:
-; SI-DAG: s_load_dword [[SX:s[0-9]+]]
-; SI-DAG: s_mov_b32 [[K:s[0-9]+]], 0x7fffffff
-; SI: v_trunc_f32_e32 [[TRUNC:v[0-9]+]], [[SX]]
-; SI: v_sub_f32_e32 [[SUB:v[0-9]+]], [[SX]], [[TRUNC]]
-; SI: v_mov_b32_e32 [[VX:v[0-9]+]], [[SX]]
-; SI: v_bfi_b32 [[COPYSIGN:v[0-9]+]], [[K]], 1.0, [[VX]]
-; SI: v_cmp_le_f32_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], 0.5, |[[SUB]]|
-; SI: v_cndmask_b32_e64 [[SEL:v[0-9]+]], 0, [[VX]], [[CMP]]
-; SI: v_add_f32_e32 [[RESULT:v[0-9]+]], [[SEL]], [[TRUNC]]
-; SI: buffer_store_dword [[RESULT]]
-
-; R600: TRUNC {{.*}}, [[ARG:KC[0-9]\[[0-9]+\]\.[XYZW]]]
-; R600-DAG: ADD {{.*}},
-; R600-DAG: BFI_INT
-; R600-DAG: SETGE
-; R600-DAG: CNDE
-; R600-DAG: ADD
-define void @round_f32(float addrspace(1)* %out, float %x) #0 {
- %result = call float @llvm.round.f32(float %x) #1
- store float %result, float addrspace(1)* %out
- ret void
-}
-
-; The vector tests are really difficult to verify, since it can be hard to
-; predict how the scheduler will order the instructions. We already have
-; a test for the scalar case, so the vector tests just check that the
-; compiler doesn't crash.
-
-; FUNC-LABEL: {{^}}round_v2f32:
-; SI: s_endpgm
-; R600: CF_END
-define void @round_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %in) #0 {
- %result = call <2 x float> @llvm.round.v2f32(<2 x float> %in) #1
- store <2 x float> %result, <2 x float> addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}round_v4f32:
-; SI: s_endpgm
-; R600: CF_END
-define void @round_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %in) #0 {
- %result = call <4 x float> @llvm.round.v4f32(<4 x float> %in) #1
- store <4 x float> %result, <4 x float> addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}round_v8f32:
-; SI: s_endpgm
-; R600: CF_END
-define void @round_v8f32(<8 x float> addrspace(1)* %out, <8 x float> %in) #0 {
- %result = call <8 x float> @llvm.round.v8f32(<8 x float> %in) #1
- store <8 x float> %result, <8 x float> addrspace(1)* %out
- ret void
-}
-
-declare float @llvm.round.f32(float) #1
-declare <2 x float> @llvm.round.v2f32(<2 x float>) #1
-declare <4 x float> @llvm.round.v4f32(<4 x float>) #1
-declare <8 x float> @llvm.round.v8f32(<8 x float>) #1
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
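The scalar round_f32 checks above spell out the open-coded lowering instruction by instruction. For readers who prefer IR to the instruction sequence, an equivalent reference function (illustrative only, written with standard intrinsics) is:

define float @round_reference_f32(float %x) nounwind readnone {
  ; truncate toward zero, then add copysign(1.0, x) when the dropped fraction
  ; is at least 0.5 -- mirroring v_trunc/v_sub/v_bfi/v_cmp_le/v_cndmask/v_add
  %t = call float @llvm.trunc.f32(float %x)
  %d = fsub float %x, %t
  %ad = call float @llvm.fabs.f32(float %d)
  %ge = fcmp oge float %ad, 5.000000e-01
  %sone = call float @llvm.copysign.f32(float 1.000000e+00, float %x)
  %inc = select i1 %ge, float %sone, float 0.000000e+00
  %r = fadd float %t, %inc
  ret float %r
}

declare float @llvm.trunc.f32(float) nounwind readnone
declare float @llvm.fabs.f32(float) nounwind readnone
declare float @llvm.copysign.f32(float, float) nounwind readnone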
Removed: llvm/trunk/test/CodeGen/R600/llvm.sin.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/llvm.sin.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/llvm.sin.ll (original)
+++ llvm/trunk/test/CodeGen/R600/llvm.sin.ll (removed)
@@ -1,92 +0,0 @@
-; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=SI-SAFE -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=SI -enable-unsafe-fp-math < %s | FileCheck -check-prefix=SI -check-prefix=SI-UNSAFE -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefix=SI -check-prefix=SI-SAFE -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=tonga -enable-unsafe-fp-math < %s | FileCheck -check-prefix=SI -check-prefix=SI-UNSAFE -check-prefix=FUNC %s
-
-; FUNC-LABEL: sin_f32
-; EG: MULADD_IEEE *
-; EG: FRACT *
-; EG: ADD *
-; EG: SIN * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
-; EG-NOT: SIN
-; SI: v_mul_f32
-; SI: v_fract_f32
-; SI: v_sin_f32
-; SI-NOT: v_sin_f32
-
-define void @sin_f32(float addrspace(1)* %out, float %x) #1 {
- %sin = call float @llvm.sin.f32(float %x)
- store float %sin, float addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}sin_3x_f32:
-; SI-UNSAFE-NOT: v_add_f32
-; SI-UNSAFE: 0x3ef47644
-; SI-UNSAFE: v_mul_f32
-; SI-SAFE: v_mul_f32
-; SI-SAFE: v_mul_f32
-; SI: v_fract_f32
-; SI: v_sin_f32
-; SI-NOT: v_sin_f32
-define void @sin_3x_f32(float addrspace(1)* %out, float %x) #1 {
- %y = fmul float 3.0, %x
- %sin = call float @llvm.sin.f32(float %y)
- store float %sin, float addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}sin_2x_f32:
-; SI-UNSAFE-NOT: v_add_f32
-; SI-UNSAFE: 0x3ea2f983
-; SI-UNSAFE: v_mul_f32
-; SI-SAFE: v_add_f32
-; SI-SAFE: v_mul_f32
-; SI: v_fract_f32
-; SI: v_sin_f32
-; SI-NOT: v_sin_f32
-define void @sin_2x_f32(float addrspace(1)* %out, float %x) #1 {
- %y = fmul float 2.0, %x
- %sin = call float @llvm.sin.f32(float %y)
- store float %sin, float addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}test_2sin_f32:
-; SI-UNSAFE: 0x3ea2f983
-; SI-UNSAFE: v_mul_f32
-; SI-SAFE: v_add_f32
-; SI-SAFE: v_mul_f32
-; SI: v_fract_f32
-; SI: v_sin_f32
-; SI-NOT: v_sin_f32
-define void @test_2sin_f32(float addrspace(1)* %out, float %x) #1 {
- %y = fmul float 2.0, %x
- %sin = call float @llvm.sin.f32(float %y)
- store float %sin, float addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}sin_v4f32:
-; EG: SIN * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
-; EG: SIN * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
-; EG: SIN * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
-; EG: SIN * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
-; EG-NOT: SIN
-; SI: v_sin_f32
-; SI: v_sin_f32
-; SI: v_sin_f32
-; SI: v_sin_f32
-; SI-NOT: v_sin_f32
-
-define void @sin_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %vx) #1 {
- %sin = call <4 x float> @llvm.sin.v4f32( <4 x float> %vx)
- store <4 x float> %sin, <4 x float> addrspace(1)* %out
- ret void
-}
-
-declare float @llvm.sin.f32(float) readnone
-declare <4 x float> @llvm.sin.v4f32(<4 x float>) readnone
-
-attributes #0 = { "ShaderType"="0" }
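A note on the magic constants the SI-UNSAFE lines above look for: v_sin_f32 takes its operand in units of full turns, so sin(x) is range-reduced by a multiply with 1/(2*pi) before the v_fract/v_sin pair. Under -enable-unsafe-fp-math the 2x and 3x scale factors fold into that constant, which is presumably where the literals come from:

  ; sin(2x): 2 * (1/(2*pi)) = 1/pi      ~= 0.318310  (0x3ea2f983 as an f32 bit pattern)
  ; sin(3x): 3 * (1/(2*pi)) = 3/(2*pi)  ~= 0.477465  (0x3ef47644 as an f32 bit pattern)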
Removed: llvm/trunk/test/CodeGen/R600/llvm.sqrt.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/llvm.sqrt.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/llvm.sqrt.ll (original)
+++ llvm/trunk/test/CodeGen/R600/llvm.sqrt.ll (removed)
@@ -1,105 +0,0 @@
-; RUN: llc < %s -march=r600 --mcpu=redwood | FileCheck %s --check-prefix=R600
-; RUN: llc < %s -march=amdgcn --mcpu=SI -verify-machineinstrs| FileCheck %s --check-prefix=SI
-; RUN: llc < %s -march=amdgcn --mcpu=tonga -verify-machineinstrs| FileCheck %s --check-prefix=SI
-
-; R600-LABEL: {{^}}sqrt_f32:
-; R600: RECIPSQRT_CLAMPED * T{{[0-9]\.[XYZW]}}, KC0[2].Z
-; R600: MUL NON-IEEE T{{[0-9]\.[XYZW]}}, KC0[2].Z, PS
-; SI-LABEL: {{^}}sqrt_f32:
-; SI: v_sqrt_f32_e32
-define void @sqrt_f32(float addrspace(1)* %out, float %in) {
-entry:
- %0 = call float @llvm.sqrt.f32(float %in)
- store float %0, float addrspace(1)* %out
- ret void
-}
-
-; R600-LABEL: {{^}}sqrt_v2f32:
-; R600-DAG: RECIPSQRT_CLAMPED * T{{[0-9]\.[XYZW]}}, KC0[2].W
-; R600-DAG: MUL NON-IEEE T{{[0-9]\.[XYZW]}}, KC0[2].W, PS
-; R600-DAG: RECIPSQRT_CLAMPED * T{{[0-9]\.[XYZW]}}, KC0[3].X
-; R600-DAG: MUL NON-IEEE T{{[0-9]\.[XYZW]}}, KC0[3].X, PS
-; SI-LABEL: {{^}}sqrt_v2f32:
-; SI: v_sqrt_f32_e32
-; SI: v_sqrt_f32_e32
-define void @sqrt_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %in) {
-entry:
- %0 = call <2 x float> @llvm.sqrt.v2f32(<2 x float> %in)
- store <2 x float> %0, <2 x float> addrspace(1)* %out
- ret void
-}
-
-; R600-LABEL: {{^}}sqrt_v4f32:
-; R600-DAG: RECIPSQRT_CLAMPED * T{{[0-9]\.[XYZW]}}, KC0[3].Y
-; R600-DAG: MUL NON-IEEE T{{[0-9]\.[XYZW]}}, KC0[3].Y, PS
-; R600-DAG: RECIPSQRT_CLAMPED * T{{[0-9]\.[XYZW]}}, KC0[3].Z
-; R600-DAG: MUL NON-IEEE T{{[0-9]\.[XYZW]}}, KC0[3].Z, PS
-; R600-DAG: RECIPSQRT_CLAMPED * T{{[0-9]\.[XYZW]}}, KC0[3].W
-; R600-DAG: MUL NON-IEEE T{{[0-9]\.[XYZW]}}, KC0[3].W, PS
-; R600-DAG: RECIPSQRT_CLAMPED * T{{[0-9]\.[XYZW]}}, KC0[4].X
-; R600-DAG: MUL NON-IEEE T{{[0-9]\.[XYZW]}}, KC0[4].X, PS
-; SI-LABEL: {{^}}sqrt_v4f32:
-; SI: v_sqrt_f32_e32
-; SI: v_sqrt_f32_e32
-; SI: v_sqrt_f32_e32
-; SI: v_sqrt_f32_e32
-define void @sqrt_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %in) {
-entry:
- %0 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %in)
- store <4 x float> %0, <4 x float> addrspace(1)* %out
- ret void
-}
-
-; SI-LABEL: {{^}}elim_redun_check:
-; SI: v_sqrt_f32_e32
-; SI-NOT: v_cndmask
-define void @elim_redun_check(float addrspace(1)* %out, float %in) {
-entry:
- %sqrt = call float @llvm.sqrt.f32(float %in)
- %cmp = fcmp olt float %in, -0.000000e+00
- %res = select i1 %cmp, float 0x7FF8000000000000, float %sqrt
- store float %res, float addrspace(1)* %out
- ret void
-}
-
-; SI-LABEL: {{^}}elim_redun_check_ult:
-; SI: v_sqrt_f32_e32
-; SI-NOT: v_cndmask
-define void @elim_redun_check_ult(float addrspace(1)* %out, float %in) {
-entry:
- %sqrt = call float @llvm.sqrt.f32(float %in)
- %cmp = fcmp ult float %in, -0.000000e+00
- %res = select i1 %cmp, float 0x7FF8000000000000, float %sqrt
- store float %res, float addrspace(1)* %out
- ret void
-}
-
-; SI-LABEL: {{^}}elim_redun_check_v2:
-; SI: v_sqrt_f32_e32
-; SI: v_sqrt_f32_e32
-; SI-NOT: v_cndmask
-define void @elim_redun_check_v2(<2 x float> addrspace(1)* %out, <2 x float> %in) {
-entry:
- %sqrt = call <2 x float> @llvm.sqrt.v2f32(<2 x float> %in)
- %cmp = fcmp olt <2 x float> %in, <float -0.000000e+00, float -0.000000e+00>
- %res = select <2 x i1> %cmp, <2 x float> <float 0x7FF8000000000000, float 0x7FF8000000000000>, <2 x float> %sqrt
- store <2 x float> %res, <2 x float> addrspace(1)* %out
- ret void
-}
-
-; SI-LABEL: {{^}}elim_redun_check_v2_ult:
-; SI: v_sqrt_f32_e32
-; SI: v_sqrt_f32_e32
-; SI-NOT: v_cndmask
-define void @elim_redun_check_v2_ult(<2 x float> addrspace(1)* %out, <2 x float> %in) {
-entry:
- %sqrt = call <2 x float> @llvm.sqrt.v2f32(<2 x float> %in)
- %cmp = fcmp ult <2 x float> %in, <float -0.000000e+00, float -0.000000e+00>
- %res = select <2 x i1> %cmp, <2 x float> <float 0x7FF8000000000000, float 0x7FF8000000000000>, <2 x float> %sqrt
- store <2 x float> %res, <2 x float> addrspace(1)* %out
- ret void
-}
-
-declare float @llvm.sqrt.f32(float %in)
-declare <2 x float> @llvm.sqrt.v2f32(<2 x float> %in)
-declare <4 x float> @llvm.sqrt.v4f32(<4 x float> %in)
Removed: llvm/trunk/test/CodeGen/R600/load-i1.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/load-i1.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/load-i1.ll (original)
+++ llvm/trunk/test/CodeGen/R600/load-i1.ll (removed)
@@ -1,149 +0,0 @@
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
-
-; FUNC-LABEL: {{^}}global_copy_i1_to_i1:
-; SI: buffer_load_ubyte
-; SI: v_and_b32_e32 v{{[0-9]+}}, 1
-; SI: buffer_store_byte
-; SI: s_endpgm
-
-; EG: VTX_READ_8
-; EG: AND_INT
-define void @global_copy_i1_to_i1(i1 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
- %load = load i1, i1 addrspace(1)* %in
- store i1 %load, i1 addrspace(1)* %out, align 1
- ret void
-}
-
-; FUNC-LABEL: {{^}}local_copy_i1_to_i1:
-; SI: ds_read_u8
-; SI: v_and_b32_e32 v{{[0-9]+}}, 1
-; SI: ds_write_b8
-; SI: s_endpgm
-
-; EG: LDS_UBYTE_READ_RET
-; EG: AND_INT
-; EG: LDS_BYTE_WRITE
-define void @local_copy_i1_to_i1(i1 addrspace(3)* %out, i1 addrspace(3)* %in) nounwind {
- %load = load i1, i1 addrspace(3)* %in
- store i1 %load, i1 addrspace(3)* %out, align 1
- ret void
-}
-
-; FUNC-LABEL: {{^}}constant_copy_i1_to_i1:
-; SI: buffer_load_ubyte
-; SI: v_and_b32_e32 v{{[0-9]+}}, 1
-; SI: buffer_store_byte
-; SI: s_endpgm
-
-; EG: VTX_READ_8
-; EG: AND_INT
-define void @constant_copy_i1_to_i1(i1 addrspace(1)* %out, i1 addrspace(2)* %in) nounwind {
- %load = load i1, i1 addrspace(2)* %in
- store i1 %load, i1 addrspace(1)* %out, align 1
- ret void
-}
-
-; FUNC-LABEL: {{^}}global_sextload_i1_to_i32:
-; SI: buffer_load_ubyte
-; SI: v_bfe_i32
-; SI: buffer_store_dword
-; SI: s_endpgm
-
-; EG: VTX_READ_8
-; EG: BFE_INT
-define void @global_sextload_i1_to_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
- %load = load i1, i1 addrspace(1)* %in
- %ext = sext i1 %load to i32
- store i32 %ext, i32 addrspace(1)* %out, align 4
- ret void
-}
-
-; FUNC-LABEL: {{^}}global_zextload_i1_to_i32:
-; SI: buffer_load_ubyte
-; SI: buffer_store_dword
-; SI: s_endpgm
-
-define void @global_zextload_i1_to_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
- %load = load i1, i1 addrspace(1)* %in
- %ext = zext i1 %load to i32
- store i32 %ext, i32 addrspace(1)* %out, align 4
- ret void
-}
-
-; FUNC-LABEL: {{^}}global_sextload_i1_to_i64:
-; SI: buffer_load_ubyte
-; SI: v_bfe_i32
-; SI: buffer_store_dwordx2
-; SI: s_endpgm
-define void @global_sextload_i1_to_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
- %load = load i1, i1 addrspace(1)* %in
- %ext = sext i1 %load to i64
- store i64 %ext, i64 addrspace(1)* %out, align 4
- ret void
-}
-
-; FUNC-LABEL: {{^}}global_zextload_i1_to_i64:
-; SI: buffer_load_ubyte
-; SI: v_mov_b32_e32 {{v[0-9]+}}, 0
-; SI: buffer_store_dwordx2
-; SI: s_endpgm
-define void @global_zextload_i1_to_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
- %load = load i1, i1 addrspace(1)* %in
- %ext = zext i1 %load to i64
- store i64 %ext, i64 addrspace(1)* %out, align 4
- ret void
-}
-
-; FUNC-LABEL: {{^}}i1_arg:
-; SI: buffer_load_ubyte
-; SI: v_and_b32_e32
-; SI: buffer_store_byte
-; SI: s_endpgm
-define void @i1_arg(i1 addrspace(1)* %out, i1 %x) nounwind {
- store i1 %x, i1 addrspace(1)* %out, align 1
- ret void
-}
-
-; FUNC-LABEL: {{^}}i1_arg_zext_i32:
-; SI: buffer_load_ubyte
-; SI: buffer_store_dword
-; SI: s_endpgm
-define void @i1_arg_zext_i32(i32 addrspace(1)* %out, i1 %x) nounwind {
- %ext = zext i1 %x to i32
- store i32 %ext, i32 addrspace(1)* %out, align 4
- ret void
-}
-
-; FUNC-LABEL: {{^}}i1_arg_zext_i64:
-; SI: buffer_load_ubyte
-; SI: buffer_store_dwordx2
-; SI: s_endpgm
-define void @i1_arg_zext_i64(i64 addrspace(1)* %out, i1 %x) nounwind {
- %ext = zext i1 %x to i64
- store i64 %ext, i64 addrspace(1)* %out, align 8
- ret void
-}
-
-; FUNC-LABEL: {{^}}i1_arg_sext_i32:
-; SI: buffer_load_ubyte
-; SI: buffer_store_dword
-; SI: s_endpgm
-define void @i1_arg_sext_i32(i32 addrspace(1)* %out, i1 %x) nounwind {
- %ext = sext i1 %x to i32
- store i32 %ext, i32 addrspace(1)* %out, align 4
- ret void
-}
-
-; FUNC-LABEL: {{^}}i1_arg_sext_i64:
-; SI: buffer_load_ubyte
-; SI: v_bfe_i32
-; SI: v_ashrrev_i32
-; SI: buffer_store_dwordx2
-; SI: s_endpgm
-define void @i1_arg_sext_i64(i64 addrspace(1)* %out, i1 %x) nounwind {
- %ext = sext i1 %x to i64
- store i64 %ext, i64 addrspace(1)* %out, align 8
- ret void
-}
Removed: llvm/trunk/test/CodeGen/R600/load-input-fold.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/load-input-fold.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/load-input-fold.ll (original)
+++ llvm/trunk/test/CodeGen/R600/load-input-fold.ll (removed)
@@ -1,117 +0,0 @@
-;RUN: llc < %s -march=r600 -mcpu=cayman
-
-define void @main(<4 x float> inreg %reg0, <4 x float> inreg %reg1, <4 x float> inreg %reg2, <4 x float> inreg %reg3) #0 {
-main_body:
- %0 = extractelement <4 x float> %reg1, i32 0
- %1 = extractelement <4 x float> %reg1, i32 1
- %2 = extractelement <4 x float> %reg1, i32 2
- %3 = extractelement <4 x float> %reg1, i32 3
- %4 = extractelement <4 x float> %reg2, i32 0
- %5 = extractelement <4 x float> %reg2, i32 1
- %6 = extractelement <4 x float> %reg2, i32 2
- %7 = extractelement <4 x float> %reg2, i32 3
- %8 = extractelement <4 x float> %reg3, i32 0
- %9 = extractelement <4 x float> %reg3, i32 1
- %10 = extractelement <4 x float> %reg3, i32 2
- %11 = extractelement <4 x float> %reg3, i32 3
- %12 = load <4 x float>, <4 x float> addrspace(8)* null
- %13 = extractelement <4 x float> %12, i32 0
- %14 = fmul float %0, %13
- %15 = load <4 x float>, <4 x float> addrspace(8)* null
- %16 = extractelement <4 x float> %15, i32 1
- %17 = fmul float %0, %16
- %18 = load <4 x float>, <4 x float> addrspace(8)* null
- %19 = extractelement <4 x float> %18, i32 2
- %20 = fmul float %0, %19
- %21 = load <4 x float>, <4 x float> addrspace(8)* null
- %22 = extractelement <4 x float> %21, i32 3
- %23 = fmul float %0, %22
- %24 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
- %25 = extractelement <4 x float> %24, i32 0
- %26 = fmul float %1, %25
- %27 = fadd float %26, %14
- %28 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
- %29 = extractelement <4 x float> %28, i32 1
- %30 = fmul float %1, %29
- %31 = fadd float %30, %17
- %32 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
- %33 = extractelement <4 x float> %32, i32 2
- %34 = fmul float %1, %33
- %35 = fadd float %34, %20
- %36 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
- %37 = extractelement <4 x float> %36, i32 3
- %38 = fmul float %1, %37
- %39 = fadd float %38, %23
- %40 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
- %41 = extractelement <4 x float> %40, i32 0
- %42 = fmul float %2, %41
- %43 = fadd float %42, %27
- %44 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
- %45 = extractelement <4 x float> %44, i32 1
- %46 = fmul float %2, %45
- %47 = fadd float %46, %31
- %48 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
- %49 = extractelement <4 x float> %48, i32 2
- %50 = fmul float %2, %49
- %51 = fadd float %50, %35
- %52 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
- %53 = extractelement <4 x float> %52, i32 3
- %54 = fmul float %2, %53
- %55 = fadd float %54, %39
- %56 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
- %57 = extractelement <4 x float> %56, i32 0
- %58 = fmul float %3, %57
- %59 = fadd float %58, %43
- %60 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
- %61 = extractelement <4 x float> %60, i32 1
- %62 = fmul float %3, %61
- %63 = fadd float %62, %47
- %64 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
- %65 = extractelement <4 x float> %64, i32 2
- %66 = fmul float %3, %65
- %67 = fadd float %66, %51
- %68 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
- %69 = extractelement <4 x float> %68, i32 3
- %70 = fmul float %3, %69
- %71 = fadd float %70, %55
- %72 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)
- %73 = extractelement <4 x float> %72, i32 0
- %74 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)
- %75 = extractelement <4 x float> %74, i32 1
- %76 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)
- %77 = extractelement <4 x float> %76, i32 2
- %78 = insertelement <4 x float> undef, float %4, i32 0
- %79 = insertelement <4 x float> %78, float %5, i32 1
- %80 = insertelement <4 x float> %79, float %6, i32 2
- %81 = insertelement <4 x float> %80, float 0.000000e+00, i32 3
- %82 = insertelement <4 x float> undef, float %73, i32 0
- %83 = insertelement <4 x float> %82, float %75, i32 1
- %84 = insertelement <4 x float> %83, float %77, i32 2
- %85 = insertelement <4 x float> %84, float 0.000000e+00, i32 3
- %86 = call float @llvm.AMDGPU.dp4(<4 x float> %81, <4 x float> %85)
- %87 = insertelement <4 x float> undef, float %86, i32 0
- call void @llvm.R600.store.swizzle(<4 x float> %87, i32 2, i32 2)
- ret void
-}
-
-; Function Attrs: readnone
-declare float @llvm.AMDGPU.dp4(<4 x float>, <4 x float>) #1
-
-; Function Attrs: readonly
-declare float @fabs(float) #2
-
-; Function Attrs: readnone
-declare float @llvm.AMDGPU.rsq(float) #1
-
-; Function Attrs: readnone
-declare float @llvm.AMDIL.clamp.(float, float, float) #1
-
-; Function Attrs: nounwind readonly
-declare float @llvm.pow.f32(float, float) #3
-
-declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
-
-attributes #0 = { "ShaderType"="1" }
-attributes #1 = { readnone }
-attributes #2 = { readonly }
-attributes #3 = { nounwind readonly }
Removed: llvm/trunk/test/CodeGen/R600/load.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/load.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/load.ll (original)
+++ llvm/trunk/test/CodeGen/R600/load.ll (removed)
@@ -1,709 +0,0 @@
-; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=R600 --check-prefix=FUNC %s
-; RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck --check-prefix=R600 --check-prefix=FUNC %s
-; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck --check-prefix=SI --check-prefix=FUNC %s
-; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck --check-prefix=SI --check-prefix=FUNC %s
-
-;===------------------------------------------------------------------------===;
-; GLOBAL ADDRESS SPACE
-;===------------------------------------------------------------------------===;
-
-; Load an i8 value from the global address space.
-; FUNC-LABEL: {{^}}load_i8:
-; R600: VTX_READ_8 T{{[0-9]+\.X, T[0-9]+\.X}}
-
-; SI: buffer_load_ubyte v{{[0-9]+}},
-define void @load_i8(i32 addrspace(1)* %out, i8 addrspace(1)* %in) {
- %1 = load i8, i8 addrspace(1)* %in
- %2 = zext i8 %1 to i32
- store i32 %2, i32 addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}load_i8_sext:
-; R600: VTX_READ_8 [[DST:T[0-9]\.[XYZW]]], [[DST]]
-; R600: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST]], 0.0, literal
-; R600: 8
-; SI: buffer_load_sbyte
-define void @load_i8_sext(i32 addrspace(1)* %out, i8 addrspace(1)* %in) {
-entry:
- %0 = load i8, i8 addrspace(1)* %in
- %1 = sext i8 %0 to i32
- store i32 %1, i32 addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}load_v2i8:
-; R600: VTX_READ_8
-; R600: VTX_READ_8
-; SI: buffer_load_ubyte
-; SI: buffer_load_ubyte
-define void @load_v2i8(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(1)* %in) {
-entry:
- %0 = load <2 x i8>, <2 x i8> addrspace(1)* %in
- %1 = zext <2 x i8> %0 to <2 x i32>
- store <2 x i32> %1, <2 x i32> addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}load_v2i8_sext:
-; R600-DAG: VTX_READ_8 [[DST_X:T[0-9]\.[XYZW]]], [[DST_X]]
-; R600-DAG: VTX_READ_8 [[DST_Y:T[0-9]\.[XYZW]]], [[DST_Y]]
-; R600-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST_X]], 0.0, literal
-; R600-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST_Y]], 0.0, literal
-; R600-DAG: 8
-; R600-DAG: 8
-
-; SI: buffer_load_sbyte
-; SI: buffer_load_sbyte
-define void @load_v2i8_sext(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(1)* %in) {
-entry:
- %0 = load <2 x i8>, <2 x i8> addrspace(1)* %in
- %1 = sext <2 x i8> %0 to <2 x i32>
- store <2 x i32> %1, <2 x i32> addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}load_v4i8:
-; R600: VTX_READ_8
-; R600: VTX_READ_8
-; R600: VTX_READ_8
-; R600: VTX_READ_8
-; SI: buffer_load_ubyte
-; SI: buffer_load_ubyte
-; SI: buffer_load_ubyte
-; SI: buffer_load_ubyte
-define void @load_v4i8(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(1)* %in) {
-entry:
- %0 = load <4 x i8>, <4 x i8> addrspace(1)* %in
- %1 = zext <4 x i8> %0 to <4 x i32>
- store <4 x i32> %1, <4 x i32> addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}load_v4i8_sext:
-; R600-DAG: VTX_READ_8 [[DST_X:T[0-9]\.[XYZW]]], [[DST_X]]
-; R600-DAG: VTX_READ_8 [[DST_Y:T[0-9]\.[XYZW]]], [[DST_Y]]
-; R600-DAG: VTX_READ_8 [[DST_Z:T[0-9]\.[XYZW]]], [[DST_Z]]
-; R600-DAG: VTX_READ_8 [[DST_W:T[0-9]\.[XYZW]]], [[DST_W]]
-; R600-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST_X]], 0.0, literal
-; R600-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST_Y]], 0.0, literal
-; R600-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST_Z]], 0.0, literal
-; R600-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST_W]], 0.0, literal
-; R600-DAG: 8
-; R600-DAG: 8
-; R600-DAG: 8
-; R600-DAG: 8
-; SI: buffer_load_sbyte
-; SI: buffer_load_sbyte
-; SI: buffer_load_sbyte
-; SI: buffer_load_sbyte
-define void @load_v4i8_sext(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(1)* %in) {
-entry:
- %0 = load <4 x i8>, <4 x i8> addrspace(1)* %in
- %1 = sext <4 x i8> %0 to <4 x i32>
- store <4 x i32> %1, <4 x i32> addrspace(1)* %out
- ret void
-}
-
-; Load an i16 value from the global address space.
-; FUNC-LABEL: {{^}}load_i16:
-; R600: VTX_READ_16 T{{[0-9]+\.X, T[0-9]+\.X}}
-; SI: buffer_load_ushort
-define void @load_i16(i32 addrspace(1)* %out, i16 addrspace(1)* %in) {
-entry:
- %0 = load i16, i16 addrspace(1)* %in
- %1 = zext i16 %0 to i32
- store i32 %1, i32 addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}load_i16_sext:
-; R600: VTX_READ_16 [[DST:T[0-9]\.[XYZW]]], [[DST]]
-; R600: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST]], 0.0, literal
-; R600: 16
-; SI: buffer_load_sshort
-define void @load_i16_sext(i32 addrspace(1)* %out, i16 addrspace(1)* %in) {
-entry:
- %0 = load i16, i16 addrspace(1)* %in
- %1 = sext i16 %0 to i32
- store i32 %1, i32 addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}load_v2i16:
-; R600: VTX_READ_16
-; R600: VTX_READ_16
-; SI: buffer_load_ushort
-; SI: buffer_load_ushort
-define void @load_v2i16(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) {
-entry:
- %0 = load <2 x i16>, <2 x i16> addrspace(1)* %in
- %1 = zext <2 x i16> %0 to <2 x i32>
- store <2 x i32> %1, <2 x i32> addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}load_v2i16_sext:
-; R600-DAG: VTX_READ_16 [[DST_X:T[0-9]\.[XYZW]]], [[DST_X]]
-; R600-DAG: VTX_READ_16 [[DST_Y:T[0-9]\.[XYZW]]], [[DST_Y]]
-; R600-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST_X]], 0.0, literal
-; R600-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST_Y]], 0.0, literal
-; R600-DAG: 16
-; R600-DAG: 16
-; SI: buffer_load_sshort
-; SI: buffer_load_sshort
-define void @load_v2i16_sext(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) {
-entry:
- %0 = load <2 x i16>, <2 x i16> addrspace(1)* %in
- %1 = sext <2 x i16> %0 to <2 x i32>
- store <2 x i32> %1, <2 x i32> addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}load_v4i16:
-; R600: VTX_READ_16
-; R600: VTX_READ_16
-; R600: VTX_READ_16
-; R600: VTX_READ_16
-; SI: buffer_load_ushort
-; SI: buffer_load_ushort
-; SI: buffer_load_ushort
-; SI: buffer_load_ushort
-define void @load_v4i16(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) {
-entry:
- %0 = load <4 x i16>, <4 x i16> addrspace(1)* %in
- %1 = zext <4 x i16> %0 to <4 x i32>
- store <4 x i32> %1, <4 x i32> addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}load_v4i16_sext:
-; R600-DAG: VTX_READ_16 [[DST_X:T[0-9]\.[XYZW]]], [[DST_X]]
-; R600-DAG: VTX_READ_16 [[DST_Y:T[0-9]\.[XYZW]]], [[DST_Y]]
-; R600-DAG: VTX_READ_16 [[DST_Z:T[0-9]\.[XYZW]]], [[DST_Z]]
-; R600-DAG: VTX_READ_16 [[DST_W:T[0-9]\.[XYZW]]], [[DST_W]]
-; R600-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST_X]], 0.0, literal
-; R600-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST_Y]], 0.0, literal
-; R600-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST_Z]], 0.0, literal
-; R600-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST_W]], 0.0, literal
-; R600-DAG: 16
-; R600-DAG: 16
-; R600-DAG: 16
-; R600-DAG: 16
-; SI: buffer_load_sshort
-; SI: buffer_load_sshort
-; SI: buffer_load_sshort
-; SI: buffer_load_sshort
-define void @load_v4i16_sext(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) {
-entry:
- %0 = load <4 x i16>, <4 x i16> addrspace(1)* %in
- %1 = sext <4 x i16> %0 to <4 x i32>
- store <4 x i32> %1, <4 x i32> addrspace(1)* %out
- ret void
-}
-
-; load an i32 value from the global address space.
-; FUNC-LABEL: {{^}}load_i32:
-; R600: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0
-
-; SI: buffer_load_dword v{{[0-9]+}}
-define void @load_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
-entry:
- %0 = load i32, i32 addrspace(1)* %in
- store i32 %0, i32 addrspace(1)* %out
- ret void
-}
-
-; load a f32 value from the global address space.
-; FUNC-LABEL: {{^}}load_f32:
-; R600: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0
-
-; SI: buffer_load_dword v{{[0-9]+}}
-define void @load_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
-entry:
- %0 = load float, float addrspace(1)* %in
- store float %0, float addrspace(1)* %out
- ret void
-}
-
-; load a v2f32 value from the global address space
-; FUNC-LABEL: {{^}}load_v2f32:
-; R600: MEM_RAT
-; R600: VTX_READ_64
-; SI: buffer_load_dwordx2
-define void @load_v2f32(<2 x float> addrspace(1)* %out, <2 x float> addrspace(1)* %in) {
-entry:
- %0 = load <2 x float>, <2 x float> addrspace(1)* %in
- store <2 x float> %0, <2 x float> addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}load_i64:
-; R600: VTX_READ_64
-; SI: buffer_load_dwordx2
-define void @load_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
-entry:
- %0 = load i64, i64 addrspace(1)* %in
- store i64 %0, i64 addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}load_i64_sext:
-; R600: MEM_RAT
-; R600: MEM_RAT
-; R600: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, T{{[0-9]\.[XYZW]}}, literal.x
-; R600: 31
-; SI: buffer_load_dword
-
-define void @load_i64_sext(i64 addrspace(1)* %out, i32 addrspace(1)* %in) {
-entry:
- %0 = load i32, i32 addrspace(1)* %in
- %1 = sext i32 %0 to i64
- store i64 %1, i64 addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}load_i64_zext:
-; R600: MEM_RAT
-; R600: MEM_RAT
-define void @load_i64_zext(i64 addrspace(1)* %out, i32 addrspace(1)* %in) {
-entry:
- %0 = load i32, i32 addrspace(1)* %in
- %1 = zext i32 %0 to i64
- store i64 %1, i64 addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}load_v8i32:
-; R600: VTX_READ_128
-; R600: VTX_READ_128
-; XXX: We should be using DWORDX4 instructions on SI.
-; SI: buffer_load_dword
-; SI: buffer_load_dword
-; SI: buffer_load_dword
-; SI: buffer_load_dword
-; SI: buffer_load_dword
-; SI: buffer_load_dword
-; SI: buffer_load_dword
-; SI: buffer_load_dword
-define void @load_v8i32(<8 x i32> addrspace(1)* %out, <8 x i32> addrspace(1)* %in) {
-entry:
- %0 = load <8 x i32>, <8 x i32> addrspace(1)* %in
- store <8 x i32> %0, <8 x i32> addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}load_v16i32:
-; R600: VTX_READ_128
-; R600: VTX_READ_128
-; R600: VTX_READ_128
-; R600: VTX_READ_128
-; XXX: We should be using DWORDX4 instructions on SI.
-; SI: buffer_load_dword
-; SI: buffer_load_dword
-; SI: buffer_load_dword
-; SI: buffer_load_dword
-; SI: buffer_load_dword
-; SI: buffer_load_dword
-; SI: buffer_load_dword
-; SI: buffer_load_dword
-; SI: buffer_load_dword
-; SI: buffer_load_dword
-; SI: buffer_load_dword
-; SI: buffer_load_dword
-; SI: buffer_load_dword
-; SI: buffer_load_dword
-; SI: buffer_load_dword
-; SI: buffer_load_dword
-define void @load_v16i32(<16 x i32> addrspace(1)* %out, <16 x i32> addrspace(1)* %in) {
-entry:
- %0 = load <16 x i32>, <16 x i32> addrspace(1)* %in
- store <16 x i32> %0, <16 x i32> addrspace(1)* %out
- ret void
-}
-
-;===------------------------------------------------------------------------===;
-; CONSTANT ADDRESS SPACE
-;===------------------------------------------------------------------------===;
-
-; Load a sign-extended i8 value
-; FUNC-LABEL: {{^}}load_const_i8_sext:
-; R600: VTX_READ_8 [[DST:T[0-9]\.[XYZW]]], [[DST]]
-; R600: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST]], 0.0, literal
-; R600: 8
-; SI: buffer_load_sbyte v{{[0-9]+}},
-define void @load_const_i8_sext(i32 addrspace(1)* %out, i8 addrspace(2)* %in) {
-entry:
- %0 = load i8, i8 addrspace(2)* %in
- %1 = sext i8 %0 to i32
- store i32 %1, i32 addrspace(1)* %out
- ret void
-}
-
-; Load an aligned i8 value
-; FUNC-LABEL: {{^}}load_const_i8_aligned:
-; R600: VTX_READ_8 T{{[0-9]+\.X, T[0-9]+\.X}}
-; SI: buffer_load_ubyte v{{[0-9]+}},
-define void @load_const_i8_aligned(i32 addrspace(1)* %out, i8 addrspace(2)* %in) {
-entry:
- %0 = load i8, i8 addrspace(2)* %in
- %1 = zext i8 %0 to i32
- store i32 %1, i32 addrspace(1)* %out
- ret void
-}
-
-; Load an un-aligned i8 value
-; FUNC-LABEL: {{^}}load_const_i8_unaligned:
-; R600: VTX_READ_8 T{{[0-9]+\.X, T[0-9]+\.X}}
-; SI: buffer_load_ubyte v{{[0-9]+}},
-define void @load_const_i8_unaligned(i32 addrspace(1)* %out, i8 addrspace(2)* %in) {
-entry:
- %0 = getelementptr i8, i8 addrspace(2)* %in, i32 1
- %1 = load i8, i8 addrspace(2)* %0
- %2 = zext i8 %1 to i32
- store i32 %2, i32 addrspace(1)* %out
- ret void
-}
-
-; Load a sign-extended i16 value
-; FUNC-LABEL: {{^}}load_const_i16_sext:
-; R600: VTX_READ_16 [[DST:T[0-9]\.[XYZW]]], [[DST]]
-; R600: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST]], 0.0, literal
-; R600: 16
-; SI: buffer_load_sshort
-define void @load_const_i16_sext(i32 addrspace(1)* %out, i16 addrspace(2)* %in) {
-entry:
- %0 = load i16, i16 addrspace(2)* %in
- %1 = sext i16 %0 to i32
- store i32 %1, i32 addrspace(1)* %out
- ret void
-}
-
-; Load an aligned i16 value
-; FUNC-LABEL: {{^}}load_const_i16_aligned:
-; R600: VTX_READ_16 T{{[0-9]+\.X, T[0-9]+\.X}}
-; SI: buffer_load_ushort
-define void @load_const_i16_aligned(i32 addrspace(1)* %out, i16 addrspace(2)* %in) {
-entry:
- %0 = load i16, i16 addrspace(2)* %in
- %1 = zext i16 %0 to i32
- store i32 %1, i32 addrspace(1)* %out
- ret void
-}
-
-; Load an un-aligned i16 value
-; FUNC-LABEL: {{^}}load_const_i16_unaligned:
-; R600: VTX_READ_16 T{{[0-9]+\.X, T[0-9]+\.X}}
-; SI: buffer_load_ushort
-define void @load_const_i16_unaligned(i32 addrspace(1)* %out, i16 addrspace(2)* %in) {
-entry:
- %0 = getelementptr i16, i16 addrspace(2)* %in, i32 1
- %1 = load i16, i16 addrspace(2)* %0
- %2 = zext i16 %1 to i32
- store i32 %2, i32 addrspace(1)* %out
- ret void
-}
-
-; Load an i32 value from the constant address space.
-; FUNC-LABEL: {{^}}load_const_addrspace_i32:
-; R600: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0
-
-; SI: s_load_dword s{{[0-9]+}}
-define void @load_const_addrspace_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in) {
-entry:
- %0 = load i32, i32 addrspace(2)* %in
- store i32 %0, i32 addrspace(1)* %out
- ret void
-}
-
-; Load a f32 value from the constant address space.
-; FUNC-LABEL: {{^}}load_const_addrspace_f32:
-; R600: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0
-
-; SI: s_load_dword s{{[0-9]+}}
-define void @load_const_addrspace_f32(float addrspace(1)* %out, float addrspace(2)* %in) {
- %1 = load float, float addrspace(2)* %in
- store float %1, float addrspace(1)* %out
- ret void
-}
-
-;===------------------------------------------------------------------------===;
-; LOCAL ADDRESS SPACE
-;===------------------------------------------------------------------------===;
-
-; Load an i8 value from the local address space.
-; FUNC-LABEL: {{^}}load_i8_local:
-; R600: LDS_UBYTE_READ_RET
-; SI-NOT: s_wqm_b64
-; SI: s_mov_b32 m0
-; SI: ds_read_u8
-define void @load_i8_local(i32 addrspace(1)* %out, i8 addrspace(3)* %in) {
- %1 = load i8, i8 addrspace(3)* %in
- %2 = zext i8 %1 to i32
- store i32 %2, i32 addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}load_i8_sext_local:
-; R600: LDS_UBYTE_READ_RET
-; R600: BFE_INT
-; SI-NOT: s_wqm_b64
-; SI: s_mov_b32 m0
-; SI: ds_read_i8
-define void @load_i8_sext_local(i32 addrspace(1)* %out, i8 addrspace(3)* %in) {
-entry:
- %0 = load i8, i8 addrspace(3)* %in
- %1 = sext i8 %0 to i32
- store i32 %1, i32 addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}load_v2i8_local:
-; R600: LDS_UBYTE_READ_RET
-; R600: LDS_UBYTE_READ_RET
-; SI-NOT: s_wqm_b64
-; SI: s_mov_b32 m0
-; SI: ds_read_u8
-; SI: ds_read_u8
-define void @load_v2i8_local(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(3)* %in) {
-entry:
- %0 = load <2 x i8>, <2 x i8> addrspace(3)* %in
- %1 = zext <2 x i8> %0 to <2 x i32>
- store <2 x i32> %1, <2 x i32> addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}load_v2i8_sext_local:
-; R600-DAG: LDS_UBYTE_READ_RET
-; R600-DAG: LDS_UBYTE_READ_RET
-; R600-DAG: BFE_INT
-; R600-DAG: BFE_INT
-; SI-NOT: s_wqm_b64
-; SI: s_mov_b32 m0
-; SI: ds_read_i8
-; SI: ds_read_i8
-define void @load_v2i8_sext_local(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(3)* %in) {
-entry:
- %0 = load <2 x i8>, <2 x i8> addrspace(3)* %in
- %1 = sext <2 x i8> %0 to <2 x i32>
- store <2 x i32> %1, <2 x i32> addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}load_v4i8_local:
-; R600: LDS_UBYTE_READ_RET
-; R600: LDS_UBYTE_READ_RET
-; R600: LDS_UBYTE_READ_RET
-; R600: LDS_UBYTE_READ_RET
-; SI-NOT: s_wqm_b64
-; SI: s_mov_b32 m0
-; SI: ds_read_u8
-; SI: ds_read_u8
-; SI: ds_read_u8
-; SI: ds_read_u8
-define void @load_v4i8_local(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(3)* %in) {
-entry:
- %0 = load <4 x i8>, <4 x i8> addrspace(3)* %in
- %1 = zext <4 x i8> %0 to <4 x i32>
- store <4 x i32> %1, <4 x i32> addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}load_v4i8_sext_local:
-; R600-DAG: LDS_UBYTE_READ_RET
-; R600-DAG: LDS_UBYTE_READ_RET
-; R600-DAG: LDS_UBYTE_READ_RET
-; R600-DAG: LDS_UBYTE_READ_RET
-; R600-DAG: BFE_INT
-; R600-DAG: BFE_INT
-; R600-DAG: BFE_INT
-; R600-DAG: BFE_INT
-; SI-NOT: s_wqm_b64
-; SI: s_mov_b32 m0
-; SI: ds_read_i8
-; SI: ds_read_i8
-; SI: ds_read_i8
-; SI: ds_read_i8
-define void @load_v4i8_sext_local(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(3)* %in) {
-entry:
- %0 = load <4 x i8>, <4 x i8> addrspace(3)* %in
- %1 = sext <4 x i8> %0 to <4 x i32>
- store <4 x i32> %1, <4 x i32> addrspace(1)* %out
- ret void
-}
-
-; Load an i16 value from the local address space.
-; FUNC-LABEL: {{^}}load_i16_local:
-; R600: LDS_USHORT_READ_RET
-; SI-NOT: s_wqm_b64
-; SI: s_mov_b32 m0
-; SI: ds_read_u16
-define void @load_i16_local(i32 addrspace(1)* %out, i16 addrspace(3)* %in) {
-entry:
- %0 = load i16, i16 addrspace(3)* %in
- %1 = zext i16 %0 to i32
- store i32 %1, i32 addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}load_i16_sext_local:
-; R600: LDS_USHORT_READ_RET
-; R600: BFE_INT
-; SI-NOT: s_wqm_b64
-; SI: s_mov_b32 m0
-; SI: ds_read_i16
-define void @load_i16_sext_local(i32 addrspace(1)* %out, i16 addrspace(3)* %in) {
-entry:
- %0 = load i16, i16 addrspace(3)* %in
- %1 = sext i16 %0 to i32
- store i32 %1, i32 addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}load_v2i16_local:
-; R600: LDS_USHORT_READ_RET
-; R600: LDS_USHORT_READ_RET
-; SI-NOT: s_wqm_b64
-; SI: s_mov_b32 m0
-; SI: ds_read_u16
-; SI: ds_read_u16
-define void @load_v2i16_local(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(3)* %in) {
-entry:
- %0 = load <2 x i16>, <2 x i16> addrspace(3)* %in
- %1 = zext <2 x i16> %0 to <2 x i32>
- store <2 x i32> %1, <2 x i32> addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}load_v2i16_sext_local:
-; R600-DAG: LDS_USHORT_READ_RET
-; R600-DAG: LDS_USHORT_READ_RET
-; R600-DAG: BFE_INT
-; R600-DAG: BFE_INT
-; SI-NOT: s_wqm_b64
-; SI: s_mov_b32 m0
-; SI: ds_read_i16
-; SI: ds_read_i16
-define void @load_v2i16_sext_local(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(3)* %in) {
-entry:
- %0 = load <2 x i16>, <2 x i16> addrspace(3)* %in
- %1 = sext <2 x i16> %0 to <2 x i32>
- store <2 x i32> %1, <2 x i32> addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}load_v4i16_local:
-; R600: LDS_USHORT_READ_RET
-; R600: LDS_USHORT_READ_RET
-; R600: LDS_USHORT_READ_RET
-; R600: LDS_USHORT_READ_RET
-; SI-NOT: s_wqm_b64
-; SI: s_mov_b32 m0
-; SI: ds_read_u16
-; SI: ds_read_u16
-; SI: ds_read_u16
-; SI: ds_read_u16
-define void @load_v4i16_local(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(3)* %in) {
-entry:
- %0 = load <4 x i16>, <4 x i16> addrspace(3)* %in
- %1 = zext <4 x i16> %0 to <4 x i32>
- store <4 x i32> %1, <4 x i32> addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}load_v4i16_sext_local:
-; R600-DAG: LDS_USHORT_READ_RET
-; R600-DAG: LDS_USHORT_READ_RET
-; R600-DAG: LDS_USHORT_READ_RET
-; R600-DAG: LDS_USHORT_READ_RET
-; R600-DAG: BFE_INT
-; R600-DAG: BFE_INT
-; R600-DAG: BFE_INT
-; R600-DAG: BFE_INT
-; SI-NOT: s_wqm_b64
-; SI: s_mov_b32 m0
-; SI: ds_read_i16
-; SI: ds_read_i16
-; SI: ds_read_i16
-; SI: ds_read_i16
-define void @load_v4i16_sext_local(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(3)* %in) {
-entry:
- %0 = load <4 x i16>, <4 x i16> addrspace(3)* %in
- %1 = sext <4 x i16> %0 to <4 x i32>
- store <4 x i32> %1, <4 x i32> addrspace(1)* %out
- ret void
-}
-
-; load an i32 value from the local address space.
-; FUNC-LABEL: {{^}}load_i32_local:
-; R600: LDS_READ_RET
-; SI-NOT: s_wqm_b64
-; SI: s_mov_b32 m0
-; SI: ds_read_b32
-define void @load_i32_local(i32 addrspace(1)* %out, i32 addrspace(3)* %in) {
-entry:
- %0 = load i32, i32 addrspace(3)* %in
- store i32 %0, i32 addrspace(1)* %out
- ret void
-}
-
-; load a f32 value from the local address space.
-; FUNC-LABEL: {{^}}load_f32_local:
-; R600: LDS_READ_RET
-; SI: s_mov_b32 m0
-; SI: ds_read_b32
-define void @load_f32_local(float addrspace(1)* %out, float addrspace(3)* %in) {
-entry:
- %0 = load float, float addrspace(3)* %in
- store float %0, float addrspace(1)* %out
- ret void
-}
-
-; load a v2f32 value from the local address space
-; FUNC-LABEL: {{^}}load_v2f32_local:
-; R600: LDS_READ_RET
-; R600: LDS_READ_RET
-; SI: s_mov_b32 m0
-; SI: ds_read_b64
-define void @load_v2f32_local(<2 x float> addrspace(1)* %out, <2 x float> addrspace(3)* %in) {
-entry:
- %0 = load <2 x float>, <2 x float> addrspace(3)* %in
- store <2 x float> %0, <2 x float> addrspace(1)* %out
- ret void
-}
-
-; Test loading an i32 and a v2i32 value from the same base pointer.
-; FUNC-LABEL: {{^}}load_i32_v2i32_local:
-; R600: LDS_READ_RET
-; R600: LDS_READ_RET
-; R600: LDS_READ_RET
-; SI-DAG: ds_read_b32
-; SI-DAG: ds_read2_b32
-define void @load_i32_v2i32_local(<2 x i32> addrspace(1)* %out, i32 addrspace(3)* %in) {
- %scalar = load i32, i32 addrspace(3)* %in
- %tmp0 = bitcast i32 addrspace(3)* %in to <2 x i32> addrspace(3)*
- %vec_ptr = getelementptr <2 x i32>, <2 x i32> addrspace(3)* %tmp0, i32 2
- %vec0 = load <2 x i32>, <2 x i32> addrspace(3)* %vec_ptr, align 4
- %vec1 = insertelement <2 x i32> <i32 0, i32 0>, i32 %scalar, i32 0
- %vec = add <2 x i32> %vec0, %vec1
- store <2 x i32> %vec, <2 x i32> addrspace(1)* %out
- ret void
-}
-
-
-@lds = addrspace(3) global [512 x i32] undef, align 4
-
-; On SI we need to make sure that the base offset is a register and not
-; an immediate.
-; FUNC-LABEL: {{^}}load_i32_local_const_ptr:
-; SI: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0
-; SI: ds_read_b32 v0, v[[ZERO]] offset:4
-; R600: LDS_READ_RET
-define void @load_i32_local_const_ptr(i32 addrspace(1)* %out, i32 addrspace(3)* %in) {
-entry:
- %tmp0 = getelementptr [512 x i32], [512 x i32] addrspace(3)* @lds, i32 0, i32 1
- %tmp1 = load i32, i32 addrspace(3)* %tmp0
- %tmp2 = getelementptr i32, i32 addrspace(1)* %out, i32 1
- store i32 %tmp1, i32 addrspace(1)* %tmp2
- ret void
-}
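
The bare "; R600-DAG: 8" and "; R600: 16" lines in the sign-extension tests of
load.ll above (load_v4i8_sext, load_i16_sext and friends) match the width literal
of the preceding BFE_INT, which shows up on its own line in the R600 assembly.
A minimal sketch of the pattern those checks describe, not part of this patch and
assuming BFE_INT's operands are (dst, src, offset, width):

; sext of a narrow load is matched as a signed bit-field extract starting at bit 0:
;   BFE_INT dst, src, 0.0, literal     width taken from literal.x
;   8                                  the standalone width literal the tests check for
define void @sext_width_literal_sketch(i32 addrspace(1)* %out, i8 addrspace(1)* %in) {
entry:
  %byte = load i8, i8 addrspace(1)* %in
  %ext = sext i8 %byte to i32          ; an i16 source would use a width literal of 16
  store i32 %ext, i32 addrspace(1)* %out
  ret void
}
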
Removed: llvm/trunk/test/CodeGen/R600/load.vec.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/load.vec.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/load.vec.ll (original)
+++ llvm/trunk/test/CodeGen/R600/load.vec.ll (removed)
@@ -1,25 +0,0 @@
-; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG %s
-; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck --check-prefix=SI %s
-; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck --check-prefix=SI %s
-
-; load a v2i32 value from the global address space.
-; EG: {{^}}load_v2i32:
-; EG: VTX_READ_64 T{{[0-9]+}}.XY, T{{[0-9]+}}.X, 0
-; SI: {{^}}load_v2i32:
-; SI: buffer_load_dwordx2 v[{{[0-9]+:[0-9]+}}]
-define void @load_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
- %a = load <2 x i32>, <2 x i32> addrspace(1)* %in
- store <2 x i32> %a, <2 x i32> addrspace(1)* %out
- ret void
-}
-
-; load a v4i32 value from the global address space.
-; EG: {{^}}load_v4i32:
-; EG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0
-; SI: {{^}}load_v4i32:
-; SI: buffer_load_dwordx4 v[{{[0-9]+:[0-9]+}}]
-define void @load_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
- %a = load <4 x i32>, <4 x i32> addrspace(1)* %in
- store <4 x i32> %a, <4 x i32> addrspace(1)* %out
- ret void
-}
Removed: llvm/trunk/test/CodeGen/R600/load64.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/load64.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/load64.ll (original)
+++ llvm/trunk/test/CodeGen/R600/load64.ll (removed)
@@ -1,31 +0,0 @@
-; RUN: llc < %s -march=amdgcn -mcpu=tahiti -verify-machineinstrs | FileCheck %s
-; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s
-
-; load a f64 value from the global address space.
-; CHECK-LABEL: {{^}}load_f64:
-; CHECK: buffer_load_dwordx2 v[{{[0-9]+:[0-9]+}}]
-; CHECK: buffer_store_dwordx2 v[{{[0-9]+:[0-9]+}}]
-define void @load_f64(double addrspace(1)* %out, double addrspace(1)* %in) {
- %1 = load double, double addrspace(1)* %in
- store double %1, double addrspace(1)* %out
- ret void
-}
-
-; CHECK-LABEL: {{^}}load_i64:
-; CHECK: buffer_load_dwordx2 v[{{[0-9]+:[0-9]+}}]
-; CHECK: buffer_store_dwordx2 v[{{[0-9]+:[0-9]+}}]
-define void @load_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
- %tmp = load i64, i64 addrspace(1)* %in
- store i64 %tmp, i64 addrspace(1)* %out, align 8
- ret void
-}
-
-; Load a f64 value from the constant address space.
-; CHECK-LABEL: {{^}}load_const_addrspace_f64:
-; CHECK: s_load_dwordx2 s[{{[0-9]+:[0-9]+}}]
-; CHECK: buffer_store_dwordx2 v[{{[0-9]+:[0-9]+}}]
-define void @load_const_addrspace_f64(double addrspace(1)* %out, double addrspace(2)* %in) {
- %1 = load double, double addrspace(2)* %in
- store double %1, double addrspace(1)* %out
- ret void
-}
Removed: llvm/trunk/test/CodeGen/R600/local-64.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/local-64.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/local-64.ll (original)
+++ llvm/trunk/test/CodeGen/R600/local-64.ll (removed)
@@ -1,167 +0,0 @@
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck --check-prefix=SI --check-prefix=BOTH %s
-; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck --check-prefix=CI --check-prefix=BOTH %s
-; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck --check-prefix=CI --check-prefix=BOTH %s
-
-; BOTH-LABEL: {{^}}local_i32_load
-; BOTH: ds_read_b32 [[REG:v[0-9]+]], v{{[0-9]+}} offset:28
-; BOTH: buffer_store_dword [[REG]],
-define void @local_i32_load(i32 addrspace(1)* %out, i32 addrspace(3)* %in) nounwind {
- %gep = getelementptr i32, i32 addrspace(3)* %in, i32 7
- %val = load i32, i32 addrspace(3)* %gep, align 4
- store i32 %val, i32 addrspace(1)* %out, align 4
- ret void
-}
-
-; BOTH-LABEL: {{^}}local_i32_load_0_offset
-; BOTH: ds_read_b32 [[REG:v[0-9]+]], v{{[0-9]+}}
-; BOTH: buffer_store_dword [[REG]],
-define void @local_i32_load_0_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %in) nounwind {
- %val = load i32, i32 addrspace(3)* %in, align 4
- store i32 %val, i32 addrspace(1)* %out, align 4
- ret void
-}
-
-; BOTH-LABEL: {{^}}local_i8_load_i16_max_offset:
-; BOTH-NOT: ADD
-; BOTH: ds_read_u8 [[REG:v[0-9]+]], {{v[0-9]+}} offset:65535
-; BOTH: buffer_store_byte [[REG]],
-define void @local_i8_load_i16_max_offset(i8 addrspace(1)* %out, i8 addrspace(3)* %in) nounwind {
- %gep = getelementptr i8, i8 addrspace(3)* %in, i32 65535
- %val = load i8, i8 addrspace(3)* %gep, align 4
- store i8 %val, i8 addrspace(1)* %out, align 4
- ret void
-}
-
-; BOTH-LABEL: {{^}}local_i8_load_over_i16_max_offset:
-; The LDS offset will be 65536 bytes, which is larger than the size of LDS on
-; SI, so it is OR'd into the base pointer rather than being folded into the offset field.
-; SI: s_or_b32 [[ADDR:s[0-9]+]], s{{[0-9]+}}, 0x10000
-; CI: s_add_i32 [[ADDR:s[0-9]+]], s{{[0-9]+}}, 0x10000
-; BOTH: v_mov_b32_e32 [[VREGADDR:v[0-9]+]], [[ADDR]]
-; BOTH: ds_read_u8 [[REG:v[0-9]+]], [[VREGADDR]]
-; BOTH: buffer_store_byte [[REG]],
-define void @local_i8_load_over_i16_max_offset(i8 addrspace(1)* %out, i8 addrspace(3)* %in) nounwind {
- %gep = getelementptr i8, i8 addrspace(3)* %in, i32 65536
- %val = load i8, i8 addrspace(3)* %gep, align 4
- store i8 %val, i8 addrspace(1)* %out, align 4
- ret void
-}
-
-; BOTH-LABEL: {{^}}local_i64_load:
-; BOTH-NOT: ADD
-; BOTH: ds_read_b64 [[REG:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}} offset:56
-; BOTH: buffer_store_dwordx2 [[REG]],
-define void @local_i64_load(i64 addrspace(1)* %out, i64 addrspace(3)* %in) nounwind {
- %gep = getelementptr i64, i64 addrspace(3)* %in, i32 7
- %val = load i64, i64 addrspace(3)* %gep, align 8
- store i64 %val, i64 addrspace(1)* %out, align 8
- ret void
-}
-
-; BOTH-LABEL: {{^}}local_i64_load_0_offset
-; BOTH: ds_read_b64 [[REG:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}
-; BOTH: buffer_store_dwordx2 [[REG]],
-define void @local_i64_load_0_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %in) nounwind {
- %val = load i64, i64 addrspace(3)* %in, align 8
- store i64 %val, i64 addrspace(1)* %out, align 8
- ret void
-}
-
-; BOTH-LABEL: {{^}}local_f64_load:
-; BOTH-NOT: ADD
-; BOTH: ds_read_b64 [[REG:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}} offset:56
-; BOTH: buffer_store_dwordx2 [[REG]],
-define void @local_f64_load(double addrspace(1)* %out, double addrspace(3)* %in) nounwind {
- %gep = getelementptr double, double addrspace(3)* %in, i32 7
- %val = load double, double addrspace(3)* %gep, align 8
- store double %val, double addrspace(1)* %out, align 8
- ret void
-}
-
-; BOTH-LABEL: {{^}}local_f64_load_0_offset
-; BOTH: ds_read_b64 [[REG:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}
-; BOTH: buffer_store_dwordx2 [[REG]],
-define void @local_f64_load_0_offset(double addrspace(1)* %out, double addrspace(3)* %in) nounwind {
- %val = load double, double addrspace(3)* %in, align 8
- store double %val, double addrspace(1)* %out, align 8
- ret void
-}
-
-; BOTH-LABEL: {{^}}local_i64_store:
-; BOTH-NOT: ADD
-; BOTH: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:56
-define void @local_i64_store(i64 addrspace(3)* %out) nounwind {
- %gep = getelementptr i64, i64 addrspace(3)* %out, i32 7
- store i64 5678, i64 addrspace(3)* %gep, align 8
- ret void
-}
-
-; BOTH-LABEL: {{^}}local_i64_store_0_offset:
-; BOTH-NOT: ADD
-; BOTH: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}
-define void @local_i64_store_0_offset(i64 addrspace(3)* %out) nounwind {
- store i64 1234, i64 addrspace(3)* %out, align 8
- ret void
-}
-
-; BOTH-LABEL: {{^}}local_f64_store:
-; BOTH-NOT: ADD
-; BOTH: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:56
-define void @local_f64_store(double addrspace(3)* %out) nounwind {
- %gep = getelementptr double, double addrspace(3)* %out, i32 7
- store double 16.0, double addrspace(3)* %gep, align 8
- ret void
-}
-
-; BOTH-LABEL: {{^}}local_f64_store_0_offset
-; BOTH: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}
-define void @local_f64_store_0_offset(double addrspace(3)* %out) nounwind {
- store double 20.0, double addrspace(3)* %out, align 8
- ret void
-}
-
-; BOTH-LABEL: {{^}}local_v2i64_store:
-; BOTH-NOT: ADD
-; BOTH-DAG: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:112
-; BOTH-DAG: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:120
-; BOTH: s_endpgm
-define void @local_v2i64_store(<2 x i64> addrspace(3)* %out) nounwind {
- %gep = getelementptr <2 x i64>, <2 x i64> addrspace(3)* %out, i32 7
- store <2 x i64> <i64 5678, i64 5678>, <2 x i64> addrspace(3)* %gep, align 16
- ret void
-}
-
-; BOTH-LABEL: {{^}}local_v2i64_store_0_offset:
-; BOTH-NOT: ADD
-; BOTH-DAG: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}
-; BOTH-DAG: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:8
-; BOTH: s_endpgm
-define void @local_v2i64_store_0_offset(<2 x i64> addrspace(3)* %out) nounwind {
- store <2 x i64> <i64 1234, i64 1234>, <2 x i64> addrspace(3)* %out, align 16
- ret void
-}
-
-; BOTH-LABEL: {{^}}local_v4i64_store:
-; BOTH-NOT: ADD
-; BOTH-DAG: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:224
-; BOTH-DAG: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:232
-; BOTH-DAG: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:240
-; BOTH-DAG: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:248
-; BOTH: s_endpgm
-define void @local_v4i64_store(<4 x i64> addrspace(3)* %out) nounwind {
- %gep = getelementptr <4 x i64>, <4 x i64> addrspace(3)* %out, i32 7
- store <4 x i64> <i64 5678, i64 5678, i64 5678, i64 5678>, <4 x i64> addrspace(3)* %gep, align 16
- ret void
-}
-
-; BOTH-LABEL: {{^}}local_v4i64_store_0_offset:
-; BOTH-NOT: ADD
-; BOTH-DAG: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}
-; BOTH-DAG: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:8
-; BOTH-DAG: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:16
-; BOTH-DAG: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:24
-; BOTH: s_endpgm
-define void @local_v4i64_store_0_offset(<4 x i64> addrspace(3)* %out) nounwind {
- store <4 x i64> <i64 1234, i64 1234, i64 1234, i64 1234>, <4 x i64> addrspace(3)* %out, align 16
- ret void
-}
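
The offset:NN immediates checked throughout local-64.ll above fall straight out of
the getelementptr arithmetic (index times element size). A worked sketch, with the
values read off the checks; the 16-bit limit is only inferred from the
local_i8_load_i16_max_offset / local_i8_load_over_i16_max_offset pair:

;   getelementptr i64, i64 addrspace(3)* %in, i32 7          ; 7 * 8  = 56   -> ds_read_b64  ... offset:56
;   getelementptr <2 x i64>, <2 x i64> addrspace(3)* %out, i32 7
;                                                            ; 7 * 16 = 112  -> ds_write_b64 offset:112 and offset:120
;   getelementptr i8, i8 addrspace(3)* %in, i32 65535        ; still fits the offset field   -> offset:65535
;   getelementptr i8, i8 addrspace(3)* %in, i32 65536        ; does not fit; folded into the base
;                                                            ;   (s_or_b32 on SI, s_add_i32 on CI)
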
Removed: llvm/trunk/test/CodeGen/R600/local-atomics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/local-atomics.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/local-atomics.ll (original)
+++ llvm/trunk/test/CodeGen/R600/local-atomics.ll (removed)
@@ -1,551 +0,0 @@
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=CIVI -check-prefix=GCN -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=CIVI -check-prefix=GCN -check-prefix=FUNC %s
-; RUN: llc -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
-
-; FUNC-LABEL: {{^}}lds_atomic_xchg_ret_i32:
-; EG: LDS_WRXCHG_RET *
-; GCN: v_mov_b32_e32 [[DATA:v[0-9]+]], 4
-; GCN: s_load_dword [[SPTR:s[0-9]+]],
-; GCN: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
-; GCN: ds_wrxchg_rtn_b32 [[RESULT:v[0-9]+]], [[VPTR]], [[DATA]]
-; GCN: buffer_store_dword [[RESULT]],
-; GCN: s_endpgm
-define void @lds_atomic_xchg_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
- %result = atomicrmw xchg i32 addrspace(3)* %ptr, i32 4 seq_cst
- store i32 %result, i32 addrspace(1)* %out, align 4
- ret void
-}
-
-; FUNC-LABEL: {{^}}lds_atomic_xchg_ret_i32_offset:
-; EG: LDS_WRXCHG_RET *
-; GCN: ds_wrxchg_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
-; GCN: s_endpgm
-define void @lds_atomic_xchg_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
- %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
- %result = atomicrmw xchg i32 addrspace(3)* %gep, i32 4 seq_cst
- store i32 %result, i32 addrspace(1)* %out, align 4
- ret void
-}
-
-; XXX - Is it really necessary to load 4 into VGPR?
-; FUNC-LABEL: {{^}}lds_atomic_add_ret_i32:
-; EG: LDS_ADD_RET *
-; GCN: v_mov_b32_e32 [[DATA:v[0-9]+]], 4
-; GCN: s_load_dword [[SPTR:s[0-9]+]],
-; GCN: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
-; GCN: ds_add_rtn_u32 [[RESULT:v[0-9]+]], [[VPTR]], [[DATA]]
-; GCN: buffer_store_dword [[RESULT]],
-; GCN: s_endpgm
-define void @lds_atomic_add_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
- %result = atomicrmw add i32 addrspace(3)* %ptr, i32 4 seq_cst
- store i32 %result, i32 addrspace(1)* %out, align 4
- ret void
-}
-
-; FUNC-LABEL: {{^}}lds_atomic_add_ret_i32_offset:
-; EG: LDS_ADD_RET *
-; GCN: ds_add_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
-; GCN: s_endpgm
-define void @lds_atomic_add_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
- %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
- %result = atomicrmw add i32 addrspace(3)* %gep, i32 4 seq_cst
- store i32 %result, i32 addrspace(1)* %out, align 4
- ret void
-}
-
-; FUNC-LABEL: {{^}}lds_atomic_add_ret_i32_bad_si_offset:
-; EG: LDS_ADD_RET *
-; SI: ds_add_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
-; CIVI: ds_add_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
-; GCN: s_endpgm
-define void @lds_atomic_add_ret_i32_bad_si_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr, i32 %a, i32 %b) nounwind {
- %sub = sub i32 %a, %b
- %add = add i32 %sub, 4
- %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 %add
- %result = atomicrmw add i32 addrspace(3)* %gep, i32 4 seq_cst
- store i32 %result, i32 addrspace(1)* %out, align 4
- ret void
-}
-
-; FUNC-LABEL: {{^}}lds_atomic_inc_ret_i32:
-; EG: LDS_ADD_RET *
-; GCN: v_mov_b32_e32 [[NEGONE:v[0-9]+]], -1
-; GCN: ds_inc_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[NEGONE]]
-; GCN: s_endpgm
-define void @lds_atomic_inc_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
- %result = atomicrmw add i32 addrspace(3)* %ptr, i32 1 seq_cst
- store i32 %result, i32 addrspace(1)* %out, align 4
- ret void
-}
-
-; FUNC-LABEL: {{^}}lds_atomic_inc_ret_i32_offset:
-; EG: LDS_ADD_RET *
-; GCN: v_mov_b32_e32 [[NEGONE:v[0-9]+]], -1
-; GCN: ds_inc_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[NEGONE]] offset:16
-; GCN: s_endpgm
-define void @lds_atomic_inc_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
- %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
- %result = atomicrmw add i32 addrspace(3)* %gep, i32 1 seq_cst
- store i32 %result, i32 addrspace(1)* %out, align 4
- ret void
-}
-
-; FUNC-LABEL: {{^}}lds_atomic_inc_ret_i32_bad_si_offset:
-; EG: LDS_ADD_RET *
-; SI: ds_inc_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
-; CIVI: ds_inc_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
-; GCN: s_endpgm
-define void @lds_atomic_inc_ret_i32_bad_si_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr, i32 %a, i32 %b) nounwind {
- %sub = sub i32 %a, %b
- %add = add i32 %sub, 4
- %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 %add
- %result = atomicrmw add i32 addrspace(3)* %gep, i32 1 seq_cst
- store i32 %result, i32 addrspace(1)* %out, align 4
- ret void
-}
-
-; FUNC-LABEL: {{^}}lds_atomic_sub_ret_i32:
-; EG: LDS_SUB_RET *
-; GCN: ds_sub_rtn_u32
-; GCN: s_endpgm
-define void @lds_atomic_sub_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
- %result = atomicrmw sub i32 addrspace(3)* %ptr, i32 4 seq_cst
- store i32 %result, i32 addrspace(1)* %out, align 4
- ret void
-}
-
-; FUNC-LABEL: {{^}}lds_atomic_sub_ret_i32_offset:
-; EG: LDS_SUB_RET *
-; GCN: ds_sub_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
-; GCN: s_endpgm
-define void @lds_atomic_sub_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
- %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
- %result = atomicrmw sub i32 addrspace(3)* %gep, i32 4 seq_cst
- store i32 %result, i32 addrspace(1)* %out, align 4
- ret void
-}
-
-; FUNC-LABEL: {{^}}lds_atomic_dec_ret_i32:
-; EG: LDS_SUB_RET *
-; GCN: v_mov_b32_e32 [[NEGONE:v[0-9]+]], -1
-; GCN: ds_dec_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[NEGONE]]
-; GCN: s_endpgm
-define void @lds_atomic_dec_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
- %result = atomicrmw sub i32 addrspace(3)* %ptr, i32 1 seq_cst
- store i32 %result, i32 addrspace(1)* %out, align 4
- ret void
-}
-
-; FUNC-LABEL: {{^}}lds_atomic_dec_ret_i32_offset:
-; EG: LDS_SUB_RET *
-; GCN: v_mov_b32_e32 [[NEGONE:v[0-9]+]], -1
-; GCN: ds_dec_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[NEGONE]] offset:16
-; GCN: s_endpgm
-define void @lds_atomic_dec_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
- %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
- %result = atomicrmw sub i32 addrspace(3)* %gep, i32 1 seq_cst
- store i32 %result, i32 addrspace(1)* %out, align 4
- ret void
-}
-
-; FUNC-LABEL: {{^}}lds_atomic_and_ret_i32:
-; EG: LDS_AND_RET *
-; GCN: ds_and_rtn_b32
-; GCN: s_endpgm
-define void @lds_atomic_and_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
- %result = atomicrmw and i32 addrspace(3)* %ptr, i32 4 seq_cst
- store i32 %result, i32 addrspace(1)* %out, align 4
- ret void
-}
-
-; FUNC-LABEL: {{^}}lds_atomic_and_ret_i32_offset:
-; EG: LDS_AND_RET *
-; GCN: ds_and_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
-; GCN: s_endpgm
-define void @lds_atomic_and_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
- %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
- %result = atomicrmw and i32 addrspace(3)* %gep, i32 4 seq_cst
- store i32 %result, i32 addrspace(1)* %out, align 4
- ret void
-}
-
-; FUNC-LABEL: {{^}}lds_atomic_or_ret_i32:
-; EG: LDS_OR_RET *
-; GCN: ds_or_rtn_b32
-; GCN: s_endpgm
-define void @lds_atomic_or_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
- %result = atomicrmw or i32 addrspace(3)* %ptr, i32 4 seq_cst
- store i32 %result, i32 addrspace(1)* %out, align 4
- ret void
-}
-
-; FUNC-LABEL: {{^}}lds_atomic_or_ret_i32_offset:
-; EG: LDS_OR_RET *
-; GCN: ds_or_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
-; GCN: s_endpgm
-define void @lds_atomic_or_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
- %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
- %result = atomicrmw or i32 addrspace(3)* %gep, i32 4 seq_cst
- store i32 %result, i32 addrspace(1)* %out, align 4
- ret void
-}
-
-; FUNC-LABEL: {{^}}lds_atomic_xor_ret_i32:
-; EG: LDS_XOR_RET *
-; GCN: ds_xor_rtn_b32
-; GCN: s_endpgm
-define void @lds_atomic_xor_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
- %result = atomicrmw xor i32 addrspace(3)* %ptr, i32 4 seq_cst
- store i32 %result, i32 addrspace(1)* %out, align 4
- ret void
-}
-
-; FUNC-LABEL: {{^}}lds_atomic_xor_ret_i32_offset:
-; EG: LDS_XOR_RET *
-; GCN: ds_xor_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
-; GCN: s_endpgm
-define void @lds_atomic_xor_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
- %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
- %result = atomicrmw xor i32 addrspace(3)* %gep, i32 4 seq_cst
- store i32 %result, i32 addrspace(1)* %out, align 4
- ret void
-}
-
-; FIXME: There is no atomic nand instruction, so we somehow need to expand this.
-; XFUNC-LABEL: {{^}}lds_atomic_nand_ret_i32:
-; define void @lds_atomic_nand_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
-; %result = atomicrmw nand i32 addrspace(3)* %ptr, i32 4 seq_cst
-; store i32 %result, i32 addrspace(1)* %out, align 4
-; ret void
-; }
-
-; FUNC-LABEL: {{^}}lds_atomic_min_ret_i32:
-; EG: LDS_MIN_INT_RET *
-; GCN: ds_min_rtn_i32
-; GCN: s_endpgm
-define void @lds_atomic_min_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
- %result = atomicrmw min i32 addrspace(3)* %ptr, i32 4 seq_cst
- store i32 %result, i32 addrspace(1)* %out, align 4
- ret void
-}
-
-; FUNC-LABEL: {{^}}lds_atomic_min_ret_i32_offset:
-; EG: LDS_MIN_INT_RET *
-; GCN: ds_min_rtn_i32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
-; GCN: s_endpgm
-define void @lds_atomic_min_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
- %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
- %result = atomicrmw min i32 addrspace(3)* %gep, i32 4 seq_cst
- store i32 %result, i32 addrspace(1)* %out, align 4
- ret void
-}
-
-; FUNC-LABEL: {{^}}lds_atomic_max_ret_i32:
-; EG: LDS_MAX_INT_RET *
-; GCN: ds_max_rtn_i32
-; GCN: s_endpgm
-define void @lds_atomic_max_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
- %result = atomicrmw max i32 addrspace(3)* %ptr, i32 4 seq_cst
- store i32 %result, i32 addrspace(1)* %out, align 4
- ret void
-}
-
-; FUNC-LABEL: {{^}}lds_atomic_max_ret_i32_offset:
-; EG: LDS_MAX_INT_RET *
-; GCN: ds_max_rtn_i32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
-; GCN: s_endpgm
-define void @lds_atomic_max_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
- %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
- %result = atomicrmw max i32 addrspace(3)* %gep, i32 4 seq_cst
- store i32 %result, i32 addrspace(1)* %out, align 4
- ret void
-}
-
-; FUNC-LABEL: {{^}}lds_atomic_umin_ret_i32:
-; EG: LDS_MIN_UINT_RET *
-; GCN: ds_min_rtn_u32
-; GCN: s_endpgm
-define void @lds_atomic_umin_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
- %result = atomicrmw umin i32 addrspace(3)* %ptr, i32 4 seq_cst
- store i32 %result, i32 addrspace(1)* %out, align 4
- ret void
-}
-
-; FUNC-LABEL: {{^}}lds_atomic_umin_ret_i32_offset:
-; EG: LDS_MIN_UINT_RET *
-; GCN: ds_min_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
-; GCN: s_endpgm
-define void @lds_atomic_umin_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
- %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
- %result = atomicrmw umin i32 addrspace(3)* %gep, i32 4 seq_cst
- store i32 %result, i32 addrspace(1)* %out, align 4
- ret void
-}
-
-; FUNC-LABEL: {{^}}lds_atomic_umax_ret_i32:
-; EG: LDS_MAX_UINT_RET *
-; GCN: ds_max_rtn_u32
-; GCN: s_endpgm
-define void @lds_atomic_umax_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
- %result = atomicrmw umax i32 addrspace(3)* %ptr, i32 4 seq_cst
- store i32 %result, i32 addrspace(1)* %out, align 4
- ret void
-}
-
-; FUNC-LABEL: {{^}}lds_atomic_umax_ret_i32_offset:
-; EG: LDS_MAX_UINT_RET *
-; GCN: ds_max_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
-; GCN: s_endpgm
-define void @lds_atomic_umax_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
- %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
- %result = atomicrmw umax i32 addrspace(3)* %gep, i32 4 seq_cst
- store i32 %result, i32 addrspace(1)* %out, align 4
- ret void
-}
-
-; FUNC-LABEL: {{^}}lds_atomic_xchg_noret_i32:
-; GCN: s_load_dword [[SPTR:s[0-9]+]],
-; GCN: v_mov_b32_e32 [[DATA:v[0-9]+]], 4
-; GCN: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
-; GCN: ds_wrxchg_rtn_b32 [[RESULT:v[0-9]+]], [[VPTR]], [[DATA]]
-; GCN: s_endpgm
-define void @lds_atomic_xchg_noret_i32(i32 addrspace(3)* %ptr) nounwind {
- %result = atomicrmw xchg i32 addrspace(3)* %ptr, i32 4 seq_cst
- ret void
-}
-
-; FUNC-LABEL: {{^}}lds_atomic_xchg_noret_i32_offset:
-; GCN: ds_wrxchg_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
-; GCN: s_endpgm
-define void @lds_atomic_xchg_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
- %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
- %result = atomicrmw xchg i32 addrspace(3)* %gep, i32 4 seq_cst
- ret void
-}
-
-; XXX - Is it really necessary to load 4 into VGPR?
-; FUNC-LABEL: {{^}}lds_atomic_add_noret_i32:
-; GCN: s_load_dword [[SPTR:s[0-9]+]],
-; GCN: v_mov_b32_e32 [[DATA:v[0-9]+]], 4
-; GCN: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
-; GCN: ds_add_u32 [[VPTR]], [[DATA]]
-; GCN: s_endpgm
-define void @lds_atomic_add_noret_i32(i32 addrspace(3)* %ptr) nounwind {
- %result = atomicrmw add i32 addrspace(3)* %ptr, i32 4 seq_cst
- ret void
-}
-
-; FUNC-LABEL: {{^}}lds_atomic_add_noret_i32_offset:
-; GCN: ds_add_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
-; GCN: s_endpgm
-define void @lds_atomic_add_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
- %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
- %result = atomicrmw add i32 addrspace(3)* %gep, i32 4 seq_cst
- ret void
-}
-
-; FUNC-LABEL: {{^}}lds_atomic_add_noret_i32_bad_si_offset
-; SI: ds_add_u32 v{{[0-9]+}}, v{{[0-9]+}}
-; CIVI: ds_add_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
-; GCN: s_endpgm
-define void @lds_atomic_add_noret_i32_bad_si_offset(i32 addrspace(3)* %ptr, i32 %a, i32 %b) nounwind {
- %sub = sub i32 %a, %b
- %add = add i32 %sub, 4
- %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 %add
- %result = atomicrmw add i32 addrspace(3)* %gep, i32 4 seq_cst
- ret void
-}
-
-; FUNC-LABEL: {{^}}lds_atomic_inc_noret_i32:
-; GCN: v_mov_b32_e32 [[NEGONE:v[0-9]+]], -1
-; GCN: ds_inc_u32 v{{[0-9]+}}, [[NEGONE]]
-; GCN: s_endpgm
-define void @lds_atomic_inc_noret_i32(i32 addrspace(3)* %ptr) nounwind {
- %result = atomicrmw add i32 addrspace(3)* %ptr, i32 1 seq_cst
- ret void
-}
-
-; FUNC-LABEL: {{^}}lds_atomic_inc_noret_i32_offset:
-; GCN: v_mov_b32_e32 [[NEGONE:v[0-9]+]], -1
-; GCN: ds_inc_u32 v{{[0-9]+}}, [[NEGONE]] offset:16
-; GCN: s_endpgm
-define void @lds_atomic_inc_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
- %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
- %result = atomicrmw add i32 addrspace(3)* %gep, i32 1 seq_cst
- ret void
-}
-
-; FUNC-LABEL: {{^}}lds_atomic_inc_noret_i32_bad_si_offset:
-; SI: ds_inc_u32 v{{[0-9]+}}, v{{[0-9]+}}
-; CIVI: ds_inc_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
-; GCN: s_endpgm
-define void @lds_atomic_inc_noret_i32_bad_si_offset(i32 addrspace(3)* %ptr, i32 %a, i32 %b) nounwind {
- %sub = sub i32 %a, %b
- %add = add i32 %sub, 4
- %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 %add
- %result = atomicrmw add i32 addrspace(3)* %gep, i32 1 seq_cst
- ret void
-}
-
-; FUNC-LABEL: {{^}}lds_atomic_sub_noret_i32:
-; GCN: ds_sub_u32
-; GCN: s_endpgm
-define void @lds_atomic_sub_noret_i32(i32 addrspace(3)* %ptr) nounwind {
- %result = atomicrmw sub i32 addrspace(3)* %ptr, i32 4 seq_cst
- ret void
-}
-
-; FUNC-LABEL: {{^}}lds_atomic_sub_noret_i32_offset:
-; GCN: ds_sub_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
-; GCN: s_endpgm
-define void @lds_atomic_sub_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
- %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
- %result = atomicrmw sub i32 addrspace(3)* %gep, i32 4 seq_cst
- ret void
-}
-
-; FUNC-LABEL: {{^}}lds_atomic_dec_noret_i32:
-; GCN: v_mov_b32_e32 [[NEGONE:v[0-9]+]], -1
-; GCN: ds_dec_u32 v{{[0-9]+}}, [[NEGONE]]
-; GCN: s_endpgm
-define void @lds_atomic_dec_noret_i32(i32 addrspace(3)* %ptr) nounwind {
- %result = atomicrmw sub i32 addrspace(3)* %ptr, i32 1 seq_cst
- ret void
-}
-
-; FUNC-LABEL: {{^}}lds_atomic_dec_noret_i32_offset:
-; GCN: v_mov_b32_e32 [[NEGONE:v[0-9]+]], -1
-; GCN: ds_dec_u32 v{{[0-9]+}}, [[NEGONE]] offset:16
-; GCN: s_endpgm
-define void @lds_atomic_dec_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
- %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
- %result = atomicrmw sub i32 addrspace(3)* %gep, i32 1 seq_cst
- ret void
-}
-
-; FUNC-LABEL: {{^}}lds_atomic_and_noret_i32:
-; GCN: ds_and_b32
-; GCN: s_endpgm
-define void @lds_atomic_and_noret_i32(i32 addrspace(3)* %ptr) nounwind {
- %result = atomicrmw and i32 addrspace(3)* %ptr, i32 4 seq_cst
- ret void
-}
-
-; FUNC-LABEL: {{^}}lds_atomic_and_noret_i32_offset:
-; GCN: ds_and_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
-; GCN: s_endpgm
-define void @lds_atomic_and_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
- %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
- %result = atomicrmw and i32 addrspace(3)* %gep, i32 4 seq_cst
- ret void
-}
-
-; FUNC-LABEL: {{^}}lds_atomic_or_noret_i32:
-; GCN: ds_or_b32
-; GCN: s_endpgm
-define void @lds_atomic_or_noret_i32(i32 addrspace(3)* %ptr) nounwind {
- %result = atomicrmw or i32 addrspace(3)* %ptr, i32 4 seq_cst
- ret void
-}
-
-; FUNC-LABEL: {{^}}lds_atomic_or_noret_i32_offset:
-; GCN: ds_or_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
-; GCN: s_endpgm
-define void @lds_atomic_or_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
- %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
- %result = atomicrmw or i32 addrspace(3)* %gep, i32 4 seq_cst
- ret void
-}
-
-; FUNC-LABEL: {{^}}lds_atomic_xor_noret_i32:
-; GCN: ds_xor_b32
-; GCN: s_endpgm
-define void @lds_atomic_xor_noret_i32(i32 addrspace(3)* %ptr) nounwind {
- %result = atomicrmw xor i32 addrspace(3)* %ptr, i32 4 seq_cst
- ret void
-}
-
-; FUNC-LABEL: {{^}}lds_atomic_xor_noret_i32_offset:
-; GCN: ds_xor_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
-; GCN: s_endpgm
-define void @lds_atomic_xor_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
- %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
- %result = atomicrmw xor i32 addrspace(3)* %gep, i32 4 seq_cst
- ret void
-}
-
-; FIXME: There is no atomic nand instruction, so we somehow need to expand this.
-; XFUNC-LABEL: {{^}}lds_atomic_nand_noret_i32:
-; define void @lds_atomic_nand_noret_i32(i32 addrspace(3)* %ptr) nounwind {
-; %result = atomicrmw nand i32 addrspace(3)* %ptr, i32 4 seq_cst
-; ret void
-; }
-
-; FUNC-LABEL: {{^}}lds_atomic_min_noret_i32:
-; GCN: ds_min_i32
-; GCN: s_endpgm
-define void @lds_atomic_min_noret_i32(i32 addrspace(3)* %ptr) nounwind {
- %result = atomicrmw min i32 addrspace(3)* %ptr, i32 4 seq_cst
- ret void
-}
-
-; FUNC-LABEL: {{^}}lds_atomic_min_noret_i32_offset:
-; GCN: ds_min_i32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
-; GCN: s_endpgm
-define void @lds_atomic_min_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
- %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
- %result = atomicrmw min i32 addrspace(3)* %gep, i32 4 seq_cst
- ret void
-}
-
-; FUNC-LABEL: {{^}}lds_atomic_max_noret_i32:
-; GCN: ds_max_i32
-; GCN: s_endpgm
-define void @lds_atomic_max_noret_i32(i32 addrspace(3)* %ptr) nounwind {
- %result = atomicrmw max i32 addrspace(3)* %ptr, i32 4 seq_cst
- ret void
-}
-
-; FUNC-LABEL: {{^}}lds_atomic_max_noret_i32_offset:
-; GCN: ds_max_i32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
-; GCN: s_endpgm
-define void @lds_atomic_max_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
- %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
- %result = atomicrmw max i32 addrspace(3)* %gep, i32 4 seq_cst
- ret void
-}
-
-; FUNC-LABEL: {{^}}lds_atomic_umin_noret_i32:
-; GCN: ds_min_u32
-; GCN: s_endpgm
-define void @lds_atomic_umin_noret_i32(i32 addrspace(3)* %ptr) nounwind {
- %result = atomicrmw umin i32 addrspace(3)* %ptr, i32 4 seq_cst
- ret void
-}
-
-; FUNC-LABEL: {{^}}lds_atomic_umin_noret_i32_offset:
-; GCN: ds_min_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
-; GCN: s_endpgm
-define void @lds_atomic_umin_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
- %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
- %result = atomicrmw umin i32 addrspace(3)* %gep, i32 4 seq_cst
- ret void
-}
-
-; FUNC-LABEL: {{^}}lds_atomic_umax_noret_i32:
-; GCN: ds_max_u32
-; GCN: s_endpgm
-define void @lds_atomic_umax_noret_i32(i32 addrspace(3)* %ptr) nounwind {
- %result = atomicrmw umax i32 addrspace(3)* %ptr, i32 4 seq_cst
- ret void
-}
-
-; FUNC-LABEL: {{^}}lds_atomic_umax_noret_i32_offset:
-; GCN: ds_max_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
-; GCN: s_endpgm
-define void @lds_atomic_umax_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
- %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
- %result = atomicrmw umax i32 addrspace(3)* %gep, i32 4 seq_cst
- ret void
-}
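
The FIXME comments above note that there is no DS nand instruction, so an
atomicrmw nand on LDS would have to be expanded. A minimal sketch of one possible
expansion using a cmpxchg loop; this is not taken from the patch and is not
necessarily how the backend's atomic expansion would lower it:

define void @lds_atomic_nand_expand_sketch(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
entry:
  %init = load i32, i32 addrspace(3)* %ptr
  br label %loop

loop:                                             ; retry until the compare-exchange succeeds
  %old = phi i32 [ %init, %entry ], [ %loaded, %loop ]
  %and = and i32 %old, 4
  %nand = xor i32 %and, -1                        ; nand semantics: ~(%old & 4)
  %pair = cmpxchg i32 addrspace(3)* %ptr, i32 %old, i32 %nand seq_cst seq_cst
  %loaded = extractvalue { i32, i1 } %pair, 0
  %success = extractvalue { i32, i1 } %pair, 1
  br i1 %success, label %exit, label %loop

exit:
  store i32 %loaded, i32 addrspace(1)* %out, align 4   ; the "ret" flavour stores the old value
  ret void
}
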
Removed: llvm/trunk/test/CodeGen/R600/local-atomics64.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/local-atomics64.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/local-atomics64.ll (original)
+++ llvm/trunk/test/CodeGen/R600/local-atomics64.ll (removed)
@@ -1,470 +0,0 @@
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefix=SI -check-prefix=GCN -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefix=VI -check-prefix=GCN -check-prefix=FUNC %s
-
-; FUNC-LABEL: {{^}}lds_atomic_xchg_ret_i64:
-; GCN: ds_wrxchg_rtn_b64
-; GCN: s_endpgm
-define void @lds_atomic_xchg_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
- %result = atomicrmw xchg i64 addrspace(3)* %ptr, i64 4 seq_cst
- store i64 %result, i64 addrspace(1)* %out, align 8
- ret void
-}
-
-; FUNC-LABEL: {{^}}lds_atomic_xchg_ret_i64_offset:
-; GCN: ds_wrxchg_rtn_b64 {{.*}} offset:32
-; GCN: s_endpgm
-define void @lds_atomic_xchg_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
- %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
- %result = atomicrmw xchg i64 addrspace(3)* %gep, i64 4 seq_cst
- store i64 %result, i64 addrspace(1)* %out, align 8
- ret void
-}
-
-; FUNC-LABEL: {{^}}lds_atomic_add_ret_i64:
-; GCN: ds_add_rtn_u64
-; GCN: s_endpgm
-define void @lds_atomic_add_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
- %result = atomicrmw add i64 addrspace(3)* %ptr, i64 4 seq_cst
- store i64 %result, i64 addrspace(1)* %out, align 8
- ret void
-}
-
-; FUNC-LABEL: {{^}}lds_atomic_add_ret_i64_offset:
-; GCN: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], 9
-; GCN: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], 0
-; SI: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
-; VI: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c
-; GCN-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]]
-; GCN: ds_add_rtn_u64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[VPTR]], v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}} offset:32
-; GCN: buffer_store_dwordx2 [[RESULT]],
-; GCN: s_endpgm
-define void @lds_atomic_add_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
- %gep = getelementptr i64, i64 addrspace(3)* %ptr, i64 4
- %result = atomicrmw add i64 addrspace(3)* %gep, i64 9 seq_cst
- store i64 %result, i64 addrspace(1)* %out, align 8
- ret void
-}
-
-; FUNC-LABEL: {{^}}lds_atomic_inc_ret_i64:
-; GCN: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], -1
-; GCN: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], -1
-; GCN: ds_inc_rtn_u64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[VPTR]], v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}}
-; GCN: buffer_store_dwordx2 [[RESULT]],
-; GCN: s_endpgm
-define void @lds_atomic_inc_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
- %result = atomicrmw add i64 addrspace(3)* %ptr, i64 1 seq_cst
- store i64 %result, i64 addrspace(1)* %out, align 8
- ret void
-}
-
-; FUNC-LABEL: {{^}}lds_atomic_inc_ret_i64_offset:
-; GCN: ds_inc_rtn_u64 {{.*}} offset:32
-; GCN: s_endpgm
-define void @lds_atomic_inc_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
- %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
- %result = atomicrmw add i64 addrspace(3)* %gep, i64 1 seq_cst
- store i64 %result, i64 addrspace(1)* %out, align 8
- ret void
-}
-
-; FUNC-LABEL: {{^}}lds_atomic_sub_ret_i64:
-; GCN: ds_sub_rtn_u64
-; GCN: s_endpgm
-define void @lds_atomic_sub_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
- %result = atomicrmw sub i64 addrspace(3)* %ptr, i64 4 seq_cst
- store i64 %result, i64 addrspace(1)* %out, align 8
- ret void
-}
-
-; FUNC-LABEL: {{^}}lds_atomic_sub_ret_i64_offset:
-; GCN: ds_sub_rtn_u64 {{.*}} offset:32
-; GCN: s_endpgm
-define void @lds_atomic_sub_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
- %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
- %result = atomicrmw sub i64 addrspace(3)* %gep, i64 4 seq_cst
- store i64 %result, i64 addrspace(1)* %out, align 8
- ret void
-}
-
-; FUNC-LABEL: {{^}}lds_atomic_dec_ret_i64:
-; GCN: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], -1
-; GCN: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], -1
-; GCN: ds_dec_rtn_u64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[VPTR]], v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}}
-; GCN: buffer_store_dwordx2 [[RESULT]],
-; GCN: s_endpgm
-define void @lds_atomic_dec_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
- %result = atomicrmw sub i64 addrspace(3)* %ptr, i64 1 seq_cst
- store i64 %result, i64 addrspace(1)* %out, align 8
- ret void
-}
-
-; FUNC-LABEL: {{^}}lds_atomic_dec_ret_i64_offset:
-; GCN: ds_dec_rtn_u64 {{.*}} offset:32
-; GCN: s_endpgm
-define void @lds_atomic_dec_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
- %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
- %result = atomicrmw sub i64 addrspace(3)* %gep, i64 1 seq_cst
- store i64 %result, i64 addrspace(1)* %out, align 8
- ret void
-}
-
-; FUNC-LABEL: {{^}}lds_atomic_and_ret_i64:
-; GCN: ds_and_rtn_b64
-; GCN: s_endpgm
-define void @lds_atomic_and_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
- %result = atomicrmw and i64 addrspace(3)* %ptr, i64 4 seq_cst
- store i64 %result, i64 addrspace(1)* %out, align 8
- ret void
-}
-
-; FUNC-LABEL: {{^}}lds_atomic_and_ret_i64_offset:
-; GCN: ds_and_rtn_b64 {{.*}} offset:32
-; GCN: s_endpgm
-define void @lds_atomic_and_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
- %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
- %result = atomicrmw and i64 addrspace(3)* %gep, i64 4 seq_cst
- store i64 %result, i64 addrspace(1)* %out, align 8
- ret void
-}
-
-; FUNC-LABEL: {{^}}lds_atomic_or_ret_i64:
-; GCN: ds_or_rtn_b64
-; GCN: s_endpgm
-define void @lds_atomic_or_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
- %result = atomicrmw or i64 addrspace(3)* %ptr, i64 4 seq_cst
- store i64 %result, i64 addrspace(1)* %out, align 8
- ret void
-}
-
-; FUNC-LABEL: {{^}}lds_atomic_or_ret_i64_offset:
-; GCN: ds_or_rtn_b64 {{.*}} offset:32
-; GCN: s_endpgm
-define void @lds_atomic_or_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
- %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
- %result = atomicrmw or i64 addrspace(3)* %gep, i64 4 seq_cst
- store i64 %result, i64 addrspace(1)* %out, align 8
- ret void
-}
-
-; FUNC-LABEL: {{^}}lds_atomic_xor_ret_i64:
-; GCN: ds_xor_rtn_b64
-; GCN: s_endpgm
-define void @lds_atomic_xor_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
- %result = atomicrmw xor i64 addrspace(3)* %ptr, i64 4 seq_cst
- store i64 %result, i64 addrspace(1)* %out, align 8
- ret void
-}
-
-; FUNC-LABEL: {{^}}lds_atomic_xor_ret_i64_offset:
-; GCN: ds_xor_rtn_b64 {{.*}} offset:32
-; GCN: s_endpgm
-define void @lds_atomic_xor_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
- %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
- %result = atomicrmw xor i64 addrspace(3)* %gep, i64 4 seq_cst
- store i64 %result, i64 addrspace(1)* %out, align 8
- ret void
-}
-
-; FIXME: There is no atomic nand instruction, so we somehow need to expand this.
-; XFUNC-LABEL: {{^}}lds_atomic_nand_ret_i64:
-; define void @lds_atomic_nand_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
-; %result = atomicrmw nand i64 addrspace(3)* %ptr, i32 4 seq_cst
-; store i64 %result, i64 addrspace(1)* %out, align 8
-; ret void
-; }
-
-; FUNC-LABEL: {{^}}lds_atomic_min_ret_i64:
-; GCN: ds_min_rtn_i64
-; GCN: s_endpgm
-define void @lds_atomic_min_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
- %result = atomicrmw min i64 addrspace(3)* %ptr, i64 4 seq_cst
- store i64 %result, i64 addrspace(1)* %out, align 8
- ret void
-}
-
-; FUNC-LABEL: {{^}}lds_atomic_min_ret_i64_offset:
-; GCN: ds_min_rtn_i64 {{.*}} offset:32
-; GCN: s_endpgm
-define void @lds_atomic_min_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
- %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
- %result = atomicrmw min i64 addrspace(3)* %gep, i64 4 seq_cst
- store i64 %result, i64 addrspace(1)* %out, align 8
- ret void
-}
-
-; FUNC-LABEL: {{^}}lds_atomic_max_ret_i64:
-; GCN: ds_max_rtn_i64
-; GCN: s_endpgm
-define void @lds_atomic_max_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
- %result = atomicrmw max i64 addrspace(3)* %ptr, i64 4 seq_cst
- store i64 %result, i64 addrspace(1)* %out, align 8
- ret void
-}
-
-; FUNC-LABEL: {{^}}lds_atomic_max_ret_i64_offset:
-; GCN: ds_max_rtn_i64 {{.*}} offset:32
-; GCN: s_endpgm
-define void @lds_atomic_max_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
- %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
- %result = atomicrmw max i64 addrspace(3)* %gep, i64 4 seq_cst
- store i64 %result, i64 addrspace(1)* %out, align 8
- ret void
-}
-
-; FUNC-LABEL: {{^}}lds_atomic_umin_ret_i64:
-; GCN: ds_min_rtn_u64
-; GCN: s_endpgm
-define void @lds_atomic_umin_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
- %result = atomicrmw umin i64 addrspace(3)* %ptr, i64 4 seq_cst
- store i64 %result, i64 addrspace(1)* %out, align 8
- ret void
-}
-
-; FUNC-LABEL: {{^}}lds_atomic_umin_ret_i64_offset:
-; GCN: ds_min_rtn_u64 {{.*}} offset:32
-; GCN: s_endpgm
-define void @lds_atomic_umin_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
- %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
- %result = atomicrmw umin i64 addrspace(3)* %gep, i64 4 seq_cst
- store i64 %result, i64 addrspace(1)* %out, align 8
- ret void
-}
-
-; FUNC-LABEL: {{^}}lds_atomic_umax_ret_i64:
-; GCN: ds_max_rtn_u64
-; GCN: s_endpgm
-define void @lds_atomic_umax_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
- %result = atomicrmw umax i64 addrspace(3)* %ptr, i64 4 seq_cst
- store i64 %result, i64 addrspace(1)* %out, align 8
- ret void
-}
-
-; FUNC-LABEL: {{^}}lds_atomic_umax_ret_i64_offset:
-; GCN: ds_max_rtn_u64 {{.*}} offset:32
-; GCN: s_endpgm
-define void @lds_atomic_umax_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
- %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
- %result = atomicrmw umax i64 addrspace(3)* %gep, i64 4 seq_cst
- store i64 %result, i64 addrspace(1)* %out, align 8
- ret void
-}
-
-; FUNC-LABEL: {{^}}lds_atomic_xchg_noret_i64:
-; GCN: ds_wrxchg_rtn_b64
-; GCN: s_endpgm
-define void @lds_atomic_xchg_noret_i64(i64 addrspace(3)* %ptr) nounwind {
- %result = atomicrmw xchg i64 addrspace(3)* %ptr, i64 4 seq_cst
- ret void
-}
-
-; FUNC-LABEL: {{^}}lds_atomic_xchg_noret_i64_offset:
-; GCN: ds_wrxchg_rtn_b64 {{.*}} offset:32
-; GCN: s_endpgm
-define void @lds_atomic_xchg_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
- %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
- %result = atomicrmw xchg i64 addrspace(3)* %gep, i64 4 seq_cst
- ret void
-}
-
-; FUNC-LABEL: {{^}}lds_atomic_add_noret_i64:
-; GCN: ds_add_u64
-; GCN: s_endpgm
-define void @lds_atomic_add_noret_i64(i64 addrspace(3)* %ptr) nounwind {
- %result = atomicrmw add i64 addrspace(3)* %ptr, i64 4 seq_cst
- ret void
-}
-
-; FUNC-LABEL: {{^}}lds_atomic_add_noret_i64_offset:
-; SI: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x9
-; VI: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x24
-; GCN: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], 9
-; GCN: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], 0
-; GCN: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]]
-; GCN: ds_add_u64 [[VPTR]], v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}} offset:32
-; GCN: s_endpgm
-define void @lds_atomic_add_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
- %gep = getelementptr i64, i64 addrspace(3)* %ptr, i64 4
- %result = atomicrmw add i64 addrspace(3)* %gep, i64 9 seq_cst
- ret void
-}
-
-; FUNC-LABEL: {{^}}lds_atomic_inc_noret_i64:
-; GCN: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], -1
-; GCN: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], -1
-; GCN: ds_inc_u64 [[VPTR]], v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}}
-; GCN: s_endpgm
-define void @lds_atomic_inc_noret_i64(i64 addrspace(3)* %ptr) nounwind {
- %result = atomicrmw add i64 addrspace(3)* %ptr, i64 1 seq_cst
- ret void
-}
-
-; FUNC-LABEL: {{^}}lds_atomic_inc_noret_i64_offset:
-; GCN: ds_inc_u64 {{.*}} offset:32
-; GCN: s_endpgm
-define void @lds_atomic_inc_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
- %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
- %result = atomicrmw add i64 addrspace(3)* %gep, i64 1 seq_cst
- ret void
-}
-
-; FUNC-LABEL: {{^}}lds_atomic_sub_noret_i64:
-; GCN: ds_sub_u64
-; GCN: s_endpgm
-define void @lds_atomic_sub_noret_i64(i64 addrspace(3)* %ptr) nounwind {
- %result = atomicrmw sub i64 addrspace(3)* %ptr, i64 4 seq_cst
- ret void
-}
-
-; FUNC-LABEL: {{^}}lds_atomic_sub_noret_i64_offset:
-; GCN: ds_sub_u64 {{.*}} offset:32
-; GCN: s_endpgm
-define void @lds_atomic_sub_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
- %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
- %result = atomicrmw sub i64 addrspace(3)* %gep, i64 4 seq_cst
- ret void
-}
-
-; FUNC-LABEL: {{^}}lds_atomic_dec_noret_i64:
-; GCN: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], -1
-; GCN: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], -1
-; GCN: ds_dec_u64 [[VPTR]], v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}}
-; GCN: s_endpgm
-define void @lds_atomic_dec_noret_i64(i64 addrspace(3)* %ptr) nounwind {
- %result = atomicrmw sub i64 addrspace(3)* %ptr, i64 1 seq_cst
- ret void
-}
-
-; FUNC-LABEL: {{^}}lds_atomic_dec_noret_i64_offset:
-; GCN: ds_dec_u64 {{.*}} offset:32
-; GCN: s_endpgm
-define void @lds_atomic_dec_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
- %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
- %result = atomicrmw sub i64 addrspace(3)* %gep, i64 1 seq_cst
- ret void
-}
-
-; FUNC-LABEL: {{^}}lds_atomic_and_noret_i64:
-; GCN: ds_and_b64
-; GCN: s_endpgm
-define void @lds_atomic_and_noret_i64(i64 addrspace(3)* %ptr) nounwind {
- %result = atomicrmw and i64 addrspace(3)* %ptr, i64 4 seq_cst
- ret void
-}
-
-; FUNC-LABEL: {{^}}lds_atomic_and_noret_i64_offset:
-; GCN: ds_and_b64 {{.*}} offset:32
-; GCN: s_endpgm
-define void @lds_atomic_and_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
- %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
- %result = atomicrmw and i64 addrspace(3)* %gep, i64 4 seq_cst
- ret void
-}
-
-; FUNC-LABEL: {{^}}lds_atomic_or_noret_i64:
-; GCN: ds_or_b64
-; GCN: s_endpgm
-define void @lds_atomic_or_noret_i64(i64 addrspace(3)* %ptr) nounwind {
- %result = atomicrmw or i64 addrspace(3)* %ptr, i64 4 seq_cst
- ret void
-}
-
-; FUNC-LABEL: {{^}}lds_atomic_or_noret_i64_offset:
-; GCN: ds_or_b64 {{.*}} offset:32
-; GCN: s_endpgm
-define void @lds_atomic_or_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
- %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
- %result = atomicrmw or i64 addrspace(3)* %gep, i64 4 seq_cst
- ret void
-}
-
-; FUNC-LABEL: {{^}}lds_atomic_xor_noret_i64:
-; GCN: ds_xor_b64
-; GCN: s_endpgm
-define void @lds_atomic_xor_noret_i64(i64 addrspace(3)* %ptr) nounwind {
- %result = atomicrmw xor i64 addrspace(3)* %ptr, i64 4 seq_cst
- ret void
-}
-
-; FUNC-LABEL: {{^}}lds_atomic_xor_noret_i64_offset:
-; GCN: ds_xor_b64 {{.*}} offset:32
-; GCN: s_endpgm
-define void @lds_atomic_xor_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
- %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
- %result = atomicrmw xor i64 addrspace(3)* %gep, i64 4 seq_cst
- ret void
-}
-
-; FIXME: There is no atomic nand instruction, so we somehow need to expand this.
-; XFUNC-LABEL: {{^}}lds_atomic_nand_noret_i64:
-; define void @lds_atomic_nand_noret_i64(i64 addrspace(3)* %ptr) nounwind {
-; %result = atomicrmw nand i64 addrspace(3)* %ptr, i64 4 seq_cst
-; ret void
-; }
-
-; FUNC-LABEL: {{^}}lds_atomic_min_noret_i64:
-; GCN: ds_min_i64
-; GCN: s_endpgm
-define void @lds_atomic_min_noret_i64(i64 addrspace(3)* %ptr) nounwind {
- %result = atomicrmw min i64 addrspace(3)* %ptr, i64 4 seq_cst
- ret void
-}
-
-; FUNC-LABEL: {{^}}lds_atomic_min_noret_i64_offset:
-; GCN: ds_min_i64 {{.*}} offset:32
-; GCN: s_endpgm
-define void @lds_atomic_min_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
- %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
- %result = atomicrmw min i64 addrspace(3)* %gep, i64 4 seq_cst
- ret void
-}
-
-; FUNC-LABEL: {{^}}lds_atomic_max_noret_i64:
-; GCN: ds_max_i64
-; GCN: s_endpgm
-define void @lds_atomic_max_noret_i64(i64 addrspace(3)* %ptr) nounwind {
- %result = atomicrmw max i64 addrspace(3)* %ptr, i64 4 seq_cst
- ret void
-}
-
-; FUNC-LABEL: {{^}}lds_atomic_max_noret_i64_offset:
-; GCN: ds_max_i64 {{.*}} offset:32
-; GCN: s_endpgm
-define void @lds_atomic_max_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
- %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
- %result = atomicrmw max i64 addrspace(3)* %gep, i64 4 seq_cst
- ret void
-}
-
-; FUNC-LABEL: {{^}}lds_atomic_umin_noret_i64:
-; GCN: ds_min_u64
-; GCN: s_endpgm
-define void @lds_atomic_umin_noret_i64(i64 addrspace(3)* %ptr) nounwind {
- %result = atomicrmw umin i64 addrspace(3)* %ptr, i64 4 seq_cst
- ret void
-}
-
-; FUNC-LABEL: {{^}}lds_atomic_umin_noret_i64_offset:
-; GCN: ds_min_u64 {{.*}} offset:32
-; GCN: s_endpgm
-define void @lds_atomic_umin_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
- %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
- %result = atomicrmw umin i64 addrspace(3)* %gep, i64 4 seq_cst
- ret void
-}
-
-; FUNC-LABEL: {{^}}lds_atomic_umax_noret_i64:
-; GCN: ds_max_u64
-; GCN: s_endpgm
-define void @lds_atomic_umax_noret_i64(i64 addrspace(3)* %ptr) nounwind {
- %result = atomicrmw umax i64 addrspace(3)* %ptr, i64 4 seq_cst
- ret void
-}
-
-; FUNC-LABEL: {{^}}lds_atomic_umax_noret_i64_offset:
-; GCN: ds_max_u64 {{.*}} offset:32
-; GCN: s_endpgm
-define void @lds_atomic_umax_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
- %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
- %result = atomicrmw umax i64 addrspace(3)* %gep, i64 4 seq_cst
- ret void
-}
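
The FIXME in the hunk above notes that there is no DS atomic nand instruction, so an atomicrmw nand has to be expanded before selection. What follows is only a minimal sketch of one possible expansion into a compare-and-swap loop; the function name and value names are illustrative and are not part of this patch:

define i64 @lds_atomic_nand_expanded_i64(i64 addrspace(3)* %ptr, i64 %val) {
entry:
  %init = load i64, i64 addrspace(3)* %ptr, align 8
  br label %loop

loop:                                        ; retry until the cmpxchg succeeds
  %old = phi i64 [ %init, %entry ], [ %seen, %loop ]
  %and = and i64 %old, %val
  %nand = xor i64 %and, -1                   ; nand(old, val) == ~(old & val)
  %pair = cmpxchg i64 addrspace(3)* %ptr, i64 %old, i64 %nand seq_cst seq_cst
  %seen = extractvalue { i64, i1 } %pair, 0
  %ok = extractvalue { i64, i1 } %pair, 1
  br i1 %ok, label %done, label %loop

done:
  ret i64 %seen                              ; atomicrmw returns the value loaded before the operation
}
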
Removed: llvm/trunk/test/CodeGen/R600/local-memory-two-objects.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/local-memory-two-objects.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/local-memory-two-objects.ll (original)
+++ llvm/trunk/test/CodeGen/R600/local-memory-two-objects.ll (removed)
@@ -1,63 +0,0 @@
-; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG %s
-; RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=GCN --check-prefix=SI %s
-; RUN: llc < %s -march=amdgcn -mcpu=bonaire -verify-machineinstrs | FileCheck --check-prefix=GCN --check-prefix=CI %s
-
-@local_memory_two_objects.local_mem0 = internal unnamed_addr addrspace(3) global [4 x i32] undef, align 4
-@local_memory_two_objects.local_mem1 = internal unnamed_addr addrspace(3) global [4 x i32] undef, align 4
-
-
-; Check that the LDS size is emitted correctly
-; EG: .long 166120
-; EG-NEXT: .long 8
-; GCN: .long 47180
-; GCN-NEXT: .long 38792
-
-; EG: {{^}}local_memory_two_objects:
-
-; We would like to check that the lds writes are using different
-; addresses, but due to variations in the scheduler, we can't do
-; this consistently on evergreen GPUs.
-; EG: LDS_WRITE
-; EG: LDS_WRITE
-; GCN: ds_write_b32 {{v[0-9]*}}, v[[ADDRW:[0-9]*]]
-; GCN-NOT: ds_write_b32 {{v[0-9]*}}, v[[ADDRW]]
-
-; GROUP_BARRIER must be the last instruction in a clause
-; EG: GROUP_BARRIER
-; EG-NEXT: ALU clause
-
-; Make sure the lds reads are using different addresses, at different
-; constant offsets.
-; EG: LDS_READ_RET {{[*]*}} OQAP, {{PV|T}}[[ADDRR:[0-9]*\.[XYZW]]]
-; EG-NOT: LDS_READ_RET {{[*]*}} OQAP, T[[ADDRR]]
-; SI: v_add_i32_e32 [[SIPTR:v[0-9]+]], 16, v{{[0-9]+}}
-; SI: ds_read_b32 {{v[0-9]+}}, [[SIPTR]]
-; CI: ds_read_b32 {{v[0-9]+}}, [[ADDRR:v[0-9]+]] offset:16
-; CI: ds_read_b32 {{v[0-9]+}}, [[ADDRR]]
-
-define void @local_memory_two_objects(i32 addrspace(1)* %out) {
-entry:
- %x.i = call i32 @llvm.r600.read.tidig.x() #0
- %arrayidx = getelementptr inbounds [4 x i32], [4 x i32] addrspace(3)* @local_memory_two_objects.local_mem0, i32 0, i32 %x.i
- store i32 %x.i, i32 addrspace(3)* %arrayidx, align 4
- %mul = shl nsw i32 %x.i, 1
- %arrayidx1 = getelementptr inbounds [4 x i32], [4 x i32] addrspace(3)* @local_memory_two_objects.local_mem1, i32 0, i32 %x.i
- store i32 %mul, i32 addrspace(3)* %arrayidx1, align 4
- %sub = sub nsw i32 3, %x.i
- call void @llvm.AMDGPU.barrier.local()
- %arrayidx2 = getelementptr inbounds [4 x i32], [4 x i32] addrspace(3)* @local_memory_two_objects.local_mem0, i32 0, i32 %sub
- %0 = load i32, i32 addrspace(3)* %arrayidx2, align 4
- %arrayidx3 = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 %x.i
- store i32 %0, i32 addrspace(1)* %arrayidx3, align 4
- %arrayidx4 = getelementptr inbounds [4 x i32], [4 x i32] addrspace(3)* @local_memory_two_objects.local_mem1, i32 0, i32 %sub
- %1 = load i32, i32 addrspace(3)* %arrayidx4, align 4
- %add = add nsw i32 %x.i, 4
- %arrayidx5 = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 %add
- store i32 %1, i32 addrspace(1)* %arrayidx5, align 4
- ret void
-}
-
-declare i32 @llvm.r600.read.tidig.x() #0
-declare void @llvm.AMDGPU.barrier.local()
-
-attributes #0 = { readnone }
Removed: llvm/trunk/test/CodeGen/R600/local-memory.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/local-memory.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/local-memory.ll (original)
+++ llvm/trunk/test/CodeGen/R600/local-memory.ll (removed)
@@ -1,49 +0,0 @@
-; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s
-
-@local_memory.local_mem = internal unnamed_addr addrspace(3) global [128 x i32] undef, align 4
-
-
-; Check that the LDS size is emitted correctly
-; EG: .long 166120
-; EG-NEXT: .long 128
-; SI: .long 47180
-; SI-NEXT: .long 71560
-; CI: .long 47180
-; CI-NEXT: .long 38792
-
-; FUNC-LABEL: {{^}}local_memory:
-
-; EG: LDS_WRITE
-; SI-NOT: s_wqm_b64
-; SI: ds_write_b32
-
-; GROUP_BARRIER must be the last instruction in a clause
-; EG: GROUP_BARRIER
-; EG-NEXT: ALU clause
-; SI: s_barrier
-
-; EG: LDS_READ_RET
-; SI: ds_read_b32 {{v[0-9]+}},
-
-define void @local_memory(i32 addrspace(1)* %out) {
-entry:
- %y.i = call i32 @llvm.r600.read.tidig.x() #0
- %arrayidx = getelementptr inbounds [128 x i32], [128 x i32] addrspace(3)* @local_memory.local_mem, i32 0, i32 %y.i
- store i32 %y.i, i32 addrspace(3)* %arrayidx, align 4
- %add = add nsw i32 %y.i, 1
- %cmp = icmp eq i32 %add, 16
- %.add = select i1 %cmp, i32 0, i32 %add
- call void @llvm.AMDGPU.barrier.local()
- %arrayidx1 = getelementptr inbounds [128 x i32], [128 x i32] addrspace(3)* @local_memory.local_mem, i32 0, i32 %.add
- %0 = load i32, i32 addrspace(3)* %arrayidx1, align 4
- %arrayidx2 = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 %y.i
- store i32 %0, i32 addrspace(1)* %arrayidx2, align 4
- ret void
-}
-
-declare i32 @llvm.r600.read.tidig.x() #0
-declare void @llvm.AMDGPU.barrier.local()
-
-attributes #0 = { readnone }
Removed: llvm/trunk/test/CodeGen/R600/loop-address.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/loop-address.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/loop-address.ll (original)
+++ llvm/trunk/test/CodeGen/R600/loop-address.ll (removed)
@@ -1,34 +0,0 @@
-;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
-
-;CHECK: ALU_PUSH
-;CHECK: LOOP_START_DX10 @11
-;CHECK: LOOP_BREAK @10
-;CHECK: POP @10
-
-define void @loop_ge(i32 addrspace(1)* nocapture %out, i32 %iterations) #0 {
-entry:
- %cmp5 = icmp sgt i32 %iterations, 0
- br i1 %cmp5, label %for.body, label %for.end
-
-for.body: ; preds = %for.body, %entry
- %i.07.in = phi i32 [ %i.07, %for.body ], [ %iterations, %entry ]
- %ai.06 = phi i32 [ %add, %for.body ], [ 0, %entry ]
- %i.07 = add nsw i32 %i.07.in, -1
- %arrayidx = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 %ai.06
- store i32 %i.07, i32 addrspace(1)* %arrayidx, align 4
- %add = add nsw i32 %ai.06, 1
- %exitcond = icmp eq i32 %add, %iterations
- br i1 %exitcond, label %for.end, label %for.body
-
-for.end: ; preds = %for.body, %entry
- ret void
-}
-
-attributes #0 = { nounwind "fp-contract-model"="standard" "relocation-model"="pic" "ssp-buffers-size"="8" }
-
-!opencl.kernels = !{!0, !1, !2, !3}
-
-!0 = !{void (i32 addrspace(1)*, i32)* @loop_ge}
-!1 = !{null}
-!2 = !{null}
-!3 = !{null}
Removed: llvm/trunk/test/CodeGen/R600/loop-idiom.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/loop-idiom.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/loop-idiom.ll (original)
+++ llvm/trunk/test/CodeGen/R600/loop-idiom.ll (removed)
@@ -1,51 +0,0 @@
-; RUN: opt -basicaa -loop-idiom -S < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=R600 --check-prefix=FUNC %s
-; RUN: opt -basicaa -loop-idiom -S < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck --check-prefix=SI --check-prefix=FUNC %s
-; RUN: opt -basicaa -loop-idiom -S < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck --check-prefix=SI --check-prefix=FUNC %s
-
-
-; Make sure loop-idiom doesn't create memcpy or memset. There are no library
-; implementations of these for R600.
-
-; FUNC: @no_memcpy
-; R600-NOT: {{^}}llvm.memcpy
-; SI-NOT: {{^}}llvm.memcpy
-define void @no_memcpy(i8 addrspace(3)* %in, i32 %size) {
-entry:
- %dest = alloca i8, i32 32
- br label %for.body
-
-for.body:
- %0 = phi i32 [0, %entry], [%4, %for.body]
- %1 = getelementptr i8, i8 addrspace(3)* %in, i32 %0
- %2 = getelementptr i8, i8* %dest, i32 %0
- %3 = load i8, i8 addrspace(3)* %1
- store i8 %3, i8* %2
- %4 = add i32 %0, 1
- %5 = icmp eq i32 %4, %size
- br i1 %5, label %for.end, label %for.body
-
-for.end:
- ret void
-}
-
-; FUNC: @no_memset
-; R600-NOT: {{^}}llvm.memset
-; R600-NOT: {{^}}memset_pattern16:
-; SI-NOT: {{^}}llvm.memset
-; SI-NOT: {{^}}memset_pattern16:
-define void @no_memset(i32 %size) {
-entry:
- %dest = alloca i8, i32 32
- br label %for.body
-
-for.body:
- %0 = phi i32 [0, %entry], [%2, %for.body]
- %1 = getelementptr i8, i8* %dest, i32 %0
- store i8 0, i8* %1
- %2 = add i32 %0, 1
- %3 = icmp eq i32 %2, %size
- br i1 %3, label %for.end, label %for.body
-
-for.end:
- ret void
-}
Removed: llvm/trunk/test/CodeGen/R600/lshl.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/lshl.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/lshl.ll (original)
+++ llvm/trunk/test/CodeGen/R600/lshl.ll (removed)
@@ -1,15 +0,0 @@
-;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s
-;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s
-
-;CHECK: s_lshl_b32 s{{[0-9]}}, s{{[0-9]}}, 1
-
-define void @test(i32 %p) {
- %i = mul i32 %p, 2
- %r = bitcast i32 %i to float
- call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %r, float %r, float %r, float %r)
- ret void
-}
-
-declare <4 x float> @llvm.SI.sample.(i32, <4 x i32>, <8 x i32>, <4 x i32>, i32) readnone
-
-declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
Removed: llvm/trunk/test/CodeGen/R600/lshr.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/lshr.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/lshr.ll (original)
+++ llvm/trunk/test/CodeGen/R600/lshr.ll (removed)
@@ -1,15 +0,0 @@
-;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s
-;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s
-
-;CHECK: s_lshr_b32 s{{[0-9]}}, s{{[0-9]}}, 1
-
-define void @test(i32 %p) {
- %i = udiv i32 %p, 2
- %r = bitcast i32 %i to float
- call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %r, float %r, float %r, float %r)
- ret void
-}
-
-declare <4 x float> @llvm.SI.sample.(i32, <4 x i32>, <8 x i32>, <4 x i32>, i32) readnone
-
-declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
Removed: llvm/trunk/test/CodeGen/R600/m0-spill.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/m0-spill.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/m0-spill.ll (original)
+++ llvm/trunk/test/CodeGen/R600/m0-spill.ll (removed)
@@ -1,35 +0,0 @@
-; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck %s
-; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s
-
-@lds = external addrspace(3) global [64 x float]
-
-; CHECK-LABEL: {{^}}main:
-; CHECK-NOT: v_readlane_b32 m0
-define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg) "ShaderType"="0" {
-main_body:
- %4 = call float @llvm.SI.fs.constant(i32 0, i32 0, i32 %3)
- %cmp = fcmp ueq float 0.0, %4
- br i1 %cmp, label %if, label %else
-
-if:
- %lds_ptr = getelementptr [64 x float], [64 x float] addrspace(3)* @lds, i32 0, i32 0
- %lds_data = load float, float addrspace(3)* %lds_ptr
- br label %endif
-
-else:
- %interp = call float @llvm.SI.fs.constant(i32 0, i32 0, i32 %3)
- br label %endif
-
-endif:
- %export = phi float [%lds_data, %if], [%interp, %else]
- %5 = call i32 @llvm.SI.packf16(float %export, float %export)
- %6 = bitcast i32 %5 to float
- call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %6, float %6, float %6, float %6)
- ret void
-}
-
-declare float @llvm.SI.fs.constant(i32, i32, i32) readnone
-
-declare i32 @llvm.SI.packf16(float, float) readnone
-
-declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
Removed: llvm/trunk/test/CodeGen/R600/mad-combine.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/mad-combine.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/mad-combine.ll (original)
+++ llvm/trunk/test/CodeGen/R600/mad-combine.ll (removed)
@@ -1,567 +0,0 @@
-; Make sure we still form mad, rather than fma, even when unsafe math or fp-contract is allowed.
-
-; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=SI-STD -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs -fp-contract=fast < %s | FileCheck -check-prefix=SI -check-prefix=SI-STD -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=SI -check-prefix=SI-STD -check-prefix=FUNC %s
-
-; Make sure we don't form mad with denormals
-; RUN: llc -march=amdgcn -mcpu=tahiti -mattr=+fp32-denormals -fp-contract=fast -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=SI-DENORM -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=verde -mattr=+fp32-denormals -fp-contract=fast -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=SI-DENORM-SLOWFMAF -check-prefix=FUNC %s
-
-declare i32 @llvm.r600.read.tidig.x() #0
-declare float @llvm.fabs.f32(float) #0
-declare float @llvm.fma.f32(float, float, float) #0
-declare float @llvm.fmuladd.f32(float, float, float) #0
-
-; (fadd (fmul x, y), z) -> (fma x, y, z)
-; FUNC-LABEL: {{^}}combine_to_mad_f32_0:
-; SI-DAG: buffer_load_dword [[A:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
-; SI-DAG: buffer_load_dword [[B:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
-; SI-DAG: buffer_load_dword [[C:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
-
-; SI-STD: v_mad_f32 [[RESULT:v[0-9]+]], [[A]], [[B]], [[C]]
-
-; SI-DENORM: v_fma_f32 [[RESULT:v[0-9]+]], [[A]], [[B]], [[C]]
-
-; SI-DENORM-SLOWFMAF-NOT: v_fma
-; SI-DENORM-SLOWFMAF-NOT: v_mad
-
-; SI-DENORM-SLOWFMAF: v_mul_f32_e32 [[TMP:v[0-9]+]], [[B]], [[A]]
-; SI-DENORM-SLOWFMAF: v_add_f32_e32 [[RESULT:v[0-9]+]], [[C]], [[TMP]]
-
-; SI: buffer_store_dword [[RESULT]]
-define void @combine_to_mad_f32_0(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 {
- %tid = tail call i32 @llvm.r600.read.tidig.x() #0
- %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
- %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
- %gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2
- %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid
-
- %a = load float, float addrspace(1)* %gep.0
- %b = load float, float addrspace(1)* %gep.1
- %c = load float, float addrspace(1)* %gep.2
-
- %mul = fmul float %a, %b
- %fma = fadd float %mul, %c
- store float %fma, float addrspace(1)* %gep.out
- ret void
-}
-
-; (fadd (fmul x, y), z) -> (fma x, y, z)
-; FUNC-LABEL: {{^}}combine_to_mad_f32_0_2use:
-; SI-DAG: buffer_load_dword [[A:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
-; SI-DAG: buffer_load_dword [[B:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
-; SI-DAG: buffer_load_dword [[C:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
-; SI-DAG: buffer_load_dword [[D:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:12{{$}}
-
-; SI-STD-DAG: v_mad_f32 [[RESULT0:v[0-9]+]], [[A]], [[B]], [[C]]
-; SI-STD-DAG: v_mad_f32 [[RESULT1:v[0-9]+]], [[A]], [[B]], [[D]]
-
-; SI-DENORM-DAG: v_fma_f32 [[RESULT0:v[0-9]+]], [[A]], [[B]], [[C]]
-; SI-DENORM-DAG: v_fma_f32 [[RESULT1:v[0-9]+]], [[A]], [[B]], [[D]]
-
-; SI-DENORM-SLOWFMAF: v_mul_f32_e32 [[TMP:v[0-9]+]], [[B]], [[A]]
-; SI-DENORM-SLOWFMAF-DAG: v_add_f32_e32 [[RESULT0:v[0-9]+]], [[C]], [[TMP]]
-; SI-DENORM-SLOWFMAF-DAG: v_add_f32_e32 [[RESULT1:v[0-9]+]], [[D]], [[TMP]]
-
-; SI-DAG: buffer_store_dword [[RESULT0]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
-; SI-DAG: buffer_store_dword [[RESULT1]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
-; SI: s_endpgm
-define void @combine_to_mad_f32_0_2use(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 {
- %tid = tail call i32 @llvm.r600.read.tidig.x() #0
- %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
- %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
- %gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2
- %gep.3 = getelementptr float, float addrspace(1)* %gep.0, i32 3
- %gep.out.0 = getelementptr float, float addrspace(1)* %out, i32 %tid
- %gep.out.1 = getelementptr float, float addrspace(1)* %gep.out.0, i32 1
-
- %a = load float, float addrspace(1)* %gep.0
- %b = load float, float addrspace(1)* %gep.1
- %c = load float, float addrspace(1)* %gep.2
- %d = load float, float addrspace(1)* %gep.3
-
- %mul = fmul float %a, %b
- %fma0 = fadd float %mul, %c
- %fma1 = fadd float %mul, %d
-
- store float %fma0, float addrspace(1)* %gep.out.0
- store float %fma1, float addrspace(1)* %gep.out.1
- ret void
-}
-
-; (fadd x, (fmul y, z)) -> (fma y, z, x)
-; FUNC-LABEL: {{^}}combine_to_mad_f32_1:
-; SI-DAG: buffer_load_dword [[A:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
-; SI-DAG: buffer_load_dword [[B:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
-; SI-DAG: buffer_load_dword [[C:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
-
-; SI-STD: v_mad_f32 [[RESULT:v[0-9]+]], [[A]], [[B]], [[C]]
-; SI-DENORM: v_fma_f32 [[RESULT:v[0-9]+]], [[A]], [[B]], [[C]]
-
-; SI-DENORM-SLOWFMAF: v_mul_f32_e32 [[TMP:v[0-9]+]], [[B]], [[A]]
-; SI-DENORM-SLOWFMAF: v_add_f32_e32 [[RESULT:v[0-9]+]], [[TMP]], [[C]]
-
-; SI: buffer_store_dword [[RESULT]]
-define void @combine_to_mad_f32_1(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 {
- %tid = tail call i32 @llvm.r600.read.tidig.x() #0
- %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
- %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
- %gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2
- %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid
-
- %a = load float, float addrspace(1)* %gep.0
- %b = load float, float addrspace(1)* %gep.1
- %c = load float, float addrspace(1)* %gep.2
-
- %mul = fmul float %a, %b
- %fma = fadd float %c, %mul
- store float %fma, float addrspace(1)* %gep.out
- ret void
-}
-
-; (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
-; FUNC-LABEL: {{^}}combine_to_mad_fsub_0_f32:
-; SI-DAG: buffer_load_dword [[A:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
-; SI-DAG: buffer_load_dword [[B:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
-; SI-DAG: buffer_load_dword [[C:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
-
-; SI-STD: v_mad_f32 [[RESULT:v[0-9]+]], [[A]], [[B]], -[[C]]
-; SI-DENORM: v_fma_f32 [[RESULT:v[0-9]+]], [[A]], [[B]], -[[C]]
-
-; SI-DENORM-SLOWFMAF: v_mul_f32_e32 [[TMP:v[0-9]+]], [[B]], [[A]]
-; SI-DENORM-SLOWFMAF: v_subrev_f32_e32 [[RESULT:v[0-9]+]], [[C]], [[TMP]]
-
-; SI: buffer_store_dword [[RESULT]]
-define void @combine_to_mad_fsub_0_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 {
- %tid = tail call i32 @llvm.r600.read.tidig.x() #0
- %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
- %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
- %gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2
- %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid
-
- %a = load float, float addrspace(1)* %gep.0
- %b = load float, float addrspace(1)* %gep.1
- %c = load float, float addrspace(1)* %gep.2
-
- %mul = fmul float %a, %b
- %fma = fsub float %mul, %c
- store float %fma, float addrspace(1)* %gep.out
- ret void
-}
-
-; (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
-; FUNC-LABEL: {{^}}combine_to_mad_fsub_0_f32_2use:
-; SI-DAG: buffer_load_dword [[A:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
-; SI-DAG: buffer_load_dword [[B:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
-; SI-DAG: buffer_load_dword [[C:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
-; SI-DAG: buffer_load_dword [[D:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:12{{$}}
-
-; SI-STD-DAG: v_mad_f32 [[RESULT0:v[0-9]+]], [[A]], [[B]], -[[C]]
-; SI-STD-DAG: v_mad_f32 [[RESULT1:v[0-9]+]], [[A]], [[B]], -[[D]]
-
-; SI-DENORM-DAG: v_fma_f32 [[RESULT0:v[0-9]+]], [[A]], [[B]], -[[C]]
-; SI-DENORM-DAG: v_fma_f32 [[RESULT1:v[0-9]+]], [[A]], [[B]], -[[D]]
-
-; SI-DENORM-SLOWFMAF: v_mul_f32_e32 [[TMP:v[0-9]+]], [[B]], [[A]]
-; SI-DENORM-SLOWFMAF-DAG: v_subrev_f32_e32 [[RESULT0:v[0-9]+]], [[C]], [[TMP]]
-; SI-DENORM-SLOWFMAF-DAG: v_subrev_f32_e32 [[RESULT1:v[0-9]+]], [[D]], [[TMP]]
-
-; SI-DAG: buffer_store_dword [[RESULT0]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
-; SI-DAG: buffer_store_dword [[RESULT1]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
-; SI: s_endpgm
-define void @combine_to_mad_fsub_0_f32_2use(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 {
- %tid = tail call i32 @llvm.r600.read.tidig.x() #0
- %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
- %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
- %gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2
- %gep.3 = getelementptr float, float addrspace(1)* %gep.0, i32 3
- %gep.out.0 = getelementptr float, float addrspace(1)* %out, i32 %tid
- %gep.out.1 = getelementptr float, float addrspace(1)* %gep.out.0, i32 1
-
- %a = load float, float addrspace(1)* %gep.0
- %b = load float, float addrspace(1)* %gep.1
- %c = load float, float addrspace(1)* %gep.2
- %d = load float, float addrspace(1)* %gep.3
-
- %mul = fmul float %a, %b
- %fma0 = fsub float %mul, %c
- %fma1 = fsub float %mul, %d
- store float %fma0, float addrspace(1)* %gep.out.0
- store float %fma1, float addrspace(1)* %gep.out.1
- ret void
-}
-
-; (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
-; FUNC-LABEL: {{^}}combine_to_mad_fsub_1_f32:
-; SI-DAG: buffer_load_dword [[A:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
-; SI-DAG: buffer_load_dword [[B:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
-; SI-DAG: buffer_load_dword [[C:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
-
-; SI-STD: v_mad_f32 [[RESULT:v[0-9]+]], -[[A]], [[B]], [[C]]
-; SI-DENORM: v_fma_f32 [[RESULT:v[0-9]+]], -[[A]], [[B]], [[C]]
-
-; SI-DENORM-SLOWFMAF: v_mul_f32_e32 [[TMP:v[0-9]+]], [[B]], [[A]]
-; SI-DENORM-SLOWFMAF: v_subrev_f32_e32 [[RESULT:v[0-9]+]], [[TMP]], [[C]]
-
-; SI: buffer_store_dword [[RESULT]]
-define void @combine_to_mad_fsub_1_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 {
- %tid = tail call i32 @llvm.r600.read.tidig.x() #0
- %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
- %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
- %gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2
- %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid
-
- %a = load float, float addrspace(1)* %gep.0
- %b = load float, float addrspace(1)* %gep.1
- %c = load float, float addrspace(1)* %gep.2
-
- %mul = fmul float %a, %b
- %fma = fsub float %c, %mul
- store float %fma, float addrspace(1)* %gep.out
- ret void
-}
-
-; (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
-; FUNC-LABEL: {{^}}combine_to_mad_fsub_1_f32_2use:
-; SI-DAG: buffer_load_dword [[A:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
-; SI-DAG: buffer_load_dword [[B:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
-; SI-DAG: buffer_load_dword [[C:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
-
-; SI-STD-DAG: v_mad_f32 [[RESULT0:v[0-9]+]], -[[A]], [[B]], [[C]]
-; SI-STD-DAG: v_mad_f32 [[RESULT1:v[0-9]+]], -[[A]], [[B]], [[D]]
-
-; SI-DENORM-DAG: v_fma_f32 [[RESULT0:v[0-9]+]], -[[A]], [[B]], [[C]]
-; SI-DENORM-DAG: v_fma_f32 [[RESULT1:v[0-9]+]], -[[A]], [[B]], [[D]]
-
-; SI-DENORM-SLOWFMAF: v_mul_f32_e32 [[TMP:v[0-9]+]], [[B]], [[A]]
-; SI-DENORM-SLOWFMAF-DAG: v_subrev_f32_e32 [[RESULT0:v[0-9]+]], [[TMP]], [[C]]
-; SI-DENORM-SLOWFMAF-DAG: v_subrev_f32_e32 [[RESULT1:v[0-9]+]], [[TMP]], [[D]]
-
-; SI-DAG: buffer_store_dword [[RESULT0]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
-; SI-DAG: buffer_store_dword [[RESULT1]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
-; SI: s_endpgm
-define void @combine_to_mad_fsub_1_f32_2use(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 {
- %tid = tail call i32 @llvm.r600.read.tidig.x() #0
- %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
- %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
- %gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2
- %gep.3 = getelementptr float, float addrspace(1)* %gep.0, i32 3
- %gep.out.0 = getelementptr float, float addrspace(1)* %out, i32 %tid
- %gep.out.1 = getelementptr float, float addrspace(1)* %gep.out.0, i32 1
-
- %a = load float, float addrspace(1)* %gep.0
- %b = load float, float addrspace(1)* %gep.1
- %c = load float, float addrspace(1)* %gep.2
- %d = load float, float addrspace(1)* %gep.3
-
- %mul = fmul float %a, %b
- %fma0 = fsub float %c, %mul
- %fma1 = fsub float %d, %mul
- store float %fma0, float addrspace(1)* %gep.out.0
- store float %fma1, float addrspace(1)* %gep.out.1
- ret void
-}
-
-; (fsub (fneg (fmul x, y)), z) -> (fma (fneg x), y, (fneg z))
-; FUNC-LABEL: {{^}}combine_to_mad_fsub_2_f32:
-; SI-DAG: buffer_load_dword [[A:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
-; SI-DAG: buffer_load_dword [[B:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
-; SI-DAG: buffer_load_dword [[C:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
-
-; SI-STD: v_mad_f32 [[RESULT:v[0-9]+]], -[[A]], [[B]], -[[C]]
-
-; SI-DENORM: v_fma_f32 [[RESULT:v[0-9]+]], -[[A]], [[B]], -[[C]]
-
-; SI-DENORM-SLOWFMAF: v_mul_f32_e32 [[TMP:v[0-9]+]], [[B]], [[A]]
-; SI-DENORM-SLOWFMAF: v_sub_f32_e64 [[RESULT:v[0-9]+]], -[[TMP]], [[C]]
-
-; SI: buffer_store_dword [[RESULT]]
-define void @combine_to_mad_fsub_2_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 {
- %tid = tail call i32 @llvm.r600.read.tidig.x() #0
- %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
- %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
- %gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2
- %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid
-
- %a = load float, float addrspace(1)* %gep.0
- %b = load float, float addrspace(1)* %gep.1
- %c = load float, float addrspace(1)* %gep.2
-
- %mul = fmul float %a, %b
- %mul.neg = fsub float -0.0, %mul
- %fma = fsub float %mul.neg, %c
-
- store float %fma, float addrspace(1)* %gep.out
- ret void
-}
-
-; (fsub (fneg (fmul x, y)), z) -> (fma (fneg x), y, (fneg z))
-; FUNC-LABEL: {{^}}combine_to_mad_fsub_2_f32_2uses_neg:
-; SI-DAG: buffer_load_dword [[A:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
-; SI-DAG: buffer_load_dword [[B:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
-; SI-DAG: buffer_load_dword [[C:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
-
-; SI-STD-DAG: v_mad_f32 [[RESULT0:v[0-9]+]], -[[A]], [[B]], -[[C]]
-; SI-STD-DAG: v_mad_f32 [[RESULT1:v[0-9]+]], -[[A]], [[B]], -[[D]]
-
-; SI-DENORM-DAG: v_fma_f32 [[RESULT0:v[0-9]+]], -[[A]], [[B]], -[[C]]
-; SI-DENORM-DAG: v_fma_f32 [[RESULT1:v[0-9]+]], -[[A]], [[B]], -[[D]]
-
-; SI-DENORM-SLOWFMAF: v_mul_f32_e32 [[TMP:v[0-9]+]], [[B]], [[A]]
-; SI-DENORM-SLOWFMAF-DAG: v_sub_f32_e64 [[RESULT0:v[0-9]+]], -[[TMP]], [[C]]
-; SI-DENORM-SLOWFMAF-DAG: v_sub_f32_e64 [[RESULT1:v[0-9]+]], -[[TMP]], [[D]]
-
-; SI-DAG: buffer_store_dword [[RESULT0]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
-; SI-DAG: buffer_store_dword [[RESULT1]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
-; SI: s_endpgm
-define void @combine_to_mad_fsub_2_f32_2uses_neg(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 {
- %tid = tail call i32 @llvm.r600.read.tidig.x() #0
- %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
- %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
- %gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2
- %gep.3 = getelementptr float, float addrspace(1)* %gep.0, i32 3
- %gep.out.0 = getelementptr float, float addrspace(1)* %out, i32 %tid
- %gep.out.1 = getelementptr float, float addrspace(1)* %gep.out.0, i32 1
-
- %a = load float, float addrspace(1)* %gep.0
- %b = load float, float addrspace(1)* %gep.1
- %c = load float, float addrspace(1)* %gep.2
- %d = load float, float addrspace(1)* %gep.3
-
- %mul = fmul float %a, %b
- %mul.neg = fsub float -0.0, %mul
- %fma0 = fsub float %mul.neg, %c
- %fma1 = fsub float %mul.neg, %d
-
- store float %fma0, float addrspace(1)* %gep.out.0
- store float %fma1, float addrspace(1)* %gep.out.1
- ret void
-}
-
-; (fsub (fneg (fmul x, y)), z) -> (fma (fneg x), y, (fneg z))
-; FUNC-LABEL: {{^}}combine_to_mad_fsub_2_f32_2uses_mul:
-; SI-DAG: buffer_load_dword [[A:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
-; SI-DAG: buffer_load_dword [[B:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
-; SI-DAG: buffer_load_dword [[C:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
-
-; SI-STD-DAG: v_mad_f32 [[RESULT0:v[0-9]+]], -[[A]], [[B]], -[[C]]
-; SI-STD-DAG: v_mad_f32 [[RESULT1:v[0-9]+]], [[A]], [[B]], -[[D]]
-
-; SI-DENORM-DAG: v_fma_f32 [[RESULT0:v[0-9]+]], -[[A]], [[B]], -[[C]]
-; SI-DENORM-DAG: v_fma_f32 [[RESULT1:v[0-9]+]], [[A]], [[B]], -[[D]]
-
-; SI-DENORM-SLOWFMAF: v_mul_f32_e32 [[TMP:v[0-9]+]], [[B]], [[A]]
-; SI-DENORM-SLOWFMAF-DAG: v_sub_f32_e64 [[RESULT0:v[0-9]+]], -[[TMP]], [[C]]
-; SI-DENORM-SLOWFMAF-DAG: v_subrev_f32_e32 [[RESULT1:v[0-9]+]], [[D]], [[TMP]]
-
-; SI-DAG: buffer_store_dword [[RESULT0]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
-; SI-DAG: buffer_store_dword [[RESULT1]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
-; SI: s_endpgm
-define void @combine_to_mad_fsub_2_f32_2uses_mul(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 {
- %tid = tail call i32 @llvm.r600.read.tidig.x() #0
- %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
- %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
- %gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2
- %gep.3 = getelementptr float, float addrspace(1)* %gep.0, i32 3
- %gep.out.0 = getelementptr float, float addrspace(1)* %out, i32 %tid
- %gep.out.1 = getelementptr float, float addrspace(1)* %gep.out.0, i32 1
-
- %a = load float, float addrspace(1)* %gep.0
- %b = load float, float addrspace(1)* %gep.1
- %c = load float, float addrspace(1)* %gep.2
- %d = load float, float addrspace(1)* %gep.3
-
- %mul = fmul float %a, %b
- %mul.neg = fsub float -0.0, %mul
- %fma0 = fsub float %mul.neg, %c
- %fma1 = fsub float %mul, %d
-
- store float %fma0, float addrspace(1)* %gep.out.0
- store float %fma1, float addrspace(1)* %gep.out.1
- ret void
-}
-
-; fold (fsub (fma x, y, (fmul u, v)), z) -> (fma x, y, (fma u, v, (fneg z)))
-
-; FUNC-LABEL: {{^}}aggressive_combine_to_mad_fsub_0_f32:
-; SI-DAG: buffer_load_dword [[A:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
-; SI-DAG: buffer_load_dword [[B:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
-; SI-DAG: buffer_load_dword [[C:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
-; SI-DAG: buffer_load_dword [[D:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:12{{$}}
-; SI-DAG: buffer_load_dword [[E:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:16{{$}}
-
-; SI-STD: v_mul_f32_e32 [[TMP0:v[0-9]+]], [[E]], [[D]]
-; SI-STD: v_fma_f32 [[TMP1:v[0-9]+]], [[A]], [[B]], [[TMP0]]
-; SI-STD: v_subrev_f32_e32 [[RESULT:v[0-9]+]], [[C]], [[TMP1]]
-
-; SI-DENORM: v_fma_f32 [[TMP0:v[0-9]+]], [[D]], [[E]], -[[C]]
-; SI-DENORM: v_fma_f32 [[RESULT:v[0-9]+]], [[A]], [[B]], [[TMP0]]
-
-; SI-DENORM-SLOWFMAF: v_mul_f32_e32 [[TMP0:v[0-9]+]], [[E]], [[D]]
-; SI-DENORM-SLOWFMAF: v_fma_f32 [[TMP1:v[0-9]+]], [[A]], [[B]], [[TMP0]]
-; SI-DENORM-SLOWFMAF: v_subrev_f32_e32 [[RESULT:v[0-9]+]], [[C]], [[TMP1]]
-
-; SI: buffer_store_dword [[RESULT]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
-define void @aggressive_combine_to_mad_fsub_0_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 {
- %tid = tail call i32 @llvm.r600.read.tidig.x() #0
- %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
- %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
- %gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2
- %gep.3 = getelementptr float, float addrspace(1)* %gep.0, i32 3
- %gep.4 = getelementptr float, float addrspace(1)* %gep.0, i32 4
- %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid
-
- %x = load float, float addrspace(1)* %gep.0
- %y = load float, float addrspace(1)* %gep.1
- %z = load float, float addrspace(1)* %gep.2
- %u = load float, float addrspace(1)* %gep.3
- %v = load float, float addrspace(1)* %gep.4
-
- %tmp0 = fmul float %u, %v
- %tmp1 = call float @llvm.fma.f32(float %x, float %y, float %tmp0) #0
- %tmp2 = fsub float %tmp1, %z
-
- store float %tmp2, float addrspace(1)* %gep.out
- ret void
-}
-
-; fold (fsub x, (fma y, z, (fmul u, v)))
-; -> (fma (fneg y), z, (fma (fneg u), v, x))
-
-; FUNC-LABEL: {{^}}aggressive_combine_to_mad_fsub_1_f32:
-; SI-DAG: buffer_load_dword [[A:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
-; SI-DAG: buffer_load_dword [[B:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
-; SI-DAG: buffer_load_dword [[C:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
-; SI-DAG: buffer_load_dword [[D:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:12{{$}}
-; SI-DAG: buffer_load_dword [[E:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:16{{$}}
-
-; SI-STD: v_mul_f32_e32 [[TMP0:v[0-9]+]], [[E]], [[D]]
-; SI-STD: v_fma_f32 [[TMP1:v[0-9]+]], [[B]], [[C]], [[TMP0]]
-; SI-STD: v_subrev_f32_e32 [[RESULT:v[0-9]+]], [[TMP1]], [[A]]
-
-; SI-DENORM: v_fma_f32 [[TMP0:v[0-9]+]], -[[D]], [[E]], [[A]]
-; SI-DENORM: v_fma_f32 [[RESULT:v[0-9]+]], -[[B]], [[C]], [[TMP0]]
-
-; SI-DENORM-SLOWFMAF: v_mul_f32_e32 [[TMP0:v[0-9]+]], [[E]], [[D]]
-; SI-DENORM-SLOWFMAF: v_fma_f32 [[TMP1:v[0-9]+]], [[B]], [[C]], [[TMP0]]
-; SI-DENORM-SLOWFMAF: v_subrev_f32_e32 [[RESULT:v[0-9]+]], [[TMP1]], [[A]]
-
-; SI: buffer_store_dword [[RESULT]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
-; SI: s_endpgm
-define void @aggressive_combine_to_mad_fsub_1_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 {
- %tid = tail call i32 @llvm.r600.read.tidig.x() #0
- %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
- %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
- %gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2
- %gep.3 = getelementptr float, float addrspace(1)* %gep.0, i32 3
- %gep.4 = getelementptr float, float addrspace(1)* %gep.0, i32 4
- %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid
-
- %x = load float, float addrspace(1)* %gep.0
- %y = load float, float addrspace(1)* %gep.1
- %z = load float, float addrspace(1)* %gep.2
- %u = load float, float addrspace(1)* %gep.3
- %v = load float, float addrspace(1)* %gep.4
-
- %tmp0 = fmul float %u, %v
- %tmp1 = call float @llvm.fma.f32(float %y, float %z, float %tmp0) #0
- %tmp2 = fsub float %x, %tmp1
-
- store float %tmp2, float addrspace(1)* %gep.out
- ret void
-}
-
-; fold (fsub (fma x, y, (fmul u, v)), z) -> (fma x, y, (fma u, v, (fneg z)))
-
-; FUNC-LABEL: {{^}}aggressive_combine_to_mad_fsub_2_f32:
-; SI-DAG: buffer_load_dword [[A:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
-; SI-DAG: buffer_load_dword [[B:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
-; SI-DAG: buffer_load_dword [[C:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
-; SI-DAG: buffer_load_dword [[D:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:12{{$}}
-; SI-DAG: buffer_load_dword [[E:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:16{{$}}
-
-; SI-STD: v_mad_f32 [[TMP:v[0-9]+]], [[D]], [[E]], -[[C]]
-; SI-STD: v_mad_f32 [[RESULT:v[0-9]+]], [[A]], [[B]], [[TMP]]
-
-; SI-DENORM: v_fma_f32 [[TMP:v[0-9]+]], [[D]], [[E]], -[[C]]
-; SI-DENORM: v_fma_f32 [[RESULT:v[0-9]+]], [[A]], [[B]], [[TMP]]
-
-; SI-DENORM-SLOWFMAF: v_mul_f32_e32 [[TMP0:v[0-9]+]], [[E]], [[D]]
-; SI-DENORM-SLOWFMAF: v_mul_f32_e32 [[TMP1:v[0-9]+]], [[B]], [[A]]
-; SI-DENORM-SLOWFMAF: v_add_f32_e32 [[TMP2:v[0-9]+]], [[TMP0]], [[TMP1]]
-; SI-DENORM-SLOWFMAF: v_subrev_f32_e32 [[RESULT:v[0-9]+]], [[C]], [[TMP2]]
-
-; SI: buffer_store_dword [[RESULT]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
-; SI: s_endpgm
-define void @aggressive_combine_to_mad_fsub_2_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 {
- %tid = tail call i32 @llvm.r600.read.tidig.x() #0
- %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
- %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
- %gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2
- %gep.3 = getelementptr float, float addrspace(1)* %gep.0, i32 3
- %gep.4 = getelementptr float, float addrspace(1)* %gep.0, i32 4
- %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid
-
- %x = load float, float addrspace(1)* %gep.0
- %y = load float, float addrspace(1)* %gep.1
- %z = load float, float addrspace(1)* %gep.2
- %u = load float, float addrspace(1)* %gep.3
- %v = load float, float addrspace(1)* %gep.4
-
- %tmp0 = fmul float %u, %v
- %tmp1 = call float @llvm.fmuladd.f32(float %x, float %y, float %tmp0) #0
- %tmp2 = fsub float %tmp1, %z
-
- store float %tmp2, float addrspace(1)* %gep.out
- ret void
-}
-
-; fold (fsub x, (fmuladd y, z, (fmul u, v)))
-; -> (fmuladd (fneg y), z, (fmuladd (fneg u), v, x))
-
-; FUNC-LABEL: {{^}}aggressive_combine_to_mad_fsub_3_f32:
-; SI-DAG: buffer_load_dword [[A:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
-; SI-DAG: buffer_load_dword [[B:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
-; SI-DAG: buffer_load_dword [[C:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
-; SI-DAG: buffer_load_dword [[D:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:12{{$}}
-; SI-DAG: buffer_load_dword [[E:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:16{{$}}
-
-; SI-STD: v_mad_f32 [[TMP:v[0-9]+]], -[[D]], [[E]], [[A]]
-; SI-STD: v_mad_f32 [[RESULT:v[0-9]+]], -[[B]], [[C]], [[TMP]]
-
-; SI-DENORM: v_fma_f32 [[TMP:v[0-9]+]], -[[D]], [[E]], [[A]]
-; SI-DENORM: v_fma_f32 [[RESULT:v[0-9]+]], -[[B]], [[C]], [[TMP]]
-
-; SI-DENORM-SLOWFMAF: v_mul_f32_e32 [[TMP0:v[0-9]+]], [[E]], [[D]]
-; SI-DENORM-SLOWFMAF: v_mul_f32_e32 [[TMP1:v[0-9]+]], [[C]], [[B]]
-; SI-DENORM-SLOWFMAF: v_add_f32_e32 [[TMP2:v[0-9]+]], [[TMP0]], [[TMP1]]
-; SI-DENORM-SLOWFMAF: v_subrev_f32_e32 [[RESULT:v[0-9]+]], [[TMP2]], [[A]]
-
-; SI: buffer_store_dword [[RESULT]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
-; SI: s_endpgm
-define void @aggressive_combine_to_mad_fsub_3_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 {
- %tid = tail call i32 @llvm.r600.read.tidig.x() #0
- %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
- %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
- %gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2
- %gep.3 = getelementptr float, float addrspace(1)* %gep.0, i32 3
- %gep.4 = getelementptr float, float addrspace(1)* %gep.0, i32 4
- %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid
-
- %x = load float, float addrspace(1)* %gep.0
- %y = load float, float addrspace(1)* %gep.1
- %z = load float, float addrspace(1)* %gep.2
- %u = load float, float addrspace(1)* %gep.3
- %v = load float, float addrspace(1)* %gep.4
-
- %tmp0 = fmul float %u, %v
- %tmp1 = call float @llvm.fmuladd.f32(float %y, float %z, float %tmp0) #0
- %tmp2 = fsub float %x, %tmp1
-
- store float %tmp2, float addrspace(1)* %gep.out
- ret void
-}
-
-attributes #0 = { nounwind readnone }
-attributes #1 = { nounwind }
Removed: llvm/trunk/test/CodeGen/R600/mad-sub.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/mad-sub.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/mad-sub.ll (original)
+++ llvm/trunk/test/CodeGen/R600/mad-sub.ll (removed)
@@ -1,215 +0,0 @@
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-
-declare i32 @llvm.r600.read.tidig.x() #0
-declare float @llvm.fabs.f32(float) #0
-
-; FUNC-LABEL: {{^}}mad_sub_f32:
-; SI: buffer_load_dword [[REGA:v[0-9]+]]
-; SI: buffer_load_dword [[REGB:v[0-9]+]]
-; SI: buffer_load_dword [[REGC:v[0-9]+]]
-; SI: v_mad_f32 [[RESULT:v[0-9]+]], [[REGA]], [[REGB]], -[[REGC]]
-; SI: buffer_store_dword [[RESULT]]
-define void @mad_sub_f32(float addrspace(1)* noalias nocapture %out, float addrspace(1)* noalias nocapture readonly %ptr) #1 {
- %tid = tail call i32 @llvm.r600.read.tidig.x() #0
- %tid.ext = sext i32 %tid to i64
- %gep0 = getelementptr float, float addrspace(1)* %ptr, i64 %tid.ext
- %add1 = add i64 %tid.ext, 1
- %gep1 = getelementptr float, float addrspace(1)* %ptr, i64 %add1
- %add2 = add i64 %tid.ext, 2
- %gep2 = getelementptr float, float addrspace(1)* %ptr, i64 %add2
- %outgep = getelementptr float, float addrspace(1)* %out, i64 %tid.ext
- %a = load float, float addrspace(1)* %gep0, align 4
- %b = load float, float addrspace(1)* %gep1, align 4
- %c = load float, float addrspace(1)* %gep2, align 4
- %mul = fmul float %a, %b
- %sub = fsub float %mul, %c
- store float %sub, float addrspace(1)* %outgep, align 4
- ret void
-}
-
-; FUNC-LABEL: {{^}}mad_sub_inv_f32:
-; SI: buffer_load_dword [[REGA:v[0-9]+]]
-; SI: buffer_load_dword [[REGB:v[0-9]+]]
-; SI: buffer_load_dword [[REGC:v[0-9]+]]
-; SI: v_mad_f32 [[RESULT:v[0-9]+]], -[[REGA]], [[REGB]], [[REGC]]
-; SI: buffer_store_dword [[RESULT]]
-define void @mad_sub_inv_f32(float addrspace(1)* noalias nocapture %out, float addrspace(1)* noalias nocapture readonly %ptr) #1 {
- %tid = tail call i32 @llvm.r600.read.tidig.x() #0
- %tid.ext = sext i32 %tid to i64
- %gep0 = getelementptr float, float addrspace(1)* %ptr, i64 %tid.ext
- %add1 = add i64 %tid.ext, 1
- %gep1 = getelementptr float, float addrspace(1)* %ptr, i64 %add1
- %add2 = add i64 %tid.ext, 2
- %gep2 = getelementptr float, float addrspace(1)* %ptr, i64 %add2
- %outgep = getelementptr float, float addrspace(1)* %out, i64 %tid.ext
- %a = load float, float addrspace(1)* %gep0, align 4
- %b = load float, float addrspace(1)* %gep1, align 4
- %c = load float, float addrspace(1)* %gep2, align 4
- %mul = fmul float %a, %b
- %sub = fsub float %c, %mul
- store float %sub, float addrspace(1)* %outgep, align 4
- ret void
-}
-
-; FUNC-LABEL: {{^}}mad_sub_f64:
-; SI: v_mul_f64
-; SI: v_add_f64
-define void @mad_sub_f64(double addrspace(1)* noalias nocapture %out, double addrspace(1)* noalias nocapture readonly %ptr) #1 {
- %tid = tail call i32 @llvm.r600.read.tidig.x() #0
- %tid.ext = sext i32 %tid to i64
- %gep0 = getelementptr double, double addrspace(1)* %ptr, i64 %tid.ext
- %add1 = add i64 %tid.ext, 1
- %gep1 = getelementptr double, double addrspace(1)* %ptr, i64 %add1
- %add2 = add i64 %tid.ext, 2
- %gep2 = getelementptr double, double addrspace(1)* %ptr, i64 %add2
- %outgep = getelementptr double, double addrspace(1)* %out, i64 %tid.ext
- %a = load double, double addrspace(1)* %gep0, align 8
- %b = load double, double addrspace(1)* %gep1, align 8
- %c = load double, double addrspace(1)* %gep2, align 8
- %mul = fmul double %a, %b
- %sub = fsub double %mul, %c
- store double %sub, double addrspace(1)* %outgep, align 8
- ret void
-}
-
-; FUNC-LABEL: {{^}}mad_sub_fabs_f32:
-; SI: buffer_load_dword [[REGA:v[0-9]+]]
-; SI: buffer_load_dword [[REGB:v[0-9]+]]
-; SI: buffer_load_dword [[REGC:v[0-9]+]]
-; SI: v_mad_f32 [[RESULT:v[0-9]+]], [[REGA]], [[REGB]], -|[[REGC]]|
-; SI: buffer_store_dword [[RESULT]]
-define void @mad_sub_fabs_f32(float addrspace(1)* noalias nocapture %out, float addrspace(1)* noalias nocapture readonly %ptr) #1 {
- %tid = tail call i32 @llvm.r600.read.tidig.x() #0
- %tid.ext = sext i32 %tid to i64
- %gep0 = getelementptr float, float addrspace(1)* %ptr, i64 %tid.ext
- %add1 = add i64 %tid.ext, 1
- %gep1 = getelementptr float, float addrspace(1)* %ptr, i64 %add1
- %add2 = add i64 %tid.ext, 2
- %gep2 = getelementptr float, float addrspace(1)* %ptr, i64 %add2
- %outgep = getelementptr float, float addrspace(1)* %out, i64 %tid.ext
- %a = load float, float addrspace(1)* %gep0, align 4
- %b = load float, float addrspace(1)* %gep1, align 4
- %c = load float, float addrspace(1)* %gep2, align 4
- %c.abs = call float @llvm.fabs.f32(float %c) #0
- %mul = fmul float %a, %b
- %sub = fsub float %mul, %c.abs
- store float %sub, float addrspace(1)* %outgep, align 4
- ret void
-}
-
-; FUNC-LABEL: {{^}}mad_sub_fabs_inv_f32:
-; SI: buffer_load_dword [[REGA:v[0-9]+]]
-; SI: buffer_load_dword [[REGB:v[0-9]+]]
-; SI: buffer_load_dword [[REGC:v[0-9]+]]
-; SI: v_mad_f32 [[RESULT:v[0-9]+]], -[[REGA]], [[REGB]], |[[REGC]]|
-; SI: buffer_store_dword [[RESULT]]
-define void @mad_sub_fabs_inv_f32(float addrspace(1)* noalias nocapture %out, float addrspace(1)* noalias nocapture readonly %ptr) #1 {
- %tid = tail call i32 @llvm.r600.read.tidig.x() #0
- %tid.ext = sext i32 %tid to i64
- %gep0 = getelementptr float, float addrspace(1)* %ptr, i64 %tid.ext
- %add1 = add i64 %tid.ext, 1
- %gep1 = getelementptr float, float addrspace(1)* %ptr, i64 %add1
- %add2 = add i64 %tid.ext, 2
- %gep2 = getelementptr float, float addrspace(1)* %ptr, i64 %add2
- %outgep = getelementptr float, float addrspace(1)* %out, i64 %tid.ext
- %a = load float, float addrspace(1)* %gep0, align 4
- %b = load float, float addrspace(1)* %gep1, align 4
- %c = load float, float addrspace(1)* %gep2, align 4
- %c.abs = call float @llvm.fabs.f32(float %c) #0
- %mul = fmul float %a, %b
- %sub = fsub float %c.abs, %mul
- store float %sub, float addrspace(1)* %outgep, align 4
- ret void
-}
-
-; FUNC-LABEL: {{^}}neg_neg_mad_f32:
-; SI: v_mad_f32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
-define void @neg_neg_mad_f32(float addrspace(1)* noalias nocapture %out, float addrspace(1)* noalias nocapture readonly %ptr) #1 {
- %tid = tail call i32 @llvm.r600.read.tidig.x() #0
- %tid.ext = sext i32 %tid to i64
- %gep0 = getelementptr float, float addrspace(1)* %ptr, i64 %tid.ext
- %add1 = add i64 %tid.ext, 1
- %gep1 = getelementptr float, float addrspace(1)* %ptr, i64 %add1
- %add2 = add i64 %tid.ext, 2
- %gep2 = getelementptr float, float addrspace(1)* %ptr, i64 %add2
- %outgep = getelementptr float, float addrspace(1)* %out, i64 %tid.ext
- %a = load float, float addrspace(1)* %gep0, align 4
- %b = load float, float addrspace(1)* %gep1, align 4
- %c = load float, float addrspace(1)* %gep2, align 4
- %nega = fsub float -0.000000e+00, %a
- %negb = fsub float -0.000000e+00, %b
- %mul = fmul float %nega, %negb
- %sub = fadd float %mul, %c
- store float %sub, float addrspace(1)* %outgep, align 4
- ret void
-}
-
-; FUNC-LABEL: {{^}}mad_fabs_sub_f32:
-; SI: buffer_load_dword [[REGA:v[0-9]+]]
-; SI: buffer_load_dword [[REGB:v[0-9]+]]
-; SI: buffer_load_dword [[REGC:v[0-9]+]]
-; SI: v_mad_f32 [[RESULT:v[0-9]+]], [[REGA]], |[[REGB]]|, -[[REGC]]
-; SI: buffer_store_dword [[RESULT]]
-define void @mad_fabs_sub_f32(float addrspace(1)* noalias nocapture %out, float addrspace(1)* noalias nocapture readonly %ptr) #1 {
- %tid = tail call i32 @llvm.r600.read.tidig.x() #0
- %tid.ext = sext i32 %tid to i64
- %gep0 = getelementptr float, float addrspace(1)* %ptr, i64 %tid.ext
- %add1 = add i64 %tid.ext, 1
- %gep1 = getelementptr float, float addrspace(1)* %ptr, i64 %add1
- %add2 = add i64 %tid.ext, 2
- %gep2 = getelementptr float, float addrspace(1)* %ptr, i64 %add2
- %outgep = getelementptr float, float addrspace(1)* %out, i64 %tid.ext
- %a = load float, float addrspace(1)* %gep0, align 4
- %b = load float, float addrspace(1)* %gep1, align 4
- %c = load float, float addrspace(1)* %gep2, align 4
- %b.abs = call float @llvm.fabs.f32(float %b) #0
- %mul = fmul float %a, %b.abs
- %sub = fsub float %mul, %c
- store float %sub, float addrspace(1)* %outgep, align 4
- ret void
-}
-
-; FUNC-LABEL: {{^}}fsub_c_fadd_a_a:
-; SI-DAG: buffer_load_dword [[R1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
-; SI-DAG: buffer_load_dword [[R2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
-; SI: v_mad_f32 [[RESULT:v[0-9]+]], -2.0, [[R1]], [[R2]]
-; SI: buffer_store_dword [[RESULT]]
-define void @fsub_c_fadd_a_a(float addrspace(1)* %out, float addrspace(1)* %in) {
- %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %gep.0 = getelementptr float, float addrspace(1)* %out, i32 %tid
- %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
- %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid
-
- %r1 = load float, float addrspace(1)* %gep.0
- %r2 = load float, float addrspace(1)* %gep.1
-
- %add = fadd float %r1, %r1
- %r3 = fsub float %r2, %add
-
- store float %r3, float addrspace(1)* %gep.out
- ret void
-}
-
-; FUNC-LABEL: {{^}}fsub_fadd_a_a_c:
-; SI-DAG: buffer_load_dword [[R1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
-; SI-DAG: buffer_load_dword [[R2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
-; SI: v_mad_f32 [[RESULT:v[0-9]+]], 2.0, [[R1]], -[[R2]]
-; SI: buffer_store_dword [[RESULT]]
-define void @fsub_fadd_a_a_c(float addrspace(1)* %out, float addrspace(1)* %in) {
- %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %gep.0 = getelementptr float, float addrspace(1)* %out, i32 %tid
- %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
- %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid
-
- %r1 = load float, float addrspace(1)* %gep.0
- %r2 = load float, float addrspace(1)* %gep.1
-
- %add = fadd float %r1, %r1
- %r3 = fsub float %add, %r2
-
- store float %r3, float addrspace(1)* %gep.out
- ret void
-}
-
-attributes #0 = { nounwind readnone }
-attributes #1 = { nounwind }
Removed: llvm/trunk/test/CodeGen/R600/mad_int24.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/mad_int24.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/mad_int24.ll (original)
+++ llvm/trunk/test/CodeGen/R600/mad_int24.ll (removed)
@@ -1,35 +0,0 @@
-; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=EG --check-prefix=FUNC
-; RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck %s --check-prefix=CM --check-prefix=FUNC
-; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI --check-prefix=FUNC
-; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s --check-prefix=SI --check-prefix=FUNC
-
-declare i32 @llvm.AMDGPU.imul24(i32, i32) nounwind readnone
-
-; FUNC-LABEL: {{^}}i32_mad24:
-; Signed 24-bit multiply is not supported on pre-Cayman GPUs.
-; EG: MULLO_INT
-; Make sure we aren't masking the inputs.
-; CM-NOT: AND
-; CM: MULADD_INT24
-; SI-NOT: and
-; SI: v_mad_i32_i24
-define void @i32_mad24(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c) {
-entry:
- %0 = shl i32 %a, 8
- %a_24 = ashr i32 %0, 8
- %1 = shl i32 %b, 8
- %b_24 = ashr i32 %1, 8
- %2 = mul i32 %a_24, %b_24
- %3 = add i32 %2, %c
- store i32 %3, i32 addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: @test_imul24
-; SI: v_mad_i32_i24
-define void @test_imul24(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) nounwind {
- %mul = call i32 @llvm.AMDGPU.imul24(i32 %src0, i32 %src1) nounwind readnone
- %add = add i32 %mul, %src2
- store i32 %add, i32 addrspace(1)* %out, align 4
- ret void
-}
Removed: llvm/trunk/test/CodeGen/R600/mad_uint24.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/mad_uint24.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/mad_uint24.ll (original)
+++ llvm/trunk/test/CodeGen/R600/mad_uint24.ll (removed)
@@ -1,76 +0,0 @@
-; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=EG --check-prefix=FUNC
-; RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck %s --check-prefix=EG --check-prefix=FUNC
-; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI --check-prefix=FUNC
-; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s --check-prefix=SI --check-prefix=FUNC
-
-; FUNC-LABEL: {{^}}u32_mad24:
-; EG: MULADD_UINT24
-; SI: v_mad_u32_u24
-
-define void @u32_mad24(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c) {
-entry:
- %0 = shl i32 %a, 8
- %a_24 = lshr i32 %0, 8
- %1 = shl i32 %b, 8
- %b_24 = lshr i32 %1, 8
- %2 = mul i32 %a_24, %b_24
- %3 = add i32 %2, %c
- store i32 %3, i32 addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}i16_mad24:
-; The order of A and B does not matter.
-; EG: MULADD_UINT24 {{[* ]*}}T{{[0-9]}}.[[MAD_CHAN:[XYZW]]]
-; The result must be sign-extended
-; EG: BFE_INT {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[MAD_CHAN]], 0.0, literal.x
-; EG: 16
-; SI: v_mad_u32_u24 [[MAD:v[0-9]]], {{[sv][0-9], [sv][0-9]}}
-; SI: v_bfe_i32 v{{[0-9]}}, [[MAD]], 0, 16
-
-define void @i16_mad24(i32 addrspace(1)* %out, i16 %a, i16 %b, i16 %c) {
-entry:
- %0 = mul i16 %a, %b
- %1 = add i16 %0, %c
- %2 = sext i16 %1 to i32
- store i32 %2, i32 addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}i8_mad24:
-; EG: MULADD_UINT24 {{[* ]*}}T{{[0-9]}}.[[MAD_CHAN:[XYZW]]]
-; The result must be sign-extended
-; EG: BFE_INT {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[MAD_CHAN]], 0.0, literal.x
-; EG: 8
-; SI: v_mad_u32_u24 [[MUL:v[0-9]]], {{[sv][0-9], [sv][0-9]}}
-; SI: v_bfe_i32 v{{[0-9]}}, [[MUL]], 0, 8
-
-define void @i8_mad24(i32 addrspace(1)* %out, i8 %a, i8 %b, i8 %c) {
-entry:
- %0 = mul i8 %a, %b
- %1 = add i8 %0, %c
- %2 = sext i8 %1 to i32
- store i32 %2, i32 addrspace(1)* %out
- ret void
-}
-
-; This tests for a bug where the mad_u24 pattern matcher would call
-; SimplifyDemandedBits on the first operand of the mul instruction
-; assuming that the pattern would be matched to a 24-bit mad. This
-; led to some instructions being incorrectly erased when the entire
-; 24-bit mad pattern wasn't being matched.
-
-; Check that the select instruction is not deleted.
-; FUNC-LABEL: {{^}}i24_i32_i32_mad:
-; EG: CNDE_INT
-; SI: v_cndmask
-define void @i24_i32_i32_mad(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c, i32 %d) {
-entry:
- %0 = ashr i32 %a, 8
- %1 = icmp ne i32 %c, 0
- %2 = select i1 %1, i32 %0, i32 34
- %3 = mul i32 %2, %c
- %4 = add i32 %3, %d
- store i32 %4, i32 addrspace(1)* %out
- ret void
-}
Removed: llvm/trunk/test/CodeGen/R600/madak.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/madak.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/madak.ll (original)
+++ llvm/trunk/test/CodeGen/R600/madak.ll (removed)
@@ -1,193 +0,0 @@
-; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN %s
-; XUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN %s
-
-; FIXME: Enable VI
-
-declare i32 @llvm.r600.read.tidig.x() nounwind readnone
-declare float @llvm.fabs.f32(float) nounwind readnone
-
-; GCN-LABEL: {{^}}madak_f32:
-; GCN: buffer_load_dword [[VA:v[0-9]+]]
-; GCN: buffer_load_dword [[VB:v[0-9]+]]
-; GCN: v_madak_f32_e32 {{v[0-9]+}}, [[VB]], [[VA]], 0x41200000
-define void @madak_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in.a, float addrspace(1)* noalias %in.b) nounwind {
- %tid = tail call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %in.a.gep = getelementptr float, float addrspace(1)* %in.a, i32 %tid
- %in.b.gep = getelementptr float, float addrspace(1)* %in.b, i32 %tid
- %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
-
- %a = load float, float addrspace(1)* %in.a.gep, align 4
- %b = load float, float addrspace(1)* %in.b.gep, align 4
-
- %mul = fmul float %a, %b
- %madak = fadd float %mul, 10.0
- store float %madak, float addrspace(1)* %out.gep, align 4
- ret void
-}
-
-; Make sure this is only folded with one use. This is a code size
-; optimization and if we fold the immediate multiple times, we'll undo
-; it.
-
-; GCN-LABEL: {{^}}madak_2_use_f32:
-; GCN-DAG: buffer_load_dword [[VA:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
-; GCN-DAG: buffer_load_dword [[VB:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
-; GCN-DAG: buffer_load_dword [[VC:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8
-; GCN-DAG: v_mov_b32_e32 [[VK:v[0-9]+]], 0x41200000
-; GCN-DAG: v_mad_f32 {{v[0-9]+}}, [[VA]], [[VB]], [[VK]]
-; GCN-DAG: v_mad_f32 {{v[0-9]+}}, [[VA]], [[VC]], [[VK]]
-; GCN: s_endpgm
-define void @madak_2_use_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) nounwind {
- %tid = tail call i32 @llvm.r600.read.tidig.x() nounwind readnone
-
- %in.gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
- %in.gep.1 = getelementptr float, float addrspace(1)* %in.gep.0, i32 1
- %in.gep.2 = getelementptr float, float addrspace(1)* %in.gep.0, i32 2
-
- %out.gep.0 = getelementptr float, float addrspace(1)* %out, i32 %tid
- %out.gep.1 = getelementptr float, float addrspace(1)* %in.gep.0, i32 1
-
- %a = load float, float addrspace(1)* %in.gep.0, align 4
- %b = load float, float addrspace(1)* %in.gep.1, align 4
- %c = load float, float addrspace(1)* %in.gep.2, align 4
-
- %mul0 = fmul float %a, %b
- %mul1 = fmul float %a, %c
- %madak0 = fadd float %mul0, 10.0
- %madak1 = fadd float %mul1, 10.0
-
- store float %madak0, float addrspace(1)* %out.gep.0, align 4
- store float %madak1, float addrspace(1)* %out.gep.1, align 4
- ret void
-}
-
-; GCN-LABEL: {{^}}madak_m_inline_imm_f32:
-; GCN: buffer_load_dword [[VA:v[0-9]+]]
-; GCN: v_madak_f32_e32 {{v[0-9]+}}, 4.0, [[VA]], 0x41200000
-define void @madak_m_inline_imm_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in.a) nounwind {
- %tid = tail call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %in.a.gep = getelementptr float, float addrspace(1)* %in.a, i32 %tid
- %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
-
- %a = load float, float addrspace(1)* %in.a.gep, align 4
-
- %mul = fmul float 4.0, %a
- %madak = fadd float %mul, 10.0
- store float %madak, float addrspace(1)* %out.gep, align 4
- ret void
-}
-
-; Make sure nothing weird happens with a value that is also allowed as
-; an inline immediate.
-
-; GCN-LABEL: {{^}}madak_inline_imm_f32:
-; GCN: buffer_load_dword [[VA:v[0-9]+]]
-; GCN: buffer_load_dword [[VB:v[0-9]+]]
-; GCN: v_mad_f32 {{v[0-9]+}}, [[VA]], [[VB]], 4.0
-define void @madak_inline_imm_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in.a, float addrspace(1)* noalias %in.b) nounwind {
- %tid = tail call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %in.a.gep = getelementptr float, float addrspace(1)* %in.a, i32 %tid
- %in.b.gep = getelementptr float, float addrspace(1)* %in.b, i32 %tid
- %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
-
- %a = load float, float addrspace(1)* %in.a.gep, align 4
- %b = load float, float addrspace(1)* %in.b.gep, align 4
-
- %mul = fmul float %a, %b
- %madak = fadd float %mul, 4.0
- store float %madak, float addrspace(1)* %out.gep, align 4
- ret void
-}
-
-; We can't use an SGPR when forming madak
-; GCN-LABEL: {{^}}s_v_madak_f32:
-; GCN: s_load_dword [[SB:s[0-9]+]]
-; GCN-DAG: v_mov_b32_e32 [[VK:v[0-9]+]], 0x41200000
-; GCN-DAG: buffer_load_dword [[VA:v[0-9]+]]
-; GCN-NOT: v_madak_f32
-; GCN: v_mad_f32 {{v[0-9]+}}, [[SB]], [[VA]], [[VK]]
-define void @s_v_madak_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in.a, float %b) nounwind {
- %tid = tail call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %in.a.gep = getelementptr float, float addrspace(1)* %in.a, i32 %tid
- %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
-
- %a = load float, float addrspace(1)* %in.a.gep, align 4
-
- %mul = fmul float %a, %b
- %madak = fadd float %mul, 10.0
- store float %madak, float addrspace(1)* %out.gep, align 4
- ret void
-}
-
-; GCN-LABEL: @v_s_madak_f32
-; GCN-DAG: s_load_dword [[SB:s[0-9]+]]
-; GCN-DAG: v_mov_b32_e32 [[VK:v[0-9]+]], 0x41200000
-; GCN-DAG: buffer_load_dword [[VA:v[0-9]+]]
-; GCN-NOT: v_madak_f32
-; GCN: v_mad_f32 {{v[0-9]+}}, [[VA]], [[SB]], [[VK]]
-define void @v_s_madak_f32(float addrspace(1)* noalias %out, float %a, float addrspace(1)* noalias %in.b) nounwind {
- %tid = tail call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %in.b.gep = getelementptr float, float addrspace(1)* %in.b, i32 %tid
- %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
-
- %b = load float, float addrspace(1)* %in.b.gep, align 4
-
- %mul = fmul float %a, %b
- %madak = fadd float %mul, 10.0
- store float %madak, float addrspace(1)* %out.gep, align 4
- ret void
-}
-
-; GCN-LABEL: {{^}}s_s_madak_f32:
-; GCN-NOT: v_madak_f32
-; GCN: v_mad_f32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
-define void @s_s_madak_f32(float addrspace(1)* %out, float %a, float %b) nounwind {
- %mul = fmul float %a, %b
- %madak = fadd float %mul, 10.0
- store float %madak, float addrspace(1)* %out, align 4
- ret void
-}
-
-; GCN-LABEL: {{^}}no_madak_src0_modifier_f32:
-; GCN: buffer_load_dword [[VA:v[0-9]+]]
-; GCN: buffer_load_dword [[VB:v[0-9]+]]
-; GCN: v_mad_f32 {{v[0-9]+}}, |{{v[0-9]+}}|, {{v[0-9]+}}, {{[sv][0-9]+}}
-; GCN: s_endpgm
-define void @no_madak_src0_modifier_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in.a, float addrspace(1)* noalias %in.b) nounwind {
- %tid = tail call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %in.a.gep = getelementptr float, float addrspace(1)* %in.a, i32 %tid
- %in.b.gep = getelementptr float, float addrspace(1)* %in.b, i32 %tid
- %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
-
- %a = load float, float addrspace(1)* %in.a.gep, align 4
- %b = load float, float addrspace(1)* %in.b.gep, align 4
-
- %a.fabs = call float @llvm.fabs.f32(float %a) nounwind readnone
-
- %mul = fmul float %a.fabs, %b
- %madak = fadd float %mul, 10.0
- store float %madak, float addrspace(1)* %out.gep, align 4
- ret void
-}
-
-; GCN-LABEL: {{^}}no_madak_src1_modifier_f32:
-; GCN: buffer_load_dword [[VA:v[0-9]+]]
-; GCN: buffer_load_dword [[VB:v[0-9]+]]
-; GCN: v_mad_f32 {{v[0-9]+}}, {{v[0-9]+}}, |{{v[0-9]+}}|, {{[sv][0-9]+}}
-; GCN: s_endpgm
-define void @no_madak_src1_modifier_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in.a, float addrspace(1)* noalias %in.b) nounwind {
- %tid = tail call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %in.a.gep = getelementptr float, float addrspace(1)* %in.a, i32 %tid
- %in.b.gep = getelementptr float, float addrspace(1)* %in.b, i32 %tid
- %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
-
- %a = load float, float addrspace(1)* %in.a.gep, align 4
- %b = load float, float addrspace(1)* %in.b.gep, align 4
-
- %b.fabs = call float @llvm.fabs.f32(float %b) nounwind readnone
-
- %mul = fmul float %a, %b.fabs
- %madak = fadd float %mul, 10.0
- store float %madak, float addrspace(1)* %out.gep, align 4
- ret void
-}
Removed: llvm/trunk/test/CodeGen/R600/madmk.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/madmk.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/madmk.ll (original)
+++ llvm/trunk/test/CodeGen/R600/madmk.ll (removed)
@@ -1,205 +0,0 @@
-; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
-; XUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
-
-declare i32 @llvm.r600.read.tidig.x() nounwind readnone
-declare float @llvm.fabs.f32(float) nounwind readnone
-
-; GCN-LABEL: {{^}}madmk_f32:
-; GCN-DAG: buffer_load_dword [[VA:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
-; GCN-DAG: buffer_load_dword [[VB:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
-; GCN: v_madmk_f32_e32 {{v[0-9]+}}, [[VA]], [[VB]], 0x41200000
-define void @madmk_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) nounwind {
- %tid = tail call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
- %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
- %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
-
- %a = load float, float addrspace(1)* %gep.0, align 4
- %b = load float, float addrspace(1)* %gep.1, align 4
-
- %mul = fmul float %a, 10.0
- %madmk = fadd float %mul, %b
- store float %madmk, float addrspace(1)* %out.gep, align 4
- ret void
-}
-
-; GCN-LABEL: {{^}}madmk_2_use_f32:
-; GCN-DAG: buffer_load_dword [[VA:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
-; GCN-DAG: buffer_load_dword [[VB:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
-; GCN-DAG: buffer_load_dword [[VC:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8
-; GCN-DAG: v_mov_b32_e32 [[VK:v[0-9]+]], 0x41200000
-; GCN-DAG: v_mad_f32 {{v[0-9]+}}, [[VA]], [[VK]], [[VB]]
-; GCN-DAG: v_mad_f32 {{v[0-9]+}}, [[VA]], [[VK]], [[VC]]
-; GCN: s_endpgm
-define void @madmk_2_use_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) nounwind {
- %tid = tail call i32 @llvm.r600.read.tidig.x() nounwind readnone
-
- %in.gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
- %in.gep.1 = getelementptr float, float addrspace(1)* %in.gep.0, i32 1
- %in.gep.2 = getelementptr float, float addrspace(1)* %in.gep.0, i32 2
-
- %out.gep.0 = getelementptr float, float addrspace(1)* %out, i32 %tid
- %out.gep.1 = getelementptr float, float addrspace(1)* %in.gep.0, i32 1
-
- %a = load float, float addrspace(1)* %in.gep.0, align 4
- %b = load float, float addrspace(1)* %in.gep.1, align 4
- %c = load float, float addrspace(1)* %in.gep.2, align 4
-
- %mul0 = fmul float %a, 10.0
- %mul1 = fmul float %a, 10.0
- %madmk0 = fadd float %mul0, %b
- %madmk1 = fadd float %mul1, %c
-
- store float %madmk0, float addrspace(1)* %out.gep.0, align 4
- store float %madmk1, float addrspace(1)* %out.gep.1, align 4
- ret void
-}
-
-; We don't get any benefit if the constant is an inline immediate.
-; GCN-LABEL: {{^}}madmk_inline_imm_f32:
-; GCN-DAG: buffer_load_dword [[VA:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
-; GCN-DAG: buffer_load_dword [[VB:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
-; GCN: v_mad_f32 {{v[0-9]+}}, 4.0, [[VA]], [[VB]]
-define void @madmk_inline_imm_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) nounwind {
- %tid = tail call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
- %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
- %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
-
- %a = load float, float addrspace(1)* %gep.0, align 4
- %b = load float, float addrspace(1)* %gep.1, align 4
-
- %mul = fmul float %a, 4.0
- %madmk = fadd float %mul, %b
- store float %madmk, float addrspace(1)* %out.gep, align 4
- ret void
-}
-
-; GCN-LABEL: {{^}}s_s_madmk_f32:
-; GCN-NOT: v_madmk_f32
-; GCN: v_mad_f32
-; GCN: s_endpgm
-define void @s_s_madmk_f32(float addrspace(1)* noalias %out, float %a, float %b) nounwind {
- %tid = tail call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
-
- %mul = fmul float %a, 10.0
- %madmk = fadd float %mul, %b
- store float %madmk, float addrspace(1)* %out.gep, align 4
- ret void
-}
-
-; GCN-LABEL: {{^}}v_s_madmk_f32:
-; GCN-NOT: v_madmk_f32
-; GCN: v_mad_f32
-; GCN: s_endpgm
-define void @v_s_madmk_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in, float %b) nounwind {
- %tid = tail call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
- %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
- %a = load float, float addrspace(1)* %gep.0, align 4
-
- %mul = fmul float %a, 10.0
- %madmk = fadd float %mul, %b
- store float %madmk, float addrspace(1)* %out.gep, align 4
- ret void
-}
-
-; GCN-LABEL: {{^}}scalar_vector_madmk_f32:
-; GCN-NOT: v_madmk_f32
-; GCN: v_mad_f32
-; GCN: s_endpgm
-define void @scalar_vector_madmk_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in, float %a) nounwind {
- %tid = tail call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
- %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
- %b = load float, float addrspace(1)* %gep.0, align 4
-
- %mul = fmul float %a, 10.0
- %madmk = fadd float %mul, %b
- store float %madmk, float addrspace(1)* %out.gep, align 4
- ret void
-}
-
-; GCN-LABEL: {{^}}no_madmk_src0_modifier_f32:
-; GCN-DAG: buffer_load_dword [[VA:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
-; GCN-DAG: buffer_load_dword [[VB:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
-; GCN: v_mad_f32 {{v[0-9]+}}, |{{v[0-9]+}}|, {{v[0-9]+}}, {{[sv][0-9]+}}
-define void @no_madmk_src0_modifier_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) nounwind {
- %tid = tail call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
- %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
- %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
-
- %a = load float, float addrspace(1)* %gep.0, align 4
- %b = load float, float addrspace(1)* %gep.1, align 4
-
- %a.fabs = call float @llvm.fabs.f32(float %a) nounwind readnone
-
- %mul = fmul float %a.fabs, 10.0
- %madmk = fadd float %mul, %b
- store float %madmk, float addrspace(1)* %out.gep, align 4
- ret void
-}
-
-; GCN-LABEL: {{^}}no_madmk_src2_modifier_f32:
-; GCN-DAG: buffer_load_dword [[VA:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
-; GCN-DAG: buffer_load_dword [[VB:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
-; GCN: v_mad_f32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, |{{[sv][0-9]+}}|
-define void @no_madmk_src2_modifier_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) nounwind {
- %tid = tail call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
- %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
- %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
-
- %a = load float, float addrspace(1)* %gep.0, align 4
- %b = load float, float addrspace(1)* %gep.1, align 4
-
- %b.fabs = call float @llvm.fabs.f32(float %b) nounwind readnone
-
- %mul = fmul float %a, 10.0
- %madmk = fadd float %mul, %b.fabs
- store float %madmk, float addrspace(1)* %out.gep, align 4
- ret void
-}
-
-; GCN-LABEL: {{^}}madmk_add_inline_imm_f32:
-; GCN: buffer_load_dword [[A:v[0-9]+]]
-; GCN: v_mov_b32_e32 [[VK:v[0-9]+]], 0x41200000
-; GCN: v_mad_f32 {{v[0-9]+}}, [[VK]], [[A]], 2.0
-define void @madmk_add_inline_imm_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) nounwind {
- %tid = tail call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
- %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
-
- %a = load float, float addrspace(1)* %gep.0, align 4
-
- %mul = fmul float %a, 10.0
- %madmk = fadd float %mul, 2.0
- store float %madmk, float addrspace(1)* %out.gep, align 4
- ret void
-}
-
-; SI-LABEL: {{^}}kill_madmk_verifier_error:
-; SI: s_xor_b64
-; SI: v_madmk_f32_e32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, 0x472aee8c
-; SI: s_or_b64
-define void @kill_madmk_verifier_error() nounwind {
-bb:
- br label %bb2
-
-bb1: ; preds = %bb2
- ret void
-
-bb2: ; preds = %bb6, %bb
- %tmp = phi float [ undef, %bb ], [ %tmp8, %bb6 ]
- %tmp3 = fsub float undef, %tmp
- %tmp5 = fcmp oeq float %tmp3, 1.000000e+04
- br i1 %tmp5, label %bb1, label %bb6
-
-bb6: ; preds = %bb2
- %tmp4 = fmul float %tmp, undef
- %tmp7 = fmul float %tmp4, 0x40E55DD180000000
- %tmp8 = fadd float %tmp7, undef
- br label %bb2
-}
Removed: llvm/trunk/test/CodeGen/R600/max-literals.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/max-literals.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/max-literals.ll (original)
+++ llvm/trunk/test/CodeGen/R600/max-literals.ll (removed)
@@ -1,67 +0,0 @@
-;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
-
-; CHECK-LABEL: {{^}}main:
-; CHECK: ADD *
-
-define void @main(<4 x float> inreg %reg0, <4 x float> inreg %reg1, <4 x float> inreg %reg2) #0 {
-main_body:
- %0 = extractelement <4 x float> %reg1, i32 0
- %1 = extractelement <4 x float> %reg1, i32 1
- %2 = extractelement <4 x float> %reg1, i32 2
- %3 = extractelement <4 x float> %reg1, i32 3
- %4 = extractelement <4 x float> %reg2, i32 0
- %5 = fadd float %0, 2.0
- %6 = fadd float %1, 3.0
- %7 = fadd float %2, 4.0
- %8 = fadd float %3, 5.0
- %9 = bitcast float %4 to i32
- %10 = mul i32 %9, 6
- %11 = bitcast i32 %10 to float
- %12 = insertelement <4 x float> undef, float %5, i32 0
- %13 = insertelement <4 x float> %12, float %6, i32 1
- %14 = insertelement <4 x float> %13, float %7, i32 2
- %15 = insertelement <4 x float> %14, float %8, i32 3
- %16 = insertelement <4 x float> %15, float %11, i32 3
-
- %17 = call float @llvm.AMDGPU.dp4(<4 x float> %15,<4 x float> %16)
- %18 = insertelement <4 x float> undef, float %17, i32 0
- call void @llvm.R600.store.swizzle(<4 x float> %18, i32 0, i32 2)
- ret void
-}
-
-; CHECK-LABEL: {{^}}main2:
-; CHECK-NOT: ADD *
-
-define void @main2(<4 x float> inreg %reg0, <4 x float> inreg %reg1, <4 x float> inreg %reg2) #0 {
-main_body:
- %0 = extractelement <4 x float> %reg1, i32 0
- %1 = extractelement <4 x float> %reg1, i32 1
- %2 = extractelement <4 x float> %reg1, i32 2
- %3 = extractelement <4 x float> %reg1, i32 3
- %4 = extractelement <4 x float> %reg2, i32 0
- %5 = fadd float %0, 2.0
- %6 = fadd float %1, 3.0
- %7 = fadd float %2, 4.0
- %8 = fadd float %3, 2.0
- %9 = bitcast float %4 to i32
- %10 = mul i32 %9, 6
- %11 = bitcast i32 %10 to float
- %12 = insertelement <4 x float> undef, float %5, i32 0
- %13 = insertelement <4 x float> %12, float %6, i32 1
- %14 = insertelement <4 x float> %13, float %7, i32 2
- %15 = insertelement <4 x float> %14, float %8, i32 3
- %16 = insertelement <4 x float> %15, float %11, i32 3
-
- %17 = call float @llvm.AMDGPU.dp4(<4 x float> %15,<4 x float> %16)
- %18 = insertelement <4 x float> undef, float %17, i32 0
- call void @llvm.R600.store.swizzle(<4 x float> %18, i32 0, i32 2)
- ret void
-}
-
-; Function Attrs: readnone
-declare float @llvm.AMDGPU.dp4(<4 x float>, <4 x float>) #1
-
-declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
-
-attributes #0 = { "ShaderType"="1" }
-attributes #1 = { readnone }
Removed: llvm/trunk/test/CodeGen/R600/max.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/max.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/max.ll (original)
+++ llvm/trunk/test/CodeGen/R600/max.ll (removed)
@@ -1,168 +0,0 @@
-; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-
-declare i32 @llvm.r600.read.tidig.x() nounwind readnone
-
-; FUNC-LABEL: @v_test_imax_sge_i32
-; SI: v_max_i32_e32
-define void @v_test_imax_sge_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
- %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %gep0 = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid
- %gep1 = getelementptr i32, i32 addrspace(1)* %bptr, i32 %tid
- %outgep = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
- %a = load i32, i32 addrspace(1)* %gep0, align 4
- %b = load i32, i32 addrspace(1)* %gep1, align 4
- %cmp = icmp sge i32 %a, %b
- %val = select i1 %cmp, i32 %a, i32 %b
- store i32 %val, i32 addrspace(1)* %outgep, align 4
- ret void
-}
-
-; FUNC-LABEL: @s_test_imax_sge_i32
-; SI: s_max_i32
-define void @s_test_imax_sge_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
- %cmp = icmp sge i32 %a, %b
- %val = select i1 %cmp, i32 %a, i32 %b
- store i32 %val, i32 addrspace(1)* %out, align 4
- ret void
-}
-
-; FUNC-LABEL: {{^}}s_test_imax_sge_imm_i32:
-; SI: s_max_i32 {{s[0-9]+}}, {{s[0-9]+}}, 9
-define void @s_test_imax_sge_imm_i32(i32 addrspace(1)* %out, i32 %a) nounwind {
- %cmp = icmp sge i32 %a, 9
- %val = select i1 %cmp, i32 %a, i32 9
- store i32 %val, i32 addrspace(1)* %out, align 4
- ret void
-}
-
-; FUNC-LABEL: {{^}}s_test_imax_sgt_imm_i32:
-; SI: s_max_i32 {{s[0-9]+}}, {{s[0-9]+}}, 9
-define void @s_test_imax_sgt_imm_i32(i32 addrspace(1)* %out, i32 %a) nounwind {
- %cmp = icmp sgt i32 %a, 9
- %val = select i1 %cmp, i32 %a, i32 9
- store i32 %val, i32 addrspace(1)* %out, align 4
- ret void
-}
-
-; FUNC-LABEL: @v_test_imax_sgt_i32
-; SI: v_max_i32_e32
-define void @v_test_imax_sgt_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
- %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %gep0 = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid
- %gep1 = getelementptr i32, i32 addrspace(1)* %bptr, i32 %tid
- %outgep = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
- %a = load i32, i32 addrspace(1)* %gep0, align 4
- %b = load i32, i32 addrspace(1)* %gep1, align 4
- %cmp = icmp sgt i32 %a, %b
- %val = select i1 %cmp, i32 %a, i32 %b
- store i32 %val, i32 addrspace(1)* %outgep, align 4
- ret void
-}
-
-; FUNC-LABEL: @s_test_imax_sgt_i32
-; SI: s_max_i32
-define void @s_test_imax_sgt_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
- %cmp = icmp sgt i32 %a, %b
- %val = select i1 %cmp, i32 %a, i32 %b
- store i32 %val, i32 addrspace(1)* %out, align 4
- ret void
-}
-
-; FUNC-LABEL: @v_test_umax_uge_i32
-; SI: v_max_u32_e32
-define void @v_test_umax_uge_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
- %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %gep0 = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid
- %gep1 = getelementptr i32, i32 addrspace(1)* %bptr, i32 %tid
- %outgep = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
- %a = load i32, i32 addrspace(1)* %gep0, align 4
- %b = load i32, i32 addrspace(1)* %gep1, align 4
- %cmp = icmp uge i32 %a, %b
- %val = select i1 %cmp, i32 %a, i32 %b
- store i32 %val, i32 addrspace(1)* %outgep, align 4
- ret void
-}
-
-; FUNC-LABEL: @s_test_umax_uge_i32
-; SI: s_max_u32
-define void @s_test_umax_uge_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
- %cmp = icmp uge i32 %a, %b
- %val = select i1 %cmp, i32 %a, i32 %b
- store i32 %val, i32 addrspace(1)* %out, align 4
- ret void
-}
-
-; FUNC-LABEL: @v_test_umax_ugt_i32
-; SI: v_max_u32_e32
-define void @v_test_umax_ugt_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
- %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %gep0 = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid
- %gep1 = getelementptr i32, i32 addrspace(1)* %bptr, i32 %tid
- %outgep = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
- %a = load i32, i32 addrspace(1)* %gep0, align 4
- %b = load i32, i32 addrspace(1)* %gep1, align 4
- %cmp = icmp ugt i32 %a, %b
- %val = select i1 %cmp, i32 %a, i32 %b
- store i32 %val, i32 addrspace(1)* %outgep, align 4
- ret void
-}
-
-; FUNC-LABEL: @s_test_umax_ugt_i32
-; SI: s_max_u32
-define void @s_test_umax_ugt_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
- %cmp = icmp ugt i32 %a, %b
- %val = select i1 %cmp, i32 %a, i32 %b
- store i32 %val, i32 addrspace(1)* %out, align 4
- ret void
-}
-
-; Make sure the redundant and is removed
-; FUNC-LABEL: {{^}}simplify_demanded_bits_test_umax_ugt_i16:
-; SI-DAG: s_load_dword [[A:s[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
-; SI-DAG: s_load_dword [[B:s[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, 0xc
-; SI: s_max_u32 [[MIN:s[0-9]+]], [[A]], [[B]]
-; SI-NEXT: v_mov_b32_e32 [[VMIN:v[0-9]+]], [[MIN]]
-; SI-NEXT: buffer_store_dword [[VMIN]]
-define void @simplify_demanded_bits_test_umax_ugt_i16(i32 addrspace(1)* %out, i16 zeroext %a, i16 zeroext %b) nounwind {
- %a.ext = zext i16 %a to i32
- %b.ext = zext i16 %b to i32
- %cmp = icmp ugt i32 %a.ext, %b.ext
- %val = select i1 %cmp, i32 %a.ext, i32 %b.ext
- %mask = and i32 %val, 65535
- store i32 %mask, i32 addrspace(1)* %out
- ret void
-}
-
-; Make sure the redundant sign_extend_inreg is removed.
-
-; FUNC-LABEL: {{^}}simplify_demanded_bits_test_min_slt_i16:
-; SI-DAG: s_load_dword [[A:s[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
-; SI-DAG: s_load_dword [[B:s[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, 0xc
-; SI: s_max_i32 [[MIN:s[0-9]+]], [[A]], [[B]]
-; SI-NEXT: v_mov_b32_e32 [[VMIN:v[0-9]+]], [[MIN]]
-; SI-NEXT: buffer_store_dword [[VMIN]]
-define void @simplify_demanded_bits_test_min_slt_i16(i32 addrspace(1)* %out, i16 signext %a, i16 signext %b) nounwind {
- %a.ext = sext i16 %a to i32
- %b.ext = sext i16 %b to i32
- %cmp = icmp sgt i32 %a.ext, %b.ext
- %val = select i1 %cmp, i32 %a.ext, i32 %b.ext
- %shl = shl i32 %val, 16
- %sextinreg = ashr i32 %shl, 16
- store i32 %sextinreg, i32 addrspace(1)* %out
- ret void
-}
-
-; FIXME: Should match min/max through extends inserted by
-; legalization.
-
-; FUNC-LABEL: {{^}}s_test_imin_sge_i16:
-; SI: s_sext_i32_i16
-; SI: s_sext_i32_i16
-; SI: v_cmp_ge_i32_e32
-; SI: v_cndmask_b32
-define void @s_test_imin_sge_i16(i16 addrspace(1)* %out, i16 %a, i16 %b) nounwind {
- %cmp = icmp sge i16 %a, %b
- %val = select i1 %cmp, i16 %a, i16 %b
- store i16 %val, i16 addrspace(1)* %out
- ret void
-}
Removed: llvm/trunk/test/CodeGen/R600/max3.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/max3.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/max3.ll (original)
+++ llvm/trunk/test/CodeGen/R600/max3.ll (removed)
@@ -1,41 +0,0 @@
-; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-
-declare i32 @llvm.r600.read.tidig.x() nounwind readnone
-
-; FUNC-LABEL: @v_test_imax3_sgt_i32
-; SI: v_max3_i32
-define void @v_test_imax3_sgt_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr, i32 addrspace(1)* %cptr) nounwind {
- %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %gep0 = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid
- %gep1 = getelementptr i32, i32 addrspace(1)* %bptr, i32 %tid
- %gep2 = getelementptr i32, i32 addrspace(1)* %cptr, i32 %tid
- %outgep = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
- %a = load i32, i32 addrspace(1)* %gep0, align 4
- %b = load i32, i32 addrspace(1)* %gep1, align 4
- %c = load i32, i32 addrspace(1)* %gep2, align 4
- %icmp0 = icmp sgt i32 %a, %b
- %i0 = select i1 %icmp0, i32 %a, i32 %b
- %icmp1 = icmp sgt i32 %i0, %c
- %i1 = select i1 %icmp1, i32 %i0, i32 %c
- store i32 %i1, i32 addrspace(1)* %out, align 4
- ret void
-}
-
-; FUNC-LABEL: @v_test_umax3_ugt_i32
-; SI: v_max3_u32
-define void @v_test_umax3_ugt_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr, i32 addrspace(1)* %cptr) nounwind {
- %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %gep0 = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid
- %gep1 = getelementptr i32, i32 addrspace(1)* %bptr, i32 %tid
- %gep2 = getelementptr i32, i32 addrspace(1)* %cptr, i32 %tid
- %outgep = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
- %a = load i32, i32 addrspace(1)* %gep0, align 4
- %b = load i32, i32 addrspace(1)* %gep1, align 4
- %c = load i32, i32 addrspace(1)* %gep2, align 4
- %icmp0 = icmp ugt i32 %a, %b
- %i0 = select i1 %icmp0, i32 %a, i32 %b
- %icmp1 = icmp ugt i32 %i0, %c
- %i1 = select i1 %icmp1, i32 %i0, i32 %c
- store i32 %i1, i32 addrspace(1)* %out, align 4
- ret void
-}
Removed: llvm/trunk/test/CodeGen/R600/merge-stores.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/merge-stores.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/merge-stores.ll (original)
+++ llvm/trunk/test/CodeGen/R600/merge-stores.ll (removed)
@@ -1,536 +0,0 @@
-; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN %s
-; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN %s
-
-; Run with devices with different unaligned load restrictions.
-
-; TODO: Vector element tests
-; TODO: Non-zero base offset for load and store combinations
-; TODO: Same base addrspacecasted
-
-
-; GCN-LABEL: {{^}}merge_global_store_2_constants_i8:
-; GCN: buffer_store_byte
-; GCN: buffer_store_byte
-; GCN: s_endpgm
-define void @merge_global_store_2_constants_i8(i8 addrspace(1)* %out) #0 {
- %out.gep.1 = getelementptr i8, i8 addrspace(1)* %out, i32 1
-
- store i8 123, i8 addrspace(1)* %out.gep.1
- store i8 456, i8 addrspace(1)* %out, align 2
- ret void
-}
-
-; GCN-LABEL: {{^}}merge_global_store_2_constants_i8_natural_align:
-; GCN: buffer_store_byte
-; GCN: buffer_store_byte
-; GCN: s_endpgm
-define void @merge_global_store_2_constants_i8_natural_align(i8 addrspace(1)* %out) #0 {
- %out.gep.1 = getelementptr i8, i8 addrspace(1)* %out, i32 1
-
- store i8 123, i8 addrspace(1)* %out.gep.1
- store i8 456, i8 addrspace(1)* %out
- ret void
-}
-
-; GCN-LABEL: {{^}}merge_global_store_2_constants_i16:
-; GCN: buffer_store_dword v
-define void @merge_global_store_2_constants_i16(i16 addrspace(1)* %out) #0 {
- %out.gep.1 = getelementptr i16, i16 addrspace(1)* %out, i32 1
-
- store i16 123, i16 addrspace(1)* %out.gep.1
- store i16 456, i16 addrspace(1)* %out, align 4
- ret void
-}
-
-; GCN-LABEL: {{^}}merge_global_store_2_constants_0_i16:
-; GCN: buffer_store_dword v
-define void @merge_global_store_2_constants_0_i16(i16 addrspace(1)* %out) #0 {
- %out.gep.1 = getelementptr i16, i16 addrspace(1)* %out, i32 1
-
- store i16 0, i16 addrspace(1)* %out.gep.1
- store i16 0, i16 addrspace(1)* %out, align 4
- ret void
-}
-
-; GCN-LABEL: {{^}}merge_global_store_2_constants_i16_natural_align:
-; GCN: buffer_store_short
-; GCN: buffer_store_short
-; GCN: s_endpgm
-define void @merge_global_store_2_constants_i16_natural_align(i16 addrspace(1)* %out) #0 {
- %out.gep.1 = getelementptr i16, i16 addrspace(1)* %out, i32 1
-
- store i16 123, i16 addrspace(1)* %out.gep.1
- store i16 456, i16 addrspace(1)* %out
- ret void
-}
-
-; GCN-LABEL: {{^}}merge_global_store_2_constants_i32:
-; SI-DAG: s_movk_i32 [[SLO:s[0-9]+]], 0x1c8
-; SI-DAG: s_movk_i32 [[SHI:s[0-9]+]], 0x7b
-; SI-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], [[SLO]]
-; SI-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], [[SHI]]
-; GCN: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
-define void @merge_global_store_2_constants_i32(i32 addrspace(1)* %out) #0 {
- %out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i32 1
-
- store i32 123, i32 addrspace(1)* %out.gep.1
- store i32 456, i32 addrspace(1)* %out
- ret void
-}
-
-; GCN-LABEL: {{^}}merge_global_store_2_constants_i32_f32:
-; GCN: buffer_store_dwordx2
-define void @merge_global_store_2_constants_i32_f32(i32 addrspace(1)* %out) #0 {
- %out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i32 1
- %out.gep.1.bc = bitcast i32 addrspace(1)* %out.gep.1 to float addrspace(1)*
- store float 1.0, float addrspace(1)* %out.gep.1.bc
- store i32 456, i32 addrspace(1)* %out
- ret void
-}
-
-; GCN-LABEL: {{^}}merge_global_store_2_constants_f32_i32:
-; GCN: buffer_store_dwordx2
-define void @merge_global_store_2_constants_f32_i32(float addrspace(1)* %out) #0 {
- %out.gep.1 = getelementptr float, float addrspace(1)* %out, i32 1
- %out.gep.1.bc = bitcast float addrspace(1)* %out.gep.1 to i32 addrspace(1)*
- store i32 123, i32 addrspace(1)* %out.gep.1.bc
- store float 4.0, float addrspace(1)* %out
- ret void
-}
-
-; GCN-LABEL: {{^}}merge_global_store_4_constants_i32:
-; GCN: buffer_store_dwordx4
-define void @merge_global_store_4_constants_i32(i32 addrspace(1)* %out) #0 {
- %out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i32 1
- %out.gep.2 = getelementptr i32, i32 addrspace(1)* %out, i32 2
- %out.gep.3 = getelementptr i32, i32 addrspace(1)* %out, i32 3
-
- store i32 123, i32 addrspace(1)* %out.gep.1
- store i32 456, i32 addrspace(1)* %out.gep.2
- store i32 333, i32 addrspace(1)* %out.gep.3
- store i32 1234, i32 addrspace(1)* %out
- ret void
-}
-
-; GCN-LABEL: {{^}}merge_global_store_4_constants_f32_order:
-; XGCN: buffer_store_dwordx4
-; GCN: buffer_store_dword v
-; GCN: buffer_store_dword v
-; GCN: buffer_store_dwordx2 v
-define void @merge_global_store_4_constants_f32_order(float addrspace(1)* %out) #0 {
- %out.gep.1 = getelementptr float, float addrspace(1)* %out, i32 1
- %out.gep.2 = getelementptr float, float addrspace(1)* %out, i32 2
- %out.gep.3 = getelementptr float, float addrspace(1)* %out, i32 3
-
- store float 8.0, float addrspace(1)* %out
- store float 1.0, float addrspace(1)* %out.gep.1
- store float 2.0, float addrspace(1)* %out.gep.2
- store float 4.0, float addrspace(1)* %out.gep.3
- ret void
-}
-
-; The first store is out of order. Because of the order of combines, the
-; consecutive-store merge fails: only some of the stores have been
-; replaced with integer constant stores, and the resulting mix of types
-; then prevents merging.
-
-; GCN-LABEL: {{^}}merge_global_store_4_constants_f32:
-; XGCN: buffer_store_dwordx4
-; GCN: buffer_store_dword v
-; GCN: buffer_store_dword v
-; GCN: buffer_store_dword v
-; GCN: buffer_store_dword v
-define void @merge_global_store_4_constants_f32(float addrspace(1)* %out) #0 {
- %out.gep.1 = getelementptr float, float addrspace(1)* %out, i32 1
- %out.gep.2 = getelementptr float, float addrspace(1)* %out, i32 2
- %out.gep.3 = getelementptr float, float addrspace(1)* %out, i32 3
-
- store float 1.0, float addrspace(1)* %out.gep.1
- store float 2.0, float addrspace(1)* %out.gep.2
- store float 4.0, float addrspace(1)* %out.gep.3
- store float 8.0, float addrspace(1)* %out
- ret void
-}
-
-; GCN-LABEL: {{^}}merge_global_store_3_constants_i32:
-; SI-DAG: buffer_store_dwordx2
-; SI-DAG: buffer_store_dword
-; SI-NOT: buffer_store_dword
-; GCN: s_endpgm
-define void @merge_global_store_3_constants_i32(i32 addrspace(1)* %out) #0 {
- %out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i32 1
- %out.gep.2 = getelementptr i32, i32 addrspace(1)* %out, i32 2
-
- store i32 123, i32 addrspace(1)* %out.gep.1
- store i32 456, i32 addrspace(1)* %out.gep.2
- store i32 1234, i32 addrspace(1)* %out
- ret void
-}
-
-; GCN-LABEL: {{^}}merge_global_store_2_constants_i64:
-; XGCN: buffer_store_dwordx4
-; GCN: buffer_store_dwordx2
-; GCN: buffer_store_dwordx2
-define void @merge_global_store_2_constants_i64(i64 addrspace(1)* %out) #0 {
- %out.gep.1 = getelementptr i64, i64 addrspace(1)* %out, i64 1
-
- store i64 123, i64 addrspace(1)* %out.gep.1
- store i64 456, i64 addrspace(1)* %out
- ret void
-}
-
-; GCN-LABEL: {{^}}merge_global_store_4_constants_i64:
-; XGCN: buffer_store_dwordx4
-; XGCN: buffer_store_dwordx4
-
-; GCN: buffer_store_dwordx2
-; GCN: buffer_store_dwordx2
-; GCN: buffer_store_dwordx2
-; GCN: buffer_store_dwordx2
-define void @merge_global_store_4_constants_i64(i64 addrspace(1)* %out) #0 {
- %out.gep.1 = getelementptr i64, i64 addrspace(1)* %out, i64 1
- %out.gep.2 = getelementptr i64, i64 addrspace(1)* %out, i64 2
- %out.gep.3 = getelementptr i64, i64 addrspace(1)* %out, i64 3
-
- store i64 123, i64 addrspace(1)* %out.gep.1
- store i64 456, i64 addrspace(1)* %out.gep.2
- store i64 333, i64 addrspace(1)* %out.gep.3
- store i64 1234, i64 addrspace(1)* %out
- ret void
-}
-
-; GCN-LABEL: {{^}}merge_global_store_2_adjacent_loads_i32:
-; GCN: buffer_load_dwordx2 [[LOAD:v\[[0-9]+:[0-9]+\]]]
-; GCN: buffer_store_dwordx2 [[LOAD]]
-define void @merge_global_store_2_adjacent_loads_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
- %out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i32 1
- %in.gep.1 = getelementptr i32, i32 addrspace(1)* %in, i32 1
-
- %lo = load i32, i32 addrspace(1)* %in
- %hi = load i32, i32 addrspace(1)* %in.gep.1
-
- store i32 %lo, i32 addrspace(1)* %out
- store i32 %hi, i32 addrspace(1)* %out.gep.1
- ret void
-}
-
-; GCN-LABEL: {{^}}merge_global_store_2_adjacent_loads_i32_nonzero_base:
-; GCN: buffer_load_dwordx2 [[LOAD:v\[[0-9]+:[0-9]+\]]], s{{\[[0-9]+:[0-9]+\]}}, 0 offset:8
-; GCN: buffer_store_dwordx2 [[LOAD]], s{{\[[0-9]+:[0-9]+\]}}, 0 offset:8
-define void @merge_global_store_2_adjacent_loads_i32_nonzero_base(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
- %in.gep.0 = getelementptr i32, i32 addrspace(1)* %in, i32 2
- %in.gep.1 = getelementptr i32, i32 addrspace(1)* %in, i32 3
-
- %out.gep.0 = getelementptr i32, i32 addrspace(1)* %out, i32 2
- %out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i32 3
- %lo = load i32, i32 addrspace(1)* %in.gep.0
- %hi = load i32, i32 addrspace(1)* %in.gep.1
-
- store i32 %lo, i32 addrspace(1)* %out.gep.0
- store i32 %hi, i32 addrspace(1)* %out.gep.1
- ret void
-}
-
-; GCN-LABEL: {{^}}merge_global_store_2_adjacent_loads_shuffle_i32:
-; GCN: buffer_load_dword v
-; GCN: buffer_load_dword v
-; GCN: buffer_store_dword v
-; GCN: buffer_store_dword v
-define void @merge_global_store_2_adjacent_loads_shuffle_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
- %out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i32 1
- %in.gep.1 = getelementptr i32, i32 addrspace(1)* %in, i32 1
-
- %lo = load i32, i32 addrspace(1)* %in
- %hi = load i32, i32 addrspace(1)* %in.gep.1
-
- store i32 %hi, i32 addrspace(1)* %out
- store i32 %lo, i32 addrspace(1)* %out.gep.1
- ret void
-}
-
-; GCN-LABEL: {{^}}merge_global_store_4_adjacent_loads_i32:
-; GCN: buffer_load_dwordx4 [[LOAD:v\[[0-9]+:[0-9]+\]]]
-; GCN: buffer_store_dwordx4 [[LOAD]]
-define void @merge_global_store_4_adjacent_loads_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
- %out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i32 1
- %out.gep.2 = getelementptr i32, i32 addrspace(1)* %out, i32 2
- %out.gep.3 = getelementptr i32, i32 addrspace(1)* %out, i32 3
- %in.gep.1 = getelementptr i32, i32 addrspace(1)* %in, i32 1
- %in.gep.2 = getelementptr i32, i32 addrspace(1)* %in, i32 2
- %in.gep.3 = getelementptr i32, i32 addrspace(1)* %in, i32 3
-
- %x = load i32, i32 addrspace(1)* %in
- %y = load i32, i32 addrspace(1)* %in.gep.1
- %z = load i32, i32 addrspace(1)* %in.gep.2
- %w = load i32, i32 addrspace(1)* %in.gep.3
-
- store i32 %x, i32 addrspace(1)* %out
- store i32 %y, i32 addrspace(1)* %out.gep.1
- store i32 %z, i32 addrspace(1)* %out.gep.2
- store i32 %w, i32 addrspace(1)* %out.gep.3
- ret void
-}
-
-; GCN-LABEL: {{^}}merge_global_store_3_adjacent_loads_i32:
-; SI-DAG: buffer_load_dwordx2
-; SI-DAG: buffer_load_dword v
-; GCN: s_waitcnt
-; SI-DAG: buffer_store_dword v
-; SI-DAG: buffer_store_dwordx2 v
-; GCN: s_endpgm
-define void @merge_global_store_3_adjacent_loads_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
- %out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i32 1
- %out.gep.2 = getelementptr i32, i32 addrspace(1)* %out, i32 2
- %in.gep.1 = getelementptr i32, i32 addrspace(1)* %in, i32 1
- %in.gep.2 = getelementptr i32, i32 addrspace(1)* %in, i32 2
-
- %x = load i32, i32 addrspace(1)* %in
- %y = load i32, i32 addrspace(1)* %in.gep.1
- %z = load i32, i32 addrspace(1)* %in.gep.2
-
- store i32 %x, i32 addrspace(1)* %out
- store i32 %y, i32 addrspace(1)* %out.gep.1
- store i32 %z, i32 addrspace(1)* %out.gep.2
- ret void
-}
-
-; GCN-LABEL: {{^}}merge_global_store_4_adjacent_loads_f32:
-; GCN: buffer_load_dwordx4 [[LOAD:v\[[0-9]+:[0-9]+\]]]
-; GCN: buffer_store_dwordx4 [[LOAD]]
-define void @merge_global_store_4_adjacent_loads_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
- %out.gep.1 = getelementptr float, float addrspace(1)* %out, i32 1
- %out.gep.2 = getelementptr float, float addrspace(1)* %out, i32 2
- %out.gep.3 = getelementptr float, float addrspace(1)* %out, i32 3
- %in.gep.1 = getelementptr float, float addrspace(1)* %in, i32 1
- %in.gep.2 = getelementptr float, float addrspace(1)* %in, i32 2
- %in.gep.3 = getelementptr float, float addrspace(1)* %in, i32 3
-
- %x = load float, float addrspace(1)* %in
- %y = load float, float addrspace(1)* %in.gep.1
- %z = load float, float addrspace(1)* %in.gep.2
- %w = load float, float addrspace(1)* %in.gep.3
-
- store float %x, float addrspace(1)* %out
- store float %y, float addrspace(1)* %out.gep.1
- store float %z, float addrspace(1)* %out.gep.2
- store float %w, float addrspace(1)* %out.gep.3
- ret void
-}
-
-; GCN-LABEL: {{^}}merge_global_store_4_adjacent_loads_i32_nonzero_base:
-; GCN: buffer_load_dwordx4 [[LOAD:v\[[0-9]+:[0-9]+\]]], s{{\[[0-9]+:[0-9]+\]}}, 0 offset:44
-; GCN: buffer_store_dwordx4 [[LOAD]], s{{\[[0-9]+:[0-9]+\]}}, 0 offset:28
-define void @merge_global_store_4_adjacent_loads_i32_nonzero_base(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
- %in.gep.0 = getelementptr i32, i32 addrspace(1)* %in, i32 11
- %in.gep.1 = getelementptr i32, i32 addrspace(1)* %in, i32 12
- %in.gep.2 = getelementptr i32, i32 addrspace(1)* %in, i32 13
- %in.gep.3 = getelementptr i32, i32 addrspace(1)* %in, i32 14
- %out.gep.0 = getelementptr i32, i32 addrspace(1)* %out, i32 7
- %out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i32 8
- %out.gep.2 = getelementptr i32, i32 addrspace(1)* %out, i32 9
- %out.gep.3 = getelementptr i32, i32 addrspace(1)* %out, i32 10
-
- %x = load i32, i32 addrspace(1)* %in.gep.0
- %y = load i32, i32 addrspace(1)* %in.gep.1
- %z = load i32, i32 addrspace(1)* %in.gep.2
- %w = load i32, i32 addrspace(1)* %in.gep.3
-
- store i32 %x, i32 addrspace(1)* %out.gep.0
- store i32 %y, i32 addrspace(1)* %out.gep.1
- store i32 %z, i32 addrspace(1)* %out.gep.2
- store i32 %w, i32 addrspace(1)* %out.gep.3
- ret void
-}
-
-; GCN-LABEL: {{^}}merge_global_store_4_adjacent_loads_inverse_i32:
-; GCN: buffer_load_dwordx4 [[LOAD:v\[[0-9]+:[0-9]+\]]]
-; GCN: s_barrier
-; GCN: buffer_store_dwordx4 [[LOAD]]
-define void @merge_global_store_4_adjacent_loads_inverse_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
- %out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i32 1
- %out.gep.2 = getelementptr i32, i32 addrspace(1)* %out, i32 2
- %out.gep.3 = getelementptr i32, i32 addrspace(1)* %out, i32 3
- %in.gep.1 = getelementptr i32, i32 addrspace(1)* %in, i32 1
- %in.gep.2 = getelementptr i32, i32 addrspace(1)* %in, i32 2
- %in.gep.3 = getelementptr i32, i32 addrspace(1)* %in, i32 3
-
- %x = load i32, i32 addrspace(1)* %in
- %y = load i32, i32 addrspace(1)* %in.gep.1
- %z = load i32, i32 addrspace(1)* %in.gep.2
- %w = load i32, i32 addrspace(1)* %in.gep.3
-
- ; Make sure the barrier doesn't stop this
- tail call void @llvm.AMDGPU.barrier.local() #1
-
- store i32 %w, i32 addrspace(1)* %out.gep.3
- store i32 %z, i32 addrspace(1)* %out.gep.2
- store i32 %y, i32 addrspace(1)* %out.gep.1
- store i32 %x, i32 addrspace(1)* %out
-
- ret void
-}
-
-; TODO: Re-packing of loaded register required. Maybe an IR pass
-; should catch this?
-
-; GCN-LABEL: {{^}}merge_global_store_4_adjacent_loads_shuffle_i32:
-; GCN: buffer_load_dword v
-; GCN: buffer_load_dword v
-; GCN: buffer_load_dword v
-; GCN: buffer_load_dword v
-; GCN: s_barrier
-; GCN: buffer_store_dword v
-; GCN: buffer_store_dword v
-; GCN: buffer_store_dword v
-; GCN: buffer_store_dword v
-define void @merge_global_store_4_adjacent_loads_shuffle_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
- %out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i32 1
- %out.gep.2 = getelementptr i32, i32 addrspace(1)* %out, i32 2
- %out.gep.3 = getelementptr i32, i32 addrspace(1)* %out, i32 3
- %in.gep.1 = getelementptr i32, i32 addrspace(1)* %in, i32 1
- %in.gep.2 = getelementptr i32, i32 addrspace(1)* %in, i32 2
- %in.gep.3 = getelementptr i32, i32 addrspace(1)* %in, i32 3
-
- %x = load i32, i32 addrspace(1)* %in
- %y = load i32, i32 addrspace(1)* %in.gep.1
- %z = load i32, i32 addrspace(1)* %in.gep.2
- %w = load i32, i32 addrspace(1)* %in.gep.3
-
- ; Make sure the barrier doesn't stop this
- tail call void @llvm.AMDGPU.barrier.local() #1
-
- store i32 %w, i32 addrspace(1)* %out
- store i32 %z, i32 addrspace(1)* %out.gep.1
- store i32 %y, i32 addrspace(1)* %out.gep.2
- store i32 %x, i32 addrspace(1)* %out.gep.3
-
- ret void
-}
-
-; GCN-LABEL: {{^}}merge_global_store_4_adjacent_loads_i8:
-; GCN: buffer_load_dword [[LOAD:v[0-9]+]]
-; GCN: buffer_store_dword [[LOAD]]
-; GCN: s_endpgm
-define void @merge_global_store_4_adjacent_loads_i8(i8 addrspace(1)* %out, i8 addrspace(1)* %in) #0 {
- %out.gep.1 = getelementptr i8, i8 addrspace(1)* %out, i8 1
- %out.gep.2 = getelementptr i8, i8 addrspace(1)* %out, i8 2
- %out.gep.3 = getelementptr i8, i8 addrspace(1)* %out, i8 3
- %in.gep.1 = getelementptr i8, i8 addrspace(1)* %in, i8 1
- %in.gep.2 = getelementptr i8, i8 addrspace(1)* %in, i8 2
- %in.gep.3 = getelementptr i8, i8 addrspace(1)* %in, i8 3
-
- %x = load i8, i8 addrspace(1)* %in, align 4
- %y = load i8, i8 addrspace(1)* %in.gep.1
- %z = load i8, i8 addrspace(1)* %in.gep.2
- %w = load i8, i8 addrspace(1)* %in.gep.3
-
- store i8 %x, i8 addrspace(1)* %out, align 4
- store i8 %y, i8 addrspace(1)* %out.gep.1
- store i8 %z, i8 addrspace(1)* %out.gep.2
- store i8 %w, i8 addrspace(1)* %out.gep.3
- ret void
-}
-
-; GCN-LABEL: {{^}}merge_global_store_4_adjacent_loads_i8_natural_align:
-; GCN: buffer_load_ubyte
-; GCN: buffer_load_ubyte
-; GCN: buffer_load_ubyte
-; GCN: buffer_load_ubyte
-; GCN: buffer_store_byte
-; GCN: buffer_store_byte
-; GCN: buffer_store_byte
-; GCN: buffer_store_byte
-; GCN: s_endpgm
-define void @merge_global_store_4_adjacent_loads_i8_natural_align(i8 addrspace(1)* %out, i8 addrspace(1)* %in) #0 {
- %out.gep.1 = getelementptr i8, i8 addrspace(1)* %out, i8 1
- %out.gep.2 = getelementptr i8, i8 addrspace(1)* %out, i8 2
- %out.gep.3 = getelementptr i8, i8 addrspace(1)* %out, i8 3
- %in.gep.1 = getelementptr i8, i8 addrspace(1)* %in, i8 1
- %in.gep.2 = getelementptr i8, i8 addrspace(1)* %in, i8 2
- %in.gep.3 = getelementptr i8, i8 addrspace(1)* %in, i8 3
-
- %x = load i8, i8 addrspace(1)* %in
- %y = load i8, i8 addrspace(1)* %in.gep.1
- %z = load i8, i8 addrspace(1)* %in.gep.2
- %w = load i8, i8 addrspace(1)* %in.gep.3
-
- store i8 %x, i8 addrspace(1)* %out
- store i8 %y, i8 addrspace(1)* %out.gep.1
- store i8 %z, i8 addrspace(1)* %out.gep.2
- store i8 %w, i8 addrspace(1)* %out.gep.3
- ret void
-}
-
-; This works once AA is enabled on the subtarget
-; GCN-LABEL: {{^}}merge_global_store_4_vector_elts_loads_v4i32:
-; GCN: buffer_load_dwordx4 [[LOAD:v\[[0-9]+:[0-9]+\]]]
-; XGCN: buffer_store_dwordx4 [[LOAD]]
-; GCN: buffer_store_dword v
-; GCN: buffer_store_dword v
-; GCN: buffer_store_dword v
-; GCN: buffer_store_dword v
-define void @merge_global_store_4_vector_elts_loads_v4i32(i32 addrspace(1)* %out, <4 x i32> addrspace(1)* %in) #0 {
- %out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i32 1
- %out.gep.2 = getelementptr i32, i32 addrspace(1)* %out, i32 2
- %out.gep.3 = getelementptr i32, i32 addrspace(1)* %out, i32 3
- %vec = load <4 x i32>, <4 x i32> addrspace(1)* %in
-
- %x = extractelement <4 x i32> %vec, i32 0
- %y = extractelement <4 x i32> %vec, i32 1
- %z = extractelement <4 x i32> %vec, i32 2
- %w = extractelement <4 x i32> %vec, i32 3
-
- store i32 %x, i32 addrspace(1)* %out
- store i32 %y, i32 addrspace(1)* %out.gep.1
- store i32 %z, i32 addrspace(1)* %out.gep.2
- store i32 %w, i32 addrspace(1)* %out.gep.3
- ret void
-}
-
-; GCN-LABEL: {{^}}merge_local_store_2_constants_i8:
-; GCN: ds_write_b8
-; GCN: ds_write_b8
-; GCN: s_endpgm
-define void @merge_local_store_2_constants_i8(i8 addrspace(3)* %out) #0 {
- %out.gep.1 = getelementptr i8, i8 addrspace(3)* %out, i32 1
-
- store i8 123, i8 addrspace(3)* %out.gep.1
- store i8 456, i8 addrspace(3)* %out, align 2
- ret void
-}
-
-; GCN-LABEL: {{^}}merge_local_store_2_constants_i32:
-; GCN-DAG: s_movk_i32 [[SLO:s[0-9]+]], 0x1c8
-; GCN-DAG: s_movk_i32 [[SHI:s[0-9]+]], 0x7b
-; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], [[SLO]]
-; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], [[SHI]]
-; GCN: ds_write2_b32 v{{[0-9]+}}, v[[LO]], v[[HI]] offset1:1{{$}}
-define void @merge_local_store_2_constants_i32(i32 addrspace(3)* %out) #0 {
- %out.gep.1 = getelementptr i32, i32 addrspace(3)* %out, i32 1
-
- store i32 123, i32 addrspace(3)* %out.gep.1
- store i32 456, i32 addrspace(3)* %out
- ret void
-}
-
-; GCN-LABEL: {{^}}merge_local_store_4_constants_i32:
-; GCN: ds_write_b32
-; GCN: ds_write_b32
-; GCN: ds_write_b32
-; GCN: ds_write_b32
-define void @merge_local_store_4_constants_i32(i32 addrspace(3)* %out) #0 {
- %out.gep.1 = getelementptr i32, i32 addrspace(3)* %out, i32 1
- %out.gep.2 = getelementptr i32, i32 addrspace(3)* %out, i32 2
- %out.gep.3 = getelementptr i32, i32 addrspace(3)* %out, i32 3
-
- store i32 123, i32 addrspace(3)* %out.gep.1
- store i32 456, i32 addrspace(3)* %out.gep.2
- store i32 333, i32 addrspace(3)* %out.gep.3
- store i32 1234, i32 addrspace(3)* %out
- ret void
-}
-
-declare void @llvm.AMDGPU.barrier.local() #1
-
-attributes #0 = { nounwind }
-attributes #1 = { noduplicate nounwind }
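A note on the merge_local_store_2_constants_i32 checks above: ds_write2_b32 encodes its two offsets in dword units, so offset1:1 places the second value 4 bytes after the first, which is exactly the pair of adjacent i32 stores the test expects to be merged.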
Removed: llvm/trunk/test/CodeGen/R600/min.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/min.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/min.ll (original)
+++ llvm/trunk/test/CodeGen/R600/min.ll (removed)
@@ -1,189 +0,0 @@
-; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-
-declare i32 @llvm.r600.read.tidig.x() nounwind readnone
-
-; FUNC-LABEL: @v_test_imin_sle_i32
-; SI: v_min_i32_e32
-define void @v_test_imin_sle_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
- %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %gep0 = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid
- %gep1 = getelementptr i32, i32 addrspace(1)* %bptr, i32 %tid
- %outgep = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
- %a = load i32, i32 addrspace(1)* %gep0, align 4
- %b = load i32, i32 addrspace(1)* %gep1, align 4
- %cmp = icmp sle i32 %a, %b
- %val = select i1 %cmp, i32 %a, i32 %b
- store i32 %val, i32 addrspace(1)* %outgep, align 4
- ret void
-}
-
-; FUNC-LABEL: @s_test_imin_sle_i32
-; SI: s_min_i32
-define void @s_test_imin_sle_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
- %cmp = icmp sle i32 %a, %b
- %val = select i1 %cmp, i32 %a, i32 %b
- store i32 %val, i32 addrspace(1)* %out, align 4
- ret void
-}
-
-; FUNC-LABEL: @v_test_imin_slt_i32
-; SI: v_min_i32_e32
-define void @v_test_imin_slt_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
- %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %gep0 = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid
- %gep1 = getelementptr i32, i32 addrspace(1)* %bptr, i32 %tid
- %outgep = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
- %a = load i32, i32 addrspace(1)* %gep0, align 4
- %b = load i32, i32 addrspace(1)* %gep1, align 4
- %cmp = icmp slt i32 %a, %b
- %val = select i1 %cmp, i32 %a, i32 %b
- store i32 %val, i32 addrspace(1)* %outgep, align 4
- ret void
-}
-
-; FUNC-LABEL: @s_test_imin_slt_i32
-; SI: s_min_i32
-define void @s_test_imin_slt_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
- %cmp = icmp slt i32 %a, %b
- %val = select i1 %cmp, i32 %a, i32 %b
- store i32 %val, i32 addrspace(1)* %out, align 4
- ret void
-}
-
-; FUNC-LABEL: {{^}}s_test_imin_slt_imm_i32:
-; SI: s_min_i32 {{s[0-9]+}}, {{s[0-9]+}}, 8
-define void @s_test_imin_slt_imm_i32(i32 addrspace(1)* %out, i32 %a) nounwind {
- %cmp = icmp slt i32 %a, 8
- %val = select i1 %cmp, i32 %a, i32 8
- store i32 %val, i32 addrspace(1)* %out, align 4
- ret void
-}
-
-; FUNC-LABEL: {{^}}s_test_imin_sle_imm_i32:
-; SI: s_min_i32 {{s[0-9]+}}, {{s[0-9]+}}, 8
-define void @s_test_imin_sle_imm_i32(i32 addrspace(1)* %out, i32 %a) nounwind {
- %cmp = icmp sle i32 %a, 8
- %val = select i1 %cmp, i32 %a, i32 8
- store i32 %val, i32 addrspace(1)* %out, align 4
- ret void
-}
-
-; FUNC-LABEL: @v_test_umin_ule_i32
-; SI: v_min_u32_e32
-define void @v_test_umin_ule_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
- %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %gep0 = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid
- %gep1 = getelementptr i32, i32 addrspace(1)* %bptr, i32 %tid
- %outgep = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
- %a = load i32, i32 addrspace(1)* %gep0, align 4
- %b = load i32, i32 addrspace(1)* %gep1, align 4
- %cmp = icmp ule i32 %a, %b
- %val = select i1 %cmp, i32 %a, i32 %b
- store i32 %val, i32 addrspace(1)* %outgep, align 4
- ret void
-}
-
-; FUNC-LABEL: @s_test_umin_ule_i32
-; SI: s_min_u32
-define void @s_test_umin_ule_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
- %cmp = icmp ule i32 %a, %b
- %val = select i1 %cmp, i32 %a, i32 %b
- store i32 %val, i32 addrspace(1)* %out, align 4
- ret void
-}
-
-; FUNC-LABEL: @v_test_umin_ult_i32
-; SI: v_min_u32_e32
-define void @v_test_umin_ult_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
- %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %gep0 = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid
- %gep1 = getelementptr i32, i32 addrspace(1)* %bptr, i32 %tid
- %outgep = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
- %a = load i32, i32 addrspace(1)* %gep0, align 4
- %b = load i32, i32 addrspace(1)* %gep1, align 4
- %cmp = icmp ult i32 %a, %b
- %val = select i1 %cmp, i32 %a, i32 %b
- store i32 %val, i32 addrspace(1)* %outgep, align 4
- ret void
-}
-
-; FUNC-LABEL: @s_test_umin_ult_i32
-; SI: s_min_u32
-define void @s_test_umin_ult_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
- %cmp = icmp ult i32 %a, %b
- %val = select i1 %cmp, i32 %a, i32 %b
- store i32 %val, i32 addrspace(1)* %out, align 4
- ret void
-}
-
-; FUNC-LABEL: @v_test_umin_ult_i32_multi_use
-; SI-NOT: v_min
-; SI: v_cmp_lt_u32
-; SI-NEXT: v_cndmask_b32
-; SI-NOT: v_min
-; SI: s_endpgm
-define void @v_test_umin_ult_i32_multi_use(i32 addrspace(1)* %out0, i1 addrspace(1)* %out1, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
- %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %gep0 = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid
- %gep1 = getelementptr i32, i32 addrspace(1)* %bptr, i32 %tid
- %outgep0 = getelementptr i32, i32 addrspace(1)* %out0, i32 %tid
- %outgep1 = getelementptr i1, i1 addrspace(1)* %out1, i32 %tid
- %a = load i32, i32 addrspace(1)* %gep0, align 4
- %b = load i32, i32 addrspace(1)* %gep1, align 4
- %cmp = icmp ult i32 %a, %b
- %val = select i1 %cmp, i32 %a, i32 %b
- store i32 %val, i32 addrspace(1)* %outgep0, align 4
- store i1 %cmp, i1 addrspace(1)* %outgep1
- ret void
-}
-
-; Make sure the redundant 'and' is removed.
-; FUNC-LABEL: {{^}}simplify_demanded_bits_test_umin_ult_i16:
-; SI-DAG: s_load_dword [[A:s[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
-; SI-DAG: s_load_dword [[B:s[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, 0xc
-; SI: s_min_u32 [[MIN:s[0-9]+]], [[A]], [[B]]
-; SI-NEXT: v_mov_b32_e32 [[VMIN:v[0-9]+]], [[MIN]]
-; SI-NEXT: buffer_store_dword [[VMIN]]
-define void @simplify_demanded_bits_test_umin_ult_i16(i32 addrspace(1)* %out, i16 zeroext %a, i16 zeroext %b) nounwind {
- %a.ext = zext i16 %a to i32
- %b.ext = zext i16 %b to i32
- %cmp = icmp ult i32 %a.ext, %b.ext
- %val = select i1 %cmp, i32 %a.ext, i32 %b.ext
- %mask = and i32 %val, 65535
- store i32 %mask, i32 addrspace(1)* %out
- ret void
-}
-
-; Make sure the redundant sign_extend_inreg is removed.
-
-; FUNC-LABEL: {{^}}simplify_demanded_bits_test_min_slt_i16:
-; SI-DAG: s_load_dword [[A:s[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
-; SI-DAG: s_load_dword [[B:s[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, 0xc
-; SI: s_min_i32 [[MIN:s[0-9]+]], [[A]], [[B]]
-; SI-NEXT: v_mov_b32_e32 [[VMIN:v[0-9]+]], [[MIN]]
-; SI-NEXT: buffer_store_dword [[VMIN]]
-define void @simplify_demanded_bits_test_min_slt_i16(i32 addrspace(1)* %out, i16 signext %a, i16 signext %b) nounwind {
- %a.ext = sext i16 %a to i32
- %b.ext = sext i16 %b to i32
- %cmp = icmp slt i32 %a.ext, %b.ext
- %val = select i1 %cmp, i32 %a.ext, i32 %b.ext
- %shl = shl i32 %val, 16
- %sextinreg = ashr i32 %shl, 16
- store i32 %sextinreg, i32 addrspace(1)* %out
- ret void
-}
-
-; FIXME: Should match min/max through the extends inserted by
-; legalization.
-
-; FUNC-LABEL: {{^}}s_test_imin_sle_i16:
-; SI: s_sext_i32_i16
-; SI: s_sext_i32_i16
-; SI: v_cmp_le_i32_e32
-; SI: v_cndmask_b32
-define void @s_test_imin_sle_i16(i16 addrspace(1)* %out, i16 %a, i16 %b) nounwind {
- %cmp = icmp sle i16 %a, %b
- %val = select i1 %cmp, i16 %a, i16 %b
- store i16 %val, i16 addrspace(1)* %out
- ret void
-}
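For the two simplify_demanded_bits tests above, the reasoning is: both operands are extended from i16, so the selected minimum already fits in 16 bits. In the unsigned test %a.ext and %b.ext are both below 0x10000, so 'and i32 %val, 65535' changes nothing; in the signed test %val is already sign-extended from bit 15, so the shl/ashr-by-16 pair is likewise a no-op. That is why the checks expect only the s_min and the store, with no masking or re-extension.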
Removed: llvm/trunk/test/CodeGen/R600/min3.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/min3.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/min3.ll (original)
+++ llvm/trunk/test/CodeGen/R600/min3.ll (removed)
@@ -1,111 +0,0 @@
-; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-
-declare i32 @llvm.r600.read.tidig.x() nounwind readnone
-
-; FUNC-LABEL: @v_test_imin3_slt_i32
-; SI: v_min3_i32
-define void @v_test_imin3_slt_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr, i32 addrspace(1)* %cptr) nounwind {
- %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %gep0 = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid
- %gep1 = getelementptr i32, i32 addrspace(1)* %bptr, i32 %tid
- %gep2 = getelementptr i32, i32 addrspace(1)* %cptr, i32 %tid
- %outgep = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
- %a = load i32, i32 addrspace(1)* %gep0, align 4
- %b = load i32, i32 addrspace(1)* %gep1, align 4
- %c = load i32, i32 addrspace(1)* %gep2, align 4
- %icmp0 = icmp slt i32 %a, %b
- %i0 = select i1 %icmp0, i32 %a, i32 %b
- %icmp1 = icmp slt i32 %i0, %c
- %i1 = select i1 %icmp1, i32 %i0, i32 %c
- store i32 %i1, i32 addrspace(1)* %outgep, align 4
- ret void
-}
-
-; FUNC-LABEL: @v_test_umin3_ult_i32
-; SI: v_min3_u32
-define void @v_test_umin3_ult_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr, i32 addrspace(1)* %cptr) nounwind {
- %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %gep0 = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid
- %gep1 = getelementptr i32, i32 addrspace(1)* %bptr, i32 %tid
- %gep2 = getelementptr i32, i32 addrspace(1)* %cptr, i32 %tid
- %outgep = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
- %a = load i32, i32 addrspace(1)* %gep0, align 4
- %b = load i32, i32 addrspace(1)* %gep1, align 4
- %c = load i32, i32 addrspace(1)* %gep2, align 4
- %icmp0 = icmp ult i32 %a, %b
- %i0 = select i1 %icmp0, i32 %a, i32 %b
- %icmp1 = icmp ult i32 %i0, %c
- %i1 = select i1 %icmp1, i32 %i0, i32 %c
- store i32 %i1, i32 addrspace(1)* %outgep, align 4
- ret void
-}
-
-; FUNC-LABEL: @v_test_umin_umin_umin
-; SI: v_min_i32
-; SI: v_min3_i32
-define void @v_test_umin_umin_umin(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr, i32 addrspace(1)* %cptr) nounwind {
- %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %tid2 = mul i32 %tid, 2
- %gep0 = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid
- %gep1 = getelementptr i32, i32 addrspace(1)* %bptr, i32 %tid
- %gep2 = getelementptr i32, i32 addrspace(1)* %cptr, i32 %tid
-
- %gep3 = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid2
- %gep4 = getelementptr i32, i32 addrspace(1)* %bptr, i32 %tid2
- %gep5 = getelementptr i32, i32 addrspace(1)* %cptr, i32 %tid2
-
- %outgep0 = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
- %outgep1 = getelementptr i32, i32 addrspace(1)* %out, i32 %tid2
-
- %a = load i32, i32 addrspace(1)* %gep0, align 4
- %b = load i32, i32 addrspace(1)* %gep1, align 4
- %c = load i32, i32 addrspace(1)* %gep2, align 4
- %d = load i32, i32 addrspace(1)* %gep3, align 4
-
- %icmp0 = icmp slt i32 %a, %b
- %i0 = select i1 %icmp0, i32 %a, i32 %b
-
- %icmp1 = icmp slt i32 %c, %d
- %i1 = select i1 %icmp1, i32 %c, i32 %d
-
- %icmp2 = icmp slt i32 %i0, %i1
- %i2 = select i1 %icmp2, i32 %i0, i32 %i1
-
- store i32 %i2, i32 addrspace(1)* %outgep1, align 4
- ret void
-}
-
-; FUNC-LABEL: @v_test_umin3_2_uses
-; SI-NOT: v_min3
-define void @v_test_umin3_2_uses(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr, i32 addrspace(1)* %cptr) nounwind {
- %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %tid2 = mul i32 %tid, 2
- %gep0 = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid
- %gep1 = getelementptr i32, i32 addrspace(1)* %bptr, i32 %tid
- %gep2 = getelementptr i32, i32 addrspace(1)* %cptr, i32 %tid
-
- %gep3 = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid2
- %gep4 = getelementptr i32, i32 addrspace(1)* %bptr, i32 %tid2
- %gep5 = getelementptr i32, i32 addrspace(1)* %cptr, i32 %tid2
-
- %outgep0 = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
- %outgep1 = getelementptr i32, i32 addrspace(1)* %out, i32 %tid2
-
- %a = load i32, i32 addrspace(1)* %gep0, align 4
- %b = load i32, i32 addrspace(1)* %gep1, align 4
- %c = load i32, i32 addrspace(1)* %gep2, align 4
- %d = load i32, i32 addrspace(1)* %gep3, align 4
-
- %icmp0 = icmp slt i32 %a, %b
- %i0 = select i1 %icmp0, i32 %a, i32 %b
-
- %icmp1 = icmp slt i32 %c, %d
- %i1 = select i1 %icmp1, i32 %c, i32 %d
-
- %icmp2 = icmp slt i32 %i0, %c
- %i2 = select i1 %icmp2, i32 %i0, i32 %c
-
- store i32 %i2, i32 addrspace(1)* %outgep0, align 4
- store i32 %i0, i32 addrspace(1)* %outgep1, align 4
- ret void
-}
Removed: llvm/trunk/test/CodeGen/R600/missing-store.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/missing-store.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/missing-store.ll (original)
+++ llvm/trunk/test/CodeGen/R600/missing-store.ll (removed)
@@ -1,26 +0,0 @@
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=FUNC -check-prefix=SI %s
-
-@ptr_load = addrspace(3) global i32 addrspace(2)* undef, align 8
-
-; Make sure that when the load from %ptr2 is folded, the chain isn't lost,
-; which would result in losing the store to %gptr.
-
-; FUNC-LABEL: {{^}}missing_store_reduced:
-; SI: ds_read_b64
-; SI: buffer_store_dword
-; SI: buffer_load_dword
-; SI: buffer_store_dword
-; SI: s_endpgm
-define void @missing_store_reduced(i32 addrspace(1)* %out, i32 addrspace(1)* %gptr) #0 {
- %ptr0 = load i32 addrspace(2)*, i32 addrspace(2)* addrspace(3)* @ptr_load, align 8
- %ptr2 = getelementptr inbounds i32, i32 addrspace(2)* %ptr0, i64 2
-
- store i32 99, i32 addrspace(1)* %gptr, align 4
- %tmp2 = load i32, i32 addrspace(2)* %ptr2, align 4
-
- store i32 %tmp2, i32 addrspace(1)* %out, align 4
- ret void
-}
-
-attributes #0 = { nounwind }
-
Removed: llvm/trunk/test/CodeGen/R600/mubuf.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/mubuf.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/mubuf.ll (original)
+++ llvm/trunk/test/CodeGen/R600/mubuf.ll (removed)
@@ -1,183 +0,0 @@
-; RUN: llc -march=amdgcn -mcpu=SI -show-mc-encoding -verify-machineinstrs < %s | FileCheck %s
-
-declare i32 @llvm.r600.read.tidig.x() readnone
-
-;;;==========================================================================;;;
-;;; MUBUF LOAD TESTS
-;;;==========================================================================;;;
-
-; MUBUF load with an immediate byte offset that fits into 12 bits
-; CHECK-LABEL: {{^}}mubuf_load0:
-; CHECK: buffer_load_dword v{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0 offset:4 ; encoding: [0x04,0x00,0x30,0xe0
-define void @mubuf_load0(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
-entry:
- %0 = getelementptr i32, i32 addrspace(1)* %in, i64 1
- %1 = load i32, i32 addrspace(1)* %0
- store i32 %1, i32 addrspace(1)* %out
- ret void
-}
-
-; MUBUF load with the largest possible immediate offset
-; CHECK-LABEL: {{^}}mubuf_load1:
-; CHECK: buffer_load_ubyte v{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0 offset:4095 ; encoding: [0xff,0x0f,0x20,0xe0
-define void @mubuf_load1(i8 addrspace(1)* %out, i8 addrspace(1)* %in) {
-entry:
- %0 = getelementptr i8, i8 addrspace(1)* %in, i64 4095
- %1 = load i8, i8 addrspace(1)* %0
- store i8 %1, i8 addrspace(1)* %out
- ret void
-}
-
-; MUBUF load with an immediate byte offset that doesn't fit into 12 bits
-; CHECK-LABEL: {{^}}mubuf_load2:
-; CHECK: s_movk_i32 [[SOFFSET:s[0-9]+]], 0x1000
-; CHECK: buffer_load_dword v{{[0-9]}}, s[{{[0-9]+:[0-9]+}}], [[SOFFSET]] ; encoding: [0x00,0x00,0x30,0xe0
-define void @mubuf_load2(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
-entry:
- %0 = getelementptr i32, i32 addrspace(1)* %in, i64 1024
- %1 = load i32, i32 addrspace(1)* %0
- store i32 %1, i32 addrspace(1)* %out
- ret void
-}
-
-; MUBUF load with a 12-bit immediate offset and a register offset
-; CHECK-LABEL: {{^}}mubuf_load3:
-; CHECK-NOT: ADD
-; CHECK: buffer_load_dword v{{[0-9]}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0 addr64 offset:4 ; encoding: [0x04,0x80,0x30,0xe0
-define void @mubuf_load3(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i64 %offset) {
-entry:
- %0 = getelementptr i32, i32 addrspace(1)* %in, i64 %offset
- %1 = getelementptr i32, i32 addrspace(1)* %0, i64 1
- %2 = load i32, i32 addrspace(1)* %1
- store i32 %2, i32 addrspace(1)* %out
- ret void
-}
-
-; CHECK-LABEL: {{^}}soffset_max_imm:
-; CHECK: buffer_load_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 64 offen glc
-define void @soffset_max_imm([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32, i32, i32) #1 {
-main_body:
- %tmp0 = getelementptr [6 x <16 x i8>], [6 x <16 x i8>] addrspace(2)* %0, i32 0, i32 0
- %tmp1 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp0
- %tmp2 = shl i32 %6, 2
- %tmp3 = call i32 @llvm.SI.buffer.load.dword.i32.i32(<16 x i8> %tmp1, i32 %tmp2, i32 64, i32 0, i32 1, i32 0, i32 1, i32 0, i32 0)
- %tmp4 = add i32 %6, 16
- %tmp5 = bitcast float 0.0 to i32
- call void @llvm.SI.tbuffer.store.i32(<16 x i8> %tmp1, i32 %tmp5, i32 1, i32 %tmp4, i32 %4, i32 0, i32 4, i32 4, i32 1, i32 0, i32 1, i32 1, i32 0)
- ret void
-}
-
-; Make sure immediates that aren't inline constants don't get folded into
-; the soffset operand.
-; FIXME: for this test we should be smart enough to shift the immediate into
-; the offset field.
-; CHECK-LABEL: {{^}}soffset_no_fold:
-; CHECK: s_movk_i32 [[SOFFSET:s[0-9]+]], 0x41
-; CHECK: buffer_load_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], [[SOFFSET]] offen glc
-define void @soffset_no_fold([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32, i32, i32) #1 {
-main_body:
- %tmp0 = getelementptr [6 x <16 x i8>], [6 x <16 x i8>] addrspace(2)* %0, i32 0, i32 0
- %tmp1 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp0
- %tmp2 = shl i32 %6, 2
- %tmp3 = call i32 @llvm.SI.buffer.load.dword.i32.i32(<16 x i8> %tmp1, i32 %tmp2, i32 65, i32 0, i32 1, i32 0, i32 1, i32 0, i32 0)
- %tmp4 = add i32 %6, 16
- %tmp5 = bitcast float 0.0 to i32
- call void @llvm.SI.tbuffer.store.i32(<16 x i8> %tmp1, i32 %tmp5, i32 1, i32 %tmp4, i32 %4, i32 0, i32 4, i32 4, i32 1, i32 0, i32 1, i32 1, i32 0)
- ret void
-}
-
-;;;==========================================================================;;;
-;;; MUBUF STORE TESTS
-;;;==========================================================================;;;
-
-; MUBUF store with an immediate byte offset that fits into 12 bits
-; CHECK-LABEL: {{^}}mubuf_store0:
-; CHECK: buffer_store_dword v{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0 offset:4 ; encoding: [0x04,0x00,0x70,0xe0
-define void @mubuf_store0(i32 addrspace(1)* %out) {
-entry:
- %0 = getelementptr i32, i32 addrspace(1)* %out, i64 1
- store i32 0, i32 addrspace(1)* %0
- ret void
-}
-
-; MUBUF store with the largest possible immediate offset
-; CHECK-LABEL: {{^}}mubuf_store1:
-; CHECK: buffer_store_byte v{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0 offset:4095 ; encoding: [0xff,0x0f,0x60,0xe0
-
-define void @mubuf_store1(i8 addrspace(1)* %out) {
-entry:
- %0 = getelementptr i8, i8 addrspace(1)* %out, i64 4095
- store i8 0, i8 addrspace(1)* %0
- ret void
-}
-
-; MUBUF store with an immediate byte offset that doesn't fit into 12 bits
-; CHECK-LABEL: {{^}}mubuf_store2:
-; CHECK: s_movk_i32 [[SOFFSET:s[0-9]+]], 0x1000
-; CHECK: buffer_store_dword v{{[0-9]}}, s[{{[0-9]:[0-9]}}], [[SOFFSET]] ; encoding: [0x00,0x00,0x70,0xe0
-define void @mubuf_store2(i32 addrspace(1)* %out) {
-entry:
- %0 = getelementptr i32, i32 addrspace(1)* %out, i64 1024
- store i32 0, i32 addrspace(1)* %0
- ret void
-}
-
-; MUBUF store with a 12-bit immediate offset and a register offset
-; CHECK-LABEL: {{^}}mubuf_store3:
-; CHECK-NOT: ADD
-; CHECK: buffer_store_dword v{{[0-9]}}, v[{{[0-9]:[0-9]}}], s[{{[0-9]:[0-9]}}], 0 addr64 offset:4 ; encoding: [0x04,0x80,0x70,0xe0
-define void @mubuf_store3(i32 addrspace(1)* %out, i64 %offset) {
-entry:
- %0 = getelementptr i32, i32 addrspace(1)* %out, i64 %offset
- %1 = getelementptr i32, i32 addrspace(1)* %0, i64 1
- store i32 0, i32 addrspace(1)* %1
- ret void
-}
-
-; CHECK-LABEL: {{^}}store_sgpr_ptr:
-; CHECK: buffer_store_dword v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, 0
-define void @store_sgpr_ptr(i32 addrspace(1)* %out) #0 {
- store i32 99, i32 addrspace(1)* %out, align 4
- ret void
-}
-
-; CHECK-LABEL: {{^}}store_sgpr_ptr_offset:
-; CHECK: buffer_store_dword v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:40
-define void @store_sgpr_ptr_offset(i32 addrspace(1)* %out) #0 {
- %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 10
- store i32 99, i32 addrspace(1)* %out.gep, align 4
- ret void
-}
-
-; CHECK-LABEL: {{^}}store_sgpr_ptr_large_offset:
-; CHECK: s_mov_b32 [[SOFFSET:s[0-9]+]], 0x20000
-; CHECK: buffer_store_dword v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, [[SOFFSET]]
-define void @store_sgpr_ptr_large_offset(i32 addrspace(1)* %out) #0 {
- %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 32768
- store i32 99, i32 addrspace(1)* %out.gep, align 4
- ret void
-}
-
-; CHECK-LABEL: {{^}}store_sgpr_ptr_large_offset_atomic:
-; CHECK: s_mov_b32 [[SOFFSET:s[0-9]+]], 0x20000
-; CHECK: buffer_atomic_add v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, [[SOFFSET]]
-define void @store_sgpr_ptr_large_offset_atomic(i32 addrspace(1)* %out) #0 {
- %gep = getelementptr i32, i32 addrspace(1)* %out, i32 32768
- %val = atomicrmw volatile add i32 addrspace(1)* %gep, i32 5 seq_cst
- ret void
-}
-
-; CHECK-LABEL: {{^}}store_vgpr_ptr:
-; CHECK: buffer_store_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64
-define void @store_vgpr_ptr(i32 addrspace(1)* %out) #0 {
- %tid = call i32 @llvm.r600.read.tidig.x() readnone
- %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
- store i32 99, i32 addrspace(1)* %out.gep, align 4
- ret void
-}
-
-declare i32 @llvm.SI.buffer.load.dword.i32.i32(<16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #3
-declare void @llvm.SI.tbuffer.store.i32(<16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32)
-
-attributes #1 = { "ShaderType"="2" "unsafe-fp-math"="true" }
-attributes #3 = { nounwind readonly }
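For reference, the offset arithmetic behind the mubuf.ll checks above: the MUBUF immediate offset field is 12 bits, so byte offsets 0 through 4095 are encoded directly and anything larger has to go through the soffset register.

  mubuf_load1 / mubuf_store1:  i8 element 4095  -> byte offset 4095            (fits: offset:4095)
  mubuf_load2 / mubuf_store2:  i32 element 1024 -> byte offset 1024 * 4 = 4096 (one past the limit: s_movk_i32 0x1000)

Likewise, 64 is the top of the inline-constant range, so soffset_max_imm can keep the 64 directly in the soffset operand, while soffset_no_fold has to move 65 (0x41) into an SGPR first.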
Removed: llvm/trunk/test/CodeGen/R600/mul.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/mul.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/mul.ll (original)
+++ llvm/trunk/test/CodeGen/R600/mul.ll (removed)
@@ -1,200 +0,0 @@
-; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG %s -check-prefix=FUNC
-; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-
-; These tests are affected by the mul24 and mad24 combines.
-
-; FUNC-LABEL: {{^}}test_mul_v2i32:
-; EG: MULLO_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-; EG: MULLO_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-
-; SI: v_mul_lo_i32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-; SI: v_mul_lo_i32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-
-define void @test_mul_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
- %b_ptr = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %in, i32 1
- %a = load <2 x i32>, <2 x i32> addrspace(1) * %in
- %b = load <2 x i32>, <2 x i32> addrspace(1) * %b_ptr
- %result = mul <2 x i32> %a, %b
- store <2 x i32> %result, <2 x i32> addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}v_mul_v4i32:
-; EG: MULLO_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-; EG: MULLO_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-; EG: MULLO_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-; EG: MULLO_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-
-; SI: v_mul_lo_i32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-; SI: v_mul_lo_i32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-; SI: v_mul_lo_i32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-; SI: v_mul_lo_i32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-
-define void @v_mul_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
- %b_ptr = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %in, i32 1
- %a = load <4 x i32>, <4 x i32> addrspace(1) * %in
- %b = load <4 x i32>, <4 x i32> addrspace(1) * %b_ptr
- %result = mul <4 x i32> %a, %b
- store <4 x i32> %result, <4 x i32> addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}s_trunc_i64_mul_to_i32:
-; SI: s_load_dword
-; SI: s_load_dword
-; SI: s_mul_i32
-; SI: buffer_store_dword
-define void @s_trunc_i64_mul_to_i32(i32 addrspace(1)* %out, i64 %a, i64 %b) {
- %mul = mul i64 %b, %a
- %trunc = trunc i64 %mul to i32
- store i32 %trunc, i32 addrspace(1)* %out, align 8
- ret void
-}
-
-; FUNC-LABEL: {{^}}v_trunc_i64_mul_to_i32:
-; SI: s_load_dword
-; SI: s_load_dword
-; SI: v_mul_lo_i32
-; SI: buffer_store_dword
-define void @v_trunc_i64_mul_to_i32(i32 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) nounwind {
- %a = load i64, i64 addrspace(1)* %aptr, align 8
- %b = load i64, i64 addrspace(1)* %bptr, align 8
- %mul = mul i64 %b, %a
- %trunc = trunc i64 %mul to i32
- store i32 %trunc, i32 addrspace(1)* %out, align 8
- ret void
-}
-
-; This 64-bit multiply should just use MUL_HI and MUL_LO, since the top
-; 32 bits of both arguments are sign bits.
-; FUNC-LABEL: {{^}}mul64_sext_c:
-; EG-DAG: MULLO_INT
-; EG-DAG: MULHI_INT
-; SI-DAG: s_mul_i32
-; SI-DAG: v_mul_hi_i32
-define void @mul64_sext_c(i64 addrspace(1)* %out, i32 %in) {
-entry:
- %0 = sext i32 %in to i64
- %1 = mul i64 %0, 80
- store i64 %1, i64 addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}v_mul64_sext_c:
-; EG-DAG: MULLO_INT
-; EG-DAG: MULHI_INT
-; SI-DAG: v_mul_lo_i32
-; SI-DAG: v_mul_hi_i32
-; SI: s_endpgm
-define void @v_mul64_sext_c(i64 addrspace(1)* %out, i32 addrspace(1)* %in) {
- %val = load i32, i32 addrspace(1)* %in, align 4
- %ext = sext i32 %val to i64
- %mul = mul i64 %ext, 80
- store i64 %mul, i64 addrspace(1)* %out, align 8
- ret void
-}
-
-; FUNC-LABEL: {{^}}v_mul64_sext_inline_imm:
-; SI-DAG: v_mul_lo_i32 v{{[0-9]+}}, 9, v{{[0-9]+}}
-; SI-DAG: v_mul_hi_i32 v{{[0-9]+}}, 9, v{{[0-9]+}}
-; SI: s_endpgm
-define void @v_mul64_sext_inline_imm(i64 addrspace(1)* %out, i32 addrspace(1)* %in) {
- %val = load i32, i32 addrspace(1)* %in, align 4
- %ext = sext i32 %val to i64
- %mul = mul i64 %ext, 9
- store i64 %mul, i64 addrspace(1)* %out, align 8
- ret void
-}
-
-; FUNC-LABEL: {{^}}s_mul_i32:
-; SI: s_load_dword [[SRC0:s[0-9]+]],
-; SI: s_load_dword [[SRC1:s[0-9]+]],
-; SI: s_mul_i32 [[SRESULT:s[0-9]+]], [[SRC0]], [[SRC1]]
-; SI: v_mov_b32_e32 [[VRESULT:v[0-9]+]], [[SRESULT]]
-; SI: buffer_store_dword [[VRESULT]],
-; SI: s_endpgm
-define void @s_mul_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
- %mul = mul i32 %a, %b
- store i32 %mul, i32 addrspace(1)* %out, align 4
- ret void
-}
-
-; FUNC-LABEL: {{^}}v_mul_i32:
-; SI: v_mul_lo_i32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
-define void @v_mul_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
- %b_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
- %a = load i32, i32 addrspace(1)* %in
- %b = load i32, i32 addrspace(1)* %b_ptr
- %result = mul i32 %a, %b
- store i32 %result, i32 addrspace(1)* %out
- ret void
-}
-
-; A standard 64-bit multiply. The expansion should be around 6 instructions.
-; It would be difficult to match the expansion correctly without writing
-; a really complicated list of FileCheck expressions. I don't want
-; to confuse people who may 'break' this test with a correct optimization,
-; so this test just uses FUNC-LABEL to make sure the compiler does not
-; crash with a 'failed to select' error.
-
-; FUNC-LABEL: {{^}}s_mul_i64:
-define void @s_mul_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
- %mul = mul i64 %a, %b
- store i64 %mul, i64 addrspace(1)* %out, align 8
- ret void
-}
-
-; FUNC-LABEL: {{^}}v_mul_i64:
-; SI: v_mul_lo_i32
-define void @v_mul_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) {
- %a = load i64, i64 addrspace(1)* %aptr, align 8
- %b = load i64, i64 addrspace(1)* %bptr, align 8
- %mul = mul i64 %a, %b
- store i64 %mul, i64 addrspace(1)* %out, align 8
- ret void
-}
-
-; FUNC-LABEL: {{^}}mul32_in_branch:
-; SI: s_mul_i32
-define void @mul32_in_branch(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %a, i32 %b, i32 %c) {
-entry:
- %0 = icmp eq i32 %a, 0
- br i1 %0, label %if, label %else
-
-if:
- %1 = load i32, i32 addrspace(1)* %in
- br label %endif
-
-else:
- %2 = mul i32 %a, %b
- br label %endif
-
-endif:
- %3 = phi i32 [%1, %if], [%2, %else]
- store i32 %3, i32 addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}mul64_in_branch:
-; SI-DAG: s_mul_i32
-; SI-DAG: v_mul_hi_u32
-; SI: s_endpgm
-define void @mul64_in_branch(i64 addrspace(1)* %out, i64 addrspace(1)* %in, i64 %a, i64 %b, i64 %c) {
-entry:
- %0 = icmp eq i64 %a, 0
- br i1 %0, label %if, label %else
-
-if:
- %1 = load i64, i64 addrspace(1)* %in
- br label %endif
-
-else:
- %2 = mul i64 %a, %b
- br label %endif
-
-endif:
- %3 = phi i64 [%1, %if], [%2, %else]
- store i64 %3, i64 addrspace(1)* %out
- ret void
-}
Removed: llvm/trunk/test/CodeGen/R600/mul_int24.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/mul_int24.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/mul_int24.ll (original)
+++ llvm/trunk/test/CodeGen/R600/mul_int24.ll (removed)
@@ -1,23 +0,0 @@
-; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=EG --check-prefix=FUNC
-; RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck %s --check-prefix=CM --check-prefix=FUNC
-; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI --check-prefix=FUNC
-; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s --check-prefix=SI --check-prefix=FUNC
-
-; FUNC-LABEL: {{^}}i32_mul24:
-; Signed 24-bit multiply is not supported on pre-Cayman GPUs.
-; EG: MULLO_INT
-; Make sure we are not masking the inputs
-; CM-NOT: AND
-; CM: MUL_INT24
-; SI-NOT: and
-; SI: v_mul_i32_i24
-define void @i32_mul24(i32 addrspace(1)* %out, i32 %a, i32 %b) {
-entry:
- %0 = shl i32 %a, 8
- %a_24 = ashr i32 %0, 8
- %1 = shl i32 %b, 8
- %b_24 = ashr i32 %1, 8
- %2 = mul i32 %a_24, %b_24
- store i32 %2, i32 addrspace(1)* %out
- ret void
-}
Removed: llvm/trunk/test/CodeGen/R600/mul_uint24.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/mul_uint24.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/mul_uint24.ll (original)
+++ llvm/trunk/test/CodeGen/R600/mul_uint24.ll (removed)
@@ -1,67 +0,0 @@
-; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=EG --check-prefix=FUNC
-; RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck %s --check-prefix=EG --check-prefix=FUNC
-; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI --check-prefix=FUNC
-; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s --check-prefix=SI --check-prefix=FUNC
-
-; FUNC-LABEL: {{^}}u32_mul24:
-; EG: MUL_UINT24 {{[* ]*}}T{{[0-9]\.[XYZW]}}, KC0[2].Z, KC0[2].W
-; SI: v_mul_u32_u24
-
-define void @u32_mul24(i32 addrspace(1)* %out, i32 %a, i32 %b) {
-entry:
- %0 = shl i32 %a, 8
- %a_24 = lshr i32 %0, 8
- %1 = shl i32 %b, 8
- %b_24 = lshr i32 %1, 8
- %2 = mul i32 %a_24, %b_24
- store i32 %2, i32 addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}i16_mul24:
-; EG: MUL_UINT24 {{[* ]*}}T{{[0-9]}}.[[MUL_CHAN:[XYZW]]]
-; The result must be sign-extended
-; EG: BFE_INT {{[* ]*}}T{{[0-9]}}.{{[XYZW]}}, PV.[[MUL_CHAN]], 0.0, literal.x
-; EG: 16
-; SI: v_mul_u32_u24_e{{(32|64)}} [[MUL:v[0-9]]], {{[sv][0-9], [sv][0-9]}}
-; SI: v_bfe_i32 v{{[0-9]}}, [[MUL]], 0, 16
-define void @i16_mul24(i32 addrspace(1)* %out, i16 %a, i16 %b) {
-entry:
- %0 = mul i16 %a, %b
- %1 = sext i16 %0 to i32
- store i32 %1, i32 addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}i8_mul24:
-; EG: MUL_UINT24 {{[* ]*}}T{{[0-9]}}.[[MUL_CHAN:[XYZW]]]
-; The result must be sign-extended
-; EG: BFE_INT {{[* ]*}}T{{[0-9]}}.{{[XYZW]}}, PV.[[MUL_CHAN]], 0.0, literal.x
-; SI: v_mul_u32_u24_e{{(32|64)}} [[MUL:v[0-9]]], {{[sv][0-9], [sv][0-9]}}
-; SI: v_bfe_i32 v{{[0-9]}}, [[MUL]], 0, 8
-
-define void @i8_mul24(i32 addrspace(1)* %out, i8 %a, i8 %b) {
-entry:
- %0 = mul i8 %a, %b
- %1 = sext i8 %0 to i32
- store i32 %1, i32 addrspace(1)* %out
- ret void
-}
-
-; Multiply with 24-bit inputs and 64-bit output
-; FUNC-LABEL: {{^}}mul24_i64:
-; EG: MUL_UINT24
-; EG: MULHI
-; SI: v_mul_u32_u24
-; FIXME: SI should use the 24-bit mulhi here.
-; SI: v_mul_hi_u32
-define void @mul24_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) {
-entry:
- %0 = shl i64 %a, 40
- %a_24 = lshr i64 %0, 40
- %1 = shl i64 %b, 40
- %b_24 = lshr i64 %1, 40
- %2 = mul i64 %a_24, %b_24
- store i64 %2, i64 addrspace(1)* %out
- ret void
-}
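The shift pairs in the two mul24 tests above are what make the 24-bit multiply legal: shifting left by 8 and then right by 8 (lshr in mul_uint24.ll, ashr in mul_int24.ll) leaves an operand whose upper 8 bits are known zero or known sign bits. MUL_UINT24 and v_mul_u32_u24 read only the low 24 bits of each source and produce the low 32 bits of the product, so under that known-bits guarantee they compute the same value as the full 'mul i32':

  %a_24, %b_24 < 2^24  =>  low 32 bits of %a_24 * %b_24 from v_mul_u32_u24
                           == low 32 bits of the full 32 x 32 multiply == mul i32 %a_24, %b_24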
Removed: llvm/trunk/test/CodeGen/R600/mulhu.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/mulhu.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/mulhu.ll (original)
+++ llvm/trunk/test/CodeGen/R600/mulhu.ll (removed)
@@ -1,17 +0,0 @@
-;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s
-;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s
-
-;CHECK: v_mov_b32_e32 v{{[0-9]+}}, 0xaaaaaaab
-;CHECK: v_mul_hi_u32 v0, {{v[0-9]+}}, {{s[0-9]+}}
-;CHECK-NEXT: v_lshrrev_b32_e32 v0, 1, v0
-
-define void @test(i32 %p) {
- %i = udiv i32 %p, 3
- %r = bitcast i32 %i to float
- call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %r, float %r, float %r, float %r)
- ret void
-}
-
-declare <4 x float> @llvm.SI.sample.(i32, <4 x i32>, <8 x i32>, <4 x i32>, i32) readnone
-
-declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
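The constant in the mulhu.ll checks is the standard magic number for an unsigned divide by 3: 0xaaaaaaab = ceil(2^33 / 3), and for every unsigned 32-bit x, x / 3 == (x * 0xaaaaaaab) >> 33. The backend splits the shift by 33 into the high 32 bits of the product (v_mul_hi_u32) plus one more shift (v_lshrrev_b32 ... 1). A quick worked example:

  x = 9:  9 * 0xaaaaaaab = 0x600000003,  high dword = 6,  6 >> 1 = 3 = 9 / 3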
Removed: llvm/trunk/test/CodeGen/R600/no-initializer-constant-addrspace.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/no-initializer-constant-addrspace.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/no-initializer-constant-addrspace.ll (original)
+++ llvm/trunk/test/CodeGen/R600/no-initializer-constant-addrspace.ll (removed)
@@ -1,21 +0,0 @@
-; RUN: llc -march=amdgcn -mcpu=SI -o /dev/null %s
-; RUN: llc -march=amdgcn -mcpu=tonga -o /dev/null %s
-; RUN: llc -march=r600 -mcpu=cypress -o /dev/null %s
-
-@extern_const_addrspace = external unnamed_addr addrspace(2) constant [5 x i32], align 4
-
-; FUNC-LABEL: {{^}}load_extern_const_init:
-define void @load_extern_const_init(i32 addrspace(1)* %out) nounwind {
- %val = load i32, i32 addrspace(2)* getelementptr ([5 x i32], [5 x i32] addrspace(2)* @extern_const_addrspace, i64 0, i64 3), align 4
- store i32 %val, i32 addrspace(1)* %out, align 4
- ret void
-}
-
-@undef_const_addrspace = unnamed_addr addrspace(2) constant [5 x i32] undef, align 4
-
-; FUNC-LABEL: {{^}}load_undef_const_init:
-define void @load_undef_const_init(i32 addrspace(1)* %out) nounwind {
- %val = load i32, i32 addrspace(2)* getelementptr ([5 x i32], [5 x i32] addrspace(2)* @undef_const_addrspace, i64 0, i64 3), align 4
- store i32 %val, i32 addrspace(1)* %out, align 4
- ret void
-}
Removed: llvm/trunk/test/CodeGen/R600/no-shrink-extloads.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/no-shrink-extloads.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/no-shrink-extloads.ll (original)
+++ llvm/trunk/test/CodeGen/R600/no-shrink-extloads.ll (removed)
@@ -1,191 +0,0 @@
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-
-declare i32 @llvm.r600.read.tidig.x() nounwind readnone
-
-; Make sure we don't turn the 32-bit argument load into a 16-bit
-; load. There aren't extending scalar loads, so that would require
-; using a buffer_load instruction.
-
-; FUNC-LABEL: {{^}}truncate_kernarg_i32_to_i16:
-; SI: s_load_dword s
-; SI: buffer_store_short v
-define void @truncate_kernarg_i32_to_i16(i16 addrspace(1)* %out, i32 %arg) nounwind {
- %trunc = trunc i32 %arg to i16
- store i16 %trunc, i16 addrspace(1)* %out
- ret void
-}
-
-; It should be OK (and probably performance neutral) to reduce this,
-; but we don't know if the load is uniform yet.
-
-; FUNC-LABEL: {{^}}truncate_buffer_load_i32_to_i16:
-; SI: buffer_load_dword v
-; SI: buffer_store_short v
-define void @truncate_buffer_load_i32_to_i16(i16 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
- %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
- %gep.out = getelementptr i16, i16 addrspace(1)* %out, i32 %tid
- %load = load i32, i32 addrspace(1)* %gep.in
- %trunc = trunc i32 %load to i16
- store i16 %trunc, i16 addrspace(1)* %gep.out
- ret void
-}
-
-; FUNC-LABEL: {{^}}truncate_kernarg_i32_to_i8:
-; SI: s_load_dword s
-; SI: buffer_store_byte v
-define void @truncate_kernarg_i32_to_i8(i8 addrspace(1)* %out, i32 %arg) nounwind {
- %trunc = trunc i32 %arg to i8
- store i8 %trunc, i8 addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}truncate_buffer_load_i32_to_i8:
-; SI: buffer_load_dword v
-; SI: buffer_store_byte v
-define void @truncate_buffer_load_i32_to_i8(i8 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
- %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
- %gep.out = getelementptr i8, i8 addrspace(1)* %out, i32 %tid
- %load = load i32, i32 addrspace(1)* %gep.in
- %trunc = trunc i32 %load to i8
- store i8 %trunc, i8 addrspace(1)* %gep.out
- ret void
-}
-
-; FUNC-LABEL: {{^}}truncate_kernarg_i32_to_i1:
-; SI: s_load_dword s
-; SI: buffer_store_byte v
-define void @truncate_kernarg_i32_to_i1(i1 addrspace(1)* %out, i32 %arg) nounwind {
- %trunc = trunc i32 %arg to i1
- store i1 %trunc, i1 addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}truncate_buffer_load_i32_to_i1:
-; SI: buffer_load_dword v
-; SI: buffer_store_byte v
-define void @truncate_buffer_load_i32_to_i1(i1 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
- %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
- %gep.out = getelementptr i1, i1 addrspace(1)* %out, i32 %tid
- %load = load i32, i32 addrspace(1)* %gep.in
- %trunc = trunc i32 %load to i1
- store i1 %trunc, i1 addrspace(1)* %gep.out
- ret void
-}
-
-; FUNC-LABEL: {{^}}truncate_kernarg_i64_to_i32:
-; SI: s_load_dword s
-; SI: buffer_store_dword v
-define void @truncate_kernarg_i64_to_i32(i32 addrspace(1)* %out, i64 %arg) nounwind {
- %trunc = trunc i64 %arg to i32
- store i32 %trunc, i32 addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}truncate_buffer_load_i64_to_i32:
-; SI: buffer_load_dword v
-; SI: buffer_store_dword v
-define void @truncate_buffer_load_i64_to_i32(i32 addrspace(1)* %out, i64 addrspace(1)* %in) nounwind {
- %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
- %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
- %load = load i64, i64 addrspace(1)* %gep.in
- %trunc = trunc i64 %load to i32
- store i32 %trunc, i32 addrspace(1)* %gep.out
- ret void
-}
-
-; FUNC-LABEL: {{^}}srl_kernarg_i64_to_i32:
-; SI: s_load_dword s
-; SI: buffer_store_dword v
-define void @srl_kernarg_i64_to_i32(i32 addrspace(1)* %out, i64 %arg) nounwind {
- %srl = lshr i64 %arg, 32
- %trunc = trunc i64 %srl to i32
- store i32 %trunc, i32 addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}srl_buffer_load_i64_to_i32:
-; SI: buffer_load_dword v
-; SI: buffer_store_dword v
-define void @srl_buffer_load_i64_to_i32(i32 addrspace(1)* %out, i64 addrspace(1)* %in) nounwind {
- %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
- %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
- %load = load i64, i64 addrspace(1)* %gep.in
- %srl = lshr i64 %load, 32
- %trunc = trunc i64 %srl to i32
- store i32 %trunc, i32 addrspace(1)* %gep.out
- ret void
-}
-
-; Might as well reduce to 8-bit loads.
-; FUNC-LABEL: {{^}}truncate_kernarg_i16_to_i8:
-; SI: s_load_dword s
-; SI: buffer_store_byte v
-define void @truncate_kernarg_i16_to_i8(i8 addrspace(1)* %out, i16 %arg) nounwind {
- %trunc = trunc i16 %arg to i8
- store i8 %trunc, i8 addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}truncate_buffer_load_i16_to_i8:
-; SI: buffer_load_ubyte v
-; SI: buffer_store_byte v
-define void @truncate_buffer_load_i16_to_i8(i8 addrspace(1)* %out, i16 addrspace(1)* %in) nounwind {
- %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %gep.in = getelementptr i16, i16 addrspace(1)* %in, i32 %tid
- %gep.out = getelementptr i8, i8 addrspace(1)* %out, i32 %tid
- %load = load i16, i16 addrspace(1)* %gep.in
- %trunc = trunc i16 %load to i8
- store i8 %trunc, i8 addrspace(1)* %gep.out
- ret void
-}
-
-; FUNC-LABEL: {{^}}srl_kernarg_i64_to_i8:
-; SI: s_load_dword s
-; SI: buffer_store_byte v
-define void @srl_kernarg_i64_to_i8(i8 addrspace(1)* %out, i64 %arg) nounwind {
- %srl = lshr i64 %arg, 32
- %trunc = trunc i64 %srl to i8
- store i8 %trunc, i8 addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}srl_buffer_load_i64_to_i8:
-; SI: buffer_load_dword v
-; SI: buffer_store_byte v
-define void @srl_buffer_load_i64_to_i8(i8 addrspace(1)* %out, i64 addrspace(1)* %in) nounwind {
- %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
- %gep.out = getelementptr i8, i8 addrspace(1)* %out, i32 %tid
- %load = load i64, i64 addrspace(1)* %gep.in
- %srl = lshr i64 %load, 32
- %trunc = trunc i64 %srl to i8
- store i8 %trunc, i8 addrspace(1)* %gep.out
- ret void
-}
-
-; FUNC-LABEL: {{^}}truncate_kernarg_i64_to_i8:
-; SI: s_load_dword s
-; SI: buffer_store_byte v
-define void @truncate_kernarg_i64_to_i8(i8 addrspace(1)* %out, i64 %arg) nounwind {
- %trunc = trunc i64 %arg to i8
- store i8 %trunc, i8 addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}truncate_buffer_load_i64_to_i8:
-; SI: buffer_load_dword v
-; SI: buffer_store_byte v
-define void @truncate_buffer_load_i64_to_i8(i8 addrspace(1)* %out, i64 addrspace(1)* %in) nounwind {
- %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
- %gep.out = getelementptr i8, i8 addrspace(1)* %out, i32 %tid
- %load = load i64, i64 addrspace(1)* %gep.in
- %trunc = trunc i64 %load to i8
- store i8 %trunc, i8 addrspace(1)* %gep.out
- ret void
-}
Removed: llvm/trunk/test/CodeGen/R600/operand-folding.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/operand-folding.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/operand-folding.ll (original)
+++ llvm/trunk/test/CodeGen/R600/operand-folding.ll (removed)
@@ -1,113 +0,0 @@
-; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck %s
-
-; CHECK-LABEL: {{^}}fold_sgpr:
-; CHECK: v_add_i32_e32 v{{[0-9]+}}, s
-define void @fold_sgpr(i32 addrspace(1)* %out, i32 %fold) {
-entry:
- %tmp0 = icmp ne i32 %fold, 0
- br i1 %tmp0, label %if, label %endif
-
-if:
- %id = call i32 @llvm.r600.read.tidig.x()
- %offset = add i32 %fold, %id
- %tmp1 = getelementptr i32, i32 addrspace(1)* %out, i32 %offset
- store i32 0, i32 addrspace(1)* %tmp1
- br label %endif
-
-endif:
- ret void
-}
-
-; CHECK-LABEL: {{^}}fold_imm:
-; CHECK: v_or_b32_e32 v{{[0-9]+}}, 5
-define void @fold_imm(i32 addrspace(1)* %out, i32 %cmp) {
-entry:
- %fold = add i32 3, 2
- %tmp0 = icmp ne i32 %cmp, 0
- br i1 %tmp0, label %if, label %endif
-
-if:
- %id = call i32 @llvm.r600.read.tidig.x()
- %val = or i32 %id, %fold
- store i32 %val, i32 addrspace(1)* %out
- br label %endif
-
-endif:
- ret void
-}
-
-; CHECK-LABEL: {{^}}fold_64bit_constant_add:
-; CHECK-NOT: s_mov_b64
-; FIXME: It would be better if we could use v_add here and drop the extra
-; v_mov_b32 instructions.
-; CHECK-DAG: s_add_u32 [[LO:s[0-9]+]], s{{[0-9]+}}, 1
-; CHECK-DAG: s_addc_u32 [[HI:s[0-9]+]], s{{[0-9]+}}, 0
-; CHECK-DAG: v_mov_b32_e32 v[[VLO:[0-9]+]], [[LO]]
-; CHECK-DAG: v_mov_b32_e32 v[[VHI:[0-9]+]], [[HI]]
-; CHECK: buffer_store_dwordx2 v{{\[}}[[VLO]]:[[VHI]]{{\]}},
-
-define void @fold_64bit_constant_add(i64 addrspace(1)* %out, i32 %cmp, i64 %val) {
-entry:
- %tmp0 = add i64 %val, 1
- store i64 %tmp0, i64 addrspace(1)* %out
- ret void
-}
-
-; Inline constants should always be folded.
-
-; CHECK-LABEL: {{^}}vector_inline:
-; CHECK: v_xor_b32_e32 v{{[0-9]+}}, 5, v{{[0-9]+}}
-; CHECK: v_xor_b32_e32 v{{[0-9]+}}, 5, v{{[0-9]+}}
-; CHECK: v_xor_b32_e32 v{{[0-9]+}}, 5, v{{[0-9]+}}
-; CHECK: v_xor_b32_e32 v{{[0-9]+}}, 5, v{{[0-9]+}}
-
-define void @vector_inline(<4 x i32> addrspace(1)* %out) {
-entry:
- %tmp0 = call i32 @llvm.r600.read.tidig.x()
- %tmp1 = add i32 %tmp0, 1
- %tmp2 = add i32 %tmp0, 2
- %tmp3 = add i32 %tmp0, 3
- %vec0 = insertelement <4 x i32> undef, i32 %tmp0, i32 0
- %vec1 = insertelement <4 x i32> %vec0, i32 %tmp1, i32 1
- %vec2 = insertelement <4 x i32> %vec1, i32 %tmp2, i32 2
- %vec3 = insertelement <4 x i32> %vec2, i32 %tmp3, i32 3
- %tmp4 = xor <4 x i32> <i32 5, i32 5, i32 5, i32 5>, %vec3
- store <4 x i32> %tmp4, <4 x i32> addrspace(1)* %out
- ret void
-}
-
-; Immediates with one use should be folded
-; CHECK-LABEL: {{^}}imm_one_use:
-; CHECK: v_xor_b32_e32 v{{[0-9]+}}, 0x64, v{{[0-9]+}}
-
-define void @imm_one_use(i32 addrspace(1)* %out) {
-entry:
- %tmp0 = call i32 @llvm.r600.read.tidig.x()
- %tmp1 = xor i32 %tmp0, 100
- store i32 %tmp1, i32 addrspace(1)* %out
- ret void
-}
-; CHECK-LABEL: {{^}}vector_imm:
-; CHECK: s_movk_i32 [[IMM:s[0-9]+]], 0x64
-; CHECK: v_xor_b32_e32 v{{[0-9]}}, [[IMM]], v{{[0-9]}}
-; CHECK: v_xor_b32_e32 v{{[0-9]}}, [[IMM]], v{{[0-9]}}
-; CHECK: v_xor_b32_e32 v{{[0-9]}}, [[IMM]], v{{[0-9]}}
-; CHECK: v_xor_b32_e32 v{{[0-9]}}, [[IMM]], v{{[0-9]}}
-
-define void @vector_imm(<4 x i32> addrspace(1)* %out) {
-entry:
- %tmp0 = call i32 @llvm.r600.read.tidig.x()
- %tmp1 = add i32 %tmp0, 1
- %tmp2 = add i32 %tmp0, 2
- %tmp3 = add i32 %tmp0, 3
- %vec0 = insertelement <4 x i32> undef, i32 %tmp0, i32 0
- %vec1 = insertelement <4 x i32> %vec0, i32 %tmp1, i32 1
- %vec2 = insertelement <4 x i32> %vec1, i32 %tmp2, i32 2
- %vec3 = insertelement <4 x i32> %vec2, i32 %tmp3, i32 3
- %tmp4 = xor <4 x i32> <i32 100, i32 100, i32 100, i32 100>, %vec3
- store <4 x i32> %tmp4, <4 x i32> addrspace(1)* %out
- ret void
-}
-
-declare i32 @llvm.r600.read.tidig.x() #0
-attributes #0 = { readnone }
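The split between imm_one_use and vector_imm above comes from the SI inline-constant rules: integers from -16 to 64 can be encoded directly in any VALU source operand, so the 5s in vector_inline fold into every v_xor for free, whereas 0x64 (100) needs a full 32-bit literal. With a single use the literal simply rides along with the instruction, but with four uses the value is instead materialized once with s_movk_i32 and reused from the SGPR, presumably to avoid carrying four copies of the literal.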
Removed: llvm/trunk/test/CodeGen/R600/operand-spacing.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/operand-spacing.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/operand-spacing.ll (original)
+++ llvm/trunk/test/CodeGen/R600/operand-spacing.ll (removed)
@@ -1,18 +0,0 @@
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefix=SI -check-prefix=GCN %s
-; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefix=VI -check-prefix=GCN %s
-
-; Make sure there isn't an extra space between the instruction name and the first operand.
-
-; GCN-LABEL: {{^}}add_f32:
-; SI-DAG: s_load_dword [[SREGA:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
-; SI-DAG: s_load_dword [[SREGB:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
-; VI-DAG: s_load_dword [[SREGA:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c
-; VI-DAG: s_load_dword [[SREGB:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x30
-; GCN: v_mov_b32_e32 [[VREGB:v[0-9]+]], [[SREGB]]
-; GCN: v_add_f32_e32 [[RESULT:v[0-9]+]], [[SREGA]], [[VREGB]]
-; GCN: buffer_store_dword [[RESULT]],
-define void @add_f32(float addrspace(1)* %out, float %a, float %b) {
- %result = fadd float %a, %b
- store float %result, float addrspace(1)* %out
- ret void
-}
Removed: llvm/trunk/test/CodeGen/R600/or.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/or.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/or.ll (original)
+++ llvm/trunk/test/CodeGen/R600/or.ll (removed)
@@ -1,178 +0,0 @@
-; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
-
-
-; FUNC-LABEL: {{^}}or_v2i32:
-; EG: OR_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-; EG: OR_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-
-; SI: v_or_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-; SI: v_or_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-define void @or_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
- %b_ptr = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %in, i32 1
- %a = load <2 x i32>, <2 x i32> addrspace(1) * %in
- %b = load <2 x i32>, <2 x i32> addrspace(1) * %b_ptr
- %result = or <2 x i32> %a, %b
- store <2 x i32> %result, <2 x i32> addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}or_v4i32:
-; EG: OR_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-; EG: OR_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-; EG: OR_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-; EG: OR_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-
-; SI: v_or_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-; SI: v_or_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-; SI: v_or_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-; SI: v_or_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-define void @or_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
- %b_ptr = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %in, i32 1
- %a = load <4 x i32>, <4 x i32> addrspace(1) * %in
- %b = load <4 x i32>, <4 x i32> addrspace(1) * %b_ptr
- %result = or <4 x i32> %a, %b
- store <4 x i32> %result, <4 x i32> addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}scalar_or_i32:
-; SI: s_or_b32
-define void @scalar_or_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) {
- %or = or i32 %a, %b
- store i32 %or, i32 addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}vector_or_i32:
-; SI: v_or_b32_e32 v{{[0-9]}}
-define void @vector_or_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %a, i32 %b) {
- %loada = load i32, i32 addrspace(1)* %a
- %or = or i32 %loada, %b
- store i32 %or, i32 addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}scalar_or_literal_i32:
-; SI: s_or_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x1869f
-define void @scalar_or_literal_i32(i32 addrspace(1)* %out, i32 %a) {
- %or = or i32 %a, 99999
- store i32 %or, i32 addrspace(1)* %out, align 4
- ret void
-}
-
-; FUNC-LABEL: {{^}}vector_or_literal_i32:
-; SI: v_or_b32_e32 v{{[0-9]+}}, 0xffff, v{{[0-9]+}}
-define void @vector_or_literal_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %a, i32 addrspace(1)* %b) {
- %loada = load i32, i32 addrspace(1)* %a, align 4
- %or = or i32 %loada, 65535
- store i32 %or, i32 addrspace(1)* %out, align 4
- ret void
-}
-
-; FUNC-LABEL: {{^}}vector_or_inline_immediate_i32:
-; SI: v_or_b32_e32 v{{[0-9]+}}, 4, v{{[0-9]+}}
-define void @vector_or_inline_immediate_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %a, i32 addrspace(1)* %b) {
- %loada = load i32, i32 addrspace(1)* %a, align 4
- %or = or i32 %loada, 4
- store i32 %or, i32 addrspace(1)* %out, align 4
- ret void
-}
-
-; FUNC-LABEL: {{^}}scalar_or_i64:
-; EG-DAG: OR_INT * T{{[0-9]\.[XYZW]}}, KC0[2].W, KC0[3].Y
-; EG-DAG: OR_INT * T{{[0-9]\.[XYZW]}}, KC0[3].X, KC0[3].Z
-
-; SI: s_or_b64
-define void @scalar_or_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) {
- %or = or i64 %a, %b
- store i64 %or, i64 addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}vector_or_i64:
-; SI: v_or_b32_e32 v{{[0-9]}}
-; SI: v_or_b32_e32 v{{[0-9]}}
-define void @vector_or_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
- %loada = load i64, i64 addrspace(1)* %a, align 8
- %loadb = load i64, i64 addrspace(1)* %a, align 8
- %or = or i64 %loada, %loadb
- store i64 %or, i64 addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}scalar_vector_or_i64:
-; SI: v_or_b32_e32 v{{[0-9]}}
-; SI: v_or_b32_e32 v{{[0-9]}}
-define void @scalar_vector_or_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 %b) {
- %loada = load i64, i64 addrspace(1)* %a
- %or = or i64 %loada, %b
- store i64 %or, i64 addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}vector_or_i64_loadimm:
-; SI-DAG: s_mov_b32 [[LO_S_IMM:s[0-9]+]], 0xdf77987f
-; SI-DAG: s_movk_i32 [[HI_S_IMM:s[0-9]+]], 0x146f
-; SI-DAG: buffer_load_dwordx2 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}},
-; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[LO_S_IMM]], v[[LO_VREG]]
-; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]]
-; SI: s_endpgm
-define void @vector_or_i64_loadimm(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
- %loada = load i64, i64 addrspace(1)* %a, align 8
- %or = or i64 %loada, 22470723082367
- store i64 %or, i64 addrspace(1)* %out
- ret void
-}
-
-; FIXME: The 'or' with 0 should really be removed.
-; FUNC-LABEL: {{^}}vector_or_i64_imm:
-; SI: buffer_load_dwordx2 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}},
-; SI: v_or_b32_e32 {{v[0-9]+}}, 8, v[[LO_VREG]]
-; SI: v_or_b32_e32 {{v[0-9]+}}, 0, {{.*}}
-; SI: s_endpgm
-define void @vector_or_i64_imm(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
- %loada = load i64, i64 addrspace(1)* %a, align 8
- %or = or i64 %loada, 8
- store i64 %or, i64 addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}trunc_i64_or_to_i32:
-; SI: s_load_dword s[[SREG0:[0-9]+]]
-; SI: s_load_dword s[[SREG1:[0-9]+]]
-; SI: s_or_b32 s[[SRESULT:[0-9]+]], s[[SREG1]], s[[SREG0]]
-; SI: v_mov_b32_e32 [[VRESULT:v[0-9]+]], s[[SRESULT]]
-; SI: buffer_store_dword [[VRESULT]],
-define void @trunc_i64_or_to_i32(i32 addrspace(1)* %out, i64 %a, i64 %b) {
- %add = or i64 %b, %a
- %trunc = trunc i64 %add to i32
- store i32 %trunc, i32 addrspace(1)* %out, align 8
- ret void
-}
-
-; FUNC-LABEL: {{^}}or_i1:
-; EG: OR_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], PS}}
-
-; SI: s_or_b64 s[{{[0-9]+:[0-9]+}}], vcc, s[{{[0-9]+:[0-9]+}}]
-define void @or_i1(i32 addrspace(1)* %out, float addrspace(1)* %in0, float addrspace(1)* %in1) {
- %a = load float, float addrspace(1)* %in0
- %b = load float, float addrspace(1)* %in1
- %acmp = fcmp oge float %a, 0.000000e+00
- %bcmp = fcmp oge float %b, 0.000000e+00
- %or = or i1 %acmp, %bcmp
- %result = zext i1 %or to i32
- store i32 %result, i32 addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}s_or_i1:
-; SI: s_or_b64 s[{{[0-9]+:[0-9]+}}], vcc, s[{{[0-9]+:[0-9]+}}]
-define void @s_or_i1(i1 addrspace(1)* %out, i32 %a, i32 %b, i32 %c, i32 %d) {
- %cmp0 = icmp eq i32 %a, %b
- %cmp1 = icmp eq i32 %c, %d
- %or = or i1 %cmp0, %cmp1
- store i1 %or, i1 addrspace(1)* %out
- ret void
-}
Removed: llvm/trunk/test/CodeGen/R600/packetizer.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/packetizer.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/packetizer.ll (original)
+++ llvm/trunk/test/CodeGen/R600/packetizer.ll (removed)
@@ -1,34 +0,0 @@
-; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
-; RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck %s
-
-; CHECK: {{^}}test:
-; CHECK: BIT_ALIGN_INT T{{[0-9]}}.X
-; CHECK: BIT_ALIGN_INT T{{[0-9]}}.Y
-; CHECK: BIT_ALIGN_INT T{{[0-9]}}.Z
-; CHECK: BIT_ALIGN_INT * T{{[0-9]}}.W
-
-define void @test(i32 addrspace(1)* %out, i32 %x_arg, i32 %y_arg, i32 %z_arg, i32 %w_arg, i32 %e) {
-entry:
- %shl = sub i32 32, %e
- %x = add i32 %x_arg, 1
- %x.0 = shl i32 %x, %shl
- %x.1 = lshr i32 %x, %e
- %x.2 = or i32 %x.0, %x.1
- %y = add i32 %y_arg, 1
- %y.0 = shl i32 %y, %shl
- %y.1 = lshr i32 %y, %e
- %y.2 = or i32 %y.0, %y.1
- %z = add i32 %z_arg, 1
- %z.0 = shl i32 %z, %shl
- %z.1 = lshr i32 %z, %e
- %z.2 = or i32 %z.0, %z.1
- %w = add i32 %w_arg, 1
- %w.0 = shl i32 %w, %shl
- %w.1 = lshr i32 %w, %e
- %w.2 = or i32 %w.0, %w.1
- %xy = or i32 %x.2, %y.2
- %zw = or i32 %z.2, %w.2
- %xyzw = or i32 %xy, %zw
- store i32 %xyzw, i32 addrspace(1)* %out
- ret void
-}
Removed: llvm/trunk/test/CodeGen/R600/parallelandifcollapse.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/parallelandifcollapse.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/parallelandifcollapse.ll (original)
+++ llvm/trunk/test/CodeGen/R600/parallelandifcollapse.ll (removed)
@@ -1,59 +0,0 @@
-; Function Attrs: nounwind
-; RUN: llc -march=r600 -mcpu=redwood -mattr=-promote-alloca < %s | FileCheck %s
-;
-; CFG flattening should use parallel-and mode to generate branch conditions and
-; then merge if-regions with the same bodies.
-;
-; CHECK: AND_INT
-; CHECK-NEXT: AND_INT
-; CHECK-NEXT: OR_INT
-
-; FIXME: For some reason having the allocas here allowed the flatten cfg pass
-; to do its transformation; however, now that we are using local memory for
-; allocas, the transformation isn't happening.
-
-define void @_Z9chk1D_512v() #0 {
-entry:
- %a0 = alloca i32, align 4
- %b0 = alloca i32, align 4
- %c0 = alloca i32, align 4
- %d0 = alloca i32, align 4
- %a1 = alloca i32, align 4
- %b1 = alloca i32, align 4
- %c1 = alloca i32, align 4
- %d1 = alloca i32, align 4
- %data = alloca i32, align 4
- %0 = load i32, i32* %a0, align 4
- %1 = load i32, i32* %b0, align 4
- %cmp = icmp ne i32 %0, %1
- br i1 %cmp, label %land.lhs.true, label %if.end
-
-land.lhs.true: ; preds = %entry
- %2 = load i32, i32* %c0, align 4
- %3 = load i32, i32* %d0, align 4
- %cmp1 = icmp ne i32 %2, %3
- br i1 %cmp1, label %if.then, label %if.end
-
-if.then: ; preds = %land.lhs.true
- store i32 1, i32* %data, align 4
- br label %if.end
-
-if.end: ; preds = %if.then, %land.lhs.true, %entry
- %4 = load i32, i32* %a1, align 4
- %5 = load i32, i32* %b1, align 4
- %cmp2 = icmp ne i32 %4, %5
- br i1 %cmp2, label %land.lhs.true3, label %if.end6
-
-land.lhs.true3: ; preds = %if.end
- %6 = load i32, i32* %c1, align 4
- %7 = load i32, i32* %d1, align 4
- %cmp4 = icmp ne i32 %6, %7
- br i1 %cmp4, label %if.then5, label %if.end6
-
-if.then5: ; preds = %land.lhs.true3
- store i32 1, i32* %data, align 4
- br label %if.end6
-
-if.end6: ; preds = %if.then5, %land.lhs.true3, %if.end
- ret void
-}
Removed: llvm/trunk/test/CodeGen/R600/parallelorifcollapse.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/parallelorifcollapse.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/parallelorifcollapse.ll (original)
+++ llvm/trunk/test/CodeGen/R600/parallelorifcollapse.ll (removed)
@@ -1,66 +0,0 @@
-; Function Attrs: nounwind
-; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
-;
-; CFG flattening should use parallel-or to generate branch conditions and
-; then merge if-regions with the same bodies.
-
-; FIXME: For some reason having the allocas here allowed the flatten cfg pass
-; to do its transformation; however, now that we are using local memory for
-; allocas, the transformation isn't happening.
-; XFAIL: *
-;
-; CHECK: OR_INT
-; CHECK-NEXT: OR_INT
-; CHECK-NEXT: OR_INT
-define void @_Z9chk1D_512v() #0 {
-entry:
- %a0 = alloca i32, align 4
- %b0 = alloca i32, align 4
- %c0 = alloca i32, align 4
- %d0 = alloca i32, align 4
- %a1 = alloca i32, align 4
- %b1 = alloca i32, align 4
- %c1 = alloca i32, align 4
- %d1 = alloca i32, align 4
- %data = alloca i32, align 4
- %0 = load i32, i32* %a0, align 4
- %1 = load i32, i32* %b0, align 4
- %cmp = icmp ne i32 %0, %1
- br i1 %cmp, label %land.lhs.true, label %if.else
-
-land.lhs.true: ; preds = %entry
- %2 = load i32, i32* %c0, align 4
- %3 = load i32, i32* %d0, align 4
- %cmp1 = icmp ne i32 %2, %3
- br i1 %cmp1, label %if.then, label %if.else
-
-if.then: ; preds = %land.lhs.true
- br label %if.end
-
-if.else: ; preds = %land.lhs.true, %entry
- store i32 1, i32* %data, align 4
- br label %if.end
-
-if.end: ; preds = %if.else, %if.then
- %4 = load i32, i32* %a1, align 4
- %5 = load i32, i32* %b1, align 4
- %cmp2 = icmp ne i32 %4, %5
- br i1 %cmp2, label %land.lhs.true3, label %if.else6
-
-land.lhs.true3: ; preds = %if.end
- %6 = load i32, i32* %c1, align 4
- %7 = load i32, i32* %d1, align 4
- %cmp4 = icmp ne i32 %6, %7
- br i1 %cmp4, label %if.then5, label %if.else6
-
-if.then5: ; preds = %land.lhs.true3
- br label %if.end7
-
-if.else6: ; preds = %land.lhs.true3, %if.end
- store i32 1, i32* %data, align 4
- br label %if.end7
-
-if.end7: ; preds = %if.else6, %if.then5
- ret void
-}
-
Removed: llvm/trunk/test/CodeGen/R600/predicate-dp4.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/predicate-dp4.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/predicate-dp4.ll (original)
+++ llvm/trunk/test/CodeGen/R600/predicate-dp4.ll (removed)
@@ -1,27 +0,0 @@
-;RUN: llc < %s -march=r600 -mcpu=cayman
-
-; CHECK-LABEL: {{^}}main:
-; CHECK: PRED_SETE_INT * Pred,
-; CHECK: DOT4 T{{[0-9]+}}.X, T0.X, T0.X, Pred_sel_one
-define void @main(<4 x float> inreg) #0 {
-main_body:
- %1 = extractelement <4 x float> %0, i32 0
- %2 = bitcast float %1 to i32
- %3 = icmp eq i32 %2, 0
- br i1 %3, label %IF, label %ENDIF
-
-IF: ; preds = %main_body
- %4 = call float @llvm.AMDGPU.dp4(<4 x float> %0, <4 x float> %0)
- br label %ENDIF
-
-ENDIF: ; preds = %IF, %main_body
- %5 = phi float [%4, %IF], [0.000000e+00, %main_body]
- %6 = insertelement <4 x float> undef, float %5, i32 0
- call void @llvm.R600.store.swizzle(<4 x float> %6, i32 0, i32 0)
- ret void
-}
-
-declare float @llvm.AMDGPU.dp4(<4 x float>, <4 x float>) #1
-declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
-attributes #1 = { readnone }
-attributes #0 = { "ShaderType"="0" }
Removed: llvm/trunk/test/CodeGen/R600/predicates.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/predicates.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/predicates.ll (original)
+++ llvm/trunk/test/CodeGen/R600/predicates.ll (removed)
@@ -1,104 +0,0 @@
-; RUN: llc < %s -march=r600 -mattr=disable-irstructurizer -mcpu=redwood | FileCheck %s
-
-; These tests make sure the compiler is optimizing branches using predicates
-; when it is legal to do so.
-
-; CHECK: {{^}}simple_if:
-; CHECK: PRED_SET{{[EGN][ET]*}}_INT * Pred,
-; CHECK: LSHL * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, 1, Pred_sel
-define void @simple_if(i32 addrspace(1)* %out, i32 %in) {
-entry:
- %0 = icmp sgt i32 %in, 0
- br i1 %0, label %IF, label %ENDIF
-
-IF:
- %1 = shl i32 %in, 1
- br label %ENDIF
-
-ENDIF:
- %2 = phi i32 [ %in, %entry ], [ %1, %IF ]
- store i32 %2, i32 addrspace(1)* %out
- ret void
-}
-
-; CHECK: {{^}}simple_if_else:
-; CHECK: PRED_SET{{[EGN][ET]*}}_INT * Pred,
-; CHECK: LSH{{[LR] \* T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, 1, Pred_sel
-; CHECK: LSH{{[LR] \* T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, 1, Pred_sel
-define void @simple_if_else(i32 addrspace(1)* %out, i32 %in) {
-entry:
- %0 = icmp sgt i32 %in, 0
- br i1 %0, label %IF, label %ELSE
-
-IF:
- %1 = shl i32 %in, 1
- br label %ENDIF
-
-ELSE:
- %2 = lshr i32 %in, 1
- br label %ENDIF
-
-ENDIF:
- %3 = phi i32 [ %1, %IF ], [ %2, %ELSE ]
- store i32 %3, i32 addrspace(1)* %out
- ret void
-}
-
-; CHECK: {{^}}nested_if:
-; CHECK: ALU_PUSH_BEFORE
-; CHECK: JUMP
-; CHECK: POP
-; CHECK: PRED_SET{{[EGN][ET]*}}_INT * Exec
-; CHECK: PRED_SET{{[EGN][ET]*}}_INT * Pred,
-; CHECK: LSHL * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, 1, Pred_sel
-define void @nested_if(i32 addrspace(1)* %out, i32 %in) {
-entry:
- %0 = icmp sgt i32 %in, 0
- br i1 %0, label %IF0, label %ENDIF
-
-IF0:
- %1 = add i32 %in, 10
- %2 = icmp sgt i32 %1, 0
- br i1 %2, label %IF1, label %ENDIF
-
-IF1:
- %3 = shl i32 %1, 1
- br label %ENDIF
-
-ENDIF:
- %4 = phi i32 [%in, %entry], [%1, %IF0], [%3, %IF1]
- store i32 %4, i32 addrspace(1)* %out
- ret void
-}
-
-; CHECK: {{^}}nested_if_else:
-; CHECK: ALU_PUSH_BEFORE
-; CHECK: JUMP
-; CHECK: POP
-; CHECK: PRED_SET{{[EGN][ET]*}}_INT * Exec
-; CHECK: PRED_SET{{[EGN][ET]*}}_INT * Pred,
-; CHECK: LSH{{[LR] \* T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, 1, Pred_sel
-; CHECK: LSH{{[LR] \* T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, 1, Pred_sel
-define void @nested_if_else(i32 addrspace(1)* %out, i32 %in) {
-entry:
- %0 = icmp sgt i32 %in, 0
- br i1 %0, label %IF0, label %ENDIF
-
-IF0:
- %1 = add i32 %in, 10
- %2 = icmp sgt i32 %1, 0
- br i1 %2, label %IF1, label %ELSE1
-
-IF1:
- %3 = shl i32 %1, 1
- br label %ENDIF
-
-ELSE1:
- %4 = lshr i32 %in, 1
- br label %ENDIF
-
-ENDIF:
- %5 = phi i32 [%in, %entry], [%3, %IF1], [%4, %ELSE1]
- store i32 %5, i32 addrspace(1)* %out
- ret void
-}
Removed: llvm/trunk/test/CodeGen/R600/private-memory-atomics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/private-memory-atomics.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/private-memory-atomics.ll (original)
+++ llvm/trunk/test/CodeGen/R600/private-memory-atomics.ll (removed)
@@ -1,32 +0,0 @@
-; RUN: llc -verify-machineinstrs -march=amdgcn -mcpu=SI < %s
-; RUN: llc -verify-machineinstrs -march=amdgcn -mcpu=tonga < %s
-
-; This works because the promote alloca pass replaces these with LDS atomics.
-
-; Private atomics have no real use, but at least we shouldn't crash on them.
-define void @atomicrmw_private(i32 addrspace(1)* %out, i32 %in) nounwind {
-entry:
- %tmp = alloca [2 x i32]
- %tmp1 = getelementptr [2 x i32], [2 x i32]* %tmp, i32 0, i32 0
- %tmp2 = getelementptr [2 x i32], [2 x i32]* %tmp, i32 0, i32 1
- store i32 0, i32* %tmp1
- store i32 1, i32* %tmp2
- %tmp3 = getelementptr [2 x i32], [2 x i32]* %tmp, i32 0, i32 %in
- %tmp4 = atomicrmw add i32* %tmp3, i32 7 acq_rel
- store i32 %tmp4, i32 addrspace(1)* %out
- ret void
-}
-
-define void @cmpxchg_private(i32 addrspace(1)* %out, i32 %in) nounwind {
-entry:
- %tmp = alloca [2 x i32]
- %tmp1 = getelementptr [2 x i32], [2 x i32]* %tmp, i32 0, i32 0
- %tmp2 = getelementptr [2 x i32], [2 x i32]* %tmp, i32 0, i32 1
- store i32 0, i32* %tmp1
- store i32 1, i32* %tmp2
- %tmp3 = getelementptr [2 x i32], [2 x i32]* %tmp, i32 0, i32 %in
- %tmp4 = cmpxchg i32* %tmp3, i32 0, i32 1 acq_rel monotonic
- %val = extractvalue { i32, i1 } %tmp4, 0
- store i32 %val, i32 addrspace(1)* %out
- ret void
-}
Removed: llvm/trunk/test/CodeGen/R600/private-memory-broken.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/private-memory-broken.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/private-memory-broken.ll (original)
+++ llvm/trunk/test/CodeGen/R600/private-memory-broken.ll (removed)
@@ -1,21 +0,0 @@
-; RUN: not llc -verify-machineinstrs -march=amdgcn -mcpu=SI %s -o /dev/null 2>&1 | FileCheck %s
-; RUN: not llc -verify-machineinstrs -march=amdgcn -mcpu=tonga %s -o /dev/null 2>&1 | FileCheck %s
-
-; Make sure the promote alloca pass doesn't crash
-
-; CHECK: unsupported call
-
-declare i32 @foo(i32*) nounwind
-
-define void @call_private(i32 addrspace(1)* %out, i32 %in) nounwind {
-entry:
- %tmp = alloca [2 x i32]
- %tmp1 = getelementptr [2 x i32], [2 x i32]* %tmp, i32 0, i32 0
- %tmp2 = getelementptr [2 x i32], [2 x i32]* %tmp, i32 0, i32 1
- store i32 0, i32* %tmp1
- store i32 1, i32* %tmp2
- %tmp3 = getelementptr [2 x i32], [2 x i32]* %tmp, i32 0, i32 %in
- %val = call i32 @foo(i32* %tmp3) nounwind
- store i32 %val, i32 addrspace(1)* %out
- ret void
-}
Removed: llvm/trunk/test/CodeGen/R600/private-memory.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/private-memory.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/private-memory.ll (original)
+++ llvm/trunk/test/CodeGen/R600/private-memory.ll (removed)
@@ -1,313 +0,0 @@
-; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck %s -check-prefix=R600 -check-prefix=FUNC
-; RUN: llc -show-mc-encoding -mattr=+promote-alloca -verify-machineinstrs -march=amdgcn -mcpu=SI < %s | FileCheck %s -check-prefix=SI-PROMOTE -check-prefix=SI -check-prefix=FUNC
-; RUN: llc -show-mc-encoding -mattr=-promote-alloca -verify-machineinstrs -march=amdgcn -mcpu=SI < %s | FileCheck %s -check-prefix=SI-ALLOCA -check-prefix=SI -check-prefix=FUNC
-; RUN: llc -show-mc-encoding -mattr=+promote-alloca -verify-machineinstrs -march=amdgcn -mcpu=tonga < %s | FileCheck %s -check-prefix=SI-PROMOTE -check-prefix=SI -check-prefix=FUNC
-; RUN: llc -show-mc-encoding -mattr=-promote-alloca -verify-machineinstrs -march=amdgcn -mcpu=tonga < %s | FileCheck %s -check-prefix=SI-ALLOCA -check-prefix=SI -check-prefix=FUNC
-
-declare i32 @llvm.r600.read.tidig.x() nounwind readnone
-
-; FUNC-LABEL: {{^}}mova_same_clause:
-
-; R600: LDS_WRITE
-; R600: LDS_WRITE
-; R600: LDS_READ
-; R600: LDS_READ
-
-; SI-PROMOTE: ds_write_b32
-; SI-PROMOTE: ds_write_b32
-; SI-PROMOTE: ds_read_b32
-; SI-PROMOTE: ds_read_b32
-
-; SI-ALLOCA: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen ; encoding: [0x00,0x10,0x70,0xe0
-; SI-ALLOCA: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen ; encoding: [0x00,0x10,0x70,0xe0
-define void @mova_same_clause(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) {
-entry:
- %stack = alloca [5 x i32], align 4
- %0 = load i32, i32 addrspace(1)* %in, align 4
- %arrayidx1 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 %0
- store i32 4, i32* %arrayidx1, align 4
- %arrayidx2 = getelementptr inbounds i32, i32 addrspace(1)* %in, i32 1
- %1 = load i32, i32 addrspace(1)* %arrayidx2, align 4
- %arrayidx3 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 %1
- store i32 5, i32* %arrayidx3, align 4
- %arrayidx10 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 0
- %2 = load i32, i32* %arrayidx10, align 4
- store i32 %2, i32 addrspace(1)* %out, align 4
- %arrayidx12 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 1
- %3 = load i32, i32* %arrayidx12
- %arrayidx13 = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 1
- store i32 %3, i32 addrspace(1)* %arrayidx13
- ret void
-}
-
-; This test checks that the stack offset is calculated correctly for structs.
-; All register loads/stores should be optimized away, so there shouldn't be
-; any MOVA instructions.
-;
-; XXX: This generated code has unnecessary MOVs; we should be able to optimize
-; this.
-
-; FUNC-LABEL: {{^}}multiple_structs:
-; R600-NOT: MOVA_INT
-; SI-NOT: v_movrel
-; SI-NOT: v_movrel
-%struct.point = type { i32, i32 }
-
-define void @multiple_structs(i32 addrspace(1)* %out) {
-entry:
- %a = alloca %struct.point
- %b = alloca %struct.point
- %a.x.ptr = getelementptr %struct.point, %struct.point* %a, i32 0, i32 0
- %a.y.ptr = getelementptr %struct.point, %struct.point* %a, i32 0, i32 1
- %b.x.ptr = getelementptr %struct.point, %struct.point* %b, i32 0, i32 0
- %b.y.ptr = getelementptr %struct.point, %struct.point* %b, i32 0, i32 1
- store i32 0, i32* %a.x.ptr
- store i32 1, i32* %a.y.ptr
- store i32 2, i32* %b.x.ptr
- store i32 3, i32* %b.y.ptr
- %a.indirect.ptr = getelementptr %struct.point, %struct.point* %a, i32 0, i32 0
- %b.indirect.ptr = getelementptr %struct.point, %struct.point* %b, i32 0, i32 0
- %a.indirect = load i32, i32* %a.indirect.ptr
- %b.indirect = load i32, i32* %b.indirect.ptr
- %0 = add i32 %a.indirect, %b.indirect
- store i32 %0, i32 addrspace(1)* %out
- ret void
-}
-
-; Test direct access of a private array inside a loop. The private array
-; loads and stores should be lowered to copies, so there shouldn't be any
-; MOVA instructions.
-
-; FUNC-LABEL: {{^}}direct_loop:
-; R600-NOT: MOVA_INT
-; SI-NOT: v_movrel
-
-define void @direct_loop(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
-entry:
- %prv_array_const = alloca [2 x i32]
- %prv_array = alloca [2 x i32]
- %a = load i32, i32 addrspace(1)* %in
- %b_src_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
- %b = load i32, i32 addrspace(1)* %b_src_ptr
- %a_dst_ptr = getelementptr [2 x i32], [2 x i32]* %prv_array_const, i32 0, i32 0
- store i32 %a, i32* %a_dst_ptr
- %b_dst_ptr = getelementptr [2 x i32], [2 x i32]* %prv_array_const, i32 0, i32 1
- store i32 %b, i32* %b_dst_ptr
- br label %for.body
-
-for.body:
- %inc = phi i32 [0, %entry], [%count, %for.body]
- %x_ptr = getelementptr [2 x i32], [2 x i32]* %prv_array_const, i32 0, i32 0
- %x = load i32, i32* %x_ptr
- %y_ptr = getelementptr [2 x i32], [2 x i32]* %prv_array, i32 0, i32 0
- %y = load i32, i32* %y_ptr
- %xy = add i32 %x, %y
- store i32 %xy, i32* %y_ptr
- %count = add i32 %inc, 1
- %done = icmp eq i32 %count, 4095
- br i1 %done, label %for.end, label %for.body
-
-for.end:
- %value_ptr = getelementptr [2 x i32], [2 x i32]* %prv_array, i32 0, i32 0
- %value = load i32, i32* %value_ptr
- store i32 %value, i32 addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}short_array:
-
-; R600: MOVA_INT
-
-; SI-PROMOTE-DAG: buffer_store_short v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen ; encoding: [0x00,0x10,0x68,0xe0
-; SI-PROMOTE-DAG: buffer_store_short v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen offset:2 ; encoding: [0x02,0x10,0x68,0xe0
-; SI-PROMOTE: buffer_load_sshort v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}}
-define void @short_array(i32 addrspace(1)* %out, i32 %index) {
-entry:
- %0 = alloca [2 x i16]
- %1 = getelementptr [2 x i16], [2 x i16]* %0, i32 0, i32 0
- %2 = getelementptr [2 x i16], [2 x i16]* %0, i32 0, i32 1
- store i16 0, i16* %1
- store i16 1, i16* %2
- %3 = getelementptr [2 x i16], [2 x i16]* %0, i32 0, i32 %index
- %4 = load i16, i16* %3
- %5 = sext i16 %4 to i32
- store i32 %5, i32 addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}char_array:
-
-; R600: MOVA_INT
-
-; SI-DAG: buffer_store_byte v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen ; encoding: [0x00,0x10,0x60,0xe0
-; SI-DAG: buffer_store_byte v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen offset:1 ; encoding: [0x01,0x10,0x60,0xe0
-define void @char_array(i32 addrspace(1)* %out, i32 %index) {
-entry:
- %0 = alloca [2 x i8]
- %1 = getelementptr [2 x i8], [2 x i8]* %0, i32 0, i32 0
- %2 = getelementptr [2 x i8], [2 x i8]* %0, i32 0, i32 1
- store i8 0, i8* %1
- store i8 1, i8* %2
- %3 = getelementptr [2 x i8], [2 x i8]* %0, i32 0, i32 %index
- %4 = load i8, i8* %3
- %5 = sext i8 %4 to i32
- store i32 %5, i32 addrspace(1)* %out
- ret void
-
-}
-
-; Make sure we don't overwrite workitem information with private memory
-
-; FUNC-LABEL: {{^}}work_item_info:
-; R600-NOT: MOV T0.X
-; Additional check in case the move ends up in the last slot
-; R600-NOT: MOV * T0.X
-
-; SI-NOT: v_mov_b32_e{{(32|64)}} v0
-define void @work_item_info(i32 addrspace(1)* %out, i32 %in) {
-entry:
- %0 = alloca [2 x i32]
- %1 = getelementptr [2 x i32], [2 x i32]* %0, i32 0, i32 0
- %2 = getelementptr [2 x i32], [2 x i32]* %0, i32 0, i32 1
- store i32 0, i32* %1
- store i32 1, i32* %2
- %3 = getelementptr [2 x i32], [2 x i32]* %0, i32 0, i32 %in
- %4 = load i32, i32* %3
- %5 = call i32 @llvm.r600.read.tidig.x()
- %6 = add i32 %4, %5
- store i32 %6, i32 addrspace(1)* %out
- ret void
-}
-
-; Test that two stack objects are not stored in the same register
-; The second stack object should be in T3.X
-; FUNC-LABEL: {{^}}no_overlap:
-; R600_CHECK: MOV
-; R600_CHECK: [[CHAN:[XYZW]]]+
-; R600-NOT: [[CHAN]]+
-; SI: v_mov_b32_e32 v3
-define void @no_overlap(i32 addrspace(1)* %out, i32 %in) {
-entry:
- %0 = alloca [3 x i8], align 1
- %1 = alloca [2 x i8], align 1
- %2 = getelementptr [3 x i8], [3 x i8]* %0, i32 0, i32 0
- %3 = getelementptr [3 x i8], [3 x i8]* %0, i32 0, i32 1
- %4 = getelementptr [3 x i8], [3 x i8]* %0, i32 0, i32 2
- %5 = getelementptr [2 x i8], [2 x i8]* %1, i32 0, i32 0
- %6 = getelementptr [2 x i8], [2 x i8]* %1, i32 0, i32 1
- store i8 0, i8* %2
- store i8 1, i8* %3
- store i8 2, i8* %4
- store i8 1, i8* %5
- store i8 0, i8* %6
- %7 = getelementptr [3 x i8], [3 x i8]* %0, i32 0, i32 %in
- %8 = getelementptr [2 x i8], [2 x i8]* %1, i32 0, i32 %in
- %9 = load i8, i8* %7
- %10 = load i8, i8* %8
- %11 = add i8 %9, %10
- %12 = sext i8 %11 to i32
- store i32 %12, i32 addrspace(1)* %out
- ret void
-}
-
-define void @char_array_array(i32 addrspace(1)* %out, i32 %index) {
-entry:
- %alloca = alloca [2 x [2 x i8]]
- %gep0 = getelementptr [2 x [2 x i8]], [2 x [2 x i8]]* %alloca, i32 0, i32 0, i32 0
- %gep1 = getelementptr [2 x [2 x i8]], [2 x [2 x i8]]* %alloca, i32 0, i32 0, i32 1
- store i8 0, i8* %gep0
- store i8 1, i8* %gep1
- %gep2 = getelementptr [2 x [2 x i8]], [2 x [2 x i8]]* %alloca, i32 0, i32 0, i32 %index
- %load = load i8, i8* %gep2
- %sext = sext i8 %load to i32
- store i32 %sext, i32 addrspace(1)* %out
- ret void
-}
-
-define void @i32_array_array(i32 addrspace(1)* %out, i32 %index) {
-entry:
- %alloca = alloca [2 x [2 x i32]]
- %gep0 = getelementptr [2 x [2 x i32]], [2 x [2 x i32]]* %alloca, i32 0, i32 0, i32 0
- %gep1 = getelementptr [2 x [2 x i32]], [2 x [2 x i32]]* %alloca, i32 0, i32 0, i32 1
- store i32 0, i32* %gep0
- store i32 1, i32* %gep1
- %gep2 = getelementptr [2 x [2 x i32]], [2 x [2 x i32]]* %alloca, i32 0, i32 0, i32 %index
- %load = load i32, i32* %gep2
- store i32 %load, i32 addrspace(1)* %out
- ret void
-}
-
-define void @i64_array_array(i64 addrspace(1)* %out, i32 %index) {
-entry:
- %alloca = alloca [2 x [2 x i64]]
- %gep0 = getelementptr [2 x [2 x i64]], [2 x [2 x i64]]* %alloca, i32 0, i32 0, i32 0
- %gep1 = getelementptr [2 x [2 x i64]], [2 x [2 x i64]]* %alloca, i32 0, i32 0, i32 1
- store i64 0, i64* %gep0
- store i64 1, i64* %gep1
- %gep2 = getelementptr [2 x [2 x i64]], [2 x [2 x i64]]* %alloca, i32 0, i32 0, i32 %index
- %load = load i64, i64* %gep2
- store i64 %load, i64 addrspace(1)* %out
- ret void
-}
-
-%struct.pair32 = type { i32, i32 }
-
-define void @struct_array_array(i32 addrspace(1)* %out, i32 %index) {
-entry:
- %alloca = alloca [2 x [2 x %struct.pair32]]
- %gep0 = getelementptr [2 x [2 x %struct.pair32]], [2 x [2 x %struct.pair32]]* %alloca, i32 0, i32 0, i32 0, i32 1
- %gep1 = getelementptr [2 x [2 x %struct.pair32]], [2 x [2 x %struct.pair32]]* %alloca, i32 0, i32 0, i32 1, i32 1
- store i32 0, i32* %gep0
- store i32 1, i32* %gep1
- %gep2 = getelementptr [2 x [2 x %struct.pair32]], [2 x [2 x %struct.pair32]]* %alloca, i32 0, i32 0, i32 %index, i32 0
- %load = load i32, i32* %gep2
- store i32 %load, i32 addrspace(1)* %out
- ret void
-}
-
-define void @struct_pair32_array(i32 addrspace(1)* %out, i32 %index) {
-entry:
- %alloca = alloca [2 x %struct.pair32]
- %gep0 = getelementptr [2 x %struct.pair32], [2 x %struct.pair32]* %alloca, i32 0, i32 0, i32 1
- %gep1 = getelementptr [2 x %struct.pair32], [2 x %struct.pair32]* %alloca, i32 0, i32 1, i32 0
- store i32 0, i32* %gep0
- store i32 1, i32* %gep1
- %gep2 = getelementptr [2 x %struct.pair32], [2 x %struct.pair32]* %alloca, i32 0, i32 %index, i32 0
- %load = load i32, i32* %gep2
- store i32 %load, i32 addrspace(1)* %out
- ret void
-}
-
-define void @select_private(i32 addrspace(1)* %out, i32 %in) nounwind {
-entry:
- %tmp = alloca [2 x i32]
- %tmp1 = getelementptr [2 x i32], [2 x i32]* %tmp, i32 0, i32 0
- %tmp2 = getelementptr [2 x i32], [2 x i32]* %tmp, i32 0, i32 1
- store i32 0, i32* %tmp1
- store i32 1, i32* %tmp2
- %cmp = icmp eq i32 %in, 0
- %sel = select i1 %cmp, i32* %tmp1, i32* %tmp2
- %load = load i32, i32* %sel
- store i32 %load, i32 addrspace(1)* %out
- ret void
-}
-
-; AMDGPUPromoteAlloca does not know how to handle ptrtoint. When it
-; finds one, it should stop trying to promote.
-
-; FUNC-LABEL: ptrtoint:
-; SI-NOT: ds_write
-; SI: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen
-; SI: buffer_load_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen offset:5
-define void @ptrtoint(i32 addrspace(1)* %out, i32 %a, i32 %b) {
- %alloca = alloca [16 x i32]
- %tmp0 = getelementptr [16 x i32], [16 x i32]* %alloca, i32 0, i32 %a
- store i32 5, i32* %tmp0
- %tmp1 = ptrtoint [16 x i32]* %alloca to i32
- %tmp2 = add i32 %tmp1, 5
- %tmp3 = inttoptr i32 %tmp2 to i32*
- %tmp4 = getelementptr i32, i32* %tmp3, i32 %b
- %tmp5 = load i32, i32* %tmp4
- store i32 %tmp5, i32 addrspace(1)* %out
- ret void
-}
Removed: llvm/trunk/test/CodeGen/R600/pv-packing.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/pv-packing.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/pv-packing.ll (original)
+++ llvm/trunk/test/CodeGen/R600/pv-packing.ll (removed)
@@ -1,45 +0,0 @@
-; RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck %s
-
-;CHECK: DOT4 T{{[0-9]\.X}}
-;CHECK: MULADD_IEEE * T{{[0-9]\.W}}
-
-define void @main(<4 x float> inreg %reg0, <4 x float> inreg %reg1, <4 x float> inreg %reg2, <4 x float> inreg %reg3) #0 {
-main_body:
- %0 = extractelement <4 x float> %reg1, i32 0
- %1 = extractelement <4 x float> %reg1, i32 1
- %2 = extractelement <4 x float> %reg1, i32 2
- %3 = extractelement <4 x float> %reg2, i32 0
- %4 = extractelement <4 x float> %reg2, i32 1
- %5 = extractelement <4 x float> %reg2, i32 2
- %6 = extractelement <4 x float> %reg3, i32 0
- %7 = extractelement <4 x float> %reg3, i32 1
- %8 = extractelement <4 x float> %reg3, i32 2
- %9 = load <4 x float>, <4 x float> addrspace(8)* null
- %10 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
- %11 = call float @llvm.AMDGPU.dp4(<4 x float> %9, <4 x float> %9)
- %12 = fmul float %0, %3
- %13 = fadd float %12, %6
- %14 = fmul float %1, %4
- %15 = fadd float %14, %7
- %16 = fmul float %2, %5
- %17 = fadd float %16, %8
- %18 = fmul float %11, %11
- %19 = fadd float %18, %0
- %20 = insertelement <4 x float> undef, float %13, i32 0
- %21 = insertelement <4 x float> %20, float %15, i32 1
- %22 = insertelement <4 x float> %21, float %17, i32 2
- %23 = insertelement <4 x float> %22, float %19, i32 3
- %24 = call float @llvm.AMDGPU.dp4(<4 x float> %23, <4 x float> %10)
- %25 = insertelement <4 x float> undef, float %24, i32 0
- call void @llvm.R600.store.swizzle(<4 x float> %25, i32 0, i32 2)
- ret void
-}
-
-; Function Attrs: readnone
-declare float @llvm.AMDGPU.dp4(<4 x float>, <4 x float>) #1
-
-
-declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
-
-attributes #0 = { "ShaderType"="1" }
-attributes #1 = { readnone }
Removed: llvm/trunk/test/CodeGen/R600/pv.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/pv.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/pv.ll (original)
+++ llvm/trunk/test/CodeGen/R600/pv.ll (removed)
@@ -1,241 +0,0 @@
-; RUN: llc < %s -march=r600 | FileCheck %s
-
-; CHECK: DOT4 * T{{[0-9]\.W}} (MASKED)
-; CHECK: MAX T{{[0-9].[XYZW]}}, 0.0, PV.X
-
-define void @main(<4 x float> inreg %reg0, <4 x float> inreg %reg1, <4 x float> inreg %reg2, <4 x float> inreg %reg3, <4 x float> inreg %reg4, <4 x float> inreg %reg5, <4 x float> inreg %reg6, <4 x float> inreg %reg7) #0 {
-main_body:
- %0 = extractelement <4 x float> %reg1, i32 0
- %1 = extractelement <4 x float> %reg1, i32 1
- %2 = extractelement <4 x float> %reg1, i32 2
- %3 = extractelement <4 x float> %reg1, i32 3
- %4 = extractelement <4 x float> %reg2, i32 0
- %5 = extractelement <4 x float> %reg2, i32 1
- %6 = extractelement <4 x float> %reg2, i32 2
- %7 = extractelement <4 x float> %reg2, i32 3
- %8 = extractelement <4 x float> %reg3, i32 0
- %9 = extractelement <4 x float> %reg3, i32 1
- %10 = extractelement <4 x float> %reg3, i32 2
- %11 = extractelement <4 x float> %reg3, i32 3
- %12 = extractelement <4 x float> %reg4, i32 0
- %13 = extractelement <4 x float> %reg4, i32 1
- %14 = extractelement <4 x float> %reg4, i32 2
- %15 = extractelement <4 x float> %reg4, i32 3
- %16 = extractelement <4 x float> %reg5, i32 0
- %17 = extractelement <4 x float> %reg5, i32 1
- %18 = extractelement <4 x float> %reg5, i32 2
- %19 = extractelement <4 x float> %reg5, i32 3
- %20 = extractelement <4 x float> %reg6, i32 0
- %21 = extractelement <4 x float> %reg6, i32 1
- %22 = extractelement <4 x float> %reg6, i32 2
- %23 = extractelement <4 x float> %reg6, i32 3
- %24 = extractelement <4 x float> %reg7, i32 0
- %25 = extractelement <4 x float> %reg7, i32 1
- %26 = extractelement <4 x float> %reg7, i32 2
- %27 = extractelement <4 x float> %reg7, i32 3
- %28 = load <4 x float>, <4 x float> addrspace(8)* null
- %29 = extractelement <4 x float> %28, i32 0
- %30 = fmul float %0, %29
- %31 = load <4 x float>, <4 x float> addrspace(8)* null
- %32 = extractelement <4 x float> %31, i32 1
- %33 = fmul float %0, %32
- %34 = load <4 x float>, <4 x float> addrspace(8)* null
- %35 = extractelement <4 x float> %34, i32 2
- %36 = fmul float %0, %35
- %37 = load <4 x float>, <4 x float> addrspace(8)* null
- %38 = extractelement <4 x float> %37, i32 3
- %39 = fmul float %0, %38
- %40 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
- %41 = extractelement <4 x float> %40, i32 0
- %42 = fmul float %1, %41
- %43 = fadd float %42, %30
- %44 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
- %45 = extractelement <4 x float> %44, i32 1
- %46 = fmul float %1, %45
- %47 = fadd float %46, %33
- %48 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
- %49 = extractelement <4 x float> %48, i32 2
- %50 = fmul float %1, %49
- %51 = fadd float %50, %36
- %52 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
- %53 = extractelement <4 x float> %52, i32 3
- %54 = fmul float %1, %53
- %55 = fadd float %54, %39
- %56 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
- %57 = extractelement <4 x float> %56, i32 0
- %58 = fmul float %2, %57
- %59 = fadd float %58, %43
- %60 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
- %61 = extractelement <4 x float> %60, i32 1
- %62 = fmul float %2, %61
- %63 = fadd float %62, %47
- %64 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
- %65 = extractelement <4 x float> %64, i32 2
- %66 = fmul float %2, %65
- %67 = fadd float %66, %51
- %68 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
- %69 = extractelement <4 x float> %68, i32 3
- %70 = fmul float %2, %69
- %71 = fadd float %70, %55
- %72 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
- %73 = extractelement <4 x float> %72, i32 0
- %74 = fmul float %3, %73
- %75 = fadd float %74, %59
- %76 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
- %77 = extractelement <4 x float> %76, i32 1
- %78 = fmul float %3, %77
- %79 = fadd float %78, %63
- %80 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
- %81 = extractelement <4 x float> %80, i32 2
- %82 = fmul float %3, %81
- %83 = fadd float %82, %67
- %84 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
- %85 = extractelement <4 x float> %84, i32 3
- %86 = fmul float %3, %85
- %87 = fadd float %86, %71
- %88 = insertelement <4 x float> undef, float %4, i32 0
- %89 = insertelement <4 x float> %88, float %5, i32 1
- %90 = insertelement <4 x float> %89, float %6, i32 2
- %91 = insertelement <4 x float> %90, float 0.000000e+00, i32 3
- %92 = insertelement <4 x float> undef, float %4, i32 0
- %93 = insertelement <4 x float> %92, float %5, i32 1
- %94 = insertelement <4 x float> %93, float %6, i32 2
- %95 = insertelement <4 x float> %94, float 0.000000e+00, i32 3
- %96 = call float @llvm.AMDGPU.dp4(<4 x float> %91, <4 x float> %95)
- %97 = call float @fabs(float %96)
- %98 = call float @llvm.AMDGPU.rsq.f32(float %97)
- %99 = fmul float %4, %98
- %100 = fmul float %5, %98
- %101 = fmul float %6, %98
- %102 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)
- %103 = extractelement <4 x float> %102, i32 0
- %104 = fmul float %103, %8
- %105 = fadd float %104, %20
- %106 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)
- %107 = extractelement <4 x float> %106, i32 1
- %108 = fmul float %107, %9
- %109 = fadd float %108, %21
- %110 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)
- %111 = extractelement <4 x float> %110, i32 2
- %112 = fmul float %111, %10
- %113 = fadd float %112, %22
- %114 = call float @llvm.AMDIL.clamp.(float %105, float 0.000000e+00, float 1.000000e+00)
- %115 = call float @llvm.AMDIL.clamp.(float %109, float 0.000000e+00, float 1.000000e+00)
- %116 = call float @llvm.AMDIL.clamp.(float %113, float 0.000000e+00, float 1.000000e+00)
- %117 = call float @llvm.AMDIL.clamp.(float %15, float 0.000000e+00, float 1.000000e+00)
- %118 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5)
- %119 = extractelement <4 x float> %118, i32 0
- %120 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5)
- %121 = extractelement <4 x float> %120, i32 1
- %122 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5)
- %123 = extractelement <4 x float> %122, i32 2
- %124 = insertelement <4 x float> undef, float %99, i32 0
- %125 = insertelement <4 x float> %124, float %100, i32 1
- %126 = insertelement <4 x float> %125, float %101, i32 2
- %127 = insertelement <4 x float> %126, float 0.000000e+00, i32 3
- %128 = insertelement <4 x float> undef, float %119, i32 0
- %129 = insertelement <4 x float> %128, float %121, i32 1
- %130 = insertelement <4 x float> %129, float %123, i32 2
- %131 = insertelement <4 x float> %130, float 0.000000e+00, i32 3
- %132 = call float @llvm.AMDGPU.dp4(<4 x float> %127, <4 x float> %131)
- %133 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 7)
- %134 = extractelement <4 x float> %133, i32 0
- %135 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 7)
- %136 = extractelement <4 x float> %135, i32 1
- %137 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 7)
- %138 = extractelement <4 x float> %137, i32 2
- %139 = insertelement <4 x float> undef, float %99, i32 0
- %140 = insertelement <4 x float> %139, float %100, i32 1
- %141 = insertelement <4 x float> %140, float %101, i32 2
- %142 = insertelement <4 x float> %141, float 0.000000e+00, i32 3
- %143 = insertelement <4 x float> undef, float %134, i32 0
- %144 = insertelement <4 x float> %143, float %136, i32 1
- %145 = insertelement <4 x float> %144, float %138, i32 2
- %146 = insertelement <4 x float> %145, float 0.000000e+00, i32 3
- %147 = call float @llvm.AMDGPU.dp4(<4 x float> %142, <4 x float> %146)
- %148 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 8)
- %149 = extractelement <4 x float> %148, i32 0
- %150 = fmul float %149, %8
- %151 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 8)
- %152 = extractelement <4 x float> %151, i32 1
- %153 = fmul float %152, %9
- %154 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 8)
- %155 = extractelement <4 x float> %154, i32 2
- %156 = fmul float %155, %10
- %157 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9)
- %158 = extractelement <4 x float> %157, i32 0
- %159 = fmul float %158, %12
- %160 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9)
- %161 = extractelement <4 x float> %160, i32 1
- %162 = fmul float %161, %13
- %163 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9)
- %164 = extractelement <4 x float> %163, i32 2
- %165 = fmul float %164, %14
- %166 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 10)
- %167 = extractelement <4 x float> %166, i32 0
- %168 = fmul float %167, %16
- %169 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 10)
- %170 = extractelement <4 x float> %169, i32 1
- %171 = fmul float %170, %17
- %172 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 10)
- %173 = extractelement <4 x float> %172, i32 2
- %174 = fmul float %173, %18
- %175 = fcmp uge float %132, 0.000000e+00
- %176 = select i1 %175, float %132, float 0.000000e+00
- %177 = fcmp uge float %147, 0.000000e+00
- %178 = select i1 %177, float %147, float 0.000000e+00
- %179 = call float @llvm.pow.f32(float %178, float %24)
- %180 = fcmp ult float %132, 0.000000e+00
- %181 = select i1 %180, float 0.000000e+00, float %179
- %182 = fadd float %150, %105
- %183 = fadd float %153, %109
- %184 = fadd float %156, %113
- %185 = fmul float %176, %159
- %186 = fadd float %185, %182
- %187 = fmul float %176, %162
- %188 = fadd float %187, %183
- %189 = fmul float %176, %165
- %190 = fadd float %189, %184
- %191 = fmul float %181, %168
- %192 = fadd float %191, %186
- %193 = fmul float %181, %171
- %194 = fadd float %193, %188
- %195 = fmul float %181, %174
- %196 = fadd float %195, %190
- %197 = call float @llvm.AMDIL.clamp.(float %192, float 0.000000e+00, float 1.000000e+00)
- %198 = call float @llvm.AMDIL.clamp.(float %194, float 0.000000e+00, float 1.000000e+00)
- %199 = call float @llvm.AMDIL.clamp.(float %196, float 0.000000e+00, float 1.000000e+00)
- %200 = insertelement <4 x float> undef, float %75, i32 0
- %201 = insertelement <4 x float> %200, float %79, i32 1
- %202 = insertelement <4 x float> %201, float %83, i32 2
- %203 = insertelement <4 x float> %202, float %87, i32 3
- call void @llvm.R600.store.swizzle(<4 x float> %203, i32 60, i32 1)
- %204 = insertelement <4 x float> undef, float %197, i32 0
- %205 = insertelement <4 x float> %204, float %198, i32 1
- %206 = insertelement <4 x float> %205, float %199, i32 2
- %207 = insertelement <4 x float> %206, float %117, i32 3
- call void @llvm.R600.store.swizzle(<4 x float> %207, i32 0, i32 2)
- ret void
-}
-
-; Function Attrs: readnone
-declare float @llvm.AMDGPU.dp4(<4 x float>, <4 x float>) #1
-
-; Function Attrs: readonly
-declare float @fabs(float) #2
-
-; Function Attrs: readnone
-declare float @llvm.AMDGPU.rsq.f32(float) #1
-
-; Function Attrs: readnone
-declare float @llvm.AMDIL.clamp.(float, float, float) #1
-
-; Function Attrs: nounwind readonly
-declare float @llvm.pow.f32(float, float) #3
-
-declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
-
-attributes #0 = { "ShaderType"="1" }
-attributes #1 = { readnone }
-attributes #2 = { readonly }
-attributes #3 = { nounwind readonly }
Removed: llvm/trunk/test/CodeGen/R600/r600-encoding.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/r600-encoding.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/r600-encoding.ll (original)
+++ llvm/trunk/test/CodeGen/R600/r600-encoding.ll (removed)
@@ -1,25 +0,0 @@
-; RUN: llc < %s -march=r600 -show-mc-encoding -mcpu=redwood | FileCheck --check-prefix=EG %s
-; RUN: llc < %s -march=r600 -show-mc-encoding -mcpu=rs880 | FileCheck --check-prefix=R600 %s
-
-; The earliest R600 GPUs have a slightly different encoding than the rest of
-; the VLIW4/5 GPUs.
-
-; EG: {{^}}test:
-; EG: MUL_IEEE {{[ *TXYZWPVxyzw.,0-9]+}} ; encoding: [{{0x[0-9a-f]+,0x[0-9a-f]+,0x[0-9a-f]+,0x[0-9a-f]+,0x10,0x01,0x[0-9a-f]+,0x[0-9a-f]+}}]
-
-; R600: {{^}}test:
-; R600: MUL_IEEE {{[ *TXYZWPVxyzw.,0-9]+}} ; encoding: [{{0x[0-9a-f]+,0x[0-9a-f]+,0x[0-9a-f]+,0x[0-9a-f]+,0x10,0x02,0x[0-9a-f]+,0x[0-9a-f]+}}]
-
-define void @test(<4 x float> inreg %reg0) #0 {
-entry:
- %r0 = extractelement <4 x float> %reg0, i32 0
- %r1 = extractelement <4 x float> %reg0, i32 1
- %r2 = fmul float %r0, %r1
- %vec = insertelement <4 x float> undef, float %r2, i32 0
- call void @llvm.R600.store.swizzle(<4 x float> %vec, i32 0, i32 0)
- ret void
-}
-
-declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
-
-attributes #0 = { "ShaderType"="0" }
Removed: llvm/trunk/test/CodeGen/R600/r600-export-fix.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/r600-export-fix.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/r600-export-fix.ll (original)
+++ llvm/trunk/test/CodeGen/R600/r600-export-fix.ll (removed)
@@ -1,142 +0,0 @@
-; RUN: llc < %s -march=r600 -mcpu=cedar | FileCheck %s
-
-;CHECK: EXPORT T{{[0-9]}}.XYZW
-;CHECK: EXPORT T{{[0-9]}}.0000
-;CHECK: EXPORT T{{[0-9]}}.0000
-;CHECK: EXPORT T{{[0-9]}}.0XYZ
-;CHECK: EXPORT T{{[0-9]}}.XYZW
-;CHECK: EXPORT T{{[0-9]}}.YZ00
-;CHECK: EXPORT T{{[0-9]}}.0000
-;CHECK: EXPORT T{{[0-9]}}.0000
-
-
-define void @main(<4 x float> inreg %reg0, <4 x float> inreg %reg1) #0 {
-main_body:
- %0 = extractelement <4 x float> %reg1, i32 0
- %1 = extractelement <4 x float> %reg1, i32 1
- %2 = extractelement <4 x float> %reg1, i32 2
- %3 = extractelement <4 x float> %reg1, i32 3
- %4 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)
- %5 = extractelement <4 x float> %4, i32 0
- %6 = fmul float %5, %0
- %7 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)
- %8 = extractelement <4 x float> %7, i32 1
- %9 = fmul float %8, %0
- %10 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)
- %11 = extractelement <4 x float> %10, i32 2
- %12 = fmul float %11, %0
- %13 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)
- %14 = extractelement <4 x float> %13, i32 3
- %15 = fmul float %14, %0
- %16 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5)
- %17 = extractelement <4 x float> %16, i32 0
- %18 = fmul float %17, %1
- %19 = fadd float %18, %6
- %20 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5)
- %21 = extractelement <4 x float> %20, i32 1
- %22 = fmul float %21, %1
- %23 = fadd float %22, %9
- %24 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5)
- %25 = extractelement <4 x float> %24, i32 2
- %26 = fmul float %25, %1
- %27 = fadd float %26, %12
- %28 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5)
- %29 = extractelement <4 x float> %28, i32 3
- %30 = fmul float %29, %1
- %31 = fadd float %30, %15
- %32 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6)
- %33 = extractelement <4 x float> %32, i32 0
- %34 = fmul float %33, %2
- %35 = fadd float %34, %19
- %36 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6)
- %37 = extractelement <4 x float> %36, i32 1
- %38 = fmul float %37, %2
- %39 = fadd float %38, %23
- %40 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6)
- %41 = extractelement <4 x float> %40, i32 2
- %42 = fmul float %41, %2
- %43 = fadd float %42, %27
- %44 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6)
- %45 = extractelement <4 x float> %44, i32 3
- %46 = fmul float %45, %2
- %47 = fadd float %46, %31
- %48 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 7)
- %49 = extractelement <4 x float> %48, i32 0
- %50 = fmul float %49, %3
- %51 = fadd float %50, %35
- %52 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 7)
- %53 = extractelement <4 x float> %52, i32 1
- %54 = fmul float %53, %3
- %55 = fadd float %54, %39
- %56 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 7)
- %57 = extractelement <4 x float> %56, i32 2
- %58 = fmul float %57, %3
- %59 = fadd float %58, %43
- %60 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 7)
- %61 = extractelement <4 x float> %60, i32 3
- %62 = fmul float %61, %3
- %63 = fadd float %62, %47
- %64 = load <4 x float>, <4 x float> addrspace(8)* null
- %65 = extractelement <4 x float> %64, i32 0
- %66 = load <4 x float>, <4 x float> addrspace(8)* null
- %67 = extractelement <4 x float> %66, i32 1
- %68 = load <4 x float>, <4 x float> addrspace(8)* null
- %69 = extractelement <4 x float> %68, i32 2
- %70 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
- %71 = extractelement <4 x float> %70, i32 0
- %72 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
- %73 = extractelement <4 x float> %72, i32 1
- %74 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
- %75 = extractelement <4 x float> %74, i32 2
- %76 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
- %77 = extractelement <4 x float> %76, i32 0
- %78 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
- %79 = extractelement <4 x float> %78, i32 1
- %80 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
- %81 = extractelement <4 x float> %80, i32 2
- %82 = insertelement <4 x float> undef, float %51, i32 0
- %83 = insertelement <4 x float> %82, float %55, i32 1
- %84 = insertelement <4 x float> %83, float %59, i32 2
- %85 = insertelement <4 x float> %84, float %63, i32 3
- call void @llvm.R600.store.swizzle(<4 x float> %85, i32 60, i32 1)
- %86 = insertelement <4 x float> undef, float 0.000000e+00, i32 0
- %87 = insertelement <4 x float> %86, float 0.000000e+00, i32 1
- %88 = insertelement <4 x float> %87, float 0.000000e+00, i32 2
- %89 = insertelement <4 x float> %88, float 0.000000e+00, i32 3
- call void @llvm.R600.store.swizzle(<4 x float> %89, i32 0, i32 2)
- %90 = insertelement <4 x float> undef, float 0.000000e+00, i32 0
- %91 = insertelement <4 x float> %90, float 0.000000e+00, i32 1
- %92 = insertelement <4 x float> %91, float 0.000000e+00, i32 2
- %93 = insertelement <4 x float> %92, float 0.000000e+00, i32 3
- call void @llvm.R600.store.swizzle(<4 x float> %93, i32 1, i32 2)
- %94 = insertelement <4 x float> undef, float 0.000000e+00, i32 0
- %95 = insertelement <4 x float> %94, float %65, i32 1
- %96 = insertelement <4 x float> %95, float %67, i32 2
- %97 = insertelement <4 x float> %96, float %69, i32 3
- call void @llvm.R600.store.swizzle(<4 x float> %97, i32 2, i32 2)
- %98 = insertelement <4 x float> undef, float %77, i32 0
- %99 = insertelement <4 x float> %98, float %79, i32 1
- %100 = insertelement <4 x float> %99, float %81, i32 2
- %101 = insertelement <4 x float> %100, float %71, i32 3
- call void @llvm.R600.store.swizzle(<4 x float> %101, i32 3, i32 2)
- %102 = insertelement <4 x float> undef, float %73, i32 0
- %103 = insertelement <4 x float> %102, float %75, i32 1
- %104 = insertelement <4 x float> %103, float 0.000000e+00, i32 2
- %105 = insertelement <4 x float> %104, float 0.000000e+00, i32 3
- call void @llvm.R600.store.swizzle(<4 x float> %105, i32 4, i32 2)
- %106 = insertelement <4 x float> undef, float 0.000000e+00, i32 0
- %107 = insertelement <4 x float> %106, float 0.000000e+00, i32 1
- %108 = insertelement <4 x float> %107, float 0.000000e+00, i32 2
- %109 = insertelement <4 x float> %108, float 0.000000e+00, i32 3
- call void @llvm.R600.store.swizzle(<4 x float> %109, i32 5, i32 2)
- %110 = insertelement <4 x float> undef, float 0.000000e+00, i32 0
- %111 = insertelement <4 x float> %110, float 0.000000e+00, i32 1
- %112 = insertelement <4 x float> %111, float 0.000000e+00, i32 2
- %113 = insertelement <4 x float> %112, float 0.000000e+00, i32 3
- call void @llvm.R600.store.swizzle(<4 x float> %113, i32 6, i32 2)
- ret void
-}
-
-declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
-
-attributes #0 = { "ShaderType"="1" }
Removed: llvm/trunk/test/CodeGen/R600/r600-infinite-loop-bug-while-reorganizing-vector.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/r600-infinite-loop-bug-while-reorganizing-vector.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/r600-infinite-loop-bug-while-reorganizing-vector.ll (original)
+++ llvm/trunk/test/CodeGen/R600/r600-infinite-loop-bug-while-reorganizing-vector.ll (removed)
@@ -1,58 +0,0 @@
-;RUN: llc < %s -march=r600 -mcpu=cayman
-
-define void @main(<4 x float> inreg, <4 x float> inreg) #0 {
-main_body:
- %2 = extractelement <4 x float> %0, i32 0
- %3 = extractelement <4 x float> %0, i32 1
- %4 = extractelement <4 x float> %0, i32 2
- %5 = extractelement <4 x float> %0, i32 3
- %6 = insertelement <4 x float> undef, float %2, i32 0
- %7 = insertelement <4 x float> %6, float %3, i32 1
- %8 = insertelement <4 x float> %7, float %4, i32 2
- %9 = insertelement <4 x float> %8, float %5, i32 3
- %10 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %9)
- %11 = extractelement <4 x float> %10, i32 0
- %12 = extractelement <4 x float> %10, i32 1
- %13 = extractelement <4 x float> %10, i32 2
- %14 = extractelement <4 x float> %10, i32 3
- %15 = call float @fabs(float %13)
- %16 = fdiv float 1.000000e+00, %15
- %17 = fmul float %11, %16
- %18 = fadd float %17, 1.500000e+00
- %19 = fmul float %12, %16
- %20 = fadd float %19, 1.500000e+00
- %21 = insertelement <4 x float> undef, float %20, i32 0
- %22 = insertelement <4 x float> %21, float %18, i32 1
- %23 = insertelement <4 x float> %22, float %14, i32 2
- %24 = insertelement <4 x float> %23, float %5, i32 3
- %25 = extractelement <4 x float> %24, i32 0
- %26 = extractelement <4 x float> %24, i32 1
- %27 = extractelement <4 x float> %24, i32 2
- %28 = extractelement <4 x float> %24, i32 3
- %29 = insertelement <4 x float> undef, float %25, i32 0
- %30 = insertelement <4 x float> %29, float %26, i32 1
- %31 = insertelement <4 x float> %30, float %27, i32 2
- %32 = insertelement <4 x float> %31, float %28, i32 3
- %33 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %32, i32 16, i32 0, i32 13)
- %34 = extractelement <4 x float> %33, i32 0
- %35 = insertelement <4 x float> undef, float %34, i32 0
- %36 = insertelement <4 x float> %35, float %34, i32 1
- %37 = insertelement <4 x float> %36, float %34, i32 2
- %38 = insertelement <4 x float> %37, float 1.000000e+00, i32 3
- call void @llvm.R600.store.swizzle(<4 x float> %38, i32 0, i32 0)
- ret void
-}
-
-; Function Attrs: readnone
-declare <4 x float> @llvm.AMDGPU.cube(<4 x float>) #1
-
-; Function Attrs: readnone
-declare float @fabs(float) #1
-
-; Function Attrs: readnone
-declare <4 x float> @llvm.AMDGPU.tex(<4 x float>, i32, i32, i32) #1
-
-declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
-
-attributes #0 = { "ShaderType"="0" }
-attributes #1 = { readnone }
Removed: llvm/trunk/test/CodeGen/R600/r600cfg.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/r600cfg.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/r600cfg.ll (original)
+++ llvm/trunk/test/CodeGen/R600/r600cfg.ll (removed)
@@ -1,119 +0,0 @@
-;RUN: llc < %s -march=r600 -mcpu=redwood
-
-define void @main(<4 x float> inreg %reg0, <4 x float> inreg %reg1) #0 {
-main_body:
- %0 = extractelement <4 x float> %reg1, i32 0
- %1 = extractelement <4 x float> %reg1, i32 1
- %2 = extractelement <4 x float> %reg1, i32 2
- %3 = extractelement <4 x float> %reg1, i32 3
- %4 = bitcast float %0 to i32
- %5 = icmp eq i32 %4, 0
- %6 = sext i1 %5 to i32
- %7 = bitcast i32 %6 to float
- %8 = bitcast float %7 to i32
- %9 = icmp ne i32 %8, 0
- %. = select i1 %9, float 0x36A0000000000000, float %0
- br label %LOOP
-
-LOOP: ; preds = %LOOP47, %main_body
- %temp12.0 = phi float [ 0x36A0000000000000, %main_body ], [ %temp12.1, %LOOP47 ]
- %temp8.0 = phi float [ 0.000000e+00, %main_body ], [ %38, %LOOP47 ]
- %temp4.1 = phi float [ %., %main_body ], [ %52, %LOOP47 ]
- %10 = bitcast float %temp4.1 to i32
- %11 = icmp eq i32 %10, 1
- %12 = sext i1 %11 to i32
- %13 = bitcast i32 %12 to float
- %14 = bitcast float %13 to i32
- %15 = icmp ne i32 %14, 0
- br i1 %15, label %IF41, label %ENDIF40
-
-IF41: ; preds = %LOOP
- %16 = insertelement <4 x float> undef, float %0, i32 0
- %17 = insertelement <4 x float> %16, float %temp8.0, i32 1
- %18 = insertelement <4 x float> %17, float %temp12.0, i32 2
- %19 = insertelement <4 x float> %18, float 0.000000e+00, i32 3
- call void @llvm.R600.store.stream.output(<4 x float> %19, i32 0, i32 0, i32 1)
- %20 = insertelement <4 x float> undef, float %0, i32 0
- %21 = insertelement <4 x float> %20, float %temp8.0, i32 1
- %22 = insertelement <4 x float> %21, float %temp12.0, i32 2
- %23 = insertelement <4 x float> %22, float 0.000000e+00, i32 3
- call void @llvm.R600.store.stream.output(<4 x float> %23, i32 0, i32 0, i32 2)
- %24 = insertelement <4 x float> undef, float %0, i32 0
- %25 = insertelement <4 x float> %24, float %temp8.0, i32 1
- %26 = insertelement <4 x float> %25, float %temp12.0, i32 2
- %27 = insertelement <4 x float> %26, float 0.000000e+00, i32 3
- call void @llvm.R600.store.stream.output(<4 x float> %27, i32 0, i32 0, i32 4)
- %28 = insertelement <4 x float> undef, float 0.000000e+00, i32 0
- %29 = insertelement <4 x float> %28, float 0.000000e+00, i32 1
- %30 = insertelement <4 x float> %29, float 0.000000e+00, i32 2
- %31 = insertelement <4 x float> %30, float 0.000000e+00, i32 3
- call void @llvm.R600.store.swizzle(<4 x float> %31, i32 60, i32 1)
- %32 = insertelement <4 x float> undef, float %0, i32 0
- %33 = insertelement <4 x float> %32, float %temp8.0, i32 1
- %34 = insertelement <4 x float> %33, float %temp12.0, i32 2
- %35 = insertelement <4 x float> %34, float 0.000000e+00, i32 3
- call void @llvm.R600.store.swizzle(<4 x float> %35, i32 0, i32 2)
- ret void
-
-ENDIF40: ; preds = %LOOP
- %36 = bitcast float %temp8.0 to i32
- %37 = add i32 %36, 1
- %38 = bitcast i32 %37 to float
- %39 = bitcast float %temp4.1 to i32
- %40 = urem i32 %39, 2
- %41 = bitcast i32 %40 to float
- %42 = bitcast float %41 to i32
- %43 = icmp eq i32 %42, 0
- %44 = sext i1 %43 to i32
- %45 = bitcast i32 %44 to float
- %46 = bitcast float %45 to i32
- %47 = icmp ne i32 %46, 0
- %48 = bitcast float %temp4.1 to i32
- br i1 %47, label %IF44, label %ELSE45
-
-IF44: ; preds = %ENDIF40
- %49 = udiv i32 %48, 2
- br label %ENDIF43
-
-ELSE45: ; preds = %ENDIF40
- %50 = mul i32 3, %48
- %51 = add i32 %50, 1
- br label %ENDIF43
-
-ENDIF43: ; preds = %ELSE45, %IF44
- %.sink = phi i32 [ %49, %IF44 ], [ %51, %ELSE45 ]
- %52 = bitcast i32 %.sink to float
- %53 = load <4 x float>, <4 x float> addrspace(8)* null
- %54 = extractelement <4 x float> %53, i32 0
- %55 = bitcast float %54 to i32
- br label %LOOP47
-
-LOOP47: ; preds = %ENDIF48, %ENDIF43
- %temp12.1 = phi float [ %temp12.0, %ENDIF43 ], [ %67, %ENDIF48 ]
- %temp28.0 = phi float [ 0.000000e+00, %ENDIF43 ], [ %70, %ENDIF48 ]
- %56 = bitcast float %temp28.0 to i32
- %57 = icmp uge i32 %56, %55
- %58 = sext i1 %57 to i32
- %59 = bitcast i32 %58 to float
- %60 = bitcast float %59 to i32
- %61 = icmp ne i32 %60, 0
- br i1 %61, label %LOOP, label %ENDIF48
-
-ENDIF48: ; preds = %LOOP47
- %62 = bitcast float %temp12.1 to i32
- %63 = mul i32 %62, 2
- %64 = bitcast i32 %63 to float
- %65 = bitcast float %64 to i32
- %66 = urem i32 %65, 2147483647
- %67 = bitcast i32 %66 to float
- %68 = bitcast float %temp28.0 to i32
- %69 = add i32 %68, 1
- %70 = bitcast i32 %69 to float
- br label %LOOP47
-}
-
-declare void @llvm.R600.store.stream.output(<4 x float>, i32, i32, i32)
-
-declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
-
-attributes #0 = { "ShaderType"="1" }
Removed: llvm/trunk/test/CodeGen/R600/reciprocal.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/reciprocal.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/reciprocal.ll (original)
+++ llvm/trunk/test/CodeGen/R600/reciprocal.ll (removed)
@@ -1,15 +0,0 @@
-;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
-
-;CHECK: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-
-define void @test(<4 x float> inreg %reg0) #0 {
- %r0 = extractelement <4 x float> %reg0, i32 0
- %r1 = fdiv float 1.0, %r0
- %vec = insertelement <4 x float> undef, float %r1, i32 0
- call void @llvm.R600.store.swizzle(<4 x float> %vec, i32 0, i32 0)
- ret void
-}
-
-declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
-
-attributes #0 = { "ShaderType"="0" }
Removed: llvm/trunk/test/CodeGen/R600/register-count-comments.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/register-count-comments.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/register-count-comments.ll (original)
+++ llvm/trunk/test/CodeGen/R600/register-count-comments.ll (removed)
@@ -1,27 +0,0 @@
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs -asm-verbose < %s | FileCheck -check-prefix=SI %s
-
-declare i32 @llvm.SI.tid() nounwind readnone
-
-; SI-LABEL: {{^}}foo:
-; SI: .section .AMDGPU.csdata
-; SI: ; Kernel info:
-; SI: ; NumSgprs: {{[0-9]+}}
-; SI: ; NumVgprs: {{[0-9]+}}
-define void @foo(i32 addrspace(1)* noalias %out, i32 addrspace(1)* %abase, i32 addrspace(1)* %bbase) nounwind {
- %tid = call i32 @llvm.SI.tid() nounwind readnone
- %aptr = getelementptr i32, i32 addrspace(1)* %abase, i32 %tid
- %bptr = getelementptr i32, i32 addrspace(1)* %bbase, i32 %tid
- %outptr = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
- %a = load i32, i32 addrspace(1)* %aptr, align 4
- %b = load i32, i32 addrspace(1)* %bptr, align 4
- %result = add i32 %a, %b
- store i32 %result, i32 addrspace(1)* %outptr, align 4
- ret void
-}
-
-; SI-LABEL: {{^}}one_vgpr_used:
-; SI: NumVgprs: 1
-define void @one_vgpr_used(i32 addrspace(1)* %out, i32 %x) nounwind {
- store i32 %x, i32 addrspace(1)* %out, align 4
- ret void
-}
Removed: llvm/trunk/test/CodeGen/R600/reorder-stores.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/reorder-stores.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/reorder-stores.ll (original)
+++ llvm/trunk/test/CodeGen/R600/reorder-stores.ll (removed)
@@ -1,105 +0,0 @@
-; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI %s
-; RUN: llc -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefix=SI %s
-
-; SI-LABEL: {{^}}no_reorder_v2f64_global_load_store:
-; SI: buffer_load_dwordx2
-; SI: buffer_load_dwordx2
-; SI: buffer_load_dwordx2
-; SI: buffer_load_dwordx2
-; SI: buffer_store_dwordx2
-; SI: buffer_store_dwordx2
-; SI: buffer_store_dwordx2
-; SI: buffer_store_dwordx2
-; SI: s_endpgm
-define void @no_reorder_v2f64_global_load_store(<2 x double> addrspace(1)* nocapture %x, <2 x double> addrspace(1)* nocapture %y) nounwind {
- %tmp1 = load <2 x double>, <2 x double> addrspace(1)* %x, align 16
- %tmp4 = load <2 x double>, <2 x double> addrspace(1)* %y, align 16
- store <2 x double> %tmp4, <2 x double> addrspace(1)* %x, align 16
- store <2 x double> %tmp1, <2 x double> addrspace(1)* %y, align 16
- ret void
-}
-
-; SI-LABEL: {{^}}no_reorder_scalarized_v2f64_local_load_store:
-; SI: ds_read_b64
-; SI: ds_read_b64
-; SI: ds_write_b64
-; SI: ds_write_b64
-; SI: s_endpgm
-define void @no_reorder_scalarized_v2f64_local_load_store(<2 x double> addrspace(3)* nocapture %x, <2 x double> addrspace(3)* nocapture %y) nounwind {
- %tmp1 = load <2 x double>, <2 x double> addrspace(3)* %x, align 16
- %tmp4 = load <2 x double>, <2 x double> addrspace(3)* %y, align 16
- store <2 x double> %tmp4, <2 x double> addrspace(3)* %x, align 16
- store <2 x double> %tmp1, <2 x double> addrspace(3)* %y, align 16
- ret void
-}
-
-; SI-LABEL: {{^}}no_reorder_split_v8i32_global_load_store:
-; SI: buffer_load_dword
-; SI: buffer_load_dword
-; SI: buffer_load_dword
-; SI: buffer_load_dword
-
-; SI: buffer_load_dword
-; SI: buffer_load_dword
-; SI: buffer_load_dword
-; SI: buffer_load_dword
-
-; SI: buffer_load_dword
-; SI: buffer_load_dword
-; SI: buffer_load_dword
-; SI: buffer_load_dword
-
-; SI: buffer_load_dword
-; SI: buffer_load_dword
-; SI: buffer_load_dword
-; SI: buffer_load_dword
-
-
-; SI: buffer_store_dword
-; SI: buffer_store_dword
-; SI: buffer_store_dword
-; SI: buffer_store_dword
-
-; SI: buffer_store_dword
-; SI: buffer_store_dword
-; SI: buffer_store_dword
-; SI: buffer_store_dword
-
-; SI: buffer_store_dword
-; SI: buffer_store_dword
-; SI: buffer_store_dword
-; SI: buffer_store_dword
-
-; SI: buffer_store_dword
-; SI: buffer_store_dword
-; SI: buffer_store_dword
-; SI: buffer_store_dword
-; SI: s_endpgm
-define void @no_reorder_split_v8i32_global_load_store(<8 x i32> addrspace(1)* nocapture %x, <8 x i32> addrspace(1)* nocapture %y) nounwind {
- %tmp1 = load <8 x i32>, <8 x i32> addrspace(1)* %x, align 32
- %tmp4 = load <8 x i32>, <8 x i32> addrspace(1)* %y, align 32
- store <8 x i32> %tmp4, <8 x i32> addrspace(1)* %x, align 32
- store <8 x i32> %tmp1, <8 x i32> addrspace(1)* %y, align 32
- ret void
-}
-
-; SI-LABEL: {{^}}no_reorder_extload_64:
-; SI: ds_read_b64
-; SI: ds_read_b64
-; SI: ds_write_b64
-; SI-NOT: ds_read
-; SI: ds_write_b64
-; SI: s_endpgm
-define void @no_reorder_extload_64(<2 x i32> addrspace(3)* nocapture %x, <2 x i32> addrspace(3)* nocapture %y) nounwind {
- %tmp1 = load <2 x i32>, <2 x i32> addrspace(3)* %x, align 8
- %tmp4 = load <2 x i32>, <2 x i32> addrspace(3)* %y, align 8
- %tmp1ext = zext <2 x i32> %tmp1 to <2 x i64>
- %tmp4ext = zext <2 x i32> %tmp4 to <2 x i64>
- %tmp7 = add <2 x i64> %tmp1ext, <i64 1, i64 1>
- %tmp9 = add <2 x i64> %tmp4ext, <i64 1, i64 1>
- %trunctmp9 = trunc <2 x i64> %tmp9 to <2 x i32>
- %trunctmp7 = trunc <2 x i64> %tmp7 to <2 x i32>
- store <2 x i32> %trunctmp9, <2 x i32> addrspace(3)* %x, align 8
- store <2 x i32> %trunctmp7, <2 x i32> addrspace(3)* %y, align 8
- ret void
-}
Removed: llvm/trunk/test/CodeGen/R600/rotl.i64.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/rotl.i64.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/rotl.i64.ll (original)
+++ llvm/trunk/test/CodeGen/R600/rotl.i64.ll (removed)
@@ -1,39 +0,0 @@
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=BOTH %s
-; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=BOTH %s
-
-; BOTH-LABEL: {{^}}s_rotl_i64:
-; BOTH-DAG: s_lshl_b64
-; BOTH-DAG: s_sub_i32
-; BOTH-DAG: s_lshr_b64
-; BOTH: s_or_b64
-; BOTH: s_endpgm
-define void @s_rotl_i64(i64 addrspace(1)* %in, i64 %x, i64 %y) {
-entry:
- %0 = shl i64 %x, %y
- %1 = sub i64 64, %y
- %2 = lshr i64 %x, %1
- %3 = or i64 %0, %2
- store i64 %3, i64 addrspace(1)* %in
- ret void
-}
-
-; BOTH-LABEL: {{^}}v_rotl_i64:
-; SI-DAG: v_lshl_b64
-; VI-DAG: v_lshlrev_b64
-; BOTH-DAG: v_sub_i32
-; SI: v_lshr_b64
-; VI: v_lshrrev_b64
-; BOTH: v_or_b32
-; BOTH: v_or_b32
-; BOTH: s_endpgm
-define void @v_rotl_i64(i64 addrspace(1)* %in, i64 addrspace(1)* %xptr, i64 addrspace(1)* %yptr) {
-entry:
- %x = load i64, i64 addrspace(1)* %xptr, align 8
- %y = load i64, i64 addrspace(1)* %yptr, align 8
- %tmp0 = shl i64 %x, %y
- %tmp1 = sub i64 64, %y
- %tmp2 = lshr i64 %x, %tmp1
- %tmp3 = or i64 %tmp0, %tmp2
- store i64 %tmp3, i64 addrspace(1)* %in, align 8
- ret void
-}
Removed: llvm/trunk/test/CodeGen/R600/rotl.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/rotl.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/rotl.ll (original)
+++ llvm/trunk/test/CodeGen/R600/rotl.ll (removed)
@@ -1,57 +0,0 @@
-; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck --check-prefix=R600 -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-
-; FUNC-LABEL: {{^}}rotl_i32:
-; R600: SUB_INT {{\** T[0-9]+\.[XYZW]}}, literal.x
-; R600-NEXT: 32
-; R600: BIT_ALIGN_INT {{T[0-9]+\.[XYZW]}}, KC0[2].Z, KC0[2].Z, PV.{{[XYZW]}}
-
-; SI: s_sub_i32 [[SDST:s[0-9]+]], 32, {{[s][0-9]+}}
-; SI: v_mov_b32_e32 [[VDST:v[0-9]+]], [[SDST]]
-; SI: v_alignbit_b32 {{v[0-9]+, [s][0-9]+, s[0-9]+}}, [[VDST]]
-define void @rotl_i32(i32 addrspace(1)* %in, i32 %x, i32 %y) {
-entry:
- %0 = shl i32 %x, %y
- %1 = sub i32 32, %y
- %2 = lshr i32 %x, %1
- %3 = or i32 %0, %2
- store i32 %3, i32 addrspace(1)* %in
- ret void
-}
-
-; FUNC-LABEL: {{^}}rotl_v2i32:
-; SI-DAG: s_sub_i32
-; SI-DAG: s_sub_i32
-; SI-DAG: v_alignbit_b32
-; SI-DAG: v_alignbit_b32
-; SI: s_endpgm
-define void @rotl_v2i32(<2 x i32> addrspace(1)* %in, <2 x i32> %x, <2 x i32> %y) {
-entry:
- %0 = shl <2 x i32> %x, %y
- %1 = sub <2 x i32> <i32 32, i32 32>, %y
- %2 = lshr <2 x i32> %x, %1
- %3 = or <2 x i32> %0, %2
- store <2 x i32> %3, <2 x i32> addrspace(1)* %in
- ret void
-}
-
-; FUNC-LABEL: {{^}}rotl_v4i32:
-; SI-DAG: s_sub_i32
-; SI-DAG: v_alignbit_b32
-; SI-DAG: s_sub_i32
-; SI-DAG: v_alignbit_b32
-; SI-DAG: s_sub_i32
-; SI-DAG: v_alignbit_b32
-; SI-DAG: s_sub_i32
-; SI-DAG: v_alignbit_b32
-; SI: s_endpgm
-define void @rotl_v4i32(<4 x i32> addrspace(1)* %in, <4 x i32> %x, <4 x i32> %y) {
-entry:
- %0 = shl <4 x i32> %x, %y
- %1 = sub <4 x i32> <i32 32, i32 32, i32 32, i32 32>, %y
- %2 = lshr <4 x i32> %x, %1
- %3 = or <4 x i32> %0, %2
- store <4 x i32> %3, <4 x i32> addrspace(1)* %in
- ret void
-}
Removed: llvm/trunk/test/CodeGen/R600/rotr.i64.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/rotr.i64.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/rotr.i64.ll (original)
+++ llvm/trunk/test/CodeGen/R600/rotr.i64.ll (removed)
@@ -1,61 +0,0 @@
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=BOTH %s
-; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=BOTH %s
-
-; BOTH-LABEL: {{^}}s_rotr_i64:
-; BOTH-DAG: s_sub_i32
-; BOTH-DAG: s_lshr_b64
-; BOTH-DAG: s_lshl_b64
-; BOTH: s_or_b64
-define void @s_rotr_i64(i64 addrspace(1)* %in, i64 %x, i64 %y) {
-entry:
- %tmp0 = sub i64 64, %y
- %tmp1 = shl i64 %x, %tmp0
- %tmp2 = lshr i64 %x, %y
- %tmp3 = or i64 %tmp1, %tmp2
- store i64 %tmp3, i64 addrspace(1)* %in
- ret void
-}
-
-; BOTH-LABEL: {{^}}v_rotr_i64:
-; BOTH-DAG: v_sub_i32
-; SI-DAG: v_lshr_b64
-; SI-DAG: v_lshl_b64
-; VI-DAG: v_lshrrev_b64
-; VI-DAG: v_lshlrev_b64
-; BOTH: v_or_b32
-; BOTH: v_or_b32
-define void @v_rotr_i64(i64 addrspace(1)* %in, i64 addrspace(1)* %xptr, i64 addrspace(1)* %yptr) {
-entry:
- %x = load i64, i64 addrspace(1)* %xptr, align 8
- %y = load i64, i64 addrspace(1)* %yptr, align 8
- %tmp0 = sub i64 64, %y
- %tmp1 = shl i64 %x, %tmp0
- %tmp2 = lshr i64 %x, %y
- %tmp3 = or i64 %tmp1, %tmp2
- store i64 %tmp3, i64 addrspace(1)* %in
- ret void
-}
-
-; BOTH-LABEL: {{^}}s_rotr_v2i64:
-define void @s_rotr_v2i64(<2 x i64> addrspace(1)* %in, <2 x i64> %x, <2 x i64> %y) {
-entry:
- %tmp0 = sub <2 x i64> <i64 64, i64 64>, %y
- %tmp1 = shl <2 x i64> %x, %tmp0
- %tmp2 = lshr <2 x i64> %x, %y
- %tmp3 = or <2 x i64> %tmp1, %tmp2
- store <2 x i64> %tmp3, <2 x i64> addrspace(1)* %in
- ret void
-}
-
-; BOTH-LABEL: {{^}}v_rotr_v2i64:
-define void @v_rotr_v2i64(<2 x i64> addrspace(1)* %in, <2 x i64> addrspace(1)* %xptr, <2 x i64> addrspace(1)* %yptr) {
-entry:
- %x = load <2 x i64>, <2 x i64> addrspace(1)* %xptr, align 8
- %y = load <2 x i64>, <2 x i64> addrspace(1)* %yptr, align 8
- %tmp0 = sub <2 x i64> <i64 64, i64 64>, %y
- %tmp1 = shl <2 x i64> %x, %tmp0
- %tmp2 = lshr <2 x i64> %x, %y
- %tmp3 = or <2 x i64> %tmp1, %tmp2
- store <2 x i64> %tmp3, <2 x i64> addrspace(1)* %in
- ret void
-}
Removed: llvm/trunk/test/CodeGen/R600/rotr.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/rotr.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/rotr.ll (original)
+++ llvm/trunk/test/CodeGen/R600/rotr.ll (removed)
@@ -1,53 +0,0 @@
-; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck --check-prefix=R600 -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-
-; FUNC-LABEL: {{^}}rotr_i32:
-; R600: BIT_ALIGN_INT
-
-; SI: v_alignbit_b32
-define void @rotr_i32(i32 addrspace(1)* %in, i32 %x, i32 %y) {
-entry:
- %tmp0 = sub i32 32, %y
- %tmp1 = shl i32 %x, %tmp0
- %tmp2 = lshr i32 %x, %y
- %tmp3 = or i32 %tmp1, %tmp2
- store i32 %tmp3, i32 addrspace(1)* %in
- ret void
-}
-
-; FUNC-LABEL: {{^}}rotr_v2i32:
-; R600: BIT_ALIGN_INT
-; R600: BIT_ALIGN_INT
-
-; SI: v_alignbit_b32
-; SI: v_alignbit_b32
-define void @rotr_v2i32(<2 x i32> addrspace(1)* %in, <2 x i32> %x, <2 x i32> %y) {
-entry:
- %tmp0 = sub <2 x i32> <i32 32, i32 32>, %y
- %tmp1 = shl <2 x i32> %x, %tmp0
- %tmp2 = lshr <2 x i32> %x, %y
- %tmp3 = or <2 x i32> %tmp1, %tmp2
- store <2 x i32> %tmp3, <2 x i32> addrspace(1)* %in
- ret void
-}
-
-; FUNC-LABEL: {{^}}rotr_v4i32:
-; R600: BIT_ALIGN_INT
-; R600: BIT_ALIGN_INT
-; R600: BIT_ALIGN_INT
-; R600: BIT_ALIGN_INT
-
-; SI: v_alignbit_b32
-; SI: v_alignbit_b32
-; SI: v_alignbit_b32
-; SI: v_alignbit_b32
-define void @rotr_v4i32(<4 x i32> addrspace(1)* %in, <4 x i32> %x, <4 x i32> %y) {
-entry:
- %tmp0 = sub <4 x i32> <i32 32, i32 32, i32 32, i32 32>, %y
- %tmp1 = shl <4 x i32> %x, %tmp0
- %tmp2 = lshr <4 x i32> %x, %y
- %tmp3 = or <4 x i32> %tmp1, %tmp2
- store <4 x i32> %tmp3, <4 x i32> addrspace(1)* %in
- ret void
-}
Removed: llvm/trunk/test/CodeGen/R600/rsq.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/rsq.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/rsq.ll (original)
+++ llvm/trunk/test/CodeGen/R600/rsq.ll (removed)
@@ -1,74 +0,0 @@
-; RUN: llc -march=amdgcn -mcpu=SI -mattr=-fp32-denormals -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=SI-UNSAFE -check-prefix=SI %s
-; RUN: llc -march=amdgcn -mcpu=SI -mattr=-fp32-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=SI-SAFE -check-prefix=SI %s
-
-declare i32 @llvm.r600.read.tidig.x() nounwind readnone
-declare float @llvm.sqrt.f32(float) nounwind readnone
-declare double @llvm.sqrt.f64(double) nounwind readnone
-
-; SI-LABEL: {{^}}rsq_f32:
-; SI: v_rsq_f32_e32
-; SI: s_endpgm
-define void @rsq_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) nounwind {
- %val = load float, float addrspace(1)* %in, align 4
- %sqrt = call float @llvm.sqrt.f32(float %val) nounwind readnone
- %div = fdiv float 1.0, %sqrt
- store float %div, float addrspace(1)* %out, align 4
- ret void
-}
-
-; SI-LABEL: {{^}}rsq_f64:
-; SI-UNSAFE: v_rsq_f64_e32
-; SI-SAFE: v_sqrt_f64_e32
-; SI: s_endpgm
-define void @rsq_f64(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) nounwind {
- %val = load double, double addrspace(1)* %in, align 4
- %sqrt = call double @llvm.sqrt.f64(double %val) nounwind readnone
- %div = fdiv double 1.0, %sqrt
- store double %div, double addrspace(1)* %out, align 4
- ret void
-}
-
-; SI-LABEL: {{^}}rsq_f32_sgpr:
-; SI: v_rsq_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}
-; SI: s_endpgm
-define void @rsq_f32_sgpr(float addrspace(1)* noalias %out, float %val) nounwind {
- %sqrt = call float @llvm.sqrt.f32(float %val) nounwind readnone
- %div = fdiv float 1.0, %sqrt
- store float %div, float addrspace(1)* %out, align 4
- ret void
-}
-
-; Recognize that this is rsqrt(a) * rcp(b) * c,
-; not 1 / ( 1 / sqrt(a)) * rcp(b) * c.
-
-; SI-LABEL: @rsqrt_fmul
-; SI-DAG: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
-; SI-DAG: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
-; SI-DAG: buffer_load_dword [[C:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8
-
-; SI-UNSAFE-DAG: v_rsq_f32_e32 [[RSQA:v[0-9]+]], [[A]]
-; SI-UNSAFE-DAG: v_rcp_f32_e32 [[RCPB:v[0-9]+]], [[B]]
-; SI-UNSAFE-DAG: v_mul_f32_e32 [[TMP:v[0-9]+]], [[RCPB]], [[RSQA]]
-; SI-UNSAFE: v_mul_f32_e32 [[RESULT:v[0-9]+]], [[TMP]], [[C]]
-; SI-UNSAFE: buffer_store_dword [[RESULT]]
-
-; SI-SAFE-NOT: v_rsq_f32
-
-; SI: s_endpgm
-define void @rsqrt_fmul(float addrspace(1)* %out, float addrspace(1)* %in) {
- %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
- %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
- %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
- %gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2
-
- %a = load float, float addrspace(1)* %gep.0
- %b = load float, float addrspace(1)* %gep.1
- %c = load float, float addrspace(1)* %gep.2
-
- %x = call float @llvm.sqrt.f32(float %a)
- %y = fmul float %x, %b
- %z = fdiv float %c, %y
- store float %z, float addrspace(1)* %out.gep
- ret void
-}
Removed: llvm/trunk/test/CodeGen/R600/rv7x0_count3.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/rv7x0_count3.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/rv7x0_count3.ll (original)
+++ llvm/trunk/test/CodeGen/R600/rv7x0_count3.ll (removed)
@@ -1,41 +0,0 @@
-; RUN: llc < %s -march=r600 -show-mc-encoding -mcpu=rv710 | FileCheck %s
-
-; CHECK: TEX 9 @6 ; encoding: [0x06,0x00,0x00,0x00,0x00,0x04,0x88,0x80]
-
-define void @test(<4 x float> inreg %reg0, <4 x float> inreg %reg1) #0 {
- %1 = extractelement <4 x float> %reg1, i32 0
- %2 = extractelement <4 x float> %reg1, i32 1
- %3 = extractelement <4 x float> %reg1, i32 2
- %4 = extractelement <4 x float> %reg1, i32 3
- %5 = insertelement <4 x float> undef, float %1, i32 0
- %6 = insertelement <4 x float> %5, float %2, i32 1
- %7 = insertelement <4 x float> %6, float %3, i32 2
- %8 = insertelement <4 x float> %7, float %4, i32 3
- %9 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %8, i32 0, i32 0, i32 1)
- %10 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %8, i32 1, i32 0, i32 1)
- %11 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %8, i32 2, i32 0, i32 1)
- %12 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %8, i32 3, i32 0, i32 1)
- %13 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %8, i32 4, i32 0, i32 1)
- %14 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %8, i32 5, i32 0, i32 1)
- %15 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %8, i32 6, i32 0, i32 1)
- %16 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %8, i32 7, i32 0, i32 1)
- %17 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %8, i32 8, i32 0, i32 1)
- %18 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %8, i32 9, i32 0, i32 1)
- %19 = fadd <4 x float> %9, %10
- %20 = fadd <4 x float> %19, %11
- %21 = fadd <4 x float> %20, %12
- %22 = fadd <4 x float> %21, %13
- %23 = fadd <4 x float> %22, %14
- %24 = fadd <4 x float> %23, %15
- %25 = fadd <4 x float> %24, %16
- %26 = fadd <4 x float> %25, %17
- %27 = fadd <4 x float> %26, %18
- call void @llvm.R600.store.swizzle(<4 x float> %27, i32 0, i32 2)
- ret void
-}
-
-declare <4 x float> @llvm.AMDGPU.tex(<4 x float>, i32, i32, i32) readnone
-
-declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
-
-attributes #0 = { "ShaderType"="1" }
Removed: llvm/trunk/test/CodeGen/R600/s_movk_i32.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/s_movk_i32.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/s_movk_i32.ll (original)
+++ llvm/trunk/test/CodeGen/R600/s_movk_i32.ll (removed)
@@ -1,185 +0,0 @@
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
-; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
-
-; SI-LABEL: {{^}}s_movk_i32_k0:
-; SI-DAG: s_mov_b32 [[LO_S_IMM:s[0-9]+]], 0xffff{{$}}
-; SI-DAG: s_mov_b32 [[HI_S_IMM:s[0-9]+]], 1{{$}}
-; SI-DAG: buffer_load_dwordx2 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}},
-; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[LO_S_IMM]], v[[LO_VREG]]
-; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]]
-; SI: s_endpgm
-define void @s_movk_i32_k0(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
- %loada = load i64, i64 addrspace(1)* %a, align 4
- %or = or i64 %loada, 4295032831 ; ((1 << 16) - 1) | (1 << 32)
- store i64 %or, i64 addrspace(1)* %out
- ret void
-}
-
-; SI-LABEL: {{^}}s_movk_i32_k1:
-; SI-DAG: s_movk_i32 [[LO_S_IMM:s[0-9]+]], 0x7fff{{$}}
-; SI-DAG: s_mov_b32 [[HI_S_IMM:s[0-9]+]], 1{{$}}
-; SI-DAG: buffer_load_dwordx2 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}},
-; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[LO_S_IMM]], v[[LO_VREG]]
-; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]]
-; SI: s_endpgm
-define void @s_movk_i32_k1(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
- %loada = load i64, i64 addrspace(1)* %a, align 4
- %or = or i64 %loada, 4295000063 ; ((1 << 15) - 1) | (1 << 32)
- store i64 %or, i64 addrspace(1)* %out
- ret void
-}
-
-; SI-LABEL: {{^}}s_movk_i32_k2:
-; SI-DAG: s_movk_i32 [[LO_S_IMM:s[0-9]+]], 0x7fff{{$}}
-; SI-DAG: s_mov_b32 [[HI_S_IMM:s[0-9]+]], 64{{$}}
-; SI-DAG: buffer_load_dwordx2 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}},
-; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[LO_S_IMM]], v[[LO_VREG]]
-; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]]
-; SI: s_endpgm
-define void @s_movk_i32_k2(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
- %loada = load i64, i64 addrspace(1)* %a, align 4
- %or = or i64 %loada, 274877939711 ; ((1 << 15) - 1) | (64 << 32)
- store i64 %or, i64 addrspace(1)* %out
- ret void
-}
-
-; SI-LABEL: {{^}}s_movk_i32_k3:
-; SI-DAG: s_mov_b32 [[LO_S_IMM:s[0-9]+]], 0x8000{{$}}
-; SI-DAG: s_mov_b32 [[HI_S_IMM:s[0-9]+]], 1{{$}}
-; SI-DAG: buffer_load_dwordx2 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}},
-; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[LO_S_IMM]], v[[LO_VREG]]
-; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]]
-; SI: s_endpgm
-define void @s_movk_i32_k3(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
- %loada = load i64, i64 addrspace(1)* %a, align 4
- %or = or i64 %loada, 4295000064 ; (1 << 15) | (1 << 32)
- store i64 %or, i64 addrspace(1)* %out
- ret void
-}
-
-; SI-LABEL: {{^}}s_movk_i32_k4:
-; SI-DAG: s_mov_b32 [[LO_S_IMM:s[0-9]+]], 0x20000{{$}}
-; SI-DAG: s_mov_b32 [[HI_S_IMM:s[0-9]+]], 1{{$}}
-; SI-DAG: buffer_load_dwordx2 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}},
-; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[LO_S_IMM]], v[[LO_VREG]]
-; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]]
-; SI: s_endpgm
-define void @s_movk_i32_k4(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
- %loada = load i64, i64 addrspace(1)* %a, align 4
- %or = or i64 %loada, 4295098368 ; (1 << 17) | (1 << 32)
- store i64 %or, i64 addrspace(1)* %out
- ret void
-}
-
-; SI-LABEL: {{^}}s_movk_i32_k5:
-; SI-DAG: s_movk_i32 [[LO_S_IMM:s[0-9]+]], 0xffef{{$}}
-; SI-DAG: s_mov_b32 [[HI_S_IMM:s[0-9]+]], 0xff00ffff{{$}}
-; SI-DAG: buffer_load_dwordx2 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}},
-; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[LO_S_IMM]], v[[LO_VREG]]
-; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]]
-; SI: s_endpgm
-define void @s_movk_i32_k5(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
- %loada = load i64, i64 addrspace(1)* %a, align 4
- %or = or i64 %loada, 18374967954648334319 ; -17 & 0xff00ffffffffffff
- store i64 %or, i64 addrspace(1)* %out
- ret void
-}
-
-; SI-LABEL: {{^}}s_movk_i32_k6:
-; SI-DAG: s_movk_i32 [[LO_S_IMM:s[0-9]+]], 0x41{{$}}
-; SI-DAG: s_mov_b32 [[HI_S_IMM:s[0-9]+]], 63{{$}}
-; SI-DAG: buffer_load_dwordx2 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}},
-; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[LO_S_IMM]], v[[LO_VREG]]
-; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]]
-; SI: s_endpgm
-define void @s_movk_i32_k6(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
- %loada = load i64, i64 addrspace(1)* %a, align 4
- %or = or i64 %loada, 270582939713 ; 65 | (63 << 32)
- store i64 %or, i64 addrspace(1)* %out
- ret void
-}
-
-; SI-LABEL: {{^}}s_movk_i32_k7:
-; SI-DAG: s_movk_i32 [[LO_S_IMM:s[0-9]+]], 0x2000{{$}}
-; SI-DAG: s_movk_i32 [[HI_S_IMM:s[0-9]+]], 0x4000{{$}}
-; SI-DAG: buffer_load_dwordx2 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}},
-; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[LO_S_IMM]], v[[LO_VREG]]
-; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]]
-; SI: s_endpgm
-define void @s_movk_i32_k7(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
- %loada = load i64, i64 addrspace(1)* %a, align 4
- %or = or i64 %loada, 70368744185856; ((1 << 13)) | ((1 << 14) << 32)
- store i64 %or, i64 addrspace(1)* %out
- ret void
-}
-
-
-; SI-LABEL: {{^}}s_movk_i32_k8:
-; SI-DAG: s_movk_i32 [[LO_S_IMM:s[0-9]+]], 0x8000{{$}}
-; SI-DAG: s_mov_b32 [[HI_S_IMM:s[0-9]+]], 0x11111111{{$}}
-; SI-DAG: buffer_load_dwordx2 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}},
-; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[LO_S_IMM]], v[[LO_VREG]]
-; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]]
-; SI: s_endpgm
-define void @s_movk_i32_k8(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
- %loada = load i64, i64 addrspace(1)* %a, align 4
- %or = or i64 %loada, 1229782942255906816 ; 0x11111111ffff8000
- store i64 %or, i64 addrspace(1)* %out
- ret void
-}
-
-; SI-LABEL: {{^}}s_movk_i32_k9:
-; SI-DAG: s_movk_i32 [[LO_S_IMM:s[0-9]+]], 0x8001{{$}}
-; SI-DAG: s_mov_b32 [[HI_S_IMM:s[0-9]+]], 0x11111111{{$}}
-; SI-DAG: buffer_load_dwordx2 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}},
-; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[LO_S_IMM]], v[[LO_VREG]]
-; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]]
-; SI: s_endpgm
-define void @s_movk_i32_k9(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
- %loada = load i64, i64 addrspace(1)* %a, align 4
- %or = or i64 %loada, 1229782942255906817 ; 0x11111111ffff8001
- store i64 %or, i64 addrspace(1)* %out
- ret void
-}
-
-; SI-LABEL: {{^}}s_movk_i32_k10:
-; SI-DAG: s_movk_i32 [[LO_S_IMM:s[0-9]+]], 0x8888{{$}}
-; SI-DAG: s_mov_b32 [[HI_S_IMM:s[0-9]+]], 0x11111111{{$}}
-; SI-DAG: buffer_load_dwordx2 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}},
-; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[LO_S_IMM]], v[[LO_VREG]]
-; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]]
-; SI: s_endpgm
-define void @s_movk_i32_k10(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
- %loada = load i64, i64 addrspace(1)* %a, align 4
- %or = or i64 %loada, 1229782942255909000 ; 0x11111111ffff8888
- store i64 %or, i64 addrspace(1)* %out
- ret void
-}
-
-; SI-LABEL: {{^}}s_movk_i32_k11:
-; SI-DAG: s_movk_i32 [[LO_S_IMM:s[0-9]+]], 0x8fff{{$}}
-; SI-DAG: s_mov_b32 [[HI_S_IMM:s[0-9]+]], 0x11111111{{$}}
-; SI-DAG: buffer_load_dwordx2 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}},
-; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[LO_S_IMM]], v[[LO_VREG]]
-; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]]
-; SI: s_endpgm
-define void @s_movk_i32_k11(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
- %loada = load i64, i64 addrspace(1)* %a, align 4
- %or = or i64 %loada, 1229782942255910911 ; 0x11111111ffff8fff
- store i64 %or, i64 addrspace(1)* %out
- ret void
-}
-
-; SI-LABEL: {{^}}s_movk_i32_k12:
-; SI-DAG: s_mov_b32 [[LO_S_IMM:s[0-9]+]], 0xffff7001{{$}}
-; SI-DAG: s_mov_b32 [[HI_S_IMM:s[0-9]+]], 0x11111111{{$}}
-; SI-DAG: buffer_load_dwordx2 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}},
-; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[LO_S_IMM]], v[[LO_VREG]]
-; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]]
-; SI: s_endpgm
-define void @s_movk_i32_k12(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
- %loada = load i64, i64 addrspace(1)* %a, align 4
- %or = or i64 %loada, 1229782942255902721 ; 0x11111111ffff7001
- store i64 %or, i64 addrspace(1)* %out
- ret void
-}
Removed: llvm/trunk/test/CodeGen/R600/saddo.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/saddo.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/saddo.ll (original)
+++ llvm/trunk/test/CodeGen/R600/saddo.ll (removed)
@@ -1,63 +0,0 @@
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s
-
-declare { i32, i1 } @llvm.sadd.with.overflow.i32(i32, i32) nounwind readnone
-declare { i64, i1 } @llvm.sadd.with.overflow.i64(i64, i64) nounwind readnone
-
-; FUNC-LABEL: {{^}}saddo_i64_zext:
-define void @saddo_i64_zext(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
- %sadd = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 %a, i64 %b) nounwind
- %val = extractvalue { i64, i1 } %sadd, 0
- %carry = extractvalue { i64, i1 } %sadd, 1
- %ext = zext i1 %carry to i64
- %add2 = add i64 %val, %ext
- store i64 %add2, i64 addrspace(1)* %out, align 8
- ret void
-}
-
-; FUNC-LABEL: {{^}}s_saddo_i32:
-define void @s_saddo_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %carryout, i32 %a, i32 %b) nounwind {
- %sadd = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %a, i32 %b) nounwind
- %val = extractvalue { i32, i1 } %sadd, 0
- %carry = extractvalue { i32, i1 } %sadd, 1
- store i32 %val, i32 addrspace(1)* %out, align 4
- store i1 %carry, i1 addrspace(1)* %carryout
- ret void
-}
-
-; FUNC-LABEL: {{^}}v_saddo_i32:
-define void @v_saddo_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %carryout, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
- %a = load i32, i32 addrspace(1)* %aptr, align 4
- %b = load i32, i32 addrspace(1)* %bptr, align 4
- %sadd = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %a, i32 %b) nounwind
- %val = extractvalue { i32, i1 } %sadd, 0
- %carry = extractvalue { i32, i1 } %sadd, 1
- store i32 %val, i32 addrspace(1)* %out, align 4
- store i1 %carry, i1 addrspace(1)* %carryout
- ret void
-}
-
-; FUNC-LABEL: {{^}}s_saddo_i64:
-define void @s_saddo_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %carryout, i64 %a, i64 %b) nounwind {
- %sadd = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 %a, i64 %b) nounwind
- %val = extractvalue { i64, i1 } %sadd, 0
- %carry = extractvalue { i64, i1 } %sadd, 1
- store i64 %val, i64 addrspace(1)* %out, align 8
- store i1 %carry, i1 addrspace(1)* %carryout
- ret void
-}
-
-; FUNC-LABEL: {{^}}v_saddo_i64:
-; SI: v_add_i32
-; SI: v_addc_u32
-define void @v_saddo_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %carryout, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) nounwind {
- %a = load i64, i64 addrspace(1)* %aptr, align 4
- %b = load i64, i64 addrspace(1)* %bptr, align 4
- %sadd = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 %a, i64 %b) nounwind
- %val = extractvalue { i64, i1 } %sadd, 0
- %carry = extractvalue { i64, i1 } %sadd, 1
- store i64 %val, i64 addrspace(1)* %out, align 8
- store i1 %carry, i1 addrspace(1)* %carryout
- ret void
-}
Removed: llvm/trunk/test/CodeGen/R600/salu-to-valu.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/salu-to-valu.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/salu-to-valu.ll (original)
+++ llvm/trunk/test/CodeGen/R600/salu-to-valu.ll (removed)
@@ -1,118 +0,0 @@
-; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck %s
-
-; In this test both the pointer and the offset operands to the
-; BUFFER_LOAD instructions end up being stored in vgprs. This
-; requires us to add the pointer and offset together, store the
-; result in the offset operand (vaddr), and then store 0 in an
-; sgpr register pair and use that for the pointer operand
-; (low 64-bits of srsrc).
-
-; CHECK-LABEL: {{^}}mubuf:
-
-; Make sure we aren't using VGPRs for the source operand of s_mov_b64
-; CHECK-NOT: s_mov_b64 s[{{[0-9]+:[0-9]+}}], v
-
-; Make sure we aren't using VGPRs for the srsrc operand of BUFFER_LOAD_*
-; instructions
-; CHECK: buffer_load_ubyte v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0 addr64
-; CHECK: buffer_load_ubyte v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0 addr64
-define void @mubuf(i32 addrspace(1)* %out, i8 addrspace(1)* %in) {
-entry:
- %0 = call i32 @llvm.r600.read.tidig.x() #1
- %1 = call i32 @llvm.r600.read.tidig.y() #1
- %2 = sext i32 %0 to i64
- %3 = sext i32 %1 to i64
- br label %loop
-
-loop:
- %4 = phi i64 [0, %entry], [%5, %loop]
- %5 = add i64 %2, %4
- %6 = getelementptr i8, i8 addrspace(1)* %in, i64 %5
- %7 = load i8, i8 addrspace(1)* %6, align 1
- %8 = or i64 %5, 1
- %9 = getelementptr i8, i8 addrspace(1)* %in, i64 %8
- %10 = load i8, i8 addrspace(1)* %9, align 1
- %11 = add i8 %7, %10
- %12 = sext i8 %11 to i32
- store i32 %12, i32 addrspace(1)* %out
- %13 = icmp slt i64 %5, 10
- br i1 %13, label %loop, label %done
-
-done:
- ret void
-}
-
-declare i32 @llvm.r600.read.tidig.x() #1
-declare i32 @llvm.r600.read.tidig.y() #1
-
-attributes #1 = { nounwind readnone }
-
-; Test moving an SMRD instruction to the VALU
-
-; CHECK-LABEL: {{^}}smrd_valu:
-; CHECK: buffer_load_dword [[OUT:v[0-9]+]]
-; CHECK: buffer_store_dword [[OUT]]
-
-define void @smrd_valu(i32 addrspace(2)* addrspace(1)* %in, i32 %a, i32 addrspace(1)* %out) {
-entry:
- %0 = icmp ne i32 %a, 0
- br i1 %0, label %if, label %else
-
-if:
- %1 = load i32 addrspace(2)*, i32 addrspace(2)* addrspace(1)* %in
- br label %endif
-
-else:
- %2 = getelementptr i32 addrspace(2)*, i32 addrspace(2)* addrspace(1)* %in
- %3 = load i32 addrspace(2)*, i32 addrspace(2)* addrspace(1)* %2
- br label %endif
-
-endif:
- %4 = phi i32 addrspace(2)* [%1, %if], [%3, %else]
- %5 = getelementptr i32, i32 addrspace(2)* %4, i32 3000
- %6 = load i32, i32 addrspace(2)* %5
- store i32 %6, i32 addrspace(1)* %out
- ret void
-}
-
-; Test moving an SMRD with an immediate offset to the VALU
-
-; CHECK-LABEL: {{^}}smrd_valu2:
-; CHECK: buffer_load_dword
-define void @smrd_valu2(i32 addrspace(1)* %out, [8 x i32] addrspace(2)* %in) {
-entry:
- %0 = call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %1 = add i32 %0, 4
- %2 = getelementptr [8 x i32], [8 x i32] addrspace(2)* %in, i32 %0, i32 4
- %3 = load i32, i32 addrspace(2)* %2
- store i32 %3, i32 addrspace(1)* %out
- ret void
-}
-
-; CHECK-LABEL: {{^}}s_load_imm_v8i32:
-; CHECK: buffer_load_dwordx4
-; CHECK: buffer_load_dwordx4
-define void @s_load_imm_v8i32(<8 x i32> addrspace(1)* %out, i32 addrspace(2)* nocapture readonly %in) {
-entry:
- %tmp0 = tail call i32 @llvm.r600.read.tidig.x() #1
- %tmp1 = getelementptr inbounds i32, i32 addrspace(2)* %in, i32 %tmp0
- %tmp2 = bitcast i32 addrspace(2)* %tmp1 to <8 x i32> addrspace(2)*
- %tmp3 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp2, align 4
- store <8 x i32> %tmp3, <8 x i32> addrspace(1)* %out, align 32
- ret void
-}
-
-; CHECK-LABEL: {{^}}s_load_imm_v16i32:
-; CHECK: buffer_load_dwordx4
-; CHECK: buffer_load_dwordx4
-; CHECK: buffer_load_dwordx4
-; CHECK: buffer_load_dwordx4
-define void @s_load_imm_v16i32(<16 x i32> addrspace(1)* %out, i32 addrspace(2)* nocapture readonly %in) {
-entry:
- %tmp0 = tail call i32 @llvm.r600.read.tidig.x() #1
- %tmp1 = getelementptr inbounds i32, i32 addrspace(2)* %in, i32 %tmp0
- %tmp2 = bitcast i32 addrspace(2)* %tmp1 to <16 x i32> addrspace(2)*
- %tmp3 = load <16 x i32>, <16 x i32> addrspace(2)* %tmp2, align 4
- store <16 x i32> %tmp3, <16 x i32> addrspace(1)* %out, align 32
- ret void
-}
Removed: llvm/trunk/test/CodeGen/R600/scalar_to_vector.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/scalar_to_vector.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/scalar_to_vector.ll (original)
+++ llvm/trunk/test/CodeGen/R600/scalar_to_vector.ll (removed)
@@ -1,81 +0,0 @@
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-
-
-; FUNC-LABEL: {{^}}scalar_to_vector_v2i32:
-; SI: buffer_load_dword [[VAL:v[0-9]+]],
-; SI: v_lshrrev_b32_e32 [[RESULT:v[0-9]+]], 16, [[VAL]]
-; SI: buffer_store_short [[RESULT]]
-; SI: buffer_store_short [[RESULT]]
-; SI: buffer_store_short [[RESULT]]
-; SI: buffer_store_short [[RESULT]]
-; SI: s_endpgm
-define void @scalar_to_vector_v2i32(<4 x i16> addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
- %tmp1 = load i32, i32 addrspace(1)* %in, align 4
- %bc = bitcast i32 %tmp1 to <2 x i16>
- %tmp2 = shufflevector <2 x i16> %bc, <2 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
- store <4 x i16> %tmp2, <4 x i16> addrspace(1)* %out, align 8
- ret void
-}
-
-; FUNC-LABEL: {{^}}scalar_to_vector_v2f32:
-; SI: buffer_load_dword [[VAL:v[0-9]+]],
-; SI: v_lshrrev_b32_e32 [[RESULT:v[0-9]+]], 16, [[VAL]]
-; SI: buffer_store_short [[RESULT]]
-; SI: buffer_store_short [[RESULT]]
-; SI: buffer_store_short [[RESULT]]
-; SI: buffer_store_short [[RESULT]]
-; SI: s_endpgm
-define void @scalar_to_vector_v2f32(<4 x i16> addrspace(1)* %out, float addrspace(1)* %in) nounwind {
- %tmp1 = load float, float addrspace(1)* %in, align 4
- %bc = bitcast float %tmp1 to <2 x i16>
- %tmp2 = shufflevector <2 x i16> %bc, <2 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
- store <4 x i16> %tmp2, <4 x i16> addrspace(1)* %out, align 8
- ret void
-}
-
-; Getting a SCALAR_TO_VECTOR seems to be tricky. These cases managed
-; to produce one, but for some reason never made it to selection.
-
-
-; define void @scalar_to_vector_test2(<8 x i8> addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
-; %tmp1 = load i32, i32 addrspace(1)* %in, align 4
-; %bc = bitcast i32 %tmp1 to <4 x i8>
-
-; %tmp2 = shufflevector <4 x i8> %bc, <4 x i8> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
-; store <8 x i8> %tmp2, <8 x i8> addrspace(1)* %out, align 4
-; ret void
-; }
-
-; define void @scalar_to_vector_test3(<4 x i32> addrspace(1)* %out) nounwind {
-; %newvec0 = insertelement <2 x i64> undef, i64 12345, i32 0
-; %newvec1 = insertelement <2 x i64> %newvec0, i64 undef, i32 1
-; %bc = bitcast <2 x i64> %newvec1 to <4 x i32>
-; %add = add <4 x i32> %bc, <i32 1, i32 2, i32 3, i32 4>
-; store <4 x i32> %add, <4 x i32> addrspace(1)* %out, align 16
-; ret void
-; }
-
-; define void @scalar_to_vector_test4(<8 x i16> addrspace(1)* %out) nounwind {
-; %newvec0 = insertelement <4 x i32> undef, i32 12345, i32 0
-; %bc = bitcast <4 x i32> %newvec0 to <8 x i16>
-; %add = add <8 x i16> %bc, <i16 1, i16 2, i16 3, i16 4, i16 1, i16 2, i16 3, i16 4>
-; store <8 x i16> %add, <8 x i16> addrspace(1)* %out, align 16
-; ret void
-; }
-
-; define void @scalar_to_vector_test5(<4 x i16> addrspace(1)* %out) nounwind {
-; %newvec0 = insertelement <2 x i32> undef, i32 12345, i32 0
-; %bc = bitcast <2 x i32> %newvec0 to <4 x i16>
-; %add = add <4 x i16> %bc, <i16 1, i16 2, i16 3, i16 4>
-; store <4 x i16> %add, <4 x i16> addrspace(1)* %out, align 16
-; ret void
-; }
-
-; define void @scalar_to_vector_test6(<4 x i16> addrspace(1)* %out) nounwind {
-; %newvec0 = insertelement <2 x i32> undef, i32 12345, i32 0
-; %bc = bitcast <2 x i32> %newvec0 to <4 x i16>
-; %add = add <4 x i16> %bc, <i16 1, i16 2, i16 3, i16 4>
-; store <4 x i16> %add, <4 x i16> addrspace(1)* %out, align 16
-; ret void
-; }
Removed: llvm/trunk/test/CodeGen/R600/schedule-fs-loop-nested-if.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/schedule-fs-loop-nested-if.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/schedule-fs-loop-nested-if.ll (original)
+++ llvm/trunk/test/CodeGen/R600/schedule-fs-loop-nested-if.ll (removed)
@@ -1,82 +0,0 @@
-;RUN: llc < %s -march=r600 -mcpu=cayman -stress-sched -verify-misched -verify-machineinstrs
-;REQUIRES: asserts
-
-define void @main(<4 x float> inreg %reg0, <4 x float> inreg %reg1) #1 {
-main_body:
- %0 = extractelement <4 x float> %reg1, i32 0
- %1 = extractelement <4 x float> %reg1, i32 1
- %2 = extractelement <4 x float> %reg1, i32 2
- %3 = extractelement <4 x float> %reg1, i32 3
- %4 = fcmp ult float %1, 0.000000e+00
- %5 = select i1 %4, float 1.000000e+00, float 0.000000e+00
- %6 = fsub float -0.000000e+00, %5
- %7 = fptosi float %6 to i32
- %8 = bitcast i32 %7 to float
- %9 = fcmp ult float %0, 5.700000e+01
- %10 = select i1 %9, float 1.000000e+00, float 0.000000e+00
- %11 = fsub float -0.000000e+00, %10
- %12 = fptosi float %11 to i32
- %13 = bitcast i32 %12 to float
- %14 = bitcast float %8 to i32
- %15 = bitcast float %13 to i32
- %16 = and i32 %14, %15
- %17 = bitcast i32 %16 to float
- %18 = bitcast float %17 to i32
- %19 = icmp ne i32 %18, 0
- %20 = fcmp ult float %0, 0.000000e+00
- %21 = select i1 %20, float 1.000000e+00, float 0.000000e+00
- %22 = fsub float -0.000000e+00, %21
- %23 = fptosi float %22 to i32
- %24 = bitcast i32 %23 to float
- %25 = bitcast float %24 to i32
- %26 = icmp ne i32 %25, 0
- br i1 %19, label %IF, label %ELSE
-
-IF: ; preds = %main_body
- %. = select i1 %26, float 0.000000e+00, float 1.000000e+00
- %.18 = select i1 %26, float 1.000000e+00, float 0.000000e+00
- br label %ENDIF
-
-ELSE: ; preds = %main_body
- br i1 %26, label %ENDIF, label %ELSE17
-
-ENDIF: ; preds = %ELSE17, %ELSE, %IF
- %temp1.0 = phi float [ %., %IF ], [ %48, %ELSE17 ], [ 0.000000e+00, %ELSE ]
- %temp2.0 = phi float [ 0.000000e+00, %IF ], [ %49, %ELSE17 ], [ 1.000000e+00, %ELSE ]
- %temp.0 = phi float [ %.18, %IF ], [ %47, %ELSE17 ], [ 0.000000e+00, %ELSE ]
- %27 = call float @llvm.AMDIL.clamp.(float %temp.0, float 0.000000e+00, float 1.000000e+00)
- %28 = call float @llvm.AMDIL.clamp.(float %temp1.0, float 0.000000e+00, float 1.000000e+00)
- %29 = call float @llvm.AMDIL.clamp.(float %temp2.0, float 0.000000e+00, float 1.000000e+00)
- %30 = call float @llvm.AMDIL.clamp.(float 1.000000e+00, float 0.000000e+00, float 1.000000e+00)
- %31 = insertelement <4 x float> undef, float %27, i32 0
- %32 = insertelement <4 x float> %31, float %28, i32 1
- %33 = insertelement <4 x float> %32, float %29, i32 2
- %34 = insertelement <4 x float> %33, float %30, i32 3
- call void @llvm.R600.store.swizzle(<4 x float> %34, i32 0, i32 0)
- ret void
-
-ELSE17: ; preds = %ELSE
- %35 = fadd float 0.000000e+00, 0x3FC99999A0000000
- %36 = fadd float 0.000000e+00, 0x3FC99999A0000000
- %37 = fadd float 0.000000e+00, 0x3FC99999A0000000
- %38 = fadd float %35, 0x3FC99999A0000000
- %39 = fadd float %36, 0x3FC99999A0000000
- %40 = fadd float %37, 0x3FC99999A0000000
- %41 = fadd float %38, 0x3FC99999A0000000
- %42 = fadd float %39, 0x3FC99999A0000000
- %43 = fadd float %40, 0x3FC99999A0000000
- %44 = fadd float %41, 0x3FC99999A0000000
- %45 = fadd float %42, 0x3FC99999A0000000
- %46 = fadd float %43, 0x3FC99999A0000000
- %47 = fadd float %44, 0x3FC99999A0000000
- %48 = fadd float %45, 0x3FC99999A0000000
- %49 = fadd float %46, 0x3FC99999A0000000
- br label %ENDIF
-}
-
-declare float @llvm.AMDIL.clamp.(float, float, float) #0
-
-declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
-
-attributes #0 = { readnone }
-attributes #1 = { "ShaderType"="1" }
Removed: llvm/trunk/test/CodeGen/R600/schedule-fs-loop-nested.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/schedule-fs-loop-nested.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/schedule-fs-loop-nested.ll (original)
+++ llvm/trunk/test/CodeGen/R600/schedule-fs-loop-nested.ll (removed)
@@ -1,88 +0,0 @@
-;RUN: llc < %s -march=r600 -mcpu=cayman -stress-sched -verify-misched -verify-machineinstrs
-;REQUIRES: asserts
-
-define void @main() {
-main_body:
- %0 = load <4 x float>, <4 x float> addrspace(9)* null
- %1 = extractelement <4 x float> %0, i32 3
- %2 = fptosi float %1 to i32
- %3 = bitcast i32 %2 to float
- %4 = bitcast float %3 to i32
- %5 = sdiv i32 %4, 4
- %6 = bitcast i32 %5 to float
- %7 = bitcast float %6 to i32
- %8 = mul i32 %7, 4
- %9 = bitcast i32 %8 to float
- %10 = bitcast float %9 to i32
- %11 = sub i32 0, %10
- %12 = bitcast i32 %11 to float
- %13 = bitcast float %3 to i32
- %14 = bitcast float %12 to i32
- %15 = add i32 %13, %14
- %16 = bitcast i32 %15 to float
- %17 = load <4 x float>, <4 x float> addrspace(9)* null
- %18 = extractelement <4 x float> %17, i32 0
- %19 = load <4 x float>, <4 x float> addrspace(9)* null
- %20 = extractelement <4 x float> %19, i32 1
- %21 = load <4 x float>, <4 x float> addrspace(9)* null
- %22 = extractelement <4 x float> %21, i32 2
- br label %LOOP
-
-LOOP: ; preds = %IF31, %main_body
- %temp12.0 = phi float [ 0.000000e+00, %main_body ], [ %47, %IF31 ]
- %temp6.0 = phi float [ %22, %main_body ], [ %temp6.1, %IF31 ]
- %temp5.0 = phi float [ %20, %main_body ], [ %temp5.1, %IF31 ]
- %temp4.0 = phi float [ %18, %main_body ], [ %temp4.1, %IF31 ]
- %23 = bitcast float %temp12.0 to i32
- %24 = bitcast float %6 to i32
- %25 = icmp sge i32 %23, %24
- %26 = sext i1 %25 to i32
- %27 = bitcast i32 %26 to float
- %28 = bitcast float %27 to i32
- %29 = icmp ne i32 %28, 0
- br i1 %29, label %IF, label %LOOP29
-
-IF: ; preds = %LOOP
- %30 = call float @llvm.AMDIL.clamp.(float %temp4.0, float 0.000000e+00, float 1.000000e+00)
- %31 = call float @llvm.AMDIL.clamp.(float %temp5.0, float 0.000000e+00, float 1.000000e+00)
- %32 = call float @llvm.AMDIL.clamp.(float %temp6.0, float 0.000000e+00, float 1.000000e+00)
- %33 = call float @llvm.AMDIL.clamp.(float 1.000000e+00, float 0.000000e+00, float 1.000000e+00)
- %34 = insertelement <4 x float> undef, float %30, i32 0
- %35 = insertelement <4 x float> %34, float %31, i32 1
- %36 = insertelement <4 x float> %35, float %32, i32 2
- %37 = insertelement <4 x float> %36, float %33, i32 3
- call void @llvm.R600.store.swizzle(<4 x float> %37, i32 0, i32 0)
- ret void
-
-LOOP29: ; preds = %LOOP, %ENDIF30
- %temp6.1 = phi float [ %temp4.1, %ENDIF30 ], [ %temp6.0, %LOOP ]
- %temp5.1 = phi float [ %temp6.1, %ENDIF30 ], [ %temp5.0, %LOOP ]
- %temp4.1 = phi float [ %temp5.1, %ENDIF30 ], [ %temp4.0, %LOOP ]
- %temp20.0 = phi float [ %50, %ENDIF30 ], [ 0.000000e+00, %LOOP ]
- %38 = bitcast float %temp20.0 to i32
- %39 = bitcast float %16 to i32
- %40 = icmp sge i32 %38, %39
- %41 = sext i1 %40 to i32
- %42 = bitcast i32 %41 to float
- %43 = bitcast float %42 to i32
- %44 = icmp ne i32 %43, 0
- br i1 %44, label %IF31, label %ENDIF30
-
-IF31: ; preds = %LOOP29
- %45 = bitcast float %temp12.0 to i32
- %46 = add i32 %45, 1
- %47 = bitcast i32 %46 to float
- br label %LOOP
-
-ENDIF30: ; preds = %LOOP29
- %48 = bitcast float %temp20.0 to i32
- %49 = add i32 %48, 1
- %50 = bitcast i32 %49 to float
- br label %LOOP29
-}
-
-declare float @llvm.AMDIL.clamp.(float, float, float) #0
-
-declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
-
-attributes #0 = { readnone }
Removed: llvm/trunk/test/CodeGen/R600/schedule-fs-loop.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/schedule-fs-loop.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/schedule-fs-loop.ll (original)
+++ llvm/trunk/test/CodeGen/R600/schedule-fs-loop.ll (removed)
@@ -1,55 +0,0 @@
-;RUN: llc < %s -march=r600 -mcpu=cayman -stress-sched -verify-misched -verify-machineinstrs
-;REQUIRES: asserts
-
-define void @main() {
-main_body:
- %0 = load <4 x float>, <4 x float> addrspace(9)* null
- %1 = extractelement <4 x float> %0, i32 3
- %2 = fptosi float %1 to i32
- %3 = bitcast i32 %2 to float
- %4 = load <4 x float>, <4 x float> addrspace(9)* null
- %5 = extractelement <4 x float> %4, i32 0
- %6 = load <4 x float>, <4 x float> addrspace(9)* null
- %7 = extractelement <4 x float> %6, i32 1
- %8 = load <4 x float>, <4 x float> addrspace(9)* null
- %9 = extractelement <4 x float> %8, i32 2
- br label %LOOP
-
-LOOP: ; preds = %ENDIF, %main_body
- %temp4.0 = phi float [ %5, %main_body ], [ %temp5.0, %ENDIF ]
- %temp5.0 = phi float [ %7, %main_body ], [ %temp6.0, %ENDIF ]
- %temp6.0 = phi float [ %9, %main_body ], [ %temp4.0, %ENDIF ]
- %temp8.0 = phi float [ 0.000000e+00, %main_body ], [ %27, %ENDIF ]
- %10 = bitcast float %temp8.0 to i32
- %11 = bitcast float %3 to i32
- %12 = icmp sge i32 %10, %11
- %13 = sext i1 %12 to i32
- %14 = bitcast i32 %13 to float
- %15 = bitcast float %14 to i32
- %16 = icmp ne i32 %15, 0
- br i1 %16, label %IF, label %ENDIF
-
-IF: ; preds = %LOOP
- %17 = call float @llvm.AMDIL.clamp.(float %temp4.0, float 0.000000e+00, float 1.000000e+00)
- %18 = call float @llvm.AMDIL.clamp.(float %temp5.0, float 0.000000e+00, float 1.000000e+00)
- %19 = call float @llvm.AMDIL.clamp.(float %temp6.0, float 0.000000e+00, float 1.000000e+00)
- %20 = call float @llvm.AMDIL.clamp.(float 1.000000e+00, float 0.000000e+00, float 1.000000e+00)
- %21 = insertelement <4 x float> undef, float %17, i32 0
- %22 = insertelement <4 x float> %21, float %18, i32 1
- %23 = insertelement <4 x float> %22, float %19, i32 2
- %24 = insertelement <4 x float> %23, float %20, i32 3
- call void @llvm.R600.store.swizzle(<4 x float> %24, i32 0, i32 0)
- ret void
-
-ENDIF: ; preds = %LOOP
- %25 = bitcast float %temp8.0 to i32
- %26 = add i32 %25, 1
- %27 = bitcast i32 %26 to float
- br label %LOOP
-}
-
-declare float @llvm.AMDIL.clamp.(float, float, float) #0
-
-declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
-
-attributes #0 = { readnone }
Removed: llvm/trunk/test/CodeGen/R600/schedule-global-loads.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/schedule-global-loads.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/schedule-global-loads.ll (original)
+++ llvm/trunk/test/CodeGen/R600/schedule-global-loads.ll (removed)
@@ -1,41 +0,0 @@
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=FUNC -check-prefix=SI %s
-
-
-declare i32 @llvm.r600.read.tidig.x() #1
-
-; FIXME: This currently doesn't do a great job of clustering the
-; loads, which end up with extra moves between them. Right now, it
-; seems the only thing areLoadsFromSameBasePtr is accomplishing is
-; ordering the loads so that the lower address loads come first.
-
-; FUNC-LABEL: {{^}}cluster_global_arg_loads:
-; SI-DAG: buffer_load_dword [[REG0:v[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
-; SI-DAG: buffer_load_dword [[REG1:v[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0 offset:4
-; SI: buffer_store_dword [[REG0]]
-; SI: buffer_store_dword [[REG1]]
-define void @cluster_global_arg_loads(i32 addrspace(1)* %out0, i32 addrspace(1)* %out1, i32 addrspace(1)* %ptr) #0 {
- %load0 = load i32, i32 addrspace(1)* %ptr, align 4
- %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 1
- %load1 = load i32, i32 addrspace(1)* %gep, align 4
- store i32 %load0, i32 addrspace(1)* %out0, align 4
- store i32 %load1, i32 addrspace(1)* %out1, align 4
- ret void
-}
-
-; Test for a crash in SIInstrInfo::areLoadsFromSameBasePtr() when checking
-; an MUBUF load which does not have a vaddr operand.
-; FUNC-LABEL: {{^}}same_base_ptr_crash:
-; SI: buffer_load_dword
-; SI: buffer_load_dword
-define void @same_base_ptr_crash(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %offset) {
-entry:
- %out1 = getelementptr i32, i32 addrspace(1)* %out, i32 %offset
- %tmp0 = load i32, i32 addrspace(1)* %out
- %tmp1 = load i32, i32 addrspace(1)* %out1
- %tmp2 = add i32 %tmp0, %tmp1
- store i32 %tmp2, i32 addrspace(1)* %out
- ret void
-}
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
Removed: llvm/trunk/test/CodeGen/R600/schedule-if-2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/schedule-if-2.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/schedule-if-2.ll (original)
+++ llvm/trunk/test/CodeGen/R600/schedule-if-2.ll (removed)
@@ -1,94 +0,0 @@
-;RUN: llc < %s -march=r600 -mcpu=cayman -stress-sched -verify-misched -verify-machineinstrs
-;REQUIRES: asserts
-
-define void @main() {
-main_body:
- %0 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
- %1 = extractelement <4 x float> %0, i32 0
- %2 = fadd float 1.000000e+03, %1
- %3 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
- %4 = extractelement <4 x float> %3, i32 0
- %5 = bitcast float %4 to i32
- %6 = icmp eq i32 %5, 0
- %7 = sext i1 %6 to i32
- %8 = bitcast i32 %7 to float
- %9 = bitcast float %8 to i32
- %10 = icmp ne i32 %9, 0
- br i1 %10, label %IF, label %ELSE
-
-IF: ; preds = %main_body
- %11 = call float @fabs(float %2)
- %12 = fcmp ueq float %11, 0x7FF0000000000000
- %13 = select i1 %12, float 1.000000e+00, float 0.000000e+00
- %14 = fsub float -0.000000e+00, %13
- %15 = fptosi float %14 to i32
- %16 = bitcast i32 %15 to float
- %17 = bitcast float %16 to i32
- %18 = icmp ne i32 %17, 0
- %. = select i1 %18, float 0x36A0000000000000, float 0.000000e+00
- %19 = fcmp une float %2, %2
- %20 = select i1 %19, float 1.000000e+00, float 0.000000e+00
- %21 = fsub float -0.000000e+00, %20
- %22 = fptosi float %21 to i32
- %23 = bitcast i32 %22 to float
- %24 = bitcast float %23 to i32
- %25 = icmp ne i32 %24, 0
- %temp8.0 = select i1 %25, float 0x36A0000000000000, float 0.000000e+00
- %26 = bitcast float %. to i32
- %27 = sitofp i32 %26 to float
- %28 = bitcast float %temp8.0 to i32
- %29 = sitofp i32 %28 to float
- %30 = fcmp ugt float %2, 0.000000e+00
- %31 = select i1 %30, float 1.000000e+00, float %2
- %32 = fcmp uge float %31, 0.000000e+00
- %33 = select i1 %32, float %31, float -1.000000e+00
- %34 = fadd float %33, 1.000000e+00
- %35 = fmul float %34, 5.000000e-01
- br label %ENDIF
-
-ELSE: ; preds = %main_body
- %36 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
- %37 = extractelement <4 x float> %36, i32 0
- %38 = bitcast float %37 to i32
- %39 = icmp eq i32 %38, 1
- %40 = sext i1 %39 to i32
- %41 = bitcast i32 %40 to float
- %42 = bitcast float %41 to i32
- %43 = icmp ne i32 %42, 0
- br i1 %43, label %IF23, label %ENDIF
-
-ENDIF: ; preds = %IF23, %ELSE, %IF
- %temp4.0 = phi float [ %2, %IF ], [ %56, %IF23 ], [ 0.000000e+00, %ELSE ]
- %temp5.0 = phi float [ %27, %IF ], [ %60, %IF23 ], [ 0.000000e+00, %ELSE ]
- %temp6.0 = phi float [ %29, %IF ], [ 0.000000e+00, %ELSE ], [ 0.000000e+00, %IF23 ]
- %temp7.0 = phi float [ %35, %IF ], [ 0.000000e+00, %ELSE ], [ 0.000000e+00, %IF23 ]
- %44 = insertelement <4 x float> undef, float %temp4.0, i32 0
- %45 = insertelement <4 x float> %44, float %temp5.0, i32 1
- %46 = insertelement <4 x float> %45, float %temp6.0, i32 2
- %47 = insertelement <4 x float> %46, float %temp7.0, i32 3
- call void @llvm.R600.store.swizzle(<4 x float> %47, i32 0, i32 0)
- ret void
-
-IF23: ; preds = %ELSE
- %48 = fcmp ult float 0.000000e+00, %2
- %49 = select i1 %48, float 1.000000e+00, float 0.000000e+00
- %50 = fsub float -0.000000e+00, %49
- %51 = fptosi float %50 to i32
- %52 = bitcast i32 %51 to float
- %53 = bitcast float %52 to i32
- %54 = icmp ne i32 %53, 0
- %.28 = select i1 %54, float 0x36A0000000000000, float 0.000000e+00
- %55 = bitcast float %.28 to i32
- %56 = sitofp i32 %55 to float
- %57 = load <4 x float>, <4 x float> addrspace(8)* null
- %58 = extractelement <4 x float> %57, i32 0
- %59 = fsub float -0.000000e+00, %58
- %60 = fadd float %2, %59
- br label %ENDIF
-}
-
-declare float @fabs(float) #0
-
-declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
-
-attributes #0 = { readonly }
Removed: llvm/trunk/test/CodeGen/R600/schedule-if.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/schedule-if.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/schedule-if.ll (original)
+++ llvm/trunk/test/CodeGen/R600/schedule-if.ll (removed)
@@ -1,46 +0,0 @@
-;RUN: llc < %s -march=r600 -mcpu=cayman -stress-sched -verify-misched -verify-machineinstrs
-;REQUIRES: asserts
-
-define void @main() {
-main_body:
- %0 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
- %1 = extractelement <4 x float> %0, i32 0
- %2 = bitcast float %1 to i32
- %3 = icmp eq i32 %2, 0
- %4 = sext i1 %3 to i32
- %5 = bitcast i32 %4 to float
- %6 = bitcast float %5 to i32
- %7 = icmp ne i32 %6, 0
- br i1 %7, label %ENDIF, label %ELSE
-
-ELSE: ; preds = %main_body
- %8 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
- %9 = extractelement <4 x float> %8, i32 0
- %10 = bitcast float %9 to i32
- %11 = icmp eq i32 %10, 1
- %12 = sext i1 %11 to i32
- %13 = bitcast i32 %12 to float
- %14 = bitcast float %13 to i32
- %15 = icmp ne i32 %14, 0
- br i1 %15, label %IF13, label %ENDIF
-
-ENDIF: ; preds = %IF13, %ELSE, %main_body
- %temp.0 = phi float [ 1.000000e+03, %main_body ], [ 1.000000e+00, %IF13 ], [ 0.000000e+00, %ELSE ]
- %temp1.0 = phi float [ 0.000000e+00, %main_body ], [ %23, %IF13 ], [ 0.000000e+00, %ELSE ]
- %temp3.0 = phi float [ 1.000000e+00, %main_body ], [ 0.000000e+00, %ELSE ], [ 0.000000e+00, %IF13 ]
- %16 = insertelement <4 x float> undef, float %temp.0, i32 0
- %17 = insertelement <4 x float> %16, float %temp1.0, i32 1
- %18 = insertelement <4 x float> %17, float 0.000000e+00, i32 2
- %19 = insertelement <4 x float> %18, float %temp3.0, i32 3
- call void @llvm.R600.store.swizzle(<4 x float> %19, i32 0, i32 0)
- ret void
-
-IF13: ; preds = %ELSE
- %20 = load <4 x float>, <4 x float> addrspace(8)* null
- %21 = extractelement <4 x float> %20, i32 0
- %22 = fsub float -0.000000e+00, %21
- %23 = fadd float 1.000000e+03, %22
- br label %ENDIF
-}
-
-declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
Removed: llvm/trunk/test/CodeGen/R600/schedule-kernel-arg-loads.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/schedule-kernel-arg-loads.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/schedule-kernel-arg-loads.ll (original)
+++ llvm/trunk/test/CodeGen/R600/schedule-kernel-arg-loads.ll (removed)
@@ -1,51 +0,0 @@
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=FUNC -check-prefix=SI --check-prefix=GCN %s
-; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=FUNC -check-prefix=VI --check-prefix=GCN %s
-
-; FUNC-LABEL: {{^}}cluster_arg_loads:
-; SI: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x9
-; SI-NEXT: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0xb
-; SI-NEXT: s_load_dword s{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, 0xd
-; SI-NEXT: s_load_dword s{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, 0xe
-; VI: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x24
-; VI-NEXT: s_nop 0
-; VI-NEXT: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x2c
-; VI-NEXT: s_nop 0
-; VI-NEXT: s_load_dword s{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, 0x34
-; VI-NEXT: s_nop 0
-; VI-NEXT: s_load_dword s{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, 0x38
-define void @cluster_arg_loads(i32 addrspace(1)* %out0, i32 addrspace(1)* %out1, i32 %x, i32 %y) nounwind {
- store i32 %x, i32 addrspace(1)* %out0, align 4
- store i32 %y, i32 addrspace(1)* %out1, align 4
- ret void
-}
-
-; Test for a crash in SIInstrInfo::areLoadsFromSameBasePtr() when
-; s_load_dwordx2 has a register offset
-
-; FUNC-LABEL: @same_base_ptr_crash
-; GCN: s_load_dwordx2
-; GCN: s_load_dwordx2
-; GCN: s_load_dwordx2
-; GCN: s_endpgm
-define void @same_base_ptr_crash(i64 addrspace(1)* %out,
- i64 %arg0, i64 %arg1, i64 %arg2, i64 %arg3, i64 %arg4, i64 %arg5, i64 %arg6, i64 %arg7,
- i64 %arg8, i64 %arg9, i64 %arg10, i64 %arg11, i64 %arg12, i64 %arg13, i64 %arg14, i64 %arg15,
- i64 %arg16, i64 %arg17, i64 %arg18, i64 %arg19, i64 %arg20, i64 %arg21, i64 %arg22, i64 %arg23,
- i64 %arg24, i64 %arg25, i64 %arg26, i64 %arg27, i64 %arg28, i64 %arg29, i64 %arg30, i64 %arg31,
- i64 %arg32, i64 %arg33, i64 %arg34, i64 %arg35, i64 %arg36, i64 %arg37, i64 %arg38, i64 %arg39,
- i64 %arg40, i64 %arg41, i64 %arg42, i64 %arg43, i64 %arg44, i64 %arg45, i64 %arg46, i64 %arg47,
- i64 %arg48, i64 %arg49, i64 %arg50, i64 %arg51, i64 %arg52, i64 %arg53, i64 %arg54, i64 %arg55,
- i64 %arg56, i64 %arg57, i64 %arg58, i64 %arg59, i64 %arg60, i64 %arg61, i64 %arg62, i64 %arg63,
- i64 %arg64, i64 %arg65, i64 %arg66, i64 %arg67, i64 %arg68, i64 %arg69, i64 %arg70, i64 %arg71,
- i64 %arg72, i64 %arg73, i64 %arg74, i64 %arg75, i64 %arg76, i64 %arg77, i64 %arg78, i64 %arg79,
- i64 %arg80, i64 %arg81, i64 %arg82, i64 %arg83, i64 %arg84, i64 %arg85, i64 %arg86, i64 %arg87,
- i64 %arg88, i64 %arg89, i64 %arg90, i64 %arg91, i64 %arg92, i64 %arg93, i64 %arg94, i64 %arg95,
- i64 %arg96, i64 %arg97, i64 %arg98, i64 %arg99, i64 %arg100, i64 %arg101, i64 %arg102, i64 %arg103,
- i64 %arg104, i64 %arg105, i64 %arg106, i64 %arg107, i64 %arg108, i64 %arg109, i64 %arg110, i64 %arg111,
- i64 %arg112, i64 %arg113, i64 %arg114, i64 %arg115, i64 %arg116, i64 %arg117, i64 %arg118, i64 %arg119,
- i64 %arg120, i64 %arg121, i64 %arg122, i64 %arg123, i64 %arg124, i64 %arg125, i64 %arg126) {
-entry:
- %value = add i64 %arg125, %arg126
- store i64 %value, i64 addrspace(1)* %out, align 8
- ret void
-}
Removed: llvm/trunk/test/CodeGen/R600/schedule-vs-if-nested-loop-failure.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/schedule-vs-if-nested-loop-failure.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/schedule-vs-if-nested-loop-failure.ll (original)
+++ llvm/trunk/test/CodeGen/R600/schedule-vs-if-nested-loop-failure.ll (removed)
@@ -1,163 +0,0 @@
-; XFAIL: *
-; REQUIRES: asserts
-; RUN: llc -O0 -march=amdgcn -mcpu=SI -verify-machineinstrs< %s | FileCheck %s -check-prefix=SI
-; RUN: llc -O0 -march=amdgcn -mcpu=tonga -verify-machineinstrs< %s | FileCheck %s -check-prefix=SI
-
-declare void @llvm.AMDGPU.barrier.local() nounwind noduplicate
-
-
-; SI-LABEL: {{^}}main(
-define void @main(<4 x float> inreg %reg0, <4 x float> inreg %reg1) #0 {
-main_body:
- %0 = extractelement <4 x float> %reg1, i32 0
- %1 = extractelement <4 x float> %reg1, i32 2
- %2 = fcmp ult float %0, 0.000000e+00
- %3 = select i1 %2, float 1.000000e+00, float 0.000000e+00
- %4 = fsub float -0.000000e+00, %3
- %5 = fptosi float %4 to i32
- %6 = bitcast i32 %5 to float
- %7 = bitcast float %6 to i32
- %8 = icmp ne i32 %7, 0
- br i1 %8, label %LOOP, label %ENDIF
-
-Flow1: ; preds = %ENDIF19, %ENDIF16
- %9 = phi float [ %115, %ENDIF19 ], [ undef, %ENDIF16 ]
- %10 = phi float [ %114, %ENDIF19 ], [ undef, %ENDIF16 ]
- %11 = phi float [ %113, %ENDIF19 ], [ undef, %ENDIF16 ]
- %12 = phi float [ %112, %ENDIF19 ], [ undef, %ENDIF16 ]
- %13 = phi float [ %111, %ENDIF19 ], [ undef, %ENDIF16 ]
- %14 = phi i1 [ false, %ENDIF19 ], [ true, %ENDIF16 ]
- br label %Flow
-
-Flow2: ; preds = %Flow
- br label %ENDIF
-
-ENDIF: ; preds = %main_body, %Flow2
- %temp.0 = phi float [ 0.000000e+00, %main_body ], [ %104, %Flow2 ]
- %temp1.0 = phi float [ 1.000000e+00, %main_body ], [ %103, %Flow2 ]
- %temp2.0 = phi float [ 0.000000e+00, %main_body ], [ %102, %Flow2 ]
- %temp3.0 = phi float [ 0.000000e+00, %main_body ], [ %101, %Flow2 ]
- %15 = extractelement <4 x float> %reg1, i32 1
- %16 = extractelement <4 x float> %reg1, i32 3
- %17 = load <4 x float>, <4 x float> addrspace(9)* null
- %18 = extractelement <4 x float> %17, i32 0
- %19 = fmul float %18, %0
- %20 = load <4 x float>, <4 x float> addrspace(9)* null
- %21 = extractelement <4 x float> %20, i32 1
- %22 = fmul float %21, %0
- %23 = load <4 x float>, <4 x float> addrspace(9)* null
- %24 = extractelement <4 x float> %23, i32 2
- %25 = fmul float %24, %0
- %26 = load <4 x float>, <4 x float> addrspace(9)* null
- %27 = extractelement <4 x float> %26, i32 3
- %28 = fmul float %27, %0
- %29 = load <4 x float>, <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(9)* null, i64 0, i32 1)
- %30 = extractelement <4 x float> %29, i32 0
- %31 = fmul float %30, %15
- %32 = fadd float %31, %19
- %33 = load <4 x float>, <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(9)* null, i64 0, i32 1)
- %34 = extractelement <4 x float> %33, i32 1
- %35 = fmul float %34, %15
- %36 = fadd float %35, %22
- %37 = load <4 x float>, <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(9)* null, i64 0, i32 1)
- %38 = extractelement <4 x float> %37, i32 2
- %39 = fmul float %38, %15
- %40 = fadd float %39, %25
- %41 = load <4 x float>, <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(9)* null, i64 0, i32 1)
- %42 = extractelement <4 x float> %41, i32 3
- %43 = fmul float %42, %15
- %44 = fadd float %43, %28
- %45 = load <4 x float>, <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(9)* null, i64 0, i32 2)
- %46 = extractelement <4 x float> %45, i32 0
- %47 = fmul float %46, %1
- %48 = fadd float %47, %32
- %49 = load <4 x float>, <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(9)* null, i64 0, i32 2)
- %50 = extractelement <4 x float> %49, i32 1
- %51 = fmul float %50, %1
- %52 = fadd float %51, %36
- %53 = load <4 x float>, <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(9)* null, i64 0, i32 2)
- %54 = extractelement <4 x float> %53, i32 2
- %55 = fmul float %54, %1
- %56 = fadd float %55, %40
- %57 = load <4 x float>, <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(9)* null, i64 0, i32 2)
- %58 = extractelement <4 x float> %57, i32 3
- %59 = fmul float %58, %1
- %60 = fadd float %59, %44
- %61 = load <4 x float>, <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(9)* null, i64 0, i32 3)
- %62 = extractelement <4 x float> %61, i32 0
- %63 = fmul float %62, %16
- %64 = fadd float %63, %48
- %65 = load <4 x float>, <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(9)* null, i64 0, i32 3)
- %66 = extractelement <4 x float> %65, i32 1
- %67 = fmul float %66, %16
- %68 = fadd float %67, %52
- %69 = load <4 x float>, <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(9)* null, i64 0, i32 3)
- %70 = extractelement <4 x float> %69, i32 2
- %71 = fmul float %70, %16
- %72 = fadd float %71, %56
- %73 = load <4 x float>, <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(9)* null, i64 0, i32 3)
- %74 = extractelement <4 x float> %73, i32 3
- %75 = fmul float %74, %16
- %76 = fadd float %75, %60
- %77 = insertelement <4 x float> undef, float %64, i32 0
- %78 = insertelement <4 x float> %77, float %68, i32 1
- %79 = insertelement <4 x float> %78, float %72, i32 2
- %80 = insertelement <4 x float> %79, float %76, i32 3
- call void @llvm.AMDGPU.barrier.local()
- %81 = insertelement <4 x float> undef, float %temp.0, i32 0
- %82 = insertelement <4 x float> %81, float %temp1.0, i32 1
- %83 = insertelement <4 x float> %82, float %temp2.0, i32 2
- %84 = insertelement <4 x float> %83, float %temp3.0, i32 3
- call void @llvm.AMDGPU.barrier.local()
- ret void
-
-LOOP: ; preds = %main_body, %Flow
- %temp.1 = phi float [ %109, %Flow ], [ 0.000000e+00, %main_body ]
- %temp1.1 = phi float [ %108, %Flow ], [ 1.000000e+00, %main_body ]
- %temp2.1 = phi float [ %107, %Flow ], [ 0.000000e+00, %main_body ]
- %temp3.1 = phi float [ %106, %Flow ], [ 0.000000e+00, %main_body ]
- %temp4.0 = phi float [ %105, %Flow ], [ -2.000000e+00, %main_body ]
- %85 = fcmp uge float %temp4.0, %0
- %86 = select i1 %85, float 1.000000e+00, float 0.000000e+00
- %87 = fsub float -0.000000e+00, %86
- %88 = fptosi float %87 to i32
- %89 = bitcast i32 %88 to float
- %90 = bitcast float %89 to i32
- %91 = icmp ne i32 %90, 0
- %92 = xor i1 %91, true
- br i1 %92, label %ENDIF16, label %Flow
-
-ENDIF16: ; preds = %LOOP
- %93 = fcmp une float %1, %temp4.0
- %94 = select i1 %93, float 1.000000e+00, float 0.000000e+00
- %95 = fsub float -0.000000e+00, %94
- %96 = fptosi float %95 to i32
- %97 = bitcast i32 %96 to float
- %98 = bitcast float %97 to i32
- %99 = icmp ne i32 %98, 0
- %100 = xor i1 %99, true
- br i1 %100, label %ENDIF19, label %Flow1
-
-Flow: ; preds = %Flow1, %LOOP
- %101 = phi float [ %temp3.1, %Flow1 ], [ %temp3.1, %LOOP ]
- %102 = phi float [ %temp2.1, %Flow1 ], [ %temp2.1, %LOOP ]
- %103 = phi float [ %temp1.1, %Flow1 ], [ %temp1.1, %LOOP ]
- %104 = phi float [ %temp.1, %Flow1 ], [ %temp.1, %LOOP ]
- %105 = phi float [ %9, %Flow1 ], [ undef, %LOOP ]
- %106 = phi float [ %10, %Flow1 ], [ undef, %LOOP ]
- %107 = phi float [ %11, %Flow1 ], [ undef, %LOOP ]
- %108 = phi float [ %12, %Flow1 ], [ undef, %LOOP ]
- %109 = phi float [ %13, %Flow1 ], [ undef, %LOOP ]
- %110 = phi i1 [ %14, %Flow1 ], [ true, %LOOP ]
- br i1 %110, label %Flow2, label %LOOP
-
-ENDIF19: ; preds = %ENDIF16
- %111 = fadd float %temp.1, 1.000000e+00
- %112 = fadd float %temp1.1, 0.000000e+00
- %113 = fadd float %temp2.1, 0.000000e+00
- %114 = fadd float %temp3.1, 0.000000e+00
- %115 = fadd float %temp4.0, 1.000000e+00
- br label %Flow1
-}
-
-attributes #0 = { "ShaderType"="1" }
Removed: llvm/trunk/test/CodeGen/R600/schedule-vs-if-nested-loop.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/schedule-vs-if-nested-loop.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/schedule-vs-if-nested-loop.ll (original)
+++ llvm/trunk/test/CodeGen/R600/schedule-vs-if-nested-loop.ll (removed)
@@ -1,132 +0,0 @@
-;RUN: llc < %s -march=r600 -mcpu=cayman -stress-sched -verify-misched
-;REQUIRES: asserts
-
-define void @main(<4 x float> inreg %reg0, <4 x float> inreg %reg1) #0 {
-main_body:
- %0 = extractelement <4 x float> %reg1, i32 0
- %1 = extractelement <4 x float> %reg1, i32 1
- %2 = extractelement <4 x float> %reg1, i32 2
- %3 = extractelement <4 x float> %reg1, i32 3
- %4 = fcmp ult float %0, 0.000000e+00
- %5 = select i1 %4, float 1.000000e+00, float 0.000000e+00
- %6 = fsub float -0.000000e+00, %5
- %7 = fptosi float %6 to i32
- %8 = bitcast i32 %7 to float
- %9 = bitcast float %8 to i32
- %10 = icmp ne i32 %9, 0
- br i1 %10, label %LOOP, label %ENDIF
-
-ENDIF: ; preds = %ENDIF16, %LOOP, %main_body
- %temp.0 = phi float [ 0.000000e+00, %main_body ], [ %temp.1, %LOOP ], [ %temp.1, %ENDIF16 ]
- %temp1.0 = phi float [ 1.000000e+00, %main_body ], [ %temp1.1, %LOOP ], [ %temp1.1, %ENDIF16 ]
- %temp2.0 = phi float [ 0.000000e+00, %main_body ], [ %temp2.1, %LOOP ], [ %temp2.1, %ENDIF16 ]
- %temp3.0 = phi float [ 0.000000e+00, %main_body ], [ %temp3.1, %LOOP ], [ %temp3.1, %ENDIF16 ]
- %11 = load <4 x float>, <4 x float> addrspace(9)* null
- %12 = extractelement <4 x float> %11, i32 0
- %13 = fmul float %12, %0
- %14 = load <4 x float>, <4 x float> addrspace(9)* null
- %15 = extractelement <4 x float> %14, i32 1
- %16 = fmul float %15, %0
- %17 = load <4 x float>, <4 x float> addrspace(9)* null
- %18 = extractelement <4 x float> %17, i32 2
- %19 = fmul float %18, %0
- %20 = load <4 x float>, <4 x float> addrspace(9)* null
- %21 = extractelement <4 x float> %20, i32 3
- %22 = fmul float %21, %0
- %23 = load <4 x float>, <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(9)* null, i64 0, i32 1)
- %24 = extractelement <4 x float> %23, i32 0
- %25 = fmul float %24, %1
- %26 = fadd float %25, %13
- %27 = load <4 x float>, <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(9)* null, i64 0, i32 1)
- %28 = extractelement <4 x float> %27, i32 1
- %29 = fmul float %28, %1
- %30 = fadd float %29, %16
- %31 = load <4 x float>, <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(9)* null, i64 0, i32 1)
- %32 = extractelement <4 x float> %31, i32 2
- %33 = fmul float %32, %1
- %34 = fadd float %33, %19
- %35 = load <4 x float>, <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(9)* null, i64 0, i32 1)
- %36 = extractelement <4 x float> %35, i32 3
- %37 = fmul float %36, %1
- %38 = fadd float %37, %22
- %39 = load <4 x float>, <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(9)* null, i64 0, i32 2)
- %40 = extractelement <4 x float> %39, i32 0
- %41 = fmul float %40, %2
- %42 = fadd float %41, %26
- %43 = load <4 x float>, <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(9)* null, i64 0, i32 2)
- %44 = extractelement <4 x float> %43, i32 1
- %45 = fmul float %44, %2
- %46 = fadd float %45, %30
- %47 = load <4 x float>, <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(9)* null, i64 0, i32 2)
- %48 = extractelement <4 x float> %47, i32 2
- %49 = fmul float %48, %2
- %50 = fadd float %49, %34
- %51 = load <4 x float>, <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(9)* null, i64 0, i32 2)
- %52 = extractelement <4 x float> %51, i32 3
- %53 = fmul float %52, %2
- %54 = fadd float %53, %38
- %55 = load <4 x float>, <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(9)* null, i64 0, i32 3)
- %56 = extractelement <4 x float> %55, i32 0
- %57 = fmul float %56, %3
- %58 = fadd float %57, %42
- %59 = load <4 x float>, <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(9)* null, i64 0, i32 3)
- %60 = extractelement <4 x float> %59, i32 1
- %61 = fmul float %60, %3
- %62 = fadd float %61, %46
- %63 = load <4 x float>, <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(9)* null, i64 0, i32 3)
- %64 = extractelement <4 x float> %63, i32 2
- %65 = fmul float %64, %3
- %66 = fadd float %65, %50
- %67 = load <4 x float>, <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(9)* null, i64 0, i32 3)
- %68 = extractelement <4 x float> %67, i32 3
- %69 = fmul float %68, %3
- %70 = fadd float %69, %54
- %71 = insertelement <4 x float> undef, float %58, i32 0
- %72 = insertelement <4 x float> %71, float %62, i32 1
- %73 = insertelement <4 x float> %72, float %66, i32 2
- %74 = insertelement <4 x float> %73, float %70, i32 3
- call void @llvm.R600.store.swizzle(<4 x float> %74, i32 60, i32 1)
- %75 = insertelement <4 x float> undef, float %temp.0, i32 0
- %76 = insertelement <4 x float> %75, float %temp1.0, i32 1
- %77 = insertelement <4 x float> %76, float %temp2.0, i32 2
- %78 = insertelement <4 x float> %77, float %temp3.0, i32 3
- call void @llvm.R600.store.swizzle(<4 x float> %78, i32 0, i32 2)
- ret void
-
-LOOP: ; preds = %main_body, %ENDIF19
- %temp.1 = phi float [ %93, %ENDIF19 ], [ 0.000000e+00, %main_body ]
- %temp1.1 = phi float [ %94, %ENDIF19 ], [ 1.000000e+00, %main_body ]
- %temp2.1 = phi float [ %95, %ENDIF19 ], [ 0.000000e+00, %main_body ]
- %temp3.1 = phi float [ %96, %ENDIF19 ], [ 0.000000e+00, %main_body ]
- %temp4.0 = phi float [ %97, %ENDIF19 ], [ -2.000000e+00, %main_body ]
- %79 = fcmp uge float %temp4.0, %0
- %80 = select i1 %79, float 1.000000e+00, float 0.000000e+00
- %81 = fsub float -0.000000e+00, %80
- %82 = fptosi float %81 to i32
- %83 = bitcast i32 %82 to float
- %84 = bitcast float %83 to i32
- %85 = icmp ne i32 %84, 0
- br i1 %85, label %ENDIF, label %ENDIF16
-
-ENDIF16: ; preds = %LOOP
- %86 = fcmp une float %2, %temp4.0
- %87 = select i1 %86, float 1.000000e+00, float 0.000000e+00
- %88 = fsub float -0.000000e+00, %87
- %89 = fptosi float %88 to i32
- %90 = bitcast i32 %89 to float
- %91 = bitcast float %90 to i32
- %92 = icmp ne i32 %91, 0
- br i1 %92, label %ENDIF, label %ENDIF19
-
-ENDIF19: ; preds = %ENDIF16
- %93 = fadd float %temp.1, 1.000000e+00
- %94 = fadd float %temp1.1, 0.000000e+00
- %95 = fadd float %temp2.1, 0.000000e+00
- %96 = fadd float %temp3.1, 0.000000e+00
- %97 = fadd float %temp4.0, 1.000000e+00
- br label %LOOP
-}
-
-declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
-
-attributes #0 = { "ShaderType"="1" }
Removed: llvm/trunk/test/CodeGen/R600/scratch-buffer.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/scratch-buffer.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/scratch-buffer.ll (original)
+++ llvm/trunk/test/CodeGen/R600/scratch-buffer.ll (removed)
@@ -1,87 +0,0 @@
-; RUN: llc -verify-machineinstrs -march=amdgcn -mcpu=SI < %s | FileCheck %s
-; RUN: llc -verify-machineinstrs -march=amdgcn -mcpu=tonga < %s | FileCheck %s
-
-; When a frame index offset is more than 12 bits, make sure we don't store
-; it in mubuf's offset field.
-
-; Also, make sure we use the same register for storing the scratch buffer address
-; for both stores. This register is allocated by the register scavenger, so we
-; should be able to reuse the same register for each scratch buffer access.
-
-; CHECK-LABEL: {{^}}legal_offset_fi:
-; CHECK: v_mov_b32_e32 [[OFFSET:v[0-9]+]], 0{{$}}
-; CHECK: buffer_store_dword v{{[0-9]+}}, [[OFFSET]], s[{{[0-9]+}}:{{[0-9]+}}], s{{[0-9]+}} offen
-; CHECK: v_mov_b32_e32 [[OFFSET]], 0x8000
-; CHECK: buffer_store_dword v{{[0-9]+}}, [[OFFSET]], s[{{[0-9]+}}:{{[0-9]+}}], s{{[0-9]+}} offen{{$}}
-
-define void @legal_offset_fi(i32 addrspace(1)* %out, i32 %cond, i32 %if_offset, i32 %else_offset) {
-entry:
- %scratch0 = alloca [8192 x i32]
- %scratch1 = alloca [8192 x i32]
-
- %scratchptr0 = getelementptr [8192 x i32], [8192 x i32]* %scratch0, i32 0, i32 0
- store i32 1, i32* %scratchptr0
-
- %scratchptr1 = getelementptr [8192 x i32], [8192 x i32]* %scratch1, i32 0, i32 0
- store i32 2, i32* %scratchptr1
-
- %cmp = icmp eq i32 %cond, 0
- br i1 %cmp, label %if, label %else
-
-if:
- %if_ptr = getelementptr [8192 x i32], [8192 x i32]* %scratch0, i32 0, i32 %if_offset
- %if_value = load i32, i32* %if_ptr
- br label %done
-
-else:
- %else_ptr = getelementptr [8192 x i32], [8192 x i32]* %scratch1, i32 0, i32 %else_offset
- %else_value = load i32, i32* %else_ptr
- br label %done
-
-done:
- %value = phi i32 [%if_value, %if], [%else_value, %else]
- store i32 %value, i32 addrspace(1)* %out
- ret void
-
- ret void
-
-}
-
-; CHECK-LABEL: {{^}}legal_offset_fi_offset
-; CHECK: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], s{{[0-9]+}} offen
-; CHECK: v_add_i32_e32 [[OFFSET:v[0-9]+]], 0x8000
-; CHECK: buffer_store_dword v{{[0-9]+}}, [[OFFSET]], s[{{[0-9]+}}:{{[0-9]+}}], s{{[0-9]+}} offen{{$}}
-
-define void @legal_offset_fi_offset(i32 addrspace(1)* %out, i32 %cond, i32 addrspace(1)* %offsets, i32 %if_offset, i32 %else_offset) {
-entry:
- %scratch0 = alloca [8192 x i32]
- %scratch1 = alloca [8192 x i32]
-
- %offset0 = load i32, i32 addrspace(1)* %offsets
- %scratchptr0 = getelementptr [8192 x i32], [8192 x i32]* %scratch0, i32 0, i32 %offset0
- store i32 %offset0, i32* %scratchptr0
-
- %offsetptr1 = getelementptr i32, i32 addrspace(1)* %offsets, i32 1
- %offset1 = load i32, i32 addrspace(1)* %offsetptr1
- %scratchptr1 = getelementptr [8192 x i32], [8192 x i32]* %scratch1, i32 0, i32 %offset1
- store i32 %offset1, i32* %scratchptr1
-
- %cmp = icmp eq i32 %cond, 0
- br i1 %cmp, label %if, label %else
-
-if:
- %if_ptr = getelementptr [8192 x i32], [8192 x i32]* %scratch0, i32 0, i32 %if_offset
- %if_value = load i32, i32* %if_ptr
- br label %done
-
-else:
- %else_ptr = getelementptr [8192 x i32], [8192 x i32]* %scratch1, i32 0, i32 %else_offset
- %else_value = load i32, i32* %else_ptr
- br label %done
-
-done:
- %value = phi i32 [%if_value, %if], [%else_value, %else]
- store i32 %value, i32 addrspace(1)* %out
- ret void
-}
-
Removed: llvm/trunk/test/CodeGen/R600/sdiv.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/sdiv.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/sdiv.ll (original)
+++ llvm/trunk/test/CodeGen/R600/sdiv.ll (removed)
@@ -1,104 +0,0 @@
-; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
-
-; The code generated by sdiv is long and complex and may frequently change.
-; The goal of this test is to make sure the ISel doesn't fail.
-;
-; This program was previously failing to compile when one of the selectcc
-; opcodes generated by the sdiv lowering was being legalized and optimized to:
-; selectcc Remainder -1, 0, -1, SETGT
-; This was fixed by adding an additional pattern in R600Instructions.td to
-; match this pattern with a CNDGE_INT.
-
-; FUNC-LABEL: {{^}}sdiv_i32:
-; EG: CF_END
-define void @sdiv_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
- %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
- %num = load i32, i32 addrspace(1) * %in
- %den = load i32, i32 addrspace(1) * %den_ptr
- %result = sdiv i32 %num, %den
- store i32 %result, i32 addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}sdiv_i32_4:
-define void @sdiv_i32_4(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
- %num = load i32, i32 addrspace(1) * %in
- %result = sdiv i32 %num, 4
- store i32 %result, i32 addrspace(1)* %out
- ret void
-}
-
-; Multiply by a weird constant to make sure setIntDivIsCheap is
-; working.
-
-; FUNC-LABEL: {{^}}slow_sdiv_i32_3435:
-; SI: buffer_load_dword [[VAL:v[0-9]+]],
-; SI: v_mov_b32_e32 [[MAGIC:v[0-9]+]], 0x98a1930b
-; SI: v_mul_hi_i32 [[TMP:v[0-9]+]], [[MAGIC]], [[VAL]]
-; SI: v_add_i32
-; SI: v_lshrrev_b32
-; SI: v_ashrrev_i32
-; SI: v_add_i32
-; SI: buffer_store_dword
-; SI: s_endpgm
-define void @slow_sdiv_i32_3435(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
- %num = load i32, i32 addrspace(1) * %in
- %result = sdiv i32 %num, 3435
- store i32 %result, i32 addrspace(1)* %out
- ret void
-}
-
-define void @sdiv_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
- %den_ptr = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %in, i32 1
- %num = load <2 x i32>, <2 x i32> addrspace(1) * %in
- %den = load <2 x i32>, <2 x i32> addrspace(1) * %den_ptr
- %result = sdiv <2 x i32> %num, %den
- store <2 x i32> %result, <2 x i32> addrspace(1)* %out
- ret void
-}
-
-define void @sdiv_v2i32_4(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
- %num = load <2 x i32>, <2 x i32> addrspace(1) * %in
- %result = sdiv <2 x i32> %num, <i32 4, i32 4>
- store <2 x i32> %result, <2 x i32> addrspace(1)* %out
- ret void
-}
-
-define void @sdiv_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
- %den_ptr = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %in, i32 1
- %num = load <4 x i32>, <4 x i32> addrspace(1) * %in
- %den = load <4 x i32>, <4 x i32> addrspace(1) * %den_ptr
- %result = sdiv <4 x i32> %num, %den
- store <4 x i32> %result, <4 x i32> addrspace(1)* %out
- ret void
-}
-
-define void @sdiv_v4i32_4(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
- %num = load <4 x i32>, <4 x i32> addrspace(1) * %in
- %result = sdiv <4 x i32> %num, <i32 4, i32 4, i32 4, i32 4>
- store <4 x i32> %result, <4 x i32> addrspace(1)* %out
- ret void
-}
-
-; Tests for 64-bit divide bypass.
-; define void @test_get_quotient(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
-; %result = sdiv i64 %a, %b
-; store i64 %result, i64 addrspace(1)* %out, align 8
-; ret void
-; }
-
-; define void @test_get_remainder(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
-; %result = srem i64 %a, %b
-; store i64 %result, i64 addrspace(1)* %out, align 8
-; ret void
-; }
-
-; define void @test_get_quotient_and_remainder(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
-; %resultdiv = sdiv i64 %a, %b
-; %resultrem = srem i64 %a, %b
-; %result = add i64 %resultdiv, %resultrem
-; store i64 %result, i64 addrspace(1)* %out, align 8
-; ret void
-; }
Removed: llvm/trunk/test/CodeGen/R600/sdivrem24.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/sdivrem24.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/sdivrem24.ll (original)
+++ llvm/trunk/test/CodeGen/R600/sdivrem24.ll (removed)
@@ -1,239 +0,0 @@
-; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
-
-; FUNC-LABEL: {{^}}sdiv24_i8:
-; SI: v_cvt_f32_i32
-; SI: v_cvt_f32_i32
-; SI: v_rcp_f32
-; SI: v_cvt_i32_f32
-
-; EG: INT_TO_FLT
-; EG-DAG: INT_TO_FLT
-; EG-DAG: RECIP_IEEE
-; EG: FLT_TO_INT
-define void @sdiv24_i8(i8 addrspace(1)* %out, i8 addrspace(1)* %in) {
- %den_ptr = getelementptr i8, i8 addrspace(1)* %in, i8 1
- %num = load i8, i8 addrspace(1) * %in
- %den = load i8, i8 addrspace(1) * %den_ptr
- %result = sdiv i8 %num, %den
- store i8 %result, i8 addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}sdiv24_i16:
-; SI: v_cvt_f32_i32
-; SI: v_cvt_f32_i32
-; SI: v_rcp_f32
-; SI: v_cvt_i32_f32
-
-; EG: INT_TO_FLT
-; EG-DAG: INT_TO_FLT
-; EG-DAG: RECIP_IEEE
-; EG: FLT_TO_INT
-define void @sdiv24_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %in) {
- %den_ptr = getelementptr i16, i16 addrspace(1)* %in, i16 1
- %num = load i16, i16 addrspace(1) * %in, align 2
- %den = load i16, i16 addrspace(1) * %den_ptr, align 2
- %result = sdiv i16 %num, %den
- store i16 %result, i16 addrspace(1)* %out, align 2
- ret void
-}
-
-; FUNC-LABEL: {{^}}sdiv24_i32:
-; SI: v_cvt_f32_i32
-; SI: v_cvt_f32_i32
-; SI: v_rcp_f32
-; SI: v_cvt_i32_f32
-
-; EG: INT_TO_FLT
-; EG-DAG: INT_TO_FLT
-; EG-DAG: RECIP_IEEE
-; EG: FLT_TO_INT
-define void @sdiv24_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
- %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
- %num = load i32, i32 addrspace(1) * %in, align 4
- %den = load i32, i32 addrspace(1) * %den_ptr, align 4
- %num.i24.0 = shl i32 %num, 8
- %den.i24.0 = shl i32 %den, 8
- %num.i24 = ashr i32 %num.i24.0, 8
- %den.i24 = ashr i32 %den.i24.0, 8
- %result = sdiv i32 %num.i24, %den.i24
- store i32 %result, i32 addrspace(1)* %out, align 4
- ret void
-}
-
-; FUNC-LABEL: {{^}}sdiv25_i32:
-; SI-NOT: v_cvt_f32_i32
-; SI-NOT: v_rcp_f32
-
-; EG-NOT: INT_TO_FLT
-; EG-NOT: RECIP_IEEE
-define void @sdiv25_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
- %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
- %num = load i32, i32 addrspace(1) * %in, align 4
- %den = load i32, i32 addrspace(1) * %den_ptr, align 4
- %num.i24.0 = shl i32 %num, 7
- %den.i24.0 = shl i32 %den, 7
- %num.i24 = ashr i32 %num.i24.0, 7
- %den.i24 = ashr i32 %den.i24.0, 7
- %result = sdiv i32 %num.i24, %den.i24
- store i32 %result, i32 addrspace(1)* %out, align 4
- ret void
-}
-
-; FUNC-LABEL: {{^}}test_no_sdiv24_i32_1:
-; SI-NOT: v_cvt_f32_i32
-; SI-NOT: v_rcp_f32
-
-; EG-NOT: INT_TO_FLT
-; EG-NOT: RECIP_IEEE
-define void @test_no_sdiv24_i32_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
- %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
- %num = load i32, i32 addrspace(1) * %in, align 4
- %den = load i32, i32 addrspace(1) * %den_ptr, align 4
- %num.i24.0 = shl i32 %num, 8
- %den.i24.0 = shl i32 %den, 7
- %num.i24 = ashr i32 %num.i24.0, 8
- %den.i24 = ashr i32 %den.i24.0, 7
- %result = sdiv i32 %num.i24, %den.i24
- store i32 %result, i32 addrspace(1)* %out, align 4
- ret void
-}
-
-; FUNC-LABEL: {{^}}test_no_sdiv24_i32_2:
-; SI-NOT: v_cvt_f32_i32
-; SI-NOT: v_rcp_f32
-
-; EG-NOT: INT_TO_FLT
-; EG-NOT: RECIP_IEEE
-define void @test_no_sdiv24_i32_2(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
- %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
- %num = load i32, i32 addrspace(1) * %in, align 4
- %den = load i32, i32 addrspace(1) * %den_ptr, align 4
- %num.i24.0 = shl i32 %num, 7
- %den.i24.0 = shl i32 %den, 8
- %num.i24 = ashr i32 %num.i24.0, 7
- %den.i24 = ashr i32 %den.i24.0, 8
- %result = sdiv i32 %num.i24, %den.i24
- store i32 %result, i32 addrspace(1)* %out, align 4
- ret void
-}
-
-; FUNC-LABEL: {{^}}srem24_i8:
-; SI: v_cvt_f32_i32
-; SI: v_cvt_f32_i32
-; SI: v_rcp_f32
-; SI: v_cvt_i32_f32
-
-; EG: INT_TO_FLT
-; EG-DAG: INT_TO_FLT
-; EG-DAG: RECIP_IEEE
-; EG: FLT_TO_INT
-define void @srem24_i8(i8 addrspace(1)* %out, i8 addrspace(1)* %in) {
- %den_ptr = getelementptr i8, i8 addrspace(1)* %in, i8 1
- %num = load i8, i8 addrspace(1) * %in
- %den = load i8, i8 addrspace(1) * %den_ptr
- %result = srem i8 %num, %den
- store i8 %result, i8 addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}srem24_i16:
-; SI: v_cvt_f32_i32
-; SI: v_cvt_f32_i32
-; SI: v_rcp_f32
-; SI: v_cvt_i32_f32
-
-; EG: INT_TO_FLT
-; EG-DAG: INT_TO_FLT
-; EG-DAG: RECIP_IEEE
-; EG: FLT_TO_INT
-define void @srem24_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %in) {
- %den_ptr = getelementptr i16, i16 addrspace(1)* %in, i16 1
- %num = load i16, i16 addrspace(1) * %in, align 2
- %den = load i16, i16 addrspace(1) * %den_ptr, align 2
- %result = srem i16 %num, %den
- store i16 %result, i16 addrspace(1)* %out, align 2
- ret void
-}
-
-; FUNC-LABEL: {{^}}srem24_i32:
-; SI: v_cvt_f32_i32
-; SI: v_cvt_f32_i32
-; SI: v_rcp_f32
-; SI: v_cvt_i32_f32
-
-; EG: INT_TO_FLT
-; EG-DAG: INT_TO_FLT
-; EG-DAG: RECIP_IEEE
-; EG: FLT_TO_INT
-define void @srem24_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
- %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
- %num = load i32, i32 addrspace(1) * %in, align 4
- %den = load i32, i32 addrspace(1) * %den_ptr, align 4
- %num.i24.0 = shl i32 %num, 8
- %den.i24.0 = shl i32 %den, 8
- %num.i24 = ashr i32 %num.i24.0, 8
- %den.i24 = ashr i32 %den.i24.0, 8
- %result = srem i32 %num.i24, %den.i24
- store i32 %result, i32 addrspace(1)* %out, align 4
- ret void
-}
-
-; FUNC-LABEL: {{^}}srem25_i32:
-; SI-NOT: v_cvt_f32_i32
-; SI-NOT: v_rcp_f32
-
-; EG-NOT: INT_TO_FLT
-; EG-NOT: RECIP_IEEE
-define void @srem25_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
- %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
- %num = load i32, i32 addrspace(1) * %in, align 4
- %den = load i32, i32 addrspace(1) * %den_ptr, align 4
- %num.i24.0 = shl i32 %num, 7
- %den.i24.0 = shl i32 %den, 7
- %num.i24 = ashr i32 %num.i24.0, 7
- %den.i24 = ashr i32 %den.i24.0, 7
- %result = srem i32 %num.i24, %den.i24
- store i32 %result, i32 addrspace(1)* %out, align 4
- ret void
-}
-
-; FUNC-LABEL: {{^}}test_no_srem24_i32_1:
-; SI-NOT: v_cvt_f32_i32
-; SI-NOT: v_rcp_f32
-
-; EG-NOT: INT_TO_FLT
-; EG-NOT: RECIP_IEEE
-define void @test_no_srem24_i32_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
- %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
- %num = load i32, i32 addrspace(1) * %in, align 4
- %den = load i32, i32 addrspace(1) * %den_ptr, align 4
- %num.i24.0 = shl i32 %num, 8
- %den.i24.0 = shl i32 %den, 7
- %num.i24 = ashr i32 %num.i24.0, 8
- %den.i24 = ashr i32 %den.i24.0, 7
- %result = srem i32 %num.i24, %den.i24
- store i32 %result, i32 addrspace(1)* %out, align 4
- ret void
-}
-
-; FUNC-LABEL: {{^}}test_no_srem24_i32_2:
-; SI-NOT: v_cvt_f32_i32
-; SI-NOT: v_rcp_f32
-
-; EG-NOT: INT_TO_FLT
-; EG-NOT: RECIP_IEEE
-define void @test_no_srem24_i32_2(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
- %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
- %num = load i32, i32 addrspace(1) * %in, align 4
- %den = load i32, i32 addrspace(1) * %den_ptr, align 4
- %num.i24.0 = shl i32 %num, 7
- %den.i24.0 = shl i32 %den, 8
- %num.i24 = ashr i32 %num.i24.0, 7
- %den.i24 = ashr i32 %den.i24.0, 8
- %result = srem i32 %num.i24, %den.i24
- store i32 %result, i32 addrspace(1)* %out, align 4
- ret void
-}
Removed: llvm/trunk/test/CodeGen/R600/sdivrem64.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/sdivrem64.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/sdivrem64.ll (original)
+++ llvm/trunk/test/CodeGen/R600/sdivrem64.ll (removed)
@@ -1,225 +0,0 @@
-;RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck --check-prefix=SI --check-prefix=GCN --check-prefix=FUNC %s
-;RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck --check-prefix=VI --check-prefix=GCN --check-prefix=FUNC %s
-;RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck --check-prefix=EG --check-prefix=FUNC %s
-
-;FUNC-LABEL: {{^}}test_sdiv:
-;EG: RECIP_UINT
-;EG: LSHL {{.*}}, 1,
-;EG: BFE_UINT
-;EG: BFE_UINT
-;EG: BFE_UINT
-;EG: BFE_UINT
-;EG: BFE_UINT
-;EG: BFE_UINT
-;EG: BFE_UINT
-;EG: BFE_UINT
-;EG: BFE_UINT
-;EG: BFE_UINT
-;EG: BFE_UINT
-;EG: BFE_UINT
-;EG: BFE_UINT
-;EG: BFE_UINT
-;EG: BFE_UINT
-;EG: BFE_UINT
-;EG: BFE_UINT
-;EG: BFE_UINT
-;EG: BFE_UINT
-;EG: BFE_UINT
-;EG: BFE_UINT
-;EG: BFE_UINT
-;EG: BFE_UINT
-;EG: BFE_UINT
-;EG: BFE_UINT
-;EG: BFE_UINT
-;EG: BFE_UINT
-;EG: BFE_UINT
-;EG: BFE_UINT
-;EG: BFE_UINT
-
-;GCN: v_bfe_u32
-;GCN: v_bfe_u32
-;GCN: v_bfe_u32
-;GCN: v_bfe_u32
-;GCN: v_bfe_u32
-;GCN: v_bfe_u32
-;GCN: v_bfe_u32
-;GCN: v_bfe_u32
-;GCN: v_bfe_u32
-;GCN: v_bfe_u32
-;GCN: v_bfe_u32
-;GCN: v_bfe_u32
-;GCN: v_bfe_u32
-;GCN: v_bfe_u32
-;GCN: v_bfe_u32
-;GCN: v_bfe_u32
-;GCN: v_bfe_u32
-;GCN: v_bfe_u32
-;GCN: v_bfe_u32
-;GCN: v_bfe_u32
-;GCN: v_bfe_u32
-;GCN: v_bfe_u32
-;GCN: v_bfe_u32
-;GCN: v_bfe_u32
-;GCN: v_bfe_u32
-;GCN: v_bfe_u32
-;GCN: v_bfe_u32
-;GCN: v_bfe_u32
-;GCN: v_bfe_u32
-;GCN: v_bfe_u32
-;GCN-NOT: v_mad_f32
-;SI-NOT: v_lshr_b64
-;VI-NOT: v_lshrrev_b64
-;GCN: s_endpgm
-define void @test_sdiv(i64 addrspace(1)* %out, i64 %x, i64 %y) {
- %result = sdiv i64 %x, %y
- store i64 %result, i64 addrspace(1)* %out
- ret void
-}
-
-;FUNC-LABEL: {{^}}test_srem:
-;EG: RECIP_UINT
-;EG: BFE_UINT
-;EG: BFE_UINT
-;EG: BFE_UINT
-;EG: BFE_UINT
-;EG: BFE_UINT
-;EG: BFE_UINT
-;EG: BFE_UINT
-;EG: BFE_UINT
-;EG: BFE_UINT
-;EG: BFE_UINT
-;EG: BFE_UINT
-;EG: BFE_UINT
-;EG: BFE_UINT
-;EG: BFE_UINT
-;EG: BFE_UINT
-;EG: BFE_UINT
-;EG: BFE_UINT
-;EG: BFE_UINT
-;EG: BFE_UINT
-;EG: BFE_UINT
-;EG: BFE_UINT
-;EG: BFE_UINT
-;EG: BFE_UINT
-;EG: BFE_UINT
-;EG: BFE_UINT
-;EG: BFE_UINT
-;EG: BFE_UINT
-;EG: BFE_UINT
-;EG: BFE_UINT
-;EG: BFE_UINT
-;EG: AND_INT {{.*}}, 1,
-
-;GCN: s_bfe_u32
-;GCN: s_bfe_u32
-;GCN: s_bfe_u32
-;GCN: s_bfe_u32
-;GCN: s_bfe_u32
-;GCN: s_bfe_u32
-;GCN: s_bfe_u32
-;GCN: s_bfe_u32
-;GCN: s_bfe_u32
-;GCN: s_bfe_u32
-;GCN: s_bfe_u32
-;GCN: s_bfe_u32
-;GCN: s_bfe_u32
-;GCN: s_bfe_u32
-;GCN: s_bfe_u32
-;GCN: s_bfe_u32
-;GCN: s_bfe_u32
-;GCN: s_bfe_u32
-;GCN: s_bfe_u32
-;GCN: s_bfe_u32
-;GCN: s_bfe_u32
-;GCN: s_bfe_u32
-;GCN: s_bfe_u32
-;GCN: s_bfe_u32
-;GCN: s_bfe_u32
-;GCN: s_bfe_u32
-;GCN: s_bfe_u32
-;GCN: s_bfe_u32
-;GCN: s_bfe_u32
-;GCN: s_bfe_u32
-;GCN-NOT: v_mad_f32
-;SI-NOT: v_lshr_b64
-;VI-NOT: v_lshrrev_b64
-;GCN: s_endpgm
-define void @test_srem(i64 addrspace(1)* %out, i64 %x, i64 %y) {
- %result = urem i64 %x, %y
- store i64 %result, i64 addrspace(1)* %out
- ret void
-}
-
-;FUNC-LABEL: {{^}}test_sdiv3264:
-;EG: RECIP_UINT
-;EG-NOT: BFE_UINT
-
-;GCN-NOT: s_bfe_u32
-;GCN-NOT: v_mad_f32
-;SI-NOT: v_lshr_b64
-;VI-NOT: v_lshrrev_b64
-;GCN: s_endpgm
-define void @test_sdiv3264(i64 addrspace(1)* %out, i64 %x, i64 %y) {
- %1 = ashr i64 %x, 33
- %2 = ashr i64 %y, 33
- %result = sdiv i64 %1, %2
- store i64 %result, i64 addrspace(1)* %out
- ret void
-}
-
-;FUNC-LABEL: {{^}}test_srem3264:
-;EG: RECIP_UINT
-;EG-NOT: BFE_UINT
-
-;GCN-NOT: s_bfe_u32
-;GCN-NOT: v_mad_f32
-;SI-NOT: v_lshr_b64
-;VI-NOT: v_lshrrev_b64
-;GCN: s_endpgm
-define void @test_srem3264(i64 addrspace(1)* %out, i64 %x, i64 %y) {
- %1 = ashr i64 %x, 33
- %2 = ashr i64 %y, 33
- %result = srem i64 %1, %2
- store i64 %result, i64 addrspace(1)* %out
- ret void
-}
-
-;FUNC-LABEL: {{^}}test_sdiv2464:
-;EG: INT_TO_FLT
-;EG: INT_TO_FLT
-;EG: FLT_TO_INT
-;EG-NOT: RECIP_UINT
-;EG-NOT: BFE_UINT
-
-;GCN-NOT: s_bfe_u32
-;GCN: v_mad_f32
-;SI-NOT: v_lshr_b64
-;VI-NOT: v_lshrrev_b64
-;GCN: s_endpgm
-define void @test_sdiv2464(i64 addrspace(1)* %out, i64 %x, i64 %y) {
- %1 = ashr i64 %x, 40
- %2 = ashr i64 %y, 40
- %result = sdiv i64 %1, %2
- store i64 %result, i64 addrspace(1)* %out
- ret void
-}
-
-;FUNC-LABEL: {{^}}test_srem2464:
-;EG: INT_TO_FLT
-;EG: INT_TO_FLT
-;EG: FLT_TO_INT
-;EG-NOT: RECIP_UINT
-;EG-NOT: BFE_UINT
-
-;GCN-NOT: s_bfe_u32
-;GCN: v_mad_f32
-;SI-NOT: v_lshr_b64
-;VI-NOT: v_lshrrev_b64
-;GCN: s_endpgm
-define void @test_srem2464(i64 addrspace(1)* %out, i64 %x, i64 %y) {
- %1 = ashr i64 %x, 40
- %2 = ashr i64 %y, 40
- %result = srem i64 %1, %2
- store i64 %result, i64 addrspace(1)* %out
- ret void
-}
Removed: llvm/trunk/test/CodeGen/R600/select-i1.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/select-i1.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/select-i1.ll (original)
+++ llvm/trunk/test/CodeGen/R600/select-i1.ll (removed)
@@ -1,15 +0,0 @@
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-
-; FIXME: This should go in the existing select.ll test, except the current testcase there is broken on SI
-
-; FUNC-LABEL: {{^}}select_i1:
-; SI: v_cndmask_b32
-; SI-NOT: v_cndmask_b32
-define void @select_i1(i1 addrspace(1)* %out, i32 %cond, i1 %a, i1 %b) nounwind {
- %cmp = icmp ugt i32 %cond, 5
- %sel = select i1 %cmp, i1 %a, i1 %b
- store i1 %sel, i1 addrspace(1)* %out, align 4
- ret void
-}
-
Removed: llvm/trunk/test/CodeGen/R600/select-vectors.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/select-vectors.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/select-vectors.ll (original)
+++ llvm/trunk/test/CodeGen/R600/select-vectors.ll (removed)
@@ -1,156 +0,0 @@
-; RUN: llc -verify-machineinstrs -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-; RUN: llc -verify-machineinstrs -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-
-; Test expansion of scalar selects on vectors.
-; Evergreen not enabled since it seems to be having problems with doubles.
-
-
-; FUNC-LABEL: {{^}}select_v4i8:
-; SI: v_cndmask_b32_e64
-; SI: v_cndmask_b32_e64
-; SI: v_cndmask_b32_e64
-; SI: v_cndmask_b32_e64
-define void @select_v4i8(<4 x i8> addrspace(1)* %out, <4 x i8> %a, <4 x i8> %b, i8 %c) nounwind {
- %cmp = icmp eq i8 %c, 0
- %select = select i1 %cmp, <4 x i8> %a, <4 x i8> %b
- store <4 x i8> %select, <4 x i8> addrspace(1)* %out, align 4
- ret void
-}
-
-; FUNC-LABEL: {{^}}select_v4i16:
-; SI: v_cndmask_b32_e64
-; SI: v_cndmask_b32_e64
-; SI: v_cndmask_b32_e64
-; SI: v_cndmask_b32_e64
-define void @select_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> %a, <4 x i16> %b, i32 %c) nounwind {
- %cmp = icmp eq i32 %c, 0
- %select = select i1 %cmp, <4 x i16> %a, <4 x i16> %b
- store <4 x i16> %select, <4 x i16> addrspace(1)* %out, align 4
- ret void
-}
-
-; FUNC-LABEL: {{^}}select_v2i32:
-; SI: v_cndmask_b32_e64
-; SI: v_cndmask_b32_e64
-; SI: buffer_store_dwordx2
-define void @select_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %a, <2 x i32> %b, i32 %c) nounwind {
- %cmp = icmp eq i32 %c, 0
- %select = select i1 %cmp, <2 x i32> %a, <2 x i32> %b
- store <2 x i32> %select, <2 x i32> addrspace(1)* %out, align 8
- ret void
-}
-
-; FUNC-LABEL: {{^}}select_v4i32:
-; SI: v_cndmask_b32_e64
-; SI: v_cndmask_b32_e64
-; SI: v_cndmask_b32_e64
-; SI: v_cndmask_b32_e64
-; SI: buffer_store_dwordx4
-define void @select_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %a, <4 x i32> %b, i32 %c) nounwind {
- %cmp = icmp eq i32 %c, 0
- %select = select i1 %cmp, <4 x i32> %a, <4 x i32> %b
- store <4 x i32> %select, <4 x i32> addrspace(1)* %out, align 16
- ret void
-}
-
-; FUNC-LABEL: {{^}}select_v8i32:
-; SI: v_cndmask_b32_e64
-; SI: v_cndmask_b32_e64
-; SI: v_cndmask_b32_e64
-; SI: v_cndmask_b32_e64
-; SI: v_cndmask_b32_e64
-; SI: v_cndmask_b32_e64
-; SI: v_cndmask_b32_e64
-; SI: v_cndmask_b32_e64
-define void @select_v8i32(<8 x i32> addrspace(1)* %out, <8 x i32> %a, <8 x i32> %b, i32 %c) nounwind {
- %cmp = icmp eq i32 %c, 0
- %select = select i1 %cmp, <8 x i32> %a, <8 x i32> %b
- store <8 x i32> %select, <8 x i32> addrspace(1)* %out, align 16
- ret void
-}
-
-; FUNC-LABEL: {{^}}select_v2f32:
-; SI: buffer_store_dwordx2
-define void @select_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b, i32 %c) nounwind {
- %cmp = icmp eq i32 %c, 0
- %select = select i1 %cmp, <2 x float> %a, <2 x float> %b
- store <2 x float> %select, <2 x float> addrspace(1)* %out, align 16
- ret void
-}
-
-; FUNC-LABEL: {{^}}select_v4f32:
-; SI: buffer_store_dwordx4
-define void @select_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %a, <4 x float> %b, i32 %c) nounwind {
- %cmp = icmp eq i32 %c, 0
- %select = select i1 %cmp, <4 x float> %a, <4 x float> %b
- store <4 x float> %select, <4 x float> addrspace(1)* %out, align 16
- ret void
-}
-
-; FUNC-LABEL: {{^}}select_v8f32:
-; SI: v_cndmask_b32_e64
-; SI: v_cndmask_b32_e64
-; SI: v_cndmask_b32_e64
-; SI: v_cndmask_b32_e64
-; SI: v_cndmask_b32_e64
-; SI: v_cndmask_b32_e64
-; SI: v_cndmask_b32_e64
-; SI: v_cndmask_b32_e64
-define void @select_v8f32(<8 x float> addrspace(1)* %out, <8 x float> %a, <8 x float> %b, i32 %c) nounwind {
- %cmp = icmp eq i32 %c, 0
- %select = select i1 %cmp, <8 x float> %a, <8 x float> %b
- store <8 x float> %select, <8 x float> addrspace(1)* %out, align 16
- ret void
-}
-
-; FUNC-LABEL: {{^}}select_v2f64:
-; SI: v_cndmask_b32_e64
-; SI: v_cndmask_b32_e64
-; SI: v_cndmask_b32_e64
-; SI: v_cndmask_b32_e64
-define void @select_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %a, <2 x double> %b, i32 %c) nounwind {
- %cmp = icmp eq i32 %c, 0
- %select = select i1 %cmp, <2 x double> %a, <2 x double> %b
- store <2 x double> %select, <2 x double> addrspace(1)* %out, align 16
- ret void
-}
-
-; FUNC-LABEL: {{^}}select_v4f64:
-; SI: v_cndmask_b32_e64
-; SI: v_cndmask_b32_e64
-; SI: v_cndmask_b32_e64
-; SI: v_cndmask_b32_e64
-; SI: v_cndmask_b32_e64
-; SI: v_cndmask_b32_e64
-; SI: v_cndmask_b32_e64
-; SI: v_cndmask_b32_e64
-define void @select_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %a, <4 x double> %b, i32 %c) nounwind {
- %cmp = icmp eq i32 %c, 0
- %select = select i1 %cmp, <4 x double> %a, <4 x double> %b
- store <4 x double> %select, <4 x double> addrspace(1)* %out, align 16
- ret void
-}
-
-; FUNC-LABEL: {{^}}select_v8f64:
-; SI: v_cndmask_b32_e64
-; SI: v_cndmask_b32_e64
-; SI: v_cndmask_b32_e64
-; SI: v_cndmask_b32_e64
-; SI: v_cndmask_b32_e64
-; SI: v_cndmask_b32_e64
-; SI: v_cndmask_b32_e64
-; SI: v_cndmask_b32_e64
-; SI: v_cndmask_b32_e64
-; SI: v_cndmask_b32_e64
-; SI: v_cndmask_b32_e64
-; SI: v_cndmask_b32_e64
-; SI: v_cndmask_b32_e64
-; SI: v_cndmask_b32_e64
-; SI: v_cndmask_b32_e64
-; SI: v_cndmask_b32_e64
-define void @select_v8f64(<8 x double> addrspace(1)* %out, <8 x double> %a, <8 x double> %b, i32 %c) nounwind {
- %cmp = icmp eq i32 %c, 0
- %select = select i1 %cmp, <8 x double> %a, <8 x double> %b
- store <8 x double> %select, <8 x double> addrspace(1)* %out, align 16
- ret void
-}
Removed: llvm/trunk/test/CodeGen/R600/select.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/select.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/select.ll (original)
+++ llvm/trunk/test/CodeGen/R600/select.ll (removed)
@@ -1,47 +0,0 @@
-; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
-
-
-; Normally icmp + select is optimized to select_cc; when this happens the
-; DAGLegalizer never sees the select and doesn't have a chance to legalize it.
-;
-; In order to avoid the select_cc optimization, this test case calculates the
-; condition for the select in a separate basic block.
-
-; FUNC-LABEL: {{^}}select:
-; EG-DAG: MEM_RAT_CACHELESS STORE_RAW T{{[0-9]+}}.X
-; EG-DAG: MEM_RAT_CACHELESS STORE_RAW T{{[0-9]+}}.X
-; EG-DAG: MEM_RAT_CACHELESS STORE_RAW T{{[0-9]+}}.XY
-; EG-DAG: MEM_RAT_CACHELESS STORE_RAW T{{[0-9]+}}.XY
-; EG-DAG: MEM_RAT_CACHELESS STORE_RAW T{{[0-9]+}}.XYZW
-; EG-DAG: MEM_RAT_CACHELESS STORE_RAW T{{[0-9]+}}.XYZW
-define void @select (i32 addrspace(1)* %i32out, float addrspace(1)* %f32out,
- <2 x i32> addrspace(1)* %v2i32out, <2 x float> addrspace(1)* %v2f32out,
- <4 x i32> addrspace(1)* %v4i32out, <4 x float> addrspace(1)* %v4f32out,
- i32 %cond) {
-entry:
- br label %for
-body:
- %inc = add i32 %i, 1
- %br_cmp.i = icmp eq i1 %br_cmp, 0
- br label %for
-for:
- %i = phi i32 [ %inc, %body], [ 0, %entry ]
- %br_cmp = phi i1 [ %br_cmp.i, %body ], [ 0, %entry ]
- %0 = icmp eq i32 %cond, %i
- %1 = select i1 %br_cmp, i32 2, i32 3
- %2 = select i1 %br_cmp, float 2.0 , float 5.0
- %3 = select i1 %br_cmp, <2 x i32> <i32 2, i32 3>, <2 x i32> <i32 4, i32 5>
- %4 = select i1 %br_cmp, <2 x float> <float 2.0, float 3.0>, <2 x float> <float 4.0, float 5.0>
- %5 = select i1 %br_cmp, <4 x i32> <i32 2 , i32 3, i32 4, i32 5>, <4 x i32> <i32 6, i32 7, i32 8, i32 9>
- %6 = select i1 %br_cmp, <4 x float> <float 2.0, float 3.0, float 4.0, float 5.0>, <4 x float> <float 6.0, float 7.0, float 8.0, float 9.0>
- br i1 %0, label %body, label %done
-
-done:
- store i32 %1, i32 addrspace(1)* %i32out
- store float %2, float addrspace(1)* %f32out
- store <2 x i32> %3, <2 x i32> addrspace(1)* %v2i32out
- store <2 x float> %4, <2 x float> addrspace(1)* %v2f32out
- store <4 x i32> %5, <4 x i32> addrspace(1)* %v4i32out
- store <4 x float> %6, <4 x float> addrspace(1)* %v4f32out
- ret void
-}
Removed: llvm/trunk/test/CodeGen/R600/select64.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/select64.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/select64.ll (original)
+++ llvm/trunk/test/CodeGen/R600/select64.ll (removed)
@@ -1,68 +0,0 @@
-; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck %s
-; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s
-
-; CHECK-LABEL: {{^}}select0:
-; i64 select should be split into two i32 selects, and we shouldn't need
-; to use a shift to extract the hi dword of the input.
-; CHECK-NOT: s_lshr_b64
-; CHECK: v_cndmask
-; CHECK: v_cndmask
-define void @select0(i64 addrspace(1)* %out, i32 %cond, i64 %in) {
-entry:
- %0 = icmp ugt i32 %cond, 5
- %1 = select i1 %0, i64 0, i64 %in
- store i64 %1, i64 addrspace(1)* %out
- ret void
-}
-
-; CHECK-LABEL: {{^}}select_trunc_i64:
-; CHECK: v_cndmask_b32
-; CHECK-NOT: v_cndmask_b32
-define void @select_trunc_i64(i32 addrspace(1)* %out, i32 %cond, i64 %in) nounwind {
- %cmp = icmp ugt i32 %cond, 5
- %sel = select i1 %cmp, i64 0, i64 %in
- %trunc = trunc i64 %sel to i32
- store i32 %trunc, i32 addrspace(1)* %out, align 4
- ret void
-}
-
-; CHECK-LABEL: {{^}}select_trunc_i64_2:
-; CHECK: v_cndmask_b32
-; CHECK-NOT: v_cndmask_b32
-define void @select_trunc_i64_2(i32 addrspace(1)* %out, i32 %cond, i64 %a, i64 %b) nounwind {
- %cmp = icmp ugt i32 %cond, 5
- %sel = select i1 %cmp, i64 %a, i64 %b
- %trunc = trunc i64 %sel to i32
- store i32 %trunc, i32 addrspace(1)* %out, align 4
- ret void
-}
-
-; CHECK-LABEL: {{^}}v_select_trunc_i64_2:
-; CHECK: v_cndmask_b32
-; CHECK-NOT: v_cndmask_b32
-define void @v_select_trunc_i64_2(i32 addrspace(1)* %out, i32 %cond, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) nounwind {
- %cmp = icmp ugt i32 %cond, 5
- %a = load i64, i64 addrspace(1)* %aptr, align 8
- %b = load i64, i64 addrspace(1)* %bptr, align 8
- %sel = select i1 %cmp, i64 %a, i64 %b
- %trunc = trunc i64 %sel to i32
- store i32 %trunc, i32 addrspace(1)* %out, align 4
- ret void
-}
-
-; CHECK-LABEL: {{^}}v_select_i64_split_imm:
-; CHECK: s_mov_b32 [[SHI:s[0-9]+]], 63
-; CHECK: s_mov_b32 [[SLO:s[0-9]+]], 0
-; CHECK-DAG: v_mov_b32_e32 [[VHI:v[0-9]+]], [[SHI]]
-; CHECK-DAG: v_mov_b32_e32 [[VLO:v[0-9]+]], [[SLO]]
-; CHECK-DAG: v_cndmask_b32_e64 {{v[0-9]+}}, [[VLO]], {{v[0-9]+}}
-; CHECK-DAG: v_cndmask_b32_e64 {{v[0-9]+}}, [[VHI]], {{v[0-9]+}}
-; CHECK: s_endpgm
-define void @v_select_i64_split_imm(i64 addrspace(1)* %out, i32 %cond, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) nounwind {
- %cmp = icmp ugt i32 %cond, 5
- %a = load i64, i64 addrspace(1)* %aptr, align 8
- %b = load i64, i64 addrspace(1)* %bptr, align 8
- %sel = select i1 %cmp, i64 %a, i64 270582939648 ; 63 << 32
- store i64 %sel, i64 addrspace(1)* %out, align 8
- ret void
-}
Removed: llvm/trunk/test/CodeGen/R600/selectcc-cnd.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/selectcc-cnd.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/selectcc-cnd.ll (original)
+++ llvm/trunk/test/CodeGen/R600/selectcc-cnd.ll (removed)
@@ -1,12 +0,0 @@
-;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
-
-;CHECK-NOT: SETE
-;CHECK: CNDE {{\*?}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, 1.0, literal.x,
-;CHECK: 1073741824
-define void @test(float addrspace(1)* %out, float addrspace(1)* %in) {
- %1 = load float, float addrspace(1)* %in
- %2 = fcmp oeq float %1, 0.0
- %3 = select i1 %2, float 1.0, float 2.0
- store float %3, float addrspace(1)* %out
- ret void
-}
Removed: llvm/trunk/test/CodeGen/R600/selectcc-cnde-int.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/selectcc-cnde-int.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/selectcc-cnde-int.ll (original)
+++ llvm/trunk/test/CodeGen/R600/selectcc-cnde-int.ll (removed)
@@ -1,12 +0,0 @@
-;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
-
-;CHECK-NOT: SETE_INT
-;CHECK: CNDE_INT {{\*?}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, 1, literal.x,
-;CHECK-NEXT: 2
-define void @test(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
- %1 = load i32, i32 addrspace(1)* %in
- %2 = icmp eq i32 %1, 0
- %3 = select i1 %2, i32 1, i32 2
- store i32 %3, i32 addrspace(1)* %out
- ret void
-}
Removed: llvm/trunk/test/CodeGen/R600/selectcc-icmp-select-float.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/selectcc-icmp-select-float.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/selectcc-icmp-select-float.ll (original)
+++ llvm/trunk/test/CodeGen/R600/selectcc-icmp-select-float.ll (removed)
@@ -1,16 +0,0 @@
-; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
-
-; Note: additional optimizations may cause this SGT to be replaced with a
-; CND* instruction.
-; CHECK: SETGT_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, literal.x,
-; CHECK-NEXT: -1
-; Test a selectcc with i32 LHS/RHS and float True/False
-
-define void @test(float addrspace(1)* %out, i32 addrspace(1)* %in) {
-entry:
- %0 = load i32, i32 addrspace(1)* %in
- %1 = icmp sge i32 %0, 0
- %2 = select i1 %1, float 1.0, float 0.0
- store float %2, float addrspace(1)* %out
- ret void
-}
Removed: llvm/trunk/test/CodeGen/R600/selectcc-opt.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/selectcc-opt.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/selectcc-opt.ll (original)
+++ llvm/trunk/test/CodeGen/R600/selectcc-opt.ll (removed)
@@ -1,80 +0,0 @@
-; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
-
-
-; FUNC-LABEL: {{^}}test_a:
-; EG-NOT: CND
-; EG: SET{{[NEQGTL]+}}_DX10
-
-define void @test_a(i32 addrspace(1)* %out, float %in) {
-entry:
- %0 = fcmp olt float %in, 0.000000e+00
- %1 = select i1 %0, float 1.000000e+00, float 0.000000e+00
- %2 = fsub float -0.000000e+00, %1
- %3 = fptosi float %2 to i32
- %4 = bitcast i32 %3 to float
- %5 = bitcast float %4 to i32
- %6 = icmp ne i32 %5, 0
- br i1 %6, label %IF, label %ENDIF
-
-IF:
- %7 = getelementptr i32, i32 addrspace(1)* %out, i32 1
- store i32 0, i32 addrspace(1)* %7
- br label %ENDIF
-
-ENDIF:
- store i32 0, i32 addrspace(1)* %out
- ret void
-}
-
-; Same as test_a, but the branch labels are swapped to produce the inverse cc
-; for the icmp instruction
-
-; EG-LABEL: {{^}}test_b:
-; EG: SET{{[GTEQN]+}}_DX10
-; EG-NEXT: PRED_
-; EG-NEXT: ALU clause starting
-define void @test_b(i32 addrspace(1)* %out, float %in) {
-entry:
- %0 = fcmp olt float %in, 0.0
- %1 = select i1 %0, float 1.000000e+00, float 0.000000e+00
- %2 = fsub float -0.000000e+00, %1
- %3 = fptosi float %2 to i32
- %4 = bitcast i32 %3 to float
- %5 = bitcast float %4 to i32
- %6 = icmp ne i32 %5, 0
- br i1 %6, label %ENDIF, label %IF
-
-IF:
- %7 = getelementptr i32, i32 addrspace(1)* %out, i32 1
- store i32 0, i32 addrspace(1)* %7
- br label %ENDIF
-
-ENDIF:
- store i32 0, i32 addrspace(1)* %out
- ret void
-}
-
-; Test a CND*_INT instruction with float true/false values
-; EG-LABEL: {{^}}test_c:
-; EG: CND{{[GTE]+}}_INT
-define void @test_c(float addrspace(1)* %out, i32 %in) {
-entry:
- %0 = icmp sgt i32 %in, 0
- %1 = select i1 %0, float 2.0, float 3.0
- store float %1, float addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}selectcc_bool:
-; SI: v_cmp_ne_i32
-; SI-NEXT: v_cndmask_b32_e64
-; SI-NOT: cmp
-; SI-NOT: cndmask
-define void @selectcc_bool(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
- %icmp0 = icmp ne i32 %a, %b
- %ext = select i1 %icmp0, i32 -1, i32 0
- store i32 %ext, i32 addrspace(1)* %out
- ret void
-}
Removed: llvm/trunk/test/CodeGen/R600/selectcc.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/selectcc.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/selectcc.ll (original)
+++ llvm/trunk/test/CodeGen/R600/selectcc.ll (removed)
@@ -1,20 +0,0 @@
-; RUN: llc -verify-machineinstrs -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
-; RUN: llc -verify-machineinstrs -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-; RUN: llc -verify-machineinstrs -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-
-; FUNC-LABEL: {{^}}selectcc_i64:
-; EG: XOR_INT
-; EG: XOR_INT
-; EG: OR_INT
-; EG: CNDE_INT
-; EG: CNDE_INT
-; SI: v_cmp_eq_i64
-; SI: v_cndmask
-; SI: v_cndmask
-define void @selectcc_i64(i64 addrspace(1) * %out, i64 %lhs, i64 %rhs, i64 %true, i64 %false) {
-entry:
- %0 = icmp eq i64 %lhs, %rhs
- %1 = select i1 %0, i64 %true, i64 %false
- store i64 %1, i64 addrspace(1)* %out
- ret void
-}
Removed: llvm/trunk/test/CodeGen/R600/set-dx10.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/set-dx10.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/set-dx10.ll (original)
+++ llvm/trunk/test/CodeGen/R600/set-dx10.ll (removed)
@@ -1,161 +0,0 @@
-; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
-
-; These tests check that floating-point comparisons whose results are used by
-; a select to store integer true (-1) and false (0) values are lowered to one
-; of the SET*DX10 instructions.
-
-; CHECK: {{^}}fcmp_une_select_fptosi:
-; CHECK: SETNE_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x,
-; CHECK-NEXT: LSHR
-; CHECK-NEXT: 1084227584(5.000000e+00)
-define void @fcmp_une_select_fptosi(i32 addrspace(1)* %out, float %in) {
-entry:
- %0 = fcmp une float %in, 5.0
- %1 = select i1 %0, float 1.000000e+00, float 0.000000e+00
- %2 = fsub float -0.000000e+00, %1
- %3 = fptosi float %2 to i32
- store i32 %3, i32 addrspace(1)* %out
- ret void
-}
-
-; CHECK: {{^}}fcmp_une_select_i32:
-; CHECK: SETNE_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x,
-; CHECK-NEXT: LSHR
-; CHECK-NEXT: 1084227584(5.000000e+00)
-define void @fcmp_une_select_i32(i32 addrspace(1)* %out, float %in) {
-entry:
- %0 = fcmp une float %in, 5.0
- %1 = select i1 %0, i32 -1, i32 0
- store i32 %1, i32 addrspace(1)* %out
- ret void
-}
-
-; CHECK: {{^}}fcmp_oeq_select_fptosi:
-; CHECK: SETE_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x,
-; CHECK-NEXT: LSHR
-; CHECK-NEXT: 1084227584(5.000000e+00)
-define void @fcmp_oeq_select_fptosi(i32 addrspace(1)* %out, float %in) {
-entry:
- %0 = fcmp oeq float %in, 5.0
- %1 = select i1 %0, float 1.000000e+00, float 0.000000e+00
- %2 = fsub float -0.000000e+00, %1
- %3 = fptosi float %2 to i32
- store i32 %3, i32 addrspace(1)* %out
- ret void
-}
-
-; CHECK: {{^}}fcmp_oeq_select_i32:
-; CHECK: SETE_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x,
-; CHECK-NEXT: LSHR
-; CHECK-NEXT: 1084227584(5.000000e+00)
-define void @fcmp_oeq_select_i32(i32 addrspace(1)* %out, float %in) {
-entry:
- %0 = fcmp oeq float %in, 5.0
- %1 = select i1 %0, i32 -1, i32 0
- store i32 %1, i32 addrspace(1)* %out
- ret void
-}
-
-; CHECK: {{^}}fcmp_ogt_select_fptosi:
-; CHECK: SETGT_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x,
-; CHECK-NEXT: LSHR
-; CHECK-NEXT: 1084227584(5.000000e+00)
-define void @fcmp_ogt_select_fptosi(i32 addrspace(1)* %out, float %in) {
-entry:
- %0 = fcmp ogt float %in, 5.0
- %1 = select i1 %0, float 1.000000e+00, float 0.000000e+00
- %2 = fsub float -0.000000e+00, %1
- %3 = fptosi float %2 to i32
- store i32 %3, i32 addrspace(1)* %out
- ret void
-}
-
-; CHECK: {{^}}fcmp_ogt_select_i32:
-; CHECK: SETGT_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x,
-; CHECK-NEXT: LSHR
-; CHECK-NEXT: 1084227584(5.000000e+00)
-define void @fcmp_ogt_select_i32(i32 addrspace(1)* %out, float %in) {
-entry:
- %0 = fcmp ogt float %in, 5.0
- %1 = select i1 %0, i32 -1, i32 0
- store i32 %1, i32 addrspace(1)* %out
- ret void
-}
-
-; CHECK: {{^}}fcmp_oge_select_fptosi:
-; CHECK: SETGE_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x,
-; CHECK-NEXT: LSHR
-; CHECK-NEXT: 1084227584(5.000000e+00)
-define void @fcmp_oge_select_fptosi(i32 addrspace(1)* %out, float %in) {
-entry:
- %0 = fcmp oge float %in, 5.0
- %1 = select i1 %0, float 1.000000e+00, float 0.000000e+00
- %2 = fsub float -0.000000e+00, %1
- %3 = fptosi float %2 to i32
- store i32 %3, i32 addrspace(1)* %out
- ret void
-}
-
-; CHECK: {{^}}fcmp_oge_select_i32:
-; CHECK: SETGE_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x,
-; CHECK-NEXT: LSHR
-; CHECK-NEXT: 1084227584(5.000000e+00)
-define void @fcmp_oge_select_i32(i32 addrspace(1)* %out, float %in) {
-entry:
- %0 = fcmp oge float %in, 5.0
- %1 = select i1 %0, i32 -1, i32 0
- store i32 %1, i32 addrspace(1)* %out
- ret void
-}
-
-; CHECK: {{^}}fcmp_ole_select_fptosi:
-; CHECK: SETGE_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z,
-; CHECK-NEXT: LSHR
-; CHECK-NEXT: 1084227584(5.000000e+00)
-define void @fcmp_ole_select_fptosi(i32 addrspace(1)* %out, float %in) {
-entry:
- %0 = fcmp ole float %in, 5.0
- %1 = select i1 %0, float 1.000000e+00, float 0.000000e+00
- %2 = fsub float -0.000000e+00, %1
- %3 = fptosi float %2 to i32
- store i32 %3, i32 addrspace(1)* %out
- ret void
-}
-
-; CHECK: {{^}}fcmp_ole_select_i32:
-; CHECK: SETGE_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z,
-; CHECK-NEXT: LSHR
-; CHECK-NEXT: 1084227584(5.000000e+00)
-define void @fcmp_ole_select_i32(i32 addrspace(1)* %out, float %in) {
-entry:
- %0 = fcmp ole float %in, 5.0
- %1 = select i1 %0, i32 -1, i32 0
- store i32 %1, i32 addrspace(1)* %out
- ret void
-}
-
-; CHECK: {{^}}fcmp_olt_select_fptosi:
-; CHECK: SETGT_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z,
-; CHECK-NEXT: LSHR
-; CHECK-NEXT: 1084227584(5.000000e+00)
-define void @fcmp_olt_select_fptosi(i32 addrspace(1)* %out, float %in) {
-entry:
- %0 = fcmp olt float %in, 5.0
- %1 = select i1 %0, float 1.000000e+00, float 0.000000e+00
- %2 = fsub float -0.000000e+00, %1
- %3 = fptosi float %2 to i32
- store i32 %3, i32 addrspace(1)* %out
- ret void
-}
-
-; CHECK: {{^}}fcmp_olt_select_i32:
-; CHECK: SETGT_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z,
-; CHECK-NEXT: LSHR
-; CHECK-NEXT: 1084227584(5.000000e+00)
-define void @fcmp_olt_select_i32(i32 addrspace(1)* %out, float %in) {
-entry:
- %0 = fcmp olt float %in, 5.0
- %1 = select i1 %0, i32 -1, i32 0
- store i32 %1, i32 addrspace(1)* %out
- ret void
-}
Removed: llvm/trunk/test/CodeGen/R600/setcc-equivalent.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/setcc-equivalent.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/setcc-equivalent.ll (original)
+++ llvm/trunk/test/CodeGen/R600/setcc-equivalent.ll (removed)
@@ -1,30 +0,0 @@
-; RUN: llc -march=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG %s
-
-; EG-LABEL: {{^}}and_setcc_setcc_i32:
-; EG: AND_INT
-; EG-NEXT: SETE_INT
-define void @and_setcc_setcc_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) {
- %cmp1 = icmp eq i32 %a, -1
- %cmp2 = icmp eq i32 %b, -1
- %and = and i1 %cmp1, %cmp2
- %ext = sext i1 %and to i32
- store i32 %ext, i32 addrspace(1)* %out, align 4
- ret void
-}
-
-; EG-LABEL: {{^}}and_setcc_setcc_v4i32:
-; EG: AND_INT
-; EG: AND_INT
-; EG: SETE_INT
-; EG: AND_INT
-; EG: SETE_INT
-; EG: AND_INT
-; EG: SETE_INT
-define void @and_setcc_setcc_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %a, <4 x i32> %b) {
- %cmp1 = icmp eq <4 x i32> %a, <i32 -1, i32 -1, i32 -1, i32 -1>
- %cmp2 = icmp eq <4 x i32> %b, <i32 -1, i32 -1, i32 -1, i32 -1>
- %and = and <4 x i1> %cmp1, %cmp2
- %ext = sext <4 x i1> %and to <4 x i32>
- store <4 x i32> %ext, <4 x i32> addrspace(1)* %out, align 4
- ret void
-}
Removed: llvm/trunk/test/CodeGen/R600/setcc-opt.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/setcc-opt.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/setcc-opt.ll (original)
+++ llvm/trunk/test/CodeGen/R600/setcc-opt.ll (removed)
@@ -1,236 +0,0 @@
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN -check-prefix=FUNC %s
-; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
-
-; FUNC-LABEL: {{^}}sext_bool_icmp_eq_0:
-; GCN-NOT: v_cmp
-; GCN: v_cmp_ne_i32_e32 vcc,
-; GCN-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
-; GCN-NEXT: buffer_store_byte [[RESULT]]
-; GCN-NEXT: s_endpgm
-
-; EG: SETNE_INT * [[CMP:T[0-9]+]].[[CMPCHAN:[XYZW]]], KC0[2].Z, KC0[2].W
-; EG: AND_INT T{{[0-9]+.[XYZW]}}, PS, 1
-define void @sext_bool_icmp_eq_0(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
- %icmp0 = icmp eq i32 %a, %b
- %ext = sext i1 %icmp0 to i32
- %icmp1 = icmp eq i32 %ext, 0
- store i1 %icmp1, i1 addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}sext_bool_icmp_ne_0:
-; GCN-NOT: v_cmp
-; GCN: v_cmp_ne_i32_e32 vcc,
-; GCN-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
-; GCN-NEXT: buffer_store_byte [[RESULT]]
-; GCN-NEXT: s_endpgm
-
-; EG: SETNE_INT * [[CMP:T[0-9]+]].[[CMPCHAN:[XYZW]]], KC0[2].Z, KC0[2].W
-; EG: AND_INT T{{[0-9]+.[XYZW]}}, PS, 1
-define void @sext_bool_icmp_ne_0(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
- %icmp0 = icmp ne i32 %a, %b
- %ext = sext i1 %icmp0 to i32
- %icmp1 = icmp ne i32 %ext, 0
- store i1 %icmp1, i1 addrspace(1)* %out
- ret void
-}
-
-; This really folds away to false
-; FUNC-LABEL: {{^}}sext_bool_icmp_eq_1:
-; GCN: v_cmp_eq_i32_e32 vcc,
-; GCN-NEXT: v_cndmask_b32_e64 [[TMP:v[0-9]+]], 0, -1, vcc
-; GCN-NEXT: v_cmp_eq_i32_e32 vcc, 1, [[TMP]]{{$}}
-; GCN-NEXT: v_cndmask_b32_e64 [[TMP:v[0-9]+]], 0, 1,
-; GCN-NEXT: buffer_store_byte [[TMP]]
-; GCN-NEXT: s_endpgm
-define void @sext_bool_icmp_eq_1(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
- %icmp0 = icmp eq i32 %a, %b
- %ext = sext i1 %icmp0 to i32
- %icmp1 = icmp eq i32 %ext, 1
- store i1 %icmp1, i1 addrspace(1)* %out
- ret void
-}
-
-; This really folds away to true
-; FUNC-LABEL: {{^}}sext_bool_icmp_ne_1:
-; GCN: v_cmp_ne_i32_e32 vcc,
-; GCN-NEXT: v_cndmask_b32_e64 [[TMP:v[0-9]+]], 0, -1, vcc
-; GCN-NEXT: v_cmp_ne_i32_e32 vcc, 1, [[TMP]]{{$}}
-; GCN-NEXT: v_cndmask_b32_e64 [[TMP:v[0-9]+]], 0, 1,
-; GCN-NEXT: buffer_store_byte [[TMP]]
-; GCN-NEXT: s_endpgm
-define void @sext_bool_icmp_ne_1(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
- %icmp0 = icmp ne i32 %a, %b
- %ext = sext i1 %icmp0 to i32
- %icmp1 = icmp ne i32 %ext, 1
- store i1 %icmp1, i1 addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}zext_bool_icmp_eq_0:
-; GCN-NOT: v_cmp
-; GCN: v_cmp_ne_i32_e32 vcc,
-; GCN-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
-; GCN-NEXT: buffer_store_byte [[RESULT]]
-; GCN-NEXT: s_endpgm
-define void @zext_bool_icmp_eq_0(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
- %icmp0 = icmp eq i32 %a, %b
- %ext = zext i1 %icmp0 to i32
- %icmp1 = icmp eq i32 %ext, 0
- store i1 %icmp1, i1 addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}zext_bool_icmp_ne_0:
-; GCN-NOT: v_cmp
-; GCN: v_cmp_ne_i32_e32 vcc,
-; GCN-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
-; GCN-NEXT: buffer_store_byte [[RESULT]]
-; GCN-NEXT: s_endpgm
-define void @zext_bool_icmp_ne_0(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
- %icmp0 = icmp ne i32 %a, %b
- %ext = zext i1 %icmp0 to i32
- %icmp1 = icmp ne i32 %ext, 0
- store i1 %icmp1, i1 addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}zext_bool_icmp_eq_1:
-; GCN-NOT: v_cmp
-; GCN: v_cmp_eq_i32_e32 vcc,
-; GCN-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
-; GCN-NEXT: buffer_store_byte [[RESULT]]
-; GCN-NEXT: s_endpgm
-define void @zext_bool_icmp_eq_1(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
- %icmp0 = icmp eq i32 %a, %b
- %ext = zext i1 %icmp0 to i32
- %icmp1 = icmp eq i32 %ext, 1
- store i1 %icmp1, i1 addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}zext_bool_icmp_ne_1:
-; GCN-NOT: v_cmp
-; GCN: v_cmp_eq_i32_e32 vcc,
-; GCN-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
-; GCN-NEXT: buffer_store_byte [[RESULT]]
-define void @zext_bool_icmp_ne_1(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
- %icmp0 = icmp ne i32 %a, %b
- %ext = zext i1 %icmp0 to i32
- %icmp1 = icmp ne i32 %ext, 1
- store i1 %icmp1, i1 addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}sext_bool_icmp_ne_k:
-; SI-DAG: s_load_dword [[A:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
-; SI-DAG: s_load_dword [[B:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
-; VI-DAG: s_load_dword [[A:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c
-; VI-DAG: s_load_dword [[B:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x30
-; GCN: v_mov_b32_e32 [[VB:v[0-9]+]], [[B]]
-; GCN: v_cmp_ne_i32_e32 vcc, 2, [[VB]]{{$}}
-; GCN: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
-; GCN: buffer_store_byte
-; GCN: s_endpgm
-define void @sext_bool_icmp_ne_k(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
- %icmp0 = icmp ne i32 %a, %b
- %ext = sext i1 %icmp0 to i32
- %icmp1 = icmp ne i32 %ext, 2
- store i1 %icmp1, i1 addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}cmp_zext_k_i8max:
-; GCN: buffer_load_ubyte [[B:v[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0 offset:44
-; GCN: v_mov_b32_e32 [[K255:v[0-9]+]], 0xff{{$}}
-; GCN: v_cmp_ne_i32_e32 vcc, [[K255]], [[B]]
-; GCN-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
-; GCN-NEXT: buffer_store_byte [[RESULT]]
-; GCN: s_endpgm
-define void @cmp_zext_k_i8max(i1 addrspace(1)* %out, i8 %b) nounwind {
- %b.ext = zext i8 %b to i32
- %icmp0 = icmp ne i32 %b.ext, 255
- store i1 %icmp0, i1 addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}cmp_sext_k_neg1:
-; GCN: buffer_load_sbyte [[B:v[0-9]+]]
-; GCN: v_cmp_ne_i32_e32 vcc, -1, [[B]]{{$}}
-; GCN-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
-; GCN-NEXT: buffer_store_byte [[RESULT]]
-; GCN: s_endpgm
-define void @cmp_sext_k_neg1(i1 addrspace(1)* %out, i8 addrspace(1)* %b.ptr) nounwind {
- %b = load i8, i8 addrspace(1)* %b.ptr
- %b.ext = sext i8 %b to i32
- %icmp0 = icmp ne i32 %b.ext, -1
- store i1 %icmp0, i1 addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}cmp_sext_k_neg1_i8_sext_arg:
-; GCN: s_load_dword [[B:s[0-9]+]]
-; GCN: v_cmp_ne_i32_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], -1, [[B]]
-; GCN-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, [[CMP]]
-; GCN-NEXT: buffer_store_byte [[RESULT]]
-; GCN: s_endpgm
-define void @cmp_sext_k_neg1_i8_sext_arg(i1 addrspace(1)* %out, i8 signext %b) nounwind {
- %b.ext = sext i8 %b to i32
- %icmp0 = icmp ne i32 %b.ext, -1
- store i1 %icmp0, i1 addrspace(1)* %out
- ret void
-}
-
-; FIXME: This ends up doing a buffer_load_ubyte and a compare with 255. This
-; seems to be caused by ordering problems when load widths are not allowed to
-; be reduced. It should do a buffer_load_sbyte and compare with -1.
-
-; FUNC-LABEL: {{^}}cmp_sext_k_neg1_i8_arg:
-; GCN-DAG: buffer_load_ubyte [[B:v[0-9]+]]
-; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0xff{{$}}
-; GCN: v_cmp_ne_i32_e32 vcc, [[K]], [[B]]{{$}}
-; GCN-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
-; GCN-NEXT: buffer_store_byte [[RESULT]]
-; GCN: s_endpgm
-define void @cmp_sext_k_neg1_i8_arg(i1 addrspace(1)* %out, i8 %b) nounwind {
- %b.ext = sext i8 %b to i32
- %icmp0 = icmp ne i32 %b.ext, -1
- store i1 %icmp0, i1 addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}cmp_zext_k_neg1:
-; GCN: v_mov_b32_e32 [[RESULT:v[0-9]+]], 1{{$}}
-; GCN: buffer_store_byte [[RESULT]]
-; GCN: s_endpgm
-define void @cmp_zext_k_neg1(i1 addrspace(1)* %out, i8 %b) nounwind {
- %b.ext = zext i8 %b to i32
- %icmp0 = icmp ne i32 %b.ext, -1
- store i1 %icmp0, i1 addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}zext_bool_icmp_ne_k:
-; GCN: v_mov_b32_e32 [[RESULT:v[0-9]+]], 1{{$}}
-; GCN: buffer_store_byte [[RESULT]]
-; GCN-NEXT: s_endpgm
-define void @zext_bool_icmp_ne_k(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
- %icmp0 = icmp ne i32 %a, %b
- %ext = zext i1 %icmp0 to i32
- %icmp1 = icmp ne i32 %ext, 2
- store i1 %icmp1, i1 addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}zext_bool_icmp_eq_k:
-; GCN: v_mov_b32_e32 [[RESULT:v[0-9]+]], 0{{$}}
-; GCN: buffer_store_byte [[RESULT]]
-; GCN-NEXT: s_endpgm
-define void @zext_bool_icmp_eq_k(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
- %icmp0 = icmp ne i32 %a, %b
- %ext = zext i1 %icmp0 to i32
- %icmp1 = icmp eq i32 %ext, 2
- store i1 %icmp1, i1 addrspace(1)* %out
- ret void
-}
Removed: llvm/trunk/test/CodeGen/R600/setcc.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/setcc.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/setcc.ll (original)
+++ llvm/trunk/test/CodeGen/R600/setcc.ll (removed)
@@ -1,377 +0,0 @@
-; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=R600 --check-prefix=FUNC %s
-; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-
-declare i32 @llvm.r600.read.tidig.x() nounwind readnone
-
-; FUNC-LABEL: {{^}}setcc_v2i32:
-; R600-DAG: SETE_INT * T{{[0-9]+\.[XYZW]}}, KC0[3].X, KC0[3].Z
-; R600-DAG: SETE_INT * T{{[0-9]+\.[XYZW]}}, KC0[2].W, KC0[3].Y
-
-define void @setcc_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %a, <2 x i32> %b) {
- %result = icmp eq <2 x i32> %a, %b
- %sext = sext <2 x i1> %result to <2 x i32>
- store <2 x i32> %sext, <2 x i32> addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}setcc_v4i32:
-; R600-DAG: SETE_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-; R600-DAG: SETE_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-; R600-DAG: SETE_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-; R600-DAG: SETE_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-
-define void @setcc_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
- %b_ptr = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %in, i32 1
- %a = load <4 x i32>, <4 x i32> addrspace(1) * %in
- %b = load <4 x i32>, <4 x i32> addrspace(1) * %b_ptr
- %result = icmp eq <4 x i32> %a, %b
- %sext = sext <4 x i1> %result to <4 x i32>
- store <4 x i32> %sext, <4 x i32> addrspace(1)* %out
- ret void
-}
-
-;;;==========================================================================;;;
-;; Float comparisons
-;;;==========================================================================;;;
-
-; FUNC-LABEL: {{^}}f32_oeq:
-; R600: SETE_DX10
-; SI: v_cmp_eq_f32
-define void @f32_oeq(i32 addrspace(1)* %out, float %a, float %b) {
-entry:
- %0 = fcmp oeq float %a, %b
- %1 = sext i1 %0 to i32
- store i32 %1, i32 addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}f32_ogt:
-; R600: SETGT_DX10
-; SI: v_cmp_gt_f32
-define void @f32_ogt(i32 addrspace(1)* %out, float %a, float %b) {
-entry:
- %0 = fcmp ogt float %a, %b
- %1 = sext i1 %0 to i32
- store i32 %1, i32 addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}f32_oge:
-; R600: SETGE_DX10
-; SI: v_cmp_ge_f32
-define void @f32_oge(i32 addrspace(1)* %out, float %a, float %b) {
-entry:
- %0 = fcmp oge float %a, %b
- %1 = sext i1 %0 to i32
- store i32 %1, i32 addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}f32_olt:
-; R600: SETGT_DX10
-; SI: v_cmp_lt_f32
-define void @f32_olt(i32 addrspace(1)* %out, float %a, float %b) {
-entry:
- %0 = fcmp olt float %a, %b
- %1 = sext i1 %0 to i32
- store i32 %1, i32 addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}f32_ole:
-; R600: SETGE_DX10
-; SI: v_cmp_le_f32
-define void @f32_ole(i32 addrspace(1)* %out, float %a, float %b) {
-entry:
- %0 = fcmp ole float %a, %b
- %1 = sext i1 %0 to i32
- store i32 %1, i32 addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}f32_one:
-; R600-DAG: SETE_DX10
-; R600-DAG: SETE_DX10
-; R600-DAG: AND_INT
-; R600-DAG: SETNE_DX10
-; R600-DAG: AND_INT
-; R600-DAG: SETNE_INT
-
-; SI: v_cmp_lg_f32_e32 vcc
-; SI-NEXT: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1, vcc
-define void @f32_one(i32 addrspace(1)* %out, float %a, float %b) {
-entry:
- %0 = fcmp one float %a, %b
- %1 = sext i1 %0 to i32
- store i32 %1, i32 addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}f32_ord:
-; R600-DAG: SETE_DX10
-; R600-DAG: SETE_DX10
-; R600-DAG: AND_INT
-; R600-DAG: SETNE_INT
-; SI: v_cmp_o_f32
-define void @f32_ord(i32 addrspace(1)* %out, float %a, float %b) {
-entry:
- %0 = fcmp ord float %a, %b
- %1 = sext i1 %0 to i32
- store i32 %1, i32 addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}f32_ueq:
-; R600-DAG: SETNE_DX10
-; R600-DAG: SETNE_DX10
-; R600-DAG: OR_INT
-; R600-DAG: SETE_DX10
-; R600-DAG: OR_INT
-; R600-DAG: SETNE_INT
-
-; SI: v_cmp_nlg_f32_e32 vcc
-; SI-NEXT: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1, vcc
-define void @f32_ueq(i32 addrspace(1)* %out, float %a, float %b) {
-entry:
- %0 = fcmp ueq float %a, %b
- %1 = sext i1 %0 to i32
- store i32 %1, i32 addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}f32_ugt:
-; R600: SETGE
-; R600: SETE_DX10
-; SI: v_cmp_nle_f32_e32 vcc
-; SI-NEXT: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1, vcc
-define void @f32_ugt(i32 addrspace(1)* %out, float %a, float %b) {
-entry:
- %0 = fcmp ugt float %a, %b
- %1 = sext i1 %0 to i32
- store i32 %1, i32 addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}f32_uge:
-; R600: SETGT
-; R600: SETE_DX10
-
-; SI: v_cmp_nlt_f32_e32 vcc
-; SI-NEXT: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1, vcc
-define void @f32_uge(i32 addrspace(1)* %out, float %a, float %b) {
-entry:
- %0 = fcmp uge float %a, %b
- %1 = sext i1 %0 to i32
- store i32 %1, i32 addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}f32_ult:
-; R600: SETGE
-; R600: SETE_DX10
-
-; SI: v_cmp_nge_f32_e32 vcc
-; SI-NEXT: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1, vcc
-define void @f32_ult(i32 addrspace(1)* %out, float %a, float %b) {
-entry:
- %0 = fcmp ult float %a, %b
- %1 = sext i1 %0 to i32
- store i32 %1, i32 addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}f32_ule:
-; R600: SETGT
-; R600: SETE_DX10
-
-; SI: v_cmp_ngt_f32_e32 vcc
-; SI-NEXT: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1, vcc
-define void @f32_ule(i32 addrspace(1)* %out, float %a, float %b) {
-entry:
- %0 = fcmp ule float %a, %b
- %1 = sext i1 %0 to i32
- store i32 %1, i32 addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}f32_une:
-; R600: SETNE_DX10
-; SI: v_cmp_neq_f32
-define void @f32_une(i32 addrspace(1)* %out, float %a, float %b) {
-entry:
- %0 = fcmp une float %a, %b
- %1 = sext i1 %0 to i32
- store i32 %1, i32 addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}f32_uno:
-; R600: SETNE_DX10
-; R600: SETNE_DX10
-; R600: OR_INT
-; R600: SETNE_INT
-; SI: v_cmp_u_f32
-define void @f32_uno(i32 addrspace(1)* %out, float %a, float %b) {
-entry:
- %0 = fcmp uno float %a, %b
- %1 = sext i1 %0 to i32
- store i32 %1, i32 addrspace(1)* %out
- ret void
-}
-
-;;;==========================================================================;;;
-;; 32-bit integer comparisons
-;;;==========================================================================;;;
-
-; FUNC-LABEL: {{^}}i32_eq:
-; R600: SETE_INT
-; SI: v_cmp_eq_i32
-define void @i32_eq(i32 addrspace(1)* %out, i32 %a, i32 %b) {
-entry:
- %0 = icmp eq i32 %a, %b
- %1 = sext i1 %0 to i32
- store i32 %1, i32 addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}i32_ne:
-; R600: SETNE_INT
-; SI: v_cmp_ne_i32
-define void @i32_ne(i32 addrspace(1)* %out, i32 %a, i32 %b) {
-entry:
- %0 = icmp ne i32 %a, %b
- %1 = sext i1 %0 to i32
- store i32 %1, i32 addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}i32_ugt:
-; R600: SETGT_UINT
-; SI: v_cmp_gt_u32
-define void @i32_ugt(i32 addrspace(1)* %out, i32 %a, i32 %b) {
-entry:
- %0 = icmp ugt i32 %a, %b
- %1 = sext i1 %0 to i32
- store i32 %1, i32 addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}i32_uge:
-; R600: SETGE_UINT
-; SI: v_cmp_ge_u32
-define void @i32_uge(i32 addrspace(1)* %out, i32 %a, i32 %b) {
-entry:
- %0 = icmp uge i32 %a, %b
- %1 = sext i1 %0 to i32
- store i32 %1, i32 addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}i32_ult:
-; R600: SETGT_UINT
-; SI: v_cmp_lt_u32
-define void @i32_ult(i32 addrspace(1)* %out, i32 %a, i32 %b) {
-entry:
- %0 = icmp ult i32 %a, %b
- %1 = sext i1 %0 to i32
- store i32 %1, i32 addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}i32_ule:
-; R600: SETGE_UINT
-; SI: v_cmp_le_u32
-define void @i32_ule(i32 addrspace(1)* %out, i32 %a, i32 %b) {
-entry:
- %0 = icmp ule i32 %a, %b
- %1 = sext i1 %0 to i32
- store i32 %1, i32 addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}i32_sgt:
-; R600: SETGT_INT
-; SI: v_cmp_gt_i32
-define void @i32_sgt(i32 addrspace(1)* %out, i32 %a, i32 %b) {
-entry:
- %0 = icmp sgt i32 %a, %b
- %1 = sext i1 %0 to i32
- store i32 %1, i32 addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}i32_sge:
-; R600: SETGE_INT
-; SI: v_cmp_ge_i32
-define void @i32_sge(i32 addrspace(1)* %out, i32 %a, i32 %b) {
-entry:
- %0 = icmp sge i32 %a, %b
- %1 = sext i1 %0 to i32
- store i32 %1, i32 addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}i32_slt:
-; R600: SETGT_INT
-; SI: v_cmp_lt_i32
-define void @i32_slt(i32 addrspace(1)* %out, i32 %a, i32 %b) {
-entry:
- %0 = icmp slt i32 %a, %b
- %1 = sext i1 %0 to i32
- store i32 %1, i32 addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}i32_sle:
-; R600: SETGE_INT
-; SI: v_cmp_le_i32
-define void @i32_sle(i32 addrspace(1)* %out, i32 %a, i32 %b) {
-entry:
- %0 = icmp sle i32 %a, %b
- %1 = sext i1 %0 to i32
- store i32 %1, i32 addrspace(1)* %out
- ret void
-}
-
-; FIXME: This does 4 compares
-; FUNC-LABEL: {{^}}v3i32_eq:
-; SI-DAG: v_cmp_eq_i32
-; SI-DAG: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1,
-; SI-DAG: v_cmp_eq_i32
-; SI-DAG: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1,
-; SI-DAG: v_cmp_eq_i32
-; SI-DAG: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1,
-; SI: s_endpgm
-define void @v3i32_eq(<3 x i32> addrspace(1)* %out, <3 x i32> addrspace(1)* %ptra, <3 x i32> addrspace(1)* %ptrb) {
- %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %gep.a = getelementptr <3 x i32>, <3 x i32> addrspace(1)* %ptra, i32 %tid
- %gep.b = getelementptr <3 x i32>, <3 x i32> addrspace(1)* %ptrb, i32 %tid
- %gep.out = getelementptr <3 x i32>, <3 x i32> addrspace(1)* %out, i32 %tid
- %a = load <3 x i32>, <3 x i32> addrspace(1)* %gep.a
- %b = load <3 x i32>, <3 x i32> addrspace(1)* %gep.b
- %cmp = icmp eq <3 x i32> %a, %b
- %ext = sext <3 x i1> %cmp to <3 x i32>
- store <3 x i32> %ext, <3 x i32> addrspace(1)* %gep.out
- ret void
-}
-
-; FUNC-LABEL: {{^}}v3i8_eq:
-; SI-DAG: v_cmp_eq_i32
-; SI-DAG: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1,
-; SI-DAG: v_cmp_eq_i32
-; SI-DAG: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1,
-; SI-DAG: v_cmp_eq_i32
-; SI-DAG: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1,
-; SI: s_endpgm
-define void @v3i8_eq(<3 x i8> addrspace(1)* %out, <3 x i8> addrspace(1)* %ptra, <3 x i8> addrspace(1)* %ptrb) {
- %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %gep.a = getelementptr <3 x i8>, <3 x i8> addrspace(1)* %ptra, i32 %tid
- %gep.b = getelementptr <3 x i8>, <3 x i8> addrspace(1)* %ptrb, i32 %tid
- %gep.out = getelementptr <3 x i8>, <3 x i8> addrspace(1)* %out, i32 %tid
- %a = load <3 x i8>, <3 x i8> addrspace(1)* %gep.a
- %b = load <3 x i8>, <3 x i8> addrspace(1)* %gep.b
- %cmp = icmp eq <3 x i8> %a, %b
- %ext = sext <3 x i1> %cmp to <3 x i8>
- store <3 x i8> %ext, <3 x i8> addrspace(1)* %gep.out
- ret void
-}
Removed: llvm/trunk/test/CodeGen/R600/setcc64.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/setcc64.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/setcc64.ll (original)
+++ llvm/trunk/test/CodeGen/R600/setcc64.ll (removed)
@@ -1,259 +0,0 @@
-;RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs| FileCheck --check-prefix=SI --check-prefix=FUNC %s
-;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs| FileCheck --check-prefix=SI --check-prefix=FUNC %s
-
-; XXX: Merge this into setcc, once R600 supports 64-bit operations
-
-;;;==========================================================================;;;
-;; Double comparisons
-;;;==========================================================================;;;
-
-; FUNC-LABEL: {{^}}f64_oeq:
-; SI: v_cmp_eq_f64
-define void @f64_oeq(i32 addrspace(1)* %out, double %a, double %b) {
-entry:
- %0 = fcmp oeq double %a, %b
- %1 = sext i1 %0 to i32
- store i32 %1, i32 addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}f64_ogt:
-; SI: v_cmp_gt_f64
-define void @f64_ogt(i32 addrspace(1)* %out, double %a, double %b) {
-entry:
- %0 = fcmp ogt double %a, %b
- %1 = sext i1 %0 to i32
- store i32 %1, i32 addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}f64_oge:
-; SI: v_cmp_ge_f64
-define void @f64_oge(i32 addrspace(1)* %out, double %a, double %b) {
-entry:
- %0 = fcmp oge double %a, %b
- %1 = sext i1 %0 to i32
- store i32 %1, i32 addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}f64_olt:
-; SI: v_cmp_lt_f64
-define void @f64_olt(i32 addrspace(1)* %out, double %a, double %b) {
-entry:
- %0 = fcmp olt double %a, %b
- %1 = sext i1 %0 to i32
- store i32 %1, i32 addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}f64_ole:
-; SI: v_cmp_le_f64
-define void @f64_ole(i32 addrspace(1)* %out, double %a, double %b) {
-entry:
- %0 = fcmp ole double %a, %b
- %1 = sext i1 %0 to i32
- store i32 %1, i32 addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}f64_one:
-; SI: v_cmp_lg_f64_e32 vcc
-; SI-NEXT: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1, vcc
-define void @f64_one(i32 addrspace(1)* %out, double %a, double %b) {
-entry:
- %0 = fcmp one double %a, %b
- %1 = sext i1 %0 to i32
- store i32 %1, i32 addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}f64_ord:
-; SI: v_cmp_o_f64
-define void @f64_ord(i32 addrspace(1)* %out, double %a, double %b) {
-entry:
- %0 = fcmp ord double %a, %b
- %1 = sext i1 %0 to i32
- store i32 %1, i32 addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}f64_ueq:
-; SI: v_cmp_nlg_f64_e32 vcc
-; SI-NEXT: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1, vcc
-define void @f64_ueq(i32 addrspace(1)* %out, double %a, double %b) {
-entry:
- %0 = fcmp ueq double %a, %b
- %1 = sext i1 %0 to i32
- store i32 %1, i32 addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}f64_ugt:
-
-; SI: v_cmp_nle_f64_e32 vcc
-; SI-NEXT: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1, vcc
-define void @f64_ugt(i32 addrspace(1)* %out, double %a, double %b) {
-entry:
- %0 = fcmp ugt double %a, %b
- %1 = sext i1 %0 to i32
- store i32 %1, i32 addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}f64_uge:
-; SI: v_cmp_nlt_f64_e32 vcc
-; SI-NEXT: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1, vcc
-define void @f64_uge(i32 addrspace(1)* %out, double %a, double %b) {
-entry:
- %0 = fcmp uge double %a, %b
- %1 = sext i1 %0 to i32
- store i32 %1, i32 addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}f64_ult:
-; SI: v_cmp_nge_f64_e32 vcc
-; SI-NEXT: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1, vcc
-define void @f64_ult(i32 addrspace(1)* %out, double %a, double %b) {
-entry:
- %0 = fcmp ult double %a, %b
- %1 = sext i1 %0 to i32
- store i32 %1, i32 addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}f64_ule:
-; SI: v_cmp_ngt_f64_e32 vcc
-; SI-NEXT: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1, vcc
-define void @f64_ule(i32 addrspace(1)* %out, double %a, double %b) {
-entry:
- %0 = fcmp ule double %a, %b
- %1 = sext i1 %0 to i32
- store i32 %1, i32 addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}f64_une:
-; SI: v_cmp_neq_f64
-define void @f64_une(i32 addrspace(1)* %out, double %a, double %b) {
-entry:
- %0 = fcmp une double %a, %b
- %1 = sext i1 %0 to i32
- store i32 %1, i32 addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}f64_uno:
-; SI: v_cmp_u_f64
-define void @f64_uno(i32 addrspace(1)* %out, double %a, double %b) {
-entry:
- %0 = fcmp uno double %a, %b
- %1 = sext i1 %0 to i32
- store i32 %1, i32 addrspace(1)* %out
- ret void
-}
-
-;;;==========================================================================;;;
-;; 64-bit integer comparisons
-;;;==========================================================================;;;
-
-; FUNC-LABEL: {{^}}i64_eq:
-; SI: v_cmp_eq_i64
-define void @i64_eq(i32 addrspace(1)* %out, i64 %a, i64 %b) {
-entry:
- %0 = icmp eq i64 %a, %b
- %1 = sext i1 %0 to i32
- store i32 %1, i32 addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}i64_ne:
-; SI: v_cmp_ne_i64
-define void @i64_ne(i32 addrspace(1)* %out, i64 %a, i64 %b) {
-entry:
- %0 = icmp ne i64 %a, %b
- %1 = sext i1 %0 to i32
- store i32 %1, i32 addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}i64_ugt:
-; SI: v_cmp_gt_u64
-define void @i64_ugt(i32 addrspace(1)* %out, i64 %a, i64 %b) {
-entry:
- %0 = icmp ugt i64 %a, %b
- %1 = sext i1 %0 to i32
- store i32 %1, i32 addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}i64_uge:
-; SI: v_cmp_ge_u64
-define void @i64_uge(i32 addrspace(1)* %out, i64 %a, i64 %b) {
-entry:
- %0 = icmp uge i64 %a, %b
- %1 = sext i1 %0 to i32
- store i32 %1, i32 addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}i64_ult:
-; SI: v_cmp_lt_u64
-define void @i64_ult(i32 addrspace(1)* %out, i64 %a, i64 %b) {
-entry:
- %0 = icmp ult i64 %a, %b
- %1 = sext i1 %0 to i32
- store i32 %1, i32 addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}i64_ule:
-; SI: v_cmp_le_u64
-define void @i64_ule(i32 addrspace(1)* %out, i64 %a, i64 %b) {
-entry:
- %0 = icmp ule i64 %a, %b
- %1 = sext i1 %0 to i32
- store i32 %1, i32 addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}i64_sgt:
-; SI: v_cmp_gt_i64
-define void @i64_sgt(i32 addrspace(1)* %out, i64 %a, i64 %b) {
-entry:
- %0 = icmp sgt i64 %a, %b
- %1 = sext i1 %0 to i32
- store i32 %1, i32 addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}i64_sge:
-; SI: v_cmp_ge_i64
-define void @i64_sge(i32 addrspace(1)* %out, i64 %a, i64 %b) {
-entry:
- %0 = icmp sge i64 %a, %b
- %1 = sext i1 %0 to i32
- store i32 %1, i32 addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}i64_slt:
-; SI: v_cmp_lt_i64
-define void @i64_slt(i32 addrspace(1)* %out, i64 %a, i64 %b) {
-entry:
- %0 = icmp slt i64 %a, %b
- %1 = sext i1 %0 to i32
- store i32 %1, i32 addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}i64_sle:
-; SI: v_cmp_le_i64
-define void @i64_sle(i32 addrspace(1)* %out, i64 %a, i64 %b) {
-entry:
- %0 = icmp sle i64 %a, %b
- %1 = sext i1 %0 to i32
- store i32 %1, i32 addrspace(1)* %out
- ret void
-}
Removed: llvm/trunk/test/CodeGen/R600/seto.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/seto.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/seto.ll (original)
+++ llvm/trunk/test/CodeGen/R600/seto.ll (removed)
@@ -1,15 +0,0 @@
-; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck %s
-
-; CHECK-LABEL: {{^}}main:
-; CHECK: v_cmp_o_f32_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], [[SREG:s[0-9]+]], [[SREG]]
-; CHECK-NEXT: v_cndmask_b32_e64 {{v[0-9]+}}, 0, 1.0, [[CMP]]
-define void @main(float %p) {
-main_body:
- %c = fcmp oeq float %p, %p
- %r = select i1 %c, float 1.000000e+00, float 0.000000e+00
- call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %r, float %r, float %r, float %r)
- ret void
-}
-
-declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
Removed: llvm/trunk/test/CodeGen/R600/setuo.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/setuo.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/setuo.ll (original)
+++ llvm/trunk/test/CodeGen/R600/setuo.ll (removed)
@@ -1,15 +0,0 @@
-; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck %s
-
-; CHECK-LABEL: {{^}}main:
-; CHECK: v_cmp_u_f32_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], [[SREG:s[0-9]+]], [[SREG]]
-; CHECK-NEXT: v_cndmask_b32_e64 {{v[0-9]+}}, 0, 1.0, [[CMP]]
-define void @main(float %p) {
-main_body:
- %c = fcmp une float %p, %p
- %r = select i1 %c, float 1.000000e+00, float 0.000000e+00
- call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %r, float %r, float %r, float %r)
- ret void
-}
-
-declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
Removed: llvm/trunk/test/CodeGen/R600/sext-eliminate.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/sext-eliminate.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/sext-eliminate.ll (original)
+++ llvm/trunk/test/CodeGen/R600/sext-eliminate.ll (removed)
@@ -1,26 +0,0 @@
-; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
-
-
-; FUNC-LABEL: {{^}}sext_in_reg_i1_i32_add:
-
-; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+\.[XYZW]]], [[ADDR:T[0-9]+.[XYZW]]]
-; EG: SUB_INT {{[* ]*}}[[RES]]
-; EG-NOT: BFE
-define void @sext_in_reg_i1_i32_add(i32 addrspace(1)* %out, i1 %a, i32 %b) {
- %sext = sext i1 %a to i32
- %res = add i32 %b, %sext
- store i32 %res, i32 addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}sext_in_reg_i1_i32_sub:
-
-; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+\.[XYZW]]], [[ADDR:T[0-9]+.[XYZW]]]
-; EG: ADD_INT {{[* ]*}}[[RES]]
-; EG-NOT: BFE
-define void @sext_in_reg_i1_i32_sub(i32 addrspace(1)* %out, i1 %a, i32 %b) {
- %sext = sext i1 %a to i32
- %res = sub i32 %b, %sext
- store i32 %res, i32 addrspace(1)* %out
- ret void
-}
Removed: llvm/trunk/test/CodeGen/R600/sext-in-reg.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/sext-in-reg.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/sext-in-reg.ll (original)
+++ llvm/trunk/test/CodeGen/R600/sext-in-reg.ll (removed)
@@ -1,611 +0,0 @@
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
-
-declare i32 @llvm.AMDGPU.imax(i32, i32) nounwind readnone
-declare i32 @llvm.r600.read.tidig.x() nounwind readnone
-
-
-; FUNC-LABEL: {{^}}sext_in_reg_i1_i32:
-; SI: s_load_dword [[ARG:s[0-9]+]],
-; SI: s_bfe_i32 [[SEXTRACT:s[0-9]+]], [[ARG]], 0x10000
-; SI: v_mov_b32_e32 [[EXTRACT:v[0-9]+]], [[SEXTRACT]]
-; SI: buffer_store_dword [[EXTRACT]],
-
-; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+\.[XYZW]]], [[ADDR:T[0-9]+.[XYZW]]]
-; EG: BFE_INT [[RES]], {{.*}}, 0.0, 1
-; EG-NEXT: LSHR * [[ADDR]]
-define void @sext_in_reg_i1_i32(i32 addrspace(1)* %out, i32 %in) {
- %shl = shl i32 %in, 31
- %sext = ashr i32 %shl, 31
- store i32 %sext, i32 addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}sext_in_reg_i8_to_i32:
-; SI: s_add_i32 [[VAL:s[0-9]+]],
-; SI: s_sext_i32_i8 [[EXTRACT:s[0-9]+]], [[VAL]]
-; SI: v_mov_b32_e32 [[VEXTRACT:v[0-9]+]], [[EXTRACT]]
-; SI: buffer_store_dword [[VEXTRACT]],
-
-; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+\.[XYZW]]], [[ADDR:T[0-9]+.[XYZW]]]
-; EG: ADD_INT
-; EG-NEXT: BFE_INT [[RES]], {{.*}}, 0.0, literal
-; EG-NEXT: LSHR * [[ADDR]]
-define void @sext_in_reg_i8_to_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
- %c = add i32 %a, %b ; add to prevent folding into extload
- %shl = shl i32 %c, 24
- %ashr = ashr i32 %shl, 24
- store i32 %ashr, i32 addrspace(1)* %out, align 4
- ret void
-}
-
-; FUNC-LABEL: {{^}}sext_in_reg_i16_to_i32:
-; SI: s_add_i32 [[VAL:s[0-9]+]],
-; SI: s_sext_i32_i16 [[EXTRACT:s[0-9]+]], [[VAL]]
-; SI: v_mov_b32_e32 [[VEXTRACT:v[0-9]+]], [[EXTRACT]]
-; SI: buffer_store_dword [[VEXTRACT]],
-
-; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+\.[XYZW]]], [[ADDR:T[0-9]+.[XYZW]]]
-; EG: ADD_INT
-; EG-NEXT: BFE_INT [[RES]], {{.*}}, 0.0, literal
-; EG-NEXT: LSHR * [[ADDR]]
-define void @sext_in_reg_i16_to_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
- %c = add i32 %a, %b ; add to prevent folding into extload
- %shl = shl i32 %c, 16
- %ashr = ashr i32 %shl, 16
- store i32 %ashr, i32 addrspace(1)* %out, align 4
- ret void
-}
-
-; FUNC-LABEL: {{^}}sext_in_reg_i8_to_v1i32:
-; SI: s_add_i32 [[VAL:s[0-9]+]],
-; SI: s_sext_i32_i8 [[EXTRACT:s[0-9]+]], [[VAL]]
-; SI: v_mov_b32_e32 [[VEXTRACT:v[0-9]+]], [[EXTRACT]]
-; SI: buffer_store_dword [[VEXTRACT]],
-
-; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+\.[XYZW]]], [[ADDR:T[0-9]+.[XYZW]]]
-; EG: ADD_INT
-; EG-NEXT: BFE_INT [[RES]], {{.*}}, 0.0, literal
-; EG-NEXT: LSHR * [[ADDR]]
-define void @sext_in_reg_i8_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i32> %a, <1 x i32> %b) nounwind {
- %c = add <1 x i32> %a, %b ; add to prevent folding into extload
- %shl = shl <1 x i32> %c, <i32 24>
- %ashr = ashr <1 x i32> %shl, <i32 24>
- store <1 x i32> %ashr, <1 x i32> addrspace(1)* %out, align 4
- ret void
-}
-
-; FUNC-LABEL: {{^}}sext_in_reg_i1_to_i64:
-; SI: s_lshl_b64 [[VAL:s\[[0-9]+:[0-9]+\]]]
-; SI-DAG: s_bfe_i64 s{{\[}}[[SLO:[0-9]+]]:[[SHI:[0-9]+]]{{\]}}, [[VAL]], 0x10000
-; SI-DAG: v_mov_b32_e32 v[[VLO:[0-9]+]], s[[SLO]]
-; SI-DAG: v_mov_b32_e32 v[[VHI:[0-9]+]], s[[SHI]]
-; SI: buffer_store_dwordx2 v{{\[}}[[VLO]]:[[VHI]]{{\]}}
-define void @sext_in_reg_i1_to_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
- %c = shl i64 %a, %b
- %shl = shl i64 %c, 63
- %ashr = ashr i64 %shl, 63
- store i64 %ashr, i64 addrspace(1)* %out, align 8
- ret void
-}
-
-; FUNC-LABEL: {{^}}sext_in_reg_i8_to_i64:
-; SI: s_lshl_b64 [[VAL:s\[[0-9]+:[0-9]+\]]]
-; SI-DAG: s_bfe_i64 s{{\[}}[[SLO:[0-9]+]]:[[SHI:[0-9]+]]{{\]}}, [[VAL]], 0x80000
-; SI-DAG: v_mov_b32_e32 v[[VLO:[0-9]+]], s[[SLO]]
-; SI-DAG: v_mov_b32_e32 v[[VHI:[0-9]+]], s[[SHI]]
-; SI: buffer_store_dwordx2 v{{\[}}[[VLO]]:[[VHI]]{{\]}}
-
-; EG: MEM_{{.*}} STORE_{{.*}} [[RES_LO:T[0-9]+\.[XYZW]]], [[ADDR_LO:T[0-9]+.[XYZW]]]
-; EG: MEM_{{.*}} STORE_{{.*}} [[RES_HI:T[0-9]+\.[XYZW]]], [[ADDR_HI:T[0-9]+.[XYZW]]]
-; EG: LSHL
-; EG: BFE_INT {{\*?}} [[RES_LO]], {{.*}}, 0.0, literal
-; EG: ASHR [[RES_HI]]
-; EG-NOT: BFE_INT
-; EG: LSHR
-; EG: LSHR
-;; TODO: Check the address computation; using | with variables in {{}} does not
-;; work, and the _LO/_HI order might also differ.
-define void @sext_in_reg_i8_to_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
- %c = shl i64 %a, %b
- %shl = shl i64 %c, 56
- %ashr = ashr i64 %shl, 56
- store i64 %ashr, i64 addrspace(1)* %out, align 8
- ret void
-}
-
-; FUNC-LABEL: {{^}}sext_in_reg_i16_to_i64:
-; SI: s_lshl_b64 [[VAL:s\[[0-9]+:[0-9]+\]]]
-; SI-DAG: s_bfe_i64 s{{\[}}[[SLO:[0-9]+]]:[[SHI:[0-9]+]]{{\]}}, [[VAL]], 0x100000
-; SI-DAG: v_mov_b32_e32 v[[VLO:[0-9]+]], s[[SLO]]
-; SI-DAG: v_mov_b32_e32 v[[VHI:[0-9]+]], s[[SHI]]
-; SI: buffer_store_dwordx2 v{{\[}}[[VLO]]:[[VHI]]{{\]}}
-
-; EG: MEM_{{.*}} STORE_{{.*}} [[RES_LO:T[0-9]+\.[XYZW]]], [[ADDR_LO:T[0-9]+.[XYZW]]]
-; EG: MEM_{{.*}} STORE_{{.*}} [[RES_HI:T[0-9]+\.[XYZW]]], [[ADDR_HI:T[0-9]+.[XYZW]]]
-; EG: LSHL
-; EG: BFE_INT {{\*?}} [[RES_LO]], {{.*}}, 0.0, literal
-; EG: ASHR [[RES_HI]]
-; EG-NOT: BFE_INT
-; EG: LSHR
-; EG: LSHR
-;; TODO: Check the address computation; using | with variables in {{}} does not
-;; work, and the _LO/_HI order might also differ.
-define void @sext_in_reg_i16_to_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
- %c = shl i64 %a, %b
- %shl = shl i64 %c, 48
- %ashr = ashr i64 %shl, 48
- store i64 %ashr, i64 addrspace(1)* %out, align 8
- ret void
-}
-
-; FUNC-LABEL: {{^}}sext_in_reg_i32_to_i64:
-; SI: s_lshl_b64 [[VAL:s\[[0-9]+:[0-9]+\]]]
-; SI-DAG: s_bfe_i64 s{{\[}}[[SLO:[0-9]+]]:[[SHI:[0-9]+]]{{\]}}, [[VAL]], 0x200000
-; SI-DAG: v_mov_b32_e32 v[[VLO:[0-9]+]], s[[SLO]]
-; SI-DAG: v_mov_b32_e32 v[[VHI:[0-9]+]], s[[SHI]]
-; SI: buffer_store_dwordx2 v{{\[}}[[VLO]]:[[VHI]]{{\]}}
-
-; EG: MEM_{{.*}} STORE_{{.*}} [[RES_LO:T[0-9]+\.[XYZW]]], [[ADDR_LO:T[0-9]+.[XYZW]]]
-; EG: MEM_{{.*}} STORE_{{.*}} [[RES_HI:T[0-9]+\.[XYZW]]], [[ADDR_HI:T[0-9]+.[XYZW]]]
-; EG-NOT: BFE_INT
-
-; EG: ASHR [[RES_HI]]
-
-; EG: LSHR
-; EG: LSHR
-;; TODO: Check the address computation; using | with variables in {{}} does not
-;; work, and the _LO/_HI order might also differ.
-define void @sext_in_reg_i32_to_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
- %c = shl i64 %a, %b
- %shl = shl i64 %c, 32
- %ashr = ashr i64 %shl, 32
- store i64 %ashr, i64 addrspace(1)* %out, align 8
- ret void
-}
-
-; This is broken on Evergreen for some reason related to the <1 x i64> kernel arguments.
-; XFUNC-LABEL: {{^}}sext_in_reg_i8_to_v1i64:
-; XSI: s_bfe_i32 [[EXTRACT:s[0-9]+]], {{s[0-9]+}}, 524288
-; XSI: s_ashr_i32 {{v[0-9]+}}, [[EXTRACT]], 31
-; XSI: buffer_store_dword
-; XEG: BFE_INT
-; XEG: ASHR
-; define void @sext_in_reg_i8_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i64> %a, <1 x i64> %b) nounwind {
-; %c = add <1 x i64> %a, %b
-; %shl = shl <1 x i64> %c, <i64 56>
-; %ashr = ashr <1 x i64> %shl, <i64 56>
-; store <1 x i64> %ashr, <1 x i64> addrspace(1)* %out, align 8
-; ret void
-; }
-
-; FUNC-LABEL: {{^}}v_sext_in_reg_i1_to_i64:
-; SI: buffer_load_dwordx2
-; SI: v_lshl_b64 v{{\[}}[[VAL_LO:[0-9]+]]:[[VAL_HI:[0-9]+]]{{\]}}
-; SI: v_bfe_i32 v[[LO:[0-9]+]], v[[VAL_LO]], 0, 1
-; SI: v_ashrrev_i32_e32 v[[HI:[0-9]+]], 31, v[[LO]]
-; SI: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
-define void @v_sext_in_reg_i1_to_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) nounwind {
- %tid = call i32 @llvm.r600.read.tidig.x()
- %a.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
- %b.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
- %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %tid
- %a = load i64, i64 addrspace(1)* %a.gep, align 8
- %b = load i64, i64 addrspace(1)* %b.gep, align 8
-
- %c = shl i64 %a, %b
- %shl = shl i64 %c, 63
- %ashr = ashr i64 %shl, 63
- store i64 %ashr, i64 addrspace(1)* %out.gep, align 8
- ret void
-}
-
-; FUNC-LABEL: {{^}}v_sext_in_reg_i8_to_i64:
-; SI: buffer_load_dwordx2
-; SI: v_lshl_b64 v{{\[}}[[VAL_LO:[0-9]+]]:[[VAL_HI:[0-9]+]]{{\]}}
-; SI: v_bfe_i32 v[[LO:[0-9]+]], v[[VAL_LO]], 0, 8
-; SI: v_ashrrev_i32_e32 v[[HI:[0-9]+]], 31, v[[LO]]
-; SI: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
-define void @v_sext_in_reg_i8_to_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) nounwind {
- %tid = call i32 @llvm.r600.read.tidig.x()
- %a.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
- %b.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
- %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %tid
- %a = load i64, i64 addrspace(1)* %a.gep, align 8
- %b = load i64, i64 addrspace(1)* %b.gep, align 8
-
- %c = shl i64 %a, %b
- %shl = shl i64 %c, 56
- %ashr = ashr i64 %shl, 56
- store i64 %ashr, i64 addrspace(1)* %out.gep, align 8
- ret void
-}
-
-; FUNC-LABEL: {{^}}v_sext_in_reg_i16_to_i64:
-; SI: buffer_load_dwordx2
-; SI: v_lshl_b64 v{{\[}}[[VAL_LO:[0-9]+]]:[[VAL_HI:[0-9]+]]{{\]}}
-; SI: v_bfe_i32 v[[LO:[0-9]+]], v[[VAL_LO]], 0, 16
-; SI: v_ashrrev_i32_e32 v[[HI:[0-9]+]], 31, v[[LO]]
-; SI: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
-define void @v_sext_in_reg_i16_to_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) nounwind {
- %tid = call i32 @llvm.r600.read.tidig.x()
- %a.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
- %b.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
- %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %tid
- %a = load i64, i64 addrspace(1)* %a.gep, align 8
- %b = load i64, i64 addrspace(1)* %b.gep, align 8
-
- %c = shl i64 %a, %b
- %shl = shl i64 %c, 48
- %ashr = ashr i64 %shl, 48
- store i64 %ashr, i64 addrspace(1)* %out.gep, align 8
- ret void
-}
-
-; FUNC-LABEL: {{^}}v_sext_in_reg_i32_to_i64:
-; SI: buffer_load_dwordx2
-; SI: v_lshl_b64 v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}},
-; SI: v_ashrrev_i32_e32 v[[SHR:[0-9]+]], 31, v[[LO]]
-; SI: buffer_store_dwordx2 v{{\[}}[[LO]]:[[SHR]]{{\]}}
-define void @v_sext_in_reg_i32_to_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) nounwind {
- %tid = call i32 @llvm.r600.read.tidig.x()
- %a.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
- %b.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
- %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %tid
- %a = load i64, i64 addrspace(1)* %a.gep, align 8
- %b = load i64, i64 addrspace(1)* %b.gep, align 8
-
- %c = shl i64 %a, %b
- %shl = shl i64 %c, 32
- %ashr = ashr i64 %shl, 32
- store i64 %ashr, i64 addrspace(1)* %out.gep, align 8
- ret void
-}
-
-; FUNC-LABEL: {{^}}sext_in_reg_i1_in_i32_other_amount:
-; SI-NOT: s_lshl
-; SI-NOT: s_ashr
-; SI: s_bfe_i32 {{s[0-9]+}}, {{s[0-9]+}}, 0x190001
-
-; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+\.[XYZW]]], [[ADDR:T[0-9]+.[XYZW]]]
-; EG-NOT: BFE
-; EG: ADD_INT
-; EG: LSHL
-; EG: ASHR [[RES]]
-; EG: LSHR {{\*?}} [[ADDR]]
-define void @sext_in_reg_i1_in_i32_other_amount(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
- %c = add i32 %a, %b
- %x = shl i32 %c, 6
- %y = ashr i32 %x, 7
- store i32 %y, i32 addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}sext_in_reg_v2i1_in_v2i32_other_amount:
-; SI-NOT: s_lshl
-; SI-NOT: s_ashr
-; SI-DAG: s_bfe_i32 {{s[0-9]+}}, {{s[0-9]+}}, 0x190001
-; SI-DAG: s_bfe_i32 {{s[0-9]+}}, {{s[0-9]+}}, 0x190001
-; SI: s_endpgm
-
-; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+]]{{\.[XYZW][XYZW]}}, [[ADDR:T[0-9]+.[XYZW]]]
-; EG-NOT: BFE
-; EG: ADD_INT
-; EG: LSHL
-; EG: ASHR [[RES]]
-; EG: LSHL
-; EG: ASHR [[RES]]
-; EG: LSHR {{\*?}} [[ADDR]]
-define void @sext_in_reg_v2i1_in_v2i32_other_amount(<2 x i32> addrspace(1)* %out, <2 x i32> %a, <2 x i32> %b) nounwind {
- %c = add <2 x i32> %a, %b
- %x = shl <2 x i32> %c, <i32 6, i32 6>
- %y = ashr <2 x i32> %x, <i32 7, i32 7>
- store <2 x i32> %y, <2 x i32> addrspace(1)* %out, align 2
- ret void
-}
-
-
-; FUNC-LABEL: {{^}}sext_in_reg_v2i1_to_v2i32:
-; SI: s_bfe_i32 {{s[0-9]+}}, {{s[0-9]+}}, 0x10000
-; SI: s_bfe_i32 {{s[0-9]+}}, {{s[0-9]+}}, 0x10000
-; SI: buffer_store_dwordx2
-
-; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+]]{{\.[XYZW][XYZW]}}, [[ADDR:T[0-9]+.[XYZW]]]
-; EG: BFE_INT [[RES]]
-; EG: BFE_INT [[RES]]
-; EG: LSHR {{\*?}} [[ADDR]]
-define void @sext_in_reg_v2i1_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %a, <2 x i32> %b) nounwind {
- %c = add <2 x i32> %a, %b ; add to prevent folding into extload
- %shl = shl <2 x i32> %c, <i32 31, i32 31>
- %ashr = ashr <2 x i32> %shl, <i32 31, i32 31>
- store <2 x i32> %ashr, <2 x i32> addrspace(1)* %out, align 8
- ret void
-}
-
-; FUNC-LABEL: {{^}}sext_in_reg_v4i1_to_v4i32:
-; SI: s_bfe_i32 {{s[0-9]+}}, {{s[0-9]+}}, 0x10000
-; SI: s_bfe_i32 {{s[0-9]+}}, {{s[0-9]+}}, 0x10000
-; SI: s_bfe_i32 {{s[0-9]+}}, {{s[0-9]+}}, 0x10000
-; SI: s_bfe_i32 {{s[0-9]+}}, {{s[0-9]+}}, 0x10000
-; SI: buffer_store_dwordx4
-
-; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+]]{{\.[XYZW][XYZW][XYZW][XYZW]}}, [[ADDR:T[0-9]+.[XYZW]]]
-; EG: BFE_INT [[RES]]
-; EG: BFE_INT [[RES]]
-; EG: BFE_INT [[RES]]
-; EG: BFE_INT [[RES]]
-; EG: LSHR {{\*?}} [[ADDR]]
-define void @sext_in_reg_v4i1_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %a, <4 x i32> %b) nounwind {
- %c = add <4 x i32> %a, %b ; add to prevent folding into extload
- %shl = shl <4 x i32> %c, <i32 31, i32 31, i32 31, i32 31>
- %ashr = ashr <4 x i32> %shl, <i32 31, i32 31, i32 31, i32 31>
- store <4 x i32> %ashr, <4 x i32> addrspace(1)* %out, align 8
- ret void
-}
-
-; FUNC-LABEL: {{^}}sext_in_reg_v2i8_to_v2i32:
-; SI: s_sext_i32_i8 {{s[0-9]+}}, {{s[0-9]+}}
-; SI: s_sext_i32_i8 {{s[0-9]+}}, {{s[0-9]+}}
-; SI: buffer_store_dwordx2
-
-; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+]]{{\.[XYZW][XYZW]}}, [[ADDR:T[0-9]+.[XYZW]]]
-; EG: BFE_INT [[RES]]
-; EG: BFE_INT [[RES]]
-; EG: LSHR {{\*?}} [[ADDR]]
-define void @sext_in_reg_v2i8_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %a, <2 x i32> %b) nounwind {
- %c = add <2 x i32> %a, %b ; add to prevent folding into extload
- %shl = shl <2 x i32> %c, <i32 24, i32 24>
- %ashr = ashr <2 x i32> %shl, <i32 24, i32 24>
- store <2 x i32> %ashr, <2 x i32> addrspace(1)* %out, align 8
- ret void
-}
-
-; FUNC-LABEL: {{^}}sext_in_reg_v4i8_to_v4i32:
-; SI: s_sext_i32_i8 {{s[0-9]+}}, {{s[0-9]+}}
-; SI: s_sext_i32_i8 {{s[0-9]+}}, {{s[0-9]+}}
-; SI: s_sext_i32_i8 {{s[0-9]+}}, {{s[0-9]+}}
-; SI: s_sext_i32_i8 {{s[0-9]+}}, {{s[0-9]+}}
-; SI: buffer_store_dwordx4
-
-; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+]]{{\.[XYZW][XYZW][XYZW][XYZW]}}, [[ADDR:T[0-9]+.[XYZW]]]
-; EG: BFE_INT [[RES]]
-; EG: BFE_INT [[RES]]
-; EG: BFE_INT [[RES]]
-; EG: BFE_INT [[RES]]
-; EG: LSHR {{\*?}} [[ADDR]]
-define void @sext_in_reg_v4i8_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %a, <4 x i32> %b) nounwind {
- %c = add <4 x i32> %a, %b ; add to prevent folding into extload
- %shl = shl <4 x i32> %c, <i32 24, i32 24, i32 24, i32 24>
- %ashr = ashr <4 x i32> %shl, <i32 24, i32 24, i32 24, i32 24>
- store <4 x i32> %ashr, <4 x i32> addrspace(1)* %out, align 8
- ret void
-}
-
-; FUNC-LABEL: {{^}}sext_in_reg_v2i16_to_v2i32:
-; SI: s_sext_i32_i16 {{s[0-9]+}}, {{s[0-9]+}}
-; SI: s_sext_i32_i16 {{s[0-9]+}}, {{s[0-9]+}}
-; SI: buffer_store_dwordx2
-
-; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+]]{{\.[XYZW][XYZW]}}, [[ADDR:T[0-9]+.[XYZW]]]
-; EG: BFE_INT [[RES]]
-; EG: BFE_INT [[RES]]
-; EG: LSHR {{\*?}} [[ADDR]]
-define void @sext_in_reg_v2i16_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %a, <2 x i32> %b) nounwind {
- %c = add <2 x i32> %a, %b ; add to prevent folding into extload
- %shl = shl <2 x i32> %c, <i32 16, i32 16>
- %ashr = ashr <2 x i32> %shl, <i32 16, i32 16>
- store <2 x i32> %ashr, <2 x i32> addrspace(1)* %out, align 8
- ret void
-}
-
-; FUNC-LABEL: {{^}}testcase:
-define void @testcase(i8 addrspace(1)* %out, i8 %a) nounwind {
- %and_a_1 = and i8 %a, 1
- %cmp_eq = icmp eq i8 %and_a_1, 0
- %cmp_slt = icmp slt i8 %a, 0
- %sel0 = select i1 %cmp_slt, i8 0, i8 %a
- %sel1 = select i1 %cmp_eq, i8 0, i8 %a
- %xor = xor i8 %sel0, %sel1
- store i8 %xor, i8 addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}testcase_3:
-define void @testcase_3(i8 addrspace(1)* %out, i8 %a) nounwind {
- %and_a_1 = and i8 %a, 1
- %cmp_eq = icmp eq i8 %and_a_1, 0
- %cmp_slt = icmp slt i8 %a, 0
- %sel0 = select i1 %cmp_slt, i8 0, i8 %a
- %sel1 = select i1 %cmp_eq, i8 0, i8 %a
- %xor = xor i8 %sel0, %sel1
- store i8 %xor, i8 addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}vgpr_sext_in_reg_v4i8_to_v4i32:
-; SI: v_bfe_i32 [[EXTRACT:v[0-9]+]], {{v[0-9]+}}, 0, 8
-; SI: v_bfe_i32 [[EXTRACT:v[0-9]+]], {{v[0-9]+}}, 0, 8
-; SI: v_bfe_i32 [[EXTRACT:v[0-9]+]], {{v[0-9]+}}, 0, 8
-; SI: v_bfe_i32 [[EXTRACT:v[0-9]+]], {{v[0-9]+}}, 0, 8
-define void @vgpr_sext_in_reg_v4i8_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %a, <4 x i32> addrspace(1)* %b) nounwind {
- %loada = load <4 x i32>, <4 x i32> addrspace(1)* %a, align 16
- %loadb = load <4 x i32>, <4 x i32> addrspace(1)* %b, align 16
- %c = add <4 x i32> %loada, %loadb ; add to prevent folding into extload
- %shl = shl <4 x i32> %c, <i32 24, i32 24, i32 24, i32 24>
- %ashr = ashr <4 x i32> %shl, <i32 24, i32 24, i32 24, i32 24>
- store <4 x i32> %ashr, <4 x i32> addrspace(1)* %out, align 8
- ret void
-}
-
-; FUNC-LABEL: {{^}}vgpr_sext_in_reg_v4i16_to_v4i32:
-; SI: v_bfe_i32 [[EXTRACT:v[0-9]+]], {{v[0-9]+}}, 0, 16
-; SI: v_bfe_i32 [[EXTRACT:v[0-9]+]], {{v[0-9]+}}, 0, 16
-define void @vgpr_sext_in_reg_v4i16_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %a, <4 x i32> addrspace(1)* %b) nounwind {
- %loada = load <4 x i32>, <4 x i32> addrspace(1)* %a, align 16
- %loadb = load <4 x i32>, <4 x i32> addrspace(1)* %b, align 16
- %c = add <4 x i32> %loada, %loadb ; add to prevent folding into extload
- %shl = shl <4 x i32> %c, <i32 16, i32 16, i32 16, i32 16>
- %ashr = ashr <4 x i32> %shl, <i32 16, i32 16, i32 16, i32 16>
- store <4 x i32> %ashr, <4 x i32> addrspace(1)* %out, align 8
- ret void
-}
-
-; FUNC-LABEL: {{^}}sext_in_reg_to_illegal_type:
-; SI: buffer_load_sbyte
-; SI: v_max_i32
-; SI-NOT: bfe
-; SI: buffer_store_short
-define void @sext_in_reg_to_illegal_type(i16 addrspace(1)* nocapture %out, i8 addrspace(1)* nocapture %src) nounwind {
- %tmp5 = load i8, i8 addrspace(1)* %src, align 1
- %tmp2 = sext i8 %tmp5 to i32
- %tmp3 = tail call i32 @llvm.AMDGPU.imax(i32 %tmp2, i32 0) nounwind readnone
- %tmp4 = trunc i32 %tmp3 to i8
- %tmp6 = sext i8 %tmp4 to i16
- store i16 %tmp6, i16 addrspace(1)* %out, align 2
- ret void
-}
-
-declare i32 @llvm.AMDGPU.bfe.i32(i32, i32, i32) nounwind readnone
-
-; FUNC-LABEL: {{^}}bfe_0_width:
-; SI-NOT: {{[^@]}}bfe
-; SI: s_endpgm
-define void @bfe_0_width(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) nounwind {
- %load = load i32, i32 addrspace(1)* %ptr, align 4
- %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %load, i32 8, i32 0) nounwind readnone
- store i32 %bfe, i32 addrspace(1)* %out, align 4
- ret void
-}
-
-; FUNC-LABEL: {{^}}bfe_8_bfe_8:
-; SI: v_bfe_i32
-; SI-NOT: {{[^@]}}bfe
-; SI: s_endpgm
-define void @bfe_8_bfe_8(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) nounwind {
- %load = load i32, i32 addrspace(1)* %ptr, align 4
- %bfe0 = call i32 @llvm.AMDGPU.bfe.i32(i32 %load, i32 0, i32 8) nounwind readnone
- %bfe1 = call i32 @llvm.AMDGPU.bfe.i32(i32 %bfe0, i32 0, i32 8) nounwind readnone
- store i32 %bfe1, i32 addrspace(1)* %out, align 4
- ret void
-}
-
-; FUNC-LABEL: {{^}}bfe_8_bfe_16:
-; SI: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 8
-; SI: s_endpgm
-define void @bfe_8_bfe_16(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) nounwind {
- %load = load i32, i32 addrspace(1)* %ptr, align 4
- %bfe0 = call i32 @llvm.AMDGPU.bfe.i32(i32 %load, i32 0, i32 8) nounwind readnone
- %bfe1 = call i32 @llvm.AMDGPU.bfe.i32(i32 %bfe0, i32 0, i32 16) nounwind readnone
- store i32 %bfe1, i32 addrspace(1)* %out, align 4
- ret void
-}
-
-; This really should be folded into 1
-; FUNC-LABEL: {{^}}bfe_16_bfe_8:
-; SI: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 8
-; SI-NOT: {{[^@]}}bfe
-; SI: s_endpgm
-define void @bfe_16_bfe_8(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) nounwind {
- %load = load i32, i32 addrspace(1)* %ptr, align 4
- %bfe0 = call i32 @llvm.AMDGPU.bfe.i32(i32 %load, i32 0, i32 16) nounwind readnone
- %bfe1 = call i32 @llvm.AMDGPU.bfe.i32(i32 %bfe0, i32 0, i32 8) nounwind readnone
- store i32 %bfe1, i32 addrspace(1)* %out, align 4
- ret void
-}
-
-; Make sure there isn't a redundant BFE
-; FUNC-LABEL: {{^}}sext_in_reg_i8_to_i32_bfe:
-; SI: s_sext_i32_i8 s{{[0-9]+}}, s{{[0-9]+}}
-; SI-NOT: {{[^@]}}bfe
-; SI: s_endpgm
-define void @sext_in_reg_i8_to_i32_bfe(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
- %c = add i32 %a, %b ; add to prevent folding into extload
- %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %c, i32 0, i32 8) nounwind readnone
- %shl = shl i32 %bfe, 24
- %ashr = ashr i32 %shl, 24
- store i32 %ashr, i32 addrspace(1)* %out, align 4
- ret void
-}
-
-; FUNC-LABEL: {{^}}sext_in_reg_i8_to_i32_bfe_wrong:
-define void @sext_in_reg_i8_to_i32_bfe_wrong(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
- %c = add i32 %a, %b ; add to prevent folding into extload
- %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %c, i32 8, i32 0) nounwind readnone
- %shl = shl i32 %bfe, 24
- %ashr = ashr i32 %shl, 24
- store i32 %ashr, i32 addrspace(1)* %out, align 4
- ret void
-}
-
-; FUNC-LABEL: {{^}}sextload_i8_to_i32_bfe:
-; SI: buffer_load_sbyte
-; SI-NOT: {{[^@]}}bfe
-; SI: s_endpgm
-define void @sextload_i8_to_i32_bfe(i32 addrspace(1)* %out, i8 addrspace(1)* %ptr) nounwind {
- %load = load i8, i8 addrspace(1)* %ptr, align 1
- %sext = sext i8 %load to i32
- %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %sext, i32 0, i32 8) nounwind readnone
- %shl = shl i32 %bfe, 24
- %ashr = ashr i32 %shl, 24
- store i32 %ashr, i32 addrspace(1)* %out, align 4
- ret void
-}
-
-; SI: .text
-; FUNC-LABEL: {{^}}sextload_i8_to_i32_bfe_0:{{.*$}}
-; SI-NOT: {{[^@]}}bfe
-; SI: s_endpgm
-define void @sextload_i8_to_i32_bfe_0(i32 addrspace(1)* %out, i8 addrspace(1)* %ptr) nounwind {
- %load = load i8, i8 addrspace(1)* %ptr, align 1
- %sext = sext i8 %load to i32
- %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %sext, i32 8, i32 0) nounwind readnone
- %shl = shl i32 %bfe, 24
- %ashr = ashr i32 %shl, 24
- store i32 %ashr, i32 addrspace(1)* %out, align 4
- ret void
-}
-
-; FUNC-LABEL: {{^}}sext_in_reg_i1_bfe_offset_0:
-; SI-NOT: shr
-; SI-NOT: shl
-; SI: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 1
-; SI: s_endpgm
-define void @sext_in_reg_i1_bfe_offset_0(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
- %x = load i32, i32 addrspace(1)* %in, align 4
- %shl = shl i32 %x, 31
- %shr = ashr i32 %shl, 31
- %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %shr, i32 0, i32 1)
- store i32 %bfe, i32 addrspace(1)* %out, align 4
- ret void
-}
-
-; FUNC-LABEL: {{^}}sext_in_reg_i1_bfe_offset_1:
-; SI: buffer_load_dword
-; SI-NOT: shl
-; SI-NOT: shr
-; SI: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 1, 1
-; SI: s_endpgm
-define void @sext_in_reg_i1_bfe_offset_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
- %x = load i32, i32 addrspace(1)* %in, align 4
- %shl = shl i32 %x, 30
- %shr = ashr i32 %shl, 30
- %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %shr, i32 1, i32 1)
- store i32 %bfe, i32 addrspace(1)* %out, align 4
- ret void
-}
-
-; FUNC-LABEL: {{^}}sext_in_reg_i2_bfe_offset_1:
-; SI: buffer_load_dword
-; SI-NOT: v_lshl
-; SI-NOT: v_ashr
-; SI: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 2
-; SI: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 1, 2
-; SI: s_endpgm
-define void @sext_in_reg_i2_bfe_offset_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
- %x = load i32, i32 addrspace(1)* %in, align 4
- %shl = shl i32 %x, 30
- %shr = ashr i32 %shl, 30
- %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %shr, i32 1, i32 2)
- store i32 %bfe, i32 addrspace(1)* %out, align 4
- ret void
-}
Removed: llvm/trunk/test/CodeGen/R600/sgpr-control-flow.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/sgpr-control-flow.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/sgpr-control-flow.ll (original)
+++ llvm/trunk/test/CodeGen/R600/sgpr-control-flow.ll (removed)
@@ -1,105 +0,0 @@
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
-;
-;
-; Most SALU instructions ignore control flow, so we need to make sure
-; they don't overwrite values from other blocks.
-
-; If the branch decision is made based on a value in an SGPR then all
-; threads will execute the same code paths, so we don't need to worry
-; about instructions in different blocks overwriting each other.
-; SI-LABEL: {{^}}sgpr_if_else_salu_br:
-; SI: s_add
-; SI: s_add
-
-define void @sgpr_if_else_salu_br(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c, i32 %d, i32 %e) {
-entry:
- %0 = icmp eq i32 %a, 0
- br i1 %0, label %if, label %else
-
-if:
- %1 = add i32 %b, %c
- br label %endif
-
-else:
- %2 = add i32 %d, %e
- br label %endif
-
-endif:
- %3 = phi i32 [%1, %if], [%2, %else]
- %4 = add i32 %3, %a
- store i32 %4, i32 addrspace(1)* %out
- ret void
-}
-
-; The two S_ADD instructions should write to different registers, since
-; different threads will take different control flow paths.
-
-; SI-LABEL: {{^}}sgpr_if_else_valu_br:
-; SI: s_add_i32 [[SGPR:s[0-9]+]]
-; SI-NOT: s_add_i32 [[SGPR]]
-
-define void @sgpr_if_else_valu_br(i32 addrspace(1)* %out, float %a, i32 %b, i32 %c, i32 %d, i32 %e) {
-entry:
- %tid = call i32 @llvm.r600.read.tidig.x() #0
- %tid_f = uitofp i32 %tid to float
- %tmp1 = fcmp ueq float %tid_f, 0.0
- br i1 %tmp1, label %if, label %else
-
-if:
- %tmp2 = add i32 %b, %c
- br label %endif
-
-else:
- %tmp3 = add i32 %d, %e
- br label %endif
-
-endif:
- %tmp4 = phi i32 [%tmp2, %if], [%tmp3, %else]
- store i32 %tmp4, i32 addrspace(1)* %out
- ret void
-}
-
-; FIXME: Should write to different SGPR pairs instead of copying to
-; VALU for i1 phi.
-
-; SI-LABEL: {{^}}sgpr_if_else_valu_cmp_phi_br:
-; SI: buffer_load_dword [[AVAL:v[0-9]+]]
-; SI: v_cmp_gt_i32_e32 [[CMP_IF:vcc]], 0, [[AVAL]]
-; SI: v_cndmask_b32_e64 [[V_CMP:v[0-9]+]], 0, -1, [[CMP_IF]]
-
-; SI: BB2_1:
-; SI: buffer_load_dword [[AVAL:v[0-9]+]]
-; SI: v_cmp_eq_i32_e32 [[CMP_ELSE:vcc]], 0, [[AVAL]]
-; SI: v_cndmask_b32_e64 [[V_CMP]], 0, -1, [[CMP_ELSE]]
-
-; SI: v_cmp_ne_i32_e32 [[CMP_CMP:vcc]], 0, [[V_CMP]]
-; SI: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, [[CMP_CMP]]
-; SI: buffer_store_dword [[RESULT]]
-define void @sgpr_if_else_valu_cmp_phi_br(i32 addrspace(1)* %out, i32 addrspace(1)* %a, i32 addrspace(1)* %b) {
-entry:
- %tid = call i32 @llvm.r600.read.tidig.x() #0
- %tmp1 = icmp eq i32 %tid, 0
- br i1 %tmp1, label %if, label %else
-
-if:
- %gep.if = getelementptr i32, i32 addrspace(1)* %a, i32 %tid
- %a.val = load i32, i32 addrspace(1)* %gep.if
- %cmp.if = icmp eq i32 %a.val, 0
- br label %endif
-
-else:
- %gep.else = getelementptr i32, i32 addrspace(1)* %b, i32 %tid
- %b.val = load i32, i32 addrspace(1)* %gep.else
- %cmp.else = icmp slt i32 %b.val, 0
- br label %endif
-
-endif:
- %tmp4 = phi i1 [%cmp.if, %if], [%cmp.else, %else]
- %ext = sext i1 %tmp4 to i32
- store i32 %ext, i32 addrspace(1)* %out
- ret void
-}
-
-declare i32 @llvm.r600.read.tidig.x() #0
-
-attributes #0 = { readnone }
Removed: llvm/trunk/test/CodeGen/R600/sgpr-copy-duplicate-operand.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/sgpr-copy-duplicate-operand.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/sgpr-copy-duplicate-operand.ll (original)
+++ llvm/trunk/test/CodeGen/R600/sgpr-copy-duplicate-operand.ll (removed)
@@ -1,19 +0,0 @@
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
-; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
-
-; Copy VGPR -> SGPR used twice as an instruction operand, which is then
-; used in a REG_SEQUENCE that also needs to be handled.
-
-; SI-LABEL: {{^}}test_dup_operands:
-; SI: v_add_i32_e32
-define void @test_dup_operands(<2 x i32> addrspace(1)* noalias %out, <2 x i32> addrspace(1)* noalias %in) {
- %a = load <2 x i32>, <2 x i32> addrspace(1)* %in
- %lo = extractelement <2 x i32> %a, i32 0
- %hi = extractelement <2 x i32> %a, i32 1
- %add = add i32 %lo, %lo
- %vec0 = insertelement <2 x i32> undef, i32 %add, i32 0
- %vec1 = insertelement <2 x i32> %vec0, i32 %hi, i32 1
- store <2 x i32> %vec1, <2 x i32> addrspace(1)* %out, align 8
- ret void
-}
-
Removed: llvm/trunk/test/CodeGen/R600/sgpr-copy.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/sgpr-copy.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/sgpr-copy.ll (original)
+++ llvm/trunk/test/CodeGen/R600/sgpr-copy.ll (removed)
@@ -1,379 +0,0 @@
-; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck %s
-; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s
-
-; This test checks that no VGPR to SGPR copies are created by the register
-; allocator.
-; CHECK-LABEL: {{^}}phi1:
-; CHECK: s_buffer_load_dword [[DST:s[0-9]]], {{s\[[0-9]+:[0-9]+\]}}, 0x0
-; CHECK: v_mov_b32_e32 v{{[0-9]}}, [[DST]]
-
-define void @phi1(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
-main_body:
- %20 = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %0, i32 0
- %21 = load <16 x i8>, <16 x i8> addrspace(2)* %20, !tbaa !1
- %22 = call float @llvm.SI.load.const(<16 x i8> %21, i32 0)
- %23 = call float @llvm.SI.load.const(<16 x i8> %21, i32 16)
- %24 = call float @llvm.SI.load.const(<16 x i8> %21, i32 32)
- %25 = fptosi float %23 to i32
- %26 = icmp ne i32 %25, 0
- br i1 %26, label %ENDIF, label %ELSE
-
-ELSE: ; preds = %main_body
- %27 = fsub float -0.000000e+00, %22
- br label %ENDIF
-
-ENDIF: ; preds = %main_body, %ELSE
- %temp.0 = phi float [ %27, %ELSE ], [ %22, %main_body ]
- %28 = fadd float %temp.0, %24
- call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %28, float %28, float 0.000000e+00, float 1.000000e+00)
- ret void
-}
-
-; Make sure this program doesn't crash
-; CHECK-LABEL: {{^}}phi2:
-define void @phi2(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
-main_body:
- %20 = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %0, i32 0
- %21 = load <16 x i8>, <16 x i8> addrspace(2)* %20, !tbaa !1
- %22 = call float @llvm.SI.load.const(<16 x i8> %21, i32 16)
- %23 = call float @llvm.SI.load.const(<16 x i8> %21, i32 32)
- %24 = call float @llvm.SI.load.const(<16 x i8> %21, i32 36)
- %25 = call float @llvm.SI.load.const(<16 x i8> %21, i32 40)
- %26 = call float @llvm.SI.load.const(<16 x i8> %21, i32 48)
- %27 = call float @llvm.SI.load.const(<16 x i8> %21, i32 52)
- %28 = call float @llvm.SI.load.const(<16 x i8> %21, i32 56)
- %29 = call float @llvm.SI.load.const(<16 x i8> %21, i32 64)
- %30 = call float @llvm.SI.load.const(<16 x i8> %21, i32 68)
- %31 = call float @llvm.SI.load.const(<16 x i8> %21, i32 72)
- %32 = call float @llvm.SI.load.const(<16 x i8> %21, i32 76)
- %33 = call float @llvm.SI.load.const(<16 x i8> %21, i32 80)
- %34 = call float @llvm.SI.load.const(<16 x i8> %21, i32 84)
- %35 = call float @llvm.SI.load.const(<16 x i8> %21, i32 88)
- %36 = call float @llvm.SI.load.const(<16 x i8> %21, i32 92)
- %37 = getelementptr <32 x i8>, <32 x i8> addrspace(2)* %2, i32 0
- %38 = load <32 x i8>, <32 x i8> addrspace(2)* %37, !tbaa !1
- %39 = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %1, i32 0
- %40 = load <16 x i8>, <16 x i8> addrspace(2)* %39, !tbaa !1
- %41 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %3, <2 x i32> %5)
- %42 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %3, <2 x i32> %5)
- %43 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %3, <2 x i32> %5)
- %44 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %3, <2 x i32> %5)
- %45 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %3, <2 x i32> %5)
- %46 = bitcast float %41 to i32
- %47 = bitcast float %42 to i32
- %48 = insertelement <2 x i32> undef, i32 %46, i32 0
- %49 = insertelement <2 x i32> %48, i32 %47, i32 1
- %50 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %49, <32 x i8> %38, <16 x i8> %40, i32 2)
- %51 = extractelement <4 x float> %50, i32 2
- %52 = call float @fabs(float %51)
- %53 = fmul float %43, %43
- %54 = fmul float %44, %44
- %55 = fadd float %54, %53
- %56 = fmul float %45, %45
- %57 = fadd float %55, %56
- %58 = call float @llvm.AMDGPU.rsq.f32(float %57)
- %59 = fmul float %43, %58
- %60 = fmul float %44, %58
- %61 = fmul float %45, %58
- %62 = fmul float %59, %23
- %63 = fmul float %60, %24
- %64 = fadd float %63, %62
- %65 = fmul float %61, %25
- %66 = fadd float %64, %65
- %67 = fsub float -0.000000e+00, %26
- %68 = fmul float %66, %52
- %69 = fadd float %68, %67
- %70 = fmul float %27, %69
- %71 = fmul float %28, %69
- %72 = call float @fabs(float %70)
- %73 = fcmp olt float 0x3EE4F8B580000000, %72
- %74 = sext i1 %73 to i32
- %75 = bitcast i32 %74 to float
- %76 = bitcast float %75 to i32
- %77 = icmp ne i32 %76, 0
- br i1 %77, label %IF, label %ENDIF
-
-IF: ; preds = %main_body
- %78 = fsub float -0.000000e+00, %70
- %79 = call float @llvm.AMDIL.exp.(float %78)
- %80 = fsub float -0.000000e+00, %79
- %81 = fadd float 1.000000e+00, %80
- %82 = fdiv float 1.000000e+00, %70
- %83 = fmul float %81, %82
- %84 = fmul float %32, %83
- br label %ENDIF
-
-ENDIF: ; preds = %main_body, %IF
- %temp4.0 = phi float [ %84, %IF ], [ %32, %main_body ]
- %85 = call float @fabs(float %71)
- %86 = fcmp olt float 0x3EE4F8B580000000, %85
- %87 = sext i1 %86 to i32
- %88 = bitcast i32 %87 to float
- %89 = bitcast float %88 to i32
- %90 = icmp ne i32 %89, 0
- br i1 %90, label %IF25, label %ENDIF24
-
-IF25: ; preds = %ENDIF
- %91 = fsub float -0.000000e+00, %71
- %92 = call float @llvm.AMDIL.exp.(float %91)
- %93 = fsub float -0.000000e+00, %92
- %94 = fadd float 1.000000e+00, %93
- %95 = fdiv float 1.000000e+00, %71
- %96 = fmul float %94, %95
- %97 = fmul float %36, %96
- br label %ENDIF24
-
-ENDIF24: ; preds = %ENDIF, %IF25
- %temp8.0 = phi float [ %97, %IF25 ], [ %36, %ENDIF ]
- %98 = fmul float %29, %temp4.0
- %99 = fmul float %30, %temp4.0
- %100 = fmul float %31, %temp4.0
- %101 = fmul float %33, %temp8.0
- %102 = fadd float %101, %98
- %103 = fmul float %34, %temp8.0
- %104 = fadd float %103, %99
- %105 = fmul float %35, %temp8.0
- %106 = fadd float %105, %100
- %107 = call float @llvm.pow.f32(float %52, float %22)
- %108 = fsub float -0.000000e+00, %102
- %109 = fmul float %108, %107
- %110 = fsub float -0.000000e+00, %104
- %111 = fmul float %110, %107
- %112 = fsub float -0.000000e+00, %106
- %113 = fmul float %112, %107
- %114 = call i32 @llvm.SI.packf16(float %109, float %111)
- %115 = bitcast i32 %114 to float
- %116 = call i32 @llvm.SI.packf16(float %113, float 1.000000e+00)
- %117 = bitcast i32 %116 to float
- call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %115, float %117, float %115, float %117)
- ret void
-}
-
-; We just want to make sure the program doesn't crash
-; CHECK-LABEL: {{^}}loop:
-
-define void @loop(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
-main_body:
- %20 = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %0, i32 0
- %21 = load <16 x i8>, <16 x i8> addrspace(2)* %20, !tbaa !1
- %22 = call float @llvm.SI.load.const(<16 x i8> %21, i32 0)
- %23 = call float @llvm.SI.load.const(<16 x i8> %21, i32 4)
- %24 = call float @llvm.SI.load.const(<16 x i8> %21, i32 8)
- %25 = call float @llvm.SI.load.const(<16 x i8> %21, i32 12)
- %26 = fptosi float %25 to i32
- %27 = bitcast i32 %26 to float
- %28 = bitcast float %27 to i32
- br label %LOOP
-
-LOOP: ; preds = %ENDIF, %main_body
- %temp4.0 = phi float [ %22, %main_body ], [ %temp5.0, %ENDIF ]
- %temp5.0 = phi float [ %23, %main_body ], [ %temp6.0, %ENDIF ]
- %temp6.0 = phi float [ %24, %main_body ], [ %temp4.0, %ENDIF ]
- %temp8.0 = phi float [ 0.000000e+00, %main_body ], [ %37, %ENDIF ]
- %29 = bitcast float %temp8.0 to i32
- %30 = icmp sge i32 %29, %28
- %31 = sext i1 %30 to i32
- %32 = bitcast i32 %31 to float
- %33 = bitcast float %32 to i32
- %34 = icmp ne i32 %33, 0
- br i1 %34, label %IF, label %ENDIF
-
-IF: ; preds = %LOOP
- call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %temp4.0, float %temp5.0, float %temp6.0, float 1.000000e+00)
- ret void
-
-ENDIF: ; preds = %LOOP
- %35 = bitcast float %temp8.0 to i32
- %36 = add i32 %35, 1
- %37 = bitcast i32 %36 to float
- br label %LOOP
-}
-
-; Function Attrs: nounwind readnone
-declare float @llvm.SI.load.const(<16 x i8>, i32) #1
-
-; Function Attrs: readonly
-declare float @fabs(float) #2
-
-declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
-
-attributes #0 = { "ShaderType"="0" }
-attributes #1 = { nounwind readnone }
-attributes #2 = { readonly }
-attributes #3 = { readnone }
-attributes #4 = { nounwind readonly }
-
-!0 = !{!"const", null}
-!1 = !{!0, !0, i64 0, i32 1}
-
-; Function Attrs: nounwind readnone
-declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1
-
-; Function Attrs: nounwind readnone
-declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1
-
-; Function Attrs: readnone
-declare float @llvm.AMDGPU.rsq.f32(float) #3
-
-; Function Attrs: readnone
-declare float @llvm.AMDIL.exp.(float) #3
-
-; Function Attrs: nounwind readonly
-declare float @llvm.pow.f32(float, float) #4
-
-; Function Attrs: nounwind readnone
-declare i32 @llvm.SI.packf16(float, float) #1
-
-; This checks for a bug in the FixSGPRCopies pass where VReg96
-; registers were being identified as belonging to an SGPR register
-; class, which was causing an assertion failure.
-
-; CHECK-LABEL: {{^}}sample_v3:
-; CHECK: image_sample
-; CHECK: image_sample
-; CHECK: exp
-; CHECK: s_endpgm
-define void @sample_v3([17 x <16 x i8>] addrspace(2)* byval, [32 x <16 x i8>] addrspace(2)* byval, [16 x <32 x i8>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
-
-entry:
- %21 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %0, i64 0, i32 0
- %22 = load <16 x i8>, <16 x i8> addrspace(2)* %21, !tbaa !2
- %23 = call float @llvm.SI.load.const(<16 x i8> %22, i32 16)
- %24 = getelementptr [16 x <32 x i8>], [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 0
- %25 = load <32 x i8>, <32 x i8> addrspace(2)* %24, !tbaa !2
- %26 = getelementptr [32 x <16 x i8>], [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 0
- %27 = load <16 x i8>, <16 x i8> addrspace(2)* %26, !tbaa !2
- %28 = fcmp oeq float %23, 0.0
- br i1 %28, label %if, label %else
-
-if:
- %val.if = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> <i32 0, i32 0>, <32 x i8> %25, <16 x i8> %27, i32 2)
- %val.if.0 = extractelement <4 x float> %val.if, i32 0
- %val.if.1 = extractelement <4 x float> %val.if, i32 1
- %val.if.2 = extractelement <4 x float> %val.if, i32 2
- br label %endif
-
-else:
- %val.else = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> <i32 1, i32 0>, <32 x i8> %25, <16 x i8> %27, i32 2)
- %val.else.0 = extractelement <4 x float> %val.else, i32 0
- %val.else.1 = extractelement <4 x float> %val.else, i32 1
- %val.else.2 = extractelement <4 x float> %val.else, i32 2
- br label %endif
-
-endif:
- %val.0 = phi float [%val.if.0, %if], [%val.else.0, %else]
- %val.1 = phi float [%val.if.1, %if], [%val.else.1, %else]
- %val.2 = phi float [%val.if.2, %if], [%val.else.2, %else]
- call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %val.0, float %val.1, float %val.2, float 0.0)
- ret void
-}
-
-!2 = !{!"const", null, i32 1}
-
-; CHECK-LABEL: {{^}}copy1:
-; CHECK: buffer_load_dword
-; CHECK: v_add
-; CHECK: s_endpgm
-define void @copy1(float addrspace(1)* %out, float addrspace(1)* %in0) {
-entry:
- %0 = load float, float addrspace(1)* %in0
- %1 = fcmp oeq float %0, 0.0
- br i1 %1, label %if0, label %endif
-
-if0:
- %2 = bitcast float %0 to i32
- %3 = fcmp olt float %0, 0.0
- br i1 %3, label %if1, label %endif
-
-if1:
- %4 = add i32 %2, 1
- br label %endif
-
-endif:
- %5 = phi i32 [ 0, %entry ], [ %2, %if0 ], [ %4, %if1 ]
- %6 = bitcast i32 %5 to float
- store float %6, float addrspace(1)* %out
- ret void
-}
-
-; This test is just checking that we don't crash or hit an assertion failure.
-; CHECK-LABEL: {{^}}copy2:
-; CHECK: s_endpgm
-
-define void @copy2([17 x <16 x i8>] addrspace(2)* byval, [32 x <16 x i8>] addrspace(2)* byval, [16 x <32 x i8>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
-entry:
- br label %LOOP68
-
-LOOP68:
- %temp4.7 = phi float [ 0.000000e+00, %entry ], [ %v, %ENDIF69 ]
- %t = phi i32 [ 20, %entry ], [ %x, %ENDIF69 ]
- %g = icmp eq i32 0, %t
- %l = bitcast float %temp4.7 to i32
- br i1 %g, label %IF70, label %ENDIF69
-
-IF70:
- %q = icmp ne i32 %l, 13
- %temp.8 = select i1 %q, float 1.000000e+00, float 0.000000e+00
- call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %temp.8, float 0.000000e+00, float 0.000000e+00, float 1.000000e+00)
- ret void
-
-ENDIF69:
- %u = add i32 %l, %t
- %v = bitcast i32 %u to float
- %x = add i32 %t, -1
- br label %LOOP68
-}
-
-attributes #0 = { "ShaderType"="0" }
-
-; This test checks that image_sample resource descriptors aren't loaded into
-; vgprs. The verifier will fail if this happens.
-; CHECK-LABEL: {{^}}sample_rsrc:
-; CHECK: image_sample
-; CHECK: image_sample
-; CHECK: s_endpgm
-define void @sample_rsrc([6 x <16 x i8>] addrspace(2)* byval %arg, [17 x <16 x i8>] addrspace(2)* byval %arg1, [16 x <4 x i32>] addrspace(2)* byval %arg2, [32 x <8 x i32>] addrspace(2)* byval %arg3, float inreg %arg4, i32 inreg %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <3 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, <2 x i32> %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, i32 %arg19, float %arg20, float %arg21) #0 {
-bb:
- %tmp = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %arg1, i32 0, i32 0
- %tmp22 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp, !tbaa !0
- %tmp23 = call float @llvm.SI.load.const(<16 x i8> %tmp22, i32 16)
- %tmp25 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %arg3, i32 0, i32 0
- %tmp26 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp25, !tbaa !0
- %tmp27 = getelementptr [16 x <4 x i32>], [16 x <4 x i32>] addrspace(2)* %arg2, i32 0, i32 0
- %tmp28 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp27, !tbaa !0
- %tmp29 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %arg5, <2 x i32> %arg7)
- %tmp30 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %arg5, <2 x i32> %arg7)
- %tmp31 = bitcast float %tmp23 to i32
- %tmp36 = icmp ne i32 %tmp31, 0
- br i1 %tmp36, label %bb38, label %bb80
-
-bb38: ; preds = %bb
- %tmp52 = bitcast float %tmp29 to i32
- %tmp53 = bitcast float %tmp30 to i32
- %tmp54 = insertelement <2 x i32> undef, i32 %tmp52, i32 0
- %tmp55 = insertelement <2 x i32> %tmp54, i32 %tmp53, i32 1
- %tmp56 = bitcast <8 x i32> %tmp26 to <32 x i8>
- %tmp57 = bitcast <4 x i32> %tmp28 to <16 x i8>
- %tmp58 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %tmp55, <32 x i8> %tmp56, <16 x i8> %tmp57, i32 2)
- br label %bb71
-
-bb80: ; preds = %bb
- %tmp81 = bitcast float %tmp29 to i32
- %tmp82 = bitcast float %tmp30 to i32
- %tmp82.2 = add i32 %tmp82, 1
- %tmp83 = insertelement <2 x i32> undef, i32 %tmp81, i32 0
- %tmp84 = insertelement <2 x i32> %tmp83, i32 %tmp82.2, i32 1
- %tmp85 = bitcast <8 x i32> %tmp26 to <32 x i8>
- %tmp86 = bitcast <4 x i32> %tmp28 to <16 x i8>
- %tmp87 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %tmp84, <32 x i8> %tmp85, <16 x i8> %tmp86, i32 2)
- br label %bb71
-
-bb71: ; preds = %bb80, %bb38
- %tmp72 = phi <4 x float> [ %tmp58, %bb38 ], [ %tmp87, %bb80 ]
- %tmp88 = extractelement <4 x float> %tmp72, i32 0
- call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %tmp88, float %tmp88, float %tmp88, float %tmp88)
- ret void
-}
-
-attributes #0 = { "ShaderType"="0" "unsafe-fp-math"="true" }
-attributes #1 = { nounwind readnone }
Removed: llvm/trunk/test/CodeGen/R600/shared-op-cycle.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/shared-op-cycle.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/shared-op-cycle.ll (original)
+++ llvm/trunk/test/CodeGen/R600/shared-op-cycle.ll (removed)
@@ -1,32 +0,0 @@
-; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
-
-; CHECK: {{^}}main:
-; CHECK: MULADD_IEEE *
-; CHECK-NOT: MULADD_IEEE *
-
-define void @main(<4 x float> inreg %reg0, <4 x float> inreg %reg1, <4 x float> inreg %reg2) #0 {
- %w0 = extractelement <4 x float> %reg0, i32 3
- %w1 = extractelement <4 x float> %reg1, i32 3
- %w2 = extractelement <4 x float> %reg2, i32 3
- %sq0 = fmul float %w0, %w0
- %r0 = fadd float %sq0, 2.0
- %sq1 = fmul float %w1, %w1
- %r1 = fadd float %sq1, 2.0
- %sq2 = fmul float %w2, %w2
- %r2 = fadd float %sq2, 2.0
- %v0 = insertelement <4 x float> undef, float %r0, i32 0
- %v1 = insertelement <4 x float> %v0, float %r1, i32 1
- %v2 = insertelement <4 x float> %v1, float %r2, i32 2
- %res = call float @llvm.AMDGPU.dp4(<4 x float> %v2, <4 x float> %v2)
- %vecres = insertelement <4 x float> undef, float %res, i32 0
- call void @llvm.R600.store.swizzle(<4 x float> %vecres, i32 0, i32 2)
- ret void
-}
-
-; Function Attrs: readnone
-declare float @llvm.AMDGPU.dp4(<4 x float>, <4 x float>) #1
-
-declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
-
-attributes #0 = { "ShaderType"="1" }
-attributes #1 = { readnone }
\ No newline at end of file
Removed: llvm/trunk/test/CodeGen/R600/shl.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/shl.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/shl.ll (original)
+++ llvm/trunk/test/CodeGen/R600/shl.ll (removed)
@@ -1,180 +0,0 @@
-;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG %s
-;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI %s
-;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck --check-prefix=VI %s
-
-;EG: {{^}}shl_v2i32:
-;EG: LSHL {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;EG: LSHL {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-
-;SI: {{^}}shl_v2i32:
-;SI: v_lshl_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-;SI: v_lshl_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-
-;VI: {{^}}shl_v2i32:
-;VI: v_lshlrev_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-;VI: v_lshlrev_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-
-define void @shl_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
- %b_ptr = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %in, i32 1
- %a = load <2 x i32>, <2 x i32> addrspace(1) * %in
- %b = load <2 x i32>, <2 x i32> addrspace(1) * %b_ptr
- %result = shl <2 x i32> %a, %b
- store <2 x i32> %result, <2 x i32> addrspace(1)* %out
- ret void
-}
-
-;EG: {{^}}shl_v4i32:
-;EG: LSHL {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;EG: LSHL {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;EG: LSHL {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;EG: LSHL {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-
-;SI: {{^}}shl_v4i32:
-;SI: v_lshl_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-;SI: v_lshl_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-;SI: v_lshl_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-;SI: v_lshl_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-
-;VI: {{^}}shl_v4i32:
-;VI: v_lshlrev_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-;VI: v_lshlrev_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-;VI: v_lshlrev_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-;VI: v_lshlrev_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-
-define void @shl_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
- %b_ptr = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %in, i32 1
- %a = load <4 x i32>, <4 x i32> addrspace(1) * %in
- %b = load <4 x i32>, <4 x i32> addrspace(1) * %b_ptr
- %result = shl <4 x i32> %a, %b
- store <4 x i32> %result, <4 x i32> addrspace(1)* %out
- ret void
-}
-
-;EG: {{^}}shl_i64:
-;EG: SUB_INT {{\*? *}}[[COMPSH:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHIFT:T[0-9]+\.[XYZW]]]
-;EG: LSHR {{\* *}}[[TEMP:T[0-9]+\.[XYZW]]], [[OPLO:T[0-9]+\.[XYZW]]], {{[[COMPSH]]|PV.[XYZW]}}
-;EG: LSHR {{\*? *}}[[OVERF:T[0-9]+\.[XYZW]]], {{[[TEMP]]|PV.[XYZW]}}, 1
-;EG-DAG: ADD_INT {{\*? *}}[[BIGSH:T[0-9]+\.[XYZW]]], [[SHIFT]], literal
-;EG-DAG: LSHL {{\*? *}}[[HISMTMP:T[0-9]+\.[XYZW]]], [[OPHI:T[0-9]+\.[XYZW]]], [[SHIFT]]
-;EG-DAG: OR_INT {{\*? *}}[[HISM:T[0-9]+\.[XYZW]]], {{[[HISMTMP]]|PV.[XYZW]}}, {{[[OVERF]]|PV.[XYZW]}}
-;EG-DAG: LSHL {{\*? *}}[[LOSM:T[0-9]+\.[XYZW]]], [[OPLO]], {{PS|[[SHIFT]]}}
-;EG-DAG: SETGT_UINT {{\*? *}}[[RESC:T[0-9]+\.[XYZW]]], [[SHIFT]], literal
-;EG-DAG: CNDE_INT {{\*? *}}[[RESLO:T[0-9]+\.[XYZW]]], {{T[0-9]+\.[XYZW]}}
-;EG-DAG: CNDE_INT {{\*? *}}[[RESHI:T[0-9]+\.[XYZW]]], {{T[0-9]+\.[XYZW], .*}}, 0.0
-
-;SI: {{^}}shl_i64:
-;SI: v_lshl_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
-
-;VI: {{^}}shl_i64:
-;VI: v_lshlrev_b64 {{v\[[0-9]+:[0-9]+\], v[0-9]+, v\[[0-9]+:[0-9]+\]}}
-
-define void @shl_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
- %b_ptr = getelementptr i64, i64 addrspace(1)* %in, i64 1
- %a = load i64, i64 addrspace(1) * %in
- %b = load i64, i64 addrspace(1) * %b_ptr
- %result = shl i64 %a, %b
- store i64 %result, i64 addrspace(1)* %out
- ret void
-}
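The EG-DAG lines above spell out the usual 64-bit shift expansion on R600, which as far as I can tell amounts to the following (lo/hi are the 32-bit halves, s the shift amount):

   lo' = lo << s
   hi' = (hi << s) | (lo >> (32 - s))    for s < 32
   lo' = 0,  hi' = lo << (s - 32)        for s >= 32

The lo >> (32 - s) term is built as two LSHRs (by what looks like 31 - s, then by 1) so the s == 0 case never shifts by a full 32, which is why the checks expect an extra 'LSHR ..., 1'; the SETGT_UINT/CNDE_INT pair selects between the two cases.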
-
-;EG: {{^}}shl_v2i64:
-;EG-DAG: SUB_INT {{\*? *}}[[COMPSHA:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHA:T[0-9]+\.[XYZW]]]
-;EG-DAG: SUB_INT {{\*? *}}[[COMPSHB:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHB:T[0-9]+\.[XYZW]]]
-;EG-DAG: LSHR {{\*? *}}[[COMPSHA]]
-;EG-DAG: LSHR {{\*? *}}[[COMPSHB]]
-;EG-DAG: LSHR {{.*}}, 1
-;EG-DAG: LSHR {{.*}}, 1
-;EG-DAG: ADD_INT {{\*? *}}[[BIGSHA:T[0-9]+\.[XYZW]]]{{.*}}, literal
-;EG-DAG: ADD_INT {{\*? *}}[[BIGSHB:T[0-9]+\.[XYZW]]]{{.*}}, literal
-;EG-DAG: LSHL {{.*}}, [[SHA]]
-;EG-DAG: LSHL {{.*}}, [[SHB]]
-;EG-DAG: LSHL {{.*}}, [[SHA]]
-;EG-DAG: LSHL {{.*}}, [[SHB]]
-;EG-DAG: LSHL
-;EG-DAG: LSHL
-;EG-DAG: SETGT_UINT {{\*? *T[0-9]\.[XYZW]}}, [[SHA]], literal
-;EG-DAG: SETGT_UINT {{\*? *T[0-9]\.[XYZW]}}, [[SHB]], literal
-;EG-DAG: CNDE_INT {{.*}}, 0.0
-;EG-DAG: CNDE_INT {{.*}}, 0.0
-;EG-DAG: CNDE_INT
-;EG-DAG: CNDE_INT
-
-;SI: {{^}}shl_v2i64:
-;SI: v_lshl_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
-;SI: v_lshl_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
-
-;VI: {{^}}shl_v2i64:
-;VI: v_lshlrev_b64 {{v\[[0-9]+:[0-9]+\], v[0-9]+, v\[[0-9]+:[0-9]+\]}}
-;VI: v_lshlrev_b64 {{v\[[0-9]+:[0-9]+\], v[0-9]+, v\[[0-9]+:[0-9]+\]}}
-
-define void @shl_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* %in) {
- %b_ptr = getelementptr <2 x i64>, <2 x i64> addrspace(1)* %in, i64 1
- %a = load <2 x i64>, <2 x i64> addrspace(1) * %in
- %b = load <2 x i64>, <2 x i64> addrspace(1) * %b_ptr
- %result = shl <2 x i64> %a, %b
- store <2 x i64> %result, <2 x i64> addrspace(1)* %out
- ret void
-}
-
-;EG: {{^}}shl_v4i64:
-;EG-DAG: SUB_INT {{\*? *}}[[COMPSHA:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHA:T[0-9]+\.[XYZW]]]
-;EG-DAG: SUB_INT {{\*? *}}[[COMPSHB:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHB:T[0-9]+\.[XYZW]]]
-;EG-DAG: SUB_INT {{\*? *}}[[COMPSHC:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHC:T[0-9]+\.[XYZW]]]
-;EG-DAG: SUB_INT {{\*? *}}[[COMPSHD:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHD:T[0-9]+\.[XYZW]]]
-;EG-DAG: LSHR {{\*? *}}[[COMPSHA]]
-;EG-DAG: LSHR {{\*? *}}[[COMPSHB]]
-;EG-DAG: LSHR {{\*? *}}[[COMPSHC]]
-;EG-DAG: LSHR {{\*? *}}[[COMPSHD]]
-;EG-DAG: LSHR {{.*}}, 1
-;EG-DAG: LSHR {{.*}}, 1
-;EG-DAG: LSHR {{.*}}, 1
-;EG-DAG: LSHR {{.*}}, 1
-;EG-DAG: ADD_INT {{\*? *}}[[BIGSHA:T[0-9]+\.[XYZW]]]{{.*}}, literal
-;EG-DAG: ADD_INT {{\*? *}}[[BIGSHB:T[0-9]+\.[XYZW]]]{{.*}}, literal
-;EG-DAG: ADD_INT {{\*? *}}[[BIGSHC:T[0-9]+\.[XYZW]]]{{.*}}, literal
-;EG-DAG: ADD_INT {{\*? *}}[[BIGSHD:T[0-9]+\.[XYZW]]]{{.*}}, literal
-;EG-DAG: LSHL {{.*}}, [[SHA]]
-;EG-DAG: LSHL {{.*}}, [[SHB]]
-;EG-DAG: LSHL {{.*}}, [[SHC]]
-;EG-DAG: LSHL {{.*}}, [[SHD]]
-;EG-DAG: LSHL {{.*}}, [[SHA]]
-;EG-DAG: LSHL {{.*}}, [[SHB]]
-;EG-DAG: LSHL {{.*}}, [[SHC]]
-;EG-DAG: LSHL {{.*}}, [[SHD]]
-;EG-DAG: LSHL
-;EG-DAG: LSHL
-;EG-DAG: LSHL
-;EG-DAG: LSHL
-;EG-DAG: SETGT_UINT {{\*? *T[0-9]\.[XYZW]}}, [[SHA]], literal
-;EG-DAG: SETGT_UINT {{\*? *T[0-9]\.[XYZW]}}, [[SHB]], literal
-;EG-DAG: SETGT_UINT {{\*? *T[0-9]\.[XYZW]}}, [[SHC]], literal
-;EG-DAG: SETGT_UINT {{\*? *T[0-9]\.[XYZW]}}, [[SHD]], literal
-;EG-DAG: CNDE_INT {{.*}}, 0.0
-;EG-DAG: CNDE_INT {{.*}}, 0.0
-;EG-DAG: CNDE_INT {{.*}}, 0.0
-;EG-DAG: CNDE_INT {{.*}}, 0.0
-;EG-DAG: CNDE_INT
-;EG-DAG: CNDE_INT
-;EG-DAG: CNDE_INT
-;EG-DAG: CNDE_INT
-
-;SI: {{^}}shl_v4i64:
-;SI: v_lshl_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
-;SI: v_lshl_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
-;SI: v_lshl_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
-;SI: v_lshl_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
-
-;VI: {{^}}shl_v4i64:
-;VI: v_lshlrev_b64 {{v\[[0-9]+:[0-9]+\], v[0-9]+, v\[[0-9]+:[0-9]+\]}}
-;VI: v_lshlrev_b64 {{v\[[0-9]+:[0-9]+\], v[0-9]+, v\[[0-9]+:[0-9]+\]}}
-;VI: v_lshlrev_b64 {{v\[[0-9]+:[0-9]+\], v[0-9]+, v\[[0-9]+:[0-9]+\]}}
-;VI: v_lshlrev_b64 {{v\[[0-9]+:[0-9]+\], v[0-9]+, v\[[0-9]+:[0-9]+\]}}
-
-define void @shl_v4i64(<4 x i64> addrspace(1)* %out, <4 x i64> addrspace(1)* %in) {
- %b_ptr = getelementptr <4 x i64>, <4 x i64> addrspace(1)* %in, i64 1
- %a = load <4 x i64>, <4 x i64> addrspace(1) * %in
- %b = load <4 x i64>, <4 x i64> addrspace(1) * %b_ptr
- %result = shl <4 x i64> %a, %b
- store <4 x i64> %result, <4 x i64> addrspace(1)* %out
- ret void
-}
Removed: llvm/trunk/test/CodeGen/R600/shl_add_constant.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/shl_add_constant.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/shl_add_constant.ll (original)
+++ llvm/trunk/test/CodeGen/R600/shl_add_constant.ll (removed)
@@ -1,90 +0,0 @@
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
-
-declare i32 @llvm.r600.read.tidig.x() #1
-
-; Test with inline immediate
-
-; FUNC-LABEL: {{^}}shl_2_add_9_i32:
-; SI: v_lshlrev_b32_e32 [[REG:v[0-9]+]], 2, {{v[0-9]+}}
-; SI: v_add_i32_e32 [[RESULT:v[0-9]+]], 36, [[REG]]
-; SI: buffer_store_dword [[RESULT]]
-; SI: s_endpgm
-define void @shl_2_add_9_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
- %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
- %ptr = getelementptr i32, i32 addrspace(1)* %in, i32 %tid.x
- %val = load i32, i32 addrspace(1)* %ptr, align 4
- %add = add i32 %val, 9
- %result = shl i32 %add, 2
- store i32 %result, i32 addrspace(1)* %out, align 4
- ret void
-}
-
-; FUNC-LABEL: {{^}}shl_2_add_9_i32_2_add_uses:
-; SI-DAG: v_add_i32_e32 [[ADDREG:v[0-9]+]], 9, {{v[0-9]+}}
-; SI-DAG: v_lshlrev_b32_e32 [[SHLREG:v[0-9]+]], 2, {{v[0-9]+}}
-; SI-DAG: buffer_store_dword [[ADDREG]]
-; SI-DAG: buffer_store_dword [[SHLREG]]
-; SI: s_endpgm
-define void @shl_2_add_9_i32_2_add_uses(i32 addrspace(1)* %out0, i32 addrspace(1)* %out1, i32 addrspace(1)* %in) #0 {
- %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
- %ptr = getelementptr i32, i32 addrspace(1)* %in, i32 %tid.x
- %val = load i32, i32 addrspace(1)* %ptr, align 4
- %add = add i32 %val, 9
- %result = shl i32 %add, 2
- store i32 %result, i32 addrspace(1)* %out0, align 4
- store i32 %add, i32 addrspace(1)* %out1, align 4
- ret void
-}
-
-; Test with add literal constant
-
-; FUNC-LABEL: {{^}}shl_2_add_999_i32:
-; SI: v_lshlrev_b32_e32 [[REG:v[0-9]+]], 2, {{v[0-9]+}}
-; SI: v_add_i32_e32 [[RESULT:v[0-9]+]], 0xf9c, [[REG]]
-; SI: buffer_store_dword [[RESULT]]
-; SI: s_endpgm
-define void @shl_2_add_999_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
- %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
- %ptr = getelementptr i32, i32 addrspace(1)* %in, i32 %tid.x
- %val = load i32, i32 addrspace(1)* %ptr, align 4
- %shl = add i32 %val, 999
- %result = shl i32 %shl, 2
- store i32 %result, i32 addrspace(1)* %out, align 4
- ret void
-}
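The interesting difference between these two tests is what the reassociated constant becomes: 9 << 2 = 36 still fits as a VALU inline constant, while 999 << 2 = 3996 = 0xf9c does not (assuming the usual small-integer inline-constant range), so the second case has to carry the 0xf9c literal that the check looks for.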
-
-; FUNC-LABEL: {{^}}test_add_shl_add_constant:
-; SI-DAG: s_load_dword [[X:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
-; SI-DAG: s_load_dword [[Y:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
-; SI: s_lshl_b32 [[SHL3:s[0-9]+]], [[X]], 3
-; SI: s_add_i32 [[TMP:s[0-9]+]], [[SHL3]], [[Y]]
-; SI: s_add_i32 [[RESULT:s[0-9]+]], [[TMP]], 0x3d8
-; SI: v_mov_b32_e32 [[VRESULT:v[0-9]+]], [[RESULT]]
-; SI: buffer_store_dword [[VRESULT]]
-define void @test_add_shl_add_constant(i32 addrspace(1)* %out, i32 %x, i32 %y) #0 {
- %add.0 = add i32 %x, 123
- %shl = shl i32 %add.0, 3
- %add.1 = add i32 %shl, %y
- store i32 %add.1, i32 addrspace(1)* %out, align 4
- ret void
-}
-
-; FUNC-LABEL: {{^}}test_add_shl_add_constant_inv:
-; SI-DAG: s_load_dword [[X:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
-; SI-DAG: s_load_dword [[Y:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
-; SI: s_lshl_b32 [[SHL3:s[0-9]+]], [[X]], 3
-; SI: s_add_i32 [[TMP:s[0-9]+]], [[SHL3]], [[Y]]
-; SI: s_add_i32 [[RESULT:s[0-9]+]], [[TMP]], 0x3d8
-; SI: v_mov_b32_e32 [[VRESULT:v[0-9]+]], [[RESULT]]
-; SI: buffer_store_dword [[VRESULT]]
-
-define void @test_add_shl_add_constant_inv(i32 addrspace(1)* %out, i32 %x, i32 %y) #0 {
- %add.0 = add i32 %x, 123
- %shl = shl i32 %add.0, 3
- %add.1 = add i32 %y, %shl
- store i32 %add.1, i32 addrspace(1)* %out, align 4
- ret void
-}
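In both variants the add of 123 is reassociated through the shift: (x + 123) << 3 = (x << 3) + 984, and 984 = 0x3d8, which is the literal the s_add_i32 check expects. The _inv variant only swaps the operand order of the final add.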
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
Removed: llvm/trunk/test/CodeGen/R600/shl_add_ptr.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/shl_add_ptr.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/shl_add_ptr.ll (original)
+++ llvm/trunk/test/CodeGen/R600/shl_add_ptr.ll (removed)
@@ -1,284 +0,0 @@
-; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs -mattr=+load-store-opt -enable-misched < %s | FileCheck -check-prefix=SI %s
-; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs -mattr=+load-store-opt -enable-misched < %s | FileCheck -check-prefix=SI %s
-
-; Test that a shift of a pointer index that includes a constant add is
-; folded into the constant offset addressing mode, even if the add has
-; multiple uses. This is relevant to accessing 2 separate, adjacent
-; LDS globals.
-
-
-declare i32 @llvm.r600.read.tidig.x() #1
-
-@lds0 = addrspace(3) global [512 x float] undef, align 4
-@lds1 = addrspace(3) global [512 x float] undef, align 4
-
-
-; Make sure the (add tid, 2) << 2 gets folded into the ds's offset as (tid << 2) + 8
-
-; SI-LABEL: {{^}}load_shl_base_lds_0:
-; SI: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
-; SI: ds_read_b32 {{v[0-9]+}}, [[PTR]] offset:8
-; SI: s_endpgm
-define void @load_shl_base_lds_0(float addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
- %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
- %idx.0 = add nsw i32 %tid.x, 2
- %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds0, i32 0, i32 %idx.0
- %val0 = load float, float addrspace(3)* %arrayidx0, align 4
- store i32 %idx.0, i32 addrspace(1)* %add_use, align 4
- store float %val0, float addrspace(1)* %out
- ret void
-}
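The fold described above is plain reassociation,

   (tid + 2) << 2  ==  (tid << 2) + 8

with the +8 absorbed into the ds_read_b32 immediate offset. Since that offset field is (I believe) a 16-bit byte offset, which is also why the max_offset test below stops at 65535, the add drops out of the address computation entirely.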
-
-; Make sure once the first use is folded into the addressing mode, the
-; remaining add use goes through the normal shl + add constant fold.
-
-; SI-LABEL: {{^}}load_shl_base_lds_1:
-; SI: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
-; SI: ds_read_b32 [[RESULT:v[0-9]+]], [[PTR]] offset:8
-; SI: v_add_i32_e32 [[ADDUSE:v[0-9]+]], 8, v{{[0-9]+}}
-; SI-DAG: buffer_store_dword [[RESULT]]
-; SI-DAG: buffer_store_dword [[ADDUSE]]
-; SI: s_endpgm
-define void @load_shl_base_lds_1(float addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
- %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
- %idx.0 = add nsw i32 %tid.x, 2
- %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds0, i32 0, i32 %idx.0
- %val0 = load float, float addrspace(3)* %arrayidx0, align 4
- %shl_add_use = shl i32 %idx.0, 2
- store i32 %shl_add_use, i32 addrspace(1)* %add_use, align 4
- store float %val0, float addrspace(1)* %out
- ret void
-}
-
-@maxlds = addrspace(3) global [65536 x i8] undef, align 4
-
-; SI-LABEL: {{^}}load_shl_base_lds_max_offset:
-; SI: ds_read_u8 v{{[0-9]+}}, v{{[0-9]+}} offset:65535
-; SI: s_endpgm
-define void @load_shl_base_lds_max_offset(i8 addrspace(1)* %out, i8 addrspace(3)* %lds, i32 addrspace(1)* %add_use) #0 {
- %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
- %idx.0 = add nsw i32 %tid.x, 65535
- %arrayidx0 = getelementptr inbounds [65536 x i8], [65536 x i8] addrspace(3)* @maxlds, i32 0, i32 %idx.0
- %val0 = load i8, i8 addrspace(3)* %arrayidx0
- store i32 %idx.0, i32 addrspace(1)* %add_use
- store i8 %val0, i8 addrspace(1)* %out
- ret void
-}
-
-; The two globals are placed adjacent in memory, so the same base
-; pointer can be used with an offset into the second one.
-
-; SI-LABEL: {{^}}load_shl_base_lds_2:
-; SI: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
-; SI: s_mov_b32 m0, -1
-; SI-NEXT: ds_read2st64_b32 {{v\[[0-9]+:[0-9]+\]}}, [[PTR]] offset0:1 offset1:9
-; SI: s_endpgm
-define void @load_shl_base_lds_2(float addrspace(1)* %out) #0 {
- %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
- %idx.0 = add nsw i32 %tid.x, 64
- %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds0, i32 0, i32 %idx.0
- %val0 = load float, float addrspace(3)* %arrayidx0, align 4
- %arrayidx1 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds1, i32 0, i32 %idx.0
- %val1 = load float, float addrspace(3)* %arrayidx1, align 4
- %sum = fadd float %val0, %val1
- store float %sum, float addrspace(1)* %out, align 4
- ret void
-}
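For the read2st64 form, the offset0/offset1 immediates count in units of 64 dwords (256 bytes) rather than bytes, as far as I know. With idx.0 = tid + 64, the lds0 element sits at tid*4 + 256 (offset0:1), and with lds1 laid out directly after lds0's 512 floats (2048 bytes) the lds1 element sits at tid*4 + 2048 + 256 = tid*4 + 9*256 (offset1:9), matching the ds_read2st64_b32 check.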
-
-; SI-LABEL: {{^}}store_shl_base_lds_0:
-; SI: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
-; SI: ds_write_b32 [[PTR]], {{v[0-9]+}} offset:8
-; SI: s_endpgm
-define void @store_shl_base_lds_0(float addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
- %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
- %idx.0 = add nsw i32 %tid.x, 2
- %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds0, i32 0, i32 %idx.0
- store float 1.0, float addrspace(3)* %arrayidx0, align 4
- store i32 %idx.0, i32 addrspace(1)* %add_use, align 4
- ret void
-}
-
-
-; --------------------------------------------------------------------------------
-; Atomics.
-
-@lds2 = addrspace(3) global [512 x i32] undef, align 4
-
-; define void @atomic_load_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
-; %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
-; %idx.0 = add nsw i32 %tid.x, 2
-; %arrayidx0 = getelementptr inbounds [512 x i32], [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
-; %val = load atomic i32, i32 addrspace(3)* %arrayidx0 seq_cst, align 4
-; store i32 %val, i32 addrspace(1)* %out, align 4
-; store i32 %idx.0, i32 addrspace(1)* %add_use, align 4
-; ret void
-; }
-
-
-; SI-LABEL: {{^}}atomic_cmpxchg_shl_base_lds_0:
-; SI: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
-; SI: ds_cmpst_rtn_b32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}}, {{v[0-9]+}} offset:8
-; SI: s_endpgm
-define void @atomic_cmpxchg_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use, i32 %swap) #0 {
- %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
- %idx.0 = add nsw i32 %tid.x, 2
- %arrayidx0 = getelementptr inbounds [512 x i32], [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
- %pair = cmpxchg i32 addrspace(3)* %arrayidx0, i32 7, i32 %swap seq_cst monotonic
- %result = extractvalue { i32, i1 } %pair, 0
- store i32 %result, i32 addrspace(1)* %out, align 4
- store i32 %idx.0, i32 addrspace(1)* %add_use, align 4
- ret void
-}
-
-; SI-LABEL: {{^}}atomic_swap_shl_base_lds_0:
-; SI: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
-; SI: ds_wrxchg_rtn_b32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
-; SI: s_endpgm
-define void @atomic_swap_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
- %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
- %idx.0 = add nsw i32 %tid.x, 2
- %arrayidx0 = getelementptr inbounds [512 x i32], [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
- %val = atomicrmw xchg i32 addrspace(3)* %arrayidx0, i32 3 seq_cst
- store i32 %val, i32 addrspace(1)* %out, align 4
- store i32 %idx.0, i32 addrspace(1)* %add_use, align 4
- ret void
-}
-
-; SI-LABEL: {{^}}atomic_add_shl_base_lds_0:
-; SI: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
-; SI: ds_add_rtn_u32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
-; SI: s_endpgm
-define void @atomic_add_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
- %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
- %idx.0 = add nsw i32 %tid.x, 2
- %arrayidx0 = getelementptr inbounds [512 x i32], [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
- %val = atomicrmw add i32 addrspace(3)* %arrayidx0, i32 3 seq_cst
- store i32 %val, i32 addrspace(1)* %out, align 4
- store i32 %idx.0, i32 addrspace(1)* %add_use, align 4
- ret void
-}
-
-; SI-LABEL: {{^}}atomic_sub_shl_base_lds_0:
-; SI: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
-; SI: ds_sub_rtn_u32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
-; SI: s_endpgm
-define void @atomic_sub_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
- %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
- %idx.0 = add nsw i32 %tid.x, 2
- %arrayidx0 = getelementptr inbounds [512 x i32], [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
- %val = atomicrmw sub i32 addrspace(3)* %arrayidx0, i32 3 seq_cst
- store i32 %val, i32 addrspace(1)* %out, align 4
- store i32 %idx.0, i32 addrspace(1)* %add_use, align 4
- ret void
-}
-
-; SI-LABEL: {{^}}atomic_and_shl_base_lds_0:
-; SI: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
-; SI: ds_and_rtn_b32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
-; SI: s_endpgm
-define void @atomic_and_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
- %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
- %idx.0 = add nsw i32 %tid.x, 2
- %arrayidx0 = getelementptr inbounds [512 x i32], [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
- %val = atomicrmw and i32 addrspace(3)* %arrayidx0, i32 3 seq_cst
- store i32 %val, i32 addrspace(1)* %out, align 4
- store i32 %idx.0, i32 addrspace(1)* %add_use, align 4
- ret void
-}
-
-; SI-LABEL: {{^}}atomic_or_shl_base_lds_0:
-; SI: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
-; SI: ds_or_rtn_b32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
-; SI: s_endpgm
-define void @atomic_or_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
- %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
- %idx.0 = add nsw i32 %tid.x, 2
- %arrayidx0 = getelementptr inbounds [512 x i32], [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
- %val = atomicrmw or i32 addrspace(3)* %arrayidx0, i32 3 seq_cst
- store i32 %val, i32 addrspace(1)* %out, align 4
- store i32 %idx.0, i32 addrspace(1)* %add_use, align 4
- ret void
-}
-
-; SI-LABEL: {{^}}atomic_xor_shl_base_lds_0:
-; SI: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
-; SI: ds_xor_rtn_b32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
-; SI: s_endpgm
-define void @atomic_xor_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
- %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
- %idx.0 = add nsw i32 %tid.x, 2
- %arrayidx0 = getelementptr inbounds [512 x i32], [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
- %val = atomicrmw xor i32 addrspace(3)* %arrayidx0, i32 3 seq_cst
- store i32 %val, i32 addrspace(1)* %out, align 4
- store i32 %idx.0, i32 addrspace(1)* %add_use, align 4
- ret void
-}
-
-; define void @atomic_nand_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
-; %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
-; %idx.0 = add nsw i32 %tid.x, 2
-; %arrayidx0 = getelementptr inbounds [512 x i32], [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
-; %val = atomicrmw nand i32 addrspace(3)* %arrayidx0, i32 3 seq_cst
-; store i32 %val, i32 addrspace(1)* %out, align 4
-; store i32 %idx.0, i32 addrspace(1)* %add_use, align 4
-; ret void
-; }
-
-; SI-LABEL: {{^}}atomic_min_shl_base_lds_0:
-; SI: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
-; SI: ds_min_rtn_i32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
-; SI: s_endpgm
-define void @atomic_min_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
- %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
- %idx.0 = add nsw i32 %tid.x, 2
- %arrayidx0 = getelementptr inbounds [512 x i32], [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
- %val = atomicrmw min i32 addrspace(3)* %arrayidx0, i32 3 seq_cst
- store i32 %val, i32 addrspace(1)* %out, align 4
- store i32 %idx.0, i32 addrspace(1)* %add_use, align 4
- ret void
-}
-
-; SI-LABEL: {{^}}atomic_max_shl_base_lds_0:
-; SI: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
-; SI: ds_max_rtn_i32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
-; SI: s_endpgm
-define void @atomic_max_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
- %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
- %idx.0 = add nsw i32 %tid.x, 2
- %arrayidx0 = getelementptr inbounds [512 x i32], [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
- %val = atomicrmw max i32 addrspace(3)* %arrayidx0, i32 3 seq_cst
- store i32 %val, i32 addrspace(1)* %out, align 4
- store i32 %idx.0, i32 addrspace(1)* %add_use, align 4
- ret void
-}
-
-; SI-LABEL: {{^}}atomic_umin_shl_base_lds_0:
-; SI: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
-; SI: ds_min_rtn_u32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
-; SI: s_endpgm
-define void @atomic_umin_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
- %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
- %idx.0 = add nsw i32 %tid.x, 2
- %arrayidx0 = getelementptr inbounds [512 x i32], [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
- %val = atomicrmw umin i32 addrspace(3)* %arrayidx0, i32 3 seq_cst
- store i32 %val, i32 addrspace(1)* %out, align 4
- store i32 %idx.0, i32 addrspace(1)* %add_use, align 4
- ret void
-}
-
-; SI-LABEL: {{^}}atomic_umax_shl_base_lds_0:
-; SI: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
-; SI: ds_max_rtn_u32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
-; SI: s_endpgm
-define void @atomic_umax_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
- %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
- %idx.0 = add nsw i32 %tid.x, 2
- %arrayidx0 = getelementptr inbounds [512 x i32], [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
- %val = atomicrmw umax i32 addrspace(3)* %arrayidx0, i32 3 seq_cst
- store i32 %val, i32 addrspace(1)* %out, align 4
- store i32 %idx.0, i32 addrspace(1)* %add_use, align 4
- ret void
-}
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
Removed: llvm/trunk/test/CodeGen/R600/si-annotate-cf-assertion.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/si-annotate-cf-assertion.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/si-annotate-cf-assertion.ll (original)
+++ llvm/trunk/test/CodeGen/R600/si-annotate-cf-assertion.ll (removed)
@@ -1,25 +0,0 @@
-; REQUIRES: asserts
-; XFAIL: *
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs -asm-verbose=false < %s | FileCheck %s
-; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs -asm-verbose=false < %s | FileCheck %s
-
-
-define void @test(i32 addrspace(1)* %g, i8 addrspace(3)* %l, i32 %x) nounwind {
-; CHECK-LABEL: {{^}}test:
-
-entry:
- switch i32 %x, label %sw.default [
- i32 0, label %sw.bb
- i32 60, label %sw.bb
- ]
-
-sw.bb:
- unreachable
-
-sw.default:
- unreachable
-
-sw.epilog:
- ret void
-}
-
Removed: llvm/trunk/test/CodeGen/R600/si-annotate-cf.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/si-annotate-cf.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/si-annotate-cf.ll (original)
+++ llvm/trunk/test/CodeGen/R600/si-annotate-cf.ll (removed)
@@ -1,63 +0,0 @@
-; RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI --check-prefix=FUNC %s
-; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck --check-prefix=SI --check-prefix=FUNC %s
-
-; FUNC-LABEL: {{^}}break_inserted_outside_of_loop:
-
-; SI: [[LOOP_LABEL:[A-Z0-9]+]]:
-; Lowered break instruction:
-; SI: s_or_b64
-; Lowered Loop instruction:
-; SI: s_andn2_b64
-; SI: s_cbranch_execnz [[LOOP_LABEL]]
-; SI: s_endpgm
-define void @break_inserted_outside_of_loop(i32 addrspace(1)* %out, i32 %a, i32 %b) {
-main_body:
- %0 = and i32 %a, %b
- %1 = trunc i32 %0 to i1
- br label %ENDIF
-
-ENDLOOP:
- store i32 0, i32 addrspace(1)* %out
- ret void
-
-ENDIF:
- br i1 %1, label %ENDLOOP, label %ENDIF
-}
-
-
-; FUNC-LABEL: {{^}}phi_cond_outside_loop:
-; FIXME: This could be folded into the s_or_b64 instruction
-; SI: s_mov_b64 [[ZERO:s\[[0-9]+:[0-9]+\]]], 0
-; SI: [[LOOP_LABEL:[A-Z0-9]+]]
-; SI: v_cmp_ne_i32_e32 vcc, 0, v{{[0-9]+}}
-
-; SI_IF_BREAK instruction:
-; SI: s_or_b64 [[BREAK:s\[[0-9]+:[0-9]+\]]], vcc, [[ZERO]]
-
-; SI_LOOP instruction:
-; SI: s_andn2_b64 exec, exec, [[BREAK]]
-; SI: s_cbranch_execnz [[LOOP_LABEL]]
-; SI: s_endpgm
-
-define void @phi_cond_outside_loop(i32 %a, i32 %b) {
-entry:
- %0 = icmp eq i32 %a , 0
- br i1 %0, label %if, label %else
-
-if:
- br label %endif
-
-else:
- %1 = icmp eq i32 %b, 0
- br label %endif
-
-endif:
- %2 = phi i1 [0, %if], [%1, %else]
- br label %loop
-
-loop:
- br i1 %2, label %exit, label %loop
-
-exit:
- ret void
-}
Removed: llvm/trunk/test/CodeGen/R600/si-lod-bias.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/si-lod-bias.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/si-lod-bias.ll (original)
+++ llvm/trunk/test/CodeGen/R600/si-lod-bias.ll (removed)
@@ -1,52 +0,0 @@
-;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s
-;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s
-
-; This shader has the potential to generate illegal VGPR to SGPR copies if
-; the wrong register class is used for the REG_SEQUENCE instructions.
-
-; CHECK: {{^}}main:
-; CHECK: image_sample_b v{{\[[0-9]:[0-9]\]}}, 15, 0, 0, 0, 0, 0, 0, 0, v{{\[[0-9]:[0-9]\]}}
-
-define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
-main_body:
- %20 = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %0, i32 0
- %21 = load <16 x i8>, <16 x i8> addrspace(2)* %20, !tbaa !1
- %22 = call float @llvm.SI.load.const(<16 x i8> %21, i32 16)
- %23 = getelementptr <32 x i8>, <32 x i8> addrspace(2)* %2, i32 0
- %24 = load <32 x i8>, <32 x i8> addrspace(2)* %23, !tbaa !1
- %25 = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %1, i32 0
- %26 = load <16 x i8>, <16 x i8> addrspace(2)* %25, !tbaa !1
- %27 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %3, <2 x i32> %5)
- %28 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %3, <2 x i32> %5)
- %29 = bitcast float %22 to i32
- %30 = bitcast float %27 to i32
- %31 = bitcast float %28 to i32
- %32 = insertelement <4 x i32> undef, i32 %29, i32 0
- %33 = insertelement <4 x i32> %32, i32 %30, i32 1
- %34 = insertelement <4 x i32> %33, i32 %31, i32 2
- %35 = insertelement <4 x i32> %34, i32 undef, i32 3
- %36 = call <4 x float> @llvm.SI.sampleb.v4i32(<4 x i32> %35, <32 x i8> %24, <16 x i8> %26, i32 2)
- %37 = extractelement <4 x float> %36, i32 0
- %38 = extractelement <4 x float> %36, i32 1
- %39 = extractelement <4 x float> %36, i32 2
- %40 = extractelement <4 x float> %36, i32 3
- call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %37, float %38, float %39, float %40)
- ret void
-}
-
-; Function Attrs: nounwind readnone
-declare float @llvm.SI.load.const(<16 x i8>, i32) #1
-
-; Function Attrs: nounwind readnone
-declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1
-
-; Function Attrs: nounwind readnone
-declare <4 x float> @llvm.SI.sampleb.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1
-
-declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
-
-attributes #0 = { "ShaderType"="0" }
-attributes #1 = { nounwind readnone }
-
-!0 = !{!"const", null}
-!1 = !{!0, !0, i64 0, i32 1}
Removed: llvm/trunk/test/CodeGen/R600/si-sgpr-spill.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/si-sgpr-spill.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/si-sgpr-spill.ll (original)
+++ llvm/trunk/test/CodeGen/R600/si-sgpr-spill.ll (removed)
@@ -1,1568 +0,0 @@
-; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck %s
-; RUN: llc -march=amdgcn -mcpu=tonga < %s | FileCheck %s
-
-; These tests check that the compiler won't crash when it needs to spill
-; SGPRs.
-
-; CHECK-LABEL: {{^}}main:
-; CHECK: s_wqm
-; Writing to M0 from an SMRD instruction will hang the GPU.
-; CHECK-NOT: s_buffer_load_dword m0
-; CHECK: s_endpgm
-@ddxy_lds = external addrspace(3) global [64 x i32]
-
-define void @main([17 x <16 x i8>] addrspace(2)* byval, [32 x <16 x i8>] addrspace(2)* byval, [16 x <32 x i8>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
-main_body:
- %21 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %0, i64 0, i32 0
- %22 = load <16 x i8>, <16 x i8> addrspace(2)* %21, !tbaa !0
- %23 = call float @llvm.SI.load.const(<16 x i8> %22, i32 96)
- %24 = call float @llvm.SI.load.const(<16 x i8> %22, i32 100)
- %25 = call float @llvm.SI.load.const(<16 x i8> %22, i32 104)
- %26 = call float @llvm.SI.load.const(<16 x i8> %22, i32 112)
- %27 = call float @llvm.SI.load.const(<16 x i8> %22, i32 116)
- %28 = call float @llvm.SI.load.const(<16 x i8> %22, i32 120)
- %29 = call float @llvm.SI.load.const(<16 x i8> %22, i32 128)
- %30 = call float @llvm.SI.load.const(<16 x i8> %22, i32 132)
- %31 = call float @llvm.SI.load.const(<16 x i8> %22, i32 140)
- %32 = call float @llvm.SI.load.const(<16 x i8> %22, i32 144)
- %33 = call float @llvm.SI.load.const(<16 x i8> %22, i32 160)
- %34 = call float @llvm.SI.load.const(<16 x i8> %22, i32 176)
- %35 = call float @llvm.SI.load.const(<16 x i8> %22, i32 180)
- %36 = call float @llvm.SI.load.const(<16 x i8> %22, i32 184)
- %37 = call float @llvm.SI.load.const(<16 x i8> %22, i32 192)
- %38 = call float @llvm.SI.load.const(<16 x i8> %22, i32 196)
- %39 = call float @llvm.SI.load.const(<16 x i8> %22, i32 200)
- %40 = call float @llvm.SI.load.const(<16 x i8> %22, i32 208)
- %41 = call float @llvm.SI.load.const(<16 x i8> %22, i32 212)
- %42 = call float @llvm.SI.load.const(<16 x i8> %22, i32 216)
- %43 = call float @llvm.SI.load.const(<16 x i8> %22, i32 224)
- %44 = call float @llvm.SI.load.const(<16 x i8> %22, i32 240)
- %45 = call float @llvm.SI.load.const(<16 x i8> %22, i32 244)
- %46 = call float @llvm.SI.load.const(<16 x i8> %22, i32 248)
- %47 = call float @llvm.SI.load.const(<16 x i8> %22, i32 256)
- %48 = call float @llvm.SI.load.const(<16 x i8> %22, i32 272)
- %49 = call float @llvm.SI.load.const(<16 x i8> %22, i32 276)
- %50 = call float @llvm.SI.load.const(<16 x i8> %22, i32 280)
- %51 = call float @llvm.SI.load.const(<16 x i8> %22, i32 288)
- %52 = call float @llvm.SI.load.const(<16 x i8> %22, i32 292)
- %53 = call float @llvm.SI.load.const(<16 x i8> %22, i32 296)
- %54 = call float @llvm.SI.load.const(<16 x i8> %22, i32 304)
- %55 = call float @llvm.SI.load.const(<16 x i8> %22, i32 308)
- %56 = call float @llvm.SI.load.const(<16 x i8> %22, i32 312)
- %57 = call float @llvm.SI.load.const(<16 x i8> %22, i32 368)
- %58 = call float @llvm.SI.load.const(<16 x i8> %22, i32 372)
- %59 = call float @llvm.SI.load.const(<16 x i8> %22, i32 376)
- %60 = call float @llvm.SI.load.const(<16 x i8> %22, i32 384)
- %61 = getelementptr [16 x <32 x i8>], [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 0
- %62 = load <32 x i8>, <32 x i8> addrspace(2)* %61, !tbaa !0
- %63 = getelementptr [32 x <16 x i8>], [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 0
- %64 = load <16 x i8>, <16 x i8> addrspace(2)* %63, !tbaa !0
- %65 = getelementptr [16 x <32 x i8>], [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 1
- %66 = load <32 x i8>, <32 x i8> addrspace(2)* %65, !tbaa !0
- %67 = getelementptr [32 x <16 x i8>], [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 1
- %68 = load <16 x i8>, <16 x i8> addrspace(2)* %67, !tbaa !0
- %69 = getelementptr [16 x <32 x i8>], [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 2
- %70 = load <32 x i8>, <32 x i8> addrspace(2)* %69, !tbaa !0
- %71 = getelementptr [32 x <16 x i8>], [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 2
- %72 = load <16 x i8>, <16 x i8> addrspace(2)* %71, !tbaa !0
- %73 = getelementptr [16 x <32 x i8>], [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 3
- %74 = load <32 x i8>, <32 x i8> addrspace(2)* %73, !tbaa !0
- %75 = getelementptr [32 x <16 x i8>], [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 3
- %76 = load <16 x i8>, <16 x i8> addrspace(2)* %75, !tbaa !0
- %77 = getelementptr [16 x <32 x i8>], [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 4
- %78 = load <32 x i8>, <32 x i8> addrspace(2)* %77, !tbaa !0
- %79 = getelementptr [32 x <16 x i8>], [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 4
- %80 = load <16 x i8>, <16 x i8> addrspace(2)* %79, !tbaa !0
- %81 = getelementptr [16 x <32 x i8>], [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 5
- %82 = load <32 x i8>, <32 x i8> addrspace(2)* %81, !tbaa !0
- %83 = getelementptr [32 x <16 x i8>], [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 5
- %84 = load <16 x i8>, <16 x i8> addrspace(2)* %83, !tbaa !0
- %85 = getelementptr [16 x <32 x i8>], [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 6
- %86 = load <32 x i8>, <32 x i8> addrspace(2)* %85, !tbaa !0
- %87 = getelementptr [32 x <16 x i8>], [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 6
- %88 = load <16 x i8>, <16 x i8> addrspace(2)* %87, !tbaa !0
- %89 = getelementptr [16 x <32 x i8>], [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 7
- %90 = load <32 x i8>, <32 x i8> addrspace(2)* %89, !tbaa !0
- %91 = getelementptr [32 x <16 x i8>], [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 7
- %92 = load <16 x i8>, <16 x i8> addrspace(2)* %91, !tbaa !0
- %93 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %4, <2 x i32> %6)
- %94 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %4, <2 x i32> %6)
- %95 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %4, <2 x i32> %6)
- %96 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %4, <2 x i32> %6)
- %97 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %4, <2 x i32> %6)
- %98 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %4, <2 x i32> %6)
- %99 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %4, <2 x i32> %6)
- %100 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %4, <2 x i32> %6)
- %101 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %4, <2 x i32> %6)
- %102 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %4, <2 x i32> %6)
- %103 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %4, <2 x i32> %6)
- %104 = call float @llvm.SI.fs.interp(i32 0, i32 4, i32 %4, <2 x i32> %6)
- %105 = call float @llvm.SI.fs.interp(i32 1, i32 4, i32 %4, <2 x i32> %6)
- %106 = call float @llvm.SI.fs.interp(i32 2, i32 4, i32 %4, <2 x i32> %6)
- %107 = call float @llvm.SI.fs.interp(i32 0, i32 5, i32 %4, <2 x i32> %6)
- %108 = call float @llvm.SI.fs.interp(i32 1, i32 5, i32 %4, <2 x i32> %6)
- %109 = call float @llvm.SI.fs.interp(i32 2, i32 5, i32 %4, <2 x i32> %6)
- %110 = call i32 @llvm.SI.tid()
- %111 = getelementptr [64 x i32], [64 x i32] addrspace(3)* @ddxy_lds, i32 0, i32 %110
- %112 = bitcast float %93 to i32
- store i32 %112, i32 addrspace(3)* %111
- %113 = bitcast float %94 to i32
- store i32 %113, i32 addrspace(3)* %111
- %114 = call i32 @llvm.SI.tid()
- %115 = getelementptr [64 x i32], [64 x i32] addrspace(3)* @ddxy_lds, i32 0, i32 %114
- %116 = and i32 %114, -4
- %117 = getelementptr [64 x i32], [64 x i32] addrspace(3)* @ddxy_lds, i32 0, i32 %116
- %118 = add i32 %116, 1
- %119 = getelementptr [64 x i32], [64 x i32] addrspace(3)* @ddxy_lds, i32 0, i32 %118
- %120 = bitcast float %93 to i32
- store i32 %120, i32 addrspace(3)* %115
- %121 = load i32, i32 addrspace(3)* %117
- %122 = bitcast i32 %121 to float
- %123 = load i32, i32 addrspace(3)* %119
- %124 = bitcast i32 %123 to float
- %125 = fsub float %124, %122
- %126 = bitcast float %94 to i32
- store i32 %126, i32 addrspace(3)* %115
- %127 = load i32, i32 addrspace(3)* %117
- %128 = bitcast i32 %127 to float
- %129 = load i32, i32 addrspace(3)* %119
- %130 = bitcast i32 %129 to float
- %131 = fsub float %130, %128
- %132 = insertelement <4 x float> undef, float %125, i32 0
- %133 = insertelement <4 x float> %132, float %131, i32 1
- %134 = insertelement <4 x float> %133, float %131, i32 2
- %135 = insertelement <4 x float> %134, float %131, i32 3
- %136 = extractelement <4 x float> %135, i32 0
- %137 = extractelement <4 x float> %135, i32 1
- %138 = fmul float %60, %93
- %139 = fmul float %60, %94
- %140 = fmul float %60, %94
- %141 = fmul float %60, %94
- %142 = call i32 @llvm.SI.tid()
- %143 = getelementptr [64 x i32], [64 x i32] addrspace(3)* @ddxy_lds, i32 0, i32 %142
- %144 = bitcast float %138 to i32
- store i32 %144, i32 addrspace(3)* %143
- %145 = bitcast float %139 to i32
- store i32 %145, i32 addrspace(3)* %143
- %146 = bitcast float %140 to i32
- store i32 %146, i32 addrspace(3)* %143
- %147 = bitcast float %141 to i32
- store i32 %147, i32 addrspace(3)* %143
- %148 = call i32 @llvm.SI.tid()
- %149 = getelementptr [64 x i32], [64 x i32] addrspace(3)* @ddxy_lds, i32 0, i32 %148
- %150 = and i32 %148, -4
- %151 = getelementptr [64 x i32], [64 x i32] addrspace(3)* @ddxy_lds, i32 0, i32 %150
- %152 = add i32 %150, 2
- %153 = getelementptr [64 x i32], [64 x i32] addrspace(3)* @ddxy_lds, i32 0, i32 %152
- %154 = bitcast float %138 to i32
- store i32 %154, i32 addrspace(3)* %149
- %155 = load i32, i32 addrspace(3)* %151
- %156 = bitcast i32 %155 to float
- %157 = load i32, i32 addrspace(3)* %153
- %158 = bitcast i32 %157 to float
- %159 = fsub float %158, %156
- %160 = bitcast float %139 to i32
- store i32 %160, i32 addrspace(3)* %149
- %161 = load i32, i32 addrspace(3)* %151
- %162 = bitcast i32 %161 to float
- %163 = load i32, i32 addrspace(3)* %153
- %164 = bitcast i32 %163 to float
- %165 = fsub float %164, %162
- %166 = bitcast float %140 to i32
- store i32 %166, i32 addrspace(3)* %149
- %167 = load i32, i32 addrspace(3)* %151
- %168 = bitcast i32 %167 to float
- %169 = load i32, i32 addrspace(3)* %153
- %170 = bitcast i32 %169 to float
- %171 = fsub float %170, %168
- %172 = bitcast float %141 to i32
- store i32 %172, i32 addrspace(3)* %149
- %173 = load i32, i32 addrspace(3)* %151
- %174 = bitcast i32 %173 to float
- %175 = load i32, i32 addrspace(3)* %153
- %176 = bitcast i32 %175 to float
- %177 = fsub float %176, %174
- %178 = insertelement <4 x float> undef, float %159, i32 0
- %179 = insertelement <4 x float> %178, float %165, i32 1
- %180 = insertelement <4 x float> %179, float %171, i32 2
- %181 = insertelement <4 x float> %180, float %177, i32 3
- %182 = extractelement <4 x float> %181, i32 0
- %183 = extractelement <4 x float> %181, i32 1
- %184 = fdiv float 1.000000e+00, %97
- %185 = fmul float %33, %184
- %186 = fcmp uge float 1.000000e+00, %185
- %187 = select i1 %186, float %185, float 1.000000e+00
- %188 = fmul float %187, %30
- %189 = call float @ceil(float %188)
- %190 = fcmp uge float 3.000000e+00, %189
- %191 = select i1 %190, float 3.000000e+00, float %189
- %192 = fdiv float 1.000000e+00, %191
- %193 = fdiv float 1.000000e+00, %30
- %194 = fmul float %191, %193
- %195 = fmul float %31, %194
- %196 = fmul float %95, %95
- %197 = fmul float %96, %96
- %198 = fadd float %197, %196
- %199 = fmul float %97, %97
- %200 = fadd float %198, %199
- %201 = call float @llvm.AMDGPU.rsq.f32(float %200)
- %202 = fmul float %95, %201
- %203 = fmul float %96, %201
- %204 = fmul float %202, %29
- %205 = fmul float %203, %29
- %206 = fmul float %204, -1.000000e+00
- %207 = fmul float %205, 1.000000e+00
- %208 = fmul float %206, %32
- %209 = fmul float %207, %32
- %210 = fsub float -0.000000e+00, %208
- %211 = fadd float %93, %210
- %212 = fsub float -0.000000e+00, %209
- %213 = fadd float %94, %212
- %214 = fmul float %206, %192
- %215 = fmul float %207, %192
- %216 = fmul float -1.000000e+00, %192
- %217 = bitcast float %136 to i32
- %218 = bitcast float %182 to i32
- %219 = bitcast float %137 to i32
- %220 = bitcast float %183 to i32
- %221 = insertelement <8 x i32> undef, i32 %217, i32 0
- %222 = insertelement <8 x i32> %221, i32 %218, i32 1
- %223 = insertelement <8 x i32> %222, i32 %219, i32 2
- %224 = insertelement <8 x i32> %223, i32 %220, i32 3
- br label %LOOP
-
-LOOP: ; preds = %ENDIF, %main_body
- %temp24.0 = phi float [ 1.000000e+00, %main_body ], [ %258, %ENDIF ]
- %temp28.0 = phi float [ %211, %main_body ], [ %253, %ENDIF ]
- %temp29.0 = phi float [ %213, %main_body ], [ %255, %ENDIF ]
- %temp30.0 = phi float [ 1.000000e+00, %main_body ], [ %257, %ENDIF ]
- %225 = fcmp oge float %temp24.0, %191
- %226 = sext i1 %225 to i32
- %227 = bitcast i32 %226 to float
- %228 = bitcast float %227 to i32
- %229 = icmp ne i32 %228, 0
- br i1 %229, label %IF, label %ENDIF
-
-IF: ; preds = %LOOP
- %230 = bitcast float %136 to i32
- %231 = bitcast float %182 to i32
- %232 = bitcast float %137 to i32
- %233 = bitcast float %183 to i32
- %234 = insertelement <8 x i32> undef, i32 %230, i32 0
- %235 = insertelement <8 x i32> %234, i32 %231, i32 1
- %236 = insertelement <8 x i32> %235, i32 %232, i32 2
- %237 = insertelement <8 x i32> %236, i32 %233, i32 3
- br label %LOOP65
-
-ENDIF: ; preds = %LOOP
- %238 = bitcast float %temp28.0 to i32
- %239 = bitcast float %temp29.0 to i32
- %240 = insertelement <8 x i32> %224, i32 %238, i32 4
- %241 = insertelement <8 x i32> %240, i32 %239, i32 5
- %242 = insertelement <8 x i32> %241, i32 undef, i32 6
- %243 = insertelement <8 x i32> %242, i32 undef, i32 7
- %244 = call <4 x float> @llvm.SI.sampled.v8i32(<8 x i32> %243, <32 x i8> %62, <16 x i8> %64, i32 2)
- %245 = extractelement <4 x float> %244, i32 3
- %246 = fcmp oge float %temp30.0, %245
- %247 = sext i1 %246 to i32
- %248 = bitcast i32 %247 to float
- %249 = bitcast float %248 to i32
- %250 = and i32 %249, 1065353216
- %251 = bitcast i32 %250 to float
- %252 = fmul float %214, %251
- %253 = fadd float %252, %temp28.0
- %254 = fmul float %215, %251
- %255 = fadd float %254, %temp29.0
- %256 = fmul float %216, %251
- %257 = fadd float %256, %temp30.0
- %258 = fadd float %temp24.0, 1.000000e+00
- br label %LOOP
-
-LOOP65: ; preds = %ENDIF66, %IF
- %temp24.1 = phi float [ 0.000000e+00, %IF ], [ %610, %ENDIF66 ]
- %temp28.1 = phi float [ %temp28.0, %IF ], [ %605, %ENDIF66 ]
- %temp29.1 = phi float [ %temp29.0, %IF ], [ %607, %ENDIF66 ]
- %temp30.1 = phi float [ %temp30.0, %IF ], [ %609, %ENDIF66 ]
- %temp32.0 = phi float [ 1.000000e+00, %IF ], [ %611, %ENDIF66 ]
- %259 = fcmp oge float %temp24.1, %195
- %260 = sext i1 %259 to i32
- %261 = bitcast i32 %260 to float
- %262 = bitcast float %261 to i32
- %263 = icmp ne i32 %262, 0
- br i1 %263, label %IF67, label %ENDIF66
-
-IF67: ; preds = %LOOP65
- %264 = bitcast float %136 to i32
- %265 = bitcast float %182 to i32
- %266 = bitcast float %137 to i32
- %267 = bitcast float %183 to i32
- %268 = bitcast float %temp28.1 to i32
- %269 = bitcast float %temp29.1 to i32
- %270 = insertelement <8 x i32> undef, i32 %264, i32 0
- %271 = insertelement <8 x i32> %270, i32 %265, i32 1
- %272 = insertelement <8 x i32> %271, i32 %266, i32 2
- %273 = insertelement <8 x i32> %272, i32 %267, i32 3
- %274 = insertelement <8 x i32> %273, i32 %268, i32 4
- %275 = insertelement <8 x i32> %274, i32 %269, i32 5
- %276 = insertelement <8 x i32> %275, i32 undef, i32 6
- %277 = insertelement <8 x i32> %276, i32 undef, i32 7
- %278 = call <4 x float> @llvm.SI.sampled.v8i32(<8 x i32> %277, <32 x i8> %66, <16 x i8> %68, i32 2)
- %279 = extractelement <4 x float> %278, i32 0
- %280 = extractelement <4 x float> %278, i32 1
- %281 = extractelement <4 x float> %278, i32 2
- %282 = extractelement <4 x float> %278, i32 3
- %283 = fmul float %282, %47
- %284 = bitcast float %136 to i32
- %285 = bitcast float %182 to i32
- %286 = bitcast float %137 to i32
- %287 = bitcast float %183 to i32
- %288 = bitcast float %temp28.1 to i32
- %289 = bitcast float %temp29.1 to i32
- %290 = insertelement <8 x i32> undef, i32 %284, i32 0
- %291 = insertelement <8 x i32> %290, i32 %285, i32 1
- %292 = insertelement <8 x i32> %291, i32 %286, i32 2
- %293 = insertelement <8 x i32> %292, i32 %287, i32 3
- %294 = insertelement <8 x i32> %293, i32 %288, i32 4
- %295 = insertelement <8 x i32> %294, i32 %289, i32 5
- %296 = insertelement <8 x i32> %295, i32 undef, i32 6
- %297 = insertelement <8 x i32> %296, i32 undef, i32 7
- %298 = call <4 x float> @llvm.SI.sampled.v8i32(<8 x i32> %297, <32 x i8> %82, <16 x i8> %84, i32 2)
- %299 = extractelement <4 x float> %298, i32 0
- %300 = extractelement <4 x float> %298, i32 1
- %301 = extractelement <4 x float> %298, i32 2
- %302 = bitcast float %136 to i32
- %303 = bitcast float %182 to i32
- %304 = bitcast float %137 to i32
- %305 = bitcast float %183 to i32
- %306 = bitcast float %temp28.1 to i32
- %307 = bitcast float %temp29.1 to i32
- %308 = insertelement <8 x i32> undef, i32 %302, i32 0
- %309 = insertelement <8 x i32> %308, i32 %303, i32 1
- %310 = insertelement <8 x i32> %309, i32 %304, i32 2
- %311 = insertelement <8 x i32> %310, i32 %305, i32 3
- %312 = insertelement <8 x i32> %311, i32 %306, i32 4
- %313 = insertelement <8 x i32> %312, i32 %307, i32 5
- %314 = insertelement <8 x i32> %313, i32 undef, i32 6
- %315 = insertelement <8 x i32> %314, i32 undef, i32 7
- %316 = call <4 x float> @llvm.SI.sampled.v8i32(<8 x i32> %315, <32 x i8> %78, <16 x i8> %80, i32 2)
- %317 = extractelement <4 x float> %316, i32 0
- %318 = extractelement <4 x float> %316, i32 1
- %319 = extractelement <4 x float> %316, i32 2
- %320 = fmul float %317, %23
- %321 = fmul float %318, %24
- %322 = fmul float %319, %25
- %323 = fmul float %299, %26
- %324 = fadd float %323, %320
- %325 = fmul float %300, %27
- %326 = fadd float %325, %321
- %327 = fmul float %301, %28
- %328 = fadd float %327, %322
- %329 = fadd float %279, %324
- %330 = fadd float %280, %326
- %331 = fadd float %281, %328
- %332 = bitcast float %136 to i32
- %333 = bitcast float %182 to i32
- %334 = bitcast float %137 to i32
- %335 = bitcast float %183 to i32
- %336 = bitcast float %temp28.1 to i32
- %337 = bitcast float %temp29.1 to i32
- %338 = insertelement <8 x i32> undef, i32 %332, i32 0
- %339 = insertelement <8 x i32> %338, i32 %333, i32 1
- %340 = insertelement <8 x i32> %339, i32 %334, i32 2
- %341 = insertelement <8 x i32> %340, i32 %335, i32 3
- %342 = insertelement <8 x i32> %341, i32 %336, i32 4
- %343 = insertelement <8 x i32> %342, i32 %337, i32 5
- %344 = insertelement <8 x i32> %343, i32 undef, i32 6
- %345 = insertelement <8 x i32> %344, i32 undef, i32 7
- %346 = call <4 x float> @llvm.SI.sampled.v8i32(<8 x i32> %345, <32 x i8> %62, <16 x i8> %64, i32 2)
- %347 = extractelement <4 x float> %346, i32 0
- %348 = extractelement <4 x float> %346, i32 1
- %349 = extractelement <4 x float> %346, i32 2
- %350 = fadd float %347, -5.000000e-01
- %351 = fadd float %348, -5.000000e-01
- %352 = fadd float %349, -5.000000e-01
- %353 = fmul float %350, %350
- %354 = fmul float %351, %351
- %355 = fadd float %354, %353
- %356 = fmul float %352, %352
- %357 = fadd float %355, %356
- %358 = call float @llvm.AMDGPU.rsq.f32(float %357)
- %359 = fmul float %350, %358
- %360 = fmul float %351, %358
- %361 = fmul float %352, %358
- %362 = bitcast float %136 to i32
- %363 = bitcast float %182 to i32
- %364 = bitcast float %137 to i32
- %365 = bitcast float %183 to i32
- %366 = bitcast float %temp28.1 to i32
- %367 = bitcast float %temp29.1 to i32
- %368 = insertelement <8 x i32> undef, i32 %362, i32 0
- %369 = insertelement <8 x i32> %368, i32 %363, i32 1
- %370 = insertelement <8 x i32> %369, i32 %364, i32 2
- %371 = insertelement <8 x i32> %370, i32 %365, i32 3
- %372 = insertelement <8 x i32> %371, i32 %366, i32 4
- %373 = insertelement <8 x i32> %372, i32 %367, i32 5
- %374 = insertelement <8 x i32> %373, i32 undef, i32 6
- %375 = insertelement <8 x i32> %374, i32 undef, i32 7
- %376 = call <4 x float> @llvm.SI.sampled.v8i32(<8 x i32> %375, <32 x i8> %70, <16 x i8> %72, i32 2)
- %377 = extractelement <4 x float> %376, i32 0
- %378 = extractelement <4 x float> %376, i32 1
- %379 = extractelement <4 x float> %376, i32 2
- %380 = extractelement <4 x float> %376, i32 3
- %381 = fsub float -0.000000e+00, %95
- %382 = fsub float -0.000000e+00, %96
- %383 = fsub float -0.000000e+00, %97
- %384 = fmul float %359, %381
- %385 = fmul float %360, %382
- %386 = fadd float %385, %384
- %387 = fmul float %361, %383
- %388 = fadd float %386, %387
- %389 = fmul float %388, %359
- %390 = fmul float %388, %360
- %391 = fmul float %388, %361
- %392 = fmul float 2.000000e+00, %389
- %393 = fmul float 2.000000e+00, %390
- %394 = fmul float 2.000000e+00, %391
- %395 = fsub float -0.000000e+00, %392
- %396 = fadd float %381, %395
- %397 = fsub float -0.000000e+00, %393
- %398 = fadd float %382, %397
- %399 = fsub float -0.000000e+00, %394
- %400 = fadd float %383, %399
- %401 = fmul float %396, %98
- %402 = fmul float %396, %99
- %403 = fmul float %396, %100
- %404 = fmul float %398, %101
- %405 = fadd float %404, %401
- %406 = fmul float %398, %102
- %407 = fadd float %406, %402
- %408 = fmul float %398, %103
- %409 = fadd float %408, %403
- %410 = fmul float %400, %104
- %411 = fadd float %410, %405
- %412 = fmul float %400, %105
- %413 = fadd float %412, %407
- %414 = fmul float %400, %106
- %415 = fadd float %414, %409
- %416 = bitcast float %136 to i32
- %417 = bitcast float %182 to i32
- %418 = bitcast float %137 to i32
- %419 = bitcast float %183 to i32
- %420 = bitcast float %temp28.1 to i32
- %421 = bitcast float %temp29.1 to i32
- %422 = insertelement <8 x i32> undef, i32 %416, i32 0
- %423 = insertelement <8 x i32> %422, i32 %417, i32 1
- %424 = insertelement <8 x i32> %423, i32 %418, i32 2
- %425 = insertelement <8 x i32> %424, i32 %419, i32 3
- %426 = insertelement <8 x i32> %425, i32 %420, i32 4
- %427 = insertelement <8 x i32> %426, i32 %421, i32 5
- %428 = insertelement <8 x i32> %427, i32 undef, i32 6
- %429 = insertelement <8 x i32> %428, i32 undef, i32 7
- %430 = call <4 x float> @llvm.SI.sampled.v8i32(<8 x i32> %429, <32 x i8> %86, <16 x i8> %88, i32 2)
- %431 = extractelement <4 x float> %430, i32 0
- %432 = extractelement <4 x float> %430, i32 1
- %433 = extractelement <4 x float> %430, i32 2
- %434 = fmul float %48, %411
- %435 = fmul float %49, %411
- %436 = fmul float %50, %411
- %437 = fmul float %51, %413
- %438 = fadd float %437, %434
- %439 = fmul float %52, %413
- %440 = fadd float %439, %435
- %441 = fmul float %53, %413
- %442 = fadd float %441, %436
- %443 = fmul float %54, %415
- %444 = fadd float %443, %438
- %445 = fmul float %55, %415
- %446 = fadd float %445, %440
- %447 = fmul float %56, %415
- %448 = fadd float %447, %442
- %449 = insertelement <4 x float> undef, float %444, i32 0
- %450 = insertelement <4 x float> %449, float %446, i32 1
- %451 = insertelement <4 x float> %450, float %448, i32 2
- %452 = insertelement <4 x float> %451, float %195, i32 3
- %453 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %452)
- %454 = extractelement <4 x float> %453, i32 0
- %455 = extractelement <4 x float> %453, i32 1
- %456 = extractelement <4 x float> %453, i32 2
- %457 = extractelement <4 x float> %453, i32 3
- %458 = call float @fabs(float %456)
- %459 = fdiv float 1.000000e+00, %458
- %460 = fmul float %454, %459
- %461 = fadd float %460, 1.500000e+00
- %462 = fmul float %455, %459
- %463 = fadd float %462, 1.500000e+00
- %464 = bitcast float %463 to i32
- %465 = bitcast float %461 to i32
- %466 = bitcast float %457 to i32
- %467 = insertelement <4 x i32> undef, i32 %464, i32 0
- %468 = insertelement <4 x i32> %467, i32 %465, i32 1
- %469 = insertelement <4 x i32> %468, i32 %466, i32 2
- %470 = insertelement <4 x i32> %469, i32 undef, i32 3
- %471 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %470, <32 x i8> %90, <16 x i8> %92, i32 4)
- %472 = extractelement <4 x float> %471, i32 0
- %473 = extractelement <4 x float> %471, i32 1
- %474 = extractelement <4 x float> %471, i32 2
- %475 = fmul float %431, %472
- %476 = fadd float %475, %329
- %477 = fmul float %432, %473
- %478 = fadd float %477, %330
- %479 = fmul float %433, %474
- %480 = fadd float %479, %331
- %481 = fmul float %107, %107
- %482 = fmul float %108, %108
- %483 = fadd float %482, %481
- %484 = fmul float %109, %109
- %485 = fadd float %483, %484
- %486 = call float @llvm.AMDGPU.rsq.f32(float %485)
- %487 = fmul float %107, %486
- %488 = fmul float %108, %486
- %489 = fmul float %109, %486
- %490 = fmul float %377, %40
- %491 = fmul float %378, %41
- %492 = fmul float %379, %42
- %493 = fmul float %359, %487
- %494 = fmul float %360, %488
- %495 = fadd float %494, %493
- %496 = fmul float %361, %489
- %497 = fadd float %495, %496
- %498 = fmul float %497, %359
- %499 = fmul float %497, %360
- %500 = fmul float %497, %361
- %501 = fmul float 2.000000e+00, %498
- %502 = fmul float 2.000000e+00, %499
- %503 = fmul float 2.000000e+00, %500
- %504 = fsub float -0.000000e+00, %501
- %505 = fadd float %487, %504
- %506 = fsub float -0.000000e+00, %502
- %507 = fadd float %488, %506
- %508 = fsub float -0.000000e+00, %503
- %509 = fadd float %489, %508
- %510 = fmul float %95, %95
- %511 = fmul float %96, %96
- %512 = fadd float %511, %510
- %513 = fmul float %97, %97
- %514 = fadd float %512, %513
- %515 = call float @llvm.AMDGPU.rsq.f32(float %514)
- %516 = fmul float %95, %515
- %517 = fmul float %96, %515
- %518 = fmul float %97, %515
- %519 = fmul float %505, %516
- %520 = fmul float %507, %517
- %521 = fadd float %520, %519
- %522 = fmul float %509, %518
- %523 = fadd float %521, %522
- %524 = fsub float -0.000000e+00, %523
- %525 = fcmp uge float %524, 0.000000e+00
- %526 = select i1 %525, float %524, float 0.000000e+00
- %527 = fmul float %43, %380
- %528 = fadd float %527, 1.000000e+00
- %529 = call float @llvm.pow.f32(float %526, float %528)
- %530 = fmul float %476, %37
- %531 = fmul float %478, %38
- %532 = fmul float %480, %39
- %533 = fmul float %359, %487
- %534 = fmul float %360, %488
- %535 = fadd float %534, %533
- %536 = fmul float %361, %489
- %537 = fadd float %535, %536
- %538 = fcmp uge float %537, 0.000000e+00
- %539 = select i1 %538, float %537, float 0.000000e+00
- %540 = fmul float %530, %539
- %541 = fmul float %531, %539
- %542 = fmul float %532, %539
- %543 = fmul float %490, %529
- %544 = fadd float %543, %540
- %545 = fmul float %491, %529
- %546 = fadd float %545, %541
- %547 = fmul float %492, %529
- %548 = fadd float %547, %542
- %549 = fmul float %476, %34
- %550 = fmul float %478, %35
- %551 = fmul float %480, %36
- %552 = fmul float %544, %57
- %553 = fadd float %552, %549
- %554 = fmul float %546, %58
- %555 = fadd float %554, %550
- %556 = fmul float %548, %59
- %557 = fadd float %556, %551
- %558 = bitcast float %136 to i32
- %559 = bitcast float %182 to i32
- %560 = bitcast float %137 to i32
- %561 = bitcast float %183 to i32
- %562 = bitcast float %temp28.1 to i32
- %563 = bitcast float %temp29.1 to i32
- %564 = insertelement <8 x i32> undef, i32 %558, i32 0
- %565 = insertelement <8 x i32> %564, i32 %559, i32 1
- %566 = insertelement <8 x i32> %565, i32 %560, i32 2
- %567 = insertelement <8 x i32> %566, i32 %561, i32 3
- %568 = insertelement <8 x i32> %567, i32 %562, i32 4
- %569 = insertelement <8 x i32> %568, i32 %563, i32 5
- %570 = insertelement <8 x i32> %569, i32 undef, i32 6
- %571 = insertelement <8 x i32> %570, i32 undef, i32 7
- %572 = call <4 x float> @llvm.SI.sampled.v8i32(<8 x i32> %571, <32 x i8> %74, <16 x i8> %76, i32 2)
- %573 = extractelement <4 x float> %572, i32 0
- %574 = extractelement <4 x float> %572, i32 1
- %575 = extractelement <4 x float> %572, i32 2
- %576 = fmul float %573, %44
- %577 = fadd float %576, %553
- %578 = fmul float %574, %45
- %579 = fadd float %578, %555
- %580 = fmul float %575, %46
- %581 = fadd float %580, %557
- %582 = call i32 @llvm.SI.packf16(float %577, float %579)
- %583 = bitcast i32 %582 to float
- %584 = call i32 @llvm.SI.packf16(float %581, float %283)
- %585 = bitcast i32 %584 to float
- call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %583, float %585, float %583, float %585)
- ret void
-
-ENDIF66: ; preds = %LOOP65
- %586 = bitcast float %temp28.1 to i32
- %587 = bitcast float %temp29.1 to i32
- %588 = insertelement <8 x i32> %237, i32 %586, i32 4
- %589 = insertelement <8 x i32> %588, i32 %587, i32 5
- %590 = insertelement <8 x i32> %589, i32 undef, i32 6
- %591 = insertelement <8 x i32> %590, i32 undef, i32 7
- %592 = call <4 x float> @llvm.SI.sampled.v8i32(<8 x i32> %591, <32 x i8> %62, <16 x i8> %64, i32 2)
- %593 = extractelement <4 x float> %592, i32 3
- %594 = fcmp oge float %temp30.1, %593
- %595 = sext i1 %594 to i32
- %596 = bitcast i32 %595 to float
- %597 = bitcast float %596 to i32
- %598 = and i32 %597, 1065353216
- %599 = bitcast i32 %598 to float
- %600 = fmul float 5.000000e-01, %temp32.0
- %601 = fsub float -0.000000e+00, %600
- %602 = fmul float %599, %temp32.0
- %603 = fadd float %602, %601
- %604 = fmul float %214, %603
- %605 = fadd float %604, %temp28.1
- %606 = fmul float %215, %603
- %607 = fadd float %606, %temp29.1
- %608 = fmul float %216, %603
- %609 = fadd float %608, %temp30.1
- %610 = fadd float %temp24.1, 1.000000e+00
- %611 = fmul float %temp32.0, 5.000000e-01
- br label %LOOP65
-}
-
-; Function Attrs: nounwind readnone
-declare float @llvm.SI.load.const(<16 x i8>, i32) #1
-
-; Function Attrs: nounwind readnone
-declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1
-
-; Function Attrs: readnone
-declare i32 @llvm.SI.tid() #2
-
-; Function Attrs: readonly
-declare float @ceil(float) #3
-
-; Function Attrs: readnone
-declare float @llvm.AMDGPU.rsq.f32(float) #2
-
-; Function Attrs: nounwind readnone
-declare <4 x float> @llvm.SI.sampled.v8i32(<8 x i32>, <32 x i8>, <16 x i8>, i32) #1
-
-; Function Attrs: readnone
-declare <4 x float> @llvm.AMDGPU.cube(<4 x float>) #2
-
-; Function Attrs: readnone
-declare float @fabs(float) #2
-
-; Function Attrs: nounwind readnone
-declare <4 x float> @llvm.SI.sample.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1
-
-; Function Attrs: nounwind readonly
-declare float @llvm.pow.f32(float, float) #4
-
-; Function Attrs: nounwind readnone
-declare i32 @llvm.SI.packf16(float, float) #1
-
-declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
-
-attributes #0 = { "ShaderType"="0" }
-attributes #1 = { nounwind readnone }
-attributes #2 = { readnone }
-attributes #3 = { readonly }
-attributes #4 = { nounwind readonly }
-
-!0 = !{!"const", null, i32 1}
-
-; CHECK-LABEL: {{^}}main1:
-; CHECK: s_endpgm
-define void @main1([17 x <16 x i8>] addrspace(2)* byval, [32 x <16 x i8>] addrspace(2)* byval, [16 x <32 x i8>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
-main_body:
- %21 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %0, i64 0, i32 0
- %22 = load <16 x i8>, <16 x i8> addrspace(2)* %21, !tbaa !0
- %23 = call float @llvm.SI.load.const(<16 x i8> %22, i32 0)
- %24 = call float @llvm.SI.load.const(<16 x i8> %22, i32 4)
- %25 = call float @llvm.SI.load.const(<16 x i8> %22, i32 8)
- %26 = call float @llvm.SI.load.const(<16 x i8> %22, i32 12)
- %27 = call float @llvm.SI.load.const(<16 x i8> %22, i32 28)
- %28 = call float @llvm.SI.load.const(<16 x i8> %22, i32 48)
- %29 = call float @llvm.SI.load.const(<16 x i8> %22, i32 52)
- %30 = call float @llvm.SI.load.const(<16 x i8> %22, i32 56)
- %31 = call float @llvm.SI.load.const(<16 x i8> %22, i32 64)
- %32 = call float @llvm.SI.load.const(<16 x i8> %22, i32 68)
- %33 = call float @llvm.SI.load.const(<16 x i8> %22, i32 72)
- %34 = call float @llvm.SI.load.const(<16 x i8> %22, i32 76)
- %35 = call float @llvm.SI.load.const(<16 x i8> %22, i32 128)
- %36 = call float @llvm.SI.load.const(<16 x i8> %22, i32 132)
- %37 = call float @llvm.SI.load.const(<16 x i8> %22, i32 144)
- %38 = call float @llvm.SI.load.const(<16 x i8> %22, i32 148)
- %39 = call float @llvm.SI.load.const(<16 x i8> %22, i32 152)
- %40 = call float @llvm.SI.load.const(<16 x i8> %22, i32 160)
- %41 = call float @llvm.SI.load.const(<16 x i8> %22, i32 164)
- %42 = call float @llvm.SI.load.const(<16 x i8> %22, i32 168)
- %43 = call float @llvm.SI.load.const(<16 x i8> %22, i32 172)
- %44 = call float @llvm.SI.load.const(<16 x i8> %22, i32 176)
- %45 = call float @llvm.SI.load.const(<16 x i8> %22, i32 180)
- %46 = call float @llvm.SI.load.const(<16 x i8> %22, i32 184)
- %47 = call float @llvm.SI.load.const(<16 x i8> %22, i32 192)
- %48 = call float @llvm.SI.load.const(<16 x i8> %22, i32 196)
- %49 = call float @llvm.SI.load.const(<16 x i8> %22, i32 200)
- %50 = call float @llvm.SI.load.const(<16 x i8> %22, i32 208)
- %51 = call float @llvm.SI.load.const(<16 x i8> %22, i32 212)
- %52 = call float @llvm.SI.load.const(<16 x i8> %22, i32 216)
- %53 = call float @llvm.SI.load.const(<16 x i8> %22, i32 220)
- %54 = call float @llvm.SI.load.const(<16 x i8> %22, i32 236)
- %55 = call float @llvm.SI.load.const(<16 x i8> %22, i32 240)
- %56 = call float @llvm.SI.load.const(<16 x i8> %22, i32 244)
- %57 = call float @llvm.SI.load.const(<16 x i8> %22, i32 248)
- %58 = call float @llvm.SI.load.const(<16 x i8> %22, i32 252)
- %59 = call float @llvm.SI.load.const(<16 x i8> %22, i32 256)
- %60 = call float @llvm.SI.load.const(<16 x i8> %22, i32 260)
- %61 = call float @llvm.SI.load.const(<16 x i8> %22, i32 264)
- %62 = call float @llvm.SI.load.const(<16 x i8> %22, i32 268)
- %63 = call float @llvm.SI.load.const(<16 x i8> %22, i32 272)
- %64 = call float @llvm.SI.load.const(<16 x i8> %22, i32 276)
- %65 = call float @llvm.SI.load.const(<16 x i8> %22, i32 280)
- %66 = call float @llvm.SI.load.const(<16 x i8> %22, i32 284)
- %67 = call float @llvm.SI.load.const(<16 x i8> %22, i32 288)
- %68 = call float @llvm.SI.load.const(<16 x i8> %22, i32 292)
- %69 = call float @llvm.SI.load.const(<16 x i8> %22, i32 464)
- %70 = call float @llvm.SI.load.const(<16 x i8> %22, i32 468)
- %71 = call float @llvm.SI.load.const(<16 x i8> %22, i32 472)
- %72 = call float @llvm.SI.load.const(<16 x i8> %22, i32 496)
- %73 = call float @llvm.SI.load.const(<16 x i8> %22, i32 500)
- %74 = call float @llvm.SI.load.const(<16 x i8> %22, i32 504)
- %75 = call float @llvm.SI.load.const(<16 x i8> %22, i32 512)
- %76 = call float @llvm.SI.load.const(<16 x i8> %22, i32 516)
- %77 = call float @llvm.SI.load.const(<16 x i8> %22, i32 524)
- %78 = call float @llvm.SI.load.const(<16 x i8> %22, i32 532)
- %79 = call float @llvm.SI.load.const(<16 x i8> %22, i32 536)
- %80 = call float @llvm.SI.load.const(<16 x i8> %22, i32 540)
- %81 = call float @llvm.SI.load.const(<16 x i8> %22, i32 544)
- %82 = call float @llvm.SI.load.const(<16 x i8> %22, i32 548)
- %83 = call float @llvm.SI.load.const(<16 x i8> %22, i32 552)
- %84 = call float @llvm.SI.load.const(<16 x i8> %22, i32 556)
- %85 = call float @llvm.SI.load.const(<16 x i8> %22, i32 560)
- %86 = call float @llvm.SI.load.const(<16 x i8> %22, i32 564)
- %87 = call float @llvm.SI.load.const(<16 x i8> %22, i32 568)
- %88 = call float @llvm.SI.load.const(<16 x i8> %22, i32 572)
- %89 = call float @llvm.SI.load.const(<16 x i8> %22, i32 576)
- %90 = call float @llvm.SI.load.const(<16 x i8> %22, i32 580)
- %91 = call float @llvm.SI.load.const(<16 x i8> %22, i32 584)
- %92 = call float @llvm.SI.load.const(<16 x i8> %22, i32 588)
- %93 = call float @llvm.SI.load.const(<16 x i8> %22, i32 592)
- %94 = call float @llvm.SI.load.const(<16 x i8> %22, i32 596)
- %95 = call float @llvm.SI.load.const(<16 x i8> %22, i32 600)
- %96 = call float @llvm.SI.load.const(<16 x i8> %22, i32 604)
- %97 = call float @llvm.SI.load.const(<16 x i8> %22, i32 608)
- %98 = call float @llvm.SI.load.const(<16 x i8> %22, i32 612)
- %99 = call float @llvm.SI.load.const(<16 x i8> %22, i32 616)
- %100 = call float @llvm.SI.load.const(<16 x i8> %22, i32 624)
- %101 = call float @llvm.SI.load.const(<16 x i8> %22, i32 628)
- %102 = call float @llvm.SI.load.const(<16 x i8> %22, i32 632)
- %103 = call float @llvm.SI.load.const(<16 x i8> %22, i32 636)
- %104 = call float @llvm.SI.load.const(<16 x i8> %22, i32 640)
- %105 = call float @llvm.SI.load.const(<16 x i8> %22, i32 644)
- %106 = call float @llvm.SI.load.const(<16 x i8> %22, i32 648)
- %107 = call float @llvm.SI.load.const(<16 x i8> %22, i32 652)
- %108 = call float @llvm.SI.load.const(<16 x i8> %22, i32 656)
- %109 = call float @llvm.SI.load.const(<16 x i8> %22, i32 660)
- %110 = call float @llvm.SI.load.const(<16 x i8> %22, i32 664)
- %111 = call float @llvm.SI.load.const(<16 x i8> %22, i32 668)
- %112 = call float @llvm.SI.load.const(<16 x i8> %22, i32 672)
- %113 = call float @llvm.SI.load.const(<16 x i8> %22, i32 676)
- %114 = call float @llvm.SI.load.const(<16 x i8> %22, i32 680)
- %115 = call float @llvm.SI.load.const(<16 x i8> %22, i32 684)
- %116 = call float @llvm.SI.load.const(<16 x i8> %22, i32 688)
- %117 = call float @llvm.SI.load.const(<16 x i8> %22, i32 692)
- %118 = call float @llvm.SI.load.const(<16 x i8> %22, i32 696)
- %119 = call float @llvm.SI.load.const(<16 x i8> %22, i32 700)
- %120 = call float @llvm.SI.load.const(<16 x i8> %22, i32 704)
- %121 = call float @llvm.SI.load.const(<16 x i8> %22, i32 708)
- %122 = call float @llvm.SI.load.const(<16 x i8> %22, i32 712)
- %123 = call float @llvm.SI.load.const(<16 x i8> %22, i32 716)
- %124 = call float @llvm.SI.load.const(<16 x i8> %22, i32 864)
- %125 = call float @llvm.SI.load.const(<16 x i8> %22, i32 868)
- %126 = getelementptr [16 x <32 x i8>], [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 0
- %127 = load <32 x i8>, <32 x i8> addrspace(2)* %126, !tbaa !0
- %128 = getelementptr [32 x <16 x i8>], [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 0
- %129 = load <16 x i8>, <16 x i8> addrspace(2)* %128, !tbaa !0
- %130 = getelementptr [16 x <32 x i8>], [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 1
- %131 = load <32 x i8>, <32 x i8> addrspace(2)* %130, !tbaa !0
- %132 = getelementptr [32 x <16 x i8>], [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 1
- %133 = load <16 x i8>, <16 x i8> addrspace(2)* %132, !tbaa !0
- %134 = getelementptr [16 x <32 x i8>], [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 2
- %135 = load <32 x i8>, <32 x i8> addrspace(2)* %134, !tbaa !0
- %136 = getelementptr [32 x <16 x i8>], [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 2
- %137 = load <16 x i8>, <16 x i8> addrspace(2)* %136, !tbaa !0
- %138 = getelementptr [16 x <32 x i8>], [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 3
- %139 = load <32 x i8>, <32 x i8> addrspace(2)* %138, !tbaa !0
- %140 = getelementptr [32 x <16 x i8>], [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 3
- %141 = load <16 x i8>, <16 x i8> addrspace(2)* %140, !tbaa !0
- %142 = getelementptr [16 x <32 x i8>], [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 4
- %143 = load <32 x i8>, <32 x i8> addrspace(2)* %142, !tbaa !0
- %144 = getelementptr [32 x <16 x i8>], [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 4
- %145 = load <16 x i8>, <16 x i8> addrspace(2)* %144, !tbaa !0
- %146 = getelementptr [16 x <32 x i8>], [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 5
- %147 = load <32 x i8>, <32 x i8> addrspace(2)* %146, !tbaa !0
- %148 = getelementptr [32 x <16 x i8>], [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 5
- %149 = load <16 x i8>, <16 x i8> addrspace(2)* %148, !tbaa !0
- %150 = getelementptr [16 x <32 x i8>], [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 6
- %151 = load <32 x i8>, <32 x i8> addrspace(2)* %150, !tbaa !0
- %152 = getelementptr [32 x <16 x i8>], [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 6
- %153 = load <16 x i8>, <16 x i8> addrspace(2)* %152, !tbaa !0
- %154 = getelementptr [16 x <32 x i8>], [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 7
- %155 = load <32 x i8>, <32 x i8> addrspace(2)* %154, !tbaa !0
- %156 = getelementptr [32 x <16 x i8>], [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 7
- %157 = load <16 x i8>, <16 x i8> addrspace(2)* %156, !tbaa !0
- %158 = getelementptr [16 x <32 x i8>], [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 8
- %159 = load <32 x i8>, <32 x i8> addrspace(2)* %158, !tbaa !0
- %160 = getelementptr [32 x <16 x i8>], [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 8
- %161 = load <16 x i8>, <16 x i8> addrspace(2)* %160, !tbaa !0
- %162 = fcmp ugt float %17, 0.000000e+00
- %163 = select i1 %162, float 1.000000e+00, float 0.000000e+00
- %164 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %4, <2 x i32> %6)
- %165 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %4, <2 x i32> %6)
- %166 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %4, <2 x i32> %6)
- %167 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %4, <2 x i32> %6)
- %168 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %4, <2 x i32> %6)
- %169 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %4, <2 x i32> %6)
- %170 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %4, <2 x i32> %6)
- %171 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %4, <2 x i32> %6)
- %172 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %4, <2 x i32> %6)
- %173 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %4, <2 x i32> %6)
- %174 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %4, <2 x i32> %6)
- %175 = call float @llvm.SI.fs.interp(i32 3, i32 2, i32 %4, <2 x i32> %6)
- %176 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %4, <2 x i32> %6)
- %177 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %4, <2 x i32> %6)
- %178 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %4, <2 x i32> %6)
- %179 = call float @llvm.SI.fs.interp(i32 3, i32 3, i32 %4, <2 x i32> %6)
- %180 = call float @llvm.SI.fs.interp(i32 0, i32 4, i32 %4, <2 x i32> %6)
- %181 = call float @llvm.SI.fs.interp(i32 1, i32 4, i32 %4, <2 x i32> %6)
- %182 = call float @llvm.SI.fs.interp(i32 2, i32 4, i32 %4, <2 x i32> %6)
- %183 = call float @llvm.SI.fs.interp(i32 3, i32 4, i32 %4, <2 x i32> %6)
- %184 = call float @llvm.SI.fs.interp(i32 0, i32 5, i32 %4, <2 x i32> %6)
- %185 = call float @llvm.SI.fs.interp(i32 1, i32 5, i32 %4, <2 x i32> %6)
- %186 = call float @llvm.SI.fs.interp(i32 2, i32 5, i32 %4, <2 x i32> %6)
- %187 = call float @llvm.SI.fs.interp(i32 3, i32 5, i32 %4, <2 x i32> %6)
- %188 = call float @llvm.SI.fs.interp(i32 0, i32 6, i32 %4, <2 x i32> %6)
- %189 = call float @llvm.SI.fs.interp(i32 1, i32 6, i32 %4, <2 x i32> %6)
- %190 = call float @llvm.SI.fs.interp(i32 2, i32 6, i32 %4, <2 x i32> %6)
- %191 = call float @llvm.SI.fs.interp(i32 3, i32 6, i32 %4, <2 x i32> %6)
- %192 = call float @llvm.SI.fs.interp(i32 0, i32 7, i32 %4, <2 x i32> %6)
- %193 = call float @llvm.SI.fs.interp(i32 1, i32 7, i32 %4, <2 x i32> %6)
- %194 = call float @llvm.SI.fs.interp(i32 2, i32 7, i32 %4, <2 x i32> %6)
- %195 = call float @llvm.SI.fs.interp(i32 3, i32 7, i32 %4, <2 x i32> %6)
- %196 = fmul float %14, %124
- %197 = fadd float %196, %125
- %198 = call float @llvm.AMDIL.clamp.(float %163, float 0.000000e+00, float 1.000000e+00)
- %199 = call float @llvm.AMDIL.clamp.(float 0.000000e+00, float 0.000000e+00, float 1.000000e+00)
- %200 = call float @llvm.AMDIL.clamp.(float 0.000000e+00, float 0.000000e+00, float 1.000000e+00)
- %201 = call float @llvm.AMDIL.clamp.(float 1.000000e+00, float 0.000000e+00, float 1.000000e+00)
- %202 = bitcast float %198 to i32
- %203 = icmp ne i32 %202, 0
- %. = select i1 %203, float -1.000000e+00, float 1.000000e+00
- %204 = fsub float -0.000000e+00, %164
- %205 = fadd float %44, %204
- %206 = fsub float -0.000000e+00, %165
- %207 = fadd float %45, %206
- %208 = fsub float -0.000000e+00, %166
- %209 = fadd float %46, %208
- %210 = fmul float %205, %205
- %211 = fmul float %207, %207
- %212 = fadd float %211, %210
- %213 = fmul float %209, %209
- %214 = fadd float %212, %213
- %215 = call float @llvm.AMDGPU.rsq.f32(float %214)
- %216 = fmul float %205, %215
- %217 = fmul float %207, %215
- %218 = fmul float %209, %215
- %219 = fmul float %., %54
- %220 = fmul float %13, %47
- %221 = fmul float %197, %48
- %222 = bitcast float %174 to i32
- %223 = bitcast float %175 to i32
- %224 = insertelement <2 x i32> undef, i32 %222, i32 0
- %225 = insertelement <2 x i32> %224, i32 %223, i32 1
- %226 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %225, <32 x i8> %131, <16 x i8> %133, i32 2)
- %227 = extractelement <4 x float> %226, i32 0
- %228 = extractelement <4 x float> %226, i32 1
- %229 = extractelement <4 x float> %226, i32 2
- %230 = extractelement <4 x float> %226, i32 3
- %231 = fmul float %227, 0x4012611180000000
- %232 = fmul float %228, 0x4012611180000000
- %233 = fmul float %229, 0x4012611180000000
- %234 = call float @llvm.AMDGPU.lrp(float %27, float %231, float 1.000000e+00)
- %235 = call float @llvm.AMDGPU.lrp(float %27, float %232, float 1.000000e+00)
- %236 = call float @llvm.AMDGPU.lrp(float %27, float %233, float 1.000000e+00)
- %237 = fmul float %216, %184
- %238 = fmul float %217, %185
- %239 = fadd float %238, %237
- %240 = fmul float %218, %186
- %241 = fadd float %239, %240
- %242 = fmul float %216, %187
- %243 = fmul float %217, %188
- %244 = fadd float %243, %242
- %245 = fmul float %218, %189
- %246 = fadd float %244, %245
- %247 = fmul float %216, %190
- %248 = fmul float %217, %191
- %249 = fadd float %248, %247
- %250 = fmul float %218, %192
- %251 = fadd float %249, %250
- %252 = call float @llvm.AMDIL.clamp.(float %251, float 0.000000e+00, float 1.000000e+00)
- %253 = fmul float %214, 0x3F5A36E2E0000000
- %254 = call float @llvm.AMDIL.clamp.(float %253, float 0.000000e+00, float 1.000000e+00)
- %255 = fsub float -0.000000e+00, %254
- %256 = fadd float 1.000000e+00, %255
- %257 = call float @llvm.pow.f32(float %252, float 2.500000e-01)
- %258 = fmul float %39, %257
- %259 = fmul float %241, %258
- %260 = fmul float %246, %258
- %261 = fmul float %259, %230
- %262 = fmul float %260, %230
- %263 = fadd float %252, 0x3EE4F8B580000000
- %264 = fsub float -0.000000e+00, %252
- %265 = fadd float 1.000000e+00, %264
- %266 = fmul float 1.200000e+01, %265
- %267 = fadd float %266, 4.000000e+00
- %268 = fsub float -0.000000e+00, %267
- %269 = fmul float %268, %263
- %270 = fsub float -0.000000e+00, %267
- %271 = fmul float %270, %263
- %272 = fsub float -0.000000e+00, %267
- %273 = fmul float %272, %263
- %274 = fdiv float 1.000000e+00, %269
- %275 = fdiv float 1.000000e+00, %271
- %276 = fdiv float 1.000000e+00, %273
- %277 = fmul float %261, %274
- %278 = fmul float %262, %275
- %279 = fmul float %263, %276
- br label %LOOP
-
-LOOP: ; preds = %LOOP, %main_body
- %temp144.0 = phi float [ 1.000000e+00, %main_body ], [ %292, %LOOP ]
- %temp168.0 = phi float [ %176, %main_body ], [ %288, %LOOP ]
- %temp169.0 = phi float [ %177, %main_body ], [ %289, %LOOP ]
- %temp170.0 = phi float [ %256, %main_body ], [ %290, %LOOP ]
- %280 = bitcast float %temp168.0 to i32
- %281 = bitcast float %temp169.0 to i32
- %282 = insertelement <4 x i32> undef, i32 %280, i32 0
- %283 = insertelement <4 x i32> %282, i32 %281, i32 1
- %284 = insertelement <4 x i32> %283, i32 0, i32 2
- %285 = insertelement <4 x i32> %284, i32 undef, i32 3
- %286 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %285, <32 x i8> %147, <16 x i8> %149, i32 2)
- %287 = extractelement <4 x float> %286, i32 3
- %288 = fadd float %temp168.0, %277
- %289 = fadd float %temp169.0, %278
- %290 = fadd float %temp170.0, %279
- %291 = fsub float -0.000000e+00, %287
- %292 = fadd float %290, %291
- %293 = fcmp oge float 0.000000e+00, %292
- %294 = sext i1 %293 to i32
- %295 = bitcast i32 %294 to float
- %296 = bitcast float %295 to i32
- %297 = icmp ne i32 %296, 0
- br i1 %297, label %IF189, label %LOOP
-
-IF189: ; preds = %LOOP
- %298 = extractelement <4 x float> %286, i32 0
- %299 = extractelement <4 x float> %286, i32 1
- %300 = extractelement <4 x float> %286, i32 2
- %301 = fsub float -0.000000e+00, %292
- %302 = fadd float %temp144.0, %301
- %303 = fdiv float 1.000000e+00, %302
- %304 = fmul float %292, %303
- %305 = fadd float %304, -1.000000e+00
- %306 = fmul float %305, %277
- %307 = fadd float %306, %288
- %308 = fmul float %305, %278
- %309 = fadd float %308, %289
- %310 = fsub float -0.000000e+00, %176
- %311 = fadd float %307, %310
- %312 = fsub float -0.000000e+00, %177
- %313 = fadd float %309, %312
- %314 = fadd float %176, %311
- %315 = fadd float %177, %313
- %316 = fmul float %311, %67
- %317 = fmul float %313, %68
- %318 = fmul float %316, %55
- %319 = fmul float %316, %56
- %320 = fmul float %317, %57
- %321 = fadd float %320, %318
- %322 = fmul float %317, %58
- %323 = fadd float %322, %319
- %324 = fadd float %178, %321
- %325 = fadd float %179, %323
- %326 = fmul float %316, %59
- %327 = fmul float %316, %60
- %328 = fmul float %316, %61
- %329 = fmul float %316, %62
- %330 = fmul float %317, %63
- %331 = fadd float %330, %326
- %332 = fmul float %317, %64
- %333 = fadd float %332, %327
- %334 = fmul float %317, %65
- %335 = fadd float %334, %328
- %336 = fmul float %317, %66
- %337 = fadd float %336, %329
- %338 = fadd float %168, %331
- %339 = fadd float %169, %333
- %340 = fadd float %170, %335
- %341 = fadd float %171, %337
- %342 = bitcast float %338 to i32
- %343 = bitcast float %339 to i32
- %344 = insertelement <2 x i32> undef, i32 %342, i32 0
- %345 = insertelement <2 x i32> %344, i32 %343, i32 1
- %346 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %345, <32 x i8> %135, <16 x i8> %137, i32 2)
- %347 = extractelement <4 x float> %346, i32 0
- %348 = extractelement <4 x float> %346, i32 1
- %349 = extractelement <4 x float> %346, i32 2
- %350 = extractelement <4 x float> %346, i32 3
- %351 = fmul float %347, %23
- %352 = fmul float %348, %24
- %353 = fmul float %349, %25
- %354 = fmul float %350, %26
- %355 = fmul float %351, %180
- %356 = fmul float %352, %181
- %357 = fmul float %353, %182
- %358 = fmul float %354, %183
- %359 = fsub float -0.000000e+00, %350
- %360 = fadd float 1.000000e+00, %359
- %361 = fmul float %360, %49
- %362 = call float @llvm.AMDGPU.lrp(float %361, float %347, float %355)
- %363 = call float @llvm.AMDGPU.lrp(float %361, float %348, float %356)
- %364 = call float @llvm.AMDGPU.lrp(float %361, float %349, float %357)
- %365 = bitcast float %340 to i32
- %366 = bitcast float %341 to i32
- %367 = insertelement <2 x i32> undef, i32 %365, i32 0
- %368 = insertelement <2 x i32> %367, i32 %366, i32 1
- %369 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %368, <32 x i8> %151, <16 x i8> %153, i32 2)
- %370 = extractelement <4 x float> %369, i32 2
- %371 = fmul float %362, %234
- %372 = fmul float %363, %235
- %373 = fmul float %364, %236
- %374 = fmul float %358, %230
- %375 = bitcast float %314 to i32
- %376 = bitcast float %315 to i32
- %377 = insertelement <2 x i32> undef, i32 %375, i32 0
- %378 = insertelement <2 x i32> %377, i32 %376, i32 1
- %379 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %378, <32 x i8> %139, <16 x i8> %141, i32 2)
- %380 = extractelement <4 x float> %379, i32 0
- %381 = extractelement <4 x float> %379, i32 1
- %382 = extractelement <4 x float> %379, i32 2
- %383 = extractelement <4 x float> %379, i32 3
- %384 = fcmp olt float 0.000000e+00, %382
- %385 = sext i1 %384 to i32
- %386 = bitcast i32 %385 to float
- %387 = bitcast float %386 to i32
- %388 = icmp ne i32 %387, 0
- %.224 = select i1 %388, float %381, float %380
- %.225 = select i1 %388, float %383, float %381
- %389 = bitcast float %324 to i32
- %390 = bitcast float %325 to i32
- %391 = insertelement <2 x i32> undef, i32 %389, i32 0
- %392 = insertelement <2 x i32> %391, i32 %390, i32 1
- %393 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %392, <32 x i8> %143, <16 x i8> %145, i32 2)
- %394 = extractelement <4 x float> %393, i32 0
- %395 = extractelement <4 x float> %393, i32 1
- %396 = extractelement <4 x float> %393, i32 2
- %397 = extractelement <4 x float> %393, i32 3
- %398 = fcmp olt float 0.000000e+00, %396
- %399 = sext i1 %398 to i32
- %400 = bitcast i32 %399 to float
- %401 = bitcast float %400 to i32
- %402 = icmp ne i32 %401, 0
- %temp112.1 = select i1 %402, float %395, float %394
- %temp113.1 = select i1 %402, float %397, float %395
- %403 = fmul float %.224, 2.000000e+00
- %404 = fadd float %403, -1.000000e+00
- %405 = fmul float %.225, 2.000000e+00
- %406 = fadd float %405, -1.000000e+00
- %407 = fmul float %temp112.1, 2.000000e+00
- %408 = fadd float %407, -1.000000e+00
- %409 = fmul float %temp113.1, 2.000000e+00
- %410 = fadd float %409, -1.000000e+00
- %411 = fsub float -0.000000e+00, %404
- %412 = fmul float %411, %35
- %413 = fsub float -0.000000e+00, %406
- %414 = fmul float %413, %35
- %415 = fsub float -0.000000e+00, %408
- %416 = fmul float %415, %36
- %417 = fsub float -0.000000e+00, %410
- %418 = fmul float %417, %36
- %419 = fmul float %416, %370
- %420 = fmul float %418, %370
- %421 = call float @fabs(float %412)
- %422 = call float @fabs(float %414)
- %423 = fsub float -0.000000e+00, %421
- %424 = fadd float 1.000000e+00, %423
- %425 = fsub float -0.000000e+00, %422
- %426 = fadd float 1.000000e+00, %425
- %427 = fmul float %424, %419
- %428 = fadd float %427, %412
- %429 = fmul float %426, %420
- %430 = fadd float %429, %414
- %431 = fmul float %428, %428
- %432 = fmul float %430, %430
- %433 = fadd float %431, %432
- %434 = fsub float -0.000000e+00, %433
- %435 = fadd float 0x3FF00068E0000000, %434
- %436 = call float @llvm.AMDIL.clamp.(float %435, float 0.000000e+00, float 1.000000e+00)
- %437 = call float @llvm.AMDGPU.rsq.f32(float %436)
- %438 = fmul float %437, %436
- %439 = fsub float -0.000000e+00, %436
- %440 = call float @llvm.AMDGPU.cndlt(float %439, float %438, float 0.000000e+00)
- %441 = fmul float %184, %428
- %442 = fmul float %185, %428
- %443 = fmul float %186, %428
- %444 = fmul float %187, %430
- %445 = fadd float %444, %441
- %446 = fmul float %188, %430
- %447 = fadd float %446, %442
- %448 = fmul float %189, %430
- %449 = fadd float %448, %443
- %450 = fmul float %190, %440
- %451 = fadd float %450, %445
- %452 = fmul float %191, %440
- %453 = fadd float %452, %447
- %454 = fmul float %192, %440
- %455 = fadd float %454, %449
- %456 = fmul float %451, %451
- %457 = fmul float %453, %453
- %458 = fadd float %457, %456
- %459 = fmul float %455, %455
- %460 = fadd float %458, %459
- %461 = call float @llvm.AMDGPU.rsq.f32(float %460)
- %462 = fmul float %451, %461
- %463 = fmul float %453, %461
- %464 = fmul float %455, %461
- %465 = fcmp olt float 0.000000e+00, %219
- %466 = sext i1 %465 to i32
- %467 = bitcast i32 %466 to float
- %468 = bitcast float %467 to i32
- %469 = icmp ne i32 %468, 0
- br i1 %469, label %IF198, label %ENDIF197
-
-IF198: ; preds = %IF189
- %470 = fsub float -0.000000e+00, %462
- %471 = fsub float -0.000000e+00, %463
- %472 = fsub float -0.000000e+00, %464
- br label %ENDIF197
-
-ENDIF197: ; preds = %IF189, %IF198
- %temp14.0 = phi float [ %472, %IF198 ], [ %464, %IF189 ]
- %temp13.0 = phi float [ %471, %IF198 ], [ %463, %IF189 ]
- %temp12.0 = phi float [ %470, %IF198 ], [ %462, %IF189 ]
- %473 = bitcast float %220 to i32
- %474 = bitcast float %221 to i32
- %475 = insertelement <2 x i32> undef, i32 %473, i32 0
- %476 = insertelement <2 x i32> %475, i32 %474, i32 1
- %477 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %476, <32 x i8> %159, <16 x i8> %161, i32 2)
- %478 = extractelement <4 x float> %477, i32 0
- %479 = extractelement <4 x float> %477, i32 1
- %480 = extractelement <4 x float> %477, i32 2
- %481 = extractelement <4 x float> %477, i32 3
- %482 = fmul float %478, %40
- %483 = fadd float %482, %41
- %484 = fmul float %479, %40
- %485 = fadd float %484, %41
- %486 = fmul float %480, %40
- %487 = fadd float %486, %41
- %488 = fmul float %481, %42
- %489 = fadd float %488, %43
- %490 = bitcast float %172 to i32
- %491 = bitcast float %173 to i32
- %492 = insertelement <2 x i32> undef, i32 %490, i32 0
- %493 = insertelement <2 x i32> %492, i32 %491, i32 1
- %494 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %493, <32 x i8> %155, <16 x i8> %157, i32 2)
- %495 = extractelement <4 x float> %494, i32 0
- %496 = extractelement <4 x float> %494, i32 1
- %497 = extractelement <4 x float> %494, i32 2
- %498 = extractelement <4 x float> %494, i32 3
- %499 = fmul float %498, 3.200000e+01
- %500 = fadd float %499, -1.600000e+01
- %501 = call float @llvm.AMDIL.exp.(float %500)
- %502 = fmul float %495, %501
- %503 = fmul float %496, %501
- %504 = fmul float %497, %501
- %505 = fmul float %28, %502
- %506 = fadd float %505, %193
- %507 = fmul float %29, %503
- %508 = fadd float %507, %194
- %509 = fmul float %30, %504
- %510 = fadd float %509, %195
- %511 = fmul float %506, %489
- %512 = fmul float %508, %489
- %513 = fmul float %510, %489
- %514 = fmul float %489, 5.000000e-01
- %515 = fadd float %514, 5.000000e-01
- %516 = fmul float %483, %515
- %517 = fadd float %516, %511
- %518 = fmul float %485, %515
- %519 = fadd float %518, %512
- %520 = fmul float %487, %515
- %521 = fadd float %520, %513
- %522 = fmul float %517, %371
- %523 = fmul float %519, %372
- %524 = fmul float %521, %373
- %525 = fmul float %428, 0x3FDB272440000000
- %526 = fmul float %430, 0xBFDB272440000000
- %527 = fadd float %526, %525
- %528 = fmul float %440, 0x3FE99999A0000000
- %529 = fadd float %527, %528
- %530 = fmul float %529, 5.000000e-01
- %531 = fadd float %530, 0x3FE3333340000000
- %532 = fmul float %531, %531
- %533 = fmul float %522, %532
- %534 = fmul float %523, %532
- %535 = fmul float %524, %532
- %536 = fsub float -0.000000e+00, %72
- %537 = fsub float -0.000000e+00, %73
- %538 = fsub float -0.000000e+00, %74
- %539 = fmul float %temp12.0, %536
- %540 = fmul float %temp13.0, %537
- %541 = fadd float %540, %539
- %542 = fmul float %temp14.0, %538
- %543 = fadd float %541, %542
- %544 = call float @llvm.AMDIL.clamp.(float %543, float 0.000000e+00, float 1.000000e+00)
- %545 = fmul float %371, %544
- %546 = fmul float %372, %544
- %547 = fmul float %373, %544
- %548 = fmul float %545, %69
- %549 = fmul float %546, %70
- %550 = fmul float %547, %71
- %551 = fsub float -0.000000e+00, %164
- %552 = fadd float %97, %551
- %553 = fsub float -0.000000e+00, %165
- %554 = fadd float %98, %553
- %555 = fsub float -0.000000e+00, %166
- %556 = fadd float %99, %555
- %557 = fmul float %552, %552
- %558 = fmul float %554, %554
- %559 = fadd float %558, %557
- %560 = fmul float %556, %556
- %561 = fadd float %559, %560
- %562 = call float @llvm.AMDGPU.rsq.f32(float %561)
- %563 = fmul float %562, %561
- %564 = fsub float -0.000000e+00, %561
- %565 = call float @llvm.AMDGPU.cndlt(float %564, float %563, float 0.000000e+00)
- %566 = fsub float -0.000000e+00, %84
- %567 = fadd float %565, %566
- %568 = fsub float -0.000000e+00, %83
- %569 = fadd float %565, %568
- %570 = fsub float -0.000000e+00, %82
- %571 = fadd float %565, %570
- %572 = fsub float -0.000000e+00, %84
- %573 = fadd float %83, %572
- %574 = fsub float -0.000000e+00, %83
- %575 = fadd float %82, %574
- %576 = fsub float -0.000000e+00, %82
- %577 = fadd float %81, %576
- %578 = fdiv float 1.000000e+00, %573
- %579 = fdiv float 1.000000e+00, %575
- %580 = fdiv float 1.000000e+00, %577
- %581 = fmul float %567, %578
- %582 = fmul float %569, %579
- %583 = fmul float %571, %580
- %584 = fcmp olt float %565, %83
- %585 = sext i1 %584 to i32
- %586 = bitcast i32 %585 to float
- %587 = bitcast float %586 to i32
- %588 = icmp ne i32 %587, 0
- br i1 %588, label %ENDIF200, label %ELSE202
-
-ELSE202: ; preds = %ENDIF197
- %589 = fcmp olt float %565, %82
- %590 = sext i1 %589 to i32
- %591 = bitcast i32 %590 to float
- %592 = bitcast float %591 to i32
- %593 = icmp ne i32 %592, 0
- br i1 %593, label %ENDIF200, label %ELSE205
-
-ENDIF200: ; preds = %ELSE205, %ELSE202, %ENDIF197
- %temp80.0 = phi float [ %581, %ENDIF197 ], [ %.226, %ELSE205 ], [ %582, %ELSE202 ]
- %temp88.0 = phi float [ %122, %ENDIF197 ], [ %.227, %ELSE205 ], [ %120, %ELSE202 ]
- %temp89.0 = phi float [ %123, %ENDIF197 ], [ %.228, %ELSE205 ], [ %121, %ELSE202 ]
- %temp90.0 = phi float [ %120, %ENDIF197 ], [ %116, %ELSE205 ], [ %118, %ELSE202 ]
- %temp91.0 = phi float [ %121, %ENDIF197 ], [ %117, %ELSE205 ], [ %119, %ELSE202 ]
- %594 = fcmp olt float %565, %83
- %595 = sext i1 %594 to i32
- %596 = bitcast i32 %595 to float
- %597 = bitcast float %596 to i32
- %598 = icmp ne i32 %597, 0
- br i1 %598, label %ENDIF209, label %ELSE211
-
-ELSE205: ; preds = %ELSE202
- %599 = fcmp olt float %565, %81
- %600 = sext i1 %599 to i32
- %601 = bitcast i32 %600 to float
- %602 = bitcast float %601 to i32
- %603 = icmp ne i32 %602, 0
- %.226 = select i1 %603, float %583, float 1.000000e+00
- %.227 = select i1 %603, float %118, float %116
- %.228 = select i1 %603, float %119, float %117
- br label %ENDIF200
-
-ELSE211: ; preds = %ENDIF200
- %604 = fcmp olt float %565, %82
- %605 = sext i1 %604 to i32
- %606 = bitcast i32 %605 to float
- %607 = bitcast float %606 to i32
- %608 = icmp ne i32 %607, 0
- br i1 %608, label %ENDIF209, label %ELSE214
-
-ENDIF209: ; preds = %ELSE214, %ELSE211, %ENDIF200
- %temp52.0 = phi float [ %108, %ENDIF200 ], [ %100, %ELSE214 ], [ %104, %ELSE211 ]
- %temp53.0 = phi float [ %109, %ENDIF200 ], [ %101, %ELSE214 ], [ %105, %ELSE211 ]
- %temp54.0 = phi float [ %110, %ENDIF200 ], [ %102, %ELSE214 ], [ %106, %ELSE211 ]
- %temp55.0 = phi float [ %111, %ENDIF200 ], [ %103, %ELSE214 ], [ %107, %ELSE211 ]
- %temp68.0 = phi float [ %112, %ENDIF200 ], [ %.230, %ELSE214 ], [ %108, %ELSE211 ]
- %temp69.0 = phi float [ %113, %ENDIF200 ], [ %.231, %ELSE214 ], [ %109, %ELSE211 ]
- %temp70.0 = phi float [ %114, %ENDIF200 ], [ %.232, %ELSE214 ], [ %110, %ELSE211 ]
- %temp71.0 = phi float [ %115, %ENDIF200 ], [ %.233, %ELSE214 ], [ %111, %ELSE211 ]
- %609 = fmul float %164, %85
- %610 = fmul float %165, %86
- %611 = fadd float %609, %610
- %612 = fmul float %166, %87
- %613 = fadd float %611, %612
- %614 = fmul float %167, %88
- %615 = fadd float %613, %614
- %616 = fmul float %164, %89
- %617 = fmul float %165, %90
- %618 = fadd float %616, %617
- %619 = fmul float %166, %91
- %620 = fadd float %618, %619
- %621 = fmul float %167, %92
- %622 = fadd float %620, %621
- %623 = fmul float %164, %93
- %624 = fmul float %165, %94
- %625 = fadd float %623, %624
- %626 = fmul float %166, %95
- %627 = fadd float %625, %626
- %628 = fmul float %167, %96
- %629 = fadd float %627, %628
- %630 = fsub float -0.000000e+00, %78
- %631 = fadd float 1.000000e+00, %630
- %632 = call float @fabs(float %615)
- %633 = call float @fabs(float %622)
- %634 = fcmp oge float %631, %632
- %635 = sext i1 %634 to i32
- %636 = bitcast i32 %635 to float
- %637 = bitcast float %636 to i32
- %638 = and i32 %637, 1065353216
- %639 = bitcast i32 %638 to float
- %640 = fcmp oge float %631, %633
- %641 = sext i1 %640 to i32
- %642 = bitcast i32 %641 to float
- %643 = bitcast float %642 to i32
- %644 = and i32 %643, 1065353216
- %645 = bitcast i32 %644 to float
- %646 = fmul float %639, %645
- %647 = fmul float %629, %646
- %648 = fmul float %615, %temp68.0
- %649 = fadd float %648, %temp70.0
- %650 = fmul float %622, %temp69.0
- %651 = fadd float %650, %temp71.0
- %652 = fmul float %615, %temp52.0
- %653 = fadd float %652, %temp54.0
- %654 = fmul float %622, %temp53.0
- %655 = fadd float %654, %temp55.0
- %656 = fadd float %temp80.0, -1.000000e+00
- %657 = fmul float %656, %77
- %658 = fadd float %657, 1.000000e+00
- %659 = call float @llvm.AMDIL.clamp.(float %658, float 0.000000e+00, float 1.000000e+00)
- %660 = bitcast float %649 to i32
- %661 = bitcast float %651 to i32
- %662 = bitcast float 0.000000e+00 to i32
- %663 = insertelement <4 x i32> undef, i32 %660, i32 0
- %664 = insertelement <4 x i32> %663, i32 %661, i32 1
- %665 = insertelement <4 x i32> %664, i32 %662, i32 2
- %666 = insertelement <4 x i32> %665, i32 undef, i32 3
- %667 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %666, <32 x i8> %127, <16 x i8> %129, i32 2)
- %668 = extractelement <4 x float> %667, i32 0
- %669 = extractelement <4 x float> %667, i32 1
- %670 = bitcast float %653 to i32
- %671 = bitcast float %655 to i32
- %672 = bitcast float 0.000000e+00 to i32
- %673 = insertelement <4 x i32> undef, i32 %670, i32 0
- %674 = insertelement <4 x i32> %673, i32 %671, i32 1
- %675 = insertelement <4 x i32> %674, i32 %672, i32 2
- %676 = insertelement <4 x i32> %675, i32 undef, i32 3
- %677 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %676, <32 x i8> %127, <16 x i8> %129, i32 2)
- %678 = extractelement <4 x float> %677, i32 0
- %679 = extractelement <4 x float> %677, i32 1
- %680 = fsub float -0.000000e+00, %669
- %681 = fadd float 1.000000e+00, %680
- %682 = fsub float -0.000000e+00, %679
- %683 = fadd float 1.000000e+00, %682
- %684 = fmul float %681, 2.500000e-01
- %685 = fmul float %683, 2.500000e-01
- %686 = fsub float -0.000000e+00, %684
- %687 = fadd float %668, %686
- %688 = fsub float -0.000000e+00, %685
- %689 = fadd float %678, %688
- %690 = fmul float %647, %temp88.0
- %691 = fadd float %690, %temp89.0
- %692 = fmul float %647, %temp90.0
- %693 = fadd float %692, %temp91.0
- %694 = call float @llvm.AMDIL.clamp.(float %691, float 0.000000e+00, float 1.000000e+00)
- %695 = call float @llvm.AMDIL.clamp.(float %693, float 0.000000e+00, float 1.000000e+00)
- %696 = fsub float -0.000000e+00, %694
- %697 = fadd float %668, %696
- %698 = fsub float -0.000000e+00, %695
- %699 = fadd float %678, %698
- %700 = fmul float %668, %668
- %701 = fmul float %678, %678
- %702 = fsub float -0.000000e+00, %700
- %703 = fadd float %687, %702
- %704 = fsub float -0.000000e+00, %701
- %705 = fadd float %689, %704
- %706 = fcmp uge float %703, %75
- %707 = select i1 %706, float %703, float %75
- %708 = fcmp uge float %705, %75
- %709 = select i1 %708, float %705, float %75
- %710 = fmul float %697, %697
- %711 = fadd float %710, %707
- %712 = fmul float %699, %699
- %713 = fadd float %712, %709
- %714 = fdiv float 1.000000e+00, %711
- %715 = fdiv float 1.000000e+00, %713
- %716 = fmul float %707, %714
- %717 = fmul float %709, %715
- %718 = fcmp oge float %697, 0.000000e+00
- %719 = sext i1 %718 to i32
- %720 = bitcast i32 %719 to float
- %721 = bitcast float %720 to i32
- %722 = icmp ne i32 %721, 0
- %.229 = select i1 %722, float 1.000000e+00, float %716
- %723 = fcmp oge float %699, 0.000000e+00
- %724 = sext i1 %723 to i32
- %725 = bitcast i32 %724 to float
- %726 = bitcast float %725 to i32
- %727 = icmp ne i32 %726, 0
- %temp28.0 = select i1 %727, float 1.000000e+00, float %717
- %728 = call float @llvm.AMDGPU.lrp(float %659, float %temp28.0, float %.229)
- %729 = call float @llvm.pow.f32(float %728, float %76)
- %730 = fmul float %729, %79
- %731 = fadd float %730, %80
- %732 = call float @llvm.AMDIL.clamp.(float %731, float 0.000000e+00, float 1.000000e+00)
- %733 = fmul float %732, %732
- %734 = fmul float 2.000000e+00, %732
- %735 = fsub float -0.000000e+00, %734
- %736 = fadd float 3.000000e+00, %735
- %737 = fmul float %733, %736
- %738 = fmul float %548, %737
- %739 = fmul float %549, %737
- %740 = fmul float %550, %737
- %741 = fmul float %738, %515
- %742 = fadd float %741, %533
- %743 = fmul float %739, %515
- %744 = fadd float %743, %534
- %745 = fmul float %740, %515
- %746 = fadd float %745, %535
- %747 = call float @llvm.AMDGPU.lrp(float %230, float %287, float 1.000000e+00)
- %748 = call float @llvm.AMDGPU.lrp(float %37, float %298, float 1.000000e+00)
- %749 = call float @llvm.AMDGPU.lrp(float %37, float %299, float 1.000000e+00)
- %750 = call float @llvm.AMDGPU.lrp(float %37, float %300, float 1.000000e+00)
- %751 = call float @llvm.AMDGPU.lrp(float %38, float %747, float 1.000000e+00)
- %752 = fmul float %748, %751
- %753 = fmul float %749, %751
- %754 = fmul float %750, %751
- %755 = fmul float %742, %752
- %756 = fmul float %744, %753
- %757 = fmul float %746, %754
- %758 = fmul float %temp12.0, %216
- %759 = fmul float %temp13.0, %217
- %760 = fadd float %759, %758
- %761 = fmul float %temp14.0, %218
- %762 = fadd float %760, %761
- %763 = call float @fabs(float %762)
- %764 = fmul float %763, %763
- %765 = fmul float %764, %50
- %766 = fadd float %765, %51
- %767 = call float @llvm.AMDIL.clamp.(float %766, float 0.000000e+00, float 1.000000e+00)
- %768 = fsub float -0.000000e+00, %767
- %769 = fadd float 1.000000e+00, %768
- %770 = fmul float %33, %769
- %771 = fmul float %33, %769
- %772 = fmul float %33, %769
- %773 = fmul float %34, %769
- %774 = call float @llvm.AMDGPU.lrp(float %770, float %31, float %755)
- %775 = call float @llvm.AMDGPU.lrp(float %771, float %31, float %756)
- %776 = call float @llvm.AMDGPU.lrp(float %772, float %31, float %757)
- %777 = call float @llvm.AMDGPU.lrp(float %773, float %32, float %374)
- %778 = fcmp uge float %774, 0x3E6FFFFE60000000
- %779 = select i1 %778, float %774, float 0x3E6FFFFE60000000
- %780 = fcmp uge float %775, 0x3E6FFFFE60000000
- %781 = select i1 %780, float %775, float 0x3E6FFFFE60000000
- %782 = fcmp uge float %776, 0x3E6FFFFE60000000
- %783 = select i1 %782, float %776, float 0x3E6FFFFE60000000
- %784 = fcmp uge float %779, 6.550400e+04
- %785 = select i1 %784, float 6.550400e+04, float %779
- %786 = fcmp uge float %781, 6.550400e+04
- %787 = select i1 %786, float 6.550400e+04, float %781
- %788 = fcmp uge float %783, 6.550400e+04
- %789 = select i1 %788, float 6.550400e+04, float %783
- %790 = fmul float %777, %52
- %791 = fadd float %790, %53
- %792 = call float @llvm.AMDIL.clamp.(float %791, float 0.000000e+00, float 1.000000e+00)
- %793 = call i32 @llvm.SI.packf16(float %785, float %787)
- %794 = bitcast i32 %793 to float
- %795 = call i32 @llvm.SI.packf16(float %789, float %792)
- %796 = bitcast i32 %795 to float
- call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %794, float %796, float %794, float %796)
- ret void
-
-ELSE214: ; preds = %ELSE211
- %797 = fcmp olt float %565, %81
- %798 = sext i1 %797 to i32
- %799 = bitcast i32 %798 to float
- %800 = bitcast float %799 to i32
- %801 = icmp ne i32 %800, 0
- %.230 = select i1 %801, float %104, float %100
- %.231 = select i1 %801, float %105, float %101
- %.232 = select i1 %801, float %106, float %102
- %.233 = select i1 %801, float %107, float %103
- br label %ENDIF209
-}
-
-; Function Attrs: readnone
-declare float @llvm.AMDIL.clamp.(float, float, float) #2
-
-; Function Attrs: nounwind readnone
-declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1
-
-; Function Attrs: readnone
-declare float @llvm.AMDGPU.lrp(float, float, float) #2
-
-; Function Attrs: nounwind readnone
-declare <4 x float> @llvm.SI.samplel.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1
-
-; Function Attrs: readnone
-declare float @llvm.AMDGPU.cndlt(float, float, float) #2
-
-; Function Attrs: readnone
-declare float @llvm.AMDIL.exp.(float) #2
-
-attributes #0 = { "ShaderType"="0" }
-attributes #1 = { nounwind readnone }
-attributes #2 = { readnone }
-attributes #3 = { nounwind readonly }
-attributes #4 = { readonly }
Removed: llvm/trunk/test/CodeGen/R600/si-spill-cf.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/si-spill-cf.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/si-spill-cf.ll (original)
+++ llvm/trunk/test/CodeGen/R600/si-spill-cf.ll (removed)
@@ -1,501 +0,0 @@
-; RUN: llc -march=amdgcn -mcpu=SI < %s -verify-machineinstrs | FileCheck -check-prefix=SI %s
-; RUN: llc -march=amdgcn -mcpu=tonga < %s -verify-machineinstrs | FileCheck -check-prefix=SI %s
-
-; If this occurs it is likely due to reordering and the restore was
-; originally supposed to happen before SI_END_CF.
-; SI: s_or_b64 exec, exec, [[SAVED:s\[[0-9]+:[0-9]+\]|[a-z]+]]
-; SI-NOT: v_readlane_b32 [[SAVED]]
-
-define void @main() #0 {
-main_body:
- %0 = call float @llvm.SI.load.const(<16 x i8> undef, i32 16)
- %1 = call float @llvm.SI.load.const(<16 x i8> undef, i32 32)
- %2 = call float @llvm.SI.load.const(<16 x i8> undef, i32 80)
- %3 = call float @llvm.SI.load.const(<16 x i8> undef, i32 84)
- %4 = call float @llvm.SI.load.const(<16 x i8> undef, i32 88)
- %5 = call float @llvm.SI.load.const(<16 x i8> undef, i32 96)
- %6 = call float @llvm.SI.load.const(<16 x i8> undef, i32 100)
- %7 = call float @llvm.SI.load.const(<16 x i8> undef, i32 104)
- %8 = call float @llvm.SI.load.const(<16 x i8> undef, i32 112)
- %9 = call float @llvm.SI.load.const(<16 x i8> undef, i32 116)
- %10 = call float @llvm.SI.load.const(<16 x i8> undef, i32 120)
- %11 = call float @llvm.SI.load.const(<16 x i8> undef, i32 128)
- %12 = call float @llvm.SI.load.const(<16 x i8> undef, i32 132)
- %13 = call float @llvm.SI.load.const(<16 x i8> undef, i32 136)
- %14 = call float @llvm.SI.load.const(<16 x i8> undef, i32 144)
- %15 = call float @llvm.SI.load.const(<16 x i8> undef, i32 148)
- %16 = call float @llvm.SI.load.const(<16 x i8> undef, i32 152)
- %17 = call float @llvm.SI.load.const(<16 x i8> undef, i32 160)
- %18 = call float @llvm.SI.load.const(<16 x i8> undef, i32 164)
- %19 = call float @llvm.SI.load.const(<16 x i8> undef, i32 168)
- %20 = call float @llvm.SI.load.const(<16 x i8> undef, i32 176)
- %21 = call float @llvm.SI.load.const(<16 x i8> undef, i32 180)
- %22 = call float @llvm.SI.load.const(<16 x i8> undef, i32 184)
- %23 = call float @llvm.SI.load.const(<16 x i8> undef, i32 192)
- %24 = call float @llvm.SI.load.const(<16 x i8> undef, i32 196)
- %25 = call float @llvm.SI.load.const(<16 x i8> undef, i32 200)
- %26 = call float @llvm.SI.load.const(<16 x i8> undef, i32 208)
- %27 = call float @llvm.SI.load.const(<16 x i8> undef, i32 212)
- %28 = call float @llvm.SI.load.const(<16 x i8> undef, i32 216)
- %29 = call float @llvm.SI.load.const(<16 x i8> undef, i32 224)
- %30 = call float @llvm.SI.load.const(<16 x i8> undef, i32 228)
- %31 = call float @llvm.SI.load.const(<16 x i8> undef, i32 232)
- %32 = call float @llvm.SI.load.const(<16 x i8> undef, i32 240)
- %33 = call float @llvm.SI.load.const(<16 x i8> undef, i32 244)
- %34 = call float @llvm.SI.load.const(<16 x i8> undef, i32 248)
- %35 = call float @llvm.SI.load.const(<16 x i8> undef, i32 256)
- %36 = call float @llvm.SI.load.const(<16 x i8> undef, i32 260)
- %37 = call float @llvm.SI.load.const(<16 x i8> undef, i32 264)
- %38 = call float @llvm.SI.load.const(<16 x i8> undef, i32 272)
- %39 = call float @llvm.SI.load.const(<16 x i8> undef, i32 276)
- %40 = call float @llvm.SI.load.const(<16 x i8> undef, i32 280)
- %41 = call float @llvm.SI.load.const(<16 x i8> undef, i32 288)
- %42 = call float @llvm.SI.load.const(<16 x i8> undef, i32 292)
- %43 = call float @llvm.SI.load.const(<16 x i8> undef, i32 296)
- %44 = call float @llvm.SI.load.const(<16 x i8> undef, i32 304)
- %45 = call float @llvm.SI.load.const(<16 x i8> undef, i32 308)
- %46 = call float @llvm.SI.load.const(<16 x i8> undef, i32 312)
- %47 = call float @llvm.SI.load.const(<16 x i8> undef, i32 320)
- %48 = call float @llvm.SI.load.const(<16 x i8> undef, i32 324)
- %49 = call float @llvm.SI.load.const(<16 x i8> undef, i32 328)
- %50 = call float @llvm.SI.load.const(<16 x i8> undef, i32 336)
- %51 = call float @llvm.SI.load.const(<16 x i8> undef, i32 340)
- %52 = call float @llvm.SI.load.const(<16 x i8> undef, i32 344)
- %53 = call float @llvm.SI.load.const(<16 x i8> undef, i32 352)
- %54 = call float @llvm.SI.load.const(<16 x i8> undef, i32 356)
- %55 = call float @llvm.SI.load.const(<16 x i8> undef, i32 360)
- %56 = call float @llvm.SI.load.const(<16 x i8> undef, i32 368)
- %57 = call float @llvm.SI.load.const(<16 x i8> undef, i32 372)
- %58 = call float @llvm.SI.load.const(<16 x i8> undef, i32 376)
- %59 = call float @llvm.SI.load.const(<16 x i8> undef, i32 384)
- %60 = call float @llvm.SI.load.const(<16 x i8> undef, i32 388)
- %61 = call float @llvm.SI.load.const(<16 x i8> undef, i32 392)
- %62 = call float @llvm.SI.load.const(<16 x i8> undef, i32 400)
- %63 = call float @llvm.SI.load.const(<16 x i8> undef, i32 404)
- %64 = call float @llvm.SI.load.const(<16 x i8> undef, i32 408)
- %65 = call float @llvm.SI.load.const(<16 x i8> undef, i32 416)
- %66 = call float @llvm.SI.load.const(<16 x i8> undef, i32 420)
- br label %LOOP
-
-LOOP: ; preds = %ENDIF2795, %main_body
- %temp894.0 = phi float [ 0.000000e+00, %main_body ], [ %temp894.1, %ENDIF2795 ]
- %temp18.0 = phi float [ undef, %main_body ], [ %temp18.1, %ENDIF2795 ]
- %67 = icmp sgt i32 undef, 4
- br i1 %67, label %ENDLOOP, label %ENDIF
-
-ENDLOOP: ; preds = %ELSE2566, %LOOP
- %68 = call float @llvm.AMDGPU.lrp(float %0, float undef, float undef)
- call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float undef, float %68, float undef, float 1.000000e+00)
- ret void
-
-ENDIF: ; preds = %LOOP
- %69 = fsub float %2, undef
- %70 = fsub float %3, undef
- %71 = fsub float %4, undef
- %72 = fmul float %69, 0.000000e+00
- %73 = fmul float %70, undef
- %74 = fmul float %71, undef
- %75 = fsub float %6, undef
- %76 = fsub float %7, undef
- %77 = fmul float %75, undef
- %78 = fmul float %76, 0.000000e+00
- %79 = call float @llvm.minnum.f32(float %74, float %78)
- %80 = call float @llvm.maxnum.f32(float %72, float 0.000000e+00)
- %81 = call float @llvm.maxnum.f32(float %73, float %77)
- %82 = call float @llvm.maxnum.f32(float undef, float %79)
- %83 = call float @llvm.minnum.f32(float %80, float %81)
- %84 = call float @llvm.minnum.f32(float %83, float undef)
- %85 = fsub float %14, undef
- %86 = fsub float %15, undef
- %87 = fsub float %16, undef
- %88 = fmul float %85, undef
- %89 = fmul float %86, undef
- %90 = fmul float %87, undef
- %91 = fsub float %17, undef
- %92 = fsub float %18, undef
- %93 = fsub float %19, undef
- %94 = fmul float %91, 0.000000e+00
- %95 = fmul float %92, undef
- %96 = fmul float %93, undef
- %97 = call float @llvm.minnum.f32(float %89, float %95)
- %98 = call float @llvm.maxnum.f32(float %88, float %94)
- %99 = call float @llvm.maxnum.f32(float %90, float %96)
- %100 = call float @llvm.maxnum.f32(float undef, float %97)
- %101 = call float @llvm.maxnum.f32(float %100, float undef)
- %102 = call float @llvm.minnum.f32(float %98, float undef)
- %103 = call float @llvm.minnum.f32(float %102, float %99)
- %104 = fsub float %30, undef
- %105 = fsub float %31, undef
- %106 = fmul float %104, 0.000000e+00
- %107 = fmul float %105, 0.000000e+00
- %108 = call float @llvm.minnum.f32(float undef, float %106)
- %109 = call float @llvm.maxnum.f32(float undef, float %107)
- %110 = call float @llvm.maxnum.f32(float undef, float %108)
- %111 = call float @llvm.maxnum.f32(float %110, float undef)
- %112 = call float @llvm.minnum.f32(float undef, float %109)
- %113 = fsub float %32, undef
- %114 = fsub float %33, undef
- %115 = fsub float %34, undef
- %116 = fmul float %113, 0.000000e+00
- %117 = fmul float %114, undef
- %118 = fmul float %115, undef
- %119 = fsub float %35, undef
- %120 = fsub float %36, undef
- %121 = fsub float %37, undef
- %122 = fmul float %119, undef
- %123 = fmul float %120, undef
- %124 = fmul float %121, undef
- %125 = call float @llvm.minnum.f32(float %116, float %122)
- %126 = call float @llvm.minnum.f32(float %117, float %123)
- %127 = call float @llvm.minnum.f32(float %118, float %124)
- %128 = call float @llvm.maxnum.f32(float %125, float %126)
- %129 = call float @llvm.maxnum.f32(float %128, float %127)
- %130 = fsub float %38, undef
- %131 = fsub float %39, undef
- %132 = fsub float %40, undef
- %133 = fmul float %130, 0.000000e+00
- %134 = fmul float %131, undef
- %135 = fmul float %132, undef
- %136 = fsub float %41, undef
- %137 = fsub float %42, undef
- %138 = fsub float %43, undef
- %139 = fmul float %136, undef
- %140 = fmul float %137, undef
- %141 = fmul float %138, undef
- %142 = call float @llvm.minnum.f32(float %133, float %139)
- %143 = call float @llvm.minnum.f32(float %134, float %140)
- %144 = call float @llvm.minnum.f32(float %135, float %141)
- %145 = call float @llvm.maxnum.f32(float %142, float %143)
- %146 = call float @llvm.maxnum.f32(float %145, float %144)
- %147 = fsub float %44, undef
- %148 = fsub float %45, undef
- %149 = fsub float %46, undef
- %150 = fmul float %147, 0.000000e+00
- %151 = fmul float %148, 0.000000e+00
- %152 = fmul float %149, undef
- %153 = fsub float %47, undef
- %154 = fsub float %48, undef
- %155 = fsub float %49, undef
- %156 = fmul float %153, undef
- %157 = fmul float %154, 0.000000e+00
- %158 = fmul float %155, undef
- %159 = call float @llvm.minnum.f32(float %150, float %156)
- %160 = call float @llvm.minnum.f32(float %151, float %157)
- %161 = call float @llvm.minnum.f32(float %152, float %158)
- %162 = call float @llvm.maxnum.f32(float %159, float %160)
- %163 = call float @llvm.maxnum.f32(float %162, float %161)
- %164 = fsub float %50, undef
- %165 = fsub float %51, undef
- %166 = fsub float %52, undef
- %167 = fmul float %164, undef
- %168 = fmul float %165, 0.000000e+00
- %169 = fmul float %166, 0.000000e+00
- %170 = fsub float %53, undef
- %171 = fsub float %54, undef
- %172 = fsub float %55, undef
- %173 = fdiv float 1.000000e+00, %temp18.0
- %174 = fmul float %170, undef
- %175 = fmul float %171, undef
- %176 = fmul float %172, %173
- %177 = call float @llvm.minnum.f32(float %167, float %174)
- %178 = call float @llvm.minnum.f32(float %168, float %175)
- %179 = call float @llvm.minnum.f32(float %169, float %176)
- %180 = call float @llvm.maxnum.f32(float %177, float %178)
- %181 = call float @llvm.maxnum.f32(float %180, float %179)
- %182 = fsub float %62, undef
- %183 = fsub float %63, undef
- %184 = fsub float %64, undef
- %185 = fmul float %182, 0.000000e+00
- %186 = fmul float %183, undef
- %187 = fmul float %184, undef
- %188 = fsub float %65, undef
- %189 = fsub float %66, undef
- %190 = fmul float %188, undef
- %191 = fmul float %189, undef
- %192 = call float @llvm.maxnum.f32(float %185, float %190)
- %193 = call float @llvm.maxnum.f32(float %186, float %191)
- %194 = call float @llvm.maxnum.f32(float %187, float undef)
- %195 = call float @llvm.minnum.f32(float %192, float %193)
- %196 = call float @llvm.minnum.f32(float %195, float %194)
- %.temp292.7 = select i1 undef, float %163, float undef
- %temp292.9 = select i1 false, float %181, float %.temp292.7
- %.temp292.9 = select i1 undef, float undef, float %temp292.9
- %197 = fcmp ogt float undef, 0.000000e+00
- %198 = fcmp olt float undef, %196
- %199 = and i1 %197, %198
- %200 = fcmp olt float undef, %.temp292.9
- %201 = and i1 %199, %200
- %temp292.11 = select i1 %201, float undef, float %.temp292.9
- br i1 undef, label %IF2565, label %ELSE2566
-
-IF2565: ; preds = %ENDIF
- br i1 false, label %ENDIF2582, label %ELSE2584
-
-ELSE2566: ; preds = %ENDIF
- %202 = fcmp oeq float %temp292.11, 1.000000e+04
- br i1 %202, label %ENDLOOP, label %ELSE2593
-
-ENDIF2564: ; preds = %ENDIF2594, %ENDIF2588
- %temp894.1 = phi float [ undef, %ENDIF2588 ], [ %temp894.2, %ENDIF2594 ]
- %temp18.1 = phi float [ %219, %ENDIF2588 ], [ undef, %ENDIF2594 ]
- %203 = fsub float %5, undef
- %204 = fmul float %203, undef
- %205 = call float @llvm.maxnum.f32(float undef, float %204)
- %206 = call float @llvm.minnum.f32(float %205, float undef)
- %207 = call float @llvm.minnum.f32(float %206, float undef)
- %208 = fcmp ogt float undef, 0.000000e+00
- %209 = fcmp olt float undef, 1.000000e+00
- %210 = and i1 %208, %209
- %211 = fcmp olt float undef, %207
- %212 = and i1 %210, %211
- br i1 %212, label %ENDIF2795, label %ELSE2797
-
-ELSE2584: ; preds = %IF2565
- br label %ENDIF2582
-
-ENDIF2582: ; preds = %ELSE2584, %IF2565
- %213 = fadd float %1, undef
- %214 = fadd float 0.000000e+00, %213
- %215 = call float @llvm.AMDIL.fraction.(float %214)
- br i1 undef, label %IF2589, label %ELSE2590
-
-IF2589: ; preds = %ENDIF2582
- br label %ENDIF2588
-
-ELSE2590: ; preds = %ENDIF2582
- br label %ENDIF2588
-
-ENDIF2588: ; preds = %ELSE2590, %IF2589
- %216 = fsub float 1.000000e+00, %215
- %217 = call float @llvm.sqrt.f32(float %216)
- %218 = fmul float %217, undef
- %219 = fadd float %218, undef
- br label %ENDIF2564
-
-ELSE2593: ; preds = %ELSE2566
- %220 = fcmp oeq float %temp292.11, %82
- %221 = fcmp olt float %82, %84
- %222 = and i1 %220, %221
- br i1 %222, label %ENDIF2594, label %ELSE2596
-
-ELSE2596: ; preds = %ELSE2593
- %223 = fcmp oeq float %temp292.11, %101
- %224 = fcmp olt float %101, %103
- %225 = and i1 %223, %224
- br i1 %225, label %ENDIF2594, label %ELSE2632
-
-ENDIF2594: ; preds = %ELSE2788, %ELSE2785, %ELSE2782, %ELSE2779, %IF2775, %ELSE2761, %ELSE2758, %IF2757, %ELSE2704, %ELSE2686, %ELSE2671, %ELSE2668, %IF2667, %ELSE2632, %ELSE2596, %ELSE2593
- %temp894.2 = phi float [ 0.000000e+00, %IF2667 ], [ 0.000000e+00, %ELSE2671 ], [ 0.000000e+00, %IF2757 ], [ 0.000000e+00, %ELSE2761 ], [ %temp894.0, %ELSE2758 ], [ 0.000000e+00, %IF2775 ], [ 0.000000e+00, %ELSE2779 ], [ 0.000000e+00, %ELSE2782 ], [ %.2848, %ELSE2788 ], [ 0.000000e+00, %ELSE2785 ], [ 0.000000e+00, %ELSE2593 ], [ 0.000000e+00, %ELSE2632 ], [ 0.000000e+00, %ELSE2704 ], [ 0.000000e+00, %ELSE2686 ], [ 0.000000e+00, %ELSE2668 ], [ 0.000000e+00, %ELSE2596 ]
- %226 = fmul float %temp894.2, undef
- br label %ENDIF2564
-
-ELSE2632: ; preds = %ELSE2596
- br i1 undef, label %ENDIF2594, label %ELSE2650
-
-ELSE2650: ; preds = %ELSE2632
- %227 = fcmp oeq float %temp292.11, %111
- %228 = fcmp olt float %111, %112
- %229 = and i1 %227, %228
- br i1 %229, label %IF2667, label %ELSE2668
-
-IF2667: ; preds = %ELSE2650
- br i1 undef, label %ENDIF2594, label %ELSE2671
-
-ELSE2668: ; preds = %ELSE2650
- %230 = fcmp oeq float %temp292.11, %129
- %231 = fcmp olt float %129, undef
- %232 = and i1 %230, %231
- br i1 %232, label %ENDIF2594, label %ELSE2686
-
-ELSE2671: ; preds = %IF2667
- br label %ENDIF2594
-
-ELSE2686: ; preds = %ELSE2668
- %233 = fcmp oeq float %temp292.11, %146
- %234 = fcmp olt float %146, undef
- %235 = and i1 %233, %234
- br i1 %235, label %ENDIF2594, label %ELSE2704
-
-ELSE2704: ; preds = %ELSE2686
- %236 = fcmp oeq float %temp292.11, %181
- %237 = fcmp olt float %181, undef
- %238 = and i1 %236, %237
- br i1 %238, label %ENDIF2594, label %ELSE2740
-
-ELSE2740: ; preds = %ELSE2704
- br i1 undef, label %IF2757, label %ELSE2758
-
-IF2757: ; preds = %ELSE2740
- br i1 undef, label %ENDIF2594, label %ELSE2761
-
-ELSE2758: ; preds = %ELSE2740
- br i1 undef, label %IF2775, label %ENDIF2594
-
-ELSE2761: ; preds = %IF2757
- br label %ENDIF2594
-
-IF2775: ; preds = %ELSE2758
- %239 = fcmp olt float undef, undef
- br i1 %239, label %ENDIF2594, label %ELSE2779
-
-ELSE2779: ; preds = %IF2775
- br i1 undef, label %ENDIF2594, label %ELSE2782
-
-ELSE2782: ; preds = %ELSE2779
- br i1 undef, label %ENDIF2594, label %ELSE2785
-
-ELSE2785: ; preds = %ELSE2782
- %240 = fcmp olt float undef, 0.000000e+00
- br i1 %240, label %ENDIF2594, label %ELSE2788
-
-ELSE2788: ; preds = %ELSE2785
- %241 = fcmp olt float 0.000000e+00, undef
- %.2848 = select i1 %241, float -1.000000e+00, float 1.000000e+00
- br label %ENDIF2594
-
-ELSE2797: ; preds = %ENDIF2564
- %242 = fsub float %8, undef
- %243 = fsub float %9, undef
- %244 = fsub float %10, undef
- %245 = fmul float %242, undef
- %246 = fmul float %243, undef
- %247 = fmul float %244, undef
- %248 = fsub float %11, undef
- %249 = fsub float %12, undef
- %250 = fsub float %13, undef
- %251 = fmul float %248, undef
- %252 = fmul float %249, undef
- %253 = fmul float %250, undef
- %254 = call float @llvm.minnum.f32(float %245, float %251)
- %255 = call float @llvm.minnum.f32(float %246, float %252)
- %256 = call float @llvm.maxnum.f32(float %247, float %253)
- %257 = call float @llvm.maxnum.f32(float %254, float %255)
- %258 = call float @llvm.maxnum.f32(float %257, float undef)
- %259 = call float @llvm.minnum.f32(float undef, float %256)
- %260 = fcmp ogt float %258, 0.000000e+00
- %261 = fcmp olt float %258, 1.000000e+00
- %262 = and i1 %260, %261
- %263 = fcmp olt float %258, %259
- %264 = and i1 %262, %263
- br i1 %264, label %ENDIF2795, label %ELSE2800
-
-ENDIF2795: ; preds = %ELSE2824, %ELSE2821, %ELSE2818, %ELSE2815, %ELSE2812, %ELSE2809, %ELSE2806, %ELSE2803, %ELSE2800, %ELSE2797, %ENDIF2564
- br label %LOOP
-
-ELSE2800: ; preds = %ELSE2797
- br i1 undef, label %ENDIF2795, label %ELSE2803
-
-ELSE2803: ; preds = %ELSE2800
- %265 = fsub float %20, undef
- %266 = fsub float %21, undef
- %267 = fsub float %22, undef
- %268 = fmul float %265, undef
- %269 = fmul float %266, undef
- %270 = fmul float %267, 0.000000e+00
- %271 = fsub float %23, undef
- %272 = fsub float %24, undef
- %273 = fsub float %25, undef
- %274 = fmul float %271, undef
- %275 = fmul float %272, undef
- %276 = fmul float %273, undef
- %277 = call float @llvm.minnum.f32(float %268, float %274)
- %278 = call float @llvm.maxnum.f32(float %269, float %275)
- %279 = call float @llvm.maxnum.f32(float %270, float %276)
- %280 = call float @llvm.maxnum.f32(float %277, float undef)
- %281 = call float @llvm.maxnum.f32(float %280, float undef)
- %282 = call float @llvm.minnum.f32(float undef, float %278)
- %283 = call float @llvm.minnum.f32(float %282, float %279)
- %284 = fcmp ogt float %281, 0.000000e+00
- %285 = fcmp olt float %281, 1.000000e+00
- %286 = and i1 %284, %285
- %287 = fcmp olt float %281, %283
- %288 = and i1 %286, %287
- br i1 %288, label %ENDIF2795, label %ELSE2806
-
-ELSE2806: ; preds = %ELSE2803
- %289 = fsub float %26, undef
- %290 = fsub float %27, undef
- %291 = fsub float %28, undef
- %292 = fmul float %289, undef
- %293 = fmul float %290, 0.000000e+00
- %294 = fmul float %291, undef
- %295 = fsub float %29, undef
- %296 = fmul float %295, undef
- %297 = call float @llvm.minnum.f32(float %292, float %296)
- %298 = call float @llvm.minnum.f32(float %293, float undef)
- %299 = call float @llvm.maxnum.f32(float %294, float undef)
- %300 = call float @llvm.maxnum.f32(float %297, float %298)
- %301 = call float @llvm.maxnum.f32(float %300, float undef)
- %302 = call float @llvm.minnum.f32(float undef, float %299)
- %303 = fcmp ogt float %301, 0.000000e+00
- %304 = fcmp olt float %301, 1.000000e+00
- %305 = and i1 %303, %304
- %306 = fcmp olt float %301, %302
- %307 = and i1 %305, %306
- br i1 %307, label %ENDIF2795, label %ELSE2809
-
-ELSE2809: ; preds = %ELSE2806
- br i1 undef, label %ENDIF2795, label %ELSE2812
-
-ELSE2812: ; preds = %ELSE2809
- br i1 undef, label %ENDIF2795, label %ELSE2815
-
-ELSE2815: ; preds = %ELSE2812
- br i1 undef, label %ENDIF2795, label %ELSE2818
-
-ELSE2818: ; preds = %ELSE2815
- br i1 undef, label %ENDIF2795, label %ELSE2821
-
-ELSE2821: ; preds = %ELSE2818
- %308 = fsub float %56, undef
- %309 = fsub float %57, undef
- %310 = fsub float %58, undef
- %311 = fmul float %308, undef
- %312 = fmul float %309, 0.000000e+00
- %313 = fmul float %310, undef
- %314 = fsub float %59, undef
- %315 = fsub float %60, undef
- %316 = fsub float %61, undef
- %317 = fmul float %314, undef
- %318 = fmul float %315, undef
- %319 = fmul float %316, undef
- %320 = call float @llvm.maxnum.f32(float %311, float %317)
- %321 = call float @llvm.maxnum.f32(float %312, float %318)
- %322 = call float @llvm.maxnum.f32(float %313, float %319)
- %323 = call float @llvm.minnum.f32(float %320, float %321)
- %324 = call float @llvm.minnum.f32(float %323, float %322)
- %325 = fcmp ogt float undef, 0.000000e+00
- %326 = fcmp olt float undef, 1.000000e+00
- %327 = and i1 %325, %326
- %328 = fcmp olt float undef, %324
- %329 = and i1 %327, %328
- br i1 %329, label %ENDIF2795, label %ELSE2824
-
-ELSE2824: ; preds = %ELSE2821
- %.2849 = select i1 undef, float 0.000000e+00, float 1.000000e+00
- br label %ENDIF2795
-}
-
-; Function Attrs: nounwind readnone
-declare float @llvm.SI.load.const(<16 x i8>, i32) #1
-
-; Function Attrs: readnone
-declare float @llvm.AMDIL.fraction.(float) #2
-
-; Function Attrs: nounwind readnone
-declare float @llvm.sqrt.f32(float) #1
-
-; Function Attrs: nounwind readnone
-declare float @llvm.minnum.f32(float, float) #1
-
-; Function Attrs: nounwind readnone
-declare float @llvm.maxnum.f32(float, float) #1
-
-; Function Attrs: readnone
-declare float @llvm.AMDGPU.lrp(float, float, float) #2
-
-declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
-
-attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" }
-attributes #1 = { nounwind readnone }
-attributes #2 = { readnone }
Removed: llvm/trunk/test/CodeGen/R600/si-triv-disjoint-mem-access.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/si-triv-disjoint-mem-access.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/si-triv-disjoint-mem-access.ll (original)
+++ llvm/trunk/test/CodeGen/R600/si-triv-disjoint-mem-access.ll (removed)
@@ -1,236 +0,0 @@
-; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs -enable-misched -enable-aa-sched-mi < %s | FileCheck -check-prefix=FUNC -check-prefix=CI %s
-
-declare void @llvm.SI.tbuffer.store.i32(<16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32)
-declare void @llvm.SI.tbuffer.store.v4i32(<16 x i8>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32)
-declare void @llvm.AMDGPU.barrier.local() #2
-
-
-@stored_lds_ptr = addrspace(3) global i32 addrspace(3)* undef, align 4
-@stored_constant_ptr = addrspace(3) global i32 addrspace(2)* undef, align 8
-@stored_global_ptr = addrspace(3) global i32 addrspace(1)* undef, align 8
-
-; FUNC-LABEL: @reorder_local_load_global_store_local_load
-; CI: ds_read_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:4
-; CI-NEXT: ds_read_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:8
-; CI: buffer_store_dword
-define void @reorder_local_load_global_store_local_load(i32 addrspace(1)* %out, i32 addrspace(1)* %gptr) #0 {
- %ptr0 = load i32 addrspace(3)*, i32 addrspace(3)* addrspace(3)* @stored_lds_ptr, align 4
-
- %ptr1 = getelementptr inbounds i32, i32 addrspace(3)* %ptr0, i32 1
- %ptr2 = getelementptr inbounds i32, i32 addrspace(3)* %ptr0, i32 2
-
- %tmp1 = load i32, i32 addrspace(3)* %ptr1, align 4
- store i32 99, i32 addrspace(1)* %gptr, align 4
- %tmp2 = load i32, i32 addrspace(3)* %ptr2, align 4
-
- %add = add nsw i32 %tmp1, %tmp2
-
- store i32 %add, i32 addrspace(1)* %out, align 4
- ret void
-}
-
-; FUNC-LABEL: @no_reorder_local_load_volatile_global_store_local_load
-; CI: ds_read_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:4
-; CI: buffer_store_dword
-; CI: ds_read_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:8
-define void @no_reorder_local_load_volatile_global_store_local_load(i32 addrspace(1)* %out, i32 addrspace(1)* %gptr) #0 {
- %ptr0 = load i32 addrspace(3)*, i32 addrspace(3)* addrspace(3)* @stored_lds_ptr, align 4
-
- %ptr1 = getelementptr inbounds i32, i32 addrspace(3)* %ptr0, i32 1
- %ptr2 = getelementptr inbounds i32, i32 addrspace(3)* %ptr0, i32 2
-
- %tmp1 = load i32, i32 addrspace(3)* %ptr1, align 4
- store volatile i32 99, i32 addrspace(1)* %gptr, align 4
- %tmp2 = load i32, i32 addrspace(3)* %ptr2, align 4
-
- %add = add nsw i32 %tmp1, %tmp2
-
- store i32 %add, i32 addrspace(1)* %out, align 4
- ret void
-}
-
-; FUNC-LABEL: @no_reorder_barrier_local_load_global_store_local_load
-; CI: ds_read_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:4
-; CI: ds_read_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:8
-; CI: buffer_store_dword
-define void @no_reorder_barrier_local_load_global_store_local_load(i32 addrspace(1)* %out, i32 addrspace(1)* %gptr) #0 {
- %ptr0 = load i32 addrspace(3)*, i32 addrspace(3)* addrspace(3)* @stored_lds_ptr, align 4
-
- %ptr1 = getelementptr inbounds i32, i32 addrspace(3)* %ptr0, i32 1
- %ptr2 = getelementptr inbounds i32, i32 addrspace(3)* %ptr0, i32 2
-
- %tmp1 = load i32, i32 addrspace(3)* %ptr1, align 4
- store i32 99, i32 addrspace(1)* %gptr, align 4
- call void @llvm.AMDGPU.barrier.local() #2
- %tmp2 = load i32, i32 addrspace(3)* %ptr2, align 4
-
- %add = add nsw i32 %tmp1, %tmp2
-
- store i32 %add, i32 addrspace(1)* %out, align 4
- ret void
-}
-
-; Technically we could reorder these, but just comparing the
-; instruction type of the load is insufficient.
-
-; FUNC-LABEL: @no_reorder_constant_load_global_store_constant_load
-; CI: buffer_load_dword
-; CI: buffer_store_dword
-; CI: buffer_load_dword
-; CI: buffer_store_dword
-define void @no_reorder_constant_load_global_store_constant_load(i32 addrspace(1)* %out, i32 addrspace(1)* %gptr) #0 {
- %ptr0 = load i32 addrspace(2)*, i32 addrspace(2)* addrspace(3)* @stored_constant_ptr, align 8
-
- %ptr1 = getelementptr inbounds i32, i32 addrspace(2)* %ptr0, i64 1
- %ptr2 = getelementptr inbounds i32, i32 addrspace(2)* %ptr0, i64 2
-
- %tmp1 = load i32, i32 addrspace(2)* %ptr1, align 4
- store i32 99, i32 addrspace(1)* %gptr, align 4
- %tmp2 = load i32, i32 addrspace(2)* %ptr2, align 4
-
- %add = add nsw i32 %tmp1, %tmp2
-
- store i32 %add, i32 addrspace(1)* %out, align 4
- ret void
-}
-
-; FUNC-LABEL: @reorder_constant_load_local_store_constant_load
-; CI: buffer_load_dword
-; CI: buffer_load_dword
-; CI: ds_write_b32
-; CI: buffer_store_dword
-define void @reorder_constant_load_local_store_constant_load(i32 addrspace(1)* %out, i32 addrspace(3)* %lptr) #0 {
- %ptr0 = load i32 addrspace(2)*, i32 addrspace(2)* addrspace(3)* @stored_constant_ptr, align 8
-
- %ptr1 = getelementptr inbounds i32, i32 addrspace(2)* %ptr0, i64 1
- %ptr2 = getelementptr inbounds i32, i32 addrspace(2)* %ptr0, i64 2
-
- %tmp1 = load i32, i32 addrspace(2)* %ptr1, align 4
- store i32 99, i32 addrspace(3)* %lptr, align 4
- %tmp2 = load i32, i32 addrspace(2)* %ptr2, align 4
-
- %add = add nsw i32 %tmp1, %tmp2
-
- store i32 %add, i32 addrspace(1)* %out, align 4
- ret void
-}
-
-; FUNC-LABEL: @reorder_smrd_load_local_store_smrd_load
-; CI: s_load_dword
-; CI: s_load_dword
-; CI: s_load_dword
-; CI: ds_write_b32
-; CI: buffer_store_dword
-define void @reorder_smrd_load_local_store_smrd_load(i32 addrspace(1)* %out, i32 addrspace(3)* noalias %lptr, i32 addrspace(2)* %ptr0) #0 {
- %ptr1 = getelementptr inbounds i32, i32 addrspace(2)* %ptr0, i64 1
- %ptr2 = getelementptr inbounds i32, i32 addrspace(2)* %ptr0, i64 2
-
- %tmp1 = load i32, i32 addrspace(2)* %ptr1, align 4
- store i32 99, i32 addrspace(3)* %lptr, align 4
- %tmp2 = load i32, i32 addrspace(2)* %ptr2, align 4
-
- %add = add nsw i32 %tmp1, %tmp2
-
- store i32 %add, i32 addrspace(1)* %out, align 4
- ret void
-}
-
-; FUNC-LABEL: @reorder_global_load_local_store_global_load
-; CI: buffer_load_dword
-; CI: buffer_load_dword
-; CI: ds_write_b32
-; CI: buffer_store_dword
-define void @reorder_global_load_local_store_global_load(i32 addrspace(1)* %out, i32 addrspace(3)* %lptr, i32 addrspace(1)* %ptr0) #0 {
- %ptr1 = getelementptr inbounds i32, i32 addrspace(1)* %ptr0, i64 1
- %ptr2 = getelementptr inbounds i32, i32 addrspace(1)* %ptr0, i64 2
-
- %tmp1 = load i32, i32 addrspace(1)* %ptr1, align 4
- store i32 99, i32 addrspace(3)* %lptr, align 4
- %tmp2 = load i32, i32 addrspace(1)* %ptr2, align 4
-
- %add = add nsw i32 %tmp1, %tmp2
-
- store i32 %add, i32 addrspace(1)* %out, align 4
- ret void
-}
-
-; FUNC-LABEL: @reorder_local_offsets
-; CI: ds_write_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:12
-; CI: ds_read_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:400
-; CI: ds_read_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:404
-; CI: ds_write_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:400
-; CI: ds_write_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:404
-; CI: buffer_store_dword
-; CI: s_endpgm
-define void @reorder_local_offsets(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* noalias nocapture readnone %gptr, i32 addrspace(3)* noalias nocapture %ptr0) #0 {
- %ptr1 = getelementptr inbounds i32, i32 addrspace(3)* %ptr0, i32 3
- %ptr2 = getelementptr inbounds i32, i32 addrspace(3)* %ptr0, i32 100
- %ptr3 = getelementptr inbounds i32, i32 addrspace(3)* %ptr0, i32 101
-
- store i32 123, i32 addrspace(3)* %ptr1, align 4
- %tmp1 = load i32, i32 addrspace(3)* %ptr2, align 4
- %tmp2 = load i32, i32 addrspace(3)* %ptr3, align 4
- store i32 123, i32 addrspace(3)* %ptr2, align 4
- %tmp3 = load i32, i32 addrspace(3)* %ptr1, align 4
- store i32 789, i32 addrspace(3)* %ptr3, align 4
-
- %add.0 = add nsw i32 %tmp2, %tmp1
- %add.1 = add nsw i32 %add.0, %tmp3
- store i32 %add.1, i32 addrspace(1)* %out, align 4
- ret void
-}
-
-; FUNC-LABEL: @reorder_global_offsets
-; CI: buffer_store_dword {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:12
-; CI: buffer_load_dword {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:400
-; CI: buffer_load_dword {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:404
-; CI: buffer_store_dword {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:400
-; CI: buffer_store_dword {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:404
-; CI: buffer_store_dword
-; CI: s_endpgm
-define void @reorder_global_offsets(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* noalias nocapture readnone %gptr, i32 addrspace(1)* noalias nocapture %ptr0) #0 {
- %ptr1 = getelementptr inbounds i32, i32 addrspace(1)* %ptr0, i32 3
- %ptr2 = getelementptr inbounds i32, i32 addrspace(1)* %ptr0, i32 100
- %ptr3 = getelementptr inbounds i32, i32 addrspace(1)* %ptr0, i32 101
-
- store i32 123, i32 addrspace(1)* %ptr1, align 4
- %tmp1 = load i32, i32 addrspace(1)* %ptr2, align 4
- %tmp2 = load i32, i32 addrspace(1)* %ptr3, align 4
- store i32 123, i32 addrspace(1)* %ptr2, align 4
- %tmp3 = load i32, i32 addrspace(1)* %ptr1, align 4
- store i32 789, i32 addrspace(1)* %ptr3, align 4
-
- %add.0 = add nsw i32 %tmp2, %tmp1
- %add.1 = add nsw i32 %add.0, %tmp3
- store i32 %add.1, i32 addrspace(1)* %out, align 4
- ret void
-}
-
-; XFUNC-LABEL: @reorder_local_load_tbuffer_store_local_load
-; XCI: ds_read_b32 {{v[0-9]+}}, {{v[0-9]+}}, 0x4
-; XCI: TBUFFER_STORE_FORMAT
-; XCI: ds_read_b32 {{v[0-9]+}}, {{v[0-9]+}}, 0x8
-; define void @reorder_local_load_tbuffer_store_local_load(i32 addrspace(1)* %out, i32 %a1, i32 %vaddr) #1 {
-; %ptr0 = load i32 addrspace(3)*, i32 addrspace(3)* addrspace(3)* @stored_lds_ptr, align 4
-
-; %ptr1 = getelementptr inbounds i32, i32 addrspace(3)* %ptr0, i32 1
-; %ptr2 = getelementptr inbounds i32, i32 addrspace(3)* %ptr0, i32 2
-
-; %tmp1 = load i32, i32 addrspace(3)* %ptr1, align 4
-
-; %vdata = insertelement <4 x i32> undef, i32 %a1, i32 0
-; call void @llvm.SI.tbuffer.store.v4i32(<16 x i8> undef, <4 x i32> %vdata,
-; i32 4, i32 %vaddr, i32 0, i32 32, i32 14, i32 4, i32 1, i32 0, i32 1,
-; i32 1, i32 0)
-
-; %tmp2 = load i32, i32 addrspace(3)* %ptr2, align 4
-
-; %add = add nsw i32 %tmp1, %tmp2
-
-; store i32 %add, i32 addrspace(1)* %out, align 4
-; ret void
-; }
-
-attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "stack-protector-buffer-size"="8" "unsafe-fp-math"="true" "use-soft-float"="false" }
-attributes #1 = { "ShaderType"="1" nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "stack-protector-buffer-size"="8" "unsafe-fp-math"="true" "use-soft-float"="false" }
-attributes #2 = { nounwind noduplicate }
Removed: llvm/trunk/test/CodeGen/R600/si-vector-hang.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/si-vector-hang.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/si-vector-hang.ll (original)
+++ llvm/trunk/test/CodeGen/R600/si-vector-hang.ll (removed)
@@ -1,105 +0,0 @@
-; RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s
-; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s
-
-; CHECK: {{^}}test_8_min_char:
-; CHECK: buffer_store_byte
-; CHECK: buffer_store_byte
-; CHECK: buffer_store_byte
-; CHECK: buffer_store_byte
-; CHECK: buffer_store_byte
-; CHECK: buffer_store_byte
-; CHECK: buffer_store_byte
-; CHECK: buffer_store_byte
-; ModuleID = 'radeon'
-
-define void @test_8_min_char(i8 addrspace(1)* nocapture %out, i8 addrspace(1)* nocapture readonly %in0, i8 addrspace(1)* nocapture readonly %in1) #0 {
-entry:
- %0 = load i8, i8 addrspace(1)* %in0, align 1
- %1 = insertelement <8 x i8> undef, i8 %0, i32 0
- %arrayidx2.i.i = getelementptr inbounds i8, i8 addrspace(1)* %in0, i64 1
- %2 = load i8, i8 addrspace(1)* %arrayidx2.i.i, align 1
- %3 = insertelement <8 x i8> %1, i8 %2, i32 1
- %arrayidx6.i.i = getelementptr inbounds i8, i8 addrspace(1)* %in0, i64 2
- %4 = load i8, i8 addrspace(1)* %arrayidx6.i.i, align 1
- %5 = insertelement <8 x i8> %3, i8 %4, i32 2
- %arrayidx10.i.i = getelementptr inbounds i8, i8 addrspace(1)* %in0, i64 3
- %6 = load i8, i8 addrspace(1)* %arrayidx10.i.i, align 1
- %7 = insertelement <8 x i8> %5, i8 %6, i32 3
- %arrayidx.i.i = getelementptr inbounds i8, i8 addrspace(1)* %in0, i64 4
- %8 = load i8, i8 addrspace(1)* %arrayidx.i.i, align 1
- %9 = insertelement <8 x i8> undef, i8 %8, i32 0
- %arrayidx2.i9.i = getelementptr inbounds i8, i8 addrspace(1)* %in0, i64 5
- %10 = load i8, i8 addrspace(1)* %arrayidx2.i9.i, align 1
- %11 = insertelement <8 x i8> %9, i8 %10, i32 1
- %arrayidx6.i11.i = getelementptr inbounds i8, i8 addrspace(1)* %in0, i64 6
- %12 = load i8, i8 addrspace(1)* %arrayidx6.i11.i, align 1
- %13 = insertelement <8 x i8> %11, i8 %12, i32 2
- %arrayidx10.i13.i = getelementptr inbounds i8, i8 addrspace(1)* %in0, i64 7
- %14 = load i8, i8 addrspace(1)* %arrayidx10.i13.i, align 1
- %15 = insertelement <8 x i8> %13, i8 %14, i32 3
- %vecinit5.i = shufflevector <8 x i8> %7, <8 x i8> %15, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
- %16 = load i8, i8 addrspace(1)* %in1, align 1
- %17 = insertelement <8 x i8> undef, i8 %16, i32 0
- %arrayidx2.i.i4 = getelementptr inbounds i8, i8 addrspace(1)* %in1, i64 1
- %18 = load i8, i8 addrspace(1)* %arrayidx2.i.i4, align 1
- %19 = insertelement <8 x i8> %17, i8 %18, i32 1
- %arrayidx6.i.i5 = getelementptr inbounds i8, i8 addrspace(1)* %in1, i64 2
- %20 = load i8, i8 addrspace(1)* %arrayidx6.i.i5, align 1
- %21 = insertelement <8 x i8> %19, i8 %20, i32 2
- %arrayidx10.i.i6 = getelementptr inbounds i8, i8 addrspace(1)* %in1, i64 3
- %22 = load i8, i8 addrspace(1)* %arrayidx10.i.i6, align 1
- %23 = insertelement <8 x i8> %21, i8 %22, i32 3
- %arrayidx.i.i7 = getelementptr inbounds i8, i8 addrspace(1)* %in1, i64 4
- %24 = load i8, i8 addrspace(1)* %arrayidx.i.i7, align 1
- %25 = insertelement <8 x i8> undef, i8 %24, i32 0
- %arrayidx2.i9.i8 = getelementptr inbounds i8, i8 addrspace(1)* %in1, i64 5
- %26 = load i8, i8 addrspace(1)* %arrayidx2.i9.i8, align 1
- %27 = insertelement <8 x i8> %25, i8 %26, i32 1
- %arrayidx6.i11.i9 = getelementptr inbounds i8, i8 addrspace(1)* %in1, i64 6
- %28 = load i8, i8 addrspace(1)* %arrayidx6.i11.i9, align 1
- %29 = insertelement <8 x i8> %27, i8 %28, i32 2
- %arrayidx10.i13.i10 = getelementptr inbounds i8, i8 addrspace(1)* %in1, i64 7
- %30 = load i8, i8 addrspace(1)* %arrayidx10.i13.i10, align 1
- %31 = insertelement <8 x i8> %29, i8 %30, i32 3
- %vecinit5.i11 = shufflevector <8 x i8> %23, <8 x i8> %31, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
- %cmp.i = icmp slt <8 x i8> %vecinit5.i, %vecinit5.i11
- %cond.i = select <8 x i1> %cmp.i, <8 x i8> %vecinit5.i, <8 x i8> %vecinit5.i11
- %32 = extractelement <8 x i8> %cond.i, i32 0
- store i8 %32, i8 addrspace(1)* %out, align 1
- %33 = extractelement <8 x i8> %cond.i, i32 1
- %arrayidx2.i.i.i = getelementptr inbounds i8, i8 addrspace(1)* %out, i64 1
- store i8 %33, i8 addrspace(1)* %arrayidx2.i.i.i, align 1
- %34 = extractelement <8 x i8> %cond.i, i32 2
- %arrayidx.i.i.i = getelementptr inbounds i8, i8 addrspace(1)* %out, i64 2
- store i8 %34, i8 addrspace(1)* %arrayidx.i.i.i, align 1
- %35 = extractelement <8 x i8> %cond.i, i32 3
- %arrayidx2.i6.i.i = getelementptr inbounds i8, i8 addrspace(1)* %out, i64 3
- store i8 %35, i8 addrspace(1)* %arrayidx2.i6.i.i, align 1
- %arrayidx.i.i3 = getelementptr inbounds i8, i8 addrspace(1)* %out, i64 4
- %36 = extractelement <8 x i8> %cond.i, i32 4
- store i8 %36, i8 addrspace(1)* %arrayidx.i.i3, align 1
- %37 = extractelement <8 x i8> %cond.i, i32 5
- %arrayidx2.i.i6.i = getelementptr inbounds i8, i8 addrspace(1)* %out, i64 5
- store i8 %37, i8 addrspace(1)* %arrayidx2.i.i6.i, align 1
- %38 = extractelement <8 x i8> %cond.i, i32 6
- %arrayidx.i.i7.i = getelementptr inbounds i8, i8 addrspace(1)* %out, i64 6
- store i8 %38, i8 addrspace(1)* %arrayidx.i.i7.i, align 1
- %39 = extractelement <8 x i8> %cond.i, i32 7
- %arrayidx2.i6.i8.i = getelementptr inbounds i8, i8 addrspace(1)* %out, i64 7
- store i8 %39, i8 addrspace(1)* %arrayidx2.i6.i8.i, align 1
- ret void
-}
-
-attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-
-!opencl.kernels = !{!0, !1, !2, !3, !4, !5, !6, !7, !8}
-
-!0 = !{null}
-!1 = !{null}
-!2 = !{null}
-!3 = !{void (i8 addrspace(1)*, i8 addrspace(1)*, i8 addrspace(1)*)* @test_8_min_char}
-!4 = !{null}
-!5 = !{null}
-!6 = !{null}
-!7 = !{null}
-!8 = !{null}
Removed: llvm/trunk/test/CodeGen/R600/sign_extend.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/sign_extend.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/sign_extend.ll (original)
+++ llvm/trunk/test/CodeGen/R600/sign_extend.ll (removed)
@@ -1,63 +0,0 @@
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
-; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
-
-; SI-LABEL: {{^}}s_sext_i1_to_i32:
-; SI: v_cndmask_b32_e64
-; SI: s_endpgm
-define void @s_sext_i1_to_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
- %cmp = icmp eq i32 %a, %b
- %sext = sext i1 %cmp to i32
- store i32 %sext, i32 addrspace(1)* %out, align 4
- ret void
-}
-
-; SI-LABEL: {{^}}test_s_sext_i32_to_i64:
-; SI: s_ashr_i32
-; SI: s_endpgm
-define void @test_s_sext_i32_to_i64(i64 addrspace(1)* %out, i32 %a, i32 %b, i32 %c) nounwind {
-entry:
- %mul = mul i32 %a, %b
- %add = add i32 %mul, %c
- %sext = sext i32 %add to i64
- store i64 %sext, i64 addrspace(1)* %out, align 8
- ret void
-}
-
-; SI-LABEL: {{^}}s_sext_i1_to_i64:
-; SI: v_cndmask_b32_e64 v[[LOREG:[0-9]+]], 0, -1, vcc
-; SI: v_mov_b32_e32 v[[HIREG:[0-9]+]], v[[LOREG]]
-; SI: buffer_store_dwordx2 v{{\[}}[[LOREG]]:[[HIREG]]{{\]}}
-; SI: s_endpgm
-define void @s_sext_i1_to_i64(i64 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
- %cmp = icmp eq i32 %a, %b
- %sext = sext i1 %cmp to i64
- store i64 %sext, i64 addrspace(1)* %out, align 8
- ret void
-}
-
-; SI-LABEL: {{^}}s_sext_i32_to_i64:
-; SI: s_ashr_i32
-; SI: s_endpgm
-define void @s_sext_i32_to_i64(i64 addrspace(1)* %out, i32 %a) nounwind {
- %sext = sext i32 %a to i64
- store i64 %sext, i64 addrspace(1)* %out, align 8
- ret void
-}
-
-; SI-LABEL: {{^}}v_sext_i32_to_i64:
-; SI: v_ashr
-; SI: s_endpgm
-define void @v_sext_i32_to_i64(i64 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
- %val = load i32, i32 addrspace(1)* %in, align 4
- %sext = sext i32 %val to i64
- store i64 %sext, i64 addrspace(1)* %out, align 8
- ret void
-}
-
-; SI-LABEL: {{^}}s_sext_i16_to_i64:
-; SI: s_endpgm
-define void @s_sext_i16_to_i64(i64 addrspace(1)* %out, i16 %a) nounwind {
- %sext = sext i16 %a to i64
- store i64 %sext, i64 addrspace(1)* %out, align 8
- ret void
-}
Removed: llvm/trunk/test/CodeGen/R600/simplify-demanded-bits-build-pair.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/simplify-demanded-bits-build-pair.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/simplify-demanded-bits-build-pair.ll (original)
+++ llvm/trunk/test/CodeGen/R600/simplify-demanded-bits-build-pair.ll (removed)
@@ -1,39 +0,0 @@
-; XFAIL: *
-; RUN: llc -verify-machineinstrs -march=amdgcn -mcpu=SI -mattr=-promote-alloca < %s | FileCheck -check-prefix=SI %s
-; RUN: llc -verify-machineinstrs -march=amdgcn -mcpu=tonga -mattr=-promote-alloca < %s | FileCheck -check-prefix=SI %s
-
-; 64-bit select was originally lowered with a build_pair, and this
-; could be simplified to 1 cndmask instead of 2, but that broke when
-; it started being implemented with a v2i32 build_vector and
-; bitcasting.
-define void @trunc_select_i64(i32 addrspace(1)* %out, i64 %a, i64 %b, i32 %c) {
- %cmp = icmp eq i32 %c, 0
- %select = select i1 %cmp, i64 %a, i64 %b
- %trunc = trunc i64 %select to i32
- store i32 %trunc, i32 addrspace(1)* %out, align 4
- ret void
-}
-
-; FIXME: Fix truncating store for local memory
-; SI-LABEL: {{^}}trunc_load_alloca_i64:
-; SI: v_movrels_b32
-; SI-NOT: v_movrels_b32
-; SI: s_endpgm
-define void @trunc_load_alloca_i64(i64 addrspace(1)* %out, i32 %a, i32 %b) {
- %idx = add i32 %a, %b
- %alloca = alloca i64, i32 4
- %gep0 = getelementptr i64, i64* %alloca, i64 0
- %gep1 = getelementptr i64, i64* %alloca, i64 1
- %gep2 = getelementptr i64, i64* %alloca, i64 2
- %gep3 = getelementptr i64, i64* %alloca, i64 3
- store i64 24, i64* %gep0, align 8
- store i64 9334, i64* %gep1, align 8
- store i64 3935, i64* %gep2, align 8
- store i64 9342, i64* %gep3, align 8
- %gep = getelementptr i64, i64* %alloca, i32 %idx
- %load = load i64, i64* %gep, align 8
- %mask = and i64 %load, 4294967296
- %add = add i64 %mask, -1
- store i64 %add, i64 addrspace(1)* %out, align 4
- ret void
-}
Removed: llvm/trunk/test/CodeGen/R600/sint_to_fp.f64.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/sint_to_fp.f64.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/sint_to_fp.f64.ll (original)
+++ llvm/trunk/test/CodeGen/R600/sint_to_fp.f64.ll (removed)
@@ -1,61 +0,0 @@
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
-
-declare i32 @llvm.r600.read.tidig.x() nounwind readnone
-
-; SI-LABEL: {{^}}sint_to_fp_i32_to_f64
-; SI: v_cvt_f64_i32_e32
-define void @sint_to_fp_i32_to_f64(double addrspace(1)* %out, i32 %in) {
- %result = sitofp i32 %in to double
- store double %result, double addrspace(1)* %out
- ret void
-}
-
-; FIXME: select on 0, 0
-; SI-LABEL: {{^}}sint_to_fp_i1_f64:
-; SI: v_cmp_eq_i32_e64 [[CMP:s\[[0-9]+:[0-9]\]]],
-; We can't fold the SGPRs into v_cndmask_b32_e64, because it already
-; uses an SGPR for [[CMP]]
-; SI: v_cndmask_b32_e64 v{{[0-9]+}}, 0, v{{[0-9]+}}, [[CMP]]
-; SI: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 0, [[CMP]]
-; SI: buffer_store_dwordx2
-; SI: s_endpgm
-define void @sint_to_fp_i1_f64(double addrspace(1)* %out, i32 %in) {
- %cmp = icmp eq i32 %in, 0
- %fp = sitofp i1 %cmp to double
- store double %fp, double addrspace(1)* %out, align 4
- ret void
-}
-
-; SI-LABEL: {{^}}sint_to_fp_i1_f64_load:
-; SI: v_cndmask_b32_e64 [[IRESULT:v[0-9]]], 0, -1
-; SI-NEXT: v_cvt_f64_i32_e32 [[RESULT:v\[[0-9]+:[0-9]\]]], [[IRESULT]]
-; SI: buffer_store_dwordx2 [[RESULT]]
-; SI: s_endpgm
-define void @sint_to_fp_i1_f64_load(double addrspace(1)* %out, i1 %in) {
- %fp = sitofp i1 %in to double
- store double %fp, double addrspace(1)* %out, align 8
- ret void
-}
-
-; SI-LABEL: @s_sint_to_fp_i64_to_f64
-define void @s_sint_to_fp_i64_to_f64(double addrspace(1)* %out, i64 %in) {
- %result = sitofp i64 %in to double
- store double %result, double addrspace(1)* %out
- ret void
-}
-
-; SI-LABEL: @v_sint_to_fp_i64_to_f64
-; SI: buffer_load_dwordx2 v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}}
-; SI: v_cvt_f64_i32_e32 [[HI_CONV:v\[[0-9]+:[0-9]+\]]], v[[HI]]
-; SI: v_ldexp_f64 [[LDEXP:v\[[0-9]+:[0-9]+\]]], [[HI_CONV]], 32
-; SI: v_cvt_f64_u32_e32 [[LO_CONV:v\[[0-9]+:[0-9]+\]]], v[[LO]]
-; SI: v_add_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[LDEXP]], [[LO_CONV]]
-; SI: buffer_store_dwordx2 [[RESULT]]
-define void @v_sint_to_fp_i64_to_f64(double addrspace(1)* %out, i64 addrspace(1)* %in) {
- %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %gep = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
- %val = load i64, i64 addrspace(1)* %gep, align 8
- %result = sitofp i64 %val to double
- store double %result, double addrspace(1)* %out
- ret void
-}
Removed: llvm/trunk/test/CodeGen/R600/sint_to_fp.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/sint_to_fp.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/sint_to_fp.ll (original)
+++ llvm/trunk/test/CodeGen/R600/sint_to_fp.ll (removed)
@@ -1,64 +0,0 @@
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s
-
-
-; FUNC-LABEL: {{^}}s_sint_to_fp_i32_to_f32:
-; R600: INT_TO_FLT * T{{[0-9]+\.[XYZW]}}, KC0[2].Z
-; SI: v_cvt_f32_i32_e32 {{v[0-9]+}}, {{s[0-9]+$}}
-define void @s_sint_to_fp_i32_to_f32(float addrspace(1)* %out, i32 %in) {
- %result = sitofp i32 %in to float
- store float %result, float addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}sint_to_fp_v2i32:
-; R600-DAG: INT_TO_FLT * T{{[0-9]+\.[XYZW]}}, KC0[2].W
-; R600-DAG: INT_TO_FLT * T{{[0-9]+\.[XYZW]}}, KC0[3].X
-
-; SI: v_cvt_f32_i32_e32
-; SI: v_cvt_f32_i32_e32
-define void @sint_to_fp_v2i32(<2 x float> addrspace(1)* %out, <2 x i32> %in) {
- %result = sitofp <2 x i32> %in to <2 x float>
- store <2 x float> %result, <2 x float> addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}sint_to_fp_v4i32:
-; R600: INT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-; R600: INT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-; R600: INT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-; R600: INT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-
-; SI: v_cvt_f32_i32_e32
-; SI: v_cvt_f32_i32_e32
-; SI: v_cvt_f32_i32_e32
-; SI: v_cvt_f32_i32_e32
-define void @sint_to_fp_v4i32(<4 x float> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
- %value = load <4 x i32>, <4 x i32> addrspace(1) * %in
- %result = sitofp <4 x i32> %value to <4 x float>
- store <4 x float> %result, <4 x float> addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}sint_to_fp_i1_f32:
-; SI: v_cmp_eq_i32_e64 [[CMP:s\[[0-9]+:[0-9]\]]],
-; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1.0, [[CMP]]
-; SI: buffer_store_dword [[RESULT]],
-; SI: s_endpgm
-define void @sint_to_fp_i1_f32(float addrspace(1)* %out, i32 %in) {
- %cmp = icmp eq i32 %in, 0
- %fp = uitofp i1 %cmp to float
- store float %fp, float addrspace(1)* %out, align 4
- ret void
-}
-
-; FUNC-LABEL: {{^}}sint_to_fp_i1_f32_load:
-; SI: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1.0
-; SI: buffer_store_dword [[RESULT]],
-; SI: s_endpgm
-define void @sint_to_fp_i1_f32_load(float addrspace(1)* %out, i1 %in) {
- %fp = sitofp i1 %in to float
- store float %fp, float addrspace(1)* %out, align 4
- ret void
-}
Removed: llvm/trunk/test/CodeGen/R600/smrd.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/smrd.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/smrd.ll (original)
+++ llvm/trunk/test/CodeGen/R600/smrd.ll (removed)
@@ -1,111 +0,0 @@
-; RUN: llc < %s -march=amdgcn -mcpu=SI -show-mc-encoding -verify-machineinstrs | FileCheck --check-prefix=SI --check-prefix=GCN %s
-; RUN: llc < %s -march=amdgcn -mcpu=tonga -show-mc-encoding -verify-machineinstrs | FileCheck --check-prefix=VI --check-prefix=GCN %s
-
-; SMRD load with an immediate offset.
-; GCN-LABEL: {{^}}smrd0:
-; SI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x1 ; encoding: [0x01
-; VI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x4
-define void @smrd0(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) {
-entry:
- %0 = getelementptr i32, i32 addrspace(2)* %ptr, i64 1
- %1 = load i32, i32 addrspace(2)* %0
- store i32 %1, i32 addrspace(1)* %out
- ret void
-}
-
-; SMRD load with the largest possible immediate offset.
-; GCN-LABEL: {{^}}smrd1:
-; SI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0xff ; encoding: [0xff
-; VI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x3fc
-define void @smrd1(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) {
-entry:
- %0 = getelementptr i32, i32 addrspace(2)* %ptr, i64 255
- %1 = load i32, i32 addrspace(2)* %0
- store i32 %1, i32 addrspace(1)* %out
- ret void
-}
-
-; SMRD load with an offset greater than the largest possible immediate.
-; GCN-LABEL: {{^}}smrd2:
-; SI: s_movk_i32 s[[OFFSET:[0-9]]], 0x400
-; SI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], s[[OFFSET]] ; encoding: [0x0[[OFFSET]]
-; VI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x400
-; GCN: s_endpgm
-define void @smrd2(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) {
-entry:
- %0 = getelementptr i32, i32 addrspace(2)* %ptr, i64 256
- %1 = load i32, i32 addrspace(2)* %0
- store i32 %1, i32 addrspace(1)* %out
- ret void
-}
-
-; SMRD load with a 64-bit offset
-; GCN-LABEL: {{^}}smrd3:
-; FIXME: There are too many copies here because we don't fold immediates
-; through REG_SEQUENCE
-; SI: s_mov_b32 s[[SLO:[0-9]+]], 0 ;
-; SI: s_mov_b32 s[[SHI:[0-9]+]], 4
-; SI: s_mov_b32 s[[SSLO:[0-9]+]], s[[SLO]]
-; SI-DAG: v_mov_b32_e32 v[[VLO:[0-9]+]], s[[SSLO]]
-; SI-DAG: v_mov_b32_e32 v[[VHI:[0-9]+]], s[[SHI]]
-; FIXME: We should be able to use s_load_dword here
-; SI: buffer_load_dword v{{[0-9]+}}, v{{\[}}[[VLO]]:[[VHI]]{{\]}}, s[{{[0-9]+:[0-9]+}}], 0 addr64
-; TODO: Add VI checks
-; GCN: s_endpgm
-define void @smrd3(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) {
-entry:
- %0 = getelementptr i32, i32 addrspace(2)* %ptr, i64 4294967296 ; 2 ^ 32
- %1 = load i32, i32 addrspace(2)* %0
- store i32 %1, i32 addrspace(1)* %out
- ret void
-}
-
-; SMRD load using the load.const intrinsic with an immediate offset
-; GCN-LABEL: {{^}}smrd_load_const0:
-; SI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x4 ; encoding: [0x04
-; VI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x10
-define void @smrd_load_const0(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
-main_body:
- %20 = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %0, i32 0
- %21 = load <16 x i8>, <16 x i8> addrspace(2)* %20
- %22 = call float @llvm.SI.load.const(<16 x i8> %21, i32 16)
- call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %22, float %22, float %22, float %22)
- ret void
-}
-
-; SMRD load using the load.const intrinsic with the largest possible immediate
-; offset.
-; GCN-LABEL: {{^}}smrd_load_const1:
-; SI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0xff ; encoding: [0xff
-; VI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x3fc
-define void @smrd_load_const1(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
-main_body:
- %20 = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %0, i32 0
- %21 = load <16 x i8>, <16 x i8> addrspace(2)* %20
- %22 = call float @llvm.SI.load.const(<16 x i8> %21, i32 1020)
- call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %22, float %22, float %22, float %22)
- ret void
-}
-; SMRD load using the load.const intrinsic with an offset greater than the
-; largest possible
-; immediate offset.
-; GCN-LABEL: {{^}}smrd_load_const2:
-; SI: s_movk_i32 s[[OFFSET:[0-9]]], 0x400
-; SI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], s[[OFFSET]] ; encoding: [0x0[[OFFSET]]
-; VI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x400
-define void @smrd_load_const2(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
-main_body:
- %20 = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %0, i32 0
- %21 = load <16 x i8>, <16 x i8> addrspace(2)* %20
- %22 = call float @llvm.SI.load.const(<16 x i8> %21, i32 1024)
- call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %22, float %22, float %22, float %22)
- ret void
-}
-
-; Function Attrs: nounwind readnone
-declare float @llvm.SI.load.const(<16 x i8>, i32) #1
-
-declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
-
-attributes #0 = { "ShaderType"="0" }
-attributes #1 = { nounwind readnone }
Removed: llvm/trunk/test/CodeGen/R600/split-scalar-i64-add.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/split-scalar-i64-add.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/split-scalar-i64-add.ll (original)
+++ llvm/trunk/test/CodeGen/R600/split-scalar-i64-add.ll (removed)
@@ -1,48 +0,0 @@
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-
-declare i32 @llvm.r600.read.tidig.x() readnone
-
-; This is broken because the low half of the 64-bit add remains on the
-; SALU, but the upper half does not. The addc expects the carry bit
-; set in vcc, which is undefined since the low scalar half add sets
-; scc instead.
-
-; FUNC-LABEL: {{^}}imp_def_vcc_split_i64_add_0:
-; SI: v_add_i32
-; SI: v_addc_u32
-define void @imp_def_vcc_split_i64_add_0(i64 addrspace(1)* %out, i32 %val) {
- %vec.0 = insertelement <2 x i32> undef, i32 %val, i32 0
- %vec.1 = insertelement <2 x i32> %vec.0, i32 999999, i32 1
- %bc = bitcast <2 x i32> %vec.1 to i64
- %add = add i64 %bc, 399
- store i64 %add, i64 addrspace(1)* %out, align 8
- ret void
-}
-
-; FUNC-LABEL: {{^}}imp_def_vcc_split_i64_add_1:
-; SI: v_add_i32
-; SI: v_addc_u32
-define void @imp_def_vcc_split_i64_add_1(i64 addrspace(1)* %out, i32 %val0, i64 %val1) {
- %vec.0 = insertelement <2 x i32> undef, i32 %val0, i32 0
- %vec.1 = insertelement <2 x i32> %vec.0, i32 99999, i32 1
- %bc = bitcast <2 x i32> %vec.1 to i64
- %add = add i64 %bc, %val1
- store i64 %add, i64 addrspace(1)* %out, align 8
- ret void
-}
-
-; Doesn't use constants
-; FUNC-LABEL: {{^}}imp_def_vcc_split_i64_add_2:
-; SI: v_add_i32
-; SI: v_addc_u32
-define void @imp_def_vcc_split_i64_add_2(i64 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %val0, i64 %val1) {
- %tid = call i32 @llvm.r600.read.tidig.x() readnone
- %gep = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
- %load = load i32, i32 addrspace(1)* %gep
- %vec.0 = insertelement <2 x i32> undef, i32 %val0, i32 0
- %vec.1 = insertelement <2 x i32> %vec.0, i32 %load, i32 1
- %bc = bitcast <2 x i32> %vec.1 to i64
- %add = add i64 %bc, %val1
- store i64 %add, i64 addrspace(1)* %out, align 8
- ret void
-}
Removed: llvm/trunk/test/CodeGen/R600/sra.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/sra.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/sra.ll (original)
+++ llvm/trunk/test/CodeGen/R600/sra.ll (removed)
@@ -1,213 +0,0 @@
-;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG %s
-;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI %s
-;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck --check-prefix=VI %s
-
-;EG-LABEL: {{^}}ashr_v2i32:
-;EG: ASHR {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;EG: ASHR {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-
-;SI-LABEL: {{^}}ashr_v2i32:
-;SI: v_ashr_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-;SI: v_ashr_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-
-;VI-LABEL: {{^}}ashr_v2i32:
-;VI: v_ashrrev_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-;VI: v_ashrrev_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-
-define void @ashr_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
- %b_ptr = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %in, i32 1
- %a = load <2 x i32>, <2 x i32> addrspace(1) * %in
- %b = load <2 x i32>, <2 x i32> addrspace(1) * %b_ptr
- %result = ashr <2 x i32> %a, %b
- store <2 x i32> %result, <2 x i32> addrspace(1)* %out
- ret void
-}
-
-;EG-LABEL: {{^}}ashr_v4i32:
-;EG: ASHR {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;EG: ASHR {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;EG: ASHR {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;EG: ASHR {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-
-;SI-LABEL: {{^}}ashr_v4i32:
-;SI: v_ashr_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-;SI: v_ashr_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-;SI: v_ashr_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-;SI: v_ashr_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-
-;VI-LABEL: {{^}}ashr_v4i32:
-;VI: v_ashrrev_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-;VI: v_ashrrev_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-;VI: v_ashrrev_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-;VI: v_ashrrev_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-
-define void @ashr_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
- %b_ptr = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %in, i32 1
- %a = load <4 x i32>, <4 x i32> addrspace(1) * %in
- %b = load <4 x i32>, <4 x i32> addrspace(1) * %b_ptr
- %result = ashr <4 x i32> %a, %b
- store <4 x i32> %result, <4 x i32> addrspace(1)* %out
- ret void
-}
-
-;EG-LABEL: {{^}}ashr_i64:
-;EG: ASHR
-
-;SI-LABEL: {{^}}ashr_i64:
-;SI: s_ashr_i64 s[{{[0-9]}}:{{[0-9]}}], s[{{[0-9]}}:{{[0-9]}}], 8
-
-;VI-LABEL: {{^}}ashr_i64:
-;VI: s_ashr_i64 s[{{[0-9]}}:{{[0-9]}}], s[{{[0-9]}}:{{[0-9]}}], 8
-
-define void @ashr_i64(i64 addrspace(1)* %out, i32 %in) {
-entry:
- %0 = sext i32 %in to i64
- %1 = ashr i64 %0, 8
- store i64 %1, i64 addrspace(1)* %out
- ret void
-}
-
-;EG-LABEL: {{^}}ashr_i64_2:
-;EG: SUB_INT {{\*? *}}[[COMPSH:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHIFT:T[0-9]+\.[XYZW]]]
-;EG: LSHL {{\* *}}[[TEMP:T[0-9]+\.[XYZW]]], [[OPHI:T[0-9]+\.[XYZW]]], {{[[COMPSH]]|PV.[XYZW]}}
-;EG: LSHL {{\*? *}}[[OVERF:T[0-9]+\.[XYZW]]], {{[[TEMP]]|PV.[XYZW]}}, 1
-;EG-DAG: ADD_INT {{\*? *}}[[BIGSH:T[0-9]+\.[XYZW]]], [[SHIFT]], literal
-;EG-DAG: LSHR {{\*? *}}[[LOSMTMP:T[0-9]+\.[XYZW]]], [[OPLO:T[0-9]+\.[XYZW]]], [[SHIFT]]
-;EG-DAG: OR_INT {{\*? *}}[[LOSM:T[0-9]+\.[XYZW]]], {{[[LOSMTMP]]|PV.[XYZW]}}, {{[[OVERF]]|PV.[XYZW]}}
-;EG-DAG: ASHR {{\*? *}}[[HISM:T[0-9]+\.[XYZW]]], [[OPHI]], {{PS|[[SHIFT]]}}
-;EG-DAG: ASHR {{\*? *}}[[LOBIG:T[0-9]+\.[XYZW]]], [[OPHI]], literal
-;EG-DAG: ASHR {{\*? *}}[[HIBIG:T[0-9]+\.[XYZW]]], [[OPHI]], literal
-;EG-DAG: SETGT_UINT {{\*? *}}[[RESC:T[0-9]+\.[XYZW]]], [[SHIFT]], literal
-;EG-DAG: CNDE_INT {{\*? *}}[[RESLO:T[0-9]+\.[XYZW]]], {{T[0-9]+\.[XYZW]}}
-;EG-DAG: CNDE_INT {{\*? *}}[[RESHI:T[0-9]+\.[XYZW]]], {{T[0-9]+\.[XYZW]}}
-
-;SI-LABEL: {{^}}ashr_i64_2:
-;SI: v_ashr_i64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
-
-;VI-LABEL: {{^}}ashr_i64_2:
-;VI: v_ashrrev_i64 {{v\[[0-9]+:[0-9]+\], v[0-9]+, v\[[0-9]+:[0-9]+\]}}
-
-define void @ashr_i64_2(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
-entry:
- %b_ptr = getelementptr i64, i64 addrspace(1)* %in, i64 1
- %a = load i64, i64 addrspace(1) * %in
- %b = load i64, i64 addrspace(1) * %b_ptr
- %result = ashr i64 %a, %b
- store i64 %result, i64 addrspace(1)* %out
- ret void
-}
-
-;EG-LABEL: {{^}}ashr_v2i64:
-;EG-DAG: SUB_INT {{\*? *}}[[COMPSHA:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHA:T[0-9]+\.[XYZW]]]
-;EG-DAG: SUB_INT {{\*? *}}[[COMPSHB:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHB:T[0-9]+\.[XYZW]]]
-;EG-DAG: LSHL {{\*? *}}[[COMPSHA]]
-;EG-DAG: LSHL {{\*? *}}[[COMPSHB]]
-;EG-DAG: LSHL {{.*}}, 1
-;EG-DAG: LSHL {{.*}}, 1
-;EG-DAG: ASHR {{.*}}, [[SHA]]
-;EG-DAG: ASHR {{.*}}, [[SHB]]
-;EG-DAG: LSHR {{.*}}, [[SHA]]
-;EG-DAG: LSHR {{.*}}, [[SHB]]
-;EG-DAG: OR_INT
-;EG-DAG: OR_INT
-;EG-DAG: ADD_INT {{\*? *}}[[BIGSHA:T[0-9]+\.[XYZW]]]{{.*}}, literal
-;EG-DAG: ADD_INT {{\*? *}}[[BIGSHB:T[0-9]+\.[XYZW]]]{{.*}}, literal
-;EG-DAG: ASHR
-;EG-DAG: ASHR
-;EG-DAG: ASHR {{.*}}, literal
-;EG-DAG: ASHR {{.*}}, literal
-;EG-DAG: SETGT_UINT {{\*? *T[0-9]\.[XYZW]}}, [[SHA]], literal
-;EG-DAG: SETGT_UINT {{\*? *T[0-9]\.[XYZW]}}, [[SHB]], literal
-;EG-DAG: CNDE_INT
-;EG-DAG: CNDE_INT
-;EG-DAG: CNDE_INT
-;EG-DAG: CNDE_INT
-
-;SI-LABEL: {{^}}ashr_v2i64:
-;SI: v_ashr_i64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
-;SI: v_ashr_i64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
-
-;VI-LABEL: {{^}}ashr_v2i64:
-;VI: v_ashrrev_i64 {{v\[[0-9]+:[0-9]+\], v[0-9]+, v\[[0-9]+:[0-9]+\]}}
-;VI: v_ashrrev_i64 {{v\[[0-9]+:[0-9]+\], v[0-9]+, v\[[0-9]+:[0-9]+\]}}
-
-define void @ashr_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* %in) {
- %b_ptr = getelementptr <2 x i64>, <2 x i64> addrspace(1)* %in, i64 1
- %a = load <2 x i64>, <2 x i64> addrspace(1) * %in
- %b = load <2 x i64>, <2 x i64> addrspace(1) * %b_ptr
- %result = ashr <2 x i64> %a, %b
- store <2 x i64> %result, <2 x i64> addrspace(1)* %out
- ret void
-}
-
-;EG-LABEL: {{^}}ashr_v4i64:
-;EG-DAG: SUB_INT {{\*? *}}[[COMPSHA:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHA:T[0-9]+\.[XYZW]]]
-;EG-DAG: SUB_INT {{\*? *}}[[COMPSHB:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHB:T[0-9]+\.[XYZW]]]
-;EG-DAG: SUB_INT {{\*? *}}[[COMPSHC:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHC:T[0-9]+\.[XYZW]]]
-;EG-DAG: SUB_INT {{\*? *}}[[COMPSHD:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHD:T[0-9]+\.[XYZW]]]
-;EG-DAG: LSHL {{\*? *}}[[COMPSHA]]
-;EG-DAG: LSHL {{\*? *}}[[COMPSHB]]
-;EG-DAG: LSHL {{\*? *}}[[COMPSHC]]
-;EG-DAG: LSHL {{\*? *}}[[COMPSHD]]
-;EG-DAG: LSHL {{.*}}, 1
-;EG-DAG: LSHL {{.*}}, 1
-;EG-DAG: LSHL {{.*}}, 1
-;EG-DAG: LSHL {{.*}}, 1
-;EG-DAG: ASHR {{.*}}, [[SHA]]
-;EG-DAG: ASHR {{.*}}, [[SHB]]
-;EG-DAG: ASHR {{.*}}, [[SHC]]
-;EG-DAG: ASHR {{.*}}, [[SHD]]
-;EG-DAG: LSHR {{.*}}, [[SHA]]
-;EG-DAG: LSHR {{.*}}, [[SHB]]
-;EG-DAG: LSHR {{.*}}, [[SHA]]
-;EG-DAG: LSHR {{.*}}, [[SHB]]
-;EG-DAG: OR_INT
-;EG-DAG: OR_INT
-;EG-DAG: OR_INT
-;EG-DAG: OR_INT
-;EG-DAG: ADD_INT {{\*? *}}[[BIGSHA:T[0-9]+\.[XYZW]]]{{.*}}, literal
-;EG-DAG: ADD_INT {{\*? *}}[[BIGSHB:T[0-9]+\.[XYZW]]]{{.*}}, literal
-;EG-DAG: ADD_INT {{\*? *}}[[BIGSHC:T[0-9]+\.[XYZW]]]{{.*}}, literal
-;EG-DAG: ADD_INT {{\*? *}}[[BIGSHD:T[0-9]+\.[XYZW]]]{{.*}}, literal
-;EG-DAG: ASHR
-;EG-DAG: ASHR
-;EG-DAG: ASHR
-;EG-DAG: ASHR
-;EG-DAG: ASHR {{.*}}, literal
-;EG-DAG: ASHR {{.*}}, literal
-;EG-DAG: ASHR {{.*}}, literal
-;EG-DAG: ASHR {{.*}}, literal
-;EG-DAG: SETGT_UINT {{\*? *T[0-9]\.[XYZW]}}, [[SHA]], literal
-;EG-DAG: SETGT_UINT {{\*? *T[0-9]\.[XYZW]}}, [[SHB]], literal
-;EG-DAG: SETGT_UINT {{\*? *T[0-9]\.[XYZW]}}, [[SHC]], literal
-;EG-DAG: SETGT_UINT {{\*? *T[0-9]\.[XYZW]}}, [[SHD]], literal
-;EG-DAG: CNDE_INT
-;EG-DAG: CNDE_INT
-;EG-DAG: CNDE_INT
-;EG-DAG: CNDE_INT
-;EG-DAG: CNDE_INT
-;EG-DAG: CNDE_INT
-;EG-DAG: CNDE_INT
-;EG-DAG: CNDE_INT
-
-;SI-LABEL: {{^}}ashr_v4i64:
-;SI: v_ashr_i64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
-;SI: v_ashr_i64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
-;SI: v_ashr_i64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
-;SI: v_ashr_i64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
-
-;VI-LABEL: {{^}}ashr_v4i64:
-;VI: v_ashrrev_i64 {{v\[[0-9]+:[0-9]+\], v[0-9]+, v\[[0-9]+:[0-9]+\]}}
-;VI: v_ashrrev_i64 {{v\[[0-9]+:[0-9]+\], v[0-9]+, v\[[0-9]+:[0-9]+\]}}
-;VI: v_ashrrev_i64 {{v\[[0-9]+:[0-9]+\], v[0-9]+, v\[[0-9]+:[0-9]+\]}}
-;VI: v_ashrrev_i64 {{v\[[0-9]+:[0-9]+\], v[0-9]+, v\[[0-9]+:[0-9]+\]}}
-
-define void @ashr_v4i64(<4 x i64> addrspace(1)* %out, <4 x i64> addrspace(1)* %in) {
- %b_ptr = getelementptr <4 x i64>, <4 x i64> addrspace(1)* %in, i64 1
- %a = load <4 x i64>, <4 x i64> addrspace(1) * %in
- %b = load <4 x i64>, <4 x i64> addrspace(1) * %b_ptr
- %result = ashr <4 x i64> %a, %b
- store <4 x i64> %result, <4 x i64> addrspace(1)* %out
- ret void
-}
-
Removed: llvm/trunk/test/CodeGen/R600/srem.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/srem.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/srem.ll (original)
+++ llvm/trunk/test/CodeGen/R600/srem.ll (removed)
@@ -1,112 +0,0 @@
-; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-; RUN: llc -march=r600 -mcpu=redwood < %s
-
-define void @srem_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
- %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
- %num = load i32, i32 addrspace(1) * %in
- %den = load i32, i32 addrspace(1) * %den_ptr
- %result = srem i32 %num, %den
- store i32 %result, i32 addrspace(1)* %out
- ret void
-}
-
-define void @srem_i32_4(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
- %num = load i32, i32 addrspace(1) * %in
- %result = srem i32 %num, 4
- store i32 %result, i32 addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}srem_i32_7:
-; SI: v_mov_b32_e32 [[MAGIC:v[0-9]+]], 0x92492493
-; SI: v_mul_hi_i32 {{v[0-9]+}}, [[MAGIC]],
-; SI: v_mul_lo_i32
-; SI: v_sub_i32
-; SI: s_endpgm
-define void @srem_i32_7(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
- %num = load i32, i32 addrspace(1) * %in
- %result = srem i32 %num, 7
- store i32 %result, i32 addrspace(1)* %out
- ret void
-}
-
-define void @srem_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
- %den_ptr = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %in, i32 1
- %num = load <2 x i32>, <2 x i32> addrspace(1) * %in
- %den = load <2 x i32>, <2 x i32> addrspace(1) * %den_ptr
- %result = srem <2 x i32> %num, %den
- store <2 x i32> %result, <2 x i32> addrspace(1)* %out
- ret void
-}
-
-define void @srem_v2i32_4(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
- %num = load <2 x i32>, <2 x i32> addrspace(1) * %in
- %result = srem <2 x i32> %num, <i32 4, i32 4>
- store <2 x i32> %result, <2 x i32> addrspace(1)* %out
- ret void
-}
-
-define void @srem_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
- %den_ptr = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %in, i32 1
- %num = load <4 x i32>, <4 x i32> addrspace(1) * %in
- %den = load <4 x i32>, <4 x i32> addrspace(1) * %den_ptr
- %result = srem <4 x i32> %num, %den
- store <4 x i32> %result, <4 x i32> addrspace(1)* %out
- ret void
-}
-
-define void @srem_v4i32_4(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
- %num = load <4 x i32>, <4 x i32> addrspace(1) * %in
- %result = srem <4 x i32> %num, <i32 4, i32 4, i32 4, i32 4>
- store <4 x i32> %result, <4 x i32> addrspace(1)* %out
- ret void
-}
-
-define void @srem_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
- %den_ptr = getelementptr i64, i64 addrspace(1)* %in, i64 1
- %num = load i64, i64 addrspace(1) * %in
- %den = load i64, i64 addrspace(1) * %den_ptr
- %result = srem i64 %num, %den
- store i64 %result, i64 addrspace(1)* %out
- ret void
-}
-
-define void @srem_i64_4(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
- %num = load i64, i64 addrspace(1) * %in
- %result = srem i64 %num, 4
- store i64 %result, i64 addrspace(1)* %out
- ret void
-}
-
-define void @srem_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* %in) {
- %den_ptr = getelementptr <2 x i64>, <2 x i64> addrspace(1)* %in, i64 1
- %num = load <2 x i64>, <2 x i64> addrspace(1) * %in
- %den = load <2 x i64>, <2 x i64> addrspace(1) * %den_ptr
- %result = srem <2 x i64> %num, %den
- store <2 x i64> %result, <2 x i64> addrspace(1)* %out
- ret void
-}
-
-define void @srem_v2i64_4(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* %in) {
- %num = load <2 x i64>, <2 x i64> addrspace(1) * %in
- %result = srem <2 x i64> %num, <i64 4, i64 4>
- store <2 x i64> %result, <2 x i64> addrspace(1)* %out
- ret void
-}
-
-define void @srem_v4i64(<4 x i64> addrspace(1)* %out, <4 x i64> addrspace(1)* %in) {
- %den_ptr = getelementptr <4 x i64>, <4 x i64> addrspace(1)* %in, i64 1
- %num = load <4 x i64>, <4 x i64> addrspace(1) * %in
- %den = load <4 x i64>, <4 x i64> addrspace(1) * %den_ptr
- %result = srem <4 x i64> %num, %den
- store <4 x i64> %result, <4 x i64> addrspace(1)* %out
- ret void
-}
-
-define void @srem_v4i64_4(<4 x i64> addrspace(1)* %out, <4 x i64> addrspace(1)* %in) {
- %num = load <4 x i64>, <4 x i64> addrspace(1) * %in
- %result = srem <4 x i64> %num, <i64 4, i64 4, i64 4, i64 4>
- store <4 x i64> %result, <4 x i64> addrspace(1)* %out
- ret void
-}
Removed: llvm/trunk/test/CodeGen/R600/srl.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/srl.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/srl.ll (original)
+++ llvm/trunk/test/CodeGen/R600/srl.ll (removed)
@@ -1,186 +0,0 @@
-; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=FUNC %s
-; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
-
-; FUNC-LABEL: {{^}}lshr_i32:
-; SI: v_lshrrev_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-; VI: v_lshrrev_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-; EG: LSHR {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-define void @lshr_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
- %b_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
- %a = load i32, i32 addrspace(1)* %in
- %b = load i32, i32 addrspace(1)* %b_ptr
- %result = lshr i32 %a, %b
- store i32 %result, i32 addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}lshr_v2i32:
-; SI: v_lshr_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-; SI: v_lshr_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-
-; VI: v_lshrrev_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-; VI: v_lshrrev_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-
-; EG: LSHR {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-; EG: LSHR {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-define void @lshr_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
- %b_ptr = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %in, i32 1
- %a = load <2 x i32>, <2 x i32> addrspace(1)* %in
- %b = load <2 x i32>, <2 x i32> addrspace(1)* %b_ptr
- %result = lshr <2 x i32> %a, %b
- store <2 x i32> %result, <2 x i32> addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}lshr_v4i32:
-; SI: v_lshr_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-; SI: v_lshr_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-; SI: v_lshr_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-; SI: v_lshr_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-
-; VI: v_lshrrev_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-; VI: v_lshrrev_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-; VI: v_lshrrev_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-; VI: v_lshrrev_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-
-; EG: LSHR {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-; EG: LSHR {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-; EG: LSHR {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-; EG: LSHR {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-define void @lshr_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
- %b_ptr = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %in, i32 1
- %a = load <4 x i32>, <4 x i32> addrspace(1)* %in
- %b = load <4 x i32>, <4 x i32> addrspace(1)* %b_ptr
- %result = lshr <4 x i32> %a, %b
- store <4 x i32> %result, <4 x i32> addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}lshr_i64:
-; SI: v_lshr_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
-; VI: v_lshrrev_b64 {{v\[[0-9]+:[0-9]+\], v[0-9]+, v\[[0-9]+:[0-9]+\]}}
-
-; EG: SUB_INT {{\*? *}}[[COMPSH:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHIFT:T[0-9]+\.[XYZW]]]
-; EG: LSHL {{\* *}}[[TEMP:T[0-9]+\.[XYZW]]], [[OPHI:T[0-9]+\.[XYZW]]], {{[[COMPSH]]|PV.[XYZW]}}
-; EG: LSHL {{\*? *}}[[OVERF:T[0-9]+\.[XYZW]]], {{[[TEMP]]|PV.[XYZW]}}, 1
-; EG-DAG: ADD_INT {{\*? *}}[[BIGSH:T[0-9]+\.[XYZW]]], [[SHIFT]], literal
-; EG-DAG: LSHR {{\*? *}}[[LOSMTMP:T[0-9]+\.[XYZW]]], [[OPLO:T[0-9]+\.[XYZW]]], [[SHIFT]]
-; EG-DAG: OR_INT {{\*? *}}[[LOSM:T[0-9]+\.[XYZW]]], {{[[LOSMTMP]]|PV.[XYZW]}}, {{[[OVERF]]|PV.[XYZW]}}
-; EG-DAG: LSHR {{\*? *}}[[HISM:T[0-9]+\.[XYZW]]], [[OPHI]], {{PS|[[SHIFT]]}}
-; EG-DAG: LSHR {{\*? *}}[[LOBIG:T[0-9]+\.[XYZW]]], [[OPHI]], {{PS|[[SHIFT]]}}
-; EG-DAG: SETGT_UINT {{\*? *}}[[RESC:T[0-9]+\.[XYZW]]], [[SHIFT]], literal
-; EG-DAG: CNDE_INT {{\*? *}}[[RESLO:T[0-9]+\.[XYZW]]], {{T[0-9]+\.[XYZW]}}
-; EG-DAG: CNDE_INT {{\*? *}}[[RESHI:T[0-9]+\.[XYZW]]], {{T[0-9]+\.[XYZW], .*}}, 0.0
-define void @lshr_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
- %b_ptr = getelementptr i64, i64 addrspace(1)* %in, i64 1
- %a = load i64, i64 addrspace(1)* %in
- %b = load i64, i64 addrspace(1)* %b_ptr
- %result = lshr i64 %a, %b
- store i64 %result, i64 addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}lshr_v2i64:
-; SI: v_lshr_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
-; SI: v_lshr_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
-
-; VI: v_lshrrev_b64 {{v\[[0-9]+:[0-9]+\], v[0-9]+, v\[[0-9]+:[0-9]+\]}}
-; VI: v_lshrrev_b64 {{v\[[0-9]+:[0-9]+\], v[0-9]+, v\[[0-9]+:[0-9]+\]}}
-
-; EG-DAG: SUB_INT {{\*? *}}[[COMPSHA:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHA:T[0-9]+\.[XYZW]]]
-; EG-DAG: SUB_INT {{\*? *}}[[COMPSHB:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHB:T[0-9]+\.[XYZW]]]
-; EG-DAG: LSHL {{\*? *}}[[COMPSHA]]
-; EG-DAG: LSHL {{\*? *}}[[COMPSHB]]
-; EG-DAG: LSHL {{.*}}, 1
-; EG-DAG: LSHL {{.*}}, 1
-; EG-DAG: LSHR {{.*}}, [[SHA]]
-; EG-DAG: LSHR {{.*}}, [[SHB]]
-; EG-DAG: LSHR {{.*}}, [[SHA]]
-; EG-DAG: LSHR {{.*}}, [[SHB]]
-; EG-DAG: OR_INT
-; EG-DAG: OR_INT
-; EG-DAG: ADD_INT {{\*? *}}[[BIGSHA:T[0-9]+\.[XYZW]]]{{.*}}, literal
-; EG-DAG: ADD_INT {{\*? *}}[[BIGSHB:T[0-9]+\.[XYZW]]]{{.*}}, literal
-; EG-DAG: LSHR
-; EG-DAG: LSHR
-; EG-DAG: SETGT_UINT {{\*? *T[0-9]\.[XYZW]}}, [[SHA]], literal
-; EG-DAG: SETGT_UINT {{\*? *T[0-9]\.[XYZW]}}, [[SHB]], literal
-; EG-DAG: CNDE_INT {{.*}}, 0.0
-; EG-DAG: CNDE_INT {{.*}}, 0.0
-; EG-DAG: CNDE_INT
-; EG-DAG: CNDE_INT
-define void @lshr_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* %in) {
- %b_ptr = getelementptr <2 x i64>, <2 x i64> addrspace(1)* %in, i64 1
- %a = load <2 x i64>, <2 x i64> addrspace(1)* %in
- %b = load <2 x i64>, <2 x i64> addrspace(1)* %b_ptr
- %result = lshr <2 x i64> %a, %b
- store <2 x i64> %result, <2 x i64> addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}lshr_v4i64:
-; SI: v_lshr_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
-; SI: v_lshr_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
-; SI: v_lshr_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
-; SI: v_lshr_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
-
-; VI: v_lshrrev_b64 {{v\[[0-9]+:[0-9]+\], v[0-9]+, v\[[0-9]+:[0-9]+\]}}
-; VI: v_lshrrev_b64 {{v\[[0-9]+:[0-9]+\], v[0-9]+, v\[[0-9]+:[0-9]+\]}}
-; VI: v_lshrrev_b64 {{v\[[0-9]+:[0-9]+\], v[0-9]+, v\[[0-9]+:[0-9]+\]}}
-; VI: v_lshrrev_b64 {{v\[[0-9]+:[0-9]+\], v[0-9]+, v\[[0-9]+:[0-9]+\]}}
-
-; EG-DAG: SUB_INT {{\*? *}}[[COMPSHA:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHA:T[0-9]+\.[XYZW]]]
-; EG-DAG: SUB_INT {{\*? *}}[[COMPSHB:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHB:T[0-9]+\.[XYZW]]]
-; EG-DAG: SUB_INT {{\*? *}}[[COMPSHC:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHC:T[0-9]+\.[XYZW]]]
-; EG-DAG: SUB_INT {{\*? *}}[[COMPSHD:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHD:T[0-9]+\.[XYZW]]]
-; EG-DAG: LSHL {{\*? *}}[[COMPSHA]]
-; EG-DAG: LSHL {{\*? *}}[[COMPSHB]]
-; EG-DAG: LSHL {{\*? *}}[[COMPSHC]]
-; EG-DAG: LSHL {{\*? *}}[[COMPSHD]]
-; EG-DAG: LSHL {{.*}}, 1
-; EG-DAG: LSHL {{.*}}, 1
-; EG-DAG: LSHL {{.*}}, 1
-; EG-DAG: LSHL {{.*}}, 1
-; EG-DAG: LSHR {{.*}}, [[SHA]]
-; EG-DAG: LSHR {{.*}}, [[SHB]]
-; EG-DAG: LSHR {{.*}}, [[SHC]]
-; EG-DAG: LSHR {{.*}}, [[SHD]]
-; EG-DAG: LSHR {{.*}}, [[SHA]]
-; EG-DAG: LSHR {{.*}}, [[SHB]]
-; EG-DAG: LSHR {{.*}}, [[SHC]]
-; EG-DAG: LSHR {{.*}}, [[SHD]]
-; EG-DAG: OR_INT
-; EG-DAG: OR_INT
-; EG-DAG: OR_INT
-; EG-DAG: OR_INT
-; EG-DAG: ADD_INT {{\*? *}}[[BIGSHA:T[0-9]+\.[XYZW]]]{{.*}}, literal
-; EG-DAG: ADD_INT {{\*? *}}[[BIGSHB:T[0-9]+\.[XYZW]]]{{.*}}, literal
-; EG-DAG: ADD_INT {{\*? *}}[[BIGSHC:T[0-9]+\.[XYZW]]]{{.*}}, literal
-; EG-DAG: ADD_INT {{\*? *}}[[BIGSHD:T[0-9]+\.[XYZW]]]{{.*}}, literal
-; EG-DAG: LSHR
-; EG-DAG: LSHR
-; EG-DAG: LSHR
-; EG-DAG: LSHR
-; EG-DAG: LSHR
-; EG-DAG: LSHR
-; EG-DAG: SETGT_UINT {{\*? *T[0-9]\.[XYZW]}}, [[SHA]], literal
-; EG-DAG: SETGT_UINT {{\*? *T[0-9]\.[XYZW]}}, [[SHB]], literal
-; EG-DAG: SETGT_UINT {{\*? *T[0-9]\.[XYZW]}}, [[SHC]], literal
-; EG-DAG: SETGT_UINT {{\*? *T[0-9]\.[XYZW]}}, [[SHD]], literal
-; EG-DAG: CNDE_INT {{.*}}, 0.0
-; EG-DAG: CNDE_INT {{.*}}, 0.0
-; EG-DAG: CNDE_INT {{.*}}, 0.0
-; EG-DAG: CNDE_INT {{.*}}, 0.0
-; EG-DAG: CNDE_INT
-; EG-DAG: CNDE_INT
-; EG-DAG: CNDE_INT
-; EG-DAG: CNDE_INT
-define void @lshr_v4i64(<4 x i64> addrspace(1)* %out, <4 x i64> addrspace(1)* %in) {
- %b_ptr = getelementptr <4 x i64>, <4 x i64> addrspace(1)* %in, i64 1
- %a = load <4 x i64>, <4 x i64> addrspace(1)* %in
- %b = load <4 x i64>, <4 x i64> addrspace(1)* %b_ptr
- %result = lshr <4 x i64> %a, %b
- store <4 x i64> %result, <4 x i64> addrspace(1)* %out
- ret void
-}
Removed: llvm/trunk/test/CodeGen/R600/ssubo.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/ssubo.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/ssubo.ll (original)
+++ llvm/trunk/test/CodeGen/R600/ssubo.ll (removed)
@@ -1,65 +0,0 @@
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs< %s
-
-declare { i32, i1 } @llvm.ssub.with.overflow.i32(i32, i32) nounwind readnone
-declare { i64, i1 } @llvm.ssub.with.overflow.i64(i64, i64) nounwind readnone
-
-; FUNC-LABEL: {{^}}ssubo_i64_zext:
-define void @ssubo_i64_zext(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
- %ssub = call { i64, i1 } @llvm.ssub.with.overflow.i64(i64 %a, i64 %b) nounwind
- %val = extractvalue { i64, i1 } %ssub, 0
- %carry = extractvalue { i64, i1 } %ssub, 1
- %ext = zext i1 %carry to i64
- %add2 = add i64 %val, %ext
- store i64 %add2, i64 addrspace(1)* %out, align 8
- ret void
-}
-
-; FUNC-LABEL: {{^}}s_ssubo_i32:
-define void @s_ssubo_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %carryout, i32 %a, i32 %b) nounwind {
- %ssub = call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 %a, i32 %b) nounwind
- %val = extractvalue { i32, i1 } %ssub, 0
- %carry = extractvalue { i32, i1 } %ssub, 1
- store i32 %val, i32 addrspace(1)* %out, align 4
- store i1 %carry, i1 addrspace(1)* %carryout
- ret void
-}
-
-; FUNC-LABEL: {{^}}v_ssubo_i32:
-define void @v_ssubo_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %carryout, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
- %a = load i32, i32 addrspace(1)* %aptr, align 4
- %b = load i32, i32 addrspace(1)* %bptr, align 4
- %ssub = call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 %a, i32 %b) nounwind
- %val = extractvalue { i32, i1 } %ssub, 0
- %carry = extractvalue { i32, i1 } %ssub, 1
- store i32 %val, i32 addrspace(1)* %out, align 4
- store i1 %carry, i1 addrspace(1)* %carryout
- ret void
-}
-
-; FUNC-LABEL: {{^}}s_ssubo_i64:
-; SI: s_sub_u32
-; SI: s_subb_u32
-define void @s_ssubo_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %carryout, i64 %a, i64 %b) nounwind {
- %ssub = call { i64, i1 } @llvm.ssub.with.overflow.i64(i64 %a, i64 %b) nounwind
- %val = extractvalue { i64, i1 } %ssub, 0
- %carry = extractvalue { i64, i1 } %ssub, 1
- store i64 %val, i64 addrspace(1)* %out, align 8
- store i1 %carry, i1 addrspace(1)* %carryout
- ret void
-}
-
-; FUNC-LABEL: {{^}}v_ssubo_i64:
-; SI: v_sub_i32_e32
-; SI: v_subb_u32_e32
-define void @v_ssubo_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %carryout, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) nounwind {
- %a = load i64, i64 addrspace(1)* %aptr, align 4
- %b = load i64, i64 addrspace(1)* %bptr, align 4
- %ssub = call { i64, i1 } @llvm.ssub.with.overflow.i64(i64 %a, i64 %b) nounwind
- %val = extractvalue { i64, i1 } %ssub, 0
- %carry = extractvalue { i64, i1 } %ssub, 1
- store i64 %val, i64 addrspace(1)* %out, align 8
- store i1 %carry, i1 addrspace(1)* %carryout
- ret void
-}
Removed: llvm/trunk/test/CodeGen/R600/store-barrier.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/store-barrier.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/store-barrier.ll (original)
+++ llvm/trunk/test/CodeGen/R600/store-barrier.ll (removed)
@@ -1,42 +0,0 @@
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs -mattr=+load-store-opt -enable-misched < %s | FileCheck --check-prefix=CHECK %s
-; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs -mattr=+load-store-opt -enable-misched < %s | FileCheck --check-prefix=CHECK %s
-
-; This test is for a bug in the machine scheduler where stores without
-; an underlying object would be moved across the barrier. In this
-; test, the <2 x i8> store will be split into two i8 stores, so they
-; won't have an underlying object.
-
-; CHECK-LABEL: {{^}}test:
-; CHECK: ds_write_b8
-; CHECK: ds_write_b8
-; CHECK: s_barrier
-; CHECK: s_endpgm
-; Function Attrs: nounwind
-define void @test(<2 x i8> addrspace(3)* nocapture %arg, <2 x i8> addrspace(1)* nocapture readonly %arg1, i32 addrspace(1)* nocapture readonly %arg2, <2 x i8> addrspace(1)* nocapture %arg3, i32 %arg4, i64 %tmp9) {
-bb:
- %tmp10 = getelementptr inbounds i32, i32 addrspace(1)* %arg2, i64 %tmp9
- %tmp13 = load i32, i32 addrspace(1)* %tmp10, align 2
- %tmp14 = getelementptr inbounds <2 x i8>, <2 x i8> addrspace(3)* %arg, i32 %tmp13
- %tmp15 = load <2 x i8>, <2 x i8> addrspace(3)* %tmp14, align 2
- %tmp16 = add i32 %tmp13, 1
- %tmp17 = getelementptr inbounds <2 x i8>, <2 x i8> addrspace(3)* %arg, i32 %tmp16
- store <2 x i8> %tmp15, <2 x i8> addrspace(3)* %tmp17, align 2
- tail call void @llvm.AMDGPU.barrier.local() #2
- %tmp25 = load i32, i32 addrspace(1)* %tmp10, align 4
- %tmp26 = sext i32 %tmp25 to i64
- %tmp27 = sext i32 %arg4 to i64
- %tmp28 = getelementptr inbounds <2 x i8>, <2 x i8> addrspace(3)* %arg, i32 %tmp25, i32 %arg4
- %tmp29 = load i8, i8 addrspace(3)* %tmp28, align 1
- %tmp30 = getelementptr inbounds <2 x i8>, <2 x i8> addrspace(1)* %arg3, i64 %tmp26, i64 %tmp27
- store i8 %tmp29, i8 addrspace(1)* %tmp30, align 1
- %tmp32 = getelementptr inbounds <2 x i8>, <2 x i8> addrspace(3)* %arg, i32 %tmp25, i32 0
- %tmp33 = load i8, i8 addrspace(3)* %tmp32, align 1
- %tmp35 = getelementptr inbounds <2 x i8>, <2 x i8> addrspace(1)* %arg3, i64 %tmp26, i64 0
- store i8 %tmp33, i8 addrspace(1)* %tmp35, align 1
- ret void
-}
-
-; Function Attrs: noduplicate nounwind
-declare void @llvm.AMDGPU.barrier.local() #2
-
-attributes #2 = { noduplicate nounwind }
Removed: llvm/trunk/test/CodeGen/R600/store-v3i32.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/store-v3i32.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/store-v3i32.ll (original)
+++ llvm/trunk/test/CodeGen/R600/store-v3i32.ll (removed)
@@ -1,13 +0,0 @@
-; XFAIL: *
-; RUN: llc -verify-machineinstrs -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI %s
-; RUN: llc -verify-machineinstrs -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefix=SI %s
-
-; 3-element vectors have the same size and alignment as 4-element vectors, so this
-; should be done in a single store.
-
-; SI-LABEL: {{^}}store_v3i32:
-; SI: buffer_store_dwordx4
-define void @store_v3i32(<3 x i32> addrspace(1)* %out, <3 x i32> %a) nounwind {
- store <3 x i32> %a, <3 x i32> addrspace(1)* %out, align 16
- ret void
-}
Removed: llvm/trunk/test/CodeGen/R600/store-v3i64.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/store-v3i64.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/store-v3i64.ll (original)
+++ llvm/trunk/test/CodeGen/R600/store-v3i64.ll (removed)
@@ -1,29 +0,0 @@
-; XFAIL: *
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI %s
-; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs< %s | FileCheck -check-prefix=SI %s
-
-; SI-LABEL: {{^}}global_store_v3i64:
-; SI: buffer_store_dwordx4
-; SI: buffer_store_dwordx4
-define void @global_store_v3i64(<3 x i64> addrspace(1)* %out, <3 x i64> %x) {
- store <3 x i64> %x, <3 x i64> addrspace(1)* %out, align 32
- ret void
-}
-
-; SI-LABEL: {{^}}global_store_v3i64_unaligned:
-define void @global_store_v3i64_unaligned(<3 x i64> addrspace(1)* %out, <3 x i64> %x) {
- store <3 x i64> %x, <3 x i64> addrspace(1)* %out, align 1
- ret void
-}
-
-; SI-LABEL: {{^}}local_store_v3i64:
-define void @local_store_v3i64(<3 x i64> addrspace(3)* %out, <3 x i64> %x) {
- store <3 x i64> %x, <3 x i64> addrspace(3)* %out, align 32
- ret void
-}
-
-; SI-LABEL: {{^}}local_store_v3i64_unaligned:
-define void @local_store_v3i64_unaligned(<3 x i64> addrspace(1)* %out, <3 x i64> %x) {
- store <3 x i64> %x, <3 x i64> addrspace(1)* %out, align 1
- ret void
-}
Removed: llvm/trunk/test/CodeGen/R600/store-vector-ptrs.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/store-vector-ptrs.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/store-vector-ptrs.ll (original)
+++ llvm/trunk/test/CodeGen/R600/store-vector-ptrs.ll (removed)
@@ -1,12 +0,0 @@
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs< %s
-; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs< %s
-
-; This tests for a bug that caused a crash in
-; AMDGPUDAGToDAGISel::SelectMUBUFScratch() which is used for selecting
-; scratch loads and stores.
-; CHECK-LABEL: {{^}}store_vector_ptrs:
-define void @store_vector_ptrs(<4 x i32*>* %out, <4 x [1024 x i32]*> %array) nounwind {
- %p = getelementptr [1024 x i32], <4 x [1024 x i32]*> %array, <4 x i16> zeroinitializer, <4 x i16> <i16 16, i16 16, i16 16, i16 16>
- store <4 x i32*> %p, <4 x i32*>* %out
- ret void
-}
Removed: llvm/trunk/test/CodeGen/R600/store.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/store.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/store.ll (original)
+++ llvm/trunk/test/CodeGen/R600/store.ll (removed)
@@ -1,369 +0,0 @@
-; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
-; RUN: llc -march=r600 -mcpu=cayman < %s | FileCheck -check-prefix=CM -check-prefix=FUNC %s
-
-;===------------------------------------------------------------------------===;
-; Global Address Space
-;===------------------------------------------------------------------------===;
-; FUNC-LABEL: {{^}}store_i1:
-; EG: MEM_RAT MSKOR
-; SI: buffer_store_byte
-define void @store_i1(i1 addrspace(1)* %out) {
-entry:
- store i1 true, i1 addrspace(1)* %out
- ret void
-}
-
-; i8 store
-; FUNC-LABEL: {{^}}store_i8:
-; EG: MEM_RAT MSKOR T[[RW_GPR:[0-9]]].XW, T{{[0-9]}}.X
-
-; IG 0: Get the byte index and truncate the value
-; EG: AND_INT * T{{[0-9]}}.[[BI_CHAN:[XYZW]]], KC0[2].Y, literal.x
-; EG: LSHL T{{[0-9]}}.[[SHIFT_CHAN:[XYZW]]], PV.[[BI_CHAN]], literal.x
-; EG: AND_INT * T{{[0-9]}}.[[TRUNC_CHAN:[XYZW]]], KC0[2].Z, literal.y
-; EG-NEXT: 3(4.203895e-45), 255(3.573311e-43)
-
-
-; IG 1: Truncate the calculated shift amount for the mask
-
-; IG 2: Shift the value and the mask
-; EG: LSHL T[[RW_GPR]].X, PS, PV.[[SHIFT_CHAN]]
-; EG: LSHL * T[[RW_GPR]].W, literal.x, PV.[[SHIFT_CHAN]]
-; EG-NEXT: 255
-; IG 3: Initialize the Y and Z channels to zero
-; XXX: An optimal scheduler should merge this into one of the previous IGs.
-; EG: MOV T[[RW_GPR]].Y, 0.0
-; EG: MOV * T[[RW_GPR]].Z, 0.0
-
-; SI: buffer_store_byte
-
-define void @store_i8(i8 addrspace(1)* %out, i8 %in) {
-entry:
- store i8 %in, i8 addrspace(1)* %out
- ret void
-}
-
-; i16 store
-; FUNC-LABEL: {{^}}store_i16:
-; EG: MEM_RAT MSKOR T[[RW_GPR:[0-9]]].XW, T{{[0-9]}}.X
-
-; IG 0: Get the byte index and truncate the value
-
-
-; EG: AND_INT * T{{[0-9]}}.[[BI_CHAN:[XYZW]]], KC0[2].Y, literal.x
-; EG-NEXT: 3(4.203895e-45),
-
-; EG: LSHL T{{[0-9]}}.[[SHIFT_CHAN:[XYZW]]], PV.[[BI_CHAN]], literal.x
-; EG: AND_INT * T{{[0-9]}}.[[TRUNC_CHAN:[XYZW]]], KC0[2].Z, literal.y
-
-; EG-NEXT: 3(4.203895e-45), 65535(9.183409e-41)
-; IG 1: Truncate the calculated shift amount for the mask
-
-; IG 2: Shift the value and the mask
-; EG: LSHL T[[RW_GPR]].X, PS, PV.[[SHIFT_CHAN]]
-; EG: LSHL * T[[RW_GPR]].W, literal.x, PV.[[SHIFT_CHAN]]
-; EG-NEXT: 65535
-; IG 3: Initialize the Y and Z channels to zero
-; XXX: An optimal scheduler should merge this into one of the previous IGs.
-; EG: MOV T[[RW_GPR]].Y, 0.0
-; EG: MOV * T[[RW_GPR]].Z, 0.0
-
-; SI: buffer_store_short
-define void @store_i16(i16 addrspace(1)* %out, i16 %in) {
-entry:
- store i16 %in, i16 addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}store_v2i8:
-; EG: MEM_RAT MSKOR
-; EG-NOT: MEM_RAT MSKOR
-
-; SI: buffer_store_byte
-; SI: buffer_store_byte
-define void @store_v2i8(<2 x i8> addrspace(1)* %out, <2 x i32> %in) {
-entry:
- %0 = trunc <2 x i32> %in to <2 x i8>
- store <2 x i8> %0, <2 x i8> addrspace(1)* %out
- ret void
-}
-
-
-; FUNC-LABEL: {{^}}store_v2i16:
-; EG: MEM_RAT_CACHELESS STORE_RAW
-
-; CM: MEM_RAT_CACHELESS STORE_DWORD
-
-; SI: buffer_store_short
-; SI: buffer_store_short
-define void @store_v2i16(<2 x i16> addrspace(1)* %out, <2 x i32> %in) {
-entry:
- %0 = trunc <2 x i32> %in to <2 x i16>
- store <2 x i16> %0, <2 x i16> addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}store_v4i8:
-; EG: MEM_RAT_CACHELESS STORE_RAW
-
-; CM: MEM_RAT_CACHELESS STORE_DWORD
-
-; SI: buffer_store_byte
-; SI: buffer_store_byte
-; SI: buffer_store_byte
-; SI: buffer_store_byte
-define void @store_v4i8(<4 x i8> addrspace(1)* %out, <4 x i32> %in) {
-entry:
- %0 = trunc <4 x i32> %in to <4 x i8>
- store <4 x i8> %0, <4 x i8> addrspace(1)* %out
- ret void
-}
-
-; floating-point store
-; FUNC-LABEL: {{^}}store_f32:
-; EG: MEM_RAT_CACHELESS STORE_RAW T{{[0-9]+\.X, T[0-9]+\.X}}, 1
-
-; CM: MEM_RAT_CACHELESS STORE_DWORD T{{[0-9]+\.X, T[0-9]+\.X}}
-
-; SI: buffer_store_dword
-
-define void @store_f32(float addrspace(1)* %out, float %in) {
- store float %in, float addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}store_v4i16:
-; EG: MEM_RAT MSKOR
-; EG: MEM_RAT MSKOR
-; EG: MEM_RAT MSKOR
-; EG: MEM_RAT MSKOR
-; EG-NOT: MEM_RAT MSKOR
-
-; SI: buffer_store_short
-; SI: buffer_store_short
-; SI: buffer_store_short
-; SI: buffer_store_short
-; SI-NOT: buffer_store_byte
-define void @store_v4i16(<4 x i16> addrspace(1)* %out, <4 x i32> %in) {
-entry:
- %0 = trunc <4 x i32> %in to <4 x i16>
- store <4 x i16> %0, <4 x i16> addrspace(1)* %out
- ret void
-}
-
-; vec2 floating-point stores
-; FUNC-LABEL: {{^}}store_v2f32:
-; EG: MEM_RAT_CACHELESS STORE_RAW
-
-; CM: MEM_RAT_CACHELESS STORE_DWORD
-
-; SI: buffer_store_dwordx2
-
-define void @store_v2f32(<2 x float> addrspace(1)* %out, float %a, float %b) {
-entry:
- %0 = insertelement <2 x float> <float 0.0, float 0.0>, float %a, i32 0
- %1 = insertelement <2 x float> %0, float %b, i32 1
- store <2 x float> %1, <2 x float> addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}store_v4i32:
-; EG: MEM_RAT_CACHELESS STORE_RAW
-; EG-NOT: MEM_RAT_CACHELESS STORE_RAW
-
-; CM: MEM_RAT_CACHELESS STORE_DWORD
-; CM-NOT: MEM_RAT_CACHELESS STORE_DWORD
-
-; SI: buffer_store_dwordx4
-define void @store_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %in) {
-entry:
- store <4 x i32> %in, <4 x i32> addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}store_i64_i8:
-; EG: MEM_RAT MSKOR
-; SI: buffer_store_byte
-define void @store_i64_i8(i8 addrspace(1)* %out, i64 %in) {
-entry:
- %0 = trunc i64 %in to i8
- store i8 %0, i8 addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}store_i64_i16:
-; EG: MEM_RAT MSKOR
-; SI: buffer_store_short
-define void @store_i64_i16(i16 addrspace(1)* %out, i64 %in) {
-entry:
- %0 = trunc i64 %in to i16
- store i16 %0, i16 addrspace(1)* %out
- ret void
-}
-
-;===------------------------------------------------------------------------===;
-; Local Address Space
-;===------------------------------------------------------------------------===;
-
-; FUNC-LABEL: {{^}}store_local_i1:
-; EG: LDS_BYTE_WRITE
-; SI: ds_write_b8
-define void @store_local_i1(i1 addrspace(3)* %out) {
-entry:
- store i1 true, i1 addrspace(3)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}store_local_i8:
-; EG: LDS_BYTE_WRITE
-
-; SI: ds_write_b8
-define void @store_local_i8(i8 addrspace(3)* %out, i8 %in) {
- store i8 %in, i8 addrspace(3)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}store_local_i16:
-; EG: LDS_SHORT_WRITE
-
-; SI: ds_write_b16
-define void @store_local_i16(i16 addrspace(3)* %out, i16 %in) {
- store i16 %in, i16 addrspace(3)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}store_local_v2i16:
-; EG: LDS_WRITE
-
-; CM: LDS_WRITE
-
-; SI: ds_write_b16
-; SI: ds_write_b16
-define void @store_local_v2i16(<2 x i16> addrspace(3)* %out, <2 x i16> %in) {
-entry:
- store <2 x i16> %in, <2 x i16> addrspace(3)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}store_local_v4i8:
-; EG: LDS_WRITE
-
-; CM: LDS_WRITE
-
-; SI: ds_write_b8
-; SI: ds_write_b8
-; SI: ds_write_b8
-; SI: ds_write_b8
-define void @store_local_v4i8(<4 x i8> addrspace(3)* %out, <4 x i8> %in) {
-entry:
- store <4 x i8> %in, <4 x i8> addrspace(3)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}store_local_v2i32:
-; EG: LDS_WRITE
-; EG: LDS_WRITE
-
-; CM: LDS_WRITE
-; CM: LDS_WRITE
-
-; SI: ds_write_b64
-define void @store_local_v2i32(<2 x i32> addrspace(3)* %out, <2 x i32> %in) {
-entry:
- store <2 x i32> %in, <2 x i32> addrspace(3)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}store_local_v4i32:
-; EG: LDS_WRITE
-; EG: LDS_WRITE
-; EG: LDS_WRITE
-; EG: LDS_WRITE
-
-; CM: LDS_WRITE
-; CM: LDS_WRITE
-; CM: LDS_WRITE
-; CM: LDS_WRITE
-
-; SI: ds_write_b32
-; SI: ds_write_b32
-; SI: ds_write_b32
-; SI: ds_write_b32
-define void @store_local_v4i32(<4 x i32> addrspace(3)* %out, <4 x i32> %in) {
-entry:
- store <4 x i32> %in, <4 x i32> addrspace(3)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}store_local_i64_i8:
-; EG: LDS_BYTE_WRITE
-; SI: ds_write_b8
-define void @store_local_i64_i8(i8 addrspace(3)* %out, i64 %in) {
-entry:
- %0 = trunc i64 %in to i8
- store i8 %0, i8 addrspace(3)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}store_local_i64_i16:
-; EG: LDS_SHORT_WRITE
-; SI: ds_write_b16
-define void @store_local_i64_i16(i16 addrspace(3)* %out, i64 %in) {
-entry:
- %0 = trunc i64 %in to i16
- store i16 %0, i16 addrspace(3)* %out
- ret void
-}
-
-; The stores in this function are combined by the optimizer to create a
-; 64-bit store with 32-bit alignment. This is legal for SI and the legalizer
-; should not try to split the 64-bit store back into two 32-bit stores.
-;
-; Evergreen / Northern Islands don't support 64-bit stores yet, so there should
-; be two 32-bit stores.
-
-; FUNC-LABEL: {{^}}vecload2:
-; EG: MEM_RAT_CACHELESS STORE_RAW
-
-; CM: MEM_RAT_CACHELESS STORE_DWORD
-
-; SI: buffer_store_dwordx2
-define void @vecload2(i32 addrspace(1)* nocapture %out, i32 addrspace(2)* nocapture %mem) #0 {
-entry:
- %0 = load i32, i32 addrspace(2)* %mem, align 4
- %arrayidx1.i = getelementptr inbounds i32, i32 addrspace(2)* %mem, i64 1
- %1 = load i32, i32 addrspace(2)* %arrayidx1.i, align 4
- store i32 %0, i32 addrspace(1)* %out, align 4
- %arrayidx1 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 1
- store i32 %1, i32 addrspace(1)* %arrayidx1, align 4
- ret void
-}
-
-attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
-
-; When i128 was a legal type, this program generated 'cannot select' errors:
-
-; FUNC-LABEL: {{^}}"i128-const-store":
-; FIXME: We should be able to do this with one store instruction
-; EG: STORE_RAW
-; EG: STORE_RAW
-; EG: STORE_RAW
-; EG: STORE_RAW
-; CM: STORE_DWORD
-; CM: STORE_DWORD
-; CM: STORE_DWORD
-; CM: STORE_DWORD
-; SI: buffer_store_dwordx4
-define void @i128-const-store(i32 addrspace(1)* %out) {
-entry:
- store i32 1, i32 addrspace(1)* %out, align 4
- %arrayidx2 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 1
- store i32 1, i32 addrspace(1)* %arrayidx2, align 4
- %arrayidx4 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 2
- store i32 2, i32 addrspace(1)* %arrayidx4, align 4
- %arrayidx6 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 3
- store i32 2, i32 addrspace(1)* %arrayidx6, align 4
- ret void
-}
Removed: llvm/trunk/test/CodeGen/R600/store.r600.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/store.r600.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/store.r600.ll (original)
+++ llvm/trunk/test/CodeGen/R600/store.r600.ll (removed)
@@ -1,22 +0,0 @@
-; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG %s
-
-; XXX: Merge this test into store.ll once it is supported on SI
-
-; v4i32 store
-; EG: {{^}}store_v4i32:
-; EG: MEM_RAT_CACHELESS STORE_RAW T{{[0-9]+\.XYZW, T[0-9]+\.X}}, 1
-
-define void @store_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
- %1 = load <4 x i32>, <4 x i32> addrspace(1) * %in
- store <4 x i32> %1, <4 x i32> addrspace(1)* %out
- ret void
-}
-
-; v4f32 store
-; EG: {{^}}store_v4f32:
-; EG: MEM_RAT_CACHELESS STORE_RAW T{{[0-9]+\.XYZW, T[0-9]+\.X}}, 1
-define void @store_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
- %1 = load <4 x float>, <4 x float> addrspace(1) * %in
- store <4 x float> %1, <4 x float> addrspace(1)* %out
- ret void
-}
Removed: llvm/trunk/test/CodeGen/R600/structurize.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/structurize.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/structurize.ll (original)
+++ llvm/trunk/test/CodeGen/R600/structurize.ll (removed)
@@ -1,83 +0,0 @@
-; RUN: llc < %s -march=r600 -mcpu=redwood -mattr=disable-irstructurizer | FileCheck %s
-; Test case for a crash in the AMDILCFGStructurizer from a CFG like this:
-;
-; entry
-; / \
-; diamond_head branch_from
-; / \ |
-; diamond_false diamond_true
-; \ /
-; done
-;
-; When the diamond_true branch had more than 100 instructions.
-;
-;
-
-; CHECK-LABEL: {{^}}branch_into_diamond:
-; === entry block:
-; CHECK: ALU_PUSH_BEFORE
-; === Branch instruction (IF):
-; CHECK: JUMP
- ; === branch_from block
- ; CHECK: ALU
- ; === Duplicated diamond_true block (There can be more than one ALU clause):
- ; === XXX: We should be able to optimize this so the basic block is not
- ; === duplicated. See comments in
- ; === AMDGPUCFGStructurizer::improveSimpleJumpintoIf()
- ; CHECK: ALU
-; === Branch instruction (ELSE):
-; CHECK: ELSE
- ; === diamond_head block:
- ; CHECK: ALU_PUSH_BEFORE
- ; === Branch instruction (IF):
- ; CHECK: JUMP
- ; === diamond_true block (There can be more than one ALU clause):
- ; ALU
- ; === Branch instruction (ELSE):
- ; CHECK: ELSE
- ; === diamond_false block plus implicit ENDIF
- ; CHECK: ALU_POP_AFTER
-; === Branch instruction (ENDIF):
-; CHECK: POP
-; === done block:
-; CHECK: ALU
-; CHECK: MEM_RAT_CACHELESS
-; CHECK: CF_END
-
-
-define void @branch_into_diamond(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c) {
-entry:
-%0 = icmp ne i32 %a, 0
- br i1 %0, label %diamond_head, label %branch_from
-
-diamond_head:
- %1 = icmp ne i32 %a, 1
- br i1 %1, label %diamond_true, label %diamond_false
-
-branch_from:
- %2 = add i32 %a, 1
- br label %diamond_true
-
-diamond_false:
- %3 = add i32 %a, 2
- br label %done
-
-diamond_true:
- %4 = phi i32 [%2, %branch_from], [%a, %diamond_head]
- ; This block needs to be > 100 ISA instructions to hit the bug,
- ; so we'll use udiv instructions.
- %div0 = udiv i32 %a, %b
- %div1 = udiv i32 %div0, %4
- %div2 = udiv i32 %div1, 11
- %div3 = udiv i32 %div2, %a
- %div4 = udiv i32 %div3, %b
- %div5 = udiv i32 %div4, %c
- %div6 = udiv i32 %div5, %div0
- %div7 = udiv i32 %div6, %div1
- br label %done
-
-done:
- %5 = phi i32 [%3, %diamond_false], [%div7, %diamond_true]
- store i32 %5, i32 addrspace(1)* %out
- ret void
-}
Removed: llvm/trunk/test/CodeGen/R600/structurize1.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/structurize1.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/structurize1.ll (original)
+++ llvm/trunk/test/CodeGen/R600/structurize1.ll (removed)
@@ -1,62 +0,0 @@
-; RUN: llc < %s -march=r600 -mattr=disable-ifcvt -mcpu=redwood | FileCheck %s
-
-; This tests for a bug where the AMDILCFGStructurizer was crashing on loops
-; like this:
-;
-; for (i = 0; i < x; i++) {
-; if (cond0) {
-; if (cond1) {
-;
-; } else {
-;
-; }
-; if (cond2) {
-;
-; }
-; }
-; }
-
-; CHECK-LABEL: {{^}}if_inside_loop:
-; CHECK: LOOP_START_DX10
-; CHECK: END_LOOP
-define void @if_inside_loop(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c, i32 %d) {
-entry:
- br label %for.body
-
-for.body:
- %0 = phi i32 [0, %entry], [%inc, %for.inc]
- %val = phi i32 [0, %entry], [%val.for.inc, %for.inc]
- %inc = add i32 %0, 1
- %1 = icmp ult i32 10, %a
- br i1 %1, label %for.inc, label %if.then
-
-if.then:
- %2 = icmp ne i32 0, %b
- br i1 %2, label %if.then.true, label %if.then.false
-
-if.then.true:
- %3 = add i32 %a, %val
- br label %if
-
-if.then.false:
- %4 = mul i32 %a, %val
- br label %if
-
-if:
- %val.if = phi i32 [%3, %if.then.true], [%4, %if.then.false]
- %5 = icmp ne i32 0, %c
- br i1 %5, label %if.true, label %for.inc
-
-if.true:
- %6 = add i32 %a, %val.if
- br label %for.inc
-
-for.inc:
- %val.for.inc = phi i32 [%val, %for.body], [%val.if, %if], [%6, %if.true]
- %7 = icmp ne i32 0, %d
- br i1 %7, label %for.body, label %exit
-
-exit:
- store i32 %val.for.inc, i32 addrspace(1)* %out
- ret void
-}
Removed: llvm/trunk/test/CodeGen/R600/sub.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/sub.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/sub.ll (original)
+++ llvm/trunk/test/CodeGen/R600/sub.ll (removed)
@@ -1,130 +0,0 @@
-; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
-
-
-declare i32 @llvm.r600.read.tidig.x() readnone
-
-; FUNC-LABEL: {{^}}test_sub_i32:
-; EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-
-; SI: v_subrev_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-define void @test_sub_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
- %b_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
- %a = load i32, i32 addrspace(1)* %in
- %b = load i32, i32 addrspace(1)* %b_ptr
- %result = sub i32 %a, %b
- store i32 %result, i32 addrspace(1)* %out
- ret void
-}
-
-
-; FUNC-LABEL: {{^}}test_sub_v2i32:
-; EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-; EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-
-; SI: v_sub_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-; SI: v_sub_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-
-define void @test_sub_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
- %b_ptr = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %in, i32 1
- %a = load <2 x i32>, <2 x i32> addrspace(1) * %in
- %b = load <2 x i32>, <2 x i32> addrspace(1) * %b_ptr
- %result = sub <2 x i32> %a, %b
- store <2 x i32> %result, <2 x i32> addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}test_sub_v4i32:
-; EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-; EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-; EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-; EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-
-; SI: v_sub_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-; SI: v_sub_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-; SI: v_sub_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-; SI: v_sub_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-
-define void @test_sub_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
- %b_ptr = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %in, i32 1
- %a = load <4 x i32>, <4 x i32> addrspace(1) * %in
- %b = load <4 x i32>, <4 x i32> addrspace(1) * %b_ptr
- %result = sub <4 x i32> %a, %b
- store <4 x i32> %result, <4 x i32> addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}s_sub_i64:
-; SI: s_sub_u32
-; SI: s_subb_u32
-
-; EG: MEM_RAT_CACHELESS STORE_RAW [[LO:T[0-9]+\.[XYZW]]]
-; EG: MEM_RAT_CACHELESS STORE_RAW [[HI:T[0-9]+\.[XYZW]]]
-; EG-DAG: SUB_INT {{[* ]*}}[[LO]]
-; EG-DAG: SUBB_UINT
-; EG-DAG: SUB_INT
-; EG-DAG: SUB_INT {{[* ]*}}[[HI]]
-; EG-NOT: SUB
-define void @s_sub_i64(i64 addrspace(1)* noalias %out, i64 %a, i64 %b) nounwind {
- %result = sub i64 %a, %b
- store i64 %result, i64 addrspace(1)* %out, align 8
- ret void
-}
-
-; FUNC-LABEL: {{^}}v_sub_i64:
-; SI: v_sub_i32_e32
-; SI: v_subb_u32_e32
-
-; EG: MEM_RAT_CACHELESS STORE_RAW [[LO:T[0-9]+\.[XYZW]]]
-; EG: MEM_RAT_CACHELESS STORE_RAW [[HI:T[0-9]+\.[XYZW]]]
-; EG-DAG: SUB_INT {{[* ]*}}[[LO]]
-; EG-DAG: SUBB_UINT
-; EG-DAG: SUB_INT
-; EG-DAG: SUB_INT {{[* ]*}}[[HI]]
-; EG-NOT: SUB
-define void @v_sub_i64(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %inA, i64 addrspace(1)* noalias %inB) nounwind {
- %tid = call i32 @llvm.r600.read.tidig.x() readnone
- %a_ptr = getelementptr i64, i64 addrspace(1)* %inA, i32 %tid
- %b_ptr = getelementptr i64, i64 addrspace(1)* %inB, i32 %tid
- %a = load i64, i64 addrspace(1)* %a_ptr
- %b = load i64, i64 addrspace(1)* %b_ptr
- %result = sub i64 %a, %b
- store i64 %result, i64 addrspace(1)* %out, align 8
- ret void
-}
-
-; FUNC-LABEL: {{^}}v_test_sub_v2i64:
-; SI: v_sub_i32_e32
-; SI: v_subb_u32_e32
-; SI: v_sub_i32_e32
-; SI: v_subb_u32_e32
-define void @v_test_sub_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* noalias %inA, <2 x i64> addrspace(1)* noalias %inB) {
- %tid = call i32 @llvm.r600.read.tidig.x() readnone
- %a_ptr = getelementptr <2 x i64>, <2 x i64> addrspace(1)* %inA, i32 %tid
- %b_ptr = getelementptr <2 x i64>, <2 x i64> addrspace(1)* %inB, i32 %tid
- %a = load <2 x i64>, <2 x i64> addrspace(1)* %a_ptr
- %b = load <2 x i64>, <2 x i64> addrspace(1)* %b_ptr
- %result = sub <2 x i64> %a, %b
- store <2 x i64> %result, <2 x i64> addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}v_test_sub_v4i64:
-; SI: v_sub_i32_e32
-; SI: v_subb_u32_e32
-; SI: v_sub_i32_e32
-; SI: v_subb_u32_e32
-; SI: v_sub_i32_e32
-; SI: v_subb_u32_e32
-; SI: v_sub_i32_e32
-; SI: v_subb_u32_e32
-define void @v_test_sub_v4i64(<4 x i64> addrspace(1)* %out, <4 x i64> addrspace(1)* noalias %inA, <4 x i64> addrspace(1)* noalias %inB) {
- %tid = call i32 @llvm.r600.read.tidig.x() readnone
- %a_ptr = getelementptr <4 x i64>, <4 x i64> addrspace(1)* %inA, i32 %tid
- %b_ptr = getelementptr <4 x i64>, <4 x i64> addrspace(1)* %inB, i32 %tid
- %a = load <4 x i64>, <4 x i64> addrspace(1)* %a_ptr
- %b = load <4 x i64>, <4 x i64> addrspace(1)* %b_ptr
- %result = sub <4 x i64> %a, %b
- store <4 x i64> %result, <4 x i64> addrspace(1)* %out
- ret void
-}
Removed: llvm/trunk/test/CodeGen/R600/subreg-coalescer-crash.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/subreg-coalescer-crash.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/subreg-coalescer-crash.ll (original)
+++ llvm/trunk/test/CodeGen/R600/subreg-coalescer-crash.ll (removed)
@@ -1,109 +0,0 @@
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs -o - %s
-; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs -o - %s
-
-; SI-LABEL: {{^}}row_filter_C1_D0:
-; SI: s_endpgm
-; Function Attrs: nounwind
-define void @row_filter_C1_D0() {
-entry:
- br i1 undef, label %for.inc.1, label %do.body.preheader
-
-do.body.preheader: ; preds = %entry
- %0 = insertelement <4 x i32> zeroinitializer, i32 undef, i32 1
- br i1 undef, label %do.body56.1, label %do.body90
-
-do.body90: ; preds = %do.body56.2, %do.body56.1, %do.body.preheader
- %1 = phi <4 x i32> [ %6, %do.body56.2 ], [ %5, %do.body56.1 ], [ %0, %do.body.preheader ]
- %2 = insertelement <4 x i32> %1, i32 undef, i32 2
- %3 = insertelement <4 x i32> %2, i32 undef, i32 3
- br i1 undef, label %do.body124.1, label %do.body.1562.preheader
-
-do.body.1562.preheader: ; preds = %do.body124.1, %do.body90
- %storemerge = phi <4 x i32> [ %3, %do.body90 ], [ %7, %do.body124.1 ]
- %4 = insertelement <4 x i32> undef, i32 undef, i32 1
- br label %for.inc.1
-
-do.body56.1: ; preds = %do.body.preheader
- %5 = insertelement <4 x i32> %0, i32 undef, i32 1
- %or.cond472.1 = or i1 undef, undef
- br i1 %or.cond472.1, label %do.body56.2, label %do.body90
-
-do.body56.2: ; preds = %do.body56.1
- %6 = insertelement <4 x i32> %5, i32 undef, i32 1
- br label %do.body90
-
-do.body124.1: ; preds = %do.body90
- %7 = insertelement <4 x i32> %3, i32 undef, i32 3
- br label %do.body.1562.preheader
-
-for.inc.1: ; preds = %do.body.1562.preheader, %entry
- %storemerge591 = phi <4 x i32> [ zeroinitializer, %entry ], [ %storemerge, %do.body.1562.preheader ]
- %add.i495 = add <4 x i32> %storemerge591, undef
- unreachable
-}
-
-; SI-LABEL: {{^}}foo:
-; SI: s_endpgm
-define void @foo() #0 {
-bb:
- br i1 undef, label %bb2, label %bb1
-
-bb1: ; preds = %bb
- br i1 undef, label %bb4, label %bb6
-
-bb2: ; preds = %bb4, %bb
- %tmp = phi float [ %tmp5, %bb4 ], [ 0.000000e+00, %bb ]
- br i1 undef, label %bb9, label %bb13
-
-bb4: ; preds = %bb7, %bb6, %bb1
- %tmp5 = phi float [ undef, %bb1 ], [ undef, %bb6 ], [ %tmp8, %bb7 ]
- br label %bb2
-
-bb6: ; preds = %bb1
- br i1 undef, label %bb7, label %bb4
-
-bb7: ; preds = %bb6
- %tmp8 = fmul float undef, undef
- br label %bb4
-
-bb9: ; preds = %bb2
- %tmp10 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 2)
- %tmp11 = extractelement <4 x float> %tmp10, i32 1
- %tmp12 = extractelement <4 x float> %tmp10, i32 3
- br label %bb14
-
-bb13: ; preds = %bb2
- br i1 undef, label %bb23, label %bb24
-
-bb14: ; preds = %bb27, %bb24, %bb9
- %tmp15 = phi float [ %tmp12, %bb9 ], [ undef, %bb27 ], [ 0.000000e+00, %bb24 ]
- %tmp16 = phi float [ %tmp11, %bb9 ], [ undef, %bb27 ], [ %tmp25, %bb24 ]
- %tmp17 = fmul float 10.5, %tmp16
- %tmp18 = fmul float 11.5, %tmp15
- call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %tmp18, float %tmp17, float %tmp17, float %tmp17)
- ret void
-
-bb23: ; preds = %bb13
- br i1 undef, label %bb24, label %bb26
-
-bb24: ; preds = %bb26, %bb23, %bb13
- %tmp25 = phi float [ %tmp, %bb13 ], [ %tmp, %bb26 ], [ 0.000000e+00, %bb23 ]
- br i1 undef, label %bb27, label %bb14
-
-bb26: ; preds = %bb23
- br label %bb24
-
-bb27: ; preds = %bb24
- br label %bb14
-}
-
-; Function Attrs: nounwind readnone
-declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1
-
-; Function Attrs: nounwind readnone
-declare i32 @llvm.SI.packf16(float, float) #1
-
-declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
-
-attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" "unsafe-fp-math"="true" }
-attributes #1 = { nounwind readnone }
Removed: llvm/trunk/test/CodeGen/R600/subreg-eliminate-dead.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/subreg-eliminate-dead.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/subreg-eliminate-dead.ll (original)
+++ llvm/trunk/test/CodeGen/R600/subreg-eliminate-dead.ll (removed)
@@ -1,19 +0,0 @@
-; RUN: llc -mtriple=amdgcn-- -verify-machineinstrs -o - %s | FileCheck %s
-; LiveRangeEdit::eliminateDeadDef did not update LiveInterval sub ranges
-; properly.
-
-; Just make sure this test doesn't crash.
-; CHECK-LABEL: foobar:
-; CHECK: s_endpgm
-define void @foobar() {
- %v0 = icmp eq <4 x i32> undef, <i32 0, i32 1, i32 2, i32 3>
- %v3 = sext <4 x i1> %v0 to <4 x i32>
- %v4 = extractelement <4 x i32> %v3, i32 1
- %v5 = icmp ne i32 %v4, 0
- %v6 = select i1 %v5, i32 undef, i32 0
- %v15 = insertelement <2 x i32> undef, i32 %v6, i32 1
- store <2 x i32> %v15, <2 x i32> addrspace(1)* undef, align 8
- ret void
-}
-
-declare double @llvm.fma.f64(double, double, double)
Removed: llvm/trunk/test/CodeGen/R600/swizzle-export.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/swizzle-export.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/swizzle-export.ll (original)
+++ llvm/trunk/test/CodeGen/R600/swizzle-export.ll (removed)
@@ -1,129 +0,0 @@
-; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG %s
-
-;EG: {{^}}main:
-;EG: EXPORT T{{[0-9]+}}.XYXX
-;EG: EXPORT T{{[0-9]+}}.ZXXX
-;EG: EXPORT T{{[0-9]+}}.XXWX
-;EG: EXPORT T{{[0-9]+}}.XXXW
-
-define void @main(<4 x float> inreg %reg0, <4 x float> inreg %reg1) #0 {
-main_body:
- %0 = extractelement <4 x float> %reg1, i32 0
- %1 = extractelement <4 x float> %reg1, i32 1
- %2 = extractelement <4 x float> %reg1, i32 2
- %3 = extractelement <4 x float> %reg1, i32 3
- %4 = load <4 x float>, <4 x float> addrspace(8)* null
- %5 = extractelement <4 x float> %4, i32 1
- %6 = load <4 x float>, <4 x float> addrspace(8)* null
- %7 = extractelement <4 x float> %6, i32 2
- %8 = load <4 x float>, <4 x float> addrspace(8)* null
- %9 = extractelement <4 x float> %8, i32 0
- %10 = fmul float 0.000000e+00, %9
- %11 = load <4 x float>, <4 x float> addrspace(8)* null
- %12 = extractelement <4 x float> %11, i32 0
- %13 = fmul float %5, %12
- %14 = load <4 x float>, <4 x float> addrspace(8)* null
- %15 = extractelement <4 x float> %14, i32 0
- %16 = fmul float 0.000000e+00, %15
- %17 = load <4 x float>, <4 x float> addrspace(8)* null
- %18 = extractelement <4 x float> %17, i32 0
- %19 = fmul float 0.000000e+00, %18
- %20 = load <4 x float>, <4 x float> addrspace(8)* null
- %21 = extractelement <4 x float> %20, i32 0
- %22 = fmul float %7, %21
- %23 = load <4 x float>, <4 x float> addrspace(8)* null
- %24 = extractelement <4 x float> %23, i32 0
- %25 = fmul float 0.000000e+00, %24
- %26 = load <4 x float>, <4 x float> addrspace(8)* null
- %27 = extractelement <4 x float> %26, i32 0
- %28 = fmul float 0.000000e+00, %27
- %29 = load <4 x float>, <4 x float> addrspace(8)* null
- %30 = extractelement <4 x float> %29, i32 0
- %31 = fmul float 0.000000e+00, %30
- %32 = load <4 x float>, <4 x float> addrspace(8)* null
- %33 = extractelement <4 x float> %32, i32 0
- %34 = fmul float 0.000000e+00, %33
- %35 = load <4 x float>, <4 x float> addrspace(8)* null
- %36 = extractelement <4 x float> %35, i32 0
- %37 = fmul float 0.000000e+00, %36
- %38 = load <4 x float>, <4 x float> addrspace(8)* null
- %39 = extractelement <4 x float> %38, i32 0
- %40 = fmul float 1.000000e+00, %39
- %41 = load <4 x float>, <4 x float> addrspace(8)* null
- %42 = extractelement <4 x float> %41, i32 0
- %43 = fmul float 0.000000e+00, %42
- %44 = load <4 x float>, <4 x float> addrspace(8)* null
- %45 = extractelement <4 x float> %44, i32 0
- %46 = fmul float 0.000000e+00, %45
- %47 = load <4 x float>, <4 x float> addrspace(8)* null
- %48 = extractelement <4 x float> %47, i32 0
- %49 = fmul float 0.000000e+00, %48
- %50 = load <4 x float>, <4 x float> addrspace(8)* null
- %51 = extractelement <4 x float> %50, i32 0
- %52 = fmul float 0.000000e+00, %51
- %53 = load <4 x float>, <4 x float> addrspace(8)* null
- %54 = extractelement <4 x float> %53, i32 0
- %55 = fmul float 1.000000e+00, %54
- %56 = insertelement <4 x float> undef, float %0, i32 0
- %57 = insertelement <4 x float> %56, float %1, i32 1
- %58 = insertelement <4 x float> %57, float %2, i32 2
- %59 = insertelement <4 x float> %58, float %3, i32 3
- call void @llvm.R600.store.swizzle(<4 x float> %59, i32 60, i32 1)
- %60 = insertelement <4 x float> undef, float %10, i32 0
- %61 = insertelement <4 x float> %60, float %13, i32 1
- %62 = insertelement <4 x float> %61, float %16, i32 2
- %63 = insertelement <4 x float> %62, float %19, i32 3
- call void @llvm.R600.store.swizzle(<4 x float> %63, i32 0, i32 2)
- %64 = insertelement <4 x float> undef, float %22, i32 0
- %65 = insertelement <4 x float> %64, float %25, i32 1
- %66 = insertelement <4 x float> %65, float %28, i32 2
- %67 = insertelement <4 x float> %66, float %31, i32 3
- call void @llvm.R600.store.swizzle(<4 x float> %67, i32 1, i32 2)
- %68 = insertelement <4 x float> undef, float %34, i32 0
- %69 = insertelement <4 x float> %68, float %37, i32 1
- %70 = insertelement <4 x float> %69, float %40, i32 2
- %71 = insertelement <4 x float> %70, float %43, i32 3
- call void @llvm.R600.store.swizzle(<4 x float> %71, i32 2, i32 2)
- %72 = insertelement <4 x float> undef, float %46, i32 0
- %73 = insertelement <4 x float> %72, float %49, i32 1
- %74 = insertelement <4 x float> %73, float %52, i32 2
- %75 = insertelement <4 x float> %74, float %55, i32 3
- call void @llvm.R600.store.swizzle(<4 x float> %75, i32 3, i32 2)
- ret void
-}
-
-; EG: {{^}}main2:
-; EG: T{{[0-9]+}}.XY__
-; EG: T{{[0-9]+}}.ZXY0
-
-define void @main2(<4 x float> inreg %reg0, <4 x float> inreg %reg1) #0 {
-main_body:
- %0 = extractelement <4 x float> %reg1, i32 0
- %1 = extractelement <4 x float> %reg1, i32 1
- %2 = fadd float %0, 2.5
- %3 = fmul float %1, 3.5
- %4 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
- %5 = extractelement <4 x float> %4, i32 0
- %6 = call float @llvm.cos.f32(float %5)
- %7 = load <4 x float>, <4 x float> addrspace(8)* null
- %8 = extractelement <4 x float> %7, i32 0
- %9 = load <4 x float>, <4 x float> addrspace(8)* null
- %10 = extractelement <4 x float> %9, i32 1
- %11 = insertelement <4 x float> undef, float %2, i32 0
- %12 = insertelement <4 x float> %11, float %3, i32 1
- call void @llvm.R600.store.swizzle(<4 x float> %12, i32 60, i32 1)
- %13 = insertelement <4 x float> undef, float %6, i32 0
- %14 = insertelement <4 x float> %13, float %8, i32 1
- %15 = insertelement <4 x float> %14, float %10, i32 2
- %16 = insertelement <4 x float> %15, float 0.000000e+00, i32 3
- call void @llvm.R600.store.swizzle(<4 x float> %16, i32 0, i32 2)
- ret void
-}
-
-; Function Attrs: nounwind readonly
-declare float @llvm.cos.f32(float) #1
-
-declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
-
-attributes #0 = { "ShaderType"="1" }
-attributes #1 = { nounwind readonly }
Removed: llvm/trunk/test/CodeGen/R600/tex-clause-antidep.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/tex-clause-antidep.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/tex-clause-antidep.ll (original)
+++ llvm/trunk/test/CodeGen/R600/tex-clause-antidep.ll (removed)
@@ -1,25 +0,0 @@
-;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
-
-;CHECK: TEX
-;CHECK-NEXT: ALU
-
-define void @test(<4 x float> inreg %reg0) #0 {
- %1 = extractelement <4 x float> %reg0, i32 0
- %2 = extractelement <4 x float> %reg0, i32 1
- %3 = extractelement <4 x float> %reg0, i32 2
- %4 = extractelement <4 x float> %reg0, i32 3
- %5 = insertelement <4 x float> undef, float %1, i32 0
- %6 = insertelement <4 x float> %5, float %2, i32 1
- %7 = insertelement <4 x float> %6, float %3, i32 2
- %8 = insertelement <4 x float> %7, float %4, i32 3
- %9 = call <4 x float> @llvm.R600.tex(<4 x float> %8, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
- %10 = call <4 x float> @llvm.R600.tex(<4 x float> %8, i32 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
- %11 = fadd <4 x float> %9, %10
- call void @llvm.R600.store.swizzle(<4 x float> %11, i32 0, i32 0)
- ret void
-}
-
-declare <4 x float> @llvm.R600.tex(<4 x float>, i32, i32, i32, i32, i32, i32, i32, i32, i32) readnone
-declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
-
-attributes #0 = { "ShaderType"="1" }
\ No newline at end of file
Removed: llvm/trunk/test/CodeGen/R600/texture-input-merge.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/texture-input-merge.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/texture-input-merge.ll (original)
+++ llvm/trunk/test/CodeGen/R600/texture-input-merge.ll (removed)
@@ -1,31 +0,0 @@
-;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
-
-;CHECK-NOT: MOV
-
-define void @test(<4 x float> inreg %reg0) #0 {
- %1 = extractelement <4 x float> %reg0, i32 0
- %2 = extractelement <4 x float> %reg0, i32 1
- %3 = extractelement <4 x float> %reg0, i32 2
- %4 = extractelement <4 x float> %reg0, i32 3
- %5 = fmul float %1, 3.0
- %6 = fmul float %2, 3.0
- %7 = fmul float %3, 3.0
- %8 = fmul float %4, 3.0
- %9 = insertelement <4 x float> undef, float %5, i32 0
- %10 = insertelement <4 x float> %9, float %6, i32 1
- %11 = insertelement <4 x float> undef, float %7, i32 0
- %12 = insertelement <4 x float> %11, float %5, i32 1
- %13 = insertelement <4 x float> undef, float %8, i32 0
- %14 = call <4 x float> @llvm.R600.tex(<4 x float> %10, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
- %15 = call <4 x float> @llvm.R600.tex(<4 x float> %12, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
- %16 = call <4 x float> @llvm.R600.tex(<4 x float> %13, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
- %17 = fadd <4 x float> %14, %15
- %18 = fadd <4 x float> %17, %16
- call void @llvm.R600.store.swizzle(<4 x float> %18, i32 0, i32 0)
- ret void
-}
-
-declare <4 x float> @llvm.R600.tex(<4 x float>, i32, i32, i32, i32, i32, i32, i32, i32, i32) readnone
-declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
-
-attributes #0 = { "ShaderType"="1" }
\ No newline at end of file
Removed: llvm/trunk/test/CodeGen/R600/trunc-cmp-constant.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/trunc-cmp-constant.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/trunc-cmp-constant.ll (original)
+++ llvm/trunk/test/CodeGen/R600/trunc-cmp-constant.ll (removed)
@@ -1,170 +0,0 @@
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-
-; FUNC-LABEL: {{^}}sextload_i1_to_i32_trunc_cmp_eq_0:
-; SI: buffer_load_ubyte [[LOAD:v[0-9]+]]
-; SI: v_and_b32_e32 [[TMP:v[0-9]+]], 1, [[LOAD]]
-; SI: v_cmp_eq_i32_e32 vcc, 1, [[TMP]]{{$}}
-; SI: s_xor_b64 s{{\[[0-9]+:[0-9]+\]}}, vcc, -1{{$}}
-; SI: v_cndmask_b32_e64
-; SI: buffer_store_byte
-define void @sextload_i1_to_i32_trunc_cmp_eq_0(i1 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
- %load = load i1, i1 addrspace(1)* %in
- %ext = sext i1 %load to i32
- %cmp = icmp eq i32 %ext, 0
- store i1 %cmp, i1 addrspace(1)* %out
- ret void
-}
-
-; FIXME: The negate should be inverting the compare.
-; FUNC-LABEL: {{^}}zextload_i1_to_i32_trunc_cmp_eq_0:
-; SI: buffer_load_ubyte [[LOAD:v[0-9]+]]
-; SI: v_and_b32_e32 [[TMP:v[0-9]+]], 1, [[LOAD]]
-; SI: v_cmp_eq_i32_e32 vcc, 1, [[TMP]]{{$}}
-; SI-NEXT: s_xor_b64 [[NEG:s\[[0-9]+:[0-9]+\]]], vcc, -1
-; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, [[NEG]]
-; SI-NEXT: buffer_store_byte [[RESULT]]
-define void @zextload_i1_to_i32_trunc_cmp_eq_0(i1 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
- %load = load i1, i1 addrspace(1)* %in
- %ext = zext i1 %load to i32
- %cmp = icmp eq i32 %ext, 0
- store i1 %cmp, i1 addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}sextload_i1_to_i32_trunc_cmp_eq_1:
-; SI: v_mov_b32_e32 [[RESULT:v[0-9]+]], 0{{$}}
-; SI: buffer_store_byte [[RESULT]]
-define void @sextload_i1_to_i32_trunc_cmp_eq_1(i1 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
- %load = load i1, i1 addrspace(1)* %in
- %ext = sext i1 %load to i32
- %cmp = icmp eq i32 %ext, 1
- store i1 %cmp, i1 addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}zextload_i1_to_i32_trunc_cmp_eq_1:
-; SI: buffer_load_ubyte [[LOAD:v[0-9]+]]
-; SI: v_and_b32_e32 [[RESULT:v[0-9]+]], 1, [[LOAD]]
-; SI-NEXT: buffer_store_byte [[RESULT]]
-define void @zextload_i1_to_i32_trunc_cmp_eq_1(i1 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
- %load = load i1, i1 addrspace(1)* %in
- %ext = zext i1 %load to i32
- %cmp = icmp eq i32 %ext, 1
- store i1 %cmp, i1 addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}sextload_i1_to_i32_trunc_cmp_eq_neg1:
-; SI: buffer_load_ubyte [[LOAD:v[0-9]+]]
-; SI: v_and_b32_e32 [[RESULT:v[0-9]+]], 1, [[LOAD]]
-; SI-NEXT: buffer_store_byte [[RESULT]]
-define void @sextload_i1_to_i32_trunc_cmp_eq_neg1(i1 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
- %load = load i1, i1 addrspace(1)* %in
- %ext = sext i1 %load to i32
- %cmp = icmp eq i32 %ext, -1
- store i1 %cmp, i1 addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}zextload_i1_to_i32_trunc_cmp_eq_neg1:
-; SI: v_mov_b32_e32 [[RESULT:v[0-9]+]], 0{{$}}
-; SI: buffer_store_byte [[RESULT]]
-define void @zextload_i1_to_i32_trunc_cmp_eq_neg1(i1 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
- %load = load i1, i1 addrspace(1)* %in
- %ext = zext i1 %load to i32
- %cmp = icmp eq i32 %ext, -1
- store i1 %cmp, i1 addrspace(1)* %out
- ret void
-}
-
-
-; FUNC-LABEL: {{^}}sextload_i1_to_i32_trunc_cmp_ne_0:
-; SI: buffer_load_ubyte [[LOAD:v[0-9]+]]
-; SI: v_and_b32_e32 [[TMP:v[0-9]+]], 1, [[LOAD]]
-; SI-NEXT: buffer_store_byte [[RESULT]]
-define void @sextload_i1_to_i32_trunc_cmp_ne_0(i1 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
- %load = load i1, i1 addrspace(1)* %in
- %ext = sext i1 %load to i32
- %cmp = icmp ne i32 %ext, 0
- store i1 %cmp, i1 addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}zextload_i1_to_i32_trunc_cmp_ne_0:
-; SI: buffer_load_ubyte [[LOAD:v[0-9]+]]
-; SI: v_and_b32_e32 [[TMP:v[0-9]+]], 1, [[LOAD]]
-; SI-NEXT: buffer_store_byte [[RESULT]]
-define void @zextload_i1_to_i32_trunc_cmp_ne_0(i1 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
- %load = load i1, i1 addrspace(1)* %in
- %ext = zext i1 %load to i32
- %cmp = icmp ne i32 %ext, 0
- store i1 %cmp, i1 addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}sextload_i1_to_i32_trunc_cmp_ne_1:
-; SI: v_mov_b32_e32 [[RESULT:v[0-9]+]], 1{{$}}
-; SI: buffer_store_byte [[RESULT]]
-define void @sextload_i1_to_i32_trunc_cmp_ne_1(i1 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
- %load = load i1, i1 addrspace(1)* %in
- %ext = sext i1 %load to i32
- %cmp = icmp ne i32 %ext, 1
- store i1 %cmp, i1 addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}zextload_i1_to_i32_trunc_cmp_ne_1:
-; SI: buffer_load_ubyte [[LOAD:v[0-9]+]]
-; SI: v_and_b32_e32 [[TMP:v[0-9]+]], 1, [[LOAD]]
-; SI: v_cmp_eq_i32_e32 vcc, 1, [[TMP]]{{$}}
-; SI-NEXT: s_xor_b64 [[NEG:s\[[0-9]+:[0-9]+\]]], vcc, -1
-; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, [[NEG]]
-; SI-NEXT: buffer_store_byte [[RESULT]]
-define void @zextload_i1_to_i32_trunc_cmp_ne_1(i1 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
- %load = load i1, i1 addrspace(1)* %in
- %ext = zext i1 %load to i32
- %cmp = icmp ne i32 %ext, 1
- store i1 %cmp, i1 addrspace(1)* %out
- ret void
-}
-
-; FIXME: This should be one compare.
-; FUNC-LABEL: {{^}}sextload_i1_to_i32_trunc_cmp_ne_neg1:
-; XSI: buffer_load_ubyte [[LOAD:v[0-9]+]]
-; XSI: v_and_b32_e32 [[TMP:v[0-9]+]], 1, [[LOAD]]
-; XSI: v_cmp_eq_i32_e64 [[CMP0:s\[[0-9]+:[0-9]+\]]], [[TMP]], 0{{$}}
-; XSI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, [[CMP0]]
-; XSI-NEXT: buffer_store_byte [[RESULT]]
-define void @sextload_i1_to_i32_trunc_cmp_ne_neg1(i1 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
- %load = load i1, i1 addrspace(1)* %in
- %ext = sext i1 %load to i32
- %cmp = icmp ne i32 %ext, -1
- store i1 %cmp, i1 addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}zextload_i1_to_i32_trunc_cmp_ne_neg1:
-; SI: v_mov_b32_e32 [[RESULT:v[0-9]+]], 1{{$}}
-; SI: buffer_store_byte [[RESULT]]
-define void @zextload_i1_to_i32_trunc_cmp_ne_neg1(i1 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
- %load = load i1, i1 addrspace(1)* %in
- %ext = zext i1 %load to i32
- %cmp = icmp ne i32 %ext, -1
- store i1 %cmp, i1 addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}masked_load_i1_to_i32_trunc_cmp_ne_neg1:
-; SI: buffer_load_sbyte [[LOAD:v[0-9]+]]
-; SI: v_cmp_ne_i32_e32 vcc, -1, [[LOAD]]{{$}}
-; SI-NEXT: v_cndmask_b32_e64
-; SI-NEXT: buffer_store_byte
-define void @masked_load_i1_to_i32_trunc_cmp_ne_neg1(i1 addrspace(1)* %out, i8 addrspace(1)* %in) nounwind {
- %load = load i8, i8 addrspace(1)* %in
- %masked = and i8 %load, 255
- %ext = sext i8 %masked to i32
- %cmp = icmp ne i32 %ext, -1
- store i1 %cmp, i1 addrspace(1)* %out
- ret void
-}
Removed: llvm/trunk/test/CodeGen/R600/trunc-store-f64-to-f16.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/trunc-store-f64-to-f16.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/trunc-store-f64-to-f16.ll (original)
+++ llvm/trunk/test/CodeGen/R600/trunc-store-f64-to-f16.ll (removed)
@@ -1,56 +0,0 @@
-; XFAIL: *
-; RUN: llc -march=amdgcn -mcpu=SI < %s
-
-; GCN-LABEL: {{^}}global_truncstore_f64_to_f16:
-; GCN: s_endpgm
-define void @global_truncstore_f64_to_f16(half addrspace(1)* %out, double addrspace(1)* %in) #0 {
- %val = load double, double addrspace(1)* %in
- %cvt = fptrunc double %val to half
- store half %cvt, half addrspace(1)* %out
- ret void
-}
-
-; GCN-LABEL: {{^}}global_truncstore_v2f64_to_v2f16:
-; GCN: s_endpgm
-define void @global_truncstore_v2f64_to_v2f16(<2 x half> addrspace(1)* %out, <2 x double> addrspace(1)* %in) #0 {
- %val = load <2 x double>, <2 x double> addrspace(1)* %in
- %cvt = fptrunc <2 x double> %val to <2 x half>
- store <2 x half> %cvt, <2 x half> addrspace(1)* %out
- ret void
-}
-
-; GCN-LABEL: {{^}}global_truncstore_v3f64_to_v3f16:
-; GCN: s_endpgm
-define void @global_truncstore_v3f64_to_v3f16(<3 x half> addrspace(1)* %out, <3 x double> addrspace(1)* %in) #0 {
- %val = load <3 x double>, <3 x double> addrspace(1)* %in
- %cvt = fptrunc <3 x double> %val to <3 x half>
- store <3 x half> %cvt, <3 x half> addrspace(1)* %out
- ret void
-}
-
-; GCN-LABEL: {{^}}global_truncstore_v4f64_to_v4f16:
-; GCN: s_endpgm
-define void @global_truncstore_v4f64_to_v4f16(<4 x half> addrspace(1)* %out, <4 x double> addrspace(1)* %in) #0 {
- %val = load <4 x double>, <4 x double> addrspace(1)* %in
- %cvt = fptrunc <4 x double> %val to <4 x half>
- store <4 x half> %cvt, <4 x half> addrspace(1)* %out
- ret void
-}
-
-; GCN-LABEL: {{^}}global_truncstore_v8f64_to_v8f16:
-; GCN: s_endpgm
-define void @global_truncstore_v8f64_to_v8f16(<8 x half> addrspace(1)* %out, <8 x double> addrspace(1)* %in) #0 {
- %val = load <8 x double>, <8 x double> addrspace(1)* %in
- %cvt = fptrunc <8 x double> %val to <8 x half>
- store <8 x half> %cvt, <8 x half> addrspace(1)* %out
- ret void
-}
-
-; GCN-LABEL: {{^}}global_truncstore_v16f64_to_v16f16:
-; GCN: s_endpgm
-define void @global_truncstore_v16f64_to_v16f16(<16 x half> addrspace(1)* %out, <16 x double> addrspace(1)* %in) #0 {
- %val = load <16 x double>, <16 x double> addrspace(1)* %in
- %cvt = fptrunc <16 x double> %val to <16 x half>
- store <16 x half> %cvt, <16 x half> addrspace(1)* %out
- ret void
-}
Removed: llvm/trunk/test/CodeGen/R600/trunc-store-i1.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/trunc-store-i1.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/trunc-store-i1.ll (original)
+++ llvm/trunk/test/CodeGen/R600/trunc-store-i1.ll (removed)
@@ -1,33 +0,0 @@
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
-; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
-
-
-; SI-LABEL: {{^}}global_truncstore_i32_to_i1:
-; SI: s_load_dword [[LOAD:s[0-9]+]],
-; SI: s_and_b32 [[SREG:s[0-9]+]], [[LOAD]], 1
-; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], [[SREG]]
-; SI: buffer_store_byte [[VREG]],
-define void @global_truncstore_i32_to_i1(i1 addrspace(1)* %out, i32 %val) nounwind {
- %trunc = trunc i32 %val to i1
- store i1 %trunc, i1 addrspace(1)* %out, align 1
- ret void
-}
-
-; SI-LABEL: {{^}}global_truncstore_i64_to_i1:
-; SI: buffer_store_byte
-define void @global_truncstore_i64_to_i1(i1 addrspace(1)* %out, i64 %val) nounwind {
- %trunc = trunc i64 %val to i1
- store i1 %trunc, i1 addrspace(1)* %out, align 1
- ret void
-}
-
-; SI-LABEL: {{^}}global_truncstore_i16_to_i1:
-; SI: s_load_dword [[LOAD:s[0-9]+]],
-; SI: s_and_b32 [[SREG:s[0-9]+]], [[LOAD]], 1
-; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], [[SREG]]
-; SI: buffer_store_byte [[VREG]],
-define void @global_truncstore_i16_to_i1(i1 addrspace(1)* %out, i16 %val) nounwind {
- %trunc = trunc i16 %val to i1
- store i1 %trunc, i1 addrspace(1)* %out, align 1
- ret void
-}
Removed: llvm/trunk/test/CodeGen/R600/trunc-vector-store-assertion-failure.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/trunc-vector-store-assertion-failure.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/trunc-vector-store-assertion-failure.ll (original)
+++ llvm/trunk/test/CodeGen/R600/trunc-vector-store-assertion-failure.ll (removed)
@@ -1,20 +0,0 @@
-; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
-
-; This tests for a bug in the SelectionDAG where custom lowered truncated
-; vector stores at the end of a basic block were not being added to the
-; LegalizedNodes list, which triggered an assertion failure.
-
-; CHECK-LABEL: {{^}}test:
-; CHECK: MEM_RAT_CACHELESS STORE_RAW
-define void @test(<4 x i8> addrspace(1)* %out, i32 %cond, <4 x i8> %in) {
-entry:
- %0 = icmp eq i32 %cond, 0
- br i1 %0, label %if, label %done
-
-if:
- store <4 x i8> %in, <4 x i8> addrspace(1)* %out
- br label %done
-
-done:
- ret void
-}
Removed: llvm/trunk/test/CodeGen/R600/trunc.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/trunc.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/trunc.ll (original)
+++ llvm/trunk/test/CodeGen/R600/trunc.ll (removed)
@@ -1,100 +0,0 @@
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
-; RUN: llc -march=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG %s
-
-declare i32 @llvm.r600.read.tidig.x() nounwind readnone
-
-define void @trunc_i64_to_i32_store(i32 addrspace(1)* %out, i64 %in) {
-; SI-LABEL: {{^}}trunc_i64_to_i32_store:
-; SI: s_load_dword [[SLOAD:s[0-9]+]], s[0:1], 0xb
-; SI: v_mov_b32_e32 [[VLOAD:v[0-9]+]], [[SLOAD]]
-; SI: buffer_store_dword [[VLOAD]]
-
-; EG-LABEL: {{^}}trunc_i64_to_i32_store:
-; EG: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
-; EG: LSHR
-; EG-NEXT: 2(
-
- %result = trunc i64 %in to i32
- store i32 %result, i32 addrspace(1)* %out, align 4
- ret void
-}
-
-; SI-LABEL: {{^}}trunc_load_shl_i64:
-; SI-DAG: s_load_dwordx2
-; SI-DAG: s_load_dword [[SREG:s[0-9]+]],
-; SI: s_lshl_b32 [[SHL:s[0-9]+]], [[SREG]], 2
-; SI: v_mov_b32_e32 [[VSHL:v[0-9]+]], [[SHL]]
-; SI: buffer_store_dword [[VSHL]],
-define void @trunc_load_shl_i64(i32 addrspace(1)* %out, i64 %a) {
- %b = shl i64 %a, 2
- %result = trunc i64 %b to i32
- store i32 %result, i32 addrspace(1)* %out, align 4
- ret void
-}
-
-; SI-LABEL: {{^}}trunc_shl_i64:
-; SI: s_load_dwordx2 s{{\[}}[[LO_SREG:[0-9]+]]:{{[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0xd
-; SI: s_lshl_b64 s{{\[}}[[LO_SHL:[0-9]+]]:{{[0-9]+\]}}, s{{\[}}[[LO_SREG]]:{{[0-9]+\]}}, 2
-; SI: s_add_u32 s[[LO_SREG2:[0-9]+]], s[[LO_SHL]],
-; SI: s_addc_u32
-; SI: v_mov_b32_e32
-; SI: v_mov_b32_e32
-; SI: v_mov_b32_e32 v[[LO_VREG:[0-9]+]], s[[LO_SREG2]]
-; SI: buffer_store_dword v[[LO_VREG]],
-define void @trunc_shl_i64(i64 addrspace(1)* %out2, i32 addrspace(1)* %out, i64 %a) {
- %aa = add i64 %a, 234 ; Prevent shrinking store.
- %b = shl i64 %aa, 2
- %result = trunc i64 %b to i32
- store i32 %result, i32 addrspace(1)* %out, align 4
- store i64 %b, i64 addrspace(1)* %out2, align 8 ; Prevent reducing ops to 32-bits
- ret void
-}
-
-; SI-LABEL: {{^}}trunc_i32_to_i1:
-; SI: v_and_b32_e32 v{{[0-9]+}}, 1, v{{[0-9]+}}
-; SI: v_cmp_eq_i32
-define void @trunc_i32_to_i1(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) {
- %a = load i32, i32 addrspace(1)* %ptr, align 4
- %trunc = trunc i32 %a to i1
- %result = select i1 %trunc, i32 1, i32 0
- store i32 %result, i32 addrspace(1)* %out, align 4
- ret void
-}
-
-; SI-LABEL: {{^}}sgpr_trunc_i32_to_i1:
-; SI: v_and_b32_e64 v{{[0-9]+}}, 1, s{{[0-9]+}}
-; SI: v_cmp_eq_i32
-define void @sgpr_trunc_i32_to_i1(i32 addrspace(1)* %out, i32 %a) {
- %trunc = trunc i32 %a to i1
- %result = select i1 %trunc, i32 1, i32 0
- store i32 %result, i32 addrspace(1)* %out, align 4
- ret void
-}
-
-; SI-LABEL: {{^}}s_trunc_i64_to_i1:
-; SI: s_load_dwordx2 s{{\[}}[[SLO:[0-9]+]]:{{[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0xb
-; SI: v_and_b32_e64 [[MASKED:v[0-9]+]], 1, s[[SLO]]
-; SI: v_cmp_eq_i32_e32 vcc, 1, [[MASKED]]
-; SI: v_cndmask_b32_e64 {{v[0-9]+}}, -12, 63, vcc
-define void @s_trunc_i64_to_i1(i32 addrspace(1)* %out, i64 %x) {
- %trunc = trunc i64 %x to i1
- %sel = select i1 %trunc, i32 63, i32 -12
- store i32 %sel, i32 addrspace(1)* %out
- ret void
-}
-
-; SI-LABEL: {{^}}v_trunc_i64_to_i1:
-; SI: buffer_load_dwordx2 v{{\[}}[[VLO:[0-9]+]]:{{[0-9]+\]}}
-; SI: v_and_b32_e32 [[MASKED:v[0-9]+]], 1, v[[VLO]]
-; SI: v_cmp_eq_i32_e32 vcc, 1, [[MASKED]]
-; SI: v_cndmask_b32_e64 {{v[0-9]+}}, -12, 63, vcc
-define void @v_trunc_i64_to_i1(i32 addrspace(1)* %out, i64 addrspace(1)* %in) {
- %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %gep = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
- %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
- %x = load i64, i64 addrspace(1)* %gep
-
- %trunc = trunc i64 %x to i1
- %sel = select i1 %trunc, i32 63, i32 -12
- store i32 %sel, i32 addrspace(1)* %out.gep
- ret void
-}
Removed: llvm/trunk/test/CodeGen/R600/tti-unroll-prefs.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/tti-unroll-prefs.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/tti-unroll-prefs.ll (original)
+++ llvm/trunk/test/CodeGen/R600/tti-unroll-prefs.ll (removed)
@@ -1,58 +0,0 @@
-; RUN: opt -loop-unroll -S -mtriple=amdgcn-- -mcpu=SI %s | FileCheck %s
-
-; This IR comes from this OpenCL C code:
-;
-; if (b + 4 > a) {
-; for (int i = 0; i < 4; i++, b++) {
-; if (b + 1 <= a)
-; *(dst + c + b) = 0;
-; else
-; break;
-; }
-; }
-;
-; This test is meant to check that this loop isn't unrolled into more than
-; four iterations. The loop unrolling preferences we currently use cause this
-; loop to not be unrolled at all, but that may change in the future.
-
-; CHECK-LABEL: @test
-; CHECK: store i8 0, i8 addrspace(1)*
-; CHECK-NOT: store i8 0, i8 addrspace(1)*
-; CHECK: ret void
-define void @test(i8 addrspace(1)* nocapture %dst, i32 %a, i32 %b, i32 %c) {
-entry:
- %add = add nsw i32 %b, 4
- %cmp = icmp sgt i32 %add, %a
- br i1 %cmp, label %for.cond.preheader, label %if.end7
-
-for.cond.preheader: ; preds = %entry
- %cmp313 = icmp slt i32 %b, %a
- br i1 %cmp313, label %if.then4.lr.ph, label %if.end7.loopexit
-
-if.then4.lr.ph: ; preds = %for.cond.preheader
- %0 = sext i32 %c to i64
- br label %if.then4
-
-if.then4: ; preds = %if.then4.lr.ph, %if.then4
- %i.015 = phi i32 [ 0, %if.then4.lr.ph ], [ %inc, %if.then4 ]
- %b.addr.014 = phi i32 [ %b, %if.then4.lr.ph ], [ %add2, %if.then4 ]
- %add2 = add nsw i32 %b.addr.014, 1
- %1 = sext i32 %b.addr.014 to i64
- %add.ptr.sum = add nsw i64 %1, %0
- %add.ptr5 = getelementptr inbounds i8, i8 addrspace(1)* %dst, i64 %add.ptr.sum
- store i8 0, i8 addrspace(1)* %add.ptr5, align 1
- %inc = add nsw i32 %i.015, 1
- %cmp1 = icmp slt i32 %inc, 4
- %cmp3 = icmp slt i32 %add2, %a
- %or.cond = and i1 %cmp3, %cmp1
- br i1 %or.cond, label %if.then4, label %for.cond.if.end7.loopexit_crit_edge
-
-for.cond.if.end7.loopexit_crit_edge: ; preds = %if.then4
- br label %if.end7.loopexit
-
-if.end7.loopexit: ; preds = %for.cond.if.end7.loopexit_crit_edge, %for.cond.preheader
- br label %if.end7
-
-if.end7: ; preds = %if.end7.loopexit, %entry
- ret void
-}
Removed: llvm/trunk/test/CodeGen/R600/uaddo.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/uaddo.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/uaddo.ll (original)
+++ llvm/trunk/test/CodeGen/R600/uaddo.ll (removed)
@@ -1,85 +0,0 @@
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
-
-declare { i32, i1 } @llvm.uadd.with.overflow.i32(i32, i32) nounwind readnone
-declare { i64, i1 } @llvm.uadd.with.overflow.i64(i64, i64) nounwind readnone
-
-; FUNC-LABEL: {{^}}uaddo_i64_zext:
-; SI: add
-; SI: addc
-; SI: addc
-
-; EG: ADDC_UINT
-; EG: ADDC_UINT
-define void @uaddo_i64_zext(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
- %uadd = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %a, i64 %b) nounwind
- %val = extractvalue { i64, i1 } %uadd, 0
- %carry = extractvalue { i64, i1 } %uadd, 1
- %ext = zext i1 %carry to i64
- %add2 = add i64 %val, %ext
- store i64 %add2, i64 addrspace(1)* %out, align 8
- ret void
-}
-
-; FUNC-LABEL: {{^}}s_uaddo_i32:
-; SI: s_add_i32
-
-; EG: ADDC_UINT
-; EG: ADD_INT
-define void @s_uaddo_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %carryout, i32 %a, i32 %b) nounwind {
- %uadd = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %a, i32 %b) nounwind
- %val = extractvalue { i32, i1 } %uadd, 0
- %carry = extractvalue { i32, i1 } %uadd, 1
- store i32 %val, i32 addrspace(1)* %out, align 4
- store i1 %carry, i1 addrspace(1)* %carryout
- ret void
-}
-
-; FUNC-LABEL: {{^}}v_uaddo_i32:
-; SI: v_add_i32
-
-; EG: ADDC_UINT
-; EG: ADD_INT
-define void @v_uaddo_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %carryout, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
- %a = load i32, i32 addrspace(1)* %aptr, align 4
- %b = load i32, i32 addrspace(1)* %bptr, align 4
- %uadd = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %a, i32 %b) nounwind
- %val = extractvalue { i32, i1 } %uadd, 0
- %carry = extractvalue { i32, i1 } %uadd, 1
- store i32 %val, i32 addrspace(1)* %out, align 4
- store i1 %carry, i1 addrspace(1)* %carryout
- ret void
-}
-
-; FUNC-LABEL: {{^}}s_uaddo_i64:
-; SI: s_add_u32
-; SI: s_addc_u32
-
-; EG: ADDC_UINT
-; EG: ADD_INT
-define void @s_uaddo_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %carryout, i64 %a, i64 %b) nounwind {
- %uadd = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %a, i64 %b) nounwind
- %val = extractvalue { i64, i1 } %uadd, 0
- %carry = extractvalue { i64, i1 } %uadd, 1
- store i64 %val, i64 addrspace(1)* %out, align 8
- store i1 %carry, i1 addrspace(1)* %carryout
- ret void
-}
-
-; FUNC-LABEL: {{^}}v_uaddo_i64:
-; SI: v_add_i32
-; SI: v_addc_u32
-
-; EG: ADDC_UINT
-; EG: ADD_INT
-define void @v_uaddo_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %carryout, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) nounwind {
- %a = load i64, i64 addrspace(1)* %aptr, align 4
- %b = load i64, i64 addrspace(1)* %bptr, align 4
- %uadd = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %a, i64 %b) nounwind
- %val = extractvalue { i64, i1 } %uadd, 0
- %carry = extractvalue { i64, i1 } %uadd, 1
- store i64 %val, i64 addrspace(1)* %out, align 8
- store i1 %carry, i1 addrspace(1)* %carryout
- ret void
-}
Removed: llvm/trunk/test/CodeGen/R600/udiv.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/udiv.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/udiv.ll (original)
+++ llvm/trunk/test/CodeGen/R600/udiv.ll (removed)
@@ -1,48 +0,0 @@
-;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG %s
-;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI %s
-;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck --check-prefix=SI %s
-
-;EG-LABEL: {{^}}test:
-;EG-NOT: SETGE_INT
-;EG: CF_END
-
-define void @test(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
- %b_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
- %a = load i32, i32 addrspace(1) * %in
- %b = load i32, i32 addrspace(1) * %b_ptr
- %result = udiv i32 %a, %b
- store i32 %result, i32 addrspace(1)* %out
- ret void
-}
-
-;The code generated by udiv is long and complex and may frequently change.
-;The goal of this test is to make sure the ISel doesn't fail when it gets
-;a v4i32 udiv
-
-;EG-LABEL: {{^}}test2:
-;EG: CF_END
-;SI-LABEL: {{^}}test2:
-;SI: s_endpgm
-
-define void @test2(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
- %b_ptr = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %in, i32 1
- %a = load <2 x i32>, <2 x i32> addrspace(1) * %in
- %b = load <2 x i32>, <2 x i32> addrspace(1) * %b_ptr
- %result = udiv <2 x i32> %a, %b
- store <2 x i32> %result, <2 x i32> addrspace(1)* %out
- ret void
-}
-
-;EG-LABEL: {{^}}test4:
-;EG: CF_END
-;SI-LABEL: {{^}}test4:
-;SI: s_endpgm
-
-define void @test4(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
- %b_ptr = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %in, i32 1
- %a = load <4 x i32>, <4 x i32> addrspace(1) * %in
- %b = load <4 x i32>, <4 x i32> addrspace(1) * %b_ptr
- %result = udiv <4 x i32> %a, %b
- store <4 x i32> %result, <4 x i32> addrspace(1)* %out
- ret void
-}
Removed: llvm/trunk/test/CodeGen/R600/udivrem.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/udivrem.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/udivrem.ll (original)
+++ llvm/trunk/test/CodeGen/R600/udivrem.ll (removed)
@@ -1,345 +0,0 @@
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck --check-prefix=SI --check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck --check-prefix=SI --check-prefix=FUNC %s
-; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck --check-prefix=EG --check-prefix=FUNC %s
-
-; FUNC-LABEL: {{^}}test_udivrem:
-; EG: RECIP_UINT
-; EG-DAG: MULHI
-; EG-DAG: MULLO_INT
-; EG-DAG: SUB_INT
-; EG: CNDE_INT
-; EG: MULHI
-; EG-DAG: ADD_INT
-; EG-DAG: SUB_INT
-; EG: CNDE_INT
-; EG: MULHI
-; EG: MULLO_INT
-; EG: SUB_INT
-; EG-DAG: SETGE_UINT
-; EG-DAG: SETGE_UINT
-; EG: AND_INT
-; EG-DAG: ADD_INT
-; EG-DAG: SUB_INT
-; EG-DAG: CNDE_INT
-; EG-DAG: CNDE_INT
-; EG-DAG: ADD_INT
-; EG-DAG: SUB_INT
-; EG-DAG: CNDE_INT
-; EG-DAG: CNDE_INT
-
-; SI: v_rcp_iflag_f32_e32 [[RCP:v[0-9]+]]
-; SI-DAG: v_mul_hi_u32 [[RCP_HI:v[0-9]+]], [[RCP]]
-; SI-DAG: v_mul_lo_i32 [[RCP_LO:v[0-9]+]], [[RCP]]
-; SI-DAG: v_sub_i32_e32 [[NEG_RCP_LO:v[0-9]+]], 0, [[RCP_LO]]
-; SI: v_cndmask_b32_e64
-; SI: v_mul_hi_u32 [[E:v[0-9]+]], {{v[0-9]+}}, [[RCP]]
-; SI-DAG: v_add_i32_e32 [[RCP_A_E:v[0-9]+]], [[E]], [[RCP]]
-; SI-DAG: v_subrev_i32_e32 [[RCP_S_E:v[0-9]+]], [[E]], [[RCP]]
-; SI: v_cndmask_b32_e64
-; SI: v_mul_hi_u32 [[Quotient:v[0-9]+]]
-; SI: v_mul_lo_i32 [[Num_S_Remainder:v[0-9]+]]
-; SI-DAG: v_sub_i32_e32 [[Remainder:v[0-9]+]], {{[vs][0-9]+}}, [[Num_S_Remainder]]
-; SI-DAG: v_cndmask_b32_e64
-; SI-DAG: v_cndmask_b32_e64
-; SI: v_and_b32_e32 [[Tmp1:v[0-9]+]]
-; SI-DAG: v_add_i32_e32 [[Quotient_A_One:v[0-9]+]], 1, [[Quotient]]
-; SI-DAG: v_subrev_i32_e32 [[Quotient_S_One:v[0-9]+]],
-; SI-DAG: v_cndmask_b32_e64
-; SI-DAG: v_cndmask_b32_e64
-; SI-DAG: v_add_i32_e32 [[Remainder_A_Den:v[0-9]+]],
-; SI-DAG: v_subrev_i32_e32 [[Remainder_S_Den:v[0-9]+]],
-; SI-DAG: v_cndmask_b32_e64
-; SI-DAG: v_cndmask_b32_e64
-; SI: s_endpgm
-define void @test_udivrem(i32 addrspace(1)* %out, i32 %x, i32 %y) {
- %result0 = udiv i32 %x, %y
- store i32 %result0, i32 addrspace(1)* %out
- %result1 = urem i32 %x, %y
- store i32 %result1, i32 addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}test_udivrem_v2:
-; EG-DAG: RECIP_UINT
-; EG-DAG: MULHI
-; EG-DAG: MULLO_INT
-; EG-DAG: SUB_INT
-; EG-DAG: CNDE_INT
-; EG-DAG: MULHI
-; EG-DAG: ADD_INT
-; EG-DAG: SUB_INT
-; EG-DAG: CNDE_INT
-; EG-DAG: MULHI
-; EG-DAG: MULLO_INT
-; EG-DAG: SUB_INT
-; EG-DAG: SETGE_UINT
-; EG-DAG: SETGE_UINT
-; EG-DAG: AND_INT
-; EG-DAG: ADD_INT
-; EG-DAG: SUB_INT
-; EG-DAG: CNDE_INT
-; EG-DAG: CNDE_INT
-; EG-DAG: ADD_INT
-; EG-DAG: SUB_INT
-; EG-DAG: CNDE_INT
-; EG-DAG: CNDE_INT
-; EG-DAG: RECIP_UINT
-; EG-DAG: MULHI
-; EG-DAG: MULLO_INT
-; EG-DAG: SUB_INT
-; EG-DAG: CNDE_INT
-; EG-DAG: MULHI
-; EG-DAG: ADD_INT
-; EG-DAG: SUB_INT
-; EG-DAG: CNDE_INT
-; EG-DAG: MULHI
-; EG-DAG: MULLO_INT
-; EG-DAG: SUB_INT
-; EG-DAG: SETGE_UINT
-; EG-DAG: SETGE_UINT
-; EG-DAG: AND_INT
-; EG-DAG: ADD_INT
-; EG-DAG: SUB_INT
-; EG-DAG: CNDE_INT
-; EG-DAG: CNDE_INT
-; EG-DAG: ADD_INT
-; EG-DAG: SUB_INT
-; EG-DAG: CNDE_INT
-; EG-DAG: CNDE_INT
-
-; SI-DAG: v_rcp_iflag_f32_e32 [[FIRST_RCP:v[0-9]+]]
-; SI-DAG: v_mul_hi_u32 [[FIRST_RCP_HI:v[0-9]+]], [[FIRST_RCP]]
-; SI-DAG: v_mul_lo_i32 [[FIRST_RCP_LO:v[0-9]+]], [[FIRST_RCP]]
-; SI-DAG: v_sub_i32_e32 [[FIRST_NEG_RCP_LO:v[0-9]+]], 0, [[FIRST_RCP_LO]]
-; SI-DAG: v_cndmask_b32_e64
-; SI-DAG: v_mul_hi_u32 [[FIRST_E:v[0-9]+]], {{v[0-9]+}}, [[FIRST_RCP]]
-; SI-DAG: v_add_i32_e32 [[FIRST_RCP_A_E:v[0-9]+]], [[FIRST_E]], [[FIRST_RCP]]
-; SI-DAG: v_subrev_i32_e32 [[FIRST_RCP_S_E:v[0-9]+]], [[FIRST_E]], [[FIRST_RCP]]
-; SI-DAG: v_cndmask_b32_e64
-; SI-DAG: v_mul_hi_u32 [[FIRST_Quotient:v[0-9]+]]
-; SI-DAG: v_mul_lo_i32 [[FIRST_Num_S_Remainder:v[0-9]+]]
-; SI-DAG: v_subrev_i32_e32 [[FIRST_Remainder:v[0-9]+]], [[FIRST_Num_S_Remainder]], v{{[0-9]+}}
-; SI-DAG: v_cndmask_b32_e64
-; SI-DAG: v_cndmask_b32_e64
-; SI-DAG: v_and_b32_e32 [[FIRST_Tmp1:v[0-9]+]]
-; SI-DAG: v_add_i32_e32 [[FIRST_Quotient_A_One:v[0-9]+]], {{.*}}, [[FIRST_Quotient]]
-; SI-DAG: v_subrev_i32_e32 [[FIRST_Quotient_S_One:v[0-9]+]],
-; SI-DAG: v_cndmask_b32_e64
-; SI-DAG: v_cndmask_b32_e64
-; SI-DAG: v_add_i32_e32 [[FIRST_Remainder_A_Den:v[0-9]+]],
-; SI-DAG: v_subrev_i32_e32 [[FIRST_Remainder_S_Den:v[0-9]+]],
-; SI-DAG: v_cndmask_b32_e64
-; SI-DAG: v_cndmask_b32_e64
-; SI-DAG: v_rcp_iflag_f32_e32 [[SECOND_RCP:v[0-9]+]]
-; SI-DAG: v_mul_hi_u32 [[SECOND_RCP_HI:v[0-9]+]], [[SECOND_RCP]]
-; SI-DAG: v_mul_lo_i32 [[SECOND_RCP_LO:v[0-9]+]], [[SECOND_RCP]]
-; SI-DAG: v_sub_i32_e32 [[SECOND_NEG_RCP_LO:v[0-9]+]], 0, [[SECOND_RCP_LO]]
-; SI-DAG: v_cndmask_b32_e64
-; SI-DAG: v_mul_hi_u32 [[SECOND_E:v[0-9]+]], {{v[0-9]+}}, [[SECOND_RCP]]
-; SI-DAG: v_add_i32_e32 [[SECOND_RCP_A_E:v[0-9]+]], [[SECOND_E]], [[SECOND_RCP]]
-; SI-DAG: v_subrev_i32_e32 [[SECOND_RCP_S_E:v[0-9]+]], [[SECOND_E]], [[SECOND_RCP]]
-; SI-DAG: v_cndmask_b32_e64
-; SI-DAG: v_mul_hi_u32 [[SECOND_Quotient:v[0-9]+]]
-; SI-DAG: v_mul_lo_i32 [[SECOND_Num_S_Remainder:v[0-9]+]]
-; SI-DAG: v_subrev_i32_e32 [[SECOND_Remainder:v[0-9]+]], [[SECOND_Num_S_Remainder]], v{{[0-9]+}}
-; SI-DAG: v_cndmask_b32_e64
-; SI-DAG: v_cndmask_b32_e64
-; SI-DAG: v_and_b32_e32 [[SECOND_Tmp1:v[0-9]+]]
-; SI-DAG: v_add_i32_e32 [[SECOND_Quotient_A_One:v[0-9]+]], {{.*}}, [[SECOND_Quotient]]
-; SI-DAG: v_subrev_i32_e32 [[SECOND_Quotient_S_One:v[0-9]+]],
-; SI-DAG: v_cndmask_b32_e64
-; SI-DAG: v_cndmask_b32_e64
-; SI-DAG: v_add_i32_e32 [[SECOND_Remainder_A_Den:v[0-9]+]],
-; SI-DAG: v_subrev_i32_e32 [[SECOND_Remainder_S_Den:v[0-9]+]],
-; SI-DAG: v_cndmask_b32_e64
-; SI-DAG: v_cndmask_b32_e64
-; SI: s_endpgm
-define void @test_udivrem_v2(<2 x i32> addrspace(1)* %out, <2 x i32> %x, <2 x i32> %y) {
- %result0 = udiv <2 x i32> %x, %y
- store <2 x i32> %result0, <2 x i32> addrspace(1)* %out
- %result1 = urem <2 x i32> %x, %y
- store <2 x i32> %result1, <2 x i32> addrspace(1)* %out
- ret void
-}
-
-
-; FUNC-LABEL: {{^}}test_udivrem_v4:
-; EG-DAG: RECIP_UINT
-; EG-DAG: MULHI
-; EG-DAG: MULLO_INT
-; EG-DAG: SUB_INT
-; EG-DAG: CNDE_INT
-; EG-DAG: MULHI
-; EG-DAG: ADD_INT
-; EG-DAG: SUB_INT
-; EG-DAG: CNDE_INT
-; EG-DAG: MULHI
-; EG-DAG: MULLO_INT
-; EG-DAG: SUB_INT
-; EG-DAG: SETGE_UINT
-; EG-DAG: SETGE_UINT
-; EG-DAG: AND_INT
-; EG-DAG: ADD_INT
-; EG-DAG: SUB_INT
-; EG-DAG: CNDE_INT
-; EG-DAG: CNDE_INT
-; EG-DAG: ADD_INT
-; EG-DAG: SUB_INT
-; EG-DAG: CNDE_INT
-; EG-DAG: CNDE_INT
-; EG-DAG: RECIP_UINT
-; EG-DAG: MULHI
-; EG-DAG: MULLO_INT
-; EG-DAG: SUB_INT
-; EG-DAG: CNDE_INT
-; EG-DAG: MULHI
-; EG-DAG: ADD_INT
-; EG-DAG: SUB_INT
-; EG-DAG: CNDE_INT
-; EG-DAG: MULHI
-; EG-DAG: MULLO_INT
-; EG-DAG: SUB_INT
-; EG-DAG: SETGE_UINT
-; EG-DAG: SETGE_UINT
-; EG-DAG: AND_INT
-; EG-DAG: ADD_INT
-; EG-DAG: SUB_INT
-; EG-DAG: CNDE_INT
-; EG-DAG: CNDE_INT
-; EG-DAG: ADD_INT
-; EG-DAG: SUB_INT
-; EG-DAG: CNDE_INT
-; EG-DAG: CNDE_INT
-; EG-DAG: RECIP_UINT
-; EG-DAG: MULHI
-; EG-DAG: MULLO_INT
-; EG-DAG: SUB_INT
-; EG-DAG: CNDE_INT
-; EG-DAG: MULHI
-; EG-DAG: ADD_INT
-; EG-DAG: SUB_INT
-; EG-DAG: CNDE_INT
-; EG-DAG: MULHI
-; EG-DAG: MULLO_INT
-; EG-DAG: SUB_INT
-; EG-DAG: SETGE_UINT
-; EG-DAG: SETGE_UINT
-; EG-DAG: AND_INT
-; EG-DAG: ADD_INT
-; EG-DAG: SUB_INT
-; EG-DAG: CNDE_INT
-; EG-DAG: CNDE_INT
-; EG-DAG: ADD_INT
-; EG-DAG: SUB_INT
-; EG-DAG: CNDE_INT
-; EG-DAG: CNDE_INT
-; EG-DAG: RECIP_UINT
-; EG-DAG: MULHI
-; EG-DAG: MULLO_INT
-; EG-DAG: SUB_INT
-; EG-DAG: CNDE_INT
-; EG-DAG: MULHI
-; EG-DAG: ADD_INT
-; EG-DAG: SUB_INT
-; EG-DAG: CNDE_INT
-; EG-DAG: MULHI
-; EG-DAG: MULLO_INT
-; EG-DAG: SUB_INT
-; EG-DAG: SETGE_UINT
-; EG-DAG: SETGE_UINT
-; EG-DAG: AND_INT
-; EG-DAG: ADD_INT
-; EG-DAG: SUB_INT
-; EG-DAG: CNDE_INT
-; EG-DAG: CNDE_INT
-; EG-DAG: ADD_INT
-; EG-DAG: SUB_INT
-; EG-DAG: CNDE_INT
-; EG-DAG: CNDE_INT
-
-; SI-DAG: v_rcp_iflag_f32_e32 [[FIRST_RCP:v[0-9]+]]
-; SI-DAG: v_mul_hi_u32 [[FIRST_RCP_HI:v[0-9]+]], [[FIRST_RCP]]
-; SI-DAG: v_mul_lo_i32 [[FIRST_RCP_LO:v[0-9]+]], [[FIRST_RCP]]
-; SI-DAG: v_sub_i32_e32 [[FIRST_NEG_RCP_LO:v[0-9]+]], 0, [[FIRST_RCP_LO]]
-; SI-DAG: v_cndmask_b32_e64
-; SI-DAG: v_mul_hi_u32 [[FIRST_E:v[0-9]+]], {{v[0-9]+}}, [[FIRST_RCP]]
-; SI-DAG: v_add_i32_e32 [[FIRST_RCP_A_E:v[0-9]+]], [[FIRST_E]], [[FIRST_RCP]]
-; SI-DAG: v_subrev_i32_e32 [[FIRST_RCP_S_E:v[0-9]+]], [[FIRST_E]], [[FIRST_RCP]]
-; SI-DAG: v_cndmask_b32_e64
-; SI-DAG: v_mul_hi_u32 [[FIRST_Quotient:v[0-9]+]]
-; SI-DAG: v_mul_lo_i32 [[FIRST_Num_S_Remainder:v[0-9]+]]
-; SI-DAG: v_subrev_i32_e32 [[FIRST_Remainder:v[0-9]+]], [[FIRST_Num_S_Remainder]], v{{[0-9]+}}
-; SI-DAG: v_cndmask_b32_e64
-; SI-DAG: v_cndmask_b32_e64
-; SI-DAG: v_and_b32_e32 [[FIRST_Tmp1:v[0-9]+]]
-; SI-DAG: v_add_i32_e32 [[FIRST_Quotient_A_One:v[0-9]+]], {{.*}}, [[FIRST_Quotient]]
-; SI-DAG: v_subrev_i32_e32 [[FIRST_Quotient_S_One:v[0-9]+]],
-; SI-DAG: v_cndmask_b32_e64
-; SI-DAG: v_cndmask_b32_e64
-; SI-DAG: v_add_i32_e32 [[FIRST_Remainder_A_Den:v[0-9]+]],
-; SI-DAG: v_subrev_i32_e32 [[FIRST_Remainder_S_Den:v[0-9]+]],
-; SI-DAG: v_cndmask_b32_e64
-; SI-DAG: v_cndmask_b32_e64
-; SI-DAG: v_rcp_iflag_f32_e32 [[SECOND_RCP:v[0-9]+]]
-; SI-DAG: v_mul_hi_u32 [[SECOND_RCP_HI:v[0-9]+]], [[SECOND_RCP]]
-; SI-DAG: v_mul_lo_i32 [[SECOND_RCP_LO:v[0-9]+]], [[SECOND_RCP]]
-; SI-DAG: v_sub_i32_e32 [[SECOND_NEG_RCP_LO:v[0-9]+]], 0, [[SECOND_RCP_LO]]
-; SI-DAG: v_cndmask_b32_e64
-; SI-DAG: v_mul_hi_u32 [[SECOND_E:v[0-9]+]], {{v[0-9]+}}, [[SECOND_RCP]]
-; SI-DAG: v_add_i32_e32 [[SECOND_RCP_A_E:v[0-9]+]], [[SECOND_E]], [[SECOND_RCP]]
-; SI-DAG: v_subrev_i32_e32 [[SECOND_RCP_S_E:v[0-9]+]], [[SECOND_E]], [[SECOND_RCP]]
-; SI-DAG: v_cndmask_b32_e64
-; SI-DAG: v_mul_hi_u32 [[SECOND_Quotient:v[0-9]+]]
-; SI-DAG: v_mul_lo_i32 [[SECOND_Num_S_Remainder:v[0-9]+]]
-; SI-DAG: v_subrev_i32_e32 [[SECOND_Remainder:v[0-9]+]], [[SECOND_Num_S_Remainder]], v{{[0-9]+}}
-; SI-DAG: v_cndmask_b32_e64
-; SI-DAG: v_cndmask_b32_e64
-; SI-DAG: v_and_b32_e32 [[SECOND_Tmp1:v[0-9]+]]
-; SI-DAG: v_add_i32_e32 [[SECOND_Quotient_A_One:v[0-9]+]], {{.*}}, [[SECOND_Quotient]]
-; SI-DAG: v_subrev_i32_e32 [[SECOND_Quotient_S_One:v[0-9]+]],
-; SI-DAG: v_cndmask_b32_e64
-; SI-DAG: v_cndmask_b32_e64
-; SI-DAG: v_add_i32_e32 [[SECOND_Remainder_A_Den:v[0-9]+]],
-; SI-DAG: v_subrev_i32_e32 [[SECOND_Remainder_S_Den:v[0-9]+]],
-; SI-DAG: v_cndmask_b32_e64
-; SI-DAG: v_cndmask_b32_e64
-; SI-DAG: v_rcp_iflag_f32_e32 [[THIRD_RCP:v[0-9]+]]
-; SI-DAG: v_mul_hi_u32 [[THIRD_RCP_HI:v[0-9]+]], [[THIRD_RCP]]
-; SI-DAG: v_mul_lo_i32 [[THIRD_RCP_LO:v[0-9]+]], [[THIRD_RCP]]
-; SI-DAG: v_sub_i32_e32 [[THIRD_NEG_RCP_LO:v[0-9]+]], 0, [[THIRD_RCP_LO]]
-; SI-DAG: v_cndmask_b32_e64
-; SI-DAG: v_mul_hi_u32 [[THIRD_E:v[0-9]+]], {{v[0-9]+}}, [[THIRD_RCP]]
-; SI-DAG: v_add_i32_e32 [[THIRD_RCP_A_E:v[0-9]+]], [[THIRD_E]], [[THIRD_RCP]]
-; SI-DAG: v_subrev_i32_e32 [[THIRD_RCP_S_E:v[0-9]+]], [[THIRD_E]], [[THIRD_RCP]]
-; SI-DAG: v_cndmask_b32_e64
-; SI-DAG: v_mul_hi_u32 [[THIRD_Quotient:v[0-9]+]]
-; SI-DAG: v_mul_lo_i32 [[THIRD_Num_S_Remainder:v[0-9]+]]
-; SI-DAG: v_subrev_i32_e32 [[THIRD_Remainder:v[0-9]+]], [[THIRD_Num_S_Remainder]], {{v[0-9]+}}
-; SI-DAG: v_cndmask_b32_e64
-; SI-DAG: v_cndmask_b32_e64
-; SI-DAG: v_and_b32_e32 [[THIRD_Tmp1:v[0-9]+]]
-; SI-DAG: v_add_i32_e32 [[THIRD_Quotient_A_One:v[0-9]+]], {{.*}}, [[THIRD_Quotient]]
-; SI-DAG: v_subrev_i32_e32 [[THIRD_Quotient_S_One:v[0-9]+]],
-; SI-DAG: v_cndmask_b32_e64
-; SI-DAG: v_cndmask_b32_e64
-; SI-DAG: v_add_i32_e32 [[THIRD_Remainder_A_Den:v[0-9]+]],
-; SI-DAG: v_subrev_i32_e32 [[THIRD_Remainder_S_Den:v[0-9]+]],
-; SI-DAG: v_cndmask_b32_e64
-; SI-DAG: v_cndmask_b32_e64
-; SI-DAG: v_rcp_iflag_f32_e32 [[FOURTH_RCP:v[0-9]+]]
-; SI-DAG: v_mul_hi_u32 [[FOURTH_RCP_HI:v[0-9]+]], [[FOURTH_RCP]]
-; SI-DAG: v_mul_lo_i32 [[FOURTH_RCP_LO:v[0-9]+]], [[FOURTH_RCP]]
-; SI-DAG: v_sub_i32_e32 [[FOURTH_NEG_RCP_LO:v[0-9]+]], 0, [[FOURTH_RCP_LO]]
-; SI-DAG: v_cndmask_b32_e64
-; SI-DAG: v_mul_hi_u32 [[FOURTH_E:v[0-9]+]], {{v[0-9]+}}, [[FOURTH_RCP]]
-; SI-DAG: v_add_i32_e32 [[FOURTH_RCP_A_E:v[0-9]+]], [[FOURTH_E]], [[FOURTH_RCP]]
-; SI-DAG: v_subrev_i32_e32 [[FOURTH_RCP_S_E:v[0-9]+]], [[FOURTH_E]], [[FOURTH_RCP]]
-; SI-DAG: v_cndmask_b32_e64
-; SI: s_endpgm
-define void @test_udivrem_v4(<4 x i32> addrspace(1)* %out, <4 x i32> %x, <4 x i32> %y) {
- %result0 = udiv <4 x i32> %x, %y
- store <4 x i32> %result0, <4 x i32> addrspace(1)* %out
- %result1 = urem <4 x i32> %x, %y
- store <4 x i32> %result1, <4 x i32> addrspace(1)* %out
- ret void
-}
Removed: llvm/trunk/test/CodeGen/R600/udivrem24.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/udivrem24.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/udivrem24.ll (original)
+++ llvm/trunk/test/CodeGen/R600/udivrem24.ll (removed)
@@ -1,245 +0,0 @@
-; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
-
-; FUNC-LABEL: {{^}}udiv24_i8:
-; SI: v_cvt_f32_ubyte
-; SI: v_cvt_f32_ubyte
-; SI: v_rcp_f32
-; SI: v_cvt_u32_f32
-
-; EG: UINT_TO_FLT
-; EG-DAG: UINT_TO_FLT
-; EG-DAG: RECIP_IEEE
-; EG: FLT_TO_UINT
-define void @udiv24_i8(i8 addrspace(1)* %out, i8 addrspace(1)* %in) {
- %den_ptr = getelementptr i8, i8 addrspace(1)* %in, i8 1
- %num = load i8, i8 addrspace(1) * %in
- %den = load i8, i8 addrspace(1) * %den_ptr
- %result = udiv i8 %num, %den
- store i8 %result, i8 addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}udiv24_i16:
-; SI: v_cvt_f32_u32
-; SI: v_cvt_f32_u32
-; SI: v_rcp_f32
-; SI: v_cvt_u32_f32
-
-; EG: UINT_TO_FLT
-; EG-DAG: UINT_TO_FLT
-; EG-DAG: RECIP_IEEE
-; EG: FLT_TO_UINT
-define void @udiv24_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %in) {
- %den_ptr = getelementptr i16, i16 addrspace(1)* %in, i16 1
- %num = load i16, i16 addrspace(1) * %in, align 2
- %den = load i16, i16 addrspace(1) * %den_ptr, align 2
- %result = udiv i16 %num, %den
- store i16 %result, i16 addrspace(1)* %out, align 2
- ret void
-}
-
-; FUNC-LABEL: {{^}}udiv24_i32:
-; SI: v_cvt_f32_u32
-; SI-DAG: v_cvt_f32_u32
-; SI-DAG: v_rcp_f32
-; SI: v_cvt_u32_f32
-
-; EG: UINT_TO_FLT
-; EG-DAG: UINT_TO_FLT
-; EG-DAG: RECIP_IEEE
-; EG: FLT_TO_UINT
-define void @udiv24_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
- %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
- %num = load i32, i32 addrspace(1) * %in, align 4
- %den = load i32, i32 addrspace(1) * %den_ptr, align 4
- %num.i24.0 = shl i32 %num, 8
- %den.i24.0 = shl i32 %den, 8
- %num.i24 = lshr i32 %num.i24.0, 8
- %den.i24 = lshr i32 %den.i24.0, 8
- %result = udiv i32 %num.i24, %den.i24
- store i32 %result, i32 addrspace(1)* %out, align 4
- ret void
-}
-
-; FUNC-LABEL: {{^}}udiv25_i32:
-; RCP_IFLAG is for URECIP in the full 32b alg
-; SI: v_rcp_iflag
-; SI-NOT: v_rcp_f32
-
-; EG-NOT: UINT_TO_FLT
-; EG-NOT: RECIP_IEEE
-define void @udiv25_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
- %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
- %num = load i32, i32 addrspace(1) * %in, align 4
- %den = load i32, i32 addrspace(1) * %den_ptr, align 4
- %num.i24.0 = shl i32 %num, 7
- %den.i24.0 = shl i32 %den, 7
- %num.i24 = lshr i32 %num.i24.0, 7
- %den.i24 = lshr i32 %den.i24.0, 7
- %result = udiv i32 %num.i24, %den.i24
- store i32 %result, i32 addrspace(1)* %out, align 4
- ret void
-}
-
-; FUNC-LABEL: {{^}}test_no_udiv24_i32_1:
-; RCP_IFLAG is for URECIP in the full 32b alg
-; SI: v_rcp_iflag
-; SI-NOT: v_rcp_f32
-
-; EG-NOT: UINT_TO_FLT
-; EG-NOT: RECIP_IEEE
-define void @test_no_udiv24_i32_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
- %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
- %num = load i32, i32 addrspace(1) * %in, align 4
- %den = load i32, i32 addrspace(1) * %den_ptr, align 4
- %num.i24.0 = shl i32 %num, 8
- %den.i24.0 = shl i32 %den, 7
- %num.i24 = lshr i32 %num.i24.0, 8
- %den.i24 = lshr i32 %den.i24.0, 7
- %result = udiv i32 %num.i24, %den.i24
- store i32 %result, i32 addrspace(1)* %out, align 4
- ret void
-}
-
-; FUNC-LABEL: {{^}}test_no_udiv24_i32_2:
-; RCP_IFLAG is for URECIP in the full 32b alg
-; SI: v_rcp_iflag
-; SI-NOT: v_rcp_f32
-
-; EG-NOT: UINT_TO_FLT
-; EG-NOT: RECIP_IEEE
-define void @test_no_udiv24_i32_2(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
- %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
- %num = load i32, i32 addrspace(1) * %in, align 4
- %den = load i32, i32 addrspace(1) * %den_ptr, align 4
- %num.i24.0 = shl i32 %num, 7
- %den.i24.0 = shl i32 %den, 8
- %num.i24 = lshr i32 %num.i24.0, 7
- %den.i24 = lshr i32 %den.i24.0, 8
- %result = udiv i32 %num.i24, %den.i24
- store i32 %result, i32 addrspace(1)* %out, align 4
- ret void
-}
-
-; FUNC-LABEL: {{^}}urem24_i8:
-; SI: v_cvt_f32_ubyte
-; SI: v_cvt_f32_ubyte
-; SI: v_rcp_f32
-; SI: v_cvt_u32_f32
-
-; EG: UINT_TO_FLT
-; EG-DAG: UINT_TO_FLT
-; EG-DAG: RECIP_IEEE
-; EG: FLT_TO_UINT
-define void @urem24_i8(i8 addrspace(1)* %out, i8 addrspace(1)* %in) {
- %den_ptr = getelementptr i8, i8 addrspace(1)* %in, i8 1
- %num = load i8, i8 addrspace(1) * %in
- %den = load i8, i8 addrspace(1) * %den_ptr
- %result = urem i8 %num, %den
- store i8 %result, i8 addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}urem24_i16:
-; SI: v_cvt_f32_u32
-; SI: v_cvt_f32_u32
-; SI: v_rcp_f32
-; SI: v_cvt_u32_f32
-
-; EG: UINT_TO_FLT
-; EG-DAG: UINT_TO_FLT
-; EG-DAG: RECIP_IEEE
-; EG: FLT_TO_UINT
-define void @urem24_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %in) {
- %den_ptr = getelementptr i16, i16 addrspace(1)* %in, i16 1
- %num = load i16, i16 addrspace(1) * %in, align 2
- %den = load i16, i16 addrspace(1) * %den_ptr, align 2
- %result = urem i16 %num, %den
- store i16 %result, i16 addrspace(1)* %out, align 2
- ret void
-}
-
-; FUNC-LABEL: {{^}}urem24_i32:
-; SI: v_cvt_f32_u32
-; SI: v_cvt_f32_u32
-; SI: v_rcp_f32
-; SI: v_cvt_u32_f32
-
-; EG: UINT_TO_FLT
-; EG-DAG: UINT_TO_FLT
-; EG-DAG: RECIP_IEEE
-; EG: FLT_TO_UINT
-define void @urem24_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
- %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
- %num = load i32, i32 addrspace(1) * %in, align 4
- %den = load i32, i32 addrspace(1) * %den_ptr, align 4
- %num.i24.0 = shl i32 %num, 8
- %den.i24.0 = shl i32 %den, 8
- %num.i24 = lshr i32 %num.i24.0, 8
- %den.i24 = lshr i32 %den.i24.0, 8
- %result = urem i32 %num.i24, %den.i24
- store i32 %result, i32 addrspace(1)* %out, align 4
- ret void
-}
-
-; FUNC-LABEL: {{^}}urem25_i32:
-; RCP_IFLAG is for URECIP in the full 32b alg
-; SI: v_rcp_iflag
-; SI-NOT: v_rcp_f32
-
-; EG-NOT: UINT_TO_FLT
-; EG-NOT: RECIP_IEEE
-define void @urem25_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
- %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
- %num = load i32, i32 addrspace(1) * %in, align 4
- %den = load i32, i32 addrspace(1) * %den_ptr, align 4
- %num.i24.0 = shl i32 %num, 7
- %den.i24.0 = shl i32 %den, 7
- %num.i24 = lshr i32 %num.i24.0, 7
- %den.i24 = lshr i32 %den.i24.0, 7
- %result = urem i32 %num.i24, %den.i24
- store i32 %result, i32 addrspace(1)* %out, align 4
- ret void
-}
-
-; FUNC-LABEL: {{^}}test_no_urem24_i32_1:
-; RCP_IFLAG is for URECIP in the full 32b alg
-; SI: v_rcp_iflag
-; SI-NOT: v_rcp_f32
-
-; EG-NOT: UINT_TO_FLT
-; EG-NOT: RECIP_IEEE
-define void @test_no_urem24_i32_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
- %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
- %num = load i32, i32 addrspace(1) * %in, align 4
- %den = load i32, i32 addrspace(1) * %den_ptr, align 4
- %num.i24.0 = shl i32 %num, 8
- %den.i24.0 = shl i32 %den, 7
- %num.i24 = lshr i32 %num.i24.0, 8
- %den.i24 = lshr i32 %den.i24.0, 7
- %result = urem i32 %num.i24, %den.i24
- store i32 %result, i32 addrspace(1)* %out, align 4
- ret void
-}
-
-; FUNC-LABEL: {{^}}test_no_urem24_i32_2:
-; RCP_IFLAG is for URECIP in the full 32b alg
-; SI: v_rcp_iflag
-; SI-NOT: v_rcp_f32
-
-; EG-NOT: UINT_TO_FLT
-; EG-NOT: RECIP_IEEE
-define void @test_no_urem24_i32_2(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
- %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
- %num = load i32, i32 addrspace(1) * %in, align 4
- %den = load i32, i32 addrspace(1) * %den_ptr, align 4
- %num.i24.0 = shl i32 %num, 7
- %den.i24.0 = shl i32 %den, 8
- %num.i24 = lshr i32 %num.i24.0, 7
- %den.i24 = lshr i32 %den.i24.0, 8
- %result = urem i32 %num.i24, %den.i24
- store i32 %result, i32 addrspace(1)* %out, align 4
- ret void
-}
Removed: llvm/trunk/test/CodeGen/R600/udivrem64.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/udivrem64.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/udivrem64.ll (original)
+++ llvm/trunk/test/CodeGen/R600/udivrem64.ll (removed)
@@ -1,223 +0,0 @@
-;RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck --check-prefix=SI --check-prefix=GCN --check-prefix=FUNC %s
-;RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck --check-prefix=VI --check-prefix=GCN --check-prefix=FUNC %s
-;RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck --check-prefix=EG --check-prefix=FUNC %s
-
-;FUNC-LABEL: {{^}}test_udiv:
-;EG: RECIP_UINT
-;EG: LSHL {{.*}}, 1,
-;EG: BFE_UINT
-;EG: BFE_UINT
-;EG: BFE_UINT
-;EG: BFE_UINT
-;EG: BFE_UINT
-;EG: BFE_UINT
-;EG: BFE_UINT
-;EG: BFE_UINT
-;EG: BFE_UINT
-;EG: BFE_UINT
-;EG: BFE_UINT
-;EG: BFE_UINT
-;EG: BFE_UINT
-;EG: BFE_UINT
-;EG: BFE_UINT
-;EG: BFE_UINT
-;EG: BFE_UINT
-;EG: BFE_UINT
-;EG: BFE_UINT
-;EG: BFE_UINT
-;EG: BFE_UINT
-;EG: BFE_UINT
-;EG: BFE_UINT
-;EG: BFE_UINT
-;EG: BFE_UINT
-;EG: BFE_UINT
-;EG: BFE_UINT
-;EG: BFE_UINT
-;EG: BFE_UINT
-;EG: BFE_UINT
-
-;GCN: s_bfe_u32
-;GCN: s_bfe_u32
-;GCN: s_bfe_u32
-;GCN: s_bfe_u32
-;GCN: s_bfe_u32
-;GCN: s_bfe_u32
-;GCN: s_bfe_u32
-;GCN: s_bfe_u32
-;GCN: s_bfe_u32
-;GCN: s_bfe_u32
-;GCN: s_bfe_u32
-;GCN: s_bfe_u32
-;GCN: s_bfe_u32
-;GCN: s_bfe_u32
-;GCN: s_bfe_u32
-;GCN: s_bfe_u32
-;GCN: s_bfe_u32
-;GCN: s_bfe_u32
-;GCN: s_bfe_u32
-;GCN: s_bfe_u32
-;GCN: s_bfe_u32
-;GCN: s_bfe_u32
-;GCN: s_bfe_u32
-;GCN: s_bfe_u32
-;GCN: s_bfe_u32
-;GCN: s_bfe_u32
-;GCN: s_bfe_u32
-;GCN: s_bfe_u32
-;GCN: s_bfe_u32
-;GCN: s_bfe_u32
-;GCN-NOT: v_mad_f32
-;SI-NOT: v_lshr_b64
-;VI-NOT: v_lshrrev_b64
-;GCN: s_endpgm
-define void @test_udiv(i64 addrspace(1)* %out, i64 %x, i64 %y) {
- %result = udiv i64 %x, %y
- store i64 %result, i64 addrspace(1)* %out
- ret void
-}
-
-;FUNC-LABEL: {{^}}test_urem:
-;EG: RECIP_UINT
-;EG: BFE_UINT
-;EG: BFE_UINT
-;EG: BFE_UINT
-;EG: BFE_UINT
-;EG: BFE_UINT
-;EG: BFE_UINT
-;EG: BFE_UINT
-;EG: BFE_UINT
-;EG: BFE_UINT
-;EG: BFE_UINT
-;EG: BFE_UINT
-;EG: BFE_UINT
-;EG: BFE_UINT
-;EG: BFE_UINT
-;EG: BFE_UINT
-;EG: BFE_UINT
-;EG: BFE_UINT
-;EG: BFE_UINT
-;EG: BFE_UINT
-;EG: BFE_UINT
-;EG: BFE_UINT
-;EG: BFE_UINT
-;EG: BFE_UINT
-;EG: BFE_UINT
-;EG: BFE_UINT
-;EG: BFE_UINT
-;EG: BFE_UINT
-;EG: BFE_UINT
-;EG: BFE_UINT
-;EG: BFE_UINT
-;EG: AND_INT {{.*}}, 1,
-
-;GCN: s_bfe_u32
-;GCN: s_bfe_u32
-;GCN: s_bfe_u32
-;GCN: s_bfe_u32
-;GCN: s_bfe_u32
-;GCN: s_bfe_u32
-;GCN: s_bfe_u32
-;GCN: s_bfe_u32
-;GCN: s_bfe_u32
-;GCN: s_bfe_u32
-;GCN: s_bfe_u32
-;GCN: s_bfe_u32
-;GCN: s_bfe_u32
-;GCN: s_bfe_u32
-;GCN: s_bfe_u32
-;GCN: s_bfe_u32
-;GCN: s_bfe_u32
-;GCN: s_bfe_u32
-;GCN: s_bfe_u32
-;GCN: s_bfe_u32
-;GCN: s_bfe_u32
-;GCN: s_bfe_u32
-;GCN: s_bfe_u32
-;GCN: s_bfe_u32
-;GCN: s_bfe_u32
-;GCN: s_bfe_u32
-;GCN: s_bfe_u32
-;GCN: s_bfe_u32
-;GCN: s_bfe_u32
-;GCN: s_bfe_u32
-;GCN-NOT: v_mad_f32
-;SI-NOT: v_lshr_b64
-;VI-NOT: v_lshrrev_b64
-;GCN: s_endpgm
-define void @test_urem(i64 addrspace(1)* %out, i64 %x, i64 %y) {
- %result = urem i64 %x, %y
- store i64 %result, i64 addrspace(1)* %out
- ret void
-}
-
-;FUNC-LABEL: {{^}}test_udiv3264:
-;EG: RECIP_UINT
-;EG-NOT: BFE_UINT
-
-;GCN-NOT: s_bfe_u32
-;GCN-NOT: v_mad_f32
-;SI-NOT: v_lshr_b64
-;VI-NOT: v_lshrrev_b64
-;GCN: s_endpgm
-define void @test_udiv3264(i64 addrspace(1)* %out, i64 %x, i64 %y) {
- %1 = lshr i64 %x, 33
- %2 = lshr i64 %y, 33
- %result = udiv i64 %1, %2
- store i64 %result, i64 addrspace(1)* %out
- ret void
-}
-
-;FUNC-LABEL: {{^}}test_urem3264:
-;EG: RECIP_UINT
-;EG-NOT: BFE_UINT
-
-;GCN-NOT: s_bfe_u32
-;GCN-NOT: v_mad_f32
-;SI-NOT: v_lshr_b64
-;VI-NOT: v_lshrrev_b64
-;GCN: s_endpgm
-define void @test_urem3264(i64 addrspace(1)* %out, i64 %x, i64 %y) {
- %1 = lshr i64 %x, 33
- %2 = lshr i64 %y, 33
- %result = urem i64 %1, %2
- store i64 %result, i64 addrspace(1)* %out
- ret void
-}
-
-;FUNC-LABEL: {{^}}test_udiv2464:
-;EG: UINT_TO_FLT
-;EG: UINT_TO_FLT
-;EG: FLT_TO_UINT
-;EG-NOT: RECIP_UINT
-;EG-NOT: BFE_UINT
-
-;SI-NOT: v_lshr_b64
-;VI-NOT: v_lshrrev_b64
-;GCN: v_mad_f32
-;GCN: s_endpgm
-define void @test_udiv2464(i64 addrspace(1)* %out, i64 %x, i64 %y) {
- %1 = lshr i64 %x, 40
- %2 = lshr i64 %y, 40
- %result = udiv i64 %1, %2
- store i64 %result, i64 addrspace(1)* %out
- ret void
-}
-
-;FUNC-LABEL: {{^}}test_urem2464:
-;EG: UINT_TO_FLT
-;EG: UINT_TO_FLT
-;EG: FLT_TO_UINT
-;EG-NOT: RECIP_UINT
-;EG-NOT: BFE_UINT
-
-;SI-NOT: v_lshr_b64
-;VI-NOT: v_lshrrev_b64
-;GCN: v_mad_f32
-;GCN: s_endpgm
-define void @test_urem2464(i64 addrspace(1)* %out, i64 %x, i64 %y) {
- %1 = lshr i64 %x, 40
- %2 = lshr i64 %y, 40
- %result = urem i64 %1, %2
- store i64 %result, i64 addrspace(1)* %out
- ret void
-}
Removed: llvm/trunk/test/CodeGen/R600/uint_to_fp.f64.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/uint_to_fp.f64.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/uint_to_fp.f64.ll (original)
+++ llvm/trunk/test/CodeGen/R600/uint_to_fp.f64.ll (removed)
@@ -1,98 +0,0 @@
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
-
-declare i32 @llvm.r600.read.tidig.x() nounwind readnone
-
-; SI-LABEL: {{^}}v_uint_to_fp_i64_to_f64
-; SI: buffer_load_dwordx2 v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}}
-; SI: v_cvt_f64_u32_e32 [[HI_CONV:v\[[0-9]+:[0-9]+\]]], v[[HI]]
-; SI: v_ldexp_f64 [[LDEXP:v\[[0-9]+:[0-9]+\]]], [[HI_CONV]], 32
-; SI: v_cvt_f64_u32_e32 [[LO_CONV:v\[[0-9]+:[0-9]+\]]], v[[LO]]
-; SI: v_add_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[LDEXP]], [[LO_CONV]]
-; SI: buffer_store_dwordx2 [[RESULT]]
-define void @v_uint_to_fp_i64_to_f64(double addrspace(1)* %out, i64 addrspace(1)* %in) {
- %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %gep = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
- %val = load i64, i64 addrspace(1)* %gep, align 8
- %result = uitofp i64 %val to double
- store double %result, double addrspace(1)* %out
- ret void
-}
-
-; SI-LABEL: {{^}}s_uint_to_fp_i64_to_f64
-define void @s_uint_to_fp_i64_to_f64(double addrspace(1)* %out, i64 %in) {
- %cast = uitofp i64 %in to double
- store double %cast, double addrspace(1)* %out, align 8
- ret void
-}
-
-; SI-LABEL: {{^}}s_uint_to_fp_v2i64_to_v2f64
-define void @s_uint_to_fp_v2i64_to_v2f64(<2 x double> addrspace(1)* %out, <2 x i64> %in) {
- %cast = uitofp <2 x i64> %in to <2 x double>
- store <2 x double> %cast, <2 x double> addrspace(1)* %out, align 16
- ret void
-}
-
-; SI-LABEL: {{^}}s_uint_to_fp_v4i64_to_v4f64
-define void @s_uint_to_fp_v4i64_to_v4f64(<4 x double> addrspace(1)* %out, <4 x i64> %in) {
- %cast = uitofp <4 x i64> %in to <4 x double>
- store <4 x double> %cast, <4 x double> addrspace(1)* %out, align 16
- ret void
-}
-
-; SI-LABEL: {{^}}s_uint_to_fp_i32_to_f64
-; SI: v_cvt_f64_u32_e32
-; SI: s_endpgm
-define void @s_uint_to_fp_i32_to_f64(double addrspace(1)* %out, i32 %in) {
- %cast = uitofp i32 %in to double
- store double %cast, double addrspace(1)* %out, align 8
- ret void
-}
-
-; SI-LABEL: {{^}}s_uint_to_fp_v2i32_to_v2f64
-; SI: v_cvt_f64_u32_e32
-; SI: v_cvt_f64_u32_e32
-; SI: s_endpgm
-define void @s_uint_to_fp_v2i32_to_v2f64(<2 x double> addrspace(1)* %out, <2 x i32> %in) {
- %cast = uitofp <2 x i32> %in to <2 x double>
- store <2 x double> %cast, <2 x double> addrspace(1)* %out, align 16
- ret void
-}
-
-; SI-LABEL: {{^}}s_uint_to_fp_v4i32_to_v4f64
-; SI: v_cvt_f64_u32_e32
-; SI: v_cvt_f64_u32_e32
-; SI: v_cvt_f64_u32_e32
-; SI: v_cvt_f64_u32_e32
-; SI: s_endpgm
-define void @s_uint_to_fp_v4i32_to_v4f64(<4 x double> addrspace(1)* %out, <4 x i32> %in) {
- %cast = uitofp <4 x i32> %in to <4 x double>
- store <4 x double> %cast, <4 x double> addrspace(1)* %out, align 16
- ret void
-}
-
-; FIXME: select on 0, 0
-; SI-LABEL: {{^}}uint_to_fp_i1_to_f64:
-; SI: v_cmp_eq_i32_e64 [[CMP:s\[[0-9]+:[0-9]\]]],
-; We can't fold the SGPRs into v_cndmask_b32_e64, because it already
-; uses an SGPR for [[CMP]]
-; SI: v_cndmask_b32_e64 v{{[0-9]+}}, 0, v{{[0-9]+}}, [[CMP]]
-; SI: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 0, [[CMP]]
-; SI: buffer_store_dwordx2
-; SI: s_endpgm
-define void @uint_to_fp_i1_to_f64(double addrspace(1)* %out, i32 %in) {
- %cmp = icmp eq i32 %in, 0
- %fp = uitofp i1 %cmp to double
- store double %fp, double addrspace(1)* %out, align 4
- ret void
-}
-
-; SI-LABEL: {{^}}uint_to_fp_i1_to_f64_load:
-; SI: v_cndmask_b32_e64 [[IRESULT:v[0-9]]], 0, 1
-; SI-NEXT: v_cvt_f64_u32_e32 [[RESULT:v\[[0-9]+:[0-9]\]]], [[IRESULT]]
-; SI: buffer_store_dwordx2 [[RESULT]]
-; SI: s_endpgm
-define void @uint_to_fp_i1_to_f64_load(double addrspace(1)* %out, i1 %in) {
- %fp = uitofp i1 %in to double
- store double %fp, double addrspace(1)* %out, align 8
- ret void
-}
Removed: llvm/trunk/test/CodeGen/R600/uint_to_fp.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/uint_to_fp.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/uint_to_fp.ll (original)
+++ llvm/trunk/test/CodeGen/R600/uint_to_fp.ll (removed)
@@ -1,82 +0,0 @@
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s
-
-; FUNC-LABEL: {{^}}uint_to_fp_i32_to_f32:
-; R600-DAG: UINT_TO_FLT * T{{[0-9]+\.[XYZW]}}, KC0[2].Z
-
-; SI: v_cvt_f32_u32_e32
-; SI: s_endpgm
-define void @uint_to_fp_i32_to_f32(float addrspace(1)* %out, i32 %in) {
- %result = uitofp i32 %in to float
- store float %result, float addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}uint_to_fp_v2i32_to_v2f32:
-; R600-DAG: UINT_TO_FLT * T{{[0-9]+\.[XYZW]}}, KC0[2].W
-; R600-DAG: UINT_TO_FLT * T{{[0-9]+\.[XYZW]}}, KC0[3].X
-
-; SI: v_cvt_f32_u32_e32
-; SI: v_cvt_f32_u32_e32
-; SI: s_endpgm
-define void @uint_to_fp_v2i32_to_v2f32(<2 x float> addrspace(1)* %out, <2 x i32> %in) {
- %result = uitofp <2 x i32> %in to <2 x float>
- store <2 x float> %result, <2 x float> addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}uint_to_fp_v4i32_to_v4f32:
-; R600: UINT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-; R600: UINT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-; R600: UINT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-; R600: UINT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-
-; SI: v_cvt_f32_u32_e32
-; SI: v_cvt_f32_u32_e32
-; SI: v_cvt_f32_u32_e32
-; SI: v_cvt_f32_u32_e32
-; SI: s_endpgm
-define void @uint_to_fp_v4i32_to_v4f32(<4 x float> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
- %value = load <4 x i32>, <4 x i32> addrspace(1) * %in
- %result = uitofp <4 x i32> %value to <4 x float>
- store <4 x float> %result, <4 x float> addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}uint_to_fp_i64_to_f32:
-; R600: UINT_TO_FLT
-; R600: UINT_TO_FLT
-; R600: MULADD_IEEE
-; SI: v_cvt_f32_u32_e32
-; SI: v_cvt_f32_u32_e32
-; SI: v_madmk_f32_e32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, 0x4f800000
-; SI: s_endpgm
-define void @uint_to_fp_i64_to_f32(float addrspace(1)* %out, i64 %in) {
-entry:
- %0 = uitofp i64 %in to float
- store float %0, float addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}uint_to_fp_i1_to_f32:
-; SI: v_cmp_eq_i32_e64 [[CMP:s\[[0-9]+:[0-9]\]]],
-; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1.0, [[CMP]]
-; SI: buffer_store_dword [[RESULT]],
-; SI: s_endpgm
-define void @uint_to_fp_i1_to_f32(float addrspace(1)* %out, i32 %in) {
- %cmp = icmp eq i32 %in, 0
- %fp = uitofp i1 %cmp to float
- store float %fp, float addrspace(1)* %out, align 4
- ret void
-}
-
-; FUNC-LABEL: {{^}}uint_to_fp_i1_to_f32_load:
-; SI: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1.0
-; SI: buffer_store_dword [[RESULT]],
-; SI: s_endpgm
-define void @uint_to_fp_i1_to_f32_load(float addrspace(1)* %out, i1 %in) {
- %fp = uitofp i1 %in to float
- store float %fp, float addrspace(1)* %out, align 4
- ret void
-}
Removed: llvm/trunk/test/CodeGen/R600/unaligned-load-store.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/unaligned-load-store.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/unaligned-load-store.ll (original)
+++ llvm/trunk/test/CodeGen/R600/unaligned-load-store.ll (removed)
@@ -1,254 +0,0 @@
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
-; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
-
-; SI-LABEL: {{^}}unaligned_load_store_i16_local:
-; SI: ds_read_u8
-; SI: ds_read_u8
-; SI: ds_write_b8
-; SI: ds_write_b8
-; SI: s_endpgm
-define void @unaligned_load_store_i16_local(i16 addrspace(3)* %p, i16 addrspace(3)* %r) nounwind {
- %v = load i16, i16 addrspace(3)* %p, align 1
- store i16 %v, i16 addrspace(3)* %r, align 1
- ret void
-}
-
-; SI-LABEL: {{^}}unaligned_load_store_i16_global:
-; SI: buffer_load_ubyte
-; SI: buffer_load_ubyte
-; SI: buffer_store_byte
-; SI: buffer_store_byte
-; SI: s_endpgm
-define void @unaligned_load_store_i16_global(i16 addrspace(1)* %p, i16 addrspace(1)* %r) nounwind {
- %v = load i16, i16 addrspace(1)* %p, align 1
- store i16 %v, i16 addrspace(1)* %r, align 1
- ret void
-}
-
-; SI-LABEL: {{^}}unaligned_load_store_i32_local:
-; SI: ds_read_u8
-; SI: ds_read_u8
-; SI: ds_read_u8
-; SI: ds_read_u8
-; SI: ds_write_b8
-; SI: ds_write_b8
-; SI: ds_write_b8
-; SI: ds_write_b8
-; SI: s_endpgm
-define void @unaligned_load_store_i32_local(i32 addrspace(3)* %p, i32 addrspace(3)* %r) nounwind {
- %v = load i32, i32 addrspace(3)* %p, align 1
- store i32 %v, i32 addrspace(3)* %r, align 1
- ret void
-}
-
-; SI-LABEL: {{^}}unaligned_load_store_i32_global:
-; SI: buffer_load_ubyte
-; SI: buffer_load_ubyte
-; SI: buffer_load_ubyte
-; SI: buffer_load_ubyte
-; SI: buffer_store_byte
-; SI: buffer_store_byte
-; SI: buffer_store_byte
-; SI: buffer_store_byte
-define void @unaligned_load_store_i32_global(i32 addrspace(1)* %p, i32 addrspace(1)* %r) nounwind {
- %v = load i32, i32 addrspace(1)* %p, align 1
- store i32 %v, i32 addrspace(1)* %r, align 1
- ret void
-}
-
-; SI-LABEL: {{^}}unaligned_load_store_i64_local:
-; SI: ds_read_u8
-; SI: ds_read_u8
-; SI: ds_read_u8
-; SI: ds_read_u8
-; SI: ds_read_u8
-; SI: ds_read_u8
-; SI: ds_read_u8
-; SI: ds_read_u8
-; SI: ds_write_b8
-; SI: ds_write_b8
-; SI: ds_write_b8
-; SI: ds_write_b8
-; SI: ds_write_b8
-; SI: ds_write_b8
-; SI: ds_write_b8
-; SI: ds_write_b8
-; SI: s_endpgm
-define void @unaligned_load_store_i64_local(i64 addrspace(3)* %p, i64 addrspace(3)* %r) {
- %v = load i64, i64 addrspace(3)* %p, align 1
- store i64 %v, i64 addrspace(3)* %r, align 1
- ret void
-}
-
-; SI-LABEL: {{^}}unaligned_load_store_i64_global:
-; SI: buffer_load_ubyte
-; SI: buffer_load_ubyte
-; SI: buffer_load_ubyte
-; SI: buffer_load_ubyte
-; SI: buffer_load_ubyte
-; SI: buffer_load_ubyte
-; SI: buffer_load_ubyte
-; SI: buffer_load_ubyte
-; SI: buffer_store_byte
-; SI: buffer_store_byte
-; SI: buffer_store_byte
-; SI: buffer_store_byte
-; SI: buffer_store_byte
-; SI: buffer_store_byte
-; SI: buffer_store_byte
-; SI: buffer_store_byte
-define void @unaligned_load_store_i64_global(i64 addrspace(1)* %p, i64 addrspace(1)* %r) {
- %v = load i64, i64 addrspace(1)* %p, align 1
- store i64 %v, i64 addrspace(1)* %r, align 1
- ret void
-}
-
-; SI-LABEL: {{^}}unaligned_load_store_v4i32_local:
-; SI: ds_read_u8
-; SI: ds_read_u8
-; SI: ds_read_u8
-; SI: ds_read_u8
-
-; SI: ds_read_u8
-; SI: ds_read_u8
-; SI: ds_read_u8
-; SI: ds_read_u8
-
-; SI: ds_read_u8
-; SI: ds_read_u8
-; SI: ds_read_u8
-; SI: ds_read_u8
-
-; SI: ds_read_u8
-; SI: ds_read_u8
-; SI: ds_read_u8
-; SI: ds_read_u8
-
-; SI: ds_write_b8
-; SI: ds_write_b8
-; SI: ds_write_b8
-; SI: ds_write_b8
-
-; SI: ds_write_b8
-; SI: ds_write_b8
-; SI: ds_write_b8
-; SI: ds_write_b8
-
-; SI: ds_write_b8
-; SI: ds_write_b8
-; SI: ds_write_b8
-; SI: ds_write_b8
-
-; SI: ds_write_b8
-; SI: ds_write_b8
-; SI: ds_write_b8
-; SI: ds_write_b8
-; SI: s_endpgm
-define void @unaligned_load_store_v4i32_local(<4 x i32> addrspace(3)* %p, <4 x i32> addrspace(3)* %r) nounwind {
- %v = load <4 x i32>, <4 x i32> addrspace(3)* %p, align 1
- store <4 x i32> %v, <4 x i32> addrspace(3)* %r, align 1
- ret void
-}
-
-; FIXME: We mark v4i32 as custom, so misaligned loads are never expanded.
-; FIXME-SI-LABEL: {{^}}unaligned_load_store_v4i32_global
-; FIXME-SI: buffer_load_ubyte
-; FIXME-SI: buffer_load_ubyte
-; FIXME-SI: buffer_load_ubyte
-; FIXME-SI: buffer_load_ubyte
-; FIXME-SI: buffer_load_ubyte
-; FIXME-SI: buffer_load_ubyte
-; FIXME-SI: buffer_load_ubyte
-; FIXME-SI: buffer_load_ubyte
-; FIXME-SI: buffer_load_ubyte
-; FIXME-SI: buffer_load_ubyte
-; FIXME-SI: buffer_load_ubyte
-; FIXME-SI: buffer_load_ubyte
-; FIXME-SI: buffer_load_ubyte
-; FIXME-SI: buffer_load_ubyte
-; FIXME-SI: buffer_load_ubyte
-; FIXME-SI: buffer_load_ubyte
-define void @unaligned_load_store_v4i32_global(<4 x i32> addrspace(1)* %p, <4 x i32> addrspace(1)* %r) nounwind {
- %v = load <4 x i32>, <4 x i32> addrspace(1)* %p, align 1
- store <4 x i32> %v, <4 x i32> addrspace(1)* %r, align 1
- ret void
-}
-
-; SI-LABEL: {{^}}load_lds_i64_align_4:
-; SI: ds_read2_b32
-; SI: s_endpgm
-define void @load_lds_i64_align_4(i64 addrspace(1)* nocapture %out, i64 addrspace(3)* %in) #0 {
- %val = load i64, i64 addrspace(3)* %in, align 4
- store i64 %val, i64 addrspace(1)* %out, align 8
- ret void
-}
-
-; SI-LABEL: {{^}}load_lds_i64_align_4_with_offset
-; SI: ds_read2_b32 v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]}} offset0:8 offset1:9
-; SI: s_endpgm
-define void @load_lds_i64_align_4_with_offset(i64 addrspace(1)* nocapture %out, i64 addrspace(3)* %in) #0 {
- %ptr = getelementptr i64, i64 addrspace(3)* %in, i32 4
- %val = load i64, i64 addrspace(3)* %ptr, align 4
- store i64 %val, i64 addrspace(1)* %out, align 8
- ret void
-}
-
-; SI-LABEL: {{^}}load_lds_i64_align_4_with_split_offset:
-; Tests the case where the lo offset fits in 8 bits, but the hi offset needs 9 bits
-; SI: ds_read2_b32 v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]}} offset1:1
-; SI: s_endpgm
-define void @load_lds_i64_align_4_with_split_offset(i64 addrspace(1)* nocapture %out, i64 addrspace(3)* %in) #0 {
- %ptr = bitcast i64 addrspace(3)* %in to i32 addrspace(3)*
- %ptr255 = getelementptr i32, i32 addrspace(3)* %ptr, i32 255
- %ptri64 = bitcast i32 addrspace(3)* %ptr255 to i64 addrspace(3)*
- %val = load i64, i64 addrspace(3)* %ptri64, align 4
- store i64 %val, i64 addrspace(1)* %out, align 8
- ret void
-}
-
-; SI-LABEL: {{^}}load_lds_i64_align_1:
-; SI: ds_read_u8
-; SI: ds_read_u8
-; SI: ds_read_u8
-; SI: ds_read_u8
-; SI: ds_read_u8
-; SI: ds_read_u8
-; SI: ds_read_u8
-; SI: ds_read_u8
-; SI: buffer_store_dwordx2
-; SI: s_endpgm
-
-define void @load_lds_i64_align_1(i64 addrspace(1)* nocapture %out, i64 addrspace(3)* %in) #0 {
- %val = load i64, i64 addrspace(3)* %in, align 1
- store i64 %val, i64 addrspace(1)* %out, align 8
- ret void
-}
-
-; SI-LABEL: {{^}}store_lds_i64_align_4:
-; SI: ds_write2_b32
-; SI: s_endpgm
-define void @store_lds_i64_align_4(i64 addrspace(3)* %out, i64 %val) #0 {
- store i64 %val, i64 addrspace(3)* %out, align 4
- ret void
-}
-
-; SI-LABEL: {{^}}store_lds_i64_align_4_with_offset
-; SI: ds_write2_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset0:8 offset1:9
-; SI: s_endpgm
-define void @store_lds_i64_align_4_with_offset(i64 addrspace(3)* %out) #0 {
- %ptr = getelementptr i64, i64 addrspace(3)* %out, i32 4
- store i64 0, i64 addrspace(3)* %ptr, align 4
- ret void
-}
-
-; SI-LABEL: {{^}}store_lds_i64_align_4_with_split_offset:
-; Tests the case where the lo offset fits in 8 bits, but the hi offset needs 9 bits
-; SI: ds_write2_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset1:1
-; SI: s_endpgm
-define void @store_lds_i64_align_4_with_split_offset(i64 addrspace(3)* %out) #0 {
- %ptr = bitcast i64 addrspace(3)* %out to i32 addrspace(3)*
- %ptr255 = getelementptr i32, i32 addrspace(3)* %ptr, i32 255
- %ptri64 = bitcast i32 addrspace(3)* %ptr255 to i64 addrspace(3)*
- store i64 0, i64 addrspace(3)* %ptri64, align 4
- ret void
-}
Removed: llvm/trunk/test/CodeGen/R600/unhandled-loop-condition-assertion.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/unhandled-loop-condition-assertion.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/unhandled-loop-condition-assertion.ll (original)
+++ llvm/trunk/test/CodeGen/R600/unhandled-loop-condition-assertion.ll (removed)
@@ -1,115 +0,0 @@
-; REQUIRES: asserts
-; XFAIL: *
-; RUN: llc -O0 -verify-machineinstrs -asm-verbose=0 -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=COMMON %s
-; RUN: llc -O0 -verify-machineinstrs -asm-verbose=0 -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefix=SI -check-prefix=COMMON %s
-; RUN: llc -O0 -verify-machineinstrs -asm-verbose=0 -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=COMMON %s
-
-; SI hits an assertion at -O0; Evergreen hits a "not implemented" unreachable.
-
-; COMMON-LABEL: {{^}}branch_true:
-define void @branch_true(i8 addrspace(1)* nocapture %main, i32 %main_stride) #0 {
-entry:
- br i1 true, label %for.end, label %for.body.lr.ph
-
-for.body.lr.ph: ; preds = %entry
- %add.ptr.sum = shl i32 %main_stride, 1
- %add.ptr1.sum = add i32 %add.ptr.sum, %main_stride
- %add.ptr4.sum = shl i32 %main_stride, 2
- br label %for.body
-
-for.body: ; preds = %for.body, %for.body.lr.ph
- %main.addr.011 = phi i8 addrspace(1)* [ %main, %for.body.lr.ph ], [ %add.ptr6, %for.body ]
- %0 = bitcast i8 addrspace(1)* %main.addr.011 to i32 addrspace(1)*
- %1 = load i32, i32 addrspace(1)* %0, align 4
- %add.ptr = getelementptr inbounds i8, i8 addrspace(1)* %main.addr.011, i32 %main_stride
- %2 = bitcast i8 addrspace(1)* %add.ptr to i32 addrspace(1)*
- %3 = load i32, i32 addrspace(1)* %2, align 4
- %add.ptr1 = getelementptr inbounds i8, i8 addrspace(1)* %main.addr.011, i32 %add.ptr.sum
- %4 = bitcast i8 addrspace(1)* %add.ptr1 to i32 addrspace(1)*
- %5 = load i32, i32 addrspace(1)* %4, align 4
- %add.ptr2 = getelementptr inbounds i8, i8 addrspace(1)* %main.addr.011, i32 %add.ptr1.sum
- %6 = bitcast i8 addrspace(1)* %add.ptr2 to i32 addrspace(1)*
- %7 = load i32, i32 addrspace(1)* %6, align 4
- %add.ptr3 = getelementptr inbounds i8, i8 addrspace(1)* %main.addr.011, i32 %add.ptr4.sum
- %8 = bitcast i8 addrspace(1)* %add.ptr3 to i32 addrspace(1)*
- %9 = load i32, i32 addrspace(1)* %8, align 4
- %add.ptr6 = getelementptr inbounds i8, i8 addrspace(1)* %main.addr.011, i32 undef
- br i1 undef, label %for.end, label %for.body
-
-for.end: ; preds = %for.body, %entry
- ret void
-}
-
-; COMMON-LABEL: {{^}}branch_false:
-; SI: .text
-; SI-NEXT: s_endpgm
-define void @branch_false(i8 addrspace(1)* nocapture %main, i32 %main_stride) #0 {
-entry:
- br i1 false, label %for.end, label %for.body.lr.ph
-
-for.body.lr.ph: ; preds = %entry
- %add.ptr.sum = shl i32 %main_stride, 1
- %add.ptr1.sum = add i32 %add.ptr.sum, %main_stride
- %add.ptr4.sum = shl i32 %main_stride, 2
- br label %for.body
-
-for.body: ; preds = %for.body, %for.body.lr.ph
- %main.addr.011 = phi i8 addrspace(1)* [ %main, %for.body.lr.ph ], [ %add.ptr6, %for.body ]
- %0 = bitcast i8 addrspace(1)* %main.addr.011 to i32 addrspace(1)*
- %1 = load i32, i32 addrspace(1)* %0, align 4
- %add.ptr = getelementptr inbounds i8, i8 addrspace(1)* %main.addr.011, i32 %main_stride
- %2 = bitcast i8 addrspace(1)* %add.ptr to i32 addrspace(1)*
- %3 = load i32, i32 addrspace(1)* %2, align 4
- %add.ptr1 = getelementptr inbounds i8, i8 addrspace(1)* %main.addr.011, i32 %add.ptr.sum
- %4 = bitcast i8 addrspace(1)* %add.ptr1 to i32 addrspace(1)*
- %5 = load i32, i32 addrspace(1)* %4, align 4
- %add.ptr2 = getelementptr inbounds i8, i8 addrspace(1)* %main.addr.011, i32 %add.ptr1.sum
- %6 = bitcast i8 addrspace(1)* %add.ptr2 to i32 addrspace(1)*
- %7 = load i32, i32 addrspace(1)* %6, align 4
- %add.ptr3 = getelementptr inbounds i8, i8 addrspace(1)* %main.addr.011, i32 %add.ptr4.sum
- %8 = bitcast i8 addrspace(1)* %add.ptr3 to i32 addrspace(1)*
- %9 = load i32, i32 addrspace(1)* %8, align 4
- %add.ptr6 = getelementptr inbounds i8, i8 addrspace(1)* %main.addr.011, i32 undef
- br i1 undef, label %for.end, label %for.body
-
-for.end: ; preds = %for.body, %entry
- ret void
-}
-
-; COMMON-LABEL: {{^}}branch_undef:
-; SI: .text
-; SI-NEXT: s_endpgm
-define void @branch_undef(i8 addrspace(1)* nocapture %main, i32 %main_stride) #0 {
-entry:
- br i1 undef, label %for.end, label %for.body.lr.ph
-
-for.body.lr.ph: ; preds = %entry
- %add.ptr.sum = shl i32 %main_stride, 1
- %add.ptr1.sum = add i32 %add.ptr.sum, %main_stride
- %add.ptr4.sum = shl i32 %main_stride, 2
- br label %for.body
-
-for.body: ; preds = %for.body, %for.body.lr.ph
- %main.addr.011 = phi i8 addrspace(1)* [ %main, %for.body.lr.ph ], [ %add.ptr6, %for.body ]
- %0 = bitcast i8 addrspace(1)* %main.addr.011 to i32 addrspace(1)*
- %1 = load i32, i32 addrspace(1)* %0, align 4
- %add.ptr = getelementptr inbounds i8, i8 addrspace(1)* %main.addr.011, i32 %main_stride
- %2 = bitcast i8 addrspace(1)* %add.ptr to i32 addrspace(1)*
- %3 = load i32, i32 addrspace(1)* %2, align 4
- %add.ptr1 = getelementptr inbounds i8, i8 addrspace(1)* %main.addr.011, i32 %add.ptr.sum
- %4 = bitcast i8 addrspace(1)* %add.ptr1 to i32 addrspace(1)*
- %5 = load i32, i32 addrspace(1)* %4, align 4
- %add.ptr2 = getelementptr inbounds i8, i8 addrspace(1)* %main.addr.011, i32 %add.ptr1.sum
- %6 = bitcast i8 addrspace(1)* %add.ptr2 to i32 addrspace(1)*
- %7 = load i32, i32 addrspace(1)* %6, align 4
- %add.ptr3 = getelementptr inbounds i8, i8 addrspace(1)* %main.addr.011, i32 %add.ptr4.sum
- %8 = bitcast i8 addrspace(1)* %add.ptr3 to i32 addrspace(1)*
- %9 = load i32, i32 addrspace(1)* %8, align 4
- %add.ptr6 = getelementptr inbounds i8, i8 addrspace(1)* %main.addr.011, i32 undef
- br i1 undef, label %for.end, label %for.body
-
-for.end: ; preds = %for.body, %entry
- ret void
-}
-
-attributes #0 = { nounwind }
Removed: llvm/trunk/test/CodeGen/R600/unroll.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/unroll.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/unroll.ll (original)
+++ llvm/trunk/test/CodeGen/R600/unroll.ll (removed)
@@ -1,36 +0,0 @@
-; RUN: opt -mtriple=amdgcn-- -loop-unroll -simplifycfg -sroa %s -S -o - | FileCheck %s
-; RUN: opt -mtriple=r600-- -loop-unroll -simplifycfg -sroa %s -S -o - | FileCheck %s
-
-
-; This test contains a simple loop that initializes an array declared in
-; private memory. We want to make sure these kinds of loops are always
-; unrolled, because private memory is slow.
-
-; CHECK-LABEL: @test
-; CHECK-NOT: alloca
-; CHECK: store i32 5, i32 addrspace(1)* %out
-define void @test(i32 addrspace(1)* %out) {
-entry:
- %0 = alloca [32 x i32]
- br label %loop.header
-
-loop.header:
- %counter = phi i32 [0, %entry], [%inc, %loop.inc]
- br label %loop.body
-
-loop.body:
- %ptr = getelementptr [32 x i32], [32 x i32]* %0, i32 0, i32 %counter
- store i32 %counter, i32* %ptr
- br label %loop.inc
-
-loop.inc:
- %inc = add i32 %counter, 1
- %1 = icmp sge i32 %counter, 32
- br i1 %1, label %exit, label %loop.header
-
-exit:
- %2 = getelementptr [32 x i32], [32 x i32]* %0, i32 0, i32 5
- %3 = load i32, i32* %2
- store i32 %3, i32 addrspace(1)* %out
- ret void
-}
Removed: llvm/trunk/test/CodeGen/R600/unsupported-cc.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/unsupported-cc.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/unsupported-cc.ll (original)
+++ llvm/trunk/test/CodeGen/R600/unsupported-cc.ll (removed)
@@ -1,125 +0,0 @@
-; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
-
-; These tests are for condition codes that are not supported by the hardware
-
-; CHECK-LABEL: {{^}}slt:
-; CHECK: SETGT_INT {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z
-; CHECK-NEXT: LSHR
-; CHECK-NEXT: 5(7.006492e-45)
-define void @slt(i32 addrspace(1)* %out, i32 %in) {
-entry:
- %0 = icmp slt i32 %in, 5
- %1 = select i1 %0, i32 -1, i32 0
- store i32 %1, i32 addrspace(1)* %out
- ret void
-}
-
-; CHECK-LABEL: {{^}}ult_i32:
-; CHECK: SETGT_UINT {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z
-; CHECK-NEXT: LSHR
-; CHECK-NEXT: 5(7.006492e-45)
-define void @ult_i32(i32 addrspace(1)* %out, i32 %in) {
-entry:
- %0 = icmp ult i32 %in, 5
- %1 = select i1 %0, i32 -1, i32 0
- store i32 %1, i32 addrspace(1)* %out
- ret void
-}
-
-; CHECK-LABEL: {{^}}ult_float:
-; CHECK: SETGE * T{{[0-9]}}.[[CHAN:[XYZW]]], KC0[2].Z, literal.x
-; CHECK-NEXT: 1084227584(5.000000e+00)
-; CHECK-NEXT: SETE T{{[0-9]\.[XYZW]}}, PV.[[CHAN]], 0.0
-; CHECK-NEXT: LSHR *
-define void @ult_float(float addrspace(1)* %out, float %in) {
-entry:
- %0 = fcmp ult float %in, 5.0
- %1 = select i1 %0, float 1.0, float 0.0
- store float %1, float addrspace(1)* %out
- ret void
-}
-
-; CHECK-LABEL: {{^}}ult_float_native:
-; CHECK: SETGE T{{[0-9]\.[XYZW]}}, KC0[2].Z, literal.x
-; CHECK-NEXT: LSHR *
-; CHECK-NEXT: 1084227584(5.000000e+00)
-define void @ult_float_native(float addrspace(1)* %out, float %in) {
-entry:
- %0 = fcmp ult float %in, 5.0
- %1 = select i1 %0, float 0.0, float 1.0
- store float %1, float addrspace(1)* %out
- ret void
-}
-
-; CHECK-LABEL: {{^}}olt:
-; CHECK: SETGT T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z
-; CHECK-NEXT: LSHR *
-; CHECK-NEXT: 1084227584(5.000000e+00)
-define void @olt(float addrspace(1)* %out, float %in) {
-entry:
- %0 = fcmp olt float %in, 5.0
- %1 = select i1 %0, float 1.0, float 0.0
- store float %1, float addrspace(1)* %out
- ret void
-}
-
-; CHECK-LABEL: {{^}}sle:
-; CHECK: SETGT_INT {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z
-; CHECK-NEXT: LSHR
-; CHECK-NEXT: 6(8.407791e-45)
-define void @sle(i32 addrspace(1)* %out, i32 %in) {
-entry:
- %0 = icmp sle i32 %in, 5
- %1 = select i1 %0, i32 -1, i32 0
- store i32 %1, i32 addrspace(1)* %out
- ret void
-}
-
-; CHECK-LABEL: {{^}}ule_i32:
-; CHECK: SETGT_UINT {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z
-; CHECK-NEXT: LSHR
-; CHECK-NEXT: 6(8.407791e-45)
-define void @ule_i32(i32 addrspace(1)* %out, i32 %in) {
-entry:
- %0 = icmp ule i32 %in, 5
- %1 = select i1 %0, i32 -1, i32 0
- store i32 %1, i32 addrspace(1)* %out
- ret void
-}
-
-; CHECK-LABEL: {{^}}ule_float:
-; CHECK: SETGT * T{{[0-9]}}.[[CHAN:[XYZW]]], KC0[2].Z, literal.x
-; CHECK-NEXT: 1084227584(5.000000e+00)
-; CHECK-NEXT: SETE T{{[0-9]\.[XYZW]}}, PV.[[CHAN]], 0.0
-; CHECK-NEXT: LSHR *
-define void @ule_float(float addrspace(1)* %out, float %in) {
-entry:
- %0 = fcmp ule float %in, 5.0
- %1 = select i1 %0, float 1.0, float 0.0
- store float %1, float addrspace(1)* %out
- ret void
-}
-
-; CHECK-LABEL: {{^}}ule_float_native:
-; CHECK: SETGT T{{[0-9]\.[XYZW]}}, KC0[2].Z, literal.x
-; CHECK-NEXT: LSHR *
-; CHECK-NEXT: 1084227584(5.000000e+00)
-define void @ule_float_native(float addrspace(1)* %out, float %in) {
-entry:
- %0 = fcmp ule float %in, 5.0
- %1 = select i1 %0, float 0.0, float 1.0
- store float %1, float addrspace(1)* %out
- ret void
-}
-
-; CHECK-LABEL: {{^}}ole:
-; CHECK: SETGE T{{[0-9]\.[XYZW]}}, literal.x, KC0[2].Z
-; CHECK-NEXT: LSHR *
-; CHECK-NEXT: 1084227584(5.000000e+00)
-define void @ole(float addrspace(1)* %out, float %in) {
-entry:
- %0 = fcmp ole float %in, 5.0
- %1 = select i1 %0, float 1.0, float 0.0
- store float %1, float addrspace(1)* %out
- ret void
-}
Removed: llvm/trunk/test/CodeGen/R600/urecip.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/urecip.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/urecip.ll (original)
+++ llvm/trunk/test/CodeGen/R600/urecip.ll (removed)
@@ -1,13 +0,0 @@
-;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s
-;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s
-
-;CHECK: v_rcp_iflag_f32_e32
-
-define void @test(i32 %p, i32 %q) {
- %i = udiv i32 %p, %q
- %r = bitcast i32 %i to float
- call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %r, float %r, float %r, float %r)
- ret void
-}
-
-declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
Removed: llvm/trunk/test/CodeGen/R600/urem.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/urem.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/urem.ll (original)
+++ llvm/trunk/test/CodeGen/R600/urem.ll (removed)
@@ -1,94 +0,0 @@
-; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
-
-; The code generated by urem is long and complex and may frequently
-; change. The goal of this test is to make sure the ISel doesn't fail
-; when it gets a v2i32/v4i32 urem
-
-; FUNC-LABEL: {{^}}test_urem_i32:
-; SI: s_endpgm
-; EG: CF_END
-define void @test_urem_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
- %b_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
- %a = load i32, i32 addrspace(1)* %in
- %b = load i32, i32 addrspace(1)* %b_ptr
- %result = urem i32 %a, %b
- store i32 %result, i32 addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}test_urem_i32_7:
-; SI: v_mov_b32_e32 [[MAGIC:v[0-9]+]], 0x24924925
-; SI: v_mul_hi_u32 {{v[0-9]+}}, [[MAGIC]]
-; SI: v_subrev_i32
-; SI: v_mul_lo_i32
-; SI: v_sub_i32
-; SI: buffer_store_dword
-; SI: s_endpgm
-define void @test_urem_i32_7(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
- %num = load i32, i32 addrspace(1) * %in
- %result = urem i32 %num, 7
- store i32 %result, i32 addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}test_urem_v2i32:
-; SI: s_endpgm
-; EG: CF_END
-define void @test_urem_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
- %b_ptr = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %in, i32 1
- %a = load <2 x i32>, <2 x i32> addrspace(1)* %in
- %b = load <2 x i32>, <2 x i32> addrspace(1)* %b_ptr
- %result = urem <2 x i32> %a, %b
- store <2 x i32> %result, <2 x i32> addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}test_urem_v4i32:
-; SI: s_endpgm
-; EG: CF_END
-define void @test_urem_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
- %b_ptr = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %in, i32 1
- %a = load <4 x i32>, <4 x i32> addrspace(1)* %in
- %b = load <4 x i32>, <4 x i32> addrspace(1)* %b_ptr
- %result = urem <4 x i32> %a, %b
- store <4 x i32> %result, <4 x i32> addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}test_urem_i64:
-; SI: s_endpgm
-; EG: CF_END
-define void @test_urem_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
- %b_ptr = getelementptr i64, i64 addrspace(1)* %in, i64 1
- %a = load i64, i64 addrspace(1)* %in
- %b = load i64, i64 addrspace(1)* %b_ptr
- %result = urem i64 %a, %b
- store i64 %result, i64 addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}test_urem_v2i64:
-; SI: s_endpgm
-; EG: CF_END
-define void @test_urem_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* %in) {
- %b_ptr = getelementptr <2 x i64>, <2 x i64> addrspace(1)* %in, i64 1
- %a = load <2 x i64>, <2 x i64> addrspace(1)* %in
- %b = load <2 x i64>, <2 x i64> addrspace(1)* %b_ptr
- %result = urem <2 x i64> %a, %b
- store <2 x i64> %result, <2 x i64> addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}test_urem_v4i64:
-; SI: s_endpgm
-; EG: CF_END
-define void @test_urem_v4i64(<4 x i64> addrspace(1)* %out, <4 x i64> addrspace(1)* %in) {
- %b_ptr = getelementptr <4 x i64>, <4 x i64> addrspace(1)* %in, i64 1
- %a = load <4 x i64>, <4 x i64> addrspace(1)* %in
- %b = load <4 x i64>, <4 x i64> addrspace(1)* %b_ptr
- %result = urem <4 x i64> %a, %b
- store <4 x i64> %result, <4 x i64> addrspace(1)* %out
- ret void
-}
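
The only function above with detailed CHECK lines is test_urem_i32_7: a remainder
by the constant 7 is expected to become a multiply-high by the magic constant
0x24924925 plus a short correction, a multiply-low and a subtract, rather than a
real division. The CHECK lines do not list every instruction; one plausible
reading of the sequence (the C function name is illustrative, and 0x24924925 is
ceil(2^35/7) - 2^32 in the usual Hacker's Delight style expansion) is:

  #include <stdint.h>

  static uint32_t urem7(uint32_t n) {
      const uint32_t MAGIC = 0x24924925u;                    /* ceil(2^35/7) - 2^32 */
      uint32_t t = (uint32_t)(((uint64_t)n * MAGIC) >> 32);  /* v_mul_hi_u32 */
      uint32_t q = (((n - t) >> 1) + t) >> 2;                /* (n - t) is the v_subrev_i32;
                                                                this is an overflow-free
                                                                (n + t)/2, then >>2: q = n/7 */
      return n - q * 7u;                                     /* v_mul_lo_i32 + v_sub_i32: n % 7 */
  }
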
Removed: llvm/trunk/test/CodeGen/R600/use-sgpr-multiple-times.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/use-sgpr-multiple-times.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/use-sgpr-multiple-times.ll (original)
+++ llvm/trunk/test/CodeGen/R600/use-sgpr-multiple-times.ll (removed)
@@ -1,103 +0,0 @@
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN %s
-; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN %s
-
-declare float @llvm.fma.f32(float, float, float) #1
-declare float @llvm.fmuladd.f32(float, float, float) #1
-declare i32 @llvm.AMDGPU.imad24(i32, i32, i32) #1
-
-
-; GCN-LABEL: {{^}}test_sgpr_use_twice_binop:
-; GCN: s_load_dword [[SGPR:s[0-9]+]],
-; GCN: v_add_f32_e64 [[RESULT:v[0-9]+]], [[SGPR]], [[SGPR]]
-; GCN: buffer_store_dword [[RESULT]]
-define void @test_sgpr_use_twice_binop(float addrspace(1)* %out, float %a) #0 {
- %dbl = fadd float %a, %a
- store float %dbl, float addrspace(1)* %out, align 4
- ret void
-}
-
-; GCN-LABEL: {{^}}test_sgpr_use_three_ternary_op:
-; GCN: s_load_dword [[SGPR:s[0-9]+]],
-; GCN: v_fma_f32 [[RESULT:v[0-9]+]], [[SGPR]], [[SGPR]], [[SGPR]]
-; GCN: buffer_store_dword [[RESULT]]
-define void @test_sgpr_use_three_ternary_op(float addrspace(1)* %out, float %a) #0 {
- %fma = call float @llvm.fma.f32(float %a, float %a, float %a) #1
- store float %fma, float addrspace(1)* %out, align 4
- ret void
-}
-
-; GCN-LABEL: {{^}}test_sgpr_use_twice_ternary_op_a_a_b:
-; SI: s_load_dword [[SGPR0:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
-; SI: s_load_dword [[SGPR1:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
-; VI: s_load_dword [[SGPR0:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c
-; VI: s_load_dword [[SGPR1:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x30
-; GCN: v_mov_b32_e32 [[VGPR1:v[0-9]+]], [[SGPR1]]
-; GCN: v_fma_f32 [[RESULT:v[0-9]+]], [[SGPR0]], [[SGPR0]], [[VGPR1]]
-; GCN: buffer_store_dword [[RESULT]]
-define void @test_sgpr_use_twice_ternary_op_a_a_b(float addrspace(1)* %out, float %a, float %b) #0 {
- %fma = call float @llvm.fma.f32(float %a, float %a, float %b) #1
- store float %fma, float addrspace(1)* %out, align 4
- ret void
-}
-
-; GCN-LABEL: {{^}}test_sgpr_use_twice_ternary_op_a_b_a:
-; SI: s_load_dword [[SGPR0:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
-; SI: s_load_dword [[SGPR1:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
-; VI: s_load_dword [[SGPR0:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c
-; VI: s_load_dword [[SGPR1:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x30
-; GCN: v_mov_b32_e32 [[VGPR1:v[0-9]+]], [[SGPR1]]
-; GCN: v_fma_f32 [[RESULT:v[0-9]+]], [[VGPR1]], [[SGPR0]], [[SGPR0]]
-; GCN: buffer_store_dword [[RESULT]]
-define void @test_sgpr_use_twice_ternary_op_a_b_a(float addrspace(1)* %out, float %a, float %b) #0 {
- %fma = call float @llvm.fma.f32(float %a, float %b, float %a) #1
- store float %fma, float addrspace(1)* %out, align 4
- ret void
-}
-
-; GCN-LABEL: {{^}}test_sgpr_use_twice_ternary_op_b_a_a:
-; SI: s_load_dword [[SGPR0:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
-; SI: s_load_dword [[SGPR1:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
-; VI: s_load_dword [[SGPR0:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c
-; VI: s_load_dword [[SGPR1:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x30
-; GCN: v_mov_b32_e32 [[VGPR1:v[0-9]+]], [[SGPR1]]
-; GCN: v_fma_f32 [[RESULT:v[0-9]+]], [[SGPR0]], [[VGPR1]], [[SGPR0]]
-; GCN: buffer_store_dword [[RESULT]]
-define void @test_sgpr_use_twice_ternary_op_b_a_a(float addrspace(1)* %out, float %a, float %b) #0 {
- %fma = call float @llvm.fma.f32(float %b, float %a, float %a) #1
- store float %fma, float addrspace(1)* %out, align 4
- ret void
-}
-
-; GCN-LABEL: {{^}}test_sgpr_use_twice_ternary_op_a_a_imm:
-; GCN: s_load_dword [[SGPR:s[0-9]+]]
-; GCN: v_fma_f32 [[RESULT:v[0-9]+]], [[SGPR]], [[SGPR]], 2.0
-; GCN: buffer_store_dword [[RESULT]]
-define void @test_sgpr_use_twice_ternary_op_a_a_imm(float addrspace(1)* %out, float %a) #0 {
- %fma = call float @llvm.fma.f32(float %a, float %a, float 2.0) #1
- store float %fma, float addrspace(1)* %out, align 4
- ret void
-}
-
-; GCN-LABEL: {{^}}test_sgpr_use_twice_ternary_op_a_imm_a:
-; GCN: s_load_dword [[SGPR:s[0-9]+]]
-; GCN: v_fma_f32 [[RESULT:v[0-9]+]], 2.0, [[SGPR]], [[SGPR]]
-; GCN: buffer_store_dword [[RESULT]]
-define void @test_sgpr_use_twice_ternary_op_a_imm_a(float addrspace(1)* %out, float %a) #0 {
- %fma = call float @llvm.fma.f32(float %a, float 2.0, float %a) #1
- store float %fma, float addrspace(1)* %out, align 4
- ret void
-}
-
-; Don't use fma since fma c, x, y is canonicalized to fma x, c, y
-; GCN-LABEL: {{^}}test_sgpr_use_twice_ternary_op_imm_a_a:
-; GCN: s_load_dword [[SGPR:s[0-9]+]]
-; GCN: v_mad_i32_i24 [[RESULT:v[0-9]+]], 2, [[SGPR]], [[SGPR]]
-; GCN: buffer_store_dword [[RESULT]]
-define void @test_sgpr_use_twice_ternary_op_imm_a_a(i32 addrspace(1)* %out, i32 %a) #0 {
- %fma = call i32 @llvm.AMDGPU.imad24(i32 2, i32 %a, i32 %a) #1
- store i32 %fma, i32 addrspace(1)* %out, align 4
- ret void
-}
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
Removed: llvm/trunk/test/CodeGen/R600/usubo.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/usubo.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/usubo.ll (original)
+++ llvm/trunk/test/CodeGen/R600/usubo.ll (removed)
@@ -1,86 +0,0 @@
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs< %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
-
-declare { i32, i1 } @llvm.usub.with.overflow.i32(i32, i32) nounwind readnone
-declare { i64, i1 } @llvm.usub.with.overflow.i64(i64, i64) nounwind readnone
-
-; FUNC-LABEL: {{^}}usubo_i64_zext:
-
-; EG: SUBB_UINT
-; EG: ADDC_UINT
-define void @usubo_i64_zext(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
- %usub = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 %a, i64 %b) nounwind
- %val = extractvalue { i64, i1 } %usub, 0
- %carry = extractvalue { i64, i1 } %usub, 1
- %ext = zext i1 %carry to i64
- %add2 = add i64 %val, %ext
- store i64 %add2, i64 addrspace(1)* %out, align 8
- ret void
-}
-
-; FUNC-LABEL: {{^}}s_usubo_i32:
-; SI: s_sub_i32
-
-; EG-DAG: SUBB_UINT
-; EG-DAG: SUB_INT
-define void @s_usubo_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %carryout, i32 %a, i32 %b) nounwind {
- %usub = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %a, i32 %b) nounwind
- %val = extractvalue { i32, i1 } %usub, 0
- %carry = extractvalue { i32, i1 } %usub, 1
- store i32 %val, i32 addrspace(1)* %out, align 4
- store i1 %carry, i1 addrspace(1)* %carryout
- ret void
-}
-
-; FUNC-LABEL: {{^}}v_usubo_i32:
-; SI: v_subrev_i32_e32
-
-; EG-DAG: SUBB_UINT
-; EG-DAG: SUB_INT
-define void @v_usubo_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %carryout, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
- %a = load i32, i32 addrspace(1)* %aptr, align 4
- %b = load i32, i32 addrspace(1)* %bptr, align 4
- %usub = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %a, i32 %b) nounwind
- %val = extractvalue { i32, i1 } %usub, 0
- %carry = extractvalue { i32, i1 } %usub, 1
- store i32 %val, i32 addrspace(1)* %out, align 4
- store i1 %carry, i1 addrspace(1)* %carryout
- ret void
-}
-
-; FUNC-LABEL: {{^}}s_usubo_i64:
-; SI: s_sub_u32
-; SI: s_subb_u32
-
-; EG-DAG: SUBB_UINT
-; EG-DAG: SUB_INT
-; EG-DAG: SUB_INT
-; EG: SUB_INT
-define void @s_usubo_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %carryout, i64 %a, i64 %b) nounwind {
- %usub = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 %a, i64 %b) nounwind
- %val = extractvalue { i64, i1 } %usub, 0
- %carry = extractvalue { i64, i1 } %usub, 1
- store i64 %val, i64 addrspace(1)* %out, align 8
- store i1 %carry, i1 addrspace(1)* %carryout
- ret void
-}
-
-; FUNC-LABEL: {{^}}v_usubo_i64:
-; SI: v_sub_i32
-; SI: v_subb_u32
-
-; EG-DAG: SUBB_UINT
-; EG-DAG: SUB_INT
-; EG-DAG: SUB_INT
-; EG: SUB_INT
-define void @v_usubo_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %carryout, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) nounwind {
- %a = load i64, i64 addrspace(1)* %aptr, align 4
- %b = load i64, i64 addrspace(1)* %bptr, align 4
- %usub = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 %a, i64 %b) nounwind
- %val = extractvalue { i64, i1 } %usub, 0
- %carry = extractvalue { i64, i1 } %usub, 1
- store i64 %val, i64 addrspace(1)* %out, align 8
- store i1 %carry, i1 addrspace(1)* %carryout
- ret void
-}
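
usubo.ll exercises @llvm.usub.with.overflow: the result pair is the wrapped
difference plus a borrow-out bit, and the i64 variants are expected to split into
a low subtract that produces a borrow and a high subtract-with-borrow (the
s_sub_u32/s_subb_u32 and v_sub_i32/v_subb_u32 pairs in the CHECK lines). A small
sketch of those semantics, with illustrative function names:

  #include <stdbool.h>
  #include <stdint.h>

  /* What llvm.usub.with.overflow.i32 computes: wrapped difference + borrow. */
  static uint32_t usubo32(uint32_t a, uint32_t b, bool *borrow) {
      *borrow = a < b;     /* the i1 overflow result */
      return a - b;        /* wraps modulo 2^32 */
  }

  /* The i64 form as the backend splits it: the low subtract produces a borrow
     that feeds a high subtract-with-borrow. */
  static uint64_t usubo64(uint64_t a, uint64_t b, bool *borrow) {
      uint32_t alo = (uint32_t)a, ahi = (uint32_t)(a >> 32);
      uint32_t blo = (uint32_t)b, bhi = (uint32_t)(b >> 32);
      uint32_t lo  = alo - blo;
      uint32_t bor = alo < blo;                        /* borrow out of the low half */
      uint32_t hi  = ahi - bhi - bor;                  /* subtract-with-borrow */
      *borrow = (uint64_t)ahi < (uint64_t)bhi + bor;   /* borrow out of the high half */
      return ((uint64_t)hi << 32) | lo;
  }
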
Removed: llvm/trunk/test/CodeGen/R600/v1i64-kernel-arg.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/v1i64-kernel-arg.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/v1i64-kernel-arg.ll (original)
+++ llvm/trunk/test/CodeGen/R600/v1i64-kernel-arg.ll (removed)
@@ -1,17 +0,0 @@
-; REQUIRES: asserts
-; XFAIL: *
-; RUN: llc -march=r600 -mcpu=cypress < %s | FileCheck %s
-
-; CHECK-LABEL: {{^}}kernel_arg_i64:
-define void @kernel_arg_i64(i64 addrspace(1)* %out, i64 %a) nounwind {
- store i64 %a, i64 addrspace(1)* %out, align 8
- ret void
-}
-
-; i64 arg works, v1i64 arg does not.
-; CHECK-LABEL: {{^}}kernel_arg_v1i64:
-define void @kernel_arg_v1i64(<1 x i64> addrspace(1)* %out, <1 x i64> %a) nounwind {
- store <1 x i64> %a, <1 x i64> addrspace(1)* %out, align 8
- ret void
-}
-
Removed: llvm/trunk/test/CodeGen/R600/v_cndmask.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/v_cndmask.ll?rev=239656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/v_cndmask.ll (original)
+++ llvm/trunk/test/CodeGen/R600/v_cndmask.ll (removed)
@@ -1,39 +0,0 @@
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
-; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
-
-declare i32 @llvm.r600.read.tidig.x() #1
-
-; SI-LABEL: {{^}}v_cnd_nan_nosgpr:
-; SI: v_cndmask_b32_e64 v{{[0-9]}}, v{{[0-9]}}, -1, s{{\[[0-9]+:[0-9]+\]}}
-; SI-DAG: v{{[0-9]}}
-; All nan values are converted to 0xffffffff
-; SI: s_endpgm
-define void @v_cnd_nan_nosgpr(float addrspace(1)* %out, i32 %c, float addrspace(1)* %fptr) #0 {
- %idx = call i32 @llvm.r600.read.tidig.x() #1
- %f.gep = getelementptr float, float addrspace(1)* %fptr, i32 %idx
- %f = load float, float addrspace(1)* %fptr
- %setcc = icmp ne i32 %c, 0
- %select = select i1 %setcc, float 0xFFFFFFFFE0000000, float %f
- store float %select, float addrspace(1)* %out
- ret void
-}
-
-
-; This requires slightly trickier SGPR operand legalization since the
-; single constant bus SGPR usage is the last operand, and it should
-; never be moved.
-
-; SI-LABEL: {{^}}v_cnd_nan:
-; SI: v_cndmask_b32_e64 v{{[0-9]}}, v{{[0-9]}}, -1, s{{\[[0-9]+:[0-9]+\]}}
-; SI-DAG: v{{[0-9]}}
-; All nan values are converted to 0xffffffff
-; SI: s_endpgm
-define void @v_cnd_nan(float addrspace(1)* %out, i32 %c, float %f) #0 {
- %setcc = icmp ne i32 %c, 0
- %select = select i1 %setcc, float 0xFFFFFFFFE0000000, float %f
- store float %select, float addrspace(1)* %out
- ret void
-}
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
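
Both functions in v_cndmask.ll select between a float value and the constant
0xFFFFFFFFE0000000, a NaN whose single-precision bit pattern is 0xFFFFFFFF; that
pattern is representable as the inline constant -1, so the icmp plus select is
expected to fold into a single v_cndmask_b32 with -1 as a source, and per the
comment in the test the codegen subtlety is keeping the condition mask as the one
allowed SGPR (constant bus) operand. In plain C the computation being tested is
just the following (function and variable names are illustrative):

  #include <stdint.h>
  #include <string.h>

  static float cnd_nan(int c, float f) {
      uint32_t nan_bits = 0xFFFFFFFFu;   /* all-ones NaN: encodable as inline -1 */
      float nan_val;
      memcpy(&nan_val, &nan_bits, sizeof nan_val);
      return c != 0 ? nan_val : f;       /* icmp ne + select -> v_cndmask_b32 */
  }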