[llvm] r291717 - AMDGPU: Fix sext_inreg for i1 in i16

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Wed Jan 11 14:35:22 PST 2017


Author: arsenm
Date: Wed Jan 11 16:35:22 2017
New Revision: 291717

URL: http://llvm.org/viewvc/llvm-project?rev=291717&view=rev
Log:
AMDGPU: Fix sext_inreg for i1 in i16

This produces worse code when i16 is legal, mostly
due to combines getting confused by conversions inserted
for uniform 16-bit operations.

Modified:
    llvm/trunk/lib/Target/AMDGPU/SIInstructions.td
    llvm/trunk/test/CodeGen/AMDGPU/sext-in-reg.ll

Modified: llvm/trunk/lib/Target/AMDGPU/SIInstructions.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIInstructions.td?rev=291717&r1=291716&r2=291717&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInstructions.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIInstructions.td Wed Jan 11 16:35:22 2017
@@ -871,6 +871,11 @@ def : Pat <
 >;
 
 def : Pat <
+  (i16 (sext_inreg i16:$src, i1)),
+  (S_BFE_I32 $src, (i32 0x00010000)) // 0 | 1 << 16
+>;
+
+def : Pat <
   (i16 (sext_inreg i16:$src, i8)),
   (S_BFE_I32 $src, (i32 0x80000)) // 0 | 8 << 16
 >;

Modified: llvm/trunk/test/CodeGen/AMDGPU/sext-in-reg.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/sext-in-reg.ll?rev=291717&r1=291716&r2=291717&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/sext-in-reg.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/sext-in-reg.ll Wed Jan 11 16:35:22 2017
@@ -2,6 +2,8 @@
 ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=FUNC %s
 ; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
 
+; FIXME: i16 promotion pass ruins the scalar cases when legal.
+
 ; FUNC-LABEL: {{^}}sext_in_reg_i1_i32:
 ; GCN: s_load_dword [[ARG:s[0-9]+]],
 ; GCN: s_bfe_i32 [[SEXTRACT:s[0-9]+]], [[ARG]], 0x10000
@@ -659,6 +661,137 @@ define void @v_sext_in_reg_i32_to_i64_mo
   ret void
 }
 
+; FUNC-LABEL: {{^}}s_sext_in_reg_i1_i16:
+; GCN: s_load_dword [[VAL:s[0-9]+]]
+
+; SI: s_bfe_i32 [[BFE:s[0-9]+]], [[VAL]], 0x10000
+; SI: v_mov_b32_e32 [[VBFE:v[0-9]+]], [[BFE]]
+; SI: buffer_store_short [[VBFE]]
+
+; VI: s_lshl_b32 s{{[0-9]+}}, s{{[0-9]+}}, 15
+; VI: s_sext_i32_i16 s{{[0-9]+}}, s{{[0-9]+}}
+; VI: s_lshr_b32 s{{[0-9]+}}, s{{[0-9]+}}, 15
+define void @s_sext_in_reg_i1_i16(i16 addrspace(1)* %out, i32 addrspace(2)* %ptr) #0 {
+  %ld = load i32, i32 addrspace(2)* %ptr
+  %in = trunc i32 %ld to i16
+  %shl = shl i16 %in, 15
+  %sext = ashr i16 %shl, 15
+  store i16 %sext, i16 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}s_sext_in_reg_i2_i16:
+; GCN: s_load_dword [[VAL:s[0-9]+]]
+
+; SI: s_bfe_i32 [[BFE:s[0-9]+]], [[VAL]], 0x20000
+; SI: v_mov_b32_e32 [[VBFE:v[0-9]+]], [[BFE]]
+; SI: buffer_store_short [[VBFE]]
+
+; VI: s_lshl_b32 s{{[0-9]+}}, s{{[0-9]+}}, 14
+; VI: s_sext_i32_i16 s{{[0-9]+}}, s{{[0-9]+}}
+; VI: s_lshr_b32 s{{[0-9]+}}, s{{[0-9]+}}, 14
+define void @s_sext_in_reg_i2_i16(i16 addrspace(1)* %out, i32 addrspace(2)* %ptr) #0 {
+  %ld = load i32, i32 addrspace(2)* %ptr
+  %in = trunc i32 %ld to i16
+  %shl = shl i16 %in, 14
+  %sext = ashr i16 %shl, 14
+  store i16 %sext, i16 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}v_sext_in_reg_i1_i16:
+; GCN: {{buffer|flat}}_load_ushort [[VAL:v[0-9]+]]
+; GCN: v_bfe_i32 [[BFE:v[0-9]+]], [[VAL]], 0, 1{{$}}
+
+; GCN: ds_write_b16 v{{[0-9]+}}, [[BFE]]
+define void @v_sext_in_reg_i1_i16(i16 addrspace(3)* %out, i16 addrspace(1)* %ptr) #0 {
+  %tid = call i32 @llvm.r600.read.tidig.x()
+  %gep = getelementptr i16, i16 addrspace(1)* %ptr, i32 %tid
+  %out.gep = getelementptr i16, i16 addrspace(3)* %out, i32 %tid
+
+  %in = load i16, i16 addrspace(1)* %gep
+  %shl = shl i16 %in, 15
+  %sext = ashr i16 %shl, 15
+  store i16 %sext, i16 addrspace(3)* %out.gep
+  ret void
+}
+
+; FUNC-LABEL: {{^}}v_sext_in_reg_i1_i16_nonload:
+; GCN: {{buffer|flat}}_load_ushort [[VAL0:v[0-9]+]]
+; GCN: {{buffer|flat}}_load_ushort [[VAL1:v[0-9]+]]
+
+; SI: v_lshlrev_b32_e32 [[REG:v[0-9]+]], [[VAL1]], [[VAL0]]
+; VI: v_lshlrev_b16_e32 [[REG:v[0-9]+]], [[VAL1]], [[VAL0]]
+
+; GCN: v_bfe_i32 [[BFE:v[0-9]+]], [[REG]], 0, 1{{$}}
+; GCN: ds_write_b16 v{{[0-9]+}}, [[BFE]]
+define void @v_sext_in_reg_i1_i16_nonload(i16 addrspace(3)* %out, i16 addrspace(1)* %aptr, i16 addrspace(1)* %bptr, i16 %s.val) nounwind {
+  %tid = call i32 @llvm.r600.read.tidig.x()
+  %a.gep = getelementptr i16, i16 addrspace(1)* %aptr, i32 %tid
+  %b.gep = getelementptr i16, i16 addrspace(1)* %bptr, i32 %tid
+  %out.gep = getelementptr i16, i16 addrspace(3)* %out, i32 %tid
+  %a = load volatile i16, i16 addrspace(1)* %a.gep, align 2
+  %b = load volatile i16, i16 addrspace(1)* %b.gep, align 2
+
+  %c = shl i16 %a, %b
+  %shl = shl i16 %c, 15
+  %ashr = ashr i16 %shl, 15
+
+  store i16 %ashr, i16 addrspace(3)* %out.gep, align 2
+  ret void
+}
+
+; FUNC-LABEL: {{^}}s_sext_in_reg_i2_i16_arg:
+; GCN: s_load_dword [[VAL:s[0-9]+]]
+
+; SI: s_bfe_i32 [[BFE:s[0-9]+]], [[VAL]], 0x20000
+; SI: v_mov_b32_e32 [[VBFE:v[0-9]+]], [[BFE]]
+; SI: buffer_store_short [[VBFE]]
+
+; VI: s_lshl_b32 s{{[0-9]+}}, s{{[0-9]+}}, 14{{$}}
+; VI: s_sext_i32_i16 s{{[0-9]+}}, s{{[0-9]+}}
+; VI: s_lshr_b32 s{{[0-9]+}}, s{{[0-9]+}}, 14{{$}}
+define void @s_sext_in_reg_i2_i16_arg(i16 addrspace(1)* %out, i16 %in) #0 {
+  %shl = shl i16 %in, 14
+  %sext = ashr i16 %shl, 14
+  store i16 %sext, i16 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}s_sext_in_reg_i8_i16_arg:
+; GCN: s_load_dword [[VAL:s[0-9]+]]
+
+; SI: s_sext_i32_i8 [[SSEXT:s[0-9]+]], [[VAL]]
+; SI: v_mov_b32_e32 [[VSEXT:v[0-9]+]], [[SSEXT]]
+; SI: buffer_store_short [[VSEXT]]
+
+; VI: s_lshl_b32 s{{[0-9]+}}, s{{[0-9]+}}, 8{{$}}
+; VI: s_sext_i32_i16 s{{[0-9]+}}, s{{[0-9]+}}
+; VI: s_lshr_b32 s{{[0-9]+}}, s{{[0-9]+}}, 8{{$}}
+define void @s_sext_in_reg_i8_i16_arg(i16 addrspace(1)* %out, i16 %in) #0 {
+  %shl = shl i16 %in, 8
+  %sext = ashr i16 %shl, 8
+  store i16 %sext, i16 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}s_sext_in_reg_i15_i16_arg:
+; GCN: s_load_dword [[VAL:s[0-9]+]]
+
+; SI: s_bfe_i32 [[BFE:s[0-9]+]], [[VAL]], 0xf0000
+; SI: v_mov_b32_e32 [[VBFE:v[0-9]+]], [[BFE]]
+; SI: buffer_store_short [[VBFE]]
+
+; VI: s_lshl_b32 s{{[0-9]+}}, s{{[0-9]+}}, 1{{$}}
+; VI: s_sext_i32_i16 s{{[0-9]+}}, s{{[0-9]+}}
+; VI: s_lshr_b32 s{{[0-9]+}}, s{{[0-9]+}}, 1{{$}}
+define void @s_sext_in_reg_i15_i16_arg(i16 addrspace(1)* %out, i16 %in) #0 {
+  %shl = shl i16 %in, 1
+  %sext = ashr i16 %shl, 1
+  store i16 %sext, i16 addrspace(1)* %out
+  ret void
+}
+
 declare i32 @llvm.r600.read.tidig.x() #1
 
 attributes #0 = { nounwind }




More information about the llvm-commits mailing list