[llvm] r179186 - R600/SI: Add pattern for AMDGPUurecip
Michel Danzer
michel.daenzer at amd.com
Wed Apr 10 10:17:56 PDT 2013
Author: daenzer
Date: Wed Apr 10 12:17:56 2013
New Revision: 179186
URL: http://llvm.org/viewvc/llvm-project?rev=179186&view=rev
Log:
R600/SI: Add pattern for AMDGPUurecip
21 more little piglits with radeonsi.
Reviewed-by: Tom Stellard <thomas.stellard at amd.com>
Added:
llvm/trunk/test/CodeGen/R600/urecip.ll
Modified:
llvm/trunk/lib/Target/R600/AMDGPUInstructions.td
llvm/trunk/lib/Target/R600/R600Instructions.td
llvm/trunk/lib/Target/R600/SIInstructions.td
Modified: llvm/trunk/lib/Target/R600/AMDGPUInstructions.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/R600/AMDGPUInstructions.td?rev=179186&r1=179185&r2=179186&view=diff
==============================================================================
--- llvm/trunk/lib/Target/R600/AMDGPUInstructions.td (original)
+++ llvm/trunk/lib/Target/R600/AMDGPUInstructions.td Wed Apr 10 12:17:56 2013
@@ -94,6 +94,7 @@ class Constants {
int TWO_PI = 0x40c90fdb;
int PI = 0x40490fdb;
int TWO_PI_INV = 0x3e22f983;
+int FP_UINT_MAX_PLUS_1 = 0x4f800000; // 1 << 32 in floating point encoding
}
def CONST : Constants;
Modified: llvm/trunk/lib/Target/R600/R600Instructions.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/R600/R600Instructions.td?rev=179186&r1=179185&r2=179186&view=diff
==============================================================================
--- llvm/trunk/lib/Target/R600/R600Instructions.td (original)
+++ llvm/trunk/lib/Target/R600/R600Instructions.td Wed Apr 10 12:17:56 2013
@@ -1923,10 +1923,11 @@ def : COS_PAT <COS_cm>;
defm DIV_cm : DIV_Common<RECIP_IEEE_cm>;
// RECIP_UINT emulation for Cayman
+// The multiplication scales from [0,1] to the unsigned integer range
def : Pat <
(AMDGPUurecip R600_Reg32:$src0),
(FLT_TO_UINT_eg (MUL_IEEE (RECIP_IEEE_cm (UINT_TO_FLT_eg R600_Reg32:$src0)),
- (MOV_IMM_I32 0x4f800000)))
+ (MOV_IMM_I32 CONST.FP_UINT_MAX_PLUS_1)))
>;
Modified: llvm/trunk/lib/Target/R600/SIInstructions.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/R600/SIInstructions.td?rev=179186&r1=179185&r2=179186&view=diff
==============================================================================
--- llvm/trunk/lib/Target/R600/SIInstructions.td (original)
+++ llvm/trunk/lib/Target/R600/SIInstructions.td Wed Apr 10 12:17:56 2013
@@ -602,8 +602,8 @@ defm V_READFIRSTLANE_B32 : VOP1_32 <0x00
defm V_CVT_F32_I32 : VOP1_32 <0x00000005, "V_CVT_F32_I32",
[(set VReg_32:$dst, (sint_to_fp VSrc_32:$src0))]
>;
-//defm V_CVT_F32_U32 : VOP1_32 <0x00000006, "V_CVT_F32_U32", []>;
-//defm V_CVT_U32_F32 : VOP1_32 <0x00000007, "V_CVT_U32_F32", []>;
+defm V_CVT_F32_U32 : VOP1_32 <0x00000006, "V_CVT_F32_U32", []>;
+defm V_CVT_U32_F32 : VOP1_32 <0x00000007, "V_CVT_U32_F32", []>;
defm V_CVT_I32_F32 : VOP1_32 <0x00000008, "V_CVT_I32_F32",
[(set (i32 VReg_32:$dst), (fp_to_sint VSrc_32:$src0))]
>;
@@ -1514,6 +1514,14 @@ def : Pat <
(BUFFER_LOAD_DWORD 0, 1, 0, 0, 0, 0, VReg_32:$voff, SReg_128:$sbase, 0, 0, 0)
>;
+// The multiplication scales from [0,1] to the unsigned integer range
+def : Pat <
+ (AMDGPUurecip i32:$src0),
+ (V_CVT_U32_F32_e32
+ (V_MUL_F32_e32 CONST.FP_UINT_MAX_PLUS_1,
+ (V_RCP_IFLAG_F32_e32 (V_CVT_F32_U32_e32 $src0))))
+>;
+
/********** ================== **********/
/********** VOP3 Patterns **********/
/********** ================== **********/
Added: llvm/trunk/test/CodeGen/R600/urecip.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/urecip.ll?rev=179186&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/urecip.ll (added)
+++ llvm/trunk/test/CodeGen/R600/urecip.ll Wed Apr 10 12:17:56 2013
@@ -0,0 +1,12 @@
+;RUN: llc < %s -march=r600 -mcpu=verde | FileCheck %s
+
+;CHECK: V_RCP_IFLAG_F32_e32
+
+define void @test(i32 %p, i32 %q) {
+ %i = udiv i32 %p, %q
+ %r = bitcast i32 %i to float
+ call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %r, float %r, float %r, float %r)
+ ret void
+}
+
+declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
More information about the llvm-commits
mailing list