[llvm] r219953 - R600: Fix nonsensical implementation of computeKnownBits for BFE
Matt Arsenault
Matthew.Arsenault at amd.com
Thu Oct 16 13:07:40 PDT 2014
Author: arsenm
Date: Thu Oct 16 15:07:40 2014
New Revision: 219953
URL: http://llvm.org/viewvc/llvm-project?rev=219953&view=rev
Log:
R600: Fix nonsensical implementation of computeKnownBits for BFE
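This was resulting in invalid simplifications of sdiv. The old code reported the high bits of a BFE_I32 result as known one, but a signed bitfield extract sign-extends the field, so those bits are not known; only BFE_U32 has known (zero) high bits.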
Modified:
llvm/trunk/lib/Target/R600/AMDGPUISelLowering.cpp
llvm/trunk/test/CodeGen/R600/llvm.AMDGPU.bfe.i32.ll
Modified: llvm/trunk/lib/Target/R600/AMDGPUISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/R600/AMDGPUISelLowering.cpp?rev=219953&r1=219952&r2=219953&view=diff
==============================================================================
--- llvm/trunk/lib/Target/R600/AMDGPUISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/R600/AMDGPUISelLowering.cpp Thu Oct 16 15:07:40 2014
@@ -2377,11 +2377,7 @@ void AMDGPUTargetLowering::computeKnownB
unsigned BitWidth = 32;
uint32_t Width = CWidth->getZExtValue() & 0x1f;
- // FIXME: This could do a lot more. If offset is 0, should be the same as
- // sign_extend_inreg implementation, but that involves duplicating it.
- if (Opc == AMDGPUISD::BFE_I32)
- KnownOne = APInt::getHighBitsSet(BitWidth, BitWidth - Width);
- else
+ if (Opc == AMDGPUISD::BFE_U32)
KnownZero = APInt::getHighBitsSet(BitWidth, BitWidth - Width);
break;
Modified: llvm/trunk/test/CodeGen/R600/llvm.AMDGPU.bfe.i32.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/llvm.AMDGPU.bfe.i32.ll?rev=219953&r1=219952&r2=219953&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/R600/llvm.AMDGPU.bfe.i32.ll (original)
+++ llvm/trunk/test/CodeGen/R600/llvm.AMDGPU.bfe.i32.ll Thu Oct 16 15:07:40 2014
@@ -424,3 +424,18 @@ define void @bfe_sext_in_reg_i24(i32 add
store i32 %ashr, i32 addrspace(1)* %out, align 4
ret void
}
+
+; FUNC-LABEL: @simplify_demanded_bfe_sdiv
+; SI: BUFFER_LOAD_DWORD [[LOAD:v[0-9]+]]
+; SI: V_BFE_I32 [[BFE:v[0-9]+]], [[LOAD]], 1, 16
+; SI: V_LSHRREV_B32_e32 [[TMP0:v[0-9]+]], 31, [[BFE]]
+; SI: V_ADD_I32_e32 [[TMP1:v[0-9]+]], [[TMP0]], [[BFE]]
+; SI: V_ASHRREV_I32_e32 [[TMP2:v[0-9]+]], 1, [[TMP1]]
+; SI: BUFFER_STORE_DWORD [[TMP2]]
+define void @simplify_demanded_bfe_sdiv(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
+ %src = load i32 addrspace(1)* %in, align 4
+ %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %src, i32 1, i32 16) nounwind readnone
+ %div = sdiv i32 %bfe, 2
+ store i32 %div, i32 addrspace(1)* %out, align 4
+ ret void
+}
More information about the llvm-commits mailing list.