[llvm] r334045 - AMDGPU: Preserve metadata when widening loads
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Tue Jun 5 12:52:56 PDT 2018
Author: arsenm
Date: Tue Jun 5 12:52:56 2018
New Revision: 334045
URL: http://llvm.org/viewvc/llvm-project?rev=334045&view=rev
Log:
AMDGPU: Preserve metadata when widening loads
Preserves the lower bound of the !range metadata. I don't think
it's legal to assume anything about the top half of the widened
value, since it's theoretically reading garbage.
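As a rough illustration (a hand-written sketch, not verbatim pass output;
the names %in and %ptr and the metadata numbers are made up), the
transformation turns

  %ld = load i16, i16 addrspace(4)* %in, align 4, !range !0
  !0 = !{i16 5, i16 500}

into

  %ptr = bitcast i16 addrspace(4)* %in to i32 addrspace(4)*
  %ld.wide = load i32, i32 addrspace(4)* %ptr, align 4, !range !1
  !1 = !{i32 5, i32 0}

where !{i32 5, i32 0} is a wrapped range meaning "any value >= 5": the low
16 bits are still at least 5, so the widened value cannot fall in [0, 5),
but no upper bound can be claimed once the high bits are garbage.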
Modified:
llvm/trunk/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
llvm/trunk/test/CodeGen/AMDGPU/widen_extending_scalar_loads.ll
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp?rev=334045&r1=334044&r2=334045&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp Tue Jun 5 12:52:56 2018
@@ -465,7 +465,7 @@ bool AMDGPUCodeGenPrepare::visitBinaryOp
   return Changed;
 }
 
-bool AMDGPUCodeGenPrepare::visitLoadInst(LoadInst  &I) {
+bool AMDGPUCodeGenPrepare::visitLoadInst(LoadInst &I) {
   if ((I.getPointerAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS ||
        I.getPointerAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS_32BIT) &&
       canWidenScalarExtLoad(I)) {
@@ -475,7 +475,28 @@ bool AMDGPUCodeGenPrepare::visitLoadInst
     Type *I32Ty = Builder.getInt32Ty();
     Type *PT = PointerType::get(I32Ty, I.getPointerAddressSpace());
     Value *BitCast = Builder.CreateBitCast(I.getPointerOperand(), PT);
-    Value *WidenLoad = Builder.CreateLoad(BitCast);
+    LoadInst *WidenLoad = Builder.CreateLoad(BitCast);
+    WidenLoad->copyMetadata(I);
+
+    // If we have range metadata, we need to convert the type, and not make
+    // assumptions about the high bits.
+    if (auto *Range = WidenLoad->getMetadata(LLVMContext::MD_range)) {
+      ConstantInt *Lower =
+        mdconst::extract<ConstantInt>(Range->getOperand(0));
+
+      if (Lower->getValue().isNullValue()) {
+        WidenLoad->setMetadata(LLVMContext::MD_range, nullptr);
+      } else {
+        Metadata *LowAndHigh[] = {
+          ConstantAsMetadata::get(ConstantInt::get(I32Ty, Lower->getValue().zext(32))),
+          // Don't make assumptions about the high bits.
+          ConstantAsMetadata::get(ConstantInt::get(I32Ty, 0))
+        };
+
+        WidenLoad->setMetadata(LLVMContext::MD_range,
+                               MDNode::get(Mod->getContext(), LowAndHigh));
+      }
+    }
 
     int TySize = Mod->getDataLayout().getTypeSizeInBits(I.getType());
     Type *IntNTy = Builder.getIntNTy(TySize);
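A note on the isNullValue() special case above: a lower bound of 0
zero-extends to the vacuous claim "value >= 0", and keeping both bounds
would produce !{i32 0, i32 0}, which !range does not allow (the bounds
must differ), so the pass drops the metadata entirely. A sketch with
hypothetical names:

  %ld = load i16, i16 addrspace(4)* %in, align 4, !range !0  ; !0 = !{i16 0, i16 255}

widens to a load with no !range attachment at all:

  %ld.wide = load i32, i32 addrspace(4)* %ptr, align 4

The constant_load_i16_align4_range_from_0 test below checks exactly this.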
Modified: llvm/trunk/test/CodeGen/AMDGPU/widen_extending_scalar_loads.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/widen_extending_scalar_loads.ll?rev=334045&r1=334044&r2=334045&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/widen_extending_scalar_loads.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/widen_extending_scalar_loads.ll Tue Jun 5 12:52:56 2018
@@ -189,4 +189,80 @@ define amdgpu_kernel void @use_dispatch_
ret void
}
+; OPT-LABEL: @constant_load_i16_align4_range(
+; OPT: load i32, i32 addrspace(4)* %1, !range !0
+define amdgpu_kernel void @constant_load_i16_align4_range(i32 addrspace(1)* %out, i16 addrspace(4)* %in) #0 {
+ %ld = load i16, i16 addrspace(4)* %in, align 4, !range !0
+ %ext = sext i16 %ld to i32
+ store i32 %ext, i32 addrspace(1)* %out
+ ret void
+}
+
+; OPT-LABEL: @constant_load_i16_align4_range_max(
+; OPT: load i32, i32 addrspace(4)* %1, !range !0
+define amdgpu_kernel void @constant_load_i16_align4_range_max(i32 addrspace(1)* %out, i16 addrspace(4)* %in) #0 {
+ %ld = load i16, i16 addrspace(4)* %in, align 4, !range !1
+ %ext = sext i16 %ld to i32
+ store i32 %ext, i32 addrspace(1)* %out
+ ret void
+}
+
+; OPT-LABEL: @constant_load_i16_align4_complex_range(
+; OPT: load i32, i32 addrspace(4)* %1, !range !1
+define amdgpu_kernel void @constant_load_i16_align4_complex_range(i32 addrspace(1)* %out, i16 addrspace(4)* %in) #0 {
+ %ld = load i16, i16 addrspace(4)* %in, align 4, !range !2
+ %ext = sext i16 %ld to i32
+ store i32 %ext, i32 addrspace(1)* %out
+ ret void
+}
+
+; OPT-LABEL: @constant_load_i16_align4_range_from_0(
+; OPT: load i32, i32 addrspace(4)* %1{{$}}
+define amdgpu_kernel void @constant_load_i16_align4_range_from_0(i32 addrspace(1)* %out, i16 addrspace(4)* %in) #0 {
+ %ld = load i16, i16 addrspace(4)* %in, align 4, !range !3
+ %ext = sext i16 %ld to i32
+ store i32 %ext, i32 addrspace(1)* %out
+ ret void
+}
+
+; OPT-LABEL: @constant_load_i16_align4_range_from_neg(
+; OPT: load i32, i32 addrspace(4)* %1, !range !2
+define amdgpu_kernel void @constant_load_i16_align4_range_from_neg(i32 addrspace(1)* %out, i16 addrspace(4)* %in) #0 {
+ %ld = load i16, i16 addrspace(4)* %in, align 4, !range !4
+ %ext = sext i16 %ld to i32
+ store i32 %ext, i32 addrspace(1)* %out
+ ret void
+}
+
+; OPT-LABEL: @constant_load_i16_align4_range_from_neg_to_0(
+; OPT: load i32, i32 addrspace(4)* %1, !range !2
+define amdgpu_kernel void @constant_load_i16_align4_range_from_neg_to_0(i32 addrspace(1)* %out, i16 addrspace(4)* %in) #0 {
+ %ld = load i16, i16 addrspace(4)* %in, align 4, !range !5
+ %ext = sext i16 %ld to i32
+ store i32 %ext, i32 addrspace(1)* %out
+ ret void
+}
+
+; OPT-LABEL: @constant_load_i16_align4_invariant
+; OPT: load i32, i32 addrspace(4)* %1, !invariant.load !3
+define amdgpu_kernel void @constant_load_i16_align4_invariant(i32 addrspace(1)* %out, i16 addrspace(4)* %in) #0 {
+ %ld = load i16, i16 addrspace(4)* %in, align 4, !invariant.load !6
+ %ext = sext i16 %ld to i32
+ store i32 %ext, i32 addrspace(1)* %out
+ ret void
+}
+
attributes #0 = { nounwind }
+
+; OPT: !0 = !{i32 5, i32 0}
+; OPT: !1 = !{i32 8, i32 0}
+; OPT: !2 = !{i32 65520, i32 0}
+; OPT: !3 = !{}
+
+!0 = !{i16 5, i16 500}
+!1 = !{i16 5, i16 -1}
+!2 = !{i16 8, i16 12, i16 42, i16 99}
+!3 = !{i16 0, i16 255}
+!4 = !{i16 -16, i16 16}
+!5 = !{i16 -16, i16 0}
+!6 = !{}