[PATCH] D88857: InstCombine: Fix losing load properties in copy-constant-to-alloca
Matt Arsenault via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Mon Oct 5 15:49:31 PDT 2020
arsenm created this revision.
arsenm added reviewers: yaxunl, asbirlea, jdoerfert, nikic, rnk, majnemer.
Herald added subscribers: kerbowa, jfb, hiraditya, nhaehnle, jvesely.
Herald added a project: LLVM.
arsenm requested review of this revision.
Herald added a subscriber: wdng.
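When a load from the alloca is rewritten to load directly from the constant
source, preserve the original load's alignment and metadata instead of
resetting it to the ABI type alignment with no metadata. Atomic loads are
currently skipped by this transform, but the volatile flag, atomic ordering,
and sync scope are passed along to the new load as well for consistency.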
https://reviews.llvm.org/D88857
Files:
llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
llvm/test/Transforms/InstCombine/AMDGPU/memcpy-from-constant.ll
Index: llvm/test/Transforms/InstCombine/AMDGPU/memcpy-from-constant.ll
===================================================================
--- llvm/test/Transforms/InstCombine/AMDGPU/memcpy-from-constant.ll
+++ llvm/test/Transforms/InstCombine/AMDGPU/memcpy-from-constant.ll
@@ -23,6 +23,57 @@
ret i8 %load
}
+define i8 @memcpy_constant_arg_ptr_to_alloca_load_metadata([32 x i8] addrspace(4)* noalias readonly align 4 dereferenceable(32) %arg, i32 %idx) {
+; CHECK-LABEL: @memcpy_constant_arg_ptr_to_alloca_load_metadata(
+; CHECK-NEXT: [[TMP1:%.*]] = sext i32 [[IDX:%.*]] to i64
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr [32 x i8], [32 x i8] addrspace(4)* [[ARG:%.*]], i64 0, i64 [[TMP1]]
+; CHECK-NEXT: [[LOAD:%.*]] = load i8, i8 addrspace(4)* [[GEP]], align 1, !noalias !0
+; CHECK-NEXT: ret i8 [[LOAD]]
+;
+ %alloca = alloca [32 x i8], align 4, addrspace(5)
+ %alloca.cast = bitcast [32 x i8] addrspace(5)* %alloca to i8 addrspace(5)*
+ %arg.cast = bitcast [32 x i8] addrspace(4)* %arg to i8 addrspace(4)*
+ call void @llvm.memcpy.p5i8.p4i8.i64(i8 addrspace(5)* %alloca.cast, i8 addrspace(4)* %arg.cast, i64 32, i1 false)
+ %gep = getelementptr inbounds [32 x i8], [32 x i8] addrspace(5)* %alloca, i32 0, i32 %idx
+ %load = load i8, i8 addrspace(5)* %gep, !noalias !0
+ ret i8 %load
+}
+
+define i64 @memcpy_constant_arg_ptr_to_alloca_load_alignment([32 x i64] addrspace(4)* noalias readonly align 4 dereferenceable(256) %arg, i32 %idx) {
+; CHECK-LABEL: @memcpy_constant_arg_ptr_to_alloca_load_alignment(
+; CHECK-NEXT: [[TMP1:%.*]] = sext i32 [[IDX:%.*]] to i64
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr [32 x i64], [32 x i64] addrspace(4)* [[ARG:%.*]], i64 0, i64 [[TMP1]]
+; CHECK-NEXT: [[LOAD:%.*]] = load i64, i64 addrspace(4)* [[GEP]], align 16
+; CHECK-NEXT: ret i64 [[LOAD]]
+;
+ %alloca = alloca [32 x i64], align 4, addrspace(5)
+ %alloca.cast = bitcast [32 x i64] addrspace(5)* %alloca to i8 addrspace(5)*
+ %arg.cast = bitcast [32 x i64] addrspace(4)* %arg to i8 addrspace(4)*
+ call void @llvm.memcpy.p5i8.p4i8.i64(i8 addrspace(5)* %alloca.cast, i8 addrspace(4)* %arg.cast, i64 256, i1 false)
+ %gep = getelementptr inbounds [32 x i64], [32 x i64] addrspace(5)* %alloca, i32 0, i32 %idx
+ %load = load i64, i64 addrspace(5)* %gep, align 16
+ ret i64 %load
+}
+
+define i64 @memcpy_constant_arg_ptr_to_alloca_load_atomic([32 x i64] addrspace(4)* noalias readonly align 8 dereferenceable(256) %arg, i32 %idx) {
+; CHECK-LABEL: @memcpy_constant_arg_ptr_to_alloca_load_atomic(
+; CHECK-NEXT: [[ALLOCA:%.*]] = alloca [32 x i64], align 8, addrspace(5)
+; CHECK-NEXT: [[ALLOCA_CAST:%.*]] = bitcast [32 x i64] addrspace(5)* [[ALLOCA]] to i8 addrspace(5)*
+; CHECK-NEXT: [[ARG_CAST:%.*]] = bitcast [32 x i64] addrspace(4)* [[ARG:%.*]] to i8 addrspace(4)*
+; CHECK-NEXT: call void @llvm.memcpy.p5i8.p4i8.i64(i8 addrspace(5)* align 8 dereferenceable(256) [[ALLOCA_CAST]], i8 addrspace(4)* align 8 dereferenceable(256) [[ARG_CAST]], i64 256, i1 false)
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds [32 x i64], [32 x i64] addrspace(5)* [[ALLOCA]], i32 0, i32 [[IDX:%.*]]
+; CHECK-NEXT: [[LOAD:%.*]] = load atomic i64, i64 addrspace(5)* [[GEP]] syncscope("somescope") acquire, align 8
+; CHECK-NEXT: ret i64 [[LOAD]]
+;
+ %alloca = alloca [32 x i64], align 8, addrspace(5)
+ %alloca.cast = bitcast [32 x i64] addrspace(5)* %alloca to i8 addrspace(5)*
+ %arg.cast = bitcast [32 x i64] addrspace(4)* %arg to i8 addrspace(4)*
+ call void @llvm.memcpy.p5i8.p4i8.i64(i8 addrspace(5)* %alloca.cast, i8 addrspace(4)* %arg.cast, i64 256, i1 false)
+ %gep = getelementptr inbounds [32 x i64], [32 x i64] addrspace(5)* %alloca, i32 0, i32 %idx
+ %load = load atomic i64, i64 addrspace(5)* %gep syncscope("somescope") acquire, align 8
+ ret i64 %load
+}
+
; Simple memmove to alloca from constant address space argument.
define i8 @memmove_constant_arg_ptr_to_alloca([32 x i8] addrspace(4)* noalias readonly align 4 dereferenceable(32) %arg, i32 %idx) {
; CHECK-LABEL: @memmove_constant_arg_ptr_to_alloca(
@@ -168,7 +219,7 @@
; CHECK-LABEL: @byref_infloop_metadata(
; CHECK-NEXT: bb:
; CHECK-NEXT: [[CAST_ALLOCA:%.*]] = bitcast [[STRUCT_TY:%.*]] addrspace(4)* [[ARG:%.*]] to i8 addrspace(4)*
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p4i8.i32(i8* nonnull align 4 dereferenceable(16) [[SCRATCH:%.*]], i8 addrspace(4)* align 4 dereferenceable(16) [[CAST_ALLOCA]], i32 16, i1 false), !noalias !0
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p4i8.i32(i8* nonnull align 4 dereferenceable(16) [[SCRATCH:%.*]], i8 addrspace(4)* align 4 dereferenceable(16) [[CAST_ALLOCA]], i32 16, i1 false), !noalias !1
; CHECK-NEXT: ret void
;
bb:
Index: llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
===================================================================
--- llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
+++ llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -287,9 +287,12 @@
if (auto *LT = dyn_cast<LoadInst>(I)) {
auto *V = getReplacement(LT->getPointerOperand());
assert(V && "Operand not replaced");
- auto *NewI = new LoadInst(I->getType(), V, "", false,
- IC.getDataLayout().getABITypeAlign(I->getType()));
+ auto *NewI = new LoadInst(LT->getType(), V, "", LT->isVolatile(),
+ LT->getAlign(), LT->getOrdering(),
+ LT->getSyncScopeID());
NewI->takeName(LT);
+ copyMetadataForLoad(*NewI, *LT);
+
IC.InsertNewInstWith(NewI, *LT);
IC.replaceInstUsesWith(*LT, NewI);
WorkMap[LT] = NewI;
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D88857.296320.patch
Type: text/x-patch
Size: 5654 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20201005/c51d9dee/attachment.bin>
More information about the llvm-commits mailing list