[llvm] r303990 - [ARM] Fix lowering of misaligned memcpy/memset
John Brawn via llvm-commits
llvm-commits at lists.llvm.org
Fri May 26 06:59:13 PDT 2017
Author: john.brawn
Date: Fri May 26 08:59:12 2017
New Revision: 303990
URL: http://llvm.org/viewvc/llvm-project?rev=303990&view=rev
Log:
[ARM] Fix lowering of misaligned memcpy/memset
Currently getOptimalMemOpType returns i32 for large enough sizes without
checking for alignment, leading to poor code generation when misaligned accesses
aren't permitted as we generate a word store then later split it up into byte
stores. This means we inadvertantly go over the MaxStoresPerMemcpy limit and for
memset we splat the memset value into a word then immediately split it up
again.
Fix this by leaving it up to FindOptimalMemOpLowering to figure out which type
to use, but also fix a bug there where it wasn't correctly checking if
misaligned memory accesses are allowed.
Differential Revision: https://reviews.llvm.org/D33442
Modified:
llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp
llvm/trunk/test/CodeGen/ARM/memcpy-inline.ll
llvm/trunk/test/CodeGen/ARM/memset-inline.ll
Modified: llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp?rev=303990&r1=303989&r2=303990&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp Fri May 26 08:59:12 2017
@@ -4779,23 +4779,23 @@ static bool FindOptimalMemOpLowering(std
DAG.getMachineFunction());
if (VT == MVT::Other) {
- if (DstAlign >= DAG.getDataLayout().getPointerPrefAlignment(DstAS) ||
- TLI.allowsMisalignedMemoryAccesses(VT, DstAS, DstAlign)) {
- VT = TLI.getPointerTy(DAG.getDataLayout(), DstAS);
- } else {
- switch (DstAlign & 7) {
- case 0: VT = MVT::i64; break;
- case 4: VT = MVT::i32; break;
- case 2: VT = MVT::i16; break;
- default: VT = MVT::i8; break;
- }
- }
+ // Use the largest integer type whose alignment constraints are satisfied.
+ // We only need to check DstAlign here as SrcAlign is always greater or
+ // equal to DstAlign (or zero).
+ VT = MVT::i64;
+ while (DstAlign && DstAlign < VT.getSizeInBits() / 8 &&
+ !TLI.allowsMisalignedMemoryAccesses(VT, DstAS, DstAlign))
+ VT = (MVT::SimpleValueType)(VT.getSimpleVT().SimpleTy - 1);
+ assert(VT.isInteger());
+ // Find the largest legal integer type.
MVT LVT = MVT::i64;
while (!TLI.isTypeLegal(LVT))
LVT = (MVT::SimpleValueType)(LVT.SimpleTy - 1);
assert(LVT.isInteger());
+ // If the type we've chosen is larger than the largest legal integer type
+ // then use that instead.
if (VT.bitsGT(LVT))
VT = LVT;
}
Modified: llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp?rev=303990&r1=303989&r2=303990&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp Fri May 26 08:59:12 2017
@@ -12147,12 +12147,6 @@ EVT ARMTargetLowering::getOptimalMemOpTy
}
}
- // Lowering to i32/i16 if the size permits.
- if (Size >= 4)
- return MVT::i32;
- else if (Size >= 2)
- return MVT::i16;
-
// Let the target-independent logic figure it out.
return MVT::Other;
}
Modified: llvm/trunk/test/CodeGen/ARM/memcpy-inline.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/memcpy-inline.ll?rev=303990&r1=303989&r2=303990&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/memcpy-inline.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/memcpy-inline.ll Fri May 26 08:59:12 2017
@@ -95,10 +95,7 @@ entry:
; CHECK: movt [[REG7:r[0-9]+]], #22866
; CHECK: str [[REG7]]
; CHECK-T1-LABEL: t5:
-; CHECK-T1: movs [[TREG3:r[0-9]]],
-; CHECK-T1: strb [[TREG3]],
-; CHECK-T1: movs [[TREG4:r[0-9]]],
-; CHECK-T1: strb [[TREG4]],
+; CHECK-T1: bl _memcpy
tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([7 x i8], [7 x i8]* @.str5, i64 0, i64 0), i64 7, i32 1, i1 false)
ret void
}
Modified: llvm/trunk/test/CodeGen/ARM/memset-inline.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/memset-inline.ll?rev=303990&r1=303989&r2=303990&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/memset-inline.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/memset-inline.ll Fri May 26 08:59:12 2017
@@ -38,6 +38,56 @@ entry:
ret void
}
+define void @t3(i8* %p) {
+entry:
+; CHECK-7A-LABEL: t3:
+; CHECK-7A: muls [[REG:r[0-9]+]],
+; CHECK-7A: str [[REG]],
+; CHECK-6M-LABEL: t3:
+; CHECK-6M-NOT: muls
+; CHECK-6M: strb [[REG:r[0-9]+]],
+; CHECK-6M: strb [[REG]],
+; CHECK-6M: strb [[REG]],
+; CHECK-6M: strb [[REG]],
+ br label %for.body
+
+for.body:
+ %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+ %0 = trunc i32 %i to i8
+ call void @llvm.memset.p0i8.i32(i8* %p, i8 %0, i32 4, i32 1, i1 false)
+ call void @something(i8* %p)
+ %inc = add nuw nsw i32 %i, 1
+ %exitcond = icmp eq i32 %inc, 255
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+ ret void
+}
+
+define void @t4(i8* %p) {
+entry:
+; CHECK-7A-LABEL: t4:
+; CHECK-7A: muls [[REG:r[0-9]+]],
+; CHECK-7A: str [[REG]],
+; CHECK-6M-LABEL: t4:
+; CHECK-6M: muls [[REG:r[0-9]+]],
+; CHECK-6M: strh [[REG]],
+; CHECK-6M: strh [[REG]],
+ br label %for.body
+
+for.body:
+ %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+ %0 = trunc i32 %i to i8
+ call void @llvm.memset.p0i8.i32(i8* %p, i8 %0, i32 4, i32 2, i1 false)
+ call void @something(i8* %p)
+ %inc = add nuw nsw i32 %i, 1
+ %exitcond = icmp eq i32 %inc, 255
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+ ret void
+}
+
declare void @something(i8*) nounwind
declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind
declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
More information about the llvm-commits
mailing list