[llvm] 13eb890 - [Target][ARM] Fix VPT Block Pass miscompilation
via llvm-commits
llvm-commits at lists.llvm.org
Tue Apr 14 07:16:54 PDT 2020
Author: Pierre-vh
Date: 2020-04-14T15:16:27+01:00
New Revision: 13eb890139382e370be14938f4fffcfc20ff9613
URL: https://github.com/llvm/llvm-project/commit/13eb890139382e370be14938f4fffcfc20ff9613
DIFF: https://github.com/llvm/llvm-project/commit/13eb890139382e370be14938f4fffcfc20ff9613.diff
LOG: [Target][ARM] Fix VPT Block Pass miscompilation
The pass was incorrectly reverting back to a "T" when something wrote
to VPR inside an "E" block. This is not the correct behaviour; the
predicate should stay the same.
Differential Revision: https://reviews.llvm.org/D77798
Added:
Modified:
llvm/lib/Target/ARM/MVEVPTBlockPass.cpp
llvm/test/CodeGen/Thumb2/mve-pred-not.ll
llvm/test/CodeGen/Thumb2/mve-vpt-block-elses.mir
Removed:
################################################################################
diff --git a/llvm/lib/Target/ARM/MVEVPTBlockPass.cpp b/llvm/lib/Target/ARM/MVEVPTBlockPass.cpp
index c0214cf2a910..5d98307e9af1 100644
--- a/llvm/lib/Target/ARM/MVEVPTBlockPass.cpp
+++ b/llvm/lib/Target/ARM/MVEVPTBlockPass.cpp
@@ -194,7 +194,7 @@ CreateVPTBlock(MachineBasicBlock::instr_iterator &Iter,
// Remove VPNOTs while there's still room in the block, so we can make the
// largest block possible.
- ARMVCC::VPTCodes CurrentPredicate = ARMVCC::Then;
+ ARMVCC::VPTCodes CurrentPredicate = ARMVCC::Else;
while (BlockSize < 4 && Iter != EndIter &&
Iter->getOpcode() == ARM::MVE_VPNOT) {
@@ -222,28 +222,19 @@ CreateVPTBlock(MachineBasicBlock::instr_iterator &Iter,
DeadInstructions.push_back(&*Iter);
++Iter;
- // Replace "then" by "elses" in the block until we find an instruction that
- // defines VPR, then after that leave everything to "t".
+ // Replace the predicates of the instructions we're adding.
// Note that we are using "Iter" to iterate over the block so we can update
// it at the same time.
- bool ChangeToElse = (CurrentPredicate == ARMVCC::Then);
for (; Iter != VPNOTBlockEndIter; ++Iter) {
// Find the register in which the predicate is
int OpIdx = findFirstVPTPredOperandIdx(*Iter);
assert(OpIdx != -1);
- // Update the mask + change the predicate to an else if needed.
- if (ChangeToElse) {
- // Change the predicate and update the mask
- Iter->getOperand(OpIdx).setImm(ARMVCC::Else);
- BlockMask = ExpandBlockMask(BlockMask, ARMVCC::Else);
- // Reset back to a "then" predicate if this instruction defines VPR.
- if (Iter->definesRegister(ARM::VPR))
- ChangeToElse = false;
- } else
- BlockMask = ExpandBlockMask(BlockMask, ARMVCC::Then);
-
- LLVM_DEBUG(dbgs() << " adding: "; Iter->dump());
+ // Change the predicate and update the mask
+ Iter->getOperand(OpIdx).setImm(CurrentPredicate);
+ BlockMask = ExpandBlockMask(BlockMask, CurrentPredicate);
+
+ LLVM_DEBUG(dbgs() << " adding : "; Iter->dump());
}
CurrentPredicate =
diff --git a/llvm/test/CodeGen/Thumb2/mve-pred-not.ll b/llvm/test/CodeGen/Thumb2/mve-pred-not.ll
index 70fc0e4ab1cb..a16fce870cb9 100644
--- a/llvm/test/CodeGen/Thumb2/mve-pred-not.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-pred-not.ll
@@ -405,12 +405,42 @@ declare <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32>, <4 x i32>,
define arm_aapcs_vfpcc <4 x i32> @vpttet_v4i1(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
; CHECK-LABEL: vpttet_v4i1:
; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .pad #4
+; CHECK-NEXT: sub sp, #4
+; CHECK-NEXT: vcmp.s32 ge, q0, q2
+; CHECK-NEXT: vstr p0, [sp] @ 4-byte Spill
+; CHECK-NEXT: vpstt
+; CHECK-NEXT: vmovt q0, q2
+; CHECK-NEXT: vmovt q0, q2
+; CHECK-NEXT: vldr p0, [sp] @ 4-byte Reload
+; CHECK-NEXT: vpnot
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vmovt q0, q2
+; CHECK-NEXT: vldr p0, [sp] @ 4-byte Reload
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vmovt q0, q2
+; CHECK-NEXT: add sp, #4
+; CHECK-NEXT: bx lr
+entry:
+ %0 = icmp sge <4 x i32> %x, %z
+ %1 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %z, <4 x i32> %z, <4 x i1> %0, <4 x i32> %x)
+ %2 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %z, <4 x i32> %z, <4 x i1> %0, <4 x i32> %1)
+ %3 = xor <4 x i1> %0, <i1 true, i1 true, i1 true, i1 true>
+ %4 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %z, <4 x i32> %z, <4 x i1> %3, <4 x i32> %2)
+ %5 = xor <4 x i1> %3, <i1 true, i1 true, i1 true, i1 true>
+ %6 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %z, <4 x i32> %z, <4 x i1> %5, <4 x i32> %4)
+ ret <4 x i32> %6
+}
+
+define arm_aapcs_vfpcc <4 x i32> @vpttee_v4i1(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
+; CHECK-LABEL: vpttee_v4i1:
+; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov q3, q2
-; CHECK-NEXT: vpttet.s32 ge, q0, q2
+; CHECK-NEXT: vpttee.s32 ge, q0, q2
; CHECK-NEXT: vmaxt.s32 q3, q0, q1
; CHECK-NEXT: vcmpt.s32 gt, q0, zr
-; CHECK-NEXT: vcmpe.s32 gt, q1, zr
-; CHECK-NEXT: vmovt q3, q2
+; CHECK-NEXT: vmove q3, q2
+; CHECK-NEXT: vmove q3, q2
; CHECK-NEXT: vmov q0, q3
; CHECK-NEXT: bx lr
entry:
@@ -419,20 +449,19 @@ entry:
%2 = icmp sgt <4 x i32> %x, zeroinitializer
%3 = and <4 x i1> %0, %2
%4 = xor <4 x i1> %3, <i1 true, i1 true, i1 true, i1 true>
- %5 = icmp sgt <4 x i32> %y, zeroinitializer
- %6 = and <4 x i1> %5, %4
- %7 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %z, <4 x i32> %z, <4 x i1> %6, <4 x i32> %1)
- ret <4 x i32> %7
+ %5 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %z, <4 x i32> %z, <4 x i1> %4, <4 x i32> %1)
+ %6 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %z, <4 x i32> %z, <4 x i1> %4, <4 x i32> %5)
+ ret <4 x i32> %6
}
-define arm_aapcs_vfpcc <4 x i32> @vpttee_v4i1(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
-; CHECK-LABEL: vpttee_v4i1:
+define arm_aapcs_vfpcc <4 x i32> @vpttee2_v4i1(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
+; CHECK-LABEL: vpttee2_v4i1:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov q3, q2
; CHECK-NEXT: vpttee.s32 ge, q0, q2
; CHECK-NEXT: vmaxt.s32 q3, q0, q1
; CHECK-NEXT: vcmpt.s32 gt, q0, zr
-; CHECK-NEXT: vmove q3, q2
+; CHECK-NEXT: vcmpe.s32 gt, q1, zr
; CHECK-NEXT: vmove q3, q2
; CHECK-NEXT: vmov q0, q3
; CHECK-NEXT: bx lr
@@ -442,9 +471,10 @@ entry:
%2 = icmp sgt <4 x i32> %x, zeroinitializer
%3 = and <4 x i1> %0, %2
%4 = xor <4 x i1> %3, <i1 true, i1 true, i1 true, i1 true>
- %5 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %z, <4 x i32> %z, <4 x i1> %4, <4 x i32> %1)
- %6 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %z, <4 x i32> %z, <4 x i1> %4, <4 x i32> %5)
- ret <4 x i32> %6
+ %5 = icmp sgt <4 x i32> %y, zeroinitializer
+ %6 = and <4 x i1> %5, %4
+ %7 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %z, <4 x i32> %z, <4 x i1> %6, <4 x i32> %1)
+ ret <4 x i32> %7
}
define arm_aapcs_vfpcc <4 x i32> @vpttte_v4i1(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
diff --git a/llvm/test/CodeGen/Thumb2/mve-vpt-block-elses.mir b/llvm/test/CodeGen/Thumb2/mve-vpt-block-elses.mir
index 765d3a4de831..93bf2e6f4510 100644
--- a/llvm/test/CodeGen/Thumb2/mve-vpt-block-elses.mir
+++ b/llvm/test/CodeGen/Thumb2/mve-vpt-block-elses.mir
@@ -69,11 +69,11 @@ body: |
; CHECK: liveins: $q0, $q1, $q2
; CHECK: $q3 = MVE_VORR $q2, $q2, 0, $noreg, undef $q3
; CHECK: BUNDLE implicit-def dead $vpr, implicit-def $q3, implicit-def $d6, implicit-def $s12, implicit-def $s13, implicit-def $d7, implicit-def $s14, implicit-def $s15, implicit killed $q0, implicit $q2, implicit $q1, implicit killed $q3, implicit $zr {
- ; CHECK: MVE_VPTv4s32 5, renamable $q0, renamable $q2, 10, implicit-def $vpr
+ ; CHECK: MVE_VPTv4s32 7, renamable $q0, renamable $q2, 10, implicit-def $vpr
; CHECK: renamable $q3 = MVE_VMAXs32 renamable $q0, renamable $q1, 1, internal renamable $vpr, killed renamable $q3
; CHECK: renamable $vpr = MVE_VCMPs32r killed renamable $q0, $zr, 12, 1, internal killed renamable $vpr
; CHECK: renamable $vpr = MVE_VCMPs32r renamable $q1, $zr, 12, 2, internal killed renamable $vpr
- ; CHECK: renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 1, internal killed renamable $vpr, internal renamable $q3
+ ; CHECK: renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 2, internal killed renamable $vpr, internal renamable $q3
; CHECK: }
; CHECK: $q0 = MVE_VORR $q3, $q3, 0, $noreg, undef $q0
; CHECK: $q3 = MVE_VORR $q2, $q2, 0, $noreg, undef $q3
@@ -87,20 +87,20 @@ body: |
; CHECK: $q0 = MVE_VORR $q3, $q3, 0, $noreg, undef $q0
; CHECK: $q3 = MVE_VORR $q2, $q2, 0, $noreg, undef $q3
; CHECK: BUNDLE implicit-def dead $vpr, implicit-def $q3, implicit-def $d6, implicit-def $s12, implicit-def $s13, implicit-def $d7, implicit-def $s14, implicit-def $s15, implicit $q0, implicit $q2, implicit $q1, implicit killed $q3, implicit $zr {
- ; CHECK: MVE_VPTv4s32 13, renamable $q0, renamable $q2, 10, implicit-def $vpr
+ ; CHECK: MVE_VPTv4s32 15, renamable $q0, renamable $q2, 10, implicit-def $vpr
; CHECK: renamable $q3 = MVE_VMAXs32 renamable $q0, renamable $q1, 1, internal renamable $vpr, killed renamable $q3
; CHECK: renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 2, internal renamable $vpr, internal renamable $q3
; CHECK: renamable $vpr = MVE_VCMPs32r renamable $q1, $zr, 12, 2, internal killed renamable $vpr
- ; CHECK: renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 1, internal killed renamable $vpr, internal killed renamable $q3
+ ; CHECK: renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 2, internal killed renamable $vpr, internal killed renamable $q3
; CHECK: }
; CHECK: $q0 = MVE_VORR killed $q3, killed $q3, 0, $noreg, undef $q0
; CHECK: $q3 = MVE_VORR $q2, $q2, 0, $noreg, undef $q3
; CHECK: BUNDLE implicit-def dead $vpr, implicit-def $q3, implicit-def $d6, implicit-def $s12, implicit-def $s13, implicit-def $d7, implicit-def $s14, implicit-def $s15, implicit $q0, implicit $q2, implicit $q1, implicit killed $q3, implicit $zr {
- ; CHECK: MVE_VPTv4s32 9, renamable $q0, renamable $q2, 10, implicit-def $vpr
+ ; CHECK: MVE_VPTv4s32 15, renamable $q0, renamable $q2, 10, implicit-def $vpr
; CHECK: renamable $q3 = MVE_VMAXs32 renamable $q0, renamable $q1, 1, internal renamable $vpr, killed renamable $q3
; CHECK: renamable $vpr = MVE_VCMPs32r renamable $q1, $zr, 12, 2, internal killed renamable $vpr
- ; CHECK: renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 1, internal renamable $vpr, internal renamable $q3
- ; CHECK: renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 1, internal killed renamable $vpr, internal killed renamable $q3
+ ; CHECK: renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 2, internal renamable $vpr, internal renamable $q3
+ ; CHECK: renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 2, internal killed renamable $vpr, internal killed renamable $q3
; CHECK: }
; CHECK: $q0 = MVE_VORR killed $q3, killed $q3, 0, $noreg, undef $q0
; CHECK: $q3 = MVE_VORR $q2, $q2, 0, $noreg, undef $q3
@@ -122,10 +122,10 @@ body: |
; CHECK: $q0 = MVE_VORR killed $q3, killed $q3, 0, $noreg, undef $q0
; CHECK: $q3 = MVE_VORR $q2, $q2, 0, $noreg, undef $q3
; CHECK: BUNDLE implicit-def dead $vpr, implicit-def $q3, implicit-def $d6, implicit-def $s12, implicit-def $s13, implicit-def $d7, implicit-def $s14, implicit-def $s15, implicit $q0, implicit $q2, implicit $q1, implicit killed $q3, implicit $zr {
- ; CHECK: MVE_VPTv4s32 10, renamable $q0, renamable $q2, 10, implicit-def $vpr
+ ; CHECK: MVE_VPTv4s32 14, renamable $q0, renamable $q2, 10, implicit-def $vpr
; CHECK: renamable $q3 = MVE_VMAXs32 renamable $q0, renamable $q1, 1, internal renamable $vpr, killed renamable $q3
; CHECK: renamable $vpr = MVE_VCMPs32r renamable $q1, $zr, 12, 2, internal killed renamable $vpr
- ; CHECK: renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 1, internal killed renamable $vpr, internal killed renamable $q3
+ ; CHECK: renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 2, internal killed renamable $vpr, internal killed renamable $q3
; CHECK: }
; CHECK: $q0 = MVE_VORR killed $q3, killed $q3, 0, $noreg, undef $q0
; CHECK: $q3 = MVE_VORR $q2, $q2, 0, $noreg, undef $q3
@@ -145,6 +145,14 @@ body: |
; CHECK: renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 2, internal renamable $vpr, internal killed renamable $q3
; CHECK: }
; CHECK: $q0 = MVE_VORR killed $q3, killed $q3, 0, $noreg, undef $q0
+ ; CHECK: $q3 = MVE_VORR $q2, $q2, 0, $noreg, undef $q3
+ ; CHECK: BUNDLE implicit-def $vpr, implicit-def $q3, implicit-def $d6, implicit-def $s12, implicit-def $s13, implicit-def $d7, implicit-def $s14, implicit-def $s15, implicit $q0, implicit $q2, implicit killed $q3 {
+ ; CHECK: MVE_VPTv4s32 13, renamable $q0, renamable $q2, 10, implicit-def $vpr
+ ; CHECK: renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 1, internal renamable $vpr, killed renamable $q3
+ ; CHECK: renamable $vpr = MVE_VCMPs32 renamable $q0, renamable $q2, 11, 2, internal killed renamable $vpr
+ ; CHECK: renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 2, internal renamable $vpr, internal killed renamable $q3
+ ; CHECK: renamable $vpr = MVE_VCMPs32 renamable $q0, renamable $q2, 11, 1, internal killed renamable $vpr
+ ; CHECK: }
; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $q0
renamable $vpr = MVE_VCMPs32 renamable $q0, renamable $q2, 10, 0, $noreg
$q3 = MVE_VORR $q2, $q2, 0, $noreg, undef $q3
@@ -226,6 +234,15 @@ body: |
renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 1, renamable $vpr, killed renamable $q3
$q0 = MVE_VORR killed $q3, killed $q3, 0, $noreg, undef $q0
+ $q3 = MVE_VORR $q2, $q2, 0, $noreg, undef $q3
+ renamable $vpr = MVE_VCMPs32 renamable $q0, renamable $q2, 10, 0, $noreg
+ renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 1, renamable $vpr, killed renamable $q3
+ renamable $vpr = MVE_VPNOT killed renamable $vpr, 0, $noreg
+ renamable $vpr = MVE_VCMPs32 renamable $q0, renamable $q2, 11, 1, killed renamable $vpr
+ renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 1, renamable $vpr, killed renamable $q3
+ renamable $vpr = MVE_VPNOT killed renamable $vpr, 0, $noreg
+ renamable $vpr = MVE_VCMPs32 renamable $q0, renamable $q2, 11, 1, killed renamable $vpr
+
tBX_RET 14, $noreg, implicit $q0
...
More information about the llvm-commits mailing list