[llvm] 13eb890 - [Target][ARM] Fix VPT Block Pass miscompilation

via llvm-commits llvm-commits at lists.llvm.org
Tue Apr 14 07:16:54 PDT 2020


Author: Pierre-vh
Date: 2020-04-14T15:16:27+01:00
New Revision: 13eb890139382e370be14938f4fffcfc20ff9613

URL: https://github.com/llvm/llvm-project/commit/13eb890139382e370be14938f4fffcfc20ff9613
DIFF: https://github.com/llvm/llvm-project/commit/13eb890139382e370be14938f4fffcfc20ff9613.diff

LOG: [Target][ARM] Fix VPT Block Pass miscompilation

The pass was incorrectly reverting back to a "T" predicate when something
wrote to VPR inside an "E" block. This is not the correct behaviour: the
predicate should stay the same.

Differential Revision: https://reviews.llvm.org/D77798

Added: 
    

Modified: 
    llvm/lib/Target/ARM/MVEVPTBlockPass.cpp
    llvm/test/CodeGen/Thumb2/mve-pred-not.ll
    llvm/test/CodeGen/Thumb2/mve-vpt-block-elses.mir

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/ARM/MVEVPTBlockPass.cpp b/llvm/lib/Target/ARM/MVEVPTBlockPass.cpp
index c0214cf2a910..5d98307e9af1 100644
--- a/llvm/lib/Target/ARM/MVEVPTBlockPass.cpp
+++ b/llvm/lib/Target/ARM/MVEVPTBlockPass.cpp
@@ -194,7 +194,7 @@ CreateVPTBlock(MachineBasicBlock::instr_iterator &Iter,
 
   // Remove VPNOTs while there's still room in the block, so we can make the
   // largest block possible.
-  ARMVCC::VPTCodes CurrentPredicate = ARMVCC::Then;
+  ARMVCC::VPTCodes CurrentPredicate = ARMVCC::Else;
   while (BlockSize < 4 && Iter != EndIter &&
          Iter->getOpcode() == ARM::MVE_VPNOT) {
 
@@ -222,28 +222,19 @@ CreateVPTBlock(MachineBasicBlock::instr_iterator &Iter,
     DeadInstructions.push_back(&*Iter);
     ++Iter;
 
-    // Replace "then" by "elses" in the block until we find an instruction that
-    // defines VPR, then after that leave everything to "t".
+    // Replace the predicates of the instructions we're adding.
     // Note that we are using "Iter" to iterate over the block so we can update
     // it at the same time.
-    bool ChangeToElse = (CurrentPredicate == ARMVCC::Then);
     for (; Iter != VPNOTBlockEndIter; ++Iter) {
       // Find the register in which the predicate is
       int OpIdx = findFirstVPTPredOperandIdx(*Iter);
       assert(OpIdx != -1);
 
-      // Update the mask + change the predicate to an else if needed.
-      if (ChangeToElse) {
-        // Change the predicate and update the mask
-        Iter->getOperand(OpIdx).setImm(ARMVCC::Else);
-        BlockMask = ExpandBlockMask(BlockMask, ARMVCC::Else);
-        // Reset back to a "then" predicate if this instruction defines VPR.
-        if (Iter->definesRegister(ARM::VPR))
-          ChangeToElse = false;
-      } else
-        BlockMask = ExpandBlockMask(BlockMask, ARMVCC::Then);
-
-      LLVM_DEBUG(dbgs() << "  adding: "; Iter->dump());
+      // Change the predicate and update the mask
+      Iter->getOperand(OpIdx).setImm(CurrentPredicate);
+      BlockMask = ExpandBlockMask(BlockMask, CurrentPredicate);
+
+      LLVM_DEBUG(dbgs() << "  adding : "; Iter->dump());
     }
 
     CurrentPredicate =

diff  --git a/llvm/test/CodeGen/Thumb2/mve-pred-not.ll b/llvm/test/CodeGen/Thumb2/mve-pred-not.ll
index 70fc0e4ab1cb..a16fce870cb9 100644
--- a/llvm/test/CodeGen/Thumb2/mve-pred-not.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-pred-not.ll
@@ -405,12 +405,42 @@ declare <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32>, <4 x i32>,
 define arm_aapcs_vfpcc <4 x i32> @vpttet_v4i1(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
 ; CHECK-LABEL: vpttet_v4i1:
 ; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    .pad #4
+; CHECK-NEXT:    sub sp, #4
+; CHECK-NEXT:    vcmp.s32 ge, q0, q2
+; CHECK-NEXT:    vstr p0, [sp] @ 4-byte Spill
+; CHECK-NEXT:    vpstt
+; CHECK-NEXT:    vmovt q0, q2
+; CHECK-NEXT:    vmovt q0, q2
+; CHECK-NEXT:    vldr p0, [sp] @ 4-byte Reload
+; CHECK-NEXT:    vpnot
+; CHECK-NEXT:    vpst
+; CHECK-NEXT:    vmovt q0, q2
+; CHECK-NEXT:    vldr p0, [sp] @ 4-byte Reload
+; CHECK-NEXT:    vpst
+; CHECK-NEXT:    vmovt q0, q2
+; CHECK-NEXT:    add sp, #4
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = icmp sge <4 x i32> %x, %z
+  %1 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %z, <4 x i32> %z, <4 x i1> %0, <4 x i32> %x)
+  %2 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %z, <4 x i32> %z, <4 x i1> %0, <4 x i32> %1)
+  %3 = xor <4 x i1> %0, <i1 true, i1 true, i1 true, i1 true>
+  %4 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %z, <4 x i32> %z, <4 x i1> %3, <4 x i32> %2)
+  %5 = xor <4 x i1> %3, <i1 true, i1 true, i1 true, i1 true>
+  %6 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %z, <4 x i32> %z, <4 x i1> %5, <4 x i32> %4)
+  ret <4 x i32> %6
+}
+
+define arm_aapcs_vfpcc <4 x i32> @vpttee_v4i1(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
+; CHECK-LABEL: vpttee_v4i1:
+; CHECK:       @ %bb.0: @ %entry
 ; CHECK-NEXT:    vmov q3, q2
-; CHECK-NEXT:    vpttet.s32 ge, q0, q2
+; CHECK-NEXT:    vpttee.s32 ge, q0, q2
 ; CHECK-NEXT:    vmaxt.s32 q3, q0, q1
 ; CHECK-NEXT:    vcmpt.s32 gt, q0, zr
-; CHECK-NEXT:    vcmpe.s32 gt, q1, zr
-; CHECK-NEXT:    vmovt q3, q2
+; CHECK-NEXT:    vmove q3, q2
+; CHECK-NEXT:    vmove q3, q2
 ; CHECK-NEXT:    vmov q0, q3
 ; CHECK-NEXT:    bx lr
 entry:
@@ -419,20 +449,19 @@ entry:
   %2 = icmp sgt <4 x i32> %x, zeroinitializer
   %3 = and <4 x i1> %0, %2
   %4 = xor <4 x i1> %3, <i1 true, i1 true, i1 true, i1 true>
-  %5 = icmp sgt <4 x i32> %y, zeroinitializer
-  %6 = and <4 x i1> %5, %4
-  %7 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %z, <4 x i32> %z, <4 x i1> %6, <4 x i32> %1)
-  ret <4 x i32> %7
+  %5 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %z, <4 x i32> %z, <4 x i1> %4, <4 x i32> %1)
+  %6 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %z, <4 x i32> %z, <4 x i1> %4, <4 x i32> %5)
+  ret <4 x i32> %6
 }
 
-define arm_aapcs_vfpcc <4 x i32> @vpttee_v4i1(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
-; CHECK-LABEL: vpttee_v4i1:
+define arm_aapcs_vfpcc <4 x i32> @vpttee2_v4i1(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
+; CHECK-LABEL: vpttee2_v4i1:
 ; CHECK:       @ %bb.0: @ %entry
 ; CHECK-NEXT:    vmov q3, q2
 ; CHECK-NEXT:    vpttee.s32 ge, q0, q2
 ; CHECK-NEXT:    vmaxt.s32 q3, q0, q1
 ; CHECK-NEXT:    vcmpt.s32 gt, q0, zr
-; CHECK-NEXT:    vmove q3, q2
+; CHECK-NEXT:    vcmpe.s32 gt, q1, zr
 ; CHECK-NEXT:    vmove q3, q2
 ; CHECK-NEXT:    vmov q0, q3
 ; CHECK-NEXT:    bx lr
@@ -442,9 +471,10 @@ entry:
   %2 = icmp sgt <4 x i32> %x, zeroinitializer
   %3 = and <4 x i1> %0, %2
   %4 = xor <4 x i1> %3, <i1 true, i1 true, i1 true, i1 true>
-  %5 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %z, <4 x i32> %z, <4 x i1> %4, <4 x i32> %1)
-  %6 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %z, <4 x i32> %z, <4 x i1> %4, <4 x i32> %5)
-  ret <4 x i32> %6
+  %5 = icmp sgt <4 x i32> %y, zeroinitializer
+  %6 = and <4 x i1> %5, %4
+  %7 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %z, <4 x i32> %z, <4 x i1> %6, <4 x i32> %1)
+  ret <4 x i32> %7
 }
 
 define arm_aapcs_vfpcc <4 x i32> @vpttte_v4i1(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {

diff  --git a/llvm/test/CodeGen/Thumb2/mve-vpt-block-elses.mir b/llvm/test/CodeGen/Thumb2/mve-vpt-block-elses.mir
index 765d3a4de831..93bf2e6f4510 100644
--- a/llvm/test/CodeGen/Thumb2/mve-vpt-block-elses.mir
+++ b/llvm/test/CodeGen/Thumb2/mve-vpt-block-elses.mir
@@ -69,11 +69,11 @@ body:             |
     ; CHECK: liveins: $q0, $q1, $q2
     ; CHECK: $q3 = MVE_VORR $q2, $q2, 0, $noreg, undef $q3
     ; CHECK: BUNDLE implicit-def dead $vpr, implicit-def $q3, implicit-def $d6, implicit-def $s12, implicit-def $s13, implicit-def $d7, implicit-def $s14, implicit-def $s15, implicit killed $q0, implicit $q2, implicit $q1, implicit killed $q3, implicit $zr {
-    ; CHECK:   MVE_VPTv4s32 5, renamable $q0, renamable $q2, 10, implicit-def $vpr
+    ; CHECK:   MVE_VPTv4s32 7, renamable $q0, renamable $q2, 10, implicit-def $vpr
     ; CHECK:   renamable $q3 = MVE_VMAXs32 renamable $q0, renamable $q1, 1, internal renamable $vpr, killed renamable $q3
     ; CHECK:   renamable $vpr = MVE_VCMPs32r killed renamable $q0, $zr, 12, 1, internal killed renamable $vpr
     ; CHECK:   renamable $vpr = MVE_VCMPs32r renamable $q1, $zr, 12, 2, internal killed renamable $vpr
-    ; CHECK:   renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 1, internal killed renamable $vpr, internal renamable $q3
+    ; CHECK:   renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 2, internal killed renamable $vpr, internal renamable $q3
     ; CHECK: }
     ; CHECK: $q0 = MVE_VORR $q3, $q3, 0, $noreg, undef $q0
     ; CHECK: $q3 = MVE_VORR $q2, $q2, 0, $noreg, undef $q3
@@ -87,20 +87,20 @@ body:             |
     ; CHECK: $q0 = MVE_VORR $q3, $q3, 0, $noreg, undef $q0
     ; CHECK: $q3 = MVE_VORR $q2, $q2, 0, $noreg, undef $q3
     ; CHECK: BUNDLE implicit-def dead $vpr, implicit-def $q3, implicit-def $d6, implicit-def $s12, implicit-def $s13, implicit-def $d7, implicit-def $s14, implicit-def $s15, implicit $q0, implicit $q2, implicit $q1, implicit killed $q3, implicit $zr {
-    ; CHECK:   MVE_VPTv4s32 13, renamable $q0, renamable $q2, 10, implicit-def $vpr
+    ; CHECK:   MVE_VPTv4s32 15, renamable $q0, renamable $q2, 10, implicit-def $vpr
     ; CHECK:   renamable $q3 = MVE_VMAXs32 renamable $q0, renamable $q1, 1, internal renamable $vpr, killed renamable $q3
     ; CHECK:   renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 2, internal renamable $vpr, internal renamable $q3
     ; CHECK:   renamable $vpr = MVE_VCMPs32r renamable $q1, $zr, 12, 2, internal killed renamable $vpr
-    ; CHECK:   renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 1, internal killed renamable $vpr, internal killed renamable $q3
+    ; CHECK:   renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 2, internal killed renamable $vpr, internal killed renamable $q3
     ; CHECK: }
     ; CHECK: $q0 = MVE_VORR killed $q3, killed $q3, 0, $noreg, undef $q0
     ; CHECK: $q3 = MVE_VORR $q2, $q2, 0, $noreg, undef $q3
     ; CHECK: BUNDLE implicit-def dead $vpr, implicit-def $q3, implicit-def $d6, implicit-def $s12, implicit-def $s13, implicit-def $d7, implicit-def $s14, implicit-def $s15, implicit $q0, implicit $q2, implicit $q1, implicit killed $q3, implicit $zr {
-    ; CHECK:   MVE_VPTv4s32 9, renamable $q0, renamable $q2, 10, implicit-def $vpr
+    ; CHECK:   MVE_VPTv4s32 15, renamable $q0, renamable $q2, 10, implicit-def $vpr
     ; CHECK:   renamable $q3 = MVE_VMAXs32 renamable $q0, renamable $q1, 1, internal renamable $vpr, killed renamable $q3
     ; CHECK:   renamable $vpr = MVE_VCMPs32r renamable $q1, $zr, 12, 2, internal killed renamable $vpr
-    ; CHECK:   renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 1, internal renamable $vpr, internal renamable $q3
-    ; CHECK:   renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 1, internal killed renamable $vpr, internal killed renamable $q3
+    ; CHECK:   renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 2, internal renamable $vpr, internal renamable $q3
+    ; CHECK:   renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 2, internal killed renamable $vpr, internal killed renamable $q3
     ; CHECK: }
     ; CHECK: $q0 = MVE_VORR killed $q3, killed $q3, 0, $noreg, undef $q0
     ; CHECK: $q3 = MVE_VORR $q2, $q2, 0, $noreg, undef $q3
@@ -122,10 +122,10 @@ body:             |
     ; CHECK: $q0 = MVE_VORR killed $q3, killed $q3, 0, $noreg, undef $q0
     ; CHECK: $q3 = MVE_VORR $q2, $q2, 0, $noreg, undef $q3
     ; CHECK: BUNDLE implicit-def dead $vpr, implicit-def $q3, implicit-def $d6, implicit-def $s12, implicit-def $s13, implicit-def $d7, implicit-def $s14, implicit-def $s15, implicit $q0, implicit $q2, implicit $q1, implicit killed $q3, implicit $zr {
-    ; CHECK:   MVE_VPTv4s32 10, renamable $q0, renamable $q2, 10, implicit-def $vpr
+    ; CHECK:   MVE_VPTv4s32 14, renamable $q0, renamable $q2, 10, implicit-def $vpr
     ; CHECK:   renamable $q3 = MVE_VMAXs32 renamable $q0, renamable $q1, 1, internal renamable $vpr, killed renamable $q3
     ; CHECK:   renamable $vpr = MVE_VCMPs32r renamable $q1, $zr, 12, 2, internal killed renamable $vpr
-    ; CHECK:   renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 1, internal killed renamable $vpr, internal killed renamable $q3
+    ; CHECK:   renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 2, internal killed renamable $vpr, internal killed renamable $q3
     ; CHECK: }
     ; CHECK: $q0 = MVE_VORR killed $q3, killed $q3, 0, $noreg, undef $q0
     ; CHECK: $q3 = MVE_VORR $q2, $q2, 0, $noreg, undef $q3
@@ -145,6 +145,14 @@ body:             |
     ; CHECK:   renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 2, internal renamable $vpr, internal killed renamable $q3
     ; CHECK: }
     ; CHECK: $q0 = MVE_VORR killed $q3, killed $q3, 0, $noreg, undef $q0
+    ; CHECK: $q3 = MVE_VORR $q2, $q2, 0, $noreg, undef $q3
+    ; CHECK: BUNDLE implicit-def $vpr, implicit-def $q3, implicit-def $d6, implicit-def $s12, implicit-def $s13, implicit-def $d7, implicit-def $s14, implicit-def $s15, implicit $q0, implicit $q2, implicit killed $q3 {
+    ; CHECK:   MVE_VPTv4s32 13, renamable $q0, renamable $q2, 10, implicit-def $vpr
+    ; CHECK:   renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 1, internal renamable $vpr, killed renamable $q3
+    ; CHECK:   renamable $vpr = MVE_VCMPs32 renamable $q0, renamable $q2, 11, 2, internal killed renamable $vpr
+    ; CHECK:   renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 2, internal renamable $vpr, internal killed renamable $q3
+    ; CHECK:   renamable $vpr = MVE_VCMPs32 renamable $q0, renamable $q2, 11, 1, internal killed renamable $vpr
+    ; CHECK: }
     ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $q0
     renamable $vpr = MVE_VCMPs32 renamable $q0, renamable $q2, 10, 0, $noreg
     $q3 = MVE_VORR $q2, $q2, 0, $noreg, undef $q3
@@ -226,6 +234,15 @@ body:             |
     renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 1, renamable $vpr, killed renamable $q3
     $q0 = MVE_VORR killed $q3, killed $q3, 0, $noreg, undef $q0
 
+    $q3 = MVE_VORR $q2, $q2, 0, $noreg, undef $q3
+    renamable $vpr = MVE_VCMPs32 renamable $q0, renamable $q2, 10, 0, $noreg
+    renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 1, renamable $vpr, killed renamable $q3
+    renamable $vpr = MVE_VPNOT killed renamable $vpr, 0, $noreg
+    renamable $vpr = MVE_VCMPs32 renamable $q0, renamable $q2, 11, 1, killed renamable $vpr
+    renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 1, renamable $vpr, killed renamable $q3
+    renamable $vpr = MVE_VPNOT killed renamable $vpr, 0, $noreg
+    renamable $vpr = MVE_VCMPs32 renamable $q0, renamable $q2, 11, 1, killed renamable $vpr
+
     tBX_RET 14, $noreg, implicit $q0
 
 ...


        


More information about the llvm-commits mailing list