[llvm] r212275 - bug fix for PR20020: anti-dependency-breaker causes miscompilation

Sanjay Patel spatel at rotateright.com
Thu Jul 3 08:19:40 PDT 2014


Author: spatel
Date: Thu Jul  3 10:19:40 2014
New Revision: 212275

URL: http://llvm.org/viewvc/llvm-project?rev=212275&view=rev
Log:
bug fix for PR20020: anti-dependency-breaker causes miscompilation

This patch sets the 'KeepReg' bit for any tied and live registers during the PrescanInstruction() phase of the dependency breaking algorithm. It then checks those 'KeepReg' bits during the ScanInstruction() phase to avoid changing any tied registers. For more details, please see comments in:
http://llvm.org/bugs/show_bug.cgi?id=20020

I added two FIXME comments for code that I think can be removed by using register iterators that include self. I don't want to include those code changes with this patch, however, to keep things as small as possible.

The test case is larger than I'd like, but I don't know how to reduce it further and still produce the failing asm.

Differential Revision: http://reviews.llvm.org/D4351


Added:
    llvm/trunk/test/CodeGen/X86/pr20020.ll
Modified:
    llvm/trunk/lib/CodeGen/CriticalAntiDepBreaker.cpp

Modified: llvm/trunk/lib/CodeGen/CriticalAntiDepBreaker.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/CriticalAntiDepBreaker.cpp?rev=212275&r1=212274&r2=212275&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/CriticalAntiDepBreaker.cpp (original)
+++ llvm/trunk/lib/CodeGen/CriticalAntiDepBreaker.cpp Thu Jul  3 10:19:40 2014
@@ -200,6 +200,28 @@ void CriticalAntiDepBreaker::PrescanInst
     if (Classes[Reg] != reinterpret_cast<TargetRegisterClass *>(-1))
       RegRefs.insert(std::make_pair(Reg, &MO));
 
+    // If this reg is tied and live (Classes[Reg] is set to -1), we can't change
+    // it or any of its sub or super regs. We need to use KeepRegs to mark the
+    // reg because not all uses of the same reg within an instruction are
+    // necessarily tagged as tied.
+    // Example: an x86 "xor %eax, %eax" will have one source operand tied to the
+    // def register but not the second (see PR20020 for details).
+    // FIXME: can this check be relaxed to account for undef uses
+    // of a register? In the above 'xor' example, the uses of %eax are undef, so
+    // earlier instructions could still replace %eax even though the 'xor'
+    // itself can't be changed.
+    if (MI->isRegTiedToUseOperand(i) &&
+        Classes[Reg] == reinterpret_cast<TargetRegisterClass *>(-1)) {
+      for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true);
+           SubRegs.isValid(); ++SubRegs) {
+        KeepRegs.set(*SubRegs);
+      }
+      for (MCSuperRegIterator SuperRegs(Reg, TRI);
+           SuperRegs.isValid(); ++SuperRegs) {
+        KeepRegs.set(*SuperRegs);
+      }
+    }
+
     if (MO.isUse() && Special) {
       if (!KeepRegs.test(Reg)) {
         for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true);
@@ -236,9 +258,15 @@ void CriticalAntiDepBreaker::ScanInstruc
       unsigned Reg = MO.getReg();
       if (Reg == 0) continue;
       if (!MO.isDef()) continue;
+
+      // If we've already marked this reg as unchangeable, carry on.
+      if (KeepRegs.test(Reg)) continue;
+      
       // Ignore two-addr defs.
       if (MI->isRegTiedToUseOperand(i)) continue;
 
+      // FIXME: we should use a SubRegIterator that includes self (as above), so
+      // we don't have to repeat all this code for the reg itself.
       DefIndices[Reg] = Count;
       KillIndices[Reg] = ~0u;
       assert(((KillIndices[Reg] == ~0u) !=
@@ -281,6 +309,9 @@ void CriticalAntiDepBreaker::ScanInstruc
 
     RegRefs.insert(std::make_pair(Reg, &MO));
 
+    // FIXME: we should use an MCRegAliasIterator that includes self so we don't
+    // have to repeat all this code for the reg itself.
+    
     // It wasn't previously live but now it is, this is a kill.
     if (KillIndices[Reg] == ~0u) {
       KillIndices[Reg] = Count;

Added: llvm/trunk/test/CodeGen/X86/pr20020.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/pr20020.ll?rev=212275&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/X86/pr20020.ll (added)
+++ llvm/trunk/test/CodeGen/X86/pr20020.ll Thu Jul  3 10:19:40 2014
@@ -0,0 +1,73 @@
+; RUN: llc < %s -mtriple=x86_64-apple-macosx -disable-lsr -post-RA-scheduler=1 -break-anti-dependencies=critical  | FileCheck %s
+
+; In PR20020, the critical anti-dependency breaker algorithm mistakenly
+; changes the register operands of an 'xorl %eax, %eax' to 'xorl %ecx, %ecx'
+; and then immediately reloads %rcx with a value based on the wrong %rax
+
+; CHECK-NOT: xorl %ecx, %ecx
+; CHECK: leaq 1(%rax), %rcx
+
+
+%struct.planet = type { double, double, double }
+
+; Function Attrs: nounwind ssp uwtable
+define void @advance(i32 %nbodies, %struct.planet* nocapture %bodies) #0 {
+entry:
+  %cmp4 = icmp sgt i32 %nbodies, 0
+  br i1 %cmp4, label %for.body.preheader, label %for.end38
+
+for.body.preheader:                               ; preds = %entry
+  %gep = getelementptr %struct.planet* %bodies, i64 1, i32 1
+  %gep13 = bitcast double* %gep to %struct.planet*
+  %0 = add i32 %nbodies, -1
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.preheader, %for.inc20
+  %iv19 = phi i32 [ %0, %for.body.preheader ], [ %iv.next, %for.inc20 ]
+  %iv = phi %struct.planet* [ %gep13, %for.body.preheader ], [ %gep14, %for.inc20 ]
+  %iv9 = phi i64 [ %iv.next10, %for.inc20 ], [ 0, %for.body.preheader ]
+  %iv.next10 = add nuw nsw i64 %iv9, 1
+  %1 = trunc i64 %iv.next10 to i32
+  %cmp22 = icmp slt i32 %1, %nbodies
+  br i1 %cmp22, label %for.body3.lr.ph, label %for.inc20
+
+for.body3.lr.ph:                                  ; preds = %for.body
+  %x = getelementptr inbounds %struct.planet* %bodies, i64 %iv9, i32 0
+  %y = getelementptr inbounds %struct.planet* %bodies, i64 %iv9, i32 1
+  %vx = getelementptr inbounds %struct.planet* %bodies, i64 %iv9, i32 2
+  br label %for.body3
+
+for.body3:                                        ; preds = %for.body3, %for.body3.lr.ph
+  %iv20 = phi i32 [ %iv.next21, %for.body3 ], [ %iv19, %for.body3.lr.ph ]
+  %iv15 = phi %struct.planet* [ %gep16, %for.body3 ], [ %iv, %for.body3.lr.ph ]
+  %iv1517 = bitcast %struct.planet* %iv15 to double*
+  %2 = load double* %x, align 8
+  %gep18 = getelementptr double* %iv1517, i64 -1
+  %3 = load double* %gep18, align 8
+  %sub = fsub double %2, %3
+  %4 = load double* %y, align 8
+  %5 = load double* %iv1517, align 8
+  %sub8 = fsub double %4, %5
+  %add10 = fadd double %sub, %sub8
+  %call = tail call double @sqrt(double %sub8) #2
+  store double %add10, double* %vx, align 8
+  %gep16 = getelementptr %struct.planet* %iv15, i64 1
+  %iv.next21 = add i32 %iv20, -1
+  %exitcond = icmp eq i32 %iv.next21, 0
+  br i1 %exitcond, label %for.inc20, label %for.body3
+
+for.inc20:                                        ; preds = %for.body3, %for.body
+  %lftr.wideiv11 = trunc i64 %iv.next10 to i32
+  %gep14 = getelementptr %struct.planet* %iv, i64 1
+  %iv.next = add i32 %iv19, -1
+  %exitcond12 = icmp eq i32 %lftr.wideiv11, %nbodies
+  br i1 %exitcond12, label %for.end38, label %for.body
+
+for.end38:                                        ; preds = %for.inc20, %entry
+  ret void
+}
+
+; Function Attrs: nounwind
+declare double @sqrt(double) #1
+
+attributes #0 = { "no-frame-pointer-elim-non-leaf" }





More information about the llvm-commits mailing list