[llvm-commits] [llvm] r56165 - in /llvm/trunk: lib/CodeGen/SimpleRegisterCoalescing.cpp test/CodeGen/PowerPC/2008-09-12-CoalescerBug.ll

Evan Cheng evan.cheng at apple.com
Fri Sep 12 11:13:15 PDT 2008


Author: evancheng
Date: Fri Sep 12 13:13:14 2008
New Revision: 56165

URL: http://llvm.org/viewvc/llvm-project?rev=56165&view=rev
Log:
On some targets, non-move instructions can become move instructions because of coalescing. e.g.
vr2 = OR vr0, vr1
=>
vr2 = OR vr1, vr1   // after coalescing vr0 with vr1

Update the value# of the destination register with the copy instruction if that happens.

Added:
    llvm/trunk/test/CodeGen/PowerPC/2008-09-12-CoalescerBug.ll
Modified:
    llvm/trunk/lib/CodeGen/SimpleRegisterCoalescing.cpp

Modified: llvm/trunk/lib/CodeGen/SimpleRegisterCoalescing.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SimpleRegisterCoalescing.cpp?rev=56165&r1=56164&r2=56165&view=diff

==============================================================================
--- llvm/trunk/lib/CodeGen/SimpleRegisterCoalescing.cpp (original)
+++ llvm/trunk/lib/CodeGen/SimpleRegisterCoalescing.cpp Fri Sep 12 13:13:14 2008
@@ -541,20 +541,34 @@
 
       O.setReg(UseDstReg);
       O.setSubReg(0);
-    } else {
-      // Sub-register indexes goes from small to large. e.g.
-      // RAX: 1 -> AL, 2 -> AX, 3 -> EAX
-      // EAX: 1 -> AL, 2 -> AX
-      // So RAX's sub-register 2 is AX, RAX's sub-regsiter 3 is EAX, whose
-      // sub-register 2 is also AX.
-      if (SubIdx && OldSubIdx && SubIdx != OldSubIdx)
-        assert(OldSubIdx < SubIdx && "Conflicting sub-register index!");
-      else if (SubIdx)
-        O.setSubReg(SubIdx);
-      // Remove would-be duplicated kill marker.
-      if (O.isKill() && UseMI->killsRegister(DstReg))
-        O.setIsKill(false);
-      O.setReg(DstReg);
+      continue;
+    }
+
+    // Sub-register indexes goes from small to large. e.g.
+    // RAX: 1 -> AL, 2 -> AX, 3 -> EAX
+    // EAX: 1 -> AL, 2 -> AX
+    // So RAX's sub-register 2 is AX, RAX's sub-regsiter 3 is EAX, whose
+    // sub-register 2 is also AX.
+    if (SubIdx && OldSubIdx && SubIdx != OldSubIdx)
+      assert(OldSubIdx < SubIdx && "Conflicting sub-register index!");
+    else if (SubIdx)
+      O.setSubReg(SubIdx);
+    // Remove would-be duplicated kill marker.
+    if (O.isKill() && UseMI->killsRegister(DstReg))
+      O.setIsKill(false);
+    O.setReg(DstReg);
+
+    // After updating the operand, check if the machine instruction has
+    // become a copy. If so, update its val# information.
+    const TargetInstrDesc &TID = UseMI->getDesc();
+    unsigned CopySrcReg, CopyDstReg;
+    if (TID.getNumDefs() == 1 && TID.getNumOperands() > 2 &&
+        tii_->isMoveInstr(*UseMI, CopySrcReg, CopyDstReg) &&
+        CopySrcReg != CopyDstReg) {
+      LiveInterval &LI = li_->getInterval(CopyDstReg);
+      unsigned DefIdx = li_->getDefIndex(li_->getInstructionIndex(UseMI));
+      const LiveRange *DLR = LI.getLiveRangeContaining(DefIdx);
+      DLR->valno->copy = UseMI;
     }
   }
 }

Added: llvm/trunk/test/CodeGen/PowerPC/2008-09-12-CoalescerBug.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/2008-09-12-CoalescerBug.ll?rev=56165&view=auto

==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/2008-09-12-CoalescerBug.ll (added)
+++ llvm/trunk/test/CodeGen/PowerPC/2008-09-12-CoalescerBug.ll Fri Sep 12 13:13:14 2008
@@ -0,0 +1,254 @@
+; RUN: llvm-as < %s | llc -mtriple=powerpc-apple-darwin
+
+	%struct.CGLDI = type { %struct.cgli*, i32, i32, i32, i32, i32, i8*, i32, void (%struct.CGLSI*, i32, %struct.CGLDI*)*, i8*, %struct.vv_t }
+	%struct.cgli = type { i32, %struct.cgli*, void (%struct.cgli*, i8*, i8*, i32, i32, i32, i32, i32, i32, i32, i32)*, i32, i8*, i8*, i8*, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, i8*, i32*, %struct._cgro*, %struct._cgro*, float, float, float, float, i32, i8*, float, i8*, [16 x i32] }
+	%struct.CGLSI = type { %struct.cgli*, i32, i8*, i8*, i32, i32, i8*, void (%struct.cgli*, i8*, i8*, i32, i32, i32, i32, i32, i32, i32, i32)*, %struct.vv_t, %struct.vv_t, %struct.xx_t* }
+	%struct._cgro = type opaque
+	%struct.xx_t = type { [3 x %struct.vv_t], [2 x %struct.vv_t], [2 x [3 x i8*]] }
+	%struct.vv_t = type { <16 x i8> }
+ at llvm.used = appending global [1 x i8*] [ i8* bitcast (void (%struct.CGLSI*, i32, %struct.CGLDI*)* @lb to i8*) ], section "llvm.metadata"		; <[1 x i8*]*> [#uses=0]
+
+define void @lb(%struct.CGLSI* %src, i32 %n, %struct.CGLDI* %dst) nounwind {
+entry:
+	%0 = load i32* null, align 4		; <i32> [#uses=1]
+	%1 = icmp sgt i32 %0, 0		; <i1> [#uses=1]
+	br i1 %1, label %bb.nph4945, label %return
+
+bb.nph4945:		; preds = %entry
+	%2 = bitcast [2 x %struct.vv_t]* null to i64*		; <i64*> [#uses=6]
+	%3 = getelementptr [2 x i64]* null, i32 0, i32 1		; <i64*> [#uses=6]
+	%4 = bitcast %struct.vv_t* null to i64*		; <i64*> [#uses=5]
+	%5 = getelementptr [2 x i64]* null, i32 0, i32 1		; <i64*> [#uses=3]
+	br label %bb2326
+
+bb2217:		; preds = %bb2326
+	%6 = or i64 0, 0		; <i64> [#uses=2]
+	%7 = fptosi float 0.000000e+00 to i32		; <i32> [#uses=1]
+	%8 = fptosi float 0.000000e+00 to i32		; <i32> [#uses=1]
+	%9 = getelementptr float* null, i32 2		; <float*> [#uses=1]
+	%10 = load float* %9, align 4		; <float> [#uses=1]
+	%11 = getelementptr float* null, i32 3		; <float*> [#uses=1]
+	%12 = load float* %11, align 4		; <float> [#uses=1]
+	%13 = mul float %10, 6.553500e+04		; <float> [#uses=1]
+	%14 = add float %13, 5.000000e-01		; <float> [#uses=1]
+	%15 = mul float %12, 6.553500e+04		; <float> [#uses=1]
+	%16 = add float %15, 5.000000e-01		; <float> [#uses=3]
+	%17 = fcmp olt float %14, 0.000000e+00		; <i1> [#uses=0]
+	%18 = fcmp olt float %16, 0.000000e+00		; <i1> [#uses=1]
+	br i1 %18, label %bb2265, label %bb2262
+
+bb2262:		; preds = %bb2217
+	%19 = fcmp ogt float %16, 6.553500e+04		; <i1> [#uses=1]
+	br i1 %19, label %bb2264, label %bb2265
+
+bb2264:		; preds = %bb2262
+	br label %bb2265
+
+bb2265:		; preds = %bb2264, %bb2262, %bb2217
+	%f3596.0 = phi float [ 6.553500e+04, %bb2264 ], [ 0.000000e+00, %bb2217 ], [ %16, %bb2262 ]		; <float> [#uses=1]
+	%20 = fptosi float 0.000000e+00 to i32		; <i32> [#uses=1]
+	%21 = fptosi float %f3596.0 to i32		; <i32> [#uses=1]
+	%22 = zext i32 %7 to i64		; <i64> [#uses=1]
+	%23 = shl i64 %22, 48		; <i64> [#uses=1]
+	%24 = zext i32 %8 to i64		; <i64> [#uses=1]
+	%25 = shl i64 %24, 32		; <i64> [#uses=1]
+	%26 = sext i32 %20 to i64		; <i64> [#uses=1]
+	%27 = shl i64 %26, 16		; <i64> [#uses=1]
+	%28 = sext i32 %21 to i64		; <i64> [#uses=1]
+	%29 = or i64 %25, %23		; <i64> [#uses=1]
+	%30 = or i64 %29, %27		; <i64> [#uses=1]
+	%31 = or i64 %30, %28		; <i64> [#uses=2]
+	%32 = shl i64 %6, 48		; <i64> [#uses=1]
+	%33 = shl i64 %31, 32		; <i64> [#uses=1]
+	%34 = and i64 %33, 281470681743360		; <i64> [#uses=1]
+	store i64 %6, i64* %2, align 16
+	store i64 %31, i64* %3, align 8
+	%35 = getelementptr i8* null, i32 0		; <i8*> [#uses=1]
+	%36 = bitcast i8* %35 to float*		; <float*> [#uses=4]
+	%37 = load float* %36, align 4		; <float> [#uses=1]
+	%38 = getelementptr float* %36, i32 1		; <float*> [#uses=1]
+	%39 = load float* %38, align 4		; <float> [#uses=1]
+	%40 = mul float %37, 6.553500e+04		; <float> [#uses=1]
+	%41 = add float %40, 5.000000e-01		; <float> [#uses=1]
+	%42 = mul float %39, 6.553500e+04		; <float> [#uses=1]
+	%43 = add float %42, 5.000000e-01		; <float> [#uses=3]
+	%44 = fcmp olt float %41, 0.000000e+00		; <i1> [#uses=0]
+	%45 = fcmp olt float %43, 0.000000e+00		; <i1> [#uses=1]
+	br i1 %45, label %bb2277, label %bb2274
+
+bb2274:		; preds = %bb2265
+	%46 = fcmp ogt float %43, 6.553500e+04		; <i1> [#uses=0]
+	br label %bb2277
+
+bb2277:		; preds = %bb2274, %bb2265
+	%f1582.0 = phi float [ 0.000000e+00, %bb2265 ], [ %43, %bb2274 ]		; <float> [#uses=1]
+	%47 = fptosi float 0.000000e+00 to i32		; <i32> [#uses=1]
+	%48 = fptosi float %f1582.0 to i32		; <i32> [#uses=1]
+	%49 = getelementptr float* %36, i32 2		; <float*> [#uses=1]
+	%50 = load float* %49, align 4		; <float> [#uses=1]
+	%51 = getelementptr float* %36, i32 3		; <float*> [#uses=1]
+	%52 = load float* %51, align 4		; <float> [#uses=1]
+	%53 = mul float %50, 6.553500e+04		; <float> [#uses=1]
+	%54 = add float %53, 5.000000e-01		; <float> [#uses=1]
+	%55 = mul float %52, 6.553500e+04		; <float> [#uses=1]
+	%56 = add float %55, 5.000000e-01		; <float> [#uses=1]
+	%57 = fcmp olt float %54, 0.000000e+00		; <i1> [#uses=0]
+	%58 = fcmp olt float %56, 0.000000e+00		; <i1> [#uses=0]
+	%59 = fptosi float 0.000000e+00 to i32		; <i32> [#uses=1]
+	%60 = fptosi float 0.000000e+00 to i32		; <i32> [#uses=1]
+	%61 = zext i32 %47 to i64		; <i64> [#uses=1]
+	%62 = shl i64 %61, 48		; <i64> [#uses=1]
+	%63 = zext i32 %48 to i64		; <i64> [#uses=1]
+	%64 = shl i64 %63, 32		; <i64> [#uses=1]
+	%65 = sext i32 %59 to i64		; <i64> [#uses=1]
+	%66 = shl i64 %65, 16		; <i64> [#uses=1]
+	%67 = sext i32 %60 to i64		; <i64> [#uses=1]
+	%68 = or i64 %64, %62		; <i64> [#uses=1]
+	%69 = or i64 %68, %66		; <i64> [#uses=1]
+	%70 = or i64 %69, %67		; <i64> [#uses=2]
+	%71 = getelementptr i8* null, i32 0		; <i8*> [#uses=1]
+	%72 = bitcast i8* %71 to float*		; <float*> [#uses=4]
+	%73 = load float* %72, align 4		; <float> [#uses=1]
+	%74 = getelementptr float* %72, i32 1		; <float*> [#uses=1]
+	%75 = load float* %74, align 4		; <float> [#uses=1]
+	%76 = mul float %73, 6.553500e+04		; <float> [#uses=1]
+	%77 = add float %76, 5.000000e-01		; <float> [#uses=3]
+	%78 = mul float %75, 6.553500e+04		; <float> [#uses=1]
+	%79 = add float %78, 5.000000e-01		; <float> [#uses=1]
+	%80 = fcmp olt float %77, 0.000000e+00		; <i1> [#uses=1]
+	br i1 %80, label %bb2295, label %bb2292
+
+bb2292:		; preds = %bb2277
+	%81 = fcmp ogt float %77, 6.553500e+04		; <i1> [#uses=1]
+	br i1 %81, label %bb2294, label %bb2295
+
+bb2294:		; preds = %bb2292
+	br label %bb2295
+
+bb2295:		; preds = %bb2294, %bb2292, %bb2277
+	%f0569.0 = phi float [ 6.553500e+04, %bb2294 ], [ 0.000000e+00, %bb2277 ], [ %77, %bb2292 ]		; <float> [#uses=1]
+	%82 = fcmp olt float %79, 0.000000e+00		; <i1> [#uses=0]
+	%83 = fptosi float %f0569.0 to i32		; <i32> [#uses=1]
+	%84 = fptosi float 0.000000e+00 to i32		; <i32> [#uses=1]
+	%85 = getelementptr float* %72, i32 2		; <float*> [#uses=1]
+	%86 = load float* %85, align 4		; <float> [#uses=1]
+	%87 = getelementptr float* %72, i32 3		; <float*> [#uses=1]
+	%88 = load float* %87, align 4		; <float> [#uses=1]
+	%89 = mul float %86, 6.553500e+04		; <float> [#uses=1]
+	%90 = add float %89, 5.000000e-01		; <float> [#uses=1]
+	%91 = mul float %88, 6.553500e+04		; <float> [#uses=1]
+	%92 = add float %91, 5.000000e-01		; <float> [#uses=1]
+	%93 = fcmp olt float %90, 0.000000e+00		; <i1> [#uses=0]
+	%94 = fcmp olt float %92, 0.000000e+00		; <i1> [#uses=0]
+	%95 = fptosi float 0.000000e+00 to i32		; <i32> [#uses=1]
+	%96 = fptosi float 0.000000e+00 to i32		; <i32> [#uses=1]
+	%97 = zext i32 %83 to i64		; <i64> [#uses=1]
+	%98 = shl i64 %97, 48		; <i64> [#uses=1]
+	%99 = zext i32 %84 to i64		; <i64> [#uses=1]
+	%100 = shl i64 %99, 32		; <i64> [#uses=1]
+	%101 = sext i32 %95 to i64		; <i64> [#uses=1]
+	%102 = shl i64 %101, 16		; <i64> [#uses=1]
+	%103 = sext i32 %96 to i64		; <i64> [#uses=1]
+	%104 = or i64 %100, %98		; <i64> [#uses=1]
+	%105 = or i64 %104, %102		; <i64> [#uses=1]
+	%106 = or i64 %105, %103		; <i64> [#uses=2]
+	%107 = shl i64 %70, 16		; <i64> [#uses=1]
+	%108 = and i64 %107, 4294901760		; <i64> [#uses=1]
+	%109 = and i64 %106, 65535		; <i64> [#uses=1]
+	%110 = or i64 %34, %32		; <i64> [#uses=1]
+	%111 = or i64 %110, %108		; <i64> [#uses=1]
+	%112 = or i64 %111, %109		; <i64> [#uses=1]
+	store i64 %70, i64* %4, align 16
+	store i64 %106, i64* %5, align 8
+	%113 = icmp eq i64 %112, 0		; <i1> [#uses=1]
+	br i1 %113, label %bb2325, label %bb2315
+
+bb2315:		; preds = %bb2295
+	%114 = icmp eq %struct.xx_t* %159, null		; <i1> [#uses=1]
+	br i1 %114, label %bb2318, label %bb2317
+
+bb2317:		; preds = %bb2315
+	%115 = load i64* %2, align 16		; <i64> [#uses=1]
+	%116 = call i32 (...)* @_u16a_cm( i64 %115, %struct.xx_t* %159, double 0.000000e+00, double 1.047551e+06 ) nounwind		; <i32> [#uses=1]
+	%117 = sext i32 %116 to i64		; <i64> [#uses=1]
+	store i64 %117, i64* %2, align 16
+	%118 = load i64* %3, align 8		; <i64> [#uses=1]
+	%119 = call i32 (...)* @_u16a_cm( i64 %118, %struct.xx_t* %159, double 0.000000e+00, double 1.047551e+06 ) nounwind		; <i32> [#uses=1]
+	%120 = sext i32 %119 to i64		; <i64> [#uses=1]
+	store i64 %120, i64* %3, align 8
+	%121 = load i64* %4, align 16		; <i64> [#uses=1]
+	%122 = call i32 (...)* @_u16a_cm( i64 %121, %struct.xx_t* %159, double 0.000000e+00, double 1.047551e+06 ) nounwind		; <i32> [#uses=1]
+	%123 = sext i32 %122 to i64		; <i64> [#uses=1]
+	store i64 %123, i64* %4, align 16
+	%124 = load i64* %5, align 8		; <i64> [#uses=1]
+	%125 = call i32 (...)* @_u16a_cm( i64 %124, %struct.xx_t* %159, double 0.000000e+00, double 1.047551e+06 ) nounwind		; <i32> [#uses=0]
+	unreachable
+
+bb2318:		; preds = %bb2315
+	%126 = getelementptr %struct.CGLSI* %src, i32 %indvar5021, i32 8		; <%struct.vv_t*> [#uses=1]
+	%127 = bitcast %struct.vv_t* %126 to i64*		; <i64*> [#uses=1]
+	%128 = load i64* %127, align 8		; <i64> [#uses=1]
+	%129 = trunc i64 %128 to i32		; <i32> [#uses=4]
+	%130 = load i64* %2, align 16		; <i64> [#uses=1]
+	%131 = call i32 (...)* @_u16_ff( i64 %130, i32 %129 ) nounwind		; <i32> [#uses=1]
+	%132 = sext i32 %131 to i64		; <i64> [#uses=1]
+	store i64 %132, i64* %2, align 16
+	%133 = load i64* %3, align 8		; <i64> [#uses=1]
+	%134 = call i32 (...)* @_u16_ff( i64 %133, i32 %129 ) nounwind		; <i32> [#uses=1]
+	%135 = sext i32 %134 to i64		; <i64> [#uses=1]
+	store i64 %135, i64* %3, align 8
+	%136 = load i64* %4, align 16		; <i64> [#uses=1]
+	%137 = call i32 (...)* @_u16_ff( i64 %136, i32 %129 ) nounwind		; <i32> [#uses=1]
+	%138 = sext i32 %137 to i64		; <i64> [#uses=1]
+	store i64 %138, i64* %4, align 16
+	%139 = load i64* %5, align 8		; <i64> [#uses=1]
+	%140 = call i32 (...)* @_u16_ff( i64 %139, i32 %129 ) nounwind		; <i32> [#uses=0]
+	unreachable
+
+bb2319:		; preds = %bb2326
+	%141 = getelementptr %struct.CGLSI* %src, i32 %indvar5021, i32 2		; <i8**> [#uses=1]
+	%142 = load i8** %141, align 4		; <i8*> [#uses=4]
+	%143 = getelementptr i8* %142, i32 0		; <i8*> [#uses=1]
+	%144 = call i32 (...)* @_u16_sf32( double 0.000000e+00, double 6.553500e+04, double 5.000000e-01, i8* %143 ) nounwind		; <i32> [#uses=1]
+	%145 = sext i32 %144 to i64		; <i64> [#uses=2]
+	%146 = getelementptr i8* %142, i32 0		; <i8*> [#uses=1]
+	%147 = call i32 (...)* @_u16_sf32( double 0.000000e+00, double 6.553500e+04, double 5.000000e-01, i8* %146 ) nounwind		; <i32> [#uses=1]
+	%148 = sext i32 %147 to i64		; <i64> [#uses=2]
+	%149 = shl i64 %145, 48		; <i64> [#uses=0]
+	%150 = shl i64 %148, 32		; <i64> [#uses=1]
+	%151 = and i64 %150, 281470681743360		; <i64> [#uses=0]
+	store i64 %145, i64* %2, align 16
+	store i64 %148, i64* %3, align 8
+	%152 = getelementptr i8* %142, i32 0		; <i8*> [#uses=1]
+	%153 = call i32 (...)* @_u16_sf32( double 0.000000e+00, double 6.553500e+04, double 5.000000e-01, i8* %152 ) nounwind		; <i32> [#uses=1]
+	%154 = sext i32 %153 to i64		; <i64> [#uses=0]
+	%155 = getelementptr i8* %142, i32 0		; <i8*> [#uses=1]
+	%156 = call i32 (...)* @_u16_sf32( double 0.000000e+00, double 6.553500e+04, double 5.000000e-01, i8* %155 ) nounwind		; <i32> [#uses=0]
+	unreachable
+
+bb2325:		; preds = %bb2326, %bb2295
+	%indvar.next5145 = add i32 %indvar5021, 1		; <i32> [#uses=1]
+	br label %bb2326
+
+bb2326:		; preds = %bb2325, %bb.nph4945
+	%indvar5021 = phi i32 [ 0, %bb.nph4945 ], [ %indvar.next5145, %bb2325 ]		; <i32> [#uses=6]
+	%157 = icmp slt i32 %indvar5021, %n		; <i1> [#uses=0]
+	%158 = getelementptr %struct.CGLSI* %src, i32 %indvar5021, i32 10		; <%struct.xx_t**> [#uses=1]
+	%159 = load %struct.xx_t** %158, align 4		; <%struct.xx_t*> [#uses=5]
+	%160 = getelementptr %struct.CGLSI* %src, i32 %indvar5021, i32 1		; <i32*> [#uses=1]
+	%161 = load i32* %160, align 4		; <i32> [#uses=1]
+	%162 = and i32 %161, 255		; <i32> [#uses=1]
+	switch i32 %162, label %bb2325 [
+		 i32 59, label %bb2217
+		 i32 60, label %bb2319
+	]
+
+return:		; preds = %entry
+	ret void
+}
+
+declare i32 @_u16_ff(...)
+
+declare i32 @_u16a_cm(...)
+
+declare i32 @_u16_sf32(...)





More information about the llvm-commits mailing list