[llvm] b189218 - [ARM] Fix Chain/Glue Bug in PerformVMOVhrCombine

Mon Mar 6 03:56:59 PST 2023

Author: Archibald Elliott
Date: 2023-03-06T11:55:54Z
New Revision: b189218d4422ba3f4676fa1191961df8e4732783

URL: https://github.com/llvm/llvm-project/commit/b189218d4422ba3f4676fa1191961df8e4732783
DIFF: https://github.com/llvm/llvm-project/commit/b189218d4422ba3f4676fa1191961df8e4732783.diff

LOG: [ARM] Fix Chain/Glue Bug in PerformVMOVhrCombine

In this optimisation, the Chain and Glue from the original CopyFromReg
were being lost, which resulted in miscompiles.

This fix just ensures that the input chains are correctly updated, and
that any users are also updated with the new chain from the new
CopyFromReg.

Fixes #60510.

Differential Revision: https://reviews.llvm.org/D143713

Added: 
    

Modified: 
    llvm/lib/Target/ARM/ARMISelLowering.cpp
    llvm/test/CodeGen/ARM/fp16-return-pr60510.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 3d2223961433..126bbc61a7d3 100644

--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -15000,16 +15000,31 @@ static SDValue PerformVMOVhrCombine(SDNode *N,
   // FullFP16: half values are passed in S-registers, and we don't
   // need any of the bitcast and moves:
   //
-  //     t2: f32,ch = CopyFromReg t0, Register:f32 %0
+  //     t2: f32,ch1,gl1? = CopyFromReg ch, Register:f32 %0, gl?
   //   t5: i32 = bitcast t2
   // t18: f16 = ARMISD::VMOVhr t5
+  // =>
+  // tN: f16,ch2,gl2? = CopyFromReg ch, Register::f32 %0, gl?
   if (Op0->getOpcode() == ISD::BITCAST) {
     SDValue Copy = Op0->getOperand(0);
     if (Copy.getValueType() == MVT::f32 &&
         Copy->getOpcode() == ISD::CopyFromReg) {
-      SDValue Ops[] = {Copy->getOperand(0), Copy->getOperand(1)};
+      bool HasGlue = Copy->getNumOperands() == 3;
+      SDValue Ops[] = {Copy->getOperand(0), Copy->getOperand(1),
+                       HasGlue ? Copy->getOperand(2) : SDValue()};
+      EVT OutTys[] = {N->getValueType(0), MVT::Other, MVT::Glue};
       SDValue NewCopy =
-          DCI.DAG.getNode(ISD::CopyFromReg, SDLoc(N), N->getValueType(0), Ops);
+          DCI.DAG.getNode(ISD::CopyFromReg, SDLoc(N),
+                          DCI.DAG.getVTList(ArrayRef(OutTys, HasGlue ? 3 : 2)),
+                          ArrayRef(Ops, HasGlue ? 3 : 2));
+
+      // Update Users, Chains, and Potential Glue.
+      DCI.DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), NewCopy.getValue(0));
+      DCI.DAG.ReplaceAllUsesOfValueWith(Copy.getValue(1), NewCopy.getValue(1));
+      if (HasGlue)
+        DCI.DAG.ReplaceAllUsesOfValueWith(Copy.getValue(2),
+                                          NewCopy.getValue(2));
+
       return NewCopy;
     }
   }

diff  --git a/llvm/test/CodeGen/ARM/fp16-return-pr60510.ll b/llvm/test/CodeGen/ARM/fp16-return-pr60510.ll
index 14aa90c42760..a1c89dcdda17 100644
--- a/llvm/test/CodeGen/ARM/fp16-return-pr60510.ll
+++ b/llvm/test/CodeGen/ARM/fp16-return-pr60510.ll
@@ -59,8 +59,10 @@ define half @fp16_out_call_oneuse(float %arg) nounwind {
 ; FP16-HARD:  @ %bb.0:
 ; FP16-HARD:    vmov.f32 s16, s0
 ; FP16-HARD:    bl fp16_inner
+; FP16-HARD:    vmov.f32 s18, s0
 ; FP16-HARD:    vmov.f32 s0, s16
 ; FP16-HARD:    bl other
+; FP16-HARD:    vmov.f32 s0, s18
   %call = call half @fp16_inner()
   %call1 = call float @other(float %arg)
   ret half %call
@@ -105,13 +107,13 @@ define half @fp16_out_call_multiuse(float %arg) nounwind {
 ; FP16-HARD:  @ %bb.0:
 ; FP16-HARD:    vmov.f32 s16, s0
 ; FP16-HARD:    bl fp16_inner
+; FP16-HARD:    vmov.f32 s18, s0
 ; FP16-HARD:    vmov.f32 s0, s16
 ; FP16-HARD:    bl other
-; FP16-HARD:    vmov.f16 r0, s0
-; FP16-HARD:    vmov.f32 s16, s0
+; FP16-HARD:    vmov.f16 r0, s18
 ; FP16-HARD:    vmov s0, r0
 ; FP16-HARD:    bl fp16_sink
-; FP16-HARD:    vmov.f32 s0, s16
+; FP16-HARD:    vmov.f32 s0, s18
   %call = call half @fp16_inner()
   %call1 = call float @other(float %arg)
   call void @fp16_sink(half %call)