[llvm] r320965 - AArch64: work around how Cyclone handles "movi.2d vD, #0".

Mon Dec 18 02:36:00 PST 2017

Author: tnorthover
Date: Mon Dec 18 02:36:00 2017
New Revision: 320965

URL: http://llvm.org/viewvc/llvm-project?rev=320965&view=rev
Log:
AArch64: work around how Cyclone handles "movi.2d vD, #0".

For Cyclone, the instruction "movi.2d vD, #0" is executed incorrectly in some rare
circumstances. Work around the issue conservatively by avoiding the instruction entirely.

This patch changes CodeGen so that problematic instructions are never
generated, and the AsmParser so that an equivalent instruction is used (with a
warning).

Added:
    llvm/trunk/test/MC/AArch64/cyclone-movi-bug.s
Modified:
    llvm/trunk/lib/Target/AArch64/AArch64.td
    llvm/trunk/lib/Target/AArch64/AArch64AsmPrinter.cpp
    llvm/trunk/lib/Target/AArch64/AArch64Subtarget.h
    llvm/trunk/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
    llvm/trunk/test/CodeGen/AArch64/arm64-fcmp-opt.ll
    llvm/trunk/test/CodeGen/AArch64/arm64-zero-cycle-zeroing.ll
    llvm/trunk/test/CodeGen/AArch64/fp-cond-sel.ll

Modified: llvm/trunk/lib/Target/AArch64/AArch64.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64.td?rev=320965&r1=320964&r2=320965&view=diff
==============================================================================

--- llvm/trunk/lib/Target/AArch64/AArch64.td (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64.td Mon Dec 18 02:36:00 2017
@@ -61,6 +61,12 @@ def FeatureZCRegMove : SubtargetFeature<
 def FeatureZCZeroing : SubtargetFeature<"zcz", "HasZeroCycleZeroing", "true",
                                         "Has zero-cycle zeroing instructions">;
 
+/// ... but the floating-point version doesn't quite work in rare cases on older
+/// CPUs.
+def FeatureZCZeroingFPWorkaround : SubtargetFeature<"zcz-fp-workaround",
+    "HasZeroCycleZeroingFPWorkaround", "true",
+    "The zero-cycle floating-point zeroing instruction has a bug">;
+
 def FeatureStrictAlign : SubtargetFeature<"strict-align",
                                           "StrictAlign", "true",
                                           "Disallow all unaligned memory "
@@ -290,7 +296,8 @@ def ProcCyclone : SubtargetFeature<"cycl
                                    FeaturePerfMon,
                                    FeatureSlowMisaligned128Store,
                                    FeatureZCRegMove,
-                                   FeatureZCZeroing
+                                   FeatureZCZeroing,
+                                   FeatureZCZeroingFPWorkaround
                                    ]>;
 
 def ProcExynosM1 : SubtargetFeature<"exynosm1", "ARMProcFamily", "ExynosM1",

Modified: llvm/trunk/lib/Target/AArch64/AArch64AsmPrinter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64AsmPrinter.cpp?rev=320965&r1=320964&r2=320965&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64AsmPrinter.cpp (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64AsmPrinter.cpp Mon Dec 18 02:36:00 2017
@@ -523,7 +523,7 @@ void AArch64AsmPrinter::LowerPATCHPOINT(
 
 void AArch64AsmPrinter::EmitFMov0(const MachineInstr &MI) {
   unsigned DestReg = MI.getOperand(0).getReg();
-  if (STI->hasZeroCycleZeroing()) {
+  if (STI->hasZeroCycleZeroing() && !STI->hasZeroCycleZeroingFPWorkaround()) {
     // Convert H/S/D register to corresponding Q register
     if (AArch64::H0 <= DestReg && DestReg <= AArch64::H31)
       DestReg = AArch64::Q0 + (DestReg - AArch64::H0);

Modified: llvm/trunk/lib/Target/AArch64/AArch64Subtarget.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64Subtarget.h?rev=320965&r1=320964&r2=320965&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64Subtarget.h (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64Subtarget.h Mon Dec 18 02:36:00 2017
@@ -86,6 +86,7 @@ protected:
 
   // HasZeroCycleZeroing - Has zero-cycle zeroing instructions.
   bool HasZeroCycleZeroing = false;
+  bool HasZeroCycleZeroingFPWorkaround = false;
 
   // StrictAlign - Disallow unaligned memory accesses.
   bool StrictAlign = false;
@@ -197,6 +198,10 @@ public:
 
   bool hasZeroCycleZeroing() const { return HasZeroCycleZeroing; }
 
+  bool hasZeroCycleZeroingFPWorkaround() const {
+    return HasZeroCycleZeroingFPWorkaround;
+  }
+
   bool requiresStrictAlign() const { return StrictAlign; }
 
   bool isXRaySupported() const override { return true; }

Modified: llvm/trunk/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp?rev=320965&r1=320964&r2=320965&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp (original)
+++ llvm/trunk/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp Mon Dec 18 02:36:00 2017
@@ -3796,6 +3796,31 @@ bool AArch64AsmParser::MatchAndEmitInstr
       }
     }
   }
+
+  // The Cyclone CPU and early successors didn't execute the zero-cycle zeroing
+  // instruction for FP registers correctly in some rare circumstances. Convert
+  // it to a safe instruction and warn (because silently changing someone's
+  // assembly is rude).
+  if (getSTI().getFeatureBits()[AArch64::FeatureZCZeroingFPWorkaround] &&
+      NumOperands == 4 && Tok == "movi") {
+    AArch64Operand &Op1 = static_cast<AArch64Operand &>(*Operands[1]);
+    AArch64Operand &Op2 = static_cast<AArch64Operand &>(*Operands[2]);
+    AArch64Operand &Op3 = static_cast<AArch64Operand &>(*Operands[3]);
+    if ((Op1.isToken() && Op2.isNeonVectorReg() && Op3.isImm()) ||
+        (Op1.isNeonVectorReg() && Op2.isToken() && Op3.isImm())) {
+      StringRef Suffix = Op1.isToken() ? Op1.getToken() : Op2.getToken();
+      if (Suffix.lower() == ".2d" &&
+          cast<MCConstantExpr>(Op3.getImm())->getValue() == 0) {
+        Warning(IDLoc, "instruction movi.2d with immediate #0 may not function"
+                " correctly on this CPU, converting to equivalent movi.16b");
+        // Switch the suffix to .16b.
+        unsigned Idx = Op1.isToken() ? 1 : 2;
+        Operands[Idx] = AArch64Operand::CreateToken(".16b", false, IDLoc,
+                                                  getContext());
+      }
+    }
+  }
+
   // FIXME: Horrible hack for sxtw and uxtw with Wn src and Xd dst operands.
   //        InstAlias can't quite handle this since the reg classes aren't
   //        subclasses.

Modified: llvm/trunk/test/CodeGen/AArch64/arm64-fcmp-opt.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/arm64-fcmp-opt.ll?rev=320965&r1=320964&r2=320965&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/arm64-fcmp-opt.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/arm64-fcmp-opt.ll Mon Dec 18 02:36:00 2017
@@ -41,7 +41,7 @@ entry:
 define float @fcmp_oeq(float %a, float %b) nounwind ssp {
 ; CHECK-LABEL: @fcmp_oeq
 ; CHECK: fcmp s0, s1
-; CHECK-DAG: movi.2d v[[ZERO:[0-9]+]], #0
+; CHECK-DAG: fmov s[[ZERO:[0-9]+]], wzr
 ; CHECK-DAG: fmov s[[ONE:[0-9]+]], #1.0
 ; CHECK: fcsel s0, s[[ONE]], s[[ZERO]], eq
 
@@ -53,7 +53,7 @@ define float @fcmp_oeq(float %a, float %
 define float @fcmp_ogt(float %a, float %b) nounwind ssp {
 ; CHECK-LABEL: @fcmp_ogt
 ; CHECK: fcmp s0, s1
-; CHECK-DAG: movi.2d v[[ZERO:[0-9]+]], #0
+; CHECK-DAG: fmov s[[ZERO:[0-9]+]], wzr
 ; CHECK-DAG: fmov s[[ONE:[0-9]+]], #1.0
 ; CHECK: fcsel s0, s[[ONE]], s[[ZERO]], gt
 
@@ -65,7 +65,7 @@ define float @fcmp_ogt(float %a, float %
 define float @fcmp_oge(float %a, float %b) nounwind ssp {
 ; CHECK-LABEL: @fcmp_oge
 ; CHECK: fcmp s0, s1
-; CHECK-DAG: movi.2d v[[ZERO:[0-9]+]], #0
+; CHECK-DAG: fmov s[[ZERO:[0-9]+]], wzr
 ; CHECK-DAG: fmov s[[ONE:[0-9]+]], #1.0
 ; CHECK: fcsel s0, s[[ONE]], s[[ZERO]], ge
 
@@ -77,7 +77,7 @@ define float @fcmp_oge(float %a, float %
 define float @fcmp_olt(float %a, float %b) nounwind ssp {
 ; CHECK-LABEL: @fcmp_olt
 ; CHECK: fcmp s0, s1
-; CHECK-DAG: movi.2d v[[ZERO:[0-9]+]], #0
+; CHECK-DAG: fmov s[[ZERO:[0-9]+]], wzr
 ; CHECK-DAG: fmov s[[ONE:[0-9]+]], #1.0
 ; CHECK: fcsel s0, s[[ONE]], s[[ZERO]], mi
 
@@ -89,7 +89,7 @@ define float @fcmp_olt(float %a, float %
 define float @fcmp_ole(float %a, float %b) nounwind ssp {
 ; CHECK-LABEL: @fcmp_ole
 ; CHECK: fcmp s0, s1
-; CHECK-DAG: movi.2d v[[ZERO:[0-9]+]], #0
+; CHECK-DAG: fmov s[[ZERO:[0-9]+]], wzr
 ; CHECK-DAG: fmov s[[ONE:[0-9]+]], #1.0
 ; CHECK: fcsel s0, s[[ONE]], s[[ZERO]], ls
 
@@ -101,7 +101,7 @@ define float @fcmp_ole(float %a, float %
 define float @fcmp_ord(float %a, float %b) nounwind ssp {
 ; CHECK-LABEL: @fcmp_ord
 ; CHECK: fcmp s0, s1
-; CHECK-DAG: movi.2d v[[ZERO:[0-9]+]], #0
+; CHECK-DAG: fmov s[[ZERO:[0-9]+]], wzr
 ; CHECK-DAG: fmov s[[ONE:[0-9]+]], #1.0
 ; CHECK: fcsel s0, s[[ONE]], s[[ZERO]], vc
   %cmp = fcmp ord float %a, %b
@@ -112,7 +112,7 @@ define float @fcmp_ord(float %a, float %
 define float @fcmp_uno(float %a, float %b) nounwind ssp {
 ; CHECK-LABEL: @fcmp_uno
 ; CHECK: fcmp s0, s1
-; CHECK-DAG: movi.2d v[[ZERO:[0-9]+]], #0
+; CHECK-DAG: fmov s[[ZERO:[0-9]+]], wzr
 ; CHECK-DAG: fmov s[[ONE:[0-9]+]], #1.0
 ; CHECK: fcsel s0, s[[ONE]], s[[ZERO]], vs
   %cmp = fcmp uno float %a, %b
@@ -123,7 +123,7 @@ define float @fcmp_uno(float %a, float %
 define float @fcmp_ugt(float %a, float %b) nounwind ssp {
 ; CHECK-LABEL: @fcmp_ugt
 ; CHECK: fcmp s0, s1
-; CHECK-DAG: movi.2d v[[ZERO:[0-9]+]], #0
+; CHECK-DAG: fmov s[[ZERO:[0-9]+]], wzr
 ; CHECK-DAG: fmov s[[ONE:[0-9]+]], #1.0
 ; CHECK: fcsel s0, s[[ONE]], s[[ZERO]], hi
   %cmp = fcmp ugt float %a, %b
@@ -134,7 +134,7 @@ define float @fcmp_ugt(float %a, float %
 define float @fcmp_uge(float %a, float %b) nounwind ssp {
 ; CHECK-LABEL: @fcmp_uge
 ; CHECK: fcmp s0, s1
-; CHECK-DAG: movi.2d v[[ZERO:[0-9]+]], #0
+; CHECK-DAG: fmov s[[ZERO:[0-9]+]], wzr
 ; CHECK-DAG: fmov s[[ONE:[0-9]+]], #1.0
 ; CHECK: fcsel s0, s[[ONE]], s[[ZERO]], pl
   %cmp = fcmp uge float %a, %b
@@ -145,7 +145,7 @@ define float @fcmp_uge(float %a, float %
 define float @fcmp_ult(float %a, float %b) nounwind ssp {
 ; CHECK-LABEL: @fcmp_ult
 ; CHECK: fcmp s0, s1
-; CHECK-DAG: movi.2d v[[ZERO:[0-9]+]], #0
+; CHECK-DAG: fmov s[[ZERO:[0-9]+]], wzr
 ; CHECK-DAG: fmov s[[ONE:[0-9]+]], #1.0
 ; CHECK: fcsel s0, s[[ONE]], s[[ZERO]], lt
   %cmp = fcmp ult float %a, %b
@@ -156,7 +156,7 @@ define float @fcmp_ult(float %a, float %
 define float @fcmp_ule(float %a, float %b) nounwind ssp {
 ; CHECK-LABEL: @fcmp_ule
 ; CHECK: fcmp s0, s1
-; CHECK-DAG: movi.2d v[[ZERO:[0-9]+]], #0
+; CHECK-DAG: fmov s[[ZERO:[0-9]+]], wzr
 ; CHECK-DAG: fmov s[[ONE:[0-9]+]], #1.0
 ; CHECK: fcsel s0, s[[ONE]], s[[ZERO]], le
   %cmp = fcmp ule float %a, %b
@@ -167,7 +167,7 @@ define float @fcmp_ule(float %a, float %
 define float @fcmp_une(float %a, float %b) nounwind ssp {
 ; CHECK-LABEL: @fcmp_une
 ; CHECK: fcmp s0, s1
-; CHECK-DAG: movi.2d v[[ZERO:[0-9]+]], #0
+; CHECK-DAG: fmov s[[ZERO:[0-9]+]], wzr
 ; CHECK-DAG: fmov s[[ONE:[0-9]+]], #1.0
 ; CHECK: fcsel s0, s[[ONE]], s[[ZERO]], ne
   %cmp = fcmp une float %a, %b
@@ -180,7 +180,7 @@ define float @fcmp_une(float %a, float %
 define float @fcmp_one(float %a, float %b) nounwind ssp {
 ; CHECK-LABEL: @fcmp_one
 ;	fcmp	s0, s1
-; CHECK-DAG: movi.2d v[[ZERO:[0-9]+]], #0
+; CHECK-DAG: fmov s[[ZERO:[0-9]+]], wzr
 ; CHECK-DAG: fmov s[[ONE:[0-9]+]], #1.0
 ; CHECK: fcsel [[TMP:s[0-9]+]], s[[ONE]], s[[ZERO]], mi
 ; CHECK: fcsel s0, s[[ONE]], [[TMP]], gt
@@ -194,7 +194,7 @@ define float @fcmp_one(float %a, float %
 define float @fcmp_ueq(float %a, float %b) nounwind ssp {
 ; CHECK-LABEL: @fcmp_ueq
 ; CHECK: fcmp s0, s1
-; CHECK-DAG: movi.2d v[[ZERO:[0-9]+]], #0
+; CHECK-DAG: fmov s[[ZERO:[0-9]+]], wzr
 ; CHECK-DAG: fmov s[[ONE:[0-9]+]], #1.0
 ; CHECK: fcsel [[TMP:s[0-9]+]], s[[ONE]], s[[ZERO]], eq
 ; CHECK: fcsel s0, s[[ONE]], [[TMP]], vs

Modified: llvm/trunk/test/CodeGen/AArch64/arm64-zero-cycle-zeroing.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/arm64-zero-cycle-zeroing.ll?rev=320965&r1=320964&r2=320965&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/arm64-zero-cycle-zeroing.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/arm64-zero-cycle-zeroing.ll Mon Dec 18 02:36:00 2017
@@ -9,10 +9,10 @@ define void @t1() nounwind ssp {
 entry:
 ; ALL-LABEL: t1:
 ; ALL-NOT: fmov
-; CYCLONE: movi.2d v0, #0000000000000000
-; CYCLONE: movi.2d v1, #0000000000000000
-; CYCLONE: movi.2d v2, #0000000000000000
-; CYCLONE: movi.2d v3, #0000000000000000
+; CYCLONE: fmov d0, xzr
+; CYCLONE: fmov d1, xzr
+; CYCLONE: fmov d2, xzr
+; CYCLONE: fmov d3, xzr
 ; KRYO: movi v0.2d, #0000000000000000
 ; KRYO: movi v1.2d, #0000000000000000
 ; KRYO: movi v2.2d, #0000000000000000
@@ -48,8 +48,8 @@ entry:
 define void @t4() nounwind ssp {
 ; ALL-LABEL: t4:
 ; ALL-NOT: fmov
-; CYCLONE: movi.2d v0, #0000000000000000
-; CYCLONE: movi.2d v1, #0000000000000000
+; CYCLONE: fmov s0, wzr
+; CYCLONE: fmov s1, wzr
 ; KRYO: movi v0.2d, #0000000000000000
 ; KRYO: movi v1.2d, #0000000000000000
 ; FALKOR: movi v0.2d, #0000000000000000

Modified: llvm/trunk/test/CodeGen/AArch64/fp-cond-sel.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/fp-cond-sel.ll?rev=320965&r1=320964&r2=320965&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/fp-cond-sel.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/fp-cond-sel.ll Mon Dec 18 02:36:00 2017
@@ -12,7 +12,7 @@ define void @test_csel(i32 %lhs32, i32 %
   %tst1 = icmp ugt i32 %lhs32, %rhs32
   %val1 = select i1 %tst1, float 0.0, float 1.0
   store float %val1, float* @varfloat
-; CHECK-DAG: movi v[[FLT0:[0-9]+]].2d, #0
+; CHECK-DAG: fmov s[[FLT0:[0-9]+]], wzr
 ; CHECK-DAG: fmov s[[FLT1:[0-9]+]], #1.0
 ; CHECK: fcsel {{s[0-9]+}}, s[[FLT0]], s[[FLT1]], hi
 

Added: llvm/trunk/test/MC/AArch64/cyclone-movi-bug.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/AArch64/cyclone-movi-bug.s?rev=320965&view=auto
==============================================================================
--- llvm/trunk/test/MC/AArch64/cyclone-movi-bug.s (added)
+++ llvm/trunk/test/MC/AArch64/cyclone-movi-bug.s Mon Dec 18 02:36:00 2017
@@ -0,0 +1,9 @@
+; RUN: llvm-mc -triple aarch64-apple-ios -mcpu=cyclone %s 2> %t.log | FileCheck %s
+; RUN: FileCheck %s --check-prefix=CHECK-ERR < %t.log
+
+    ; CHECK: movi v3.16b, #0
+    ; CHECK: movi v7.16b, #0
+    ; CHECK-ERR: warning: instruction movi.2d with immediate #0 may not function correctly on this CPU, converting to equivalent movi.16b
+    ; CHECK-ERR: warning: instruction movi.2d with immediate #0 may not function correctly on this CPU, converting to equivalent movi.16b
+    movi.2d v3, #0
+    movi v7.2d, #0