[PATCH] D99710: [AArch64] Use 64-bit movi for zeroing halfs/floats

Sjoerd Meijer via Phabricator via llvm-commits llvm-commits at lists.llvm.org
Thu Apr 1 02:30:48 PDT 2021


SjoerdMeijer created this revision.
SjoerdMeijer added reviewers: dmgreen, paquette, t.p.northover, fhahn.
Herald added subscribers: danielkiss, hiraditya, kristof.beyls.
SjoerdMeijer requested review of this revision.
Herald added a project: LLVM.

Zeroing of half and float registers was using the 128-bit .2d variant of movi, but using a 64-bit variant (.4h for halfs, .2s for floats) that zeros 64 bits is faster on some cores.

This is a prep step for D99586 <https://reviews.llvm.org/D99586>, which will always use movi for zeroing floats.


https://reviews.llvm.org/D99710

Files:
  llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
  llvm/test/CodeGen/AArch64/arm64-zero-cycle-zeroing.ll
  llvm/test/CodeGen/AArch64/f16-imm.ll


Index: llvm/test/CodeGen/AArch64/f16-imm.ll
===================================================================
--- llvm/test/CodeGen/AArch64/f16-imm.ll
+++ llvm/test/CodeGen/AArch64/f16-imm.ll
@@ -11,7 +11,7 @@
 ;
 ; CHECK-ZCZ-LABEL: Const0:
 ; CHECK-ZCZ:       // %bb.0: // %entry
-; CHECK-ZCZ-NEXT:    movi v0.2d, #0000000000000000
+; CHECK-ZCZ-NEXT:    movi v0.4h, #0
 ; CHECK-ZCZ-NEXT:    ret
 ;
 ; CHECK-NOFP16-LABEL: Const0:
Index: llvm/test/CodeGen/AArch64/arm64-zero-cycle-zeroing.ll
===================================================================
--- llvm/test/CodeGen/AArch64/arm64-zero-cycle-zeroing.ll
+++ llvm/test/CodeGen/AArch64/arm64-zero-cycle-zeroing.ll
@@ -28,13 +28,13 @@
 ; NONE16: fmov d2, xzr
 ; NONE16: movi{{(.16b)?}} v3{{(.2d)?}}, #0
 ; ZEROFP-DAG: ldr h0,{{.*}}
-; ZEROFP-DAG: movi v{{[0-3]+}}.2d, #0
-; ZEROFP-DAG: movi v{{[0-3]+}}.2d, #0
-; ZEROFP-DAG: movi v{{[0-3]+}}.2d, #0
-; ZERO16: movi v{{[0-3]+}}.2d, #0
-; ZERO16: movi v{{[0-3]+}}.2d, #0
-; ZERO16: movi v{{[0-3]+}}.2d, #0
-; ZERO16: movi v{{[0-3]+}}.2d, #0
+; ZEROFP-DAG: movi v1.2s, #0
+; ZEROFP-DAG: movi v2.2d, #0
+; ZEROFP-DAG: movi v3.2d, #0
+; ZERO16: movi v0.4h, #0
+; ZERO16: movi v1.2s, #0
+; ZERO16: movi v2.2d, #0
+; ZERO16: movi v3.2d, #0
   tail call void @bar(half 0.000000e+00, float 0.000000e+00, double 0.000000e+00, <2 x double> <double 0.000000e+00, double 0.000000e+00>) nounwind
   ret void
 }
@@ -65,8 +65,8 @@
 ; ALL-LABEL: t4:
 ; NONEFP: fmov s{{[0-3]+}}, wzr
 ; NONEFP: fmov s{{[0-3]+}}, wzr
-; ZEROFP: movi v{{[0-3]+}}.2d, #0
-; ZEROFP: movi v{{[0-3]+}}.2d, #0
+; ZEROFP: movi v0.2s, #0
+; ZEROFP: movi v1.2s, #0
   tail call void @barf(float 0.000000e+00, float 0.000000e+00) nounwind
   ret void
 }
@@ -147,7 +147,7 @@
 entry:
 ; ALL-LABEL: tf32:
 ; NONEFP: mov s0, wzr
-; ZEROFP: movi v0.2d, #0
+; ZEROFP: movi v0.2s, #0
   ret float 0.0
 }
 
Index: llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
+++ llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
@@ -1092,17 +1092,26 @@
   Register DestReg = MI.getOperand(0).getReg();
   if (STI->hasZeroCycleZeroingFP() && !STI->hasZeroCycleZeroingFPWorkaround()) {
     // Convert H/S/D register to corresponding Q register
-    if (AArch64::H0 <= DestReg && DestReg <= AArch64::H31)
+    MCInst MOVI;
+    if (AArch64::H0 <= DestReg && DestReg <= AArch64::H31) {
+      // Create 64-bit mov using MOVIv4i16.
+      MOVI.setOpcode(AArch64::MOVIv4i16);
       DestReg = AArch64::Q0 + (DestReg - AArch64::H0);
-    else if (AArch64::S0 <= DestReg && DestReg <= AArch64::S31)
+      MOVI.addOperand(MCOperand::createReg(DestReg));
+      MOVI.addOperand(MCOperand::createImm(0));
+    } else if (AArch64::S0 <= DestReg && DestReg <= AArch64::S31) {
+      // Create 64-bit mov using MOVIv2i32.
+      MOVI.setOpcode(AArch64::MOVIv2i32);
       DestReg = AArch64::Q0 + (DestReg - AArch64::S0);
-    else {
+      MOVI.addOperand(MCOperand::createReg(DestReg));
+      MOVI.addOperand(MCOperand::createImm(0));
+    } else {
       assert(AArch64::D0 <= DestReg && DestReg <= AArch64::D31);
+      // Create 128-bit mov using MOVIv2d_ns.
+      MOVI.setOpcode(AArch64::MOVIv2d_ns);
       DestReg = AArch64::Q0 + (DestReg - AArch64::D0);
+      MOVI.addOperand(MCOperand::createReg(DestReg));
     }
-    MCInst MOVI;
-    MOVI.setOpcode(AArch64::MOVIv2d_ns);
-    MOVI.addOperand(MCOperand::createReg(DestReg));
     MOVI.addOperand(MCOperand::createImm(0));
     EmitToStreamer(*OutStreamer, MOVI);
   } else {


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D99710.334626.patch
Type: text/x-patch
Size: 3585 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20210401/6a003d66/attachment.bin>


More information about the llvm-commits mailing list