[llvm] [AArch64][GlobalISel] Look through COPY and G_BITCAST while selecting fcvtl2 (fpext) (PR #65463)

Wed Sep 6 04:03:42 PDT 2023

https://github.com/dzhidzhoev created https://github.com/llvm/llvm-project/pull/65463:

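This patch tackles some AArch64 GlobalISel regressions introduced in
https://reviews.llvm.org/D144670: when selecting fcvtl2 for an fpext of the
high half of a vector, the selector now looks through COPY and G_BITCAST.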


>From a308c2750c0ec356da7f5a8355d5cd1059d9e588 Mon Sep 17 00:00:00 2001
From: Vladislav Dzhidzhoev <vdzhidzhoev at accesssoftek.com>
Date: Fri, 1 Sep 2023 18:38:26 +0200
Subject: [PATCH] [AArch64][GlobalISel] Look through COPY and G_BITCAST while
 selecting fcvtl2 (fpext)

It tackles some regressions introduced in
https://reviews.llvm.org/D144670.
---
 llvm/lib/Target/AArch64/AArch64InstrGISel.td  |  3 ---
 llvm/lib/Target/AArch64/AArch64InstrInfo.td   |  6 +++++-
 .../GISel/AArch64InstructionSelector.cpp      | 19 +++++++++++++++----
 llvm/test/CodeGen/AArch64/arm64-vcvt_f.ll     | 12 ++++--------
 4 files changed, 24 insertions(+), 16 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64InstrGISel.td b/llvm/lib/Target/AArch64/AArch64InstrGISel.td
index f9f860607b5877c..d4c6a320486d398 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrGISel.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrGISel.td
@@ -303,9 +303,6 @@ def : Pat<(int_aarch64_stlxp GPR64:$lo, GPR64:$hi, GPR64:$addr),
 def : Pat<(int_aarch64_stxp GPR64:$lo, GPR64:$hi, GPR64:$addr),
           (STXPX GPR64:$lo, GPR64:$hi, GPR64:$addr)>;
 
-let GIIgnoreCopies = 1 in
-class PatIgnoreCopies<dag pattern, dag result> : Pat<pattern, result>, GISelFlags;
-
 multiclass SIMDAcrossLanesSignedIntrinsicBHS<string baseOpc, Intrinsic intOp> {
   def : PatIgnoreCopies<(i32 (sext (i8 (intOp (v8i8 V64:$Rn))))),
         (i32 (SMOVvi8to32
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 4a1f46f2576aeca..f2d2343812286a6 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -4739,6 +4739,10 @@ defm FCMLT  : SIMDFPCmpTwoVector<0, 1, 0b01110, "fcmlt", AArch64fcmltz>;
 defm FCVTAS : SIMDTwoVectorFPToInt<0,0,0b11100, "fcvtas",int_aarch64_neon_fcvtas>;
 defm FCVTAU : SIMDTwoVectorFPToInt<1,0,0b11100, "fcvtau",int_aarch64_neon_fcvtau>;
 defm FCVTL  : SIMDFPWidenTwoVector<0, 0, 0b10111, "fcvtl">;
+
+let GIIgnoreCopies = 1 in
+class PatIgnoreCopies<dag pattern, dag result> : Pat<pattern, result>, GISelFlags;
+
 def : Pat<(v4f32 (int_aarch64_neon_vcvthf2fp (v4i16 V64:$Rn))),
           (FCVTLv4i16 V64:$Rn)>;
 def : Pat<(v4f32 (int_aarch64_neon_vcvthf2fp (extract_subvector (v8i16 V128:$Rn),
@@ -4746,7 +4750,7 @@ def : Pat<(v4f32 (int_aarch64_neon_vcvthf2fp (extract_subvector (v8i16 V128:$Rn)
           (FCVTLv8i16 V128:$Rn)>;
 def : Pat<(v2f64 (any_fpextend (v2f32 V64:$Rn))),
           (FCVTLv2i32 V64:$Rn)>;
-def : Pat<(v2f64 (any_fpextend (v2f32 (extract_high_v4f32 (v4f32 V128:$Rn))))),
+def : PatIgnoreCopies<(v2f64 (any_fpextend (v2f32 (extract_high_v4f32 (v4f32 V128:$Rn))))),
           (FCVTLv4i32 V128:$Rn)>;
 def : Pat<(v4f32 (any_fpextend (v4f16 V64:$Rn))),
           (FCVTLv4i16 V64:$Rn)>;
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
index 57e6bb92057dc7d..5d75f30d7a00a39 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -6777,10 +6777,21 @@ AArch64InstructionSelector::selectExtractHigh(MachineOperand &Root) const {
   MachineRegisterInfo &MRI =
       Root.getParent()->getParent()->getParent()->getRegInfo();
 
-  MachineInstr *Extract = getDefIgnoringCopies(Root.getReg(), MRI);
-  if (Extract && Extract->getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
-      Root.getReg() == Extract->getOperand(1).getReg()) {
-    Register ExtReg = Extract->getOperand(2).getReg();
+  auto Extract = getDefSrcRegIgnoringCopies(Root.getReg(), MRI);
+  while (Extract && Extract->MI->getOpcode() == TargetOpcode::G_BITCAST)
+    Extract =
+        getDefSrcRegIgnoringCopies(Extract->MI->getOperand(1).getReg(), MRI);
+  if (!Extract)
+    return std::nullopt;
+
+  if (Extract->MI->getOpcode() == TargetOpcode::G_UNMERGE_VALUES) {
+    if (Extract->Reg == Extract->MI->getOperand(1).getReg()) {
+      Register ExtReg = Extract->MI->getOperand(2).getReg();
+      return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); }}};
+    }
+  }
+  if (Extract->MI->getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT) {
+    Register ExtReg = Extract->MI->getOperand(1).getReg();
     return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); }}};
   }
 
diff --git a/llvm/test/CodeGen/AArch64/arm64-vcvt_f.ll b/llvm/test/CodeGen/AArch64/arm64-vcvt_f.ll
index e94aac3b59c69a0..269ffed98a844eb 100644
--- a/llvm/test/CodeGen/AArch64/arm64-vcvt_f.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-vcvt_f.ll
@@ -95,8 +95,7 @@ define <2 x double> @test_vcvt_high_v4i16_f32_bitcast(<8 x i16> %x) nounwind rea
 ;
 ; GISEL-LABEL: test_vcvt_high_v4i16_f32_bitcast:
 ; GISEL:       // %bb.0:
-; GISEL-NEXT:    mov d0, v0[1]
-; GISEL-NEXT:    fcvtl v0.2d, v0.2s
+; GISEL-NEXT:    fcvtl2 v0.2d, v0.4s
 ; GISEL-NEXT:    ret
   %ext = shufflevector <8 x i16> %x, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
   %bc2 = bitcast <4 x i16> %ext to <2 x float>
@@ -112,8 +111,7 @@ define <2 x double> @test_vcvt_high_v8i8_f32_bitcast(<16 x i8> %x) nounwind read
 ;
 ; GISEL-LABEL: test_vcvt_high_v8i8_f32_bitcast:
 ; GISEL:       // %bb.0:
-; GISEL-NEXT:    mov d0, v0[1]
-; GISEL-NEXT:    fcvtl v0.2d, v0.2s
+; GISEL-NEXT:    fcvtl2 v0.2d, v0.4s
 ; GISEL-NEXT:    ret
   %ext = shufflevector <16 x i8> %x, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
   %bc2 = bitcast <8 x i8> %ext to <2 x float>
@@ -145,8 +143,7 @@ define <4 x float> @test_vcvt_high_v2i32_f16_bitcast(<4 x i32> %x) nounwind read
 ;
 ; GISEL-LABEL: test_vcvt_high_v2i32_f16_bitcast:
 ; GISEL:       // %bb.0:
-; GISEL-NEXT:    mov d0, v0[1]
-; GISEL-NEXT:    fcvtl v0.4s, v0.4h
+; GISEL-NEXT:    fcvtl2 v0.4s, v0.8h
 ; GISEL-NEXT:    ret
   %ext = shufflevector <4 x i32> %x, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
   %bc2 = bitcast <2 x i32> %ext to <4 x half>
@@ -178,8 +175,7 @@ define <4 x float> @test_vcvt_high_v8i8_f16_bitcast(<16 x i8> %x) nounwind readn
 ;
 ; GISEL-LABEL: test_vcvt_high_v8i8_f16_bitcast:
 ; GISEL:       // %bb.0:
-; GISEL-NEXT:    mov d0, v0[1]
-; GISEL-NEXT:    fcvtl v0.4s, v0.4h
+; GISEL-NEXT:    fcvtl2 v0.4s, v0.8h
 ; GISEL-NEXT:    ret
   %ext = shufflevector <16 x i8> %x, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
   %bc2 = bitcast <8 x i8> %ext to <4 x half>