[llvm] 4b57939 - [AArch64][GlobalISel] Fallback to generic lowering of G_CTPOP

Wed Aug 17 11:11:04 PDT 2022

Author: Vladislav Dzhidzhoev
Date: 2022-08-17T21:10:27+03:00
New Revision: 4b579395836eebd66d491ddbdffc97f761bb42a0

URL: https://github.com/llvm/llvm-project/commit/4b579395836eebd66d491ddbdffc97f761bb42a0
DIFF: https://github.com/llvm/llvm-project/commit/4b579395836eebd66d491ddbdffc97f761bb42a0.diff

LOG: [AArch64][GlobalISel] Fallback to generic lowering of G_CTPOP

Use generic lowering of G_CTPOP for s32 and s64 scalars when
noimplicitfloat is specified.

Differential Revision: https://reviews.llvm.org/D131454

Added: 
    

Modified: 
    llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
    llvm/test/CodeGen/AArch64/GlobalISel/legalize-ctpop-no-implicit-float.mir

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index b0f7b14e0a633..d158519b10523 100644

--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -782,7 +782,6 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
   getActionDefinitionsBuilder({G_SBFX, G_UBFX})
       .customFor({{s32, s32}, {s64, s64}});
 
-  // TODO: Use generic lowering when custom lowering is not possible.
   auto always = [=](const LegalityQuery &Q) { return true; };
   getActionDefinitionsBuilder(G_CTPOP)
       .legalFor({{v8s8, v8s8}, {v16s8, v16s8}})
@@ -1218,9 +1217,6 @@ bool AArch64LegalizerInfo::legalizeCTPOP(MachineInstr &MI,
   //  uaddlp.4h v0, v0  // v4s16, v2s32
   //  uaddlp.2s v0, v0  //        v2s32
 
-  if (!ST->hasNEON() ||
-      MI.getMF()->getFunction().hasFnAttribute(Attribute::NoImplicitFloat))
-    return false;
   MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
   Register Dst = MI.getOperand(0).getReg();
   Register Val = MI.getOperand(1).getReg();
@@ -1230,6 +1226,14 @@ bool AArch64LegalizerInfo::legalizeCTPOP(MachineInstr &MI,
          "Expected src and dst to have the same type!");
   unsigned Size = Ty.getSizeInBits();
 
+  if (!ST->hasNEON() ||
+      MI.getMF()->getFunction().hasFnAttribute(Attribute::NoImplicitFloat)) {
+    // Use generic lowering when custom lowering is not possible.
+    return Ty.isScalar() && (Size == 32 || Size == 64) &&
+           Helper.lowerBitCount(MI) ==
+               LegalizerHelper::LegalizeResult::Legalized;
+  }
+
   // Pre-conditioning: widen Val up to the nearest vector type.
   // s32,s64,v4s16,v2s32 -> v8i8
   // v8s16,v4s32,v2s64 -> v16i8

diff  --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ctpop-no-implicit-float.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ctpop-no-implicit-float.mir
index 41e316e1453df..8b9d04fc574cb 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ctpop-no-implicit-float.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ctpop-no-implicit-float.mir
@@ -1,5 +1,5 @@
-# RUN: not --crash llc -mtriple=aarch64-unknown-unknown -verify-machineinstrs -run-pass=legalizer %s -o - 2>&1 | FileCheck %s
-# CHECK: LLVM ERROR: unable to legalize instruction: %ctpop:_(s32) = G_CTPOP %copy:_(s32) (in function: s32)
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=aarch64-unknown-unknown -verify-machineinstrs -run-pass=legalizer %s -o - 2>&1 | FileCheck %s
 --- |
   define void @s32() noimplicitfloat { unreachable }
   define void @s64() noimplicitfloat { unreachable }
@@ -10,7 +10,73 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     liveins: $w0
+    ; CHECK-LABEL: name: s32
+    ; CHECK: liveins: $w0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: %copy:_(s32) = COPY $w0
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR %copy, [[C]](s64)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1431655765
+    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
+    ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB %copy, [[AND]]
+    ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[SUB]], [[C2]](s64)
+    ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 858993459
+    ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C3]]
+    ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SUB]], [[C3]]
+    ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[AND1]], [[AND2]]
+    ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[ADD]], [[C4]](s64)
+    ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[LSHR2]], [[ADD]]
+    ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 252645135
+    ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ADD1]], [[C5]]
+    ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16843009
+    ; CHECK-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND3]], [[C6]]
+    ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 24
+    ; CHECK-NEXT: %ctpop:_(s32) = G_LSHR [[MUL]], [[C7]](s64)
+    ; CHECK-NEXT: $w0 = COPY %ctpop(s32)
+    ; CHECK-NEXT: RET_ReallyLR implicit $w0
     %copy:_(s32) = COPY $w0
     %ctpop:_(s32) = G_CTPOP %copy(s32)
     $w0 = COPY %ctpop(s32)
     RET_ReallyLR implicit $w0
+
+...
+---
+name:            s64
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $x0
+    ; CHECK-LABEL: name: s64
+    ; CHECK: liveins: $x0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: %copy:_(s64) = COPY $x0
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR %copy, [[C]](s64)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 6148914691236517205
+    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[LSHR]], [[C1]]
+    ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(s64) = G_SUB %copy, [[AND]]
+    ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[SUB]], [[C2]](s64)
+    ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 3689348814741910323
+    ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[LSHR1]], [[C3]]
+    ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s64) = G_AND [[SUB]], [[C3]]
+    ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s64) = G_ADD [[AND1]], [[AND2]]
+    ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s64) = G_LSHR [[ADD]], [[C4]](s64)
+    ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s64) = G_ADD [[LSHR2]], [[ADD]]
+    ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 1085102592571150095
+    ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s64) = G_AND [[ADD1]], [[C5]]
+    ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 72340172838076673
+    ; CHECK-NEXT: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[AND3]], [[C6]]
+    ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 56
+    ; CHECK-NEXT: %ctpop:_(s64) = G_LSHR [[MUL]], [[C7]](s64)
+    ; CHECK-NEXT: $x0 = COPY %ctpop(s64)
+    ; CHECK-NEXT: RET_ReallyLR implicit $x0
+    %copy:_(s64) = COPY $x0
+    %ctpop:_(s64) = G_CTPOP %copy(s64)
+    $x0 = COPY %ctpop(s64)
+    RET_ReallyLR implicit $x0
+
+...