[llvm] 95c8750 - [AArch64][GlobalISel] Added pmull/pmull64 intrinsic support (#165740)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Nov 5 08:53:49 PST 2025
Author: Joshua Rodriguez
Date: 2025-11-05T16:53:45Z
New Revision: 95c87505255032c1cfcd4091e1e114865f62be9a
URL: https://github.com/llvm/llvm-project/commit/95c87505255032c1cfcd4091e1e114865f62be9a
DIFF: https://github.com/llvm/llvm-project/commit/95c87505255032c1cfcd4091e1e114865f62be9a.diff
LOG: [AArch64][GlobalISel] Added pmull/pmull64 intrinsic support (#165740)
GISel no longer falls back onto SDAG when attempting to lower the pmull
and pmull64 intrinsics.
Added:
Modified:
llvm/lib/Target/AArch64/AArch64InstrGISel.td
llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
llvm/test/CodeGen/AArch64/aarch64-smull.ll
llvm/test/CodeGen/AArch64/arm64-neon-3vdiff.ll
llvm/test/CodeGen/AArch64/arm64-vmul.ll
llvm/test/CodeGen/AArch64/highextractbitcast.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64InstrGISel.td b/llvm/lib/Target/AArch64/AArch64InstrGISel.td
index 30b7b03f7a69a..52b216c7fe0f0 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrGISel.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrGISel.td
@@ -197,6 +197,12 @@ def G_SMULL : AArch64GenericInstruction {
let hasSideEffects = 0;
}
+def G_PMULL : AArch64GenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type1:$src1, type1:$src2);
+ let hasSideEffects = 0;
+}
+
def G_UADDLP : AArch64GenericInstruction {
let OutOperandList = (outs type0:$dst);
let InOperandList = (ins type0:$src1);
@@ -273,6 +279,7 @@ def : GINodeEquiv<G_FCMGT, AArch64fcmgt>;
def : GINodeEquiv<G_BSP, AArch64bsp>;
+def : GINodeEquiv<G_PMULL, AArch64pmull>;
def : GINodeEquiv<G_UMULL, AArch64umull>;
def : GINodeEquiv<G_SMULL, AArch64smull>;
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index 5f93847bc680e..038ad77ae69b2 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -1809,6 +1809,9 @@ bool AArch64LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
return LowerBinOp(TargetOpcode::G_FMAXNUM);
case Intrinsic::aarch64_neon_fminnm:
return LowerBinOp(TargetOpcode::G_FMINNUM);
+ case Intrinsic::aarch64_neon_pmull:
+ case Intrinsic::aarch64_neon_pmull64:
+ return LowerBinOp(AArch64::G_PMULL);
case Intrinsic::aarch64_neon_smull:
return LowerBinOp(AArch64::G_SMULL);
case Intrinsic::aarch64_neon_umull:
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
index 6d2d70511e894..6b920f05227ad 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
@@ -560,6 +560,7 @@ bool AArch64RegisterBankInfo::onlyUsesFP(const MachineInstr &MI,
case TargetOpcode::G_FCMP:
case TargetOpcode::G_LROUND:
case TargetOpcode::G_LLROUND:
+ case AArch64::G_PMULL:
return true;
case TargetOpcode::G_INTRINSIC:
switch (cast<GIntrinsic>(MI).getIntrinsicID()) {
diff --git a/llvm/test/CodeGen/AArch64/aarch64-smull.ll b/llvm/test/CodeGen/AArch64/aarch64-smull.ll
index 0cd885e599817..e85e808921c87 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-smull.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-smull.ll
@@ -1,10 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-NEON
; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+sve < %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SVE
-; RUN: llc -mtriple=aarch64 -global-isel -global-isel-abort=2 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
-
-; CHECK-GI: warning: Instruction selection used fallback path for pmlsl2_v8i16_uzp1
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for pmlsl_pmlsl2_v8i16_uzp1
+; RUN: llc -mtriple=aarch64 -global-isel -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI
define <8 x i16> @smull_v8i8_v8i16(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: smull_v8i8_v8i16:
@@ -1832,14 +1829,33 @@ entry:
}
define void @pmlsl2_v8i16_uzp1(<16 x i8> %0, <8 x i16> %1, ptr %2, ptr %3) {
-; CHECK-LABEL: pmlsl2_v8i16_uzp1:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ldr q2, [x1, #16]
-; CHECK-NEXT: uzp1 v2.16b, v0.16b, v2.16b
-; CHECK-NEXT: pmull2 v0.8h, v0.16b, v2.16b
-; CHECK-NEXT: sub v0.8h, v1.8h, v0.8h
-; CHECK-NEXT: str q0, [x0]
-; CHECK-NEXT: ret
+; CHECK-NEON-LABEL: pmlsl2_v8i16_uzp1:
+; CHECK-NEON: // %bb.0:
+; CHECK-NEON-NEXT: ldr q2, [x1, #16]
+; CHECK-NEON-NEXT: uzp1 v2.16b, v0.16b, v2.16b
+; CHECK-NEON-NEXT: pmull2 v0.8h, v0.16b, v2.16b
+; CHECK-NEON-NEXT: sub v0.8h, v1.8h, v0.8h
+; CHECK-NEON-NEXT: str q0, [x0]
+; CHECK-NEON-NEXT: ret
+;
+; CHECK-SVE-LABEL: pmlsl2_v8i16_uzp1:
+; CHECK-SVE: // %bb.0:
+; CHECK-SVE-NEXT: ldr q2, [x1, #16]
+; CHECK-SVE-NEXT: uzp1 v2.16b, v0.16b, v2.16b
+; CHECK-SVE-NEXT: pmull2 v0.8h, v0.16b, v2.16b
+; CHECK-SVE-NEXT: sub v0.8h, v1.8h, v0.8h
+; CHECK-SVE-NEXT: str q0, [x0]
+; CHECK-SVE-NEXT: ret
+;
+; CHECK-GI-LABEL: pmlsl2_v8i16_uzp1:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: ldr q2, [x1, #16]
+; CHECK-GI-NEXT: mov d0, v0.d[1]
+; CHECK-GI-NEXT: xtn v2.8b, v2.8h
+; CHECK-GI-NEXT: pmull v0.8h, v0.8b, v2.8b
+; CHECK-GI-NEXT: sub v0.8h, v1.8h, v0.8h
+; CHECK-GI-NEXT: str q0, [x0]
+; CHECK-GI-NEXT: ret
%5 = getelementptr inbounds i32, ptr %3, i64 4
%6 = load <8 x i16>, ptr %5, align 4
%7 = trunc <8 x i16> %6 to <8 x i8>
@@ -1991,16 +2007,40 @@ define void @umlsl2_v4i32_uzp1(<8 x i16> %0, <4 x i32> %1, ptr %2, ptr %3) {
}
define void @pmlsl_pmlsl2_v8i16_uzp1(<16 x i8> %0, <8 x i16> %1, ptr %2, ptr %3, i32 %4) {
-; CHECK-LABEL: pmlsl_pmlsl2_v8i16_uzp1:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ldp q2, q3, [x1]
-; CHECK-NEXT: uzp1 v2.16b, v2.16b, v3.16b
-; CHECK-NEXT: pmull v3.8h, v0.8b, v2.8b
-; CHECK-NEXT: pmull2 v0.8h, v0.16b, v2.16b
-; CHECK-NEXT: add v0.8h, v3.8h, v0.8h
-; CHECK-NEXT: sub v0.8h, v1.8h, v0.8h
-; CHECK-NEXT: str q0, [x0]
-; CHECK-NEXT: ret
+; CHECK-NEON-LABEL: pmlsl_pmlsl2_v8i16_uzp1:
+; CHECK-NEON: // %bb.0: // %entry
+; CHECK-NEON-NEXT: ldp q2, q3, [x1]
+; CHECK-NEON-NEXT: uzp1 v2.16b, v2.16b, v3.16b
+; CHECK-NEON-NEXT: pmull v3.8h, v0.8b, v2.8b
+; CHECK-NEON-NEXT: pmull2 v0.8h, v0.16b, v2.16b
+; CHECK-NEON-NEXT: add v0.8h, v3.8h, v0.8h
+; CHECK-NEON-NEXT: sub v0.8h, v1.8h, v0.8h
+; CHECK-NEON-NEXT: str q0, [x0]
+; CHECK-NEON-NEXT: ret
+;
+; CHECK-SVE-LABEL: pmlsl_pmlsl2_v8i16_uzp1:
+; CHECK-SVE: // %bb.0: // %entry
+; CHECK-SVE-NEXT: ldp q2, q3, [x1]
+; CHECK-SVE-NEXT: uzp1 v2.16b, v2.16b, v3.16b
+; CHECK-SVE-NEXT: pmull v3.8h, v0.8b, v2.8b
+; CHECK-SVE-NEXT: pmull2 v0.8h, v0.16b, v2.16b
+; CHECK-SVE-NEXT: add v0.8h, v3.8h, v0.8h
+; CHECK-SVE-NEXT: sub v0.8h, v1.8h, v0.8h
+; CHECK-SVE-NEXT: str q0, [x0]
+; CHECK-SVE-NEXT: ret
+;
+; CHECK-GI-LABEL: pmlsl_pmlsl2_v8i16_uzp1:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: ldp q2, q3, [x1]
+; CHECK-GI-NEXT: mov d4, v0.d[1]
+; CHECK-GI-NEXT: xtn v2.8b, v2.8h
+; CHECK-GI-NEXT: xtn v3.8b, v3.8h
+; CHECK-GI-NEXT: pmull v0.8h, v0.8b, v2.8b
+; CHECK-GI-NEXT: pmull v2.8h, v4.8b, v3.8b
+; CHECK-GI-NEXT: add v0.8h, v0.8h, v2.8h
+; CHECK-GI-NEXT: sub v0.8h, v1.8h, v0.8h
+; CHECK-GI-NEXT: str q0, [x0]
+; CHECK-GI-NEXT: ret
entry:
%5 = load <8 x i16>, ptr %3, align 4
%6 = trunc <8 x i16> %5 to <8 x i8>
diff --git a/llvm/test/CodeGen/AArch64/arm64-neon-3vdiff.ll b/llvm/test/CodeGen/AArch64/arm64-neon-3vdiff.ll
index 2a8b3ce2ae10b..8cb319b2c3368 100644
--- a/llvm/test/CodeGen/AArch64/arm64-neon-3vdiff.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-neon-3vdiff.ll
@@ -1,11 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon,+aes | FileCheck %s --check-prefixes=CHECK,CHECK-SD
-; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon,+aes -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
-
-; CHECK-GI: warning: Instruction selection used fallback path for test_vmull_p8
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vmull_high_p8
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vmull_p64
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vmull_high_p64
+; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon,+aes -global-isel | FileCheck %s --check-prefixes=CHECK,CHECK-GI
declare <8 x i16> @llvm.aarch64.neon.pmull.v8i16(<8 x i8>, <8 x i8>)
declare <16 x i8> @llvm.aarch64.neon.pmull64(i64, i64) #5
@@ -2721,14 +2716,24 @@ entry:
}
define i128 @test_vmull_p64(i64 %a, i64 %b) #4 {
-; CHECK-LABEL: test_vmull_p64:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: fmov d0, x1
-; CHECK-NEXT: fmov d1, x0
-; CHECK-NEXT: pmull v0.1q, v1.1d, v0.1d
-; CHECK-NEXT: mov x1, v0.d[1]
-; CHECK-NEXT: fmov x0, d0
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vmull_p64:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: fmov d0, x1
+; CHECK-SD-NEXT: fmov d1, x0
+; CHECK-SD-NEXT: pmull v0.1q, v1.1d, v0.1d
+; CHECK-SD-NEXT: mov x1, v0.d[1]
+; CHECK-SD-NEXT: fmov x0, d0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vmull_p64:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: fmov d0, x0
+; CHECK-GI-NEXT: fmov d1, x1
+; CHECK-GI-NEXT: pmull v0.1q, v0.1d, v1.1d
+; CHECK-GI-NEXT: mov d1, v0.d[1]
+; CHECK-GI-NEXT: fmov x0, d0
+; CHECK-GI-NEXT: fmov x1, d1
+; CHECK-GI-NEXT: ret
entry:
%vmull2.i = tail call <16 x i8> @llvm.aarch64.neon.pmull64(i64 %a, i64 %b)
%vmull3.i = bitcast <16 x i8> %vmull2.i to i128
@@ -2736,12 +2741,22 @@ entry:
}
define i128 @test_vmull_high_p64(<2 x i64> %a, <2 x i64> %b) #4 {
-; CHECK-LABEL: test_vmull_high_p64:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: pmull2 v0.1q, v0.2d, v1.2d
-; CHECK-NEXT: mov x1, v0.d[1]
-; CHECK-NEXT: fmov x0, d0
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vmull_high_p64:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: pmull2 v0.1q, v0.2d, v1.2d
+; CHECK-SD-NEXT: mov x1, v0.d[1]
+; CHECK-SD-NEXT: fmov x0, d0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vmull_high_p64:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: mov d0, v0.d[1]
+; CHECK-GI-NEXT: mov d1, v1.d[1]
+; CHECK-GI-NEXT: pmull v0.1q, v0.1d, v1.1d
+; CHECK-GI-NEXT: mov d1, v0.d[1]
+; CHECK-GI-NEXT: fmov x0, d0
+; CHECK-GI-NEXT: fmov x1, d1
+; CHECK-GI-NEXT: ret
entry:
%0 = extractelement <2 x i64> %a, i32 1
%1 = extractelement <2 x i64> %b, i32 1
diff --git a/llvm/test/CodeGen/AArch64/arm64-vmul.ll b/llvm/test/CodeGen/AArch64/arm64-vmul.ll
index e6df9f2fb2c56..90abc7d389c13 100644
--- a/llvm/test/CodeGen/AArch64/arm64-vmul.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-vmul.ll
@@ -2,44 +2,35 @@
; RUN: llc -mtriple=aarch64-none-elf -mattr=+aes < %s | FileCheck %s --check-prefixes=CHECK,CHECK-SD
; RUN: llc -mtriple=aarch64-none-elf -mattr=+aes -global-isel -global-isel-abort=2 2>&1 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-GI
-; CHECK-GI: warning: Instruction selection used fallback path for pmull8h
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for commutable_pmull8h
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqdmulh_1s
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_2s
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_4s
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_2d
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_commuted_neg_2s
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_commuted_neg_4s
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_commuted_neg_2d
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_indexed_2s
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_indexed_4s
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_indexed_2d
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_indexed_2s_strict
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_indexed_4s_strict
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_indexed_2d_strict
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmla_indexed_scalar_2s_strict
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmla_indexed_scalar_4s_strict
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmla_indexed_scalar_2d_strict
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqdmulh_lane_1s
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqdmlal_lane_1d
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqdmlsl_lane_1d
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for pmull_from_extract_dup_low
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for pmull_from_extract_dup_high
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for pmull_from_extract_duplane_low
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for pmull_from_extract_duplane_high
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for scalar_fmls_from_extract_v4f32
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for scalar_fmls_from_extract_v2f32
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for scalar_fmls_from_extract_v2f64
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_with_fneg_before_extract_v2f32
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_with_fneg_before_extract_v2f32_1
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_with_fneg_before_extract_v4f32
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_with_fneg_before_extract_v4f32_1
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_with_fneg_before_extract_v2f64
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqdmlal_d
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqdmlsl_d
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_pmull_64
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_pmull_high_64
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_commutable_pmull_64
+; CHECK-GI: warning: Instruction selection used fallback path for sqdmulh_1s
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_2s
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_4s
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_2d
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_commuted_neg_2s
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_commuted_neg_4s
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_commuted_neg_2d
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_indexed_2s
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_indexed_4s
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_indexed_2d
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_indexed_2s_strict
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_indexed_4s_strict
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_indexed_2d_strict
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmla_indexed_scalar_2s_strict
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmla_indexed_scalar_4s_strict
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmla_indexed_scalar_2d_strict
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqdmulh_lane_1s
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqdmlal_lane_1d
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqdmlsl_lane_1d
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for scalar_fmls_from_extract_v4f32
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for scalar_fmls_from_extract_v2f32
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for scalar_fmls_from_extract_v2f64
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_with_fneg_before_extract_v2f32
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_with_fneg_before_extract_v2f32_1
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_with_fneg_before_extract_v4f32
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_with_fneg_before_extract_v4f32_1
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_with_fneg_before_extract_v2f64
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqdmlal_d
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqdmlsl_d
define <8 x i16> @smull8h(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: smull8h:
@@ -2895,11 +2886,18 @@ define <8 x i16> @pmull_from_extract_dup_low(<16 x i8> %lhs, i8 %rhs) {
}
define <8 x i16> @pmull_from_extract_dup_high(<16 x i8> %lhs, i8 %rhs) {
-; CHECK-LABEL: pmull_from_extract_dup_high:
-; CHECK: // %bb.0:
-; CHECK-NEXT: dup v1.16b, w0
-; CHECK-NEXT: pmull2 v0.8h, v0.16b, v1.16b
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: pmull_from_extract_dup_high:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: dup v1.16b, w0
+; CHECK-SD-NEXT: pmull2 v0.8h, v0.16b, v1.16b
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: pmull_from_extract_dup_high:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: dup v1.8b, w0
+; CHECK-GI-NEXT: mov d0, v0.d[1]
+; CHECK-GI-NEXT: pmull v0.8h, v0.8b, v1.8b
+; CHECK-GI-NEXT: ret
%rhsvec.0 = insertelement <8 x i8> undef, i8 %rhs, i32 0
%rhsvec = shufflevector <8 x i8> %rhsvec.0, <8 x i8> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
@@ -2924,12 +2922,20 @@ define <8 x i16> @pmull_from_extract_duplane_low(<16 x i8> %lhs, <8 x i8> %rhs)
}
define <8 x i16> @pmull_from_extract_duplane_high(<16 x i8> %lhs, <8 x i8> %rhs) {
-; CHECK-LABEL: pmull_from_extract_duplane_high:
-; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
-; CHECK-NEXT: dup v1.16b, v1.b[0]
-; CHECK-NEXT: pmull2 v0.8h, v0.16b, v1.16b
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: pmull_from_extract_duplane_high:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-SD-NEXT: dup v1.16b, v1.b[0]
+; CHECK-SD-NEXT: pmull2 v0.8h, v0.16b, v1.16b
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: pmull_from_extract_duplane_high:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-GI-NEXT: mov d0, v0.d[1]
+; CHECK-GI-NEXT: dup v1.8b, v1.b[0]
+; CHECK-GI-NEXT: pmull v0.8h, v0.8b, v1.8b
+; CHECK-GI-NEXT: ret
%lhs.high = shufflevector <16 x i8> %lhs, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%rhs.high = shufflevector <8 x i8> %rhs, <8 x i8> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
@@ -3245,21 +3251,35 @@ define i64 @sqdmlsl_d(i32 %A, i32 %B, i64 %C) nounwind {
}
define <16 x i8> @test_pmull_64(i64 %l, i64 %r) nounwind {
-; CHECK-LABEL: test_pmull_64:
-; CHECK: // %bb.0:
-; CHECK-NEXT: fmov d0, x1
-; CHECK-NEXT: fmov d1, x0
-; CHECK-NEXT: pmull v0.1q, v1.1d, v0.1d
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_pmull_64:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: fmov d0, x1
+; CHECK-SD-NEXT: fmov d1, x0
+; CHECK-SD-NEXT: pmull v0.1q, v1.1d, v0.1d
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_pmull_64:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: fmov d0, x0
+; CHECK-GI-NEXT: fmov d1, x1
+; CHECK-GI-NEXT: pmull v0.1q, v0.1d, v1.1d
+; CHECK-GI-NEXT: ret
%val = call <16 x i8> @llvm.aarch64.neon.pmull64(i64 %l, i64 %r)
ret <16 x i8> %val
}
define <16 x i8> @test_pmull_high_64(<2 x i64> %l, <2 x i64> %r) nounwind {
-; CHECK-LABEL: test_pmull_high_64:
-; CHECK: // %bb.0:
-; CHECK-NEXT: pmull2 v0.1q, v0.2d, v1.2d
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_pmull_high_64:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: pmull2 v0.1q, v0.2d, v1.2d
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_pmull_high_64:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: mov d0, v0.d[1]
+; CHECK-GI-NEXT: mov d1, v1.d[1]
+; CHECK-GI-NEXT: pmull v0.1q, v0.1d, v1.1d
+; CHECK-GI-NEXT: ret
%l_hi = extractelement <2 x i64> %l, i32 1
%r_hi = extractelement <2 x i64> %r, i32 1
%val = call <16 x i8> @llvm.aarch64.neon.pmull64(i64 %l_hi, i64 %r_hi)
@@ -3267,13 +3287,22 @@ define <16 x i8> @test_pmull_high_64(<2 x i64> %l, <2 x i64> %r) nounwind {
}
define <16 x i8> @test_commutable_pmull_64(i64 %l, i64 %r) nounwind {
-; CHECK-LABEL: test_commutable_pmull_64:
-; CHECK: // %bb.0:
-; CHECK-NEXT: fmov d0, x1
-; CHECK-NEXT: fmov d1, x0
-; CHECK-NEXT: pmull v0.1q, v1.1d, v0.1d
-; CHECK-NEXT: add v0.16b, v0.16b, v0.16b
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_commutable_pmull_64:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: fmov d0, x1
+; CHECK-SD-NEXT: fmov d1, x0
+; CHECK-SD-NEXT: pmull v0.1q, v1.1d, v0.1d
+; CHECK-SD-NEXT: add v0.16b, v0.16b, v0.16b
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_commutable_pmull_64:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: fmov d0, x0
+; CHECK-GI-NEXT: fmov d1, x1
+; CHECK-GI-NEXT: pmull v2.1q, v0.1d, v1.1d
+; CHECK-GI-NEXT: pmull v0.1q, v1.1d, v0.1d
+; CHECK-GI-NEXT: add v0.16b, v2.16b, v0.16b
+; CHECK-GI-NEXT: ret
%1 = call <16 x i8> @llvm.aarch64.neon.pmull64(i64 %l, i64 %r)
%2 = call <16 x i8> @llvm.aarch64.neon.pmull64(i64 %r, i64 %l)
%3 = add <16 x i8> %1, %2
diff --git a/llvm/test/CodeGen/AArch64/highextractbitcast.ll b/llvm/test/CodeGen/AArch64/highextractbitcast.ll
index df4889b6f09de..bd6c168ce8776 100644
--- a/llvm/test/CodeGen/AArch64/highextractbitcast.ll
+++ b/llvm/test/CodeGen/AArch64/highextractbitcast.ll
@@ -1,10 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s --check-prefixes CHECK,CHECK-LE
; RUN: llc -mtriple=aarch64_be-unknown-linux-gnu < %s | FileCheck %s --check-prefix CHECK-BE
-; RUN: llc -mtriple=aarch64-unknown-linux-gnu -global-isel -global-isel -global-isel-abort=2 2>&1 < %s | FileCheck %s --check-prefixes CHECK,CHECK-GI
-
-; CHECK-GI: warning: Instruction selection used fallback path for test_pmull_high_p8_128
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_pmull_high_p8_64
+; RUN: llc -mtriple=aarch64-unknown-linux-gnu -global-isel < %s | FileCheck %s --check-prefixes CHECK,CHECK-GI
declare <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16>, <4 x i16>)
declare <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16>, <4 x i16>)
@@ -521,12 +518,12 @@ entry:
}
define <8 x i16> @test_pmull_high_p8_128(i128 %aa, i128 %bb) {
-; CHECK-LABEL: test_pmull_high_p8_128:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: fmov d0, x3
-; CHECK-NEXT: fmov d1, x1
-; CHECK-NEXT: pmull v0.8h, v1.8b, v0.8b
-; CHECK-NEXT: ret
+; CHECK-LE-LABEL: test_pmull_high_p8_128:
+; CHECK-LE: // %bb.0: // %entry
+; CHECK-LE-NEXT: fmov d0, x3
+; CHECK-LE-NEXT: fmov d1, x1
+; CHECK-LE-NEXT: pmull v0.8h, v1.8b, v0.8b
+; CHECK-LE-NEXT: ret
;
; CHECK-BE-LABEL: test_pmull_high_p8_128:
; CHECK-BE: // %bb.0: // %entry
@@ -538,6 +535,15 @@ define <8 x i16> @test_pmull_high_p8_128(i128 %aa, i128 %bb) {
; CHECK-BE-NEXT: rev64 v0.8h, v0.8h
; CHECK-BE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
; CHECK-BE-NEXT: ret
+;
+; CHECK-GI-LABEL: test_pmull_high_p8_128:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: mov v0.d[0], x0
+; CHECK-GI-NEXT: mov v1.d[0], x2
+; CHECK-GI-NEXT: mov v0.d[1], x1
+; CHECK-GI-NEXT: mov v1.d[1], x3
+; CHECK-GI-NEXT: pmull2 v0.8h, v0.16b, v1.16b
+; CHECK-GI-NEXT: ret
entry:
%a = bitcast i128 %aa to <16 x i8>
%b = bitcast i128 %bb to <16 x i8>
More information about the llvm-commits mailing list