[llvm] [AArch64] Add SUBHN patterns for xor variant (PR #126100)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Feb 6 09:48:21 PST 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-aarch64
Author: David Green (davemgreen)
<details>
<summary>Changes</summary>
`xor x, -1` can be treated as `sub -1, x`, so add patterns that generate a subhn instead of a separate not instruction.
Fixes #<!-- -->123999
---
Full diff: https://github.com/llvm/llvm-project/pull/126100.diff
2 Files Affected:
- (modified) llvm/lib/Target/AArch64/AArch64InstrInfo.td (+37-54)
- (modified) llvm/test/CodeGen/AArch64/arm64-vadd.ll (+9-9)
``````````diff
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index ce0c260b78410f..658ac7490eb338 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -6630,60 +6630,43 @@ defm : Neon_addl_extract_patterns<sub, sext, "SSUB">;
// CodeGen patterns for addhn and subhn instructions, which can actually be
// written in LLVM IR without too much difficulty.
-// Prioritize ADDHN and SUBHN over UZP2.
-let AddedComplexity = 10 in {
-
-// ADDHN
-def : Pat<(v8i8 (trunc (v8i16 (AArch64vlshr (add V128:$Rn, V128:$Rm), (i32 8))))),
- (ADDHNv8i16_v8i8 V128:$Rn, V128:$Rm)>;
-def : Pat<(v4i16 (trunc (v4i32 (AArch64vlshr (add V128:$Rn, V128:$Rm),
- (i32 16))))),
- (ADDHNv4i32_v4i16 V128:$Rn, V128:$Rm)>;
-def : Pat<(v2i32 (trunc (v2i64 (AArch64vlshr (add V128:$Rn, V128:$Rm),
- (i32 32))))),
- (ADDHNv2i64_v2i32 V128:$Rn, V128:$Rm)>;
-def : Pat<(concat_vectors (v8i8 V64:$Rd),
- (trunc (v8i16 (AArch64vlshr (add V128:$Rn, V128:$Rm),
- (i32 8))))),
- (ADDHNv8i16_v16i8 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub),
- V128:$Rn, V128:$Rm)>;
-def : Pat<(concat_vectors (v4i16 V64:$Rd),
- (trunc (v4i32 (AArch64vlshr (add V128:$Rn, V128:$Rm),
- (i32 16))))),
- (ADDHNv4i32_v8i16 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub),
- V128:$Rn, V128:$Rm)>;
-def : Pat<(concat_vectors (v2i32 V64:$Rd),
- (trunc (v2i64 (AArch64vlshr (add V128:$Rn, V128:$Rm),
- (i32 32))))),
- (ADDHNv2i64_v4i32 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub),
- V128:$Rn, V128:$Rm)>;
-
-// SUBHN
-def : Pat<(v8i8 (trunc (v8i16 (AArch64vlshr (sub V128:$Rn, V128:$Rm), (i32 8))))),
- (SUBHNv8i16_v8i8 V128:$Rn, V128:$Rm)>;
-def : Pat<(v4i16 (trunc (v4i32 (AArch64vlshr (sub V128:$Rn, V128:$Rm),
- (i32 16))))),
- (SUBHNv4i32_v4i16 V128:$Rn, V128:$Rm)>;
-def : Pat<(v2i32 (trunc (v2i64 (AArch64vlshr (sub V128:$Rn, V128:$Rm),
- (i32 32))))),
- (SUBHNv2i64_v2i32 V128:$Rn, V128:$Rm)>;
-def : Pat<(concat_vectors (v8i8 V64:$Rd),
- (trunc (v8i16 (AArch64vlshr (sub V128:$Rn, V128:$Rm),
- (i32 8))))),
- (SUBHNv8i16_v16i8 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub),
- V128:$Rn, V128:$Rm)>;
-def : Pat<(concat_vectors (v4i16 V64:$Rd),
- (trunc (v4i32 (AArch64vlshr (sub V128:$Rn, V128:$Rm),
- (i32 16))))),
- (SUBHNv4i32_v8i16 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub),
- V128:$Rn, V128:$Rm)>;
-def : Pat<(concat_vectors (v2i32 V64:$Rd),
- (trunc (v2i64 (AArch64vlshr (sub V128:$Rn, V128:$Rm),
- (i32 32))))),
- (SUBHNv2i64_v4i32 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub),
- V128:$Rn, V128:$Rm)>;
-
-} // AddedComplexity = 10
+multiclass AddSubHNPatterns<Instruction ADDHN, Instruction ADDHN2, Instruction SUBHN,
+ Instruction SUBHN2, ValueType VT64, ValueType VT128, int Shift> {
+ def : Pat<(VT64 (trunc (VT128 (AArch64vlshr (add V128:$Rn, V128:$Rm), (i32 Shift))))),
+ (ADDHN V128:$Rn, V128:$Rm)>;
+ let AddedComplexity = 10 in
+ def : Pat<(concat_vectors (VT64 V64:$Rd),
+ (trunc (VT128 (AArch64vlshr (add V128:$Rn, V128:$Rm),
+ (i32 Shift))))),
+ (ADDHN2 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub), V128:$Rn, V128:$Rm)>;
+
+ def : Pat<(VT64 (trunc (VT128 (AArch64vlshr (sub V128:$Rn, V128:$Rm), (i32 Shift))))),
+ (SUBHN V128:$Rn, V128:$Rm)>;
+ let AddedComplexity = 10 in
+ def : Pat<(concat_vectors (VT64 V64:$Rd),
+ (trunc (VT128 (AArch64vlshr (sub V128:$Rn, V128:$Rm),
+ (i32 Shift))))),
+ (SUBHN2 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub), V128:$Rn, V128:$Rm)>;
+
+ // xor by -1 can also be treated as sub
+ def : Pat<(VT64 (trunc (VT128 (AArch64vlshr (xor V128:$Rn, immAllOnesV:$Rm), (i32 Shift))))),
+ (SUBHN V128:$Rm, V128:$Rn)>;
+ let AddedComplexity = 10 in
+ def : Pat<(concat_vectors (VT64 V64:$Rd),
+ (trunc (VT128 (AArch64vlshr (xor V128:$Rn, immAllOnesV:$Rm),
+ (i32 Shift))))),
+ (SUBHN2 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub), V128:$Rm, V128:$Rn)>;
+}
+
+defm : AddSubHNPatterns<ADDHNv8i16_v8i8, ADDHNv8i16_v16i8,
+ SUBHNv8i16_v8i8, SUBHNv8i16_v16i8,
+ v8i8, v8i16, 8>;
+defm : AddSubHNPatterns<ADDHNv4i32_v4i16, ADDHNv4i32_v8i16,
+ SUBHNv4i32_v4i16, SUBHNv4i32_v8i16,
+ v4i16, v4i32, 16>;
+defm : AddSubHNPatterns<ADDHNv2i64_v2i32, ADDHNv2i64_v4i32,
+ SUBHNv2i64_v2i32, SUBHNv2i64_v4i32,
+ v2i32, v2i64, 32>;
//----------------------------------------------------------------------------
// AdvSIMD bitwise extract from vector instruction.
diff --git a/llvm/test/CodeGen/AArch64/arm64-vadd.ll b/llvm/test/CodeGen/AArch64/arm64-vadd.ll
index c893138cf7a8cd..d982dbbb1f69b9 100644
--- a/llvm/test/CodeGen/AArch64/arm64-vadd.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-vadd.ll
@@ -1521,9 +1521,9 @@ define <4 x i32> @subhn2_4s_natural(<2 x i32> %low, ptr %A, ptr %B) nounwind {
define <16 x i8> @neg_narrow_i8(<16 x i16> %a) {
; CHECK-SD-LABEL: neg_narrow_i8:
; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: mvn v1.16b, v1.16b
-; CHECK-SD-NEXT: mvn v0.16b, v0.16b
-; CHECK-SD-NEXT: uzp2 v0.16b, v0.16b, v1.16b
+; CHECK-SD-NEXT: movi v2.2d, #0xffffffffffffffff
+; CHECK-SD-NEXT: subhn v0.8b, v2.8h, v0.8h
+; CHECK-SD-NEXT: subhn2 v0.16b, v2.8h, v1.8h
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: neg_narrow_i8:
@@ -1542,9 +1542,9 @@ define <16 x i8> @neg_narrow_i8(<16 x i16> %a) {
define <8 x i16> @neg_narrow_i16(<8 x i32> %a) {
; CHECK-SD-LABEL: neg_narrow_i16:
; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: mvn v1.16b, v1.16b
-; CHECK-SD-NEXT: mvn v0.16b, v0.16b
-; CHECK-SD-NEXT: uzp2 v0.8h, v0.8h, v1.8h
+; CHECK-SD-NEXT: movi v2.2d, #0xffffffffffffffff
+; CHECK-SD-NEXT: subhn v0.4h, v2.4s, v0.4s
+; CHECK-SD-NEXT: subhn2 v0.8h, v2.4s, v1.4s
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: neg_narrow_i16:
@@ -1563,9 +1563,9 @@ define <8 x i16> @neg_narrow_i16(<8 x i32> %a) {
define <4 x i32> @neg_narrow_i32(<4 x i64> %a) {
; CHECK-SD-LABEL: neg_narrow_i32:
; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: mvn v1.16b, v1.16b
-; CHECK-SD-NEXT: mvn v0.16b, v0.16b
-; CHECK-SD-NEXT: uzp2 v0.4s, v0.4s, v1.4s
+; CHECK-SD-NEXT: movi v2.2d, #0xffffffffffffffff
+; CHECK-SD-NEXT: subhn v0.2s, v2.2d, v0.2d
+; CHECK-SD-NEXT: subhn2 v0.4s, v2.2d, v1.2d
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: neg_narrow_i32:
``````````
</details>
https://github.com/llvm/llvm-project/pull/126100
More information about the llvm-commits
mailing list