[llvm] 347d2be - [AArch64] Add Neon int instructions to isAssociativeAndCommutative

KAWASHIMA Takahiro via llvm-commits llvm-commits at lists.llvm.org
Tue Dec 20 06:47:59 PST 2022


Author: KAWASHIMA Takahiro
Date: 2022-12-20T23:47:51+09:00
New Revision: 347d2be7bef3a49b7dbe19ff1f964c1c3fb2999f

URL: https://github.com/llvm/llvm-project/commit/347d2be7bef3a49b7dbe19ff1f964c1c3fb2999f
DIFF: https://github.com/llvm/llvm-project/commit/347d2be7bef3a49b7dbe19ff1f964c1c3fb2999f.diff

LOG: [AArch64] Add Neon int instructions to isAssociativeAndCommutative

Differential Revision: https://reviews.llvm.org/D139810

Added: 
    

Modified: 
    llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
    llvm/test/CodeGen/AArch64/machine-combiner.ll
    llvm/test/CodeGen/AArch64/reduce-shuffle.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index a22a67a9d431..11ab7d094a04 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -4997,6 +4997,29 @@ bool AArch64InstrInfo::isAssociativeAndCommutative(const MachineInstr &Inst,
   case AArch64::EORXrr:
   case AArch64::EONWrr:
   case AArch64::EONXrr:
+  // -- Advanced SIMD instructions --
+  // Opcodes MULv1i64 and MULv2i64 don't exist because there is no 64-bit MUL
+  // in the Advanced SIMD instruction set.
+  case AArch64::ADDv8i8:
+  case AArch64::ADDv16i8:
+  case AArch64::ADDv4i16:
+  case AArch64::ADDv8i16:
+  case AArch64::ADDv2i32:
+  case AArch64::ADDv4i32:
+  case AArch64::ADDv1i64:
+  case AArch64::ADDv2i64:
+  case AArch64::MULv8i8:
+  case AArch64::MULv16i8:
+  case AArch64::MULv4i16:
+  case AArch64::MULv8i16:
+  case AArch64::MULv2i32:
+  case AArch64::MULv4i32:
+  case AArch64::ANDv8i8:
+  case AArch64::ANDv16i8:
+  case AArch64::ORRv8i8:
+  case AArch64::ORRv16i8:
+  case AArch64::EORv8i8:
+  case AArch64::EORv16i8:
     return true;
 
   default:

diff  --git a/llvm/test/CodeGen/AArch64/machine-combiner.ll b/llvm/test/CodeGen/AArch64/machine-combiner.ll
index f5fcdda363f8..10d602af1b4a 100644
--- a/llvm/test/CodeGen/AArch64/machine-combiner.ll
+++ b/llvm/test/CodeGen/AArch64/machine-combiner.ll
@@ -532,6 +532,75 @@ define <2 x double> @reassociate_muls_v2f64(<2 x double> %x0, <2 x double> %x1,
   ret <2 x double> %t2
 }
 
+; Verify that vector integer arithmetic operations are reassociated.
+
+define <2 x i32> @reassociate_muls_v2i32(<2 x i32> %x0, <2 x i32> %x1, <2 x i32> %x2, <2 x i32> %x3) {
+; CHECK-LABEL: reassociate_muls_v2i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mul v0.2s, v0.2s, v1.2s
+; CHECK-NEXT:    mul v1.2s, v3.2s, v2.2s
+; CHECK-NEXT:    mul v0.2s, v1.2s, v0.2s
+; CHECK-NEXT:    ret
+  %t0 = mul <2 x i32> %x0, %x1
+  %t1 = mul <2 x i32> %x2, %t0
+  %t2 = mul <2 x i32> %x3, %t1
+  ret <2 x i32> %t2
+}
+
+define <2 x i64> @reassociate_adds_v2i64(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, <2 x i64> %x3) {
+; CHECK-LABEL: reassociate_adds_v2i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    add v0.2d, v0.2d, v1.2d
+; CHECK-NEXT:    add v1.2d, v3.2d, v2.2d
+; CHECK-NEXT:    add v0.2d, v1.2d, v0.2d
+; CHECK-NEXT:    ret
+  %t0 = add <2 x i64> %x0, %x1
+  %t1 = add <2 x i64> %x2, %t0
+  %t2 = add <2 x i64> %x3, %t1
+  ret <2 x i64> %t2
+}
+
+; Verify that vector bitwise operations are reassociated.
+
+define <16 x i8> @reassociate_ands_v16i8(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, <16 x i8> %x3) {
+; CHECK-LABEL: reassociate_ands_v16i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    orr v0.16b, v0.16b, v1.16b
+; CHECK-NEXT:    and v1.16b, v2.16b, v3.16b
+; CHECK-NEXT:    and v0.16b, v0.16b, v1.16b
+; CHECK-NEXT:    ret
+  %t0 = or <16 x i8> %x0, %x1
+  %t1 = and <16 x i8> %t0, %x2
+  %t2 = and <16 x i8> %t1, %x3
+  ret <16 x i8> %t2
+}
+
+define <4 x i16> @reassociate_ors_v4i16(<4 x i16> %x0, <4 x i16> %x1, <4 x i16> %x2, <4 x i16> %x3) {
+; CHECK-LABEL: reassociate_ors_v4i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    eor v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    orr v1.8b, v2.8b, v3.8b
+; CHECK-NEXT:    orr v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    ret
+  %t0 = xor <4 x i16> %x0, %x1
+  %t1 = or <4 x i16> %t0, %x2
+  %t2 = or <4 x i16> %t1, %x3
+  ret <4 x i16> %t2
+}
+
+define <4 x i32> @reassociate_xors_v4i32(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, <4 x i32> %x3) {
+; CHECK-LABEL: reassociate_xors_v4i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    and v0.16b, v0.16b, v1.16b
+; CHECK-NEXT:    eor v1.16b, v2.16b, v3.16b
+; CHECK-NEXT:    eor v0.16b, v0.16b, v1.16b
+; CHECK-NEXT:    ret
+  %t0 = and <4 x i32> %x0, %x1
+  %t1 = xor <4 x i32> %t0, %x2
+  %t2 = xor <4 x i32> %t1, %x3
+  ret <4 x i32> %t2
+}
+
 ; PR25016: https://llvm.org/bugs/show_bug.cgi?id=25016
 ; Verify that reassociation is not happening needlessly or wrongly.
 

diff  --git a/llvm/test/CodeGen/AArch64/reduce-shuffle.ll b/llvm/test/CodeGen/AArch64/reduce-shuffle.ll
index 797f3724c98a..b3d1388b55aa 100644
--- a/llvm/test/CodeGen/AArch64/reduce-shuffle.ll
+++ b/llvm/test/CodeGen/AArch64/reduce-shuffle.ll
@@ -41,48 +41,48 @@ define i32 @v1(ptr nocapture noundef readonly %p1, i32 noundef %i1, ptr nocaptur
 ; CHECK-NEXT:    uzp2 v5.4s, v3.4s, v2.4s
 ; CHECK-NEXT:    ext v16.16b, v3.16b, v3.16b, #12
 ; CHECK-NEXT:    zip1 v17.4s, v1.4s, v0.4s
+; CHECK-NEXT:    mov v7.16b, v3.16b
+; CHECK-NEXT:    zip2 v4.4s, v2.4s, v3.4s
 ; CHECK-NEXT:    zip2 v6.4s, v1.4s, v0.4s
 ; CHECK-NEXT:    zip2 v18.4s, v3.4s, v2.4s
-; CHECK-NEXT:    uzp2 v5.4s, v5.4s, v3.4s
-; CHECK-NEXT:    ext v19.16b, v1.16b, v17.16b, #8
-; CHECK-NEXT:    mov v1.s[3], v0.s[2]
-; CHECK-NEXT:    zip2 v4.4s, v2.4s, v3.4s
-; CHECK-NEXT:    mov v7.16b, v3.16b
-; CHECK-NEXT:    ext v16.16b, v2.16b, v16.16b, #12
 ; CHECK-NEXT:    mov v7.s[0], v2.s[1]
+; CHECK-NEXT:    ext v16.16b, v2.16b, v16.16b, #12
+; CHECK-NEXT:    ext v19.16b, v1.16b, v17.16b, #8
+; CHECK-NEXT:    uzp2 v5.4s, v5.4s, v3.4s
 ; CHECK-NEXT:    mov v2.s[1], v3.s[0]
+; CHECK-NEXT:    mov v1.s[3], v0.s[2]
+; CHECK-NEXT:    mov v7.d[1], v17.d[1]
 ; CHECK-NEXT:    mov v5.d[1], v6.d[1]
+; CHECK-NEXT:    mov v2.d[1], v19.d[1]
 ; CHECK-NEXT:    mov v18.d[1], v1.d[1]
 ; CHECK-NEXT:    mov v16.d[1], v6.d[1]
 ; CHECK-NEXT:    mov v4.d[1], v1.d[1]
-; CHECK-NEXT:    mov v7.d[1], v17.d[1]
-; CHECK-NEXT:    mov v2.d[1], v19.d[1]
+; CHECK-NEXT:    add v0.4s, v7.4s, v2.4s
 ; CHECK-NEXT:    add v1.4s, v5.4s, v18.4s
+; CHECK-NEXT:    rev64 v5.4s, v0.4s
 ; CHECK-NEXT:    sub v3.4s, v4.4s, v16.4s
 ; CHECK-NEXT:    rev64 v4.4s, v1.4s
-; CHECK-NEXT:    add v0.4s, v7.4s, v2.4s
 ; CHECK-NEXT:    sub v2.4s, v2.4s, v7.4s
-; CHECK-NEXT:    rev64 v5.4s, v0.4s
-; CHECK-NEXT:    mov v4.d[1], v1.d[1]
+; CHECK-NEXT:    mov v5.d[1], v0.d[1]
 ; CHECK-NEXT:    add v6.4s, v3.4s, v2.4s
 ; CHECK-NEXT:    sub v2.4s, v2.4s, v3.4s
-; CHECK-NEXT:    mov v5.d[1], v0.d[1]
-; CHECK-NEXT:    sub v0.4s, v0.4s, v4.4s
+; CHECK-NEXT:    mov v4.d[1], v1.d[1]
 ; CHECK-NEXT:    rev64 v7.4s, v2.4s
 ; CHECK-NEXT:    rev64 v3.4s, v6.4s
-; CHECK-NEXT:    rev64 v4.4s, v0.4s
 ; CHECK-NEXT:    add v1.4s, v1.4s, v5.4s
+; CHECK-NEXT:    sub v0.4s, v0.4s, v4.4s
 ; CHECK-NEXT:    sub v7.4s, v2.4s, v7.4s
 ; CHECK-NEXT:    addp v5.4s, v1.4s, v6.4s
 ; CHECK-NEXT:    addp v2.4s, v0.4s, v2.4s
 ; CHECK-NEXT:    sub v3.4s, v6.4s, v3.4s
+; CHECK-NEXT:    rev64 v4.4s, v0.4s
 ; CHECK-NEXT:    rev64 v6.4s, v1.4s
-; CHECK-NEXT:    sub v0.4s, v0.4s, v4.4s
 ; CHECK-NEXT:    zip1 v16.4s, v5.4s, v5.4s
 ; CHECK-NEXT:    ext v17.16b, v2.16b, v7.16b, #4
 ; CHECK-NEXT:    ext v18.16b, v5.16b, v3.16b, #4
-; CHECK-NEXT:    ext v4.16b, v0.16b, v2.16b, #8
+; CHECK-NEXT:    sub v0.4s, v0.4s, v4.4s
 ; CHECK-NEXT:    sub v1.4s, v1.4s, v6.4s
+; CHECK-NEXT:    ext v4.16b, v0.16b, v2.16b, #8
 ; CHECK-NEXT:    ext v6.16b, v1.16b, v5.16b, #4
 ; CHECK-NEXT:    trn2 v1.4s, v16.4s, v1.4s
 ; CHECK-NEXT:    zip2 v16.4s, v17.4s, v2.4s
@@ -91,41 +91,41 @@ define i32 @v1(ptr nocapture noundef readonly %p1, i32 noundef %i1, ptr nocaptur
 ; CHECK-NEXT:    ext v6.16b, v6.16b, v6.16b, #4
 ; CHECK-NEXT:    ext v16.16b, v7.16b, v16.16b, #12
 ; CHECK-NEXT:    ext v17.16b, v3.16b, v17.16b, #12
-; CHECK-NEXT:    mov v0.s[2], v2.s[1]
-; CHECK-NEXT:    uzp2 v4.4s, v4.4s, v18.4s
 ; CHECK-NEXT:    mov v3.s[2], v5.s[3]
 ; CHECK-NEXT:    mov v7.s[2], v2.s[3]
-; CHECK-NEXT:    sub v18.4s, v1.4s, v6.4s
-; CHECK-NEXT:    mov v6.s[0], v5.s[1]
-; CHECK-NEXT:    sub v19.4s, v0.4s, v4.4s
+; CHECK-NEXT:    mov v0.s[2], v2.s[1]
+; CHECK-NEXT:    uzp2 v4.4s, v4.4s, v18.4s
 ; CHECK-NEXT:    sub v20.4s, v3.4s, v17.4s
 ; CHECK-NEXT:    sub v21.4s, v7.4s, v16.4s
-; CHECK-NEXT:    mov v0.s[1], v2.s[0]
 ; CHECK-NEXT:    mov v3.s[1], v5.s[2]
 ; CHECK-NEXT:    mov v7.s[1], v2.s[2]
-; CHECK-NEXT:    add v1.4s, v1.4s, v6.4s
-; CHECK-NEXT:    add v0.4s, v0.4s, v4.4s
+; CHECK-NEXT:    sub v18.4s, v1.4s, v6.4s
+; CHECK-NEXT:    mov v6.s[0], v5.s[1]
+; CHECK-NEXT:    sub v19.4s, v0.4s, v4.4s
+; CHECK-NEXT:    mov v0.s[1], v2.s[0]
 ; CHECK-NEXT:    add v2.4s, v3.4s, v17.4s
 ; CHECK-NEXT:    add v3.4s, v7.4s, v16.4s
-; CHECK-NEXT:    mov v1.d[1], v18.d[1]
-; CHECK-NEXT:    mov v0.d[1], v19.d[1]
+; CHECK-NEXT:    add v1.4s, v1.4s, v6.4s
 ; CHECK-NEXT:    mov v3.d[1], v21.d[1]
 ; CHECK-NEXT:    mov v2.d[1], v20.d[1]
-; CHECK-NEXT:    cmlt v4.8h, v1.8h, #0
-; CHECK-NEXT:    cmlt v5.8h, v0.8h, #0
+; CHECK-NEXT:    add v0.4s, v0.4s, v4.4s
+; CHECK-NEXT:    mov v1.d[1], v18.d[1]
+; CHECK-NEXT:    mov v0.d[1], v19.d[1]
 ; CHECK-NEXT:    cmlt v6.8h, v3.8h, #0
 ; CHECK-NEXT:    cmlt v7.8h, v2.8h, #0
+; CHECK-NEXT:    cmlt v4.8h, v1.8h, #0
 ; CHECK-NEXT:    add v3.4s, v6.4s, v3.4s
 ; CHECK-NEXT:    add v2.4s, v7.4s, v2.4s
+; CHECK-NEXT:    cmlt v5.8h, v0.8h, #0
 ; CHECK-NEXT:    add v1.4s, v4.4s, v1.4s
-; CHECK-NEXT:    add v0.4s, v5.4s, v0.4s
-; CHECK-NEXT:    eor v1.16b, v1.16b, v4.16b
-; CHECK-NEXT:    eor v0.16b, v0.16b, v5.16b
 ; CHECK-NEXT:    eor v2.16b, v2.16b, v7.16b
 ; CHECK-NEXT:    eor v3.16b, v3.16b, v6.16b
 ; CHECK-NEXT:    add v2.4s, v2.4s, v3.4s
+; CHECK-NEXT:    add v0.4s, v5.4s, v0.4s
+; CHECK-NEXT:    eor v1.16b, v1.16b, v4.16b
+; CHECK-NEXT:    add v1.4s, v1.4s, v2.4s
+; CHECK-NEXT:    eor v0.16b, v0.16b, v5.16b
 ; CHECK-NEXT:    add v0.4s, v1.4s, v0.4s
-; CHECK-NEXT:    add v0.4s, v0.4s, v2.4s
 ; CHECK-NEXT:    addv s0, v0.4s
 ; CHECK-NEXT:    fmov w8, s0
 ; CHECK-NEXT:    lsr w9, w8, #16
@@ -321,25 +321,25 @@ define i32 @v2(ptr nocapture noundef readonly %p1, i32 noundef %i1, ptr nocaptur
 ; CHECK-NEXT:    ext v0.16b, v4.16b, v0.16b, #8
 ; CHECK-NEXT:    ext v3.16b, v16.16b, v3.16b, #8
 ; CHECK-NEXT:    add v1.4s, v5.4s, v1.4s
-; CHECK-NEXT:    sub v5.4s, v6.4s, v17.4s
+; CHECK-NEXT:    sub v2.4s, v7.4s, v2.4s
 ; CHECK-NEXT:    ext v0.16b, v0.16b, v4.16b, #4
 ; CHECK-NEXT:    ext v3.16b, v3.16b, v16.16b, #4
-; CHECK-NEXT:    cmlt v6.8h, v5.8h, #0
-; CHECK-NEXT:    sub v2.4s, v7.4s, v2.4s
-; CHECK-NEXT:    add v4.4s, v6.4s, v5.4s
-; CHECK-NEXT:    add v0.4s, v0.4s, v3.4s
+; CHECK-NEXT:    sub v5.4s, v6.4s, v17.4s
 ; CHECK-NEXT:    cmlt v7.8h, v2.8h, #0
 ; CHECK-NEXT:    cmlt v17.8h, v1.8h, #0
-; CHECK-NEXT:    eor v3.16b, v4.16b, v6.16b
-; CHECK-NEXT:    cmlt v4.8h, v0.8h, #0
+; CHECK-NEXT:    cmlt v6.8h, v5.8h, #0
 ; CHECK-NEXT:    add v1.4s, v17.4s, v1.4s
 ; CHECK-NEXT:    add v2.4s, v7.4s, v2.4s
-; CHECK-NEXT:    add v0.4s, v4.4s, v0.4s
+; CHECK-NEXT:    add v0.4s, v0.4s, v3.4s
+; CHECK-NEXT:    add v4.4s, v6.4s, v5.4s
 ; CHECK-NEXT:    eor v2.16b, v2.16b, v7.16b
 ; CHECK-NEXT:    eor v1.16b, v1.16b, v17.16b
-; CHECK-NEXT:    eor v0.16b, v0.16b, v4.16b
+; CHECK-NEXT:    cmlt v3.8h, v0.8h, #0
 ; CHECK-NEXT:    add v1.4s, v1.4s, v2.4s
-; CHECK-NEXT:    add v0.4s, v0.4s, v3.4s
+; CHECK-NEXT:    add v0.4s, v3.4s, v0.4s
+; CHECK-NEXT:    eor v2.16b, v4.16b, v6.16b
+; CHECK-NEXT:    add v1.4s, v2.4s, v1.4s
+; CHECK-NEXT:    eor v0.16b, v0.16b, v3.16b
 ; CHECK-NEXT:    add v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT:    addv s0, v0.4s
 ; CHECK-NEXT:    fmov w8, s0
@@ -545,17 +545,17 @@ define i32 @v3(ptr nocapture noundef readonly %p1, i32 noundef %i1, ptr nocaptur
 ; CHECK-NEXT:    cmlt v6.8h, v3.8h, #0
 ; CHECK-NEXT:    add v0.4s, v0.4s, v2.4s
 ; CHECK-NEXT:    cmlt v2.8h, v1.8h, #0
-; CHECK-NEXT:    cmlt v7.8h, v0.8h, #0
-; CHECK-NEXT:    add v1.4s, v2.4s, v1.4s
 ; CHECK-NEXT:    add v3.4s, v6.4s, v3.4s
+; CHECK-NEXT:    add v1.4s, v2.4s, v1.4s
+; CHECK-NEXT:    cmlt v7.8h, v0.8h, #0
 ; CHECK-NEXT:    add v4.4s, v5.4s, v4.4s
-; CHECK-NEXT:    add v0.4s, v7.4s, v0.4s
-; CHECK-NEXT:    eor v4.16b, v4.16b, v5.16b
-; CHECK-NEXT:    eor v0.16b, v0.16b, v7.16b
 ; CHECK-NEXT:    eor v3.16b, v3.16b, v6.16b
 ; CHECK-NEXT:    eor v1.16b, v1.16b, v2.16b
 ; CHECK-NEXT:    add v1.4s, v1.4s, v3.4s
-; CHECK-NEXT:    add v0.4s, v0.4s, v4.4s
+; CHECK-NEXT:    add v0.4s, v7.4s, v0.4s
+; CHECK-NEXT:    eor v2.16b, v4.16b, v5.16b
+; CHECK-NEXT:    add v1.4s, v2.4s, v1.4s
+; CHECK-NEXT:    eor v0.16b, v0.16b, v7.16b
 ; CHECK-NEXT:    add v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT:    addv s0, v0.4s
 ; CHECK-NEXT:    fmov w8, s0


        


More information about the llvm-commits mailing list