[PATCH] [AArch64 NEON] Get instruction BSL be matched to VSELECT.

Kevin Qin kevinqindev at gmail.com
Fri Dec 6 02:43:21 PST 2013


Hi Tim and reviewers,

Please review, thanks.

http://llvm-reviews.chandlerc.com/D2351

Files:
  lib/Target/AArch64/AArch64InstrNEON.td
  test/CodeGen/AArch64/neon-vselect.ll

Index: lib/Target/AArch64/AArch64InstrNEON.td
===================================================================
--- lib/Target/AArch64/AArch64InstrNEON.td
+++ lib/Target/AArch64/AArch64InstrNEON.td
@@ -412,23 +412,26 @@
 def BSLvvv_16B : NeonI_3VSame_Constraint_impl<"bsl", ".16b", VPR128, v16i8,
                                               0b1, 0b1, 0b01, 0b00011, Neon_bsl>;
 
-multiclass Neon_bitwise3V_patterns<SDPatternOperator opnode,
-                                   Instruction INST8B,
-                                   Instruction INST16B> {
+multiclass Neon_bitwise3V_v1_patterns<SDPatternOperator opnode,
+                                      Instruction INST8B,
+                                      Instruction INST16B> {
   // Disassociate type from instruction definition
-  def : Pat<(v2i32 (opnode VPR64:$src,VPR64:$Rn, VPR64:$Rm)),
+  def : Pat<(v2i32 (opnode (v2i32 VPR64:$src), VPR64:$Rn, VPR64:$Rm)),
             (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
-  def : Pat<(v4i16 (opnode VPR64:$src, VPR64:$Rn, VPR64:$Rm)),
+  def : Pat<(v4i16 (opnode (v4i16 VPR64:$src), VPR64:$Rn, VPR64:$Rm)),
             (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
-  def : Pat<(v1i64 (opnode VPR64:$src, VPR64:$Rn, VPR64:$Rm)),
+  def : Pat<(v1i64 (opnode (v1i64 VPR64:$src), VPR64:$Rn, VPR64:$Rm)),
             (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
-  def : Pat<(v4i32 (opnode VPR128:$src, VPR128:$Rn, VPR128:$Rm)),
+  def : Pat<(v4i32 (opnode (v4i32 VPR128:$src), VPR128:$Rn, VPR128:$Rm)),
             (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
-  def : Pat<(v8i16 (opnode VPR128:$src, VPR128:$Rn, VPR128:$Rm)),
+  def : Pat<(v8i16 (opnode (v8i16 VPR128:$src), VPR128:$Rn, VPR128:$Rm)),
             (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
-  def : Pat<(v2i64 (opnode VPR128:$src, VPR128:$Rn, VPR128:$Rm)),
+  def : Pat<(v2i64 (opnode (v2i64 VPR128:$src), VPR128:$Rn, VPR128:$Rm)),
             (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
+}
 
+multiclass Neon_bitwise3V_v2_patterns<Instruction INST8B,
+                                      Instruction INST16B> {
   // Allow to match BSL instruction pattern with non-constant operand
   def : Pat<(v8i8 (or (and VPR64:$Rn, VPR64:$Rd),
                     (and VPR64:$Rm, (Neon_not8B VPR64:$Rd)))),
@@ -495,7 +498,9 @@
 }
 
 // Additional patterns for bitwise instruction BSL
-defm: Neon_bitwise3V_patterns<Neon_bsl, BSLvvv_8B, BSLvvv_16B>;
+defm: Neon_bitwise3V_v1_patterns<Neon_bsl, BSLvvv_8B, BSLvvv_16B>;
+defm: Neon_bitwise3V_v1_patterns<vselect, BSLvvv_8B, BSLvvv_16B>;
+defm: Neon_bitwise3V_v2_patterns<BSLvvv_8B, BSLvvv_16B>;
 
 def Neon_NoBSLop : PatFrag<(ops node:$src, node:$Rn, node:$Rm),
                            (Neon_bsl node:$src, node:$Rn, node:$Rm),
Index: test/CodeGen/AArch64/neon-vselect.ll
===================================================================
--- /dev/null
+++ test/CodeGen/AArch64/neon-vselect.ll
@@ -0,0 +1,140 @@
+; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
+; Make sure that AArch64 backend with NEON handles vselect.
+
+; CHECK-LABEL: vmax_v2i32
+define void @vmax_v2i32(<2 x i32>* %m, <2 x i32> %a, <2x i32> %b) {
+; CHECK: cmgt {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+; CHECK-NEXT: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+    %cmpres = icmp sgt <2 x i32> %a, %b
+    %maxres = select <2 x i1> %cmpres, <2 x i32> %a,  <2 x i32> %b
+    store <2 x i32> %maxres, <2 x i32>* %m
+    ret void
+}
+
+; CHECK-LABEL: vmax_v4i32
+define void @vmax_v4i32(<4 x i32>* %m, <4 x i32> %a, <4 x i32> %b) {
+; CHECK: cmgt {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+; CHECK-NEXT: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+    %cmpres = icmp sgt <4 x i32> %a, %b
+    %maxres = select <4 x i1> %cmpres, <4 x i32> %a,  <4 x i32> %b
+    store <4 x i32> %maxres, <4 x i32>* %m
+    ret void
+}
+
+%T0_10 = type <16 x i16>
+%T1_10 = type <16 x i1>
+; CHECK-LABEL: func_blend10:
+define void @func_blend10(%T0_10* %loadaddr, %T0_10* %loadaddr2,
+                          %T0_10* %storeaddr) {
+; CHECK: cmgt {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+; CHECK-NEXT: cmgt {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+; CHECK-NEXT: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+; CHECK-NEXT: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+  %v0 = load %T0_10* %loadaddr
+  %v1 = load %T0_10* %loadaddr2
+  %c = icmp slt %T0_10 %v0, %v1
+  %r = select %T1_10 %c, %T0_10 %v0, %T0_10 %v1
+  store %T0_10 %r, %T0_10* %storeaddr
+  ret void
+}
+%T0_14 = type <8 x i32>
+%T1_14 = type <8 x i1>
+; CHECK-LABEL: func_blend14:
+define void @func_blend14(%T0_14* %loadaddr, %T0_14* %loadaddr2,
+                          %T0_14* %storeaddr) {
+; CHECK: cmgt {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+; CHECK-NEXT: cmgt {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+; CHECK-NEXT: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+; CHECK-NEXT: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+  %v0 = load %T0_14* %loadaddr
+  %v1 = load %T0_14* %loadaddr2
+  %c = icmp slt %T0_14 %v0, %v1
+  %r = select %T1_14 %c, %T0_14 %v0, %T0_14 %v1
+  store %T0_14 %r, %T0_14* %storeaddr
+  ret void
+}
+%T0_15 = type <16 x i32>
+%T1_15 = type <16 x i1>
+; CHECK-LABEL: func_blend15:
+define void @func_blend15(%T0_15* %loadaddr, %T0_15* %loadaddr2,
+                          %T0_15* %storeaddr) {
+; CHECK: cmgt {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+; CHECK-NEXT: cmgt {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+; CHECK-NEXT: cmgt {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+; CHECK-NEXT: cmgt {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+; CHECK-NEXT: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+; CHECK-NEXT: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+; CHECK-NEXT: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+; CHECK-NEXT: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+  %v0 = load %T0_15* %loadaddr
+  %v1 = load %T0_15* %loadaddr2
+  %c = icmp slt %T0_15 %v0, %v1
+
+  %r = select %T1_15 %c, %T0_15 %v0, %T0_15 %v1
+  store %T0_15 %r, %T0_15* %storeaddr
+  ret void
+}
+%T0_18 = type <4 x i64>
+%T1_18 = type <4 x i1>
+; CHECK-LABEL: func_blend18:
+define void @func_blend18(%T0_18* %loadaddr, %T0_18* %loadaddr2,
+                          %T0_18* %storeaddr) {
+; CHECK: cmgt {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+; CHECK-NEXT: cmgt {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+; CHECK-NEXT: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+; CHECK-NEXT: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+  %v0 = load %T0_18* %loadaddr
+  %v1 = load %T0_18* %loadaddr2
+  %c = icmp slt %T0_18 %v0, %v1
+  %r = select %T1_18 %c, %T0_18 %v0, %T0_18 %v1
+  store %T0_18 %r, %T0_18* %storeaddr
+  ret void
+}
+%T0_19 = type <8 x i64>
+%T1_19 = type <8 x i1>
+; CHECK-LABEL: func_blend19:
+define void @func_blend19(%T0_19* %loadaddr, %T0_19* %loadaddr2,
+                          %T0_19* %storeaddr) {
+; CHECK: cmgt {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+; CHECK-NEXT: cmgt {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+; CHECK-NEXT: cmgt {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+; CHECK-NEXT: cmgt {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+; CHECK-NEXT: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+; CHECK-NEXT: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+; CHECK-NEXT: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+; CHECK-NEXT: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+  %v0 = load %T0_19* %loadaddr
+  %v1 = load %T0_19* %loadaddr2
+  %c = icmp slt %T0_19 %v0, %v1
+  %r = select %T1_19 %c, %T0_19 %v0, %T0_19 %v1
+  store %T0_19 %r, %T0_19* %storeaddr
+  ret void
+}
+%T0_20 = type <16 x i64>
+%T1_20 = type <16 x i1>
+; CHECK-LABEL: func_blend20:
+define void @func_blend20(%T0_20* %loadaddr, %T0_20* %loadaddr2,
+                          %T0_20* %storeaddr) {
+; CHECK: cmgt {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+; CHECK-NEXT: cmgt {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+; CHECK-NEXT: cmgt {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+; CHECK-NEXT: cmgt {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+; CHECK-NEXT: cmgt {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+; CHECK-NEXT: cmgt {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+; CHECK-NEXT: cmgt {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+; CHECK-NEXT: cmgt {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+; CHECK-NEXT: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+; CHECK-NEXT: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+; CHECK-NEXT: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+; CHECK-NEXT: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+; CHECK-NEXT: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+; CHECK-NEXT: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+; CHECK-NEXT: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+; CHECK-NEXT: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+  %v0 = load %T0_20* %loadaddr
+  %v1 = load %T0_20* %loadaddr2
+  %c = icmp slt %T0_20 %v0, %v1
+  %r = select %T1_20 %c, %T0_20 %v0, %T0_20 %v1
+  store %T0_20 %r, %T0_20* %storeaddr
+  ret void
+}
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D2351.1.patch
Type: text/x-patch
Size: 9253 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20131206/6e0c9bb9/attachment.bin>


More information about the llvm-commits mailing list