[PATCH] [ARM64]Fix the bug cannot select UQSHL/SQSHL with constant i64 shift amount.

Thu Apr 24 22:57:19 PDT 2014

Hi t.p.northover,

Hi Tim and other reviewers,

Those are 2 patches for LLVM and Clang to fix UQSHL/SQSHL by constant can not be selected. The simply intrinsic call:
      vqshld_s64(a, 36)  // a is a int64_t
will cause build failure.

The root cause is that 2 different pattern match types are used for the following 2 intrinsics:
      int64_t vqshld_n_s64(int64_t a, const int n)      SQSHL Dd,Dn,#n
      int64_t vqshld_s64(int64_t a, int64_t b)          SQSHL Dd,Dn,Dm

vqshld_n_s64 uses int64x1_t for arguments and return value. vqshld_s64 uses int64_t. If the second argument of vqshld_s64 is a constant, it will also try to generate a SQSHL by contant instrution, i.e the first kind of instruction "SQSHL Dd,Dn,#n", not "SQSHL Dd,Dn,Dm". As there is only a pattern for v1i64, not i64, there is a cannot select failure.

To fix this, we can simply add a pattern about i64 to match SQSHL by constant (The LLVM patch). But except that, I also think it is not reasonable to use int64x1_t for "int64_t vqshld_n_s64(int64_t a, const int n)", as it is originally int64_t, why we transfer int64_t to int64xt_t to match. So I also modify the Clang to use int64_t/uint64_t instead of transfering to int64x1_t/uint64x1_t (The Clang patch).

I only add two test cases about vqshld_s64 and vqshld_u64 with constant shift amount, as there are already other test cases for other situations.

Review please.
Thanks

http://reviews.llvm.org/D3501

Files:
  lib/Target/ARM64/ARM64InstrFormats.td
  tools/clang/lib/CodeGen/CGBuiltin.cpp
  tools/clang/test/CodeGen/arm64-scalar-test.c

Index: lib/Target/ARM64/ARM64InstrFormats.td
===================================================================

--- lib/Target/ARM64/ARM64InstrFormats.td
+++ lib/Target/ARM64/ARM64InstrFormats.td
@@ -6867,10 +6867,12 @@
 
   def d : BaseSIMDScalarShift<U, opc, {1,?,?,?,?,?,?},
                               FPR64, FPR64, vecshiftL64, asm,
-    [(set (v1i64 FPR64:$Rd), (OpNode (v1i64 FPR64:$Rn),
-                                     (i32 vecshiftL64:$imm)))]> {
+    [(set (i64 FPR64:$Rd), (OpNode (i64 FPR64:$Rn), (i32 vecshiftL64:$imm)))]> {
     let Inst{21-16} = imm{5-0};
   }
+
+  def : Pat<(v1i64 (OpNode (v1i64 FPR64:$Rn), (i32 vecshiftL64:$imm))),
+            (!cast<Instruction>(NAME # "d") FPR64:$Rn, vecshiftL64:$imm)>;
 }
 
 multiclass SIMDScalarRShiftBHSD<bit U, bits<5> opc, string asm> {
Index: tools/clang/lib/CodeGen/CGBuiltin.cpp
===================================================================
--- tools/clang/lib/CodeGen/CGBuiltin.cpp
+++ tools/clang/lib/CodeGen/CGBuiltin.cpp
@@ -5526,10 +5526,8 @@
   case NEON::BI__builtin_neon_vqshlud_n_s64: {
     Ops.push_back(EmitScalarExpr(E->getArg(1)));
     Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty);
-    llvm::Type *VTy = llvm::VectorType::get(Int64Ty, 1);
-    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm64_neon_sqshlu, VTy),
-                          Ops, "vqshlu_n");
-    return Builder.CreateBitCast(Ops[0], Int64Ty);
+    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm64_neon_sqshlu, Int64Ty),
+                        Ops, "vqshlu_n");
   }
   case NEON::BI__builtin_neon_vqshld_n_u64:
   case NEON::BI__builtin_neon_vqshld_n_s64: {
@@ -5538,9 +5536,7 @@
                                    : Intrinsic::arm64_neon_sqshl;
     Ops.push_back(EmitScalarExpr(E->getArg(1)));
     Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty);
-    llvm::Type *VTy = llvm::VectorType::get(Int64Ty, 1);
-    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, VTy), Ops, "vqshl_n");
-    return Builder.CreateBitCast(Ops[0], Int64Ty);
+    return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vqshl_n");
   }
   case NEON::BI__builtin_neon_vrshrd_n_u64:
   case NEON::BI__builtin_neon_vrshrd_n_s64: {
@@ -5548,9 +5544,9 @@
                                    ? Intrinsic::arm64_neon_urshl
                                    : Intrinsic::arm64_neon_srshl;
     Ops.push_back(EmitScalarExpr(E->getArg(1)));
-    llvm::Type *VTy = llvm::VectorType::get(Int64Ty, 1);
-    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, VTy), Ops, "vrshr_n", 1, true);
-    return Builder.CreateBitCast(Ops[0], Int64Ty);
+    int SV = cast<ConstantInt>(Ops[1])->getSExtValue();
+    Ops[1] = ConstantInt::get(Int64Ty, -SV);
+    return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vrshr_n");
   }
   case NEON::BI__builtin_neon_vrsrad_n_u64:
   case NEON::BI__builtin_neon_vrsrad_n_s64: {
Index: tools/clang/test/CodeGen/arm64-scalar-test.c
===================================================================
--- tools/clang/test/CodeGen/arm64-scalar-test.c
+++ tools/clang/test/CodeGen/arm64-scalar-test.c
@@ -114,6 +114,12 @@
 // CHECK: sqshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
 }
 
+// CHECK: test_vqshld_s64_i
+int64_t test_vqshld_s64_i(int64_t a) {
+  return vqshld_s64(a, 36);
+// CHECK: sqshl {{d[0-9]+}}, {{d[0-9]+}}, #36
+}
+
 // CHECK: test_vqshlb_u8
 uint8_t test_vqshlb_u8(uint8_t a, uint8_t b) {
   return vqshlb_u8(a, b);
@@ -138,6 +144,12 @@
 // CHECK: uqshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
 }
 
+// CHECK: test_vqshld_u64_i
+uint64_t test_vqshld_u64_i(uint64_t a) {
+  return vqshld_u64(a, 36);
+// CHECK: uqshl {{d[0-9]+}}, {{d[0-9]+}}, #36
+}
+
 // CHECK: test_vshld_u64
 uint64_t test_vshld_u64(uint64_t a, uint64_t b) {
   return vshld_u64(a, b);
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D3501.8829.patch
Type: text/x-patch
Size: 3735 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/cfe-commits/attachments/20140425/3a194051/attachment.bin>