r188452 - Clang and AArch64 backend patches to support shll/shl and vmovl instructions and ACLE functions

Hao Liu Hao.Liu at arm.com
Thu Aug 15 01:26:31 PDT 2013


Author: haoliu
Date: Thu Aug 15 03:26:30 2013
New Revision: 188452

URL: http://llvm.org/viewvc/llvm-project?rev=188452&view=rev
Log:
Clang and AArch64 backend patches to support shll/shl and vmovl instructions and ACLE functions

Modified:
    cfe/trunk/include/clang/Basic/arm_neon.td
    cfe/trunk/lib/CodeGen/CGBuiltin.cpp
    cfe/trunk/test/CodeGen/aarch64-neon-intrinsics.c
    cfe/trunk/utils/TableGen/NeonEmitter.cpp

Modified: cfe/trunk/include/clang/Basic/arm_neon.td
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/arm_neon.td?rev=188452&r1=188451&r2=188452&view=diff
==============================================================================
--- cfe/trunk/include/clang/Basic/arm_neon.td (original)
+++ cfe/trunk/include/clang/Basic/arm_neon.td Thu Aug 15 03:26:30 2013
@@ -152,6 +152,7 @@ class NoTestOpInst<string n, string p, s
 // size modifiers:
 // U: unsigned
 // Q: 128b
+// H: 128b, but no 'q' is mangled into the intrinsic name
 // P: polynomial
 
 ////////////////////////////////////////////////////////////////////////////////
@@ -552,6 +553,18 @@ def FMINNMP : SInst<"vpminnm", "ddd", "f
 def ADDP  : IInst<"vpadd", "ddd", "csiUcUsUifQcQsQiQUcQUsQUiQfQd">;
 
 ////////////////////////////////////////////////////////////////////////////////
+// Shifts by constant
+let isShift = 1 in {
+// Left shift long high
+def SHLL_HIGH_N    : SInst<"vshll_high_n", "ndi", "HcHsHiHUcHUsHUi">; // 'H' types: 128b, name gets no 'q'
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Converting vectors
+def VMOVL_HIGH   : SInst<"vmovl_high", "nd", "HcHsHiHUcHUsHUi">; // 'H' types: 128b, name gets no 'q'
+
+
+////////////////////////////////////////////////////////////////////////////////
 // Scalar Arithmetic
 
 // Scalar Addition

Modified: cfe/trunk/lib/CodeGen/CGBuiltin.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGBuiltin.cpp?rev=188452&r1=188451&r2=188452&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CGBuiltin.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGBuiltin.cpp Thu Aug 15 03:26:30 2013
@@ -1620,6 +1620,37 @@ static llvm::VectorType *GetNeonType(Cod
   llvm_unreachable("Invalid NeonTypeFlags element type!");
 }
 
+/// Emit a widening shift-left into VTy. Ops[0] is bitcast to the vector with
+/// half-width elements (when isHigh, only its upper NumElts lanes are kept),
+/// zero- or sign-extended per usgn, then shifted by Ops[1]. Updates Ops[0..1].
+static Value *EmitExtendedSHL(CodeGenFunction &CGF,
+                              SmallVectorImpl<Value*> &Ops,
+                              llvm::VectorType *VTy, bool usgn, bool isHigh) {
+  // Emit through CGF's own builder instead of a by-value IRBuilder<> copy.
+  CGBuilderTy &Builder = CGF.Builder;
+  if (isHigh) {
+    unsigned NumElts = VTy->getNumElements();
+    unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
+    llvm::Type *EltTy = llvm::IntegerType::get(VTy->getContext(), EltBits / 2);
+    // The source operand type has twice as many elements of half the size.
+    llvm::Type *SrcTy = llvm::VectorType::get(EltTy, NumElts * 2);
+    SmallVector<Constant*, 8> Indices;
+    for (unsigned i = 0; i != NumElts; i++)
+      Indices.push_back(Builder.getInt32(i + NumElts));
+    Value *SV = llvm::ConstantVector::get(Indices);
+    Value *Undef = llvm::UndefValue::get(SrcTy);
+    Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
+    Ops[0] = Builder.CreateShuffleVector(Ops[0], Undef, SV);
+  } else {
+    llvm::Type *SrcTy = llvm::VectorType::getTruncatedElementVectorType(VTy);
+    Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
+  }
+  Ops[0] = usgn ? Builder.CreateZExt(Ops[0], VTy)
+                : Builder.CreateSExt(Ops[0], VTy);
+  Ops[1] = CGF.EmitNeonShiftVector(Ops[1], VTy, false);
+  return Builder.CreateShl(Ops[0], Ops[1], "vshl_n");
+}
+
 Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C) {
   unsigned nElts = cast<llvm::VectorType>(V->getType())->getNumElements();
   Value* SV = llvm::ConstantVector::getSplat(nElts, C);
@@ -1862,6 +1893,18 @@ Value *CodeGenFunction::EmitAArch64Built
     return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqrdmulh_v, E);
   case AArch64::BI__builtin_neon_vqrdmulhq_v:
     return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqrdmulhq_v, E);
+  case AArch64::BI__builtin_neon_vshl_n_v:
+    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vshl_n_v, E);
+  case AArch64::BI__builtin_neon_vshlq_n_v:
+    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vshlq_n_v, E);
+  case AArch64::BI__builtin_neon_vmovl_v:
+    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vmovl_v, E);
+  case AArch64::BI__builtin_neon_vshll_n_v:
+    return EmitExtendedSHL(*this, Ops, VTy, usgn, false);
+  case AArch64::BI__builtin_neon_vmovl_high_v:
+    Ops.push_back(ConstantInt::get(Int32Ty, 0));
+  case AArch64::BI__builtin_neon_vshll_high_n_v:
+    return EmitExtendedSHL(*this, Ops, VTy, usgn, true);
 
   // AArch64-only builtins
   case AArch64::BI__builtin_neon_vfms_v:

Modified: cfe/trunk/test/CodeGen/aarch64-neon-intrinsics.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/aarch64-neon-intrinsics.c?rev=188452&r1=188451&r2=188452&view=diff
==============================================================================
--- cfe/trunk/test/CodeGen/aarch64-neon-intrinsics.c (original)
+++ cfe/trunk/test/CodeGen/aarch64-neon-intrinsics.c Thu Aug 15 03:26:30 2013
@@ -3021,3 +3021,231 @@ float64x2_t test_vmulxq_f64(float64x2_t
 // CHECK: fmulx {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
 }
 
+int8x8_t test_vshl_n_s8(int8x8_t a) {
+// CHECK: test_vshl_n_s8
+  return vshl_n_s8(a, 3);  // signed shift-left by immediate: a plain SHL with the same #imm is expected
+// CHECK: shl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3
+}
+
+int16x4_t test_vshl_n_s16(int16x4_t a) {
+// CHECK: test_vshl_n_s16
+  return vshl_n_s16(a, 3);
+// CHECK: shl {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3
+}
+
+int32x2_t test_vshl_n_s32(int32x2_t a) {
+// CHECK: test_vshl_n_s32
+  return vshl_n_s32(a, 3);
+// CHECK: shl {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3
+}
+
+int8x16_t test_vshlq_n_s8(int8x16_t a) {
+// CHECK: test_vshlq_n_s8
+  return vshlq_n_s8(a, 3);  // 'q' forms: same SHL, on the .16b/.8h/.4s/.2d arrangements
+// CHECK: shl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3
+}
+
+int16x8_t test_vshlq_n_s16(int16x8_t a) {
+// CHECK: test_vshlq_n_s16
+  return vshlq_n_s16(a, 3);
+// CHECK: shl {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3
+}
+
+int32x4_t test_vshlq_n_s32(int32x4_t a) {
+// CHECK: test_vshlq_n_s32
+  return vshlq_n_s32(a, 3);
+// CHECK: shl {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3
+}
+
+int64x2_t test_vshlq_n_s64(int64x2_t a) {
+// CHECK: test_vshlq_n_s64
+  return vshlq_n_s64(a, 3);
+// CHECK: shl {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3
+}
+
+uint8x8_t test_vshl_n_u8(uint8x8_t a) {  // unsigned variants: signatures use the unsigned vector types
+// CHECK: test_vshl_n_u8
+  return vshl_n_u8(a, 3);  // the expected instruction is still a plain SHL
+// CHECK: shl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3
+}
+
+uint16x4_t test_vshl_n_u16(uint16x4_t a) {
+// CHECK: test_vshl_n_u16
+  return vshl_n_u16(a, 3);
+// CHECK: shl {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3
+}
+
+uint32x2_t test_vshl_n_u32(uint32x2_t a) {
+// CHECK: test_vshl_n_u32
+  return vshl_n_u32(a, 3);
+// CHECK: shl {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3
+}
+
+uint8x16_t test_vshlq_n_u8(uint8x16_t a) {
+// CHECK: test_vshlq_n_u8
+  return vshlq_n_u8(a, 3);
+// CHECK: shl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3
+}
+
+uint16x8_t test_vshlq_n_u16(uint16x8_t a) {
+// CHECK: test_vshlq_n_u16
+  return vshlq_n_u16(a, 3);
+// CHECK: shl {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3
+}
+
+uint32x4_t test_vshlq_n_u32(uint32x4_t a) {
+// CHECK: test_vshlq_n_u32
+  return vshlq_n_u32(a, 3);
+// CHECK: shl {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3
+}
+
+uint64x2_t test_vshlq_n_u64(uint64x2_t a) {
+// CHECK: test_vshlq_n_u64
+  return vshlq_n_u64(a, 3);
+// CHECK: shl {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3
+}
+
+int16x8_t test_vshll_n_s8(int8x8_t a) {
+// CHECK: test_vshll_n_s8
+  return vshll_n_s8(a, 3);  // widening shift: result lanes are twice as wide (.8b -> .8h); signed expects SSHLL
+// CHECK: sshll {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, #3
+}
+
+int32x4_t test_vshll_n_s16(int16x4_t a) {
+// CHECK: test_vshll_n_s16
+  return vshll_n_s16(a, 9);
+// CHECK: sshll {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, #9
+}
+
+int64x2_t test_vshll_n_s32(int32x2_t a) {
+// CHECK: test_vshll_n_s32
+  return vshll_n_s32(a, 19);
+// CHECK: sshll {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, #19
+}
+
+uint16x8_t test_vshll_n_u8(uint8x8_t a) {
+// CHECK: test_vshll_n_u8
+  return vshll_n_u8(a, 3);  // unsigned sources expect USHLL instead
+// CHECK: ushll {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, #3
+}
+
+uint32x4_t test_vshll_n_u16(uint16x4_t a) {
+// CHECK: test_vshll_n_u16
+  return vshll_n_u16(a, 9);
+// CHECK: ushll {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, #9
+}
+
+uint64x2_t test_vshll_n_u32(uint32x2_t a) {
+// CHECK: test_vshll_n_u32
+  return vshll_n_u32(a, 19);
+// CHECK: ushll {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, #19
+}
+
+int16x8_t test_vshll_high_n_s8(int8x16_t a) {
+// CHECK: test_vshll_high_n_s8
+  return vshll_high_n_s8(a, 3);  // "high" form: source is the full .16b vector and SSHLL2 is expected
+// CHECK: sshll2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, #3
+}
+
+int32x4_t test_vshll_high_n_s16(int16x8_t a) {
+// CHECK: test_vshll_high_n_s16
+  return vshll_high_n_s16(a, 9);
+// CHECK: sshll2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, #9
+}
+
+int64x2_t test_vshll_high_n_s32(int32x4_t a) {
+// CHECK: test_vshll_high_n_s32
+  return vshll_high_n_s32(a, 19);
+// CHECK: sshll2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, #19
+}
+
+uint16x8_t test_vshll_high_n_u8(uint8x16_t a) {
+// CHECK: test_vshll_high_n_u8
+  return vshll_high_n_u8(a, 3);  // unsigned sources expect USHLL2
+// CHECK: ushll2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, #3
+}
+
+uint32x4_t test_vshll_high_n_u16(uint16x8_t a) {
+// CHECK: test_vshll_high_n_u16
+  return vshll_high_n_u16(a, 9);
+// CHECK: ushll2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, #9
+}
+
+uint64x2_t test_vshll_high_n_u32(uint32x4_t a) {
+// CHECK: test_vshll_high_n_u32
+  return vshll_high_n_u32(a, 19);
+// CHECK: ushll2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, #19
+}
+
+int16x8_t test_vmovl_s8(int8x8_t a) {
+// CHECK: test_vmovl_s8
+  return vmovl_s8(a);  // vmovl takes no shift operand; the expected output is SSHLL with #0
+// CHECK: sshll {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, #0
+}
+
+int32x4_t test_vmovl_s16(int16x4_t a) {
+// CHECK: test_vmovl_s16
+  return vmovl_s16(a);
+// CHECK: sshll {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, #0
+}
+
+int64x2_t test_vmovl_s32(int32x2_t a) {
+// CHECK: test_vmovl_s32
+  return vmovl_s32(a);
+// CHECK: sshll {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, #0
+}
+
+uint16x8_t test_vmovl_u8(uint8x8_t a) {
+// CHECK: test_vmovl_u8
+  return vmovl_u8(a);  // unsigned sources expect USHLL with #0
+// CHECK: ushll {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, #0
+}
+
+uint32x4_t test_vmovl_u16(uint16x4_t a) {
+// CHECK: test_vmovl_u16
+  return vmovl_u16(a);
+// CHECK: ushll {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, #0
+}
+
+uint64x2_t test_vmovl_u32(uint32x2_t a) {
+// CHECK: test_vmovl_u32
+  return vmovl_u32(a);
+// CHECK: ushll {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, #0
+}
+
+int16x8_t test_vmovl_high_s8(int8x16_t a) {
+// CHECK: test_vmovl_high_s8
+  return vmovl_high_s8(a);  // codegen appends a 0 shift and reuses the vshll_high_n path, so SSHLL2 #0 is expected
+// CHECK: sshll2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, #0
+}
+
+int32x4_t test_vmovl_high_s16(int16x8_t a) {
+// CHECK: test_vmovl_high_s16
+  return vmovl_high_s16(a);
+// CHECK: sshll2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, #0
+}
+
+int64x2_t test_vmovl_high_s32(int32x4_t a) {
+// CHECK: test_vmovl_high_s32
+  return vmovl_high_s32(a);
+// CHECK: sshll2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, #0
+}
+
+uint16x8_t test_vmovl_high_u8(uint8x16_t a) {
+// CHECK: test_vmovl_high_u8
+  return vmovl_high_u8(a);  // unsigned sources expect USHLL2 with #0
+// CHECK: ushll2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, #0
+}
+
+uint32x4_t test_vmovl_high_u16(uint16x8_t a) {
+// CHECK: test_vmovl_high_u16
+  return vmovl_high_u16(a);
+// CHECK: ushll2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, #0
+}
+
+uint64x2_t test_vmovl_high_u32(uint32x4_t a) {
+// CHECK: test_vmovl_high_u32
+  return vmovl_high_u32(a);
+// CHECK: ushll2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, #0
+}
+

Modified: cfe/trunk/utils/TableGen/NeonEmitter.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/utils/TableGen/NeonEmitter.cpp?rev=188452&r1=188451&r2=188452&view=diff
==============================================================================
--- cfe/trunk/utils/TableGen/NeonEmitter.cpp (original)
+++ cfe/trunk/utils/TableGen/NeonEmitter.cpp Thu Aug 15 03:26:30 2013
@@ -263,7 +263,8 @@ static void ParseTypes(Record *r, std::s
   int len = 0;
 
   for (unsigned i = 0, e = s.size(); i != e; ++i, ++len) {
-    if (data[len] == 'P' || data[len] == 'Q' || data[len] == 'U')
+    if (data[len] == 'P' || data[len] == 'Q' || data[len] == 'U'
+                         || data[len] == 'H')
       continue;
 
     switch (data[len]) {
@@ -325,7 +326,7 @@ static char ClassifyType(StringRef ty, b
   unsigned off = 0;
 
   // remember quad.
-  if (ty[off] == 'Q') {
+  if (ty[off] == 'Q' || ty[off] == 'H') {
     quad = true;
     ++off;
   }
@@ -689,8 +690,8 @@ static void InstructionTypeCode(const St
 }
 
 /// MangleName - Append a type or width suffix to a base neon function name,
-/// and insert a 'q' in the appropriate location if the operation works on
-/// 128b rather than 64b.   E.g. turn "vst2_lane" into "vst2q_lane_f32", etc.
+/// and insert a 'q' in the appropriate location if type string starts with 'Q'.
+/// E.g. turn "vst2_lane" into "vst2q_lane_f32", etc.
 static std::string MangleName(const std::string &name, StringRef typestr,
                               ClassKind ck) {
   if (name == "vcvt_f32_f16")
@@ -712,9 +713,9 @@ static std::string MangleName(const std:
 
   // Insert a 'q' before the first '_' character so that it ends up before
   // _lane or _n on vector-scalar operations.
-  if (quad) {
-    size_t pos = s.find('_');
-    s = s.insert(pos, "q");
+  if (typestr.startswith("Q")) {
+      size_t pos = s.find('_');
+      s = s.insert(pos, "q");
   }
 
   return s;





More information about the cfe-commits mailing list