[PATCH] ARM: do not emit lsrh/ashr for NEON shifts
Amaury de la Vieuville
amaury.delavieuville at arm.com
Fri Oct 4 06:03:22 PDT 2013
Detect when the immediate is too big and
* emit a zero-splat for unsigned shifts
* emit a shift by size-1 for signed shifts
http://llvm-reviews.chandlerc.com/D1819
CHANGE SINCE LAST DIFF
http://llvm-reviews.chandlerc.com/D1819?vs=4631&id=4668#toc
Files:
lib/CodeGen/CGBuiltin.cpp
lib/CodeGen/CodeGenFunction.h
test/CodeGen/aarch64-neon-shifts.c
test/CodeGen/arm-neon-shifts.c
Index: lib/CodeGen/CGBuiltin.cpp
===================================================================
--- lib/CodeGen/CGBuiltin.cpp
+++ lib/CodeGen/CGBuiltin.cpp
@@ -1657,6 +1657,39 @@
return llvm::ConstantVector::getSplat(VTy->getNumElements(), C);
}
+// \brief Right-shift a vector by a constant.
+Value *CodeGenFunction::EmitNeonRShiftImm(Value *Vec, Value *Shift,
+ llvm::Type *Ty, bool usgn,
+ const char *name) {
+ llvm::VectorType *VTy = cast<llvm::VectorType>(Ty);
+
+ int ShiftAmt = cast<ConstantInt>(Shift)->getSExtValue();
+ int EltSize = VTy->getScalarSizeInBits();
+
+ Vec = Builder.CreateBitCast(Vec, Ty);
+
+ // lshr/ashr are undefined when the shift amount is equal to the vector
+ // element size.
+ if (ShiftAmt == EltSize) {
+ if (usgn) {
+ // Right-shifting an unsigned value by its size yields 0.
+ llvm::Constant *Zero = ConstantInt::get(VTy->getElementType(), 0);
+ return llvm::ConstantVector::getSplat(VTy->getNumElements(), Zero);
+ } else {
+ // Right-shifting a signed value by its size is equivalent
+ // to a shift of size-1.
+ --ShiftAmt;
+ Shift = ConstantInt::get(VTy->getElementType(), ShiftAmt);
+ }
+ }
+
+ Shift = EmitNeonShiftVector(Shift, Ty, false);
+ if (usgn)
+ return Builder.CreateLShr(Vec, Shift, name);
+ else
+ return Builder.CreateAShr(Vec, Shift, name);
+}
+
/// GetPointeeAlignment - Given an expression with a pointer type, find the
/// alignment of the type referenced by the pointer. Skip over implicit
/// casts.
@@ -3085,12 +3118,7 @@
Ops, "vshrn_n", 1, true);
case ARM::BI__builtin_neon_vshr_n_v:
case ARM::BI__builtin_neon_vshrq_n_v:
- Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
- Ops[1] = EmitNeonShiftVector(Ops[1], Ty, false);
- if (usgn)
- return Builder.CreateLShr(Ops[0], Ops[1], "vshr_n");
- else
- return Builder.CreateAShr(Ops[0], Ops[1], "vshr_n");
+ return EmitNeonRShiftImm(Ops[0], Ops[1], Ty, usgn, "vshr_n");
case ARM::BI__builtin_neon_vsri_n_v:
case ARM::BI__builtin_neon_vsriq_n_v:
rightShift = true;
@@ -3102,12 +3130,7 @@
case ARM::BI__builtin_neon_vsra_n_v:
case ARM::BI__builtin_neon_vsraq_n_v:
Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
- Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
- Ops[2] = EmitNeonShiftVector(Ops[2], Ty, false);
- if (usgn)
- Ops[1] = Builder.CreateLShr(Ops[1], Ops[2], "vsra_n");
- else
- Ops[1] = Builder.CreateAShr(Ops[1], Ops[2], "vsra_n");
+ Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n");
return Builder.CreateAdd(Ops[0], Ops[1]);
case ARM::BI__builtin_neon_vst1_v:
case ARM::BI__builtin_neon_vst1q_v:
Index: lib/CodeGen/CodeGenFunction.h
===================================================================
--- lib/CodeGen/CodeGenFunction.h
+++ lib/CodeGen/CodeGenFunction.h
@@ -2155,6 +2155,8 @@
llvm::Value *EmitNeonSplat(llvm::Value *V, llvm::Constant *Idx);
llvm::Value *EmitNeonShiftVector(llvm::Value *V, llvm::Type *Ty,
bool negateForRightShift);
+ llvm::Value *EmitNeonRShiftImm(llvm::Value *Vec, llvm::Value *Amt,
+ llvm::Type *Ty, bool usgn, const char *name);
llvm::Value *BuildVector(ArrayRef<llvm::Value*> Ops);
llvm::Value *EmitX86BuiltinExpr(unsigned BuiltinID, const CallExpr *E);
Index: test/CodeGen/aarch64-neon-shifts.c
===================================================================
--- /dev/null
+++ test/CodeGen/aarch64-neon-shifts.c
@@ -0,0 +1,43 @@
+// REQUIRES: aarch64-registered-target
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon \
+// RUN: -ffp-contract=fast -S -emit-llvm -O1 -o - %s | FileCheck %s
+
+#include <arm_neon.h>
+
+uint8x8_t test_shift_vshr(uint8x8_t a) {
+ // CHECK-LABEL: test_shift_vshr
+ // CHECK: %vshr_n = lshr <8 x i8> %a, <i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5>
+ return vshr_n_u8(a, 5);
+}
+
+int8x8_t test_shift_vshr_smax(int8x8_t a) {
+ // CHECK-LABEL: test_shift_vshr_smax
+ // CHECK: %vshr_n = ashr <8 x i8> %a, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
+ return vshr_n_s8(a, 8);
+}
+
+uint8x8_t test_shift_vshr_umax(uint8x8_t a) {
+ // CHECK-LABEL: test_shift_vshr_umax
+ // CHECK: ret <8 x i8> zeroinitializer
+ return vshr_n_u8(a, 8);
+}
+
+uint8x8_t test_shift_vsra(uint8x8_t a, uint8x8_t b) {
+ // CHECK-LABEL: test_shift_vsra
+ // CHECK: %vsra_n = lshr <8 x i8> %b, <i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5>
+ // CHECK: %0 = add <8 x i8> %vsra_n, %a
+ return vsra_n_u8(a, b, 5);
+}
+
+int8x8_t test_shift_vsra_smax(int8x8_t a, int8x8_t b) {
+ // CHECK-LABEL: test_shift_vsra_smax
+ // CHECK: %vsra_n = ashr <8 x i8> %b, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
+ // CHECK: %0 = add <8 x i8> %vsra_n, %a
+ return vsra_n_s8(a, b, 8);
+}
+
+uint8x8_t test_shift_vsra_umax(uint8x8_t a, uint8x8_t b) {
+ // CHECK-LABEL: test_shift_vsra_umax
+ // CHECK: ret <8 x i8> %a
+ return vsra_n_u8(a, b, 8);
+}
Index: test/CodeGen/arm-neon-shifts.c
===================================================================
--- /dev/null
+++ test/CodeGen/arm-neon-shifts.c
@@ -0,0 +1,45 @@
+// REQUIRES: arm-registered-target
+// RUN: %clang_cc1 -triple thumbv7-apple-darwin \
+// RUN: -target-cpu cortex-a8 \
+// RUN: -ffreestanding \
+// RUN: -emit-llvm -w -O1 -o - %s | FileCheck %s
+
+#include <arm_neon.h>
+
+uint8x8_t test_shift_vshr(uint8x8_t a) {
+ // CHECK-LABEL: test_shift_vshr
+ // CHECK: %vshr_n = lshr <8 x i8> %a, <i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5>
+ return vshr_n_u8(a, 5);
+}
+
+int8x8_t test_shift_vshr_smax(int8x8_t a) {
+ // CHECK-LABEL: test_shift_vshr_smax
+ // CHECK: %vshr_n = ashr <8 x i8> %a, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
+ return vshr_n_s8(a, 8);
+}
+
+uint8x8_t test_shift_vshr_umax(uint8x8_t a) {
+ // CHECK-LABEL: test_shift_vshr_umax
+ // CHECK: ret <8 x i8> zeroinitializer
+ return vshr_n_u8(a, 8);
+}
+
+uint8x8_t test_shift_vsra(uint8x8_t a, uint8x8_t b) {
+ // CHECK-LABEL: test_shift_vsra
+ // CHECK: %vsra_n = lshr <8 x i8> %b, <i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5>
+ // CHECK: %0 = add <8 x i8> %vsra_n, %a
+ return vsra_n_u8(a, b, 5);
+}
+
+int8x8_t test_shift_vsra_smax(int8x8_t a, int8x8_t b) {
+ // CHECK-LABEL: test_shift_vsra_smax
+ // CHECK: %vsra_n = ashr <8 x i8> %b, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
+ // CHECK: %0 = add <8 x i8> %vsra_n, %a
+ return vsra_n_s8(a, b, 8);
+}
+
+uint8x8_t test_shift_vsra_umax(uint8x8_t a, uint8x8_t b) {
+ // CHECK-LABEL: test_shift_vsra_umax
+ // CHECK: ret <8 x i8> %a
+ return vsra_n_u8(a, b, 8);
+}
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D1819.2.patch
Type: text/x-patch
Size: 6768 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/cfe-commits/attachments/20131004/28d78695/attachment.bin>
More information about the cfe-commits
mailing list