[clang] 6b136b1 - [Power10] Implement custom codegen for the vec_replace_elt and vec_replace_unaligned builtins.

Wed Sep 23 20:55:41 PDT 2020

Author: Amy Kwan
Date: 2020-09-23T22:55:25-05:00
New Revision: 6b136b19cbe4e96adea63b75f1f2f76ec25c708e

URL: https://github.com/llvm/llvm-project/commit/6b136b19cbe4e96adea63b75f1f2f76ec25c708e
DIFF: https://github.com/llvm/llvm-project/commit/6b136b19cbe4e96adea63b75f1f2f76ec25c708e.diff

LOG: [Power10]  Implement custom codegen for the vec_replace_elt and vec_replace_unaligned builtins.

This patch implements custom codegen for the vec_replace_elt and
vec_replace_unaligned builtins.

These builtins map to the @llvm.ppc.altivec.vinsw and @llvm.ppc.altivec.vinsd
intrinsics depending on the arguments. The main motivation for doing custom
codegen for these intrinsics is because there are float and double versions of
the builtin. Normally, the converting the float to an integer would be done via
fptoui in the IR. This is incorrect as fptoui truncates the value and we must
ensure the value is not truncated. Therefore, we provide custom codegen to utilize
bitcast instead as bitcasts do not truncate.

Differential Revision: https://reviews.llvm.org/D83500

Added: 
    clang/test/CodeGen/builtins-ppc-vec-ins-error.c

Modified: 
    clang/include/clang/Basic/BuiltinsPPC.def
    clang/lib/CodeGen/CGBuiltin.cpp
    clang/lib/Headers/altivec.h
    clang/lib/Sema/SemaChecking.cpp
    clang/test/CodeGen/builtins-ppc-p10vector.c

Removed: 
    


################################################################################
diff  --git a/clang/include/clang/Basic/BuiltinsPPC.def b/clang/include/clang/Basic/BuiltinsPPC.def
index 91c3f040cb48..29bce799c8f4 100644

--- a/clang/include/clang/Basic/BuiltinsPPC.def
+++ b/clang/include/clang/Basic/BuiltinsPPC.def
@@ -409,6 +409,8 @@ BUILTIN(__builtin_altivec_vinshvlx, "V8UsV8UsUiV8Us", "")
 BUILTIN(__builtin_altivec_vinshvrx, "V8UsV8UsUiV8Us", "")
 BUILTIN(__builtin_altivec_vinswvlx, "V4UiV4UiUiV4Ui", "")
 BUILTIN(__builtin_altivec_vinswvrx, "V4UiV4UiUiV4Ui", "")
+BUILTIN(__builtin_altivec_vec_replace_elt, "V4UiV4UiUiIi", "t")
+BUILTIN(__builtin_altivec_vec_replace_unaligned, "V4UiV4UiUiIi", "t")
 
 // P10 Vector Extract built-ins.
 BUILTIN(__builtin_altivec_vextdubvlx, "V2ULLiV16UcV16UcUi", "")

diff  --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 9bdb21e0c05b..498d8640f329 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -14224,6 +14224,63 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
     Function *F = CGM.getIntrinsic(Intrinsic::cttz, ResultType);
     return Builder.CreateCall(F, {X, Undef});
   }
+  case PPC::BI__builtin_altivec_vec_replace_elt:
+  case PPC::BI__builtin_altivec_vec_replace_unaligned: {
+    // The third argument of vec_replace_elt and vec_replace_unaligned must
+    // be a compile time constant and will be emitted either to the vinsw
+    // or vinsd instruction.
+    ConstantInt *ArgCI = dyn_cast<ConstantInt>(Ops[2]);
+    assert(ArgCI &&
+           "Third Arg to vinsw/vinsd intrinsic must be a constant integer!");
+    llvm::Type *ResultType = ConvertType(E->getType());
+    llvm::Function *F = nullptr;
+    Value *Call = nullptr;
+    int64_t ConstArg = ArgCI->getSExtValue();
+    unsigned ArgWidth = Ops[1]->getType()->getPrimitiveSizeInBits();
+    bool Is32Bit = false;
+    assert((ArgWidth == 32 || ArgWidth == 64) && "Invalid argument width");
+    // The input to vec_replace_elt is an element index, not a byte index.
+    if (BuiltinID == PPC::BI__builtin_altivec_vec_replace_elt)
+      ConstArg *= ArgWidth / 8;
+    if (ArgWidth == 32) {
+      Is32Bit = true;
+      // When the second argument is 32 bits, it can either be an integer or
+      // a float. The vinsw intrinsic is used in this case.
+      F = CGM.getIntrinsic(Intrinsic::ppc_altivec_vinsw);
+      // Fix the constant according to endianess.
+      if (getTarget().isLittleEndian())
+        ConstArg = 12 - ConstArg;
+    } else {
+      // When the second argument is 64 bits, it can either be a long long or
+      // a double. The vinsd intrinsic is used in this case.
+      F = CGM.getIntrinsic(Intrinsic::ppc_altivec_vinsd);
+      // Fix the constant for little endian.
+      if (getTarget().isLittleEndian())
+        ConstArg = 8 - ConstArg;
+    }
+    Ops[2] = ConstantInt::getSigned(Int32Ty, ConstArg);
+    // Depending on ArgWidth, the input vector could be a float or a double.
+    // If the input vector is a float type, bitcast the inputs to integers. Or,
+    // if the input vector is a double, bitcast the inputs to 64-bit integers.
+    if (!Ops[1]->getType()->isIntegerTy(ArgWidth)) {
+      Ops[0] = Builder.CreateBitCast(
+          Ops[0], Is32Bit ? llvm::FixedVectorType::get(Int32Ty, 4)
+                          : llvm::FixedVectorType::get(Int64Ty, 2));
+      Ops[1] = Builder.CreateBitCast(Ops[1], Is32Bit ? Int32Ty : Int64Ty);
+    }
+    // Emit the call to vinsw or vinsd.
+    Call = Builder.CreateCall(F, Ops);
+    // Depending on the builtin, bitcast to the approriate result type.
+    if (BuiltinID == PPC::BI__builtin_altivec_vec_replace_elt &&
+        !Ops[1]->getType()->isIntegerTy())
+      return Builder.CreateBitCast(Call, ResultType);
+    else if (BuiltinID == PPC::BI__builtin_altivec_vec_replace_elt &&
+             Ops[1]->getType()->isIntegerTy())
+      return Call;
+    else
+      return Builder.CreateBitCast(Call,
+                                   llvm::FixedVectorType::get(Int8Ty, 16));
+  }
   case PPC::BI__builtin_altivec_vpopcntb:
   case PPC::BI__builtin_altivec_vpopcnth:
   case PPC::BI__builtin_altivec_vpopcntw:

diff  --git a/clang/lib/Headers/altivec.h b/clang/lib/Headers/altivec.h
index 03e82c8ae7e0..572b8863dd1a 100644
--- a/clang/lib/Headers/altivec.h
+++ b/clang/lib/Headers/altivec.h
@@ -17837,6 +17837,14 @@ vec_blendv(vector double __a, vector double __b,
   return __builtin_vsx_xxblendvd(__a, __b, __c);
 }
 
+/* vec_replace_elt */
+
+#define vec_replace_elt __builtin_altivec_vec_replace_elt
+
+/* vec_replace_unaligned */
+
+#define vec_replace_unaligned __builtin_altivec_vec_replace_unaligned
+
 /* vec_splati */
 
 #define vec_splati(__a)                                                        \

diff  --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index f2b70be1d431..eeb322262400 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -2570,6 +2570,17 @@ static bool isValidBPFPreserveFieldInfoArg(Expr *Arg) {
           dyn_cast<ArraySubscriptExpr>(Arg->IgnoreParens()));
 }
 
+static bool isEltOfVectorTy(ASTContext &Context, CallExpr *Call, Sema &S,
+                            QualType VectorTy, QualType EltTy) {
+  QualType VectorEltTy = VectorTy->castAs<VectorType>()->getElementType();
+  if (!Context.hasSameType(VectorEltTy, EltTy)) {
+    S.Diag(Call->getBeginLoc(), diag::err_typecheck_call_
diff erent_arg_types)
+        << Call->getSourceRange() << VectorEltTy << EltTy;
+    return false;
+  }
+  return true;
+}
+
 static bool isValidBPFPreserveTypeInfoArg(Expr *Arg) {
   QualType ArgType = Arg->getType();
   if (ArgType->getAsPlaceholderType())
@@ -3222,6 +3233,14 @@ bool Sema::CheckPPCBuiltinFunctionCall(const TargetInfo &TI, unsigned BuiltinID,
     return SemaVSXCheck(TheCall);
   case PPC::BI__builtin_altivec_vgnb:
      return SemaBuiltinConstantArgRange(TheCall, 1, 2, 7);
+  case PPC::BI__builtin_altivec_vec_replace_elt:
+  case PPC::BI__builtin_altivec_vec_replace_unaligned: {
+    QualType VecTy = TheCall->getArg(0)->getType();
+    QualType EltTy = TheCall->getArg(1)->getType();
+    unsigned Width = Context.getIntWidth(EltTy);
+    return SemaBuiltinConstantArgRange(TheCall, 2, 0, Width == 32 ? 12 : 8) ||
+           !isEltOfVectorTy(Context, TheCall, *this, VecTy, EltTy);
+  }
   case PPC::BI__builtin_vsx_xxeval:
      return SemaBuiltinConstantArgRange(TheCall, 3, 0, 255);
   case PPC::BI__builtin_altivec_vsldbi:

diff  --git a/clang/test/CodeGen/builtins-ppc-p10vector.c b/clang/test/CodeGen/builtins-ppc-p10vector.c
index 3fc931357298..21ddf06b61f2 100644
--- a/clang/test/CodeGen/builtins-ppc-p10vector.c
+++ b/clang/test/CodeGen/builtins-ppc-p10vector.c
@@ -21,6 +21,9 @@ vector signed __int128 vsi128a, vsi128b;
 vector unsigned __int128 vui128a, vui128b, vui128c;
 vector float vfa, vfb;
 vector double vda, vdb;
+float fa;
+double da;
+signed int sia;
 signed int *iap;
 unsigned int uia, uib, *uiap;
 signed char *cap;
@@ -1011,6 +1014,126 @@ vector double test_vec_blend_d(void) {
   return vec_blendv(vda, vdb, vullc);
 }
 
+vector signed int test_vec_replace_elt_si(void) {
+  // CHECK-BE: @llvm.ppc.altivec.vinsw(<4 x i32> %{{.+}}, i32 %{{.+}}, i32 0
+  // CHECK-BE-NEXT: ret <4 x i32>
+  // CHECK-LE: @llvm.ppc.altivec.vinsw(<4 x i32> %{{.+}}, i32 %{{.+}}, i32 12
+  // CHECK-LE-NEXT: ret <4 x i32>
+  return vec_replace_elt(vsia, sia, 0);
+}
+
+vector unsigned int test_vec_replace_elt_ui(void) {
+  // CHECK-BE: @llvm.ppc.altivec.vinsw(<4 x i32> %{{.+}}, i32 %{{.+}}, i32 4
+  // CHECK-BE-NEXT: ret <4 x i32>
+  // CHECK-LE: @llvm.ppc.altivec.vinsw(<4 x i32> %{{.+}}, i32 %{{.+}}, i32 8
+  // CHECK-LE-NEXT: ret <4 x i32>
+  return vec_replace_elt(vuia, uia, 1);
+}
+
+vector float test_vec_replace_elt_f(void) {
+  // CHECK-BE: bitcast float %{{.+}} to i32
+  // CHECK-BE-NEXT: @llvm.ppc.altivec.vinsw(<4 x i32> %{{.+}}, i32 %{{.+}}, i32 8
+  // CHECK-BE-NEXT: bitcast <4 x i32> %{{.*}} to <4 x float>
+  // CHECK-BE-NEXT: ret <4 x float>
+  // CHECK-LE: bitcast float %{{.+}} to i32
+  // CHECK-LE-NEXT: @llvm.ppc.altivec.vinsw(<4 x i32> %{{.+}}, i32 %{{.+}}, i32 4
+  // CHECK-LE-NEXT: bitcast <4 x i32> %{{.*}} to <4 x float>
+  // CHECK-LE-NEXT: ret <4 x float>
+  return vec_replace_elt(vfa, fa, 2);
+}
+
+vector signed long long test_vec_replace_elt_sll(void) {
+  // CHECK-BE: @llvm.ppc.altivec.vinsd(<2 x i64> %{{.+}}, i64 %{{.+}}, i32 0
+  // CHECK-BE-NEXT: ret <2 x i64>
+  // CHECK-LE: @llvm.ppc.altivec.vinsd(<2 x i64> %{{.+}}, i64 %{{.+}}, i32 8
+  // CHECK-LE-NEXT: ret <2 x i64>
+  return vec_replace_elt(vslla, llb, 0);
+}
+
+vector unsigned long long test_vec_replace_elt_ull(void) {
+  // CHECK-BE: @llvm.ppc.altivec.vinsd(<2 x i64> %{{.+}}, i64 %{{.+}}, i32 0
+  // CHECK-BE-NEXT: ret <2 x i64>
+  // CHECK-LE: @llvm.ppc.altivec.vinsd(<2 x i64> %{{.+}}, i64 %{{.+}}, i32 8
+  // CHECK-LE-NEXT: ret <2 x i64>
+  return vec_replace_elt(vulla, ulla, 0);
+}
+
+vector double test_vec_replace_elt_d(void) {
+  // CHECK-BE: bitcast double %{{.+}} to i64
+  // CHECK-BE-NEXT: @llvm.ppc.altivec.vinsd(<2 x i64> %{{.+}}, i64 %{{.+}}, i32 8
+  // CHECK-BE-NEXT: bitcast <2 x i64> %{{.*}} to <2 x double>
+  // CHECK-BE-NEXT: ret <2 x double>
+  // CHECK-LE: bitcast double %{{.+}} to i64
+  // CHECK-LE-NEXT: @llvm.ppc.altivec.vinsd(<2 x i64> %{{.+}}, i64 %{{.+}}, i32 0
+  // CHECK-LE-NEXT: bitcast <2 x i64> %{{.*}} to <2 x double>
+  // CHECK-LE-NEXT: ret <2 x double>
+  return vec_replace_elt(vda, da, 1);
+}
+
+vector unsigned char test_vec_replace_unaligned_si(void) {
+  // CHECK-BE: @llvm.ppc.altivec.vinsw(<4 x i32> %{{.+}}, i32 %{{.+}}, i32 6
+  // CHECK-BE-NEXT: bitcast <4 x i32> %{{.*}} to <16 x i8>
+  // CHECK-BE-NEXT: ret <16 x i8>
+  // CHECK-LE: @llvm.ppc.altivec.vinsw(<4 x i32> %{{.+}}, i32 %{{.+}}, i32 6
+  // CHECK-LE-NEXT: bitcast <4 x i32> %{{.*}} to <16 x i8>
+  // CHECK-LE-NEXT: ret <16 x i8>
+  return vec_replace_unaligned(vsia, sia, 6);
+}
+
+vector unsigned char test_vec_replace_unaligned_ui(void) {
+  // CHECK-BE: @llvm.ppc.altivec.vinsw(<4 x i32> %{{.+}}, i32 %{{.+}}, i32 8
+  // CHECK-BE-NEXT: bitcast <4 x i32> %{{.*}} to <16 x i8>
+  // CHECK-BE-NEXT: ret <16 x i8>
+  // CHECK-LE: @llvm.ppc.altivec.vinsw(<4 x i32> %{{.+}}, i32 %{{.+}}, i32 4
+  // CHECK-LE-NEXT: bitcast <4 x i32> %{{.*}} to <16 x i8>
+  // CHECK-LE-NEXT: ret <16 x i8>
+  return vec_replace_unaligned(vuia, uia, 8);
+}
+
+vector unsigned char test_vec_replace_unaligned_f(void) {
+  // CHECK-BE: bitcast float %{{.+}} to i32
+  // CHECK-BE-NEXT: @llvm.ppc.altivec.vinsw(<4 x i32> %{{.+}}, i32 %{{.+}}, i32 12
+  // CHECK-BE-NEXT: bitcast <4 x i32> %{{.*}} to <16 x i8>
+  // CHECK-BE-NEXT: ret <16 x i8>
+  // CHECK-LE: bitcast float %{{.+}} to i32
+  // CHECK-LE-NEXT: @llvm.ppc.altivec.vinsw(<4 x i32> %{{.+}}, i32 %{{.+}}, i32 0
+  // CHECK-LE-NEXT: bitcast <4 x i32> %{{.*}} to <16 x i8>
+  // CHECK-LE-NEXT: ret <16 x i8>
+  return vec_replace_unaligned(vfa, fa, 12);
+}
+
+vector unsigned char test_vec_replace_unaligned_sll(void) {
+  // CHECK-BE: @llvm.ppc.altivec.vinsd(<2 x i64> %{{.+}}, i64 %{{.+}}, i32 6
+  // CHECK-BE-NEXT: bitcast <2 x i64> %{{.*}} to <16 x i8>
+  // CHECK-BE-NEXT: ret <16 x i8>
+  // CHECK-LE: @llvm.ppc.altivec.vinsd(<2 x i64> %{{.+}}, i64 %{{.+}}, i32 2
+  // CHECK-LE-NEXT: bitcast <2 x i64> %{{.*}} to <16 x i8>
+  // CHECK-LE-NEXT: ret <16 x i8>
+  return vec_replace_unaligned(vslla, llb, 6);
+}
+
+vector unsigned char test_vec_replace_unaligned_ull(void) {
+  // CHECK-BE: @llvm.ppc.altivec.vinsd(<2 x i64> %{{.+}}, i64 %{{.+}}, i32 7
+  // CHECK-BE-NEXT: bitcast <2 x i64> %{{.*}} to <16 x i8>
+  // CHECK-BE-NEXT: ret <16 x i8>
+  // CHECK-LE: @llvm.ppc.altivec.vinsd(<2 x i64> %{{.+}}, i64 %{{.+}}, i32 1
+  // CHECK-LE-NEXT: bitcast <2 x i64> %{{.*}} to <16 x i8>
+  // CHECK-LE-NEXT: ret <16 x i8>
+  return vec_replace_unaligned(vulla, ulla, 7);
+}
+
+vector unsigned char test_vec_replace_unaligned_d(void) {
+  // CHECK-BE: bitcast double %{{.+}} to i64
+  // CHECK-BE-NEXT: @llvm.ppc.altivec.vinsd(<2 x i64> %{{.+}}, i64 %{{.+}}, i32 8
+  // CHECK-BE-NEXT: bitcast <2 x i64> %{{.*}} to <16 x i8>
+  // CHECK-BE-NEXT: ret <16 x i8>
+  // CHECK-LE: bitcast double %{{.+}} to i64
+  // CHECK-LE-NEXT: @llvm.ppc.altivec.vinsd(<2 x i64> %{{.+}}, i64 %{{.+}}, i32 0
+  // CHECK-LE-NEXT: bitcast <2 x i64> %{{.*}} to <16 x i8>
+  // CHECK-LE-NEXT: ret <16 x i8>
+  return vec_replace_unaligned(vda, da, 8);
+}
+
 vector unsigned char test_vec_insertl_uc(void) {
   // CHECK-BE: @llvm.ppc.altivec.vinsblx(<16 x i8> %{{.+}}, i32 %{{.+}}, i32
   // CHECK-BE-NEXT: ret <16 x i8>

diff  --git a/clang/test/CodeGen/builtins-ppc-vec-ins-error.c b/clang/test/CodeGen/builtins-ppc-vec-ins-error.c
new file mode 100644
index 000000000000..3ef5a47a96c5
--- /dev/null
+++ b/clang/test/CodeGen/builtins-ppc-vec-ins-error.c
@@ -0,0 +1,81 @@
+// REQUIRES: powerpc-registered-target
+
+// RUN: %clang_cc1 -target-feature +vsx -target-cpu pwr10 \
+// RUN:   -triple powerpc64le-unknown-unknown -fsyntax-only %s -verify
+// RUN: %clang_cc1 -target-feature +vsx -target-cpu pwr10 \
+// RUN:   -triple powerpc64-unknown-unknown -fsyntax-only %s -verify
+
+#include <altivec.h>
+
+vector signed int vsia;
+vector unsigned int vuia;
+vector signed long long vslla;
+vector unsigned long long vulla;
+vector float vfa;
+vector double vda;
+signed int sia;
+unsigned int uia;
+signed long long slla;
+unsigned long long ulla;
+float fa;
+double da;
+
+vector signed int test_vec_replace_elt_si(void) {
+  return vec_replace_elt(vsia, sia, 13); // expected-error {{argument value 13 is outside the valid range [0, 12]}}
+}
+
+vector unsigned int test_vec_replace_elt_ui(void) {
+  return vec_replace_elt(vuia, sia, 1); // expected-error {{arguments are of 
diff erent types ('unsigned int' vs 'int')}}
+}
+
+vector float test_vec_replace_elt_f(void) {
+  return vec_replace_elt(vfa, fa, 20); // expected-error {{argument value 20 is outside the valid range [0, 12]}}
+}
+
+vector float test_vec_replace_elt_f_2(void) {
+  return vec_replace_elt(vfa, da, 0); // expected-error {{arguments are of 
diff erent types ('float' vs 'double')}}
+}
+
+vector signed long long test_vec_replace_elt_sll(void) {
+  return vec_replace_elt(vslla, slla, 9); // expected-error {{argument value 9 is outside the valid range [0, 8]}}
+}
+
+vector unsigned long long test_vec_replace_elt_ull(void) {
+  return vec_replace_elt(vulla, vda, 0); // expected-error {{arguments are of 
diff erent types ('unsigned long long' vs '__vector double' (vector of 2 'double' values))}}
+}
+
+vector unsigned long long test_vec_replace_elt_ull_2(void) {
+  return vec_replace_elt(vulla, vulla, vsia); // expected-error {{argument to '__builtin_altivec_vec_replace_elt' must be a constant integer}}
+}
+
+vector double test_vec_replace_elt_d(void) {
+  return vec_replace_elt(vda, da, 33); // expected-error {{argument value 33 is outside the valid range [0, 8]}}
+}
+
+vector unsigned char test_vec_replace_unaligned_si(void) {
+  return vec_replace_unaligned(vsia, da, 6); // expected-error {{arguments are of 
diff erent types ('int' vs 'double')}}
+}
+
+vector unsigned char test_vec_replace_unaligned_ui(void) {
+  return vec_replace_unaligned(vuia, uia, 14); // expected-error {{argument value 14 is outside the valid range [0, 12]}}
+}
+
+vector unsigned char test_vec_replace_unaligned_f(void) {
+  return vec_replace_unaligned(vfa, fa, 19); // expected-error {{argument value 19 is outside the valid range [0, 12]}}
+}
+
+vector unsigned char test_vec_replace_unaligned_sll(void) {
+  return vec_replace_unaligned(vslla, fa, 0); // expected-error {{arguments are of 
diff erent types ('long long' vs 'float')}}
+}
+
+vector unsigned char test_vec_replace_unaligned_ull(void) {
+  return vec_replace_unaligned(vulla, ulla, 12); // expected-error {{argument value 12 is outside the valid range [0, 8]}}
+}
+
+vector unsigned char test_vec_replace_unaligned_d(void) {
+  return vec_replace_unaligned(vda, fa, 8); // expected-error {{arguments are of 
diff erent types ('double' vs 'float')}}
+}
+
+vector unsigned char test_vec_replace_unaligned_d_2(void) {
+  return vec_replace_unaligned(vda, vda, da); // expected-error {{argument to '__builtin_altivec_vec_replace_unaligned' must be a constant integer}}
+}