[clang] 7298ae3 - [clang][CodeGen] Fix in codegen for __builtin_popcountg/ctzg/clzg (#90845)
via cfe-commits
cfe-commits at lists.llvm.org
Thu May 2 13:49:42 PDT 2024
Author: Björn Pettersson
Date: 2024-05-02T22:49:39+02:00
New Revision: 7298ae3b6d97777443aa89e38ab34b15482abfb3
URL: https://github.com/llvm/llvm-project/commit/7298ae3b6d97777443aa89e38ab34b15482abfb3
DIFF: https://github.com/llvm/llvm-project/commit/7298ae3b6d97777443aa89e38ab34b15482abfb3.diff
LOG: [clang][CodeGen] Fix in codegen for __builtin_popcountg/ctzg/clzg (#90845)
Make sure that the result from the popcnt/ctlz/cttz intrinsics is cast
to int as an unsigned value (zero-extended), rather than as a signed
value (sign-extended), when expanding the
__builtin_popcountg/__builtin_ctzg/__builtin_clzg builtins.
An example would be
unsigned _BitInt(1) x = ...;
int y = __builtin_popcountg(x);
which previously was incorrectly expanded to
%1 = call i1 @llvm.ctpop.i1(i1 %0)
%cast = sext i1 %1 to i32
Since the input type is generic for those "g" versions of the builtins,
the intrinsic call may return a value for which the sign bit is set
(this typically happens for _BitInt of size 1 and 2). So we need to emit
a zext rather than a sext to avoid negative results.
Added:
clang/test/CodeGen/builtins-bitint.c
Modified:
clang/lib/CodeGen/CGBuiltin.cpp
clang/test/CodeGen/builtins.c
Removed:
################################################################################
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 18bed60f15f47c..9d0e6627a52016 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -3239,8 +3239,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
Builder.getInt1(HasFallback || getTarget().isCLZForZeroUndef());
Value *Result = Builder.CreateCall(F, {ArgValue, ZeroUndef});
if (Result->getType() != ResultType)
- Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
- "cast");
+ Result =
+ Builder.CreateIntCast(Result, ResultType, /*isSigned*/ false, "cast");
if (!HasFallback)
return RValue::get(Result);
@@ -3271,8 +3271,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
Builder.getInt1(HasFallback || getTarget().isCLZForZeroUndef());
Value *Result = Builder.CreateCall(F, {ArgValue, ZeroUndef});
if (Result->getType() != ResultType)
- Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
- "cast");
+ Result =
+ Builder.CreateIntCast(Result, ResultType, /*isSigned*/ false, "cast");
if (!HasFallback)
return RValue::get(Result);
@@ -3351,8 +3351,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
llvm::Type *ResultType = ConvertType(E->getType());
Value *Result = Builder.CreateCall(F, ArgValue);
if (Result->getType() != ResultType)
- Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
- "cast");
+ Result =
+ Builder.CreateIntCast(Result, ResultType, /*isSigned*/ false, "cast");
return RValue::get(Result);
}
case Builtin::BI__builtin_unpredictable: {
diff --git a/clang/test/CodeGen/builtins-bitint.c b/clang/test/CodeGen/builtins-bitint.c
new file mode 100644
index 00000000000000..804e4971287737
--- /dev/null
+++ b/clang/test/CodeGen/builtins-bitint.c
@@ -0,0 +1,126 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4
+// RUN: %clang_cc1 -triple arm-unknown-unknown -O0 -std=c23 -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECK-O0
+// RUN: %clang_cc1 -triple arm-unknown-unknown -O1 -std=c23 -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECK-O1
+
+// Verify that the result from the intrinsic call is zero extended to avoid that
+// we get a negative result from popcountg/ctzg/clzg.
+
+// CHECK-O0-LABEL: define dso_local arm_aapcscc i32 @test_popcountg_ubi1(
+// CHECK-O0-SAME: ) #[[ATTR0:[0-9]+]] {
+// CHECK-O0-NEXT: entry:
+// CHECK-O0-NEXT: [[A:%.*]] = alloca i1, align 1
+// CHECK-O0-NEXT: store i1 true, ptr [[A]], align 1
+// CHECK-O0-NEXT: [[TMP0:%.*]] = load i1, ptr [[A]], align 1
+// CHECK-O0-NEXT: [[TMP1:%.*]] = call i1 @llvm.ctpop.i1(i1 [[TMP0]])
+// CHECK-O0-NEXT: [[CAST:%.*]] = zext i1 [[TMP1]] to i32
+// CHECK-O0-NEXT: ret i32 [[CAST]]
+//
+// CHECK-O1-LABEL: define dso_local arm_aapcscc noundef i32 @test_popcountg_ubi1(
+// CHECK-O1-SAME: ) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+// CHECK-O1-NEXT: entry:
+// CHECK-O1-NEXT: ret i32 1
+//
+int test_popcountg_ubi1() {
+ unsigned _BitInt(1) a = 1uwb;
+ return __builtin_popcountg(a);
+}
+
+// CHECK-O0-LABEL: define dso_local arm_aapcscc i32 @test_popcountg_ubi2(
+// CHECK-O0-SAME: ) #[[ATTR0]] {
+// CHECK-O0-NEXT: entry:
+// CHECK-O0-NEXT: [[A:%.*]] = alloca i2, align 1
+// CHECK-O0-NEXT: store i2 -1, ptr [[A]], align 1
+// CHECK-O0-NEXT: [[TMP0:%.*]] = load i2, ptr [[A]], align 1
+// CHECK-O0-NEXT: [[TMP1:%.*]] = call i2 @llvm.ctpop.i2(i2 [[TMP0]])
+// CHECK-O0-NEXT: [[CAST:%.*]] = zext i2 [[TMP1]] to i32
+// CHECK-O0-NEXT: ret i32 [[CAST]]
+//
+// CHECK-O1-LABEL: define dso_local arm_aapcscc noundef i32 @test_popcountg_ubi2(
+// CHECK-O1-SAME: ) local_unnamed_addr #[[ATTR0]] {
+// CHECK-O1-NEXT: entry:
+// CHECK-O1-NEXT: ret i32 2
+//
+int test_popcountg_ubi2() {
+ unsigned _BitInt(2) a = 3uwb;
+ return __builtin_popcountg(a);
+}
+
+// CHECK-O0-LABEL: define dso_local arm_aapcscc i32 @test_ctzg_ubi1(
+// CHECK-O0-SAME: ) #[[ATTR0]] {
+// CHECK-O0-NEXT: entry:
+// CHECK-O0-NEXT: [[A:%.*]] = alloca i1, align 1
+// CHECK-O0-NEXT: store i1 false, ptr [[A]], align 1
+// CHECK-O0-NEXT: [[TMP0:%.*]] = load i1, ptr [[A]], align 1
+// CHECK-O0-NEXT: [[TMP1:%.*]] = call i1 @llvm.cttz.i1(i1 [[TMP0]], i1 false)
+// CHECK-O0-NEXT: [[CAST:%.*]] = zext i1 [[TMP1]] to i32
+// CHECK-O0-NEXT: ret i32 [[CAST]]
+//
+// CHECK-O1-LABEL: define dso_local arm_aapcscc noundef i32 @test_ctzg_ubi1(
+// CHECK-O1-SAME: ) local_unnamed_addr #[[ATTR0]] {
+// CHECK-O1-NEXT: entry:
+// CHECK-O1-NEXT: ret i32 1
+//
+int test_ctzg_ubi1() {
+ unsigned _BitInt(1) a = 0uwb;
+ return __builtin_ctzg(a);
+}
+
+// CHECK-O0-LABEL: define dso_local arm_aapcscc i32 @test_ctzg_ubi2(
+// CHECK-O0-SAME: ) #[[ATTR0]] {
+// CHECK-O0-NEXT: entry:
+// CHECK-O0-NEXT: [[A:%.*]] = alloca i2, align 1
+// CHECK-O0-NEXT: store i2 0, ptr [[A]], align 1
+// CHECK-O0-NEXT: [[TMP0:%.*]] = load i2, ptr [[A]], align 1
+// CHECK-O0-NEXT: [[TMP1:%.*]] = call i2 @llvm.cttz.i2(i2 [[TMP0]], i1 false)
+// CHECK-O0-NEXT: [[CAST:%.*]] = zext i2 [[TMP1]] to i32
+// CHECK-O0-NEXT: ret i32 [[CAST]]
+//
+// CHECK-O1-LABEL: define dso_local arm_aapcscc noundef i32 @test_ctzg_ubi2(
+// CHECK-O1-SAME: ) local_unnamed_addr #[[ATTR0]] {
+// CHECK-O1-NEXT: entry:
+// CHECK-O1-NEXT: ret i32 2
+//
+int test_ctzg_ubi2() {
+ unsigned _BitInt(2) a = 0uwb;
+ return __builtin_ctzg(a);
+}
+
+// CHECK-O0-LABEL: define dso_local arm_aapcscc i32 @test_clzg_ubi1(
+// CHECK-O0-SAME: ) #[[ATTR0]] {
+// CHECK-O0-NEXT: entry:
+// CHECK-O0-NEXT: [[A:%.*]] = alloca i1, align 1
+// CHECK-O0-NEXT: store i1 false, ptr [[A]], align 1
+// CHECK-O0-NEXT: [[TMP0:%.*]] = load i1, ptr [[A]], align 1
+// CHECK-O0-NEXT: [[TMP1:%.*]] = call i1 @llvm.ctlz.i1(i1 [[TMP0]], i1 false)
+// CHECK-O0-NEXT: [[CAST:%.*]] = zext i1 [[TMP1]] to i32
+// CHECK-O0-NEXT: ret i32 [[CAST]]
+//
+// CHECK-O1-LABEL: define dso_local arm_aapcscc noundef i32 @test_clzg_ubi1(
+// CHECK-O1-SAME: ) local_unnamed_addr #[[ATTR0]] {
+// CHECK-O1-NEXT: entry:
+// CHECK-O1-NEXT: ret i32 1
+//
+int test_clzg_ubi1() {
+ unsigned _BitInt(1) a = 0uwb;
+ return __builtin_clzg(a);
+}
+
+// CHECK-O0-LABEL: define dso_local arm_aapcscc i32 @test_clzg_ubi2(
+// CHECK-O0-SAME: ) #[[ATTR0]] {
+// CHECK-O0-NEXT: entry:
+// CHECK-O0-NEXT: [[A:%.*]] = alloca i2, align 1
+// CHECK-O0-NEXT: store i2 0, ptr [[A]], align 1
+// CHECK-O0-NEXT: [[TMP0:%.*]] = load i2, ptr [[A]], align 1
+// CHECK-O0-NEXT: [[TMP1:%.*]] = call i2 @llvm.ctlz.i2(i2 [[TMP0]], i1 false)
+// CHECK-O0-NEXT: [[CAST:%.*]] = zext i2 [[TMP1]] to i32
+// CHECK-O0-NEXT: ret i32 [[CAST]]
+//
+// CHECK-O1-LABEL: define dso_local arm_aapcscc noundef i32 @test_clzg_ubi2(
+// CHECK-O1-SAME: ) local_unnamed_addr #[[ATTR0]] {
+// CHECK-O1-NEXT: entry:
+// CHECK-O1-NEXT: ret i32 2
+//
+int test_clzg_ubi2() {
+ unsigned _BitInt(2) a = 0uwb;
+ return __builtin_clzg(a);
+}
diff --git a/clang/test/CodeGen/builtins.c b/clang/test/CodeGen/builtins.c
index 407e0857d22311..b41efb59e61dbc 100644
--- a/clang/test/CodeGen/builtins.c
+++ b/clang/test/CodeGen/builtins.c
@@ -949,12 +949,12 @@ void test_builtin_popcountg(unsigned char uc, unsigned short us,
pop = __builtin_popcountg(uc);
// CHECK: %1 = load i8, ptr %uc.addr, align 1
// CHECK-NEXT: %2 = call i8 @llvm.ctpop.i8(i8 %1)
- // CHECK-NEXT: %cast = sext i8 %2 to i32
+ // CHECK-NEXT: %cast = zext i8 %2 to i32
// CHECK-NEXT: store volatile i32 %cast, ptr %pop, align 4
pop = __builtin_popcountg(us);
// CHECK-NEXT: %3 = load i16, ptr %us.addr, align 2
// CHECK-NEXT: %4 = call i16 @llvm.ctpop.i16(i16 %3)
- // CHECK-NEXT: %cast1 = sext i16 %4 to i32
+ // CHECK-NEXT: %cast1 = zext i16 %4 to i32
// CHECK-NEXT: store volatile i32 %cast1, ptr %pop, align 4
pop = __builtin_popcountg(ui);
// CHECK-NEXT: %5 = load i32, ptr %ui.addr, align 4
@@ -992,12 +992,12 @@ void test_builtin_clzg(unsigned char uc, unsigned short us, unsigned int ui,
lz = __builtin_clzg(uc);
// CHECK: %1 = load i8, ptr %uc.addr, align 1
// CHECK-NEXT: %2 = call i8 @llvm.ctlz.i8(i8 %1, i1 true)
- // CHECK-NEXT: %cast = sext i8 %2 to i32
+ // CHECK-NEXT: %cast = zext i8 %2 to i32
// CHECK-NEXT: store volatile i32 %cast, ptr %lz, align 4
lz = __builtin_clzg(us);
// CHECK-NEXT: %3 = load i16, ptr %us.addr, align 2
// CHECK-NEXT: %4 = call i16 @llvm.ctlz.i16(i16 %3, i1 true)
- // CHECK-NEXT: %cast1 = sext i16 %4 to i32
+ // CHECK-NEXT: %cast1 = zext i16 %4 to i32
// CHECK-NEXT: store volatile i32 %cast1, ptr %lz, align 4
lz = __builtin_clzg(ui);
// CHECK-NEXT: %5 = load i32, ptr %ui.addr, align 4
@@ -1026,7 +1026,7 @@ void test_builtin_clzg(unsigned char uc, unsigned short us, unsigned int ui,
lz = __builtin_clzg(uc, sc);
// CHECK-NEXT: %15 = load i8, ptr %uc.addr, align 1
// CHECK-NEXT: %16 = call i8 @llvm.ctlz.i8(i8 %15, i1 true)
- // CHECK-NEXT: %cast6 = sext i8 %16 to i32
+ // CHECK-NEXT: %cast6 = zext i8 %16 to i32
// CHECK-NEXT: %iszero = icmp eq i8 %15, 0
// CHECK-NEXT: %17 = load i8, ptr %sc.addr, align 1
// CHECK-NEXT: %conv = sext i8 %17 to i32
@@ -1035,7 +1035,7 @@ void test_builtin_clzg(unsigned char uc, unsigned short us, unsigned int ui,
lz = __builtin_clzg(us, uc);
// CHECK-NEXT: %18 = load i16, ptr %us.addr, align 2
// CHECK-NEXT: %19 = call i16 @llvm.ctlz.i16(i16 %18, i1 true)
- // CHECK-NEXT: %cast7 = sext i16 %19 to i32
+ // CHECK-NEXT: %cast7 = zext i16 %19 to i32
// CHECK-NEXT: %iszero8 = icmp eq i16 %18, 0
// CHECK-NEXT: %20 = load i8, ptr %uc.addr, align 1
// CHECK-NEXT: %conv9 = zext i8 %20 to i32
@@ -1094,12 +1094,12 @@ void test_builtin_ctzg(unsigned char uc, unsigned short us, unsigned int ui,
tz = __builtin_ctzg(uc);
// CHECK: %1 = load i8, ptr %uc.addr, align 1
// CHECK-NEXT: %2 = call i8 @llvm.cttz.i8(i8 %1, i1 true)
- // CHECK-NEXT: %cast = sext i8 %2 to i32
+ // CHECK-NEXT: %cast = zext i8 %2 to i32
// CHECK-NEXT: store volatile i32 %cast, ptr %tz, align 4
tz = __builtin_ctzg(us);
// CHECK-NEXT: %3 = load i16, ptr %us.addr, align 2
// CHECK-NEXT: %4 = call i16 @llvm.cttz.i16(i16 %3, i1 true)
- // CHECK-NEXT: %cast1 = sext i16 %4 to i32
+ // CHECK-NEXT: %cast1 = zext i16 %4 to i32
// CHECK-NEXT: store volatile i32 %cast1, ptr %tz, align 4
tz = __builtin_ctzg(ui);
// CHECK-NEXT: %5 = load i32, ptr %ui.addr, align 4
@@ -1128,7 +1128,7 @@ void test_builtin_ctzg(unsigned char uc, unsigned short us, unsigned int ui,
tz = __builtin_ctzg(uc, sc);
// CHECK-NEXT: %15 = load i8, ptr %uc.addr, align 1
// CHECK-NEXT: %16 = call i8 @llvm.cttz.i8(i8 %15, i1 true)
- // CHECK-NEXT: %cast6 = sext i8 %16 to i32
+ // CHECK-NEXT: %cast6 = zext i8 %16 to i32
// CHECK-NEXT: %iszero = icmp eq i8 %15, 0
// CHECK-NEXT: %17 = load i8, ptr %sc.addr, align 1
// CHECK-NEXT: %conv = sext i8 %17 to i32
@@ -1137,7 +1137,7 @@ void test_builtin_ctzg(unsigned char uc, unsigned short us, unsigned int ui,
tz = __builtin_ctzg(us, uc);
// CHECK-NEXT: %18 = load i16, ptr %us.addr, align 2
// CHECK-NEXT: %19 = call i16 @llvm.cttz.i16(i16 %18, i1 true)
- // CHECK-NEXT: %cast7 = sext i16 %19 to i32
+ // CHECK-NEXT: %cast7 = zext i16 %19 to i32
// CHECK-NEXT: %iszero8 = icmp eq i16 %18, 0
// CHECK-NEXT: %20 = load i8, ptr %uc.addr, align 1
// CHECK-NEXT: %conv9 = zext i8 %20 to i32
More information about the cfe-commits
mailing list