r222339 - Allow EmitVAArg() to promote types and use this to fix some N32/N64 vararg issues for Mips.
Daniel Sanders
daniel.sanders at imgtec.com
Wed Nov 19 02:01:36 PST 2014
Author: dsanders
Date: Wed Nov 19 04:01:35 2014
New Revision: 222339
URL: http://llvm.org/viewvc/llvm-project?rev=222339&view=rev
Log:
Allow EmitVAArg() to promote types and use this to fix some N32/N64 vararg issues for Mips.
Summary:
With this patch, passing a va_list to another function and reading 10 ints from
it works correctly on a big-endian target.
Based on a pair of patches by David Chisnall, one of which I've reworked
for the current trunk.
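
For reference, here is a minimal C sketch of the scenario described in the
summary; the function names are illustrative and are not taken from the patch
or its tests:

  #include <stdarg.h>
  #include <stdio.h>

  /* Reads ten ints from a va_list started by the caller.  Before this
     patch, va_arg(ap, int) on big-endian N32/N64 could read the wrong
     half of the 8-byte argument slot. */
  static int sum10(va_list ap) {
    int total = 0;
    for (int i = 0; i < 10; ++i)
      total += va_arg(ap, int);
    return total;
  }

  static int sum_args(const char *fmt, ...) {
    va_list ap;
    va_start(ap, fmt);
    int total = sum10(ap);
    va_end(ap);
    return total;
  }

  int main(void) {
    printf("%d\n", sum_args("", 1, 2, 3, 4, 5, 6, 7, 8, 9, 10)); /* 55 */
    return 0;
  }
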
Reviewers: theraven, atanasyan
Reviewed By: theraven, atanasyan
Subscribers: cfe-commits
Differential Revision: http://reviews.llvm.org/D6248
Modified:
cfe/trunk/lib/CodeGen/CGExprScalar.cpp
cfe/trunk/lib/CodeGen/TargetInfo.cpp
cfe/trunk/test/CodeGen/mips-varargs.c
Modified: cfe/trunk/lib/CodeGen/CGExprScalar.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGExprScalar.cpp?rev=222339&r1=222338&r2=222339&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CGExprScalar.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGExprScalar.cpp Wed Nov 19 04:01:35 2014
@@ -3296,18 +3296,26 @@ Value *ScalarExprEmitter::VisitChooseExp
Value *ScalarExprEmitter::VisitVAArgExpr(VAArgExpr *VE) {
QualType Ty = VE->getType();
+
if (Ty->isVariablyModifiedType())
CGF.EmitVariablyModifiedType(Ty);
llvm::Value *ArgValue = CGF.EmitVAListRef(VE->getSubExpr());
llvm::Value *ArgPtr = CGF.EmitVAArg(ArgValue, VE->getType());
+ llvm::Type *ArgTy = ConvertType(VE->getType());
// If EmitVAArg fails, we fall back to the LLVM instruction.
if (!ArgPtr)
- return Builder.CreateVAArg(ArgValue, ConvertType(VE->getType()));
+ return Builder.CreateVAArg(ArgValue, ArgTy);
// FIXME Volatility.
- return Builder.CreateLoad(ArgPtr);
+ llvm::Value *Val = Builder.CreateLoad(ArgPtr);
+
+ // If EmitVAArg promoted the type, we must truncate it.
+ if (ArgTy != Val->getType())
+ Val = Builder.CreateTrunc(Val, ArgTy);
+
+ return Val;
}
Value *ScalarExprEmitter::VisitBlockExpr(const BlockExpr *block) {
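
The hunk above makes the scalar emitter load the full, possibly promoted, slot
and then truncate back to the requested type, which is what makes the read
endian-correct. A standalone C illustration of the underlying byte-layout
issue (not part of the patch):

  #include <stdint.h>
  #include <stdio.h>
  #include <string.h>

  int main(void) {
    /* A 64-bit vararg slot holding the promoted value 42. */
    uint64_t slot = 42;

    /* Reading only the first four bytes of the slot is roughly what the
       old code did: it happens to work on little-endian but yields 0 on
       big-endian, where the significant bytes sit at the high addresses. */
    int32_t first_half;
    memcpy(&first_half, &slot, sizeof first_half);

    /* Loading the whole slot and truncating, as the new code does, gives
       42 on either endianness. */
    int32_t truncated = (int32_t)slot;

    printf("first half: %d, truncated: %d\n", (int)first_half, (int)truncated);
    return 0;
  }
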
Modified: cfe/trunk/lib/CodeGen/TargetInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/TargetInfo.cpp?rev=222339&r1=222338&r2=222339&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/TargetInfo.cpp (original)
+++ cfe/trunk/lib/CodeGen/TargetInfo.cpp Wed Nov 19 04:01:35 2014
@@ -5919,6 +5919,14 @@ llvm::Value* MipsABIInfo::EmitVAArg(llvm
CodeGenFunction &CGF) const {
llvm::Type *BP = CGF.Int8PtrTy;
llvm::Type *BPP = CGF.Int8PtrPtrTy;
+
+  // Integer arguments are promoted to 32-bit on O32 and 64-bit on N32/N64.
+ unsigned SlotSizeInBits = IsO32 ? 32 : 64;
+ if (Ty->isIntegerType() &&
+ CGF.getContext().getIntWidth(Ty) < SlotSizeInBits) {
+ Ty = CGF.getContext().getIntTypeForBitwidth(SlotSizeInBits,
+ Ty->isSignedIntegerType());
+ }
CGBuilderTy &Builder = CGF.Builder;
llvm::Value *VAListAddrAsBPP = Builder.CreateBitCast(VAListAddr, BPP, "ap");
@@ -5943,8 +5951,8 @@ llvm::Value* MipsABIInfo::EmitVAArg(llvm
llvm::Value *AlignedAddr = Builder.CreateBitCast(AddrTyped, BP);
TypeAlign = std::max((unsigned)TypeAlign, MinABIStackAlignInBytes);
- uint64_t Offset =
- llvm::RoundUpToAlignment(CGF.getContext().getTypeSize(Ty) / 8, TypeAlign);
+ unsigned ArgSizeInBits = CGF.getContext().getTypeSize(Ty);
+ uint64_t Offset = llvm::RoundUpToAlignment(ArgSizeInBits / 8, TypeAlign);
llvm::Value *NextAddr =
Builder.CreateGEP(AlignedAddr, llvm::ConstantInt::get(IntTy, Offset),
"ap.next");
Modified: cfe/trunk/test/CodeGen/mips-varargs.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/mips-varargs.c?rev=222339&r1=222338&r2=222339&view=diff
==============================================================================
--- cfe/trunk/test/CodeGen/mips-varargs.c (original)
+++ cfe/trunk/test/CodeGen/mips-varargs.c Wed Nov 19 04:01:35 2014
@@ -1,22 +1,25 @@
// RUN: %clang_cc1 -triple mips-unknown-linux -o - -O1 -emit-llvm %s | FileCheck %s -check-prefix=ALL -check-prefix=O32
-// RUN: %clang_cc1 -triple mips64-unknown-linux -o - -O1 -emit-llvm -target-abi n32 %s | FileCheck %s -check-prefix=ALL -check-prefix=N32
-// RUN: %clang_cc1 -triple mips64-unknown-linux -o - -O1 -emit-llvm %s | FileCheck %s -check-prefix=ALL -check-prefix=N64
+// RUN: %clang_cc1 -triple mipsel-unknown-linux -o - -O1 -emit-llvm %s | FileCheck %s -check-prefix=ALL -check-prefix=O32
+// RUN: %clang_cc1 -triple mips64-unknown-linux -o - -O1 -emit-llvm -target-abi n32 %s | FileCheck %s -check-prefix=ALL -check-prefix=N32 -check-prefix=NEW
+// RUN: %clang_cc1 -triple mips64el-unknown-linux -o - -O1 -emit-llvm -target-abi n32 %s | FileCheck %s -check-prefix=ALL -check-prefix=N32 -check-prefix=NEW
+// RUN: %clang_cc1 -triple mips64-unknown-linux -o - -O1 -emit-llvm %s | FileCheck %s -check-prefix=ALL -check-prefix=N64 -check-prefix=NEW
+// RUN: %clang_cc1 -triple mips64el-unknown-linux -o - -O1 -emit-llvm %s | FileCheck %s -check-prefix=ALL -check-prefix=N64 -check-prefix=NEW
#include <stdarg.h>
typedef int v4i32 __attribute__ ((__vector_size__ (16)));
-int test_v4i32(char *fmt, ...) {
+int test_i32(char *fmt, ...) {
va_list va;
va_start(va, fmt);
- v4i32 v = va_arg(va, v4i32);
+ int v = va_arg(va, int);
va_end(va);
- return v[0];
+ return v;
}
-// ALL: define i32 @test_v4i32(i8*{{.*}} %fmt, ...)
+// ALL-LABEL: define i32 @test_i32(i8*{{.*}} %fmt, ...)
//
// O32: %va = alloca i8*, align [[PTRALIGN:4]]
// N32: %va = alloca i8*, align [[PTRALIGN:4]]
@@ -24,26 +27,155 @@ int test_v4i32(char *fmt, ...) {
//
// ALL: [[VA1:%.+]] = bitcast i8** %va to i8*
// ALL: call void @llvm.va_start(i8* [[VA1]])
+//
+// O32: [[TMP0:%.+]] = bitcast i8** %va to i32**
+// O32: [[AP_CUR:%.+]] = load i32** [[TMP0]], align [[PTRALIGN]]
+// NEW: [[TMP0:%.+]] = bitcast i8** %va to i64**
+// NEW: [[AP_CUR:%.+]] = load i64** [[TMP0]], align [[PTRALIGN]]
+//
+// O32: [[TMP1:%.+]] = getelementptr i32* [[AP_CUR]], i32 1
+// O32: [[AP_NEXT:%.+]] = bitcast i32* [[TMP1]] to i8*
+// N32: [[TMP1:%.+]] = getelementptr i64* [[AP_CUR]], {{i32|i64}} 1
+// N64: [[TMP1:%.+]] = getelementptr i64* [[AP_CUR]], {{i32|i64}} 1
+// NEW: [[AP_NEXT:%.+]] = bitcast i64* [[TMP1]] to i8*
+//
+// ALL: store i8* [[AP_NEXT]], i8** %va, align [[PTRALIGN]]
+//
+// O32: [[ARG1:%.+]] = load i32* [[AP_CUR]], align 4
+// NEW: [[TMP2:%.+]] = load i64* [[AP_CUR]], align 8
+// NEW: [[ARG1:%.+]] = trunc i64 [[TMP2]] to i32
+//
+// ALL: call void @llvm.va_end(i8* [[VA1]])
+// ALL: ret i32 [[ARG1]]
+// ALL: }
+
+int test_i32_2args(char *fmt, ...) {
+ va_list va;
+
+ va_start(va, fmt);
+ int v1 = va_arg(va, int);
+ int v2 = va_arg(va, int);
+ va_end(va);
+
+ return v1 + v2;
+}
+
+// ALL-LABEL: define i32 @test_i32_2args(i8*{{.*}} %fmt, ...)
+//
+// ALL: %va = alloca i8*, align [[PTRALIGN]]
+// ALL: [[VA1:%.+]] = bitcast i8** %va to i8*
+// ALL: call void @llvm.va_start(i8* [[VA1]])
+//
+// O32: [[TMP0:%.+]] = bitcast i8** %va to i32**
+// O32: [[AP_CUR:%.+]] = load i32** [[TMP0]], align [[PTRALIGN]]
+// NEW: [[TMP0:%.+]] = bitcast i8** %va to i64**
+// NEW: [[AP_CUR:%.+]] = load i64** [[TMP0]], align [[PTRALIGN]]
+//
+// O32: [[TMP1:%.+]] = getelementptr i32* [[AP_CUR]], i32 1
+// O32: [[AP_NEXT:%.+]] = bitcast i32* [[TMP1]] to i8*
+// N32: [[TMP1:%.+]] = getelementptr i64* [[AP_CUR]], [[INTPTR_T:i32]] 1
+// FIXME: N32 optimised this bitcast out. Why only for this ABI?
+// N64: [[TMP1:%.+]] = getelementptr i64* [[AP_CUR]], [[INTPTR_T:i64]] 1
+// N64: [[AP_NEXT:%.+]] = bitcast i64* [[TMP1]] to i8*
+//
+// O32: store i8* [[AP_NEXT]], i8** %va, align [[PTRALIGN]]
+// FIXME: N32 optimised this store out. Why only for this ABI?
+// N64: store i8* [[AP_NEXT]], i8** %va, align [[PTRALIGN]]
+//
+// O32: [[ARG1:%.+]] = load i32* [[AP_CUR]], align 4
+// NEW: [[TMP3:%.+]] = load i64* [[AP_CUR]], align 8
+// NEW: [[ARG1:%.+]] = trunc i64 [[TMP3]] to i32
+//
+// O32: [[TMP2:%.+]] = getelementptr i32* [[AP_CUR]], i32 2
+// O32: [[AP_NEXT:%.+]] = bitcast i32* [[TMP2]] to i8*
+// NEW: [[TMP2:%.+]] = getelementptr i64* [[AP_CUR]], [[INTPTR_T]] 2
+// NEW: [[AP_NEXT:%.+]] = bitcast i64* [[TMP2]] to i8*
+//
+// ALL: store i8* [[AP_NEXT]], i8** %va, align [[PTRALIGN]]
+//
+// O32: [[ARG2:%.+]] = load i32* [[TMP1]], align 4
+// NEW: [[TMP4:%.+]] = load i64* [[TMP1]], align 8
+// NEW: [[ARG2:%.+]] = trunc i64 [[TMP4]] to i32
+//
+// ALL: call void @llvm.va_end(i8* [[VA1]])
+// ALL: [[ADD:%.+]] = add nsw i32 [[ARG1]], [[ARG2]]
+// ALL: ret i32 [[ADD]]
+// ALL: }
+
+long long test_i64(char *fmt, ...) {
+ va_list va;
+
+ va_start(va, fmt);
+ long long v = va_arg(va, long long);
+ va_end(va);
+
+ return v;
+}
+
+// ALL-LABEL: define i64 @test_i64(i8*{{.*}} %fmt, ...)
+//
+// ALL: %va = alloca i8*, align [[PTRALIGN]]
+// ALL: [[VA1:%.+]] = bitcast i8** %va to i8*
+// ALL: call void @llvm.va_start(i8* [[VA1]])
+//
+// O32: [[AP_CUR:%.+]] = load i8** %va, align [[PTRALIGN]]
+// NEW: [[TMP0:%.+]] = bitcast i8** %va to i64**
+// NEW: [[AP_CUR:%.+]] = load i64** [[TMP0]], align [[PTRALIGN]]
+//
+// i64 is 8-byte aligned. While this is within O32's stack alignment, there's no
+// guarantee that the offset is still 8-byte aligned after earlier reads.
+// O32: [[PTR0:%.+]] = ptrtoint i8* [[AP_CUR]] to [[INTPTR_T:i32]]
+// O32: [[PTR1:%.+]] = add i32 [[PTR0]], 7
+// O32: [[PTR2:%.+]] = and i32 [[PTR1]], -8
+// O32: [[PTR3:%.+]] = inttoptr [[INTPTR_T]] [[PTR2]] to i64*
+// O32: [[PTR4:%.+]] = inttoptr [[INTPTR_T]] [[PTR2]] to i8*
+//
+// O32: [[AP_NEXT:%.+]] = getelementptr i8* [[PTR4]], [[INTPTR_T]] 8
+// N32: [[TMP1:%.+]] = getelementptr i64* [[AP_CUR]], i32 1
+// N64: [[TMP1:%.+]] = getelementptr i64* [[AP_CUR]], i64 1
+// NEW: [[AP_NEXT:%.+]] = bitcast i64* [[TMP1]] to i8*
+//
+// ALL: store i8* [[AP_NEXT]], i8** %va, align [[PTRALIGN]]
+//
+// O32: [[ARG1:%.+]] = load i64* [[PTR3]], align 8
+// NEW: [[ARG1:%.+]] = load i64* [[AP_CUR]], align 8
+//
+// ALL: call void @llvm.va_end(i8* [[VA1]])
+// ALL: ret i64 [[ARG1]]
+// ALL: }
+
+int test_v4i32(char *fmt, ...) {
+ va_list va;
+
+ va_start(va, fmt);
+ v4i32 v = va_arg(va, v4i32);
+ va_end(va);
+
+ return v[0];
+}
+
+// ALL-LABEL: define i32 @test_v4i32(i8*{{.*}} %fmt, ...)
+//
+// ALL: %va = alloca i8*, align [[PTRALIGN]]
+// ALL: [[VA1:%.+]] = bitcast i8** %va to i8*
+// ALL: call void @llvm.va_start(i8* [[VA1]])
// ALL: [[AP_CUR:%.+]] = load i8** %va, align [[PTRALIGN]]
//
-// O32: [[PTR0:%.+]] = ptrtoint i8* [[AP_CUR]] to [[PTRTYPE:i32]]
-// N32: [[PTR0:%.+]] = ptrtoint i8* [[AP_CUR]] to [[PTRTYPE:i32]]
-// N64: [[PTR0:%.+]] = ptrtoint i8* [[AP_CUR]] to [[PTRTYPE:i64]]
+// O32: [[PTR0:%.+]] = ptrtoint i8* [[AP_CUR]] to [[INTPTR_T:i32]]
+// N32: [[PTR0:%.+]] = ptrtoint i8* [[AP_CUR]] to [[INTPTR_T:i32]]
+// N64: [[PTR0:%.+]] = ptrtoint i8* [[AP_CUR]] to [[INTPTR_T:i64]]
//
// Vectors are 16-byte aligned; however, the O32 ABI has a maximum alignment of
// 8 bytes since the base of the stack is 8-byte aligned.
// O32: [[PTR1:%.+]] = add i32 [[PTR0]], 7
// O32: [[PTR2:%.+]] = and i32 [[PTR1]], -8
//
-// N32: [[PTR1:%.+]] = add i32 [[PTR0]], 15
-// N32: [[PTR2:%.+]] = and i32 [[PTR1]], -16
-//
-// N64: [[PTR1:%.+]] = add i64 [[PTR0]], 15
-// N64: [[PTR2:%.+]] = and i64 [[PTR1]], -16
+// NEW: [[PTR1:%.+]] = add [[INTPTR_T]] [[PTR0]], 15
+// NEW: [[PTR2:%.+]] = and [[INTPTR_T]] [[PTR1]], -16
//
-// ALL: [[PTR3:%.+]] = inttoptr [[PTRTYPE]] [[PTR2]] to <4 x i32>*
-// ALL: [[PTR4:%.+]] = inttoptr [[PTRTYPE]] [[PTR2]] to i8*
-// ALL: [[AP_NEXT:%.+]] = getelementptr i8* [[PTR4]], [[PTRTYPE]] 16
+// ALL: [[PTR3:%.+]] = inttoptr [[INTPTR_T]] [[PTR2]] to <4 x i32>*
+// ALL: [[PTR4:%.+]] = inttoptr [[INTPTR_T]] [[PTR2]] to i8*
+// ALL: [[AP_NEXT:%.+]] = getelementptr i8* [[PTR4]], [[INTPTR_T]] 16
// ALL: store i8* [[AP_NEXT]], i8** %va, align [[PTRALIGN]]
// ALL: [[PTR5:%.+]] = load <4 x i32>* [[PTR3]], align 16
// ALL: call void @llvm.va_end(i8* [[VA1]])
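
The O32 checks in test_i64 above round the current argument pointer up to an
8-byte boundary before the load; the same adjustment in plain C looks roughly
like this (illustrative, not clang's code):

  #include <stdint.h>

  /* Mirrors the ptrtoint / add 7 / and -8 / inttoptr sequence in the O32
     checks: round an argument pointer up to an 8-byte boundary. */
  static void *align_up_8(void *p) {
    uintptr_t addr = (uintptr_t)p;
    addr = (addr + 7) & ~(uintptr_t)7;
    return (void *)addr;
  }
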