[cfe-commits] implicit sign/bitwidth conversions during array indexing?

Zhongxing Xu xuzhongxing at gmail.com
Thu Nov 13 20:24:15 PST 2008


On Fri, Nov 14, 2008 at 11:41 AM, Ted Kremenek <kremenek at apple.com> wrote:

>
>
>
>
> On Nov 13, 2008, at 6:43 PM, Zhongxing Xu <xuzhongxing at gmail.com> wrote:
>
>
>
> On Fri, Nov 14, 2008 at 10:31 AM, Ted Kremenek <kremenek at apple.com> wrote:
>
>>
>> On Nov 13, 2008, at 6:03 PM, Zhongxing Xu wrote:
>>
>> The standard does not specify any information about this conversion. The
>> compiler interprets E1[E2] as *(E1+E2).
>>
>>
>> That's right.  That's what the C standard says too. (section 6.5.2.1).
>>
>> The sign does not affect the way that the machine does 'add' (or 'sub').
>> (The sign only affects some operations; cf. the LLVM instructions.)
>>
>>
>> I'm skeptical that the choice of signed-ness or bitwidth is arbitrary when
>> handling E2, but I could be wrong.  Since the standard says that E1[E2] is
>> the same as *(E1 + E2), we probably need to perform any implicit type
>> conversions that would be done by Sema if the expression were literally
>> written that way.  The other option is to look at what a compiler does
>> (llvm-gcc for example).
>>
>> For example:
>>
>> void f(int *p) {
>>   short i = 0;
>>   unsigned short i_u = 0;
>>   int j = 0;
>>   unsigned j_u = 0;
>>   long long k = 0;
>>
>>   int x;
>>   x = *(p + i);
>>   x += *(p + j);
>>   x += *(p + i_u);
>>   x += *(p + j_u);
>>   x += *(p + k);
>>
>>   return x;
>> }
>>
>> The -ast-dump (without the DeclStmts) is:
>>
>>   (BinaryOperator 0x21088f0 <line:9:3, col:14> 'int' '='
>>     (DeclRefExpr 0x2108810 <col:3> 'int' Var='x' 0x21087c0)
>>     (UnaryOperator 0x21088d0 <col:7, col:14> 'int' prefix '*'
>>       (ParenExpr 0x21088b0 <col:8, col:14> 'int *'
>>         (BinaryOperator 0x2108890 <col:9, col:13> 'int *' '+'
>>           (DeclRefExpr 0x2108830 <col:9> 'int *' ParmVar='p' 0x21084a0)
>> *          (ImplicitCastExpr 0x2108870 <col:13> 'int'*
>>             (DeclRefExpr 0x2108850 <col:13> 'short' Var='i'
>> 0x21042e0))))))
>>   (CompoundAssignOperator 0x21089d0 <line:10:3, col:15> 'int' '+='
>> ComputeTy='int'
>>     (DeclRefExpr 0x2108910 <col:3> 'int' Var='x' 0x21087c0)
>>     (UnaryOperator 0x21089b0 <col:8, col:15> 'int' prefix '*'
>>       (ParenExpr 0x2108990 <col:9, col:15> 'int *'
>>         (BinaryOperator 0x2108970 <col:10, col:14> 'int *' '+'
>>           (DeclRefExpr 0x2108930 <col:10> 'int *' ParmVar='p' 0x21084a0)
>>           (DeclRefExpr 0x2108950 <col:14> 'int' Var='j' 0x2108630)))))
>>   (CompoundAssignOperator 0x2108ad0 <line:11:3, col:17> 'int' '+='
>> ComputeTy='int'
>>     (DeclRefExpr 0x21089f0 <col:3> 'int' Var='x' 0x21087c0)
>>     (UnaryOperator 0x2108ab0 <col:8, col:17> 'int' prefix '*'
>>       (ParenExpr 0x2108a90 <col:9, col:17> 'int *'
>>         (BinaryOperator 0x2108a70 <col:10, col:14> 'int *' '+'
>>           (DeclRefExpr 0x2108a10 <col:10> 'int *' ParmVar='p' 0x21084a0)
>> *          (ImplicitCastExpr 0x2108a50 <col:14> 'int'*
>>             (DeclRefExpr 0x2108a30 <col:14> 'unsigned short' Var='i_u'
>> 0x21085a0))))))
>>   (CompoundAssignOperator 0x2108bb0 <line:12:3, col:17> 'int' '+='
>> ComputeTy='int'
>>     (DeclRefExpr 0x2108af0 <col:3> 'int' Var='x' 0x21087c0)
>>     (UnaryOperator 0x2108b90 <col:8, col:17> 'int' prefix '*'
>>       (ParenExpr 0x2108b70 <col:9, col:17> 'int *'
>>         (BinaryOperator 0x2108b50 <col:10, col:14> 'int *' '+'
>>           (DeclRefExpr 0x2108b10 <col:10> 'int *' ParmVar='p' 0x21084a0)
>>           (DeclRefExpr 0x2108b30 <col:14> 'unsigned int' Var='j_u'
>> 0x21086a0)))))
>>   (CompoundAssignOperator 0x2108c90 <line:13:3, col:15> 'int' '+='
>> ComputeTy='int'
>>     (DeclRefExpr 0x2108bd0 <col:3> 'int' Var='x' 0x21087c0)
>>     (UnaryOperator 0x2108c70 <col:8, col:15> 'int' prefix '*'
>>       (ParenExpr 0x2108c50 <col:9, col:15> 'int *'
>>         (BinaryOperator 0x2108c30 <col:10, col:14> 'int *' '+'
>>           (DeclRefExpr 0x2108bf0 <col:10> 'int *' ParmVar='p' 0x21084a0)
>>           (DeclRefExpr 0x2108c10 <col:14> 'long long' Var='k'
>> 0x2108730)))))
>>
>> It appears that a promotion and sign change is done for 'short' and
>> 'unsigned short' to int, but there are no conversions otherwise.  Is this
>> correct?  Surely the compiler does some kind of promotion/truncation when
>> doing pointer arithmetic.
>>
>
> Is this the rule:
>  - if the bitwidth of E2 is the same as the pointer's, do the arithmetic.
>  - if the bitwidth of E2 is different from the pointer's, truncate or extend
> it to the width of the pointer. Signed-ness determines whether the extension
> is a sign-extension or a zero-extension. Then do the arithmetic.
>
>
> I'm not certain.  Note that the 'long long' value 'k' was not truncated.
>  Is this a Sema bug, or is this the correct behavior?  For this target
> LongLongWidth is 64, the bit width for 'int' is 32, and the bit width for a
> pointer is (I believe) 32 bits as well.
>

It's truncated in the generated code:

void f() {
  int a[10];
  short x1;
  unsigned short x2;
  long x5;
  unsigned long x6;
  long long x3;
  unsigned long long x4;

  a[x1] = 3;
  a[x2] = 3;
  a[x3] = 3;
  a[x4] = 3;
  a[x5] = 3;
  a[x6] = 3;
}

; ModuleID = 'array3.c'
target datalayout =
"e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
target triple = "i686-pc-linux-gnu"

define void @f(...) nounwind {
entry:
    %a = alloca [10 x i32], align 4        ; <[10 x i32]*> [#uses=6]
    %x1 = alloca i16, align 2        ; <i16*> [#uses=1]
    %x2 = alloca i16, align 2        ; <i16*> [#uses=1]
    %x5 = alloca i32, align 4        ; <i32*> [#uses=1]
    %x6 = alloca i32, align 4        ; <i32*> [#uses=1]
    %x3 = alloca i64, align 4        ; <i64*> [#uses=1]
    %x4 = alloca i64, align 4        ; <i64*> [#uses=1]
    %tmp = load i16* %x1        ; <i16> [#uses=1]
    %arraydecay = getelementptr [10 x i32]* %a, i32 0, i32 0        ; <i32*>
[#uses=1]
    %idxprom = *sext *i16 %tmp to i32        ; <i32> [#uses=1]
    %arrayidx = getelementptr i32* %arraydecay, i32 %idxprom        ; <i32*>
[#uses=1]
    store i32 3, i32* %arrayidx
    %tmp1 = load i16* %x2        ; <i16> [#uses=1]
    %arraydecay2 = getelementptr [10 x i32]* %a, i32 0, i32 0        ;
<i32*> [#uses=1]
    %idxprom3 = *zext* i16 %tmp1 to i32        ; <i32> [#uses=1]
    %arrayidx4 = getelementptr i32* %arraydecay2, i32 %idxprom3        ;
<i32*> [#uses=1]
    store i32 3, i32* %arrayidx4
    %tmp5 = load i64* %x3        ; <i64> [#uses=1]
    %arraydecay6 = getelementptr [10 x i32]* %a, i32 0, i32 0        ;
<i32*> [#uses=1]
    *%idxprom7 = trunc i64 %tmp5 to i32        ; <i32> [#uses=1]*
    %arrayidx8 = getelementptr i32* %arraydecay6, i32 %idxprom7        ;
<i32*> [#uses=1]
    store i32 3, i32* %arrayidx8
    %tmp9 = load i64* %x4        ; <i64> [#uses=1]
    %arraydecay10 = getelementptr [10 x i32]* %a, i32 0, i32 0        ;
<i32*> [#uses=1]
    *%idxprom11 = trunc i64 %tmp9 to i32        ; <i32> [#uses=1]*
    %arrayidx12 = getelementptr i32* %arraydecay10, i32 %idxprom11        ;
<i32*> [#uses=1]
    store i32 3, i32* %arrayidx12
    %tmp13 = load i32* %x5        ; <i32> [#uses=1]
    %arraydecay14 = getelementptr [10 x i32]* %a, i32 0, i32 0        ;
<i32*> [#uses=1]
    %arrayidx15 = getelementptr i32* %arraydecay14, i32 %tmp13        ;
<i32*> [#uses=1]
    store i32 3, i32* %arrayidx15
    %tmp16 = load i32* %x6        ; <i32> [#uses=1]
    %arraydecay17 = getelementptr [10 x i32]* %a, i32 0, i32 0        ;
<i32*> [#uses=1]
    %arrayidx18 = getelementptr i32* %arraydecay17, i32 %tmp16        ;
<i32*> [#uses=1]
    store i32 3, i32* %arrayidx18
    br label %return

return:        ; preds = %entry
    ret void
}
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/cfe-commits/attachments/20081114/c6099976/attachment.html>


More information about the cfe-commits mailing list