[llvm] r214892 - Optimize vector fabs of bitcasted constant integer values.

Tue Aug 5 12:23:01 PDT 2014

Sorry about that. Let me know if the patch below corrects all problems.

1. I don't think we can use 'CHECK-NEXT' in the ARM cases because the order
of the moves to r0 and r1 is not fixed. We can't even use 'CHECK' to
confirm both the #0 and the #-2147483648 move instructions because plain
'CHECK' imposes ordering. Is there another directive that doesn't impose
ordering?

2. The 'CHECK-NOT' in the ARM cases was requested by Renato. Should the
equivalent also go in the X86 tests?

3. The X86 cases are just one instruction + return, so I've added the
CHECK-NEXT for 'retq' there.

4. I don't think we need to regex the register names in any of these cases.
The ABI specifies the return registers, so they are fixed.

Index: test/CodeGen/ARM/fabs-neon.ll
===================================================================

--- test/CodeGen/ARM/fabs-neon.ll    (revision 214897)
+++ test/CodeGen/ARM/fabs-neon.ll    (working copy)
@@ -31,24 +31,26 @@
 ; We should generate:
 ;    mov    r0, #0
 ;    mvn    r1, #-2147483648
-;    mov    pc, lr
+;    bx    lr

+define i64 @fabs_v2f32_1() {
 ; CHECK-LABEL: fabs_v2f32_1
-define i64 @fabs_v2f32_1() {
+; CHECK: mvn r1, #-2147483648
+; CHECK: bx lr
+; CHECK-NOT: vabs
  %bitcast = bitcast i64 18446744069414584320 to <2 x float> ;
0xFFFF_FFFF_0000_0000
  %fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %bitcast)
  %ret = bitcast <2 x float> %fabs to i64
  ret i64 %ret
-; CHECK: mvn r1, #-2147483648
-; CHECK-NOT: vabs
 }

+define i64 @fabs_v2f32_2() {
 ; CHECK-LABEL: fabs_v2f32_2
-define i64 @fabs_v2f32_2() {
+; CHECK: mvn r0, #-2147483648
+; CHECK: bx lr
+; CHECK-NOT: vabs
  %bitcast = bitcast i64 4294967295 to <2 x float> ; 0x0000_0000_FFFF_FFFF
  %fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %bitcast)
  %ret = bitcast <2 x float> %fabs to i64
  ret i64 %ret
-; CHECK: mvn r0, #-2147483648
-; CHECK-NOT: vabs
 }
Index: test/CodeGen/X86/vec_fabs.ll
===================================================================
--- test/CodeGen/X86/vec_fabs.ll    (revision 214897)
+++ test/CodeGen/X86/vec_fabs.ll    (working copy)
@@ -54,22 +54,22 @@

 ; CHECK-LABEL: fabs_v2f32_1
 define i64 @fabs_v2f32_1() {
+; CHECK: movabsq $9223372032559808512, %rax # imm = 0x7FFFFFFF00000000
+; CHECK-NEXT: retq
  %bitcast = bitcast i64 18446744069414584320 to <2 x float> ;
0xFFFF_FFFF_0000_0000
  %fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %bitcast)
  %ret = bitcast <2 x float> %fabs to i64
  ret i64 %ret
-; CHECK: movabsq $9223372032559808512, %rax
-;  # imm = 0x7FFF_FFFF_0000_0000
 }

 ; CHECK-LABEL: fabs_v2f32_2
 define i64 @fabs_v2f32_2() {
+; CHECK: movl $2147483647, %eax       # imm = 0x7FFFFFFF
+; CHECK-NEXT: retq
  %bitcast = bitcast i64 4294967295 to <2 x float> ; 0x0000_0000_FFFF_FFFF
  %fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %bitcast)
  %ret = bitcast <2 x float> %fabs to i64
  ret i64 %ret
-; CHECK: movl $2147483647, %eax
-;  # imm = 0x0000_0000_7FFF_FFFF
 }


On Tue, Aug 5, 2014 at 12:41 PM, Chandler Carruth <chandlerc at google.com>
wrote:

>
> On Tue, Aug 5, 2014 at 10:35 AM, Sanjay Patel <spatel at rotateright.com>
> wrote:
>
>>  ; PR20354: when generating code for a vector fabs op,
>> -; make sure the correct mask is used for all vector elements.
>> -; CHECK-LABEL: .LCPI4_0:
>> -; CHECK-NEXT:    .long 2147483647
>> -; CHECK-NEXT:    .long 2147483647
>> -define i64 @fabs_v2f32(<2 x float> %v) {
>> -; CHECK-LABEL: fabs_v2f32:
>> -; CHECK:         movabsq $-9223372034707292160, %[[R:r[^ ]+]]
>> -; CHECK-NEXT:    vmovq %[[R]], %[[X:xmm[0-9]+]]
>> -; CHECK-NEXT:    vandps   {{.*}}.LCPI4_0{{.*}}, %[[X]], %[[X]]
>> -; CHECK-NEXT:    vmovq   %[[X]], %rax
>> -; CHECK-NEXT:    retq
>> -  %highbits = bitcast i64 9223372039002259456 to <2 x float> ;
>> 0x8000_0000_8000_0000
>> -  %fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %highbits)
>> -  %ret = bitcast <2 x float> %fabs to i64
>> -  ret i64 %ret
>> +; make sure that we're only turning off the sign bit of each float value.
>> +; No constant pool loads or vector ops are needed for the fabs of a
>> +; bitcasted integer constant; we should just return an integer constant
>> +; that has the sign bits turned off.
>> +;
>> +; So instead of something like this:
>> +;    movabsq (constant pool load of mask for sign bits)
>> +;    vmovq   (move from integer register to vector/fp register)
>> +;    vandps  (mask off sign bits)
>> +;    vmovq   (move vector/fp register back to integer return register)
>> +;
>> +; We should generate:
>> +;    mov     (put constant value in return register)
>> +
>> +; CHECK-LABEL: fabs_v2f32_1
>> +define i64 @fabs_v2f32_1() {
>> + %bitcast = bitcast i64 18446744069414584320 to <2 x float> ;
>> 0xFFFF_FFFF_0000_0000
>> + %fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %bitcast)
>> + %ret = bitcast <2 x float> %fabs to i64
>> + ret i64 %ret
>> +; CHECK: movabsq $9223372032559808512, %rax
>> +;  # imm = 0x7FFF_FFFF_0000_0000
>> +}
>>
>
> I would strongly prefer that you follow the style of FileCheck patterns I
> used. Specifically, please check the entire sequence of instructions.
>
> Also, rather than the weird '# imm' comment, I think we should just check
> the verbose asm comment that produces the readable hex form. Using
> FileCheck against the verbose asm comments just leads to more readable and
> maintainable tests, even if it's good to not always rely on them (in case
> they contain a bug somehow).
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20140805/1f3007fd/attachment.html>