[PATCH] Add logical ops to Mips fast-isel

Fri Feb 27 10:34:40 PST 2015

================
Comment at: test/CodeGen/Mips/Fast-ISel/logopm.ll:89
@@ +88,3 @@
+; CHECK-DAG:    lbu     $[[UB1:[0-9]+]], 0($[[UB1_ADDR]])
+; CHECK-DAG:    and     $[[RES1:[0-9]+]], $[[UB1]], $zero
+; CHECK:        andi    $[[RES:[0-9]+]], $[[RES1]], 1
----------------
dsanders wrote:
> There's a really trivial optimization opportunity here (x & 0 == 0). It would simply require an 'if (Opc == Mips::AND && RHSReg == Mips::ZERO) return RHSReg' in emitLogicalOp().
> 
> I'm wondering if we should handle it or not given that the goal is compile time. I'm leaning towards yes since it's so cheap to catch. Do we still delete dead code when using Fast ISel? If so, I think it's probably best to handle it since it removes the need to encode instructions. What do you think?
This situation will not occur in real life. It's occurring here because you wanted me to test all these cases in the matrix and this code I've constructed by hand. If you start to test for all these things you will make the compiler slower and more complicated.
It's also not important at this time in the greater scheme of things which is not not let fast-isel fail and revert to non fast-isel.

================
Comment at: test/CodeGen/Mips/Fast-ISel/logopm.ll:112
@@ +111,3 @@
+; CHECK-DAG:    lbu     $[[UB1:[0-9]+]], 0($[[UB1_ADDR]])
+; CHECK-DAG:    and     $[[RES1:[0-9]+]], $[[UB1]], $[[CONST]]
+; CHECK:        andi    $[[RES:[0-9]+]], $[[RES1]], 1
----------------
dsanders wrote:
> Why not 'andi $[[RES1:[0-9]+]], $[[UB1]], 1'? Will andi support be added in a later patch?
People can use -O0 if they need better code here. The more things you add to complicated fast-isel the more unreadable it becomes and more prone to bugs.

================
Comment at: test/CodeGen/Mips/Fast-ISel/logopm.ll:153-159
@@ +152,9 @@
+; CHECK:        .ent    orUb0
+; CHECK:        lui     $[[REG_GPa:[0-9]+]], %hi(_gp_disp)
+; CHECK:        addiu   $[[REG_GPb:[0-9]+]], $[[REG_GPa]], %lo(_gp_disp)
+; CHECK:        addu    $[[REG_GP:[0-9]+]], $[[REG_GPb]], $25
+; CHECK-DAG:    lw      $[[UB_ADDR:[0-9]+]], %got(ub)($[[REG_GP]])
+; CHECK-DAG:    lw      $[[UB1_ADDR:[0-9]+]], %got(ub1)($[[REG_GP]])
+; CHECK-DAG:    lbu     $[[UB1:[0-9]+]], 0($[[UB1_ADDR]])
+; CHECK:        andi    $[[RES:[0-9]+]], $[[UB1]], 1
+; CHECK:        sb      $[[RES]], 0($[[UB_ADDR]])
----------------
dsanders wrote:
> Just a question: Any idea what optimized the 'X | 0'? I don't see anything in the patch that would do it and I'm surprised that it noticed this and didn't notice 'X & 0' or 'X ^ 0'.
I tried to make clang IR for the whole test matrix but sometimes there is something in llvm which is still optimizing something away.

================
Comment at: test/CodeGen/Mips/Fast-ISel/logopm.ll:228
@@ +227,3 @@
+; CHECK-DAG:    lbu     $[[UB1:[0-9]+]], 0($[[UB1_ADDR]])
+; CHECK-DAG:    xor     $[[RES1:[0-9]+]], $[[UB1]], $zero
+; CHECK:        andi    $[[RES:[0-9]+]], $[[RES1]], 1
----------------
dsanders wrote:
> Similar to the 'X & 0' case above, there's a trivial optimization opportunity here since 'X ^ 0 == X'. It would only require an 'if (Opc == Mips::XOR && RHSReg == Mips::ZERO) return LHSReg' in emitLogicalOp().
Not relevant IMO to worry about this now or even in the future as I explained earlier.

================
Comment at: test/CodeGen/Mips/Fast-ISel/logopm.ll:619-738
@@ +618,122 @@
+
+; Function Attrs: noinline nounwind
+;define i32 @main() #0 {
+;entry:
+;  call void @setUb(i8 zeroext 1, i8 zeroext 1, i8 zeroext 1)
+;   call void @andUb()
+;  %0 = load i8* @ub, align 1, !tbaa !2
+;  %conv = zext i8 %0 to i32
+;  %call =  call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i32 signext %conv) #2
+;   call void @andUb0()
+;  %1 = load i8* @ub, align 1, !tbaa !2
+;  %conv1 = zext i8 %1 to i32
+;  %call2 =  call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i32 signext %conv1) #2
+;   call void @andUb1()
+;  %2 = load i8* @ub, align 1, !tbaa !2
+;  %conv3 = zext i8 %2 to i32
+;  %call4 =  call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i32 signext %conv3) #2
+;   call void @orUb()
+;  %3 = load i8* @ub, align 1, !tbaa !2
+;  %conv5 = zext i8 %3 to i32
+;  %call6 =  call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i32 signext %conv5) #2
+;   call void @orUb0()
+;  %4 = load i8* @ub, align 1, !tbaa !2
+;  %conv7 = zext i8 %4 to i32
+;  %call8 =  call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i32 signext %conv7) #2
+;   call void @orUb1()
+;  %5 = load i8* @ub, align 1, !tbaa !2
+;  %conv9 = zext i8 %5 to i32
+;  %call10 =  call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i32 signext %conv9) #2
+;   call void @xorUb()
+;  %6 = load i8* @ub, align 1, !tbaa !2
+;  %conv11 = zext i8 %6 to i32
+;  %call12 =  call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i32 signext %conv11) #2
+;   call void @xorUb0()
+;  %7 = load i8* @ub, align 1, !tbaa !2
+;  %conv13 = zext i8 %7 to i32
+;  %call14 =  call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i32 signext %conv13) #2
+;  call void @xorUb1()
+;  %8 = load i8* @ub, align 1, !tbaa !2
+;  %conv15 = zext i8 %8 to i32
+;  %call16 =  call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i32 signext %conv15) #2
+;   call void @setUc(i8 zeroext 74, i8 zeroext -31, i8 zeroext 52)
+;   call void @andUc()
+;  %9 = load i8* @uc, align 1, !tbaa !2
+;  %conv17 = zext i8 %9 to i32
+;  %call18 =  call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i32 signext %conv17) #2
+;   call void @andUc0()
+;  %10 = load i8* @uc, align 1, !tbaa !2
+;  %conv19 = zext i8 %10 to i32
+;  %call20 =  call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i32 signext %conv19) #2
+;   call void @andUc1()
+;  %11 = load i8* @uc, align 1, !tbaa !2
+;  %conv21 = zext i8 %11 to i32
+;  %call22 =  call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i32 signext %conv21) #2
+;  call void @orUc()
+;  %12 = load i8* @uc, align 1, !tbaa !2
+;  %conv23 = zext i8 %12 to i32
+;  %call24 =  call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i32 signext %conv23) #2
+;   call void @orUc0()
+;  %13 = load i8* @uc, align 1, !tbaa !2
+;  %conv25 = zext i8 %13 to i32
+;  %call26 =  call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i32 signext %conv25) #2
+;   call void @orUc1()
+;  %14 = load i8* @uc, align 1, !tbaa !2
+;  %conv27 = zext i8 %14 to i32
+;  %call28 =  call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i32 signext %conv27) #2
+;   call void @xorUc()
+;  %15 = load i8* @uc, align 1, !tbaa !2
+;  %conv29 = zext i8 %15 to i32
+;  %call30 =  call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i32 signext %conv29) #2
+;   call void @xorUc0()
+;  %16 = load i8* @uc, align 1, !tbaa !2
+;  %conv31 = zext i8 %16 to i32
+;  %call32 =  call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i32 signext %conv31) #2
+;   call void @xorUc1()
+;  %17 = load i8* @uc, align 1, !tbaa !2
+;  %conv33 = zext i8 %17 to i32
+;  %call34 =  call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i32 signext %conv33) #2
+;   call void @setUs(i16 zeroext 19131, i16 zeroext 4833, i16 zeroext -24524)
+;   call void @andUs()
+;  %18 = load i16* @us, align 2, !tbaa !5
+;  %conv35 = zext i16 %18 to i32
+;  %call36 =  call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i32 signext %conv35) #2
+;   call void @andUs0()
+;  %19 = load i16* @us, align 2, !tbaa !5
+;  %conv37 = zext i16 %19 to i32
+;  %call38 =  call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i32 signext %conv37) #2
+;   call void @andUs1()
+;  %20 = load i16* @us, align 2, !tbaa !5
+;  %conv39 = zext i16 %20 to i32
+;  %call40 =  call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i32 signext %conv39) #2
+;   call void @orUs()
+;  %21 = load i16* @us, align 2, !tbaa !5
+;  %conv41 = zext i16 %21 to i32
+;  %call42 =  call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i32 signext %conv41) #2
+;   call void @orUs0()
+;  %22 = load i16* @us, align 2, !tbaa !5
+;  %conv43 = zext i16 %22 to i32
+;  %call44 =  call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i32 signext %conv43) #2
+;   call void @orUs1()
+;  %23 = load i16* @us, align 2, !tbaa !5
+;  %conv45 = zext i16 %23 to i32
+;  %call46 =  call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i32 signext %conv45) #2
+;   call void @xorUs()
+;  %24 = load i16* @us, align 2, !tbaa !5
+;  %conv47 = zext i16 %24 to i32
+;  %call48 =  call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i32 signext %conv47) #2
+;   call void @xorUs0()
+;  %25 = load i16* @us, align 2, !tbaa !5
+;  %conv49 = zext i16 %25 to i32
+;  %call50 =  call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i32 signext %conv49) #2
+;   call void @xorUs1()
+;  %26 = load i16* @us, align 2, !tbaa !5
+;  %conv51 = zext i16 %26 to i32
+;  %call52 =  call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i32 signext %conv51) #2
+;  ret i32 0
+;}
+
+; Function Attrs: nounwind
+declare i32 @printf(i8* nocapture readonly, ...) #1
+
+attributes #0 = { noinline nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
----------------
dsanders wrote:
> Nit: Commented out code is discouraged. It should be deleted.
This test case is executable and can be compared for results with non fast-isel for testing. It should really be added to test-suite. I do not want to lose this ability to later add this in the mips specific part of test-suite.

================
Comment at: test/CodeGen/Mips/Fast-ISel/logopm.ll:31-111
@@ +30,83 @@
+
+; Function Attrs: nounwind
+define void @i1_test() #0 {
+entry:
+; CHECK-LABEL: .ent i1_test
+  %0 = load i8* @i1_1, align 1
+  %conv = trunc i8 %0 to i1
+  %and = and i1 1, %conv
+; CHECK: 	and	${{[0-9]+}}, ${{[0-9]+}}, ${{[0-9]+}}
+  %conv1 = zext i1 %and to i8
+  store i8 %conv1, i8* @j1_1, align 1
+  %1 = load i8* @i1_1, align 1
+  %conv2 = trunc i8 %1 to i1
+  %2 = load i8* @i1_2, align 1
+  %conv3 = trunc i8 %2 to i1
+  %and4 = and i1 %conv2, %conv3
+; CHECK: 	and	${{[0-9]+}}, ${{[0-9]+}}, ${{[0-9]+}}
+  %conv5 = zext i1 %and4 to i8
+  store i8 %conv5, i8* @j1_2, align 1
+  %3 = load i8* @i1_2, align 1
+  %conv6 = trunc i8 %3 to i1
+  %4 = load i8* @i1_3, align 1
+  %conv7 = trunc i8 %4 to i1
+  %and8 = and i1 %conv6, %conv7
+; CHECK: 	and	${{[0-9]+}}, ${{[0-9]+}}, ${{[0-9]+}}
+  %conv9 = zext i1 %and8 to i8
+  store i8 %conv9, i8* @j1_3, align 1
+  %5 = load i8* @i1_3, align 1
+  %conv10 = trunc i8 %5 to i1
+  %6 = load i8* @i1_4, align 1
+  %conv11 = trunc i8 %6 to i1
+  %and12 = and i1 %conv10, %conv11
+; CHECK: 	and	${{[0-9]+}}, ${{[0-9]+}}, ${{[0-9]+}}
+  %conv13 = zext i1 %and12 to i8
+  store i8 %conv13, i8* @j1_4, align 1
+  ret void
+}
+
+; Function Attrs: nounwind
+define void @i8_test() #0 {
+entry:
+; CHECK-LABEL: .ent i8_test
+  %0 = load i8* @i8_1, align 1
+  %or = or i8 %0, 225
+  store i8 %or, i8* @j8_1, align 1
+; CHECK: 	or	${{[0-9]+}}, ${{[0-9]+}}, ${{[0-9]+}}
+  %1 = load i8* @i8_1, align 1
+  %2 = load i8* @i8_2, align 1
+  %or4 = or i8 %1, %2
+; CHECK: 	or	${{[0-9]+}}, ${{[0-9]+}}, ${{[0-9]+}}
+  store i8 %or4, i8* @j8_2, align 1
+  ret void
+}
+
+; Function Attrs: nounwind
+define void @i16_test() #0 {
+entry:
+; CHECK-LABEL: .ent i16_test
+  %0 = load i16* @i16_1, align 2
+  %xor = xor i16 241, %0
+  store i16 %xor, i16* @j16_1, align 2
+; CHECK: 	xor	${{[0-9]+}}, ${{[0-9]+}}, ${{[0-9]+}}
+  %1 = load i16* @i16_1, align 2
+  %2 = load i16* @i16_2, align 2
+  %xor4 = xor i16 %1, %2
+; CHECK: 	xor	${{[0-9]+}}, ${{[0-9]+}}, ${{[0-9]+}}
+  store i16 %xor4, i16* @j16_2, align 2
+  ret void
+}
+
+; Function Attrs: nounwind
+define void @i32_test() #0 {
+entry:
+  %0 = load i32* @i32_1, align 4
+  %xor = xor i32 %0, -16908096
+  store i32 %xor, i32* @j32_1, align 4
+  %1 = load i32* @i32_1, align 4
+  %2 = load i32* @i32_2, align 4
+  %xor1 = xor i32 %1, %2
+  store i32 %xor1, i32* @j32_2, align 4
+  ret void
+}
+
----------------
dsanders wrote:
> The tests are incomplete. Also, i32_test doesn't have any CHECK's.
> 
> The missing tests are:
> * i1 xor/or
> * i1 and/xor/or using immediates
> * i8 and/xor
> * i8 and/xor/or using immediates
> * i16 and/or
> * i16 and/xor/or using immediates
> * i32 and/xor/or (there's code for xor but no checks)
> * i32 and/xor/or using immediates (there's code for xor but no checks)
> Also, please do one test per function (i.e. one function for the register form, and one for the immediate form). It makes it easier to tell what is covered.
> 
> Finally, test_i1 doesn't need to be significantly different from the others. Taking test_i8 and switching all the types to i1 (and switching to the correct global) will give you a simple test for 1-bit integers.
I like to make the putback tests be executable and possibly removing the "main function" before putting them back so I'm sure that they work.

================
Comment at: test/CodeGen/Mips/Fast-ISel/logopm.ll:31-111
@@ +30,83 @@
+
+; Function Attrs: nounwind
+define void @i1_test() #0 {
+entry:
+; CHECK-LABEL: .ent i1_test
+  %0 = load i8* @i1_1, align 1
+  %conv = trunc i8 %0 to i1
+  %and = and i1 1, %conv
+; CHECK: 	and	${{[0-9]+}}, ${{[0-9]+}}, ${{[0-9]+}}
+  %conv1 = zext i1 %and to i8
+  store i8 %conv1, i8* @j1_1, align 1
+  %1 = load i8* @i1_1, align 1
+  %conv2 = trunc i8 %1 to i1
+  %2 = load i8* @i1_2, align 1
+  %conv3 = trunc i8 %2 to i1
+  %and4 = and i1 %conv2, %conv3
+; CHECK: 	and	${{[0-9]+}}, ${{[0-9]+}}, ${{[0-9]+}}
+  %conv5 = zext i1 %and4 to i8
+  store i8 %conv5, i8* @j1_2, align 1
+  %3 = load i8* @i1_2, align 1
+  %conv6 = trunc i8 %3 to i1
+  %4 = load i8* @i1_3, align 1
+  %conv7 = trunc i8 %4 to i1
+  %and8 = and i1 %conv6, %conv7
+; CHECK: 	and	${{[0-9]+}}, ${{[0-9]+}}, ${{[0-9]+}}
+  %conv9 = zext i1 %and8 to i8
+  store i8 %conv9, i8* @j1_3, align 1
+  %5 = load i8* @i1_3, align 1
+  %conv10 = trunc i8 %5 to i1
+  %6 = load i8* @i1_4, align 1
+  %conv11 = trunc i8 %6 to i1
+  %and12 = and i1 %conv10, %conv11
+; CHECK: 	and	${{[0-9]+}}, ${{[0-9]+}}, ${{[0-9]+}}
+  %conv13 = zext i1 %and12 to i8
+  store i8 %conv13, i8* @j1_4, align 1
+  ret void
+}
+
+; Function Attrs: nounwind
+define void @i8_test() #0 {
+entry:
+; CHECK-LABEL: .ent i8_test
+  %0 = load i8* @i8_1, align 1
+  %or = or i8 %0, 225
+  store i8 %or, i8* @j8_1, align 1
+; CHECK: 	or	${{[0-9]+}}, ${{[0-9]+}}, ${{[0-9]+}}
+  %1 = load i8* @i8_1, align 1
+  %2 = load i8* @i8_2, align 1
+  %or4 = or i8 %1, %2
+; CHECK: 	or	${{[0-9]+}}, ${{[0-9]+}}, ${{[0-9]+}}
+  store i8 %or4, i8* @j8_2, align 1
+  ret void
+}
+
+; Function Attrs: nounwind
+define void @i16_test() #0 {
+entry:
+; CHECK-LABEL: .ent i16_test
+  %0 = load i16* @i16_1, align 2
+  %xor = xor i16 241, %0
+  store i16 %xor, i16* @j16_1, align 2
+; CHECK: 	xor	${{[0-9]+}}, ${{[0-9]+}}, ${{[0-9]+}}
+  %1 = load i16* @i16_1, align 2
+  %2 = load i16* @i16_2, align 2
+  %xor4 = xor i16 %1, %2
+; CHECK: 	xor	${{[0-9]+}}, ${{[0-9]+}}, ${{[0-9]+}}
+  store i16 %xor4, i16* @j16_2, align 2
+  ret void
+}
+
+; Function Attrs: nounwind
+define void @i32_test() #0 {
+entry:
+  %0 = load i32* @i32_1, align 4
+  %xor = xor i32 %0, -16908096
+  store i32 %xor, i32* @j32_1, align 4
+  %1 = load i32* @i32_1, align 4
+  %2 = load i32* @i32_2, align 4
+  %xor1 = xor i32 %1, %2
+  store i32 %xor1, i32* @j32_2, align 4
+  ret void
+}
+
----------------
rkotler wrote:
> dsanders wrote:
> > The tests are incomplete. Also, i32_test doesn't have any CHECK's.
> > 
> > The missing tests are:
> > * i1 xor/or
> > * i1 and/xor/or using immediates
> > * i8 and/xor
> > * i8 and/xor/or using immediates
> > * i16 and/or
> > * i16 and/xor/or using immediates
> > * i32 and/xor/or (there's code for xor but no checks)
> > * i32 and/xor/or using immediates (there's code for xor but no checks)
> > Also, please do one test per function (i.e. one function for the register form, and one for the immediate form). It makes it easier to tell what is covered.
> > 
> > Finally, test_i1 doesn't need to be significantly different from the others. Taking test_i8 and switching all the types to i1 (and switching to the correct global) will give you a simple test for 1-bit integers.
> I like to make the putback tests be executable and possibly removing the "main function" before putting them back so I'm sure that they work.
I32 is handled without this patch already by means of the tablegen patterns.

The need for this patch is to handle things that normally would be taken care of by the legalizer. For example: xor or any non 32 integer types.

http://reviews.llvm.org/D6599

EMAIL PREFERENCES
  http://reviews.llvm.org/settings/panel/emailpreferences/