[llvm] 13792ba - [AArch64][GlobalISel] When lowering signext i1 parameters, don't zero-extend to s8 first.

Sat Oct 15 20:26:06 PDT 2022

Author: Amara Emerson
Date: 2022-10-15T20:25:43-07:00
New Revision: 13792ba4174f84d1f9e2af8921c00c991f2893d8

URL: https://github.com/llvm/llvm-project/commit/13792ba4174f84d1f9e2af8921c00c991f2893d8
DIFF: https://github.com/llvm/llvm-project/commit/13792ba4174f84d1f9e2af8921c00c991f2893d8.diff

LOG: [AArch64][GlobalISel] When lowering signext i1 parameters, don't zero-extend to s8 first.

Fixes https://github.com/llvm/llvm-project/issues/57181

Added: 
    

Modified: 
    llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp
    llvm/test/CodeGen/AArch64/GlobalISel/call-lowering-signext.ll
    llvm/test/CodeGen/AArch64/GlobalISel/call-lowering-zeroext.ll
    llvm/test/CodeGen/AArch64/bool-ext-inc.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp b/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp
index 32f1bf867019d..972a0dcbf66ed 100644

--- a/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp
@@ -383,7 +383,9 @@ bool AArch64CallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
 
       // i1 is a special case because SDAG i1 true is naturally zero extended
       // when widened using ANYEXT. We need to do it explicitly here.
-      if (MRI.getType(CurVReg).getSizeInBits() == 1) {
+      auto &Flags = CurArgInfo.Flags[0];
+      if (MRI.getType(CurVReg).getSizeInBits() == 1 && !Flags.isSExt() &&
+          !Flags.isZExt()) {
         CurVReg = MIRBuilder.buildZExt(LLT::scalar(8), CurVReg).getReg(0);
       } else if (TLI.getNumRegistersForCallingConv(Ctx, CC, SplitEVTs[i]) ==
                  1) {
@@ -569,7 +571,8 @@ bool AArch64CallLowering::lowerFormalArguments(
              MRI.getType(OrigArg.Regs[0]).getSizeInBits() == 1 &&
              "Unexpected registers used for i1 arg");
 
-      if (!OrigArg.Flags[0].isZExt()) {
+      auto &Flags = OrigArg.Flags[0];
+      if (!Flags.isZExt() && !Flags.isSExt()) {
         // Lower i1 argument as i8, and insert AssertZExt + Trunc later.
         Register OrigReg = OrigArg.Regs[0];
         Register WideReg = MRI.createGenericVirtualRegister(LLT::scalar(8));
@@ -1110,7 +1113,8 @@ bool AArch64CallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
   for (auto &OrigArg : Info.OrigArgs) {
     splitToValueTypes(OrigArg, OutArgs, DL, Info.CallConv);
     // AAPCS requires that we zero-extend i1 to 8 bits by the caller.
-    if (OrigArg.Ty->isIntegerTy(1)) {
+    auto &Flags = OrigArg.Flags[0];
+    if (OrigArg.Ty->isIntegerTy(1) && !Flags.isSExt() && !Flags.isZExt()) {
       ArgInfo &OutArg = OutArgs.back();
       assert(OutArg.Regs.size() == 1 &&
              MRI.getType(OutArg.Regs[0]).getSizeInBits() == 1 &&

diff  --git a/llvm/test/CodeGen/AArch64/GlobalISel/call-lowering-signext.ll b/llvm/test/CodeGen/AArch64/GlobalISel/call-lowering-signext.ll
index b1bcc017368cd..51223dffe3707 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/call-lowering-signext.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/call-lowering-signext.ll
@@ -42,7 +42,7 @@ define i32 @signext_param_i32(i32 signext %x) {
   ret i32 %x
 }
 
-; Zeroext param is passed on the stack. We should still get a G_ASSERT_SEXT.
+; signext param is passed on the stack. We should still get a G_ASSERT_SEXT.
 define i32 @signext_param_stack(i64 %a, i64 %b, i64 %c, i64 %d, i64 %e, i64 %f,
   ; CHECK-LABEL: name: signext_param_stack
   ; CHECK: bb.1 (%ir-block.0):
@@ -61,10 +61,8 @@ define i32 @signext_param_stack(i64 %a, i64 %b, i64 %c, i64 %d, i64 %e, i64 %f,
   ; CHECK-NEXT:   [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0
   ; CHECK-NEXT:   [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[FRAME_INDEX1]](p0) :: (invariant load (s8) from %fixed-stack.0, align 8)
   ; CHECK-NEXT:   [[ASSERT_SEXT:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[SEXTLOAD]], 1
-  ; CHECK-NEXT:   [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[ASSERT_SEXT]](s32)
-  ; CHECK-NEXT:   [[ASSERT_ZEXT:%[0-9]+]]:_(s8) = G_ASSERT_ZEXT [[TRUNC]], 1
-  ; CHECK-NEXT:   [[TRUNC1:%[0-9]+]]:_(s1) = G_TRUNC [[ASSERT_ZEXT]](s8)
-  ; CHECK-NEXT:   [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[TRUNC1]](s1)
+  ; CHECK-NEXT:   [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[ASSERT_SEXT]](s32)
+  ; CHECK-NEXT:   [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[TRUNC]](s1)
   ; CHECK-NEXT:   $w0 = COPY [[ZEXT]](s32)
   ; CHECK-NEXT:   RET_ReallyLR implicit $w0
                                 i64 %g, i64 %h, i64 %i, i1 signext %j) {
@@ -124,3 +122,45 @@ define i8 @s8_assert_zext_stack(i64 %a, i64 %b, i64 %c, i64 %d, i64 %e,
                                         i8 signext %j) {
   ret i8 %j
 }
+
+define i32 @callee_signext_i1(i1 signext %0) {
+  ; CHECK-LABEL: name: callee_signext_i1
+  ; CHECK: bb.1 (%ir-block.1):
+  ; CHECK-NEXT:   liveins: $w0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+  ; CHECK-NEXT:   [[ASSERT_SEXT:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[COPY]], 1
+  ; CHECK-NEXT:   [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[ASSERT_SEXT]](s32)
+  ; CHECK-NEXT:   [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC]](s1)
+  ; CHECK-NEXT:   $w0 = COPY [[SEXT]](s32)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $w0
+  %r = sext i1 %0 to i32
+  ret i32 %r
+}
+
+define i32 @caller_signext_i1() {
+  ; CHECK-LABEL: name: caller_signext_i1
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
+  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
+  ; CHECK-NEXT:   [[SEXT:%[0-9]+]]:_(s8) = G_SEXT [[C]](s1)
+  ; CHECK-NEXT:   [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[SEXT]](s8)
+  ; CHECK-NEXT:   $w0 = COPY [[SEXT1]](s32)
+  ; CHECK-NEXT:   BL @callee_signext_i1, csr_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $w0, implicit-def $w0
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
+  ; CHECK-NEXT:   $w0 = COPY [[COPY]](s32)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $w0
+  %r = call i32 @callee_signext_i1(i1 signext true)
+  ret i32 %r
+}
+
+define signext i1 @ret_signext_i1() {
+  ; CHECK-LABEL: name: ret_signext_i1
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
+  ; CHECK-NEXT:   [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[C]](s1)
+  ; CHECK-NEXT:   $w0 = COPY [[SEXT]](s32)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $w0
+  ret i1 true
+}

diff  --git a/llvm/test/CodeGen/AArch64/GlobalISel/call-lowering-zeroext.ll b/llvm/test/CodeGen/AArch64/GlobalISel/call-lowering-zeroext.ll
index aa83590b9c873..1ccd2584fb117 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/call-lowering-zeroext.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/call-lowering-zeroext.ll
@@ -116,3 +116,45 @@ define i8 @s8_assert_zext_stack(i64 %a, i64 %b, i64 %c, i64 %d, i64 %e,
                                         i8 zeroext %j) {
   ret i8 %j
 }
+
+define i32 @callee_zeroext_i1(i1 zeroext %0) {
+  ; CHECK-LABEL: name: callee_zeroext_i1
+  ; CHECK: bb.1 (%ir-block.1):
+  ; CHECK-NEXT:   liveins: $w0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+  ; CHECK-NEXT:   [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[COPY]], 1
+  ; CHECK-NEXT:   [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[ASSERT_ZEXT]](s32)
+  ; CHECK-NEXT:   [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[TRUNC]](s1)
+  ; CHECK-NEXT:   $w0 = COPY [[ZEXT]](s32)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $w0
+  %r = zext i1 %0 to i32
+  ret i32 %r
+}
+
+define i32 @caller_zeroext_i1() {
+  ; CHECK-LABEL: name: caller_zeroext_i1
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
+  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
+  ; CHECK-NEXT:   [[ZEXT:%[0-9]+]]:_(s8) = G_ZEXT [[C]](s1)
+  ; CHECK-NEXT:   [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[ZEXT]](s8)
+  ; CHECK-NEXT:   $w0 = COPY [[ZEXT1]](s32)
+  ; CHECK-NEXT:   BL @callee_zeroext_i1, csr_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $w0, implicit-def $w0
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
+  ; CHECK-NEXT:   $w0 = COPY [[COPY]](s32)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $w0
+  %r = call i32 @callee_zeroext_i1(i1 zeroext true)
+  ret i32 %r
+}
+
+define zeroext i1 @ret_zeroext_i1() {
+  ; CHECK-LABEL: name: ret_zeroext_i1
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
+  ; CHECK-NEXT:   [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[C]](s1)
+  ; CHECK-NEXT:   $w0 = COPY [[ZEXT]](s32)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $w0
+  ret i1 true
+}

diff  --git a/llvm/test/CodeGen/AArch64/bool-ext-inc.ll b/llvm/test/CodeGen/AArch64/bool-ext-inc.ll
index 8f45e97924118..f8d185bedd2b5 100644
--- a/llvm/test/CodeGen/AArch64/bool-ext-inc.ll
+++ b/llvm/test/CodeGen/AArch64/bool-ext-inc.ll
@@ -1,5 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=aarch64-- | FileCheck %s
+; RUN: llc < %s -mtriple=aarch64-- -global-isel -global-isel-abort=1 | FileCheck %s --check-prefix=GISEL
 
 define <4 x i32> @sextbool_add_vector(<4 x i32> %c1, <4 x i32> %c2, <4 x i32> %x) {
 ; CHECK-LABEL: sextbool_add_vector:
@@ -7,6 +8,12 @@ define <4 x i32> @sextbool_add_vector(<4 x i32> %c1, <4 x i32> %c2, <4 x i32> %x
 ; CHECK-NEXT:    cmeq v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT:    add v0.4s, v2.4s, v0.4s
 ; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: sextbool_add_vector:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    cmeq v0.4s, v0.4s, v1.4s
+; GISEL-NEXT:    add v0.4s, v2.4s, v0.4s
+; GISEL-NEXT:    ret
   %c = icmp eq <4 x i32> %c1, %c2
   %b = sext <4 x i1> %c to <4 x i32>
   %s = add <4 x i32> %x, %b
@@ -19,6 +26,15 @@ define <4 x i32> @zextbool_sub_vector(<4 x i32> %c1, <4 x i32> %c2, <4 x i32> %x
 ; CHECK-NEXT:    cmeq v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT:    add v0.4s, v2.4s, v0.4s
 ; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: zextbool_sub_vector:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    adrp x8, .LCPI1_0
+; GISEL-NEXT:    cmeq v0.4s, v0.4s, v1.4s
+; GISEL-NEXT:    ldr q3, [x8, :lo12:.LCPI1_0]
+; GISEL-NEXT:    and v0.16b, v0.16b, v3.16b
+; GISEL-NEXT:    sub v0.4s, v2.4s, v0.4s
+; GISEL-NEXT:    ret
   %c = icmp eq <4 x i32> %c1, %c2
   %b = zext <4 x i1> %c to <4 x i32>
   %s = sub <4 x i32> %x, %b
@@ -30,6 +46,12 @@ define i32 @assertsext_sub_1(i1 signext %cond, i32 %y) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    add w0, w1, w0
 ; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: assertsext_sub_1:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    and w8, w0, #0x1
+; GISEL-NEXT:    sub w0, w1, w8
+; GISEL-NEXT:    ret
   %e = zext i1 %cond to i32
   %r = sub i32 %y, %e
   ret i32 %r
@@ -40,6 +62,12 @@ define i32 @assertsext_add_1(i1 signext %cond, i32 %y) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    sub w0, w1, w0
 ; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: assertsext_add_1:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    and w8, w0, #0x1
+; GISEL-NEXT:    add w0, w8, w1
+; GISEL-NEXT:    ret
   %e = zext i1 %cond to i32
   %r = add i32 %e, %y
   ret i32 %r
@@ -50,8 +78,50 @@ define i32 @assertsext_add_1_commute(i1 signext %cond, i32 %y) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    sub w0, w1, w0
 ; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: assertsext_add_1_commute:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    and w8, w0, #0x1
+; GISEL-NEXT:    add w0, w1, w8
+; GISEL-NEXT:    ret
   %e = zext i1 %cond to i32
   %r = add i32 %y, %e
   ret i32 %r
 }
 
+define i32 @callee_signext_i1(i1 signext %0) {
+; CHECK-LABEL: callee_signext_i1:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: callee_signext_i1:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    ret
+  %r = sext i1 %0 to i32
+  ret i32 %r
+}
+
+define i32 @caller_signext_i1() {
+; CHECK-LABEL: caller_signext_i1:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    mov w0, #-1
+; CHECK-NEXT:    bl callee_signext_i1
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: caller_signext_i1:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; GISEL-NEXT:    .cfi_def_cfa_offset 16
+; GISEL-NEXT:    .cfi_offset w30, -16
+; GISEL-NEXT:    mov w8, #1
+; GISEL-NEXT:    sbfx w0, w8, #0, #1
+; GISEL-NEXT:    bl callee_signext_i1
+; GISEL-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; GISEL-NEXT:    ret
+  %r = call i32 @callee_signext_i1(i1 signext true)
+  ret i32 %r
+}