[llvm] 9403a8b - [GlobalISel][AArch64] Fix miscompile caused by wrong G_ZEXT selection in GISel
via llvm-commits
llvm-commits at lists.llvm.org
Tue Oct 25 18:54:20 PDT 2022
Author: chenglin.bi
Date: 2022-10-26T09:54:13+08:00
New Revision: 9403a8bc37baf840c2d55be1fc09b9e8bf3b6a74
URL: https://github.com/llvm/llvm-project/commit/9403a8bc37baf840c2d55be1fc09b9e8bf3b6a74
DIFF: https://github.com/llvm/llvm-project/commit/9403a8bc37baf840c2d55be1fc09b9e8bf3b6a74.diff
LOG: [GlobalISel][AArch64] Fix miscompile caused by wrong G_ZEXT selection in GISel
The miscompiled case's G_ZEXT has a G_FREEZE source. Similar to D127154, this patch removes isDef32, relying on the AArch64MIPeephole optimizer to remove redundant SUBREG_TO_REG nodes in GISel as well.
Fixes #58431
Reviewed By: paquette
Differential Revision: https://reviews.llvm.org/D136433
Added:
llvm/test/CodeGen/AArch64/pr58431.ll
Modified:
llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
llvm/test/CodeGen/AArch64/GlobalISel/select-cmp.mir
llvm/test/CodeGen/AArch64/GlobalISel/select-jump-table-brjt.mir
llvm/test/CodeGen/AArch64/GlobalISel/select-redundant-zext.mir
llvm/test/CodeGen/AArch64/GlobalISel/select-select.mir
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
index 14fac54776c96..9dc24a1707afb 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -3269,24 +3269,12 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
// For the 32-bit -> 64-bit case, we can emit a mov (ORRWrs)
// + SUBREG_TO_REG.
- //
- // If we are zero extending from 32 bits to 64 bits, it's possible that
- // the instruction implicitly does the zero extend for us. In that case,
- // we only need the SUBREG_TO_REG.
if (IsGPR && SrcSize == 32 && DstSize == 64) {
- // Unlike with the G_LOAD case, we don't want to look through copies
- // here. (See isDef32.)
- MachineInstr *Def = MRI.getVRegDef(SrcReg);
- Register SubregToRegSrc = SrcReg;
-
- // Does the instruction implicitly zero extend?
- if (!Def || !isDef32(*Def)) {
- // No. Zero out using an OR.
- Register OrDst = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
- const Register ZReg = AArch64::WZR;
- MIB.buildInstr(AArch64::ORRWrs, {OrDst}, {ZReg, SrcReg}).addImm(0);
- SubregToRegSrc = OrDst;
- }
+ Register SubregToRegSrc =
+ MRI.createVirtualRegister(&AArch64::GPR32RegClass);
+ const Register ZReg = AArch64::WZR;
+ MIB.buildInstr(AArch64::ORRWrs, {SubregToRegSrc}, {ZReg, SrcReg})
+ .addImm(0);
MIB.buildInstr(AArch64::SUBREG_TO_REG, {DefReg}, {})
.addImm(0)
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-cmp.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-cmp.mir
index 8587c40340aa4..4151f7ecb3eac 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/select-cmp.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-cmp.mir
@@ -424,7 +424,8 @@ body: |
; CHECK-NEXT: %add_rhs:gpr64 = COPY $x2
; CHECK-NEXT: [[SUBSXrr:%[0-9]+]]:gpr64 = SUBSXrr %cmp_lhs, %cmp_rhs, implicit-def $nzcv
; CHECK-NEXT: %cmp:gpr32 = CSINCWr $wzr, $wzr, 1, implicit $nzcv
- ; CHECK-NEXT: %cmp_ext:gpr64 = SUBREG_TO_REG 0, %cmp, %subreg.sub_32
+ ; CHECK-NEXT: [[ORRWrs:%[0-9]+]]:gpr32 = ORRWrs $wzr, %cmp, 0
+ ; CHECK-NEXT: %cmp_ext:gpr64 = SUBREG_TO_REG 0, [[ORRWrs]], %subreg.sub_32
; CHECK-NEXT: %add:gpr64 = ADDXrr %cmp_ext, %add_rhs
; CHECK-NEXT: %or:gpr64 = ORRXrr %add, %cmp_ext
; CHECK-NEXT: $x0 = COPY %or
@@ -459,7 +460,8 @@ body: |
; CHECK-NEXT: %add_rhs:gpr64 = COPY $x2
; CHECK-NEXT: [[SUBSXrr:%[0-9]+]]:gpr64 = SUBSXrr %cmp_lhs, %cmp_rhs, implicit-def $nzcv
; CHECK-NEXT: %cmp:gpr32 = CSINCWr $wzr, $wzr, 1, implicit $nzcv
- ; CHECK-NEXT: %cmp_ext:gpr64 = SUBREG_TO_REG 0, %cmp, %subreg.sub_32
+ ; CHECK-NEXT: [[ORRWrs:%[0-9]+]]:gpr32 = ORRWrs $wzr, %cmp, 0
+ ; CHECK-NEXT: %cmp_ext:gpr64 = SUBREG_TO_REG 0, [[ORRWrs]], %subreg.sub_32
; CHECK-NEXT: %add:gpr64 = ADDXrr %cmp_ext, %add_rhs
; CHECK-NEXT: [[DEF:%[0-9]+]]:gpr64all = IMPLICIT_DEF
; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:gpr64 = INSERT_SUBREG [[DEF]], %cmp, %subreg.sub_32
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-jump-table-brjt.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-jump-table-brjt.mir
index ee3eacf51f4f6..681116ab83132 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/select-jump-table-brjt.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-jump-table-brjt.mir
@@ -60,7 +60,8 @@ body: |
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr32common = COPY $w0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY $wzr
; CHECK-NEXT: [[SUBSWri:%[0-9]+]]:gpr32 = SUBSWri [[COPY]], 4, 0, implicit-def $nzcv
- ; CHECK-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:gpr64common = SUBREG_TO_REG 0, [[SUBSWri]], %subreg.sub_32
+ ; CHECK-NEXT: [[ORRWrs:%[0-9]+]]:gpr32 = ORRWrs $wzr, [[SUBSWri]], 0
+ ; CHECK-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:gpr64common = SUBREG_TO_REG 0, [[ORRWrs]], %subreg.sub_32
; CHECK-NEXT: [[SUBSXri:%[0-9]+]]:gpr64 = SUBSXri [[SUBREG_TO_REG]], 71, 0, implicit-def $nzcv
; CHECK-NEXT: Bcc 8, %bb.4, implicit $nzcv
; CHECK-NEXT: {{ $}}
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-redundant-zext.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-redundant-zext.mir
index 5245537199024..e167fc52dfa06 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/select-redundant-zext.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-redundant-zext.mir
@@ -16,12 +16,14 @@ body: |
; CHECK-LABEL: name: fold
; CHECK: liveins: $w0, $w1
- ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY $w0
- ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1
- ; CHECK: [[ADDWrr:%[0-9]+]]:gpr32 = ADDWrr [[COPY1]], [[COPY]]
- ; CHECK: [[SUBREG_TO_REG:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, [[ADDWrr]], %subreg.sub_32
- ; CHECK: $x0 = COPY [[SUBREG_TO_REG]]
- ; CHECK: RET_ReallyLR implicit $x0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr32 = COPY $w0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1
+ ; CHECK-NEXT: [[ADDWrr:%[0-9]+]]:gpr32 = ADDWrr [[COPY1]], [[COPY]]
+ ; CHECK-NEXT: [[ORRWrs:%[0-9]+]]:gpr32 = ORRWrs $wzr, [[ADDWrr]], 0
+ ; CHECK-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, [[ORRWrs]], %subreg.sub_32
+ ; CHECK-NEXT: $x0 = COPY [[SUBREG_TO_REG]]
+ ; CHECK-NEXT: RET_ReallyLR implicit $x0
%0:gpr(s32) = COPY $w0
%1:gpr(s32) = COPY $w1
%2:gpr(s32) = G_ADD %1, %0
@@ -44,11 +46,12 @@ body: |
; CHECK-LABEL: name: dont_fold_s16
; CHECK: liveins: $w0, $w1
- ; CHECK: [[DEF:%[0-9]+]]:gpr32 = IMPLICIT_DEF
- ; CHECK: [[SUBREG_TO_REG:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, [[DEF]], %subreg.sub_32
- ; CHECK: [[UBFMXri:%[0-9]+]]:gpr64 = UBFMXri [[SUBREG_TO_REG]], 0, 15
- ; CHECK: $x0 = COPY [[UBFMXri]]
- ; CHECK: RET_ReallyLR implicit $x0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:gpr32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, [[DEF]], %subreg.sub_32
+ ; CHECK-NEXT: [[UBFMXri:%[0-9]+]]:gpr64 = UBFMXri [[SUBREG_TO_REG]], 0, 15
+ ; CHECK-NEXT: $x0 = COPY [[UBFMXri]]
+ ; CHECK-NEXT: RET_ReallyLR implicit $x0
%0:gpr(s16) = G_IMPLICIT_DEF
%3:gpr(s64) = G_ZEXT %0(s16)
$x0 = COPY %3(s64)
@@ -68,11 +71,12 @@ body: |
; CHECK-LABEL: name: dont_fold_copy
; CHECK: liveins: $w0
- ; CHECK: %copy:gpr32 = COPY $w0
- ; CHECK: [[ORRWrs:%[0-9]+]]:gpr32 = ORRWrs $wzr, %copy, 0
- ; CHECK: %zext:gpr64 = SUBREG_TO_REG 0, [[ORRWrs]], %subreg.sub_32
- ; CHECK: $x0 = COPY %zext
- ; CHECK: RET_ReallyLR implicit $x0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %copy:gpr32 = COPY $w0
+ ; CHECK-NEXT: [[ORRWrs:%[0-9]+]]:gpr32 = ORRWrs $wzr, %copy, 0
+ ; CHECK-NEXT: %zext:gpr64 = SUBREG_TO_REG 0, [[ORRWrs]], %subreg.sub_32
+ ; CHECK-NEXT: $x0 = COPY %zext
+ ; CHECK-NEXT: RET_ReallyLR implicit $x0
%copy:gpr(s32) = COPY $w0
%zext:gpr(s64) = G_ZEXT %copy(s32)
$x0 = COPY %zext(s64)
@@ -92,12 +96,13 @@ body: |
; CHECK-LABEL: name: dont_fold_bitcast
; CHECK: liveins: $w0
- ; CHECK: %copy:gpr32all = COPY $w0
- ; CHECK: %bitcast1:gpr32 = COPY %copy
- ; CHECK: [[ORRWrs:%[0-9]+]]:gpr32 = ORRWrs $wzr, %bitcast1, 0
- ; CHECK: %zext:gpr64 = SUBREG_TO_REG 0, [[ORRWrs]], %subreg.sub_32
- ; CHECK: $x0 = COPY %zext
- ; CHECK: RET_ReallyLR implicit $x0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %copy:gpr32all = COPY $w0
+ ; CHECK-NEXT: %bitcast1:gpr32 = COPY %copy
+ ; CHECK-NEXT: [[ORRWrs:%[0-9]+]]:gpr32 = ORRWrs $wzr, %bitcast1, 0
+ ; CHECK-NEXT: %zext:gpr64 = SUBREG_TO_REG 0, [[ORRWrs]], %subreg.sub_32
+ ; CHECK-NEXT: $x0 = COPY %zext
+ ; CHECK-NEXT: RET_ReallyLR implicit $x0
%copy:gpr(s32) = COPY $w0
%bitcast0:gpr(<4 x s8>) = G_BITCAST %copy(s32)
%bitcast1:gpr(s32) = G_BITCAST %bitcast0
@@ -119,12 +124,13 @@ body: |
; CHECK-LABEL: name: dont_fold_trunc
; CHECK: liveins: $x0
- ; CHECK: %copy:gpr64sp = COPY $x0
- ; CHECK: %trunc:gpr32common = COPY %copy.sub_32
- ; CHECK: [[ORRWrs:%[0-9]+]]:gpr32 = ORRWrs $wzr, %trunc, 0
- ; CHECK: %zext:gpr64 = SUBREG_TO_REG 0, [[ORRWrs]], %subreg.sub_32
- ; CHECK: $x0 = COPY %zext
- ; CHECK: RET_ReallyLR implicit $x0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %copy:gpr64sp = COPY $x0
+ ; CHECK-NEXT: %trunc:gpr32common = COPY %copy.sub_32
+ ; CHECK-NEXT: [[ORRWrs:%[0-9]+]]:gpr32 = ORRWrs $wzr, %trunc, 0
+ ; CHECK-NEXT: %zext:gpr64 = SUBREG_TO_REG 0, [[ORRWrs]], %subreg.sub_32
+ ; CHECK-NEXT: $x0 = COPY %zext
+ ; CHECK-NEXT: RET_ReallyLR implicit $x0
%copy:gpr(s64) = COPY $x0
%trunc:gpr(s32) = G_TRUNC %copy(s64)
%zext:gpr(s64) = G_ZEXT %trunc(s32)
@@ -140,21 +146,25 @@ tracksRegLiveness: true
body: |
; CHECK-LABEL: name: dont_fold_phi
; CHECK: bb.0:
- ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000)
- ; CHECK: liveins: $w0, $w1, $w2
- ; CHECK: %copy1:gpr32all = COPY $w0
- ; CHECK: %copy2:gpr32all = COPY $w1
- ; CHECK: %cond_wide:gpr32 = COPY $w2
- ; CHECK: TBNZW %cond_wide, 0, %bb.1
- ; CHECK: B %bb.2
- ; CHECK: bb.1:
- ; CHECK: successors: %bb.2(0x80000000)
- ; CHECK: bb.2:
- ; CHECK: %phi:gpr32 = PHI %copy1, %bb.0, %copy2, %bb.1
- ; CHECK: [[ORRWrs:%[0-9]+]]:gpr32 = ORRWrs $wzr, %phi, 0
- ; CHECK: [[SUBREG_TO_REG:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, [[ORRWrs]], %subreg.sub_32
- ; CHECK: $x0 = COPY [[SUBREG_TO_REG]]
- ; CHECK: RET_ReallyLR implicit $x0
+ ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; CHECK-NEXT: liveins: $w0, $w1, $w2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %copy1:gpr32all = COPY $w0
+ ; CHECK-NEXT: %copy2:gpr32all = COPY $w1
+ ; CHECK-NEXT: %cond_wide:gpr32 = COPY $w2
+ ; CHECK-NEXT: TBNZW %cond_wide, 0, %bb.1
+ ; CHECK-NEXT: B %bb.2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.2(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: %phi:gpr32 = PHI %copy1, %bb.0, %copy2, %bb.1
+ ; CHECK-NEXT: [[ORRWrs:%[0-9]+]]:gpr32 = ORRWrs $wzr, %phi, 0
+ ; CHECK-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, [[ORRWrs]], %subreg.sub_32
+ ; CHECK-NEXT: $x0 = COPY [[SUBREG_TO_REG]]
+ ; CHECK-NEXT: RET_ReallyLR implicit $x0
; We should have a ORRWrs here, because isDef32 disallows phis.
bb.0:
@@ -188,13 +198,14 @@ body: |
; CHECK-LABEL: name: dont_look_through_copy
; CHECK: liveins: $w0, $w1
- ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY $w0
- ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1
- ; CHECK: [[ADDWrr:%[0-9]+]]:gpr32 = ADDWrr [[COPY1]], [[COPY]]
- ; CHECK: [[ORRWrs:%[0-9]+]]:gpr32 = ORRWrs $wzr, [[ADDWrr]], 0
- ; CHECK: [[SUBREG_TO_REG:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, [[ORRWrs]], %subreg.sub_32
- ; CHECK: $x0 = COPY [[SUBREG_TO_REG]]
- ; CHECK: RET_ReallyLR implicit $x0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr32 = COPY $w0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1
+ ; CHECK-NEXT: [[ADDWrr:%[0-9]+]]:gpr32 = ADDWrr [[COPY1]], [[COPY]]
+ ; CHECK-NEXT: [[ORRWrs:%[0-9]+]]:gpr32 = ORRWrs $wzr, [[ADDWrr]], 0
+ ; CHECK-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, [[ORRWrs]], %subreg.sub_32
+ ; CHECK-NEXT: $x0 = COPY [[SUBREG_TO_REG]]
+ ; CHECK-NEXT: RET_ReallyLR implicit $x0
%0:gpr(s32) = COPY $w0
%1:gpr(s32) = COPY $w1
%2:gpr(s32) = G_ADD %1, %0
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-select.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-select.mir
index 617f209e24b03..e26c143135097 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/select-select.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-select.mir
@@ -574,7 +574,8 @@ body: |
; CHECK-NEXT: %cond:gpr32common = COPY %reg0.sub_32
; CHECK-NEXT: %t:gpr64 = COPY $x2
; CHECK-NEXT: %negative_one:gpr32 = MOVi32imm -1
- ; CHECK-NEXT: %zext:gpr64 = SUBREG_TO_REG 0, %negative_one, %subreg.sub_32
+ ; CHECK-NEXT: [[ORRWrs:%[0-9]+]]:gpr32 = ORRWrs $wzr, %negative_one, 0
+ ; CHECK-NEXT: %zext:gpr64 = SUBREG_TO_REG 0, [[ORRWrs]], %subreg.sub_32
; CHECK-NEXT: %xor:gpr64 = EORXrr %reg1, %zext
; CHECK-NEXT: [[ANDSWri:%[0-9]+]]:gpr32 = ANDSWri %cond, 0, implicit-def $nzcv
; CHECK-NEXT: %select:gpr64 = CSELXr %t, %xor, 1, implicit $nzcv
diff --git a/llvm/test/CodeGen/AArch64/pr58431.ll b/llvm/test/CodeGen/AArch64/pr58431.ll
new file mode 100644
index 0000000000000..dcd97597ae409
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/pr58431.ll
@@ -0,0 +1,19 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -global-isel -global-isel-abort=0 | FileCheck %s
+
+define i32 @f(i64 %0) {
+; CHECK-LABEL: f:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #10
+; CHECK-NEXT: mov w9, w0
+; CHECK-NEXT: udiv x10, x9, x8
+; CHECK-NEXT: msub x0, x10, x8, x9
+; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
+; CHECK-NEXT: ret
+ %2 = trunc i64 %0 to i32
+ %3 = freeze i32 %2
+ %4 = zext i32 %3 to i64
+ %5 = urem i64 %4, 10
+ %6 = trunc i64 %5 to i32
+ ret i32 %6
+}
More information about the llvm-commits
mailing list