[llvm] [GISel][CombinerHelper] Combine op(trunc(x), trunc(y)) -> trunc(op(x, y)) (PR #89023)
Dhruv Chawla via llvm-commits
llvm-commits at lists.llvm.org
Wed May 29 21:13:00 PDT 2024
https://github.com/dc03-work updated https://github.com/llvm/llvm-project/pull/89023
>From 1ab27b1b8dbb171531129b137371fd50219d2658 Mon Sep 17 00:00:00 2001
From: Dhruv Chawla <dhruvc at nvidia.com>
Date: Wed, 10 Apr 2024 17:24:30 +0530
Subject: [PATCH 1/3] [GISel] Tests for op(trunc(x), trunc(y)) fold
---
.../AArch64/GlobalISel/combine-op-trunc.mir | 339 ++++++++++++++++++
1 file changed, 339 insertions(+)
create mode 100644 llvm/test/CodeGen/AArch64/GlobalISel/combine-op-trunc.mir
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-op-trunc.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-op-trunc.mir
new file mode 100644
index 0000000000000..d23c939cc0d86
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-op-trunc.mir
@@ -0,0 +1,339 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -o - -mtriple=aarch64-unknown-unknown -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs %s | FileCheck %s
+
+# Truncs with a single use get folded.
+
+# and(trunc(x), trunc(y)) -> trunc(and(x, y))
+---
+name: and_trunc
+body: |
+ bb.0:
+ liveins: $w0, $w1
+ ; CHECK-LABEL: name: and_trunc
+ ; CHECK: liveins: $w0, $w1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+ ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[TRUNC1]]
+ ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[AND]](s16)
+ ; CHECK-NEXT: $w0 = COPY [[ANYEXT]](s32)
+ %0:_(s32) = COPY $w0
+ %1:_(s32) = COPY $w1
+ %2:_(s16) = G_TRUNC %0
+ %3:_(s16) = G_TRUNC %1
+ %4:_(s16) = G_AND %2, %3
+ %5:_(s32) = G_ANYEXT %4
+ $w0 = COPY %5
+...
+---
+name: and_trunc_vector
+body: |
+ bb.0:
+ liveins: $q0, $q1
+ ; CHECK-LABEL: name: and_trunc_vector
+ ; CHECK: liveins: $q0, $q1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[COPY]](<4 x s32>)
+ ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[COPY1]](<4 x s32>)
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<4 x s16>) = G_AND [[TRUNC]], [[TRUNC1]]
+ ; CHECK-NEXT: $x0 = COPY [[AND]](<4 x s16>)
+ %0:_(<4 x s32>) = COPY $q0
+ %1:_(<4 x s32>) = COPY $q1
+ %2:_(<4 x s16>) = G_TRUNC %0
+ %3:_(<4 x s16>) = G_TRUNC %1
+ %4:_(<4 x s16>) = G_AND %2, %3
+ $x0 = COPY %4
+...
+
+# or(trunc(x), trunc(y)) -> trunc(or(x, y))
+---
+name: or_trunc
+body: |
+ bb.0:
+ liveins: $w0, $w1
+ ; CHECK-LABEL: name: or_trunc
+ ; CHECK: liveins: $w0, $w1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+ ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[TRUNC]], [[TRUNC1]]
+ ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
+ ; CHECK-NEXT: $w0 = COPY [[ANYEXT]](s32)
+ %0:_(s32) = COPY $w0
+ %1:_(s32) = COPY $w1
+ %2:_(s16) = G_TRUNC %0
+ %3:_(s16) = G_TRUNC %1
+ %4:_(s16) = G_OR %2, %3
+ %5:_(s32) = G_ANYEXT %4
+ $w0 = COPY %5
+...
+---
+name: or_trunc_vector
+body: |
+ bb.0:
+ liveins: $q0, $q1
+ ; CHECK-LABEL: name: or_trunc_vector
+ ; CHECK: liveins: $q0, $q1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[COPY]](<4 x s32>)
+ ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[COPY1]](<4 x s32>)
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<4 x s16>) = G_OR [[TRUNC]], [[TRUNC1]]
+ ; CHECK-NEXT: $x0 = COPY [[OR]](<4 x s16>)
+ %0:_(<4 x s32>) = COPY $q0
+ %1:_(<4 x s32>) = COPY $q1
+ %2:_(<4 x s16>) = G_TRUNC %0
+ %3:_(<4 x s16>) = G_TRUNC %1
+ %4:_(<4 x s16>) = G_OR %2, %3
+ $x0 = COPY %4
+...
+
+# xor(trunc(x), trunc(y)) -> trunc(xor(x, y))
+---
+name: xor_trunc
+body: |
+ bb.0:
+ liveins: $w0, $w1
+ ; CHECK-LABEL: name: xor_trunc
+ ; CHECK: liveins: $w0, $w1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+ ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
+ ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s16) = G_XOR [[TRUNC]], [[TRUNC1]]
+ ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR]](s16)
+ ; CHECK-NEXT: $w0 = COPY [[ANYEXT]](s32)
+ %0:_(s32) = COPY $w0
+ %1:_(s32) = COPY $w1
+ %2:_(s16) = G_TRUNC %0
+ %3:_(s16) = G_TRUNC %1
+ %4:_(s16) = G_XOR %2, %3
+ %5:_(s32) = G_ANYEXT %4
+ $w0 = COPY %5
+...
+---
+name: xor_trunc_vector
+body: |
+ bb.0:
+ liveins: $q0, $q1
+ ; CHECK-LABEL: name: xor_trunc_vector
+ ; CHECK: liveins: $q0, $q1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[COPY]](<4 x s32>)
+ ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[COPY1]](<4 x s32>)
+ ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<4 x s16>) = G_XOR [[TRUNC]], [[TRUNC1]]
+ ; CHECK-NEXT: $x0 = COPY [[XOR]](<4 x s16>)
+ %0:_(<4 x s32>) = COPY $q0
+ %1:_(<4 x s32>) = COPY $q1
+ %2:_(<4 x s16>) = G_TRUNC %0
+ %3:_(<4 x s16>) = G_TRUNC %1
+ %4:_(<4 x s16>) = G_XOR %2, %3
+ $x0 = COPY %4
+...
+
+# Truncs with multiple uses do not get folded.
+---
+name: or_trunc_multiuse_1
+body: |
+ bb.0:
+ liveins: $w0, $w1, $x2
+ ; CHECK-LABEL: name: or_trunc_multiuse_1
+ ; CHECK: liveins: $w0, $w1, $x2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(p0) = COPY $x2
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+ ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
+ ; CHECK-NEXT: G_STORE [[TRUNC]](s16), [[COPY2]](p0) :: (store (s16))
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[TRUNC]], [[TRUNC1]]
+ ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
+ ; CHECK-NEXT: $w0 = COPY [[ANYEXT]](s32)
+ %0:_(s32) = COPY $w0
+ %1:_(s32) = COPY $w1
+ %5:_(p0) = COPY $x2
+ %2:_(s16) = G_TRUNC %0
+ %3:_(s16) = G_TRUNC %1
+ G_STORE %2, %5 :: (store (s16))
+ %4:_(s16) = G_OR %2, %3
+ %6:_(s32) = G_ANYEXT %4
+ $w0 = COPY %6
+...
+---
+name: and_trunc_multiuse_2
+body: |
+ bb.0:
+ liveins: $w0, $w1, $x2
+ ; CHECK-LABEL: name: and_trunc_multiuse_2
+ ; CHECK: liveins: $w0, $w1, $x2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(p0) = COPY $x2
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+ ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
+ ; CHECK-NEXT: G_STORE [[TRUNC]](s16), [[COPY2]](p0) :: (store (s16))
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[TRUNC1]]
+ ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[AND]](s16)
+ ; CHECK-NEXT: $w0 = COPY [[ANYEXT]](s32)
+ %0:_(s32) = COPY $w0
+ %1:_(s32) = COPY $w1
+ %5:_(p0) = COPY $x2
+ %2:_(s16) = G_TRUNC %0
+ %3:_(s16) = G_TRUNC %1
+ G_STORE %2, %5 :: (store (s16))
+ %4:_(s16) = G_AND %2, %3
+ %6:_(s32) = G_ANYEXT %4
+ $w0 = COPY %6
+...
+---
+name: xor_trunc_vector_multiuse
+body: |
+ bb.0:
+ liveins: $q0, $q1, $x2
+ ; CHECK-LABEL: name: xor_trunc_vector_multiuse
+ ; CHECK: liveins: $q0, $q1, $x2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(p0) = COPY $x2
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[COPY]](<4 x s32>)
+ ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[COPY1]](<4 x s32>)
+ ; CHECK-NEXT: G_STORE [[TRUNC]](<4 x s16>), [[COPY2]](p0) :: (store (<4 x s16>))
+ ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<4 x s16>) = G_XOR [[TRUNC]], [[TRUNC1]]
+ ; CHECK-NEXT: $x0 = COPY [[XOR]](<4 x s16>)
+ %0:_(<4 x s32>) = COPY $q0
+ %1:_(<4 x s32>) = COPY $q1
+ %5:_(p0) = COPY $x2
+ %2:_(<4 x s16>) = G_TRUNC %0
+ %3:_(<4 x s16>) = G_TRUNC %1
+ G_STORE %2, %5 :: (store (<4 x s16>))
+ %4:_(<4 x s16>) = G_XOR %2, %3
+ $x0 = COPY %4
+...
+
+# Freezes should get pushed through truncs.
+
+# This optimizes the pattern where `select(cond, T, 0)` gets converted to
+# `and(cond, freeze(T))`.
+
+# and(freeze(trunc(x)), trunc(y)) -> trunc(and(freeze(x), y))
+---
+name: and_trunc_freeze
+body: |
+ bb.0:
+ liveins: $w0, $w1
+ ; CHECK-LABEL: name: and_trunc_freeze
+ ; CHECK: liveins: $w0, $w1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+ ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
+ ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s16) = G_FREEZE [[TRUNC]]
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[FREEZE]], [[TRUNC1]]
+ ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[AND]](s16)
+ ; CHECK-NEXT: $w0 = COPY [[ANYEXT]](s32)
+ %0:_(s32) = COPY $w0
+ %1:_(s32) = COPY $w1
+ %2:_(s16) = G_TRUNC %0
+ %3:_(s16) = G_TRUNC %1
+ %6:_(s16) = G_FREEZE %2
+ %4:_(s16) = G_AND %6, %3
+ %5:_(s32) = G_ANYEXT %4
+ $w0 = COPY %5
+...
+
+# and(freeze(trunc(x)), freeze(trunc(y))) -> trunc(and(freeze(x), freeze(y)))
+---
+name: and_trunc_freeze_both
+body: |
+ bb.0:
+ liveins: $w0, $w1
+ ; CHECK-LABEL: name: and_trunc_freeze_both
+ ; CHECK: liveins: $w0, $w1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+ ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
+ ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s16) = G_FREEZE [[TRUNC]]
+ ; CHECK-NEXT: [[FREEZE1:%[0-9]+]]:_(s16) = G_FREEZE [[TRUNC1]]
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[FREEZE]], [[FREEZE1]]
+ ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[AND]](s16)
+ ; CHECK-NEXT: $w0 = COPY [[ANYEXT]](s32)
+ %0:_(s32) = COPY $w0
+ %1:_(s32) = COPY $w1
+ %2:_(s16) = G_TRUNC %0
+ %3:_(s16) = G_TRUNC %1
+ %6:_(s16) = G_FREEZE %2
+ %7:_(s16) = G_FREEZE %3
+ %4:_(s16) = G_AND %6, %7
+ %5:_(s32) = G_ANYEXT %4
+ $w0 = COPY %5
+...
+
+# The freeze fold is less important for G_OR and G_XOR, but it can still
+# trigger.
+---
+name: or_trunc_freeze
+body: |
+ bb.0:
+ liveins: $w0, $w1
+ ; CHECK-LABEL: name: or_trunc_freeze
+ ; CHECK: liveins: $w0, $w1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+ ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
+ ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s16) = G_FREEZE [[TRUNC]]
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[FREEZE]], [[TRUNC1]]
+ ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
+ ; CHECK-NEXT: $w0 = COPY [[ANYEXT]](s32)
+ %0:_(s32) = COPY $w0
+ %1:_(s32) = COPY $w1
+ %2:_(s16) = G_TRUNC %0
+ %3:_(s16) = G_TRUNC %1
+ %6:_(s16) = G_FREEZE %2
+ %4:_(s16) = G_OR %6, %3
+ %5:_(s32) = G_ANYEXT %4
+ $w0 = COPY %5
+...
+---
+name: xor_trunc_freeze_both
+body: |
+ bb.0:
+ liveins: $w0, $w1
+ ; CHECK-LABEL: name: xor_trunc_freeze_both
+ ; CHECK: liveins: $w0, $w1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+ ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
+ ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s16) = G_FREEZE [[TRUNC]]
+ ; CHECK-NEXT: [[FREEZE1:%[0-9]+]]:_(s16) = G_FREEZE [[TRUNC1]]
+ ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s16) = G_XOR [[FREEZE]], [[FREEZE1]]
+ ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR]](s16)
+ ; CHECK-NEXT: $w0 = COPY [[ANYEXT]](s32)
+ %0:_(s32) = COPY $w0
+ %1:_(s32) = COPY $w1
+ %2:_(s16) = G_TRUNC %0
+ %3:_(s16) = G_TRUNC %1
+ %6:_(s16) = G_FREEZE %2
+ %7:_(s16) = G_FREEZE %3
+ %4:_(s16) = G_XOR %6, %7
+ %5:_(s32) = G_ANYEXT %4
+ $w0 = COPY %5
+...
>From efc1440db1f598a1d58327b132c4806baf1ba429 Mon Sep 17 00:00:00 2001
From: Dhruv Chawla <dhruvc at nvidia.com>
Date: Wed, 10 Apr 2024 14:35:11 +0530
Subject: [PATCH 2/3] [GISel][CombinerHelper] Combine op(trunc(x), trunc(y)) ->
trunc(op(x, y))
---
.../lib/CodeGen/GlobalISel/CombinerHelper.cpp | 4 +-
.../AArch64/GlobalISel/combine-op-trunc.mir | 82 +++-----
.../AArch64/GlobalISel/combine-select.mir | 30 ++-
...izer-combiner-narrow-binop-feeding-add.mir | 14 +-
.../prelegalizercombiner-hoist-same-hands.mir | 7 +-
llvm/test/CodeGen/AMDGPU/GlobalISel/fshl.ll | 191 +++++++++--------
llvm/test/CodeGen/AMDGPU/GlobalISel/fshr.ll | 195 +++++++++---------
.../test/CodeGen/AMDGPU/GlobalISel/uaddsat.ll | 10 +-
.../test/CodeGen/AMDGPU/GlobalISel/usubsat.ll | 10 +-
llvm/test/CodeGen/AMDGPU/GlobalISel/xnor.ll | 20 +-
10 files changed, 257 insertions(+), 306 deletions(-)
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index dcc1335a4bd44..b7f1589a67bed 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -3153,8 +3153,10 @@ bool CombinerHelper::matchHoistLogicOpWithSameOpcodeHands(
return false;
case TargetOpcode::G_ANYEXT:
case TargetOpcode::G_SEXT:
- case TargetOpcode::G_ZEXT: {
+ case TargetOpcode::G_ZEXT:
+ case TargetOpcode::G_TRUNC: {
// Match: logic (ext X), (ext Y) --> ext (logic X, Y)
+ // Match: logic (trunc X), (trunc Y) --> trunc (logic X, Y)
break;
}
case TargetOpcode::G_AND:
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-op-trunc.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-op-trunc.mir
index d23c939cc0d86..6a16b5b8b9e77 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-op-trunc.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-op-trunc.mir
@@ -14,11 +14,8 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
- ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
- ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
- ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[TRUNC1]]
- ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[AND]](s16)
- ; CHECK-NEXT: $w0 = COPY [[ANYEXT]](s32)
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[COPY1]]
+ ; CHECK-NEXT: $w0 = COPY [[AND]](s32)
%0:_(s32) = COPY $w0
%1:_(s32) = COPY $w1
%2:_(s16) = G_TRUNC %0
@@ -37,10 +34,9 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1
- ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[COPY]](<4 x s32>)
- ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[COPY1]](<4 x s32>)
- ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<4 x s16>) = G_AND [[TRUNC]], [[TRUNC1]]
- ; CHECK-NEXT: $x0 = COPY [[AND]](<4 x s16>)
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<4 x s32>) = G_AND [[COPY]], [[COPY1]]
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[AND]](<4 x s32>)
+ ; CHECK-NEXT: $x0 = COPY [[TRUNC]](<4 x s16>)
%0:_(<4 x s32>) = COPY $q0
%1:_(<4 x s32>) = COPY $q1
%2:_(<4 x s16>) = G_TRUNC %0
@@ -60,11 +56,8 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
- ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
- ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
- ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[TRUNC]], [[TRUNC1]]
- ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
- ; CHECK-NEXT: $w0 = COPY [[ANYEXT]](s32)
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY]], [[COPY1]]
+ ; CHECK-NEXT: $w0 = COPY [[OR]](s32)
%0:_(s32) = COPY $w0
%1:_(s32) = COPY $w1
%2:_(s16) = G_TRUNC %0
@@ -83,10 +76,9 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1
- ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[COPY]](<4 x s32>)
- ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[COPY1]](<4 x s32>)
- ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<4 x s16>) = G_OR [[TRUNC]], [[TRUNC1]]
- ; CHECK-NEXT: $x0 = COPY [[OR]](<4 x s16>)
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<4 x s32>) = G_OR [[COPY]], [[COPY1]]
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[OR]](<4 x s32>)
+ ; CHECK-NEXT: $x0 = COPY [[TRUNC]](<4 x s16>)
%0:_(<4 x s32>) = COPY $q0
%1:_(<4 x s32>) = COPY $q1
%2:_(<4 x s16>) = G_TRUNC %0
@@ -106,11 +98,8 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
- ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
- ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
- ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s16) = G_XOR [[TRUNC]], [[TRUNC1]]
- ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR]](s16)
- ; CHECK-NEXT: $w0 = COPY [[ANYEXT]](s32)
+ ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[COPY]], [[COPY1]]
+ ; CHECK-NEXT: $w0 = COPY [[XOR]](s32)
%0:_(s32) = COPY $w0
%1:_(s32) = COPY $w1
%2:_(s16) = G_TRUNC %0
@@ -129,10 +118,9 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1
- ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[COPY]](<4 x s32>)
- ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[COPY1]](<4 x s32>)
- ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<4 x s16>) = G_XOR [[TRUNC]], [[TRUNC1]]
- ; CHECK-NEXT: $x0 = COPY [[XOR]](<4 x s16>)
+ ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<4 x s32>) = G_XOR [[COPY]], [[COPY1]]
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[XOR]](<4 x s32>)
+ ; CHECK-NEXT: $x0 = COPY [[TRUNC]](<4 x s16>)
%0:_(<4 x s32>) = COPY $q0
%1:_(<4 x s32>) = COPY $q1
%2:_(<4 x s16>) = G_TRUNC %0
@@ -238,12 +226,9 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
- ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
- ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
- ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s16) = G_FREEZE [[TRUNC]]
- ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[FREEZE]], [[TRUNC1]]
- ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[AND]](s16)
- ; CHECK-NEXT: $w0 = COPY [[ANYEXT]](s32)
+ ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s32) = G_FREEZE [[COPY]]
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[FREEZE]], [[COPY1]]
+ ; CHECK-NEXT: $w0 = COPY [[AND]](s32)
%0:_(s32) = COPY $w0
%1:_(s32) = COPY $w1
%2:_(s16) = G_TRUNC %0
@@ -265,13 +250,10 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
- ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
- ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
- ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s16) = G_FREEZE [[TRUNC]]
- ; CHECK-NEXT: [[FREEZE1:%[0-9]+]]:_(s16) = G_FREEZE [[TRUNC1]]
- ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[FREEZE]], [[FREEZE1]]
- ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[AND]](s16)
- ; CHECK-NEXT: $w0 = COPY [[ANYEXT]](s32)
+ ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s32) = G_FREEZE [[COPY]]
+ ; CHECK-NEXT: [[FREEZE1:%[0-9]+]]:_(s32) = G_FREEZE [[COPY1]]
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[FREEZE]], [[FREEZE1]]
+ ; CHECK-NEXT: $w0 = COPY [[AND]](s32)
%0:_(s32) = COPY $w0
%1:_(s32) = COPY $w1
%2:_(s16) = G_TRUNC %0
@@ -295,12 +277,9 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
- ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
- ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
- ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s16) = G_FREEZE [[TRUNC]]
- ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[FREEZE]], [[TRUNC1]]
- ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
- ; CHECK-NEXT: $w0 = COPY [[ANYEXT]](s32)
+ ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s32) = G_FREEZE [[COPY]]
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[FREEZE]], [[COPY1]]
+ ; CHECK-NEXT: $w0 = COPY [[OR]](s32)
%0:_(s32) = COPY $w0
%1:_(s32) = COPY $w1
%2:_(s16) = G_TRUNC %0
@@ -320,13 +299,10 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
- ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
- ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
- ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s16) = G_FREEZE [[TRUNC]]
- ; CHECK-NEXT: [[FREEZE1:%[0-9]+]]:_(s16) = G_FREEZE [[TRUNC1]]
- ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s16) = G_XOR [[FREEZE]], [[FREEZE1]]
- ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR]](s16)
- ; CHECK-NEXT: $w0 = COPY [[ANYEXT]](s32)
+ ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s32) = G_FREEZE [[COPY]]
+ ; CHECK-NEXT: [[FREEZE1:%[0-9]+]]:_(s32) = G_FREEZE [[COPY1]]
+ ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[FREEZE]], [[FREEZE1]]
+ ; CHECK-NEXT: $w0 = COPY [[XOR]](s32)
%0:_(s32) = COPY $w0
%1:_(s32) = COPY $w1
%2:_(s16) = G_TRUNC %0
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-select.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-select.mir
index 074d4ecbd8785..86fa12aa064ac 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-select.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-select.mir
@@ -1,7 +1,8 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs -mtriple aarch64-unknown- --aarch64postlegalizercombiner-only-enable-rule="select_to_logical" %s -o - | FileCheck %s
+# RUN: llc -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs -mtriple aarch64-unknown-unknown %s -o - | FileCheck %s
# RUN: llc -debugify-and-strip-all-safe -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs -mtriple aarch64-unknown-unknown %s -o - | FileCheck %s
# REQUIRES: asserts
+
---
# select (c, x, x) -> x
name: test_combine_select_same_res
@@ -116,10 +117,9 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x2
- ; CHECK-NEXT: %c:_(s1) = G_TRUNC [[COPY]](s64)
; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s64) = G_FREEZE [[COPY1]]
- ; CHECK-NEXT: %f:_(s1) = G_TRUNC [[FREEZE]](s64)
- ; CHECK-NEXT: %sel:_(s1) = G_OR %c, %f
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[COPY]], [[FREEZE]]
+ ; CHECK-NEXT: %sel:_(s1) = G_TRUNC [[OR]](s64)
; CHECK-NEXT: %ext:_(s32) = G_ANYEXT %sel(s1)
; CHECK-NEXT: $w0 = COPY %ext(s32)
%0:_(s64) = COPY $x0
@@ -143,10 +143,9 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x2
- ; CHECK-NEXT: %c:_(s1) = G_TRUNC [[COPY]](s64)
; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s64) = G_FREEZE [[COPY1]]
- ; CHECK-NEXT: %f:_(s1) = G_TRUNC [[FREEZE]](s64)
- ; CHECK-NEXT: %sel:_(s1) = G_OR %c, %f
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[COPY]], [[FREEZE]]
+ ; CHECK-NEXT: %sel:_(s1) = G_TRUNC [[OR]](s64)
; CHECK-NEXT: %ext:_(s32) = G_ANYEXT %sel(s1)
; CHECK-NEXT: $w0 = COPY %ext(s32)
%0:_(s64) = COPY $x0
@@ -171,12 +170,9 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $d2
- ; CHECK-NEXT: %c:_(<2 x s1>) = G_TRUNC [[COPY]](<2 x s32>)
; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(<2 x s32>) = G_FREEZE [[COPY1]]
- ; CHECK-NEXT: %f:_(<2 x s1>) = G_TRUNC [[FREEZE]](<2 x s32>)
- ; CHECK-NEXT: %sel:_(<2 x s1>) = G_OR %c, %f
- ; CHECK-NEXT: %ext:_(<2 x s32>) = G_ANYEXT %sel(<2 x s1>)
- ; CHECK-NEXT: $d0 = COPY %ext(<2 x s32>)
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<2 x s32>) = G_OR [[COPY]], [[FREEZE]]
+ ; CHECK-NEXT: $d0 = COPY [[OR]](<2 x s32>)
%0:_(<2 x s32>) = COPY $d0
%1:_(<2 x s32>) = COPY $d1
%2:_(<2 x s32>) = COPY $d2
@@ -200,10 +196,9 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1
- ; CHECK-NEXT: %c:_(s1) = G_TRUNC [[COPY]](s64)
; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s64) = G_FREEZE [[COPY1]]
- ; CHECK-NEXT: %t:_(s1) = G_TRUNC [[FREEZE]](s64)
- ; CHECK-NEXT: %sel:_(s1) = G_AND %c, %t
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[FREEZE]]
+ ; CHECK-NEXT: %sel:_(s1) = G_TRUNC [[AND]](s64)
; CHECK-NEXT: %ext:_(s32) = G_ANYEXT %sel(s1)
; CHECK-NEXT: $w0 = COPY %ext(s32)
%0:_(s64) = COPY $x0
@@ -228,10 +223,9 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1
- ; CHECK-NEXT: %c:_(s1) = G_TRUNC [[COPY]](s64)
; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s64) = G_FREEZE [[COPY1]]
- ; CHECK-NEXT: %t:_(s1) = G_TRUNC [[FREEZE]](s64)
- ; CHECK-NEXT: %sel:_(s1) = G_AND %c, %t
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[FREEZE]]
+ ; CHECK-NEXT: %sel:_(s1) = G_TRUNC [[AND]](s64)
; CHECK-NEXT: %ext:_(s32) = G_ANYEXT %sel(s1)
; CHECK-NEXT: $w0 = COPY %ext(s32)
%0:_(s64) = COPY $x0
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizer-combiner-narrow-binop-feeding-add.mir b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizer-combiner-narrow-binop-feeding-add.mir
index fb19cda303d36..9699d0cf7892c 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizer-combiner-narrow-binop-feeding-add.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizer-combiner-narrow-binop-feeding-add.mir
@@ -84,10 +84,9 @@ body: |
; CHECK: liveins: $x0, $x1
; CHECK: %binop_lhs:_(s64) = COPY $x0
; CHECK: %binop_rhs:_(s64) = COPY $x1
- ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC %binop_lhs(s64)
- ; CHECK: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC %binop_rhs(s64)
- ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[TRUNC1]]
- ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[AND]](s32)
+ ; CHECK: [[AND:%[0-9]+]]:_(s64) = G_AND %binop_lhs, %binop_rhs
+ ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[AND]](s64)
+ ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[TRUNC]](s32)
; CHECK: $x0 = COPY [[ZEXT]](s64)
; CHECK: RET_ReallyLR implicit $x0
%binop_lhs:_(s64) = COPY $x0
@@ -131,10 +130,9 @@ body: |
; CHECK: liveins: $x0, $x1
; CHECK: %binop_lhs:_(s64) = COPY $x0
; CHECK: %binop_rhs:_(s64) = COPY $x1
- ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC %binop_lhs(s64)
- ; CHECK: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC %binop_rhs(s64)
- ; CHECK: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[TRUNC]], [[TRUNC1]]
- ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[XOR]](s32)
+ ; CHECK: [[XOR:%[0-9]+]]:_(s64) = G_XOR %binop_lhs, %binop_rhs
+ ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[XOR]](s64)
+ ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[TRUNC]](s32)
; CHECK: $x0 = COPY [[ZEXT]](s64)
; CHECK: RET_ReallyLR implicit $x0
%binop_lhs:_(s64) = COPY $x0
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-hoist-same-hands.mir b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-hoist-same-hands.mir
index 48fc042d7c737..7f2ae6ee24807 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-hoist-same-hands.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-hoist-same-hands.mir
@@ -268,10 +268,9 @@ body: |
; CHECK: liveins: $w0, $w1
; CHECK: %x_wide:_(s32) = COPY $w0
; CHECK: %y_wide:_(s32) = COPY $w1
- ; CHECK: %x:_(s1) = G_TRUNC %x_wide(s32)
- ; CHECK: %y:_(s1) = G_TRUNC %y_wide(s32)
- ; CHECK: [[OR:%[0-9]+]]:_(s1) = G_OR %x, %y
- ; CHECK: %logic_op:_(s64) = G_SEXT [[OR]](s1)
+ ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR %x_wide, %y_wide
+ ; CHECK: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[OR]](s32)
+ ; CHECK: %logic_op:_(s64) = G_SEXT [[TRUNC]](s1)
; CHECK: $x0 = COPY %logic_op(s64)
; CHECK: RET_ReallyLR implicit $x0
%x_wide:_(s32) = COPY $w0
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fshl.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fshl.ll
index f9b98059be0b3..06930388901b0 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fshl.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fshl.ll
@@ -1804,113 +1804,110 @@ define i24 @v_fshl_i24(i24 %lhs, i24 %rhs, i24 %amt) {
define amdgpu_ps i48 @s_fshl_v2i24(i48 inreg %lhs.arg, i48 inreg %rhs.arg, i48 inreg %amt.arg) {
; GFX6-LABEL: s_fshl_v2i24:
; GFX6: ; %bb.0:
+; GFX6-NEXT: v_cvt_f32_ubyte0_e32 v2, 24
+; GFX6-NEXT: v_rcp_iflag_f32_e32 v2, v2
; GFX6-NEXT: s_lshr_b32 s6, s0, 16
-; GFX6-NEXT: s_lshr_b32 s7, s0, 24
-; GFX6-NEXT: s_and_b32 s9, s0, 0xff
-; GFX6-NEXT: s_bfe_u32 s0, s0, 0x80008
-; GFX6-NEXT: v_cvt_f32_ubyte0_e32 v0, 24
-; GFX6-NEXT: s_lshl_b32 s0, s0, 8
+; GFX6-NEXT: s_lshr_b32 s7, s1, 8
+; GFX6-NEXT: s_bfe_u32 s9, s0, 0x80008
+; GFX6-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2
+; GFX6-NEXT: v_cvt_u32_f32_e32 v2, v2
+; GFX6-NEXT: s_and_b32 s8, s0, 0xff
+; GFX6-NEXT: s_lshl_b32 s9, s9, 8
; GFX6-NEXT: s_and_b32 s6, s6, 0xff
-; GFX6-NEXT: v_rcp_iflag_f32_e32 v0, v0
-; GFX6-NEXT: s_or_b32 s0, s9, s0
+; GFX6-NEXT: s_and_b32 s1, s1, 0xff
+; GFX6-NEXT: v_mov_b32_e32 v0, s0
+; GFX6-NEXT: s_and_b32 s0, s7, 0xff
+; GFX6-NEXT: s_or_b32 s8, s8, s9
; GFX6-NEXT: s_and_b32 s6, 0xffff, s6
-; GFX6-NEXT: s_lshr_b32 s8, s1, 8
+; GFX6-NEXT: v_alignbit_b32 v0, s1, v0, 24
; GFX6-NEXT: s_and_b32 s0, 0xffff, s0
+; GFX6-NEXT: s_and_b32 s8, 0xffff, s8
; GFX6-NEXT: s_lshl_b32 s6, s6, 16
+; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX6-NEXT: s_lshl_b32 s0, s0, 16
+; GFX6-NEXT: v_mov_b32_e32 v3, 0xffffffe8
+; GFX6-NEXT: s_or_b32 s6, s8, s6
+; GFX6-NEXT: v_or_b32_e32 v0, s0, v0
+; GFX6-NEXT: s_lshr_b32 s0, s2, 16
+; GFX6-NEXT: s_lshr_b32 s1, s3, 8
+; GFX6-NEXT: s_bfe_u32 s8, s2, 0x80008
+; GFX6-NEXT: v_mul_lo_u32 v3, v2, v3
+; GFX6-NEXT: s_and_b32 s7, s2, 0xff
+; GFX6-NEXT: s_lshl_b32 s8, s8, 8
+; GFX6-NEXT: s_and_b32 s0, s0, 0xff
+; GFX6-NEXT: s_and_b32 s3, s3, 0xff
+; GFX6-NEXT: v_mov_b32_e32 v1, s2
; GFX6-NEXT: s_and_b32 s1, s1, 0xff
-; GFX6-NEXT: s_or_b32 s0, s0, s6
-; GFX6-NEXT: s_lshl_b32 s1, s1, 8
-; GFX6-NEXT: s_and_b32 s6, s8, 0xff
-; GFX6-NEXT: s_or_b32 s1, s7, s1
-; GFX6-NEXT: s_and_b32 s6, 0xffff, s6
-; GFX6-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
+; GFX6-NEXT: s_or_b32 s7, s7, s8
+; GFX6-NEXT: s_and_b32 s0, 0xffff, s0
+; GFX6-NEXT: v_alignbit_b32 v1, s3, v1, 24
+; GFX6-NEXT: s_and_b32 s1, 0xffff, s1
+; GFX6-NEXT: s_and_b32 s7, 0xffff, s7
+; GFX6-NEXT: s_lshl_b32 s0, s0, 16
+; GFX6-NEXT: v_and_b32_e32 v1, 0xffff, v1
+; GFX6-NEXT: s_lshl_b32 s1, s1, 16
+; GFX6-NEXT: s_or_b32 s0, s7, s0
+; GFX6-NEXT: v_or_b32_e32 v1, s1, v1
+; GFX6-NEXT: s_lshr_b32 s1, s4, 16
+; GFX6-NEXT: s_bfe_u32 s7, s4, 0x80008
+; GFX6-NEXT: v_mul_hi_u32 v3, v2, v3
+; GFX6-NEXT: s_and_b32 s3, s4, 0xff
+; GFX6-NEXT: s_lshl_b32 s7, s7, 8
+; GFX6-NEXT: s_and_b32 s1, s1, 0xff
+; GFX6-NEXT: s_or_b32 s3, s3, s7
; GFX6-NEXT: s_and_b32 s1, 0xffff, s1
-; GFX6-NEXT: s_lshl_b32 s6, s6, 16
-; GFX6-NEXT: v_cvt_u32_f32_e32 v0, v0
-; GFX6-NEXT: s_or_b32 s1, s1, s6
-; GFX6-NEXT: s_lshr_b32 s6, s2, 16
-; GFX6-NEXT: s_lshr_b32 s7, s2, 24
-; GFX6-NEXT: s_and_b32 s9, s2, 0xff
-; GFX6-NEXT: s_bfe_u32 s2, s2, 0x80008
-; GFX6-NEXT: s_lshl_b32 s2, s2, 8
-; GFX6-NEXT: s_and_b32 s6, s6, 0xff
-; GFX6-NEXT: s_or_b32 s2, s9, s2
-; GFX6-NEXT: s_and_b32 s6, 0xffff, s6
-; GFX6-NEXT: v_mov_b32_e32 v1, 0xffffffe8
-; GFX6-NEXT: s_lshr_b32 s8, s3, 8
-; GFX6-NEXT: s_and_b32 s2, 0xffff, s2
-; GFX6-NEXT: s_lshl_b32 s6, s6, 16
-; GFX6-NEXT: s_and_b32 s3, s3, 0xff
-; GFX6-NEXT: v_mul_lo_u32 v1, v0, v1
-; GFX6-NEXT: s_or_b32 s2, s2, s6
-; GFX6-NEXT: s_lshl_b32 s3, s3, 8
-; GFX6-NEXT: s_and_b32 s6, s8, 0xff
-; GFX6-NEXT: s_or_b32 s3, s7, s3
-; GFX6-NEXT: s_and_b32 s6, 0xffff, s6
; GFX6-NEXT: s_and_b32 s3, 0xffff, s3
-; GFX6-NEXT: s_lshl_b32 s6, s6, 16
-; GFX6-NEXT: s_or_b32 s3, s3, s6
-; GFX6-NEXT: s_lshr_b32 s6, s4, 16
-; GFX6-NEXT: s_lshr_b32 s7, s4, 24
-; GFX6-NEXT: s_and_b32 s9, s4, 0xff
-; GFX6-NEXT: s_bfe_u32 s4, s4, 0x80008
-; GFX6-NEXT: v_mul_hi_u32 v1, v0, v1
-; GFX6-NEXT: s_lshl_b32 s4, s4, 8
-; GFX6-NEXT: s_and_b32 s6, s6, 0xff
-; GFX6-NEXT: s_or_b32 s4, s9, s4
-; GFX6-NEXT: s_and_b32 s6, 0xffff, s6
-; GFX6-NEXT: s_and_b32 s4, 0xffff, s4
-; GFX6-NEXT: s_lshl_b32 s6, s6, 16
-; GFX6-NEXT: s_or_b32 s4, s4, s6
-; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v1
-; GFX6-NEXT: v_mul_hi_u32 v1, s4, v0
-; GFX6-NEXT: s_lshr_b32 s8, s5, 8
-; GFX6-NEXT: s_and_b32 s5, s5, 0xff
-; GFX6-NEXT: s_lshl_b32 s5, s5, 8
-; GFX6-NEXT: v_mul_lo_u32 v1, v1, 24
-; GFX6-NEXT: s_and_b32 s6, s8, 0xff
-; GFX6-NEXT: s_or_b32 s5, s7, s5
-; GFX6-NEXT: s_and_b32 s6, 0xffff, s6
-; GFX6-NEXT: s_and_b32 s5, 0xffff, s5
-; GFX6-NEXT: s_lshl_b32 s6, s6, 16
-; GFX6-NEXT: s_or_b32 s5, s5, s6
-; GFX6-NEXT: v_sub_i32_e32 v1, vcc, s4, v1
-; GFX6-NEXT: v_subrev_i32_e32 v2, vcc, 24, v1
-; GFX6-NEXT: v_mul_hi_u32 v0, s5, v0
-; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v1
-; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
-; GFX6-NEXT: v_subrev_i32_e32 v2, vcc, 24, v1
-; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v1
-; GFX6-NEXT: v_mul_lo_u32 v0, v0, 24
-; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
-; GFX6-NEXT: v_sub_i32_e32 v2, vcc, 23, v1
-; GFX6-NEXT: v_and_b32_e32 v1, 0xffffff, v1
-; GFX6-NEXT: v_lshl_b32_e32 v1, s0, v1
-; GFX6-NEXT: s_lshr_b32 s0, s2, 1
+; GFX6-NEXT: s_lshl_b32 s1, s1, 16
+; GFX6-NEXT: s_or_b32 s1, s3, s1
+; GFX6-NEXT: v_add_i32_e32 v2, vcc, v2, v3
+; GFX6-NEXT: v_mul_hi_u32 v3, s1, v2
+; GFX6-NEXT: s_lshr_b32 s2, s5, 8
+; GFX6-NEXT: s_and_b32 s3, s5, 0xff
+; GFX6-NEXT: v_mov_b32_e32 v4, s4
+; GFX6-NEXT: s_and_b32 s2, s2, 0xff
+; GFX6-NEXT: v_alignbit_b32 v4, s3, v4, 24
+; GFX6-NEXT: s_and_b32 s2, 0xffff, s2
+; GFX6-NEXT: v_and_b32_e32 v4, 0xffff, v4
+; GFX6-NEXT: v_mul_lo_u32 v3, v3, 24
+; GFX6-NEXT: s_lshl_b32 s2, s2, 16
+; GFX6-NEXT: v_or_b32_e32 v4, s2, v4
+; GFX6-NEXT: v_mul_hi_u32 v2, v4, v2
+; GFX6-NEXT: v_sub_i32_e32 v3, vcc, s1, v3
+; GFX6-NEXT: v_subrev_i32_e32 v5, vcc, 24, v3
+; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v3
+; GFX6-NEXT: v_mul_lo_u32 v2, v2, 24
+; GFX6-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc
+; GFX6-NEXT: v_subrev_i32_e32 v5, vcc, 24, v3
+; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v3
+; GFX6-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc
+; GFX6-NEXT: v_sub_i32_e32 v2, vcc, v4, v2
+; GFX6-NEXT: v_sub_i32_e32 v5, vcc, 23, v3
+; GFX6-NEXT: v_subrev_i32_e32 v4, vcc, 24, v2
+; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v2
+; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
+; GFX6-NEXT: v_subrev_i32_e32 v4, vcc, 24, v2
+; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v2
+; GFX6-NEXT: v_and_b32_e32 v3, 0xffffff, v3
+; GFX6-NEXT: s_lshr_b32 s0, s0, 1
+; GFX6-NEXT: v_and_b32_e32 v5, 0xffffff, v5
+; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
+; GFX6-NEXT: v_lshl_b32_e32 v3, s6, v3
+; GFX6-NEXT: v_lshr_b32_e32 v5, s0, v5
+; GFX6-NEXT: v_sub_i32_e32 v4, vcc, 23, v2
; GFX6-NEXT: v_and_b32_e32 v2, 0xffffff, v2
-; GFX6-NEXT: v_lshr_b32_e32 v2, s0, v2
-; GFX6-NEXT: v_sub_i32_e32 v0, vcc, s5, v0
+; GFX6-NEXT: v_or_b32_e32 v3, v3, v5
+; GFX6-NEXT: v_lshlrev_b32_e32 v0, v2, v0
+; GFX6-NEXT: v_lshrrev_b32_e32 v1, 1, v1
+; GFX6-NEXT: v_and_b32_e32 v2, 0xffffff, v4
+; GFX6-NEXT: v_lshrrev_b32_e32 v1, v2, v1
+; GFX6-NEXT: v_bfe_u32 v2, v3, 8, 8
+; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
+; GFX6-NEXT: v_and_b32_e32 v1, 0xff, v3
+; GFX6-NEXT: v_lshlrev_b32_e32 v2, 8, v2
+; GFX6-NEXT: v_or_b32_e32 v1, v1, v2
+; GFX6-NEXT: v_bfe_u32 v2, v3, 16, 8
+; GFX6-NEXT: v_lshlrev_b32_e32 v2, 16, v2
; GFX6-NEXT: v_or_b32_e32 v1, v1, v2
-; GFX6-NEXT: v_subrev_i32_e32 v2, vcc, 24, v0
-; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v0
-; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
-; GFX6-NEXT: v_subrev_i32_e32 v2, vcc, 24, v0
-; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v0
-; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
-; GFX6-NEXT: v_sub_i32_e32 v2, vcc, 23, v0
-; GFX6-NEXT: v_and_b32_e32 v0, 0xffffff, v0
-; GFX6-NEXT: s_lshr_b32 s0, s3, 1
-; GFX6-NEXT: v_and_b32_e32 v2, 0xffffff, v2
-; GFX6-NEXT: v_lshl_b32_e32 v0, s1, v0
-; GFX6-NEXT: v_lshr_b32_e32 v2, s0, v2
-; GFX6-NEXT: v_bfe_u32 v3, v1, 8, 8
-; GFX6-NEXT: v_or_b32_e32 v0, v0, v2
-; GFX6-NEXT: v_and_b32_e32 v2, 0xff, v1
-; GFX6-NEXT: v_lshlrev_b32_e32 v3, 8, v3
-; GFX6-NEXT: v_bfe_u32 v1, v1, 16, 8
-; GFX6-NEXT: v_or_b32_e32 v2, v2, v3
-; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
-; GFX6-NEXT: v_or_b32_e32 v1, v2, v1
; GFX6-NEXT: v_and_b32_e32 v2, 0xff, v0
; GFX6-NEXT: v_lshlrev_b32_e32 v2, 24, v2
; GFX6-NEXT: v_or_b32_e32 v1, v1, v2
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fshr.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fshr.ll
index c8455665e7b40..ff93cddafc872 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fshr.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fshr.ll
@@ -1815,113 +1815,110 @@ define i24 @v_fshr_i24(i24 %lhs, i24 %rhs, i24 %amt) {
define amdgpu_ps i48 @s_fshr_v2i24(i48 inreg %lhs.arg, i48 inreg %rhs.arg, i48 inreg %amt.arg) {
; GFX6-LABEL: s_fshr_v2i24:
; GFX6: ; %bb.0:
-; GFX6-NEXT: v_cvt_f32_ubyte0_e32 v0, 24
-; GFX6-NEXT: v_rcp_iflag_f32_e32 v0, v0
+; GFX6-NEXT: v_cvt_f32_ubyte0_e32 v2, 24
+; GFX6-NEXT: v_rcp_iflag_f32_e32 v2, v2
+; GFX6-NEXT: s_lshr_b32 s7, s1, 8
+; GFX6-NEXT: s_bfe_u32 s9, s0, 0x80008
+; GFX6-NEXT: s_and_b32 s1, s1, 0xff
+; GFX6-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2
+; GFX6-NEXT: v_cvt_u32_f32_e32 v2, v2
+; GFX6-NEXT: v_mov_b32_e32 v0, s0
+; GFX6-NEXT: v_mov_b32_e32 v3, 0xffffffe8
; GFX6-NEXT: s_lshr_b32 s6, s0, 16
-; GFX6-NEXT: s_lshr_b32 s7, s0, 24
-; GFX6-NEXT: s_lshr_b32 s8, s1, 8
-; GFX6-NEXT: s_and_b32 s9, s0, 0xff
-; GFX6-NEXT: s_bfe_u32 s0, s0, 0x80008
+; GFX6-NEXT: s_and_b32 s8, s0, 0xff
+; GFX6-NEXT: s_lshl_b32 s9, s9, 8
+; GFX6-NEXT: v_alignbit_b32 v0, s1, v0, 24
+; GFX6-NEXT: s_and_b32 s0, s7, 0xff
+; GFX6-NEXT: s_lshr_b32 s1, s2, 16
+; GFX6-NEXT: s_lshr_b32 s7, s3, 8
+; GFX6-NEXT: s_bfe_u32 s10, s2, 0x80008
+; GFX6-NEXT: v_mul_lo_u32 v3, v2, v3
+; GFX6-NEXT: s_or_b32 s8, s8, s9
+; GFX6-NEXT: s_and_b32 s9, s2, 0xff
+; GFX6-NEXT: s_lshl_b32 s10, s10, 8
; GFX6-NEXT: s_and_b32 s1, s1, 0xff
-; GFX6-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
-; GFX6-NEXT: s_lshl_b32 s0, s0, 8
-; GFX6-NEXT: s_lshl_b32 s1, s1, 8
-; GFX6-NEXT: v_cvt_u32_f32_e32 v0, v0
-; GFX6-NEXT: s_or_b32 s0, s9, s0
-; GFX6-NEXT: s_or_b32 s1, s7, s1
-; GFX6-NEXT: s_and_b32 s7, s8, 0xff
-; GFX6-NEXT: s_lshr_b32 s8, s2, 16
-; GFX6-NEXT: s_lshr_b32 s9, s2, 24
-; GFX6-NEXT: s_and_b32 s11, s2, 0xff
-; GFX6-NEXT: s_bfe_u32 s2, s2, 0x80008
-; GFX6-NEXT: s_lshl_b32 s2, s2, 8
-; GFX6-NEXT: s_and_b32 s8, s8, 0xff
-; GFX6-NEXT: s_or_b32 s2, s11, s2
-; GFX6-NEXT: s_and_b32 s8, 0xffff, s8
-; GFX6-NEXT: v_mov_b32_e32 v1, 0xffffffe8
-; GFX6-NEXT: s_lshr_b32 s10, s3, 8
+; GFX6-NEXT: s_and_b32 s3, s3, 0xff
+; GFX6-NEXT: v_mov_b32_e32 v1, s2
+; GFX6-NEXT: s_and_b32 s2, s7, 0xff
+; GFX6-NEXT: s_or_b32 s9, s9, s10
+; GFX6-NEXT: s_and_b32 s1, 0xffff, s1
+; GFX6-NEXT: v_alignbit_b32 v1, s3, v1, 24
+; GFX6-NEXT: s_and_b32 s2, 0xffff, s2
+; GFX6-NEXT: s_and_b32 s9, 0xffff, s9
+; GFX6-NEXT: s_lshl_b32 s1, s1, 16
+; GFX6-NEXT: v_and_b32_e32 v1, 0xffff, v1
+; GFX6-NEXT: s_lshl_b32 s2, s2, 16
+; GFX6-NEXT: s_or_b32 s1, s9, s1
+; GFX6-NEXT: v_or_b32_e32 v1, s2, v1
+; GFX6-NEXT: s_lshr_b32 s2, s4, 16
+; GFX6-NEXT: s_bfe_u32 s9, s4, 0x80008
+; GFX6-NEXT: v_mul_hi_u32 v3, v2, v3
+; GFX6-NEXT: s_and_b32 s7, s4, 0xff
+; GFX6-NEXT: s_lshl_b32 s9, s9, 8
+; GFX6-NEXT: s_and_b32 s2, s2, 0xff
+; GFX6-NEXT: s_or_b32 s7, s7, s9
; GFX6-NEXT: s_and_b32 s2, 0xffff, s2
-; GFX6-NEXT: s_lshl_b32 s8, s8, 16
+; GFX6-NEXT: s_and_b32 s7, 0xffff, s7
+; GFX6-NEXT: s_lshl_b32 s2, s2, 16
+; GFX6-NEXT: s_or_b32 s2, s7, s2
+; GFX6-NEXT: v_add_i32_e32 v2, vcc, v2, v3
+; GFX6-NEXT: v_mul_hi_u32 v3, s2, v2
+; GFX6-NEXT: s_lshr_b32 s3, s5, 8
+; GFX6-NEXT: s_and_b32 s5, s5, 0xff
+; GFX6-NEXT: v_mov_b32_e32 v4, s4
; GFX6-NEXT: s_and_b32 s3, s3, 0xff
-; GFX6-NEXT: v_mul_lo_u32 v1, v0, v1
-; GFX6-NEXT: s_or_b32 s2, s2, s8
-; GFX6-NEXT: s_lshl_b32 s3, s3, 8
-; GFX6-NEXT: s_and_b32 s8, s10, 0xff
-; GFX6-NEXT: s_or_b32 s3, s9, s3
-; GFX6-NEXT: s_and_b32 s8, 0xffff, s8
+; GFX6-NEXT: v_alignbit_b32 v4, s5, v4, 24
; GFX6-NEXT: s_and_b32 s3, 0xffff, s3
-; GFX6-NEXT: s_lshl_b32 s8, s8, 16
-; GFX6-NEXT: s_or_b32 s3, s3, s8
-; GFX6-NEXT: s_lshr_b32 s8, s4, 16
-; GFX6-NEXT: s_lshr_b32 s9, s4, 24
-; GFX6-NEXT: s_and_b32 s11, s4, 0xff
-; GFX6-NEXT: s_bfe_u32 s4, s4, 0x80008
-; GFX6-NEXT: v_mul_hi_u32 v1, v0, v1
-; GFX6-NEXT: s_lshl_b32 s4, s4, 8
-; GFX6-NEXT: s_and_b32 s8, s8, 0xff
-; GFX6-NEXT: s_or_b32 s4, s11, s4
-; GFX6-NEXT: s_and_b32 s8, 0xffff, s8
-; GFX6-NEXT: s_and_b32 s4, 0xffff, s4
-; GFX6-NEXT: s_lshl_b32 s8, s8, 16
-; GFX6-NEXT: s_or_b32 s4, s4, s8
-; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v1
-; GFX6-NEXT: v_mul_hi_u32 v1, s4, v0
-; GFX6-NEXT: s_lshr_b32 s10, s5, 8
-; GFX6-NEXT: s_and_b32 s5, s5, 0xff
-; GFX6-NEXT: s_lshl_b32 s5, s5, 8
-; GFX6-NEXT: v_mul_lo_u32 v1, v1, 24
-; GFX6-NEXT: s_and_b32 s8, s10, 0xff
-; GFX6-NEXT: s_or_b32 s5, s9, s5
-; GFX6-NEXT: s_and_b32 s8, 0xffff, s8
-; GFX6-NEXT: s_and_b32 s5, 0xffff, s5
-; GFX6-NEXT: s_lshl_b32 s8, s8, 16
-; GFX6-NEXT: s_or_b32 s5, s5, s8
-; GFX6-NEXT: v_sub_i32_e32 v1, vcc, s4, v1
-; GFX6-NEXT: v_subrev_i32_e32 v2, vcc, 24, v1
-; GFX6-NEXT: v_mul_hi_u32 v0, s5, v0
-; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v1
-; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
-; GFX6-NEXT: v_subrev_i32_e32 v2, vcc, 24, v1
+; GFX6-NEXT: v_and_b32_e32 v4, 0xffff, v4
+; GFX6-NEXT: v_mul_lo_u32 v3, v3, 24
+; GFX6-NEXT: s_lshl_b32 s3, s3, 16
+; GFX6-NEXT: v_or_b32_e32 v4, s3, v4
+; GFX6-NEXT: v_mul_hi_u32 v2, v4, v2
+; GFX6-NEXT: v_sub_i32_e32 v3, vcc, s2, v3
+; GFX6-NEXT: v_subrev_i32_e32 v5, vcc, 24, v3
+; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v3
+; GFX6-NEXT: v_mul_lo_u32 v2, v2, 24
+; GFX6-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc
+; GFX6-NEXT: v_subrev_i32_e32 v5, vcc, 24, v3
+; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v3
+; GFX6-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc
+; GFX6-NEXT: v_sub_i32_e32 v2, vcc, v4, v2
+; GFX6-NEXT: v_sub_i32_e32 v5, vcc, 23, v3
+; GFX6-NEXT: v_subrev_i32_e32 v4, vcc, 24, v2
+; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v2
; GFX6-NEXT: s_and_b32 s6, s6, 0xff
-; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v1
-; GFX6-NEXT: v_mul_lo_u32 v0, v0, 24
-; GFX6-NEXT: s_and_b32 s0, 0xffff, s0
+; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
+; GFX6-NEXT: s_and_b32 s8, 0xffff, s8
; GFX6-NEXT: s_and_b32 s6, 0xffff, s6
-; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
-; GFX6-NEXT: v_sub_i32_e32 v2, vcc, 23, v1
-; GFX6-NEXT: s_lshl_b32 s4, s6, 17
-; GFX6-NEXT: s_lshl_b32 s0, s0, 1
-; GFX6-NEXT: s_or_b32 s0, s4, s0
-; GFX6-NEXT: v_and_b32_e32 v2, 0xffffff, v2
-; GFX6-NEXT: v_and_b32_e32 v1, 0xffffff, v1
-; GFX6-NEXT: v_lshl_b32_e32 v2, s0, v2
-; GFX6-NEXT: v_lshr_b32_e32 v1, s2, v1
-; GFX6-NEXT: v_sub_i32_e32 v0, vcc, s5, v0
-; GFX6-NEXT: v_or_b32_e32 v1, v2, v1
-; GFX6-NEXT: v_subrev_i32_e32 v2, vcc, 24, v0
-; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v0
-; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
-; GFX6-NEXT: v_subrev_i32_e32 v2, vcc, 24, v0
-; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v0
-; GFX6-NEXT: s_and_b32 s1, 0xffff, s1
-; GFX6-NEXT: s_and_b32 s7, 0xffff, s7
-; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
-; GFX6-NEXT: v_sub_i32_e32 v2, vcc, 23, v0
-; GFX6-NEXT: s_lshl_b32 s0, s7, 17
-; GFX6-NEXT: s_lshl_b32 s1, s1, 1
-; GFX6-NEXT: s_or_b32 s0, s0, s1
+; GFX6-NEXT: v_subrev_i32_e32 v4, vcc, 24, v2
+; GFX6-NEXT: s_lshl_b32 s2, s6, 17
+; GFX6-NEXT: s_lshl_b32 s3, s8, 1
+; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v2
+; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX6-NEXT: s_and_b32 s0, 0xffff, s0
+; GFX6-NEXT: s_or_b32 s2, s2, s3
+; GFX6-NEXT: v_and_b32_e32 v5, 0xffffff, v5
+; GFX6-NEXT: v_and_b32_e32 v3, 0xffffff, v3
+; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
+; GFX6-NEXT: v_lshl_b32_e32 v5, s2, v5
+; GFX6-NEXT: v_lshr_b32_e32 v3, s1, v3
+; GFX6-NEXT: v_sub_i32_e32 v4, vcc, 23, v2
+; GFX6-NEXT: s_lshl_b32 s0, s0, 17
+; GFX6-NEXT: v_lshlrev_b32_e32 v0, 1, v0
+; GFX6-NEXT: v_or_b32_e32 v3, v5, v3
+; GFX6-NEXT: v_or_b32_e32 v0, s0, v0
+; GFX6-NEXT: v_and_b32_e32 v4, 0xffffff, v4
; GFX6-NEXT: v_and_b32_e32 v2, 0xffffff, v2
-; GFX6-NEXT: v_and_b32_e32 v0, 0xffffff, v0
-; GFX6-NEXT: v_lshl_b32_e32 v2, s0, v2
-; GFX6-NEXT: v_lshr_b32_e32 v0, s3, v0
-; GFX6-NEXT: v_bfe_u32 v3, v1, 8, 8
-; GFX6-NEXT: v_or_b32_e32 v0, v2, v0
-; GFX6-NEXT: v_and_b32_e32 v2, 0xff, v1
-; GFX6-NEXT: v_lshlrev_b32_e32 v3, 8, v3
-; GFX6-NEXT: v_bfe_u32 v1, v1, 16, 8
-; GFX6-NEXT: v_or_b32_e32 v2, v2, v3
-; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
-; GFX6-NEXT: v_or_b32_e32 v1, v2, v1
+; GFX6-NEXT: v_lshlrev_b32_e32 v0, v4, v0
+; GFX6-NEXT: v_lshrrev_b32_e32 v1, v2, v1
+; GFX6-NEXT: v_bfe_u32 v2, v3, 8, 8
+; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
+; GFX6-NEXT: v_and_b32_e32 v1, 0xff, v3
+; GFX6-NEXT: v_lshlrev_b32_e32 v2, 8, v2
+; GFX6-NEXT: v_or_b32_e32 v1, v1, v2
+; GFX6-NEXT: v_bfe_u32 v2, v3, 16, 8
+; GFX6-NEXT: v_lshlrev_b32_e32 v2, 16, v2
+; GFX6-NEXT: v_or_b32_e32 v1, v1, v2
; GFX6-NEXT: v_and_b32_e32 v2, 0xff, v0
; GFX6-NEXT: v_lshlrev_b32_e32 v2, 24, v2
; GFX6-NEXT: v_or_b32_e32 v1, v1, v2
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/uaddsat.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/uaddsat.ll
index 1821d29d4b050..788692c94b0cf 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/uaddsat.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/uaddsat.ll
@@ -192,9 +192,7 @@ define i16 @v_uaddsat_v2i8(i16 %lhs.arg, i16 %rhs.arg) {
; GFX6-NEXT: v_min_u32_e32 v2, v3, v2
; GFX6-NEXT: v_add_i32_e32 v1, vcc, v1, v2
; GFX6-NEXT: v_lshrrev_b32_e32 v1, 24, v1
-; GFX6-NEXT: v_lshrrev_b32_e32 v0, 24, v0
-; GFX6-NEXT: v_lshlrev_b32_e32 v1, 8, v1
-; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
+; GFX6-NEXT: v_alignbit_b32 v0, v1, v0, 24
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_uaddsat_v2i8:
@@ -290,9 +288,9 @@ define amdgpu_ps i16 @s_uaddsat_v2i8(i16 inreg %lhs.arg, i16 inreg %rhs.arg) {
; GFX6-NEXT: s_min_u32 s2, s3, s2
; GFX6-NEXT: s_add_i32 s1, s1, s2
; GFX6-NEXT: s_lshr_b32 s1, s1, 24
-; GFX6-NEXT: s_lshr_b32 s0, s0, 24
-; GFX6-NEXT: s_lshl_b32 s1, s1, 8
-; GFX6-NEXT: s_or_b32 s0, s0, s1
+; GFX6-NEXT: v_mov_b32_e32 v0, s0
+; GFX6-NEXT: v_alignbit_b32 v0, s1, v0, 24
+; GFX6-NEXT: v_readfirstlane_b32 s0, v0
; GFX6-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: s_uaddsat_v2i8:
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/usubsat.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/usubsat.ll
index a60370cd460f9..0042d34e235d1 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/usubsat.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/usubsat.ll
@@ -186,9 +186,7 @@ define i16 @v_usubsat_v2i8(i16 %lhs.arg, i16 %rhs.arg) {
; GFX6-NEXT: v_min_u32_e32 v2, v1, v2
; GFX6-NEXT: v_sub_i32_e32 v1, vcc, v1, v2
; GFX6-NEXT: v_lshrrev_b32_e32 v1, 24, v1
-; GFX6-NEXT: v_lshrrev_b32_e32 v0, 24, v0
-; GFX6-NEXT: v_lshlrev_b32_e32 v1, 8, v1
-; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
+; GFX6-NEXT: v_alignbit_b32 v0, v1, v0, 24
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_usubsat_v2i8:
@@ -282,9 +280,9 @@ define amdgpu_ps i16 @s_usubsat_v2i8(i16 inreg %lhs.arg, i16 inreg %rhs.arg) {
; GFX6-NEXT: s_min_u32 s2, s1, s2
; GFX6-NEXT: s_sub_i32 s1, s1, s2
; GFX6-NEXT: s_lshr_b32 s1, s1, 24
-; GFX6-NEXT: s_lshr_b32 s0, s0, 24
-; GFX6-NEXT: s_lshl_b32 s1, s1, 8
-; GFX6-NEXT: s_or_b32 s0, s0, s1
+; GFX6-NEXT: v_mov_b32_e32 v0, s0
+; GFX6-NEXT: v_alignbit_b32 v0, s1, v0, 24
+; GFX6-NEXT: v_readfirstlane_b32 s0, v0
; GFX6-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: s_usubsat_v2i8:
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/xnor.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/xnor.ll
index cec73b7c3617b..6bb4e2d3dbe26 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/xnor.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/xnor.ll
@@ -26,13 +26,10 @@ entry:
define amdgpu_ps i32 @scalar_xnor_v2i16_one_use(<2 x i16> inreg %a, <2 x i16> inreg %b) {
; GFX7-LABEL: scalar_xnor_v2i16_one_use:
; GFX7: ; %bb.0: ; %entry
+; GFX7-NEXT: s_xor_b64 s[0:1], s[0:1], s[2:3]
; GFX7-NEXT: s_lshl_b32 s1, s1, 16
; GFX7-NEXT: s_and_b32 s0, s0, 0xffff
; GFX7-NEXT: s_or_b32 s0, s1, s0
-; GFX7-NEXT: s_lshl_b32 s1, s3, 16
-; GFX7-NEXT: s_and_b32 s2, s2, 0xffff
-; GFX7-NEXT: s_or_b32 s1, s1, s2
-; GFX7-NEXT: s_xor_b32 s0, s0, s1
; GFX7-NEXT: s_xor_b32 s0, s0, -1
; GFX7-NEXT: ; return to shader part epilog
;
@@ -117,22 +114,17 @@ define amdgpu_ps i64 @scalar_xnor_i64_one_use(i64 inreg %a, i64 inreg %b) {
define amdgpu_ps i64 @scalar_xnor_v4i16_one_use(<4 x i16> inreg %a, <4 x i16> inreg %b) {
; GFX7-LABEL: scalar_xnor_v4i16_one_use:
; GFX7: ; %bb.0:
+; GFX7-NEXT: s_xor_b64 s[0:1], s[0:1], s[4:5]
+; GFX7-NEXT: s_xor_b64 s[2:3], s[2:3], s[6:7]
; GFX7-NEXT: s_lshl_b32 s1, s1, 16
; GFX7-NEXT: s_and_b32 s0, s0, 0xffff
+; GFX7-NEXT: s_mov_b32 s8, -1
; GFX7-NEXT: s_or_b32 s0, s1, s0
; GFX7-NEXT: s_lshl_b32 s1, s3, 16
; GFX7-NEXT: s_and_b32 s2, s2, 0xffff
+; GFX7-NEXT: s_mov_b32 s9, s8
; GFX7-NEXT: s_or_b32 s1, s1, s2
-; GFX7-NEXT: s_lshl_b32 s2, s5, 16
-; GFX7-NEXT: s_and_b32 s3, s4, 0xffff
-; GFX7-NEXT: s_or_b32 s2, s2, s3
-; GFX7-NEXT: s_lshl_b32 s3, s7, 16
-; GFX7-NEXT: s_and_b32 s4, s6, 0xffff
-; GFX7-NEXT: s_or_b32 s3, s3, s4
-; GFX7-NEXT: s_mov_b32 s4, -1
-; GFX7-NEXT: s_mov_b32 s5, s4
-; GFX7-NEXT: s_xor_b64 s[0:1], s[0:1], s[2:3]
-; GFX7-NEXT: s_xor_b64 s[0:1], s[0:1], s[4:5]
+; GFX7-NEXT: s_xor_b64 s[0:1], s[0:1], s[8:9]
; GFX7-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: scalar_xnor_v4i16_one_use:
>From 376c02627b9980ccad912f1ffe2d6b5f6d5d8c76 Mon Sep 17 00:00:00 2001
From: Dhruv Chawla <dhruvc at nvidia.com>
Date: Thu, 23 May 2024 15:50:34 +0530
Subject: [PATCH 3/3] Use TLI.isZExtFree and TLI.isTruncateFree
---
llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp | 16 ++++++++++++++--
...galizer-combiner-narrow-binop-feeding-add.mir | 14 ++++++++------
2 files changed, 22 insertions(+), 8 deletions(-)
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index b7f1589a67bed..a078bb275f954 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -3153,10 +3153,22 @@ bool CombinerHelper::matchHoistLogicOpWithSameOpcodeHands(
return false;
case TargetOpcode::G_ANYEXT:
case TargetOpcode::G_SEXT:
- case TargetOpcode::G_ZEXT:
- case TargetOpcode::G_TRUNC: {
+ case TargetOpcode::G_ZEXT: {
// Match: logic (ext X), (ext Y) --> ext (logic X, Y)
+ break;
+ }
+ case TargetOpcode::G_TRUNC: {
// Match: logic (trunc X), (trunc Y) -> trunc (logic X, Y)
+ MachineFunction *MF = MI.getMF();
+ EVT DstEVT = getApproximateEVTForLLT(MRI.getType(Dst), MF->getDataLayout(),
+ MF->getFunction().getContext());
+ EVT XEVT = getApproximateEVTForLLT(XTy, MF->getDataLayout(),
+ MF->getFunction().getContext());
+ const TargetLowering &TLI = getTargetLowering();
+ // Be extra careful sinking truncate. If it's free, there's no benefit in
+ // widening a binop.
+ if (TLI.isZExtFree(DstEVT, XEVT) && TLI.isTruncateFree(XEVT, DstEVT))
+ return false;
break;
}
case TargetOpcode::G_AND:
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizer-combiner-narrow-binop-feeding-add.mir b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizer-combiner-narrow-binop-feeding-add.mir
index 9699d0cf7892c..fb19cda303d36 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizer-combiner-narrow-binop-feeding-add.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizer-combiner-narrow-binop-feeding-add.mir
@@ -84,9 +84,10 @@ body: |
; CHECK: liveins: $x0, $x1
; CHECK: %binop_lhs:_(s64) = COPY $x0
; CHECK: %binop_rhs:_(s64) = COPY $x1
- ; CHECK: [[AND:%[0-9]+]]:_(s64) = G_AND %binop_lhs, %binop_rhs
- ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[AND]](s64)
- ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[TRUNC]](s32)
+ ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC %binop_lhs(s64)
+ ; CHECK: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC %binop_rhs(s64)
+ ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[TRUNC1]]
+ ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[AND]](s32)
; CHECK: $x0 = COPY [[ZEXT]](s64)
; CHECK: RET_ReallyLR implicit $x0
%binop_lhs:_(s64) = COPY $x0
@@ -130,9 +131,10 @@ body: |
; CHECK: liveins: $x0, $x1
; CHECK: %binop_lhs:_(s64) = COPY $x0
; CHECK: %binop_rhs:_(s64) = COPY $x1
- ; CHECK: [[XOR:%[0-9]+]]:_(s64) = G_XOR %binop_lhs, %binop_rhs
- ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[XOR]](s64)
- ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[TRUNC]](s32)
+ ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC %binop_lhs(s64)
+ ; CHECK: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC %binop_rhs(s64)
+ ; CHECK: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[TRUNC]], [[TRUNC1]]
+ ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[XOR]](s32)
; CHECK: $x0 = COPY [[ZEXT]](s64)
; CHECK: RET_ReallyLR implicit $x0
%binop_lhs:_(s64) = COPY $x0
More information about the llvm-commits mailing list