[llvm] db8c84f - [GlobalIsel] Push cast through select. (#100539)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Jul 25 10:21:31 PDT 2024
Author: Thorsten Schütt
Date: 2024-07-25T19:21:28+02:00
New Revision: db8c84fc7a75dd60bcfff7160b51e1a55e7e0f73
URL: https://github.com/llvm/llvm-project/commit/db8c84fc7a75dd60bcfff7160b51e1a55e7e0f73
DIFF: https://github.com/llvm/llvm-project/commit/db8c84fc7a75dd60bcfff7160b51e1a55e7e0f73.diff
LOG: [GlobalIsel] Push cast through select. (#100539)
Added:
llvm/test/CodeGen/AArch64/GlobalISel/combine-cast.mir
Modified:
llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
llvm/include/llvm/Target/GlobalISel/Combine.td
llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
llvm/lib/CodeGen/GlobalISel/CombinerHelperCasts.cpp
llvm/test/CodeGen/AMDGPU/GlobalISel/ashr.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/lshr.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/shl.ll
llvm/test/CodeGen/AMDGPU/ctlz.ll
llvm/test/CodeGen/AMDGPU/ctlz_zero_undef.ll
llvm/test/CodeGen/AMDGPU/cttz.ll
Removed:
################################################################################
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
index 47365c3be3b93..05d7e882f5135 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -129,6 +129,12 @@ class CombinerHelper {
const TargetLowering &getTargetLowering() const;
+ const MachineFunction &getMachineFunction() const; ///< Machine function the helper's builder is operating on.
+
+ const DataLayout &getDataLayout() const; ///< Data layout of the current machine function.
+
+ LLVMContext &getContext() const; ///< LLVMContext as reported by the helper's builder.
+
/// \returns true if the combiner is running pre-legalization.
bool isPreLegalize() const;
@@ -884,6 +890,9 @@ class CombinerHelper {
bool matchTruncateOfExt(const MachineInstr &Root, const MachineInstr &ExtMI,
BuildFnTy &MatchInfo);
+ bool matchCastOfSelect(const MachineInstr &Cast, const MachineInstr &SelectMI,
+ BuildFnTy &MatchInfo); // Matches cast(select c, t, f); MatchInfo builds select c, cast(t), cast(f).
+
private:
/// Checks for legality of an indexed variant of \p LdSt.
bool isIndexedLoadStoreLegal(GLoadStore &LdSt) const;
@@ -996,6 +1005,8 @@ class CombinerHelper {
// Simplify (cmp cc0 x, y) (&& or ||) (cmp cc1 x, y) -> cmp cc2 x, y.
bool tryFoldLogicOfFCmps(GLogicalBinOp *Logic, BuildFnTy &MatchInfo);
+
+ bool isCastFree(unsigned Opcode, LLT ToTy, LLT FromTy) const; // True if the target reports the Opcode cast FromTy -> ToTy as free.
};
} // namespace llvm
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h b/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
index 8b7e8c0fbf1f5..ef1171d9f1f64 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
@@ -934,6 +934,22 @@ class GExtOp : public GCastOp {
};
};
+/// Represents an integer-like extending or truncating operation: G_SEXT, G_ZEXT, G_ANYEXT or G_TRUNC.
+class GExtOrTruncOp : public GCastOp {
+public:
+ static bool classof(const MachineInstr *MI) { // Opcode test that lets cast<GExtOrTruncOp>(MI) accept these four opcodes.
+ switch (MI->getOpcode()) {
+ case TargetOpcode::G_SEXT:
+ case TargetOpcode::G_ZEXT:
+ case TargetOpcode::G_ANYEXT:
+ case TargetOpcode::G_TRUNC:
+ return true;
+ default: // Every other opcode is not an ext-or-trunc.
+ return false;
+ }
+ }; // NOTE(review): the ';' after the function body is redundant (harmless).
+};
+
} // namespace llvm
#endif // LLVM_CODEGEN_GLOBALISEL_GENERICMACHINEINSTRS_H
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index 2362e77b54be2..2246e20ecc1dc 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -1771,10 +1771,25 @@ def truncate_of_zext : truncate_of_opcode<G_ZEXT>;
def truncate_of_sext : truncate_of_opcode<G_SEXT>;
def truncate_of_anyext : truncate_of_opcode<G_ANYEXT>;
+// Push cast through select: (castOpcode (G_SELECT cond, true, false)) -> (G_SELECT cond, (castOpcode true), (castOpcode false)).
+class select_of_opcode<Instruction castOpcode> : GICombineRule <
+ (defs root:$root, build_fn_matchinfo:$matchinfo),
+ (match (G_SELECT $select, $cond, $true, $false):$Select,
+ (castOpcode $root, $select):$Cast,
+ [{ return Helper.matchCastOfSelect(*${Cast}, *${Select}, ${matchinfo}); }]),
+ (apply [{ Helper.applyBuildFn(*${Cast}, ${matchinfo}); }])>;
+
+def select_of_zext : select_of_opcode<G_ZEXT>;
+def select_of_anyext : select_of_opcode<G_ANYEXT>;
+def select_of_truncate : select_of_opcode<G_TRUNC>;
+
def cast_combines: GICombineGroup<[
truncate_of_zext,
truncate_of_sext,
- truncate_of_anyext
+ truncate_of_anyext,
+ select_of_zext,
+ select_of_anyext,
+ select_of_truncate
]>;
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index 8c05931812af5..d930ab2984629 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -68,6 +68,16 @@ const TargetLowering &CombinerHelper::getTargetLowering() const {
return *Builder.getMF().getSubtarget().getTargetLowering();
}
+const MachineFunction &CombinerHelper::getMachineFunction() const { // Forwards to the builder's current machine function.
+ return Builder.getMF();
+}
+
+const DataLayout &CombinerHelper::getDataLayout() const { // Data layout taken from the current machine function.
+ return getMachineFunction().getDataLayout();
+}
+
+LLVMContext &CombinerHelper::getContext() const { return Builder.getContext(); } // Context comes from the builder.
+
/// \returns The little endian in-memory byte position of byte \p I in a
/// \p ByteWidth bytes wide type.
///
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelperCasts.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelperCasts.cpp
index d36685bf28313..59295f7a65835 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelperCasts.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelperCasts.cpp
@@ -161,3 +161,51 @@ bool CombinerHelper::matchTruncateOfExt(const MachineInstr &Root,
return false;
}
+
+bool CombinerHelper::isCastFree(unsigned Opcode, LLT ToTy, LLT FromTy) const { // Asks TargetLowering whether casting FromTy -> ToTy with Opcode is free.
+ const TargetLowering &TLI = getTargetLowering();
+ const DataLayout &DL = getDataLayout();
+ LLVMContext &Ctx = getContext();
+
+ switch (Opcode) {
+ case TargetOpcode::G_ANYEXT: // G_ANYEXT is answered by the same isZExtFree query as G_ZEXT.
+ case TargetOpcode::G_ZEXT:
+ return TLI.isZExtFree(FromTy, ToTy, DL, Ctx); // Source type is passed first, destination second.
+ case TargetOpcode::G_TRUNC:
+ return TLI.isTruncateFree(FromTy, ToTy, DL, Ctx);
+ default: // Any other opcode (G_SEXT included) is reported as not free.
+ return false;
+ }
+}
+
+bool CombinerHelper::matchCastOfSelect(const MachineInstr &CastMI, // Matches CastMI(SelectMI(c, t, f)); on success MatchInfo emits select(c, cast t, cast f).
+ const MachineInstr &SelectMI,
+ BuildFnTy &MatchInfo) {
+ const GExtOrTruncOp *Cast = cast<GExtOrTruncOp>(&CastMI);
+ const GSelect *Select = cast<GSelect>(&SelectMI);
+
+ if (!MRI.hasOneNonDBGUse(Select->getReg(0))) // Give up when the select result has more than one non-debug use.
+ return false;
+
+ Register Dst = Cast->getReg(0);
+ LLT DstTy = MRI.getType(Dst);
+ LLT CondTy = MRI.getType(Select->getCondReg());
+ Register TrueReg = Select->getTrueReg();
+ Register FalseReg = Select->getFalseReg();
+ LLT SrcTy = MRI.getType(TrueReg); // Type of the select's true input, used as the cast's source type.
+ Register Cond = Select->getCondReg();
+
+ if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SELECT, {DstTy, CondTy}})) // The rewritten select produces DstTy, so that form is what gets checked.
+ return false;
+
+ if (!isCastFree(Cast->getOpcode(), DstTy, SrcTy)) // Two casts replace one, so only proceed when the target says the cast is free.
+ return false;
+
+ MatchInfo = [=](MachineIRBuilder &B) { // NOTE(review): copies the Cast pointer; the closure must run while CastMI is still alive.
+ auto True = B.buildInstr(Cast->getOpcode(), {DstTy}, {TrueReg}); // Cast the true input to DstTy.
+ auto False = B.buildInstr(Cast->getOpcode(), {DstTy}, {FalseReg}); // Cast the false input to DstTy.
+ B.buildSelect(Dst, Cond, True, False); // Select between the casted values into the original cast's destination.
+ };
+
+ return true;
+}
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-cast.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-cast.mir
new file mode 100644
index 0000000000000..0f436127ea2eb
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-cast.mir
@@ -0,0 +1,131 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -o - -mtriple=aarch64-unknown-unknown -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs %s | FileCheck %s --check-prefixes=CHECK,CHECK-PRE
+# RUN: llc -o - -mtriple=aarch64-unknown-unknown -run-pass=aarch64-postlegalizer-combiner -verify-machineinstrs %s | FileCheck %s --check-prefixes=CHECK,CHECK-POST
+
+---
+name: test_combine_trunc_select
+legalized: true
+body: |
+ bb.1:
+ ; CHECK-PRE-LABEL: name: test_combine_trunc_select
+ ; CHECK-PRE: %cond:_(s32) = COPY $w0
+ ; CHECK-PRE-NEXT: %lhs:_(s64) = COPY $x0
+ ; CHECK-PRE-NEXT: %rhs:_(s64) = COPY $x0
+ ; CHECK-PRE-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC %lhs(s64)
+ ; CHECK-PRE-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC %rhs(s64)
+ ; CHECK-PRE-NEXT: %small:_(s32) = G_SELECT %cond(s32), [[TRUNC]], [[TRUNC1]]
+ ; CHECK-PRE-NEXT: $w0 = COPY %small(s32)
+ ;
+ ; CHECK-POST-LABEL: name: test_combine_trunc_select
+ ; CHECK-POST: %cond:_(s32) = COPY $w0
+ ; CHECK-POST-NEXT: %lhs:_(s64) = COPY $x0
+ ; CHECK-POST-NEXT: %rhs:_(s64) = COPY $x0
+ ; CHECK-POST-NEXT: %res:_(s64) = G_SELECT %cond(s32), %lhs, %rhs
+ ; CHECK-POST-NEXT: %small:_(s32) = G_TRUNC %res(s64)
+ ; CHECK-POST-NEXT: $w0 = COPY %small(s32)
+ %cond:_(s32) = COPY $w0
+ %lhs:_(s64) = COPY $x0
+ %rhs:_(s64) = COPY $x0
+ %res:_(s64) = G_SELECT %cond(s32), %lhs, %rhs
+ %small:_(s32) = G_TRUNC %res(s64)
+ $w0 = COPY %small(s32)
+...
+---
+name: test_combine_zext_select
+legalized: true
+body: |
+ bb.1:
+ ; CHECK-PRE-LABEL: name: test_combine_zext_select
+ ; CHECK-PRE: %cond:_(s32) = COPY $w0
+ ; CHECK-PRE-NEXT: %lhs:_(s32) = COPY $w0
+ ; CHECK-PRE-NEXT: %rhs:_(s32) = COPY $w0
+ ; CHECK-PRE-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT %lhs(s32)
+ ; CHECK-PRE-NEXT: [[ZEXT1:%[0-9]+]]:_(s64) = G_ZEXT %rhs(s32)
+ ; CHECK-PRE-NEXT: %big:_(s64) = G_SELECT %cond(s32), [[ZEXT]], [[ZEXT1]]
+ ; CHECK-PRE-NEXT: $x0 = COPY %big(s64)
+ ;
+ ; CHECK-POST-LABEL: name: test_combine_zext_select
+ ; CHECK-POST: %cond:_(s32) = COPY $w0
+ ; CHECK-POST-NEXT: %lhs:_(s32) = COPY $w0
+ ; CHECK-POST-NEXT: %rhs:_(s32) = COPY $w0
+ ; CHECK-POST-NEXT: %res:_(s32) = G_SELECT %cond(s32), %lhs, %rhs
+ ; CHECK-POST-NEXT: %big:_(s64) = G_ZEXT %res(s32)
+ ; CHECK-POST-NEXT: $x0 = COPY %big(s64)
+ %cond:_(s32) = COPY $w0
+ %lhs:_(s32) = COPY $w0
+ %rhs:_(s32) = COPY $w0
+ %res:_(s32) = G_SELECT %cond(s32), %lhs, %rhs
+ %big:_(s64) = G_ZEXT %res(s32)
+ $x0 = COPY %big(s64)
+...
+---
+name: test_combine_anyzext_select
+legalized: true
+body: |
+ bb.1:
+ ; CHECK-PRE-LABEL: name: test_combine_anyzext_select
+ ; CHECK-PRE: %cond:_(s32) = COPY $w0
+ ; CHECK-PRE-NEXT: %lhs:_(s32) = COPY $w0
+ ; CHECK-PRE-NEXT: %rhs:_(s32) = COPY $w0
+ ; CHECK-PRE-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT %lhs(s32)
+ ; CHECK-PRE-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT %rhs(s32)
+ ; CHECK-PRE-NEXT: %big:_(s64) = G_SELECT %cond(s32), [[ANYEXT]], [[ANYEXT1]]
+ ; CHECK-PRE-NEXT: $x0 = COPY %big(s64)
+ ;
+ ; CHECK-POST-LABEL: name: test_combine_anyzext_select
+ ; CHECK-POST: %cond:_(s32) = COPY $w0
+ ; CHECK-POST-NEXT: %lhs:_(s32) = COPY $w0
+ ; CHECK-POST-NEXT: %rhs:_(s32) = COPY $w0
+ ; CHECK-POST-NEXT: %res:_(s32) = G_SELECT %cond(s32), %lhs, %rhs
+ ; CHECK-POST-NEXT: %big:_(s64) = G_ANYEXT %res(s32)
+ ; CHECK-POST-NEXT: $x0 = COPY %big(s64)
+ %cond:_(s32) = COPY $w0
+ %lhs:_(s32) = COPY $w0
+ %rhs:_(s32) = COPY $w0
+ %res:_(s32) = G_SELECT %cond(s32), %lhs, %rhs
+ %big:_(s64) = G_ANYEXT %res(s32)
+ $x0 = COPY %big(s64)
+...
+---
+name: test_combine_anyzext_select_multi_use
+legalized: true
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: test_combine_anyzext_select_multi_use
+ ; CHECK: %cond:_(s32) = COPY $w0
+ ; CHECK-NEXT: %lhs:_(s32) = COPY $w0
+ ; CHECK-NEXT: %rhs:_(s32) = COPY $w0
+ ; CHECK-NEXT: %res:_(s32) = G_SELECT %cond(s32), %lhs, %rhs
+ ; CHECK-NEXT: %big:_(s64) = G_ANYEXT %res(s32)
+ ; CHECK-NEXT: $x0 = COPY %big(s64)
+ ; CHECK-NEXT: $w0 = COPY %res(s32)
+ %cond:_(s32) = COPY $w0
+ %lhs:_(s32) = COPY $w0
+ %rhs:_(s32) = COPY $w0
+ %res:_(s32) = G_SELECT %cond(s32), %lhs, %rhs
+ %big:_(s64) = G_ANYEXT %res(s32)
+ $x0 = COPY %big(s64)
+ $w0 = COPY %res(s32)
+...
+---
+name: test_combine_trunc_select_vector_out_of_budget
+legalized: true
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: test_combine_trunc_select_vector_out_of_budget
+ ; CHECK: %cond:_(<2 x s32>) = COPY $x0
+ ; CHECK-NEXT: %arg1:_(s64) = COPY $x0
+ ; CHECK-NEXT: %arg2:_(s64) = COPY $x0
+ ; CHECK-NEXT: %bv:_(<2 x s64>) = G_BUILD_VECTOR %arg1(s64), %arg2(s64)
+ ; CHECK-NEXT: %bv2:_(<2 x s64>) = G_BUILD_VECTOR %arg2(s64), %arg1(s64)
+ ; CHECK-NEXT: %res:_(<2 x s64>) = G_SELECT %cond(<2 x s32>), %bv, %bv2
+ ; CHECK-NEXT: %small:_(<2 x s32>) = G_TRUNC %res(<2 x s64>)
+ ; CHECK-NEXT: $x0 = COPY %small(<2 x s32>)
+ %cond:_(<2 x s32>) = COPY $x0
+ %arg1:_(s64) = COPY $x0
+ %arg2:_(s64) = COPY $x0
+ %bv:_(<2 x s64>) = G_BUILD_VECTOR %arg1(s64), %arg2(s64)
+ %bv2:_(<2 x s64>) = G_BUILD_VECTOR %arg2(s64), %arg1(s64)
+ %res:_(<2 x s64>) = G_SELECT %cond(<2 x s32>), %bv, %bv2
+ %small:_(<2 x s32>) = G_TRUNC %res(<2 x s64>)
+ $x0 = COPY %small(<2 x s32>)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/ashr.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/ashr.ll
index ec832ed0f7f3a..63f5464371cc6 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/ashr.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/ashr.ll
@@ -1845,39 +1845,37 @@ define amdgpu_ps i65 @s_ashr_i65(i65 inreg %value, i65 inreg %amount) {
; GCN-NEXT: s_lshr_b64 s[2:3], s[0:1], s3
; GCN-NEXT: s_lshl_b64 s[8:9], s[4:5], s8
; GCN-NEXT: s_or_b64 s[2:3], s[2:3], s[8:9]
-; GCN-NEXT: s_ashr_i32 s8, s5, 31
+; GCN-NEXT: s_ashr_i32 s7, s5, 31
; GCN-NEXT: s_ashr_i64 s[4:5], s[4:5], s10
; GCN-NEXT: s_cmp_lg_u32 s11, 0
; GCN-NEXT: s_cselect_b64 s[2:3], s[2:3], s[4:5]
; GCN-NEXT: s_cmp_lg_u32 s12, 0
-; GCN-NEXT: s_mov_b32 s9, s8
; GCN-NEXT: s_cselect_b64 s[0:1], s[0:1], s[2:3]
; GCN-NEXT: s_cmp_lg_u32 s11, 0
-; GCN-NEXT: s_cselect_b64 s[2:3], s[6:7], s[8:9]
+; GCN-NEXT: s_cselect_b32 s2, s6, s7
; GCN-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: s_ashr_i65:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_bfe_i64 s[4:5], s[2:3], 0x10000
-; GFX10PLUS-NEXT: s_sub_i32 s12, s3, 64
-; GFX10PLUS-NEXT: s_sub_i32 s8, 64, s3
+; GFX10PLUS-NEXT: s_sub_i32 s10, s3, 64
+; GFX10PLUS-NEXT: s_sub_i32 s2, 64, s3
; GFX10PLUS-NEXT: s_cmp_lt_u32 s3, 64
-; GFX10PLUS-NEXT: s_cselect_b32 s13, 1, 0
+; GFX10PLUS-NEXT: s_cselect_b32 s11, 1, 0
; GFX10PLUS-NEXT: s_cmp_eq_u32 s3, 0
-; GFX10PLUS-NEXT: s_cselect_b32 s14, 1, 0
-; GFX10PLUS-NEXT: s_ashr_i64 s[6:7], s[4:5], s3
-; GFX10PLUS-NEXT: s_lshr_b64 s[2:3], s[0:1], s3
-; GFX10PLUS-NEXT: s_lshl_b64 s[8:9], s[4:5], s8
-; GFX10PLUS-NEXT: s_ashr_i32 s10, s5, 31
-; GFX10PLUS-NEXT: s_or_b64 s[2:3], s[2:3], s[8:9]
-; GFX10PLUS-NEXT: s_ashr_i64 s[4:5], s[4:5], s12
-; GFX10PLUS-NEXT: s_cmp_lg_u32 s13, 0
-; GFX10PLUS-NEXT: s_mov_b32 s11, s10
-; GFX10PLUS-NEXT: s_cselect_b64 s[2:3], s[2:3], s[4:5]
-; GFX10PLUS-NEXT: s_cmp_lg_u32 s14, 0
-; GFX10PLUS-NEXT: s_cselect_b64 s[0:1], s[0:1], s[2:3]
-; GFX10PLUS-NEXT: s_cmp_lg_u32 s13, 0
-; GFX10PLUS-NEXT: s_cselect_b64 s[2:3], s[6:7], s[10:11]
+; GFX10PLUS-NEXT: s_cselect_b32 s12, 1, 0
+; GFX10PLUS-NEXT: s_lshr_b64 s[6:7], s[0:1], s3
+; GFX10PLUS-NEXT: s_lshl_b64 s[8:9], s[4:5], s2
+; GFX10PLUS-NEXT: s_ashr_i64 s[2:3], s[4:5], s3
+; GFX10PLUS-NEXT: s_or_b64 s[6:7], s[6:7], s[8:9]
+; GFX10PLUS-NEXT: s_ashr_i32 s3, s5, 31
+; GFX10PLUS-NEXT: s_ashr_i64 s[4:5], s[4:5], s10
+; GFX10PLUS-NEXT: s_cmp_lg_u32 s11, 0
+; GFX10PLUS-NEXT: s_cselect_b64 s[4:5], s[6:7], s[4:5]
+; GFX10PLUS-NEXT: s_cmp_lg_u32 s12, 0
+; GFX10PLUS-NEXT: s_cselect_b64 s[0:1], s[0:1], s[4:5]
+; GFX10PLUS-NEXT: s_cmp_lg_u32 s11, 0
+; GFX10PLUS-NEXT: s_cselect_b32 s2, s2, s3
; GFX10PLUS-NEXT: ; return to shader part epilog
%result = ashr i65 %value, %amount
ret i65 %result
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/lshr.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/lshr.ll
index 980ba3da4bac7..5dd4fa0809131 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/lshr.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/lshr.ll
@@ -1766,7 +1766,7 @@ define amdgpu_ps i65 @s_lshr_i65(i65 inreg %value, i65 inreg %amount) {
; GCN-NEXT: s_cmp_lg_u32 s12, 0
; GCN-NEXT: s_cselect_b64 s[0:1], s[0:1], s[2:3]
; GCN-NEXT: s_cmp_lg_u32 s11, 0
-; GCN-NEXT: s_cselect_b64 s[2:3], s[6:7], 0
+; GCN-NEXT: s_cselect_b32 s2, s6, 0
; GCN-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: s_lshr_i65:
@@ -1788,7 +1788,7 @@ define amdgpu_ps i65 @s_lshr_i65(i65 inreg %value, i65 inreg %amount) {
; GFX10PLUS-NEXT: s_cmp_lg_u32 s12, 0
; GFX10PLUS-NEXT: s_cselect_b64 s[0:1], s[0:1], s[4:5]
; GFX10PLUS-NEXT: s_cmp_lg_u32 s11, 0
-; GFX10PLUS-NEXT: s_cselect_b64 s[2:3], s[2:3], 0
+; GFX10PLUS-NEXT: s_cselect_b32 s2, s2, 0
; GFX10PLUS-NEXT: ; return to shader part epilog
%result = lshr i65 %value, %amount
ret i65 %result
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/shl.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/shl.ll
index c2f911cc44587..4cf1c92539c36 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/shl.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/shl.ll
@@ -1733,9 +1733,9 @@ define amdgpu_ps i65 @s_shl_i65(i65 inreg %value, i65 inreg %amount) {
; GCN-NEXT: s_lshl_b64 s[8:9], s[0:1], s10
; GCN-NEXT: s_cmp_lg_u32 s11, 0
; GCN-NEXT: s_cselect_b64 s[0:1], s[4:5], 0
-; GCN-NEXT: s_cselect_b64 s[4:5], s[6:7], s[8:9]
+; GCN-NEXT: s_cselect_b32 s3, s6, s8
; GCN-NEXT: s_cmp_lg_u32 s12, 0
-; GCN-NEXT: s_cselect_b64 s[2:3], s[2:3], s[4:5]
+; GCN-NEXT: s_cselect_b32 s2, s2, s3
; GCN-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: s_shl_i65:
@@ -1753,9 +1753,9 @@ define amdgpu_ps i65 @s_shl_i65(i65 inreg %value, i65 inreg %amount) {
; GFX10PLUS-NEXT: s_lshl_b64 s[6:7], s[0:1], s10
; GFX10PLUS-NEXT: s_cmp_lg_u32 s11, 0
; GFX10PLUS-NEXT: s_cselect_b64 s[0:1], s[8:9], 0
-; GFX10PLUS-NEXT: s_cselect_b64 s[4:5], s[4:5], s[6:7]
+; GFX10PLUS-NEXT: s_cselect_b32 s3, s4, s6
; GFX10PLUS-NEXT: s_cmp_lg_u32 s12, 0
-; GFX10PLUS-NEXT: s_cselect_b64 s[2:3], s[2:3], s[4:5]
+; GFX10PLUS-NEXT: s_cselect_b32 s2, s2, s3
; GFX10PLUS-NEXT: ; return to shader part epilog
%result = shl i65 %value, %amount
ret i65 %result
diff --git a/llvm/test/CodeGen/AMDGPU/ctlz.ll b/llvm/test/CodeGen/AMDGPU/ctlz.ll
index ba0a1e75e29b7..a0b549711f339 100644
--- a/llvm/test/CodeGen/AMDGPU/ctlz.ll
+++ b/llvm/test/CodeGen/AMDGPU/ctlz.ll
@@ -1593,7 +1593,7 @@ define amdgpu_kernel void @v_ctlz_i32_sel_ne_bitwidth(ptr addrspace(1) noalias %
; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX10-GISEL-NEXT: v_min_u32_e32 v1, 32, v1
; GFX10-GISEL-NEXT: v_subrev_nc_u32_e32 v1, 24, v1
-; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, -1, vcc_lo
+; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0xffff, vcc_lo
; GFX10-GISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX10-GISEL-NEXT: global_store_byte v1, v0, s[4:5]
; GFX10-GISEL-NEXT: s_endpgm
diff --git a/llvm/test/CodeGen/AMDGPU/ctlz_zero_undef.ll b/llvm/test/CodeGen/AMDGPU/ctlz_zero_undef.ll
index a55c8cdc9b6e8..2168e7fe1dd28 100644
--- a/llvm/test/CodeGen/AMDGPU/ctlz_zero_undef.ll
+++ b/llvm/test/CodeGen/AMDGPU/ctlz_zero_undef.ll
@@ -1706,11 +1706,12 @@ define amdgpu_kernel void @v_ctlz_zero_undef_i8_sel_eq_neg1(ptr addrspace(1) noa
; GFX9-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v2, v3, vcc
; GFX9-GISEL-NEXT: global_load_ubyte v0, v[0:1], off
; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, 0
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0xffff
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
-; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v2, 24, v0
-; GFX9-GISEL-NEXT: v_ffbh_u32_e32 v2, v2
-; GFX9-GISEL-NEXT: v_cmp_eq_u32_sdwa s[0:1], v0, v1 src0_sel:BYTE_0 src1_sel:DWORD
-; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v0, v2, -1, s[0:1]
+; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v3, 24, v0
+; GFX9-GISEL-NEXT: v_ffbh_u32_e32 v3, v3
+; GFX9-GISEL-NEXT: v_cmp_eq_u32_sdwa vcc, v0, v1 src0_sel:BYTE_0 src1_sel:DWORD
+; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
; GFX9-GISEL-NEXT: global_store_byte v1, v0, s[4:5]
; GFX9-GISEL-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
diff --git a/llvm/test/CodeGen/AMDGPU/cttz.ll b/llvm/test/CodeGen/AMDGPU/cttz.ll
index 57fe6cd4e1e45..14e6c4bcf6d8f 100644
--- a/llvm/test/CodeGen/AMDGPU/cttz.ll
+++ b/llvm/test/CodeGen/AMDGPU/cttz.ll
@@ -1359,7 +1359,7 @@ define amdgpu_kernel void @v_cttz_i32_sel_ne_bitwidth(ptr addrspace(1) noalias %
; GFX10-GISEL-NEXT: v_or_b32_e32 v1, 0x100, v0
; GFX10-GISEL-NEXT: v_cmp_eq_u32_sdwa s0, v0, v2 src0_sel:BYTE_0 src1_sel:DWORD
; GFX10-GISEL-NEXT: v_ffbl_b32_e32 v1, v1
-; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, -1, s0
+; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0xffff, s0
; GFX10-GISEL-NEXT: global_store_byte v2, v0, s[4:5]
; GFX10-GISEL-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
More information about the llvm-commits
mailing list