[llvm] 72901fe - [AArch64] Fold UBFMXri to UBFMWri when it's an LSR or LSL alias (#106968)
via llvm-commits <llvm-commits at lists.llvm.org>
Tue Sep 17 03:21:26 PDT 2024
Author: Csanád Hajdú
Date: 2024-09-17T11:21:23+01:00
New Revision: 72901fe19eb1e55d0ee1c380ab7a9f57d2f187c5
URL: https://github.com/llvm/llvm-project/commit/72901fe19eb1e55d0ee1c380ab7a9f57d2f187c5
DIFF: https://github.com/llvm/llvm-project/commit/72901fe19eb1e55d0ee1c380ab7a9f57d2f187c5.diff
LOG: [AArch64] Fold UBFMXri to UBFMWri when it's an LSR or LSL alias (#106968)
Using the LSR or LSL aliases of UBFM can be faster on some CPUs, so it
is worth rewriting 64-bit UBFM instructions that are equivalent to
32-bit LSR/LSL operations as their 32-bit variants.
This change folds the following patterns:
* If `Imms == 31` and `Immr <= Imms` (the LSR alias):
  `UBFMXri %0, Immr, Imms` -> `UBFMWri %0.sub_32, Immr, Imms`
* If `Immr == Imms + 33` (the LSL alias):
  `UBFMXri %0, Immr, Imms` -> `UBFMWri %0.sub_32, Immr - 32, Imms`
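
For reference, below is a minimal standalone sketch, not part of the patch,
that models UBFM bitfield-move semantics with illustrative helpers (`ubfm64`,
`ubfm32` are not LLVM APIs) and checks that both folds preserve the value:

```cpp
#include <cassert>
#include <cstdint>

// UBFM Xd, Xn, #Immr, #Imms:
//  * Imms >= Immr (UBFX form): extract Imms-Immr+1 bits starting at bit Immr.
//  * Imms <  Immr (UBFIZ form): place the low Imms+1 bits at bit 64-Immr.
static uint64_t ubfm64(uint64_t Src, unsigned Immr, unsigned Imms) {
  if (Imms >= Immr) {
    unsigned Width = Imms - Immr + 1;
    uint64_t Mask = Width == 64 ? ~0ULL : (1ULL << Width) - 1;
    return (Src >> Immr) & Mask;
  }
  return (Src & ((1ULL << (Imms + 1)) - 1)) << (64 - Immr);
}

// Same semantics on a 32-bit register.
static uint32_t ubfm32(uint32_t Src, unsigned Immr, unsigned Imms) {
  if (Imms >= Immr) {
    unsigned Width = Imms - Immr + 1;
    uint32_t Mask = Width == 32 ? ~0U : (1U << Width) - 1;
    return (Src >> Immr) & Mask;
  }
  return (Src & ((1U << (Imms + 1)) - 1)) << (32 - Immr);
}

int main() {
  const uint64_t X = 0x123456789abcdef0ULL;
  // LSR alias: Imms == 31, Immr <= Imms. Only the low 32 bits of the source
  // contribute to the result, so operating on the sub_32 register matches.
  for (unsigned Immr = 0; Immr <= 31; ++Immr)
    assert(ubfm64(X, Immr, 31) == ubfm32(uint32_t(X), Immr, 31));
  // LSL alias: Immr == Imms + 33, rewritten as UBFMWri with Immr - 32. The
  // result always fits in 32 bits, so zero-extending it back matches.
  for (unsigned Imms = 0; Imms <= 30; ++Imms)
    assert(ubfm64(X, Imms + 33, Imms) ==
           uint64_t(ubfm32(uint32_t(X), Imms + 1, Imms)));
  return 0;
}
```

In both cases the 32-bit result zero-extends to the original 64-bit value,
which is why the rewrite can reconstruct the X register with SUBREG_TO_REG
(writes to a W register zero the upper 32 bits).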
Added:
Modified:
llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp
llvm/test/CodeGen/AArch64/addsub-24bit-imm.mir
llvm/test/CodeGen/AArch64/arm64-bitfield-extract.ll
llvm/test/CodeGen/AArch64/arm64-ld-from-st.ll
llvm/test/CodeGen/AArch64/arm64_32.ll
llvm/test/CodeGen/AArch64/bitfield-extract.ll
llvm/test/CodeGen/AArch64/fast-isel-int-ext3.ll
llvm/test/CodeGen/AArch64/fast-isel-shift.ll
llvm/test/CodeGen/AArch64/machine_cse_impdef_killflags.ll
llvm/test/CodeGen/AArch64/trunc-to-tbl.ll
llvm/test/CodeGen/AArch64/xbfiz.ll
llvm/test/CodeGen/AArch64/zext-to-tbl.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp b/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
index 8031f19b63239e..1a9e5899892a1b 100644
--- a/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
+++ b/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
@@ -1321,6 +1321,17 @@ AArch64LoadStoreOpt::promoteLoadFromStore(MachineBasicBlock::iterator LoadI,
.add(StMO)
.addImm(AndMaskEncoded)
.setMIFlags(LoadI->getFlags());
+ } else if (IsStoreXReg && Imms == 31) {
+ // Use the 32 bit variant of UBFM if it's the LSR alias of the
+ // instruction.
+ assert(Immr <= Imms && "Expected LSR alias of UBFM");
+ BitExtMI = BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(),
+ TII->get(AArch64::UBFMWri),
+ TRI->getSubReg(DestReg, AArch64::sub_32))
+ .addReg(TRI->getSubReg(StRt, AArch64::sub_32))
+ .addImm(Immr)
+ .addImm(Imms)
+ .setMIFlags(LoadI->getFlags());
} else {
BitExtMI =
BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(),
diff --git a/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp b/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp
index 6a18c4516418bd..94a49bde74fd62 100644
--- a/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp
+++ b/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp
@@ -64,6 +64,9 @@
// 8. Remove redundant CSELs that select between identical registers, by
// replacing them with unconditional moves.
//
+// 9. Replace UBFMXri with UBFMWri if the instruction is equivalent to a 32 bit
+// LSR or LSL alias of UBFM.
+//
//===----------------------------------------------------------------------===//
#include "AArch64ExpandImm.h"
@@ -132,6 +135,7 @@ struct AArch64MIPeepholeOpt : public MachineFunctionPass {
bool visitINSviGPR(MachineInstr &MI, unsigned Opc);
bool visitINSvi64lane(MachineInstr &MI);
bool visitFMOVDr(MachineInstr &MI);
+ bool visitUBFMXri(MachineInstr &MI);
bool visitCopy(MachineInstr &MI);
bool runOnMachineFunction(MachineFunction &MF) override;
@@ -715,6 +719,57 @@ bool AArch64MIPeepholeOpt::visitFMOVDr(MachineInstr &MI) {
return true;
}
+bool AArch64MIPeepholeOpt::visitUBFMXri(MachineInstr &MI) {
+ // Check if the instruction is equivalent to a 32 bit LSR or LSL alias of
+ // UBFM, and replace the UBFMXri instruction with its 32 bit variant, UBFMWri.
+ int64_t Immr = MI.getOperand(2).getImm();
+ int64_t Imms = MI.getOperand(3).getImm();
+
+ bool IsLSR = Imms == 31 && Immr <= Imms;
+ bool IsLSL = Immr == Imms + 33;
+ if (!IsLSR && !IsLSL)
+ return false;
+
+ if (IsLSL) {
+ Immr -= 32;
+ }
+
+ const TargetRegisterClass *DstRC64 =
+ TII->getRegClass(TII->get(MI.getOpcode()), 0, TRI, *MI.getMF());
+ const TargetRegisterClass *DstRC32 =
+ TRI->getSubRegisterClass(DstRC64, AArch64::sub_32);
+ assert(DstRC32 && "Destination register class of UBFMXri doesn't have a "
+ "sub_32 subregister class");
+
+ const TargetRegisterClass *SrcRC64 =
+ TII->getRegClass(TII->get(MI.getOpcode()), 1, TRI, *MI.getMF());
+ const TargetRegisterClass *SrcRC32 =
+ TRI->getSubRegisterClass(SrcRC64, AArch64::sub_32);
+ assert(SrcRC32 && "Source register class of UBFMXri doesn't have a sub_32 "
+ "subregister class");
+
+ Register DstReg64 = MI.getOperand(0).getReg();
+ Register DstReg32 = MRI->createVirtualRegister(DstRC32);
+ Register SrcReg64 = MI.getOperand(1).getReg();
+ Register SrcReg32 = MRI->createVirtualRegister(SrcRC32);
+
+ BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), TII->get(AArch64::COPY),
+ SrcReg32)
+ .addReg(SrcReg64, 0, AArch64::sub_32);
+ BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), TII->get(AArch64::UBFMWri),
+ DstReg32)
+ .addReg(SrcReg32)
+ .addImm(Immr)
+ .addImm(Imms);
+ BuildMI(*MI.getParent(), MI, MI.getDebugLoc(),
+ TII->get(AArch64::SUBREG_TO_REG), DstReg64)
+ .addImm(0)
+ .addReg(DstReg32)
+ .addImm(AArch64::sub_32);
+ MI.eraseFromParent();
+ return true;
+}
+
// Across a basic-block we might have an i32 extract from a value that only
// operates on upper bits (for example a sxtw). We can replace the COPY with a
// new version skipping the sxtw.
@@ -865,6 +920,9 @@ bool AArch64MIPeepholeOpt::runOnMachineFunction(MachineFunction &MF) {
case AArch64::FMOVDr:
Changed |= visitFMOVDr(MI);
break;
+ case AArch64::UBFMXri:
+ Changed |= visitUBFMXri(MI);
+ break;
case AArch64::COPY:
Changed |= visitCopy(MI);
break;
diff --git a/llvm/test/CodeGen/AArch64/addsub-24bit-imm.mir b/llvm/test/CodeGen/AArch64/addsub-24bit-imm.mir
index 98a73bdd932034..f6c0ec083096ec 100644
--- a/llvm/test/CodeGen/AArch64/addsub-24bit-imm.mir
+++ b/llvm/test/CodeGen/AArch64/addsub-24bit-imm.mir
@@ -36,8 +36,10 @@ body: |
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64common = COPY $x0
; CHECK-NEXT: [[ADDXri:%[0-9]+]]:gpr64sp = ADDXri [[COPY]], 273, 12
; CHECK-NEXT: [[ADDXri1:%[0-9]+]]:gpr64common = ADDXri [[ADDXri]], 3549, 0
- ; CHECK-NEXT: [[UBFMXri:%[0-9]+]]:gpr64 = UBFMXri [[ADDXri1]], 28, 31
- ; CHECK-NEXT: $x0 = COPY [[UBFMXri]]
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY [[ADDXri1]].sub_32
+ ; CHECK-NEXT: [[UBFMWri:%[0-9]+]]:gpr32 = UBFMWri [[COPY1]], 28, 31
+ ; CHECK-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, [[UBFMWri]], %subreg.sub_32
+ ; CHECK-NEXT: $x0 = COPY [[SUBREG_TO_REG]]
; CHECK-NEXT: RET_ReallyLR implicit $x0
%0:gpr64 = COPY $x0
%1:gpr32 = MOVi32imm 1121757
@@ -58,8 +60,10 @@ body: |
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64common = COPY $x0
; CHECK-NEXT: [[SUBXri:%[0-9]+]]:gpr64sp = SUBXri [[COPY]], 273, 12
; CHECK-NEXT: [[SUBXri1:%[0-9]+]]:gpr64common = SUBXri [[SUBXri]], 3549, 0
- ; CHECK-NEXT: [[UBFMXri:%[0-9]+]]:gpr64 = UBFMXri [[SUBXri1]], 28, 31
- ; CHECK-NEXT: $x0 = COPY [[UBFMXri]]
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY [[SUBXri1]].sub_32
+ ; CHECK-NEXT: [[UBFMWri:%[0-9]+]]:gpr32 = UBFMWri [[COPY1]], 28, 31
+ ; CHECK-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, [[UBFMWri]], %subreg.sub_32
+ ; CHECK-NEXT: $x0 = COPY [[SUBREG_TO_REG]]
; CHECK-NEXT: RET_ReallyLR implicit $x0
%0:gpr64 = COPY $x0
%1:gpr64 = MOVi64imm -1121757
diff --git a/llvm/test/CodeGen/AArch64/arm64-bitfield-extract.ll b/llvm/test/CodeGen/AArch64/arm64-bitfield-extract.ll
index 2b42a3f29a72be..bdbff0563a22b9 100644
--- a/llvm/test/CodeGen/AArch64/arm64-bitfield-extract.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-bitfield-extract.ll
@@ -810,7 +810,7 @@ define i32 @fct19(i64 %arg1) nounwind readonly ssp {
; LLC-NEXT: add w0, w8, #16
; LLC-NEXT: ret
; LLC-NEXT: .LBB26_4: // %if.end13
-; LLC-NEXT: ubfx x8, x0, #16, #16
+; LLC-NEXT: lsr w8, w0, #16
; LLC-NEXT: cbz w8, .LBB26_6
; LLC-NEXT: // %bb.5: // %if.then17
; LLC-NEXT: adrp x9, first_ones
diff --git a/llvm/test/CodeGen/AArch64/arm64-ld-from-st.ll b/llvm/test/CodeGen/AArch64/arm64-ld-from-st.ll
index 44e7bdc37f449a..533a436868300f 100644
--- a/llvm/test/CodeGen/AArch64/arm64-ld-from-st.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-ld-from-st.ll
@@ -64,7 +64,7 @@ define i16 @Str64Ldr16_1(ptr nocapture %P, i64 %v, i64 %n) {
; CHECK-LABEL: Str64Ldr16_1:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: str x1, [x0, #8]
-; CHECK-NEXT: ubfx x0, x1, #16, #16
+; CHECK-NEXT: lsr w0, w1, #16
; CHECK-NEXT: ret
entry:
%arrayidx0 = getelementptr inbounds i64, ptr %P, i64 1
@@ -149,7 +149,7 @@ define i8 @Str64Ldr8_3(ptr nocapture %P, i64 %v, i64 %n) {
; CHECK-LABEL: Str64Ldr8_3:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: str x1, [x0, #8]
-; CHECK-NEXT: ubfx x0, x1, #24, #8
+; CHECK-NEXT: lsr w0, w1, #24
; CHECK-NEXT: ret
entry:
%arrayidx0 = getelementptr inbounds i64, ptr %P, i64 1
@@ -424,7 +424,7 @@ define i16 @Unscaled_Str64Ldr16_1(ptr nocapture %P, i64 %v, i64 %n) {
; CHECK-LABEL: Unscaled_Str64Ldr16_1:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: stur x1, [x0, #-8]
-; CHECK-NEXT: ubfx x0, x1, #16, #16
+; CHECK-NEXT: lsr w0, w1, #16
; CHECK-NEXT: ret
entry:
%arrayidx0 = getelementptr inbounds i64, ptr %P, i64 -1
@@ -509,7 +509,7 @@ define i8 @Unscaled_Str64Ldr8_3(ptr nocapture %P, i64 %v, i64 %n) {
; CHECK-LABEL: Unscaled_Str64Ldr8_3:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: stur x1, [x0, #-8]
-; CHECK-NEXT: ubfx x0, x1, #24, #8
+; CHECK-NEXT: lsr w0, w1, #24
; CHECK-NEXT: ret
entry:
%arrayidx0 = getelementptr inbounds i64, ptr %P, i64 -1
diff --git a/llvm/test/CodeGen/AArch64/arm64_32.ll b/llvm/test/CodeGen/AArch64/arm64_32.ll
index c63edf0ceeea37..cddadcab9cde18 100644
--- a/llvm/test/CodeGen/AArch64/arm64_32.ll
+++ b/llvm/test/CodeGen/AArch64/arm64_32.ll
@@ -677,7 +677,7 @@ declare i64 @get_int()
define i1 @test_icmp_ptr(ptr %in) {
; CHECK-LABEL: test_icmp_ptr
-; CHECK: ubfx x0, x0, #31, #1
+; CHECK: lsr w0, w0, #31
%res = icmp slt ptr %in, null
ret i1 %res
}
diff --git a/llvm/test/CodeGen/AArch64/bitfield-extract.ll b/llvm/test/CodeGen/AArch64/bitfield-extract.ll
index 9c8871af4a5e9e..9caf34860051c7 100644
--- a/llvm/test/CodeGen/AArch64/bitfield-extract.ll
+++ b/llvm/test/CodeGen/AArch64/bitfield-extract.ll
@@ -99,7 +99,7 @@ declare void @use(i16 signext, i64)
; CHECK-LABEL: test_complex_node:
; CHECK: ldr d0, [x0], #8
-; CHECK: ubfx x[[VAL:[0-9]+]], x0, #5, #27
+; CHECK: lsr w[[VAL:[0-9]+]], w0, #5
; CHECK: str w[[VAL]], [x2]
define <2 x i32> @test_complex_node(ptr %addr, ptr %addr2, ptr %bf ) {
%vec = load <2 x i32>, ptr %addr
@@ -113,3 +113,11 @@ define <2 x i32> @test_complex_node(ptr %addr, ptr %addr2, ptr %bf ) {
ret <2 x i32> %vec
}
+
+; CHECK-LABEL: @test12
+; CHECK: lsr w0, w0, #10
+define i32 @test12(i64 %a) {
+ %tmp = trunc i64 %a to i32
+ %res = lshr i32 %tmp, 10
+ ret i32 %res
+}
diff --git a/llvm/test/CodeGen/AArch64/fast-isel-int-ext3.ll b/llvm/test/CodeGen/AArch64/fast-isel-int-ext3.ll
index 7602ce3fb1c7f8..32a8a943945fc9 100644
--- a/llvm/test/CodeGen/AArch64/fast-isel-int-ext3.ll
+++ b/llvm/test/CodeGen/AArch64/fast-isel-int-ext3.ll
@@ -52,7 +52,7 @@ define i64 @load_unscaled_zext_i16_to_i64(i64 %a) {
define i64 @load_unscaled_zext_i32_to_i64(i64 %a) {
; CHECK-LABEL: load_unscaled_zext_i32_to_i64
; CHECK: ldur w[[REG:[0-9]+]], [x0, #-8]
-; CHECK: ubfx x0, x[[REG]], #0, #32
+; CHECK: lsr w0, w[[REG]], #0
%1 = sub i64 %a, 8
%2 = inttoptr i64 %1 to ptr addrspace(256)
%3 = load i32, ptr addrspace(256) %2
diff --git a/llvm/test/CodeGen/AArch64/fast-isel-shift.ll b/llvm/test/CodeGen/AArch64/fast-isel-shift.ll
index 95891db80bc4ed..76f81719b4547e 100644
--- a/llvm/test/CodeGen/AArch64/fast-isel-shift.ll
+++ b/llvm/test/CodeGen/AArch64/fast-isel-shift.ll
@@ -681,7 +681,7 @@ define i64 @shl_zext_zero(i32 %a) {
; CHECK-LABEL: shl_zext_zero:
; CHECK: ; %bb.0:
; CHECK-NEXT: ; kill: def $w0 killed $w0 def $x0
-; CHECK-NEXT: ubfx x0, x0, #0, #32
+; CHECK-NEXT: lsr w0, w0, #0
; CHECK-NEXT: ret
%1 = zext i32 %a to i64
%2 = shl i64 %1, 0
@@ -692,7 +692,7 @@ define i64 @lshr_zext_zero(i32 %a) {
; CHECK-LABEL: lshr_zext_zero:
; CHECK: ; %bb.0:
; CHECK-NEXT: ; kill: def $w0 killed $w0 def $x0
-; CHECK-NEXT: ubfx x0, x0, #0, #32
+; CHECK-NEXT: lsr w0, w0, #0
; CHECK-NEXT: ret
%1 = zext i32 %a to i64
%2 = lshr i64 %1, 0
@@ -703,7 +703,7 @@ define i64 @ashr_zext_zero(i32 %a) {
; CHECK-LABEL: ashr_zext_zero:
; CHECK: ; %bb.0:
; CHECK-NEXT: ; kill: def $w0 killed $w0 def $x0
-; CHECK-NEXT: ubfx x0, x0, #0, #32
+; CHECK-NEXT: lsr w0, w0, #0
; CHECK-NEXT: ret
%1 = zext i32 %a to i64
%2 = ashr i64 %1, 0
diff --git a/llvm/test/CodeGen/AArch64/machine_cse_impdef_killflags.ll b/llvm/test/CodeGen/AArch64/machine_cse_impdef_killflags.ll
index be1c0f21cc77ab..398a2ac24e5d03 100644
--- a/llvm/test/CodeGen/AArch64/machine_cse_impdef_killflags.ll
+++ b/llvm/test/CodeGen/AArch64/machine_cse_impdef_killflags.ll
@@ -12,7 +12,7 @@ define i64 @csed_impdef_killflag(i64 %a) {
; CHECK-NEXT: mov x9, #2 ; =0x2
; CHECK-NEXT: csel w8, wzr, w8, ne
; CHECK-NEXT: mov x10, #3 ; =0x3
-; CHECK-NEXT: ubfx x8, x8, #0, #32
+; CHECK-NEXT: lsr w8, w8, #0
; CHECK-NEXT: csel x9, x9, x10, ne
; CHECK-NEXT: add x0, x9, x8
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/trunc-to-tbl.ll b/llvm/test/CodeGen/AArch64/trunc-to-tbl.ll
index c4a58ba12dc6be..c838ffb0a6576e 100644
--- a/llvm/test/CodeGen/AArch64/trunc-to-tbl.ll
+++ b/llvm/test/CodeGen/AArch64/trunc-to-tbl.ll
@@ -575,7 +575,7 @@ define void @trunc_v8i19_to_v8i8_in_loop(ptr %A, ptr %dst) {
; CHECK-NEXT: ldrb w14, [x0, #18]
; CHECK-NEXT: ldrh w15, [x0, #16]
; CHECK-NEXT: add x0, x0, #32
-; CHECK-NEXT: ubfx x12, x10, #12, #20
+; CHECK-NEXT: lsr w12, w10, #12
; CHECK-NEXT: fmov s1, w9
; CHECK-NEXT: lsr x11, x9, #19
; CHECK-NEXT: lsr x13, x10, #31
@@ -586,7 +586,7 @@ define void @trunc_v8i19_to_v8i8_in_loop(ptr %A, ptr %dst) {
; CHECK-NEXT: orr x11, x15, x14, lsl #16
; CHECK-NEXT: mov.s v0[1], w13
; CHECK-NEXT: extr x13, x11, x10, #50
-; CHECK-NEXT: ubfx x10, x11, #5, #27
+; CHECK-NEXT: lsr w10, w11, #5
; CHECK-NEXT: mov.s v1[2], w12
; CHECK-NEXT: mov.s v0[2], w13
; CHECK-NEXT: mov.s v1[3], w9
@@ -616,14 +616,14 @@ define void @trunc_v8i19_to_v8i8_in_loop(ptr %A, ptr %dst) {
; CHECK-BE-NEXT: lsr x15, x10, #40
; CHECK-BE-NEXT: extr x12, x12, x11, #57
; CHECK-BE-NEXT: fmov s0, w13
-; CHECK-BE-NEXT: ubfx x13, x10, #7, #25
+; CHECK-BE-NEXT: lsr w13, w10, #7
; CHECK-BE-NEXT: extr x14, x15, x14, #50
-; CHECK-BE-NEXT: ubfx x15, x9, #14, #18
+; CHECK-BE-NEXT: lsr w15, w9, #14
; CHECK-BE-NEXT: extr x9, x10, x9, #40
; CHECK-BE-NEXT: fmov s1, w12
; CHECK-BE-NEXT: orr w12, w17, w16, lsl #8
; CHECK-BE-NEXT: mov v0.s[1], w14
-; CHECK-BE-NEXT: ubfx x9, x9, #12, #20
+; CHECK-BE-NEXT: lsr w9, w9, #12
; CHECK-BE-NEXT: orr w11, w12, w11
; CHECK-BE-NEXT: mov v1.s[1], w15
; CHECK-BE-NEXT: lsr w11, w11, #19
@@ -657,14 +657,14 @@ define void @trunc_v8i19_to_v8i8_in_loop(ptr %A, ptr %dst) {
; CHECK-DISABLE-NEXT: lsr x15, x10, #40
; CHECK-DISABLE-NEXT: extr x12, x12, x11, #57
; CHECK-DISABLE-NEXT: fmov s0, w13
-; CHECK-DISABLE-NEXT: ubfx x13, x10, #7, #25
+; CHECK-DISABLE-NEXT: lsr w13, w10, #7
; CHECK-DISABLE-NEXT: extr x14, x15, x14, #50
-; CHECK-DISABLE-NEXT: ubfx x15, x9, #14, #18
+; CHECK-DISABLE-NEXT: lsr w15, w9, #14
; CHECK-DISABLE-NEXT: extr x9, x10, x9, #40
; CHECK-DISABLE-NEXT: fmov s1, w12
; CHECK-DISABLE-NEXT: orr w12, w17, w16, lsl #8
; CHECK-DISABLE-NEXT: mov v0.s[1], w14
-; CHECK-DISABLE-NEXT: ubfx x9, x9, #12, #20
+; CHECK-DISABLE-NEXT: lsr w9, w9, #12
; CHECK-DISABLE-NEXT: orr w11, w12, w11
; CHECK-DISABLE-NEXT: mov v1.s[1], w15
; CHECK-DISABLE-NEXT: lsr w11, w11, #19
diff --git a/llvm/test/CodeGen/AArch64/xbfiz.ll b/llvm/test/CodeGen/AArch64/xbfiz.ll
index 3211cc3f2cedbf..b777ddcb7efcc4 100644
--- a/llvm/test/CodeGen/AArch64/xbfiz.ll
+++ b/llvm/test/CodeGen/AArch64/xbfiz.ll
@@ -61,3 +61,11 @@ define i32 @noubfiz32(i32 %v) {
%add = add i32 %shl, %and
ret i32 %add
}
+
+define i64 @lsl32_not_ubfiz64(i64 %v) {
+; CHECK-LABEL: lsl32_not_ubfiz64:
+; CHECK: lsl w0, w0, #6
+ %shl = shl i64 %v, 6
+ %and = and i64 %shl, 4294967295
+ ret i64 %and
+}
diff --git a/llvm/test/CodeGen/AArch64/zext-to-tbl.ll b/llvm/test/CodeGen/AArch64/zext-to-tbl.ll
index bb98a7c1dcb0e8..a583ee01dd6634 100644
--- a/llvm/test/CodeGen/AArch64/zext-to-tbl.ll
+++ b/llvm/test/CodeGen/AArch64/zext-to-tbl.ll
@@ -1259,7 +1259,7 @@ define void @zext_v16i4_to_v16i32_in_loop(ptr %src, ptr %dst) {
; CHECK-NEXT: mov.b v1[5], w10
; CHECK-NEXT: ubfx w10, w9, #24, #4
; CHECK-NEXT: mov.b v1[6], w10
-; CHECK-NEXT: ubfx x10, x9, #28, #4
+; CHECK-NEXT: lsr w10, w9, #28
; CHECK-NEXT: mov.b v1[7], w10
; CHECK-NEXT: ubfx x10, x9, #32, #4
; CHECK-NEXT: mov.b v1[8], w10
@@ -1322,7 +1322,7 @@ define void @zext_v16i4_to_v16i32_in_loop(ptr %src, ptr %dst) {
; CHECK-BE-NEXT: mov v1.b[6], w10
; CHECK-BE-NEXT: ubfx x10, x9, #32, #4
; CHECK-BE-NEXT: mov v1.b[7], w10
-; CHECK-BE-NEXT: ubfx x10, x9, #28, #4
+; CHECK-BE-NEXT: lsr w10, w9, #28
; CHECK-BE-NEXT: mov v1.b[8], w10
; CHECK-BE-NEXT: ubfx w10, w9, #24, #4
; CHECK-BE-NEXT: mov v1.b[9], w10