[llvm] [SelectionDAG] Fix and improve TargetLowering::SimplifySetCC (PR #87646)

Björn Pettersson via llvm-commits llvm-commits at lists.llvm.org
Fri Apr 5 01:19:19 PDT 2024


https://github.com/bjope updated https://github.com/llvm/llvm-project/pull/87646

>From e5a9f18d51f9e905bb89c4470139108375f73901 Mon Sep 17 00:00:00 2001
From: Bjorn Pettersson <bjorn.a.pettersson at ericsson.com>
Date: Wed, 3 Apr 2024 19:00:17 +0200
Subject: [PATCH 1/2] [ARM][PowerPC] Add regression tests for narrowing load in
 TargetLowering::SimplifySetCC

These test cases show some miscompiles for big-endian when dealing
with non byte-sized loads. One part of the problem is that LLVM IR
doesn't really specify where the padding goes for non byte-sized
loads/stores. So currently TargetLowering::SimplifySetCC can't assume
anything about it. But the implementation also does not consider that
the TypeStoreSize could be larger than the TypeSize, resulting in
the offset calculation being wrong for big-endian.
---
 .../CodeGen/ARM/simplifysetcc_narrow_load.ll  | 665 ++++++++++++++++++
 .../PowerPC/simplifysetcc_narrow_load.ll      | 390 ++++++++++
 2 files changed, 1055 insertions(+)
 create mode 100644 llvm/test/CodeGen/ARM/simplifysetcc_narrow_load.ll
 create mode 100644 llvm/test/CodeGen/PowerPC/simplifysetcc_narrow_load.ll

diff --git a/llvm/test/CodeGen/ARM/simplifysetcc_narrow_load.ll b/llvm/test/CodeGen/ARM/simplifysetcc_narrow_load.ll
new file mode 100644
index 00000000000000..2cba4b46f9a8a4
--- /dev/null
+++ b/llvm/test/CodeGen/ARM/simplifysetcc_narrow_load.ll
@@ -0,0 +1,665 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc -O1 -mtriple arm -o - %s | FileCheck --check-prefix CHECK-LE %s
+; RUN: llc -O1 -mtriple armv7 -o - %s | FileCheck --check-prefix CHECK-V7-LE %s
+; RUN: llc -O1 -mtriple armeb -o - %s | FileCheck --check-prefix CHECK-BE %s
+; RUN: llc -O1 -mtriple armv7eb -o - %s | FileCheck --check-prefix CHECK-V7-BE %s
+
+; A collection of regression tests to verify the load-narrowing part of
+; TargetLowering::SimplifySetCC (and/or other similar rewrites such as
+; combining AND+LOAD into ZEXTLOAD).
+;
+; Using both arm and armv7 to show that alignment restrictions are
+; considered for the narrowed load (armv7 is a bit more relaxed when it
+; comes to unaligned memory accesses).
+
+;--------------------------------------------------------------------------
+; Test non byte-sized types.
+;
+; As long as LLVM IR isn't defining where the padding goes we can't really
+; optimize these (without adding a target lowering hook that can inform
+; ISel about which bits are padding).
+; --------------------------------------------------------------------------
+
+define i1 @test_129_15_0(ptr %y) {
+; CHECK-LE-LABEL: test_129_15_0:
+; CHECK-LE:       @ %bb.0:
+; CHECK-LE-NEXT:    ldrh r0, [r0]
+; CHECK-LE-NEXT:    mov r1, #255
+; CHECK-LE-NEXT:    orr r1, r1, #32512
+; CHECK-LE-NEXT:    ands r0, r0, r1
+; CHECK-LE-NEXT:    movne r0, #1
+; CHECK-LE-NEXT:    mov pc, lr
+;
+; CHECK-V7-LE-LABEL: test_129_15_0:
+; CHECK-V7-LE:       @ %bb.0:
+; CHECK-V7-LE-NEXT:    ldrh r0, [r0]
+; CHECK-V7-LE-NEXT:    bfc r0, #15, #17
+; CHECK-V7-LE-NEXT:    cmp r0, #0
+; CHECK-V7-LE-NEXT:    movwne r0, #1
+; CHECK-V7-LE-NEXT:    bx lr
+;
+; CHECK-BE-LABEL: test_129_15_0:
+; CHECK-BE:       @ %bb.0:
+; CHECK-BE-NEXT:    ldrh r0, [r0, #14]
+; CHECK-BE-NEXT:    mov r1, #255
+; CHECK-BE-NEXT:    orr r1, r1, #32512
+; CHECK-BE-NEXT:    ands r0, r0, r1
+; CHECK-BE-NEXT:    movne r0, #1
+; CHECK-BE-NEXT:    mov pc, lr
+;
+; CHECK-V7-BE-LABEL: test_129_15_0:
+; CHECK-V7-BE:       @ %bb.0:
+; CHECK-V7-BE-NEXT:    ldrh r0, [r0, #14]
+; CHECK-V7-BE-NEXT:    bfc r0, #15, #17
+; CHECK-V7-BE-NEXT:    cmp r0, #0
+; CHECK-V7-BE-NEXT:    movwne r0, #1
+; CHECK-V7-BE-NEXT:    bx lr
+  %a = load i129, ptr %y
+  %b = and i129 %a, u0x7fff
+  %cmp = icmp ne i129 %b, 0
+  ret i1 %cmp
+}
+
+define i1 @test_126_20_4(ptr %y) {
+; CHECK-LE-LABEL: test_126_20_4:
+; CHECK-LE:       @ %bb.0:
+; CHECK-LE-NEXT:    ldr r0, [r0]
+; CHECK-LE-NEXT:    mvn r1, #15
+; CHECK-LE-NEXT:    sub r1, r1, #-16777216
+; CHECK-LE-NEXT:    ands r0, r0, r1
+; CHECK-LE-NEXT:    movne r0, #1
+; CHECK-LE-NEXT:    mov pc, lr
+;
+; CHECK-V7-LE-LABEL: test_126_20_4:
+; CHECK-V7-LE:       @ %bb.0:
+; CHECK-V7-LE-NEXT:    ldr r0, [r0]
+; CHECK-V7-LE-NEXT:    movw r1, #65520
+; CHECK-V7-LE-NEXT:    movt r1, #255
+; CHECK-V7-LE-NEXT:    ands r0, r0, r1
+; CHECK-V7-LE-NEXT:    movwne r0, #1
+; CHECK-V7-LE-NEXT:    bx lr
+;
+; CHECK-BE-LABEL: test_126_20_4:
+; CHECK-BE:       @ %bb.0:
+; CHECK-BE-NEXT:    ldr r0, [r0, #12]
+; CHECK-BE-NEXT:    mvn r1, #15
+; CHECK-BE-NEXT:    sub r1, r1, #-16777216
+; CHECK-BE-NEXT:    ands r0, r0, r1
+; CHECK-BE-NEXT:    movne r0, #1
+; CHECK-BE-NEXT:    mov pc, lr
+;
+; CHECK-V7-BE-LABEL: test_126_20_4:
+; CHECK-V7-BE:       @ %bb.0:
+; CHECK-V7-BE-NEXT:    ldr r0, [r0, #12]
+; CHECK-V7-BE-NEXT:    movw r1, #65520
+; CHECK-V7-BE-NEXT:    movt r1, #255
+; CHECK-V7-BE-NEXT:    ands r0, r0, r1
+; CHECK-V7-BE-NEXT:    movwne r0, #1
+; CHECK-V7-BE-NEXT:    bx lr
+  %a = load i126, ptr %y
+  %b = and i126 %a, u0xfffff0
+  %cmp = icmp ne i126 %b, 0
+  ret i1 %cmp
+}
+
+define i1 @test_33_8_0(ptr %y) {
+; CHECK-LE-LABEL: test_33_8_0:
+; CHECK-LE:       @ %bb.0:
+; CHECK-LE-NEXT:    ldrb r0, [r0]
+; CHECK-LE-NEXT:    cmp r0, #0
+; CHECK-LE-NEXT:    movne r0, #1
+; CHECK-LE-NEXT:    mov pc, lr
+;
+; CHECK-V7-LE-LABEL: test_33_8_0:
+; CHECK-V7-LE:       @ %bb.0:
+; CHECK-V7-LE-NEXT:    ldrb r0, [r0]
+; CHECK-V7-LE-NEXT:    cmp r0, #0
+; CHECK-V7-LE-NEXT:    movwne r0, #1
+; CHECK-V7-LE-NEXT:    bx lr
+;
+; CHECK-BE-LABEL: test_33_8_0:
+; CHECK-BE:       @ %bb.0:
+; CHECK-BE-NEXT:    ldrb r0, [r0, #3]
+; CHECK-BE-NEXT:    cmp r0, #0
+; CHECK-BE-NEXT:    movne r0, #1
+; CHECK-BE-NEXT:    mov pc, lr
+;
+; CHECK-V7-BE-LABEL: test_33_8_0:
+; CHECK-V7-BE:       @ %bb.0:
+; CHECK-V7-BE-NEXT:    ldrb r0, [r0, #3]
+; CHECK-V7-BE-NEXT:    cmp r0, #0
+; CHECK-V7-BE-NEXT:    movwne r0, #1
+; CHECK-V7-BE-NEXT:    bx lr
+  %a = load i33, ptr %y
+  %b = and i33 %a, u0xff
+  %cmp = icmp ne i33 %b, 0
+  ret i1 %cmp
+}
+
+define i1 @test_33_1_32(ptr %y) {
+; CHECK-LE-LABEL: test_33_1_32:
+; CHECK-LE:       @ %bb.0:
+; CHECK-LE-NEXT:    ldrb r0, [r0, #4]
+; CHECK-LE-NEXT:    mov pc, lr
+;
+; CHECK-V7-LE-LABEL: test_33_1_32:
+; CHECK-V7-LE:       @ %bb.0:
+; CHECK-V7-LE-NEXT:    ldrb r0, [r0, #4]
+; CHECK-V7-LE-NEXT:    bx lr
+;
+; CHECK-BE-LABEL: test_33_1_32:
+; CHECK-BE:       @ %bb.0:
+; CHECK-BE-NEXT:    ldr r0, [r0]
+; CHECK-BE-NEXT:    lsr r0, r0, #24
+; CHECK-BE-NEXT:    mov pc, lr
+;
+; CHECK-V7-BE-LABEL: test_33_1_32:
+; CHECK-V7-BE:       @ %bb.0:
+; CHECK-V7-BE-NEXT:    ldr r0, [r0]
+; CHECK-V7-BE-NEXT:    lsr r0, r0, #24
+; CHECK-V7-BE-NEXT:    bx lr
+  %a = load i33, ptr %y
+  %b = and i33 %a, u0x100000000
+  %cmp = icmp ne i33 %b, 0
+  ret i1 %cmp
+}
+
+define i1 @test_33_1_31(ptr %y) {
+; CHECK-LE-LABEL: test_33_1_31:
+; CHECK-LE:       @ %bb.0:
+; CHECK-LE-NEXT:    ldrb r0, [r0, #3]
+; CHECK-LE-NEXT:    lsr r0, r0, #7
+; CHECK-LE-NEXT:    mov pc, lr
+;
+; CHECK-V7-LE-LABEL: test_33_1_31:
+; CHECK-V7-LE:       @ %bb.0:
+; CHECK-V7-LE-NEXT:    ldrb r0, [r0, #3]
+; CHECK-V7-LE-NEXT:    lsr r0, r0, #7
+; CHECK-V7-LE-NEXT:    bx lr
+;
+; CHECK-BE-LABEL: test_33_1_31:
+; CHECK-BE:       @ %bb.0:
+; CHECK-BE-NEXT:    ldrb r0, [r0]
+; CHECK-BE-NEXT:    lsr r0, r0, #7
+; CHECK-BE-NEXT:    mov pc, lr
+;
+; CHECK-V7-BE-LABEL: test_33_1_31:
+; CHECK-V7-BE:       @ %bb.0:
+; CHECK-V7-BE-NEXT:    ldrb r0, [r0]
+; CHECK-V7-BE-NEXT:    lsr r0, r0, #7
+; CHECK-V7-BE-NEXT:    bx lr
+  %a = load i33, ptr %y
+  %b = and i33 %a, u0x80000000
+  %cmp = icmp ne i33 %b, 0
+  ret i1 %cmp
+}
+
+define i1 @test_33_1_0(ptr %y) {
+; CHECK-LE-LABEL: test_33_1_0:
+; CHECK-LE:       @ %bb.0:
+; CHECK-LE-NEXT:    ldrb r0, [r0]
+; CHECK-LE-NEXT:    and r0, r0, #1
+; CHECK-LE-NEXT:    mov pc, lr
+;
+; CHECK-V7-LE-LABEL: test_33_1_0:
+; CHECK-V7-LE:       @ %bb.0:
+; CHECK-V7-LE-NEXT:    ldrb r0, [r0]
+; CHECK-V7-LE-NEXT:    and r0, r0, #1
+; CHECK-V7-LE-NEXT:    bx lr
+;
+; CHECK-BE-LABEL: test_33_1_0:
+; CHECK-BE:       @ %bb.0:
+; CHECK-BE-NEXT:    ldrb r0, [r0, #3]
+; CHECK-BE-NEXT:    and r0, r0, #1
+; CHECK-BE-NEXT:    mov pc, lr
+;
+; CHECK-V7-BE-LABEL: test_33_1_0:
+; CHECK-V7-BE:       @ %bb.0:
+; CHECK-V7-BE-NEXT:    ldrb r0, [r0, #3]
+; CHECK-V7-BE-NEXT:    and r0, r0, #1
+; CHECK-V7-BE-NEXT:    bx lr
+  %a = load i33, ptr %y
+  %b = and i33 %a, u0x1
+  %cmp = icmp ne i33 %b, 0
+  ret i1 %cmp
+}
+
+;--------------------------------------------------------------------------
+; Test byte-sized types.
+;--------------------------------------------------------------------------
+
+
+define i1 @test_128_20_4(ptr %y) {
+; CHECK-LE-LABEL: test_128_20_4:
+; CHECK-LE:       @ %bb.0:
+; CHECK-LE-NEXT:    ldr r0, [r0]
+; CHECK-LE-NEXT:    mvn r1, #15
+; CHECK-LE-NEXT:    sub r1, r1, #-16777216
+; CHECK-LE-NEXT:    ands r0, r0, r1
+; CHECK-LE-NEXT:    movne r0, #1
+; CHECK-LE-NEXT:    mov pc, lr
+;
+; CHECK-V7-LE-LABEL: test_128_20_4:
+; CHECK-V7-LE:       @ %bb.0:
+; CHECK-V7-LE-NEXT:    ldr r0, [r0]
+; CHECK-V7-LE-NEXT:    movw r1, #65520
+; CHECK-V7-LE-NEXT:    movt r1, #255
+; CHECK-V7-LE-NEXT:    ands r0, r0, r1
+; CHECK-V7-LE-NEXT:    movwne r0, #1
+; CHECK-V7-LE-NEXT:    bx lr
+;
+; CHECK-BE-LABEL: test_128_20_4:
+; CHECK-BE:       @ %bb.0:
+; CHECK-BE-NEXT:    ldr r0, [r0, #12]
+; CHECK-BE-NEXT:    mvn r1, #15
+; CHECK-BE-NEXT:    sub r1, r1, #-16777216
+; CHECK-BE-NEXT:    ands r0, r0, r1
+; CHECK-BE-NEXT:    movne r0, #1
+; CHECK-BE-NEXT:    mov pc, lr
+;
+; CHECK-V7-BE-LABEL: test_128_20_4:
+; CHECK-V7-BE:       @ %bb.0:
+; CHECK-V7-BE-NEXT:    ldr r0, [r0, #12]
+; CHECK-V7-BE-NEXT:    movw r1, #65520
+; CHECK-V7-BE-NEXT:    movt r1, #255
+; CHECK-V7-BE-NEXT:    ands r0, r0, r1
+; CHECK-V7-BE-NEXT:    movwne r0, #1
+; CHECK-V7-BE-NEXT:    bx lr
+  %a = load i128, ptr %y
+  %b = and i128 %a, u0xfffff0
+  %cmp = icmp ne i128 %b, 0
+  ret i1 %cmp
+}
+
+define i1 @test_48_16_0(ptr %y) {
+; CHECK-LE-LABEL: test_48_16_0:
+; CHECK-LE:       @ %bb.0:
+; CHECK-LE-NEXT:    ldrh r0, [r0]
+; CHECK-LE-NEXT:    cmp r0, #0
+; CHECK-LE-NEXT:    movne r0, #1
+; CHECK-LE-NEXT:    mov pc, lr
+;
+; CHECK-V7-LE-LABEL: test_48_16_0:
+; CHECK-V7-LE:       @ %bb.0:
+; CHECK-V7-LE-NEXT:    ldrh r0, [r0]
+; CHECK-V7-LE-NEXT:    cmp r0, #0
+; CHECK-V7-LE-NEXT:    movwne r0, #1
+; CHECK-V7-LE-NEXT:    bx lr
+;
+; CHECK-BE-LABEL: test_48_16_0:
+; CHECK-BE:       @ %bb.0:
+; CHECK-BE-NEXT:    ldrh r0, [r0, #4]
+; CHECK-BE-NEXT:    cmp r0, #0
+; CHECK-BE-NEXT:    movne r0, #1
+; CHECK-BE-NEXT:    mov pc, lr
+;
+; CHECK-V7-BE-LABEL: test_48_16_0:
+; CHECK-V7-BE:       @ %bb.0:
+; CHECK-V7-BE-NEXT:    ldrh r0, [r0, #4]
+; CHECK-V7-BE-NEXT:    cmp r0, #0
+; CHECK-V7-BE-NEXT:    movwne r0, #1
+; CHECK-V7-BE-NEXT:    bx lr
+  %a = load i48, ptr %y
+  %b = and i48 %a, u0xffff
+  %cmp = icmp ne i48 %b, 0
+  ret i1 %cmp
+}
+
+define i1 @test_48_16_8(ptr %y) {
+; CHECK-LE-LABEL: test_48_16_8:
+; CHECK-LE:       @ %bb.0:
+; CHECK-LE-NEXT:    ldrh r0, [r0, #1]
+; CHECK-LE-NEXT:    cmp r0, #0
+; CHECK-LE-NEXT:    movne r0, #1
+; CHECK-LE-NEXT:    mov pc, lr
+;
+; CHECK-V7-LE-LABEL: test_48_16_8:
+; CHECK-V7-LE:       @ %bb.0:
+; CHECK-V7-LE-NEXT:    ldrh r0, [r0, #1]
+; CHECK-V7-LE-NEXT:    cmp r0, #0
+; CHECK-V7-LE-NEXT:    movwne r0, #1
+; CHECK-V7-LE-NEXT:    bx lr
+;
+; CHECK-BE-LABEL: test_48_16_8:
+; CHECK-BE:       @ %bb.0:
+; CHECK-BE-NEXT:    ldrh r0, [r0, #3]
+; CHECK-BE-NEXT:    cmp r0, #0
+; CHECK-BE-NEXT:    movne r0, #1
+; CHECK-BE-NEXT:    mov pc, lr
+;
+; CHECK-V7-BE-LABEL: test_48_16_8:
+; CHECK-V7-BE:       @ %bb.0:
+; CHECK-V7-BE-NEXT:    ldrh r0, [r0, #3]
+; CHECK-V7-BE-NEXT:    cmp r0, #0
+; CHECK-V7-BE-NEXT:    movwne r0, #1
+; CHECK-V7-BE-NEXT:    bx lr
+  %a = load i48, ptr %y
+  %b = and i48 %a, u0xffff00
+  %cmp = icmp ne i48 %b, 0
+  ret i1 %cmp
+}
+
+define i1 @test_48_16_16(ptr %y) {
+; CHECK-LE-LABEL: test_48_16_16:
+; CHECK-LE:       @ %bb.0:
+; CHECK-LE-NEXT:    ldrh r0, [r0, #2]
+; CHECK-LE-NEXT:    cmp r0, #0
+; CHECK-LE-NEXT:    movne r0, #1
+; CHECK-LE-NEXT:    mov pc, lr
+;
+; CHECK-V7-LE-LABEL: test_48_16_16:
+; CHECK-V7-LE:       @ %bb.0:
+; CHECK-V7-LE-NEXT:    ldrh r0, [r0, #2]
+; CHECK-V7-LE-NEXT:    cmp r0, #0
+; CHECK-V7-LE-NEXT:    movwne r0, #1
+; CHECK-V7-LE-NEXT:    bx lr
+;
+; CHECK-BE-LABEL: test_48_16_16:
+; CHECK-BE:       @ %bb.0:
+; CHECK-BE-NEXT:    ldrh r0, [r0, #2]
+; CHECK-BE-NEXT:    cmp r0, #0
+; CHECK-BE-NEXT:    movne r0, #1
+; CHECK-BE-NEXT:    mov pc, lr
+;
+; CHECK-V7-BE-LABEL: test_48_16_16:
+; CHECK-V7-BE:       @ %bb.0:
+; CHECK-V7-BE-NEXT:    ldrh r0, [r0, #2]
+; CHECK-V7-BE-NEXT:    cmp r0, #0
+; CHECK-V7-BE-NEXT:    movwne r0, #1
+; CHECK-V7-BE-NEXT:    bx lr
+  %a = load i48, ptr %y
+  %b = and i48 %a, u0xffff0000
+  %cmp = icmp ne i48 %b, 0
+  ret i1 %cmp
+}
+
+define i1 @test_48_16_32(ptr %y) {
+; CHECK-LE-LABEL: test_48_16_32:
+; CHECK-LE:       @ %bb.0:
+; CHECK-LE-NEXT:    ldrh r0, [r0, #4]
+; CHECK-LE-NEXT:    cmp r0, #0
+; CHECK-LE-NEXT:    movne r0, #1
+; CHECK-LE-NEXT:    mov pc, lr
+;
+; CHECK-V7-LE-LABEL: test_48_16_32:
+; CHECK-V7-LE:       @ %bb.0:
+; CHECK-V7-LE-NEXT:    ldrh r0, [r0, #4]
+; CHECK-V7-LE-NEXT:    cmp r0, #0
+; CHECK-V7-LE-NEXT:    movwne r0, #1
+; CHECK-V7-LE-NEXT:    bx lr
+;
+; CHECK-BE-LABEL: test_48_16_32:
+; CHECK-BE:       @ %bb.0:
+; CHECK-BE-NEXT:    ldrh r0, [r0]
+; CHECK-BE-NEXT:    cmp r0, #0
+; CHECK-BE-NEXT:    movne r0, #1
+; CHECK-BE-NEXT:    mov pc, lr
+;
+; CHECK-V7-BE-LABEL: test_48_16_32:
+; CHECK-V7-BE:       @ %bb.0:
+; CHECK-V7-BE-NEXT:    ldrh r0, [r0]
+; CHECK-V7-BE-NEXT:    cmp r0, #0
+; CHECK-V7-BE-NEXT:    movwne r0, #1
+; CHECK-V7-BE-NEXT:    bx lr
+  %a = load i48, ptr %y
+  %b = and i48 %a, u0xffff00000000
+  %cmp = icmp ne i48 %b, 0
+  ret i1 %cmp
+}
+
+define i1 @test_48_17_0(ptr %y) {
+; CHECK-LE-LABEL: test_48_17_0:
+; CHECK-LE:       @ %bb.0:
+; CHECK-LE-NEXT:    ldr r0, [r0]
+; CHECK-LE-NEXT:    ldr r1, .LCPI11_0
+; CHECK-LE-NEXT:    ands r0, r0, r1
+; CHECK-LE-NEXT:    movne r0, #1
+; CHECK-LE-NEXT:    mov pc, lr
+; CHECK-LE-NEXT:    .p2align 2
+; CHECK-LE-NEXT:  @ %bb.1:
+; CHECK-LE-NEXT:  .LCPI11_0:
+; CHECK-LE-NEXT:    .long 131071 @ 0x1ffff
+;
+; CHECK-V7-LE-LABEL: test_48_17_0:
+; CHECK-V7-LE:       @ %bb.0:
+; CHECK-V7-LE-NEXT:    ldr r0, [r0]
+; CHECK-V7-LE-NEXT:    bfc r0, #17, #15
+; CHECK-V7-LE-NEXT:    cmp r0, #0
+; CHECK-V7-LE-NEXT:    movwne r0, #1
+; CHECK-V7-LE-NEXT:    bx lr
+;
+; CHECK-BE-LABEL: test_48_17_0:
+; CHECK-BE:       @ %bb.0:
+; CHECK-BE-NEXT:    ldr r1, [r0]
+; CHECK-BE-NEXT:    ldrh r0, [r0, #4]
+; CHECK-BE-NEXT:    orr r0, r0, r1, lsl #16
+; CHECK-BE-NEXT:    ldr r1, .LCPI11_0
+; CHECK-BE-NEXT:    ands r0, r0, r1
+; CHECK-BE-NEXT:    movne r0, #1
+; CHECK-BE-NEXT:    mov pc, lr
+; CHECK-BE-NEXT:    .p2align 2
+; CHECK-BE-NEXT:  @ %bb.1:
+; CHECK-BE-NEXT:  .LCPI11_0:
+; CHECK-BE-NEXT:    .long 131071 @ 0x1ffff
+;
+; CHECK-V7-BE-LABEL: test_48_17_0:
+; CHECK-V7-BE:       @ %bb.0:
+; CHECK-V7-BE-NEXT:    ldr r1, [r0]
+; CHECK-V7-BE-NEXT:    ldrh r0, [r0, #4]
+; CHECK-V7-BE-NEXT:    orr r0, r0, r1, lsl #16
+; CHECK-V7-BE-NEXT:    bfc r0, #17, #15
+; CHECK-V7-BE-NEXT:    cmp r0, #0
+; CHECK-V7-BE-NEXT:    movwne r0, #1
+; CHECK-V7-BE-NEXT:    bx lr
+  %a = load i48, ptr %y
+  %b = and i48 %a, u0x1ffff
+  %cmp = icmp ne i48 %b, 0
+  ret i1 %cmp
+}
+
+define i1 @test_40_16_0(ptr %y) {
+; CHECK-LE-LABEL: test_40_16_0:
+; CHECK-LE:       @ %bb.0:
+; CHECK-LE-NEXT:    ldrh r0, [r0]
+; CHECK-LE-NEXT:    cmp r0, #0
+; CHECK-LE-NEXT:    movne r0, #1
+; CHECK-LE-NEXT:    mov pc, lr
+;
+; CHECK-V7-LE-LABEL: test_40_16_0:
+; CHECK-V7-LE:       @ %bb.0:
+; CHECK-V7-LE-NEXT:    ldrh r0, [r0]
+; CHECK-V7-LE-NEXT:    cmp r0, #0
+; CHECK-V7-LE-NEXT:    movwne r0, #1
+; CHECK-V7-LE-NEXT:    bx lr
+;
+; CHECK-BE-LABEL: test_40_16_0:
+; CHECK-BE:       @ %bb.0:
+; CHECK-BE-NEXT:    ldrh r0, [r0, #3]
+; CHECK-BE-NEXT:    cmp r0, #0
+; CHECK-BE-NEXT:    movne r0, #1
+; CHECK-BE-NEXT:    mov pc, lr
+;
+; CHECK-V7-BE-LABEL: test_40_16_0:
+; CHECK-V7-BE:       @ %bb.0:
+; CHECK-V7-BE-NEXT:    ldrh r0, [r0, #3]
+; CHECK-V7-BE-NEXT:    cmp r0, #0
+; CHECK-V7-BE-NEXT:    movwne r0, #1
+; CHECK-V7-BE-NEXT:    bx lr
+  %a = load i40, ptr %y
+  %b = and i40 %a, u0xffff
+  %cmp = icmp ne i40 %b, 0
+  ret i1 %cmp
+}
+
+define i1 @test_40_1_32(ptr %y) {
+; CHECK-LE-LABEL: test_40_1_32:
+; CHECK-LE:       @ %bb.0:
+; CHECK-LE-NEXT:    ldrb r0, [r0, #4]
+; CHECK-LE-NEXT:    and r0, r0, #1
+; CHECK-LE-NEXT:    mov pc, lr
+;
+; CHECK-V7-LE-LABEL: test_40_1_32:
+; CHECK-V7-LE:       @ %bb.0:
+; CHECK-V7-LE-NEXT:    ldrb r0, [r0, #4]
+; CHECK-V7-LE-NEXT:    and r0, r0, #1
+; CHECK-V7-LE-NEXT:    bx lr
+;
+; CHECK-BE-LABEL: test_40_1_32:
+; CHECK-BE:       @ %bb.0:
+; CHECK-BE-NEXT:    ldr r0, [r0]
+; CHECK-BE-NEXT:    mov r1, #1
+; CHECK-BE-NEXT:    and r0, r1, r0, lsr #24
+; CHECK-BE-NEXT:    mov pc, lr
+;
+; CHECK-V7-BE-LABEL: test_40_1_32:
+; CHECK-V7-BE:       @ %bb.0:
+; CHECK-V7-BE-NEXT:    ldr r0, [r0]
+; CHECK-V7-BE-NEXT:    ubfx r0, r0, #24, #1
+; CHECK-V7-BE-NEXT:    bx lr
+  %a = load i40, ptr %y
+  %b = and i40 %a, u0x100000000
+  %cmp = icmp ne i40 %b, 0
+  ret i1 %cmp
+}
+
+define i1 @test_24_16_0(ptr %y) {
+; CHECK-LE-LABEL: test_24_16_0:
+; CHECK-LE:       @ %bb.0:
+; CHECK-LE-NEXT:    ldrh r0, [r0]
+; CHECK-LE-NEXT:    cmp r0, #0
+; CHECK-LE-NEXT:    movne r0, #1
+; CHECK-LE-NEXT:    mov pc, lr
+;
+; CHECK-V7-LE-LABEL: test_24_16_0:
+; CHECK-V7-LE:       @ %bb.0:
+; CHECK-V7-LE-NEXT:    ldrh r0, [r0]
+; CHECK-V7-LE-NEXT:    cmp r0, #0
+; CHECK-V7-LE-NEXT:    movwne r0, #1
+; CHECK-V7-LE-NEXT:    bx lr
+;
+; CHECK-BE-LABEL: test_24_16_0:
+; CHECK-BE:       @ %bb.0:
+; CHECK-BE-NEXT:    ldrh r0, [r0, #1]
+; CHECK-BE-NEXT:    cmp r0, #0
+; CHECK-BE-NEXT:    movne r0, #1
+; CHECK-BE-NEXT:    mov pc, lr
+;
+; CHECK-V7-BE-LABEL: test_24_16_0:
+; CHECK-V7-BE:       @ %bb.0:
+; CHECK-V7-BE-NEXT:    ldrh r0, [r0, #1]
+; CHECK-V7-BE-NEXT:    cmp r0, #0
+; CHECK-V7-BE-NEXT:    movwne r0, #1
+; CHECK-V7-BE-NEXT:    bx lr
+  %a = load i24, ptr %y
+  %b = and i24 %a, u0xffff
+  %cmp = icmp ne i24 %b, 0
+  ret i1 %cmp
+}
+
+define i1 @test_24_8_8(ptr %y) {
+; CHECK-LE-LABEL: test_24_8_8:
+; CHECK-LE:       @ %bb.0:
+; CHECK-LE-NEXT:    ldrb r0, [r0, #1]
+; CHECK-LE-NEXT:    lsls r0, r0, #8
+; CHECK-LE-NEXT:    movne r0, #1
+; CHECK-LE-NEXT:    mov pc, lr
+;
+; CHECK-V7-LE-LABEL: test_24_8_8:
+; CHECK-V7-LE:       @ %bb.0:
+; CHECK-V7-LE-NEXT:    ldrb r0, [r0, #1]
+; CHECK-V7-LE-NEXT:    lsls r0, r0, #8
+; CHECK-V7-LE-NEXT:    movwne r0, #1
+; CHECK-V7-LE-NEXT:    bx lr
+;
+; CHECK-BE-LABEL: test_24_8_8:
+; CHECK-BE:       @ %bb.0:
+; CHECK-BE-NEXT:    ldrb r0, [r0, #1]
+; CHECK-BE-NEXT:    lsls r0, r0, #8
+; CHECK-BE-NEXT:    movne r0, #1
+; CHECK-BE-NEXT:    mov pc, lr
+;
+; CHECK-V7-BE-LABEL: test_24_8_8:
+; CHECK-V7-BE:       @ %bb.0:
+; CHECK-V7-BE-NEXT:    ldrb r0, [r0, #1]
+; CHECK-V7-BE-NEXT:    lsls r0, r0, #8
+; CHECK-V7-BE-NEXT:    movwne r0, #1
+; CHECK-V7-BE-NEXT:    bx lr
+  %a = load i24, ptr %y
+  %b = and i24 %a, u0xff00
+  %cmp = icmp ne i24 %b, 0
+  ret i1 %cmp
+}
+
+define i1 @test_24_8_12(ptr %y) {
+; CHECK-LE-LABEL: test_24_8_12:
+; CHECK-LE:       @ %bb.0:
+; CHECK-LE-NEXT:    ldrb r1, [r0, #2]
+; CHECK-LE-NEXT:    ldrh r0, [r0]
+; CHECK-LE-NEXT:    orr r0, r0, r1, lsl #16
+; CHECK-LE-NEXT:    ands r0, r0, #1044480
+; CHECK-LE-NEXT:    movne r0, #1
+; CHECK-LE-NEXT:    mov pc, lr
+;
+; CHECK-V7-LE-LABEL: test_24_8_12:
+; CHECK-V7-LE:       @ %bb.0:
+; CHECK-V7-LE-NEXT:    ldrb r1, [r0, #2]
+; CHECK-V7-LE-NEXT:    ldrh r0, [r0]
+; CHECK-V7-LE-NEXT:    orr r0, r0, r1, lsl #16
+; CHECK-V7-LE-NEXT:    ands r0, r0, #1044480
+; CHECK-V7-LE-NEXT:    movwne r0, #1
+; CHECK-V7-LE-NEXT:    bx lr
+;
+; CHECK-BE-LABEL: test_24_8_12:
+; CHECK-BE:       @ %bb.0:
+; CHECK-BE-NEXT:    ldrh r0, [r0]
+; CHECK-BE-NEXT:    mov r1, #1044480
+; CHECK-BE-NEXT:    ands r0, r1, r0, lsl #8
+; CHECK-BE-NEXT:    movne r0, #1
+; CHECK-BE-NEXT:    mov pc, lr
+;
+; CHECK-V7-BE-LABEL: test_24_8_12:
+; CHECK-V7-BE:       @ %bb.0:
+; CHECK-V7-BE-NEXT:    ldrh r0, [r0]
+; CHECK-V7-BE-NEXT:    mov r1, #1044480
+; CHECK-V7-BE-NEXT:    ands r0, r1, r0, lsl #8
+; CHECK-V7-BE-NEXT:    movwne r0, #1
+; CHECK-V7-BE-NEXT:    bx lr
+  %a = load i24, ptr %y
+  %b = and i24 %a, u0xff000
+  %cmp = icmp ne i24 %b, 0
+  ret i1 %cmp
+}
+
+define i1 @test_24_8_16(ptr %y) {
+; CHECK-LE-LABEL: test_24_8_16:
+; CHECK-LE:       @ %bb.0:
+; CHECK-LE-NEXT:    ldrb r0, [r0, #2]
+; CHECK-LE-NEXT:    lsls r0, r0, #16
+; CHECK-LE-NEXT:    movne r0, #1
+; CHECK-LE-NEXT:    mov pc, lr
+;
+; CHECK-V7-LE-LABEL: test_24_8_16:
+; CHECK-V7-LE:       @ %bb.0:
+; CHECK-V7-LE-NEXT:    ldrb r0, [r0, #2]
+; CHECK-V7-LE-NEXT:    lsls r0, r0, #16
+; CHECK-V7-LE-NEXT:    movwne r0, #1
+; CHECK-V7-LE-NEXT:    bx lr
+;
+; CHECK-BE-LABEL: test_24_8_16:
+; CHECK-BE:       @ %bb.0:
+; CHECK-BE-NEXT:    ldrb r0, [r0]
+; CHECK-BE-NEXT:    lsls r0, r0, #16
+; CHECK-BE-NEXT:    movne r0, #1
+; CHECK-BE-NEXT:    mov pc, lr
+;
+; CHECK-V7-BE-LABEL: test_24_8_16:
+; CHECK-V7-BE:       @ %bb.0:
+; CHECK-V7-BE-NEXT:    ldrb r0, [r0]
+; CHECK-V7-BE-NEXT:    lsls r0, r0, #16
+; CHECK-V7-BE-NEXT:    movwne r0, #1
+; CHECK-V7-BE-NEXT:    bx lr
+  %a = load i24, ptr %y
+  %b = and i24 %a, u0xff0000
+  %cmp = icmp ne i24 %b, 0
+  ret i1 %cmp
+}
diff --git a/llvm/test/CodeGen/PowerPC/simplifysetcc_narrow_load.ll b/llvm/test/CodeGen/PowerPC/simplifysetcc_narrow_load.ll
new file mode 100644
index 00000000000000..1a03423fe6aec5
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/simplifysetcc_narrow_load.ll
@@ -0,0 +1,390 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc -O1 -mtriple ppc32le -o - %s | FileCheck --check-prefix CHECK-LE %s
+; RUN: llc -O1 -mtriple ppc32 -o - %s | FileCheck --check-prefix CHECK-BE %s
+
+; A collection of regression tests to verify the load-narrowing part of
+; TargetLowering::SimplifySetCC (and/or other similar rewrites such as
+; combining AND+LOAD into ZEXTLOAD).
+
+
+;--------------------------------------------------------------------------
+; Test non byte-sized types.
+;
+; As long as LLVM IR isn't defining where the padding goes we can't really
+; optimize these (without adding a target lowering hook that can inform
+; ISel about which bits are padding).
+; --------------------------------------------------------------------------
+
+define i1 @test_129_15_0(ptr %y) {
+; CHECK-LE-LABEL: test_129_15_0:
+; CHECK-LE:       # %bb.0:
+; CHECK-LE-NEXT:    lhz 3, 0(3)
+; CHECK-LE-NEXT:    clrlwi 3, 3, 17
+; CHECK-LE-NEXT:    addic 4, 3, -1
+; CHECK-LE-NEXT:    subfe 3, 4, 3
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-BE-LABEL: test_129_15_0:
+; CHECK-BE:       # %bb.0:
+; CHECK-BE-NEXT:    lhz 3, 14(3)
+; CHECK-BE-NEXT:    clrlwi 3, 3, 17
+; CHECK-BE-NEXT:    addic 4, 3, -1
+; CHECK-BE-NEXT:    subfe 3, 4, 3
+; CHECK-BE-NEXT:    blr
+  %a = load i129, ptr %y
+  %b = and i129 %a, u0x7fff
+  %cmp = icmp ne i129 %b, 0
+  ret i1 %cmp
+}
+
+define i1 @test_126_20_4(ptr %y) {
+; CHECK-LE-LABEL: test_126_20_4:
+; CHECK-LE:       # %bb.0:
+; CHECK-LE-NEXT:    lwz 3, 0(3)
+; CHECK-LE-NEXT:    rlwinm 3, 3, 0, 8, 27
+; CHECK-LE-NEXT:    addic 4, 3, -1
+; CHECK-LE-NEXT:    subfe 3, 4, 3
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-BE-LABEL: test_126_20_4:
+; CHECK-BE:       # %bb.0:
+; CHECK-BE-NEXT:    lwz 3, 12(3)
+; CHECK-BE-NEXT:    rlwinm 3, 3, 0, 8, 27
+; CHECK-BE-NEXT:    addic 4, 3, -1
+; CHECK-BE-NEXT:    subfe 3, 4, 3
+; CHECK-BE-NEXT:    blr
+  %a = load i126, ptr %y
+  %b = and i126 %a, u0xfffff0
+  %cmp = icmp ne i126 %b, 0
+  ret i1 %cmp
+}
+
+define i1 @test_33_8_0(ptr %y) {
+; CHECK-LE-LABEL: test_33_8_0:
+; CHECK-LE:       # %bb.0:
+; CHECK-LE-NEXT:    lbz 3, 0(3)
+; CHECK-LE-NEXT:    addic 4, 3, -1
+; CHECK-LE-NEXT:    subfe 3, 4, 3
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-BE-LABEL: test_33_8_0:
+; CHECK-BE:       # %bb.0:
+; CHECK-BE-NEXT:    lbz 3, 3(3)
+; CHECK-BE-NEXT:    addic 4, 3, -1
+; CHECK-BE-NEXT:    subfe 3, 4, 3
+; CHECK-BE-NEXT:    blr
+  %a = load i33, ptr %y
+  %b = and i33 %a, u0xff
+  %cmp = icmp ne i33 %b, 0
+  ret i1 %cmp
+}
+
+define i1 @test_33_1_32(ptr %y) {
+; CHECK-LE-LABEL: test_33_1_32:
+; CHECK-LE:       # %bb.0:
+; CHECK-LE-NEXT:    lbz 3, 4(3)
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-BE-LABEL: test_33_1_32:
+; CHECK-BE:       # %bb.0:
+; CHECK-BE-NEXT:    lwz 3, 0(3)
+; CHECK-BE-NEXT:    srwi 3, 3, 24
+; CHECK-BE-NEXT:    blr
+  %a = load i33, ptr %y
+  %b = and i33 %a, u0x100000000
+  %cmp = icmp ne i33 %b, 0
+  ret i1 %cmp
+}
+
+define i1 @test_33_1_31(ptr %y) {
+; CHECK-LE-LABEL: test_33_1_31:
+; CHECK-LE:       # %bb.0:
+; CHECK-LE-NEXT:    lbz 3, 3(3)
+; CHECK-LE-NEXT:    srwi 3, 3, 7
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-BE-LABEL: test_33_1_31:
+; CHECK-BE:       # %bb.0:
+; CHECK-BE-NEXT:    lbz 3, 0(3)
+; CHECK-BE-NEXT:    srwi 3, 3, 7
+; CHECK-BE-NEXT:    blr
+  %a = load i33, ptr %y
+  %b = and i33 %a, u0x80000000
+  %cmp = icmp ne i33 %b, 0
+  ret i1 %cmp
+}
+
+define i1 @test_33_1_0(ptr %y) {
+; CHECK-LE-LABEL: test_33_1_0:
+; CHECK-LE:       # %bb.0:
+; CHECK-LE-NEXT:    lbz 3, 0(3)
+; CHECK-LE-NEXT:    clrlwi 3, 3, 31
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-BE-LABEL: test_33_1_0:
+; CHECK-BE:       # %bb.0:
+; CHECK-BE-NEXT:    lbz 3, 3(3)
+; CHECK-BE-NEXT:    clrlwi 3, 3, 31
+; CHECK-BE-NEXT:    blr
+  %a = load i33, ptr %y
+  %b = and i33 %a, u0x1
+  %cmp = icmp ne i33 %b, 0
+  ret i1 %cmp
+}
+
+;--------------------------------------------------------------------------
+; Test byte-sized types.
+;--------------------------------------------------------------------------
+
+
+define i1 @test_128_20_4(ptr %y) {
+; CHECK-LE-LABEL: test_128_20_4:
+; CHECK-LE:       # %bb.0:
+; CHECK-LE-NEXT:    lwz 3, 0(3)
+; CHECK-LE-NEXT:    rlwinm 3, 3, 0, 8, 27
+; CHECK-LE-NEXT:    addic 4, 3, -1
+; CHECK-LE-NEXT:    subfe 3, 4, 3
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-BE-LABEL: test_128_20_4:
+; CHECK-BE:       # %bb.0:
+; CHECK-BE-NEXT:    lwz 3, 12(3)
+; CHECK-BE-NEXT:    rlwinm 3, 3, 0, 8, 27
+; CHECK-BE-NEXT:    addic 4, 3, -1
+; CHECK-BE-NEXT:    subfe 3, 4, 3
+; CHECK-BE-NEXT:    blr
+  %a = load i128, ptr %y
+  %b = and i128 %a, u0xfffff0
+  %cmp = icmp ne i128 %b, 0
+  ret i1 %cmp
+}
+
+define i1 @test_48_16_0(ptr %y) {
+; CHECK-LE-LABEL: test_48_16_0:
+; CHECK-LE:       # %bb.0:
+; CHECK-LE-NEXT:    lhz 3, 0(3)
+; CHECK-LE-NEXT:    addic 4, 3, -1
+; CHECK-LE-NEXT:    subfe 3, 4, 3
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-BE-LABEL: test_48_16_0:
+; CHECK-BE:       # %bb.0:
+; CHECK-BE-NEXT:    lhz 3, 4(3)
+; CHECK-BE-NEXT:    addic 4, 3, -1
+; CHECK-BE-NEXT:    subfe 3, 4, 3
+; CHECK-BE-NEXT:    blr
+  %a = load i48, ptr %y
+  %b = and i48 %a, u0xffff
+  %cmp = icmp ne i48 %b, 0
+  ret i1 %cmp
+}
+
+define i1 @test_48_16_8(ptr %y) {
+; CHECK-LE-LABEL: test_48_16_8:
+; CHECK-LE:       # %bb.0:
+; CHECK-LE-NEXT:    lhz 3, 1(3)
+; CHECK-LE-NEXT:    addic 4, 3, -1
+; CHECK-LE-NEXT:    subfe 3, 4, 3
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-BE-LABEL: test_48_16_8:
+; CHECK-BE:       # %bb.0:
+; CHECK-BE-NEXT:    lhz 3, 3(3)
+; CHECK-BE-NEXT:    addic 4, 3, -1
+; CHECK-BE-NEXT:    subfe 3, 4, 3
+; CHECK-BE-NEXT:    blr
+  %a = load i48, ptr %y
+  %b = and i48 %a, u0xffff00
+  %cmp = icmp ne i48 %b, 0
+  ret i1 %cmp
+}
+
+define i1 @test_48_16_16(ptr %y) {
+; CHECK-LE-LABEL: test_48_16_16:
+; CHECK-LE:       # %bb.0:
+; CHECK-LE-NEXT:    lhz 3, 2(3)
+; CHECK-LE-NEXT:    addic 4, 3, -1
+; CHECK-LE-NEXT:    subfe 3, 4, 3
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-BE-LABEL: test_48_16_16:
+; CHECK-BE:       # %bb.0:
+; CHECK-BE-NEXT:    lhz 3, 2(3)
+; CHECK-BE-NEXT:    addic 4, 3, -1
+; CHECK-BE-NEXT:    subfe 3, 4, 3
+; CHECK-BE-NEXT:    blr
+  %a = load i48, ptr %y
+  %b = and i48 %a, u0xffff0000
+  %cmp = icmp ne i48 %b, 0
+  ret i1 %cmp
+}
+
+define i1 @test_48_16_32(ptr %y) {
+; CHECK-LE-LABEL: test_48_16_32:
+; CHECK-LE:       # %bb.0:
+; CHECK-LE-NEXT:    lhz 3, 4(3)
+; CHECK-LE-NEXT:    addic 4, 3, -1
+; CHECK-LE-NEXT:    subfe 3, 4, 3
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-BE-LABEL: test_48_16_32:
+; CHECK-BE:       # %bb.0:
+; CHECK-BE-NEXT:    lhz 3, 0(3)
+; CHECK-BE-NEXT:    addic 4, 3, -1
+; CHECK-BE-NEXT:    subfe 3, 4, 3
+; CHECK-BE-NEXT:    blr
+  %a = load i48, ptr %y
+  %b = and i48 %a, u0xffff00000000
+  %cmp = icmp ne i48 %b, 0
+  ret i1 %cmp
+}
+
+define i1 @test_48_17_0(ptr %y) {
+; CHECK-LE-LABEL: test_48_17_0:
+; CHECK-LE:       # %bb.0:
+; CHECK-LE-NEXT:    lwz 3, 0(3)
+; CHECK-LE-NEXT:    clrlwi 3, 3, 15
+; CHECK-LE-NEXT:    addic 4, 3, -1
+; CHECK-LE-NEXT:    subfe 3, 4, 3
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-BE-LABEL: test_48_17_0:
+; CHECK-BE:       # %bb.0:
+; CHECK-BE-NEXT:    lhz 4, 4(3)
+; CHECK-BE-NEXT:    lwz 3, 0(3)
+; CHECK-BE-NEXT:    clrlwi 4, 4, 16
+; CHECK-BE-NEXT:    rlwimi 4, 3, 16, 15, 15
+; CHECK-BE-NEXT:    addic 3, 4, -1
+; CHECK-BE-NEXT:    subfe 3, 3, 4
+; CHECK-BE-NEXT:    blr
+  %a = load i48, ptr %y
+  %b = and i48 %a, u0x1ffff
+  %cmp = icmp ne i48 %b, 0
+  ret i1 %cmp
+}
+
+define i1 @test_40_16_0(ptr %y) {
+; CHECK-LE-LABEL: test_40_16_0:
+; CHECK-LE:       # %bb.0:
+; CHECK-LE-NEXT:    lhz 3, 0(3)
+; CHECK-LE-NEXT:    addic 4, 3, -1
+; CHECK-LE-NEXT:    subfe 3, 4, 3
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-BE-LABEL: test_40_16_0:
+; CHECK-BE:       # %bb.0:
+; CHECK-BE-NEXT:    lhz 3, 3(3)
+; CHECK-BE-NEXT:    addic 4, 3, -1
+; CHECK-BE-NEXT:    subfe 3, 4, 3
+; CHECK-BE-NEXT:    blr
+  %a = load i40, ptr %y
+  %b = and i40 %a, u0xffff
+  %cmp = icmp ne i40 %b, 0
+  ret i1 %cmp
+}
+
+define i1 @test_40_1_32(ptr %y) {
+; CHECK-LE-LABEL: test_40_1_32:
+; CHECK-LE:       # %bb.0:
+; CHECK-LE-NEXT:    lbz 3, 4(3)
+; CHECK-LE-NEXT:    clrlwi 3, 3, 31
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-BE-LABEL: test_40_1_32:
+; CHECK-BE:       # %bb.0:
+; CHECK-BE-NEXT:    lwz 3, 0(3)
+; CHECK-BE-NEXT:    rlwinm 3, 3, 8, 31, 31
+; CHECK-BE-NEXT:    blr
+  %a = load i40, ptr %y
+  %b = and i40 %a, u0x100000000
+  %cmp = icmp ne i40 %b, 0
+  ret i1 %cmp
+}
+
+define i1 @test_24_16_0(ptr %y) {
+; CHECK-LE-LABEL: test_24_16_0:
+; CHECK-LE:       # %bb.0:
+; CHECK-LE-NEXT:    lhz 3, 0(3)
+; CHECK-LE-NEXT:    addic 4, 3, -1
+; CHECK-LE-NEXT:    subfe 3, 4, 3
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-BE-LABEL: test_24_16_0:
+; CHECK-BE:       # %bb.0:
+; CHECK-BE-NEXT:    lhz 3, 1(3)
+; CHECK-BE-NEXT:    addic 4, 3, -1
+; CHECK-BE-NEXT:    subfe 3, 4, 3
+; CHECK-BE-NEXT:    blr
+  %a = load i24, ptr %y
+  %b = and i24 %a, u0xffff
+  %cmp = icmp ne i24 %b, 0
+  ret i1 %cmp
+}
+
+define i1 @test_24_8_8(ptr %y) {
+; CHECK-LE-LABEL: test_24_8_8:
+; CHECK-LE:       # %bb.0:
+; CHECK-LE-NEXT:    lbz 3, 1(3)
+; CHECK-LE-NEXT:    slwi 3, 3, 8
+; CHECK-LE-NEXT:    addic 4, 3, -1
+; CHECK-LE-NEXT:    subfe 3, 4, 3
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-BE-LABEL: test_24_8_8:
+; CHECK-BE:       # %bb.0:
+; CHECK-BE-NEXT:    lbz 3, 1(3)
+; CHECK-BE-NEXT:    slwi 3, 3, 8
+; CHECK-BE-NEXT:    addic 4, 3, -1
+; CHECK-BE-NEXT:    subfe 3, 4, 3
+; CHECK-BE-NEXT:    blr
+  %a = load i24, ptr %y
+  %b = and i24 %a, u0xff00
+  %cmp = icmp ne i24 %b, 0
+  ret i1 %cmp
+}
+
+define i1 @test_24_8_12(ptr %y) {
+; CHECK-LE-LABEL: test_24_8_12:
+; CHECK-LE:       # %bb.0:
+; CHECK-LE-NEXT:    lhz 4, 0(3)
+; CHECK-LE-NEXT:    lbz 3, 2(3)
+; CHECK-LE-NEXT:    rlwinm 4, 4, 0, 16, 19
+; CHECK-LE-NEXT:    rlwimi 4, 3, 16, 12, 15
+; CHECK-LE-NEXT:    addic 3, 4, -1
+; CHECK-LE-NEXT:    subfe 3, 3, 4
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-BE-LABEL: test_24_8_12:
+; CHECK-BE:       # %bb.0:
+; CHECK-BE-NEXT:    lhz 3, 0(3)
+; CHECK-BE-NEXT:    rlwinm 3, 3, 8, 12, 19
+; CHECK-BE-NEXT:    addic 4, 3, -1
+; CHECK-BE-NEXT:    subfe 3, 4, 3
+; CHECK-BE-NEXT:    blr
+  %a = load i24, ptr %y
+  %b = and i24 %a, u0xff000
+  %cmp = icmp ne i24 %b, 0
+  ret i1 %cmp
+}
+
+define i1 @test_24_8_16(ptr %y) {
+; CHECK-LE-LABEL: test_24_8_16:
+; CHECK-LE:       # %bb.0:
+; CHECK-LE-NEXT:    lbz 3, 2(3)
+; CHECK-LE-NEXT:    slwi 3, 3, 16
+; CHECK-LE-NEXT:    addic 4, 3, -1
+; CHECK-LE-NEXT:    subfe 3, 4, 3
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-BE-LABEL: test_24_8_16:
+; CHECK-BE:       # %bb.0:
+; CHECK-BE-NEXT:    lbz 3, 0(3)
+; CHECK-BE-NEXT:    slwi 3, 3, 16
+; CHECK-BE-NEXT:    addic 4, 3, -1
+; CHECK-BE-NEXT:    subfe 3, 4, 3
+; CHECK-BE-NEXT:    blr
+  %a = load i24, ptr %y
+  %b = and i24 %a, u0xff0000
+  %cmp = icmp ne i24 %b, 0
+  ret i1 %cmp
+}

>From ade4ce07f919375eb2d26c1b95cf0436b5a39730 Mon Sep 17 00:00:00 2001
From: Bjorn Pettersson <bjorn.a.pettersson at ericsson.com>
Date: Thu, 4 Apr 2024 16:54:36 +0200
Subject: [PATCH 2/2] [SelectionDAG] Fix and improve
 TargetLowering::SimplifySetCC

The load narrowing part of TargetLowering::SimplifySetCC is updated
according to this:

1) The offset calculation (for big endian) did not work properly for
   non byte-sized types. This is basically solved by an early exit
   if the memory type isn't byte-sized. But the code is also corrected
   to use the store size when calculating the offset.
2) To still allow some optimizations for non-byte-sized types the
   TargetLowering::isPaddedAtMostSignificantBitsWhenStored hook is
   added. By default it assumes that scalar integer types are padded
   starting at the most significant bits, if the type needs padding
   when being stored to memory.
3) Allow optimizing when isPaddedAtMostSignificantBitsWhenStored is
   true, as that hook makes it possible for TargetLowering to know
   how the non byte-sized value is aligned in memory.
4) Update the algorithm to always search for a narrowed load with
   a power-of-2 byte-sized type. In the past the algorithm started
   with the width of the original load, and then divided it by
   two for each iteration. But for a type such as i48 that would
   just end up trying to narrow the load into a i24 or i12 load,
   and then we would fail sooner or later due to not finding a
   newVT that fulfilled newVT.isRound().
   With this new approach we can narrow the i48 load into either
   an i8, i16 or i32 load. We also try to find more opportunities for
   optimization by checking if such a load is allowed (e.g. alignment
   wise) for any multiple of 8 offset. So even for a byte-sized type
   such as i32 we may now end up narrowing the load into loading the
   16 bits starting at offset 8 (if that is allowed by the target).
   The old algorithm did not even consider that case.
5) Also start using getObjectPtrOffset instead of getMemBasePlusOffset
   when creating the new ptr. This way we get "nuw" on the add.
---
 llvm/include/llvm/CodeGen/TargetLowering.h    |  7 ++
 .../CodeGen/SelectionDAG/TargetLowering.cpp   | 69 +++++++++++--------
 .../CodeGen/ARM/simplifysetcc_narrow_load.ll  | 33 +++++----
 .../PowerPC/simplifysetcc_narrow_load.ll      | 38 ++++------
 4 files changed, 80 insertions(+), 67 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index a4dc097446186a..980d75ad91a0de 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -1804,6 +1804,13 @@ class TargetLoweringBase {
   /// where the sext is redundant, and use x directly.
   virtual bool shouldRemoveRedundantExtend(SDValue Op) const { return true; }
 
+  /// Indicates if any padding is guaranteed to go at the most significant bits
+  /// when storing the type to memory and the type size isn't equal to the store
+  /// size.
+  bool isPaddedAtMostSignificantBitsWhenStored(EVT VT) const {
+    return VT.isScalarInteger() && !VT.isByteSized();
+  }
+
   /// When splitting a value of the specified type into parts, does the Lo
   /// or Hi part come first?  This usually follows the endianness, except
   /// for ppcf128, where the Hi part always comes first.
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 409d66adfd67d1..d36837f98beae4 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -4621,7 +4621,10 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
       LoadSDNode *Lod = cast<LoadSDNode>(N0.getOperand(0));
       APInt bestMask;
       unsigned bestWidth = 0, bestOffset = 0;
-      if (Lod->isSimple() && Lod->isUnindexed()) {
+      if (Lod->isSimple() && Lod->isUnindexed() &&
+          (Lod->getMemoryVT().isByteSized() ||
+           isPaddedAtMostSignificantBitsWhenStored(Lod->getMemoryVT()))) {
+        unsigned memWidth = Lod->getMemoryVT().getStoreSizeInBits();
         unsigned origWidth = N0.getValueSizeInBits();
         unsigned maskWidth = origWidth;
         // We can narrow (e.g.) 16-bit extending loads on 32-bit target to
@@ -4629,40 +4632,52 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
         if (Lod->getExtensionType() != ISD::NON_EXTLOAD)
           origWidth = Lod->getMemoryVT().getSizeInBits();
         const APInt &Mask = N0.getConstantOperandAPInt(1);
-        for (unsigned width = origWidth / 2; width>=8; width /= 2) {
+        // Only consider power-of-2 widths (and at least one byte) as candidates
+        // for the narrowed load.
+        for (unsigned width = 8; width < origWidth; width *= 2) {
+          EVT newVT = EVT::getIntegerVT(*DAG.getContext(), width);
+          if (!shouldReduceLoadWidth(Lod, ISD::NON_EXTLOAD, newVT))
+            continue;
           APInt newMask = APInt::getLowBitsSet(maskWidth, width);
-          for (unsigned offset=0; offset<origWidth/width; offset++) {
+          // Avoid accessing any padding here for now (we could use memWidth
+          // instead of origWidth here otherwise).
+          unsigned maxOffset = origWidth - width;
+          for (unsigned offset = 0; offset <= maxOffset; offset += 8) {
             if (Mask.isSubsetOf(newMask)) {
-              if (Layout.isLittleEndian())
-                bestOffset = (uint64_t)offset * (width/8);
-              else
-                bestOffset = (origWidth/width - offset - 1) * (width/8);
-              bestMask = Mask.lshr(offset * (width/8) * 8);
-              bestWidth = width;
-              break;
+              unsigned ptrOffset =
+                  Layout.isLittleEndian() ? offset : memWidth - width - offset;
+              unsigned IsFast = 0;
+              Align NewAlign = commonAlignment(Lod->getAlign(), ptrOffset / 8);
+              if (allowsMemoryAccess(*DAG.getContext(), Layout, newVT,
+                                     Lod->getAddressSpace(), NewAlign,
+                                     Lod->getMemOperand()->getFlags(),
+                                     &IsFast) &&
+                  IsFast) {
+                bestOffset = ptrOffset / 8;
+                bestMask = Mask.lshr(offset);
+                bestWidth = width;
+                break;
+              }
             }
-            newMask <<= width;
+            newMask <<= 8;
           }
+          if (bestWidth)
+            break;
         }
       }
       if (bestWidth) {
         EVT newVT = EVT::getIntegerVT(*DAG.getContext(), bestWidth);
-        if (newVT.isRound() &&
-            shouldReduceLoadWidth(Lod, ISD::NON_EXTLOAD, newVT)) {
-          SDValue Ptr = Lod->getBasePtr();
-          if (bestOffset != 0)
-            Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::getFixed(bestOffset),
-                                           dl);
-          SDValue NewLoad =
-              DAG.getLoad(newVT, dl, Lod->getChain(), Ptr,
-                          Lod->getPointerInfo().getWithOffset(bestOffset),
-                          Lod->getOriginalAlign());
-          return DAG.getSetCC(dl, VT,
-                              DAG.getNode(ISD::AND, dl, newVT, NewLoad,
-                                      DAG.getConstant(bestMask.trunc(bestWidth),
-                                                      dl, newVT)),
-                              DAG.getConstant(0LL, dl, newVT), Cond);
-        }
+        SDValue Ptr = Lod->getBasePtr();
+        if (bestOffset != 0)
+          Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::getFixed(bestOffset));
+        SDValue NewLoad =
+            DAG.getLoad(newVT, dl, Lod->getChain(), Ptr,
+                        Lod->getPointerInfo().getWithOffset(bestOffset),
+                        Lod->getOriginalAlign());
+        SDValue And = DAG.getNode(ISD::AND, dl, newVT, NewLoad,
+                                  DAG.getConstant(bestMask.trunc(bestWidth),
+                                                  dl, newVT));
+        return DAG.getSetCC(dl, VT, And, DAG.getConstant(0LL, dl, newVT), Cond);
       }
     }
 
diff --git a/llvm/test/CodeGen/ARM/simplifysetcc_narrow_load.ll b/llvm/test/CodeGen/ARM/simplifysetcc_narrow_load.ll
index 2cba4b46f9a8a4..838da59f9e412c 100644
--- a/llvm/test/CodeGen/ARM/simplifysetcc_narrow_load.ll
+++ b/llvm/test/CodeGen/ARM/simplifysetcc_narrow_load.ll
@@ -40,7 +40,9 @@ define i1 @test_129_15_0(ptr %y) {
 ;
 ; CHECK-BE-LABEL: test_129_15_0:
 ; CHECK-BE:       @ %bb.0:
-; CHECK-BE-NEXT:    ldrh r0, [r0, #14]
+; CHECK-BE-NEXT:    ldr r1, [r0, #12]
+; CHECK-BE-NEXT:    ldrb r0, [r0, #16]
+; CHECK-BE-NEXT:    orr r0, r0, r1, lsl #8
 ; CHECK-BE-NEXT:    mov r1, #255
 ; CHECK-BE-NEXT:    orr r1, r1, #32512
 ; CHECK-BE-NEXT:    ands r0, r0, r1
@@ -49,7 +51,7 @@ define i1 @test_129_15_0(ptr %y) {
 ;
 ; CHECK-V7-BE-LABEL: test_129_15_0:
 ; CHECK-V7-BE:       @ %bb.0:
-; CHECK-V7-BE-NEXT:    ldrh r0, [r0, #14]
+; CHECK-V7-BE-NEXT:    ldrh r0, [r0, #15]
 ; CHECK-V7-BE-NEXT:    bfc r0, #15, #17
 ; CHECK-V7-BE-NEXT:    cmp r0, #0
 ; CHECK-V7-BE-NEXT:    movwne r0, #1
@@ -119,14 +121,14 @@ define i1 @test_33_8_0(ptr %y) {
 ;
 ; CHECK-BE-LABEL: test_33_8_0:
 ; CHECK-BE:       @ %bb.0:
-; CHECK-BE-NEXT:    ldrb r0, [r0, #3]
+; CHECK-BE-NEXT:    ldrb r0, [r0, #4]
 ; CHECK-BE-NEXT:    cmp r0, #0
 ; CHECK-BE-NEXT:    movne r0, #1
 ; CHECK-BE-NEXT:    mov pc, lr
 ;
 ; CHECK-V7-BE-LABEL: test_33_8_0:
 ; CHECK-V7-BE:       @ %bb.0:
-; CHECK-V7-BE-NEXT:    ldrb r0, [r0, #3]
+; CHECK-V7-BE-NEXT:    ldrb r0, [r0, #4]
 ; CHECK-V7-BE-NEXT:    cmp r0, #0
 ; CHECK-V7-BE-NEXT:    movwne r0, #1
 ; CHECK-V7-BE-NEXT:    bx lr
@@ -179,13 +181,13 @@ define i1 @test_33_1_31(ptr %y) {
 ;
 ; CHECK-BE-LABEL: test_33_1_31:
 ; CHECK-BE:       @ %bb.0:
-; CHECK-BE-NEXT:    ldrb r0, [r0]
+; CHECK-BE-NEXT:    ldrb r0, [r0, #1]
 ; CHECK-BE-NEXT:    lsr r0, r0, #7
 ; CHECK-BE-NEXT:    mov pc, lr
 ;
 ; CHECK-V7-BE-LABEL: test_33_1_31:
 ; CHECK-V7-BE:       @ %bb.0:
-; CHECK-V7-BE-NEXT:    ldrb r0, [r0]
+; CHECK-V7-BE-NEXT:    ldrb r0, [r0, #1]
 ; CHECK-V7-BE-NEXT:    lsr r0, r0, #7
 ; CHECK-V7-BE-NEXT:    bx lr
   %a = load i33, ptr %y
@@ -209,13 +211,13 @@ define i1 @test_33_1_0(ptr %y) {
 ;
 ; CHECK-BE-LABEL: test_33_1_0:
 ; CHECK-BE:       @ %bb.0:
-; CHECK-BE-NEXT:    ldrb r0, [r0, #3]
+; CHECK-BE-NEXT:    ldrb r0, [r0, #4]
 ; CHECK-BE-NEXT:    and r0, r0, #1
 ; CHECK-BE-NEXT:    mov pc, lr
 ;
 ; CHECK-V7-BE-LABEL: test_33_1_0:
 ; CHECK-V7-BE:       @ %bb.0:
-; CHECK-V7-BE-NEXT:    ldrb r0, [r0, #3]
+; CHECK-V7-BE-NEXT:    ldrb r0, [r0, #4]
 ; CHECK-V7-BE-NEXT:    and r0, r0, #1
 ; CHECK-V7-BE-NEXT:    bx lr
   %a = load i33, ptr %y
@@ -309,7 +311,7 @@ define i1 @test_48_16_8(ptr %y) {
 ; CHECK-LE-LABEL: test_48_16_8:
 ; CHECK-LE:       @ %bb.0:
 ; CHECK-LE-NEXT:    ldrh r0, [r0, #1]
-; CHECK-LE-NEXT:    cmp r0, #0
+; CHECK-LE-NEXT:    lsls r0, r0, #8
 ; CHECK-LE-NEXT:    movne r0, #1
 ; CHECK-LE-NEXT:    mov pc, lr
 ;
@@ -444,9 +446,7 @@ define i1 @test_48_17_0(ptr %y) {
 ;
 ; CHECK-V7-BE-LABEL: test_48_17_0:
 ; CHECK-V7-BE:       @ %bb.0:
-; CHECK-V7-BE-NEXT:    ldr r1, [r0]
-; CHECK-V7-BE-NEXT:    ldrh r0, [r0, #4]
-; CHECK-V7-BE-NEXT:    orr r0, r0, r1, lsl #16
+; CHECK-V7-BE-NEXT:    ldr r0, [r0, #2]
 ; CHECK-V7-BE-NEXT:    bfc r0, #17, #15
 ; CHECK-V7-BE-NEXT:    cmp r0, #0
 ; CHECK-V7-BE-NEXT:    movwne r0, #1
@@ -506,15 +506,14 @@ define i1 @test_40_1_32(ptr %y) {
 ;
 ; CHECK-BE-LABEL: test_40_1_32:
 ; CHECK-BE:       @ %bb.0:
-; CHECK-BE-NEXT:    ldr r0, [r0]
-; CHECK-BE-NEXT:    mov r1, #1
-; CHECK-BE-NEXT:    and r0, r1, r0, lsr #24
+; CHECK-BE-NEXT:    ldrb r0, [r0]
+; CHECK-BE-NEXT:    and r0, r0, #1
 ; CHECK-BE-NEXT:    mov pc, lr
 ;
 ; CHECK-V7-BE-LABEL: test_40_1_32:
 ; CHECK-V7-BE:       @ %bb.0:
-; CHECK-V7-BE-NEXT:    ldr r0, [r0]
-; CHECK-V7-BE-NEXT:    ubfx r0, r0, #24, #1
+; CHECK-V7-BE-NEXT:    ldrb r0, [r0]
+; CHECK-V7-BE-NEXT:    and r0, r0, #1
 ; CHECK-V7-BE-NEXT:    bx lr
   %a = load i40, ptr %y
   %b = and i40 %a, u0x100000000
diff --git a/llvm/test/CodeGen/PowerPC/simplifysetcc_narrow_load.ll b/llvm/test/CodeGen/PowerPC/simplifysetcc_narrow_load.ll
index 1a03423fe6aec5..49b8e2bc2f7b48 100644
--- a/llvm/test/CodeGen/PowerPC/simplifysetcc_narrow_load.ll
+++ b/llvm/test/CodeGen/PowerPC/simplifysetcc_narrow_load.ll
@@ -26,7 +26,7 @@ define i1 @test_129_15_0(ptr %y) {
 ;
 ; CHECK-BE-LABEL: test_129_15_0:
 ; CHECK-BE:       # %bb.0:
-; CHECK-BE-NEXT:    lhz 3, 14(3)
+; CHECK-BE-NEXT:    lhz 3, 15(3)
 ; CHECK-BE-NEXT:    clrlwi 3, 3, 17
 ; CHECK-BE-NEXT:    addic 4, 3, -1
 ; CHECK-BE-NEXT:    subfe 3, 4, 3
@@ -69,7 +69,7 @@ define i1 @test_33_8_0(ptr %y) {
 ;
 ; CHECK-BE-LABEL: test_33_8_0:
 ; CHECK-BE:       # %bb.0:
-; CHECK-BE-NEXT:    lbz 3, 3(3)
+; CHECK-BE-NEXT:    lbz 3, 4(3)
 ; CHECK-BE-NEXT:    addic 4, 3, -1
 ; CHECK-BE-NEXT:    subfe 3, 4, 3
 ; CHECK-BE-NEXT:    blr
@@ -105,7 +105,7 @@ define i1 @test_33_1_31(ptr %y) {
 ;
 ; CHECK-BE-LABEL: test_33_1_31:
 ; CHECK-BE:       # %bb.0:
-; CHECK-BE-NEXT:    lbz 3, 0(3)
+; CHECK-BE-NEXT:    lbz 3, 1(3)
 ; CHECK-BE-NEXT:    srwi 3, 3, 7
 ; CHECK-BE-NEXT:    blr
   %a = load i33, ptr %y
@@ -123,7 +123,7 @@ define i1 @test_33_1_0(ptr %y) {
 ;
 ; CHECK-BE-LABEL: test_33_1_0:
 ; CHECK-BE:       # %bb.0:
-; CHECK-BE-NEXT:    lbz 3, 3(3)
+; CHECK-BE-NEXT:    lbz 3, 4(3)
 ; CHECK-BE-NEXT:    clrlwi 3, 3, 31
 ; CHECK-BE-NEXT:    blr
   %a = load i33, ptr %y
@@ -250,12 +250,10 @@ define i1 @test_48_17_0(ptr %y) {
 ;
 ; CHECK-BE-LABEL: test_48_17_0:
 ; CHECK-BE:       # %bb.0:
-; CHECK-BE-NEXT:    lhz 4, 4(3)
-; CHECK-BE-NEXT:    lwz 3, 0(3)
-; CHECK-BE-NEXT:    clrlwi 4, 4, 16
-; CHECK-BE-NEXT:    rlwimi 4, 3, 16, 15, 15
-; CHECK-BE-NEXT:    addic 3, 4, -1
-; CHECK-BE-NEXT:    subfe 3, 3, 4
+; CHECK-BE-NEXT:    lwz 3, 2(3)
+; CHECK-BE-NEXT:    clrlwi 3, 3, 15
+; CHECK-BE-NEXT:    addic 4, 3, -1
+; CHECK-BE-NEXT:    subfe 3, 4, 3
 ; CHECK-BE-NEXT:    blr
   %a = load i48, ptr %y
   %b = and i48 %a, u0x1ffff
@@ -292,8 +290,8 @@ define i1 @test_40_1_32(ptr %y) {
 ;
 ; CHECK-BE-LABEL: test_40_1_32:
 ; CHECK-BE:       # %bb.0:
-; CHECK-BE-NEXT:    lwz 3, 0(3)
-; CHECK-BE-NEXT:    rlwinm 3, 3, 8, 31, 31
+; CHECK-BE-NEXT:    lbz 3, 0(3)
+; CHECK-BE-NEXT:    clrlwi 3, 3, 31
 ; CHECK-BE-NEXT:    blr
   %a = load i40, ptr %y
   %b = and i40 %a, u0x100000000
@@ -325,7 +323,6 @@ define i1 @test_24_8_8(ptr %y) {
 ; CHECK-LE-LABEL: test_24_8_8:
 ; CHECK-LE:       # %bb.0:
 ; CHECK-LE-NEXT:    lbz 3, 1(3)
-; CHECK-LE-NEXT:    slwi 3, 3, 8
 ; CHECK-LE-NEXT:    addic 4, 3, -1
 ; CHECK-LE-NEXT:    subfe 3, 4, 3
 ; CHECK-LE-NEXT:    blr
@@ -333,7 +330,6 @@ define i1 @test_24_8_8(ptr %y) {
 ; CHECK-BE-LABEL: test_24_8_8:
 ; CHECK-BE:       # %bb.0:
 ; CHECK-BE-NEXT:    lbz 3, 1(3)
-; CHECK-BE-NEXT:    slwi 3, 3, 8
 ; CHECK-BE-NEXT:    addic 4, 3, -1
 ; CHECK-BE-NEXT:    subfe 3, 4, 3
 ; CHECK-BE-NEXT:    blr
@@ -346,18 +342,16 @@ define i1 @test_24_8_8(ptr %y) {
 define i1 @test_24_8_12(ptr %y) {
 ; CHECK-LE-LABEL: test_24_8_12:
 ; CHECK-LE:       # %bb.0:
-; CHECK-LE-NEXT:    lhz 4, 0(3)
-; CHECK-LE-NEXT:    lbz 3, 2(3)
-; CHECK-LE-NEXT:    rlwinm 4, 4, 0, 16, 19
-; CHECK-LE-NEXT:    rlwimi 4, 3, 16, 12, 15
-; CHECK-LE-NEXT:    addic 3, 4, -1
-; CHECK-LE-NEXT:    subfe 3, 3, 4
+; CHECK-LE-NEXT:    lhz 3, 1(3)
+; CHECK-LE-NEXT:    rlwinm 3, 3, 0, 20, 27
+; CHECK-LE-NEXT:    addic 4, 3, -1
+; CHECK-LE-NEXT:    subfe 3, 4, 3
 ; CHECK-LE-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test_24_8_12:
 ; CHECK-BE:       # %bb.0:
 ; CHECK-BE-NEXT:    lhz 3, 0(3)
-; CHECK-BE-NEXT:    rlwinm 3, 3, 8, 12, 19
+; CHECK-BE-NEXT:    rlwinm 3, 3, 0, 20, 27
 ; CHECK-BE-NEXT:    addic 4, 3, -1
 ; CHECK-BE-NEXT:    subfe 3, 4, 3
 ; CHECK-BE-NEXT:    blr
@@ -371,7 +365,6 @@ define i1 @test_24_8_16(ptr %y) {
 ; CHECK-LE-LABEL: test_24_8_16:
 ; CHECK-LE:       # %bb.0:
 ; CHECK-LE-NEXT:    lbz 3, 2(3)
-; CHECK-LE-NEXT:    slwi 3, 3, 16
 ; CHECK-LE-NEXT:    addic 4, 3, -1
 ; CHECK-LE-NEXT:    subfe 3, 4, 3
 ; CHECK-LE-NEXT:    blr
@@ -379,7 +372,6 @@ define i1 @test_24_8_16(ptr %y) {
 ; CHECK-BE-LABEL: test_24_8_16:
 ; CHECK-BE:       # %bb.0:
 ; CHECK-BE-NEXT:    lbz 3, 0(3)
-; CHECK-BE-NEXT:    slwi 3, 3, 16
 ; CHECK-BE-NEXT:    addic 4, 3, -1
 ; CHECK-BE-NEXT:    subfe 3, 4, 3
 ; CHECK-BE-NEXT:    blr



More information about the llvm-commits mailing list