[llvm] r294185 - Add DAGCombiner load combine tests with non-zero offset
Artur Pilipenko via llvm-commits
llvm-commits at lists.llvm.org
Mon Feb 6 06:15:32 PST 2017
Author: apilipenko
Date: Mon Feb 6 08:15:31 2017
New Revision: 294185
URL: http://llvm.org/viewvc/llvm-project?rev=294185&view=rev
Log:
Add DAGCombiner load combine tests with non-zero offset
This is separated from https://reviews.llvm.org/D29394 review.
Modified:
llvm/trunk/test/CodeGen/AArch64/load-combine-big-endian.ll
llvm/trunk/test/CodeGen/AArch64/load-combine.ll
llvm/trunk/test/CodeGen/ARM/load-combine-big-endian.ll
llvm/trunk/test/CodeGen/ARM/load-combine.ll
llvm/trunk/test/CodeGen/X86/load-combine.ll
Modified: llvm/trunk/test/CodeGen/AArch64/load-combine-big-endian.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/load-combine-big-endian.ll?rev=294185&r1=294184&r2=294185&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/load-combine-big-endian.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/load-combine-big-endian.ll Mon Feb 6 08:15:31 2017
@@ -191,3 +191,143 @@ define i64 @load_i64_by_i8(i64* %arg) {
%tmp37 = or i64 %tmp33, %tmp36
ret i64 %tmp37
}
+
+; i8* p; // p[1] is 4 byte aligned. Assembles an i32 from bytes p[1]..p[4], with p[1] as the least significant byte:
+; (i32) p[1] | ((i32) p[2] << 8) | ((i32) p[3] << 16) | ((i32) p[4] << 24)
+define i32 @load_i32_by_i8_nonzero_offset(i32* %arg) { ; expected assembly below keeps four separate byte loads
+; CHECK-LABEL: load_i32_by_i8_nonzero_offset:
+; CHECK: ldrb w8, [x0, #1]
+; CHECK-NEXT: ldrb w9, [x0, #2]
+; CHECK-NEXT: ldrb w10, [x0, #3]
+; CHECK-NEXT: ldrb w11, [x0, #4]
+; CHECK-NEXT: bfi w8, w9, #8, #8
+; CHECK-NEXT: bfi w8, w10, #16, #8
+; CHECK-NEXT: bfi w8, w11, #24, #8
+; CHECK-NEXT: mov w0, w8
+; CHECK-NEXT: ret
+ %tmp = bitcast i32* %arg to i8* ; p = (i8*) arg
+ %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 1
+ %tmp2 = load i8, i8* %tmp1, align 4 ; p[1] -> bits 0-7; the only load marked align 4
+ %tmp3 = zext i8 %tmp2 to i32
+ %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 2
+ %tmp5 = load i8, i8* %tmp4, align 1 ; p[2] -> bits 8-15
+ %tmp6 = zext i8 %tmp5 to i32
+ %tmp7 = shl nuw nsw i32 %tmp6, 8
+ %tmp8 = or i32 %tmp7, %tmp3
+ %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 3
+ %tmp10 = load i8, i8* %tmp9, align 1 ; p[3] -> bits 16-23
+ %tmp11 = zext i8 %tmp10 to i32
+ %tmp12 = shl nuw nsw i32 %tmp11, 16
+ %tmp13 = or i32 %tmp8, %tmp12
+ %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 4
+ %tmp15 = load i8, i8* %tmp14, align 1 ; p[4] -> bits 24-31
+ %tmp16 = zext i8 %tmp15 to i32
+ %tmp17 = shl nuw nsw i32 %tmp16, 24
+ %tmp18 = or i32 %tmp13, %tmp17 ; all four bytes merged
+ ret i32 %tmp18
+}
+
+; i8* p; // p[-4] is 4 byte aligned. Assembles an i32 from bytes p[-4]..p[-1], with p[-4] as the least significant byte:
+; (i32) p[-4] | ((i32) p[-3] << 8) | ((i32) p[-2] << 16) | ((i32) p[-1] << 24)
+define i32 @load_i32_by_i8_neg_offset(i32* %arg) { ; expected assembly below keeps four separate byte loads
+; CHECK-LABEL: load_i32_by_i8_neg_offset:
+; CHECK: ldurb w8, [x0, #-4]
+; CHECK-NEXT: ldurb w9, [x0, #-3]
+; CHECK-NEXT: ldurb w10, [x0, #-2]
+; CHECK-NEXT: ldurb w11, [x0, #-1]
+; CHECK-NEXT: bfi w8, w9, #8, #8
+; CHECK-NEXT: bfi w8, w10, #16, #8
+; CHECK-NEXT: bfi w8, w11, #24, #8
+; CHECK-NEXT: mov w0, w8
+; CHECK-NEXT: ret
+ %tmp = bitcast i32* %arg to i8* ; p = (i8*) arg
+ %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 -4
+ %tmp2 = load i8, i8* %tmp1, align 4 ; p[-4] -> bits 0-7; the only load marked align 4
+ %tmp3 = zext i8 %tmp2 to i32
+ %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 -3
+ %tmp5 = load i8, i8* %tmp4, align 1 ; p[-3] -> bits 8-15
+ %tmp6 = zext i8 %tmp5 to i32
+ %tmp7 = shl nuw nsw i32 %tmp6, 8
+ %tmp8 = or i32 %tmp7, %tmp3
+ %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 -2
+ %tmp10 = load i8, i8* %tmp9, align 1 ; p[-2] -> bits 16-23
+ %tmp11 = zext i8 %tmp10 to i32
+ %tmp12 = shl nuw nsw i32 %tmp11, 16
+ %tmp13 = or i32 %tmp8, %tmp12
+ %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 -1
+ %tmp15 = load i8, i8* %tmp14, align 1 ; p[-1] -> bits 24-31
+ %tmp16 = zext i8 %tmp15 to i32
+ %tmp17 = shl nuw nsw i32 %tmp16, 24
+ %tmp18 = or i32 %tmp13, %tmp17 ; all four bytes merged
+ ret i32 %tmp18
+}
+
+; i8* p; // p[1] is 4 byte aligned. Assembles an i32 from bytes p[1]..p[4] in reversed order, with p[4] as the least significant byte:
+; (i32) p[4] | ((i32) p[3] << 8) | ((i32) p[2] << 16) | ((i32) p[1] << 24)
+define i32 @load_i32_by_i8_nonzero_offset_bswap(i32* %arg) { ; expected assembly below keeps four separate byte loads
+; CHECK-LABEL: load_i32_by_i8_nonzero_offset_bswap:
+; CHECK: ldrb w8, [x0, #4]
+; CHECK-NEXT: ldrb w9, [x0, #3]
+; CHECK-NEXT: ldrb w10, [x0, #2]
+; CHECK-NEXT: ldrb w11, [x0, #1]
+; CHECK-NEXT: bfi w8, w9, #8, #8
+; CHECK-NEXT: bfi w8, w10, #16, #8
+; CHECK-NEXT: bfi w8, w11, #24, #8
+; CHECK-NEXT: mov w0, w8
+; CHECK-NEXT: ret
+ %tmp = bitcast i32* %arg to i8* ; p = (i8*) arg
+ %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 4
+ %tmp2 = load i8, i8* %tmp1, align 1 ; p[4] -> bits 0-7
+ %tmp3 = zext i8 %tmp2 to i32
+ %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 3
+ %tmp5 = load i8, i8* %tmp4, align 1 ; p[3] -> bits 8-15
+ %tmp6 = zext i8 %tmp5 to i32
+ %tmp7 = shl nuw nsw i32 %tmp6, 8
+ %tmp8 = or i32 %tmp7, %tmp3
+ %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 2
+ %tmp10 = load i8, i8* %tmp9, align 1 ; p[2] -> bits 16-23
+ %tmp11 = zext i8 %tmp10 to i32
+ %tmp12 = shl nuw nsw i32 %tmp11, 16
+ %tmp13 = or i32 %tmp8, %tmp12
+ %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 1
+ %tmp15 = load i8, i8* %tmp14, align 4 ; p[1] -> bits 24-31; the only load marked align 4
+ %tmp16 = zext i8 %tmp15 to i32
+ %tmp17 = shl nuw nsw i32 %tmp16, 24
+ %tmp18 = or i32 %tmp13, %tmp17 ; all four bytes merged
+ ret i32 %tmp18
+}
+
+; i8* p; // p[-4] is 4 byte aligned. Assembles an i32 from bytes p[-4]..p[-1] in reversed order, with p[-1] as the least significant byte:
+; (i32) p[-1] | ((i32) p[-2] << 8) | ((i32) p[-3] << 16) | ((i32) p[-4] << 24)
+define i32 @load_i32_by_i8_neg_offset_bswap(i32* %arg) { ; expected assembly below keeps four separate byte loads
+; CHECK-LABEL: load_i32_by_i8_neg_offset_bswap:
+; CHECK: ldurb w8, [x0, #-1]
+; CHECK-NEXT: ldurb w9, [x0, #-2]
+; CHECK-NEXT: ldurb w10, [x0, #-3]
+; CHECK-NEXT: ldurb w11, [x0, #-4]
+; CHECK-NEXT: bfi w8, w9, #8, #8
+; CHECK-NEXT: bfi w8, w10, #16, #8
+; CHECK-NEXT: bfi w8, w11, #24, #8
+; CHECK-NEXT: mov w0, w8
+; CHECK-NEXT: ret
+ %tmp = bitcast i32* %arg to i8* ; p = (i8*) arg
+ %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 -1
+ %tmp2 = load i8, i8* %tmp1, align 1 ; p[-1] -> bits 0-7
+ %tmp3 = zext i8 %tmp2 to i32
+ %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 -2
+ %tmp5 = load i8, i8* %tmp4, align 1 ; p[-2] -> bits 8-15
+ %tmp6 = zext i8 %tmp5 to i32
+ %tmp7 = shl nuw nsw i32 %tmp6, 8
+ %tmp8 = or i32 %tmp7, %tmp3
+ %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 -3
+ %tmp10 = load i8, i8* %tmp9, align 1 ; p[-3] -> bits 16-23
+ %tmp11 = zext i8 %tmp10 to i32
+ %tmp12 = shl nuw nsw i32 %tmp11, 16
+ %tmp13 = or i32 %tmp8, %tmp12
+ %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 -4
+ %tmp15 = load i8, i8* %tmp14, align 4 ; p[-4] -> bits 24-31; the only load marked align 4
+ %tmp16 = zext i8 %tmp15 to i32
+ %tmp17 = shl nuw nsw i32 %tmp16, 24
+ %tmp18 = or i32 %tmp13, %tmp17 ; all four bytes merged
+ ret i32 %tmp18
+}
Modified: llvm/trunk/test/CodeGen/AArch64/load-combine.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/load-combine.ll?rev=294185&r1=294184&r2=294185&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/load-combine.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/load-combine.ll Mon Feb 6 08:15:31 2017
@@ -178,3 +178,143 @@ define i64 @load_i64_by_i8_bswap(i64* %a
%tmp37 = or i64 %tmp33, %tmp36
ret i64 %tmp37
}
+
+; i8* p; // p[1] is 4 byte aligned. Assembles an i32 from bytes p[1]..p[4], with p[1] as the least significant byte:
+; (i32) p[1] | ((i32) p[2] << 8) | ((i32) p[3] << 16) | ((i32) p[4] << 24)
+define i32 @load_i32_by_i8_nonzero_offset(i32* %arg) { ; expected assembly below keeps four separate byte loads
+; CHECK-LABEL: load_i32_by_i8_nonzero_offset:
+; CHECK: ldrb w8, [x0, #1]
+; CHECK-NEXT: ldrb w9, [x0, #2]
+; CHECK-NEXT: ldrb w10, [x0, #3]
+; CHECK-NEXT: ldrb w11, [x0, #4]
+; CHECK-NEXT: bfi w8, w9, #8, #8
+; CHECK-NEXT: bfi w8, w10, #16, #8
+; CHECK-NEXT: bfi w8, w11, #24, #8
+; CHECK-NEXT: mov w0, w8
+; CHECK-NEXT: ret
+ %tmp = bitcast i32* %arg to i8* ; p = (i8*) arg
+ %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 1
+ %tmp2 = load i8, i8* %tmp1, align 4 ; p[1] -> bits 0-7; the only load marked align 4
+ %tmp3 = zext i8 %tmp2 to i32
+ %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 2
+ %tmp5 = load i8, i8* %tmp4, align 1 ; p[2] -> bits 8-15
+ %tmp6 = zext i8 %tmp5 to i32
+ %tmp7 = shl nuw nsw i32 %tmp6, 8
+ %tmp8 = or i32 %tmp7, %tmp3
+ %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 3
+ %tmp10 = load i8, i8* %tmp9, align 1 ; p[3] -> bits 16-23
+ %tmp11 = zext i8 %tmp10 to i32
+ %tmp12 = shl nuw nsw i32 %tmp11, 16
+ %tmp13 = or i32 %tmp8, %tmp12
+ %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 4
+ %tmp15 = load i8, i8* %tmp14, align 1 ; p[4] -> bits 24-31
+ %tmp16 = zext i8 %tmp15 to i32
+ %tmp17 = shl nuw nsw i32 %tmp16, 24
+ %tmp18 = or i32 %tmp13, %tmp17 ; all four bytes merged
+ ret i32 %tmp18
+}
+
+; i8* p; // p[-4] is 4 byte aligned. Assembles an i32 from bytes p[-4]..p[-1], with p[-4] as the least significant byte:
+; (i32) p[-4] | ((i32) p[-3] << 8) | ((i32) p[-2] << 16) | ((i32) p[-1] << 24)
+define i32 @load_i32_by_i8_neg_offset(i32* %arg) { ; expected assembly below keeps four separate byte loads
+; CHECK-LABEL: load_i32_by_i8_neg_offset:
+; CHECK: ldurb w8, [x0, #-4]
+; CHECK-NEXT: ldurb w9, [x0, #-3]
+; CHECK-NEXT: ldurb w10, [x0, #-2]
+; CHECK-NEXT: ldurb w11, [x0, #-1]
+; CHECK-NEXT: bfi w8, w9, #8, #8
+; CHECK-NEXT: bfi w8, w10, #16, #8
+; CHECK-NEXT: bfi w8, w11, #24, #8
+; CHECK-NEXT: mov w0, w8
+; CHECK-NEXT: ret
+ %tmp = bitcast i32* %arg to i8* ; p = (i8*) arg
+ %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 -4
+ %tmp2 = load i8, i8* %tmp1, align 4 ; p[-4] -> bits 0-7; the only load marked align 4
+ %tmp3 = zext i8 %tmp2 to i32
+ %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 -3
+ %tmp5 = load i8, i8* %tmp4, align 1 ; p[-3] -> bits 8-15
+ %tmp6 = zext i8 %tmp5 to i32
+ %tmp7 = shl nuw nsw i32 %tmp6, 8
+ %tmp8 = or i32 %tmp7, %tmp3
+ %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 -2
+ %tmp10 = load i8, i8* %tmp9, align 1 ; p[-2] -> bits 16-23
+ %tmp11 = zext i8 %tmp10 to i32
+ %tmp12 = shl nuw nsw i32 %tmp11, 16
+ %tmp13 = or i32 %tmp8, %tmp12
+ %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 -1
+ %tmp15 = load i8, i8* %tmp14, align 1 ; p[-1] -> bits 24-31
+ %tmp16 = zext i8 %tmp15 to i32
+ %tmp17 = shl nuw nsw i32 %tmp16, 24
+ %tmp18 = or i32 %tmp13, %tmp17 ; all four bytes merged
+ ret i32 %tmp18
+}
+
+; i8* p; // p[1] is 4 byte aligned. Assembles an i32 from bytes p[1]..p[4] in reversed order, with p[4] as the least significant byte:
+; (i32) p[4] | ((i32) p[3] << 8) | ((i32) p[2] << 16) | ((i32) p[1] << 24)
+define i32 @load_i32_by_i8_nonzero_offset_bswap(i32* %arg) { ; expected assembly below keeps four separate byte loads
+; CHECK-LABEL: load_i32_by_i8_nonzero_offset_bswap:
+; CHECK: ldrb w8, [x0, #4]
+; CHECK-NEXT: ldrb w9, [x0, #3]
+; CHECK-NEXT: ldrb w10, [x0, #2]
+; CHECK-NEXT: ldrb w11, [x0, #1]
+; CHECK-NEXT: bfi w8, w9, #8, #8
+; CHECK-NEXT: bfi w8, w10, #16, #8
+; CHECK-NEXT: bfi w8, w11, #24, #8
+; CHECK-NEXT: mov w0, w8
+; CHECK-NEXT: ret
+ %tmp = bitcast i32* %arg to i8* ; p = (i8*) arg
+ %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 4
+ %tmp2 = load i8, i8* %tmp1, align 1 ; p[4] -> bits 0-7
+ %tmp3 = zext i8 %tmp2 to i32
+ %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 3
+ %tmp5 = load i8, i8* %tmp4, align 1 ; p[3] -> bits 8-15
+ %tmp6 = zext i8 %tmp5 to i32
+ %tmp7 = shl nuw nsw i32 %tmp6, 8
+ %tmp8 = or i32 %tmp7, %tmp3
+ %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 2
+ %tmp10 = load i8, i8* %tmp9, align 1 ; p[2] -> bits 16-23
+ %tmp11 = zext i8 %tmp10 to i32
+ %tmp12 = shl nuw nsw i32 %tmp11, 16
+ %tmp13 = or i32 %tmp8, %tmp12
+ %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 1
+ %tmp15 = load i8, i8* %tmp14, align 4 ; p[1] -> bits 24-31; the only load marked align 4
+ %tmp16 = zext i8 %tmp15 to i32
+ %tmp17 = shl nuw nsw i32 %tmp16, 24
+ %tmp18 = or i32 %tmp13, %tmp17 ; all four bytes merged
+ ret i32 %tmp18
+}
+
+; i8* p; // p[-4] is 4 byte aligned. Assembles an i32 from bytes p[-4]..p[-1] in reversed order, with p[-1] as the least significant byte:
+; (i32) p[-1] | ((i32) p[-2] << 8) | ((i32) p[-3] << 16) | ((i32) p[-4] << 24)
+define i32 @load_i32_by_i8_neg_offset_bswap(i32* %arg) { ; expected assembly below keeps four separate byte loads
+; CHECK-LABEL: load_i32_by_i8_neg_offset_bswap:
+; CHECK: ldurb w8, [x0, #-1]
+; CHECK-NEXT: ldurb w9, [x0, #-2]
+; CHECK-NEXT: ldurb w10, [x0, #-3]
+; CHECK-NEXT: ldurb w11, [x0, #-4]
+; CHECK-NEXT: bfi w8, w9, #8, #8
+; CHECK-NEXT: bfi w8, w10, #16, #8
+; CHECK-NEXT: bfi w8, w11, #24, #8
+; CHECK-NEXT: mov w0, w8
+; CHECK-NEXT: ret
+ %tmp = bitcast i32* %arg to i8* ; p = (i8*) arg
+ %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 -1
+ %tmp2 = load i8, i8* %tmp1, align 1 ; p[-1] -> bits 0-7
+ %tmp3 = zext i8 %tmp2 to i32
+ %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 -2
+ %tmp5 = load i8, i8* %tmp4, align 1 ; p[-2] -> bits 8-15
+ %tmp6 = zext i8 %tmp5 to i32
+ %tmp7 = shl nuw nsw i32 %tmp6, 8
+ %tmp8 = or i32 %tmp7, %tmp3
+ %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 -3
+ %tmp10 = load i8, i8* %tmp9, align 1 ; p[-3] -> bits 16-23
+ %tmp11 = zext i8 %tmp10 to i32
+ %tmp12 = shl nuw nsw i32 %tmp11, 16
+ %tmp13 = or i32 %tmp8, %tmp12
+ %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 -4
+ %tmp15 = load i8, i8* %tmp14, align 4 ; p[-4] -> bits 24-31; the only load marked align 4
+ %tmp16 = zext i8 %tmp15 to i32
+ %tmp17 = shl nuw nsw i32 %tmp16, 24
+ %tmp18 = or i32 %tmp13, %tmp17 ; all four bytes merged
+ ret i32 %tmp18
+}
Modified: llvm/trunk/test/CodeGen/ARM/load-combine-big-endian.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/load-combine-big-endian.ll?rev=294185&r1=294184&r2=294185&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/load-combine-big-endian.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/load-combine-big-endian.ll Mon Feb 6 08:15:31 2017
@@ -269,3 +269,183 @@ define i64 @load_i64_by_i8(i64* %arg) {
%tmp37 = or i64 %tmp33, %tmp36
ret i64 %tmp37
}
+
+; i8* p; // p[1] is 4 byte aligned. Assembles an i32 from bytes p[1]..p[4], with p[1] as the least significant byte:
+; (i32) p[1] | ((i32) p[2] << 8) | ((i32) p[3] << 16) | ((i32) p[4] << 24)
+define i32 @load_i32_by_i8_nonzero_offset(i32* %arg) { ; both expected-assembly listings below keep four separate byte loads
+; CHECK-LABEL: load_i32_by_i8_nonzero_offset:
+; CHECK: ldrb r1, [r0, #1]
+; CHECK-NEXT: ldrb r2, [r0, #2]
+; CHECK-NEXT: ldrb r3, [r0, #3]
+; CHECK-NEXT: ldrb r0, [r0, #4]
+; CHECK-NEXT: orr r1, r1, r2, lsl #8
+; CHECK-NEXT: orr r1, r1, r3, lsl #16
+; CHECK-NEXT: orr r0, r1, r0, lsl #24
+; CHECK-NEXT: mov pc, lr
+
+; CHECK-ARMv6-LABEL: load_i32_by_i8_nonzero_offset:
+; CHECK-ARMv6: ldrb r1, [r0, #1]
+; CHECK-ARMv6-NEXT: ldrb r2, [r0, #2]
+; CHECK-ARMv6-NEXT: ldrb r3, [r0, #3]
+; CHECK-ARMv6-NEXT: ldrb r0, [r0, #4]
+; CHECK-ARMv6-NEXT: orr r1, r1, r2, lsl #8
+; CHECK-ARMv6-NEXT: orr r1, r1, r3, lsl #16
+; CHECK-ARMv6-NEXT: orr r0, r1, r0, lsl #24
+; CHECK-ARMv6-NEXT: bx lr
+
+ %tmp = bitcast i32* %arg to i8* ; p = (i8*) arg
+ %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 1
+ %tmp2 = load i8, i8* %tmp1, align 4 ; p[1] -> bits 0-7; the only load marked align 4
+ %tmp3 = zext i8 %tmp2 to i32
+ %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 2
+ %tmp5 = load i8, i8* %tmp4, align 1 ; p[2] -> bits 8-15
+ %tmp6 = zext i8 %tmp5 to i32
+ %tmp7 = shl nuw nsw i32 %tmp6, 8
+ %tmp8 = or i32 %tmp7, %tmp3
+ %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 3
+ %tmp10 = load i8, i8* %tmp9, align 1 ; p[3] -> bits 16-23
+ %tmp11 = zext i8 %tmp10 to i32
+ %tmp12 = shl nuw nsw i32 %tmp11, 16
+ %tmp13 = or i32 %tmp8, %tmp12
+ %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 4
+ %tmp15 = load i8, i8* %tmp14, align 1 ; p[4] -> bits 24-31
+ %tmp16 = zext i8 %tmp15 to i32
+ %tmp17 = shl nuw nsw i32 %tmp16, 24
+ %tmp18 = or i32 %tmp13, %tmp17 ; all four bytes merged
+ ret i32 %tmp18
+}
+
+; i8* p; // p[-4] is 4 byte aligned. Assembles an i32 from bytes p[-4]..p[-1], with p[-4] as the least significant byte:
+; (i32) p[-4] | ((i32) p[-3] << 8) | ((i32) p[-2] << 16) | ((i32) p[-1] << 24)
+define i32 @load_i32_by_i8_neg_offset(i32* %arg) { ; both expected-assembly listings below keep four separate byte loads
+; CHECK-LABEL: load_i32_by_i8_neg_offset:
+; CHECK: ldrb r1, [r0, #-4]
+; CHECK-NEXT: ldrb r2, [r0, #-3]
+; CHECK-NEXT: ldrb r3, [r0, #-2]
+; CHECK-NEXT: ldrb r0, [r0, #-1]
+; CHECK-NEXT: orr r1, r1, r2, lsl #8
+; CHECK-NEXT: orr r1, r1, r3, lsl #16
+; CHECK-NEXT: orr r0, r1, r0, lsl #24
+; CHECK-NEXT: mov pc, lr
+
+; CHECK-ARMv6-LABEL: load_i32_by_i8_neg_offset:
+; CHECK-ARMv6: ldrb r1, [r0, #-4]
+; CHECK-ARMv6-NEXT: ldrb r2, [r0, #-3]
+; CHECK-ARMv6-NEXT: ldrb r3, [r0, #-2]
+; CHECK-ARMv6-NEXT: ldrb r0, [r0, #-1]
+; CHECK-ARMv6-NEXT: orr r1, r1, r2, lsl #8
+; CHECK-ARMv6-NEXT: orr r1, r1, r3, lsl #16
+; CHECK-ARMv6-NEXT: orr r0, r1, r0, lsl #24
+; CHECK-ARMv6-NEXT: bx lr
+
+ %tmp = bitcast i32* %arg to i8* ; p = (i8*) arg
+ %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 -4
+ %tmp2 = load i8, i8* %tmp1, align 4 ; p[-4] -> bits 0-7; the only load marked align 4
+ %tmp3 = zext i8 %tmp2 to i32
+ %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 -3
+ %tmp5 = load i8, i8* %tmp4, align 1 ; p[-3] -> bits 8-15
+ %tmp6 = zext i8 %tmp5 to i32
+ %tmp7 = shl nuw nsw i32 %tmp6, 8
+ %tmp8 = or i32 %tmp7, %tmp3
+ %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 -2
+ %tmp10 = load i8, i8* %tmp9, align 1 ; p[-2] -> bits 16-23
+ %tmp11 = zext i8 %tmp10 to i32
+ %tmp12 = shl nuw nsw i32 %tmp11, 16
+ %tmp13 = or i32 %tmp8, %tmp12
+ %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 -1
+ %tmp15 = load i8, i8* %tmp14, align 1 ; p[-1] -> bits 24-31
+ %tmp16 = zext i8 %tmp15 to i32
+ %tmp17 = shl nuw nsw i32 %tmp16, 24
+ %tmp18 = or i32 %tmp13, %tmp17 ; all four bytes merged
+ ret i32 %tmp18
+}
+
+; i8* p; // p[1] is 4 byte aligned. Assembles an i32 from bytes p[1]..p[4] in reversed order, with p[4] as the least significant byte:
+; (i32) p[4] | ((i32) p[3] << 8) | ((i32) p[2] << 16) | ((i32) p[1] << 24)
+define i32 @load_i32_by_i8_nonzero_offset_bswap(i32* %arg) { ; both expected-assembly listings below keep four separate byte loads
+; CHECK-LABEL: load_i32_by_i8_nonzero_offset_bswap:
+; CHECK: ldrb r1, [r0, #1]
+; CHECK-NEXT: ldrb r2, [r0, #2]
+; CHECK-NEXT: ldrb r3, [r0, #3]
+; CHECK-NEXT: ldrb r0, [r0, #4]
+; CHECK-NEXT: orr r0, r0, r3, lsl #8
+; CHECK-NEXT: orr r0, r0, r2, lsl #16
+; CHECK-NEXT: orr r0, r0, r1, lsl #24
+; CHECK-NEXT: mov pc, lr
+
+; CHECK-ARMv6-LABEL: load_i32_by_i8_nonzero_offset_bswap:
+; CHECK-ARMv6: ldrb r1, [r0, #1]
+; CHECK-ARMv6-NEXT: ldrb r2, [r0, #2]
+; CHECK-ARMv6-NEXT: ldrb r3, [r0, #3]
+; CHECK-ARMv6-NEXT: ldrb r0, [r0, #4]
+; CHECK-ARMv6-NEXT: orr r0, r0, r3, lsl #8
+; CHECK-ARMv6-NEXT: orr r0, r0, r2, lsl #16
+; CHECK-ARMv6-NEXT: orr r0, r0, r1, lsl #24
+; CHECK-ARMv6-NEXT: bx lr
+
+ %tmp = bitcast i32* %arg to i8* ; p = (i8*) arg
+ %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 4
+ %tmp2 = load i8, i8* %tmp1, align 1 ; p[4] -> bits 0-7
+ %tmp3 = zext i8 %tmp2 to i32
+ %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 3
+ %tmp5 = load i8, i8* %tmp4, align 1 ; p[3] -> bits 8-15
+ %tmp6 = zext i8 %tmp5 to i32
+ %tmp7 = shl nuw nsw i32 %tmp6, 8
+ %tmp8 = or i32 %tmp7, %tmp3
+ %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 2
+ %tmp10 = load i8, i8* %tmp9, align 1 ; p[2] -> bits 16-23
+ %tmp11 = zext i8 %tmp10 to i32
+ %tmp12 = shl nuw nsw i32 %tmp11, 16
+ %tmp13 = or i32 %tmp8, %tmp12
+ %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 1
+ %tmp15 = load i8, i8* %tmp14, align 4 ; p[1] -> bits 24-31; the only load marked align 4
+ %tmp16 = zext i8 %tmp15 to i32
+ %tmp17 = shl nuw nsw i32 %tmp16, 24
+ %tmp18 = or i32 %tmp13, %tmp17 ; all four bytes merged
+ ret i32 %tmp18
+}
+
+; i8* p; // p[-4] is 4 byte aligned. Assembles an i32 from bytes p[-4]..p[-1] in reversed order, with p[-1] as the least significant byte:
+; (i32) p[-1] | ((i32) p[-2] << 8) | ((i32) p[-3] << 16) | ((i32) p[-4] << 24)
+define i32 @load_i32_by_i8_neg_offset_bswap(i32* %arg) { ; both expected-assembly listings below keep four separate byte loads
+; CHECK-LABEL: load_i32_by_i8_neg_offset_bswap:
+; CHECK: ldrb r1, [r0, #-4]
+; CHECK-NEXT: ldrb r2, [r0, #-3]
+; CHECK-NEXT: ldrb r3, [r0, #-2]
+; CHECK-NEXT: ldrb r0, [r0, #-1]
+; CHECK-NEXT: orr r0, r0, r3, lsl #8
+; CHECK-NEXT: orr r0, r0, r2, lsl #16
+; CHECK-NEXT: orr r0, r0, r1, lsl #24
+; CHECK-NEXT: mov pc, lr
+
+; CHECK-ARMv6-LABEL: load_i32_by_i8_neg_offset_bswap:
+; CHECK-ARMv6: ldrb r1, [r0, #-4]
+; CHECK-ARMv6-NEXT: ldrb r2, [r0, #-3]
+; CHECK-ARMv6-NEXT: ldrb r3, [r0, #-2]
+; CHECK-ARMv6-NEXT: ldrb r0, [r0, #-1]
+; CHECK-ARMv6-NEXT: orr r0, r0, r3, lsl #8
+; CHECK-ARMv6-NEXT: orr r0, r0, r2, lsl #16
+; CHECK-ARMv6-NEXT: orr r0, r0, r1, lsl #24
+; CHECK-ARMv6-NEXT: bx lr
+
+ %tmp = bitcast i32* %arg to i8* ; p = (i8*) arg
+ %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 -1
+ %tmp2 = load i8, i8* %tmp1, align 1 ; p[-1] -> bits 0-7
+ %tmp3 = zext i8 %tmp2 to i32
+ %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 -2
+ %tmp5 = load i8, i8* %tmp4, align 1 ; p[-2] -> bits 8-15
+ %tmp6 = zext i8 %tmp5 to i32
+ %tmp7 = shl nuw nsw i32 %tmp6, 8
+ %tmp8 = or i32 %tmp7, %tmp3
+ %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 -3
+ %tmp10 = load i8, i8* %tmp9, align 1 ; p[-3] -> bits 16-23
+ %tmp11 = zext i8 %tmp10 to i32
+ %tmp12 = shl nuw nsw i32 %tmp11, 16
+ %tmp13 = or i32 %tmp8, %tmp12
+ %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 -4
+ %tmp15 = load i8, i8* %tmp14, align 4 ; p[-4] -> bits 24-31; the only load marked align 4
+ %tmp16 = zext i8 %tmp15 to i32
+ %tmp17 = shl nuw nsw i32 %tmp16, 24
+ %tmp18 = or i32 %tmp13, %tmp17 ; all four bytes merged
+ ret i32 %tmp18
+}
Modified: llvm/trunk/test/CodeGen/ARM/load-combine.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/load-combine.ll?rev=294185&r1=294184&r2=294185&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/load-combine.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/load-combine.ll Mon Feb 6 08:15:31 2017
@@ -227,3 +227,183 @@ define i64 @load_i64_by_i8_bswap(i64* %a
%tmp37 = or i64 %tmp33, %tmp36
ret i64 %tmp37
}
+
+; i8* p; // p[1] is 4 byte aligned. Assembles an i32 from bytes p[1]..p[4], with p[1] as the least significant byte:
+; (i32) p[1] | ((i32) p[2] << 8) | ((i32) p[3] << 16) | ((i32) p[4] << 24)
+define i32 @load_i32_by_i8_nonzero_offset(i32* %arg) { ; both expected-assembly listings below keep four separate byte loads
+; CHECK-LABEL: load_i32_by_i8_nonzero_offset:
+; CHECK: ldrb r1, [r0, #1]
+; CHECK-NEXT: ldrb r2, [r0, #2]
+; CHECK-NEXT: ldrb r3, [r0, #3]
+; CHECK-NEXT: ldrb r0, [r0, #4]
+; CHECK-NEXT: orr r1, r1, r2, lsl #8
+; CHECK-NEXT: orr r1, r1, r3, lsl #16
+; CHECK-NEXT: orr r0, r1, r0, lsl #24
+; CHECK-NEXT: mov pc, lr
+
+; CHECK-ARMv6-LABEL: load_i32_by_i8_nonzero_offset:
+; CHECK-ARMv6: ldrb r1, [r0, #1]
+; CHECK-ARMv6-NEXT: ldrb r2, [r0, #2]
+; CHECK-ARMv6-NEXT: ldrb r3, [r0, #3]
+; CHECK-ARMv6-NEXT: ldrb r0, [r0, #4]
+; CHECK-ARMv6-NEXT: orr r1, r1, r2, lsl #8
+; CHECK-ARMv6-NEXT: orr r1, r1, r3, lsl #16
+; CHECK-ARMv6-NEXT: orr r0, r1, r0, lsl #24
+; CHECK-ARMv6-NEXT: bx lr
+
+ %tmp = bitcast i32* %arg to i8* ; p = (i8*) arg
+ %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 1
+ %tmp2 = load i8, i8* %tmp1, align 4 ; p[1] -> bits 0-7; the only load marked align 4
+ %tmp3 = zext i8 %tmp2 to i32
+ %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 2
+ %tmp5 = load i8, i8* %tmp4, align 1 ; p[2] -> bits 8-15
+ %tmp6 = zext i8 %tmp5 to i32
+ %tmp7 = shl nuw nsw i32 %tmp6, 8
+ %tmp8 = or i32 %tmp7, %tmp3
+ %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 3
+ %tmp10 = load i8, i8* %tmp9, align 1 ; p[3] -> bits 16-23
+ %tmp11 = zext i8 %tmp10 to i32
+ %tmp12 = shl nuw nsw i32 %tmp11, 16
+ %tmp13 = or i32 %tmp8, %tmp12
+ %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 4
+ %tmp15 = load i8, i8* %tmp14, align 1 ; p[4] -> bits 24-31
+ %tmp16 = zext i8 %tmp15 to i32
+ %tmp17 = shl nuw nsw i32 %tmp16, 24
+ %tmp18 = or i32 %tmp13, %tmp17 ; all four bytes merged
+ ret i32 %tmp18
+}
+
+; i8* p; // p[-4] is 4 byte aligned. Assembles an i32 from bytes p[-4]..p[-1], with p[-4] as the least significant byte:
+; (i32) p[-4] | ((i32) p[-3] << 8) | ((i32) p[-2] << 16) | ((i32) p[-1] << 24)
+define i32 @load_i32_by_i8_neg_offset(i32* %arg) { ; both expected-assembly listings below keep four separate byte loads
+; CHECK-LABEL: load_i32_by_i8_neg_offset:
+; CHECK: ldrb r1, [r0, #-4]
+; CHECK-NEXT: ldrb r2, [r0, #-3]
+; CHECK-NEXT: ldrb r3, [r0, #-2]
+; CHECK-NEXT: ldrb r0, [r0, #-1]
+; CHECK-NEXT: orr r1, r1, r2, lsl #8
+; CHECK-NEXT: orr r1, r1, r3, lsl #16
+; CHECK-NEXT: orr r0, r1, r0, lsl #24
+; CHECK-NEXT: mov pc, lr
+
+; CHECK-ARMv6-LABEL: load_i32_by_i8_neg_offset:
+; CHECK-ARMv6: ldrb r1, [r0, #-4]
+; CHECK-ARMv6-NEXT: ldrb r2, [r0, #-3]
+; CHECK-ARMv6-NEXT: ldrb r3, [r0, #-2]
+; CHECK-ARMv6-NEXT: ldrb r0, [r0, #-1]
+; CHECK-ARMv6-NEXT: orr r1, r1, r2, lsl #8
+; CHECK-ARMv6-NEXT: orr r1, r1, r3, lsl #16
+; CHECK-ARMv6-NEXT: orr r0, r1, r0, lsl #24
+; CHECK-ARMv6-NEXT: bx lr
+
+ %tmp = bitcast i32* %arg to i8* ; p = (i8*) arg
+ %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 -4
+ %tmp2 = load i8, i8* %tmp1, align 4 ; p[-4] -> bits 0-7; the only load marked align 4
+ %tmp3 = zext i8 %tmp2 to i32
+ %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 -3
+ %tmp5 = load i8, i8* %tmp4, align 1 ; p[-3] -> bits 8-15
+ %tmp6 = zext i8 %tmp5 to i32
+ %tmp7 = shl nuw nsw i32 %tmp6, 8
+ %tmp8 = or i32 %tmp7, %tmp3
+ %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 -2
+ %tmp10 = load i8, i8* %tmp9, align 1 ; p[-2] -> bits 16-23
+ %tmp11 = zext i8 %tmp10 to i32
+ %tmp12 = shl nuw nsw i32 %tmp11, 16
+ %tmp13 = or i32 %tmp8, %tmp12
+ %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 -1
+ %tmp15 = load i8, i8* %tmp14, align 1 ; p[-1] -> bits 24-31
+ %tmp16 = zext i8 %tmp15 to i32
+ %tmp17 = shl nuw nsw i32 %tmp16, 24
+ %tmp18 = or i32 %tmp13, %tmp17 ; all four bytes merged
+ ret i32 %tmp18
+}
+
+; i8* p; // p[1] is 4 byte aligned. Assembles an i32 from bytes p[1]..p[4] in reversed order, with p[4] as the least significant byte:
+; (i32) p[4] | ((i32) p[3] << 8) | ((i32) p[2] << 16) | ((i32) p[1] << 24)
+define i32 @load_i32_by_i8_nonzero_offset_bswap(i32* %arg) { ; both expected-assembly listings below keep four separate byte loads
+; CHECK-LABEL: load_i32_by_i8_nonzero_offset_bswap:
+; CHECK: ldrb r1, [r0, #1]
+; CHECK-NEXT: ldrb r2, [r0, #2]
+; CHECK-NEXT: ldrb r3, [r0, #3]
+; CHECK-NEXT: ldrb r0, [r0, #4]
+; CHECK-NEXT: orr r0, r0, r3, lsl #8
+; CHECK-NEXT: orr r0, r0, r2, lsl #16
+; CHECK-NEXT: orr r0, r0, r1, lsl #24
+; CHECK-NEXT: mov pc, lr
+
+; CHECK-ARMv6-LABEL: load_i32_by_i8_nonzero_offset_bswap:
+; CHECK-ARMv6: ldrb r1, [r0, #1]
+; CHECK-ARMv6-NEXT: ldrb r2, [r0, #2]
+; CHECK-ARMv6-NEXT: ldrb r3, [r0, #3]
+; CHECK-ARMv6-NEXT: ldrb r0, [r0, #4]
+; CHECK-ARMv6-NEXT: orr r0, r0, r3, lsl #8
+; CHECK-ARMv6-NEXT: orr r0, r0, r2, lsl #16
+; CHECK-ARMv6-NEXT: orr r0, r0, r1, lsl #24
+; CHECK-ARMv6-NEXT: bx lr
+
+ %tmp = bitcast i32* %arg to i8* ; p = (i8*) arg
+ %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 4
+ %tmp2 = load i8, i8* %tmp1, align 1 ; p[4] -> bits 0-7
+ %tmp3 = zext i8 %tmp2 to i32
+ %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 3
+ %tmp5 = load i8, i8* %tmp4, align 1 ; p[3] -> bits 8-15
+ %tmp6 = zext i8 %tmp5 to i32
+ %tmp7 = shl nuw nsw i32 %tmp6, 8
+ %tmp8 = or i32 %tmp7, %tmp3
+ %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 2
+ %tmp10 = load i8, i8* %tmp9, align 1 ; p[2] -> bits 16-23
+ %tmp11 = zext i8 %tmp10 to i32
+ %tmp12 = shl nuw nsw i32 %tmp11, 16
+ %tmp13 = or i32 %tmp8, %tmp12
+ %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 1
+ %tmp15 = load i8, i8* %tmp14, align 4 ; p[1] -> bits 24-31; the only load marked align 4
+ %tmp16 = zext i8 %tmp15 to i32
+ %tmp17 = shl nuw nsw i32 %tmp16, 24
+ %tmp18 = or i32 %tmp13, %tmp17 ; all four bytes merged
+ ret i32 %tmp18
+}
+
+; i8* p; // p[-4] is 4 byte aligned. Assembles an i32 from bytes p[-4]..p[-1] in reversed order, with p[-1] as the least significant byte:
+; (i32) p[-1] | ((i32) p[-2] << 8) | ((i32) p[-3] << 16) | ((i32) p[-4] << 24)
+define i32 @load_i32_by_i8_neg_offset_bswap(i32* %arg) { ; both expected-assembly listings below keep four separate byte loads
+; CHECK-LABEL: load_i32_by_i8_neg_offset_bswap:
+; CHECK: ldrb r1, [r0, #-4]
+; CHECK-NEXT: ldrb r2, [r0, #-3]
+; CHECK-NEXT: ldrb r3, [r0, #-2]
+; CHECK-NEXT: ldrb r0, [r0, #-1]
+; CHECK-NEXT: orr r0, r0, r3, lsl #8
+; CHECK-NEXT: orr r0, r0, r2, lsl #16
+; CHECK-NEXT: orr r0, r0, r1, lsl #24
+; CHECK-NEXT: mov pc, lr
+
+; CHECK-ARMv6-LABEL: load_i32_by_i8_neg_offset_bswap:
+; CHECK-ARMv6: ldrb r1, [r0, #-4]
+; CHECK-ARMv6-NEXT: ldrb r2, [r0, #-3]
+; CHECK-ARMv6-NEXT: ldrb r3, [r0, #-2]
+; CHECK-ARMv6-NEXT: ldrb r0, [r0, #-1]
+; CHECK-ARMv6-NEXT: orr r0, r0, r3, lsl #8
+; CHECK-ARMv6-NEXT: orr r0, r0, r2, lsl #16
+; CHECK-ARMv6-NEXT: orr r0, r0, r1, lsl #24
+; CHECK-ARMv6-NEXT: bx lr
+
+ %tmp = bitcast i32* %arg to i8* ; p = (i8*) arg
+ %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 -1
+ %tmp2 = load i8, i8* %tmp1, align 1 ; p[-1] -> bits 0-7
+ %tmp3 = zext i8 %tmp2 to i32
+ %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 -2
+ %tmp5 = load i8, i8* %tmp4, align 1 ; p[-2] -> bits 8-15
+ %tmp6 = zext i8 %tmp5 to i32
+ %tmp7 = shl nuw nsw i32 %tmp6, 8
+ %tmp8 = or i32 %tmp7, %tmp3
+ %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 -3
+ %tmp10 = load i8, i8* %tmp9, align 1 ; p[-3] -> bits 16-23
+ %tmp11 = zext i8 %tmp10 to i32
+ %tmp12 = shl nuw nsw i32 %tmp11, 16
+ %tmp13 = or i32 %tmp8, %tmp12
+ %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 -4
+ %tmp15 = load i8, i8* %tmp14, align 4 ; p[-4] -> bits 24-31; the only load marked align 4
+ %tmp16 = zext i8 %tmp15 to i32
+ %tmp17 = shl nuw nsw i32 %tmp16, 24
+ %tmp18 = or i32 %tmp13, %tmp17 ; all four bytes merged
+ ret i32 %tmp18
+}
Modified: llvm/trunk/test/CodeGen/X86/load-combine.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/load-combine.ll?rev=294185&r1=294184&r2=294185&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/load-combine.ll (original)
+++ llvm/trunk/test/CodeGen/X86/load-combine.ll Mon Feb 6 08:15:31 2017
@@ -574,8 +574,8 @@ define i32 @load_i32_by_i8_bswap_unrelat
; Non-zero offsets are not supported for now
; i8* p;
; (i32) p[1] | ((i32) p[2] << 8) | ((i32) p[3] << 16) | ((i32) p[4] << 24)
-define i32 @load_i32_by_i8_unsupported_offset(i32* %arg) {
-; CHECK-LABEL: load_i32_by_i8_unsupported_offset:
+define i32 @load_i32_by_i8_nonzero_offset(i32* %arg) {
+; CHECK-LABEL: load_i32_by_i8_nonzero_offset:
; CHECK: # BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movzbl 1(%eax), %ecx
@@ -590,7 +590,7 @@ define i32 @load_i32_by_i8_unsupported_o
; CHECK-NEXT: orl %ecx, %eax
; CHECK-NEXT: retl
;
-; CHECK64-LABEL: load_i32_by_i8_unsupported_offset:
+; CHECK64-LABEL: load_i32_by_i8_nonzero_offset:
; CHECK64: # BB#0:
; CHECK64-NEXT: movzbl 1(%rdi), %eax
; CHECK64-NEXT: movzbl 2(%rdi), %ecx
@@ -622,6 +622,168 @@ define i32 @load_i32_by_i8_unsupported_o
%tmp15 = load i8, i8* %tmp14, align 1
%tmp16 = zext i8 %tmp15 to i32
%tmp17 = shl nuw nsw i32 %tmp16, 24
+ %tmp18 = or i32 %tmp13, %tmp17
+ ret i32 %tmp18
+}
+
+; i8* p; // every load below is align 1. Assembles an i32 from bytes p[-4]..p[-1], with p[-4] as the least significant byte:
+; (i32) p[-4] | ((i32) p[-3] << 8) | ((i32) p[-2] << 16) | ((i32) p[-1] << 24)
+define i32 @load_i32_by_i8_neg_offset(i32* %arg) { ; both expected-assembly listings below keep four separate byte loads
+; CHECK-LABEL: load_i32_by_i8_neg_offset:
+; CHECK: # BB#0:
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT: movzbl -4(%eax), %ecx
+; CHECK-NEXT: movzbl -3(%eax), %edx
+; CHECK-NEXT: shll $8, %edx
+; CHECK-NEXT: orl %ecx, %edx
+; CHECK-NEXT: movzbl -2(%eax), %ecx
+; CHECK-NEXT: shll $16, %ecx
+; CHECK-NEXT: orl %edx, %ecx
+; CHECK-NEXT: movzbl -1(%eax), %eax
+; CHECK-NEXT: shll $24, %eax
+; CHECK-NEXT: orl %ecx, %eax
+; CHECK-NEXT: retl
+;
+; CHECK64-LABEL: load_i32_by_i8_neg_offset:
+; CHECK64: # BB#0:
+; CHECK64-NEXT: movzbl -4(%rdi), %eax
+; CHECK64-NEXT: movzbl -3(%rdi), %ecx
+; CHECK64-NEXT: shll $8, %ecx
+; CHECK64-NEXT: orl %eax, %ecx
+; CHECK64-NEXT: movzbl -2(%rdi), %edx
+; CHECK64-NEXT: shll $16, %edx
+; CHECK64-NEXT: orl %ecx, %edx
+; CHECK64-NEXT: movzbl -1(%rdi), %eax
+; CHECK64-NEXT: shll $24, %eax
+; CHECK64-NEXT: orl %edx, %eax
+; CHECK64-NEXT: retq
+
+ %tmp = bitcast i32* %arg to i8* ; p = (i8*) arg
+ %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 -4
+ %tmp2 = load i8, i8* %tmp1, align 1 ; p[-4] -> bits 0-7
+ %tmp3 = zext i8 %tmp2 to i32
+ %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 -3
+ %tmp5 = load i8, i8* %tmp4, align 1 ; p[-3] -> bits 8-15
+ %tmp6 = zext i8 %tmp5 to i32
+ %tmp7 = shl nuw nsw i32 %tmp6, 8
+ %tmp8 = or i32 %tmp7, %tmp3
+ %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 -2
+ %tmp10 = load i8, i8* %tmp9, align 1 ; p[-2] -> bits 16-23
+ %tmp11 = zext i8 %tmp10 to i32
+ %tmp12 = shl nuw nsw i32 %tmp11, 16
+ %tmp13 = or i32 %tmp8, %tmp12
+ %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 -1
+ %tmp15 = load i8, i8* %tmp14, align 1 ; p[-1] -> bits 24-31
+ %tmp16 = zext i8 %tmp15 to i32
+ %tmp17 = shl nuw nsw i32 %tmp16, 24
+ %tmp18 = or i32 %tmp13, %tmp17 ; all four bytes merged
+ ret i32 %tmp18
+}
+
+; i8* p; // every load below is align 1. Assembles an i32 from bytes p[1]..p[4] in reversed order, with p[4] as the least significant byte:
+; (i32) p[4] | ((i32) p[3] << 8) | ((i32) p[2] << 16) | ((i32) p[1] << 24)
+define i32 @load_i32_by_i8_nonzero_offset_bswap(i32* %arg) { ; both expected-assembly listings below keep four separate byte loads
+; CHECK-LABEL: load_i32_by_i8_nonzero_offset_bswap:
+; CHECK: # BB#0:
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT: movzbl 4(%eax), %ecx
+; CHECK-NEXT: movzbl 3(%eax), %edx
+; CHECK-NEXT: shll $8, %edx
+; CHECK-NEXT: orl %ecx, %edx
+; CHECK-NEXT: movzbl 2(%eax), %ecx
+; CHECK-NEXT: shll $16, %ecx
+; CHECK-NEXT: orl %edx, %ecx
+; CHECK-NEXT: movzbl 1(%eax), %eax
+; CHECK-NEXT: shll $24, %eax
+; CHECK-NEXT: orl %ecx, %eax
+; CHECK-NEXT: retl
+;
+; CHECK64-LABEL: load_i32_by_i8_nonzero_offset_bswap:
+; CHECK64: # BB#0:
+; CHECK64-NEXT: movzbl 4(%rdi), %eax
+; CHECK64-NEXT: movzbl 3(%rdi), %ecx
+; CHECK64-NEXT: shll $8, %ecx
+; CHECK64-NEXT: orl %eax, %ecx
+; CHECK64-NEXT: movzbl 2(%rdi), %edx
+; CHECK64-NEXT: shll $16, %edx
+; CHECK64-NEXT: orl %ecx, %edx
+; CHECK64-NEXT: movzbl 1(%rdi), %eax
+; CHECK64-NEXT: shll $24, %eax
+; CHECK64-NEXT: orl %edx, %eax
+; CHECK64-NEXT: retq
+
+ %tmp = bitcast i32* %arg to i8* ; p = (i8*) arg
+ %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 4
+ %tmp2 = load i8, i8* %tmp1, align 1 ; p[4] -> bits 0-7
+ %tmp3 = zext i8 %tmp2 to i32
+ %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 3
+ %tmp5 = load i8, i8* %tmp4, align 1 ; p[3] -> bits 8-15
+ %tmp6 = zext i8 %tmp5 to i32
+ %tmp7 = shl nuw nsw i32 %tmp6, 8
+ %tmp8 = or i32 %tmp7, %tmp3
+ %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 2
+ %tmp10 = load i8, i8* %tmp9, align 1 ; p[2] -> bits 16-23
+ %tmp11 = zext i8 %tmp10 to i32
+ %tmp12 = shl nuw nsw i32 %tmp11, 16
+ %tmp13 = or i32 %tmp8, %tmp12
+ %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 1
+ %tmp15 = load i8, i8* %tmp14, align 1 ; p[1] -> bits 24-31
+ %tmp16 = zext i8 %tmp15 to i32
+ %tmp17 = shl nuw nsw i32 %tmp16, 24
+ %tmp18 = or i32 %tmp13, %tmp17 ; all four bytes merged
+ ret i32 %tmp18
+}
+
+; i8* p; // every load below is align 1. Assembles an i32 from bytes p[-4]..p[-1] in reversed order, with p[-1] as the least significant byte:
+; (i32) p[-1] | ((i32) p[-2] << 8) | ((i32) p[-3] << 16) | ((i32) p[-4] << 24)
+define i32 @load_i32_by_i8_neg_offset_bswap(i32* %arg) { ; both expected-assembly listings below keep four separate byte loads
+; CHECK-LABEL: load_i32_by_i8_neg_offset_bswap:
+; CHECK: # BB#0:
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT: movzbl -1(%eax), %ecx
+; CHECK-NEXT: movzbl -2(%eax), %edx
+; CHECK-NEXT: shll $8, %edx
+; CHECK-NEXT: orl %ecx, %edx
+; CHECK-NEXT: movzbl -3(%eax), %ecx
+; CHECK-NEXT: shll $16, %ecx
+; CHECK-NEXT: orl %edx, %ecx
+; CHECK-NEXT: movzbl -4(%eax), %eax
+; CHECK-NEXT: shll $24, %eax
+; CHECK-NEXT: orl %ecx, %eax
+; CHECK-NEXT: retl
+;
+; CHECK64-LABEL: load_i32_by_i8_neg_offset_bswap:
+; CHECK64: # BB#0:
+; CHECK64-NEXT: movzbl -1(%rdi), %eax
+; CHECK64-NEXT: movzbl -2(%rdi), %ecx
+; CHECK64-NEXT: shll $8, %ecx
+; CHECK64-NEXT: orl %eax, %ecx
+; CHECK64-NEXT: movzbl -3(%rdi), %edx
+; CHECK64-NEXT: shll $16, %edx
+; CHECK64-NEXT: orl %ecx, %edx
+; CHECK64-NEXT: movzbl -4(%rdi), %eax
+; CHECK64-NEXT: shll $24, %eax
+; CHECK64-NEXT: orl %edx, %eax
+; CHECK64-NEXT: retq
+
+ %tmp = bitcast i32* %arg to i8* ; p = (i8*) arg
+ %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 -1
+ %tmp2 = load i8, i8* %tmp1, align 1 ; p[-1] -> bits 0-7
+ %tmp3 = zext i8 %tmp2 to i32
+ %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 -2
+ %tmp5 = load i8, i8* %tmp4, align 1 ; p[-2] -> bits 8-15
+ %tmp6 = zext i8 %tmp5 to i32
+ %tmp7 = shl nuw nsw i32 %tmp6, 8
+ %tmp8 = or i32 %tmp7, %tmp3
+ %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 -3
+ %tmp10 = load i8, i8* %tmp9, align 1 ; p[-3] -> bits 16-23
+ %tmp11 = zext i8 %tmp10 to i32
+ %tmp12 = shl nuw nsw i32 %tmp11, 16
+ %tmp13 = or i32 %tmp8, %tmp12
+ %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 -4
+ %tmp15 = load i8, i8* %tmp14, align 1 ; p[-4] -> bits 24-31
+ %tmp16 = zext i8 %tmp15 to i32
+ %tmp17 = shl nuw nsw i32 %tmp16, 24
%tmp18 = or i32 %tmp13, %tmp17 ; all four bytes merged
ret i32 %tmp18
}
More information about the llvm-commits mailing list