[clang] [llvm] [AArch64] Add CodeGen support for scalar FEAT_CPA (PR #105669)
David Green via llvm-commits
llvm-commits at lists.llvm.org
Fri Jun 20 05:53:00 PDT 2025
================
@@ -0,0 +1,723 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=aarch64 -verify-machineinstrs --mattr=+cpa -aarch64-use-featcpa-codegen=true -O0 -global-isel=1 -global-isel-abort=1 %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK-CPA-O0
+; RUN: llc -mtriple=aarch64 -verify-machineinstrs --mattr=+cpa -aarch64-use-featcpa-codegen=true -O3 -global-isel=1 -global-isel-abort=1 %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK-CPA-O3
+; RUN: llc -mtriple=aarch64 -verify-machineinstrs --mattr=-cpa -O0 -global-isel=1 -global-isel-abort=1 %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK-NOCPA-O0
+; RUN: llc -mtriple=aarch64 -verify-machineinstrs --mattr=-cpa -O3 -global-isel=1 -global-isel-abort=1 %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK-NOCPA-O3
+
+; Element types indexed by the tests in this file: my_type is two i64 fields
+; (16 bytes), my_type2 is six i64 fields (48 bytes).
+%struct.my_type = type { i64, i64 }
+%struct.my_type2 = type { i64, i64, i64, i64, i64, i64 }
+
+; External arrays used as base pointers for the address computations.
+@array = external dso_local global [10 x %struct.my_type], align 8
+@array2 = external dso_local global [10 x %struct.my_type2], align 8
+
+; addpt1: stores %arg to field 1 of @array[%index] (16-byte %struct.my_type
+; elements). The checks expect the scaled index to be added with
+; "addpt ..., lsl #4" when +cpa is enabled and with a plain "add" otherwise;
+; the field offset is folded into the store as #8 in all four configurations.
+define void @addpt1(i64 %index, i64 %arg) {
+; CHECK-CPA-O0-LABEL: addpt1:
+; CHECK-CPA-O0: // %bb.0: // %entry
+; CHECK-CPA-O0-NEXT: adrp x8, array
+; CHECK-CPA-O0-NEXT: add x8, x8, :lo12:array
+; CHECK-CPA-O0-NEXT: addpt x8, x8, x0, lsl #4
+; CHECK-CPA-O0-NEXT: str x1, [x8, #8]
+; CHECK-CPA-O0-NEXT: ret
+;
+; CHECK-CPA-O3-LABEL: addpt1:
+; CHECK-CPA-O3: // %bb.0: // %entry
+; CHECK-CPA-O3-NEXT: adrp x8, array
+; CHECK-CPA-O3-NEXT: add x8, x8, :lo12:array
+; CHECK-CPA-O3-NEXT: addpt x8, x8, x0, lsl #4
+; CHECK-CPA-O3-NEXT: str x1, [x8, #8]
+; CHECK-CPA-O3-NEXT: ret
+;
+; CHECK-NOCPA-O0-LABEL: addpt1:
+; CHECK-NOCPA-O0: // %bb.0: // %entry
+; CHECK-NOCPA-O0-NEXT: adrp x8, array
+; CHECK-NOCPA-O0-NEXT: add x8, x8, :lo12:array
+; CHECK-NOCPA-O0-NEXT: add x8, x8, x0, lsl #4
+; CHECK-NOCPA-O0-NEXT: str x1, [x8, #8]
+; CHECK-NOCPA-O0-NEXT: ret
+;
+; CHECK-NOCPA-O3-LABEL: addpt1:
+; CHECK-NOCPA-O3: // %bb.0: // %entry
+; CHECK-NOCPA-O3-NEXT: adrp x8, array
+; CHECK-NOCPA-O3-NEXT: add x8, x8, :lo12:array
+; CHECK-NOCPA-O3-NEXT: add x8, x8, x0, lsl #4
+; CHECK-NOCPA-O3-NEXT: str x1, [x8, #8]
+; CHECK-NOCPA-O3-NEXT: ret
+entry:
+ %e2 = getelementptr inbounds %struct.my_type, ptr @array, i64 %index, i32 1
+ store i64 %arg, ptr %e2, align 8
+ ret void
+}
+
+; maddpt1: copies 48 bytes from %val to @array2[sext(%pos)] (48-byte
+; %struct.my_type2 elements). With +cpa the checks expect the address to be
+; formed with maddpt; without it they show smaddl. At -O0 the copy is a tail
+; call to memcpy; at -O3 it is expanded into q-register load/store pairs.
+define void @maddpt1(i32 %pos, ptr %val) {
+; CHECK-CPA-O0-LABEL: maddpt1:
+; CHECK-CPA-O0: // %bb.0: // %entry
+; CHECK-CPA-O0-NEXT: // implicit-def: $x8
+; CHECK-CPA-O0-NEXT: mov w8, w0
+; CHECK-CPA-O0-NEXT: sxtw x8, w8
+; CHECK-CPA-O0-NEXT: mov w9, #48 // =0x30
+; CHECK-CPA-O0-NEXT: // kill: def $x9 killed $w9
+; CHECK-CPA-O0-NEXT: adrp x10, array2
+; CHECK-CPA-O0-NEXT: add x10, x10, :lo12:array2
+; CHECK-CPA-O0-NEXT: maddpt x0, x8, x9, x10
+; CHECK-CPA-O0-NEXT: mov w8, #48 // =0x30
+; CHECK-CPA-O0-NEXT: mov w2, w8
+; CHECK-CPA-O0-NEXT: b memcpy
+;
+; CHECK-CPA-O3-LABEL: maddpt1:
+; CHECK-CPA-O3: // %bb.0: // %entry
+; CHECK-CPA-O3-NEXT: // kill: def $w0 killed $w0 def $x0
+; CHECK-CPA-O3-NEXT: sxtw x9, w0
+; CHECK-CPA-O3-NEXT: mov w8, #48 // =0x30
+; CHECK-CPA-O3-NEXT: ldr q0, [x1]
+; CHECK-CPA-O3-NEXT: adrp x10, array2
+; CHECK-CPA-O3-NEXT: add x10, x10, :lo12:array2
+; CHECK-CPA-O3-NEXT: maddpt x8, x9, x8, x10
+; CHECK-CPA-O3-NEXT: str q0, [x8]
+; CHECK-CPA-O3-NEXT: ldr q0, [x1, #16]
+; CHECK-CPA-O3-NEXT: str q0, [x8, #16]
+; CHECK-CPA-O3-NEXT: ldr q0, [x1, #32]
+; CHECK-CPA-O3-NEXT: str q0, [x8, #32]
+; CHECK-CPA-O3-NEXT: ret
+;
+; CHECK-NOCPA-O0-LABEL: maddpt1:
+; CHECK-NOCPA-O0: // %bb.0: // %entry
+; CHECK-NOCPA-O0-NEXT: adrp x9, array2
+; CHECK-NOCPA-O0-NEXT: add x9, x9, :lo12:array2
+; CHECK-NOCPA-O0-NEXT: mov w8, #48 // =0x30
+; CHECK-NOCPA-O0-NEXT: smaddl x0, w0, w8, x9
+; CHECK-NOCPA-O0-NEXT: mov w8, #48 // =0x30
+; CHECK-NOCPA-O0-NEXT: mov w2, w8
+; CHECK-NOCPA-O0-NEXT: b memcpy
+;
+; CHECK-NOCPA-O3-LABEL: maddpt1:
+; CHECK-NOCPA-O3: // %bb.0: // %entry
+; CHECK-NOCPA-O3-NEXT: mov w8, #48 // =0x30
+; CHECK-NOCPA-O3-NEXT: adrp x9, array2
+; CHECK-NOCPA-O3-NEXT: add x9, x9, :lo12:array2
+; CHECK-NOCPA-O3-NEXT: smaddl x8, w0, w8, x9
+; CHECK-NOCPA-O3-NEXT: ldr q0, [x1]
+; CHECK-NOCPA-O3-NEXT: str q0, [x8]
+; CHECK-NOCPA-O3-NEXT: ldr q0, [x1, #16]
+; CHECK-NOCPA-O3-NEXT: str q0, [x8, #16]
+; CHECK-NOCPA-O3-NEXT: ldr q0, [x1, #32]
+; CHECK-NOCPA-O3-NEXT: str q0, [x8, #32]
+; CHECK-NOCPA-O3-NEXT: ret
+entry:
+ %idxprom = sext i32 %pos to i64
+ %arrayidx = getelementptr inbounds [10 x %struct.my_type2], ptr @array2, i64 0, i64 %idxprom
+ tail call void @llvm.memcpy.p0.p0.i64(ptr align 8 dereferenceable(48) %arrayidx, ptr align 8 dereferenceable(48) %val, i64 48, i1 false)
+ ret void
+}
+
+; msubpt1: copies 48 bytes from @array2[2] to the element sext(%index)
+; entries before @array2[6]. With +cpa the checks expect the negated, scaled
+; index to be applied with msubpt; without it they show mneg followed by add.
+; %elem is not used by the body.
+define void @msubpt1(i32 %index, i32 %elem) {
+; CHECK-CPA-O0-LABEL: msubpt1:
+; CHECK-CPA-O0: // %bb.0: // %entry
+; CHECK-CPA-O0-NEXT: // implicit-def: $x8
+; CHECK-CPA-O0-NEXT: mov w8, w0
+; CHECK-CPA-O0-NEXT: sxtw x10, w8
+; CHECK-CPA-O0-NEXT: mov w8, #48 // =0x30
+; CHECK-CPA-O0-NEXT: mov w9, w8
+; CHECK-CPA-O0-NEXT: mov w8, #288 // =0x120
+; CHECK-CPA-O0-NEXT: mov w11, w8
+; CHECK-CPA-O0-NEXT: adrp x8, array2
+; CHECK-CPA-O0-NEXT: add x8, x8, :lo12:array2
+; CHECK-CPA-O0-NEXT: addpt x11, x8, x11
+; CHECK-CPA-O0-NEXT: msubpt x0, x9, x10, x11
+; CHECK-CPA-O0-NEXT: mov w9, #48 // =0x30
+; CHECK-CPA-O0-NEXT: mov w2, w9
+; CHECK-CPA-O0-NEXT: mov w9, #96 // =0x60
+; CHECK-CPA-O0-NEXT: // kill: def $x9 killed $w9
+; CHECK-CPA-O0-NEXT: addpt x1, x8, x9
+; CHECK-CPA-O0-NEXT: b memcpy
+;
+; CHECK-CPA-O3-LABEL: msubpt1:
+; CHECK-CPA-O3: // %bb.0: // %entry
+; CHECK-CPA-O3-NEXT: // kill: def $w0 killed $w0 def $x0
+; CHECK-CPA-O3-NEXT: sxtw x9, w0
+; CHECK-CPA-O3-NEXT: adrp x10, array2+96
+; CHECK-CPA-O3-NEXT: add x10, x10, :lo12:array2+96
+; CHECK-CPA-O3-NEXT: mov w8, #48 // =0x30
+; CHECK-CPA-O3-NEXT: ldr q0, [x10]
+; CHECK-CPA-O3-NEXT: msubpt x8, x8, x9, x10
+; CHECK-CPA-O3-NEXT: str q0, [x8, #192]
+; CHECK-CPA-O3-NEXT: ldr q0, [x10, #16]
+; CHECK-CPA-O3-NEXT: str q0, [x8, #208]
+; CHECK-CPA-O3-NEXT: ldr q0, [x10, #32]
+; CHECK-CPA-O3-NEXT: str q0, [x8, #224]
+; CHECK-CPA-O3-NEXT: ret
+;
+; CHECK-NOCPA-O0-LABEL: msubpt1:
+; CHECK-NOCPA-O0: // %bb.0: // %entry
+; CHECK-NOCPA-O0-NEXT: // implicit-def: $x8
+; CHECK-NOCPA-O0-NEXT: mov w8, w0
+; CHECK-NOCPA-O0-NEXT: sxtw x8, w8
+; CHECK-NOCPA-O0-NEXT: mov w9, #48 // =0x30
+; CHECK-NOCPA-O0-NEXT: // kill: def $x9 killed $w9
+; CHECK-NOCPA-O0-NEXT: mneg x10, x8, x9
+; CHECK-NOCPA-O0-NEXT: adrp x8, array2
+; CHECK-NOCPA-O0-NEXT: add x8, x8, :lo12:array2
+; CHECK-NOCPA-O0-NEXT: add x9, x8, #288
+; CHECK-NOCPA-O0-NEXT: add x0, x9, x10
+; CHECK-NOCPA-O0-NEXT: mov w9, #48 // =0x30
+; CHECK-NOCPA-O0-NEXT: mov w2, w9
+; CHECK-NOCPA-O0-NEXT: add x1, x8, #96
+; CHECK-NOCPA-O0-NEXT: b memcpy
+;
+; CHECK-NOCPA-O3-LABEL: msubpt1:
+; CHECK-NOCPA-O3: // %bb.0: // %entry
+; CHECK-NOCPA-O3-NEXT: // kill: def $w0 killed $w0 def $x0
+; CHECK-NOCPA-O3-NEXT: sxtw x8, w0
+; CHECK-NOCPA-O3-NEXT: mov w9, #48 // =0x30
+; CHECK-NOCPA-O3-NEXT: mneg x8, x8, x9
+; CHECK-NOCPA-O3-NEXT: adrp x9, array2+96
+; CHECK-NOCPA-O3-NEXT: add x9, x9, :lo12:array2+96
+; CHECK-NOCPA-O3-NEXT: ldr q0, [x9]
+; CHECK-NOCPA-O3-NEXT: add x8, x9, x8
+; CHECK-NOCPA-O3-NEXT: str q0, [x8, #192]
+; CHECK-NOCPA-O3-NEXT: ldr q0, [x9, #16]
+; CHECK-NOCPA-O3-NEXT: str q0, [x8, #208]
+; CHECK-NOCPA-O3-NEXT: ldr q0, [x9, #32]
+; CHECK-NOCPA-O3-NEXT: str q0, [x8, #224]
+; CHECK-NOCPA-O3-NEXT: ret
+entry:
+ %idx.ext = sext i32 %index to i64
+ %idx.neg = sub nsw i64 0, %idx.ext
+ %add.ptr = getelementptr inbounds %struct.my_type2, ptr getelementptr inbounds ([10 x %struct.my_type2], ptr @array2, i64 0, i64 6), i64 %idx.neg
+ tail call void @llvm.memcpy.p0.p0.i64(ptr align 8 dereferenceable(48) %add.ptr, ptr align 8 dereferenceable(48) getelementptr inbounds ([10 x %struct.my_type2], ptr @array2, i64 0, i64 2), i64 48, i1 false), !tbaa.struct !6
+ ret void
+}
+
+; subpt1: copies 16 bytes from @array[2] to @array[6] offset by
+; (sext(%index) * -16) elements of %struct.my_type. The index is produced by
+; smull with -16 in every configuration; the +cpa checks contain addpt where
+; the -cpa checks contain add. At -O0 the scaled index is applied through the
+; register-offset store ("[x8, x9, lsl #4]") both with and without +cpa.
+; %elem is not used by the body.
+define void @subpt1(i32 %index, i32 %elem) {
+; CHECK-CPA-O0-LABEL: subpt1:
+; CHECK-CPA-O0: // %bb.0: // %entry
+; CHECK-CPA-O0-NEXT: mov w8, #-16 // =0xfffffff0
+; CHECK-CPA-O0-NEXT: smull x9, w0, w8
+; CHECK-CPA-O0-NEXT: adrp x8, array
+; CHECK-CPA-O0-NEXT: add x8, x8, :lo12:array
+; CHECK-CPA-O0-NEXT: ldr q0, [x8, #32]
+; CHECK-CPA-O0-NEXT: mov w10, #96 // =0x60
+; CHECK-CPA-O0-NEXT: // kill: def $x10 killed $w10
+; CHECK-CPA-O0-NEXT: addpt x8, x8, x10
+; CHECK-CPA-O0-NEXT: str q0, [x8, x9, lsl #4]
+; CHECK-CPA-O0-NEXT: ret
+;
+; CHECK-CPA-O3-LABEL: subpt1:
+; CHECK-CPA-O3: // %bb.0: // %entry
+; CHECK-CPA-O3-NEXT: mov w8, #-16 // =0xfffffff0
+; CHECK-CPA-O3-NEXT: adrp x9, array+32
+; CHECK-CPA-O3-NEXT: add x9, x9, :lo12:array+32
+; CHECK-CPA-O3-NEXT: smull x8, w0, w8
+; CHECK-CPA-O3-NEXT: ldr q0, [x9]
+; CHECK-CPA-O3-NEXT: addpt x8, x9, x8, lsl #4
+; CHECK-CPA-O3-NEXT: str q0, [x8, #64]
+; CHECK-CPA-O3-NEXT: ret
+;
+; CHECK-NOCPA-O0-LABEL: subpt1:
+; CHECK-NOCPA-O0: // %bb.0: // %entry
+; CHECK-NOCPA-O0-NEXT: mov w8, #-16 // =0xfffffff0
+; CHECK-NOCPA-O0-NEXT: smull x9, w0, w8
+; CHECK-NOCPA-O0-NEXT: adrp x8, array
+; CHECK-NOCPA-O0-NEXT: add x8, x8, :lo12:array
+; CHECK-NOCPA-O0-NEXT: ldr q0, [x8, #32]
+; CHECK-NOCPA-O0-NEXT: add x8, x8, #96
+; CHECK-NOCPA-O0-NEXT: str q0, [x8, x9, lsl #4]
+; CHECK-NOCPA-O0-NEXT: ret
+;
+; CHECK-NOCPA-O3-LABEL: subpt1:
+; CHECK-NOCPA-O3: // %bb.0: // %entry
+; CHECK-NOCPA-O3-NEXT: mov w8, #-16 // =0xfffffff0
+; CHECK-NOCPA-O3-NEXT: adrp x9, array+32
+; CHECK-NOCPA-O3-NEXT: add x9, x9, :lo12:array+32
+; CHECK-NOCPA-O3-NEXT: smull x8, w0, w8
+; CHECK-NOCPA-O3-NEXT: ldr q0, [x9]
+; CHECK-NOCPA-O3-NEXT: add x8, x9, x8, lsl #4
+; CHECK-NOCPA-O3-NEXT: str q0, [x8, #64]
+; CHECK-NOCPA-O3-NEXT: ret
+entry:
+ %conv = sext i32 %index to i64
+ %mul.neg = mul nsw i64 %conv, -16
+ %add.ptr = getelementptr inbounds %struct.my_type, ptr getelementptr inbounds ([10 x %struct.my_type], ptr @array, i64 0, i64 6), i64 %mul.neg
+ tail call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 8 dereferenceable(16) %add.ptr, ptr noundef nonnull align 8 dereferenceable(16) getelementptr inbounds ([10 x %struct.my_type], ptr @array, i64 0, i64 2), i64 16, i1 false), !tbaa.struct !6
+ ret void
+}
+
+; subpt2: same copy as subpt1, but the element index is the plain negation
+; (0 - sext(%index)) rather than a multiply by -16. The checks show the
+; negation as a subtract from xzr with a sign-extended operand; the +cpa
+; checks contain addpt where the -cpa checks contain add. %elem is not used
+; by the body.
+define void @subpt2(i32 %index, i32 %elem) {
+; CHECK-CPA-O0-LABEL: subpt2:
+; CHECK-CPA-O0: // %bb.0: // %entry
+; CHECK-CPA-O0-NEXT: mov x8, xzr
+; CHECK-CPA-O0-NEXT: subs x9, x8, w0, sxtw
+; CHECK-CPA-O0-NEXT: adrp x8, array
+; CHECK-CPA-O0-NEXT: add x8, x8, :lo12:array
+; CHECK-CPA-O0-NEXT: ldr q0, [x8, #32]
+; CHECK-CPA-O0-NEXT: mov w10, #96 // =0x60
+; CHECK-CPA-O0-NEXT: // kill: def $x10 killed $w10
+; CHECK-CPA-O0-NEXT: addpt x8, x8, x10
+; CHECK-CPA-O0-NEXT: str q0, [x8, x9, lsl #4]
+; CHECK-CPA-O0-NEXT: ret
+;
+; CHECK-CPA-O3-LABEL: subpt2:
+; CHECK-CPA-O3: // %bb.0: // %entry
+; CHECK-CPA-O3-NEXT: mov x8, xzr
+; CHECK-CPA-O3-NEXT: adrp x9, array+32
+; CHECK-CPA-O3-NEXT: add x9, x9, :lo12:array+32
+; CHECK-CPA-O3-NEXT: sub x8, x8, w0, sxtw
+; CHECK-CPA-O3-NEXT: ldr q0, [x9]
+; CHECK-CPA-O3-NEXT: addpt x8, x9, x8, lsl #4
+; CHECK-CPA-O3-NEXT: str q0, [x8, #64]
+; CHECK-CPA-O3-NEXT: ret
+;
+; CHECK-NOCPA-O0-LABEL: subpt2:
+; CHECK-NOCPA-O0: // %bb.0: // %entry
+; CHECK-NOCPA-O0-NEXT: mov x8, xzr
+; CHECK-NOCPA-O0-NEXT: subs x9, x8, w0, sxtw
+; CHECK-NOCPA-O0-NEXT: adrp x8, array
+; CHECK-NOCPA-O0-NEXT: add x8, x8, :lo12:array
+; CHECK-NOCPA-O0-NEXT: ldr q0, [x8, #32]
+; CHECK-NOCPA-O0-NEXT: add x8, x8, #96
+; CHECK-NOCPA-O0-NEXT: str q0, [x8, x9, lsl #4]
+; CHECK-NOCPA-O0-NEXT: ret
+;
+; CHECK-NOCPA-O3-LABEL: subpt2:
+; CHECK-NOCPA-O3: // %bb.0: // %entry
+; CHECK-NOCPA-O3-NEXT: mov x8, xzr
+; CHECK-NOCPA-O3-NEXT: adrp x9, array+32
+; CHECK-NOCPA-O3-NEXT: add x9, x9, :lo12:array+32
+; CHECK-NOCPA-O3-NEXT: sub x8, x8, w0, sxtw
+; CHECK-NOCPA-O3-NEXT: ldr q0, [x9]
+; CHECK-NOCPA-O3-NEXT: add x8, x9, x8, lsl #4
+; CHECK-NOCPA-O3-NEXT: str q0, [x8, #64]
+; CHECK-NOCPA-O3-NEXT: ret
+entry:
+ %idx.ext = sext i32 %index to i64
+ %idx.neg = sub nsw i64 0, %idx.ext
+ %add.ptr = getelementptr inbounds %struct.my_type, ptr getelementptr inbounds ([10 x %struct.my_type], ptr @array, i64 0, i64 6), i64 %idx.neg
+ tail call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 8 dereferenceable(16) %add.ptr, ptr noundef nonnull align 8 dereferenceable(16) getelementptr inbounds ([10 x %struct.my_type], ptr @array, i64 0, i64 2), i64 16, i1 false), !tbaa.struct !11
+ ret void
+}
+
+; subpt3: returns %ptr moved back by one i64 (a constant -8 byte offset).
+; With +cpa the checks expect the constant to be materialised and added with
+; addpt; without it they show an immediate subtract of #8. %index is not
+; used by the body.
+define ptr @subpt3(ptr %ptr, i32 %index) {
+; CHECK-CPA-O0-LABEL: subpt3:
+; CHECK-CPA-O0: // %bb.0: // %entry
+; CHECK-CPA-O0-NEXT: mov x8, #-8 // =0xfffffffffffffff8
+; CHECK-CPA-O0-NEXT: addpt x0, x0, x8
+; CHECK-CPA-O0-NEXT: ret
+;
+; CHECK-CPA-O3-LABEL: subpt3:
+; CHECK-CPA-O3: // %bb.0: // %entry
+; CHECK-CPA-O3-NEXT: mov x8, #-8 // =0xfffffffffffffff8
+; CHECK-CPA-O3-NEXT: addpt x0, x0, x8
+; CHECK-CPA-O3-NEXT: ret
+;
+; CHECK-NOCPA-O0-LABEL: subpt3:
+; CHECK-NOCPA-O0: // %bb.0: // %entry
+; CHECK-NOCPA-O0-NEXT: subs x0, x0, #8
+; CHECK-NOCPA-O0-NEXT: ret
+;
+; CHECK-NOCPA-O3-LABEL: subpt3:
+; CHECK-NOCPA-O3: // %bb.0: // %entry
+; CHECK-NOCPA-O3-NEXT: sub x0, x0, #8
+; CHECK-NOCPA-O3-NEXT: ret
+entry:
+ %incdec.ptr.i.i.i = getelementptr inbounds i64, ptr %ptr, i64 -1
+ ret ptr %incdec.ptr.i.i.i
+}
+
+; subi64: plain i64 integer "add -1" (no pointer involved). The checks are
+; the same with and without +cpa: an ordinary immediate subtract, with no
+; pointer-arithmetic instruction. %index is not used by the body.
+define i64 @subi64(i64 %ptr, i32 %index) {
+; CHECK-CPA-O0-LABEL: subi64:
+; CHECK-CPA-O0: // %bb.0: // %entry
+; CHECK-CPA-O0-NEXT: subs x0, x0, #1
+; CHECK-CPA-O0-NEXT: ret
+;
+; CHECK-CPA-O3-LABEL: subi64:
+; CHECK-CPA-O3: // %bb.0: // %entry
+; CHECK-CPA-O3-NEXT: sub x0, x0, #1
+; CHECK-CPA-O3-NEXT: ret
+;
+; CHECK-NOCPA-O0-LABEL: subi64:
+; CHECK-NOCPA-O0: // %bb.0: // %entry
+; CHECK-NOCPA-O0-NEXT: subs x0, x0, #1
+; CHECK-NOCPA-O0-NEXT: ret
+;
+; CHECK-NOCPA-O3-LABEL: subi64:
+; CHECK-NOCPA-O3: // %bb.0: // %entry
+; CHECK-NOCPA-O3-NEXT: sub x0, x0, #1
+; CHECK-NOCPA-O3-NEXT: ret
+entry:
+ %incdec.ptr.i.i.i = add i64 %ptr, -1
+ ret i64 %incdec.ptr.i.i.i
+}
+
+; subi32: plain i32 integer "add -1" (no pointer involved). The checks are
+; the same with and without +cpa: an ordinary 32-bit immediate subtract.
+; %index is not used by the body.
+define i32 @subi32(i32 %ptr, i32 %index) {
+; CHECK-CPA-O0-LABEL: subi32:
+; CHECK-CPA-O0: // %bb.0: // %entry
+; CHECK-CPA-O0-NEXT: subs w0, w0, #1
+; CHECK-CPA-O0-NEXT: ret
+;
+; CHECK-CPA-O3-LABEL: subi32:
+; CHECK-CPA-O3: // %bb.0: // %entry
+; CHECK-CPA-O3-NEXT: sub w0, w0, #1
+; CHECK-CPA-O3-NEXT: ret
+;
+; CHECK-NOCPA-O0-LABEL: subi32:
+; CHECK-NOCPA-O0: // %bb.0: // %entry
+; CHECK-NOCPA-O0-NEXT: subs w0, w0, #1
+; CHECK-NOCPA-O0-NEXT: ret
+;
+; CHECK-NOCPA-O3-LABEL: subi32:
+; CHECK-NOCPA-O3: // %bb.0: // %entry
+; CHECK-NOCPA-O3-NEXT: sub w0, w0, #1
+; CHECK-NOCPA-O3-NEXT: ret
+entry:
+ %incdec.ptr.i.i.i = add i32 %ptr, -1
+ ret i32 %incdec.ptr.i.i.i
+}
+
+; subi16: plain i16 integer "add -1" (no pointer involved). The checks are
+; the same with and without +cpa: an immediate subtract on the w register.
+; %index is not used by the body.
+define i16 @subi16(i16 %ptr, i32 %index) {
+; CHECK-CPA-O0-LABEL: subi16:
+; CHECK-CPA-O0: // %bb.0: // %entry
+; CHECK-CPA-O0-NEXT: subs w0, w0, #1
+; CHECK-CPA-O0-NEXT: ret
+;
+; CHECK-CPA-O3-LABEL: subi16:
+; CHECK-CPA-O3: // %bb.0: // %entry
+; CHECK-CPA-O3-NEXT: sub w0, w0, #1
+; CHECK-CPA-O3-NEXT: ret
+;
+; CHECK-NOCPA-O0-LABEL: subi16:
+; CHECK-NOCPA-O0: // %bb.0: // %entry
+; CHECK-NOCPA-O0-NEXT: subs w0, w0, #1
+; CHECK-NOCPA-O0-NEXT: ret
+;
+; CHECK-NOCPA-O3-LABEL: subi16:
+; CHECK-NOCPA-O3: // %bb.0: // %entry
+; CHECK-NOCPA-O3-NEXT: sub w0, w0, #1
+; CHECK-NOCPA-O3-NEXT: ret
+entry:
+ %incdec.ptr.i.i.i = add i16 %ptr, -1
+ ret i16 %incdec.ptr.i.i.i
+}
+
+; addi64: plain i64 integer "add 1" (no pointer involved). All four
+; configurations expect the same ordinary "add x0, x0, #1"; +cpa does not
+; change the output. %index is not used by the body.
+define i64 @addi64(i64 %ptr, i32 %index) {
+; CHECK-CPA-O0-LABEL: addi64:
+; CHECK-CPA-O0: // %bb.0: // %entry
+; CHECK-CPA-O0-NEXT: add x0, x0, #1
+; CHECK-CPA-O0-NEXT: ret
+;
+; CHECK-CPA-O3-LABEL: addi64:
+; CHECK-CPA-O3: // %bb.0: // %entry
+; CHECK-CPA-O3-NEXT: add x0, x0, #1
+; CHECK-CPA-O3-NEXT: ret
+;
+; CHECK-NOCPA-O0-LABEL: addi64:
+; CHECK-NOCPA-O0: // %bb.0: // %entry
+; CHECK-NOCPA-O0-NEXT: add x0, x0, #1
+; CHECK-NOCPA-O0-NEXT: ret
+;
+; CHECK-NOCPA-O3-LABEL: addi64:
+; CHECK-NOCPA-O3: // %bb.0: // %entry
+; CHECK-NOCPA-O3-NEXT: add x0, x0, #1
+; CHECK-NOCPA-O3-NEXT: ret
+entry:
+ %incdec.ptr.i.i.i = add i64 %ptr, 1
+ ret i64 %incdec.ptr.i.i.i
+}
+
+; addi32: plain i32 integer "add 1" (no pointer involved). All four
+; configurations expect the same ordinary "add w0, w0, #1"; +cpa does not
+; change the output. %index is not used by the body.
+define i32 @addi32(i32 %ptr, i32 %index) {
+; CHECK-CPA-O0-LABEL: addi32:
+; CHECK-CPA-O0: // %bb.0: // %entry
+; CHECK-CPA-O0-NEXT: add w0, w0, #1
+; CHECK-CPA-O0-NEXT: ret
+;
+; CHECK-CPA-O3-LABEL: addi32:
+; CHECK-CPA-O3: // %bb.0: // %entry
+; CHECK-CPA-O3-NEXT: add w0, w0, #1
+; CHECK-CPA-O3-NEXT: ret
+;
+; CHECK-NOCPA-O0-LABEL: addi32:
+; CHECK-NOCPA-O0: // %bb.0: // %entry
+; CHECK-NOCPA-O0-NEXT: add w0, w0, #1
+; CHECK-NOCPA-O0-NEXT: ret
+;
+; CHECK-NOCPA-O3-LABEL: addi32:
+; CHECK-NOCPA-O3: // %bb.0: // %entry
+; CHECK-NOCPA-O3-NEXT: add w0, w0, #1
+; CHECK-NOCPA-O3-NEXT: ret
+entry:
+ %incdec.ptr.i.i.i = add i32 %ptr, 1
+ ret i32 %incdec.ptr.i.i.i
+}
+
+; addi16: plain i16 integer "add 1" (no pointer involved). All four
+; configurations expect the same "add w0, w0, #1"; +cpa does not change the
+; output. %index is not used by the body.
+define i16 @addi16(i16 %ptr, i32 %index) {
+; CHECK-CPA-O0-LABEL: addi16:
+; CHECK-CPA-O0: // %bb.0: // %entry
+; CHECK-CPA-O0-NEXT: add w0, w0, #1
+; CHECK-CPA-O0-NEXT: ret
+;
+; CHECK-CPA-O3-LABEL: addi16:
+; CHECK-CPA-O3: // %bb.0: // %entry
+; CHECK-CPA-O3-NEXT: add w0, w0, #1
+; CHECK-CPA-O3-NEXT: ret
+;
+; CHECK-NOCPA-O0-LABEL: addi16:
+; CHECK-NOCPA-O0: // %bb.0: // %entry
+; CHECK-NOCPA-O0-NEXT: add w0, w0, #1
+; CHECK-NOCPA-O0-NEXT: ret
+;
+; CHECK-NOCPA-O3-LABEL: addi16:
+; CHECK-NOCPA-O3: // %bb.0: // %entry
+; CHECK-NOCPA-O3-NEXT: add w0, w0, #1
+; CHECK-NOCPA-O3-NEXT: ret
+entry:
+ %incdec.ptr.i.i.i = add i16 %ptr, 1
+ ret i16 %incdec.ptr.i.i.i
+}
+
+; arith1: pure i64 arithmetic (%0 + %1 * %2) routed through stack slots; no
+; pointer arithmetic is performed on the computed value. The checks are the
+; same with and without +cpa: mul + add at -O0 and a single integer madd at
+; -O3, never maddpt.
+define i64 @arith1(i64 noundef %0, i64 noundef %1, i64 noundef %2) {
+; CHECK-CPA-O0-LABEL: arith1:
+; CHECK-CPA-O0: // %bb.0: // %entry
+; CHECK-CPA-O0-NEXT: sub sp, sp, #32
+; CHECK-CPA-O0-NEXT: .cfi_def_cfa_offset 32
+; CHECK-CPA-O0-NEXT: str x0, [sp, #24]
+; CHECK-CPA-O0-NEXT: str x1, [sp, #16]
+; CHECK-CPA-O0-NEXT: str x2, [sp, #8]
+; CHECK-CPA-O0-NEXT: ldr x8, [sp, #24]
+; CHECK-CPA-O0-NEXT: ldr x9, [sp, #16]
+; CHECK-CPA-O0-NEXT: ldr x10, [sp, #8]
+; CHECK-CPA-O0-NEXT: mul x9, x9, x10
+; CHECK-CPA-O0-NEXT: add x0, x8, x9
+; CHECK-CPA-O0-NEXT: add sp, sp, #32
+; CHECK-CPA-O0-NEXT: ret
+;
+; CHECK-CPA-O3-LABEL: arith1:
+; CHECK-CPA-O3: // %bb.0: // %entry
+; CHECK-CPA-O3-NEXT: sub sp, sp, #32
+; CHECK-CPA-O3-NEXT: .cfi_def_cfa_offset 32
+; CHECK-CPA-O3-NEXT: stp x2, x1, [sp, #8]
+; CHECK-CPA-O3-NEXT: str x0, [sp, #24]
+; CHECK-CPA-O3-NEXT: madd x0, x1, x2, x0
+; CHECK-CPA-O3-NEXT: add sp, sp, #32
+; CHECK-CPA-O3-NEXT: ret
+;
+; CHECK-NOCPA-O0-LABEL: arith1:
+; CHECK-NOCPA-O0: // %bb.0: // %entry
+; CHECK-NOCPA-O0-NEXT: sub sp, sp, #32
+; CHECK-NOCPA-O0-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NOCPA-O0-NEXT: str x0, [sp, #24]
+; CHECK-NOCPA-O0-NEXT: str x1, [sp, #16]
+; CHECK-NOCPA-O0-NEXT: str x2, [sp, #8]
+; CHECK-NOCPA-O0-NEXT: ldr x8, [sp, #24]
+; CHECK-NOCPA-O0-NEXT: ldr x9, [sp, #16]
+; CHECK-NOCPA-O0-NEXT: ldr x10, [sp, #8]
+; CHECK-NOCPA-O0-NEXT: mul x9, x9, x10
+; CHECK-NOCPA-O0-NEXT: add x0, x8, x9
+; CHECK-NOCPA-O0-NEXT: add sp, sp, #32
+; CHECK-NOCPA-O0-NEXT: ret
+;
+; CHECK-NOCPA-O3-LABEL: arith1:
+; CHECK-NOCPA-O3: // %bb.0: // %entry
+; CHECK-NOCPA-O3-NEXT: sub sp, sp, #32
+; CHECK-NOCPA-O3-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NOCPA-O3-NEXT: stp x2, x1, [sp, #8]
+; CHECK-NOCPA-O3-NEXT: str x0, [sp, #24]
+; CHECK-NOCPA-O3-NEXT: madd x0, x1, x2, x0
+; CHECK-NOCPA-O3-NEXT: add sp, sp, #32
+; CHECK-NOCPA-O3-NEXT: ret
+entry:
+ %4 = alloca i64, align 8
+ %5 = alloca i64, align 8
+ %6 = alloca i64, align 8
+ store i64 %0, ptr %4, align 8
+ store i64 %1, ptr %5, align 8
+ store i64 %2, ptr %6, align 8
+ %7 = load i64, ptr %4, align 8
+ %8 = load i64, ptr %5, align 8
+ %9 = load i64, ptr %6, align 8
+ %10 = mul nsw i64 %8, %9
+ %11 = add nsw i64 %7, %10
+ ret i64 %11
+}
+
+; arith2: mixes pointer and integer arithmetic in one function. It loads
+; field 3 (byte offset 24) of %0[sext(%3)] (48-byte %struct.my_type2
+; elements), then computes loaded + %1 * %2 - %2 on i64 values. With +cpa the
+; checks expect only the element address to use maddpt, while the integer
+; multiply-add stays mul/add (-O0) or madd (-O3); without +cpa the address is
+; formed with mul + add (-O0) or madd (-O3).
+define i64 @arith2(ptr noundef %0, i64 noundef %1, i64 noundef %2, i32 noundef %3) {
+; CHECK-CPA-O0-LABEL: arith2:
+; CHECK-CPA-O0: // %bb.0: // %entry
+; CHECK-CPA-O0-NEXT: sub sp, sp, #32
+; CHECK-CPA-O0-NEXT: .cfi_def_cfa_offset 32
+; CHECK-CPA-O0-NEXT: str x0, [sp, #24]
+; CHECK-CPA-O0-NEXT: str x1, [sp, #16]
+; CHECK-CPA-O0-NEXT: str x2, [sp, #8]
+; CHECK-CPA-O0-NEXT: str w3, [sp, #4]
+; CHECK-CPA-O0-NEXT: ldr x10, [sp, #24]
+; CHECK-CPA-O0-NEXT: ldrsw x8, [sp, #4]
+; CHECK-CPA-O0-NEXT: mov w9, #48 // =0x30
+; CHECK-CPA-O0-NEXT: // kill: def $x9 killed $w9
+; CHECK-CPA-O0-NEXT: maddpt x8, x8, x9, x10
+; CHECK-CPA-O0-NEXT: ldr x8, [x8, #24]
+; CHECK-CPA-O0-NEXT: ldr x10, [sp, #16]
+; CHECK-CPA-O0-NEXT: ldr x9, [sp, #8]
+; CHECK-CPA-O0-NEXT: mul x10, x10, x9
+; CHECK-CPA-O0-NEXT: add x8, x8, x10
+; CHECK-CPA-O0-NEXT: subs x0, x8, x9
+; CHECK-CPA-O0-NEXT: add sp, sp, #32
+; CHECK-CPA-O0-NEXT: ret
+;
+; CHECK-CPA-O3-LABEL: arith2:
+; CHECK-CPA-O3: // %bb.0: // %entry
+; CHECK-CPA-O3-NEXT: sub sp, sp, #32
+; CHECK-CPA-O3-NEXT: .cfi_def_cfa_offset 32
+; CHECK-CPA-O3-NEXT: str w3, [sp, #4]
+; CHECK-CPA-O3-NEXT: mov w10, #48 // =0x30
+; CHECK-CPA-O3-NEXT: ldrsw x9, [sp, #4]
+; CHECK-CPA-O3-NEXT: stp x1, x0, [sp, #16]
+; CHECK-CPA-O3-NEXT: maddpt x8, x9, x10, x0
+; CHECK-CPA-O3-NEXT: str x2, [sp, #8]
+; CHECK-CPA-O3-NEXT: ldr x8, [x8, #24]
+; CHECK-CPA-O3-NEXT: madd x8, x1, x2, x8
+; CHECK-CPA-O3-NEXT: sub x0, x8, x2
+; CHECK-CPA-O3-NEXT: add sp, sp, #32
+; CHECK-CPA-O3-NEXT: ret
+;
+; CHECK-NOCPA-O0-LABEL: arith2:
+; CHECK-NOCPA-O0: // %bb.0: // %entry
+; CHECK-NOCPA-O0-NEXT: sub sp, sp, #32
+; CHECK-NOCPA-O0-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NOCPA-O0-NEXT: str x0, [sp, #24]
+; CHECK-NOCPA-O0-NEXT: str x1, [sp, #16]
+; CHECK-NOCPA-O0-NEXT: str x2, [sp, #8]
+; CHECK-NOCPA-O0-NEXT: str w3, [sp, #4]
+; CHECK-NOCPA-O0-NEXT: ldr x8, [sp, #24]
+; CHECK-NOCPA-O0-NEXT: ldrsw x9, [sp, #4]
+; CHECK-NOCPA-O0-NEXT: mov w10, #48 // =0x30
+; CHECK-NOCPA-O0-NEXT: // kill: def $x10 killed $w10
+; CHECK-NOCPA-O0-NEXT: mul x9, x9, x10
+; CHECK-NOCPA-O0-NEXT: add x8, x8, x9
+; CHECK-NOCPA-O0-NEXT: ldr x8, [x8, #24]
+; CHECK-NOCPA-O0-NEXT: ldr x10, [sp, #16]
+; CHECK-NOCPA-O0-NEXT: ldr x9, [sp, #8]
+; CHECK-NOCPA-O0-NEXT: mul x10, x10, x9
+; CHECK-NOCPA-O0-NEXT: add x8, x8, x10
+; CHECK-NOCPA-O0-NEXT: subs x0, x8, x9
+; CHECK-NOCPA-O0-NEXT: add sp, sp, #32
+; CHECK-NOCPA-O0-NEXT: ret
+;
+; CHECK-NOCPA-O3-LABEL: arith2:
+; CHECK-NOCPA-O3: // %bb.0: // %entry
+; CHECK-NOCPA-O3-NEXT: sub sp, sp, #32
+; CHECK-NOCPA-O3-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NOCPA-O3-NEXT: str w3, [sp, #4]
+; CHECK-NOCPA-O3-NEXT: mov w9, #48 // =0x30
+; CHECK-NOCPA-O3-NEXT: ldrsw x8, [sp, #4]
+; CHECK-NOCPA-O3-NEXT: stp x1, x0, [sp, #16]
+; CHECK-NOCPA-O3-NEXT: str x2, [sp, #8]
+; CHECK-NOCPA-O3-NEXT: madd x8, x8, x9, x0
+; CHECK-NOCPA-O3-NEXT: ldr x8, [x8, #24]
+; CHECK-NOCPA-O3-NEXT: madd x8, x1, x2, x8
+; CHECK-NOCPA-O3-NEXT: sub x0, x8, x2
+; CHECK-NOCPA-O3-NEXT: add sp, sp, #32
+; CHECK-NOCPA-O3-NEXT: ret
+entry:
+ %5 = alloca ptr, align 8
+ %6 = alloca i64, align 8
+ %7 = alloca i64, align 8
+ %8 = alloca i32, align 4
+ store ptr %0, ptr %5, align 8
+ store i64 %1, ptr %6, align 8
+ store i64 %2, ptr %7, align 8
+ store i32 %3, ptr %8, align 4
+ %9 = load ptr, ptr %5, align 8
+ %10 = load i32, ptr %8, align 4
+ %11 = sext i32 %10 to i64
+ %12 = getelementptr inbounds %struct.my_type2, ptr %9, i64 %11
+ %13 = getelementptr inbounds %struct.my_type2, ptr %12, i32 0, i32 3
+ %14 = load i64, ptr %13, align 8
+ %15 = load i64, ptr %6, align 8
+ %16 = load i64, ptr %7, align 8
+ %17 = mul nsw i64 %15, %16
+ %18 = add nsw i64 %14, %17
+ %19 = sub nsw i64 %18, %16
+ ret i64 %19
+}
+
+; Nested i8 array and i16 global referenced by the multidim test.
+@a = hidden global [2 x [1 x [2 x i8]]] [[1 x [2 x i8]] [[2 x i8] c"\01\01"], [1 x [2 x i8]] [[2 x i8] c"\01\01"]], align 1
+@b = hidden global i16 0, align 2
+
+define hidden void @multidim() {
+; CHECK-CPA-O0-LABEL: multidim:
+; CHECK-CPA-O0: // %bb.0: // %entry
+; CHECK-CPA-O0-NEXT: adrp x8, b
+; CHECK-CPA-O0-NEXT: ldrh w9, [x8, :lo12:b]
+; CHECK-CPA-O0-NEXT: mov w10, w9
+; CHECK-CPA-O0-NEXT: ldrh w8, [x8, :lo12:b]
+; CHECK-CPA-O0-NEXT: add w9, w8, #1
+; CHECK-CPA-O0-NEXT: // implicit-def: $x8
+; CHECK-CPA-O0-NEXT: mov w8, w9
+; CHECK-CPA-O0-NEXT: sxtw x9, w8
+; CHECK-CPA-O0-NEXT: mov w8, #2 // =0x2
+; CHECK-CPA-O0-NEXT: mov w11, w8
+; CHECK-CPA-O0-NEXT: adrp x8, a
+; CHECK-CPA-O0-NEXT: add x8, x8, :lo12:a
+; CHECK-CPA-O0-NEXT: addpt x8, x8, x11
+; CHECK-CPA-O0-NEXT: addpt x8, x8, x10, lsl #1
+; CHECK-CPA-O0-NEXT: addpt x8, x8, x9
+; CHECK-CPA-O0-NEXT: ldrb w8, [x8]
+; CHECK-CPA-O0-NEXT: cbz w8, .LBB14_2
+; CHECK-CPA-O0-NEXT: b .LBB14_1
+; CHECK-CPA-O0-NEXT: .LBB14_1: // %if.then
+; CHECK-CPA-O0-NEXT: b .LBB14_2
+; CHECK-CPA-O0-NEXT: .LBB14_2: // %if.end
+; CHECK-CPA-O0-NEXT: ret
+;
+; CHECK-CPA-O3-LABEL: multidim:
+; CHECK-CPA-O3: // %bb.0: // %entry
----------------
davemgreen wrote:
Is it worth adding a store or call to this example to make sure it doesn't all optimize away?
https://github.com/llvm/llvm-project/pull/105669
More information about the llvm-commits
mailing list