[llvm] [X86][SimplifyCFG] Support hoisting load/store with conditional faulting (PR #96878)
Shengchen Kan via llvm-commits
llvm-commits at lists.llvm.org
Wed Jul 3 00:48:24 PDT 2024
================
@@ -0,0 +1,581 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -mtriple=x86_64 -mattr=+cf -passes=simplifycfg -simplifycfg-require-and-preserve-domtree=1 -S -simplifycfg-hoist-loads-stores-with-cond-faulting=true | FileCheck %s
+
+;; Redundant bitcast will be optimized out in instcombine pass.
+;; Two-armed branch on %a == 0: if.true copies an i32 from %b to %p, while
+;; if.false stores constants to %p and %q. The CHECK lines expect both arms to
+;; be folded into entry as masked load/store intrinsics: the if.true accesses
+;; are masked by %cond and the if.false stores by its negation (xor with true).
+;; The !dbg attachments on the load and on the if.false stores are expected to
+;; be carried over to the corresponding intrinsic calls.
+define void @basic(i32 %a, ptr %b, ptr %p, ptr %q) {
+; CHECK-LABEL: @basic(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[COND:%.*]] = icmp eq i32 [[A:%.*]], 0
+; CHECK-NEXT: [[TMP0:%.*]] = bitcast i1 [[COND]] to <1 x i1>
+; CHECK-NEXT: [[TMP1:%.*]] = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr [[B:%.*]], i32 4, <1 x i1> [[TMP0]], <1 x i32> poison), !dbg [[DBG8:![0-9]+]]
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast <1 x i32> [[TMP1]] to i32
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32 [[TMP2]] to <1 x i32>
+; CHECK-NEXT: call void @llvm.masked.store.v1i32.p0(<1 x i32> [[TMP3]], ptr [[P:%.*]], i32 4, <1 x i1> [[TMP0]])
+; CHECK-NEXT: [[TMP4:%.*]] = xor i1 [[COND]], true
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast i1 [[TMP4]] to <1 x i1>
+; CHECK-NEXT: call void @llvm.masked.store.v1i64.p0(<1 x i64> <i64 1>, ptr [[P]], i32 8, <1 x i1> [[TMP5]]), !dbg [[DBG12:![0-9]+]]
+; CHECK-NEXT: call void @llvm.masked.store.v1i16.p0(<1 x i16> <i16 2>, ptr [[Q:%.*]], i32 8, <1 x i1> [[TMP5]]), !dbg [[DBG12]]
+; CHECK-NEXT: ret void
+;
+entry:
+ %cond = icmp eq i32 %a, 0
+ br i1 %cond, label %if.true, label %if.false
+
+;; Taken when %a != 0: two stores of different widths, both with align 8.
+if.false:
+ store i64 1, ptr %p, align 8, !dbg !8
+ store i16 2, ptr %q, align 8, !dbg !8
+ br label %if.end
+
+;; Taken when %a == 0: load from %b and store the loaded value to %p.
+if.true:
+ %0 = load i32, ptr %b, align 4, !dbg !9
+ store i32 %0, ptr %p, align 4
+ br label %if.end
+
+if.end:
+ ret void
+}
+
+;; simplifycfg is run before sroa. alloca here is not optimized away yet.
+;; The arguments are spilled to allocas and reloaded. The CHECK lines expect
+;; the pointer reloads from %q.addr and %p.addr to be hoisted as ordinary
+;; loads, while the i32 load through the reloaded %q and the store through the
+;; reloaded %p become masked intrinsics guarded by %tobool.
+define void @alloca(ptr %p, ptr %q, i32 %a) {
+; CHECK-LABEL: @alloca(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[P_ADDR:%.*]] = alloca ptr, align 8
+; CHECK-NEXT: [[Q_ADDR:%.*]] = alloca ptr, align 8
+; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
+; CHECK-NEXT: store ptr [[P:%.*]], ptr [[P_ADDR]], align 8
+; CHECK-NEXT: store ptr [[Q:%.*]], ptr [[Q_ADDR]], align 8
+; CHECK-NEXT: store i32 [[A:%.*]], ptr [[A_ADDR]], align 4
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
+; CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast i1 [[TOBOOL]] to <1 x i1>
+; CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[Q_ADDR]], align 8
+; CHECK-NEXT: [[TMP3:%.*]] = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr [[TMP2]], i32 4, <1 x i1> [[TMP1]], <1 x i32> poison)
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <1 x i32> [[TMP3]] to i32
+; CHECK-NEXT: [[TMP5:%.*]] = load ptr, ptr [[P_ADDR]], align 8
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32 [[TMP4]] to <1 x i32>
+; CHECK-NEXT: call void @llvm.masked.store.v1i32.p0(<1 x i32> [[TMP6]], ptr [[TMP5]], i32 4, <1 x i1> [[TMP1]])
+; CHECK-NEXT: ret void
+;
+entry:
+ %p.addr = alloca ptr
+ %q.addr = alloca ptr
+ %a.addr = alloca i32
+ store ptr %p, ptr %p.addr
+ store ptr %q, ptr %q.addr
+ store i32 %a, ptr %a.addr
+ %0 = load i32, ptr %a.addr
+ %tobool = icmp ne i32 %0, 0
+ br i1 %tobool, label %if.then, label %if.end
+
+;; Taken when the reloaded %a is non-zero: copy an i32 from *%q.addr to *%p.addr.
+if.then:
+ %1 = load ptr, ptr %q.addr
+ %2 = load i32, ptr %1
+ %3 = load ptr, ptr %p.addr
+ store i32 %2, ptr %3
+ br label %if.end
+
+if.end:
+ ret void
+}
+
+;; successor 1 branches to successor 0.
+;; The load/store pair lives in the false successor (if.then), so the CHECK
+;; lines expect the masked intrinsics to use the negated condition (xor with
+;; true). The bitcast of the un-negated condition is still expected in the
+;; output, although no expected intrinsic call uses it.
+define void @succ1to0(ptr %p, ptr %q, i32 %a) {
+; CHECK-LABEL: @succ1to0(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[A:%.*]], 0
+; CHECK-NEXT: [[TMP0:%.*]] = bitcast i1 [[TOBOOL]] to <1 x i1>
+; CHECK-NEXT: [[TMP1:%.*]] = xor i1 [[TOBOOL]], true
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast i1 [[TMP1]] to <1 x i1>
+; CHECK-NEXT: [[TMP3:%.*]] = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr [[Q:%.*]], i32 4, <1 x i1> [[TMP2]], <1 x i32> poison)
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <1 x i32> [[TMP3]] to i32
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32 [[TMP4]] to <1 x i32>
+; CHECK-NEXT: call void @llvm.masked.store.v1i32.p0(<1 x i32> [[TMP5]], ptr [[P:%.*]], i32 4, <1 x i1> [[TMP2]])
+; CHECK-NEXT: ret void
+;
+entry:
+ %tobool = icmp ne i32 %a, 0
+ br i1 %tobool, label %if.end, label %if.then
+
+if.end:
+ ret void
+
+;; Taken when %a == 0: copy an i32 from %q to %p, then join if.end.
+if.then:
+ %0 = load i32, ptr %q
+ store i32 %0, ptr %p
+ br label %if.end
+}
+
+;; Same CFG shape as a false successor branching to the true successor, but
+;; the conditionally loaded value feeds a phi. The CHECK lines expect a masked
+;; load guarded by the negated null check and the phi replaced by a select on
+;; the original condition (0 when %p is null, the loaded value otherwise).
+define i32 @succ1to0_phi(ptr %p) {
+; CHECK-LABEL: @succ1to0_phi(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[COND:%.*]] = icmp eq ptr [[P:%.*]], null
+; CHECK-NEXT: [[TMP0:%.*]] = bitcast i1 [[COND]] to <1 x i1>
+; CHECK-NEXT: [[TMP1:%.*]] = xor i1 [[COND]], true
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast i1 [[TMP1]] to <1 x i1>
+; CHECK-NEXT: [[TMP3:%.*]] = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr [[P]], i32 4, <1 x i1> [[TMP2]], <1 x i32> poison)
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <1 x i32> [[TMP3]] to i32
+; CHECK-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[COND]], i32 0, i32 [[TMP4]]
+; CHECK-NEXT: ret i32 [[SPEC_SELECT]]
+;
+entry:
+ %cond = icmp eq ptr %p, null
+ br i1 %cond, label %if.true, label %if.false
+
+;; Taken only when %p is non-null: the load that must not fault when hoisted.
+if.false:
+ %0 = load i32, ptr %p
+ br label %if.true
+
+if.true:
+ %res = phi i32 [ %0, %if.false ], [ 0, %entry ]
+ ret i32 %res
+}
+
+;; successor 0 branches to successor 1.
+;; if.true falls through into if.false, so the store to %q executes on both
+;; paths. The CHECK lines expect the if.true load/store to become masked
+;; intrinsics guarded by %cond, and the store to %q to remain an ordinary,
+;; unconditional store.
+define void @succ0to1(i32 %a, ptr %b, ptr %p, ptr %q) {
+; CHECK-LABEL: @succ0to1(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[COND:%.*]] = icmp eq i32 [[A:%.*]], 0
+; CHECK-NEXT: [[TMP0:%.*]] = bitcast i1 [[COND]] to <1 x i1>
+; CHECK-NEXT: [[TMP1:%.*]] = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr [[B:%.*]], i32 4, <1 x i1> [[TMP0]], <1 x i32> poison)
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast <1 x i32> [[TMP1]] to i32
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32 [[TMP2]] to <1 x i32>
+; CHECK-NEXT: call void @llvm.masked.store.v1i32.p0(<1 x i32> [[TMP3]], ptr [[P:%.*]], i32 4, <1 x i1> [[TMP0]])
+; CHECK-NEXT: store i32 1, ptr [[Q:%.*]], align 4
+; CHECK-NEXT: ret void
+;
+entry:
+ %cond = icmp eq i32 %a, 0
+ br i1 %cond, label %if.true, label %if.false
+
+;; Reached directly from entry and also from if.true.
+if.false:
+ store i32 1, ptr %q
+ br label %if.end
+
+;; Taken when %a == 0: copy an i32 from %b to %p, then continue in if.false.
+if.true:
+ %0 = load i32, ptr %b
+ store i32 %0, ptr %p
+ br label %if.false
+
+if.end:
+ ret void
+}
+
+;; load after store can be hoisted.
+;; The conditional block performs a store followed by two loads of different
+;; widths. The CHECK lines expect all three accesses to become masked
+;; intrinsics sharing the same mask and keeping their original order, with the
+;; two returns merged into a single select on %cond.
+define i64 @load_after_store(i32 %a, ptr %b, ptr %p, ptr %q) {
+; CHECK-LABEL: @load_after_store(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[COND:%.*]] = icmp eq i32 [[A:%.*]], 0
+; CHECK-NEXT: [[TMP0:%.*]] = bitcast i1 [[COND]] to <1 x i1>
+; CHECK-NEXT: call void @llvm.masked.store.v1i32.p0(<1 x i32> <i32 1>, ptr [[B:%.*]], i32 4, <1 x i1> [[TMP0]])
+; CHECK-NEXT: [[TMP1:%.*]] = call <1 x i16> @llvm.masked.load.v1i16.p0(ptr [[P:%.*]], i32 2, <1 x i1> [[TMP0]], <1 x i16> poison)
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast <1 x i16> [[TMP1]] to i16
+; CHECK-NEXT: [[TMP3:%.*]] = call <1 x i64> @llvm.masked.load.v1i64.p0(ptr [[Q:%.*]], i32 8, <1 x i1> [[TMP0]], <1 x i64> poison)
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to i64
+; CHECK-NEXT: [[ZEXT:%.*]] = zext i16 [[TMP2]] to i64
+; CHECK-NEXT: [[ADD:%.*]] = add i64 [[ZEXT]], [[TMP4]]
+; CHECK-NEXT: [[COMMON_RET_OP:%.*]] = select i1 [[COND]], i64 [[ADD]], i64 0
+; CHECK-NEXT: ret i64 [[COMMON_RET_OP]]
+;
+entry:
+ %cond = icmp eq i32 %a, 0
+ br i1 %cond, label %if.true, label %if.end
+
+;; Taken when %a == 0: store to %b first, then load from %p and %q and return
+;; the sum of the zero-extended i16 and the i64.
+if.true:
+ store i32 1, ptr %b
+ %0 = load i16, ptr %p
+ %1 = load i64, ptr %q
+ %zext = zext i16 %0 to i64
+ %add = add i64 %zext, %1
+ ret i64 %add
+
+if.end:
+ ret i64 0
+}
+
+;; if.false contains a call to @read_memory_only followed by a load from %q
+;; whose result is unused. The CHECK lines expect the branch to be removed:
+;; the if.true load/store are masked by %cond, the load from %q becomes a
+;; masked load guarded by the negated condition and appears before the call,
+;; the call itself is executed unconditionally in entry, and the phi becomes a
+;; select on %cond.
+;; NOTE(review): @read_memory_only is declared outside this excerpt;
+;; presumably it carries attributes that make it speculatable - confirm.
+define i32 @load_skip_speculatable_memory_read(i32 %a, ptr %b, ptr %p, ptr %q) {
+; CHECK-LABEL: @load_skip_speculatable_memory_read(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[COND:%.*]] = icmp eq i32 [[A:%.*]], 0
+; CHECK-NEXT: [[TMP0:%.*]] = bitcast i1 [[COND]] to <1 x i1>
+; CHECK-NEXT: [[TMP1:%.*]] = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr [[B:%.*]], i32 4, <1 x i1> [[TMP0]], <1 x i32> poison)
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast <1 x i32> [[TMP1]] to i32
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32 [[TMP2]] to <1 x i32>
+; CHECK-NEXT: call void @llvm.masked.store.v1i32.p0(<1 x i32> [[TMP3]], ptr [[P:%.*]], i32 4, <1 x i1> [[TMP0]])
+; CHECK-NEXT: [[TMP4:%.*]] = xor i1 [[COND]], true
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast i1 [[TMP4]] to <1 x i1>
+; CHECK-NEXT: [[TMP6:%.*]] = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr [[Q:%.*]], i32 4, <1 x i1> [[TMP5]], <1 x i32> poison)
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast <1 x i32> [[TMP6]] to i32
+; CHECK-NEXT: [[READ:%.*]] = call i32 @read_memory_only()
+; CHECK-NEXT: [[PHI:%.*]] = select i1 [[COND]], i32 0, i32 [[READ]]
+; CHECK-NEXT: ret i32 [[PHI]]
+;
+entry:
+ %cond = icmp eq i32 %a, 0
+ br i1 %cond, label %if.true, label %if.false
+
+;; Taken when %a != 0: only %read reaches the phi; %0 has no users.
+if.false:
+ %read = call i32 @read_memory_only()
+ %0 = load i32, ptr %q
+ br label %if.end
+
+;; Taken when %a == 0: copy an i32 from %b to %p.
+if.true:
+ %1 = load i32, ptr %b
+ store i32 %1, ptr %p
+ br label %if.end
+
+if.end:
+ %phi = phi i32 [%read, %if.false], [0, %if.true]
+ ret i32 %phi
+}
+
+;; if.false mixes two stores with a udiv/mul/mul/select chain and returns the
+;; result. The CHECK lines expect the loads and stores of both arms to be
+;; hoisted into entry as masked intrinsics, while the conditional branch is
+;; kept and the arithmetic chain stays in if.false, merging with the constant
+;; 0 through a phi in common.ret.
+;; NOTE(review): going by the test name, the arithmetic chain is presumably
+;; left in place because speculating it is considered too expensive - confirm
+;; against the pass's cost model.
+define i32 @expensive_to_hoist(i32 %a, ptr %b, ptr %p, ptr %q, i32 %v0, i32 %v1, i32 %v2, i1 %cc) {
+; CHECK-LABEL: @expensive_to_hoist(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[COND:%.*]] = icmp eq i32 [[A:%.*]], 0
+; CHECK-NEXT: [[TMP0:%.*]] = bitcast i1 [[COND]] to <1 x i1>
+; CHECK-NEXT: [[TMP1:%.*]] = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr [[B:%.*]], i32 4, <1 x i1> [[TMP0]], <1 x i32> poison)
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast <1 x i32> [[TMP1]] to i32
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32 [[TMP2]] to <1 x i32>
+; CHECK-NEXT: call void @llvm.masked.store.v1i32.p0(<1 x i32> [[TMP3]], ptr [[P:%.*]], i32 4, <1 x i1> [[TMP0]])
+; CHECK-NEXT: [[TMP4:%.*]] = xor i1 [[COND]], true
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast i1 [[TMP4]] to <1 x i1>
+; CHECK-NEXT: call void @llvm.masked.store.v1i64.p0(<1 x i64> <i64 1>, ptr [[P]], i32 8, <1 x i1> [[TMP5]])
+; CHECK-NEXT: call void @llvm.masked.store.v1i16.p0(<1 x i16> <i16 2>, ptr [[Q:%.*]], i32 2, <1 x i1> [[TMP5]])
+; CHECK-NEXT: br i1 [[COND]], label [[COMMON_RET:%.*]], label [[IF_FALSE:%.*]]
+; CHECK: common.ret:
+; CHECK-NEXT: [[COMMON_RET_OP:%.*]] = phi i32 [ [[VVVV:%.*]], [[IF_FALSE]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT: ret i32 [[COMMON_RET_OP]]
+; CHECK: if.false:
+; CHECK-NEXT: [[V:%.*]] = udiv i32 [[A]], 12345
+; CHECK-NEXT: [[VV:%.*]] = mul i32 [[V]], [[V0:%.*]]
+; CHECK-NEXT: [[VVV:%.*]] = mul i32 [[VV]], [[V1:%.*]]
+; CHECK-NEXT: [[VVVV]] = select i1 [[CC:%.*]], i32 [[V2:%.*]], i32 [[VVV]]
+; CHECK-NEXT: br label [[COMMON_RET]]
+;
+entry:
+ %cond = icmp eq i32 %a, 0
+ br i1 %cond, label %if.true, label %if.false
+
+;; Taken when %a != 0: two stores, then the arithmetic chain whose result is
+;; returned directly from this block.
+if.false:
+ store i64 1, ptr %p
+ store i16 2, ptr %q
+
+ %v = udiv i32 %a, 12345
+ %vv = mul i32 %v, %v0
+ %vvv = mul i32 %vv, %v1
+ %vvvv = select i1 %cc, i32 %v2, i32 %vvv
+ ret i32 %vvvv
+
+;; Taken when %a == 0: copy an i32 from %b to %p and return 0 via if.end.
+if.true:
+ %0 = load i32, ptr %b
+ store i32 %0, ptr %p
+ br label %if.end
+
+if.end:
+ ret i32 0
+}
----------------
KanRobert wrote:
Sure. Working on your previous comment https://github.com/llvm/llvm-project/pull/96878#discussion_r1656831523. IIUC, the request is the same as before.
https://github.com/llvm/llvm-project/pull/96878
More information about the llvm-commits
mailing list