[clang] [InstCombine] Add combines/simplifications for `llvm.ptrmask` (PR #67166)

via cfe-commits cfe-commits at lists.llvm.org
Fri Oct 27 13:08:50 PDT 2023


https://github.com/goldsteinn updated https://github.com/llvm/llvm-project/pull/67166

>From 24c65907e7e6792e2d5395102992ce3cdb42fb6d Mon Sep 17 00:00:00 2001
From: Noah Goldstein <goldstein.w.n at gmail.com>
Date: Fri, 22 Sep 2023 08:21:21 -0500
Subject: [PATCH 1/7] [InstSimplify] Add tests for simplify `llvm.ptrmask`; NFC

Differential Revision: https://reviews.llvm.org/D156632
---
 llvm/test/Transforms/InstSimplify/ptrmask.ll | 366 +++++++++++++++++++
 1 file changed, 366 insertions(+)
 create mode 100644 llvm/test/Transforms/InstSimplify/ptrmask.ll

diff --git a/llvm/test/Transforms/InstSimplify/ptrmask.ll b/llvm/test/Transforms/InstSimplify/ptrmask.ll
new file mode 100644
index 000000000000000..4945d4bc0edf625
--- /dev/null
+++ b/llvm/test/Transforms/InstSimplify/ptrmask.ll
@@ -0,0 +1,366 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
+; RUN: opt < %s -passes=instsimplify -S | FileCheck %s
+
+target datalayout = "p1:64:64:64:32"
+
+declare ptr addrspace(1) @llvm.ptrmask.p1.i32(ptr addrspace(1) , i32)
+declare ptr @llvm.ptrmask.p0.i64(ptr, i64)
+
+declare <2 x ptr addrspace(1) > @llvm.ptrmask.v2p1.v2i32(<2 x ptr addrspace(1) >, <2 x i32>)
+declare <2 x ptr> @llvm.ptrmask.v2p1.v2i64(<2 x ptr>, <2 x i64>)
+
+define ptr @ptrmask_simplify_poison_mask(ptr %p) {
+; CHECK-LABEL: define ptr @ptrmask_simplify_poison_mask
+; CHECK-SAME: (ptr [[P:%.*]]) {
+; CHECK-NEXT:    [[R:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[P]], i64 poison)
+; CHECK-NEXT:    ret ptr [[R]]
+;
+  %r = call ptr @llvm.ptrmask.p0.i64(ptr %p, i64 poison)
+  ret ptr %r
+}
+
+define <2 x ptr addrspace(1) > @ptrmask_simplify_poison_mask_vec(<2 x ptr addrspace(1) > %p) {
+; CHECK-LABEL: define <2 x ptr addrspace(1)> @ptrmask_simplify_poison_mask_vec
+; CHECK-SAME: (<2 x ptr addrspace(1)> [[P:%.*]]) {
+; CHECK-NEXT:    [[R:%.*]] = call <2 x ptr addrspace(1)> @llvm.ptrmask.v2p1.v2i32(<2 x ptr addrspace(1)> [[P]], <2 x i32> poison)
+; CHECK-NEXT:    ret <2 x ptr addrspace(1)> [[R]]
+;
+  %r = call <2 x ptr addrspace(1) > @llvm.ptrmask.v2p1.v2i32(<2 x ptr addrspace(1) > %p, <2 x i32> poison)
+  ret <2 x ptr addrspace(1) > %r
+}
+
+define <2 x ptr addrspace(1) > @ptrmask_simplify_poison_and_zero_i32_vec_fail(<2 x ptr addrspace(1) > %p) {
+; CHECK-LABEL: define <2 x ptr addrspace(1)> @ptrmask_simplify_poison_and_zero_i32_vec_fail
+; CHECK-SAME: (<2 x ptr addrspace(1)> [[P:%.*]]) {
+; CHECK-NEXT:    [[R:%.*]] = call <2 x ptr addrspace(1)> @llvm.ptrmask.v2p1.v2i32(<2 x ptr addrspace(1)> [[P]], <2 x i32> <i32 undef, i32 0>)
+; CHECK-NEXT:    ret <2 x ptr addrspace(1)> [[R]]
+;
+  %r = call <2 x ptr addrspace(1) > @llvm.ptrmask.v2p1.v2i32(<2 x ptr addrspace(1) > %p, <2 x i32> <i32 undef, i32 0>)
+  ret <2 x ptr addrspace(1) > %r
+}
+
+define <2 x ptr> @ptrmask_simplify_undef_and_ones_vec(<2 x ptr> %p) {
+; CHECK-LABEL: define <2 x ptr> @ptrmask_simplify_undef_and_ones_vec
+; CHECK-SAME: (<2 x ptr> [[P:%.*]]) {
+; CHECK-NEXT:    [[R:%.*]] = call <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> [[P]], <2 x i64> <i64 undef, i64 -1>)
+; CHECK-NEXT:    ret <2 x ptr> [[R]]
+;
+  %r = call <2 x ptr> @llvm.ptrmask.v2p1.v2i64(<2 x ptr> %p, <2 x i64> <i64 undef, i64 -1>)
+  ret <2 x ptr> %r
+}
+
+define <2 x ptr> @ptrmask_simplify_poison_and_ones_vec(<2 x ptr> %p) {
+; CHECK-LABEL: define <2 x ptr> @ptrmask_simplify_poison_and_ones_vec
+; CHECK-SAME: (<2 x ptr> [[P:%.*]]) {
+; CHECK-NEXT:    [[R:%.*]] = call <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> [[P]], <2 x i64> <i64 poison, i64 -1>)
+; CHECK-NEXT:    ret <2 x ptr> [[R]]
+;
+  %r = call <2 x ptr> @llvm.ptrmask.v2p1.v2i64(<2 x ptr> %p, <2 x i64> <i64 poison, i64 -1>)
+  ret <2 x ptr> %r
+}
+
+define <2 x ptr> @ptrmask_simplify_ones_vec(<2 x ptr> %p) {
+; CHECK-LABEL: define <2 x ptr> @ptrmask_simplify_ones_vec
+; CHECK-SAME: (<2 x ptr> [[P:%.*]]) {
+; CHECK-NEXT:    [[R:%.*]] = call <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> [[P]], <2 x i64> <i64 -1, i64 -1>)
+; CHECK-NEXT:    ret <2 x ptr> [[R]]
+;
+  %r = call <2 x ptr> @llvm.ptrmask.v2p1.v2i64(<2 x ptr> %p, <2 x i64> <i64 -1, i64 -1>)
+  ret <2 x ptr> %r
+}
+
+define <2 x ptr addrspace(1) > @ptrmask_simplify_ones_i32_vec(<2 x ptr addrspace(1) > %p) {
+; CHECK-LABEL: define <2 x ptr addrspace(1)> @ptrmask_simplify_ones_i32_vec
+; CHECK-SAME: (<2 x ptr addrspace(1)> [[P:%.*]]) {
+; CHECK-NEXT:    [[R:%.*]] = call <2 x ptr addrspace(1)> @llvm.ptrmask.v2p1.v2i32(<2 x ptr addrspace(1)> [[P]], <2 x i32> <i32 -1, i32 -1>)
+; CHECK-NEXT:    ret <2 x ptr addrspace(1)> [[R]]
+;
+  %r = call <2 x ptr addrspace(1) > @llvm.ptrmask.v2p1.v2i32(<2 x ptr addrspace(1) > %p, <2 x i32> <i32 -1, i32 -1>)
+  ret <2 x ptr addrspace(1) > %r
+}
+
+define ptr addrspace(1) @ptrmask_simplify_undef_mask(ptr addrspace(1) %p) {
+; CHECK-LABEL: define ptr addrspace(1) @ptrmask_simplify_undef_mask
+; CHECK-SAME: (ptr addrspace(1) [[P:%.*]]) {
+; CHECK-NEXT:    [[R:%.*]] = call ptr addrspace(1) @llvm.ptrmask.p1.i32(ptr addrspace(1) [[P]], i32 undef)
+; CHECK-NEXT:    ret ptr addrspace(1) [[R]]
+;
+  %r = call ptr addrspace(1) @llvm.ptrmask.p1.i32(ptr addrspace(1) %p, i32 undef)
+  ret ptr addrspace(1) %r
+}
+
+define ptr @ptrmask_simplify_0_mask(ptr %p) {
+; CHECK-LABEL: define ptr @ptrmask_simplify_0_mask
+; CHECK-SAME: (ptr [[P:%.*]]) {
+; CHECK-NEXT:    [[R:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[P]], i64 0)
+; CHECK-NEXT:    ret ptr [[R]]
+;
+  %r = call ptr @llvm.ptrmask.p0.i64(ptr %p, i64 0)
+  ret ptr %r
+}
+
+define ptr @ptrmask_simplify_1s_mask(ptr %p) {
+; CHECK-LABEL: define ptr @ptrmask_simplify_1s_mask
+; CHECK-SAME: (ptr [[P:%.*]]) {
+; CHECK-NEXT:    [[R:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[P]], i64 -1)
+; CHECK-NEXT:    ret ptr [[R]]
+;
+  %r = call ptr @llvm.ptrmask.p0.i64(ptr %p, i64 -1)
+  ret ptr %r
+}
+
+define ptr addrspace(1) @ptrmask_simplify_1s_mask_i32(ptr addrspace(1) %p) {
+; CHECK-LABEL: define ptr addrspace(1) @ptrmask_simplify_1s_mask_i32
+; CHECK-SAME: (ptr addrspace(1) [[P:%.*]]) {
+; CHECK-NEXT:    [[R:%.*]] = call ptr addrspace(1) @llvm.ptrmask.p1.i32(ptr addrspace(1) [[P]], i32 -1)
+; CHECK-NEXT:    ret ptr addrspace(1) [[R]]
+;
+  %r = call ptr addrspace(1) @llvm.ptrmask.p1.i32(ptr addrspace(1) %p, i32 -1)
+  ret ptr addrspace(1) %r
+}
+
+define ptr @ptrmask_simplify_poison_ptr(i64 %m) {
+; CHECK-LABEL: define ptr @ptrmask_simplify_poison_ptr
+; CHECK-SAME: (i64 [[M:%.*]]) {
+; CHECK-NEXT:    [[R:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr poison, i64 [[M]])
+; CHECK-NEXT:    ret ptr [[R]]
+;
+  %r = call ptr @llvm.ptrmask.p0.i64(ptr poison, i64 %m)
+  ret ptr %r
+}
+
+define ptr addrspace(1) @ptrmask_simplify_undef_ptr(i32 %m) {
+; CHECK-LABEL: define ptr addrspace(1) @ptrmask_simplify_undef_ptr
+; CHECK-SAME: (i32 [[M:%.*]]) {
+; CHECK-NEXT:    [[R:%.*]] = call ptr addrspace(1) @llvm.ptrmask.p1.i32(ptr addrspace(1) undef, i32 [[M]])
+; CHECK-NEXT:    ret ptr addrspace(1) [[R]]
+;
+  %r = call ptr addrspace(1) @llvm.ptrmask.p1.i32(ptr addrspace(1) undef, i32 %m)
+  ret ptr addrspace(1) %r
+}
+
+define ptr @ptrmask_simplify_null_ptr(i64 %m) {
+; CHECK-LABEL: define ptr @ptrmask_simplify_null_ptr
+; CHECK-SAME: (i64 [[M:%.*]]) {
+; CHECK-NEXT:    [[R:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr null, i64 [[M]])
+; CHECK-NEXT:    ret ptr [[R]]
+;
+  %r = call ptr @llvm.ptrmask.p0.i64(ptr null, i64 %m)
+  ret ptr %r
+}
+
+define ptr @ptrmask_simplify_ptrmask(ptr %p) {
+; CHECK-LABEL: define ptr @ptrmask_simplify_ptrmask
+; CHECK-SAME: (ptr [[P:%.*]]) {
+; CHECK-NEXT:    [[M:%.*]] = ptrtoint ptr [[P]] to i64
+; CHECK-NEXT:    [[R:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[P]], i64 [[M]])
+; CHECK-NEXT:    ret ptr [[R]]
+;
+  %m = ptrtoint ptr %p to i64
+  %r = call ptr @llvm.ptrmask.p0.i64(ptr %p, i64 %m)
+  ret ptr %r
+}
+
+define ptr addrspace(1) @ptrmask_simplify_ptrmask_i32(ptr addrspace(1) %p) {
+; CHECK-LABEL: define ptr addrspace(1) @ptrmask_simplify_ptrmask_i32
+; CHECK-SAME: (ptr addrspace(1) [[P:%.*]]) {
+; CHECK-NEXT:    [[M:%.*]] = ptrtoint ptr addrspace(1) [[P]] to i32
+; CHECK-NEXT:    [[R:%.*]] = call ptr addrspace(1) @llvm.ptrmask.p1.i32(ptr addrspace(1) [[P]], i32 [[M]])
+; CHECK-NEXT:    ret ptr addrspace(1) [[R]]
+;
+  %m = ptrtoint ptr addrspace(1) %p to i32
+  %r = call ptr addrspace(1) @llvm.ptrmask.p1.i32(ptr addrspace(1) %p, i32 %m)
+  ret ptr addrspace(1) %r
+}
+
+define ptr @ptrmask_simplify_aligned_unused(ptr align 64 %p) {
+; CHECK-LABEL: define ptr @ptrmask_simplify_aligned_unused
+; CHECK-SAME: (ptr align 64 [[P:%.*]]) {
+; CHECK-NEXT:    [[R:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[P]], i64 -64)
+; CHECK-NEXT:    ret ptr [[R]]
+;
+  %r = call ptr @llvm.ptrmask.p0.i64(ptr %p, i64 -64)
+  ret ptr %r
+}
+
+define <2 x ptr> @ptrmask_simplify_aligned_unused_vec(<2 x ptr> align 128 %p) {
+; CHECK-LABEL: define <2 x ptr> @ptrmask_simplify_aligned_unused_vec
+; CHECK-SAME: (<2 x ptr> align 128 [[P:%.*]]) {
+; CHECK-NEXT:    [[R:%.*]] = call <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> [[P]], <2 x i64> <i64 -64, i64 -64>)
+; CHECK-NEXT:    ret <2 x ptr> [[R]]
+;
+  %r = call <2 x ptr> @llvm.ptrmask.v2p1.v2i64(<2 x ptr> %p, <2 x i64> <i64 -64, i64 -64>)
+  ret <2 x ptr> %r
+}
+
+define <2 x ptr> @ptrmask_simplify_aligned_unused_vec_todo(<2 x ptr> align 128 %p) {
+; CHECK-LABEL: define <2 x ptr> @ptrmask_simplify_aligned_unused_vec_todo
+; CHECK-SAME: (<2 x ptr> align 128 [[P:%.*]]) {
+; CHECK-NEXT:    [[R:%.*]] = call <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> [[P]], <2 x i64> <i64 -64, i64 -128>)
+; CHECK-NEXT:    ret <2 x ptr> [[R]]
+;
+  %r = call <2 x ptr> @llvm.ptrmask.v2p1.v2i64(<2 x ptr> %p, <2 x i64> <i64 -64, i64 -128>)
+  ret <2 x ptr> %r
+}
+
+define ptr addrspace(1) @ptrmask_simplify_aligned_unused_i32(ptr addrspace(1) align 64 %p) {
+; CHECK-LABEL: define ptr addrspace(1) @ptrmask_simplify_aligned_unused_i32
+; CHECK-SAME: (ptr addrspace(1) align 64 [[P:%.*]]) {
+; CHECK-NEXT:    [[R:%.*]] = call ptr addrspace(1) @llvm.ptrmask.p1.i32(ptr addrspace(1) [[P]], i32 -64)
+; CHECK-NEXT:    ret ptr addrspace(1) [[R]]
+;
+  %r = call ptr addrspace(1) @llvm.ptrmask.p1.i32(ptr addrspace(1) %p, i32 -64)
+  ret ptr addrspace(1) %r
+}
+
+define ptr @ptrmask_simplify_known_unused(ptr %p) {
+; CHECK-LABEL: define ptr @ptrmask_simplify_known_unused
+; CHECK-SAME: (ptr [[P:%.*]]) {
+; CHECK-NEXT:    [[PM0:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[P]], i64 -64)
+; CHECK-NEXT:    [[PGEP:%.*]] = getelementptr i8, ptr [[PM0]], i64 32
+; CHECK-NEXT:    [[R:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[PGEP]], i64 -32)
+; CHECK-NEXT:    ret ptr [[R]]
+;
+  %pm0 = call ptr @llvm.ptrmask.p0.i64(ptr %p, i64 -64)
+  %pgep = getelementptr i8, ptr %pm0, i64 32
+  %r = call ptr @llvm.ptrmask.p0.i64(ptr %pgep, i64 -32)
+  ret ptr %r
+}
+
+define <2 x ptr> @ptrmask_simplify_known_unused_vec(<2 x ptr> %p) {
+; CHECK-LABEL: define <2 x ptr> @ptrmask_simplify_known_unused_vec
+; CHECK-SAME: (<2 x ptr> [[P:%.*]]) {
+; CHECK-NEXT:    [[PM0:%.*]] = call <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> [[P]], <2 x i64> <i64 -64, i64 -64>)
+; CHECK-NEXT:    [[PGEP:%.*]] = getelementptr i8, <2 x ptr> [[PM0]], <2 x i64> <i64 32, i64 32>
+; CHECK-NEXT:    [[R:%.*]] = call <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> [[PGEP]], <2 x i64> <i64 -32, i64 -32>)
+; CHECK-NEXT:    ret <2 x ptr> [[R]]
+;
+  %pm0 = call <2 x ptr> @llvm.ptrmask.v2p1.v2i64(<2 x ptr> %p, <2 x i64> <i64 -64, i64 -64>)
+  %pgep = getelementptr i8, <2 x ptr> %pm0, <2 x i64> <i64 32, i64 32>
+  %r = call <2 x ptr> @llvm.ptrmask.v2p1.v2i64(<2 x ptr> %pgep, <2 x i64> <i64 -32, i64 -32>)
+  ret <2 x ptr> %r
+}
+
+define <2 x ptr> @ptrmask_simplify_known_unused_vec2(<2 x ptr> %p) {
+; CHECK-LABEL: define <2 x ptr> @ptrmask_simplify_known_unused_vec2
+; CHECK-SAME: (<2 x ptr> [[P:%.*]]) {
+; CHECK-NEXT:    [[PM0:%.*]] = call <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> [[P]], <2 x i64> <i64 -64, i64 -64>)
+; CHECK-NEXT:    [[PGEP:%.*]] = getelementptr i8, <2 x ptr> [[PM0]], <2 x i64> <i64 32, i64 32>
+; CHECK-NEXT:    [[R:%.*]] = call <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> [[PGEP]], <2 x i64> <i64 -32, i64 -16>)
+; CHECK-NEXT:    ret <2 x ptr> [[R]]
+;
+  %pm0 = call <2 x ptr> @llvm.ptrmask.v2p1.v2i64(<2 x ptr> %p, <2 x i64> <i64 -64, i64 -64>)
+  %pgep = getelementptr i8, <2 x ptr> %pm0, <2 x i64> <i64 32, i64 32>
+  %r = call <2 x ptr> @llvm.ptrmask.v2p1.v2i64(<2 x ptr> %pgep, <2 x i64> <i64 -32, i64 -16>)
+  ret <2 x ptr> %r
+}
+
+define <2 x ptr> @ptrmask_simplify_known_unused_vec3(<2 x ptr> %p) {
+; CHECK-LABEL: define <2 x ptr> @ptrmask_simplify_known_unused_vec3
+; CHECK-SAME: (<2 x ptr> [[P:%.*]]) {
+; CHECK-NEXT:    [[PM0:%.*]] = call <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> [[P]], <2 x i64> <i64 -64, i64 -128>)
+; CHECK-NEXT:    [[PGEP:%.*]] = getelementptr i8, <2 x ptr> [[PM0]], <2 x i64> <i64 32, i64 32>
+; CHECK-NEXT:    [[R:%.*]] = call <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> [[PGEP]], <2 x i64> <i64 -32, i64 -32>)
+; CHECK-NEXT:    ret <2 x ptr> [[R]]
+;
+  %pm0 = call <2 x ptr> @llvm.ptrmask.v2p1.v2i64(<2 x ptr> %p, <2 x i64> <i64 -64, i64 -128>)
+  %pgep = getelementptr i8, <2 x ptr> %pm0, <2 x i64> <i64 32, i64 32>
+  %r = call <2 x ptr> @llvm.ptrmask.v2p1.v2i64(<2 x ptr> %pgep, <2 x i64> <i64 -32, i64 -32>)
+  ret <2 x ptr> %r
+}
+
+define <2 x ptr> @ptrmask_simplify_known_unused_vec4(<2 x ptr> %p) {
+; CHECK-LABEL: define <2 x ptr> @ptrmask_simplify_known_unused_vec4
+; CHECK-SAME: (<2 x ptr> [[P:%.*]]) {
+; CHECK-NEXT:    [[PM0:%.*]] = call <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> [[P]], <2 x i64> <i64 -64, i64 -128>)
+; CHECK-NEXT:    [[PGEP:%.*]] = getelementptr i8, <2 x ptr> [[PM0]], <2 x i64> <i64 32, i64 64>
+; CHECK-NEXT:    [[R:%.*]] = call <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> [[PGEP]], <2 x i64> <i64 -32, i64 -32>)
+; CHECK-NEXT:    ret <2 x ptr> [[R]]
+;
+  %pm0 = call <2 x ptr> @llvm.ptrmask.v2p1.v2i64(<2 x ptr> %p, <2 x i64> <i64 -64, i64 -128>)
+  %pgep = getelementptr i8, <2 x ptr> %pm0, <2 x i64> <i64 32, i64 64>
+  %r = call <2 x ptr> @llvm.ptrmask.v2p1.v2i64(<2 x ptr> %pgep, <2 x i64> <i64 -32, i64 -32>)
+  ret <2 x ptr> %r
+}
+
+define <2 x ptr> @ptrmask_simplify_known_unused_vec_fail(<2 x ptr> %p) {
+; CHECK-LABEL: define <2 x ptr> @ptrmask_simplify_known_unused_vec_fail
+; CHECK-SAME: (<2 x ptr> [[P:%.*]]) {
+; CHECK-NEXT:    [[PM0:%.*]] = call <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> [[P]], <2 x i64> <i64 -64, i64 -128>)
+; CHECK-NEXT:    [[PGEP:%.*]] = getelementptr i8, <2 x ptr> [[PM0]], <2 x i64> <i64 16, i64 64>
+; CHECK-NEXT:    [[R:%.*]] = call <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> [[PGEP]], <2 x i64> <i64 -32, i64 -32>)
+; CHECK-NEXT:    ret <2 x ptr> [[R]]
+;
+  %pm0 = call <2 x ptr> @llvm.ptrmask.v2p1.v2i64(<2 x ptr> %p, <2 x i64> <i64 -64, i64 -128>)
+  %pgep = getelementptr i8, <2 x ptr> %pm0, <2 x i64> <i64 16, i64 64>
+  %r = call <2 x ptr> @llvm.ptrmask.v2p1.v2i64(<2 x ptr> %pgep, <2 x i64> <i64 -32, i64 -32>)
+  ret <2 x ptr> %r
+}
+
+define <2 x ptr> @ptrmask_simplify_known_unused_vec_fail2(<2 x ptr> %p) {
+; CHECK-LABEL: define <2 x ptr> @ptrmask_simplify_known_unused_vec_fail2
+; CHECK-SAME: (<2 x ptr> [[P:%.*]]) {
+; CHECK-NEXT:    [[PM0:%.*]] = call <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> [[P]], <2 x i64> <i64 -64, i64 -64>)
+; CHECK-NEXT:    [[PGEP:%.*]] = getelementptr i8, <2 x ptr> [[PM0]], <2 x i64> <i64 32, i64 32>
+; CHECK-NEXT:    [[R:%.*]] = call <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> [[PGEP]], <2 x i64> <i64 -32, i64 -64>)
+; CHECK-NEXT:    ret <2 x ptr> [[R]]
+;
+  %pm0 = call <2 x ptr> @llvm.ptrmask.v2p1.v2i64(<2 x ptr> %p, <2 x i64> <i64 -64, i64 -64>)
+  %pgep = getelementptr i8, <2 x ptr> %pm0, <2 x i64> <i64 32, i64 32>
+  %r = call <2 x ptr> @llvm.ptrmask.v2p1.v2i64(<2 x ptr> %pgep, <2 x i64> <i64 -32, i64 -64>)
+  ret <2 x ptr> %r
+}
+
+define <2 x ptr> @ptrmask_simplify_known_unused_vec_fail3(<2 x ptr> %p) {
+; CHECK-LABEL: define <2 x ptr> @ptrmask_simplify_known_unused_vec_fail3
+; CHECK-SAME: (<2 x ptr> [[P:%.*]]) {
+; CHECK-NEXT:    [[PM0:%.*]] = call <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> [[P]], <2 x i64> <i64 -64, i64 -16>)
+; CHECK-NEXT:    [[PGEP:%.*]] = getelementptr i8, <2 x ptr> [[PM0]], <2 x i64> <i64 32, i64 32>
+; CHECK-NEXT:    [[R:%.*]] = call <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> [[PGEP]], <2 x i64> <i64 -32, i64 -32>)
+; CHECK-NEXT:    ret <2 x ptr> [[R]]
+;
+  %pm0 = call <2 x ptr> @llvm.ptrmask.v2p1.v2i64(<2 x ptr> %p, <2 x i64> <i64 -64, i64 -16>)
+  %pgep = getelementptr i8, <2 x ptr> %pm0, <2 x i64> <i64 32, i64 32>
+  %r = call <2 x ptr> @llvm.ptrmask.v2p1.v2i64(<2 x ptr> %pgep, <2 x i64> <i64 -32, i64 -32>)
+  ret <2 x ptr> %r
+}
+
+define ptr @ptrmask_maintain_provenance_i64(ptr %p0) {
+; CHECK-LABEL: define ptr @ptrmask_maintain_provenance_i64
+; CHECK-SAME: (ptr [[P0:%.*]]) {
+; CHECK-NEXT:    [[R:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[P0]], i64 0)
+; CHECK-NEXT:    ret ptr [[R]]
+;
+  %r = call ptr @llvm.ptrmask.p0.i64(ptr %p0, i64 0)
+  ret ptr %r
+}
+
+define ptr addrspace(1) @ptrmask_maintain_provenance_i32(ptr addrspace(1) %p0) {
+; CHECK-LABEL: define ptr addrspace(1) @ptrmask_maintain_provenance_i32
+; CHECK-SAME: (ptr addrspace(1) [[P0:%.*]]) {
+; CHECK-NEXT:    [[R:%.*]] = call ptr addrspace(1) @llvm.ptrmask.p1.i32(ptr addrspace(1) [[P0]], i32 0)
+; CHECK-NEXT:    ret ptr addrspace(1) [[R]]
+;
+  %r = call ptr addrspace(1) @llvm.ptrmask.p1.i32(ptr addrspace(1) %p0, i32 0)
+  ret ptr addrspace(1) %r
+}
+
+define <2 x ptr> @ptrmask_maintain_provenance_v2i64(<2 x ptr> %p0) {
+; CHECK-LABEL: define <2 x ptr> @ptrmask_maintain_provenance_v2i64
+; CHECK-SAME: (<2 x ptr> [[P0:%.*]]) {
+; CHECK-NEXT:    [[R:%.*]] = call <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> [[P0]], <2 x i64> zeroinitializer)
+; CHECK-NEXT:    ret <2 x ptr> [[R]]
+;
+  %r = call <2 x ptr> @llvm.ptrmask.v2p1.v2i64(<2 x ptr> %p0, <2 x i64> zeroinitializer)
+  ret <2 x ptr> %r
+}
+
+define <2 x ptr addrspace(1) > @ptrmask_maintain_provenance_v2i32(<2 x ptr addrspace(1) > %p0) {
+; CHECK-LABEL: define <2 x ptr addrspace(1)> @ptrmask_maintain_provenance_v2i32
+; CHECK-SAME: (<2 x ptr addrspace(1)> [[P0:%.*]]) {
+; CHECK-NEXT:    [[R:%.*]] = call <2 x ptr addrspace(1)> @llvm.ptrmask.v2p1.v2i32(<2 x ptr addrspace(1)> [[P0]], <2 x i32> zeroinitializer)
+; CHECK-NEXT:    ret <2 x ptr addrspace(1)> [[R]]
+;
+  %r = call <2 x ptr addrspace(1) > @llvm.ptrmask.v2p1.v2i32(<2 x ptr addrspace(1) > %p0, <2 x i32> zeroinitializer)
+  ret <2 x ptr addrspace(1) > %r
+}

>From 80404fba7c26986b19f26aee74cc6ae38c4fd6b2 Mon Sep 17 00:00:00 2001
From: Noah Goldstein <goldstein.w.n at gmail.com>
Date: Fri, 22 Sep 2023 08:21:27 -0500
Subject: [PATCH 2/7] [InstSimplify] Add some basic simplifications for
 `llvm.ptrmask`

Mostly the same as `and`. We also have a check for a useless
`llvm.ptrmask` if the ptr is already known aligned.

Differential Revision: https://reviews.llvm.org/D156633
---
 llvm/lib/Analysis/InstructionSimplify.cpp     | 38 +++++++++++
 .../InferAddressSpaces/AMDGPU/ptrmask.ll      |  5 +-
 .../test/Transforms/InstCombine/align-addr.ll |  8 +--
 llvm/test/Transforms/InstSimplify/ptrmask.ll  | 65 ++++++-------------
 4 files changed, 63 insertions(+), 53 deletions(-)

diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp
index f0e60c9a2dac6fd..d1147c2134b6873 100644
--- a/llvm/lib/Analysis/InstructionSimplify.cpp
+++ b/llvm/lib/Analysis/InstructionSimplify.cpp
@@ -6411,6 +6411,44 @@ static Value *simplifyBinaryIntrinsic(Function *F, Value *Op0, Value *Op1,
       return Constant::getNullValue(ReturnType);
     break;
   }
+  case Intrinsic::ptrmask: {
+    if (isa<PoisonValue>(Op0) || isa<PoisonValue>(Op1))
+      return PoisonValue::get(Op0->getType());
+
+    // NOTE: We can't apply this simplifications based on the value of Op1
+    // because we need to preserve provenance.
+    if (Q.isUndefValue(Op0) || match(Op0, m_Zero()))
+      return Constant::getNullValue(Op0->getType());
+
+    assert(Op1->getType()->getScalarSizeInBits() ==
+               Q.DL.getIndexTypeSizeInBits(Op0->getType()) &&
+           "Invalid mask width");
+    // If index-width (mask size) is less than pointer-size then mask is
+    // 1-extended.
+    if (match(Op1, m_PtrToInt(m_Specific(Op0))))
+      return Op0;
+
+    // NOTE: We may have attributes associated with the return value of the
+    // llvm.ptrmask intrinsic that will be lost when we just return the
+    // operand. We should try to preserve them.
+    if (match(Op1, m_AllOnes()) || Q.isUndefValue(Op1))
+      return Op0;
+
+    Constant *C;
+    if (match(Op1, m_ImmConstant(C))) {
+      KnownBits PtrKnown = computeKnownBits(Op0, /*Depth=*/0, Q);
+      // See if we only masking off bits we know are already zero due to
+      // alignment.
+      APInt IrrelivantPtrBits =
+          PtrKnown.Zero.zextOrTrunc(C->getType()->getScalarSizeInBits());
+      C = ConstantFoldBinaryOpOperands(
+          Instruction::Or, C, ConstantInt::get(C->getType(), IrrelivantPtrBits),
+          Q.DL);
+      if (C != nullptr && C->isAllOnesValue())
+        return Op0;
+    }
+    break;
+  }
   case Intrinsic::smax:
   case Intrinsic::smin:
   case Intrinsic::umax:
diff --git a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/ptrmask.ll b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/ptrmask.ll
index 280630468c2c09a..857132a0d10fca0 100644
--- a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/ptrmask.ll
+++ b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/ptrmask.ll
@@ -320,8 +320,7 @@ define i8 @ptrmask_cast_local_to_flat_const_mask_fffffffffffffffe(ptr addrspace(
 
 define i8 @ptrmask_cast_local_to_flat_const_mask_ffffffffffffffff(ptr addrspace(3) %src.ptr) {
 ; CHECK-LABEL: @ptrmask_cast_local_to_flat_const_mask_ffffffffffffffff(
-; CHECK-NEXT:    [[TMP1:%.*]] = call ptr addrspace(3) @llvm.ptrmask.p3.i32(ptr addrspace(3) [[SRC_PTR:%.*]], i32 -1)
-; CHECK-NEXT:    [[LOAD:%.*]] = load i8, ptr addrspace(3) [[TMP1]], align 1
+; CHECK-NEXT:    [[LOAD:%.*]] = load i8, ptr addrspace(3) [[SRC_PTR:%.*]], align 1
 ; CHECK-NEXT:    ret i8 [[LOAD]]
 ;
   %cast = addrspacecast ptr addrspace(3) %src.ptr to ptr
@@ -333,7 +332,7 @@ define i8 @ptrmask_cast_local_to_flat_const_mask_ffffffffffffffff(ptr addrspace(
 ; Make sure non-constant masks can also be handled.
 define i8 @ptrmask_cast_local_to_flat_load_range_mask(ptr addrspace(3) %src.ptr, ptr addrspace(1) %mask.ptr) {
 ; CHECK-LABEL: @ptrmask_cast_local_to_flat_load_range_mask(
-; CHECK-NEXT:    [[LOAD_MASK:%.*]] = load i64, ptr addrspace(1) [[MASK_PTR:%.*]], align 8, !range !0
+; CHECK-NEXT:    [[LOAD_MASK:%.*]] = load i64, ptr addrspace(1) [[MASK_PTR:%.*]], align 8, !range [[RNG0:![0-9]+]]
 ; CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[LOAD_MASK]] to i32
 ; CHECK-NEXT:    [[TMP2:%.*]] = call ptr addrspace(3) @llvm.ptrmask.p3.i32(ptr addrspace(3) [[SRC_PTR:%.*]], i32 [[TMP1]])
 ; CHECK-NEXT:    [[LOAD:%.*]] = load i8, ptr addrspace(3) [[TMP2]], align 1
diff --git a/llvm/test/Transforms/InstCombine/align-addr.ll b/llvm/test/Transforms/InstCombine/align-addr.ll
index ec8e7c9348f1749..1e49cddf7ffe79d 100644
--- a/llvm/test/Transforms/InstCombine/align-addr.ll
+++ b/llvm/test/Transforms/InstCombine/align-addr.ll
@@ -181,11 +181,9 @@ define <16 x i8> @ptrmask_align8_ptr_align1(ptr align 1 %ptr) {
 }
 
 ; Underlying alignment already the same as forced alignment by ptrmask
-; TODO: Should be able to drop the ptrmask
 define <16 x i8> @ptrmask_align8_ptr_align8(ptr align 8 %ptr) {
 ; CHECK-LABEL: @ptrmask_align8_ptr_align8(
-; CHECK-NEXT:    [[ALIGNED:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[PTR:%.*]], i64 -8)
-; CHECK-NEXT:    [[LOAD:%.*]] = load <16 x i8>, ptr [[ALIGNED]], align 1
+; CHECK-NEXT:    [[LOAD:%.*]] = load <16 x i8>, ptr [[PTR:%.*]], align 1
 ; CHECK-NEXT:    ret <16 x i8> [[LOAD]]
 ;
   %aligned = call ptr @llvm.ptrmask.p0.i64(ptr %ptr, i64 -8)
@@ -194,11 +192,9 @@ define <16 x i8> @ptrmask_align8_ptr_align8(ptr align 8 %ptr) {
 }
 
 ; Underlying alignment greater than alignment forced by ptrmask
-; TODO: Should be able to drop the ptrmask
 define <16 x i8> @ptrmask_align8_ptr_align16(ptr align 16 %ptr) {
 ; CHECK-LABEL: @ptrmask_align8_ptr_align16(
-; CHECK-NEXT:    [[ALIGNED:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[PTR:%.*]], i64 -8)
-; CHECK-NEXT:    [[LOAD:%.*]] = load <16 x i8>, ptr [[ALIGNED]], align 1
+; CHECK-NEXT:    [[LOAD:%.*]] = load <16 x i8>, ptr [[PTR:%.*]], align 1
 ; CHECK-NEXT:    ret <16 x i8> [[LOAD]]
 ;
   %aligned = call ptr @llvm.ptrmask.p0.i64(ptr %ptr, i64 -8)
diff --git a/llvm/test/Transforms/InstSimplify/ptrmask.ll b/llvm/test/Transforms/InstSimplify/ptrmask.ll
index 4945d4bc0edf625..dd83abfdeee464b 100644
--- a/llvm/test/Transforms/InstSimplify/ptrmask.ll
+++ b/llvm/test/Transforms/InstSimplify/ptrmask.ll
@@ -12,8 +12,7 @@ declare <2 x ptr> @llvm.ptrmask.v2p1.v2i64(<2 x ptr>, <2 x i64>)
 define ptr @ptrmask_simplify_poison_mask(ptr %p) {
 ; CHECK-LABEL: define ptr @ptrmask_simplify_poison_mask
 ; CHECK-SAME: (ptr [[P:%.*]]) {
-; CHECK-NEXT:    [[R:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[P]], i64 poison)
-; CHECK-NEXT:    ret ptr [[R]]
+; CHECK-NEXT:    ret ptr poison
 ;
   %r = call ptr @llvm.ptrmask.p0.i64(ptr %p, i64 poison)
   ret ptr %r
@@ -22,8 +21,7 @@ define ptr @ptrmask_simplify_poison_mask(ptr %p) {
 define <2 x ptr addrspace(1) > @ptrmask_simplify_poison_mask_vec(<2 x ptr addrspace(1) > %p) {
 ; CHECK-LABEL: define <2 x ptr addrspace(1)> @ptrmask_simplify_poison_mask_vec
 ; CHECK-SAME: (<2 x ptr addrspace(1)> [[P:%.*]]) {
-; CHECK-NEXT:    [[R:%.*]] = call <2 x ptr addrspace(1)> @llvm.ptrmask.v2p1.v2i32(<2 x ptr addrspace(1)> [[P]], <2 x i32> poison)
-; CHECK-NEXT:    ret <2 x ptr addrspace(1)> [[R]]
+; CHECK-NEXT:    ret <2 x ptr addrspace(1)> poison
 ;
   %r = call <2 x ptr addrspace(1) > @llvm.ptrmask.v2p1.v2i32(<2 x ptr addrspace(1) > %p, <2 x i32> poison)
   ret <2 x ptr addrspace(1) > %r
@@ -42,8 +40,7 @@ define <2 x ptr addrspace(1) > @ptrmask_simplify_poison_and_zero_i32_vec_fail(<2
 define <2 x ptr> @ptrmask_simplify_undef_and_ones_vec(<2 x ptr> %p) {
 ; CHECK-LABEL: define <2 x ptr> @ptrmask_simplify_undef_and_ones_vec
 ; CHECK-SAME: (<2 x ptr> [[P:%.*]]) {
-; CHECK-NEXT:    [[R:%.*]] = call <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> [[P]], <2 x i64> <i64 undef, i64 -1>)
-; CHECK-NEXT:    ret <2 x ptr> [[R]]
+; CHECK-NEXT:    ret <2 x ptr> [[P]]
 ;
   %r = call <2 x ptr> @llvm.ptrmask.v2p1.v2i64(<2 x ptr> %p, <2 x i64> <i64 undef, i64 -1>)
   ret <2 x ptr> %r
@@ -52,8 +49,7 @@ define <2 x ptr> @ptrmask_simplify_undef_and_ones_vec(<2 x ptr> %p) {
 define <2 x ptr> @ptrmask_simplify_poison_and_ones_vec(<2 x ptr> %p) {
 ; CHECK-LABEL: define <2 x ptr> @ptrmask_simplify_poison_and_ones_vec
 ; CHECK-SAME: (<2 x ptr> [[P:%.*]]) {
-; CHECK-NEXT:    [[R:%.*]] = call <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> [[P]], <2 x i64> <i64 poison, i64 -1>)
-; CHECK-NEXT:    ret <2 x ptr> [[R]]
+; CHECK-NEXT:    ret <2 x ptr> [[P]]
 ;
   %r = call <2 x ptr> @llvm.ptrmask.v2p1.v2i64(<2 x ptr> %p, <2 x i64> <i64 poison, i64 -1>)
   ret <2 x ptr> %r
@@ -62,8 +58,7 @@ define <2 x ptr> @ptrmask_simplify_poison_and_ones_vec(<2 x ptr> %p) {
 define <2 x ptr> @ptrmask_simplify_ones_vec(<2 x ptr> %p) {
 ; CHECK-LABEL: define <2 x ptr> @ptrmask_simplify_ones_vec
 ; CHECK-SAME: (<2 x ptr> [[P:%.*]]) {
-; CHECK-NEXT:    [[R:%.*]] = call <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> [[P]], <2 x i64> <i64 -1, i64 -1>)
-; CHECK-NEXT:    ret <2 x ptr> [[R]]
+; CHECK-NEXT:    ret <2 x ptr> [[P]]
 ;
   %r = call <2 x ptr> @llvm.ptrmask.v2p1.v2i64(<2 x ptr> %p, <2 x i64> <i64 -1, i64 -1>)
   ret <2 x ptr> %r
@@ -72,8 +67,7 @@ define <2 x ptr> @ptrmask_simplify_ones_vec(<2 x ptr> %p) {
 define <2 x ptr addrspace(1) > @ptrmask_simplify_ones_i32_vec(<2 x ptr addrspace(1) > %p) {
 ; CHECK-LABEL: define <2 x ptr addrspace(1)> @ptrmask_simplify_ones_i32_vec
 ; CHECK-SAME: (<2 x ptr addrspace(1)> [[P:%.*]]) {
-; CHECK-NEXT:    [[R:%.*]] = call <2 x ptr addrspace(1)> @llvm.ptrmask.v2p1.v2i32(<2 x ptr addrspace(1)> [[P]], <2 x i32> <i32 -1, i32 -1>)
-; CHECK-NEXT:    ret <2 x ptr addrspace(1)> [[R]]
+; CHECK-NEXT:    ret <2 x ptr addrspace(1)> [[P]]
 ;
   %r = call <2 x ptr addrspace(1) > @llvm.ptrmask.v2p1.v2i32(<2 x ptr addrspace(1) > %p, <2 x i32> <i32 -1, i32 -1>)
   ret <2 x ptr addrspace(1) > %r
@@ -82,8 +76,7 @@ define <2 x ptr addrspace(1) > @ptrmask_simplify_ones_i32_vec(<2 x ptr addrspace
 define ptr addrspace(1) @ptrmask_simplify_undef_mask(ptr addrspace(1) %p) {
 ; CHECK-LABEL: define ptr addrspace(1) @ptrmask_simplify_undef_mask
 ; CHECK-SAME: (ptr addrspace(1) [[P:%.*]]) {
-; CHECK-NEXT:    [[R:%.*]] = call ptr addrspace(1) @llvm.ptrmask.p1.i32(ptr addrspace(1) [[P]], i32 undef)
-; CHECK-NEXT:    ret ptr addrspace(1) [[R]]
+; CHECK-NEXT:    ret ptr addrspace(1) [[P]]
 ;
   %r = call ptr addrspace(1) @llvm.ptrmask.p1.i32(ptr addrspace(1) %p, i32 undef)
   ret ptr addrspace(1) %r
@@ -102,8 +95,7 @@ define ptr @ptrmask_simplify_0_mask(ptr %p) {
 define ptr @ptrmask_simplify_1s_mask(ptr %p) {
 ; CHECK-LABEL: define ptr @ptrmask_simplify_1s_mask
 ; CHECK-SAME: (ptr [[P:%.*]]) {
-; CHECK-NEXT:    [[R:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[P]], i64 -1)
-; CHECK-NEXT:    ret ptr [[R]]
+; CHECK-NEXT:    ret ptr [[P]]
 ;
   %r = call ptr @llvm.ptrmask.p0.i64(ptr %p, i64 -1)
   ret ptr %r
@@ -112,8 +104,7 @@ define ptr @ptrmask_simplify_1s_mask(ptr %p) {
 define ptr addrspace(1) @ptrmask_simplify_1s_mask_i32(ptr addrspace(1) %p) {
 ; CHECK-LABEL: define ptr addrspace(1) @ptrmask_simplify_1s_mask_i32
 ; CHECK-SAME: (ptr addrspace(1) [[P:%.*]]) {
-; CHECK-NEXT:    [[R:%.*]] = call ptr addrspace(1) @llvm.ptrmask.p1.i32(ptr addrspace(1) [[P]], i32 -1)
-; CHECK-NEXT:    ret ptr addrspace(1) [[R]]
+; CHECK-NEXT:    ret ptr addrspace(1) [[P]]
 ;
   %r = call ptr addrspace(1) @llvm.ptrmask.p1.i32(ptr addrspace(1) %p, i32 -1)
   ret ptr addrspace(1) %r
@@ -122,8 +113,7 @@ define ptr addrspace(1) @ptrmask_simplify_1s_mask_i32(ptr addrspace(1) %p) {
 define ptr @ptrmask_simplify_poison_ptr(i64 %m) {
 ; CHECK-LABEL: define ptr @ptrmask_simplify_poison_ptr
 ; CHECK-SAME: (i64 [[M:%.*]]) {
-; CHECK-NEXT:    [[R:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr poison, i64 [[M]])
-; CHECK-NEXT:    ret ptr [[R]]
+; CHECK-NEXT:    ret ptr poison
 ;
   %r = call ptr @llvm.ptrmask.p0.i64(ptr poison, i64 %m)
   ret ptr %r
@@ -132,8 +122,7 @@ define ptr @ptrmask_simplify_poison_ptr(i64 %m) {
 define ptr addrspace(1) @ptrmask_simplify_undef_ptr(i32 %m) {
 ; CHECK-LABEL: define ptr addrspace(1) @ptrmask_simplify_undef_ptr
 ; CHECK-SAME: (i32 [[M:%.*]]) {
-; CHECK-NEXT:    [[R:%.*]] = call ptr addrspace(1) @llvm.ptrmask.p1.i32(ptr addrspace(1) undef, i32 [[M]])
-; CHECK-NEXT:    ret ptr addrspace(1) [[R]]
+; CHECK-NEXT:    ret ptr addrspace(1) null
 ;
   %r = call ptr addrspace(1) @llvm.ptrmask.p1.i32(ptr addrspace(1) undef, i32 %m)
   ret ptr addrspace(1) %r
@@ -142,8 +131,7 @@ define ptr addrspace(1) @ptrmask_simplify_undef_ptr(i32 %m) {
 define ptr @ptrmask_simplify_null_ptr(i64 %m) {
 ; CHECK-LABEL: define ptr @ptrmask_simplify_null_ptr
 ; CHECK-SAME: (i64 [[M:%.*]]) {
-; CHECK-NEXT:    [[R:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr null, i64 [[M]])
-; CHECK-NEXT:    ret ptr [[R]]
+; CHECK-NEXT:    ret ptr null
 ;
   %r = call ptr @llvm.ptrmask.p0.i64(ptr null, i64 %m)
   ret ptr %r
@@ -152,9 +140,7 @@ define ptr @ptrmask_simplify_null_ptr(i64 %m) {
 define ptr @ptrmask_simplify_ptrmask(ptr %p) {
 ; CHECK-LABEL: define ptr @ptrmask_simplify_ptrmask
 ; CHECK-SAME: (ptr [[P:%.*]]) {
-; CHECK-NEXT:    [[M:%.*]] = ptrtoint ptr [[P]] to i64
-; CHECK-NEXT:    [[R:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[P]], i64 [[M]])
-; CHECK-NEXT:    ret ptr [[R]]
+; CHECK-NEXT:    ret ptr [[P]]
 ;
   %m = ptrtoint ptr %p to i64
   %r = call ptr @llvm.ptrmask.p0.i64(ptr %p, i64 %m)
@@ -164,9 +150,7 @@ define ptr @ptrmask_simplify_ptrmask(ptr %p) {
 define ptr addrspace(1) @ptrmask_simplify_ptrmask_i32(ptr addrspace(1) %p) {
 ; CHECK-LABEL: define ptr addrspace(1) @ptrmask_simplify_ptrmask_i32
 ; CHECK-SAME: (ptr addrspace(1) [[P:%.*]]) {
-; CHECK-NEXT:    [[M:%.*]] = ptrtoint ptr addrspace(1) [[P]] to i32
-; CHECK-NEXT:    [[R:%.*]] = call ptr addrspace(1) @llvm.ptrmask.p1.i32(ptr addrspace(1) [[P]], i32 [[M]])
-; CHECK-NEXT:    ret ptr addrspace(1) [[R]]
+; CHECK-NEXT:    ret ptr addrspace(1) [[P]]
 ;
   %m = ptrtoint ptr addrspace(1) %p to i32
   %r = call ptr addrspace(1) @llvm.ptrmask.p1.i32(ptr addrspace(1) %p, i32 %m)
@@ -176,8 +160,7 @@ define ptr addrspace(1) @ptrmask_simplify_ptrmask_i32(ptr addrspace(1) %p) {
 define ptr @ptrmask_simplify_aligned_unused(ptr align 64 %p) {
 ; CHECK-LABEL: define ptr @ptrmask_simplify_aligned_unused
 ; CHECK-SAME: (ptr align 64 [[P:%.*]]) {
-; CHECK-NEXT:    [[R:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[P]], i64 -64)
-; CHECK-NEXT:    ret ptr [[R]]
+; CHECK-NEXT:    ret ptr [[P]]
 ;
   %r = call ptr @llvm.ptrmask.p0.i64(ptr %p, i64 -64)
   ret ptr %r
@@ -206,8 +189,7 @@ define <2 x ptr> @ptrmask_simplify_aligned_unused_vec_todo(<2 x ptr> align 128 %
 define ptr addrspace(1) @ptrmask_simplify_aligned_unused_i32(ptr addrspace(1) align 64 %p) {
 ; CHECK-LABEL: define ptr addrspace(1) @ptrmask_simplify_aligned_unused_i32
 ; CHECK-SAME: (ptr addrspace(1) align 64 [[P:%.*]]) {
-; CHECK-NEXT:    [[R:%.*]] = call ptr addrspace(1) @llvm.ptrmask.p1.i32(ptr addrspace(1) [[P]], i32 -64)
-; CHECK-NEXT:    ret ptr addrspace(1) [[R]]
+; CHECK-NEXT:    ret ptr addrspace(1) [[P]]
 ;
   %r = call ptr addrspace(1) @llvm.ptrmask.p1.i32(ptr addrspace(1) %p, i32 -64)
   ret ptr addrspace(1) %r
@@ -218,8 +200,7 @@ define ptr @ptrmask_simplify_known_unused(ptr %p) {
 ; CHECK-SAME: (ptr [[P:%.*]]) {
 ; CHECK-NEXT:    [[PM0:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[P]], i64 -64)
 ; CHECK-NEXT:    [[PGEP:%.*]] = getelementptr i8, ptr [[PM0]], i64 32
-; CHECK-NEXT:    [[R:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[PGEP]], i64 -32)
-; CHECK-NEXT:    ret ptr [[R]]
+; CHECK-NEXT:    ret ptr [[PGEP]]
 ;
   %pm0 = call ptr @llvm.ptrmask.p0.i64(ptr %p, i64 -64)
   %pgep = getelementptr i8, ptr %pm0, i64 32
@@ -232,8 +213,7 @@ define <2 x ptr> @ptrmask_simplify_known_unused_vec(<2 x ptr> %p) {
 ; CHECK-SAME: (<2 x ptr> [[P:%.*]]) {
 ; CHECK-NEXT:    [[PM0:%.*]] = call <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> [[P]], <2 x i64> <i64 -64, i64 -64>)
 ; CHECK-NEXT:    [[PGEP:%.*]] = getelementptr i8, <2 x ptr> [[PM0]], <2 x i64> <i64 32, i64 32>
-; CHECK-NEXT:    [[R:%.*]] = call <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> [[PGEP]], <2 x i64> <i64 -32, i64 -32>)
-; CHECK-NEXT:    ret <2 x ptr> [[R]]
+; CHECK-NEXT:    ret <2 x ptr> [[PGEP]]
 ;
   %pm0 = call <2 x ptr> @llvm.ptrmask.v2p1.v2i64(<2 x ptr> %p, <2 x i64> <i64 -64, i64 -64>)
   %pgep = getelementptr i8, <2 x ptr> %pm0, <2 x i64> <i64 32, i64 32>
@@ -246,8 +226,7 @@ define <2 x ptr> @ptrmask_simplify_known_unused_vec2(<2 x ptr> %p) {
 ; CHECK-SAME: (<2 x ptr> [[P:%.*]]) {
 ; CHECK-NEXT:    [[PM0:%.*]] = call <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> [[P]], <2 x i64> <i64 -64, i64 -64>)
 ; CHECK-NEXT:    [[PGEP:%.*]] = getelementptr i8, <2 x ptr> [[PM0]], <2 x i64> <i64 32, i64 32>
-; CHECK-NEXT:    [[R:%.*]] = call <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> [[PGEP]], <2 x i64> <i64 -32, i64 -16>)
-; CHECK-NEXT:    ret <2 x ptr> [[R]]
+; CHECK-NEXT:    ret <2 x ptr> [[PGEP]]
 ;
   %pm0 = call <2 x ptr> @llvm.ptrmask.v2p1.v2i64(<2 x ptr> %p, <2 x i64> <i64 -64, i64 -64>)
   %pgep = getelementptr i8, <2 x ptr> %pm0, <2 x i64> <i64 32, i64 32>
@@ -260,8 +239,7 @@ define <2 x ptr> @ptrmask_simplify_known_unused_vec3(<2 x ptr> %p) {
 ; CHECK-SAME: (<2 x ptr> [[P:%.*]]) {
 ; CHECK-NEXT:    [[PM0:%.*]] = call <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> [[P]], <2 x i64> <i64 -64, i64 -128>)
 ; CHECK-NEXT:    [[PGEP:%.*]] = getelementptr i8, <2 x ptr> [[PM0]], <2 x i64> <i64 32, i64 32>
-; CHECK-NEXT:    [[R:%.*]] = call <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> [[PGEP]], <2 x i64> <i64 -32, i64 -32>)
-; CHECK-NEXT:    ret <2 x ptr> [[R]]
+; CHECK-NEXT:    ret <2 x ptr> [[PGEP]]
 ;
   %pm0 = call <2 x ptr> @llvm.ptrmask.v2p1.v2i64(<2 x ptr> %p, <2 x i64> <i64 -64, i64 -128>)
   %pgep = getelementptr i8, <2 x ptr> %pm0, <2 x i64> <i64 32, i64 32>
@@ -274,8 +252,7 @@ define <2 x ptr> @ptrmask_simplify_known_unused_vec4(<2 x ptr> %p) {
 ; CHECK-SAME: (<2 x ptr> [[P:%.*]]) {
 ; CHECK-NEXT:    [[PM0:%.*]] = call <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> [[P]], <2 x i64> <i64 -64, i64 -128>)
 ; CHECK-NEXT:    [[PGEP:%.*]] = getelementptr i8, <2 x ptr> [[PM0]], <2 x i64> <i64 32, i64 64>
-; CHECK-NEXT:    [[R:%.*]] = call <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> [[PGEP]], <2 x i64> <i64 -32, i64 -32>)
-; CHECK-NEXT:    ret <2 x ptr> [[R]]
+; CHECK-NEXT:    ret <2 x ptr> [[PGEP]]
 ;
   %pm0 = call <2 x ptr> @llvm.ptrmask.v2p1.v2i64(<2 x ptr> %p, <2 x i64> <i64 -64, i64 -128>)
   %pgep = getelementptr i8, <2 x ptr> %pm0, <2 x i64> <i64 32, i64 64>

>From 20bd106a9bc58aff60d78909b65b9dc7ea966346 Mon Sep 17 00:00:00 2001
From: Noah Goldstein <goldstein.w.n at gmail.com>
Date: Fri, 22 Sep 2023 08:21:32 -0500
Subject: [PATCH 3/7] [InstCombine] Add tests for combining `llvm.ptrmask`; NFC

Differential Revision: https://reviews.llvm.org/D156634
---
 .../InstCombine/consecutive-ptrmask.ll        |  27 +-
 llvm/test/Transforms/InstCombine/ptrmask.ll   | 432 ++++++++++++++++++
 2 files changed, 458 insertions(+), 1 deletion(-)
 create mode 100644 llvm/test/Transforms/InstCombine/ptrmask.ll

diff --git a/llvm/test/Transforms/InstCombine/consecutive-ptrmask.ll b/llvm/test/Transforms/InstCombine/consecutive-ptrmask.ll
index bfe9efb1e88dd27..a4722c15614092b 100644
--- a/llvm/test/Transforms/InstCombine/consecutive-ptrmask.ll
+++ b/llvm/test/Transforms/InstCombine/consecutive-ptrmask.ll
@@ -4,7 +4,8 @@
 target datalayout = "p1:64:64:64:32"
 
 declare ptr @llvm.ptrmask.p0.i64(ptr, i64)
-declare ptr addrspace(1) @llvm.ptrmask.p1.i32(ptr addrspace(1), i32)
+declare ptr addrspace(1) @llvm.ptrmask.p1.i32(ptr addrspace(1) , i32)
+declare <2 x ptr addrspace(1) > @llvm.ptrmask.v2p1.v2i32(<2 x ptr addrspace(1) >, <2 x i32>)
 declare <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr>, <2 x i64>)
 declare void @use.ptr(ptr)
 
@@ -57,3 +58,27 @@ define ptr addrspace(1) @fold_2x_smaller_index_type(ptr addrspace(1) %p, i32 %m0
   %p1 = call ptr addrspace(1) @llvm.ptrmask.p1.i32(ptr addrspace(1) %p0, i32 %m1)
   ret ptr addrspace(1) %p1
 }
+
+define <2 x ptr> @fold_2x_vec_i64(<2 x ptr> %p, <2 x i64> %m0) {
+; CHECK-LABEL: define <2 x ptr> @fold_2x_vec_i64
+; CHECK-SAME: (<2 x ptr> [[P:%.*]], <2 x i64> [[M0:%.*]]) {
+; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i64> [[M0]], <i64 -2, i64 -2>
+; CHECK-NEXT:    [[P1:%.*]] = call <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> [[P]], <2 x i64> [[TMP1]])
+; CHECK-NEXT:    ret <2 x ptr> [[P1]]
+;
+  %p0 = call <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> %p, <2 x i64> %m0)
+  %p1 = call <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> %p0, <2 x i64> <i64 -2, i64 -2>)
+  ret <2 x ptr> %p1
+}
+
+define <2 x ptr addrspace(1) > @fold_2x_vec_i32_undef(<2 x ptr addrspace(1) > %p, <2 x i32> %m0) {
+; CHECK-LABEL: define <2 x ptr addrspace(1)> @fold_2x_vec_i32_undef
+; CHECK-SAME: (<2 x ptr addrspace(1)> [[P:%.*]], <2 x i32> [[M0:%.*]]) {
+; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i32> [[M0]], <i32 -2, i32 undef>
+; CHECK-NEXT:    [[P1:%.*]] = call <2 x ptr addrspace(1)> @llvm.ptrmask.v2p1.v2i32(<2 x ptr addrspace(1)> [[P]], <2 x i32> [[TMP1]])
+; CHECK-NEXT:    ret <2 x ptr addrspace(1)> [[P1]]
+;
+  %p0 = call <2 x ptr addrspace(1) > @llvm.ptrmask.v2p1.v2i32(<2 x ptr addrspace(1) > %p, <2 x i32> %m0)
+  %p1 = call <2 x ptr addrspace(1) > @llvm.ptrmask.v2p1.v2i32(<2 x ptr addrspace(1) > %p0, <2 x i32> <i32 -2, i32 undef>)
+  ret <2 x ptr addrspace(1) > %p1
+}
diff --git a/llvm/test/Transforms/InstCombine/ptrmask.ll b/llvm/test/Transforms/InstCombine/ptrmask.ll
new file mode 100644
index 000000000000000..56aef2d18ed51f0
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/ptrmask.ll
@@ -0,0 +1,432 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
+; RUN: opt < %s -passes=instcombine -S | FileCheck %s
+
+target datalayout = "p1:64:64:64:32"
+
+declare ptr addrspace(1) @llvm.ptrmask.p1.i32(ptr addrspace(1), i32)
+declare ptr @llvm.ptrmask.p0.i64(ptr, i64)
+declare <2 x ptr addrspace(1) > @llvm.ptrmask.v2p1.v2i32(<2 x ptr addrspace(1) >, <2 x i32>)
+declare <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr>, <2 x i64>)
+
+define ptr @ptrmask_combine_consecutive_preserve_attrs(ptr %p0, i64 %m1) {
+; CHECK-LABEL: define ptr @ptrmask_combine_consecutive_preserve_attrs
+; CHECK-SAME: (ptr [[P0:%.*]], i64 [[M1:%.*]]) {
+; CHECK-NEXT:    [[TMP1:%.*]] = and i64 [[M1]], 224
+; CHECK-NEXT:    [[R:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[P0]], i64 [[TMP1]])
+; CHECK-NEXT:    ret ptr [[R]]
+;
+  %pm0 = call ptr @llvm.ptrmask.p0.i64(ptr %p0, i64 224)
+  %r = call noalias ptr @llvm.ptrmask.p0.i64(ptr %pm0, i64 %m1)
+  ret ptr %r
+}
+
+define <2 x ptr> @ptrmask_combine_consecutive_preserve_attrs_vecs(<2 x ptr> %p0, <2 x i64> %m1) {
+; CHECK-LABEL: define <2 x ptr> @ptrmask_combine_consecutive_preserve_attrs_vecs
+; CHECK-SAME: (<2 x ptr> [[P0:%.*]], <2 x i64> [[M1:%.*]]) {
+; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i64> [[M1]], <i64 12345, i64 12345>
+; CHECK-NEXT:    [[R:%.*]] = call <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> [[P0]], <2 x i64> [[TMP1]])
+; CHECK-NEXT:    ret <2 x ptr> [[R]]
+;
+  %pm0 = call <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> %p0, <2 x i64> <i64 12345, i64 12345>)
+  %r = call align 128 <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> %pm0, <2 x i64> %m1)
+  ret <2 x ptr> %r
+}
+
+define ptr @ptrmask_combine_consecutive_preserve_attrs_fail(ptr %p0, i64 %m0) {
+; CHECK-LABEL: define ptr @ptrmask_combine_consecutive_preserve_attrs_fail
+; CHECK-SAME: (ptr [[P0:%.*]], i64 [[M0:%.*]]) {
+; CHECK-NEXT:    [[TMP1:%.*]] = and i64 [[M0]], 193
+; CHECK-NEXT:    [[R:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[P0]], i64 [[TMP1]])
+; CHECK-NEXT:    ret ptr [[R]]
+;
+  %pm0 = call noalias ptr @llvm.ptrmask.p0.i64(ptr %p0, i64 %m0)
+  %r = call ptr @llvm.ptrmask.p0.i64(ptr %pm0, i64 193)
+  ret ptr %r
+}
+
+define ptr @ptrmask_combine_consecutive_preserve_attrs_todo0(ptr %p0) {
+; CHECK-LABEL: define ptr @ptrmask_combine_consecutive_preserve_attrs_todo0
+; CHECK-SAME: (ptr [[P0:%.*]]) {
+; CHECK-NEXT:    [[PM0:%.*]] = call noalias ptr @llvm.ptrmask.p0.i64(ptr [[P0]], i64 224)
+; CHECK-NEXT:    ret ptr [[PM0]]
+;
+  %pm0 = call noalias ptr @llvm.ptrmask.p0.i64(ptr %p0, i64 224)
+  %r = call ptr @llvm.ptrmask.p0.i64(ptr %pm0, i64 224)
+  ret ptr %r
+}
+
+define ptr @ptrmask_combine_consecutive_preserve_attrs_todo1(ptr %p0) {
+; CHECK-LABEL: define ptr @ptrmask_combine_consecutive_preserve_attrs_todo1
+; CHECK-SAME: (ptr [[P0:%.*]]) {
+; CHECK-NEXT:    [[PM0:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[P0]], i64 224)
+; CHECK-NEXT:    ret ptr [[PM0]]
+;
+  %pm0 = call ptr @llvm.ptrmask.p0.i64(ptr %p0, i64 224)
+  %r = call noalias ptr @llvm.ptrmask.p0.i64(ptr %pm0, i64 224)
+  ret ptr %r
+}
+
+define ptr addrspace(1) @ptrmask_combine_consecutive_preserve_attrs_todo2(ptr addrspace(1) %p0) {
+; CHECK-LABEL: define ptr addrspace(1) @ptrmask_combine_consecutive_preserve_attrs_todo2
+; CHECK-SAME: (ptr addrspace(1) [[P0:%.*]]) {
+; CHECK-NEXT:    [[R:%.*]] = call ptr addrspace(1) @llvm.ptrmask.p1.i32(ptr addrspace(1) [[P0]], i32 224)
+; CHECK-NEXT:    ret ptr addrspace(1) [[R]]
+;
+  %pm0 = call noalias ptr addrspace(1) @llvm.ptrmask.p1.i32(ptr addrspace(1) %p0, i32 224)
+  %r = call ptr addrspace(1) @llvm.ptrmask.p1.i32(ptr addrspace(1) %pm0, i32 224)
+  ret ptr addrspace(1) %r
+}
+
+define ptr @ptrmask_combine_add_nonnull(ptr %p) {
+; CHECK-LABEL: define ptr @ptrmask_combine_add_nonnull
+; CHECK-SAME: (ptr [[P:%.*]]) {
+; CHECK-NEXT:    [[PM0:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[P]], i64 -64)
+; CHECK-NEXT:    [[PGEP:%.*]] = getelementptr i8, ptr [[PM0]], i64 33
+; CHECK-NEXT:    [[R:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[PGEP]], i64 -16)
+; CHECK-NEXT:    ret ptr [[R]]
+;
+  %pm0 = call ptr @llvm.ptrmask.p0.i64(ptr %p, i64 -64)
+  %pgep = getelementptr i8, ptr %pm0, i64 33
+  %r = call ptr @llvm.ptrmask.p0.i64(ptr %pgep, i64 -16)
+  ret ptr %r
+}
+
+define ptr @ptrmask_combine_add_alignment(ptr %p) {
+; CHECK-LABEL: define ptr @ptrmask_combine_add_alignment
+; CHECK-SAME: (ptr [[P:%.*]]) {
+; CHECK-NEXT:    [[R:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[P]], i64 -64)
+; CHECK-NEXT:    ret ptr [[R]]
+;
+  %r = call ptr @llvm.ptrmask.p0.i64(ptr %p, i64 -64)
+  ret ptr %r
+}
+
+define ptr addrspace(1) @ptrmask_combine_add_alignment2(ptr addrspace(1) align 32 %p) {
+; CHECK-LABEL: define ptr addrspace(1) @ptrmask_combine_add_alignment2
+; CHECK-SAME: (ptr addrspace(1) align 32 [[P:%.*]]) {
+; CHECK-NEXT:    [[R:%.*]] = call ptr addrspace(1) @llvm.ptrmask.p1.i32(ptr addrspace(1) [[P]], i32 -64)
+; CHECK-NEXT:    ret ptr addrspace(1) [[R]]
+;
+  %r = call ptr addrspace(1) @llvm.ptrmask.p1.i32(ptr addrspace(1) %p, i32 -64)
+  ret ptr addrspace(1) %r
+}
+
+define <2 x ptr> @ptrmask_combine_add_alignment_vec(<2 x ptr> %p) {
+; CHECK-LABEL: define <2 x ptr> @ptrmask_combine_add_alignment_vec
+; CHECK-SAME: (<2 x ptr> [[P:%.*]]) {
+; CHECK-NEXT:    [[R:%.*]] = call <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> [[P]], <2 x i64> <i64 -96, i64 -96>)
+; CHECK-NEXT:    ret <2 x ptr> [[R]]
+;
+  %r = call <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> %p, <2 x i64> <i64 -96, i64 -96>)
+  ret <2 x ptr> %r
+}
+
+define ptr addrspace(1) @ptrmask_combine_improve_alignment(ptr addrspace(1) %p) {
+; CHECK-LABEL: define ptr addrspace(1) @ptrmask_combine_improve_alignment
+; CHECK-SAME: (ptr addrspace(1) [[P:%.*]]) {
+; CHECK-NEXT:    [[R:%.*]] = call align 32 ptr addrspace(1) @llvm.ptrmask.p1.i32(ptr addrspace(1) [[P]], i32 -64)
+; CHECK-NEXT:    ret ptr addrspace(1) [[R]]
+;
+  %r = call align 32 ptr addrspace(1) @llvm.ptrmask.p1.i32(ptr addrspace(1) %p, i32 -64)
+  ret ptr addrspace(1) %r
+}
+
+define <2 x ptr addrspace(1) > @ptrmask_combine_improve_alignment_vec(<2 x ptr addrspace(1) > %p) {
+; CHECK-LABEL: define <2 x ptr addrspace(1)> @ptrmask_combine_improve_alignment_vec
+; CHECK-SAME: (<2 x ptr addrspace(1)> [[P:%.*]]) {
+; CHECK-NEXT:    [[R:%.*]] = call align 32 <2 x ptr addrspace(1)> @llvm.ptrmask.v2p1.v2i32(<2 x ptr addrspace(1)> [[P]], <2 x i32> <i32 -64, i32 -128>)
+; CHECK-NEXT:    ret <2 x ptr addrspace(1)> [[R]]
+;
+  %r = call align 32 <2 x ptr addrspace(1) > @llvm.ptrmask.v2p1.v2i32(<2 x ptr addrspace(1) > %p, <2 x i32> <i32 -64, i32 -128>)
+  ret <2 x ptr addrspace(1) > %r
+}
+
+define ptr addrspace(1) @ptrmask_combine_improve_alignment_fail(ptr addrspace(1) %p) {
+; CHECK-LABEL: define ptr addrspace(1) @ptrmask_combine_improve_alignment_fail
+; CHECK-SAME: (ptr addrspace(1) [[P:%.*]]) {
+; CHECK-NEXT:    [[R:%.*]] = call align 128 ptr addrspace(1) @llvm.ptrmask.p1.i32(ptr addrspace(1) [[P]], i32 -64)
+; CHECK-NEXT:    ret ptr addrspace(1) [[R]]
+;
+  %r = call align 128 ptr addrspace(1) @llvm.ptrmask.p1.i32(ptr addrspace(1) %p, i32 -64)
+  ret ptr addrspace(1) %r
+}
+
+define i64 @ptrtoint_of_ptrmask(ptr %p, i64 %m) {
+; CHECK-LABEL: define i64 @ptrtoint_of_ptrmask
+; CHECK-SAME: (ptr [[P:%.*]], i64 [[M:%.*]]) {
+; CHECK-NEXT:    [[PM:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[P]], i64 [[M]])
+; CHECK-NEXT:    [[R:%.*]] = ptrtoint ptr [[PM]] to i64
+; CHECK-NEXT:    ret i64 [[R]]
+;
+  %pm = call ptr @llvm.ptrmask.p0.i64(ptr %p, i64 %m)
+  %r = ptrtoint ptr %pm to i64
+  ret i64 %r
+}
+
+; This succeeds because (ptrtoint i32) gets folded to (trunc i32 (ptrtoint i64))
+define i32 @ptrtoint_of_ptrmask2(ptr %p, i64 %m) {
+; CHECK-LABEL: define i32 @ptrtoint_of_ptrmask2
+; CHECK-SAME: (ptr [[P:%.*]], i64 [[M:%.*]]) {
+; CHECK-NEXT:    [[PM:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[P]], i64 [[M]])
+; CHECK-NEXT:    [[TMP1:%.*]] = ptrtoint ptr [[PM]] to i64
+; CHECK-NEXT:    [[R:%.*]] = trunc i64 [[TMP1]] to i32
+; CHECK-NEXT:    ret i32 [[R]]
+;
+  %pm = call ptr @llvm.ptrmask.p0.i64(ptr %p, i64 %m)
+  %r = ptrtoint ptr %pm to i32
+  ret i32 %r
+}
+
+define <2 x i64> @ptrtoint_of_ptrmask_vec(<2 x ptr> %p, <2 x i64> %m) {
+; CHECK-LABEL: define <2 x i64> @ptrtoint_of_ptrmask_vec
+; CHECK-SAME: (<2 x ptr> [[P:%.*]], <2 x i64> [[M:%.*]]) {
+; CHECK-NEXT:    [[PM:%.*]] = call <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> [[P]], <2 x i64> [[M]])
+; CHECK-NEXT:    [[R:%.*]] = ptrtoint <2 x ptr> [[PM]] to <2 x i64>
+; CHECK-NEXT:    ret <2 x i64> [[R]]
+;
+  %pm = call <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> %p, <2 x i64> %m)
+  %r = ptrtoint <2 x ptr> %pm to <2 x i64>
+  ret <2 x i64> %r
+}
+
+define <2 x i32> @ptrtoint_of_ptrmask_vec2(<2 x ptr> %p, <2 x i64> %m) {
+; CHECK-LABEL: define <2 x i32> @ptrtoint_of_ptrmask_vec2
+; CHECK-SAME: (<2 x ptr> [[P:%.*]], <2 x i64> [[M:%.*]]) {
+; CHECK-NEXT:    [[PM:%.*]] = call <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> [[P]], <2 x i64> [[M]])
+; CHECK-NEXT:    [[TMP1:%.*]] = ptrtoint <2 x ptr> [[PM]] to <2 x i64>
+; CHECK-NEXT:    [[R:%.*]] = trunc <2 x i64> [[TMP1]] to <2 x i32>
+; CHECK-NEXT:    ret <2 x i32> [[R]]
+;
+  %pm = call <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> %p, <2 x i64> %m)
+  %r = ptrtoint <2 x ptr> %pm to <2 x i32>
+  ret <2 x i32> %r
+}
+
+define i64 @ptrtoint_of_ptrmask_fail(ptr addrspace(1) %p, i32 %m) {
+; CHECK-LABEL: define i64 @ptrtoint_of_ptrmask_fail
+; CHECK-SAME: (ptr addrspace(1) [[P:%.*]], i32 [[M:%.*]]) {
+; CHECK-NEXT:    [[PM:%.*]] = call ptr addrspace(1) @llvm.ptrmask.p1.i32(ptr addrspace(1) [[P]], i32 [[M]])
+; CHECK-NEXT:    [[R:%.*]] = ptrtoint ptr addrspace(1) [[PM]] to i64
+; CHECK-NEXT:    ret i64 [[R]]
+;
+  %pm = call ptr addrspace(1) @llvm.ptrmask.p1.i32(ptr addrspace(1) %p, i32 %m)
+  %r = ptrtoint ptr addrspace(1) %pm to i64
+  ret i64 %r
+}
+
+define <2 x i32> @ptrtoint_of_ptrmask_vec_fail(<2 x ptr addrspace(1) > %p, <2 x i32> %m) {
+; CHECK-LABEL: define <2 x i32> @ptrtoint_of_ptrmask_vec_fail
+; CHECK-SAME: (<2 x ptr addrspace(1)> [[P:%.*]], <2 x i32> [[M:%.*]]) {
+; CHECK-NEXT:    [[PM:%.*]] = call <2 x ptr addrspace(1)> @llvm.ptrmask.v2p1.v2i32(<2 x ptr addrspace(1)> [[P]], <2 x i32> [[M]])
+; CHECK-NEXT:    [[TMP1:%.*]] = ptrtoint <2 x ptr addrspace(1)> [[PM]] to <2 x i64>
+; CHECK-NEXT:    [[R:%.*]] = trunc <2 x i64> [[TMP1]] to <2 x i32>
+; CHECK-NEXT:    ret <2 x i32> [[R]]
+;
+  %pm = call <2 x ptr addrspace(1) > @llvm.ptrmask.v2p1.v2i32(<2 x ptr addrspace(1) > %p, <2 x i32> %m)
+  %r = ptrtoint <2 x ptr addrspace(1) > %pm to <2 x i32>
+  ret <2 x i32> %r
+}
+
+define ptr addrspace(1) @ptrmask_is_null(ptr addrspace(1) align 32 %p) {
+; CHECK-LABEL: define ptr addrspace(1) @ptrmask_is_null
+; CHECK-SAME: (ptr addrspace(1) align 32 [[P:%.*]]) {
+; CHECK-NEXT:    [[R:%.*]] = call ptr addrspace(1) @llvm.ptrmask.p1.i32(ptr addrspace(1) [[P]], i32 31)
+; CHECK-NEXT:    ret ptr addrspace(1) [[R]]
+;
+  %r = call ptr addrspace(1) @llvm.ptrmask.p1.i32(ptr addrspace(1) %p, i32 31)
+  ret ptr addrspace(1) %r
+}
+
+define <2 x ptr addrspace(1) > @ptrmask_is_null_vec(<2 x ptr addrspace(1) > align 64 %p) {
+; CHECK-LABEL: define <2 x ptr addrspace(1)> @ptrmask_is_null_vec
+; CHECK-SAME: (<2 x ptr addrspace(1)> align 64 [[P:%.*]]) {
+; CHECK-NEXT:    [[R:%.*]] = call <2 x ptr addrspace(1)> @llvm.ptrmask.v2p1.v2i32(<2 x ptr addrspace(1)> [[P]], <2 x i32> <i32 31, i32 63>)
+; CHECK-NEXT:    ret <2 x ptr addrspace(1)> [[R]]
+;
+  %r = call <2 x ptr addrspace(1) > @llvm.ptrmask.v2p1.v2i32(<2 x ptr addrspace(1) > %p, <2 x i32> <i32 31, i32 63>)
+  ret <2 x ptr addrspace(1) > %r
+}
+
+define ptr addrspace(1) @ptrmask_is_null_fail(ptr addrspace(1) align 16 %p) {
+; CHECK-LABEL: define ptr addrspace(1) @ptrmask_is_null_fail
+; CHECK-SAME: (ptr addrspace(1) align 16 [[P:%.*]]) {
+; CHECK-NEXT:    [[R:%.*]] = call ptr addrspace(1) @llvm.ptrmask.p1.i32(ptr addrspace(1) [[P]], i32 31)
+; CHECK-NEXT:    ret ptr addrspace(1) [[R]]
+;
+  %r = call ptr addrspace(1) @llvm.ptrmask.p1.i32(ptr addrspace(1) %p, i32 31)
+  ret ptr addrspace(1) %r
+}
+
+define <2 x ptr addrspace(1) > @ptrmask_is_null_vec_fail(<2 x ptr addrspace(1) > align 32 %p) {
+; CHECK-LABEL: define <2 x ptr addrspace(1)> @ptrmask_is_null_vec_fail
+; CHECK-SAME: (<2 x ptr addrspace(1)> align 32 [[P:%.*]]) {
+; CHECK-NEXT:    [[R:%.*]] = call <2 x ptr addrspace(1)> @llvm.ptrmask.v2p1.v2i32(<2 x ptr addrspace(1)> [[P]], <2 x i32> <i32 31, i32 63>)
+; CHECK-NEXT:    ret <2 x ptr addrspace(1)> [[R]]
+;
+  %r = call <2 x ptr addrspace(1) > @llvm.ptrmask.v2p1.v2i32(<2 x ptr addrspace(1) > %p, <2 x i32> <i32 31, i32 63>)
+  ret <2 x ptr addrspace(1) > %r
+}
+
+define ptr @ptrmask_maintain_provenance_i64(ptr %p0) {
+; CHECK-LABEL: define ptr @ptrmask_maintain_provenance_i64
+; CHECK-SAME: (ptr [[P0:%.*]]) {
+; CHECK-NEXT:    [[R:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[P0]], i64 0)
+; CHECK-NEXT:    ret ptr [[R]]
+;
+  %r = call ptr @llvm.ptrmask.p0.i64(ptr %p0, i64 0)
+  ret ptr %r
+}
+
+define ptr addrspace(1) @ptrmask_maintain_provenance_i32(ptr addrspace(1) %p0) {
+; CHECK-LABEL: define ptr addrspace(1) @ptrmask_maintain_provenance_i32
+; CHECK-SAME: (ptr addrspace(1) [[P0:%.*]]) {
+; CHECK-NEXT:    [[R:%.*]] = call ptr addrspace(1) @llvm.ptrmask.p1.i32(ptr addrspace(1) [[P0]], i32 0)
+; CHECK-NEXT:    ret ptr addrspace(1) [[R]]
+;
+  %r = call ptr addrspace(1) @llvm.ptrmask.p1.i32(ptr addrspace(1) %p0, i32 0)
+  ret ptr addrspace(1) %r
+}
+
+define ptr @ptrmask_is_useless0(i64 %i, i64 %m) {
+; CHECK-LABEL: define ptr @ptrmask_is_useless0
+; CHECK-SAME: (i64 [[I:%.*]], i64 [[M:%.*]]) {
+; CHECK-NEXT:    [[M0:%.*]] = and i64 [[M]], -4
+; CHECK-NEXT:    [[I0:%.*]] = and i64 [[I]], -4
+; CHECK-NEXT:    [[P0:%.*]] = inttoptr i64 [[I0]] to ptr
+; CHECK-NEXT:    [[R:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[P0]], i64 [[M0]])
+; CHECK-NEXT:    ret ptr [[R]]
+;
+  %m0 = and i64 %m, -4
+  %i0 = and i64 %i, -4
+  %p0 = inttoptr i64 %i0 to ptr
+  %r = call ptr @llvm.ptrmask.p0.i64(ptr %p0, i64 %m0)
+  ret ptr %r
+}
+
+define ptr @ptrmask_is_useless1(i64 %i, i64 %m) {
+; CHECK-LABEL: define ptr @ptrmask_is_useless1
+; CHECK-SAME: (i64 [[I:%.*]], i64 [[M:%.*]]) {
+; CHECK-NEXT:    [[M0:%.*]] = and i64 [[M]], -4
+; CHECK-NEXT:    [[I0:%.*]] = and i64 [[I]], -8
+; CHECK-NEXT:    [[P0:%.*]] = inttoptr i64 [[I0]] to ptr
+; CHECK-NEXT:    [[R:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[P0]], i64 [[M0]])
+; CHECK-NEXT:    ret ptr [[R]]
+;
+  %m0 = and i64 %m, -4
+  %i0 = and i64 %i, -8
+  %p0 = inttoptr i64 %i0 to ptr
+  %r = call ptr @llvm.ptrmask.p0.i64(ptr %p0, i64 %m0)
+  ret ptr %r
+}
+
+define ptr @ptrmask_is_useless2(i64 %i, i64 %m) {
+; CHECK-LABEL: define ptr @ptrmask_is_useless2
+; CHECK-SAME: (i64 [[I:%.*]], i64 [[M:%.*]]) {
+; CHECK-NEXT:    [[M0:%.*]] = and i64 [[M]], 127
+; CHECK-NEXT:    [[I0:%.*]] = and i64 [[I]], 31
+; CHECK-NEXT:    [[P0:%.*]] = inttoptr i64 [[I0]] to ptr
+; CHECK-NEXT:    [[R:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[P0]], i64 [[M0]])
+; CHECK-NEXT:    ret ptr [[R]]
+;
+  %m0 = and i64 %m, 127
+  %i0 = and i64 %i, 31
+  %p0 = inttoptr i64 %i0 to ptr
+  %r = call ptr @llvm.ptrmask.p0.i64(ptr %p0, i64 %m0)
+  ret ptr %r
+}
+
+define ptr @ptrmask_is_useless3(i64 %i, i64 %m) {
+; CHECK-LABEL: define ptr @ptrmask_is_useless3
+; CHECK-SAME: (i64 [[I:%.*]], i64 [[M:%.*]]) {
+; CHECK-NEXT:    [[M0:%.*]] = and i64 [[M]], 127
+; CHECK-NEXT:    [[I0:%.*]] = and i64 [[I]], 127
+; CHECK-NEXT:    [[P0:%.*]] = inttoptr i64 [[I0]] to ptr
+; CHECK-NEXT:    [[R:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[P0]], i64 [[M0]])
+; CHECK-NEXT:    ret ptr [[R]]
+;
+  %m0 = and i64 %m, 127
+  %i0 = and i64 %i, 127
+  %p0 = inttoptr i64 %i0 to ptr
+  %r = call ptr @llvm.ptrmask.p0.i64(ptr %p0, i64 %m0)
+  ret ptr %r
+}
+
+define ptr @ptrmask_is_useless4(i64 %i, i64 %m) {
+; CHECK-LABEL: define ptr @ptrmask_is_useless4
+; CHECK-SAME: (i64 [[I:%.*]], i64 [[M:%.*]]) {
+; CHECK-NEXT:    [[M0:%.*]] = or i64 [[M]], -4
+; CHECK-NEXT:    [[I0:%.*]] = and i64 [[I]], -4
+; CHECK-NEXT:    [[P0:%.*]] = inttoptr i64 [[I0]] to ptr
+; CHECK-NEXT:    [[R:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[P0]], i64 [[M0]])
+; CHECK-NEXT:    ret ptr [[R]]
+;
+  %m0 = or i64 %m, -4
+  %i0 = and i64 %i, -4
+  %p0 = inttoptr i64 %i0 to ptr
+  %r = call ptr @llvm.ptrmask.p0.i64(ptr %p0, i64 %m0)
+  ret ptr %r
+}
+
+define <2 x ptr> @ptrmask_is_useless_vec(<2 x i64> %i, <2 x i64> %m) {
+; CHECK-LABEL: define <2 x ptr> @ptrmask_is_useless_vec
+; CHECK-SAME: (<2 x i64> [[I:%.*]], <2 x i64> [[M:%.*]]) {
+; CHECK-NEXT:    [[M0:%.*]] = and <2 x i64> [[M]], <i64 127, i64 127>
+; CHECK-NEXT:    [[I0:%.*]] = and <2 x i64> [[I]], <i64 31, i64 31>
+; CHECK-NEXT:    [[P0:%.*]] = inttoptr <2 x i64> [[I0]] to <2 x ptr>
+; CHECK-NEXT:    [[R:%.*]] = call <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> [[P0]], <2 x i64> [[M0]])
+; CHECK-NEXT:    ret <2 x ptr> [[R]]
+;
+  %m0 = and <2 x i64> %m, <i64 127, i64 127>
+  %i0 = and <2 x i64> %i, <i64 31, i64 31>
+  %p0 = inttoptr <2 x i64> %i0 to <2 x ptr>
+  %r = call <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> %p0, <2 x i64> %m0)
+  ret <2 x ptr> %r
+}
+
+define <2 x ptr> @ptrmask_is_useless_vec_todo(<2 x i64> %i, <2 x i64> %m) {
+; CHECK-LABEL: define <2 x ptr> @ptrmask_is_useless_vec_todo
+; CHECK-SAME: (<2 x i64> [[I:%.*]], <2 x i64> [[M:%.*]]) {
+; CHECK-NEXT:    [[M0:%.*]] = and <2 x i64> [[M]], <i64 127, i64 127>
+; CHECK-NEXT:    [[I0:%.*]] = and <2 x i64> [[I]], <i64 31, i64 127>
+; CHECK-NEXT:    [[P0:%.*]] = inttoptr <2 x i64> [[I0]] to <2 x ptr>
+; CHECK-NEXT:    [[R:%.*]] = call <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> [[P0]], <2 x i64> [[M0]])
+; CHECK-NEXT:    ret <2 x ptr> [[R]]
+;
+  %m0 = and <2 x i64> %m, <i64 127, i64 127>
+  %i0 = and <2 x i64> %i, <i64 31, i64 127>
+  %p0 = inttoptr <2 x i64> %i0 to <2 x ptr>
+  %r = call <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> %p0, <2 x i64> %m0)
+  ret <2 x ptr> %r
+}
+
+define ptr @ptrmask_is_useless_fail0(i64 %i, i64 %m) {
+; CHECK-LABEL: define ptr @ptrmask_is_useless_fail0
+; CHECK-SAME: (i64 [[I:%.*]], i64 [[M:%.*]]) {
+; CHECK-NEXT:    [[M0:%.*]] = and i64 [[M]], -4
+; CHECK-NEXT:    [[I0:%.*]] = or i64 [[I]], -4
+; CHECK-NEXT:    [[P0:%.*]] = inttoptr i64 [[I0]] to ptr
+; CHECK-NEXT:    [[R:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr nonnull [[P0]], i64 [[M0]])
+; CHECK-NEXT:    ret ptr [[R]]
+;
+  %m0 = and i64 %m, -4
+  %i0 = or i64 %i, -4
+  %p0 = inttoptr i64 %i0 to ptr
+  %r = call ptr @llvm.ptrmask.p0.i64(ptr %p0, i64 %m0)
+  ret ptr %r
+}
+
+define ptr @ptrmask_is_useless_fail1(i64 %i, i64 %m) {
+; CHECK-LABEL: define ptr @ptrmask_is_useless_fail1
+; CHECK-SAME: (i64 [[I:%.*]], i64 [[M:%.*]]) {
+; CHECK-NEXT:    [[M0:%.*]] = and i64 [[M]], 127
+; CHECK-NEXT:    [[I0:%.*]] = and i64 [[I]], 511
+; CHECK-NEXT:    [[P0:%.*]] = inttoptr i64 [[I0]] to ptr
+; CHECK-NEXT:    [[R:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[P0]], i64 [[M0]])
+; CHECK-NEXT:    ret ptr [[R]]
+;
+  %m0 = and i64 %m, 127
+  %i0 = and i64 %i, 511
+  %p0 = inttoptr i64 %i0 to ptr
+  %r = call ptr @llvm.ptrmask.p0.i64(ptr %p0, i64 %m0)
+  ret ptr %r
+}

>From 50e7b599c5035abfbe4b8bc3f46e695102315fdb Mon Sep 17 00:00:00 2001
From: Noah Goldstein <goldstein.w.n at gmail.com>
Date: Fri, 27 Oct 2023 11:35:29 -0500
Subject: [PATCH 4/7] [InstCombine] Implement `SimplifyDemandedBits` for
 `llvm.ptrmask`

Logic basically copies 'and' but we can't return a constant if the
result == rhs (mask) so that case is skipped.
---
 .../InstCombine/InstCombineCalls.cpp          |  5 ++
 .../InstCombine/InstCombineInternal.h         |  1 +
 .../InstCombineSimplifyDemanded.cpp           | 71 ++++++++++++++++---
 llvm/test/Transforms/InstCombine/ptrmask.ll   | 28 +++-----
 4 files changed, 79 insertions(+), 26 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 8acdb7ef4545b3c..dec90562f66815e 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -1962,6 +1962,11 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
     break;
   }
   case Intrinsic::ptrmask: {
+    unsigned BitWidth = DL.getPointerTypeSizeInBits(II->getType());
+    KnownBits Known(BitWidth);
+    if (SimplifyDemandedInstructionBits(*II, Known))
+      return II;
+
     Value *InnerPtr, *InnerMask;
     if (match(II->getArgOperand(0),
               m_OneUse(m_Intrinsic<Intrinsic::ptrmask>(m_Value(InnerPtr),
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
index 01c89ea06f2d9df..34b10220ec88aba 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
+++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
@@ -544,6 +544,7 @@ class LLVM_LIBRARY_VISIBILITY InstCombinerImpl final
   /// Tries to simplify operands to an integer instruction based on its
   /// demanded bits.
   bool SimplifyDemandedInstructionBits(Instruction &Inst);
+  bool SimplifyDemandedInstructionBits(Instruction &Inst, KnownBits &Known);
 
   Value *SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
                                     APInt &UndefElts, unsigned Depth = 0,
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
index cd6b017874e8d6c..c1d56a14957a663 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
@@ -48,15 +48,20 @@ static bool ShrinkDemandedConstant(Instruction *I, unsigned OpNo,
   return true;
 }
 
+/// Returns the bitwidth of the given scalar or pointer type. For vector types,
+/// returns the element type's bitwidth.
+static unsigned getBitWidth(Type *Ty, const DataLayout &DL) {
+  if (unsigned BitWidth = Ty->getScalarSizeInBits())
+    return BitWidth;
 
+  return DL.getPointerTypeSizeInBits(Ty);
+}
 
 /// Inst is an integer instruction that SimplifyDemandedBits knows about. See if
 /// the instruction has any properties that allow us to simplify its operands.
-bool InstCombinerImpl::SimplifyDemandedInstructionBits(Instruction &Inst) {
-  unsigned BitWidth = Inst.getType()->getScalarSizeInBits();
-  KnownBits Known(BitWidth);
-  APInt DemandedMask(APInt::getAllOnes(BitWidth));
-
+bool InstCombinerImpl::SimplifyDemandedInstructionBits(Instruction &Inst,
+                                                       KnownBits &Known) {
+  APInt DemandedMask(APInt::getAllOnes(Known.getBitWidth()));
   Value *V = SimplifyDemandedUseBits(&Inst, DemandedMask, Known,
                                      0, &Inst);
   if (!V) return false;
@@ -65,6 +70,13 @@ bool InstCombinerImpl::SimplifyDemandedInstructionBits(Instruction &Inst) {
   return true;
 }
 
+/// Inst is an integer instruction that SimplifyDemandedBits knows about. See if
+/// the instruction has any properties that allow us to simplify its operands.
+bool InstCombinerImpl::SimplifyDemandedInstructionBits(Instruction &Inst) {
+  KnownBits Known(getBitWidth(Inst.getType(), DL));
+  return SimplifyDemandedInstructionBits(Inst, Known);
+}
+
 /// This form of SimplifyDemandedBits simplifies the specified instruction
 /// operand if possible, updating it in place. It returns true if it made any
 /// change and false otherwise.
@@ -143,7 +155,6 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
     return SimplifyMultipleUseDemandedBits(I, DemandedMask, Known, Depth, CxtI);
 
   KnownBits LHSKnown(BitWidth), RHSKnown(BitWidth);
-
   // If this is the root being simplified, allow it to have multiple uses,
   // just set the DemandedMask to all bits so that we can try to simplify the
   // operands.  This allows visitTruncInst (for example) to simplify the
@@ -893,6 +904,48 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
         }
         break;
       }
+      case Intrinsic::ptrmask: {
+        unsigned MaskWidth = I->getOperand(1)->getType()->getScalarSizeInBits();
+        RHSKnown = KnownBits(MaskWidth);
+        // If either the LHS or the RHS are Zero, the result is zero.
+        if (SimplifyDemandedBits(I, 0, DemandedMask, LHSKnown, Depth + 1) ||
+            SimplifyDemandedBits(
+                I, 1, (DemandedMask & ~LHSKnown.Zero).zextOrTrunc(MaskWidth),
+                RHSKnown, Depth + 1))
+          return I;
+
+        // TODO: Should be 1-extend
+        RHSKnown = RHSKnown.anyextOrTrunc(BitWidth);
+        assert(!RHSKnown.hasConflict() && "Bits known to be one AND zero?");
+        assert(!LHSKnown.hasConflict() && "Bits known to be one AND zero?");
+
+        Known = LHSKnown & RHSKnown;
+        KnownBitsComputed = true;
+
+        // If the client is only demanding bits we know to be zero, return
+        // `llvm.ptrmask(p, 0)`. We can't return `null` here due to pointer
+        // provenance, but making the mask zero will be easily optimizable in
+        // the backend.
+        if (DemandedMask.isSubsetOf(Known.Zero) &&
+            !match(I->getOperand(1), m_Zero()))
+          return replaceOperand(
+              *I, 1, Constant::getNullValue(I->getOperand(1)->getType()));
+
+        // Mask in demanded space does nothing.
+        // NOTE: We may have attributes associated with the return value of the
+        // llvm.ptrmask intrinsic that will be lost when we just return the
+        // operand. We should try to preserve them.
+        if (DemandedMask.isSubsetOf(RHSKnown.One | LHSKnown.Zero))
+          return I->getOperand(0);
+
+        // If the RHS is a constant, see if we can simplify it.
+        if (ShrinkDemandedConstant(
+                I, 1, (DemandedMask & ~LHSKnown.Zero).zextOrTrunc(MaskWidth)))
+          return I;
+
+        break;
+      }
+
       case Intrinsic::fshr:
       case Intrinsic::fshl: {
         const APInt *SA;
@@ -978,8 +1031,10 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
   }
 
   // If the client is only demanding bits that we know, return the known
-  // constant.
-  if (DemandedMask.isSubsetOf(Known.Zero|Known.One))
+  // constant. We can't directly simplify pointers as a constant because of
+  // pointer provanance.
+  // TODO: We could return `(inttoptr const)` for pointers.
+  if (!V->getType()->isPointerTy() && DemandedMask.isSubsetOf(Known.Zero | Known.One))
     return Constant::getIntegerValue(VTy, Known.One);
   return nullptr;
 }
diff --git a/llvm/test/Transforms/InstCombine/ptrmask.ll b/llvm/test/Transforms/InstCombine/ptrmask.ll
index 56aef2d18ed51f0..be8ed0baa0bbb05 100644
--- a/llvm/test/Transforms/InstCombine/ptrmask.ll
+++ b/llvm/test/Transforms/InstCombine/ptrmask.ll
@@ -82,7 +82,7 @@ define ptr @ptrmask_combine_add_nonnull(ptr %p) {
 ; CHECK-SAME: (ptr [[P:%.*]]) {
 ; CHECK-NEXT:    [[PM0:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[P]], i64 -64)
 ; CHECK-NEXT:    [[PGEP:%.*]] = getelementptr i8, ptr [[PM0]], i64 33
-; CHECK-NEXT:    [[R:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[PGEP]], i64 -16)
+; CHECK-NEXT:    [[R:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[PGEP]], i64 -32)
 ; CHECK-NEXT:    ret ptr [[R]]
 ;
   %pm0 = call ptr @llvm.ptrmask.p0.i64(ptr %p, i64 -64)
@@ -230,7 +230,7 @@ define <2 x i32> @ptrtoint_of_ptrmask_vec_fail(<2 x ptr addrspace(1) > %p, <2 x
 define ptr addrspace(1) @ptrmask_is_null(ptr addrspace(1) align 32 %p) {
 ; CHECK-LABEL: define ptr addrspace(1) @ptrmask_is_null
 ; CHECK-SAME: (ptr addrspace(1) align 32 [[P:%.*]]) {
-; CHECK-NEXT:    [[R:%.*]] = call ptr addrspace(1) @llvm.ptrmask.p1.i32(ptr addrspace(1) [[P]], i32 31)
+; CHECK-NEXT:    [[R:%.*]] = call ptr addrspace(1) @llvm.ptrmask.p1.i32(ptr addrspace(1) [[P]], i32 0)
 ; CHECK-NEXT:    ret ptr addrspace(1) [[R]]
 ;
   %r = call ptr addrspace(1) @llvm.ptrmask.p1.i32(ptr addrspace(1) %p, i32 31)
@@ -250,7 +250,7 @@ define <2 x ptr addrspace(1) > @ptrmask_is_null_vec(<2 x ptr addrspace(1) > alig
 define ptr addrspace(1) @ptrmask_is_null_fail(ptr addrspace(1) align 16 %p) {
 ; CHECK-LABEL: define ptr addrspace(1) @ptrmask_is_null_fail
 ; CHECK-SAME: (ptr addrspace(1) align 16 [[P:%.*]]) {
-; CHECK-NEXT:    [[R:%.*]] = call ptr addrspace(1) @llvm.ptrmask.p1.i32(ptr addrspace(1) [[P]], i32 31)
+; CHECK-NEXT:    [[R:%.*]] = call ptr addrspace(1) @llvm.ptrmask.p1.i32(ptr addrspace(1) [[P]], i32 16)
 ; CHECK-NEXT:    ret ptr addrspace(1) [[R]]
 ;
   %r = call ptr addrspace(1) @llvm.ptrmask.p1.i32(ptr addrspace(1) %p, i32 31)
@@ -290,10 +290,9 @@ define ptr addrspace(1) @ptrmask_maintain_provenance_i32(ptr addrspace(1) %p0) {
 define ptr @ptrmask_is_useless0(i64 %i, i64 %m) {
 ; CHECK-LABEL: define ptr @ptrmask_is_useless0
 ; CHECK-SAME: (i64 [[I:%.*]], i64 [[M:%.*]]) {
-; CHECK-NEXT:    [[M0:%.*]] = and i64 [[M]], -4
 ; CHECK-NEXT:    [[I0:%.*]] = and i64 [[I]], -4
 ; CHECK-NEXT:    [[P0:%.*]] = inttoptr i64 [[I0]] to ptr
-; CHECK-NEXT:    [[R:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[P0]], i64 [[M0]])
+; CHECK-NEXT:    [[R:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[P0]], i64 [[M]])
 ; CHECK-NEXT:    ret ptr [[R]]
 ;
   %m0 = and i64 %m, -4
@@ -306,10 +305,9 @@ define ptr @ptrmask_is_useless0(i64 %i, i64 %m) {
 define ptr @ptrmask_is_useless1(i64 %i, i64 %m) {
 ; CHECK-LABEL: define ptr @ptrmask_is_useless1
 ; CHECK-SAME: (i64 [[I:%.*]], i64 [[M:%.*]]) {
-; CHECK-NEXT:    [[M0:%.*]] = and i64 [[M]], -4
 ; CHECK-NEXT:    [[I0:%.*]] = and i64 [[I]], -8
 ; CHECK-NEXT:    [[P0:%.*]] = inttoptr i64 [[I0]] to ptr
-; CHECK-NEXT:    [[R:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[P0]], i64 [[M0]])
+; CHECK-NEXT:    [[R:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[P0]], i64 [[M]])
 ; CHECK-NEXT:    ret ptr [[R]]
 ;
   %m0 = and i64 %m, -4
@@ -322,10 +320,9 @@ define ptr @ptrmask_is_useless1(i64 %i, i64 %m) {
 define ptr @ptrmask_is_useless2(i64 %i, i64 %m) {
 ; CHECK-LABEL: define ptr @ptrmask_is_useless2
 ; CHECK-SAME: (i64 [[I:%.*]], i64 [[M:%.*]]) {
-; CHECK-NEXT:    [[M0:%.*]] = and i64 [[M]], 127
 ; CHECK-NEXT:    [[I0:%.*]] = and i64 [[I]], 31
 ; CHECK-NEXT:    [[P0:%.*]] = inttoptr i64 [[I0]] to ptr
-; CHECK-NEXT:    [[R:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[P0]], i64 [[M0]])
+; CHECK-NEXT:    [[R:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[P0]], i64 [[M]])
 ; CHECK-NEXT:    ret ptr [[R]]
 ;
   %m0 = and i64 %m, 127
@@ -338,10 +335,9 @@ define ptr @ptrmask_is_useless2(i64 %i, i64 %m) {
 define ptr @ptrmask_is_useless3(i64 %i, i64 %m) {
 ; CHECK-LABEL: define ptr @ptrmask_is_useless3
 ; CHECK-SAME: (i64 [[I:%.*]], i64 [[M:%.*]]) {
-; CHECK-NEXT:    [[M0:%.*]] = and i64 [[M]], 127
 ; CHECK-NEXT:    [[I0:%.*]] = and i64 [[I]], 127
 ; CHECK-NEXT:    [[P0:%.*]] = inttoptr i64 [[I0]] to ptr
-; CHECK-NEXT:    [[R:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[P0]], i64 [[M0]])
+; CHECK-NEXT:    [[R:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[P0]], i64 [[M]])
 ; CHECK-NEXT:    ret ptr [[R]]
 ;
   %m0 = and i64 %m, 127
@@ -354,11 +350,9 @@ define ptr @ptrmask_is_useless3(i64 %i, i64 %m) {
 define ptr @ptrmask_is_useless4(i64 %i, i64 %m) {
 ; CHECK-LABEL: define ptr @ptrmask_is_useless4
 ; CHECK-SAME: (i64 [[I:%.*]], i64 [[M:%.*]]) {
-; CHECK-NEXT:    [[M0:%.*]] = or i64 [[M]], -4
 ; CHECK-NEXT:    [[I0:%.*]] = and i64 [[I]], -4
 ; CHECK-NEXT:    [[P0:%.*]] = inttoptr i64 [[I0]] to ptr
-; CHECK-NEXT:    [[R:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[P0]], i64 [[M0]])
-; CHECK-NEXT:    ret ptr [[R]]
+; CHECK-NEXT:    ret ptr [[P0]]
 ;
   %m0 = or i64 %m, -4
   %i0 = and i64 %i, -4
@@ -370,10 +364,9 @@ define ptr @ptrmask_is_useless4(i64 %i, i64 %m) {
 define <2 x ptr> @ptrmask_is_useless_vec(<2 x i64> %i, <2 x i64> %m) {
 ; CHECK-LABEL: define <2 x ptr> @ptrmask_is_useless_vec
 ; CHECK-SAME: (<2 x i64> [[I:%.*]], <2 x i64> [[M:%.*]]) {
-; CHECK-NEXT:    [[M0:%.*]] = and <2 x i64> [[M]], <i64 127, i64 127>
 ; CHECK-NEXT:    [[I0:%.*]] = and <2 x i64> [[I]], <i64 31, i64 31>
 ; CHECK-NEXT:    [[P0:%.*]] = inttoptr <2 x i64> [[I0]] to <2 x ptr>
-; CHECK-NEXT:    [[R:%.*]] = call <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> [[P0]], <2 x i64> [[M0]])
+; CHECK-NEXT:    [[R:%.*]] = call <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> [[P0]], <2 x i64> [[M]])
 ; CHECK-NEXT:    ret <2 x ptr> [[R]]
 ;
   %m0 = and <2 x i64> %m, <i64 127, i64 127>
@@ -386,10 +379,9 @@ define <2 x ptr> @ptrmask_is_useless_vec(<2 x i64> %i, <2 x i64> %m) {
 define <2 x ptr> @ptrmask_is_useless_vec_todo(<2 x i64> %i, <2 x i64> %m) {
 ; CHECK-LABEL: define <2 x ptr> @ptrmask_is_useless_vec_todo
 ; CHECK-SAME: (<2 x i64> [[I:%.*]], <2 x i64> [[M:%.*]]) {
-; CHECK-NEXT:    [[M0:%.*]] = and <2 x i64> [[M]], <i64 127, i64 127>
 ; CHECK-NEXT:    [[I0:%.*]] = and <2 x i64> [[I]], <i64 31, i64 127>
 ; CHECK-NEXT:    [[P0:%.*]] = inttoptr <2 x i64> [[I0]] to <2 x ptr>
-; CHECK-NEXT:    [[R:%.*]] = call <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> [[P0]], <2 x i64> [[M0]])
+; CHECK-NEXT:    [[R:%.*]] = call <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> [[P0]], <2 x i64> [[M]])
 ; CHECK-NEXT:    ret <2 x ptr> [[R]]
 ;
   %m0 = and <2 x i64> %m, <i64 127, i64 127>

>From 37df7e31a3d956a294514f0cfc4f0b412a8ec614 Mon Sep 17 00:00:00 2001
From: Noah Goldstein <goldstein.w.n at gmail.com>
Date: Fri, 27 Oct 2023 11:35:41 -0500
Subject: [PATCH 5/7] [InstCombine] Deduce `align` and `nonnull` return
 attributes for `llvm.ptrmask`

We can deduce the former based on the mask / incoming pointer
alignment.  We can set the latter based if know the result in non-zero
(this is essentially just caching our analysis result).

Differential Revision: https://reviews.llvm.org/D156636
---
 clang/test/CodeGen/arm64_32-vaarg.c           |  2 +-
 .../InstCombine/InstCombineCalls.cpp          | 22 ++++++++++++
 .../test/Transforms/InstCombine/align-addr.ll |  8 ++---
 .../InstCombine/consecutive-ptrmask.ll        |  2 +-
 llvm/test/Transforms/InstCombine/ptrmask.ll   | 36 +++++++++----------
 5 files changed, 46 insertions(+), 24 deletions(-)

diff --git a/clang/test/CodeGen/arm64_32-vaarg.c b/clang/test/CodeGen/arm64_32-vaarg.c
index 9fbcf88ecfdcc33..3f1f4443436da15 100644
--- a/clang/test/CodeGen/arm64_32-vaarg.c
+++ b/clang/test/CodeGen/arm64_32-vaarg.c
@@ -29,7 +29,7 @@ long long test_longlong(OneLongLong input, va_list *mylist) {
   // CHECK-LABEL: define{{.*}} i64 @test_longlong(i64 %input
   // CHECK: [[STARTPTR:%.*]] = load ptr, ptr %mylist
   // CHECK: [[ALIGN_TMP:%.+]] = getelementptr inbounds i8, ptr [[STARTPTR]], i32 7
-  // CHECK: [[ALIGNED_ADDR:%.+]] = tail call ptr @llvm.ptrmask.p0.i32(ptr nonnull [[ALIGN_TMP]], i32 -8)
+  // CHECK: [[ALIGNED_ADDR:%.+]] = tail call align 8 ptr @llvm.ptrmask.p0.i32(ptr nonnull [[ALIGN_TMP]], i32 -8)
   // CHECK: [[NEXT:%.*]] = getelementptr inbounds i8, ptr [[ALIGNED_ADDR]], i32 8
   // CHECK: store ptr [[NEXT]], ptr %mylist
 
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index dec90562f66815e..9ca8869f9357d7e 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -1978,6 +1978,28 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
           *II, Builder.CreateIntrinsic(InnerPtr->getType(), Intrinsic::ptrmask,
                                        {InnerPtr, NewMask}));
     }
+    bool Changed = false;
+    // See if we can deduce non-null.
+    if (!CI.hasRetAttr(Attribute::NonNull) &&
+        (Known.isNonZero() ||
+         isKnownNonZero(II, DL, /*Depth*/ 0, &AC, II, &DT))) {
+      CI.addRetAttr(Attribute::NonNull);
+      Changed = true;
+    }
+
+    unsigned NewAlignmentLog =
+        std::min(Value::MaxAlignmentExponent,
+                 std::min(BitWidth - 1, Known.countMinTrailingZeros()));
+    // Known bits will capture if we had alignment information associated with
+    // the pointer argument.
+    if (NewAlignmentLog > Log2(CI.getRetAlign().valueOrOne())) {
+      CI.removeRetAttr(Attribute::Alignment);
+      CI.addRetAttr(Attribute::getWithAlignment(
+          CI.getContext(), Align(uint64_t(1) << NewAlignmentLog)));
+      Changed = true;
+    }
+    if (Changed)
+      return &CI;
     break;
   }
   case Intrinsic::uadd_with_overflow:
diff --git a/llvm/test/Transforms/InstCombine/align-addr.ll b/llvm/test/Transforms/InstCombine/align-addr.ll
index 1e49cddf7ffe79d..facb5df08a82f43 100644
--- a/llvm/test/Transforms/InstCombine/align-addr.ll
+++ b/llvm/test/Transforms/InstCombine/align-addr.ll
@@ -135,7 +135,7 @@ define <16 x i8> @ptrmask_align_unknown_ptr_align1(ptr align 1 %ptr, i64 %mask)
 
 define <16 x i8> @ptrmask_align_unknown_ptr_align8(ptr align 8 %ptr, i64 %mask) {
 ; CHECK-LABEL: @ptrmask_align_unknown_ptr_align8(
-; CHECK-NEXT:    [[ALIGNED:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[PTR:%.*]], i64 [[MASK:%.*]])
+; CHECK-NEXT:    [[ALIGNED:%.*]] = call align 8 ptr @llvm.ptrmask.p0.i64(ptr [[PTR:%.*]], i64 [[MASK:%.*]])
 ; CHECK-NEXT:    [[LOAD:%.*]] = load <16 x i8>, ptr [[ALIGNED]], align 1
 ; CHECK-NEXT:    ret <16 x i8> [[LOAD]]
 ;
@@ -147,7 +147,7 @@ define <16 x i8> @ptrmask_align_unknown_ptr_align8(ptr align 8 %ptr, i64 %mask)
 ; Increase load align from 1 to 2
 define <16 x i8> @ptrmask_align2_ptr_align1(ptr align 1 %ptr) {
 ; CHECK-LABEL: @ptrmask_align2_ptr_align1(
-; CHECK-NEXT:    [[ALIGNED:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[PTR:%.*]], i64 -2)
+; CHECK-NEXT:    [[ALIGNED:%.*]] = call align 2 ptr @llvm.ptrmask.p0.i64(ptr [[PTR:%.*]], i64 -2)
 ; CHECK-NEXT:    [[LOAD:%.*]] = load <16 x i8>, ptr [[ALIGNED]], align 1
 ; CHECK-NEXT:    ret <16 x i8> [[LOAD]]
 ;
@@ -159,7 +159,7 @@ define <16 x i8> @ptrmask_align2_ptr_align1(ptr align 1 %ptr) {
 ; Increase load align from 1 to 4
 define <16 x i8> @ptrmask_align4_ptr_align1(ptr align 1 %ptr) {
 ; CHECK-LABEL: @ptrmask_align4_ptr_align1(
-; CHECK-NEXT:    [[ALIGNED:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[PTR:%.*]], i64 -4)
+; CHECK-NEXT:    [[ALIGNED:%.*]] = call align 4 ptr @llvm.ptrmask.p0.i64(ptr [[PTR:%.*]], i64 -4)
 ; CHECK-NEXT:    [[LOAD:%.*]] = load <16 x i8>, ptr [[ALIGNED]], align 1
 ; CHECK-NEXT:    ret <16 x i8> [[LOAD]]
 ;
@@ -171,7 +171,7 @@ define <16 x i8> @ptrmask_align4_ptr_align1(ptr align 1 %ptr) {
 ; Increase load align from 1 to 8
 define <16 x i8> @ptrmask_align8_ptr_align1(ptr align 1 %ptr) {
 ; CHECK-LABEL: @ptrmask_align8_ptr_align1(
-; CHECK-NEXT:    [[ALIGNED:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[PTR:%.*]], i64 -8)
+; CHECK-NEXT:    [[ALIGNED:%.*]] = call align 8 ptr @llvm.ptrmask.p0.i64(ptr [[PTR:%.*]], i64 -8)
 ; CHECK-NEXT:    [[LOAD:%.*]] = load <16 x i8>, ptr [[ALIGNED]], align 1
 ; CHECK-NEXT:    ret <16 x i8> [[LOAD]]
 ;
diff --git a/llvm/test/Transforms/InstCombine/consecutive-ptrmask.ll b/llvm/test/Transforms/InstCombine/consecutive-ptrmask.ll
index a4722c15614092b..22a7c0c072d9635 100644
--- a/llvm/test/Transforms/InstCombine/consecutive-ptrmask.ll
+++ b/llvm/test/Transforms/InstCombine/consecutive-ptrmask.ll
@@ -63,7 +63,7 @@ define <2 x ptr> @fold_2x_vec_i64(<2 x ptr> %p, <2 x i64> %m0) {
 ; CHECK-LABEL: define <2 x ptr> @fold_2x_vec_i64
 ; CHECK-SAME: (<2 x ptr> [[P:%.*]], <2 x i64> [[M0:%.*]]) {
 ; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i64> [[M0]], <i64 -2, i64 -2>
-; CHECK-NEXT:    [[P1:%.*]] = call <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> [[P]], <2 x i64> [[TMP1]])
+; CHECK-NEXT:    [[P1:%.*]] = call align 2 <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> [[P]], <2 x i64> [[TMP1]])
 ; CHECK-NEXT:    ret <2 x ptr> [[P1]]
 ;
   %p0 = call <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> %p, <2 x i64> %m0)
diff --git a/llvm/test/Transforms/InstCombine/ptrmask.ll b/llvm/test/Transforms/InstCombine/ptrmask.ll
index be8ed0baa0bbb05..9c634eb03cec3aa 100644
--- a/llvm/test/Transforms/InstCombine/ptrmask.ll
+++ b/llvm/test/Transforms/InstCombine/ptrmask.ll
@@ -12,7 +12,7 @@ define ptr @ptrmask_combine_consecutive_preserve_attrs(ptr %p0, i64 %m1) {
 ; CHECK-LABEL: define ptr @ptrmask_combine_consecutive_preserve_attrs
 ; CHECK-SAME: (ptr [[P0:%.*]], i64 [[M1:%.*]]) {
 ; CHECK-NEXT:    [[TMP1:%.*]] = and i64 [[M1]], 224
-; CHECK-NEXT:    [[R:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[P0]], i64 [[TMP1]])
+; CHECK-NEXT:    [[R:%.*]] = call align 32 ptr @llvm.ptrmask.p0.i64(ptr [[P0]], i64 [[TMP1]])
 ; CHECK-NEXT:    ret ptr [[R]]
 ;
   %pm0 = call ptr @llvm.ptrmask.p0.i64(ptr %p0, i64 224)
@@ -47,7 +47,7 @@ define ptr @ptrmask_combine_consecutive_preserve_attrs_fail(ptr %p0, i64 %m0) {
 define ptr @ptrmask_combine_consecutive_preserve_attrs_todo0(ptr %p0) {
 ; CHECK-LABEL: define ptr @ptrmask_combine_consecutive_preserve_attrs_todo0
 ; CHECK-SAME: (ptr [[P0:%.*]]) {
-; CHECK-NEXT:    [[PM0:%.*]] = call noalias ptr @llvm.ptrmask.p0.i64(ptr [[P0]], i64 224)
+; CHECK-NEXT:    [[PM0:%.*]] = call noalias align 32 ptr @llvm.ptrmask.p0.i64(ptr [[P0]], i64 224)
 ; CHECK-NEXT:    ret ptr [[PM0]]
 ;
   %pm0 = call noalias ptr @llvm.ptrmask.p0.i64(ptr %p0, i64 224)
@@ -58,7 +58,7 @@ define ptr @ptrmask_combine_consecutive_preserve_attrs_todo0(ptr %p0) {
 define ptr @ptrmask_combine_consecutive_preserve_attrs_todo1(ptr %p0) {
 ; CHECK-LABEL: define ptr @ptrmask_combine_consecutive_preserve_attrs_todo1
 ; CHECK-SAME: (ptr [[P0:%.*]]) {
-; CHECK-NEXT:    [[PM0:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[P0]], i64 224)
+; CHECK-NEXT:    [[PM0:%.*]] = call align 32 ptr @llvm.ptrmask.p0.i64(ptr [[P0]], i64 224)
 ; CHECK-NEXT:    ret ptr [[PM0]]
 ;
   %pm0 = call ptr @llvm.ptrmask.p0.i64(ptr %p0, i64 224)
@@ -69,7 +69,7 @@ define ptr @ptrmask_combine_consecutive_preserve_attrs_todo1(ptr %p0) {
 define ptr addrspace(1) @ptrmask_combine_consecutive_preserve_attrs_todo2(ptr addrspace(1) %p0) {
 ; CHECK-LABEL: define ptr addrspace(1) @ptrmask_combine_consecutive_preserve_attrs_todo2
 ; CHECK-SAME: (ptr addrspace(1) [[P0:%.*]]) {
-; CHECK-NEXT:    [[R:%.*]] = call ptr addrspace(1) @llvm.ptrmask.p1.i32(ptr addrspace(1) [[P0]], i32 224)
+; CHECK-NEXT:    [[R:%.*]] = call align 32 ptr addrspace(1) @llvm.ptrmask.p1.i32(ptr addrspace(1) [[P0]], i32 224)
 ; CHECK-NEXT:    ret ptr addrspace(1) [[R]]
 ;
   %pm0 = call noalias ptr addrspace(1) @llvm.ptrmask.p1.i32(ptr addrspace(1) %p0, i32 224)
@@ -80,9 +80,9 @@ define ptr addrspace(1) @ptrmask_combine_consecutive_preserve_attrs_todo2(ptr ad
 define ptr @ptrmask_combine_add_nonnull(ptr %p) {
 ; CHECK-LABEL: define ptr @ptrmask_combine_add_nonnull
 ; CHECK-SAME: (ptr [[P:%.*]]) {
-; CHECK-NEXT:    [[PM0:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[P]], i64 -64)
+; CHECK-NEXT:    [[PM0:%.*]] = call align 64 ptr @llvm.ptrmask.p0.i64(ptr [[P]], i64 -64)
 ; CHECK-NEXT:    [[PGEP:%.*]] = getelementptr i8, ptr [[PM0]], i64 33
-; CHECK-NEXT:    [[R:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[PGEP]], i64 -32)
+; CHECK-NEXT:    [[R:%.*]] = call nonnull align 32 ptr @llvm.ptrmask.p0.i64(ptr [[PGEP]], i64 -32)
 ; CHECK-NEXT:    ret ptr [[R]]
 ;
   %pm0 = call ptr @llvm.ptrmask.p0.i64(ptr %p, i64 -64)
@@ -94,7 +94,7 @@ define ptr @ptrmask_combine_add_nonnull(ptr %p) {
 define ptr @ptrmask_combine_add_alignment(ptr %p) {
 ; CHECK-LABEL: define ptr @ptrmask_combine_add_alignment
 ; CHECK-SAME: (ptr [[P:%.*]]) {
-; CHECK-NEXT:    [[R:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[P]], i64 -64)
+; CHECK-NEXT:    [[R:%.*]] = call align 64 ptr @llvm.ptrmask.p0.i64(ptr [[P]], i64 -64)
 ; CHECK-NEXT:    ret ptr [[R]]
 ;
   %r = call ptr @llvm.ptrmask.p0.i64(ptr %p, i64 -64)
@@ -104,7 +104,7 @@ define ptr @ptrmask_combine_add_alignment(ptr %p) {
 define ptr addrspace(1) @ptrmask_combine_add_alignment2(ptr addrspace(1) align 32 %p) {
 ; CHECK-LABEL: define ptr addrspace(1) @ptrmask_combine_add_alignment2
 ; CHECK-SAME: (ptr addrspace(1) align 32 [[P:%.*]]) {
-; CHECK-NEXT:    [[R:%.*]] = call ptr addrspace(1) @llvm.ptrmask.p1.i32(ptr addrspace(1) [[P]], i32 -64)
+; CHECK-NEXT:    [[R:%.*]] = call align 64 ptr addrspace(1) @llvm.ptrmask.p1.i32(ptr addrspace(1) [[P]], i32 -64)
 ; CHECK-NEXT:    ret ptr addrspace(1) [[R]]
 ;
   %r = call ptr addrspace(1) @llvm.ptrmask.p1.i32(ptr addrspace(1) %p, i32 -64)
@@ -114,7 +114,7 @@ define ptr addrspace(1) @ptrmask_combine_add_alignment2(ptr addrspace(1) align 3
 define <2 x ptr> @ptrmask_combine_add_alignment_vec(<2 x ptr> %p) {
 ; CHECK-LABEL: define <2 x ptr> @ptrmask_combine_add_alignment_vec
 ; CHECK-SAME: (<2 x ptr> [[P:%.*]]) {
-; CHECK-NEXT:    [[R:%.*]] = call <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> [[P]], <2 x i64> <i64 -96, i64 -96>)
+; CHECK-NEXT:    [[R:%.*]] = call align 32 <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> [[P]], <2 x i64> <i64 -96, i64 -96>)
 ; CHECK-NEXT:    ret <2 x ptr> [[R]]
 ;
   %r = call <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> %p, <2 x i64> <i64 -96, i64 -96>)
@@ -124,7 +124,7 @@ define <2 x ptr> @ptrmask_combine_add_alignment_vec(<2 x ptr> %p) {
 define ptr addrspace(1) @ptrmask_combine_improve_alignment(ptr addrspace(1) %p) {
 ; CHECK-LABEL: define ptr addrspace(1) @ptrmask_combine_improve_alignment
 ; CHECK-SAME: (ptr addrspace(1) [[P:%.*]]) {
-; CHECK-NEXT:    [[R:%.*]] = call align 32 ptr addrspace(1) @llvm.ptrmask.p1.i32(ptr addrspace(1) [[P]], i32 -64)
+; CHECK-NEXT:    [[R:%.*]] = call align 64 ptr addrspace(1) @llvm.ptrmask.p1.i32(ptr addrspace(1) [[P]], i32 -64)
 ; CHECK-NEXT:    ret ptr addrspace(1) [[R]]
 ;
   %r = call align 32 ptr addrspace(1) @llvm.ptrmask.p1.i32(ptr addrspace(1) %p, i32 -64)
@@ -134,7 +134,7 @@ define ptr addrspace(1) @ptrmask_combine_improve_alignment(ptr addrspace(1) %p)
 define <2 x ptr addrspace(1) > @ptrmask_combine_improve_alignment_vec(<2 x ptr addrspace(1) > %p) {
 ; CHECK-LABEL: define <2 x ptr addrspace(1)> @ptrmask_combine_improve_alignment_vec
 ; CHECK-SAME: (<2 x ptr addrspace(1)> [[P:%.*]]) {
-; CHECK-NEXT:    [[R:%.*]] = call align 32 <2 x ptr addrspace(1)> @llvm.ptrmask.v2p1.v2i32(<2 x ptr addrspace(1)> [[P]], <2 x i32> <i32 -64, i32 -128>)
+; CHECK-NEXT:    [[R:%.*]] = call align 64 <2 x ptr addrspace(1)> @llvm.ptrmask.v2p1.v2i32(<2 x ptr addrspace(1)> [[P]], <2 x i32> <i32 -64, i32 -128>)
 ; CHECK-NEXT:    ret <2 x ptr addrspace(1)> [[R]]
 ;
   %r = call align 32 <2 x ptr addrspace(1) > @llvm.ptrmask.v2p1.v2i32(<2 x ptr addrspace(1) > %p, <2 x i32> <i32 -64, i32 -128>)
@@ -230,7 +230,7 @@ define <2 x i32> @ptrtoint_of_ptrmask_vec_fail(<2 x ptr addrspace(1) > %p, <2 x
 define ptr addrspace(1) @ptrmask_is_null(ptr addrspace(1) align 32 %p) {
 ; CHECK-LABEL: define ptr addrspace(1) @ptrmask_is_null
 ; CHECK-SAME: (ptr addrspace(1) align 32 [[P:%.*]]) {
-; CHECK-NEXT:    [[R:%.*]] = call ptr addrspace(1) @llvm.ptrmask.p1.i32(ptr addrspace(1) [[P]], i32 0)
+; CHECK-NEXT:    [[R:%.*]] = call align 4294967296 ptr addrspace(1) @llvm.ptrmask.p1.i32(ptr addrspace(1) [[P]], i32 0)
 ; CHECK-NEXT:    ret ptr addrspace(1) [[R]]
 ;
   %r = call ptr addrspace(1) @llvm.ptrmask.p1.i32(ptr addrspace(1) %p, i32 31)
@@ -250,7 +250,7 @@ define <2 x ptr addrspace(1) > @ptrmask_is_null_vec(<2 x ptr addrspace(1) > alig
 define ptr addrspace(1) @ptrmask_is_null_fail(ptr addrspace(1) align 16 %p) {
 ; CHECK-LABEL: define ptr addrspace(1) @ptrmask_is_null_fail
 ; CHECK-SAME: (ptr addrspace(1) align 16 [[P:%.*]]) {
-; CHECK-NEXT:    [[R:%.*]] = call ptr addrspace(1) @llvm.ptrmask.p1.i32(ptr addrspace(1) [[P]], i32 16)
+; CHECK-NEXT:    [[R:%.*]] = call align 16 ptr addrspace(1) @llvm.ptrmask.p1.i32(ptr addrspace(1) [[P]], i32 16)
 ; CHECK-NEXT:    ret ptr addrspace(1) [[R]]
 ;
   %r = call ptr addrspace(1) @llvm.ptrmask.p1.i32(ptr addrspace(1) %p, i32 31)
@@ -270,7 +270,7 @@ define <2 x ptr addrspace(1) > @ptrmask_is_null_vec_fail(<2 x ptr addrspace(1) >
 define ptr @ptrmask_maintain_provenance_i64(ptr %p0) {
 ; CHECK-LABEL: define ptr @ptrmask_maintain_provenance_i64
 ; CHECK-SAME: (ptr [[P0:%.*]]) {
-; CHECK-NEXT:    [[R:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[P0]], i64 0)
+; CHECK-NEXT:    [[R:%.*]] = call align 4294967296 ptr @llvm.ptrmask.p0.i64(ptr [[P0]], i64 0)
 ; CHECK-NEXT:    ret ptr [[R]]
 ;
   %r = call ptr @llvm.ptrmask.p0.i64(ptr %p0, i64 0)
@@ -280,7 +280,7 @@ define ptr @ptrmask_maintain_provenance_i64(ptr %p0) {
 define ptr addrspace(1) @ptrmask_maintain_provenance_i32(ptr addrspace(1) %p0) {
 ; CHECK-LABEL: define ptr addrspace(1) @ptrmask_maintain_provenance_i32
 ; CHECK-SAME: (ptr addrspace(1) [[P0:%.*]]) {
-; CHECK-NEXT:    [[R:%.*]] = call ptr addrspace(1) @llvm.ptrmask.p1.i32(ptr addrspace(1) [[P0]], i32 0)
+; CHECK-NEXT:    [[R:%.*]] = call align 4294967296 ptr addrspace(1) @llvm.ptrmask.p1.i32(ptr addrspace(1) [[P0]], i32 0)
 ; CHECK-NEXT:    ret ptr addrspace(1) [[R]]
 ;
   %r = call ptr addrspace(1) @llvm.ptrmask.p1.i32(ptr addrspace(1) %p0, i32 0)
@@ -292,7 +292,7 @@ define ptr @ptrmask_is_useless0(i64 %i, i64 %m) {
 ; CHECK-SAME: (i64 [[I:%.*]], i64 [[M:%.*]]) {
 ; CHECK-NEXT:    [[I0:%.*]] = and i64 [[I]], -4
 ; CHECK-NEXT:    [[P0:%.*]] = inttoptr i64 [[I0]] to ptr
-; CHECK-NEXT:    [[R:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[P0]], i64 [[M]])
+; CHECK-NEXT:    [[R:%.*]] = call align 4 ptr @llvm.ptrmask.p0.i64(ptr [[P0]], i64 [[M]])
 ; CHECK-NEXT:    ret ptr [[R]]
 ;
   %m0 = and i64 %m, -4
@@ -307,7 +307,7 @@ define ptr @ptrmask_is_useless1(i64 %i, i64 %m) {
 ; CHECK-SAME: (i64 [[I:%.*]], i64 [[M:%.*]]) {
 ; CHECK-NEXT:    [[I0:%.*]] = and i64 [[I]], -8
 ; CHECK-NEXT:    [[P0:%.*]] = inttoptr i64 [[I0]] to ptr
-; CHECK-NEXT:    [[R:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[P0]], i64 [[M]])
+; CHECK-NEXT:    [[R:%.*]] = call align 8 ptr @llvm.ptrmask.p0.i64(ptr [[P0]], i64 [[M]])
 ; CHECK-NEXT:    ret ptr [[R]]
 ;
   %m0 = and i64 %m, -4
@@ -397,7 +397,7 @@ define ptr @ptrmask_is_useless_fail0(i64 %i, i64 %m) {
 ; CHECK-NEXT:    [[M0:%.*]] = and i64 [[M]], -4
 ; CHECK-NEXT:    [[I0:%.*]] = or i64 [[I]], -4
 ; CHECK-NEXT:    [[P0:%.*]] = inttoptr i64 [[I0]] to ptr
-; CHECK-NEXT:    [[R:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr nonnull [[P0]], i64 [[M0]])
+; CHECK-NEXT:    [[R:%.*]] = call align 4 ptr @llvm.ptrmask.p0.i64(ptr nonnull [[P0]], i64 [[M0]])
 ; CHECK-NEXT:    ret ptr [[R]]
 ;
   %m0 = and i64 %m, -4

>From 3b1ac280b960af129278fb4e33aaee33e3d3780c Mon Sep 17 00:00:00 2001
From: Noah Goldstein <goldstein.w.n at gmail.com>
Date: Fri, 22 Sep 2023 08:21:53 -0500
Subject: [PATCH 6/7] [InstCombine] Preserve return attributes when merging
 `llvm.ptrmask`

If we have assosiated attributes i.e `([ret_attrs] (ptrmask (ptrmask
p0, m0), m1))` we should preserve `[ret_attrs]` when combining the two
`llvm.ptrmask`s.

Differential Revision: https://reviews.llvm.org/D156638
---
 .../Transforms/InstCombine/InstCombineCalls.cpp    | 14 ++++++++++----
 llvm/test/Transforms/InstCombine/ptrmask.ll        |  4 ++--
 2 files changed, 12 insertions(+), 6 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 9ca8869f9357d7e..ff2d92df653751a 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -1968,17 +1968,23 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
       return II;
 
     Value *InnerPtr, *InnerMask;
+    bool Changed = false;
+    // Combine:
+    // (ptrmask (ptrmask p, A), B)
+    //    -> (ptrmask p, (and A, B))
     if (match(II->getArgOperand(0),
               m_OneUse(m_Intrinsic<Intrinsic::ptrmask>(m_Value(InnerPtr),
                                                        m_Value(InnerMask))))) {
       assert(II->getArgOperand(1)->getType() == InnerMask->getType() &&
              "Mask types must match");
+      // TODO: If InnerMask == Op1, we could copy attributes from inner
+      // callsite -> outer callsite.
       Value *NewMask = Builder.CreateAnd(II->getArgOperand(1), InnerMask);
-      return replaceInstUsesWith(
-          *II, Builder.CreateIntrinsic(InnerPtr->getType(), Intrinsic::ptrmask,
-                                       {InnerPtr, NewMask}));
+      replaceOperand(CI, 0, InnerPtr);
+      replaceOperand(CI, 1, NewMask);
+      Changed = true;
     }
-    bool Changed = false;
+
     // See if we can deduce non-null.
     if (!CI.hasRetAttr(Attribute::NonNull) &&
         (Known.isNonZero() ||
diff --git a/llvm/test/Transforms/InstCombine/ptrmask.ll b/llvm/test/Transforms/InstCombine/ptrmask.ll
index 9c634eb03cec3aa..4e791b968e1acef 100644
--- a/llvm/test/Transforms/InstCombine/ptrmask.ll
+++ b/llvm/test/Transforms/InstCombine/ptrmask.ll
@@ -12,7 +12,7 @@ define ptr @ptrmask_combine_consecutive_preserve_attrs(ptr %p0, i64 %m1) {
 ; CHECK-LABEL: define ptr @ptrmask_combine_consecutive_preserve_attrs
 ; CHECK-SAME: (ptr [[P0:%.*]], i64 [[M1:%.*]]) {
 ; CHECK-NEXT:    [[TMP1:%.*]] = and i64 [[M1]], 224
-; CHECK-NEXT:    [[R:%.*]] = call align 32 ptr @llvm.ptrmask.p0.i64(ptr [[P0]], i64 [[TMP1]])
+; CHECK-NEXT:    [[R:%.*]] = call noalias align 32 ptr @llvm.ptrmask.p0.i64(ptr [[P0]], i64 [[TMP1]])
 ; CHECK-NEXT:    ret ptr [[R]]
 ;
   %pm0 = call ptr @llvm.ptrmask.p0.i64(ptr %p0, i64 224)
@@ -24,7 +24,7 @@ define <2 x ptr> @ptrmask_combine_consecutive_preserve_attrs_vecs(<2 x ptr> %p0,
 ; CHECK-LABEL: define <2 x ptr> @ptrmask_combine_consecutive_preserve_attrs_vecs
 ; CHECK-SAME: (<2 x ptr> [[P0:%.*]], <2 x i64> [[M1:%.*]]) {
 ; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i64> [[M1]], <i64 12345, i64 12345>
-; CHECK-NEXT:    [[R:%.*]] = call <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> [[P0]], <2 x i64> [[TMP1]])
+; CHECK-NEXT:    [[R:%.*]] = call align 128 <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> [[P0]], <2 x i64> [[TMP1]])
 ; CHECK-NEXT:    ret <2 x ptr> [[R]]
 ;
   %pm0 = call <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> %p0, <2 x i64> <i64 12345, i64 12345>)

>From 7b835e92c07e237a1c2b2213802d14fd0d73ea5f Mon Sep 17 00:00:00 2001
From: Noah Goldstein <goldstein.w.n at gmail.com>
Date: Fri, 22 Sep 2023 08:22:01 -0500
Subject: [PATCH 7/7] [InstCombine] Fold `(ptrtoint (ptrmask p0, m0))` -> `(and
 (ptrtoint p0), m0)`

`and` is generally more supported so if we have a `ptrmask` anyways
might as well use `and`.

Differential Revision: https://reviews.llvm.org/D156640
---
 .../InstCombine/InstCombineCasts.cpp          |  9 +++++++++
 llvm/test/Transforms/InstCombine/ptrmask.ll   | 20 +++++++++----------
 2 files changed, 19 insertions(+), 10 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
index 2285a91cbdf2bb5..e069847de54ea10 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -1933,6 +1933,15 @@ Instruction *InstCombinerImpl::visitPtrToInt(PtrToIntInst &CI) {
     return CastInst::CreateIntegerCast(P, Ty, /*isSigned=*/false);
   }
 
+  // (ptrtoint (ptrmask P, M))
+  //    -> (and (ptrtoint P), M)
+  // This is generally beneficial as `and` is better supported than `ptrmask`.
+  Value *Ptr, *Mask;
+  if (match(SrcOp, m_OneUse(m_Intrinsic<Intrinsic::ptrmask>(m_Value(Ptr),
+                                                            m_Value(Mask)))) &&
+      Mask->getType() == Ty)
+    return BinaryOperator::CreateAnd(Builder.CreatePtrToInt(Ptr, Ty), Mask);
+
   if (auto *GEP = dyn_cast<GetElementPtrInst>(SrcOp)) {
     // Fold ptrtoint(gep null, x) to multiply + constant if the GEP has one use.
     // While this can increase the number of instructions it doesn't actually
diff --git a/llvm/test/Transforms/InstCombine/ptrmask.ll b/llvm/test/Transforms/InstCombine/ptrmask.ll
index 4e791b968e1acef..735abe614b0ad75 100644
--- a/llvm/test/Transforms/InstCombine/ptrmask.ll
+++ b/llvm/test/Transforms/InstCombine/ptrmask.ll
@@ -154,8 +154,8 @@ define ptr addrspace(1) @ptrmask_combine_improve_alignment_fail(ptr addrspace(1)
 define i64 @ptrtoint_of_ptrmask(ptr %p, i64 %m) {
 ; CHECK-LABEL: define i64 @ptrtoint_of_ptrmask
 ; CHECK-SAME: (ptr [[P:%.*]], i64 [[M:%.*]]) {
-; CHECK-NEXT:    [[PM:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[P]], i64 [[M]])
-; CHECK-NEXT:    [[R:%.*]] = ptrtoint ptr [[PM]] to i64
+; CHECK-NEXT:    [[TMP1:%.*]] = ptrtoint ptr [[P]] to i64
+; CHECK-NEXT:    [[R:%.*]] = and i64 [[TMP1]], [[M]]
 ; CHECK-NEXT:    ret i64 [[R]]
 ;
   %pm = call ptr @llvm.ptrmask.p0.i64(ptr %p, i64 %m)
@@ -167,9 +167,9 @@ define i64 @ptrtoint_of_ptrmask(ptr %p, i64 %m) {
 define i32 @ptrtoint_of_ptrmask2(ptr %p, i64 %m) {
 ; CHECK-LABEL: define i32 @ptrtoint_of_ptrmask2
 ; CHECK-SAME: (ptr [[P:%.*]], i64 [[M:%.*]]) {
-; CHECK-NEXT:    [[PM:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[P]], i64 [[M]])
-; CHECK-NEXT:    [[TMP1:%.*]] = ptrtoint ptr [[PM]] to i64
-; CHECK-NEXT:    [[R:%.*]] = trunc i64 [[TMP1]] to i32
+; CHECK-NEXT:    [[TMP1:%.*]] = ptrtoint ptr [[P]] to i64
+; CHECK-NEXT:    [[TMP2:%.*]] = and i64 [[TMP1]], [[M]]
+; CHECK-NEXT:    [[R:%.*]] = trunc i64 [[TMP2]] to i32
 ; CHECK-NEXT:    ret i32 [[R]]
 ;
   %pm = call ptr @llvm.ptrmask.p0.i64(ptr %p, i64 %m)
@@ -180,8 +180,8 @@ define i32 @ptrtoint_of_ptrmask2(ptr %p, i64 %m) {
 define <2 x i64> @ptrtoint_of_ptrmask_vec(<2 x ptr> %p, <2 x i64> %m) {
 ; CHECK-LABEL: define <2 x i64> @ptrtoint_of_ptrmask_vec
 ; CHECK-SAME: (<2 x ptr> [[P:%.*]], <2 x i64> [[M:%.*]]) {
-; CHECK-NEXT:    [[PM:%.*]] = call <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> [[P]], <2 x i64> [[M]])
-; CHECK-NEXT:    [[R:%.*]] = ptrtoint <2 x ptr> [[PM]] to <2 x i64>
+; CHECK-NEXT:    [[TMP1:%.*]] = ptrtoint <2 x ptr> [[P]] to <2 x i64>
+; CHECK-NEXT:    [[R:%.*]] = and <2 x i64> [[TMP1]], [[M]]
 ; CHECK-NEXT:    ret <2 x i64> [[R]]
 ;
   %pm = call <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> %p, <2 x i64> %m)
@@ -192,9 +192,9 @@ define <2 x i64> @ptrtoint_of_ptrmask_vec(<2 x ptr> %p, <2 x i64> %m) {
 define <2 x i32> @ptrtoint_of_ptrmask_vec2(<2 x ptr> %p, <2 x i64> %m) {
 ; CHECK-LABEL: define <2 x i32> @ptrtoint_of_ptrmask_vec2
 ; CHECK-SAME: (<2 x ptr> [[P:%.*]], <2 x i64> [[M:%.*]]) {
-; CHECK-NEXT:    [[PM:%.*]] = call <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> [[P]], <2 x i64> [[M]])
-; CHECK-NEXT:    [[TMP1:%.*]] = ptrtoint <2 x ptr> [[PM]] to <2 x i64>
-; CHECK-NEXT:    [[R:%.*]] = trunc <2 x i64> [[TMP1]] to <2 x i32>
+; CHECK-NEXT:    [[TMP1:%.*]] = ptrtoint <2 x ptr> [[P]] to <2 x i64>
+; CHECK-NEXT:    [[TMP2:%.*]] = and <2 x i64> [[TMP1]], [[M]]
+; CHECK-NEXT:    [[R:%.*]] = trunc <2 x i64> [[TMP2]] to <2 x i32>
 ; CHECK-NEXT:    ret <2 x i32> [[R]]
 ;
   %pm = call <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> %p, <2 x i64> %m)



More information about the cfe-commits mailing list