[llvm] [InstCombine] Improve bitfield addition (PR #77184)
via llvm-commits
llvm-commits at lists.llvm.org
Sun Jul 21 17:43:18 PDT 2024
https://github.com/ParkHanbum updated https://github.com/llvm/llvm-project/pull/77184
>From 97e293fae64385886ea0c296e4ccb9b3d8a49a74 Mon Sep 17 00:00:00 2001
From: Hanbum Park <kese111 at gmail.com>
Date: Sat, 6 Jan 2024 16:44:19 +0900
Subject: [PATCH 1/4] [InstCombine] Add test for improving bitfield addition
(#33874)
Proof: https://alive2.llvm.org/ce/z/RUL3YU
Fixes #33874
---
llvm/test/Transforms/InstCombine/or.ll | 408 +++++++++++++++++++++++++
1 file changed, 408 insertions(+)
diff --git a/llvm/test/Transforms/InstCombine/or.ll b/llvm/test/Transforms/InstCombine/or.ll
index 6e2085a8bb6c7..5e1a4aa895f61 100644
--- a/llvm/test/Transforms/InstCombine/or.ll
+++ b/llvm/test/Transforms/InstCombine/or.ll
@@ -2029,3 +2029,411 @@ define i32 @or_xor_and_commuted3(i32 %x, i32 %y, i32 %z) {
%or1 = or i32 %xor, %yy
ret i32 %or1
}
+
+; test or disjoint which used for BitField Arithmetic.
+; Positive
+define i8 @src_2_bitfield_op(i8 %x, i8 %y) {
+; CHECK-LABEL: @src_2_bitfield_op(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[NARROW:%.*]] = add i8 [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT: [[BF_VALUE:%.*]] = and i8 [[NARROW]], 7
+; CHECK-NEXT: [[BF_LSHR:%.*]] = and i8 [[X]], 24
+; CHECK-NEXT: [[BF_LSHR1228:%.*]] = add i8 [[BF_LSHR]], [[Y]]
+; CHECK-NEXT: [[BF_SHL:%.*]] = and i8 [[BF_LSHR1228]], 24
+; CHECK-NEXT: [[BF_SET20:%.*]] = or disjoint i8 [[BF_VALUE]], [[BF_SHL]]
+; CHECK-NEXT: ret i8 [[BF_SET20]]
+;
+entry:
+ %narrow = add i8 %y, %x
+ %bf.value = and i8 %narrow, 7
+ %bf.lshr = and i8 %x, 24
+ %bf.lshr1228 = add i8 %bf.lshr, %y
+ %bf.shl = and i8 %bf.lshr1228, 24
+ %bf.set20 = or disjoint i8 %bf.value, %bf.shl
+ ret i8 %bf.set20
+}
+
+define i8 @src_2_bitfield_const(i8 %x) {
+; CHECK-LABEL: @src_2_bitfield_const(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[NARROW:%.*]] = add i8 [[X:%.*]], 1
+; CHECK-NEXT: [[BF_VALUE:%.*]] = and i8 [[NARROW]], 7
+; CHECK-NEXT: [[BF_LSHR1228:%.*]] = add i8 [[X]], 8
+; CHECK-NEXT: [[BF_SHL:%.*]] = and i8 [[BF_LSHR1228]], 24
+; CHECK-NEXT: [[BF_SET20:%.*]] = or disjoint i8 [[BF_VALUE]], [[BF_SHL]]
+; CHECK-NEXT: ret i8 [[BF_SET20]]
+;
+entry:
+ %narrow = add i8 %x, 1
+ %bf.value = and i8 %narrow, 7
+ %bf.lshr = and i8 %x, 24
+ %bf.lshr1228 = add i8 %bf.lshr, 8
+ %bf.shl = and i8 %bf.lshr1228, 24
+ %bf.set20 = or disjoint i8 %bf.value, %bf.shl
+ ret i8 %bf.set20
+}
+
+define i8 @src_3_bitfield_op(i8 %x, i8 %y) {
+; CHECK-LABEL: @src_3_bitfield_op(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[NARROW:%.*]] = add i8 [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT: [[BF_VALUE:%.*]] = and i8 [[NARROW]], 7
+; CHECK-NEXT: [[BF_LSHR:%.*]] = and i8 [[X]], 24
+; CHECK-NEXT: [[BF_LSHR1244:%.*]] = add i8 [[BF_LSHR]], [[Y]]
+; CHECK-NEXT: [[BF_SHL:%.*]] = and i8 [[BF_LSHR1244]], 24
+; CHECK-NEXT: [[BF_SET20:%.*]] = or disjoint i8 [[BF_VALUE]], [[BF_SHL]]
+; CHECK-NEXT: [[BF_LSHR22:%.*]] = and i8 [[X]], -32
+; CHECK-NEXT: [[BF_LSHR2547:%.*]] = add i8 [[BF_LSHR22]], [[Y]]
+; CHECK-NEXT: [[BF_VALUE30:%.*]] = and i8 [[BF_LSHR2547]], -32
+; CHECK-NEXT: [[BF_SET33:%.*]] = or disjoint i8 [[BF_SET20]], [[BF_VALUE30]]
+; CHECK-NEXT: ret i8 [[BF_SET33]]
+;
+entry:
+ %narrow = add i8 %y, %x
+ %bf.value = and i8 %narrow, 7
+ %bf.lshr = and i8 %x, 24
+ %bf.lshr1244 = add i8 %bf.lshr, %y
+ %bf.shl = and i8 %bf.lshr1244, 24
+ %bf.set20 = or disjoint i8 %bf.value, %bf.shl
+ %bf.lshr22 = and i8 %x, -32
+ %bf.lshr2547 = add i8 %bf.lshr22, %y
+ %bf.value30 = and i8 %bf.lshr2547, -32
+ %bf.set33 = or disjoint i8 %bf.set20, %bf.value30
+ ret i8 %bf.set33
+}
+
+define i8 @src_3_bitfield_const(i8 %x) {
+; CHECK-LABEL: @src_3_bitfield_const(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[NARROW:%.*]] = add i8 [[X:%.*]], 1
+; CHECK-NEXT: [[BF_VALUE:%.*]] = and i8 [[NARROW]], 7
+; CHECK-NEXT: [[BF_LSHR1244:%.*]] = add i8 [[X]], 8
+; CHECK-NEXT: [[BF_SHL:%.*]] = and i8 [[BF_LSHR1244]], 24
+; CHECK-NEXT: [[BF_SET20:%.*]] = or disjoint i8 [[BF_VALUE]], [[BF_SHL]]
+; CHECK-NEXT: [[TMP0:%.*]] = and i8 [[X]], -32
+; CHECK-NEXT: [[BF_VALUE30:%.*]] = add i8 [[TMP0]], 32
+; CHECK-NEXT: [[BF_SET33:%.*]] = or disjoint i8 [[BF_SET20]], [[BF_VALUE30]]
+; CHECK-NEXT: ret i8 [[BF_SET33]]
+;
+entry:
+ %narrow = add i8 %x, 1
+ %bf.value = and i8 %narrow, 7
+ %bf.lshr = and i8 %x, 24
+ %bf.lshr1244 = add i8 %bf.lshr, 8
+ %bf.shl = and i8 %bf.lshr1244, 24
+ %bf.set20 = or disjoint i8 %bf.value, %bf.shl
+ %bf.lshr22 = and i8 %x, -32
+ %bf.lshr2547 = add i8 %bf.lshr22, 32
+ %bf.value30 = and i8 %bf.lshr2547, -32
+ %bf.set33 = or disjoint i8 %bf.set20, %bf.value30
+ ret i8 %bf.set33
+}
+
+; test or disjoint which used for BitField Arithmetic.
+; Negative
+define i8 @src_bit_arithmetic_bitsize_1_low(i8 %x, i8 %y) {
+; CHECK-LABEL: @src_bit_arithmetic_bitsize_1_low(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[NARROW:%.*]] = add i8 [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT: [[BF_VALUE:%.*]] = and i8 [[NARROW]], 1
+; CHECK-NEXT: [[BF_LSHR:%.*]] = and i8 [[X]], 30
+; CHECK-NEXT: [[BF_LSHR1244:%.*]] = add i8 [[BF_LSHR]], [[Y]]
+; CHECK-NEXT: [[BF_SHL:%.*]] = and i8 [[BF_LSHR1244]], 30
+; CHECK-NEXT: [[BF_SET20:%.*]] = or disjoint i8 [[BF_VALUE]], [[BF_SHL]]
+; CHECK-NEXT: [[BF_LSHR22:%.*]] = and i8 [[X]], -32
+; CHECK-NEXT: [[BF_LSHR2547:%.*]] = add i8 [[BF_LSHR22]], [[Y]]
+; CHECK-NEXT: [[BF_VALUE30:%.*]] = and i8 [[BF_LSHR2547]], -32
+; CHECK-NEXT: [[BF_SET33:%.*]] = or disjoint i8 [[BF_SET20]], [[BF_VALUE30]]
+; CHECK-NEXT: ret i8 [[BF_SET33]]
+;
+entry:
+ %narrow = add i8 %y, %x
+ %bf.value = and i8 %narrow, 1
+ %bf.lshr = and i8 %x, 30
+ %bf.lshr1244 = add i8 %bf.lshr, %y
+ %bf.shl = and i8 %bf.lshr1244, 30
+ %bf.set20 = or disjoint i8 %bf.value, %bf.shl
+ %bf.lshr22 = and i8 %x, -32
+ %bf.lshr2547 = add i8 %bf.lshr22, %y
+ %bf.value30 = and i8 %bf.lshr2547, -32
+ %bf.set33 = or disjoint i8 %bf.set20, %bf.value30
+ ret i8 %bf.set33
+}
+
+define i8 @src_bit_arithmetic_bitsize_1_mid(i8 %x, i8 %y) {
+; CHECK-LABEL: @src_bit_arithmetic_bitsize_1_mid(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[NARROW:%.*]] = add i8 [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT: [[BF_VALUE:%.*]] = and i8 [[NARROW]], 15
+; CHECK-NEXT: [[BF_LSHR:%.*]] = and i8 [[X]], 16
+; CHECK-NEXT: [[BF_LSHR1244:%.*]] = add i8 [[BF_LSHR]], [[Y]]
+; CHECK-NEXT: [[BF_SHL:%.*]] = and i8 [[BF_LSHR1244]], 16
+; CHECK-NEXT: [[BF_SET20:%.*]] = or disjoint i8 [[BF_VALUE]], [[BF_SHL]]
+; CHECK-NEXT: [[BF_LSHR22:%.*]] = and i8 [[X]], -32
+; CHECK-NEXT: [[BF_LSHR2547:%.*]] = add i8 [[BF_LSHR22]], [[Y]]
+; CHECK-NEXT: [[BF_VALUE30:%.*]] = and i8 [[BF_LSHR2547]], -32
+; CHECK-NEXT: [[BF_SET33:%.*]] = or disjoint i8 [[BF_SET20]], [[BF_VALUE30]]
+; CHECK-NEXT: ret i8 [[BF_SET33]]
+;
+entry:
+ %narrow = add i8 %y, %x
+ %bf.value = and i8 %narrow, 15
+ %bf.lshr = and i8 %x, 16
+ %bf.lshr1244 = add i8 %bf.lshr, %y
+ %bf.shl = and i8 %bf.lshr1244, 16
+ %bf.set20 = or disjoint i8 %bf.value, %bf.shl
+ %bf.lshr22 = and i8 %x, -32
+ %bf.lshr2547 = add i8 %bf.lshr22, %y
+ %bf.value30 = and i8 %bf.lshr2547, -32
+ %bf.set33 = or disjoint i8 %bf.set20, %bf.value30
+ ret i8 %bf.set33
+}
+
+define i8 @src_bit_arithmetic_bitsize_1_high(i8 %x, i8 %y) {
+; CHECK-LABEL: @src_bit_arithmetic_bitsize_1_high(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[NARROW:%.*]] = add i8 [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT: [[BF_VALUE:%.*]] = and i8 [[NARROW]], 7
+; CHECK-NEXT: [[BF_LSHR:%.*]] = and i8 [[X]], 120
+; CHECK-NEXT: [[BF_LSHR1244:%.*]] = add i8 [[BF_LSHR]], [[Y]]
+; CHECK-NEXT: [[BF_SHL:%.*]] = and i8 [[BF_LSHR1244]], 120
+; CHECK-NEXT: [[BF_SET20:%.*]] = or disjoint i8 [[BF_VALUE]], [[BF_SHL]]
+; CHECK-NEXT: [[BF_LSHR22:%.*]] = and i8 [[X]], -128
+; CHECK-NEXT: [[BF_LSHR2547:%.*]] = add i8 [[BF_LSHR22]], [[Y]]
+; CHECK-NEXT: [[BF_VALUE30:%.*]] = and i8 [[BF_LSHR2547]], -128
+; CHECK-NEXT: [[BF_SET33:%.*]] = or disjoint i8 [[BF_SET20]], [[BF_VALUE30]]
+; CHECK-NEXT: ret i8 [[BF_SET33]]
+;
+entry:
+ %narrow = add i8 %y, %x
+ %bf.value = and i8 %narrow, 7
+ %bf.lshr = and i8 %x, 120
+ %bf.lshr1244 = add i8 %bf.lshr, %y
+ %bf.shl = and i8 %bf.lshr1244, 120
+ %bf.set20 = or disjoint i8 %bf.value, %bf.shl
+ %bf.lshr22 = and i8 %x, -128
+ %bf.lshr2547 = add i8 %bf.lshr22, %y
+ %bf.value30 = and i8 %bf.lshr2547, -128
+ %bf.set33 = or disjoint i8 %bf.set20, %bf.value30
+ ret i8 %bf.set33
+}
+
+define i8 @src_bit_arithmetic_bitmask_low_over_mid(i8 %x, i8 %y) {
+; CHECK-LABEL: @src_bit_arithmetic_bitmask_low_over_mid(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[NARROW:%.*]] = add i8 [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT: [[BF_VALUE:%.*]] = and i8 [[NARROW]], 17
+; CHECK-NEXT: [[BF_LSHR:%.*]] = and i8 [[X]], 24
+; CHECK-NEXT: [[BF_LSHR1244:%.*]] = add i8 [[BF_LSHR]], [[Y]]
+; CHECK-NEXT: [[BF_SHL:%.*]] = and i8 [[BF_LSHR1244]], 24
+; CHECK-NEXT: [[BF_SET20:%.*]] = or disjoint i8 [[BF_VALUE]], [[BF_SHL]]
+; CHECK-NEXT: [[BF_LSHR22:%.*]] = and i8 [[X]], -32
+; CHECK-NEXT: [[BF_LSHR2547:%.*]] = add i8 [[BF_LSHR22]], [[Y]]
+; CHECK-NEXT: [[BF_VALUE30:%.*]] = and i8 [[BF_LSHR2547]], -32
+; CHECK-NEXT: [[BF_SET33:%.*]] = or disjoint i8 [[BF_SET20]], [[BF_VALUE30]]
+; CHECK-NEXT: ret i8 [[BF_SET33]]
+;
+entry:
+ %narrow = add i8 %y, %x
+ %bf.value = and i8 %narrow, 17
+ %bf.lshr = and i8 %x, 24
+ %bf.lshr1244 = add i8 %bf.lshr, %y
+ %bf.shl = and i8 %bf.lshr1244, 24
+ %bf.set20 = or disjoint i8 %bf.value, %bf.shl
+ %bf.lshr22 = and i8 %x, -32
+ %bf.lshr2547 = add i8 %bf.lshr22, %y
+ %bf.value30 = and i8 %bf.lshr2547, -32
+ %bf.set33 = or disjoint i8 %bf.set20, %bf.value30
+ ret i8 %bf.set33
+}
+
+define i8 @src_bit_arithmetic_bitmask_mid_over_high(i8 %x, i8 %y) {
+; CHECK-LABEL: @src_bit_arithmetic_bitmask_mid_over_high(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[NARROW:%.*]] = add i8 [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT: [[BF_VALUE:%.*]] = and i8 [[NARROW]], 7
+; CHECK-NEXT: [[BF_LSHR:%.*]] = and i8 [[X]], 56
+; CHECK-NEXT: [[BF_LSHR1244:%.*]] = add i8 [[BF_LSHR]], [[Y]]
+; CHECK-NEXT: [[BF_SHL:%.*]] = and i8 [[BF_LSHR1244]], 56
+; CHECK-NEXT: [[BF_SET20:%.*]] = or disjoint i8 [[BF_VALUE]], [[BF_SHL]]
+; CHECK-NEXT: [[BF_LSHR22:%.*]] = and i8 [[X]], -32
+; CHECK-NEXT: [[BF_LSHR2547:%.*]] = add i8 [[BF_LSHR22]], [[Y]]
+; CHECK-NEXT: [[BF_VALUE30:%.*]] = and i8 [[BF_LSHR2547]], -32
+; CHECK-NEXT: [[BF_SET33:%.*]] = or disjoint i8 [[BF_SET20]], [[BF_VALUE30]]
+; CHECK-NEXT: ret i8 [[BF_SET33]]
+;
+entry:
+ %narrow = add i8 %y, %x
+ %bf.value = and i8 %narrow, 7
+ %bf.lshr = and i8 %x, 56
+ %bf.lshr1244 = add i8 %bf.lshr, %y
+ %bf.shl = and i8 %bf.lshr1244, 56
+ %bf.set20 = or disjoint i8 %bf.value, %bf.shl
+ %bf.lshr22 = and i8 %x, -32
+ %bf.lshr2547 = add i8 %bf.lshr22, %y
+ %bf.value30 = and i8 %bf.lshr2547, -32
+ %bf.set33 = or disjoint i8 %bf.set20, %bf.value30
+ ret i8 %bf.set33
+}
+
+define i8 @src_bit_arithmetic_bitmask_mid_under_lower(i8 %x, i8 %y) {
+; CHECK-LABEL: @src_bit_arithmetic_bitmask_mid_under_lower(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[NARROW:%.*]] = add i8 [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT: [[BF_VALUE:%.*]] = and i8 [[NARROW]], 7
+; CHECK-NEXT: [[BF_LSHR:%.*]] = and i8 [[X]], 28
+; CHECK-NEXT: [[BF_LSHR1244:%.*]] = add i8 [[BF_LSHR]], [[Y]]
+; CHECK-NEXT: [[BF_SHL:%.*]] = and i8 [[BF_LSHR1244]], 24
+; CHECK-NEXT: [[BF_SET20:%.*]] = or disjoint i8 [[BF_VALUE]], [[BF_SHL]]
+; CHECK-NEXT: [[BF_LSHR22:%.*]] = and i8 [[X]], -32
+; CHECK-NEXT: [[BF_LSHR2547:%.*]] = add i8 [[BF_LSHR22]], [[Y]]
+; CHECK-NEXT: [[BF_VALUE30:%.*]] = and i8 [[BF_LSHR2547]], -32
+; CHECK-NEXT: [[BF_SET33:%.*]] = or disjoint i8 [[BF_SET20]], [[BF_VALUE30]]
+; CHECK-NEXT: ret i8 [[BF_SET33]]
+;
+entry:
+ %narrow = add i8 %y, %x
+ %bf.value = and i8 %narrow, 7
+ %bf.lshr = and i8 %x, 28
+ %bf.lshr1244 = add i8 %bf.lshr, %y
+ %bf.shl = and i8 %bf.lshr1244, 24
+ %bf.set20 = or disjoint i8 %bf.value, %bf.shl
+ %bf.lshr22 = and i8 %x, -32
+ %bf.lshr2547 = add i8 %bf.lshr22, %y
+ %bf.value30 = and i8 %bf.lshr2547, -32
+ %bf.set33 = or disjoint i8 %bf.set20, %bf.value30
+ ret i8 %bf.set33
+}
+
+define i8 @src_bit_arithmetic_bitmask_high_under_mid(i8 %x, i8 %y) {
+; CHECK-LABEL: @src_bit_arithmetic_bitmask_high_under_mid(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[NARROW:%.*]] = add i8 [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT: [[BF_VALUE:%.*]] = and i8 [[NARROW]], 7
+; CHECK-NEXT: [[BF_LSHR:%.*]] = and i8 [[X]], 24
+; CHECK-NEXT: [[BF_LSHR1244:%.*]] = add i8 [[BF_LSHR]], [[Y]]
+; CHECK-NEXT: [[BF_SHL:%.*]] = and i8 [[BF_LSHR1244]], 24
+; CHECK-NEXT: [[BF_SET20:%.*]] = or disjoint i8 [[BF_VALUE]], [[BF_SHL]]
+; CHECK-NEXT: [[BF_LSHR22:%.*]] = and i8 [[X]], -16
+; CHECK-NEXT: [[BF_LSHR2547:%.*]] = add i8 [[BF_LSHR22]], [[Y]]
+; CHECK-NEXT: [[BF_VALUE30:%.*]] = and i8 [[BF_LSHR2547]], -16
+; CHECK-NEXT: [[BF_SET33:%.*]] = or disjoint i8 [[BF_SET20]], [[BF_VALUE30]]
+; CHECK-NEXT: ret i8 [[BF_SET33]]
+;
+entry:
+ %narrow = add i8 %y, %x
+ %bf.value = and i8 %narrow, 7
+ %bf.lshr = and i8 %x, 24
+ %bf.lshr1244 = add i8 %bf.lshr, %y
+ %bf.shl = and i8 %bf.lshr1244, 24
+ %bf.set20 = or disjoint i8 %bf.value, %bf.shl
+ %bf.lshr22 = and i8 %x, -16
+ %bf.lshr2547 = add i8 %bf.lshr22, %y
+ %bf.value30 = and i8 %bf.lshr2547, -16
+ %bf.set33 = or disjoint i8 %bf.set20, %bf.value30
+ ret i8 %bf.set33
+}
+
+define i8 @src_bit_arithmetic_addition_over_bitmask_low(i8 %x) {
+; CHECK-LABEL: @src_bit_arithmetic_addition_over_bitmask_low(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[BF_VALUE:%.*]] = and i8 [[X:%.*]], 7
+; CHECK-NEXT: [[BF_LSHR1244:%.*]] = add i8 [[X]], 8
+; CHECK-NEXT: [[BF_SHL:%.*]] = and i8 [[BF_LSHR1244]], 24
+; CHECK-NEXT: [[BF_SET20:%.*]] = or disjoint i8 [[BF_VALUE]], [[BF_SHL]]
+; CHECK-NEXT: [[TMP0:%.*]] = and i8 [[X]], -32
+; CHECK-NEXT: [[BF_VALUE30:%.*]] = add i8 [[TMP0]], 32
+; CHECK-NEXT: [[BF_SET33:%.*]] = or disjoint i8 [[BF_SET20]], [[BF_VALUE30]]
+; CHECK-NEXT: ret i8 [[BF_SET33]]
+;
+entry:
+ %narrow = add i8 %x, 8
+ %bf.value = and i8 %narrow, 7
+ %bf.lshr = and i8 %x, 24
+ %bf.lshr1244 = add i8 %bf.lshr, 8
+ %bf.shl = and i8 %bf.lshr1244, 24
+ %bf.set20 = or disjoint i8 %bf.value, %bf.shl
+ %bf.lshr22 = and i8 %x, -32
+ %bf.lshr2547 = add i8 %bf.lshr22, 32
+ %bf.value30 = and i8 %bf.lshr2547, -32
+ %bf.set33 = or disjoint i8 %bf.set20, %bf.value30
+ ret i8 %bf.set33
+}
+
+define i8 @src_bit_arithmetic_addition_over_bitmask_mid(i8 %x) {
+; CHECK-LABEL: @src_bit_arithmetic_addition_over_bitmask_mid(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[NARROW:%.*]] = add i8 [[X:%.*]], 1
+; CHECK-NEXT: [[BF_VALUE:%.*]] = and i8 [[NARROW]], 7
+; CHECK-NEXT: [[BF_LSHR:%.*]] = and i8 [[X]], 24
+; CHECK-NEXT: [[BF_SET20:%.*]] = or disjoint i8 [[BF_VALUE]], [[BF_LSHR]]
+; CHECK-NEXT: [[TMP0:%.*]] = and i8 [[X]], -32
+; CHECK-NEXT: [[BF_VALUE30:%.*]] = add i8 [[TMP0]], 32
+; CHECK-NEXT: [[BF_SET33:%.*]] = or disjoint i8 [[BF_SET20]], [[BF_VALUE30]]
+; CHECK-NEXT: ret i8 [[BF_SET33]]
+;
+entry:
+ %narrow = add i8 %x, 1
+ %bf.value = and i8 %narrow, 7
+ %bf.lshr = and i8 %x, 24
+ %bf.lshr1244 = add i8 %bf.lshr, 32
+ %bf.shl = and i8 %bf.lshr1244, 24
+ %bf.set20 = or disjoint i8 %bf.value, %bf.shl
+ %bf.lshr22 = and i8 %x, -32
+ %bf.lshr2547 = add i8 %bf.lshr22, 32
+ %bf.value30 = and i8 %bf.lshr2547, -32
+ %bf.set33 = or disjoint i8 %bf.set20, %bf.value30
+ ret i8 %bf.set33
+}
+
+define i8 @src_bit_arithmetic_addition_under_bitmask_mid(i8 %x) {
+; CHECK-LABEL: @src_bit_arithmetic_addition_under_bitmask_mid(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[NARROW:%.*]] = add i8 [[X:%.*]], 1
+; CHECK-NEXT: [[BF_VALUE:%.*]] = and i8 [[NARROW]], 7
+; CHECK-NEXT: [[BF_LSHR:%.*]] = and i8 [[X]], 24
+; CHECK-NEXT: [[BF_SET20:%.*]] = or disjoint i8 [[BF_VALUE]], [[BF_LSHR]]
+; CHECK-NEXT: [[TMP0:%.*]] = and i8 [[X]], -32
+; CHECK-NEXT: [[BF_VALUE30:%.*]] = add i8 [[TMP0]], 32
+; CHECK-NEXT: [[BF_SET33:%.*]] = or disjoint i8 [[BF_SET20]], [[BF_VALUE30]]
+; CHECK-NEXT: ret i8 [[BF_SET33]]
+;
+entry:
+ %narrow = add i8 %x, 1
+ %bf.value = and i8 %narrow, 7
+ %bf.lshr = and i8 %x, 24
+ %bf.lshr1244 = add i8 %bf.lshr, 4
+ %bf.shl = and i8 %bf.lshr1244, 24
+ %bf.set20 = or disjoint i8 %bf.value, %bf.shl
+ %bf.lshr22 = and i8 %x, -32
+ %bf.lshr2547 = add i8 %bf.lshr22, 32
+ %bf.value30 = and i8 %bf.lshr2547, -32
+ %bf.set33 = or disjoint i8 %bf.set20, %bf.value30
+ ret i8 %bf.set33
+}
+
+define i8 @src_bit_arithmetic_addition_under_bitmask_high(i8 %x) {
+; CHECK-LABEL: @src_bit_arithmetic_addition_under_bitmask_high(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[NARROW:%.*]] = add i8 [[X:%.*]], 1
+; CHECK-NEXT: [[BF_VALUE:%.*]] = and i8 [[NARROW]], 7
+; CHECK-NEXT: [[BF_LSHR1244:%.*]] = add i8 [[X]], 8
+; CHECK-NEXT: [[BF_SHL:%.*]] = and i8 [[BF_LSHR1244]], 24
+; CHECK-NEXT: [[BF_SET20:%.*]] = or disjoint i8 [[BF_VALUE]], [[BF_SHL]]
+; CHECK-NEXT: [[BF_LSHR22:%.*]] = and i8 [[X]], -32
+; CHECK-NEXT: [[BF_SET33:%.*]] = or disjoint i8 [[BF_SET20]], [[BF_LSHR22]]
+; CHECK-NEXT: ret i8 [[BF_SET33]]
+;
+entry:
+ %narrow = add i8 %x, 1
+ %bf.value = and i8 %narrow, 7
+ %bf.lshr = and i8 %x, 24
+ %bf.lshr1244 = add i8 %bf.lshr, 8
+ %bf.shl = and i8 %bf.lshr1244, 24
+ %bf.set20 = or disjoint i8 %bf.value, %bf.shl
+ %bf.lshr22 = and i8 %x, -32
+ %bf.lshr2547 = add i8 %bf.lshr22, 16
+ %bf.value30 = and i8 %bf.lshr2547, -32
+ %bf.set33 = or disjoint i8 %bf.set20, %bf.value30
+ ret i8 %bf.set33
+}
>From 9c5c9a19f4b0dcf34ac4b5a6ee5edc67995096e1 Mon Sep 17 00:00:00 2001
From: Hanbum Park <kese111 at gmail.com>
Date: Sat, 6 Jan 2024 16:44:52 +0900
Subject: [PATCH 2/4] [InstCombine] Improve bitfield addition (#33874)
Proof: https://alive2.llvm.org/ce/z/RUL3YU
Fixes #33874
---
.../InstCombine/InstCombineAndOrXor.cpp | 249 ++++++++++++++++++
llvm/test/Transforms/InstCombine/or.ll | 94 +++----
2 files changed, 291 insertions(+), 52 deletions(-)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index f9caa4da44931..2fe94c3aa5ec7 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -3485,6 +3485,252 @@ static Value *foldOrOfInversions(BinaryOperator &I,
return nullptr;
}
+struct BitFieldAddBitMask {
+ const APInt *Lower;
+ const APInt *Upper;
+};
+struct BitFieldOptBitMask {
+ const APInt *Lower;
+ const APInt *Upper;
+ const APInt *New;
+};
+struct BitFieldAddInfo {
+ Value *X;
+ Value *Y;
+ bool opt;
+ union {
+ BitFieldAddBitMask AddMask;
+ BitFieldOptBitMask OptMask;
+ };
+};
+
+/// A bitfield operation consists of the following three steps:
+/// 1. extracting the bits
+/// 2. performing operations
+/// 3. eliminating the bits beyond the specified range
+///
+/// Depending on the location of the bitfield on which we want to perform
+/// the operation, all or only some of these steps are performed.
+///
+/// Consider:
+/// %narrow = add i8 %y, %x
+/// %bf.value = and i8 %narrow, 7
+/// %bf.lshr = and i8 %x, 24
+/// %bf.lshr1244 = add i8 %bf.lshr, %y
+/// %bf.shl = and i8 %bf.lshr1244, 24
+/// %bf.set20 = or disjoint i8 %bf.value, %bf.shl
+///
+/// This example shows a bitfield operation handling bits 0-3 first and bits
+/// 4-5 second. As you can see, the first operation on bits 0-3 skips step 1;
+/// that step is unnecessary because the field sits at the bottom of the value.
+/// The second operation on bits 4-5 then performs all three steps above.
+///
+/// After the operation for each bitfield is completed, all bits are collected
+/// through the `or disjoint` operation and the result is returned.
+///
+/// Our optimization opportunity is to reduce the three steps of the bitfield
+/// operation. The example below uses constants for a more intuitive picture.
+///
+/// Consider:
+/// (first) (second) (final)
+/// ????????(x) ????????(x) 00000???
+/// + 00000001 & 00011000 | 000??000
+/// ---------- ---------- ----------
+/// 0000???? 000??000 = 000?????
+/// & 00000111 + 00001000
+/// = 00000??? ----------
+/// 00???000
+/// & 00011000
+/// ----------
+/// = 000??000
+///
+/// Optimized:
+/// (first) (second) (final)
+/// 000????? (x) 000????? (x) 000????? (x&11) + 9
+/// & 00001011 & 00010100 ^ 000?0?00 (x&20)
+/// ---------- ---------- ----------
+/// 0000?0?? (x & 11) = 000?0?00 = 000?????
+/// + 00001001
+/// ----------
+/// = 000????? (x&11) + 9
+///
+/// 1. Extract each bitfield, excluding its highest bit.
+/// 2. Add the sum of all values to be added to each bitfield.
+/// 3. Extract the highest bit of each bitfield.
+/// 4. Perform ExclusiveOR on the results of 2 and 3.
+///
+/// The most important logic here is step 4. An ExclusiveOR is performed
+/// between the highest bit of each pre-extracted bitfield and the value after
+/// the addition. Through this, we obtain the normally-added result for the
+/// highest bit of each bitfield without keeping the overflowed
+/// bit.
+static Value *foldBitFieldArithmetic(BinaryOperator &I,
+ InstCombiner::BuilderTy &Builder) {
+ auto *Disjoint = dyn_cast<PossiblyDisjointInst>(&I);
+ if (!Disjoint || !Disjoint->isDisjoint())
+ return nullptr;
+
+ unsigned BitWidth = I.getType()->getScalarSizeInBits();
+
+ // If the operands of the bitfield operation are constants, their sum is
+ // computed and returned. If the operand is not a constant, the operand is
+ // returned. If this is not a bitfield operation, null is returned.
+ auto AccumulateY = [&](Value *LoY, Value *UpY, APInt LoMask,
+ APInt UpMask) -> Value * {
+ Value *Y = nullptr;
+ auto *CLoY = dyn_cast_or_null<Constant>(LoY);
+ auto *CUpY = dyn_cast_or_null<Constant>(UpY);
+ // If one of the operands is a constant, the other must also be a constant.
+ if ((CLoY == nullptr) ^ (CUpY == nullptr))
+ return nullptr;
+
+ if (CLoY && CUpY) {
+ APInt IUpY = CUpY->getUniqueInteger();
+ APInt ILoY = CLoY->getUniqueInteger();
+ // Each operand's bits must be within the range of its own field.
+ if (!(IUpY.isSubsetOf(UpMask) && ILoY.isSubsetOf(LoMask)))
+ return nullptr;
+ Y = ConstantInt::get(CLoY->getType(), ILoY + IUpY);
+ } else if (LoY == UpY) {
+ Y = LoY;
+ }
+
+ return Y;
+ };
+
+ // Determine whether this `or disjoint` instruction is a bitfield operation.
+ // If it is a bitfield operation, the information necessary
+ // to optimize the bitfield operation is extracted and returned as
+ // BitFieldAddInfo.
+ auto MatchBitFieldAdd =
+ [&](BinaryOperator &I) -> std::optional<BitFieldAddInfo> {
+ const APInt *OptLoMask, *OptUpMask, *LoMask, *UpMask, *UpMask2 = nullptr;
+ Value *X, *Y, *UpY;
+
+ // Bitfield has more than 2 members.
+ // ((X&UpMask)+UpY)&UpMask2 | (X&UpMask)+UpY
+ auto BitFieldAddUpper = m_CombineOr(
+ m_And(m_c_Add(m_And(m_Value(X), m_APInt(UpMask)), m_Value(UpY)),
+ m_APInt(UpMask2)),
+ m_c_Add(m_And(m_Value(X), m_APInt(UpMask)), m_Value(UpY)));
+ // Bitfield has more than 2 members, but this is the bottom field
+ // BitFieldAddUpper | (X+Y)&LoMask
+ auto BitFieldAdd =
+ m_c_Or(BitFieldAddUpper,
+ m_And(m_c_Add(m_Deferred(X), m_Value(Y)), m_APInt(LoMask)));
+ // When the bitfield has only 2 members
+ // (X+Y)&HiMask | (X+UpY)&LoMask
+ auto BitFieldAddIC =
+ m_c_Or(m_And(m_c_Add(m_Value(X), m_Value(Y)), m_APInt(LoMask)),
+ m_And(m_c_Add(m_Deferred(X), m_Value(UpY)), m_APInt(UpMask)));
+ // When `Or optimized-bitfield, BitFieldAddUpper` matched
+ // OptUpMask = highest bits of each bitfield
+ // OptLoMask = all bits of the bitfield excluding the highest bit
+ // BitFieldAddUpper | ((X&OptLoMask)+Y) ^ ((X&OptUpMask))
+ auto OptBitFieldAdd = m_c_Or(
+ m_c_Xor(m_CombineOr(
+ // When Y is not the constant.
+ m_c_Add(m_And(m_Value(X), m_APInt(OptLoMask)),
+ m_And(m_Value(Y), m_APInt(OptLoMask))),
+ // When Y is Constant, it can be accumulated.
+ m_c_Add(m_And(m_Value(X), m_APInt(OptLoMask)), m_Value(Y))),
+ // If Y is a constant, X^Y&OptUpMask can be pre-computed and
+ // OptUpMask is its result.
+ m_CombineOr(m_And(m_Deferred(X), m_APInt(OptUpMask)),
+ m_And(m_c_Xor(m_Deferred(X), m_Value(UpY)),
+ m_APInt(OptUpMask)))),
+ BitFieldAddUpper);
+
+ // Match bitfield operation.
+ if (match(&I, BitFieldAdd) || match(&I, BitFieldAddIC)) {
+ APInt Mask = APInt::getBitsSet(BitWidth, BitWidth - UpMask->countl_zero(),
+ BitWidth);
+
+ if (!((UpMask2 == nullptr || *UpMask == *UpMask2) &&
+ (LoMask->popcount() >= 2 && UpMask->popcount() >= 2) &&
+ (LoMask->isShiftedMask() && UpMask->isShiftedMask()) &&
+ // Lo & Hi mask must have no common bits
+ ((*LoMask & *UpMask) == 0) &&
+ // These masks must fill all bits while having no common bits.
+ ((Mask ^ *LoMask ^ *UpMask).isAllOnes())))
+ return std::nullopt;
+
+ if (!(Y = AccumulateY(Y, UpY, *LoMask, *UpMask)))
+ return std::nullopt;
+
+ return {{X, Y, false, {{LoMask, UpMask}}}};
+ }
+
+ // Match already optimized bitfield operation.
+ if (match(&I, OptBitFieldAdd)) {
+ APInt Mask = APInt::getBitsSet(
+ BitWidth, BitWidth - OptUpMask->countl_zero(), BitWidth);
+ APInt Mask2 = APInt::getBitsSet(
+ BitWidth, BitWidth - UpMask->countl_zero(), BitWidth);
+
+ // OptLoMask : includes the bits of each bitfield member, but excludes
+ // the highest bit of each bitfield.
+ // OptHiMask : includes only the highest bit of each member.
+ if (!((UpMask2 == nullptr || *UpMask == *UpMask2) &&
+ (UpMask->isShiftedMask() && UpMask->popcount() >= 2) &&
+ // must have no common bits if this operation is a bitfield op
+ ((*UpMask & (*OptLoMask | *OptUpMask)) == 0) &&
+ // NOT(OptLoMask) must be equal to OptUpMask
+ ((~*OptLoMask ^ Mask) == *OptUpMask) &&
+ // These masks must fill all bits while having no common bits.
+ (Mask2 ^ *UpMask ^ (*OptLoMask ^ *OptUpMask)).isAllOnes()))
+ return std::nullopt;
+
+ if (!(Y = AccumulateY(Y, UpY, (*OptLoMask + *OptUpMask), *UpMask)))
+ return std::nullopt;
+
+ struct BitFieldAddInfo Info = {X, Y, true, {{OptLoMask, OptUpMask}}};
+ Info.OptMask.New = UpMask;
+ return {Info};
+ }
+
+ return std::nullopt;
+ };
+
+ if (std::optional<BitFieldAddInfo> Info = MatchBitFieldAdd(I)) {
+ Value *X = Info->X;
+ Value *Y = Info->Y;
+ APInt BitLoMask, BitUpMask;
+ if (Info->opt) {
+ unsigned NewHiBit = BitWidth - (Info->OptMask.New->countl_zero() + 1);
+ // BitLoMask includes the bits of OptMask.New, excluding its highest bit
+ BitLoMask = *Info->OptMask.Lower | *Info->OptMask.New;
+ BitLoMask.clearBit(NewHiBit);
+ // BitUpMask only includes the highest bit of OptMask.New
+ BitUpMask = *Info->OptMask.Upper;
+ BitUpMask.setBit(NewHiBit);
+ } else {
+ // In the bitfield-operation case, we create a new optimized bitfield mask.
+ unsigned LowerHiBit = BitWidth - (Info->AddMask.Lower->countl_zero() + 1);
+ unsigned UpperHiBit = BitWidth - (Info->AddMask.Upper->countl_zero() + 1);
+ // BitLoMask includes all bits of each bitfield but excludes its highest
+ // bits
+ BitLoMask = *Info->AddMask.Lower | *Info->AddMask.Upper;
+ BitLoMask.clearBit(LowerHiBit);
+ BitLoMask.clearBit(UpperHiBit);
+ // BitUpMask only includes the highest bit of each bitfield.
+ BitUpMask = APInt::getOneBitSet(BitWidth, LowerHiBit);
+ BitUpMask.setBit(UpperHiBit);
+ }
+
+ // Create optimized bitfield operation logic using the created bitmask.
+ Value *AndXLower = Builder.CreateAnd(X, BitLoMask);
+ Value *AndYLower = Builder.CreateAnd(Y, BitLoMask);
+ Value *Add = Builder.CreateNUWAdd(AndXLower, AndYLower);
+ Value *Xor1 = Builder.CreateXor(X, Y);
+ Value *AndUpper = Builder.CreateAnd(Xor1, BitUpMask);
+ Value *Xor = Builder.CreateXor(Add, AndUpper);
+ return Xor;
+ }
+
+ return nullptr;
+}
+
// FIXME: We use commutative matchers (m_c_*) for some, but not all, matches
// here. We should standardize that construct where it is needed or choose some
// other way to ensure that commutated variants of patterns are not missed.
@@ -4034,6 +4280,9 @@ Instruction *InstCombinerImpl::visitOr(BinaryOperator &I) {
if (Value *V = SimplifyAddWithRemainder(I))
return replaceInstUsesWith(I, V);
+ if (Value *Res = foldBitFieldArithmetic(I, Builder))
+ return replaceInstUsesWith(I, Res);
+
return nullptr;
}
diff --git a/llvm/test/Transforms/InstCombine/or.ll b/llvm/test/Transforms/InstCombine/or.ll
index 5e1a4aa895f61..8272650ddf53d 100644
--- a/llvm/test/Transforms/InstCombine/or.ll
+++ b/llvm/test/Transforms/InstCombine/or.ll
@@ -2035,12 +2035,12 @@ define i32 @or_xor_and_commuted3(i32 %x, i32 %y, i32 %z) {
define i8 @src_2_bitfield_op(i8 %x, i8 %y) {
; CHECK-LABEL: @src_2_bitfield_op(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[NARROW:%.*]] = add i8 [[Y:%.*]], [[X:%.*]]
-; CHECK-NEXT: [[BF_VALUE:%.*]] = and i8 [[NARROW]], 7
-; CHECK-NEXT: [[BF_LSHR:%.*]] = and i8 [[X]], 24
-; CHECK-NEXT: [[BF_LSHR1228:%.*]] = add i8 [[BF_LSHR]], [[Y]]
-; CHECK-NEXT: [[BF_SHL:%.*]] = and i8 [[BF_LSHR1228]], 24
-; CHECK-NEXT: [[BF_SET20:%.*]] = or disjoint i8 [[BF_VALUE]], [[BF_SHL]]
+; CHECK-NEXT: [[TMP0:%.*]] = and i8 [[X:%.*]], 11
+; CHECK-NEXT: [[TMP1:%.*]] = and i8 [[Y:%.*]], 11
+; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i8 [[TMP0]], [[TMP1]]
+; CHECK-NEXT: [[TMP3:%.*]] = xor i8 [[X]], [[Y]]
+; CHECK-NEXT: [[TMP4:%.*]] = and i8 [[TMP3]], 20
+; CHECK-NEXT: [[BF_SET20:%.*]] = xor i8 [[TMP2]], [[TMP4]]
; CHECK-NEXT: ret i8 [[BF_SET20]]
;
entry:
@@ -2056,11 +2056,10 @@ entry:
define i8 @src_2_bitfield_const(i8 %x) {
; CHECK-LABEL: @src_2_bitfield_const(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[NARROW:%.*]] = add i8 [[X:%.*]], 1
-; CHECK-NEXT: [[BF_VALUE:%.*]] = and i8 [[NARROW]], 7
-; CHECK-NEXT: [[BF_LSHR1228:%.*]] = add i8 [[X]], 8
-; CHECK-NEXT: [[BF_SHL:%.*]] = and i8 [[BF_LSHR1228]], 24
-; CHECK-NEXT: [[BF_SET20:%.*]] = or disjoint i8 [[BF_VALUE]], [[BF_SHL]]
+; CHECK-NEXT: [[TMP0:%.*]] = and i8 [[X:%.*]], 11
+; CHECK-NEXT: [[TMP1:%.*]] = add nuw nsw i8 [[TMP0]], 9
+; CHECK-NEXT: [[TMP2:%.*]] = and i8 [[X]], 20
+; CHECK-NEXT: [[BF_SET20:%.*]] = xor i8 [[TMP1]], [[TMP2]]
; CHECK-NEXT: ret i8 [[BF_SET20]]
;
entry:
@@ -2076,16 +2075,12 @@ entry:
define i8 @src_3_bitfield_op(i8 %x, i8 %y) {
; CHECK-LABEL: @src_3_bitfield_op(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[NARROW:%.*]] = add i8 [[Y:%.*]], [[X:%.*]]
-; CHECK-NEXT: [[BF_VALUE:%.*]] = and i8 [[NARROW]], 7
-; CHECK-NEXT: [[BF_LSHR:%.*]] = and i8 [[X]], 24
-; CHECK-NEXT: [[BF_LSHR1244:%.*]] = add i8 [[BF_LSHR]], [[Y]]
-; CHECK-NEXT: [[BF_SHL:%.*]] = and i8 [[BF_LSHR1244]], 24
-; CHECK-NEXT: [[BF_SET20:%.*]] = or disjoint i8 [[BF_VALUE]], [[BF_SHL]]
-; CHECK-NEXT: [[BF_LSHR22:%.*]] = and i8 [[X]], -32
-; CHECK-NEXT: [[BF_LSHR2547:%.*]] = add i8 [[BF_LSHR22]], [[Y]]
-; CHECK-NEXT: [[BF_VALUE30:%.*]] = and i8 [[BF_LSHR2547]], -32
-; CHECK-NEXT: [[BF_SET33:%.*]] = or disjoint i8 [[BF_SET20]], [[BF_VALUE30]]
+; CHECK-NEXT: [[TMP0:%.*]] = and i8 [[X:%.*]], 107
+; CHECK-NEXT: [[TMP1:%.*]] = and i8 [[Y:%.*]], 107
+; CHECK-NEXT: [[TMP2:%.*]] = add nuw i8 [[TMP0]], [[TMP1]]
+; CHECK-NEXT: [[TMP3:%.*]] = xor i8 [[X]], [[Y]]
+; CHECK-NEXT: [[TMP4:%.*]] = and i8 [[TMP3]], -108
+; CHECK-NEXT: [[BF_SET33:%.*]] = xor i8 [[TMP2]], [[TMP4]]
; CHECK-NEXT: ret i8 [[BF_SET33]]
;
entry:
@@ -2105,14 +2100,10 @@ entry:
define i8 @src_3_bitfield_const(i8 %x) {
; CHECK-LABEL: @src_3_bitfield_const(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[NARROW:%.*]] = add i8 [[X:%.*]], 1
-; CHECK-NEXT: [[BF_VALUE:%.*]] = and i8 [[NARROW]], 7
-; CHECK-NEXT: [[BF_LSHR1244:%.*]] = add i8 [[X]], 8
-; CHECK-NEXT: [[BF_SHL:%.*]] = and i8 [[BF_LSHR1244]], 24
-; CHECK-NEXT: [[BF_SET20:%.*]] = or disjoint i8 [[BF_VALUE]], [[BF_SHL]]
-; CHECK-NEXT: [[TMP0:%.*]] = and i8 [[X]], -32
-; CHECK-NEXT: [[BF_VALUE30:%.*]] = add i8 [[TMP0]], 32
-; CHECK-NEXT: [[BF_SET33:%.*]] = or disjoint i8 [[BF_SET20]], [[BF_VALUE30]]
+; CHECK-NEXT: [[TMP0:%.*]] = and i8 [[X:%.*]], 107
+; CHECK-NEXT: [[TMP1:%.*]] = add nuw i8 [[TMP0]], 41
+; CHECK-NEXT: [[TMP2:%.*]] = and i8 [[X]], -108
+; CHECK-NEXT: [[BF_SET33:%.*]] = xor i8 [[TMP1]], [[TMP2]]
; CHECK-NEXT: ret i8 [[BF_SET33]]
;
entry:
@@ -2192,12 +2183,12 @@ entry:
define i8 @src_bit_arithmetic_bitsize_1_high(i8 %x, i8 %y) {
; CHECK-LABEL: @src_bit_arithmetic_bitsize_1_high(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[NARROW:%.*]] = add i8 [[Y:%.*]], [[X:%.*]]
-; CHECK-NEXT: [[BF_VALUE:%.*]] = and i8 [[NARROW]], 7
-; CHECK-NEXT: [[BF_LSHR:%.*]] = and i8 [[X]], 120
-; CHECK-NEXT: [[BF_LSHR1244:%.*]] = add i8 [[BF_LSHR]], [[Y]]
-; CHECK-NEXT: [[BF_SHL:%.*]] = and i8 [[BF_LSHR1244]], 120
-; CHECK-NEXT: [[BF_SET20:%.*]] = or disjoint i8 [[BF_VALUE]], [[BF_SHL]]
+; CHECK-NEXT: [[TMP0:%.*]] = and i8 [[X:%.*]], 59
+; CHECK-NEXT: [[TMP1:%.*]] = and i8 [[Y:%.*]], 59
+; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i8 [[TMP0]], [[TMP1]]
+; CHECK-NEXT: [[TMP3:%.*]] = xor i8 [[X]], [[Y]]
+; CHECK-NEXT: [[TMP4:%.*]] = and i8 [[TMP3]], 68
+; CHECK-NEXT: [[BF_SET20:%.*]] = xor i8 [[TMP2]], [[TMP4]]
; CHECK-NEXT: [[BF_LSHR22:%.*]] = and i8 [[X]], -128
; CHECK-NEXT: [[BF_LSHR2547:%.*]] = add i8 [[BF_LSHR22]], [[Y]]
; CHECK-NEXT: [[BF_VALUE30:%.*]] = and i8 [[BF_LSHR2547]], -128
@@ -2250,12 +2241,12 @@ entry:
define i8 @src_bit_arithmetic_bitmask_mid_over_high(i8 %x, i8 %y) {
; CHECK-LABEL: @src_bit_arithmetic_bitmask_mid_over_high(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[NARROW:%.*]] = add i8 [[Y:%.*]], [[X:%.*]]
-; CHECK-NEXT: [[BF_VALUE:%.*]] = and i8 [[NARROW]], 7
-; CHECK-NEXT: [[BF_LSHR:%.*]] = and i8 [[X]], 56
-; CHECK-NEXT: [[BF_LSHR1244:%.*]] = add i8 [[BF_LSHR]], [[Y]]
-; CHECK-NEXT: [[BF_SHL:%.*]] = and i8 [[BF_LSHR1244]], 56
-; CHECK-NEXT: [[BF_SET20:%.*]] = or disjoint i8 [[BF_VALUE]], [[BF_SHL]]
+; CHECK-NEXT: [[TMP0:%.*]] = and i8 [[X:%.*]], 27
+; CHECK-NEXT: [[TMP1:%.*]] = and i8 [[Y:%.*]], 27
+; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i8 [[TMP0]], [[TMP1]]
+; CHECK-NEXT: [[TMP3:%.*]] = xor i8 [[X]], [[Y]]
+; CHECK-NEXT: [[TMP4:%.*]] = and i8 [[TMP3]], 36
+; CHECK-NEXT: [[BF_SET20:%.*]] = xor i8 [[TMP2]], [[TMP4]]
; CHECK-NEXT: [[BF_LSHR22:%.*]] = and i8 [[X]], -32
; CHECK-NEXT: [[BF_LSHR2547:%.*]] = add i8 [[BF_LSHR22]], [[Y]]
; CHECK-NEXT: [[BF_VALUE30:%.*]] = and i8 [[BF_LSHR2547]], -32
@@ -2308,12 +2299,12 @@ entry:
define i8 @src_bit_arithmetic_bitmask_high_under_mid(i8 %x, i8 %y) {
; CHECK-LABEL: @src_bit_arithmetic_bitmask_high_under_mid(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[NARROW:%.*]] = add i8 [[Y:%.*]], [[X:%.*]]
-; CHECK-NEXT: [[BF_VALUE:%.*]] = and i8 [[NARROW]], 7
-; CHECK-NEXT: [[BF_LSHR:%.*]] = and i8 [[X]], 24
-; CHECK-NEXT: [[BF_LSHR1244:%.*]] = add i8 [[BF_LSHR]], [[Y]]
-; CHECK-NEXT: [[BF_SHL:%.*]] = and i8 [[BF_LSHR1244]], 24
-; CHECK-NEXT: [[BF_SET20:%.*]] = or disjoint i8 [[BF_VALUE]], [[BF_SHL]]
+; CHECK-NEXT: [[TMP0:%.*]] = and i8 [[X:%.*]], 11
+; CHECK-NEXT: [[TMP1:%.*]] = and i8 [[Y:%.*]], 11
+; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i8 [[TMP0]], [[TMP1]]
+; CHECK-NEXT: [[TMP3:%.*]] = xor i8 [[X]], [[Y]]
+; CHECK-NEXT: [[TMP4:%.*]] = and i8 [[TMP3]], 20
+; CHECK-NEXT: [[BF_SET20:%.*]] = xor i8 [[TMP2]], [[TMP4]]
; CHECK-NEXT: [[BF_LSHR22:%.*]] = and i8 [[X]], -16
; CHECK-NEXT: [[BF_LSHR2547:%.*]] = add i8 [[BF_LSHR22]], [[Y]]
; CHECK-NEXT: [[BF_VALUE30:%.*]] = and i8 [[BF_LSHR2547]], -16
@@ -2415,11 +2406,10 @@ entry:
define i8 @src_bit_arithmetic_addition_under_bitmask_high(i8 %x) {
; CHECK-LABEL: @src_bit_arithmetic_addition_under_bitmask_high(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[NARROW:%.*]] = add i8 [[X:%.*]], 1
-; CHECK-NEXT: [[BF_VALUE:%.*]] = and i8 [[NARROW]], 7
-; CHECK-NEXT: [[BF_LSHR1244:%.*]] = add i8 [[X]], 8
-; CHECK-NEXT: [[BF_SHL:%.*]] = and i8 [[BF_LSHR1244]], 24
-; CHECK-NEXT: [[BF_SET20:%.*]] = or disjoint i8 [[BF_VALUE]], [[BF_SHL]]
+; CHECK-NEXT: [[TMP0:%.*]] = and i8 [[X:%.*]], 11
+; CHECK-NEXT: [[TMP1:%.*]] = add nuw nsw i8 [[TMP0]], 9
+; CHECK-NEXT: [[TMP2:%.*]] = and i8 [[X]], 20
+; CHECK-NEXT: [[BF_SET20:%.*]] = xor i8 [[TMP1]], [[TMP2]]
; CHECK-NEXT: [[BF_LSHR22:%.*]] = and i8 [[X]], -32
; CHECK-NEXT: [[BF_SET33:%.*]] = or disjoint i8 [[BF_SET20]], [[BF_LSHR22]]
; CHECK-NEXT: ret i8 [[BF_SET33]]
>From 11baae5d3ba38a4d8d39e127a3cecb7f4fd342e2 Mon Sep 17 00:00:00 2001
From: hanbeom <kese111 at gmail.com>
Date: Thu, 18 Jul 2024 12:03:09 +0900
Subject: [PATCH 3/4] Change APInt type of the arguments of AccumulateY to
const APInt
---
llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index 2fe94c3aa5ec7..528250a9b2e17 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -3575,8 +3575,8 @@ static Value *foldBitFieldArithmetic(BinaryOperator &I,
// If operand of bitfield operation is a constant, sum of the constants is
// computed and returned. if operand is not a constant, operand is
// returned. if this operation is not a bitfield operation, null is returned.
- auto AccumulateY = [&](Value *LoY, Value *UpY, APInt LoMask,
- APInt UpMask) -> Value * {
+ auto AccumulateY = [&](Value *LoY, Value *UpY, const APInt LoMask,
+ const APInt UpMask) -> Value * {
Value *Y = nullptr;
auto *CLoY = dyn_cast_or_null<Constant>(LoY);
auto *CUpY = dyn_cast_or_null<Constant>(UpY);
>From 77ed1ab462a78cf47191fd3b1d0ba3b813b7839f Mon Sep 17 00:00:00 2001
From: hanbeom <kese111 at gmail.com>
Date: Sun, 21 Jul 2024 21:33:53 +0900
Subject: [PATCH 4/4] Fix overwriting the same variable due to incorrect use
---
llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp | 8 +++++---
1 file changed, 5 insertions(+), 3 deletions(-)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index 528250a9b2e17..3e9bc75a2955f 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -3604,7 +3604,8 @@ static Value *foldBitFieldArithmetic(BinaryOperator &I,
// BitFieldAddInfo.
auto MatchBitFieldAdd =
[&](BinaryOperator &I) -> std::optional<BitFieldAddInfo> {
- const APInt *OptLoMask, *OptUpMask, *LoMask, *UpMask, *UpMask2 = nullptr;
+ const APInt *OptLoMask, *OptLoMask2 = nullptr, *OptUpMask, *LoMask, *UpMask,
+ *UpMask2 = nullptr;
Value *X, *Y, *UpY;
// Bitfield has more than 2 member.
@@ -3631,7 +3632,7 @@ static Value *foldBitFieldArithmetic(BinaryOperator &I,
m_c_Xor(m_CombineOr(
// When Y is not the constant.
m_c_Add(m_And(m_Value(X), m_APInt(OptLoMask)),
- m_And(m_Value(Y), m_APInt(OptLoMask))),
+ m_And(m_Value(Y), m_APInt(OptLoMask2))),
// When Y is Constant, it can be accumulated.
m_c_Add(m_And(m_Value(X), m_APInt(OptLoMask)), m_Value(Y))),
// If Y is a constant, X^Y&OptUpMask can be pre-computed and
@@ -3662,7 +3663,8 @@ static Value *foldBitFieldArithmetic(BinaryOperator &I,
}
// Match already optimized bitfield operation.
- if (match(&I, OptBitFieldAdd)) {
+ if (match(&I, OptBitFieldAdd) &&
+ (OptLoMask2 == OptLoMask || OptLoMask2 == nullptr)) {
APInt Mask = APInt::getBitsSet(
BitWidth, BitWidth - OptUpMask->countl_zero(), BitWidth);
APInt Mask2 = APInt::getBitsSet(
More information about the llvm-commits
mailing list