[clang] [libclang/python] Fix bug in `SourceRange.__contains__`, add tests (PR #101802)

Jannick Kremer via cfe-commits cfe-commits at lists.llvm.org
Wed Aug 14 13:55:42 PDT 2024


https://github.com/DeinAlptraum updated https://github.com/llvm/llvm-project/pull/101802

>From 572b1be0b204561fdbb049f0c17f065d17198ac0 Mon Sep 17 00:00:00 2001
From: Jannick Kremer <jannick.kremer at mailbox.org>
Date: Sat, 3 Aug 2024 09:28:02 +0100
Subject: [PATCH 1/4] [libclang/python] Fix bug in SourceRange.__contains__,
 add tests

---
 clang/bindings/python/clang/cindex.py         |  4 ++
 .../python/tests/cindex/test_source_range.py  | 56 +++++++++++++++++++
 2 files changed, 60 insertions(+)
 create mode 100644 clang/bindings/python/tests/cindex/test_source_range.py

diff --git a/clang/bindings/python/clang/cindex.py b/clang/bindings/python/clang/cindex.py
index c251c46a04adf4..5fd7cc64810732 100644
--- a/clang/bindings/python/clang/cindex.py
+++ b/clang/bindings/python/clang/cindex.py
@@ -386,6 +386,10 @@ def __contains__(self, other):
         # same file, in between lines
         if self.start.line < other.line < self.end.line:
             return True
+        # between columns in one-liner range
+        elif self.start.line == other.line == self.end.line:
+            if self.start.column <= other.column <= self.end.column:
+                return True
         elif self.start.line == other.line:
             # same file first line
             if self.start.column <= other.column:
diff --git a/clang/bindings/python/tests/cindex/test_source_range.py b/clang/bindings/python/tests/cindex/test_source_range.py
new file mode 100644
index 00000000000000..9f76848e890207
--- /dev/null
+++ b/clang/bindings/python/tests/cindex/test_source_range.py
@@ -0,0 +1,56 @@
+import unittest
+
+from clang.cindex import SourceLocation, SourceRange
+
+from .util import get_tu
+
+
+def create_location(tu, line, column):
+    return SourceLocation.from_position(tu, tu.get_file(tu.spelling), line, column)
+
+
+def create_range(tu, line1, column1, line2, column2):
+    return SourceRange.from_locations(
+        create_location(tu, line1, column1), create_location(tu, line2, column2)
+    )
+
+
+class TestSourceRange(unittest.TestCase):
+    def test_contains(self):
+        tu = get_tu(
+            """aaaaa
+aaaaa
+aaaaa
+aaaaa"""
+        )
+
+        l13 = create_location(tu, 1, 3)
+        l21 = create_location(tu, 2, 1)
+        l22 = create_location(tu, 2, 2)
+        l23 = create_location(tu, 2, 3)
+        l24 = create_location(tu, 2, 4)
+        l25 = create_location(tu, 2, 5)
+        l33 = create_location(tu, 3, 3)
+        l31 = create_location(tu, 3, 1)
+        r22_24 = create_range(tu, 2, 2, 2, 4)
+        r23_23 = create_range(tu, 2, 3, 2, 3)
+        r24_32 = create_range(tu, 2, 4, 3, 2)
+        r14_32 = create_range(tu, 1, 4, 3, 2)
+
+        assert l13 not in r22_24  # Line before start
+        assert l21 not in r22_24  # Column before start
+        assert l22 in r22_24  # Colum on start
+        assert l23 in r22_24  # Column in range
+        assert l24 in r22_24  # Column on end
+        assert l25 not in r22_24  # Column after end
+        assert l33 not in r22_24  # Line after end
+
+        assert l23 in r23_23  # In one-column range
+
+        assert l23 not in r24_32  # Outside range in first line
+        assert l33 not in r24_32  # Outside range in last line
+        assert l25 in r24_32  # In range in first line
+        assert l31 in r24_32  # In range in last line
+
+        assert l21 in r14_32  # In range at start of center line
+        assert l25 in r14_32  # In range at end of center line

>From e27cf4b01f61dd6cf7ee8fa86857c566716eabfb Mon Sep 17 00:00:00 2001
From: Jannick Kremer <jannick.kremer at mailbox.org>
Date: Wed, 7 Aug 2024 19:54:35 +0200
Subject: [PATCH 2/4] Implement SourceRange.__contains__ via
 SourceLocation.__le__

---
 clang/bindings/python/clang/cindex.py | 25 +++++++++----------------
 clang/docs/ReleaseNotes.rst           |  2 ++
 2 files changed, 11 insertions(+), 16 deletions(-)

diff --git a/clang/bindings/python/clang/cindex.py b/clang/bindings/python/clang/cindex.py
index 5fd7cc64810732..162ed786607550 100644
--- a/clang/bindings/python/clang/cindex.py
+++ b/clang/bindings/python/clang/cindex.py
@@ -270,6 +270,14 @@ def _get_instantiation(self):
             self._data = (f, int(l.value), int(c.value), int(o.value))
         return self._data
 
+    def __le__(self, other):
+        if self.line < other.line:
+            return True
+        if self.line == other.line and self.column <= other.column:
+            return True
+        # when self.line > other.line
+        return False
+
     @staticmethod
     def from_position(tu, file, line, column):
         """
@@ -383,22 +391,7 @@ def __contains__(self, other):
         ):
             # same file name
             return False
-        # same file, in between lines
-        if self.start.line < other.line < self.end.line:
-            return True
-        # between columns in one-liner range
-        elif self.start.line == other.line == self.end.line:
-            if self.start.column <= other.column <= self.end.column:
-                return True
-        elif self.start.line == other.line:
-            # same file first line
-            if self.start.column <= other.column:
-                return True
-        elif other.line == self.end.line:
-            # same file last line
-            if other.column <= self.end.column:
-                return True
-        return False
+        return self.start <= other <= self.end
 
     def __repr__(self):
         return "<SourceRange start %r, end %r>" % (self.start, self.end)
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 39e1b0fcb09bbd..6d1b3a9c2184d8 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -355,6 +355,8 @@ Sanitizers
 Python Binding Changes
 ----------------------
 - Fixed an issue that led to crashes when calling ``Type.get_exception_specification_kind``.
+- Fixed a bug in ``SourceRange.__contains__`` that led to false positives
+  when the source range is only one line. 
 
 OpenMP Support
 --------------

>From e3196874ce66bb75ec0e49217ae74ea6f535e22b Mon Sep 17 00:00:00 2001
From: Jannick Kremer <jannick.kremer at mailbox.org>
Date: Wed, 14 Aug 2024 19:13:31 +0200
Subject: [PATCH 3/4] Expose file-sensitive SourceLocation::operator<

Use this to implement the operator on Python side
---
 clang/bindings/python/clang/cindex.py         | 15 ++++---
 .../python/tests/cindex/test_location.py      | 41 +++++++++++++++++++
 .../python/tests/cindex/test_source_range.py  | 26 ++++++------
 clang/include/clang-c/CXSourceLocation.h      | 10 +++++
 clang/tools/libclang/CXSourceLocation.cpp     | 12 ++++++
 clang/tools/libclang/libclang.map             |  5 +++
 6 files changed, 87 insertions(+), 22 deletions(-)

diff --git a/clang/bindings/python/clang/cindex.py b/clang/bindings/python/clang/cindex.py
index 162ed786607550..8ea49176972026 100644
--- a/clang/bindings/python/clang/cindex.py
+++ b/clang/bindings/python/clang/cindex.py
@@ -270,14 +270,6 @@ def _get_instantiation(self):
             self._data = (f, int(l.value), int(c.value), int(o.value))
         return self._data
 
-    def __le__(self, other):
-        if self.line < other.line:
-            return True
-        if self.line == other.line and self.column <= other.column:
-            return True
-        # when self.line > other.line
-        return False
-
     @staticmethod
     def from_position(tu, file, line, column):
         """
@@ -327,6 +319,12 @@ def __eq__(self, other):
     def __ne__(self, other):
         return not self.__eq__(other)
 
+    def __lt__(self, other: SourceLocation) -> bool:
+        return conf.lib.clang_lessThanLocations(self, other)  # type: ignore [no-any-return]
+
+    def __le__(self, other: SourceLocation) -> bool:
+        return self < other or self == other
+
     def __repr__(self):
         if self.file:
             filename = self.file.name
@@ -3905,6 +3903,7 @@ def write_main_file_to_stdout(self):
     ("clang_isUnexposed", [CursorKind], bool),
     ("clang_isVirtualBase", [Cursor], bool),
     ("clang_isVolatileQualifiedType", [Type], bool),
+    ("clang_lessThanLocations", [SourceLocation, SourceLocation], bool),
     (
         "clang_parseTranslationUnit",
         [Index, c_interop_string, c_void_p, c_int, c_void_p, c_int, c_int],
diff --git a/clang/bindings/python/tests/cindex/test_location.py b/clang/bindings/python/tests/cindex/test_location.py
index e23677a9be3c09..5cd9e2fd9dea70 100644
--- a/clang/bindings/python/tests/cindex/test_location.py
+++ b/clang/bindings/python/tests/cindex/test_location.py
@@ -130,3 +130,44 @@ def test_is_system_location(self):
         two = get_cursor(tu, "two")
         self.assertFalse(one.location.is_in_system_header)
         self.assertTrue(two.location.is_in_system_header)
+
+    def test_operator_lt(self):
+        tu = get_tu("aaaaa")
+        t1_f1 = tu.get_file(tu.spelling)
+
+        tu2 = TranslationUnit.from_source(
+            tu.spelling,
+            unsaved_files=[
+                (
+                    tu.spelling,
+                    '#include "fake2.c"',
+                ),
+                (
+                    "./fake2.c",
+                    "aaaaa",
+                ),
+            ],
+        )
+        t2_f1 = tu2.get_file(tu.spelling)
+        t2_f2 = tu2.get_file("./fake2.c")
+
+        l_t1_f1_12 = SourceLocation.from_position(tu, t1_f1, 1, 2)
+        l_t1_f1_13 = SourceLocation.from_position(tu, t1_f1, 1, 3)
+        l_t1_f1_14 = SourceLocation.from_position(tu, t1_f1, 1, 4)
+
+        l_t2_f1_13 = SourceLocation.from_position(tu2, t2_f1, 1, 3)
+        l_t2_f2_12 = SourceLocation.from_position(tu2, t2_f2, 1, 2)
+        l_t2_f2_14 = SourceLocation.from_position(tu2, t2_f2, 1, 4)
+
+        # In same file
+        assert l_t1_f1_12 < l_t1_f1_13 < l_t1_f1_14
+        assert not l_t1_f1_13 < l_t1_f1_12
+
+        # In same file, different TU
+        assert l_t1_f1_12 < l_t2_f1_13 < l_t1_f1_14
+        assert not l_t2_f1_13 < l_t1_f1_12
+        assert not l_t1_f1_14 < l_t2_f1_13
+
+        # In different file, same TU
+        assert not l_t2_f1_13 < l_t2_f2_14
+        assert not l_t2_f2_12 < l_t2_f1_13
diff --git a/clang/bindings/python/tests/cindex/test_source_range.py b/clang/bindings/python/tests/cindex/test_source_range.py
index 9f76848e890207..46559a52b4dd25 100644
--- a/clang/bindings/python/tests/cindex/test_source_range.py
+++ b/clang/bindings/python/tests/cindex/test_source_range.py
@@ -5,13 +5,10 @@
 from .util import get_tu
 
 
-def create_location(tu, line, column):
-    return SourceLocation.from_position(tu, tu.get_file(tu.spelling), line, column)
-
-
 def create_range(tu, line1, column1, line2, column2):
     return SourceRange.from_locations(
-        create_location(tu, line1, column1), create_location(tu, line2, column2)
+        SourceLocation.from_position(tu, tu.get_file(tu.spelling), line1, column1),
+        SourceLocation.from_position(tu, tu.get_file(tu.spelling), line2, column2),
     )
 
 
@@ -23,15 +20,16 @@ def test_contains(self):
 aaaaa
 aaaaa"""
         )
-
-        l13 = create_location(tu, 1, 3)
-        l21 = create_location(tu, 2, 1)
-        l22 = create_location(tu, 2, 2)
-        l23 = create_location(tu, 2, 3)
-        l24 = create_location(tu, 2, 4)
-        l25 = create_location(tu, 2, 5)
-        l33 = create_location(tu, 3, 3)
-        l31 = create_location(tu, 3, 1)
+        file = tu.get_file(tu.spelling)
+
+        l13 = SourceLocation.from_position(tu, file, 1, 3)
+        l21 = SourceLocation.from_position(tu, file, 2, 1)
+        l22 = SourceLocation.from_position(tu, file, 2, 2)
+        l23 = SourceLocation.from_position(tu, file, 2, 3)
+        l24 = SourceLocation.from_position(tu, file, 2, 4)
+        l25 = SourceLocation.from_position(tu, file, 2, 5)
+        l33 = SourceLocation.from_position(tu, file, 3, 3)
+        l31 = SourceLocation.from_position(tu, file, 3, 1)
         r22_24 = create_range(tu, 2, 2, 2, 4)
         r23_23 = create_range(tu, 2, 3, 2, 3)
         r24_32 = create_range(tu, 2, 4, 3, 2)
diff --git a/clang/include/clang-c/CXSourceLocation.h b/clang/include/clang-c/CXSourceLocation.h
index dcb13ba273173f..3d46ab70f4eb79 100644
--- a/clang/include/clang-c/CXSourceLocation.h
+++ b/clang/include/clang-c/CXSourceLocation.h
@@ -74,6 +74,16 @@ CINDEX_LINKAGE CXSourceLocation clang_getNullLocation(void);
 CINDEX_LINKAGE unsigned clang_equalLocations(CXSourceLocation loc1,
                                              CXSourceLocation loc2);
 
+/**
+ * Determine for two source locations if they refer to the same file
+ * and one of them comes strictly before the other in the source code.
+ *
+ * \returns non-zero if the source locations refer to the same file and the
+ * first one comes strictly before the second one, zero otherwise.
+ */
+CINDEX_LINKAGE unsigned clang_lessThanLocations(CXSourceLocation loc1,
+                                                CXSourceLocation loc2);
+
 /**
  * Returns non-zero if the given source location is in a system header.
  */
diff --git a/clang/tools/libclang/CXSourceLocation.cpp b/clang/tools/libclang/CXSourceLocation.cpp
index 53cb71f7276f29..d3e498de570c8e 100644
--- a/clang/tools/libclang/CXSourceLocation.cpp
+++ b/clang/tools/libclang/CXSourceLocation.cpp
@@ -50,6 +50,18 @@ unsigned clang_equalLocations(CXSourceLocation loc1, CXSourceLocation loc2) {
           loc1.int_data == loc2.int_data);
 }
 
+unsigned clang_lessThanLocations(CXSourceLocation loc1, CXSourceLocation loc2) {
+  const SourceLocation Loc1 = SourceLocation::getFromRawEncoding(loc1.int_data);
+  const SourceLocation Loc2 = SourceLocation::getFromRawEncoding(loc2.int_data);
+
+  const SourceManager &SM =
+      *static_cast<const SourceManager *>(loc1.ptr_data[0]);
+  if (!SM.isWrittenInSameFile(Loc1, Loc2))
+    return 0;
+
+  return Loc1 < Loc2;
+}
+
 CXSourceRange clang_getNullRange() {
   CXSourceRange Result = { { nullptr, nullptr }, 0, 0 };
   return Result;
diff --git a/clang/tools/libclang/libclang.map b/clang/tools/libclang/libclang.map
index 371fe512ce71c1..60a3cb633ca088 100644
--- a/clang/tools/libclang/libclang.map
+++ b/clang/tools/libclang/libclang.map
@@ -434,6 +434,11 @@ LLVM_19 {
     clang_Cursor_getBinaryOpcodeStr;
 };
 
+LLVM_20 {
+  global:
+    clang_lessThanLocations;
+};
+
 # Example of how to add a new symbol version entry.  If you do add a new symbol
 # version, please update the example to depend on the version you added.
 # LLVM_X {

>From f446933a88fc68383077f8bb8c64c0d3b97d101f Mon Sep 17 00:00:00 2001
From: Jannick Kremer <jannick.kremer at mailbox.org>
Date: Wed, 14 Aug 2024 22:51:01 +0200
Subject: [PATCH 4/4] Use isBeforeInTranslationUnit to establish ordering

Also add breaking Release Note
---
 .../python/tests/cindex/test_location.py      | 43 +++++--------------
 clang/docs/ReleaseNotes.rst                   |  3 ++
 clang/tools/libclang/CXSourceLocation.cpp     |  7 ++-
 3 files changed, 17 insertions(+), 36 deletions(-)

diff --git a/clang/bindings/python/tests/cindex/test_location.py b/clang/bindings/python/tests/cindex/test_location.py
index 5cd9e2fd9dea70..27854a312e6721 100644
--- a/clang/bindings/python/tests/cindex/test_location.py
+++ b/clang/bindings/python/tests/cindex/test_location.py
@@ -133,41 +133,20 @@ def test_is_system_location(self):
 
     def test_operator_lt(self):
         tu = get_tu("aaaaa")
-        t1_f1 = tu.get_file(tu.spelling)
+        t1_f = tu.get_file(tu.spelling)
+        tu2 = get_tu("aaaaa")
 
-        tu2 = TranslationUnit.from_source(
-            tu.spelling,
-            unsaved_files=[
-                (
-                    tu.spelling,
-                    '#include "fake2.c"',
-                ),
-                (
-                    "./fake2.c",
-                    "aaaaa",
-                ),
-            ],
-        )
-        t2_f1 = tu2.get_file(tu.spelling)
-        t2_f2 = tu2.get_file("./fake2.c")
+        l_t1_12 = SourceLocation.from_position(tu, t1_f, 1, 2)
+        l_t1_13 = SourceLocation.from_position(tu, t1_f, 1, 3)
+        l_t1_14 = SourceLocation.from_position(tu, t1_f, 1, 4)
 
-        l_t1_f1_12 = SourceLocation.from_position(tu, t1_f1, 1, 2)
-        l_t1_f1_13 = SourceLocation.from_position(tu, t1_f1, 1, 3)
-        l_t1_f1_14 = SourceLocation.from_position(tu, t1_f1, 1, 4)
-
-        l_t2_f1_13 = SourceLocation.from_position(tu2, t2_f1, 1, 3)
-        l_t2_f2_12 = SourceLocation.from_position(tu2, t2_f2, 1, 2)
-        l_t2_f2_14 = SourceLocation.from_position(tu2, t2_f2, 1, 4)
+        l_t2_13 = SourceLocation.from_position(tu2, tu2.get_file(tu2.spelling), 1, 3)
 
         # In same file
-        assert l_t1_f1_12 < l_t1_f1_13 < l_t1_f1_14
-        assert not l_t1_f1_13 < l_t1_f1_12
+        assert l_t1_12 < l_t1_13 < l_t1_14
+        assert not l_t1_13 < l_t1_12
 
         # In same file, different TU
-        assert l_t1_f1_12 < l_t2_f1_13 < l_t1_f1_14
-        assert not l_t2_f1_13 < l_t1_f1_12
-        assert not l_t1_f1_14 < l_t2_f1_13
-
-        # In different file, same TU
-        assert not l_t2_f1_13 < l_t2_f2_14
-        assert not l_t2_f2_12 < l_t2_f1_13
+        assert l_t1_12 < l_t2_13 < l_t1_14
+        assert not l_t2_13 < l_t1_12
+        assert not l_t1_14 < l_t2_13
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 6d1b3a9c2184d8..b25692e804aacc 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -65,6 +65,9 @@ Clang Python Bindings Potentially Breaking Changes
 - Calling a property on the ``CompletionChunk`` or ``CompletionString`` class
   statically now leads to an error, instead of returning a ``CachedProperty`` object
   that is used internally. Properties are only available on instances.
+- If ``SourceRange.__contains__`` is used with a single-line ``SourceRange``, the check used
+  to return ``True`` if the end of the ``SourceLocation`` lay after the end of the ``SourceRange``
+  on the same line. This is changed to return ``False`` now.
 
 What's New in Clang |release|?
 ==============================
diff --git a/clang/tools/libclang/CXSourceLocation.cpp b/clang/tools/libclang/CXSourceLocation.cpp
index d3e498de570c8e..d97a756f75db93 100644
--- a/clang/tools/libclang/CXSourceLocation.cpp
+++ b/clang/tools/libclang/CXSourceLocation.cpp
@@ -56,10 +56,9 @@ unsigned clang_lessThanLocations(CXSourceLocation loc1, CXSourceLocation loc2) {
 
   const SourceManager &SM =
       *static_cast<const SourceManager *>(loc1.ptr_data[0]);
-  if (!SM.isWrittenInSameFile(Loc1, Loc2))
-    return 0;
-
-  return Loc1 < Loc2;
+  // Use the appropriate SourceManager method here rather than operator< because
+  // ordering is meaningful only if LHS and RHS have the same FileID
+  return SM.isBeforeInTranslationUnit(Loc1, Loc2);
 }
 
 CXSourceRange clang_getNullRange() {



More information about the cfe-commits mailing list