[PATCH] D108230: [analyzer] Ignore single element arrays in getStaticSize() conditionally

Balázs Benics via Phabricator via cfe-commits cfe-commits at lists.llvm.org
Thu Nov 17 01:29:25 PST 2022


steakhal added a comment.
Herald added a project: All.

I did some extensive measurements of this option.
According to my results, we could confidently enable this option by default; but still, keep the flag for backward compatibility and enable the users to opt-out.

The benefit of enabling this is project specific; if that exploits this single element FAM construct or not.
It got rid of about ~4500 reports, mostly by either of the OOB checkers and returned pointer checker.
Also introduced about ~170 new reports of different kinds, as expected.
I focused my evaluation on the disappearing issues, and they are all FPs related to buffer accesses of FAM buffers.
I found only a single questionable case, about which I'm not sure if we suppress a TP - difficult to tell.

Possible TP suppressed:

  Linux/CXGB4VF driver:
  struct adapter {
      // ...
      struct adapter_params params; // params.nports.
      // ...
  
      /* Linux network device resources */
      struct net_device *port[MAX_NPORTS]; // aka [1] <--- this cannot be a FAM, yet derefs of this gonna be suppressed
      const char *name;
      // ...
  };

Finally, here are the FAMs that actually produced FPs reports by accessing the FAM buffer.

  PHP zend_string
      struct _zend_string {
          zend_refcounted_h gc;
          zend_ulong        h;
          size_t            len;
          char              val[1]; // <---
      };
  
  GCC
      struct decl {
          // ...
          union {
              struct excl_rel_decl {
                  int all_names_num;
                  int first_list_length;
                  char *names [1]; // <---
              } excl;
              // ...
          } decl;
      };
  
      struct regexp {
          // ...
          union {
              struct sequence_regexp {
                  int regexps_num;
                  regexp_t regexps [1]; // <---
              } sequence;
              // ...
          } regexp;
      };
  
      struct GTY((tag("GSS_WITH_MEM_OPS")))
      gimple_statement_with_memory_ops : public gimple_statement_with_memory_ops_base {
          /* [ WORD 1-9 ] : base class */
  
          /* [ WORD 10 ]
              Operand vector.  NOTE!  This must always be the last field
              of this structure.  In particular, this means that this
              structure cannot be embedded inside another one.  */
          tree GTY((length ("%h.num_ops"))) op[1]; // <---
      };
  
  CPython
      typedef struct _longobject PyLongObject;
      struct _longobject {
          struct PyVarObject {
              PyObject ob_base;
              Py_ssize_t ob_size; /* Number of items in variable part */
          } ob_base;
          digit ob_digit[1]; // <---
      };
      typedef struct {
          struct PyVarObject {
              PyObject ob_base;
              Py_ssize_t ob_size; /* Number of items in variable part */
          } ob_base;
          uint32_t b_bitmap;
          PyObject *b_array[1]; // <---
      } PyHamtNode_Bitmap;
      struct Bigint {
          struct Bigint *next;
          int k, maxwds, sign, wds;
          ULong x[1]; // <---
      };
  
  International Components for Unicode (ICU); GCC/libdecnumber
      #define DECNUMDIGITS 1
      #define DECDPUN 1
      #define DECNUMUNITS ((DECNUMDIGITS+DECDPUN-1)/DECDPUN)
      typedef struct {
          int32_t digits;
          int32_t exponent;
          uint8_t bits;
          decNumberUnit lsu[DECNUMUNITS]; // aka [1] <---
      } decNumber;
  
  firebird
      typedef struct nod {
          // ...
          SSHORT      nod_count;
          struct nod *nod_arg[1]; // <---
      } *QLI_NOD;
  
  liblmdb
  struct MDB_page {
  	// ...
  	indx_t		mp_ptrs[1];		/**< dynamic size */  // <---
  };
  
  libreoffice
      typedef struct _rtl_String {
          // ...
          sal_Int32 length;
          char buffer[1]; // <---
      } rtl_String;
  
  
  sqlite3
      struct KeyInfo {
          // ...
          u16 nField;         /* Number of entries in aColl[] */
          CollSeq *aColl[1]; // <---
      };
  
  
  Binutils
      struct elf_segment_map {
          // ...
          /* Number of sections (may be 0).  */
          unsigned int count;
          /* Sections.  Actual number of elements is in count field.  */
          asection *sections[1]; // <---
      };
  
      struct frag {
          // ...
          /* For variable-length tail.  */
          offsetT fr_offset;
          /* For variable-length tail.  */
          symbolS *fr_symbol;
          // ...
          /* Data begins here.  */
          char fr_literal[1]; // <---
      };
  
  Z3
      class fixed_bit_vector {
          friend class fixed_bit_vector_manager;
          friend class tbv_manager;
          unsigned m_data[1]; // <---
      };
  
  httpd
      typedef struct apreq_value_t {
          char             *name;    /**< value name */
          apr_size_t        nlen;    /**< length of name */
          apr_size_t        dlen;    /**< length of data */
          char              data[1]; /**< value data  */ //  <---
      } apreq_value_t;
  
  BusyBox
      struct dns_entry {
          struct dns_entry *next;
          uint32_t ip;
          char rip[IP_STRING_LEN]; /* length decimal reversed IP */
          char name[1]; // <---
      };
  
  Linux
      struct i40e_section_table {
          u32 section_count;
          u32 section_offset[1]; // <---
      };
      struct smb_negotiate_req {
          struct smb_hdr hdr;     /* wct = 0 */
          __le16 ByteCount;
          unsigned char DialectsArray[1]; // <---
      } __packed;
  
  zabbix
      typedef struct zbx_jsonpath_list_item {
          struct zbx_jsonpath_list_item	*next;
          /* the structure is always over-allocated so that either int */
          /* or a zero terminated string can be stored in data         */
          char data[1]; // <---
      } zbx_jsonpath_list_node_t;
  
  boost
      struct hash_item {
          struct hash_header header;
          char data[1]; // <---
      };
  
  zstd
      typedef struct {
          ZSTD_pthread_mutex_t poolMutex;
          int totalCCtx;
          int availCCtx;
          ZSTD_customMem cMem;
          ZSTD_CCtx* cctx[1];   /* variable size */ // <---
      } ZSTDMT_CCtxPool;
  
  vim
      struct sblock_S {
          int		sb_used;	// nr of bytes already in use
          sblock_T	*sb_next;	// next block in list
          char_u	sb_data[1];	// data, actually longer // <---
      };
  
  WebkitGTK
      class StackTrace {
          // ...
  
          // We structure the top fields this way because the underlying stack capture
          // facility will capture from the top of the stack, and we'll need to skip the
          // top 2 frame which is of no interest. Setting up the fields layout this way
          // allows us to capture the stack in place and minimize space wastage.
          union {
              struct {
                  int m_size;
                  int m_capacity;
              };
              struct {
                  void* m_skippedFrame0;
                  void* m_skippedFrame1;
              };
          };
          union {
              void** m_borrowedStack;
              void* m_stack[1]; // <---
          };
      };
  
  nginx
      typedef struct {
          void *value;
          u_short len;
          u_char name[1]; // <---
      } ngx_hash_elt_t;
  
  netdata
      struct registry_person_url {
          // ...
          // the name of the machine, as known by the user
          // dynamically allocated to fit properly
          char machine_name[1]; // <---
      };
  
  zfs
      typedef struct LClosure {
          ClosureHeader;
          struct Proto *p;
          UpVal *upvals[1];  /* list of upvalues */ // <---
      } LClosure;

This is not a complete list, but probably comprehensive enough to enable this option by default.
WDYT? @xazax.hun @NoQ

In addition to this, it would be interesting to think about using `LangOptions::StrictFlexArraysLevelKind`: `-fstrict-flex-arrays={**0**,1,2,3}`, but I don't intend to change anything related to that in particular.

  enum class StrictFlexArraysLevelKind {
    /// Any trailing array member is a FAM.
    Default = 0,
    /// Any trailing array member of undefined, 0, or 1 size is a FAM.
    OneZeroOrIncomplete = 1,
    /// Any trailing array member of undefined or 0 size is a FAM.
    ZeroOrIncomplete = 2,
    /// Any trailing array member of undefined size is a FAM.
    IncompleteOnly = 3,
  };

FYI @vabridgers


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D108230/new/

https://reviews.llvm.org/D108230



More information about the cfe-commits mailing list