diffblue/cbmc

Integer to pointer casting behaviour?

Opened this issue · 3 comments

#include <stddef.h>
#include <stdint.h>
#include <stdbool.h>

#define GRANULE_SIZE 8
#define RMM_MAX_GRANULES 2
unsigned char granules_buffer[GRANULE_SIZE * RMM_MAX_GRANULES]
                        __attribute__((__aligned__(GRANULE_SIZE)));

size_t nondet_size_t(void);
unsigned int nondet_unsigned_int(void);
bool nondet_bool(void);
uint64_t nondet_uint64_t(void);

#define ALIGNED(_size, _alignment)              \
                        (((unsigned long)(_size) % (_alignment)) == 0UL)

#define GRANULE_ALIGNED(_addr) ALIGNED((void *)(_addr), GRANULE_SIZE)

bool valid_pa(uint64_t addr)
{
    return GRANULE_ALIGNED(addr)
       && granules_buffer <= addr
       && addr < granules_buffer + sizeof(granules_buffer);
}

uint64_t pa_to_index(uint64_t addr)
{
    return (addr - (uint64_t)granules_buffer)/GRANULE_SIZE;
}


void* pa_to_granule_buffer_ptr(uint64_t addr)
{
#ifndef ERROR
    // NOTE: Has to do this strange computation and type cast so CBMC can handle.
    return (void*)granules_buffer + ((unsigned char *)addr - granules_buffer);
#else
    return (void*)addr;
#endif
}

const uint8_t SPECIAL_VALUE = 42;

void main()
{
    // Set up the initial state.
    //
    // Pick a valid and nondet index
    size_t index = nondet_size_t();
    __CPROVER_assume(index < RMM_MAX_GRANULES);
    // Inject the `SPECIAL_VALUE` to the chosen granule.
    size_t offset = index * GRANULE_SIZE;
    granules_buffer[offset] = SPECIAL_VALUE;

    // Pre-condition
    uint64_t rd = nondet_uint64_t();
    __CPROVER_assume(valid_pa(rd));
    __CPROVER_assume(pa_to_index(rd) == index);

    // Execution, also the buggy code for CBMC.
    uint8_t *rd_ptr = pa_to_granule_buffer_ptr(rd);

    // Post-condition
    __CPROVER_assert(*rd_ptr == SPECIAL_VALUE, "check");
}

We (together with @matetothpal) have some code (above) that has a strange result, especially in the function pa_to_granule_buffer_ptr.

Assume an addr is in the range of a global byte (char) array granules_buffer, the two expressions (void*)granules_buffer + ((unsigned char *)addr - granules_buffer) and (void*)addr, which has the same values, leads to different behaviour. The former gives us expected result, i.e. assert success, while the latter fails.

Although int-to-pointer case is implementation-dependent behaviour, my guess here is that CBMC represents an array as an abstract object, and we can only access this abstract object via some offset but not absolute address, despite that the address is valid?

CBMC version: 5.95.1
Operating system: MacOS
Exact command line resulting in the issue: cbmc main.c -DERROR
What behaviour did you expect: it should pass
What happened instead: report assert fail

We have this problem since almost a year ago, and we have tried several different versions of CBMC on both MacOS and Ubuntu.

We managed to create reduced version of the reproducer. It is a bit more difficult to read, but still might be useful:

#include <stdbool.h>
#include <stdint.h>
#include <string.h>
unsigned char granules_buffer[1];
uint8_t SPECIAL_VALUE;
uint8_t *rd_ptr;

void* pa_to_granule_buffer_ptr(uint64_t addr)
{
#ifndef ERROR
    // NOTE: Has to do this strange computation and type cast so CBMC can handle.
    return (void*)granules_buffer + ((unsigned char *)addr - granules_buffer);
#else
    return (void*)addr;
#endif
}

a(uint64_t addr) {
  if (addr < granules_buffer + sizeof(granules_buffer))
    return;
  return false;
}
b() { return 0; }
check() {
  size_t c;
  size_t e = c;
  granules_buffer[e] = SPECIAL_VALUE;
  uint64_t d;
  __CPROVER_assume(a(d));
  __CPROVER_assume(b() == c);
  rd_ptr = pa_to_granule_buffer_ptr(d);
  __CPROVER_assert(*rd_ptr == SPECIAL_VALUE, "check");
}

Our problem here is that the value set that we use to track points-to information during symbolic execution will not know which object to produce from the integer pointer. #6442 may fix this, but will need a lot more work. Until we have that in place, the following change does make your example work:

--- 8103-before.c	2023-12-14 15:39:48.436007476 +0000
+++ 8103.c	2023-12-14 15:39:34.695802561 +0000
@@ -41,6 +41,7 @@
 }

 const uint8_t SPECIAL_VALUE = 42;
+char *nondet_pointer();

 void main()
 {
@@ -52,9 +53,11 @@
     // Inject the `SPECIAL_VALUE` to the chosen granule.
     size_t offset = index * GRANULE_SIZE;
     granules_buffer[offset] = SPECIAL_VALUE;
+    // make sure we take the address of granules_buffer at least once
+    void *p = granules_buffer;

     // Pre-condition
-    uint64_t rd = nondet_uint64_t();
+    uint64_t rd = (uint64_t)nondet_pointer()
     __CPROVER_assume(valid_pa(rd));
     __CPROVER_assume(pa_to_index(rd) == index);

I see the solution here, thought we request rd be typed uint64_t as there are other checks. We will just continue using the hack (void*)granules_buffer + ((unsigned char *)addr - granules_buffer);.

In general, a proper solution in the CBMC itself will be ideal than our hacky version. Yet, I am not sure if you prefer keeping this issue as opened status? Please feel free to close it or keep it opened.