Due to some obscure reasons which are not relevant to this question, I need to resort to using MAP_FIXED in order to obtain a page close to where the text section of libc lives in memory.
Before reading mmap(2) (which I should have done in the first place), I was expecting to get an error if I called mmap with MAP_FIXED and a base address overlapping an already-mapped area.
However, that is not the case. For instance, here is part of /proc/<pid>/maps for a certain process:
7ffff7299000-7ffff744c000 r-xp 00000000 08:05 654098 /lib/x86_64-linux-gnu/libc-2.15.so
Which, after making the following mmap call ...
mmap(0x7ffff731b000,
getpagesize(),
PROT_READ | PROT_WRITE | PROT_EXEC,
MAP_ANONYMOUS | MAP_PRIVATE | MAP_FIXED,
0,
0);
... turns into:
7ffff7299000-7ffff731b000 r-xp 00000000 08:05 654098 /lib/x86_64-linux-gnu/libc-2.15.so
7ffff731b000-7ffff731c000 rwxp 00000000 00:00 0
7ffff731c000-7ffff744c000 r-xp 00083000 08:05 654098 /lib/x86_64-linux-gnu/libc-2.15.so
Which means I have overwritten part of the virtual address space dedicated to libc with my own page. Clearly not what I want ...
In the MAP_FIXED part of the mmap(2) manual, it clearly states:
If the memory region specified by addr and len overlaps pages of any existing mapping(s), then the overlapped part of the existing mapping(s) will be discarded.
Which explains what I am seeing, but I have a couple of questions:
Use page = sysconf(_SC_PAGE_SIZE) to find out the page size, then scan each page-sized block you wish to check using msync(addr, page, 0) (with (unsigned long)addr % page == 0, i.e. addr aligned to pages). If it returns -1 with errno == ENOMEM, that page is not mapped.
Edited: As fons commented below, mincore(addr, page, &dummy) is superior to msync(). (The implementation of the syscall is in mm/mincore.c in the Linux kernel sources, with C libraries usually providing a wrapper that updates errno.) As the syscall does the mapping check immediately after making sure addr is page aligned, it is optimal in the not-mapped case (ENOMEM). It does some work if the page is already mapped, so if performance is paramount, try to avoid checking pages you know are mapped.
You must do this separately for each page, because for regions larger than a single page, ENOMEM means that the region was not fully mapped; it might still be partially mapped. Mapping is always granular to page-sized units.
As far as I can tell, there is no way to tell mmap() to fail if the region is already mapped, or contains already mapped pages. (The same applies to mremap(), so you cannot create a mapping, then move it to the desired region.)
This means you run a risk of a race condition. It would be best to execute the actual syscalls yourself, instead of the C library wrappers, just in case they do memory allocation or change memory mappings internally:
#define _GNU_SOURCE
#include <unistd.h>
#include <sys/syscall.h>
/* Cached system page size in bytes; 0 means "not queried yet". */
static size_t page = 0;

/* Return the system page size in bytes.
 *
 * The value is obtained from sysconf(_SC_PAGESIZE) on first use and cached
 * in `page`, so later calls do not repeat the query.
 *
 * If sysconf() reports a non-positive value (it returns -1 on failure),
 * nothing is cached and 0 is returned. Callers can therefore detect the
 * failure instead of receiving (size_t)-1 as a bogus page size, and a later
 * call will retry the query.
 */
static inline size_t page_size(void)
{
    if (!page) {
        const long result = sysconf(_SC_PAGESIZE);

        if (result < 1L)
            return (size_t)0;

        page = (size_t)result;
    }
    return page;
}
/* Issue the msync system call directly via syscall(), without going
 * through the C library's msync() wrapper.
 *
 * addr, length and flags are handed to the kernel unchanged.
 * The result of syscall() is returned, converted to int.
 */
static inline int raw_msync(void *addr, size_t length, int flags)
{
    const long outcome = syscall(SYS_msync, addr, length, flags);

    return (int)outcome;
}
/* Issue the mmap system call directly via syscall(), without going
 * through the C library's mmap() wrapper.
 *
 * addr, length, prot and flags are handed to the kernel unchanged; the
 * file descriptor argument is always -1 and the offset is always 0.
 * The result of syscall() is returned, converted to a pointer.
 */
static inline void *raw_mmap(void *addr, size_t length, int prot, int flags)
{
    const int   no_descriptor = -1;
    const off_t no_offset     = (off_t)0;
    const long  outcome       = syscall(SYS_mmap, addr, length, prot, flags,
                                        no_descriptor, no_offset);

    return (void *)outcome;
}
However, I suspect that whatever it is you are trying to do, you will eventually need to parse /proc/self/maps anyway.
I recommend avoiding standard I/O (stdio.h) altogether (as the various operations will allocate memory dynamically, and thus change the mappings), and instead using the lower-level unistd.h interfaces, which are much less likely to affect the mappings. Here is a set of simple, crude functions that you can use to find out each mapped region and the protections enabled in that region (and discard the other info). In practice, it uses about a kilobyte of code and less than that in stack, so it is very useful even on limited architectures (say, embedded devices).
#include <unistd.h>
#include <fcntl.h>
#include <errno.h>
#include <string.h>
/* Maximum number of bytes input_refill() keeps buffered: its read() call
 * never fills the buffer past this offset. Can be overridden by defining
 * INPUT_BUFFER before this point.
 */
#ifndef INPUT_BUFFER
#define INPUT_BUFFER 512
#endif /* INPUT_BUFFER */
/* Sentinel returned by input_next() and input_back() when no byte can be
 * delivered or pushed back. It is negative, so it cannot collide with a
 * byte value (0..255). Can be overridden like INPUT_BUFFER.
 */
#ifndef INPUT_EOF
#define INPUT_EOF -256
#endif /* INPUT_EOF */
/* Bit flags that read_maps() combines into each mode[] entry, one per
 * permission character of a maps line: 'p', 's', 'r', 'w' and 'x'.
 */
#define PERM_PRIVATE 16
#define PERM_SHARED 8
#define PERM_READ 4
#define PERM_WRITE 2
#define PERM_EXEC 1
/* State of one buffered, read-only input stream built on a file descriptor. */
typedef struct {
/* Descriptor obtained from open(), or -1 when no file is open. */
int descriptor;
/* 0 while usable; ENODATA once read() has reported end of input;
 * otherwise the errno value of the failure that occurred. */
int status;
/* Next unread byte in buffer[]. */
unsigned char *next;
/* One past the last valid byte in buffer[]. */
unsigned char *ends;
/* Storage. read() only fills the first INPUT_BUFFER bytes; the 16 extra
 * bytes leave room for input_back() to push bytes back. */
unsigned char buffer[INPUT_BUFFER + 16];
} input_buffer;
/* Refill input buffer. Returns the number of new bytes.
 *
 * Any unread bytes are first moved to the start of the buffer, then a
 * single read() (retried on EINTR) appends new data after them, never
 * filling the buffer beyond INPUT_BUFFER bytes in total.
 *
 * Returns 0 without reading when
 *  - input->status is already nonzero (earlier EOF or error), or
 *  - INPUT_BUFFER or more bytes are already buffered, so there is no room.
 *    This is not an EOF, and status is left untouched. Without this guard
 *    the read length would be zero (misreported as EOF) or would wrap
 *    around to a huge value and overflow the buffer.
 *
 * Sets status to ENODATA at EOF, to errno if read() fails, and to EIO if
 * read() returns an unexpected negative value; 0 is returned in all three
 * cases.
 */
static size_t input_refill(input_buffer *const input)
{
    size_t  used;
    ssize_t n;

    if (input->status)
        return (size_t)0;

    /* Compact: slide the unread bytes down to the start of the buffer. */
    if (input->next > input->buffer) {
        if (input->ends > input->next) {
            const size_t unread = (size_t)(input->ends - input->next);

            memmove(input->buffer, input->next, unread);
            input->ends = input->buffer + unread;
        } else {
            input->ends = input->buffer;
        }
        input->next = input->buffer;
    }

    /* Bytes currently held; bytes pushed back with input_back() can bring
     * this up to, or beyond, INPUT_BUFFER. */
    used = (size_t)(input->ends - input->buffer);
    if (used >= (size_t)(INPUT_BUFFER))
        return (size_t)0;

    do {
        n = read(input->descriptor, input->ends,
                 (size_t)(INPUT_BUFFER) - used);
    } while (n == (ssize_t)-1 && errno == EINTR);

    if (n > (ssize_t)0) {
        input->ends += n;
        return (size_t)n;
    }

    if (n == (ssize_t)0)
        input->status = ENODATA;    /* End of input. */
    else if (n == (ssize_t)-1)
        input->status = errno;      /* read() failed. */
    else
        input->status = EIO;        /* Unexpected negative return value. */

    return (size_t)0;
}
/* Low-level getchar() equivalent.
 *
 * Returns the next buffered byte (0..255) and advances past it. When the
 * buffer is exhausted, input_refill() is asked for more data; if it
 * delivers nothing, INPUT_EOF is returned.
 */
static inline int input_next(input_buffer *const input)
{
    const int exhausted = !(input->next < input->ends);

    if (exhausted && input_refill(input) == 0)
        return INPUT_EOF;

    return *(input->next++);
}
/* Low-level ungetc() equivalent.
 *
 * Pushes byte c back so that it is the next byte returned by input_next().
 * Returns c on success, or INPUT_EOF if c is not in 0..255 or if the
 * buffer has no room left for another byte.
 */
static inline int input_back(input_buffer *const input, const int c)
{
    unsigned char *const limit = input->buffer + sizeof input->buffer;

    if (c < 0 || c > 255)
        return INPUT_EOF;

    /* Room in front of the read position: just step back onto it. */
    if (input->next > input->buffer) {
        input->next--;
        *input->next = (unsigned char)c;
        return c;
    }

    /* Read position is at the very start; the unread bytes must be
     * shifted up by one, which needs a free byte at the end. */
    if (input->ends >= limit)
        return INPUT_EOF;

    memmove(input->next + 1, input->next, (size_t)(input->ends - input->next));
    input->ends++;
    *input->next = (unsigned char)c;
    return c;
}
/* Low-level fopen() equivalent (read-only).
 *
 * Resets *input to an empty state and opens filename with
 * O_RDONLY | O_NOCTTY, retrying while open() is interrupted (EINTR).
 *
 * Returns 0 on success. On failure returns an errno value: EINVAL if
 * input is NULL or filename is NULL or empty (errno is set as well), or
 * the errno left by open(). Except for the NULL input case, the same
 * value is also stored in input->status.
 */
static int input_open(input_buffer *const input, const char *const filename)
{
    int fd;

    if (input == NULL) {
        errno = EINVAL;
        return EINVAL;
    }

    input->descriptor = -1;
    input->status = 0;
    input->next = input->buffer;
    input->ends = input->buffer;

    if (filename == NULL || filename[0] == '\0') {
        input->status = EINVAL;
        errno = EINVAL;
        return EINVAL;
    }

    for (;;) {
        fd = open(filename, O_RDONLY | O_NOCTTY);
        if (fd != -1 || errno != EINTR)
            break;
    }
    input->descriptor = fd;

    if (fd == -1) {
        input->status = errno;
        return input->status;
    }

    return 0;
}
/* Low-level fclose() equivalent.
 *
 * Closes the descriptor (if one is open) and resets *input to an empty,
 * closed state.
 *
 * Returns, and also stores in errno, the final status: 0 if the stream
 * was error-free (reaching EOF is not an error), otherwise the errno
 * value of the first failure seen on the stream, or of close() itself.
 * Returns EINVAL (errno set) if input is NULL.
 *
 * close() is called exactly once. On Linux the descriptor is released
 * even when close() fails with EINTR, so retrying would either report a
 * spurious EBADF or close a descriptor that another thread has since
 * been given. An EINTR result is therefore treated as a completed close.
 */
static int input_close(input_buffer *const input)
{
    if (!input)
        return errno = EINVAL;

    /* EOF is not an error; we use ENODATA for that. */
    if (input->status == ENODATA)
        input->status = 0;

    if (input->descriptor != -1) {
        if (close(input->descriptor) == -1 &&
            errno != EINTR && !input->status)
            input->status = errno;
    }

    input->descriptor = -1;
    input->next = input->buffer;
    input->ends = input->buffer;

    return errno = input->status;
}
/* Value of hexadecimal digit c (either case), or -1 if c is not one. */
static int maps_hex_digit(const int c)
{
    if (c >= '0' && c <= '9')
        return c - '0';
    if (c >= 'A' && c <= 'F')
        return c - 'A' + 10;
    if (c >= 'a' && c <= 'f')
        return c - 'a' + 10;
    return -1;
}

/* Permission bit for maps permission character c; 0 for '-' (permission
 * absent); -1 if c is not a permission character. */
static int maps_perm_bit(const int c)
{
    switch (c) {
    case 'r': return PERM_READ;
    case 'w': return PERM_WRITE;
    case 'x': return PERM_EXEC;
    case 's': return PERM_SHARED;
    case 'p': return PERM_PRIVATE;
    case '-': return 0;
    default:  return -1;
    }
}

/* Parse a run of hexadecimal digits starting with *c. Returns the value
 * (0 if there are no digits) and leaves the first non-digit in *c. */
static unsigned long maps_parse_hex(input_buffer *const input, int *const c)
{
    unsigned long value = 0UL;
    int digit;

    while ((digit = maps_hex_digit(*c)) >= 0) {
        value = (16UL * value) + (unsigned long)digit;
        *c = input_next(input);
    }
    return value;
}

/* Abandon parsing of a malformed line: close the input so the descriptor
 * is not leaked, then report EIO. Always returns 0. */
static size_t maps_fail(input_buffer *const input)
{
    input_close(input);
    errno = EIO;    /* Set after input_close(), which overwrites errno. */
    return (size_t)0;
}

/* Read /proc/self/maps, and fill in the arrays corresponding to the fields.
 *
 * For each of the first n mappings, the start address is stored in
 * ptr[i], the length in bytes (end - start) in len[i], and the PERM_*
 * permission bits in mode[i]. Any of ptr, len and mode may be NULL, in
 * which case that field is not stored. The rest of each line is ignored.
 *
 * The function will return the number of mappings, even if not all are
 * saved (the result may exceed n), with errno set to 0. On failure it
 * returns 0 with errno set: EIO if a line is malformed, otherwise the
 * error reported while opening, reading or closing the file. The file is
 * closed on every return path.
 */
size_t read_maps(size_t const n,
                 void **const ptr, size_t *const len,
                 unsigned char *const mode)
{
    input_buffer input;
    size_t i = 0;
    int c;

    errno = 0;
    if (input_open(&input, "/proc/self/maps"))
        return (size_t)0; /* errno already set. */

    c = input_next(&input);
    while (c >= 0) {
        unsigned long curr_start, curr_end;
        unsigned char curr_mode = 0U;
        int bit;

        /* Skip leading controls and whitespace */
        while (c >= 0 && c <= 32)
            c = input_next(&input);

        /* EOF? */
        if (c < 0)
            break;

        /* Start of address range, terminated by '-'. */
        curr_start = maps_parse_hex(&input, &c);
        if (c != '-')
            return maps_fail(&input);
        c = input_next(&input);

        /* End of address range, terminated by a space. */
        curr_end = maps_parse_hex(&input, &c);
        if (c != ' ')
            return maps_fail(&input);
        c = input_next(&input);

        /* Permissions, terminated by a space. */
        while ((bit = maps_perm_bit(c)) >= 0) {
            curr_mode |= (unsigned char)bit;
            c = input_next(&input);
        }
        if (c != ' ')
            return maps_fail(&input);
        c = input_next(&input);

        /* Skip the rest of the line. */
        while (c >= 0 && c != '\n')
            c = input_next(&input);

        /* Add to arrays, if possible. */
        if (i < n) {
            if (ptr)
                ptr[i] = (void *)curr_start;
            if (len)
                len[i] = (size_t)(curr_end - curr_start);
            if (mode)
                mode[i] = curr_mode;
        }
        i++;
    }

    /* A read error (anything but plain EOF) surfaces here via the status. */
    if (input_close(&input))
        return (size_t)0; /* errno already set. */

    errno = 0;
    return i;
}
The read_maps() function reads up to n regions, storing start addresses as void * into the ptr array, lengths into the len array, and permissions into the mode array. It returns the total number of maps (which may be greater than n), or zero with errno set if an error occurs.
It is quite possible to use syscalls for the low-level I/O above, so that you don't use any C library features, but I don't think it is at all necessary. (The C libraries, as far as I can tell, use very simple wrappers around the actual syscalls for these.)
I hope you find this useful.