Wednesday, April 4, 2007

How to bypass the buffer cache in Linux

Linux has two kinds of caches: the page cache and the buffer cache. The role of the page cache is to speed up access to files on disk; in a similar way, the buffer cache contains buffers of pages read from or being written to block devices. Both of them are memory areas managed in different ways (one more optimized for file objects and the other more block-device oriented).

From /proc/meminfo it is possible to monitor the memory allocated for both caches (Buffers is the buffer cache, Cached is the page cache), for example:
# cat /proc/meminfo
...
Buffers:         15116 kB
Cached:          67912 kB
...

To perform an I/O benchmark on block devices (like /dev/sda, /dev/sdb, etc.) we usually use a simple `dd`, which loads data from the device into memory (in read tests) or writes from memory to the device (in write tests). But in these cases the data are accessed only once! There are no more reads or writes on their buffers. In these cases the buffer cache is only an overhead and it makes sense to bypass it.

One way is to open the files using the O_DIRECT flag. This flag makes it possible to bypass the caching mechanisms and to use DMA directly between the block device and the userspace source/destination buffers.

Obviously, the kernel has no global flag to say "OK, just disable the buffer cache", and it is not even possible to disable the buffer cache for a single process.

If you can (and want to) patch and recompile your application, you could explicitly set the O_DIRECT flag in every open() call, but that wouldn't be very handy... ;-)

Another solution is to write a simple glibc wrapper that intercepts all open() calls and sets the O_DIRECT flag.

An example follows:

libdirectio.c
#define _GNU_SOURCE
#define __USE_GNU

#include <stdio.h>
#include <stdarg.h>
#include <string.h>
#include <fcntl.h>
#include <dlfcn.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/types.h>

/* Comment out the next line to silence the debug trace. */
#define DEBUG

/*
 * DPRINTF(fmt, ...): printf-style trace helper.
 * With DEBUG defined it writes "debug: " followed by the formatted message
 * to stderr; without DEBUG it expands to nothing. The format must be a
 * string literal because it is pasted onto the "debug: " prefix.
 */
#if defined(DEBUG)
#define DPRINTF(...) fprintf(stderr, "debug: " __VA_ARGS__)
#else
#define DPRINTF(...)
#endif

/*
 * Exported entry points. Each public name is a weak alias of one of the
 * wrappers defined below, so a call to open()/__open() lands in wrap_open()
 * and a call to open64()/__open64() lands in wrap_open64().
 */
int open(const char *, int, ...) __attribute__ ((weak, alias("wrap_open")));
int __open(const char *, int, ...) __attribute__ ((weak, alias("wrap_open")));
int open64(const char *, int, ...) __attribute__ ((weak, alias("wrap_open64")));
int __open64(const char *, int, ...) __attribute__ ((weak, alias("wrap_open64")));

/*
 * Pointers to the next "open" and "open64" symbols in the lookup order.
 * They start out NULL and are filled in by _init() via dlsym(RTLD_NEXT, ...).
 */
static int (*orig_open)(const char *, int, ...) = NULL;
static int (*orig_open64)(const char *, int, ...) = NULL;

/*
 * Common back end of the open()/open64() wrappers.
 *
 * Adds O_DIRECT to `flags` and forwards the call to `func_open`.
 * Two kinds of path are forwarded with their flags untouched:
 *   - "/dev/null"
 *   - anything below "/proc/" (pseudo-files with no block device behind
 *     them; presumably an O_DIRECT open there would just fail, see open(2)
 *     EINVAL)
 * Every other path, including block devices such as /dev/sda, gets O_DIRECT.
 *
 * name      - path handed to open(); a NULL pointer is forwarded unchanged.
 * flags     - open(2) flags supplied by the caller.
 * mode      - creation mode; only meaningful when the flags require one.
 * func_open - function that performs the real open.
 *
 * Returns whatever `func_open` returns.
 */
static int __do_wrap_open(const char *name, int flags, mode_t mode,
                          int (*func_open)(const char *, int, ...))
{
    static const char null_dev[] = "/dev/null";
    static const char proc_prefix[] = "/proc/";

    /* Never dereference a NULL path here; let the real open() reject it. */
    if (name != NULL &&
        strcmp(name, null_dev) != 0 &&
        strncmp(name, proc_prefix, sizeof(proc_prefix) - 1) != 0) {
        DPRINTF("setting flags O_DIRECT on %s\n", name);
        flags |= O_DIRECT;
    }

    return func_open(name, flags, mode);
}

/*
 * Replacement for open()/__open() (see the weak aliases in this file).
 *
 * Extracts the optional `mode` argument and hands the call to
 * __do_wrap_open() together with orig_open.
 *
 * The third argument is only read when the flags say the caller supplied it
 * (O_CREAT, or O_TMPFILE where available). Calling va_arg() for an argument
 * that was never passed is undefined behaviour, so mode stays 0 otherwise.
 *
 * Returns the result of __do_wrap_open().
 */
int wrap_open(const char *name, int flags, ...)
{
    mode_t mode = 0;
    int has_mode = (flags & O_CREAT) != 0;

#ifdef O_TMPFILE
    /* O_TMPFILE is a multi-bit value, so compare against the full mask. */
    if ((flags & O_TMPFILE) == O_TMPFILE)
        has_mode = 1;
#endif

    if (has_mode) {
        va_list args;

        va_start(args, flags);
        mode = va_arg(args, mode_t);
        va_end(args);
    }

    DPRINTF("calling libc open(%s, 0x%x, 0x%x)\n",
            name ? name : "(null)", (unsigned int)flags, (unsigned int)mode);

    return __do_wrap_open(name, flags, mode, orig_open);
}

/*
 * Replacement for open64()/__open64() (see the weak aliases in this file).
 *
 * Extracts the optional `mode` argument and hands the call to
 * __do_wrap_open() together with orig_open64.
 *
 * The third argument is only read when the flags say the caller supplied it
 * (O_CREAT, or O_TMPFILE where available). Calling va_arg() for an argument
 * that was never passed is undefined behaviour, so mode stays 0 otherwise.
 *
 * Returns the result of __do_wrap_open().
 */
int wrap_open64(const char *name, int flags, ...)
{
    mode_t mode = 0;
    int has_mode = (flags & O_CREAT) != 0;

#ifdef O_TMPFILE
    /* O_TMPFILE is a multi-bit value, so compare against the full mask. */
    if ((flags & O_TMPFILE) == O_TMPFILE)
        has_mode = 1;
#endif

    if (has_mode) {
        va_list args;

        va_start(args, flags);
        mode = va_arg(args, mode_t);
        va_end(args);
    }

    DPRINTF("calling libc open64(%s, 0x%x, 0x%x)\n",
            name ? name : "(null)", (unsigned int)flags, (unsigned int)mode);

    return __do_wrap_open(name, flags, mode, orig_open64);
}

/*
 * Library initialiser: looks up the next "open" and "open64" symbols with
 * dlsym(RTLD_NEXT, ...) and stores them in orig_open / orig_open64.
 * If either lookup fails, an error is printed to stderr and the process
 * exits with status 1.
 */
void _init(void)
{
    if ((orig_open = dlsym(RTLD_NEXT, "open")) == NULL) {
        fputs("error: missing symbol open!\n", stderr);
        exit(1);
    }

    if ((orig_open64 = dlsym(RTLD_NEXT, "open64")) == NULL) {
        fputs("error: missing symbol open64!\n", stderr);
        exit(1);
    }
}

Makefile
# Builds libdirectio.so.$(VERSION) from libdirectio.c.
# NOTE: every recipe line below must start with a TAB character, otherwise
# make stops with "missing separator".
VERSION=0.1

TARGET=libdirectio.so.$(VERSION)
OBJS=libdirectio.o
CC=gcc
CFLAGS= -fPIC -Wall -O2 -g
# -Wl, (lowercase letter L, not the digit 1) forwards -soname to the linker.
SHAREDFLAGS= -nostartfiles -shared -Wl,-soname,libdirectio.so.0

.PHONY: all clean

all: $(TARGET)

%.o: %.c
	$(CC) -I. $(CFLAGS) -c $< -o $@

$(TARGET): $(OBJS)
	$(CC) $(SHAREDFLAGS) $(OBJS) -o $(TARGET) -lc -ldl

clean:
	rm -f $(OBJS) $(TARGET)
To compile the library simply run `make`. You can pre-load it using the LD_PRELOAD environment variable in this way:
# export LD_PRELOAD=$FULL_PATH_OF_YOUR_LIBRARY/libdirectio.so.0.1
Then you can run your brand-new direct I/O benchmark (typically `dd`) for block devices. To unload the library and restore the standard access simply run:
# unset LD_PRELOAD

2 comments:

Anonymous said...

Makefile:12: *** missing separator. Stop.

Anonymous said...

Ok! no more apps, since 2.6.16 we can run:

sync ; echo 3 | sudo tee /proc/sys/vm/drop_caches