diff options
author | Ted Unangst <tedu@cvs.openbsd.org> | 2015-11-18 16:46:50 +0000 |
---|---|---|
committer | Ted Unangst <tedu@cvs.openbsd.org> | 2015-11-18 16:46:50 +0000 |
commit | 7f4c0fdc74ec60d06c17be934b746894d104a342 (patch) | |
tree | 88df1a6eeb783fc895188448b43eb09ddbd8228d /lib/libc | |
parent | 665937ab3faa9d5982b7b578438ae089ba7699d7 (diff) |
Add icdb, the internal c database. A simpler replacement for the old
Berzerkeley DB code.
Diffstat (limited to 'lib/libc')
-rw-r--r-- | lib/libc/stdlib/icdb.c | 367 |
1 files changed, 367 insertions, 0 deletions
diff --git a/lib/libc/stdlib/icdb.c b/lib/libc/stdlib/icdb.c new file mode 100644 index 00000000000..077c59a1e47 --- /dev/null +++ b/lib/libc/stdlib/icdb.c @@ -0,0 +1,367 @@ +/* $OpenBSD: icdb.c,v 1.1 2015/11/18 16:46:49 tedu Exp $ */ +/* + * Copyright (c) 2015 Ted Unangst <tedu@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#include <fcntl.h> +#include <stdint.h> +#include <stdio.h> +#include <stddef.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <icdb.h> + +#include <sys/mman.h> +#include <sys/stat.h> + +#include <siphash.h> + +/* + * Creating a new icdb: icdb_new + * Opening existing icdb: icdb_open + * + * Adding new entries: icdb_add + * Adding entries does not update the disk or indices. + * + * Save to disk: icdb_save + * Update indices: icdb_rehash + * icdb_save will call rehash. + * + * Change an existing entry: icdb_update + * Changing entries does write to disk. + * + * Find an entry: icdb_lookup + * Looking up an entry is only defined when the indices are synced. + * + * Close and free resources: icdb_close + */ + +/* + * There are two major modes of operation. + * + * Existing databases use the mmap codepath. The entire database is mapped + * into the address space for quick access. Individual entries may be updated, + * but no new entries added. + * + * New databases use malloc backed memory instead. The database may be saved + * with icdb_save. It should be saved to a new file to avoid corrupting any + * open databases in other processes. + */ + +/* + * An icdb has the following format: + * struct icbinfo header + * indexes [ uint32_t * indexsize * nkeys ] + * entries [ entrysize * nentries ] + * + * To find an entry in the file, the user specifies which key to use. + * The key is hashed and looked up in the index. The index contains the + * position of the entry in the entries array. -1 identifies not found. + * Chaining is done by rehashing the hash. All keys are fixed size byte arrays. + */ + +/* + * Header info for icdb. This struct is stored on disk. + */ +struct icdbinfo { + uint32_t magic; /* magic */ + uint32_t nentries; /* number of entries stored */ + uint32_t entrysize; /* size of each entry */ + uint32_t indexsize; /* number of entries in hash index */ + uint32_t nkeys; /* number of keys defined */ + uint32_t keysize[8]; /* size of each key */ + uint32_t keyoffset[8]; /* offset of each key in entry */ + SIPHASH_KEY siphashkey; /* random hash key */ +}; + +/* + * In memory representation with auxiliary data. + * idxdata and entries will be written to disk after info. + */ +struct icdb { + struct icdbinfo *info; + void *idxdata[8]; + void *entries; + size_t maplen; + uint32_t allocated; + int fd; +}; + +const uint32_t magic = 0x1ca9d0b7; + +static uint32_t +roundup(uint32_t num) +{ + uint32_t r = 2; + + while (r < num * 3 / 2) + r *= 2; + return r; +} + +struct icdb * +icdb_new(uint32_t nentries, uint32_t entrysize, + uint32_t nkeys, uint32_t *keysizes, uint32_t *keyoffsets) +{ + struct icdb *db; + struct icdbinfo *info; + int i; + + if (entrysize == 0 || entrysize > 1048576) + return NULL; + if (nkeys > 8) + return NULL; + + if (!(db = calloc(1, sizeof(*db)))) + return NULL; + if (!(info = calloc(1, sizeof(*info)))) { + free(db); + return NULL; + } + db->info = info; + db->fd = -1; + info->magic = magic; + if (nentries) + if ((db->entries = reallocarray(NULL, nentries, entrysize))) + db->allocated = nentries; + info->entrysize = entrysize; + info->nkeys = nkeys; + for (i = 0; i < nkeys; i++) { + info->keysize[i] = keysizes[i]; + info->keyoffset[i] = keyoffsets[i]; + } + return db; +} + +struct icdb * +icdb_open(const char *name, int flags) +{ + struct icdb *db = NULL; + struct icdbinfo *info; + struct stat sb; + uint8_t *ptr = MAP_FAILED; + uint32_t baseoff, indexsize, idxmask, idxlen; + int fd, i; + + if ((fd = open(name, flags | O_CLOEXEC)) == -1) + return NULL; + if (fstat(fd, &sb) != 0) + goto fail; + ptr = mmap(NULL, sb.st_size, PROT_READ | + ((flags & O_RDWR) ? PROT_WRITE : 0), MAP_SHARED, fd, 0); + if (ptr == MAP_FAILED) + goto fail; + info = (struct icdbinfo *)ptr; + if (info->magic != magic) + goto fail; + + if (!(db = calloc(1, sizeof(*db)))) + goto fail; + db->info = info; + + indexsize = info->indexsize; + idxmask = indexsize - 1; + idxlen = indexsize * sizeof(uint32_t); + baseoff = sizeof(*info) + idxlen * info->nkeys; + + for (i = 0; i < info->nkeys; i++) + db->idxdata[i] = ptr + sizeof(*info) + i * idxlen; + db->entries = ptr + baseoff; + db->maplen = sb.st_size; + db->fd = fd; + return db; + +fail: + if (ptr != MAP_FAILED) + munmap(ptr, sb.st_size); + if (fd != -1) + close(fd); + free(db); + return NULL; +} + +int +icdb_get(struct icdb *db, void *entry, uint32_t idx) +{ + uint32_t entrysize = db->info->entrysize; + + memcpy(entry, (uint8_t *)db->entries + idx * entrysize, entrysize); + return 0; +} + +int +icdb_lookup(struct icdb *db, int keynum, const void *key, void *entry, uint32_t *idxp) +{ + struct icdbinfo *info = db->info; + uint32_t entrysize = info->entrysize; + uint32_t offset; + uint64_t hash; + uint32_t indexsize, idxmask, idxlen; + uint32_t *idxdata; + + indexsize = info->indexsize; + idxmask = indexsize - 1; + idxlen = indexsize * sizeof(uint32_t); + + idxdata = db->idxdata[keynum]; + + hash = SipHash24(&info->siphashkey, key, info->keysize[keynum]); + while ((offset = idxdata[hash & idxmask]) != -1) { + if (icdb_get(db, entry, offset) != 0) + return -1; + if (memcmp((uint8_t *)entry + info->keyoffset[keynum], key, + info->keysize[keynum]) == 0) { + if (idxp) + *idxp = offset; + return 0; + } + hash = SipHash24(&info->siphashkey, &hash, sizeof(hash)); + } + return 1; +} + +int +icdb_nentries(struct icdb *db) +{ + return db->info->nentries; +} + +const void * +icdb_entries(struct icdb *db) +{ + return db->entries; +} + +int +icdb_update(struct icdb *db, const void *entry, int offset) +{ + struct icdbinfo *info = db->info; + uint32_t entrysize = info->entrysize; + uint32_t baseoff; + uint32_t indexsize, idxmask, idxlen; + + indexsize = info->indexsize; + idxmask = indexsize - 1; + idxlen = indexsize * sizeof(uint32_t); + baseoff = sizeof(*info) + idxlen * info->nkeys; + + memcpy((uint8_t *)db->entries + offset * entrysize, + entry, entrysize); + if (db->fd != -1) + msync(db->entries + offset * entrysize, entrysize, MS_SYNC); + return 0; +} + +int +icdb_add(struct icdb *db, const void *entry) +{ + struct icdbinfo *info = db->info; + size_t entrysize = info->entrysize; + + if (db->allocated == info->nentries) { + void *p; + size_t amt = db->allocated ? db->allocated * 2 : 63; + if (!(p = reallocarray(db->entries, amt, entrysize))) + return -1; + db->allocated = amt; + db->entries = p; + } + memcpy((uint8_t *)db->entries + info->nentries * entrysize, + entry, entrysize); + info->nentries++; + return 0; +} + +int +icdb_rehash(struct icdb *db) +{ + struct icdbinfo *info = db->info; + uint32_t entrysize = info->entrysize; + uint32_t indexsize, idxmask, idxlen; + int i, j; + + indexsize = info->indexsize = roundup(info->nentries); + idxmask = indexsize - 1; + idxlen = sizeof(uint32_t) * indexsize; + + arc4random_buf(&info->siphashkey, sizeof(info->siphashkey)); + + for (i = 0; i < info->nkeys; i++) { + uint32_t *idxdata = reallocarray(db->idxdata[i], + indexsize, sizeof(uint32_t)); + if (!idxdata) + return -1; + memset(idxdata, 0xff, idxlen); + db->idxdata[i] = idxdata; + } + for (j = 0; j < info->nentries; j++) { + for (i = 0; i < info->nkeys; i++) { + uint32_t *idxdata = db->idxdata[i]; + uint64_t hash = SipHash24(&info->siphashkey, + (uint8_t *)db->entries + j * entrysize + + info->keyoffset[i], info->keysize[i]); + while (idxdata[hash & idxmask] != -1) + hash = SipHash24(&info->siphashkey, &hash, sizeof(hash)); + idxdata[hash & idxmask] = j; + } + } + return 0; +} + +int +icdb_save(struct icdb *db, int fd) +{ + struct icdbinfo *info = db->info; + uint32_t entrysize = info->entrysize; + uint32_t indexsize, idxlen; + int i; + + if (icdb_rehash(db) != 0) + return -1; + + indexsize = info->indexsize; + idxlen = sizeof(uint32_t) * indexsize; + + if (ftruncate(fd, 0) != 0) + return -1; + if (write(fd, info, sizeof(*info)) != sizeof(*info)) + return -1; + for (i = 0; i < info->nkeys; i++) { + if (write(fd, db->idxdata[i], idxlen) != idxlen) + return -1; + } + if (write(fd, db->entries, info->nentries * entrysize) != + info->nentries * entrysize) + return -1; + return 0; +} + +int +icdb_close(struct icdb *db) +{ + int i; + + if (db->fd == -1) { + for (i = 0; i < db->info->nkeys; i++) + free(db->idxdata[i]); + free(db->entries); + free(db->info); + } else { + munmap(db->info, db->maplen); + close(db->fd); + } + free(db); + return 0; +} |