Projects
Eulaceura:Mainline
apptainer
_service:obs_scm:70.patch
Sign Up
Log In
Username
Password
Overview
Repositories
Revisions
Requests
Users
Attributes
Meta
File _service:obs_scm:70.patch of Package apptainer
From 2ff837b814e84eedc25574014da9ed24ee44b7b4 Mon Sep 17 00:00:00 2001 From: Kevin Vigor <kvigor@gmail.com> Date: Thu, 6 Feb 2020 09:59:56 -0800 Subject: [PATCH 1/3] Add locking to cache API in anticipation of multithreaded decompression. The existing cache API has effectively no locking and relies on the single- threaded nature of the code to prevent contention on cache entries. As a first step to multithreading the driver, refactor the cache API to allow alternative implementations. In this changeset the API is changed but the internal cache implementation is not make thread-safe, so it still suitable only for single-threaded usage. Specific changes: * sqfs_cache type is made opaque. * previously, newly allocated cache entries were assumed valid. This meant that on any failure path following a cache entry allocation, one had to be careful to call sqfs_cache_invalidate(). The assumption is now reversed, cache entries are invalid until explicitly marked valid with sqfs_cache_entry_valid(). This simplifies error handling. * block cache code was intermixed with generic cache code, relocate to fs.c * cache eviction led to object destruction in block cache. The only thing preventing cache eviction while block object in use is single-threaded nature of code. Instead use a refcounting mechanism on the block entries so that we can independently manage block lifetime. --- Makefile.am | 6 +- cache.c | 142 +++++++++++++++++++++++++++++----------------- cache.h | 40 ++++++------- common.h | 26 ++++++++- file.c | 23 ++++---- file.h | 4 -- fs.c | 61 ++++++++++++++++---- fs.h | 5 ++ table.c | 2 +- tests/cachetest.c | 107 ++++++++++++++++++++++++++++++++++ 10 files changed, 314 insertions(+), 102 deletions(-) create mode 100644 tests/cachetest.c diff --git a/Makefile.am b/Makefile.am index 5659cd22..eaf7ac97 100644 --- a/Makefile.am +++ b/Makefile.am @@ -105,9 +105,11 @@ endif TESTS = if SQ_FUSE_TESTS TESTS += tests/ll-smoke.sh -check_PROGRAMS = endiantest +check_PROGRAMS = cachetest endiantest +cachetest_SOURCES=tests/cachetest.c +cachetest_LDADD=libsquashfuse.la $(COMPRESSION_LIBS) endiantest_SOURCES = tests/endiantest.c -TESTS += endiantest +TESTS += cachetest endiantest endif if SQ_DEMO_TESTS TESTS += tests/ls.sh diff --git a/cache.c b/cache.c index 0deacfca..36d02234 100644 --- a/cache.c +++ b/cache.c @@ -22,85 +22,121 @@ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ + +#include "config.h" #include "cache.h" #include "fs.h" +#include <assert.h> #include <stdlib.h> +typedef struct sqfs_cache_internal { + uint8_t *buf; + + sqfs_cache_dispose dispose; + + size_t size, count; + size_t next; /* next block to evict */ +} sqfs_cache_internal; + +typedef struct { + int valid; + sqfs_cache_idx idx; +} sqfs_cache_entry_hdr; + sqfs_err sqfs_cache_init(sqfs_cache *cache, size_t size, size_t count, - sqfs_cache_dispose dispose) { - cache->size = size; - cache->count = count; - cache->dispose = dispose; - cache->next = 0; - - cache->idxs = calloc(count, sizeof(sqfs_cache_idx)); - cache->buf = calloc(count, size); - if (cache->idxs && cache->buf) + sqfs_cache_dispose dispose) { + + sqfs_cache_internal *c = malloc(sizeof(sqfs_cache_internal)); + if (!c) { + return SQFS_ERR; + } + + c->size = size + sizeof(sqfs_cache_entry_hdr); + c->count = count; + c->dispose = dispose; + c->next = 0; + + c->buf = calloc(count, c->size); + + if (c->buf) { + *cache = c; return SQFS_OK; - - sqfs_cache_destroy(cache); + } + + sqfs_cache_destroy(&c); return SQFS_ERR; } -static void *sqfs_cache_entry(sqfs_cache *cache, size_t i) { - return cache->buf + i * cache->size; +static sqfs_cache_entry_hdr *sqfs_cache_entry_header( + sqfs_cache_internal* cache, + size_t i) { + return (sqfs_cache_entry_hdr *)(cache->buf + i * cache->size); +} + +static void* sqfs_cache_entry(sqfs_cache_internal* cache, size_t i) { + return (void *)(sqfs_cache_entry_header(cache, i) + 1); } void sqfs_cache_destroy(sqfs_cache *cache) { - if (cache->buf && cache->idxs) { - size_t i; - for (i = 0; i < cache->count; ++i) { - if (cache->idxs[i] != SQFS_CACHE_IDX_INVALID) - cache->dispose(sqfs_cache_entry(cache, i)); + if (cache && *cache) { + sqfs_cache_internal *c = *cache; + if (c->buf) { + size_t i; + for (i = 0; i < c->count; ++i) { + sqfs_cache_entry_hdr *hdr = + sqfs_cache_entry_header(c, i); + if (hdr->valid) { + c->dispose((void *)(hdr + 1)); + } + } } + free(c->buf); + free(c); + *cache = NULL; } - free(cache->buf); - free(cache->idxs); } void *sqfs_cache_get(sqfs_cache *cache, sqfs_cache_idx idx) { size_t i; - for (i = 0; i < cache->count; ++i) { - if (cache->idxs[i] == idx) - return sqfs_cache_entry(cache, i); + sqfs_cache_internal *c = *cache; + sqfs_cache_entry_hdr *hdr; + + for (i = 0; i < c->count; ++i) { + hdr = sqfs_cache_entry_header(c, i); + if (hdr->idx == idx) { + assert(hdr->valid); + return sqfs_cache_entry(c, i); + } } - return NULL; -} -void *sqfs_cache_add(sqfs_cache *cache, sqfs_cache_idx idx) { - size_t i = (cache->next++); - cache->next %= cache->count; - - if (cache->idxs[i] != SQFS_CACHE_IDX_INVALID) - cache->dispose(sqfs_cache_entry(cache, i)); - - cache->idxs[i] = idx; - return sqfs_cache_entry(cache, i); -} + /* No existing entry; free one if necessary, allocate a new one. */ + i = (c->next++); + c->next %= c->count; -/* sqfs_cache_add can be called but the caller can fail to fill it (IO - * error, etc). sqfs_cache_invalidate invalidates the cache entry. - * It does not call dispose; it merely marks the entry as reusable - * since it is never fully initialized. - */ -void sqfs_cache_invalidate(sqfs_cache *cache, sqfs_cache_idx idx) { - size_t i; - for (i = 0; i < cache->count; ++i) { - if (cache->idxs[i] == idx) { - cache->idxs[i] = SQFS_CACHE_IDX_INVALID; - return; - } + hdr = sqfs_cache_entry_header(c, i); + if (hdr->valid) { + /* evict */ + c->dispose((void *)(hdr + 1)); + hdr->valid = 0; } + + hdr->idx = idx; + return (void *)(hdr + 1); +} + +int sqfs_cache_entry_valid(const sqfs_cache *cache, const void *e) { + sqfs_cache_entry_hdr *hdr = ((sqfs_cache_entry_hdr *)e) - 1; + return hdr->valid; } -static void sqfs_block_cache_dispose(void *data) { - sqfs_block_cache_entry *entry = (sqfs_block_cache_entry*)data; - sqfs_block_dispose(entry->block); +void sqfs_cache_entry_mark_valid(sqfs_cache *cache, void *e) { + sqfs_cache_entry_hdr *hdr = ((sqfs_cache_entry_hdr *)e) - 1; + assert(hdr->valid == 0); + hdr->valid = 1; } -sqfs_err sqfs_block_cache_init(sqfs_cache *cache, size_t count) { - return sqfs_cache_init(cache, sizeof(sqfs_block_cache_entry), count, - &sqfs_block_cache_dispose); +void sqfs_cache_put(const sqfs_cache *cache, const void *e) { + // nada, we have no locking in single-threaded implementation. } diff --git a/cache.h b/cache.h index b78c524d..da471352 100644 --- a/cache.h +++ b/cache.h @@ -33,35 +33,37 @@ * - No thread safety * - Misses are caller's responsibility */ -#define SQFS_CACHE_IDX_INVALID 0 typedef uint64_t sqfs_cache_idx; typedef void (*sqfs_cache_dispose)(void* data); -typedef struct { - sqfs_cache_idx *idxs; - uint8_t *buf; - - sqfs_cache_dispose dispose; - - size_t size, count; - size_t next; /* next block to evict */ -} sqfs_cache; +struct sqfs_cache_internal; +typedef struct sqfs_cache_internal *sqfs_cache; sqfs_err sqfs_cache_init(sqfs_cache *cache, size_t size, size_t count, sqfs_cache_dispose dispose); void sqfs_cache_destroy(sqfs_cache *cache); +/* Get an entry for the given index. + * + * This will always succeed (evicting if necessary). The caller must then + * call sqfs_cache_entry_valid() to determine if the entry is valid. If not + * valid, the entry is newly allocated and the caller is responsible for + * initializing it and then calling sqfs_cache_entry_mark_valid(). + * + * This call may block in multithreaded case. + * + * In multithreaded case, the cache is locked on return (no entries can + * be added or removed). Caller must call sqfs_cache_put() when it is safe + * to evict the returned cache entry. + */ void *sqfs_cache_get(sqfs_cache *cache, sqfs_cache_idx idx); -void *sqfs_cache_add(sqfs_cache *cache, sqfs_cache_idx idx); -void sqfs_cache_invalidate(sqfs_cache *cache, sqfs_cache_idx idx); - - -typedef struct { - sqfs_block *block; - size_t data_size; -} sqfs_block_cache_entry; +/* inform cache it is now safe to evict this entry. */ +void sqfs_cache_put(const sqfs_cache *cache, const void *e); -sqfs_err sqfs_block_cache_init(sqfs_cache *cache, size_t count); +/* Determine if cache entry contains valid contents. */ +int sqfs_cache_entry_valid(const sqfs_cache *cache, const void *e); +/* Mark cache entry as containing valid contents. */ +void sqfs_cache_entry_mark_valid(sqfs_cache *cache, void *e); #endif diff --git a/common.h b/common.h index aeac5c67..9d50e006 100644 --- a/common.h +++ b/common.h @@ -32,12 +32,23 @@ #include <sys/types.h> #ifdef _WIN32 - #include <win32.h> +# include <win32.h> +# include <intrin.h> +# define atomic_inc_relaxed(ptr) \ + _InterlockedIncrement(ptr) +# define atomic_dec_acqrel(ptr) \ + _InterlockedDecrement(ptr) #else typedef mode_t sqfs_mode_t; typedef uid_t sqfs_id_t; typedef off_t sqfs_off_t; typedef int sqfs_fd_t; + +# define atomic_inc_relaxed(ptr) \ + __atomic_add_fetch(&block->refcount, 1, __ATOMIC_RELAXED) +# define atomic_dec_acqrel(ptr) \ + __atomic_sub_fetch(&block->refcount, 1, __ATOMIC_ACQ_REL) + #endif typedef enum { @@ -59,6 +70,7 @@ typedef struct sqfs_inode sqfs_inode; typedef struct { size_t size; void *data; + long refcount; } sqfs_block; typedef struct { @@ -66,4 +78,16 @@ typedef struct { size_t offset; } sqfs_md_cursor; +/* Increment the refcount on the block. */ +static inline void sqfs_block_ref(sqfs_block *block) { + atomic_inc_relaxed(&block->refcount); +} + +/* decrement the refcount on the block, return non-zero if we held the last + * reference. + */ +static inline int sqfs_block_deref(sqfs_block *block) { + return atomic_dec_acqrel(&block->refcount) == 0; +} + #endif diff --git a/file.c b/file.c index a4d894eb..d09f2a7d 100644 --- a/file.c +++ b/file.c @@ -177,7 +177,7 @@ sqfs_err sqfs_read_range(sqfs *fs, sqfs_inode *inode, sqfs_off_t start, take = (size_t)(*size); if (block) { memcpy(buf, (char*)block->data + data_off + read_off, take); - /* BLOCK CACHED, DON'T DISPOSE */ + sqfs_block_dispose(block); } else { memset(buf, 0, take); } @@ -226,7 +226,7 @@ sqfs_err sqfs_blockidx_init(sqfs_cache *cache) { } sqfs_err sqfs_blockidx_add(sqfs *fs, sqfs_inode *inode, - sqfs_blockidx_entry **out) { + sqfs_blockidx_entry **out, sqfs_blockidx_entry **cachep) { size_t blocks; /* Number of blocks in the file */ size_t md_size; /* Amount of metadata necessary to hold the blocksizes */ size_t count; /* Number of block-index entries necessary */ @@ -234,10 +234,6 @@ sqfs_err sqfs_blockidx_add(sqfs *fs, sqfs_inode *inode, sqfs_blockidx_entry *blockidx; sqfs_blocklist bl; - /* For the cache */ - sqfs_cache_idx idx; - sqfs_blockidx_entry **cachep; - size_t i = 0; bool first = true; @@ -270,8 +266,6 @@ sqfs_err sqfs_blockidx_add(sqfs *fs, sqfs_inode *inode, } } - idx = inode->base.inode_number + 1; /* zero means invalid */ - cachep = sqfs_cache_add(&fs->blockidx, idx); *out = *cachep = blockidx; return SQFS_OK; } @@ -299,12 +293,16 @@ sqfs_err sqfs_blockidx_blocklist(sqfs *fs, sqfs_inode *inode, /* Get the index, creating it if necessary */ idx = inode->base.inode_number + 1; /* zero means invalid index */ - if ((bp = sqfs_cache_get(&fs->blockidx, idx))) { + bp = sqfs_cache_get(&fs->blockidx, idx); + if (sqfs_cache_entry_valid(&fs->blockidx, bp)) { blockidx = *bp; } else { - sqfs_err err = sqfs_blockidx_add(fs, inode, &blockidx); - if (err) + sqfs_err err = sqfs_blockidx_add(fs, inode, &blockidx, bp); + if (err) { + sqfs_cache_put(&fs->blockidx, bp); return err; + } + sqfs_cache_entry_mark_valid(&fs->blockidx, bp); } skipped = (metablock * SQUASHFS_METADATA_SIZE / sizeof(sqfs_blocklist_entry)) @@ -316,6 +314,9 @@ sqfs_err sqfs_blockidx_blocklist(sqfs *fs, sqfs_inode *inode, bl->remain -= skipped; bl->pos = (uint64_t)skipped * fs->sb.block_size; bl->block = blockidx->data_block; + + sqfs_cache_put(&fs->blockidx, bp); + return SQFS_OK; } diff --git a/file.h b/file.h index 249c6413..e3d2b028 100644 --- a/file.h +++ b/file.h @@ -71,10 +71,6 @@ typedef struct { sqfs_err sqfs_blockidx_init(sqfs_cache *cache); -/* Fill *out with all the block-index entries for this file */ -sqfs_err sqfs_blockidx_add(sqfs *fs, sqfs_inode *inode, - sqfs_blockidx_entry **out); - /* Get a blocklist fast-forwarded to the correct location */ sqfs_err sqfs_blockidx_blocklist(sqfs *fs, sqfs_inode *inode, sqfs_blocklist *bl, sqfs_off_t start); diff --git a/fs.c b/fs.c index d69bb681..1838c5ca 100644 --- a/fs.c +++ b/fs.c @@ -124,6 +124,8 @@ sqfs_err sqfs_block_read(sqfs *fs, sqfs_off_t pos, bool compressed, sqfs_err err = SQFS_ERR; if (!(*block = malloc(sizeof(**block)))) return SQFS_ERR; + /* start with refcount one, so dispose on failure path works as expected. */ + (*block)->refcount = 1; if (!((*block)->data = malloc(size))) goto error; @@ -188,44 +190,81 @@ sqfs_err sqfs_data_block_read(sqfs *fs, sqfs_off_t pos, uint32_t hdr, } sqfs_err sqfs_md_cache(sqfs *fs, sqfs_off_t *pos, sqfs_block **block) { - sqfs_block_cache_entry *entry = sqfs_cache_get( - &fs->md_cache, *pos); - if (!entry) { + sqfs_block_cache_entry *entry = sqfs_cache_get(&fs->md_cache, *pos); + if (!sqfs_cache_entry_valid(&fs->md_cache, entry)) { sqfs_err err = SQFS_OK; - entry = sqfs_cache_add(&fs->md_cache, *pos); /* fprintf(stderr, "MD BLOCK: %12llx\n", (long long)*pos); */ err = sqfs_md_block_read(fs, *pos, &entry->data_size, &entry->block); if (err) { - sqfs_cache_invalidate(&fs->md_cache, *pos); + sqfs_cache_put(&fs->md_cache, entry); return err; } + sqfs_cache_entry_mark_valid(&fs->md_cache, entry); } + /* block is created with refcount 1, which accounts for presence in the + * cache (will be decremented on eviction). + * + * We increment it here as a convienience for the caller, who will + * obviously want one. Therefore all callers must eventually call deref + * by means of calling sqfs_block_dispose(). + */ *block = entry->block; *pos += entry->data_size; + + sqfs_block_ref(entry->block); + /* it is now safe to evict the entry from the cache, we have a + * reference to the block so eviction will not destroy it. + */ + sqfs_cache_put(&fs->md_cache, entry); + return SQFS_OK; } sqfs_err sqfs_data_cache(sqfs *fs, sqfs_cache *cache, sqfs_off_t pos, uint32_t hdr, sqfs_block **block) { sqfs_block_cache_entry *entry = sqfs_cache_get(cache, pos); - if (!entry) { + if (!sqfs_cache_entry_valid(cache, entry)) { sqfs_err err = SQFS_OK; - entry = sqfs_cache_add(cache, pos); err = sqfs_data_block_read(fs, pos, hdr, &entry->block); if (err) { - sqfs_cache_invalidate(cache, pos); + sqfs_cache_put(cache, entry); return err; } + sqfs_cache_entry_mark_valid(cache, entry); } + /* block is created with refcount 1, which accounts for presence in the + * cache (will be decremented on eviction). + * + * We increment it here as a convenience for the caller, who will + * obviously want one. Therefore all callers must eventually call deref + * by means of calling sqfs_block_dispose(). + */ *block = entry->block; + sqfs_block_ref(*block); + /* it is now safe to evict the entry from the cache, we have a + * reference to the block so eviction will not destroy it. + */ + sqfs_cache_put(cache, entry); return SQFS_OK; } void sqfs_block_dispose(sqfs_block *block) { - free(block->data); - free(block); + if (sqfs_block_deref(block)) { + free(block->data); + free(block); + } +} + +static void sqfs_block_cache_dispose(void *data) { + sqfs_block_cache_entry *entry = (sqfs_block_cache_entry*)data; + sqfs_block_dispose(entry->block); +} + +sqfs_err sqfs_block_cache_init(sqfs_cache *cache, size_t count) { + return sqfs_cache_init(cache, sizeof(sqfs_block_cache_entry), count, + &sqfs_block_cache_dispose); } void sqfs_md_cursor_inode(sqfs_md_cursor *cur, sqfs_inode_id id, sqfs_off_t base) { @@ -247,7 +286,6 @@ sqfs_err sqfs_md_read(sqfs *fs, sqfs_md_cursor *cur, void *buf, size_t size) { take = size; if (buf) memcpy(buf, (char*)block->data + cur->offset, take); - /* BLOCK CACHED, DON'T DISPOSE */ if (buf) buf = (char*)buf + take; @@ -257,6 +295,7 @@ sqfs_err sqfs_md_read(sqfs *fs, sqfs_md_cursor *cur, void *buf, size_t size) { cur->block = pos; cur->offset = 0; } + sqfs_block_dispose(block); } return SQFS_OK; } diff --git a/fs.h b/fs.h index d300a3bb..1d475ce0 100644 --- a/fs.h +++ b/fs.h @@ -97,6 +97,11 @@ sqfs_compression_type sqfs_compression(sqfs *fs); void sqfs_md_header(uint16_t hdr, bool *compressed, uint16_t *size); void sqfs_data_header(uint32_t hdr, bool *compressed, uint32_t *size); +typedef struct { + sqfs_block *block; + size_t data_size; +} sqfs_block_cache_entry; +sqfs_err sqfs_block_cache_init(sqfs_cache *cache, size_t count); sqfs_err sqfs_block_read(sqfs *fs, sqfs_off_t pos, bool compressed, uint32_t size, size_t outsize, sqfs_block **block); void sqfs_block_dispose(sqfs_block *block); diff --git a/table.c b/table.c index c035398f..02a5442c 100644 --- a/table.c +++ b/table.c @@ -76,6 +76,6 @@ sqfs_err sqfs_table_get(sqfs_table *table, sqfs *fs, size_t idx, void *buf) { return SQFS_ERR; memcpy(buf, (char*)(block->data) + off, table->each); - /* BLOCK CACHED, DON'T DISPOSE */ + sqfs_block_dispose(block); return SQFS_OK; } diff --git a/tests/cachetest.c b/tests/cachetest.c new file mode 100644 index 00000000..8a2c2363 --- /dev/null +++ b/tests/cachetest.c @@ -0,0 +1,107 @@ +#include "cache.h" +#include <stdio.h> + +typedef struct { + int x; + int y; +} TestStruct; + +static void TestStructDispose(void *t) { + // nada. +} + +#define EXPECT_EQ(exp1, exp2) \ + do { if ((exp1) != (exp2)) { \ + printf("Test failure: expected " #exp1 " to equal " #exp2 \ + " at " __FILE__ ":%d\n", __LINE__); \ + ++errors; \ + } \ + } while (0) + +#define EXPECT_NE(exp1, exp2) \ + do { if ((exp1) == (exp2)) { \ + printf("Test failure: expected " #exp1 " to !equal " #exp2 \ + " at " __FILE__ ":%d\n", __LINE__); \ + ++errors; \ + } \ + } while (0) + + +int test_cache_miss(void) { + int errors = 0; + sqfs_cache cache; + TestStruct *entry; + + EXPECT_EQ(sqfs_cache_init(&cache, sizeof(TestStruct), 16, + TestStructDispose), SQFS_OK); + entry = (TestStruct *)sqfs_cache_get(&cache, 1); + EXPECT_EQ(sqfs_cache_entry_valid(&cache, entry), 0); + sqfs_cache_destroy(&cache); + + return errors == 0; +} + +int test_mark_valid_and_lookup(void) { + int errors = 0; + sqfs_cache cache; + TestStruct *entry; + + EXPECT_EQ(sqfs_cache_init(&cache, sizeof(TestStruct), 16, + TestStructDispose), SQFS_OK); + entry = (TestStruct *)sqfs_cache_get(&cache, 1); + entry->x = 666; + entry->y = 777; + sqfs_cache_entry_mark_valid(&cache, entry); + sqfs_cache_put(&cache, entry); + EXPECT_NE(sqfs_cache_entry_valid(&cache, entry), 0); + entry = (TestStruct *)sqfs_cache_get(&cache, 1); + EXPECT_NE(sqfs_cache_entry_valid(&cache, entry), 0); + EXPECT_EQ(entry->x, 666); + EXPECT_EQ(entry->y, 777); + sqfs_cache_put(&cache, entry); + + sqfs_cache_destroy(&cache); + return errors == 0; +} + +int test_two_entries(void) { + int errors = 0; + sqfs_cache cache; + TestStruct *entry1, *entry2; + + EXPECT_EQ(sqfs_cache_init(&cache, sizeof(TestStruct), 16, + TestStructDispose), SQFS_OK); + + entry1 = (TestStruct *)sqfs_cache_get(&cache, 1); + entry1->x = 1; + entry1->y = 2; + sqfs_cache_entry_mark_valid(&cache, entry1); + sqfs_cache_put(&cache, entry1); + + entry2 = (TestStruct *)sqfs_cache_get(&cache, 666); + entry2->x = 3; + entry2->y = 4; + sqfs_cache_entry_mark_valid(&cache, entry2); + sqfs_cache_put(&cache, entry2); + + entry1 = (TestStruct *)sqfs_cache_get(&cache, 1); + sqfs_cache_put(&cache, entry1); + entry2 = (TestStruct *)sqfs_cache_get(&cache, 666); + sqfs_cache_put(&cache, entry2); + EXPECT_NE(sqfs_cache_entry_valid(&cache, entry1), 0); + EXPECT_NE(sqfs_cache_entry_valid(&cache, entry2), 0); + EXPECT_EQ(entry1->x, 1); + EXPECT_EQ(entry1->y, 2); + EXPECT_EQ(entry2->x, 3); + EXPECT_EQ(entry2->y, 4); + + sqfs_cache_destroy(&cache); + + return errors == 0; +} + +int main(void) { + return test_cache_miss() && + test_mark_valid_and_lookup() && + test_two_entries() ? 0 : 1; +} From 379c8507c15ef43b641c1024b43372f2de9fb480 Mon Sep 17 00:00:00 2001 From: Kevin Vigor <kvigor@gmail.com> Date: Thu, 6 Feb 2020 12:20:19 -0800 Subject: [PATCH 2/3] Implement multi-threaded squashfuse_ll, allowing parallel decompression. A simple thread-safe cache implementation is added and squashfuse_ll init is altered to use fuse_session_loop_mt(). Multithreading must be explicitly enabled at configure time with the --enable-multithreading option. If enabled, the resulting squashfuse_ll will be multithreaded by default, but this may be disabled at runtime with the '-s' FUSE commandline option. --- Makefile.am | 8 +- cache.c | 4 + cache_mt.c | 169 +++++++++++++++++++++++++++++++ configure.ac | 11 +- fs.c | 9 +- ll.c | 77 ++++++++++---- ll_main.c | 20 +++- m4/squashfuse_c.m4 | 33 +----- squashfs_fs.h | 6 +- tests/cachetest.c | 1 + tests/ll-smoke-singlethreaded.sh | 10 ++ tests/ll-smoke.sh | 141 ++++++++++++++++++++++++++ tests/ll-smoke.sh.in | 6 +- 13 files changed, 432 insertions(+), 63 deletions(-) create mode 100644 cache_mt.c create mode 100755 tests/ll-smoke-singlethreaded.sh create mode 100755 tests/ll-smoke.sh diff --git a/Makefile.am b/Makefile.am index eaf7ac97..17b01be4 100644 --- a/Makefile.am +++ b/Makefile.am @@ -26,7 +26,7 @@ pkgconfig_DATA = squashfuse.pc noinst_LTLIBRARIES += libsquashfuse_convenience.la libsquashfuse_convenience_la_SOURCES = swap.c cache.c table.c dir.c file.c fs.c \ decompress.c xattr.c hash.c stack.c traverse.c util.c \ - nonstd-pread.c nonstd-stat.c \ + nonstd-pread.c nonstd-stat.c cache_mt.c \ squashfs_fs.h common.h nonstd-internal.h nonstd.h swap.h cache.h table.h \ dir.h file.h decompress.h xattr.h squashfuse.h hash.h stack.h traverse.h \ util.h fs.h @@ -105,6 +105,12 @@ endif TESTS = if SQ_FUSE_TESTS TESTS += tests/ll-smoke.sh +if MULTITHREADED +# I know this test looks backwards, but the default smoke test is multithreaded +# when threading is enabled. So we additionally run a singlethreaded test in +# that case. +TESTS += tests/ll-smoke-singlethreaded.sh +endif check_PROGRAMS = cachetest endiantest cachetest_SOURCES=tests/cachetest.c cachetest_LDADD=libsquashfuse.la $(COMPRESSION_LIBS) diff --git a/cache.c b/cache.c index 36d02234..45408f24 100644 --- a/cache.c +++ b/cache.c @@ -24,6 +24,9 @@ */ #include "config.h" + +#ifndef SQFS_MULTITHREADED + #include "cache.h" #include "fs.h" @@ -140,3 +143,4 @@ void sqfs_cache_entry_mark_valid(sqfs_cache *cache, void *e) { void sqfs_cache_put(const sqfs_cache *cache, const void *e) { // nada, we have no locking in single-threaded implementation. } +#endif /* SQFS_MULTITHREADED */ diff --git a/cache_mt.c b/cache_mt.c new file mode 100644 index 00000000..1b17fa5a --- /dev/null +++ b/cache_mt.c @@ -0,0 +1,169 @@ +#include "config.h" + +#ifdef SQFS_MULTITHREADED + +/* Thread-safe cache implementation. + * + * Simple implementation: basic hash table, each individual entry is + * protected by a mutex, any collision is handled by eviction. + */ + +#include "cache.h" +#include "fs.h" + +#include <assert.h> +#include <pthread.h> +#include <stdlib.h> + +typedef struct sqfs_cache_internal { + uint8_t *buf; + sqfs_cache_dispose dispose; + size_t entry_size, count; +} sqfs_cache_internal; + +typedef struct { + enum { EMPTY, FULL } state; + sqfs_cache_idx idx; + pthread_mutex_t lock; +} sqfs_cache_entry_hdr; + +// MurmurHash64A performance-optimized for hash of uint64_t keys +const static uint64_t kMurmur2Seed = 4193360111ul; +static uint64_t MurmurRehash64A(uint64_t key) { + const uint64_t m = 0xc6a4a7935bd1e995; + const int r = 47; + + uint64_t h = (uint64_t)kMurmur2Seed ^ (sizeof(uint64_t) * m); + + key *= m; + key ^= key >> r; + key *= m; + + h ^= key; + h *= m; + + h ^= h >> r; + h *= m; + h ^= h >> r; + + return h; +} + +static sqfs_cache_entry_hdr *sqfs_cache_entry_header( + sqfs_cache_internal* cache, + size_t i) { + assert(i < cache->count); + return (sqfs_cache_entry_hdr *)(cache->buf + i * cache->entry_size); +} + +sqfs_err sqfs_cache_init(sqfs_cache *cache, size_t entry_size, size_t count, + sqfs_cache_dispose dispose) { + size_t i; + pthread_mutexattr_t attr; + sqfs_cache_internal *c = malloc(sizeof(sqfs_cache_internal)); + + if (!c) { + return SQFS_ERR; + } + + c->entry_size = entry_size + sizeof(sqfs_cache_entry_hdr); + c->count = count; + c->dispose = dispose; + + pthread_mutexattr_init(&attr); +#if defined(_GNU_SOURCE) && !defined(NDEBUG) + pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_ERRORCHECK); +#endif + + c->buf = calloc(c->count, c->entry_size); + if (!c->buf) { + goto err_out; + } + + for (i = 0; i < c->count; ++i) { + sqfs_cache_entry_hdr *hdr = sqfs_cache_entry_header(c, i); + hdr->state = EMPTY; + if (pthread_mutex_init(&hdr->lock, &attr)) { + goto err_out; + } + } + + pthread_mutexattr_destroy(&attr); + + *cache = c; + return SQFS_OK; + +err_out: + sqfs_cache_destroy(&c); + return SQFS_ERR; +} + +void sqfs_cache_destroy(sqfs_cache *cache) { + if (cache && *cache) { + sqfs_cache_internal *c = *cache; + if (c->buf) { + size_t i; + for (i = 0; i < c->count; ++i) { + sqfs_cache_entry_hdr *hdr = + sqfs_cache_entry_header(c, i); + if (hdr->state == FULL) { + c->dispose((void *)(hdr + 1)); + } + if (pthread_mutex_destroy(&hdr->lock)) { + assert(0); + } + } + } + free(c->buf); + free(c); + *cache = NULL; + } +} + +void *sqfs_cache_get(sqfs_cache *cache, sqfs_cache_idx idx) { + sqfs_cache_internal *c = *cache; + sqfs_cache_entry_hdr *hdr; + void *entry; + + uint64_t key = MurmurRehash64A(idx) % c->count; + + hdr = sqfs_cache_entry_header(c, key); + if (pthread_mutex_lock(&hdr->lock)) { assert(0); } + /* matching unlock is in sqfs_cache_put() */ + entry = (void *)(hdr + 1); + + if (hdr->state == EMPTY) { + hdr->idx = idx; + return entry; + } + + /* There's a valid entry: it's either a cache hit or a collision. */ + assert(hdr->state == FULL); + if (hdr->idx == idx) { + return entry; + } + + /* Collision. */ + c->dispose((void *)(hdr + 1)); + hdr->state = EMPTY; + hdr->idx = idx; + return entry; +} + +int sqfs_cache_entry_valid(const sqfs_cache *cache, const void *e) { + sqfs_cache_entry_hdr *hdr = ((sqfs_cache_entry_hdr *)e) - 1; + return hdr->state == FULL; +} + +void sqfs_cache_entry_mark_valid(sqfs_cache *cache, void *e) { + sqfs_cache_entry_hdr *hdr = ((sqfs_cache_entry_hdr *)e) - 1; + assert(hdr->state == EMPTY); + hdr->state = FULL; +} + +void sqfs_cache_put(const sqfs_cache *cache, const void *e) { + sqfs_cache_entry_hdr *hdr = ((sqfs_cache_entry_hdr *)e) - 1; + if (pthread_mutex_unlock(&hdr->lock)) { assert(0); } +} + +#endif /* SQFS_MULTITHREADED */ diff --git a/configure.ac b/configure.ac index 762766e9..3869075a 100644 --- a/configure.ac +++ b/configure.ac @@ -10,6 +10,7 @@ AH_BOTTOM([#endif]) AC_CANONICAL_BUILD AC_CANONICAL_TARGET AM_INIT_AUTOMAKE([foreign -Wall subdir-objects]) +AC_USE_SYSTEM_EXTENSIONS AM_SILENT_RULES(yes) AM_PROG_AR LT_INIT @@ -23,10 +24,8 @@ AC_PROG_SED AC_PROG_CPP AC_SYS_LARGEFILE AM_PROG_CC_C_O -SQ_PROG_CPP_POSIX_2001 SQ_PROG_CC_WALL -AC_DEFINE([_POSIX_C_SOURCE], [200112L], [POSIX 2001 compatibility]) # Non-POSIX declarations SQ_CHECK_DECL_MAKEDEV @@ -97,6 +96,14 @@ AC_CONFIG_FILES([tests/ll-smoke.sh],[chmod +x tests/ll-smoke.sh]) AS_IF([test "x$sq_high_level$sq_low_level$sq_demo" = xnonono], AC_MSG_FAILURE([Nothing left to build])) +AC_ARG_ENABLE([multithreading], + AS_HELP_STRING([--enable-multithreading], [enable multi-threaded low-level FUSE driver]), + [ + AC_CHECK_LIB([pthread], [pthread_mutex_lock], [], AC_MSG_ERROR([libpthread is required for multithreaded build])) + AC_DEFINE(SQFS_MULTITHREADED, 1, [Enable multi-threaded low-level FUSE driver]) + ]) +AM_CONDITIONAL([MULTITHREADED], [test x$enable_multithreading = xyes]) + AC_SUBST([sq_decompressors]) AC_SUBST([sq_high_level]) AC_SUBST([sq_low_level]) diff --git a/fs.c b/fs.c index 1838c5ca..ab854b0f 100644 --- a/fs.c +++ b/fs.c @@ -34,8 +34,13 @@ #include <sys/stat.h> -#define DATA_CACHED_BLKS 1 -#define FRAG_CACHED_BLKS 3 +#ifdef SQFS_MULTITHREADED +# define DATA_CACHED_BLKS 48 +# define FRAG_CACHED_BLKS 48 +#else +# define DATA_CACHED_BLKS 1 +# define FRAG_CACHED_BLKS 3 +#endif void sqfs_version_supported(int *min_major, int *min_minor, int *max_major, int *max_minor) { diff --git a/ll.c b/ll.c index 4d17ba5b..596c8bf1 100644 --- a/ll.c +++ b/ll.c @@ -52,11 +52,49 @@ static sig_atomic_t open_refcount = 0; /* same as lib/fuse_signals.c */ static struct fuse_session *fuse_instance = NULL; +static void update_access_time(void) { +#ifdef SQFS_MULTITHREADED + /* We only need to track access time if we have an idle timeout, + * don't bother with expensive operations if idle_timeout is 0. + */ + if (idle_timeout_secs) { + time_t now = time(NULL); + __atomic_store_n(&last_access, now, __ATOMIC_RELEASE); + } +#else + last_access = time(NULL); +#endif +} + +static void update_open_refcount(int delta) { +#ifdef SQFS_MULTITHREADED + __atomic_fetch_add(&open_refcount, delta, __ATOMIC_RELEASE); +#else + open_refcount += delta; +#endif +} + +static inline time_t get_access_time(void) { +#ifdef SQFS_MULTITHREADED + return __atomic_load_n(&last_access, __ATOMIC_ACQUIRE); +#else + return last_access; +#endif +} + +static inline sig_atomic_t get_open_refcount(void) { +#ifdef SQFS_MULTITHREADED + return __atomic_load_n(&open_refcount, __ATOMIC_ACQUIRE); +#else + return open_refcount; +#endif +} + void sqfs_ll_op_getattr(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) { sqfs_ll_i lli; struct stat st; - last_access = time(NULL); + update_access_time(); if (sqfs_ll_iget(req, &lli, ino)) return; @@ -71,7 +109,7 @@ void sqfs_ll_op_getattr(fuse_req_t req, fuse_ino_t ino, void sqfs_ll_op_opendir(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) { sqfs_ll_i *lli; - last_access = time(NULL); + update_access_time(); fi->fh = (intptr_t)NULL; @@ -86,7 +124,7 @@ void sqfs_ll_op_opendir(fuse_req_t req, fuse_ino_t ino, fuse_reply_err(req, ENOTDIR); } else { fi->fh = (intptr_t)lli; - ++open_refcount; + update_open_refcount(1); fuse_reply_open(req, fi); return; } @@ -96,14 +134,14 @@ void sqfs_ll_op_opendir(fuse_req_t req, fuse_ino_t ino, void sqfs_ll_op_create(fuse_req_t req, fuse_ino_t parent, const char *name, mode_t mode, struct fuse_file_info *fi) { - last_access = time(NULL); + update_access_time(); fuse_reply_err(req, EROFS); } void sqfs_ll_op_releasedir(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) { - last_access = time(NULL); - --open_refcount; + update_access_time(); + update_open_refcount(-1); free((sqfs_ll_i*)(intptr_t)fi->fh); fuse_reply_err(req, 0); /* yes, this is necessary */ } @@ -132,7 +170,7 @@ void sqfs_ll_op_readdir(fuse_req_t req, fuse_ino_t ino, size_t size, sqfs_ll_i *lli = (sqfs_ll_i*)(intptr_t)fi->fh; int err = 0; - last_access = time(NULL); + update_access_time(); if (sqfs_dir_open(&lli->ll->fs, &lli->inode, &dir, off)) err = EINVAL; if (!err && !(bufpos = buf = malloc(size))) @@ -173,7 +211,7 @@ void sqfs_ll_op_lookup(fuse_req_t req, fuse_ino_t parent, bool found; sqfs_inode inode; - last_access = time(NULL); + update_access_time(); if (sqfs_ll_iget(req, &lli, parent)) return; @@ -223,7 +261,7 @@ void sqfs_ll_op_open(fuse_req_t req, fuse_ino_t ino, sqfs_inode *inode; sqfs_ll *ll; - last_access = time(NULL); + update_access_time(); if (fi->flags & (O_WRONLY | O_RDWR)) { fuse_reply_err(req, EROFS); return; @@ -243,7 +281,7 @@ void sqfs_ll_op_open(fuse_req_t req, fuse_ino_t ino, } else { fi->fh = (intptr_t)inode; fi->keep_cache = 1; - ++open_refcount; + update_open_refcount(1); fuse_reply_open(req, fi); return; } @@ -254,8 +292,8 @@ void sqfs_ll_op_release(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) { free((sqfs_inode*)(intptr_t)fi->fh); fi->fh = 0; - last_access = time(NULL); - --open_refcount; + update_access_time(); + update_open_refcount(-1); fuse_reply_err(req, 0); } @@ -272,7 +310,7 @@ void sqfs_ll_op_read(fuse_req_t req, fuse_ino_t ino, return; } - last_access = time(NULL); + update_access_time(); osize = size; err = sqfs_read_range(&ll->fs, inode, off, &osize, buf); if (err) { @@ -289,7 +327,7 @@ void sqfs_ll_op_readlink(fuse_req_t req, fuse_ino_t ino) { char *dst; size_t size; sqfs_ll_i lli; - last_access = time(NULL); + update_access_time(); if (sqfs_ll_iget(req, &lli, ino)) return; @@ -313,7 +351,7 @@ void sqfs_ll_op_listxattr(fuse_req_t req, fuse_ino_t ino, size_t size) { char *buf; int ferr; - last_access = time(NULL); + update_access_time(); if (sqfs_ll_iget(req, &lli, ino)) return; @@ -351,7 +389,7 @@ void sqfs_ll_op_getxattr(fuse_req_t req, fuse_ino_t ino, } #endif - last_access = time(NULL); + update_access_time(); if (sqfs_ll_iget(req, &lli, ino)) return; @@ -373,7 +411,7 @@ void sqfs_ll_op_getxattr(fuse_req_t req, fuse_ino_t ino, void sqfs_ll_op_forget(fuse_req_t req, fuse_ino_t ino, unsigned long nlookup) { sqfs_ll_i lli; - last_access = time(NULL); + update_access_time(); sqfs_ll_iget(req, &lli, SQFS_FUSE_INODE_NONE); lli.ll->ino_forget(lli.ll, ino, nlookup); fuse_reply_none(req); @@ -489,7 +527,8 @@ void alarm_tick(int sig) { return; } - if (open_refcount == 0 && time(NULL) - last_access > idle_timeout_secs) { + if (get_open_refcount() == 0 && + time(NULL) - get_access_time() > idle_timeout_secs) { /* Safely shutting down fuse in a cross-platform way is a dark art! But just about any platform should stop on SIGINT, so do that */ kill(getpid(), SIGINT); @@ -499,8 +538,8 @@ void alarm_tick(int sig) { } void setup_idle_timeout(struct fuse_session *se, unsigned int timeout_secs) { - last_access = time(NULL); idle_timeout_secs = timeout_secs; + update_access_time(); struct sigaction sa; memset(&sa, 0, sizeof(struct sigaction)); diff --git a/ll_main.c b/ll_main.c index aca76935..22302085 100644 --- a/ll_main.c +++ b/ll_main.c @@ -142,8 +142,22 @@ int main(int argc, char *argv[]) { if (opts.idle_timeout_secs) { setup_idle_timeout(ch.session, opts.idle_timeout_secs); } - /* FIXME: multithreading */ - err = fuse_session_loop(ch.session); +#ifdef SQFS_MULTITHREADED +# if FUSE_USE_VERSION >= 30 + if (!fuse_cmdline_opts.singlethread) { + struct fuse_loop_config config; + config.clone_fd = 1; + config.max_idle_threads = 10; + err = fuse_session_loop_mt(ch.session, &config); + } +# else /* FUSE_USE_VERSION < 30 */ + if (fuse_cmdline_opts.mt) { + err = fuse_session_loop_mt(ch.session); + } +# endif /* FUSE_USE_VERSION */ + else +#endif + err = fuse_session_loop(ch.session); teardown_idle_timeout(); fuse_remove_signal_handlers(ch.session); } @@ -157,4 +171,4 @@ int main(int argc, char *argv[]) { free(fuse_cmdline_opts.mountpoint); return -err; -} \ No newline at end of file +} diff --git a/m4/squashfuse_c.m4 b/m4/squashfuse_c.m4 index f29a90b1..c4039c42 100644 --- a/m4/squashfuse_c.m4 +++ b/m4/squashfuse_c.m4 @@ -21,37 +21,6 @@ # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF # THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# SQ_PROG_CPP_POSIX_2001 -# -# Check if a preprocessor flag is needed for POSIX-2001 headers. -# Needed at least on Solaris and derivatives. -AC_DEFUN([SQ_PROG_CPP_POSIX_2001],[ -AC_CACHE_CHECK([for option for POSIX-2001 preprocessor], - [sq_cv_prog_cpp_posix2001], -[ - sq_cv_prog_cpp_posix2001=unknown - sq_save_CPPFLAGS=$CPPFLAGS - for sq_flags in none -std=gnu99 -xc99=all - do - AS_IF([test "x$sq_flags" = xnone],, - [CPPFLAGS="$save_CPPFLAGS $sq_flags"]) - AC_PREPROC_IFELSE([AC_LANG_PROGRAM([ - #define _POSIX_C_SOURCE 200112L - #include <sys/types.h> - ])],[ - sq_cv_prog_cpp_posix2001=$sq_flags - break - ]) - done - CPPFLAGS=$sq_save_CPPFLAGS -]) -AS_IF([test "x$sq_cv_prog_cpp_posix2001" = xunknown], - [AC_MSG_FAILURE([can't preprocess for POSIX-2001])], - [AS_IF([test "x$sq_cv_prog_cpp_posix2001" = xnone],, - CPPFLAGS="$CPPFLAGS $sq_cv_prog_cpp_posix2001") -]) -]) - # SQ_PROG_CC_WALL # # Check if -Wall is supported @@ -67,4 +36,4 @@ AC_CACHE_CHECK([how to enable all compiler warnings], ]) AS_IF([test "x$sq_cv_prog_cc_wall" = xunknown],, [AC_SUBST([AM_CFLAGS],["$AM_CFLAGS $sq_cv_prog_cc_wall"])]) -]) \ No newline at end of file +]) diff --git a/squashfs_fs.h b/squashfs_fs.h index e0ab1f4e..a85b7606 100644 --- a/squashfs_fs.h +++ b/squashfs_fs.h @@ -105,7 +105,11 @@ /* cached data constants for filesystem */ -#define SQUASHFS_CACHED_BLKS 8 +#ifdef SQFS_MULTITHREADED +# define SQUASHFS_CACHED_BLKS 128 +#else +# define SQUASHFS_CACHED_BLKS 8 +#endif #define SQUASHFS_MAX_FILE_SIZE_LOG 64 diff --git a/tests/cachetest.c b/tests/cachetest.c index 8a2c2363..c515fcdf 100644 --- a/tests/cachetest.c +++ b/tests/cachetest.c @@ -36,6 +36,7 @@ int test_cache_miss(void) { TestStructDispose), SQFS_OK); entry = (TestStruct *)sqfs_cache_get(&cache, 1); EXPECT_EQ(sqfs_cache_entry_valid(&cache, entry), 0); + sqfs_cache_put(&cache, entry); sqfs_cache_destroy(&cache); return errors == 0; diff --git a/tests/ll-smoke-singlethreaded.sh b/tests/ll-smoke-singlethreaded.sh new file mode 100755 index 00000000..c7cbfc38 --- /dev/null +++ b/tests/ll-smoke-singlethreaded.sh @@ -0,0 +1,10 @@ +!/bin/bash + +# Singlethreaded ll-smoke test. +# +# When multithreading is enabled at build time, it is the default +# behavior of squashfuse_ll, but can be disabled at runtime with +# the FUSE '-s' commandline option. +# +# So we just re-run the normal ll-smoke test with the '-s' option. +SFLL_EXTRA_ARGS="-s" $(dirname -- $0)/ll-smoke.sh diff --git a/tests/ll-smoke.sh b/tests/ll-smoke.sh new file mode 100755 index 00000000..6b9f4641 --- /dev/null +++ b/tests/ll-smoke.sh @@ -0,0 +1,141 @@ +#!/bin/sh + +. "tests/lib.sh" + +# Very simple smoke test for squashfuse_ll. Make some random files. +# assemble a squashfs image, mount it, compare the files. + +SFLL=${1:-./squashfuse_ll} # The squashfuse_ll binary. + +IDLE_TIMEOUT=5 + +trap cleanup EXIT +set -e + +WORKDIR=$(mktemp -d) + +sq_umount() { + case linux-gnu in + linux*) + fusermount3 -u $1 + ;; + *) + umount $1 + ;; + esac +} + +sq_is_mountpoint() { + mount | grep -q "$1" +} + +cleanup() { + set +e # Don't care about errors here. + if [ -n "$WORKDIR" ]; then + if [ -n "$SQ_SAVE_LOGS" ]; then + cp "$WORKDIR/squashfs_ll.log" "$SQ_SAVE_LOGS" || true + fi + if sq_is_mountpoint "$WORKDIR/mount"; then + sq_umount "$WORKDIR/mount" + fi + rm -rf "$WORKDIR" + fi +} + +find_compressors + +echo "Generating random test files..." +mkdir -p "$WORKDIR/source" +head -c 64000000 /dev/urandom >"$WORKDIR/source/rand1" +head -c 17000 /dev/urandom >"$WORKDIR/source/rand2" +head -c 100000000 /dev/urandom >"$WORKDIR/source/rand3" +head -c 87 /dev/zero >"$WORKDIR/source/z1 with spaces" + +for comp in $compressors; do + echo "Building $comp squashfs image..." + mksquashfs "$WORKDIR/source" "$WORKDIR/squashfs.image" -comp $comp -no-progress + + mkdir -p "$WORKDIR/mount" + + echo "Mounting squashfs image..." + $SFLL -f $SFLL_EXTRA_ARGS "$WORKDIR/squashfs.image" "$WORKDIR/mount" >"$WORKDIR/squashfs_ll.log" 2>&1 & + # Wait up to 5 seconds to be mounted. TSAN builds can take some time to mount. + for _ in $(seq 5); do + if sq_is_mountpoint "$WORKDIR/mount"; then + break + fi + sleep 1 + done + + if ! sq_is_mountpoint "$WORKDIR/mount"; then + echo "Image did not mount after 5 seconds." + cp "$WORKDIR/squashfs_ll.log" /tmp/squashfs_ll.smoke.log + echo "There may be clues in /tmp/squashfs_ll.smoke.log" + exit 1 + fi + + if command -v fio >/dev/null; then + echo "FIO tests..." + fio --filename="$WORKDIR/mount/rand1" --direct=1 --rw=randread --ioengine=libaio --bs=512 --iodepth=16 --numjobs=4 --name=j1 --minimal --output=/dev/null --runtime 30 + fio --filename="$WORKDIR/mount/rand2" --rw=randread --ioengine=libaio --bs=4k --iodepth=16 --numjobs=4 --name=j2 --minimal --output=/dev/null --runtime 30 + fio --filename="$WORKDIR/mount/rand3" --rw=randread --ioengine=psync --bs=128k --name=j3 --minimal --output=/dev/null --runtime 30 + else + echo "Consider installing fio for better test coverage." + fi + + echo "Comparing files..." + cmp "$WORKDIR/source/rand1" "$WORKDIR/mount/rand1" + cmp "$WORKDIR/source/rand2" "$WORKDIR/mount/rand2" + cmp "$WORKDIR/source/rand3" "$WORKDIR/mount/rand3" + cmp "$WORKDIR/source/z1 with spaces" "$WORKDIR/mount/z1 with spaces" + + echo "Parallel md5sum..." + md5sum "$WORKDIR"/mount/* >"$WORKDIR/md5sums" + split -l1 "$WORKDIR/md5sums" "$WORKDIR/sumpiece" + echo "$WORKDIR"/sumpiece* | xargs -P4 -n1 md5sum -c + + echo "Lookup tests..." + # Look for non-existent files to exercise failed lookup path. + if [ -e "$WORKDIR/mount/bogus" ]; then + echo "Bogus existence test" + exit 1 + fi + # Twice so we hit cache path. + if [ -e "$WORKDIR/mount/bogus" ]; then + echo "Bogus existence test #2" + exit 1 + fi + + SRCSZ=$(wc -c < "$WORKDIR/source/rand1") + MNTSZ=$(wc -c < "$WORKDIR/mount/rand1") + if [ "$SRCSZ" != "$MNTSZ" ]; then + echo "Bogus size $MNTSZ != $SRCSZ" + exit 1 + fi + + echo "Unmounting..." + sq_umount "$WORKDIR/mount" + + # Only test timeouts once, it takes a long time + if [ -z "$did_timeout" ]; then + echo "Remounting with idle unmount option..." + $SFLL $SFLL_EXTRA_ARGS -otimeout=$IDLE_TIMEOUT "$WORKDIR/squashfs.image" "$WORKDIR/mount" + if ! sq_is_mountpoint "$WORKDIR/mount"; then + echo "Not mounted?" + exit 1 + fi + echo "Waiting up to $(( IDLE_TIMEOUT + 10 )) seconds for idle unmount..." + sleep $(( IDLE_TIMEOUT + 10 )) + if sq_is_mountpoint "$WORKDIR/mount"; then + echo "FS did not idle unmount in timely way." + exit 1 + fi + + did_timeout=yes + fi + + rm -f "$WORKDIR/squashfs.image" +done + +echo "Success." +exit 0 diff --git a/tests/ll-smoke.sh.in b/tests/ll-smoke.sh.in index 84256267..d7ddd8a1 100755 --- a/tests/ll-smoke.sh.in +++ b/tests/ll-smoke.sh.in @@ -52,13 +52,13 @@ head -c 100000000 /dev/urandom >"$WORKDIR/source/rand3" head -c 87 /dev/zero >"$WORKDIR/source/z1 with spaces" for comp in $compressors; do - echo "Building $comp squashfs image,.," + echo "Building $comp squashfs image..." mksquashfs "$WORKDIR/source" "$WORKDIR/squashfs.image" -comp $comp -no-progress mkdir -p "$WORKDIR/mount" echo "Mounting squashfs image..." - $SFLL -f "$WORKDIR/squashfs.image" "$WORKDIR/mount" >"$WORKDIR/squashfs_ll.log" 2>&1 & + $SFLL -f $SFLL_EXTRA_ARGS "$WORKDIR/squashfs.image" "$WORKDIR/mount" >"$WORKDIR/squashfs_ll.log" 2>&1 & # Wait up to 5 seconds to be mounted. TSAN builds can take some time to mount. for _ in $(seq 5); do if sq_is_mountpoint "$WORKDIR/mount"; then @@ -119,7 +119,7 @@ for comp in $compressors; do # Only test timeouts once, it takes a long time if [ -z "$did_timeout" ]; then echo "Remounting with idle unmount option..." - $SFLL -otimeout=$IDLE_TIMEOUT "$WORKDIR/squashfs.image" "$WORKDIR/mount" + $SFLL $SFLL_EXTRA_ARGS -otimeout=$IDLE_TIMEOUT "$WORKDIR/squashfs.image" "$WORKDIR/mount" if ! sq_is_mountpoint "$WORKDIR/mount"; then echo "Not mounted?" exit 1 From 069e8f802481af0636fabef46bec75de1992b220 Mon Sep 17 00:00:00 2001 From: Kevin Vigor <kvigor@gmail.com> Date: Mon, 23 May 2022 14:55:25 -0600 Subject: [PATCH 3/3] Enable lazy umount on SIGTERM. libfuse sets SIGTERM signal handler to exit immediately. This is very unfortunate if any other processes are still using the filesystem. Teach squashfuse_ll to respond to SIGTERM with lazy umount. We cannot directly call umount2() API from the signal handler, since it is not signal safe, but we can fork/exec fusermount3 (yay posix?). This is also a win because fusermount is suid, enabling non-privileged users to umount. Note that normal libfuse umount uses same strategy when running as non-root. Note that this must be explicitly enabled at configure time with --enable-sigterm-handler, and it is only tested on linux. --- Makefile.am | 3 ++ configure.ac | 9 +++++ ll_main.c | 87 +++++++++++++++++++++++++++++++++++++++++ tests/umount-test.sh.in | 85 ++++++++++++++++++++++++++++++++++++++++ 4 files changed, 184 insertions(+) create mode 100755 tests/umount-test.sh.in diff --git a/Makefile.am b/Makefile.am index 17b01be4..67c17cde 100644 --- a/Makefile.am +++ b/Makefile.am @@ -111,6 +111,9 @@ if MULTITHREADED # that case. TESTS += tests/ll-smoke-singlethreaded.sh endif +if SIGTERM_HANDLER +TESTS += tests/umount-test.sh +endif check_PROGRAMS = cachetest endiantest cachetest_SOURCES=tests/cachetest.c cachetest_LDADD=libsquashfuse.la $(COMPRESSION_LIBS) diff --git a/configure.ac b/configure.ac index 3869075a..7cd4db77 100644 --- a/configure.ac +++ b/configure.ac @@ -91,6 +91,7 @@ AS_IF([test "x$sq_tests" = x], [sq_tests=" none"]) AC_SUBST([sq_mksquashfs_compressors]) AC_CONFIG_FILES([tests/ll-smoke.sh],[chmod +x tests/ll-smoke.sh]) +AC_CONFIG_FILES([tests/umount-test.sh],[chmod +x tests/umount-test.sh]) AS_IF([test "x$sq_high_level$sq_low_level$sq_demo" = xnonono], @@ -104,6 +105,14 @@ AC_ARG_ENABLE([multithreading], ]) AM_CONDITIONAL([MULTITHREADED], [test x$enable_multithreading = xyes]) +AC_ARG_ENABLE([sigterm-handler], + AS_HELP_STRING([--enable-sigterm-handler], [enable lazy umount on SIGTERM in low-level FUSE driver]), + [ + AC_CHECK_HEADER([linux/version.h], , [], AC_MSG_ERROR([linux host required for sigterm-handler.])) + AC_DEFINE(SQFS_SIGTERM_HANDLER, 1, [Enable lazy umount on SIGTERM in low-level FUSE driver]) + ]) +AM_CONDITIONAL([SIGTERM_HANDLER], [test x$enable_sigterm_handler = xyes]) + AC_SUBST([sq_decompressors]) AC_SUBST([sq_high_level]) AC_SUBST([sq_low_level]) diff --git a/ll_main.c b/ll_main.c index 22302085..0f956b5d 100644 --- a/ll_main.c +++ b/ll_main.c @@ -37,6 +37,90 @@ #include <signal.h> #include <unistd.h> + +#if defined(SQFS_SIGTERM_HANDLER) +#include <sys/utsname.h> +#include <linux/version.h> +static bool kernel_version_at_least(unsigned required_major, + unsigned required_minor, + unsigned required_micro) { + struct utsname info; + + if (uname(&info) >= 0) { + unsigned major, minor, micro; + + if (sscanf(info.release, "%u.%u.%u", &major, &minor, µ) == 3) { + return KERNEL_VERSION(major, minor, micro) >= + KERNEL_VERSION(required_major, required_minor, required_micro); + } + } + return false; +} + +/* libfuse's default SIGTERM handler (set up in fuse_set_signal_handlers()) + * immediately calls fuse_session_exit(), which shuts down the filesystem + * even if there are active users. This leads to nastiness if other processes + * still depend on the filesystem. + * + * So: we respond to SIGTERM by starting a lazy unmount. This is done + * by exec'ing fusermount3, which works properly for unpriviledged + * users (we cannot use umount2() syscall because it is not signal safe; + * fork() and exec(), amazingly, are). + * + * If we fail to start the lazy umount, we signal ourself with SIGINT, + * which falls back to the old behavior of exiting ASAP. + */ +static const char *g_mount_point = NULL; +static void sigterm_handler(int signum) { + /* Unfortunately, lazy umount of in-use fuse filesystem triggers + * kernel bug on kernels < 5.2, Fixed by kernel commit + * e8f3bd773d22f488724dffb886a1618da85c2966 in 5.2. + */ + if (g_mount_point && kernel_version_at_least(5,2,0)) { + int pid = fork(); + if (pid == 0) { + /* child process: disassociate ourself from parent so + * we do not become zombie (as parent does not waitpid()). + */ + pid_t parent = getppid(); + setsid(); + execl("/bin/fusermount3", "fusermount3", + "-u", "-q", "-z", "--", g_mount_point, NULL); + execlp("fusermount3", "fusermount3", + "-u", "-q", "-z", "--", g_mount_point, NULL); + /* if we get here, we can't run fusermount, + * kill the original process with a harshness. + */ + kill(parent, SIGINT); + _exit(0); + } else if (pid > 0) { + /* parent process: nothing to do, murderous child will do us + * in one way or another. + */ + return; + } + } + /* If we get here, we have failed to lazy unmount for whatever reason, + * kill ourself more brutally. + */ + kill(getpid(), SIGINT); +} + +static void set_sigterm_handler(const char *mountpoint) { + struct sigaction sa; + + g_mount_point = mountpoint; + memset(&sa, 0, sizeof(sa)); + sa.sa_handler = sigterm_handler; + sigemptyset(&(sa.sa_mask)); + sa.sa_flags = SA_RESTART; + + if (sigaction(SIGTERM, &sa, NULL) == -1) { + perror("sigaction(SIGTERM)"); + } +} +#endif /* SQFS_SIGTERM_HANDLER */ + int main(int argc, char *argv[]) { struct fuse_args args; sqfs_opts opts; @@ -139,6 +223,9 @@ int main(int argc, char *argv[]) { ll) == SQFS_OK) { if (sqfs_ll_daemonize(fuse_cmdline_opts.foreground) != -1) { if (fuse_set_signal_handlers(ch.session) != -1) { +#if defined(SQFS_SIGTERM_HANDLER) + set_sigterm_handler(fuse_cmdline_opts.mountpoint); +#endif if (opts.idle_timeout_secs) { setup_idle_timeout(ch.session, opts.idle_timeout_secs); } diff --git a/tests/umount-test.sh.in b/tests/umount-test.sh.in new file mode 100755 index 00000000..06fed533 --- /dev/null +++ b/tests/umount-test.sh.in @@ -0,0 +1,85 @@ +#!/bin/bash + +. "tests/lib.sh" + +SFLL=${1:-./squashfuse_ll} # The squashfuse_ll binary. +TIMEOUT=20 + +case @build_os@ in + linux*) + ;; + *) + echo "This test is only enabled on linux hosts." + exit 0 + ;; +esac + +function cleanup { + set +e + if [[ -n "$TAIL_PID" ]]; then + kill "$TAIL_PID" + fi + @sq_fusermount@ -u "$MNTDIR" >& /dev/null + rm -rf "$WORKDIR" +} + +set -e +WORKDIR=$(mktemp -d) +MNTDIR="$WORKDIR/mountpoint" +mkdir -p "$MNTDIR" +mkdir -p "$WORKDIR/source" +trap cleanup EXIT + +# Make a tiny squashfs filesystem. +echo "Hello world" >"$WORKDIR/source/hello" +mksquashfs "$WORKDIR/source" "$WORKDIR/squashfs.image" -comp zstd -no-progress >& /dev/null + +# Mount it. +$SFLL "$WORKDIR/squashfs.image" "$MNTDIR" +SFPID=$(pgrep -f "squashfuse_ll.*$MNTDIR") + +if ! [[ -d /proc/$SFPID ]]; then + echo "squashfuse process missing" + exit 1 +fi +if ! grep -q "$MNTDIR" /proc/mounts; then + echo "mount missing." + exit 1 +fi + +# background a task to hold a file open from the image. +tail -f "${MNTDIR}/hello" >/dev/null & +TAIL_PID=$! + +# SIGTERM the squashfuse process. +kill -15 "$SFPID" + +# Now we expect the mountpoint to disappear due to lazy umount. +if ! timeout $TIMEOUT bash -c \ + "while grep -q $MNTDIR /proc/mounts; do \ + sleep 1; + done"; then + echo "$MNTDIR did not dismount in response to SIGTERM." + exit 1 +fi + +# but the process should remain alive, because of the background task. +if ! [[ -d /proc/$SFPID ]]; then + echo "squashfuse process missing" + exit 1 +fi + +# Now kill the background process. +kill $TAIL_PID +TAIL_PID= + +# Now we expect the process to die. +if ! timeout $TIMEOUT bash -c \ + "while [[ -d /proc/$SFPID ]]; do \ + sleep 1; + done"; then + echo "squashfuse process did not die once filesystem was released." + exit 1 +fi + +echo "Success."
Locations
Projects
Search
Status Monitor
Help
Open Build Service
OBS Manuals
API Documentation
OBS Portal
Reporting a Bug
Contact
Mailing List
Forums
Chat (IRC)
Twitter
Open Build Service (OBS)
is an
openSUSE project
.
浙ICP备2022010568号-2