diff options
Diffstat (limited to 'fs')
80 files changed, 2051 insertions, 1527 deletions
diff --git a/fs/afs/flock.c b/fs/afs/flock.c index 210acafe4a9..3ff8bdd18fb 100644 --- a/fs/afs/flock.c +++ b/fs/afs/flock.c @@ -432,7 +432,6 @@ vfs_rejected_lock: list_del_init(&fl->fl_u.afs.link); if (list_empty(&vnode->granted_locks)) afs_defer_unlock(vnode, key); - spin_unlock(&vnode->lock); goto abort_attempt; } diff --git a/fs/afs/mntpt.c b/fs/afs/mntpt.c index c52be53f694..5ffb570cd3a 100644 --- a/fs/afs/mntpt.c +++ b/fs/afs/mntpt.c @@ -17,7 +17,6 @@ #include <linux/pagemap.h> #include <linux/mount.h> #include <linux/namei.h> -#include <linux/mnt_namespace.h> #include "internal.h" @@ -485,6 +485,8 @@ static inline void really_put_req(struct kioctx *ctx, struct kiocb *req) { assert_spin_locked(&ctx->ctx_lock); + if (req->ki_eventfd != NULL) + eventfd_ctx_put(req->ki_eventfd); if (req->ki_dtor) req->ki_dtor(req); if (req->ki_iovec != &req->ki_inline_vec) @@ -509,8 +511,6 @@ static void aio_fput_routine(struct work_struct *data) /* Complete the fput(s) */ if (req->ki_filp != NULL) __fput(req->ki_filp); - if (req->ki_eventfd != NULL) - __fput(req->ki_eventfd); /* Link the iocb into the context's free list */ spin_lock_irq(&ctx->ctx_lock); @@ -528,8 +528,6 @@ static void aio_fput_routine(struct work_struct *data) */ static int __aio_put_req(struct kioctx *ctx, struct kiocb *req) { - int schedule_putreq = 0; - dprintk(KERN_DEBUG "aio_put(%p): f_count=%ld\n", req, atomic_long_read(&req->ki_filp->f_count)); @@ -549,24 +547,16 @@ static int __aio_put_req(struct kioctx *ctx, struct kiocb *req) * we would not be holding the last reference to the file*, so * this function will be executed w/out any aio kthread wakeup. */ - if (unlikely(atomic_long_dec_and_test(&req->ki_filp->f_count))) - schedule_putreq++; - else - req->ki_filp = NULL; - if (req->ki_eventfd != NULL) { - if (unlikely(atomic_long_dec_and_test(&req->ki_eventfd->f_count))) - schedule_putreq++; - else - req->ki_eventfd = NULL; - } - if (unlikely(schedule_putreq)) { + if (unlikely(atomic_long_dec_and_test(&req->ki_filp->f_count))) { get_ioctx(ctx); spin_lock(&fput_lock); list_add(&req->ki_list, &fput_head); spin_unlock(&fput_lock); queue_work(aio_wq, &fput_work); - } else + } else { + req->ki_filp = NULL; really_put_req(ctx, req); + } return 1; } @@ -1622,7 +1612,7 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb, * an eventfd() fd, and will be signaled for each completed * event using the eventfd_signal() function. */ - req->ki_eventfd = eventfd_fget((int) iocb->aio_resfd); + req->ki_eventfd = eventfd_ctx_fdget((int) iocb->aio_resfd); if (IS_ERR(req->ki_eventfd)) { ret = PTR_ERR(req->ki_eventfd); req->ki_eventfd = NULL; diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 9fa212b014a..b7c1603cd4b 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -1522,11 +1522,11 @@ static int fill_note_info(struct elfhdr *elf, int phdrs, info->thread = NULL; psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL); - fill_note(&info->psinfo, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo); - if (psinfo == NULL) return 0; + fill_note(&info->psinfo, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo); + /* * Figure out how many notes we're going to need for each thread. */ @@ -1929,7 +1929,10 @@ static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, un elf = kmalloc(sizeof(*elf), GFP_KERNEL); if (!elf) goto out; - + /* + * The number of segs are recored into ELF header as 16bit value. + * Please check DEFAULT_MAX_MAP_COUNT definition when you modify here. + */ segs = current->mm->map_count; #ifdef ELF_CORE_EXTRA_PHDRS segs += ELF_CORE_EXTRA_PHDRS; diff --git a/fs/bio-integrity.c b/fs/bio-integrity.c index 31c46a241ba..49a34e7f730 100644 --- a/fs/bio-integrity.c +++ b/fs/bio-integrity.c @@ -1,7 +1,7 @@ /* * bio-integrity.c - bio data integrity extensions * - * Copyright (C) 2007, 2008 Oracle Corporation + * Copyright (C) 2007, 2008, 2009 Oracle Corporation * Written by: Martin K. Petersen <martin.petersen@oracle.com> * * This program is free software; you can redistribute it and/or @@ -25,63 +25,121 @@ #include <linux/bio.h> #include <linux/workqueue.h> -static struct kmem_cache *bio_integrity_slab __read_mostly; -static mempool_t *bio_integrity_pool; -static struct bio_set *integrity_bio_set; +struct integrity_slab { + struct kmem_cache *slab; + unsigned short nr_vecs; + char name[8]; +}; + +#define IS(x) { .nr_vecs = x, .name = "bip-"__stringify(x) } +struct integrity_slab bip_slab[BIOVEC_NR_POOLS] __read_mostly = { + IS(1), IS(4), IS(16), IS(64), IS(128), IS(BIO_MAX_PAGES), +}; +#undef IS + static struct workqueue_struct *kintegrityd_wq; +static inline unsigned int vecs_to_idx(unsigned int nr) +{ + switch (nr) { + case 1: + return 0; + case 2 ... 4: + return 1; + case 5 ... 16: + return 2; + case 17 ... 64: + return 3; + case 65 ... 128: + return 4; + case 129 ... BIO_MAX_PAGES: + return 5; + default: + BUG(); + } +} + +static inline int use_bip_pool(unsigned int idx) +{ + if (idx == BIOVEC_NR_POOLS) + return 1; + + return 0; +} + /** - * bio_integrity_alloc - Allocate integrity payload and attach it to bio + * bio_integrity_alloc_bioset - Allocate integrity payload and attach it to bio * @bio: bio to attach integrity metadata to * @gfp_mask: Memory allocation mask * @nr_vecs: Number of integrity metadata scatter-gather elements + * @bs: bio_set to allocate from * * Description: This function prepares a bio for attaching integrity * metadata. nr_vecs specifies the maximum number of pages containing * integrity metadata that can be attached. */ -struct bio_integrity_payload *bio_integrity_alloc(struct bio *bio, - gfp_t gfp_mask, - unsigned int nr_vecs) +struct bio_integrity_payload *bio_integrity_alloc_bioset(struct bio *bio, + gfp_t gfp_mask, + unsigned int nr_vecs, + struct bio_set *bs) { struct bio_integrity_payload *bip; - struct bio_vec *iv; - unsigned long idx; + unsigned int idx = vecs_to_idx(nr_vecs); BUG_ON(bio == NULL); + bip = NULL; - bip = mempool_alloc(bio_integrity_pool, gfp_mask); - if (unlikely(bip == NULL)) { - printk(KERN_ERR "%s: could not alloc bip\n", __func__); - return NULL; - } + /* Lower order allocations come straight from slab */ + if (!use_bip_pool(idx)) + bip = kmem_cache_alloc(bip_slab[idx].slab, gfp_mask); - memset(bip, 0, sizeof(*bip)); + /* Use mempool if lower order alloc failed or max vecs were requested */ + if (bip == NULL) { + bip = mempool_alloc(bs->bio_integrity_pool, gfp_mask); - iv = bvec_alloc_bs(gfp_mask, nr_vecs, &idx, integrity_bio_set); - if (unlikely(iv == NULL)) { - printk(KERN_ERR "%s: could not alloc bip_vec\n", __func__); - mempool_free(bip, bio_integrity_pool); - return NULL; + if (unlikely(bip == NULL)) { + printk(KERN_ERR "%s: could not alloc bip\n", __func__); + return NULL; + } } - bip->bip_pool = idx; - bip->bip_vec = iv; + memset(bip, 0, sizeof(*bip)); + + bip->bip_slab = idx; bip->bip_bio = bio; bio->bi_integrity = bip; return bip; } +EXPORT_SYMBOL(bio_integrity_alloc_bioset); + +/** + * bio_integrity_alloc - Allocate integrity payload and attach it to bio + * @bio: bio to attach integrity metadata to + * @gfp_mask: Memory allocation mask + * @nr_vecs: Number of integrity metadata scatter-gather elements + * + * Description: This function prepares a bio for attaching integrity + * metadata. nr_vecs specifies the maximum number of pages containing + * integrity metadata that can be attached. + */ +struct bio_integrity_payload *bio_integrity_alloc(struct bio *bio, + gfp_t gfp_mask, + unsigned int nr_vecs) +{ + return bio_integrity_alloc_bioset(bio, gfp_mask, nr_vecs, fs_bio_set); +} EXPORT_SYMBOL(bio_integrity_alloc); /** * bio_integrity_free - Free bio integrity payload * @bio: bio containing bip to be freed + * @bs: bio_set this bio was allocated from * * Description: Used to free the integrity portion of a bio. Usually * called from bio_free(). */ -void bio_integrity_free(struct bio *bio) +void bio_integrity_free(struct bio *bio, struct bio_set *bs) { struct bio_integrity_payload *bip = bio->bi_integrity; @@ -92,8 +150,10 @@ void bio_integrity_free(struct bio *bio) && bip->bip_buf != NULL) kfree(bip->bip_buf); - bvec_free_bs(integrity_bio_set, bip->bip_vec, bip->bip_pool); - mempool_free(bip, bio_integrity_pool); + if (use_bip_pool(bip->bip_slab)) + mempool_free(bip, bs->bio_integrity_pool); + else + kmem_cache_free(bip_slab[bip->bip_slab].slab, bip); bio->bi_integrity = NULL; } @@ -114,7 +174,7 @@ int bio_integrity_add_page(struct bio *bio, struct page *page, struct bio_integrity_payload *bip = bio->bi_integrity; struct bio_vec *iv; - if (bip->bip_vcnt >= bvec_nr_vecs(bip->bip_pool)) { + if (bip->bip_vcnt >= bvec_nr_vecs(bip->bip_slab)) { printk(KERN_ERR "%s: bip_vec full\n", __func__); return 0; } @@ -647,8 +707,8 @@ void bio_integrity_split(struct bio *bio, struct bio_pair *bp, int sectors) bp->iv1 = bip->bip_vec[0]; bp->iv2 = bip->bip_vec[0]; - bp->bip1.bip_vec = &bp->iv1; - bp->bip2.bip_vec = &bp->iv2; + bp->bip1.bip_vec[0] = bp->iv1; + bp->bip2.bip_vec[0] = bp->iv2; bp->iv1.bv_len = sectors * bi->tuple_size; bp->iv2.bv_offset += sectors * bi->tuple_size; @@ -667,17 +727,19 @@ EXPORT_SYMBOL(bio_integrity_split); * @bio: New bio * @bio_src: Original bio * @gfp_mask: Memory allocation mask + * @bs: bio_set to allocate bip from * * Description: Called to allocate a bip when cloning a bio */ -int bio_integrity_clone(struct bio *bio, struct bio *bio_src, gfp_t gfp_mask) +int bio_integrity_clone(struct bio *bio, struct bio *bio_src, + gfp_t gfp_mask, struct bio_set *bs) { struct bio_integrity_payload *bip_src = bio_src->bi_integrity; struct bio_integrity_payload *bip; BUG_ON(bip_src == NULL); - bip = bio_integrity_alloc(bio, gfp_mask, bip_src->bip_vcnt); + bip = bio_integrity_alloc_bioset(bio, gfp_mask, bip_src->bip_vcnt, bs); if (bip == NULL) return -EIO; @@ -693,25 +755,43 @@ int bio_integrity_clone(struct bio *bio, struct bio *bio_src, gfp_t gfp_mask) } EXPORT_SYMBOL(bio_integrity_clone); -static int __init bio_integrity_init(void) +int bioset_integrity_create(struct bio_set *bs, int pool_size) { - kintegrityd_wq = create_workqueue("kintegrityd"); + unsigned int max_slab = vecs_to_idx(BIO_MAX_PAGES); + + bs->bio_integrity_pool = + mempool_create_slab_pool(pool_size, bip_slab[max_slab].slab); + if (!bs->bio_integrity_pool) + return -1; + + return 0; +} +EXPORT_SYMBOL(bioset_integrity_create); + +void bioset_integrity_free(struct bio_set *bs) +{ + if (bs->bio_integrity_pool) + mempool_destroy(bs->bio_integrity_pool); +} +EXPORT_SYMBOL(bioset_integrity_free); + +void __init bio_integrity_init(void) +{ + unsigned int i; + + kintegrityd_wq = create_workqueue("kintegrityd"); if (!kintegrityd_wq) panic("Failed to create kintegrityd\n"); - bio_integrity_slab = KMEM_CACHE(bio_integrity_payload, - SLAB_HWCACHE_ALIGN|SLAB_PANIC); + for (i = 0 ; i < BIOVEC_NR_POOLS ; i++) { + unsigned int size; - bio_integrity_pool = mempool_create_slab_pool(BIO_POOL_SIZE, - bio_integrity_slab); - if (!bio_integrity_pool) - panic("bio_integrity: can't allocate bip pool\n"); + size = sizeof(struct bio_integrity_payload) + + bip_slab[i].nr_vecs * sizeof(struct bio_vec); - integrity_bio_set = bioset_create(BIO_POOL_SIZE, 0); - if (!integrity_bio_set) - panic("bio_integrity: can't allocate bio_set\n"); - - return 0; + bip_slab[i].slab = + kmem_cache_create(bip_slab[i].name, size, 0, + SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL); + } } -subsys_initcall(bio_integrity_init); @@ -238,7 +238,7 @@ void bio_free(struct bio *bio, struct bio_set *bs) bvec_free_bs(bs, bio->bi_io_vec, BIO_POOL_IDX(bio)); if (bio_integrity(bio)) - bio_integrity_free(bio); + bio_integrity_free(bio, bs); /* * If we have front padding, adjust the bio pointer before freeing @@ -341,7 +341,7 @@ struct bio *bio_alloc(gfp_t gfp_mask, int nr_iovecs) static void bio_kmalloc_destructor(struct bio *bio) { if (bio_integrity(bio)) - bio_integrity_free(bio); + bio_integrity_free(bio, fs_bio_set); kfree(bio); } @@ -472,7 +472,7 @@ struct bio *bio_clone(struct bio *bio, gfp_t gfp_mask) if (bio_integrity(bio)) { int ret; - ret = bio_integrity_clone(b, bio, gfp_mask); + ret = bio_integrity_clone(b, bio, gfp_mask, fs_bio_set); if (ret < 0) { bio_put(b); @@ -705,14 +705,13 @@ static struct bio_map_data *bio_alloc_map_data(int nr_segs, int iov_count, } static int __bio_copy_iov(struct bio *bio, struct bio_vec *iovecs, - struct sg_iovec *iov, int iov_count, int uncopy, - int do_free_page) + struct sg_iovec *iov, int iov_count, + int to_user, int from_user, int do_free_page) { int ret = 0, i; struct bio_vec *bvec; int iov_idx = 0; unsigned int iov_off = 0; - int read = bio_data_dir(bio) == READ; __bio_for_each_segment(bvec, bio, i, 0) { char *bv_addr = page_address(bvec->bv_page); @@ -727,13 +726,14 @@ static int __bio_copy_iov(struct bio *bio, struct bio_vec *iovecs, iov_addr = iov[iov_idx].iov_base + iov_off; if (!ret) { - if (!read && !uncopy) - ret = copy_from_user(bv_addr, iov_addr, - bytes); - if (read && uncopy) + if (to_user) ret = copy_to_user(iov_addr, bv_addr, bytes); + if (from_user) + ret = copy_from_user(bv_addr, iov_addr, + bytes); + if (ret) ret = -EFAULT; } @@ -770,7 +770,8 @@ int bio_uncopy_user(struct bio *bio) if (!bio_flagged(bio, BIO_NULL_MAPPED)) ret = __bio_copy_iov(bio, bmd->iovecs, bmd->sgvecs, - bmd->nr_sgvecs, 1, bmd->is_our_pages); + bmd->nr_sgvecs, bio_data_dir(bio) == READ, + 0, bmd->is_our_pages); bio_free_map_data(bmd); bio_put(bio); return ret; @@ -875,8 +876,9 @@ struct bio *bio_copy_user_iov(struct request_queue *q, /* * success */ - if (!write_to_vm && (!map_data || !map_data->null_mapped)) { - ret = __bio_copy_iov(bio, bio->bi_io_vec, iov, iov_count, 0, 0); + if ((!write_to_vm && (!map_data || !map_data->null_mapped)) || + (map_data && map_data->from_user)) { + ret = __bio_copy_iov(bio, bio->bi_io_vec, iov, iov_count, 0, 1, 0); if (ret) goto cleanup; } @@ -1539,6 +1541,7 @@ void bioset_free(struct bio_set *bs) if (bs->bio_pool) mempool_destroy(bs->bio_pool); + bioset_integrity_free(bs); biovec_free_pools(bs); bio_put_slab(bs); @@ -1579,6 +1582,9 @@ struct bio_set *bioset_create(unsigned int pool_size, unsigned int front_pad) if (!bs->bio_pool) goto bad; + if (bioset_integrity_create(bs, pool_size)) + goto bad; + if (!biovec_create_pools(bs, pool_size)) return bs; @@ -1616,6 +1622,7 @@ static int __init init_bio(void) if (!bio_slabs) panic("bio: can't allocate bios\n"); + bio_integrity_init(); biovec_init_slabs(); fs_bio_set = bioset_create(BIO_POOL_SIZE, 0); diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c index 7f88628a1a7..6e4f6c50a12 100644 --- a/fs/btrfs/async-thread.c +++ b/fs/btrfs/async-thread.c @@ -299,8 +299,8 @@ int btrfs_start_workers(struct btrfs_workers *workers, int num_workers) "btrfs-%s-%d", workers->name, workers->num_workers + i); if (IS_ERR(worker->task)) { - kfree(worker); ret = PTR_ERR(worker->task); + kfree(worker); goto fail; } diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 2779c2f5360..98a87383871 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -2074,8 +2074,7 @@ static inline int btrfs_insert_empty_item(struct btrfs_trans_handle *trans, int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path); int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path); int btrfs_leaf_free_space(struct btrfs_root *root, struct extent_buffer *leaf); -int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root - *root); +int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref); int btrfs_drop_subtree(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *node, diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index edc7d208c5c..a5aca3997d4 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -990,15 +990,13 @@ static inline int extent_ref_type(u64 parent, u64 owner) return type; } -static int find_next_key(struct btrfs_path *path, struct btrfs_key *key) +static int find_next_key(struct btrfs_path *path, int level, + struct btrfs_key *key) { - int level; - BUG_ON(!path->keep_locks); - for (level = 0; level < BTRFS_MAX_LEVEL; level++) { + for (; level < BTRFS_MAX_LEVEL; level++) { if (!path->nodes[level]) break; - btrfs_assert_tree_locked(path->nodes[level]); if (path->slots[level] + 1 >= btrfs_header_nritems(path->nodes[level])) continue; @@ -1158,7 +1156,8 @@ int lookup_inline_extent_backref(struct btrfs_trans_handle *trans, * For simplicity, we just do not add new inline back * ref if there is any kind of item for this block */ - if (find_next_key(path, &key) == 0 && key.objectid == bytenr && + if (find_next_key(path, 0, &key) == 0 && + key.objectid == bytenr && key.type < BTRFS_BLOCK_GROUP_ITEM_KEY) { err = -EAGAIN; goto out; @@ -2697,7 +2696,7 @@ again: printk(KERN_ERR "no space left, need %llu, %llu delalloc bytes" ", %llu bytes_used, %llu bytes_reserved, " - "%llu bytes_pinned, %llu bytes_readonly, %llu may use" + "%llu bytes_pinned, %llu bytes_readonly, %llu may use " "%llu total\n", (unsigned long long)bytes, (unsigned long long)data_sinfo->bytes_delalloc, (unsigned long long)data_sinfo->bytes_used, @@ -4128,6 +4127,7 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, return buf; } +#if 0 int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *leaf) { @@ -4171,8 +4171,6 @@ int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans, return 0; } -#if 0 - static noinline int cache_drop_leaf_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_leaf_ref *ref) @@ -4553,262 +4551,471 @@ out: } #endif +struct walk_control { + u64 refs[BTRFS_MAX_LEVEL]; + u64 flags[BTRFS_MAX_LEVEL]; + struct btrfs_key update_progress; + int stage; + int level; + int shared_level; + int update_ref; + int keep_locks; +}; + +#define DROP_REFERENCE 1 +#define UPDATE_BACKREF 2 + /* - * helper function for drop_subtree, this function is similar to - * walk_down_tree. The main difference is that it checks reference - * counts while tree blocks are locked. + * hepler to process tree block while walking down the tree. + * + * when wc->stage == DROP_REFERENCE, this function checks + * reference count of the block. if the block is shared and + * we need update back refs for the subtree rooted at the + * block, this function changes wc->stage to UPDATE_BACKREF + * + * when wc->stage == UPDATE_BACKREF, this function updates + * back refs for pointers in the block. + * + * NOTE: return value 1 means we should stop walking down. */ -static noinline int walk_down_tree(struct btrfs_trans_handle *trans, +static noinline int walk_down_proc(struct btrfs_trans_handle *trans, struct btrfs_root *root, - struct btrfs_path *path, int *level) + struct btrfs_path *path, + struct walk_control *wc) { - struct extent_buffer *next; - struct extent_buffer *cur; - struct extent_buffer *parent; - u64 bytenr; - u64 ptr_gen; - u64 refs; - u64 flags; - u32 blocksize; + int level = wc->level; + struct extent_buffer *eb = path->nodes[level]; + struct btrfs_key key; + u64 flag = BTRFS_BLOCK_FLAG_FULL_BACKREF; int ret; - cur = path->nodes[*level]; - ret = btrfs_lookup_extent_info(trans, root, cur->start, cur->len, - &refs, &flags); - BUG_ON(ret); - if (refs > 1) - goto out; + if (wc->stage == UPDATE_BACKREF && + btrfs_header_owner(eb) != root->root_key.objectid) + return 1; - BUG_ON(!(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)); + /* + * when reference count of tree block is 1, it won't increase + * again. once full backref flag is set, we never clear it. + */ + if ((wc->stage == DROP_REFERENCE && wc->refs[level] != 1) || + (wc->stage == UPDATE_BACKREF && !(wc->flags[level] & flag))) { + BUG_ON(!path->locks[level]); + ret = btrfs_lookup_extent_info(trans, root, + eb->start, eb->len, + &wc->refs[level], + &wc->flags[level]); + BUG_ON(ret); + BUG_ON(wc->refs[level] == 0); + } - while (*level >= 0) { - cur = path->nodes[*level]; - if (*level == 0) { - ret = btrfs_drop_leaf_ref(trans, root, cur); - BUG_ON(ret); - clean_tree_block(trans, root, cur); - break; - } - if (path->slots[*level] >= btrfs_header_nritems(cur)) { - clean_tree_block(trans, root, cur); - break; + if (wc->stage == DROP_REFERENCE && + wc->update_ref && wc->refs[level] > 1) { + BUG_ON(eb == root->node); + BUG_ON(path->slots[level] > 0); + if (level == 0) + btrfs_item_key_to_cpu(eb, &key, path->slots[level]); + else + btrfs_node_key_to_cpu(eb, &key, path->slots[level]); + if (btrfs_header_owner(eb) == root->root_key.objectid && + btrfs_comp_cpu_keys(&key, &wc->update_progress) >= 0) { + wc->stage = UPDATE_BACKREF; + wc->shared_level = level; } + } - bytenr = btrfs_node_blockptr(cur, path->slots[*level]); - blocksize = btrfs_level_size(root, *level - 1); - ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]); + if (wc->stage == DROP_REFERENCE) { + if (wc->refs[level] > 1) + return 1; - next = read_tree_block(root, bytenr, blocksize, ptr_gen); - btrfs_tree_lock(next); - btrfs_set_lock_blocking(next); + if (path->locks[level] && !wc->keep_locks) { + btrfs_tree_unlock(eb); + path->locks[level] = 0; + } + return 0; + } - ret = btrfs_lookup_extent_info(trans, root, bytenr, blocksize, - &refs, &flags); + /* wc->stage == UPDATE_BACKREF */ + if (!(wc->flags[level] & flag)) { + BUG_ON(!path->locks[level]); + ret = btrfs_inc_ref(trans, root, eb, 1); BUG_ON(ret); - if (refs > 1) { - parent = path->nodes[*level]; - ret = btrfs_free_extent(trans, root, bytenr, - blocksize, parent->start, - btrfs_header_owner(parent), - *level - 1, 0); + ret = btrfs_dec_ref(trans, root, eb, 0); + BUG_ON(ret); + ret = btrfs_set_disk_extent_flags(trans, root, eb->start, + eb->len, flag, 0); + BUG_ON(ret); + wc->flags[level] |= flag; + } + + /* + * the block is shared by multiple trees, so it's not good to + * keep the tree lock + */ + if (path->locks[level] && level > 0) { + btrfs_tree_unlock(eb); + path->locks[level] = 0; + } + return 0; +} + +/* + * hepler to process tree block while walking up the tree. + * + * when wc->stage == DROP_REFERENCE, this function drops + * reference count on the block. + * + * when wc->stage == UPDATE_BACKREF, this function changes + * wc->stage back to DROP_REFERENCE if we changed wc->stage + * to UPDATE_BACKREF previously while processing the block. + * + * NOTE: return value 1 means we should stop walking up. + */ +static noinline int walk_up_proc(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, + struct walk_control *wc) +{ + int ret = 0; + int level = wc->level; + struct extent_buffer *eb = path->nodes[level]; + u64 parent = 0; + + if (wc->stage == UPDATE_BACKREF) { + BUG_ON(wc->shared_level < level); + if (level < wc->shared_level) + goto out; + + BUG_ON(wc->refs[level] <= 1); + ret = find_next_key(path, level + 1, &wc->update_progress); + if (ret > 0) + wc->update_ref = 0; + + wc->stage = DROP_REFERENCE; + wc->shared_level = -1; + path->slots[level] = 0; + + /* + * check reference count again if the block isn't locked. + * we should start walking down the tree again if reference + * count is one. + */ + if (!path->locks[level]) { + BUG_ON(level == 0); + btrfs_tree_lock(eb); + btrfs_set_lock_blocking(eb); + path->locks[level] = 1; + + ret = btrfs_lookup_extent_info(trans, root, + eb->start, eb->len, + &wc->refs[level], + &wc->flags[level]); BUG_ON(ret); - path->slots[*level]++; - btrfs_tree_unlock(next); - free_extent_buffer(next); - continue; + BUG_ON(wc->refs[level] == 0); + if (wc->refs[level] == 1) { + btrfs_tree_unlock(eb); + path->locks[level] = 0; + return 1; + } + } else { + BUG_ON(level != 0); } + } - BUG_ON(!(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)); + /* wc->stage == DROP_REFERENCE */ + BUG_ON(wc->refs[level] > 1 && !path->locks[level]); - *level = btrfs_header_level(next); - path->nodes[*level] = next; - path->slots[*level] = 0; - path->locks[*level] = 1; - cond_resched(); + if (wc->refs[level] == 1) { + if (level == 0) { + if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF) + ret = btrfs_dec_ref(trans, root, eb, 1); + else + ret = btrfs_dec_ref(trans, root, eb, 0); + BUG_ON(ret); + } + /* make block locked assertion in clean_tree_block happy */ + if (!path->locks[level] && + btrfs_header_generation(eb) == trans->transid) { + btrfs_tree_lock(eb); + btrfs_set_lock_blocking(eb); + path->locks[level] = 1; + } + clean_tree_block(trans, root, eb); + } + + if (eb == root->node) { + if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF) + parent = eb->start; + else + BUG_ON(root->root_key.objectid != + btrfs_header_owner(eb)); + } else { + if (wc->flags[level + 1] & BTRFS_BLOCK_FLAG_FULL_BACKREF) + parent = path->nodes[level + 1]->start; + else + BUG_ON(root->root_key.objectid != + btrfs_header_owner(path->nodes[level + 1])); } -out: - if (path->nodes[*level] == root->node) - parent = path->nodes[*level]; - else - parent = path->nodes[*level + 1]; - bytenr = path->nodes[*level]->start; - blocksize = path->nodes[*level]->len; - ret = btrfs_free_extent(trans, root, bytenr, blocksize, parent->start, - btrfs_header_owner(parent), *level, 0); + ret = btrfs_free_extent(trans, root, eb->start, eb->len, parent, + root->root_key.objectid, level, 0); BUG_ON(ret); +out: + wc->refs[level] = 0; + wc->flags[level] = 0; + return ret; +} + +static noinline int walk_down_tree(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, + struct walk_control *wc) +{ + struct extent_buffer *next; + struct extent_buffer *cur; + u64 bytenr; + u64 ptr_gen; + u32 blocksize; + int level = wc->level; + int ret; + + while (level >= 0) { + cur = path->nodes[level]; + BUG_ON(path->slots[level] >= btrfs_header_nritems(cur)); - if (path->locks[*level]) { - btrfs_tree_unlock(path->nodes[*level]); - path->locks[*level] = 0; + ret = walk_down_proc(trans, root, path, wc); + if (ret > 0) + break; + + if (level == 0) + break; + + bytenr = btrfs_node_blockptr(cur, path->slots[level]); + blocksize = btrfs_level_size(root, level - 1); + ptr_gen = btrfs_node_ptr_generation(cur, path->slots[level]); + + next = read_tree_block(root, bytenr, blocksize, ptr_gen); + btrfs_tree_lock(next); + btrfs_set_lock_blocking(next); + + level--; + BUG_ON(level != btrfs_header_level(next)); + path->nodes[level] = next; + path->slots[level] = 0; + path->locks[level] = 1; + wc->level = level; } - free_extent_buffer(path->nodes[*level]); - path->nodes[*level] = NULL; - *level += 1; - cond_resched(); return 0; } -/* - * helper for dropping snapshots. This walks back up the tree in the path - * to find the first node higher up where we haven't yet gone through - * all the slots - */ static noinline int walk_up_tree(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, - int *level, int max_level) + struct walk_control *wc, int max_level) { - struct btrfs_root_item *root_item = &root->root_item; - int i; - int slot; + int level = wc->level; int ret; - for (i = *level; i < max_level && path->nodes[i]; i++) { - slot = path->slots[i]; - if (slot + 1 < btrfs_header_nritems(path->nodes[i])) { - /* - * there is more work to do in this level. - * Update the drop_progress marker to reflect - * the work we've done so far, and then bump - * the slot number - */ - path->slots[i]++; - WARN_ON(*level == 0); - if (max_level == BTRFS_MAX_LEVEL) { - btrfs_node_key(path->nodes[i], - &root_item->drop_progress, - path->slots[i]); - root_item->drop_level = i; - } - *level = i; + path->slots[level] = btrfs_header_nritems(path->nodes[level]); + while (level < max_level && path->nodes[level]) { + wc->level = level; + if (path->slots[level] + 1 < + btrfs_header_nritems(path->nodes[level])) { + path->slots[level]++; return 0; } else { - struct extent_buffer *parent; - - /* - * this whole node is done, free our reference - * on it and go up one level - */ - if (path->nodes[*level] == root->node) - parent = path->nodes[*level]; - else - parent = path->nodes[*level + 1]; + ret = walk_up_proc(trans, root, path, wc); + if (ret > 0) + return 0; - clean_tree_block(trans, root, path->nodes[i]); - ret = btrfs_free_extent(trans, root, - path->nodes[i]->start, - path->nodes[i]->len, - parent->start, - btrfs_header_owner(parent), - *level, 0); - BUG_ON(ret); - if (path->locks[*level]) { - btrfs_tree_unlock(path->nodes[i]); - path->locks[i] = 0; + if (path->locks[level]) { + btrfs_tree_unlock(path->nodes[level]); + path->locks[level] = 0; } - free_extent_buffer(path->nodes[i]); - path->nodes[i] = NULL; - *level = i + 1; + free_extent_buffer(path->nodes[level]); + path->nodes[level] = NULL; + level++; } } return 1; } /* - * drop the reference count on the tree rooted at 'snap'. This traverses - * the tree freeing any blocks that have a ref count of zero after being - * decremented. + * drop a subvolume tree. + * + * this function traverses the tree freeing any blocks that only + * referenced by the tree. + * + * when a shared tree block is found. this function decreases its + * reference count by one. if update_ref is true, this function + * also make sure backrefs for the shared block and all lower level + * blocks are properly updated. */ -int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root - *root) +int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref) { - int ret = 0; - int wret; - int level; struct btrfs_path *path; - int update_count; + struct btrfs_trans_handle *trans; + struct btrfs_root *tree_root = root->fs_info->tree_root; struct btrfs_root_item *root_item = &root->root_item; + struct walk_control *wc; + struct btrfs_key key; + int err = 0; + int ret; + int level; path = btrfs_alloc_path(); BUG_ON(!path); - level = btrfs_header_level(root->node); + wc = kzalloc(sizeof(*wc), GFP_NOFS); + BUG_ON(!wc); + + trans = btrfs_start_transaction(tree_root, 1); + if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) { + level = btrfs_header_level(root->node); path->nodes[level] = btrfs_lock_root_node(root); btrfs_set_lock_blocking(path->nodes[level]); path->slots[level] = 0; path->locks[level] = 1; + memset(&wc->update_progress, 0, + sizeof(wc->update_progress)); } else { - struct btrfs_key key; - struct btrfs_disk_key found_key; - struct extent_buffer *node; - btrfs_disk_key_to_cpu(&key, &root_item->drop_progress); + memcpy(&wc->update_progress, &key, + sizeof(wc->update_progress)); + level = root_item->drop_level; + BUG_ON(level == 0); path->lowest_level = level; - wret = btrfs_search_slot(NULL, root, &key, path, 0, 0); - if (wret < 0) { - ret = wret; + ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); + path->lowest_level = 0; + if (ret < 0) { + err = ret; goto out; } - node = path->nodes[level]; - btrfs_node_key(node, &found_key, path->slots[level]); - WARN_ON(memcmp(&found_key, &root_item->drop_progress, - sizeof(found_key))); + btrfs_node_key_to_cpu(path->nodes[level], &key, + path->slots[level]); + WARN_ON(memcmp(&key, &wc->update_progress, sizeof(key))); + /* * unlock our path, this is safe because only this * function is allowed to delete this snapshot */ btrfs_unlock_up_safe(path, 0); + + level = btrfs_header_level(root->node); + while (1) { + btrfs_tree_lock(path->nodes[level]); + btrfs_set_lock_blocking(path->nodes[level]); + + ret = btrfs_lookup_extent_info(trans, root, + path->nodes[level]->start, + path->nodes[level]->len, + &wc->refs[level], + &wc->flags[level]); + BUG_ON(ret); + BUG_ON(wc->refs[level] == 0); + + if (level == root_item->drop_level) + break; + + btrfs_tree_unlock(path->nodes[level]); + WARN_ON(wc->refs[level] != 1); + level--; + } } + + wc->level = level; + wc->shared_level = -1; + wc->stage = DROP_REFERENCE; + wc->update_ref = update_ref; + wc->keep_locks = 0; + while (1) { - unsigned long update; - wret = walk_down_tree(trans, root, path, &level); - if (wret > 0) + ret = walk_down_tree(trans, root, path, wc); + if (ret < 0) { + err = ret; break; - if (wret < 0) - ret = wret; + } - wret = walk_up_tree(trans, root, path, &level, - BTRFS_MAX_LEVEL); - if (wret > 0) + ret = walk_up_tree(trans, root, path, wc, BTRFS_MAX_LEVEL); + if (ret < 0) { + err = ret; break; - if (wret < 0) - ret = wret; - if (trans->transaction->in_commit || - trans->transaction->delayed_refs.flushing) { - ret = -EAGAIN; + } + + if (ret > 0) { + BUG_ON(wc->stage != DROP_REFERENCE); break; } - for (update_count = 0; update_count < 16; update_count++) { + + if (wc->stage == DROP_REFERENCE) { + level = wc->level; + btrfs_node_key(path->nodes[level], + &root_item->drop_progress, + path->slots[level]); + root_item->drop_level = level; + } + + BUG_ON(wc->level == 0); + if (trans->transaction->in_commit || + trans->transaction->delayed_refs.flushing) { + ret = btrfs_update_root(trans, tree_root, + &root->root_key, + root_item); + BUG_ON(ret); + + btrfs_end_transaction(trans, tree_root); + trans = btrfs_start_transaction(tree_root, 1); + } else { + unsigned long update; update = trans->delayed_ref_updates; trans->delayed_ref_updates = 0; if (update) - btrfs_run_delayed_refs(trans, root, update); - else - break; + btrfs_run_delayed_refs(trans, tree_root, + update); } } + btrfs_release_path(root, path); + BUG_ON(err); + + ret = btrfs_del_root(trans, tree_root, &root->root_key); + BUG_ON(ret); + + free_extent_buffer(root->node); + free_extent_buffer(root->commit_root); + kfree(root); out: + btrfs_end_transaction(trans, tree_root); + kfree(wc); btrfs_free_path(path); - return ret; + return err; } +/* + * drop subtree rooted at tree block 'node'. + * + * NOTE: this function will unlock and release tree block 'node' + */ int btrfs_drop_subtree(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *node, struct extent_buffer *parent) { struct btrfs_path *path; + struct walk_control *wc; int level; int parent_level; int ret = 0; int wret; + BUG_ON(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID); + path = btrfs_alloc_path(); BUG_ON(!path); + wc = kzalloc(sizeof(*wc), GFP_NOFS); + BUG_ON(!wc); + btrfs_assert_tree_locked(parent); parent_level = btrfs_header_level(parent); extent_buffer_get(parent); @@ -4817,24 +5024,33 @@ int btrfs_drop_subtree(struct btrfs_trans_handle *trans, btrfs_assert_tree_locked(node); level = btrfs_header_level(node); - extent_buffer_get(node); path->nodes[level] = node; path->slots[level] = 0; + path->locks[level] = 1; + + wc->refs[parent_level] = 1; + wc->flags[parent_level] = BTRFS_BLOCK_FLAG_FULL_BACKREF; + wc->level = level; + wc->shared_level = -1; + wc->stage = DROP_REFERENCE; + wc->update_ref = 0; + wc->keep_locks = 1; while (1) { - wret = walk_down_tree(trans, root, path, &level); - if (wret < 0) + wret = walk_down_tree(trans, root, path, wc); + if (wret < 0) { ret = wret; - if (wret != 0) break; + } - wret = walk_up_tree(trans, root, path, &level, parent_level); + wret = walk_up_tree(trans, root, path, wc, parent_level); if (wret < 0) ret = wret; if (wret != 0) break; } + kfree(wc); btrfs_free_path(path); return ret; } diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 126477eaecf..7c3cd248d8d 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -151,7 +151,10 @@ static noinline int dirty_and_release_pages(struct btrfs_trans_handle *trans, } if (end_pos > isize) { i_size_write(inode, end_pos); - btrfs_update_inode(trans, root, inode); + /* we've only changed i_size in ram, and we haven't updated + * the disk i_size. There is no need to log the inode + * at this time. + */ } err = btrfs_end_transaction(trans, root); out_unlock: diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index dbe1aabf96c..7ffa3d34ea1 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -3580,12 +3580,6 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, owner = 1; BTRFS_I(inode)->block_group = btrfs_find_block_group(root, 0, alloc_hint, owner); - if ((mode & S_IFREG)) { - if (btrfs_test_opt(root, NODATASUM)) - BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM; - if (btrfs_test_opt(root, NODATACOW)) - BTRFS_I(inode)->flags |= BTRFS_INODE_NODATACOW; - } key[0].objectid = objectid; btrfs_set_key_type(&key[0], BTRFS_INODE_ITEM_KEY); @@ -3640,6 +3634,13 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, btrfs_inherit_iflags(inode, dir); + if ((mode & S_IFREG)) { + if (btrfs_test_opt(root, NODATASUM)) + BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM; + if (btrfs_test_opt(root, NODATACOW)) + BTRFS_I(inode)->flags |= BTRFS_INODE_NODATACOW; + } + insert_inode_hash(inode); inode_tree_add(inode); return inode; @@ -5082,6 +5083,7 @@ static long btrfs_fallocate(struct inode *inode, int mode, u64 mask = BTRFS_I(inode)->root->sectorsize - 1; struct extent_map *em; struct btrfs_trans_handle *trans; + struct btrfs_root *root; int ret; alloc_start = offset & ~mask; @@ -5100,6 +5102,13 @@ static long btrfs_fallocate(struct inode *inode, int mode, goto out; } + root = BTRFS_I(inode)->root; + + ret = btrfs_check_data_free_space(root, inode, + alloc_end - alloc_start); + if (ret) + goto out; + locked_end = alloc_end - 1; while (1) { struct btrfs_ordered_extent *ordered; @@ -5107,7 +5116,7 @@ static long btrfs_fallocate(struct inode *inode, int mode, trans = btrfs_start_transaction(BTRFS_I(inode)->root, 1); if (!trans) { ret = -EIO; - goto out; + goto out_free; } /* the extent lock is ordered inside the running @@ -5168,6 +5177,8 @@ static long btrfs_fallocate(struct inode *inode, int mode, GFP_NOFS); btrfs_end_transaction(trans, BTRFS_I(inode)->root); +out_free: + btrfs_free_reserved_data_space(root, inode, alloc_end - alloc_start); out: mutex_unlock(&inode->i_mutex); return ret; diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index eff18f5b536..9f4db848db1 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -1028,7 +1028,8 @@ static long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, struct btrfs_file_extent_item); comp = btrfs_file_extent_compression(leaf, extent); type = btrfs_file_extent_type(leaf, extent); - if (type == BTRFS_FILE_EXTENT_REG) { + if (type == BTRFS_FILE_EXTENT_REG || + type == BTRFS_FILE_EXTENT_PREALLOC) { disko = btrfs_file_extent_disk_bytenr(leaf, extent); diskl = btrfs_file_extent_disk_num_bytes(leaf, @@ -1051,7 +1052,8 @@ static long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, new_key.objectid = inode->i_ino; new_key.offset = key.offset + destoff - off; - if (type == BTRFS_FILE_EXTENT_REG) { + if (type == BTRFS_FILE_EXTENT_REG || + type == BTRFS_FILE_EXTENT_PREALLOC) { ret = btrfs_insert_empty_item(trans, root, path, &new_key, size); if (ret) diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index b23dc209ae1..00839793477 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -1788,7 +1788,7 @@ static void merge_func(struct btrfs_work *work) btrfs_end_transaction(trans, root); } - btrfs_drop_dead_root(reloc_root); + btrfs_drop_snapshot(reloc_root, 0); if (atomic_dec_and_test(async->num_pending)) complete(async->done); @@ -2075,9 +2075,6 @@ static int do_relocation(struct btrfs_trans_handle *trans, ret = btrfs_drop_subtree(trans, root, eb, upper->eb); BUG_ON(ret); - - btrfs_tree_unlock(eb); - free_extent_buffer(eb); } if (!lowest) { btrfs_tree_unlock(upper->eb); diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 4e83457ea25..2dbf1c1f56e 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -593,6 +593,7 @@ int btrfs_defrag_root(struct btrfs_root *root, int cacheonly) return 0; } +#if 0 /* * when dropping snapshots, we generate a ton of delayed refs, and it makes * sense not to join the transaction while it is trying to flush the current @@ -681,6 +682,7 @@ int btrfs_drop_dead_root(struct btrfs_root *root) btrfs_btree_balance_dirty(tree_root, nr); return ret; } +#endif /* * new snapshots need to be created at a very specific time in the @@ -1081,7 +1083,7 @@ int btrfs_clean_old_snapshots(struct btrfs_root *root) while (!list_empty(&list)) { root = list_entry(list.next, struct btrfs_root, root_list); list_del_init(&root->root_list); - btrfs_drop_dead_root(root); + btrfs_drop_snapshot(root, 0); } return 0; } diff --git a/fs/cifs/CHANGES b/fs/cifs/CHANGES index b4868983942..92888aa9074 100644 --- a/fs/cifs/CHANGES +++ b/fs/cifs/CHANGES @@ -5,7 +5,11 @@ client generated ones by default (mount option "serverino" turned on by default if server supports it). Add forceuid and forcegid mount options (so that when negotiating unix extensions specifying which uid mounted does not immediately force the server's reported -uids to be overridden). +uids to be overridden). Add support for scope mount parm. Improve +hard link detection to use same inode for both. Do not set +read-only dos attribute on directories (for chmod) since Windows +explorer special cases this attribute bit for directories for +a different purpose. Version 1.58 ------------ diff --git a/fs/cifs/asn1.c b/fs/cifs/asn1.c index 1b09f167006..20692fbfdb2 100644 --- a/fs/cifs/asn1.c +++ b/fs/cifs/asn1.c @@ -49,6 +49,7 @@ #define ASN1_OJI 6 /* Object Identifier */ #define ASN1_OJD 7 /* Object Description */ #define ASN1_EXT 8 /* External */ +#define ASN1_ENUM 10 /* Enumerated */ #define ASN1_SEQ 16 /* Sequence */ #define ASN1_SET 17 /* Set */ #define ASN1_NUMSTR 18 /* Numerical String */ @@ -78,10 +79,12 @@ #define SPNEGO_OID_LEN 7 #define NTLMSSP_OID_LEN 10 #define KRB5_OID_LEN 7 +#define KRB5U2U_OID_LEN 8 #define MSKRB5_OID_LEN 7 static unsigned long SPNEGO_OID[7] = { 1, 3, 6, 1, 5, 5, 2 }; static unsigned long NTLMSSP_OID[10] = { 1, 3, 6, 1, 4, 1, 311, 2, 2, 10 }; static unsigned long KRB5_OID[7] = { 1, 2, 840, 113554, 1, 2, 2 }; +static unsigned long KRB5U2U_OID[8] = { 1, 2, 840, 113554, 1, 2, 2, 3 }; static unsigned long MSKRB5_OID[7] = { 1, 2, 840, 48018, 1, 2, 2 }; /* @@ -122,6 +125,28 @@ asn1_octet_decode(struct asn1_ctx *ctx, unsigned char *ch) return 1; } +#if 0 /* will be needed later by spnego decoding/encoding of ntlmssp */ +static unsigned char +asn1_enum_decode(struct asn1_ctx *ctx, __le32 *val) +{ + unsigned char ch; + + if (ctx->pointer >= ctx->end) { + ctx->error = ASN1_ERR_DEC_EMPTY; + return 0; + } + + ch = *(ctx->pointer)++; /* ch has 0xa, ptr points to lenght octet */ + if ((ch) == ASN1_ENUM) /* if ch value is ENUM, 0xa */ + *val = *(++(ctx->pointer)); /* value has enum value */ + else + return 0; + + ctx->pointer++; + return 1; +} +#endif + static unsigned char asn1_tag_decode(struct asn1_ctx *ctx, unsigned int *tag) { @@ -476,10 +501,9 @@ decode_negTokenInit(unsigned char *security_blob, int length, unsigned int cls, con, tag, oidlen, rc; bool use_ntlmssp = false; bool use_kerberos = false; + bool use_kerberosu2u = false; bool use_mskerberos = false; - *secType = NTLM; /* BB eventually make Kerberos or NLTMSSP the default*/ - /* cifs_dump_mem(" Received SecBlob ", security_blob, length); */ asn1_open(&ctx, security_blob, length); @@ -515,6 +539,7 @@ decode_negTokenInit(unsigned char *security_blob, int length, return 0; } + /* SPNEGO */ if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) { cFYI(1, ("Error decoding negTokenInit")); return 0; @@ -526,6 +551,7 @@ decode_negTokenInit(unsigned char *security_blob, int length, return 0; } + /* negTokenInit */ if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) { cFYI(1, ("Error decoding negTokenInit")); return 0; @@ -537,6 +563,7 @@ decode_negTokenInit(unsigned char *security_blob, int length, return 0; } + /* sequence */ if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) { cFYI(1, ("Error decoding 2nd part of negTokenInit")); return 0; @@ -548,6 +575,7 @@ decode_negTokenInit(unsigned char *security_blob, int length, return 0; } + /* sequence of */ if (asn1_header_decode (&ctx, &sequence_end, &cls, &con, &tag) == 0) { cFYI(1, ("Error decoding 2nd part of negTokenInit")); @@ -560,6 +588,7 @@ decode_negTokenInit(unsigned char *security_blob, int length, return 0; } + /* list of security mechanisms */ while (!asn1_eoc_decode(&ctx, sequence_end)) { rc = asn1_header_decode(&ctx, &end, &cls, &con, &tag); if (!rc) { @@ -576,11 +605,15 @@ decode_negTokenInit(unsigned char *security_blob, int length, if (compare_oid(oid, oidlen, MSKRB5_OID, MSKRB5_OID_LEN) && - !use_kerberos) + !use_mskerberos) use_mskerberos = true; + else if (compare_oid(oid, oidlen, KRB5U2U_OID, + KRB5U2U_OID_LEN) && + !use_kerberosu2u) + use_kerberosu2u = true; else if (compare_oid(oid, oidlen, KRB5_OID, KRB5_OID_LEN) && - !use_mskerberos) + !use_kerberos) use_kerberos = true; else if (compare_oid(oid, oidlen, NTLMSSP_OID, NTLMSSP_OID_LEN)) @@ -593,7 +626,12 @@ decode_negTokenInit(unsigned char *security_blob, int length, } } + /* mechlistMIC */ if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) { + /* Check if we have reached the end of the blob, but with + no mechListMic (e.g. NTLMSSP instead of KRB5) */ + if (ctx.error == ASN1_ERR_DEC_EMPTY) + goto decode_negtoken_exit; cFYI(1, ("Error decoding last part negTokenInit exit3")); return 0; } else if ((cls != ASN1_CTX) || (con != ASN1_CON)) { @@ -602,6 +640,8 @@ decode_negTokenInit(unsigned char *security_blob, int length, cls, con, tag, end, *end)); return 0; } + + /* sequence */ if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) { cFYI(1, ("Error decoding last part negTokenInit exit5")); return 0; @@ -611,6 +651,7 @@ decode_negTokenInit(unsigned char *security_blob, int length, cls, con, tag, end, *end)); } + /* sequence of */ if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) { cFYI(1, ("Error decoding last part negTokenInit exit 7")); return 0; @@ -619,6 +660,8 @@ decode_negTokenInit(unsigned char *security_blob, int length, cls, con, tag, end, *end)); return 0; } + + /* general string */ if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) { cFYI(1, ("Error decoding last part negTokenInit exit9")); return 0; @@ -630,13 +673,13 @@ decode_negTokenInit(unsigned char *security_blob, int length, } cFYI(1, ("Need to call asn1_octets_decode() function for %s", ctx.pointer)); /* is this UTF-8 or ASCII? */ - +decode_negtoken_exit: if (use_kerberos) *secType = Kerberos; else if (use_mskerberos) *secType = MSKerberos; else if (use_ntlmssp) - *secType = NTLMSSP; + *secType = RawNTLMSSP; return 1; } diff --git a/fs/cifs/cifs_spnego.c b/fs/cifs/cifs_spnego.c index 4a4581cb2b5..051caecf7d6 100644 --- a/fs/cifs/cifs_spnego.c +++ b/fs/cifs/cifs_spnego.c @@ -86,6 +86,9 @@ struct key_type cifs_spnego_key_type = { /* strlen of ";user=" */ #define USER_KEY_LEN 6 +/* strlen of ";pid=0x" */ +#define PID_KEY_LEN 7 + /* get a key struct with a SPNEGO security blob, suitable for session setup */ struct key * cifs_get_spnego_key(struct cifsSesInfo *sesInfo) @@ -103,7 +106,8 @@ cifs_get_spnego_key(struct cifsSesInfo *sesInfo) IP_KEY_LEN + INET6_ADDRSTRLEN + MAX_MECH_STR_LEN + UID_KEY_LEN + (sizeof(uid_t) * 2) + - USER_KEY_LEN + strlen(sesInfo->userName) + 1; + USER_KEY_LEN + strlen(sesInfo->userName) + + PID_KEY_LEN + (sizeof(pid_t) * 2) + 1; spnego_key = ERR_PTR(-ENOMEM); description = kzalloc(desc_len, GFP_KERNEL); @@ -141,6 +145,9 @@ cifs_get_spnego_key(struct cifsSesInfo *sesInfo) dp = description + strlen(description); sprintf(dp, ";user=%s", sesInfo->userName); + dp = description + strlen(description); + sprintf(dp, ";pid=0x%x", current->pid); + cFYI(1, ("key description = %s", description)); spnego_key = request_key(&cifs_spnego_key_type, description, ""); diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c index 1403b5d86a7..6941c22398a 100644 --- a/fs/cifs/cifsacl.c +++ b/fs/cifs/cifsacl.c @@ -327,7 +327,7 @@ static void dump_ace(struct cifs_ace *pace, char *end_of_acl) static void parse_dacl(struct cifs_acl *pdacl, char *end_of_acl, struct cifs_sid *pownersid, struct cifs_sid *pgrpsid, - struct inode *inode) + struct cifs_fattr *fattr) { int i; int num_aces = 0; @@ -340,7 +340,7 @@ static void parse_dacl(struct cifs_acl *pdacl, char *end_of_acl, if (!pdacl) { /* no DACL in the security descriptor, set all the permissions for user/group/other */ - inode->i_mode |= S_IRWXUGO; + fattr->cf_mode |= S_IRWXUGO; return; } @@ -357,7 +357,7 @@ static void parse_dacl(struct cifs_acl *pdacl, char *end_of_acl, /* reset rwx permissions for user/group/other. Also, if num_aces is 0 i.e. DACL has no ACEs, user/group/other have no permissions */ - inode->i_mode &= ~(S_IRWXUGO); + fattr->cf_mode &= ~(S_IRWXUGO); acl_base = (char *)pdacl; acl_size = sizeof(struct cifs_acl); @@ -379,17 +379,17 @@ static void parse_dacl(struct cifs_acl *pdacl, char *end_of_acl, if (compare_sids(&(ppace[i]->sid), pownersid)) access_flags_to_mode(ppace[i]->access_req, ppace[i]->type, - &(inode->i_mode), + &fattr->cf_mode, &user_mask); if (compare_sids(&(ppace[i]->sid), pgrpsid)) access_flags_to_mode(ppace[i]->access_req, ppace[i]->type, - &(inode->i_mode), + &fattr->cf_mode, &group_mask); if (compare_sids(&(ppace[i]->sid), &sid_everyone)) access_flags_to_mode(ppace[i]->access_req, ppace[i]->type, - &(inode->i_mode), + &fattr->cf_mode, &other_mask); /* memcpy((void *)(&(cifscred->aces[i])), @@ -464,7 +464,7 @@ static int parse_sid(struct cifs_sid *psid, char *end_of_acl) /* Convert CIFS ACL to POSIX form */ static int parse_sec_desc(struct cifs_ntsd *pntsd, int acl_len, - struct inode *inode) + struct cifs_fattr *fattr) { int rc; struct cifs_sid *owner_sid_ptr, *group_sid_ptr; @@ -472,7 +472,7 @@ static int parse_sec_desc(struct cifs_ntsd *pntsd, int acl_len, char *end_of_acl = ((char *)pntsd) + acl_len; __u32 dacloffset; - if ((inode == NULL) || (pntsd == NULL)) + if (pntsd == NULL) return -EIO; owner_sid_ptr = (struct cifs_sid *)((char *)pntsd + @@ -497,7 +497,7 @@ static int parse_sec_desc(struct cifs_ntsd *pntsd, int acl_len, if (dacloffset) parse_dacl(dacl_ptr, end_of_acl, owner_sid_ptr, - group_sid_ptr, inode); + group_sid_ptr, fattr); else cFYI(1, ("no ACL")); /* BB grant all or default perms? */ @@ -508,7 +508,6 @@ static int parse_sec_desc(struct cifs_ntsd *pntsd, int acl_len, memcpy((void *)(&(cifscred->gsid)), (void *)group_sid_ptr, sizeof(struct cifs_sid)); */ - return 0; } @@ -671,8 +670,9 @@ static int set_cifs_acl(struct cifs_ntsd *pnntsd, __u32 acllen, } /* Translate the CIFS ACL (simlar to NTFS ACL) for a file into mode bits */ -void acl_to_uid_mode(struct cifs_sb_info *cifs_sb, struct inode *inode, - const char *path, const __u16 *pfid) +void +cifs_acl_to_fattr(struct cifs_sb_info *cifs_sb, struct cifs_fattr *fattr, + struct inode *inode, const char *path, const __u16 *pfid) { struct cifs_ntsd *pntsd = NULL; u32 acllen = 0; @@ -687,7 +687,7 @@ void acl_to_uid_mode(struct cifs_sb_info *cifs_sb, struct inode *inode, /* if we can retrieve the ACL, now parse Access Control Entries, ACEs */ if (pntsd) - rc = parse_sec_desc(pntsd, acllen, inode); + rc = parse_sec_desc(pntsd, acllen, fattr); if (rc) cFYI(1, ("parse sec desc failed rc = %d", rc)); diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 0d92114195a..44f30504b82 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -308,7 +308,6 @@ cifs_alloc_inode(struct super_block *sb) if (!cifs_inode) return NULL; cifs_inode->cifsAttrs = 0x20; /* default */ - atomic_set(&cifs_inode->inUse, 0); cifs_inode->time = 0; cifs_inode->write_behind_rc = 0; /* Until the file is open and we have gotten oplock @@ -333,6 +332,27 @@ cifs_destroy_inode(struct inode *inode) kmem_cache_free(cifs_inode_cachep, CIFS_I(inode)); } +static void +cifs_show_address(struct seq_file *s, struct TCP_Server_Info *server) +{ + seq_printf(s, ",addr="); + + switch (server->addr.sockAddr.sin_family) { + case AF_INET: + seq_printf(s, "%pI4", &server->addr.sockAddr.sin_addr.s_addr); + break; + case AF_INET6: + seq_printf(s, "%pI6", + &server->addr.sockAddr6.sin6_addr.s6_addr); + if (server->addr.sockAddr6.sin6_scope_id) + seq_printf(s, "%%%u", + server->addr.sockAddr6.sin6_scope_id); + break; + default: + seq_printf(s, "(unknown)"); + } +} + /* * cifs_show_options() is for displaying mount options in /proc/mounts. * Not all settable options are displayed but most of the important @@ -343,83 +363,64 @@ cifs_show_options(struct seq_file *s, struct vfsmount *m) { struct cifs_sb_info *cifs_sb; struct cifsTconInfo *tcon; - struct TCP_Server_Info *server; cifs_sb = CIFS_SB(m->mnt_sb); + tcon = cifs_sb->tcon; - if (cifs_sb) { - tcon = cifs_sb->tcon; - if (tcon) { - seq_printf(s, ",unc=%s", cifs_sb->tcon->treeName); - if (tcon->ses) { - if (tcon->ses->userName) - seq_printf(s, ",username=%s", - tcon->ses->userName); - if (tcon->ses->domainName) - seq_printf(s, ",domain=%s", - tcon->ses->domainName); - server = tcon->ses->server; - if (server) { - seq_printf(s, ",addr="); - switch (server->addr.sockAddr6. - sin6_family) { - case AF_INET6: - seq_printf(s, "%pI6", - &server->addr.sockAddr6.sin6_addr); - break; - case AF_INET: - seq_printf(s, "%pI4", - &server->addr.sockAddr.sin_addr.s_addr); - break; - } - } - } - if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_UID) || - !(tcon->unix_ext)) - seq_printf(s, ",uid=%d", cifs_sb->mnt_uid); - if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_GID) || - !(tcon->unix_ext)) - seq_printf(s, ",gid=%d", cifs_sb->mnt_gid); - if (!tcon->unix_ext) { - seq_printf(s, ",file_mode=0%o,dir_mode=0%o", + seq_printf(s, ",unc=%s", cifs_sb->tcon->treeName); + if (tcon->ses->userName) + seq_printf(s, ",username=%s", tcon->ses->userName); + if (tcon->ses->domainName) + seq_printf(s, ",domain=%s", tcon->ses->domainName); + + seq_printf(s, ",uid=%d", cifs_sb->mnt_uid); + if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_UID) + seq_printf(s, ",forceuid"); + + seq_printf(s, ",gid=%d", cifs_sb->mnt_gid); + if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_GID) + seq_printf(s, ",forcegid"); + + cifs_show_address(s, tcon->ses->server); + + if (!tcon->unix_ext) + seq_printf(s, ",file_mode=0%o,dir_mode=0%o", cifs_sb->mnt_file_mode, cifs_sb->mnt_dir_mode); - } - if (tcon->seal) - seq_printf(s, ",seal"); - if (tcon->nocase) - seq_printf(s, ",nocase"); - if (tcon->retry) - seq_printf(s, ",hard"); - } - if (cifs_sb->prepath) - seq_printf(s, ",prepath=%s", cifs_sb->prepath); - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS) - seq_printf(s, ",posixpaths"); - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) - seq_printf(s, ",setuids"); - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM) - seq_printf(s, ",serverino"); - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) - seq_printf(s, ",directio"); - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_XATTR) - seq_printf(s, ",nouser_xattr"); - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR) - seq_printf(s, ",mapchars"); - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL) - seq_printf(s, ",sfu"); - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL) - seq_printf(s, ",nobrl"); - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) - seq_printf(s, ",cifsacl"); - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DYNPERM) - seq_printf(s, ",dynperm"); - if (m->mnt_sb->s_flags & MS_POSIXACL) - seq_printf(s, ",acl"); - - seq_printf(s, ",rsize=%d", cifs_sb->rsize); - seq_printf(s, ",wsize=%d", cifs_sb->wsize); - } + if (tcon->seal) + seq_printf(s, ",seal"); + if (tcon->nocase) + seq_printf(s, ",nocase"); + if (tcon->retry) + seq_printf(s, ",hard"); + if (cifs_sb->prepath) + seq_printf(s, ",prepath=%s", cifs_sb->prepath); + if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS) + seq_printf(s, ",posixpaths"); + if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) + seq_printf(s, ",setuids"); + if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM) + seq_printf(s, ",serverino"); + if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) + seq_printf(s, ",directio"); + if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_XATTR) + seq_printf(s, ",nouser_xattr"); + if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR) + seq_printf(s, ",mapchars"); + if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL) + seq_printf(s, ",sfu"); + if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL) + seq_printf(s, ",nobrl"); + if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) + seq_printf(s, ",cifsacl"); + if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DYNPERM) + seq_printf(s, ",dynperm"); + if (m->mnt_sb->s_flags & MS_POSIXACL) + seq_printf(s, ",acl"); + + seq_printf(s, ",rsize=%d", cifs_sb->rsize); + seq_printf(s, ",wsize=%d", cifs_sb->wsize); + return 0; } @@ -535,9 +536,14 @@ static void cifs_umount_begin(struct super_block *sb) if (tcon == NULL) return; - lock_kernel(); read_lock(&cifs_tcp_ses_lock); - if (tcon->tc_count == 1) + if ((tcon->tc_count > 1) || (tcon->tidStatus == CifsExiting)) { + /* we have other mounts to same share or we have + already tried to force umount this and woken up + all waiting network requests, nothing to do */ + read_unlock(&cifs_tcp_ses_lock); + return; + } else if (tcon->tc_count == 1) tcon->tidStatus = CifsExiting; read_unlock(&cifs_tcp_ses_lock); @@ -552,9 +558,7 @@ static void cifs_umount_begin(struct super_block *sb) wake_up_all(&tcon->ses->server->response_q); msleep(1); } -/* BB FIXME - finish add checks for tidStatus BB */ - unlock_kernel(); return; } diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h index 9570a0e8023..6c170948300 100644 --- a/fs/cifs/cifsfs.h +++ b/fs/cifs/cifsfs.h @@ -24,6 +24,19 @@ #define ROOT_I 2 +/* + * ino_t is 32-bits on 32-bit arch. We have to squash the 64-bit value down + * so that it will fit. + */ +static inline ino_t +cifs_uniqueid_to_ino_t(u64 fileid) +{ + ino_t ino = (ino_t) fileid; + if (sizeof(ino_t) < sizeof(u64)) + ino ^= fileid >> (sizeof(u64)-sizeof(ino_t)) * 8; + return ino; +} + extern struct file_system_type cifs_fs_type; extern const struct address_space_operations cifs_addr_ops; extern const struct address_space_operations cifs_addr_ops_smallbuf; @@ -100,5 +113,5 @@ extern long cifs_ioctl(struct file *filep, unsigned int cmd, unsigned long arg); extern const struct export_operations cifs_export_ops; #endif /* EXPERIMENTAL */ -#define CIFS_VERSION "1.59" +#define CIFS_VERSION "1.60" #endif /* _CIFSFS_H */ diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index a61ab772c6f..63f6cdfa563 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -83,7 +83,7 @@ enum securityEnum { NTLM, /* Legacy NTLM012 auth with NTLM hash */ NTLMv2, /* Legacy NTLM auth with NTLMv2 hash */ RawNTLMSSP, /* NTLMSSP without SPNEGO, NTLMv2 hash */ - NTLMSSP, /* NTLMSSP via SPNEGO, NTLMv2 hash */ +/* NTLMSSP, */ /* can use rawNTLMSSP instead of NTLMSSP via SPNEGO */ Kerberos, /* Kerberos via SPNEGO */ MSKerberos, /* MS Kerberos via SPNEGO */ }; @@ -364,13 +364,13 @@ struct cifsInodeInfo { struct list_head openFileList; int write_behind_rc; __u32 cifsAttrs; /* e.g. DOS archive bit, sparse, compressed, system */ - atomic_t inUse; /* num concurrent users (local openers cifs) of file*/ unsigned long time; /* jiffies of last update/check of inode */ bool clientCanCacheRead:1; /* read oplock */ bool clientCanCacheAll:1; /* read and writebehind oplock */ bool oplockPending:1; bool delete_pending:1; /* DELETE_ON_CLOSE is set */ u64 server_eof; /* current file size on server */ + u64 uniqueid; /* server inode number */ struct inode vfs_inode; }; @@ -472,6 +472,32 @@ struct dfs_info3_param { char *node_name; }; +/* + * common struct for holding inode info when searching for or updating an + * inode with new info + */ + +#define CIFS_FATTR_DFS_REFERRAL 0x1 +#define CIFS_FATTR_DELETE_PENDING 0x2 +#define CIFS_FATTR_NEED_REVAL 0x4 + +struct cifs_fattr { + u32 cf_flags; + u32 cf_cifsattrs; + u64 cf_uniqueid; + u64 cf_eof; + u64 cf_bytes; + uid_t cf_uid; + gid_t cf_gid; + umode_t cf_mode; + dev_t cf_rdev; + unsigned int cf_nlink; + unsigned int cf_dtype; + struct timespec cf_atime; + struct timespec cf_mtime; + struct timespec cf_ctime; +}; + static inline void free_dfs_info_param(struct dfs_info3_param *param) { if (param) { diff --git a/fs/cifs/cifspdu.h b/fs/cifs/cifspdu.h index a785f69dbc9..2d07f890a84 100644 --- a/fs/cifs/cifspdu.h +++ b/fs/cifs/cifspdu.h @@ -2328,19 +2328,7 @@ struct file_attrib_tag { typedef struct { __le32 NextEntryOffset; __u32 ResumeKey; /* as with FileIndex - no need to convert */ - __le64 EndOfFile; - __le64 NumOfBytes; - __le64 LastStatusChange; /*SNIA specs DCE time for the 3 time fields */ - __le64 LastAccessTime; - __le64 LastModificationTime; - __le64 Uid; - __le64 Gid; - __le32 Type; - __le64 DevMajor; - __le64 DevMinor; - __le64 UniqueId; - __le64 Permissions; - __le64 Nlinks; + FILE_UNIX_BASIC_INFO basic; char FileName[1]; } __attribute__((packed)) FILE_UNIX_INFO; /* level 0x202 */ diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index f9452329bcc..da8fbf56599 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h @@ -74,7 +74,7 @@ extern unsigned int smbCalcSize(struct smb_hdr *ptr); extern unsigned int smbCalcSize_LE(struct smb_hdr *ptr); extern int decode_negTokenInit(unsigned char *security_blob, int length, enum securityEnum *secType); -extern int cifs_inet_pton(const int, const char *source, void *dst); +extern int cifs_convert_address(char *src, void *dst); extern int map_smb_to_linux_error(struct smb_hdr *smb, int logErr); extern void header_assemble(struct smb_hdr *, char /* command */ , const struct cifsTconInfo *, int /* length of @@ -98,9 +98,13 @@ extern struct timespec cnvrtDosUnixTm(__le16 le_date, __le16 le_time, extern int cifs_posix_open(char *full_path, struct inode **pinode, struct super_block *sb, int mode, int oflags, int *poplock, __u16 *pnetfid, int xid); -extern void posix_fill_in_inode(struct inode *tmp_inode, - FILE_UNIX_BASIC_INFO *pData, int isNewInode); -extern struct inode *cifs_new_inode(struct super_block *sb, __u64 *inum); +extern void cifs_unix_basic_to_fattr(struct cifs_fattr *fattr, + FILE_UNIX_BASIC_INFO *info, + struct cifs_sb_info *cifs_sb); +extern void cifs_fattr_to_inode(struct inode *inode, struct cifs_fattr *fattr); +extern struct inode *cifs_iget(struct super_block *sb, + struct cifs_fattr *fattr); + extern int cifs_get_inode_info(struct inode **pinode, const unsigned char *search_path, FILE_ALL_INFO *pfile_info, @@ -108,8 +112,9 @@ extern int cifs_get_inode_info(struct inode **pinode, extern int cifs_get_inode_info_unix(struct inode **pinode, const unsigned char *search_path, struct super_block *sb, int xid); -extern void acl_to_uid_mode(struct cifs_sb_info *cifs_sb, struct inode *inode, - const char *path, const __u16 *pfid); +extern void cifs_acl_to_fattr(struct cifs_sb_info *cifs_sb, + struct cifs_fattr *fattr, struct inode *inode, + const char *path, const __u16 *pfid); extern int mode_to_acl(struct inode *inode, const char *path, __u64); extern int cifs_mount(struct super_block *, struct cifs_sb_info *, char *, @@ -215,7 +220,11 @@ struct cifs_unix_set_info_args { dev_t device; }; -extern int CIFSSMBUnixSetInfo(const int xid, struct cifsTconInfo *pTcon, +extern int CIFSSMBUnixSetFileInfo(const int xid, struct cifsTconInfo *tcon, + const struct cifs_unix_set_info_args *args, + u16 fid, u32 pid_of_opener); + +extern int CIFSSMBUnixSetPathInfo(const int xid, struct cifsTconInfo *pTcon, char *fileName, const struct cifs_unix_set_info_args *args, const struct nls_table *nls_codepage, diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index b84c61d5bca..922f5fe2084 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -594,7 +594,7 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses) else if (secFlags & CIFSSEC_MAY_KRB5) server->secType = Kerberos; else if (secFlags & CIFSSEC_MAY_NTLMSSP) - server->secType = NTLMSSP; + server->secType = RawNTLMSSP; else if (secFlags & CIFSSEC_MAY_LANMAN) server->secType = LANMAN; /* #ifdef CONFIG_CIFS_EXPERIMENTAL @@ -729,7 +729,7 @@ CIFSSMBTDis(const int xid, struct cifsTconInfo *tcon) * the tcon is no longer on the list, so no need to take lock before * checking this. */ - if (tcon->need_reconnect) + if ((tcon->need_reconnect) || (tcon->ses->need_reconnect)) return 0; rc = small_smb_init(SMB_COM_TREE_DISCONNECT, 0, tcon, @@ -5074,10 +5074,114 @@ SetAttrLgcyRetry: } #endif /* temporarily unneeded SetAttr legacy function */ +static void +cifs_fill_unix_set_info(FILE_UNIX_BASIC_INFO *data_offset, + const struct cifs_unix_set_info_args *args) +{ + u64 mode = args->mode; + + /* + * Samba server ignores set of file size to zero due to bugs in some + * older clients, but we should be precise - we use SetFileSize to + * set file size and do not want to truncate file size to zero + * accidently as happened on one Samba server beta by putting + * zero instead of -1 here + */ + data_offset->EndOfFile = cpu_to_le64(NO_CHANGE_64); + data_offset->NumOfBytes = cpu_to_le64(NO_CHANGE_64); + data_offset->LastStatusChange = cpu_to_le64(args->ctime); + data_offset->LastAccessTime = cpu_to_le64(args->atime); + data_offset->LastModificationTime = cpu_to_le64(args->mtime); + data_offset->Uid = cpu_to_le64(args->uid); + data_offset->Gid = cpu_to_le64(args->gid); + /* better to leave device as zero when it is */ + data_offset->DevMajor = cpu_to_le64(MAJOR(args->device)); + data_offset->DevMinor = cpu_to_le64(MINOR(args->device)); + data_offset->Permissions = cpu_to_le64(mode); + + if (S_ISREG(mode)) + data_offset->Type = cpu_to_le32(UNIX_FILE); + else if (S_ISDIR(mode)) + data_offset->Type = cpu_to_le32(UNIX_DIR); + else if (S_ISLNK(mode)) + data_offset->Type = cpu_to_le32(UNIX_SYMLINK); + else if (S_ISCHR(mode)) + data_offset->Type = cpu_to_le32(UNIX_CHARDEV); + else if (S_ISBLK(mode)) + data_offset->Type = cpu_to_le32(UNIX_BLOCKDEV); + else if (S_ISFIFO(mode)) + data_offset->Type = cpu_to_le32(UNIX_FIFO); + else if (S_ISSOCK(mode)) + data_offset->Type = cpu_to_le32(UNIX_SOCKET); +} + int -CIFSSMBUnixSetInfo(const int xid, struct cifsTconInfo *tcon, char *fileName, - const struct cifs_unix_set_info_args *args, - const struct nls_table *nls_codepage, int remap) +CIFSSMBUnixSetFileInfo(const int xid, struct cifsTconInfo *tcon, + const struct cifs_unix_set_info_args *args, + u16 fid, u32 pid_of_opener) +{ + struct smb_com_transaction2_sfi_req *pSMB = NULL; + FILE_UNIX_BASIC_INFO *data_offset; + int rc = 0; + u16 params, param_offset, offset, byte_count, count; + + cFYI(1, ("Set Unix Info (via SetFileInfo)")); + rc = small_smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB); + + if (rc) + return rc; + + pSMB->hdr.Pid = cpu_to_le16((__u16)pid_of_opener); + pSMB->hdr.PidHigh = cpu_to_le16((__u16)(pid_of_opener >> 16)); + + params = 6; + pSMB->MaxSetupCount = 0; + pSMB->Reserved = 0; + pSMB->Flags = 0; + pSMB->Timeout = 0; + pSMB->Reserved2 = 0; + param_offset = offsetof(struct smb_com_transaction2_sfi_req, Fid) - 4; + offset = param_offset + params; + + data_offset = (FILE_UNIX_BASIC_INFO *) + ((char *)(&pSMB->hdr.Protocol) + offset); + count = sizeof(FILE_UNIX_BASIC_INFO); + + pSMB->MaxParameterCount = cpu_to_le16(2); + /* BB find max SMB PDU from sess */ + pSMB->MaxDataCount = cpu_to_le16(1000); + pSMB->SetupCount = 1; + pSMB->Reserved3 = 0; + pSMB->SubCommand = cpu_to_le16(TRANS2_SET_FILE_INFORMATION); + byte_count = 3 /* pad */ + params + count; + pSMB->DataCount = cpu_to_le16(count); + pSMB->ParameterCount = cpu_to_le16(params); + pSMB->TotalDataCount = pSMB->DataCount; + pSMB->TotalParameterCount = pSMB->ParameterCount; + pSMB->ParameterOffset = cpu_to_le16(param_offset); + pSMB->DataOffset = cpu_to_le16(offset); + pSMB->Fid = fid; + pSMB->InformationLevel = cpu_to_le16(SMB_SET_FILE_UNIX_BASIC); + pSMB->Reserved4 = 0; + pSMB->hdr.smb_buf_length += byte_count; + pSMB->ByteCount = cpu_to_le16(byte_count); + + cifs_fill_unix_set_info(data_offset, args); + + rc = SendReceiveNoRsp(xid, tcon->ses, (struct smb_hdr *) pSMB, 0); + if (rc) + cFYI(1, ("Send error in Set Time (SetFileInfo) = %d", rc)); + + /* Note: On -EAGAIN error only caller can retry on handle based calls + since file handle passed in no longer valid */ + + return rc; +} + +int +CIFSSMBUnixSetPathInfo(const int xid, struct cifsTconInfo *tcon, char *fileName, + const struct cifs_unix_set_info_args *args, + const struct nls_table *nls_codepage, int remap) { TRANSACTION2_SPI_REQ *pSMB = NULL; TRANSACTION2_SPI_RSP *pSMBr = NULL; @@ -5086,7 +5190,6 @@ CIFSSMBUnixSetInfo(const int xid, struct cifsTconInfo *tcon, char *fileName, int bytes_returned = 0; FILE_UNIX_BASIC_INFO *data_offset; __u16 params, param_offset, offset, count, byte_count; - __u64 mode = args->mode; cFYI(1, ("In SetUID/GID/Mode")); setPermsRetry: @@ -5137,38 +5240,8 @@ setPermsRetry: pSMB->InformationLevel = cpu_to_le16(SMB_SET_FILE_UNIX_BASIC); pSMB->Reserved4 = 0; pSMB->hdr.smb_buf_length += byte_count; - /* Samba server ignores set of file size to zero due to bugs in some - older clients, but we should be precise - we use SetFileSize to - set file size and do not want to truncate file size to zero - accidently as happened on one Samba server beta by putting - zero instead of -1 here */ - data_offset->EndOfFile = cpu_to_le64(NO_CHANGE_64); - data_offset->NumOfBytes = cpu_to_le64(NO_CHANGE_64); - data_offset->LastStatusChange = cpu_to_le64(args->ctime); - data_offset->LastAccessTime = cpu_to_le64(args->atime); - data_offset->LastModificationTime = cpu_to_le64(args->mtime); - data_offset->Uid = cpu_to_le64(args->uid); - data_offset->Gid = cpu_to_le64(args->gid); - /* better to leave device as zero when it is */ - data_offset->DevMajor = cpu_to_le64(MAJOR(args->device)); - data_offset->DevMinor = cpu_to_le64(MINOR(args->device)); - data_offset->Permissions = cpu_to_le64(mode); - - if (S_ISREG(mode)) - data_offset->Type = cpu_to_le32(UNIX_FILE); - else if (S_ISDIR(mode)) - data_offset->Type = cpu_to_le32(UNIX_DIR); - else if (S_ISLNK(mode)) - data_offset->Type = cpu_to_le32(UNIX_SYMLINK); - else if (S_ISCHR(mode)) - data_offset->Type = cpu_to_le32(UNIX_CHARDEV); - else if (S_ISBLK(mode)) - data_offset->Type = cpu_to_le32(UNIX_BLOCKDEV); - else if (S_ISFIFO(mode)) - data_offset->Type = cpu_to_le32(UNIX_FIFO); - else if (S_ISSOCK(mode)) - data_offset->Type = cpu_to_le32(UNIX_SOCKET); + cifs_fill_unix_set_info(data_offset, args); pSMB->ByteCount = cpu_to_le16(byte_count); rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 97f4311b9a8..e16d7592116 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -70,7 +70,6 @@ struct smb_vol { mode_t file_mode; mode_t dir_mode; unsigned secFlg; - bool rw:1; bool retry:1; bool intr:1; bool setuids:1; @@ -832,7 +831,6 @@ cifs_parse_mount_options(char *options, const char *devname, vol->dir_mode = vol->file_mode = S_IRUGO | S_IXUGO | S_IWUSR; /* vol->retry default is 0 (i.e. "soft" limited retry not hard retry) */ - vol->rw = true; /* default is always to request posix paths. */ vol->posix_paths = 1; /* default to using server inode numbers where available */ @@ -1199,7 +1197,9 @@ cifs_parse_mount_options(char *options, const char *devname, } else if (strnicmp(data, "guest", 5) == 0) { /* ignore */ } else if (strnicmp(data, "rw", 2) == 0) { - vol->rw = true; + /* ignore */ + } else if (strnicmp(data, "ro", 2) == 0) { + /* ignore */ } else if (strnicmp(data, "noblocksend", 11) == 0) { vol->noblocksnd = 1; } else if (strnicmp(data, "noautotune", 10) == 0) { @@ -1218,8 +1218,6 @@ cifs_parse_mount_options(char *options, const char *devname, parse these options again and set anything and it is ok to just ignore them */ continue; - } else if (strnicmp(data, "ro", 2) == 0) { - vol->rw = false; } else if (strnicmp(data, "hard", 4) == 0) { vol->retry = 1; } else if (strnicmp(data, "soft", 4) == 0) { @@ -1386,8 +1384,10 @@ cifs_find_tcp_session(struct sockaddr_storage *addr) server->addr.sockAddr.sin_addr.s_addr)) continue; else if (addr->ss_family == AF_INET6 && - !ipv6_addr_equal(&server->addr.sockAddr6.sin6_addr, - &addr6->sin6_addr)) + (!ipv6_addr_equal(&server->addr.sockAddr6.sin6_addr, + &addr6->sin6_addr) || + server->addr.sockAddr6.sin6_scope_id != + addr6->sin6_scope_id)) continue; ++server->srv_count; @@ -1433,28 +1433,15 @@ cifs_get_tcp_session(struct smb_vol *volume_info) memset(&addr, 0, sizeof(struct sockaddr_storage)); - if (volume_info->UNCip && volume_info->UNC) { - rc = cifs_inet_pton(AF_INET, volume_info->UNCip, - &sin_server->sin_addr.s_addr); - - if (rc <= 0) { - /* not ipv4 address, try ipv6 */ - rc = cifs_inet_pton(AF_INET6, volume_info->UNCip, - &sin_server6->sin6_addr.in6_u); - if (rc > 0) - addr.ss_family = AF_INET6; - } else { - addr.ss_family = AF_INET; - } + cFYI(1, ("UNC: %s ip: %s", volume_info->UNC, volume_info->UNCip)); - if (rc <= 0) { + if (volume_info->UNCip && volume_info->UNC) { + rc = cifs_convert_address(volume_info->UNCip, &addr); + if (!rc) { /* we failed translating address */ rc = -EINVAL; goto out_err; } - - cFYI(1, ("UNC: %s ip: %s", volume_info->UNC, - volume_info->UNCip)); } else if (volume_info->UNCip) { /* BB using ip addr as tcp_ses name to connect to the DFS root below */ @@ -1513,14 +1500,14 @@ cifs_get_tcp_session(struct smb_vol *volume_info) cFYI(1, ("attempting ipv6 connect")); /* BB should we allow ipv6 on port 139? */ /* other OS never observed in Wild doing 139 with v6 */ + sin_server6->sin6_port = htons(volume_info->port); memcpy(&tcp_ses->addr.sockAddr6, sin_server6, sizeof(struct sockaddr_in6)); - sin_server6->sin6_port = htons(volume_info->port); rc = ipv6_connect(tcp_ses); } else { + sin_server->sin_port = htons(volume_info->port); memcpy(&tcp_ses->addr.sockAddr, sin_server, sizeof(struct sockaddr_in)); - sin_server->sin_port = htons(volume_info->port); rc = ipv4_connect(tcp_ses); } if (rc < 0) { diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index 3758965d73d..4326ffd90fa 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c @@ -188,6 +188,7 @@ int cifs_posix_open(char *full_path, struct inode **pinode, FILE_UNIX_BASIC_INFO *presp_data; __u32 posix_flags = 0; struct cifs_sb_info *cifs_sb = CIFS_SB(sb); + struct cifs_fattr fattr; cFYI(1, ("posix open %s", full_path)); @@ -236,22 +237,21 @@ int cifs_posix_open(char *full_path, struct inode **pinode, if (presp_data->Type == cpu_to_le32(-1)) goto posix_open_ret; /* open ok, caller does qpathinfo */ - /* get new inode and set it up */ if (!pinode) goto posix_open_ret; /* caller does not need info */ + cifs_unix_basic_to_fattr(&fattr, presp_data, cifs_sb); + + /* get new inode and set it up */ if (*pinode == NULL) { - __u64 unique_id = le64_to_cpu(presp_data->UniqueId); - *pinode = cifs_new_inode(sb, &unique_id); + *pinode = cifs_iget(sb, &fattr); + if (!*pinode) { + rc = -ENOMEM; + goto posix_open_ret; + } + } else { + cifs_fattr_to_inode(*pinode, &fattr); } - /* else an inode was passed in. Update its info, don't create one */ - - /* We do not need to close the file if new_inode fails since - the caller will retry qpathinfo as long as inode is null */ - if (*pinode == NULL) - goto posix_open_ret; - - posix_fill_in_inode(*pinode, presp_data, 1); cifs_fill_fileinfo(*pinode, *pnetfid, cifs_sb->tcon, write_only); @@ -307,8 +307,9 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode, full_path = build_path_from_dentry(direntry); if (full_path == NULL) { + rc = -ENOMEM; FreeXid(xid); - return -ENOMEM; + return rc; } if (oplockEnabled) @@ -424,9 +425,10 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode, args.uid = NO_CHANGE_64; args.gid = NO_CHANGE_64; } - CIFSSMBUnixSetInfo(xid, tcon, full_path, &args, - cifs_sb->local_nls, - cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); + CIFSSMBUnixSetPathInfo(xid, tcon, full_path, &args, + cifs_sb->local_nls, + cifs_sb->mnt_cifs_flags & + CIFS_MOUNT_MAP_SPECIAL_CHR); } else { /* BB implement mode setting via Windows security descriptors e.g. */ @@ -514,10 +516,10 @@ int cifs_mknod(struct inode *inode, struct dentry *direntry, int mode, args.uid = NO_CHANGE_64; args.gid = NO_CHANGE_64; } - rc = CIFSSMBUnixSetInfo(xid, pTcon, full_path, - &args, cifs_sb->local_nls, - cifs_sb->mnt_cifs_flags & - CIFS_MOUNT_MAP_SPECIAL_CHR); + rc = CIFSSMBUnixSetPathInfo(xid, pTcon, full_path, &args, + cifs_sb->local_nls, + cifs_sb->mnt_cifs_flags & + CIFS_MOUNT_MAP_SPECIAL_CHR); if (!rc) { rc = cifs_get_inode_info_unix(&newinode, full_path, @@ -540,8 +542,9 @@ int cifs_mknod(struct inode *inode, struct dentry *direntry, int mode, buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL); if (buf == NULL) { kfree(full_path); + rc = -ENOMEM; FreeXid(xid); - return -ENOMEM; + return rc; } rc = CIFSSMBOpen(xid, pTcon, full_path, @@ -641,6 +644,15 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry, } } + /* + * O_EXCL: optimize away the lookup, but don't hash the dentry. Let + * the VFS handle the create. + */ + if (nd->flags & LOOKUP_EXCL) { + d_instantiate(direntry, NULL); + return 0; + } + /* can not grab the rename sem here since it would deadlock in the cases (beginning of sys_rename itself) in which we already have the sb rename sem */ diff --git a/fs/cifs/dns_resolve.c b/fs/cifs/dns_resolve.c index df4a306f697..87948147d7e 100644 --- a/fs/cifs/dns_resolve.c +++ b/fs/cifs/dns_resolve.c @@ -35,26 +35,11 @@ * 0 - name is not IP */ static int -is_ip(const char *name) +is_ip(char *name) { - int rc; - struct sockaddr_in sin_server; - struct sockaddr_in6 sin_server6; - - rc = cifs_inet_pton(AF_INET, name, - &sin_server.sin_addr.s_addr); - - if (rc <= 0) { - /* not ipv4 address, try ipv6 */ - rc = cifs_inet_pton(AF_INET6, name, - &sin_server6.sin6_addr.in6_u); - if (rc > 0) - return 1; - } else { - return 1; - } - /* we failed translating address */ - return 0; + struct sockaddr_storage ss; + + return cifs_convert_address(name, &ss); } static int @@ -72,7 +57,7 @@ dns_resolver_instantiate(struct key *key, const void *data, ip[datalen] = '\0'; /* make sure this looks like an address */ - if (!is_ip((const char *) ip)) { + if (!is_ip(ip)) { kfree(ip); return -EINVAL; } diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 06866841b97..c34b7f8a217 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -300,14 +300,16 @@ int cifs_open(struct inode *inode, struct file *file) pCifsInode = CIFS_I(file->f_path.dentry->d_inode); pCifsFile = cifs_fill_filedata(file); if (pCifsFile) { + rc = 0; FreeXid(xid); - return 0; + return rc; } full_path = build_path_from_dentry(file->f_path.dentry); if (full_path == NULL) { + rc = -ENOMEM; FreeXid(xid); - return -ENOMEM; + return rc; } cFYI(1, ("inode = 0x%p file flags are 0x%x for %s", @@ -446,9 +448,9 @@ int cifs_open(struct inode *inode, struct file *file) .mtime = NO_CHANGE_64, .device = 0, }; - CIFSSMBUnixSetInfo(xid, tcon, full_path, &args, - cifs_sb->local_nls, - cifs_sb->mnt_cifs_flags & + CIFSSMBUnixSetPathInfo(xid, tcon, full_path, &args, + cifs_sb->local_nls, + cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); } } @@ -491,11 +493,12 @@ static int cifs_reopen_file(struct file *file, bool can_flush) return -EBADF; xid = GetXid(); - mutex_unlock(&pCifsFile->fh_mutex); + mutex_lock(&pCifsFile->fh_mutex); if (!pCifsFile->invalidHandle) { - mutex_lock(&pCifsFile->fh_mutex); + mutex_unlock(&pCifsFile->fh_mutex); + rc = 0; FreeXid(xid); - return 0; + return rc; } if (file->f_path.dentry == NULL) { @@ -524,7 +527,7 @@ static int cifs_reopen_file(struct file *file, bool can_flush) if (full_path == NULL) { rc = -ENOMEM; reopen_error_exit: - mutex_lock(&pCifsFile->fh_mutex); + mutex_unlock(&pCifsFile->fh_mutex); FreeXid(xid); return rc; } @@ -566,14 +569,14 @@ reopen_error_exit: cifs_sb->local_nls, cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); if (rc) { - mutex_lock(&pCifsFile->fh_mutex); + mutex_unlock(&pCifsFile->fh_mutex); cFYI(1, ("cifs_open returned 0x%x", rc)); cFYI(1, ("oplock: %d", oplock)); } else { reopen_success: pCifsFile->netfid = netfid; pCifsFile->invalidHandle = false; - mutex_lock(&pCifsFile->fh_mutex); + mutex_unlock(&pCifsFile->fh_mutex); pCifsInode = CIFS_I(inode); if (pCifsInode) { if (can_flush) { @@ -845,8 +848,9 @@ int cifs_lock(struct file *file, int cmd, struct file_lock *pfLock) tcon = cifs_sb->tcon; if (file->private_data == NULL) { + rc = -EBADF; FreeXid(xid); - return -EBADF; + return rc; } netfid = ((struct cifsFileInfo *)file->private_data)->netfid; @@ -1805,8 +1809,9 @@ ssize_t cifs_user_read(struct file *file, char __user *read_data, pTcon = cifs_sb->tcon; if (file->private_data == NULL) { + rc = -EBADF; FreeXid(xid); - return -EBADF; + return rc; } open_file = (struct cifsFileInfo *)file->private_data; @@ -1885,8 +1890,9 @@ static ssize_t cifs_read(struct file *file, char *read_data, size_t read_size, pTcon = cifs_sb->tcon; if (file->private_data == NULL) { + rc = -EBADF; FreeXid(xid); - return -EBADF; + return rc; } open_file = (struct cifsFileInfo *)file->private_data; @@ -2019,8 +2025,9 @@ static int cifs_readpages(struct file *file, struct address_space *mapping, xid = GetXid(); if (file->private_data == NULL) { + rc = -EBADF; FreeXid(xid); - return -EBADF; + return rc; } open_file = (struct cifsFileInfo *)file->private_data; cifs_sb = CIFS_SB(file->f_path.dentry->d_sb); @@ -2185,8 +2192,9 @@ static int cifs_readpage(struct file *file, struct page *page) xid = GetXid(); if (file->private_data == NULL) { + rc = -EBADF; FreeXid(xid); - return -EBADF; + return rc; } cFYI(1, ("readpage %p at offset %d 0x%x\n", diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index fad882b075b..18afe57b246 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -77,239 +77,202 @@ static void cifs_set_ops(struct inode *inode, const bool is_dfs_referral) } } -static void cifs_unix_info_to_inode(struct inode *inode, - FILE_UNIX_BASIC_INFO *info, int force_uid_gid) +/* populate an inode with info from a cifs_fattr struct */ +void +cifs_fattr_to_inode(struct inode *inode, struct cifs_fattr *fattr) { + struct cifsInodeInfo *cifs_i = CIFS_I(inode); struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); - struct cifsInodeInfo *cifsInfo = CIFS_I(inode); - __u64 num_of_bytes = le64_to_cpu(info->NumOfBytes); - __u64 end_of_file = le64_to_cpu(info->EndOfFile); + unsigned long oldtime = cifs_i->time; + + inode->i_atime = fattr->cf_atime; + inode->i_mtime = fattr->cf_mtime; + inode->i_ctime = fattr->cf_ctime; + inode->i_rdev = fattr->cf_rdev; + inode->i_nlink = fattr->cf_nlink; + inode->i_uid = fattr->cf_uid; + inode->i_gid = fattr->cf_gid; + + /* if dynperm is set, don't clobber existing mode */ + if (inode->i_state & I_NEW || + !(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DYNPERM)) + inode->i_mode = fattr->cf_mode; + + cifs_i->cifsAttrs = fattr->cf_cifsattrs; + cifs_i->uniqueid = fattr->cf_uniqueid; + + if (fattr->cf_flags & CIFS_FATTR_NEED_REVAL) + cifs_i->time = 0; + else + cifs_i->time = jiffies; + + cFYI(1, ("inode 0x%p old_time=%ld new_time=%ld", inode, + oldtime, cifs_i->time)); - inode->i_atime = cifs_NTtimeToUnix(info->LastAccessTime); - inode->i_mtime = - cifs_NTtimeToUnix(info->LastModificationTime); - inode->i_ctime = cifs_NTtimeToUnix(info->LastStatusChange); - inode->i_mode = le64_to_cpu(info->Permissions); + cifs_i->delete_pending = fattr->cf_flags & CIFS_FATTR_DELETE_PENDING; + + /* + * Can't safely change the file size here if the client is writing to + * it due to potential races. + */ + spin_lock(&inode->i_lock); + if (is_size_safe_to_change(cifs_i, fattr->cf_eof)) { + i_size_write(inode, fattr->cf_eof); + + /* + * i_blocks is not related to (i_size / i_blksize), + * but instead 512 byte (2**9) size is required for + * calculating num blocks. + */ + inode->i_blocks = (512 - 1 + fattr->cf_bytes) >> 9; + } + spin_unlock(&inode->i_lock); + + cifs_set_ops(inode, fattr->cf_flags & CIFS_FATTR_DFS_REFERRAL); +} + +/* Fill a cifs_fattr struct with info from FILE_UNIX_BASIC_INFO. */ +void +cifs_unix_basic_to_fattr(struct cifs_fattr *fattr, FILE_UNIX_BASIC_INFO *info, + struct cifs_sb_info *cifs_sb) +{ + memset(fattr, 0, sizeof(*fattr)); + fattr->cf_uniqueid = le64_to_cpu(info->UniqueId); + fattr->cf_bytes = le64_to_cpu(info->NumOfBytes); + fattr->cf_eof = le64_to_cpu(info->EndOfFile); + + fattr->cf_atime = cifs_NTtimeToUnix(info->LastAccessTime); + fattr->cf_mtime = cifs_NTtimeToUnix(info->LastModificationTime); + fattr->cf_ctime = cifs_NTtimeToUnix(info->LastStatusChange); + fattr->cf_mode = le64_to_cpu(info->Permissions); /* * Since we set the inode type below we need to mask off * to avoid strange results if bits set above. */ - inode->i_mode &= ~S_IFMT; + fattr->cf_mode &= ~S_IFMT; switch (le32_to_cpu(info->Type)) { case UNIX_FILE: - inode->i_mode |= S_IFREG; + fattr->cf_mode |= S_IFREG; + fattr->cf_dtype = DT_REG; break; case UNIX_SYMLINK: - inode->i_mode |= S_IFLNK; + fattr->cf_mode |= S_IFLNK; + fattr->cf_dtype = DT_LNK; break; case UNIX_DIR: - inode->i_mode |= S_IFDIR; + fattr->cf_mode |= S_IFDIR; + fattr->cf_dtype = DT_DIR; break; case UNIX_CHARDEV: - inode->i_mode |= S_IFCHR; - inode->i_rdev = MKDEV(le64_to_cpu(info->DevMajor), - le64_to_cpu(info->DevMinor) & MINORMASK); + fattr->cf_mode |= S_IFCHR; + fattr->cf_dtype = DT_CHR; + fattr->cf_rdev = MKDEV(le64_to_cpu(info->DevMajor), + le64_to_cpu(info->DevMinor) & MINORMASK); break; case UNIX_BLOCKDEV: - inode->i_mode |= S_IFBLK; - inode->i_rdev = MKDEV(le64_to_cpu(info->DevMajor), - le64_to_cpu(info->DevMinor) & MINORMASK); + fattr->cf_mode |= S_IFBLK; + fattr->cf_dtype = DT_BLK; + fattr->cf_rdev = MKDEV(le64_to_cpu(info->DevMajor), + le64_to_cpu(info->DevMinor) & MINORMASK); break; case UNIX_FIFO: - inode->i_mode |= S_IFIFO; + fattr->cf_mode |= S_IFIFO; + fattr->cf_dtype = DT_FIFO; break; case UNIX_SOCKET: - inode->i_mode |= S_IFSOCK; + fattr->cf_mode |= S_IFSOCK; + fattr->cf_dtype = DT_SOCK; break; default: /* safest to call it a file if we do not know */ - inode->i_mode |= S_IFREG; + fattr->cf_mode |= S_IFREG; + fattr->cf_dtype = DT_REG; cFYI(1, ("unknown type %d", le32_to_cpu(info->Type))); break; } - if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_UID) && - !force_uid_gid) - inode->i_uid = cifs_sb->mnt_uid; + if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_UID) + fattr->cf_uid = cifs_sb->mnt_uid; else - inode->i_uid = le64_to_cpu(info->Uid); + fattr->cf_uid = le64_to_cpu(info->Uid); - if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_GID) && - !force_uid_gid) - inode->i_gid = cifs_sb->mnt_gid; + if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_GID) + fattr->cf_gid = cifs_sb->mnt_gid; else - inode->i_gid = le64_to_cpu(info->Gid); - - inode->i_nlink = le64_to_cpu(info->Nlinks); - - cifsInfo->server_eof = end_of_file; - spin_lock(&inode->i_lock); - if (is_size_safe_to_change(cifsInfo, end_of_file)) { - /* - * We can not safely change the file size here if the client - * is writing to it due to potential races. - */ - i_size_write(inode, end_of_file); + fattr->cf_gid = le64_to_cpu(info->Gid); - /* - * i_blocks is not related to (i_size / i_blksize), - * but instead 512 byte (2**9) size is required for - * calculating num blocks. - */ - inode->i_blocks = (512 - 1 + num_of_bytes) >> 9; - } - spin_unlock(&inode->i_lock); + fattr->cf_nlink = le64_to_cpu(info->Nlinks); } - /* - * Needed to setup inode data for the directory which is the - * junction to the new submount (ie to setup the fake directory - * which represents a DFS referral) - */ -static void fill_fake_finddataunix(FILE_UNIX_BASIC_INFO *pfnd_dat, - struct super_block *sb) -{ - struct inode *pinode = NULL; - - memset(pfnd_dat, 0, sizeof(FILE_UNIX_BASIC_INFO)); - -/* __le64 pfnd_dat->EndOfFile = cpu_to_le64(0); - __le64 pfnd_dat->NumOfBytes = cpu_to_le64(0); - __u64 UniqueId = 0; */ - pfnd_dat->LastStatusChange = - cpu_to_le64(cifs_UnixTimeToNT(CURRENT_TIME)); - pfnd_dat->LastAccessTime = - cpu_to_le64(cifs_UnixTimeToNT(CURRENT_TIME)); - pfnd_dat->LastModificationTime = - cpu_to_le64(cifs_UnixTimeToNT(CURRENT_TIME)); - pfnd_dat->Type = cpu_to_le32(UNIX_DIR); - pfnd_dat->Permissions = cpu_to_le64(S_IXUGO | S_IRWXU); - pfnd_dat->Nlinks = cpu_to_le64(2); - if (sb->s_root) - pinode = sb->s_root->d_inode; - if (pinode == NULL) - return; - - /* fill in default values for the remaining based on root - inode since we can not query the server for this inode info */ - pfnd_dat->DevMajor = cpu_to_le64(MAJOR(pinode->i_rdev)); - pfnd_dat->DevMinor = cpu_to_le64(MINOR(pinode->i_rdev)); - pfnd_dat->Uid = cpu_to_le64(pinode->i_uid); - pfnd_dat->Gid = cpu_to_le64(pinode->i_gid); -} - -/** - * cifs_new inode - create new inode, initialize, and hash it - * @sb - pointer to superblock - * @inum - if valid pointer and serverino is enabled, replace i_ino with val - * - * Create a new inode, initialize it for CIFS and hash it. Returns the new - * inode or NULL if one couldn't be allocated. + * Fill a cifs_fattr struct with fake inode info. * - * If the share isn't mounted with "serverino" or inum is a NULL pointer then - * we'll just use the inode number assigned by new_inode(). Note that this can - * mean i_ino collisions since the i_ino assigned by new_inode is not - * guaranteed to be unique. + * Needed to setup cifs_fattr data for the directory which is the + * junction to the new submount (ie to setup the fake directory + * which represents a DFS referral). */ -struct inode * -cifs_new_inode(struct super_block *sb, __u64 *inum) +void +cifs_create_dfs_fattr(struct cifs_fattr *fattr, struct super_block *sb) { - struct inode *inode; - - inode = new_inode(sb); - if (inode == NULL) - return NULL; - - /* - * BB: Is i_ino == 0 legal? Here, we assume that it is. If it isn't we - * stop passing inum as ptr. Are there sanity checks we can use to - * ensure that the server is really filling in that field? Also, - * if serverino is disabled, perhaps we should be using iunique()? - */ - if (inum && (CIFS_SB(sb)->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM)) - inode->i_ino = (unsigned long) *inum; - - /* - * must set this here instead of cifs_alloc_inode since VFS will - * clobber i_flags - */ - if (sb->s_flags & MS_NOATIME) - inode->i_flags |= S_NOATIME | S_NOCMTIME; - - insert_inode_hash(inode); + struct cifs_sb_info *cifs_sb = CIFS_SB(sb); - return inode; + cFYI(1, ("creating fake fattr for DFS referral")); + + memset(fattr, 0, sizeof(*fattr)); + fattr->cf_mode = S_IFDIR | S_IXUGO | S_IRWXU; + fattr->cf_uid = cifs_sb->mnt_uid; + fattr->cf_gid = cifs_sb->mnt_gid; + fattr->cf_atime = CURRENT_TIME; + fattr->cf_ctime = CURRENT_TIME; + fattr->cf_mtime = CURRENT_TIME; + fattr->cf_nlink = 2; + fattr->cf_flags |= CIFS_FATTR_DFS_REFERRAL; } int cifs_get_inode_info_unix(struct inode **pinode, - const unsigned char *full_path, struct super_block *sb, int xid) + const unsigned char *full_path, + struct super_block *sb, int xid) { - int rc = 0; + int rc; FILE_UNIX_BASIC_INFO find_data; - struct cifsTconInfo *pTcon; - struct inode *inode; + struct cifs_fattr fattr; + struct cifsTconInfo *tcon; struct cifs_sb_info *cifs_sb = CIFS_SB(sb); - bool is_dfs_referral = false; - struct cifsInodeInfo *cifsInfo; - __u64 num_of_bytes; - __u64 end_of_file; - pTcon = cifs_sb->tcon; + tcon = cifs_sb->tcon; cFYI(1, ("Getting info on %s", full_path)); /* could have done a find first instead but this returns more info */ - rc = CIFSSMBUnixQPathInfo(xid, pTcon, full_path, &find_data, + rc = CIFSSMBUnixQPathInfo(xid, tcon, full_path, &find_data, cifs_sb->local_nls, cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); - if (rc == -EREMOTE && !is_dfs_referral) { - is_dfs_referral = true; - cFYI(DBG2, ("DFS ref")); - /* for DFS, server does not give us real inode data */ - fill_fake_finddataunix(&find_data, sb); - rc = 0; - } else if (rc) - goto cgiiu_exit; - num_of_bytes = le64_to_cpu(find_data.NumOfBytes); - end_of_file = le64_to_cpu(find_data.EndOfFile); + if (!rc) { + cifs_unix_basic_to_fattr(&fattr, &find_data, cifs_sb); + } else if (rc == -EREMOTE) { + cifs_create_dfs_fattr(&fattr, sb); + rc = 0; + } else { + return rc; + } - /* get new inode */ if (*pinode == NULL) { - __u64 unique_id = le64_to_cpu(find_data.UniqueId); - *pinode = cifs_new_inode(sb, &unique_id); - if (*pinode == NULL) { + /* get new inode */ + *pinode = cifs_iget(sb, &fattr); + if (!*pinode) rc = -ENOMEM; - goto cgiiu_exit; - } + } else { + /* we already have inode, update it */ + cifs_fattr_to_inode(*pinode, &fattr); } - inode = *pinode; - cifsInfo = CIFS_I(inode); - - cFYI(1, ("Old time %ld", cifsInfo->time)); - cifsInfo->time = jiffies; - cFYI(1, ("New time %ld", cifsInfo->time)); - /* this is ok to set on every inode revalidate */ - atomic_set(&cifsInfo->inUse, 1); - - cifs_unix_info_to_inode(inode, &find_data, 0); - - if (num_of_bytes < end_of_file) - cFYI(1, ("allocation size less than end of file")); - cFYI(1, ("Size %ld and blocks %llu", - (unsigned long) inode->i_size, - (unsigned long long)inode->i_blocks)); - - cifs_set_ops(inode, is_dfs_referral); -cgiiu_exit: return rc; } -static int decode_sfu_inode(struct inode *inode, __u64 size, - const unsigned char *path, - struct cifs_sb_info *cifs_sb, int xid) +static int +cifs_sfu_type(struct cifs_fattr *fattr, const unsigned char *path, + struct cifs_sb_info *cifs_sb, int xid) { int rc; int oplock = 0; @@ -321,10 +284,15 @@ static int decode_sfu_inode(struct inode *inode, __u64 size, pbuf = buf; - if (size == 0) { - inode->i_mode |= S_IFIFO; + fattr->cf_mode &= ~S_IFMT; + + if (fattr->cf_eof == 0) { + fattr->cf_mode |= S_IFIFO; + fattr->cf_dtype = DT_FIFO; return 0; - } else if (size < 8) { + } else if (fattr->cf_eof < 8) { + fattr->cf_mode |= S_IFREG; + fattr->cf_dtype = DT_REG; return -EINVAL; /* EOPNOTSUPP? */ } @@ -336,42 +304,46 @@ static int decode_sfu_inode(struct inode *inode, __u64 size, if (rc == 0) { int buf_type = CIFS_NO_BUFFER; /* Read header */ - rc = CIFSSMBRead(xid, pTcon, - netfid, + rc = CIFSSMBRead(xid, pTcon, netfid, 24 /* length */, 0 /* offset */, &bytes_read, &pbuf, &buf_type); if ((rc == 0) && (bytes_read >= 8)) { if (memcmp("IntxBLK", pbuf, 8) == 0) { cFYI(1, ("Block device")); - inode->i_mode |= S_IFBLK; + fattr->cf_mode |= S_IFBLK; + fattr->cf_dtype = DT_BLK; if (bytes_read == 24) { /* we have enough to decode dev num */ __u64 mjr; /* major */ __u64 mnr; /* minor */ mjr = le64_to_cpu(*(__le64 *)(pbuf+8)); mnr = le64_to_cpu(*(__le64 *)(pbuf+16)); - inode->i_rdev = MKDEV(mjr, mnr); + fattr->cf_rdev = MKDEV(mjr, mnr); } } else if (memcmp("IntxCHR", pbuf, 8) == 0) { cFYI(1, ("Char device")); - inode->i_mode |= S_IFCHR; + fattr->cf_mode |= S_IFCHR; + fattr->cf_dtype = DT_CHR; if (bytes_read == 24) { /* we have enough to decode dev num */ __u64 mjr; /* major */ __u64 mnr; /* minor */ mjr = le64_to_cpu(*(__le64 *)(pbuf+8)); mnr = le64_to_cpu(*(__le64 *)(pbuf+16)); - inode->i_rdev = MKDEV(mjr, mnr); + fattr->cf_rdev = MKDEV(mjr, mnr); } } else if (memcmp("IntxLNK", pbuf, 7) == 0) { cFYI(1, ("Symlink")); - inode->i_mode |= S_IFLNK; + fattr->cf_mode |= S_IFLNK; + fattr->cf_dtype = DT_LNK; } else { - inode->i_mode |= S_IFREG; /* file? */ + fattr->cf_mode |= S_IFREG; /* file? */ + fattr->cf_dtype = DT_REG; rc = -EOPNOTSUPP; } } else { - inode->i_mode |= S_IFREG; /* then it is a file */ + fattr->cf_mode |= S_IFREG; /* then it is a file */ + fattr->cf_dtype = DT_REG; rc = -EOPNOTSUPP; /* or some unknown SFU type */ } CIFSSMBClose(xid, pTcon, netfid); @@ -381,9 +353,13 @@ static int decode_sfu_inode(struct inode *inode, __u64 size, #define SFBITS_MASK (S_ISVTX | S_ISGID | S_ISUID) /* SETFILEBITS valid bits */ -static int get_sfu_mode(struct inode *inode, - const unsigned char *path, - struct cifs_sb_info *cifs_sb, int xid) +/* + * Fetch mode bits as provided by SFU. + * + * FIXME: Doesn't this clobber the type bit we got from cifs_sfu_type ? + */ +static int cifs_sfu_mode(struct cifs_fattr *fattr, const unsigned char *path, + struct cifs_sb_info *cifs_sb, int xid) { #ifdef CONFIG_CIFS_XATTR ssize_t rc; @@ -391,68 +367,80 @@ static int get_sfu_mode(struct inode *inode, __u32 mode; rc = CIFSSMBQueryEA(xid, cifs_sb->tcon, path, "SETFILEBITS", - ea_value, 4 /* size of buf */, cifs_sb->local_nls, - cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); + ea_value, 4 /* size of buf */, cifs_sb->local_nls, + cifs_sb->mnt_cifs_flags & + CIFS_MOUNT_MAP_SPECIAL_CHR); if (rc < 0) return (int)rc; else if (rc > 3) { mode = le32_to_cpu(*((__le32 *)ea_value)); - inode->i_mode &= ~SFBITS_MASK; - cFYI(1, ("special bits 0%o org mode 0%o", mode, inode->i_mode)); - inode->i_mode = (mode & SFBITS_MASK) | inode->i_mode; + fattr->cf_mode &= ~SFBITS_MASK; + cFYI(1, ("special bits 0%o org mode 0%o", mode, + fattr->cf_mode)); + fattr->cf_mode = (mode & SFBITS_MASK) | fattr->cf_mode; cFYI(1, ("special mode bits 0%o", mode)); - return 0; - } else { - return 0; } + + return 0; #else return -EOPNOTSUPP; #endif } -/* - * Needed to setup inode data for the directory which is the - * junction to the new submount (ie to setup the fake directory - * which represents a DFS referral) - */ -static void fill_fake_finddata(FILE_ALL_INFO *pfnd_dat, - struct super_block *sb) +/* Fill a cifs_fattr struct with info from FILE_ALL_INFO */ +void +cifs_all_info_to_fattr(struct cifs_fattr *fattr, FILE_ALL_INFO *info, + struct cifs_sb_info *cifs_sb, bool adjust_tz) { - memset(pfnd_dat, 0, sizeof(FILE_ALL_INFO)); - -/* __le64 pfnd_dat->AllocationSize = cpu_to_le64(0); - __le64 pfnd_dat->EndOfFile = cpu_to_le64(0); - __u8 pfnd_dat->DeletePending = 0; - __u8 pfnd_data->Directory = 0; - __le32 pfnd_dat->EASize = 0; - __u64 pfnd_dat->IndexNumber = 0; - __u64 pfnd_dat->IndexNumber1 = 0; */ - pfnd_dat->CreationTime = - cpu_to_le64(cifs_UnixTimeToNT(CURRENT_TIME)); - pfnd_dat->LastAccessTime = - cpu_to_le64(cifs_UnixTimeToNT(CURRENT_TIME)); - pfnd_dat->LastWriteTime = - cpu_to_le64(cifs_UnixTimeToNT(CURRENT_TIME)); - pfnd_dat->ChangeTime = - cpu_to_le64(cifs_UnixTimeToNT(CURRENT_TIME)); - pfnd_dat->Attributes = cpu_to_le32(ATTR_DIRECTORY); - pfnd_dat->NumberOfLinks = cpu_to_le32(2); + memset(fattr, 0, sizeof(*fattr)); + fattr->cf_cifsattrs = le32_to_cpu(info->Attributes); + if (info->DeletePending) + fattr->cf_flags |= CIFS_FATTR_DELETE_PENDING; + + if (info->LastAccessTime) + fattr->cf_atime = cifs_NTtimeToUnix(info->LastAccessTime); + else + fattr->cf_atime = CURRENT_TIME; + + fattr->cf_ctime = cifs_NTtimeToUnix(info->ChangeTime); + fattr->cf_mtime = cifs_NTtimeToUnix(info->LastWriteTime); + + if (adjust_tz) { + fattr->cf_ctime.tv_sec += cifs_sb->tcon->ses->server->timeAdj; + fattr->cf_mtime.tv_sec += cifs_sb->tcon->ses->server->timeAdj; + } + + fattr->cf_eof = le64_to_cpu(info->EndOfFile); + fattr->cf_bytes = le64_to_cpu(info->AllocationSize); + + if (fattr->cf_cifsattrs & ATTR_DIRECTORY) { + fattr->cf_mode = S_IFDIR | cifs_sb->mnt_dir_mode; + fattr->cf_dtype = DT_DIR; + } else { + fattr->cf_mode = S_IFREG | cifs_sb->mnt_file_mode; + fattr->cf_dtype = DT_REG; + + /* clear write bits if ATTR_READONLY is set */ + if (fattr->cf_cifsattrs & ATTR_READONLY) + fattr->cf_mode &= ~(S_IWUGO); + } + + fattr->cf_nlink = le32_to_cpu(info->NumberOfLinks); + + fattr->cf_uid = cifs_sb->mnt_uid; + fattr->cf_gid = cifs_sb->mnt_gid; } int cifs_get_inode_info(struct inode **pinode, const unsigned char *full_path, FILE_ALL_INFO *pfindData, struct super_block *sb, int xid, const __u16 *pfid) { - int rc = 0; - __u32 attr; - struct cifsInodeInfo *cifsInfo; + int rc = 0, tmprc; struct cifsTconInfo *pTcon; - struct inode *inode; struct cifs_sb_info *cifs_sb = CIFS_SB(sb); char *buf = NULL; bool adjustTZ = false; - bool is_dfs_referral = false; - umode_t default_mode; + struct cifs_fattr fattr; pTcon = cifs_sb->tcon; cFYI(1, ("Getting info on %s", full_path)); @@ -487,163 +475,82 @@ int cifs_get_inode_info(struct inode **pinode, adjustTZ = true; } } - /* dump_mem("\nQPathInfo return data",&findData, sizeof(findData)); */ - if (rc == -EREMOTE) { - is_dfs_referral = true; - fill_fake_finddata(pfindData, sb); + + if (!rc) { + cifs_all_info_to_fattr(&fattr, (FILE_ALL_INFO *) pfindData, + cifs_sb, adjustTZ); + } else if (rc == -EREMOTE) { + cifs_create_dfs_fattr(&fattr, sb); rc = 0; - } else if (rc) + } else { goto cgii_exit; + } - attr = le32_to_cpu(pfindData->Attributes); - - /* get new inode */ + /* + * If an inode wasn't passed in, then get the inode number + * + * Is an i_ino of zero legal? Can we use that to check if the server + * supports returning inode numbers? Are there other sanity checks we + * can use to ensure that the server is really filling in that field? + * + * We can not use the IndexNumber field by default from Windows or + * Samba (in ALL_INFO buf) but we can request it explicitly. The SNIA + * CIFS spec claims that this value is unique within the scope of a + * share, and the windows docs hint that it's actually unique + * per-machine. + * + * There may be higher info levels that work but are there Windows + * server or network appliances for which IndexNumber field is not + * guaranteed unique? + */ if (*pinode == NULL) { - __u64 inode_num; - __u64 *pinum = &inode_num; - - /* Is an i_ino of zero legal? Can we use that to check - if the server supports returning inode numbers? Are - there other sanity checks we can use to ensure that - the server is really filling in that field? */ - - /* We can not use the IndexNumber field by default from - Windows or Samba (in ALL_INFO buf) but we can request - it explicitly. It may not be unique presumably if - the server has multiple devices mounted under one share */ - - /* There may be higher info levels that work but are - there Windows server or network appliances for which - IndexNumber field is not guaranteed unique? */ - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM) { int rc1 = 0; rc1 = CIFSGetSrvInodeNumber(xid, pTcon, - full_path, pinum, + full_path, &fattr.cf_uniqueid, cifs_sb->local_nls, cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); if (rc1) { - cFYI(1, ("GetSrvInodeNum rc %d", rc1)); - pinum = NULL; /* BB EOPNOSUPP disable SERVER_INUM? */ + cFYI(1, ("GetSrvInodeNum rc %d", rc1)); + fattr.cf_uniqueid = iunique(sb, ROOT_I); } } else { - pinum = NULL; + fattr.cf_uniqueid = iunique(sb, ROOT_I); } - - *pinode = cifs_new_inode(sb, pinum); - if (*pinode == NULL) { - rc = -ENOMEM; - goto cgii_exit; - } - } - inode = *pinode; - cifsInfo = CIFS_I(inode); - cifsInfo->cifsAttrs = attr; - cifsInfo->delete_pending = pfindData->DeletePending ? true : false; - cFYI(1, ("Old time %ld", cifsInfo->time)); - cifsInfo->time = jiffies; - cFYI(1, ("New time %ld", cifsInfo->time)); - - /* blksize needs to be multiple of two. So safer to default to - blksize and blkbits set in superblock so 2**blkbits and blksize - will match rather than setting to: - (pTcon->ses->server->maxBuf - MAX_CIFS_HDR_SIZE) & 0xFFFFFE00;*/ - - /* Linux can not store file creation time so ignore it */ - if (pfindData->LastAccessTime) - inode->i_atime = cifs_NTtimeToUnix(pfindData->LastAccessTime); - else /* do not need to use current_fs_time - time not stored */ - inode->i_atime = CURRENT_TIME; - inode->i_mtime = cifs_NTtimeToUnix(pfindData->LastWriteTime); - inode->i_ctime = cifs_NTtimeToUnix(pfindData->ChangeTime); - cFYI(DBG2, ("Attributes came in as 0x%x", attr)); - if (adjustTZ && (pTcon->ses) && (pTcon->ses->server)) { - inode->i_ctime.tv_sec += pTcon->ses->server->timeAdj; - inode->i_mtime.tv_sec += pTcon->ses->server->timeAdj; - } - - /* get default inode mode */ - if (attr & ATTR_DIRECTORY) - default_mode = cifs_sb->mnt_dir_mode; - else - default_mode = cifs_sb->mnt_file_mode; - - /* set permission bits */ - if (atomic_read(&cifsInfo->inUse) == 0 || - (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DYNPERM) == 0) - inode->i_mode = default_mode; - else { - /* just reenable write bits if !ATTR_READONLY */ - if ((inode->i_mode & S_IWUGO) == 0 && - (attr & ATTR_READONLY) == 0) - inode->i_mode |= (S_IWUGO & default_mode); - - inode->i_mode &= ~S_IFMT; - } - /* clear write bits if ATTR_READONLY is set */ - if (attr & ATTR_READONLY) - inode->i_mode &= ~S_IWUGO; - - /* set inode type */ - if ((attr & ATTR_SYSTEM) && - (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL)) { - /* no need to fix endianness on 0 */ - if (pfindData->EndOfFile == 0) - inode->i_mode |= S_IFIFO; - else if (decode_sfu_inode(inode, - le64_to_cpu(pfindData->EndOfFile), - full_path, cifs_sb, xid)) - cFYI(1, ("unknown SFU file type\n")); } else { - if (attr & ATTR_DIRECTORY) - inode->i_mode |= S_IFDIR; - else - inode->i_mode |= S_IFREG; + fattr.cf_uniqueid = CIFS_I(*pinode)->uniqueid; } - cifsInfo->server_eof = le64_to_cpu(pfindData->EndOfFile); - spin_lock(&inode->i_lock); - if (is_size_safe_to_change(cifsInfo, cifsInfo->server_eof)) { - /* can not safely shrink the file size here if the - client is writing to it due to potential races */ - i_size_write(inode, cifsInfo->server_eof); - - /* 512 bytes (2**9) is the fake blocksize that must be - used for this calculation */ - inode->i_blocks = (512 - 1 + le64_to_cpu( - pfindData->AllocationSize)) >> 9; + /* query for SFU type info if supported and needed */ + if (fattr.cf_cifsattrs & ATTR_SYSTEM && + cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL) { + tmprc = cifs_sfu_type(&fattr, full_path, cifs_sb, xid); + if (tmprc) + cFYI(1, ("cifs_sfu_type failed: %d", tmprc)); } - spin_unlock(&inode->i_lock); - - inode->i_nlink = le32_to_cpu(pfindData->NumberOfLinks); - /* BB fill in uid and gid here? with help from winbind? - or retrieve from NTFS stream extended attribute */ #ifdef CONFIG_CIFS_EXPERIMENTAL /* fill in 0777 bits from ACL */ if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) { cFYI(1, ("Getting mode bits from ACL")); - acl_to_uid_mode(cifs_sb, inode, full_path, pfid); + cifs_acl_to_fattr(cifs_sb, &fattr, *pinode, full_path, pfid); } #endif - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL) { - /* fill in remaining high mode bits e.g. SUID, VTX */ - get_sfu_mode(inode, full_path, cifs_sb, xid); - } else if (atomic_read(&cifsInfo->inUse) == 0) { - inode->i_uid = cifs_sb->mnt_uid; - inode->i_gid = cifs_sb->mnt_gid; - /* set so we do not keep refreshing these fields with - bad data after user has changed them in memory */ - atomic_set(&cifsInfo->inUse, 1); - } - - cifs_set_ops(inode, is_dfs_referral); - + /* fill in remaining high mode bits e.g. SUID, VTX */ + if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL) + cifs_sfu_mode(&fattr, full_path, cifs_sb, xid); + if (!*pinode) { + *pinode = cifs_iget(sb, &fattr); + if (!*pinode) + rc = -ENOMEM; + } else { + cifs_fattr_to_inode(*pinode, &fattr); + } cgii_exit: kfree(buf); @@ -695,33 +602,78 @@ char *cifs_build_path_to_root(struct cifs_sb_info *cifs_sb) return full_path; } +static int +cifs_find_inode(struct inode *inode, void *opaque) +{ + struct cifs_fattr *fattr = (struct cifs_fattr *) opaque; + + if (CIFS_I(inode)->uniqueid != fattr->cf_uniqueid) + return 0; + + return 1; +} + +static int +cifs_init_inode(struct inode *inode, void *opaque) +{ + struct cifs_fattr *fattr = (struct cifs_fattr *) opaque; + + CIFS_I(inode)->uniqueid = fattr->cf_uniqueid; + return 0; +} + +/* Given fattrs, get a corresponding inode */ +struct inode * +cifs_iget(struct super_block *sb, struct cifs_fattr *fattr) +{ + unsigned long hash; + struct inode *inode; + + cFYI(1, ("looking for uniqueid=%llu", fattr->cf_uniqueid)); + + /* hash down to 32-bits on 32-bit arch */ + hash = cifs_uniqueid_to_ino_t(fattr->cf_uniqueid); + + inode = iget5_locked(sb, hash, cifs_find_inode, cifs_init_inode, fattr); + + /* we have fattrs in hand, update the inode */ + if (inode) { + cifs_fattr_to_inode(inode, fattr); + if (sb->s_flags & MS_NOATIME) + inode->i_flags |= S_NOATIME | S_NOCMTIME; + if (inode->i_state & I_NEW) { + inode->i_ino = hash; + unlock_new_inode(inode); + } + } + + return inode; +} + /* gets root inode */ struct inode *cifs_root_iget(struct super_block *sb, unsigned long ino) { int xid; struct cifs_sb_info *cifs_sb; - struct inode *inode; + struct inode *inode = NULL; long rc; char *full_path; - inode = iget_locked(sb, ino); - if (!inode) - return ERR_PTR(-ENOMEM); - if (!(inode->i_state & I_NEW)) - return inode; - - cifs_sb = CIFS_SB(inode->i_sb); + cifs_sb = CIFS_SB(sb); full_path = cifs_build_path_to_root(cifs_sb); if (full_path == NULL) return ERR_PTR(-ENOMEM); xid = GetXid(); if (cifs_sb->tcon->unix_ext) - rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb, - xid); + rc = cifs_get_inode_info_unix(&inode, full_path, sb, xid); else - rc = cifs_get_inode_info(&inode, full_path, NULL, inode->i_sb, + rc = cifs_get_inode_info(&inode, full_path, NULL, sb, xid, NULL); + + if (!inode) + return ERR_PTR(-ENOMEM); + if (rc && cifs_sb->tcon->ipc) { cFYI(1, ("ipc connection - fake read inode")); inode->i_mode |= S_IFDIR; @@ -737,7 +689,6 @@ struct inode *cifs_root_iget(struct super_block *sb, unsigned long ino) return ERR_PTR(rc); } - unlock_new_inode(inode); kfree(full_path); /* can not call macro FreeXid here since in a void func @@ -988,8 +939,9 @@ int cifs_unlink(struct inode *dir, struct dentry *dentry) * sb->s_vfs_rename_mutex here */ full_path = build_path_from_dentry(dentry); if (full_path == NULL) { + rc = -ENOMEM; FreeXid(xid); - return -ENOMEM; + return rc; } if ((tcon->ses->capabilities & CAP_UNIX) && @@ -1062,44 +1014,6 @@ out_reval: return rc; } -void posix_fill_in_inode(struct inode *tmp_inode, - FILE_UNIX_BASIC_INFO *pData, int isNewInode) -{ - struct cifsInodeInfo *cifsInfo = CIFS_I(tmp_inode); - loff_t local_size; - struct timespec local_mtime; - - cifsInfo->time = jiffies; - atomic_inc(&cifsInfo->inUse); - - /* save mtime and size */ - local_mtime = tmp_inode->i_mtime; - local_size = tmp_inode->i_size; - - cifs_unix_info_to_inode(tmp_inode, pData, 1); - cifs_set_ops(tmp_inode, false); - - if (!S_ISREG(tmp_inode->i_mode)) - return; - - /* - * No sense invalidating pages for new inode - * since we we have not started caching - * readahead file data yet. - */ - if (isNewInode) - return; - - if (timespec_equal(&tmp_inode->i_mtime, &local_mtime) && - (local_size == tmp_inode->i_size)) { - cFYI(1, ("inode exists but unchanged")); - } else { - /* file may have changed on server */ - cFYI(1, ("invalidate inode, readdir detected change")); - invalidate_remote_inode(tmp_inode); - } -} - int cifs_mkdir(struct inode *inode, struct dentry *direntry, int mode) { int rc = 0, tmprc; @@ -1108,6 +1022,7 @@ int cifs_mkdir(struct inode *inode, struct dentry *direntry, int mode) struct cifsTconInfo *pTcon; char *full_path = NULL; struct inode *newinode = NULL; + struct cifs_fattr fattr; cFYI(1, ("In cifs_mkdir, mode = 0x%x inode = 0x%p", mode, inode)); @@ -1118,8 +1033,9 @@ int cifs_mkdir(struct inode *inode, struct dentry *direntry, int mode) full_path = build_path_from_dentry(direntry); if (full_path == NULL) { + rc = -ENOMEM; FreeXid(xid); - return -ENOMEM; + return rc; } if ((pTcon->ses->capabilities & CAP_UNIX) && @@ -1146,7 +1062,6 @@ int cifs_mkdir(struct inode *inode, struct dentry *direntry, int mode) cFYI(1, ("posix mkdir returned 0x%x", rc)); d_drop(direntry); } else { - __u64 unique_id; if (pInfo->Type == cpu_to_le32(-1)) { /* no return info, go query for it */ kfree(pInfo); @@ -1160,20 +1075,15 @@ int cifs_mkdir(struct inode *inode, struct dentry *direntry, int mode) else direntry->d_op = &cifs_dentry_ops; - unique_id = le64_to_cpu(pInfo->UniqueId); - newinode = cifs_new_inode(inode->i_sb, &unique_id); - if (newinode == NULL) { + cifs_unix_basic_to_fattr(&fattr, pInfo, cifs_sb); + newinode = cifs_iget(inode->i_sb, &fattr); + if (!newinode) { kfree(pInfo); goto mkdir_get_info; } - newinode->i_nlink = 2; d_instantiate(direntry, newinode); - /* we already checked in POSIXCreate whether - frame was long enough */ - posix_fill_in_inode(direntry->d_inode, - pInfo, 1 /* NewInode */); #ifdef CONFIG_CIFS_DEBUG2 cFYI(1, ("instantiated dentry %p %s to inode %p", direntry, direntry->d_name.name, newinode)); @@ -1236,10 +1146,10 @@ mkdir_get_info: args.uid = NO_CHANGE_64; args.gid = NO_CHANGE_64; } - CIFSSMBUnixSetInfo(xid, pTcon, full_path, &args, - cifs_sb->local_nls, - cifs_sb->mnt_cifs_flags & - CIFS_MOUNT_MAP_SPECIAL_CHR); + CIFSSMBUnixSetPathInfo(xid, pTcon, full_path, &args, + cifs_sb->local_nls, + cifs_sb->mnt_cifs_flags & + CIFS_MOUNT_MAP_SPECIAL_CHR); } else { if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) && (mode & S_IWUGO) == 0) { @@ -1303,8 +1213,9 @@ int cifs_rmdir(struct inode *inode, struct dentry *direntry) full_path = build_path_from_dentry(direntry); if (full_path == NULL) { + rc = -ENOMEM; FreeXid(xid); - return -ENOMEM; + return rc; } rc = CIFSSMBRmDir(xid, pTcon, full_path, cifs_sb->local_nls, @@ -1508,8 +1419,9 @@ int cifs_revalidate(struct dentry *direntry) since that would deadlock */ full_path = build_path_from_dentry(direntry); if (full_path == NULL) { + rc = -ENOMEM; FreeXid(xid); - return -ENOMEM; + return rc; } cFYI(1, ("Revalidate: %s inode 0x%p count %d dentry: 0x%p d_time %ld " "jiffies %ld", full_path, direntry->d_inode, @@ -1618,6 +1530,7 @@ int cifs_getattr(struct vfsmount *mnt, struct dentry *dentry, if (!err) { generic_fillattr(dentry->d_inode, stat); stat->blksize = CIFS_MAX_MSGSIZE; + stat->ino = CIFS_I(dentry->d_inode)->uniqueid; } return err; } @@ -1782,6 +1695,7 @@ cifs_setattr_unix(struct dentry *direntry, struct iattr *attrs) struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); struct cifsTconInfo *pTcon = cifs_sb->tcon; struct cifs_unix_set_info_args *args = NULL; + struct cifsFileInfo *open_file; cFYI(1, ("setattr_unix on file %s attrs->ia_valid=0x%x", direntry->d_name.name, attrs->ia_valid)); @@ -1868,10 +1782,18 @@ cifs_setattr_unix(struct dentry *direntry, struct iattr *attrs) args->ctime = NO_CHANGE_64; args->device = 0; - rc = CIFSSMBUnixSetInfo(xid, pTcon, full_path, args, - cifs_sb->local_nls, - cifs_sb->mnt_cifs_flags & - CIFS_MOUNT_MAP_SPECIAL_CHR); + open_file = find_writable_file(cifsInode); + if (open_file) { + u16 nfid = open_file->netfid; + u32 npid = open_file->pid; + rc = CIFSSMBUnixSetFileInfo(xid, pTcon, args, nfid, npid); + atomic_dec(&open_file->wrtPending); + } else { + rc = CIFSSMBUnixSetPathInfo(xid, pTcon, full_path, args, + cifs_sb->local_nls, + cifs_sb->mnt_cifs_flags & + CIFS_MOUNT_MAP_SPECIAL_CHR); + } if (!rc) rc = inode_setattr(inode, attrs); @@ -1911,8 +1833,9 @@ cifs_setattr_nounix(struct dentry *direntry, struct iattr *attrs) full_path = build_path_from_dentry(direntry); if (full_path == NULL) { + rc = -ENOMEM; FreeXid(xid); - return -ENOMEM; + return rc; } /* diff --git a/fs/cifs/link.c b/fs/cifs/link.c index cd83c53fcbb..fc1e0487eae 100644 --- a/fs/cifs/link.c +++ b/fs/cifs/link.c @@ -172,8 +172,9 @@ cifs_symlink(struct inode *inode, struct dentry *direntry, const char *symname) full_path = build_path_from_dentry(direntry); if (full_path == NULL) { + rc = -ENOMEM; FreeXid(xid); - return -ENOMEM; + return rc; } cFYI(1, ("Full path: %s", full_path)); diff --git a/fs/cifs/netmisc.c b/fs/cifs/netmisc.c index 32d6baa0a54..bd6d6895730 100644 --- a/fs/cifs/netmisc.c +++ b/fs/cifs/netmisc.c @@ -133,10 +133,12 @@ static const struct smb_to_posix_error mapping_table_ERRHRD[] = { {0, 0} }; -/* Convert string containing dotted ip address to binary form */ -/* returns 0 if invalid address */ - -int +/* + * Convert a string containing text IPv4 or IPv6 address to binary form. + * + * Returns 0 on failure. + */ +static int cifs_inet_pton(const int address_family, const char *cp, void *dst) { int ret = 0; @@ -153,6 +155,52 @@ cifs_inet_pton(const int address_family, const char *cp, void *dst) return ret; } +/* + * Try to convert a string to an IPv4 address and then attempt to convert + * it to an IPv6 address if that fails. Set the family field if either + * succeeds. If it's an IPv6 address and it has a '%' sign in it, try to + * treat the part following it as a numeric sin6_scope_id. + * + * Returns 0 on failure. + */ +int +cifs_convert_address(char *src, void *dst) +{ + int rc; + char *pct, *endp; + struct sockaddr_in *s4 = (struct sockaddr_in *) dst; + struct sockaddr_in6 *s6 = (struct sockaddr_in6 *) dst; + + /* IPv4 address */ + if (cifs_inet_pton(AF_INET, src, &s4->sin_addr.s_addr)) { + s4->sin_family = AF_INET; + return 1; + } + + /* temporarily terminate string */ + pct = strchr(src, '%'); + if (pct) + *pct = '\0'; + + rc = cifs_inet_pton(AF_INET6, src, &s6->sin6_addr.s6_addr); + + /* repair temp termination (if any) and make pct point to scopeid */ + if (pct) + *pct++ = '%'; + + if (!rc) + return rc; + + s6->sin6_family = AF_INET6; + if (pct) { + s6->sin6_scope_id = (u32) simple_strtoul(pct, &endp, 0); + if (!*pct || *endp) + return 0; + } + + return rc; +} + /***************************************************************************** convert a NT status code to a dos class/code *****************************************************************************/ diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c index 86d0055dc52..f823a4a208a 100644 --- a/fs/cifs/readdir.c +++ b/fs/cifs/readdir.c @@ -63,374 +63,123 @@ static inline void dump_cifs_file_struct(struct file *file, char *label) } #endif /* DEBUG2 */ -/* Returns 1 if new inode created, 2 if both dentry and inode were */ -/* Might check in the future if inode number changed so we can rehash inode */ -static int -construct_dentry(struct qstr *qstring, struct file *file, - struct inode **ptmp_inode, struct dentry **pnew_dentry, - __u64 *inum) +/* + * Find the dentry that matches "name". If there isn't one, create one. If it's + * a negative dentry or the uniqueid changed, then drop it and recreate it. + */ +static struct dentry * +cifs_readdir_lookup(struct dentry *parent, struct qstr *name, + struct cifs_fattr *fattr) { - struct dentry *tmp_dentry = NULL; - struct super_block *sb = file->f_path.dentry->d_sb; - int rc = 0; + struct dentry *dentry, *alias; + struct inode *inode; + struct super_block *sb = parent->d_inode->i_sb; + + cFYI(1, ("For %s", name->name)); + + dentry = d_lookup(parent, name); + if (dentry) { + /* FIXME: check for inode number changes? */ + if (dentry->d_inode != NULL) + return dentry; + d_drop(dentry); + dput(dentry); + } - cFYI(1, ("For %s", qstring->name)); - - qstring->hash = full_name_hash(qstring->name, qstring->len); - tmp_dentry = d_lookup(file->f_path.dentry, qstring); - if (tmp_dentry) { - /* BB: overwrite old name? i.e. tmp_dentry->d_name and - * tmp_dentry->d_name.len?? - */ - cFYI(0, ("existing dentry with inode 0x%p", - tmp_dentry->d_inode)); - *ptmp_inode = tmp_dentry->d_inode; - if (*ptmp_inode == NULL) { - *ptmp_inode = cifs_new_inode(sb, inum); - if (*ptmp_inode == NULL) - return rc; - rc = 1; - } - } else { - tmp_dentry = d_alloc(file->f_path.dentry, qstring); - if (tmp_dentry == NULL) { - cERROR(1, ("Failed allocating dentry")); - *ptmp_inode = NULL; - return rc; - } + dentry = d_alloc(parent, name); + if (dentry == NULL) + return NULL; - if (CIFS_SB(sb)->tcon->nocase) - tmp_dentry->d_op = &cifs_ci_dentry_ops; - else - tmp_dentry->d_op = &cifs_dentry_ops; + inode = cifs_iget(sb, fattr); + if (!inode) { + dput(dentry); + return NULL; + } - *ptmp_inode = cifs_new_inode(sb, inum); - if (*ptmp_inode == NULL) - return rc; - rc = 2; + if (CIFS_SB(sb)->tcon->nocase) + dentry->d_op = &cifs_ci_dentry_ops; + else + dentry->d_op = &cifs_dentry_ops; + + alias = d_materialise_unique(dentry, inode); + if (alias != NULL) { + dput(dentry); + if (IS_ERR(alias)) + return NULL; + dentry = alias; } - tmp_dentry->d_time = jiffies; - *pnew_dentry = tmp_dentry; - return rc; + return dentry; } -static void fill_in_inode(struct inode *tmp_inode, int new_buf_type, - char *buf, unsigned int *pobject_type, int isNewInode) +static void +cifs_fill_common_info(struct cifs_fattr *fattr, struct cifs_sb_info *cifs_sb) { - loff_t local_size; - struct timespec local_mtime; - - struct cifsInodeInfo *cifsInfo = CIFS_I(tmp_inode); - struct cifs_sb_info *cifs_sb = CIFS_SB(tmp_inode->i_sb); - __u32 attr; - __u64 allocation_size; - __u64 end_of_file; - umode_t default_mode; - - /* save mtime and size */ - local_mtime = tmp_inode->i_mtime; - local_size = tmp_inode->i_size; - - if (new_buf_type) { - FILE_DIRECTORY_INFO *pfindData = (FILE_DIRECTORY_INFO *)buf; - - attr = le32_to_cpu(pfindData->ExtFileAttributes); - allocation_size = le64_to_cpu(pfindData->AllocationSize); - end_of_file = le64_to_cpu(pfindData->EndOfFile); - tmp_inode->i_atime = - cifs_NTtimeToUnix(pfindData->LastAccessTime); - tmp_inode->i_mtime = - cifs_NTtimeToUnix(pfindData->LastWriteTime); - tmp_inode->i_ctime = - cifs_NTtimeToUnix(pfindData->ChangeTime); - } else { /* legacy, OS2 and DOS style */ - int offset = cifs_sb->tcon->ses->server->timeAdj; - FIND_FILE_STANDARD_INFO *pfindData = - (FIND_FILE_STANDARD_INFO *)buf; - - tmp_inode->i_mtime = cnvrtDosUnixTm(pfindData->LastWriteDate, - pfindData->LastWriteTime, - offset); - tmp_inode->i_atime = cnvrtDosUnixTm(pfindData->LastAccessDate, - pfindData->LastAccessTime, - offset); - tmp_inode->i_ctime = cnvrtDosUnixTm(pfindData->LastWriteDate, - pfindData->LastWriteTime, - offset); - attr = le16_to_cpu(pfindData->Attributes); - allocation_size = le32_to_cpu(pfindData->AllocationSize); - end_of_file = le32_to_cpu(pfindData->DataSize); - } + fattr->cf_uid = cifs_sb->mnt_uid; + fattr->cf_gid = cifs_sb->mnt_gid; - /* Linux can not store file creation time unfortunately so ignore it */ - - cifsInfo->cifsAttrs = attr; -#ifdef CONFIG_CIFS_EXPERIMENTAL - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) { - /* get more accurate mode via ACL - so force inode refresh */ - cifsInfo->time = 0; - } else -#endif /* CONFIG_CIFS_EXPERIMENTAL */ - cifsInfo->time = jiffies; - - /* treat dos attribute of read-only as read-only mode bit e.g. 555? */ - /* 2767 perms - indicate mandatory locking */ - /* BB fill in uid and gid here? with help from winbind? - or retrieve from NTFS stream extended attribute */ - if (atomic_read(&cifsInfo->inUse) == 0) { - tmp_inode->i_uid = cifs_sb->mnt_uid; - tmp_inode->i_gid = cifs_sb->mnt_gid; - } - - if (attr & ATTR_DIRECTORY) - default_mode = cifs_sb->mnt_dir_mode; - else - default_mode = cifs_sb->mnt_file_mode; - - /* set initial permissions */ - if ((atomic_read(&cifsInfo->inUse) == 0) || - (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DYNPERM) == 0) - tmp_inode->i_mode = default_mode; - else { - /* just reenable write bits if !ATTR_READONLY */ - if ((tmp_inode->i_mode & S_IWUGO) == 0 && - (attr & ATTR_READONLY) == 0) - tmp_inode->i_mode |= (S_IWUGO & default_mode); - - tmp_inode->i_mode &= ~S_IFMT; + if (fattr->cf_cifsattrs & ATTR_DIRECTORY) { + fattr->cf_mode = S_IFDIR | cifs_sb->mnt_dir_mode; + fattr->cf_dtype = DT_DIR; + } else { + fattr->cf_mode = S_IFREG | cifs_sb->mnt_file_mode; + fattr->cf_dtype = DT_REG; } - /* clear write bits if ATTR_READONLY is set */ - if (attr & ATTR_READONLY) - tmp_inode->i_mode &= ~S_IWUGO; + if (fattr->cf_cifsattrs & ATTR_READONLY) + fattr->cf_mode &= ~S_IWUGO; - /* set inode type */ - if ((attr & ATTR_SYSTEM) && - (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL)) { - if (end_of_file == 0) { - tmp_inode->i_mode |= S_IFIFO; - *pobject_type = DT_FIFO; + if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL && + fattr->cf_cifsattrs & ATTR_SYSTEM) { + if (fattr->cf_eof == 0) { + fattr->cf_mode &= ~S_IFMT; + fattr->cf_mode |= S_IFIFO; + fattr->cf_dtype = DT_FIFO; } else { /* - * trying to get the type can be slow, so just call - * this a regular file for now, and mark for reval + * trying to get the type and mode via SFU can be slow, + * so just call those regular files for now, and mark + * for reval */ - tmp_inode->i_mode |= S_IFREG; - *pobject_type = DT_REG; - cifsInfo->time = 0; - } - } else { - if (attr & ATTR_DIRECTORY) { - tmp_inode->i_mode |= S_IFDIR; - *pobject_type = DT_DIR; - } else { - tmp_inode->i_mode |= S_IFREG; - *pobject_type = DT_REG; + fattr->cf_flags |= CIFS_FATTR_NEED_REVAL; } } +} - /* can not fill in nlink here as in qpathinfo version and Unx search */ - if (atomic_read(&cifsInfo->inUse) == 0) - atomic_set(&cifsInfo->inUse, 1); - - cifsInfo->server_eof = end_of_file; - spin_lock(&tmp_inode->i_lock); - if (is_size_safe_to_change(cifsInfo, end_of_file)) { - /* can not safely change the file size here if the - client is writing to it due to potential races */ - i_size_write(tmp_inode, end_of_file); - - /* 512 bytes (2**9) is the fake blocksize that must be used */ - /* for this calculation, even though the reported blocksize is larger */ - tmp_inode->i_blocks = (512 - 1 + allocation_size) >> 9; - } - spin_unlock(&tmp_inode->i_lock); - - if (allocation_size < end_of_file) - cFYI(1, ("May be sparse file, allocation less than file size")); - cFYI(1, ("File Size %ld and blocks %llu", - (unsigned long)tmp_inode->i_size, - (unsigned long long)tmp_inode->i_blocks)); - if (S_ISREG(tmp_inode->i_mode)) { - cFYI(1, ("File inode")); - tmp_inode->i_op = &cifs_file_inode_ops; - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) { - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL) - tmp_inode->i_fop = &cifs_file_direct_nobrl_ops; - else - tmp_inode->i_fop = &cifs_file_direct_ops; - } else if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL) - tmp_inode->i_fop = &cifs_file_nobrl_ops; - else - tmp_inode->i_fop = &cifs_file_ops; - - if ((cifs_sb->tcon) && (cifs_sb->tcon->ses) && - (cifs_sb->tcon->ses->server->maxBuf < - PAGE_CACHE_SIZE + MAX_CIFS_HDR_SIZE)) - tmp_inode->i_data.a_ops = &cifs_addr_ops_smallbuf; - else - tmp_inode->i_data.a_ops = &cifs_addr_ops; - - if (isNewInode) - return; /* No sense invalidating pages for new inode - since have not started caching readahead file - data yet */ - - if (timespec_equal(&tmp_inode->i_mtime, &local_mtime) && - (local_size == tmp_inode->i_size)) { - cFYI(1, ("inode exists but unchanged")); - } else { - /* file may have changed on server */ - cFYI(1, ("invalidate inode, readdir detected change")); - invalidate_remote_inode(tmp_inode); - } - } else if (S_ISDIR(tmp_inode->i_mode)) { - cFYI(1, ("Directory inode")); - tmp_inode->i_op = &cifs_dir_inode_ops; - tmp_inode->i_fop = &cifs_dir_ops; - } else if (S_ISLNK(tmp_inode->i_mode)) { - cFYI(1, ("Symbolic Link inode")); - tmp_inode->i_op = &cifs_symlink_inode_ops; - } else { - cFYI(1, ("Init special inode")); - init_special_inode(tmp_inode, tmp_inode->i_mode, - tmp_inode->i_rdev); - } +void +cifs_dir_info_to_fattr(struct cifs_fattr *fattr, FILE_DIRECTORY_INFO *info, + struct cifs_sb_info *cifs_sb) +{ + memset(fattr, 0, sizeof(*fattr)); + fattr->cf_cifsattrs = le32_to_cpu(info->ExtFileAttributes); + fattr->cf_eof = le64_to_cpu(info->EndOfFile); + fattr->cf_bytes = le64_to_cpu(info->AllocationSize); + fattr->cf_atime = cifs_NTtimeToUnix(info->LastAccessTime); + fattr->cf_ctime = cifs_NTtimeToUnix(info->ChangeTime); + fattr->cf_mtime = cifs_NTtimeToUnix(info->LastWriteTime); + + cifs_fill_common_info(fattr, cifs_sb); } -static void unix_fill_in_inode(struct inode *tmp_inode, - FILE_UNIX_INFO *pfindData, unsigned int *pobject_type, int isNewInode) +void +cifs_std_info_to_fattr(struct cifs_fattr *fattr, FIND_FILE_STANDARD_INFO *info, + struct cifs_sb_info *cifs_sb) { - loff_t local_size; - struct timespec local_mtime; - - struct cifsInodeInfo *cifsInfo = CIFS_I(tmp_inode); - struct cifs_sb_info *cifs_sb = CIFS_SB(tmp_inode->i_sb); - - __u32 type = le32_to_cpu(pfindData->Type); - __u64 num_of_bytes = le64_to_cpu(pfindData->NumOfBytes); - __u64 end_of_file = le64_to_cpu(pfindData->EndOfFile); - cifsInfo->time = jiffies; - atomic_inc(&cifsInfo->inUse); - - /* save mtime and size */ - local_mtime = tmp_inode->i_mtime; - local_size = tmp_inode->i_size; - - tmp_inode->i_atime = - cifs_NTtimeToUnix(pfindData->LastAccessTime); - tmp_inode->i_mtime = - cifs_NTtimeToUnix(pfindData->LastModificationTime); - tmp_inode->i_ctime = - cifs_NTtimeToUnix(pfindData->LastStatusChange); - - tmp_inode->i_mode = le64_to_cpu(pfindData->Permissions); - /* since we set the inode type below we need to mask off type - to avoid strange results if bits above were corrupt */ - tmp_inode->i_mode &= ~S_IFMT; - if (type == UNIX_FILE) { - *pobject_type = DT_REG; - tmp_inode->i_mode |= S_IFREG; - } else if (type == UNIX_SYMLINK) { - *pobject_type = DT_LNK; - tmp_inode->i_mode |= S_IFLNK; - } else if (type == UNIX_DIR) { - *pobject_type = DT_DIR; - tmp_inode->i_mode |= S_IFDIR; - } else if (type == UNIX_CHARDEV) { - *pobject_type = DT_CHR; - tmp_inode->i_mode |= S_IFCHR; - tmp_inode->i_rdev = MKDEV(le64_to_cpu(pfindData->DevMajor), - le64_to_cpu(pfindData->DevMinor) & MINORMASK); - } else if (type == UNIX_BLOCKDEV) { - *pobject_type = DT_BLK; - tmp_inode->i_mode |= S_IFBLK; - tmp_inode->i_rdev = MKDEV(le64_to_cpu(pfindData->DevMajor), - le64_to_cpu(pfindData->DevMinor) & MINORMASK); - } else if (type == UNIX_FIFO) { - *pobject_type = DT_FIFO; - tmp_inode->i_mode |= S_IFIFO; - } else if (type == UNIX_SOCKET) { - *pobject_type = DT_SOCK; - tmp_inode->i_mode |= S_IFSOCK; - } else { - /* safest to just call it a file */ - *pobject_type = DT_REG; - tmp_inode->i_mode |= S_IFREG; - cFYI(1, ("unknown inode type %d", type)); - } + int offset = cifs_sb->tcon->ses->server->timeAdj; - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_UID) - tmp_inode->i_uid = cifs_sb->mnt_uid; - else - tmp_inode->i_uid = le64_to_cpu(pfindData->Uid); - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_GID) - tmp_inode->i_gid = cifs_sb->mnt_gid; - else - tmp_inode->i_gid = le64_to_cpu(pfindData->Gid); - tmp_inode->i_nlink = le64_to_cpu(pfindData->Nlinks); - - cifsInfo->server_eof = end_of_file; - spin_lock(&tmp_inode->i_lock); - if (is_size_safe_to_change(cifsInfo, end_of_file)) { - /* can not safely change the file size here if the - client is writing to it due to potential races */ - i_size_write(tmp_inode, end_of_file); - - /* 512 bytes (2**9) is the fake blocksize that must be used */ - /* for this calculation, not the real blocksize */ - tmp_inode->i_blocks = (512 - 1 + num_of_bytes) >> 9; - } - spin_unlock(&tmp_inode->i_lock); + memset(fattr, 0, sizeof(*fattr)); + fattr->cf_atime = cnvrtDosUnixTm(info->LastAccessDate, + info->LastAccessTime, offset); + fattr->cf_ctime = cnvrtDosUnixTm(info->LastWriteDate, + info->LastWriteTime, offset); + fattr->cf_mtime = cnvrtDosUnixTm(info->LastWriteDate, + info->LastWriteTime, offset); - if (S_ISREG(tmp_inode->i_mode)) { - cFYI(1, ("File inode")); - tmp_inode->i_op = &cifs_file_inode_ops; + fattr->cf_cifsattrs = le16_to_cpu(info->Attributes); + fattr->cf_bytes = le32_to_cpu(info->AllocationSize); + fattr->cf_eof = le32_to_cpu(info->DataSize); - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) { - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL) - tmp_inode->i_fop = &cifs_file_direct_nobrl_ops; - else - tmp_inode->i_fop = &cifs_file_direct_ops; - } else if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL) - tmp_inode->i_fop = &cifs_file_nobrl_ops; - else - tmp_inode->i_fop = &cifs_file_ops; - - if ((cifs_sb->tcon) && (cifs_sb->tcon->ses) && - (cifs_sb->tcon->ses->server->maxBuf < - PAGE_CACHE_SIZE + MAX_CIFS_HDR_SIZE)) - tmp_inode->i_data.a_ops = &cifs_addr_ops_smallbuf; - else - tmp_inode->i_data.a_ops = &cifs_addr_ops; - - if (isNewInode) - return; /* No sense invalidating pages for new inode - since we have not started caching readahead - file data for it yet */ - - if (timespec_equal(&tmp_inode->i_mtime, &local_mtime) && - (local_size == tmp_inode->i_size)) { - cFYI(1, ("inode exists but unchanged")); - } else { - /* file may have changed on server */ - cFYI(1, ("invalidate inode, readdir detected change")); - invalidate_remote_inode(tmp_inode); - } - } else if (S_ISDIR(tmp_inode->i_mode)) { - cFYI(1, ("Directory inode")); - tmp_inode->i_op = &cifs_dir_inode_ops; - tmp_inode->i_fop = &cifs_dir_ops; - } else if (S_ISLNK(tmp_inode->i_mode)) { - cFYI(1, ("Symbolic Link inode")); - tmp_inode->i_op = &cifs_symlink_inode_ops; -/* tmp_inode->i_fop = *//* do not need to set to anything */ - } else { - cFYI(1, ("Special inode")); - init_special_inode(tmp_inode, tmp_inode->i_mode, - tmp_inode->i_rdev); - } + cifs_fill_common_info(fattr, cifs_sb); } /* BB eventually need to add the following helper function to @@ -872,7 +621,7 @@ static int cifs_get_name_from_search_buf(struct qstr *pqst, len = strnlen(filename, PATH_MAX); } - *pinum = le64_to_cpu(pFindData->UniqueId); + *pinum = le64_to_cpu(pFindData->basic.UniqueId); } else if (level == SMB_FIND_FILE_DIRECTORY_INFO) { FILE_DIRECTORY_INFO *pFindData = (FILE_DIRECTORY_INFO *)current_entry; @@ -932,11 +681,12 @@ static int cifs_filldir(char *pfindEntry, struct file *file, filldir_t filldir, int rc = 0; struct qstr qstring; struct cifsFileInfo *pCifsF; - unsigned int obj_type; - __u64 inum; + u64 inum; + ino_t ino; + struct super_block *sb; struct cifs_sb_info *cifs_sb; - struct inode *tmp_inode; struct dentry *tmp_dentry; + struct cifs_fattr fattr; /* get filename and len into qstring */ /* get dentry */ @@ -954,60 +704,53 @@ static int cifs_filldir(char *pfindEntry, struct file *file, filldir_t filldir, if (rc != 0) return 0; - cifs_sb = CIFS_SB(file->f_path.dentry->d_sb); + sb = file->f_path.dentry->d_sb; + cifs_sb = CIFS_SB(sb); qstring.name = scratch_buf; rc = cifs_get_name_from_search_buf(&qstring, pfindEntry, pCifsF->srch_inf.info_level, pCifsF->srch_inf.unicode, cifs_sb, - max_len, - &inum /* returned */); + max_len, &inum /* returned */); if (rc) return rc; - /* only these two infolevels return valid inode numbers */ - if (pCifsF->srch_inf.info_level == SMB_FIND_FILE_UNIX || - pCifsF->srch_inf.info_level == SMB_FIND_FILE_ID_FULL_DIR_INFO) - rc = construct_dentry(&qstring, file, &tmp_inode, &tmp_dentry, - &inum); - else - rc = construct_dentry(&qstring, file, &tmp_inode, &tmp_dentry, - NULL); - - if ((tmp_inode == NULL) || (tmp_dentry == NULL)) - return -ENOMEM; - - /* we pass in rc below, indicating whether it is a new inode, - so we can figure out whether to invalidate the inode cached - data if the file has changed */ if (pCifsF->srch_inf.info_level == SMB_FIND_FILE_UNIX) - unix_fill_in_inode(tmp_inode, - (FILE_UNIX_INFO *)pfindEntry, - &obj_type, rc); + cifs_unix_basic_to_fattr(&fattr, + &((FILE_UNIX_INFO *) pfindEntry)->basic, + cifs_sb); else if (pCifsF->srch_inf.info_level == SMB_FIND_FILE_INFO_STANDARD) - fill_in_inode(tmp_inode, 0 /* old level 1 buffer type */, - pfindEntry, &obj_type, rc); + cifs_std_info_to_fattr(&fattr, (FIND_FILE_STANDARD_INFO *) + pfindEntry, cifs_sb); else - fill_in_inode(tmp_inode, 1 /* NT */, pfindEntry, &obj_type, rc); + cifs_dir_info_to_fattr(&fattr, (FILE_DIRECTORY_INFO *) + pfindEntry, cifs_sb); - if (rc) /* new inode - needs to be tied to dentry */ { - d_instantiate(tmp_dentry, tmp_inode); - if (rc == 2) - d_rehash(tmp_dentry); - } + /* FIXME: make _to_fattr functions fill this out */ + if (pCifsF->srch_inf.info_level == SMB_FIND_FILE_ID_FULL_DIR_INFO) + fattr.cf_uniqueid = inum; + else + fattr.cf_uniqueid = iunique(sb, ROOT_I); + ino = cifs_uniqueid_to_ino_t(fattr.cf_uniqueid); + tmp_dentry = cifs_readdir_lookup(file->f_dentry, &qstring, &fattr); rc = filldir(direntry, qstring.name, qstring.len, file->f_pos, - tmp_inode->i_ino, obj_type); + ino, fattr.cf_dtype); + + /* + * we can not return filldir errors to the caller since they are + * "normal" when the stat blocksize is too small - we return remapped + * error instead + * + * FIXME: This looks bogus. filldir returns -EOVERFLOW in the above + * case already. Why should we be clobbering other errors from it? + */ if (rc) { cFYI(1, ("filldir rc = %d", rc)); - /* we can not return filldir errors to the caller - since they are "normal" when the stat blocksize - is too small - we return remapped error instead */ rc = -EOVERFLOW; } - dput(tmp_dentry); return rc; } diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c index 897a052270f..7085a6275c4 100644 --- a/fs/cifs/sess.c +++ b/fs/cifs/sess.c @@ -802,7 +802,7 @@ ssetup_ntlmssp_authenticate: #endif /* CONFIG_CIFS_UPCALL */ } else { #ifdef CONFIG_CIFS_EXPERIMENTAL - if ((experimEnabled > 1) && (type == RawNTLMSSP)) { + if (type == RawNTLMSSP) { if ((pSMB->req.hdr.Flags2 & SMBFLG2_UNICODE) == 0) { cERROR(1, ("NTLMSSP requires Unicode support")); rc = -ENOSYS; diff --git a/fs/cifs/xattr.c b/fs/cifs/xattr.c index e9527eedc63..a75afa3dd9e 100644 --- a/fs/cifs/xattr.c +++ b/fs/cifs/xattr.c @@ -64,8 +64,9 @@ int cifs_removexattr(struct dentry *direntry, const char *ea_name) full_path = build_path_from_dentry(direntry); if (full_path == NULL) { + rc = -ENOMEM; FreeXid(xid); - return -ENOMEM; + return rc; } if (ea_name == NULL) { cFYI(1, ("Null xattr names not supported")); @@ -118,8 +119,9 @@ int cifs_setxattr(struct dentry *direntry, const char *ea_name, full_path = build_path_from_dentry(direntry); if (full_path == NULL) { + rc = -ENOMEM; FreeXid(xid); - return -ENOMEM; + return rc; } /* return dos attributes as pseudo xattr */ /* return alt name if available as pseudo attr */ @@ -225,8 +227,9 @@ ssize_t cifs_getxattr(struct dentry *direntry, const char *ea_name, full_path = build_path_from_dentry(direntry); if (full_path == NULL) { + rc = -ENOMEM; FreeXid(xid); - return -ENOMEM; + return rc; } /* return dos attributes as pseudo xattr */ /* return alt name if available as pseudo attr */ @@ -351,8 +354,9 @@ ssize_t cifs_listxattr(struct dentry *direntry, char *data, size_t buf_size) full_path = build_path_from_dentry(direntry); if (full_path == NULL) { + rc = -ENOMEM; FreeXid(xid); - return -ENOMEM; + return rc; } /* return dos attributes as pseudo xattr */ /* return alt name if available as pseudo attr */ diff --git a/fs/compat.c b/fs/compat.c index cdd51a3a7c5..fbadb947727 100644 --- a/fs/compat.c +++ b/fs/compat.c @@ -1486,8 +1486,8 @@ int compat_do_execve(char * filename, if (!bprm) goto out_files; - retval = mutex_lock_interruptible(¤t->cred_guard_mutex); - if (retval < 0) + retval = -ERESTARTNOINTR; + if (mutex_lock_interruptible(¤t->cred_guard_mutex)) goto out_free; current->in_execve = 1; diff --git a/fs/eventfd.c b/fs/eventfd.c index 3f0e1974abd..31d12de83a2 100644 --- a/fs/eventfd.c +++ b/fs/eventfd.c @@ -14,35 +14,44 @@ #include <linux/list.h> #include <linux/spinlock.h> #include <linux/anon_inodes.h> -#include <linux/eventfd.h> #include <linux/syscalls.h> #include <linux/module.h> +#include <linux/kref.h> +#include <linux/eventfd.h> struct eventfd_ctx { + struct kref kref; wait_queue_head_t wqh; /* * Every time that a write(2) is performed on an eventfd, the * value of the __u64 being written is added to "count" and a * wakeup is performed on "wqh". A read(2) will return the "count" * value to userspace, and will reset "count" to zero. The kernel - * size eventfd_signal() also, adds to the "count" counter and + * side eventfd_signal() also, adds to the "count" counter and * issue a wakeup. */ __u64 count; unsigned int flags; }; -/* - * Adds "n" to the eventfd counter "count". Returns "n" in case of - * success, or a value lower then "n" in case of coutner overflow. - * This function is supposed to be called by the kernel in paths - * that do not allow sleeping. In this function we allow the counter - * to reach the ULLONG_MAX value, and we signal this as overflow - * condition by returining a POLLERR to poll(2). +/** + * eventfd_signal - Adds @n to the eventfd counter. + * @ctx: [in] Pointer to the eventfd context. + * @n: [in] Value of the counter to be added to the eventfd internal counter. + * The value cannot be negative. + * + * This function is supposed to be called by the kernel in paths that do not + * allow sleeping. In this function we allow the counter to reach the ULLONG_MAX + * value, and we signal this as overflow condition by returining a POLLERR + * to poll(2). + * + * Returns @n in case of success, a non-negative number lower than @n in case + * of overflow, or the following error codes: + * + * -EINVAL : The value of @n is negative. */ -int eventfd_signal(struct file *file, int n) +int eventfd_signal(struct eventfd_ctx *ctx, int n) { - struct eventfd_ctx *ctx = file->private_data; unsigned long flags; if (n < 0) @@ -59,9 +68,45 @@ int eventfd_signal(struct file *file, int n) } EXPORT_SYMBOL_GPL(eventfd_signal); +static void eventfd_free(struct kref *kref) +{ + struct eventfd_ctx *ctx = container_of(kref, struct eventfd_ctx, kref); + + kfree(ctx); +} + +/** + * eventfd_ctx_get - Acquires a reference to the internal eventfd context. + * @ctx: [in] Pointer to the eventfd context. + * + * Returns: In case of success, returns a pointer to the eventfd context. + */ +struct eventfd_ctx *eventfd_ctx_get(struct eventfd_ctx *ctx) +{ + kref_get(&ctx->kref); + return ctx; +} +EXPORT_SYMBOL_GPL(eventfd_ctx_get); + +/** + * eventfd_ctx_put - Releases a reference to the internal eventfd context. + * @ctx: [in] Pointer to eventfd context. + * + * The eventfd context reference must have been previously acquired either + * with eventfd_ctx_get() or eventfd_ctx_fdget()). + */ +void eventfd_ctx_put(struct eventfd_ctx *ctx) +{ + kref_put(&ctx->kref, eventfd_free); +} +EXPORT_SYMBOL_GPL(eventfd_ctx_put); + static int eventfd_release(struct inode *inode, struct file *file) { - kfree(file->private_data); + struct eventfd_ctx *ctx = file->private_data; + + wake_up_poll(&ctx->wqh, POLLHUP); + eventfd_ctx_put(ctx); return 0; } @@ -185,6 +230,16 @@ static const struct file_operations eventfd_fops = { .write = eventfd_write, }; +/** + * eventfd_fget - Acquire a reference of an eventfd file descriptor. + * @fd: [in] Eventfd file descriptor. + * + * Returns a pointer to the eventfd file structure in case of success, or the + * following error pointer: + * + * -EBADF : Invalid @fd file descriptor. + * -EINVAL : The @fd file descriptor is not an eventfd file. + */ struct file *eventfd_fget(int fd) { struct file *file; @@ -201,6 +256,48 @@ struct file *eventfd_fget(int fd) } EXPORT_SYMBOL_GPL(eventfd_fget); +/** + * eventfd_ctx_fdget - Acquires a reference to the internal eventfd context. + * @fd: [in] Eventfd file descriptor. + * + * Returns a pointer to the internal eventfd context, otherwise the error + * pointers returned by the following functions: + * + * eventfd_fget + */ +struct eventfd_ctx *eventfd_ctx_fdget(int fd) +{ + struct file *file; + struct eventfd_ctx *ctx; + + file = eventfd_fget(fd); + if (IS_ERR(file)) + return (struct eventfd_ctx *) file; + ctx = eventfd_ctx_get(file->private_data); + fput(file); + + return ctx; +} +EXPORT_SYMBOL_GPL(eventfd_ctx_fdget); + +/** + * eventfd_ctx_fileget - Acquires a reference to the internal eventfd context. + * @file: [in] Eventfd file pointer. + * + * Returns a pointer to the internal eventfd context, otherwise the error + * pointer: + * + * -EINVAL : The @fd file descriptor is not an eventfd file. + */ +struct eventfd_ctx *eventfd_ctx_fileget(struct file *file) +{ + if (file->f_op != &eventfd_fops) + return ERR_PTR(-EINVAL); + + return eventfd_ctx_get(file->private_data); +} +EXPORT_SYMBOL_GPL(eventfd_ctx_fileget); + SYSCALL_DEFINE2(eventfd2, unsigned int, count, int, flags) { int fd; @@ -217,6 +314,7 @@ SYSCALL_DEFINE2(eventfd2, unsigned int, count, int, flags) if (!ctx) return -ENOMEM; + kref_init(&ctx->kref); init_waitqueue_head(&ctx->wqh); ctx->count = count; ctx->flags = flags; diff --git a/fs/exec.c b/fs/exec.c index e639957d7a5..4a8849e45b2 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1277,8 +1277,8 @@ int do_execve(char * filename, if (!bprm) goto out_files; - retval = mutex_lock_interruptible(¤t->cred_guard_mutex); - if (retval < 0) + retval = -ERESTARTNOINTR; + if (mutex_lock_interruptible(¤t->cred_guard_mutex)) goto out_free; current->in_execve = 1; diff --git a/fs/exofs/common.h b/fs/exofs/common.h index 24667eedc02..c6718e4817f 100644 --- a/fs/exofs/common.h +++ b/fs/exofs/common.h @@ -2,9 +2,7 @@ * common.h - Common definitions for both Kernel and user-mode utilities * * Copyright (C) 2005, 2006 - * Avishay Traeger (avishay@gmail.com) (avishay@il.ibm.com) - * Copyright (C) 2005, 2006 - * International Business Machines + * Avishay Traeger (avishay@gmail.com) * Copyright (C) 2008, 2009 * Boaz Harrosh <bharrosh@panasas.com> * diff --git a/fs/exofs/dir.c b/fs/exofs/dir.c index 65b0c8c776a..4cfab1cc75c 100644 --- a/fs/exofs/dir.c +++ b/fs/exofs/dir.c @@ -1,8 +1,6 @@ /* * Copyright (C) 2005, 2006 - * Avishay Traeger (avishay@gmail.com) (avishay@il.ibm.com) - * Copyright (C) 2005, 2006 - * International Business Machines + * Avishay Traeger (avishay@gmail.com) * Copyright (C) 2008, 2009 * Boaz Harrosh <bharrosh@panasas.com> * diff --git a/fs/exofs/exofs.h b/fs/exofs/exofs.h index 0fd4c785967..5ec72e020b2 100644 --- a/fs/exofs/exofs.h +++ b/fs/exofs/exofs.h @@ -1,8 +1,6 @@ /* * Copyright (C) 2005, 2006 - * Avishay Traeger (avishay@gmail.com) (avishay@il.ibm.com) - * Copyright (C) 2005, 2006 - * International Business Machines + * Avishay Traeger (avishay@gmail.com) * Copyright (C) 2008, 2009 * Boaz Harrosh <bharrosh@panasas.com> * @@ -156,6 +154,9 @@ ino_t exofs_parent_ino(struct dentry *child); int exofs_set_link(struct inode *, struct exofs_dir_entry *, struct page *, struct inode *); +/* super.c */ +int exofs_sync_fs(struct super_block *sb, int wait); + /********************* * operation vectors * *********************/ diff --git a/fs/exofs/file.c b/fs/exofs/file.c index 6ed7fe48475..839b9dc1e70 100644 --- a/fs/exofs/file.c +++ b/fs/exofs/file.c @@ -1,8 +1,6 @@ /* * Copyright (C) 2005, 2006 - * Avishay Traeger (avishay@gmail.com) (avishay@il.ibm.com) - * Copyright (C) 2005, 2006 - * International Business Machines + * Avishay Traeger (avishay@gmail.com) * Copyright (C) 2008, 2009 * Boaz Harrosh <bharrosh@panasas.com> * @@ -47,16 +45,23 @@ static int exofs_file_fsync(struct file *filp, struct dentry *dentry, { int ret; struct address_space *mapping = filp->f_mapping; + struct inode *inode = dentry->d_inode; + struct super_block *sb; ret = filemap_write_and_wait(mapping); if (ret) return ret; - /*Note: file_fsync below also calles sync_blockdev, which is a no-op - * for exofs, but other then that it does sync_inode and - * sync_superblock which is what we need here. - */ - return file_fsync(filp, dentry, datasync); + /* sync the inode attributes */ + ret = write_inode_now(inode, 1); + + /* This is a good place to write the sb */ + /* TODO: Sechedule an sb-sync on create */ + sb = inode->i_sb; + if (sb->s_dirt) + exofs_sync_fs(sb, 1); + + return ret; } static int exofs_flush(struct file *file, fl_owner_t id) diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c index 77d0a295eb1..6c10f747669 100644 --- a/fs/exofs/inode.c +++ b/fs/exofs/inode.c @@ -1,8 +1,6 @@ /* * Copyright (C) 2005, 2006 - * Avishay Traeger (avishay@gmail.com) (avishay@il.ibm.com) - * Copyright (C) 2005, 2006 - * International Business Machines + * Avishay Traeger (avishay@gmail.com) * Copyright (C) 2008, 2009 * Boaz Harrosh <bharrosh@panasas.com> * @@ -295,6 +293,9 @@ static int read_exec(struct page_collect *pcol, bool is_sync) err: if (!is_sync) _unlock_pcol_pages(pcol, ret, READ); + else /* Pages unlocked by caller in sync mode only free bio */ + pcol_free(pcol); + kfree(pcol_copy); if (or) osd_end_request(or); diff --git a/fs/exofs/namei.c b/fs/exofs/namei.c index 77fdd765e76..b7dd0c23686 100644 --- a/fs/exofs/namei.c +++ b/fs/exofs/namei.c @@ -1,8 +1,6 @@ /* * Copyright (C) 2005, 2006 - * Avishay Traeger (avishay@gmail.com) (avishay@il.ibm.com) - * Copyright (C) 2005, 2006 - * International Business Machines + * Avishay Traeger (avishay@gmail.com) * Copyright (C) 2008, 2009 * Boaz Harrosh <bharrosh@panasas.com> * diff --git a/fs/exofs/osd.c b/fs/exofs/osd.c index b3d2ccb87aa..4372542df28 100644 --- a/fs/exofs/osd.c +++ b/fs/exofs/osd.c @@ -1,8 +1,6 @@ /* * Copyright (C) 2005, 2006 - * Avishay Traeger (avishay@gmail.com) (avishay@il.ibm.com) - * Copyright (C) 2005, 2006 - * International Business Machines + * Avishay Traeger (avishay@gmail.com) * Copyright (C) 2008, 2009 * Boaz Harrosh <bharrosh@panasas.com> * diff --git a/fs/exofs/super.c b/fs/exofs/super.c index 8216c5b77b5..a343b4ea62f 100644 --- a/fs/exofs/super.c +++ b/fs/exofs/super.c @@ -1,8 +1,6 @@ /* * Copyright (C) 2005, 2006 - * Avishay Traeger (avishay@gmail.com) (avishay@il.ibm.com) - * Copyright (C) 2005, 2006 - * International Business Machines + * Avishay Traeger (avishay@gmail.com) * Copyright (C) 2008, 2009 * Boaz Harrosh <bharrosh@panasas.com> * @@ -200,7 +198,7 @@ static const struct export_operations exofs_export_ops; /* * Write the superblock to the OSD */ -static int exofs_sync_fs(struct super_block *sb, int wait) +int exofs_sync_fs(struct super_block *sb, int wait) { struct exofs_sb_info *sbi; struct exofs_fscb *fscb; diff --git a/fs/exofs/symlink.c b/fs/exofs/symlink.c index 36e2d7bc7f7..4dd687c3e74 100644 --- a/fs/exofs/symlink.c +++ b/fs/exofs/symlink.c @@ -1,8 +1,6 @@ /* * Copyright (C) 2005, 2006 - * Avishay Traeger (avishay@gmail.com) (avishay@il.ibm.com) - * Copyright (C) 2005, 2006 - * International Business Machines + * Avishay Traeger (avishay@gmail.com) * Copyright (C) 2008, 2009 * Boaz Harrosh <bharrosh@panasas.com> * diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c index 6524ecaebb7..e1dedb0f787 100644 --- a/fs/ext2/namei.c +++ b/fs/ext2/namei.c @@ -66,8 +66,16 @@ static struct dentry *ext2_lookup(struct inode * dir, struct dentry *dentry, str inode = NULL; if (ino) { inode = ext2_iget(dir->i_sb, ino); - if (IS_ERR(inode)) - return ERR_CAST(inode); + if (unlikely(IS_ERR(inode))) { + if (PTR_ERR(inode) == -ESTALE) { + ext2_error(dir->i_sb, __func__, + "deleted inode referenced: %lu", + ino); + return ERR_PTR(-EIO); + } else { + return ERR_CAST(inode); + } + } } return d_splice_alias(inode, dentry); } diff --git a/fs/fat/file.c b/fs/fat/file.c index b28ea646ff6..f042b965c95 100644 --- a/fs/fat/file.c +++ b/fs/fat/file.c @@ -134,7 +134,7 @@ static int fat_file_release(struct inode *inode, struct file *filp) if ((filp->f_mode & FMODE_WRITE) && MSDOS_SB(inode->i_sb)->options.flush) { fat_flush_inodes(inode->i_sb, inode, NULL); - congestion_wait(WRITE, HZ/10); + congestion_wait(BLK_RW_ASYNC, HZ/10); } return 0; } diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 8fed2ed12f3..cbceacbc0bf 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -16,6 +16,7 @@ #include <linux/pagemap.h> #include <linux/file.h> #include <linux/slab.h> +#include <linux/blkdev.h> MODULE_ALIAS_MISCDEV(FUSE_MINOR); @@ -286,8 +287,8 @@ __releases(&fc->lock) } if (fc->num_background == FUSE_CONGESTION_THRESHOLD && fc->connected && fc->bdi_initialized) { - clear_bdi_congested(&fc->bdi, READ); - clear_bdi_congested(&fc->bdi, WRITE); + clear_bdi_congested(&fc->bdi, BLK_RW_SYNC); + clear_bdi_congested(&fc->bdi, BLK_RW_ASYNC); } fc->num_background--; fc->active_background--; @@ -414,8 +415,8 @@ static void fuse_request_send_nowait_locked(struct fuse_conn *fc, fc->blocked = 1; if (fc->num_background == FUSE_CONGESTION_THRESHOLD && fc->bdi_initialized) { - set_bdi_congested(&fc->bdi, READ); - set_bdi_congested(&fc->bdi, WRITE); + set_bdi_congested(&fc->bdi, BLK_RW_SYNC); + set_bdi_congested(&fc->bdi, BLK_RW_ASYNC); } list_add_tail(&req->list, &fc->bg_queue); flush_bg_queue(fc); @@ -849,6 +850,81 @@ err: return err; } +static int fuse_notify_inval_inode(struct fuse_conn *fc, unsigned int size, + struct fuse_copy_state *cs) +{ + struct fuse_notify_inval_inode_out outarg; + int err = -EINVAL; + + if (size != sizeof(outarg)) + goto err; + + err = fuse_copy_one(cs, &outarg, sizeof(outarg)); + if (err) + goto err; + fuse_copy_finish(cs); + + down_read(&fc->killsb); + err = -ENOENT; + if (!fc->sb) + goto err_unlock; + + err = fuse_reverse_inval_inode(fc->sb, outarg.ino, + outarg.off, outarg.len); + +err_unlock: + up_read(&fc->killsb); + return err; + +err: + fuse_copy_finish(cs); + return err; +} + +static int fuse_notify_inval_entry(struct fuse_conn *fc, unsigned int size, + struct fuse_copy_state *cs) +{ + struct fuse_notify_inval_entry_out outarg; + int err = -EINVAL; + char buf[FUSE_NAME_MAX+1]; + struct qstr name; + + if (size < sizeof(outarg)) + goto err; + + err = fuse_copy_one(cs, &outarg, sizeof(outarg)); + if (err) + goto err; + + err = -ENAMETOOLONG; + if (outarg.namelen > FUSE_NAME_MAX) + goto err; + + name.name = buf; + name.len = outarg.namelen; + err = fuse_copy_one(cs, buf, outarg.namelen + 1); + if (err) + goto err; + fuse_copy_finish(cs); + buf[outarg.namelen] = 0; + name.hash = full_name_hash(name.name, name.len); + + down_read(&fc->killsb); + err = -ENOENT; + if (!fc->sb) + goto err_unlock; + + err = fuse_reverse_inval_entry(fc->sb, outarg.parent, &name); + +err_unlock: + up_read(&fc->killsb); + return err; + +err: + fuse_copy_finish(cs); + return err; +} + static int fuse_notify(struct fuse_conn *fc, enum fuse_notify_code code, unsigned int size, struct fuse_copy_state *cs) { @@ -856,6 +932,12 @@ static int fuse_notify(struct fuse_conn *fc, enum fuse_notify_code code, case FUSE_NOTIFY_POLL: return fuse_notify_poll(fc, size, cs); + case FUSE_NOTIFY_INVAL_INODE: + return fuse_notify_inval_inode(fc, size, cs); + + case FUSE_NOTIFY_INVAL_ENTRY: + return fuse_notify_inval_entry(fc, size, cs); + default: fuse_copy_finish(cs); return -EINVAL; @@ -910,7 +992,7 @@ static ssize_t fuse_dev_write(struct kiocb *iocb, const struct iovec *iov, unsigned long nr_segs, loff_t pos) { int err; - unsigned nbytes = iov_length(iov, nr_segs); + size_t nbytes = iov_length(iov, nr_segs); struct fuse_req *req; struct fuse_out_header oh; struct fuse_copy_state cs; diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index b3089a083d3..e703654e7f4 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -375,7 +375,7 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, int mode, struct fuse_conn *fc = get_fuse_conn(dir); struct fuse_req *req; struct fuse_req *forget_req; - struct fuse_open_in inarg; + struct fuse_create_in inarg; struct fuse_open_out outopen; struct fuse_entry_out outentry; struct fuse_file *ff; @@ -399,15 +399,20 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, int mode, if (!ff) goto out_put_request; + if (!fc->dont_mask) + mode &= ~current_umask(); + flags &= ~O_NOCTTY; memset(&inarg, 0, sizeof(inarg)); memset(&outentry, 0, sizeof(outentry)); inarg.flags = flags; inarg.mode = mode; + inarg.umask = current_umask(); req->in.h.opcode = FUSE_CREATE; req->in.h.nodeid = get_node_id(dir); req->in.numargs = 2; - req->in.args[0].size = sizeof(inarg); + req->in.args[0].size = fc->minor < 12 ? sizeof(struct fuse_open_in) : + sizeof(inarg); req->in.args[0].value = &inarg; req->in.args[1].size = entry->d_name.len + 1; req->in.args[1].value = entry->d_name.name; @@ -546,12 +551,17 @@ static int fuse_mknod(struct inode *dir, struct dentry *entry, int mode, if (IS_ERR(req)) return PTR_ERR(req); + if (!fc->dont_mask) + mode &= ~current_umask(); + memset(&inarg, 0, sizeof(inarg)); inarg.mode = mode; inarg.rdev = new_encode_dev(rdev); + inarg.umask = current_umask(); req->in.h.opcode = FUSE_MKNOD; req->in.numargs = 2; - req->in.args[0].size = sizeof(inarg); + req->in.args[0].size = fc->minor < 12 ? FUSE_COMPAT_MKNOD_IN_SIZE : + sizeof(inarg); req->in.args[0].value = &inarg; req->in.args[1].size = entry->d_name.len + 1; req->in.args[1].value = entry->d_name.name; @@ -578,8 +588,12 @@ static int fuse_mkdir(struct inode *dir, struct dentry *entry, int mode) if (IS_ERR(req)) return PTR_ERR(req); + if (!fc->dont_mask) + mode &= ~current_umask(); + memset(&inarg, 0, sizeof(inarg)); inarg.mode = mode; + inarg.umask = current_umask(); req->in.h.opcode = FUSE_MKDIR; req->in.numargs = 2; req->in.args[0].size = sizeof(inarg); @@ -845,6 +859,43 @@ int fuse_update_attributes(struct inode *inode, struct kstat *stat, return err; } +int fuse_reverse_inval_entry(struct super_block *sb, u64 parent_nodeid, + struct qstr *name) +{ + int err = -ENOTDIR; + struct inode *parent; + struct dentry *dir; + struct dentry *entry; + + parent = ilookup5(sb, parent_nodeid, fuse_inode_eq, &parent_nodeid); + if (!parent) + return -ENOENT; + + mutex_lock(&parent->i_mutex); + if (!S_ISDIR(parent->i_mode)) + goto unlock; + + err = -ENOENT; + dir = d_find_alias(parent); + if (!dir) + goto unlock; + + entry = d_lookup(dir, name); + dput(dir); + if (!entry) + goto unlock; + + fuse_invalidate_attr(parent); + fuse_invalidate_entry(entry); + dput(entry); + err = 0; + + unlock: + mutex_unlock(&parent->i_mutex); + iput(parent); + return err; +} + /* * Calling into a user-controlled filesystem gives the filesystem * daemon ptrace-like capabilities over the requester process. This diff --git a/fs/fuse/file.c b/fs/fuse/file.c index fce6ce694fd..cbc464043b6 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -1922,7 +1922,7 @@ unsigned fuse_file_poll(struct file *file, poll_table *wait) req = fuse_get_req(fc); if (IS_ERR(req)) - return PTR_ERR(req); + return POLLERR; req->in.h.opcode = FUSE_POLL; req->in.h.nodeid = ff->nodeid; diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index aaf2f9ff970..52b641fc0fa 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -446,6 +446,9 @@ struct fuse_conn { /** Do multi-page cached writes */ unsigned big_writes:1; + /** Don't apply umask to creation modes */ + unsigned dont_mask:1; + /** The number of requests waiting for completion */ atomic_t num_waiting; @@ -481,6 +484,12 @@ struct fuse_conn { /** Called on final put */ void (*release)(struct fuse_conn *); + + /** Super block for this connection. */ + struct super_block *sb; + + /** Read/write semaphore to hold when accessing sb. */ + struct rw_semaphore killsb; }; static inline struct fuse_conn *get_fuse_conn_super(struct super_block *sb) @@ -509,6 +518,11 @@ extern const struct file_operations fuse_dev_operations; extern const struct dentry_operations fuse_dentry_operations; /** + * Inode to nodeid comparison. + */ +int fuse_inode_eq(struct inode *inode, void *_nodeidp); + +/** * Get a filled in inode */ struct inode *fuse_iget(struct super_block *sb, u64 nodeid, @@ -708,6 +722,19 @@ void fuse_release_nowrite(struct inode *inode); u64 fuse_get_attr_version(struct fuse_conn *fc); +/** + * File-system tells the kernel to invalidate cache for the given node id. + */ +int fuse_reverse_inval_inode(struct super_block *sb, u64 nodeid, + loff_t offset, loff_t len); + +/** + * File-system tells the kernel to invalidate parent attributes and + * the dentry matching parent/name. + */ +int fuse_reverse_inval_entry(struct super_block *sb, u64 parent_nodeid, + struct qstr *name); + int fuse_do_open(struct fuse_conn *fc, u64 nodeid, struct file *file, bool isdir); ssize_t fuse_direct_io(struct file *file, const char __user *buf, diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index d8673ccf90b..f91ccc4a189 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -206,7 +206,7 @@ static void fuse_init_inode(struct inode *inode, struct fuse_attr *attr) BUG(); } -static int fuse_inode_eq(struct inode *inode, void *_nodeidp) +int fuse_inode_eq(struct inode *inode, void *_nodeidp) { u64 nodeid = *(u64 *) _nodeidp; if (get_node_id(inode) == nodeid) @@ -257,6 +257,31 @@ struct inode *fuse_iget(struct super_block *sb, u64 nodeid, return inode; } +int fuse_reverse_inval_inode(struct super_block *sb, u64 nodeid, + loff_t offset, loff_t len) +{ + struct inode *inode; + pgoff_t pg_start; + pgoff_t pg_end; + + inode = ilookup5(sb, nodeid, fuse_inode_eq, &nodeid); + if (!inode) + return -ENOENT; + + fuse_invalidate_attr(inode); + if (offset >= 0) { + pg_start = offset >> PAGE_CACHE_SHIFT; + if (len <= 0) + pg_end = -1; + else + pg_end = (offset + len - 1) >> PAGE_CACHE_SHIFT; + invalidate_inode_pages2_range(inode->i_mapping, + pg_start, pg_end); + } + iput(inode); + return 0; +} + static void fuse_umount_begin(struct super_block *sb) { fuse_abort_conn(get_fuse_conn_super(sb)); @@ -480,6 +505,7 @@ void fuse_conn_init(struct fuse_conn *fc) memset(fc, 0, sizeof(*fc)); spin_lock_init(&fc->lock); mutex_init(&fc->inst_mutex); + init_rwsem(&fc->killsb); atomic_set(&fc->count, 1); init_waitqueue_head(&fc->waitq); init_waitqueue_head(&fc->blocked_waitq); @@ -725,6 +751,8 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req) } if (arg->flags & FUSE_BIG_WRITES) fc->big_writes = 1; + if (arg->flags & FUSE_DONT_MASK) + fc->dont_mask = 1; } else { ra_pages = fc->max_read / PAGE_CACHE_SIZE; fc->no_lock = 1; @@ -748,7 +776,7 @@ static void fuse_send_init(struct fuse_conn *fc, struct fuse_req *req) arg->minor = FUSE_KERNEL_MINOR_VERSION; arg->max_readahead = fc->bdi.ra_pages * PAGE_CACHE_SIZE; arg->flags |= FUSE_ASYNC_READ | FUSE_POSIX_LOCKS | FUSE_ATOMIC_O_TRUNC | - FUSE_EXPORT_SUPPORT | FUSE_BIG_WRITES; + FUSE_EXPORT_SUPPORT | FUSE_BIG_WRITES | FUSE_DONT_MASK; req->in.h.opcode = FUSE_INIT; req->in.numargs = 1; req->in.args[0].size = sizeof(*arg); @@ -860,10 +888,16 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent) fuse_conn_init(fc); fc->dev = sb->s_dev; + fc->sb = sb; err = fuse_bdi_init(fc, sb); if (err) goto err_put_conn; + /* Handle umasking inside the fuse code */ + if (sb->s_flags & MS_POSIXACL) + fc->dont_mask = 1; + sb->s_flags |= MS_POSIXACL; + fc->release = fuse_free_conn; fc->flags = d.flags; fc->user_id = d.user_id; @@ -941,12 +975,25 @@ static int fuse_get_sb(struct file_system_type *fs_type, return get_sb_nodev(fs_type, flags, raw_data, fuse_fill_super, mnt); } +static void fuse_kill_sb_anon(struct super_block *sb) +{ + struct fuse_conn *fc = get_fuse_conn_super(sb); + + if (fc) { + down_write(&fc->killsb); + fc->sb = NULL; + up_write(&fc->killsb); + } + + kill_anon_super(sb); +} + static struct file_system_type fuse_fs_type = { .owner = THIS_MODULE, .name = "fuse", .fs_flags = FS_HAS_SUBTYPE, .get_sb = fuse_get_sb, - .kill_sb = kill_anon_super, + .kill_sb = fuse_kill_sb_anon, }; #ifdef CONFIG_BLOCK @@ -958,11 +1005,24 @@ static int fuse_get_sb_blk(struct file_system_type *fs_type, mnt); } +static void fuse_kill_sb_blk(struct super_block *sb) +{ + struct fuse_conn *fc = get_fuse_conn_super(sb); + + if (fc) { + down_write(&fc->killsb); + fc->sb = NULL; + up_write(&fc->killsb); + } + + kill_block_super(sb); +} + static struct file_system_type fuseblk_fs_type = { .owner = THIS_MODULE, .name = "fuseblk", .get_sb = fuse_get_sb_blk, - .kill_sb = kill_block_super, + .kill_sb = fuse_kill_sb_blk, .fs_flags = FS_REQUIRES_DEV | FS_HAS_SUBTYPE, }; diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c index fe02ad4740e..032604e5ef2 100644 --- a/fs/hostfs/hostfs_kern.c +++ b/fs/hostfs/hostfs_kern.c @@ -972,6 +972,7 @@ static int hostfs_fill_sb_common(struct super_block *sb, void *d, int silent) sb->s_blocksize_bits = 10; sb->s_magic = HOSTFS_SUPER_MAGIC; sb->s_op = &hostfs_sbops; + sb->s_maxbytes = MAX_LFS_FILESIZE; /* NULL is printed as <NULL> by sprintf: avoid that. */ if (req_root == NULL) diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c index 58a7963e168..85f96bc651c 100644 --- a/fs/isofs/inode.c +++ b/fs/isofs/inode.c @@ -142,6 +142,7 @@ static const struct dentry_operations isofs_dentry_ops[] = { struct iso9660_options{ unsigned int rock:1; + unsigned int joliet:1; unsigned int cruft:1; unsigned int hide:1; unsigned int showassoc:1; @@ -151,7 +152,6 @@ struct iso9660_options{ unsigned int gid_set:1; unsigned int utf8:1; unsigned char map; - char joliet; unsigned char check; unsigned int blocksize; mode_t fmode; @@ -632,7 +632,7 @@ static int isofs_fill_super(struct super_block *s, void *data, int silent) else if (isonum_711(vdp->type) == ISO_VD_SUPPLEMENTARY) { sec = (struct iso_supplementary_descriptor *)vdp; if (sec->escape[0] == 0x25 && sec->escape[1] == 0x2f) { - if (opt.joliet == 'y') { + if (opt.joliet) { if (sec->escape[2] == 0x40) joliet_level = 1; else if (sec->escape[2] == 0x43) diff --git a/fs/jffs2/erase.c b/fs/jffs2/erase.c index a0244740b75..b47679be118 100644 --- a/fs/jffs2/erase.c +++ b/fs/jffs2/erase.c @@ -270,19 +270,21 @@ static inline void jffs2_remove_node_refs_from_ino_list(struct jffs2_sb_info *c, D2({ int i=0; struct jffs2_raw_node_ref *this; - printk(KERN_DEBUG "After remove_node_refs_from_ino_list: \n" KERN_DEBUG); + printk(KERN_DEBUG "After remove_node_refs_from_ino_list: \n"); this = ic->nodes; + printk(KERN_DEBUG); while(this) { - printk( "0x%08x(%d)->", ref_offset(this), ref_flags(this)); + printk(KERN_CONT "0x%08x(%d)->", + ref_offset(this), ref_flags(this)); if (++i == 5) { - printk("\n" KERN_DEBUG); + printk(KERN_DEBUG); i=0; } this = this->next_in_ino; } - printk("\n"); + printk(KERN_CONT "\n"); }); switch (ic->class) { diff --git a/fs/jffs2/scan.c b/fs/jffs2/scan.c index 7515e73e2bf..696686cc206 100644 --- a/fs/jffs2/scan.c +++ b/fs/jffs2/scan.c @@ -130,9 +130,9 @@ int jffs2_scan_medium(struct jffs2_sb_info *c) if (jffs2_sum_active()) { s = kzalloc(sizeof(struct jffs2_summary), GFP_KERNEL); if (!s) { - kfree(flashbuf); JFFS2_WARNING("Can't allocate memory for summary\n"); - return -ENOMEM; + ret = -ENOMEM; + goto out; } } diff --git a/fs/namei.c b/fs/namei.c index 5b961eb71cb..f3c5b278895 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1761,6 +1761,10 @@ do_last: goto exit; } filp = nameidata_to_filp(&nd, open_flag); + if (IS_ERR(filp)) + ima_counts_put(&nd.path, + acc_mode & (MAY_READ | MAY_WRITE | + MAY_EXEC)); mnt_drop_write(nd.path.mnt); if (nd.root.mnt) path_put(&nd.root); @@ -1817,6 +1821,9 @@ ok: goto exit; } filp = nameidata_to_filp(&nd, open_flag); + if (IS_ERR(filp)) + ima_counts_put(&nd.path, + acc_mode & (MAY_READ | MAY_WRITE | MAY_EXEC)); /* * It is now safe to drop the mnt write * because the filp has had a write taken diff --git a/fs/namespace.c b/fs/namespace.c index 3dc283fd471..277c28a63ea 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -22,6 +22,7 @@ #include <linux/seq_file.h> #include <linux/mnt_namespace.h> #include <linux/namei.h> +#include <linux/nsproxy.h> #include <linux/security.h> #include <linux/mount.h> #include <linux/ramfs.h> diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c index 46177cb8706..b35d2a61606 100644 --- a/fs/nfs/getroot.c +++ b/fs/nfs/getroot.c @@ -30,7 +30,6 @@ #include <linux/nfs_idmap.h> #include <linux/vfs.h> #include <linux/namei.h> -#include <linux/mnt_namespace.h> #include <linux/security.h> #include <asm/system.h> diff --git a/fs/nfs/write.c b/fs/nfs/write.c index ce728829f79..35d81316163 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -19,6 +19,7 @@ #include <linux/nfs_mount.h> #include <linux/nfs_page.h> #include <linux/backing-dev.h> +#include <linux/blkdev.h> #include <asm/uaccess.h> @@ -202,8 +203,10 @@ static int nfs_set_page_writeback(struct page *page) struct nfs_server *nfss = NFS_SERVER(inode); if (atomic_long_inc_return(&nfss->writeback) > - NFS_CONGESTION_ON_THRESH) - set_bdi_congested(&nfss->backing_dev_info, WRITE); + NFS_CONGESTION_ON_THRESH) { + set_bdi_congested(&nfss->backing_dev_info, + BLK_RW_ASYNC); + } } return ret; } @@ -215,7 +218,7 @@ static void nfs_end_page_writeback(struct page *page) end_page_writeback(page); if (atomic_long_dec_return(&nfss->writeback) < NFS_CONGESTION_OFF_THRESH) - clear_bdi_congested(&nfss->backing_dev_info, WRITE); + clear_bdi_congested(&nfss->backing_dev_info, BLK_RW_ASYNC); } /* diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 4145083dcf8..23341c1063b 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -678,7 +678,6 @@ __be32 nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, int access, struct file **filp) { - const struct cred *cred = current_cred(); struct dentry *dentry; struct inode *inode; int flags = O_RDONLY|O_LARGEFILE; @@ -733,7 +732,7 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, vfs_dq_init(inode); } *filp = dentry_open(dget(dentry), mntget(fhp->fh_export->ex_path.mnt), - flags, cred); + flags, current_cred()); if (IS_ERR(*filp)) host_err = PTR_ERR(*filp); else diff --git a/fs/nilfs2/bmap.c b/fs/nilfs2/bmap.c index 36df60b6d8a..99d58a028b9 100644 --- a/fs/nilfs2/bmap.c +++ b/fs/nilfs2/bmap.c @@ -568,6 +568,7 @@ void nilfs_bmap_abort_update_v(struct nilfs_bmap *bmap, } static struct lock_class_key nilfs_bmap_dat_lock_key; +static struct lock_class_key nilfs_bmap_mdt_lock_key; /** * nilfs_bmap_read - read a bmap from an inode @@ -603,7 +604,11 @@ int nilfs_bmap_read(struct nilfs_bmap *bmap, struct nilfs_inode *raw_inode) bmap->b_ptr_type = NILFS_BMAP_PTR_VS; bmap->b_last_allocated_key = 0; bmap->b_last_allocated_ptr = NILFS_BMAP_INVALID_PTR; + lockdep_set_class(&bmap->b_sem, &nilfs_bmap_mdt_lock_key); break; + case NILFS_IFILE_INO: + lockdep_set_class(&bmap->b_sem, &nilfs_bmap_mdt_lock_key); + /* Fall through */ default: bmap->b_ptr_type = NILFS_BMAP_PTR_VM; bmap->b_last_allocated_key = 0; diff --git a/fs/nilfs2/cpfile.c b/fs/nilfs2/cpfile.c index 7d49813f66d..aec942cf79e 100644 --- a/fs/nilfs2/cpfile.c +++ b/fs/nilfs2/cpfile.c @@ -307,7 +307,7 @@ int nilfs_cpfile_delete_checkpoints(struct inode *cpfile, ret = nilfs_cpfile_get_checkpoint_block(cpfile, cno, 0, &cp_bh); if (ret < 0) { if (ret != -ENOENT) - goto out_header; + break; /* skip hole */ ret = 0; continue; @@ -340,7 +340,7 @@ int nilfs_cpfile_delete_checkpoints(struct inode *cpfile, continue; printk(KERN_ERR "%s: cannot delete block\n", __func__); - goto out_header; + break; } } @@ -358,7 +358,6 @@ int nilfs_cpfile_delete_checkpoints(struct inode *cpfile, kunmap_atomic(kaddr, KM_USER0); } - out_header: brelse(header_bh); out_sem: diff --git a/fs/nilfs2/dat.c b/fs/nilfs2/dat.c index 0b2710e2d56..8927ca27e6f 100644 --- a/fs/nilfs2/dat.c +++ b/fs/nilfs2/dat.c @@ -134,15 +134,6 @@ void nilfs_dat_commit_start(struct inode *dat, struct nilfs_palloc_req *req, entry = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr, req->pr_entry_bh, kaddr); entry->de_start = cpu_to_le64(nilfs_mdt_cno(dat)); - if (entry->de_blocknr != cpu_to_le64(0) || - entry->de_end != cpu_to_le64(NILFS_CNO_MAX)) { - printk(KERN_CRIT - "%s: vbn = %llu, start = %llu, end = %llu, pbn = %llu\n", - __func__, (unsigned long long)req->pr_entry_nr, - (unsigned long long)le64_to_cpu(entry->de_start), - (unsigned long long)le64_to_cpu(entry->de_end), - (unsigned long long)le64_to_cpu(entry->de_blocknr)); - } entry->de_blocknr = cpu_to_le64(blocknr); kunmap_atomic(kaddr, KM_USER0); diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c index aa977549919..8b5e4778cf2 100644 --- a/fs/nilfs2/segment.c +++ b/fs/nilfs2/segment.c @@ -1829,26 +1829,13 @@ static int nilfs_segctor_write(struct nilfs_sc_info *sci, err = nilfs_segbuf_write(segbuf, &wi); res = nilfs_segbuf_wait(segbuf, &wi); - err = unlikely(err) ? : res; - if (unlikely(err)) + err = err ? : res; + if (err) return err; } return 0; } -static int nilfs_page_has_uncleared_buffer(struct page *page) -{ - struct buffer_head *head, *bh; - - head = bh = page_buffers(page); - do { - if (buffer_dirty(bh) && !list_empty(&bh->b_assoc_buffers)) - return 1; - bh = bh->b_this_page; - } while (bh != head); - return 0; -} - static void __nilfs_end_page_io(struct page *page, int err) { if (!err) { @@ -1872,12 +1859,11 @@ static void nilfs_end_page_io(struct page *page, int err) if (!page) return; - if (buffer_nilfs_node(page_buffers(page)) && - nilfs_page_has_uncleared_buffer(page)) - /* For b-tree node pages, this function may be called twice - or more because they might be split in a segment. - This check assures that cleanup has been done for all - buffers in a split btnode page. */ + if (buffer_nilfs_node(page_buffers(page)) && !PageWriteback(page)) + /* + * For b-tree node pages, this function may be called twice + * or more because they might be split in a segment. + */ return; __nilfs_end_page_io(page, err); @@ -1940,7 +1926,7 @@ static void nilfs_segctor_abort_write(struct nilfs_sc_info *sci, } if (bh->b_page != fs_page) { nilfs_end_page_io(fs_page, err); - if (unlikely(fs_page == failed_page)) + if (fs_page && fs_page == failed_page) goto done; fs_page = bh->b_page; } diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c index ff231ad2389..ff27a296584 100644 --- a/fs/notify/inotify/inotify_user.c +++ b/fs/notify/inotify/inotify_user.c @@ -296,12 +296,15 @@ static int inotify_fasync(int fd, struct file *file, int on) static int inotify_release(struct inode *ignored, struct file *file) { struct fsnotify_group *group = file->private_data; + struct user_struct *user = group->inotify_data.user; fsnotify_clear_marks_by_group(group); /* free this group, matching get was inotify_init->fsnotify_obtain_group */ fsnotify_put_group(group); + atomic_dec(&user->inotify_devs); + return 0; } diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index 607c579e5ec..70f36c043d6 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c @@ -2042,8 +2042,8 @@ static int vfs_load_quota_inode(struct inode *inode, int type, int format_id, * changes */ invalidate_bdev(sb->s_bdev); } - mutex_lock(&inode->i_mutex); mutex_lock(&dqopt->dqonoff_mutex); + mutex_lock_nested(&inode->i_mutex, I_MUTEX_QUOTA); if (sb_has_quota_loaded(sb, type)) { error = -EBUSY; goto out_lock; @@ -2094,7 +2094,6 @@ out_file_init: dqopt->files[type] = NULL; iput(inode); out_lock: - mutex_unlock(&dqopt->dqonoff_mutex); if (oldflags != -1) { down_write(&dqopt->dqptr_sem); /* Set the flags back (in the case of accidental quotaon() @@ -2104,6 +2103,7 @@ out_lock: up_write(&dqopt->dqptr_sem); } mutex_unlock(&inode->i_mutex); + mutex_unlock(&dqopt->dqonoff_mutex); out_fmt: put_quota_format(fmt); diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c index 77f5bb746bf..90622200b39 100644 --- a/fs/reiserfs/journal.c +++ b/fs/reiserfs/journal.c @@ -997,7 +997,7 @@ static int reiserfs_async_progress_wait(struct super_block *s) DEFINE_WAIT(wait); struct reiserfs_journal *j = SB_JOURNAL(s); if (atomic_read(&j->j_async_throttle)) - congestion_wait(WRITE, HZ / 10); + congestion_wait(BLK_RW_ASYNC, HZ / 10); return 0; } diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index d3aeb061612..7adea74d6a8 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c @@ -24,7 +24,6 @@ #include <linux/exportfs.h> #include <linux/quotaops.h> #include <linux/vfs.h> -#include <linux/mnt_namespace.h> #include <linux/mount.h> #include <linux/namei.h> #include <linux/crc32.h> diff --git a/fs/sync.c b/fs/sync.c index dd200025af8..3422ba61d86 100644 --- a/fs/sync.c +++ b/fs/sync.c @@ -112,8 +112,13 @@ restart: mutex_unlock(&mutex); } +/* + * sync everything. Start out by waking pdflush, because that writes back + * all queues in parallel. + */ SYSCALL_DEFINE0(sync) { + wakeup_pdflush(0); sync_filesystems(0); sync_filesystems(1); if (unlikely(laptop_mode)) diff --git a/fs/sysfs/bin.c b/fs/sysfs/bin.c index 9345806c885..2524714bece 100644 --- a/fs/sysfs/bin.c +++ b/fs/sysfs/bin.c @@ -171,6 +171,7 @@ static ssize_t write(struct file *file, const char __user *userbuf, if (count > 0) *off = offs + count; + kfree(temp); return count; } diff --git a/fs/ubifs/io.c b/fs/ubifs/io.c index bc5857199ec..762a7d6cec7 100644 --- a/fs/ubifs/io.c +++ b/fs/ubifs/io.c @@ -297,6 +297,7 @@ static enum hrtimer_restart wbuf_timer_callback_nolock(struct hrtimer *timer) { struct ubifs_wbuf *wbuf = container_of(timer, struct ubifs_wbuf, timer); + dbg_io("jhead %d", wbuf->jhead); wbuf->need_sync = 1; wbuf->c->need_wbuf_sync = 1; ubifs_wake_up_bgt(wbuf->c); @@ -311,8 +312,12 @@ static void new_wbuf_timer_nolock(struct ubifs_wbuf *wbuf) { ubifs_assert(!hrtimer_active(&wbuf->timer)); - if (!ktime_to_ns(wbuf->softlimit)) + if (wbuf->no_timer) return; + dbg_io("set timer for jhead %d, %llu-%llu millisecs", wbuf->jhead, + div_u64(ktime_to_ns(wbuf->softlimit), USEC_PER_SEC), + div_u64(ktime_to_ns(wbuf->softlimit) + wbuf->delta, + USEC_PER_SEC)); hrtimer_start_range_ns(&wbuf->timer, wbuf->softlimit, wbuf->delta, HRTIMER_MODE_REL); } @@ -323,11 +328,8 @@ static void new_wbuf_timer_nolock(struct ubifs_wbuf *wbuf) */ static void cancel_wbuf_timer_nolock(struct ubifs_wbuf *wbuf) { - /* - * If the syncer is waiting for the lock (from the background thread's - * context) and another task is changing write-buffer then the syncing - * should be canceled. - */ + if (wbuf->no_timer) + return; wbuf->need_sync = 0; hrtimer_cancel(&wbuf->timer); } @@ -349,8 +351,8 @@ int ubifs_wbuf_sync_nolock(struct ubifs_wbuf *wbuf) /* Write-buffer is empty or not seeked */ return 0; - dbg_io("LEB %d:%d, %d bytes", - wbuf->lnum, wbuf->offs, wbuf->used); + dbg_io("LEB %d:%d, %d bytes, jhead %d", + wbuf->lnum, wbuf->offs, wbuf->used, wbuf->jhead); ubifs_assert(!(c->vfs_sb->s_flags & MS_RDONLY)); ubifs_assert(!(wbuf->avail & 7)); ubifs_assert(wbuf->offs + c->min_io_size <= c->leb_size); @@ -390,7 +392,7 @@ int ubifs_wbuf_sync_nolock(struct ubifs_wbuf *wbuf) * @offs: logical eraseblock offset to seek to * @dtype: data type * - * This function targets the write buffer to logical eraseblock @lnum:@offs. + * This function targets the write-buffer to logical eraseblock @lnum:@offs. * The write-buffer is synchronized if it is not empty. Returns zero in case of * success and a negative error code in case of failure. */ @@ -399,7 +401,7 @@ int ubifs_wbuf_seek_nolock(struct ubifs_wbuf *wbuf, int lnum, int offs, { const struct ubifs_info *c = wbuf->c; - dbg_io("LEB %d:%d", lnum, offs); + dbg_io("LEB %d:%d, jhead %d", lnum, offs, wbuf->jhead); ubifs_assert(lnum >= 0 && lnum < c->leb_cnt); ubifs_assert(offs >= 0 && offs <= c->leb_size); ubifs_assert(offs % c->min_io_size == 0 && !(offs & 7)); @@ -506,9 +508,9 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len) struct ubifs_info *c = wbuf->c; int err, written, n, aligned_len = ALIGN(len, 8), offs; - dbg_io("%d bytes (%s) to wbuf at LEB %d:%d", len, - dbg_ntype(((struct ubifs_ch *)buf)->node_type), wbuf->lnum, - wbuf->offs + wbuf->used); + dbg_io("%d bytes (%s) to jhead %d wbuf at LEB %d:%d", len, + dbg_ntype(((struct ubifs_ch *)buf)->node_type), wbuf->jhead, + wbuf->lnum, wbuf->offs + wbuf->used); ubifs_assert(len > 0 && wbuf->lnum >= 0 && wbuf->lnum < c->leb_cnt); ubifs_assert(wbuf->offs >= 0 && wbuf->offs % c->min_io_size == 0); ubifs_assert(!(wbuf->offs & 7) && wbuf->offs <= c->leb_size); @@ -533,8 +535,8 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len) memcpy(wbuf->buf + wbuf->used, buf, len); if (aligned_len == wbuf->avail) { - dbg_io("flush wbuf to LEB %d:%d", wbuf->lnum, - wbuf->offs); + dbg_io("flush jhead %d wbuf to LEB %d:%d", + wbuf->jhead, wbuf->lnum, wbuf->offs); err = ubi_leb_write(c->ubi, wbuf->lnum, wbuf->buf, wbuf->offs, c->min_io_size, wbuf->dtype); @@ -562,7 +564,8 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len) * minimal I/O unit. We have to fill and flush write-buffer and switch * to the next min. I/O unit. */ - dbg_io("flush wbuf to LEB %d:%d", wbuf->lnum, wbuf->offs); + dbg_io("flush jhead %d wbuf to LEB %d:%d", + wbuf->jhead, wbuf->lnum, wbuf->offs); memcpy(wbuf->buf + wbuf->used, buf, wbuf->avail); err = ubi_leb_write(c->ubi, wbuf->lnum, wbuf->buf, wbuf->offs, c->min_io_size, wbuf->dtype); @@ -695,7 +698,8 @@ int ubifs_read_node_wbuf(struct ubifs_wbuf *wbuf, void *buf, int type, int len, int err, rlen, overlap; struct ubifs_ch *ch = buf; - dbg_io("LEB %d:%d, %s, length %d", lnum, offs, dbg_ntype(type), len); + dbg_io("LEB %d:%d, %s, length %d, jhead %d", lnum, offs, + dbg_ntype(type), len, wbuf->jhead); ubifs_assert(wbuf && lnum >= 0 && lnum < c->leb_cnt && offs >= 0); ubifs_assert(!(offs & 7) && offs < c->leb_size); ubifs_assert(type >= 0 && type < UBIFS_NODE_TYPES_CNT); @@ -819,13 +823,12 @@ out: * @c: UBIFS file-system description object * @wbuf: write-buffer to initialize * - * This function initializes write buffer. Returns zero in case of success + * This function initializes write-buffer. Returns zero in case of success * %-ENOMEM in case of failure. */ int ubifs_wbuf_init(struct ubifs_info *c, struct ubifs_wbuf *wbuf) { size_t size; - ktime_t hardlimit; wbuf->buf = kmalloc(c->min_io_size, GFP_KERNEL); if (!wbuf->buf) @@ -851,22 +854,16 @@ int ubifs_wbuf_init(struct ubifs_info *c, struct ubifs_wbuf *wbuf) hrtimer_init(&wbuf->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); wbuf->timer.function = wbuf_timer_callback_nolock; - /* - * Make write-buffer soft limit to be 20% of the hard limit. The - * write-buffer timer is allowed to expire any time between the soft - * and hard limits. - */ - hardlimit = ktime_set(DEFAULT_WBUF_TIMEOUT_SECS, 0); - wbuf->delta = (DEFAULT_WBUF_TIMEOUT_SECS * NSEC_PER_SEC) * 2 / 10; - wbuf->softlimit = ktime_sub_ns(hardlimit, wbuf->delta); - hrtimer_set_expires_range_ns(&wbuf->timer, wbuf->softlimit, - wbuf->delta); + wbuf->softlimit = ktime_set(WBUF_TIMEOUT_SOFTLIMIT, 0); + wbuf->delta = WBUF_TIMEOUT_HARDLIMIT - WBUF_TIMEOUT_SOFTLIMIT; + wbuf->delta *= 1000000000ULL; + ubifs_assert(wbuf->delta <= ULONG_MAX); return 0; } /** * ubifs_wbuf_add_ino_nolock - add an inode number into the wbuf inode array. - * @wbuf: the write-buffer whereto add + * @wbuf: the write-buffer where to add * @inum: the inode number * * This function adds an inode number to the inode array of the write-buffer. diff --git a/fs/ubifs/recovery.c b/fs/ubifs/recovery.c index 805605250f1..e5f6cf8a115 100644 --- a/fs/ubifs/recovery.c +++ b/fs/ubifs/recovery.c @@ -53,6 +53,25 @@ static int is_empty(void *buf, int len) } /** + * first_non_ff - find offset of the first non-0xff byte. + * @buf: buffer to search in + * @len: length of buffer + * + * This function returns offset of the first non-0xff byte in @buf or %-1 if + * the buffer contains only 0xff bytes. + */ +static int first_non_ff(void *buf, int len) +{ + uint8_t *p = buf; + int i; + + for (i = 0; i < len; i++) + if (*p++ != 0xff) + return i; + return -1; +} + +/** * get_master_node - get the last valid master node allowing for corruption. * @c: UBIFS file-system description object * @lnum: LEB number @@ -357,11 +376,7 @@ static int is_last_write(const struct ubifs_info *c, void *buf, int offs) empty_offs = ALIGN(offs + 1, c->min_io_size); check_len = c->leb_size - empty_offs; p = buf + empty_offs - offs; - - for (; check_len > 0; check_len--) - if (*p++ != 0xff) - return 0; - return 1; + return is_empty(p, check_len); } /** @@ -543,8 +558,8 @@ static int drop_incomplete_group(struct ubifs_scan_leb *sleb, int *offs) * * This function does a scan of a LEB, but caters for errors that might have * been caused by the unclean unmount from which we are attempting to recover. - * - * This function returns %0 on success and a negative error code on failure. + * Returns %0 in case of success, %-EUCLEAN if an unrecoverable corruption is + * found, and a negative error code in case of failure. */ struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum, int offs, void *sbuf, int grouped) @@ -643,7 +658,8 @@ struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum, goto corrupted; default: dbg_err("unknown"); - goto corrupted; + err = -EINVAL; + goto error; } } @@ -652,8 +668,13 @@ struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum, clean_buf(c, &buf, lnum, &offs, &len); need_clean = 1; } else { - ubifs_err("corrupt empty space at LEB %d:%d", - lnum, offs); + int corruption = first_non_ff(buf, len); + + ubifs_err("corrupt empty space LEB %d:%d, corruption " + "starts at %d", lnum, offs, corruption); + /* Make sure we dump interesting non-0xFF data */ + offs = corruption; + buf += corruption; goto corrupted; } } @@ -813,7 +834,7 @@ struct ubifs_scan_leb *ubifs_recover_log_leb(struct ubifs_info *c, int lnum, static int recover_head(const struct ubifs_info *c, int lnum, int offs, void *sbuf) { - int len, err, need_clean = 0; + int len, err; if (c->min_io_size > 1) len = c->min_io_size; @@ -827,19 +848,7 @@ static int recover_head(const struct ubifs_info *c, int lnum, int offs, /* Read at the head location and check it is empty flash */ err = ubi_read(c->ubi, lnum, sbuf, offs, len); - if (err) - need_clean = 1; - else { - uint8_t *p = sbuf; - - while (len--) - if (*p++ != 0xff) { - need_clean = 1; - break; - } - } - - if (need_clean) { + if (err || !is_empty(sbuf, len)) { dbg_rcvry("cleaning head at %d:%d", lnum, offs); if (offs == 0) return ubifs_leb_unmap(c, lnum); diff --git a/fs/ubifs/replay.c b/fs/ubifs/replay.c index 11cc80125a4..2970500f32d 100644 --- a/fs/ubifs/replay.c +++ b/fs/ubifs/replay.c @@ -837,9 +837,10 @@ static int replay_log_leb(struct ubifs_info *c, int lnum, int offs, void *sbuf) dbg_mnt("replay log LEB %d:%d", lnum, offs); sleb = ubifs_scan(c, lnum, offs, sbuf); - if (IS_ERR(sleb)) { - if (c->need_recovery) - sleb = ubifs_recover_log_leb(c, lnum, offs, sbuf); + if (IS_ERR(sleb) ) { + if (PTR_ERR(sleb) != -EUCLEAN || !c->need_recovery) + return PTR_ERR(sleb); + sleb = ubifs_recover_log_leb(c, lnum, offs, sbuf); if (IS_ERR(sleb)) return PTR_ERR(sleb); } @@ -957,7 +958,7 @@ out: return err; out_dump: - ubifs_err("log error detected while replying the log at LEB %d:%d", + ubifs_err("log error detected while replaying the log at LEB %d:%d", lnum, offs + snod->offs); dbg_dump_node(c, snod->node); ubifs_scan_destroy(sleb); diff --git a/fs/ubifs/scan.c b/fs/ubifs/scan.c index 0ed82479b44..892ebfee4fe 100644 --- a/fs/ubifs/scan.c +++ b/fs/ubifs/scan.c @@ -238,12 +238,12 @@ void ubifs_scanned_corruption(const struct ubifs_info *c, int lnum, int offs, { int len; - ubifs_err("corrupted data at LEB %d:%d", lnum, offs); + ubifs_err("corruption at LEB %d:%d", lnum, offs); if (dbg_failure_mode) return; len = c->leb_size - offs; - if (len > 4096) - len = 4096; + if (len > 8192) + len = 8192; dbg_err("first %d bytes from LEB %d:%d", len, lnum, offs); print_hex_dump(KERN_DEBUG, "", DUMP_PREFIX_OFFSET, 32, 4, buf, len, 1); } @@ -256,7 +256,9 @@ void ubifs_scanned_corruption(const struct ubifs_info *c, int lnum, int offs, * @sbuf: scan buffer (must be c->leb_size) * * This function scans LEB number @lnum and returns complete information about - * its contents. Returns an error code in case of failure. + * its contents. Returns the scaned information in case of success and, + * %-EUCLEAN if the LEB neads recovery, and other negative error codes in case + * of failure. */ struct ubifs_scan_leb *ubifs_scan(const struct ubifs_info *c, int lnum, int offs, void *sbuf) @@ -279,7 +281,6 @@ struct ubifs_scan_leb *ubifs_scan(const struct ubifs_info *c, int lnum, cond_resched(); ret = ubifs_scan_a_node(c, buf, len, lnum, offs, 0); - if (ret > 0) { /* Padding bytes or a valid padding node */ offs += ret; @@ -304,7 +305,8 @@ struct ubifs_scan_leb *ubifs_scan(const struct ubifs_info *c, int lnum, goto corrupted; default: dbg_err("unknown"); - goto corrupted; + err = -EINVAL; + goto error; } err = ubifs_add_snod(c, sleb, buf, offs); @@ -317,8 +319,10 @@ struct ubifs_scan_leb *ubifs_scan(const struct ubifs_info *c, int lnum, len -= node_len; } - if (offs % c->min_io_size) - goto corrupted; + if (offs % c->min_io_size) { + ubifs_err("empty space starts at non-aligned offset %d", offs); + goto corrupted;; + } ubifs_end_scan(c, sleb, lnum, offs); diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index 79fad43f3c5..26d2e0d8046 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -797,7 +797,7 @@ static int alloc_wbufs(struct ubifs_info *c) * does not need to be synchronized by timer. */ c->jheads[GCHD].wbuf.dtype = UBI_LONGTERM; - c->jheads[GCHD].wbuf.softlimit = ktime_set(0, 0); + c->jheads[GCHD].wbuf.no_timer = 1; return 0; } @@ -986,7 +986,7 @@ static int ubifs_parse_options(struct ubifs_info *c, char *options, switch (token) { /* * %Opt_fast_unmount and %Opt_norm_unmount options are ignored. - * We accepte them in order to be backware-compatible. But this + * We accept them in order to be backward-compatible. But this * should be removed at some point. */ case Opt_fast_unmount: @@ -1287,6 +1287,9 @@ static int mount_ubifs(struct ubifs_info *c) if (err) goto out_journal; + /* Calculate 'min_idx_lebs' after journal replay */ + c->min_idx_lebs = ubifs_calc_min_idx_lebs(c); + err = ubifs_mount_orphans(c, c->need_recovery, mounted_read_only); if (err) goto out_orphans; @@ -1754,10 +1757,8 @@ static void ubifs_put_super(struct super_block *sb) /* Synchronize write-buffers */ if (c->jheads) - for (i = 0; i < c->jhead_cnt; i++) { + for (i = 0; i < c->jhead_cnt; i++) ubifs_wbuf_sync(&c->jheads[i].wbuf); - hrtimer_cancel(&c->jheads[i].wbuf.timer); - } /* * On fatal errors c->ro_media is set to 1, in which case we do @@ -1975,7 +1976,8 @@ static int ubifs_fill_super(struct super_block *sb, void *data, int silent) err = bdi_init(&c->bdi); if (err) goto out_close; - err = bdi_register(&c->bdi, NULL, "ubifs"); + err = bdi_register(&c->bdi, NULL, "ubifs_%d_%d", + c->vi.ubi_num, c->vi.vol_id); if (err) goto out_bdi; diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h index 1bf01d82006..a2934909442 100644 --- a/fs/ubifs/ubifs.h +++ b/fs/ubifs/ubifs.h @@ -95,8 +95,9 @@ */ #define BGT_NAME_PATTERN "ubifs_bgt%d_%d" -/* Default write-buffer synchronization timeout in seconds */ -#define DEFAULT_WBUF_TIMEOUT_SECS 5 +/* Write-buffer synchronization timeout interval in seconds */ +#define WBUF_TIMEOUT_SOFTLIMIT 3 +#define WBUF_TIMEOUT_HARDLIMIT 5 /* Maximum possible inode number (only 32-bit inodes are supported now) */ #define MAX_INUM 0xFFFFFFFF @@ -654,7 +655,8 @@ typedef int (*ubifs_lpt_scan_callback)(struct ubifs_info *c, * @delta: hard and soft timeouts delta (the timer expire inteval is @softlimit * and @softlimit + @delta) * @timer: write-buffer timer - * @need_sync: it is set if its timer expired and needs sync + * @no_timer: non-zero if this write-buffer does not have a timer + * @need_sync: non-zero if the timer expired and the wbuf needs sync'ing * @next_ino: points to the next position of the following inode number * @inodes: stores the inode numbers of the nodes which are in wbuf * @@ -683,7 +685,8 @@ struct ubifs_wbuf { ktime_t softlimit; unsigned long long delta; struct hrtimer timer; - int need_sync; + unsigned int no_timer:1; + unsigned int need_sync:1; int next_ino; ino_t *inodes; }; diff --git a/fs/xfs/linux-2.6/kmem.c b/fs/xfs/linux-2.6/kmem.c index 1cd3b55ee3d..2d3f90afe5f 100644 --- a/fs/xfs/linux-2.6/kmem.c +++ b/fs/xfs/linux-2.6/kmem.c @@ -53,7 +53,7 @@ kmem_alloc(size_t size, unsigned int __nocast flags) printk(KERN_ERR "XFS: possible memory allocation " "deadlock in %s (mode:0x%x)\n", __func__, lflags); - congestion_wait(WRITE, HZ/50); + congestion_wait(BLK_RW_ASYNC, HZ/50); } while (1); } @@ -130,7 +130,7 @@ kmem_zone_alloc(kmem_zone_t *zone, unsigned int __nocast flags) printk(KERN_ERR "XFS: possible memory allocation " "deadlock in %s (mode:0x%x)\n", __func__, lflags); - congestion_wait(WRITE, HZ/50); + congestion_wait(BLK_RW_ASYNC, HZ/50); } while (1); } diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c index 1418b916fc2..0c93c7ef3d1 100644 --- a/fs/xfs/linux-2.6/xfs_buf.c +++ b/fs/xfs/linux-2.6/xfs_buf.c @@ -412,7 +412,7 @@ _xfs_buf_lookup_pages( XFS_STATS_INC(xb_page_retries); xfsbufd_wakeup(0, gfp_mask); - congestion_wait(WRITE, HZ/50); + congestion_wait(BLK_RW_ASYNC, HZ/50); goto retry; } |