From df46b9a44ceb5af2ea2351ce8e28ae7bd840b00f Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Mon, 20 Jun 2005 14:04:44 +0200 Subject: [PATCH] Add blk_rq_map_kern() Add blk_rq_map_kern which takes a kernel buffer and maps it into a request and bio. This can be used by the dm hw_handlers, old sg_scsi_ioctl, and one day scsi special requests so all requests comming into scsi will have bios. All requests having bios should allow scsi to use scatter lists for all IO and allow it to use block layer functions. Signed-off-by: Jens Axboe --- fs/bio.c | 66 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 66 insertions(+) (limited to 'fs') diff --git a/fs/bio.c b/fs/bio.c index 3a1472acc36..707b9af2dd0 100644 --- a/fs/bio.c +++ b/fs/bio.c @@ -701,6 +701,71 @@ void bio_unmap_user(struct bio *bio) bio_put(bio); } +static struct bio *__bio_map_kern(request_queue_t *q, void *data, + unsigned int len, unsigned int gfp_mask) +{ + unsigned long kaddr = (unsigned long)data; + unsigned long end = (kaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT; + unsigned long start = kaddr >> PAGE_SHIFT; + const int nr_pages = end - start; + int offset, i; + struct bio *bio; + + bio = bio_alloc(gfp_mask, nr_pages); + if (!bio) + return ERR_PTR(-ENOMEM); + + offset = offset_in_page(kaddr); + for (i = 0; i < nr_pages; i++) { + unsigned int bytes = PAGE_SIZE - offset; + + if (len <= 0) + break; + + if (bytes > len) + bytes = len; + + if (__bio_add_page(q, bio, virt_to_page(data), bytes, + offset) < bytes) + break; + + data += bytes; + len -= bytes; + offset = 0; + } + + return bio; +} + +/** + * bio_map_kern - map kernel address into bio + * @q: the request_queue_t for the bio + * @data: pointer to buffer to map + * @len: length in bytes + * @gfp_mask: allocation flags for bio allocation + * + * Map the kernel address into a bio suitable for io to a block + * device. Returns an error pointer in case of error. + */ +struct bio *bio_map_kern(request_queue_t *q, void *data, unsigned int len, + unsigned int gfp_mask) +{ + struct bio *bio; + + bio = __bio_map_kern(q, data, len, gfp_mask); + if (IS_ERR(bio)) + return bio; + + if (bio->bi_size == len) + return bio; + + /* + * Don't support partial mappings. + */ + bio_put(bio); + return ERR_PTR(-EINVAL); +} + /* * bio_set_pages_dirty() and bio_check_pages_dirty() are support functions * for performing direct-IO in BIOs. @@ -1088,6 +1153,7 @@ EXPORT_SYMBOL(bio_add_page); EXPORT_SYMBOL(bio_get_nr_vecs); EXPORT_SYMBOL(bio_map_user); EXPORT_SYMBOL(bio_unmap_user); +EXPORT_SYMBOL(bio_map_kern); EXPORT_SYMBOL(bio_pair_release); EXPORT_SYMBOL(bio_split); EXPORT_SYMBOL(bio_split_pool); -- cgit v1.2.3-70-g09d2 From b823825e8e09aac6dc1ca362cd5639a87329d636 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 20 Jun 2005 14:05:27 +0200 Subject: [PATCH] Keep the bio end_io parts inside of bio.c for blk_rq_map_kern() Signed-off-by: Jens Axboe --- drivers/block/ll_rw_blk.c | 11 ----------- fs/bio.c | 11 +++++++++++ 2 files changed, 11 insertions(+), 11 deletions(-) (limited to 'fs') diff --git a/drivers/block/ll_rw_blk.c b/drivers/block/ll_rw_blk.c index e30a3c93b70..1471aca6fa1 100644 --- a/drivers/block/ll_rw_blk.c +++ b/drivers/block/ll_rw_blk.c @@ -2177,16 +2177,6 @@ int blk_rq_unmap_user(struct request *rq, struct bio *bio, unsigned int ulen) EXPORT_SYMBOL(blk_rq_unmap_user); -static int blk_rq_map_kern_endio(struct bio *bio, unsigned int bytes_done, - int error) -{ - if (bio->bi_size) - return 1; - - bio_put(bio); - return 0; -} - /** * blk_rq_map_kern - map kernel data to a request, for REQ_BLOCK_PC usage * @q: request queue where request should be inserted @@ -2213,7 +2203,6 @@ struct request *blk_rq_map_kern(request_queue_t *q, int rw, void *kbuf, if (!IS_ERR(bio)) { if (rw) bio->bi_rw |= (1 << BIO_RW); - bio->bi_end_io = blk_rq_map_kern_endio; rq->bio = rq->biotail = bio; blk_rq_bio_prep(q, rq, bio); diff --git a/fs/bio.c b/fs/bio.c index 707b9af2dd0..c0d9140e470 100644 --- a/fs/bio.c +++ b/fs/bio.c @@ -701,6 +701,16 @@ void bio_unmap_user(struct bio *bio) bio_put(bio); } +static int bio_map_kern_endio(struct bio *bio, unsigned int bytes_done, int err) +{ + if (bio->bi_size) + return 1; + + bio_put(bio); + return 0; +} + + static struct bio *__bio_map_kern(request_queue_t *q, void *data, unsigned int len, unsigned int gfp_mask) { @@ -734,6 +744,7 @@ static struct bio *__bio_map_kern(request_queue_t *q, void *data, offset = 0; } + bio->bi_end_io = bio_map_kern_endio; return bio; } -- cgit v1.2.3-70-g09d2 From f1970baf6d74e03bd32072ab453f2fc01bc1b8d3 Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Mon, 20 Jun 2005 14:06:52 +0200 Subject: [PATCH] Add scatter-gather support for the block layer SG_IO Signed-off-by: Jens Axboe --- drivers/block/ll_rw_blk.c | 64 +++++++++++++++++-- drivers/block/scsi_ioctl.c | 34 ++++++---- fs/bio.c | 150 +++++++++++++++++++++++++++++++-------------- include/linux/bio.h | 4 ++ include/linux/blkdev.h | 1 + 5 files changed, 191 insertions(+), 62 deletions(-) (limited to 'fs') diff --git a/drivers/block/ll_rw_blk.c b/drivers/block/ll_rw_blk.c index 42c4f3651cf..874e46fc374 100644 --- a/drivers/block/ll_rw_blk.c +++ b/drivers/block/ll_rw_blk.c @@ -2148,6 +2148,50 @@ int blk_rq_map_user(request_queue_t *q, struct request *rq, void __user *ubuf, EXPORT_SYMBOL(blk_rq_map_user); +/** + * blk_rq_map_user_iov - map user data to a request, for REQ_BLOCK_PC usage + * @q: request queue where request should be inserted + * @rq: request to map data to + * @iov: pointer to the iovec + * @iov_count: number of elements in the iovec + * + * Description: + * Data will be mapped directly for zero copy io, if possible. Otherwise + * a kernel bounce buffer is used. + * + * A matching blk_rq_unmap_user() must be issued at the end of io, while + * still in process context. + * + * Note: The mapped bio may need to be bounced through blk_queue_bounce() + * before being submitted to the device, as pages mapped may be out of + * reach. It's the callers responsibility to make sure this happens. The + * original bio must be passed back in to blk_rq_unmap_user() for proper + * unmapping. + */ +int blk_rq_map_user_iov(request_queue_t *q, struct request *rq, + struct sg_iovec *iov, int iov_count) +{ + struct bio *bio; + + if (!iov || iov_count <= 0) + return -EINVAL; + + /* we don't allow misaligned data like bio_map_user() does. If the + * user is using sg, they're expected to know the alignment constraints + * and respect them accordingly */ + bio = bio_map_user_iov(q, NULL, iov, iov_count, rq_data_dir(rq)== READ); + if (IS_ERR(bio)) + return PTR_ERR(bio); + + rq->bio = rq->biotail = bio; + blk_rq_bio_prep(q, rq, bio); + rq->buffer = rq->data = NULL; + rq->data_len = bio->bi_size; + return 0; +} + +EXPORT_SYMBOL(blk_rq_map_user_iov); + /** * blk_rq_unmap_user - unmap a request with user data * @rq: request to be unmapped @@ -2207,6 +2251,19 @@ int blk_rq_map_kern(request_queue_t *q, struct request *rq, void *kbuf, EXPORT_SYMBOL(blk_rq_map_kern); +void blk_execute_rq_nowait(request_queue_t *q, struct gendisk *bd_disk, + struct request *rq, int at_head, + void (*done)(struct request *)) +{ + int where = at_head ? ELEVATOR_INSERT_FRONT : ELEVATOR_INSERT_BACK; + + rq->rq_disk = bd_disk; + rq->flags |= REQ_NOMERGE; + rq->end_io = done; + elv_add_request(q, rq, where, 1); + generic_unplug_device(q); +} + /** * blk_execute_rq - insert a request into queue for execution * @q: queue to insert the request in @@ -2224,8 +2281,6 @@ int blk_execute_rq(request_queue_t *q, struct gendisk *bd_disk, char sense[SCSI_SENSE_BUFFERSIZE]; int err = 0; - rq->rq_disk = bd_disk; - /* * we need an extra reference to the request, so we can look at * it after io completion @@ -2238,11 +2293,8 @@ int blk_execute_rq(request_queue_t *q, struct gendisk *bd_disk, rq->sense_len = 0; } - rq->flags |= REQ_NOMERGE; rq->waiting = &wait; - rq->end_io = blk_end_sync_rq; - elv_add_request(q, rq, ELEVATOR_INSERT_BACK, 1); - generic_unplug_device(q); + blk_execute_rq_nowait(q, bd_disk, rq, 0, blk_end_sync_rq); wait_for_completion(&wait); rq->waiting = NULL; diff --git a/drivers/block/scsi_ioctl.c b/drivers/block/scsi_ioctl.c index 93c4ca874be..09a7e73a081 100644 --- a/drivers/block/scsi_ioctl.c +++ b/drivers/block/scsi_ioctl.c @@ -231,17 +231,11 @@ static int sg_io(struct file *file, request_queue_t *q, if (verify_command(file, cmd)) return -EPERM; - /* - * we'll do that later - */ - if (hdr->iovec_count) - return -EOPNOTSUPP; - if (hdr->dxfer_len > (q->max_sectors << 9)) return -EIO; reading = writing = 0; - if (hdr->dxfer_len) { + if (hdr->dxfer_len) switch (hdr->dxfer_direction) { default: return -EINVAL; @@ -261,11 +255,29 @@ static int sg_io(struct file *file, request_queue_t *q, if (!rq) return -ENOMEM; - if (reading || writing) { - ret = blk_rq_map_user(q, rq, hdr->dxferp, hdr->dxfer_len); - if (ret) + if (hdr->iovec_count) { + const int size = sizeof(struct sg_iovec) * hdr->iovec_count; + struct sg_iovec *iov; + + iov = kmalloc(size, GFP_KERNEL); + if (!iov) { + ret = -ENOMEM; goto out; - } + } + + if (copy_from_user(iov, hdr->dxferp, size)) { + kfree(iov); + ret = -EFAULT; + goto out; + } + + ret = blk_rq_map_user_iov(q, rq, iov, hdr->iovec_count); + kfree(iov); + } else if (hdr->dxfer_len) + ret = blk_rq_map_user(q, rq, hdr->dxferp, hdr->dxfer_len); + + if (ret) + goto out; /* * fill in request structure diff --git a/fs/bio.c b/fs/bio.c index c0d9140e470..24e4045788e 100644 --- a/fs/bio.c +++ b/fs/bio.c @@ -25,6 +25,7 @@ #include #include #include +#include /* for struct sg_iovec */ #define BIO_POOL_SIZE 256 @@ -549,22 +550,34 @@ out_bmd: return ERR_PTR(ret); } -static struct bio *__bio_map_user(request_queue_t *q, struct block_device *bdev, - unsigned long uaddr, unsigned int len, - int write_to_vm) +static struct bio *__bio_map_user_iov(request_queue_t *q, + struct block_device *bdev, + struct sg_iovec *iov, int iov_count, + int write_to_vm) { - unsigned long end = (uaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT; - unsigned long start = uaddr >> PAGE_SHIFT; - const int nr_pages = end - start; - int ret, offset, i; + int i, j; + int nr_pages = 0; struct page **pages; struct bio *bio; + int cur_page = 0; + int ret, offset; - /* - * transfer and buffer must be aligned to at least hardsector - * size for now, in the future we can relax this restriction - */ - if ((uaddr & queue_dma_alignment(q)) || (len & queue_dma_alignment(q))) + for (i = 0; i < iov_count; i++) { + unsigned long uaddr = (unsigned long)iov[i].iov_base; + unsigned long len = iov[i].iov_len; + unsigned long end = (uaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT; + unsigned long start = uaddr >> PAGE_SHIFT; + + nr_pages += end - start; + /* + * transfer and buffer must be aligned to at least hardsector + * size for now, in the future we can relax this restriction + */ + if ((uaddr & queue_dma_alignment(q)) || (len & queue_dma_alignment(q))) + return ERR_PTR(-EINVAL); + } + + if (!nr_pages) return ERR_PTR(-EINVAL); bio = bio_alloc(GFP_KERNEL, nr_pages); @@ -576,42 +589,54 @@ static struct bio *__bio_map_user(request_queue_t *q, struct block_device *bdev, if (!pages) goto out; - down_read(¤t->mm->mmap_sem); - ret = get_user_pages(current, current->mm, uaddr, nr_pages, - write_to_vm, 0, pages, NULL); - up_read(¤t->mm->mmap_sem); - - if (ret < nr_pages) - goto out; - - bio->bi_bdev = bdev; - - offset = uaddr & ~PAGE_MASK; - for (i = 0; i < nr_pages; i++) { - unsigned int bytes = PAGE_SIZE - offset; - - if (len <= 0) - break; - - if (bytes > len) - bytes = len; + memset(pages, 0, nr_pages * sizeof(struct page *)); + + for (i = 0; i < iov_count; i++) { + unsigned long uaddr = (unsigned long)iov[i].iov_base; + unsigned long len = iov[i].iov_len; + unsigned long end = (uaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT; + unsigned long start = uaddr >> PAGE_SHIFT; + const int local_nr_pages = end - start; + const int page_limit = cur_page + local_nr_pages; + + down_read(¤t->mm->mmap_sem); + ret = get_user_pages(current, current->mm, uaddr, + local_nr_pages, + write_to_vm, 0, &pages[cur_page], NULL); + up_read(¤t->mm->mmap_sem); + + if (ret < local_nr_pages) + goto out_unmap; + + + offset = uaddr & ~PAGE_MASK; + for (j = cur_page; j < page_limit; j++) { + unsigned int bytes = PAGE_SIZE - offset; + + if (len <= 0) + break; + + if (bytes > len) + bytes = len; + + /* + * sorry... + */ + if (__bio_add_page(q, bio, pages[j], bytes, offset) < bytes) + break; + + len -= bytes; + offset = 0; + } + cur_page = j; /* - * sorry... + * release the pages we didn't map into the bio, if any */ - if (__bio_add_page(q, bio, pages[i], bytes, offset) < bytes) - break; - - len -= bytes; - offset = 0; + while (j < page_limit) + page_cache_release(pages[j++]); } - /* - * release the pages we didn't map into the bio, if any - */ - while (i < nr_pages) - page_cache_release(pages[i++]); - kfree(pages); /* @@ -620,9 +645,17 @@ static struct bio *__bio_map_user(request_queue_t *q, struct block_device *bdev, if (!write_to_vm) bio->bi_rw |= (1 << BIO_RW); + bio->bi_bdev = bdev; bio->bi_flags |= (1 << BIO_USER_MAPPED); return bio; -out: + + out_unmap: + for (i = 0; i < nr_pages; i++) { + if(!pages[i]) + break; + page_cache_release(pages[i]); + } + out: kfree(pages); bio_put(bio); return ERR_PTR(ret); @@ -641,10 +674,34 @@ out: */ struct bio *bio_map_user(request_queue_t *q, struct block_device *bdev, unsigned long uaddr, unsigned int len, int write_to_vm) +{ + struct sg_iovec iov; + + iov.iov_base = (__user void *)uaddr; + iov.iov_len = len; + + return bio_map_user_iov(q, bdev, &iov, 1, write_to_vm); +} + +/** + * bio_map_user_iov - map user sg_iovec table into bio + * @q: the request_queue_t for the bio + * @bdev: destination block device + * @iov: the iovec. + * @iov_count: number of elements in the iovec + * @write_to_vm: bool indicating writing to pages or not + * + * Map the user space address into a bio suitable for io to a block + * device. Returns an error pointer in case of error. + */ +struct bio *bio_map_user_iov(request_queue_t *q, struct block_device *bdev, + struct sg_iovec *iov, int iov_count, + int write_to_vm) { struct bio *bio; + int len = 0, i; - bio = __bio_map_user(q, bdev, uaddr, len, write_to_vm); + bio = __bio_map_user_iov(q, bdev, iov, iov_count, write_to_vm); if (IS_ERR(bio)) return bio; @@ -657,6 +714,9 @@ struct bio *bio_map_user(request_queue_t *q, struct block_device *bdev, */ bio_get(bio); + for (i = 0; i < iov_count; i++) + len += iov[i].iov_len; + if (bio->bi_size == len) return bio; diff --git a/include/linux/bio.h b/include/linux/bio.h index 1dd2bc2e84a..ebcd03ba2e2 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -281,6 +281,10 @@ extern int bio_add_page(struct bio *, struct page *, unsigned int,unsigned int); extern int bio_get_nr_vecs(struct block_device *); extern struct bio *bio_map_user(struct request_queue *, struct block_device *, unsigned long, unsigned int, int); +struct sg_iovec; +extern struct bio *bio_map_user_iov(struct request_queue *, + struct block_device *, + struct sg_iovec *, int, int); extern void bio_unmap_user(struct bio *); extern struct bio *bio_map_kern(struct request_queue *, void *, unsigned int, unsigned int); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index fc0dce07861..0430ea3e5f2 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -561,6 +561,7 @@ extern void blk_queue_activity_fn(request_queue_t *, activity_fn *, void *); extern int blk_rq_map_user(request_queue_t *, struct request *, void __user *, unsigned int); extern int blk_rq_unmap_user(struct bio *, unsigned int); extern int blk_rq_map_kern(request_queue_t *, struct request *, void *, unsigned int, unsigned int); +extern int blk_rq_map_user_iov(request_queue_t *, struct request *, struct sg_iovec *, int); extern int blk_execute_rq(request_queue_t *, struct gendisk *, struct request *); static inline request_queue_t *bdev_get_queue(struct block_device *bdev) -- cgit v1.2.3-70-g09d2 From eb6f1160ddb2fdadf50f350da79d0796c37f17e2 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 1 Sep 2005 17:43:25 -0700 Subject: [CRYPTO]: Use CRYPTO_TFM_REQ_MAY_SLEEP where appropriate This patch goes through the current users of the crypto layer and sets CRYPTO_TFM_REQ_MAY_SLEEP at crypto_alloc_tfm() where all crypto operations are performed in process context. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- drivers/block/cryptoloop.c | 6 ++++-- drivers/md/dm-crypt.c | 7 ++++--- drivers/net/wireless/airo.c | 2 +- fs/nfsd/nfs4recover.c | 2 +- net/sunrpc/auth_gss/gss_krb5_crypto.c | 2 +- security/seclvl.c | 2 +- 6 files changed, 12 insertions(+), 9 deletions(-) (limited to 'fs') diff --git a/drivers/block/cryptoloop.c b/drivers/block/cryptoloop.c index 5be6f998d8c..3d4261c39f1 100644 --- a/drivers/block/cryptoloop.c +++ b/drivers/block/cryptoloop.c @@ -57,9 +57,11 @@ cryptoloop_init(struct loop_device *lo, const struct loop_info64 *info) mode = strsep(&cmsp, "-"); if (mode == NULL || strcmp(mode, "cbc") == 0) - tfm = crypto_alloc_tfm(cipher, CRYPTO_TFM_MODE_CBC); + tfm = crypto_alloc_tfm(cipher, CRYPTO_TFM_MODE_CBC | + CRYPTO_TFM_REQ_MAY_SLEEP); else if (strcmp(mode, "ecb") == 0) - tfm = crypto_alloc_tfm(cipher, CRYPTO_TFM_MODE_ECB); + tfm = crypto_alloc_tfm(cipher, CRYPTO_TFM_MODE_ECB | + CRYPTO_TFM_REQ_MAY_SLEEP); if (tfm == NULL) return -EINVAL; diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index d0a4bab220e..b82bc315047 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -144,7 +144,7 @@ static int crypt_iv_essiv_ctr(struct crypt_config *cc, struct dm_target *ti, } /* Hash the cipher key with the given hash algorithm */ - hash_tfm = crypto_alloc_tfm(opts, 0); + hash_tfm = crypto_alloc_tfm(opts, CRYPTO_TFM_REQ_MAY_SLEEP); if (hash_tfm == NULL) { ti->error = PFX "Error initializing ESSIV hash"; return -EINVAL; @@ -172,7 +172,8 @@ static int crypt_iv_essiv_ctr(struct crypt_config *cc, struct dm_target *ti, /* Setup the essiv_tfm with the given salt */ essiv_tfm = crypto_alloc_tfm(crypto_tfm_alg_name(cc->tfm), - CRYPTO_TFM_MODE_ECB); + CRYPTO_TFM_MODE_ECB | + CRYPTO_TFM_REQ_MAY_SLEEP); if (essiv_tfm == NULL) { ti->error = PFX "Error allocating crypto tfm for ESSIV"; kfree(salt); @@ -587,7 +588,7 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv) goto bad1; } - tfm = crypto_alloc_tfm(cipher, crypto_flags); + tfm = crypto_alloc_tfm(cipher, crypto_flags | CRYPTO_TFM_REQ_MAY_SLEEP); if (!tfm) { ti->error = PFX "Error allocating crypto tfm"; goto bad1; diff --git a/drivers/net/wireless/airo.c b/drivers/net/wireless/airo.c index df20adcd073..7fdb85dda4e 100644 --- a/drivers/net/wireless/airo.c +++ b/drivers/net/wireless/airo.c @@ -1301,7 +1301,7 @@ static int micsetup(struct airo_info *ai) { int i; if (ai->tfm == NULL) - ai->tfm = crypto_alloc_tfm("aes", 0); + ai->tfm = crypto_alloc_tfm("aes", CRYPTO_TFM_REQ_MAY_SLEEP); if (ai->tfm == NULL) { printk(KERN_ERR "airo: failed to load transform for AES\n"); diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c index 57ed50fe7f8..02132f33320 100644 --- a/fs/nfsd/nfs4recover.c +++ b/fs/nfsd/nfs4recover.c @@ -93,7 +93,7 @@ nfs4_make_rec_clidname(char *dname, struct xdr_netobj *clname) dprintk("NFSD: nfs4_make_rec_clidname for %.*s\n", clname->len, clname->data); - tfm = crypto_alloc_tfm("md5", 0); + tfm = crypto_alloc_tfm("md5", CRYPTO_TFM_REQ_MAY_SLEEP); if (tfm == NULL) goto out; cksum.len = crypto_tfm_alg_digestsize(tfm); diff --git a/net/sunrpc/auth_gss/gss_krb5_crypto.c b/net/sunrpc/auth_gss/gss_krb5_crypto.c index 5a7265aeaf8..7ad74449b18 100644 --- a/net/sunrpc/auth_gss/gss_krb5_crypto.c +++ b/net/sunrpc/auth_gss/gss_krb5_crypto.c @@ -160,7 +160,7 @@ make_checksum(s32 cksumtype, char *header, int hdrlen, struct xdr_buf *body, " unsupported checksum %d", cksumtype); goto out; } - if (!(tfm = crypto_alloc_tfm(cksumname, 0))) + if (!(tfm = crypto_alloc_tfm(cksumname, CRYPTO_TFM_REQ_MAY_SLEEP))) goto out; cksum->len = crypto_tfm_alg_digestsize(tfm); if ((cksum->data = kmalloc(cksum->len, GFP_KERNEL)) == NULL) diff --git a/security/seclvl.c b/security/seclvl.c index c8e87b22c9b..96b1f2122f6 100644 --- a/security/seclvl.c +++ b/security/seclvl.c @@ -321,7 +321,7 @@ plaintext_to_sha1(unsigned char *hash, const char *plaintext, int len) "bytes.\n", len, PAGE_SIZE); return -ENOMEM; } - tfm = crypto_alloc_tfm("sha1", 0); + tfm = crypto_alloc_tfm("sha1", CRYPTO_TFM_REQ_MAY_SLEEP); if (tfm == NULL) { seclvl_printk(0, KERN_ERR, "Failed to load transform for SHA1\n"); -- cgit v1.2.3-70-g09d2 From 573dbd95964b01a942aa0c68e92b06f2c9536964 Mon Sep 17 00:00:00 2001 From: Jesper Juhl Date: Thu, 1 Sep 2005 17:44:29 -0700 Subject: [CRYPTO]: crypto_free_tfm() callers no longer need to check for NULL Since the patch to add a NULL short-circuit to crypto_free_tfm() went in, there's no longer any need for callers of that function to check for NULL. This patch removes the redundant NULL checks and also a few similar checks for NULL before calls to kfree() that I ran into while doing the crypto_free_tfm bits. I've succesfuly compile tested this patch, and a kernel with the patch applied boots and runs just fine. When I posted the patch to LKML (and other lists/people on Cc) it drew the following comments : J. Bruce Fields commented "I've no problem with the auth_gss or nfsv4 bits.--b." Sridhar Samudrala said "sctp change looks fine." Herbert Xu signed off on the patch. So, I guess this is ready to be dropped into -mm and eventually mainline. Signed-off-by: Jesper Juhl Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- drivers/net/wireless/airo.c | 3 +-- fs/nfsd/nfs4recover.c | 3 +-- net/ipv4/ah4.c | 18 ++++++------------ net/ipv4/esp4.c | 24 ++++++++---------------- net/ipv4/ipcomp.c | 3 +-- net/ipv6/addrconf.c | 6 ++---- net/ipv6/ah6.c | 18 ++++++------------ net/ipv6/esp6.c | 24 ++++++++---------------- net/ipv6/ipcomp6.c | 3 +-- net/sctp/endpointola.c | 3 +-- net/sctp/socket.c | 3 +-- net/sunrpc/auth_gss/gss_krb5_crypto.c | 3 +-- net/sunrpc/auth_gss/gss_krb5_mech.c | 9 +++------ net/sunrpc/auth_gss/gss_spkm3_mech.c | 12 ++++-------- 14 files changed, 44 insertions(+), 88 deletions(-) (limited to 'fs') diff --git a/drivers/net/wireless/airo.c b/drivers/net/wireless/airo.c index 7fdb85dda4e..2bb8170cf6f 100644 --- a/drivers/net/wireless/airo.c +++ b/drivers/net/wireless/airo.c @@ -2403,8 +2403,7 @@ void stop_airo_card( struct net_device *dev, int freeres ) } } #ifdef MICSUPPORT - if (ai->tfm) - crypto_free_tfm(ai->tfm); + crypto_free_tfm(ai->tfm); #endif del_airo_dev( dev ); free_netdev( dev ); diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c index 02132f33320..954cf893d50 100644 --- a/fs/nfsd/nfs4recover.c +++ b/fs/nfsd/nfs4recover.c @@ -114,8 +114,7 @@ nfs4_make_rec_clidname(char *dname, struct xdr_netobj *clname) kfree(cksum.data); status = nfs_ok; out: - if (tfm) - crypto_free_tfm(tfm); + crypto_free_tfm(tfm); return status; } diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c index 514c85b2631..035ad2c9e1b 100644 --- a/net/ipv4/ah4.c +++ b/net/ipv4/ah4.c @@ -263,10 +263,8 @@ static int ah_init_state(struct xfrm_state *x) error: if (ahp) { - if (ahp->work_icv) - kfree(ahp->work_icv); - if (ahp->tfm) - crypto_free_tfm(ahp->tfm); + kfree(ahp->work_icv); + crypto_free_tfm(ahp->tfm); kfree(ahp); } return -EINVAL; @@ -279,14 +277,10 @@ static void ah_destroy(struct xfrm_state *x) if (!ahp) return; - if (ahp->work_icv) { - kfree(ahp->work_icv); - ahp->work_icv = NULL; - } - if (ahp->tfm) { - crypto_free_tfm(ahp->tfm); - ahp->tfm = NULL; - } + kfree(ahp->work_icv); + ahp->work_icv = NULL; + crypto_free_tfm(ahp->tfm); + ahp->tfm = NULL; kfree(ahp); } diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index b31ffc5053d..1b5a09d1b90 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -343,22 +343,14 @@ static void esp_destroy(struct xfrm_state *x) if (!esp) return; - if (esp->conf.tfm) { - crypto_free_tfm(esp->conf.tfm); - esp->conf.tfm = NULL; - } - if (esp->conf.ivec) { - kfree(esp->conf.ivec); - esp->conf.ivec = NULL; - } - if (esp->auth.tfm) { - crypto_free_tfm(esp->auth.tfm); - esp->auth.tfm = NULL; - } - if (esp->auth.work_icv) { - kfree(esp->auth.work_icv); - esp->auth.work_icv = NULL; - } + crypto_free_tfm(esp->conf.tfm); + esp->conf.tfm = NULL; + kfree(esp->conf.ivec); + esp->conf.ivec = NULL; + crypto_free_tfm(esp->auth.tfm); + esp->auth.tfm = NULL; + kfree(esp->auth.work_icv); + esp->auth.work_icv = NULL; kfree(esp); } diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c index dcb7ee6c485..fc718df17b4 100644 --- a/net/ipv4/ipcomp.c +++ b/net/ipv4/ipcomp.c @@ -345,8 +345,7 @@ static void ipcomp_free_tfms(struct crypto_tfm **tfms) for_each_cpu(cpu) { struct crypto_tfm *tfm = *per_cpu_ptr(tfms, cpu); - if (tfm) - crypto_free_tfm(tfm); + crypto_free_tfm(tfm); } free_percpu(tfms); } diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 937ad32db77..6d6fb74f3b5 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -3593,10 +3593,8 @@ void __exit addrconf_cleanup(void) rtnl_unlock(); #ifdef CONFIG_IPV6_PRIVACY - if (likely(md5_tfm != NULL)) { - crypto_free_tfm(md5_tfm); - md5_tfm = NULL; - } + crypto_free_tfm(md5_tfm); + md5_tfm = NULL; #endif #ifdef CONFIG_PROC_FS diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c index 0ebfad907a0..f3629730eb1 100644 --- a/net/ipv6/ah6.c +++ b/net/ipv6/ah6.c @@ -401,10 +401,8 @@ static int ah6_init_state(struct xfrm_state *x) error: if (ahp) { - if (ahp->work_icv) - kfree(ahp->work_icv); - if (ahp->tfm) - crypto_free_tfm(ahp->tfm); + kfree(ahp->work_icv); + crypto_free_tfm(ahp->tfm); kfree(ahp); } return -EINVAL; @@ -417,14 +415,10 @@ static void ah6_destroy(struct xfrm_state *x) if (!ahp) return; - if (ahp->work_icv) { - kfree(ahp->work_icv); - ahp->work_icv = NULL; - } - if (ahp->tfm) { - crypto_free_tfm(ahp->tfm); - ahp->tfm = NULL; - } + kfree(ahp->work_icv); + ahp->work_icv = NULL; + crypto_free_tfm(ahp->tfm); + ahp->tfm = NULL; kfree(ahp); } diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index e8bff9d3d96..9b27460f0cc 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -276,22 +276,14 @@ static void esp6_destroy(struct xfrm_state *x) if (!esp) return; - if (esp->conf.tfm) { - crypto_free_tfm(esp->conf.tfm); - esp->conf.tfm = NULL; - } - if (esp->conf.ivec) { - kfree(esp->conf.ivec); - esp->conf.ivec = NULL; - } - if (esp->auth.tfm) { - crypto_free_tfm(esp->auth.tfm); - esp->auth.tfm = NULL; - } - if (esp->auth.work_icv) { - kfree(esp->auth.work_icv); - esp->auth.work_icv = NULL; - } + crypto_free_tfm(esp->conf.tfm); + esp->conf.tfm = NULL; + kfree(esp->conf.ivec); + esp->conf.ivec = NULL; + crypto_free_tfm(esp->auth.tfm); + esp->auth.tfm = NULL; + kfree(esp->auth.work_icv); + esp->auth.work_icv = NULL; kfree(esp); } diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c index 135383ef538..85bfbc69b2c 100644 --- a/net/ipv6/ipcomp6.c +++ b/net/ipv6/ipcomp6.c @@ -341,8 +341,7 @@ static void ipcomp6_free_tfms(struct crypto_tfm **tfms) for_each_cpu(cpu) { struct crypto_tfm *tfm = *per_cpu_ptr(tfms, cpu); - if (tfm) - crypto_free_tfm(tfm); + crypto_free_tfm(tfm); } free_percpu(tfms); } diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c index e47ac0d1a6d..e22ccd65596 100644 --- a/net/sctp/endpointola.c +++ b/net/sctp/endpointola.c @@ -193,8 +193,7 @@ static void sctp_endpoint_destroy(struct sctp_endpoint *ep) sctp_unhash_endpoint(ep); /* Free up the HMAC transform. */ - if (sctp_sk(ep->base.sk)->hmac) - sctp_crypto_free_tfm(sctp_sk(ep->base.sk)->hmac); + sctp_crypto_free_tfm(sctp_sk(ep->base.sk)->hmac); /* Cleanup. */ sctp_inq_free(&ep->base.inqueue); diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 4454afe4727..91ec8c93691 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -4194,8 +4194,7 @@ out: sctp_release_sock(sk); return err; cleanup: - if (tfm) - sctp_crypto_free_tfm(tfm); + sctp_crypto_free_tfm(tfm); goto out; } diff --git a/net/sunrpc/auth_gss/gss_krb5_crypto.c b/net/sunrpc/auth_gss/gss_krb5_crypto.c index 7ad74449b18..ee6ae74cd1b 100644 --- a/net/sunrpc/auth_gss/gss_krb5_crypto.c +++ b/net/sunrpc/auth_gss/gss_krb5_crypto.c @@ -199,8 +199,7 @@ make_checksum(s32 cksumtype, char *header, int hdrlen, struct xdr_buf *body, crypto_digest_final(tfm, cksum->data); code = 0; out: - if (tfm) - crypto_free_tfm(tfm); + crypto_free_tfm(tfm); return code; } diff --git a/net/sunrpc/auth_gss/gss_krb5_mech.c b/net/sunrpc/auth_gss/gss_krb5_mech.c index cf726510df8..606a8a82caf 100644 --- a/net/sunrpc/auth_gss/gss_krb5_mech.c +++ b/net/sunrpc/auth_gss/gss_krb5_mech.c @@ -185,12 +185,9 @@ static void gss_delete_sec_context_kerberos(void *internal_ctx) { struct krb5_ctx *kctx = internal_ctx; - if (kctx->seq) - crypto_free_tfm(kctx->seq); - if (kctx->enc) - crypto_free_tfm(kctx->enc); - if (kctx->mech_used.data) - kfree(kctx->mech_used.data); + crypto_free_tfm(kctx->seq); + crypto_free_tfm(kctx->enc); + kfree(kctx->mech_used.data); kfree(kctx); } diff --git a/net/sunrpc/auth_gss/gss_spkm3_mech.c b/net/sunrpc/auth_gss/gss_spkm3_mech.c index dad05994c3e..6c97d61baa9 100644 --- a/net/sunrpc/auth_gss/gss_spkm3_mech.c +++ b/net/sunrpc/auth_gss/gss_spkm3_mech.c @@ -214,14 +214,10 @@ static void gss_delete_sec_context_spkm3(void *internal_ctx) { struct spkm3_ctx *sctx = internal_ctx; - if(sctx->derived_integ_key) - crypto_free_tfm(sctx->derived_integ_key); - if(sctx->derived_conf_key) - crypto_free_tfm(sctx->derived_conf_key); - if(sctx->share_key.data) - kfree(sctx->share_key.data); - if(sctx->mech_used.data) - kfree(sctx->mech_used.data); + crypto_free_tfm(sctx->derived_integ_key); + crypto_free_tfm(sctx->derived_conf_key); + kfree(sctx->share_key.data); + kfree(sctx->mech_used.data); kfree(sctx); } -- cgit v1.2.3-70-g09d2 From 616b1c7238f0de5cec12045267a924035f8ed317 Mon Sep 17 00:00:00 2001 From: Dean Roehrich Date: Fri, 2 Sep 2005 15:30:57 +1000 Subject: [XFS] Update copyrights SGI-PV: 933551 SGI-Modid: xfs-linux:xfs-kern:190625a Signed-off-by: Dean Roehrich Signed-off-by: Nathan Scott --- fs/xfs/xfs_dmapi.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/xfs/xfs_dmapi.h b/fs/xfs/xfs_dmapi.h index 55c17adaaa3..19e872856f6 100644 --- a/fs/xfs/xfs_dmapi.h +++ b/fs/xfs/xfs_dmapi.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2004 Silicon Graphics, Inc. All Rights Reserved. + * Copyright (c) 2000-2005 Silicon Graphics, Inc. All Rights Reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms of version 2 of the GNU General Public License as -- cgit v1.2.3-70-g09d2 From 536388be42c938fb6d0eece681526ce13bb50aab Mon Sep 17 00:00:00 2001 From: Dean Roehrich Date: Fri, 2 Sep 2005 15:35:43 +1000 Subject: [XFS] upate copyrights SGI-PV: 933765 SGI-Modid: xfs-linux:xfs-kern:190760a Signed-off-by: Dean Roehrich Signed-off-by: Nathan Scott --- fs/xfs/linux-2.6/xfs_vnode.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_vnode.h b/fs/xfs/linux-2.6/xfs_vnode.h index a6e57c647be..56d85d85fb0 100644 --- a/fs/xfs/linux-2.6/xfs_vnode.h +++ b/fs/xfs/linux-2.6/xfs_vnode.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2003 Silicon Graphics, Inc. All Rights Reserved. + * Copyright (c) 2000-2005 Silicon Graphics, Inc. All Rights Reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms of version 2 of the GNU General Public License as -- cgit v1.2.3-70-g09d2 From bb3f724e12eb9c62c92ff6f14a856bc58ba35f5e Mon Sep 17 00:00:00 2001 From: Dean Roehrich Date: Fri, 2 Sep 2005 15:43:05 +1000 Subject: [XFS] send dmapi events from nopage for mmapped files SGI-PV: 935317 SGI-Modid: xfs-linux:xfs-kern:192007a Signed-off-by: Dean Roehrich Signed-off-by: Nathan Scott --- fs/xfs/linux-2.6/xfs_file.c | 90 ++++++++++++++++----------------------------- 1 file changed, 32 insertions(+), 58 deletions(-) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c index f1ce4323f56..3881622bcf0 100644 --- a/fs/xfs/linux-2.6/xfs_file.c +++ b/fs/xfs/linux-2.6/xfs_file.c @@ -311,6 +311,31 @@ linvfs_fsync( #define nextdp(dp) ((struct xfs_dirent *)((char *)(dp) + (dp)->d_reclen)) +#ifdef CONFIG_XFS_DMAPI + +STATIC struct page * +linvfs_filemap_nopage( + struct vm_area_struct *area, + unsigned long address, + int *type) +{ + struct inode *inode = area->vm_file->f_dentry->d_inode; + vnode_t *vp = LINVFS_GET_VP(inode); + xfs_mount_t *mp = XFS_VFSTOM(vp->v_vfsp); + int error; + + ASSERT_ALWAYS(vp->v_vfsp->vfs_flag & VFS_DMI); + + error = XFS_SEND_MMAP(mp, area, 0); + if (error) + return NULL; + + return filemap_nopage(area, address, type); +} + +#endif /* CONFIG_XFS_DMAPI */ + + STATIC int linvfs_readdir( struct file *filp, @@ -390,14 +415,6 @@ done: return -error; } -#ifdef CONFIG_XFS_DMAPI -STATIC void -linvfs_mmap_close( - struct vm_area_struct *vma) -{ - xfs_dm_mm_put(vma); -} -#endif /* CONFIG_XFS_DMAPI */ STATIC int linvfs_file_mmap( @@ -411,16 +428,11 @@ linvfs_file_mmap( vma->vm_ops = &linvfs_file_vm_ops; - if (vp->v_vfsp->vfs_flag & VFS_DMI) { - xfs_mount_t *mp = XFS_VFSTOM(vp->v_vfsp); - - error = -XFS_SEND_MMAP(mp, vma, 0); - if (error) - return error; #ifdef CONFIG_XFS_DMAPI + if (vp->v_vfsp->vfs_flag & VFS_DMI) { vma->vm_ops = &linvfs_dmapi_file_vm_ops; -#endif } +#endif /* CONFIG_XFS_DMAPI */ VOP_SETATTR(vp, &va, XFS_AT_UPDATIME, NULL, error); if (!error) @@ -474,6 +486,7 @@ linvfs_ioctl_invis( return error; } +#ifdef CONFIG_XFS_DMAPI #ifdef HAVE_VMOP_MPROTECT STATIC int linvfs_mprotect( @@ -494,6 +507,7 @@ linvfs_mprotect( return error; } #endif /* HAVE_VMOP_MPROTECT */ +#endif /* CONFIG_XFS_DMAPI */ #ifdef HAVE_FOP_OPEN_EXEC /* If the user is attempting to execute a file that is offline then @@ -528,49 +542,10 @@ open_exec_out: } #endif /* HAVE_FOP_OPEN_EXEC */ -/* - * Temporary workaround to the AIO direct IO write problem. - * This code can go and we can revert to do_sync_write once - * the writepage(s) rework is merged. - */ -STATIC ssize_t -linvfs_write( - struct file *filp, - const char __user *buf, - size_t len, - loff_t *ppos) -{ - struct kiocb kiocb; - ssize_t ret; - - init_sync_kiocb(&kiocb, filp); - kiocb.ki_pos = *ppos; - ret = __linvfs_write(&kiocb, buf, 0, len, kiocb.ki_pos); - *ppos = kiocb.ki_pos; - return ret; -} -STATIC ssize_t -linvfs_write_invis( - struct file *filp, - const char __user *buf, - size_t len, - loff_t *ppos) -{ - struct kiocb kiocb; - ssize_t ret; - - init_sync_kiocb(&kiocb, filp); - kiocb.ki_pos = *ppos; - ret = __linvfs_write(&kiocb, buf, IO_INVIS, len, kiocb.ki_pos); - *ppos = kiocb.ki_pos; - return ret; -} - - struct file_operations linvfs_file_operations = { .llseek = generic_file_llseek, .read = do_sync_read, - .write = linvfs_write, + .write = do_sync_write, .readv = linvfs_readv, .writev = linvfs_writev, .aio_read = linvfs_aio_read, @@ -592,7 +567,7 @@ struct file_operations linvfs_file_operations = { struct file_operations linvfs_invis_file_operations = { .llseek = generic_file_llseek, .read = do_sync_read, - .write = linvfs_write_invis, + .write = do_sync_write, .readv = linvfs_readv_invis, .writev = linvfs_writev_invis, .aio_read = linvfs_aio_read_invis, @@ -626,8 +601,7 @@ static struct vm_operations_struct linvfs_file_vm_ops = { #ifdef CONFIG_XFS_DMAPI static struct vm_operations_struct linvfs_dmapi_file_vm_ops = { - .close = linvfs_mmap_close, - .nopage = filemap_nopage, + .nopage = linvfs_filemap_nopage, .populate = filemap_populate, #ifdef HAVE_VMOP_MPROTECT .mprotect = linvfs_mprotect, -- cgit v1.2.3-70-g09d2 From cdb626878f6f5e37d678d30c9cacf5726b88a656 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 2 Sep 2005 16:24:19 +1000 Subject: [XFS] replace vn_get usage by ihold SGI-PV: 938306 SGI-Modid: xfs-linux:xfs-kern:194627a Signed-off-by: Christoph Hellwig Signed-off-by: Nathan Scott --- fs/xfs/linux-2.6/xfs_vnode.c | 24 ------------------------ fs/xfs/linux-2.6/xfs_vnode.h | 21 ++++++--------------- fs/xfs/quota/xfs_qm_syscalls.c | 16 +++++----------- fs/xfs/xfs_vfsops.c | 36 ++++-------------------------------- 4 files changed, 15 insertions(+), 82 deletions(-) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_vnode.c b/fs/xfs/linux-2.6/xfs_vnode.c index 250cad54e89..353276bda34 100644 --- a/fs/xfs/linux-2.6/xfs_vnode.c +++ b/fs/xfs/linux-2.6/xfs_vnode.c @@ -162,30 +162,6 @@ vn_initialize( return vp; } -/* - * Get a reference on a vnode. - */ -vnode_t * -vn_get( - struct vnode *vp, - vmap_t *vmap) -{ - struct inode *inode; - - XFS_STATS_INC(vn_get); - inode = LINVFS_GET_IP(vp); - if (inode->i_state & I_FREEING) - return NULL; - - inode = ilookup(vmap->v_vfsp->vfs_super, vmap->v_ino); - if (!inode) /* Inode not present */ - return NULL; - - vn_trace_exit(vp, "vn_get", (inst_t *)__return_address); - - return vp; -} - /* * Revalidate the Linux inode from the vattr. * Note: i_size _not_ updated; we must hold the inode diff --git a/fs/xfs/linux-2.6/xfs_vnode.h b/fs/xfs/linux-2.6/xfs_vnode.h index 56d85d85fb0..6cb0a01df25 100644 --- a/fs/xfs/linux-2.6/xfs_vnode.h +++ b/fs/xfs/linux-2.6/xfs_vnode.h @@ -504,20 +504,6 @@ extern void vn_init(void); extern int vn_wait(struct vnode *); extern vnode_t *vn_initialize(struct inode *); -/* - * Acquiring and invalidating vnodes: - * - * if (vn_get(vp, version, 0)) - * ...; - * vn_purge(vp, version); - * - * vn_get and vn_purge must be called with vmap_t arguments, sampled - * while a lock that the vnode's VOP_RECLAIM function acquires is - * held, to ensure that the vnode sampled with the lock held isn't - * recycled (VOP_RECLAIMed) or deallocated between the release of the lock - * and the subsequent vn_get or vn_purge. - */ - /* * vnode_map structures _must_ match vn_epoch and vnode structure sizes. */ @@ -532,7 +518,6 @@ typedef struct vnode_map { (vmap).v_ino = (vp)->v_inode.i_ino; } extern void vn_purge(struct vnode *, vmap_t *); -extern vnode_t *vn_get(struct vnode *, vmap_t *); extern int vn_revalidate(struct vnode *); extern void vn_revalidate_core(struct vnode *, vattr_t *); extern void vn_remove(struct vnode *); @@ -560,6 +545,12 @@ extern void vn_rele(struct vnode *); #define VN_RELE(vp) (iput(LINVFS_GET_IP(vp))) #endif +static inline struct vnode *vn_grab(struct vnode *vp) +{ + struct inode *inode = igrab(LINVFS_GET_IP(vp)); + return inode ? LINVFS_GET_VP(inode) : NULL; +} + /* * Vname handling macros. */ diff --git a/fs/xfs/quota/xfs_qm_syscalls.c b/fs/xfs/quota/xfs_qm_syscalls.c index 68e98962dbe..15e02e8a9d4 100644 --- a/fs/xfs/quota/xfs_qm_syscalls.c +++ b/fs/xfs/quota/xfs_qm_syscalls.c @@ -1053,7 +1053,6 @@ xfs_qm_dqrele_all_inodes( struct xfs_mount *mp, uint flags) { - vmap_t vmap; xfs_inode_t *ip, *topino; uint ireclaims; vnode_t *vp; @@ -1061,8 +1060,8 @@ xfs_qm_dqrele_all_inodes( ASSERT(mp->m_quotainfo); -again: XFS_MOUNT_ILOCK(mp); +again: ip = mp->m_inodes; if (ip == NULL) { XFS_MOUNT_IUNLOCK(mp); @@ -1090,18 +1089,14 @@ again: } vnode_refd = B_FALSE; if (xfs_ilock_nowait(ip, XFS_ILOCK_EXCL) == 0) { - /* - * Sample vp mapping while holding the mplock, lest - * we come across a non-existent vnode. - */ - VMAP(vp, vmap); ireclaims = mp->m_ireclaims; topino = mp->m_inodes; - XFS_MOUNT_IUNLOCK(mp); + vp = vn_grab(vp); + if (!vp) + goto again; + XFS_MOUNT_IUNLOCK(mp); /* XXX restart limit ? */ - if ( ! (vp = vn_get(vp, &vmap))) - goto again; xfs_ilock(ip, XFS_ILOCK_EXCL); vnode_refd = B_TRUE; } else { @@ -1137,7 +1132,6 @@ again: */ if (topino != mp->m_inodes || mp->m_ireclaims != ireclaims) { /* XXX use a sentinel */ - XFS_MOUNT_IUNLOCK(mp); goto again; } ip = ip->i_mnext; diff --git a/fs/xfs/xfs_vfsops.c b/fs/xfs/xfs_vfsops.c index 42bcc021520..b8ce6cad2b7 100644 --- a/fs/xfs/xfs_vfsops.c +++ b/fs/xfs/xfs_vfsops.c @@ -906,7 +906,6 @@ xfs_sync_inodes( xfs_inode_t *ip_next; xfs_buf_t *bp; vnode_t *vp = NULL; - vmap_t vmap; int error; int last_error; uint64_t fflag; @@ -1101,48 +1100,21 @@ xfs_sync_inodes( * lock in xfs_ireclaim() after the inode is pulled from * the mount list will sleep until we release it here. * This keeps the vnode from being freed while we reference - * it. It is also cheaper and simpler than actually doing - * a vn_get() for every inode we touch here. + * it. */ if (xfs_ilock_nowait(ip, lock_flags) == 0) { - if ((flags & SYNC_BDFLUSH) || (vp == NULL)) { ip = ip->i_mnext; continue; } - /* - * We need to unlock the inode list lock in order - * to lock the inode. Insert a marker record into - * the inode list to remember our position, dropping - * the lock is now done inside the IPOINTER_INSERT - * macro. - * - * We also use the inode list lock to protect us - * in taking a snapshot of the vnode version number - * for use in calling vn_get(). - */ - VMAP(vp, vmap); - IPOINTER_INSERT(ip, mp); - - vp = vn_get(vp, &vmap); + vp = vn_grab(vp); if (vp == NULL) { - /* - * The vnode was reclaimed once we let go - * of the inode list lock. Skip to the - * next list entry. Remove the marker. - */ - - XFS_MOUNT_ILOCK(mp); - - mount_locked = B_TRUE; - vnode_refed = B_FALSE; - - IPOINTER_REMOVE(ip, mp); - + ip = ip->i_mnext; continue; } + IPOINTER_INSERT(ip, mp); xfs_ilock(ip, lock_flags); ASSERT(vp == XFS_ITOV(ip)); -- cgit v1.2.3-70-g09d2 From eedb5530aad71aecbc1e99cb67f676c26280d3f9 Mon Sep 17 00:00:00 2001 From: Nathan Scott Date: Fri, 2 Sep 2005 16:39:56 +1000 Subject: [XFS] Make metadata IO completion consistent with other IO completion handlers. SGI-PV: 938409 SGI-Modid: xfs-linux:xfs-kern:22965a Signed-off-by: Nathan Scott --- fs/xfs/linux-2.6/xfs_buf.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c index df0cba239dd..58286b1d733 100644 --- a/fs/xfs/linux-2.6/xfs_buf.c +++ b/fs/xfs/linux-2.6/xfs_buf.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2004 Silicon Graphics, Inc. All Rights Reserved. + * Copyright (c) 2000-2005 Silicon Graphics, Inc. All Rights Reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms of version 2 of the GNU General Public License as @@ -1249,8 +1249,8 @@ bio_end_io_pagebuf( int error) { xfs_buf_t *pb = (xfs_buf_t *)bio->bi_private; - unsigned int i, blocksize = pb->pb_target->pbr_bsize; - struct bio_vec *bvec = bio->bi_io_vec; + unsigned int blocksize = pb->pb_target->pbr_bsize; + struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; if (bio->bi_size) return 1; @@ -1258,10 +1258,12 @@ bio_end_io_pagebuf( if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) pb->pb_error = EIO; - for (i = 0; i < bio->bi_vcnt; i++, bvec++) { + do { struct page *page = bvec->bv_page; - if (pb->pb_error) { + if (unlikely(pb->pb_error)) { + if (pb->pb_flags & PBF_READ) + ClearPageUptodate(page); SetPageError(page); } else if (blocksize == PAGE_CACHE_SIZE) { SetPageUptodate(page); @@ -1270,10 +1272,13 @@ bio_end_io_pagebuf( set_page_region(page, bvec->bv_offset, bvec->bv_len); } + if (--bvec >= bio->bi_io_vec) + prefetchw(&bvec->bv_page->flags); + if (_pagebuf_iolocked(pb)) { unlock_page(page); } - } + } while (bvec >= bio->bi_io_vec); _pagebuf_iodone(pb, 1); bio_put(bio); -- cgit v1.2.3-70-g09d2 From bcec2b7f2bf856bdf2a8780a57fe78417a513682 Mon Sep 17 00:00:00 2001 From: Nathan Scott Date: Fri, 2 Sep 2005 16:40:17 +1000 Subject: [XFS] Add a chunk of tracing code to diagnose truncate related issues. SGI-PV: 938410 SGI-Modid: xfs-linux:xfs-kern:22966a Signed-off-by: Nathan Scott --- fs/xfs/linux-2.6/xfs_aops.c | 11 +++++++++++ fs/xfs/linux-2.6/xfs_lrw.h | 7 ++++--- 2 files changed, 15 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c index a3a4b5aaf5d..bd9aba1f235 100644 --- a/fs/xfs/linux-2.6/xfs_aops.c +++ b/fs/xfs/linux-2.6/xfs_aops.c @@ -1202,6 +1202,16 @@ out_unlock: return error; } +STATIC int +linvfs_invalidate_page( + struct page *page, + unsigned long offset) +{ + xfs_page_trace(XFS_INVALIDPAGE_ENTER, + page->mapping->host, page, offset); + return block_invalidatepage(page, offset); +} + /* * Called to move a page into cleanable state - and from there * to be released. Possibly the page is already clean. We always @@ -1279,6 +1289,7 @@ struct address_space_operations linvfs_aops = { .writepage = linvfs_writepage, .sync_page = block_sync_page, .releasepage = linvfs_release_page, + .invalidatepage = linvfs_invalidate_page, .prepare_write = linvfs_prepare_write, .commit_write = generic_commit_write, .bmap = linvfs_bmap, diff --git a/fs/xfs/linux-2.6/xfs_lrw.h b/fs/xfs/linux-2.6/xfs_lrw.h index f197a720e39..6294dcdb797 100644 --- a/fs/xfs/linux-2.6/xfs_lrw.h +++ b/fs/xfs/linux-2.6/xfs_lrw.h @@ -70,9 +70,10 @@ struct xfs_iomap; #define XFS_SENDFILE_ENTER 21 #define XFS_WRITEPAGE_ENTER 22 #define XFS_RELEASEPAGE_ENTER 23 -#define XFS_IOMAP_ALLOC_ENTER 24 -#define XFS_IOMAP_ALLOC_MAP 25 -#define XFS_IOMAP_UNWRITTEN 26 +#define XFS_INVALIDPAGE_ENTER 24 +#define XFS_IOMAP_ALLOC_ENTER 25 +#define XFS_IOMAP_ALLOC_MAP 26 +#define XFS_IOMAP_UNWRITTEN 27 extern void xfs_rw_enter_trace(int, struct xfs_iocore *, void *, size_t, loff_t, int); extern void xfs_inval_cached_trace(struct xfs_iocore *, -- cgit v1.2.3-70-g09d2 From 3bdbfb104e53b367892cc9510e6722346dfb656b Mon Sep 17 00:00:00 2001 From: David Chinner Date: Fri, 2 Sep 2005 16:40:47 +1000 Subject: [XFS] Prevent the incore superblock sb_fdblocks count from leaking when we are getting ENOSPC errors on writes. When we fail to allocate space for indirect blocks in xfs_bmapi() make sure we release the direct block allocation before returning. SGI-PV: 938502 SGI-Modid: xfs-linux:xfs-kern:22986a Signed-off-by: David Chinner Signed-off-by: Nathan Scott --- fs/xfs/xfs_bmap.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c index 6f5d283888a..3e76def1283 100644 --- a/fs/xfs/xfs_bmap.c +++ b/fs/xfs/xfs_bmap.c @@ -4754,10 +4754,20 @@ xfs_bmapi( error = xfs_mod_incore_sb(mp, XFS_SBS_FDBLOCKS, -(alen), rsvd); - if (!error) + if (!error) { error = xfs_mod_incore_sb(mp, XFS_SBS_FDBLOCKS, -(indlen), rsvd); + if (error && rt) { + xfs_mod_incore_sb(ip->i_mount, + XFS_SBS_FREXTENTS, + extsz, rsvd); + } else if (error) { + xfs_mod_incore_sb(ip->i_mount, + XFS_SBS_FDBLOCKS, + alen, rsvd); + } + } if (error) { if (XFS_IS_QUOTA_ON(ip->i_mount)) -- cgit v1.2.3-70-g09d2 From ad4a8ac4e9d9cffb0a4c9ebebc6bda9d8dbbfe99 Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Fri, 2 Sep 2005 16:41:16 +1000 Subject: [XFS] Fix check for writeable file in xfs_ioc_space ioctl code SGI-PV: 938905 SGI-Modid: xfs-linux:xfs-kern:195240a Signed-off-by: Eric Sandeen Signed-off-by: Nathan Scott --- fs/xfs/linux-2.6/xfs_ioctl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c index 05a447e51cc..35cbd88e1a5 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl.c +++ b/fs/xfs/linux-2.6/xfs_ioctl.c @@ -982,7 +982,7 @@ xfs_ioc_space( if (vp->v_inode.i_flags & (S_IMMUTABLE|S_APPEND)) return -XFS_ERROR(EPERM); - if (!(filp->f_flags & FMODE_WRITE)) + if (!(filp->f_mode & FMODE_WRITE)) return -XFS_ERROR(EBADF); if (vp->v_type != VREG) -- cgit v1.2.3-70-g09d2 From d52b44d07a43b723ac2fbf1bf4053031f723676c Mon Sep 17 00:00:00 2001 From: Nathan Scott Date: Fri, 2 Sep 2005 16:41:32 +1000 Subject: [XFS] Fix regression in transaction reserved-block accounting for direct writes. SGI-PV: 938145 SGI-Modid: xfs-linux:xfs-kern:23088a Signed-off-by: Nathan Scott --- fs/xfs/xfs_iomap.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 2edd6769e5d..44999d557d8 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -391,9 +391,9 @@ xfs_iomap_write_direct( xfs_bmbt_irec_t imap[XFS_WRITE_IMAPS], *imapp; xfs_bmap_free_t free_list; int aeof; - xfs_filblks_t datablocks, qblocks, resblks; + xfs_filblks_t qblocks, resblks; int committed; - int numrtextents; + int resrtextents; /* * Make sure that the dquots are there. This doesn't hold @@ -434,14 +434,14 @@ xfs_iomap_write_direct( if (!(extsz = ip->i_d.di_extsize)) extsz = mp->m_sb.sb_rextsize; - numrtextents = qblocks = (count_fsb + extsz - 1); - do_div(numrtextents, mp->m_sb.sb_rextsize); + resrtextents = qblocks = (count_fsb + extsz - 1); + do_div(resrtextents, mp->m_sb.sb_rextsize); + resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0); quota_flag = XFS_QMOPT_RES_RTBLKS; - datablocks = 0; } else { - datablocks = qblocks = count_fsb; + resrtextents = 0; + resblks = qblocks = XFS_DIOSTRAT_SPACE_RES(mp, count_fsb); quota_flag = XFS_QMOPT_RES_REGBLKS; - numrtextents = 0; } /* @@ -449,9 +449,8 @@ xfs_iomap_write_direct( */ xfs_iunlock(ip, XFS_ILOCK_EXCL); tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT); - resblks = XFS_DIOSTRAT_SPACE_RES(mp, datablocks); error = xfs_trans_reserve(tp, resblks, - XFS_WRITE_LOG_RES(mp), numrtextents, + XFS_WRITE_LOG_RES(mp), resrtextents, XFS_TRANS_PERM_LOG_RES, XFS_WRITE_LOG_COUNT); -- cgit v1.2.3-70-g09d2 From 32fb9b57aef35b82434cfb4c9de18b484fc3ec88 Mon Sep 17 00:00:00 2001 From: Tim Shimmin Date: Fri, 2 Sep 2005 16:41:43 +1000 Subject: [XFS] Fix up the calculation of the reservation overhead to hopefully include all the components which make up the transaction in the ondisk log. Having this incomplete has shown up as problems on IRIX when some v2 log changes went in. The symptom was the msg of "xfs_log_write: reservation ran out. Need to up reservation" and was seen on synchronous writes on files with lots of holes (and therefore lots of extents). SGI-PV: 931457 SGI-Modid: xfs-linux:xfs-kern:23095a Signed-off-by: Tim Shimmin Signed-off-by: Nathan Scott --- fs/xfs/xfs_log.c | 54 +++++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 41 insertions(+), 13 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 1cd2ac16387..42975cb9e53 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -3179,29 +3179,57 @@ xlog_ticket_get(xlog_t *log, * and their unit amount is the total amount of space required. * * The following lines of code account for non-transaction data - * which occupy space in the on-disk log. + * which occupy space in the on-disk log. + * + * Normal form of a transaction is: + * ... + * and then there are LR hdrs, split-recs and roundoff at end of syncs. + * + * We need to account for all the leadup data and trailer data + * around the transaction data. + * And then we need to account for the worst case in terms of using + * more space. + * The worst case will happen if: + * - the placement of the transaction happens to be such that the + * roundoff is at its maximum + * - the transaction data is synced before the commit record is synced + * i.e. | + * Therefore the commit record is in its own Log Record. + * This can happen as the commit record is called with its + * own region to xlog_write(). + * This then means that in the worst case, roundoff can happen for + * the commit-rec as well. + * The commit-rec is smaller than padding in this scenario and so it is + * not added separately. */ + /* for trans header */ + unit_bytes += sizeof(xlog_op_header_t); + unit_bytes += sizeof(xfs_trans_header_t); + /* for start-rec */ - unit_bytes += sizeof(xlog_op_header_t); + unit_bytes += sizeof(xlog_op_header_t); + + /* for LR headers */ + num_headers = ((unit_bytes + log->l_iclog_size-1) >> log->l_iclog_size_log); + unit_bytes += log->l_iclog_hsize * num_headers; - /* for padding */ + /* for commit-rec LR header - note: padding will subsume the ophdr */ + unit_bytes += log->l_iclog_hsize; + + /* for split-recs - ophdrs added when data split over LRs */ + unit_bytes += sizeof(xlog_op_header_t) * num_headers; + + /* for roundoff padding for transaction data and one for commit record */ if (XFS_SB_VERSION_HASLOGV2(&log->l_mp->m_sb) && - log->l_mp->m_sb.sb_logsunit > 1) { + log->l_mp->m_sb.sb_logsunit > 1) { /* log su roundoff */ - unit_bytes += log->l_mp->m_sb.sb_logsunit; + unit_bytes += 2*log->l_mp->m_sb.sb_logsunit; } else { /* BB roundoff */ - unit_bytes += BBSIZE; + unit_bytes += 2*BBSIZE; } - /* for commit-rec */ - unit_bytes += sizeof(xlog_op_header_t); - - /* for LR headers */ - num_headers = ((unit_bytes + log->l_iclog_size-1) >> log->l_iclog_size_log); - unit_bytes += log->l_iclog_hsize * num_headers; - tic->t_unit_res = unit_bytes; tic->t_curr_res = unit_bytes; tic->t_cnt = cnt; -- cgit v1.2.3-70-g09d2 From 7e9c63961558092d584936a874cf3fee80002eb6 Mon Sep 17 00:00:00 2001 From: Tim Shimmin Date: Fri, 2 Sep 2005 16:42:05 +1000 Subject: [XFS] 929956 add log debugging and tracing info SGI-PV: 931456 SGI-Modid: xfs-linux:xfs-kern:23155a Signed-off-by: Tim Shimmin Signed-off-by: Nathan Scott --- fs/xfs/quota/xfs_dquot_item.c | 1 + fs/xfs/xfs_buf_item.c | 4 ++ fs/xfs/xfs_extfree_item.c | 2 + fs/xfs/xfs_inode_item.c | 9 +++ fs/xfs/xfs_log.c | 161 +++++++++++++++++++++++++++++++++++++++--- fs/xfs/xfs_log.h | 38 +++++++++- fs/xfs/xfs_log_priv.h | 68 +++++++++++++++--- fs/xfs/xfs_trans.c | 3 +- fs/xfs/xfs_trans.h | 1 + 9 files changed, 265 insertions(+), 22 deletions(-) (limited to 'fs') diff --git a/fs/xfs/quota/xfs_dquot_item.c b/fs/xfs/quota/xfs_dquot_item.c index f5271b7b1e8..e74eaa7dd1b 100644 --- a/fs/xfs/quota/xfs_dquot_item.c +++ b/fs/xfs/quota/xfs_dquot_item.c @@ -509,6 +509,7 @@ xfs_qm_qoff_logitem_format(xfs_qoff_logitem_t *qf, log_vector->i_addr = (xfs_caddr_t)&(qf->qql_format); log_vector->i_len = sizeof(xfs_qoff_logitem_t); + XLOG_VEC_SET_TYPE(log_vector, XLOG_REG_TYPE_QUOTAOFF); qf->qql_format.qf_size = 1; } diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c index 30b8285ad47..a264657acfd 100644 --- a/fs/xfs/xfs_buf_item.c +++ b/fs/xfs/xfs_buf_item.c @@ -274,6 +274,7 @@ xfs_buf_item_format( ((bip->bli_format.blf_map_size - 1) * sizeof(uint))); vecp->i_addr = (xfs_caddr_t)&bip->bli_format; vecp->i_len = base_size; + XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_BFORMAT); vecp++; nvecs = 1; @@ -320,12 +321,14 @@ xfs_buf_item_format( buffer_offset = first_bit * XFS_BLI_CHUNK; vecp->i_addr = xfs_buf_offset(bp, buffer_offset); vecp->i_len = nbits * XFS_BLI_CHUNK; + XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_BCHUNK); nvecs++; break; } else if (next_bit != last_bit + 1) { buffer_offset = first_bit * XFS_BLI_CHUNK; vecp->i_addr = xfs_buf_offset(bp, buffer_offset); vecp->i_len = nbits * XFS_BLI_CHUNK; + XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_BCHUNK); nvecs++; vecp++; first_bit = next_bit; @@ -337,6 +340,7 @@ xfs_buf_item_format( buffer_offset = first_bit * XFS_BLI_CHUNK; vecp->i_addr = xfs_buf_offset(bp, buffer_offset); vecp->i_len = nbits * XFS_BLI_CHUNK; + XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_BCHUNK); /* You would think we need to bump the nvecs here too, but we do not * this number is used by recovery, and it gets confused by the boundary * split here diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c index db7cbd1bc85..cc7d1494a45 100644 --- a/fs/xfs/xfs_extfree_item.c +++ b/fs/xfs/xfs_extfree_item.c @@ -107,6 +107,7 @@ xfs_efi_item_format(xfs_efi_log_item_t *efip, log_vector->i_addr = (xfs_caddr_t)&(efip->efi_format); log_vector->i_len = size; + XLOG_VEC_SET_TYPE(log_vector, XLOG_REG_TYPE_EFI_FORMAT); ASSERT(size >= sizeof(xfs_efi_log_format_t)); } @@ -426,6 +427,7 @@ xfs_efd_item_format(xfs_efd_log_item_t *efdp, log_vector->i_addr = (xfs_caddr_t)&(efdp->efd_format); log_vector->i_len = size; + XLOG_VEC_SET_TYPE(log_vector, XLOG_REG_TYPE_EFD_FORMAT); ASSERT(size >= sizeof(xfs_efd_log_format_t)); } diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c index 0eed30f5cb1..276ec70eb7f 100644 --- a/fs/xfs/xfs_inode_item.c +++ b/fs/xfs/xfs_inode_item.c @@ -248,6 +248,7 @@ xfs_inode_item_format( vecp->i_addr = (xfs_caddr_t)&iip->ili_format; vecp->i_len = sizeof(xfs_inode_log_format_t); + XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_IFORMAT); vecp++; nvecs = 1; @@ -292,6 +293,7 @@ xfs_inode_item_format( vecp->i_addr = (xfs_caddr_t)&ip->i_d; vecp->i_len = sizeof(xfs_dinode_core_t); + XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_ICORE); vecp++; nvecs++; iip->ili_format.ilf_fields |= XFS_ILOG_CORE; @@ -349,6 +351,7 @@ xfs_inode_item_format( vecp->i_addr = (char *)(ip->i_df.if_u1.if_extents); vecp->i_len = ip->i_df.if_bytes; + XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_IEXT); } else #endif { @@ -367,6 +370,7 @@ xfs_inode_item_format( vecp->i_addr = (xfs_caddr_t)ext_buffer; vecp->i_len = xfs_iextents_copy(ip, ext_buffer, XFS_DATA_FORK); + XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_IEXT); } ASSERT(vecp->i_len <= ip->i_df.if_bytes); iip->ili_format.ilf_dsize = vecp->i_len; @@ -384,6 +388,7 @@ xfs_inode_item_format( ASSERT(ip->i_df.if_broot != NULL); vecp->i_addr = (xfs_caddr_t)ip->i_df.if_broot; vecp->i_len = ip->i_df.if_broot_bytes; + XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_IBROOT); vecp++; nvecs++; iip->ili_format.ilf_dsize = ip->i_df.if_broot_bytes; @@ -409,6 +414,7 @@ xfs_inode_item_format( ASSERT((ip->i_df.if_real_bytes == 0) || (ip->i_df.if_real_bytes == data_bytes)); vecp->i_len = (int)data_bytes; + XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_ILOCAL); vecp++; nvecs++; iip->ili_format.ilf_dsize = (unsigned)data_bytes; @@ -486,6 +492,7 @@ xfs_inode_item_format( vecp->i_len = xfs_iextents_copy(ip, ext_buffer, XFS_ATTR_FORK); #endif + XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_IATTR_EXT); iip->ili_format.ilf_asize = vecp->i_len; vecp++; nvecs++; @@ -500,6 +507,7 @@ xfs_inode_item_format( ASSERT(ip->i_afp->if_broot != NULL); vecp->i_addr = (xfs_caddr_t)ip->i_afp->if_broot; vecp->i_len = ip->i_afp->if_broot_bytes; + XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_IATTR_BROOT); vecp++; nvecs++; iip->ili_format.ilf_asize = ip->i_afp->if_broot_bytes; @@ -523,6 +531,7 @@ xfs_inode_item_format( ASSERT((ip->i_afp->if_real_bytes == 0) || (ip->i_afp->if_real_bytes == data_bytes)); vecp->i_len = (int)data_bytes; + XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_IATTR_LOCAL); vecp++; nvecs++; iip->ili_format.ilf_asize = (unsigned)data_bytes; diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 42975cb9e53..54a6f114240 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -159,11 +159,15 @@ xfs_buftarg_t *xlog_target; void xlog_trace_loggrant(xlog_t *log, xlog_ticket_t *tic, xfs_caddr_t string) { - if (! log->l_grant_trace) { - log->l_grant_trace = ktrace_alloc(1024, KM_NOSLEEP); - if (! log->l_grant_trace) + unsigned long cnts; + + if (!log->l_grant_trace) { + log->l_grant_trace = ktrace_alloc(2048, KM_NOSLEEP); + if (!log->l_grant_trace) return; } + /* ticket counts are 1 byte each */ + cnts = ((unsigned long)tic->t_ocnt) | ((unsigned long)tic->t_cnt) << 8; ktrace_enter(log->l_grant_trace, (void *)tic, @@ -178,10 +182,10 @@ xlog_trace_loggrant(xlog_t *log, xlog_ticket_t *tic, xfs_caddr_t string) (void *)((unsigned long)CYCLE_LSN(log->l_tail_lsn)), (void *)((unsigned long)BLOCK_LSN(log->l_tail_lsn)), (void *)string, - (void *)((unsigned long)13), - (void *)((unsigned long)14), - (void *)((unsigned long)15), - (void *)((unsigned long)16)); + (void *)((unsigned long)tic->t_trans_type), + (void *)cnts, + (void *)((unsigned long)tic->t_curr_res), + (void *)((unsigned long)tic->t_unit_res)); } void @@ -274,9 +278,11 @@ xfs_log_done(xfs_mount_t *mp, * Release ticket if not permanent reservation or a specifc * request has been made to release a permanent reservation. */ + xlog_trace_loggrant(log, ticket, "xfs_log_done: (non-permanent)"); xlog_ungrant_log_space(log, ticket); xlog_state_put_ticket(log, ticket); } else { + xlog_trace_loggrant(log, ticket, "xfs_log_done: (permanent)"); xlog_regrant_reserve_log_space(log, ticket); } @@ -399,7 +405,8 @@ xfs_log_reserve(xfs_mount_t *mp, int cnt, xfs_log_ticket_t *ticket, __uint8_t client, - uint flags) + uint flags, + uint t_type) { xlog_t *log = mp->m_log; xlog_ticket_t *internal_ticket; @@ -421,13 +428,19 @@ xfs_log_reserve(xfs_mount_t *mp, if (*ticket != NULL) { ASSERT(flags & XFS_LOG_PERM_RESERV); internal_ticket = (xlog_ticket_t *)*ticket; + xlog_trace_loggrant(log, internal_ticket, "xfs_log_reserve: existing ticket (permanent trans)"); xlog_grant_push_ail(mp, internal_ticket->t_unit_res); retval = xlog_regrant_write_log_space(log, internal_ticket); } else { /* may sleep if need to allocate more tickets */ internal_ticket = xlog_ticket_get(log, unit_bytes, cnt, client, flags); + internal_ticket->t_trans_type = t_type; *ticket = internal_ticket; + xlog_trace_loggrant(log, internal_ticket, + (internal_ticket->t_flags & XLOG_TIC_PERM_RESERV) ? + "xfs_log_reserve: create new ticket (permanent trans)" : + "xfs_log_reserve: create new ticket"); xlog_grant_push_ail(mp, (internal_ticket->t_unit_res * internal_ticket->t_cnt)); @@ -601,8 +614,9 @@ xfs_log_unmount_write(xfs_mount_t *mp) if (! (XLOG_FORCED_SHUTDOWN(log))) { reg[0].i_addr = (void*)&magic; reg[0].i_len = sizeof(magic); + XLOG_VEC_SET_TYPE(®[0], XLOG_REG_TYPE_UNMOUNT); - error = xfs_log_reserve(mp, 600, 1, &tic, XFS_LOG, 0); + error = xfs_log_reserve(mp, 600, 1, &tic, XFS_LOG, 0, 0); if (!error) { /* remove inited flag */ ((xlog_ticket_t *)tic)->t_flags = 0; @@ -1272,6 +1286,7 @@ xlog_commit_record(xfs_mount_t *mp, reg[0].i_addr = NULL; reg[0].i_len = 0; + XLOG_VEC_SET_TYPE(®[0], XLOG_REG_TYPE_COMMIT); ASSERT_ALWAYS(iclog); if ((error = xlog_write(mp, reg, 1, ticket, commitlsnp, @@ -1604,6 +1619,117 @@ xlog_state_finish_copy(xlog_t *log, +/* + * print out info relating to regions written which consume + * the reservation + */ +#if defined(XFS_LOG_RES_DEBUG) +STATIC void +xlog_print_tic_res(xfs_mount_t *mp, xlog_ticket_t *ticket) +{ + uint i; + uint ophdr_spc = ticket->t_res_num_ophdrs * (uint)sizeof(xlog_op_header_t); + + /* match with XLOG_REG_TYPE_* in xfs_log.h */ + static char *res_type_str[XLOG_REG_TYPE_MAX] = { + "bformat", + "bchunk", + "efi_format", + "efd_format", + "iformat", + "icore", + "iext", + "ibroot", + "ilocal", + "iattr_ext", + "iattr_broot", + "iattr_local", + "qformat", + "dquot", + "quotaoff", + "LR header", + "unmount", + "commit", + "trans header" + }; + static char *trans_type_str[XFS_TRANS_TYPE_MAX] = { + "SETATTR_NOT_SIZE", + "SETATTR_SIZE", + "INACTIVE", + "CREATE", + "CREATE_TRUNC", + "TRUNCATE_FILE", + "REMOVE", + "LINK", + "RENAME", + "MKDIR", + "RMDIR", + "SYMLINK", + "SET_DMATTRS", + "GROWFS", + "STRAT_WRITE", + "DIOSTRAT", + "WRITE_SYNC", + "WRITEID", + "ADDAFORK", + "ATTRINVAL", + "ATRUNCATE", + "ATTR_SET", + "ATTR_RM", + "ATTR_FLAG", + "CLEAR_AGI_BUCKET", + "QM_SBCHANGE", + "DUMMY1", + "DUMMY2", + "QM_QUOTAOFF", + "QM_DQALLOC", + "QM_SETQLIM", + "QM_DQCLUSTER", + "QM_QINOCREATE", + "QM_QUOTAOFF_END", + "SB_UNIT", + "FSYNC_TS", + "GROWFSRT_ALLOC", + "GROWFSRT_ZERO", + "GROWFSRT_FREE", + "SWAPEXT" + }; + + xfs_fs_cmn_err(CE_WARN, mp, + "xfs_log_write: reservation summary:\n" + " trans type = %s (%u)\n" + " unit res = %d bytes\n" + " current res = %d bytes\n" + " total reg = %u bytes (o/flow = %u bytes)\n" + " ophdrs = %u (ophdr space = %u bytes)\n" + " ophdr + reg = %u bytes\n" + " num regions = %u\n", + ((ticket->t_trans_type <= 0 || + ticket->t_trans_type > XFS_TRANS_TYPE_MAX) ? + "bad-trans-type" : trans_type_str[ticket->t_trans_type-1]), + ticket->t_trans_type, + ticket->t_unit_res, + ticket->t_curr_res, + ticket->t_res_arr_sum, ticket->t_res_o_flow, + ticket->t_res_num_ophdrs, ophdr_spc, + ticket->t_res_arr_sum + + ticket->t_res_o_flow + ophdr_spc, + ticket->t_res_num); + + for (i = 0; i < ticket->t_res_num; i++) { + uint r_type = ticket->t_res_arr[i].r_type; + cmn_err(CE_WARN, + "region[%u]: %s - %u bytes\n", + i, + ((r_type <= 0 || r_type > XLOG_REG_TYPE_MAX) ? + "bad-rtype" : res_type_str[r_type-1]), + ticket->t_res_arr[i].r_len); + } +} +#else +#define xlog_print_tic_res(mp, ticket) +#endif + /* * Write some region out to in-core log * @@ -1677,16 +1803,21 @@ xlog_write(xfs_mount_t * mp, * xlog_op_header_t and may need to be double word aligned. */ len = 0; - if (ticket->t_flags & XLOG_TIC_INITED) /* acct for start rec of xact */ + if (ticket->t_flags & XLOG_TIC_INITED) { /* acct for start rec of xact */ len += sizeof(xlog_op_header_t); + XLOG_TIC_ADD_OPHDR(ticket); + } for (index = 0; index < nentries; index++) { len += sizeof(xlog_op_header_t); /* each region gets >= 1 */ + XLOG_TIC_ADD_OPHDR(ticket); len += reg[index].i_len; + XLOG_TIC_ADD_REGION(ticket, reg[index].i_len, reg[index].i_type); } contwr = *start_lsn = 0; if (ticket->t_curr_res < len) { + xlog_print_tic_res(mp, ticket); #ifdef DEBUG xlog_panic( "xfs_log_write: reservation ran out. Need to up reservation"); @@ -1790,6 +1921,7 @@ xlog_write(xfs_mount_t * mp, len += sizeof(xlog_op_header_t); /* from splitting of region */ /* account for new log op header */ ticket->t_curr_res -= sizeof(xlog_op_header_t); + XLOG_TIC_ADD_OPHDR(ticket); } xlog_verify_dest_ptr(log, ptr); @@ -2282,6 +2414,9 @@ restart: */ if (log_offset == 0) { ticket->t_curr_res -= log->l_iclog_hsize; + XLOG_TIC_ADD_REGION(ticket, + log->l_iclog_hsize, + XLOG_REG_TYPE_LRHEADER); INT_SET(head->h_cycle, ARCH_CONVERT, log->l_curr_cycle); ASSIGN_LSN(head->h_lsn, log); ASSERT(log->l_curr_block >= 0); @@ -2468,6 +2603,7 @@ xlog_regrant_write_log_space(xlog_t *log, #endif tic->t_curr_res = tic->t_unit_res; + XLOG_TIC_RESET_RES(tic); if (tic->t_cnt > 0) return (0); @@ -2608,6 +2744,7 @@ xlog_regrant_reserve_log_space(xlog_t *log, XLOG_GRANT_SUB_SPACE(log, ticket->t_curr_res, 'w'); XLOG_GRANT_SUB_SPACE(log, ticket->t_curr_res, 'r'); ticket->t_curr_res = ticket->t_unit_res; + XLOG_TIC_RESET_RES(ticket); xlog_trace_loggrant(log, ticket, "xlog_regrant_reserve_log_space: sub current res"); xlog_verify_grant_head(log, 1); @@ -2624,6 +2761,7 @@ xlog_regrant_reserve_log_space(xlog_t *log, xlog_verify_grant_head(log, 0); GRANT_UNLOCK(log, s); ticket->t_curr_res = ticket->t_unit_res; + XLOG_TIC_RESET_RES(ticket); } /* xlog_regrant_reserve_log_space */ @@ -3237,10 +3375,13 @@ xlog_ticket_get(xlog_t *log, tic->t_tid = (xlog_tid_t)((__psint_t)tic & 0xffffffff); tic->t_clientid = client; tic->t_flags = XLOG_TIC_INITED; + tic->t_trans_type = 0; if (xflags & XFS_LOG_PERM_RESERV) tic->t_flags |= XLOG_TIC_PERM_RESERV; sv_init(&(tic->t_sema), SV_DEFAULT, "logtick"); + XLOG_TIC_RESET_RES(tic); + return tic; } /* xlog_ticket_get */ diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h index 0db122ddda3..18961119fc6 100644 --- a/fs/xfs/xfs_log.h +++ b/fs/xfs/xfs_log.h @@ -114,9 +114,44 @@ xfs_lsn_t _lsn_cmp(xfs_lsn_t lsn1, xfs_lsn_t lsn2) #define XFS_VOLUME 0x2 #define XFS_LOG 0xaa + +/* Region types for iovec's i_type */ +#if defined(XFS_LOG_RES_DEBUG) +#define XLOG_REG_TYPE_BFORMAT 1 +#define XLOG_REG_TYPE_BCHUNK 2 +#define XLOG_REG_TYPE_EFI_FORMAT 3 +#define XLOG_REG_TYPE_EFD_FORMAT 4 +#define XLOG_REG_TYPE_IFORMAT 5 +#define XLOG_REG_TYPE_ICORE 6 +#define XLOG_REG_TYPE_IEXT 7 +#define XLOG_REG_TYPE_IBROOT 8 +#define XLOG_REG_TYPE_ILOCAL 9 +#define XLOG_REG_TYPE_IATTR_EXT 10 +#define XLOG_REG_TYPE_IATTR_BROOT 11 +#define XLOG_REG_TYPE_IATTR_LOCAL 12 +#define XLOG_REG_TYPE_QFORMAT 13 +#define XLOG_REG_TYPE_DQUOT 14 +#define XLOG_REG_TYPE_QUOTAOFF 15 +#define XLOG_REG_TYPE_LRHEADER 16 +#define XLOG_REG_TYPE_UNMOUNT 17 +#define XLOG_REG_TYPE_COMMIT 18 +#define XLOG_REG_TYPE_TRANSHDR 19 +#define XLOG_REG_TYPE_MAX 19 +#endif + +#if defined(XFS_LOG_RES_DEBUG) +#define XLOG_VEC_SET_TYPE(vecp, t) ((vecp)->i_type = (t)) +#else +#define XLOG_VEC_SET_TYPE(vecp, t) +#endif + + typedef struct xfs_log_iovec { xfs_caddr_t i_addr; /* beginning address of region */ int i_len; /* length in bytes of region */ +#if defined(XFS_LOG_RES_DEBUG) + uint i_type; /* type of region */ +#endif } xfs_log_iovec_t; typedef void* xfs_log_ticket_t; @@ -159,7 +194,8 @@ int xfs_log_reserve(struct xfs_mount *mp, int count, xfs_log_ticket_t *ticket, __uint8_t clientid, - uint flags); + uint flags, + uint t_type); int xfs_log_write(struct xfs_mount *mp, xfs_log_iovec_t region[], int nentries, diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h index 1a1d452f15f..eb7fdc6ebc3 100644 --- a/fs/xfs/xfs_log_priv.h +++ b/fs/xfs/xfs_log_priv.h @@ -335,18 +335,66 @@ typedef __uint32_t xlog_tid_t; #define XLOG_COVER_OPS 5 + +/* Ticket reservation region accounting */ +#if defined(XFS_LOG_RES_DEBUG) +#define XLOG_TIC_LEN_MAX 15 +#define XLOG_TIC_RESET_RES(t) ((t)->t_res_num = \ + (t)->t_res_arr_sum = (t)->t_res_num_ophdrs = 0) +#define XLOG_TIC_ADD_OPHDR(t) ((t)->t_res_num_ophdrs++) +#define XLOG_TIC_ADD_REGION(t, len, type) \ + do { \ + if ((t)->t_res_num == XLOG_TIC_LEN_MAX) { \ + /* add to overflow and start again */ \ + (t)->t_res_o_flow += (t)->t_res_arr_sum; \ + (t)->t_res_num = 0; \ + (t)->t_res_arr_sum = 0; \ + } \ + (t)->t_res_arr[(t)->t_res_num].r_len = (len); \ + (t)->t_res_arr[(t)->t_res_num].r_type = (type); \ + (t)->t_res_arr_sum += (len); \ + (t)->t_res_num++; \ + } while (0) + +/* + * Reservation region + * As would be stored in xfs_log_iovec but without the i_addr which + * we don't care about. + */ +typedef struct xlog_res { + uint r_len; + uint r_type; +} xlog_res_t; +#else +#define XLOG_TIC_RESET_RES(t) +#define XLOG_TIC_ADD_OPHDR(t) +#define XLOG_TIC_ADD_REGION(t, len, type) +#endif + + typedef struct xlog_ticket { - sv_t t_sema; /* sleep on this semaphore :20 */ - struct xlog_ticket *t_next; /* : 4 */ - struct xlog_ticket *t_prev; /* : 4 */ - xlog_tid_t t_tid; /* transaction identifier : 4 */ - int t_curr_res; /* current reservation in bytes : 4 */ - int t_unit_res; /* unit reservation in bytes : 4 */ - __uint8_t t_ocnt; /* original count : 1 */ - __uint8_t t_cnt; /* current count : 1 */ - __uint8_t t_clientid; /* who does this belong to; : 1 */ - __uint8_t t_flags; /* properties of reservation : 1 */ + sv_t t_sema; /* sleep on this semaphore : 20 */ + struct xlog_ticket *t_next; /* :4|8 */ + struct xlog_ticket *t_prev; /* :4|8 */ + xlog_tid_t t_tid; /* transaction identifier : 4 */ + int t_curr_res; /* current reservation in bytes : 4 */ + int t_unit_res; /* unit reservation in bytes : 4 */ + char t_ocnt; /* original count : 1 */ + char t_cnt; /* current count : 1 */ + char t_clientid; /* who does this belong to; : 1 */ + char t_flags; /* properties of reservation : 1 */ + uint t_trans_type; /* transaction type : 4 */ + +#if defined (XFS_LOG_RES_DEBUG) + /* reservation array fields */ + uint t_res_num; /* num in array : 4 */ + xlog_res_t t_res_arr[XLOG_TIC_LEN_MAX]; /* array of res : X */ + uint t_res_num_ophdrs; /* num op hdrs : 4 */ + uint t_res_arr_sum; /* array sum : 4 */ + uint t_res_o_flow; /* sum overflow : 4 */ +#endif } xlog_ticket_t; + #endif diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index 06dfca531f7..92efe272b83 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c @@ -276,7 +276,7 @@ xfs_trans_reserve( error = xfs_log_reserve(tp->t_mountp, logspace, logcount, &tp->t_ticket, - XFS_TRANSACTION, log_flags); + XFS_TRANSACTION, log_flags, tp->t_type); if (error) { goto undo_blocks; } @@ -1032,6 +1032,7 @@ xfs_trans_fill_vecs( tp->t_header.th_num_items = nitems; log_vector->i_addr = (xfs_caddr_t)&tp->t_header; log_vector->i_len = sizeof(xfs_trans_header_t); + XLOG_VEC_SET_TYPE(log_vector, XLOG_REG_TYPE_TRANSHDR); } diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h index ec541d66fa2..9ee5eeee802 100644 --- a/fs/xfs/xfs_trans.h +++ b/fs/xfs/xfs_trans.h @@ -112,6 +112,7 @@ typedef struct xfs_trans_header { #define XFS_TRANS_GROWFSRT_ZERO 38 #define XFS_TRANS_GROWFSRT_FREE 39 #define XFS_TRANS_SWAPEXT 40 +#define XFS_TRANS_TYPE_MAX 40 /* new transaction types need to be reflected in xfs_logprint(8) */ -- cgit v1.2.3-70-g09d2 From e69a333b5e0c8c6b687b07665a3cb5545657d2aa Mon Sep 17 00:00:00 2001 From: Nathan Scott Date: Fri, 2 Sep 2005 16:42:26 +1000 Subject: [XFS] Add in grpid/nogrpid mount option parsing, actual code was always there.. SGI-PV: 939444 SGI-Modid: xfs-linux:xfs-kern:23162a Signed-off-by: Nathan Scott --- fs/xfs/xfs_vfsops.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/xfs/xfs_vfsops.c b/fs/xfs/xfs_vfsops.c index b8ce6cad2b7..d4b9545c2b5 100644 --- a/fs/xfs/xfs_vfsops.c +++ b/fs/xfs/xfs_vfsops.c @@ -1621,6 +1621,10 @@ xfs_vget( #define MNTOPT_SWIDTH "swidth" /* data volume stripe width */ #define MNTOPT_NOUUID "nouuid" /* ignore filesystem UUID */ #define MNTOPT_MTPT "mtpt" /* filesystem mount point */ +#define MNTOPT_GRPID "grpid" /* group-ID from parent directory */ +#define MNTOPT_NOGRPID "nogrpid" /* group-ID from current process */ +#define MNTOPT_BSDGROUPS "bsdgroups" /* group-ID from parent directory */ +#define MNTOPT_SYSVGROUPS "sysvgroups" /* group-ID from current process */ #define MNTOPT_ALLOCSIZE "allocsize" /* preferred allocation size */ #define MNTOPT_IHASHSIZE "ihashsize" /* size of inode hash table */ #define MNTOPT_NORECOVERY "norecovery" /* don't run XFS recovery */ @@ -1741,6 +1745,12 @@ xfs_parseargs( } args->flags |= XFSMNT_IHASHSIZE; args->ihashsize = simple_strtoul(value, &eov, 10); + } else if (!strcmp(this_char, MNTOPT_GRPID) || + !strcmp(this_char, MNTOPT_BSDGROUPS)) { + vfsp->vfs_flag |= VFS_GRPID; + } else if (!strcmp(this_char, MNTOPT_NOGRPID) || + !strcmp(this_char, MNTOPT_SYSVGROUPS)) { + vfsp->vfs_flag &= ~VFS_GRPID; } else if (!strcmp(this_char, MNTOPT_WSYNC)) { args->flags |= XFSMNT_WSYNC; } else if (!strcmp(this_char, MNTOPT_OSYNCISOSYNC)) { @@ -1862,6 +1872,7 @@ xfs_showargs( }; struct proc_xfs_info *xfs_infop; struct xfs_mount *mp = XFS_BHVTOM(bhv); + struct vfs *vfsp = XFS_MTOVFS(mp); for (xfs_infop = xfs_info; xfs_infop->flag; xfs_infop++) { if (mp->m_flags & xfs_infop->flag) @@ -1898,7 +1909,10 @@ xfs_showargs( if (!(mp->m_flags & XFS_MOUNT_32BITINOOPT)) seq_printf(m, "," MNTOPT_64BITINODE); - + + if (vfsp->vfs_flag & VFS_GRPID) + seq_printf(m, "," MNTOPT_GRPID); + return 0; } -- cgit v1.2.3-70-g09d2 From 155ffd075caedcea5ad595c95403c71bfc391c4a Mon Sep 17 00:00:00 2001 From: Nathan Scott Date: Fri, 2 Sep 2005 16:43:48 +1000 Subject: [XFS] Remove extraneous quotacheck diagnostics. SGI-PV: 907752 SGI-Modid: xfs-linux:xfs-kern:23163a Signed-off-by: Nathan Scott --- fs/xfs/quota/xfs_qm.c | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) (limited to 'fs') diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c index f665ca8f9e9..4badf38df5e 100644 --- a/fs/xfs/quota/xfs_qm.c +++ b/fs/xfs/quota/xfs_qm.c @@ -388,11 +388,8 @@ xfs_qm_mount_quotas( goto write_changes; } -#if defined(DEBUG) && defined(XFS_LOUD_RECOVERY) - cmn_err(CE_NOTE, "Attempting to turn on disk quotas."); -#endif - ASSERT(XFS_IS_QUOTA_RUNNING(mp)); + /* * Allocate the quotainfo structure inside the mount struct, and * create quotainode(s), and change/rev superblock if necessary. @@ -410,19 +407,14 @@ xfs_qm_mount_quotas( */ if (XFS_QM_NEED_QUOTACHECK(mp) && !(mfsi_flags & XFS_MFSI_NO_QUOTACHECK)) { -#ifdef DEBUG - cmn_err(CE_NOTE, "Doing a quotacheck. Please wait."); -#endif if ((error = xfs_qm_quotacheck(mp))) { /* Quotacheck has failed and quotas have * been disabled. */ return XFS_ERROR(error); } -#ifdef DEBUG - cmn_err(CE_NOTE, "Done quotacheck."); -#endif } + write_changes: /* * We actually don't have to acquire the SB_LOCK at all. -- cgit v1.2.3-70-g09d2 From 0432dab2d2d3b35347a95c01c78a40781b6431fb Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 2 Sep 2005 16:46:51 +1000 Subject: [XFS] remove struct vnode::v_type SGI-PV: 936236 SGI-Modid: xfs-linux:xfs-kern:195878a Signed-off-by: Christoph Hellwig Signed-off-by: Nathan Scott --- fs/xfs/linux-2.6/xfs_ioctl.c | 16 +++++++++++----- fs/xfs/linux-2.6/xfs_iops.c | 6 ++---- fs/xfs/linux-2.6/xfs_super.c | 35 ++++++++++++++++++++-------------- fs/xfs/linux-2.6/xfs_vnode.c | 16 +--------------- fs/xfs/linux-2.6/xfs_vnode.h | 26 +++++++------------------ fs/xfs/xfs_acl.c | 6 +++--- fs/xfs/xfs_inode.c | 3 +-- fs/xfs/xfs_vnodeops.c | 45 ++++++++++++++++++++++---------------------- 8 files changed, 68 insertions(+), 85 deletions(-) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c index 35cbd88e1a5..6a3326bcd8d 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl.c +++ b/fs/xfs/linux-2.6/xfs_ioctl.c @@ -141,13 +141,19 @@ xfs_find_handle( return -XFS_ERROR(EINVAL); } - /* we need the vnode */ - vp = LINVFS_GET_VP(inode); - if (vp->v_type != VREG && vp->v_type != VDIR && vp->v_type != VLNK) { + switch (inode->i_mode & S_IFMT) { + case S_IFREG: + case S_IFDIR: + case S_IFLNK: + break; + default: iput(inode); return -XFS_ERROR(EBADF); } + /* we need the vnode */ + vp = LINVFS_GET_VP(inode); + /* now we can grab the fsid */ memcpy(&handle.ha_fsid, vp->v_vfsp->vfs_altfsid, sizeof(xfs_fsid_t)); hsize = sizeof(xfs_fsid_t); @@ -386,7 +392,7 @@ xfs_readlink_by_handle( return -error; /* Restrict this handle operation to symlinks only. */ - if (vp->v_type != VLNK) { + if (!S_ISLNK(inode->i_mode)) { VN_RELE(vp); return -XFS_ERROR(EINVAL); } @@ -985,7 +991,7 @@ xfs_ioc_space( if (!(filp->f_mode & FMODE_WRITE)) return -XFS_ERROR(EBADF); - if (vp->v_type != VREG) + if (!VN_ISREG(vp)) return -XFS_ERROR(EINVAL); if (copy_from_user(&bf, arg, sizeof(bf))) diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c index f252605514e..d237cc5be76 100644 --- a/fs/xfs/linux-2.6/xfs_iops.c +++ b/fs/xfs/linux-2.6/xfs_iops.c @@ -140,7 +140,6 @@ linvfs_mknod( memset(&va, 0, sizeof(va)); va.va_mask = XFS_AT_TYPE|XFS_AT_MODE; - va.va_type = IFTOVT(mode); va.va_mode = mode; switch (mode & S_IFMT) { @@ -308,14 +307,13 @@ linvfs_symlink( cvp = NULL; memset(&va, 0, sizeof(va)); - va.va_type = VLNK; - va.va_mode = irix_symlink_mode ? 0777 & ~current->fs->umask : S_IRWXUGO; + va.va_mode = S_IFLNK | + (irix_symlink_mode ? 0777 & ~current->fs->umask : S_IRWXUGO); va.va_mask = XFS_AT_TYPE|XFS_AT_MODE; error = 0; VOP_SYMLINK(dvp, dentry, &va, (char *)symname, &cvp, NULL, error); if (!error && cvp) { - ASSERT(cvp->v_type == VLNK); ip = LINVFS_GET_IP(cvp); d_instantiate(dentry, ip); validate_fields(dir); diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index f6dd7de2592..d2c8a11e22b 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -138,24 +138,25 @@ STATIC __inline__ void xfs_set_inodeops( struct inode *inode) { - vnode_t *vp = LINVFS_GET_VP(inode); - - if (vp->v_type == VNON) { - vn_mark_bad(vp); - } else if (S_ISREG(inode->i_mode)) { + switch (inode->i_mode & S_IFMT) { + case S_IFREG: inode->i_op = &linvfs_file_inode_operations; inode->i_fop = &linvfs_file_operations; inode->i_mapping->a_ops = &linvfs_aops; - } else if (S_ISDIR(inode->i_mode)) { + break; + case S_IFDIR: inode->i_op = &linvfs_dir_inode_operations; inode->i_fop = &linvfs_dir_operations; - } else if (S_ISLNK(inode->i_mode)) { + break; + case S_IFLNK: inode->i_op = &linvfs_symlink_inode_operations; if (inode->i_blocks) inode->i_mapping->a_ops = &linvfs_aops; - } else { + break; + default: inode->i_op = &linvfs_file_inode_operations; init_special_inode(inode, inode->i_mode, inode->i_rdev); + break; } } @@ -167,16 +168,23 @@ xfs_revalidate_inode( { struct inode *inode = LINVFS_GET_IP(vp); - inode->i_mode = (ip->i_d.di_mode & MODEMASK) | VTTOIF(vp->v_type); + inode->i_mode = ip->i_d.di_mode; inode->i_nlink = ip->i_d.di_nlink; inode->i_uid = ip->i_d.di_uid; inode->i_gid = ip->i_d.di_gid; - if (((1 << vp->v_type) & ((1<i_mode & S_IFMT) { + case S_IFBLK: + case S_IFCHR: + inode->i_rdev = + MKDEV(sysv_major(ip->i_df.if_u2.if_rdev) & 0x1ff, + sysv_minor(ip->i_df.if_u2.if_rdev)); + break; + default: inode->i_rdev = 0; - } else { - xfs_dev_t dev = ip->i_df.if_u2.if_rdev; - inode->i_rdev = MKDEV(sysv_major(dev) & 0x1ff, sysv_minor(dev)); + break; } + inode->i_blksize = PAGE_CACHE_SIZE; inode->i_generation = ip->i_d.di_gen; i_size_write(inode, ip->i_d.di_size); @@ -231,7 +239,6 @@ xfs_initialize_vnode( * finish our work. */ if (ip->i_d.di_mode != 0 && unlock && (inode->i_state & I_NEW)) { - vp->v_type = IFTOVT(ip->i_d.di_mode); xfs_revalidate_inode(XFS_BHVTOM(bdp), vp, ip); xfs_set_inodeops(inode); diff --git a/fs/xfs/linux-2.6/xfs_vnode.c b/fs/xfs/linux-2.6/xfs_vnode.c index 353276bda34..ad16af38e96 100644 --- a/fs/xfs/linux-2.6/xfs_vnode.c +++ b/fs/xfs/linux-2.6/xfs_vnode.c @@ -44,19 +44,6 @@ DEFINE_SPINLOCK(vnumber_lock); #define vptosync(v) (&vsync[((unsigned long)v) % NVSYNC]) sv_t vsync[NVSYNC]; -/* - * Translate stat(2) file types to vnode types and vice versa. - * Aware of numeric order of S_IFMT and vnode type values. - */ -enum vtype iftovt_tab[] = { - VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON, - VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VNON -}; - -u_short vttoif_tab[] = { - 0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK, S_IFIFO, 0, S_IFSOCK -}; - void vn_init(void) @@ -95,7 +82,6 @@ vn_reclaim( vp->v_flag &= (VRECLM|VWAIT); VN_UNLOCK(vp, 0); - vp->v_type = VNON; vp->v_fbhv = NULL; #ifdef XFS_VNODE_TRACE @@ -174,7 +160,7 @@ vn_revalidate_core( { struct inode *inode = LINVFS_GET_IP(vp); - inode->i_mode = VTTOIF(vap->va_type) | vap->va_mode; + inode->i_mode = vap->va_mode; inode->i_nlink = vap->va_nlink; inode->i_uid = vap->va_uid; inode->i_gid = vap->va_gid; diff --git a/fs/xfs/linux-2.6/xfs_vnode.h b/fs/xfs/linux-2.6/xfs_vnode.h index 6cb0a01df25..bc9ed722ba1 100644 --- a/fs/xfs/linux-2.6/xfs_vnode.h +++ b/fs/xfs/linux-2.6/xfs_vnode.h @@ -65,10 +65,6 @@ struct vattr; struct xfs_iomap; struct attrlist_cursor_kern; -/* - * Vnode types. VNON means no type. - */ -enum vtype { VNON, VREG, VDIR, VBLK, VCHR, VLNK, VFIFO, VBAD, VSOCK }; typedef xfs_ino_t vnumber_t; typedef struct dentry vname_t; @@ -77,11 +73,9 @@ typedef bhv_head_t vn_bhv_head_t; /* * MP locking protocols: * v_flag, v_vfsp VN_LOCK/VN_UNLOCK - * v_type read-only or fs-dependent */ typedef struct vnode { __u32 v_flag; /* vnode flags (see below) */ - enum vtype v_type; /* vnode type */ struct vfs *v_vfsp; /* ptr to containing VFS */ vnumber_t v_number; /* in-core vnode number */ vn_bhv_head_t v_bh; /* behavior head */ @@ -93,6 +87,12 @@ typedef struct vnode { /* inode MUST be last */ } vnode_t; +#define VN_ISLNK(vp) S_ISLNK((vp)->v_inode.i_mode) +#define VN_ISREG(vp) S_ISREG((vp)->v_inode.i_mode) +#define VN_ISDIR(vp) S_ISDIR((vp)->v_inode.i_mode) +#define VN_ISCHR(vp) S_ISCHR((vp)->v_inode.i_mode) +#define VN_ISBLK(vp) S_ISBLK((vp)->v_inode.i_mode) + #define v_fbhv v_bh.bh_first /* first behavior */ #define v_fops v_bh.bh_first->bd_ops /* first behavior ops */ @@ -132,17 +132,6 @@ typedef enum { #define LINVFS_GET_VP(inode) ((vnode_t *)list_entry(inode, vnode_t, v_inode)) #define LINVFS_GET_IP(vp) (&(vp)->v_inode) -/* - * Convert between vnode types and inode formats (since POSIX.1 - * defines mode word of stat structure in terms of inode formats). - */ -extern enum vtype iftovt_tab[]; -extern u_short vttoif_tab[]; -#define IFTOVT(mode) (iftovt_tab[((mode) & S_IFMT) >> 12]) -#define VTTOIF(indx) (vttoif_tab[(int)(indx)]) -#define MAKEIMODE(indx, mode) (int)(VTTOIF(indx) | (mode)) - - /* * Vnode flags. */ @@ -408,7 +397,6 @@ typedef struct vnodeops { */ typedef struct vattr { int va_mask; /* bit-mask of attributes present */ - enum vtype va_type; /* vnode type (for create) */ mode_t va_mode; /* file access mode and type */ xfs_nlink_t va_nlink; /* number of references to file */ uid_t va_uid; /* owner user id */ @@ -498,7 +486,7 @@ typedef struct vattr { * Check whether mandatory file locking is enabled. */ #define MANDLOCK(vp, mode) \ - ((vp)->v_type == VREG && ((mode) & (VSGID|(VEXEC>>3))) == VSGID) + (VN_ISREG(vp) && ((mode) & (VSGID|(VEXEC>>3))) == VSGID) extern void vn_init(void); extern int vn_wait(struct vnode *); diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c index 8d01dce8c53..92fd1d67f87 100644 --- a/fs/xfs/xfs_acl.c +++ b/fs/xfs/xfs_acl.c @@ -85,7 +85,7 @@ xfs_acl_vhasacl_default( { int error; - if (vp->v_type != VDIR) + if (!VN_ISDIR(vp)) return 0; xfs_acl_get_attr(vp, NULL, _ACL_TYPE_DEFAULT, ATTR_KERNOVAL, &error); return (error == 0); @@ -389,7 +389,7 @@ xfs_acl_allow_set( if (vp->v_inode.i_flags & (S_IMMUTABLE|S_APPEND)) return EPERM; - if (kind == _ACL_TYPE_DEFAULT && vp->v_type != VDIR) + if (kind == _ACL_TYPE_DEFAULT && !VN_ISDIR(vp)) return ENOTDIR; if (vp->v_vfsp->vfs_flag & VFS_RDONLY) return EROFS; @@ -750,7 +750,7 @@ xfs_acl_inherit( * If the new file is a directory, its default ACL is a copy of * the containing directory's default ACL. */ - if (vp->v_type == VDIR) + if (VN_ISDIR(vp)) xfs_acl_set_attr(vp, pdaclp, _ACL_TYPE_DEFAULT, &error); if (!error && !basicperms) xfs_acl_set_attr(vp, cacl, _ACL_TYPE_ACCESS, &error); diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 34bdf590968..db43308aae9 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -1128,7 +1128,6 @@ xfs_ialloc( ASSERT(ip != NULL); vp = XFS_ITOV(ip); - vp->v_type = IFTOVT(mode); ip->i_d.di_mode = (__uint16_t)mode; ip->i_d.di_onlink = 0; ip->i_d.di_nlink = nlink; @@ -1250,7 +1249,7 @@ xfs_ialloc( */ xfs_trans_log_inode(tp, ip, flags); - /* now that we have a v_type we can set Linux inode ops (& unlock) */ + /* now that we have an i_mode we can set Linux inode ops (& unlock) */ VFS_INIT_VNODE(XFS_MTOVFS(tp->t_mountp), vp, XFS_ITOBHV(ip), 1); *ipp = ip; diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c index 1377c868f3f..c4aa24ff85a 100644 --- a/fs/xfs/xfs_vnodeops.c +++ b/fs/xfs/xfs_vnodeops.c @@ -104,7 +104,7 @@ xfs_open( * If it's a directory with any blocks, read-ahead block 0 * as we're almost certain to have the next operation be a read there. */ - if (vp->v_type == VDIR && ip->i_d.di_nextents > 0) { + if (VN_ISDIR(vp) && ip->i_d.di_nextents > 0) { mode = xfs_ilock_map_shared(ip); if (ip->i_d.di_nextents > 0) (void)xfs_da_reada_buf(NULL, ip, 0, XFS_DATA_FORK); @@ -163,18 +163,21 @@ xfs_getattr( /* * Copy from in-core inode. */ - vap->va_type = vp->v_type; - vap->va_mode = ip->i_d.di_mode & MODEMASK; + vap->va_mode = ip->i_d.di_mode; vap->va_uid = ip->i_d.di_uid; vap->va_gid = ip->i_d.di_gid; vap->va_projid = ip->i_d.di_projid; /* * Check vnode type block/char vs. everything else. - * Do it with bitmask because that's faster than looking - * for multiple values individually. */ - if (((1 << vp->v_type) & ((1<i_d.di_mode & S_IFMT) { + case S_IFBLK: + case S_IFCHR: + vap->va_rdev = ip->i_df.if_u2.if_rdev; + vap->va_blocksize = BLKDEV_IOSIZE; + break; + default: vap->va_rdev = 0; if (!(ip->i_d.di_flags & XFS_DIFLAG_REALTIME)) { @@ -224,9 +227,7 @@ xfs_getattr( (ip->i_d.di_extsize << mp->m_sb.sb_blocklog) : (mp->m_sb.sb_rextsize << mp->m_sb.sb_blocklog); } - } else { - vap->va_rdev = ip->i_df.if_u2.if_rdev; - vap->va_blocksize = BLKDEV_IOSIZE; + break; } vap->va_atime.tv_sec = ip->i_d.di_atime.t_sec; @@ -468,7 +469,7 @@ xfs_setattr( m |= S_ISGID; #if 0 /* Linux allows this, Irix doesn't. */ - if ((vap->va_mode & S_ISVTX) && vp->v_type != VDIR) + if ((vap->va_mode & S_ISVTX) && !VN_ISDIR(vp)) m |= S_ISVTX; #endif if (m && !capable(CAP_FSETID)) @@ -546,10 +547,10 @@ xfs_setattr( goto error_return; } - if (vp->v_type == VDIR) { + if (VN_ISDIR(vp)) { code = XFS_ERROR(EISDIR); goto error_return; - } else if (vp->v_type != VREG) { + } else if (!VN_ISREG(vp)) { code = XFS_ERROR(EINVAL); goto error_return; } @@ -1567,7 +1568,7 @@ xfs_release( vp = BHV_TO_VNODE(bdp); ip = XFS_BHVTOI(bdp); - if ((vp->v_type != VREG) || (ip->i_d.di_mode == 0)) { + if (!VN_ISREG(vp) || (ip->i_d.di_mode == 0)) { return 0; } @@ -1895,7 +1896,7 @@ xfs_create( dp = XFS_BHVTOI(dir_bdp); mp = dp->i_mount; - dm_di_mode = vap->va_mode|VTTOIF(vap->va_type); + dm_di_mode = vap->va_mode; namelen = VNAMELEN(dentry); if (DM_EVENT_ENABLED(dir_vp->v_vfsp, dp, DM_EVENT_CREATE)) { @@ -1973,8 +1974,7 @@ xfs_create( (error = XFS_DIR_CANENTER(mp, tp, dp, name, namelen))) goto error_return; rdev = (vap->va_mask & XFS_AT_RDEV) ? vap->va_rdev : 0; - error = xfs_dir_ialloc(&tp, dp, - MAKEIMODE(vap->va_type,vap->va_mode), 1, + error = xfs_dir_ialloc(&tp, dp, vap->va_mode, 1, rdev, credp, prid, resblks > 0, &ip, &committed); if (error) { @@ -2620,7 +2620,7 @@ xfs_link( vn_trace_entry(src_vp, __FUNCTION__, (inst_t *)__return_address); target_namelen = VNAMELEN(dentry); - if (src_vp->v_type == VDIR) + if (VN_ISDIR(src_vp)) return XFS_ERROR(EPERM); src_bdp = vn_bhv_lookup_unlocked(VN_BHV_HEAD(src_vp), &xfs_vnodeops); @@ -2805,7 +2805,7 @@ xfs_mkdir( tp = NULL; dp_joined_to_trans = B_FALSE; - dm_di_mode = vap->va_mode|VTTOIF(vap->va_type); + dm_di_mode = vap->va_mode; if (DM_EVENT_ENABLED(dir_vp->v_vfsp, dp, DM_EVENT_CREATE)) { error = XFS_SEND_NAMESP(mp, DM_EVENT_CREATE, @@ -2879,8 +2879,7 @@ xfs_mkdir( /* * create the directory inode. */ - error = xfs_dir_ialloc(&tp, dp, - MAKEIMODE(vap->va_type,vap->va_mode), 2, + error = xfs_dir_ialloc(&tp, dp, vap->va_mode, 2, 0, credp, prid, resblks > 0, &cdp, NULL); if (error) { @@ -3650,7 +3649,7 @@ xfs_rwlock( vnode_t *vp; vp = BHV_TO_VNODE(bdp); - if (vp->v_type == VDIR) + if (VN_ISDIR(vp)) return 1; ip = XFS_BHVTOI(bdp); if (locktype == VRWLOCK_WRITE) { @@ -3681,7 +3680,7 @@ xfs_rwunlock( vnode_t *vp; vp = BHV_TO_VNODE(bdp); - if (vp->v_type == VDIR) + if (VN_ISDIR(vp)) return; ip = XFS_BHVTOI(bdp); if (locktype == VRWLOCK_WRITE) { @@ -4567,7 +4566,7 @@ xfs_change_file_space( /* * must be a regular file and have write permission */ - if (vp->v_type != VREG) + if (!VN_ISREG(vp)) return XFS_ERROR(EINVAL); xfs_ilock(ip, XFS_ILOCK_SHARED); -- cgit v1.2.3-70-g09d2 From 6f948fbd443255e3a918438ce41cd7581cf8146d Mon Sep 17 00:00:00 2001 From: Tim Shimmin Date: Fri, 2 Sep 2005 16:52:55 +1000 Subject: [XFS] Need to unlock the AIL before calling xfs_force_shutdown() because when it goes to force out the log, and get the tail lsn, it will want to get the AIL lock. SGI-PV: 940076 SGI-Modid: xfs-linux:xfs-kern:23260a Signed-off-by: Tim Shimmin Signed-off-by: Nathan Scott --- fs/xfs/xfs_trans_ail.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c index 7bc5eab4c2c..2a71b4f91bf 100644 --- a/fs/xfs/xfs_trans_ail.c +++ b/fs/xfs/xfs_trans_ail.c @@ -379,8 +379,8 @@ xfs_trans_delete_ail( else { xfs_cmn_err(XFS_PTAG_AILDELETE, CE_ALERT, mp, "xfs_trans_delete_ail: attempting to delete a log item that is not in the AIL"); - xfs_force_shutdown(mp, XFS_CORRUPT_INCORE); AIL_UNLOCK(mp, s); + xfs_force_shutdown(mp, XFS_CORRUPT_INCORE); } } } -- cgit v1.2.3-70-g09d2 From 760dea671ea9c5b8c732d76d09673d6d052a186f Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 2 Sep 2005 16:56:02 +1000 Subject: [XFS] Fix sparse warnings in kmem_* functions Patch from Victor Fusco SGI-PV: 940376 SGI-Modid: xfs-linux:xfs-kern:196705a Signed-off-by: Christoph Hellwig Signed-off-by: Nathan Scott --- fs/xfs/linux-2.6/kmem.c | 23 ++++++++++++----------- fs/xfs/linux-2.6/kmem.h | 23 ++++++++++++----------- fs/xfs/xfs_log_recover.c | 2 +- 3 files changed, 25 insertions(+), 23 deletions(-) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/kmem.c b/fs/xfs/linux-2.6/kmem.c index 364ea8c386b..4b184559f23 100644 --- a/fs/xfs/linux-2.6/kmem.c +++ b/fs/xfs/linux-2.6/kmem.c @@ -45,11 +45,11 @@ void * -kmem_alloc(size_t size, int flags) +kmem_alloc(size_t size, unsigned int __nocast flags) { - int retries = 0; - int lflags = kmem_flags_convert(flags); - void *ptr; + int retries = 0; + unsigned int lflags = kmem_flags_convert(flags); + void *ptr; do { if (size < MAX_SLAB_SIZE || retries > MAX_VMALLOCS) @@ -67,7 +67,7 @@ kmem_alloc(size_t size, int flags) } void * -kmem_zalloc(size_t size, int flags) +kmem_zalloc(size_t size, unsigned int __nocast flags) { void *ptr; @@ -89,7 +89,8 @@ kmem_free(void *ptr, size_t size) } void * -kmem_realloc(void *ptr, size_t newsize, size_t oldsize, int flags) +kmem_realloc(void *ptr, size_t newsize, size_t oldsize, + unsigned int __nocast flags) { void *new; @@ -104,11 +105,11 @@ kmem_realloc(void *ptr, size_t newsize, size_t oldsize, int flags) } void * -kmem_zone_alloc(kmem_zone_t *zone, int flags) +kmem_zone_alloc(kmem_zone_t *zone, unsigned int __nocast flags) { - int retries = 0; - int lflags = kmem_flags_convert(flags); - void *ptr; + int retries = 0; + unsigned int lflags = kmem_flags_convert(flags); + void *ptr; do { ptr = kmem_cache_alloc(zone, lflags); @@ -123,7 +124,7 @@ kmem_zone_alloc(kmem_zone_t *zone, int flags) } void * -kmem_zone_zalloc(kmem_zone_t *zone, int flags) +kmem_zone_zalloc(kmem_zone_t *zone, unsigned int __nocast flags) { void *ptr; diff --git a/fs/xfs/linux-2.6/kmem.h b/fs/xfs/linux-2.6/kmem.h index 1397b669b05..109fcf27e25 100644 --- a/fs/xfs/linux-2.6/kmem.h +++ b/fs/xfs/linux-2.6/kmem.h @@ -39,10 +39,10 @@ /* * memory management routines */ -#define KM_SLEEP 0x0001 -#define KM_NOSLEEP 0x0002 -#define KM_NOFS 0x0004 -#define KM_MAYFAIL 0x0008 +#define KM_SLEEP 0x0001u +#define KM_NOSLEEP 0x0002u +#define KM_NOFS 0x0004u +#define KM_MAYFAIL 0x0008u #define kmem_zone kmem_cache_s #define kmem_zone_t kmem_cache_t @@ -81,9 +81,9 @@ typedef unsigned long xfs_pflags_t; *(NSTATEP) = *(OSTATEP); \ } while (0) -static __inline unsigned int kmem_flags_convert(int flags) +static __inline unsigned int kmem_flags_convert(unsigned int __nocast flags) { - int lflags = __GFP_NOWARN; /* we'll report problems, if need be */ + unsigned int lflags = __GFP_NOWARN; /* we'll report problems, if need be */ #ifdef DEBUG if (unlikely(flags & ~(KM_SLEEP|KM_NOSLEEP|KM_NOFS|KM_MAYFAIL))) { @@ -125,12 +125,13 @@ kmem_zone_destroy(kmem_zone_t *zone) BUG(); } -extern void *kmem_zone_zalloc(kmem_zone_t *, int); -extern void *kmem_zone_alloc(kmem_zone_t *, int); +extern void *kmem_zone_zalloc(kmem_zone_t *, unsigned int __nocast); +extern void *kmem_zone_alloc(kmem_zone_t *, unsigned int __nocast); -extern void *kmem_alloc(size_t, int); -extern void *kmem_realloc(void *, size_t, size_t, int); -extern void *kmem_zalloc(size_t, int); +extern void *kmem_alloc(size_t, unsigned int __nocast); +extern void *kmem_realloc(void *, size_t, size_t, + unsigned int __nocast); +extern void *kmem_zalloc(size_t, unsigned int __nocast); extern void kmem_free(void *, size_t); typedef struct shrinker *kmem_shaker_t; diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 0aac28ddb81..14faabaabf2 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -1387,7 +1387,7 @@ xlog_recover_add_to_cont_trans( old_ptr = item->ri_buf[item->ri_cnt-1].i_addr; old_len = item->ri_buf[item->ri_cnt-1].i_len; - ptr = kmem_realloc(old_ptr, len+old_len, old_len, 0); + ptr = kmem_realloc(old_ptr, len+old_len, old_len, 0u); memcpy(&ptr[old_len], dp, len); /* d, s, l */ item->ri_buf[item->ri_cnt-1].i_len += len; item->ri_buf[item->ri_cnt-1].i_addr = ptr; -- cgit v1.2.3-70-g09d2 From 592cb26bda6fe69838529acf71e50a6dee7acbb4 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 2 Sep 2005 16:56:14 +1000 Subject: [XFS] remove unessecary vnode flags SGI-PV: 934766 SGI-Modid: xfs-linux:xfs-kern:196852a Signed-off-by: Christoph Hellwig Signed-off-by: Nathan Scott --- fs/xfs/linux-2.6/xfs_vnode.c | 59 +------------------------------------------- fs/xfs/linux-2.6/xfs_vnode.h | 4 --- fs/xfs/xfs_iget.c | 11 --------- 3 files changed, 1 insertion(+), 73 deletions(-) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_vnode.c b/fs/xfs/linux-2.6/xfs_vnode.c index ad16af38e96..654da98de2a 100644 --- a/fs/xfs/linux-2.6/xfs_vnode.c +++ b/fs/xfs/linux-2.6/xfs_vnode.c @@ -78,10 +78,6 @@ vn_reclaim( } ASSERT(vp->v_fbhv == NULL); - VN_LOCK(vp); - vp->v_flag &= (VRECLM|VWAIT); - VN_UNLOCK(vp, 0); - vp->v_fbhv = NULL; #ifdef XFS_VNODE_TRACE @@ -92,31 +88,6 @@ vn_reclaim( return 0; } -STATIC void -vn_wakeup( - struct vnode *vp) -{ - VN_LOCK(vp); - if (vp->v_flag & VWAIT) - sv_broadcast(vptosync(vp)); - vp->v_flag &= ~(VRECLM|VWAIT|VMODIFIED); - VN_UNLOCK(vp, 0); -} - -int -vn_wait( - struct vnode *vp) -{ - VN_LOCK(vp); - if (vp->v_flag & (VINACT | VRECLM)) { - vp->v_flag |= VWAIT; - sv_wait(vptosync(vp), PINOD, &vp->v_lock, 0); - return 1; - } - VN_UNLOCK(vp, 0); - return 0; -} - struct vnode * vn_initialize( struct inode *inode) @@ -221,7 +192,6 @@ vn_purge( { vn_trace_entry(vp, "vn_purge", (inst_t *)__return_address); -again: /* * Check whether vp has already been reclaimed since our caller * sampled its version while holding a filesystem cache lock that @@ -233,19 +203,6 @@ again: return; } - /* - * If vp is being reclaimed or inactivated, wait until it is inert, - * then proceed. Can't assume that vnode is actually reclaimed - * just because the reclaimed flag is asserted -- a vn_alloc - * reclaim can fail. - */ - if (vp->v_flag & (VINACT | VRECLM)) { - ASSERT(vn_count(vp) == 0); - vp->v_flag |= VWAIT; - sv_wait(vptosync(vp), PINOD, &vp->v_lock, 0); - goto again; - } - /* * Another process could have raced in and gotten this vnode... */ @@ -255,7 +212,6 @@ again: } XFS_STATS_DEC(vn_active); - vp->v_flag |= VRECLM; VN_UNLOCK(vp, 0); /* @@ -266,11 +222,6 @@ again: */ if (vn_reclaim(vp) != 0) panic("vn_purge: cannot reclaim"); - - /* - * Wakeup anyone waiting for vp to be reclaimed. - */ - vn_wakeup(vp); } /* @@ -315,11 +266,6 @@ vn_rele( * return. */ if (!vcnt) { - /* - * As soon as we turn this on, noone can find us in vn_get - * until we turn off VINACT or VRECLM - */ - vp->v_flag |= VINACT; VN_UNLOCK(vp, 0); /* @@ -330,10 +276,7 @@ vn_rele( VOP_INACTIVE(vp, NULL, cache); VN_LOCK(vp); - if (vp->v_flag & VWAIT) - sv_broadcast(vptosync(vp)); - - vp->v_flag &= ~(VINACT|VWAIT|VRECLM|VMODIFIED); + vp->v_flag &= ~VMODIFIED; } VN_UNLOCK(vp, 0); diff --git a/fs/xfs/linux-2.6/xfs_vnode.h b/fs/xfs/linux-2.6/xfs_vnode.h index bc9ed722ba1..4a74569a569 100644 --- a/fs/xfs/linux-2.6/xfs_vnode.h +++ b/fs/xfs/linux-2.6/xfs_vnode.h @@ -135,9 +135,6 @@ typedef enum { /* * Vnode flags. */ -#define VINACT 0x1 /* vnode is being inactivated */ -#define VRECLM 0x2 /* vnode is being reclaimed */ -#define VWAIT 0x4 /* waiting for VINACT/VRECLM to end */ #define VMODIFIED 0x8 /* XFS inode state possibly differs */ /* to the Linux inode state. */ @@ -489,7 +486,6 @@ typedef struct vattr { (VN_ISREG(vp) && ((mode) & (VSGID|(VEXEC>>3))) == VSGID) extern void vn_init(void); -extern int vn_wait(struct vnode *); extern vnode_t *vn_initialize(struct inode *); /* diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c index d3da00045f2..fa796910f3a 100644 --- a/fs/xfs/xfs_iget.c +++ b/fs/xfs/xfs_iget.c @@ -505,7 +505,6 @@ xfs_iget( vnode_t *vp = NULL; int error; -retry: XFS_STATS_INC(xs_ig_attempts); if ((inode = iget_locked(XFS_MTOVFS(mp)->vfs_super, ino))) { @@ -526,16 +525,6 @@ inode_allocate: iput(inode); } } else { - /* These are true if the inode is in inactive or - * reclaim. The linux inode is about to go away, - * wait for that path to finish, and try again. - */ - if (vp->v_flag & (VINACT | VRECLM)) { - vn_wait(vp); - iput(inode); - goto retry; - } - if (is_bad_inode(inode)) { iput(inode); return EIO; -- cgit v1.2.3-70-g09d2 From 51c91ed52b8a9a30fcb2a465b40c20a1f11735ba Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 2 Sep 2005 16:58:38 +1000 Subject: [XFS] add infrastructure for waiting on I/O completion at inode reclaim time SGI-PV: 934766 SGI-Modid: xfs-linux:xfs-kern:196854a Signed-off-by: Christoph Hellwig Signed-off-by: Nathan Scott --- fs/xfs/linux-2.6/xfs_aops.c | 11 ++--------- fs/xfs/linux-2.6/xfs_vnode.c | 28 +++++++++++++++++++++----- fs/xfs/linux-2.6/xfs_vnode.h | 4 ++++ fs/xfs/xfs_vnodeops.c | 47 +++----------------------------------------- 4 files changed, 32 insertions(+), 58 deletions(-) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c index bd9aba1f235..b55cb7f02e8 100644 --- a/fs/xfs/linux-2.6/xfs_aops.c +++ b/fs/xfs/linux-2.6/xfs_aops.c @@ -139,7 +139,7 @@ linvfs_unwritten_convert( XFS_BUF_SET_FSPRIVATE(bp, NULL); XFS_BUF_CLR_IODONE_FUNC(bp); XFS_BUF_UNDATAIO(bp); - iput(LINVFS_GET_IP(vp)); + vn_iowake(vp); pagebuf_iodone(bp, 0, 0); } @@ -448,14 +448,7 @@ xfs_map_unwritten( if (!pb) return -EAGAIN; - /* Take a reference to the inode to prevent it from - * being reclaimed while we have outstanding unwritten - * extent IO on it. - */ - if ((igrab(inode)) != inode) { - pagebuf_free(pb); - return -EAGAIN; - } + atomic_inc(&LINVFS_GET_VP(inode)->v_iocount); /* Set the count to 1 initially, this will stop an I/O * completion callout which happens before we have started diff --git a/fs/xfs/linux-2.6/xfs_vnode.c b/fs/xfs/linux-2.6/xfs_vnode.c index 654da98de2a..46afc86a286 100644 --- a/fs/xfs/linux-2.6/xfs_vnode.c +++ b/fs/xfs/linux-2.6/xfs_vnode.c @@ -42,17 +42,33 @@ DEFINE_SPINLOCK(vnumber_lock); */ #define NVSYNC 37 #define vptosync(v) (&vsync[((unsigned long)v) % NVSYNC]) -sv_t vsync[NVSYNC]; +STATIC wait_queue_head_t vsync[NVSYNC]; void vn_init(void) { - register sv_t *svp; - register int i; + int i; - for (svp = vsync, i = 0; i < NVSYNC; i++, svp++) - init_sv(svp, SV_DEFAULT, "vsy", i); + for (i = 0; i < NVSYNC; i++) + init_waitqueue_head(&vsync[i]); +} + +void +vn_iowait( + struct vnode *vp) +{ + wait_queue_head_t *wq = vptosync(vp); + + wait_event(*wq, (atomic_read(&vp->v_iocount) == 0)); +} + +void +vn_iowake( + struct vnode *vp) +{ + if (atomic_dec_and_test(&vp->v_iocount)) + wake_up(vptosync(vp)); } /* @@ -111,6 +127,8 @@ vn_initialize( /* Initialize the first behavior and the behavior chain head. */ vn_bhv_head_init(VN_BHV_HEAD(vp), "vnode"); + atomic_set(&vp->v_iocount, 0); + #ifdef XFS_VNODE_TRACE vp->v_trace = ktrace_alloc(VNODE_TRACE_SIZE, KM_SLEEP); #endif /* XFS_VNODE_TRACE */ diff --git a/fs/xfs/linux-2.6/xfs_vnode.h b/fs/xfs/linux-2.6/xfs_vnode.h index 4a74569a569..9977afa3890 100644 --- a/fs/xfs/linux-2.6/xfs_vnode.h +++ b/fs/xfs/linux-2.6/xfs_vnode.h @@ -80,6 +80,7 @@ typedef struct vnode { vnumber_t v_number; /* in-core vnode number */ vn_bhv_head_t v_bh; /* behavior head */ spinlock_t v_lock; /* VN_LOCK/VN_UNLOCK */ + atomic_t v_iocount; /* outstanding I/O count */ #ifdef XFS_VNODE_TRACE struct ktrace *v_trace; /* trace header structure */ #endif @@ -506,6 +507,9 @@ extern int vn_revalidate(struct vnode *); extern void vn_revalidate_core(struct vnode *, vattr_t *); extern void vn_remove(struct vnode *); +extern void vn_iowait(struct vnode *vp); +extern void vn_iowake(struct vnode *vp); + static inline int vn_count(struct vnode *vp) { return atomic_read(&LINVFS_GET_IP(vp)->i_count); diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c index c4aa24ff85a..58bfe629b93 100644 --- a/fs/xfs/xfs_vnodeops.c +++ b/fs/xfs/xfs_vnodeops.c @@ -3846,51 +3846,10 @@ xfs_reclaim( return 0; } - if ((ip->i_d.di_mode & S_IFMT) == S_IFREG) { - if (ip->i_d.di_size > 0) { - /* - * Flush and invalidate any data left around that is - * a part of this file. - * - * Get the inode's i/o lock so that buffers are pushed - * out while holding the proper lock. We can't hold - * the inode lock here since flushing out buffers may - * cause us to try to get the lock in xfs_strategy(). - * - * We don't have to call remapf() here, because there - * cannot be any mapped file references to this vnode - * since it is being reclaimed. - */ - xfs_ilock(ip, XFS_IOLOCK_EXCL); - - /* - * If we hit an IO error, we need to make sure that the - * buffer and page caches of file data for - * the file are tossed away. We don't want to use - * VOP_FLUSHINVAL_PAGES here because we don't want dirty - * pages to stay attached to the vnode, but be - * marked P_BAD. pdflush/vnode_pagebad - * hates that. - */ - if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) { - VOP_FLUSHINVAL_PAGES(vp, 0, -1, FI_NONE); - } else { - VOP_TOSS_PAGES(vp, 0, -1, FI_NONE); - } + vn_iowait(vp); - ASSERT(VN_CACHED(vp) == 0); - ASSERT(XFS_FORCED_SHUTDOWN(ip->i_mount) || - ip->i_delayed_blks == 0); - xfs_iunlock(ip, XFS_IOLOCK_EXCL); - } else if (XFS_FORCED_SHUTDOWN(ip->i_mount)) { - /* - * di_size field may not be quite accurate if we're - * shutting down. - */ - VOP_TOSS_PAGES(vp, 0, -1, FI_NONE); - ASSERT(VN_CACHED(vp) == 0); - } - } + ASSERT(XFS_FORCED_SHUTDOWN(ip->i_mount) || ip->i_delayed_blks == 0); + ASSERT(VN_CACHED(vp) == 0); /* If we have nothing to flush with this inode then complete the * teardown now, otherwise break the link between the xfs inode -- cgit v1.2.3-70-g09d2 From 0829c3602f4df95898752c402ea90b92a3e33154 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 2 Sep 2005 16:58:49 +1000 Subject: [XFS] Add infrastructure for tracking I/O completions SGI-PV: 934766 SGI-Modid: xfs-linux:xfs-kern:196856a Signed-off-by: Christoph Hellwig Signed-off-by: Nathan Scott --- fs/xfs/linux-2.6/xfs_aops.c | 156 +++++++++++++++++++++++++------------------ fs/xfs/linux-2.6/xfs_buf.c | 2 +- fs/xfs/linux-2.6/xfs_linux.h | 1 + fs/xfs/linux-2.6/xfs_super.c | 58 +++++++++++----- 4 files changed, 132 insertions(+), 85 deletions(-) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c index b55cb7f02e8..ed98c7ac7cf 100644 --- a/fs/xfs/linux-2.6/xfs_aops.c +++ b/fs/xfs/linux-2.6/xfs_aops.c @@ -104,22 +104,24 @@ xfs_page_trace( #define xfs_page_trace(tag, inode, page, mask) #endif -void -linvfs_unwritten_done( - struct buffer_head *bh, - int uptodate) +/* + * Schedule IO completion handling on a xfsdatad if this was + * the final hold on this ioend. + */ +STATIC void +xfs_finish_ioend( + xfs_ioend_t *ioend) { - xfs_buf_t *pb = (xfs_buf_t *)bh->b_private; + if (atomic_dec_and_test(&ioend->io_remaining)) + queue_work(xfsdatad_workqueue, &ioend->io_work); +} - ASSERT(buffer_unwritten(bh)); - bh->b_end_io = NULL; - clear_buffer_unwritten(bh); - if (!uptodate) - pagebuf_ioerror(pb, EIO); - if (atomic_dec_and_test(&pb->pb_io_remaining) == 1) { - pagebuf_iodone(pb, 1, 1); - } - end_buffer_async_write(bh, uptodate); +STATIC void +xfs_destroy_ioend( + xfs_ioend_t *ioend) +{ + vn_iowake(ioend->io_vnode); + mempool_free(ioend, xfs_ioend_pool); } /* @@ -127,20 +129,66 @@ linvfs_unwritten_done( * to written extents (buffered IO). */ STATIC void -linvfs_unwritten_convert( - xfs_buf_t *bp) +xfs_end_bio_unwritten( + void *data) { - vnode_t *vp = XFS_BUF_FSPRIVATE(bp, vnode_t *); - int error; + xfs_ioend_t *ioend = data; + vnode_t *vp = ioend->io_vnode; + xfs_off_t offset = ioend->io_offset; + size_t size = ioend->io_size; + int error; + + if (ioend->io_uptodate) + VOP_BMAP(vp, offset, size, BMAPI_UNWRITTEN, NULL, NULL, error); + xfs_destroy_ioend(ioend); +} + +/* + * Allocate and initialise an IO completion structure. + * We need to track unwritten extent write completion here initially. + * We'll need to extend this for updating the ondisk inode size later + * (vs. incore size). + */ +STATIC xfs_ioend_t * +xfs_alloc_ioend( + struct inode *inode) +{ + xfs_ioend_t *ioend; - BUG_ON(atomic_read(&bp->pb_hold) < 1); - VOP_BMAP(vp, XFS_BUF_OFFSET(bp), XFS_BUF_SIZE(bp), - BMAPI_UNWRITTEN, NULL, NULL, error); - XFS_BUF_SET_FSPRIVATE(bp, NULL); - XFS_BUF_CLR_IODONE_FUNC(bp); - XFS_BUF_UNDATAIO(bp); - vn_iowake(vp); - pagebuf_iodone(bp, 0, 0); + ioend = mempool_alloc(xfs_ioend_pool, GFP_NOFS); + + /* + * Set the count to 1 initially, which will prevent an I/O + * completion callback from happening before we have started + * all the I/O from calling the completion routine too early. + */ + atomic_set(&ioend->io_remaining, 1); + ioend->io_uptodate = 1; /* cleared if any I/O fails */ + ioend->io_vnode = LINVFS_GET_VP(inode); + atomic_inc(&ioend->io_vnode->v_iocount); + ioend->io_offset = 0; + ioend->io_size = 0; + + INIT_WORK(&ioend->io_work, xfs_end_bio_unwritten, ioend); + + return ioend; +} + +void +linvfs_unwritten_done( + struct buffer_head *bh, + int uptodate) +{ + xfs_ioend_t *ioend = bh->b_private; + + ASSERT(buffer_unwritten(bh)); + bh->b_end_io = NULL; + clear_buffer_unwritten(bh); + if (!uptodate) + ioend->io_uptodate = 0; + + xfs_finish_ioend(ioend); + end_buffer_async_write(bh, uptodate); } /* @@ -255,7 +303,7 @@ xfs_probe_unwritten_page( struct address_space *mapping, pgoff_t index, xfs_iomap_t *iomapp, - xfs_buf_t *pb, + xfs_ioend_t *ioend, unsigned long max_offset, unsigned long *fsbs, unsigned int bbits) @@ -283,7 +331,7 @@ xfs_probe_unwritten_page( break; xfs_map_at_offset(page, bh, p_offset, bbits, iomapp); set_buffer_unwritten_io(bh); - bh->b_private = pb; + bh->b_private = ioend; p_offset += bh->b_size; (*fsbs)++; } while ((bh = bh->b_this_page) != head); @@ -434,27 +482,15 @@ xfs_map_unwritten( { struct buffer_head *bh = curr; xfs_iomap_t *tmp; - xfs_buf_t *pb; - loff_t offset, size; + xfs_ioend_t *ioend; + loff_t offset; unsigned long nblocks = 0; offset = start_page->index; offset <<= PAGE_CACHE_SHIFT; offset += p_offset; - /* get an "empty" pagebuf to manage IO completion - * Proper values will be set before returning */ - pb = pagebuf_lookup(iomapp->iomap_target, 0, 0, 0); - if (!pb) - return -EAGAIN; - - atomic_inc(&LINVFS_GET_VP(inode)->v_iocount); - - /* Set the count to 1 initially, this will stop an I/O - * completion callout which happens before we have started - * all the I/O from calling pagebuf_iodone too early. - */ - atomic_set(&pb->pb_io_remaining, 1); + ioend = xfs_alloc_ioend(inode); /* First map forwards in the page consecutive buffers * covering this unwritten extent @@ -467,12 +503,12 @@ xfs_map_unwritten( break; xfs_map_at_offset(start_page, bh, p_offset, block_bits, iomapp); set_buffer_unwritten_io(bh); - bh->b_private = pb; + bh->b_private = ioend; p_offset += bh->b_size; nblocks++; } while ((bh = bh->b_this_page) != head); - atomic_add(nblocks, &pb->pb_io_remaining); + atomic_add(nblocks, &ioend->io_remaining); /* If we reached the end of the page, map forwards in any * following pages which are also covered by this extent. @@ -489,13 +525,13 @@ xfs_map_unwritten( tloff = min(tlast, tloff); for (tindex = start_page->index + 1; tindex < tloff; tindex++) { page = xfs_probe_unwritten_page(mapping, - tindex, iomapp, pb, + tindex, iomapp, ioend, PAGE_CACHE_SIZE, &bs, bbits); if (!page) break; nblocks += bs; - atomic_add(bs, &pb->pb_io_remaining); - xfs_convert_page(inode, page, iomapp, wbc, pb, + atomic_add(bs, &ioend->io_remaining); + xfs_convert_page(inode, page, iomapp, wbc, ioend, startio, all_bh); /* stop if converting the next page might add * enough blocks that the corresponding byte @@ -507,12 +543,12 @@ xfs_map_unwritten( if (tindex == tlast && (pg_offset = (i_size_read(inode) & (PAGE_CACHE_SIZE - 1)))) { page = xfs_probe_unwritten_page(mapping, - tindex, iomapp, pb, + tindex, iomapp, ioend, pg_offset, &bs, bbits); if (page) { nblocks += bs; - atomic_add(bs, &pb->pb_io_remaining); - xfs_convert_page(inode, page, iomapp, wbc, pb, + atomic_add(bs, &ioend->io_remaining); + xfs_convert_page(inode, page, iomapp, wbc, ioend, startio, all_bh); if (nblocks >= ((ULONG_MAX - PAGE_SIZE) >> block_bits)) goto enough; @@ -521,21 +557,9 @@ xfs_map_unwritten( } enough: - size = nblocks; /* NB: using 64bit number here */ - size <<= block_bits; /* convert fsb's to byte range */ - - XFS_BUF_DATAIO(pb); - XFS_BUF_ASYNC(pb); - XFS_BUF_SET_SIZE(pb, size); - XFS_BUF_SET_COUNT(pb, size); - XFS_BUF_SET_OFFSET(pb, offset); - XFS_BUF_SET_FSPRIVATE(pb, LINVFS_GET_VP(inode)); - XFS_BUF_SET_IODONE_FUNC(pb, linvfs_unwritten_convert); - - if (atomic_dec_and_test(&pb->pb_io_remaining) == 1) { - pagebuf_iodone(pb, 1, 1); - } - + ioend->io_size = (xfs_off_t)nblocks << block_bits; + ioend->io_offset = offset; + xfs_finish_ioend(ioend); return 0; } diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c index 58286b1d733..fba40cbdbcf 100644 --- a/fs/xfs/linux-2.6/xfs_buf.c +++ b/fs/xfs/linux-2.6/xfs_buf.c @@ -67,7 +67,7 @@ STATIC int xfsbufd_wakeup(int, unsigned int); STATIC void pagebuf_delwri_queue(xfs_buf_t *, int); STATIC struct workqueue_struct *xfslogd_workqueue; -STATIC struct workqueue_struct *xfsdatad_workqueue; +struct workqueue_struct *xfsdatad_workqueue; /* * Pagebuf debugging diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h index 42dc5e4662e..1c63fd3118d 100644 --- a/fs/xfs/linux-2.6/xfs_linux.h +++ b/fs/xfs/linux-2.6/xfs_linux.h @@ -104,6 +104,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index d2c8a11e22b..1a0bcbbc0a8 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -70,11 +70,14 @@ #include #include #include +#include #include STATIC struct quotactl_ops linvfs_qops; STATIC struct super_operations linvfs_sops; -STATIC kmem_zone_t *linvfs_inode_zone; +STATIC kmem_zone_t *xfs_vnode_zone; +STATIC kmem_zone_t *xfs_ioend_zone; +mempool_t *xfs_ioend_pool; STATIC struct xfs_mount_args * xfs_args_allocate( @@ -281,8 +284,7 @@ linvfs_alloc_inode( { vnode_t *vp; - vp = (vnode_t *)kmem_cache_alloc(linvfs_inode_zone, - kmem_flags_convert(KM_SLEEP)); + vp = kmem_cache_alloc(xfs_vnode_zone, kmem_flags_convert(KM_SLEEP)); if (!vp) return NULL; return LINVFS_GET_IP(vp); @@ -292,11 +294,11 @@ STATIC void linvfs_destroy_inode( struct inode *inode) { - kmem_cache_free(linvfs_inode_zone, LINVFS_GET_VP(inode)); + kmem_zone_free(xfs_vnode_zone, LINVFS_GET_VP(inode)); } STATIC void -init_once( +linvfs_inode_init_once( void *data, kmem_cache_t *cachep, unsigned long flags) @@ -309,21 +311,41 @@ init_once( } STATIC int -init_inodecache( void ) +linvfs_init_zones(void) { - linvfs_inode_zone = kmem_cache_create("linvfs_icache", + xfs_vnode_zone = kmem_cache_create("xfs_vnode", sizeof(vnode_t), 0, SLAB_RECLAIM_ACCOUNT, - init_once, NULL); - if (linvfs_inode_zone == NULL) - return -ENOMEM; + linvfs_inode_init_once, NULL); + if (!xfs_vnode_zone) + goto out; + + xfs_ioend_zone = kmem_zone_init(sizeof(xfs_ioend_t), "xfs_ioend"); + if (!xfs_ioend_zone) + goto out_destroy_vnode_zone; + + xfs_ioend_pool = mempool_create(4 * MAX_BUF_PER_PAGE, + mempool_alloc_slab, mempool_free_slab, + xfs_ioend_zone); + if (!xfs_ioend_pool) + goto out_free_ioend_zone; + return 0; + + + out_free_ioend_zone: + kmem_zone_destroy(xfs_ioend_zone); + out_destroy_vnode_zone: + kmem_zone_destroy(xfs_vnode_zone); + out: + return -ENOMEM; } STATIC void -destroy_inodecache( void ) +linvfs_destroy_zones(void) { - if (kmem_cache_destroy(linvfs_inode_zone)) - printk(KERN_WARNING "%s: cache still in use!\n", __FUNCTION__); + mempool_destroy(xfs_ioend_pool); + kmem_zone_destroy(xfs_vnode_zone); + kmem_zone_destroy(xfs_ioend_zone); } /* @@ -873,9 +895,9 @@ init_xfs_fs( void ) ktrace_init(64); - error = init_inodecache(); + error = linvfs_init_zones(); if (error < 0) - goto undo_inodecache; + goto undo_zones; error = pagebuf_init(); if (error < 0) @@ -896,9 +918,9 @@ undo_register: pagebuf_terminate(); undo_pagebuf: - destroy_inodecache(); + linvfs_destroy_zones(); -undo_inodecache: +undo_zones: return error; } @@ -910,7 +932,7 @@ exit_xfs_fs( void ) unregister_filesystem(&xfs_fs_type); xfs_cleanup(); pagebuf_terminate(); - destroy_inodecache(); + linvfs_destroy_zones(); ktrace_uninit(); } -- cgit v1.2.3-70-g09d2 From 65b3da3705ff873d8704074a75ac983495863380 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 5 Sep 2005 08:18:12 +1000 Subject: [XFS] Add in the new xfs_aops.h header file for I/O completion struct. SGI-PV: 934766 SGI-Modid: xfs-linux:xfs-kern:196857a Signed-off-by: Christoph Hellwig Signed-off-by: Nathan Scott --- fs/xfs/linux-2.6/xfs_aops.h | 49 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) create mode 100644 fs/xfs/linux-2.6/xfs_aops.h (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_aops.h b/fs/xfs/linux-2.6/xfs_aops.h new file mode 100644 index 00000000000..ee46307a732 --- /dev/null +++ b/fs/xfs/linux-2.6/xfs_aops.h @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2005 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ +#ifndef __XFS_AOPS_H__ +#define __XFS_AOPS_H__ + +extern struct workqueue_struct *xfsdatad_workqueue; +extern mempool_t *xfs_ioend_pool; + +typedef void (*xfs_ioend_func_t)(void *); + +typedef struct xfs_ioend { + unsigned int io_uptodate; /* I/O status register */ + atomic_t io_remaining; /* hold count */ + struct vnode *io_vnode; /* file being written to */ + size_t io_size; /* size of the extent */ + xfs_off_t io_offset; /* offset in the file */ + struct work_struct io_work; /* xfsdatad work queue */ +} xfs_ioend_t; + +#endif /* __XFS_IOPS_H__ */ -- cgit v1.2.3-70-g09d2 From f09738638d3bae6501e8e160c66233832d8c280f Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 5 Sep 2005 08:22:52 +1000 Subject: [XFS] Delay direct I/O completion to a workqueue This is nessecary because aio+dio completions may happen from irq context but we need process context for converting unwritten extents. We also queue regular direct I/O completions to workqueue for regularity, there's only one queue_work call per syscall. SGI-PV: 934766 SGI-Modid: xfs-linux:xfs-kern:196857a Signed-off-by: Christoph Hellwig Signed-off-by: Nathan Scott --- fs/xfs/linux-2.6/xfs_aops.c | 74 +++++++++++++++++++++++++++++---------------- fs/xfs/linux-2.6/xfs_lrw.c | 3 -- 2 files changed, 48 insertions(+), 29 deletions(-) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c index ed98c7ac7cf..2add9a8a8df 100644 --- a/fs/xfs/linux-2.6/xfs_aops.c +++ b/fs/xfs/linux-2.6/xfs_aops.c @@ -126,7 +126,7 @@ xfs_destroy_ioend( /* * Issue transactions to convert a buffer range from unwritten - * to written extents (buffered IO). + * to written extents. */ STATIC void xfs_end_bio_unwritten( @@ -191,29 +191,6 @@ linvfs_unwritten_done( end_buffer_async_write(bh, uptodate); } -/* - * Issue transactions to convert a buffer range from unwritten - * to written extents (direct IO). - */ -STATIC void -linvfs_unwritten_convert_direct( - struct kiocb *iocb, - loff_t offset, - ssize_t size, - void *private) -{ - struct inode *inode = iocb->ki_filp->f_dentry->d_inode; - ASSERT(!private || inode == (struct inode *)private); - - /* private indicates an unwritten extent lay beneath this IO */ - if (private && size > 0) { - vnode_t *vp = LINVFS_GET_VP(inode); - int error; - - VOP_BMAP(vp, offset, size, BMAPI_UNWRITTEN, NULL, NULL, error); - } -} - STATIC int xfs_map_blocks( struct inode *inode, @@ -1045,6 +1022,44 @@ linvfs_get_blocks_direct( create, 1, BMAPI_WRITE|BMAPI_DIRECT); } +STATIC void +linvfs_end_io_direct( + struct kiocb *iocb, + loff_t offset, + ssize_t size, + void *private) +{ + xfs_ioend_t *ioend = iocb->private; + + /* + * Non-NULL private data means we need to issue a transaction to + * convert a range from unwritten to written extents. This needs + * to happen from process contect but aio+dio I/O completion + * happens from irq context so we need to defer it to a workqueue. + * This is not nessecary for synchronous direct I/O, but we do + * it anyway to keep the code uniform and simpler. + * + * The core direct I/O code might be changed to always call the + * completion handler in the future, in which case all this can + * go away. + */ + if (private && size > 0) { + ioend->io_offset = offset; + ioend->io_size = size; + xfs_finish_ioend(ioend); + } else { + ASSERT(size >= 0); + xfs_destroy_ioend(ioend); + } + + /* + * blockdev_direct_IO can return an error even afer the I/O + * completion handler was called. Thus we need to protect + * against double-freeing. + */ + iocb->private = NULL; +} + STATIC ssize_t linvfs_direct_IO( int rw, @@ -1059,16 +1074,23 @@ linvfs_direct_IO( xfs_iomap_t iomap; int maps = 1; int error; + ssize_t ret; VOP_BMAP(vp, offset, 0, BMAPI_DEVICE, &iomap, &maps, error); if (error) return -error; - return blockdev_direct_IO_own_locking(rw, iocb, inode, + iocb->private = xfs_alloc_ioend(inode); + + ret = blockdev_direct_IO_own_locking(rw, iocb, inode, iomap.iomap_target->pbr_bdev, iov, offset, nr_segs, linvfs_get_blocks_direct, - linvfs_unwritten_convert_direct); + linvfs_end_io_direct); + + if (unlikely(ret <= 0 && iocb->private)) + xfs_destroy_ioend(iocb->private); + return ret; } diff --git a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c index acab58c4804..3b5fabe8dae 100644 --- a/fs/xfs/linux-2.6/xfs_lrw.c +++ b/fs/xfs/linux-2.6/xfs_lrw.c @@ -660,9 +660,6 @@ xfs_write( (xip->i_d.di_flags & XFS_DIFLAG_REALTIME) ? mp->m_rtdev_targp : mp->m_ddev_targp; - if (ioflags & IO_ISAIO) - return XFS_ERROR(-ENOSYS); - if ((pos & target->pbr_smask) || (count & target->pbr_smask)) return XFS_ERROR(-EINVAL); -- cgit v1.2.3-70-g09d2 From c1a073bdff997216eac25254a2716faf640e4e8d Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 5 Sep 2005 08:23:35 +1000 Subject: [XFS] Delay I/O completion for unwritten extents after conversion SGI-PV: 936584 SGI-Modid: xfs-linux:xfs-kern:196886a Signed-off-by: Christoph Hellwig Signed-off-by: Nathan Scott --- fs/xfs/linux-2.6/xfs_aops.c | 27 +++++++++++++++++++++++++-- fs/xfs/linux-2.6/xfs_aops.h | 1 + 2 files changed, 26 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c index 2add9a8a8df..ea615e2f476 100644 --- a/fs/xfs/linux-2.6/xfs_aops.c +++ b/fs/xfs/linux-2.6/xfs_aops.c @@ -136,10 +136,21 @@ xfs_end_bio_unwritten( vnode_t *vp = ioend->io_vnode; xfs_off_t offset = ioend->io_offset; size_t size = ioend->io_size; + struct buffer_head *bh, *next; int error; if (ioend->io_uptodate) VOP_BMAP(vp, offset, size, BMAPI_UNWRITTEN, NULL, NULL, error); + + /* ioend->io_buffer_head is only non-NULL for buffered I/O */ + for (bh = ioend->io_buffer_head; bh; bh = next) { + next = bh->b_private; + + bh->b_end_io = NULL; + clear_buffer_unwritten(bh); + end_buffer_async_write(bh, ioend->io_uptodate); + } + xfs_destroy_ioend(ioend); } @@ -165,6 +176,7 @@ xfs_alloc_ioend( atomic_set(&ioend->io_remaining, 1); ioend->io_uptodate = 1; /* cleared if any I/O fails */ ioend->io_vnode = LINVFS_GET_VP(inode); + ioend->io_buffer_head = NULL; atomic_inc(&ioend->io_vnode->v_iocount); ioend->io_offset = 0; ioend->io_size = 0; @@ -180,15 +192,26 @@ linvfs_unwritten_done( int uptodate) { xfs_ioend_t *ioend = bh->b_private; + static spinlock_t unwritten_done_lock = SPIN_LOCK_UNLOCKED; + unsigned long flags; ASSERT(buffer_unwritten(bh)); bh->b_end_io = NULL; - clear_buffer_unwritten(bh); + if (!uptodate) ioend->io_uptodate = 0; + /* + * Deep magic here. We reuse b_private in the buffer_heads to build + * a chain for completing the I/O from user context after we've issued + * a transaction to convert the unwritten extent. + */ + spin_lock_irqsave(&unwritten_done_lock, flags); + bh->b_private = ioend->io_buffer_head; + ioend->io_buffer_head = bh; + spin_unlock_irqrestore(&unwritten_done_lock, flags); + xfs_finish_ioend(ioend); - end_buffer_async_write(bh, uptodate); } STATIC int diff --git a/fs/xfs/linux-2.6/xfs_aops.h b/fs/xfs/linux-2.6/xfs_aops.h index ee46307a732..2fa62974a04 100644 --- a/fs/xfs/linux-2.6/xfs_aops.h +++ b/fs/xfs/linux-2.6/xfs_aops.h @@ -41,6 +41,7 @@ typedef struct xfs_ioend { unsigned int io_uptodate; /* I/O status register */ atomic_t io_remaining; /* hold count */ struct vnode *io_vnode; /* file being written to */ + struct buffer_head *io_buffer_head;/* buffer linked list head */ size_t io_size; /* size of the extent */ xfs_off_t io_offset; /* offset in the file */ struct work_struct io_work; /* xfsdatad work queue */ -- cgit v1.2.3-70-g09d2 From 56d433e430eb399a4b6d0e73d28af6e1d4713547 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 5 Sep 2005 08:23:54 +1000 Subject: [XFS] streamline the clear_inode path SGI-PV: 940531 SGI-Modid: xfs-linux:xfs-kern:196888a Signed-off-by: Christoph Hellwig Signed-off-by: Nathan Scott --- fs/xfs/linux-2.6/xfs_super.c | 34 +++++++--- fs/xfs/linux-2.6/xfs_vnode.c | 144 ------------------------------------------- fs/xfs/linux-2.6/xfs_vnode.h | 3 - 3 files changed, 25 insertions(+), 156 deletions(-) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index 1a0bcbbc0a8..9b40a2799f7 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -383,17 +383,33 @@ linvfs_clear_inode( struct inode *inode) { vnode_t *vp = LINVFS_GET_VP(inode); + int error, cache; - if (vp) { - vn_rele(vp); - vn_trace_entry(vp, __FUNCTION__, (inst_t *)__return_address); - /* - * Do all our cleanup, and remove this vnode. - */ - vn_remove(vp); - } -} + vn_trace_entry(vp, "clear_inode", (inst_t *)__return_address); + + ASSERT(vp->v_fbhv != NULL); + + XFS_STATS_INC(vn_rele); + XFS_STATS_INC(vn_remove); + XFS_STATS_INC(vn_reclaim); + XFS_STATS_DEC(vn_active); + VOP_INACTIVE(vp, NULL, cache); + + VN_LOCK(vp); + vp->v_flag &= ~VMODIFIED; + VN_UNLOCK(vp, 0); + + VOP_RECLAIM(vp, error); + if (error) + panic("vn_purge: cannot reclaim"); + + ASSERT(vp->v_fbhv == NULL); + +#ifdef XFS_VNODE_TRACE + ktrace_free(vp->v_trace); +#endif +} /* * Enqueue a work item to be picked up by the vfs xfssyncd thread. diff --git a/fs/xfs/linux-2.6/xfs_vnode.c b/fs/xfs/linux-2.6/xfs_vnode.c index 46afc86a286..268f45bf6a9 100644 --- a/fs/xfs/linux-2.6/xfs_vnode.c +++ b/fs/xfs/linux-2.6/xfs_vnode.c @@ -71,39 +71,6 @@ vn_iowake( wake_up(vptosync(vp)); } -/* - * Clean a vnode of filesystem-specific data and prepare it for reuse. - */ -STATIC int -vn_reclaim( - struct vnode *vp) -{ - int error; - - XFS_STATS_INC(vn_reclaim); - vn_trace_entry(vp, "vn_reclaim", (inst_t *)__return_address); - - /* - * Only make the VOP_RECLAIM call if there are behaviors - * to call. - */ - if (vp->v_fbhv) { - VOP_RECLAIM(vp, error); - if (error) - return -error; - } - ASSERT(vp->v_fbhv == NULL); - - vp->v_fbhv = NULL; - -#ifdef XFS_VNODE_TRACE - ktrace_free(vp->v_trace); - vp->v_trace = NULL; -#endif - - return 0; -} - struct vnode * vn_initialize( struct inode *inode) @@ -197,51 +164,6 @@ vn_revalidate( return -error; } -/* - * purge a vnode from the cache - * At this point the vnode is guaranteed to have no references (vn_count == 0) - * The caller has to make sure that there are no ways someone could - * get a handle (via vn_get) on the vnode (usually done via a mount/vfs lock). - */ -void -vn_purge( - struct vnode *vp, - vmap_t *vmap) -{ - vn_trace_entry(vp, "vn_purge", (inst_t *)__return_address); - - /* - * Check whether vp has already been reclaimed since our caller - * sampled its version while holding a filesystem cache lock that - * its VOP_RECLAIM function acquires. - */ - VN_LOCK(vp); - if (vp->v_number != vmap->v_number) { - VN_UNLOCK(vp, 0); - return; - } - - /* - * Another process could have raced in and gotten this vnode... - */ - if (vn_count(vp) > 0) { - VN_UNLOCK(vp, 0); - return; - } - - XFS_STATS_DEC(vn_active); - VN_UNLOCK(vp, 0); - - /* - * Call VOP_RECLAIM and clean vp. The FSYNC_INVAL flag tells - * vp's filesystem to flush and invalidate all cached resources. - * When vn_reclaim returns, vp should have no private data, - * either in a system cache or attached to v_data. - */ - if (vn_reclaim(vp) != 0) - panic("vn_purge: cannot reclaim"); -} - /* * Add a reference to a referenced vnode. */ @@ -261,72 +183,6 @@ vn_hold( return vp; } -/* - * Call VOP_INACTIVE on last reference. - */ -void -vn_rele( - struct vnode *vp) -{ - int vcnt; - int cache; - - XFS_STATS_INC(vn_rele); - - VN_LOCK(vp); - - vn_trace_entry(vp, "vn_rele", (inst_t *)__return_address); - vcnt = vn_count(vp); - - /* - * Since we always get called from put_inode we know - * that i_count won't be decremented after we - * return. - */ - if (!vcnt) { - VN_UNLOCK(vp, 0); - - /* - * Do not make the VOP_INACTIVE call if there - * are no behaviors attached to the vnode to call. - */ - if (vp->v_fbhv) - VOP_INACTIVE(vp, NULL, cache); - - VN_LOCK(vp); - vp->v_flag &= ~VMODIFIED; - } - - VN_UNLOCK(vp, 0); - - vn_trace_exit(vp, "vn_rele", (inst_t *)__return_address); -} - -/* - * Finish the removal of a vnode. - */ -void -vn_remove( - struct vnode *vp) -{ - vmap_t vmap; - - /* Make sure we don't do this to the same vnode twice */ - if (!(vp->v_fbhv)) - return; - - XFS_STATS_INC(vn_remove); - vn_trace_exit(vp, "vn_remove", (inst_t *)__return_address); - - /* - * After the following purge the vnode - * will no longer exist. - */ - VMAP(vp, vmap); - vn_purge(vp, &vmap); -} - - #ifdef XFS_VNODE_TRACE #define KTRACE_ENTER(vp, vk, s, line, ra) \ diff --git a/fs/xfs/linux-2.6/xfs_vnode.h b/fs/xfs/linux-2.6/xfs_vnode.h index 9977afa3890..35f306cebb8 100644 --- a/fs/xfs/linux-2.6/xfs_vnode.h +++ b/fs/xfs/linux-2.6/xfs_vnode.h @@ -502,10 +502,8 @@ typedef struct vnode_map { (vmap).v_number = (vp)->v_number, \ (vmap).v_ino = (vp)->v_inode.i_ino; } -extern void vn_purge(struct vnode *, vmap_t *); extern int vn_revalidate(struct vnode *); extern void vn_revalidate_core(struct vnode *, vattr_t *); -extern void vn_remove(struct vnode *); extern void vn_iowait(struct vnode *vp); extern void vn_iowake(struct vnode *vp); @@ -519,7 +517,6 @@ static inline int vn_count(struct vnode *vp) * Vnode reference counting functions (and macros for compatibility). */ extern vnode_t *vn_hold(struct vnode *); -extern void vn_rele(struct vnode *); #if defined(XFS_VNODE_TRACE) #define VN_HOLD(vp) \ -- cgit v1.2.3-70-g09d2 From 4cd4a034a3ef020d9de48fe0a3f5f976e5134669 Mon Sep 17 00:00:00 2001 From: Tim Shimmin Date: Mon, 5 Sep 2005 08:24:10 +1000 Subject: [XFS] Need to be able to reset sb_qflags if not mounting with quotas having previously mounted with quotas. SGI-PV: 940491 SGI-Modid: xfs-linux:xfs-kern:23388a Signed-off-by: Tim Shimmin Signed-off-by: Nathan Scott --- fs/xfs/quota/xfs_dquot.h | 16 +--------- fs/xfs/quota/xfs_qm.c | 14 ++------- fs/xfs/quota/xfs_qm.h | 2 -- fs/xfs/quota/xfs_qm_bhv.c | 44 +------------------------- fs/xfs/xfs_qmops.c | 78 +++++++++++++++++++++++++++++++++++++++++++++-- fs/xfs/xfs_quota.h | 17 ++++++++++- 6 files changed, 95 insertions(+), 76 deletions(-) (limited to 'fs') diff --git a/fs/xfs/quota/xfs_dquot.h b/fs/xfs/quota/xfs_dquot.h index 39175103c8e..8ebc87176c7 100644 --- a/fs/xfs/quota/xfs_dquot.h +++ b/fs/xfs/quota/xfs_dquot.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2002 Silicon Graphics, Inc. All Rights Reserved. + * Copyright (c) 2000-2005 Silicon Graphics, Inc. All Rights Reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms of version 2 of the GNU General Public License as @@ -113,20 +113,6 @@ typedef struct xfs_dquot { #define XFS_DQHOLD(dqp) ((dqp)->q_nrefs++) -/* - * Quota Accounting/Enforcement flags - */ -#define XFS_ALL_QUOTA_ACCT \ - (XFS_UQUOTA_ACCT | XFS_GQUOTA_ACCT | XFS_PQUOTA_ACCT) -#define XFS_ALL_QUOTA_ENFD (XFS_UQUOTA_ENFD | XFS_OQUOTA_ENFD) -#define XFS_ALL_QUOTA_CHKD (XFS_UQUOTA_CHKD | XFS_OQUOTA_CHKD) - -#define XFS_IS_QUOTA_RUNNING(mp) ((mp)->m_qflags & XFS_ALL_QUOTA_ACCT) -#define XFS_IS_QUOTA_ENFORCED(mp) ((mp)->m_qflags & XFS_ALL_QUOTA_ENFD) -#define XFS_IS_UQUOTA_RUNNING(mp) ((mp)->m_qflags & XFS_UQUOTA_ACCT) -#define XFS_IS_PQUOTA_RUNNING(mp) ((mp)->m_qflags & XFS_PQUOTA_ACCT) -#define XFS_IS_GQUOTA_RUNNING(mp) ((mp)->m_qflags & XFS_GQUOTA_ACCT) - #ifdef DEBUG static inline int XFS_DQ_IS_LOCKED(xfs_dquot_t *dqp) diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c index 4badf38df5e..efde16e0a91 100644 --- a/fs/xfs/quota/xfs_qm.c +++ b/fs/xfs/quota/xfs_qm.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2004 Silicon Graphics, Inc. All Rights Reserved. + * Copyright (c) 2000-2005 Silicon Graphics, Inc. All Rights Reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms of version 2 of the GNU General Public License as @@ -365,16 +365,6 @@ xfs_qm_mount_quotas( int error = 0; uint sbf; - /* - * If a file system had quotas running earlier, but decided to - * mount without -o uquota/pquota/gquota options, revoke the - * quotachecked license, and bail out. - */ - if (! XFS_IS_QUOTA_ON(mp) && - (mp->m_sb.sb_qflags & XFS_ALL_QUOTA_ACCT)) { - mp->m_qflags = 0; - goto write_changes; - } /* * If quotas on realtime volumes is not supported, we disable @@ -2002,7 +1992,7 @@ xfs_qm_quotacheck( ASSERT(mp->m_quotainfo != NULL); ASSERT(xfs_Gqm != NULL); xfs_qm_destroy_quotainfo(mp); - xfs_mount_reset_sbqflags(mp); + (void)xfs_mount_reset_sbqflags(mp); } else { cmn_err(CE_NOTE, "XFS quotacheck %s: Done.", mp->m_fsname); } diff --git a/fs/xfs/quota/xfs_qm.h b/fs/xfs/quota/xfs_qm.h index b03eecf3b6c..0b00b3c6701 100644 --- a/fs/xfs/quota/xfs_qm.h +++ b/fs/xfs/quota/xfs_qm.h @@ -184,8 +184,6 @@ typedef struct xfs_dquot_acct { #define XFS_QM_HOLD(xqm) ((xqm)->qm_nrefs++) #define XFS_QM_RELE(xqm) ((xqm)->qm_nrefs--) -extern void xfs_mount_reset_sbqflags(xfs_mount_t *); - extern void xfs_qm_destroy_quotainfo(xfs_mount_t *); extern int xfs_qm_mount_quotas(xfs_mount_t *, int); extern void xfs_qm_mount_quotainit(xfs_mount_t *, uint); diff --git a/fs/xfs/quota/xfs_qm_bhv.c b/fs/xfs/quota/xfs_qm_bhv.c index dc3c37a1e15..8890a18a99d 100644 --- a/fs/xfs/quota/xfs_qm_bhv.c +++ b/fs/xfs/quota/xfs_qm_bhv.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2004 Silicon Graphics, Inc. All Rights Reserved. + * Copyright (c) 2000-2005 Silicon Graphics, Inc. All Rights Reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms of version 2 of the GNU General Public License as @@ -229,48 +229,6 @@ xfs_qm_syncall( return error; } -/* - * Clear the quotaflags in memory and in the superblock. - */ -void -xfs_mount_reset_sbqflags( - xfs_mount_t *mp) -{ - xfs_trans_t *tp; - unsigned long s; - - mp->m_qflags = 0; - /* - * It is OK to look at sb_qflags here in mount path, - * without SB_LOCK. - */ - if (mp->m_sb.sb_qflags == 0) - return; - s = XFS_SB_LOCK(mp); - mp->m_sb.sb_qflags = 0; - XFS_SB_UNLOCK(mp, s); - - /* - * if the fs is readonly, let the incore superblock run - * with quotas off but don't flush the update out to disk - */ - if (XFS_MTOVFS(mp)->vfs_flag & VFS_RDONLY) - return; -#ifdef QUOTADEBUG - xfs_fs_cmn_err(CE_NOTE, mp, "Writing superblock quota changes"); -#endif - tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SBCHANGE); - if (xfs_trans_reserve(tp, 0, mp->m_sb.sb_sectsize + 128, 0, 0, - XFS_DEFAULT_LOG_COUNT)) { - xfs_trans_cancel(tp, 0); - xfs_fs_cmn_err(CE_ALERT, mp, - "xfs_mount_reset_sbqflags: Superblock update failed!"); - return; - } - xfs_mod_sb(tp, XFS_SB_QFLAGS); - xfs_trans_commit(tp, 0, NULL); -} - STATIC int xfs_qm_newmount( xfs_mount_t *mp, diff --git a/fs/xfs/xfs_qmops.c b/fs/xfs/xfs_qmops.c index 4f40c92863d..a6cd6324e94 100644 --- a/fs/xfs/xfs_qmops.c +++ b/fs/xfs/xfs_qmops.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2003 Silicon Graphics, Inc. All Rights Reserved. + * Copyright (c) 2000-2005 Silicon Graphics, Inc. All Rights Reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms of version 2 of the GNU General Public License as @@ -42,7 +42,8 @@ #include "xfs_dir2.h" #include "xfs_dmapi.h" #include "xfs_mount.h" - +#include "xfs_quota.h" +#include "xfs_error.h" STATIC struct xfs_dquot * xfs_dqvopchown_default( @@ -54,8 +55,79 @@ xfs_dqvopchown_default( return NULL; } +/* + * Clear the quotaflags in memory and in the superblock. + */ +int +xfs_mount_reset_sbqflags(xfs_mount_t *mp) +{ + int error; + xfs_trans_t *tp; + unsigned long s; + + mp->m_qflags = 0; + /* + * It is OK to look at sb_qflags here in mount path, + * without SB_LOCK. + */ + if (mp->m_sb.sb_qflags == 0) + return 0; + s = XFS_SB_LOCK(mp); + mp->m_sb.sb_qflags = 0; + XFS_SB_UNLOCK(mp, s); + + /* + * if the fs is readonly, let the incore superblock run + * with quotas off but don't flush the update out to disk + */ + if (XFS_MTOVFS(mp)->vfs_flag & VFS_RDONLY) + return 0; +#ifdef QUOTADEBUG + xfs_fs_cmn_err(CE_NOTE, mp, "Writing superblock quota changes"); +#endif + tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SBCHANGE); + if ((error = xfs_trans_reserve(tp, 0, mp->m_sb.sb_sectsize + 128, 0, 0, + XFS_DEFAULT_LOG_COUNT))) { + xfs_trans_cancel(tp, 0); + xfs_fs_cmn_err(CE_ALERT, mp, + "xfs_mount_reset_sbqflags: Superblock update failed!"); + return error; + } + xfs_mod_sb(tp, XFS_SB_QFLAGS); + error = xfs_trans_commit(tp, 0, NULL); + return error; +} + +STATIC int +xfs_noquota_init( + xfs_mount_t *mp, + uint *needquotamount, + uint *quotaflags) +{ + int error = 0; + + *quotaflags = 0; + *needquotamount = B_FALSE; + + ASSERT(!XFS_IS_QUOTA_ON(mp)); + + /* + * If a file system had quotas running earlier, but decided to + * mount without -o uquota/pquota/gquota options, revoke the + * quotachecked license. + */ + if (mp->m_sb.sb_qflags & XFS_ALL_QUOTA_ACCT) { + cmn_err(CE_NOTE, + "XFS resetting qflags for filesystem %s", + mp->m_fsname); + + error = xfs_mount_reset_sbqflags(mp); + } + return error; +} + xfs_qmops_t xfs_qmcore_stub = { - .xfs_qminit = (xfs_qminit_t) fs_noerr, + .xfs_qminit = (xfs_qminit_t) xfs_noquota_init, .xfs_qmdone = (xfs_qmdone_t) fs_noerr, .xfs_qmmount = (xfs_qmmount_t) fs_noerr, .xfs_qmunmount = (xfs_qmunmount_t) fs_noerr, diff --git a/fs/xfs/xfs_quota.h b/fs/xfs/xfs_quota.h index 7134576ae7f..32cb79752d5 100644 --- a/fs/xfs/xfs_quota.h +++ b/fs/xfs/xfs_quota.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2003 Silicon Graphics, Inc. All Rights Reserved. + * Copyright (c) 2000-2005 Silicon Graphics, Inc. All Rights Reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms of version 2 of the GNU General Public License as @@ -159,6 +159,20 @@ typedef struct xfs_qoff_logformat { #define XFS_OQUOTA_CHKD 0x0020 /* quotacheck run on other (grp/prj) quotas */ #define XFS_GQUOTA_ACCT 0x0040 /* group quota accounting ON */ +/* + * Quota Accounting/Enforcement flags + */ +#define XFS_ALL_QUOTA_ACCT \ + (XFS_UQUOTA_ACCT | XFS_GQUOTA_ACCT | XFS_PQUOTA_ACCT) +#define XFS_ALL_QUOTA_ENFD (XFS_UQUOTA_ENFD | XFS_OQUOTA_ENFD) +#define XFS_ALL_QUOTA_CHKD (XFS_UQUOTA_CHKD | XFS_OQUOTA_CHKD) + +#define XFS_IS_QUOTA_RUNNING(mp) ((mp)->m_qflags & XFS_ALL_QUOTA_ACCT) +#define XFS_IS_QUOTA_ENFORCED(mp) ((mp)->m_qflags & XFS_ALL_QUOTA_ENFD) +#define XFS_IS_UQUOTA_RUNNING(mp) ((mp)->m_qflags & XFS_UQUOTA_ACCT) +#define XFS_IS_PQUOTA_RUNNING(mp) ((mp)->m_qflags & XFS_PQUOTA_ACCT) +#define XFS_IS_GQUOTA_RUNNING(mp) ((mp)->m_qflags & XFS_GQUOTA_ACCT) + /* * Incore only flags for quotaoff - these bits get cleared when quota(s) * are in the process of getting turned off. These flags are in m_qflags but @@ -362,6 +376,7 @@ typedef struct xfs_dqtrxops { f | XFS_QMOPT_RES_REGBLKS) extern int xfs_qm_dqcheck(xfs_disk_dquot_t *, xfs_dqid_t, uint, uint, char *); +extern int xfs_mount_reset_sbqflags(struct xfs_mount *); extern struct bhv_vfsops xfs_qmops; -- cgit v1.2.3-70-g09d2 From 0c147f9a864f043e6f93a4bb3519c1166419bd74 Mon Sep 17 00:00:00 2001 From: Felix Blyakher Date: Mon, 5 Sep 2005 08:24:49 +1000 Subject: [XFS] Check if there is first behavior before calling VOP_RECLAIM from linvfs_clear_inode(). The behavior may go away in VOP_INACTIVE. SGI-PV: 941000 SGI-Modid: xfs-linux:xfs-kern:197355a Signed-off-by: Felix Blyakher Signed-off-by: Nathan Scott --- fs/xfs/linux-2.6/xfs_super.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index 9b40a2799f7..cd3f8b3270a 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -400,9 +400,11 @@ linvfs_clear_inode( vp->v_flag &= ~VMODIFIED; VN_UNLOCK(vp, 0); - VOP_RECLAIM(vp, error); - if (error) - panic("vn_purge: cannot reclaim"); + if (vp->v_fbhv) { + VOP_RECLAIM(vp, error); + if (error) + panic("vn_purge: cannot reclaim"); + } ASSERT(vp->v_fbhv == NULL); -- cgit v1.2.3-70-g09d2 From 526c420c44b45b11e25a98f37702cc3044ba9bdc Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Mon, 5 Sep 2005 08:25:06 +1000 Subject: [XFS] add handlers to fix xfs_flock_t alignment issues in compat ioctls SGI-PV: 938899 SGI-Modid: xfs-linux:xfs-kern:197403a Signed-off-by: Eric Sandeen Signed-off-by: Nathan Scott --- fs/xfs/linux-2.6/xfs_ioctl32.c | 65 ++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 62 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.c b/fs/xfs/linux-2.6/xfs_ioctl32.c index 0f8f1384eb3..4636b7f86f1 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl32.c +++ b/fs/xfs/linux-2.6/xfs_ioctl32.c @@ -47,8 +47,52 @@ #include "xfs_vnode.h" #include "xfs_dfrag.h" +#define _NATIVE_IOC(cmd, type) \ + _IOC(_IOC_DIR(cmd), _IOC_TYPE(cmd), _IOC_NR(cmd), sizeof(type)) + #if defined(CONFIG_IA64) || defined(CONFIG_X86_64) #define BROKEN_X86_ALIGNMENT +/* on ia32 l_start is on a 32-bit boundary */ +typedef struct xfs_flock64_32 { + __s16 l_type; + __s16 l_whence; + __s64 l_start __attribute__((packed)); + /* len == 0 means until end of file */ + __s64 l_len __attribute__((packed)); + __s32 l_sysid; + __u32 l_pid; + __s32 l_pad[4]; /* reserve area */ +} xfs_flock64_32_t; + +#define XFS_IOC_ALLOCSP_32 _IOW ('X', 10, struct xfs_flock64_32) +#define XFS_IOC_FREESP_32 _IOW ('X', 11, struct xfs_flock64_32) +#define XFS_IOC_ALLOCSP64_32 _IOW ('X', 36, struct xfs_flock64_32) +#define XFS_IOC_FREESP64_32 _IOW ('X', 37, struct xfs_flock64_32) +#define XFS_IOC_RESVSP_32 _IOW ('X', 40, struct xfs_flock64_32) +#define XFS_IOC_UNRESVSP_32 _IOW ('X', 41, struct xfs_flock64_32) +#define XFS_IOC_RESVSP64_32 _IOW ('X', 42, struct xfs_flock64_32) +#define XFS_IOC_UNRESVSP64_32 _IOW ('X', 43, struct xfs_flock64_32) + +/* just account for different alignment */ +STATIC unsigned long +xfs_ioctl32_flock( + unsigned long arg) +{ + xfs_flock64_32_t __user *p32 = (void __user *)arg; + xfs_flock64_t __user *p = compat_alloc_user_space(sizeof(*p)); + + if (copy_in_user(&p->l_type, &p32->l_type, sizeof(s16)) || + copy_in_user(&p->l_whence, &p32->l_whence, sizeof(s16)) || + copy_in_user(&p->l_start, &p32->l_start, sizeof(s64)) || + copy_in_user(&p->l_len, &p32->l_len, sizeof(s64)) || + copy_in_user(&p->l_sysid, &p32->l_sysid, sizeof(s32)) || + copy_in_user(&p->l_pid, &p32->l_pid, sizeof(u32)) || + copy_in_user(&p->l_pad, &p32->l_pad, 4*sizeof(u32))) + return -EFAULT; + + return (unsigned long)p; +} + #else typedef struct xfs_fsop_bulkreq32 { @@ -103,7 +147,6 @@ __linvfs_compat_ioctl(int mode, struct file *f, unsigned cmd, unsigned long arg) /* not handled case XFS_IOC_FD_TO_HANDLE: case XFS_IOC_PATH_TO_HANDLE: - case XFS_IOC_PATH_TO_HANDLE: case XFS_IOC_PATH_TO_FSHANDLE: case XFS_IOC_OPEN_BY_HANDLE: case XFS_IOC_FSSETDM_BY_HANDLE: @@ -124,8 +167,21 @@ __linvfs_compat_ioctl(int mode, struct file *f, unsigned cmd, unsigned long arg) case XFS_IOC_ERROR_CLEARALL: break; -#ifndef BROKEN_X86_ALIGNMENT - /* xfs_flock_t and xfs_bstat_t have wrong u32 vs u64 alignment */ +#ifdef BROKEN_X86_ALIGNMENT + /* xfs_flock_t has wrong u32 vs u64 alignment */ + case XFS_IOC_ALLOCSP_32: + case XFS_IOC_FREESP_32: + case XFS_IOC_ALLOCSP64_32: + case XFS_IOC_FREESP64_32: + case XFS_IOC_RESVSP_32: + case XFS_IOC_UNRESVSP_32: + case XFS_IOC_RESVSP64_32: + case XFS_IOC_UNRESVSP64_32: + arg = xfs_ioctl32_flock(arg); + cmd = _NATIVE_IOC(cmd, struct xfs_flock64); + break; + +#else /* These are handled fine if no alignment issues */ case XFS_IOC_ALLOCSP: case XFS_IOC_FREESP: case XFS_IOC_RESVSP: @@ -134,6 +190,9 @@ __linvfs_compat_ioctl(int mode, struct file *f, unsigned cmd, unsigned long arg) case XFS_IOC_FREESP64: case XFS_IOC_RESVSP64: case XFS_IOC_UNRESVSP64: + break; + + /* xfs_bstat_t still has wrong u32 vs u64 alignment */ case XFS_IOC_SWAPEXT: break; -- cgit v1.2.3-70-g09d2 From 53937c52c3f1dff6100174f50a85c068f16713ae Mon Sep 17 00:00:00 2001 From: Nathan Scott Date: Mon, 5 Sep 2005 08:27:50 +1000 Subject: [XFS] Manage spinlock differences between kernel versions a bit. SGI-PV: 904196 SGI-Modid: xfs-linux:xfs-kern:23563a Signed-off-by: Nathan Scott --- fs/xfs/linux-2.6/spin.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/spin.h b/fs/xfs/linux-2.6/spin.h index bcf60a0b8df..0039504069a 100644 --- a/fs/xfs/linux-2.6/spin.h +++ b/fs/xfs/linux-2.6/spin.h @@ -45,6 +45,9 @@ typedef spinlock_t lock_t; #define SPLDECL(s) unsigned long s +#ifndef DEFINE_SPINLOCK +#define DEFINE_SPINLOCK(s) spinlock_t s = SPIN_LOCK_UNLOCKED +#endif #define spinlock_init(lock, name) spin_lock_init(lock) #define spinlock_destroy(lock) -- cgit v1.2.3-70-g09d2 From 02ba71de98d5eee63e82cc2d88f9ea8430810a9a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 5 Sep 2005 08:28:02 +1000 Subject: [XFS] allow a null behaviour pointer in linvfs_clear_inode SGI-PV: 940531 SGI-Modid: xfs-linux:xfs-kern:197782a Signed-off-by: Christoph Hellwig Signed-off-by: Nathan Scott --- fs/xfs/linux-2.6/xfs_super.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index cd3f8b3270a..910e43bfc95 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -387,14 +387,17 @@ linvfs_clear_inode( vn_trace_entry(vp, "clear_inode", (inst_t *)__return_address); - ASSERT(vp->v_fbhv != NULL); - XFS_STATS_INC(vn_rele); XFS_STATS_INC(vn_remove); XFS_STATS_INC(vn_reclaim); XFS_STATS_DEC(vn_active); - VOP_INACTIVE(vp, NULL, cache); + /* + * This can happen because xfs_iget_core calls xfs_idestroy if we + * find an inode with di_mode == 0 but without IGET_CREATE set. + */ + if (vp->v_fbhv) + VOP_INACTIVE(vp, NULL, cache); VN_LOCK(vp); vp->v_flag &= ~VMODIFIED; -- cgit v1.2.3-70-g09d2 From 0f9fffbcc1817c655d6dd40960ae2e0086b0f64f Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 5 Sep 2005 08:28:16 +1000 Subject: [XFS] remove some dead code from pagebuf SGI-PV: 934766 SGI-Modid: xfs-linux:xfs-kern:197783a Signed-off-by: Christoph Hellwig Signed-off-by: Nathan Scott --- fs/xfs/linux-2.6/xfs_buf.c | 30 ------------------------------ fs/xfs/linux-2.6/xfs_buf.h | 7 ------- 2 files changed, 37 deletions(-) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c index fba40cbdbcf..f43689754da 100644 --- a/fs/xfs/linux-2.6/xfs_buf.c +++ b/fs/xfs/linux-2.6/xfs_buf.c @@ -699,25 +699,6 @@ xfs_buf_read_flags( return NULL; } -/* - * Create a skeletal pagebuf (no pages associated with it). - */ -xfs_buf_t * -pagebuf_lookup( - xfs_buftarg_t *target, - loff_t ioff, - size_t isize, - page_buf_flags_t flags) -{ - xfs_buf_t *pb; - - pb = pagebuf_allocate(flags); - if (pb) { - _pagebuf_initialize(pb, target, ioff, isize, flags); - } - return pb; -} - /* * If we are not low on memory then do the readahead in a deadlock * safe manner. @@ -891,17 +872,6 @@ pagebuf_rele( PB_TRACE(pb, "rele", pb->pb_relse); - /* - * pagebuf_lookup buffers are not hashed, not delayed write, - * and don't have their own release routines. Special case. - */ - if (unlikely(!hash)) { - ASSERT(!pb->pb_relse); - if (atomic_dec_and_test(&pb->pb_hold)) - xfs_buf_free(pb); - return; - } - if (atomic_dec_and_lock(&pb->pb_hold, &hash->bh_lock)) { int do_free = 1; diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h index 3f8f69a66ae..c322e9d71f3 100644 --- a/fs/xfs/linux-2.6/xfs_buf.h +++ b/fs/xfs/linux-2.6/xfs_buf.h @@ -206,13 +206,6 @@ extern xfs_buf_t *xfs_buf_read_flags( /* allocate and read a buffer */ #define xfs_buf_read(target, blkno, len, flags) \ xfs_buf_read_flags((target), (blkno), (len), PBF_LOCK | PBF_MAPPED) -extern xfs_buf_t *pagebuf_lookup( - xfs_buftarg_t *, - loff_t, /* starting offset of range */ - size_t, /* length of range */ - page_buf_flags_t); /* PBF_READ, PBF_WRITE, */ - /* PBF_FORCEIO, */ - extern xfs_buf_t *pagebuf_get_empty( /* allocate pagebuf struct with */ /* no memory or disk address */ size_t len, -- cgit v1.2.3-70-g09d2 From efa092f3d4c60be7e81de515db9f06e5f8426afc Mon Sep 17 00:00:00 2001 From: Tim Shimmin Date: Mon, 5 Sep 2005 08:29:01 +1000 Subject: [XFS] Fixes a bug in the quota code when allocating a new dquot record which can cause an extent hole to be filled and a free extent to be processed. In this case, we make a few mistakes: forget to pass back the transaction, forget to put a hold on the buffer and forget to add the buf to the new transaction. SGI-PV: 940366 SGI-Modid: xfs-linux:xfs-kern:23594a Signed-off-by: Tim Shimmin Signed-off-by: Nathan Scott --- fs/xfs/quota/xfs_dquot.c | 43 ++++++++++++++++++++++++++++++++++++------- fs/xfs/xfs_trans.h | 1 + fs/xfs/xfs_trans_buf.c | 23 +++++++++++++++++++++++ 3 files changed, 60 insertions(+), 7 deletions(-) (limited to 'fs') diff --git a/fs/xfs/quota/xfs_dquot.c b/fs/xfs/quota/xfs_dquot.c index 46ce1e3ce1d..e2e8d35fa4d 100644 --- a/fs/xfs/quota/xfs_dquot.c +++ b/fs/xfs/quota/xfs_dquot.c @@ -421,7 +421,7 @@ xfs_qm_init_dquot_blk( */ STATIC int xfs_qm_dqalloc( - xfs_trans_t *tp, + xfs_trans_t **tpp, xfs_mount_t *mp, xfs_dquot_t *dqp, xfs_inode_t *quotip, @@ -433,6 +433,7 @@ xfs_qm_dqalloc( xfs_bmbt_irec_t map; int nmaps, error, committed; xfs_buf_t *bp; + xfs_trans_t *tp = *tpp; ASSERT(tp != NULL); xfs_dqtrace_entry(dqp, "DQALLOC"); @@ -492,10 +493,32 @@ xfs_qm_dqalloc( xfs_qm_init_dquot_blk(tp, mp, INT_GET(dqp->q_core.d_id, ARCH_CONVERT), dqp->dq_flags & XFS_DQ_ALLTYPES, bp); - if ((error = xfs_bmap_finish(&tp, &flist, firstblock, &committed))) { + /* + * xfs_bmap_finish() may commit the current transaction and + * start a second transaction if the freelist is not empty. + * + * Since we still want to modify this buffer, we need to + * ensure that the buffer is not released on commit of + * the first transaction and ensure the buffer is added to the + * second transaction. + * + * If there is only one transaction then don't stop the buffer + * from being released when it commits later on. + */ + + xfs_trans_bhold(tp, bp); + + if ((error = xfs_bmap_finish(tpp, &flist, firstblock, &committed))) { goto error1; } + if (committed) { + tp = *tpp; + xfs_trans_bjoin(tp, bp); + } else { + xfs_trans_bhold_release(tp, bp); + } + *O_bpp = bp; return 0; @@ -514,7 +537,7 @@ xfs_qm_dqalloc( */ STATIC int xfs_qm_dqtobp( - xfs_trans_t *tp, + xfs_trans_t **tpp, xfs_dquot_t *dqp, xfs_disk_dquot_t **O_ddpp, xfs_buf_t **O_bpp, @@ -528,6 +551,7 @@ xfs_qm_dqtobp( xfs_disk_dquot_t *ddq; xfs_dqid_t id; boolean_t newdquot; + xfs_trans_t *tp = (tpp ? *tpp : NULL); mp = dqp->q_mount; id = INT_GET(dqp->q_core.d_id, ARCH_CONVERT); @@ -579,9 +603,10 @@ xfs_qm_dqtobp( return (ENOENT); ASSERT(tp); - if ((error = xfs_qm_dqalloc(tp, mp, dqp, quotip, + if ((error = xfs_qm_dqalloc(tpp, mp, dqp, quotip, dqp->q_fileoffset, &bp))) return (error); + tp = *tpp; newdquot = B_TRUE; } else { /* @@ -645,7 +670,7 @@ xfs_qm_dqtobp( /* ARGSUSED */ STATIC int xfs_qm_dqread( - xfs_trans_t *tp, + xfs_trans_t **tpp, xfs_dqid_t id, xfs_dquot_t *dqp, /* dquot to get filled in */ uint flags) @@ -653,15 +678,19 @@ xfs_qm_dqread( xfs_disk_dquot_t *ddqp; xfs_buf_t *bp; int error; + xfs_trans_t *tp; + + ASSERT(tpp); /* * get a pointer to the on-disk dquot and the buffer containing it * dqp already knows its own type (GROUP/USER). */ xfs_dqtrace_entry(dqp, "DQREAD"); - if ((error = xfs_qm_dqtobp(tp, dqp, &ddqp, &bp, flags))) { + if ((error = xfs_qm_dqtobp(tpp, dqp, &ddqp, &bp, flags))) { return (error); } + tp = *tpp; /* copy everything from disk dquot to the incore dquot */ memcpy(&dqp->q_core, ddqp, sizeof(xfs_disk_dquot_t)); @@ -740,7 +769,7 @@ xfs_qm_idtodq( * Read it from disk; xfs_dqread() takes care of * all the necessary initialization of dquot's fields (locks, etc) */ - if ((error = xfs_qm_dqread(tp, id, dqp, flags))) { + if ((error = xfs_qm_dqread(&tp, id, dqp, flags))) { /* * This can happen if quotas got turned off (ESRCH), * or if the dquot didn't exist on disk and we ask to diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h index 9ee5eeee802..a263aec8b3a 100644 --- a/fs/xfs/xfs_trans.h +++ b/fs/xfs/xfs_trans.h @@ -999,6 +999,7 @@ struct xfs_buf *xfs_trans_getsb(xfs_trans_t *, struct xfs_mount *, int); void xfs_trans_brelse(xfs_trans_t *, struct xfs_buf *); void xfs_trans_bjoin(xfs_trans_t *, struct xfs_buf *); void xfs_trans_bhold(xfs_trans_t *, struct xfs_buf *); +void xfs_trans_bhold_release(xfs_trans_t *, struct xfs_buf *); void xfs_trans_binval(xfs_trans_t *, struct xfs_buf *); void xfs_trans_inode_buf(xfs_trans_t *, struct xfs_buf *); void xfs_trans_inode_buf(xfs_trans_t *, struct xfs_buf *); diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c index 144da7a8546..e733293dd7f 100644 --- a/fs/xfs/xfs_trans_buf.c +++ b/fs/xfs/xfs_trans_buf.c @@ -713,6 +713,29 @@ xfs_trans_bhold(xfs_trans_t *tp, xfs_buf_item_trace("BHOLD", bip); } +/* + * Cancel the previous buffer hold request made on this buffer + * for this transaction. + */ +void +xfs_trans_bhold_release(xfs_trans_t *tp, + xfs_buf_t *bp) +{ + xfs_buf_log_item_t *bip; + + ASSERT(XFS_BUF_ISBUSY(bp)); + ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp); + ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL); + + bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *); + ASSERT(!(bip->bli_flags & XFS_BLI_STALE)); + ASSERT(!(bip->bli_format.blf_flags & XFS_BLI_CANCEL)); + ASSERT(atomic_read(&bip->bli_refcount) > 0); + ASSERT(bip->bli_flags & XFS_BLI_HOLD); + bip->bli_flags &= ~XFS_BLI_HOLD; + xfs_buf_item_trace("BHOLD RELEASE", bip); +} + /* * This is called to mark bytes first through last inclusive of the given * buffer as needing to be logged when the transaction is committed. -- cgit v1.2.3-70-g09d2 From ba403ab43e896c57f32995ccba9a6bd6ec8dd1b9 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 5 Sep 2005 08:33:00 +1000 Subject: [XFS] Retry linux inode cacech lookup if we found a stale inode. This fixes crashes under high nfs load SGI-PV: 941429 SGI-Modid: xfs-linux:xfs-kern:197929a Signed-off-by: Christoph Hellwig Signed-off-by: Nathan Scott --- fs/xfs/xfs_iget.c | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c index fa796910f3a..0d9ae8fb413 100644 --- a/fs/xfs/xfs_iget.c +++ b/fs/xfs/xfs_iget.c @@ -30,6 +30,8 @@ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ */ +#include + #include "xfs.h" #include "xfs_macros.h" @@ -507,14 +509,13 @@ xfs_iget( XFS_STATS_INC(xs_ig_attempts); +retry: if ((inode = iget_locked(XFS_MTOVFS(mp)->vfs_super, ino))) { bhv_desc_t *bdp; xfs_inode_t *ip; - int newnode; vp = LINVFS_GET_VP(inode); if (inode->i_state & I_NEW) { -inode_allocate: vn_initialize(inode); error = xfs_iget_core(vp, mp, tp, ino, flags, lock_flags, ipp, bno); @@ -525,22 +526,25 @@ inode_allocate: iput(inode); } } else { - if (is_bad_inode(inode)) { + /* + * If the inode is not fully constructed due to + * filehandle mistmatches wait for the inode to go + * away and try again. + * + * iget_locked will call __wait_on_freeing_inode + * to wait for the inode to go away. + */ + if (is_bad_inode(inode) || + ((bdp = vn_bhv_lookup(VN_BHV_HEAD(vp), + &xfs_vnodeops)) == NULL)) { iput(inode); - return EIO; + delay(1); + goto retry; } - bdp = vn_bhv_lookup(VN_BHV_HEAD(vp), &xfs_vnodeops); - if (bdp == NULL) { - XFS_STATS_INC(xs_ig_dup); - goto inode_allocate; - } ip = XFS_BHVTOI(bdp); if (lock_flags != 0) xfs_ilock(ip, lock_flags); - newnode = (ip->i_d.di_mode == 0); - if (newnode) - xfs_iocore_inode_reinit(ip); XFS_STATS_INC(xs_ig_found); *ipp = ip; error = 0; -- cgit v1.2.3-70-g09d2 From 2f926587512869ebf6bc820bd5f030e127aae774 Mon Sep 17 00:00:00 2001 From: David Chinner Date: Mon, 5 Sep 2005 08:33:35 +1000 Subject: [XFS] Fix racy access to pb_flags. pagebuf_rele() modified pb_flags after the pagebuf had been unlocked if the buffer was delwri. At high load, this could result in a race when the superblock was being synced that would result the flags being incorrect and the iodone functions being executed incorrectly. This then leads to iclog callback failures or AIL list corruptions resulting in filesystem shutdowns. SGI-PV: 923981 SGI-Modid: xfs-linux:xfs-kern:23616a Signed-off-by: David Chinner Signed-off-by: Nathan Scott --- fs/xfs/linux-2.6/xfs_buf.c | 66 +++++++++++++++++++++++++++++++++++----------- fs/xfs/linux-2.6/xfs_buf.h | 3 +-- 2 files changed, 51 insertions(+), 18 deletions(-) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c index f43689754da..e6340906342 100644 --- a/fs/xfs/linux-2.6/xfs_buf.c +++ b/fs/xfs/linux-2.6/xfs_buf.c @@ -590,8 +590,10 @@ found: PB_SET_OWNER(pb); } - if (pb->pb_flags & PBF_STALE) + if (pb->pb_flags & PBF_STALE) { + ASSERT((pb->pb_flags & _PBF_DELWRI_Q) == 0); pb->pb_flags &= PBF_MAPPED; + } PB_TRACE(pb, "got_lock", 0); XFS_STATS_INC(pb_get_locked); return (pb); @@ -872,6 +874,17 @@ pagebuf_rele( PB_TRACE(pb, "rele", pb->pb_relse); + /* + * pagebuf_lookup buffers are not hashed, not delayed write, + * and don't have their own release routines. Special case. + */ + if (unlikely(!hash)) { + ASSERT(!pb->pb_relse); + if (atomic_dec_and_test(&pb->pb_hold)) + xfs_buf_free(pb); + return; + } + if (atomic_dec_and_lock(&pb->pb_hold, &hash->bh_lock)) { int do_free = 1; @@ -883,22 +896,23 @@ pagebuf_rele( do_free = 0; } - if (pb->pb_flags & PBF_DELWRI) { - pb->pb_flags |= PBF_ASYNC; - atomic_inc(&pb->pb_hold); - pagebuf_delwri_queue(pb, 0); - do_free = 0; - } else if (pb->pb_flags & PBF_FS_MANAGED) { + if (pb->pb_flags & PBF_FS_MANAGED) { do_free = 0; } if (do_free) { + ASSERT((pb->pb_flags & (PBF_DELWRI|_PBF_DELWRI_Q)) == 0); list_del_init(&pb->pb_hash_list); spin_unlock(&hash->bh_lock); pagebuf_free(pb); } else { spin_unlock(&hash->bh_lock); } + } else { + /* + * Catch reference count leaks + */ + ASSERT(atomic_read(&pb->pb_hold) >= 0); } } @@ -976,13 +990,24 @@ pagebuf_lock( * pagebuf_unlock * * pagebuf_unlock releases the lock on the buffer object created by - * pagebuf_lock or pagebuf_cond_lock (not any - * pinning of underlying pages created by pagebuf_pin). + * pagebuf_lock or pagebuf_cond_lock (not any pinning of underlying pages + * created by pagebuf_pin). + * + * If the buffer is marked delwri but is not queued, do so before we + * unlock the buffer as we need to set flags correctly. We also need to + * take a reference for the delwri queue because the unlocker is going to + * drop their's and they don't know we just queued it. */ void pagebuf_unlock( /* unlock buffer */ xfs_buf_t *pb) /* buffer to unlock */ { + if ((pb->pb_flags & (PBF_DELWRI|_PBF_DELWRI_Q)) == PBF_DELWRI) { + atomic_inc(&pb->pb_hold); + pb->pb_flags |= PBF_ASYNC; + pagebuf_delwri_queue(pb, 0); + } + PB_CLEAR_OWNER(pb); up(&pb->pb_sema); PB_TRACE(pb, "unlock", 0); @@ -1486,6 +1511,11 @@ again: ASSERT(btp == bp->pb_target); if (!(bp->pb_flags & PBF_FS_MANAGED)) { spin_unlock(&hash->bh_lock); + /* + * Catch superblock reference count leaks + * immediately + */ + BUG_ON(bp->pb_bn == 0); delay(100); goto again; } @@ -1661,17 +1691,20 @@ pagebuf_delwri_queue( int unlock) { PB_TRACE(pb, "delwri_q", (long)unlock); - ASSERT(pb->pb_flags & PBF_DELWRI); + ASSERT((pb->pb_flags & (PBF_DELWRI|PBF_ASYNC)) == + (PBF_DELWRI|PBF_ASYNC)); spin_lock(&pbd_delwrite_lock); /* If already in the queue, dequeue and place at tail */ if (!list_empty(&pb->pb_list)) { + ASSERT(pb->pb_flags & _PBF_DELWRI_Q); if (unlock) { atomic_dec(&pb->pb_hold); } list_del(&pb->pb_list); } + pb->pb_flags |= _PBF_DELWRI_Q; list_add_tail(&pb->pb_list, &pbd_delwrite_queue); pb->pb_queuetime = jiffies; spin_unlock(&pbd_delwrite_lock); @@ -1688,10 +1721,11 @@ pagebuf_delwri_dequeue( spin_lock(&pbd_delwrite_lock); if ((pb->pb_flags & PBF_DELWRI) && !list_empty(&pb->pb_list)) { + ASSERT(pb->pb_flags & _PBF_DELWRI_Q); list_del_init(&pb->pb_list); dequeued = 1; } - pb->pb_flags &= ~PBF_DELWRI; + pb->pb_flags &= ~(PBF_DELWRI|_PBF_DELWRI_Q); spin_unlock(&pbd_delwrite_lock); if (dequeued) @@ -1770,7 +1804,7 @@ xfsbufd( break; } - pb->pb_flags &= ~PBF_DELWRI; + pb->pb_flags &= ~(PBF_DELWRI|_PBF_DELWRI_Q); pb->pb_flags |= PBF_WRITE; list_move(&pb->pb_list, &tmp); } @@ -1820,15 +1854,13 @@ xfs_flush_buftarg( if (pb->pb_target != target) continue; - ASSERT(pb->pb_flags & PBF_DELWRI); + ASSERT(pb->pb_flags & (PBF_DELWRI|_PBF_DELWRI_Q)); PB_TRACE(pb, "walkq2", (long)pagebuf_ispin(pb)); if (pagebuf_ispin(pb)) { pincount++; continue; } - pb->pb_flags &= ~PBF_DELWRI; - pb->pb_flags |= PBF_WRITE; list_move(&pb->pb_list, &tmp); } spin_unlock(&pbd_delwrite_lock); @@ -1837,12 +1869,14 @@ xfs_flush_buftarg( * Dropped the delayed write list lock, now walk the temporary list */ list_for_each_entry_safe(pb, n, &tmp, pb_list) { + pagebuf_lock(pb); + pb->pb_flags &= ~(PBF_DELWRI|_PBF_DELWRI_Q); + pb->pb_flags |= PBF_WRITE; if (wait) pb->pb_flags &= ~PBF_ASYNC; else list_del_init(&pb->pb_list); - pagebuf_lock(pb); pagebuf_iostrategy(pb); } diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h index c322e9d71f3..4b7fe3b5e46 100644 --- a/fs/xfs/linux-2.6/xfs_buf.h +++ b/fs/xfs/linux-2.6/xfs_buf.h @@ -89,6 +89,7 @@ typedef enum page_buf_flags_e { /* pb_flags values */ _PBF_PAGE_CACHE = (1 << 17),/* backed by pagecache */ _PBF_KMEM_ALLOC = (1 << 18),/* backed by kmem_alloc() */ _PBF_RUN_QUEUES = (1 << 19),/* run block device task queue */ + _PBF_DELWRI_Q = (1 << 21), /* buffer on delwri queue */ } page_buf_flags_t; #define PBF_UPDATE (PBF_READ | PBF_WRITE) @@ -337,8 +338,6 @@ extern void pagebuf_trace( - - /* These are just for xfs_syncsub... it sets an internal variable * then passes it to VOP_FLUSH_PAGES or adds the flags to a newly gotten buf_t */ -- cgit v1.2.3-70-g09d2 From 4df08c52582be558e12316ae60bf077ca8f17a1e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 5 Sep 2005 08:34:18 +1000 Subject: [XFS] Switch kernel thread handling to the kthread_ API SGI-PV: 942063 SGI-Modid: xfs-linux:xfs-kern:198388a Signed-off-by: Christoph Hellwig Signed-off-by: Nathan Scott --- fs/xfs/linux-2.6/xfs_buf.c | 24 ++++++++---------------- fs/xfs/linux-2.6/xfs_super.c | 36 +++++++++--------------------------- fs/xfs/linux-2.6/xfs_vfs.c | 1 - fs/xfs/linux-2.6/xfs_vfs.h | 2 -- 4 files changed, 17 insertions(+), 46 deletions(-) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c index e6340906342..655bf4a78af 100644 --- a/fs/xfs/linux-2.6/xfs_buf.c +++ b/fs/xfs/linux-2.6/xfs_buf.c @@ -54,6 +54,7 @@ #include #include #include +#include #include "xfs_linux.h" @@ -1742,9 +1743,7 @@ pagebuf_runall_queues( } /* Defines for pagebuf daemon */ -STATIC DECLARE_COMPLETION(xfsbufd_done); STATIC struct task_struct *xfsbufd_task; -STATIC int xfsbufd_active; STATIC int xfsbufd_force_flush; STATIC int xfsbufd_force_sleep; @@ -1770,14 +1769,8 @@ xfsbufd( xfs_buftarg_t *target; xfs_buf_t *pb, *n; - /* Set up the thread */ - daemonize("xfsbufd"); current->flags |= PF_MEMALLOC; - xfsbufd_task = current; - xfsbufd_active = 1; - barrier(); - INIT_LIST_HEAD(&tmp); do { if (unlikely(freezing(current))) { @@ -1825,9 +1818,9 @@ xfsbufd( purge_addresses(); xfsbufd_force_flush = 0; - } while (xfsbufd_active); + } while (!kthread_should_stop()); - complete_and_exit(&xfsbufd_done, 0); + return 0; } /* @@ -1910,9 +1903,11 @@ xfs_buf_daemons_start(void) if (!xfsdatad_workqueue) goto out_destroy_xfslogd_workqueue; - error = kernel_thread(xfsbufd, NULL, CLONE_FS|CLONE_FILES); - if (error < 0) + xfsbufd_task = kthread_run(xfsbufd, NULL, "xfsbufd"); + if (IS_ERR(xfsbufd_task)) { + error = PTR_ERR(xfsbufd_task); goto out_destroy_xfsdatad_workqueue; + } return 0; out_destroy_xfsdatad_workqueue: @@ -1929,10 +1924,7 @@ xfs_buf_daemons_start(void) STATIC void xfs_buf_daemons_stop(void) { - xfsbufd_active = 0; - barrier(); - wait_for_completion(&xfsbufd_done); - + kthread_stop(xfsbufd_task); destroy_workqueue(xfslogd_workqueue); destroy_workqueue(xfsdatad_workqueue); } diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index 910e43bfc95..0da87bfc999 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -72,6 +72,7 @@ #include #include #include +#include STATIC struct quotactl_ops linvfs_qops; STATIC struct super_operations linvfs_sops; @@ -516,25 +517,16 @@ xfssyncd( { long timeleft; vfs_t *vfsp = (vfs_t *) arg; - struct list_head tmp; struct vfs_sync_work *work, *n; + LIST_HEAD (tmp); - daemonize("xfssyncd"); - - vfsp->vfs_sync_work.w_vfs = vfsp; - vfsp->vfs_sync_work.w_syncer = vfs_sync_worker; - vfsp->vfs_sync_task = current; - wmb(); - wake_up(&vfsp->vfs_wait_sync_task); - - INIT_LIST_HEAD(&tmp); timeleft = (xfs_syncd_centisecs * HZ) / 100; for (;;) { set_current_state(TASK_INTERRUPTIBLE); timeleft = schedule_timeout(timeleft); /* swsusp */ try_to_freeze(); - if (vfsp->vfs_flag & VFS_UMOUNT) + if (kthread_should_stop()) break; spin_lock(&vfsp->vfs_sync_lock); @@ -563,10 +555,6 @@ xfssyncd( } } - vfsp->vfs_sync_task = NULL; - wmb(); - wake_up(&vfsp->vfs_wait_sync_task); - return 0; } @@ -574,13 +562,11 @@ STATIC int linvfs_start_syncd( vfs_t *vfsp) { - int pid; - - pid = kernel_thread(xfssyncd, (void *) vfsp, - CLONE_VM | CLONE_FS | CLONE_FILES); - if (pid < 0) - return -pid; - wait_event(vfsp->vfs_wait_sync_task, vfsp->vfs_sync_task); + vfsp->vfs_sync_work.w_syncer = vfs_sync_worker; + vfsp->vfs_sync_work.w_vfs = vfsp; + vfsp->vfs_sync_task = kthread_run(xfssyncd, vfsp, "xfssyncd"); + if (IS_ERR(vfsp->vfs_sync_task)) + return -PTR_ERR(vfsp->vfs_sync_task); return 0; } @@ -588,11 +574,7 @@ STATIC void linvfs_stop_syncd( vfs_t *vfsp) { - vfsp->vfs_flag |= VFS_UMOUNT; - wmb(); - - wake_up_process(vfsp->vfs_sync_task); - wait_event(vfsp->vfs_wait_sync_task, !vfsp->vfs_sync_task); + kthread_stop(vfsp->vfs_sync_task); } STATIC void diff --git a/fs/xfs/linux-2.6/xfs_vfs.c b/fs/xfs/linux-2.6/xfs_vfs.c index 669c6164495..34cc902ec11 100644 --- a/fs/xfs/linux-2.6/xfs_vfs.c +++ b/fs/xfs/linux-2.6/xfs_vfs.c @@ -251,7 +251,6 @@ vfs_allocate( void ) bhv_head_init(VFS_BHVHEAD(vfsp), "vfs"); INIT_LIST_HEAD(&vfsp->vfs_sync_list); spin_lock_init(&vfsp->vfs_sync_lock); - init_waitqueue_head(&vfsp->vfs_wait_sync_task); init_waitqueue_head(&vfsp->vfs_wait_single_sync_task); return vfsp; } diff --git a/fs/xfs/linux-2.6/xfs_vfs.h b/fs/xfs/linux-2.6/xfs_vfs.h index 7ee1f714e9b..f0ab574fb47 100644 --- a/fs/xfs/linux-2.6/xfs_vfs.h +++ b/fs/xfs/linux-2.6/xfs_vfs.h @@ -65,7 +65,6 @@ typedef struct vfs { spinlock_t vfs_sync_lock; /* work item list lock */ int vfs_sync_seq; /* sync thread generation no. */ wait_queue_head_t vfs_wait_single_sync_task; - wait_queue_head_t vfs_wait_sync_task; } vfs_t; #define vfs_fbhv vfs_bh.bh_first /* 1st on vfs behavior chain */ @@ -96,7 +95,6 @@ typedef enum { #define VFS_RDONLY 0x0001 /* read-only vfs */ #define VFS_GRPID 0x0002 /* group-ID assigned from directory */ #define VFS_DMI 0x0004 /* filesystem has the DMI enabled */ -#define VFS_UMOUNT 0x0008 /* unmount in progress */ #define VFS_END 0x0008 /* max flag */ #define SYNC_ATTR 0x0001 /* sync attributes */ -- cgit v1.2.3-70-g09d2 From a3c476d8a19ded7c5f1e17ea07df377764d9d1d3 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 5 Sep 2005 08:40:49 +1000 Subject: [XFS] replace "extern inline" with "static inline" Patch from Adrian Bunk , thanks a lot! SGI-PV: 942227 SGI-Modid: xfs-linux:xfs-kern:198642a Signed-off-by: Christoph Hellwig Signed-off-by: Nathan Scott --- fs/xfs/linux-2.6/xfs_buf.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h index 4b7fe3b5e46..67c19f79923 100644 --- a/fs/xfs/linux-2.6/xfs_buf.h +++ b/fs/xfs/linux-2.6/xfs_buf.h @@ -444,7 +444,7 @@ extern void pagebuf_trace( #define XFS_BUF_PTR(bp) (xfs_caddr_t)((bp)->pb_addr) -extern inline xfs_caddr_t xfs_buf_offset(xfs_buf_t *bp, size_t offset) +static inline xfs_caddr_t xfs_buf_offset(xfs_buf_t *bp, size_t offset) { if (bp->pb_flags & PBF_MAPPED) return XFS_BUF_PTR(bp) + offset; -- cgit v1.2.3-70-g09d2 From c31e887807a3eab26614ee142629ba447cbcc0dc Mon Sep 17 00:00:00 2001 From: Nathan Scott Date: Mon, 5 Sep 2005 10:06:55 +1000 Subject: [XFS] Fix incorrect use of BMAPI_READ in unwritten extent handling (luckily just cosmetic). SGI-PV: 942232 SGI-Modid: xfs-linux-melb:xfs-kern:23718a Signed-off-by: Nathan Scott --- fs/xfs/linux-2.6/xfs_aops.c | 2 +- fs/xfs/xfs_iomap.c | 5 ++--- 2 files changed, 3 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c index ea615e2f476..c6c077978fe 100644 --- a/fs/xfs/linux-2.6/xfs_aops.c +++ b/fs/xfs/linux-2.6/xfs_aops.c @@ -804,7 +804,7 @@ xfs_page_state_convert( continue; if (!iomp) { err = xfs_map_blocks(inode, offset, len, &iomap, - BMAPI_READ|BMAPI_IGNSTATE); + BMAPI_WRITE|BMAPI_IGNSTATE); if (err) { goto error; } diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 44999d557d8..d0f5be63cdd 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -226,13 +226,12 @@ xfs_iomap( xfs_iomap_enter_trace(XFS_IOMAP_READ_ENTER, io, offset, count); lockmode = XFS_LCK_MAP_SHARED(mp, io); bmapi_flags = XFS_BMAPI_ENTIRE; - if (flags & BMAPI_IGNSTATE) - bmapi_flags |= XFS_BMAPI_IGSTATE; break; case BMAPI_WRITE: xfs_iomap_enter_trace(XFS_IOMAP_WRITE_ENTER, io, offset, count); lockmode = XFS_ILOCK_EXCL|XFS_EXTSIZE_WR; - bmapi_flags = 0; + if (flags & BMAPI_IGNSTATE) + bmapi_flags |= XFS_BMAPI_IGSTATE|XFS_BMAPI_ENTIRE; XFS_ILOCK(mp, io, lockmode); break; case BMAPI_ALLOCATE: -- cgit v1.2.3-70-g09d2 From cde410a99d0dd38eb218be884d02034fcdf5125b Mon Sep 17 00:00:00 2001 From: Nathan Scott Date: Mon, 5 Sep 2005 11:47:01 +1000 Subject: [XFS] Sort out some cosmetic differences between XFS trees. SGI-PV: 904196 SGI-Modid: xfs-linux-melb:xfs-kern:23719a Signed-off-by: Nathan Scott --- fs/xfs/Makefile | 151 +--------------------------------------- fs/xfs/Makefile-linux-2.6 | 141 +++++++++++++++++++++++++++++++++++++ fs/xfs/linux-2.6/xfs_iops.c | 9 ++- fs/xfs/linux-2.6/xfs_linux.h | 12 ++-- fs/xfs/quota/Makefile | 1 + fs/xfs/quota/Makefile-linux-2.6 | 53 ++++++++++++++ fs/xfs/support/debug.c | 1 + fs/xfs/xfs_vfsops.c | 10 +-- 8 files changed, 217 insertions(+), 161 deletions(-) create mode 100644 fs/xfs/Makefile-linux-2.6 create mode 100644 fs/xfs/quota/Makefile create mode 100644 fs/xfs/quota/Makefile-linux-2.6 (limited to 'fs') diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile index d3ff7835463..49e3e7e5e3d 100644 --- a/fs/xfs/Makefile +++ b/fs/xfs/Makefile @@ -1,150 +1 @@ -# -# Copyright (c) 2000-2004 Silicon Graphics, Inc. All Rights Reserved. -# -# This program is free software; you can redistribute it and/or modify it -# under the terms of version 2 of the GNU General Public License as -# published by the Free Software Foundation. -# -# This program is distributed in the hope that it would be useful, but -# WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. -# -# Further, this software is distributed without any warranty that it is -# free of the rightful claim of any third person regarding infringement -# or the like. Any license provided herein, whether implied or -# otherwise, applies only to this software file. Patent licenses, if -# any, provided herein do not apply to combinations of this program with -# other software, or any other product whatsoever. -# -# You should have received a copy of the GNU General Public License along -# with this program; if not, write the Free Software Foundation, Inc., 59 -# Temple Place - Suite 330, Boston MA 02111-1307, USA. -# -# Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, -# Mountain View, CA 94043, or: -# -# http://www.sgi.com -# -# For further information regarding this notice, see: -# -# http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ -# - -EXTRA_CFLAGS += -Ifs/xfs -Ifs/xfs/linux-2.6 -funsigned-char - -ifeq ($(CONFIG_XFS_DEBUG),y) - EXTRA_CFLAGS += -g -DSTATIC="" -DDEBUG - EXTRA_CFLAGS += -DPAGEBUF_LOCK_TRACKING -endif -ifeq ($(CONFIG_XFS_TRACE),y) - EXTRA_CFLAGS += -DXFS_ALLOC_TRACE - EXTRA_CFLAGS += -DXFS_ATTR_TRACE - EXTRA_CFLAGS += -DXFS_BLI_TRACE - EXTRA_CFLAGS += -DXFS_BMAP_TRACE - EXTRA_CFLAGS += -DXFS_BMBT_TRACE - EXTRA_CFLAGS += -DXFS_DIR_TRACE - EXTRA_CFLAGS += -DXFS_DIR2_TRACE - EXTRA_CFLAGS += -DXFS_DQUOT_TRACE - EXTRA_CFLAGS += -DXFS_ILOCK_TRACE - EXTRA_CFLAGS += -DXFS_LOG_TRACE - EXTRA_CFLAGS += -DXFS_RW_TRACE - EXTRA_CFLAGS += -DPAGEBUF_TRACE - EXTRA_CFLAGS += -DXFS_VNODE_TRACE -endif - -obj-$(CONFIG_XFS_FS) += xfs.o - -xfs-$(CONFIG_XFS_QUOTA) += $(addprefix quota/, \ - xfs_dquot.o \ - xfs_dquot_item.o \ - xfs_trans_dquot.o \ - xfs_qm_syscalls.o \ - xfs_qm_bhv.o \ - xfs_qm.o) -ifeq ($(CONFIG_XFS_QUOTA),y) -xfs-$(CONFIG_PROC_FS) += quota/xfs_qm_stats.o -endif - -xfs-$(CONFIG_XFS_RT) += xfs_rtalloc.o -xfs-$(CONFIG_XFS_POSIX_ACL) += xfs_acl.o -xfs-$(CONFIG_PROC_FS) += linux-2.6/xfs_stats.o -xfs-$(CONFIG_SYSCTL) += linux-2.6/xfs_sysctl.o -xfs-$(CONFIG_COMPAT) += linux-2.6/xfs_ioctl32.o -xfs-$(CONFIG_XFS_EXPORT) += linux-2.6/xfs_export.o - - -xfs-y += xfs_alloc.o \ - xfs_alloc_btree.o \ - xfs_attr.o \ - xfs_attr_leaf.o \ - xfs_behavior.o \ - xfs_bit.o \ - xfs_bmap.o \ - xfs_bmap_btree.o \ - xfs_btree.o \ - xfs_buf_item.o \ - xfs_da_btree.o \ - xfs_dir.o \ - xfs_dir2.o \ - xfs_dir2_block.o \ - xfs_dir2_data.o \ - xfs_dir2_leaf.o \ - xfs_dir2_node.o \ - xfs_dir2_sf.o \ - xfs_dir_leaf.o \ - xfs_error.o \ - xfs_extfree_item.o \ - xfs_fsops.o \ - xfs_ialloc.o \ - xfs_ialloc_btree.o \ - xfs_iget.o \ - xfs_inode.o \ - xfs_inode_item.o \ - xfs_iocore.o \ - xfs_iomap.o \ - xfs_itable.o \ - xfs_dfrag.o \ - xfs_log.o \ - xfs_log_recover.o \ - xfs_macros.o \ - xfs_mount.o \ - xfs_rename.o \ - xfs_trans.o \ - xfs_trans_ail.o \ - xfs_trans_buf.o \ - xfs_trans_extfree.o \ - xfs_trans_inode.o \ - xfs_trans_item.o \ - xfs_utils.o \ - xfs_vfsops.o \ - xfs_vnodeops.o \ - xfs_rw.o \ - xfs_dmops.o \ - xfs_qmops.o - -xfs-$(CONFIG_XFS_TRACE) += xfs_dir2_trace.o - -# Objects in linux-2.6/ -xfs-y += $(addprefix linux-2.6/, \ - kmem.o \ - xfs_aops.o \ - xfs_buf.o \ - xfs_file.o \ - xfs_fs_subr.o \ - xfs_globals.o \ - xfs_ioctl.o \ - xfs_iops.o \ - xfs_lrw.o \ - xfs_super.o \ - xfs_vfs.o \ - xfs_vnode.o) - -# Objects in support/ -xfs-y += $(addprefix support/, \ - debug.o \ - move.o \ - qsort.o \ - uuid.o) - -xfs-$(CONFIG_XFS_TRACE) += support/ktrace.o - +include $(TOPDIR)/fs/xfs/Makefile-linux-$(VERSION).$(PATCHLEVEL) diff --git a/fs/xfs/Makefile-linux-2.6 b/fs/xfs/Makefile-linux-2.6 new file mode 100644 index 00000000000..fbfcbe5a7cd --- /dev/null +++ b/fs/xfs/Makefile-linux-2.6 @@ -0,0 +1,141 @@ +# +# Copyright (c) 2000-2004 Silicon Graphics, Inc. All Rights Reserved. +# +# This program is free software; you can redistribute it and/or modify it +# under the terms of version 2 of the GNU General Public License as +# published by the Free Software Foundation. +# +# This program is distributed in the hope that it would be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# Further, this software is distributed without any warranty that it is +# free of the rightful claim of any third person regarding infringement +# or the like. Any license provided herein, whether implied or +# otherwise, applies only to this software file. Patent licenses, if +# any, provided herein do not apply to combinations of this program with +# other software, or any other product whatsoever. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write the Free Software Foundation, Inc., 59 +# Temple Place - Suite 330, Boston MA 02111-1307, USA. +# +# Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, +# Mountain View, CA 94043, or: +# +# http://www.sgi.com +# +# For further information regarding this notice, see: +# +# http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ +# + +EXTRA_CFLAGS += -Ifs/xfs -Ifs/xfs/linux-2.6 -funsigned-char + +XFS_LINUX := linux-2.6 + +ifeq ($(CONFIG_XFS_DEBUG),y) + EXTRA_CFLAGS += -g -DSTATIC="" -DDEBUG + EXTRA_CFLAGS += -DPAGEBUF_LOCK_TRACKING +endif +ifeq ($(CONFIG_XFS_TRACE),y) + EXTRA_CFLAGS += -DXFS_ALLOC_TRACE + EXTRA_CFLAGS += -DXFS_ATTR_TRACE + EXTRA_CFLAGS += -DXFS_BLI_TRACE + EXTRA_CFLAGS += -DXFS_BMAP_TRACE + EXTRA_CFLAGS += -DXFS_BMBT_TRACE + EXTRA_CFLAGS += -DXFS_DIR_TRACE + EXTRA_CFLAGS += -DXFS_DIR2_TRACE + EXTRA_CFLAGS += -DXFS_DQUOT_TRACE + EXTRA_CFLAGS += -DXFS_ILOCK_TRACE + EXTRA_CFLAGS += -DXFS_LOG_TRACE + EXTRA_CFLAGS += -DXFS_RW_TRACE + EXTRA_CFLAGS += -DPAGEBUF_TRACE + EXTRA_CFLAGS += -DXFS_VNODE_TRACE +endif + +obj-$(CONFIG_XFS_FS) += xfs.o +obj-$(CONFIG_XFS_QUOTA) += quota/ + +xfs-$(CONFIG_XFS_RT) += xfs_rtalloc.o +xfs-$(CONFIG_XFS_POSIX_ACL) += xfs_acl.o +xfs-$(CONFIG_PROC_FS) += $(XFS_LINUX)/xfs_stats.o +xfs-$(CONFIG_SYSCTL) += $(XFS_LINUX)/xfs_sysctl.o +xfs-$(CONFIG_COMPAT) += $(XFS_LINUX)/xfs_ioctl32.o +xfs-$(CONFIG_XFS_EXPORT) += $(XFS_LINUX)/xfs_export.o + + +xfs-y += xfs_alloc.o \ + xfs_alloc_btree.o \ + xfs_attr.o \ + xfs_attr_leaf.o \ + xfs_behavior.o \ + xfs_bit.o \ + xfs_bmap.o \ + xfs_bmap_btree.o \ + xfs_btree.o \ + xfs_buf_item.o \ + xfs_da_btree.o \ + xfs_dir.o \ + xfs_dir2.o \ + xfs_dir2_block.o \ + xfs_dir2_data.o \ + xfs_dir2_leaf.o \ + xfs_dir2_node.o \ + xfs_dir2_sf.o \ + xfs_dir_leaf.o \ + xfs_error.o \ + xfs_extfree_item.o \ + xfs_fsops.o \ + xfs_ialloc.o \ + xfs_ialloc_btree.o \ + xfs_iget.o \ + xfs_inode.o \ + xfs_inode_item.o \ + xfs_iocore.o \ + xfs_iomap.o \ + xfs_itable.o \ + xfs_dfrag.o \ + xfs_log.o \ + xfs_log_recover.o \ + xfs_macros.o \ + xfs_mount.o \ + xfs_rename.o \ + xfs_trans.o \ + xfs_trans_ail.o \ + xfs_trans_buf.o \ + xfs_trans_extfree.o \ + xfs_trans_inode.o \ + xfs_trans_item.o \ + xfs_utils.o \ + xfs_vfsops.o \ + xfs_vnodeops.o \ + xfs_rw.o \ + xfs_dmops.o \ + xfs_qmops.o + +xfs-$(CONFIG_XFS_TRACE) += xfs_dir2_trace.o + +# Objects in linux/ +xfs-y += $(addprefix $(XFS_LINUX)/, \ + kmem.o \ + xfs_aops.o \ + xfs_buf.o \ + xfs_file.o \ + xfs_fs_subr.o \ + xfs_globals.o \ + xfs_ioctl.o \ + xfs_iops.o \ + xfs_lrw.o \ + xfs_super.o \ + xfs_vfs.o \ + xfs_vnode.o) + +# Objects in support/ +xfs-y += $(addprefix support/, \ + debug.o \ + move.o \ + uuid.o) + +xfs-$(CONFIG_XFS_TRACE) += support/ktrace.o + diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c index d237cc5be76..77708a8c9f8 100644 --- a/fs/xfs/linux-2.6/xfs_iops.c +++ b/fs/xfs/linux-2.6/xfs_iops.c @@ -423,9 +423,14 @@ linvfs_follow_link( return NULL; } -static void linvfs_put_link(struct dentry *dentry, struct nameidata *nd, void *p) +STATIC void +linvfs_put_link( + struct dentry *dentry, + struct nameidata *nd, + void *p) { - char *s = nd_get_link(nd); + char *s = nd_get_link(nd); + if (!IS_ERR(s)) kfree(s); } diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h index 1c63fd3118d..68c5d885ed9 100644 --- a/fs/xfs/linux-2.6/xfs_linux.h +++ b/fs/xfs/linux-2.6/xfs_linux.h @@ -64,7 +64,6 @@ #include #include -#include #include #include #include @@ -255,11 +254,18 @@ static inline void set_buffer_unwritten_io(struct buffer_head *bh) #define MAX(a,b) (max(a,b)) #define howmany(x, y) (((x)+((y)-1))/(y)) #define roundup(x, y) ((((x)+((y)-1))/(y))*(y)) +#define qsort(a,n,s,fn) sort(a,n,s,fn,NULL) +/* + * Various platform dependent calls that don't fit anywhere else + */ #define xfs_stack_trace() dump_stack() - #define xfs_itruncate_data(ip, off) \ (-vmtruncate(LINVFS_GET_IP(XFS_ITOV(ip)), (off))) +#define xfs_statvfs_fsid(statp, mp) \ + ({ u64 id = huge_encode_dev((mp)->m_dev); \ + __kernel_fsid_t *fsid = &(statp)->f_fsid; \ + (fsid->val[0] = (u32)id, fsid->val[1] = (u32)(id >> 32)); }) /* Move the kernel do_div definition off to one side */ @@ -372,6 +378,4 @@ static inline __uint64_t roundup_64(__uint64_t x, __uint32_t y) return(x * y); } -#define qsort(a, n, s, cmp) sort(a, n, s, cmp, NULL) - #endif /* __XFS_LINUX__ */ diff --git a/fs/xfs/quota/Makefile b/fs/xfs/quota/Makefile new file mode 100644 index 00000000000..7a4f725b282 --- /dev/null +++ b/fs/xfs/quota/Makefile @@ -0,0 +1 @@ +include $(TOPDIR)/fs/xfs/quota/Makefile-linux-$(VERSION).$(PATCHLEVEL) diff --git a/fs/xfs/quota/Makefile-linux-2.6 b/fs/xfs/quota/Makefile-linux-2.6 new file mode 100644 index 00000000000..8b7b676718b --- /dev/null +++ b/fs/xfs/quota/Makefile-linux-2.6 @@ -0,0 +1,53 @@ +# +# Copyright (c) 2000-2003 Silicon Graphics, Inc. All Rights Reserved. +# +# This program is free software; you can redistribute it and/or modify it +# under the terms of version 2 of the GNU General Public License as +# published by the Free Software Foundation. +# +# This program is distributed in the hope that it would be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# Further, this software is distributed without any warranty that it is +# free of the rightful claim of any third person regarding infringement +# or the like. Any license provided herein, whether implied or +# otherwise, applies only to this software file. Patent licenses, if +# any, provided herein do not apply to combinations of this program with +# other software, or any other product whatsoever. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write the Free Software Foundation, Inc., 59 +# Temple Place - Suite 330, Boston MA 02111-1307, USA. +# +# Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, +# Mountain View, CA 94043, or: +# +# http://www.sgi.com +# +# For further information regarding this notice, see: +# +# http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ +# + +EXTRA_CFLAGS += -I $(TOPDIR)/fs/xfs -I $(TOPDIR)/fs/xfs/linux-2.6 + +ifeq ($(CONFIG_XFS_DEBUG),y) + EXTRA_CFLAGS += -g -DDEBUG + #EXTRA_CFLAGS += -DQUOTADEBUG +endif +ifeq ($(CONFIG_XFS_TRACE),y) + EXTRA_CFLAGS += -DXFS_DQUOT_TRACE + EXTRA_CFLAGS += -DXFS_VNODE_TRACE +endif + +obj-$(CONFIG_XFS_QUOTA) += xfs_quota.o + +xfs_quota-y += xfs_dquot.o \ + xfs_dquot_item.o \ + xfs_trans_dquot.o \ + xfs_qm_syscalls.o \ + xfs_qm_bhv.o \ + xfs_qm.o + +xfs_quota-$(CONFIG_PROC_FS) += xfs_qm_stats.o diff --git a/fs/xfs/support/debug.c b/fs/xfs/support/debug.c index 4ed7b6928cd..4e1a5ec22fa 100644 --- a/fs/xfs/support/debug.c +++ b/fs/xfs/support/debug.c @@ -31,6 +31,7 @@ */ #include "debug.h" +#include "spin.h" #include #include diff --git a/fs/xfs/xfs_vfsops.c b/fs/xfs/xfs_vfsops.c index d4b9545c2b5..f1a904e23ad 100644 --- a/fs/xfs/xfs_vfsops.c +++ b/fs/xfs/xfs_vfsops.c @@ -795,7 +795,6 @@ xfs_statvfs( xfs_mount_t *mp; xfs_sb_t *sbp; unsigned long s; - u64 id; mp = XFS_BHVTOM(bdp); sbp = &(mp->m_sb); @@ -823,9 +822,7 @@ xfs_statvfs( statp->f_ffree = statp->f_files - (sbp->sb_icount - sbp->sb_ifree); XFS_SB_UNLOCK(mp, s); - id = huge_encode_dev(mp->m_dev); - statp->f_fsid.val[0] = (u32)id; - statp->f_fsid.val[1] = (u32)(id >> 32); + xfs_statvfs_fsid(statp, mp); statp->f_namelen = MAXNAMELEN - 1; return 0; @@ -1505,7 +1502,10 @@ xfs_syncsub( * eventually kicked out of the cache. */ if (flags & SYNC_REFCACHE) { - xfs_refcache_purge_some(mp); + if (flags & SYNC_WAIT) + xfs_refcache_purge_mp(mp); + else + xfs_refcache_purge_some(mp); } /* -- cgit v1.2.3-70-g09d2 From 6e21c8f145f5052c1c2fb4a4b41bee01c848159b Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Sat, 3 Sep 2005 15:54:45 -0700 Subject: [PATCH] /proc//numa_maps to show on which nodes pages reside This patch was recently discussed on linux-mm: http://marc.theaimsgroup.com/?t=112085728500002&r=1&w=2 I inherited a large code base from Ray for page migration. There was a small patch in there that I find to be very useful since it allows the display of the locality of the pages in use by a process. I reworked that patch and came up with a /proc//numa_maps that gives more information about the vma's of a process. numa_maps is indexes by the start address found in /proc//maps. F.e. with this patch you can see the page use of the "getty" process: margin:/proc/12008 # cat maps 00000000-00004000 r--p 00000000 00:00 0 2000000000000000-200000000002c000 r-xp 00000000 08:04 516 /lib/ld-2.3.3.so 2000000000038000-2000000000040000 rw-p 00028000 08:04 516 /lib/ld-2.3.3.so 2000000000040000-2000000000044000 rw-p 2000000000040000 00:00 0 2000000000058000-2000000000260000 r-xp 00000000 08:04 54707842 /lib/tls/libc.so.6.1 2000000000260000-2000000000268000 ---p 00208000 08:04 54707842 /lib/tls/libc.so.6.1 2000000000268000-2000000000274000 rw-p 00200000 08:04 54707842 /lib/tls/libc.so.6.1 2000000000274000-2000000000280000 rw-p 2000000000274000 00:00 0 2000000000280000-20000000002b4000 r--p 00000000 08:04 9126923 /usr/lib/locale/en_US.utf8/LC_CTYPE 2000000000300000-2000000000308000 r--s 00000000 08:04 60071467 /usr/lib/gconv/gconv-modules.cache 2000000000318000-2000000000328000 rw-p 2000000000318000 00:00 0 4000000000000000-4000000000008000 r-xp 00000000 08:04 29576399 /sbin/mingetty 6000000000004000-6000000000008000 rw-p 00004000 08:04 29576399 /sbin/mingetty 6000000000008000-600000000002c000 rw-p 6000000000008000 00:00 0 [heap] 60000fff7fffc000-60000fff80000000 rw-p 60000fff7fffc000 00:00 0 60000ffffff44000-60000ffffff98000 rw-p 60000ffffff44000 00:00 0 [stack] a000000000000000-a000000000020000 ---p 00000000 00:00 0 [vdso] cat numa_maps 2000000000000000 default MaxRef=43 Pages=11 Mapped=11 N0=4 N1=3 N2=2 N3=2 2000000000038000 default MaxRef=1 Pages=2 Mapped=2 Anon=2 N0=2 2000000000040000 default MaxRef=1 Pages=1 Mapped=1 Anon=1 N0=1 2000000000058000 default MaxRef=43 Pages=61 Mapped=61 N0=14 N1=15 N2=16 N3=16 2000000000268000 default MaxRef=1 Pages=2 Mapped=2 Anon=2 N0=2 2000000000274000 default MaxRef=1 Pages=3 Mapped=3 Anon=3 N0=3 2000000000280000 default MaxRef=8 Pages=3 Mapped=3 N0=3 2000000000300000 default MaxRef=8 Pages=2 Mapped=2 N0=2 2000000000318000 default MaxRef=1 Pages=1 Mapped=1 Anon=1 N2=1 4000000000000000 default MaxRef=6 Pages=2 Mapped=2 N1=2 6000000000004000 default MaxRef=1 Pages=1 Mapped=1 Anon=1 N0=1 6000000000008000 default MaxRef=1 Pages=1 Mapped=1 Anon=1 N0=1 60000fff7fffc000 default MaxRef=1 Pages=1 Mapped=1 Anon=1 N0=1 60000ffffff44000 default MaxRef=1 Pages=1 Mapped=1 Anon=1 N0=1 getty uses ld.so. The first vma is the code segment which is used by 43 other processes and the pages are evenly distributed over the 4 nodes. The second vma is the process specific data portion for ld.so. This is only one page. The display format is: Links to information in /proc//map This can be "default" "interleave={}", "prefer=" or "bind={}" MaxRef= Pages= Mapped= Anon= Nx= The content of the proc-file is self-evident. If this would be tied into the sparsemem system then the contents of this file would not be too useful. Signed-off-by: Christoph Lameter Cc: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/base.c | 35 ++++++++++++ fs/proc/task_mmu.c | 132 ++++++++++++++++++++++++++++++++++++++++++++++ include/linux/mempolicy.h | 3 ++ mm/mempolicy.c | 12 ++--- 4 files changed, 176 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/proc/base.c b/fs/proc/base.c index 491f2d9f89a..b796bf90a0b 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -65,6 +65,7 @@ enum pid_directory_inos { PROC_TGID_STAT, PROC_TGID_STATM, PROC_TGID_MAPS, + PROC_TGID_NUMA_MAPS, PROC_TGID_MOUNTS, PROC_TGID_WCHAN, #ifdef CONFIG_SCHEDSTATS @@ -102,6 +103,7 @@ enum pid_directory_inos { PROC_TID_STAT, PROC_TID_STATM, PROC_TID_MAPS, + PROC_TID_NUMA_MAPS, PROC_TID_MOUNTS, PROC_TID_WCHAN, #ifdef CONFIG_SCHEDSTATS @@ -144,6 +146,9 @@ static struct pid_entry tgid_base_stuff[] = { E(PROC_TGID_STAT, "stat", S_IFREG|S_IRUGO), E(PROC_TGID_STATM, "statm", S_IFREG|S_IRUGO), E(PROC_TGID_MAPS, "maps", S_IFREG|S_IRUGO), +#ifdef CONFIG_NUMA + E(PROC_TGID_NUMA_MAPS, "numa_maps", S_IFREG|S_IRUGO), +#endif E(PROC_TGID_MEM, "mem", S_IFREG|S_IRUSR|S_IWUSR), #ifdef CONFIG_SECCOMP E(PROC_TGID_SECCOMP, "seccomp", S_IFREG|S_IRUSR|S_IWUSR), @@ -180,6 +185,9 @@ static struct pid_entry tid_base_stuff[] = { E(PROC_TID_STAT, "stat", S_IFREG|S_IRUGO), E(PROC_TID_STATM, "statm", S_IFREG|S_IRUGO), E(PROC_TID_MAPS, "maps", S_IFREG|S_IRUGO), +#ifdef CONFIG_NUMA + E(PROC_TID_NUMA_MAPS, "numa_maps", S_IFREG|S_IRUGO), +#endif E(PROC_TID_MEM, "mem", S_IFREG|S_IRUSR|S_IWUSR), #ifdef CONFIG_SECCOMP E(PROC_TID_SECCOMP, "seccomp", S_IFREG|S_IRUSR|S_IWUSR), @@ -515,6 +523,27 @@ static struct file_operations proc_maps_operations = { .release = seq_release, }; +#ifdef CONFIG_NUMA +extern struct seq_operations proc_pid_numa_maps_op; +static int numa_maps_open(struct inode *inode, struct file *file) +{ + struct task_struct *task = proc_task(inode); + int ret = seq_open(file, &proc_pid_numa_maps_op); + if (!ret) { + struct seq_file *m = file->private_data; + m->private = task; + } + return ret; +} + +static struct file_operations proc_numa_maps_operations = { + .open = numa_maps_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, +}; +#endif + extern struct seq_operations mounts_op; static int mounts_open(struct inode *inode, struct file *file) { @@ -1524,6 +1553,12 @@ static struct dentry *proc_pident_lookup(struct inode *dir, case PROC_TGID_MAPS: inode->i_fop = &proc_maps_operations; break; +#ifdef CONFIG_NUMA + case PROC_TID_NUMA_MAPS: + case PROC_TGID_NUMA_MAPS: + inode->i_fop = &proc_numa_maps_operations; + break; +#endif case PROC_TID_MEM: case PROC_TGID_MEM: inode->i_op = &proc_mem_inode_operations; diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 28b4a0253a9..64e84cadfa3 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -2,6 +2,8 @@ #include #include #include +#include +#include #include #include #include "internal.h" @@ -233,3 +235,133 @@ struct seq_operations proc_pid_maps_op = { .stop = m_stop, .show = show_map }; + +#ifdef CONFIG_NUMA + +struct numa_maps { + unsigned long pages; + unsigned long anon; + unsigned long mapped; + unsigned long mapcount_max; + unsigned long node[MAX_NUMNODES]; +}; + +/* + * Calculate numa node maps for a vma + */ +static struct numa_maps *get_numa_maps(const struct vm_area_struct *vma) +{ + struct page *page; + unsigned long vaddr; + struct mm_struct *mm = vma->vm_mm; + int i; + struct numa_maps *md = kmalloc(sizeof(struct numa_maps), GFP_KERNEL); + + if (!md) + return NULL; + md->pages = 0; + md->anon = 0; + md->mapped = 0; + md->mapcount_max = 0; + for_each_node(i) + md->node[i] =0; + + spin_lock(&mm->page_table_lock); + for (vaddr = vma->vm_start; vaddr < vma->vm_end; vaddr += PAGE_SIZE) { + page = follow_page(mm, vaddr, 0); + if (page) { + int count = page_mapcount(page); + + if (count) + md->mapped++; + if (count > md->mapcount_max) + md->mapcount_max = count; + md->pages++; + if (PageAnon(page)) + md->anon++; + md->node[page_to_nid(page)]++; + } + } + spin_unlock(&mm->page_table_lock); + return md; +} + +static int show_numa_map(struct seq_file *m, void *v) +{ + struct task_struct *task = m->private; + struct vm_area_struct *vma = v; + struct mempolicy *pol; + struct numa_maps *md; + struct zone **z; + int n; + int first; + + if (!vma->vm_mm) + return 0; + + md = get_numa_maps(vma); + if (!md) + return 0; + + seq_printf(m, "%08lx", vma->vm_start); + pol = get_vma_policy(task, vma, vma->vm_start); + /* Print policy */ + switch (pol->policy) { + case MPOL_PREFERRED: + seq_printf(m, " prefer=%d", pol->v.preferred_node); + break; + case MPOL_BIND: + seq_printf(m, " bind={"); + first = 1; + for (z = pol->v.zonelist->zones; *z; z++) { + + if (!first) + seq_putc(m, ','); + else + first = 0; + seq_printf(m, "%d/%s", (*z)->zone_pgdat->node_id, + (*z)->name); + } + seq_putc(m, '}'); + break; + case MPOL_INTERLEAVE: + seq_printf(m, " interleave={"); + first = 1; + for_each_node(n) { + if (test_bit(n, pol->v.nodes)) { + if (!first) + seq_putc(m,','); + else + first = 0; + seq_printf(m, "%d",n); + } + } + seq_putc(m, '}'); + break; + default: + seq_printf(m," default"); + break; + } + seq_printf(m, " MaxRef=%lu Pages=%lu Mapped=%lu", + md->mapcount_max, md->pages, md->mapped); + if (md->anon) + seq_printf(m," Anon=%lu",md->anon); + + for_each_online_node(n) { + if (md->node[n]) + seq_printf(m, " N%d=%lu", n, md->node[n]); + } + seq_putc(m, '\n'); + kfree(md); + if (m->count < m->size) /* vma is copied successfully */ + m->version = (vma != get_gate_vma(task)) ? vma->vm_start : 0; + return 0; +} + +struct seq_operations proc_pid_numa_maps_op = { + .start = m_start, + .next = m_next, + .stop = m_stop, + .show = show_numa_map +}; +#endif diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h index 8480aef10e6..94a46f38c53 100644 --- a/include/linux/mempolicy.h +++ b/include/linux/mempolicy.h @@ -150,6 +150,9 @@ void mpol_free_shared_policy(struct shared_policy *p); struct mempolicy *mpol_shared_policy_lookup(struct shared_policy *sp, unsigned long idx); +struct mempolicy *get_vma_policy(struct task_struct *task, + struct vm_area_struct *vma, unsigned long addr); + extern void numa_default_policy(void); extern void numa_policy_init(void); diff --git a/mm/mempolicy.c b/mm/mempolicy.c index b4eababc819..13492d66b7c 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -664,10 +664,10 @@ asmlinkage long compat_sys_mbind(compat_ulong_t start, compat_ulong_t len, #endif /* Return effective policy for a VMA */ -static struct mempolicy * -get_vma_policy(struct vm_area_struct *vma, unsigned long addr) +struct mempolicy * +get_vma_policy(struct task_struct *task, struct vm_area_struct *vma, unsigned long addr) { - struct mempolicy *pol = current->mempolicy; + struct mempolicy *pol = task->mempolicy; if (vma) { if (vma->vm_ops && vma->vm_ops->get_policy) @@ -786,7 +786,7 @@ static struct page *alloc_page_interleave(unsigned int __nocast gfp, unsigned or struct page * alloc_page_vma(unsigned int __nocast gfp, struct vm_area_struct *vma, unsigned long addr) { - struct mempolicy *pol = get_vma_policy(vma, addr); + struct mempolicy *pol = get_vma_policy(current, vma, addr); cpuset_update_current_mems_allowed(); @@ -908,7 +908,7 @@ void __mpol_free(struct mempolicy *p) /* Find first node suitable for an allocation */ int mpol_first_node(struct vm_area_struct *vma, unsigned long addr) { - struct mempolicy *pol = get_vma_policy(vma, addr); + struct mempolicy *pol = get_vma_policy(current, vma, addr); switch (pol->policy) { case MPOL_DEFAULT: @@ -928,7 +928,7 @@ int mpol_first_node(struct vm_area_struct *vma, unsigned long addr) /* Find secondary valid nodes for an allocation */ int mpol_node_valid(int nid, struct vm_area_struct *vma, unsigned long addr) { - struct mempolicy *pol = get_vma_policy(vma, addr); + struct mempolicy *pol = get_vma_policy(current, vma, addr); switch (pol->policy) { case MPOL_PREFERRED: -- cgit v1.2.3-70-g09d2 From e070ad49f31155d872d8e96cab2142840993e3c0 Mon Sep 17 00:00:00 2001 From: Mauricio Lin Date: Sat, 3 Sep 2005 15:55:10 -0700 Subject: [PATCH] add /proc/pid/smaps Add a "smaps" entry to /proc/pid: show howmuch memory is resident in each mapping. People that want to perform a memory consumption analysing can use it mainly if someone needs to figure out which libraries can be reduced for embedded systems. So the new features are the physical size of shared and clean [or dirty]; private and clean [or dirty]. Take a look the example below: # cat /proc/4576/smaps 08048000-080dc000 r-xp /bin/bash Size: 592 KB Rss: 500 KB Shared_Clean: 500 KB Shared_Dirty: 0 KB Private_Clean: 0 KB Private_Dirty: 0 KB 080dc000-080e2000 rw-p /bin/bash Size: 24 KB Rss: 24 KB Shared_Clean: 0 KB Shared_Dirty: 0 KB Private_Clean: 0 KB Private_Dirty: 24 KB 080e2000-08116000 rw-p Size: 208 KB Rss: 208 KB Shared_Clean: 0 KB Shared_Dirty: 0 KB Private_Clean: 0 KB Private_Dirty: 208 KB b7e2b000-b7e34000 r-xp /lib/tls/libnss_files-2.3.2.so Size: 36 KB Rss: 12 KB Shared_Clean: 12 KB Shared_Dirty: 0 KB Private_Clean: 0 KB Private_Dirty: 0 KB ... (Includes a cleanup from "Richard Purdie" ) From: Torsten Foertsch show_smap calls first show_map and then prints its additional information to the seq_file. show_map checks if all it has to print fits into the buffer and if yes marks the current vma as written. While that is correct for show_map it is not for show_smap. Here the vma should be marked as written only after the additional information is also written. The attached patch cures the problem. It moves the functionality of the show_map function to a new function show_map_internal that is called with an additional struct mem_size_stats* argument. Then show_map calls show_map_internal with NULL as struct mem_size_stats* whereas show_smap calls it with a real pointer. Now the final if (m->count < m->size) /* vma is copied successfully */ m->version = (vma != get_gate_vma(task))? vma->vm_start: 0; is done only if the whole entry fits into the buffer. Signed-off-by: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/filesystems/proc.txt | 1 + fs/proc/base.c | 61 ++++++++++ fs/proc/task_mmu.c | 225 ++++++++++++++++++++++++++++++------- 3 files changed, 245 insertions(+), 42 deletions(-) (limited to 'fs') diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt index 6c98f2bd421..5024ba7a592 100644 --- a/Documentation/filesystems/proc.txt +++ b/Documentation/filesystems/proc.txt @@ -133,6 +133,7 @@ Table 1-1: Process specific entries in /proc statm Process memory status information status Process status in human readable form wchan If CONFIG_KALLSYMS is set, a pre-decoded wchan + smaps Extension based on maps, presenting the rss size for each mapped file .............................................................................. For example, to get the status information of a process, all you have to do is diff --git a/fs/proc/base.c b/fs/proc/base.c index b796bf90a0b..520978e49e9 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -11,6 +11,40 @@ * go into icache. We cache the reference to task_struct upon lookup too. * Eventually it should become a filesystem in its own. We don't use the * rest of procfs anymore. + * + * + * Changelog: + * 17-Jan-2005 + * Allan Bezerra + * Bruna Moreira + * Edjard Mota + * Ilias Biris + * Mauricio Lin + * + * Embedded Linux Lab - 10LE Instituto Nokia de Tecnologia - INdT + * + * A new process specific entry (smaps) included in /proc. It shows the + * size of rss for each memory area. The maps entry lacks information + * about physical memory size (rss) for each mapped file, i.e., + * rss information for executables and library files. + * This additional information is useful for any tools that need to know + * about physical memory consumption for a process specific library. + * + * Changelog: + * 21-Feb-2005 + * Embedded Linux Lab - 10LE Instituto Nokia de Tecnologia - INdT + * Pud inclusion in the page table walking. + * + * ChangeLog: + * 10-Mar-2005 + * 10LE Instituto Nokia de Tecnologia - INdT: + * A better way to walks through the page table as suggested by Hugh Dickins. + * + * Simo Piiroinen : + * Smaps information related to shared, private, clean and dirty pages. + * + * Paul Mundt : + * Overall revision about smaps. */ #include @@ -68,6 +102,7 @@ enum pid_directory_inos { PROC_TGID_NUMA_MAPS, PROC_TGID_MOUNTS, PROC_TGID_WCHAN, + PROC_TGID_SMAPS, #ifdef CONFIG_SCHEDSTATS PROC_TGID_SCHEDSTAT, #endif @@ -106,6 +141,7 @@ enum pid_directory_inos { PROC_TID_NUMA_MAPS, PROC_TID_MOUNTS, PROC_TID_WCHAN, + PROC_TID_SMAPS, #ifdef CONFIG_SCHEDSTATS PROC_TID_SCHEDSTAT, #endif @@ -157,6 +193,7 @@ static struct pid_entry tgid_base_stuff[] = { E(PROC_TGID_ROOT, "root", S_IFLNK|S_IRWXUGO), E(PROC_TGID_EXE, "exe", S_IFLNK|S_IRWXUGO), E(PROC_TGID_MOUNTS, "mounts", S_IFREG|S_IRUGO), + E(PROC_TGID_SMAPS, "smaps", S_IFREG|S_IRUGO), #ifdef CONFIG_SECURITY E(PROC_TGID_ATTR, "attr", S_IFDIR|S_IRUGO|S_IXUGO), #endif @@ -196,6 +233,7 @@ static struct pid_entry tid_base_stuff[] = { E(PROC_TID_ROOT, "root", S_IFLNK|S_IRWXUGO), E(PROC_TID_EXE, "exe", S_IFLNK|S_IRWXUGO), E(PROC_TID_MOUNTS, "mounts", S_IFREG|S_IRUGO), + E(PROC_TID_SMAPS, "smaps", S_IFREG|S_IRUGO), #ifdef CONFIG_SECURITY E(PROC_TID_ATTR, "attr", S_IFDIR|S_IRUGO|S_IXUGO), #endif @@ -544,6 +582,25 @@ static struct file_operations proc_numa_maps_operations = { }; #endif +extern struct seq_operations proc_pid_smaps_op; +static int smaps_open(struct inode *inode, struct file *file) +{ + struct task_struct *task = proc_task(inode); + int ret = seq_open(file, &proc_pid_smaps_op); + if (!ret) { + struct seq_file *m = file->private_data; + m->private = task; + } + return ret; +} + +static struct file_operations proc_smaps_operations = { + .open = smaps_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, +}; + extern struct seq_operations mounts_op; static int mounts_open(struct inode *inode, struct file *file) { @@ -1574,6 +1631,10 @@ static struct dentry *proc_pident_lookup(struct inode *dir, case PROC_TGID_MOUNTS: inode->i_fop = &proc_mounts_operations; break; + case PROC_TID_SMAPS: + case PROC_TGID_SMAPS: + inode->i_fop = &proc_smaps_operations; + break; #ifdef CONFIG_SECURITY case PROC_TID_ATTR: inode->i_nlink = 2; diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 64e84cadfa3..c7ef3e48e35 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -2,10 +2,13 @@ #include #include #include +#include #include #include + #include #include +#include #include "internal.h" char *task_mem(struct mm_struct *mm, char *buffer) @@ -89,49 +92,58 @@ static void pad_len_spaces(struct seq_file *m, int len) seq_printf(m, "%*c", len, ' '); } -static int show_map(struct seq_file *m, void *v) +struct mem_size_stats +{ + unsigned long resident; + unsigned long shared_clean; + unsigned long shared_dirty; + unsigned long private_clean; + unsigned long private_dirty; +}; + +static int show_map_internal(struct seq_file *m, void *v, struct mem_size_stats *mss) { struct task_struct *task = m->private; - struct vm_area_struct *map = v; - struct mm_struct *mm = map->vm_mm; - struct file *file = map->vm_file; - int flags = map->vm_flags; + struct vm_area_struct *vma = v; + struct mm_struct *mm = vma->vm_mm; + struct file *file = vma->vm_file; + int flags = vma->vm_flags; unsigned long ino = 0; dev_t dev = 0; int len; if (file) { - struct inode *inode = map->vm_file->f_dentry->d_inode; + struct inode *inode = vma->vm_file->f_dentry->d_inode; dev = inode->i_sb->s_dev; ino = inode->i_ino; } seq_printf(m, "%08lx-%08lx %c%c%c%c %08lx %02x:%02x %lu %n", - map->vm_start, - map->vm_end, + vma->vm_start, + vma->vm_end, flags & VM_READ ? 'r' : '-', flags & VM_WRITE ? 'w' : '-', flags & VM_EXEC ? 'x' : '-', flags & VM_MAYSHARE ? 's' : 'p', - map->vm_pgoff << PAGE_SHIFT, + vma->vm_pgoff << PAGE_SHIFT, MAJOR(dev), MINOR(dev), ino, &len); /* * Print the dentry name for named mappings, and a * special [heap] marker for the heap: */ - if (map->vm_file) { + if (file) { pad_len_spaces(m, len); - seq_path(m, file->f_vfsmnt, file->f_dentry, ""); + seq_path(m, file->f_vfsmnt, file->f_dentry, "\n"); } else { if (mm) { - if (map->vm_start <= mm->start_brk && - map->vm_end >= mm->brk) { + if (vma->vm_start <= mm->start_brk && + vma->vm_end >= mm->brk) { pad_len_spaces(m, len); seq_puts(m, "[heap]"); } else { - if (map->vm_start <= mm->start_stack && - map->vm_end >= mm->start_stack) { + if (vma->vm_start <= mm->start_stack && + vma->vm_end >= mm->start_stack) { pad_len_spaces(m, len); seq_puts(m, "[stack]"); @@ -143,24 +155,146 @@ static int show_map(struct seq_file *m, void *v) } } seq_putc(m, '\n'); - if (m->count < m->size) /* map is copied successfully */ - m->version = (map != get_gate_vma(task))? map->vm_start: 0; + + if (mss) + seq_printf(m, + "Size: %8lu kB\n" + "Rss: %8lu kB\n" + "Shared_Clean: %8lu kB\n" + "Shared_Dirty: %8lu kB\n" + "Private_Clean: %8lu kB\n" + "Private_Dirty: %8lu kB\n", + (vma->vm_end - vma->vm_start) >> 10, + mss->resident >> 10, + mss->shared_clean >> 10, + mss->shared_dirty >> 10, + mss->private_clean >> 10, + mss->private_dirty >> 10); + + if (m->count < m->size) /* vma is copied successfully */ + m->version = (vma != get_gate_vma(task))? vma->vm_start: 0; return 0; } +static int show_map(struct seq_file *m, void *v) +{ + return show_map_internal(m, v, 0); +} + +static void smaps_pte_range(struct vm_area_struct *vma, pmd_t *pmd, + unsigned long addr, unsigned long end, + struct mem_size_stats *mss) +{ + pte_t *pte, ptent; + unsigned long pfn; + struct page *page; + + pte = pte_offset_map(pmd, addr); + do { + ptent = *pte; + if (pte_none(ptent) || !pte_present(ptent)) + continue; + + mss->resident += PAGE_SIZE; + pfn = pte_pfn(ptent); + if (!pfn_valid(pfn)) + continue; + + page = pfn_to_page(pfn); + if (page_count(page) >= 2) { + if (pte_dirty(ptent)) + mss->shared_dirty += PAGE_SIZE; + else + mss->shared_clean += PAGE_SIZE; + } else { + if (pte_dirty(ptent)) + mss->private_dirty += PAGE_SIZE; + else + mss->private_clean += PAGE_SIZE; + } + } while (pte++, addr += PAGE_SIZE, addr != end); + pte_unmap(pte - 1); + cond_resched_lock(&vma->vm_mm->page_table_lock); +} + +static inline void smaps_pmd_range(struct vm_area_struct *vma, pud_t *pud, + unsigned long addr, unsigned long end, + struct mem_size_stats *mss) +{ + pmd_t *pmd; + unsigned long next; + + pmd = pmd_offset(pud, addr); + do { + next = pmd_addr_end(addr, end); + if (pmd_none_or_clear_bad(pmd)) + continue; + smaps_pte_range(vma, pmd, addr, next, mss); + } while (pmd++, addr = next, addr != end); +} + +static inline void smaps_pud_range(struct vm_area_struct *vma, pgd_t *pgd, + unsigned long addr, unsigned long end, + struct mem_size_stats *mss) +{ + pud_t *pud; + unsigned long next; + + pud = pud_offset(pgd, addr); + do { + next = pud_addr_end(addr, end); + if (pud_none_or_clear_bad(pud)) + continue; + smaps_pmd_range(vma, pud, addr, next, mss); + } while (pud++, addr = next, addr != end); +} + +static inline void smaps_pgd_range(struct vm_area_struct *vma, + unsigned long addr, unsigned long end, + struct mem_size_stats *mss) +{ + pgd_t *pgd; + unsigned long next; + + pgd = pgd_offset(vma->vm_mm, addr); + do { + next = pgd_addr_end(addr, end); + if (pgd_none_or_clear_bad(pgd)) + continue; + smaps_pud_range(vma, pgd, addr, next, mss); + } while (pgd++, addr = next, addr != end); +} + +static int show_smap(struct seq_file *m, void *v) +{ + struct vm_area_struct *vma = v; + struct mm_struct *mm = vma->vm_mm; + struct mem_size_stats mss; + + memset(&mss, 0, sizeof mss); + + if (mm) { + spin_lock(&mm->page_table_lock); + smaps_pgd_range(vma, vma->vm_start, vma->vm_end, &mss); + spin_unlock(&mm->page_table_lock); + } + + return show_map_internal(m, v, &mss); +} + static void *m_start(struct seq_file *m, loff_t *pos) { struct task_struct *task = m->private; unsigned long last_addr = m->version; struct mm_struct *mm; - struct vm_area_struct *map, *tail_map; + struct vm_area_struct *vma, *tail_vma; loff_t l = *pos; /* * We remember last_addr rather than next_addr to hit with * mmap_cache most of the time. We have zero last_addr at - * the begining and also after lseek. We will have -1 last_addr - * after the end of the maps. + * the beginning and also after lseek. We will have -1 last_addr + * after the end of the vmas. */ if (last_addr == -1UL) @@ -170,47 +304,47 @@ static void *m_start(struct seq_file *m, loff_t *pos) if (!mm) return NULL; - tail_map = get_gate_vma(task); + tail_vma = get_gate_vma(task); down_read(&mm->mmap_sem); /* Start with last addr hint */ - if (last_addr && (map = find_vma(mm, last_addr))) { - map = map->vm_next; + if (last_addr && (vma = find_vma(mm, last_addr))) { + vma = vma->vm_next; goto out; } /* - * Check the map index is within the range and do + * Check the vma index is within the range and do * sequential scan until m_index. */ - map = NULL; + vma = NULL; if ((unsigned long)l < mm->map_count) { - map = mm->mmap; - while (l-- && map) - map = map->vm_next; + vma = mm->mmap; + while (l-- && vma) + vma = vma->vm_next; goto out; } if (l != mm->map_count) - tail_map = NULL; /* After gate map */ + tail_vma = NULL; /* After gate vma */ out: - if (map) - return map; + if (vma) + return vma; - /* End of maps has reached */ - m->version = (tail_map != NULL)? 0: -1UL; + /* End of vmas has been reached */ + m->version = (tail_vma != NULL)? 0: -1UL; up_read(&mm->mmap_sem); mmput(mm); - return tail_map; + return tail_vma; } static void m_stop(struct seq_file *m, void *v) { struct task_struct *task = m->private; - struct vm_area_struct *map = v; - if (map && map != get_gate_vma(task)) { - struct mm_struct *mm = map->vm_mm; + struct vm_area_struct *vma = v; + if (vma && vma != get_gate_vma(task)) { + struct mm_struct *mm = vma->vm_mm; up_read(&mm->mmap_sem); mmput(mm); } @@ -219,14 +353,14 @@ static void m_stop(struct seq_file *m, void *v) static void *m_next(struct seq_file *m, void *v, loff_t *pos) { struct task_struct *task = m->private; - struct vm_area_struct *map = v; - struct vm_area_struct *tail_map = get_gate_vma(task); + struct vm_area_struct *vma = v; + struct vm_area_struct *tail_vma = get_gate_vma(task); (*pos)++; - if (map && (map != tail_map) && map->vm_next) - return map->vm_next; + if (vma && (vma != tail_vma) && vma->vm_next) + return vma->vm_next; m_stop(m, v); - return (map != tail_map)? tail_map: NULL; + return (vma != tail_vma)? tail_vma: NULL; } struct seq_operations proc_pid_maps_op = { @@ -236,6 +370,13 @@ struct seq_operations proc_pid_maps_op = { .show = show_map }; +struct seq_operations proc_pid_smaps_op = { + .start = m_start, + .next = m_next, + .stop = m_stop, + .show = show_smap +}; + #ifdef CONFIG_NUMA struct numa_maps { -- cgit v1.2.3-70-g09d2 From f549d6c18c0e8e6cf1bf0e7a47acc1daf7e2cec1 Mon Sep 17 00:00:00 2001 From: Stephen Smalley Date: Sat, 3 Sep 2005 15:55:18 -0700 Subject: [PATCH] Generic VFS fallback for security xattrs This patch modifies the VFS setxattr, getxattr, and listxattr code to fall back to the security module for security xattrs if the filesystem does not support xattrs natively. This allows security modules to export the incore inode security label information to userspace even if the filesystem does not provide xattr storage, and eliminates the need to individually patch various pseudo filesystem types to provide such access. The patch removes the existing xattr code from devpts and tmpfs as it is then no longer needed. The patch restructures the code flow slightly to reduce duplication between the normal path and the fallback path, but this should only have one user-visible side effect - a program may get -EACCES rather than -EOPNOTSUPP if policy denied access but the filesystem didn't support the operation anyway. Note that the post_setxattr hook call is not needed in the fallback case, as the inode_setsecurity hook call handles the incore inode security state update directly. In contrast, we do call fsnotify in both cases. Signed-off-by: Stephen Smalley Acked-by: James Morris Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/Kconfig | 43 ----------------------- fs/devpts/Makefile | 1 - fs/devpts/inode.c | 21 ------------ fs/devpts/xattr_security.c | 47 ------------------------- fs/xattr.c | 80 ++++++++++++++++++++++++++----------------- mm/shmem.c | 85 ---------------------------------------------- 6 files changed, 49 insertions(+), 228 deletions(-) delete mode 100644 fs/devpts/xattr_security.c (limited to 'fs') diff --git a/fs/Kconfig b/fs/Kconfig index e54be705835..ed78d24ee42 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -783,28 +783,6 @@ config SYSFS Designers of embedded systems may wish to say N here to conserve space. -config DEVPTS_FS_XATTR - bool "/dev/pts Extended Attributes" - depends on UNIX98_PTYS - help - Extended attributes are name:value pairs associated with inodes by - the kernel or by users (see the attr(5) manual page, or visit - for details). - - If unsure, say N. - -config DEVPTS_FS_SECURITY - bool "/dev/pts Security Labels" - depends on DEVPTS_FS_XATTR - help - Security labels support alternative access control models - implemented by security modules like SELinux. This option - enables an extended attribute handler for file security - labels in the /dev/pts filesystem. - - If you are not using a security module that requires using - extended attributes for file security labels, say N. - config TMPFS bool "Virtual memory file system support (former shm fs)" help @@ -817,27 +795,6 @@ config TMPFS See for details. -config TMPFS_XATTR - bool "tmpfs Extended Attributes" - depends on TMPFS - help - Extended attributes are name:value pairs associated with inodes by - the kernel or by users (see the attr(5) manual page, or visit - for details). - - If unsure, say N. - -config TMPFS_SECURITY - bool "tmpfs Security Labels" - depends on TMPFS_XATTR - help - Security labels support alternative access control models - implemented by security modules like SELinux. This option - enables an extended attribute handler for file security - labels in the tmpfs filesystem. - If you are not using a security module that requires using - extended attributes for file security labels, say N. - config HUGETLBFS bool "HugeTLB file system support" depends X86 || IA64 || PPC64 || SPARC64 || SUPERH || X86_64 || BROKEN diff --git a/fs/devpts/Makefile b/fs/devpts/Makefile index 5800df2e50c..236696efcba 100644 --- a/fs/devpts/Makefile +++ b/fs/devpts/Makefile @@ -5,4 +5,3 @@ obj-$(CONFIG_UNIX98_PTYS) += devpts.o devpts-$(CONFIG_UNIX98_PTYS) := inode.o -devpts-$(CONFIG_DEVPTS_FS_SECURITY) += xattr_security.o diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c index 1571c8d6c23..f2be44d4491 100644 --- a/fs/devpts/inode.c +++ b/fs/devpts/inode.c @@ -18,28 +18,9 @@ #include #include #include -#include #define DEVPTS_SUPER_MAGIC 0x1cd1 -extern struct xattr_handler devpts_xattr_security_handler; - -static struct xattr_handler *devpts_xattr_handlers[] = { -#ifdef CONFIG_DEVPTS_FS_SECURITY - &devpts_xattr_security_handler, -#endif - NULL -}; - -static struct inode_operations devpts_file_inode_operations = { -#ifdef CONFIG_DEVPTS_FS_XATTR - .setxattr = generic_setxattr, - .getxattr = generic_getxattr, - .listxattr = generic_listxattr, - .removexattr = generic_removexattr, -#endif -}; - static struct vfsmount *devpts_mnt; static struct dentry *devpts_root; @@ -102,7 +83,6 @@ devpts_fill_super(struct super_block *s, void *data, int silent) s->s_blocksize_bits = 10; s->s_magic = DEVPTS_SUPER_MAGIC; s->s_op = &devpts_sops; - s->s_xattr = devpts_xattr_handlers; s->s_time_gran = 1; inode = new_inode(s); @@ -175,7 +155,6 @@ int devpts_pty_new(struct tty_struct *tty) inode->i_gid = config.setgid ? config.gid : current->fsgid; inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; init_special_inode(inode, S_IFCHR|config.mode, device); - inode->i_op = &devpts_file_inode_operations; inode->u.generic_ip = tty; dentry = get_node(number); diff --git a/fs/devpts/xattr_security.c b/fs/devpts/xattr_security.c deleted file mode 100644 index 864cb5c79ba..00000000000 --- a/fs/devpts/xattr_security.c +++ /dev/null @@ -1,47 +0,0 @@ -/* - * Security xattr support for devpts. - * - * Author: Stephen Smalley - * Copyright (c) 2004 Red Hat, Inc., James Morris - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the Free - * Software Foundation; either version 2 of the License, or (at your option) - * any later version. - */ -#include -#include -#include -#include - -static size_t -devpts_xattr_security_list(struct inode *inode, char *list, size_t list_len, - const char *name, size_t name_len) -{ - return security_inode_listsecurity(inode, list, list_len); -} - -static int -devpts_xattr_security_get(struct inode *inode, const char *name, - void *buffer, size_t size) -{ - if (strcmp(name, "") == 0) - return -EINVAL; - return security_inode_getsecurity(inode, name, buffer, size); -} - -static int -devpts_xattr_security_set(struct inode *inode, const char *name, - const void *value, size_t size, int flags) -{ - if (strcmp(name, "") == 0) - return -EINVAL; - return security_inode_setsecurity(inode, name, value, size, flags); -} - -struct xattr_handler devpts_xattr_security_handler = { - .prefix = XATTR_SECURITY_PREFIX, - .list = devpts_xattr_security_list, - .get = devpts_xattr_security_get, - .set = devpts_xattr_security_set, -}; diff --git a/fs/xattr.c b/fs/xattr.c index 6acd5c63da9..dc8bc7624f2 100644 --- a/fs/xattr.c +++ b/fs/xattr.c @@ -51,20 +51,29 @@ setxattr(struct dentry *d, char __user *name, void __user *value, } } + down(&d->d_inode->i_sem); + error = security_inode_setxattr(d, kname, kvalue, size, flags); + if (error) + goto out; error = -EOPNOTSUPP; if (d->d_inode->i_op && d->d_inode->i_op->setxattr) { - down(&d->d_inode->i_sem); - error = security_inode_setxattr(d, kname, kvalue, size, flags); - if (error) - goto out; - error = d->d_inode->i_op->setxattr(d, kname, kvalue, size, flags); + error = d->d_inode->i_op->setxattr(d, kname, kvalue, + size, flags); if (!error) { fsnotify_xattr(d); - security_inode_post_setxattr(d, kname, kvalue, size, flags); + security_inode_post_setxattr(d, kname, kvalue, + size, flags); } -out: - up(&d->d_inode->i_sem); + } else if (!strncmp(kname, XATTR_SECURITY_PREFIX, + sizeof XATTR_SECURITY_PREFIX - 1)) { + const char *suffix = kname + sizeof XATTR_SECURITY_PREFIX - 1; + error = security_inode_setsecurity(d->d_inode, suffix, kvalue, + size, flags); + if (!error) + fsnotify_xattr(d); } +out: + up(&d->d_inode->i_sem); if (kvalue) kfree(kvalue); return error; @@ -139,20 +148,25 @@ getxattr(struct dentry *d, char __user *name, void __user *value, size_t size) return -ENOMEM; } + error = security_inode_getxattr(d, kname); + if (error) + goto out; error = -EOPNOTSUPP; - if (d->d_inode->i_op && d->d_inode->i_op->getxattr) { - error = security_inode_getxattr(d, kname); - if (error) - goto out; + if (d->d_inode->i_op && d->d_inode->i_op->getxattr) error = d->d_inode->i_op->getxattr(d, kname, kvalue, size); - if (error > 0) { - if (size && copy_to_user(value, kvalue, error)) - error = -EFAULT; - } else if (error == -ERANGE && size >= XATTR_SIZE_MAX) { - /* The file system tried to returned a value bigger - than XATTR_SIZE_MAX bytes. Not possible. */ - error = -E2BIG; - } + else if (!strncmp(kname, XATTR_SECURITY_PREFIX, + sizeof XATTR_SECURITY_PREFIX - 1)) { + const char *suffix = kname + sizeof XATTR_SECURITY_PREFIX - 1; + error = security_inode_getsecurity(d->d_inode, suffix, kvalue, + size); + } + if (error > 0) { + if (size && copy_to_user(value, kvalue, error)) + error = -EFAULT; + } else if (error == -ERANGE && size >= XATTR_SIZE_MAX) { + /* The file system tried to returned a value bigger + than XATTR_SIZE_MAX bytes. Not possible. */ + error = -E2BIG; } out: if (kvalue) @@ -221,20 +235,24 @@ listxattr(struct dentry *d, char __user *list, size_t size) return -ENOMEM; } + error = security_inode_listxattr(d); + if (error) + goto out; error = -EOPNOTSUPP; if (d->d_inode->i_op && d->d_inode->i_op->listxattr) { - error = security_inode_listxattr(d); - if (error) - goto out; error = d->d_inode->i_op->listxattr(d, klist, size); - if (error > 0) { - if (size && copy_to_user(list, klist, error)) - error = -EFAULT; - } else if (error == -ERANGE && size >= XATTR_LIST_MAX) { - /* The file system tried to returned a list bigger - than XATTR_LIST_MAX bytes. Not possible. */ - error = -E2BIG; - } + } else { + error = security_inode_listsecurity(d->d_inode, klist, size); + if (size && error >= size) + error = -ERANGE; + } + if (error > 0) { + if (size && copy_to_user(list, klist, error)) + error = -EFAULT; + } else if (error == -ERANGE && size >= XATTR_LIST_MAX) { + /* The file system tried to returned a list bigger + than XATTR_LIST_MAX bytes. Not possible. */ + error = -E2BIG; } out: if (klist) diff --git a/mm/shmem.c b/mm/shmem.c index 08a3bc2fba6..bdc4bbb6ddb 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -45,7 +45,6 @@ #include #include #include -#include #include #include #include @@ -179,7 +178,6 @@ static struct address_space_operations shmem_aops; static struct file_operations shmem_file_operations; static struct inode_operations shmem_inode_operations; static struct inode_operations shmem_dir_inode_operations; -static struct inode_operations shmem_special_inode_operations; static struct vm_operations_struct shmem_vm_ops; static struct backing_dev_info shmem_backing_dev_info = { @@ -1300,7 +1298,6 @@ shmem_get_inode(struct super_block *sb, int mode, dev_t dev) switch (mode & S_IFMT) { default: - inode->i_op = &shmem_special_inode_operations; init_special_inode(inode, mode, dev); break; case S_IFREG: @@ -1804,12 +1801,6 @@ static void shmem_put_link(struct dentry *dentry, struct nameidata *nd, void *co static struct inode_operations shmem_symlink_inline_operations = { .readlink = generic_readlink, .follow_link = shmem_follow_link_inline, -#ifdef CONFIG_TMPFS_XATTR - .setxattr = generic_setxattr, - .getxattr = generic_getxattr, - .listxattr = generic_listxattr, - .removexattr = generic_removexattr, -#endif }; static struct inode_operations shmem_symlink_inode_operations = { @@ -1817,12 +1808,6 @@ static struct inode_operations shmem_symlink_inode_operations = { .readlink = generic_readlink, .follow_link = shmem_follow_link, .put_link = shmem_put_link, -#ifdef CONFIG_TMPFS_XATTR - .setxattr = generic_setxattr, - .getxattr = generic_getxattr, - .listxattr = generic_listxattr, - .removexattr = generic_removexattr, -#endif }; static int shmem_parse_options(char *options, int *mode, uid_t *uid, gid_t *gid, unsigned long *blocks, unsigned long *inodes) @@ -1942,12 +1927,6 @@ static void shmem_put_super(struct super_block *sb) sb->s_fs_info = NULL; } -#ifdef CONFIG_TMPFS_XATTR -static struct xattr_handler *shmem_xattr_handlers[]; -#else -#define shmem_xattr_handlers NULL -#endif - static int shmem_fill_super(struct super_block *sb, void *data, int silent) { @@ -1998,7 +1977,6 @@ static int shmem_fill_super(struct super_block *sb, sb->s_blocksize_bits = PAGE_CACHE_SHIFT; sb->s_magic = TMPFS_MAGIC; sb->s_op = &shmem_ops; - sb->s_xattr = shmem_xattr_handlers; inode = shmem_get_inode(sb, S_IFDIR | mode, 0); if (!inode) @@ -2087,12 +2065,6 @@ static struct file_operations shmem_file_operations = { static struct inode_operations shmem_inode_operations = { .truncate = shmem_truncate, .setattr = shmem_notify_change, -#ifdef CONFIG_TMPFS_XATTR - .setxattr = generic_setxattr, - .getxattr = generic_getxattr, - .listxattr = generic_listxattr, - .removexattr = generic_removexattr, -#endif }; static struct inode_operations shmem_dir_inode_operations = { @@ -2106,21 +2078,6 @@ static struct inode_operations shmem_dir_inode_operations = { .rmdir = shmem_rmdir, .mknod = shmem_mknod, .rename = shmem_rename, -#ifdef CONFIG_TMPFS_XATTR - .setxattr = generic_setxattr, - .getxattr = generic_getxattr, - .listxattr = generic_listxattr, - .removexattr = generic_removexattr, -#endif -#endif -}; - -static struct inode_operations shmem_special_inode_operations = { -#ifdef CONFIG_TMPFS_XATTR - .setxattr = generic_setxattr, - .getxattr = generic_getxattr, - .listxattr = generic_listxattr, - .removexattr = generic_removexattr, #endif }; @@ -2146,48 +2103,6 @@ static struct vm_operations_struct shmem_vm_ops = { }; -#ifdef CONFIG_TMPFS_SECURITY - -static size_t shmem_xattr_security_list(struct inode *inode, char *list, size_t list_len, - const char *name, size_t name_len) -{ - return security_inode_listsecurity(inode, list, list_len); -} - -static int shmem_xattr_security_get(struct inode *inode, const char *name, void *buffer, size_t size) -{ - if (strcmp(name, "") == 0) - return -EINVAL; - return security_inode_getsecurity(inode, name, buffer, size); -} - -static int shmem_xattr_security_set(struct inode *inode, const char *name, const void *value, size_t size, int flags) -{ - if (strcmp(name, "") == 0) - return -EINVAL; - return security_inode_setsecurity(inode, name, value, size, flags); -} - -static struct xattr_handler shmem_xattr_security_handler = { - .prefix = XATTR_SECURITY_PREFIX, - .list = shmem_xattr_security_list, - .get = shmem_xattr_security_get, - .set = shmem_xattr_security_set, -}; - -#endif /* CONFIG_TMPFS_SECURITY */ - -#ifdef CONFIG_TMPFS_XATTR - -static struct xattr_handler *shmem_xattr_handlers[] = { -#ifdef CONFIG_TMPFS_SECURITY - &shmem_xattr_security_handler, -#endif - NULL -}; - -#endif /* CONFIG_TMPFS_XATTR */ - static struct super_block *shmem_get_sb(struct file_system_type *fs_type, int flags, const char *dev_name, void *data) { -- cgit v1.2.3-70-g09d2 From 1e40cd383ccc7c9f8b338c56ce28c326e25eb2fe Mon Sep 17 00:00:00 2001 From: Paolo 'Blaisorblade' Giarrusso Date: Sat, 3 Sep 2005 15:57:25 -0700 Subject: [PATCH] uml: fixes performance regression in activate_mm and thus exec() Normally, activate_mm() is called from exec(), and thus it used to be a no-op because we use a completely new "MM context" on the host (for instance, a new process), and so we didn't need to flush any "TLB entries" (which for us are the set of memory mappings for the host process from the virtual "RAM" file). Kernel threads, instead, are usually handled in a different way. So, when for AIO we call use_mm(), things used to break and so Benjamin implemented activate_mm(). However, that is only needed for AIO, and could slow down exec() inside UML, so be smart: detect being called for AIO (via PF_BORROWED_MM) and do the full flush only in that situation. Comment also the caller so that people won't go breaking UML without noticing. I also rely on the caller's locks for testing current->flags. Signed-off-by: Paolo 'Blaisorblade' Giarrusso CC: Benjamin LaHaise Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/aio.c | 4 ++++ include/asm-um/mmu_context.h | 10 +++++++++- 2 files changed, 13 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/aio.c b/fs/aio.c index 06d7d4390fe..4f641abac3c 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -567,6 +567,10 @@ static void use_mm(struct mm_struct *mm) atomic_inc(&mm->mm_count); tsk->mm = mm; tsk->active_mm = mm; + /* + * Note that on UML this *requires* PF_BORROWED_MM to be set, otherwise + * it won't work. Update it accordingly if you change it here + */ activate_mm(active_mm, mm); task_unlock(tsk); diff --git a/include/asm-um/mmu_context.h b/include/asm-um/mmu_context.h index 095bb627b96..2edb4f1f789 100644 --- a/include/asm-um/mmu_context.h +++ b/include/asm-um/mmu_context.h @@ -20,7 +20,15 @@ extern void force_flush_all(void); static inline void activate_mm(struct mm_struct *old, struct mm_struct *new) { - if (old != new) + /* + * This is called by fs/exec.c and fs/aio.c. In the first case, for an + * exec, we don't need to do anything as we're called from userspace + * and thus going to use a new host PID. In the second, we're called + * from a kernel thread, and thus need to go doing the mmap's on the + * host. Since they're very expensive, we want to avoid that as far as + * possible. + */ + if (old != new && (current->flags & PF_BORROWED_MM)) force_flush_all(); } -- cgit v1.2.3-70-g09d2 From e82894f84dbba130ab46c97748c03647f8204f92 Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Tue, 6 Sep 2005 15:16:30 -0700 Subject: [PATCH] relayfs Here's the latest version of relayfs, against linux-2.6.11-mm2. I'm hoping you'll consider putting this version back into your tree - the previous rounds of comment seem to have shaken out all the API issues and the number of comments on the code itself have also steadily dwindled. This patch is essentially the same as the relayfs redux part 5 patch, with some minor changes based on reviewer comments. Thanks again to Pekka Enberg for those. The patch size without documentation is now a little smaller at just over 40k. Here's a detailed list of the changes: - removed the attribute_flags in relay open and changed it to a boolean specifying either overwrite or no-overwrite mode, and removed everything referencing the attribute flags. - added a check for NULL names in relayfs_create_entry() - got rid of the unnecessary multiple labels in relay_create_buf() - some minor simplification of relay_alloc_buf() which got rid of a couple params - updated the Documentation In addition, this version (through code contained in the relay-apps tarball linked to below, not as part of the relayfs patch) tries to make it as easy as possible to create the cooperating kernel/user pieces of a typical and common type of logging application, one where kernel logging is kicked off when a user space data collection app starts and stops when the collection app exits, with the data being automatically logged to disk in between. To create this type of application, you basically just include a header file (relay-app.h, included in the relay-apps tarball) in your kernel module, define a couple of callbacks and call an initialization function, and on the user side call a single function that sets up and continuously monitors the buffers, and writes data to files as it becomes available. Channels are created when the collection app is started and destroyed when it exits, not when the kernel module is inserted, so different channel buffer sizes can be specified for each separate run via command-line options. See the README in the relay-apps tarball for details. Also included in the relay-apps tarball are a couple examples demonstrating how you can use this to create quick and dirty kernel logging/debugging applications. They are: - tprintk, short for 'tee printk', which temporarily puts a kprobe on printk() and writes a duplicate stream of printk output to a relayfs channel. This could be used anywhere there's printk() debugging code in the kernel which you'd like to exercise, but would rather not have your system logs cluttered with debugging junk. You'd probably want to kill klogd while you do this, otherwise there wouldn't be much point (since putting a kprobe on printk() doesn't change the output of printk()). I've used this method to temporarily divert the packet logging output of the iptables LOG target from the system logs to relayfs files instead, for instance. - klog, which just provides a printk-like formatted logging function on top of relayfs. Again, you can use this to keep stuff out of your system logs if used in place of printk. The example applications can be found here: http://prdownloads.sourceforge.net/dprobes/relay-apps.tar.gz?download From: Christoph Hellwig avoid lookup_hash usage in relayfs Signed-off-by: Tom Zanussi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/filesystems/relayfs.txt | 362 ++++++++++++++++++++ fs/Kconfig | 12 + fs/Makefile | 1 + fs/relayfs/Makefile | 4 + fs/relayfs/buffers.c | 189 +++++++++++ fs/relayfs/buffers.h | 12 + fs/relayfs/inode.c | 609 ++++++++++++++++++++++++++++++++++ fs/relayfs/relay.c | 431 ++++++++++++++++++++++++ fs/relayfs/relay.h | 12 + include/linux/relayfs_fs.h | 255 ++++++++++++++ 10 files changed, 1887 insertions(+) create mode 100644 Documentation/filesystems/relayfs.txt create mode 100644 fs/relayfs/Makefile create mode 100644 fs/relayfs/buffers.c create mode 100644 fs/relayfs/buffers.h create mode 100644 fs/relayfs/inode.c create mode 100644 fs/relayfs/relay.c create mode 100644 fs/relayfs/relay.h create mode 100644 include/linux/relayfs_fs.h (limited to 'fs') diff --git a/Documentation/filesystems/relayfs.txt b/Documentation/filesystems/relayfs.txt new file mode 100644 index 00000000000..d24e1b0d4f3 --- /dev/null +++ b/Documentation/filesystems/relayfs.txt @@ -0,0 +1,362 @@ + +relayfs - a high-speed data relay filesystem +============================================ + +relayfs is a filesystem designed to provide an efficient mechanism for +tools and facilities to relay large and potentially sustained streams +of data from kernel space to user space. + +The main abstraction of relayfs is the 'channel'. A channel consists +of a set of per-cpu kernel buffers each represented by a file in the +relayfs filesystem. Kernel clients write into a channel using +efficient write functions which automatically log to the current cpu's +channel buffer. User space applications mmap() the per-cpu files and +retrieve the data as it becomes available. + +The format of the data logged into the channel buffers is completely +up to the relayfs client; relayfs does however provide hooks which +allow clients to impose some stucture on the buffer data. Nor does +relayfs implement any form of data filtering - this also is left to +the client. The purpose is to keep relayfs as simple as possible. + +This document provides an overview of the relayfs API. The details of +the function parameters are documented along with the functions in the +filesystem code - please see that for details. + +Semantics +========= + +Each relayfs channel has one buffer per CPU, each buffer has one or +more sub-buffers. Messages are written to the first sub-buffer until +it is too full to contain a new message, in which case it it is +written to the next (if available). Messages are never split across +sub-buffers. At this point, userspace can be notified so it empties +the first sub-buffer, while the kernel continues writing to the next. + +When notified that a sub-buffer is full, the kernel knows how many +bytes of it are padding i.e. unused. Userspace can use this knowledge +to copy only valid data. + +After copying it, userspace can notify the kernel that a sub-buffer +has been consumed. + +relayfs can operate in a mode where it will overwrite data not yet +collected by userspace, and not wait for it to consume it. + +relayfs itself does not provide for communication of such data between +userspace and kernel, allowing the kernel side to remain simple and not +impose a single interface on userspace. It does provide a separate +helper though, described below. + +klog, relay-app & librelay +========================== + +relayfs itself is ready to use, but to make things easier, two +additional systems are provided. klog is a simple wrapper to make +writing formatted text or raw data to a channel simpler, regardless of +whether a channel to write into exists or not, or whether relayfs is +compiled into the kernel or is configured as a module. relay-app is +the kernel counterpart of userspace librelay.c, combined these two +files provide glue to easily stream data to disk, without having to +bother with housekeeping. klog and relay-app can be used together, +with klog providing high-level logging functions to the kernel and +relay-app taking care of kernel-user control and disk-logging chores. + +It is possible to use relayfs without relay-app & librelay, but you'll +have to implement communication between userspace and kernel, allowing +both to convey the state of buffers (full, empty, amount of padding). + +klog, relay-app and librelay can be found in the relay-apps tarball on +http://relayfs.sourceforge.net + +The relayfs user space API +========================== + +relayfs implements basic file operations for user space access to +relayfs channel buffer data. Here are the file operations that are +available and some comments regarding their behavior: + +open() enables user to open an _existing_ buffer. + +mmap() results in channel buffer being mapped into the caller's + memory space. Note that you can't do a partial mmap - you must + map the entire file, which is NRBUF * SUBBUFSIZE. + +read() read the contents of a channel buffer. The bytes read are + 'consumed' by the reader i.e. they won't be available again + to subsequent reads. If the channel is being used in + no-overwrite mode (the default), it can be read at any time + even if there's an active kernel writer. If the channel is + being used in overwrite mode and there are active channel + writers, results may be unpredictable - users should make + sure that all logging to the channel has ended before using + read() with overwrite mode. + +poll() POLLIN/POLLRDNORM/POLLERR supported. User applications are + notified when sub-buffer boundaries are crossed. + +close() decrements the channel buffer's refcount. When the refcount + reaches 0 i.e. when no process or kernel client has the buffer + open, the channel buffer is freed. + + +In order for a user application to make use of relayfs files, the +relayfs filesystem must be mounted. For example, + + mount -t relayfs relayfs /mnt/relay + +NOTE: relayfs doesn't need to be mounted for kernel clients to create + or use channels - it only needs to be mounted when user space + applications need access to the buffer data. + + +The relayfs kernel API +====================== + +Here's a summary of the API relayfs provides to in-kernel clients: + + + channel management functions: + + relay_open(base_filename, parent, subbuf_size, n_subbufs, + callbacks) + relay_close(chan) + relay_flush(chan) + relay_reset(chan) + relayfs_create_dir(name, parent) + relayfs_remove_dir(dentry) + + channel management typically called on instigation of userspace: + + relay_subbufs_consumed(chan, cpu, subbufs_consumed) + + write functions: + + relay_write(chan, data, length) + __relay_write(chan, data, length) + relay_reserve(chan, length) + + callbacks: + + subbuf_start(buf, subbuf, prev_subbuf, prev_padding) + buf_mapped(buf, filp) + buf_unmapped(buf, filp) + + helper functions: + + relay_buf_full(buf) + subbuf_start_reserve(buf, length) + + +Creating a channel +------------------ + +relay_open() is used to create a channel, along with its per-cpu +channel buffers. Each channel buffer will have an associated file +created for it in the relayfs filesystem, which can be opened and +mmapped from user space if desired. The files are named +basename0...basenameN-1 where N is the number of online cpus, and by +default will be created in the root of the filesystem. If you want a +directory structure to contain your relayfs files, you can create it +with relayfs_create_dir() and pass the parent directory to +relay_open(). Clients are responsible for cleaning up any directory +structure they create when the channel is closed - use +relayfs_remove_dir() for that. + +The total size of each per-cpu buffer is calculated by multiplying the +number of sub-buffers by the sub-buffer size passed into relay_open(). +The idea behind sub-buffers is that they're basically an extension of +double-buffering to N buffers, and they also allow applications to +easily implement random-access-on-buffer-boundary schemes, which can +be important for some high-volume applications. The number and size +of sub-buffers is completely dependent on the application and even for +the same application, different conditions will warrant different +values for these parameters at different times. Typically, the right +values to use are best decided after some experimentation; in general, +though, it's safe to assume that having only 1 sub-buffer is a bad +idea - you're guaranteed to either overwrite data or lose events +depending on the channel mode being used. + +Channel 'modes' +--------------- + +relayfs channels can be used in either of two modes - 'overwrite' or +'no-overwrite'. The mode is entirely determined by the implementation +of the subbuf_start() callback, as described below. In 'overwrite' +mode, also known as 'flight recorder' mode, writes continuously cycle +around the buffer and will never fail, but will unconditionally +overwrite old data regardless of whether it's actually been consumed. +In no-overwrite mode, writes will fail i.e. data will be lost, if the +number of unconsumed sub-buffers equals the total number of +sub-buffers in the channel. It should be clear that if there is no +consumer or if the consumer can't consume sub-buffers fast enought, +data will be lost in either case; the only difference is whether data +is lost from the beginning or the end of a buffer. + +As explained above, a relayfs channel is made of up one or more +per-cpu channel buffers, each implemented as a circular buffer +subdivided into one or more sub-buffers. Messages are written into +the current sub-buffer of the channel's current per-cpu buffer via the +write functions described below. Whenever a message can't fit into +the current sub-buffer, because there's no room left for it, the +client is notified via the subbuf_start() callback that a switch to a +new sub-buffer is about to occur. The client uses this callback to 1) +initialize the next sub-buffer if appropriate 2) finalize the previous +sub-buffer if appropriate and 3) return a boolean value indicating +whether or not to actually go ahead with the sub-buffer switch. + +To implement 'no-overwrite' mode, the userspace client would provide +an implementation of the subbuf_start() callback something like the +following: + +static int subbuf_start(struct rchan_buf *buf, + void *subbuf, + void *prev_subbuf, + unsigned int prev_padding) +{ + if (prev_subbuf) + *((unsigned *)prev_subbuf) = prev_padding; + + if (relay_buf_full(buf)) + return 0; + + subbuf_start_reserve(buf, sizeof(unsigned int)); + + return 1; +} + +If the current buffer is full i.e. all sub-buffers remain unconsumed, +the callback returns 0 to indicate that the buffer switch should not +occur yet i.e. until the consumer has had a chance to read the current +set of ready sub-buffers. For the relay_buf_full() function to make +sense, the consumer is reponsible for notifying relayfs when +sub-buffers have been consumed via relay_subbufs_consumed(). Any +subsequent attempts to write into the buffer will again invoke the +subbuf_start() callback with the same parameters; only when the +consumer has consumed one or more of the ready sub-buffers will +relay_buf_full() return 0, in which case the buffer switch can +continue. + +The implementation of the subbuf_start() callback for 'overwrite' mode +would be very similar: + +static int subbuf_start(struct rchan_buf *buf, + void *subbuf, + void *prev_subbuf, + unsigned int prev_padding) +{ + if (prev_subbuf) + *((unsigned *)prev_subbuf) = prev_padding; + + subbuf_start_reserve(buf, sizeof(unsigned int)); + + return 1; +} + +In this case, the relay_buf_full() check is meaningless and the +callback always returns 1, causing the buffer switch to occur +unconditionally. It's also meaningless for the client to use the +relay_subbufs_consumed() function in this mode, as it's never +consulted. + +The default subbuf_start() implementation, used if the client doesn't +define any callbacks, or doesn't define the subbuf_start() callback, +implements the simplest possible 'no-overwrite' mode i.e. it does +nothing but return 0. + +Header information can be reserved at the beginning of each sub-buffer +by calling the subbuf_start_reserve() helper function from within the +subbuf_start() callback. This reserved area can be used to store +whatever information the client wants. In the example above, room is +reserved in each sub-buffer to store the padding count for that +sub-buffer. This is filled in for the previous sub-buffer in the +subbuf_start() implementation; the padding value for the previous +sub-buffer is passed into the subbuf_start() callback along with a +pointer to the previous sub-buffer, since the padding value isn't +known until a sub-buffer is filled. The subbuf_start() callback is +also called for the first sub-buffer when the channel is opened, to +give the client a chance to reserve space in it. In this case the +previous sub-buffer pointer passed into the callback will be NULL, so +the client should check the value of the prev_subbuf pointer before +writing into the previous sub-buffer. + +Writing to a channel +-------------------- + +kernel clients write data into the current cpu's channel buffer using +relay_write() or __relay_write(). relay_write() is the main logging +function - it uses local_irqsave() to protect the buffer and should be +used if you might be logging from interrupt context. If you know +you'll never be logging from interrupt context, you can use +__relay_write(), which only disables preemption. These functions +don't return a value, so you can't determine whether or not they +failed - the assumption is that you wouldn't want to check a return +value in the fast logging path anyway, and that they'll always succeed +unless the buffer is full and no-overwrite mode is being used, in +which case you can detect a failed write in the subbuf_start() +callback by calling the relay_buf_full() helper function. + +relay_reserve() is used to reserve a slot in a channel buffer which +can be written to later. This would typically be used in applications +that need to write directly into a channel buffer without having to +stage data in a temporary buffer beforehand. Because the actual write +may not happen immediately after the slot is reserved, applications +using relay_reserve() can keep a count of the number of bytes actually +written, either in space reserved in the sub-buffers themselves or as +a separate array. See the 'reserve' example in the relay-apps tarball +at http://relayfs.sourceforge.net for an example of how this can be +done. Because the write is under control of the client and is +separated from the reserve, relay_reserve() doesn't protect the buffer +at all - it's up to the client to provide the appropriate +synchronization when using relay_reserve(). + +Closing a channel +----------------- + +The client calls relay_close() when it's finished using the channel. +The channel and its associated buffers are destroyed when there are no +longer any references to any of the channel buffers. relay_flush() +forces a sub-buffer switch on all the channel buffers, and can be used +to finalize and process the last sub-buffers before the channel is +closed. + +Misc +---- + +Some applications may want to keep a channel around and re-use it +rather than open and close a new channel for each use. relay_reset() +can be used for this purpose - it resets a channel to its initial +state without reallocating channel buffer memory or destroying +existing mappings. It should however only be called when it's safe to +do so i.e. when the channel isn't currently being written to. + +Finally, there are a couple of utility callbacks that can be used for +different purposes. buf_mapped() is called whenever a channel buffer +is mmapped from user space and buf_unmapped() is called when it's +unmapped. The client can use this notification to trigger actions +within the kernel application, such as enabling/disabling logging to +the channel. + + +Resources +========= + +For news, example code, mailing list, etc. see the relayfs homepage: + + http://relayfs.sourceforge.net + + +Credits +======= + +The ideas and specs for relayfs came about as a result of discussions +on tracing involving the following: + +Michel Dagenais +Richard Moore +Bob Wisniewski +Karim Yaghmour +Tom Zanussi + +Also thanks to Hubertus Franke for a lot of useful suggestions and bug +reports. diff --git a/fs/Kconfig b/fs/Kconfig index ed78d24ee42..740d6ff0367 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -816,6 +816,18 @@ config RAMFS To compile this as a module, choose M here: the module will be called ramfs. +config RELAYFS_FS + tristate "Relayfs file system support" + ---help--- + Relayfs is a high-speed data relay filesystem designed to provide + an efficient mechanism for tools and facilities to relay large + amounts of data from kernel space to user space. + + To compile this code as a module, choose M here: the module will be + called relayfs. + + If unsure, say N. + endmenu menu "Miscellaneous filesystems" diff --git a/fs/Makefile b/fs/Makefile index cf95eb894fd..15158309dee 100644 --- a/fs/Makefile +++ b/fs/Makefile @@ -90,6 +90,7 @@ obj-$(CONFIG_AUTOFS_FS) += autofs/ obj-$(CONFIG_AUTOFS4_FS) += autofs4/ obj-$(CONFIG_ADFS_FS) += adfs/ obj-$(CONFIG_UDF_FS) += udf/ +obj-$(CONFIG_RELAYFS_FS) += relayfs/ obj-$(CONFIG_SUN_OPENPROMFS) += openpromfs/ obj-$(CONFIG_JFS_FS) += jfs/ obj-$(CONFIG_XFS_FS) += xfs/ diff --git a/fs/relayfs/Makefile b/fs/relayfs/Makefile new file mode 100644 index 00000000000..e76e182cdb3 --- /dev/null +++ b/fs/relayfs/Makefile @@ -0,0 +1,4 @@ +obj-$(CONFIG_RELAYFS_FS) += relayfs.o + +relayfs-y := relay.o inode.o buffers.o + diff --git a/fs/relayfs/buffers.c b/fs/relayfs/buffers.c new file mode 100644 index 00000000000..2aa8e271999 --- /dev/null +++ b/fs/relayfs/buffers.c @@ -0,0 +1,189 @@ +/* + * RelayFS buffer management code. + * + * Copyright (C) 2002-2005 - Tom Zanussi (zanussi@us.ibm.com), IBM Corp + * Copyright (C) 1999-2005 - Karim Yaghmour (karim@opersys.com) + * + * This file is released under the GPL. + */ + +#include +#include +#include +#include +#include "relay.h" +#include "buffers.h" + +/* + * close() vm_op implementation for relayfs file mapping. + */ +static void relay_file_mmap_close(struct vm_area_struct *vma) +{ + struct rchan_buf *buf = vma->vm_private_data; + buf->chan->cb->buf_unmapped(buf, vma->vm_file); +} + +/* + * nopage() vm_op implementation for relayfs file mapping. + */ +static struct page *relay_buf_nopage(struct vm_area_struct *vma, + unsigned long address, + int *type) +{ + struct page *page; + struct rchan_buf *buf = vma->vm_private_data; + unsigned long offset = address - vma->vm_start; + + if (address > vma->vm_end) + return NOPAGE_SIGBUS; /* Disallow mremap */ + if (!buf) + return NOPAGE_OOM; + + page = vmalloc_to_page(buf->start + offset); + if (!page) + return NOPAGE_OOM; + get_page(page); + + if (type) + *type = VM_FAULT_MINOR; + + return page; +} + +/* + * vm_ops for relay file mappings. + */ +static struct vm_operations_struct relay_file_mmap_ops = { + .nopage = relay_buf_nopage, + .close = relay_file_mmap_close, +}; + +/** + * relay_mmap_buf: - mmap channel buffer to process address space + * @buf: relay channel buffer + * @vma: vm_area_struct describing memory to be mapped + * + * Returns 0 if ok, negative on error + * + * Caller should already have grabbed mmap_sem. + */ +int relay_mmap_buf(struct rchan_buf *buf, struct vm_area_struct *vma) +{ + unsigned long length = vma->vm_end - vma->vm_start; + struct file *filp = vma->vm_file; + + if (!buf) + return -EBADF; + + if (length != (unsigned long)buf->chan->alloc_size) + return -EINVAL; + + vma->vm_ops = &relay_file_mmap_ops; + vma->vm_private_data = buf; + buf->chan->cb->buf_mapped(buf, filp); + + return 0; +} + +/** + * relay_alloc_buf - allocate a channel buffer + * @buf: the buffer struct + * @size: total size of the buffer + * + * Returns a pointer to the resulting buffer, NULL if unsuccessful + */ +static void *relay_alloc_buf(struct rchan_buf *buf, unsigned long size) +{ + void *mem; + unsigned int i, j, n_pages; + + size = PAGE_ALIGN(size); + n_pages = size >> PAGE_SHIFT; + + buf->page_array = kcalloc(n_pages, sizeof(struct page *), GFP_KERNEL); + if (!buf->page_array) + return NULL; + + for (i = 0; i < n_pages; i++) { + buf->page_array[i] = alloc_page(GFP_KERNEL); + if (unlikely(!buf->page_array[i])) + goto depopulate; + } + mem = vmap(buf->page_array, n_pages, GFP_KERNEL, PAGE_KERNEL); + if (!mem) + goto depopulate; + + memset(mem, 0, size); + buf->page_count = n_pages; + return mem; + +depopulate: + for (j = 0; j < i; j++) + __free_page(buf->page_array[j]); + kfree(buf->page_array); + return NULL; +} + +/** + * relay_create_buf - allocate and initialize a channel buffer + * @alloc_size: size of the buffer to allocate + * @n_subbufs: number of sub-buffers in the channel + * + * Returns channel buffer if successful, NULL otherwise + */ +struct rchan_buf *relay_create_buf(struct rchan *chan) +{ + struct rchan_buf *buf = kcalloc(1, sizeof(struct rchan_buf), GFP_KERNEL); + if (!buf) + return NULL; + + buf->padding = kmalloc(chan->n_subbufs * sizeof(size_t *), GFP_KERNEL); + if (!buf->padding) + goto free_buf; + + buf->start = relay_alloc_buf(buf, chan->alloc_size); + if (!buf->start) + goto free_buf; + + buf->chan = chan; + kref_get(&buf->chan->kref); + return buf; + +free_buf: + kfree(buf->padding); + kfree(buf); + return NULL; +} + +/** + * relay_destroy_buf - destroy an rchan_buf struct and associated buffer + * @buf: the buffer struct + */ +void relay_destroy_buf(struct rchan_buf *buf) +{ + struct rchan *chan = buf->chan; + unsigned int i; + + if (likely(buf->start)) { + vunmap(buf->start); + for (i = 0; i < buf->page_count; i++) + __free_page(buf->page_array[i]); + kfree(buf->page_array); + } + kfree(buf->padding); + kfree(buf); + kref_put(&chan->kref, relay_destroy_channel); +} + +/** + * relay_remove_buf - remove a channel buffer + * + * Removes the file from the relayfs fileystem, which also frees the + * rchan_buf_struct and the channel buffer. Should only be called from + * kref_put(). + */ +void relay_remove_buf(struct kref *kref) +{ + struct rchan_buf *buf = container_of(kref, struct rchan_buf, kref); + relayfs_remove(buf->dentry); +} diff --git a/fs/relayfs/buffers.h b/fs/relayfs/buffers.h new file mode 100644 index 00000000000..37a12493f64 --- /dev/null +++ b/fs/relayfs/buffers.h @@ -0,0 +1,12 @@ +#ifndef _BUFFERS_H +#define _BUFFERS_H + +/* This inspired by rtai/shmem */ +#define FIX_SIZE(x) (((x) - 1) & PAGE_MASK) + PAGE_SIZE + +extern int relay_mmap_buf(struct rchan_buf *buf, struct vm_area_struct *vma); +extern struct rchan_buf *relay_create_buf(struct rchan *chan); +extern void relay_destroy_buf(struct rchan_buf *buf); +extern void relay_remove_buf(struct kref *kref); + +#endif/* _BUFFERS_H */ diff --git a/fs/relayfs/inode.c b/fs/relayfs/inode.c new file mode 100644 index 00000000000..0f7f88d067a --- /dev/null +++ b/fs/relayfs/inode.c @@ -0,0 +1,609 @@ +/* + * VFS-related code for RelayFS, a high-speed data relay filesystem. + * + * Copyright (C) 2003-2005 - Tom Zanussi , IBM Corp + * Copyright (C) 2003-2005 - Karim Yaghmour + * + * Based on ramfs, Copyright (C) 2002 - Linus Torvalds + * + * This file is released under the GPL. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "relay.h" +#include "buffers.h" + +#define RELAYFS_MAGIC 0xF0B4A981 + +static struct vfsmount * relayfs_mount; +static int relayfs_mount_count; +static kmem_cache_t * relayfs_inode_cachep; + +static struct backing_dev_info relayfs_backing_dev_info = { + .ra_pages = 0, /* No readahead */ + .capabilities = BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_WRITEBACK, +}; + +static struct inode *relayfs_get_inode(struct super_block *sb, int mode, + struct rchan *chan) +{ + struct rchan_buf *buf = NULL; + struct inode *inode; + + if (S_ISREG(mode)) { + BUG_ON(!chan); + buf = relay_create_buf(chan); + if (!buf) + return NULL; + } + + inode = new_inode(sb); + if (!inode) { + relay_destroy_buf(buf); + return NULL; + } + + inode->i_mode = mode; + inode->i_uid = 0; + inode->i_gid = 0; + inode->i_blksize = PAGE_CACHE_SIZE; + inode->i_blocks = 0; + inode->i_mapping->backing_dev_info = &relayfs_backing_dev_info; + inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; + switch (mode & S_IFMT) { + case S_IFREG: + inode->i_fop = &relayfs_file_operations; + RELAYFS_I(inode)->buf = buf; + break; + case S_IFDIR: + inode->i_op = &simple_dir_inode_operations; + inode->i_fop = &simple_dir_operations; + + /* directory inodes start off with i_nlink == 2 (for "." entry) */ + inode->i_nlink++; + break; + default: + break; + } + + return inode; +} + +/** + * relayfs_create_entry - create a relayfs directory or file + * @name: the name of the file to create + * @parent: parent directory + * @mode: mode + * @chan: relay channel associated with the file + * + * Returns the new dentry, NULL on failure + * + * Creates a file or directory with the specifed permissions. + */ +static struct dentry *relayfs_create_entry(const char *name, + struct dentry *parent, + int mode, + struct rchan *chan) +{ + struct dentry *d; + struct inode *inode; + int error = 0; + + BUG_ON(!name || !(S_ISREG(mode) || S_ISDIR(mode))); + + error = simple_pin_fs("relayfs", &relayfs_mount, &relayfs_mount_count); + if (error) { + printk(KERN_ERR "Couldn't mount relayfs: errcode %d\n", error); + return NULL; + } + + if (!parent && relayfs_mount && relayfs_mount->mnt_sb) + parent = relayfs_mount->mnt_sb->s_root; + + if (!parent) { + simple_release_fs(&relayfs_mount, &relayfs_mount_count); + return NULL; + } + + parent = dget(parent); + down(&parent->d_inode->i_sem); + d = lookup_one_len(name, parent, strlen(name)); + if (IS_ERR(d)) { + d = NULL; + goto release_mount; + } + + if (d->d_inode) { + d = NULL; + goto release_mount; + } + + inode = relayfs_get_inode(parent->d_inode->i_sb, mode, chan); + if (!inode) { + d = NULL; + goto release_mount; + } + + d_instantiate(d, inode); + dget(d); /* Extra count - pin the dentry in core */ + + if (S_ISDIR(mode)) + parent->d_inode->i_nlink++; + + goto exit; + +release_mount: + simple_release_fs(&relayfs_mount, &relayfs_mount_count); + +exit: + up(&parent->d_inode->i_sem); + dput(parent); + return d; +} + +/** + * relayfs_create_file - create a file in the relay filesystem + * @name: the name of the file to create + * @parent: parent directory + * @mode: mode, if not specied the default perms are used + * @chan: channel associated with the file + * + * Returns file dentry if successful, NULL otherwise. + * + * The file will be created user r on behalf of current user. + */ +struct dentry *relayfs_create_file(const char *name, struct dentry *parent, + int mode, struct rchan *chan) +{ + if (!mode) + mode = S_IRUSR; + mode = (mode & S_IALLUGO) | S_IFREG; + + return relayfs_create_entry(name, parent, mode, chan); +} + +/** + * relayfs_create_dir - create a directory in the relay filesystem + * @name: the name of the directory to create + * @parent: parent directory, NULL if parent should be fs root + * + * Returns directory dentry if successful, NULL otherwise. + * + * The directory will be created world rwx on behalf of current user. + */ +struct dentry *relayfs_create_dir(const char *name, struct dentry *parent) +{ + int mode = S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO; + return relayfs_create_entry(name, parent, mode, NULL); +} + +/** + * relayfs_remove - remove a file or directory in the relay filesystem + * @dentry: file or directory dentry + * + * Returns 0 if successful, negative otherwise. + */ +int relayfs_remove(struct dentry *dentry) +{ + struct dentry *parent; + int error = 0; + + if (!dentry) + return -EINVAL; + parent = dentry->d_parent; + if (!parent) + return -EINVAL; + + parent = dget(parent); + down(&parent->d_inode->i_sem); + if (dentry->d_inode) { + if (S_ISDIR(dentry->d_inode->i_mode)) + error = simple_rmdir(parent->d_inode, dentry); + else + error = simple_unlink(parent->d_inode, dentry); + if (!error) + d_delete(dentry); + } + if (!error) + dput(dentry); + up(&parent->d_inode->i_sem); + dput(parent); + + if (!error) + simple_release_fs(&relayfs_mount, &relayfs_mount_count); + + return error; +} + +/** + * relayfs_remove_dir - remove a directory in the relay filesystem + * @dentry: directory dentry + * + * Returns 0 if successful, negative otherwise. + */ +int relayfs_remove_dir(struct dentry *dentry) +{ + return relayfs_remove(dentry); +} + +/** + * relayfs_open - open file op for relayfs files + * @inode: the inode + * @filp: the file + * + * Increments the channel buffer refcount. + */ +static int relayfs_open(struct inode *inode, struct file *filp) +{ + struct rchan_buf *buf = RELAYFS_I(inode)->buf; + kref_get(&buf->kref); + + return 0; +} + +/** + * relayfs_mmap - mmap file op for relayfs files + * @filp: the file + * @vma: the vma describing what to map + * + * Calls upon relay_mmap_buf to map the file into user space. + */ +static int relayfs_mmap(struct file *filp, struct vm_area_struct *vma) +{ + struct inode *inode = filp->f_dentry->d_inode; + return relay_mmap_buf(RELAYFS_I(inode)->buf, vma); +} + +/** + * relayfs_poll - poll file op for relayfs files + * @filp: the file + * @wait: poll table + * + * Poll implemention. + */ +static unsigned int relayfs_poll(struct file *filp, poll_table *wait) +{ + unsigned int mask = 0; + struct inode *inode = filp->f_dentry->d_inode; + struct rchan_buf *buf = RELAYFS_I(inode)->buf; + + if (buf->finalized) + return POLLERR; + + if (filp->f_mode & FMODE_READ) { + poll_wait(filp, &buf->read_wait, wait); + if (!relay_buf_empty(buf)) + mask |= POLLIN | POLLRDNORM; + } + + return mask; +} + +/** + * relayfs_release - release file op for relayfs files + * @inode: the inode + * @filp: the file + * + * Decrements the channel refcount, as the filesystem is + * no longer using it. + */ +static int relayfs_release(struct inode *inode, struct file *filp) +{ + struct rchan_buf *buf = RELAYFS_I(inode)->buf; + kref_put(&buf->kref, relay_remove_buf); + + return 0; +} + +/** + * relayfs_read_consume - update the consumed count for the buffer + */ +static void relayfs_read_consume(struct rchan_buf *buf, + size_t read_pos, + size_t bytes_consumed) +{ + size_t subbuf_size = buf->chan->subbuf_size; + size_t n_subbufs = buf->chan->n_subbufs; + size_t read_subbuf; + + if (buf->bytes_consumed + bytes_consumed > subbuf_size) { + relay_subbufs_consumed(buf->chan, buf->cpu, 1); + buf->bytes_consumed = 0; + } + + buf->bytes_consumed += bytes_consumed; + read_subbuf = read_pos / buf->chan->subbuf_size; + if (buf->bytes_consumed + buf->padding[read_subbuf] == subbuf_size) { + if ((read_subbuf == buf->subbufs_produced % n_subbufs) && + (buf->offset == subbuf_size)) + return; + relay_subbufs_consumed(buf->chan, buf->cpu, 1); + buf->bytes_consumed = 0; + } +} + +/** + * relayfs_read_avail - boolean, are there unconsumed bytes available? + */ +static int relayfs_read_avail(struct rchan_buf *buf, size_t read_pos) +{ + size_t bytes_produced, bytes_consumed, write_offset; + size_t subbuf_size = buf->chan->subbuf_size; + size_t n_subbufs = buf->chan->n_subbufs; + size_t produced = buf->subbufs_produced % n_subbufs; + size_t consumed = buf->subbufs_consumed % n_subbufs; + + write_offset = buf->offset > subbuf_size ? subbuf_size : buf->offset; + + if (consumed > produced) { + if ((produced > n_subbufs) && + (produced + n_subbufs - consumed <= n_subbufs)) + produced += n_subbufs; + } else if (consumed == produced) { + if (buf->offset > subbuf_size) { + produced += n_subbufs; + if (buf->subbufs_produced == buf->subbufs_consumed) + consumed += n_subbufs; + } + } + + if (buf->offset > subbuf_size) + bytes_produced = (produced - 1) * subbuf_size + write_offset; + else + bytes_produced = produced * subbuf_size + write_offset; + bytes_consumed = consumed * subbuf_size + buf->bytes_consumed; + + if (bytes_produced == bytes_consumed) + return 0; + + relayfs_read_consume(buf, read_pos, 0); + + return 1; +} + +/** + * relayfs_read_subbuf_avail - return bytes available in sub-buffer + */ +static size_t relayfs_read_subbuf_avail(size_t read_pos, + struct rchan_buf *buf) +{ + size_t padding, avail = 0; + size_t read_subbuf, read_offset, write_subbuf, write_offset; + size_t subbuf_size = buf->chan->subbuf_size; + + write_subbuf = (buf->data - buf->start) / subbuf_size; + write_offset = buf->offset > subbuf_size ? subbuf_size : buf->offset; + read_subbuf = read_pos / subbuf_size; + read_offset = read_pos % subbuf_size; + padding = buf->padding[read_subbuf]; + + if (read_subbuf == write_subbuf) { + if (read_offset + padding < write_offset) + avail = write_offset - (read_offset + padding); + } else + avail = (subbuf_size - padding) - read_offset; + + return avail; +} + +/** + * relayfs_read_start_pos - find the first available byte to read + * + * If the read_pos is in the middle of padding, return the + * position of the first actually available byte, otherwise + * return the original value. + */ +static size_t relayfs_read_start_pos(size_t read_pos, + struct rchan_buf *buf) +{ + size_t read_subbuf, padding, padding_start, padding_end; + size_t subbuf_size = buf->chan->subbuf_size; + size_t n_subbufs = buf->chan->n_subbufs; + + read_subbuf = read_pos / subbuf_size; + padding = buf->padding[read_subbuf]; + padding_start = (read_subbuf + 1) * subbuf_size - padding; + padding_end = (read_subbuf + 1) * subbuf_size; + if (read_pos >= padding_start && read_pos < padding_end) { + read_subbuf = (read_subbuf + 1) % n_subbufs; + read_pos = read_subbuf * subbuf_size; + } + + return read_pos; +} + +/** + * relayfs_read_end_pos - return the new read position + */ +static size_t relayfs_read_end_pos(struct rchan_buf *buf, + size_t read_pos, + size_t count) +{ + size_t read_subbuf, padding, end_pos; + size_t subbuf_size = buf->chan->subbuf_size; + size_t n_subbufs = buf->chan->n_subbufs; + + read_subbuf = read_pos / subbuf_size; + padding = buf->padding[read_subbuf]; + if (read_pos % subbuf_size + count + padding == subbuf_size) + end_pos = (read_subbuf + 1) * subbuf_size; + else + end_pos = read_pos + count; + if (end_pos >= subbuf_size * n_subbufs) + end_pos = 0; + + return end_pos; +} + +/** + * relayfs_read - read file op for relayfs files + * @filp: the file + * @buffer: the userspace buffer + * @count: number of bytes to read + * @ppos: position to read from + * + * Reads count bytes or the number of bytes available in the + * current sub-buffer being read, whichever is smaller. + */ +static ssize_t relayfs_read(struct file *filp, + char __user *buffer, + size_t count, + loff_t *ppos) +{ + struct inode *inode = filp->f_dentry->d_inode; + struct rchan_buf *buf = RELAYFS_I(inode)->buf; + size_t read_start, avail; + ssize_t ret = 0; + void *from; + + down(&inode->i_sem); + if(!relayfs_read_avail(buf, *ppos)) + goto out; + + read_start = relayfs_read_start_pos(*ppos, buf); + avail = relayfs_read_subbuf_avail(read_start, buf); + if (!avail) + goto out; + + from = buf->start + read_start; + ret = count = min(count, avail); + if (copy_to_user(buffer, from, count)) { + ret = -EFAULT; + goto out; + } + relayfs_read_consume(buf, read_start, count); + *ppos = relayfs_read_end_pos(buf, read_start, count); +out: + up(&inode->i_sem); + return ret; +} + +/** + * relayfs alloc_inode() implementation + */ +static struct inode *relayfs_alloc_inode(struct super_block *sb) +{ + struct relayfs_inode_info *p = kmem_cache_alloc(relayfs_inode_cachep, SLAB_KERNEL); + if (!p) + return NULL; + p->buf = NULL; + + return &p->vfs_inode; +} + +/** + * relayfs destroy_inode() implementation + */ +static void relayfs_destroy_inode(struct inode *inode) +{ + if (RELAYFS_I(inode)->buf) + relay_destroy_buf(RELAYFS_I(inode)->buf); + + kmem_cache_free(relayfs_inode_cachep, RELAYFS_I(inode)); +} + +static void init_once(void *p, kmem_cache_t *cachep, unsigned long flags) +{ + struct relayfs_inode_info *i = p; + if ((flags & (SLAB_CTOR_VERIFY | SLAB_CTOR_CONSTRUCTOR)) == SLAB_CTOR_CONSTRUCTOR) + inode_init_once(&i->vfs_inode); +} + +struct file_operations relayfs_file_operations = { + .open = relayfs_open, + .poll = relayfs_poll, + .mmap = relayfs_mmap, + .read = relayfs_read, + .llseek = no_llseek, + .release = relayfs_release, +}; + +static struct super_operations relayfs_ops = { + .statfs = simple_statfs, + .drop_inode = generic_delete_inode, + .alloc_inode = relayfs_alloc_inode, + .destroy_inode = relayfs_destroy_inode, +}; + +static int relayfs_fill_super(struct super_block * sb, void * data, int silent) +{ + struct inode *inode; + struct dentry *root; + int mode = S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO; + + sb->s_blocksize = PAGE_CACHE_SIZE; + sb->s_blocksize_bits = PAGE_CACHE_SHIFT; + sb->s_magic = RELAYFS_MAGIC; + sb->s_op = &relayfs_ops; + inode = relayfs_get_inode(sb, mode, NULL); + + if (!inode) + return -ENOMEM; + + root = d_alloc_root(inode); + if (!root) { + iput(inode); + return -ENOMEM; + } + sb->s_root = root; + + return 0; +} + +static struct super_block * relayfs_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, + void *data) +{ + return get_sb_single(fs_type, flags, data, relayfs_fill_super); +} + +static struct file_system_type relayfs_fs_type = { + .owner = THIS_MODULE, + .name = "relayfs", + .get_sb = relayfs_get_sb, + .kill_sb = kill_litter_super, +}; + +static int __init init_relayfs_fs(void) +{ + int err; + + relayfs_inode_cachep = kmem_cache_create("relayfs_inode_cache", + sizeof(struct relayfs_inode_info), 0, + 0, init_once, NULL); + if (!relayfs_inode_cachep) + return -ENOMEM; + + err = register_filesystem(&relayfs_fs_type); + if (err) + kmem_cache_destroy(relayfs_inode_cachep); + + return err; +} + +static void __exit exit_relayfs_fs(void) +{ + unregister_filesystem(&relayfs_fs_type); + kmem_cache_destroy(relayfs_inode_cachep); +} + +module_init(init_relayfs_fs) +module_exit(exit_relayfs_fs) + +EXPORT_SYMBOL_GPL(relayfs_file_operations); +EXPORT_SYMBOL_GPL(relayfs_create_dir); +EXPORT_SYMBOL_GPL(relayfs_remove_dir); + +MODULE_AUTHOR("Tom Zanussi and Karim Yaghmour "); +MODULE_DESCRIPTION("Relay Filesystem"); +MODULE_LICENSE("GPL"); + diff --git a/fs/relayfs/relay.c b/fs/relayfs/relay.c new file mode 100644 index 00000000000..16446a15c96 --- /dev/null +++ b/fs/relayfs/relay.c @@ -0,0 +1,431 @@ +/* + * Public API and common code for RelayFS. + * + * See Documentation/filesystems/relayfs.txt for an overview of relayfs. + * + * Copyright (C) 2002-2005 - Tom Zanussi (zanussi@us.ibm.com), IBM Corp + * Copyright (C) 1999-2005 - Karim Yaghmour (karim@opersys.com) + * + * This file is released under the GPL. + */ + +#include +#include +#include +#include +#include +#include +#include "relay.h" +#include "buffers.h" + +/** + * relay_buf_empty - boolean, is the channel buffer empty? + * @buf: channel buffer + * + * Returns 1 if the buffer is empty, 0 otherwise. + */ +int relay_buf_empty(struct rchan_buf *buf) +{ + return (buf->subbufs_produced - buf->subbufs_consumed) ? 0 : 1; +} + +/** + * relay_buf_full - boolean, is the channel buffer full? + * @buf: channel buffer + * + * Returns 1 if the buffer is full, 0 otherwise. + */ +int relay_buf_full(struct rchan_buf *buf) +{ + size_t ready = buf->subbufs_produced - buf->subbufs_consumed; + return (ready >= buf->chan->n_subbufs) ? 1 : 0; +} + +/* + * High-level relayfs kernel API and associated functions. + */ + +/* + * rchan_callback implementations defining default channel behavior. Used + * in place of corresponding NULL values in client callback struct. + */ + +/* + * subbuf_start() default callback. Does nothing. + */ +static int subbuf_start_default_callback (struct rchan_buf *buf, + void *subbuf, + void *prev_subbuf, + size_t prev_padding) +{ + if (relay_buf_full(buf)) + return 0; + + return 1; +} + +/* + * buf_mapped() default callback. Does nothing. + */ +static void buf_mapped_default_callback(struct rchan_buf *buf, + struct file *filp) +{ +} + +/* + * buf_unmapped() default callback. Does nothing. + */ +static void buf_unmapped_default_callback(struct rchan_buf *buf, + struct file *filp) +{ +} + +/* relay channel default callbacks */ +static struct rchan_callbacks default_channel_callbacks = { + .subbuf_start = subbuf_start_default_callback, + .buf_mapped = buf_mapped_default_callback, + .buf_unmapped = buf_unmapped_default_callback, +}; + +/** + * wakeup_readers - wake up readers waiting on a channel + * @private: the channel buffer + * + * This is the work function used to defer reader waking. The + * reason waking is deferred is that calling directly from write + * causes problems if you're writing from say the scheduler. + */ +static void wakeup_readers(void *private) +{ + struct rchan_buf *buf = private; + wake_up_interruptible(&buf->read_wait); +} + +/** + * __relay_reset - reset a channel buffer + * @buf: the channel buffer + * @init: 1 if this is a first-time initialization + * + * See relay_reset for description of effect. + */ +static inline void __relay_reset(struct rchan_buf *buf, unsigned int init) +{ + size_t i; + + if (init) { + init_waitqueue_head(&buf->read_wait); + kref_init(&buf->kref); + INIT_WORK(&buf->wake_readers, NULL, NULL); + } else { + cancel_delayed_work(&buf->wake_readers); + flush_scheduled_work(); + } + + buf->subbufs_produced = 0; + buf->subbufs_consumed = 0; + buf->bytes_consumed = 0; + buf->finalized = 0; + buf->data = buf->start; + buf->offset = 0; + + for (i = 0; i < buf->chan->n_subbufs; i++) + buf->padding[i] = 0; + + buf->chan->cb->subbuf_start(buf, buf->data, NULL, 0); +} + +/** + * relay_reset - reset the channel + * @chan: the channel + * + * This has the effect of erasing all data from all channel buffers + * and restarting the channel in its initial state. The buffers + * are not freed, so any mappings are still in effect. + * + * NOTE: Care should be taken that the channel isn't actually + * being used by anything when this call is made. + */ +void relay_reset(struct rchan *chan) +{ + unsigned int i; + + if (!chan) + return; + + for (i = 0; i < NR_CPUS; i++) { + if (!chan->buf[i]) + continue; + __relay_reset(chan->buf[i], 0); + } +} + +/** + * relay_open_buf - create a new channel buffer in relayfs + * + * Internal - used by relay_open(). + */ +static struct rchan_buf *relay_open_buf(struct rchan *chan, + const char *filename, + struct dentry *parent) +{ + struct rchan_buf *buf; + struct dentry *dentry; + + /* Create file in fs */ + dentry = relayfs_create_file(filename, parent, S_IRUSR, chan); + if (!dentry) + return NULL; + + buf = RELAYFS_I(dentry->d_inode)->buf; + buf->dentry = dentry; + __relay_reset(buf, 1); + + return buf; +} + +/** + * relay_close_buf - close a channel buffer + * @buf: channel buffer + * + * Marks the buffer finalized and restores the default callbacks. + * The channel buffer and channel buffer data structure are then freed + * automatically when the last reference is given up. + */ +static inline void relay_close_buf(struct rchan_buf *buf) +{ + buf->finalized = 1; + buf->chan->cb = &default_channel_callbacks; + cancel_delayed_work(&buf->wake_readers); + flush_scheduled_work(); + kref_put(&buf->kref, relay_remove_buf); +} + +static inline void setup_callbacks(struct rchan *chan, + struct rchan_callbacks *cb) +{ + if (!cb) { + chan->cb = &default_channel_callbacks; + return; + } + + if (!cb->subbuf_start) + cb->subbuf_start = subbuf_start_default_callback; + if (!cb->buf_mapped) + cb->buf_mapped = buf_mapped_default_callback; + if (!cb->buf_unmapped) + cb->buf_unmapped = buf_unmapped_default_callback; + chan->cb = cb; +} + +/** + * relay_open - create a new relayfs channel + * @base_filename: base name of files to create + * @parent: dentry of parent directory, NULL for root directory + * @subbuf_size: size of sub-buffers + * @n_subbufs: number of sub-buffers + * @cb: client callback functions + * + * Returns channel pointer if successful, NULL otherwise. + * + * Creates a channel buffer for each cpu using the sizes and + * attributes specified. The created channel buffer files + * will be named base_filename0...base_filenameN-1. File + * permissions will be S_IRUSR. + */ +struct rchan *relay_open(const char *base_filename, + struct dentry *parent, + size_t subbuf_size, + size_t n_subbufs, + struct rchan_callbacks *cb) +{ + unsigned int i; + struct rchan *chan; + char *tmpname; + + if (!base_filename) + return NULL; + + if (!(subbuf_size && n_subbufs)) + return NULL; + + chan = kcalloc(1, sizeof(struct rchan), GFP_KERNEL); + if (!chan) + return NULL; + + chan->version = RELAYFS_CHANNEL_VERSION; + chan->n_subbufs = n_subbufs; + chan->subbuf_size = subbuf_size; + chan->alloc_size = FIX_SIZE(subbuf_size * n_subbufs); + setup_callbacks(chan, cb); + kref_init(&chan->kref); + + tmpname = kmalloc(NAME_MAX + 1, GFP_KERNEL); + if (!tmpname) + goto free_chan; + + for_each_online_cpu(i) { + sprintf(tmpname, "%s%d", base_filename, i); + chan->buf[i] = relay_open_buf(chan, tmpname, parent); + chan->buf[i]->cpu = i; + if (!chan->buf[i]) + goto free_bufs; + } + + kfree(tmpname); + return chan; + +free_bufs: + for (i = 0; i < NR_CPUS; i++) { + if (!chan->buf[i]) + break; + relay_close_buf(chan->buf[i]); + } + kfree(tmpname); + +free_chan: + kref_put(&chan->kref, relay_destroy_channel); + return NULL; +} + +/** + * relay_switch_subbuf - switch to a new sub-buffer + * @buf: channel buffer + * @length: size of current event + * + * Returns either the length passed in or 0 if full. + + * Performs sub-buffer-switch tasks such as invoking callbacks, + * updating padding counts, waking up readers, etc. + */ +size_t relay_switch_subbuf(struct rchan_buf *buf, size_t length) +{ + void *old, *new; + size_t old_subbuf, new_subbuf; + + if (unlikely(length > buf->chan->subbuf_size)) + goto toobig; + + if (buf->offset != buf->chan->subbuf_size + 1) { + buf->prev_padding = buf->chan->subbuf_size - buf->offset; + old_subbuf = buf->subbufs_produced % buf->chan->n_subbufs; + buf->padding[old_subbuf] = buf->prev_padding; + buf->subbufs_produced++; + if (waitqueue_active(&buf->read_wait)) { + PREPARE_WORK(&buf->wake_readers, wakeup_readers, buf); + schedule_delayed_work(&buf->wake_readers, 1); + } + } + + old = buf->data; + new_subbuf = buf->subbufs_produced % buf->chan->n_subbufs; + new = buf->start + new_subbuf * buf->chan->subbuf_size; + buf->offset = 0; + if (!buf->chan->cb->subbuf_start(buf, new, old, buf->prev_padding)) { + buf->offset = buf->chan->subbuf_size + 1; + return 0; + } + buf->data = new; + buf->padding[new_subbuf] = 0; + + if (unlikely(length + buf->offset > buf->chan->subbuf_size)) + goto toobig; + + return length; + +toobig: + printk(KERN_WARNING "relayfs: event too large (%Zd)\n", length); + WARN_ON(1); + return 0; +} + +/** + * relay_subbufs_consumed - update the buffer's sub-buffers-consumed count + * @chan: the channel + * @cpu: the cpu associated with the channel buffer to update + * @subbufs_consumed: number of sub-buffers to add to current buf's count + * + * Adds to the channel buffer's consumed sub-buffer count. + * subbufs_consumed should be the number of sub-buffers newly consumed, + * not the total consumed. + * + * NOTE: kernel clients don't need to call this function if the channel + * mode is 'overwrite'. + */ +void relay_subbufs_consumed(struct rchan *chan, + unsigned int cpu, + size_t subbufs_consumed) +{ + struct rchan_buf *buf; + + if (!chan) + return; + + if (cpu >= NR_CPUS || !chan->buf[cpu]) + return; + + buf = chan->buf[cpu]; + buf->subbufs_consumed += subbufs_consumed; + if (buf->subbufs_consumed > buf->subbufs_produced) + buf->subbufs_consumed = buf->subbufs_produced; +} + +/** + * relay_destroy_channel - free the channel struct + * + * Should only be called from kref_put(). + */ +void relay_destroy_channel(struct kref *kref) +{ + struct rchan *chan = container_of(kref, struct rchan, kref); + kfree(chan); +} + +/** + * relay_close - close the channel + * @chan: the channel + * + * Closes all channel buffers and frees the channel. + */ +void relay_close(struct rchan *chan) +{ + unsigned int i; + + if (!chan) + return; + + for (i = 0; i < NR_CPUS; i++) { + if (!chan->buf[i]) + continue; + relay_close_buf(chan->buf[i]); + } + + kref_put(&chan->kref, relay_destroy_channel); +} + +/** + * relay_flush - close the channel + * @chan: the channel + * + * Flushes all channel buffers i.e. forces buffer switch. + */ +void relay_flush(struct rchan *chan) +{ + unsigned int i; + + if (!chan) + return; + + for (i = 0; i < NR_CPUS; i++) { + if (!chan->buf[i]) + continue; + relay_switch_subbuf(chan->buf[i], 0); + } +} + +EXPORT_SYMBOL_GPL(relay_open); +EXPORT_SYMBOL_GPL(relay_close); +EXPORT_SYMBOL_GPL(relay_flush); +EXPORT_SYMBOL_GPL(relay_reset); +EXPORT_SYMBOL_GPL(relay_subbufs_consumed); +EXPORT_SYMBOL_GPL(relay_switch_subbuf); +EXPORT_SYMBOL_GPL(relay_buf_full); diff --git a/fs/relayfs/relay.h b/fs/relayfs/relay.h new file mode 100644 index 00000000000..703503fa22b --- /dev/null +++ b/fs/relayfs/relay.h @@ -0,0 +1,12 @@ +#ifndef _RELAY_H +#define _RELAY_H + +struct dentry *relayfs_create_file(const char *name, + struct dentry *parent, + int mode, + struct rchan *chan); +extern int relayfs_remove(struct dentry *dentry); +extern int relay_buf_empty(struct rchan_buf *buf); +extern void relay_destroy_channel(struct kref *kref); + +#endif /* _RELAY_H */ diff --git a/include/linux/relayfs_fs.h b/include/linux/relayfs_fs.h new file mode 100644 index 00000000000..cfafc3e76bc --- /dev/null +++ b/include/linux/relayfs_fs.h @@ -0,0 +1,255 @@ +/* + * linux/include/linux/relayfs_fs.h + * + * Copyright (C) 2002, 2003 - Tom Zanussi (zanussi@us.ibm.com), IBM Corp + * Copyright (C) 1999, 2000, 2001, 2002 - Karim Yaghmour (karim@opersys.com) + * + * RelayFS definitions and declarations + */ + +#ifndef _LINUX_RELAYFS_FS_H +#define _LINUX_RELAYFS_FS_H + +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * Tracks changes to rchan_buf struct + */ +#define RELAYFS_CHANNEL_VERSION 5 + +/* + * Per-cpu relay channel buffer + */ +struct rchan_buf +{ + void *start; /* start of channel buffer */ + void *data; /* start of current sub-buffer */ + size_t offset; /* current offset into sub-buffer */ + size_t subbufs_produced; /* count of sub-buffers produced */ + size_t subbufs_consumed; /* count of sub-buffers consumed */ + struct rchan *chan; /* associated channel */ + wait_queue_head_t read_wait; /* reader wait queue */ + struct work_struct wake_readers; /* reader wake-up work struct */ + struct dentry *dentry; /* channel file dentry */ + struct kref kref; /* channel buffer refcount */ + struct page **page_array; /* array of current buffer pages */ + unsigned int page_count; /* number of current buffer pages */ + unsigned int finalized; /* buffer has been finalized */ + size_t *padding; /* padding counts per sub-buffer */ + size_t prev_padding; /* temporary variable */ + size_t bytes_consumed; /* bytes consumed in cur read subbuf */ + unsigned int cpu; /* this buf's cpu */ +} ____cacheline_aligned; + +/* + * Relay channel data structure + */ +struct rchan +{ + u32 version; /* the version of this struct */ + size_t subbuf_size; /* sub-buffer size */ + size_t n_subbufs; /* number of sub-buffers per buffer */ + size_t alloc_size; /* total buffer size allocated */ + struct rchan_callbacks *cb; /* client callbacks */ + struct kref kref; /* channel refcount */ + void *private_data; /* for user-defined data */ + struct rchan_buf *buf[NR_CPUS]; /* per-cpu channel buffers */ +}; + +/* + * Relayfs inode + */ +struct relayfs_inode_info +{ + struct inode vfs_inode; + struct rchan_buf *buf; +}; + +static inline struct relayfs_inode_info *RELAYFS_I(struct inode *inode) +{ + return container_of(inode, struct relayfs_inode_info, vfs_inode); +} + +/* + * Relay channel client callbacks + */ +struct rchan_callbacks +{ + /* + * subbuf_start - called on buffer-switch to a new sub-buffer + * @buf: the channel buffer containing the new sub-buffer + * @subbuf: the start of the new sub-buffer + * @prev_subbuf: the start of the previous sub-buffer + * @prev_padding: unused space at the end of previous sub-buffer + * + * The client should return 1 to continue logging, 0 to stop + * logging. + * + * NOTE: subbuf_start will also be invoked when the buffer is + * created, so that the first sub-buffer can be initialized + * if necessary. In this case, prev_subbuf will be NULL. + * + * NOTE: the client can reserve bytes at the beginning of the new + * sub-buffer by calling subbuf_start_reserve() in this callback. + */ + int (*subbuf_start) (struct rchan_buf *buf, + void *subbuf, + void *prev_subbuf, + size_t prev_padding); + + /* + * buf_mapped - relayfs buffer mmap notification + * @buf: the channel buffer + * @filp: relayfs file pointer + * + * Called when a relayfs file is successfully mmapped + */ + void (*buf_mapped)(struct rchan_buf *buf, + struct file *filp); + + /* + * buf_unmapped - relayfs buffer unmap notification + * @buf: the channel buffer + * @filp: relayfs file pointer + * + * Called when a relayfs file is successfully unmapped + */ + void (*buf_unmapped)(struct rchan_buf *buf, + struct file *filp); +}; + +/* + * relayfs kernel API, fs/relayfs/relay.c + */ + +struct rchan *relay_open(const char *base_filename, + struct dentry *parent, + size_t subbuf_size, + size_t n_subbufs, + struct rchan_callbacks *cb); +extern void relay_close(struct rchan *chan); +extern void relay_flush(struct rchan *chan); +extern void relay_subbufs_consumed(struct rchan *chan, + unsigned int cpu, + size_t consumed); +extern void relay_reset(struct rchan *chan); +extern int relay_buf_full(struct rchan_buf *buf); + +extern size_t relay_switch_subbuf(struct rchan_buf *buf, + size_t length); +extern struct dentry *relayfs_create_dir(const char *name, + struct dentry *parent); +extern int relayfs_remove_dir(struct dentry *dentry); + +/** + * relay_write - write data into the channel + * @chan: relay channel + * @data: data to be written + * @length: number of bytes to write + * + * Writes data into the current cpu's channel buffer. + * + * Protects the buffer by disabling interrupts. Use this + * if you might be logging from interrupt context. Try + * __relay_write() if you know you won't be logging from + * interrupt context. + */ +static inline void relay_write(struct rchan *chan, + const void *data, + size_t length) +{ + unsigned long flags; + struct rchan_buf *buf; + + local_irq_save(flags); + buf = chan->buf[smp_processor_id()]; + if (unlikely(buf->offset + length > chan->subbuf_size)) + length = relay_switch_subbuf(buf, length); + memcpy(buf->data + buf->offset, data, length); + buf->offset += length; + local_irq_restore(flags); +} + +/** + * __relay_write - write data into the channel + * @chan: relay channel + * @data: data to be written + * @length: number of bytes to write + * + * Writes data into the current cpu's channel buffer. + * + * Protects the buffer by disabling preemption. Use + * relay_write() if you might be logging from interrupt + * context. + */ +static inline void __relay_write(struct rchan *chan, + const void *data, + size_t length) +{ + struct rchan_buf *buf; + + buf = chan->buf[get_cpu()]; + if (unlikely(buf->offset + length > buf->chan->subbuf_size)) + length = relay_switch_subbuf(buf, length); + memcpy(buf->data + buf->offset, data, length); + buf->offset += length; + put_cpu(); +} + +/** + * relay_reserve - reserve slot in channel buffer + * @chan: relay channel + * @length: number of bytes to reserve + * + * Returns pointer to reserved slot, NULL if full. + * + * Reserves a slot in the current cpu's channel buffer. + * Does not protect the buffer at all - caller must provide + * appropriate synchronization. + */ +static inline void *relay_reserve(struct rchan *chan, size_t length) +{ + void *reserved; + struct rchan_buf *buf = chan->buf[smp_processor_id()]; + + if (unlikely(buf->offset + length > buf->chan->subbuf_size)) { + length = relay_switch_subbuf(buf, length); + if (!length) + return NULL; + } + reserved = buf->data + buf->offset; + buf->offset += length; + + return reserved; +} + +/** + * subbuf_start_reserve - reserve bytes at the start of a sub-buffer + * @buf: relay channel buffer + * @length: number of bytes to reserve + * + * Helper function used to reserve bytes at the beginning of + * a sub-buffer in the subbuf_start() callback. + */ +static inline void subbuf_start_reserve(struct rchan_buf *buf, + size_t length) +{ + BUG_ON(length >= buf->chan->subbuf_size - 1); + buf->offset = length; +} + +/* + * exported relayfs file operations, fs/relayfs/inode.c + */ + +extern struct file_operations relayfs_file_operations; + +#endif /* _LINUX_RELAYFS_FS_H */ + -- cgit v1.2.3-70-g09d2 From 820249bafe441dce5336ad544a5e709df42fceb5 Mon Sep 17 00:00:00 2001 From: John McCutchan Date: Tue, 6 Sep 2005 15:16:38 -0700 Subject: [PATCH] inotify speedup Bypass an inotify-related fastpath spinlock and several function calls on systems which have no inotify watches registered. Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/inotify.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'fs') diff --git a/fs/inotify.c b/fs/inotify.c index 2e4e2a57708..2fd97ef547f 100644 --- a/fs/inotify.c +++ b/fs/inotify.c @@ -37,6 +37,7 @@ #include static atomic_t inotify_cookie; +static atomic_t inotify_watches; static kmem_cache_t *watch_cachep; static kmem_cache_t *event_cachep; @@ -422,6 +423,7 @@ static struct inotify_watch *create_watch(struct inotify_device *dev, get_inotify_watch(watch); atomic_inc(&dev->user->inotify_watches); + atomic_inc(&inotify_watches); return watch; } @@ -454,6 +456,7 @@ static void remove_watch_no_event(struct inotify_watch *watch, list_del(&watch->d_list); atomic_dec(&dev->user->inotify_watches); + atomic_dec(&inotify_watches); idr_remove(&dev->idr, watch->wd); put_inotify_watch(watch); } @@ -532,6 +535,9 @@ void inotify_dentry_parent_queue_event(struct dentry *dentry, u32 mask, struct dentry *parent; struct inode *inode; + if (!atomic_read (&inotify_watches)) + return; + spin_lock(&dentry->d_lock); parent = dentry->d_parent; inode = parent->d_inode; @@ -1043,6 +1049,7 @@ static int __init inotify_setup(void) inotify_max_user_watches = 8192; atomic_set(&inotify_cookie, 0); + atomic_set(&inotify_watches, 0); watch_cachep = kmem_cache_create("inotify_watch_cache", sizeof(struct inotify_watch), -- cgit v1.2.3-70-g09d2 From 202e5979af4d91c7ca05892641131dee22653259 Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Tue, 6 Sep 2005 15:16:40 -0700 Subject: [PATCH] compat: be more consistent about [ug]id_t When I first wrote the compat layer patches, I was somewhat cavalier about the definition of compat_uid_t and compat_gid_t (or maybe I just misunderstood :-)). This patch makes the compat types much more consistent with the types we are being compatible with and hopefully will fix a few bugs along the way. compat type type in compat arch __compat_[ug]id_t __kernel_[ug]id_t __compat_[ug]id32_t __kernel_[ug]id32_t compat_[ug]id_t [ug]id_t The difference is that compat_uid_t is always 32 bits (for the archs we care about) but __compat_uid_t may be 16 bits on some. Signed-off-by: Stephen Rothwell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/mips/kernel/linux32.c | 16 ++++++++-------- fs/compat.c | 16 ++++++++-------- include/asm-ia64/compat.h | 20 ++++++++++---------- include/asm-mips/compat.h | 10 ++++++---- include/asm-parisc/compat.h | 10 ++++++---- include/asm-ppc64/compat.h | 18 ++++++++++-------- include/asm-s390/compat.h | 20 ++++++++++---------- include/asm-sparc64/compat.h | 18 ++++++++++-------- include/asm-x86_64/compat.h | 20 ++++++++++---------- include/linux/compat.h | 3 +++ ipc/compat.c | 12 ++++++------ 11 files changed, 87 insertions(+), 76 deletions(-) (limited to 'fs') diff --git a/arch/mips/kernel/linux32.c b/arch/mips/kernel/linux32.c index 4613219dd73..ece4564919d 100644 --- a/arch/mips/kernel/linux32.c +++ b/arch/mips/kernel/linux32.c @@ -546,20 +546,20 @@ struct msgbuf32 { s32 mtype; char mtext[1]; }; struct ipc_perm32 { key_t key; - compat_uid_t uid; - compat_gid_t gid; - compat_uid_t cuid; - compat_gid_t cgid; + __compat_uid_t uid; + __compat_gid_t gid; + __compat_uid_t cuid; + __compat_gid_t cgid; compat_mode_t mode; unsigned short seq; }; struct ipc64_perm32 { key_t key; - compat_uid_t uid; - compat_gid_t gid; - compat_uid_t cuid; - compat_gid_t cgid; + __compat_uid_t uid; + __compat_gid_t gid; + __compat_uid_t cuid; + __compat_gid_t cgid; compat_mode_t mode; unsigned short seq; unsigned short __pad1; diff --git a/fs/compat.c b/fs/compat.c index 6b06b6bae35..8e03d31eec3 100644 --- a/fs/compat.c +++ b/fs/compat.c @@ -720,14 +720,14 @@ compat_sys_io_submit(aio_context_t ctx_id, int nr, u32 __user *iocb) struct compat_ncp_mount_data { compat_int_t version; compat_uint_t ncp_fd; - compat_uid_t mounted_uid; + __compat_uid_t mounted_uid; compat_pid_t wdog_pid; unsigned char mounted_vol[NCP_VOLNAME_LEN + 1]; compat_uint_t time_out; compat_uint_t retry_count; compat_uint_t flags; - compat_uid_t uid; - compat_gid_t gid; + __compat_uid_t uid; + __compat_gid_t gid; compat_mode_t file_mode; compat_mode_t dir_mode; }; @@ -784,9 +784,9 @@ static void *do_ncp_super_data_conv(void *raw_data) struct compat_smb_mount_data { compat_int_t version; - compat_uid_t mounted_uid; - compat_uid_t uid; - compat_gid_t gid; + __compat_uid_t mounted_uid; + __compat_uid_t uid; + __compat_gid_t gid; compat_mode_t file_mode; compat_mode_t dir_mode; }; @@ -1808,8 +1808,8 @@ struct compat_nfsctl_export { compat_dev_t ex32_dev; compat_ino_t ex32_ino; compat_int_t ex32_flags; - compat_uid_t ex32_anon_uid; - compat_gid_t ex32_anon_gid; + __compat_uid_t ex32_anon_uid; + __compat_gid_t ex32_anon_gid; }; struct compat_nfsctl_fdparm { diff --git a/include/asm-ia64/compat.h b/include/asm-ia64/compat.h index 0c05e5bad8a..aaf11f4e916 100644 --- a/include/asm-ia64/compat.h +++ b/include/asm-ia64/compat.h @@ -13,10 +13,10 @@ typedef s32 compat_time_t; typedef s32 compat_clock_t; typedef s32 compat_key_t; typedef s32 compat_pid_t; -typedef u16 compat_uid_t; -typedef u16 compat_gid_t; -typedef u32 compat_uid32_t; -typedef u32 compat_gid32_t; +typedef u16 __compat_uid_t; +typedef u16 __compat_gid_t; +typedef u32 __compat_uid32_t; +typedef u32 __compat_gid32_t; typedef u16 compat_mode_t; typedef u32 compat_ino_t; typedef u16 compat_dev_t; @@ -50,8 +50,8 @@ struct compat_stat { compat_ino_t st_ino; compat_mode_t st_mode; compat_nlink_t st_nlink; - compat_uid_t st_uid; - compat_gid_t st_gid; + __compat_uid_t st_uid; + __compat_gid_t st_gid; compat_dev_t st_rdev; u16 __pad2; u32 st_size; @@ -120,10 +120,10 @@ typedef u32 compat_sigset_word; struct compat_ipc64_perm { compat_key_t key; - compat_uid32_t uid; - compat_gid32_t gid; - compat_uid32_t cuid; - compat_gid32_t cgid; + __compat_uid32_t uid; + __compat_gid32_t gid; + __compat_uid32_t cuid; + __compat_gid32_t cgid; unsigned short mode; unsigned short __pad1; unsigned short seq; diff --git a/include/asm-mips/compat.h b/include/asm-mips/compat.h index d78002afb1e..2c084cd4bc0 100644 --- a/include/asm-mips/compat.h +++ b/include/asm-mips/compat.h @@ -15,8 +15,10 @@ typedef s32 compat_clock_t; typedef s32 compat_suseconds_t; typedef s32 compat_pid_t; -typedef s32 compat_uid_t; -typedef s32 compat_gid_t; +typedef u32 __compat_uid_t; +typedef u32 __compat_gid_t; +typedef u32 __compat_uid32_t; +typedef u32 __compat_gid32_t; typedef u32 compat_mode_t; typedef u32 compat_ino_t; typedef u32 compat_dev_t; @@ -52,8 +54,8 @@ struct compat_stat { compat_ino_t st_ino; compat_mode_t st_mode; compat_nlink_t st_nlink; - compat_uid_t st_uid; - compat_gid_t st_gid; + __compat_uid32_t st_uid; + __compat_gid32_t st_gid; compat_dev_t st_rdev; s32 st_pad2[2]; compat_off_t st_size; diff --git a/include/asm-parisc/compat.h b/include/asm-parisc/compat.h index 7630d1ad239..38b918feead 100644 --- a/include/asm-parisc/compat.h +++ b/include/asm-parisc/compat.h @@ -13,8 +13,10 @@ typedef s32 compat_ssize_t; typedef s32 compat_time_t; typedef s32 compat_clock_t; typedef s32 compat_pid_t; -typedef u32 compat_uid_t; -typedef u32 compat_gid_t; +typedef u32 __compat_uid_t; +typedef u32 __compat_gid_t; +typedef u32 __compat_uid32_t; +typedef u32 __compat_gid32_t; typedef u16 compat_mode_t; typedef u32 compat_ino_t; typedef u32 compat_dev_t; @@ -67,8 +69,8 @@ struct compat_stat { compat_dev_t st_realdev; u16 st_basemode; u16 st_spareshort; - compat_uid_t st_uid; - compat_gid_t st_gid; + __compat_uid32_t st_uid; + __compat_gid32_t st_gid; u32 st_spare4[3]; }; diff --git a/include/asm-ppc64/compat.h b/include/asm-ppc64/compat.h index 12414f5fc66..6ec62cd2d1d 100644 --- a/include/asm-ppc64/compat.h +++ b/include/asm-ppc64/compat.h @@ -13,8 +13,10 @@ typedef s32 compat_ssize_t; typedef s32 compat_time_t; typedef s32 compat_clock_t; typedef s32 compat_pid_t; -typedef u32 compat_uid_t; -typedef u32 compat_gid_t; +typedef u32 __compat_uid_t; +typedef u32 __compat_gid_t; +typedef u32 __compat_uid32_t; +typedef u32 __compat_gid32_t; typedef u32 compat_mode_t; typedef u32 compat_ino_t; typedef u32 compat_dev_t; @@ -48,8 +50,8 @@ struct compat_stat { compat_ino_t st_ino; compat_mode_t st_mode; compat_nlink_t st_nlink; - compat_uid_t st_uid; - compat_gid_t st_gid; + __compat_uid32_t st_uid; + __compat_gid32_t st_gid; compat_dev_t st_rdev; compat_off_t st_size; compat_off_t st_blksize; @@ -144,10 +146,10 @@ static inline void __user *compat_alloc_user_space(long len) */ struct compat_ipc64_perm { compat_key_t key; - compat_uid_t uid; - compat_gid_t gid; - compat_uid_t cuid; - compat_gid_t cgid; + __compat_uid_t uid; + __compat_gid_t gid; + __compat_uid_t cuid; + __compat_gid_t cgid; compat_mode_t mode; unsigned int seq; unsigned int __pad2; diff --git a/include/asm-s390/compat.h b/include/asm-s390/compat.h index 7f8f544eb26..a007715f4ae 100644 --- a/include/asm-s390/compat.h +++ b/include/asm-s390/compat.h @@ -13,10 +13,10 @@ typedef s32 compat_ssize_t; typedef s32 compat_time_t; typedef s32 compat_clock_t; typedef s32 compat_pid_t; -typedef u16 compat_uid_t; -typedef u16 compat_gid_t; -typedef u32 compat_uid32_t; -typedef u32 compat_gid32_t; +typedef u16 __compat_uid_t; +typedef u16 __compat_gid_t; +typedef u32 __compat_uid32_t; +typedef u32 __compat_gid32_t; typedef u16 compat_mode_t; typedef u32 compat_ino_t; typedef u16 compat_dev_t; @@ -51,8 +51,8 @@ struct compat_stat { compat_ino_t st_ino; compat_mode_t st_mode; compat_nlink_t st_nlink; - compat_uid_t st_uid; - compat_gid_t st_gid; + __compat_uid_t st_uid; + __compat_gid_t st_gid; compat_dev_t st_rdev; u16 __pad2; u32 st_size; @@ -140,10 +140,10 @@ static inline void __user *compat_alloc_user_space(long len) struct compat_ipc64_perm { compat_key_t key; - compat_uid32_t uid; - compat_gid32_t gid; - compat_uid32_t cuid; - compat_gid32_t cgid; + __compat_uid32_t uid; + __compat_gid32_t gid; + __compat_uid32_t cuid; + __compat_gid32_t cgid; compat_mode_t mode; unsigned short __pad1; unsigned short seq; diff --git a/include/asm-sparc64/compat.h b/include/asm-sparc64/compat.h index b59122dd176..c73935dc7ba 100644 --- a/include/asm-sparc64/compat.h +++ b/include/asm-sparc64/compat.h @@ -12,8 +12,10 @@ typedef s32 compat_ssize_t; typedef s32 compat_time_t; typedef s32 compat_clock_t; typedef s32 compat_pid_t; -typedef u16 compat_uid_t; -typedef u16 compat_gid_t; +typedef u16 __compat_uid_t; +typedef u16 __compat_gid_t; +typedef u32 __compat_uid32_t; +typedef u32 __compat_gid32_t; typedef u16 compat_mode_t; typedef u32 compat_ino_t; typedef u16 compat_dev_t; @@ -47,8 +49,8 @@ struct compat_stat { compat_ino_t st_ino; compat_mode_t st_mode; compat_nlink_t st_nlink; - compat_uid_t st_uid; - compat_gid_t st_gid; + __compat_uid_t st_uid; + __compat_gid_t st_gid; compat_dev_t st_rdev; compat_off_t st_size; compat_time_t st_atime; @@ -177,10 +179,10 @@ static __inline__ void __user *compat_alloc_user_space(long len) struct compat_ipc64_perm { compat_key_t key; - __kernel_uid_t uid; - __kernel_gid_t gid; - __kernel_uid_t cuid; - __kernel_gid_t cgid; + __compat_uid32_t uid; + __compat_gid32_t gid; + __compat_uid32_t cuid; + __compat_gid32_t cgid; unsigned short __pad1; compat_mode_t mode; unsigned short __pad2; diff --git a/include/asm-x86_64/compat.h b/include/asm-x86_64/compat.h index d0f453c5adf..f0155c38f63 100644 --- a/include/asm-x86_64/compat.h +++ b/include/asm-x86_64/compat.h @@ -14,10 +14,10 @@ typedef s32 compat_ssize_t; typedef s32 compat_time_t; typedef s32 compat_clock_t; typedef s32 compat_pid_t; -typedef u16 compat_uid_t; -typedef u16 compat_gid_t; -typedef u32 compat_uid32_t; -typedef u32 compat_gid32_t; +typedef u16 __compat_uid_t; +typedef u16 __compat_gid_t; +typedef u32 __compat_uid32_t; +typedef u32 __compat_gid32_t; typedef u16 compat_mode_t; typedef u32 compat_ino_t; typedef u16 compat_dev_t; @@ -52,8 +52,8 @@ struct compat_stat { compat_ino_t st_ino; compat_mode_t st_mode; compat_nlink_t st_nlink; - compat_uid_t st_uid; - compat_gid_t st_gid; + __compat_uid_t st_uid; + __compat_gid_t st_gid; compat_dev_t st_rdev; u16 __pad2; u32 st_size; @@ -122,10 +122,10 @@ typedef u32 compat_sigset_word; struct compat_ipc64_perm { compat_key_t key; - compat_uid32_t uid; - compat_gid32_t gid; - compat_uid32_t cuid; - compat_gid32_t cgid; + __compat_uid32_t uid; + __compat_gid32_t gid; + __compat_uid32_t cuid; + __compat_gid32_t cgid; unsigned short mode; unsigned short __pad1; unsigned short seq; diff --git a/include/linux/compat.h b/include/linux/compat.h index b58b7d6f2fd..f9ca534787e 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -18,6 +18,9 @@ #define compat_jiffies_to_clock_t(x) \ (((unsigned long)(x) * COMPAT_USER_HZ) / HZ) +typedef __compat_uid32_t compat_uid_t; +typedef __compat_gid32_t compat_gid_t; + struct rusage; struct compat_itimerspec { diff --git a/ipc/compat.c b/ipc/compat.c index 3881d564c66..1fe95f6659d 100644 --- a/ipc/compat.c +++ b/ipc/compat.c @@ -42,10 +42,10 @@ struct compat_msgbuf { struct compat_ipc_perm { key_t key; - compat_uid_t uid; - compat_gid_t gid; - compat_uid_t cuid; - compat_gid_t cgid; + __compat_uid_t uid; + __compat_gid_t gid; + __compat_uid_t cuid; + __compat_gid_t cgid; compat_mode_t mode; unsigned short seq; }; @@ -174,8 +174,8 @@ static inline int __put_compat_ipc_perm(struct ipc64_perm *p, struct compat_ipc_perm __user *up) { int err; - compat_uid_t u; - compat_gid_t g; + __compat_uid_t u; + __compat_gid_t g; err = __put_user(p->key, &up->key); SET_UID(u, p->uid); -- cgit v1.2.3-70-g09d2 From 022a4a7bbdefdedc2706a13c81c832d8c3173c6d Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Tue, 6 Sep 2005 15:16:41 -0700 Subject: [PATCH] fs/jbd/: cleanups This patch contains the following cleanups: - make needlessly global functions static - journal.c: remove the unused global function __journal_internal_check and move the check to journal_init - remove the following write-only global variable: - journal.c: current_journal - remove the following unneeded EXPORT_SYMBOL: - journal.c: journal_recover Signed-off-by: Adrian Bunk Acked-by: Andreas Dilger Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/jbd/journal.c | 34 ++++++++++++++-------------------- fs/jbd/revoke.c | 3 ++- include/linux/jbd.h | 1 - 3 files changed, 16 insertions(+), 22 deletions(-) (limited to 'fs') diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c index 5e7b4394951..71cfe25d716 100644 --- a/fs/jbd/journal.c +++ b/fs/jbd/journal.c @@ -65,7 +65,6 @@ EXPORT_SYMBOL(journal_set_features); EXPORT_SYMBOL(journal_create); EXPORT_SYMBOL(journal_load); EXPORT_SYMBOL(journal_destroy); -EXPORT_SYMBOL(journal_recover); EXPORT_SYMBOL(journal_update_superblock); EXPORT_SYMBOL(journal_abort); EXPORT_SYMBOL(journal_errno); @@ -81,6 +80,7 @@ EXPORT_SYMBOL(journal_try_to_free_buffers); EXPORT_SYMBOL(journal_force_commit); static int journal_convert_superblock_v1(journal_t *, journal_superblock_t *); +static void __journal_abort_soft (journal_t *journal, int errno); /* * Helper function used to manage commit timeouts @@ -93,16 +93,6 @@ static void commit_timeout(unsigned long __data) wake_up_process(p); } -/* Static check for data structure consistency. There's no code - * invoked --- we'll just get a linker failure if things aren't right. - */ -void __journal_internal_check(void) -{ - extern void journal_bad_superblock_size(void); - if (sizeof(struct journal_superblock_s) != 1024) - journal_bad_superblock_size(); -} - /* * kjournald: The main thread function used to manage a logging device * journal. @@ -119,16 +109,12 @@ void __journal_internal_check(void) * known as checkpointing, and this thread is responsible for that job. */ -journal_t *current_journal; // AKPM: debug - -int kjournald(void *arg) +static int kjournald(void *arg) { journal_t *journal = (journal_t *) arg; transaction_t *transaction; struct timer_list timer; - current_journal = journal; - daemonize("kjournald"); /* Set up an interval timer which can be used to trigger a @@ -1439,7 +1425,7 @@ int journal_wipe(journal_t *journal, int write) * device this journal is present. */ -const char *journal_dev_name(journal_t *journal, char *buffer) +static const char *journal_dev_name(journal_t *journal, char *buffer) { struct block_device *bdev; @@ -1485,7 +1471,7 @@ void __journal_abort_hard(journal_t *journal) /* Soft abort: record the abort error status in the journal superblock, * but don't do any other IO. */ -void __journal_abort_soft (journal_t *journal, int errno) +static void __journal_abort_soft (journal_t *journal, int errno) { if (journal->j_flags & JFS_ABORT) return; @@ -1880,7 +1866,7 @@ EXPORT_SYMBOL(journal_enable_debug); static struct proc_dir_entry *proc_jbd_debug; -int read_jbd_debug(char *page, char **start, off_t off, +static int read_jbd_debug(char *page, char **start, off_t off, int count, int *eof, void *data) { int ret; @@ -1890,7 +1876,7 @@ int read_jbd_debug(char *page, char **start, off_t off, return ret; } -int write_jbd_debug(struct file *file, const char __user *buffer, +static int write_jbd_debug(struct file *file, const char __user *buffer, unsigned long count, void *data) { char buf[32]; @@ -1979,6 +1965,14 @@ static int __init journal_init(void) { int ret; +/* Static check for data structure consistency. There's no code + * invoked --- we'll just get a linker failure if things aren't right. + */ + extern void journal_bad_superblock_size(void); + if (sizeof(struct journal_superblock_s) != 1024) + journal_bad_superblock_size(); + + ret = journal_init_caches(); if (ret != 0) journal_destroy_caches(); diff --git a/fs/jbd/revoke.c b/fs/jbd/revoke.c index d327a598f86..93b9f45eebd 100644 --- a/fs/jbd/revoke.c +++ b/fs/jbd/revoke.c @@ -116,7 +116,8 @@ static inline int hash(journal_t *journal, unsigned long block) (block << (hash_shift - 12))) & (table->hash_size - 1); } -int insert_revoke_hash(journal_t *journal, unsigned long blocknr, tid_t seq) +static int insert_revoke_hash(journal_t *journal, unsigned long blocknr, + tid_t seq) { struct list_head *hash_list; struct jbd_revoke_record_s *record; diff --git a/include/linux/jbd.h b/include/linux/jbd.h index 593407e865b..84321a4cac9 100644 --- a/include/linux/jbd.h +++ b/include/linux/jbd.h @@ -914,7 +914,6 @@ extern int journal_wipe (journal_t *, int); extern int journal_skip_recovery (journal_t *); extern void journal_update_superblock (journal_t *, int); extern void __journal_abort_hard (journal_t *); -extern void __journal_abort_soft (journal_t *, int); extern void journal_abort (journal_t *, int); extern int journal_errno (journal_t *); extern void journal_ack_err (journal_t *); -- cgit v1.2.3-70-g09d2 From 3676347a5e216a7fec7f8eedbbcf8bed6b9c4e40 Mon Sep 17 00:00:00 2001 From: Peter Osterlund Date: Tue, 6 Sep 2005 15:16:42 -0700 Subject: [PATCH] kill bio->bi_set Jens: ->bi_set is totally unnecessary bloat of struct bio. Just define a proper destructor for the bio and it already knows what bio_set it belongs too. Peter: Fixed the bugs. Signed-off-by: Jens Axboe Signed-off-by: Peter Osterlund Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/md/dm-io.c | 6 ++++++ drivers/md/dm.c | 6 ++++++ fs/bio.c | 32 +++++++++++++++++++++----------- include/linux/bio.h | 2 +- 4 files changed, 34 insertions(+), 12 deletions(-) (limited to 'fs') diff --git a/drivers/md/dm-io.c b/drivers/md/dm-io.c index 45754bb6a79..9de000131a8 100644 --- a/drivers/md/dm-io.c +++ b/drivers/md/dm-io.c @@ -239,6 +239,11 @@ static void vm_dp_init(struct dpages *dp, void *data) dp->context_ptr = data; } +static void dm_bio_destructor(struct bio *bio) +{ + bio_free(bio, _bios); +} + /*----------------------------------------------------------------- * IO routines that accept a list of pages. *---------------------------------------------------------------*/ @@ -263,6 +268,7 @@ static void do_region(int rw, unsigned int region, struct io_region *where, bio->bi_bdev = where->bdev; bio->bi_end_io = endio; bio->bi_private = io; + bio->bi_destructor = dm_bio_destructor; bio_set_region(bio, region); /* diff --git a/drivers/md/dm.c b/drivers/md/dm.c index d487d9deb98..930b9fc2795 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -399,6 +399,11 @@ struct clone_info { unsigned short idx; }; +static void dm_bio_destructor(struct bio *bio) +{ + bio_free(bio, dm_set); +} + /* * Creates a little bio that is just does part of a bvec. */ @@ -410,6 +415,7 @@ static struct bio *split_bvec(struct bio *bio, sector_t sector, struct bio_vec *bv = bio->bi_io_vec + idx; clone = bio_alloc_bioset(GFP_NOIO, 1, dm_set); + clone->bi_destructor = dm_bio_destructor; *clone->bi_io_vec = *bv; clone->bi_sector = sector; diff --git a/fs/bio.c b/fs/bio.c index 1f2d4649b18..bf3ec9d2b54 100644 --- a/fs/bio.c +++ b/fs/bio.c @@ -104,18 +104,22 @@ static inline struct bio_vec *bvec_alloc_bs(unsigned int __nocast gfp_mask, int return bvl; } -/* - * default destructor for a bio allocated with bio_alloc_bioset() - */ -static void bio_destructor(struct bio *bio) +void bio_free(struct bio *bio, struct bio_set *bio_set) { const int pool_idx = BIO_POOL_IDX(bio); - struct bio_set *bs = bio->bi_set; BIO_BUG_ON(pool_idx >= BIOVEC_NR_POOLS); - mempool_free(bio->bi_io_vec, bs->bvec_pools[pool_idx]); - mempool_free(bio, bs->bio_pool); + mempool_free(bio->bi_io_vec, bio_set->bvec_pools[pool_idx]); + mempool_free(bio, bio_set->bio_pool); +} + +/* + * default destructor for a bio allocated with bio_alloc_bioset() + */ +static void bio_fs_destructor(struct bio *bio) +{ + bio_free(bio, fs_bio_set); } inline void bio_init(struct bio *bio) @@ -171,8 +175,6 @@ struct bio *bio_alloc_bioset(unsigned int __nocast gfp_mask, int nr_iovecs, stru bio->bi_max_vecs = bvec_slabs[idx].nr_vecs; } bio->bi_io_vec = bvl; - bio->bi_destructor = bio_destructor; - bio->bi_set = bs; } out: return bio; @@ -180,7 +182,12 @@ out: struct bio *bio_alloc(unsigned int __nocast gfp_mask, int nr_iovecs) { - return bio_alloc_bioset(gfp_mask, nr_iovecs, fs_bio_set); + struct bio *bio = bio_alloc_bioset(gfp_mask, nr_iovecs, fs_bio_set); + + if (bio) + bio->bi_destructor = bio_fs_destructor; + + return bio; } void zero_fill_bio(struct bio *bio) @@ -273,8 +280,10 @@ struct bio *bio_clone(struct bio *bio, unsigned int __nocast gfp_mask) { struct bio *b = bio_alloc_bioset(gfp_mask, bio->bi_max_vecs, fs_bio_set); - if (b) + if (b) { + b->bi_destructor = bio_fs_destructor; __bio_clone(b, bio); + } return b; } @@ -1075,6 +1084,7 @@ subsys_initcall(init_bio); EXPORT_SYMBOL(bio_alloc); EXPORT_SYMBOL(bio_put); +EXPORT_SYMBOL(bio_free); EXPORT_SYMBOL(bio_endio); EXPORT_SYMBOL(bio_init); EXPORT_SYMBOL(__bio_clone); diff --git a/include/linux/bio.h b/include/linux/bio.h index 36ef29fa0d8..69e047989f1 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -111,7 +111,6 @@ struct bio { void *bi_private; bio_destructor_t *bi_destructor; /* destructor */ - struct bio_set *bi_set; /* memory pools set */ }; /* @@ -280,6 +279,7 @@ extern void bioset_free(struct bio_set *); extern struct bio *bio_alloc(unsigned int __nocast, int); extern struct bio *bio_alloc_bioset(unsigned int __nocast, int, struct bio_set *); extern void bio_put(struct bio *); +extern void bio_free(struct bio *, struct bio_set *); extern void bio_endio(struct bio *, unsigned int, int); struct request_queue; -- cgit v1.2.3-70-g09d2 From 5dd42c262bd742fa3602180bbe5550b4828de8f3 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Tue, 6 Sep 2005 15:16:43 -0700 Subject: [PATCH] remove register_ioctl32_conversion and unregister_ioctl32_conversion All users have been converted. Signed-off-by: Adrian Bunk Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/feature-removal-schedule.txt | 8 --- fs/compat.c | 90 ------------------------------ include/linux/ioctl32.h | 22 -------- 3 files changed, 120 deletions(-) (limited to 'fs') diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt index 363909056e4..95e74435312 100644 --- a/Documentation/feature-removal-schedule.txt +++ b/Documentation/feature-removal-schedule.txt @@ -51,14 +51,6 @@ Who: Adrian Bunk --------------------------- -What: register_ioctl32_conversion() / unregister_ioctl32_conversion() -When: April 2005 -Why: Replaced by ->compat_ioctl in file_operations and other method - vecors. -Who: Andi Kleen , Christoph Hellwig - ---------------------------- - What: RCU API moves to EXPORT_SYMBOL_GPL When: April 2006 Files: include/linux/rcupdate.h, kernel/rcupdate.c diff --git a/fs/compat.c b/fs/compat.c index 8e03d31eec3..2eb03c49b07 100644 --- a/fs/compat.c +++ b/fs/compat.c @@ -310,96 +310,6 @@ static int __init init_sys32_ioctl(void) __initcall(init_sys32_ioctl); -int register_ioctl32_conversion(unsigned int cmd, - ioctl_trans_handler_t handler) -{ - struct ioctl_trans *t; - struct ioctl_trans *new_t; - unsigned long hash = ioctl32_hash(cmd); - - new_t = kmalloc(sizeof(*new_t), GFP_KERNEL); - if (!new_t) - return -ENOMEM; - - down_write(&ioctl32_sem); - for (t = ioctl32_hash_table[hash]; t; t = t->next) { - if (t->cmd == cmd) { - printk(KERN_ERR "Trying to register duplicated ioctl32 " - "handler %x\n", cmd); - up_write(&ioctl32_sem); - kfree(new_t); - return -EINVAL; - } - } - new_t->next = NULL; - new_t->cmd = cmd; - new_t->handler = handler; - ioctl32_insert_translation(new_t); - - up_write(&ioctl32_sem); - return 0; -} -EXPORT_SYMBOL(register_ioctl32_conversion); - -static inline int builtin_ioctl(struct ioctl_trans *t) -{ - return t >= ioctl_start && t < (ioctl_start + ioctl_table_size); -} - -/* Problem: - This function cannot unregister duplicate ioctls, because they are not - unique. - When they happen we need to extend the prototype to pass the handler too. */ - -int unregister_ioctl32_conversion(unsigned int cmd) -{ - unsigned long hash = ioctl32_hash(cmd); - struct ioctl_trans *t, *t1; - - down_write(&ioctl32_sem); - - t = ioctl32_hash_table[hash]; - if (!t) { - up_write(&ioctl32_sem); - return -EINVAL; - } - - if (t->cmd == cmd) { - if (builtin_ioctl(t)) { - printk("%p tried to unregister builtin ioctl %x\n", - __builtin_return_address(0), cmd); - } else { - ioctl32_hash_table[hash] = t->next; - up_write(&ioctl32_sem); - kfree(t); - return 0; - } - } - while (t->next) { - t1 = t->next; - if (t1->cmd == cmd) { - if (builtin_ioctl(t1)) { - printk("%p tried to unregister builtin " - "ioctl %x\n", - __builtin_return_address(0), cmd); - goto out; - } else { - t->next = t1->next; - up_write(&ioctl32_sem); - kfree(t1); - return 0; - } - } - t = t1; - } - printk(KERN_ERR "Trying to free unknown 32bit ioctl handler %x\n", - cmd); -out: - up_write(&ioctl32_sem); - return -EINVAL; -} -EXPORT_SYMBOL(unregister_ioctl32_conversion); - static void compat_ioctl_error(struct file *filp, unsigned int fd, unsigned int cmd, unsigned long arg) { diff --git a/include/linux/ioctl32.h b/include/linux/ioctl32.h index e8c4af32b3b..948809d9991 100644 --- a/include/linux/ioctl32.h +++ b/include/linux/ioctl32.h @@ -14,26 +14,4 @@ struct ioctl_trans { struct ioctl_trans *next; }; -/* - * Register an 32bit ioctl translation handler for ioctl cmd. - * - * handler == NULL: use 64bit ioctl handler. - * arguments to handler: fd: file descriptor - * cmd: ioctl command. - * arg: ioctl argument - * struct file *file: file descriptor pointer. - */ - -#ifdef CONFIG_COMPAT -extern int __deprecated register_ioctl32_conversion(unsigned int cmd, - ioctl_trans_handler_t handler); -extern int __deprecated unregister_ioctl32_conversion(unsigned int cmd); - -#else - -#define register_ioctl32_conversion(cmd, handler) ({ 0; }) -#define unregister_ioctl32_conversion(cmd) ({ 0; }) - -#endif - #endif -- cgit v1.2.3-70-g09d2 From 8fc2751beb0941966d3a97b26544e8585e428c08 Mon Sep 17 00:00:00 2001 From: Mark Bellon Date: Tue, 6 Sep 2005 15:16:54 -0700 Subject: [PATCH] disk quotas fail when /etc/mtab is symlinked to /proc/mounts If /etc/mtab is a regular file all of the mount options (of a file system) are written to /etc/mtab by the mount command. The quota tools look there for the quota strings for their operation. If, however, /etc/mtab is a symlink to /proc/mounts (a "good thing" in some environments) the tools don't write anything - they assume the kernel will take care of things. While the quota options are sent down to the kernel via the mount system call and the file system codes handle them properly unfortunately there is no code to echo the quota strings into /proc/mounts and the quota tools fail in the symlink case. The attached patchs modify the EXT[2|3] and JFS codes to add the necessary hooks. The show_options function of each file system in these patches currently deal with only those things that seemed related to quotas; especially in the EXT3 case more can be done (later?). Jan Kara also noted the difficulty in moving these changes above the FS codes responding similarly to myself to Andrew's comment about possible VFS migration. Issue summary: - FS codes have to process the entire string of options anyway. - Only FS codes that use quotas must have a show_options function (for quotas to work properly) however quotas are only used in a small number of FS. - Since most of the quota using FS support other options these FS codes should have the a show_options function to show those options - and the quota echoing becomes virtually negligible. Based on feedback I have modified my patches from the original: JFS a missing patch has been restored to the posting EXT[2|3] and JFS always use the show_options function - Each FS has at least one FS specific option displayed - QUOTA output is under a CONFIG_QUOTA ifdef - a follow-on patch will add a multitude of options for each FS EXT[2|3] and JFS "quota" is treated as "usrquota" EXT3 journalled data check for journalled quota removed EXT[2|3] mount when quota specified but not compiled in - no changes from my original patch. I tested the patch and the codes warn but - still mount. With all due respection I believe the comments otherwise were a - misread of the patch. Please reread/test and comment. XFS patch removed - the XFS team already made the necessary changes EXT3 mixing old and new quotas are handled differently (not purely exclusive) - if old and new quotas for the same type are used together the old type is silently depricated for compatability (e.g. usrquota and usrjquota) - mixing of old and new quotas is an error (e.g. usrjquota and grpquota) Signed-off-by: Mark Bellon Acked-by: Dave Kleikamp Cc: Jan Kara Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ext2/super.c | 59 +++++++++++++++++++++++++++---- fs/ext3/super.c | 92 +++++++++++++++++++++++++++++++++++++++++++------ fs/jfs/jfs_filsys.h | 3 ++ fs/jfs/super.c | 48 ++++++++++++++++++++++++-- include/linux/ext2_fs.h | 3 ++ include/linux/ext3_fs.h | 2 ++ 6 files changed, 186 insertions(+), 21 deletions(-) (limited to 'fs') diff --git a/fs/ext2/super.c b/fs/ext2/super.c index dcfe331dc4c..3c0c7c6a5b4 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -27,6 +28,8 @@ #include #include #include +#include +#include #include #include "ext2.h" #include "xattr.h" @@ -201,6 +204,26 @@ static void ext2_clear_inode(struct inode *inode) #endif } +static int ext2_show_options(struct seq_file *seq, struct vfsmount *vfs) +{ + struct ext2_sb_info *sbi = EXT2_SB(vfs->mnt_sb); + + if (sbi->s_mount_opt & EXT2_MOUNT_GRPID) + seq_puts(seq, ",grpid"); + else + seq_puts(seq, ",nogrpid"); + +#if defined(CONFIG_QUOTA) + if (sbi->s_mount_opt & EXT2_MOUNT_USRQUOTA) + seq_puts(seq, ",usrquota"); + + if (sbi->s_mount_opt & EXT2_MOUNT_GRPQUOTA) + seq_puts(seq, ",grpquota"); +#endif + + return 0; +} + #ifdef CONFIG_QUOTA static ssize_t ext2_quota_read(struct super_block *sb, int type, char *data, size_t len, loff_t off); static ssize_t ext2_quota_write(struct super_block *sb, int type, const char *data, size_t len, loff_t off); @@ -218,6 +241,7 @@ static struct super_operations ext2_sops = { .statfs = ext2_statfs, .remount_fs = ext2_remount, .clear_inode = ext2_clear_inode, + .show_options = ext2_show_options, #ifdef CONFIG_QUOTA .quota_read = ext2_quota_read, .quota_write = ext2_quota_write, @@ -256,10 +280,11 @@ static unsigned long get_sb_block(void **data) enum { Opt_bsd_df, Opt_minix_df, Opt_grpid, Opt_nogrpid, - Opt_resgid, Opt_resuid, Opt_sb, Opt_err_cont, Opt_err_panic, Opt_err_ro, - Opt_nouid32, Opt_check, Opt_nocheck, Opt_debug, Opt_oldalloc, Opt_orlov, Opt_nobh, - Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl, Opt_xip, - Opt_ignore, Opt_err, + Opt_resgid, Opt_resuid, Opt_sb, Opt_err_cont, Opt_err_panic, + Opt_err_ro, Opt_nouid32, Opt_check, Opt_nocheck, Opt_debug, + Opt_oldalloc, Opt_orlov, Opt_nobh, Opt_user_xattr, Opt_nouser_xattr, + Opt_acl, Opt_noacl, Opt_xip, Opt_ignore, Opt_err, Opt_quota, + Opt_usrquota, Opt_grpquota }; static match_table_t tokens = { @@ -288,10 +313,10 @@ static match_table_t tokens = { {Opt_acl, "acl"}, {Opt_noacl, "noacl"}, {Opt_xip, "xip"}, - {Opt_ignore, "grpquota"}, + {Opt_grpquota, "grpquota"}, {Opt_ignore, "noquota"}, - {Opt_ignore, "quota"}, - {Opt_ignore, "usrquota"}, + {Opt_quota, "quota"}, + {Opt_usrquota, "usrquota"}, {Opt_err, NULL} }; @@ -406,6 +431,26 @@ static int parse_options (char * options, printk("EXT2 xip option not supported\n"); #endif break; + +#if defined(CONFIG_QUOTA) + case Opt_quota: + case Opt_usrquota: + set_opt(sbi->s_mount_opt, USRQUOTA); + break; + + case Opt_grpquota: + set_opt(sbi->s_mount_opt, GRPQUOTA); + break; +#else + case Opt_quota: + case Opt_usrquota: + case Opt_grpquota: + printk(KERN_ERR + "EXT2-fs: quota operations not supported.\n"); + + break; +#endif + case Opt_ignore: break; default: diff --git a/fs/ext3/super.c b/fs/ext3/super.c index 3c3c6e399fb..a93c3609025 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c @@ -35,6 +35,7 @@ #include #include #include +#include #include #include "xattr.h" #include "acl.h" @@ -509,8 +510,41 @@ static void ext3_clear_inode(struct inode *inode) kfree(rsv); } -#ifdef CONFIG_QUOTA +static int ext3_show_options(struct seq_file *seq, struct vfsmount *vfs) +{ + struct ext3_sb_info *sbi = EXT3_SB(vfs->mnt_sb); + + if (sbi->s_mount_opt & EXT3_MOUNT_JOURNAL_DATA) + seq_puts(seq, ",data=journal"); + + if (sbi->s_mount_opt & EXT3_MOUNT_ORDERED_DATA) + seq_puts(seq, ",data=ordered"); + + if (sbi->s_mount_opt & EXT3_MOUNT_WRITEBACK_DATA) + seq_puts(seq, ",data=writeback"); + +#if defined(CONFIG_QUOTA) + if (sbi->s_jquota_fmt) + seq_printf(seq, ",jqfmt=%s", + (sbi->s_jquota_fmt == QFMT_VFS_OLD) ? "vfsold": "vfsv0"); + + if (sbi->s_qf_names[USRQUOTA]) + seq_printf(seq, ",usrjquota=%s", sbi->s_qf_names[USRQUOTA]); + + if (sbi->s_qf_names[GRPQUOTA]) + seq_printf(seq, ",grpjquota=%s", sbi->s_qf_names[GRPQUOTA]); + if (sbi->s_mount_opt & EXT3_MOUNT_USRQUOTA) + seq_puts(seq, ",usrquota"); + + if (sbi->s_mount_opt & EXT3_MOUNT_GRPQUOTA) + seq_puts(seq, ",grpquota"); +#endif + + return 0; +} + +#ifdef CONFIG_QUOTA #define QTYPE2NAME(t) ((t)==USRQUOTA?"user":"group") #define QTYPE2MOPT(on, t) ((t)==USRQUOTA?((on)##USRJQUOTA):((on)##GRPJQUOTA)) @@ -569,6 +603,7 @@ static struct super_operations ext3_sops = { .statfs = ext3_statfs, .remount_fs = ext3_remount, .clear_inode = ext3_clear_inode, + .show_options = ext3_show_options, #ifdef CONFIG_QUOTA .quota_read = ext3_quota_read, .quota_write = ext3_quota_write, @@ -590,7 +625,8 @@ enum { Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback, Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota, Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota, - Opt_ignore, Opt_barrier, Opt_err, Opt_resize, + Opt_ignore, Opt_barrier, Opt_err, Opt_resize, Opt_usrquota, + Opt_grpquota }; static match_table_t tokens = { @@ -634,10 +670,10 @@ static match_table_t tokens = { {Opt_grpjquota, "grpjquota=%s"}, {Opt_jqfmt_vfsold, "jqfmt=vfsold"}, {Opt_jqfmt_vfsv0, "jqfmt=vfsv0"}, - {Opt_quota, "grpquota"}, + {Opt_grpquota, "grpquota"}, {Opt_noquota, "noquota"}, {Opt_quota, "quota"}, - {Opt_quota, "usrquota"}, + {Opt_usrquota, "usrquota"}, {Opt_barrier, "barrier=%u"}, {Opt_err, NULL}, {Opt_resize, "resize"}, @@ -903,7 +939,13 @@ clear_qf_name: sbi->s_jquota_fmt = QFMT_VFS_V0; break; case Opt_quota: + case Opt_usrquota: set_opt(sbi->s_mount_opt, QUOTA); + set_opt(sbi->s_mount_opt, USRQUOTA); + break; + case Opt_grpquota: + set_opt(sbi->s_mount_opt, QUOTA); + set_opt(sbi->s_mount_opt, GRPQUOTA); break; case Opt_noquota: if (sb_any_quota_enabled(sb)) { @@ -912,8 +954,13 @@ clear_qf_name: return 0; } clear_opt(sbi->s_mount_opt, QUOTA); + clear_opt(sbi->s_mount_opt, USRQUOTA); + clear_opt(sbi->s_mount_opt, GRPQUOTA); break; #else + case Opt_quota: + case Opt_usrquota: + case Opt_grpquota: case Opt_usrjquota: case Opt_grpjquota: case Opt_offusrjquota: @@ -924,7 +971,6 @@ clear_qf_name: "EXT3-fs: journalled quota options not " "supported.\n"); break; - case Opt_quota: case Opt_noquota: break; #endif @@ -962,14 +1008,38 @@ clear_qf_name: } } #ifdef CONFIG_QUOTA - if (!sbi->s_jquota_fmt && (sbi->s_qf_names[USRQUOTA] || - sbi->s_qf_names[GRPQUOTA])) { - printk(KERN_ERR - "EXT3-fs: journalled quota format not specified.\n"); - return 0; + if (sbi->s_qf_names[USRQUOTA] || sbi->s_qf_names[GRPQUOTA]) { + if ((sbi->s_mount_opt & EXT3_MOUNT_USRQUOTA) && + sbi->s_qf_names[USRQUOTA]) + clear_opt(sbi->s_mount_opt, USRQUOTA); + + if ((sbi->s_mount_opt & EXT3_MOUNT_GRPQUOTA) && + sbi->s_qf_names[GRPQUOTA]) + clear_opt(sbi->s_mount_opt, GRPQUOTA); + + if ((sbi->s_qf_names[USRQUOTA] && + (sbi->s_mount_opt & EXT3_MOUNT_GRPQUOTA)) || + (sbi->s_qf_names[GRPQUOTA] && + (sbi->s_mount_opt & EXT3_MOUNT_USRQUOTA))) { + printk(KERN_ERR "EXT3-fs: old and new quota " + "format mixing.\n"); + return 0; + } + + if (!sbi->s_jquota_fmt) { + printk(KERN_ERR "EXT3-fs: journalled quota format " + "not specified.\n"); + return 0; + } + } else { + if (sbi->s_jquota_fmt) { + printk(KERN_ERR "EXT3-fs: journalled quota format " + "specified with no journalling " + "enabled.\n"); + return 0; + } } #endif - return 1; } diff --git a/fs/jfs/jfs_filsys.h b/fs/jfs/jfs_filsys.h index 86ccac80f0a..72a5588faec 100644 --- a/fs/jfs/jfs_filsys.h +++ b/fs/jfs/jfs_filsys.h @@ -37,6 +37,9 @@ #define JFS_ERR_CONTINUE 0x00000004 /* continue */ #define JFS_ERR_PANIC 0x00000008 /* panic */ +#define JFS_USRQUOTA 0x00000010 +#define JFS_GRPQUOTA 0x00000020 + /* platform option (conditional compilation) */ #define JFS_AIX 0x80000000 /* AIX support */ /* POSIX name/directory support */ diff --git a/fs/jfs/super.c b/fs/jfs/super.c index 9ff89720f93..71bc34b96b2 100644 --- a/fs/jfs/super.c +++ b/fs/jfs/super.c @@ -23,9 +23,11 @@ #include #include #include +#include #include #include #include +#include #include "jfs_incore.h" #include "jfs_filsys.h" @@ -192,7 +194,8 @@ static void jfs_put_super(struct super_block *sb) enum { Opt_integrity, Opt_nointegrity, Opt_iocharset, Opt_resize, - Opt_resize_nosize, Opt_errors, Opt_ignore, Opt_err, + Opt_resize_nosize, Opt_errors, Opt_ignore, Opt_err, Opt_quota, + Opt_usrquota, Opt_grpquota }; static match_table_t tokens = { @@ -204,8 +207,8 @@ static match_table_t tokens = { {Opt_errors, "errors=%s"}, {Opt_ignore, "noquota"}, {Opt_ignore, "quota"}, - {Opt_ignore, "usrquota"}, - {Opt_ignore, "grpquota"}, + {Opt_usrquota, "usrquota"}, + {Opt_grpquota, "grpquota"}, {Opt_err, NULL} }; @@ -293,6 +296,24 @@ static int parse_options(char *options, struct super_block *sb, s64 *newLVSize, } break; } + +#if defined(CONFIG_QUOTA) + case Opt_quota: + case Opt_usrquota: + *flag |= JFS_USRQUOTA; + break; + case Opt_grpquota: + *flag |= JFS_GRPQUOTA; + break; +#else + case Opt_usrquota: + case Opt_grpquota: + case Opt_quota: + printk(KERN_ERR + "JFS: quota operations not supported\n"); + break; +#endif + default: printk("jfs: Unrecognized mount option \"%s\" " " or missing value\n", p); @@ -539,6 +560,26 @@ static int jfs_sync_fs(struct super_block *sb, int wait) return 0; } +static int jfs_show_options(struct seq_file *seq, struct vfsmount *vfs) +{ + struct jfs_sb_info *sbi = JFS_SBI(vfs->mnt_sb); + + if (sbi->flag & JFS_NOINTEGRITY) + seq_puts(seq, ",nointegrity"); + else + seq_puts(seq, ",integrity"); + +#if defined(CONFIG_QUOTA) + if (sbi->flag & JFS_USRQUOTA) + seq_puts(seq, ",usrquota"); + + if (sbi->flag & JFS_GRPQUOTA) + seq_puts(seq, ",grpquota"); +#endif + + return 0; +} + static struct super_operations jfs_super_operations = { .alloc_inode = jfs_alloc_inode, .destroy_inode = jfs_destroy_inode, @@ -552,6 +593,7 @@ static struct super_operations jfs_super_operations = { .unlockfs = jfs_unlockfs, .statfs = jfs_statfs, .remount_fs = jfs_remount, + .show_options = jfs_show_options }; static struct export_operations jfs_export_operations = { diff --git a/include/linux/ext2_fs.h b/include/linux/ext2_fs.h index a657130ba03..f7bd1c7ebef 100644 --- a/include/linux/ext2_fs.h +++ b/include/linux/ext2_fs.h @@ -313,6 +313,9 @@ struct ext2_inode { #define EXT2_MOUNT_XATTR_USER 0x004000 /* Extended user attributes */ #define EXT2_MOUNT_POSIX_ACL 0x008000 /* POSIX Access Control Lists */ #define EXT2_MOUNT_XIP 0x010000 /* Execute in place */ +#define EXT2_MOUNT_USRQUOTA 0x020000 /* user quota */ +#define EXT2_MOUNT_GRPQUOTA 0x040000 /* group quota */ + #define clear_opt(o, opt) o &= ~EXT2_MOUNT_##opt #define set_opt(o, opt) o |= EXT2_MOUNT_##opt diff --git a/include/linux/ext3_fs.h b/include/linux/ext3_fs.h index c16662836c5..c0272d73ab2 100644 --- a/include/linux/ext3_fs.h +++ b/include/linux/ext3_fs.h @@ -373,6 +373,8 @@ struct ext3_inode { #define EXT3_MOUNT_BARRIER 0x20000 /* Use block barriers */ #define EXT3_MOUNT_NOBH 0x40000 /* No bufferheads */ #define EXT3_MOUNT_QUOTA 0x80000 /* Some quota option set */ +#define EXT3_MOUNT_USRQUOTA 0x100000 /* "old" user quota */ +#define EXT3_MOUNT_GRPQUOTA 0x200000 /* "old" group quota */ /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */ #ifndef _LINUX_EXT2_FS_H -- cgit v1.2.3-70-g09d2 From f35279d3f713e5c97b98cbdbf47d98f79942c11f Mon Sep 17 00:00:00 2001 From: Bruce Allan Date: Tue, 6 Sep 2005 15:17:08 -0700 Subject: [PATCH] sunrpc: cache_register can use wrong module reference When registering an RPC cache, cache_register() always sets the owner as the sunrpc module. However, there are RPC caches owned by other modules. With the incorrect owner setting, the real owning module can be removed potentially with an open reference to the cache from userspace. For example, if one were to stop the nfs server and unmount the nfsd filesystem, the nfsd module could be removed eventhough rpc.idmapd had references to the idtoname and nametoid caches (i.e. /proc/net/rpc/nfs4./channel is still open). This resulted in a system panic on one of our machines when attempting to restart the nfs services after reloading the nfsd module. The following patch adds a 'struct module *owner' field in struct cache_detail. The owner is further assigned to the struct proc_dir_entry in cache_register() so that the module cannot be unloaded while user-space daemons have an open reference on the associated file under /proc. Signed-off-by: Bruce Allan Cc: Trond Myklebust Cc: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nfsd/export.c | 3 +++ fs/nfsd/nfs4idmap.c | 8 ++++++-- include/linux/sunrpc/cache.h | 1 + net/sunrpc/auth_gss/svcauth_gss.c | 8 ++++++-- net/sunrpc/cache.c | 8 ++++---- net/sunrpc/sunrpc_syms.c | 6 ++++-- net/sunrpc/svcauth.c | 1 + net/sunrpc/svcauth_unix.c | 1 + 8 files changed, 26 insertions(+), 10 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index 9a11aa39e2e..057aff74550 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include @@ -221,6 +222,7 @@ static int expkey_show(struct seq_file *m, } struct cache_detail svc_expkey_cache = { + .owner = THIS_MODULE, .hash_size = EXPKEY_HASHMAX, .hash_table = expkey_table, .name = "nfsd.fh", @@ -456,6 +458,7 @@ static int svc_export_show(struct seq_file *m, return 0; } struct cache_detail svc_export_cache = { + .owner = THIS_MODULE, .hash_size = EXPORT_HASHMAX, .hash_table = export_table, .name = "nfsd.export", diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c index 5605a26efc5..13369650cdf 100644 --- a/fs/nfsd/nfs4idmap.c +++ b/fs/nfsd/nfs4idmap.c @@ -187,6 +187,7 @@ static int idtoname_parse(struct cache_detail *, char *, int); static struct ent *idtoname_lookup(struct ent *, int); static struct cache_detail idtoname_cache = { + .owner = THIS_MODULE, .hash_size = ENT_HASHMAX, .hash_table = idtoname_table, .name = "nfs4.idtoname", @@ -320,6 +321,7 @@ static struct ent *nametoid_lookup(struct ent *, int); static int nametoid_parse(struct cache_detail *, char *, int); static struct cache_detail nametoid_cache = { + .owner = THIS_MODULE, .hash_size = ENT_HASHMAX, .hash_table = nametoid_table, .name = "nfs4.nametoid", @@ -404,8 +406,10 @@ nfsd_idmap_init(void) void nfsd_idmap_shutdown(void) { - cache_unregister(&idtoname_cache); - cache_unregister(&nametoid_cache); + if (cache_unregister(&idtoname_cache)) + printk(KERN_ERR "nfsd: failed to unregister idtoname cache\n"); + if (cache_unregister(&nametoid_cache)) + printk(KERN_ERR "nfsd: failed to unregister nametoid cache\n"); } /* diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h index 6864063d1b9..c4e3ea7cf15 100644 --- a/include/linux/sunrpc/cache.h +++ b/include/linux/sunrpc/cache.h @@ -60,6 +60,7 @@ struct cache_head { #define CACHE_NEW_EXPIRY 120 /* keep new things pending confirmation for 120 seconds */ struct cache_detail { + struct module * owner; int hash_size; struct cache_head ** hash_table; rwlock_t hash_lock; diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index 5c8fe3bfc49..e3308195374 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c @@ -250,6 +250,7 @@ out: } static struct cache_detail rsi_cache = { + .owner = THIS_MODULE, .hash_size = RSI_HASHMAX, .hash_table = rsi_table, .name = "auth.rpcsec.init", @@ -436,6 +437,7 @@ out: } static struct cache_detail rsc_cache = { + .owner = THIS_MODULE, .hash_size = RSC_HASHMAX, .hash_table = rsc_table, .name = "auth.rpcsec.context", @@ -1074,7 +1076,9 @@ gss_svc_init(void) void gss_svc_shutdown(void) { - cache_unregister(&rsc_cache); - cache_unregister(&rsi_cache); + if (cache_unregister(&rsc_cache)) + printk(KERN_ERR "auth_rpcgss: failed to unregister rsc cache\n"); + if (cache_unregister(&rsi_cache)) + printk(KERN_ERR "auth_rpcgss: failed to unregister rsi cache\n"); svc_auth_unregister(RPC_AUTH_GSS); } diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index 900f5bc7e33..f509e999276 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -177,7 +177,7 @@ void cache_register(struct cache_detail *cd) cd->proc_ent = proc_mkdir(cd->name, proc_net_rpc); if (cd->proc_ent) { struct proc_dir_entry *p; - cd->proc_ent->owner = THIS_MODULE; + cd->proc_ent->owner = cd->owner; cd->channel_ent = cd->content_ent = NULL; p = create_proc_entry("flush", S_IFREG|S_IRUSR|S_IWUSR, @@ -185,7 +185,7 @@ void cache_register(struct cache_detail *cd) cd->flush_ent = p; if (p) { p->proc_fops = &cache_flush_operations; - p->owner = THIS_MODULE; + p->owner = cd->owner; p->data = cd; } @@ -195,7 +195,7 @@ void cache_register(struct cache_detail *cd) cd->channel_ent = p; if (p) { p->proc_fops = &cache_file_operations; - p->owner = THIS_MODULE; + p->owner = cd->owner; p->data = cd; } } @@ -205,7 +205,7 @@ void cache_register(struct cache_detail *cd) cd->content_ent = p; if (p) { p->proc_fops = &content_file_operations; - p->owner = THIS_MODULE; + p->owner = cd->owner; p->data = cd; } } diff --git a/net/sunrpc/sunrpc_syms.c b/net/sunrpc/sunrpc_syms.c index 62a07349527..ed48ff022d3 100644 --- a/net/sunrpc/sunrpc_syms.c +++ b/net/sunrpc/sunrpc_syms.c @@ -176,8 +176,10 @@ cleanup_sunrpc(void) { unregister_rpc_pipefs(); rpc_destroy_mempool(); - cache_unregister(&auth_domain_cache); - cache_unregister(&ip_map_cache); + if (cache_unregister(&auth_domain_cache)) + printk(KERN_ERR "sunrpc: failed to unregister auth_domain cache\n"); + if (cache_unregister(&ip_map_cache)) + printk(KERN_ERR "sunrpc: failed to unregister ip_map cache\n"); #ifdef RPC_DEBUG rpc_unregister_sysctl(); #endif diff --git a/net/sunrpc/svcauth.c b/net/sunrpc/svcauth.c index bde8147ef2d..dda4f0c6351 100644 --- a/net/sunrpc/svcauth.c +++ b/net/sunrpc/svcauth.c @@ -143,6 +143,7 @@ static void auth_domain_drop(struct cache_head *item, struct cache_detail *cd) struct cache_detail auth_domain_cache = { + .owner = THIS_MODULE, .hash_size = DN_HASHMAX, .hash_table = auth_domain_table, .name = "auth.domain", diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c index d6baf6fdf8a..cac2e774dd8 100644 --- a/net/sunrpc/svcauth_unix.c +++ b/net/sunrpc/svcauth_unix.c @@ -242,6 +242,7 @@ static int ip_map_show(struct seq_file *m, struct cache_detail ip_map_cache = { + .owner = THIS_MODULE, .hash_size = IP_HASHMAX, .hash_table = ip_table, .name = "auth.unix.ip", -- cgit v1.2.3-70-g09d2 From f3ef6f63e5c575c136b39bb423a6e9a002932da7 Mon Sep 17 00:00:00 2001 From: Karsten Wiese Date: Tue, 6 Sep 2005 15:17:12 -0700 Subject: [PATCH] Speedup FAT filesystem directory reads OGAWA Hirofumi This speeds up directory reads for large FAT partitions, if the buffercache has to be filled from the drive. Following values were taken from: $ time find path_to_freshly_mounted_fat > /dev/null on an otherwise idle system. FAT with 16KB Clusters on IDE attached drive: Factor 2 FAT with 32KB Clusters on USB2 attached drive: Factor 10 (!) Its less than 1/10 slower, if the buffercache is uptodate. The patch introduces the new function fat_dir_readahead(). fat_dir_readahead() calls sb_breadahead() to readahead a whole cluster, if the requested sector is the first one in a cluster. It is usefull to do this, because on FAT directories occupy whole clusters, with the exception of FAT12/FAT16 root dirs. Readahead is only done, if the cluster's first sector is not uptodate to avoid overhead, when the buffer cache is already uptodate. Note that under memory pressure, the maximal byte count wasted (read: has to be red from disk twice) is 1 cluster's size. Thats 64KB. fat_dir_readahead() is called from fat__get_entry(). There is also an unrelated cleanup at one spot: if (bh) brelse(bh); is replaced with: brelse(bh); brelse() can handle NULL pointer arguments by itself. Signed-off-by: Karsten Wiese Signed-off-by: OGAWA Hirofumi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/fat/dir.c | 28 ++++++++++++++++++++++++++-- 1 file changed, 26 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/fat/dir.c b/fs/fat/dir.c index e5ae1b720dd..895049b2ac9 100644 --- a/fs/fat/dir.c +++ b/fs/fat/dir.c @@ -30,6 +30,29 @@ static inline loff_t fat_make_i_pos(struct super_block *sb, | (de - (struct msdos_dir_entry *)bh->b_data); } +static inline void fat_dir_readahead(struct inode *dir, sector_t iblock, + sector_t phys) +{ + struct super_block *sb = dir->i_sb; + struct msdos_sb_info *sbi = MSDOS_SB(sb); + struct buffer_head *bh; + int sec; + + /* This is not a first sector of cluster, or sec_per_clus == 1 */ + if ((iblock & (sbi->sec_per_clus - 1)) || sbi->sec_per_clus == 1) + return; + /* root dir of FAT12/FAT16 */ + if ((sbi->fat_bits != 32) && (dir->i_ino == MSDOS_ROOT_INO)) + return; + + bh = sb_getblk(sb, phys); + if (bh && !buffer_uptodate(bh)) { + for (sec = 0; sec < sbi->sec_per_clus; sec++) + sb_breadahead(sb, phys + sec); + } + brelse(bh); +} + /* Returns the inode number of the directory entry at offset pos. If bh is non-NULL, it is brelse'd before. Pos is incremented. The buffer header is returned in bh. @@ -58,6 +81,8 @@ next: if (err || !phys) return -1; /* beyond EOF or error */ + fat_dir_readahead(dir, iblock, phys); + *bh = sb_bread(sb, phys); if (*bh == NULL) { printk(KERN_ERR "FAT: Directory bread(block %llu) failed\n", @@ -635,8 +660,7 @@ RecEnd: EODir: filp->f_pos = cpos; FillFailed: - if (bh) - brelse(bh); + brelse(bh); if (unicode) free_page((unsigned long)unicode); out: -- cgit v1.2.3-70-g09d2 From b80068543794864f533163c586be2a1a9880a65d Mon Sep 17 00:00:00 2001 From: Robert Love Date: Tue, 6 Sep 2005 15:17:16 -0700 Subject: [PATCH] fsnotify: hook on removexattr, too Add fsnotify_xattr() hook to removexattr(). Signed-off-by: Robert Love Signed-off-by: John McCtuchan Cc: Andreas Gruenbacher Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/xattr.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs') diff --git a/fs/xattr.c b/fs/xattr.c index dc8bc7624f2..3f9c64bea15 100644 --- a/fs/xattr.c +++ b/fs/xattr.c @@ -325,6 +325,8 @@ removexattr(struct dentry *d, char __user *name) down(&d->d_inode->i_sem); error = d->d_inode->i_op->removexattr(d, kname); up(&d->d_inode->i_sem); + if (!error) + fsnotify_xattr(d); } out: return error; -- cgit v1.2.3-70-g09d2 From 2b579beec255d6589fabe51b60933d723630bcd4 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 6 Sep 2005 15:17:18 -0700 Subject: [PATCH] proc: link count fix This patch fixes bug titled "sunrpc as module and bad proc/sys link count" reported by Jiri Slaby. The problem was, that only proc_dir_entry->nlink was updated and the corresponding inode->i_nlink was not. The fix is to implement the inode->getattr() method, and update i_nlink (if necessary). A quick audit of proc code shows that no other attribute changes after creation. Signed-off-by: Miklos Szeredi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/generic.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'fs') diff --git a/fs/proc/generic.c b/fs/proc/generic.c index abe8920313f..8a8c34461d4 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c @@ -249,6 +249,18 @@ out: return error; } +static int proc_getattr(struct vfsmount *mnt, struct dentry *dentry, + struct kstat *stat) +{ + struct inode *inode = dentry->d_inode; + struct proc_dir_entry *de = PROC_I(inode)->pde; + if (de && de->nlink) + inode->i_nlink = de->nlink; + + generic_fillattr(inode, stat); + return 0; +} + static struct inode_operations proc_file_inode_operations = { .setattr = proc_notify_change, }; @@ -475,6 +487,7 @@ static struct file_operations proc_dir_operations = { */ static struct inode_operations proc_dir_inode_operations = { .lookup = proc_lookup, + .getattr = proc_getattr, .setattr = proc_notify_change, }; -- cgit v1.2.3-70-g09d2 From 919532a54518c3a4a8258319e2bba0e07f69a925 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Tue, 6 Sep 2005 15:17:22 -0700 Subject: [PATCH] fs/Kconfig: quota help text updates This patch contains the following updates to the help texts: - QUOTA: most people will get the quota utilities from their distribution, and if not the mini-HOWTO will tell them - QFMT_V2: quota utilities 3.01 are no longer recent, they are now ancient and 3.01 is lower than the minimal version documented in Documentation/Changes Signed-off-by: Adrian Bunk Acked-by: Jan Kara Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/Kconfig | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/Kconfig b/fs/Kconfig index 740d6ff0367..5e817902cb3 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -382,10 +382,8 @@ config QUOTA usage (also called disk quotas). Currently, it works for the ext2, ext3, and reiserfs file system. ext3 also supports journalled quotas for which you don't need to run quotacheck(8) after an unclean - shutdown. You need additional software in order to use quota support - (you can download sources from - ). For further details, read - the Quota mini-HOWTO, available from + shutdown. + For further details, read the Quota mini-HOWTO, available from , or the documentation provided with the quota tools. Probably the quota support is only useful for multi user systems. If unsure, say N. @@ -403,8 +401,7 @@ config QFMT_V2 depends on QUOTA help This quota format allows using quotas with 32-bit UIDs/GIDs. If you - need this functionality say Y here. Note that you will need recent - quota utilities (>= 3.01) for new quota format with this kernel. + need this functionality say Y here. config QUOTACTL bool -- cgit v1.2.3-70-g09d2 From 5e1efe4931bf7d95b2f3d48ca0b79ea0e8341cc2 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Tue, 6 Sep 2005 15:17:23 -0700 Subject: [PATCH] jffs/jffs2: remove wrong function prototypes This patch removes prototypes for the generic_file_open and generic_file_llseek functions. Besides being superfluous because they are already present in fs.h, they were also wrong because the actual functions aren't weak functions. Signed-off-by: Adrian Bunk Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/jffs/inode-v23.c | 3 --- fs/jffs2/file.c | 3 --- 2 files changed, 6 deletions(-) (limited to 'fs') diff --git a/fs/jffs/inode-v23.c b/fs/jffs/inode-v23.c index bfbeb4c86e0..777b90057b8 100644 --- a/fs/jffs/inode-v23.c +++ b/fs/jffs/inode-v23.c @@ -1629,9 +1629,6 @@ static int jffs_fsync(struct file *f, struct dentry *d, int datasync) } -extern int generic_file_open(struct inode *, struct file *) __attribute__((weak)); -extern loff_t generic_file_llseek(struct file *, loff_t, int) __attribute__((weak)); - static struct file_operations jffs_file_operations = { .open = generic_file_open, diff --git a/fs/jffs2/file.c b/fs/jffs2/file.c index bd9ed9b0247..8279bf0133f 100644 --- a/fs/jffs2/file.c +++ b/fs/jffs2/file.c @@ -21,9 +21,6 @@ #include #include "nodelist.h" -extern int generic_file_open(struct inode *, struct file *) __attribute__((weak)); -extern loff_t generic_file_llseek(struct file *file, loff_t offset, int origin) __attribute__((weak)); - static int jffs2_commit_write (struct file *filp, struct page *pg, unsigned start, unsigned end); static int jffs2_prepare_write (struct file *filp, struct page *pg, -- cgit v1.2.3-70-g09d2 From 5acd57936c3224fd86e838201e528e0169373e9b Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 6 Sep 2005 15:17:36 -0700 Subject: [PATCH] fs: remove redundant timespec_equal test in update_atime() In update_atime(), timespec_equal() test is done twice in succession and the second is always false. This patch removes the second test. Signed-off-by: Tejun Heo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/inode.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'fs') diff --git a/fs/inode.c b/fs/inode.c index e57f1724db3..71df1b1e8f7 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -1195,9 +1195,6 @@ void update_atime(struct inode *inode) if (!timespec_equal(&inode->i_atime, &now)) { inode->i_atime = now; mark_inode_dirty_sync(inode); - } else { - if (!timespec_equal(&inode->i_atime, &now)) - inode->i_atime = now; } } -- cgit v1.2.3-70-g09d2 From 2832e9366a1fcd6f76957a42157be041240f994e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 6 Sep 2005 15:17:38 -0700 Subject: [PATCH] remove file.f_maxcount struct file cleanup: f_maxcount has an unique value (INT_MAX). Just use the hard-wired value. Signed-off-by: Eric Dumazet Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/file_table.c | 1 - fs/read_write.c | 2 +- include/linux/fs.h | 1 - 3 files changed, 1 insertion(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/file_table.c b/fs/file_table.c index 1d3de78e6bc..43e9e1737de 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -89,7 +89,6 @@ struct file *get_empty_filp(void) rwlock_init(&f->f_owner.lock); /* f->f_version: 0 */ INIT_LIST_HEAD(&f->f_list); - f->f_maxcount = INT_MAX; return f; over: diff --git a/fs/read_write.c b/fs/read_write.c index 563abd09b5c..b60324aaa2b 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -188,7 +188,7 @@ int rw_verify_area(int read_write, struct file *file, loff_t *ppos, size_t count struct inode *inode; loff_t pos; - if (unlikely(count > file->f_maxcount)) + if (unlikely(count > INT_MAX)) goto Einval; pos = *ppos; if (unlikely((pos < 0) || (loff_t) (pos + count) < 0)) diff --git a/include/linux/fs.h b/include/linux/fs.h index 67e6732d4fd..2036747c7d1 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -594,7 +594,6 @@ struct file { unsigned int f_uid, f_gid; struct file_ra_state f_ra; - size_t f_maxcount; unsigned long f_version; void *f_security; -- cgit v1.2.3-70-g09d2 From a97c9bf33f4612e2aed6f000f6b1d268b6814f3c Mon Sep 17 00:00:00 2001 From: Dave Johnson Date: Tue, 6 Sep 2005 15:17:40 -0700 Subject: [PATCH] fix cramfs making duplicate entries in inode cache Every time cramfs_lookup() is called to lookup and inode for a dentry, get_cramfs_inode() will allocate a new inode without checking to see if that inode already exists in the inode cache. This is fine the first time, but if the dentry cache entry(ies) associated with that inode are aged out, but the inode entry is not aged out (which can be quite common if the inode has buffer cache linked to it), cramfs_lookup() will be called again and another inode will be allocated and added to the inode cache creating a duplicate in the inode cache. The big issue here is that the buffers associated with each inode cache entry are not shared between the duplicates! The older inode entries are now orphaned as no dentry points to it and won't be freed until the buffer cache assoicated with them are first freed. The newest entry will have to create all new buffer cache for each part of its file as the old buffer cache is now orphaned as well. Patch below fixes this by making get_cramfs_inode() use the inode cache before blindly creating a new entry every time. This eliminates the duplicate inodes and duplicate buffer cache. Cc: Phillip Lougher Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/cramfs/inode.c | 43 +++++++++++++++++++++++++++++++++++++++---- 1 file changed, 39 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c index 6c285efa200..7fe85415ae7 100644 --- a/fs/cramfs/inode.c +++ b/fs/cramfs/inode.c @@ -39,12 +39,47 @@ static DECLARE_MUTEX(read_mutex); #define CRAMINO(x) ((x)->offset?(x)->offset<<2:1) #define OFFSET(x) ((x)->i_ino) -static struct inode *get_cramfs_inode(struct super_block *sb, struct cramfs_inode * cramfs_inode) + +static int cramfs_iget5_test(struct inode *inode, void *opaque) +{ + struct cramfs_inode *cramfs_inode = opaque; + + if (inode->i_ino != CRAMINO(cramfs_inode)) + return 0; /* does not match */ + + if (inode->i_ino != 1) + return 1; + + /* all empty directories, char, block, pipe, and sock, share inode #1 */ + + if ((inode->i_mode != cramfs_inode->mode) || + (inode->i_gid != cramfs_inode->gid) || + (inode->i_uid != cramfs_inode->uid)) + return 0; /* does not match */ + + if ((S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) && + (inode->i_rdev != old_decode_dev(cramfs_inode->size))) + return 0; /* does not match */ + + return 1; /* matches */ +} + +static int cramfs_iget5_set(struct inode *inode, void *opaque) +{ + struct cramfs_inode *cramfs_inode = opaque; + inode->i_ino = CRAMINO(cramfs_inode); + return 0; +} + +static struct inode *get_cramfs_inode(struct super_block *sb, + struct cramfs_inode * cramfs_inode) { - struct inode * inode = new_inode(sb); + struct inode *inode = iget5_locked(sb, CRAMINO(cramfs_inode), + cramfs_iget5_test, cramfs_iget5_set, + cramfs_inode); static struct timespec zerotime; - if (inode) { + if (inode && (inode->i_state & I_NEW)) { inode->i_mode = cramfs_inode->mode; inode->i_uid = cramfs_inode->uid; inode->i_size = cramfs_inode->size; @@ -58,7 +93,6 @@ static struct inode *get_cramfs_inode(struct super_block *sb, struct cramfs_inod but it's the best we can do without reading the directory contents. 1 yields the right result in GNU find, even without -noleaf option. */ - insert_inode_hash(inode); if (S_ISREG(inode->i_mode)) { inode->i_fop = &generic_ro_fops; inode->i_data.a_ops = &cramfs_aops; @@ -74,6 +108,7 @@ static struct inode *get_cramfs_inode(struct super_block *sb, struct cramfs_inod init_special_inode(inode, inode->i_mode, old_decode_dev(cramfs_inode->size)); } + unlock_new_inode(inode); } return inode; } -- cgit v1.2.3-70-g09d2 From 6c231b7bab0aa6860cd9da2de8a064eddc34c146 Mon Sep 17 00:00:00 2001 From: Ravikiran G Thirumalai Date: Tue, 6 Sep 2005 15:17:45 -0700 Subject: [PATCH] Additions to .data.read_mostly section Mark variables which are usually accessed for reads with __readmostly. Signed-off-by: Alok N Kataria Signed-off-by: Shai Fultheim Signed-off-by: Ravikiran Thirumalai Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/io_apic.c | 10 +++++----- arch/i386/kernel/timers/timer_hpet.c | 2 +- arch/i386/mm/discontig.c | 8 ++++---- arch/i386/mm/init.c | 2 +- arch/x86_64/kernel/genapic.c | 2 +- arch/x86_64/kernel/io_apic.c | 10 +++++----- arch/x86_64/kernel/setup.c | 2 +- arch/x86_64/kernel/setup64.c | 2 +- arch/x86_64/kernel/smpboot.c | 10 +++++----- arch/x86_64/mm/numa.c | 6 +++--- fs/namespace.c | 2 +- mm/page_alloc.c | 4 ++-- mm/shmem.c | 2 +- 13 files changed, 31 insertions(+), 31 deletions(-) (limited to 'fs') diff --git a/arch/i386/kernel/io_apic.c b/arch/i386/kernel/io_apic.c index 4a594043157..0e727e6da5c 100644 --- a/arch/i386/kernel/io_apic.c +++ b/arch/i386/kernel/io_apic.c @@ -78,7 +78,7 @@ static struct irq_pin_list { int apic, pin, next; } irq_2_pin[PIN_MAP_SIZE]; -int vector_irq[NR_VECTORS] = { [0 ... NR_VECTORS - 1] = -1}; +int vector_irq[NR_VECTORS] __read_mostly = { [0 ... NR_VECTORS - 1] = -1}; #ifdef CONFIG_PCI_MSI #define vector_to_irq(vector) \ (platform_legacy_irq(vector) ? vector : vector_irq[vector]) @@ -1119,7 +1119,7 @@ static inline int IO_APIC_irq_trigger(int irq) } /* irq_vectors is indexed by the sum of all RTEs in all I/O APICs. */ -u8 irq_vector[NR_IRQ_VECTORS] = { FIRST_DEVICE_VECTOR , 0 }; +u8 irq_vector[NR_IRQ_VECTORS] __read_mostly = { FIRST_DEVICE_VECTOR , 0 }; int assign_irq_vector(int irq) { @@ -1990,7 +1990,7 @@ static void set_ioapic_affinity_vector (unsigned int vector, * edge-triggered handler, without risking IRQ storms and other ugly * races. */ -static struct hw_interrupt_type ioapic_edge_type = { +static struct hw_interrupt_type ioapic_edge_type __read_mostly = { .typename = "IO-APIC-edge", .startup = startup_edge_ioapic, .shutdown = shutdown_edge_ioapic, @@ -2003,7 +2003,7 @@ static struct hw_interrupt_type ioapic_edge_type = { #endif }; -static struct hw_interrupt_type ioapic_level_type = { +static struct hw_interrupt_type ioapic_level_type __read_mostly = { .typename = "IO-APIC-level", .startup = startup_level_ioapic, .shutdown = shutdown_level_ioapic, @@ -2076,7 +2076,7 @@ static void ack_lapic_irq (unsigned int irq) static void end_lapic_irq (unsigned int i) { /* nothing */ } -static struct hw_interrupt_type lapic_irq_type = { +static struct hw_interrupt_type lapic_irq_type __read_mostly = { .typename = "local-APIC-edge", .startup = NULL, /* startup_irq() not used for IRQ0 */ .shutdown = NULL, /* shutdown_irq() not used for IRQ0 */ diff --git a/arch/i386/kernel/timers/timer_hpet.c b/arch/i386/kernel/timers/timer_hpet.c index 6dbb29f834e..d973a8b681f 100644 --- a/arch/i386/kernel/timers/timer_hpet.c +++ b/arch/i386/kernel/timers/timer_hpet.c @@ -19,7 +19,7 @@ #include static unsigned long hpet_usec_quotient __read_mostly; /* convert hpet clks to usec */ -static unsigned long tsc_hpet_quotient; /* convert tsc to hpet clks */ +static unsigned long tsc_hpet_quotient __read_mostly; /* convert tsc to hpet clks */ static unsigned long hpet_last; /* hpet counter value at last tick*/ static unsigned long last_tsc_low; /* lsb 32 bits of Time Stamp Counter */ static unsigned long last_tsc_high; /* msb 32 bits of Time Stamp Counter */ diff --git a/arch/i386/mm/discontig.c b/arch/i386/mm/discontig.c index 6711ce3f691..244d8ec66be 100644 --- a/arch/i386/mm/discontig.c +++ b/arch/i386/mm/discontig.c @@ -37,7 +37,7 @@ #include #include -struct pglist_data *node_data[MAX_NUMNODES]; +struct pglist_data *node_data[MAX_NUMNODES] __read_mostly; EXPORT_SYMBOL(node_data); bootmem_data_t node0_bdata; @@ -49,8 +49,8 @@ bootmem_data_t node0_bdata; * 2) node_start_pfn - the starting page frame number for a node * 3) node_end_pfn - the ending page fram number for a node */ -unsigned long node_start_pfn[MAX_NUMNODES]; -unsigned long node_end_pfn[MAX_NUMNODES]; +unsigned long node_start_pfn[MAX_NUMNODES] __read_mostly; +unsigned long node_end_pfn[MAX_NUMNODES] __read_mostly; #ifdef CONFIG_DISCONTIGMEM @@ -66,7 +66,7 @@ unsigned long node_end_pfn[MAX_NUMNODES]; * physnode_map[4-7] = 1; * physnode_map[8- ] = -1; */ -s8 physnode_map[MAX_ELEMENTS] = { [0 ... (MAX_ELEMENTS - 1)] = -1}; +s8 physnode_map[MAX_ELEMENTS] __read_mostly = { [0 ... (MAX_ELEMENTS - 1)] = -1}; EXPORT_SYMBOL(physnode_map); void memory_present(int nid, unsigned long start, unsigned long end) diff --git a/arch/i386/mm/init.c b/arch/i386/mm/init.c index 9edfc058b89..2ebaf75f732 100644 --- a/arch/i386/mm/init.c +++ b/arch/i386/mm/init.c @@ -393,7 +393,7 @@ void zap_low_mappings (void) } static int disable_nx __initdata = 0; -u64 __supported_pte_mask = ~_PAGE_NX; +u64 __supported_pte_mask __read_mostly = ~_PAGE_NX; /* * noexec = on|off diff --git a/arch/x86_64/kernel/genapic.c b/arch/x86_64/kernel/genapic.c index 30c843a5efd..f062aa03bab 100644 --- a/arch/x86_64/kernel/genapic.c +++ b/arch/x86_64/kernel/genapic.c @@ -25,7 +25,7 @@ #endif /* which logical CPU number maps to which CPU (physical APIC ID) */ -u8 x86_cpu_to_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID }; +u8 x86_cpu_to_apicid[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = BAD_APICID }; EXPORT_SYMBOL(x86_cpu_to_apicid); u8 x86_cpu_to_log_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID }; diff --git a/arch/x86_64/kernel/io_apic.c b/arch/x86_64/kernel/io_apic.c index 76bcc4e6979..ba1a744e9bf 100644 --- a/arch/x86_64/kernel/io_apic.c +++ b/arch/x86_64/kernel/io_apic.c @@ -70,7 +70,7 @@ static struct irq_pin_list { short apic, pin, next; } irq_2_pin[PIN_MAP_SIZE]; -int vector_irq[NR_VECTORS] = { [0 ... NR_VECTORS - 1] = -1}; +int vector_irq[NR_VECTORS] __read_mostly = { [0 ... NR_VECTORS - 1] = -1}; #ifdef CONFIG_PCI_MSI #define vector_to_irq(vector) \ (platform_legacy_irq(vector) ? vector : vector_irq[vector]) @@ -683,7 +683,7 @@ static inline int IO_APIC_irq_trigger(int irq) } /* irq_vectors is indexed by the sum of all RTEs in all I/O APICs. */ -u8 irq_vector[NR_IRQ_VECTORS] = { FIRST_DEVICE_VECTOR , 0 }; +u8 irq_vector[NR_IRQ_VECTORS] __read_mostly = { FIRST_DEVICE_VECTOR , 0 }; int assign_irq_vector(int irq) { @@ -1443,7 +1443,7 @@ static void set_ioapic_affinity_vector (unsigned int vector, * races. */ -static struct hw_interrupt_type ioapic_edge_type = { +static struct hw_interrupt_type ioapic_edge_type __read_mostly = { .typename = "IO-APIC-edge", .startup = startup_edge_ioapic, .shutdown = shutdown_edge_ioapic, @@ -1456,7 +1456,7 @@ static struct hw_interrupt_type ioapic_edge_type = { #endif }; -static struct hw_interrupt_type ioapic_level_type = { +static struct hw_interrupt_type ioapic_level_type __read_mostly = { .typename = "IO-APIC-level", .startup = startup_level_ioapic, .shutdown = shutdown_level_ioapic, @@ -1529,7 +1529,7 @@ static void ack_lapic_irq (unsigned int irq) static void end_lapic_irq (unsigned int i) { /* nothing */ } -static struct hw_interrupt_type lapic_irq_type = { +static struct hw_interrupt_type lapic_irq_type __read_mostly = { .typename = "local-APIC-edge", .startup = NULL, /* startup_irq() not used for IRQ0 */ .shutdown = NULL, /* shutdown_irq() not used for IRQ0 */ diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c index 116a491e296..b356f8e6adf 100644 --- a/arch/x86_64/kernel/setup.c +++ b/arch/x86_64/kernel/setup.c @@ -65,7 +65,7 @@ * Machine setup.. */ -struct cpuinfo_x86 boot_cpu_data; +struct cpuinfo_x86 boot_cpu_data __read_mostly; unsigned long mmu_cr4_features; diff --git a/arch/x86_64/kernel/setup64.c b/arch/x86_64/kernel/setup64.c index 34082c1cc41..e3ffcacc8c9 100644 --- a/arch/x86_64/kernel/setup64.c +++ b/arch/x86_64/kernel/setup64.c @@ -36,7 +36,7 @@ struct desc_ptr idt_descr = { 256 * 16, (unsigned long) idt_table }; char boot_cpu_stack[IRQSTACKSIZE] __attribute__((section(".bss.page_aligned"))); -unsigned long __supported_pte_mask = ~0UL; +unsigned long __supported_pte_mask __read_mostly = ~0UL; static int do_not_nx __initdata = 0; /* noexec=on|off diff --git a/arch/x86_64/kernel/smpboot.c b/arch/x86_64/kernel/smpboot.c index fa25e39fe54..90aeccd1519 100644 --- a/arch/x86_64/kernel/smpboot.c +++ b/arch/x86_64/kernel/smpboot.c @@ -62,13 +62,13 @@ /* Number of siblings per CPU package */ int smp_num_siblings = 1; /* Package ID of each logical CPU */ -u8 phys_proc_id[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID }; -u8 cpu_core_id[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID }; +u8 phys_proc_id[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = BAD_APICID }; +u8 cpu_core_id[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = BAD_APICID }; EXPORT_SYMBOL(phys_proc_id); EXPORT_SYMBOL(cpu_core_id); /* Bitmask of currently online CPUs */ -cpumask_t cpu_online_map; +cpumask_t cpu_online_map __read_mostly; EXPORT_SYMBOL(cpu_online_map); @@ -88,8 +88,8 @@ struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned; /* Set when the idlers are all forked */ int smp_threads_ready; -cpumask_t cpu_sibling_map[NR_CPUS] __cacheline_aligned; -cpumask_t cpu_core_map[NR_CPUS] __cacheline_aligned; +cpumask_t cpu_sibling_map[NR_CPUS] __read_mostly; +cpumask_t cpu_core_map[NR_CPUS] __read_mostly; EXPORT_SYMBOL(cpu_core_map); /* diff --git a/arch/x86_64/mm/numa.c b/arch/x86_64/mm/numa.c index 6a156f5692a..04f7a33e144 100644 --- a/arch/x86_64/mm/numa.c +++ b/arch/x86_64/mm/numa.c @@ -22,14 +22,14 @@ #define Dprintk(x...) #endif -struct pglist_data *node_data[MAX_NUMNODES]; +struct pglist_data *node_data[MAX_NUMNODES] __read_mostly; bootmem_data_t plat_node_bdata[MAX_NUMNODES]; int memnode_shift; u8 memnodemap[NODEMAPSIZE]; -unsigned char cpu_to_node[NR_CPUS] = { [0 ... NR_CPUS-1] = NUMA_NO_NODE }; -cpumask_t node_to_cpumask[MAX_NUMNODES]; +unsigned char cpu_to_node[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = NUMA_NO_NODE }; +cpumask_t node_to_cpumask[MAX_NUMNODES] __read_mostly; int numa_off __initdata; diff --git a/fs/namespace.c b/fs/namespace.c index 79bd8a46e1e..29b70669435 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -40,7 +40,7 @@ static inline int sysfs_init(void) __cacheline_aligned_in_smp DEFINE_SPINLOCK(vfsmount_lock); static struct list_head *mount_hashtable; -static int hash_mask, hash_bits; +static int hash_mask __read_mostly, hash_bits __read_mostly; static kmem_cache_t *mnt_cache; static inline unsigned long hash(struct vfsmount *mnt, struct dentry *dentry) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 34bba8f1144..14d7032c1d1 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -47,8 +47,8 @@ EXPORT_SYMBOL(node_online_map); nodemask_t node_possible_map __read_mostly = NODE_MASK_ALL; EXPORT_SYMBOL(node_possible_map); struct pglist_data *pgdat_list __read_mostly; -unsigned long totalram_pages; -unsigned long totalhigh_pages; +unsigned long totalram_pages __read_mostly; +unsigned long totalhigh_pages __read_mostly; long nr_swap_pages; /* diff --git a/mm/shmem.c b/mm/shmem.c index bdc4bbb6ddb..db2c9e8d990 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -180,7 +180,7 @@ static struct inode_operations shmem_inode_operations; static struct inode_operations shmem_dir_inode_operations; static struct vm_operations_struct shmem_vm_ops; -static struct backing_dev_info shmem_backing_dev_info = { +static struct backing_dev_info shmem_backing_dev_info __read_mostly = { .ra_pages = 0, /* No readahead */ .capabilities = BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_WRITEBACK, .unplug_io_fn = default_unplug_io_fn, -- cgit v1.2.3-70-g09d2 From 5e5d7a22292613e55da8e91d75bcc062fd861f41 Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Tue, 6 Sep 2005 15:17:48 -0700 Subject: [PATCH] pipe: remove redundant fifo_poll abstraction Remove a redundant fifo_poll() abstraction from fs/pipe.c and adds a big fat comment stating we set POLLERR for FIFOs too on Linux unlike most Unices. Signed-off-by: Pekka Enberg Cc: Manfred Spraul Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/pipe.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/pipe.c b/fs/pipe.c index 25aa09f9d09..2c7a23dde2d 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -415,6 +415,10 @@ pipe_poll(struct file *filp, poll_table *wait) if (filp->f_mode & FMODE_WRITE) { mask |= (nrbufs < PIPE_BUFFERS) ? POLLOUT | POLLWRNORM : 0; + /* + * Most Unices do not set POLLERR for FIFOs but on Linux they + * behave exactly like pipes for poll(). + */ if (!PIPE_READERS(*inode)) mask |= POLLERR; } @@ -422,9 +426,6 @@ pipe_poll(struct file *filp, poll_table *wait) return mask; } -/* FIXME: most Unices do not set POLLERR for fifos */ -#define fifo_poll pipe_poll - static int pipe_release(struct inode *inode, int decr, int decw) { @@ -568,7 +569,7 @@ struct file_operations read_fifo_fops = { .read = pipe_read, .readv = pipe_readv, .write = bad_pipe_w, - .poll = fifo_poll, + .poll = pipe_poll, .ioctl = pipe_ioctl, .open = pipe_read_open, .release = pipe_read_release, @@ -580,7 +581,7 @@ struct file_operations write_fifo_fops = { .read = bad_pipe_r, .write = pipe_write, .writev = pipe_writev, - .poll = fifo_poll, + .poll = pipe_poll, .ioctl = pipe_ioctl, .open = pipe_write_open, .release = pipe_write_release, @@ -593,7 +594,7 @@ struct file_operations rdwr_fifo_fops = { .readv = pipe_readv, .write = pipe_write, .writev = pipe_writev, - .poll = fifo_poll, + .poll = pipe_poll, .ioctl = pipe_ioctl, .open = pipe_rdwr_open, .release = pipe_rdwr_release, -- cgit v1.2.3-70-g09d2 From 82a25b5670eef736a20613f8b93fe55ecb5ca4bc Mon Sep 17 00:00:00 2001 From: Jesper Juhl Date: Tue, 6 Sep 2005 15:17:51 -0700 Subject: [PATCH] remove verify_area(): remove fs/umsdos/notes as it only contain a verify_area related note The file `fs/umsdos/notes' contains only a small note about a possible bug involving verify_area(). Since umsdos is no longer in the kernel and verify_area() is also gone, it seems to make sense that this file goes the way of the Dodo. After applying this patch the `fs/umsdos/' directory will be empty and can be removed entirely. Signed-off-by: Jesper Juhl Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/umsdos/notes | 17 ----------------- 1 file changed, 17 deletions(-) delete mode 100644 fs/umsdos/notes (limited to 'fs') diff --git a/fs/umsdos/notes b/fs/umsdos/notes deleted file mode 100644 index 3c47d1f4fc4..00000000000 --- a/fs/umsdos/notes +++ /dev/null @@ -1,17 +0,0 @@ -This file contain idea and things I don't want to forget - -Possible bug in fs/read_write.c -Function sys_readdir() - - There is a call the verify_area that does not take in account - the count parameter. I guess it should read - - error = verify_area(VERIFY_WRITE, dirent, count*sizeof (*dirent)); - - instead of - - error = verify_area(VERIFY_WRITE, dirent, sizeof (*dirent)); - - Of course, now , count is always 1 - - -- cgit v1.2.3-70-g09d2 From 7ea6040b0eff07d3a9a4e2d248ac137c6ad02d42 Mon Sep 17 00:00:00 2001 From: John McCutchan Date: Tue, 6 Sep 2005 15:18:02 -0700 Subject: [PATCH] inotify: fix event loss on hardlinked files People have run into a problem when they do this: watch (file1, all_events); watch (file2, some_events); if file2 is a hard link to file1, some events will be missed because by default we replace the mask. The patch below adds a flag IN_MASK_ADD which will cause inotify to add to the existing mask if present. Signed-off-by: John McCutchan Signed-off-by: Robert Love Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/inotify.c | 9 ++++++++- include/linux/inotify.h | 1 + 2 files changed, 9 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/inotify.c b/fs/inotify.c index 2fd97ef547f..a37e9fb1da5 100644 --- a/fs/inotify.c +++ b/fs/inotify.c @@ -931,6 +931,7 @@ asmlinkage long sys_inotify_add_watch(int fd, const char __user *path, u32 mask) struct nameidata nd; struct file *filp; int ret, fput_needed; + int mask_add = 0; filp = fget_light(fd, &fput_needed); if (unlikely(!filp)) @@ -953,6 +954,9 @@ asmlinkage long sys_inotify_add_watch(int fd, const char __user *path, u32 mask) down(&inode->inotify_sem); down(&dev->sem); + if (mask & IN_MASK_ADD) + mask_add = 1; + /* don't let user-space set invalid bits: we don't want flags set */ mask &= IN_ALL_EVENTS; if (unlikely(!mask)) { @@ -966,7 +970,10 @@ asmlinkage long sys_inotify_add_watch(int fd, const char __user *path, u32 mask) */ old = inode_find_dev(inode, dev); if (unlikely(old)) { - old->mask = mask; + if (mask_add) + old->mask |= mask; + else + old->mask = mask; ret = old->wd; goto out; } diff --git a/include/linux/inotify.h b/include/linux/inotify.h index 93bb3afe646..ee5b239092e 100644 --- a/include/linux/inotify.h +++ b/include/linux/inotify.h @@ -47,6 +47,7 @@ struct inotify_event { #define IN_MOVE (IN_MOVED_FROM | IN_MOVED_TO) /* moves */ /* special flags */ +#define IN_MASK_ADD 0x20000000 /* add to the mask of an already existing watch */ #define IN_ISDIR 0x40000000 /* event occurred against dir */ #define IN_ONESHOT 0x80000000 /* only send event once */ -- cgit v1.2.3-70-g09d2 From 736c7b808f38f3bb72941345e11e236ec65dec3d Mon Sep 17 00:00:00 2001 From: Coywolf Qi Hunt Date: Tue, 6 Sep 2005 15:18:17 -0700 Subject: [PATCH] alloc_buffer_head() and free_buffer_head() cleanup Signed-off-by: Coywolf Qi Hunt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/buffer.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/buffer.c b/fs/buffer.c index 6a25d7df89b..a92b8140355 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -3046,10 +3046,9 @@ struct buffer_head *alloc_buffer_head(unsigned int __nocast gfp_flags) { struct buffer_head *ret = kmem_cache_alloc(bh_cachep, gfp_flags); if (ret) { - preempt_disable(); - __get_cpu_var(bh_accounting).nr++; + get_cpu_var(bh_accounting).nr++; recalc_bh_state(); - preempt_enable(); + put_cpu_var(bh_accounting); } return ret; } @@ -3059,10 +3058,9 @@ void free_buffer_head(struct buffer_head *bh) { BUG_ON(!list_empty(&bh->b_assoc_buffers)); kmem_cache_free(bh_cachep, bh); - preempt_disable(); - __get_cpu_var(bh_accounting).nr--; + get_cpu_var(bh_accounting).nr--; recalc_bh_state(); - preempt_enable(); + put_cpu_var(bh_accounting); } EXPORT_SYMBOL(free_buffer_head); -- cgit v1.2.3-70-g09d2 From e89bbd3a0b3c054d9a94feb0db7bbae1cdb99e54 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 6 Sep 2005 15:18:21 -0700 Subject: [PATCH] remove iattr.ia_attr_flags Remove unused ia_attr_flags from struct iattr, and related defines. Signed-off-by: Miklos Szeredi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/hostfs/hostfs.h | 1 - include/linux/fs.h | 10 ---------- 2 files changed, 11 deletions(-) (limited to 'fs') diff --git a/fs/hostfs/hostfs.h b/fs/hostfs/hostfs.h index 67bca0d4a33..cca3fb693f9 100644 --- a/fs/hostfs/hostfs.h +++ b/fs/hostfs/hostfs.h @@ -49,7 +49,6 @@ struct hostfs_iattr { struct timespec ia_atime; struct timespec ia_mtime; struct timespec ia_ctime; - unsigned int ia_attr_flags; }; extern int stat_file(const char *path, unsigned long long *inode_out, diff --git a/include/linux/fs.h b/include/linux/fs.h index 2036747c7d1..57e294bf83f 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -281,18 +281,8 @@ struct iattr { struct timespec ia_atime; struct timespec ia_mtime; struct timespec ia_ctime; - unsigned int ia_attr_flags; }; -/* - * This is the inode attributes flag definitions - */ -#define ATTR_FLAG_SYNCRONOUS 1 /* Syncronous write */ -#define ATTR_FLAG_NOATIME 2 /* Don't update atime */ -#define ATTR_FLAG_APPEND 4 /* Append-only file */ -#define ATTR_FLAG_IMMUTABLE 8 /* Immutable file */ -#define ATTR_FLAG_NODIRATIME 16 /* Don't update atime for directory */ - /* * Includes for diskquotas. */ -- cgit v1.2.3-70-g09d2 From 09dd17d3e5e43ea6d3f3a12829108c4ca13ff810 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 6 Sep 2005 15:18:21 -0700 Subject: [PATCH] namei cleanup Extract common code into inline functions to make reading easier. Signed-off-by: Miklos Szeredi Cc: Cc: Christoph Hellwig Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/namei.c | 52 ++++++++++++++++++++++++---------------------------- 1 file changed, 24 insertions(+), 28 deletions(-) (limited to 'fs') diff --git a/fs/namei.c b/fs/namei.c index 6ec1f0fefc5..145e852c4bd 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -525,6 +525,22 @@ static inline int __do_follow_link(struct path *path, struct nameidata *nd) return error; } +static inline void dput_path(struct path *path, struct nameidata *nd) +{ + dput(path->dentry); + if (path->mnt != nd->mnt) + mntput(path->mnt); +} + +static inline void path_to_nameidata(struct path *path, struct nameidata *nd) +{ + dput(nd->dentry); + if (nd->mnt != path->mnt) + mntput(nd->mnt); + nd->mnt = path->mnt; + nd->dentry = path->dentry; +} + /* * This limits recursive symlink follows to 8, while * limiting consecutive symlinks to 40. @@ -552,9 +568,7 @@ static inline int do_follow_link(struct path *path, struct nameidata *nd) nd->depth--; return err; loop: - dput(path->dentry); - if (path->mnt != nd->mnt) - mntput(path->mnt); + dput_path(path, nd); path_release(nd); return err; } @@ -813,13 +827,8 @@ static fastcall int __link_path_walk(const char * name, struct nameidata *nd) err = -ENOTDIR; if (!inode->i_op) break; - } else { - dput(nd->dentry); - if (nd->mnt != next.mnt) - mntput(nd->mnt); - nd->mnt = next.mnt; - nd->dentry = next.dentry; - } + } else + path_to_nameidata(&next, nd); err = -ENOTDIR; if (!inode->i_op->lookup) break; @@ -859,13 +868,8 @@ last_component: if (err) goto return_err; inode = nd->dentry->d_inode; - } else { - dput(nd->dentry); - if (nd->mnt != next.mnt) - mntput(nd->mnt); - nd->mnt = next.mnt; - nd->dentry = next.dentry; - } + } else + path_to_nameidata(&next, nd); err = -ENOENT; if (!inode) break; @@ -901,9 +905,7 @@ return_reval: return_base: return 0; out_dput: - dput(next.dentry); - if (nd->mnt != next.mnt) - mntput(next.mnt); + dput_path(&next, nd); break; } path_release(nd); @@ -1507,11 +1509,7 @@ do_last: if (path.dentry->d_inode->i_op && path.dentry->d_inode->i_op->follow_link) goto do_link; - dput(nd->dentry); - nd->dentry = path.dentry; - if (nd->mnt != path.mnt) - mntput(nd->mnt); - nd->mnt = path.mnt; + path_to_nameidata(&path, nd); error = -EISDIR; if (path.dentry->d_inode && S_ISDIR(path.dentry->d_inode->i_mode)) goto exit; @@ -1522,9 +1520,7 @@ ok: return 0; exit_dput: - dput(path.dentry); - if (nd->mnt != path.mnt) - mntput(path.mnt); + dput_path(&path, nd); exit: path_release(nd); return error; -- cgit v1.2.3-70-g09d2 From 0494f6ec5d3a015d53b57e37280b93c19446676a Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 6 Sep 2005 15:18:22 -0700 Subject: [PATCH] use get_fs_struct() in proc This patch cleans up proc_cwd_link() and proc_root_link() by factoring out common code into get_fs_struct(). Signed-off-by: Miklos Szeredi Cc: Cc: Christoph Hellwig Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/base.c | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) (limited to 'fs') diff --git a/fs/proc/base.c b/fs/proc/base.c index 520978e49e9..73b1e94171b 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -297,15 +297,21 @@ static int proc_fd_link(struct inode *inode, struct dentry **dentry, struct vfsm return -ENOENT; } -static int proc_cwd_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt) +static struct fs_struct *get_fs_struct(struct task_struct *task) { struct fs_struct *fs; - int result = -ENOENT; - task_lock(proc_task(inode)); - fs = proc_task(inode)->fs; + task_lock(task); + fs = task->fs; if(fs) atomic_inc(&fs->count); - task_unlock(proc_task(inode)); + task_unlock(task); + return fs; +} + +static int proc_cwd_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt) +{ + struct fs_struct *fs = get_fs_struct(proc_task(inode)); + int result = -ENOENT; if (fs) { read_lock(&fs->lock); *mnt = mntget(fs->pwdmnt); @@ -319,13 +325,8 @@ static int proc_cwd_link(struct inode *inode, struct dentry **dentry, struct vfs static int proc_root_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt) { - struct fs_struct *fs; + struct fs_struct *fs = get_fs_struct(proc_task(inode)); int result = -ENOENT; - task_lock(proc_task(inode)); - fs = proc_task(inode)->fs; - if(fs) - atomic_inc(&fs->count); - task_unlock(proc_task(inode)); if (fs) { read_lock(&fs->lock); *mnt = mntget(fs->rootmnt); -- cgit v1.2.3-70-g09d2 From 5e21ccb136047e556acf0fdf227cab5db05c1c25 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 6 Sep 2005 15:18:23 -0700 Subject: [PATCH] fix enum pid_directory_inos in proc/base.c This patch fixes wrongly placed elements in the pid_directory_inos enum. Also add comment so this mistake is not repeated. Signed-off-by: Miklos Szeredi Cc: Cc: Christoph Hellwig Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/base.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/proc/base.c b/fs/proc/base.c index 73b1e94171b..24eed139e54 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -119,7 +119,6 @@ enum pid_directory_inos { #ifdef CONFIG_AUDITSYSCALL PROC_TGID_LOGINUID, #endif - PROC_TGID_FD_DIR, PROC_TGID_OOM_SCORE, PROC_TGID_OOM_ADJUST, PROC_TID_INO, @@ -158,9 +157,11 @@ enum pid_directory_inos { #ifdef CONFIG_AUDITSYSCALL PROC_TID_LOGINUID, #endif - PROC_TID_FD_DIR = 0x8000, /* 0x8000-0xffff */ PROC_TID_OOM_SCORE, PROC_TID_OOM_ADJUST, + + /* Add new entries before this */ + PROC_TID_FD_DIR = 0x8000, /* 0x8000-0xffff */ }; struct pid_entry { -- cgit v1.2.3-70-g09d2 From ab8d11beb46f0bd0617e04205c01f5c1fe845b61 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 6 Sep 2005 15:18:24 -0700 Subject: [PATCH] remove duplicated code from proc and ptrace Extract common code used by ptrace_attach() and may_ptrace_attach() into a separate function. Signed-off-by: Miklos Szeredi Cc: Cc: Christoph Hellwig Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/base.c | 35 ++++------------------------------- include/linux/ptrace.h | 1 + kernel/ptrace.c | 41 ++++++++++++++++++++++++++++------------- 3 files changed, 33 insertions(+), 44 deletions(-) (limited to 'fs') diff --git a/fs/proc/base.c b/fs/proc/base.c index 24eed139e54..84751f3f52d 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -346,33 +346,6 @@ static int proc_root_link(struct inode *inode, struct dentry **dentry, struct vf (task->state == TASK_STOPPED || task->state == TASK_TRACED) && \ security_ptrace(current,task) == 0)) -static int may_ptrace_attach(struct task_struct *task) -{ - int retval = 0; - - task_lock(task); - - if (!task->mm) - goto out; - if (((current->uid != task->euid) || - (current->uid != task->suid) || - (current->uid != task->uid) || - (current->gid != task->egid) || - (current->gid != task->sgid) || - (current->gid != task->gid)) && !capable(CAP_SYS_PTRACE)) - goto out; - rmb(); - if (task->mm->dumpable != 1 && !capable(CAP_SYS_PTRACE)) - goto out; - if (security_ptrace(current, task)) - goto out; - - retval = 1; -out: - task_unlock(task); - return retval; -} - static int proc_pid_environ(struct task_struct *task, char * buffer) { int res = 0; @@ -382,7 +355,7 @@ static int proc_pid_environ(struct task_struct *task, char * buffer) if (len > PAGE_SIZE) len = PAGE_SIZE; res = access_process_vm(task, mm->env_start, buffer, len, 0); - if (!may_ptrace_attach(task)) + if (!ptrace_may_attach(task)) res = -ESRCH; mmput(mm); } @@ -685,7 +658,7 @@ static ssize_t mem_read(struct file * file, char __user * buf, int ret = -ESRCH; struct mm_struct *mm; - if (!MAY_PTRACE(task) || !may_ptrace_attach(task)) + if (!MAY_PTRACE(task) || !ptrace_may_attach(task)) goto out; ret = -ENOMEM; @@ -711,7 +684,7 @@ static ssize_t mem_read(struct file * file, char __user * buf, this_len = (count > PAGE_SIZE) ? PAGE_SIZE : count; retval = access_process_vm(task, src, page, this_len, 0); - if (!retval || !MAY_PTRACE(task) || !may_ptrace_attach(task)) { + if (!retval || !MAY_PTRACE(task) || !ptrace_may_attach(task)) { if (!ret) ret = -EIO; break; @@ -749,7 +722,7 @@ static ssize_t mem_write(struct file * file, const char * buf, struct task_struct *task = proc_task(file->f_dentry->d_inode); unsigned long dst = *ppos; - if (!MAY_PTRACE(task) || !may_ptrace_attach(task)) + if (!MAY_PTRACE(task) || !ptrace_may_attach(task)) return -ESRCH; page = (char *)__get_free_page(GFP_USER); diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h index 2afdafb6212..dc6f3647bfb 100644 --- a/include/linux/ptrace.h +++ b/include/linux/ptrace.h @@ -90,6 +90,7 @@ extern void __ptrace_link(struct task_struct *child, struct task_struct *new_parent); extern void __ptrace_unlink(struct task_struct *child); extern void ptrace_untrace(struct task_struct *child); +extern int ptrace_may_attach(struct task_struct *task); static inline void ptrace_link(struct task_struct *child, struct task_struct *new_parent) diff --git a/kernel/ptrace.c b/kernel/ptrace.c index 8dcb8f6288b..019e04ec065 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -118,6 +118,33 @@ int ptrace_check_attach(struct task_struct *child, int kill) return ret; } +static int may_attach(struct task_struct *task) +{ + if (!task->mm) + return -EPERM; + if (((current->uid != task->euid) || + (current->uid != task->suid) || + (current->uid != task->uid) || + (current->gid != task->egid) || + (current->gid != task->sgid) || + (current->gid != task->gid)) && !capable(CAP_SYS_PTRACE)) + return -EPERM; + smp_rmb(); + if (!task->mm->dumpable && !capable(CAP_SYS_PTRACE)) + return -EPERM; + + return security_ptrace(current, task); +} + +int ptrace_may_attach(struct task_struct *task) +{ + int err; + task_lock(task); + err = may_attach(task); + task_unlock(task); + return !err; +} + int ptrace_attach(struct task_struct *task) { int retval; @@ -127,22 +154,10 @@ int ptrace_attach(struct task_struct *task) goto bad; if (task == current) goto bad; - if (!task->mm) - goto bad; - if(((current->uid != task->euid) || - (current->uid != task->suid) || - (current->uid != task->uid) || - (current->gid != task->egid) || - (current->gid != task->sgid) || - (current->gid != task->gid)) && !capable(CAP_SYS_PTRACE)) - goto bad; - smp_rmb(); - if (!task->mm->dumpable && !capable(CAP_SYS_PTRACE)) - goto bad; /* the same process cannot be attached many times */ if (task->ptrace & PT_PTRACED) goto bad; - retval = security_ptrace(current, task); + retval = may_attach(task); if (retval) goto bad; -- cgit v1.2.3-70-g09d2 From e922efc342d565a38eed3af377ff403f52148864 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 6 Sep 2005 15:18:25 -0700 Subject: [PATCH] remove duplicated sys_open32() code from 64bit archs 64 bit architectures all implement their own compatibility sys_open(), when in fact the difference is simply not forcing the O_LARGEFILE flag. So use the a common function instead. Signed-off-by: Miklos Szeredi Cc: Cc: Christoph Hellwig Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/ia64/ia32/ia32_entry.S | 2 +- arch/ia64/ia32/sys_ia32.c | 31 ------------------------------- arch/ppc64/kernel/misc.S | 2 +- arch/ppc64/kernel/sys_ppc32.c | 31 ------------------------------- arch/sparc64/kernel/sys_sparc32.c | 24 +----------------------- arch/x86_64/ia32/ia32entry.S | 2 +- arch/x86_64/ia32/sys_ia32.c | 26 -------------------------- fs/compat.c | 10 ++++++++++ fs/open.c | 19 +++++++++++-------- include/linux/fs.h | 1 + 10 files changed, 26 insertions(+), 122 deletions(-) (limited to 'fs') diff --git a/arch/ia64/ia32/ia32_entry.S b/arch/ia64/ia32/ia32_entry.S index 829a6d80711..0708edb06cc 100644 --- a/arch/ia64/ia32/ia32_entry.S +++ b/arch/ia64/ia32/ia32_entry.S @@ -215,7 +215,7 @@ ia32_syscall_table: data8 sys32_fork data8 sys_read data8 sys_write - data8 sys32_open /* 5 */ + data8 compat_sys_open /* 5 */ data8 sys_close data8 sys32_waitpid data8 sys_creat diff --git a/arch/ia64/ia32/sys_ia32.c b/arch/ia64/ia32/sys_ia32.c index c1e20d65dd6..e29a8a55486 100644 --- a/arch/ia64/ia32/sys_ia32.c +++ b/arch/ia64/ia32/sys_ia32.c @@ -2359,37 +2359,6 @@ sys32_brk (unsigned int brk) return ret; } -/* - * Exactly like fs/open.c:sys_open(), except that it doesn't set the O_LARGEFILE flag. - */ -asmlinkage long -sys32_open (const char __user * filename, int flags, int mode) -{ - char * tmp; - int fd, error; - - tmp = getname(filename); - fd = PTR_ERR(tmp); - if (!IS_ERR(tmp)) { - fd = get_unused_fd(); - if (fd >= 0) { - struct file *f = filp_open(tmp, flags, mode); - error = PTR_ERR(f); - if (IS_ERR(f)) - goto out_error; - fd_install(fd, f); - } -out: - putname(tmp); - } - return fd; - -out_error: - put_unused_fd(fd); - fd = error; - goto out; -} - /* Structure for ia32 emulation on ia64 */ struct epoll_event32 { diff --git a/arch/ppc64/kernel/misc.S b/arch/ppc64/kernel/misc.S index 474df0a862b..2164bd7b4ef 100644 --- a/arch/ppc64/kernel/misc.S +++ b/arch/ppc64/kernel/misc.S @@ -957,7 +957,7 @@ _GLOBAL(sys_call_table32) .llong .ppc_fork .llong .sys_read .llong .sys_write - .llong .sys32_open /* 5 */ + .llong .compat_sys_open /* 5 */ .llong .sys_close .llong .sys32_waitpid .llong .sys32_creat diff --git a/arch/ppc64/kernel/sys_ppc32.c b/arch/ppc64/kernel/sys_ppc32.c index 206619080e6..214914a95a5 100644 --- a/arch/ppc64/kernel/sys_ppc32.c +++ b/arch/ppc64/kernel/sys_ppc32.c @@ -867,37 +867,6 @@ off_t ppc32_lseek(unsigned int fd, u32 offset, unsigned int origin) return sys_lseek(fd, (int)offset, origin); } -/* - * This is just a version for 32-bit applications which does - * not force O_LARGEFILE on. - */ -asmlinkage long sys32_open(const char __user * filename, int flags, int mode) -{ - char * tmp; - int fd, error; - - tmp = getname(filename); - fd = PTR_ERR(tmp); - if (!IS_ERR(tmp)) { - fd = get_unused_fd(); - if (fd >= 0) { - struct file * f = filp_open(tmp, flags, mode); - error = PTR_ERR(f); - if (IS_ERR(f)) - goto out_error; - fd_install(fd, f); - } -out: - putname(tmp); - } - return fd; - -out_error: - put_unused_fd(fd); - fd = error; - goto out; -} - /* Note: it is necessary to treat bufsiz as an unsigned int, * with the corresponding cast to a signed int to insure that the * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) diff --git a/arch/sparc64/kernel/sys_sparc32.c b/arch/sparc64/kernel/sys_sparc32.c index 1d3aa588df8..7f6239ed252 100644 --- a/arch/sparc64/kernel/sys_sparc32.c +++ b/arch/sparc64/kernel/sys_sparc32.c @@ -1002,29 +1002,7 @@ asmlinkage long sys32_adjtimex(struct timex32 __user *utp) asmlinkage long sparc32_open(const char __user *filename, int flags, int mode) { - char * tmp; - int fd, error; - - tmp = getname(filename); - fd = PTR_ERR(tmp); - if (!IS_ERR(tmp)) { - fd = get_unused_fd(); - if (fd >= 0) { - struct file * f = filp_open(tmp, flags, mode); - error = PTR_ERR(f); - if (IS_ERR(f)) - goto out_error; - fd_install(fd, f); - } -out: - putname(tmp); - } - return fd; - -out_error: - put_unused_fd(fd); - fd = error; - goto out; + return do_sys_open(filename, flags, mode); } extern unsigned long do_mremap(unsigned long addr, diff --git a/arch/x86_64/ia32/ia32entry.S b/arch/x86_64/ia32/ia32entry.S index c45d6a05b98..f174083d556 100644 --- a/arch/x86_64/ia32/ia32entry.S +++ b/arch/x86_64/ia32/ia32entry.S @@ -307,7 +307,7 @@ ia32_sys_call_table: .quad stub32_fork .quad sys_read .quad sys_write - .quad sys32_open /* 5 */ + .quad compat_sys_open /* 5 */ .quad sys_close .quad sys32_waitpid .quad sys_creat diff --git a/arch/x86_64/ia32/sys_ia32.c b/arch/x86_64/ia32/sys_ia32.c index be996d1b691..04d80406ce4 100644 --- a/arch/x86_64/ia32/sys_ia32.c +++ b/arch/x86_64/ia32/sys_ia32.c @@ -969,32 +969,6 @@ long sys32_kill(int pid, int sig) return sys_kill(pid, sig); } -asmlinkage long sys32_open(const char __user * filename, int flags, int mode) -{ - char * tmp; - int fd, error; - - /* don't force O_LARGEFILE */ - tmp = getname(filename); - fd = PTR_ERR(tmp); - if (!IS_ERR(tmp)) { - fd = get_unused_fd(); - if (fd >= 0) { - struct file *f = filp_open(tmp, flags, mode); - error = PTR_ERR(f); - if (IS_ERR(f)) { - put_unused_fd(fd); - fd = error; - } else { - fsnotify_open(f->f_dentry); - fd_install(fd, f); - } - } - putname(tmp); - } - return fd; -} - extern asmlinkage long sys_timer_create(clockid_t which_clock, struct sigevent __user *timer_event_spec, diff --git a/fs/compat.c b/fs/compat.c index 2eb03c49b07..8c665705c6a 100644 --- a/fs/compat.c +++ b/fs/compat.c @@ -1274,6 +1274,16 @@ out: return ret; } +/* + * Exactly like fs/open.c:sys_open(), except that it doesn't set the + * O_LARGEFILE flag. + */ +asmlinkage long +compat_sys_open(const char __user *filename, int flags, int mode) +{ + return do_sys_open(filename, flags, mode); +} + /* * compat_count() counts the number of arguments/envelopes. It is basically * a copy of count() from fs/exec.c, except that it works with 32 bit argv diff --git a/fs/open.c b/fs/open.c index 32bf05e2996..4ee2dcc31c2 100644 --- a/fs/open.c +++ b/fs/open.c @@ -933,16 +933,11 @@ void fastcall fd_install(unsigned int fd, struct file * file) EXPORT_SYMBOL(fd_install); -asmlinkage long sys_open(const char __user * filename, int flags, int mode) +long do_sys_open(const char __user *filename, int flags, int mode) { - char * tmp; - int fd; + char *tmp = getname(filename); + int fd = PTR_ERR(tmp); - if (force_o_largefile()) - flags |= O_LARGEFILE; - - tmp = getname(filename); - fd = PTR_ERR(tmp); if (!IS_ERR(tmp)) { fd = get_unused_fd(); if (fd >= 0) { @@ -959,6 +954,14 @@ asmlinkage long sys_open(const char __user * filename, int flags, int mode) } return fd; } + +asmlinkage long sys_open(const char __user *filename, int flags, int mode) +{ + if (force_o_largefile()) + flags |= O_LARGEFILE; + + return do_sys_open(filename, flags, mode); +} EXPORT_SYMBOL_GPL(sys_open); #ifndef __alpha__ diff --git a/include/linux/fs.h b/include/linux/fs.h index 57e294bf83f..7e1b589842a 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1280,6 +1280,7 @@ static inline int break_lease(struct inode *inode, unsigned int mode) /* fs/open.c */ extern int do_truncate(struct dentry *, loff_t start); +extern long do_sys_open(const char __user *filename, int flags, int mode); extern struct file *filp_open(const char *, int, int); extern struct file * dentry_open(struct dentry *, struct vfsmount *, int); extern int filp_close(struct file *, fl_owner_t id); -- cgit v1.2.3-70-g09d2 From e08fc0457af28f2ebec36296ea7ada6024fde81b Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 6 Sep 2005 15:18:26 -0700 Subject: [PATCH] cifs_create() fix cifs_create() did totally the wrong thing with nd->intent.open.flags: it interpreted nd->intent.open.flags as the original open flags, not the one transformed for open_namei(). Also it used the intent data even if it was not filled in (if called from sys_mknod()). Signed-off-by: Miklos Szeredi Cc: Cc: Christoph Hellwig Cc: Steven French Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/cifs/dir.c | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) (limited to 'fs') diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index 3f3538d4a1f..d335269bd91 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c @@ -145,24 +145,23 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode, return -ENOMEM; } - if(nd) { - if ((nd->intent.open.flags & O_ACCMODE) == O_RDONLY) - desiredAccess = GENERIC_READ; - else if ((nd->intent.open.flags & O_ACCMODE) == O_WRONLY) { - desiredAccess = GENERIC_WRITE; - write_only = TRUE; - } else if ((nd->intent.open.flags & O_ACCMODE) == O_RDWR) { - /* GENERIC_ALL is too much permission to request */ - /* can cause unnecessary access denied on create */ - /* desiredAccess = GENERIC_ALL; */ - desiredAccess = GENERIC_READ | GENERIC_WRITE; + if(nd && (nd->flags & LOOKUP_OPEN)) { + int oflags = nd->intent.open.flags; + + desiredAccess = 0; + if (oflags & FMODE_READ) + desiredAccess |= GENERIC_READ; + if (oflags & FMODE_WRITE) { + desiredAccess |= GENERIC_WRITE; + if (!(oflags & FMODE_READ)) + write_only = TRUE; } - if((nd->intent.open.flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL)) + if((oflags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL)) disposition = FILE_CREATE; - else if((nd->intent.open.flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC)) + else if((oflags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC)) disposition = FILE_OVERWRITE_IF; - else if((nd->intent.open.flags & O_CREAT) == O_CREAT) + else if((oflags & O_CREAT) == O_CREAT) disposition = FILE_OPEN_IF; else { cFYI(1,("Create flag not set in create function")); -- cgit v1.2.3-70-g09d2 From e915fc497a8da551f32b7e5fda687eb4a10bc23b Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Tue, 6 Sep 2005 15:18:35 -0700 Subject: [PATCH] fs: convert kcalloc to kzalloc This patch converts kcalloc(1, ...) calls to use the new kzalloc() function. Signed-off-by: Pekka Enberg Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/cifs/connect.c | 82 ++++++++++++++++++++++++------------------------ fs/freevxfs/vxfs_super.c | 2 +- 2 files changed, 42 insertions(+), 42 deletions(-) (limited to 'fs') diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index e568cc47a7f..3217ac5f6bd 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -836,7 +836,7 @@ cifs_parse_mount_options(char *options, const char *devname,struct smb_vol *vol) /* go from value to value + temp_len condensing double commas to singles. Note that this ends up allocating a few bytes too many, which is ok */ - vol->password = kcalloc(1, temp_len, GFP_KERNEL); + vol->password = kzalloc(temp_len, GFP_KERNEL); if(vol->password == NULL) { printk("CIFS: no memory for pass\n"); return 1; @@ -851,7 +851,7 @@ cifs_parse_mount_options(char *options, const char *devname,struct smb_vol *vol) } vol->password[j] = 0; } else { - vol->password = kcalloc(1, temp_len+1, GFP_KERNEL); + vol->password = kzalloc(temp_len+1, GFP_KERNEL); if(vol->password == NULL) { printk("CIFS: no memory for pass\n"); return 1; @@ -1317,7 +1317,7 @@ ipv4_connect(struct sockaddr_in *psin_server, struct socket **csocket, sessinit is sent but no second negprot */ struct rfc1002_session_packet * ses_init_buf; struct smb_hdr * smb_buf; - ses_init_buf = kcalloc(1, sizeof(struct rfc1002_session_packet), GFP_KERNEL); + ses_init_buf = kzalloc(sizeof(struct rfc1002_session_packet), GFP_KERNEL); if(ses_init_buf) { ses_init_buf->trailer.session_req.called_len = 32; rfc1002mangle(ses_init_buf->trailer.session_req.called_name, @@ -1964,7 +1964,7 @@ CIFSSessSetup(unsigned int xid, struct cifsSesInfo *ses, /* We look for obvious messed up bcc or strings in response so we do not go off the end since (at least) WIN2K and Windows XP have a major bug in not null terminating last Unicode string in response */ - ses->serverOS = kcalloc(1, 2 * (len + 1), GFP_KERNEL); + ses->serverOS = kzalloc(2 * (len + 1), GFP_KERNEL); if(ses->serverOS == NULL) goto sesssetup_nomem; cifs_strfromUCS_le(ses->serverOS, @@ -1976,7 +1976,7 @@ CIFSSessSetup(unsigned int xid, struct cifsSesInfo *ses, if (remaining_words > 0) { len = UniStrnlen((wchar_t *)bcc_ptr, remaining_words-1); - ses->serverNOS = kcalloc(1, 2 * (len + 1),GFP_KERNEL); + ses->serverNOS = kzalloc(2 * (len + 1),GFP_KERNEL); if(ses->serverNOS == NULL) goto sesssetup_nomem; cifs_strfromUCS_le(ses->serverNOS, @@ -1994,7 +1994,7 @@ CIFSSessSetup(unsigned int xid, struct cifsSesInfo *ses, len = UniStrnlen((wchar_t *) bcc_ptr, remaining_words); /* last string is not always null terminated (for e.g. for Windows XP & 2000) */ ses->serverDomain = - kcalloc(1, 2*(len+1),GFP_KERNEL); + kzalloc(2*(len+1),GFP_KERNEL); if(ses->serverDomain == NULL) goto sesssetup_nomem; cifs_strfromUCS_le(ses->serverDomain, @@ -2005,22 +2005,22 @@ CIFSSessSetup(unsigned int xid, struct cifsSesInfo *ses, } /* else no more room so create dummy domain string */ else ses->serverDomain = - kcalloc(1, 2, GFP_KERNEL); + kzalloc(2, GFP_KERNEL); } else { /* no room so create dummy domain and NOS string */ /* if these kcallocs fail not much we can do, but better to not fail the sesssetup itself */ ses->serverDomain = - kcalloc(1, 2, GFP_KERNEL); + kzalloc(2, GFP_KERNEL); ses->serverNOS = - kcalloc(1, 2, GFP_KERNEL); + kzalloc(2, GFP_KERNEL); } } else { /* ASCII */ len = strnlen(bcc_ptr, 1024); if (((long) bcc_ptr + len) - (long) pByteArea(smb_buffer_response) <= BCC(smb_buffer_response)) { - ses->serverOS = kcalloc(1, len + 1,GFP_KERNEL); + ses->serverOS = kzalloc(len + 1,GFP_KERNEL); if(ses->serverOS == NULL) goto sesssetup_nomem; strncpy(ses->serverOS,bcc_ptr, len); @@ -2030,7 +2030,7 @@ CIFSSessSetup(unsigned int xid, struct cifsSesInfo *ses, bcc_ptr++; len = strnlen(bcc_ptr, 1024); - ses->serverNOS = kcalloc(1, len + 1,GFP_KERNEL); + ses->serverNOS = kzalloc(len + 1,GFP_KERNEL); if(ses->serverNOS == NULL) goto sesssetup_nomem; strncpy(ses->serverNOS, bcc_ptr, len); @@ -2039,7 +2039,7 @@ CIFSSessSetup(unsigned int xid, struct cifsSesInfo *ses, bcc_ptr++; len = strnlen(bcc_ptr, 1024); - ses->serverDomain = kcalloc(1, len + 1,GFP_KERNEL); + ses->serverDomain = kzalloc(len + 1,GFP_KERNEL); if(ses->serverDomain == NULL) goto sesssetup_nomem; strncpy(ses->serverDomain, bcc_ptr, len); @@ -2240,7 +2240,7 @@ CIFSSpnegoSessSetup(unsigned int xid, struct cifsSesInfo *ses, the end since (at least) WIN2K and Windows XP have a major bug in not null terminating last Unicode string in response */ ses->serverOS = - kcalloc(1, 2 * (len + 1), GFP_KERNEL); + kzalloc(2 * (len + 1), GFP_KERNEL); cifs_strfromUCS_le(ses->serverOS, (wchar_t *) bcc_ptr, len, @@ -2254,7 +2254,7 @@ CIFSSpnegoSessSetup(unsigned int xid, struct cifsSesInfo *ses, remaining_words - 1); ses->serverNOS = - kcalloc(1, 2 * (len + 1), + kzalloc(2 * (len + 1), GFP_KERNEL); cifs_strfromUCS_le(ses->serverNOS, (wchar_t *)bcc_ptr, @@ -2267,7 +2267,7 @@ CIFSSpnegoSessSetup(unsigned int xid, struct cifsSesInfo *ses, if (remaining_words > 0) { len = UniStrnlen((wchar_t *) bcc_ptr, remaining_words); /* last string is not always null terminated (for e.g. for Windows XP & 2000) */ - ses->serverDomain = kcalloc(1, 2*(len+1),GFP_KERNEL); + ses->serverDomain = kzalloc(2*(len+1),GFP_KERNEL); cifs_strfromUCS_le(ses->serverDomain, (wchar_t *)bcc_ptr, len, @@ -2278,10 +2278,10 @@ CIFSSpnegoSessSetup(unsigned int xid, struct cifsSesInfo *ses, } /* else no more room so create dummy domain string */ else ses->serverDomain = - kcalloc(1, 2,GFP_KERNEL); + kzalloc(2,GFP_KERNEL); } else { /* no room so create dummy domain and NOS string */ - ses->serverDomain = kcalloc(1, 2, GFP_KERNEL); - ses->serverNOS = kcalloc(1, 2, GFP_KERNEL); + ses->serverDomain = kzalloc(2, GFP_KERNEL); + ses->serverNOS = kzalloc(2, GFP_KERNEL); } } else { /* ASCII */ @@ -2289,7 +2289,7 @@ CIFSSpnegoSessSetup(unsigned int xid, struct cifsSesInfo *ses, if (((long) bcc_ptr + len) - (long) pByteArea(smb_buffer_response) <= BCC(smb_buffer_response)) { - ses->serverOS = kcalloc(1, len + 1, GFP_KERNEL); + ses->serverOS = kzalloc(len + 1, GFP_KERNEL); strncpy(ses->serverOS, bcc_ptr, len); bcc_ptr += len; @@ -2297,14 +2297,14 @@ CIFSSpnegoSessSetup(unsigned int xid, struct cifsSesInfo *ses, bcc_ptr++; len = strnlen(bcc_ptr, 1024); - ses->serverNOS = kcalloc(1, len + 1,GFP_KERNEL); + ses->serverNOS = kzalloc(len + 1,GFP_KERNEL); strncpy(ses->serverNOS, bcc_ptr, len); bcc_ptr += len; bcc_ptr[0] = 0; bcc_ptr++; len = strnlen(bcc_ptr, 1024); - ses->serverDomain = kcalloc(1, len + 1, GFP_KERNEL); + ses->serverDomain = kzalloc(len + 1, GFP_KERNEL); strncpy(ses->serverDomain, bcc_ptr, len); bcc_ptr += len; bcc_ptr[0] = 0; @@ -2554,7 +2554,7 @@ CIFSNTLMSSPNegotiateSessSetup(unsigned int xid, the end since (at least) WIN2K and Windows XP have a major bug in not null terminating last Unicode string in response */ ses->serverOS = - kcalloc(1, 2 * (len + 1), GFP_KERNEL); + kzalloc(2 * (len + 1), GFP_KERNEL); cifs_strfromUCS_le(ses->serverOS, (wchar_t *) bcc_ptr, len, @@ -2569,7 +2569,7 @@ CIFSNTLMSSPNegotiateSessSetup(unsigned int xid, remaining_words - 1); ses->serverNOS = - kcalloc(1, 2 * (len + 1), + kzalloc(2 * (len + 1), GFP_KERNEL); cifs_strfromUCS_le(ses-> serverNOS, @@ -2586,7 +2586,7 @@ CIFSNTLMSSPNegotiateSessSetup(unsigned int xid, len = UniStrnlen((wchar_t *) bcc_ptr, remaining_words); /* last string is not always null terminated (for e.g. for Windows XP & 2000) */ ses->serverDomain = - kcalloc(1, 2 * + kzalloc(2 * (len + 1), GFP_KERNEL); @@ -2612,13 +2612,13 @@ CIFSNTLMSSPNegotiateSessSetup(unsigned int xid, } /* else no more room so create dummy domain string */ else ses->serverDomain = - kcalloc(1, 2, + kzalloc(2, GFP_KERNEL); } else { /* no room so create dummy domain and NOS string */ ses->serverDomain = - kcalloc(1, 2, GFP_KERNEL); + kzalloc(2, GFP_KERNEL); ses->serverNOS = - kcalloc(1, 2, GFP_KERNEL); + kzalloc(2, GFP_KERNEL); } } else { /* ASCII */ len = strnlen(bcc_ptr, 1024); @@ -2626,7 +2626,7 @@ CIFSNTLMSSPNegotiateSessSetup(unsigned int xid, pByteArea(smb_buffer_response) <= BCC(smb_buffer_response)) { ses->serverOS = - kcalloc(1, len + 1, + kzalloc(len + 1, GFP_KERNEL); strncpy(ses->serverOS, bcc_ptr, len); @@ -2637,7 +2637,7 @@ CIFSNTLMSSPNegotiateSessSetup(unsigned int xid, len = strnlen(bcc_ptr, 1024); ses->serverNOS = - kcalloc(1, len + 1, + kzalloc(len + 1, GFP_KERNEL); strncpy(ses->serverNOS, bcc_ptr, len); bcc_ptr += len; @@ -2646,7 +2646,7 @@ CIFSNTLMSSPNegotiateSessSetup(unsigned int xid, len = strnlen(bcc_ptr, 1024); ses->serverDomain = - kcalloc(1, len + 1, + kzalloc(len + 1, GFP_KERNEL); strncpy(ses->serverDomain, bcc_ptr, len); bcc_ptr += len; @@ -2948,7 +2948,7 @@ CIFSNTLMSSPAuthSessSetup(unsigned int xid, struct cifsSesInfo *ses, the end since (at least) WIN2K and Windows XP have a major bug in not null terminating last Unicode string in response */ ses->serverOS = - kcalloc(1, 2 * (len + 1), GFP_KERNEL); + kzalloc(2 * (len + 1), GFP_KERNEL); cifs_strfromUCS_le(ses->serverOS, (wchar_t *) bcc_ptr, len, @@ -2963,7 +2963,7 @@ CIFSNTLMSSPAuthSessSetup(unsigned int xid, struct cifsSesInfo *ses, remaining_words - 1); ses->serverNOS = - kcalloc(1, 2 * (len + 1), + kzalloc(2 * (len + 1), GFP_KERNEL); cifs_strfromUCS_le(ses-> serverNOS, @@ -2979,7 +2979,7 @@ CIFSNTLMSSPAuthSessSetup(unsigned int xid, struct cifsSesInfo *ses, len = UniStrnlen((wchar_t *) bcc_ptr, remaining_words); /* last string not always null terminated (e.g. for Windows XP & 2000) */ ses->serverDomain = - kcalloc(1, 2 * + kzalloc(2 * (len + 1), GFP_KERNEL); @@ -3004,17 +3004,17 @@ CIFSNTLMSSPAuthSessSetup(unsigned int xid, struct cifsSesInfo *ses, = 0; } /* else no more room so create dummy domain string */ else - ses->serverDomain = kcalloc(1, 2,GFP_KERNEL); + ses->serverDomain = kzalloc(2,GFP_KERNEL); } else { /* no room so create dummy domain and NOS string */ - ses->serverDomain = kcalloc(1, 2, GFP_KERNEL); - ses->serverNOS = kcalloc(1, 2, GFP_KERNEL); + ses->serverDomain = kzalloc(2, GFP_KERNEL); + ses->serverNOS = kzalloc(2, GFP_KERNEL); } } else { /* ASCII */ len = strnlen(bcc_ptr, 1024); if (((long) bcc_ptr + len) - (long) pByteArea(smb_buffer_response) <= BCC(smb_buffer_response)) { - ses->serverOS = kcalloc(1, len + 1,GFP_KERNEL); + ses->serverOS = kzalloc(len + 1,GFP_KERNEL); strncpy(ses->serverOS,bcc_ptr, len); bcc_ptr += len; @@ -3022,14 +3022,14 @@ CIFSNTLMSSPAuthSessSetup(unsigned int xid, struct cifsSesInfo *ses, bcc_ptr++; len = strnlen(bcc_ptr, 1024); - ses->serverNOS = kcalloc(1, len+1,GFP_KERNEL); + ses->serverNOS = kzalloc(len+1,GFP_KERNEL); strncpy(ses->serverNOS, bcc_ptr, len); bcc_ptr += len; bcc_ptr[0] = 0; bcc_ptr++; len = strnlen(bcc_ptr, 1024); - ses->serverDomain = kcalloc(1, len+1,GFP_KERNEL); + ses->serverDomain = kzalloc(len+1,GFP_KERNEL); strncpy(ses->serverDomain, bcc_ptr, len); bcc_ptr += len; bcc_ptr[0] = 0; @@ -3141,7 +3141,7 @@ CIFSTCon(unsigned int xid, struct cifsSesInfo *ses, if(tcon->nativeFileSystem) kfree(tcon->nativeFileSystem); tcon->nativeFileSystem = - kcalloc(1, length + 2, GFP_KERNEL); + kzalloc(length + 2, GFP_KERNEL); cifs_strfromUCS_le(tcon->nativeFileSystem, (wchar_t *) bcc_ptr, length, nls_codepage); @@ -3159,7 +3159,7 @@ CIFSTCon(unsigned int xid, struct cifsSesInfo *ses, if(tcon->nativeFileSystem) kfree(tcon->nativeFileSystem); tcon->nativeFileSystem = - kcalloc(1, length + 1, GFP_KERNEL); + kzalloc(length + 1, GFP_KERNEL); strncpy(tcon->nativeFileSystem, bcc_ptr, length); } diff --git a/fs/freevxfs/vxfs_super.c b/fs/freevxfs/vxfs_super.c index 27f66d3e8a0..6aa6fbe4f8e 100644 --- a/fs/freevxfs/vxfs_super.c +++ b/fs/freevxfs/vxfs_super.c @@ -155,7 +155,7 @@ static int vxfs_fill_super(struct super_block *sbp, void *dp, int silent) sbp->s_flags |= MS_RDONLY; - infp = kcalloc(1, sizeof(*infp), GFP_KERNEL); + infp = kzalloc(sizeof(*infp), GFP_KERNEL); if (!infp) { printk(KERN_WARNING "vxfs: unable to allocate incore superblock\n"); return -ENOMEM; -- cgit v1.2.3-70-g09d2 From a5e3985fa014029eb6795664c704953720cc7f7d Mon Sep 17 00:00:00 2001 From: Roman Zippel Date: Tue, 6 Sep 2005 15:18:47 -0700 Subject: [PATCH] hfs: remove debug code This removes some old debug code, which is no longer needed. Signed-off-by: Roman Zippel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/hfs/bnode.c | 21 +-------------------- fs/hfsplus/bnode.c | 21 +-------------------- 2 files changed, 2 insertions(+), 40 deletions(-) (limited to 'fs') diff --git a/fs/hfs/bnode.c b/fs/hfs/bnode.c index a096c5a5666..3d5cdc6847c 100644 --- a/fs/hfs/bnode.c +++ b/fs/hfs/bnode.c @@ -13,8 +13,6 @@ #include "btree.h" -#define REF_PAGES 0 - void hfs_bnode_read(struct hfs_bnode *node, void *buf, int off, int len) { @@ -289,9 +287,7 @@ static struct hfs_bnode *__hfs_bnode_create(struct hfs_btree *tree, u32 cnid) page_cache_release(page); goto fail; } -#if !REF_PAGES page_cache_release(page); -#endif node->page[i] = page; } @@ -449,13 +445,6 @@ void hfs_bnode_get(struct hfs_bnode *node) { if (node) { atomic_inc(&node->refcnt); -#if REF_PAGES - { - int i; - for (i = 0; i < node->tree->pages_per_bnode; i++) - get_page(node->page[i]); - } -#endif dprint(DBG_BNODE_REFS, "get_node(%d:%d): %d\n", node->tree->cnid, node->this, atomic_read(&node->refcnt)); } @@ -472,20 +461,12 @@ void hfs_bnode_put(struct hfs_bnode *node) node->tree->cnid, node->this, atomic_read(&node->refcnt)); if (!atomic_read(&node->refcnt)) BUG(); - if (!atomic_dec_and_lock(&node->refcnt, &tree->hash_lock)) { -#if REF_PAGES - for (i = 0; i < tree->pages_per_bnode; i++) - put_page(node->page[i]); -#endif + if (!atomic_dec_and_lock(&node->refcnt, &tree->hash_lock)) return; - } for (i = 0; i < tree->pages_per_bnode; i++) { if (!node->page[i]) continue; mark_page_accessed(node->page[i]); -#if REF_PAGES - put_page(node->page[i]); -#endif } if (test_bit(HFS_BNODE_DELETED, &node->flags)) { diff --git a/fs/hfsplus/bnode.c b/fs/hfsplus/bnode.c index 8868d3b766f..b85abc6e6f8 100644 --- a/fs/hfsplus/bnode.c +++ b/fs/hfsplus/bnode.c @@ -18,8 +18,6 @@ #include "hfsplus_fs.h" #include "hfsplus_raw.h" -#define REF_PAGES 0 - /* Copy a specified range of bytes from the raw data of a node */ void hfs_bnode_read(struct hfs_bnode *node, void *buf, int off, int len) { @@ -450,9 +448,7 @@ static struct hfs_bnode *__hfs_bnode_create(struct hfs_btree *tree, u32 cnid) page_cache_release(page); goto fail; } -#if !REF_PAGES page_cache_release(page); -#endif node->page[i] = page; } @@ -612,13 +608,6 @@ void hfs_bnode_get(struct hfs_bnode *node) { if (node) { atomic_inc(&node->refcnt); -#if REF_PAGES - { - int i; - for (i = 0; i < node->tree->pages_per_bnode; i++) - get_page(node->page[i]); - } -#endif dprint(DBG_BNODE_REFS, "get_node(%d:%d): %d\n", node->tree->cnid, node->this, atomic_read(&node->refcnt)); } @@ -635,20 +624,12 @@ void hfs_bnode_put(struct hfs_bnode *node) node->tree->cnid, node->this, atomic_read(&node->refcnt)); if (!atomic_read(&node->refcnt)) BUG(); - if (!atomic_dec_and_lock(&node->refcnt, &tree->hash_lock)) { -#if REF_PAGES - for (i = 0; i < tree->pages_per_bnode; i++) - put_page(node->page[i]); -#endif + if (!atomic_dec_and_lock(&node->refcnt, &tree->hash_lock)) return; - } for (i = 0; i < tree->pages_per_bnode; i++) { if (!node->page[i]) continue; mark_page_accessed(node->page[i]); -#if REF_PAGES - put_page(node->page[i]); -#endif } if (test_bit(HFS_BNODE_DELETED, &node->flags)) { -- cgit v1.2.3-70-g09d2 From 717dd80e999cdc84fb611decec5c5054d37c40d2 Mon Sep 17 00:00:00 2001 From: Roman Zippel Date: Tue, 6 Sep 2005 15:18:48 -0700 Subject: [PATCH] hfs: show_options support This adds support for show_options. It also fixes some namespace polution in the hfsplus driver. Signed-off-by: Roman Zippel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/hfs/super.c | 25 +++++++++++++++++++++++++ fs/hfsplus/hfsplus_fs.h | 5 +++-- fs/hfsplus/options.c | 26 ++++++++++++++++++++++++-- fs/hfsplus/super.c | 5 +++-- 4 files changed, 55 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/hfs/super.c b/fs/hfs/super.c index ab783f6afa3..fd78414ee7e 100644 --- a/fs/hfs/super.c +++ b/fs/hfs/super.c @@ -15,8 +15,10 @@ #include #include #include +#include #include #include +#include #include #include "hfs_fs.h" @@ -111,6 +113,28 @@ static int hfs_remount(struct super_block *sb, int *flags, char *data) return 0; } +static int hfs_show_options(struct seq_file *seq, struct vfsmount *mnt) +{ + struct hfs_sb_info *sbi = HFS_SB(mnt->mnt_sb); + + if (sbi->s_creator != cpu_to_be32(0x3f3f3f3f)) + seq_printf(seq, ",creator=%.4s", (char *)&sbi->s_creator); + if (sbi->s_type != cpu_to_be32(0x3f3f3f3f)) + seq_printf(seq, ",type=%.4s", (char *)&sbi->s_type); + seq_printf(seq, ",uid=%u,gid=%u", sbi->s_uid, sbi->s_gid); + if (sbi->s_file_umask != 0133) + seq_printf(seq, ",file_umask=%o", sbi->s_file_umask); + if (sbi->s_dir_umask != 0022) + seq_printf(seq, ",dir_umask=%o", sbi->s_dir_umask); + if (sbi->part >= 0) + seq_printf(seq, ",part=%u", sbi->part); + if (sbi->session >= 0) + seq_printf(seq, ",session=%u", sbi->session); + if (sbi->s_quiet) + seq_printf(seq, ",quiet"); + return 0; +} + static struct inode *hfs_alloc_inode(struct super_block *sb) { struct hfs_inode_info *i; @@ -133,6 +157,7 @@ static struct super_operations hfs_super_operations = { .write_super = hfs_write_super, .statfs = hfs_statfs, .remount_fs = hfs_remount, + .show_options = hfs_show_options, }; enum { diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h index 533094a570d..2bc0cdd30e5 100644 --- a/fs/hfsplus/hfsplus_fs.h +++ b/fs/hfsplus/hfsplus_fs.h @@ -343,8 +343,9 @@ ssize_t hfsplus_getxattr(struct dentry *dentry, const char *name, ssize_t hfsplus_listxattr(struct dentry *dentry, char *buffer, size_t size); /* options.c */ -int parse_options(char *, struct hfsplus_sb_info *); -void fill_defaults(struct hfsplus_sb_info *); +int hfsplus_parse_options(char *, struct hfsplus_sb_info *); +void hfsplus_fill_defaults(struct hfsplus_sb_info *); +int hfsplus_show_options(struct seq_file *, struct vfsmount *); /* tables.c */ extern u16 hfsplus_case_fold_table[]; diff --git a/fs/hfsplus/options.c b/fs/hfsplus/options.c index 1cca0102c98..cca0818aa4c 100644 --- a/fs/hfsplus/options.c +++ b/fs/hfsplus/options.c @@ -13,6 +13,8 @@ #include #include #include +#include +#include #include "hfsplus_fs.h" enum { @@ -38,7 +40,7 @@ static match_table_t tokens = { }; /* Initialize an options object to reasonable defaults */ -void fill_defaults(struct hfsplus_sb_info *opts) +void hfsplus_fill_defaults(struct hfsplus_sb_info *opts) { if (!opts) return; @@ -63,7 +65,7 @@ static inline int match_fourchar(substring_t *arg, u32 *result) /* Parse options from mount. Returns 0 on failure */ /* input is the options passed to mount() as a string */ -int parse_options(char *input, struct hfsplus_sb_info *sbi) +int hfsplus_parse_options(char *input, struct hfsplus_sb_info *sbi) { char *p; substring_t args[MAX_OPT_ARGS]; @@ -160,3 +162,23 @@ done: return 1; } + +int hfsplus_show_options(struct seq_file *seq, struct vfsmount *mnt) +{ + struct hfsplus_sb_info *sbi = &HFSPLUS_SB(mnt->mnt_sb); + + if (sbi->creator != HFSPLUS_DEF_CR_TYPE) + seq_printf(seq, ",creator=%.4s", (char *)&sbi->creator); + if (sbi->type != HFSPLUS_DEF_CR_TYPE) + seq_printf(seq, ",type=%.4s", (char *)&sbi->type); + seq_printf(seq, ",umask=%o,uid=%u,gid=%u", sbi->umask, sbi->uid, sbi->gid); + if (sbi->part >= 0) + seq_printf(seq, ",part=%u", sbi->part); + if (sbi->session >= 0) + seq_printf(seq, ",session=%u", sbi->session); + if (sbi->nls) + seq_printf(seq, ",nls=%s", sbi->nls->charset); + if (sbi->flags & HFSPLUS_SB_NODECOMPOSE) + seq_printf(seq, ",nodecompose"); + return 0; +} diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c index d55ad67b8e4..b0689955dae 100644 --- a/fs/hfsplus/super.c +++ b/fs/hfsplus/super.c @@ -277,6 +277,7 @@ static struct super_operations hfsplus_sops = { .write_super = hfsplus_write_super, .statfs = hfsplus_statfs, .remount_fs = hfsplus_remount, + .show_options = hfsplus_show_options, }; static int hfsplus_fill_super(struct super_block *sb, void *data, int silent) @@ -297,8 +298,8 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent) memset(sbi, 0, sizeof(HFSPLUS_SB(sb))); sb->s_fs_info = sbi; INIT_HLIST_HEAD(&sbi->rsrc_inodes); - fill_defaults(sbi); - if (!parse_options(data, sbi)) { + hfsplus_fill_defaults(sbi); + if (!hfsplus_parse_options(data, sbi)) { if (!silent) printk("HFS+-fs: unable to parse mount options\n"); err = -EINVAL; -- cgit v1.2.3-70-g09d2 From 328b9227865026268261a24a97a578907b280415 Mon Sep 17 00:00:00 2001 From: Roman Zippel Date: Tue, 6 Sep 2005 15:18:49 -0700 Subject: [PATCH] hfs: NLS support This adds NLS support to HFS. Using the kernel options iocharset and codepage it's possible to map the disk encoding to a local mapping. If these options are not used, it falls back to the old direct mapping. Signed-off-by: Roman Zippel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/hfs/catalog.c | 35 +++++++++-------- fs/hfs/dir.c | 11 +++--- fs/hfs/hfs.h | 1 + fs/hfs/hfs_fs.h | 8 ++-- fs/hfs/inode.c | 2 +- fs/hfs/mdb.c | 6 +++ fs/hfs/super.c | 43 +++++++++++++++++++++ fs/hfs/trans.c | 116 +++++++++++++++++++++++++++++++++++++++++++++---------- 8 files changed, 176 insertions(+), 46 deletions(-) (limited to 'fs') diff --git a/fs/hfs/catalog.c b/fs/hfs/catalog.c index 65dedefcabf..2fcd679f023 100644 --- a/fs/hfs/catalog.c +++ b/fs/hfs/catalog.c @@ -20,12 +20,12 @@ * * Given the ID of the parent and the name build a search key. */ -void hfs_cat_build_key(btree_key *key, u32 parent, struct qstr *name) +void hfs_cat_build_key(struct super_block *sb, btree_key *key, u32 parent, struct qstr *name) { key->cat.reserved = 0; key->cat.ParID = cpu_to_be32(parent); if (name) { - hfs_triv2mac(&key->cat.CName, name); + hfs_asc2mac(sb, &key->cat.CName, name); key->key_len = 6 + key->cat.CName.len; } else { memset(&key->cat.CName, 0, sizeof(struct hfs_name)); @@ -62,13 +62,14 @@ static int hfs_cat_build_record(hfs_cat_rec *rec, u32 cnid, struct inode *inode) } } -static int hfs_cat_build_thread(hfs_cat_rec *rec, int type, +static int hfs_cat_build_thread(struct super_block *sb, + hfs_cat_rec *rec, int type, u32 parentid, struct qstr *name) { rec->type = type; memset(rec->thread.reserved, 0, sizeof(rec->thread.reserved)); rec->thread.ParID = cpu_to_be32(parentid); - hfs_triv2mac(&rec->thread.CName, name); + hfs_asc2mac(sb, &rec->thread.CName, name); return sizeof(struct hfs_cat_thread); } @@ -93,8 +94,8 @@ int hfs_cat_create(u32 cnid, struct inode *dir, struct qstr *str, struct inode * sb = dir->i_sb; hfs_find_init(HFS_SB(sb)->cat_tree, &fd); - hfs_cat_build_key(fd.search_key, cnid, NULL); - entry_size = hfs_cat_build_thread(&entry, S_ISDIR(inode->i_mode) ? + hfs_cat_build_key(sb, fd.search_key, cnid, NULL); + entry_size = hfs_cat_build_thread(sb, &entry, S_ISDIR(inode->i_mode) ? HFS_CDR_THD : HFS_CDR_FTH, dir->i_ino, str); err = hfs_brec_find(&fd); @@ -107,7 +108,7 @@ int hfs_cat_create(u32 cnid, struct inode *dir, struct qstr *str, struct inode * if (err) goto err2; - hfs_cat_build_key(fd.search_key, dir->i_ino, str); + hfs_cat_build_key(sb, fd.search_key, dir->i_ino, str); entry_size = hfs_cat_build_record(&entry, cnid, inode); err = hfs_brec_find(&fd); if (err != -ENOENT) { @@ -127,7 +128,7 @@ int hfs_cat_create(u32 cnid, struct inode *dir, struct qstr *str, struct inode * return 0; err1: - hfs_cat_build_key(fd.search_key, cnid, NULL); + hfs_cat_build_key(sb, fd.search_key, cnid, NULL); if (!hfs_brec_find(&fd)) hfs_brec_remove(&fd); err2: @@ -176,7 +177,7 @@ int hfs_cat_find_brec(struct super_block *sb, u32 cnid, hfs_cat_rec rec; int res, len, type; - hfs_cat_build_key(fd->search_key, cnid, NULL); + hfs_cat_build_key(sb, fd->search_key, cnid, NULL); res = hfs_brec_read(fd, &rec, sizeof(rec)); if (res) return res; @@ -211,7 +212,7 @@ int hfs_cat_delete(u32 cnid, struct inode *dir, struct qstr *str) sb = dir->i_sb; hfs_find_init(HFS_SB(sb)->cat_tree, &fd); - hfs_cat_build_key(fd.search_key, dir->i_ino, str); + hfs_cat_build_key(sb, fd.search_key, dir->i_ino, str); res = hfs_brec_find(&fd); if (res) goto out; @@ -239,7 +240,7 @@ int hfs_cat_delete(u32 cnid, struct inode *dir, struct qstr *str) if (res) goto out; - hfs_cat_build_key(fd.search_key, cnid, NULL); + hfs_cat_build_key(sb, fd.search_key, cnid, NULL); res = hfs_brec_find(&fd); if (!res) { res = hfs_brec_remove(&fd); @@ -280,7 +281,7 @@ int hfs_cat_move(u32 cnid, struct inode *src_dir, struct qstr *src_name, dst_fd = src_fd; /* find the old dir entry and read the data */ - hfs_cat_build_key(src_fd.search_key, src_dir->i_ino, src_name); + hfs_cat_build_key(sb, src_fd.search_key, src_dir->i_ino, src_name); err = hfs_brec_find(&src_fd); if (err) goto out; @@ -289,7 +290,7 @@ int hfs_cat_move(u32 cnid, struct inode *src_dir, struct qstr *src_name, src_fd.entrylength); /* create new dir entry with the data from the old entry */ - hfs_cat_build_key(dst_fd.search_key, dst_dir->i_ino, dst_name); + hfs_cat_build_key(sb, dst_fd.search_key, dst_dir->i_ino, dst_name); err = hfs_brec_find(&dst_fd); if (err != -ENOENT) { if (!err) @@ -305,7 +306,7 @@ int hfs_cat_move(u32 cnid, struct inode *src_dir, struct qstr *src_name, mark_inode_dirty(dst_dir); /* finally remove the old entry */ - hfs_cat_build_key(src_fd.search_key, src_dir->i_ino, src_name); + hfs_cat_build_key(sb, src_fd.search_key, src_dir->i_ino, src_name); err = hfs_brec_find(&src_fd); if (err) goto out; @@ -321,7 +322,7 @@ int hfs_cat_move(u32 cnid, struct inode *src_dir, struct qstr *src_name, goto out; /* remove old thread entry */ - hfs_cat_build_key(src_fd.search_key, cnid, NULL); + hfs_cat_build_key(sb, src_fd.search_key, cnid, NULL); err = hfs_brec_find(&src_fd); if (err) goto out; @@ -330,8 +331,8 @@ int hfs_cat_move(u32 cnid, struct inode *src_dir, struct qstr *src_name, goto out; /* create new thread entry */ - hfs_cat_build_key(dst_fd.search_key, cnid, NULL); - entry_size = hfs_cat_build_thread(&entry, type == HFS_CDR_FIL ? HFS_CDR_FTH : HFS_CDR_THD, + hfs_cat_build_key(sb, dst_fd.search_key, cnid, NULL); + entry_size = hfs_cat_build_thread(sb, &entry, type == HFS_CDR_FIL ? HFS_CDR_FTH : HFS_CDR_THD, dst_dir->i_ino, dst_name); err = hfs_brec_find(&dst_fd); if (err != -ENOENT) { diff --git a/fs/hfs/dir.c b/fs/hfs/dir.c index c55998262ae..e1f24befba5 100644 --- a/fs/hfs/dir.c +++ b/fs/hfs/dir.c @@ -28,7 +28,7 @@ static struct dentry *hfs_lookup(struct inode *dir, struct dentry *dentry, dentry->d_op = &hfs_dentry_operations; hfs_find_init(HFS_SB(dir->i_sb)->cat_tree, &fd); - hfs_cat_build_key(fd.search_key, dir->i_ino, &dentry->d_name); + hfs_cat_build_key(dir->i_sb, fd.search_key, dir->i_ino, &dentry->d_name); res = hfs_brec_read(&fd, &rec, sizeof(rec)); if (res) { hfs_find_exit(&fd); @@ -56,7 +56,7 @@ static int hfs_readdir(struct file *filp, void *dirent, filldir_t filldir) struct inode *inode = filp->f_dentry->d_inode; struct super_block *sb = inode->i_sb; int len, err; - char strbuf[HFS_NAMELEN + 1]; + char strbuf[HFS_MAX_NAMELEN]; union hfs_cat_rec entry; struct hfs_find_data fd; struct hfs_readdir_data *rd; @@ -66,7 +66,7 @@ static int hfs_readdir(struct file *filp, void *dirent, filldir_t filldir) return 0; hfs_find_init(HFS_SB(sb)->cat_tree, &fd); - hfs_cat_build_key(fd.search_key, inode->i_ino, NULL); + hfs_cat_build_key(sb, fd.search_key, inode->i_ino, NULL); err = hfs_brec_find(&fd); if (err) goto out; @@ -111,7 +111,7 @@ static int hfs_readdir(struct file *filp, void *dirent, filldir_t filldir) } hfs_bnode_read(fd.bnode, &entry, fd.entryoffset, fd.entrylength); type = entry.type; - len = hfs_mac2triv(strbuf, &fd.key->cat.CName); + len = hfs_mac2asc(sb, strbuf, &fd.key->cat.CName); if (type == HFS_CDR_DIR) { if (fd.entrylength < sizeof(struct hfs_cat_dir)) { printk("HFS: small dir entry\n"); @@ -307,7 +307,8 @@ static int hfs_rename(struct inode *old_dir, struct dentry *old_dentry, old_dir, &old_dentry->d_name, new_dir, &new_dentry->d_name); if (!res) - hfs_cat_build_key((btree_key *)&HFS_I(old_dentry->d_inode)->cat_key, + hfs_cat_build_key(old_dir->i_sb, + (btree_key *)&HFS_I(old_dentry->d_inode)->cat_key, new_dir->i_ino, &new_dentry->d_name); return res; } diff --git a/fs/hfs/hfs.h b/fs/hfs/hfs.h index df6b33adee3..88099ab1a18 100644 --- a/fs/hfs/hfs.h +++ b/fs/hfs/hfs.h @@ -25,6 +25,7 @@ #define HFS_SECTOR_SIZE 512 /* size of an HFS sector */ #define HFS_SECTOR_SIZE_BITS 9 /* log_2(HFS_SECTOR_SIZE) */ #define HFS_NAMELEN 31 /* maximum length of an HFS filename */ +#define HFS_MAX_NAMELEN 128 #define HFS_MAX_VALENCE 32767U /* Meanings of the drAtrb field of the MDB, diff --git a/fs/hfs/hfs_fs.h b/fs/hfs/hfs_fs.h index 0dc8ef8e14d..aae019aadf8 100644 --- a/fs/hfs/hfs_fs.h +++ b/fs/hfs/hfs_fs.h @@ -141,6 +141,8 @@ struct hfs_sb_info { int session, part; + struct nls_table *nls_io, *nls_disk; + struct semaphore bitmap_lock; unsigned long flags; @@ -168,7 +170,7 @@ extern int hfs_cat_create(u32, struct inode *, struct qstr *, struct inode *); extern int hfs_cat_delete(u32, struct inode *, struct qstr *); extern int hfs_cat_move(u32, struct inode *, struct qstr *, struct inode *, struct qstr *); -extern void hfs_cat_build_key(btree_key *, u32, struct qstr *); +extern void hfs_cat_build_key(struct super_block *, btree_key *, u32, struct qstr *); /* dir.c */ extern struct file_operations hfs_dir_operations; @@ -222,8 +224,8 @@ extern int hfs_strcmp(const unsigned char *, unsigned int, extern int hfs_compare_dentry(struct dentry *, struct qstr *, struct qstr *); /* trans.c */ -extern void hfs_triv2mac(struct hfs_name *, struct qstr *); -extern int hfs_mac2triv(char *, const struct hfs_name *); +extern void hfs_asc2mac(struct super_block *, struct hfs_name *, struct qstr *); +extern int hfs_mac2asc(struct super_block *, char *, const struct hfs_name *); extern struct timezone sys_tz; diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c index 75191232609..f1570b9f9de 100644 --- a/fs/hfs/inode.c +++ b/fs/hfs/inode.c @@ -160,7 +160,7 @@ struct inode *hfs_new_inode(struct inode *dir, struct qstr *name, int mode) init_MUTEX(&HFS_I(inode)->extents_lock); INIT_LIST_HEAD(&HFS_I(inode)->open_dir_list); - hfs_cat_build_key((btree_key *)&HFS_I(inode)->cat_key, dir->i_ino, name); + hfs_cat_build_key(sb, (btree_key *)&HFS_I(inode)->cat_key, dir->i_ino, name); inode->i_ino = HFS_SB(sb)->next_id++; inode->i_mode = mode; inode->i_uid = current->fsuid; diff --git a/fs/hfs/mdb.c b/fs/hfs/mdb.c index 217e32f37e0..0a473f79c89 100644 --- a/fs/hfs/mdb.c +++ b/fs/hfs/mdb.c @@ -10,6 +10,7 @@ #include #include +#include #include "hfs_fs.h" #include "btree.h" @@ -343,6 +344,11 @@ void hfs_mdb_put(struct super_block *sb) brelse(HFS_SB(sb)->mdb_bh); brelse(HFS_SB(sb)->alt_mdb_bh); + if (HFS_SB(sb)->nls_io) + unload_nls(HFS_SB(sb)->nls_io); + if (HFS_SB(sb)->nls_disk) + unload_nls(HFS_SB(sb)->nls_disk); + kfree(HFS_SB(sb)); sb->s_fs_info = NULL; } diff --git a/fs/hfs/super.c b/fs/hfs/super.c index fd78414ee7e..c5074aeafca 100644 --- a/fs/hfs/super.c +++ b/fs/hfs/super.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -130,6 +131,10 @@ static int hfs_show_options(struct seq_file *seq, struct vfsmount *mnt) seq_printf(seq, ",part=%u", sbi->part); if (sbi->session >= 0) seq_printf(seq, ",session=%u", sbi->session); + if (sbi->nls_disk) + seq_printf(seq, ",codepage=%s", sbi->nls_disk->charset); + if (sbi->nls_io) + seq_printf(seq, ",iocharset=%s", sbi->nls_io->charset); if (sbi->s_quiet) seq_printf(seq, ",quiet"); return 0; @@ -163,6 +168,7 @@ static struct super_operations hfs_super_operations = { enum { opt_uid, opt_gid, opt_umask, opt_file_umask, opt_dir_umask, opt_part, opt_session, opt_type, opt_creator, opt_quiet, + opt_codepage, opt_iocharset, opt_err }; @@ -177,6 +183,8 @@ static match_table_t tokens = { { opt_type, "type=%s" }, { opt_creator, "creator=%s" }, { opt_quiet, "quiet" }, + { opt_codepage, "codepage=%s" }, + { opt_iocharset, "iocharset=%s" }, { opt_err, NULL } }; @@ -282,11 +290,46 @@ static int parse_options(char *options, struct hfs_sb_info *hsb) case opt_quiet: hsb->s_quiet = 1; break; + case opt_codepage: + if (hsb->nls_disk) { + printk("HFS+-fs: unable to change codepage\n"); + return 0; + } + p = match_strdup(&args[0]); + hsb->nls_disk = load_nls(p); + if (!hsb->nls_disk) { + printk("HFS+-fs: unable to load codepage \"%s\"\n", p); + kfree(p); + return 0; + } + kfree(p); + break; + case opt_iocharset: + if (hsb->nls_io) { + printk("HFS: unable to change iocharset\n"); + return 0; + } + p = match_strdup(&args[0]); + hsb->nls_io = load_nls(p); + if (!hsb->nls_io) { + printk("HFS: unable to load iocharset \"%s\"\n", p); + kfree(p); + return 0; + } + kfree(p); + break; default: return 0; } } + if (hsb->nls_disk && !hsb->nls_io) { + hsb->nls_io = load_nls_default(); + if (!hsb->nls_io) { + printk("HFS: unable to load default iocharset\n"); + return 0; + } + } hsb->s_dir_umask &= 0777; hsb->s_file_umask &= 0577; diff --git a/fs/hfs/trans.c b/fs/hfs/trans.c index fb9720abbad..e673a88b8ae 100644 --- a/fs/hfs/trans.c +++ b/fs/hfs/trans.c @@ -9,12 +9,15 @@ * with ':' vs. '/' as the path-element separator. */ +#include +#include + #include "hfs_fs.h" /*================ Global functions ================*/ /* - * hfs_mac2triv() + * hfs_mac2asc() * * Given a 'Pascal String' (a string preceded by a length byte) in * the Macintosh character set produce the corresponding filename using @@ -27,23 +30,58 @@ * by ':' which never appears in HFS filenames. All other characters * are passed unchanged from input to output. */ -int hfs_mac2triv(char *out, const struct hfs_name *in) +int hfs_mac2asc(struct super_block *sb, char *out, const struct hfs_name *in) { - const char *p; - char c; - int i, len; + struct nls_table *nls_disk = HFS_SB(sb)->nls_disk; + struct nls_table *nls_io = HFS_SB(sb)->nls_io; + const char *src; + char *dst; + int srclen, dstlen, size; + + src = in->name; + srclen = in->len; + dst = out; + dstlen = HFS_MAX_NAMELEN; + if (nls_io) { + wchar_t ch; - len = in->len; - p = in->name; - for (i = 0; i < len; i++) { - c = *p++; - *out++ = c == '/' ? ':' : c; + while (srclen > 0) { + if (nls_disk) { + size = nls_disk->char2uni(src, srclen, &ch); + if (size <= 0) { + ch = '?'; + size = 1; + } + src += size; + srclen -= size; + } else { + ch = *src++; + srclen--; + } + if (ch == '/') + ch = ':'; + size = nls_io->uni2char(ch, dst, dstlen); + if (size < 0) { + if (size == -ENAMETOOLONG) + goto out; + *dst = '?'; + size = 1; + } + dst += size; + dstlen -= size; + } + } else { + char ch; + + while (--srclen >= 0) + *dst++ = (ch = *src++) == '/' ? ':' : ch; } - return i; +out: + return dst - out; } /* - * hfs_triv2mac() + * hfs_asc2mac() * * Given an ASCII string (not null-terminated) and its length, * generate the corresponding filename in the Macintosh character set @@ -54,19 +92,57 @@ int hfs_mac2triv(char *out, const struct hfs_name *in) * This routine is a inverse to hfs_mac2triv(). * A ':' is replaced by a '/'. */ -void hfs_triv2mac(struct hfs_name *out, struct qstr *in) +void hfs_asc2mac(struct super_block *sb, struct hfs_name *out, struct qstr *in) { + struct nls_table *nls_disk = HFS_SB(sb)->nls_disk; + struct nls_table *nls_io = HFS_SB(sb)->nls_io; const char *src; - char *dst, c; - int i, len; + char *dst; + int srclen, dstlen, size; - out->len = len = min((unsigned int)HFS_NAMELEN, in->len); src = in->name; + srclen = in->len; dst = out->name; - for (i = 0; i < len; i++) { - c = *src++; - *dst++ = c == ':' ? '/' : c; + dstlen = HFS_NAMELEN; + if (nls_io) { + wchar_t ch; + + while (srclen > 0) { + size = nls_io->char2uni(src, srclen, &ch); + if (size < 0) { + ch = '?'; + size = 1; + } + src += size; + srclen -= size; + if (ch == ':') + ch = '/'; + if (nls_disk) { + size = nls_disk->uni2char(ch, dst, dstlen); + if (size < 0) { + if (size == -ENAMETOOLONG) + goto out; + *dst = '?'; + size = 1; + } + dst += size; + dstlen -= size; + } else { + *dst++ = ch > 0xff ? '?' : ch; + dstlen--; + } + } + } else { + char ch; + + if (dstlen > srclen) + dstlen = srclen; + while (--dstlen >= 0) + *dst++ = (ch = *src++) == ':' ? '/' : ch; } - for (; i < HFS_NAMELEN; i++) +out: + out->len = dst - (char *)out->name; + dstlen = HFS_NAMELEN - out->len; + while (--dstlen >= 0) *dst++ = 0; } -- cgit v1.2.3-70-g09d2 From cbf0d27a131639f4f3e4faa94373c5c6f89f8f07 Mon Sep 17 00:00:00 2001 From: Mark Fasheh Date: Tue, 6 Sep 2005 15:19:08 -0700 Subject: [PATCH] kjournald: missing JFS_UNMOUNT check It seems that kjournald() may be missing a check of the JFS_UNMOUNT flag before calling schedule(). This showed up in testing of OCFS2 recovery where our recovery thread would hang in journal_kill_thread() called from journal_destroy() because kjournald never got a chance to read the flag to shut down before the schedule(). Zach pointed out the missing check which led me to hack up this trivial patch. It's been tested many times now and I have yet to reproduce the hang, which was happening very regularly before. I'm guessing that we could really use some wait_event() calls with helper functions in, well, most of jbd these days which would make a ton of the wait code there vastly cleaner. As for why this doesn't happen in ext3 (or OCFS2 during normal mount/unmount of the local nodes journal), I think it may that the specific timing of events in the ocfs2 recovery thread exposes a race there. Because ocfs2_replay_journal() is only interested in playing back the journal, initialization and shutdown happen very quicky with no other metadata put into that specific journal. Acked-by: "Stephen C. Tweedie" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/jbd/journal.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs') diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c index 71cfe25d716..334f4cf0823 100644 --- a/fs/jbd/journal.c +++ b/fs/jbd/journal.c @@ -179,6 +179,8 @@ loop: if (transaction && time_after_eq(jiffies, transaction->t_expires)) should_sleep = 0; + if (journal->j_flags & JFS_UNMOUNT) + should_sleep = 0; if (should_sleep) { spin_unlock(&journal->j_state_lock); schedule(); -- cgit v1.2.3-70-g09d2 From e6c9f5c1888097c936334bf9740024520ca47b8e Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 6 Sep 2005 15:19:09 -0700 Subject: [PATCH] Fix JBD race in t_forget list handling Fix race between journal_commit_transaction() and other places as journal_unmap_buffer() that are adding buffers to transaction's t_forget list. We have to protect against such places by holding j_list_lock even when traversing the t_forget list. The fact that other places can only add buffers to the list makes the locking easier. OTOH the lock ranking complicates the stuff... Signed-off-by: Jan Kara Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/jbd/commit.c | 34 ++++++++++++++++++++++++---------- 1 file changed, 24 insertions(+), 10 deletions(-) (limited to 'fs') diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c index dac720c837a..9d0494dcc57 100644 --- a/fs/jbd/commit.c +++ b/fs/jbd/commit.c @@ -720,11 +720,17 @@ wait_for_iobuf: J_ASSERT(commit_transaction->t_log_list == NULL); restart_loop: + /* + * As there are other places (journal_unmap_buffer()) adding buffers + * to this list we have to be careful and hold the j_list_lock. + */ + spin_lock(&journal->j_list_lock); while (commit_transaction->t_forget) { transaction_t *cp_transaction; struct buffer_head *bh; jh = commit_transaction->t_forget; + spin_unlock(&journal->j_list_lock); bh = jh2bh(jh); jbd_lock_bh_state(bh); J_ASSERT_JH(jh, jh->b_transaction == commit_transaction || @@ -792,9 +798,25 @@ restart_loop: journal_remove_journal_head(bh); /* needs a brelse */ release_buffer_page(bh); } + cond_resched_lock(&journal->j_list_lock); + } + spin_unlock(&journal->j_list_lock); + /* + * This is a bit sleazy. We borrow j_list_lock to protect + * journal->j_committing_transaction in __journal_remove_checkpoint. + * Really, __journal_remove_checkpoint should be using j_state_lock but + * it's a bit hassle to hold that across __journal_remove_checkpoint + */ + spin_lock(&journal->j_state_lock); + spin_lock(&journal->j_list_lock); + /* + * Now recheck if some buffers did not get attached to the transaction + * while the lock was dropped... + */ + if (commit_transaction->t_forget) { spin_unlock(&journal->j_list_lock); - if (cond_resched()) - goto restart_loop; + spin_unlock(&journal->j_state_lock); + goto restart_loop; } /* Done with this transaction! */ @@ -803,14 +825,6 @@ restart_loop: J_ASSERT(commit_transaction->t_state == T_COMMIT); - /* - * This is a bit sleazy. We borrow j_list_lock to protect - * journal->j_committing_transaction in __journal_remove_checkpoint. - * Really, __jornal_remove_checkpoint should be using j_state_lock but - * it's a bit hassle to hold that across __journal_remove_checkpoint - */ - spin_lock(&journal->j_state_lock); - spin_lock(&journal->j_list_lock); commit_transaction->t_state = T_FINISHED; J_ASSERT(commit_transaction == journal->j_committing_transaction); journal->j_commit_sequence = commit_transaction->t_tid; -- cgit v1.2.3-70-g09d2 From a7662236253374012d364106b6dc9161bd929e2e Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 6 Sep 2005 15:19:10 -0700 Subject: [PATCH] Make ll_rw_block() wait for buffer lock Introduce new ll_rw_block() operation SWRITE meaning that block layer should wait for the buffer lock and write-out afterwards. Hence data in buffers at the time of call are guaranteed to be submitted to the disk. Signed-off-by: Jan Kara Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/buffer.c | 30 ++++++++++++++++-------------- include/linux/fs.h | 1 + 2 files changed, 17 insertions(+), 14 deletions(-) (limited to 'fs') diff --git a/fs/buffer.c b/fs/buffer.c index a92b8140355..1c62203a490 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -917,8 +917,7 @@ static int fsync_buffers_list(spinlock_t *lock, struct list_head *list) * contents - it is a noop if I/O is still in * flight on potentially older contents. */ - wait_on_buffer(bh); - ll_rw_block(WRITE, 1, &bh); + ll_rw_block(SWRITE, 1, &bh); brelse(bh); spin_lock(lock); } @@ -2793,21 +2792,22 @@ int submit_bh(int rw, struct buffer_head * bh) /** * ll_rw_block: low-level access to block devices (DEPRECATED) - * @rw: whether to %READ or %WRITE or maybe %READA (readahead) + * @rw: whether to %READ or %WRITE or %SWRITE or maybe %READA (readahead) * @nr: number of &struct buffer_heads in the array * @bhs: array of pointers to &struct buffer_head * - * ll_rw_block() takes an array of pointers to &struct buffer_heads, - * and requests an I/O operation on them, either a %READ or a %WRITE. - * The third %READA option is described in the documentation for - * generic_make_request() which ll_rw_block() calls. + * ll_rw_block() takes an array of pointers to &struct buffer_heads, and + * requests an I/O operation on them, either a %READ or a %WRITE. The third + * %SWRITE is like %WRITE only we make sure that the *current* data in buffers + * are sent to disk. The fourth %READA option is described in the documentation + * for generic_make_request() which ll_rw_block() calls. * * This function drops any buffer that it cannot get a lock on (with the - * BH_Lock state bit), any buffer that appears to be clean when doing a - * write request, and any buffer that appears to be up-to-date when doing - * read request. Further it marks as clean buffers that are processed for - * writing (the buffer cache won't assume that they are actually clean until - * the buffer gets unlocked). + * BH_Lock state bit) unless SWRITE is required, any buffer that appears to be + * clean when doing a write request, and any buffer that appears to be + * up-to-date when doing read request. Further it marks as clean buffers that + * are processed for writing (the buffer cache won't assume that they are + * actually clean until the buffer gets unlocked). * * ll_rw_block sets b_end_io to simple completion handler that marks * the buffer up-to-date (if approriate), unlocks the buffer and wakes @@ -2823,11 +2823,13 @@ void ll_rw_block(int rw, int nr, struct buffer_head *bhs[]) for (i = 0; i < nr; i++) { struct buffer_head *bh = bhs[i]; - if (test_set_buffer_locked(bh)) + if (rw == SWRITE) + lock_buffer(bh); + else if (test_set_buffer_locked(bh)) continue; get_bh(bh); - if (rw == WRITE) { + if (rw == WRITE || rw == SWRITE) { if (test_clear_buffer_dirty(bh)) { bh->b_end_io = end_buffer_write_sync; submit_bh(WRITE, bh); diff --git a/include/linux/fs.h b/include/linux/fs.h index 7e1b589842a..fd93ab7da90 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -69,6 +69,7 @@ extern int dir_notify_enable; #define READ 0 #define WRITE 1 #define READA 2 /* read-ahead - don't block if no resources */ +#define SWRITE 3 /* for ll_rw_block() - wait for buffer lock */ #define SPECIAL 4 /* For non-blockdevice requests in request queue */ #define READ_SYNC (READ | (1 << BIO_RW_SYNC)) #define WRITE_SYNC (WRITE | (1 << BIO_RW_SYNC)) -- cgit v1.2.3-70-g09d2 From 26707699b5337ea471ba1774447e8a1170c99e52 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 6 Sep 2005 15:19:12 -0700 Subject: [PATCH] Change ll_rw_block() calls in JBD We must be sure that the current data in buffer are sent to disk. Hence we have to call ll_rw_block() with SWRITE. Signed-off-by: Jan Kara Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/jbd/checkpoint.c | 2 +- fs/jbd/commit.c | 4 ++-- fs/jbd/journal.c | 2 +- fs/jbd/revoke.c | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/jbd/checkpoint.c b/fs/jbd/checkpoint.c index 5a97e346bd9..014a51fd00d 100644 --- a/fs/jbd/checkpoint.c +++ b/fs/jbd/checkpoint.c @@ -204,7 +204,7 @@ __flush_batch(journal_t *journal, struct buffer_head **bhs, int *batch_count) int i; spin_unlock(&journal->j_list_lock); - ll_rw_block(WRITE, *batch_count, bhs); + ll_rw_block(SWRITE, *batch_count, bhs); spin_lock(&journal->j_list_lock); for (i = 0; i < *batch_count; i++) { struct buffer_head *bh = bhs[i]; diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c index 9d0494dcc57..2a3e310f79e 100644 --- a/fs/jbd/commit.c +++ b/fs/jbd/commit.c @@ -358,7 +358,7 @@ write_out_data: jbd_debug(2, "submit %d writes\n", bufs); spin_unlock(&journal->j_list_lock); - ll_rw_block(WRITE, bufs, wbuf); + ll_rw_block(SWRITE, bufs, wbuf); journal_brelse_array(wbuf, bufs); bufs = 0; goto write_out_data; @@ -381,7 +381,7 @@ write_out_data: if (bufs) { spin_unlock(&journal->j_list_lock); - ll_rw_block(WRITE, bufs, wbuf); + ll_rw_block(SWRITE, bufs, wbuf); journal_brelse_array(wbuf, bufs); spin_lock(&journal->j_list_lock); } diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c index 334f4cf0823..7ae2c4fe506 100644 --- a/fs/jbd/journal.c +++ b/fs/jbd/journal.c @@ -957,7 +957,7 @@ void journal_update_superblock(journal_t *journal, int wait) if (wait) sync_dirty_buffer(bh); else - ll_rw_block(WRITE, 1, &bh); + ll_rw_block(SWRITE, 1, &bh); out: /* If we have just flushed the log (by marking s_start==0), then diff --git a/fs/jbd/revoke.c b/fs/jbd/revoke.c index 93b9f45eebd..a5614418346 100644 --- a/fs/jbd/revoke.c +++ b/fs/jbd/revoke.c @@ -614,7 +614,7 @@ static void flush_descriptor(journal_t *journal, set_buffer_jwrite(bh); BUFFER_TRACE(bh, "write"); set_buffer_dirty(bh); - ll_rw_block(WRITE, 1, &bh); + ll_rw_block(SWRITE, 1, &bh); } #endif -- cgit v1.2.3-70-g09d2 From 53778ffde601c962ad9250c4998df7de6f428246 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 6 Sep 2005 15:19:14 -0700 Subject: [PATCH] Change ll_rw_block() calls in Reiser We need to be sure that current data in buffer are sent to disk. Hence we need to call ll_rw_block() with SWRITE. Signed-off-by: Jan Kara Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/reiserfs/journal.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c index ca7989b04be..a8e29e9bbbd 100644 --- a/fs/reiserfs/journal.c +++ b/fs/reiserfs/journal.c @@ -1034,7 +1034,7 @@ static int flush_commit_list(struct super_block *s, SB_ONDISK_JOURNAL_SIZE(s); tbh = journal_find_get_block(s, bn); if (buffer_dirty(tbh)) /* redundant, ll_rw_block() checks */ - ll_rw_block(WRITE, 1, &tbh); + ll_rw_block(SWRITE, 1, &tbh); put_bh(tbh); } atomic_dec(&journal->j_async_throttle); @@ -2172,7 +2172,7 @@ static int journal_read_transaction(struct super_block *p_s_sb, /* flush out the real blocks */ for (i = 0; i < get_desc_trans_len(desc); i++) { set_buffer_dirty(real_blocks[i]); - ll_rw_block(WRITE, 1, real_blocks + i); + ll_rw_block(SWRITE, 1, real_blocks + i); } for (i = 0; i < get_desc_trans_len(desc); i++) { wait_on_buffer(real_blocks[i]); -- cgit v1.2.3-70-g09d2 From 096125f31ae3aa2c7271463b9986dd228e0da089 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 6 Sep 2005 15:19:15 -0700 Subject: [PATCH] Change ll_rw_block() calls in UFS We need to be sure that current data are sent to disk. Hence we call ll_rw_block() with SWRITE. Signed-off-by: Jan Kara Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ufs/balloc.c | 12 ++++-------- fs/ufs/ialloc.c | 6 ++---- fs/ufs/truncate.c | 9 +++------ 3 files changed, 9 insertions(+), 18 deletions(-) (limited to 'fs') diff --git a/fs/ufs/balloc.c b/fs/ufs/balloc.c index 997640c99c7..faf1512173e 100644 --- a/fs/ufs/balloc.c +++ b/fs/ufs/balloc.c @@ -114,8 +114,7 @@ void ufs_free_fragments (struct inode * inode, unsigned fragment, unsigned count ubh_mark_buffer_dirty (USPI_UBH); ubh_mark_buffer_dirty (UCPI_UBH); if (sb->s_flags & MS_SYNCHRONOUS) { - ubh_wait_on_buffer (UCPI_UBH); - ubh_ll_rw_block (WRITE, 1, (struct ufs_buffer_head **)&ucpi); + ubh_ll_rw_block (SWRITE, 1, (struct ufs_buffer_head **)&ucpi); ubh_wait_on_buffer (UCPI_UBH); } sb->s_dirt = 1; @@ -200,8 +199,7 @@ do_more: ubh_mark_buffer_dirty (USPI_UBH); ubh_mark_buffer_dirty (UCPI_UBH); if (sb->s_flags & MS_SYNCHRONOUS) { - ubh_wait_on_buffer (UCPI_UBH); - ubh_ll_rw_block (WRITE, 1, (struct ufs_buffer_head **)&ucpi); + ubh_ll_rw_block (SWRITE, 1, (struct ufs_buffer_head **)&ucpi); ubh_wait_on_buffer (UCPI_UBH); } @@ -459,8 +457,7 @@ ufs_add_fragments (struct inode * inode, unsigned fragment, ubh_mark_buffer_dirty (USPI_UBH); ubh_mark_buffer_dirty (UCPI_UBH); if (sb->s_flags & MS_SYNCHRONOUS) { - ubh_wait_on_buffer (UCPI_UBH); - ubh_ll_rw_block (WRITE, 1, (struct ufs_buffer_head **)&ucpi); + ubh_ll_rw_block (SWRITE, 1, (struct ufs_buffer_head **)&ucpi); ubh_wait_on_buffer (UCPI_UBH); } sb->s_dirt = 1; @@ -585,8 +582,7 @@ succed: ubh_mark_buffer_dirty (USPI_UBH); ubh_mark_buffer_dirty (UCPI_UBH); if (sb->s_flags & MS_SYNCHRONOUS) { - ubh_wait_on_buffer (UCPI_UBH); - ubh_ll_rw_block (WRITE, 1, (struct ufs_buffer_head **)&ucpi); + ubh_ll_rw_block (SWRITE, 1, (struct ufs_buffer_head **)&ucpi); ubh_wait_on_buffer (UCPI_UBH); } sb->s_dirt = 1; diff --git a/fs/ufs/ialloc.c b/fs/ufs/ialloc.c index 61a6b1542fc..0938945b9cb 100644 --- a/fs/ufs/ialloc.c +++ b/fs/ufs/ialloc.c @@ -124,8 +124,7 @@ void ufs_free_inode (struct inode * inode) ubh_mark_buffer_dirty (USPI_UBH); ubh_mark_buffer_dirty (UCPI_UBH); if (sb->s_flags & MS_SYNCHRONOUS) { - ubh_wait_on_buffer (UCPI_UBH); - ubh_ll_rw_block (WRITE, 1, (struct ufs_buffer_head **) &ucpi); + ubh_ll_rw_block (SWRITE, 1, (struct ufs_buffer_head **) &ucpi); ubh_wait_on_buffer (UCPI_UBH); } @@ -249,8 +248,7 @@ cg_found: ubh_mark_buffer_dirty (USPI_UBH); ubh_mark_buffer_dirty (UCPI_UBH); if (sb->s_flags & MS_SYNCHRONOUS) { - ubh_wait_on_buffer (UCPI_UBH); - ubh_ll_rw_block (WRITE, 1, (struct ufs_buffer_head **) &ucpi); + ubh_ll_rw_block (SWRITE, 1, (struct ufs_buffer_head **) &ucpi); ubh_wait_on_buffer (UCPI_UBH); } sb->s_dirt = 1; diff --git a/fs/ufs/truncate.c b/fs/ufs/truncate.c index e312bf8bad9..61d2e35012a 100644 --- a/fs/ufs/truncate.c +++ b/fs/ufs/truncate.c @@ -285,8 +285,7 @@ next:; } } if (IS_SYNC(inode) && ind_ubh && ubh_buffer_dirty(ind_ubh)) { - ubh_wait_on_buffer (ind_ubh); - ubh_ll_rw_block (WRITE, 1, &ind_ubh); + ubh_ll_rw_block (SWRITE, 1, &ind_ubh); ubh_wait_on_buffer (ind_ubh); } ubh_brelse (ind_ubh); @@ -353,8 +352,7 @@ static int ufs_trunc_dindirect (struct inode *inode, unsigned offset, __fs32 *p) } } if (IS_SYNC(inode) && dind_bh && ubh_buffer_dirty(dind_bh)) { - ubh_wait_on_buffer (dind_bh); - ubh_ll_rw_block (WRITE, 1, &dind_bh); + ubh_ll_rw_block (SWRITE, 1, &dind_bh); ubh_wait_on_buffer (dind_bh); } ubh_brelse (dind_bh); @@ -418,8 +416,7 @@ static int ufs_trunc_tindirect (struct inode * inode) } } if (IS_SYNC(inode) && tind_bh && ubh_buffer_dirty(tind_bh)) { - ubh_wait_on_buffer (tind_bh); - ubh_ll_rw_block (WRITE, 1, &tind_bh); + ubh_ll_rw_block (SWRITE, 1, &tind_bh); ubh_wait_on_buffer (tind_bh); } ubh_brelse (tind_bh); -- cgit v1.2.3-70-g09d2 From e39f07c83bac96850265b87a69dfc5c90ed4f1f5 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 6 Sep 2005 15:19:16 -0700 Subject: [PATCH] Change HFS+ to not use ll_rw_block() Use block layer predefined function. Signed-off-by: Jan Kara Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/hfsplus/super.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c index b0689955dae..fd0f0f050e1 100644 --- a/fs/hfsplus/super.c +++ b/fs/hfsplus/super.c @@ -217,8 +217,7 @@ static void hfsplus_put_super(struct super_block *sb) vhdr->attributes |= cpu_to_be32(HFSPLUS_VOL_UNMNT); vhdr->attributes &= cpu_to_be32(~HFSPLUS_VOL_INCNSTNT); mark_buffer_dirty(HFSPLUS_SB(sb).s_vhbh); - ll_rw_block(WRITE, 1, &HFSPLUS_SB(sb).s_vhbh); - wait_on_buffer(HFSPLUS_SB(sb).s_vhbh); + sync_dirty_buffer(HFSPLUS_SB(sb).s_vhbh); } hfs_btree_close(HFSPLUS_SB(sb).cat_tree); @@ -416,8 +415,7 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent) vhdr->attributes &= cpu_to_be32(~HFSPLUS_VOL_UNMNT); vhdr->attributes |= cpu_to_be32(HFSPLUS_VOL_INCNSTNT); mark_buffer_dirty(HFSPLUS_SB(sb).s_vhbh); - ll_rw_block(WRITE, 1, &HFSPLUS_SB(sb).s_vhbh); - wait_on_buffer(HFSPLUS_SB(sb).s_vhbh); + sync_dirty_buffer(HFSPLUS_SB(sb).s_vhbh); if (!HFSPLUS_SB(sb).hidden_dir) { printk("HFS+: create hidden dir...\n"); -- cgit v1.2.3-70-g09d2 From 4407c2b6b297339e296facf62e020cf66e55053d Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 6 Sep 2005 15:19:17 -0700 Subject: [PATCH] Fix race in do_get_write_access() attached patch should fix the following race: Proc 1 Proc 2 __flush_batch() ll_rw_block() do_get_write_access() lock_buffer jh is only waiting for checkpoint -> b_transaction == NULL -> do nothing unlock_buffer test_set_buffer_locked() test_clear_buffer_dirty() __journal_file_buffer() change the data submit_bh() and we have sent wrong data to disk... We now clean the dirty buffer flag under buffer lock in all cases and hence we know that whenever a buffer is starting to be journaled we either finish the pending write-out before attaching a buffer to a transaction or we won't write the buffer until the transaction is going to be committed. The test in jbd_unexpected_dirty_buffer() is redundant - remove it. Furthermore we have to clear the buffer dirty bit under the buffer lock to prevent races with buffer write-out (and hence prevent returning a buffer with IO happening). Signed-off-by: Jan Kara Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/jbd/transaction.c | 39 +++++++++++++++++++++------------------ 1 file changed, 21 insertions(+), 18 deletions(-) (limited to 'fs') diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c index 77b7662b840..c6ec66fd876 100644 --- a/fs/jbd/transaction.c +++ b/fs/jbd/transaction.c @@ -490,23 +490,21 @@ void journal_unlock_updates (journal_t *journal) */ static void jbd_unexpected_dirty_buffer(struct journal_head *jh) { - struct buffer_head *bh = jh2bh(jh); int jlist; - if (buffer_dirty(bh)) { - /* If this buffer is one which might reasonably be dirty - * --- ie. data, or not part of this journal --- then - * we're OK to leave it alone, but otherwise we need to - * move the dirty bit to the journal's own internal - * JBDDirty bit. */ - jlist = jh->b_jlist; - - if (jlist == BJ_Metadata || jlist == BJ_Reserved || - jlist == BJ_Shadow || jlist == BJ_Forget) { - if (test_clear_buffer_dirty(jh2bh(jh))) { - set_bit(BH_JBDDirty, &jh2bh(jh)->b_state); - } - } + /* If this buffer is one which might reasonably be dirty + * --- ie. data, or not part of this journal --- then + * we're OK to leave it alone, but otherwise we need to + * move the dirty bit to the journal's own internal + * JBDDirty bit. */ + jlist = jh->b_jlist; + + if (jlist == BJ_Metadata || jlist == BJ_Reserved || + jlist == BJ_Shadow || jlist == BJ_Forget) { + struct buffer_head *bh = jh2bh(jh); + + if (test_clear_buffer_dirty(bh)) + set_buffer_jbddirty(bh); } } @@ -574,9 +572,14 @@ repeat: if (jh->b_next_transaction) J_ASSERT_JH(jh, jh->b_next_transaction == transaction); - JBUFFER_TRACE(jh, "Unexpected dirty buffer"); - jbd_unexpected_dirty_buffer(jh); - } + } + /* + * In any case we need to clean the dirty flag and we must + * do it under the buffer lock to be sure we don't race + * with running write-out. + */ + JBUFFER_TRACE(jh, "Unexpected dirty buffer"); + jbd_unexpected_dirty_buffer(jh); } unlock_buffer(bh); -- cgit v1.2.3-70-g09d2 From 0bb6fcc13ae4fad98e0d610458975e47be0d2203 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 6 Sep 2005 15:19:36 -0700 Subject: [PATCH] pivot_root() circular reference fix Fix http://bugzilla.kernel.org/show_bug.cgi?id=4857 When pivot_root is called from an init script in an initramfs environment, it causes a circular reference in the mount tree. The cause of this is that pivot_root() is not prepared to handle pivoting an unattached mount. In an initramfs environment, rootfs is the root of the namespace, and so it is not attached. This patch fixes this and related problems, by returning -EINVAL if either the current root or the new root is detached. Signed-off-by: Miklos Szeredi Acked-by: Al Viro Cc: Cc: Christoph Hellwig Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/namespace.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'fs') diff --git a/fs/namespace.c b/fs/namespace.c index 29b70669435..34156260c9b 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1334,8 +1334,12 @@ asmlinkage long sys_pivot_root(const char __user *new_root, const char __user *p error = -EINVAL; if (user_nd.mnt->mnt_root != user_nd.dentry) goto out2; /* not a mountpoint */ + if (user_nd.mnt->mnt_parent == user_nd.mnt) + goto out2; /* not attached */ if (new_nd.mnt->mnt_root != new_nd.dentry) goto out2; /* not a mountpoint */ + if (new_nd.mnt->mnt_parent == new_nd.mnt) + goto out2; /* not attached */ tmp = old_nd.mnt; /* make sure we can reach put_old from new_root */ spin_lock(&vfsmount_lock); if (tmp != new_nd.mnt) { -- cgit v1.2.3-70-g09d2