diff options
Diffstat (limited to 'drivers')
56 files changed, 2859 insertions, 313 deletions
diff --git a/drivers/atm/iphase.c b/drivers/atm/iphase.c index ee9ddeb5341..8cb0347dec2 100644 --- a/drivers/atm/iphase.c +++ b/drivers/atm/iphase.c @@ -3156,7 +3156,6 @@ static int __devinit ia_init_one(struct pci_dev *pdev, { struct atm_dev *dev; IADEV *iadev; - unsigned long flags; int ret; iadev = kzalloc(sizeof(*iadev), GFP_KERNEL); @@ -3188,19 +3187,14 @@ static int __devinit ia_init_one(struct pci_dev *pdev, ia_dev[iadev_count] = iadev; _ia_dev[iadev_count] = dev; iadev_count++; - spin_lock_init(&iadev->misc_lock); - /* First fixes first. I don't want to think about this now. */ - spin_lock_irqsave(&iadev->misc_lock, flags); if (ia_init(dev) || ia_start(dev)) { IF_INIT(printk("IA register failed!\n");) iadev_count--; ia_dev[iadev_count] = NULL; _ia_dev[iadev_count] = NULL; - spin_unlock_irqrestore(&iadev->misc_lock, flags); ret = -EINVAL; goto err_out_deregister_dev; } - spin_unlock_irqrestore(&iadev->misc_lock, flags); IF_EVENT(printk("iadev_count = %d\n", iadev_count);) iadev->next_board = ia_boards; diff --git a/drivers/atm/iphase.h b/drivers/atm/iphase.h index b2cd20f549c..077735e0e04 100644 --- a/drivers/atm/iphase.h +++ b/drivers/atm/iphase.h @@ -1022,7 +1022,7 @@ typedef struct iadev_t { struct dle_q rx_dle_q; struct free_desc_q *rx_free_desc_qhead; struct sk_buff_head rx_dma_q; - spinlock_t rx_lock, misc_lock; + spinlock_t rx_lock; struct atm_vcc **rx_open; /* list of all open VCs */ u16 num_rx_desc, rx_buf_sz, rxing; u32 rx_pkt_ram, rx_tmp_cnt; diff --git a/drivers/atm/solos-pci.c b/drivers/atm/solos-pci.c index f916ddf6393..f46138ab38b 100644 --- a/drivers/atm/solos-pci.c +++ b/drivers/atm/solos-pci.c @@ -444,6 +444,7 @@ static ssize_t console_show(struct device *dev, struct device_attribute *attr, struct atm_dev *atmdev = container_of(dev, struct atm_dev, class_dev); struct solos_card *card = atmdev->dev_data; struct sk_buff *skb; + unsigned int len; spin_lock(&card->cli_queue_lock); skb = skb_dequeue(&card->cli_queue[SOLOS_CHAN(atmdev)]); @@ 
-451,11 +452,12 @@ static ssize_t console_show(struct device *dev, struct device_attribute *attr, if(skb == NULL) return sprintf(buf, "No data.\n"); - memcpy(buf, skb->data, skb->len); - dev_dbg(&card->dev->dev, "len: %d\n", skb->len); + len = skb->len; + memcpy(buf, skb->data, len); + dev_dbg(&card->dev->dev, "len: %d\n", len); kfree_skb(skb); - return skb->len; + return len; } static int send_command(struct solos_card *card, int dev, const char *buf, size_t size) diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig index de277689da6..4b9359a6f6c 100644 --- a/drivers/block/Kconfig +++ b/drivers/block/Kconfig @@ -488,4 +488,21 @@ config BLK_DEV_HD If unsure, say N. +config BLK_DEV_RBD + tristate "Rados block device (RBD)" + depends on INET && EXPERIMENTAL && BLOCK + select CEPH_LIB + select LIBCRC32C + select CRYPTO_AES + select CRYPTO + default n + help + Say Y here if you want include the Rados block device, which stripes + a block device over objects stored in the Ceph distributed object + store. + + More information at http://ceph.newdream.net/. + + If unsure, say N. 
+ endif # BLK_DEV diff --git a/drivers/block/Makefile b/drivers/block/Makefile index aff5ac925c3..d7f463d6312 100644 --- a/drivers/block/Makefile +++ b/drivers/block/Makefile @@ -37,5 +37,6 @@ obj-$(CONFIG_BLK_DEV_HD) += hd.o obj-$(CONFIG_XEN_BLKDEV_FRONTEND) += xen-blkfront.o obj-$(CONFIG_BLK_DEV_DRBD) += drbd/ +obj-$(CONFIG_BLK_DEV_RBD) += rbd.o swim_mod-objs := swim.o swim_asm.o diff --git a/drivers/block/ps3disk.c b/drivers/block/ps3disk.c index e9da874d041..03688c2da31 100644 --- a/drivers/block/ps3disk.c +++ b/drivers/block/ps3disk.c @@ -113,7 +113,7 @@ static void ps3disk_scatter_gather(struct ps3_storage_device *dev, memcpy(buf, dev->bounce_buf+offset, size); offset += size; flush_kernel_dcache_page(bvec->bv_page); - bvec_kunmap_irq(bvec, &flags); + bvec_kunmap_irq(buf, &flags); i++; } } diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c new file mode 100644 index 00000000000..6ec9d53806c --- /dev/null +++ b/drivers/block/rbd.c @@ -0,0 +1,1841 @@ +/* + rbd.c -- Export ceph rados objects as a Linux block device + + + based on drivers/block/osdblk.c: + + Copyright 2009 Red Hat, Inc. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; see the file COPYING. If not, write to + the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + + + + Instructions for use + -------------------- + + 1) Map a Linux block device to an existing rbd image. 
+ + Usage: <mon ip addr> <options> <pool name> <rbd image name> [snap name] + + $ echo "192.168.0.1 name=admin rbd foo" > /sys/class/rbd/add + + The snapshot name can be "-" or omitted to map the image read/write. + + 2) List all active blkdev<->object mappings. + + In this example, we have performed step #1 once, creating one blkdev + mapped to a rados object in the rados rbd pool. + + $ cat /sys/class/rbd/list + #id major client_name pool name snap KB + 0 254 client4143 rbd foo - 1024000 + + The columns, in order, are: + - blkdev unique id + - blkdev assigned major + - rados client id + - rados pool name + - rados block device name + - mapped snapshot ("-" if none) + - device size in KB + + + 3) Create a snapshot. + + Usage: <blkdev id> <snapname> + + $ echo "0 mysnap" > /sys/class/rbd/snap_create + + + 4) List the snapshots. + + $ cat /sys/class/rbd/snaps_list + #id snap KB + 0 - 1024000 (*) + 0 foo 1024000 + + The columns, in order, are: + - blkdev unique id + - snapshot name, '-' means none (active read/write version) + - size of device at time of snapshot + - the (*) indicates this is the active version + + 5) Roll back to a snapshot. + + Usage: <blkdev id> <snapname> + + $ echo "0 mysnap" > /sys/class/rbd/snap_rollback + + + 6) Map an image at a snapshot. + + A snapshot mapping is read-only. This is done by passing + snap=<snapname> in the options when adding a device. + + $ echo "192.168.0.1 name=admin,snap=mysnap rbd foo" > /sys/class/rbd/add + + + 7) Remove an active blkdev<->rbd image mapping. + + In this example, we remove the mapping with blkdev unique id 1. + + $ echo 1 > /sys/class/rbd/remove + + + NOTE: The actual creation and deletion of rados objects is outside the scope + of this driver. 
+ + */ + +#include <linux/ceph/libceph.h> +#include <linux/ceph/osd_client.h> +#include <linux/ceph/mon_client.h> +#include <linux/ceph/decode.h> + +#include <linux/kernel.h> +#include <linux/device.h> +#include <linux/module.h> +#include <linux/fs.h> +#include <linux/blkdev.h> + +#include "rbd_types.h" + +#define DRV_NAME "rbd" +#define DRV_NAME_LONG "rbd (rados block device)" + +#define RBD_MINORS_PER_MAJOR 256 /* max minors per blkdev */ + +#define RBD_MAX_MD_NAME_LEN (96 + sizeof(RBD_SUFFIX)) +#define RBD_MAX_POOL_NAME_LEN 64 +#define RBD_MAX_SNAP_NAME_LEN 32 +#define RBD_MAX_OPT_LEN 1024 + +#define RBD_SNAP_HEAD_NAME "-" + +#define DEV_NAME_LEN 32 + +/* + * block device image metadata (in-memory version) + */ +struct rbd_image_header { + u64 image_size; + char block_name[32]; + __u8 obj_order; + __u8 crypt_type; + __u8 comp_type; + struct rw_semaphore snap_rwsem; + struct ceph_snap_context *snapc; + size_t snap_names_len; + u64 snap_seq; + u32 total_snaps; + + char *snap_names; + u64 *snap_sizes; +}; + +/* + * an instance of the client. multiple devices may share a client. + */ +struct rbd_client { + struct ceph_client *client; + struct kref kref; + struct list_head node; +}; + +/* + * a single io request + */ +struct rbd_request { + struct request *rq; /* blk layer request */ + struct bio *bio; /* cloned bio */ + struct page **pages; /* list of used pages */ + u64 len; +}; + +/* + * a single device + */ +struct rbd_device { + int id; /* blkdev unique id */ + + int major; /* blkdev assigned major */ + struct gendisk *disk; /* blkdev's gendisk and rq */ + struct request_queue *q; + + struct ceph_client *client; + struct rbd_client *rbd_client; + + char name[DEV_NAME_LEN]; /* blkdev name, e.g. rbd3 */ + + spinlock_t lock; /* queue lock */ + + struct rbd_image_header header; + char obj[RBD_MAX_OBJ_NAME_LEN]; /* rbd image name */ + int obj_len; + char obj_md_name[RBD_MAX_MD_NAME_LEN]; /* hdr nm. 
*/ + char pool_name[RBD_MAX_POOL_NAME_LEN]; + int poolid; + + char snap_name[RBD_MAX_SNAP_NAME_LEN]; + u32 cur_snap; /* index+1 of current snapshot within snap context + 0 - for the head */ + int read_only; + + struct list_head node; +}; + +static spinlock_t node_lock; /* protects client get/put */ + +static struct class *class_rbd; /* /sys/class/rbd */ +static DEFINE_MUTEX(ctl_mutex); /* Serialize open/close/setup/teardown */ +static LIST_HEAD(rbd_dev_list); /* devices */ +static LIST_HEAD(rbd_client_list); /* clients */ + + +static int rbd_open(struct block_device *bdev, fmode_t mode) +{ + struct gendisk *disk = bdev->bd_disk; + struct rbd_device *rbd_dev = disk->private_data; + + set_device_ro(bdev, rbd_dev->read_only); + + if ((mode & FMODE_WRITE) && rbd_dev->read_only) + return -EROFS; + + return 0; +} + +static const struct block_device_operations rbd_bd_ops = { + .owner = THIS_MODULE, + .open = rbd_open, +}; + +/* + * Initialize an rbd client instance. + * We own *opt. + */ +static struct rbd_client *rbd_client_create(struct ceph_options *opt) +{ + struct rbd_client *rbdc; + int ret = -ENOMEM; + + dout("rbd_client_create\n"); + rbdc = kmalloc(sizeof(struct rbd_client), GFP_KERNEL); + if (!rbdc) + goto out_opt; + + kref_init(&rbdc->kref); + INIT_LIST_HEAD(&rbdc->node); + + rbdc->client = ceph_create_client(opt, rbdc); + if (IS_ERR(rbdc->client)) + goto out_rbdc; + opt = NULL; /* Now rbdc->client is responsible for opt */ + + ret = ceph_open_session(rbdc->client); + if (ret < 0) + goto out_err; + + spin_lock(&node_lock); + list_add_tail(&rbdc->node, &rbd_client_list); + spin_unlock(&node_lock); + + dout("rbd_client_create created %p\n", rbdc); + return rbdc; + +out_err: + ceph_destroy_client(rbdc->client); +out_rbdc: + kfree(rbdc); +out_opt: + if (opt) + ceph_destroy_options(opt); + return ERR_PTR(ret); +} + +/* + * Find a ceph client with specific addr and configuration. 
+ */ +static struct rbd_client *__rbd_client_find(struct ceph_options *opt) +{ + struct rbd_client *client_node; + + if (opt->flags & CEPH_OPT_NOSHARE) + return NULL; + + list_for_each_entry(client_node, &rbd_client_list, node) + if (ceph_compare_options(opt, client_node->client) == 0) + return client_node; + return NULL; +} + +/* + * Get a ceph client with specific addr and configuration, if one does + * not exist create it. + */ +static int rbd_get_client(struct rbd_device *rbd_dev, const char *mon_addr, + char *options) +{ + struct rbd_client *rbdc; + struct ceph_options *opt; + int ret; + + ret = ceph_parse_options(&opt, options, mon_addr, + mon_addr + strlen(mon_addr), NULL, NULL); + if (ret < 0) + return ret; + + spin_lock(&node_lock); + rbdc = __rbd_client_find(opt); + if (rbdc) { + ceph_destroy_options(opt); + + /* using an existing client */ + kref_get(&rbdc->kref); + rbd_dev->rbd_client = rbdc; + rbd_dev->client = rbdc->client; + spin_unlock(&node_lock); + return 0; + } + spin_unlock(&node_lock); + + rbdc = rbd_client_create(opt); + if (IS_ERR(rbdc)) + return PTR_ERR(rbdc); + + rbd_dev->rbd_client = rbdc; + rbd_dev->client = rbdc->client; + return 0; +} + +/* + * Destroy ceph client + */ +static void rbd_client_release(struct kref *kref) +{ + struct rbd_client *rbdc = container_of(kref, struct rbd_client, kref); + + dout("rbd_release_client %p\n", rbdc); + spin_lock(&node_lock); + list_del(&rbdc->node); + spin_unlock(&node_lock); + + ceph_destroy_client(rbdc->client); + kfree(rbdc); +} + +/* + * Drop reference to ceph client node. If it's not referenced anymore, release + * it. + */ +static void rbd_put_client(struct rbd_device *rbd_dev) +{ + kref_put(&rbd_dev->rbd_client->kref, rbd_client_release); + rbd_dev->rbd_client = NULL; + rbd_dev->client = NULL; +} + + +/* + * Create a new header structure, translate header format from the on-disk + * header. 
+ */ +static int rbd_header_from_disk(struct rbd_image_header *header, + struct rbd_image_header_ondisk *ondisk, + int allocated_snaps, + gfp_t gfp_flags) +{ + int i; + u32 snap_count = le32_to_cpu(ondisk->snap_count); + int ret = -ENOMEM; + + init_rwsem(&header->snap_rwsem); + + header->snap_names_len = le64_to_cpu(ondisk->snap_names_len); + header->snapc = kmalloc(sizeof(struct ceph_snap_context) + + snap_count * + sizeof(struct rbd_image_snap_ondisk), + gfp_flags); + if (!header->snapc) + return -ENOMEM; + if (snap_count) { + header->snap_names = kmalloc(header->snap_names_len, + GFP_KERNEL); + if (!header->snap_names) + goto err_snapc; + header->snap_sizes = kmalloc(snap_count * sizeof(u64), + GFP_KERNEL); + if (!header->snap_sizes) + goto err_names; + } else { + header->snap_names = NULL; + header->snap_sizes = NULL; + } + memcpy(header->block_name, ondisk->block_name, + sizeof(ondisk->block_name)); + + header->image_size = le64_to_cpu(ondisk->image_size); + header->obj_order = ondisk->options.order; + header->crypt_type = ondisk->options.crypt_type; + header->comp_type = ondisk->options.comp_type; + + atomic_set(&header->snapc->nref, 1); + header->snap_seq = le64_to_cpu(ondisk->snap_seq); + header->snapc->num_snaps = snap_count; + header->total_snaps = snap_count; + + if (snap_count && + allocated_snaps == snap_count) { + for (i = 0; i < snap_count; i++) { + header->snapc->snaps[i] = + le64_to_cpu(ondisk->snaps[i].id); + header->snap_sizes[i] = + le64_to_cpu(ondisk->snaps[i].image_size); + } + + /* copy snapshot names */ + memcpy(header->snap_names, &ondisk->snaps[i], + header->snap_names_len); + } + + return 0; + +err_names: + kfree(header->snap_names); +err_snapc: + kfree(header->snapc); + return ret; +} + +static int snap_index(struct rbd_image_header *header, int snap_num) +{ + return header->total_snaps - snap_num; +} + +static u64 cur_snap_id(struct rbd_device *rbd_dev) +{ + struct rbd_image_header *header = &rbd_dev->header; + + if (!rbd_dev->cur_snap) 
+ return 0; + + return header->snapc->snaps[snap_index(header, rbd_dev->cur_snap)]; +} + +static int snap_by_name(struct rbd_image_header *header, const char *snap_name, + u64 *seq, u64 *size) +{ + int i; + char *p = header->snap_names; + + for (i = 0; i < header->total_snaps; i++, p += strlen(p) + 1) { + if (strcmp(snap_name, p) == 0) + break; + } + if (i == header->total_snaps) + return -ENOENT; + if (seq) + *seq = header->snapc->snaps[i]; + + if (size) + *size = header->snap_sizes[i]; + + return i; +} + +static int rbd_header_set_snap(struct rbd_device *dev, + const char *snap_name, + u64 *size) +{ + struct rbd_image_header *header = &dev->header; + struct ceph_snap_context *snapc = header->snapc; + int ret = -ENOENT; + + down_write(&header->snap_rwsem); + + if (!snap_name || + !*snap_name || + strcmp(snap_name, "-") == 0 || + strcmp(snap_name, RBD_SNAP_HEAD_NAME) == 0) { + if (header->total_snaps) + snapc->seq = header->snap_seq; + else + snapc->seq = 0; + dev->cur_snap = 0; + dev->read_only = 0; + if (size) + *size = header->image_size; + } else { + ret = snap_by_name(header, snap_name, &snapc->seq, size); + if (ret < 0) + goto done; + + dev->cur_snap = header->total_snaps - ret; + dev->read_only = 1; + } + + ret = 0; +done: + up_write(&header->snap_rwsem); + return ret; +} + +static void rbd_header_free(struct rbd_image_header *header) +{ + kfree(header->snapc); + kfree(header->snap_names); + kfree(header->snap_sizes); +} + +/* + * get the actual striped segment name, offset and length + */ +static u64 rbd_get_segment(struct rbd_image_header *header, + const char *block_name, + u64 ofs, u64 len, + char *seg_name, u64 *segofs) +{ + u64 seg = ofs >> header->obj_order; + + if (seg_name) + snprintf(seg_name, RBD_MAX_SEG_NAME_LEN, + "%s.%012llx", block_name, seg); + + ofs = ofs & ((1 << header->obj_order) - 1); + len = min_t(u64, len, (1 << header->obj_order) - ofs); + + if (segofs) + *segofs = ofs; + + return len; +} + +/* + * bio helpers + */ + +static void 
bio_chain_put(struct bio *chain) +{ + struct bio *tmp; + + while (chain) { + tmp = chain; + chain = chain->bi_next; + bio_put(tmp); + } +} + +/* + * zeros a bio chain, starting at specific offset + */ +static void zero_bio_chain(struct bio *chain, int start_ofs) +{ + struct bio_vec *bv; + unsigned long flags; + void *buf; + int i; + int pos = 0; + + while (chain) { + bio_for_each_segment(bv, chain, i) { + if (pos + bv->bv_len > start_ofs) { + int remainder = max(start_ofs - pos, 0); + buf = bvec_kmap_irq(bv, &flags); + memset(buf + remainder, 0, + bv->bv_len - remainder); + bvec_kunmap_irq(buf, &flags); + } + pos += bv->bv_len; + } + + chain = chain->bi_next; + } +} + +/* + * bio_chain_clone - clone a chain of bios up to a certain length. + * might return a bio_pair that will need to be released. + */ +static struct bio *bio_chain_clone(struct bio **old, struct bio **next, + struct bio_pair **bp, + int len, gfp_t gfpmask) +{ + struct bio *tmp, *old_chain = *old, *new_chain = NULL, *tail = NULL; + int total = 0; + + if (*bp) { + bio_pair_release(*bp); + *bp = NULL; + } + + while (old_chain && (total < len)) { + tmp = bio_kmalloc(gfpmask, old_chain->bi_max_vecs); + if (!tmp) + goto err_out; + + if (total + old_chain->bi_size > len) { + struct bio_pair *bp; + + /* + * this split can only happen with a single paged bio, + * split_bio will BUG_ON if this is not the case + */ + dout("bio_chain_clone split! total=%d remaining=%d" + "bi_size=%d\n", + (int)total, (int)len-total, + (int)old_chain->bi_size); + + /* split the bio. 
We'll release it either in the next + call, or it will have to be released outside */ + bp = bio_split(old_chain, (len - total) / 512ULL); + if (!bp) + goto err_out; + + __bio_clone(tmp, &bp->bio1); + + *next = &bp->bio2; + } else { + __bio_clone(tmp, old_chain); + *next = old_chain->bi_next; + } + + tmp->bi_bdev = NULL; + gfpmask &= ~__GFP_WAIT; + tmp->bi_next = NULL; + + if (!new_chain) { + new_chain = tail = tmp; + } else { + tail->bi_next = tmp; + tail = tmp; + } + old_chain = old_chain->bi_next; + + total += tmp->bi_size; + } + + BUG_ON(total < len); + + if (tail) + tail->bi_next = NULL; + + *old = old_chain; + + return new_chain; + +err_out: + dout("bio_chain_clone with err\n"); + bio_chain_put(new_chain); + return NULL; +} + +/* + * helpers for osd request op vectors. + */ +static int rbd_create_rw_ops(struct ceph_osd_req_op **ops, + int num_ops, + int opcode, + u32 payload_len) +{ + *ops = kzalloc(sizeof(struct ceph_osd_req_op) * (num_ops + 1), + GFP_NOIO); + if (!*ops) + return -ENOMEM; + (*ops)[0].op = opcode; + /* + * op extent offset and length will be set later on + * in calc_raw_layout() + */ + (*ops)[0].payload_len = payload_len; + return 0; +} + +static void rbd_destroy_ops(struct ceph_osd_req_op *ops) +{ + kfree(ops); +} + +/* + * Send ceph osd request + */ +static int rbd_do_request(struct request *rq, + struct rbd_device *dev, + struct ceph_snap_context *snapc, + u64 snapid, + const char *obj, u64 ofs, u64 len, + struct bio *bio, + struct page **pages, + int num_pages, + int flags, + struct ceph_osd_req_op *ops, + int num_reply, + void (*rbd_cb)(struct ceph_osd_request *req, + struct ceph_msg *msg)) +{ + struct ceph_osd_request *req; + struct ceph_file_layout *layout; + int ret; + u64 bno; + struct timespec mtime = CURRENT_TIME; + struct rbd_request *req_data; + struct ceph_osd_request_head *reqhead; + struct rbd_image_header *header = &dev->header; + + ret = -ENOMEM; + req_data = kzalloc(sizeof(*req_data), GFP_NOIO); + if (!req_data) + goto 
done; + + dout("rbd_do_request len=%lld ofs=%lld\n", len, ofs); + + down_read(&header->snap_rwsem); + + req = ceph_osdc_alloc_request(&dev->client->osdc, flags, + snapc, + ops, + false, + GFP_NOIO, pages, bio); + if (IS_ERR(req)) { + up_read(&header->snap_rwsem); + ret = PTR_ERR(req); + goto done_pages; + } + + req->r_callback = rbd_cb; + + req_data->rq = rq; + req_data->bio = bio; + req_data->pages = pages; + req_data->len = len; + + req->r_priv = req_data; + + reqhead = req->r_request->front.iov_base; + reqhead->snapid = cpu_to_le64(CEPH_NOSNAP); + + strncpy(req->r_oid, obj, sizeof(req->r_oid)); + req->r_oid_len = strlen(req->r_oid); + + layout = &req->r_file_layout; + memset(layout, 0, sizeof(*layout)); + layout->fl_stripe_unit = cpu_to_le32(1 << RBD_MAX_OBJ_ORDER); + layout->fl_stripe_count = cpu_to_le32(1); + layout->fl_object_size = cpu_to_le32(1 << RBD_MAX_OBJ_ORDER); + layout->fl_pg_preferred = cpu_to_le32(-1); + layout->fl_pg_pool = cpu_to_le32(dev->poolid); + ceph_calc_raw_layout(&dev->client->osdc, layout, snapid, + ofs, &len, &bno, req, ops); + + ceph_osdc_build_request(req, ofs, &len, + ops, + snapc, + &mtime, + req->r_oid, req->r_oid_len); + up_read(&header->snap_rwsem); + + ret = ceph_osdc_start_request(&dev->client->osdc, req, false); + if (ret < 0) + goto done_err; + + if (!rbd_cb) { + ret = ceph_osdc_wait_request(&dev->client->osdc, req); + ceph_osdc_put_request(req); + } + return ret; + +done_err: + bio_chain_put(req_data->bio); + ceph_osdc_put_request(req); +done_pages: + kfree(req_data); +done: + if (rq) + blk_end_request(rq, ret, len); + return ret; +} + +/* + * Ceph osd op callback + */ +static void rbd_req_cb(struct ceph_osd_request *req, struct ceph_msg *msg) +{ + struct rbd_request *req_data = req->r_priv; + struct ceph_osd_reply_head *replyhead; + struct ceph_osd_op *op; + __s32 rc; + u64 bytes; + int read_op; + + /* parse reply */ + replyhead = msg->front.iov_base; + WARN_ON(le32_to_cpu(replyhead->num_ops) == 0); + op = (void 
*)(replyhead + 1); + rc = le32_to_cpu(replyhead->result); + bytes = le64_to_cpu(op->extent.length); + read_op = (le32_to_cpu(op->op) == CEPH_OSD_OP_READ); + + dout("rbd_req_cb bytes=%lld readop=%d rc=%d\n", bytes, read_op, rc); + + if (rc == -ENOENT && read_op) { + zero_bio_chain(req_data->bio, 0); + rc = 0; + } else if (rc == 0 && read_op && bytes < req_data->len) { + zero_bio_chain(req_data->bio, bytes); + bytes = req_data->len; + } + + blk_end_request(req_data->rq, rc, bytes); + + if (req_data->bio) + bio_chain_put(req_data->bio); + + ceph_osdc_put_request(req); + kfree(req_data); +} + +/* + * Do a synchronous ceph osd operation + */ +static int rbd_req_sync_op(struct rbd_device *dev, + struct ceph_snap_context *snapc, + u64 snapid, + int opcode, + int flags, + struct ceph_osd_req_op *orig_ops, + int num_reply, + const char *obj, + u64 ofs, u64 len, + char *buf) +{ + int ret; + struct page **pages; + int num_pages; + struct ceph_osd_req_op *ops = orig_ops; + u32 payload_len; + + num_pages = calc_pages_for(ofs , len); + pages = ceph_alloc_page_vector(num_pages, GFP_KERNEL); + if (IS_ERR(pages)) + return PTR_ERR(pages); + + if (!orig_ops) { + payload_len = (flags & CEPH_OSD_FLAG_WRITE ? 
len : 0); + ret = rbd_create_rw_ops(&ops, 1, opcode, payload_len); + if (ret < 0) + goto done; + + if ((flags & CEPH_OSD_FLAG_WRITE) && buf) { + ret = ceph_copy_to_page_vector(pages, buf, ofs, len); + if (ret < 0) + goto done_ops; + } + } + + ret = rbd_do_request(NULL, dev, snapc, snapid, + obj, ofs, len, NULL, + pages, num_pages, + flags, + ops, + 2, + NULL); + if (ret < 0) + goto done_ops; + + if ((flags & CEPH_OSD_FLAG_READ) && buf) + ret = ceph_copy_from_page_vector(pages, buf, ofs, ret); + +done_ops: + if (!orig_ops) + rbd_destroy_ops(ops); +done: + ceph_release_page_vector(pages, num_pages); + return ret; +} + +/* + * Do an asynchronous ceph osd operation + */ +static int rbd_do_op(struct request *rq, + struct rbd_device *rbd_dev , + struct ceph_snap_context *snapc, + u64 snapid, + int opcode, int flags, int num_reply, + u64 ofs, u64 len, + struct bio *bio) +{ + char *seg_name; + u64 seg_ofs; + u64 seg_len; + int ret; + struct ceph_osd_req_op *ops; + u32 payload_len; + + seg_name = kmalloc(RBD_MAX_SEG_NAME_LEN + 1, GFP_NOIO); + if (!seg_name) + return -ENOMEM; + + seg_len = rbd_get_segment(&rbd_dev->header, + rbd_dev->header.block_name, + ofs, len, + seg_name, &seg_ofs); + + payload_len = (flags & CEPH_OSD_FLAG_WRITE ? seg_len : 0); + + ret = rbd_create_rw_ops(&ops, 1, opcode, payload_len); + if (ret < 0) + goto done; + + /* we've taken care of segment sizes earlier when we + cloned the bios. 
We should never have a segment + truncated at this point */ + BUG_ON(seg_len < len); + + ret = rbd_do_request(rq, rbd_dev, snapc, snapid, + seg_name, seg_ofs, seg_len, + bio, + NULL, 0, + flags, + ops, + num_reply, + rbd_req_cb); +done: + kfree(seg_name); + return ret; +} + +/* + * Request async osd write + */ +static int rbd_req_write(struct request *rq, + struct rbd_device *rbd_dev, + struct ceph_snap_context *snapc, + u64 ofs, u64 len, + struct bio *bio) +{ + return rbd_do_op(rq, rbd_dev, snapc, CEPH_NOSNAP, + CEPH_OSD_OP_WRITE, + CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK, + 2, + ofs, len, bio); +} + +/* + * Request async osd read + */ +static int rbd_req_read(struct request *rq, + struct rbd_device *rbd_dev, + u64 snapid, + u64 ofs, u64 len, + struct bio *bio) +{ + return rbd_do_op(rq, rbd_dev, NULL, + (snapid ? snapid : CEPH_NOSNAP), + CEPH_OSD_OP_READ, + CEPH_OSD_FLAG_READ, + 2, + ofs, len, bio); +} + +/* + * Request sync osd read + */ +static int rbd_req_sync_read(struct rbd_device *dev, + struct ceph_snap_context *snapc, + u64 snapid, + const char *obj, + u64 ofs, u64 len, + char *buf) +{ + return rbd_req_sync_op(dev, NULL, + (snapid ? 
snapid : CEPH_NOSNAP), + CEPH_OSD_OP_READ, + CEPH_OSD_FLAG_READ, + NULL, + 1, obj, ofs, len, buf); +} + +/* + * Request sync osd read + */ +static int rbd_req_sync_rollback_obj(struct rbd_device *dev, + u64 snapid, + const char *obj) +{ + struct ceph_osd_req_op *ops; + int ret = rbd_create_rw_ops(&ops, 1, CEPH_OSD_OP_ROLLBACK, 0); + if (ret < 0) + return ret; + + ops[0].snap.snapid = snapid; + + ret = rbd_req_sync_op(dev, NULL, + CEPH_NOSNAP, + 0, + CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK, + ops, + 1, obj, 0, 0, NULL); + + rbd_destroy_ops(ops); + + if (ret < 0) + return ret; + + return ret; +} + +/* + * Request sync osd read + */ +static int rbd_req_sync_exec(struct rbd_device *dev, + const char *obj, + const char *cls, + const char *method, + const char *data, + int len) +{ + struct ceph_osd_req_op *ops; + int cls_len = strlen(cls); + int method_len = strlen(method); + int ret = rbd_create_rw_ops(&ops, 1, CEPH_OSD_OP_CALL, + cls_len + method_len + len); + if (ret < 0) + return ret; + + ops[0].cls.class_name = cls; + ops[0].cls.class_len = (__u8)cls_len; + ops[0].cls.method_name = method; + ops[0].cls.method_len = (__u8)method_len; + ops[0].cls.argc = 0; + ops[0].cls.indata = data; + ops[0].cls.indata_len = len; + + ret = rbd_req_sync_op(dev, NULL, + CEPH_NOSNAP, + 0, + CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK, + ops, + 1, obj, 0, 0, NULL); + + rbd_destroy_ops(ops); + + dout("cls_exec returned %d\n", ret); + return ret; +} + +/* + * block device queue callback + */ +static void rbd_rq_fn(struct request_queue *q) +{ + struct rbd_device *rbd_dev = q->queuedata; + struct request *rq; + struct bio_pair *bp = NULL; + + rq = blk_fetch_request(q); + + while (1) { + struct bio *bio; + struct bio *rq_bio, *next_bio = NULL; + bool do_write; + int size, op_size = 0; + u64 ofs; + + /* peek at request from block layer */ + if (!rq) + break; + + dout("fetched request\n"); + + /* filter out block requests we don't understand */ + if ((rq->cmd_type != REQ_TYPE_FS)) { + 
__blk_end_request_all(rq, 0); + goto next; + } + + /* deduce our operation (read, write) */ + do_write = (rq_data_dir(rq) == WRITE); + + size = blk_rq_bytes(rq); + ofs = blk_rq_pos(rq) * 512ULL; + rq_bio = rq->bio; + if (do_write && rbd_dev->read_only) { + __blk_end_request_all(rq, -EROFS); + goto next; + } + + spin_unlock_irq(q->queue_lock); + + dout("%s 0x%x bytes at 0x%llx\n", + do_write ? "write" : "read", + size, blk_rq_pos(rq) * 512ULL); + + do { + /* a bio clone to be passed down to OSD req */ + dout("rq->bio->bi_vcnt=%d\n", rq->bio->bi_vcnt); + op_size = rbd_get_segment(&rbd_dev->header, + rbd_dev->header.block_name, + ofs, size, + NULL, NULL); + bio = bio_chain_clone(&rq_bio, &next_bio, &bp, + op_size, GFP_ATOMIC); + if (!bio) { + spin_lock_irq(q->queue_lock); + __blk_end_request_all(rq, -ENOMEM); + goto next; + } + + /* init OSD command: write or read */ + if (do_write) + rbd_req_write(rq, rbd_dev, + rbd_dev->header.snapc, + ofs, + op_size, bio); + else + rbd_req_read(rq, rbd_dev, + cur_snap_id(rbd_dev), + ofs, + op_size, bio); + + size -= op_size; + ofs += op_size; + + rq_bio = next_bio; + } while (size > 0); + + if (bp) + bio_pair_release(bp); + + spin_lock_irq(q->queue_lock); +next: + rq = blk_fetch_request(q); + } +} + +/* + * a queue callback. Makes sure that we don't create a bio that spans across + * multiple osd objects. 
One exception would be with a single page bios, + * which we handle later at bio_chain_clone + */ +static int rbd_merge_bvec(struct request_queue *q, struct bvec_merge_data *bmd, + struct bio_vec *bvec) +{ + struct rbd_device *rbd_dev = q->queuedata; + unsigned int chunk_sectors = 1 << (rbd_dev->header.obj_order - 9); + sector_t sector = bmd->bi_sector + get_start_sect(bmd->bi_bdev); + unsigned int bio_sectors = bmd->bi_size >> 9; + int max; + + max = (chunk_sectors - ((sector & (chunk_sectors - 1)) + + bio_sectors)) << 9; + if (max < 0) + max = 0; /* bio_add cannot handle a negative return */ + if (max <= bvec->bv_len && bio_sectors == 0) + return bvec->bv_len; + return max; +} + +static void rbd_free_disk(struct rbd_device *rbd_dev) +{ + struct gendisk *disk = rbd_dev->disk; + + if (!disk) + return; + + rbd_header_free(&rbd_dev->header); + + if (disk->flags & GENHD_FL_UP) + del_gendisk(disk); + if (disk->queue) + blk_cleanup_queue(disk->queue); + put_disk(disk); +} + +/* + * reload the ondisk the header + */ +static int rbd_read_header(struct rbd_device *rbd_dev, + struct rbd_image_header *header) +{ + ssize_t rc; + struct rbd_image_header_ondisk *dh; + int snap_count = 0; + u64 snap_names_len = 0; + + while (1) { + int len = sizeof(*dh) + + snap_count * sizeof(struct rbd_image_snap_ondisk) + + snap_names_len; + + rc = -ENOMEM; + dh = kmalloc(len, GFP_KERNEL); + if (!dh) + return -ENOMEM; + + rc = rbd_req_sync_read(rbd_dev, + NULL, CEPH_NOSNAP, + rbd_dev->obj_md_name, + 0, len, + (char *)dh); + if (rc < 0) + goto out_dh; + + rc = rbd_header_from_disk(header, dh, snap_count, GFP_KERNEL); + if (rc < 0) + goto out_dh; + + if (snap_count != header->total_snaps) { + snap_count = header->total_snaps; + snap_names_len = header->snap_names_len; + rbd_header_free(header); + kfree(dh); + continue; + } + break; + } + +out_dh: + kfree(dh); + return rc; +} + +/* + * create a snapshot + */ +static int rbd_header_add_snap(struct rbd_device *dev, + const char *snap_name, + gfp_t 
gfp_flags) +{ + int name_len = strlen(snap_name); + u64 new_snapid; + int ret; + void *data, *data_start, *data_end; + + /* we should create a snapshot only if we're pointing at the head */ + if (dev->cur_snap) + return -EINVAL; + + ret = ceph_monc_create_snapid(&dev->client->monc, dev->poolid, + &new_snapid); + dout("created snapid=%lld\n", new_snapid); + if (ret < 0) + return ret; + + data = kmalloc(name_len + 16, gfp_flags); + if (!data) + return -ENOMEM; + + data_start = data; + data_end = data + name_len + 16; + + ceph_encode_string_safe(&data, data_end, snap_name, name_len, bad); + ceph_encode_64_safe(&data, data_end, new_snapid, bad); + + ret = rbd_req_sync_exec(dev, dev->obj_md_name, "rbd", "snap_add", + data_start, data - data_start); + + kfree(data_start); + + if (ret < 0) + return ret; + + dev->header.snapc->seq = new_snapid; + + return 0; +bad: + return -ERANGE; +} + +/* + * only read the first part of the ondisk header, without the snaps info + */ +static int rbd_update_snaps(struct rbd_device *rbd_dev) +{ + int ret; + struct rbd_image_header h; + u64 snap_seq; + + ret = rbd_read_header(rbd_dev, &h); + if (ret < 0) + return ret; + + down_write(&rbd_dev->header.snap_rwsem); + + snap_seq = rbd_dev->header.snapc->seq; + + kfree(rbd_dev->header.snapc); + kfree(rbd_dev->header.snap_names); + kfree(rbd_dev->header.snap_sizes); + + rbd_dev->header.total_snaps = h.total_snaps; + rbd_dev->header.snapc = h.snapc; + rbd_dev->header.snap_names = h.snap_names; + rbd_dev->header.snap_sizes = h.snap_sizes; + rbd_dev->header.snapc->seq = snap_seq; + + up_write(&rbd_dev->header.snap_rwsem); + + return 0; +} + +static int rbd_init_disk(struct rbd_device *rbd_dev) +{ + struct gendisk *disk; + struct request_queue *q; + int rc; + u64 total_size = 0; + + /* contact OSD, request size info about the object being mapped */ + rc = rbd_read_header(rbd_dev, &rbd_dev->header); + if (rc) + return rc; + + rc = rbd_header_set_snap(rbd_dev, rbd_dev->snap_name, &total_size); + if (rc) 
+ return rc; + + /* create gendisk info */ + rc = -ENOMEM; + disk = alloc_disk(RBD_MINORS_PER_MAJOR); + if (!disk) + goto out; + + sprintf(disk->disk_name, DRV_NAME "%d", rbd_dev->id); + disk->major = rbd_dev->major; + disk->first_minor = 0; + disk->fops = &rbd_bd_ops; + disk->private_data = rbd_dev; + + /* init rq */ + rc = -ENOMEM; + q = blk_init_queue(rbd_rq_fn, &rbd_dev->lock); + if (!q) + goto out_disk; + blk_queue_merge_bvec(q, rbd_merge_bvec); + disk->queue = q; + + q->queuedata = rbd_dev; + + rbd_dev->disk = disk; + rbd_dev->q = q; + + /* finally, announce the disk to the world */ + set_capacity(disk, total_size / 512ULL); + add_disk(disk); + + pr_info("%s: added with size 0x%llx\n", + disk->disk_name, (unsigned long long)total_size); + return 0; + +out_disk: + put_disk(disk); +out: + return rc; +} + +/******************************************************************** + * /sys/class/rbd/ + * add map rados objects to blkdev + * remove unmap rados objects + * list show mappings + *******************************************************************/ + +static void class_rbd_release(struct class *cls) +{ + kfree(cls); +} + +static ssize_t class_rbd_list(struct class *c, + struct class_attribute *attr, + char *data) +{ + int n = 0; + struct list_head *tmp; + int max = PAGE_SIZE; + + mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); + + n += snprintf(data, max, + "#id\tmajor\tclient_name\tpool\tname\tsnap\tKB\n"); + + list_for_each(tmp, &rbd_dev_list) { + struct rbd_device *rbd_dev; + + rbd_dev = list_entry(tmp, struct rbd_device, node); + n += snprintf(data+n, max-n, + "%d\t%d\tclient%lld\t%s\t%s\t%s\t%lld\n", + rbd_dev->id, + rbd_dev->major, + ceph_client_id(rbd_dev->client), + rbd_dev->pool_name, + rbd_dev->obj, rbd_dev->snap_name, + rbd_dev->header.image_size >> 10); + if (n == max) + break; + } + + mutex_unlock(&ctl_mutex); + return n; +} + +static ssize_t class_rbd_add(struct class *c, + struct class_attribute *attr, + const char *buf, size_t count) +{ 
+ struct ceph_osd_client *osdc; + struct rbd_device *rbd_dev; + ssize_t rc = -ENOMEM; + int irc, new_id = 0; + struct list_head *tmp; + char *mon_dev_name; + char *options; + + if (!try_module_get(THIS_MODULE)) + return -ENODEV; + + mon_dev_name = kmalloc(RBD_MAX_OPT_LEN, GFP_KERNEL); + if (!mon_dev_name) + goto err_out_mod; + + options = kmalloc(RBD_MAX_OPT_LEN, GFP_KERNEL); + if (!options) + goto err_mon_dev; + + /* new rbd_device object */ + rbd_dev = kzalloc(sizeof(*rbd_dev), GFP_KERNEL); + if (!rbd_dev) + goto err_out_opt; + + /* static rbd_device initialization */ + spin_lock_init(&rbd_dev->lock); + INIT_LIST_HEAD(&rbd_dev->node); + + /* generate unique id: find highest unique id, add one */ + mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); + + list_for_each(tmp, &rbd_dev_list) { + struct rbd_device *rbd_dev; + + rbd_dev = list_entry(tmp, struct rbd_device, node); + if (rbd_dev->id >= new_id) + new_id = rbd_dev->id + 1; + } + + rbd_dev->id = new_id; + + /* add to global list */ + list_add_tail(&rbd_dev->node, &rbd_dev_list); + + /* parse add command */ + if (sscanf(buf, "%" __stringify(RBD_MAX_OPT_LEN) "s " + "%" __stringify(RBD_MAX_OPT_LEN) "s " + "%" __stringify(RBD_MAX_POOL_NAME_LEN) "s " + "%" __stringify(RBD_MAX_OBJ_NAME_LEN) "s" + "%" __stringify(RBD_MAX_SNAP_NAME_LEN) "s", + mon_dev_name, options, rbd_dev->pool_name, + rbd_dev->obj, rbd_dev->snap_name) < 4) { + rc = -EINVAL; + goto err_out_slot; + } + + if (rbd_dev->snap_name[0] == 0) + rbd_dev->snap_name[0] = '-'; + + rbd_dev->obj_len = strlen(rbd_dev->obj); + snprintf(rbd_dev->obj_md_name, sizeof(rbd_dev->obj_md_name), "%s%s", + rbd_dev->obj, RBD_SUFFIX); + + /* initialize rest of new object */ + snprintf(rbd_dev->name, DEV_NAME_LEN, DRV_NAME "%d", rbd_dev->id); + rc = rbd_get_client(rbd_dev, mon_dev_name, options); + if (rc < 0) + goto err_out_slot; + + mutex_unlock(&ctl_mutex); + + /* pick the pool */ + osdc = &rbd_dev->client->osdc; + rc = ceph_pg_poolid_by_name(osdc->osdmap, 
rbd_dev->pool_name); + if (rc < 0) + goto err_out_client; + rbd_dev->poolid = rc; + + /* register our block device */ + irc = register_blkdev(0, rbd_dev->name); + if (irc < 0) { + rc = irc; + goto err_out_client; + } + rbd_dev->major = irc; + + /* set up and announce blkdev mapping */ + rc = rbd_init_disk(rbd_dev); + if (rc) + goto err_out_blkdev; + + return count; + +err_out_blkdev: + unregister_blkdev(rbd_dev->major, rbd_dev->name); +err_out_client: + rbd_put_client(rbd_dev); + mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); +err_out_slot: + list_del_init(&rbd_dev->node); + mutex_unlock(&ctl_mutex); + + kfree(rbd_dev); +err_out_opt: + kfree(options); +err_mon_dev: + kfree(mon_dev_name); +err_out_mod: + dout("Error adding device %s\n", buf); + module_put(THIS_MODULE); + return rc; +} + +static struct rbd_device *__rbd_get_dev(unsigned long id) +{ + struct list_head *tmp; + struct rbd_device *rbd_dev; + + list_for_each(tmp, &rbd_dev_list) { + rbd_dev = list_entry(tmp, struct rbd_device, node); + if (rbd_dev->id == id) + return rbd_dev; + } + return NULL; +} + +static ssize_t class_rbd_remove(struct class *c, + struct class_attribute *attr, + const char *buf, + size_t count) +{ + struct rbd_device *rbd_dev = NULL; + int target_id, rc; + unsigned long ul; + + rc = strict_strtoul(buf, 10, &ul); + if (rc) + return rc; + + /* convert to int; abort if we lost anything in the conversion */ + target_id = (int) ul; + if (target_id != ul) + return -EINVAL; + + /* remove object from list immediately */ + mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); + + rbd_dev = __rbd_get_dev(target_id); + if (rbd_dev) + list_del_init(&rbd_dev->node); + + mutex_unlock(&ctl_mutex); + + if (!rbd_dev) + return -ENOENT; + + rbd_put_client(rbd_dev); + + /* clean up and free blkdev */ + rbd_free_disk(rbd_dev); + unregister_blkdev(rbd_dev->major, rbd_dev->name); + kfree(rbd_dev); + + /* release module ref */ + module_put(THIS_MODULE); + + return count; +} + +static ssize_t 
class_rbd_snaps_list(struct class *c, + struct class_attribute *attr, + char *data) +{ + struct rbd_device *rbd_dev = NULL; + struct list_head *tmp; + struct rbd_image_header *header; + int i, n = 0, max = PAGE_SIZE; + int ret; + + mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); + + n += snprintf(data, max, "#id\tsnap\tKB\n"); + + list_for_each(tmp, &rbd_dev_list) { + char *names, *p; + struct ceph_snap_context *snapc; + + rbd_dev = list_entry(tmp, struct rbd_device, node); + header = &rbd_dev->header; + + down_read(&header->snap_rwsem); + + names = header->snap_names; + snapc = header->snapc; + + n += snprintf(data + n, max - n, "%d\t%s\t%lld%s\n", + rbd_dev->id, RBD_SNAP_HEAD_NAME, + header->image_size >> 10, + (!rbd_dev->cur_snap ? " (*)" : "")); + if (n == max) + break; + + p = names; + for (i = 0; i < header->total_snaps; i++, p += strlen(p) + 1) { + n += snprintf(data + n, max - n, "%d\t%s\t%lld%s\n", + rbd_dev->id, p, header->snap_sizes[i] >> 10, + (rbd_dev->cur_snap && + (snap_index(header, i) == rbd_dev->cur_snap) ? 
+ " (*)" : "")); + if (n == max) + break; + } + + up_read(&header->snap_rwsem); + } + + + ret = n; + mutex_unlock(&ctl_mutex); + return ret; +} + +static ssize_t class_rbd_snaps_refresh(struct class *c, + struct class_attribute *attr, + const char *buf, + size_t count) +{ + struct rbd_device *rbd_dev = NULL; + int target_id, rc; + unsigned long ul; + int ret = count; + + rc = strict_strtoul(buf, 10, &ul); + if (rc) + return rc; + + /* convert to int; abort if we lost anything in the conversion */ + target_id = (int) ul; + if (target_id != ul) + return -EINVAL; + + mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); + + rbd_dev = __rbd_get_dev(target_id); + if (!rbd_dev) { + ret = -ENOENT; + goto done; + } + + rc = rbd_update_snaps(rbd_dev); + if (rc < 0) + ret = rc; + +done: + mutex_unlock(&ctl_mutex); + return ret; +} + +static ssize_t class_rbd_snap_create(struct class *c, + struct class_attribute *attr, + const char *buf, + size_t count) +{ + struct rbd_device *rbd_dev = NULL; + int target_id, ret; + char *name; + + name = kmalloc(RBD_MAX_SNAP_NAME_LEN + 1, GFP_KERNEL); + if (!name) + return -ENOMEM; + + /* parse snaps add command */ + if (sscanf(buf, "%d " + "%" __stringify(RBD_MAX_SNAP_NAME_LEN) "s", + &target_id, + name) != 2) { + ret = -EINVAL; + goto done; + } + + mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); + + rbd_dev = __rbd_get_dev(target_id); + if (!rbd_dev) { + ret = -ENOENT; + goto done_unlock; + } + + ret = rbd_header_add_snap(rbd_dev, + name, GFP_KERNEL); + if (ret < 0) + goto done_unlock; + + ret = rbd_update_snaps(rbd_dev); + if (ret < 0) + goto done_unlock; + + ret = count; +done_unlock: + mutex_unlock(&ctl_mutex); +done: + kfree(name); + return ret; +} + +static ssize_t class_rbd_rollback(struct class *c, + struct class_attribute *attr, + const char *buf, + size_t count) +{ + struct rbd_device *rbd_dev = NULL; + int target_id, ret; + u64 snapid; + char snap_name[RBD_MAX_SNAP_NAME_LEN]; + u64 cur_ofs; + char *seg_name; + + /* parse 
snaps add command */ + if (sscanf(buf, "%d " + "%" __stringify(RBD_MAX_SNAP_NAME_LEN) "s", + &target_id, + snap_name) != 2) { + return -EINVAL; + } + + ret = -ENOMEM; + seg_name = kmalloc(RBD_MAX_SEG_NAME_LEN + 1, GFP_NOIO); + if (!seg_name) + return ret; + + mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); + + rbd_dev = __rbd_get_dev(target_id); + if (!rbd_dev) { + ret = -ENOENT; + goto done_unlock; + } + + ret = snap_by_name(&rbd_dev->header, snap_name, &snapid, NULL); + if (ret < 0) + goto done_unlock; + + dout("snapid=%lld\n", snapid); + + cur_ofs = 0; + while (cur_ofs < rbd_dev->header.image_size) { + cur_ofs += rbd_get_segment(&rbd_dev->header, + rbd_dev->obj, + cur_ofs, (u64)-1, + seg_name, NULL); + dout("seg_name=%s\n", seg_name); + + ret = rbd_req_sync_rollback_obj(rbd_dev, snapid, seg_name); + if (ret < 0) + pr_warning("could not roll back obj %s err=%d\n", + seg_name, ret); + } + + ret = rbd_update_snaps(rbd_dev); + if (ret < 0) + goto done_unlock; + + ret = count; + +done_unlock: + mutex_unlock(&ctl_mutex); + kfree(seg_name); + + return ret; +} + +static struct class_attribute class_rbd_attrs[] = { + __ATTR(add, 0200, NULL, class_rbd_add), + __ATTR(remove, 0200, NULL, class_rbd_remove), + __ATTR(list, 0444, class_rbd_list, NULL), + __ATTR(snaps_refresh, 0200, NULL, class_rbd_snaps_refresh), + __ATTR(snap_create, 0200, NULL, class_rbd_snap_create), + __ATTR(snaps_list, 0444, class_rbd_snaps_list, NULL), + __ATTR(snap_rollback, 0200, NULL, class_rbd_rollback), + __ATTR_NULL +}; + +/* + * create control files in sysfs + * /sys/class/rbd/... 
+ */ +static int rbd_sysfs_init(void) +{ + int ret = -ENOMEM; + + class_rbd = kzalloc(sizeof(*class_rbd), GFP_KERNEL); + if (!class_rbd) + goto out; + + class_rbd->name = DRV_NAME; + class_rbd->owner = THIS_MODULE; + class_rbd->class_release = class_rbd_release; + class_rbd->class_attrs = class_rbd_attrs; + + ret = class_register(class_rbd); + if (ret) + goto out_class; + return 0; + +out_class: + kfree(class_rbd); + class_rbd = NULL; + pr_err(DRV_NAME ": failed to create class rbd\n"); +out: + return ret; +} + +static void rbd_sysfs_cleanup(void) +{ + if (class_rbd) + class_destroy(class_rbd); + class_rbd = NULL; +} + +int __init rbd_init(void) +{ + int rc; + + rc = rbd_sysfs_init(); + if (rc) + return rc; + spin_lock_init(&node_lock); + pr_info("loaded " DRV_NAME_LONG "\n"); + return 0; +} + +void __exit rbd_exit(void) +{ + rbd_sysfs_cleanup(); +} + +module_init(rbd_init); +module_exit(rbd_exit); + +MODULE_AUTHOR("Sage Weil <sage@newdream.net>"); +MODULE_AUTHOR("Yehuda Sadeh <yehuda@hq.newdream.net>"); +MODULE_DESCRIPTION("rados block device"); + +/* following authorship retained from original osdblk.c */ +MODULE_AUTHOR("Jeff Garzik <jeff@garzik.org>"); + +MODULE_LICENSE("GPL"); diff --git a/drivers/block/rbd_types.h b/drivers/block/rbd_types.h new file mode 100644 index 00000000000..fc6c678aa2c --- /dev/null +++ b/drivers/block/rbd_types.h @@ -0,0 +1,73 @@ +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2004-2010 Sage Weil <sage@newdream.net> + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. + * + */ + +#ifndef CEPH_RBD_TYPES_H +#define CEPH_RBD_TYPES_H + +#include <linux/types.h> + +/* + * rbd image 'foo' consists of objects + * foo.rbd - image metadata + * foo.00000000 + * foo.00000001 + * ... 
- data + */ + +#define RBD_SUFFIX ".rbd" +#define RBD_DIRECTORY "rbd_directory" +#define RBD_INFO "rbd_info" + +#define RBD_DEFAULT_OBJ_ORDER 22 /* 4MB */ +#define RBD_MIN_OBJ_ORDER 16 +#define RBD_MAX_OBJ_ORDER 30 + +#define RBD_MAX_OBJ_NAME_LEN 96 +#define RBD_MAX_SEG_NAME_LEN 128 + +#define RBD_COMP_NONE 0 +#define RBD_CRYPT_NONE 0 + +#define RBD_HEADER_TEXT "<<< Rados Block Device Image >>>\n" +#define RBD_HEADER_SIGNATURE "RBD" +#define RBD_HEADER_VERSION "001.005" + +struct rbd_info { + __le64 max_id; +} __attribute__ ((packed)); + +struct rbd_image_snap_ondisk { + __le64 id; + __le64 image_size; +} __attribute__((packed)); + +struct rbd_image_header_ondisk { + char text[40]; + char block_name[24]; + char signature[4]; + char version[8]; + struct { + __u8 order; + __u8 crypt_type; + __u8 comp_type; + __u8 unused; + } __attribute__((packed)) options; + __le64 image_size; + __le64 snap_seq; + __le32 snap_count; + __le32 reserved; + __le64 snap_names_len; + struct rbd_image_snap_ondisk snaps[0]; +} __attribute__((packed)); + + +#endif diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 1101e251a62..8320490226b 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -2,7 +2,6 @@ #include <linux/spinlock.h> #include <linux/slab.h> #include <linux/blkdev.h> -#include <linux/smp_lock.h> #include <linux/hdreg.h> #include <linux/virtio.h> #include <linux/virtio_blk.h> @@ -222,8 +221,8 @@ static int virtblk_get_id(struct gendisk *disk, char *id_str) return err; } -static int virtblk_locked_ioctl(struct block_device *bdev, fmode_t mode, - unsigned cmd, unsigned long data) +static int virtblk_ioctl(struct block_device *bdev, fmode_t mode, + unsigned int cmd, unsigned long data) { struct gendisk *disk = bdev->bd_disk; struct virtio_blk *vblk = disk->private_data; @@ -238,18 +237,6 @@ static int virtblk_locked_ioctl(struct block_device *bdev, fmode_t mode, (void __user *)data); } -static int virtblk_ioctl(struct block_device 
*bdev, fmode_t mode, - unsigned int cmd, unsigned long param) -{ - int ret; - - lock_kernel(); - ret = virtblk_locked_ioctl(bdev, mode, cmd, param); - unlock_kernel(); - - return ret; -} - /* We provide getgeo only to please some old bootloader/partitioning tools */ static int virtblk_getgeo(struct block_device *bd, struct hd_geometry *geo) { diff --git a/drivers/char/agp/amd64-agp.c b/drivers/char/agp/amd64-agp.c index 70312da4c96..564808a5c3c 100644 --- a/drivers/char/agp/amd64-agp.c +++ b/drivers/char/agp/amd64-agp.c @@ -199,7 +199,7 @@ static void amd64_cleanup(void) struct pci_dev *dev = k8_northbridges[i]; /* disable gart translation */ pci_read_config_dword(dev, AMD64_GARTAPERTURECTL, &tmp); - tmp &= ~AMD64_GARTEN; + tmp &= ~GARTEN; pci_write_config_dword(dev, AMD64_GARTAPERTURECTL, tmp); } } @@ -313,7 +313,7 @@ static __devinit int fix_northbridge(struct pci_dev *nb, struct pci_dev *agp, if (order < 0 || !agp_aperture_valid(aper, (32*1024*1024)<<order)) return -1; - pci_write_config_dword(nb, AMD64_GARTAPERTURECTL, order << 1); + gart_set_size_and_enable(nb, order); pci_write_config_dword(nb, AMD64_GARTAPERTUREBASE, aper >> 25); return 0; diff --git a/drivers/char/agp/generic.c b/drivers/char/agp/generic.c index d2abf514398..64255cef8a7 100644 --- a/drivers/char/agp/generic.c +++ b/drivers/char/agp/generic.c @@ -984,7 +984,9 @@ int agp_generic_create_gatt_table(struct agp_bridge_data *bridge) bridge->driver->cache_flush(); #ifdef CONFIG_X86 - set_memory_uc((unsigned long)table, 1 << page_order); + if (set_memory_uc((unsigned long)table, 1 << page_order)) + printk(KERN_WARNING "Could not set GATT table memory to UC!"); + bridge->gatt_table = (void *)table; #else bridge->gatt_table = ioremap_nocache(virt_to_phys(table), diff --git a/drivers/char/tpm/tpm.c b/drivers/char/tpm/tpm.c index 05ad4a17a28..7c4133582db 100644 --- a/drivers/char/tpm/tpm.c +++ b/drivers/char/tpm/tpm.c @@ -47,6 +47,16 @@ enum tpm_duration { #define TPM_MAX_PROTECTED_ORDINAL 12 #define 
TPM_PROTECTED_ORDINAL_MASK 0xFF +/* + * Bug workaround - some TPM's don't flush the most + * recently changed pcr on suspend, so force the flush + * with an extend to the selected _unused_ non-volatile pcr. + */ +static int tpm_suspend_pcr; +module_param_named(suspend_pcr, tpm_suspend_pcr, uint, 0644); +MODULE_PARM_DESC(suspend_pcr, + "PCR to use for dummy writes to faciltate flush on suspend."); + static LIST_HEAD(tpm_chip_list); static DEFINE_SPINLOCK(driver_lock); static DECLARE_BITMAP(dev_mask, TPM_NUM_DEVICES); @@ -1077,18 +1087,6 @@ static struct tpm_input_header savestate_header = { .ordinal = TPM_ORD_SAVESTATE }; -/* Bug workaround - some TPM's don't flush the most - * recently changed pcr on suspend, so force the flush - * with an extend to the selected _unused_ non-volatile pcr. - */ -static int tpm_suspend_pcr; -static int __init tpm_suspend_setup(char *str) -{ - get_option(&str, &tpm_suspend_pcr); - return 1; -} -__setup("tpm_suspend_pcr=", tpm_suspend_setup); - /* * We are about to suspend. Save the TPM state * so that it can be restored. diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c index c810481a5bc..6c1b676643a 100644 --- a/drivers/char/virtio_console.c +++ b/drivers/char/virtio_console.c @@ -48,6 +48,9 @@ struct ports_driver_data { /* Used for exporting per-port information to debugfs */ struct dentry *debugfs_dir; + /* List of all the devices we're handling */ + struct list_head portdevs; + /* Number of devices this driver is handling */ unsigned int index; @@ -108,6 +111,9 @@ struct port_buffer { * ports for that device (vdev->priv). 
*/ struct ports_device { + /* Next portdev in the list, head is in the pdrvdata struct */ + struct list_head list; + /* * Workqueue handlers where we process deferred work after * notification @@ -178,15 +184,21 @@ struct port { struct console cons; /* Each port associates with a separate char device */ - struct cdev cdev; + struct cdev *cdev; struct device *dev; + /* Reference-counting to handle port hot-unplugs and file operations */ + struct kref kref; + /* A waitqueue for poll() or blocking read operations */ wait_queue_head_t waitqueue; /* The 'name' of the port that we expose via sysfs properties */ char *name; + /* We can notify apps of host connect / disconnect events via SIGIO */ + struct fasync_struct *async_queue; + /* The 'id' to identify the port with the Host */ u32 id; @@ -221,6 +233,41 @@ out: return port; } +static struct port *find_port_by_devt_in_portdev(struct ports_device *portdev, + dev_t dev) +{ + struct port *port; + unsigned long flags; + + spin_lock_irqsave(&portdev->ports_lock, flags); + list_for_each_entry(port, &portdev->ports, list) + if (port->cdev->dev == dev) + goto out; + port = NULL; +out: + spin_unlock_irqrestore(&portdev->ports_lock, flags); + + return port; +} + +static struct port *find_port_by_devt(dev_t dev) +{ + struct ports_device *portdev; + struct port *port; + unsigned long flags; + + spin_lock_irqsave(&pdrvdata_lock, flags); + list_for_each_entry(portdev, &pdrvdata.portdevs, list) { + port = find_port_by_devt_in_portdev(portdev, dev); + if (port) + goto out; + } + port = NULL; +out: + spin_unlock_irqrestore(&pdrvdata_lock, flags); + return port; +} + static struct port *find_port_by_id(struct ports_device *portdev, u32 id) { struct port *port; @@ -410,7 +457,10 @@ static ssize_t __send_control_msg(struct ports_device *portdev, u32 port_id, static ssize_t send_control_msg(struct port *port, unsigned int event, unsigned int value) { - return __send_control_msg(port->portdev, port->id, event, value); + /* Did the port get 
unplugged before userspace closed it? */ + if (port->portdev) + return __send_control_msg(port->portdev, port->id, event, value); + return 0; } /* Callers must take the port->outvq_lock */ @@ -459,9 +509,12 @@ static ssize_t send_buf(struct port *port, void *in_buf, size_t in_count, /* * Wait till the host acknowledges it pushed out the data we - * sent. This is done for ports in blocking mode or for data - * from the hvc_console; the tty operations are performed with - * spinlocks held so we can't sleep here. + * sent. This is done for data from the hvc_console; the tty + * operations are performed with spinlocks held so we can't + * sleep here. An alternative would be to copy the data to a + * buffer and relax the spinning requirement. The downside is + * we need to kmalloc a GFP_ATOMIC buffer each time the + * console driver writes something out. */ while (!virtqueue_get_buf(out_vq, &len)) cpu_relax(); @@ -522,6 +575,10 @@ static ssize_t fill_readbuf(struct port *port, char *out_buf, size_t out_count, /* The condition that must be true for polling to end */ static bool will_read_block(struct port *port) { + if (!port->guest_connected) { + /* Port got hot-unplugged. Let's exit. */ + return false; + } return !port_has_data(port) && port->host_connected; } @@ -572,6 +629,9 @@ static ssize_t port_fops_read(struct file *filp, char __user *ubuf, if (ret < 0) return ret; } + /* Port got hot-unplugged. */ + if (!port->guest_connected) + return -ENODEV; /* * We could've received a disconnection message while we were * waiting for more data. @@ -613,6 +673,9 @@ static ssize_t port_fops_write(struct file *filp, const char __user *ubuf, if (ret < 0) return ret; } + /* Port got hot-unplugged. 
*/ + if (!port->guest_connected) + return -ENODEV; count = min((size_t)(32 * 1024), count); @@ -626,6 +689,14 @@ static ssize_t port_fops_write(struct file *filp, const char __user *ubuf, goto free_buf; } + /* + * We now ask send_buf() to not spin for generic ports -- we + * can re-use the same code path that non-blocking file + * descriptors take for blocking file descriptors since the + * wait is already done and we're certain the write will go + * through to the host. + */ + nonblock = true; ret = send_buf(port, buf, count, nonblock); if (nonblock && ret > 0) @@ -645,6 +716,10 @@ static unsigned int port_fops_poll(struct file *filp, poll_table *wait) port = filp->private_data; poll_wait(filp, &port->waitqueue, wait); + if (!port->guest_connected) { + /* Port got unplugged */ + return POLLHUP; + } ret = 0; if (!will_read_block(port)) ret |= POLLIN | POLLRDNORM; @@ -656,6 +731,8 @@ static unsigned int port_fops_poll(struct file *filp, poll_table *wait) return ret; } +static void remove_port(struct kref *kref); + static int port_fops_release(struct inode *inode, struct file *filp) { struct port *port; @@ -676,6 +753,16 @@ static int port_fops_release(struct inode *inode, struct file *filp) reclaim_consumed_buffers(port); spin_unlock_irq(&port->outvq_lock); + /* + * Locks aren't necessary here as a port can't be opened after + * unplug, and if a port isn't unplugged, a kref would already + * exist for the port. Plus, taking ports_lock here would + * create a dependency on other locks taken by functions + * inside remove_port if we're the last holder of the port, + * creating many problems. 
+ */ + kref_put(&port->kref, remove_port); + return 0; } @@ -683,22 +770,31 @@ static int port_fops_open(struct inode *inode, struct file *filp) { struct cdev *cdev = inode->i_cdev; struct port *port; + int ret; - port = container_of(cdev, struct port, cdev); + port = find_port_by_devt(cdev->dev); filp->private_data = port; + /* Prevent against a port getting hot-unplugged at the same time */ + spin_lock_irq(&port->portdev->ports_lock); + kref_get(&port->kref); + spin_unlock_irq(&port->portdev->ports_lock); + /* * Don't allow opening of console port devices -- that's done * via /dev/hvc */ - if (is_console_port(port)) - return -ENXIO; + if (is_console_port(port)) { + ret = -ENXIO; + goto out; + } /* Allow only one process to open a particular port at a time */ spin_lock_irq(&port->inbuf_lock); if (port->guest_connected) { spin_unlock_irq(&port->inbuf_lock); - return -EMFILE; + ret = -EMFILE; + goto out; } port->guest_connected = true; @@ -713,10 +809,23 @@ static int port_fops_open(struct inode *inode, struct file *filp) reclaim_consumed_buffers(port); spin_unlock_irq(&port->outvq_lock); + nonseekable_open(inode, filp); + /* Notify host of port being opened */ send_control_msg(filp->private_data, VIRTIO_CONSOLE_PORT_OPEN, 1); return 0; +out: + kref_put(&port->kref, remove_port); + return ret; +} + +static int port_fops_fasync(int fd, struct file *filp, int mode) +{ + struct port *port; + + port = filp->private_data; + return fasync_helper(fd, filp, mode, &port->async_queue); } /* @@ -732,6 +841,8 @@ static const struct file_operations port_fops = { .write = port_fops_write, .poll = port_fops_poll, .release = port_fops_release, + .fasync = port_fops_fasync, + .llseek = no_llseek, }; /* @@ -990,6 +1101,12 @@ static unsigned int fill_queue(struct virtqueue *vq, spinlock_t *lock) return nr_added_bufs; } +static void send_sigio_to_port(struct port *port) +{ + if (port->async_queue && port->guest_connected) + kill_fasync(&port->async_queue, SIGIO, POLL_OUT); +} + static 
int add_port(struct ports_device *portdev, u32 id) { char debugfs_name[16]; @@ -1004,6 +1121,7 @@ static int add_port(struct ports_device *portdev, u32 id) err = -ENOMEM; goto fail; } + kref_init(&port->kref); port->portdev = portdev; port->id = id; @@ -1011,6 +1129,7 @@ static int add_port(struct ports_device *portdev, u32 id) port->name = NULL; port->inbuf = NULL; port->cons.hvc = NULL; + port->async_queue = NULL; port->cons.ws.ws_row = port->cons.ws.ws_col = 0; @@ -1021,14 +1140,20 @@ static int add_port(struct ports_device *portdev, u32 id) port->in_vq = portdev->in_vqs[port->id]; port->out_vq = portdev->out_vqs[port->id]; - cdev_init(&port->cdev, &port_fops); + port->cdev = cdev_alloc(); + if (!port->cdev) { + dev_err(&port->portdev->vdev->dev, "Error allocating cdev\n"); + err = -ENOMEM; + goto free_port; + } + port->cdev->ops = &port_fops; devt = MKDEV(portdev->chr_major, id); - err = cdev_add(&port->cdev, devt, 1); + err = cdev_add(port->cdev, devt, 1); if (err < 0) { dev_err(&port->portdev->vdev->dev, "Error %d adding cdev for port %u\n", err, id); - goto free_port; + goto free_cdev; } port->dev = device_create(pdrvdata.class, &port->portdev->vdev->dev, devt, port, "vport%up%u", @@ -1093,7 +1218,7 @@ free_inbufs: free_device: device_destroy(pdrvdata.class, port->dev->devt); free_cdev: - cdev_del(&port->cdev); + cdev_del(port->cdev); free_port: kfree(port); fail: @@ -1102,21 +1227,45 @@ fail: return err; } -/* Remove all port-specific data. */ -static int remove_port(struct port *port) +/* No users remain, remove all port-specific data. */ +static void remove_port(struct kref *kref) +{ + struct port *port; + + port = container_of(kref, struct port, kref); + + sysfs_remove_group(&port->dev->kobj, &port_attribute_group); + device_destroy(pdrvdata.class, port->dev->devt); + cdev_del(port->cdev); + + kfree(port->name); + + debugfs_remove(port->debugfs_file); + + kfree(port); +} + +/* + * Port got unplugged. 
Remove port from portdev's list and drop the + * kref reference. If no userspace has this port opened, it will + * result in immediate removal the port. + */ +static void unplug_port(struct port *port) { struct port_buffer *buf; + spin_lock_irq(&port->portdev->ports_lock); + list_del(&port->list); + spin_unlock_irq(&port->portdev->ports_lock); + if (port->guest_connected) { port->guest_connected = false; port->host_connected = false; wake_up_interruptible(&port->waitqueue); - send_control_msg(port, VIRTIO_CONSOLE_PORT_OPEN, 0); - } - spin_lock_irq(&port->portdev->ports_lock); - list_del(&port->list); - spin_unlock_irq(&port->portdev->ports_lock); + /* Let the app know the port is going down. */ + send_sigio_to_port(port); + } if (is_console_port(port)) { spin_lock_irq(&pdrvdata_lock); @@ -1135,9 +1284,6 @@ static int remove_port(struct port *port) hvc_remove(port->cons.hvc); #endif } - sysfs_remove_group(&port->dev->kobj, &port_attribute_group); - device_destroy(pdrvdata.class, port->dev->devt); - cdev_del(&port->cdev); /* Remove unused data this port might have received. */ discard_port_data(port); @@ -1148,12 +1294,19 @@ static int remove_port(struct port *port) while ((buf = virtqueue_detach_unused_buf(port->in_vq))) free_buf(buf); - kfree(port->name); - - debugfs_remove(port->debugfs_file); + /* + * We should just assume the device itself has gone off -- + * else a close on an open port later will try to send out a + * control message. + */ + port->portdev = NULL; - kfree(port); - return 0; + /* + * Locks around here are not necessary - a port can't be + * opened after we removed the port struct from ports_list + * above. 
+ */ + kref_put(&port->kref, remove_port); } /* Any private messages that the Host and Guest want to share */ @@ -1192,7 +1345,7 @@ static void handle_control_message(struct ports_device *portdev, add_port(portdev, cpkt->id); break; case VIRTIO_CONSOLE_PORT_REMOVE: - remove_port(port); + unplug_port(port); break; case VIRTIO_CONSOLE_CONSOLE_PORT: if (!cpkt->value) @@ -1234,6 +1387,12 @@ static void handle_control_message(struct ports_device *portdev, spin_lock_irq(&port->outvq_lock); reclaim_consumed_buffers(port); spin_unlock_irq(&port->outvq_lock); + + /* + * If the guest is connected, it'll be interested in + * knowing the host connection state changed. + */ + send_sigio_to_port(port); break; case VIRTIO_CONSOLE_PORT_NAME: /* @@ -1330,6 +1489,9 @@ static void in_intr(struct virtqueue *vq) wake_up_interruptible(&port->waitqueue); + /* Send a SIGIO indicating new data in case the process asked for it */ + send_sigio_to_port(port); + if (is_console_port(port) && hvc_poll(port->cons.hvc)) hvc_kick(); } @@ -1566,6 +1728,10 @@ static int __devinit virtcons_probe(struct virtio_device *vdev) add_port(portdev, 0); } + spin_lock_irq(&pdrvdata_lock); + list_add_tail(&portdev->list, &pdrvdata.portdevs); + spin_unlock_irq(&pdrvdata_lock); + __send_control_msg(portdev, VIRTIO_CONSOLE_BAD_ID, VIRTIO_CONSOLE_DEVICE_READY, 1); return 0; @@ -1589,23 +1755,41 @@ static void virtcons_remove(struct virtio_device *vdev) { struct ports_device *portdev; struct port *port, *port2; - struct port_buffer *buf; - unsigned int len; portdev = vdev->priv; + spin_lock_irq(&pdrvdata_lock); + list_del(&portdev->list); + spin_unlock_irq(&pdrvdata_lock); + + /* Disable interrupts for vqs */ + vdev->config->reset(vdev); + /* Finish up work that's lined up */ cancel_work_sync(&portdev->control_work); list_for_each_entry_safe(port, port2, &portdev->ports, list) - remove_port(port); + unplug_port(port); unregister_chrdev(portdev->chr_major, "virtio-portsdev"); - while ((buf = 
virtqueue_get_buf(portdev->c_ivq, &len))) - free_buf(buf); + /* + * When yanking out a device, we immediately lose the + * (device-side) queues. So there's no point in keeping the + * guest side around till we drop our final reference. This + * also means that any ports which are in an open state will + * have to just stop using the port, as the vqs are going + * away. + */ + if (use_multiport(portdev)) { + struct port_buffer *buf; + unsigned int len; - while ((buf = virtqueue_detach_unused_buf(portdev->c_ivq))) - free_buf(buf); + while ((buf = virtqueue_get_buf(portdev->c_ivq, &len))) + free_buf(buf); + + while ((buf = virtqueue_detach_unused_buf(portdev->c_ivq))) + free_buf(buf); + } vdev->config->del_vqs(vdev); kfree(portdev->in_vqs); @@ -1652,6 +1836,7 @@ static int __init init(void) PTR_ERR(pdrvdata.debugfs_dir)); } INIT_LIST_HEAD(&pdrvdata.consoles); + INIT_LIST_HEAD(&pdrvdata.portdevs); return register_virtio_driver(&virtio_console); } diff --git a/drivers/firewire/ohci.c b/drivers/firewire/ohci.c index 1b05896648b..9dcb17d51ae 100644 --- a/drivers/firewire/ohci.c +++ b/drivers/firewire/ohci.c @@ -2840,7 +2840,7 @@ static int __devinit pci_probe(struct pci_dev *dev, const struct pci_device_id *ent) { struct fw_ohci *ohci; - u32 bus_options, max_receive, link_speed, version, link_enh; + u32 bus_options, max_receive, link_speed, version; u64 guid; int i, err, n_ir, n_it; size_t size; @@ -2894,23 +2894,6 @@ static int __devinit pci_probe(struct pci_dev *dev, if (param_quirks) ohci->quirks = param_quirks; - /* TI OHCI-Lynx and compatible: set recommended configuration bits. 
*/ - if (dev->vendor == PCI_VENDOR_ID_TI) { - pci_read_config_dword(dev, PCI_CFG_TI_LinkEnh, &link_enh); - - /* adjust latency of ATx FIFO: use 1.7 KB threshold */ - link_enh &= ~TI_LinkEnh_atx_thresh_mask; - link_enh |= TI_LinkEnh_atx_thresh_1_7K; - - /* use priority arbitration for asynchronous responses */ - link_enh |= TI_LinkEnh_enab_unfair; - - /* required for aPhyEnhanceEnable to work */ - link_enh |= TI_LinkEnh_enab_accel; - - pci_write_config_dword(dev, PCI_CFG_TI_LinkEnh, link_enh); - } - ar_context_init(&ohci->ar_request_ctx, ohci, OHCI1394_AsReqRcvContextControlSet); diff --git a/drivers/firewire/ohci.h b/drivers/firewire/ohci.h index 0e6c5a46690..ef5e7336da6 100644 --- a/drivers/firewire/ohci.h +++ b/drivers/firewire/ohci.h @@ -155,12 +155,4 @@ #define OHCI1394_phy_tcode 0xe -/* TI extensions */ - -#define PCI_CFG_TI_LinkEnh 0xf4 -#define TI_LinkEnh_enab_accel 0x00000002 -#define TI_LinkEnh_enab_unfair 0x00000080 -#define TI_LinkEnh_atx_thresh_mask 0x00003000 -#define TI_LinkEnh_atx_thresh_1_7K 0x00001000 - #endif /* _FIREWIRE_OHCI_H */ diff --git a/drivers/gpu/drm/radeon/radeon_cursor.c b/drivers/gpu/drm/radeon/radeon_cursor.c index 5731fc9b1ae..3eef567b042 100644 --- a/drivers/gpu/drm/radeon/radeon_cursor.c +++ b/drivers/gpu/drm/radeon/radeon_cursor.c @@ -203,6 +203,7 @@ int radeon_crtc_cursor_move(struct drm_crtc *crtc, struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc); struct radeon_device *rdev = crtc->dev->dev_private; int xorigin = 0, yorigin = 0; + int w = radeon_crtc->cursor_width; if (x < 0) xorigin = -x + 1; @@ -213,22 +214,7 @@ int radeon_crtc_cursor_move(struct drm_crtc *crtc, if (yorigin >= CURSOR_HEIGHT) yorigin = CURSOR_HEIGHT - 1; - radeon_lock_cursor(crtc, true); - if (ASIC_IS_DCE4(rdev)) { - /* cursors are offset into the total surface */ - x += crtc->x; - y += crtc->y; - DRM_DEBUG("x %d y %d c->x %d c->y %d\n", x, y, crtc->x, crtc->y); - - /* XXX: check if evergreen has the same issues as avivo chips */ - 
WREG32(EVERGREEN_CUR_POSITION + radeon_crtc->crtc_offset, - ((xorigin ? 0 : x) << 16) | - (yorigin ? 0 : y)); - WREG32(EVERGREEN_CUR_HOT_SPOT + radeon_crtc->crtc_offset, (xorigin << 16) | yorigin); - WREG32(EVERGREEN_CUR_SIZE + radeon_crtc->crtc_offset, - ((radeon_crtc->cursor_width - 1) << 16) | (radeon_crtc->cursor_height - 1)); - } else if (ASIC_IS_AVIVO(rdev)) { - int w = radeon_crtc->cursor_width; + if (ASIC_IS_AVIVO(rdev)) { int i = 0; struct drm_crtc *crtc_p; @@ -260,7 +246,17 @@ int radeon_crtc_cursor_move(struct drm_crtc *crtc, if (w <= 0) w = 1; } + } + radeon_lock_cursor(crtc, true); + if (ASIC_IS_DCE4(rdev)) { + WREG32(EVERGREEN_CUR_POSITION + radeon_crtc->crtc_offset, + ((xorigin ? 0 : x) << 16) | + (yorigin ? 0 : y)); + WREG32(EVERGREEN_CUR_HOT_SPOT + radeon_crtc->crtc_offset, (xorigin << 16) | yorigin); + WREG32(EVERGREEN_CUR_SIZE + radeon_crtc->crtc_offset, + ((w - 1) << 16) | (radeon_crtc->cursor_height - 1)); + } else if (ASIC_IS_AVIVO(rdev)) { WREG32(AVIVO_D1CUR_POSITION + radeon_crtc->crtc_offset, ((xorigin ? 0 : x) << 16) | (yorigin ? 
0 : y)); diff --git a/drivers/hid/hid-cando.c b/drivers/hid/hid-cando.c index 4267a6fdc27..5925bdcd417 100644 --- a/drivers/hid/hid-cando.c +++ b/drivers/hid/hid-cando.c @@ -237,6 +237,8 @@ static const struct hid_device_id cando_devices[] = { USB_DEVICE_ID_CANDO_MULTI_TOUCH) }, { HID_USB_DEVICE(USB_VENDOR_ID_CANDO, USB_DEVICE_ID_CANDO_MULTI_TOUCH_11_6) }, + { HID_USB_DEVICE(USB_VENDOR_ID_CANDO, + USB_DEVICE_ID_CANDO_MULTI_TOUCH_15_6) }, { } }; MODULE_DEVICE_TABLE(hid, cando_devices); diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c index 3f729248602..a0dea3d1296 100644 --- a/drivers/hid/hid-core.c +++ b/drivers/hid/hid-core.c @@ -1292,6 +1292,7 @@ static const struct hid_device_id hid_blacklist[] = { { HID_USB_DEVICE(USB_VENDOR_ID_BTC, USB_DEVICE_ID_BTC_EMPREX_REMOTE_2) }, { HID_USB_DEVICE(USB_VENDOR_ID_CANDO, USB_DEVICE_ID_CANDO_MULTI_TOUCH) }, { HID_USB_DEVICE(USB_VENDOR_ID_CANDO, USB_DEVICE_ID_CANDO_MULTI_TOUCH_11_6) }, + { HID_USB_DEVICE(USB_VENDOR_ID_CANDO, USB_DEVICE_ID_CANDO_MULTI_TOUCH_15_6) }, { HID_USB_DEVICE(USB_VENDOR_ID_CHERRY, USB_DEVICE_ID_CHERRY_CYMOTION) }, { HID_USB_DEVICE(USB_VENDOR_ID_CHERRY, USB_DEVICE_ID_CHERRY_CYMOTION_SOLAR) }, { HID_USB_DEVICE(USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_TACTICAL_PAD) }, diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index 765a4f53eb5..c5ae5f1545b 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -134,6 +134,7 @@ #define USB_VENDOR_ID_CANDO 0x2087 #define USB_DEVICE_ID_CANDO_MULTI_TOUCH 0x0a01 #define USB_DEVICE_ID_CANDO_MULTI_TOUCH_11_6 0x0b03 +#define USB_DEVICE_ID_CANDO_MULTI_TOUCH_15_6 0x0f01 #define USB_VENDOR_ID_CH 0x068e #define USB_DEVICE_ID_CH_PRO_PEDALS 0x00f2 @@ -503,6 +504,7 @@ #define USB_VENDOR_ID_TURBOX 0x062a #define USB_DEVICE_ID_TURBOX_KEYBOARD 0x0201 +#define USB_DEVICE_ID_TURBOX_TOUCHSCREEN_MOSART 0x7100 #define USB_VENDOR_ID_TWINHAN 0x6253 #define USB_DEVICE_ID_TWINHAN_IR_REMOTE 0x0100 diff --git a/drivers/hid/hidraw.c b/drivers/hid/hidraw.c 
index 47d70c523d9..a3866b5c0c4 100644 --- a/drivers/hid/hidraw.c +++ b/drivers/hid/hidraw.c @@ -109,6 +109,12 @@ static ssize_t hidraw_write(struct file *file, const char __user *buffer, size_t int ret = 0; mutex_lock(&minors_lock); + + if (!hidraw_table[minor]) { + ret = -ENODEV; + goto out; + } + dev = hidraw_table[minor]->hid; if (!dev->hid_output_raw_report) { @@ -244,6 +250,10 @@ static long hidraw_ioctl(struct file *file, unsigned int cmd, mutex_lock(&minors_lock); dev = hidraw_table[minor]; + if (!dev) { + ret = -ENODEV; + goto out; + } switch (cmd) { case HIDIOCGRDESCSIZE: @@ -317,6 +327,7 @@ static long hidraw_ioctl(struct file *file, unsigned int cmd, ret = -ENOTTY; } +out: mutex_unlock(&minors_lock); return ret; } diff --git a/drivers/hid/usbhid/hid-quirks.c b/drivers/hid/usbhid/hid-quirks.c index 70da3181c8a..f0260c699ad 100644 --- a/drivers/hid/usbhid/hid-quirks.c +++ b/drivers/hid/usbhid/hid-quirks.c @@ -36,6 +36,7 @@ static const struct hid_blacklist { { USB_VENDOR_ID_DWAV, USB_DEVICE_ID_EGALAX_TOUCHCONTROLLER, HID_QUIRK_MULTI_INPUT | HID_QUIRK_NOGET }, { USB_VENDOR_ID_DWAV, USB_DEVICE_ID_DWAV_EGALAX_MULTITOUCH, HID_QUIRK_MULTI_INPUT }, { USB_VENDOR_ID_MOJO, USB_DEVICE_ID_RETRO_ADAPTER, HID_QUIRK_MULTI_INPUT }, + { USB_VENDOR_ID_TURBOX, USB_DEVICE_ID_TURBOX_TOUCHSCREEN_MOSART, HID_QUIRK_MULTI_INPUT }, { USB_VENDOR_ID_HAPP, USB_DEVICE_ID_UGCI_DRIVING, HID_QUIRK_BADPAD | HID_QUIRK_MULTI_INPUT }, { USB_VENDOR_ID_HAPP, USB_DEVICE_ID_UGCI_FLYING, HID_QUIRK_BADPAD | HID_QUIRK_MULTI_INPUT }, { USB_VENDOR_ID_HAPP, USB_DEVICE_ID_UGCI_FIGHTING, HID_QUIRK_BADPAD | HID_QUIRK_MULTI_INPUT }, diff --git a/drivers/i2c/busses/i2c-davinci.c b/drivers/i2c/busses/i2c-davinci.c index b8feac5f2ef..5795c8398c7 100644 --- a/drivers/i2c/busses/i2c-davinci.c +++ b/drivers/i2c/busses/i2c-davinci.c @@ -331,21 +331,16 @@ i2c_davinci_xfer_msg(struct i2c_adapter *adap, struct i2c_msg *msg, int stop) INIT_COMPLETION(dev->cmd_complete); dev->cmd_err = 0; - /* Take I2C out of reset, 
configure it as master and set the - * start bit */ - flag = DAVINCI_I2C_MDR_IRS | DAVINCI_I2C_MDR_MST | DAVINCI_I2C_MDR_STT; + /* Take I2C out of reset and configure it as master */ + flag = DAVINCI_I2C_MDR_IRS | DAVINCI_I2C_MDR_MST; /* if the slave address is ten bit address, enable XA bit */ if (msg->flags & I2C_M_TEN) flag |= DAVINCI_I2C_MDR_XA; if (!(msg->flags & I2C_M_RD)) flag |= DAVINCI_I2C_MDR_TRX; - if (stop) - flag |= DAVINCI_I2C_MDR_STP; - if (msg->len == 0) { + if (msg->len == 0) flag |= DAVINCI_I2C_MDR_RM; - flag &= ~DAVINCI_I2C_MDR_STP; - } /* Enable receive or transmit interrupts */ w = davinci_i2c_read_reg(dev, DAVINCI_I2C_IMR_REG); @@ -358,17 +353,28 @@ i2c_davinci_xfer_msg(struct i2c_adapter *adap, struct i2c_msg *msg, int stop) dev->terminate = 0; /* + * Write mode register first as needed for correct behaviour + * on OMAP-L138, but don't set STT yet to avoid a race with XRDY + * occuring before we have loaded DXR + */ + davinci_i2c_write_reg(dev, DAVINCI_I2C_MDR_REG, flag); + + /* * First byte should be set here, not after interrupt, * because transmit-data-ready interrupt can come before * NACK-interrupt during sending of previous message and * ICDXR may have wrong data + * It also saves us one interrupt, slightly faster */ if ((!(msg->flags & I2C_M_RD)) && dev->buf_len) { davinci_i2c_write_reg(dev, DAVINCI_I2C_DXR_REG, *dev->buf++); dev->buf_len--; } - /* write the data into mode register; start transmitting */ + /* Set STT to begin transmit now DXR is loaded */ + flag |= DAVINCI_I2C_MDR_STT; + if (stop && msg->len != 0) + flag |= DAVINCI_I2C_MDR_STP; davinci_i2c_write_reg(dev, DAVINCI_I2C_MDR_REG, flag); r = wait_for_completion_interruptible_timeout(&dev->cmd_complete, diff --git a/drivers/i2c/busses/i2c-imx.c b/drivers/i2c/busses/i2c-imx.c index d1ff9408dc1..4c2a62b75b5 100644 --- a/drivers/i2c/busses/i2c-imx.c +++ b/drivers/i2c/busses/i2c-imx.c @@ -159,15 +159,9 @@ static int i2c_imx_bus_busy(struct imx_i2c_struct *i2c_imx, int for_busy) 
static int i2c_imx_trx_complete(struct imx_i2c_struct *i2c_imx) { - int result; - - result = wait_event_interruptible_timeout(i2c_imx->queue, - i2c_imx->i2csr & I2SR_IIF, HZ / 10); + wait_event_timeout(i2c_imx->queue, i2c_imx->i2csr & I2SR_IIF, HZ / 10); - if (unlikely(result < 0)) { - dev_dbg(&i2c_imx->adapter.dev, "<%s> result < 0\n", __func__); - return result; - } else if (unlikely(!(i2c_imx->i2csr & I2SR_IIF))) { + if (unlikely(!(i2c_imx->i2csr & I2SR_IIF))) { dev_dbg(&i2c_imx->adapter.dev, "<%s> Timeout\n", __func__); return -ETIMEDOUT; } @@ -295,7 +289,7 @@ static irqreturn_t i2c_imx_isr(int irq, void *dev_id) i2c_imx->i2csr = temp; temp &= ~I2SR_IIF; writeb(temp, i2c_imx->base + IMX_I2C_I2SR); - wake_up_interruptible(&i2c_imx->queue); + wake_up(&i2c_imx->queue); return IRQ_HANDLED; } diff --git a/drivers/input/evdev.c b/drivers/input/evdev.c index c908c5f8364..af9ee313c10 100644 --- a/drivers/input/evdev.c +++ b/drivers/input/evdev.c @@ -28,7 +28,7 @@ struct evdev { int minor; struct input_handle handle; wait_queue_head_t wait; - struct evdev_client *grab; + struct evdev_client __rcu *grab; struct list_head client_list; spinlock_t client_lock; /* protects client_list */ struct mutex mutex; @@ -669,6 +669,9 @@ static long evdev_do_ioctl(struct file *file, unsigned int cmd, if ((_IOC_NR(cmd) & ~ABS_MAX) == _IOC_NR(EVIOCGABS(0))) { + if (!dev->absinfo) + return -EINVAL; + t = _IOC_NR(cmd) & ABS_MAX; abs = dev->absinfo[t]; @@ -680,10 +683,13 @@ static long evdev_do_ioctl(struct file *file, unsigned int cmd, } } - if (_IOC_DIR(cmd) == _IOC_READ) { + if (_IOC_DIR(cmd) == _IOC_WRITE) { if ((_IOC_NR(cmd) & ~ABS_MAX) == _IOC_NR(EVIOCSABS(0))) { + if (!dev->absinfo) + return -EINVAL; + t = _IOC_NR(cmd) & ABS_MAX; if (copy_from_user(&abs, p, min_t(size_t, diff --git a/drivers/input/misc/hp_sdc_rtc.c b/drivers/input/misc/hp_sdc_rtc.c index c1906647905..7e2c12a5b83 100644 --- a/drivers/input/misc/hp_sdc_rtc.c +++ b/drivers/input/misc/hp_sdc_rtc.c @@ -104,7 +104,7 @@ 
static int hp_sdc_rtc_do_read_bbrtc (struct rtc_time *rtctm) t.endidx = 91; t.seq = tseq; t.act.semaphore = &tsem; - init_MUTEX_LOCKED(&tsem); + sema_init(&tsem, 0); if (hp_sdc_enqueue_transaction(&t)) return -1; @@ -698,7 +698,7 @@ static int __init hp_sdc_rtc_init(void) return -ENODEV; #endif - init_MUTEX(&i8042tregs); + sema_init(&i8042tregs, 1); if ((ret = hp_sdc_request_timer_irq(&hp_sdc_rtc_isr))) return ret; diff --git a/drivers/input/serio/hil_mlc.c b/drivers/input/serio/hil_mlc.c index c92f4edfee7..e5624d8f170 100644 --- a/drivers/input/serio/hil_mlc.c +++ b/drivers/input/serio/hil_mlc.c @@ -915,15 +915,15 @@ int hil_mlc_register(hil_mlc *mlc) mlc->ostarted = 0; rwlock_init(&mlc->lock); - init_MUTEX(&mlc->osem); + sema_init(&mlc->osem, 1); - init_MUTEX(&mlc->isem); + sema_init(&mlc->isem, 1); mlc->icount = -1; mlc->imatch = 0; mlc->opercnt = 0; - init_MUTEX_LOCKED(&(mlc->csem)); + sema_init(&(mlc->csem), 0); hil_mlc_clear_di_scratch(mlc); hil_mlc_clear_di_map(mlc, 0); diff --git a/drivers/input/serio/hp_sdc.c b/drivers/input/serio/hp_sdc.c index bcc2d30ec24..8c0b51c3142 100644 --- a/drivers/input/serio/hp_sdc.c +++ b/drivers/input/serio/hp_sdc.c @@ -905,7 +905,7 @@ static int __init hp_sdc_init(void) ts_sync[1] = 0x0f; ts_sync[2] = ts_sync[3] = ts_sync[4] = ts_sync[5] = 0; t_sync.act.semaphore = &s_sync; - init_MUTEX_LOCKED(&s_sync); + sema_init(&s_sync, 0); hp_sdc_enqueue_transaction(&t_sync); down(&s_sync); /* Wait for t_sync to complete */ @@ -1039,7 +1039,7 @@ static int __init hp_sdc_register(void) return hp_sdc.dev_err; } - init_MUTEX_LOCKED(&tq_init_sem); + sema_init(&tq_init_sem, 0); tq_init.actidx = 0; tq_init.idx = 1; diff --git a/drivers/macintosh/adb.c b/drivers/macintosh/adb.c index 1c4ee6e7793..bf64e49d996 100644 --- a/drivers/macintosh/adb.c +++ b/drivers/macintosh/adb.c @@ -83,7 +83,7 @@ static struct adb_driver *adb_controller; BLOCKING_NOTIFIER_HEAD(adb_client_list); static int adb_got_sleep; static int adb_inited; -static 
DECLARE_MUTEX(adb_probe_mutex); +static DEFINE_SEMAPHORE(adb_probe_mutex); static int sleepy_trackpad; static int autopoll_devs; int __adb_probe_sync; diff --git a/drivers/media/video/v4l2-compat-ioctl32.c b/drivers/media/video/v4l2-compat-ioctl32.c index 073f01390cd..86294ed35c9 100644 --- a/drivers/media/video/v4l2-compat-ioctl32.c +++ b/drivers/media/video/v4l2-compat-ioctl32.c @@ -193,17 +193,24 @@ static int put_video_window32(struct video_window *kp, struct video_window32 __u struct video_code32 { char loadwhat[16]; /* name or tag of file being passed */ compat_int_t datasize; - unsigned char *data; + compat_uptr_t data; }; -static int get_microcode32(struct video_code *kp, struct video_code32 __user *up) +static struct video_code __user *get_microcode32(struct video_code32 *kp) { - if (!access_ok(VERIFY_READ, up, sizeof(struct video_code32)) || - copy_from_user(kp->loadwhat, up->loadwhat, sizeof(up->loadwhat)) || - get_user(kp->datasize, &up->datasize) || - copy_from_user(kp->data, up->data, up->datasize)) - return -EFAULT; - return 0; + struct video_code __user *up; + + up = compat_alloc_user_space(sizeof(*up)); + + /* + * NOTE! We don't actually care if these fail. 
If the + * user address is invalid, the native ioctl will do + * the error handling for us + */ + (void) copy_to_user(up->loadwhat, kp->loadwhat, sizeof(up->loadwhat)); + (void) put_user(kp->datasize, &up->datasize); + (void) put_user(compat_ptr(kp->data), &up->data); + return up; } #define VIDIOCGTUNER32 _IOWR('v', 4, struct video_tuner32) @@ -739,7 +746,7 @@ static long do_video_ioctl(struct file *file, unsigned int cmd, unsigned long ar struct video_tuner vt; struct video_buffer vb; struct video_window vw; - struct video_code vc; + struct video_code32 vc; struct video_audio va; #endif struct v4l2_format v2f; @@ -818,8 +825,11 @@ static long do_video_ioctl(struct file *file, unsigned int cmd, unsigned long ar break; case VIDIOCSMICROCODE: - err = get_microcode32(&karg.vc, up); - compatible_arg = 0; + /* Copy the 32-bit "video_code32" to kernel space */ + if (copy_from_user(&karg.vc, up, sizeof(karg.vc))) + return -EFAULT; + /* Convert the 32-bit version to a 64-bit version in user space */ + up = get_microcode32(&karg.vc); break; case VIDIOCSFREQ: diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c index 5db49b124ff..09eee6df065 100644 --- a/drivers/mmc/core/core.c +++ b/drivers/mmc/core/core.c @@ -1631,6 +1631,19 @@ int mmc_suspend_host(struct mmc_host *host) if (host->bus_ops && !host->bus_dead) { if (host->bus_ops->suspend) err = host->bus_ops->suspend(host); + if (err == -ENOSYS || !host->bus_ops->resume) { + /* + * We simply "remove" the card in this case. + * It will be redetected on resume. 
+ */ + if (host->bus_ops->remove) + host->bus_ops->remove(host); + mmc_claim_host(host); + mmc_detach_bus(host); + mmc_release_host(host); + host->pm_flags = 0; + err = 0; + } } mmc_bus_put(host); diff --git a/drivers/mtd/nand/mxc_nand.c b/drivers/mtd/nand/mxc_nand.c index b2828e84d24..214b03afdd4 100644 --- a/drivers/mtd/nand/mxc_nand.c +++ b/drivers/mtd/nand/mxc_nand.c @@ -30,6 +30,8 @@ #include <linux/clk.h> #include <linux/err.h> #include <linux/io.h> +#include <linux/irq.h> +#include <linux/completion.h> #include <asm/mach/flash.h> #include <mach/mxc_nand.h> @@ -151,7 +153,7 @@ struct mxc_nand_host { int irq; int eccsize; - wait_queue_head_t irq_waitq; + struct completion op_completion; uint8_t *data_buf; unsigned int buf_start; @@ -164,6 +166,7 @@ struct mxc_nand_host { void (*send_read_id)(struct mxc_nand_host *); uint16_t (*get_dev_status)(struct mxc_nand_host *); int (*check_int)(struct mxc_nand_host *); + void (*irq_control)(struct mxc_nand_host *, int); }; /* OOB placement block for use with hardware ecc generation */ @@ -216,9 +219,12 @@ static irqreturn_t mxc_nfc_irq(int irq, void *dev_id) { struct mxc_nand_host *host = dev_id; - disable_irq_nosync(irq); + if (!host->check_int(host)) + return IRQ_NONE; - wake_up(&host->irq_waitq); + host->irq_control(host, 0); + + complete(&host->op_completion); return IRQ_HANDLED; } @@ -245,11 +251,54 @@ static int check_int_v1_v2(struct mxc_nand_host *host) if (!(tmp & NFC_V1_V2_CONFIG2_INT)) return 0; - writew(tmp & ~NFC_V1_V2_CONFIG2_INT, NFC_V1_V2_CONFIG2); + if (!cpu_is_mx21()) + writew(tmp & ~NFC_V1_V2_CONFIG2_INT, NFC_V1_V2_CONFIG2); return 1; } +/* + * It has been observed that the i.MX21 cannot read the CONFIG2:INT bit + * if interrupts are masked (CONFIG1:INT_MSK is set). To handle this, the + * driver can enable/disable the irq line rather than simply masking the + * interrupts. 
+ */ +static void irq_control_mx21(struct mxc_nand_host *host, int activate) +{ + if (activate) + enable_irq(host->irq); + else + disable_irq_nosync(host->irq); +} + +static void irq_control_v1_v2(struct mxc_nand_host *host, int activate) +{ + uint16_t tmp; + + tmp = readw(NFC_V1_V2_CONFIG1); + + if (activate) + tmp &= ~NFC_V1_V2_CONFIG1_INT_MSK; + else + tmp |= NFC_V1_V2_CONFIG1_INT_MSK; + + writew(tmp, NFC_V1_V2_CONFIG1); +} + +static void irq_control_v3(struct mxc_nand_host *host, int activate) +{ + uint32_t tmp; + + tmp = readl(NFC_V3_CONFIG2); + + if (activate) + tmp &= ~NFC_V3_CONFIG2_INT_MSK; + else + tmp |= NFC_V3_CONFIG2_INT_MSK; + + writel(tmp, NFC_V3_CONFIG2); +} + /* This function polls the NANDFC to wait for the basic operation to * complete by checking the INT bit of config2 register. */ @@ -259,10 +308,9 @@ static void wait_op_done(struct mxc_nand_host *host, int useirq) if (useirq) { if (!host->check_int(host)) { - - enable_irq(host->irq); - - wait_event(host->irq_waitq, host->check_int(host)); + INIT_COMPLETION(host->op_completion); + host->irq_control(host, 1); + wait_for_completion(&host->op_completion); } } else { while (max_retries-- > 0) { @@ -799,6 +847,7 @@ static void preset_v3(struct mtd_info *mtd) NFC_V3_CONFIG2_2CMD_PHASES | NFC_V3_CONFIG2_SPAS(mtd->oobsize >> 1) | NFC_V3_CONFIG2_ST_CMD(0x70) | + NFC_V3_CONFIG2_INT_MSK | NFC_V3_CONFIG2_NUM_ADDR_PHASE0; if (chip->ecc.mode == NAND_ECC_HW) @@ -1024,6 +1073,10 @@ static int __init mxcnd_probe(struct platform_device *pdev) host->send_read_id = send_read_id_v1_v2; host->get_dev_status = get_dev_status_v1_v2; host->check_int = check_int_v1_v2; + if (cpu_is_mx21()) + host->irq_control = irq_control_mx21; + else + host->irq_control = irq_control_v1_v2; } if (nfc_is_v21()) { @@ -1062,6 +1115,7 @@ static int __init mxcnd_probe(struct platform_device *pdev) host->send_read_id = send_read_id_v3; host->check_int = check_int_v3; host->get_dev_status = get_dev_status_v3; + host->irq_control = 
irq_control_v3; oob_smallpage = &nandv2_hw_eccoob_smallpage; oob_largepage = &nandv2_hw_eccoob_largepage; } else @@ -1093,14 +1147,34 @@ static int __init mxcnd_probe(struct platform_device *pdev) this->options |= NAND_USE_FLASH_BBT; } - init_waitqueue_head(&host->irq_waitq); + init_completion(&host->op_completion); host->irq = platform_get_irq(pdev, 0); + /* + * mask the interrupt. For i.MX21 explicitely call + * irq_control_v1_v2 to use the mask bit. We can't call + * disable_irq_nosync() for an interrupt we do not own yet. + */ + if (cpu_is_mx21()) + irq_control_v1_v2(host, 0); + else + host->irq_control(host, 0); + err = request_irq(host->irq, mxc_nfc_irq, IRQF_DISABLED, DRIVER_NAME, host); if (err) goto eirq; + host->irq_control(host, 0); + + /* + * Now that the interrupt is disabled make sure the interrupt + * mask bit is cleared on i.MX21. Otherwise we can't read + * the interrupt status bit on this machine. + */ + if (cpu_is_mx21()) + irq_control_v1_v2(host, 1); + /* first scan to find the device and get the page size */ if (nand_scan_ident(mtd, 1, NULL)) { err = -ENXIO; diff --git a/drivers/net/3c527.c b/drivers/net/3c527.c index 70705d1306b..eca55c52bdf 100644 --- a/drivers/net/3c527.c +++ b/drivers/net/3c527.c @@ -522,7 +522,7 @@ static int __init mc32_probe1(struct net_device *dev, int slot) lp->tx_len = lp->exec_box->data[9]; /* Transmit list count */ lp->rx_len = lp->exec_box->data[11]; /* Receive list count */ - init_MUTEX_LOCKED(&lp->cmd_mutex); + sema_init(&lp->cmd_mutex, 0); init_completion(&lp->execution_cmd); init_completion(&lp->xceiver_cmd); diff --git a/drivers/net/b44.c b/drivers/net/b44.c index 1e620e287ae..efeffdf9e5f 100644 --- a/drivers/net/b44.c +++ b/drivers/net/b44.c @@ -2170,8 +2170,6 @@ static int __devinit b44_init_one(struct ssb_device *sdev, dev->irq = sdev->irq; SET_ETHTOOL_OPS(dev, &b44_ethtool_ops); - netif_carrier_off(dev); - err = ssb_bus_powerup(sdev->bus, 0); if (err) { dev_err(sdev->dev, @@ -2213,6 +2211,8 @@ static int 
__devinit b44_init_one(struct ssb_device *sdev, goto err_out_powerdown; } + netif_carrier_off(dev); + ssb_set_drvdata(sdev, dev); /* Chip reset provides power to the b44 MAC & PCI cores, which diff --git a/drivers/net/ehea/ehea_main.c b/drivers/net/ehea/ehea_main.c index a333b42111b..6372610ed24 100644 --- a/drivers/net/ehea/ehea_main.c +++ b/drivers/net/ehea/ehea_main.c @@ -533,8 +533,15 @@ static inline void ehea_fill_skb(struct net_device *dev, int length = cqe->num_bytes_transfered - 4; /*remove CRC */ skb_put(skb, length); - skb->ip_summed = CHECKSUM_UNNECESSARY; skb->protocol = eth_type_trans(skb, dev); + + /* The packet was not an IPV4 packet so a complemented checksum was + calculated. The value is found in the Internet Checksum field. */ + if (cqe->status & EHEA_CQE_BLIND_CKSUM) { + skb->ip_summed = CHECKSUM_COMPLETE; + skb->csum = csum_unfold(~cqe->inet_checksum_value); + } else + skb->ip_summed = CHECKSUM_UNNECESSARY; } static inline struct sk_buff *get_skb_by_index(struct sk_buff **skb_array, diff --git a/drivers/net/ehea/ehea_qmr.h b/drivers/net/ehea/ehea_qmr.h index f608a6c54af..38104734a3b 100644 --- a/drivers/net/ehea/ehea_qmr.h +++ b/drivers/net/ehea/ehea_qmr.h @@ -150,6 +150,7 @@ struct ehea_rwqe { #define EHEA_CQE_TYPE_RQ 0x60 #define EHEA_CQE_STAT_ERR_MASK 0x700F #define EHEA_CQE_STAT_FAT_ERR_MASK 0xF +#define EHEA_CQE_BLIND_CKSUM 0x8000 #define EHEA_CQE_STAT_ERR_TCP 0x4000 #define EHEA_CQE_STAT_ERR_IP 0x2000 #define EHEA_CQE_STAT_ERR_CRC 0x1000 diff --git a/drivers/net/fec.c b/drivers/net/fec.c index 768b840aeb6..cce32d43175 100644 --- a/drivers/net/fec.c +++ b/drivers/net/fec.c @@ -678,24 +678,37 @@ static int fec_enet_mii_probe(struct net_device *dev) { struct fec_enet_private *fep = netdev_priv(dev); struct phy_device *phy_dev = NULL; - int ret; + char mdio_bus_id[MII_BUS_ID_SIZE]; + char phy_name[MII_BUS_ID_SIZE + 3]; + int phy_id; fep->phy_dev = NULL; - /* find the first phy */ - phy_dev = phy_find_first(fep->mii_bus); - if (!phy_dev) { - 
printk(KERN_ERR "%s: no PHY found\n", dev->name); - return -ENODEV; + /* check for attached phy */ + for (phy_id = 0; (phy_id < PHY_MAX_ADDR); phy_id++) { + if ((fep->mii_bus->phy_mask & (1 << phy_id))) + continue; + if (fep->mii_bus->phy_map[phy_id] == NULL) + continue; + if (fep->mii_bus->phy_map[phy_id]->phy_id == 0) + continue; + strncpy(mdio_bus_id, fep->mii_bus->id, MII_BUS_ID_SIZE); + break; } - /* attach the mac to the phy */ - ret = phy_connect_direct(dev, phy_dev, - &fec_enet_adjust_link, 0, - PHY_INTERFACE_MODE_MII); - if (ret) { - printk(KERN_ERR "%s: Could not attach to PHY\n", dev->name); - return ret; + if (phy_id >= PHY_MAX_ADDR) { + printk(KERN_INFO "%s: no PHY, assuming direct connection " + "to switch\n", dev->name); + strncpy(mdio_bus_id, "0", MII_BUS_ID_SIZE); + phy_id = 0; + } + + snprintf(phy_name, MII_BUS_ID_SIZE, PHY_ID_FMT, mdio_bus_id, phy_id); + phy_dev = phy_connect(dev, phy_name, &fec_enet_adjust_link, 0, + PHY_INTERFACE_MODE_MII); + if (IS_ERR(phy_dev)) { + printk(KERN_ERR "%s: could not attach to PHY\n", dev->name); + return PTR_ERR(phy_dev); } /* mask with MAC supported features */ @@ -738,7 +751,7 @@ static int fec_enet_mii_init(struct platform_device *pdev) fep->mii_bus->read = fec_enet_mdio_read; fep->mii_bus->write = fec_enet_mdio_write; fep->mii_bus->reset = fec_enet_mdio_reset; - snprintf(fep->mii_bus->id, MII_BUS_ID_SIZE, "%x", pdev->id); + snprintf(fep->mii_bus->id, MII_BUS_ID_SIZE, "%x", pdev->id + 1); fep->mii_bus->priv = fep; fep->mii_bus->parent = &pdev->dev; @@ -1311,6 +1324,9 @@ fec_probe(struct platform_device *pdev) if (ret) goto failed_mii_init; + /* Carrier starts down, phylib will bring it up */ + netif_carrier_off(ndev); + ret = register_netdev(ndev); if (ret) goto failed_register; diff --git a/drivers/net/hamradio/6pack.c b/drivers/net/hamradio/6pack.c index 4b52c767ad0..3e5d0b6b651 100644 --- a/drivers/net/hamradio/6pack.c +++ b/drivers/net/hamradio/6pack.c @@ -608,7 +608,7 @@ static int sixpack_open(struct 
tty_struct *tty) spin_lock_init(&sp->lock); atomic_set(&sp->refcnt, 1); - init_MUTEX_LOCKED(&sp->dead_sem); + sema_init(&sp->dead_sem, 0); /* !!! length of the buffers. MTU is IP MTU, not PACLEN! */ diff --git a/drivers/net/hamradio/mkiss.c b/drivers/net/hamradio/mkiss.c index 66e88bd59ca..4c628393c8b 100644 --- a/drivers/net/hamradio/mkiss.c +++ b/drivers/net/hamradio/mkiss.c @@ -747,7 +747,7 @@ static int mkiss_open(struct tty_struct *tty) spin_lock_init(&ax->buflock); atomic_set(&ax->refcnt, 1); - init_MUTEX_LOCKED(&ax->dead_sem); + sema_init(&ax->dead_sem, 0); ax->tty = tty; tty->disc_data = ax; diff --git a/drivers/net/irda/sir_dev.c b/drivers/net/irda/sir_dev.c index 1b051dab7b2..51d74447f8f 100644 --- a/drivers/net/irda/sir_dev.c +++ b/drivers/net/irda/sir_dev.c @@ -909,7 +909,7 @@ struct sir_dev * sirdev_get_instance(const struct sir_driver *drv, const char *n dev->tx_skb = NULL; spin_lock_init(&dev->tx_lock); - init_MUTEX(&dev->fsm.sem); + sema_init(&dev->fsm.sem, 1); dev->drv = drv; dev->netdev = ndev; diff --git a/drivers/net/ppp_async.c b/drivers/net/ppp_async.c index af50a530dae..78d70a6481b 100644 --- a/drivers/net/ppp_async.c +++ b/drivers/net/ppp_async.c @@ -184,7 +184,7 @@ ppp_asynctty_open(struct tty_struct *tty) tasklet_init(&ap->tsk, ppp_async_process, (unsigned long) ap); atomic_set(&ap->refcnt, 1); - init_MUTEX_LOCKED(&ap->dead_sem); + sema_init(&ap->dead_sem, 0); ap->chan.private = ap; ap->chan.ops = &async_ops; diff --git a/drivers/net/r8169.c b/drivers/net/r8169.c index a0da4a17b02..992db2fa136 100644 --- a/drivers/net/r8169.c +++ b/drivers/net/r8169.c @@ -1212,7 +1212,8 @@ static void rtl8169_update_counters(struct net_device *dev) if ((RTL_R8(ChipCmd) & CmdRxEnb) == 0) return; - counters = pci_alloc_consistent(tp->pci_dev, sizeof(*counters), &paddr); + counters = dma_alloc_coherent(&tp->pci_dev->dev, sizeof(*counters), + &paddr, GFP_KERNEL); if (!counters) return; @@ -1233,7 +1234,8 @@ static void rtl8169_update_counters(struct net_device 
*dev) RTL_W32(CounterAddrLow, 0); RTL_W32(CounterAddrHigh, 0); - pci_free_consistent(tp->pci_dev, sizeof(*counters), counters, paddr); + dma_free_coherent(&tp->pci_dev->dev, sizeof(*counters), counters, + paddr); } static void rtl8169_get_ethtool_stats(struct net_device *dev, @@ -3292,15 +3294,15 @@ static int rtl8169_open(struct net_device *dev) /* * Rx and Tx desscriptors needs 256 bytes alignment. - * pci_alloc_consistent provides more. + * dma_alloc_coherent provides more. */ - tp->TxDescArray = pci_alloc_consistent(pdev, R8169_TX_RING_BYTES, - &tp->TxPhyAddr); + tp->TxDescArray = dma_alloc_coherent(&pdev->dev, R8169_TX_RING_BYTES, + &tp->TxPhyAddr, GFP_KERNEL); if (!tp->TxDescArray) goto err_pm_runtime_put; - tp->RxDescArray = pci_alloc_consistent(pdev, R8169_RX_RING_BYTES, - &tp->RxPhyAddr); + tp->RxDescArray = dma_alloc_coherent(&pdev->dev, R8169_RX_RING_BYTES, + &tp->RxPhyAddr, GFP_KERNEL); if (!tp->RxDescArray) goto err_free_tx_0; @@ -3334,12 +3336,12 @@ out: err_release_ring_2: rtl8169_rx_clear(tp); err_free_rx_1: - pci_free_consistent(pdev, R8169_RX_RING_BYTES, tp->RxDescArray, - tp->RxPhyAddr); + dma_free_coherent(&pdev->dev, R8169_RX_RING_BYTES, tp->RxDescArray, + tp->RxPhyAddr); tp->RxDescArray = NULL; err_free_tx_0: - pci_free_consistent(pdev, R8169_TX_RING_BYTES, tp->TxDescArray, - tp->TxPhyAddr); + dma_free_coherent(&pdev->dev, R8169_TX_RING_BYTES, tp->TxDescArray, + tp->TxPhyAddr); tp->TxDescArray = NULL; err_pm_runtime_put: pm_runtime_put_noidle(&pdev->dev); @@ -3975,7 +3977,7 @@ static void rtl8169_free_rx_skb(struct rtl8169_private *tp, { struct pci_dev *pdev = tp->pci_dev; - pci_unmap_single(pdev, le64_to_cpu(desc->addr), tp->rx_buf_sz, + dma_unmap_single(&pdev->dev, le64_to_cpu(desc->addr), tp->rx_buf_sz, PCI_DMA_FROMDEVICE); dev_kfree_skb(*sk_buff); *sk_buff = NULL; @@ -4000,7 +4002,7 @@ static inline void rtl8169_map_to_asic(struct RxDesc *desc, dma_addr_t mapping, static struct sk_buff *rtl8169_alloc_rx_skb(struct pci_dev *pdev, struct 
net_device *dev, struct RxDesc *desc, int rx_buf_sz, - unsigned int align) + unsigned int align, gfp_t gfp) { struct sk_buff *skb; dma_addr_t mapping; @@ -4008,13 +4010,13 @@ static struct sk_buff *rtl8169_alloc_rx_skb(struct pci_dev *pdev, pad = align ? align : NET_IP_ALIGN; - skb = netdev_alloc_skb(dev, rx_buf_sz + pad); + skb = __netdev_alloc_skb(dev, rx_buf_sz + pad, gfp); if (!skb) goto err_out; skb_reserve(skb, align ? ((pad - 1) & (unsigned long)skb->data) : pad); - mapping = pci_map_single(pdev, skb->data, rx_buf_sz, + mapping = dma_map_single(&pdev->dev, skb->data, rx_buf_sz, PCI_DMA_FROMDEVICE); rtl8169_map_to_asic(desc, mapping, rx_buf_sz); @@ -4039,7 +4041,7 @@ static void rtl8169_rx_clear(struct rtl8169_private *tp) } static u32 rtl8169_rx_fill(struct rtl8169_private *tp, struct net_device *dev, - u32 start, u32 end) + u32 start, u32 end, gfp_t gfp) { u32 cur; @@ -4054,7 +4056,7 @@ static u32 rtl8169_rx_fill(struct rtl8169_private *tp, struct net_device *dev, skb = rtl8169_alloc_rx_skb(tp->pci_dev, dev, tp->RxDescArray + i, - tp->rx_buf_sz, tp->align); + tp->rx_buf_sz, tp->align, gfp); if (!skb) break; @@ -4082,7 +4084,7 @@ static int rtl8169_init_ring(struct net_device *dev) memset(tp->tx_skb, 0x0, NUM_TX_DESC * sizeof(struct ring_info)); memset(tp->Rx_skbuff, 0x0, NUM_RX_DESC * sizeof(struct sk_buff *)); - if (rtl8169_rx_fill(tp, dev, 0, NUM_RX_DESC) != NUM_RX_DESC) + if (rtl8169_rx_fill(tp, dev, 0, NUM_RX_DESC, GFP_KERNEL) != NUM_RX_DESC) goto err_out; rtl8169_mark_as_last_descriptor(tp->RxDescArray + NUM_RX_DESC - 1); @@ -4099,7 +4101,8 @@ static void rtl8169_unmap_tx_skb(struct pci_dev *pdev, struct ring_info *tx_skb, { unsigned int len = tx_skb->len; - pci_unmap_single(pdev, le64_to_cpu(desc->addr), len, PCI_DMA_TODEVICE); + dma_unmap_single(&pdev->dev, le64_to_cpu(desc->addr), len, + PCI_DMA_TODEVICE); desc->opts1 = 0x00; desc->opts2 = 0x00; desc->addr = 0x00; @@ -4243,7 +4246,8 @@ static int rtl8169_xmit_frags(struct rtl8169_private *tp, struct 
sk_buff *skb, txd = tp->TxDescArray + entry; len = frag->size; addr = ((void *) page_address(frag->page)) + frag->page_offset; - mapping = pci_map_single(tp->pci_dev, addr, len, PCI_DMA_TODEVICE); + mapping = dma_map_single(&tp->pci_dev->dev, addr, len, + PCI_DMA_TODEVICE); /* anti gcc 2.95.3 bugware (sic) */ status = opts1 | len | (RingEnd * !((entry + 1) % NUM_TX_DESC)); @@ -4313,7 +4317,8 @@ static netdev_tx_t rtl8169_start_xmit(struct sk_buff *skb, tp->tx_skb[entry].skb = skb; } - mapping = pci_map_single(tp->pci_dev, skb->data, len, PCI_DMA_TODEVICE); + mapping = dma_map_single(&tp->pci_dev->dev, skb->data, len, + PCI_DMA_TODEVICE); tp->tx_skb[entry].len = len; txd->addr = cpu_to_le64(mapping); @@ -4477,8 +4482,8 @@ static inline bool rtl8169_try_rx_copy(struct sk_buff **sk_buff, if (!skb) goto out; - pci_dma_sync_single_for_cpu(tp->pci_dev, addr, pkt_size, - PCI_DMA_FROMDEVICE); + dma_sync_single_for_cpu(&tp->pci_dev->dev, addr, pkt_size, + PCI_DMA_FROMDEVICE); skb_copy_from_linear_data(*sk_buff, skb->data, pkt_size); *sk_buff = skb; done = true; @@ -4549,11 +4554,11 @@ static int rtl8169_rx_interrupt(struct net_device *dev, rtl8169_rx_csum(skb, desc); if (rtl8169_try_rx_copy(&skb, tp, pkt_size, addr)) { - pci_dma_sync_single_for_device(pdev, addr, + dma_sync_single_for_device(&pdev->dev, addr, pkt_size, PCI_DMA_FROMDEVICE); rtl8169_mark_to_asic(desc, tp->rx_buf_sz); } else { - pci_unmap_single(pdev, addr, tp->rx_buf_sz, + dma_unmap_single(&pdev->dev, addr, tp->rx_buf_sz, PCI_DMA_FROMDEVICE); tp->Rx_skbuff[entry] = NULL; } @@ -4583,7 +4588,7 @@ static int rtl8169_rx_interrupt(struct net_device *dev, count = cur_rx - tp->cur_rx; tp->cur_rx = cur_rx; - delta = rtl8169_rx_fill(tp, dev, tp->dirty_rx, tp->cur_rx); + delta = rtl8169_rx_fill(tp, dev, tp->dirty_rx, tp->cur_rx, GFP_ATOMIC); if (!delta && count) netif_info(tp, intr, dev, "no Rx buffer allocated\n"); tp->dirty_rx += delta; @@ -4769,10 +4774,10 @@ static int rtl8169_close(struct net_device *dev) 
free_irq(dev->irq, dev); - pci_free_consistent(pdev, R8169_RX_RING_BYTES, tp->RxDescArray, - tp->RxPhyAddr); - pci_free_consistent(pdev, R8169_TX_RING_BYTES, tp->TxDescArray, - tp->TxPhyAddr); + dma_free_coherent(&pdev->dev, R8169_RX_RING_BYTES, tp->RxDescArray, + tp->RxPhyAddr); + dma_free_coherent(&pdev->dev, R8169_TX_RING_BYTES, tp->TxDescArray, + tp->TxPhyAddr); tp->TxDescArray = NULL; tp->RxDescArray = NULL; diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c index bc3af78a869..1ec4b9e0239 100644 --- a/drivers/net/tg3.c +++ b/drivers/net/tg3.c @@ -4666,7 +4666,7 @@ static int tg3_rx(struct tg3_napi *tnapi, int budget) desc_idx, *post_ptr); drop_it_no_recycle: /* Other statistics kept track of by card. */ - tp->net_stats.rx_dropped++; + tp->rx_dropped++; goto next_pkt; } @@ -4726,7 +4726,7 @@ static int tg3_rx(struct tg3_napi *tnapi, int budget) if (len > (tp->dev->mtu + ETH_HLEN) && skb->protocol != htons(ETH_P_8021Q)) { dev_kfree_skb(skb); - goto next_pkt; + goto drop_it_no_recycle; } if (desc->type_flags & RXD_FLAG_VLAN && @@ -9240,6 +9240,8 @@ static struct rtnl_link_stats64 *tg3_get_stats64(struct net_device *dev, stats->rx_missed_errors = old_stats->rx_missed_errors + get_stat64(&hw_stats->rx_discards); + stats->rx_dropped = tp->rx_dropped; + return stats; } diff --git a/drivers/net/tg3.h b/drivers/net/tg3.h index 4937bd19096..be7ff138a7f 100644 --- a/drivers/net/tg3.h +++ b/drivers/net/tg3.h @@ -2759,7 +2759,7 @@ struct tg3 { /* begin "everything else" cacheline(s) section */ - struct rtnl_link_stats64 net_stats; + unsigned long rx_dropped; struct rtnl_link_stats64 net_stats_prev; struct tg3_ethtool_stats estats; struct tg3_ethtool_stats estats_prev; diff --git a/drivers/net/wan/cosa.c b/drivers/net/wan/cosa.c index 04c6cd4333f..10bafd59f9c 100644 --- a/drivers/net/wan/cosa.c +++ b/drivers/net/wan/cosa.c @@ -575,7 +575,7 @@ static int cosa_probe(int base, int irq, int dma) /* Initialize the chardev data structures */ mutex_init(&chan->rlock); - 
init_MUTEX(&chan->wsem); + sema_init(&chan->wsem, 1); /* Register the network interface */ if (!(chan->netdev = alloc_hdlcdev(chan))) { diff --git a/drivers/net/wimax/i2400m/rx.c b/drivers/net/wimax/i2400m/rx.c index 8cc9e319f43..1737d1488b3 100644 --- a/drivers/net/wimax/i2400m/rx.c +++ b/drivers/net/wimax/i2400m/rx.c @@ -1244,16 +1244,16 @@ int i2400m_rx(struct i2400m *i2400m, struct sk_buff *skb) int i, result; struct device *dev = i2400m_dev(i2400m); const struct i2400m_msg_hdr *msg_hdr; - size_t pl_itr, pl_size, skb_len; + size_t pl_itr, pl_size; unsigned long flags; - unsigned num_pls, single_last; + unsigned num_pls, single_last, skb_len; skb_len = skb->len; - d_fnstart(4, dev, "(i2400m %p skb %p [size %zu])\n", + d_fnstart(4, dev, "(i2400m %p skb %p [size %u])\n", i2400m, skb, skb_len); result = -EIO; msg_hdr = (void *) skb->data; - result = i2400m_rx_msg_hdr_check(i2400m, msg_hdr, skb->len); + result = i2400m_rx_msg_hdr_check(i2400m, msg_hdr, skb_len); if (result < 0) goto error_msg_hdr_check; result = -EIO; @@ -1261,10 +1261,10 @@ int i2400m_rx(struct i2400m *i2400m, struct sk_buff *skb) pl_itr = sizeof(*msg_hdr) + /* Check payload descriptor(s) */ num_pls * sizeof(msg_hdr->pld[0]); pl_itr = ALIGN(pl_itr, I2400M_PL_ALIGN); - if (pl_itr > skb->len) { /* got all the payload descriptors? */ + if (pl_itr > skb_len) { /* got all the payload descriptors? */ dev_err(dev, "RX: HW BUG? 
message too short (%u bytes) for " "%u payload descriptors (%zu each, total %zu)\n", - skb->len, num_pls, sizeof(msg_hdr->pld[0]), pl_itr); + skb_len, num_pls, sizeof(msg_hdr->pld[0]), pl_itr); goto error_pl_descr_short; } /* Walk each payload payload--check we really got it */ @@ -1272,7 +1272,7 @@ int i2400m_rx(struct i2400m *i2400m, struct sk_buff *skb) /* work around old gcc warnings */ pl_size = i2400m_pld_size(&msg_hdr->pld[i]); result = i2400m_rx_pl_descr_check(i2400m, &msg_hdr->pld[i], - pl_itr, skb->len); + pl_itr, skb_len); if (result < 0) goto error_pl_descr_check; single_last = num_pls == 1 || i == num_pls - 1; @@ -1290,16 +1290,16 @@ int i2400m_rx(struct i2400m *i2400m, struct sk_buff *skb) if (i < i2400m->rx_pl_min) i2400m->rx_pl_min = i; i2400m->rx_num++; - i2400m->rx_size_acc += skb->len; - if (skb->len < i2400m->rx_size_min) - i2400m->rx_size_min = skb->len; - if (skb->len > i2400m->rx_size_max) - i2400m->rx_size_max = skb->len; + i2400m->rx_size_acc += skb_len; + if (skb_len < i2400m->rx_size_min) + i2400m->rx_size_min = skb_len; + if (skb_len > i2400m->rx_size_max) + i2400m->rx_size_max = skb_len; spin_unlock_irqrestore(&i2400m->rx_lock, flags); error_pl_descr_check: error_pl_descr_short: error_msg_hdr_check: - d_fnend(4, dev, "(i2400m %p skb %p [size %zu]) = %d\n", + d_fnend(4, dev, "(i2400m %p skb %p [size %u]) = %d\n", i2400m, skb, skb_len, result); return result; } diff --git a/drivers/oprofile/oprof.c b/drivers/oprofile/oprof.c index b336cd9ee7a..f9bda64fcd1 100644 --- a/drivers/oprofile/oprof.c +++ b/drivers/oprofile/oprof.c @@ -225,26 +225,17 @@ post_sync: mutex_unlock(&start_mutex); } -int oprofile_set_backtrace(unsigned long val) +int oprofile_set_ulong(unsigned long *addr, unsigned long val) { - int err = 0; + int err = -EBUSY; mutex_lock(&start_mutex); - - if (oprofile_started) { - err = -EBUSY; - goto out; - } - - if (!oprofile_ops.backtrace) { - err = -EINVAL; - goto out; + if (!oprofile_started) { + *addr = val; + err = 0; } - - 
oprofile_backtrace_depth = val; - -out: mutex_unlock(&start_mutex); + return err; } @@ -257,16 +248,9 @@ static int __init oprofile_init(void) printk(KERN_INFO "oprofile: using timer interrupt.\n"); err = oprofile_timer_init(&oprofile_ops); if (err) - goto out_arch; + return err; } - err = oprofilefs_register(); - if (err) - goto out_arch; - return 0; - -out_arch: - oprofile_arch_exit(); - return err; + return oprofilefs_register(); } diff --git a/drivers/oprofile/oprof.h b/drivers/oprofile/oprof.h index 47e12cb4ee8..177b73de5e5 100644 --- a/drivers/oprofile/oprof.h +++ b/drivers/oprofile/oprof.h @@ -37,7 +37,7 @@ void oprofile_create_files(struct super_block *sb, struct dentry *root); int oprofile_timer_init(struct oprofile_operations *ops); void oprofile_timer_exit(void); -int oprofile_set_backtrace(unsigned long depth); +int oprofile_set_ulong(unsigned long *addr, unsigned long val); int oprofile_set_timeout(unsigned long time); #endif /* OPROF_H */ diff --git a/drivers/oprofile/oprofile_files.c b/drivers/oprofile/oprofile_files.c index bbd7516e086..ccf099e684a 100644 --- a/drivers/oprofile/oprofile_files.c +++ b/drivers/oprofile/oprofile_files.c @@ -79,14 +79,17 @@ static ssize_t depth_write(struct file *file, char const __user *buf, size_t cou if (*offset) return -EINVAL; + if (!oprofile_ops.backtrace) + return -EINVAL; + retval = oprofilefs_ulong_from_user(&val, buf, count); if (retval) return retval; - retval = oprofile_set_backtrace(val); - + retval = oprofile_set_ulong(&oprofile_backtrace_depth, val); if (retval) return retval; + return count; } diff --git a/drivers/oprofile/oprofile_perf.c b/drivers/oprofile/oprofile_perf.c new file mode 100644 index 00000000000..9046f7b2ed7 --- /dev/null +++ b/drivers/oprofile/oprofile_perf.c @@ -0,0 +1,328 @@ +/* + * Copyright 2010 ARM Ltd. + * + * Perf-events backend for OProfile. 
+ */ +#include <linux/perf_event.h> +#include <linux/platform_device.h> +#include <linux/oprofile.h> +#include <linux/slab.h> + +/* + * Per performance monitor configuration as set via oprofilefs. + */ +struct op_counter_config { + unsigned long count; + unsigned long enabled; + unsigned long event; + unsigned long unit_mask; + unsigned long kernel; + unsigned long user; + struct perf_event_attr attr; +}; + +static int oprofile_perf_enabled; +static DEFINE_MUTEX(oprofile_perf_mutex); + +static struct op_counter_config *counter_config; +static struct perf_event **perf_events[nr_cpumask_bits]; +static int num_counters; + +/* + * Overflow callback for oprofile. + */ +static void op_overflow_handler(struct perf_event *event, int unused, + struct perf_sample_data *data, struct pt_regs *regs) +{ + int id; + u32 cpu = smp_processor_id(); + + for (id = 0; id < num_counters; ++id) + if (perf_events[cpu][id] == event) + break; + + if (id != num_counters) + oprofile_add_sample(regs, id); + else + pr_warning("oprofile: ignoring spurious overflow " + "on cpu %u\n", cpu); +} + +/* + * Called by oprofile_perf_setup to create perf attributes to mirror the oprofile + * settings in counter_config. Attributes are created as `pinned' events and + * so are permanently scheduled on the PMU. 
+ */ +static void op_perf_setup(void) +{ + int i; + u32 size = sizeof(struct perf_event_attr); + struct perf_event_attr *attr; + + for (i = 0; i < num_counters; ++i) { + attr = &counter_config[i].attr; + memset(attr, 0, size); + attr->type = PERF_TYPE_RAW; + attr->size = size; + attr->config = counter_config[i].event; + attr->sample_period = counter_config[i].count; + attr->pinned = 1; + } +} + +static int op_create_counter(int cpu, int event) +{ + struct perf_event *pevent; + + if (!counter_config[event].enabled || perf_events[cpu][event]) + return 0; + + pevent = perf_event_create_kernel_counter(&counter_config[event].attr, + cpu, NULL, + op_overflow_handler); + + if (IS_ERR(pevent)) + return PTR_ERR(pevent); + + if (pevent->state != PERF_EVENT_STATE_ACTIVE) { + perf_event_release_kernel(pevent); + pr_warning("oprofile: failed to enable event %d " + "on CPU %d\n", event, cpu); + return -EBUSY; + } + + perf_events[cpu][event] = pevent; + + return 0; +} + +static void op_destroy_counter(int cpu, int event) +{ + struct perf_event *pevent = perf_events[cpu][event]; + + if (pevent) { + perf_event_release_kernel(pevent); + perf_events[cpu][event] = NULL; + } +} + +/* + * Called by oprofile_perf_start to create active perf events based on the + * perviously configured attributes. + */ +static int op_perf_start(void) +{ + int cpu, event, ret = 0; + + for_each_online_cpu(cpu) { + for (event = 0; event < num_counters; ++event) { + ret = op_create_counter(cpu, event); + if (ret) + return ret; + } + } + + return ret; +} + +/* + * Called by oprofile_perf_stop at the end of a profiling run. 
+ */ +static void op_perf_stop(void) +{ + int cpu, event; + + for_each_online_cpu(cpu) + for (event = 0; event < num_counters; ++event) + op_destroy_counter(cpu, event); +} + +static int oprofile_perf_create_files(struct super_block *sb, struct dentry *root) +{ + unsigned int i; + + for (i = 0; i < num_counters; i++) { + struct dentry *dir; + char buf[4]; + + snprintf(buf, sizeof buf, "%d", i); + dir = oprofilefs_mkdir(sb, root, buf); + oprofilefs_create_ulong(sb, dir, "enabled", &counter_config[i].enabled); + oprofilefs_create_ulong(sb, dir, "event", &counter_config[i].event); + oprofilefs_create_ulong(sb, dir, "count", &counter_config[i].count); + oprofilefs_create_ulong(sb, dir, "unit_mask", &counter_config[i].unit_mask); + oprofilefs_create_ulong(sb, dir, "kernel", &counter_config[i].kernel); + oprofilefs_create_ulong(sb, dir, "user", &counter_config[i].user); + } + + return 0; +} + +static int oprofile_perf_setup(void) +{ + spin_lock(&oprofilefs_lock); + op_perf_setup(); + spin_unlock(&oprofilefs_lock); + return 0; +} + +static int oprofile_perf_start(void) +{ + int ret = -EBUSY; + + mutex_lock(&oprofile_perf_mutex); + if (!oprofile_perf_enabled) { + ret = 0; + op_perf_start(); + oprofile_perf_enabled = 1; + } + mutex_unlock(&oprofile_perf_mutex); + return ret; +} + +static void oprofile_perf_stop(void) +{ + mutex_lock(&oprofile_perf_mutex); + if (oprofile_perf_enabled) + op_perf_stop(); + oprofile_perf_enabled = 0; + mutex_unlock(&oprofile_perf_mutex); +} + +#ifdef CONFIG_PM + +static int oprofile_perf_suspend(struct platform_device *dev, pm_message_t state) +{ + mutex_lock(&oprofile_perf_mutex); + if (oprofile_perf_enabled) + op_perf_stop(); + mutex_unlock(&oprofile_perf_mutex); + return 0; +} + +static int oprofile_perf_resume(struct platform_device *dev) +{ + mutex_lock(&oprofile_perf_mutex); + if (oprofile_perf_enabled && op_perf_start()) + oprofile_perf_enabled = 0; + mutex_unlock(&oprofile_perf_mutex); + return 0; +} + +static struct platform_driver 
oprofile_driver = { + .driver = { + .name = "oprofile-perf", + }, + .resume = oprofile_perf_resume, + .suspend = oprofile_perf_suspend, +}; + +static struct platform_device *oprofile_pdev; + +static int __init init_driverfs(void) +{ + int ret; + + ret = platform_driver_register(&oprofile_driver); + if (ret) + return ret; + + oprofile_pdev = platform_device_register_simple( + oprofile_driver.driver.name, 0, NULL, 0); + if (IS_ERR(oprofile_pdev)) { + ret = PTR_ERR(oprofile_pdev); + platform_driver_unregister(&oprofile_driver); + } + + return ret; +} + +static void exit_driverfs(void) +{ + platform_device_unregister(oprofile_pdev); + platform_driver_unregister(&oprofile_driver); +} + +#else + +static inline int init_driverfs(void) { return 0; } +static inline void exit_driverfs(void) { } + +#endif /* CONFIG_PM */ + +void oprofile_perf_exit(void) +{ + int cpu, id; + struct perf_event *event; + + for_each_possible_cpu(cpu) { + for (id = 0; id < num_counters; ++id) { + event = perf_events[cpu][id]; + if (event) + perf_event_release_kernel(event); + } + + kfree(perf_events[cpu]); + } + + kfree(counter_config); + exit_driverfs(); +} + +int __init oprofile_perf_init(struct oprofile_operations *ops) +{ + int cpu, ret = 0; + + ret = init_driverfs(); + if (ret) + return ret; + + memset(&perf_events, 0, sizeof(perf_events)); + + num_counters = perf_num_counters(); + if (num_counters <= 0) { + pr_info("oprofile: no performance counters\n"); + ret = -ENODEV; + goto out; + } + + counter_config = kcalloc(num_counters, + sizeof(struct op_counter_config), GFP_KERNEL); + + if (!counter_config) { + pr_info("oprofile: failed to allocate %d " + "counters\n", num_counters); + ret = -ENOMEM; + num_counters = 0; + goto out; + } + + for_each_possible_cpu(cpu) { + perf_events[cpu] = kcalloc(num_counters, + sizeof(struct perf_event *), GFP_KERNEL); + if (!perf_events[cpu]) { + pr_info("oprofile: failed to allocate %d perf events " + "for cpu %d\n", num_counters, cpu); + ret = -ENOMEM; + goto 
out; + } + } + + ops->create_files = oprofile_perf_create_files; + ops->setup = oprofile_perf_setup; + ops->start = oprofile_perf_start; + ops->stop = oprofile_perf_stop; + ops->shutdown = oprofile_perf_stop; + ops->cpu_type = op_name_from_perf_id(); + + if (!ops->cpu_type) + ret = -ENODEV; + else + pr_info("oprofile: using %s\n", ops->cpu_type); + +out: + if (ret) + oprofile_perf_exit(); + + return ret; +} diff --git a/drivers/oprofile/oprofilefs.c b/drivers/oprofile/oprofilefs.c index 2766a6d3c2e..1944621930d 100644 --- a/drivers/oprofile/oprofilefs.c +++ b/drivers/oprofile/oprofilefs.c @@ -91,16 +91,20 @@ static ssize_t ulong_read_file(struct file *file, char __user *buf, size_t count static ssize_t ulong_write_file(struct file *file, char const __user *buf, size_t count, loff_t *offset) { - unsigned long *value = file->private_data; + unsigned long value; int retval; if (*offset) return -EINVAL; - retval = oprofilefs_ulong_from_user(value, buf, count); + retval = oprofilefs_ulong_from_user(&value, buf, count); + if (retval) + return retval; + retval = oprofile_set_ulong(file->private_data, value); if (retval) return retval; + return count; } @@ -126,50 +130,41 @@ static const struct file_operations ulong_ro_fops = { }; -static struct dentry *__oprofilefs_create_file(struct super_block *sb, +static int __oprofilefs_create_file(struct super_block *sb, struct dentry *root, char const *name, const struct file_operations *fops, - int perm) + int perm, void *priv) { struct dentry *dentry; struct inode *inode; dentry = d_alloc_name(root, name); if (!dentry) - return NULL; + return -ENOMEM; inode = oprofilefs_get_inode(sb, S_IFREG | perm); if (!inode) { dput(dentry); - return NULL; + return -ENOMEM; } inode->i_fop = fops; d_add(dentry, inode); - return dentry; + dentry->d_inode->i_private = priv; + return 0; } int oprofilefs_create_ulong(struct super_block *sb, struct dentry *root, char const *name, unsigned long *val) { - struct dentry *d = 
__oprofilefs_create_file(sb, root, name, - &ulong_fops, 0644); - if (!d) - return -EFAULT; - - d->d_inode->i_private = val; - return 0; + return __oprofilefs_create_file(sb, root, name, + &ulong_fops, 0644, val); } int oprofilefs_create_ro_ulong(struct super_block *sb, struct dentry *root, char const *name, unsigned long *val) { - struct dentry *d = __oprofilefs_create_file(sb, root, name, - &ulong_ro_fops, 0444); - if (!d) - return -EFAULT; - - d->d_inode->i_private = val; - return 0; + return __oprofilefs_create_file(sb, root, name, + &ulong_ro_fops, 0444, val); } @@ -189,31 +184,22 @@ static const struct file_operations atomic_ro_fops = { int oprofilefs_create_ro_atomic(struct super_block *sb, struct dentry *root, char const *name, atomic_t *val) { - struct dentry *d = __oprofilefs_create_file(sb, root, name, - &atomic_ro_fops, 0444); - if (!d) - return -EFAULT; - - d->d_inode->i_private = val; - return 0; + return __oprofilefs_create_file(sb, root, name, + &atomic_ro_fops, 0444, val); } int oprofilefs_create_file(struct super_block *sb, struct dentry *root, char const *name, const struct file_operations *fops) { - if (!__oprofilefs_create_file(sb, root, name, fops, 0644)) - return -EFAULT; - return 0; + return __oprofilefs_create_file(sb, root, name, fops, 0644, NULL); } int oprofilefs_create_file_perm(struct super_block *sb, struct dentry *root, char const *name, const struct file_operations *fops, int perm) { - if (!__oprofilefs_create_file(sb, root, name, fops, perm)) - return -EFAULT; - return 0; + return __oprofilefs_create_file(sb, root, name, fops, perm, NULL); } diff --git a/drivers/parport/share.c b/drivers/parport/share.c index dffa5d4fb29..a2d9d1e5926 100644 --- a/drivers/parport/share.c +++ b/drivers/parport/share.c @@ -306,7 +306,7 @@ struct parport *parport_register_port(unsigned long base, int irq, int dma, spin_lock_init(&tmp->pardevice_lock); tmp->ieee1284.mode = IEEE1284_MODE_COMPAT; tmp->ieee1284.phase = IEEE1284_PH_FWD_IDLE; - 
init_MUTEX_LOCKED (&tmp->ieee1284.irq); /* actually a semaphore at 0 */ + sema_init(&tmp->ieee1284.irq, 0); tmp->spintime = parport_default_spintime; atomic_set (&tmp->ref_count, 1); INIT_LIST_HEAD(&tmp->full_list); diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c index ad0ed212db4..348fba0a897 100644 --- a/drivers/scsi/scsi.c +++ b/drivers/scsi/scsi.c @@ -1046,13 +1046,13 @@ int scsi_get_vpd_page(struct scsi_device *sdev, u8 page, unsigned char *buf, /* If the user actually wanted this page, we can skip the rest */ if (page == 0) - return -EINVAL; + return 0; for (i = 0; i < min((int)buf[3], buf_len - 4); i++) if (buf[i + 4] == page) goto found; - if (i < buf[3] && i > buf_len) + if (i < buf[3] && i >= buf_len - 4) /* ran off the end of the buffer, give us benefit of doubt */ goto found; /* The device claims it doesn't support the requested page */ diff --git a/drivers/serial/ioc3_serial.c b/drivers/serial/ioc3_serial.c index 93de907b120..800c5460233 100644 --- a/drivers/serial/ioc3_serial.c +++ b/drivers/serial/ioc3_serial.c @@ -2044,6 +2044,7 @@ ioc3uart_probe(struct ioc3_submodule *is, struct ioc3_driver_data *idd) if (!port) { printk(KERN_WARNING "IOC3 serial memory not available for port\n"); + ret = -ENOMEM; goto out4; } spin_lock_init(&port->ip_lock); diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index 7c8008225ee..17927b1f933 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -127,7 +127,10 @@ static void handle_tx(struct vhost_net *net) size_t len, total_len = 0; int err, wmem; size_t hdr_size; - struct socket *sock = rcu_dereference(vq->private_data); + struct socket *sock; + + sock = rcu_dereference_check(vq->private_data, + lockdep_is_held(&vq->mutex)); if (!sock) return; @@ -582,7 +585,10 @@ static void vhost_net_disable_vq(struct vhost_net *n, static void vhost_net_enable_vq(struct vhost_net *n, struct vhost_virtqueue *vq) { - struct socket *sock = vq->private_data; + struct socket *sock; + + sock = 
rcu_dereference_protected(vq->private_data, + lockdep_is_held(&vq->mutex)); if (!sock) return; if (vq == n->vqs + VHOST_NET_VQ_TX) { @@ -598,7 +604,8 @@ static struct socket *vhost_net_stop_vq(struct vhost_net *n, struct socket *sock; mutex_lock(&vq->mutex); - sock = vq->private_data; + sock = rcu_dereference_protected(vq->private_data, + lockdep_is_held(&vq->mutex)); vhost_net_disable_vq(n, vq); rcu_assign_pointer(vq->private_data, NULL); mutex_unlock(&vq->mutex); @@ -736,7 +743,8 @@ static long vhost_net_set_backend(struct vhost_net *n, unsigned index, int fd) } /* start polling new socket */ - oldsock = vq->private_data; + oldsock = rcu_dereference_protected(vq->private_data, + lockdep_is_held(&vq->mutex)); if (sock != oldsock) { vhost_net_disable_vq(n, vq); rcu_assign_pointer(vq->private_data, sock); diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index dd3d6f7406f..8b5a1b33d0f 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -320,7 +320,7 @@ long vhost_dev_reset_owner(struct vhost_dev *dev) vhost_dev_cleanup(dev); memory->nregions = 0; - dev->memory = memory; + RCU_INIT_POINTER(dev->memory, memory); return 0; } @@ -352,8 +352,9 @@ void vhost_dev_cleanup(struct vhost_dev *dev) fput(dev->log_file); dev->log_file = NULL; /* No one will access memory at this point */ - kfree(dev->memory); - dev->memory = NULL; + kfree(rcu_dereference_protected(dev->memory, + lockdep_is_held(&dev->mutex))); + RCU_INIT_POINTER(dev->memory, NULL); if (dev->mm) mmput(dev->mm); dev->mm = NULL; @@ -440,14 +441,22 @@ static int vq_access_ok(unsigned int num, /* Caller should have device mutex but not vq mutex */ int vhost_log_access_ok(struct vhost_dev *dev) { - return memory_access_ok(dev, dev->memory, 1); + struct vhost_memory *mp; + + mp = rcu_dereference_protected(dev->memory, + lockdep_is_held(&dev->mutex)); + return memory_access_ok(dev, mp, 1); } /* Verify access for write logging. 
*/ /* Caller should have vq mutex and device mutex */ static int vq_log_access_ok(struct vhost_virtqueue *vq, void __user *log_base) { - return vq_memory_access_ok(log_base, vq->dev->memory, + struct vhost_memory *mp; + + mp = rcu_dereference_protected(vq->dev->memory, + lockdep_is_held(&vq->mutex)); + return vq_memory_access_ok(log_base, mp, vhost_has_feature(vq->dev, VHOST_F_LOG_ALL)) && (!vq->log_used || log_access_ok(log_base, vq->log_addr, sizeof *vq->used + @@ -487,7 +496,8 @@ static long vhost_set_memory(struct vhost_dev *d, struct vhost_memory __user *m) kfree(newmem); return -EFAULT; } - oldmem = d->memory; + oldmem = rcu_dereference_protected(d->memory, + lockdep_is_held(&d->mutex)); rcu_assign_pointer(d->memory, newmem); synchronize_rcu(); kfree(oldmem); diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h index afd77295971..af3c11ded5f 100644 --- a/drivers/vhost/vhost.h +++ b/drivers/vhost/vhost.h @@ -106,7 +106,7 @@ struct vhost_virtqueue { * vhost_work execution acts instead of rcu_read_lock() and the end of * vhost_work execution acts instead of rcu_read_lock(). * Writers use virtqueue mutex. */ - void *private_data; + void __rcu *private_data; /* Log write descriptors */ void __user *log_base; struct vhost_log log[VHOST_NET_MAX_SG]; @@ -116,7 +116,7 @@ struct vhost_dev { /* Readers use RCU to access memory table pointer * log base pointer and features. * Writers use mutex below.*/ - struct vhost_memory *memory; + struct vhost_memory __rcu *memory; struct mm_struct *mm; struct mutex mutex; unsigned acked_features; @@ -173,7 +173,11 @@ enum { static inline int vhost_has_feature(struct vhost_dev *dev, int bit) { - unsigned acked_features = rcu_dereference(dev->acked_features); + unsigned acked_features; + + acked_features = + rcu_dereference_index_check(dev->acked_features, + lockdep_is_held(&dev->mutex)); return acked_features & (1 << bit); } |