From e53e97ce3c7119199d2788d8fd1618efa9c2d1eb Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 9 Jul 2007 22:22:44 -0700 Subject: [SPARC64]: Add LDOM virtual channel driver and VIO device layer. Virtual devices on Sun Logical Domains are built on top of a virtual channel framework. This, with the help of hypervisor interfaces, provides a link layer protocol with basic handshaking over which virtual device clients and servers communicate. Built on top of this is a VIO device protocol which has its own handshaking and message types. At this layer attributes are exchanged (disk size, network device addresses, etc.), descriptor rings are registered, and data transfers are triggered and replied to. Signed-off-by: David S. Miller --- include/asm-sparc64/ldc.h | 136 ++++++++++++++++ include/asm-sparc64/vio.h | 402 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 538 insertions(+) create mode 100644 include/asm-sparc64/ldc.h create mode 100644 include/asm-sparc64/vio.h (limited to 'include') diff --git a/include/asm-sparc64/ldc.h b/include/asm-sparc64/ldc.h new file mode 100644 index 00000000000..24fd2367d33 --- /dev/null +++ b/include/asm-sparc64/ldc.h @@ -0,0 +1,136 @@ +#ifndef _SPARC64_LDC_H +#define _SPARC64_LDC_H + +#include + +extern int ldom_domaining_enabled; + +/* The event handler will be invoked when link state changes + * or data becomes available on the receive side. + * + * For non-RAW links, if the LDC_EVENT_RESET event arrives the + * driver should reset all of its internal state and reinvoke + * ldc_connect() to try and bring the link up again. + * + * For RAW links, ldc_connect() is not used. Instead the driver + * just waits for the LDC_EVENT_UP event.
+ */ +struct ldc_channel_config { + void (*event)(void *arg, int event); + + u32 mtu; + unsigned int rx_irq; + unsigned int tx_irq; + u8 mode; +#define LDC_MODE_RAW 0x00 +#define LDC_MODE_UNRELIABLE 0x01 +#define LDC_MODE_RESERVED 0x02 +#define LDC_MODE_RELIABLE 0x03 +#define LDC_MODE_STREAM 0x04 + + u8 debug; +#define LDC_DEBUG_HS 0x01 +#define LDC_DEBUG_STATE 0x02 +#define LDC_DEBUG_RX 0x04 +#define LDC_DEBUG_TX 0x08 +#define LDC_DEBUG_DATA 0x10 +}; + +#define LDC_EVENT_RESET 0x01 +#define LDC_EVENT_UP 0x02 +#define LDC_EVENT_DATA_READY 0x04 + +#define LDC_STATE_INVALID 0x00 +#define LDC_STATE_INIT 0x01 +#define LDC_STATE_BOUND 0x02 +#define LDC_STATE_READY 0x03 +#define LDC_STATE_CONNECTED 0x04 + +struct ldc_channel; + +/* Allocate state for a channel. */ +extern struct ldc_channel *ldc_alloc(unsigned long id, + const struct ldc_channel_config *cfgp, + void *event_arg); + +/* Shut down and free state for a channel. */ +extern void ldc_free(struct ldc_channel *lp); + +/* Register TX and RX queues of the link with the hypervisor. */ +extern int ldc_bind(struct ldc_channel *lp); + +/* For non-RAW protocols we need to complete a handshake before + * communication can proceed. ldc_connect() does that, if the + * handshake completes successfully, an LDC_EVENT_UP event will + * be sent up to the driver. + */ +extern int ldc_connect(struct ldc_channel *lp); +extern int ldc_disconnect(struct ldc_channel *lp); + +extern int ldc_state(struct ldc_channel *lp); + +/* Read and write operations. Only valid when the link is up. 
*/ +extern int ldc_write(struct ldc_channel *lp, const void *buf, + unsigned int size); +extern int ldc_read(struct ldc_channel *lp, void *buf, unsigned int size); + +#define LDC_MAP_SHADOW 0x01 +#define LDC_MAP_DIRECT 0x02 +#define LDC_MAP_IO 0x04 +#define LDC_MAP_R 0x08 +#define LDC_MAP_W 0x10 +#define LDC_MAP_X 0x20 +#define LDC_MAP_RW (LDC_MAP_R | LDC_MAP_W) +#define LDC_MAP_RWX (LDC_MAP_R | LDC_MAP_W | LDC_MAP_X) +#define LDC_MAP_ALL 0x03f + +struct ldc_trans_cookie { + u64 cookie_addr; + u64 cookie_size; +}; + +struct scatterlist; +extern int ldc_map_sg(struct ldc_channel *lp, + struct scatterlist *sg, int num_sg, + struct ldc_trans_cookie *cookies, int ncookies, + unsigned int map_perm); + +extern int ldc_map_single(struct ldc_channel *lp, + void *buf, unsigned int len, + struct ldc_trans_cookie *cookies, int ncookies, + unsigned int map_perm); + +extern void ldc_unmap(struct ldc_channel *lp, struct ldc_trans_cookie *cookies, + int ncookies); + +extern int ldc_copy(struct ldc_channel *lp, int copy_dir, + void *buf, unsigned int len, unsigned long offset, + struct ldc_trans_cookie *cookies, int ncookies); + +static inline int ldc_get_dring_entry(struct ldc_channel *lp, + void *buf, unsigned int len, + unsigned long offset, + struct ldc_trans_cookie *cookies, + int ncookies) +{ + return ldc_copy(lp, LDC_COPY_IN, buf, len, offset, cookies, ncookies); +} + +static inline int ldc_put_dring_entry(struct ldc_channel *lp, + void *buf, unsigned int len, + unsigned long offset, + struct ldc_trans_cookie *cookies, + int ncookies) +{ + return ldc_copy(lp, LDC_COPY_OUT, buf, len, offset, cookies, ncookies); +} + +extern void *ldc_alloc_exp_dring(struct ldc_channel *lp, unsigned int len, + struct ldc_trans_cookie *cookies, + int *ncookies, unsigned int map_perm); + +extern void ldc_free_exp_dring(struct ldc_channel *lp, void *buf, + unsigned int len, + struct ldc_trans_cookie *cookies, int ncookies); + +#endif /* _SPARC64_LDC_H */ diff --git a/include/asm-sparc64/vio.h 
b/include/asm-sparc64/vio.h new file mode 100644 index 00000000000..47c3da76dcb --- /dev/null +++ b/include/asm-sparc64/vio.h @@ -0,0 +1,402 @@ +#ifndef _SPARC64_VIO_H +#define _SPARC64_VIO_H + +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +struct vio_msg_tag { + u8 type; +#define VIO_TYPE_CTRL 0x01 +#define VIO_TYPE_DATA 0x02 +#define VIO_TYPE_ERR 0x04 + + u8 stype; +#define VIO_SUBTYPE_INFO 0x01 +#define VIO_SUBTYPE_ACK 0x02 +#define VIO_SUBTYPE_NACK 0x04 + + u16 stype_env; +#define VIO_VER_INFO 0x0001 +#define VIO_ATTR_INFO 0x0002 +#define VIO_DRING_REG 0x0003 +#define VIO_DRING_UNREG 0x0004 +#define VIO_RDX 0x0005 +#define VIO_PKT_DATA 0x0040 +#define VIO_DESC_DATA 0x0041 +#define VIO_DRING_DATA 0x0042 +#define VNET_MCAST_INFO 0x0101 + + u32 sid; +}; + +struct vio_rdx { + struct vio_msg_tag tag; + u64 resv[6]; +}; + +struct vio_ver_info { + struct vio_msg_tag tag; + u16 major; + u16 minor; + u8 dev_class; +#define VDEV_NETWORK 0x01 +#define VDEV_NETWORK_SWITCH 0x02 +#define VDEV_DISK 0x03 +#define VDEV_DISK_SERVER 0x04 + + u8 resv1[3]; + u64 resv2[5]; +}; + +struct vio_dring_register { + struct vio_msg_tag tag; + u64 dring_ident; + u32 num_descr; + u32 descr_size; + u16 options; +#define VIO_TX_DRING 0x0001 +#define VIO_RX_DRING 0x0002 + u16 resv; + u32 num_cookies; + struct ldc_trans_cookie cookies[0]; +}; + +struct vio_dring_unregister { + struct vio_msg_tag tag; + u64 dring_ident; + u64 resv[5]; +}; + +/* Data transfer modes */ +#define VIO_PKT_MODE 0x01 /* Packet based transfer */ +#define VIO_DESC_MODE 0x02 /* In-band descriptors */ +#define VIO_DRING_MODE 0x03 /* Descriptor rings */ + +struct vio_dring_data { + struct vio_msg_tag tag; + u64 seq; + u64 dring_ident; + u32 start_idx; + u32 end_idx; + u8 state; +#define VIO_DRING_ACTIVE 0x01 +#define VIO_DRING_STOPPED 0x02 + + u8 __pad1; + u16 __pad2; + u32 __pad3; + u64 __par4[2]; +}; + +struct vio_dring_hdr { + u8 state; +#define VIO_DESC_FREE 0x01 +#define 
VIO_DESC_READY 0x02 +#define VIO_DESC_ACCEPTED 0x03 +#define VIO_DESC_DONE 0x04 + u8 ack; +#define VIO_ACK_ENABLE 0x01 +#define VIO_ACK_DISABLE 0x00 + + u16 __pad1; + u32 __pad2; +}; + +/* VIO disk specific structures and defines */ +struct vio_disk_attr_info { + struct vio_msg_tag tag; + u8 xfer_mode; + u8 vdisk_type; +#define VD_DISK_TYPE_SLICE 0x01 /* Slice in block device */ +#define VD_DISK_TYPE_DISK 0x02 /* Entire block device */ + u16 resv1; + u32 vdisk_block_size; + u64 operations; + u64 vdisk_size; + u64 max_xfer_size; + u64 resv2[2]; +}; + +struct vio_disk_desc { + struct vio_dring_hdr hdr; + u64 req_id; + u8 operation; +#define VD_OP_BREAD 0x01 /* Block read */ +#define VD_OP_BWRITE 0x02 /* Block write */ +#define VD_OP_FLUSH 0x03 /* Flush disk contents */ +#define VD_OP_GET_WCE 0x04 /* Get write-cache status */ +#define VD_OP_SET_WCE 0x05 /* Enable/disable write-cache */ +#define VD_OP_GET_VTOC 0x06 /* Get VTOC */ +#define VD_OP_SET_VTOC 0x07 /* Set VTOC */ +#define VD_OP_GET_DISKGEOM 0x08 /* Get disk geometry */ +#define VD_OP_SET_DISKGEOM 0x09 /* Set disk geometry */ +#define VD_OP_SCSICMD 0x0a /* SCSI control command */ +#define VD_OP_GET_DEVID 0x0b /* Get device ID */ +#define VD_OP_GET_EFI 0x0c /* Get EFI */ +#define VD_OP_SET_EFI 0x0d /* Set EFI */ + u8 slice; + u16 resv1; + u32 status; + u64 offset; + u64 size; + u32 ncookies; + u32 resv2; + struct ldc_trans_cookie cookies[0]; +}; + +#define VIO_DISK_VNAME_LEN 8 +#define VIO_DISK_ALABEL_LEN 128 +#define VIO_DISK_NUM_PART 8 + +struct vio_disk_vtoc { + u8 volume_name[VIO_DISK_VNAME_LEN]; + u16 sector_size; + u16 num_partitions; + u8 ascii_label[VIO_DISK_ALABEL_LEN]; + struct { + u16 id; + u16 perm_flags; + u32 resv; + u64 start_block; + u64 num_blocks; + } partitions[VIO_DISK_NUM_PART]; +}; + +struct vio_disk_geom { + u16 num_cyl; /* Num data cylinders */ + u16 alt_cyl; /* Num alternate cylinders */ + u16 beg_cyl; /* Cyl off of fixed head area */ + u16 num_hd; /* Num heads */ + u16 num_sec; /* Num 
sectors */ + u16 ifact; /* Interleave factor */ + u16 apc; /* Alts per cylinder (SCSI) */ + u16 rpm; /* Revolutions per minute */ + u16 phy_cyl; /* Num physical cylinders */ + u16 wr_skip; /* Num sects to skip, writes */ + u16 rd_skip; /* Num sects to skip, reads */ +}; + +struct vio_disk_devid { + u16 resv; + u16 type; + u32 len; + char id[0]; +}; + +struct vio_disk_efi { + u64 lba; + u64 len; + char data[0]; +}; + +/* VIO net specific structures and defines */ +struct vio_net_attr_info { + struct vio_msg_tag tag; + u8 xfer_mode; + u8 addr_type; +#define VNET_ADDR_ETHERMAC 0x01 + u16 ack_freq; + u32 resv1; + u64 addr; + u64 mtu; + u64 resv2[3]; +}; + +#define VNET_NUM_MCAST 7 + +struct vio_net_mcast_info { + struct vio_msg_tag tag; + u8 set; + u8 count; + u8 mcast_addr[VNET_NUM_MCAST * 6]; + u32 resv; +}; + +struct vio_net_desc { + struct vio_dring_hdr hdr; + u32 size; + u32 ncookies; + struct ldc_trans_cookie cookies[0]; +}; + +#define VIO_MAX_RING_COOKIES 24 + +struct vio_dring_state { + u64 ident; + void *base; + u64 snd_nxt; + u64 rcv_nxt; + u32 entry_size; + u32 num_entries; + u32 prod; + u32 cons; + u32 pending; + int ncookies; + struct ldc_trans_cookie cookies[VIO_MAX_RING_COOKIES]; +}; + +static inline void *vio_dring_cur(struct vio_dring_state *dr) +{ + return dr->base + (dr->entry_size * dr->prod); +} + +static inline void *vio_dring_entry(struct vio_dring_state *dr, + unsigned int index) +{ + return dr->base + (dr->entry_size * index); +} + +static inline u32 vio_dring_avail(struct vio_dring_state *dr, + unsigned int ring_size) +{ + /* Ensure build-time power-of-2.
*/ + BUILD_BUG_ON(ring_size & (ring_size - 1)); + + return (dr->pending - + ((dr->prod - dr->cons) & (ring_size - 1))); +} + +struct vio_dev { + struct mdesc_node *mp; + struct device_node *dp; + + const char *type; + const char *compat; + int compat_len; + + struct device dev; +}; + +struct vio_driver { + struct list_head node; + const struct vio_device_id *id_table; + int (*probe)(struct vio_dev *dev, const struct vio_device_id *id); + int (*remove)(struct vio_dev *dev); + void (*shutdown)(struct vio_dev *dev); + unsigned long driver_data; + struct device_driver driver; +}; + +struct vio_version { + u16 major; + u16 minor; +}; + +struct vio_driver_state; +struct vio_driver_ops { + int (*send_attr)(struct vio_driver_state *vio); + int (*handle_attr)(struct vio_driver_state *vio, void *pkt); + void (*handshake_complete)(struct vio_driver_state *vio); +}; + +struct vio_completion { + struct completion com; + int err; + int waiting_for; +}; + +struct vio_driver_state { + /* Protects VIO handshake and, optionally, driver private state. 
*/ + spinlock_t lock; + + struct ldc_channel *lp; + + u32 _peer_sid; + u32 _local_sid; + struct vio_dring_state drings[2]; +#define VIO_DRIVER_TX_RING 0 +#define VIO_DRIVER_RX_RING 1 + + u8 hs_state; +#define VIO_HS_INVALID 0x00 +#define VIO_HS_GOTVERS 0x01 +#define VIO_HS_GOT_ATTR 0x04 +#define VIO_HS_SENT_DREG 0x08 +#define VIO_HS_SENT_RDX 0x10 +#define VIO_HS_GOT_RDX_ACK 0x20 +#define VIO_HS_GOT_RDX 0x40 +#define VIO_HS_SENT_RDX_ACK 0x80 +#define VIO_HS_COMPLETE (VIO_HS_GOT_RDX_ACK | VIO_HS_SENT_RDX_ACK) + + u8 dev_class; + + u8 dr_state; +#define VIO_DR_STATE_TXREG 0x01 +#define VIO_DR_STATE_RXREG 0x02 +#define VIO_DR_STATE_TXREQ 0x10 +#define VIO_DR_STATE_RXREQ 0x20 + + u8 debug; +#define VIO_DEBUG_HS 0x01 +#define VIO_DEBUG_DATA 0x02 + + void *desc_buf; + unsigned int desc_buf_len; + + struct vio_completion *cmp; + + struct vio_dev *vdev; + + unsigned long channel_id; + unsigned int tx_irq; + unsigned int rx_irq; + + struct timer_list timer; + + struct vio_version ver; + + struct mdesc_node *endpoint; + + struct vio_version *ver_table; + int ver_table_entries; + + char *name; + + struct vio_driver_ops *ops; +}; + +#define viodbg(TYPE, f, a...) 
\ +do { if (vio->debug & VIO_DEBUG_##TYPE) \ + printk(KERN_INFO "vio: ID[%lu] " f, vio->channel_id, ## a); \ +} while (0) + +extern int vio_register_driver(struct vio_driver *drv); +extern void vio_unregister_driver(struct vio_driver *drv); + +static inline struct vio_driver *to_vio_driver(struct device_driver *drv) +{ + return container_of(drv, struct vio_driver, driver); +} + +static inline struct vio_dev *to_vio_dev(struct device *dev) +{ + return container_of(dev, struct vio_dev, dev); +} + +extern int vio_ldc_send(struct vio_driver_state *vio, void *data, int len); +extern void vio_link_state_change(struct vio_driver_state *vio, int event); +extern void vio_conn_reset(struct vio_driver_state *vio); +extern int vio_control_pkt_engine(struct vio_driver_state *vio, void *pkt); +extern int vio_validate_sid(struct vio_driver_state *vio, + struct vio_msg_tag *tp); +extern u32 vio_send_sid(struct vio_driver_state *vio); +extern int vio_ldc_alloc(struct vio_driver_state *vio, + struct ldc_channel_config *base_cfg, void *event_arg); +extern void vio_ldc_free(struct vio_driver_state *vio); +extern int vio_driver_init(struct vio_driver_state *vio, struct vio_dev *vdev, + u8 dev_class, struct mdesc_node *channel_endpoint, + struct vio_version *ver_table, int ver_table_size, + struct vio_driver_ops *ops, char *name); + +extern struct mdesc_node *vio_find_endpoint(struct vio_dev *vdev); +extern void vio_port_up(struct vio_driver_state *vio); + +#endif /* _SPARC64_VIO_H */ -- cgit v1.2.3-70-g09d2 From cb4812358423e7ea47d2b6471918d65238452cc5 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 11 Jul 2007 18:14:41 -0700 Subject: [SPARC64]: Assorted LDC bug cures. 1) LDC_MODE_RELIABLE is deprecated and unused by anything, plus it and LDC_MODE_STREAM were mis-numbered. 2) read_stream() should try to read as much as possible into the per-LDC stream buffer area, so do not trim the read_nonraw() length by the caller's size parameter.
3) Send data ACKs when necessary in read_nonraw(). 4) In read_nonraw() when we get a pure ACK, advance the RX head unconditionally past it. 5) Provide the ACKID field in the ldcdbg() packet dump in read_nonraw(). This helps debugging stream mode LDC channel problems. 6) Decrease verbosity of rx_data_wait() so that it is more useful. A debugging message each loop iteration is too much. 7) In process_data_ack() stop the loop checking when we hit lp->tx_tail not lp->tx_head. 8) Set the seqid field properly in send_data_nack(). Signed-off-by: David S. Miller --- arch/sparc64/kernel/ldc.c | 56 ++++++++++++++++++++++++++++++++--------------- include/asm-sparc64/ldc.h | 3 +-- 2 files changed, 39 insertions(+), 20 deletions(-) (limited to 'include') diff --git a/arch/sparc64/kernel/ldc.c b/arch/sparc64/kernel/ldc.c index 0fa04d6f978..4cba2868596 100644 --- a/arch/sparc64/kernel/ldc.c +++ b/arch/sparc64/kernel/ldc.c @@ -239,8 +239,7 @@ static struct ldc_packet *handshake_get_tx_packet(struct ldc_channel *lp, */ static unsigned long head_for_data(struct ldc_channel *lp) { - if (lp->cfg.mode == LDC_MODE_RELIABLE || - lp->cfg.mode == LDC_MODE_STREAM) + if (lp->cfg.mode == LDC_MODE_STREAM) return lp->tx_acked; return lp->tx_head; } @@ -494,7 +493,7 @@ static int send_data_nack(struct ldc_channel *lp, struct ldc_packet *data_pkt) p->type = data_pkt->type; p->stype = LDC_NACK; p->ctrl = data_pkt->ctrl & LDC_CTRL_MSK; - p->seqid = lp->snd_nxt; + p->seqid = lp->snd_nxt + 1; p->u.r.ackid = lp->rcv_nxt; ldcdbg(HS, "SEND DATA NACK type[0x%x] ctl[0x%x] seq[0x%x] ack[0x%x]\n", @@ -765,7 +764,7 @@ static int process_data_ack(struct ldc_channel *lp, lp->tx_acked = head; return 0; } - if (head == lp->tx_head) + if (head == lp->tx_tail) return ldc_abort(lp); } @@ -1093,11 +1092,6 @@ struct ldc_channel *ldc_alloc(unsigned long id, mss = LDC_PACKET_SIZE - 8; break; - case LDC_MODE_RELIABLE: - mops = &nonraw_ops; - mss = LDC_PACKET_SIZE - 8 - 8; - break; - case LDC_MODE_STREAM: mops =
&stream_ops; mss = LDC_PACKET_SIZE - 8 - 8; @@ -1579,15 +1573,14 @@ static int rx_data_wait(struct ldc_channel *lp, unsigned long cur_head) if (hv_err) return ldc_abort(lp); - ldcdbg(DATA, "REREAD head[%lx] tail[%lx] chan_state[%lx]\n", - dummy, lp->rx_tail, lp->chan_state); - if (lp->chan_state == LDC_CHANNEL_DOWN || lp->chan_state == LDC_CHANNEL_RESETTING) return -ECONNRESET; if (cur_head != lp->rx_tail) { - ldcdbg(DATA, "DATA WAIT DONE\n"); + ldcdbg(DATA, "DATA WAIT DONE " + "head[%lx] tail[%lx] chan_state[%lx]\n", + dummy, lp->rx_tail, lp->chan_state); return 0; } @@ -1607,6 +1600,28 @@ static int rx_set_head(struct ldc_channel *lp, unsigned long head) return 0; } +static void send_data_ack(struct ldc_channel *lp) +{ + unsigned long new_tail; + struct ldc_packet *p; + + p = data_get_tx_packet(lp, &new_tail); + if (likely(p)) { + int err; + + memset(p, 0, sizeof(*p)); + p->type = LDC_DATA; + p->stype = LDC_ACK; + p->ctrl = 0; + p->seqid = lp->snd_nxt + 1; + p->u.r.ackid = lp->rcv_nxt; + + err = send_tx_packet(lp, p, new_tail); + if (!err) + lp->snd_nxt++; + } +} + static int read_nonraw(struct ldc_channel *lp, void *buf, unsigned int size) { struct ldc_packet *first_frag; @@ -1637,13 +1652,14 @@ static int read_nonraw(struct ldc_channel *lp, void *buf, unsigned int size) BUG_ON(new == lp->rx_tail); p = lp->rx_base + (new / LDC_PACKET_SIZE); - ldcdbg(RX, "RX read pkt[%02x:%02x:%02x:%02x:%08x] " + ldcdbg(RX, "RX read pkt[%02x:%02x:%02x:%02x:%08x:%08x] " "rcv_nxt[%08x]\n", p->type, p->stype, p->ctrl, p->env, p->seqid, + p->u.r.ackid, lp->rcv_nxt); if (unlikely(!rx_seq_ok(lp, p->seqid))) { @@ -1672,6 +1688,9 @@ static int read_nonraw(struct ldc_channel *lp, void *buf, unsigned int size) } if (!(p->stype & LDC_INFO)) { new = rx_advance(lp, new); + err = rx_set_head(lp, new); + if (err) + break; goto no_data; } @@ -1748,8 +1767,11 @@ no_data: if (err && first_frag) lp->rcv_nxt = first_frag->seqid - 1; - if (!err) + if (!err) { err = copied; + if (err > 0 && 
lp->cfg.mode != LDC_MODE_UNRELIABLE) + send_data_ack(lp); + } return err; } @@ -1770,9 +1792,7 @@ static int write_stream(struct ldc_channel *lp, const void *buf, static int read_stream(struct ldc_channel *lp, void *buf, unsigned int size) { if (!lp->mssbuf_len) { - int err = read_nonraw(lp, lp->mssbuf, - (size > lp->cfg.mtu ? - lp->cfg.mtu : size)); + int err = read_nonraw(lp, lp->mssbuf, lp->cfg.mtu); if (err < 0) return err; diff --git a/include/asm-sparc64/ldc.h b/include/asm-sparc64/ldc.h index 24fd2367d33..1c13738f13f 100644 --- a/include/asm-sparc64/ldc.h +++ b/include/asm-sparc64/ldc.h @@ -25,8 +25,7 @@ struct ldc_channel_config { #define LDC_MODE_RAW 0x00 #define LDC_MODE_UNRELIABLE 0x01 #define LDC_MODE_RESERVED 0x02 -#define LDC_MODE_RELIABLE 0x03 -#define LDC_MODE_STREAM 0x04 +#define LDC_MODE_STREAM 0x03 u8 debug; #define LDC_DEBUG_HS 0x01 -- cgit v1.2.3-70-g09d2 From 13077d80286205e02eebe1c2786a914a4bbd2588 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 11 Jul 2007 18:18:04 -0700 Subject: [SPARC64]: Export powerd facilities for external entities. Besides the existing usage for power-button interrupts, we'll want to make use of this code for domain-services where the LDOM manager can send reboot requests to the guest node. Signed-off-by: David S. Miller --- arch/sparc64/kernel/power.c | 54 ++++++++++++++++++++++++++------------------- include/asm-sparc64/power.h | 7 ++++++ 2 files changed, 38 insertions(+), 23 deletions(-) create mode 100644 include/asm-sparc64/power.h (limited to 'include') diff --git a/arch/sparc64/kernel/power.c b/arch/sparc64/kernel/power.c index 5d6adea3967..8dd4294ad21 100644 --- a/arch/sparc64/kernel/power.c +++ b/arch/sparc64/kernel/power.c @@ -1,7 +1,6 @@ -/* $Id: power.c,v 1.10 2001/12/11 01:57:16 davem Exp $ - * power.c: Power management driver. +/* power.c: Power management driver. * - * Copyright (C) 1999 David S. Miller (davem@redhat.com) + * Copyright (C) 1999, 2007 David S. 
Miller (davem@davemloft.net) */ #include @@ -19,6 +18,7 @@ #include #include #include +#include #include #include @@ -29,24 +29,26 @@ */ int scons_pwroff = 1; -#ifdef CONFIG_PCI -#include static void __iomem *power_reg; static DECLARE_WAIT_QUEUE_HEAD(powerd_wait); static int button_pressed; -static irqreturn_t power_handler(int irq, void *dev_id) +void wake_up_powerd(void) { if (button_pressed == 0) { button_pressed = 1; wake_up(&powerd_wait); } +} + +static irqreturn_t power_handler(int irq, void *dev_id) +{ + wake_up_powerd(); /* FIXME: Check registers for status... */ return IRQ_HANDLED; } -#endif /* CONFIG_PCI */ extern void machine_halt(void); extern void machine_alt_power_off(void); @@ -56,19 +58,18 @@ void machine_power_off(void) { sstate_poweroff(); if (!serial_console || scons_pwroff) { -#ifdef CONFIG_PCI if (power_reg) { /* Both register bits seem to have the * same effect, so until I figure out * what the difference is... */ writel(AUXIO_PCIO_CPWR_OFF | AUXIO_PCIO_SPWR_OFF, power_reg); - } else -#endif /* CONFIG_PCI */ + } else { if (poweroff_method != NULL) { poweroff_method(); /* not reached */ } + } } machine_halt(); } @@ -76,7 +77,6 @@ void machine_power_off(void) void (*pm_power_off)(void) = machine_power_off; EXPORT_SYMBOL(pm_power_off); -#ifdef CONFIG_PCI static int powerd(void *__unused) { static char *envp[] = { "HOME=/", "TERM=linux", "PATH=/sbin:/usr/sbin:/bin:/usr/bin", NULL }; @@ -86,7 +86,7 @@ static int powerd(void *__unused) daemonize("powerd"); add_wait_queue(&powerd_wait, &wait); -again: + for (;;) { set_task_state(current, TASK_INTERRUPTIBLE); if (button_pressed) @@ -100,16 +100,28 @@ again: /* Ok, down we go... 
*/ button_pressed = 0; if (kernel_execve("/sbin/shutdown", argv, envp) < 0) { - printk("powerd: shutdown execution failed\n"); - add_wait_queue(&powerd_wait, &wait); - goto again; + printk(KERN_ERR "powerd: shutdown execution failed\n"); + machine_power_off(); } return 0; } +int start_powerd(void) +{ + int err; + + err = kernel_thread(powerd, NULL, CLONE_FS); + if (err < 0) + printk(KERN_ERR "power: Failed to start power daemon.\n"); + else + printk(KERN_INFO "power: powerd running.\n"); + + return err; +} + static int __init has_button_interrupt(unsigned int irq, struct device_node *dp) { - if (irq == PCI_IRQ_NONE) + if (irq == 0xffffffff) return 0; if (!of_find_property(dp, "button", NULL)) return 0; @@ -130,17 +142,14 @@ static int __devinit power_probe(struct of_device *op, const struct of_device_id poweroff_method = machine_halt; /* able to use the standard halt */ if (has_button_interrupt(irq, op->node)) { - if (kernel_thread(powerd, NULL, CLONE_FS) < 0) { - printk("Failed to start power daemon.\n"); + if (start_powerd() < 0) return 0; - } - printk("powerd running.\n"); if (request_irq(irq, power_handler, 0, "power", NULL) < 0) - printk("power: Error, cannot register IRQ handler.\n"); + printk(KERN_ERR "power: Cannot setup IRQ handler.\n"); } else { - printk("not using powerd.\n"); + printk(KERN_INFO "power: Not using powerd.\n"); } return 0; @@ -164,4 +173,3 @@ void __init power_init(void) of_register_driver(&power_driver, &of_bus_type); return; } -#endif /* CONFIG_PCI */ diff --git a/include/asm-sparc64/power.h b/include/asm-sparc64/power.h new file mode 100644 index 00000000000..94495c1ac4f --- /dev/null +++ b/include/asm-sparc64/power.h @@ -0,0 +1,7 @@ +#ifndef _SPARC64_POWER_H +#define _SPARC64_POWER_H + +extern void wake_up_powerd(void); +extern int start_powerd(void); + +#endif /* !(_SPARC64_POWER_H) */ -- cgit v1.2.3-70-g09d2 From 133f09a169f3022be3de671b29658b7ecb375022 Mon Sep 17 00:00:00 2001 From: "David S. 
Miller" Date: Wed, 11 Jul 2007 23:22:55 -0700 Subject: [SPARC64]: Use more meaningful names for IRQ registry. All of the interrupts say "LDC RX" and "LDC TX" currently which is next to useless. Put a device specific prefix before "RX" and "TX" instead which makes it much more useful. Signed-off-by: David S. Miller --- arch/sparc64/kernel/ds.c | 2 +- arch/sparc64/kernel/ldc.c | 16 +++++++++++++--- arch/sparc64/kernel/viohs.c | 2 +- include/asm-sparc64/ldc.h | 2 +- 4 files changed, 16 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/arch/sparc64/kernel/ds.c b/arch/sparc64/kernel/ds.c index 22517dfd021..c7ece8c5203 100644 --- a/arch/sparc64/kernel/ds.c +++ b/arch/sparc64/kernel/ds.c @@ -598,7 +598,7 @@ static int __devinit ds_probe(struct vio_dev *vdev, } dp->lp = lp; - err = ldc_bind(lp); + err = ldc_bind(lp, "DS"); if (err) goto out_free_ldc; diff --git a/arch/sparc64/kernel/ldc.c b/arch/sparc64/kernel/ldc.c index 4cba2868596..dbb65b674a6 100644 --- a/arch/sparc64/kernel/ldc.c +++ b/arch/sparc64/kernel/ldc.c @@ -158,6 +158,10 @@ struct ldc_channel { u8 mss; u8 state; +#define LDC_IRQ_NAME_MAX 32 + char rx_irq_name[LDC_IRQ_NAME_MAX]; + char tx_irq_name[LDC_IRQ_NAME_MAX]; + struct hlist_head mh_list; struct hlist_node list; @@ -1226,25 +1230,31 @@ EXPORT_SYMBOL(ldc_free); * state. This does not initiate a handshake, ldc_connect() does * that.
*/ -int ldc_bind(struct ldc_channel *lp) +int ldc_bind(struct ldc_channel *lp, const char *name) { unsigned long hv_err, flags; int err = -EINVAL; spin_lock_irqsave(&lp->lock, flags); + if (!name) + goto out_err; + if (lp->state != LDC_STATE_INIT) goto out_err; + snprintf(lp->rx_irq_name, LDC_IRQ_NAME_MAX, "%s RX", name); + snprintf(lp->tx_irq_name, LDC_IRQ_NAME_MAX, "%s TX", name); + err = request_irq(lp->cfg.rx_irq, ldc_rx, IRQF_SAMPLE_RANDOM | IRQF_SHARED, - "LDC RX", lp); + lp->rx_irq_name, lp); if (err) goto out_err; err = request_irq(lp->cfg.tx_irq, ldc_tx, IRQF_SAMPLE_RANDOM | IRQF_SHARED, - "LDC TX", lp); + lp->tx_irq_name, lp); if (err) goto out_free_rx_irq; diff --git a/arch/sparc64/kernel/viohs.c b/arch/sparc64/kernel/viohs.c index 3eb42e3624f..b0b1b877934 100644 --- a/arch/sparc64/kernel/viohs.c +++ b/arch/sparc64/kernel/viohs.c @@ -724,7 +724,7 @@ void vio_port_up(struct vio_driver_state *vio) err = 0; if (state == LDC_STATE_INIT) { - err = ldc_bind(vio->lp); + err = ldc_bind(vio->lp, vio->name); if (err) printk(KERN_WARNING "%s: Port %lu bind failed, " "err=%d\n", diff --git a/include/asm-sparc64/ldc.h b/include/asm-sparc64/ldc.h index 1c13738f13f..3c91f269f9d 100644 --- a/include/asm-sparc64/ldc.h +++ b/include/asm-sparc64/ldc.h @@ -56,7 +56,7 @@ extern struct ldc_channel *ldc_alloc(unsigned long id, extern void ldc_free(struct ldc_channel *lp); /* Register TX and RX queues of the link with the hypervisor. */ -extern int ldc_bind(struct ldc_channel *lp); +extern int ldc_bind(struct ldc_channel *lp, const char *name); /* For non-RAW protocols we need to complete a handshake before * communication can proceed. ldc_connect() does that, if the -- cgit v1.2.3-70-g09d2 From 43fdf27470b216ebdef47e09ff83bed2f2894b13 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 12 Jul 2007 13:47:50 -0700 Subject: [SPARC64]: Abstract out mdesc accesses for better MD update handling. 
Since we have to be able to handle MD updates, having an in-tree set of data structures representing the MD objects actually makes things more painful. The MD itself is easy to parse, and we can implement the existing interfaces using direct parsing of the MD binary image. The MD is now reference counted, so accesses have to now take the form: handle = mdesc_grab(); ... operations on MD ... mdesc_release(handle); The only remaining issues are cases where code holds on to references to MD property values. mdesc_get_property() returns a direct pointer to the property value; most cases just pull in the information they need and discard the pointer, but there are a few that use the pointer directly over a long lifetime. Those will be fixed up in a subsequent changeset. A preliminary handler for MD update events from domain services is there; it is rudimentary but it works and handles all of the reference counting. It does not check the generation number of the MDs, and it does not generate an "add/delete" list for notification to interested parties about MD changes but that will be forthcoming. Signed-off-by: David S.
Miller --- arch/sparc64/kernel/ds.c | 22 +- arch/sparc64/kernel/ldc.c | 13 +- arch/sparc64/kernel/mdesc.c | 666 ++++++++++++++++++++++++-------------------- arch/sparc64/kernel/vio.c | 143 +++++----- arch/sparc64/kernel/viohs.c | 35 +-- drivers/block/sunvdc.c | 24 +- drivers/net/sunvnet.c | 34 ++- include/asm-sparc64/mdesc.h | 67 +++-- include/asm-sparc64/vio.h | 23 +- 9 files changed, 553 insertions(+), 474 deletions(-) (limited to 'include') diff --git a/arch/sparc64/kernel/ds.c b/arch/sparc64/kernel/ds.c index c7ece8c5203..9c8839d1cff 100644 --- a/arch/sparc64/kernel/ds.c +++ b/arch/sparc64/kernel/ds.c @@ -15,6 +15,7 @@ #include #include #include +#include #define DRV_MODULE_NAME "ds" #define PFX DRV_MODULE_NAME ": " @@ -170,8 +171,7 @@ static void md_update_data(struct ldc_channel *lp, rp = (struct ds_md_update_req *) (dpkt + 1); - printk(KERN_ERR PFX "MD update REQ [%lx] len=%d\n", - rp->req_num, len); + printk(KERN_ERR PFX "Machine description update.\n"); memset(&pkt, 0, sizeof(pkt)); pkt.data.tag.type = DS_DATA; @@ -181,6 +181,8 @@ static void md_update_data(struct ldc_channel *lp, pkt.res.result = DS_OK; ds_send(lp, &pkt, sizeof(pkt)); + + mdesc_update(); } struct ds_shutdown_req { @@ -555,7 +557,6 @@ static int __devinit ds_probe(struct vio_dev *vdev, const struct vio_device_id *id) { static int ds_version_printed; - struct mdesc_node *endp; struct ldc_channel_config ds_cfg = { .event = ds_event, .mtu = 4096, @@ -563,20 +564,11 @@ static int __devinit ds_probe(struct vio_dev *vdev, }; struct ldc_channel *lp; struct ds_info *dp; - const u64 *chan_id; int err; if (ds_version_printed++ == 0) printk(KERN_INFO "%s", version); - endp = vio_find_endpoint(vdev); - if (!endp) - return -ENODEV; - - chan_id = md_get_property(endp, "id", NULL); - if (!chan_id) - return -ENODEV; - dp = kzalloc(sizeof(*dp), GFP_KERNEL); err = -ENOMEM; if (!dp) @@ -588,10 +580,10 @@ static int __devinit ds_probe(struct vio_dev *vdev, dp->rcv_buf_len = 4096; - ds_cfg.tx_irq = 
endp->irqs[0]; - ds_cfg.rx_irq = endp->irqs[1]; + ds_cfg.tx_irq = vdev->tx_irq; + ds_cfg.rx_irq = vdev->rx_irq; - lp = ldc_alloc(*chan_id, &ds_cfg, dp); + lp = ldc_alloc(vdev->channel_id, &ds_cfg, dp); if (IS_ERR(lp)) { err = PTR_ERR(lp); goto out_free_rcv_buf; diff --git a/arch/sparc64/kernel/ldc.c b/arch/sparc64/kernel/ldc.c index dbb65b674a6..85a2be0b096 100644 --- a/arch/sparc64/kernel/ldc.c +++ b/arch/sparc64/kernel/ldc.c @@ -2335,15 +2335,20 @@ EXPORT_SYMBOL(ldc_free_exp_dring); static int __init ldc_init(void) { - struct mdesc_node *mp; unsigned long major, minor; + struct mdesc_handle *hp; const u64 *v; + u64 mp; - mp = md_find_node_by_name(NULL, "platform"); - if (!mp) + hp = mdesc_grab(); + if (!hp) return -ENODEV; - v = md_get_property(mp, "domaining-enabled", NULL); + mp = mdesc_node_by_name(hp, MDESC_NODE_NULL, "platform"); + if (mp == MDESC_NODE_NULL) + return -ENODEV; + + v = mdesc_get_property(hp, mp, "domaining-enabled", NULL); if (!v) return -ENODEV; diff --git a/arch/sparc64/kernel/mdesc.c b/arch/sparc64/kernel/mdesc.c index f0e16045fb1..9e5088d563c 100644 --- a/arch/sparc64/kernel/mdesc.c +++ b/arch/sparc64/kernel/mdesc.c @@ -6,6 +6,8 @@ #include #include #include +#include +#include #include #include @@ -29,7 +31,7 @@ struct mdesc_hdr { u32 node_sz; /* node block size */ u32 name_sz; /* name block size */ u32 data_sz; /* data block size */ -}; +} __attribute__((aligned(16))); struct mdesc_elem { u8 tag; @@ -53,306 +55,386 @@ struct mdesc_elem { } d; }; -static struct mdesc_hdr *main_mdesc; -static struct mdesc_node *allnodes; - -static struct mdesc_node *allnodes_tail; -static unsigned int unique_id; +struct mdesc_mem_ops { + struct mdesc_handle *(*alloc)(unsigned int mdesc_size); + void (*free)(struct mdesc_handle *handle); +}; -static struct mdesc_node **mdesc_hash; -static unsigned int mdesc_hash_size; +struct mdesc_handle { + struct list_head list; + struct mdesc_mem_ops *mops; + void *self_base; + atomic_t refcnt; + unsigned int 
handle_size; + struct mdesc_hdr mdesc; +}; -static inline unsigned int node_hashfn(u64 node) +static void mdesc_handle_init(struct mdesc_handle *hp, + unsigned int handle_size, + void *base) { - return ((unsigned int) (node ^ (node >> 8) ^ (node >> 16))) - & (mdesc_hash_size - 1); + BUG_ON(((unsigned long)&hp->mdesc) & (16UL - 1)); + + memset(hp, 0, handle_size); + INIT_LIST_HEAD(&hp->list); + hp->self_base = base; + atomic_set(&hp->refcnt, 1); + hp->handle_size = handle_size; } -static inline void hash_node(struct mdesc_node *mp) +static struct mdesc_handle *mdesc_bootmem_alloc(unsigned int mdesc_size) { - struct mdesc_node **head = &mdesc_hash[node_hashfn(mp->node)]; + struct mdesc_handle *hp; + unsigned int handle_size, alloc_size; - mp->hash_next = *head; - *head = mp; + handle_size = (sizeof(struct mdesc_handle) - + sizeof(struct mdesc_hdr) + + mdesc_size); + alloc_size = PAGE_ALIGN(handle_size); - if (allnodes_tail) { - allnodes_tail->allnodes_next = mp; - allnodes_tail = mp; - } else { - allnodes = allnodes_tail = mp; - } + hp = __alloc_bootmem(alloc_size, PAGE_SIZE, 0UL); + if (hp) + mdesc_handle_init(hp, handle_size, hp); + + return hp; } -static struct mdesc_node *find_node(u64 node) +static void mdesc_bootmem_free(struct mdesc_handle *hp) { - struct mdesc_node *mp = mdesc_hash[node_hashfn(node)]; + unsigned int alloc_size, handle_size = hp->handle_size; + unsigned long start, end; - while (mp) { - if (mp->node == node) - return mp; + BUG_ON(atomic_read(&hp->refcnt) != 0); + BUG_ON(!list_empty(&hp->list)); - mp = mp->hash_next; + alloc_size = PAGE_ALIGN(handle_size); + + start = (unsigned long) hp; + end = start + alloc_size; + + while (start < end) { + struct page *p; + + p = virt_to_page(start); + ClearPageReserved(p); + __free_page(p); + start += PAGE_SIZE; } - return NULL; } -struct property *md_find_property(const struct mdesc_node *mp, - const char *name, - int *lenp) +static struct mdesc_mem_ops bootmem_mdesc_memops = { + .alloc = 
mdesc_bootmem_alloc, + .free = mdesc_bootmem_free, +}; + +static struct mdesc_handle *mdesc_kmalloc(unsigned int mdesc_size) { - struct property *pp; + unsigned int handle_size; + void *base; - for (pp = mp->properties; pp != 0; pp = pp->next) { - if (strcasecmp(pp->name, name) == 0) { - if (lenp) - *lenp = pp->length; - break; - } + handle_size = (sizeof(struct mdesc_handle) - + sizeof(struct mdesc_hdr) + + mdesc_size); + + base = kmalloc(handle_size + 15, GFP_KERNEL); + if (base) { + struct mdesc_handle *hp; + unsigned long addr; + + addr = (unsigned long)base; + addr = (addr + 15UL) & ~15UL; + hp = (struct mdesc_handle *) addr; + + mdesc_handle_init(hp, handle_size, base); + return hp; } - return pp; + + return NULL; } -EXPORT_SYMBOL(md_find_property); -/* - * Find a property with a given name for a given node - * and return the value. - */ -const void *md_get_property(const struct mdesc_node *mp, const char *name, - int *lenp) +static void mdesc_kfree(struct mdesc_handle *hp) { - struct property *pp = md_find_property(mp, name, lenp); - return pp ? pp->value : NULL; + BUG_ON(atomic_read(&hp->refcnt) != 0); + BUG_ON(!list_empty(&hp->list)); + + kfree(hp->self_base); } -EXPORT_SYMBOL(md_get_property); -struct mdesc_node *md_find_node_by_name(struct mdesc_node *from, - const char *name) +static struct mdesc_mem_ops kmalloc_mdesc_memops = { + .alloc = mdesc_kmalloc, + .free = mdesc_kfree, +}; + +static struct mdesc_handle *mdesc_alloc(unsigned int mdesc_size, + struct mdesc_mem_ops *mops) { - struct mdesc_node *mp; + struct mdesc_handle *hp = mops->alloc(mdesc_size); - mp = from ? 
from->allnodes_next : allnodes; - for (; mp != NULL; mp = mp->allnodes_next) { - if (strcmp(mp->name, name) == 0) - break; - } - return mp; -} -EXPORT_SYMBOL(md_find_node_by_name); + if (hp) + hp->mops = mops; -static unsigned int mdesc_early_allocated; + return hp; +} -static void * __init mdesc_early_alloc(unsigned long size) +static void mdesc_free(struct mdesc_handle *hp) { - void *ret; + hp->mops->free(hp); +} - ret = __alloc_bootmem(size, SMP_CACHE_BYTES, 0UL); - if (ret == NULL) { - prom_printf("MDESC: alloc of %lu bytes failed.\n", size); - prom_halt(); - } +static struct mdesc_handle *cur_mdesc; +static LIST_HEAD(mdesc_zombie_list); +static DEFINE_SPINLOCK(mdesc_lock); - memset(ret, 0, size); +struct mdesc_handle *mdesc_grab(void) +{ + struct mdesc_handle *hp; + unsigned long flags; - mdesc_early_allocated += size; + spin_lock_irqsave(&mdesc_lock, flags); + hp = cur_mdesc; + if (hp) + atomic_inc(&hp->refcnt); + spin_unlock_irqrestore(&mdesc_lock, flags); - return ret; + return hp; } +EXPORT_SYMBOL(mdesc_grab); -static unsigned int __init count_arcs(struct mdesc_elem *ep) +void mdesc_release(struct mdesc_handle *hp) { - unsigned int ret = 0; + unsigned long flags; - ep++; - while (ep->tag != MD_NODE_END) { - if (ep->tag == MD_PROP_ARC) - ret++; - ep++; + spin_lock_irqsave(&mdesc_lock, flags); + if (atomic_dec_and_test(&hp->refcnt)) { + list_del_init(&hp->list); + hp->mops->free(hp); } - return ret; + spin_unlock_irqrestore(&mdesc_lock, flags); } +EXPORT_SYMBOL(mdesc_release); -static void __init mdesc_node_alloc(u64 node, struct mdesc_elem *ep, const char *names) +static void do_mdesc_update(struct work_struct *work) { - unsigned int num_arcs = count_arcs(ep); - struct mdesc_node *mp; + unsigned long len, real_len, status; + struct mdesc_handle *hp, *orig_hp; + unsigned long flags; + + (void) sun4v_mach_desc(0UL, 0UL, &len); + + hp = mdesc_alloc(len, &kmalloc_mdesc_memops); + if (!hp) { + printk(KERN_ERR "MD: mdesc alloc fails\n"); + return; + } + + status 
= sun4v_mach_desc(__pa(&hp->mdesc), len, &real_len); + if (status != HV_EOK || real_len > len) { + printk(KERN_ERR "MD: mdesc reread fails with %lu\n", + status); + atomic_dec(&hp->refcnt); + mdesc_free(hp); + return; + } - mp = mdesc_early_alloc(sizeof(*mp) + - (num_arcs * sizeof(struct mdesc_arc))); - mp->name = names + ep->name_offset; - mp->node = node; - mp->unique_id = unique_id++; - mp->num_arcs = num_arcs; + spin_lock_irqsave(&mdesc_lock, flags); + orig_hp = cur_mdesc; + cur_mdesc = hp; - hash_node(mp); + if (atomic_dec_and_test(&orig_hp->refcnt)) + mdesc_free(orig_hp); + else + list_add(&orig_hp->list, &mdesc_zombie_list); + spin_unlock_irqrestore(&mdesc_lock, flags); } -static inline struct mdesc_elem *node_block(struct mdesc_hdr *mdesc) +static DECLARE_WORK(mdesc_update_work, do_mdesc_update); + +void mdesc_update(void) +{ + schedule_work(&mdesc_update_work); +} + +static struct mdesc_elem *node_block(struct mdesc_hdr *mdesc) { return (struct mdesc_elem *) (mdesc + 1); } -static inline void *name_block(struct mdesc_hdr *mdesc) +static void *name_block(struct mdesc_hdr *mdesc) { return ((void *) node_block(mdesc)) + mdesc->node_sz; } -static inline void *data_block(struct mdesc_hdr *mdesc) +static void *data_block(struct mdesc_hdr *mdesc) { return ((void *) name_block(mdesc)) + mdesc->name_sz; } -/* In order to avoid recursion (the graph can be very deep) we use a - * two pass algorithm. First we allocate all the nodes and hash them. - * Then we iterate over each node, filling in the arcs and properties. 
- */ -static void __init build_all_nodes(struct mdesc_hdr *mdesc) +u64 mdesc_node_by_name(struct mdesc_handle *hp, + u64 from_node, const char *name) { - struct mdesc_elem *start, *ep; - struct mdesc_node *mp; - const char *names; - void *data; - u64 last_node; - - start = ep = node_block(mdesc); - last_node = mdesc->node_sz / 16; + struct mdesc_elem *ep = node_block(&hp->mdesc); + const char *names = name_block(&hp->mdesc); + u64 last_node = hp->mdesc.node_sz / 16; + u64 ret; + + if (from_node == MDESC_NODE_NULL) + from_node = 0; + + if (from_node >= last_node) + return MDESC_NODE_NULL; + + ret = ep[from_node].d.val; + while (ret < last_node) { + if (ep[ret].tag != MD_NODE) + return MDESC_NODE_NULL; + if (!strcmp(names + ep[ret].name_offset, name)) + break; + ret = ep[ret].d.val; + } + if (ret >= last_node) + ret = MDESC_NODE_NULL; + return ret; +} +EXPORT_SYMBOL(mdesc_node_by_name); - names = name_block(mdesc); +const void *mdesc_get_property(struct mdesc_handle *hp, u64 node, + const char *name, int *lenp) +{ + const char *names = name_block(&hp->mdesc); + u64 last_node = hp->mdesc.node_sz / 16; + void *data = data_block(&hp->mdesc); + struct mdesc_elem *ep; - while (1) { - u64 node = ep - start; + if (node == MDESC_NODE_NULL || node >= last_node) + return NULL; - if (ep->tag == MD_LIST_END) + ep = node_block(&hp->mdesc) + node; + ep++; + for (; ep->tag != MD_NODE_END; ep++) { + void *val = NULL; + int len = 0; + + switch (ep->tag) { + case MD_PROP_VAL: + val = &ep->d.val; + len = 8; break; - if (ep->tag != MD_NODE) { - prom_printf("MDESC: Inconsistent element list.\n"); - prom_halt(); - } - - mdesc_node_alloc(node, ep, names); + case MD_PROP_STR: + case MD_PROP_DATA: + val = data + ep->d.data.data_offset; + len = ep->d.data.data_len; + break; - if (ep->d.val >= last_node) { - printk("MDESC: Warning, early break out of node scan.\n"); - printk("MDESC: Next node [%lu] last_node [%lu].\n", - node, last_node); + default: break; } + if (!val) + continue; - ep = 
start + ep->d.val; + if (!strcmp(names + ep->name_offset, name)) { + if (lenp) + *lenp = len; + return val; + } } - data = data_block(mdesc); - for (mp = allnodes; mp; mp = mp->allnodes_next) { - struct mdesc_elem *ep = start + mp->node; - struct property **link = &mp->properties; - unsigned int this_arc = 0; - - ep++; - while (ep->tag != MD_NODE_END) { - switch (ep->tag) { - case MD_PROP_ARC: { - struct mdesc_node *target; - - if (this_arc >= mp->num_arcs) { - prom_printf("MDESC: ARC overrun [%u:%u]\n", - this_arc, mp->num_arcs); - prom_halt(); - } - target = find_node(ep->d.val); - if (!target) { - printk("MDESC: Warning, arc points to " - "missing node, ignoring.\n"); - break; - } - mp->arcs[this_arc].name = - (names + ep->name_offset); - mp->arcs[this_arc].arc = target; - this_arc++; - break; - } + return NULL; +} +EXPORT_SYMBOL(mdesc_get_property); - case MD_PROP_VAL: - case MD_PROP_STR: - case MD_PROP_DATA: { - struct property *p = mdesc_early_alloc(sizeof(*p)); - - p->unique_id = unique_id++; - p->name = (char *) names + ep->name_offset; - if (ep->tag == MD_PROP_VAL) { - p->value = &ep->d.val; - p->length = 8; - } else { - p->value = data + ep->d.data.data_offset; - p->length = ep->d.data.data_len; - } - *link = p; - link = &p->next; - break; - } +u64 mdesc_next_arc(struct mdesc_handle *hp, u64 from, const char *arc_type) +{ + struct mdesc_elem *ep, *base = node_block(&hp->mdesc); + const char *names = name_block(&hp->mdesc); + u64 last_node = hp->mdesc.node_sz / 16; - case MD_NOOP: - break; + if (from == MDESC_NODE_NULL || from >= last_node) + return MDESC_NODE_NULL; - default: - printk("MDESC: Warning, ignoring unknown tag type %02x\n", - ep->tag); - } - ep++; - } + ep = base + from; + + ep++; + for (; ep->tag != MD_NODE_END; ep++) { + if (ep->tag != MD_PROP_ARC) + continue; + + if (strcmp(names + ep->name_offset, arc_type)) + continue; + + return ep - base; } + + return MDESC_NODE_NULL; } +EXPORT_SYMBOL(mdesc_next_arc); -static unsigned int __init 
count_nodes(struct mdesc_hdr *mdesc) +u64 mdesc_arc_target(struct mdesc_handle *hp, u64 arc) { - struct mdesc_elem *ep = node_block(mdesc); - struct mdesc_elem *end; - unsigned int cnt = 0; - - end = ((void *)ep) + mdesc->node_sz; - while (ep < end) { - if (ep->tag == MD_NODE) - cnt++; - ep++; - } - return cnt; + struct mdesc_elem *ep, *base = node_block(&hp->mdesc); + + ep = base + arc; + + return ep->d.val; } +EXPORT_SYMBOL(mdesc_arc_target); + +const char *mdesc_node_name(struct mdesc_handle *hp, u64 node) +{ + struct mdesc_elem *ep, *base = node_block(&hp->mdesc); + const char *names = name_block(&hp->mdesc); + u64 last_node = hp->mdesc.node_sz / 16; + + if (node == MDESC_NODE_NULL || node >= last_node) + return NULL; + + ep = base + node; + if (ep->tag != MD_NODE) + return NULL; + + return names + ep->name_offset; +} +EXPORT_SYMBOL(mdesc_node_name); static void __init report_platform_properties(void) { - struct mdesc_node *pn = md_find_node_by_name(NULL, "platform"); + struct mdesc_handle *hp = mdesc_grab(); + u64 pn = mdesc_node_by_name(hp, MDESC_NODE_NULL, "platform"); const char *s; const u64 *v; - if (!pn) { + if (pn == MDESC_NODE_NULL) { prom_printf("No platform node in machine-description.\n"); prom_halt(); } - s = md_get_property(pn, "banner-name", NULL); + s = mdesc_get_property(hp, pn, "banner-name", NULL); printk("PLATFORM: banner-name [%s]\n", s); - s = md_get_property(pn, "name", NULL); + s = mdesc_get_property(hp, pn, "name", NULL); printk("PLATFORM: name [%s]\n", s); - v = md_get_property(pn, "hostid", NULL); + v = mdesc_get_property(hp, pn, "hostid", NULL); if (v) printk("PLATFORM: hostid [%08lx]\n", *v); - v = md_get_property(pn, "serial#", NULL); + v = mdesc_get_property(hp, pn, "serial#", NULL); if (v) printk("PLATFORM: serial# [%08lx]\n", *v); - v = md_get_property(pn, "stick-frequency", NULL); + v = mdesc_get_property(hp, pn, "stick-frequency", NULL); printk("PLATFORM: stick-frequency [%08lx]\n", *v); - v = md_get_property(pn, 
"mac-address", NULL); + v = mdesc_get_property(hp, pn, "mac-address", NULL); if (v) printk("PLATFORM: mac-address [%lx]\n", *v); - v = md_get_property(pn, "watchdog-resolution", NULL); + v = mdesc_get_property(hp, pn, "watchdog-resolution", NULL); if (v) printk("PLATFORM: watchdog-resolution [%lu ms]\n", *v); - v = md_get_property(pn, "watchdog-max-timeout", NULL); + v = mdesc_get_property(hp, pn, "watchdog-max-timeout", NULL); if (v) printk("PLATFORM: watchdog-max-timeout [%lu ms]\n", *v); - v = md_get_property(pn, "max-cpus", NULL); + v = mdesc_get_property(hp, pn, "max-cpus", NULL); if (v) printk("PLATFORM: max-cpus [%lu]\n", *v); + + mdesc_release(hp); } static int inline find_in_proplist(const char *list, const char *match, int len) @@ -369,15 +451,17 @@ static int inline find_in_proplist(const char *list, const char *match, int len) return 0; } -static void __init fill_in_one_cache(cpuinfo_sparc *c, struct mdesc_node *mp) +static void __init fill_in_one_cache(cpuinfo_sparc *c, + struct mdesc_handle *hp, + u64 mp) { - const u64 *level = md_get_property(mp, "level", NULL); - const u64 *size = md_get_property(mp, "size", NULL); - const u64 *line_size = md_get_property(mp, "line-size", NULL); + const u64 *level = mdesc_get_property(hp, mp, "level", NULL); + const u64 *size = mdesc_get_property(hp, mp, "size", NULL); + const u64 *line_size = mdesc_get_property(hp, mp, "line-size", NULL); const char *type; int type_len; - type = md_get_property(mp, "type", &type_len); + type = mdesc_get_property(hp, mp, "type", &type_len); switch (*level) { case 1: @@ -400,48 +484,44 @@ static void __init fill_in_one_cache(cpuinfo_sparc *c, struct mdesc_node *mp) } if (*level == 1) { - unsigned int i; + u64 a; - for (i = 0; i < mp->num_arcs; i++) { - struct mdesc_node *t = mp->arcs[i].arc; + mdesc_for_each_arc(a, hp, mp, MDESC_ARC_TYPE_FWD) { + u64 target = mdesc_arc_target(hp, a); + const char *name = mdesc_node_name(hp, target); - if (strcmp(mp->arcs[i].name, "fwd")) - continue; 
- - if (!strcmp(t->name, "cache")) - fill_in_one_cache(c, t); + if (!strcmp(name, "cache")) + fill_in_one_cache(c, hp, target); } } } -static void __init mark_core_ids(struct mdesc_node *mp, int core_id) +static void __init mark_core_ids(struct mdesc_handle *hp, u64 mp, int core_id) { - unsigned int i; + u64 a; - for (i = 0; i < mp->num_arcs; i++) { - struct mdesc_node *t = mp->arcs[i].arc; + mdesc_for_each_arc(a, hp, mp, MDESC_ARC_TYPE_BACK) { + u64 t = mdesc_arc_target(hp, a); + const char *name; const u64 *id; - if (strcmp(mp->arcs[i].name, "back")) - continue; - - if (!strcmp(t->name, "cpu")) { - id = md_get_property(t, "id", NULL); + name = mdesc_node_name(hp, t); + if (!strcmp(name, "cpu")) { + id = mdesc_get_property(hp, t, "id", NULL); if (*id < NR_CPUS) cpu_data(*id).core_id = core_id; } else { - unsigned int j; + u64 j; - for (j = 0; j < t->num_arcs; j++) { - struct mdesc_node *n = t->arcs[j].arc; + mdesc_for_each_arc(j, hp, t, MDESC_ARC_TYPE_BACK) { + u64 n = mdesc_arc_target(hp, j); + const char *n_name; - if (strcmp(t->arcs[j].name, "back")) + n_name = mdesc_node_name(hp, n); + if (strcmp(n_name, "cpu")) continue; - if (strcmp(n->name, "cpu")) - continue; - - id = md_get_property(n, "id", NULL); + id = mdesc_get_property(hp, n, "id", NULL); if (*id < NR_CPUS) cpu_data(*id).core_id = core_id; } @@ -449,75 +529,76 @@ static void __init mark_core_ids(struct mdesc_node *mp, int core_id) } } -static void __init set_core_ids(void) +static void __init set_core_ids(struct mdesc_handle *hp) { - struct mdesc_node *mp; int idx; + u64 mp; idx = 1; - md_for_each_node_by_name(mp, "cache") { - const u64 *level = md_get_property(mp, "level", NULL); + mdesc_for_each_node_by_name(hp, mp, "cache") { + const u64 *level; const char *type; int len; + level = mdesc_get_property(hp, mp, "level", NULL); if (*level != 1) continue; - type = md_get_property(mp, "type", &len); + type = mdesc_get_property(hp, mp, "type", &len); if (!find_in_proplist(type, "instn", len)) continue; - 
mark_core_ids(mp, idx); + mark_core_ids(hp, mp, idx); idx++; } } -static void __init mark_proc_ids(struct mdesc_node *mp, int proc_id) +static void __init mark_proc_ids(struct mdesc_handle *hp, u64 mp, int proc_id) { - int i; + u64 a; - for (i = 0; i < mp->num_arcs; i++) { - struct mdesc_node *t = mp->arcs[i].arc; + mdesc_for_each_arc(a, hp, mp, MDESC_ARC_TYPE_BACK) { + u64 t = mdesc_arc_target(hp, a); + const char *name; const u64 *id; - if (strcmp(mp->arcs[i].name, "back")) - continue; - - if (strcmp(t->name, "cpu")) + name = mdesc_node_name(hp, t); + if (strcmp(name, "cpu")) continue; - id = md_get_property(t, "id", NULL); + id = mdesc_get_property(hp, t, "id", NULL); if (*id < NR_CPUS) cpu_data(*id).proc_id = proc_id; } } -static void __init __set_proc_ids(const char *exec_unit_name) +static void __init __set_proc_ids(struct mdesc_handle *hp, + const char *exec_unit_name) { - struct mdesc_node *mp; int idx; + u64 mp; idx = 0; - md_for_each_node_by_name(mp, exec_unit_name) { + mdesc_for_each_node_by_name(hp, mp, exec_unit_name) { const char *type; int len; - type = md_get_property(mp, "type", &len); + type = mdesc_get_property(hp, mp, "type", &len); if (!find_in_proplist(type, "int", len) && !find_in_proplist(type, "integer", len)) continue; - mark_proc_ids(mp, idx); + mark_proc_ids(hp, mp, idx); idx++; } } -static void __init set_proc_ids(void) +static void __init set_proc_ids(struct mdesc_handle *hp) { - __set_proc_ids("exec_unit"); - __set_proc_ids("exec-unit"); + __set_proc_ids(hp, "exec_unit"); + __set_proc_ids(hp, "exec-unit"); } static void __init get_one_mondo_bits(const u64 *p, unsigned int *mask, unsigned char def) @@ -538,35 +619,37 @@ use_default: *mask = ((1U << def) * 64U) - 1U; } -static void __init get_mondo_data(struct mdesc_node *mp, struct trap_per_cpu *tb) +static void __init get_mondo_data(struct mdesc_handle *hp, u64 mp, + struct trap_per_cpu *tb) { const u64 *val; - val = md_get_property(mp, "q-cpu-mondo-#bits", NULL); + val = 
mdesc_get_property(hp, mp, "q-cpu-mondo-#bits", NULL); get_one_mondo_bits(val, &tb->cpu_mondo_qmask, 7); - val = md_get_property(mp, "q-dev-mondo-#bits", NULL); + val = mdesc_get_property(hp, mp, "q-dev-mondo-#bits", NULL); get_one_mondo_bits(val, &tb->dev_mondo_qmask, 7); - val = md_get_property(mp, "q-resumable-#bits", NULL); + val = mdesc_get_property(hp, mp, "q-resumable-#bits", NULL); get_one_mondo_bits(val, &tb->resum_qmask, 6); - val = md_get_property(mp, "q-nonresumable-#bits", NULL); + val = mdesc_get_property(hp, mp, "q-nonresumable-#bits", NULL); get_one_mondo_bits(val, &tb->nonresum_qmask, 2); } static void __init mdesc_fill_in_cpu_data(void) { - struct mdesc_node *mp; + struct mdesc_handle *hp = mdesc_grab(); + u64 mp; ncpus_probed = 0; - md_for_each_node_by_name(mp, "cpu") { - const u64 *id = md_get_property(mp, "id", NULL); - const u64 *cfreq = md_get_property(mp, "clock-frequency", NULL); + mdesc_for_each_node_by_name(hp, mp, "cpu") { + const u64 *id = mdesc_get_property(hp, mp, "id", NULL); + const u64 *cfreq = mdesc_get_property(hp, mp, "clock-frequency", NULL); struct trap_per_cpu *tb; cpuinfo_sparc *c; - unsigned int i; int cpuid; + u64 a; ncpus_probed++; @@ -589,29 +672,25 @@ static void __init mdesc_fill_in_cpu_data(void) c->clock_tick = *cfreq; tb = &trap_block[cpuid]; - get_mondo_data(mp, tb); - - for (i = 0; i < mp->num_arcs; i++) { - struct mdesc_node *t = mp->arcs[i].arc; - unsigned int j; + get_mondo_data(hp, mp, tb); - if (strcmp(mp->arcs[i].name, "fwd")) - continue; + mdesc_for_each_arc(a, hp, mp, MDESC_ARC_TYPE_FWD) { + u64 j, t = mdesc_arc_target(hp, a); + const char *t_name; - if (!strcmp(t->name, "cache")) { - fill_in_one_cache(c, t); + t_name = mdesc_node_name(hp, t); + if (!strcmp(t_name, "cache")) { + fill_in_one_cache(c, hp, t); continue; } - for (j = 0; j < t->num_arcs; j++) { - struct mdesc_node *n; + mdesc_for_each_arc(j, hp, t, MDESC_ARC_TYPE_FWD) { + u64 n = mdesc_arc_target(hp, j); + const char *n_name; - n = 
t->arcs[j].arc; - if (strcmp(t->arcs[j].name, "fwd")) - continue; - - if (!strcmp(n->name, "cache")) - fill_in_one_cache(c, n); + n_name = mdesc_node_name(hp, n); + if (!strcmp(n_name, "cache")) + fill_in_one_cache(c, hp, n); } } @@ -628,44 +707,39 @@ static void __init mdesc_fill_in_cpu_data(void) sparc64_multi_core = 1; #endif - set_core_ids(); - set_proc_ids(); + set_core_ids(hp); + set_proc_ids(hp); smp_fill_in_sib_core_maps(); + + mdesc_release(hp); } void __init sun4v_mdesc_init(void) { + struct mdesc_handle *hp; unsigned long len, real_len, status; (void) sun4v_mach_desc(0UL, 0UL, &len); printk("MDESC: Size is %lu bytes.\n", len); - main_mdesc = mdesc_early_alloc(len); + hp = mdesc_alloc(len, &bootmem_mdesc_memops); + if (hp == NULL) { + prom_printf("MDESC: alloc of %lu bytes failed.\n", len); + prom_halt(); + } - status = sun4v_mach_desc(__pa(main_mdesc), len, &real_len); + status = sun4v_mach_desc(__pa(&hp->mdesc), len, &real_len); if (status != HV_EOK || real_len > len) { prom_printf("sun4v_mach_desc fails, err(%lu), " "len(%lu), real_len(%lu)\n", status, len, real_len); + mdesc_free(hp); prom_halt(); } - len = count_nodes(main_mdesc); - printk("MDESC: %lu nodes.\n", len); - - len = roundup_pow_of_two(len); - - mdesc_hash = mdesc_early_alloc(len * sizeof(struct mdesc_node *)); - mdesc_hash_size = len; - - printk("MDESC: Hash size %lu entries.\n", len); - - build_all_nodes(main_mdesc); - - printk("MDESC: Built graph with %u bytes of memory.\n", - mdesc_early_allocated); + cur_mdesc = hp; report_platform_properties(); mdesc_fill_in_cpu_data(); diff --git a/arch/sparc64/kernel/vio.c b/arch/sparc64/kernel/vio.c index 7eccc91cd59..64f082555bc 100644 --- a/arch/sparc64/kernel/vio.c +++ b/arch/sparc64/kernel/vio.c @@ -147,30 +147,6 @@ void vio_unregister_driver(struct vio_driver *viodrv) } EXPORT_SYMBOL(vio_unregister_driver); -struct mdesc_node *vio_find_endpoint(struct vio_dev *vdev) -{ - struct mdesc_node *endp, *mp = vdev->mp; - int i; - - endp = NULL; - for 
(i = 0; i < mp->num_arcs; i++) { - struct mdesc_node *t; - - if (strcmp(mp->arcs[i].name, "fwd")) - continue; - - t = mp->arcs[i].arc; - if (strcmp(t->name, "channel-endpoint")) - continue; - - endp = t; - break; - } - - return endp; -} -EXPORT_SYMBOL(vio_find_endpoint); - static void __devinit vio_dev_release(struct device *dev) { kfree(to_vio_dev(dev)); @@ -197,22 +173,47 @@ struct device_node *cdev_node; static struct vio_dev *root_vdev; static u64 cdev_cfg_handle; -static struct vio_dev *vio_create_one(struct mdesc_node *mp, +static void vio_fill_channel_info(struct mdesc_handle *hp, u64 mp, + struct vio_dev *vdev) +{ + u64 a; + + mdesc_for_each_arc(a, hp, mp, MDESC_ARC_TYPE_FWD) { + const u64 *chan_id; + const u64 *irq; + u64 target; + + target = mdesc_arc_target(hp, a); + + irq = mdesc_get_property(hp, target, "tx-ino", NULL); + if (irq) + vdev->tx_irq = sun4v_build_virq(cdev_cfg_handle, *irq); + + irq = mdesc_get_property(hp, target, "rx-ino", NULL); + if (irq) + vdev->rx_irq = sun4v_build_virq(cdev_cfg_handle, *irq); + + chan_id = mdesc_get_property(hp, target, "id", NULL); + if (chan_id) + vdev->channel_id = *chan_id; + } +} + +static struct vio_dev *vio_create_one(struct mdesc_handle *hp, u64 mp, struct device *parent) { const char *type, *compat; struct device_node *dp; struct vio_dev *vdev; - const u64 *irq; int err, clen; - type = md_get_property(mp, "device-type", NULL); + type = mdesc_get_property(hp, mp, "device-type", NULL); if (!type) { - type = md_get_property(mp, "name", NULL); + type = mdesc_get_property(hp, mp, "name", NULL); if (!type) - type = mp->name; + type = mdesc_node_name(hp, mp); } - compat = md_get_property(mp, "device-type", &clen); + compat = mdesc_get_property(hp, mp, "device-type", &clen); vdev = kzalloc(sizeof(*vdev), GFP_KERNEL); if (!vdev) { @@ -225,15 +226,13 @@ static struct vio_dev *vio_create_one(struct mdesc_node *mp, vdev->compat = compat; vdev->compat_len = clen; - irq = md_get_property(mp, "tx-ino", NULL); - if (irq) - 
mp->irqs[0] = sun4v_build_virq(cdev_cfg_handle, *irq); + vdev->channel_id = ~0UL; + vdev->tx_irq = ~0; + vdev->rx_irq = ~0; - irq = md_get_property(mp, "rx-ino", NULL); - if (irq) - mp->irqs[1] = sun4v_build_virq(cdev_cfg_handle, *irq); + vio_fill_channel_info(hp, mp, vdev); - snprintf(vdev->dev.bus_id, BUS_ID_SIZE, "%lx", mp->node); + snprintf(vdev->dev.bus_id, BUS_ID_SIZE, "%lx", mp); vdev->dev.parent = parent; vdev->dev.bus = &vio_bus_type; vdev->dev.release = vio_dev_release; @@ -267,46 +266,43 @@ static struct vio_dev *vio_create_one(struct mdesc_node *mp, return vdev; } -static void walk_tree(struct mdesc_node *n, struct vio_dev *parent) +static void walk_tree(struct mdesc_handle *hp, u64 n, struct vio_dev *parent) { - int i; + u64 a; - for (i = 0; i < n->num_arcs; i++) { - struct mdesc_node *mp; + mdesc_for_each_arc(a, hp, n, MDESC_ARC_TYPE_FWD) { struct vio_dev *vdev; + u64 target; - if (strcmp(n->arcs[i].name, "fwd")) - continue; - - mp = n->arcs[i].arc; - - vdev = vio_create_one(mp, &parent->dev); - if (vdev && mp->num_arcs) - walk_tree(mp, vdev); + target = mdesc_arc_target(hp, a); + vdev = vio_create_one(hp, target, &parent->dev); + if (vdev) + walk_tree(hp, target, vdev); } } -static void create_devices(struct mdesc_node *root) +static void create_devices(struct mdesc_handle *hp, u64 root) { - struct mdesc_node *mp; + u64 mp; - root_vdev = vio_create_one(root, NULL); + root_vdev = vio_create_one(hp, root, NULL); if (!root_vdev) { printk(KERN_ERR "VIO: Coult not create root device.\n"); return; } - walk_tree(root, root_vdev); + walk_tree(hp, root, root_vdev); /* Domain services is odd as it doesn't sit underneath the * channel-devices node, so we plug it in manually. 
*/ - mp = md_find_node_by_name(NULL, "domain-services"); - if (mp) { - struct vio_dev *parent = vio_create_one(mp, &root_vdev->dev); + mp = mdesc_node_by_name(hp, MDESC_NODE_NULL, "domain-services"); + if (mp != MDESC_NODE_NULL) { + struct vio_dev *parent = vio_create_one(hp, mp, + &root_vdev->dev); if (parent) - walk_tree(mp, parent); + walk_tree(hp, mp, parent); } } @@ -316,40 +312,47 @@ const char *cfg_handle_prop = "cfg-handle"; static int __init vio_init(void) { - struct mdesc_node *root; + struct mdesc_handle *hp; const char *compat; const u64 *cfg_handle; int err, len; + u64 root; + + hp = mdesc_grab(); + if (!hp) + return 0; - root = md_find_node_by_name(NULL, channel_devices_node); - if (!root) { + root = mdesc_node_by_name(hp, MDESC_NODE_NULL, channel_devices_node); + if (root == MDESC_NODE_NULL) { printk(KERN_INFO "VIO: No channel-devices MDESC node.\n"); + mdesc_release(hp); return 0; } cdev_node = of_find_node_by_name(NULL, "channel-devices"); + err = -ENODEV; if (!cdev_node) { printk(KERN_INFO "VIO: No channel-devices OBP node.\n"); - return -ENODEV; + goto out_release; } - compat = md_get_property(root, "compatible", &len); + compat = mdesc_get_property(hp, root, "compatible", &len); if (!compat) { printk(KERN_ERR "VIO: Channel devices lacks compatible " "property\n"); - return -ENODEV; + goto out_release; } if (!find_in_proplist(compat, channel_devices_compat, len)) { printk(KERN_ERR "VIO: Channel devices node lacks (%s) " "compat entry.\n", channel_devices_compat); - return -ENODEV; + goto out_release; } - cfg_handle = md_get_property(root, cfg_handle_prop, NULL); + cfg_handle = mdesc_get_property(hp, root, cfg_handle_prop, NULL); if (!cfg_handle) { printk(KERN_ERR "VIO: Channel devices lacks %s property\n", cfg_handle_prop); - return -ENODEV; + goto out_release; } cdev_cfg_handle = *cfg_handle; @@ -361,9 +364,15 @@ static int __init vio_init(void) return err; } - create_devices(root); + create_devices(hp, root); + + mdesc_release(hp); return 0; + 
+out_release: + mdesc_release(hp); + return err; } postcore_initcall(vio_init); diff --git a/arch/sparc64/kernel/viohs.c b/arch/sparc64/kernel/viohs.c index b0b1b877934..15613add45d 100644 --- a/arch/sparc64/kernel/viohs.c +++ b/arch/sparc64/kernel/viohs.c @@ -136,7 +136,7 @@ static int process_unknown(struct vio_driver_state *vio, void *arg) pkt->type, pkt->stype, pkt->stype_env, pkt->sid); printk(KERN_ERR "vio: ID[%lu] Resetting connection.\n", - vio->channel_id); + vio->vdev->channel_id); ldc_disconnect(vio->lp); @@ -678,21 +678,11 @@ extern int vio_ldc_alloc(struct vio_driver_state *vio, { struct ldc_channel_config cfg = *base_cfg; struct ldc_channel *lp; - const u64 *id; - id = md_get_property(vio->endpoint, "id", NULL); - if (!id) { - printk(KERN_ERR "%s: Channel lacks id property.\n", - vio->name); - return -ENODEV; - } - - vio->channel_id = *id; - - cfg.rx_irq = vio->rx_irq; - cfg.tx_irq = vio->tx_irq; + cfg.tx_irq = vio->vdev->tx_irq; + cfg.rx_irq = vio->vdev->rx_irq; - lp = ldc_alloc(vio->channel_id, &cfg, event_arg); + lp = ldc_alloc(vio->vdev->channel_id, &cfg, event_arg); if (IS_ERR(lp)) return PTR_ERR(lp); @@ -728,7 +718,7 @@ void vio_port_up(struct vio_driver_state *vio) if (err) printk(KERN_WARNING "%s: Port %lu bind failed, " "err=%d\n", - vio->name, vio->channel_id, err); + vio->name, vio->vdev->channel_id, err); } if (!err) { @@ -736,7 +726,7 @@ void vio_port_up(struct vio_driver_state *vio) if (err) printk(KERN_WARNING "%s: Port %lu connect failed, " "err=%d\n", - vio->name, vio->channel_id, err); + vio->name, vio->vdev->channel_id, err); } if (err) { unsigned long expires = jiffies + HZ; @@ -757,9 +747,9 @@ static void vio_port_timer(unsigned long _arg) } int vio_driver_init(struct vio_driver_state *vio, struct vio_dev *vdev, - u8 dev_class, struct mdesc_node *channel_endpoint, - struct vio_version *ver_table, int ver_table_size, - struct vio_driver_ops *ops, char *name) + u8 dev_class, struct vio_version *ver_table, + int ver_table_size, 
struct vio_driver_ops *ops, + char *name) { switch (dev_class) { case VDEV_NETWORK: @@ -777,9 +767,6 @@ int vio_driver_init(struct vio_driver_state *vio, struct vio_dev *vdev, !ops->handshake_complete) return -EINVAL; - if (!channel_endpoint) - return -EINVAL; - if (!ver_table || ver_table_size < 0) return -EINVAL; @@ -793,10 +780,6 @@ int vio_driver_init(struct vio_driver_state *vio, struct vio_dev *vdev, vio->dev_class = dev_class; vio->vdev = vdev; - vio->endpoint = channel_endpoint; - vio->tx_irq = channel_endpoint->irqs[0]; - vio->rx_irq = channel_endpoint->irqs[1]; - vio->ver_table = ver_table; vio->ver_table_entries = ver_table_size; diff --git a/drivers/block/sunvdc.c b/drivers/block/sunvdc.c index 8dbbeace52a..0f5e3caf85d 100644 --- a/drivers/block/sunvdc.c +++ b/drivers/block/sunvdc.c @@ -750,7 +750,7 @@ static struct vio_driver_ops vdc_vio_ops = { static int __devinit vdc_port_probe(struct vio_dev *vdev, const struct vio_device_id *id) { - struct mdesc_node *endp; + struct mdesc_handle *hp; struct vdc_port *port; unsigned long flags; struct vdc *vp; @@ -763,26 +763,24 @@ static int __devinit vdc_port_probe(struct vio_dev *vdev, return -ENODEV; } - endp = vio_find_endpoint(vdev); - if (!endp) { - printk(KERN_ERR PFX "Port lacks channel-endpoint.\n"); - return -ENODEV; - } + hp = mdesc_grab(); - port_id = md_get_property(vdev->mp, "id", NULL); + port_id = mdesc_get_property(hp, vdev->mp, "id", NULL); + err = -ENODEV; if (!port_id) { printk(KERN_ERR PFX "Port lacks id property.\n"); - return -ENODEV; + goto err_out_release_mdesc; } if ((*port_id << PARTITION_SHIFT) & ~(u64)MINORMASK) { printk(KERN_ERR PFX "Port id [%lu] too large.\n", *port_id); - return -ENODEV; + goto err_out_release_mdesc; } port = kzalloc(sizeof(*port), GFP_KERNEL); + err = -ENOMEM; if (!port) { printk(KERN_ERR PFX "Cannot allocate vdc_port.\n"); - return -ENOMEM; + goto err_out_release_mdesc; } port->vp = vp; @@ -797,7 +795,7 @@ static int __devinit vdc_port_probe(struct vio_dev *vdev, 
snprintf(port->disk_name, sizeof(port->disk_name), VDCBLK_NAME "%c", 'a' + (port->dev_no % 26)); - err = vio_driver_init(&port->vio, vdev, VDEV_DISK, endp, + err = vio_driver_init(&port->vio, vdev, VDEV_DISK, vdc_versions, ARRAY_SIZE(vdc_versions), &vdc_vio_ops, port->disk_name); if (err) @@ -828,6 +826,8 @@ static int __devinit vdc_port_probe(struct vio_dev *vdev, dev_set_drvdata(&vdev->dev, port); + mdesc_release(hp); + return 0; err_out_free_tx_ring: @@ -839,6 +839,8 @@ err_out_free_ldc: err_out_free_port: kfree(port); +err_out_release_mdesc: + mdesc_release(hp); return err; } diff --git a/drivers/net/sunvnet.c b/drivers/net/sunvnet.c index d764e4ccba5..8a667c13fae 100644 --- a/drivers/net/sunvnet.c +++ b/drivers/net/sunvnet.c @@ -892,7 +892,7 @@ const char *remote_macaddr_prop = "remote-mac-address"; static int __devinit vnet_port_probe(struct vio_dev *vdev, const struct vio_device_id *id) { - struct mdesc_node *endp; + struct mdesc_handle *hp; struct vnet_port *port; unsigned long flags; struct vnet *vp; @@ -905,23 +905,21 @@ static int __devinit vnet_port_probe(struct vio_dev *vdev, return -ENODEV; } - rmac = md_get_property(vdev->mp, remote_macaddr_prop, &len); + hp = mdesc_grab(); + + rmac = mdesc_get_property(hp, vdev->mp, remote_macaddr_prop, &len); + err = -ENODEV; if (!rmac) { printk(KERN_ERR PFX "Port lacks %s property.\n", remote_macaddr_prop); - return -ENODEV; - } - - endp = vio_find_endpoint(vdev); - if (!endp) { - printk(KERN_ERR PFX "Port lacks channel-endpoint.\n"); - return -ENODEV; + goto err_out_put_mdesc; } port = kzalloc(sizeof(*port), GFP_KERNEL); + err = -ENOMEM; if (!port) { printk(KERN_ERR PFX "Cannot allocate vnet_port.\n"); - return -ENOMEM; + goto err_out_put_mdesc; } for (i = 0; i < ETH_ALEN; i++) @@ -929,7 +927,7 @@ static int __devinit vnet_port_probe(struct vio_dev *vdev, port->vp = vp; - err = vio_driver_init(&port->vio, vdev, VDEV_NETWORK, endp, + err = vio_driver_init(&port->vio, vdev, VDEV_NETWORK, vnet_versions, 
ARRAY_SIZE(vnet_versions), &vnet_vio_ops, vp->dev->name); if (err) @@ -947,7 +945,7 @@ static int __devinit vnet_port_probe(struct vio_dev *vdev, INIT_LIST_HEAD(&port->list); switch_port = 0; - if (md_get_property(vdev->mp, "switch-port", NULL) != NULL) + if (mdesc_get_property(hp, vdev->mp, "switch-port", NULL) != NULL) switch_port = 1; spin_lock_irqsave(&vp->lock, flags); @@ -969,6 +967,8 @@ static int __devinit vnet_port_probe(struct vio_dev *vdev, vio_port_up(&port->vio); + mdesc_release(hp); + return 0; err_out_free_ldc: @@ -977,6 +977,8 @@ err_out_free_ldc: err_out_free_port: kfree(port); +err_out_put_mdesc: + mdesc_release(hp); return err; } @@ -1029,6 +1031,7 @@ static int __devinit vnet_probe(struct vio_dev *vdev, const struct vio_device_id *id) { static int vnet_version_printed; + struct mdesc_handle *hp; struct net_device *dev; struct vnet *vp; const u64 *mac; @@ -1037,7 +1040,9 @@ static int __devinit vnet_probe(struct vio_dev *vdev, if (vnet_version_printed++ == 0) printk(KERN_INFO "%s", version); - mac = md_get_property(vdev->mp, local_mac_prop, &len); + hp = mdesc_grab(); + + mac = mdesc_get_property(hp, vdev->mp, local_mac_prop, &len); if (!mac) { printk(KERN_ERR PFX "vnet lacks %s property.\n", local_mac_prop); @@ -1093,12 +1098,15 @@ static int __devinit vnet_probe(struct vio_dev *vdev, dev_set_drvdata(&vdev->dev, vp); + mdesc_release(hp); + return 0; err_out_free_dev: free_netdev(dev); err_out: + mdesc_release(hp); return err; } diff --git a/include/asm-sparc64/mdesc.h b/include/asm-sparc64/mdesc.h index c6383982b53..bbb0c0bed48 100644 --- a/include/asm-sparc64/mdesc.h +++ b/include/asm-sparc64/mdesc.h @@ -4,36 +4,43 @@ #include #include -struct mdesc_node; -struct mdesc_arc { - const char *name; - struct mdesc_node *arc; -}; - -struct mdesc_node { - const char *name; - u64 node; - unsigned int unique_id; - unsigned int num_arcs; - unsigned int irqs[2]; - struct property *properties; - struct mdesc_node *hash_next; - struct mdesc_node 
*allnodes_next; - struct mdesc_arc arcs[0]; -}; - -extern struct mdesc_node *md_find_node_by_name(struct mdesc_node *from, - const char *name); -#define md_for_each_node_by_name(__mn, __name) \ - for (__mn = md_find_node_by_name(NULL, __name); __mn; \ - __mn = md_find_node_by_name(__mn, __name)) - -extern struct property *md_find_property(const struct mdesc_node *mp, - const char *name, - int *lenp); -extern const void *md_get_property(const struct mdesc_node *mp, - const char *name, - int *lenp); +struct mdesc_handle; + +/* Machine description operations are to be surrounded by grab and + * release calls. The mdesc_handle returned from the grab is + * the first argument to all of the operational calls that work + * on mdescs. + */ +extern struct mdesc_handle *mdesc_grab(void); +extern void mdesc_release(struct mdesc_handle *); + +#define MDESC_NODE_NULL (~(u64)0) + +extern u64 mdesc_node_by_name(struct mdesc_handle *handle, + u64 from_node, const char *name); +#define mdesc_for_each_node_by_name(__hdl, __node, __name) \ + for (__node = mdesc_node_by_name(__hdl, MDESC_NODE_NULL, __name); \ + (__node) != MDESC_NODE_NULL; \ + __node = mdesc_node_by_name(__hdl, __node, __name)) + +extern const void *mdesc_get_property(struct mdesc_handle *handle, + u64 node, const char *name, int *lenp); + +#define MDESC_ARC_TYPE_FWD "fwd" +#define MDESC_ARC_TYPE_BACK "back" + +extern u64 mdesc_next_arc(struct mdesc_handle *handle, u64 from, + const char *arc_type); +#define mdesc_for_each_arc(__arc, __hdl, __node, __type) \ + for (__arc = mdesc_next_arc(__hdl, __node, __type); \ + (__arc) != MDESC_NODE_NULL; \ + __arc = mdesc_next_arc(__hdl, __arc, __type)) + +extern u64 mdesc_arc_target(struct mdesc_handle *hp, u64 arc); + +extern const char *mdesc_node_name(struct mdesc_handle *hp, u64 node); + +extern void mdesc_update(void); extern void sun4v_mdesc_init(void); diff --git a/include/asm-sparc64/vio.h b/include/asm-sparc64/vio.h index 47c3da76dcb..a8a53e6fc25 100644 --- 
a/include/asm-sparc64/vio.h +++ b/include/asm-sparc64/vio.h @@ -265,13 +265,18 @@ static inline u32 vio_dring_avail(struct vio_dring_state *dr, } struct vio_dev { - struct mdesc_node *mp; + u64 mp; struct device_node *dp; const char *type; const char *compat; int compat_len; + unsigned long channel_id; + + unsigned int tx_irq; + unsigned int rx_irq; + struct device dev; }; @@ -345,16 +350,10 @@ struct vio_driver_state { struct vio_dev *vdev; - unsigned long channel_id; - unsigned int tx_irq; - unsigned int rx_irq; - struct timer_list timer; struct vio_version ver; - struct mdesc_node *endpoint; - struct vio_version *ver_table; int ver_table_entries; @@ -365,7 +364,8 @@ struct vio_driver_state { #define viodbg(TYPE, f, a...) \ do { if (vio->debug & VIO_DEBUG_##TYPE) \ - printk(KERN_INFO "vio: ID[%lu] " f, vio->channel_id, ## a); \ + printk(KERN_INFO "vio: ID[%lu] " f, \ + vio->vdev->channel_id, ## a); \ } while (0) extern int vio_register_driver(struct vio_driver *drv); @@ -392,11 +392,10 @@ extern int vio_ldc_alloc(struct vio_driver_state *vio, struct ldc_channel_config *base_cfg, void *event_arg); extern void vio_ldc_free(struct vio_driver_state *vio); extern int vio_driver_init(struct vio_driver_state *vio, struct vio_dev *vdev, - u8 dev_class, struct mdesc_node *channel_endpoint, - struct vio_version *ver_table, int ver_table_size, - struct vio_driver_ops *ops, char *name); + u8 dev_class, struct vio_version *ver_table, + int ver_table_size, struct vio_driver_ops *ops, + char *name); -extern struct mdesc_node *vio_find_endpoint(struct vio_dev *vdev); extern void vio_port_up(struct vio_driver_state *vio); #endif /* _SPARC64_VIO_H */ -- cgit v1.2.3-70-g09d2 From 83292e0a9c3f1c326b28fbf8cb70a8ce81a98163 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 12 Jul 2007 14:16:22 -0700 Subject: [SPARC64]: Fix MD property lifetime bugs. Property values cannot be referenced outside of mdesc_grab()/mdesc_release() pairs. 
The only major offender was the VIO bus layer, easily fixed. Add some commentary to mdesc.h describing these rules. Signed-off-by: David S. Miller --- arch/sparc64/kernel/vio.c | 39 ++++++++++++++++++++++++++++----------- include/asm-sparc64/mdesc.h | 22 ++++++++++++++++++++-- include/asm-sparc64/vio.h | 7 +++++-- 3 files changed, 53 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/arch/sparc64/kernel/vio.c b/arch/sparc64/kernel/vio.c index 64f082555bc..8b269aabff0 100644 --- a/arch/sparc64/kernel/vio.c +++ b/arch/sparc64/kernel/vio.c @@ -44,12 +44,11 @@ static const struct vio_device_id *vio_match_device( while (matches->type[0] || matches->compat[0]) { int match = 1; - if (matches->type[0]) { - match &= type - && !strcmp(matches->type, type); - } + if (matches->type[0]) + match &= !strcmp(matches->type, type); + if (matches->compat[0]) { - match &= compat && + match &= len && find_in_proplist(compat, matches->compat, len); } if (match) @@ -205,15 +204,30 @@ static struct vio_dev *vio_create_one(struct mdesc_handle *hp, u64 mp, const char *type, *compat; struct device_node *dp; struct vio_dev *vdev; - int err, clen; + int err, tlen, clen; - type = mdesc_get_property(hp, mp, "device-type", NULL); + type = mdesc_get_property(hp, mp, "device-type", &tlen); if (!type) { - type = mdesc_get_property(hp, mp, "name", NULL); - if (!type) + type = mdesc_get_property(hp, mp, "name", &tlen); + if (!type) { type = mdesc_node_name(hp, mp); + tlen = strlen(type) + 1; + } + } + if (tlen > VIO_MAX_TYPE_LEN) { + printk(KERN_ERR "VIO: Type string [%s] is too long.\n", + type); + return NULL; } + compat = mdesc_get_property(hp, mp, "device-type", &clen); + if (!compat) { + clen = 0; + } else if (clen > VIO_MAX_COMPAT_LEN) { + printk(KERN_ERR "VIO: Compat len %d for [%s] is too long.\n", + clen, type); + return NULL; + } vdev = kzalloc(sizeof(*vdev), GFP_KERNEL); if (!vdev) { @@ -222,8 +236,11 @@ static struct vio_dev *vio_create_one(struct mdesc_handle *hp, u64 mp, 
} vdev->mp = mp; - vdev->type = type; - vdev->compat = compat; + memcpy(vdev->type, type, tlen); + if (compat) + memcpy(vdev->compat, compat, clen); + else + memset(vdev->compat, 0, sizeof(vdev->compat)); vdev->compat_len = clen; vdev->channel_id = ~0UL; diff --git a/include/asm-sparc64/mdesc.h b/include/asm-sparc64/mdesc.h index bbb0c0bed48..dc372df23fb 100644 --- a/include/asm-sparc64/mdesc.h +++ b/include/asm-sparc64/mdesc.h @@ -23,8 +23,28 @@ extern u64 mdesc_node_by_name(struct mdesc_handle *handle, (__node) != MDESC_NODE_NULL; \ __node = mdesc_node_by_name(__hdl, __node, __name)) +/* Access to property values returned from mdesc_get_property() are + * only valid inside of a mdesc_grab()/mdesc_release() sequence. + * Once mdesc_release() is called, the memory backed up by these + * pointers may reference freed up memory. + * + * Therefore callers must make copies of any property values + * they need. + * + * These same rules apply to mdesc_node_name(). + */ extern const void *mdesc_get_property(struct mdesc_handle *handle, u64 node, const char *name, int *lenp); +extern const char *mdesc_node_name(struct mdesc_handle *hp, u64 node); + +/* MD arc iteration, the standard sequence is: + * + * unsigned long arc; + * mdesc_for_each_arc(arc, handle, node, MDESC_ARC_TYPE_{FWD,BACK}) { + * unsigned long target = mdesc_arc_target(handle, arc); + * ... 
+ * } + */ #define MDESC_ARC_TYPE_FWD "fwd" #define MDESC_ARC_TYPE_BACK "back" @@ -38,8 +58,6 @@ extern u64 mdesc_next_arc(struct mdesc_handle *handle, u64 from, extern u64 mdesc_arc_target(struct mdesc_handle *hp, u64 arc); -extern const char *mdesc_node_name(struct mdesc_handle *hp, u64 node); - extern void mdesc_update(void); extern void sun4v_mdesc_init(void); diff --git a/include/asm-sparc64/vio.h b/include/asm-sparc64/vio.h index a8a53e6fc25..83c96422e9d 100644 --- a/include/asm-sparc64/vio.h +++ b/include/asm-sparc64/vio.h @@ -264,12 +264,15 @@ static inline u32 vio_dring_avail(struct vio_dring_state *dr, ((dr->prod - dr->cons) & (ring_size - 1))); } +#define VIO_MAX_TYPE_LEN 64 +#define VIO_MAX_COMPAT_LEN 64 + struct vio_dev { u64 mp; struct device_node *dp; - const char *type; - const char *compat; + char type[VIO_MAX_TYPE_LEN]; + char compat[VIO_MAX_COMPAT_LEN]; int compat_len; unsigned long channel_id; -- cgit v1.2.3-70-g09d2 From b3e13fbeb9ac1eb8e7b0791bf56e1775c692972b Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 12 Jul 2007 15:55:55 -0700 Subject: [SPARC64]: Fix setting of variables in LDOM guest. There is a special domain services capability for setting variables in the OBP options node. Guests don't have permanent store for the OBP variables like a normal system, so they are instead maintained in the LDOM control node or in the SC. Signed-off-by: David S. 
Miller --- arch/sparc64/kernel/ds.c | 187 ++++++++++++++++++++++++++++++++++++++++++---- arch/sparc64/prom/misc.c | 5 ++ arch/sparc64/prom/tree.c | 13 +++- include/asm-sparc64/ldc.h | 2 + 4 files changed, 192 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/arch/sparc64/kernel/ds.c b/arch/sparc64/kernel/ds.c index 9c8839d1cff..4e20ef232c5 100644 --- a/arch/sparc64/kernel/ds.c +++ b/arch/sparc64/kernel/ds.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include @@ -171,7 +172,7 @@ static void md_update_data(struct ldc_channel *lp, rp = (struct ds_md_update_req *) (dpkt + 1); - printk(KERN_ERR PFX "Machine description update.\n"); + printk(KERN_INFO PFX "Machine description update.\n"); memset(&pkt, 0, sizeof(pkt)); pkt.data.tag.type = DS_DATA; @@ -248,8 +249,8 @@ static void domain_panic_data(struct ldc_channel *lp, rp = (struct ds_panic_req *) (dpkt + 1); - printk(KERN_ERR PFX "Panic REQ [%lx], len=%d\n", - rp->req_num, len); + printk(KERN_ALERT PFX "Panic request from " + "LDOM manager received.\n"); memset(&pkt, 0, sizeof(pkt)); pkt.data.tag.type = DS_DATA; @@ -313,10 +314,60 @@ static void ds_pri_data(struct ldc_channel *lp, rp = (struct ds_pri_msg *) (dpkt + 1); - printk(KERN_ERR PFX "PRI REQ [%lx:%lx], len=%d\n", + printk(KERN_INFO PFX "PRI REQ [%lx:%lx], len=%d\n", rp->req_num, rp->type, len); } +struct ds_var_hdr { + __u32 type; +#define DS_VAR_SET_REQ 0x00 +#define DS_VAR_DELETE_REQ 0x01 +#define DS_VAR_SET_RESP 0x02 +#define DS_VAR_DELETE_RESP 0x03 +}; + +struct ds_var_set_msg { + struct ds_var_hdr hdr; + char name_and_value[0]; +}; + +struct ds_var_delete_msg { + struct ds_var_hdr hdr; + char name[0]; +}; + +struct ds_var_resp { + struct ds_var_hdr hdr; + __u32 result; +#define DS_VAR_SUCCESS 0x00 +#define DS_VAR_NO_SPACE 0x01 +#define DS_VAR_INVALID_VAR 0x02 +#define DS_VAR_INVALID_VAL 0x03 +#define DS_VAR_NOT_PRESENT 0x04 +}; + +static DEFINE_MUTEX(ds_var_mutex); +static int ds_var_doorbell; +static int 
ds_var_response; + +static void ds_var_data(struct ldc_channel *lp, + struct ds_cap_state *dp, + void *buf, int len) +{ + struct ds_data *dpkt = buf; + struct ds_var_resp *rp; + + rp = (struct ds_var_resp *) (dpkt + 1); + + if (rp->hdr.type != DS_VAR_SET_RESP && + rp->hdr.type != DS_VAR_DELETE_RESP) + return; + + ds_var_response = rp->result; + wmb(); + ds_var_doorbell = 1; +} + struct ds_cap_state ds_states[] = { { .service_id = "md-update", @@ -338,17 +389,16 @@ struct ds_cap_state ds_states[] = { .service_id = "pri", .data = ds_pri_data, }, + { + .service_id = "var-config", + .data = ds_var_data, + }, + { + .service_id = "var-config-backup", + .data = ds_var_data, + }, }; -static struct ds_cap_state *find_cap(u64 handle) -{ - unsigned int index = handle >> 32; - - if (index >= ARRAY_SIZE(ds_states)) - return NULL; - return &ds_states[index]; -} - static DEFINE_SPINLOCK(ds_lock); struct ds_info { @@ -361,6 +411,115 @@ struct ds_info { int rcv_buf_len; }; +static struct ds_info *ds_info; + +static struct ds_cap_state *find_cap(u64 handle) +{ + unsigned int index = handle >> 32; + + if (index >= ARRAY_SIZE(ds_states)) + return NULL; + return &ds_states[index]; +} + +static struct ds_cap_state *find_cap_by_string(const char *name) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(ds_states); i++) { + if (strcmp(ds_states[i].service_id, name)) + continue; + + return &ds_states[i]; + } + return NULL; +} + +void ldom_set_var(const char *var, const char *value) +{ + struct ds_info *dp = ds_info; + struct ds_cap_state *cp; + + cp = find_cap_by_string("var-config"); + if (cp->state != CAP_STATE_REGISTERED) + cp = find_cap_by_string("var-config-backup"); + + if (cp->state == CAP_STATE_REGISTERED) { + union { + struct { + struct ds_data data; + struct ds_var_set_msg msg; + } header; + char all[512]; + } pkt; + unsigned long flags; + char *base, *p; + int msg_len, loops; + + memset(&pkt, 0, sizeof(pkt)); + pkt.header.data.tag.type = DS_DATA; + pkt.header.data.handle = cp->handle; + 
pkt.header.msg.hdr.type = DS_VAR_SET_REQ; + base = p = &pkt.header.msg.name_and_value[0]; + strcpy(p, var); + p += strlen(var) + 1; + strcpy(p, value); + p += strlen(value) + 1; + + msg_len = (sizeof(struct ds_data) + + sizeof(struct ds_var_set_msg) + + (p - base)); + msg_len = (msg_len + 3) & ~3; + pkt.header.data.tag.len = msg_len - sizeof(struct ds_msg_tag); + + mutex_lock(&ds_var_mutex); + + spin_lock_irqsave(&ds_lock, flags); + ds_var_doorbell = 0; + ds_var_response = -1; + + ds_send(dp->lp, &pkt, msg_len); + spin_unlock_irqrestore(&ds_lock, flags); + + loops = 1000; + while (ds_var_doorbell == 0) { + if (loops-- < 0) + break; + barrier(); + udelay(100); + } + + mutex_unlock(&ds_var_mutex); + + if (ds_var_doorbell == 0 || + ds_var_response != DS_VAR_SUCCESS) + printk(KERN_ERR PFX "var-config [%s:%s] " + "failed, response(%d).\n", + var, value, + ds_var_response); + } else { + printk(KERN_ERR PFX "var-config not registered so " + "could not set (%s) variable to (%s).\n", + var, value); + } +} + +void ldom_reboot(const char *boot_command) +{ + /* Don't bother with any of this if the boot_command + * is empty. 
+ */ + if (boot_command && strlen(boot_command)) { + char full_boot_str[256]; + + strcpy(full_boot_str, "boot "); + strcpy(full_boot_str + strlen("boot "), boot_command); + + ldom_set_var("reboot-command", full_boot_str); + } + sun4v_mach_sir(); +} + static void ds_conn_reset(struct ds_info *dp) { printk(KERN_ERR PFX "ds_conn_reset() from %p\n", @@ -594,6 +753,8 @@ static int __devinit ds_probe(struct vio_dev *vdev, if (err) goto out_free_ldc; + ds_info = dp; + start_powerd(); return err; diff --git a/arch/sparc64/prom/misc.c b/arch/sparc64/prom/misc.c index f3e0c14e9ee..72d272c9de6 100644 --- a/arch/sparc64/prom/misc.c +++ b/arch/sparc64/prom/misc.c @@ -14,6 +14,7 @@ #include #include #include +#include int prom_service_exists(const char *service_name) { @@ -37,6 +38,10 @@ void prom_sun4v_guest_soft_state(void) /* Reset and reboot the machine with the command 'bcommand'. */ void prom_reboot(const char *bcommand) { +#ifdef CONFIG_SUN_LDOMS + if (ldom_domaining_enabled) + ldom_reboot(bcommand); +#endif p1275_cmd("boot", P1275_ARG(0, P1275_ARG_IN_STRING) | P1275_INOUT(1, 0), bcommand); } diff --git a/arch/sparc64/prom/tree.c b/arch/sparc64/prom/tree.c index 500f05e2cfc..17b7ecfe7ca 100644 --- a/arch/sparc64/prom/tree.c +++ b/arch/sparc64/prom/tree.c @@ -13,6 +13,7 @@ #include #include +#include /* Return the child of node 'node' or zero if no this node has no * direct descendent. 
@@ -261,9 +262,17 @@ int prom_node_has_property(int node, const char *prop) int prom_setprop(int node, const char *pname, char *value, int size) { - if(size == 0) return 0; - if((pname == 0) || (value == 0)) return 0; + if (size == 0) + return 0; + if ((pname == 0) || (value == 0)) + return 0; +#ifdef CONFIG_SUN_LDOMS + if (ldom_domaining_enabled) { + ldom_set_var(pname, value); + return 0; + } +#endif return p1275_cmd ("setprop", P1275_ARG(1,P1275_ARG_IN_STRING)| P1275_ARG(2,P1275_ARG_IN_BUF)| P1275_INOUT(4, 1), diff --git a/include/asm-sparc64/ldc.h b/include/asm-sparc64/ldc.h index 3c91f269f9d..a21996c6b15 100644 --- a/include/asm-sparc64/ldc.h +++ b/include/asm-sparc64/ldc.h @@ -4,6 +4,8 @@ #include extern int ldom_domaining_enabled; +extern void ldom_set_var(const char *var, const char *value); +extern void ldom_reboot(const char *boot_command); /* The event handler will be evoked when link state changes * or data becomes available on the receive side. -- cgit v1.2.3-70-g09d2 From 4f0234f4f9da485ecb9729af1b88567700fd4767 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 13 Jul 2007 16:03:42 -0700 Subject: [SPARC64]: Initial LDOM cpu hotplug support. Only adding cpus is supported at the moment, removal will come next. When new cpus are configured, the machine description is updated. When we get the configure request we pass in a cpu mask of to-be-added cpus to the mdesc CPU node parser so it only fetches information for those cpus. That code also proceeds to update the SMT/multi-core scheduling bitmaps. cpu_up() does all the work and we return the status back over the DS channel. CPUs via dr-cpu need to be booted straight out of the hypervisor, and this requires: 1) A new trampoline mechanism. CPUs are booted straight out of the hypervisor with MMU disabled and running in physical addresses with no mappings installed in the TLB. The new hvtramp.S code sets up the critical cpu state, installs the locked TLB mappings for the kernel, and turns the MMU on. 
It then proceeds to follow the logic of the existing trampoline.S SMP cpu bringup code. 2) All calls into OBP have to be disallowed when domaining is enabled. Since cpus boot straight into the kernel from the hypervisor, OBP has no state about that cpu and therefore cannot handle being invoked on that cpu. Luckily it's only a handful of interfaces which can be called after the OBP device tree is obtained. For example, rebooting, halting, powering-off, and setting options node variables. CPU removal support will require some infrastructure changes here. Namely we'll have to process the requests via a true kernel thread instead of in a workqueue. workqueues run on a per-cpu thread, but when unconfiguring we might need to force the thread to execute on another cpu if the current cpu is the one being removed. Removal of a cpu also causes the kernel to destroy that cpu's workqueue running thread. Another issue on removal is that we may have interrupts still pointing to the cpu-to-be-removed. So new code will be needed to walk the active INO list and retarget those cpus as-needed. Signed-off-by: David S. 
Miller --- arch/sparc64/Kconfig | 10 + arch/sparc64/kernel/Makefile | 3 +- arch/sparc64/kernel/ds.c | 514 ++++++++++++++++++++++++++++++------ arch/sparc64/kernel/hvtramp.S | 139 ++++++++++ arch/sparc64/kernel/mdesc.c | 53 ++-- arch/sparc64/kernel/prom.c | 2 +- arch/sparc64/kernel/smp.c | 55 ++-- arch/sparc64/kernel/sparc64_ksyms.c | 4 - arch/sparc64/prom/misc.c | 8 + arch/sparc64/prom/p1275.c | 1 + include/asm-sparc64/cpudata.h | 3 +- include/asm-sparc64/hvtramp.h | 37 +++ include/asm-sparc64/hypervisor.h | 2 +- include/asm-sparc64/ldc.h | 2 + include/asm-sparc64/mdesc.h | 3 + include/asm-sparc64/smp.h | 8 +- 16 files changed, 716 insertions(+), 128 deletions(-) create mode 100644 arch/sparc64/kernel/hvtramp.S create mode 100644 include/asm-sparc64/hvtramp.h (limited to 'include') diff --git a/arch/sparc64/Kconfig b/arch/sparc64/Kconfig index af59daa8105..3c2e3397caf 100644 --- a/arch/sparc64/Kconfig +++ b/arch/sparc64/Kconfig @@ -108,6 +108,15 @@ config SECCOMP source kernel/Kconfig.hz +config HOTPLUG_CPU + bool "Support for hot-pluggable CPUs" + depends on SMP + select HOTPLUG + ---help--- + Say Y here to experiment with turning CPUs off and on. CPUs + can be controlled through /sys/devices/system/cpu/cpu#. + Say N if you want to disable CPU hotplug. + source "init/Kconfig" config SYSVIPC_COMPAT @@ -307,6 +316,7 @@ config SUN_IO config SUN_LDOMS bool "Sun Logical Domains support" + select HOTPLUG_CPU help Say Y here is you want to support virtual devices via Logical Domains. 
diff --git a/arch/sparc64/kernel/Makefile b/arch/sparc64/kernel/Makefile index 70e6c501392..62db93c148c 100644 --- a/arch/sparc64/kernel/Makefile +++ b/arch/sparc64/kernel/Makefile @@ -12,7 +12,8 @@ obj-y := process.o setup.o cpu.o idprom.o \ irq.o ptrace.o time.o sys_sparc.o signal.o \ unaligned.o central.o pci.o starfire.o semaphore.o \ power.o sbus.o iommu_common.o sparc64_ksyms.o chmc.o \ - visemul.o prom.o of_device.o hvapi.o sstate.o mdesc.o + visemul.o prom.o of_device.o hvapi.o sstate.o mdesc.o \ + hvtramp.o obj-$(CONFIG_STACKTRACE) += stacktrace.o obj-$(CONFIG_PCI) += ebus.o isa.o pci_common.o pci_iommu.o \ diff --git a/arch/sparc64/kernel/ds.c b/arch/sparc64/kernel/ds.c index 4e20ef232c5..b82c03a25d9 100644 --- a/arch/sparc64/kernel/ds.c +++ b/arch/sparc64/kernel/ds.c @@ -12,11 +12,16 @@ #include #include #include +#include +#include #include #include #include #include +#include +#include +#include #define DRV_MODULE_NAME "ds" #define PFX DRV_MODULE_NAME ": " @@ -124,7 +129,7 @@ struct ds_cap_state { __u64 handle; void (*data)(struct ldc_channel *lp, - struct ds_cap_state *dp, + struct ds_cap_state *cp, void *buf, int len); const char *service_id; @@ -135,6 +140,91 @@ struct ds_cap_state { #define CAP_STATE_REGISTERED 0x02 }; +static void md_update_data(struct ldc_channel *lp, struct ds_cap_state *cp, + void *buf, int len); +static void domain_shutdown_data(struct ldc_channel *lp, + struct ds_cap_state *cp, + void *buf, int len); +static void domain_panic_data(struct ldc_channel *lp, + struct ds_cap_state *cp, + void *buf, int len); +static void dr_cpu_data(struct ldc_channel *lp, + struct ds_cap_state *cp, + void *buf, int len); +static void ds_pri_data(struct ldc_channel *lp, + struct ds_cap_state *cp, + void *buf, int len); +static void ds_var_data(struct ldc_channel *lp, + struct ds_cap_state *cp, + void *buf, int len); + +struct ds_cap_state ds_states[] = { + { + .service_id = "md-update", + .data = md_update_data, + }, + { + .service_id = 
"domain-shutdown", + .data = domain_shutdown_data, + }, + { + .service_id = "domain-panic", + .data = domain_panic_data, + }, + { + .service_id = "dr-cpu", + .data = dr_cpu_data, + }, + { + .service_id = "pri", + .data = ds_pri_data, + }, + { + .service_id = "var-config", + .data = ds_var_data, + }, + { + .service_id = "var-config-backup", + .data = ds_var_data, + }, +}; + +static DEFINE_SPINLOCK(ds_lock); + +struct ds_info { + struct ldc_channel *lp; + u8 hs_state; +#define DS_HS_START 0x01 +#define DS_HS_DONE 0x02 + + void *rcv_buf; + int rcv_buf_len; +}; + +static struct ds_info *ds_info; + +static struct ds_cap_state *find_cap(u64 handle) +{ + unsigned int index = handle >> 32; + + if (index >= ARRAY_SIZE(ds_states)) + return NULL; + return &ds_states[index]; +} + +static struct ds_cap_state *find_cap_by_string(const char *name) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(ds_states); i++) { + if (strcmp(ds_states[i].service_id, name)) + continue; + + return &ds_states[i]; + } + return NULL; +} + static int ds_send(struct ldc_channel *lp, void *data, int len) { int err, limit = 1000; @@ -265,36 +355,354 @@ static void domain_panic_data(struct ldc_channel *lp, panic("PANIC requested by LDOM manager."); } -struct ds_cpu_tag { +struct dr_cpu_tag { __u64 req_num; __u32 type; -#define DS_CPU_CONFIGURE 0x43 -#define DS_CPU_UNCONFIGURE 0x55 -#define DS_CPU_FORCE_UNCONFIGURE 0x46 -#define DS_CPU_STATUS 0x53 +#define DR_CPU_CONFIGURE 0x43 +#define DR_CPU_UNCONFIGURE 0x55 +#define DR_CPU_FORCE_UNCONFIGURE 0x46 +#define DR_CPU_STATUS 0x53 /* Responses */ -#define DS_CPU_OK 0x6f -#define DS_CPU_ERROR 0x65 +#define DR_CPU_OK 0x6f +#define DR_CPU_ERROR 0x65 __u32 num_records; }; -struct ds_cpu_record { - __u32 cpu_id; +struct dr_cpu_resp_entry { + __u32 cpu; + __u32 result; +#define DR_CPU_RES_OK 0x00 +#define DR_CPU_RES_FAILURE 0x01 +#define DR_CPU_RES_BLOCKED 0x02 +#define DR_CPU_RES_CPU_NOT_RESPONDING 0x03 +#define DR_CPU_RES_NOT_IN_MD 0x04 + + __u32 stat; +#define 
DR_CPU_STAT_NOT_PRESENT 0x00 +#define DR_CPU_STAT_UNCONFIGURED 0x01 +#define DR_CPU_STAT_CONFIGURED 0x02 + + __u32 str_off; }; +/* XXX Put this in some common place. XXX */ +static unsigned long kimage_addr_to_ra(void *p) +{ + unsigned long val = (unsigned long) p; + + return kern_base + (val - KERNBASE); +} + +void ldom_startcpu_cpuid(unsigned int cpu, unsigned long thread_reg) +{ + extern unsigned long sparc64_ttable_tl0; + extern unsigned long kern_locked_tte_data; + extern int bigkernel; + struct hvtramp_descr *hdesc; + unsigned long trampoline_ra; + struct trap_per_cpu *tb; + u64 tte_vaddr, tte_data; + unsigned long hv_err; + + hdesc = kzalloc(sizeof(*hdesc), GFP_KERNEL); + if (!hdesc) { + printk(KERN_ERR PFX "ldom_startcpu_cpuid: Cannot allocate " + "hvtramp_descr.\n"); + return; + } + + hdesc->cpu = cpu; + hdesc->num_mappings = (bigkernel ? 2 : 1); + + tb = &trap_block[cpu]; + tb->hdesc = hdesc; + + hdesc->fault_info_va = (unsigned long) &tb->fault_info; + hdesc->fault_info_pa = kimage_addr_to_ra(&tb->fault_info); + + hdesc->thread_reg = thread_reg; + + tte_vaddr = (unsigned long) KERNBASE; + tte_data = kern_locked_tte_data; + + hdesc->maps[0].vaddr = tte_vaddr; + hdesc->maps[0].tte = tte_data; + if (bigkernel) { + tte_vaddr += 0x400000; + tte_data += 0x400000; + hdesc->maps[1].vaddr = tte_vaddr; + hdesc->maps[1].tte = tte_data; + } + + trampoline_ra = kimage_addr_to_ra(hv_cpu_startup); + + hv_err = sun4v_cpu_start(cpu, trampoline_ra, + kimage_addr_to_ra(&sparc64_ttable_tl0), + __pa(hdesc)); +} + +/* DR cpu requests get queued onto the work list by the + * dr_cpu_data() callback. The list is protected by + * ds_lock, and processed by dr_cpu_process() in order. 
+ */ +static LIST_HEAD(dr_cpu_work_list); + +struct dr_cpu_queue_entry { + struct list_head list; + char req[0]; +}; + +static void __dr_cpu_send_error(struct ds_cap_state *cp, struct ds_data *data) +{ + struct dr_cpu_tag *tag = (struct dr_cpu_tag *) (data + 1); + struct ds_info *dp = ds_info; + struct { + struct ds_data data; + struct dr_cpu_tag tag; + } pkt; + int msg_len; + + memset(&pkt, 0, sizeof(pkt)); + pkt.data.tag.type = DS_DATA; + pkt.data.handle = cp->handle; + pkt.tag.req_num = tag->req_num; + pkt.tag.type = DR_CPU_ERROR; + pkt.tag.num_records = 0; + + msg_len = (sizeof(struct ds_data) + + sizeof(struct dr_cpu_tag)); + + pkt.data.tag.len = msg_len - sizeof(struct ds_msg_tag); + + ds_send(dp->lp, &pkt, msg_len); +} + +static void dr_cpu_send_error(struct ds_cap_state *cp, struct ds_data *data) +{ + unsigned long flags; + + spin_lock_irqsave(&ds_lock, flags); + __dr_cpu_send_error(cp, data); + spin_unlock_irqrestore(&ds_lock, flags); +} + +#define CPU_SENTINEL 0xffffffff + +static void purge_dups(u32 *list, u32 num_ents) +{ + unsigned int i; + + for (i = 0; i < num_ents; i++) { + u32 cpu = list[i]; + unsigned int j; + + if (cpu == CPU_SENTINEL) + continue; + + for (j = i + 1; j < num_ents; j++) { + if (list[j] == cpu) + list[j] = CPU_SENTINEL; + } + } +} + +static int dr_cpu_size_response(int ncpus) +{ + return (sizeof(struct ds_data) + + sizeof(struct dr_cpu_tag) + + (sizeof(struct dr_cpu_resp_entry) * ncpus)); +} + +static void dr_cpu_init_response(struct ds_data *resp, u64 req_num, + u64 handle, int resp_len, int ncpus, + cpumask_t *mask, u32 default_stat) +{ + struct dr_cpu_resp_entry *ent; + struct dr_cpu_tag *tag; + int i, cpu; + + tag = (struct dr_cpu_tag *) (resp + 1); + ent = (struct dr_cpu_resp_entry *) (tag + 1); + + resp->tag.type = DS_DATA; + resp->tag.len = resp_len - sizeof(struct ds_msg_tag); + resp->handle = handle; + tag->req_num = req_num; + tag->type = DR_CPU_OK; + tag->num_records = ncpus; + + i = 0; + for_each_cpu_mask(cpu, *mask) { 
+ ent[i].cpu = cpu; + ent[i].result = DR_CPU_RES_OK; + ent[i].stat = default_stat; + i++; + } + BUG_ON(i != ncpus); +} + +static void dr_cpu_mark(struct ds_data *resp, int cpu, int ncpus, + u32 res, u32 stat) +{ + struct dr_cpu_resp_entry *ent; + struct dr_cpu_tag *tag; + int i; + + tag = (struct dr_cpu_tag *) (resp + 1); + ent = (struct dr_cpu_resp_entry *) (tag + 1); + + for (i = 0; i < ncpus; i++) { + if (ent[i].cpu != cpu) + continue; + ent[i].result = res; + ent[i].stat = stat; + break; + } +} + +static int dr_cpu_configure(struct ds_cap_state *cp, u64 req_num, + cpumask_t *mask) +{ + struct ds_data *resp; + int resp_len, ncpus, cpu; + unsigned long flags; + + ncpus = cpus_weight(*mask); + resp_len = dr_cpu_size_response(ncpus); + resp = kzalloc(resp_len, GFP_KERNEL); + if (!resp) + return -ENOMEM; + + dr_cpu_init_response(resp, req_num, cp->handle, + resp_len, ncpus, mask, + DR_CPU_STAT_CONFIGURED); + + mdesc_fill_in_cpu_data(*mask); + + for_each_cpu_mask(cpu, *mask) { + int err; + + printk(KERN_INFO PFX "Starting cpu %d...\n", cpu); + err = cpu_up(cpu); + if (err) + dr_cpu_mark(resp, cpu, ncpus, + DR_CPU_RES_FAILURE, + DR_CPU_STAT_UNCONFIGURED); + } + + spin_lock_irqsave(&ds_lock, flags); + ds_send(ds_info->lp, resp, resp_len); + spin_unlock_irqrestore(&ds_lock, flags); + + kfree(resp); + + return 0; +} + +static int dr_cpu_unconfigure(struct ds_cap_state *cp, u64 req_num, + cpumask_t *mask) +{ + struct ds_data *resp; + int resp_len, ncpus; + + ncpus = cpus_weight(*mask); + resp_len = dr_cpu_size_response(ncpus); + resp = kzalloc(resp_len, GFP_KERNEL); + if (!resp) + return -ENOMEM; + + dr_cpu_init_response(resp, req_num, cp->handle, + resp_len, ncpus, mask, + DR_CPU_STAT_UNCONFIGURED); + + kfree(resp); + + return -EOPNOTSUPP; +} + +static void dr_cpu_process(struct work_struct *work) +{ + struct dr_cpu_queue_entry *qp, *tmp; + struct ds_cap_state *cp; + unsigned long flags; + LIST_HEAD(todo); + cpumask_t mask; + + cp = find_cap_by_string("dr-cpu"); + + 
spin_lock_irqsave(&ds_lock, flags); + list_splice(&dr_cpu_work_list, &todo); + spin_unlock_irqrestore(&ds_lock, flags); + + list_for_each_entry_safe(qp, tmp, &todo, list) { + struct ds_data *data = (struct ds_data *) qp->req; + struct dr_cpu_tag *tag = (struct dr_cpu_tag *) (data + 1); + u32 *cpu_list = (u32 *) (tag + 1); + u64 req_num = tag->req_num; + unsigned int i; + int err; + + switch (tag->type) { + case DR_CPU_CONFIGURE: + case DR_CPU_UNCONFIGURE: + case DR_CPU_FORCE_UNCONFIGURE: + break; + + default: + dr_cpu_send_error(cp, data); + goto next; + } + + purge_dups(cpu_list, tag->num_records); + + cpus_clear(mask); + for (i = 0; i < tag->num_records; i++) { + if (cpu_list[i] == CPU_SENTINEL) + continue; + + if (cpu_list[i] < NR_CPUS) + cpu_set(cpu_list[i], mask); + } + + if (tag->type == DR_CPU_CONFIGURE) + err = dr_cpu_configure(cp, req_num, &mask); + else + err = dr_cpu_unconfigure(cp, req_num, &mask); + + if (err) + dr_cpu_send_error(cp, data); + +next: + list_del(&qp->list); + kfree(qp); + } +} + +static DECLARE_WORK(dr_cpu_work, dr_cpu_process); + static void dr_cpu_data(struct ldc_channel *lp, struct ds_cap_state *dp, void *buf, int len) { + struct dr_cpu_queue_entry *qp; struct ds_data *dpkt = buf; - struct ds_cpu_tag *rp; + struct dr_cpu_tag *rp; - rp = (struct ds_cpu_tag *) (dpkt + 1); + rp = (struct dr_cpu_tag *) (dpkt + 1); - printk(KERN_ERR PFX "CPU REQ [%lx:%x], len=%d\n", - rp->req_num, rp->type, len); + qp = kmalloc(sizeof(struct dr_cpu_queue_entry) + len, GFP_ATOMIC); + if (!qp) { + struct ds_cap_state *cp; + + cp = find_cap_by_string("dr-cpu"); + __dr_cpu_send_error(cp, dpkt); + } else { + memcpy(&qp->req, buf, len); + list_add_tail(&qp->list, &dr_cpu_work_list); + schedule_work(&dr_cpu_work); + } } struct ds_pri_msg { @@ -368,73 +776,6 @@ static void ds_var_data(struct ldc_channel *lp, ds_var_doorbell = 1; } -struct ds_cap_state ds_states[] = { - { - .service_id = "md-update", - .data = md_update_data, - }, - { - .service_id = 
"domain-shutdown", - .data = domain_shutdown_data, - }, - { - .service_id = "domain-panic", - .data = domain_panic_data, - }, - { - .service_id = "dr-cpu", - .data = dr_cpu_data, - }, - { - .service_id = "pri", - .data = ds_pri_data, - }, - { - .service_id = "var-config", - .data = ds_var_data, - }, - { - .service_id = "var-config-backup", - .data = ds_var_data, - }, -}; - -static DEFINE_SPINLOCK(ds_lock); - -struct ds_info { - struct ldc_channel *lp; - u8 hs_state; -#define DS_HS_START 0x01 -#define DS_HS_DONE 0x02 - - void *rcv_buf; - int rcv_buf_len; -}; - -static struct ds_info *ds_info; - -static struct ds_cap_state *find_cap(u64 handle) -{ - unsigned int index = handle >> 32; - - if (index >= ARRAY_SIZE(ds_states)) - return NULL; - return &ds_states[index]; -} - -static struct ds_cap_state *find_cap_by_string(const char *name) -{ - int i; - - for (i = 0; i < ARRAY_SIZE(ds_states); i++) { - if (strcmp(ds_states[i].service_id, name)) - continue; - - return &ds_states[i]; - } - return NULL; -} - void ldom_set_var(const char *var, const char *value) { struct ds_info *dp = ds_info; @@ -467,8 +808,8 @@ void ldom_set_var(const char *var, const char *value) p += strlen(value) + 1; msg_len = (sizeof(struct ds_data) + - sizeof(struct ds_var_set_msg) + - (p - base)); + sizeof(struct ds_var_set_msg) + + (p - base)); msg_len = (msg_len + 3) & ~3; pkt.header.data.tag.len = msg_len - sizeof(struct ds_msg_tag); @@ -520,6 +861,11 @@ void ldom_reboot(const char *boot_command) sun4v_mach_sir(); } +void ldom_power_off(void) +{ + sun4v_mach_exit(0); +} + static void ds_conn_reset(struct ds_info *dp) { printk(KERN_ERR PFX "ds_conn_reset() from %p\n", @@ -601,7 +947,7 @@ static int ds_handshake(struct ds_info *dp, struct ds_msg_tag *pkt) np->handle); return 0; } - printk(KERN_ERR PFX "Could not register %s service\n", + printk(KERN_INFO PFX "Could not register %s service\n", cp->service_id); cp->state = CAP_STATE_UNKNOWN; } diff --git a/arch/sparc64/kernel/hvtramp.S 
b/arch/sparc64/kernel/hvtramp.S new file mode 100644 index 00000000000..76a090e2c2a --- /dev/null +++ b/arch/sparc64/kernel/hvtramp.S @@ -0,0 +1,139 @@ +/* hvtramp.S: Hypervisor start-cpu trampoline code. + * + * Copyright (C) 2007 David S. Miller + */ + +#include +#include +#include +#include +#include +#include +#include +#include + + .text + .align 8 + .globl hv_cpu_startup, hv_cpu_startup_end + + /* This code executes directly out of the hypervisor + * with physical addressing (va==pa). %o0 contains + * our client argument which for Linux points to + * a descriptor data structure which defines the + * MMU entries we need to load up. + * + * After we set things up we enable the MMU and call + * into the kernel. + * + * First setup basic privileged cpu state. + */ +hv_cpu_startup: + wrpr %g0, 0, %gl + wrpr %g0, 15, %pil + wrpr %g0, 0, %canrestore + wrpr %g0, 0, %otherwin + wrpr %g0, 6, %cansave + wrpr %g0, 6, %cleanwin + wrpr %g0, 0, %cwp + wrpr %g0, 0, %wstate + wrpr %g0, 0, %tl + + sethi %hi(sparc64_ttable_tl0), %g1 + wrpr %g1, %tba + + mov %o0, %l0 + + lduw [%l0 + HVTRAMP_DESCR_CPU], %g1 + mov SCRATCHPAD_CPUID, %g2 + stxa %g1, [%g2] ASI_SCRATCHPAD + + ldx [%l0 + HVTRAMP_DESCR_FAULT_INFO_VA], %g2 + stxa %g2, [%g0] ASI_SCRATCHPAD + + mov 0, %l1 + lduw [%l0 + HVTRAMP_DESCR_NUM_MAPPINGS], %l2 + add %l0, HVTRAMP_DESCR_MAPS, %l3 + +1: ldx [%l3 + HVTRAMP_MAPPING_VADDR], %o0 + clr %o1 + ldx [%l3 + HVTRAMP_MAPPING_TTE], %o2 + mov HV_MMU_IMMU | HV_MMU_DMMU, %o3 + mov HV_FAST_MMU_MAP_PERM_ADDR, %o5 + ta HV_FAST_TRAP + + brnz,pn %o0, 80f + nop + + add %l1, 1, %l1 + cmp %l1, %l2 + blt,a,pt %xcc, 1b + add %l3, HVTRAMP_MAPPING_SIZE, %l3 + + ldx [%l0 + HVTRAMP_DESCR_FAULT_INFO_PA], %o0 + mov HV_FAST_MMU_FAULT_AREA_CONF, %o5 + ta HV_FAST_TRAP + + brnz,pn %o0, 80f + nop + + wrpr %g0, (PSTATE_PRIV | PSTATE_PEF), %pstate + + ldx [%l0 + HVTRAMP_DESCR_THREAD_REG], %l6 + + mov 1, %o0 + set 1f, %o1 + mov HV_FAST_MMU_ENABLE, %o5 + ta HV_FAST_TRAP + + ba,pt %xcc, 80f + nop + +1: + wr 
%g0, 0, %fprs + wr %g0, ASI_P, %asi + + mov PRIMARY_CONTEXT, %g7 + stxa %g0, [%g7] ASI_MMU + membar #Sync + + mov SECONDARY_CONTEXT, %g7 + stxa %g0, [%g7] ASI_MMU + membar #Sync + + mov %l6, %g6 + ldx [%g6 + TI_TASK], %g4 + + mov 1, %g5 + sllx %g5, THREAD_SHIFT, %g5 + sub %g5, (STACKFRAME_SZ + STACK_BIAS), %g5 + add %g6, %g5, %sp + mov 0, %fp + + call init_irqwork_curcpu + nop + call hard_smp_processor_id + nop + + mov %o0, %o1 + mov 0, %o0 + mov 0, %o2 + call sun4v_init_mondo_queues + mov 1, %o3 + + call init_cur_cpu_trap + mov %g6, %o0 + + wrpr %g0, (PSTATE_PRIV | PSTATE_PEF | PSTATE_IE), %pstate + + call smp_callin + nop + call cpu_idle + mov 0, %o0 + call cpu_panic + nop + +80: ba,pt %xcc, 80b + nop + + .align 8 +hv_cpu_startup_end: diff --git a/arch/sparc64/kernel/mdesc.c b/arch/sparc64/kernel/mdesc.c index 9e5088d563c..3f79940a293 100644 --- a/arch/sparc64/kernel/mdesc.c +++ b/arch/sparc64/kernel/mdesc.c @@ -434,6 +434,22 @@ static void __init report_platform_properties(void) if (v) printk("PLATFORM: max-cpus [%lu]\n", *v); +#ifdef CONFIG_SMP + { + int max_cpu, i; + + if (v) { + max_cpu = *v; + if (max_cpu > NR_CPUS) + max_cpu = NR_CPUS; + } else { + max_cpu = NR_CPUS; + } + for (i = 0; i < max_cpu; i++) + cpu_set(i, cpu_possible_map); + } +#endif + mdesc_release(hp); } @@ -451,9 +467,9 @@ static int inline find_in_proplist(const char *list, const char *match, int len) return 0; } -static void __init fill_in_one_cache(cpuinfo_sparc *c, - struct mdesc_handle *hp, - u64 mp) +static void __devinit fill_in_one_cache(cpuinfo_sparc *c, + struct mdesc_handle *hp, + u64 mp) { const u64 *level = mdesc_get_property(hp, mp, "level", NULL); const u64 *size = mdesc_get_property(hp, mp, "size", NULL); @@ -496,7 +512,8 @@ static void __init fill_in_one_cache(cpuinfo_sparc *c, } } -static void __init mark_core_ids(struct mdesc_handle *hp, u64 mp, int core_id) +static void __devinit mark_core_ids(struct mdesc_handle *hp, u64 mp, + int core_id) { u64 a; @@ -529,7 +546,7 @@ 
static void __init mark_core_ids(struct mdesc_handle *hp, u64 mp, int core_id) } } -static void __init set_core_ids(struct mdesc_handle *hp) +static void __devinit set_core_ids(struct mdesc_handle *hp) { int idx; u64 mp; @@ -554,7 +571,8 @@ static void __init set_core_ids(struct mdesc_handle *hp) } } -static void __init mark_proc_ids(struct mdesc_handle *hp, u64 mp, int proc_id) +static void __devinit mark_proc_ids(struct mdesc_handle *hp, u64 mp, + int proc_id) { u64 a; @@ -573,8 +591,8 @@ static void __init mark_proc_ids(struct mdesc_handle *hp, u64 mp, int proc_id) } } -static void __init __set_proc_ids(struct mdesc_handle *hp, - const char *exec_unit_name) +static void __devinit __set_proc_ids(struct mdesc_handle *hp, + const char *exec_unit_name) { int idx; u64 mp; @@ -595,13 +613,14 @@ static void __init __set_proc_ids(struct mdesc_handle *hp, } } -static void __init set_proc_ids(struct mdesc_handle *hp) +static void __devinit set_proc_ids(struct mdesc_handle *hp) { __set_proc_ids(hp, "exec_unit"); __set_proc_ids(hp, "exec-unit"); } -static void __init get_one_mondo_bits(const u64 *p, unsigned int *mask, unsigned char def) +static void __devinit get_one_mondo_bits(const u64 *p, unsigned int *mask, + unsigned char def) { u64 val; @@ -619,8 +638,8 @@ use_default: *mask = ((1U << def) * 64U) - 1U; } -static void __init get_mondo_data(struct mdesc_handle *hp, u64 mp, - struct trap_per_cpu *tb) +static void __devinit get_mondo_data(struct mdesc_handle *hp, u64 mp, + struct trap_per_cpu *tb) { const u64 *val; @@ -637,7 +656,7 @@ static void __init get_mondo_data(struct mdesc_handle *hp, u64 mp, get_one_mondo_bits(val, &tb->nonresum_qmask, 2); } -static void __init mdesc_fill_in_cpu_data(void) +void __devinit mdesc_fill_in_cpu_data(cpumask_t mask) { struct mdesc_handle *hp = mdesc_grab(); u64 mp; @@ -658,6 +677,8 @@ static void __init mdesc_fill_in_cpu_data(void) #ifdef CONFIG_SMP if (cpuid >= NR_CPUS) continue; + if (!cpu_isset(cpuid, mask)) + continue; #else /* On 
uniprocessor we only want the values for the * real physical cpu the kernel booted onto, however @@ -696,7 +717,6 @@ static void __init mdesc_fill_in_cpu_data(void) #ifdef CONFIG_SMP cpu_set(cpuid, cpu_present_map); - cpu_set(cpuid, phys_cpu_present_map); #endif c->core_id = 0; @@ -719,6 +739,7 @@ void __init sun4v_mdesc_init(void) { struct mdesc_handle *hp; unsigned long len, real_len, status; + cpumask_t mask; (void) sun4v_mach_desc(0UL, 0UL, &len); @@ -742,5 +763,7 @@ void __init sun4v_mdesc_init(void) cur_mdesc = hp; report_platform_properties(); - mdesc_fill_in_cpu_data(); + + cpus_setall(mask); + mdesc_fill_in_cpu_data(mask); } diff --git a/arch/sparc64/kernel/prom.c b/arch/sparc64/kernel/prom.c index 61036b34666..5d220302cd5 100644 --- a/arch/sparc64/kernel/prom.c +++ b/arch/sparc64/kernel/prom.c @@ -1808,7 +1808,7 @@ static void __init of_fill_in_cpu_data(void) #ifdef CONFIG_SMP cpu_set(cpuid, cpu_present_map); - cpu_set(cpuid, phys_cpu_present_map); + cpu_set(cpuid, cpu_possible_map); #endif } diff --git a/arch/sparc64/kernel/smp.c b/arch/sparc64/kernel/smp.c index 40e40f968d6..315eef0869b 100644 --- a/arch/sparc64/kernel/smp.c +++ b/arch/sparc64/kernel/smp.c @@ -41,6 +41,7 @@ #include #include #include +#include extern void calibrate_delay(void); @@ -49,12 +50,18 @@ int sparc64_multi_core __read_mostly; /* Please don't make this stuff initdata!!! --DaveM */ unsigned char boot_cpu_id; +cpumask_t cpu_possible_map __read_mostly = CPU_MASK_NONE; cpumask_t cpu_online_map __read_mostly = CPU_MASK_NONE; -cpumask_t phys_cpu_present_map __read_mostly = CPU_MASK_NONE; cpumask_t cpu_sibling_map[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = CPU_MASK_NONE }; cpumask_t cpu_core_map[NR_CPUS] __read_mostly = { [0 ... 
NR_CPUS-1] = CPU_MASK_NONE }; + +EXPORT_SYMBOL(cpu_possible_map); +EXPORT_SYMBOL(cpu_online_map); +EXPORT_SYMBOL(cpu_sibling_map); +EXPORT_SYMBOL(cpu_core_map); + static cpumask_t smp_commenced_mask; static cpumask_t cpu_callout_map; @@ -84,9 +91,10 @@ extern void setup_sparc64_timer(void); static volatile unsigned long callin_flag = 0; -void __init smp_callin(void) +void __devinit smp_callin(void) { int cpuid = hard_smp_processor_id(); + struct trap_per_cpu *tb = &trap_block[cpuid];; __local_per_cpu_offset = __per_cpu_offset(cpuid); @@ -117,6 +125,11 @@ void __init smp_callin(void) atomic_inc(&init_mm.mm_count); current->active_mm = &init_mm; + if (tb->hdesc) { + kfree(tb->hdesc); + tb->hdesc = NULL; + } + while (!cpu_isset(cpuid, smp_commenced_mask)) rmb(); @@ -296,14 +309,20 @@ static int __devinit smp_boot_one_cpu(unsigned int cpu) /* Alloc the mondo queues, cpu will load them. */ sun4v_init_mondo_queues(0, cpu, 1, 0); - prom_startcpu_cpuid(cpu, entry, cookie); +#ifdef CONFIG_SUN_LDOMS + if (ldom_domaining_enabled) + ldom_startcpu_cpuid(cpu, + (unsigned long) cpu_new_thread); + else +#endif + prom_startcpu_cpuid(cpu, entry, cookie); } else { struct device_node *dp = of_find_node_by_cpuid(cpu); prom_startcpu(dp->node, entry, cookie); } - for (timeout = 0; timeout < 5000000; timeout++) { + for (timeout = 0; timeout < 50000; timeout++) { if (callin_flag) break; udelay(100); @@ -1163,22 +1182,8 @@ int setup_profiling_timer(unsigned int multiplier) return -EINVAL; } -/* Constrain the number of cpus to max_cpus. 
*/ void __init smp_prepare_cpus(unsigned int max_cpus) { - int i; - - if (num_possible_cpus() > max_cpus) { - for_each_possible_cpu(i) { - if (i != boot_cpu_id) { - cpu_clear(i, phys_cpu_present_map); - cpu_clear(i, cpu_present_map); - if (num_possible_cpus() <= max_cpus) - break; - } - } - } - cpu_data(boot_cpu_id).udelay_val = loops_per_jiffy; } @@ -1242,6 +1247,20 @@ int __cpuinit __cpu_up(unsigned int cpu) return ret; } +#ifdef CONFIG_HOTPLUG_CPU +int __cpu_disable(void) +{ + printk(KERN_ERR "SMP: __cpu_disable() on cpu %d\n", + smp_processor_id()); + return -ENODEV; +} + +void __cpu_die(unsigned int cpu) +{ + printk(KERN_ERR "SMP: __cpu_die(%u)\n", cpu); +} +#endif + void __init smp_cpus_done(unsigned int max_cpus) { unsigned long bogosum = 0; diff --git a/arch/sparc64/kernel/sparc64_ksyms.c b/arch/sparc64/kernel/sparc64_ksyms.c index 6fa76161289..51e059e36d4 100644 --- a/arch/sparc64/kernel/sparc64_ksyms.c +++ b/arch/sparc64/kernel/sparc64_ksyms.c @@ -124,10 +124,6 @@ EXPORT_SYMBOL(__write_lock); EXPORT_SYMBOL(__write_unlock); EXPORT_SYMBOL(__write_trylock); -/* CPU online map and active count. 
*/ -EXPORT_SYMBOL(cpu_online_map); -EXPORT_SYMBOL(phys_cpu_present_map); - EXPORT_SYMBOL(smp_call_function); #endif /* CONFIG_SMP */ diff --git a/arch/sparc64/prom/misc.c b/arch/sparc64/prom/misc.c index 72d272c9de6..33c5b7da31e 100644 --- a/arch/sparc64/prom/misc.c +++ b/arch/sparc64/prom/misc.c @@ -96,6 +96,10 @@ void prom_cmdline(void) */ void prom_halt(void) { +#ifdef CONFIG_SUN_LDOMS + if (ldom_domaining_enabled) + ldom_power_off(); +#endif again: p1275_cmd("exit", P1275_INOUT(0, 0)); goto again; /* PROM is out to get me -DaveM */ @@ -103,6 +107,10 @@ again: void prom_halt_power_off(void) { +#ifdef CONFIG_SUN_LDOMS + if (ldom_domaining_enabled) + ldom_power_off(); +#endif p1275_cmd("SUNW,power-off", P1275_INOUT(0, 0)); /* if nothing else helps, we just halt */ diff --git a/arch/sparc64/prom/p1275.c b/arch/sparc64/prom/p1275.c index 2b32c489860..7fcccc0e19c 100644 --- a/arch/sparc64/prom/p1275.c +++ b/arch/sparc64/prom/p1275.c @@ -16,6 +16,7 @@ #include #include #include +#include struct { long prom_callback; /* 0x00 */ diff --git a/include/asm-sparc64/cpudata.h b/include/asm-sparc64/cpudata.h index 445026fbec3..0016d8b4531 100644 --- a/include/asm-sparc64/cpudata.h +++ b/include/asm-sparc64/cpudata.h @@ -80,7 +80,8 @@ struct trap_per_cpu { unsigned int dev_mondo_qmask; unsigned int resum_qmask; unsigned int nonresum_qmask; - unsigned int __pad2[3]; + unsigned int __pad2[1]; + void *hdesc; } __attribute__((aligned(64))); extern struct trap_per_cpu trap_block[NR_CPUS]; extern void init_cur_cpu_trap(struct thread_info *); diff --git a/include/asm-sparc64/hvtramp.h b/include/asm-sparc64/hvtramp.h new file mode 100644 index 00000000000..c7dd6ad056d --- /dev/null +++ b/include/asm-sparc64/hvtramp.h @@ -0,0 +1,37 @@ +#ifndef _SPARC64_HVTRAP_H +#define _SPARC64_HVTRAP_H + +#ifndef __ASSEMBLY__ + +#include + +struct hvtramp_mapping { + __u64 vaddr; + __u64 tte; +}; + +struct hvtramp_descr { + __u32 cpu; + __u32 num_mappings; + __u64 fault_info_va; + __u64 
fault_info_pa; + __u64 thread_reg; + struct hvtramp_mapping maps[2]; +}; + +extern void hv_cpu_startup(unsigned long hvdescr_pa); + +#endif + +#define HVTRAMP_DESCR_CPU 0x00 +#define HVTRAMP_DESCR_NUM_MAPPINGS 0x04 +#define HVTRAMP_DESCR_FAULT_INFO_VA 0x08 +#define HVTRAMP_DESCR_FAULT_INFO_PA 0x10 +#define HVTRAMP_DESCR_THREAD_REG 0x18 +#define HVTRAMP_DESCR_MAPS 0x20 + +#define HVTRAMP_MAPPING_VADDR 0x00 +#define HVTRAMP_MAPPING_TTE 0x08 +#define HVTRAMP_MAPPING_SIZE 0x10 + +#endif /* _SPARC64_HVTRAP_H */ diff --git a/include/asm-sparc64/hypervisor.h b/include/asm-sparc64/hypervisor.h index db2130a95d6..524d49835df 100644 --- a/include/asm-sparc64/hypervisor.h +++ b/include/asm-sparc64/hypervisor.h @@ -98,7 +98,7 @@ #define HV_FAST_MACH_EXIT 0x00 #ifndef __ASSEMBLY__ -extern void sun4v_mach_exit(unsigned long exit_core); +extern void sun4v_mach_exit(unsigned long exit_code); #endif /* Domain services. */ diff --git a/include/asm-sparc64/ldc.h b/include/asm-sparc64/ldc.h index a21996c6b15..8d17bd6bd5d 100644 --- a/include/asm-sparc64/ldc.h +++ b/include/asm-sparc64/ldc.h @@ -6,6 +6,8 @@ extern int ldom_domaining_enabled; extern void ldom_set_var(const char *var, const char *value); extern void ldom_reboot(const char *boot_command); +extern void ldom_power_off(void); +extern void ldom_startcpu_cpuid(unsigned int cpu, unsigned long thread_reg); /* The event handler will be evoked when link state changes * or data becomes available on the receive side. 
diff --git a/include/asm-sparc64/mdesc.h b/include/asm-sparc64/mdesc.h index dc372df23fb..e97c4313375 100644 --- a/include/asm-sparc64/mdesc.h +++ b/include/asm-sparc64/mdesc.h @@ -2,6 +2,7 @@ #define _SPARC64_MDESC_H #include +#include #include struct mdesc_handle; @@ -60,6 +61,8 @@ extern u64 mdesc_arc_target(struct mdesc_handle *hp, u64 arc); extern void mdesc_update(void); +extern void mdesc_fill_in_cpu_data(cpumask_t mask); + extern void sun4v_mdesc_init(void); #endif diff --git a/include/asm-sparc64/smp.h b/include/asm-sparc64/smp.h index 4fb8c4bfb84..c42c5a035c7 100644 --- a/include/asm-sparc64/smp.h +++ b/include/asm-sparc64/smp.h @@ -29,9 +29,6 @@ #include #include -extern cpumask_t phys_cpu_present_map; -#define cpu_possible_map phys_cpu_present_map - extern cpumask_t cpu_sibling_map[NR_CPUS]; extern cpumask_t cpu_core_map[NR_CPUS]; extern int sparc64_multi_core; @@ -46,6 +43,11 @@ extern int hard_smp_processor_id(void); extern void smp_fill_in_sib_core_maps(void); extern unsigned char boot_cpu_id; +#ifdef CONFIG_HOTPLUG_CPU +extern int __cpu_disable(void); +extern void __cpu_die(unsigned int cpu); +#endif + #endif /* !(__ASSEMBLY__) */ #else -- cgit v1.2.3-70-g09d2 From b14f5c100ce4c63e4c5a71ab47e71cf4a1caa9e3 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sat, 14 Jul 2007 00:45:16 -0700 Subject: [SPARC64]: Fix build regressions added by dr-cpu changes. Do not select HOTPLUG_CPU from SUN_LDOMS, that causes HOTPLUG_CPU to be selected even on non-SMP which is illegal. Only build hvtramp.o when SMP, just like trampoline.o Protect dr-cpu code in ds.c with HOTPLUG_CPU. Likewise move ldom_startcpu_cpuid() to smp.c and protect it and the call site with SUN_LDOMS && HOTPLUG_CPU. Signed-off-by: David S. 
Miller --- arch/sparc64/Kconfig | 1 - arch/sparc64/kernel/Makefile | 5 ++-- arch/sparc64/kernel/ds.c | 54 +++++--------------------------------------- arch/sparc64/kernel/smp.c | 52 +++++++++++++++++++++++++++++++++++++++++- include/asm-sparc64/ldc.h | 1 - 5 files changed, 59 insertions(+), 54 deletions(-) (limited to 'include') diff --git a/arch/sparc64/Kconfig b/arch/sparc64/Kconfig index 3c2e3397caf..b84b6af1241 100644 --- a/arch/sparc64/Kconfig +++ b/arch/sparc64/Kconfig @@ -316,7 +316,6 @@ config SUN_IO config SUN_LDOMS bool "Sun Logical Domains support" - select HOTPLUG_CPU help Say Y here is you want to support virtual devices via Logical Domains. diff --git a/arch/sparc64/kernel/Makefile b/arch/sparc64/kernel/Makefile index 62db93c148c..b66876bf410 100644 --- a/arch/sparc64/kernel/Makefile +++ b/arch/sparc64/kernel/Makefile @@ -12,14 +12,13 @@ obj-y := process.o setup.o cpu.o idprom.o \ irq.o ptrace.o time.o sys_sparc.o signal.o \ unaligned.o central.o pci.o starfire.o semaphore.o \ power.o sbus.o iommu_common.o sparc64_ksyms.o chmc.o \ - visemul.o prom.o of_device.o hvapi.o sstate.o mdesc.o \ - hvtramp.o + visemul.o prom.o of_device.o hvapi.o sstate.o mdesc.o obj-$(CONFIG_STACKTRACE) += stacktrace.o obj-$(CONFIG_PCI) += ebus.o isa.o pci_common.o pci_iommu.o \ pci_psycho.o pci_sabre.o pci_schizo.o \ pci_sun4v.o pci_sun4v_asm.o pci_fire.o -obj-$(CONFIG_SMP) += smp.o trampoline.o +obj-$(CONFIG_SMP) += smp.o trampoline.o hvtramp.o obj-$(CONFIG_SPARC32_COMPAT) += sys32.o sys_sparc32.o signal32.o obj-$(CONFIG_BINFMT_ELF32) += binfmt_elf32.o obj-$(CONFIG_BINFMT_AOUT32) += binfmt_aout32.o diff --git a/arch/sparc64/kernel/ds.c b/arch/sparc64/kernel/ds.c index b82c03a25d9..2e4114fba14 100644 --- a/arch/sparc64/kernel/ds.c +++ b/arch/sparc64/kernel/ds.c @@ -148,9 +148,11 @@ static void domain_shutdown_data(struct ldc_channel *lp, static void domain_panic_data(struct ldc_channel *lp, struct ds_cap_state *cp, void *buf, int len); +#ifdef CONFIG_HOTPLUG_CPU static void 
dr_cpu_data(struct ldc_channel *lp, struct ds_cap_state *cp, void *buf, int len); +#endif static void ds_pri_data(struct ldc_channel *lp, struct ds_cap_state *cp, void *buf, int len); @@ -171,10 +173,12 @@ struct ds_cap_state ds_states[] = { .service_id = "domain-panic", .data = domain_panic_data, }, +#ifdef CONFIG_HOTPLUG_CPU { .service_id = "dr-cpu", .data = dr_cpu_data, }, +#endif { .service_id = "pri", .data = ds_pri_data, @@ -355,6 +359,7 @@ static void domain_panic_data(struct ldc_channel *lp, panic("PANIC requested by LDOM manager."); } +#ifdef CONFIG_HOTPLUG_CPU struct dr_cpu_tag { __u64 req_num; __u32 type; @@ -395,54 +400,6 @@ static unsigned long kimage_addr_to_ra(void *p) return kern_base + (val - KERNBASE); } -void ldom_startcpu_cpuid(unsigned int cpu, unsigned long thread_reg) -{ - extern unsigned long sparc64_ttable_tl0; - extern unsigned long kern_locked_tte_data; - extern int bigkernel; - struct hvtramp_descr *hdesc; - unsigned long trampoline_ra; - struct trap_per_cpu *tb; - u64 tte_vaddr, tte_data; - unsigned long hv_err; - - hdesc = kzalloc(sizeof(*hdesc), GFP_KERNEL); - if (!hdesc) { - printk(KERN_ERR PFX "ldom_startcpu_cpuid: Cannot allocate " - "hvtramp_descr.\n"); - return; - } - - hdesc->cpu = cpu; - hdesc->num_mappings = (bigkernel ? 
2 : 1); - - tb = &trap_block[cpu]; - tb->hdesc = hdesc; - - hdesc->fault_info_va = (unsigned long) &tb->fault_info; - hdesc->fault_info_pa = kimage_addr_to_ra(&tb->fault_info); - - hdesc->thread_reg = thread_reg; - - tte_vaddr = (unsigned long) KERNBASE; - tte_data = kern_locked_tte_data; - - hdesc->maps[0].vaddr = tte_vaddr; - hdesc->maps[0].tte = tte_data; - if (bigkernel) { - tte_vaddr += 0x400000; - tte_data += 0x400000; - hdesc->maps[1].vaddr = tte_vaddr; - hdesc->maps[1].tte = tte_data; - } - - trampoline_ra = kimage_addr_to_ra(hv_cpu_startup); - - hv_err = sun4v_cpu_start(cpu, trampoline_ra, - kimage_addr_to_ra(&sparc64_ttable_tl0), - __pa(hdesc)); -} - /* DR cpu requests get queued onto the work list by the * dr_cpu_data() callback. The list is protected by * ds_lock, and processed by dr_cpu_process() in order. @@ -704,6 +661,7 @@ static void dr_cpu_data(struct ldc_channel *lp, schedule_work(&dr_cpu_work); } } +#endif struct ds_pri_msg { __u64 req_num; diff --git a/arch/sparc64/kernel/smp.c b/arch/sparc64/kernel/smp.c index 315eef0869b..833b284616a 100644 --- a/arch/sparc64/kernel/smp.c +++ b/arch/sparc64/kernel/smp.c @@ -281,6 +281,56 @@ static void smp_synchronize_one_tick(int cpu) spin_unlock_irqrestore(&itc_sync_lock, flags); } +#if defined(CONFIG_SUN_LDOMS) && defined(CONFIG_HOTPLUG_CPU) +static void ldom_startcpu_cpuid(unsigned int cpu, unsigned long thread_reg) +{ + extern unsigned long sparc64_ttable_tl0; + extern unsigned long kern_locked_tte_data; + extern int bigkernel; + struct hvtramp_descr *hdesc; + unsigned long trampoline_ra; + struct trap_per_cpu *tb; + u64 tte_vaddr, tte_data; + unsigned long hv_err; + + hdesc = kzalloc(sizeof(*hdesc), GFP_KERNEL); + if (!hdesc) { + printk(KERN_ERR PFX "ldom_startcpu_cpuid: Cannot allocate " + "hvtramp_descr.\n"); + return; + } + + hdesc->cpu = cpu; + hdesc->num_mappings = (bigkernel ? 
2 : 1); + + tb = &trap_block[cpu]; + tb->hdesc = hdesc; + + hdesc->fault_info_va = (unsigned long) &tb->fault_info; + hdesc->fault_info_pa = kimage_addr_to_ra(&tb->fault_info); + + hdesc->thread_reg = thread_reg; + + tte_vaddr = (unsigned long) KERNBASE; + tte_data = kern_locked_tte_data; + + hdesc->maps[0].vaddr = tte_vaddr; + hdesc->maps[0].tte = tte_data; + if (bigkernel) { + tte_vaddr += 0x400000; + tte_data += 0x400000; + hdesc->maps[1].vaddr = tte_vaddr; + hdesc->maps[1].tte = tte_data; + } + + trampoline_ra = kimage_addr_to_ra(hv_cpu_startup); + + hv_err = sun4v_cpu_start(cpu, trampoline_ra, + kimage_addr_to_ra(&sparc64_ttable_tl0), + __pa(hdesc)); +} +#endif + extern void sun4v_init_mondo_queues(int use_bootmem, int cpu, int alloc, int load); extern unsigned long sparc64_cpu_startup; @@ -309,7 +359,7 @@ static int __devinit smp_boot_one_cpu(unsigned int cpu) /* Alloc the mondo queues, cpu will load them. */ sun4v_init_mondo_queues(0, cpu, 1, 0); -#ifdef CONFIG_SUN_LDOMS +#if defined(CONFIG_SUN_LDOMS) && defined(CONFIG_HOTPLUG_CPU) if (ldom_domaining_enabled) ldom_startcpu_cpuid(cpu, (unsigned long) cpu_new_thread); diff --git a/include/asm-sparc64/ldc.h b/include/asm-sparc64/ldc.h index 8d17bd6bd5d..bdb524a7b81 100644 --- a/include/asm-sparc64/ldc.h +++ b/include/asm-sparc64/ldc.h @@ -7,7 +7,6 @@ extern int ldom_domaining_enabled; extern void ldom_set_var(const char *var, const char *value); extern void ldom_reboot(const char *boot_command); extern void ldom_power_off(void); -extern void ldom_startcpu_cpuid(unsigned int cpu, unsigned long thread_reg); /* The event handler will be evoked when link state changes * or data becomes available on the receive side. -- cgit v1.2.3-70-g09d2 From 8b99cfb8cc51adae7f5294c8962a026c63100959 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sat, 14 Jul 2007 02:23:37 -0700 Subject: [SPARC64]: More sensible udelay implementation. Take a page from the powerpc folks and just calculate the delay factor directly. 
Since frequency scaling chips use a system-tick register, the value is going to be the same system-wide. Signed-off-by: David S. Miller --- arch/sparc64/kernel/setup.c | 3 --- arch/sparc64/kernel/smp.c | 24 ------------------- arch/sparc64/kernel/sparc64_ksyms.c | 12 ++-------- arch/sparc64/kernel/sysfs.c | 2 -- arch/sparc64/kernel/time.c | 28 ++++++++++++++++------ arch/sparc64/lib/Makefile | 2 +- arch/sparc64/lib/delay.c | 46 ------------------------------------- include/asm-sparc64/bugs.h | 5 ---- include/asm-sparc64/cpudata.h | 2 +- include/asm-sparc64/delay.h | 32 +++++--------------------- 10 files changed, 31 insertions(+), 125 deletions(-) delete mode 100644 arch/sparc64/lib/delay.c (limited to 'include') diff --git a/arch/sparc64/kernel/setup.c b/arch/sparc64/kernel/setup.c index 7490cc670a5..55db632999f 100644 --- a/arch/sparc64/kernel/setup.c +++ b/arch/sparc64/kernel/setup.c @@ -442,7 +442,6 @@ static int show_cpuinfo(struct seq_file *m, void *__unused) "D$ parity tl1\t: %u\n" "I$ parity tl1\t: %u\n" #ifndef CONFIG_SMP - "Cpu0Bogo\t: %lu.%02lu\n" "Cpu0ClkTck\t: %016lx\n" #endif , @@ -457,8 +456,6 @@ static int show_cpuinfo(struct seq_file *m, void *__unused) dcache_parity_tl1_occurred, icache_parity_tl1_occurred #ifndef CONFIG_SMP - , cpu_data(0).udelay_val/(500000/HZ), - (cpu_data(0).udelay_val/(5000/HZ)) % 100, cpu_data(0).clock_tick #endif ); diff --git a/arch/sparc64/kernel/smp.c b/arch/sparc64/kernel/smp.c index 9d02b3a9bb8..69a1183c622 100644 --- a/arch/sparc64/kernel/smp.c +++ b/arch/sparc64/kernel/smp.c @@ -49,9 +49,6 @@ extern void calibrate_delay(void); int sparc64_multi_core __read_mostly; -/* Please don't make this stuff initdata!!! 
--DaveM */ -unsigned char boot_cpu_id; - cpumask_t cpu_possible_map __read_mostly = CPU_MASK_NONE; cpumask_t cpu_online_map __read_mostly = CPU_MASK_NONE; cpumask_t cpu_sibling_map[NR_CPUS] __read_mostly = @@ -82,10 +79,7 @@ void smp_bogo(struct seq_file *m) for_each_online_cpu(i) seq_printf(m, - "Cpu%dBogo\t: %lu.%02lu\n" "Cpu%dClkTck\t: %016lx\n", - i, cpu_data(i).udelay_val / (500000/HZ), - (cpu_data(i).udelay_val / (5000/HZ)) % 100, i, cpu_data(i).clock_tick); } @@ -112,8 +106,6 @@ void __devinit smp_callin(void) local_irq_enable(); - calibrate_delay(); - cpu_data(cpuid).udelay_val = loops_per_jiffy; callin_flag = 1; __asm__ __volatile__("membar #Sync\n\t" "flush %%g6" : : : "memory"); @@ -1231,11 +1223,6 @@ void smp_penguin_jailcell(int irq, struct pt_regs *regs) preempt_enable(); } -void __init smp_tick_init(void) -{ - boot_cpu_id = hard_smp_processor_id(); -} - /* /proc/profile writes can call this, don't __init it please. */ int setup_profiling_timer(unsigned int multiplier) { @@ -1244,7 +1231,6 @@ int setup_profiling_timer(unsigned int multiplier) void __init smp_prepare_cpus(unsigned int max_cpus) { - cpu_data(boot_cpu_id).udelay_val = loops_per_jiffy; } void __devinit smp_prepare_boot_cpu(void) @@ -1323,16 +1309,6 @@ void __cpu_die(unsigned int cpu) void __init smp_cpus_done(unsigned int max_cpus) { - unsigned long bogosum = 0; - int i; - - for_each_online_cpu(i) - bogosum += cpu_data(i).udelay_val; - printk("Total of %ld processors activated " - "(%lu.%02lu BogoMIPS).\n", - (long) num_online_cpus(), - bogosum/(500000/HZ), - (bogosum/(5000/HZ))%100); } void smp_send_reschedule(int cpu) diff --git a/arch/sparc64/kernel/sparc64_ksyms.c b/arch/sparc64/kernel/sparc64_ksyms.c index 51e059e36d4..719d676c2dd 100644 --- a/arch/sparc64/kernel/sparc64_ksyms.c +++ b/arch/sparc64/kernel/sparc64_ksyms.c @@ -1,7 +1,6 @@ -/* $Id: sparc64_ksyms.c,v 1.121 2002/02/09 19:49:31 davem Exp $ - * arch/sparc64/kernel/sparc64_ksyms.c: Sparc64 specific ksyms support. 
+/* arch/sparc64/kernel/sparc64_ksyms.c: Sparc64 specific ksyms support. * - * Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu) + * Copyright (C) 1996, 2007 David S. Miller (davem@davemloft.net) * Copyright (C) 1996 Eddie C. Dost (ecd@skynet.be) * Copyright (C) 1999 Jakub Jelinek (jj@ultra.linux.cz) */ @@ -28,7 +27,6 @@ #include #include -#include #include #include #include @@ -326,12 +324,6 @@ EXPORT_SYMBOL(memset); EXPORT_SYMBOL(memmove); EXPORT_SYMBOL(strncmp); -/* Delay routines. */ -EXPORT_SYMBOL(__udelay); -EXPORT_SYMBOL(__ndelay); -EXPORT_SYMBOL(__const_udelay); -EXPORT_SYMBOL(__delay); - void VISenter(void); /* RAID code needs this */ EXPORT_SYMBOL(VISenter); diff --git a/arch/sparc64/kernel/sysfs.c b/arch/sparc64/kernel/sysfs.c index cdb1477af89..52816c7be0b 100644 --- a/arch/sparc64/kernel/sysfs.c +++ b/arch/sparc64/kernel/sysfs.c @@ -193,7 +193,6 @@ static ssize_t show_##NAME(struct sys_device *dev, char *buf) \ } SHOW_CPUDATA_ULONG_NAME(clock_tick, clock_tick); -SHOW_CPUDATA_ULONG_NAME(udelay_val, udelay_val); SHOW_CPUDATA_UINT_NAME(l1_dcache_size, dcache_size); SHOW_CPUDATA_UINT_NAME(l1_dcache_line_size, dcache_line_size); SHOW_CPUDATA_UINT_NAME(l1_icache_size, icache_size); @@ -203,7 +202,6 @@ SHOW_CPUDATA_UINT_NAME(l2_cache_line_size, ecache_line_size); static struct sysdev_attribute cpu_core_attrs[] = { _SYSDEV_ATTR(clock_tick, 0444, show_clock_tick, NULL), - _SYSDEV_ATTR(udelay_val, 0444, show_udelay_val, NULL), _SYSDEV_ATTR(l1_dcache_size, 0444, show_l1_dcache_size, NULL), _SYSDEV_ATTR(l1_dcache_line_size, 0444, show_l1_dcache_line_size, NULL), _SYSDEV_ATTR(l1_icache_size, 0444, show_l1_icache_size, NULL), diff --git a/arch/sparc64/kernel/time.c b/arch/sparc64/kernel/time.c index a31a0439244..62e316ab133 100644 --- a/arch/sparc64/kernel/time.c +++ b/arch/sparc64/kernel/time.c @@ -849,9 +849,6 @@ static unsigned long sparc64_init_timers(void) { struct device_node *dp; unsigned long clock; -#ifdef CONFIG_SMP - extern void 
smp_tick_init(void); -#endif dp = of_find_node_by_path("/"); if (tlb_type == spitfire) { @@ -874,10 +871,6 @@ static unsigned long sparc64_init_timers(void) clock = of_getintprop_default(dp, "stick-frequency", 0); } -#ifdef CONFIG_SMP - smp_tick_init(); -#endif - return clock; } @@ -1038,10 +1031,31 @@ static void __init setup_clockevent_multiplier(unsigned long hz) sparc64_clockevent.mult = mult; } +static unsigned long tb_ticks_per_usec __read_mostly; + +void __delay(unsigned long loops) +{ + unsigned long bclock, now; + + bclock = tick_ops->get_tick(); + do { + now = tick_ops->get_tick(); + } while ((now-bclock) < loops); +} +EXPORT_SYMBOL(__delay); + +void udelay(unsigned long usecs) +{ + __delay(tb_ticks_per_usec * usecs); +} +EXPORT_SYMBOL(udelay); + void __init time_init(void) { unsigned long clock = sparc64_init_timers(); + tb_ticks_per_usec = clock / USEC_PER_SEC; + timer_ticks_per_nsec_quotient = clocksource_hz2mult(clock, SPARC64_NSEC_PER_CYC_SHIFT); diff --git a/arch/sparc64/lib/Makefile b/arch/sparc64/lib/Makefile index 4a725d8985f..c4a6d6e7d03 100644 --- a/arch/sparc64/lib/Makefile +++ b/arch/sparc64/lib/Makefile @@ -14,6 +14,6 @@ lib-y := PeeCeeI.o copy_page.o clear_page.o strlen.o strncmp.o \ NGmemcpy.o NGcopy_from_user.o NGcopy_to_user.o NGpatch.o \ NGpage.o NGbzero.o \ copy_in_user.o user_fixup.o memmove.o \ - mcount.o ipcsum.o rwsem.o xor.o delay.o + mcount.o ipcsum.o rwsem.o xor.o obj-y += iomap.o diff --git a/arch/sparc64/lib/delay.c b/arch/sparc64/lib/delay.c deleted file mode 100644 index fb27e54a03e..00000000000 --- a/arch/sparc64/lib/delay.c +++ /dev/null @@ -1,46 +0,0 @@ -/* delay.c: Delay loops for sparc64 - * - * Copyright (C) 2004, 2006 David S. 
Miller - * - * Based heavily upon x86 variant which is: - * Copyright (C) 1993 Linus Torvalds - * Copyright (C) 1997 Martin Mares - */ - -#include -#include - -void __delay(unsigned long loops) -{ - unsigned long bclock, now; - - bclock = tick_ops->get_tick(); - do { - now = tick_ops->get_tick(); - } while ((now-bclock) < loops); -} - -/* We used to multiply by HZ after shifting down by 32 bits - * but that runs into problems for higher values of HZ and - * slow cpus. - */ -void __const_udelay(unsigned long n) -{ - n *= 4; - - n *= (cpu_data(raw_smp_processor_id()).udelay_val * (HZ/4)); - n >>= 32; - - __delay(n + 1); -} - -void __udelay(unsigned long n) -{ - __const_udelay(n * 0x10c7UL); -} - - -void __ndelay(unsigned long n) -{ - __const_udelay(n * 0x5UL); -} diff --git a/include/asm-sparc64/bugs.h b/include/asm-sparc64/bugs.h index bf39d86c0c9..11ade684197 100644 --- a/include/asm-sparc64/bugs.h +++ b/include/asm-sparc64/bugs.h @@ -4,12 +4,7 @@ */ #include -extern unsigned long loops_per_jiffy; - static void __init check_bugs(void) { -#ifndef CONFIG_SMP - cpu_data(0).udelay_val = loops_per_jiffy; -#endif sstate_running(); } diff --git a/include/asm-sparc64/cpudata.h b/include/asm-sparc64/cpudata.h index 0016d8b4531..98a6e609163 100644 --- a/include/asm-sparc64/cpudata.h +++ b/include/asm-sparc64/cpudata.h @@ -19,7 +19,7 @@ typedef struct { unsigned int __softirq_pending; /* must be 1st, see rtrap.S */ unsigned int __pad0; unsigned long clock_tick; /* %tick's per second */ - unsigned long udelay_val; + unsigned long __pad; unsigned int __pad1; unsigned int __pad2; diff --git a/include/asm-sparc64/delay.h b/include/asm-sparc64/delay.h index a4aae6f8062..a77aa622d76 100644 --- a/include/asm-sparc64/delay.h +++ b/include/asm-sparc64/delay.h @@ -1,37 +1,17 @@ /* delay.h: Linux delay routines on sparc64. * - * Copyright (C) 1996, 2004 David S. Miller (davem@davemloft.net). 
- * - * Based heavily upon x86 variant which is: - * Copyright (C) 1993 Linus Torvalds - * - * Delay routines calling functions in arch/sparc64/lib/delay.c + * Copyright (C) 1996, 2004, 2007 David S. Miller (davem@davemloft.net). */ -#ifndef __SPARC64_DELAY_H -#define __SPARC64_DELAY_H - -#include -#include +#ifndef _SPARC64_DELAY_H +#define _SPARC64_DELAY_H #ifndef __ASSEMBLY__ -extern void __bad_udelay(void); -extern void __bad_ndelay(void); - -extern void __udelay(unsigned long usecs); -extern void __ndelay(unsigned long nsecs); -extern void __const_udelay(unsigned long usecs); extern void __delay(unsigned long loops); - -#define udelay(n) (__builtin_constant_p(n) ? \ - ((n) > 20000 ? __bad_udelay() : __const_udelay((n) * 0x10c7ul)) : \ - __udelay(n)) - -#define ndelay(n) (__builtin_constant_p(n) ? \ - ((n) > 20000 ? __bad_ndelay() : __const_udelay((n) * 5ul)) : \ - __ndelay(n)) +extern void udelay(unsigned long usecs); +#define mdelay(n) udelay((n) * 1000) #endif /* !__ASSEMBLY__ */ -#endif /* defined(__SPARC64_DELAY_H) */ +#endif /* _SPARC64_DELAY_H */ -- cgit v1.2.3-70-g09d2 From e0204409df29fe1b7d18f81dfc3ae6f9d90e7a63 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 16 Jul 2007 03:49:40 -0700 Subject: [SPARC64]: dr-cpu unconfigure support. Signed-off-by: David S. 
Miller --- arch/sparc64/kernel/ds.c | 25 +++++++- arch/sparc64/kernel/irq.c | 20 +++++++ arch/sparc64/kernel/process.c | 21 +++++-- arch/sparc64/kernel/smp.c | 118 ++++++++++++++++++++++++++++++++++---- include/asm-sparc64/irq.h | 2 + include/asm-sparc64/mmu_context.h | 3 + include/asm-sparc64/smp.h | 3 +- 7 files changed, 171 insertions(+), 21 deletions(-) (limited to 'include') diff --git a/arch/sparc64/kernel/ds.c b/arch/sparc64/kernel/ds.c index b6331718ee0..1c587107cef 100644 --- a/arch/sparc64/kernel/ds.c +++ b/arch/sparc64/kernel/ds.c @@ -20,6 +20,7 @@ #include #include #include +#include #define DRV_MODULE_NAME "ds" #define PFX DRV_MODULE_NAME ": " @@ -559,6 +560,9 @@ static int dr_cpu_configure(struct ds_cap_state *cp, u64 req_num, kfree(resp); + /* Redistribute IRQs, taking into account the new cpus. */ + fixup_irqs(); + return 0; } @@ -566,7 +570,8 @@ static int dr_cpu_unconfigure(struct ds_cap_state *cp, u64 req_num, cpumask_t *mask) { struct ds_data *resp; - int resp_len, ncpus; + int resp_len, ncpus, cpu; + unsigned long flags; ncpus = cpus_weight(*mask); resp_len = dr_cpu_size_response(ncpus); @@ -578,9 +583,25 @@ static int dr_cpu_unconfigure(struct ds_cap_state *cp, u64 req_num, resp_len, ncpus, mask, DR_CPU_STAT_UNCONFIGURED); + for_each_cpu_mask(cpu, *mask) { + int err; + + printk(KERN_INFO PFX "CPU[%d]: Shutting down cpu %d...\n", + smp_processor_id(), cpu); + err = cpu_down(cpu); + if (err) + dr_cpu_mark(resp, cpu, ncpus, + DR_CPU_RES_FAILURE, + DR_CPU_STAT_CONFIGURED); + } + + spin_lock_irqsave(&ds_lock, flags); + ds_send(ds_info->lp, resp, resp_len); + spin_unlock_irqrestore(&ds_lock, flags); + kfree(resp); - return -EOPNOTSUPP; + return 0; } static void process_dr_cpu_list(struct ds_cap_state *cp) diff --git a/arch/sparc64/kernel/irq.c b/arch/sparc64/kernel/irq.c index a1c916f35ba..8cb3358674f 100644 --- a/arch/sparc64/kernel/irq.c +++ b/arch/sparc64/kernel/irq.c @@ -803,6 +803,26 @@ void handler_irq(int irq, struct pt_regs *regs) 
set_irq_regs(old_regs); } +#ifdef CONFIG_HOTPLUG_CPU +void fixup_irqs(void) +{ + unsigned int irq; + + for (irq = 0; irq < NR_IRQS; irq++) { + unsigned long flags; + + spin_lock_irqsave(&irq_desc[irq].lock, flags); + if (irq_desc[irq].action && + !(irq_desc[irq].status & IRQ_PER_CPU)) { + if (irq_desc[irq].chip->set_affinity) + irq_desc[irq].chip->set_affinity(irq, + irq_desc[irq].affinity); + } + spin_unlock_irqrestore(&irq_desc[irq].lock, flags); + } +} +#endif + struct sun5_timer { u64 count0; u64 limit0; diff --git a/arch/sparc64/kernel/process.c b/arch/sparc64/kernel/process.c index f5f97e2c669..93557507ec9 100644 --- a/arch/sparc64/kernel/process.c +++ b/arch/sparc64/kernel/process.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include @@ -49,7 +50,7 @@ /* #define VERBOSE_SHOWREGS */ -static void sparc64_yield(void) +static void sparc64_yield(int cpu) { if (tlb_type != hypervisor) return; @@ -57,7 +58,7 @@ static void sparc64_yield(void) clear_thread_flag(TIF_POLLING_NRFLAG); smp_mb__after_clear_bit(); - while (!need_resched()) { + while (!need_resched() && !cpu_is_offline(cpu)) { unsigned long pstate; /* Disable interrupts. */ @@ -68,7 +69,7 @@ static void sparc64_yield(void) : "=&r" (pstate) : "i" (PSTATE_IE)); - if (!need_resched()) + if (!need_resched() && !cpu_is_offline(cpu)) sun4v_cpu_yield(); /* Re-enable interrupts. */ @@ -86,15 +87,25 @@ static void sparc64_yield(void) /* The idle loop on sparc64. 
*/ void cpu_idle(void) { + int cpu = smp_processor_id(); + set_thread_flag(TIF_POLLING_NRFLAG); while(1) { tick_nohz_stop_sched_tick(); - while (!need_resched()) - sparc64_yield(); + + while (!need_resched() && !cpu_is_offline(cpu)) + sparc64_yield(cpu); + tick_nohz_restart_sched_tick(); preempt_enable_no_resched(); + +#ifdef CONFIG_HOTPLUG_CPU + if (cpu_is_offline(cpu)) + cpu_play_dead(); +#endif + schedule(); preempt_disable(); } diff --git a/arch/sparc64/kernel/smp.c b/arch/sparc64/kernel/smp.c index e038ae65cb6..b448d33321c 100644 --- a/arch/sparc64/kernel/smp.c +++ b/arch/sparc64/kernel/smp.c @@ -44,6 +44,7 @@ #include #include #include +#include extern void calibrate_delay(void); @@ -62,7 +63,6 @@ EXPORT_SYMBOL(cpu_sibling_map); EXPORT_SYMBOL(cpu_core_map); static cpumask_t smp_commenced_mask; -static cpumask_t cpu_callout_map; void smp_info(struct seq_file *m) { @@ -83,6 +83,8 @@ void smp_bogo(struct seq_file *m) i, cpu_data(i).clock_tick); } +static __cacheline_aligned_in_smp DEFINE_SPINLOCK(call_lock); + extern void setup_sparc64_timer(void); static volatile unsigned long callin_flag = 0; @@ -121,7 +123,9 @@ void __devinit smp_callin(void) while (!cpu_isset(cpuid, smp_commenced_mask)) rmb(); + spin_lock(&call_lock); cpu_set(cpuid, cpu_online_map); + spin_unlock(&call_lock); /* idle thread is expected to have preempt disabled */ preempt_disable(); @@ -324,6 +328,9 @@ static void ldom_startcpu_cpuid(unsigned int cpu, unsigned long thread_reg) hv_err = sun4v_cpu_start(cpu, trampoline_ra, kimage_addr_to_ra(&sparc64_ttable_tl0), __pa(hdesc)); + if (hv_err) + printk(KERN_ERR "ldom_startcpu_cpuid: sun4v_cpu_start() " + "gives error %lu\n", hv_err); } #endif @@ -350,7 +357,6 @@ static int __devinit smp_boot_one_cpu(unsigned int cpu) p = fork_idle(cpu); callin_flag = 0; cpu_new_thread = task_thread_info(p); - cpu_set(cpu, cpu_callout_map); if (tlb_type == hypervisor) { /* Alloc the mondo queues, cpu will load them. 
*/ @@ -379,7 +385,6 @@ static int __devinit smp_boot_one_cpu(unsigned int cpu) ret = 0; } else { printk("Processor %d is stuck.\n", cpu); - cpu_clear(cpu, cpu_callout_map); ret = -ENODEV; } cpu_new_thread = NULL; @@ -791,7 +796,6 @@ struct call_data_struct { int wait; }; -static __cacheline_aligned_in_smp DEFINE_SPINLOCK(call_lock); static struct call_data_struct *call_data; extern unsigned long xcall_call_function; @@ -1241,7 +1245,7 @@ void __devinit smp_fill_in_sib_core_maps(void) { unsigned int i; - for_each_possible_cpu(i) { + for_each_present_cpu(i) { unsigned int j; cpus_clear(cpu_core_map[i]); @@ -1250,14 +1254,14 @@ void __devinit smp_fill_in_sib_core_maps(void) continue; } - for_each_possible_cpu(j) { + for_each_present_cpu(j) { if (cpu_data(i).core_id == cpu_data(j).core_id) cpu_set(j, cpu_core_map[i]); } } - for_each_possible_cpu(i) { + for_each_present_cpu(i) { unsigned int j; cpus_clear(cpu_sibling_map[i]); @@ -1266,7 +1270,7 @@ void __devinit smp_fill_in_sib_core_maps(void) continue; } - for_each_possible_cpu(j) { + for_each_present_cpu(j) { if (cpu_data(i).proc_id == cpu_data(j).proc_id) cpu_set(j, cpu_sibling_map[i]); @@ -1296,16 +1300,106 @@ int __cpuinit __cpu_up(unsigned int cpu) } #ifdef CONFIG_HOTPLUG_CPU +void cpu_play_dead(void) +{ + int cpu = smp_processor_id(); + unsigned long pstate; + + idle_task_exit(); + + if (tlb_type == hypervisor) { + struct trap_per_cpu *tb = &trap_block[cpu]; + + sun4v_cpu_qconf(HV_CPU_QUEUE_CPU_MONDO, + tb->cpu_mondo_pa, 0); + sun4v_cpu_qconf(HV_CPU_QUEUE_DEVICE_MONDO, + tb->dev_mondo_pa, 0); + sun4v_cpu_qconf(HV_CPU_QUEUE_RES_ERROR, + tb->resum_mondo_pa, 0); + sun4v_cpu_qconf(HV_CPU_QUEUE_NONRES_ERROR, + tb->nonresum_mondo_pa, 0); + } + + cpu_clear(cpu, smp_commenced_mask); + membar_safe("#Sync"); + + local_irq_disable(); + + __asm__ __volatile__( + "rdpr %%pstate, %0\n\t" + "wrpr %0, %1, %%pstate" + : "=r" (pstate) + : "i" (PSTATE_IE)); + + while (1) + barrier(); +} + int __cpu_disable(void) { - printk(KERN_ERR 
"SMP: __cpu_disable() on cpu %d\n", - smp_processor_id()); - return -ENODEV; + int cpu = smp_processor_id(); + cpuinfo_sparc *c; + int i; + + for_each_cpu_mask(i, cpu_core_map[cpu]) + cpu_clear(cpu, cpu_core_map[i]); + cpus_clear(cpu_core_map[cpu]); + + for_each_cpu_mask(i, cpu_sibling_map[cpu]) + cpu_clear(cpu, cpu_sibling_map[i]); + cpus_clear(cpu_sibling_map[cpu]); + + c = &cpu_data(cpu); + + c->core_id = 0; + c->proc_id = -1; + + spin_lock(&call_lock); + cpu_clear(cpu, cpu_online_map); + spin_unlock(&call_lock); + + smp_wmb(); + + /* Make sure no interrupts point to this cpu. */ + fixup_irqs(); + + local_irq_enable(); + mdelay(1); + local_irq_disable(); + + return 0; } void __cpu_die(unsigned int cpu) { - printk(KERN_ERR "SMP: __cpu_die(%u)\n", cpu); + int i; + + for (i = 0; i < 100; i++) { + smp_rmb(); + if (!cpu_isset(cpu, smp_commenced_mask)) + break; + msleep(100); + } + if (cpu_isset(cpu, smp_commenced_mask)) { + printk(KERN_ERR "CPU %u didn't die...\n", cpu); + } else { +#if defined(CONFIG_SUN_LDOMS) + unsigned long hv_err; + int limit = 100; + + do { + hv_err = sun4v_cpu_stop(cpu); + if (hv_err == HV_EOK) { + cpu_clear(cpu, cpu_present_map); + break; + } + } while (--limit > 0); + if (limit <= 0) { + printk(KERN_ERR "sun4v_cpu_stop() fails err=%lu\n", + hv_err); + } +#endif + } } #endif diff --git a/include/asm-sparc64/irq.h b/include/asm-sparc64/irq.h index 90781e34a95..e6c436ef935 100644 --- a/include/asm-sparc64/irq.h +++ b/include/asm-sparc64/irq.h @@ -53,6 +53,8 @@ extern unsigned int sun4v_build_msi(u32 devhandle, unsigned int *virt_irq_p, extern void sun4v_destroy_msi(unsigned int virt_irq); extern unsigned int sbus_build_irq(void *sbus, unsigned int ino); +extern void fixup_irqs(void); + static __inline__ void set_softint(unsigned long bits) { __asm__ __volatile__("wr %0, 0x0, %%set_softint" diff --git a/include/asm-sparc64/mmu_context.h b/include/asm-sparc64/mmu_context.h index 8d129032013..9fc225ed550 100644 --- 
a/include/asm-sparc64/mmu_context.h +++ b/include/asm-sparc64/mmu_context.h @@ -76,6 +76,9 @@ static inline void switch_mm(struct mm_struct *old_mm, struct mm_struct *mm, str unsigned long ctx_valid, flags; int cpu; + if (unlikely(mm == &init_mm)) + return; + spin_lock_irqsave(&mm->context.lock, flags); ctx_valid = CTX_VALID(mm->context); if (!ctx_valid) diff --git a/include/asm-sparc64/smp.h b/include/asm-sparc64/smp.h index c42c5a035c7..e8a96a31761 100644 --- a/include/asm-sparc64/smp.h +++ b/include/asm-sparc64/smp.h @@ -41,7 +41,7 @@ extern int hard_smp_processor_id(void); #define raw_smp_processor_id() (current_thread_info()->cpu) extern void smp_fill_in_sib_core_maps(void); -extern unsigned char boot_cpu_id; +extern void cpu_play_dead(void); #ifdef CONFIG_HOTPLUG_CPU extern int __cpu_disable(void); @@ -54,7 +54,6 @@ extern void __cpu_die(unsigned int cpu); #define hard_smp_processor_id() 0 #define smp_fill_in_sib_core_maps() do { } while (0) -#define boot_cpu_id (0) #endif /* !(CONFIG_SMP) */ -- cgit v1.2.3-70-g09d2