diff options
428 files changed, 17256 insertions, 10604 deletions
diff --git a/Documentation/kdump/kdump.txt b/Documentation/kdump/kdump.txt index 9691c7f5166..0705040531a 100644 --- a/Documentation/kdump/kdump.txt +++ b/Documentation/kdump/kdump.txt @@ -65,26 +65,26 @@ Install kexec-tools 2) Download the kexec-tools user-space package from the following URL: -http://www.kernel.org/pub/linux/kernel/people/horms/kexec-tools/kexec-tools-testing.tar.gz +http://www.kernel.org/pub/linux/kernel/people/horms/kexec-tools/kexec-tools.tar.gz -This is a symlink to the latest version, which at the time of writing is -20061214, the only release of kexec-tools-testing so far. As other versions -are released, the older ones will remain available at -http://www.kernel.org/pub/linux/kernel/people/horms/kexec-tools/ +This is a symlink to the latest version. -Note: Latest kexec-tools-testing git tree is available at +The latest kexec-tools git tree is available at: -git://git.kernel.org/pub/scm/linux/kernel/git/horms/kexec-tools-testing.git +git://git.kernel.org/pub/scm/linux/kernel/git/horms/kexec-tools.git or -http://www.kernel.org/git/?p=linux/kernel/git/horms/kexec-tools-testing.git;a=summary +http://www.kernel.org/git/?p=linux/kernel/git/horms/kexec-tools.git + +More information about kexec-tools can be found at +http://www.kernel.org/pub/linux/kernel/people/horms/kexec-tools/README.html 3) Unpack the tarball with the tar command, as follows: - tar xvpzf kexec-tools-testing.tar.gz + tar xvpzf kexec-tools.tar.gz 4) Change to the kexec-tools directory, as follows: - cd kexec-tools-testing-VERSION + cd kexec-tools-VERSION 5) Configure the package, as follows: diff --git a/Documentation/lguest/lguest.c b/Documentation/lguest/lguest.c index 82fafe0429f..b88b0ea54e9 100644 --- a/Documentation/lguest/lguest.c +++ b/Documentation/lguest/lguest.c @@ -36,11 +36,13 @@ #include <sched.h> #include <limits.h> #include <stddef.h> +#include <signal.h> #include "linux/lguest_launcher.h" #include "linux/virtio_config.h" #include "linux/virtio_net.h" #include "linux/virtio_blk.h" #include "linux/virtio_console.h" +#include "linux/virtio_rng.h" #include "linux/virtio_ring.h" #include "asm-x86/bootparam.h" /*L:110 We can ignore the 39 include files we need for this program, but I do @@ -64,8 +66,8 @@ typedef uint8_t u8; #endif /* We can have up to 256 pages for devices. */ #define DEVICE_PAGES 256 -/* This will occupy 2 pages: it must be a power of 2. */ -#define VIRTQUEUE_NUM 128 +/* This will occupy 3 pages: it must be a power of 2. */ +#define VIRTQUEUE_NUM 256 /*L:120 verbose is both a global flag and a macro. The C preprocessor allows * this, and although I wouldn't recommend it, it works quite nicely here. */ @@ -74,12 +76,19 @@ static bool verbose; do { if (verbose) printf(args); } while(0) /*:*/ -/* The pipe to send commands to the waker process */ -static int waker_fd; +/* File descriptors for the Waker. */ +struct { + int pipe[2]; + int lguest_fd; +} waker_fds; + /* The pointer to the start of guest memory. */ static void *guest_base; /* The maximum guest physical address allowed, and maximum possible. */ static unsigned long guest_limit, guest_max; +/* The pipe for signal hander to write to. */ +static int timeoutpipe[2]; +static unsigned int timeout_usec = 500; /* a per-cpu variable indicating whose vcpu is currently running */ static unsigned int __thread cpu_id; @@ -155,11 +164,14 @@ struct virtqueue /* Last available index we saw. */ u16 last_avail_idx; - /* The routine to call when the Guest pings us. */ - void (*handle_output)(int fd, struct virtqueue *me); + /* The routine to call when the Guest pings us, or timeout. */ + void (*handle_output)(int fd, struct virtqueue *me, bool timeout); /* Outstanding buffers */ unsigned int inflight; + + /* Is this blocked awaiting a timer? */ + bool blocked; }; /* Remember the arguments to the program so we can "reboot" */ @@ -190,6 +202,9 @@ static void *_convert(struct iovec *iov, size_t size, size_t align, return iov->iov_base; } +/* Wrapper for the last available index. Makes it easier to change. */ +#define lg_last_avail(vq) ((vq)->last_avail_idx) + /* The virtio configuration space is defined to be little-endian. x86 is * little-endian too, but it's nice to be explicit so we have these helpers. */ #define cpu_to_le16(v16) (v16) @@ -199,6 +214,33 @@ static void *_convert(struct iovec *iov, size_t size, size_t align, #define le32_to_cpu(v32) (v32) #define le64_to_cpu(v64) (v64) +/* Is this iovec empty? */ +static bool iov_empty(const struct iovec iov[], unsigned int num_iov) +{ + unsigned int i; + + for (i = 0; i < num_iov; i++) + if (iov[i].iov_len) + return false; + return true; +} + +/* Take len bytes from the front of this iovec. */ +static void iov_consume(struct iovec iov[], unsigned num_iov, unsigned len) +{ + unsigned int i; + + for (i = 0; i < num_iov; i++) { + unsigned int used; + + used = iov[i].iov_len < len ? iov[i].iov_len : len; + iov[i].iov_base += used; + iov[i].iov_len -= used; + len -= used; + } + assert(len == 0); +} + /* The device virtqueue descriptors are followed by feature bitmasks. */ static u8 *get_feature_bits(struct device *dev) { @@ -254,6 +296,7 @@ static void *map_zeroed_pages(unsigned int num) PROT_READ|PROT_WRITE|PROT_EXEC, MAP_PRIVATE, fd, 0); if (addr == MAP_FAILED) err(1, "Mmaping %u pages of /dev/zero", num); + close(fd); return addr; } @@ -540,69 +583,64 @@ static void add_device_fd(int fd) * watch, but handing a file descriptor mask through to the kernel is fairly * icky. * - * Instead, we fork off a process which watches the file descriptors and writes + * Instead, we clone off a thread which watches the file descriptors and writes * the LHREQ_BREAK command to the /dev/lguest file descriptor to tell the Host * stop running the Guest. This causes the Launcher to return from the * /dev/lguest read with -EAGAIN, where it will write to /dev/lguest to reset * the LHREQ_BREAK and wake us up again. * * This, of course, is merely a different *kind* of icky. + * + * Given my well-known antipathy to threads, I'd prefer to use processes. But + * it's easier to share Guest memory with threads, and trivial to share the + * devices.infds as the Launcher changes it. */ -static void wake_parent(int pipefd, int lguest_fd) +static int waker(void *unused) { - /* Add the pipe from the Launcher to the fdset in the device_list, so - * we watch it, too. */ - add_device_fd(pipefd); + /* Close the write end of the pipe: only the Launcher has it open. */ + close(waker_fds.pipe[1]); for (;;) { fd_set rfds = devices.infds; unsigned long args[] = { LHREQ_BREAK, 1 }; + unsigned int maxfd = devices.max_infd; + + /* We also listen to the pipe from the Launcher. */ + FD_SET(waker_fds.pipe[0], &rfds); + if (waker_fds.pipe[0] > maxfd) + maxfd = waker_fds.pipe[0]; /* Wait until input is ready from one of the devices. */ - select(devices.max_infd+1, &rfds, NULL, NULL, NULL); - /* Is it a message from the Launcher? */ - if (FD_ISSET(pipefd, &rfds)) { - int fd; - /* If read() returns 0, it means the Launcher has - * exited. We silently follow. */ - if (read(pipefd, &fd, sizeof(fd)) == 0) - exit(0); - /* Otherwise it's telling us to change what file - * descriptors we're to listen to. Positive means - * listen to a new one, negative means stop - * listening. */ - if (fd >= 0) - FD_SET(fd, &devices.infds); - else - FD_CLR(-fd - 1, &devices.infds); - } else /* Send LHREQ_BREAK command. */ - pwrite(lguest_fd, args, sizeof(args), cpu_id); + select(maxfd+1, &rfds, NULL, NULL, NULL); + + /* Message from Launcher? */ + if (FD_ISSET(waker_fds.pipe[0], &rfds)) { + char c; + /* If this fails, then assume Launcher has exited. + * Don't do anything on exit: we're just a thread! */ + if (read(waker_fds.pipe[0], &c, 1) != 1) + _exit(0); + continue; + } + + /* Send LHREQ_BREAK command to snap the Launcher out of it. */ + pwrite(waker_fds.lguest_fd, args, sizeof(args), cpu_id); } + return 0; } /* This routine just sets up a pipe to the Waker process. */ -static int setup_waker(int lguest_fd) -{ - int pipefd[2], child; - - /* We create a pipe to talk to the Waker, and also so it knows when the - * Launcher dies (and closes pipe). */ - pipe(pipefd); - child = fork(); - if (child == -1) - err(1, "forking"); - - if (child == 0) { - /* We are the Waker: close the "writing" end of our copy of the - * pipe and start waiting for input. */ - close(pipefd[1]); - wake_parent(pipefd[0], lguest_fd); - } - /* Close the reading end of our copy of the pipe. */ - close(pipefd[0]); +static void setup_waker(int lguest_fd) +{ + /* This pipe is closed when Launcher dies, telling Waker. */ + if (pipe(waker_fds.pipe) != 0) + err(1, "Creating pipe for Waker"); - /* Here is the fd used to talk to the waker. */ - return pipefd[1]; + /* Waker also needs to know the lguest fd */ + waker_fds.lguest_fd = lguest_fd; + + if (clone(waker, malloc(4096) + 4096, CLONE_VM | SIGCHLD, NULL) == -1) + err(1, "Creating Waker"); } /* @@ -661,19 +699,22 @@ static unsigned get_vq_desc(struct virtqueue *vq, unsigned int *out_num, unsigned int *in_num) { unsigned int i, head; + u16 last_avail; /* Check it isn't doing very strange things with descriptor numbers. */ - if ((u16)(vq->vring.avail->idx - vq->last_avail_idx) > vq->vring.num) + last_avail = lg_last_avail(vq); + if ((u16)(vq->vring.avail->idx - last_avail) > vq->vring.num) errx(1, "Guest moved used index from %u to %u", - vq->last_avail_idx, vq->vring.avail->idx); + last_avail, vq->vring.avail->idx); /* If there's nothing new since last we looked, return invalid. */ - if (vq->vring.avail->idx == vq->last_avail_idx) + if (vq->vring.avail->idx == last_avail) return vq->vring.num; /* Grab the next descriptor number they're advertising, and increment * the index we've seen. */ - head = vq->vring.avail->ring[vq->last_avail_idx++ % vq->vring.num]; + head = vq->vring.avail->ring[last_avail % vq->vring.num]; + lg_last_avail(vq)++; /* If their number is silly, that's a fatal mistake. */ if (head >= vq->vring.num) @@ -821,8 +862,8 @@ static bool handle_console_input(int fd, struct device *dev) unsigned long args[] = { LHREQ_BREAK, 0 }; /* Close the fd so Waker will know it has to * exit. */ - close(waker_fd); - /* Just in case waker is blocked in BREAK, send + close(waker_fds.pipe[1]); + /* Just in case Waker is blocked in BREAK, send * unbreak now. */ write(fd, args, sizeof(args)); exit(2); @@ -839,7 +880,7 @@ static bool handle_console_input(int fd, struct device *dev) /* Handling output for console is simple: we just get all the output buffers * and write them to stdout. */ -static void handle_console_output(int fd, struct virtqueue *vq) +static void handle_console_output(int fd, struct virtqueue *vq, bool timeout) { unsigned int head, out, in; int len; @@ -854,6 +895,21 @@ static void handle_console_output(int fd, struct virtqueue *vq) } } +static void block_vq(struct virtqueue *vq) +{ + struct itimerval itm; + + vq->vring.used->flags |= VRING_USED_F_NO_NOTIFY; + vq->blocked = true; + + itm.it_interval.tv_sec = 0; + itm.it_interval.tv_usec = 0; + itm.it_value.tv_sec = 0; + itm.it_value.tv_usec = timeout_usec; + + setitimer(ITIMER_REAL, &itm, NULL); +} + /* * The Network * @@ -861,22 +917,34 @@ static void handle_console_output(int fd, struct virtqueue *vq) * and write them (ignoring the first element) to this device's file descriptor * (/dev/net/tun). */ -static void handle_net_output(int fd, struct virtqueue *vq) +static void handle_net_output(int fd, struct virtqueue *vq, bool timeout) { - unsigned int head, out, in; + unsigned int head, out, in, num = 0; int len; struct iovec iov[vq->vring.num]; + static int last_timeout_num; /* Keep getting output buffers from the Guest until we run out. */ while ((head = get_vq_desc(vq, iov, &out, &in)) != vq->vring.num) { if (in) errx(1, "Input buffers in output queue?"); - /* Check header, but otherwise ignore it (we told the Guest we - * supported no features, so it shouldn't have anything - * interesting). */ - (void)convert(&iov[0], struct virtio_net_hdr); - len = writev(vq->dev->fd, iov+1, out-1); + len = writev(vq->dev->fd, iov, out); + if (len < 0) + err(1, "Writing network packet to tun"); add_used_and_trigger(fd, vq, head, len); + num++; + } + + /* Block further kicks and set up a timer if we saw anything. */ + if (!timeout && num) + block_vq(vq); + + if (timeout) { + if (num < last_timeout_num) + timeout_usec += 10; + else if (timeout_usec > 1) + timeout_usec--; + last_timeout_num = num; } } @@ -887,7 +955,6 @@ static bool handle_tun_input(int fd, struct device *dev) unsigned int head, in_num, out_num; int len; struct iovec iov[dev->vq->vring.num]; - struct virtio_net_hdr *hdr; /* First we need a network buffer from the Guests's recv virtqueue. */ head = get_vq_desc(dev->vq, iov, &out_num, &in_num); @@ -896,25 +963,23 @@ static bool handle_tun_input(int fd, struct device *dev) * early, the Guest won't be ready yet. Wait until the device * status says it's ready. */ /* FIXME: Actually want DRIVER_ACTIVE here. */ - if (dev->desc->status & VIRTIO_CONFIG_S_DRIVER_OK) - warn("network: no dma buffer!"); + + /* Now tell it we want to know if new things appear. */ + dev->vq->vring.used->flags &= ~VRING_USED_F_NO_NOTIFY; + wmb(); + /* We'll turn this back on if input buffers are registered. */ return false; } else if (out_num) errx(1, "Output buffers in network recv queue?"); - /* First element is the header: we set it to 0 (no features). */ - hdr = convert(&iov[0], struct virtio_net_hdr); - hdr->flags = 0; - hdr->gso_type = VIRTIO_NET_HDR_GSO_NONE; - /* Read the packet from the device directly into the Guest's buffer. */ - len = readv(dev->fd, iov+1, in_num-1); + len = readv(dev->fd, iov, in_num); if (len <= 0) err(1, "reading network"); /* Tell the Guest about the new packet. */ - add_used_and_trigger(fd, dev->vq, head, sizeof(*hdr) + len); + add_used_and_trigger(fd, dev->vq, head, len); verbose("tun input packet len %i [%02x %02x] (%s)\n", len, ((u8 *)iov[1].iov_base)[0], ((u8 *)iov[1].iov_base)[1], @@ -927,11 +992,18 @@ static bool handle_tun_input(int fd, struct device *dev) /*L:215 This is the callback attached to the network and console input * virtqueues: it ensures we try again, in case we stopped console or net * delivery because Guest didn't have any buffers. */ -static void enable_fd(int fd, struct virtqueue *vq) +static void enable_fd(int fd, struct virtqueue *vq, bool timeout) { add_device_fd(vq->dev->fd); - /* Tell waker to listen to it again */ - write(waker_fd, &vq->dev->fd, sizeof(vq->dev->fd)); + /* Snap the Waker out of its select loop. */ + write(waker_fds.pipe[1], "", 1); +} + +static void net_enable_fd(int fd, struct virtqueue *vq, bool timeout) +{ + /* We don't need to know again when Guest refills receive buffer. */ + vq->vring.used->flags |= VRING_USED_F_NO_NOTIFY; + enable_fd(fd, vq, timeout); } /* When the Guest tells us they updated the status field, we handle it. */ @@ -951,7 +1023,7 @@ static void update_device_status(struct device *dev) for (vq = dev->vq; vq; vq = vq->next) { memset(vq->vring.desc, 0, vring_size(vq->config.num, getpagesize())); - vq->last_avail_idx = 0; + lg_last_avail(vq) = 0; } } else if (dev->desc->status & VIRTIO_CONFIG_S_FAILED) { warnx("Device %s configuration FAILED", dev->name); @@ -960,10 +1032,10 @@ static void update_device_status(struct device *dev) verbose("Device %s OK: offered", dev->name); for (i = 0; i < dev->desc->feature_len; i++) - verbose(" %08x", get_feature_bits(dev)[i]); + verbose(" %02x", get_feature_bits(dev)[i]); verbose(", accepted"); for (i = 0; i < dev->desc->feature_len; i++) - verbose(" %08x", get_feature_bits(dev) + verbose(" %02x", get_feature_bits(dev) [dev->desc->feature_len+i]); if (dev->ready) @@ -1000,7 +1072,7 @@ static void handle_output(int fd, unsigned long addr) if (strcmp(vq->dev->name, "console") != 0) verbose("Output to %s\n", vq->dev->name); if (vq->handle_output) - vq->handle_output(fd, vq); + vq->handle_output(fd, vq, false); return; } } @@ -1014,6 +1086,29 @@ static void handle_output(int fd, unsigned long addr) strnlen(from_guest_phys(addr), guest_limit - addr)); } +static void handle_timeout(int fd) +{ + char buf[32]; + struct device *i; + struct virtqueue *vq; + + /* Clear the pipe */ + read(timeoutpipe[0], buf, sizeof(buf)); + + /* Check each device and virtqueue: flush blocked ones. */ + for (i = devices.dev; i; i = i->next) { + for (vq = i->vq; vq; vq = vq->next) { + if (!vq->blocked) + continue; + + vq->vring.used->flags &= ~VRING_USED_F_NO_NOTIFY; + vq->blocked = false; + if (vq->handle_output) + vq->handle_output(fd, vq, true); + } + } +} + /* This is called when the Waker wakes us up: check for incoming file * descriptors. */ static void handle_input(int fd) @@ -1024,16 +1119,20 @@ static void handle_input(int fd) for (;;) { struct device *i; fd_set fds = devices.infds; + int num; + num = select(devices.max_infd+1, &fds, NULL, NULL, &poll); + /* Could get interrupted */ + if (num < 0) + continue; /* If nothing is ready, we're done. */ - if (select(devices.max_infd+1, &fds, NULL, NULL, &poll) == 0) + if (num == 0) break; /* Otherwise, call the device(s) which have readable file * descriptors and a method of handling them. */ for (i = devices.dev; i; i = i->next) { if (i->handle_input && FD_ISSET(i->fd, &fds)) { - int dev_fd; if (i->handle_input(fd, i)) continue; @@ -1043,13 +1142,12 @@ static void handle_input(int fd) * buffers to deliver into. Console also uses * it when it discovers that stdin is closed. */ FD_CLR(i->fd, &devices.infds); - /* Tell waker to ignore it too, by sending a - * negative fd number (-1, since 0 is a valid - * FD number). */ - dev_fd = -i->fd - 1; - write(waker_fd, &dev_fd, sizeof(dev_fd)); } } + + /* Is this the timeout fd? */ + if (FD_ISSET(timeoutpipe[0], &fds)) + handle_timeout(fd); } } @@ -1098,7 +1196,7 @@ static struct lguest_device_desc *new_dev_desc(u16 type) /* Each device descriptor is followed by the description of its virtqueues. We * specify how many descriptors the virtqueue is to have. */ static void add_virtqueue(struct device *dev, unsigned int num_descs, - void (*handle_output)(int fd, struct virtqueue *me)) + void (*handle_output)(int, struct virtqueue *, bool)) { unsigned int pages; struct virtqueue **i, *vq = malloc(sizeof(*vq)); @@ -1114,6 +1212,7 @@ static void add_virtqueue(struct device *dev, unsigned int num_descs, vq->last_avail_idx = 0; vq->dev = dev; vq->inflight = 0; + vq->blocked = false; /* Initialize the configuration. */ vq->config.num = num_descs; @@ -1246,6 +1345,24 @@ static void setup_console(void) } /*:*/ +static void timeout_alarm(int sig) +{ + write(timeoutpipe[1], "", 1); +} + +static void setup_timeout(void) +{ + if (pipe(timeoutpipe) != 0) + err(1, "Creating timeout pipe"); + + if (fcntl(timeoutpipe[1], F_SETFL, + fcntl(timeoutpipe[1], F_GETFL) | O_NONBLOCK) != 0) + err(1, "Making timeout pipe nonblocking"); + + add_device_fd(timeoutpipe[0]); + signal(SIGALRM, timeout_alarm); +} + /*M:010 Inter-guest networking is an interesting area. Simplest is to have a * --sharenet=<name> option which opens or creates a named pipe. This can be * used to send packets to another guest in a 1:1 manner. @@ -1264,10 +1381,25 @@ static void setup_console(void) static u32 str2ip(const char *ipaddr) { - unsigned int byte[4]; + unsigned int b[4]; - sscanf(ipaddr, "%u.%u.%u.%u", &byte[0], &byte[1], &byte[2], &byte[3]); - return (byte[0] << 24) | (byte[1] << 16) | (byte[2] << 8) | byte[3]; + if (sscanf(ipaddr, "%u.%u.%u.%u", &b[0], &b[1], &b[2], &b[3]) != 4) + errx(1, "Failed to parse IP address '%s'", ipaddr); + return (b[0] << 24) | (b[1] << 16) | (b[2] << 8) | b[3]; +} + +static void str2mac(const char *macaddr, unsigned char mac[6]) +{ + unsigned int m[6]; + if (sscanf(macaddr, "%02x:%02x:%02x:%02x:%02x:%02x", + &m[0], &m[1], &m[2], &m[3], &m[4], &m[5]) != 6) + errx(1, "Failed to parse mac address '%s'", macaddr); + mac[0] = m[0]; + mac[1] = m[1]; + mac[2] = m[2]; + mac[3] = m[3]; + mac[4] = m[4]; + mac[5] = m[5]; } /* This code is "adapted" from libbridge: it attaches the Host end of the @@ -1288,6 +1420,7 @@ static void add_to_bridge(int fd, const char *if_name, const char *br_name) errx(1, "interface %s does not exist!", if_name); strncpy(ifr.ifr_name, br_name, IFNAMSIZ); + ifr.ifr_name[IFNAMSIZ-1] = '\0'; ifr.ifr_ifindex = ifidx; if (ioctl(fd, SIOCBRADDIF, &ifr) < 0) err(1, "can't add %s to bridge %s", if_name, br_name); @@ -1296,64 +1429,90 @@ static void add_to_bridge(int fd, const char *if_name, const char *br_name) /* This sets up the Host end of the network device with an IP address, brings * it up so packets will flow, the copies the MAC address into the hwaddr * pointer. */ -static void configure_device(int fd, const char *devname, u32 ipaddr, - unsigned char hwaddr[6]) +static void configure_device(int fd, const char *tapif, u32 ipaddr) { struct ifreq ifr; struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr; - /* Don't read these incantations. Just cut & paste them like I did! */ memset(&ifr, 0, sizeof(ifr)); - strcpy(ifr.ifr_name, devname); + strcpy(ifr.ifr_name, tapif); + + /* Don't read these incantations. Just cut & paste them like I did! */ sin->sin_family = AF_INET; sin->sin_addr.s_addr = htonl(ipaddr); if (ioctl(fd, SIOCSIFADDR, &ifr) != 0) - err(1, "Setting %s interface address", devname); + err(1, "Setting %s interface address", tapif); ifr.ifr_flags = IFF_UP; if (ioctl(fd, SIOCSIFFLAGS, &ifr) != 0) - err(1, "Bringing interface %s up", devname); + err(1, "Bringing interface %s up", tapif); +} + +static void get_mac(int fd, const char *tapif, unsigned char hwaddr[6]) +{ + struct ifreq ifr; + + memset(&ifr, 0, sizeof(ifr)); + strcpy(ifr.ifr_name, tapif); /* SIOC stands for Socket I/O Control. G means Get (vs S for Set * above). IF means Interface, and HWADDR is hardware address. * Simple! */ if (ioctl(fd, SIOCGIFHWADDR, &ifr) != 0) - err(1, "getting hw address for %s", devname); + err(1, "getting hw address for %s", tapif); memcpy(hwaddr, ifr.ifr_hwaddr.sa_data, 6); } -/*L:195 Our network is a Host<->Guest network. This can either use bridging or - * routing, but the principle is the same: it uses the "tun" device to inject - * packets into the Host as if they came in from a normal network card. We - * just shunt packets between the Guest and the tun device. */ -static void setup_tun_net(const char *arg) +static int get_tun_device(char tapif[IFNAMSIZ]) { - struct device *dev; struct ifreq ifr; - int netfd, ipfd; - u32 ip; - const char *br_name = NULL; - struct virtio_net_config conf; + int netfd; + + /* Start with this zeroed. Messy but sure. */ + memset(&ifr, 0, sizeof(ifr)); /* We open the /dev/net/tun device and tell it we want a tap device. A * tap device is like a tun device, only somehow different. To tell * the truth, I completely blundered my way through this code, but it * works now! */ netfd = open_or_die("/dev/net/tun", O_RDWR); - memset(&ifr, 0, sizeof(ifr)); - ifr.ifr_flags = IFF_TAP | IFF_NO_PI; + ifr.ifr_flags = IFF_TAP | IFF_NO_PI | IFF_VNET_HDR; strcpy(ifr.ifr_name, "tap%d"); if (ioctl(netfd, TUNSETIFF, &ifr) != 0) err(1, "configuring /dev/net/tun"); + + if (ioctl(netfd, TUNSETOFFLOAD, + TUN_F_CSUM|TUN_F_TSO4|TUN_F_TSO6|TUN_F_TSO_ECN) != 0) + err(1, "Could not set features for tun device"); + /* We don't need checksums calculated for packets coming in this * device: trust us! */ ioctl(netfd, TUNSETNOCSUM, 1); + memcpy(tapif, ifr.ifr_name, IFNAMSIZ); + return netfd; +} + +/*L:195 Our network is a Host<->Guest network. This can either use bridging or + * routing, but the principle is the same: it uses the "tun" device to inject + * packets into the Host as if they came in from a normal network card. We + * just shunt packets between the Guest and the tun device. */ +static void setup_tun_net(char *arg) +{ + struct device *dev; + int netfd, ipfd; + u32 ip = INADDR_ANY; + bool bridging = false; + char tapif[IFNAMSIZ], *p; + struct virtio_net_config conf; + + netfd = get_tun_device(tapif); + /* First we create a new network device. */ dev = new_device("net", VIRTIO_ID_NET, netfd, handle_tun_input); /* Network devices need a receive and a send queue, just like * console. */ - add_virtqueue(dev, VIRTQUEUE_NUM, enable_fd); + add_virtqueue(dev, VIRTQUEUE_NUM, net_enable_fd); add_virtqueue(dev, VIRTQUEUE_NUM, handle_net_output); /* We need a socket to perform the magic network ioctls to bring up the @@ -1364,28 +1523,56 @@ static void setup_tun_net(const char *arg) /* If the command line was --tunnet=bridge:<name> do bridging. */ if (!strncmp(BRIDGE_PFX, arg, strlen(BRIDGE_PFX))) { - ip = INADDR_ANY; - br_name = arg + strlen(BRIDGE_PFX); - add_to_bridge(ipfd, ifr.ifr_name, br_name); - } else /* It is an IP address to set up the device with */ + arg += strlen(BRIDGE_PFX); + bridging = true; + } + + /* A mac address may follow the bridge name or IP address */ + p = strchr(arg, ':'); + if (p) { + str2mac(p+1, conf.mac); + *p = '\0'; + } else { + p = arg + strlen(arg); + /* None supplied; query the randomly assigned mac. */ + get_mac(ipfd, tapif, conf.mac); + } + + /* arg is now either an IP address or a bridge name */ + if (bridging) + add_to_bridge(ipfd, tapif, arg); + else ip = str2ip(arg); - /* Set up the tun device, and get the mac address for the interface. */ - configure_device(ipfd, ifr.ifr_name, ip, conf.mac); + /* Set up the tun device. */ + configure_device(ipfd, tapif, ip); /* Tell Guest what MAC address to use. */ add_feature(dev, VIRTIO_NET_F_MAC); add_feature(dev, VIRTIO_F_NOTIFY_ON_EMPTY); + /* Expect Guest to handle everything except UFO */ + add_feature(dev, VIRTIO_NET_F_CSUM); + add_feature(dev, VIRTIO_NET_F_GUEST_CSUM); + add_feature(dev, VIRTIO_NET_F_MAC); + add_feature(dev, VIRTIO_NET_F_GUEST_TSO4); + add_feature(dev, VIRTIO_NET_F_GUEST_TSO6); + add_feature(dev, VIRTIO_NET_F_GUEST_ECN); + add_feature(dev, VIRTIO_NET_F_HOST_TSO4); + add_feature(dev, VIRTIO_NET_F_HOST_TSO6); + add_feature(dev, VIRTIO_NET_F_HOST_ECN); set_config(dev, sizeof(conf), &conf); /* We don't need the socket any more; setup is done. */ close(ipfd); - verbose("device %u: tun net %u.%u.%u.%u\n", - devices.device_num++, - (u8)(ip>>24),(u8)(ip>>16),(u8)(ip>>8),(u8)ip); - if (br_name) - verbose("attached to bridge: %s\n", br_name); + devices.device_num++; + + if (bridging) + verbose("device %u: tun %s attached to bridge: %s\n", + devices.device_num, tapif, arg); + else + verbose("device %u: tun %s: %s\n", + devices.device_num, tapif, arg); } /* Our block (disk) device should be really simple: the Guest asks for a block @@ -1550,7 +1737,7 @@ static bool handle_io_finish(int fd, struct device *dev) } /* When the Guest submits some I/O, we just need to wake the I/O thread. */ -static void handle_virtblk_output(int fd, struct virtqueue *vq) +static void handle_virtblk_output(int fd, struct virtqueue *vq, bool timeout) { struct vblk_info *vblk = vq->dev->priv; char c = 0; @@ -1621,6 +1808,64 @@ static void setup_block_file(const char *filename) verbose("device %u: virtblock %llu sectors\n", devices.device_num, le64_to_cpu(conf.capacity)); } + +/* Our random number generator device reads from /dev/random into the Guest's + * input buffers. The usual case is that the Guest doesn't want random numbers + * and so has no buffers although /dev/random is still readable, whereas + * console is the reverse. + * + * The same logic applies, however. */ +static bool handle_rng_input(int fd, struct device *dev) +{ + int len; + unsigned int head, in_num, out_num, totlen = 0; + struct iovec iov[dev->vq->vring.num]; + + /* First we need a buffer from the Guests's virtqueue. */ + head = get_vq_desc(dev->vq, iov, &out_num, &in_num); + + /* If they're not ready for input, stop listening to this file + * descriptor. We'll start again once they add an input buffer. */ + if (head == dev->vq->vring.num) + return false; + + if (out_num) + errx(1, "Output buffers in rng?"); + + /* This is why we convert to iovecs: the readv() call uses them, and so + * it reads straight into the Guest's buffer. We loop to make sure we + * fill it. */ + while (!iov_empty(iov, in_num)) { + len = readv(dev->fd, iov, in_num); + if (len <= 0) + err(1, "Read from /dev/random gave %i", len); + iov_consume(iov, in_num, len); + totlen += len; + } + + /* Tell the Guest about the new input. */ + add_used_and_trigger(fd, dev->vq, head, totlen); + + /* Everything went OK! */ + return true; +} + +/* And this creates a "hardware" random number device for the Guest. */ +static void setup_rng(void) +{ + struct device *dev; + int fd; + + fd = open_or_die("/dev/random", O_RDONLY); + + /* The device responds to return from I/O thread. */ + dev = new_device("rng", VIRTIO_ID_RNG, fd, handle_rng_input); + + /* The device has one virtqueue, where the Guest places inbufs. */ + add_virtqueue(dev, VIRTQUEUE_NUM, enable_fd); + + verbose("device %u: rng\n", devices.device_num++); +} /* That's the end of device setup. */ /*L:230 Reboot is pretty easy: clean up and exec() the Launcher afresh. */ @@ -1628,11 +1873,12 @@ static void __attribute__((noreturn)) restart_guest(void) { unsigned int i; - /* Closing pipes causes the Waker thread and io_threads to die, and - * closing /dev/lguest cleans up the Guest. Since we don't track all - * open fds, we simply close everything beyond stderr. */ + /* Since we don't track all open fds, we simply close everything beyond + * stderr. */ for (i = 3; i < FD_SETSIZE; i++) close(i); + + /* The exec automatically gets rid of the I/O and Waker threads. */ execv(main_args[0], main_args); err(1, "Could not exec %s", main_args[0]); } @@ -1663,7 +1909,7 @@ static void __attribute__((noreturn)) run_guest(int lguest_fd) /* ERESTART means that we need to reboot the guest */ } else if (errno == ERESTART) { restart_guest(); - /* EAGAIN means the Waker wanted us to look at some input. + /* EAGAIN means a signal (timeout). * Anything else means a bug or incompatible change. */ } else if (errno != EAGAIN) err(1, "Running guest failed"); @@ -1691,13 +1937,14 @@ static struct option opts[] = { { "verbose", 0, NULL, 'v' }, { "tunnet", 1, NULL, 't' }, { "block", 1, NULL, 'b' }, + { "rng", 0, NULL, 'r' }, { "initrd", 1, NULL, 'i' }, { NULL }, }; static void usage(void) { errx(1, "Usage: lguest [--verbose] " - "[--tunnet=(<ipaddr>|bridge:<bridgename>)\n" + "[--tunnet=(<ipaddr>:<macaddr>|bridge:<bridgename>:<macaddr>)\n" "|--block=<filename>|--initrd=<filename>]...\n" "<mem-in-mb> vmlinux [args...]"); } @@ -1765,6 +2012,9 @@ int main(int argc, char *argv[]) case 'b': setup_block_file(optarg); break; + case 'r': + setup_rng(); + break; case 'i': initrd_name = optarg; break; @@ -1783,6 +2033,9 @@ int main(int argc, char *argv[]) /* We always have a console device */ setup_console(); + /* We can timeout waiting for Guest network transmit. */ + setup_timeout(); + /* Now we load the kernel */ start = load_kernel(open_or_die(argv[optind+1], O_RDONLY)); @@ -1826,10 +2079,10 @@ int main(int argc, char *argv[]) * /dev/lguest file descriptor. */ lguest_fd = tell_kernel(pgdir, start); - /* We fork off a child process, which wakes the Launcher whenever one - * of the input file descriptors needs attention. We call this the - * Waker, and we'll cover it in a moment. */ - waker_fd = setup_waker(lguest_fd); + /* We clone off a thread, which wakes the Launcher whenever one of the + * input file descriptors needs attention. We call this the Waker, and + * we'll cover it in a moment. */ + setup_waker(lguest_fd); /* Finally, run the Guest. This doesn't return. */ run_guest(lguest_fd); diff --git a/Documentation/powerpc/00-INDEX b/Documentation/powerpc/00-INDEX index 3be84aa38df..29d839ce732 100644 --- a/Documentation/powerpc/00-INDEX +++ b/Documentation/powerpc/00-INDEX @@ -20,8 +20,6 @@ mpc52xx-device-tree-bindings.txt - MPC5200 Device Tree Bindings ppc_htab.txt - info about the Linux/PPC /proc/ppc_htab entry -SBC8260_memory_mapping.txt - - EST SBC8260 board info smp.txt - use and state info about Linux/PPC on MP machines sound.txt diff --git a/Documentation/powerpc/SBC8260_memory_mapping.txt b/Documentation/powerpc/SBC8260_memory_mapping.txt deleted file mode 100644 index e6e9ee0506c..00000000000 --- a/Documentation/powerpc/SBC8260_memory_mapping.txt +++ /dev/null @@ -1,197 +0,0 @@ -Please mail me (Jon Diekema, diekema_jon@si.com or diekema@cideas.com) -if you have questions, comments or corrections. - - * EST SBC8260 Linux memory mapping rules - - http://www.estc.com/ - http://www.estc.com/products/boards/SBC8260-8240_ds.html - - Initial conditions: - ------------------- - - Tasks that need to be perform by the boot ROM before control is - transferred to zImage (compressed Linux kernel): - - - Define the IMMR to 0xf0000000 - - - Initialize the memory controller so that RAM is available at - physical address 0x00000000. On the SBC8260 is this 16M (64M) - SDRAM. - - - The boot ROM should only clear the RAM that it is using. - - The reason for doing this is to enhances the chances of a - successful post mortem on a Linux panic. One of the first - items to examine is the 16k (LOG_BUF_LEN) circular console - buffer called log_buf which is defined in kernel/printk.c. - - - To enhance boot ROM performance, the I-cache can be enabled. - - Date: Mon, 22 May 2000 14:21:10 -0700 - From: Neil Russell <caret@c-side.com> - - LiMon (LInux MONitor) runs with and starts Linux with MMU - off, I-cache enabled, D-cache disabled. The I-cache doesn't - need hints from the MMU to work correctly as the D-cache - does. No D-cache means no special code to handle devices in - the presence of cache (no snooping, etc). The use of the - I-cache means that the monitor can run acceptably fast - directly from ROM, rather than having to copy it to RAM. - - - Build the board information structure (see - include/asm-ppc/est8260.h for its definition) - - - The compressed Linux kernel (zImage) contains a bootstrap loader - that is position independent; you can load it into any RAM, - ROM or FLASH memory address >= 0x00500000 (above 5 MB), or - at its link address of 0x00400000 (4 MB). - - Note: If zImage is loaded at its link address of 0x00400000 (4 MB), - then zImage will skip the step of moving itself to - its link address. - - - Load R3 with the address of the board information structure - - - Transfer control to zImage - - - The Linux console port is SMC1, and the baud rate is controlled - from the bi_baudrate field of the board information structure. - On thing to keep in mind when picking the baud rate, is that - there is no flow control on the SMC ports. I would stick - with something safe and standard like 19200. - - On the EST SBC8260, the SMC1 port is on the COM1 connector of - the board. - - - EST SBC8260 defaults: - --------------------- - - Chip - Memory Sel Bus Use - --------------------- --- --- ---------------------------------- - 0x00000000-0x03FFFFFF CS2 60x (16M or 64M)/64M SDRAM - 0x04000000-0x04FFFFFF CS4 local 4M/16M SDRAM (soldered to the board) - 0x21000000-0x21000000 CS7 60x 1B/64K Flash present detect (from the flash SIMM) - 0x21000001-0x21000001 CS7 60x 1B/64K Switches (read) and LEDs (write) - 0x22000000-0x2200FFFF CS5 60x 8K/64K EEPROM - 0xFC000000-0xFCFFFFFF CS6 60x 2M/16M flash (8 bits wide, soldered to the board) - 0xFE000000-0xFFFFFFFF CS0 60x 4M/16M flash (SIMM) - - Notes: - ------ - - - The chip selects can map 32K blocks and up (powers of 2) - - - The SDRAM machine can handled up to 128Mbytes per chip select - - - Linux uses the 60x bus memory (the SDRAM DIMM) for the - communications buffers. - - - BATs can map 128K-256Mbytes each. There are four data BATs and - four instruction BATs. Generally the data and instruction BATs - are mapped the same. - - - The IMMR must be set above the kernel virtual memory addresses, - which start at 0xC0000000. Otherwise, the kernel may crash as - soon as you start any threads or processes due to VM collisions - in the kernel or user process space. - - - Details from Dan Malek <dan_malek@mvista.com> on 10/29/1999: - - The user application virtual space consumes the first 2 Gbytes - (0x00000000 to 0x7FFFFFFF). The kernel virtual text starts at - 0xC0000000, with data following. There is a "protection hole" - between the end of kernel data and the start of the kernel - dynamically allocated space, but this space is still within - 0xCxxxxxxx. - - Obviously the kernel can't map any physical addresses 1:1 in - these ranges. - - - Details from Dan Malek <dan_malek@mvista.com> on 5/19/2000: - - During the early kernel initialization, the kernel virtual - memory allocator is not operational. Prior to this KVM - initialization, we choose to map virtual to physical addresses - 1:1. That is, the kernel virtual address exactly matches the - physical address on the bus. These mappings are typically done - in arch/ppc/kernel/head.S, or arch/ppc/mm/init.c. Only - absolutely necessary mappings should be done at this time, for - example board control registers or a serial uart. Normal device - driver initialization should map resources later when necessary. - - Although platform dependent, and certainly the case for embedded - 8xx, traditionally memory is mapped at physical address zero, - and I/O devices above physical address 0x80000000. The lowest - and highest (above 0xf0000000) I/O addresses are traditionally - used for devices or registers we need to map during kernel - initialization and prior to KVM operation. For this reason, - and since it followed prior PowerPC platform examples, I chose - to map the embedded 8xx kernel to the 0xc0000000 virtual address. - This way, we can enable the MMU to map the kernel for proper - operation, and still map a few windows before the KVM is operational. - - On some systems, you could possibly run the kernel at the - 0x80000000 or any other virtual address. It just depends upon - mapping that must be done prior to KVM operational. You can never - map devices or kernel spaces that overlap with the user virtual - space. This is why default IMMR mapping used by most BDM tools - won't work. They put the IMMR at something like 0x10000000 or - 0x02000000 for example. You simply can't map these addresses early - in the kernel, and continue proper system operation. - - The embedded 8xx/82xx kernel is mature enough that all you should - need to do is map the IMMR someplace at or above 0xf0000000 and it - should boot far enough to get serial console messages and KGDB - connected on any platform. There are lots of other subtle memory - management design features that you simply don't need to worry - about. If you are changing functions related to MMU initialization, - you are likely breaking things that are known to work and are - heading down a path of disaster and frustration. Your changes - should be to make the flexibility of the processor fit Linux, - not force arbitrary and non-workable memory mappings into Linux. - - - You don't want to change KERNELLOAD or KERNELBASE, otherwise the - virtual memory and MMU code will get confused. - - arch/ppc/Makefile:KERNELLOAD = 0xc0000000 - - include/asm-ppc/page.h:#define PAGE_OFFSET 0xc0000000 - include/asm-ppc/page.h:#define KERNELBASE PAGE_OFFSET - - - RAM is at physical address 0x00000000, and gets mapped to - virtual address 0xC0000000 for the kernel. - - - Physical addresses used by the Linux kernel: - -------------------------------------------- - - 0x00000000-0x3FFFFFFF 1GB reserved for RAM - 0xF0000000-0xF001FFFF 128K IMMR 64K used for dual port memory, - 64K for 8260 registers - - - Logical addresses used by the Linux kernel: - ------------------------------------------- - - 0xF0000000-0xFFFFFFFF 256M BAT0 (IMMR: dual port RAM, registers) - 0xE0000000-0xEFFFFFFF 256M BAT1 (I/O space for custom boards) - 0xC0000000-0xCFFFFFFF 256M BAT2 (RAM) - 0xD0000000-0xDFFFFFFF 256M BAT3 (if RAM > 256MByte) - - - EST SBC8260 Linux mapping: - -------------------------- - - DBAT0, IBAT0, cache inhibited: - - Chip - Memory Sel Use - --------------------- --- --------------------------------- - 0xF0000000-0xF001FFFF n/a IMMR: dual port RAM, registers - - DBAT1, IBAT1, cache inhibited: - diff --git a/Documentation/powerpc/dts-bindings/fsl/cpm_qe/serial.txt b/Documentation/powerpc/dts-bindings/fsl/cpm_qe/serial.txt index b35f3482e3e..2ea76d9d137 100644 --- a/Documentation/powerpc/dts-bindings/fsl/cpm_qe/serial.txt +++ b/Documentation/powerpc/dts-bindings/fsl/cpm_qe/serial.txt @@ -7,6 +7,15 @@ Currently defined compatibles: - fsl,cpm2-scc-uart - fsl,qe-uart +Modem control lines connected to GPIO controllers are listed in the gpios +property as described in booting-without-of.txt, section IX.1 in the following +order: + +CTS, RTS, DCD, DSR, DTR, and RI. + +The gpios property is optional and can be left out when control lines are +not used. + Example: serial@11a00 { @@ -18,4 +27,6 @@ Example: interrupt-parent = <&PIC>; fsl,cpm-brg = <1>; fsl,cpm-command = <00800000>; + gpios = <&gpio_c 15 0 + &gpio_d 29 0>; }; diff --git a/Documentation/rfkill.txt b/Documentation/rfkill.txt index 0843ed0163a..28b6ec87c64 100644 --- a/Documentation/rfkill.txt +++ b/Documentation/rfkill.txt @@ -390,9 +390,10 @@ rfkill lines are inactive, it must return RFKILL_STATE_SOFT_BLOCKED if its soft rfkill input line is active. Only if none of the rfkill input lines are active, will it return RFKILL_STATE_UNBLOCKED. -If it doesn't implement the get_state() hook, it must make sure that its calls -to rfkill_force_state() are enough to keep the status always up-to-date, and it -must do a rfkill_force_state() on resume from sleep. +Since the device has a hardware rfkill line, it IS subject to state changes +external to rfkill. Therefore, the driver must make sure that it calls +rfkill_force_state() to keep the status always up-to-date, and it must do a +rfkill_force_state() on resume from sleep. Every time the driver gets a notification from the card that one of its rfkill lines changed state (polling might be needed on badly designed cards that don't @@ -422,13 +423,24 @@ of the hardware is unknown), or read-write (where the hardware can be queried about its current state). The rfkill class will call the get_state hook of a device every time it needs -to know the *real* current state of the hardware. This can happen often. +to know the *real* current state of the hardware. This can happen often, but +it does not do any polling, so it is not enough on hardware that is subject +to state changes outside of the rfkill subsystem. + +Therefore, calling rfkill_force_state() when a state change happens is +mandatory when the device has a hardware rfkill line, or when something else +like the firmware could cause its state to be changed without going through the +rfkill class. Some hardware provides events when its status changes. In these cases, it is best for the driver to not provide a get_state hook, and instead register the rfkill class *already* with the correct status, and keep it updated using rfkill_force_state() when it gets an event from the hardware. +rfkill_force_state() must be used on the device resume handlers to update the +rfkill status, should there be any chance of the device status changing during +the sleep. + There is no provision for a statically-allocated rfkill struct. You must use rfkill_allocate() to allocate one. @@ -1,7 +1,7 @@ VERSION = 2 PATCHLEVEL = 6 -SUBLEVEL = 26 -EXTRAVERSION = +SUBLEVEL = 27 +EXTRAVERSION = -rc1 NAME = Rotary Wombat # *DOCUMENTATION* diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index c8f528284a9..257033c691f 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -17,6 +17,7 @@ config ARM select HAVE_KRETPROBES if (HAVE_KPROBES) select HAVE_FTRACE if (!XIP_KERNEL) select HAVE_DYNAMIC_FTRACE if (HAVE_FTRACE) + select HAVE_GENERIC_DMA_COHERENT help The ARM series is a line of low-power-consumption RISC chip designs licensed by ARM Ltd and targeted at embedded applications and @@ -234,6 +235,7 @@ config ARCH_VERSATILE config ARCH_AT91 bool "Atmel AT91" select GENERIC_GPIO + select HAVE_CLK help This enables support for systems based on the Atmel AT91RM9200, AT91SAM9 and AT91CAP9 processors. @@ -267,7 +269,6 @@ config ARCH_EP93XX select ARM_VIC select GENERIC_GPIO select HAVE_CLK - select HAVE_CLK select ARCH_REQUIRE_GPIOLIB help This enables support for the Cirrus EP93xx series of CPUs. diff --git a/arch/arm/mm/consistent.c b/arch/arm/mm/consistent.c index 333a82a3717..db7b3e38ef1 100644 --- a/arch/arm/mm/consistent.c +++ b/arch/arm/mm/consistent.c @@ -274,6 +274,11 @@ __dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp, void * dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp) { + void *memory; + + if (dma_alloc_from_coherent(dev, size, handle, &memory)) + return memory; + if (arch_is_coherent()) { void *virt; @@ -362,6 +367,9 @@ void dma_free_coherent(struct device *dev, size_t size, void *cpu_addr, dma_addr WARN_ON(irqs_disabled()); + if (dma_release_from_coherent(dev, get_order(size), cpu_addr)) + return; + if (arch_is_coherent()) { kfree(cpu_addr); return; diff --git a/arch/cris/arch-v32/drivers/Kconfig b/arch/cris/arch-v32/drivers/Kconfig index 2a92cb1886c..7a64fcef9d0 100644 --- a/arch/cris/arch-v32/drivers/Kconfig +++ b/arch/cris/arch-v32/drivers/Kconfig @@ -641,6 +641,7 @@ config PCI bool depends on ETRAX_CARDBUS default y + select HAVE_GENERIC_DMA_COHERENT config ETRAX_IOP_FW_LOAD tristate "IO-processor hotplug firmware loading support" diff --git a/arch/cris/arch-v32/drivers/pci/dma.c b/arch/cris/arch-v32/drivers/pci/dma.c index e0364654fc4..fbe65954ee6 100644 --- a/arch/cris/arch-v32/drivers/pci/dma.c +++ b/arch/cris/arch-v32/drivers/pci/dma.c @@ -15,35 +15,16 @@ #include <linux/pci.h> #include <asm/io.h> -struct dma_coherent_mem { - void *virt_base; - u32 device_base; - int size; - int flags; - unsigned long *bitmap; -}; - void *dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t gfp) { void *ret; - struct dma_coherent_mem *mem = dev ? dev->dma_mem : NULL; int order = get_order(size); /* ignore region specifiers */ gfp &= ~(__GFP_DMA | __GFP_HIGHMEM); - if (mem) { - int page = bitmap_find_free_region(mem->bitmap, mem->size, - order); - if (page >= 0) { - *dma_handle = mem->device_base + (page << PAGE_SHIFT); - ret = mem->virt_base + (page << PAGE_SHIFT); - memset(ret, 0, size); - return ret; - } - if (mem->flags & DMA_MEMORY_EXCLUSIVE) - return NULL; - } + if (dma_alloc_from_coherent(dev, size, dma_handle, &ret)) + return ret; if (dev == NULL || (dev->coherent_dma_mask < 0xffffffff)) gfp |= GFP_DMA; @@ -60,90 +41,9 @@ void *dma_alloc_coherent(struct device *dev, size_t size, void dma_free_coherent(struct device *dev, size_t size, void *vaddr, dma_addr_t dma_handle) { - struct dma_coherent_mem *mem = dev ? dev->dma_mem : NULL; int order = get_order(size); - if (mem && vaddr >= mem->virt_base && vaddr < (mem->virt_base + (mem->size << PAGE_SHIFT))) { - int page = (vaddr - mem->virt_base) >> PAGE_SHIFT; - - bitmap_release_region(mem->bitmap, page, order); - } else + if (!dma_release_from_coherent(dev, order, vaddr)) free_pages((unsigned long)vaddr, order); } -int dma_declare_coherent_memory(struct device *dev, dma_addr_t bus_addr, - dma_addr_t device_addr, size_t size, int flags) -{ - void __iomem *mem_base; - int pages = size >> PAGE_SHIFT; - int bitmap_size = BITS_TO_LONGS(pages) * sizeof(long); - - if ((flags & (DMA_MEMORY_MAP | DMA_MEMORY_IO)) == 0) - goto out; - if (!size) - goto out; - if (dev->dma_mem) - goto out; - - /* FIXME: this routine just ignores DMA_MEMORY_INCLUDES_CHILDREN */ - - mem_base = ioremap(bus_addr, size); - if (!mem_base) - goto out; - - dev->dma_mem = kzalloc(sizeof(struct dma_coherent_mem), GFP_KERNEL); - if (!dev->dma_mem) - goto iounmap_out; - dev->dma_mem->bitmap = kzalloc(bitmap_size, GFP_KERNEL); - if (!dev->dma_mem->bitmap) - goto free1_out; - - dev->dma_mem->virt_base = mem_base; - dev->dma_mem->device_base = device_addr; - dev->dma_mem->size = pages; - dev->dma_mem->flags = flags; - - if (flags & DMA_MEMORY_MAP) - return DMA_MEMORY_MAP; - - return DMA_MEMORY_IO; - - free1_out: - kfree(dev->dma_mem); - iounmap_out: - iounmap(mem_base); - out: - return 0; -} -EXPORT_SYMBOL(dma_declare_coherent_memory); - -void dma_release_declared_memory(struct device *dev) -{ - struct dma_coherent_mem *mem = dev->dma_mem; - - if(!mem) - return; - dev->dma_mem = NULL; - iounmap(mem->virt_base); - kfree(mem->bitmap); - kfree(mem); -} -EXPORT_SYMBOL(dma_release_declared_memory); - -void *dma_mark_declared_memory_occupied(struct device *dev, - dma_addr_t device_addr, size_t size) -{ - struct dma_coherent_mem *mem = dev->dma_mem; - int pages = (size + (device_addr & ~PAGE_MASK) + PAGE_SIZE - 1) >> PAGE_SHIFT; - int pos, err; - - if (!mem) - return ERR_PTR(-EINVAL); - - pos = (device_addr - mem->device_base) >> PAGE_SHIFT; - err = bitmap_allocate_region(mem->bitmap, pos, get_order(pages)); - if (err != 0) - return ERR_PTR(err); - return mem->virt_base + (pos << PAGE_SHIFT); -} -EXPORT_SYMBOL(dma_mark_declared_memory_occupied); diff --git a/arch/ia64/mm/hugetlbpage.c b/arch/ia64/mm/hugetlbpage.c index c45fc7f5a97..b0f615759e9 100644 --- a/arch/ia64/mm/hugetlbpage.c +++ b/arch/ia64/mm/hugetlbpage.c @@ -13,6 +13,7 @@ #include <linux/mm.h> #include <linux/hugetlb.h> #include <linux/pagemap.h> +#include <linux/module.h> #include <linux/slab.h> #include <linux/sysctl.h> #include <linux/log2.h> @@ -21,7 +22,8 @@ #include <asm/tlb.h> #include <asm/tlbflush.h> -unsigned int hpage_shift=HPAGE_SHIFT_DEFAULT; +unsigned int hpage_shift = HPAGE_SHIFT_DEFAULT; +EXPORT_SYMBOL(hpage_shift); pte_t * huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz) diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index b4c4eaa5dd2..4da736e2533 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -3,6 +3,7 @@ config MIPS default y select HAVE_IDE select HAVE_OPROFILE + select HAVE_ARCH_KGDB # Horrible source of confusion. Die, die, die ... select EMBEDDED select RTC_LIB @@ -34,7 +35,6 @@ config BASLER_EXCITE select SYS_HAS_CPU_RM9000 select SYS_SUPPORTS_32BIT_KERNEL select SYS_SUPPORTS_BIG_ENDIAN - select SYS_SUPPORTS_KGDB help The eXcite is a smart camera platform manufactured by Basler Vision Technologies AG. @@ -280,7 +280,6 @@ config PMC_MSP select SYS_HAS_CPU_MIPS32_R2 select SYS_SUPPORTS_32BIT_KERNEL select SYS_SUPPORTS_BIG_ENDIAN - select SYS_SUPPORTS_KGDB select IRQ_CPU select SERIAL_8250 select SERIAL_8250_CONSOLE @@ -306,7 +305,6 @@ config PMC_YOSEMITE select SYS_SUPPORTS_64BIT_KERNEL select SYS_SUPPORTS_BIG_ENDIAN select SYS_SUPPORTS_HIGHMEM - select SYS_SUPPORTS_KGDB select SYS_SUPPORTS_SMP help Yosemite is an evaluation board for the RM9000x2 processor @@ -359,7 +357,6 @@ config SGI_IP27 select SYS_HAS_CPU_R10000 select SYS_SUPPORTS_64BIT_KERNEL select SYS_SUPPORTS_BIG_ENDIAN - select SYS_SUPPORTS_KGDB select SYS_SUPPORTS_NUMA select SYS_SUPPORTS_SMP select GENERIC_HARDIRQS_NO__DO_IRQ @@ -475,7 +472,6 @@ config SIBYTE_SWARM select SYS_HAS_CPU_SB1 select SYS_SUPPORTS_BIG_ENDIAN select SYS_SUPPORTS_HIGHMEM - select SYS_SUPPORTS_KGDB select SYS_SUPPORTS_LITTLE_ENDIAN select ZONE_DMA32 if 64BIT @@ -868,7 +864,6 @@ config SOC_PNX8550 select SYS_HAS_EARLY_PRINTK select SYS_SUPPORTS_32BIT_KERNEL select GENERIC_HARDIRQS_NO__DO_IRQ - select SYS_SUPPORTS_KGDB select GENERIC_GPIO config SWAP_IO_SPACE diff --git a/arch/mips/Kconfig.debug b/arch/mips/Kconfig.debug index f18cf92650e..765c8e287d2 100644 --- a/arch/mips/Kconfig.debug +++ b/arch/mips/Kconfig.debug @@ -34,28 +34,6 @@ config SMTC_IDLE_HOOK_DEBUG arch/mips/kernel/smtc.c. This debugging option result in significant overhead so should be disabled in production kernels. -config KGDB - bool "Remote GDB kernel debugging" - depends on DEBUG_KERNEL && SYS_SUPPORTS_KGDB - select DEBUG_INFO - help - If you say Y here, it will be possible to remotely debug the MIPS - kernel using gdb. This enlarges your kernel image disk size by - several megabytes and requires a machine with more than 16 MB, - better 32 MB RAM to avoid excessive linking time. This is only - useful for kernel hackers. If unsure, say N. - -config SYS_SUPPORTS_KGDB - bool - -config GDB_CONSOLE - bool "Console output to GDB" - depends on KGDB - help - If you are using GDB for remote debugging over a serial port and - would like kernel messages to be formatted into GDB $O packets so - that GDB prints them as program output, say 'Y'. - config SB1XXX_CORELIS bool "Corelis Debugger" depends on SIBYTE_SB1xxx_SOC diff --git a/arch/mips/au1000/Kconfig b/arch/mips/au1000/Kconfig index 1fe97cccead..e4a057d80ab 100644 --- a/arch/mips/au1000/Kconfig +++ b/arch/mips/au1000/Kconfig @@ -134,4 +134,3 @@ config SOC_AU1X00 select SYS_HAS_CPU_MIPS32_R1 select SYS_SUPPORTS_32BIT_KERNEL select SYS_SUPPORTS_APM_EMULATION - select SYS_SUPPORTS_KGDB diff --git a/arch/mips/au1000/common/Makefile b/arch/mips/au1000/common/Makefile index dd0e19dacfc..df48fd65bbf 100644 --- a/arch/mips/au1000/common/Makefile +++ b/arch/mips/au1000/common/Makefile @@ -9,7 +9,6 @@ obj-y += prom.o irq.o puts.o time.o reset.o \ au1xxx_irqmap.o clocks.o platform.o power.o setup.o \ sleeper.o cputable.o dma.o dbdma.o gpio.o -obj-$(CONFIG_KGDB) += dbg_io.o obj-$(CONFIG_PCI) += pci.o EXTRA_CFLAGS += -Werror diff --git a/arch/mips/au1000/common/dbg_io.c b/arch/mips/au1000/common/dbg_io.c deleted file mode 100644 index af5be7df2f2..00000000000 --- a/arch/mips/au1000/common/dbg_io.c +++ /dev/null @@ -1,109 +0,0 @@ -#include <linux/types.h> - -#include <asm/mach-au1x00/au1000.h> - -#ifdef CONFIG_KGDB - -/* - * FIXME the user should be able to select the - * uart to be used for debugging. - */ -#define DEBUG_BASE UART_DEBUG_BASE - -#define UART16550_BAUD_2400 2400 -#define UART16550_BAUD_4800 4800 -#define UART16550_BAUD_9600 9600 -#define UART16550_BAUD_19200 19200 -#define UART16550_BAUD_38400 38400 -#define UART16550_BAUD_57600 57600 -#define UART16550_BAUD_115200 115200 - -#define UART16550_PARITY_NONE 0 -#define UART16550_PARITY_ODD 0x08 -#define UART16550_PARITY_EVEN 0x18 -#define UART16550_PARITY_MARK 0x28 -#define UART16550_PARITY_SPACE 0x38 - -#define UART16550_DATA_5BIT 0x0 -#define UART16550_DATA_6BIT 0x1 -#define UART16550_DATA_7BIT 0x2 -#define UART16550_DATA_8BIT 0x3 - -#define UART16550_STOP_1BIT 0x0 -#define UART16550_STOP_2BIT 0x4 - - -#define UART_RX 0 /* Receive buffer */ -#define UART_TX 4 /* Transmit buffer */ -#define UART_IER 8 /* Interrupt Enable Register */ -#define UART_IIR 0xC /* Interrupt ID Register */ -#define UART_FCR 0x10 /* FIFO Control Register */ -#define UART_LCR 0x14 /* Line Control Register */ -#define UART_MCR 0x18 /* Modem Control Register */ -#define UART_LSR 0x1C /* Line Status Register */ -#define UART_MSR 0x20 /* Modem Status Register */ -#define UART_CLK 0x28 /* Baud Rat4e Clock Divider */ -#define UART_MOD_CNTRL 0x100 /* Module Control */ - -/* memory-mapped read/write of the port */ -#define UART16550_READ(y) (au_readl(DEBUG_BASE + y) & 0xff) -#define UART16550_WRITE(y, z) (au_writel(z & 0xff, DEBUG_BASE + y)) - -extern unsigned long calc_clock(void); - -void debugInit(u32 baud, u8 data, u8 parity, u8 stop) -{ - if (UART16550_READ(UART_MOD_CNTRL) != 0x3) - UART16550_WRITE(UART_MOD_CNTRL, 3); - calc_clock(); - - /* disable interrupts */ - UART16550_WRITE(UART_IER, 0); - - /* set up baud rate */ - { - u32 divisor; - - /* set divisor */ - divisor = get_au1x00_uart_baud_base() / baud; - UART16550_WRITE(UART_CLK, divisor & 0xffff); - } - - /* set data format */ - UART16550_WRITE(UART_LCR, (data | parity | stop)); -} - -static int remoteDebugInitialized; - -u8 getDebugChar(void) -{ - if (!remoteDebugInitialized) { - remoteDebugInitialized = 1; - debugInit(UART16550_BAUD_115200, - UART16550_DATA_8BIT, - UART16550_PARITY_NONE, - UART16550_STOP_1BIT); - } - - while ((UART16550_READ(UART_LSR) & 0x1) == 0); - return UART16550_READ(UART_RX); -} - - -int putDebugChar(u8 byte) -{ - if (!remoteDebugInitialized) { - remoteDebugInitialized = 1; - debugInit(UART16550_BAUD_115200, - UART16550_DATA_8BIT, - UART16550_PARITY_NONE, - UART16550_STOP_1BIT); - } - - while ((UART16550_READ(UART_LSR) & 0x40) == 0); - UART16550_WRITE(UART_TX, byte); - - return 1; -} - -#endif diff --git a/arch/mips/au1000/db1x00/init.c b/arch/mips/au1000/db1x00/init.c index 5ebe0de5e45..84741351496 100644 --- a/arch/mips/au1000/db1x00/init.c +++ b/arch/mips/au1000/db1x00/init.c @@ -57,6 +57,6 @@ void __init prom_init(void) if (!memsize_str) memsize = 0x04000000; else - memsize = strict_strtol(memsize_str, 0, NULL); + strict_strtol(memsize_str, 0, &memsize); add_memory_region(0, memsize, BOOT_MEM_RAM); } diff --git a/arch/mips/au1000/mtx-1/init.c b/arch/mips/au1000/mtx-1/init.c index 33a4aebe0cb..3bae13c2895 100644 --- a/arch/mips/au1000/mtx-1/init.c +++ b/arch/mips/au1000/mtx-1/init.c @@ -55,6 +55,6 @@ void __init prom_init(void) if (!memsize_str) memsize = 0x04000000; else - memsize = strict_strtol(memsize_str, 0, NULL); + strict_strtol(memsize_str, 0, &memsize); add_memory_region(0, memsize, BOOT_MEM_RAM); } diff --git a/arch/mips/au1000/pb1000/init.c b/arch/mips/au1000/pb1000/init.c index 3837365d613..8a9c7d57208 100644 --- a/arch/mips/au1000/pb1000/init.c +++ b/arch/mips/au1000/pb1000/init.c @@ -52,6 +52,6 @@ void __init prom_init(void) if (!memsize_str) memsize = 0x04000000; else - memsize = strict_strtol(memsize_str, 0, NULL); + strict_strtol(memsize_str, 0, &memsize); add_memory_region(0, memsize, BOOT_MEM_RAM); } diff --git a/arch/mips/au1000/pb1100/init.c b/arch/mips/au1000/pb1100/init.c index 8355483f3de..7c6792308bc 100644 --- a/arch/mips/au1000/pb1100/init.c +++ b/arch/mips/au1000/pb1100/init.c @@ -54,7 +54,7 @@ void __init prom_init(void) if (!memsize_str) memsize = 0x04000000; else - memsize = strict_strtol(memsize_str, 0, NULL); + strict_strtol(memsize_str, 0, &memsize); add_memory_region(0, memsize, BOOT_MEM_RAM); } diff --git a/arch/mips/au1000/pb1200/init.c b/arch/mips/au1000/pb1200/init.c index 09fd63b8606..e9b2a0fd48a 100644 --- a/arch/mips/au1000/pb1200/init.c +++ b/arch/mips/au1000/pb1200/init.c @@ -53,6 +53,6 @@ void __init prom_init(void) if (!memsize_str) memsize = 0x08000000; else - memsize = strict_strtol(memsize_str, 0, NULL); + strict_strtol(memsize_str, 0, &memsize); add_memory_region(0, memsize, BOOT_MEM_RAM); } diff --git a/arch/mips/au1000/pb1500/init.c b/arch/mips/au1000/pb1500/init.c index 49f51e16586..3b6e395cf95 100644 --- a/arch/mips/au1000/pb1500/init.c +++ b/arch/mips/au1000/pb1500/init.c @@ -53,6 +53,6 @@ void __init prom_init(void) if (!memsize_str) memsize = 0x04000000; else - memsize = strict_strtol(memsize_str, 0, NULL); + strict_strtol(memsize_str, 0, &memsize); add_memory_region(0, memsize, BOOT_MEM_RAM); } diff --git a/arch/mips/au1000/pb1550/init.c b/arch/mips/au1000/pb1550/init.c index 1b5f58434bb..e1055a13a1a 100644 --- a/arch/mips/au1000/pb1550/init.c +++ b/arch/mips/au1000/pb1550/init.c @@ -53,6 +53,6 @@ void __init prom_init(void) if (!memsize_str) memsize = 0x08000000; else - memsize = strict_strtol(memsize_str, 0, NULL); + strict_strtol(memsize_str, 0, &memsize); add_memory_region(0, memsize, BOOT_MEM_RAM); } diff --git a/arch/mips/au1000/xxs1500/init.c b/arch/mips/au1000/xxs1500/init.c index b849bf501c0..7516434760a 100644 --- a/arch/mips/au1000/xxs1500/init.c +++ b/arch/mips/au1000/xxs1500/init.c @@ -53,6 +53,6 @@ void __init prom_init(void) if (!memsize_str) memsize = 0x04000000; else - memsize = strict_strtol(memsize_str, 0, NULL); + strict_strtol(memsize_str, 0, &memsize); add_memory_region(0, memsize, BOOT_MEM_RAM); } diff --git a/arch/mips/basler/excite/Makefile b/arch/mips/basler/excite/Makefile index 519142c2e4e..cff29cf46d0 100644 --- a/arch/mips/basler/excite/Makefile +++ b/arch/mips/basler/excite/Makefile @@ -5,5 +5,4 @@ obj-$(CONFIG_BASLER_EXCITE) += excite_irq.o excite_prom.o excite_setup.o \ excite_device.o excite_procfs.o -obj-$(CONFIG_KGDB) += excite_dbg_io.o obj-m += excite_iodev.o diff --git a/arch/mips/basler/excite/excite_dbg_io.c b/arch/mips/basler/excite/excite_dbg_io.c deleted file mode 100644 index d289e3a868c..00000000000 --- a/arch/mips/basler/excite/excite_dbg_io.c +++ /dev/null @@ -1,121 +0,0 @@ -/* - * Copyright (C) 2004 by Basler Vision Technologies AG - * Author: Thomas Koeller <thomas.koeller@baslerweb.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#include <linux/linkage.h> -#include <linux/init.h> -#include <linux/kernel.h> -#include <asm/gdb-stub.h> -#include <asm/rm9k-ocd.h> -#include <excite.h> - -#if defined(CONFIG_SERIAL_8250) && CONFIG_SERIAL_8250_NR_UARTS > 1 -#error Debug port used by serial driver -#endif - -#define UART_CLK 25000000 -#define BASE_BAUD (UART_CLK / 16) -#define REGISTER_BASE_0 0x0208UL -#define REGISTER_BASE_1 0x0238UL - -#define REGISTER_BASE_DBG REGISTER_BASE_1 - -#define CPRR 0x0004 -#define UACFG 0x0200 -#define UAINTS 0x0204 -#define UARBR (REGISTER_BASE_DBG + 0x0000) -#define UATHR (REGISTER_BASE_DBG + 0x0004) -#define UADLL (REGISTER_BASE_DBG + 0x0008) -#define UAIER (REGISTER_BASE_DBG + 0x000c) -#define UADLH (REGISTER_BASE_DBG + 0x0010) -#define UAIIR (REGISTER_BASE_DBG + 0x0014) -#define UAFCR (REGISTER_BASE_DBG + 0x0018) -#define UALCR (REGISTER_BASE_DBG + 0x001c) -#define UAMCR (REGISTER_BASE_DBG + 0x0020) -#define UALSR (REGISTER_BASE_DBG + 0x0024) -#define UAMSR (REGISTER_BASE_DBG + 0x0028) -#define UASCR (REGISTER_BASE_DBG + 0x002c) - -#define PARITY_NONE 0 -#define PARITY_ODD 0x08 -#define PARITY_EVEN 0x18 -#define PARITY_MARK 0x28 -#define PARITY_SPACE 0x38 - -#define DATA_5BIT 0x0 -#define DATA_6BIT 0x1 -#define DATA_7BIT 0x2 -#define DATA_8BIT 0x3 - -#define STOP_1BIT 0x0 -#define STOP_2BIT 0x4 - -#define BAUD_DBG 57600 -#define PARITY_DBG PARITY_NONE -#define DATA_DBG DATA_8BIT -#define STOP_DBG STOP_1BIT - -/* Initialize the serial port for KGDB debugging */ -void __init excite_kgdb_init(void) -{ - const u32 divisor = BASE_BAUD / BAUD_DBG; - - /* Take the UART out of reset */ - titan_writel(0x00ff1cff, CPRR); - titan_writel(0x00000000, UACFG); - titan_writel(0x00000002, UACFG); - - titan_writel(0x0, UALCR); - titan_writel(0x0, UAIER); - - /* Disable FIFOs */ - titan_writel(0x00, UAFCR); - - titan_writel(0x80, UALCR); - titan_writel(divisor & 0xff, UADLL); - titan_writel((divisor & 0xff00) >> 8, UADLH); - titan_writel(0x0, UALCR); - - titan_writel(DATA_DBG | PARITY_DBG | STOP_DBG, UALCR); - - /* Enable receiver interrupt */ - titan_readl(UARBR); - titan_writel(0x1, UAIER); -} - -int getDebugChar(void) -{ - while (!(titan_readl(UALSR) & 0x1)); - return titan_readl(UARBR); -} - -int putDebugChar(int data) -{ - while (!(titan_readl(UALSR) & 0x20)); - titan_writel(data, UATHR); - return 1; -} - -/* KGDB interrupt handler */ -asmlinkage void excite_kgdb_inthdl(void) -{ - if (unlikely( - ((titan_readl(UAIIR) & 0x7) == 4) - && ((titan_readl(UARBR) & 0xff) == 0x3))) - set_async_breakpoint(®s->cp0_epc); -} diff --git a/arch/mips/basler/excite/excite_irq.c b/arch/mips/basler/excite/excite_irq.c index 4903e067916..934e0a6b101 100644 --- a/arch/mips/basler/excite/excite_irq.c +++ b/arch/mips/basler/excite/excite_irq.c @@ -50,10 +50,6 @@ void __init arch_init_irq(void) mips_cpu_irq_init(); rm7k_cpu_irq_init(); rm9k_cpu_irq_init(); - -#ifdef CONFIG_KGDB - excite_kgdb_init(); -#endif } asmlinkage void plat_irq_dispatch(void) @@ -90,9 +86,6 @@ asmlinkage void plat_irq_dispatch(void) msgint = msgintflags & msgintmask & (0x1 << (TITAN_MSGINT % 0x20)); if ((pending & (1 << TITAN_IRQ)) && msgint) { ocd_writel(msgint, INTP0Clear0 + (TITAN_MSGINT / 0x20 * 0x10)); -#if defined(CONFIG_KGDB) - excite_kgdb_inthdl(); -#endif do_IRQ(TITAN_IRQ); return; } diff --git a/arch/mips/basler/excite/excite_setup.c b/arch/mips/basler/excite/excite_setup.c index 6dd8f0d46d0..d66b3b8edf2 100644 --- a/arch/mips/basler/excite/excite_setup.c +++ b/arch/mips/basler/excite/excite_setup.c @@ -95,13 +95,13 @@ static int __init excite_init_console(void) /* Take the DUART out of reset */ titan_writel(0x00ff1cff, CPRR); -#if defined(CONFIG_KGDB) || (CONFIG_SERIAL_8250_NR_UARTS > 1) +#if (CONFIG_SERIAL_8250_NR_UARTS > 1) /* Enable both ports */ titan_writel(MASK_SER0 | MASK_SER1, UACFG); #else /* Enable port #0 only */ titan_writel(MASK_SER0, UACFG); -#endif /* defined(CONFIG_KGDB) */ +#endif /* * Set up serial port #0. Do not use autodetection; the result is diff --git a/arch/mips/configs/cobalt_defconfig b/arch/mips/configs/cobalt_defconfig index 2678b7ec335..eb44b72254a 100644 --- a/arch/mips/configs/cobalt_defconfig +++ b/arch/mips/configs/cobalt_defconfig @@ -1,7 +1,7 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.23-rc5 -# Thu Sep 6 13:14:29 2007 +# Linux kernel version: 2.6.26 +# Fri Jul 25 10:25:34 2008 # CONFIG_MIPS=y @@ -10,9 +10,11 @@ CONFIG_MIPS=y # # CONFIG_MACH_ALCHEMY is not set # CONFIG_BASLER_EXCITE is not set +# CONFIG_BCM47XX is not set CONFIG_MIPS_COBALT=y # CONFIG_MACH_DECSTATION is not set # CONFIG_MACH_JAZZ is not set +# CONFIG_LASAT is not set # CONFIG_LEMOTE_FULONG is not set # CONFIG_MIPS_MALTA is not set # CONFIG_MIPS_SIM is not set @@ -24,6 +26,7 @@ CONFIG_MIPS_COBALT=y # CONFIG_PMC_YOSEMITE is not set # CONFIG_SGI_IP22 is not set # CONFIG_SGI_IP27 is not set +# CONFIG_SGI_IP28 is not set # CONFIG_SGI_IP32 is not set # CONFIG_SIBYTE_CRHINE is not set # CONFIG_SIBYTE_CARMEL is not set @@ -34,19 +37,25 @@ CONFIG_MIPS_COBALT=y # CONFIG_SIBYTE_SENTOSA is not set # CONFIG_SIBYTE_BIGSUR is not set # CONFIG_SNI_RM is not set -# CONFIG_TOSHIBA_JMR3927 is not set -# CONFIG_TOSHIBA_RBTX4927 is not set -# CONFIG_TOSHIBA_RBTX4938 is not set +# CONFIG_MACH_TX39XX is not set +# CONFIG_MACH_TX49XX is not set +# CONFIG_MIKROTIK_RB532 is not set # CONFIG_WR_PPMC is not set CONFIG_RWSEM_GENERIC_SPINLOCK=y # CONFIG_ARCH_HAS_ILOG2_U32 is not set # CONFIG_ARCH_HAS_ILOG2_U64 is not set +CONFIG_ARCH_SUPPORTS_OPROFILE=y CONFIG_GENERIC_FIND_NEXT_BIT=y CONFIG_GENERIC_HWEIGHT=y CONFIG_GENERIC_CALIBRATE_DELAY=y +CONFIG_GENERIC_CLOCKEVENTS=y CONFIG_GENERIC_TIME=y +CONFIG_GENERIC_CMOS_UPDATE=y CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER=y CONFIG_GENERIC_HARDIRQS_NO__DO_IRQ=y +CONFIG_CEVT_GT641XX=y +CONFIG_CEVT_R4K=y +CONFIG_CSRC_R4K=y CONFIG_DMA_NONCOHERENT=y CONFIG_DMA_NEED_PCI_MAP_STATE=y CONFIG_EARLY_PRINTK=y @@ -108,6 +117,7 @@ CONFIG_CPU_HAS_SYNC=y CONFIG_GENERIC_HARDIRQS=y CONFIG_GENERIC_IRQ_PROBE=y CONFIG_ARCH_FLATMEM_ENABLE=y +CONFIG_ARCH_POPULATES_NODE_MAP=y CONFIG_SELECT_MEMORY_MODEL=y CONFIG_FLATMEM_MANUAL=y # CONFIG_DISCONTIGMEM_MANUAL is not set @@ -115,10 +125,16 @@ CONFIG_FLATMEM_MANUAL=y CONFIG_FLATMEM=y CONFIG_FLAT_NODE_MEM_MAP=y # CONFIG_SPARSEMEM_STATIC is not set +# CONFIG_SPARSEMEM_VMEMMAP_ENABLE is not set +CONFIG_PAGEFLAGS_EXTENDED=y CONFIG_SPLIT_PTLOCK_CPUS=4 # CONFIG_RESOURCES_64BIT is not set CONFIG_ZONE_DMA_FLAG=0 CONFIG_VIRT_TO_BUS=y +# CONFIG_TICK_ONESHOT is not set +# CONFIG_NO_HZ is not set +# CONFIG_HIGH_RES_TIMERS is not set +CONFIG_GENERIC_CLOCKEVENTS_BUILD=y # CONFIG_HZ_48 is not set # CONFIG_HZ_100 is not set # CONFIG_HZ_128 is not set @@ -151,23 +167,28 @@ CONFIG_SYSVIPC_SYSCTL=y # CONFIG_POSIX_MQUEUE is not set # CONFIG_BSD_PROCESS_ACCT is not set # CONFIG_TASKSTATS is not set -# CONFIG_USER_NS is not set # CONFIG_AUDIT is not set # CONFIG_IKCONFIG is not set CONFIG_LOG_BUF_SHIFT=14 -CONFIG_SYSFS_DEPRECATED=y +# CONFIG_CGROUPS is not set +# CONFIG_GROUP_SCHED is not set +# CONFIG_SYSFS_DEPRECATED_V2 is not set CONFIG_RELAY=y +# CONFIG_NAMESPACES is not set # CONFIG_BLK_DEV_INITRD is not set # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set CONFIG_SYSCTL=y CONFIG_EMBEDDED=y CONFIG_SYSCTL_SYSCALL=y +CONFIG_SYSCTL_SYSCALL_CHECK=y CONFIG_KALLSYMS=y # CONFIG_KALLSYMS_EXTRA_PASS is not set CONFIG_HOTPLUG=y CONFIG_PRINTK=y CONFIG_BUG=y CONFIG_ELF_CORE=y +CONFIG_PCSPKR_PLATFORM=y +CONFIG_COMPAT_BRK=y CONFIG_BASE_FULL=y CONFIG_FUTEX=y CONFIG_ANON_INODES=y @@ -177,23 +198,37 @@ CONFIG_TIMERFD=y CONFIG_EVENTFD=y CONFIG_SHMEM=y CONFIG_VM_EVENT_COUNTERS=y -CONFIG_SLAB=y -# CONFIG_SLUB is not set +CONFIG_SLUB_DEBUG=y +# CONFIG_SLAB is not set +CONFIG_SLUB=y # CONFIG_SLOB is not set +# CONFIG_PROFILING is not set +# CONFIG_MARKERS is not set +CONFIG_HAVE_OPROFILE=y +# CONFIG_HAVE_IOREMAP_PROT is not set +# CONFIG_HAVE_KPROBES is not set +# CONFIG_HAVE_KRETPROBES is not set +# CONFIG_HAVE_DMA_ATTRS is not set +# CONFIG_USE_GENERIC_SMP_HELPERS is not set +# CONFIG_HAVE_CLK is not set +CONFIG_PROC_PAGE_MONITOR=y +CONFIG_SLABINFO=y CONFIG_RT_MUTEXES=y # CONFIG_TINY_SHMEM is not set CONFIG_BASE_SMALL=0 CONFIG_MODULES=y +# CONFIG_MODULE_FORCE_LOAD is not set CONFIG_MODULE_UNLOAD=y # CONFIG_MODULE_FORCE_UNLOAD is not set # CONFIG_MODVERSIONS is not set # CONFIG_MODULE_SRCVERSION_ALL is not set -# CONFIG_KMOD is not set +CONFIG_KMOD=y CONFIG_BLOCK=y # CONFIG_LBD is not set # CONFIG_BLK_DEV_IO_TRACE is not set # CONFIG_LSF is not set # CONFIG_BLK_DEV_BSG is not set +# CONFIG_BLK_DEV_INTEGRITY is not set # # IO Schedulers @@ -207,18 +242,18 @@ CONFIG_DEFAULT_AS=y # CONFIG_DEFAULT_CFQ is not set # CONFIG_DEFAULT_NOOP is not set CONFIG_DEFAULT_IOSCHED="anticipatory" +CONFIG_CLASSIC_RCU=y # # Bus options (PCI, PCMCIA, EISA, ISA, TC) # CONFIG_HW_HAS_PCI=y CONFIG_PCI=y +CONFIG_PCI_DOMAINS=y # CONFIG_ARCH_SUPPORTS_MSI is not set +CONFIG_PCI_LEGACY=y CONFIG_MMU=y - -# -# PCCARD (PCMCIA/CardBus) support -# +CONFIG_I8253=y # CONFIG_PCCARD is not set # CONFIG_HOTPLUG_PCI is not set @@ -232,8 +267,8 @@ CONFIG_TRAD_SIGNALS=y # # Power management options # +CONFIG_ARCH_SUSPEND_POSSIBLE=y # CONFIG_PM is not set -CONFIG_SUSPEND_UP_POSSIBLE=y # # Networking @@ -250,6 +285,7 @@ CONFIG_XFRM=y CONFIG_XFRM_USER=y # CONFIG_XFRM_SUB_POLICY is not set CONFIG_XFRM_MIGRATE=y +# CONFIG_XFRM_STATISTICS is not set CONFIG_NET_KEY=y CONFIG_NET_KEY_MIGRATE=y CONFIG_INET=y @@ -269,6 +305,7 @@ CONFIG_IP_FIB_HASH=y CONFIG_INET_XFRM_MODE_TRANSPORT=y CONFIG_INET_XFRM_MODE_TUNNEL=y CONFIG_INET_XFRM_MODE_BEET=y +# CONFIG_INET_LRO is not set CONFIG_INET_DIAG=y CONFIG_INET_TCP_DIAG=y # CONFIG_TCP_CONG_ADVANCED is not set @@ -276,8 +313,6 @@ CONFIG_TCP_CONG_CUBIC=y CONFIG_DEFAULT_TCP_CONG="cubic" # CONFIG_TCP_MD5SIG is not set # CONFIG_IPV6 is not set -# CONFIG_INET6_XFRM_TUNNEL is not set -# CONFIG_INET6_TUNNEL is not set # CONFIG_NETWORK_SECMARK is not set # CONFIG_NETFILTER is not set # CONFIG_IP_DCCP is not set @@ -294,10 +329,6 @@ CONFIG_DEFAULT_TCP_CONG="cubic" # CONFIG_LAPB is not set # CONFIG_ECONET is not set # CONFIG_WAN_ROUTER is not set - -# -# QoS and/or fair queueing -# # CONFIG_NET_SCHED is not set # @@ -305,6 +336,7 @@ CONFIG_DEFAULT_TCP_CONG="cubic" # # CONFIG_NET_PKTGEN is not set # CONFIG_HAMRADIO is not set +# CONFIG_CAN is not set # CONFIG_IRDA is not set # CONFIG_BT is not set # CONFIG_AF_RXRPC is not set @@ -326,9 +358,12 @@ CONFIG_DEFAULT_TCP_CONG="cubic" # # Generic Driver Options # +CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_STANDALONE=y CONFIG_PREVENT_FIRMWARE_BUILD=y CONFIG_FW_LOADER=y +CONFIG_FIRMWARE_IN_KERNEL=y +CONFIG_EXTRA_FIRMWARE="" # CONFIG_SYS_HYPERVISOR is not set # CONFIG_CONNECTOR is not set CONFIG_MTD=y @@ -337,6 +372,7 @@ CONFIG_MTD=y CONFIG_MTD_PARTITIONS=y # CONFIG_MTD_REDBOOT_PARTS is not set # CONFIG_MTD_CMDLINE_PARTS is not set +# CONFIG_MTD_AR7_PARTS is not set # # User Modules And Translation Layers @@ -350,6 +386,7 @@ CONFIG_MTD_BLKDEVS=y # CONFIG_INFTL is not set # CONFIG_RFD_FTL is not set # CONFIG_SSFDC is not set +# CONFIG_MTD_OOPS is not set # # RAM/ROM/Flash chip drivers @@ -384,6 +421,7 @@ CONFIG_MTD_PHYSMAP=y CONFIG_MTD_PHYSMAP_START=0x0 CONFIG_MTD_PHYSMAP_LEN=0x0 CONFIG_MTD_PHYSMAP_BANKWIDTH=0 +# CONFIG_MTD_INTEL_VR_NOR is not set # CONFIG_MTD_PLATRAM is not set # @@ -423,7 +461,9 @@ CONFIG_BLK_DEV_LOOP=y # CONFIG_BLK_DEV_RAM is not set # CONFIG_CDROM_PKTCDVD is not set # CONFIG_ATA_OVER_ETH is not set +# CONFIG_BLK_DEV_HD is not set # CONFIG_MISC_DEVICES is not set +CONFIG_HAVE_IDE=y # CONFIG_IDE is not set # @@ -462,10 +502,15 @@ CONFIG_SCSI_WAIT_SCAN=m # CONFIG_SCSI_FC_ATTRS is not set # CONFIG_SCSI_ISCSI_ATTRS is not set # CONFIG_SCSI_SAS_LIBSAS is not set +# CONFIG_SCSI_SRP_ATTRS is not set # CONFIG_SCSI_LOWLEVEL is not set +# CONFIG_SCSI_DH is not set CONFIG_ATA=y # CONFIG_ATA_NONSTANDARD is not set +CONFIG_SATA_PMP=y # CONFIG_SATA_AHCI is not set +# CONFIG_SATA_SIL24 is not set +CONFIG_ATA_SFF=y # CONFIG_SATA_SVW is not set # CONFIG_ATA_PIIX is not set # CONFIG_SATA_MV is not set @@ -475,7 +520,6 @@ CONFIG_ATA=y # CONFIG_SATA_PROMISE is not set # CONFIG_SATA_SX4 is not set # CONFIG_SATA_SIL is not set -# CONFIG_SATA_SIL24 is not set # CONFIG_SATA_SIS is not set # CONFIG_SATA_ULI is not set # CONFIG_SATA_VIA is not set @@ -504,7 +548,9 @@ CONFIG_ATA=y # CONFIG_PATA_MPIIX is not set # CONFIG_PATA_OLDPIIX is not set # CONFIG_PATA_NETCELL is not set +# CONFIG_PATA_NINJA32 is not set # CONFIG_PATA_NS87410 is not set +# CONFIG_PATA_NS87415 is not set # CONFIG_PATA_OPTI is not set # CONFIG_PATA_OPTIDMA is not set # CONFIG_PATA_PDC_OLD is not set @@ -518,29 +564,27 @@ CONFIG_ATA=y CONFIG_PATA_VIA=y # CONFIG_PATA_WINBOND is not set # CONFIG_PATA_PLATFORM is not set +# CONFIG_PATA_SCH is not set # CONFIG_MD is not set +# CONFIG_FUSION is not set # -# Fusion MPT device support +# IEEE 1394 (FireWire) support # -# CONFIG_FUSION is not set -# CONFIG_FUSION_SPI is not set -# CONFIG_FUSION_FC is not set -# CONFIG_FUSION_SAS is not set # -# IEEE 1394 (FireWire) support +# Enable only one of the two stacks, unless you know what you are doing # # CONFIG_FIREWIRE is not set # CONFIG_IEEE1394 is not set # CONFIG_I2O is not set CONFIG_NETDEVICES=y -# CONFIG_NETDEVICES_MULTIQUEUE is not set # CONFIG_DUMMY is not set # CONFIG_BONDING is not set # CONFIG_MACVLAN is not set # CONFIG_EQUALIZER is not set # CONFIG_TUN is not set +# CONFIG_VETH is not set # CONFIG_ARCNET is not set # CONFIG_PHYLIB is not set CONFIG_NET_ETHERNET=y @@ -562,7 +606,12 @@ CONFIG_TULIP=y # CONFIG_DM9102 is not set # CONFIG_ULI526X is not set # CONFIG_HP100 is not set +# CONFIG_IBM_NEW_EMAC_ZMII is not set +# CONFIG_IBM_NEW_EMAC_RGMII is not set +# CONFIG_IBM_NEW_EMAC_TAH is not set +# CONFIG_IBM_NEW_EMAC_EMAC4 is not set # CONFIG_NET_PCI is not set +# CONFIG_B44 is not set # CONFIG_NETDEV_1000 is not set # CONFIG_NETDEV_10000 is not set # CONFIG_TR is not set @@ -572,6 +621,7 @@ CONFIG_TULIP=y # # CONFIG_WLAN_PRE80211 is not set # CONFIG_WLAN_80211 is not set +# CONFIG_IWLWIFI_LEDS is not set # # USB Network Adapters @@ -580,7 +630,6 @@ CONFIG_TULIP=y # CONFIG_USB_KAWETH is not set # CONFIG_USB_PEGASUS is not set # CONFIG_USB_RTL8150 is not set -# CONFIG_USB_USBNET_MII is not set # CONFIG_USB_USBNET is not set # CONFIG_WAN is not set # CONFIG_FDDI is not set @@ -588,7 +637,6 @@ CONFIG_TULIP=y # CONFIG_PPP is not set # CONFIG_SLIP is not set # CONFIG_NET_FC is not set -# CONFIG_SHAPER is not set # CONFIG_NETCONSOLE is not set # CONFIG_NETPOLL is not set # CONFIG_NET_POLL_CONTROLLER is not set @@ -607,7 +655,6 @@ CONFIG_INPUT_POLLDEV=y # # CONFIG_INPUT_MOUSEDEV is not set # CONFIG_INPUT_JOYDEV is not set -# CONFIG_INPUT_TSDEV is not set CONFIG_INPUT_EVDEV=y # CONFIG_INPUT_EVBUG is not set @@ -642,7 +689,9 @@ CONFIG_VT=y CONFIG_VT_CONSOLE=y CONFIG_HW_CONSOLE=y CONFIG_VT_HW_CONSOLE_BINDING=y +CONFIG_DEVKMEM=y # CONFIG_SERIAL_NONSTANDARD is not set +# CONFIG_NOZOMI is not set # # Serial drivers @@ -664,65 +713,122 @@ CONFIG_UNIX98_PTYS=y CONFIG_LEGACY_PTYS=y CONFIG_LEGACY_PTY_COUNT=256 # CONFIG_IPMI_HANDLER is not set -# CONFIG_WATCHDOG is not set # CONFIG_HW_RANDOM is not set -# CONFIG_RTC is not set -CONFIG_COBALT_LCD=y # CONFIG_R3964 is not set # CONFIG_APPLICOM is not set -# CONFIG_DRM is not set # CONFIG_RAW_DRIVER is not set # CONFIG_TCG_TPM is not set CONFIG_DEVPORT=y # CONFIG_I2C is not set - -# -# SPI support -# # CONFIG_SPI is not set -# CONFIG_SPI_MASTER is not set # CONFIG_W1 is not set # CONFIG_POWER_SUPPLY is not set # CONFIG_HWMON is not set +# CONFIG_THERMAL is not set +# CONFIG_THERMAL_HWMON is not set +# CONFIG_WATCHDOG is not set + +# +# Sonics Silicon Backplane +# +CONFIG_SSB_POSSIBLE=y +# CONFIG_SSB is not set # # Multifunction device drivers # +# CONFIG_MFD_CORE is not set # CONFIG_MFD_SM501 is not set +# CONFIG_HTC_PASIC3 is not set # # Multimedia devices # + +# +# Multimedia core support +# # CONFIG_VIDEO_DEV is not set # CONFIG_DVB_CORE is not set +# CONFIG_VIDEO_MEDIA is not set + +# +# Multimedia drivers +# # CONFIG_DAB is not set # # Graphics support # +# CONFIG_DRM is not set +# CONFIG_VGASTATE is not set +# CONFIG_VIDEO_OUTPUT_CONTROL is not set +CONFIG_FB=y +# CONFIG_FIRMWARE_EDID is not set +# CONFIG_FB_DDC is not set +# CONFIG_FB_CFB_FILLRECT is not set +# CONFIG_FB_CFB_COPYAREA is not set +# CONFIG_FB_CFB_IMAGEBLIT is not set +# CONFIG_FB_CFB_REV_PIXELS_IN_BYTE is not set +# CONFIG_FB_SYS_FILLRECT is not set +# CONFIG_FB_SYS_COPYAREA is not set +# CONFIG_FB_SYS_IMAGEBLIT is not set +# CONFIG_FB_FOREIGN_ENDIAN is not set +# CONFIG_FB_SYS_FOPS is not set +# CONFIG_FB_SVGALIB is not set +# CONFIG_FB_MACMODES is not set +# CONFIG_FB_BACKLIGHT is not set +# CONFIG_FB_MODE_HELPERS is not set +# CONFIG_FB_TILEBLITTING is not set + +# +# Frame buffer hardware drivers +# +# CONFIG_FB_CIRRUS is not set +# CONFIG_FB_PM2 is not set +# CONFIG_FB_CYBER2000 is not set +# CONFIG_FB_ASILIANT is not set +# CONFIG_FB_IMSTT is not set +# CONFIG_FB_S1D13XXX is not set +# CONFIG_FB_NVIDIA is not set +# CONFIG_FB_RIVA is not set +# CONFIG_FB_MATROX is not set +# CONFIG_FB_RADEON is not set +# CONFIG_FB_ATY128 is not set +# CONFIG_FB_ATY is not set +# CONFIG_FB_S3 is not set +# CONFIG_FB_SAVAGE is not set +# CONFIG_FB_SIS is not set +# CONFIG_FB_NEOMAGIC is not set +# CONFIG_FB_KYRO is not set +# CONFIG_FB_3DFX is not set +# CONFIG_FB_VOODOO1 is not set +# CONFIG_FB_VT8623 is not set +# CONFIG_FB_TRIDENT is not set +# CONFIG_FB_ARK is not set +# CONFIG_FB_PM3 is not set +# CONFIG_FB_CARMINE is not set +CONFIG_FB_COBALT=y +# CONFIG_FB_VIRTUAL is not set # CONFIG_BACKLIGHT_LCD_SUPPORT is not set # # Display device support # # CONFIG_DISPLAY_SUPPORT is not set -# CONFIG_VGASTATE is not set -# CONFIG_VIDEO_OUTPUT_CONTROL is not set -# CONFIG_FB is not set # # Console display driver support # # CONFIG_VGA_CONSOLE is not set CONFIG_DUMMY_CONSOLE=y - -# -# Sound -# +# CONFIG_FRAMEBUFFER_CONSOLE is not set +# CONFIG_LOGO is not set # CONFIG_SOUND is not set CONFIG_HID_SUPPORT=y CONFIG_HID=m # CONFIG_HID_DEBUG is not set +# CONFIG_HIDRAW is not set # # USB Input Devices @@ -743,6 +849,7 @@ CONFIG_USB_ARCH_HAS_OHCI=y CONFIG_USB_ARCH_HAS_EHCI=y CONFIG_USB=m # CONFIG_USB_DEBUG is not set +# CONFIG_USB_ANNOUNCE_NEW_DEVICES is not set # # Miscellaneous USB options @@ -751,15 +858,18 @@ CONFIG_USB=m # CONFIG_USB_DEVICE_CLASS is not set # CONFIG_USB_DYNAMIC_MINORS is not set # CONFIG_USB_OTG is not set +# CONFIG_USB_OTG_WHITELIST is not set +# CONFIG_USB_OTG_BLACKLIST_HUB is not set # # USB Host Controller Drivers # +# CONFIG_USB_C67X00_HCD is not set CONFIG_USB_EHCI_HCD=m -# CONFIG_USB_EHCI_SPLIT_ISO is not set # CONFIG_USB_EHCI_ROOT_HUB_TT is not set # CONFIG_USB_EHCI_TT_NEWSCHED is not set # CONFIG_USB_ISP116X_HCD is not set +# CONFIG_USB_ISP1760_HCD is not set CONFIG_USB_OHCI_HCD=m # CONFIG_USB_OHCI_BIG_ENDIAN_DESC is not set # CONFIG_USB_OHCI_BIG_ENDIAN_MMIO is not set @@ -773,6 +883,7 @@ CONFIG_USB_OHCI_LITTLE_ENDIAN=y # # CONFIG_USB_ACM is not set # CONFIG_USB_PRINTER is not set +# CONFIG_USB_WDM is not set # # NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support' @@ -785,6 +896,7 @@ CONFIG_USB_STORAGE=m # CONFIG_USB_STORAGE_DEBUG is not set # CONFIG_USB_STORAGE_DATAFAB is not set # CONFIG_USB_STORAGE_FREECOM is not set +# CONFIG_USB_STORAGE_ISD200 is not set # CONFIG_USB_STORAGE_DPCM is not set # CONFIG_USB_STORAGE_USBAT is not set # CONFIG_USB_STORAGE_SDDR09 is not set @@ -793,6 +905,7 @@ CONFIG_USB_STORAGE=m # CONFIG_USB_STORAGE_ALAUDA is not set # CONFIG_USB_STORAGE_ONETOUCH is not set # CONFIG_USB_STORAGE_KARMA is not set +# CONFIG_USB_STORAGE_CYPRESS_ATACB is not set # CONFIG_USB_LIBUSUAL is not set # @@ -800,15 +913,11 @@ CONFIG_USB_STORAGE=m # # CONFIG_USB_MDC800 is not set # CONFIG_USB_MICROTEK is not set -CONFIG_USB_MON=y +# CONFIG_USB_MON is not set # # USB port drivers # - -# -# USB Serial Converter support -# # CONFIG_USB_SERIAL is not set # @@ -833,16 +942,10 @@ CONFIG_USB_MON=y # CONFIG_USB_LD is not set # CONFIG_USB_TRANCEVIBRATOR is not set # CONFIG_USB_IOWARRIOR is not set - -# -# USB DSL modem support -# - -# -# USB Gadget Support -# +# CONFIG_USB_ISIGHTFW is not set # CONFIG_USB_GADGET is not set # CONFIG_MMC is not set +# CONFIG_MEMSTICK is not set CONFIG_NEW_LEDS=y CONFIG_LEDS_CLASS=y @@ -858,6 +961,8 @@ CONFIG_LEDS_COBALT_RAQ=y CONFIG_LEDS_TRIGGERS=y # CONFIG_LEDS_TRIGGER_TIMER is not set # CONFIG_LEDS_TRIGGER_HEARTBEAT is not set +# CONFIG_LEDS_TRIGGER_DEFAULT_ON is not set +# CONFIG_ACCESSIBILITY is not set # CONFIG_INFINIBAND is not set CONFIG_RTC_LIB=y CONFIG_RTC_CLASS=y @@ -882,9 +987,10 @@ CONFIG_RTC_INTF_DEV=y # Platform RTC drivers # CONFIG_RTC_DRV_CMOS=y +# CONFIG_RTC_DRV_DS1511 is not set # CONFIG_RTC_DRV_DS1553 is not set -# CONFIG_RTC_DRV_STK17TA8 is not set # CONFIG_RTC_DRV_DS1742 is not set +# CONFIG_RTC_DRV_STK17TA8 is not set # CONFIG_RTC_DRV_M48T86 is not set # CONFIG_RTC_DRV_M48T59 is not set # CONFIG_RTC_DRV_V3020 is not set @@ -892,23 +998,7 @@ CONFIG_RTC_DRV_CMOS=y # # on-CPU RTC drivers # - -# -# DMA Engine support -# -# CONFIG_DMA_ENGINE is not set - -# -# DMA Clients -# - -# -# DMA Devices -# - -# -# Userspace I/O -# +# CONFIG_DMADEVICES is not set # CONFIG_UIO is not set # @@ -923,22 +1013,22 @@ CONFIG_EXT3_FS=y CONFIG_EXT3_FS_XATTR=y CONFIG_EXT3_FS_POSIX_ACL=y CONFIG_EXT3_FS_SECURITY=y -# CONFIG_EXT4DEV_FS is not set +CONFIG_EXT4DEV_FS=y +CONFIG_EXT4DEV_FS_XATTR=y +CONFIG_EXT4DEV_FS_POSIX_ACL=y +CONFIG_EXT4DEV_FS_SECURITY=y CONFIG_JBD=y -# CONFIG_JBD_DEBUG is not set +CONFIG_JBD2=y CONFIG_FS_MBCACHE=y # CONFIG_REISERFS_FS is not set # CONFIG_JFS_FS is not set CONFIG_FS_POSIX_ACL=y # CONFIG_XFS_FS is not set -# CONFIG_GFS2_FS is not set # CONFIG_OCFS2_FS is not set -# CONFIG_MINIX_FS is not set -# CONFIG_ROMFS_FS is not set +CONFIG_DNOTIFY=y CONFIG_INOTIFY=y CONFIG_INOTIFY_USER=y # CONFIG_QUOTA is not set -CONFIG_DNOTIFY=y # CONFIG_AUTOFS_FS is not set # CONFIG_AUTOFS4_FS is not set # CONFIG_FUSE_FS is not set @@ -967,7 +1057,6 @@ CONFIG_SYSFS=y CONFIG_TMPFS=y CONFIG_TMPFS_POSIX_ACL=y # CONFIG_HUGETLB_PAGE is not set -CONFIG_RAMFS=y CONFIG_CONFIGFS_FS=y # @@ -983,32 +1072,28 @@ CONFIG_CONFIGFS_FS=y # CONFIG_JFFS2_FS is not set # CONFIG_CRAMFS is not set # CONFIG_VXFS_FS is not set +# CONFIG_MINIX_FS is not set # CONFIG_HPFS_FS is not set # CONFIG_QNX4FS_FS is not set +# CONFIG_ROMFS_FS is not set # CONFIG_SYSV_FS is not set # CONFIG_UFS_FS is not set - -# -# Network File Systems -# +CONFIG_NETWORK_FILESYSTEMS=y CONFIG_NFS_FS=y CONFIG_NFS_V3=y CONFIG_NFS_V3_ACL=y # CONFIG_NFS_V4 is not set -# CONFIG_NFS_DIRECTIO is not set CONFIG_NFSD=y CONFIG_NFSD_V2_ACL=y CONFIG_NFSD_V3=y CONFIG_NFSD_V3_ACL=y # CONFIG_NFSD_V4 is not set -CONFIG_NFSD_TCP=y CONFIG_LOCKD=y CONFIG_LOCKD_V4=y CONFIG_EXPORTFS=y CONFIG_NFS_ACL_SUPPORT=y CONFIG_NFS_COMMON=y CONFIG_SUNRPC=y -# CONFIG_SUNRPC_BIND34 is not set # CONFIG_RPCSEC_GSS_KRB5 is not set # CONFIG_RPCSEC_GSS_SPKM3 is not set # CONFIG_SMB_FS is not set @@ -1022,34 +1107,26 @@ CONFIG_SUNRPC=y # # CONFIG_PARTITION_ADVANCED is not set CONFIG_MSDOS_PARTITION=y - -# -# Native Language Support -# # CONFIG_NLS is not set - -# -# Distributed Lock Manager -# # CONFIG_DLM is not set # -# Profiling support -# -# CONFIG_PROFILING is not set - -# # Kernel hacking # CONFIG_TRACE_IRQFLAGS_SUPPORT=y # CONFIG_PRINTK_TIME is not set +CONFIG_ENABLE_WARN_DEPRECATED=y CONFIG_ENABLE_MUST_CHECK=y +CONFIG_FRAME_WARN=1024 # CONFIG_MAGIC_SYSRQ is not set # CONFIG_UNUSED_SYMBOLS is not set # CONFIG_DEBUG_FS is not set # CONFIG_HEADERS_CHECK is not set # CONFIG_DEBUG_KERNEL is not set -CONFIG_CROSSCOMPILE=y +# CONFIG_SLUB_DEBUG_ON is not set +# CONFIG_SLUB_STATS is not set +# CONFIG_DEBUG_MEMORY_INIT is not set +# CONFIG_SAMPLES is not set CONFIG_CMDLINE="" # @@ -1057,14 +1134,95 @@ CONFIG_CMDLINE="" # # CONFIG_KEYS is not set # CONFIG_SECURITY is not set -# CONFIG_CRYPTO is not set +# CONFIG_SECURITY_FILE_CAPABILITIES is not set +CONFIG_CRYPTO=y + +# +# Crypto core or helper +# +# CONFIG_CRYPTO_MANAGER is not set +# CONFIG_CRYPTO_GF128MUL is not set +# CONFIG_CRYPTO_NULL is not set +# CONFIG_CRYPTO_CRYPTD is not set +# CONFIG_CRYPTO_AUTHENC is not set +# CONFIG_CRYPTO_TEST is not set + +# +# Authenticated Encryption with Associated Data +# +# CONFIG_CRYPTO_CCM is not set +# CONFIG_CRYPTO_GCM is not set +# CONFIG_CRYPTO_SEQIV is not set + +# +# Block modes +# +# CONFIG_CRYPTO_CBC is not set +# CONFIG_CRYPTO_CTR is not set +# CONFIG_CRYPTO_CTS is not set +# CONFIG_CRYPTO_ECB is not set +# CONFIG_CRYPTO_LRW is not set +# CONFIG_CRYPTO_PCBC is not set +# CONFIG_CRYPTO_XTS is not set + +# +# Hash modes +# +# CONFIG_CRYPTO_HMAC is not set +# CONFIG_CRYPTO_XCBC is not set + +# +# Digest +# +# CONFIG_CRYPTO_CRC32C is not set +# CONFIG_CRYPTO_MD4 is not set +# CONFIG_CRYPTO_MD5 is not set +# CONFIG_CRYPTO_MICHAEL_MIC is not set +# CONFIG_CRYPTO_RMD128 is not set +# CONFIG_CRYPTO_RMD160 is not set +# CONFIG_CRYPTO_RMD256 is not set +# CONFIG_CRYPTO_RMD320 is not set +# CONFIG_CRYPTO_SHA1 is not set +# CONFIG_CRYPTO_SHA256 is not set +# CONFIG_CRYPTO_SHA512 is not set +# CONFIG_CRYPTO_TGR192 is not set +# CONFIG_CRYPTO_WP512 is not set + +# +# Ciphers +# +# CONFIG_CRYPTO_AES is not set +# CONFIG_CRYPTO_ANUBIS is not set +# CONFIG_CRYPTO_ARC4 is not set +# CONFIG_CRYPTO_BLOWFISH is not set +# CONFIG_CRYPTO_CAMELLIA is not set +# CONFIG_CRYPTO_CAST5 is not set +# CONFIG_CRYPTO_CAST6 is not set +# CONFIG_CRYPTO_DES is not set +# CONFIG_CRYPTO_FCRYPT is not set +# CONFIG_CRYPTO_KHAZAD is not set +# CONFIG_CRYPTO_SALSA20 is not set +# CONFIG_CRYPTO_SEED is not set +# CONFIG_CRYPTO_SERPENT is not set +# CONFIG_CRYPTO_TEA is not set +# CONFIG_CRYPTO_TWOFISH is not set + +# +# Compression +# +# CONFIG_CRYPTO_DEFLATE is not set +# CONFIG_CRYPTO_LZO is not set +CONFIG_CRYPTO_HW=y +# CONFIG_CRYPTO_DEV_HIFN_795X is not set # # Library routines # CONFIG_BITREVERSE=y +# CONFIG_GENERIC_FIND_FIRST_BIT is not set # CONFIG_CRC_CCITT is not set -# CONFIG_CRC16 is not set +CONFIG_CRC16=y +# CONFIG_CRC_T10DIF is not set # CONFIG_CRC_ITU_T is not set CONFIG_CRC32=y # CONFIG_CRC7 is not set diff --git a/arch/mips/configs/db1000_defconfig b/arch/mips/configs/db1000_defconfig index ebb8ad62b3a..a279165e3a7 100644 --- a/arch/mips/configs/db1000_defconfig +++ b/arch/mips/configs/db1000_defconfig @@ -1092,7 +1092,6 @@ CONFIG_ENABLE_MUST_CHECK=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_CROSSCOMPILE=y CONFIG_CMDLINE="" -CONFIG_SYS_SUPPORTS_KGDB=y # # Security options diff --git a/arch/mips/configs/db1100_defconfig b/arch/mips/configs/db1100_defconfig index ad4e5ef6559..8944d15caf1 100644 --- a/arch/mips/configs/db1100_defconfig +++ b/arch/mips/configs/db1100_defconfig @@ -1092,7 +1092,6 @@ CONFIG_ENABLE_MUST_CHECK=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_CROSSCOMPILE=y CONFIG_CMDLINE="" -CONFIG_SYS_SUPPORTS_KGDB=y # # Security options diff --git a/arch/mips/configs/db1200_defconfig b/arch/mips/configs/db1200_defconfig index d0dc2e83ad3..ab17973107f 100644 --- a/arch/mips/configs/db1200_defconfig +++ b/arch/mips/configs/db1200_defconfig @@ -1174,7 +1174,6 @@ CONFIG_ENABLE_MUST_CHECK=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_CROSSCOMPILE=y CONFIG_CMDLINE="mem=48M" -CONFIG_SYS_SUPPORTS_KGDB=y # # Security options diff --git a/arch/mips/configs/db1500_defconfig b/arch/mips/configs/db1500_defconfig index 9155082313c..b65803f1935 100644 --- a/arch/mips/configs/db1500_defconfig +++ b/arch/mips/configs/db1500_defconfig @@ -1392,7 +1392,6 @@ CONFIG_ENABLE_MUST_CHECK=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_CROSSCOMPILE=y CONFIG_CMDLINE="" -CONFIG_SYS_SUPPORTS_KGDB=y # # Security options diff --git a/arch/mips/configs/db1550_defconfig b/arch/mips/configs/db1550_defconfig index e4e324422cd..a190ac07740 100644 --- a/arch/mips/configs/db1550_defconfig +++ b/arch/mips/configs/db1550_defconfig @@ -1209,7 +1209,6 @@ CONFIG_ENABLE_MUST_CHECK=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_CROSSCOMPILE=y CONFIG_CMDLINE="" -CONFIG_SYS_SUPPORTS_KGDB=y # # Security options diff --git a/arch/mips/configs/excite_defconfig b/arch/mips/configs/excite_defconfig index 3572e80356d..4e465e94599 100644 --- a/arch/mips/configs/excite_defconfig +++ b/arch/mips/configs/excite_defconfig @@ -1269,7 +1269,6 @@ CONFIG_ENABLE_MUST_CHECK=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_CROSSCOMPILE=y CONFIG_CMDLINE="" -CONFIG_SYS_SUPPORTS_KGDB=y # # Security options diff --git a/arch/mips/configs/ip27_defconfig b/arch/mips/configs/ip27_defconfig index 138c575a015..831d3e5a1ea 100644 --- a/arch/mips/configs/ip27_defconfig +++ b/arch/mips/configs/ip27_defconfig @@ -943,7 +943,6 @@ CONFIG_ENABLE_MUST_CHECK=y # CONFIG_DEBUG_KERNEL is not set CONFIG_CROSSCOMPILE=y CONFIG_CMDLINE="" -CONFIG_SYS_SUPPORTS_KGDB=y # # Security options diff --git a/arch/mips/configs/msp71xx_defconfig b/arch/mips/configs/msp71xx_defconfig index 59d19472b16..dd13db4d0fb 100644 --- a/arch/mips/configs/msp71xx_defconfig +++ b/arch/mips/configs/msp71xx_defconfig @@ -1415,8 +1415,6 @@ CONFIG_FORCED_INLINING=y CONFIG_CROSSCOMPILE=y CONFIG_CMDLINE="" # CONFIG_DEBUG_STACK_USAGE is not set -# CONFIG_KGDB is not set -CONFIG_SYS_SUPPORTS_KGDB=y # CONFIG_RUNTIME_DEBUG is not set # CONFIG_MIPS_UNCACHED is not set diff --git a/arch/mips/configs/mtx1_defconfig b/arch/mips/configs/mtx1_defconfig index bacf0dd0e34..db9272677aa 100644 --- a/arch/mips/configs/mtx1_defconfig +++ b/arch/mips/configs/mtx1_defconfig @@ -3020,7 +3020,6 @@ CONFIG_MAGIC_SYSRQ=y # CONFIG_DEBUG_KERNEL is not set CONFIG_CROSSCOMPILE=y CONFIG_CMDLINE="" -CONFIG_SYS_SUPPORTS_KGDB=y # # Security options diff --git a/arch/mips/configs/pb1100_defconfig b/arch/mips/configs/pb1100_defconfig index 6dfe6f793ce..9e21e333a2f 100644 --- a/arch/mips/configs/pb1100_defconfig +++ b/arch/mips/configs/pb1100_defconfig @@ -1085,7 +1085,6 @@ CONFIG_ENABLE_MUST_CHECK=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_CROSSCOMPILE=y CONFIG_CMDLINE="" -CONFIG_SYS_SUPPORTS_KGDB=y # # Security options diff --git a/arch/mips/configs/pb1500_defconfig b/arch/mips/configs/pb1500_defconfig index c965a87e6a9..af67ed4f71a 100644 --- a/arch/mips/configs/pb1500_defconfig +++ b/arch/mips/configs/pb1500_defconfig @@ -1202,7 +1202,6 @@ CONFIG_ENABLE_MUST_CHECK=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_CROSSCOMPILE=y CONFIG_CMDLINE="" -CONFIG_SYS_SUPPORTS_KGDB=y # # Security options diff --git a/arch/mips/configs/pb1550_defconfig b/arch/mips/configs/pb1550_defconfig index 0778996c682..7956f56cbf3 100644 --- a/arch/mips/configs/pb1550_defconfig +++ b/arch/mips/configs/pb1550_defconfig @@ -1195,7 +1195,6 @@ CONFIG_ENABLE_MUST_CHECK=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_CROSSCOMPILE=y CONFIG_CMDLINE="" -CONFIG_SYS_SUPPORTS_KGDB=y # # Security options diff --git a/arch/mips/configs/pnx8550-jbs_defconfig b/arch/mips/configs/pnx8550-jbs_defconfig index 37c7b5ffd47..723bd5176a3 100644 --- a/arch/mips/configs/pnx8550-jbs_defconfig +++ b/arch/mips/configs/pnx8550-jbs_defconfig @@ -1216,10 +1216,8 @@ CONFIG_DEBUG_MUTEXES=y CONFIG_FORCED_INLINING=y # CONFIG_RCU_TORTURE_TEST is not set CONFIG_CROSSCOMPILE=y -CONFIG_CMDLINE="console=ttyS1,38400n8 kgdb=ttyS0 root=/dev/nfs ip=bootp" +CONFIG_CMDLINE="console=ttyS1,38400n8 root=/dev/nfs ip=bootp" # CONFIG_DEBUG_STACK_USAGE is not set -# CONFIG_KGDB is not set -CONFIG_SYS_SUPPORTS_KGDB=y # CONFIG_RUNTIME_DEBUG is not set # diff --git a/arch/mips/configs/pnx8550-stb810_defconfig b/arch/mips/configs/pnx8550-stb810_defconfig index 893e5c4ab66..b5052fb42e9 100644 --- a/arch/mips/configs/pnx8550-stb810_defconfig +++ b/arch/mips/configs/pnx8550-stb810_defconfig @@ -1206,10 +1206,8 @@ CONFIG_DEBUG_SLAB=y CONFIG_FORCED_INLINING=y # CONFIG_RCU_TORTURE_TEST is not set CONFIG_CROSSCOMPILE=y -CONFIG_CMDLINE="console=ttyS1,38400n8 kgdb=ttyS0 root=/dev/nfs ip=bootp" +CONFIG_CMDLINE="console=ttyS1,38400n8 root=/dev/nfs ip=bootp" # CONFIG_DEBUG_STACK_USAGE is not set -# CONFIG_KGDB is not set -CONFIG_SYS_SUPPORTS_KGDB=y # CONFIG_RUNTIME_DEBUG is not set # diff --git a/arch/mips/configs/rbtx49xx_defconfig b/arch/mips/configs/rbtx49xx_defconfig index e42aed5a38b..c7c0864b8ce 100644 --- a/arch/mips/configs/rbtx49xx_defconfig +++ b/arch/mips/configs/rbtx49xx_defconfig @@ -742,7 +742,6 @@ CONFIG_DEBUG_FS=y # CONFIG_DEBUG_KERNEL is not set # CONFIG_SAMPLES is not set CONFIG_CMDLINE="" -CONFIG_SYS_SUPPORTS_KGDB=y # # Security options diff --git a/arch/mips/configs/sb1250-swarm_defconfig b/arch/mips/configs/sb1250-swarm_defconfig index 1ea97865f2c..a9acaa2f9da 100644 --- a/arch/mips/configs/sb1250-swarm_defconfig +++ b/arch/mips/configs/sb1250-swarm_defconfig @@ -963,7 +963,6 @@ CONFIG_ENABLE_MUST_CHECK=y # CONFIG_DEBUG_KERNEL is not set # CONFIG_SAMPLES is not set CONFIG_CMDLINE="" -CONFIG_SYS_SUPPORTS_KGDB=y # CONFIG_SB1XXX_CORELIS is not set # diff --git a/arch/mips/configs/yosemite_defconfig b/arch/mips/configs/yosemite_defconfig index 7f86c43d1bd..ea8249c75b3 100644 --- a/arch/mips/configs/yosemite_defconfig +++ b/arch/mips/configs/yosemite_defconfig @@ -827,8 +827,6 @@ CONFIG_FORCED_INLINING=y CONFIG_CROSSCOMPILE=y CONFIG_CMDLINE="" # CONFIG_DEBUG_STACK_USAGE is not set -# CONFIG_KGDB is not set -CONFIG_SYS_SUPPORTS_KGDB=y # CONFIG_RUNTIME_DEBUG is not set # diff --git a/arch/mips/emma2rh/markeins/platform.c b/arch/mips/emma2rh/markeins/platform.c index 11567702b15..d70627de7cf 100644 --- a/arch/mips/emma2rh/markeins/platform.c +++ b/arch/mips/emma2rh/markeins/platform.c @@ -34,7 +34,6 @@ #include <asm/bcache.h> #include <asm/irq.h> #include <asm/reboot.h> -#include <asm/gdb-stub.h> #include <asm/traps.h> #include <asm/debug.h> diff --git a/arch/mips/emma2rh/markeins/setup.c b/arch/mips/emma2rh/markeins/setup.c index 62bfb455d1b..a56c4b804b0 100644 --- a/arch/mips/emma2rh/markeins/setup.c +++ b/arch/mips/emma2rh/markeins/setup.c @@ -41,7 +41,6 @@ #include <asm/bcache.h> #include <asm/irq.h> #include <asm/reboot.h> -#include <asm/gdb-stub.h> #include <asm/traps.h> #include <asm/debug.h> diff --git a/arch/mips/kernel/Makefile b/arch/mips/kernel/Makefile index 0fd31974ba2..706f9397479 100644 --- a/arch/mips/kernel/Makefile +++ b/arch/mips/kernel/Makefile @@ -71,7 +71,7 @@ obj-$(CONFIG_MIPS32_COMPAT) += linux32.o ptrace32.o signal32.o obj-$(CONFIG_MIPS32_N32) += binfmt_elfn32.o scall64-n32.o signal_n32.o obj-$(CONFIG_MIPS32_O32) += binfmt_elfo32.o scall64-o32.o -obj-$(CONFIG_KGDB) += gdb-low.o gdb-stub.o +obj-$(CONFIG_KGDB) += kgdb.o obj-$(CONFIG_PROC_FS) += proc.o obj-$(CONFIG_64BIT) += cpu-bugs64.o diff --git a/arch/mips/kernel/gdb-low.S b/arch/mips/kernel/gdb-low.S deleted file mode 100644 index 2c446063636..00000000000 --- a/arch/mips/kernel/gdb-low.S +++ /dev/null @@ -1,394 +0,0 @@ -/* - * gdb-low.S contains the low-level trap handler for the GDB stub. - * - * Copyright (C) 1995 Andreas Busse - */ -#include <linux/sys.h> - -#include <asm/asm.h> -#include <asm/errno.h> -#include <asm/irqflags.h> -#include <asm/mipsregs.h> -#include <asm/regdef.h> -#include <asm/stackframe.h> -#include <asm/gdb-stub.h> - -#ifdef CONFIG_32BIT -#define DMFC0 mfc0 -#define DMTC0 mtc0 -#define LDC1 lwc1 -#define SDC1 lwc1 -#endif -#ifdef CONFIG_64BIT -#define DMFC0 dmfc0 -#define DMTC0 dmtc0 -#define LDC1 ldc1 -#define SDC1 ldc1 -#endif - -/* - * [jsun] We reserves about 2x GDB_FR_SIZE in stack. The lower (addressed) - * part is used to store registers and passed to exception handler. - * The upper part is reserved for "call func" feature where gdb client - * saves some of the regs, setups call frame and passes args. - * - * A trace shows about 200 bytes are used to store about half of all regs. - * The rest should be big enough for frame setup and passing args. - */ - -/* - * The low level trap handler - */ - .align 5 - NESTED(trap_low, GDB_FR_SIZE, sp) - .set noat - .set noreorder - - mfc0 k0, CP0_STATUS - sll k0, 3 /* extract cu0 bit */ - bltz k0, 1f - move k1, sp - - /* - * Called from user mode, go somewhere else. - */ - mfc0 k0, CP0_CAUSE - andi k0, k0, 0x7c -#ifdef CONFIG_64BIT - dsll k0, k0, 1 -#endif - PTR_L k1, saved_vectors(k0) - jr k1 - nop -1: - move k0, sp - PTR_SUBU sp, k1, GDB_FR_SIZE*2 # see comment above - LONG_S k0, GDB_FR_REG29(sp) - LONG_S $2, GDB_FR_REG2(sp) - -/* - * First save the CP0 and special registers - */ - - mfc0 v0, CP0_STATUS - LONG_S v0, GDB_FR_STATUS(sp) - mfc0 v0, CP0_CAUSE - LONG_S v0, GDB_FR_CAUSE(sp) - DMFC0 v0, CP0_EPC - LONG_S v0, GDB_FR_EPC(sp) - DMFC0 v0, CP0_BADVADDR - LONG_S v0, GDB_FR_BADVADDR(sp) - mfhi v0 - LONG_S v0, GDB_FR_HI(sp) - mflo v0 - LONG_S v0, GDB_FR_LO(sp) - -/* - * Now the integer registers - */ - - LONG_S zero, GDB_FR_REG0(sp) /* I know... */ - LONG_S $1, GDB_FR_REG1(sp) - /* v0 already saved */ - LONG_S $3, GDB_FR_REG3(sp) - LONG_S $4, GDB_FR_REG4(sp) - LONG_S $5, GDB_FR_REG5(sp) - LONG_S $6, GDB_FR_REG6(sp) - LONG_S $7, GDB_FR_REG7(sp) - LONG_S $8, GDB_FR_REG8(sp) - LONG_S $9, GDB_FR_REG9(sp) - LONG_S $10, GDB_FR_REG10(sp) - LONG_S $11, GDB_FR_REG11(sp) - LONG_S $12, GDB_FR_REG12(sp) - LONG_S $13, GDB_FR_REG13(sp) - LONG_S $14, GDB_FR_REG14(sp) - LONG_S $15, GDB_FR_REG15(sp) - LONG_S $16, GDB_FR_REG16(sp) - LONG_S $17, GDB_FR_REG17(sp) - LONG_S $18, GDB_FR_REG18(sp) - LONG_S $19, GDB_FR_REG19(sp) - LONG_S $20, GDB_FR_REG20(sp) - LONG_S $21, GDB_FR_REG21(sp) - LONG_S $22, GDB_FR_REG22(sp) - LONG_S $23, GDB_FR_REG23(sp) - LONG_S $24, GDB_FR_REG24(sp) - LONG_S $25, GDB_FR_REG25(sp) - LONG_S $26, GDB_FR_REG26(sp) - LONG_S $27, GDB_FR_REG27(sp) - LONG_S $28, GDB_FR_REG28(sp) - /* sp already saved */ - LONG_S $30, GDB_FR_REG30(sp) - LONG_S $31, GDB_FR_REG31(sp) - - CLI /* disable interrupts */ - TRACE_IRQS_OFF - -/* - * Followed by the floating point registers - */ - mfc0 v0, CP0_STATUS /* FPU enabled? */ - srl v0, v0, 16 - andi v0, v0, (ST0_CU1 >> 16) - - beqz v0,2f /* disabled, skip */ - nop - - SDC1 $0, GDB_FR_FPR0(sp) - SDC1 $1, GDB_FR_FPR1(sp) - SDC1 $2, GDB_FR_FPR2(sp) - SDC1 $3, GDB_FR_FPR3(sp) - SDC1 $4, GDB_FR_FPR4(sp) - SDC1 $5, GDB_FR_FPR5(sp) - SDC1 $6, GDB_FR_FPR6(sp) - SDC1 $7, GDB_FR_FPR7(sp) - SDC1 $8, GDB_FR_FPR8(sp) - SDC1 $9, GDB_FR_FPR9(sp) - SDC1 $10, GDB_FR_FPR10(sp) - SDC1 $11, GDB_FR_FPR11(sp) - SDC1 $12, GDB_FR_FPR12(sp) - SDC1 $13, GDB_FR_FPR13(sp) - SDC1 $14, GDB_FR_FPR14(sp) - SDC1 $15, GDB_FR_FPR15(sp) - SDC1 $16, GDB_FR_FPR16(sp) - SDC1 $17, GDB_FR_FPR17(sp) - SDC1 $18, GDB_FR_FPR18(sp) - SDC1 $19, GDB_FR_FPR19(sp) - SDC1 $20, GDB_FR_FPR20(sp) - SDC1 $21, GDB_FR_FPR21(sp) - SDC1 $22, GDB_FR_FPR22(sp) - SDC1 $23, GDB_FR_FPR23(sp) - SDC1 $24, GDB_FR_FPR24(sp) - SDC1 $25, GDB_FR_FPR25(sp) - SDC1 $26, GDB_FR_FPR26(sp) - SDC1 $27, GDB_FR_FPR27(sp) - SDC1 $28, GDB_FR_FPR28(sp) - SDC1 $29, GDB_FR_FPR29(sp) - SDC1 $30, GDB_FR_FPR30(sp) - SDC1 $31, GDB_FR_FPR31(sp) - -/* - * FPU control registers - */ - - cfc1 v0, CP1_STATUS - LONG_S v0, GDB_FR_FSR(sp) - cfc1 v0, CP1_REVISION - LONG_S v0, GDB_FR_FIR(sp) - -/* - * Current stack frame ptr - */ - -2: - LONG_S sp, GDB_FR_FRP(sp) - -/* - * CP0 registers (R4000/R4400 unused registers skipped) - */ - - mfc0 v0, CP0_INDEX - LONG_S v0, GDB_FR_CP0_INDEX(sp) - mfc0 v0, CP0_RANDOM - LONG_S v0, GDB_FR_CP0_RANDOM(sp) - DMFC0 v0, CP0_ENTRYLO0 - LONG_S v0, GDB_FR_CP0_ENTRYLO0(sp) - DMFC0 v0, CP0_ENTRYLO1 - LONG_S v0, GDB_FR_CP0_ENTRYLO1(sp) - DMFC0 v0, CP0_CONTEXT - LONG_S v0, GDB_FR_CP0_CONTEXT(sp) - mfc0 v0, CP0_PAGEMASK - LONG_S v0, GDB_FR_CP0_PAGEMASK(sp) - mfc0 v0, CP0_WIRED - LONG_S v0, GDB_FR_CP0_WIRED(sp) - DMFC0 v0, CP0_ENTRYHI - LONG_S v0, GDB_FR_CP0_ENTRYHI(sp) - mfc0 v0, CP0_PRID - LONG_S v0, GDB_FR_CP0_PRID(sp) - - .set at - -/* - * Continue with the higher level handler - */ - - move a0,sp - - jal handle_exception - nop - -/* - * Restore all writable registers, in reverse order - */ - - .set noat - - LONG_L v0, GDB_FR_CP0_ENTRYHI(sp) - LONG_L v1, GDB_FR_CP0_WIRED(sp) - DMTC0 v0, CP0_ENTRYHI - mtc0 v1, CP0_WIRED - LONG_L v0, GDB_FR_CP0_PAGEMASK(sp) - LONG_L v1, GDB_FR_CP0_ENTRYLO1(sp) - mtc0 v0, CP0_PAGEMASK - DMTC0 v1, CP0_ENTRYLO1 - LONG_L v0, GDB_FR_CP0_ENTRYLO0(sp) - LONG_L v1, GDB_FR_CP0_INDEX(sp) - DMTC0 v0, CP0_ENTRYLO0 - LONG_L v0, GDB_FR_CP0_CONTEXT(sp) - mtc0 v1, CP0_INDEX - DMTC0 v0, CP0_CONTEXT - - -/* - * Next, the floating point registers - */ - mfc0 v0, CP0_STATUS /* check if the FPU is enabled */ - srl v0, v0, 16 - andi v0, v0, (ST0_CU1 >> 16) - - beqz v0, 3f /* disabled, skip */ - nop - - LDC1 $31, GDB_FR_FPR31(sp) - LDC1 $30, GDB_FR_FPR30(sp) - LDC1 $29, GDB_FR_FPR29(sp) - LDC1 $28, GDB_FR_FPR28(sp) - LDC1 $27, GDB_FR_FPR27(sp) - LDC1 $26, GDB_FR_FPR26(sp) - LDC1 $25, GDB_FR_FPR25(sp) - LDC1 $24, GDB_FR_FPR24(sp) - LDC1 $23, GDB_FR_FPR23(sp) - LDC1 $22, GDB_FR_FPR22(sp) - LDC1 $21, GDB_FR_FPR21(sp) - LDC1 $20, GDB_FR_FPR20(sp) - LDC1 $19, GDB_FR_FPR19(sp) - LDC1 $18, GDB_FR_FPR18(sp) - LDC1 $17, GDB_FR_FPR17(sp) - LDC1 $16, GDB_FR_FPR16(sp) - LDC1 $15, GDB_FR_FPR15(sp) - LDC1 $14, GDB_FR_FPR14(sp) - LDC1 $13, GDB_FR_FPR13(sp) - LDC1 $12, GDB_FR_FPR12(sp) - LDC1 $11, GDB_FR_FPR11(sp) - LDC1 $10, GDB_FR_FPR10(sp) - LDC1 $9, GDB_FR_FPR9(sp) - LDC1 $8, GDB_FR_FPR8(sp) - LDC1 $7, GDB_FR_FPR7(sp) - LDC1 $6, GDB_FR_FPR6(sp) - LDC1 $5, GDB_FR_FPR5(sp) - LDC1 $4, GDB_FR_FPR4(sp) - LDC1 $3, GDB_FR_FPR3(sp) - LDC1 $2, GDB_FR_FPR2(sp) - LDC1 $1, GDB_FR_FPR1(sp) - LDC1 $0, GDB_FR_FPR0(sp) - -/* - * Now the CP0 and integer registers - */ - -3: -#ifdef CONFIG_MIPS_MT_SMTC - /* Read-modify write of Status must be atomic */ - mfc0 t2, CP0_TCSTATUS - ori t1, t2, TCSTATUS_IXMT - mtc0 t1, CP0_TCSTATUS - andi t2, t2, TCSTATUS_IXMT - _ehb - DMT 9 # dmt t1 - jal mips_ihb - nop -#endif /* CONFIG_MIPS_MT_SMTC */ - mfc0 t0, CP0_STATUS - ori t0, 0x1f - xori t0, 0x1f - mtc0 t0, CP0_STATUS -#ifdef CONFIG_MIPS_MT_SMTC - andi t1, t1, VPECONTROL_TE - beqz t1, 9f - nop - EMT # emt -9: - mfc0 t1, CP0_TCSTATUS - xori t1, t1, TCSTATUS_IXMT - or t1, t1, t2 - mtc0 t1, CP0_TCSTATUS - _ehb -#endif /* CONFIG_MIPS_MT_SMTC */ - LONG_L v0, GDB_FR_STATUS(sp) - LONG_L v1, GDB_FR_EPC(sp) - mtc0 v0, CP0_STATUS - DMTC0 v1, CP0_EPC - LONG_L v0, GDB_FR_HI(sp) - LONG_L v1, GDB_FR_LO(sp) - mthi v0 - mtlo v1 - LONG_L $31, GDB_FR_REG31(sp) - LONG_L $30, GDB_FR_REG30(sp) - LONG_L $28, GDB_FR_REG28(sp) - LONG_L $27, GDB_FR_REG27(sp) - LONG_L $26, GDB_FR_REG26(sp) - LONG_L $25, GDB_FR_REG25(sp) - LONG_L $24, GDB_FR_REG24(sp) - LONG_L $23, GDB_FR_REG23(sp) - LONG_L $22, GDB_FR_REG22(sp) - LONG_L $21, GDB_FR_REG21(sp) - LONG_L $20, GDB_FR_REG20(sp) - LONG_L $19, GDB_FR_REG19(sp) - LONG_L $18, GDB_FR_REG18(sp) - LONG_L $17, GDB_FR_REG17(sp) - LONG_L $16, GDB_FR_REG16(sp) - LONG_L $15, GDB_FR_REG15(sp) - LONG_L $14, GDB_FR_REG14(sp) - LONG_L $13, GDB_FR_REG13(sp) - LONG_L $12, GDB_FR_REG12(sp) - LONG_L $11, GDB_FR_REG11(sp) - LONG_L $10, GDB_FR_REG10(sp) - LONG_L $9, GDB_FR_REG9(sp) - LONG_L $8, GDB_FR_REG8(sp) - LONG_L $7, GDB_FR_REG7(sp) - LONG_L $6, GDB_FR_REG6(sp) - LONG_L $5, GDB_FR_REG5(sp) - LONG_L $4, GDB_FR_REG4(sp) - LONG_L $3, GDB_FR_REG3(sp) - LONG_L $2, GDB_FR_REG2(sp) - LONG_L $1, GDB_FR_REG1(sp) -#if defined(CONFIG_CPU_R3000) || defined(CONFIG_CPU_TX39XX) - LONG_L k0, GDB_FR_EPC(sp) - LONG_L $29, GDB_FR_REG29(sp) /* Deallocate stack */ - jr k0 - rfe -#else - LONG_L sp, GDB_FR_REG29(sp) /* Deallocate stack */ - - .set mips3 - eret - .set mips0 -#endif - .set at - .set reorder - END(trap_low) - -LEAF(kgdb_read_byte) -4: lb t0, (a0) - sb t0, (a1) - li v0, 0 - jr ra - .section __ex_table,"a" - PTR 4b, kgdbfault - .previous - END(kgdb_read_byte) - -LEAF(kgdb_write_byte) -5: sb a0, (a1) - li v0, 0 - jr ra - .section __ex_table,"a" - PTR 5b, kgdbfault - .previous - END(kgdb_write_byte) - - .type kgdbfault@function - .ent kgdbfault - -kgdbfault: li v0, -EFAULT - jr ra - .end kgdbfault diff --git a/arch/mips/kernel/gdb-stub.c b/arch/mips/kernel/gdb-stub.c deleted file mode 100644 index 25f4eab8ea9..00000000000 --- a/arch/mips/kernel/gdb-stub.c +++ /dev/null @@ -1,1155 +0,0 @@ -/* - * arch/mips/kernel/gdb-stub.c - * - * Originally written by Glenn Engel, Lake Stevens Instrument Division - * - * Contributed by HP Systems - * - * Modified for SPARC by Stu Grossman, Cygnus Support. - * - * Modified for Linux/MIPS (and MIPS in general) by Andreas Busse - * Send complaints, suggestions etc. to <andy@waldorf-gmbh.de> - * - * Copyright (C) 1995 Andreas Busse - * - * Copyright (C) 2003 MontaVista Software Inc. - * Author: Jun Sun, jsun@mvista.com or jsun@junsun.net - */ - -/* - * To enable debugger support, two things need to happen. One, a - * call to set_debug_traps() is necessary in order to allow any breakpoints - * or error conditions to be properly intercepted and reported to gdb. - * Two, a breakpoint needs to be generated to begin communication. This - * is most easily accomplished by a call to breakpoint(). Breakpoint() - * simulates a breakpoint by executing a BREAK instruction. - * - * - * The following gdb commands are supported: - * - * command function Return value - * - * g return the value of the CPU registers hex data or ENN - * G set the value of the CPU registers OK or ENN - * - * mAA..AA,LLLL Read LLLL bytes at address AA..AA hex data or ENN - * MAA..AA,LLLL: Write LLLL bytes at address AA.AA OK or ENN - * - * c Resume at current address SNN ( signal NN) - * cAA..AA Continue at address AA..AA SNN - * - * s Step one instruction SNN - * sAA..AA Step one instruction from AA..AA SNN - * - * k kill - * - * ? What was the last sigval ? SNN (signal NN) - * - * bBB..BB Set baud rate to BB..BB OK or BNN, then sets - * baud rate - * - * All commands and responses are sent with a packet which includes a - * checksum. A packet consists of - * - * $<packet info>#<checksum>. - * - * where - * <packet info> :: <characters representing the command or response> - * <checksum> :: < two hex digits computed as modulo 256 sum of <packetinfo>> - * - * When a packet is received, it is first acknowledged with either '+' or '-'. - * '+' indicates a successful transfer. '-' indicates a failed transfer. - * - * Example: - * - * Host: Reply: - * $m0,10#2a +$00010203040506070809101112131415#42 - * - * - * ============== - * MORE EXAMPLES: - * ============== - * - * For reference -- the following are the steps that one - * company took (RidgeRun Inc) to get remote gdb debugging - * going. In this scenario the host machine was a PC and the - * target platform was a Galileo EVB64120A MIPS evaluation - * board. - * - * Step 1: - * First download gdb-5.0.tar.gz from the internet. - * and then build/install the package. - * - * Example: - * $ tar zxf gdb-5.0.tar.gz - * $ cd gdb-5.0 - * $ ./configure --target=mips-linux-elf - * $ make - * $ install - * $ which mips-linux-elf-gdb - * /usr/local/bin/mips-linux-elf-gdb - * - * Step 2: - * Configure linux for remote debugging and build it. - * - * Example: - * $ cd ~/linux - * $ make menuconfig <go to "Kernel Hacking" and turn on remote debugging> - * $ make - * - * Step 3: - * Download the kernel to the remote target and start - * the kernel running. It will promptly halt and wait - * for the host gdb session to connect. It does this - * since the "Kernel Hacking" option has defined - * CONFIG_KGDB which in turn enables your calls - * to: - * set_debug_traps(); - * breakpoint(); - * - * Step 4: - * Start the gdb session on the host. - * - * Example: - * $ mips-linux-elf-gdb vmlinux - * (gdb) set remotebaud 115200 - * (gdb) target remote /dev/ttyS1 - * ...at this point you are connected to - * the remote target and can use gdb - * in the normal fasion. Setting - * breakpoints, single stepping, - * printing variables, etc. - */ -#include <linux/string.h> -#include <linux/kernel.h> -#include <linux/signal.h> -#include <linux/sched.h> -#include <linux/mm.h> -#include <linux/console.h> -#include <linux/init.h> -#include <linux/smp.h> -#include <linux/spinlock.h> -#include <linux/slab.h> -#include <linux/reboot.h> - -#include <asm/asm.h> -#include <asm/cacheflush.h> -#include <asm/mipsregs.h> -#include <asm/pgtable.h> -#include <asm/system.h> -#include <asm/gdb-stub.h> -#include <asm/inst.h> - -/* - * external low-level support routines - */ - -extern int putDebugChar(char c); /* write a single character */ -extern char getDebugChar(void); /* read and return a single char */ -extern void trap_low(void); - -/* - * breakpoint and test functions - */ -extern void breakpoint(void); -extern void breakinst(void); -extern void async_breakpoint(void); -extern void async_breakinst(void); -extern void adel(void); - -/* - * local prototypes - */ - -static void getpacket(char *buffer); -static void putpacket(char *buffer); -static int computeSignal(int tt); -static int hex(unsigned char ch); -static int hexToInt(char **ptr, int *intValue); -static int hexToLong(char **ptr, long *longValue); -static unsigned char *mem2hex(char *mem, char *buf, int count, int may_fault); -void handle_exception(struct gdb_regs *regs); - -int kgdb_enabled; - -/* - * spin locks for smp case - */ -static DEFINE_SPINLOCK(kgdb_lock); -static raw_spinlock_t kgdb_cpulock[NR_CPUS] = { - [0 ... NR_CPUS-1] = __RAW_SPIN_LOCK_UNLOCKED, -}; - -/* - * BUFMAX defines the maximum number of characters in inbound/outbound buffers - * at least NUMREGBYTES*2 are needed for register packets - */ -#define BUFMAX 2048 - -static char input_buffer[BUFMAX]; -static char output_buffer[BUFMAX]; -static int initialized; /* !0 means we've been initialized */ -static int kgdb_started; -static const char hexchars[]="0123456789abcdef"; - -/* Used to prevent crashes in memory access. Note that they'll crash anyway if - we haven't set up fault handlers yet... */ -int kgdb_read_byte(unsigned char *address, unsigned char *dest); -int kgdb_write_byte(unsigned char val, unsigned char *dest); - -/* - * Convert ch from a hex digit to an int - */ -static int hex(unsigned char ch) -{ - if (ch >= 'a' && ch <= 'f') - return ch-'a'+10; - if (ch >= '0' && ch <= '9') - return ch-'0'; - if (ch >= 'A' && ch <= 'F') - return ch-'A'+10; - return -1; -} - -/* - * scan for the sequence $<data>#<checksum> - */ -static void getpacket(char *buffer) -{ - unsigned char checksum; - unsigned char xmitcsum; - int i; - int count; - unsigned char ch; - - do { - /* - * wait around for the start character, - * ignore all other characters - */ - while ((ch = (getDebugChar() & 0x7f)) != '$') ; - - checksum = 0; - xmitcsum = -1; - count = 0; - - /* - * now, read until a # or end of buffer is found - */ - while (count < BUFMAX) { - ch = getDebugChar(); - if (ch == '#') - break; - checksum = checksum + ch; - buffer[count] = ch; - count = count + 1; - } - - if (count >= BUFMAX) - continue; - - buffer[count] = 0; - - if (ch == '#') { - xmitcsum = hex(getDebugChar() & 0x7f) << 4; - xmitcsum |= hex(getDebugChar() & 0x7f); - - if (checksum != xmitcsum) - putDebugChar('-'); /* failed checksum */ - else { - putDebugChar('+'); /* successful transfer */ - - /* - * if a sequence char is present, - * reply the sequence ID - */ - if (buffer[2] == ':') { - putDebugChar(buffer[0]); - putDebugChar(buffer[1]); - - /* - * remove sequence chars from buffer - */ - count = strlen(buffer); - for (i=3; i <= count; i++) - buffer[i-3] = buffer[i]; - } - } - } - } - while (checksum != xmitcsum); -} - -/* - * send the packet in buffer. - */ -static void putpacket(char *buffer) -{ - unsigned char checksum; - int count; - unsigned char ch; - - /* - * $<packet info>#<checksum>. - */ - - do { - putDebugChar('$'); - checksum = 0; - count = 0; - - while ((ch = buffer[count]) != 0) { - if (!(putDebugChar(ch))) - return; - checksum += ch; - count += 1; - } - - putDebugChar('#'); - putDebugChar(hexchars[checksum >> 4]); - putDebugChar(hexchars[checksum & 0xf]); - - } - while ((getDebugChar() & 0x7f) != '+'); -} - - -/* - * Convert the memory pointed to by mem into hex, placing result in buf. - * Return a pointer to the last char put in buf (null), in case of mem fault, - * return 0. - * may_fault is non-zero if we are reading from arbitrary memory, but is currently - * not used. - */ -static unsigned char *mem2hex(char *mem, char *buf, int count, int may_fault) -{ - unsigned char ch; - - while (count-- > 0) { - if (kgdb_read_byte(mem++, &ch) != 0) - return 0; - *buf++ = hexchars[ch >> 4]; - *buf++ = hexchars[ch & 0xf]; - } - - *buf = 0; - - return buf; -} - -/* - * convert the hex array pointed to by buf into binary to be placed in mem - * return a pointer to the character AFTER the last byte written - * may_fault is non-zero if we are reading from arbitrary memory, but is currently - * not used. - */ -static char *hex2mem(char *buf, char *mem, int count, int binary, int may_fault) -{ - int i; - unsigned char ch; - - for (i=0; i<count; i++) - { - if (binary) { - ch = *buf++; - if (ch == 0x7d) - ch = 0x20 ^ *buf++; - } - else { - ch = hex(*buf++) << 4; - ch |= hex(*buf++); - } - if (kgdb_write_byte(ch, mem++) != 0) - return 0; - } - - return mem; -} - -/* - * This table contains the mapping between SPARC hardware trap types, and - * signals, which are primarily what GDB understands. It also indicates - * which hardware traps we need to commandeer when initializing the stub. - */ -static struct hard_trap_info { - unsigned char tt; /* Trap type code for MIPS R3xxx and R4xxx */ - unsigned char signo; /* Signal that we map this trap into */ -} hard_trap_info[] = { - { 6, SIGBUS }, /* instruction bus error */ - { 7, SIGBUS }, /* data bus error */ - { 9, SIGTRAP }, /* break */ - { 10, SIGILL }, /* reserved instruction */ -/* { 11, SIGILL }, */ /* CPU unusable */ - { 12, SIGFPE }, /* overflow */ - { 13, SIGTRAP }, /* trap */ - { 14, SIGSEGV }, /* virtual instruction cache coherency */ - { 15, SIGFPE }, /* floating point exception */ - { 23, SIGSEGV }, /* watch */ - { 31, SIGSEGV }, /* virtual data cache coherency */ - { 0, 0} /* Must be last */ -}; - -/* Save the normal trap handlers for user-mode traps. */ -void *saved_vectors[32]; - -/* - * Set up exception handlers for tracing and breakpoints - */ -void set_debug_traps(void) -{ - struct hard_trap_info *ht; - unsigned long flags; - unsigned char c; - - local_irq_save(flags); - for (ht = hard_trap_info; ht->tt && ht->signo; ht++) - saved_vectors[ht->tt] = set_except_vector(ht->tt, trap_low); - - putDebugChar('+'); /* 'hello world' */ - /* - * In case GDB is started before us, ack any packets - * (presumably "$?#xx") sitting there. - */ - while((c = getDebugChar()) != '$'); - while((c = getDebugChar()) != '#'); - c = getDebugChar(); /* eat first csum byte */ - c = getDebugChar(); /* eat second csum byte */ - putDebugChar('+'); /* ack it */ - - initialized = 1; - local_irq_restore(flags); -} - -void restore_debug_traps(void) -{ - struct hard_trap_info *ht; - unsigned long flags; - - local_irq_save(flags); - for (ht = hard_trap_info; ht->tt && ht->signo; ht++) - set_except_vector(ht->tt, saved_vectors[ht->tt]); - local_irq_restore(flags); -} - -/* - * Convert the MIPS hardware trap type code to a Unix signal number. - */ -static int computeSignal(int tt) -{ - struct hard_trap_info *ht; - - for (ht = hard_trap_info; ht->tt && ht->signo; ht++) - if (ht->tt == tt) - return ht->signo; - - return SIGHUP; /* default for things we don't know about */ -} - -/* - * While we find nice hex chars, build an int. - * Return number of chars processed. - */ -static int hexToInt(char **ptr, int *intValue) -{ - int numChars = 0; - int hexValue; - - *intValue = 0; - - while (**ptr) { - hexValue = hex(**ptr); - if (hexValue < 0) - break; - - *intValue = (*intValue << 4) | hexValue; - numChars ++; - - (*ptr)++; - } - - return (numChars); -} - -static int hexToLong(char **ptr, long *longValue) -{ - int numChars = 0; - int hexValue; - - *longValue = 0; - - while (**ptr) { - hexValue = hex(**ptr); - if (hexValue < 0) - break; - - *longValue = (*longValue << 4) | hexValue; - numChars ++; - - (*ptr)++; - } - - return numChars; -} - - -#if 0 -/* - * Print registers (on target console) - * Used only to debug the stub... - */ -void show_gdbregs(struct gdb_regs * regs) -{ - /* - * Saved main processor registers - */ - printk("$0 : %08lx %08lx %08lx %08lx %08lx %08lx %08lx %08lx\n", - regs->reg0, regs->reg1, regs->reg2, regs->reg3, - regs->reg4, regs->reg5, regs->reg6, regs->reg7); - printk("$8 : %08lx %08lx %08lx %08lx %08lx %08lx %08lx %08lx\n", - regs->reg8, regs->reg9, regs->reg10, regs->reg11, - regs->reg12, regs->reg13, regs->reg14, regs->reg15); - printk("$16: %08lx %08lx %08lx %08lx %08lx %08lx %08lx %08lx\n", - regs->reg16, regs->reg17, regs->reg18, regs->reg19, - regs->reg20, regs->reg21, regs->reg22, regs->reg23); - printk("$24: %08lx %08lx %08lx %08lx %08lx %08lx %08lx %08lx\n", - regs->reg24, regs->reg25, regs->reg26, regs->reg27, - regs->reg28, regs->reg29, regs->reg30, regs->reg31); - - /* - * Saved cp0 registers - */ - printk("epc : %08lx\nStatus: %08lx\nCause : %08lx\n", - regs->cp0_epc, regs->cp0_status, regs->cp0_cause); -} -#endif /* dead code */ - -/* - * We single-step by setting breakpoints. When an exception - * is handled, we need to restore the instructions hoisted - * when the breakpoints were set. - * - * This is where we save the original instructions. - */ -static struct gdb_bp_save { - unsigned long addr; - unsigned int val; -} step_bp[2]; - -#define BP 0x0000000d /* break opcode */ - -/* - * Set breakpoint instructions for single stepping. - */ -static void single_step(struct gdb_regs *regs) -{ - union mips_instruction insn; - unsigned long targ; - int is_branch, is_cond, i; - - targ = regs->cp0_epc; - insn.word = *(unsigned int *)targ; - is_branch = is_cond = 0; - - switch (insn.i_format.opcode) { - /* - * jr and jalr are in r_format format. - */ - case spec_op: - switch (insn.r_format.func) { - case jalr_op: - case jr_op: - targ = *(®s->reg0 + insn.r_format.rs); - is_branch = 1; - break; - } - break; - - /* - * This group contains: - * bltz_op, bgez_op, bltzl_op, bgezl_op, - * bltzal_op, bgezal_op, bltzall_op, bgezall_op. - */ - case bcond_op: - is_branch = is_cond = 1; - targ += 4 + (insn.i_format.simmediate << 2); - break; - - /* - * These are unconditional and in j_format. - */ - case jal_op: - case j_op: - is_branch = 1; - targ += 4; - targ >>= 28; - targ <<= 28; - targ |= (insn.j_format.target << 2); - break; - - /* - * These are conditional. - */ - case beq_op: - case beql_op: - case bne_op: - case bnel_op: - case blez_op: - case blezl_op: - case bgtz_op: - case bgtzl_op: - case cop0_op: - case cop1_op: - case cop2_op: - case cop1x_op: - is_branch = is_cond = 1; - targ += 4 + (insn.i_format.simmediate << 2); - break; - } - - if (is_branch) { - i = 0; - if (is_cond && targ != (regs->cp0_epc + 8)) { - step_bp[i].addr = regs->cp0_epc + 8; - step_bp[i++].val = *(unsigned *)(regs->cp0_epc + 8); - *(unsigned *)(regs->cp0_epc + 8) = BP; - } - step_bp[i].addr = targ; - step_bp[i].val = *(unsigned *)targ; - *(unsigned *)targ = BP; - } else { - step_bp[0].addr = regs->cp0_epc + 4; - step_bp[0].val = *(unsigned *)(regs->cp0_epc + 4); - *(unsigned *)(regs->cp0_epc + 4) = BP; - } -} - -/* - * If asynchronously interrupted by gdb, then we need to set a breakpoint - * at the interrupted instruction so that we wind up stopped with a - * reasonable stack frame. - */ -static struct gdb_bp_save async_bp; - -/* - * Swap the interrupted EPC with our asynchronous breakpoint routine. - * This is safer than stuffing the breakpoint in-place, since no cache - * flushes (or resulting smp_call_functions) are required. The - * assumption is that only one CPU will be handling asynchronous bp's, - * and only one can be active at a time. - */ -extern spinlock_t smp_call_lock; - -void set_async_breakpoint(unsigned long *epc) -{ - /* skip breaking into userland */ - if ((*epc & 0x80000000) == 0) - return; - -#ifdef CONFIG_SMP - /* avoid deadlock if someone is make IPC */ - if (spin_is_locked(&smp_call_lock)) - return; -#endif - - async_bp.addr = *epc; - *epc = (unsigned long)async_breakpoint; -} - -#ifdef CONFIG_SMP -static void kgdb_wait(void *arg) -{ - unsigned flags; - int cpu = smp_processor_id(); - - local_irq_save(flags); - - __raw_spin_lock(&kgdb_cpulock[cpu]); - __raw_spin_unlock(&kgdb_cpulock[cpu]); - - local_irq_restore(flags); -} -#endif - -/* - * GDB stub needs to call kgdb_wait on all processor with interrupts - * disabled, so it uses it's own special variant. - */ -static int kgdb_smp_call_kgdb_wait(void) -{ -#ifdef CONFIG_SMP - cpumask_t mask = cpu_online_map; - struct call_data_struct data; - int cpu = smp_processor_id(); - int cpus; - - /* - * Can die spectacularly if this CPU isn't yet marked online - */ - BUG_ON(!cpu_online(cpu)); - - cpu_clear(cpu, mask); - cpus = cpus_weight(mask); - if (!cpus) - return 0; - - if (spin_is_locked(&smp_call_lock)) { - /* - * Some other processor is trying to make us do something - * but we're not going to respond... give up - */ - return -1; - } - - /* - * We will continue here, accepting the fact that - * the kernel may deadlock if another CPU attempts - * to call smp_call_function now... - */ - - data.func = kgdb_wait; - data.info = NULL; - atomic_set(&data.started, 0); - data.wait = 0; - - spin_lock(&smp_call_lock); - call_data = &data; - mb(); - - core_send_ipi_mask(mask, SMP_CALL_FUNCTION); - - /* Wait for response */ - /* FIXME: lock-up detection, backtrace on lock-up */ - while (atomic_read(&data.started) != cpus) - barrier(); - - call_data = NULL; - spin_unlock(&smp_call_lock); -#endif - - return 0; -} - -/* - * This function does all command processing for interfacing to gdb. It - * returns 1 if you should skip the instruction at the trap address, 0 - * otherwise. - */ -void handle_exception(struct gdb_regs *regs) -{ - int trap; /* Trap type */ - int sigval; - long addr; - int length; - char *ptr; - unsigned long *stack; - int i; - int bflag = 0; - - kgdb_started = 1; - - /* - * acquire the big kgdb spinlock - */ - if (!spin_trylock(&kgdb_lock)) { - /* - * some other CPU has the lock, we should go back to - * receive the gdb_wait IPC - */ - return; - } - - /* - * If we're in async_breakpoint(), restore the real EPC from - * the breakpoint. - */ - if (regs->cp0_epc == (unsigned long)async_breakinst) { - regs->cp0_epc = async_bp.addr; - async_bp.addr = 0; - } - - /* - * acquire the CPU spinlocks - */ - for_each_online_cpu(i) - if (__raw_spin_trylock(&kgdb_cpulock[i]) == 0) - panic("kgdb: couldn't get cpulock %d\n", i); - - /* - * force other cpus to enter kgdb - */ - kgdb_smp_call_kgdb_wait(); - - /* - * If we're in breakpoint() increment the PC - */ - trap = (regs->cp0_cause & 0x7c) >> 2; - if (trap == 9 && regs->cp0_epc == (unsigned long)breakinst) - regs->cp0_epc += 4; - - /* - * If we were single_stepping, restore the opcodes hoisted - * for the breakpoint[s]. - */ - if (step_bp[0].addr) { - *(unsigned *)step_bp[0].addr = step_bp[0].val; - step_bp[0].addr = 0; - - if (step_bp[1].addr) { - *(unsigned *)step_bp[1].addr = step_bp[1].val; - step_bp[1].addr = 0; - } - } - - stack = (long *)regs->reg29; /* stack ptr */ - sigval = computeSignal(trap); - - /* - * reply to host that an exception has occurred - */ - ptr = output_buffer; - - /* - * Send trap type (converted to signal) - */ - *ptr++ = 'T'; - *ptr++ = hexchars[sigval >> 4]; - *ptr++ = hexchars[sigval & 0xf]; - - /* - * Send Error PC - */ - *ptr++ = hexchars[REG_EPC >> 4]; - *ptr++ = hexchars[REG_EPC & 0xf]; - *ptr++ = ':'; - ptr = mem2hex((char *)®s->cp0_epc, ptr, sizeof(long), 0); - *ptr++ = ';'; - - /* - * Send frame pointer - */ - *ptr++ = hexchars[REG_FP >> 4]; - *ptr++ = hexchars[REG_FP & 0xf]; - *ptr++ = ':'; - ptr = mem2hex((char *)®s->reg30, ptr, sizeof(long), 0); - *ptr++ = ';'; - - /* - * Send stack pointer - */ - *ptr++ = hexchars[REG_SP >> 4]; - *ptr++ = hexchars[REG_SP & 0xf]; - *ptr++ = ':'; - ptr = mem2hex((char *)®s->reg29, ptr, sizeof(long), 0); - *ptr++ = ';'; - - *ptr++ = 0; - putpacket(output_buffer); /* send it off... */ - - /* - * Wait for input from remote GDB - */ - while (1) { - output_buffer[0] = 0; - getpacket(input_buffer); - - switch (input_buffer[0]) - { - case '?': - output_buffer[0] = 'S'; - output_buffer[1] = hexchars[sigval >> 4]; - output_buffer[2] = hexchars[sigval & 0xf]; - output_buffer[3] = 0; - break; - - /* - * Detach debugger; let CPU run - */ - case 'D': - putpacket(output_buffer); - goto finish_kgdb; - break; - - case 'd': - /* toggle debug flag */ - break; - - /* - * Return the value of the CPU registers - */ - case 'g': - ptr = output_buffer; - ptr = mem2hex((char *)®s->reg0, ptr, 32*sizeof(long), 0); /* r0...r31 */ - ptr = mem2hex((char *)®s->cp0_status, ptr, 6*sizeof(long), 0); /* cp0 */ - ptr = mem2hex((char *)®s->fpr0, ptr, 32*sizeof(long), 0); /* f0...31 */ - ptr = mem2hex((char *)®s->cp1_fsr, ptr, 2*sizeof(long), 0); /* cp1 */ - ptr = mem2hex((char *)®s->frame_ptr, ptr, 2*sizeof(long), 0); /* frp */ - ptr = mem2hex((char *)®s->cp0_index, ptr, 16*sizeof(long), 0); /* cp0 */ - break; - - /* - * set the value of the CPU registers - return OK - */ - case 'G': - { - ptr = &input_buffer[1]; - hex2mem(ptr, (char *)®s->reg0, 32*sizeof(long), 0, 0); - ptr += 32*(2*sizeof(long)); - hex2mem(ptr, (char *)®s->cp0_status, 6*sizeof(long), 0, 0); - ptr += 6*(2*sizeof(long)); - hex2mem(ptr, (char *)®s->fpr0, 32*sizeof(long), 0, 0); - ptr += 32*(2*sizeof(long)); - hex2mem(ptr, (char *)®s->cp1_fsr, 2*sizeof(long), 0, 0); - ptr += 2*(2*sizeof(long)); - hex2mem(ptr, (char *)®s->frame_ptr, 2*sizeof(long), 0, 0); - ptr += 2*(2*sizeof(long)); - hex2mem(ptr, (char *)®s->cp0_index, 16*sizeof(long), 0, 0); - strcpy(output_buffer, "OK"); - } - break; - - /* - * mAA..AA,LLLL Read LLLL bytes at address AA..AA - */ - case 'm': - ptr = &input_buffer[1]; - - if (hexToLong(&ptr, &addr) - && *ptr++ == ',' - && hexToInt(&ptr, &length)) { - if (mem2hex((char *)addr, output_buffer, length, 1)) - break; - strcpy(output_buffer, "E03"); - } else - strcpy(output_buffer, "E01"); - break; - - /* - * XAA..AA,LLLL: Write LLLL escaped binary bytes at address AA.AA - */ - case 'X': - bflag = 1; - /* fall through */ - - /* - * MAA..AA,LLLL: Write LLLL bytes at address AA.AA return OK - */ - case 'M': - ptr = &input_buffer[1]; - - if (hexToLong(&ptr, &addr) - && *ptr++ == ',' - && hexToInt(&ptr, &length) - && *ptr++ == ':') { - if (hex2mem(ptr, (char *)addr, length, bflag, 1)) - strcpy(output_buffer, "OK"); - else - strcpy(output_buffer, "E03"); - } - else - strcpy(output_buffer, "E02"); - break; - - /* - * cAA..AA Continue at address AA..AA(optional) - */ - case 'c': - /* try to read optional parameter, pc unchanged if no parm */ - - ptr = &input_buffer[1]; - if (hexToLong(&ptr, &addr)) - regs->cp0_epc = addr; - - goto exit_kgdb_exception; - break; - - /* - * kill the program; let us try to restart the machine - * Reset the whole machine. - */ - case 'k': - case 'r': - machine_restart("kgdb restarts machine"); - break; - - /* - * Step to next instruction - */ - case 's': - /* - * There is no single step insn in the MIPS ISA, so we - * use breakpoints and continue, instead. - */ - single_step(regs); - goto exit_kgdb_exception; - /* NOTREACHED */ - break; - - /* - * Set baud rate (bBB) - * FIXME: Needs to be written - */ - case 'b': - { -#if 0 - int baudrate; - extern void set_timer_3(); - - ptr = &input_buffer[1]; - if (!hexToInt(&ptr, &baudrate)) - { - strcpy(output_buffer, "B01"); - break; - } - - /* Convert baud rate to uart clock divider */ - - switch (baudrate) - { - case 38400: - baudrate = 16; - break; - case 19200: - baudrate = 33; - break; - case 9600: - baudrate = 65; - break; - default: - baudrate = 0; - strcpy(output_buffer, "B02"); - goto x1; - } - - if (baudrate) { - putpacket("OK"); /* Ack before changing speed */ - set_timer_3(baudrate); /* Set it */ - } -#endif - } - break; - - } /* switch */ - - /* - * reply to the request - */ - - putpacket(output_buffer); - - } /* while */ - - return; - -finish_kgdb: - restore_debug_traps(); - -exit_kgdb_exception: - /* release locks so other CPUs can go */ - for_each_online_cpu(i) - __raw_spin_unlock(&kgdb_cpulock[i]); - spin_unlock(&kgdb_lock); - - __flush_cache_all(); - return; -} - -/* - * This function will generate a breakpoint exception. It is used at the - * beginning of a program to sync up with a debugger and can be used - * otherwise as a quick means to stop program execution and "break" into - * the debugger. - */ -void breakpoint(void) -{ - if (!initialized) - return; - - __asm__ __volatile__( - ".globl breakinst\n\t" - ".set\tnoreorder\n\t" - "nop\n" - "breakinst:\tbreak\n\t" - "nop\n\t" - ".set\treorder" - ); -} - -/* Nothing but the break; don't pollute any registers */ -void async_breakpoint(void) -{ - __asm__ __volatile__( - ".globl async_breakinst\n\t" - ".set\tnoreorder\n\t" - "nop\n" - "async_breakinst:\tbreak\n\t" - "nop\n\t" - ".set\treorder" - ); -} - -void adel(void) -{ - __asm__ __volatile__( - ".globl\tadel\n\t" - "lui\t$8,0x8000\n\t" - "lw\t$9,1($8)\n\t" - ); -} - -/* - * malloc is needed by gdb client in "call func()", even a private one - * will make gdb happy - */ -static void __used *malloc(size_t size) -{ - return kmalloc(size, GFP_ATOMIC); -} - -static void __used free(void *where) -{ - kfree(where); -} - -#ifdef CONFIG_GDB_CONSOLE - -void gdb_putsn(const char *str, int l) -{ - char outbuf[18]; - - if (!kgdb_started) - return; - - outbuf[0]='O'; - - while(l) { - int i = (l>8)?8:l; - mem2hex((char *)str, &outbuf[1], i, 0); - outbuf[(i*2)+1]=0; - putpacket(outbuf); - str += i; - l -= i; - } -} - -static void gdb_console_write(struct console *con, const char *s, unsigned n) -{ - gdb_putsn(s, n); -} - -static struct console gdb_console = { - .name = "gdb", - .write = gdb_console_write, - .flags = CON_PRINTBUFFER, - .index = -1 -}; - -static int __init register_gdb_console(void) -{ - register_console(&gdb_console); - - return 0; -} - -console_initcall(register_gdb_console); - -#endif diff --git a/arch/mips/kernel/irq.c b/arch/mips/kernel/irq.c index 6045b9a51a3..4b4007b3083 100644 --- a/arch/mips/kernel/irq.c +++ b/arch/mips/kernel/irq.c @@ -21,11 +21,16 @@ #include <linux/sched.h> #include <linux/seq_file.h> #include <linux/kallsyms.h> +#include <linux/kgdb.h> #include <asm/atomic.h> #include <asm/system.h> #include <asm/uaccess.h> +#ifdef CONFIG_KGDB +int kgdb_early_setup; +#endif + static unsigned long irq_map[NR_IRQS / BITS_PER_LONG]; int allocate_irqno(void) @@ -126,33 +131,22 @@ asmlinkage void spurious_interrupt(void) atomic_inc(&irq_err_count); } -#ifdef CONFIG_KGDB -extern void breakpoint(void); -extern void set_debug_traps(void); - -static int kgdb_flag = 1; -static int __init nokgdb(char *str) -{ - kgdb_flag = 0; - return 1; -} -__setup("nokgdb", nokgdb); -#endif - void __init init_IRQ(void) { int i; +#ifdef CONFIG_KGDB + if (kgdb_early_setup) + return; +#endif + for (i = 0; i < NR_IRQS; i++) set_irq_noprobe(i); arch_init_irq(); #ifdef CONFIG_KGDB - if (kgdb_flag) { - printk("Wait for gdb client connection ...\n"); - set_debug_traps(); - breakpoint(); - } + if (!kgdb_early_setup) + kgdb_early_setup = 1; #endif } diff --git a/arch/mips/kernel/kgdb.c b/arch/mips/kernel/kgdb.c new file mode 100644 index 00000000000..c5a8b2d21ca --- /dev/null +++ b/arch/mips/kernel/kgdb.c @@ -0,0 +1,281 @@ +/* + * Originally written by Glenn Engel, Lake Stevens Instrument Division + * + * Contributed by HP Systems + * + * Modified for Linux/MIPS (and MIPS in general) by Andreas Busse + * Send complaints, suggestions etc. to <andy@waldorf-gmbh.de> + * + * Copyright (C) 1995 Andreas Busse + * + * Copyright (C) 2003 MontaVista Software Inc. + * Author: Jun Sun, jsun@mvista.com or jsun@junsun.net + * + * Copyright (C) 2004-2005 MontaVista Software Inc. + * Author: Manish Lachwani, mlachwani@mvista.com or manish@koffee-break.com + * + * Copyright (C) 2007-2008 Wind River Systems, Inc. + * Author/Maintainer: Jason Wessel, jason.wessel@windriver.com + * + * This file is licensed under the terms of the GNU General Public License + * version 2. This program is licensed "as is" without any warranty of any + * kind, whether express or implied. + */ + +#include <linux/ptrace.h> /* for linux pt_regs struct */ +#include <linux/kgdb.h> +#include <linux/kdebug.h> +#include <linux/sched.h> +#include <asm/inst.h> +#include <asm/fpu.h> +#include <asm/cacheflush.h> +#include <asm/processor.h> +#include <asm/sigcontext.h> + +static struct hard_trap_info { + unsigned char tt; /* Trap type code for MIPS R3xxx and R4xxx */ + unsigned char signo; /* Signal that we map this trap into */ +} hard_trap_info[] = { + { 6, SIGBUS }, /* instruction bus error */ + { 7, SIGBUS }, /* data bus error */ + { 9, SIGTRAP }, /* break */ +/* { 11, SIGILL }, */ /* CPU unusable */ + { 12, SIGFPE }, /* overflow */ + { 13, SIGTRAP }, /* trap */ + { 14, SIGSEGV }, /* virtual instruction cache coherency */ + { 15, SIGFPE }, /* floating point exception */ + { 23, SIGSEGV }, /* watch */ + { 31, SIGSEGV }, /* virtual data cache coherency */ + { 0, 0} /* Must be last */ +}; + +void arch_kgdb_breakpoint(void) +{ + __asm__ __volatile__( + ".globl breakinst\n\t" + ".set\tnoreorder\n\t" + "nop\n" + "breakinst:\tbreak\n\t" + "nop\n\t" + ".set\treorder"); +} + +static void kgdb_call_nmi_hook(void *ignored) +{ + kgdb_nmicallback(raw_smp_processor_id(), (void *)0); +} + +void kgdb_roundup_cpus(unsigned long flags) +{ + local_irq_enable(); + smp_call_function(kgdb_call_nmi_hook, NULL, NULL); + local_irq_disable(); +} + +static int compute_signal(int tt) +{ + struct hard_trap_info *ht; + + for (ht = hard_trap_info; ht->tt && ht->signo; ht++) + if (ht->tt == tt) + return ht->signo; + + return SIGHUP; /* default for things we don't know about */ +} + +void pt_regs_to_gdb_regs(unsigned long *gdb_regs, struct pt_regs *regs) +{ + int reg; + +#if (KGDB_GDB_REG_SIZE == 32) + u32 *ptr = (u32 *)gdb_regs; +#else + u64 *ptr = (u64 *)gdb_regs; +#endif + + for (reg = 0; reg < 32; reg++) + *(ptr++) = regs->regs[reg]; + + *(ptr++) = regs->cp0_status; + *(ptr++) = regs->lo; + *(ptr++) = regs->hi; + *(ptr++) = regs->cp0_badvaddr; + *(ptr++) = regs->cp0_cause; + *(ptr++) = regs->cp0_epc; + + /* FP REGS */ + if (!(current && (regs->cp0_status & ST0_CU1))) + return; + + save_fp(current); + for (reg = 0; reg < 32; reg++) + *(ptr++) = current->thread.fpu.fpr[reg]; +} + +void gdb_regs_to_pt_regs(unsigned long *gdb_regs, struct pt_regs *regs) +{ + int reg; + +#if (KGDB_GDB_REG_SIZE == 32) + const u32 *ptr = (u32 *)gdb_regs; +#else + const u64 *ptr = (u64 *)gdb_regs; +#endif + + for (reg = 0; reg < 32; reg++) + regs->regs[reg] = *(ptr++); + + regs->cp0_status = *(ptr++); + regs->lo = *(ptr++); + regs->hi = *(ptr++); + regs->cp0_badvaddr = *(ptr++); + regs->cp0_cause = *(ptr++); + regs->cp0_epc = *(ptr++); + + /* FP REGS from current */ + if (!(current && (regs->cp0_status & ST0_CU1))) + return; + + for (reg = 0; reg < 32; reg++) + current->thread.fpu.fpr[reg] = *(ptr++); + restore_fp(current); +} + +/* + * Similar to regs_to_gdb_regs() except that process is sleeping and so + * we may not be able to get all the info. + */ +void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p) +{ + int reg; + struct thread_info *ti = task_thread_info(p); + unsigned long ksp = (unsigned long)ti + THREAD_SIZE - 32; + struct pt_regs *regs = (struct pt_regs *)ksp - 1; +#if (KGDB_GDB_REG_SIZE == 32) + u32 *ptr = (u32 *)gdb_regs; +#else + u64 *ptr = (u64 *)gdb_regs; +#endif + + for (reg = 0; reg < 16; reg++) + *(ptr++) = regs->regs[reg]; + + /* S0 - S7 */ + for (reg = 16; reg < 24; reg++) + *(ptr++) = regs->regs[reg]; + + for (reg = 24; reg < 28; reg++) + *(ptr++) = 0; + + /* GP, SP, FP, RA */ + for (reg = 28; reg < 32; reg++) + *(ptr++) = regs->regs[reg]; + + *(ptr++) = regs->cp0_status; + *(ptr++) = regs->lo; + *(ptr++) = regs->hi; + *(ptr++) = regs->cp0_badvaddr; + *(ptr++) = regs->cp0_cause; + *(ptr++) = regs->cp0_epc; +} + +/* + * Calls linux_debug_hook before the kernel dies. If KGDB is enabled, + * then try to fall into the debugger + */ +static int kgdb_mips_notify(struct notifier_block *self, unsigned long cmd, + void *ptr) +{ + struct die_args *args = (struct die_args *)ptr; + struct pt_regs *regs = args->regs; + int trap = (regs->cp0_cause & 0x7c) >> 2; + + if (fixup_exception(regs)) + return NOTIFY_DONE; + + /* Userpace events, ignore. */ + if (user_mode(regs)) + return NOTIFY_DONE; + + if (atomic_read(&kgdb_active) != -1) + kgdb_nmicallback(smp_processor_id(), regs); + + if (kgdb_handle_exception(trap, compute_signal(trap), 0, regs)) + return NOTIFY_DONE; + + if (atomic_read(&kgdb_setting_breakpoint)) + if ((trap == 9) && (regs->cp0_epc == (unsigned long)breakinst)) + regs->cp0_epc += 4; + + /* In SMP mode, __flush_cache_all does IPI */ + local_irq_enable(); + __flush_cache_all(); + + return NOTIFY_STOP; +} + +static struct notifier_block kgdb_notifier = { + .notifier_call = kgdb_mips_notify, +}; + +/* + * Handle the 's' and 'c' commands + */ +int kgdb_arch_handle_exception(int vector, int signo, int err_code, + char *remcom_in_buffer, char *remcom_out_buffer, + struct pt_regs *regs) +{ + char *ptr; + unsigned long address; + int cpu = smp_processor_id(); + + switch (remcom_in_buffer[0]) { + case 's': + case 'c': + /* handle the optional parameter */ + ptr = &remcom_in_buffer[1]; + if (kgdb_hex2long(&ptr, &address)) + regs->cp0_epc = address; + + atomic_set(&kgdb_cpu_doing_single_step, -1); + if (remcom_in_buffer[0] == 's') + if (kgdb_contthread) + atomic_set(&kgdb_cpu_doing_single_step, cpu); + + return 0; + } + + return -1; +} + +struct kgdb_arch arch_kgdb_ops; + +/* + * We use kgdb_early_setup so that functions we need to call now don't + * cause trouble when called again later. + */ +int kgdb_arch_init(void) +{ + union mips_instruction insn = { + .r_format = { + .opcode = spec_op, + .func = break_op, + } + }; + memcpy(arch_kgdb_ops.gdb_bpt_instr, insn.byte, BREAK_INSTR_SIZE); + + register_die_notifier(&kgdb_notifier); + + return 0; +} + +/* + * kgdb_arch_exit - Perform any architecture specific uninitalization. + * + * This function will handle the uninitalization of any architecture + * specific callbacks, for dynamic registration and unregistration. + */ +void kgdb_arch_exit(void) +{ + unregister_die_notifier(&kgdb_notifier); +} diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c index b8ea4e9d0d8..426cced1e9d 100644 --- a/arch/mips/kernel/traps.c +++ b/arch/mips/kernel/traps.c @@ -23,6 +23,8 @@ #include <linux/bootmem.h> #include <linux/interrupt.h> #include <linux/ptrace.h> +#include <linux/kgdb.h> +#include <linux/kdebug.h> #include <asm/bootinfo.h> #include <asm/branch.h> @@ -425,6 +427,10 @@ asmlinkage void do_be(struct pt_regs *regs) printk(KERN_ALERT "%s bus error, epc == %0*lx, ra == %0*lx\n", data ? "Data" : "Instruction", field, regs->cp0_epc, field, regs->regs[31]); + if (notify_die(DIE_OOPS, "bus error", regs, SIGBUS, 0, 0) + == NOTIFY_STOP) + return; + die_if_kernel("Oops", regs); force_sig(SIGBUS, current); } @@ -623,6 +629,9 @@ asmlinkage void do_fpe(struct pt_regs *regs, unsigned long fcr31) { siginfo_t info; + if (notify_die(DIE_FP, "FP exception", regs, SIGFPE, 0, 0) + == NOTIFY_STOP) + return; die_if_kernel("FP exception in kernel code", regs); if (fcr31 & FPU_CSR_UNI_X) { @@ -682,6 +691,9 @@ static void do_trap_or_bp(struct pt_regs *regs, unsigned int code, siginfo_t info; char b[40]; + if (notify_die(DIE_TRAP, str, regs, code, 0, 0) == NOTIFY_STOP) + return; + /* * A short test says that IRIX 5.3 sends SIGTRAP for all trap * insns, even for trap and break codes that indicate arithmetic @@ -762,6 +774,10 @@ asmlinkage void do_ri(struct pt_regs *regs) unsigned int opcode = 0; int status = -1; + if (notify_die(DIE_RI, "RI Fault", regs, SIGSEGV, 0, 0) + == NOTIFY_STOP) + return; + die_if_kernel("Reserved instruction in kernel code", regs); if (unlikely(compute_return_epc(regs) < 0)) @@ -1537,6 +1553,11 @@ void __init trap_init(void) extern char except_vec4; unsigned long i; +#if defined(CONFIG_KGDB) + if (kgdb_early_setup) + return; /* Already done */ +#endif + if (cpu_has_veic || cpu_has_vint) ebase = (unsigned long) alloc_bootmem_low_pages(0x200 + VECTORSPACING*64); else diff --git a/arch/mips/mm/tlb-r3k.c b/arch/mips/mm/tlb-r3k.c index a782549ac80..f0cf46adb97 100644 --- a/arch/mips/mm/tlb-r3k.c +++ b/arch/mips/mm/tlb-r3k.c @@ -246,10 +246,6 @@ void __init add_wired_entry(unsigned long entrylo0, unsigned long entrylo1, old_pagemask = read_c0_pagemask(); w = read_c0_wired(); write_c0_wired(w + 1); - if (read_c0_wired() != w + 1) { - printk("[tlbwired] No WIRED reg?\n"); - return; - } write_c0_index(w << 8); write_c0_pagemask(pagemask); write_c0_entryhi(entryhi); diff --git a/arch/mips/mti-malta/Makefile b/arch/mips/mti-malta/Makefile index f8064446e81..3b7dd722c32 100644 --- a/arch/mips/mti-malta/Makefile +++ b/arch/mips/mti-malta/Makefile @@ -13,7 +13,6 @@ obj-y := malta-amon.o malta-cmdline.o \ obj-$(CONFIG_EARLY_PRINTK) += malta-console.o obj-$(CONFIG_PCI) += malta-pci.o -obj-$(CONFIG_KGDB) += malta-kgdb.o # FIXME FIXME FIXME obj-$(CONFIG_MIPS_MT_SMTC) += malta_smtc.o diff --git a/arch/mips/mti-malta/malta-init.c b/arch/mips/mti-malta/malta-init.c index c0653021a17..4832af25166 100644 --- a/arch/mips/mti-malta/malta-init.c +++ b/arch/mips/mti-malta/malta-init.c @@ -37,15 +37,6 @@ #include <asm/mips-boards/malta.h> -#ifdef CONFIG_KGDB -extern int rs_kgdb_hook(int, int); -extern int rs_putDebugChar(char); -extern char rs_getDebugChar(void); -extern int saa9730_kgdb_hook(int); -extern int saa9730_putDebugChar(char); -extern char saa9730_getDebugChar(void); -#endif - int prom_argc; int *_prom_argv, *_prom_envp; @@ -173,51 +164,6 @@ static void __init console_config(void) } #endif -#ifdef CONFIG_KGDB -void __init kgdb_config(void) -{ - extern int (*generic_putDebugChar)(char); - extern char (*generic_getDebugChar)(void); - char *argptr; - int line, speed; - - argptr = prom_getcmdline(); - if ((argptr = strstr(argptr, "kgdb=ttyS")) != NULL) { - argptr += strlen("kgdb=ttyS"); - if (*argptr != '0' && *argptr != '1') - printk("KGDB: Unknown serial line /dev/ttyS%c, " - "falling back to /dev/ttyS1\n", *argptr); - line = *argptr == '0' ? 0 : 1; - printk("KGDB: Using serial line /dev/ttyS%d for session\n", line); - - speed = 0; - if (*++argptr == ',') - { - int c; - while ((c = *++argptr) && ('0' <= c && c <= '9')) - speed = speed * 10 + c - '0'; - } - { - speed = rs_kgdb_hook(line, speed); - generic_putDebugChar = rs_putDebugChar; - generic_getDebugChar = rs_getDebugChar; - } - - pr_info("KGDB: Using serial line /dev/ttyS%d at %d for " - "session, please connect your debugger\n", - line ? 1 : 0, speed); - - { - char *s; - for (s = "Please connect GDB to this port\r\n"; *s; ) - generic_putDebugChar(*s++); - } - - /* Breakpoint is invoked after interrupts are initialised */ - } -} -#endif - static void __init mips_nmi_setup(void) { void *base; diff --git a/arch/mips/mti-malta/malta-kgdb.c b/arch/mips/mti-malta/malta-kgdb.c deleted file mode 100644 index 6a1854de457..00000000000 --- a/arch/mips/mti-malta/malta-kgdb.c +++ /dev/null @@ -1,133 +0,0 @@ -/* - * Carsten Langgaard, carstenl@mips.com - * Copyright (C) 2000 MIPS Technologies, Inc. All rights reserved. - * - * This program is free software; you can distribute it and/or modify it - * under the terms of the GNU General Public License (Version 2) as - * published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - * for more details. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. - * - * This is the interface to the remote debugger stub. - */ -#include <linux/types.h> -#include <linux/serial.h> -#include <linux/serialP.h> -#include <linux/serial_reg.h> - -#include <asm/serial.h> -#include <asm/io.h> - -static struct serial_state rs_table[] = { - SERIAL_PORT_DFNS /* Defined in serial.h */ -}; - -static struct async_struct kdb_port_info = {0}; - -int (*generic_putDebugChar)(char); -char (*generic_getDebugChar)(void); - -static __inline__ unsigned int serial_in(struct async_struct *info, int offset) -{ - return inb(info->port + offset); -} - -static __inline__ void serial_out(struct async_struct *info, int offset, - int value) -{ - outb(value, info->port+offset); -} - -int rs_kgdb_hook(int tty_no, int speed) { - int t; - struct serial_state *ser = &rs_table[tty_no]; - - kdb_port_info.state = ser; - kdb_port_info.magic = SERIAL_MAGIC; - kdb_port_info.port = ser->port; - kdb_port_info.flags = ser->flags; - - /* - * Clear all interrupts - */ - serial_in(&kdb_port_info, UART_LSR); - serial_in(&kdb_port_info, UART_RX); - serial_in(&kdb_port_info, UART_IIR); - serial_in(&kdb_port_info, UART_MSR); - - /* - * Now, initialize the UART - */ - serial_out(&kdb_port_info, UART_LCR, UART_LCR_WLEN8); /* reset DLAB */ - if (kdb_port_info.flags & ASYNC_FOURPORT) { - kdb_port_info.MCR = UART_MCR_DTR | UART_MCR_RTS; - t = UART_MCR_DTR | UART_MCR_OUT1; - } else { - kdb_port_info.MCR - = UART_MCR_DTR | UART_MCR_RTS | UART_MCR_OUT2; - t = UART_MCR_DTR | UART_MCR_RTS; - } - - kdb_port_info.MCR = t; /* no interrupts, please */ - serial_out(&kdb_port_info, UART_MCR, kdb_port_info.MCR); - - /* - * and set the speed of the serial port - */ - if (speed == 0) - speed = 9600; - - t = kdb_port_info.state->baud_base / speed; - /* set DLAB */ - serial_out(&kdb_port_info, UART_LCR, UART_LCR_WLEN8 | UART_LCR_DLAB); - serial_out(&kdb_port_info, UART_DLL, t & 0xff);/* LS of divisor */ - serial_out(&kdb_port_info, UART_DLM, t >> 8); /* MS of divisor */ - /* reset DLAB */ - serial_out(&kdb_port_info, UART_LCR, UART_LCR_WLEN8); - - return speed; -} - -int putDebugChar(char c) -{ - return generic_putDebugChar(c); -} - -char getDebugChar(void) -{ - return generic_getDebugChar(); -} - -int rs_putDebugChar(char c) -{ - - if (!kdb_port_info.state) { /* need to init device first */ - return 0; - } - - while ((serial_in(&kdb_port_info, UART_LSR) & UART_LSR_THRE) == 0) - ; - - serial_out(&kdb_port_info, UART_TX, c); - - return 1; -} - -char rs_getDebugChar(void) -{ - if (!kdb_port_info.state) { /* need to init device first */ - return 0; - } - - while (!(serial_in(&kdb_port_info, UART_LSR) & 1)) - ; - - return serial_in(&kdb_port_info, UART_RX); -} diff --git a/arch/mips/mti-malta/malta-setup.c b/arch/mips/mti-malta/malta-setup.c index e7cad54936c..dc78b8983ee 100644 --- a/arch/mips/mti-malta/malta-setup.c +++ b/arch/mips/mti-malta/malta-setup.c @@ -199,10 +199,6 @@ void __init plat_mem_setup(void) */ enable_dma(4); -#ifdef CONFIG_KGDB - kgdb_config(); -#endif - #ifdef CONFIG_DMA_COHERENT if (mips_revision_sconid != MIPS_REVISION_SCON_BONITO) panic("Hardware DMA cache coherency not supported"); diff --git a/arch/mips/nxp/pnx8550/common/Makefile b/arch/mips/nxp/pnx8550/common/Makefile index 31cc1a5cec3..dd9e7b1f7fd 100644 --- a/arch/mips/nxp/pnx8550/common/Makefile +++ b/arch/mips/nxp/pnx8550/common/Makefile @@ -24,6 +24,5 @@ obj-y := setup.o prom.o int.o reset.o time.o proc.o platform.o obj-$(CONFIG_PCI) += pci.o -obj-$(CONFIG_KGDB) += gdb_hook.o EXTRA_CFLAGS += -Werror diff --git a/arch/mips/nxp/pnx8550/common/gdb_hook.c b/arch/mips/nxp/pnx8550/common/gdb_hook.c deleted file mode 100644 index ad4624f6d9b..00000000000 --- a/arch/mips/nxp/pnx8550/common/gdb_hook.c +++ /dev/null @@ -1,109 +0,0 @@ -/* - * Carsten Langgaard, carstenl@mips.com - * Copyright (C) 2000 MIPS Technologies, Inc. All rights reserved. - * - * ######################################################################## - * - * This program is free software; you can distribute it and/or modify it - * under the terms of the GNU General Public License (Version 2) as - * published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - * for more details. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. - * - * ######################################################################## - * - * This is the interface to the remote debugger stub. - * - */ -#include <linux/types.h> -#include <linux/serial.h> -#include <linux/serialP.h> -#include <linux/serial_reg.h> -#include <linux/serial_ip3106.h> - -#include <asm/serial.h> -#include <asm/io.h> - -#include <uart.h> - -static struct serial_state rs_table[IP3106_NR_PORTS] = { -}; -static struct async_struct kdb_port_info = {0}; - -void rs_kgdb_hook(int tty_no) -{ - struct serial_state *ser = &rs_table[tty_no]; - - kdb_port_info.state = ser; - kdb_port_info.magic = SERIAL_MAGIC; - kdb_port_info.port = tty_no; - kdb_port_info.flags = ser->flags; - - /* - * Clear all interrupts - */ - /* Clear all the transmitter FIFO counters (pointer and status) */ - ip3106_lcr(UART_BASE, tty_no) |= IP3106_UART_LCR_TX_RST; - /* Clear all the receiver FIFO counters (pointer and status) */ - ip3106_lcr(UART_BASE, tty_no) |= IP3106_UART_LCR_RX_RST; - /* Clear all interrupts */ - ip3106_iclr(UART_BASE, tty_no) = IP3106_UART_INT_ALLRX | - IP3106_UART_INT_ALLTX; - - /* - * Now, initialize the UART - */ - ip3106_lcr(UART_BASE, tty_no) = IP3106_UART_LCR_8BIT; - ip3106_baud(UART_BASE, tty_no) = 5; // 38400 Baud -} - -int putDebugChar(char c) -{ - /* Wait until FIFO not full */ - while (((ip3106_fifo(UART_BASE, kdb_port_info.port) & IP3106_UART_FIFO_TXFIFO) >> 16) >= 16) - ; - /* Send one char */ - ip3106_fifo(UART_BASE, kdb_port_info.port) = c; - - return 1; -} - -char getDebugChar(void) -{ - char ch; - - /* Wait until there is a char in the FIFO */ - while (!((ip3106_fifo(UART_BASE, kdb_port_info.port) & - IP3106_UART_FIFO_RXFIFO) >> 8)) - ; - /* Read one char */ - ch = ip3106_fifo(UART_BASE, kdb_port_info.port) & - IP3106_UART_FIFO_RBRTHR; - /* Advance the RX FIFO read pointer */ - ip3106_lcr(UART_BASE, kdb_port_info.port) |= IP3106_UART_LCR_RX_NEXT; - return (ch); -} - -void rs_disable_debug_interrupts(void) -{ - ip3106_ien(UART_BASE, kdb_port_info.port) = 0; /* Disable all interrupts */ -} - -void rs_enable_debug_interrupts(void) -{ - /* Clear all the transmitter FIFO counters (pointer and status) */ - ip3106_lcr(UART_BASE, kdb_port_info.port) |= IP3106_UART_LCR_TX_RST; - /* Clear all the receiver FIFO counters (pointer and status) */ - ip3106_lcr(UART_BASE, kdb_port_info.port) |= IP3106_UART_LCR_RX_RST; - /* Clear all interrupts */ - ip3106_iclr(UART_BASE, kdb_port_info.port) = IP3106_UART_INT_ALLRX | - IP3106_UART_INT_ALLTX; - ip3106_ien(UART_BASE, kdb_port_info.port) = IP3106_UART_INT_ALLRX; /* Enable RX interrupts */ -} diff --git a/arch/mips/nxp/pnx8550/common/int.c b/arch/mips/nxp/pnx8550/common/int.c index aad03429a5e..f080f114a1b 100644 --- a/arch/mips/nxp/pnx8550/common/int.c +++ b/arch/mips/nxp/pnx8550/common/int.c @@ -34,7 +34,6 @@ #include <linux/module.h> #include <asm/io.h> -#include <asm/gdb-stub.h> #include <int.h> #include <uart.h> diff --git a/arch/mips/nxp/pnx8550/common/proc.c b/arch/mips/nxp/pnx8550/common/proc.c index 18b125e3b65..acf1fa88944 100644 --- a/arch/mips/nxp/pnx8550/common/proc.c +++ b/arch/mips/nxp/pnx8550/common/proc.c @@ -22,7 +22,6 @@ #include <linux/random.h> #include <asm/io.h> -#include <asm/gdb-stub.h> #include <int.h> #include <uart.h> diff --git a/arch/mips/nxp/pnx8550/common/setup.c b/arch/mips/nxp/pnx8550/common/setup.c index 92d764c9770..2aed50fef10 100644 --- a/arch/mips/nxp/pnx8550/common/setup.c +++ b/arch/mips/nxp/pnx8550/common/setup.c @@ -47,7 +47,6 @@ extern void pnx8550_machine_halt(void); extern void pnx8550_machine_power_off(void); extern struct resource ioport_resource; extern struct resource iomem_resource; -extern void rs_kgdb_hook(int tty_no); extern char *prom_getcmdline(void); struct resource standard_io_resources[] = { @@ -142,16 +141,5 @@ void __init plat_mem_setup(void) ip3106_baud(UART_BASE, pnx8550_console_port) = 5; } -#ifdef CONFIG_KGDB - argptr = prom_getcmdline(); - if ((argptr = strstr(argptr, "kgdb=ttyS")) != NULL) { - int line; - argptr += strlen("kgdb=ttyS"); - line = *argptr == '0' ? 0 : 1; - rs_kgdb_hook(line); - pr_info("KGDB: Using ttyS%i for session, " - "please connect your debugger\n", line ? 1 : 0); - } -#endif return; } diff --git a/arch/mips/pci/ops-tx3927.c b/arch/mips/pci/ops-tx3927.c index 8a17a39e5bf..31c15019659 100644 --- a/arch/mips/pci/ops-tx3927.c +++ b/arch/mips/pci/ops-tx3927.c @@ -37,45 +37,48 @@ #include <linux/pci.h> #include <linux/kernel.h> #include <linux/init.h> +#include <linux/interrupt.h> #include <asm/addrspace.h> +#include <asm/txx9irq.h> +#include <asm/txx9/pci.h> #include <asm/txx9/tx3927.h> -static inline int mkaddr(unsigned char bus, unsigned char dev_fn, - unsigned char where) +static int mkaddr(struct pci_bus *bus, unsigned char devfn, unsigned char where) { - if (bus == 0 && dev_fn >= PCI_DEVFN(TX3927_PCIC_MAX_DEVNU, 0)) - return PCIBIOS_DEVICE_NOT_FOUND; - - tx3927_pcicptr->ica = ((bus & 0xff) << 0x10) | - ((dev_fn & 0xff) << 0x08) | - (where & 0xfc); + if (bus->parent == NULL && + devfn >= PCI_DEVFN(TX3927_PCIC_MAX_DEVNU, 0)) + return -1; + tx3927_pcicptr->ica = + ((bus->number & 0xff) << 0x10) | + ((devfn & 0xff) << 0x08) | + (where & 0xfc) | (bus->parent ? 1 : 0); /* clear M_ABORT and Disable M_ABORT Int. */ tx3927_pcicptr->pcistat |= PCI_STATUS_REC_MASTER_ABORT; tx3927_pcicptr->pcistatim &= ~PCI_STATUS_REC_MASTER_ABORT; - - return PCIBIOS_SUCCESSFUL; + return 0; } static inline int check_abort(void) { - if (tx3927_pcicptr->pcistat & PCI_STATUS_REC_MASTER_ABORT) + if (tx3927_pcicptr->pcistat & PCI_STATUS_REC_MASTER_ABORT) { tx3927_pcicptr->pcistat |= PCI_STATUS_REC_MASTER_ABORT; tx3927_pcicptr->pcistatim |= PCI_STATUS_REC_MASTER_ABORT; + /* flush write buffer */ + iob(); return PCIBIOS_DEVICE_NOT_FOUND; - + } return PCIBIOS_SUCCESSFUL; } static int tx3927_pci_read_config(struct pci_bus *bus, unsigned int devfn, int where, int size, u32 * val) { - int ret; - - ret = mkaddr(bus->number, devfn, where); - if (ret) - return ret; + if (mkaddr(bus, devfn, where)) { + *val = 0xffffffff; + return PCIBIOS_DEVICE_NOT_FOUND; + } switch (size) { case 1: @@ -97,11 +100,8 @@ static int tx3927_pci_read_config(struct pci_bus *bus, unsigned int devfn, static int tx3927_pci_write_config(struct pci_bus *bus, unsigned int devfn, int where, int size, u32 val) { - int ret; - - ret = mkaddr(bus->number, devfn, where); - if (ret) - return ret; + if (mkaddr(bus, devfn, where)) + return PCIBIOS_DEVICE_NOT_FOUND; switch (size) { case 1: @@ -117,11 +117,6 @@ static int tx3927_pci_write_config(struct pci_bus *bus, unsigned int devfn, tx3927_pcicptr->icd = cpu_to_le32(val); } - if (tx3927_pcicptr->pcistat & PCI_STATUS_REC_MASTER_ABORT) - tx3927_pcicptr->pcistat |= PCI_STATUS_REC_MASTER_ABORT; - tx3927_pcicptr->pcistatim |= PCI_STATUS_REC_MASTER_ABORT; - return PCIBIOS_DEVICE_NOT_FOUND; - return check_abort(); } @@ -202,3 +197,34 @@ void __init tx3927_pcic_setup(struct pci_controller *channel, PCI_COMMAND_PARITY | PCI_COMMAND_SERR; local_irq_restore(flags); } + +static irqreturn_t tx3927_pcierr_interrupt(int irq, void *dev_id) +{ + struct pt_regs *regs = get_irq_regs(); + + if (txx9_pci_err_action != TXX9_PCI_ERR_IGNORE) { + printk(KERN_WARNING "PCI error interrupt at 0x%08lx.\n", + regs->cp0_epc); + printk(KERN_WARNING "pcistat:%02x, lbstat:%04lx\n", + tx3927_pcicptr->pcistat, tx3927_pcicptr->lbstat); + } + if (txx9_pci_err_action != TXX9_PCI_ERR_PANIC) { + /* clear all pci errors */ + tx3927_pcicptr->pcistat |= TX3927_PCIC_PCISTATIM_ALL; + tx3927_pcicptr->istat = TX3927_PCIC_IIM_ALL; + tx3927_pcicptr->tstat = TX3927_PCIC_TIM_ALL; + tx3927_pcicptr->lbstat = TX3927_PCIC_LBIM_ALL; + return IRQ_HANDLED; + } + console_verbose(); + panic("PCI error."); +} + +void __init tx3927_setup_pcierr_irq(void) +{ + if (request_irq(TXX9_IRQ_BASE + TX3927_IR_PCI, + tx3927_pcierr_interrupt, + IRQF_DISABLED, "PCI error", + (void *)TX3927_PCIC_REG)) + printk(KERN_WARNING "Failed to request irq for PCIERR\n"); +} diff --git a/arch/mips/pci/ops-tx4927.c b/arch/mips/pci/ops-tx4927.c index c6b49bccd27..5989e747527 100644 --- a/arch/mips/pci/ops-tx4927.c +++ b/arch/mips/pci/ops-tx4927.c @@ -16,6 +16,8 @@ * option) any later version. */ #include <linux/kernel.h> +#include <linux/interrupt.h> +#include <asm/txx9/pci.h> #include <asm/txx9/tx4927pcic.h> static struct { @@ -85,6 +87,8 @@ static int check_abort(struct tx4927_pcic_reg __iomem *pcicptr) __raw_writel((__raw_readl(&pcicptr->pcistatus) & 0x0000ffff) | (PCI_STATUS_REC_MASTER_ABORT << 16), &pcicptr->pcistatus); + /* flush write buffer */ + iob(); code = PCIBIOS_DEVICE_NOT_FOUND; } return code; @@ -192,6 +196,28 @@ static struct { .gbwc = 0xfe0, /* 4064 GBUSCLK for CCFG.GTOT=0b11 */ }; +char *__devinit tx4927_pcibios_setup(char *str) +{ + unsigned long val; + + if (!strncmp(str, "trdyto=", 7)) { + if (strict_strtoul(str + 7, 0, &val) == 0) + tx4927_pci_opts.trdyto = val; + return NULL; + } + if (!strncmp(str, "retryto=", 8)) { + if (strict_strtoul(str + 8, 0, &val) == 0) + tx4927_pci_opts.retryto = val; + return NULL; + } + if (!strncmp(str, "gbwc=", 5)) { + if (strict_strtoul(str + 5, 0, &val) == 0) + tx4927_pci_opts.gbwc = val; + return NULL; + } + return str; +} + void __init tx4927_pcic_setup(struct tx4927_pcic_reg __iomem *pcicptr, struct pci_controller *channel, int extarb) { @@ -406,3 +432,95 @@ void tx4927_report_pcic_status(void) tx4927_report_pcic_status1(pcicptrs[i].pcicptr); } } + +static void tx4927_dump_pcic_settings1(struct tx4927_pcic_reg __iomem *pcicptr) +{ + int i; + __u32 __iomem *preg = (__u32 __iomem *)pcicptr; + + printk(KERN_INFO "tx4927 pcic (0x%p) settings:", pcicptr); + for (i = 0; i < sizeof(struct tx4927_pcic_reg); i += 4, preg++) { + if (i % 32 == 0) { + printk(KERN_CONT "\n"); + printk(KERN_INFO "%04x:", i); + } + /* skip registers with side-effects */ + if (i == offsetof(struct tx4927_pcic_reg, g2pintack) + || i == offsetof(struct tx4927_pcic_reg, g2pspc) + || i == offsetof(struct tx4927_pcic_reg, g2pcfgadrs) + || i == offsetof(struct tx4927_pcic_reg, g2pcfgdata)) { + printk(KERN_CONT " XXXXXXXX"); + continue; + } + printk(KERN_CONT " %08x", __raw_readl(preg)); + } + printk(KERN_CONT "\n"); +} + +void tx4927_dump_pcic_settings(void) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(pcicptrs); i++) { + if (pcicptrs[i].pcicptr) + tx4927_dump_pcic_settings1(pcicptrs[i].pcicptr); + } +} + +irqreturn_t tx4927_pcierr_interrupt(int irq, void *dev_id) +{ + struct pt_regs *regs = get_irq_regs(); + struct tx4927_pcic_reg __iomem *pcicptr = + (struct tx4927_pcic_reg __iomem *)(unsigned long)dev_id; + + if (txx9_pci_err_action != TXX9_PCI_ERR_IGNORE) { + printk(KERN_WARNING "PCIERR interrupt at 0x%0*lx\n", + (int)(2 * sizeof(unsigned long)), regs->cp0_epc); + tx4927_report_pcic_status1(pcicptr); + } + if (txx9_pci_err_action != TXX9_PCI_ERR_PANIC) { + /* clear all pci errors */ + __raw_writel((__raw_readl(&pcicptr->pcistatus) & 0x0000ffff) + | (TX4927_PCIC_PCISTATUS_ALL << 16), + &pcicptr->pcistatus); + __raw_writel(TX4927_PCIC_G2PSTATUS_ALL, &pcicptr->g2pstatus); + __raw_writel(TX4927_PCIC_PBASTATUS_ALL, &pcicptr->pbastatus); + __raw_writel(TX4927_PCIC_PCICSTATUS_ALL, &pcicptr->pcicstatus); + return IRQ_HANDLED; + } + console_verbose(); + tx4927_dump_pcic_settings1(pcicptr); + panic("PCI error."); +} + +#ifdef CONFIG_TOSHIBA_FPCIB0 +static void __init tx4927_quirk_slc90e66_bridge(struct pci_dev *dev) +{ + struct tx4927_pcic_reg __iomem *pcicptr = pci_bus_to_pcicptr(dev->bus); + + if (!pcicptr) + return; + if (__raw_readl(&pcicptr->pbacfg) & TX4927_PCIC_PBACFG_PBAEN) { + /* Reset Bus Arbiter */ + __raw_writel(TX4927_PCIC_PBACFG_RPBA, &pcicptr->pbacfg); + /* + * swap reqBP and reqXP (raise priority of SLC90E66). + * SLC90E66(PCI-ISA bridge) is connected to REQ2 on + * PCI Backplane board. + */ + __raw_writel(0x72543610, &pcicptr->pbareqport); + __raw_writel(0, &pcicptr->pbabm); + /* Use Fixed ParkMaster (required by SLC90E66) */ + __raw_writel(TX4927_PCIC_PBACFG_FIXPA, &pcicptr->pbacfg); + /* Enable Bus Arbiter */ + __raw_writel(TX4927_PCIC_PBACFG_FIXPA | + TX4927_PCIC_PBACFG_PBAEN, + &pcicptr->pbacfg); + printk(KERN_INFO "PCI: Use Fixed Park Master (REQPORT %08x)\n", + __raw_readl(&pcicptr->pbareqport)); + } +} +#define PCI_DEVICE_ID_EFAR_SLC90E66_0 0x9460 +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_EFAR, PCI_DEVICE_ID_EFAR_SLC90E66_0, + tx4927_quirk_slc90e66_bridge); +#endif diff --git a/arch/mips/pci/pci-tx4927.c b/arch/mips/pci/pci-tx4927.c index 27e86a09dd4..aaa90059679 100644 --- a/arch/mips/pci/pci-tx4927.c +++ b/arch/mips/pci/pci-tx4927.c @@ -15,6 +15,7 @@ #include <linux/init.h> #include <linux/pci.h> #include <linux/kernel.h> +#include <linux/interrupt.h> #include <asm/txx9/generic.h> #include <asm/txx9/tx4927.h> @@ -81,3 +82,12 @@ int __init tx4927_pciclk66_setup(void) pciclk = -1; return pciclk; } + +void __init tx4927_setup_pcierr_irq(void) +{ + if (request_irq(TXX9_IRQ_BASE + TX4927_IR_PCIERR, + tx4927_pcierr_interrupt, + IRQF_DISABLED, "PCI error", + (void *)TX4927_PCIC_REG)) + printk(KERN_WARNING "Failed to request irq for PCIERR\n"); +} diff --git a/arch/mips/pci/pci-tx4938.c b/arch/mips/pci/pci-tx4938.c index e5375511c2b..60e2c52c2c5 100644 --- a/arch/mips/pci/pci-tx4938.c +++ b/arch/mips/pci/pci-tx4938.c @@ -15,6 +15,7 @@ #include <linux/init.h> #include <linux/pci.h> #include <linux/kernel.h> +#include <linux/interrupt.h> #include <asm/txx9/generic.h> #include <asm/txx9/tx4938.h> @@ -132,3 +133,12 @@ int tx4938_pcic1_map_irq(const struct pci_dev *dev, u8 slot) } return -1; } + +void __init tx4938_setup_pcierr_irq(void) +{ + if (request_irq(TXX9_IRQ_BASE + TX4938_IR_PCIERR, + tx4927_pcierr_interrupt, + IRQF_DISABLED, "PCI error", + (void *)TX4927_PCIC_REG)) + printk(KERN_WARNING "Failed to request irq for PCIERR\n"); +} diff --git a/arch/mips/pci/pci.c b/arch/mips/pci/pci.c index 77bd5b68dc4..c7fe6ec621e 100644 --- a/arch/mips/pci/pci.c +++ b/arch/mips/pci/pci.c @@ -328,7 +328,11 @@ EXPORT_SYMBOL(PCIBIOS_MIN_IO); EXPORT_SYMBOL(PCIBIOS_MIN_MEM); #endif -char *pcibios_setup(char *str) +char * (*pcibios_plat_setup)(char *str) __devinitdata; + +char *__devinit pcibios_setup(char *str) { + if (pcibios_plat_setup) + return pcibios_plat_setup(str); return str; } diff --git a/arch/mips/pmc-sierra/msp71xx/msp_serial.c b/arch/mips/pmc-sierra/msp71xx/msp_serial.c index 9de34302e5f..f7261628d8a 100644 --- a/arch/mips/pmc-sierra/msp71xx/msp_serial.c +++ b/arch/mips/pmc-sierra/msp71xx/msp_serial.c @@ -38,68 +38,6 @@ #include <msp_int.h> #include <msp_regs.h> -#ifdef CONFIG_KGDB -/* - * kgdb uses serial port 1 so the console can remain on port 0. - * To use port 0 change the definition to read as follows: - * #define DEBUG_PORT_BASE KSEG1ADDR(MSP_UART0_BASE) - */ -#define DEBUG_PORT_BASE KSEG1ADDR(MSP_UART1_BASE) - -int putDebugChar(char c) -{ - volatile uint32_t *uart = (volatile uint32_t *)DEBUG_PORT_BASE; - uint32_t val = (uint32_t)c; - - local_irq_disable(); - while( !(uart[5] & 0x20) ); /* Wait for TXRDY */ - uart[0] = val; - while( !(uart[5] & 0x20) ); /* Wait for TXRDY */ - local_irq_enable(); - - return 1; -} - -char getDebugChar(void) -{ - volatile uint32_t *uart = (volatile uint32_t *)DEBUG_PORT_BASE; - uint32_t val; - - while( !(uart[5] & 0x01) ); /* Wait for RXRDY */ - val = uart[0]; - - return (char)val; -} - -void initDebugPort(unsigned int uartclk, unsigned int baudrate) -{ - unsigned int baud_divisor = (uartclk + 8 * baudrate)/(16 * baudrate); - - /* Enable FIFOs */ - writeb(UART_FCR_ENABLE_FIFO | UART_FCR_CLEAR_RCVR | - UART_FCR_CLEAR_XMIT | UART_FCR_TRIGGER_4, - (char *)DEBUG_PORT_BASE + (UART_FCR * 4)); - - /* Select brtc divisor */ - writeb(UART_LCR_DLAB, (char *)DEBUG_PORT_BASE + (UART_LCR * 4)); - - /* Store divisor lsb */ - writeb(baud_divisor, (char *)DEBUG_PORT_BASE + (UART_TX * 4)); - - /* Store divisor msb */ - writeb(baud_divisor >> 8, (char *)DEBUG_PORT_BASE + (UART_IER * 4)); - - /* Set 8N1 mode */ - writeb(UART_LCR_WLEN8, (char *)DEBUG_PORT_BASE + (UART_LCR * 4)); - - /* Disable flow control */ - writeb(0, (char *)DEBUG_PORT_BASE + (UART_MCR * 4)); - - /* Disable receive interrupt(!) */ - writeb(0, (char *)DEBUG_PORT_BASE + (UART_IER * 4)); -} -#endif - void __init msp_serial_setup(void) { char *s; @@ -139,17 +77,6 @@ void __init msp_serial_setup(void) case MACH_MSP7120_FPGA: /* Enable UART1 on MSP4200 and MSP7120 */ *GPIO_CFG2_REG = 0x00002299; - -#ifdef CONFIG_KGDB - /* Initialize UART1 for kgdb since PMON doesn't */ - if( DEBUG_PORT_BASE == KSEG1ADDR(MSP_UART1_BASE) ) { - if( mips_machtype == MACH_MSP4200_FPGA - || mips_machtype == MACH_MSP7120_FPGA ) - initDebugPort(uartclk, 19200); - else - initDebugPort(uartclk, 57600); - } -#endif break; default: diff --git a/arch/mips/pmc-sierra/yosemite/Makefile b/arch/mips/pmc-sierra/yosemite/Makefile index 8fd9a04e353..b16f95c3df6 100644 --- a/arch/mips/pmc-sierra/yosemite/Makefile +++ b/arch/mips/pmc-sierra/yosemite/Makefile @@ -4,7 +4,6 @@ obj-y += irq.o prom.o py-console.o setup.o -obj-$(CONFIG_KGDB) += dbg_io.o obj-$(CONFIG_SMP) += smp.o EXTRA_CFLAGS += -Werror diff --git a/arch/mips/pmc-sierra/yosemite/dbg_io.c b/arch/mips/pmc-sierra/yosemite/dbg_io.c deleted file mode 100644 index 6362c702e38..00000000000 --- a/arch/mips/pmc-sierra/yosemite/dbg_io.c +++ /dev/null @@ -1,180 +0,0 @@ -/* - * Copyright 2003 PMC-Sierra - * Author: Manish Lachwani (lachwani@pmc-sierra.com) - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN - * NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF - * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON - * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -/* - * Support for KGDB for the Yosemite board. We make use of single serial - * port to be used for KGDB as well as console. The second serial port - * seems to be having a problem. Single IRQ is allocated for both the - * ports. Hence, the interrupt routing code needs to figure out whether - * the interrupt came from channel A or B. - */ - -#include <asm/serial.h> - -/* - * Baud rate, Parity, Data and Stop bit settings for the - * serial port on the Yosemite. Note that the Early printk - * patch has been added. So, we should be all set to go - */ -#define YOSEMITE_BAUD_2400 2400 -#define YOSEMITE_BAUD_4800 4800 -#define YOSEMITE_BAUD_9600 9600 -#define YOSEMITE_BAUD_19200 19200 -#define YOSEMITE_BAUD_38400 38400 -#define YOSEMITE_BAUD_57600 57600 -#define YOSEMITE_BAUD_115200 115200 - -#define YOSEMITE_PARITY_NONE 0 -#define YOSEMITE_PARITY_ODD 0x08 -#define YOSEMITE_PARITY_EVEN 0x18 -#define YOSEMITE_PARITY_MARK 0x28 -#define YOSEMITE_PARITY_SPACE 0x38 - -#define YOSEMITE_DATA_5BIT 0x0 -#define YOSEMITE_DATA_6BIT 0x1 -#define YOSEMITE_DATA_7BIT 0x2 -#define YOSEMITE_DATA_8BIT 0x3 - -#define YOSEMITE_STOP_1BIT 0x0 -#define YOSEMITE_STOP_2BIT 0x4 - -/* This is crucial */ -#define SERIAL_REG_OFS 0x1 - -#define SERIAL_RCV_BUFFER 0x0 -#define SERIAL_TRANS_HOLD 0x0 -#define SERIAL_SEND_BUFFER 0x0 -#define SERIAL_INTR_ENABLE (1 * SERIAL_REG_OFS) -#define SERIAL_INTR_ID (2 * SERIAL_REG_OFS) -#define SERIAL_DATA_FORMAT (3 * SERIAL_REG_OFS) -#define SERIAL_LINE_CONTROL (3 * SERIAL_REG_OFS) -#define SERIAL_MODEM_CONTROL (4 * SERIAL_REG_OFS) -#define SERIAL_RS232_OUTPUT (4 * SERIAL_REG_OFS) -#define SERIAL_LINE_STATUS (5 * SERIAL_REG_OFS) -#define SERIAL_MODEM_STATUS (6 * SERIAL_REG_OFS) -#define SERIAL_RS232_INPUT (6 * SERIAL_REG_OFS) -#define SERIAL_SCRATCH_PAD (7 * SERIAL_REG_OFS) - -#define SERIAL_DIVISOR_LSB (0 * SERIAL_REG_OFS) -#define SERIAL_DIVISOR_MSB (1 * SERIAL_REG_OFS) - -/* - * Functions to READ and WRITE to serial port 0 - */ -#define SERIAL_READ(ofs) (*((volatile unsigned char*) \ - (TITAN_SERIAL_BASE + ofs))) - -#define SERIAL_WRITE(ofs, val) ((*((volatile unsigned char*) \ - (TITAN_SERIAL_BASE + ofs))) = val) - -/* - * Functions to READ and WRITE to serial port 1 - */ -#define SERIAL_READ_1(ofs) (*((volatile unsigned char*) \ - (TITAN_SERIAL_BASE_1 + ofs))) - -#define SERIAL_WRITE_1(ofs, val) ((*((volatile unsigned char*) \ - (TITAN_SERIAL_BASE_1 + ofs))) = val) - -/* - * Second serial port initialization - */ -void init_second_port(void) -{ - /* Disable Interrupts */ - SERIAL_WRITE_1(SERIAL_LINE_CONTROL, 0x0); - SERIAL_WRITE_1(SERIAL_INTR_ENABLE, 0x0); - - { - unsigned int divisor; - - SERIAL_WRITE_1(SERIAL_LINE_CONTROL, 0x80); - divisor = TITAN_SERIAL_BASE_BAUD / YOSEMITE_BAUD_115200; - SERIAL_WRITE_1(SERIAL_DIVISOR_LSB, divisor & 0xff); - - SERIAL_WRITE_1(SERIAL_DIVISOR_MSB, - (divisor & 0xff00) >> 8); - SERIAL_WRITE_1(SERIAL_LINE_CONTROL, 0x0); - } - - SERIAL_WRITE_1(SERIAL_DATA_FORMAT, YOSEMITE_DATA_8BIT | - YOSEMITE_PARITY_NONE | YOSEMITE_STOP_1BIT); - - /* Enable Interrupts */ - SERIAL_WRITE_1(SERIAL_INTR_ENABLE, 0xf); -} - -/* Initialize the serial port for KGDB debugging */ -void debugInit(unsigned int baud, unsigned char data, unsigned char parity, - unsigned char stop) -{ - /* Disable Interrupts */ - SERIAL_WRITE(SERIAL_LINE_CONTROL, 0x0); - SERIAL_WRITE(SERIAL_INTR_ENABLE, 0x0); - - { - unsigned int divisor; - - SERIAL_WRITE(SERIAL_LINE_CONTROL, 0x80); - - divisor = TITAN_SERIAL_BASE_BAUD / baud; - SERIAL_WRITE(SERIAL_DIVISOR_LSB, divisor & 0xff); - - SERIAL_WRITE(SERIAL_DIVISOR_MSB, (divisor & 0xff00) >> 8); - SERIAL_WRITE(SERIAL_LINE_CONTROL, 0x0); - } - - SERIAL_WRITE(SERIAL_DATA_FORMAT, data | parity | stop); -} - -static int remoteDebugInitialized = 0; - -unsigned char getDebugChar(void) -{ - if (!remoteDebugInitialized) { - remoteDebugInitialized = 1; - debugInit(YOSEMITE_BAUD_115200, - YOSEMITE_DATA_8BIT, - YOSEMITE_PARITY_NONE, YOSEMITE_STOP_1BIT); - } - - while ((SERIAL_READ(SERIAL_LINE_STATUS) & 0x1) == 0); - return SERIAL_READ(SERIAL_RCV_BUFFER); -} - -int putDebugChar(unsigned char byte) -{ - if (!remoteDebugInitialized) { - remoteDebugInitialized = 1; - debugInit(YOSEMITE_BAUD_115200, - YOSEMITE_DATA_8BIT, - YOSEMITE_PARITY_NONE, YOSEMITE_STOP_1BIT); - } - - while ((SERIAL_READ(SERIAL_LINE_STATUS) & 0x20) == 0); - SERIAL_WRITE(SERIAL_SEND_BUFFER, byte); - - return 1; -} diff --git a/arch/mips/pmc-sierra/yosemite/irq.c b/arch/mips/pmc-sierra/yosemite/irq.c index 4decc280786..5f673eba142 100644 --- a/arch/mips/pmc-sierra/yosemite/irq.c +++ b/arch/mips/pmc-sierra/yosemite/irq.c @@ -141,10 +141,6 @@ asmlinkage void plat_irq_dispatch(void) } } -#ifdef CONFIG_KGDB -extern void init_second_port(void); -#endif - /* * Initialize the next level interrupt handler */ @@ -156,11 +152,6 @@ void __init arch_init_irq(void) rm7k_cpu_irq_init(); rm9k_cpu_irq_init(); -#ifdef CONFIG_KGDB - /* At this point, initialize the second serial port */ - init_second_port(); -#endif - #ifdef CONFIG_GDB_CONSOLE register_gdb_console(); #endif diff --git a/arch/mips/rb532/gpio.c b/arch/mips/rb532/gpio.c index b2fe82dba0a..00a1c7877bf 100644 --- a/arch/mips/rb532/gpio.c +++ b/arch/mips/rb532/gpio.c @@ -64,7 +64,8 @@ static struct resource rb532_dev3_ctl_res[] = { void set_434_reg(unsigned reg_offs, unsigned bit, unsigned len, unsigned val) { - unsigned flags, data; + unsigned long flags; + unsigned data; unsigned i = 0; spin_lock_irqsave(&dev3.lock, flags); @@ -90,7 +91,7 @@ EXPORT_SYMBOL(get_434_reg); void set_latch_u5(unsigned char or_mask, unsigned char nand_mask) { - unsigned flags; + unsigned long flags; spin_lock_irqsave(&dev3.lock, flags); diff --git a/arch/mips/rb532/time.c b/arch/mips/rb532/time.c index db74edf8cef..8e7a46855b5 100644 --- a/arch/mips/rb532/time.c +++ b/arch/mips/rb532/time.c @@ -49,8 +49,8 @@ static unsigned long __init cal_r4koff(void) void __init plat_time_init(void) { - unsigned int est_freq, flags; - unsigned long r4k_offset; + unsigned int est_freq; + unsigned long flags, r4k_offset; local_irq_save(flags); diff --git a/arch/mips/sgi-ip22/ip22-setup.c b/arch/mips/sgi-ip22/ip22-setup.c index 5f389ee26fc..896a1ef8482 100644 --- a/arch/mips/sgi-ip22/ip22-setup.c +++ b/arch/mips/sgi-ip22/ip22-setup.c @@ -20,7 +20,6 @@ #include <asm/irq.h> #include <asm/reboot.h> #include <asm/time.h> -#include <asm/gdb-stub.h> #include <asm/io.h> #include <asm/traps.h> #include <asm/sgialib.h> @@ -81,30 +80,6 @@ void __init plat_mem_setup(void) add_preferred_console("arc", 0, NULL); } -#ifdef CONFIG_KGDB - { - char *kgdb_ttyd = prom_getcmdline(); - - if ((kgdb_ttyd = strstr(kgdb_ttyd, "kgdb=ttyd")) != NULL) { - int line; - kgdb_ttyd += strlen("kgdb=ttyd"); - if (*kgdb_ttyd != '1' && *kgdb_ttyd != '2') - printk(KERN_INFO "KGDB: Uknown serial line /dev/ttyd%c" - ", falling back to /dev/ttyd1\n", *kgdb_ttyd); - line = *kgdb_ttyd == '2' ? 0 : 1; - printk(KERN_INFO "KGDB: Using serial line /dev/ttyd%d for " - "session\n", line ? 1 : 2); - rs_kgdb_hook(line); - - printk(KERN_INFO "KGDB: Using serial line /dev/ttyd%d for " - "session, please connect your debugger\n", line ? 1:2); - - kgdb_enabled = 1; - /* Breakpoints and stuff are in sgi_irq_setup() */ - } - } -#endif - #if defined(CONFIG_VT) && defined(CONFIG_SGI_NEWPORT_CONSOLE) { ULONG *gfxinfo; diff --git a/arch/mips/sgi-ip27/Makefile b/arch/mips/sgi-ip27/Makefile index e0a6871d56e..31f4931b848 100644 --- a/arch/mips/sgi-ip27/Makefile +++ b/arch/mips/sgi-ip27/Makefile @@ -7,7 +7,6 @@ obj-y := ip27-berr.o ip27-irq.o ip27-init.o ip27-klconfig.o ip27-klnuma.o \ ip27-xtalk.o obj-$(CONFIG_EARLY_PRINTK) += ip27-console.o -obj-$(CONFIG_KGDB) += ip27-dbgio.o obj-$(CONFIG_SMP) += ip27-smp.o EXTRA_CFLAGS += -Werror diff --git a/arch/mips/sgi-ip27/ip27-dbgio.c b/arch/mips/sgi-ip27/ip27-dbgio.c deleted file mode 100644 index 08fd88b36f8..00000000000 --- a/arch/mips/sgi-ip27/ip27-dbgio.c +++ /dev/null @@ -1,60 +0,0 @@ -/* - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN - * NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF - * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON - * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 675 Mass Ave, Cambridge, MA 02139, USA. - * - * Copyright 2004 Ralf Baechle <ralf@linux-mips.org> - */ -#include <asm/sn/addrs.h> -#include <asm/sn/sn0/hub.h> -#include <asm/sn/klconfig.h> -#include <asm/sn/ioc3.h> -#include <asm/sn/sn_private.h> - -#include <linux/serial.h> -#include <linux/serial_core.h> -#include <linux/serial_reg.h> - -#define IOC3_CLK (22000000 / 3) -#define IOC3_FLAGS (0) - -static inline struct ioc3_uartregs *console_uart(void) -{ - struct ioc3 *ioc3; - - ioc3 = (struct ioc3 *)KL_CONFIG_CH_CONS_INFO(get_nasid())->memory_base; - - return &ioc3->sregs.uarta; -} - -unsigned char getDebugChar(void) -{ - struct ioc3_uartregs *uart = console_uart(); - - while ((uart->iu_lsr & UART_LSR_DR) == 0); - return uart->iu_rbr; -} - -void putDebugChar(unsigned char c) -{ - struct ioc3_uartregs *uart = console_uart(); - - while ((uart->iu_lsr & UART_LSR_THRE) == 0); - uart->iu_thr = c; -} diff --git a/arch/mips/sibyte/bcm1480/irq.c b/arch/mips/sibyte/bcm1480/irq.c index db372a0f106..a35818ed426 100644 --- a/arch/mips/sibyte/bcm1480/irq.c +++ b/arch/mips/sibyte/bcm1480/irq.c @@ -57,30 +57,6 @@ static void bcm1480_set_affinity(unsigned int irq, cpumask_t mask); extern unsigned long ht_eoi_space; #endif -#ifdef CONFIG_KGDB -#include <asm/gdb-stub.h> -extern void breakpoint(void); -static int kgdb_irq; -#ifdef CONFIG_GDB_CONSOLE -extern void register_gdb_console(void); -#endif - -/* kgdb is on when configured. Pass "nokgdb" kernel arg to turn it off */ -static int kgdb_flag = 1; -static int __init nokgdb(char *str) -{ - kgdb_flag = 0; - return 1; -} -__setup("nokgdb", nokgdb); - -/* Default to UART1 */ -int kgdb_port = 1; -#ifdef CONFIG_SERIAL_SB1250_DUART -extern char sb1250_duart_present[]; -#endif -#endif - static struct irq_chip bcm1480_irq_type = { .name = "BCM1480-IMR", .ack = ack_bcm1480_irq, @@ -355,61 +331,10 @@ void __init arch_init_irq(void) * does its own management of IP7. */ -#ifdef CONFIG_KGDB - imask |= STATUSF_IP6; -#endif /* Enable necessary IPs, disable the rest */ change_c0_status(ST0_IM, imask); - -#ifdef CONFIG_KGDB - if (kgdb_flag) { - kgdb_irq = K_BCM1480_INT_UART_0 + kgdb_port; - -#ifdef CONFIG_SERIAL_SB1250_DUART - sb1250_duart_present[kgdb_port] = 0; -#endif - /* Setup uart 1 settings, mapper */ - /* QQQ FIXME */ - __raw_writeq(M_DUART_IMR_BRK, IOADDR(A_DUART_IMRREG(kgdb_port))); - - __raw_writeq(IMR_IP6_VAL, - IOADDR(A_BCM1480_IMR_REGISTER(0, R_BCM1480_IMR_INTERRUPT_MAP_BASE_H) + - (kgdb_irq << 3))); - bcm1480_unmask_irq(0, kgdb_irq); - -#ifdef CONFIG_GDB_CONSOLE - register_gdb_console(); -#endif - printk("Waiting for GDB on UART port %d\n", kgdb_port); - set_debug_traps(); - breakpoint(); - } -#endif -} - -#ifdef CONFIG_KGDB - -#include <linux/delay.h> - -#define duart_out(reg, val) csr_out32(val, IOADDR(A_DUART_CHANREG(kgdb_port, reg))) -#define duart_in(reg) csr_in32(IOADDR(A_DUART_CHANREG(kgdb_port, reg))) - -static void bcm1480_kgdb_interrupt(void) -{ - /* - * Clear break-change status (allow some time for the remote - * host to stop the break, since we would see another - * interrupt on the end-of-break too) - */ - kstat.irqs[smp_processor_id()][kgdb_irq]++; - mdelay(500); - duart_out(R_DUART_CMD, V_DUART_MISC_CMD_RESET_BREAK_INT | - M_DUART_RX_EN | M_DUART_TX_EN); - set_async_breakpoint(&get_irq_regs()->cp0_epc); } -#endif /* CONFIG_KGDB */ - extern void bcm1480_mailbox_interrupt(void); static inline void dispatch_ip2(void) @@ -462,11 +387,6 @@ asmlinkage void plat_irq_dispatch(void) bcm1480_mailbox_interrupt(); #endif -#ifdef CONFIG_KGDB - else if (pending & CAUSEF_IP6) - bcm1480_kgdb_interrupt(); /* KGDB (uart 1) */ -#endif - else if (pending & CAUSEF_IP2) dispatch_ip2(); } diff --git a/arch/mips/sibyte/cfe/setup.c b/arch/mips/sibyte/cfe/setup.c index fd9604d5555..3de30f79db3 100644 --- a/arch/mips/sibyte/cfe/setup.c +++ b/arch/mips/sibyte/cfe/setup.c @@ -59,10 +59,6 @@ int cfe_cons_handle; extern unsigned long initrd_start, initrd_end; #endif -#ifdef CONFIG_KGDB -extern int kgdb_port; -#endif - static void __noreturn cfe_linux_exit(void *arg) { int warm = *(int *)arg; @@ -246,9 +242,6 @@ void __init prom_init(void) int argc = fw_arg0; char **envp = (char **) fw_arg2; int *prom_vec = (int *) fw_arg3; -#ifdef CONFIG_KGDB - char *arg; -#endif _machine_restart = cfe_linux_restart; _machine_halt = cfe_linux_halt; @@ -309,13 +302,6 @@ void __init prom_init(void) } } -#ifdef CONFIG_KGDB - if ((arg = strstr(arcs_cmdline, "kgdb=duart")) != NULL) - kgdb_port = (arg[10] == '0') ? 0 : 1; - else - kgdb_port = 1; -#endif - #ifdef CONFIG_BLK_DEV_INITRD { char *ptr; diff --git a/arch/mips/sibyte/sb1250/irq.c b/arch/mips/sibyte/sb1250/irq.c index eac9065ffe0..a5158483986 100644 --- a/arch/mips/sibyte/sb1250/irq.c +++ b/arch/mips/sibyte/sb1250/irq.c @@ -57,16 +57,6 @@ static void sb1250_set_affinity(unsigned int irq, cpumask_t mask); extern unsigned long ldt_eoi_space; #endif -#ifdef CONFIG_KGDB -static int kgdb_irq; - -/* Default to UART1 */ -int kgdb_port = 1; -#ifdef CONFIG_SERIAL_SB1250_DUART -extern char sb1250_duart_present[]; -#endif -#endif - static struct irq_chip sb1250_irq_type = { .name = "SB1250-IMR", .ack = ack_sb1250_irq, @@ -313,55 +303,10 @@ void __init arch_init_irq(void) * does its own management of IP7. */ -#ifdef CONFIG_KGDB - imask |= STATUSF_IP6; -#endif /* Enable necessary IPs, disable the rest */ change_c0_status(ST0_IM, imask); - -#ifdef CONFIG_KGDB - if (kgdb_flag) { - kgdb_irq = K_INT_UART_0 + kgdb_port; - -#ifdef CONFIG_SERIAL_SB1250_DUART - sb1250_duart_present[kgdb_port] = 0; -#endif - /* Setup uart 1 settings, mapper */ - __raw_writeq(M_DUART_IMR_BRK, - IOADDR(A_DUART_IMRREG(kgdb_port))); - - __raw_writeq(IMR_IP6_VAL, - IOADDR(A_IMR_REGISTER(0, - R_IMR_INTERRUPT_MAP_BASE) + - (kgdb_irq << 3))); - sb1250_unmask_irq(0, kgdb_irq); - } -#endif -} - -#ifdef CONFIG_KGDB - -#include <linux/delay.h> - -#define duart_out(reg, val) csr_out32(val, IOADDR(A_DUART_CHANREG(kgdb_port, reg))) -#define duart_in(reg) csr_in32(IOADDR(A_DUART_CHANREG(kgdb_port, reg))) - -static void sb1250_kgdb_interrupt(void) -{ - /* - * Clear break-change status (allow some time for the remote - * host to stop the break, since we would see another - * interrupt on the end-of-break too) - */ - kstat_this_cpu.irqs[kgdb_irq]++; - mdelay(500); - duart_out(R_DUART_CMD, V_DUART_MISC_CMD_RESET_BREAK_INT | - M_DUART_RX_EN | M_DUART_TX_EN); - set_async_breakpoint(&get_irq_regs()->cp0_epc); } -#endif /* CONFIG_KGDB */ - extern void sb1250_mailbox_interrupt(void); static inline void dispatch_ip2(void) @@ -407,11 +352,6 @@ asmlinkage void plat_irq_dispatch(void) sb1250_mailbox_interrupt(); #endif -#ifdef CONFIG_KGDB - else if (pending & CAUSEF_IP6) /* KGDB (uart 1) */ - sb1250_kgdb_interrupt(); -#endif - else if (pending & CAUSEF_IP2) dispatch_ip2(); else diff --git a/arch/mips/sibyte/swarm/Makefile b/arch/mips/sibyte/swarm/Makefile index 255d692bfa1..f18ba9201bb 100644 --- a/arch/mips/sibyte/swarm/Makefile +++ b/arch/mips/sibyte/swarm/Makefile @@ -1,4 +1,3 @@ obj-y := setup.o rtc_xicor1241.o rtc_m41t81.o obj-$(CONFIG_I2C_BOARDINFO) += swarm-i2c.o -obj-$(CONFIG_KGDB) += dbg_io.o diff --git a/arch/mips/sibyte/swarm/dbg_io.c b/arch/mips/sibyte/swarm/dbg_io.c deleted file mode 100644 index b97ae304848..00000000000 --- a/arch/mips/sibyte/swarm/dbg_io.c +++ /dev/null @@ -1,76 +0,0 @@ -/* - * kgdb debug routines for SiByte boards. - * - * Copyright (C) 2001 MontaVista Software Inc. - * Author: Jun Sun, jsun@mvista.com or jsun@junsun.net - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - * - */ - -/* -------------------- BEGINNING OF CONFIG --------------------- */ - -#include <linux/delay.h> -#include <asm/io.h> -#include <asm/sibyte/sb1250.h> -#include <asm/sibyte/sb1250_regs.h> -#include <asm/sibyte/sb1250_uart.h> -#include <asm/sibyte/sb1250_int.h> -#include <asm/addrspace.h> - -/* - * We use the second serial port for kgdb traffic. - * 115200, 8, N, 1. - */ - -#define BAUD_RATE 115200 -#define CLK_DIVISOR V_DUART_BAUD_RATE(BAUD_RATE) -#define DATA_BITS V_DUART_BITS_PER_CHAR_8 /* or 7 */ -#define PARITY V_DUART_PARITY_MODE_NONE /* or even */ -#define STOP_BITS M_DUART_STOP_BIT_LEN_1 /* or 2 */ - -static int duart_initialized = 0; /* 0: need to be init'ed by kgdb */ - -/* -------------------- END OF CONFIG --------------------- */ -extern int kgdb_port; - -#define duart_out(reg, val) csr_out32(val, IOADDR(A_DUART_CHANREG(kgdb_port, reg))) -#define duart_in(reg) csr_in32(IOADDR(A_DUART_CHANREG(kgdb_port, reg))) - -void putDebugChar(unsigned char c); -unsigned char getDebugChar(void); -static void -duart_init(int clk_divisor, int data, int parity, int stop) -{ - duart_out(R_DUART_MODE_REG_1, data | parity); - duart_out(R_DUART_MODE_REG_2, stop); - duart_out(R_DUART_CLK_SEL, clk_divisor); - - duart_out(R_DUART_CMD, M_DUART_RX_EN | M_DUART_TX_EN); /* enable rx and tx */ -} - -void -putDebugChar(unsigned char c) -{ - if (!duart_initialized) { - duart_initialized = 1; - duart_init(CLK_DIVISOR, DATA_BITS, PARITY, STOP_BITS); - } - while ((duart_in(R_DUART_STATUS) & M_DUART_TX_RDY) == 0); - duart_out(R_DUART_TX_HOLD, c); -} - -unsigned char -getDebugChar(void) -{ - if (!duart_initialized) { - duart_initialized = 1; - duart_init(CLK_DIVISOR, DATA_BITS, PARITY, STOP_BITS); - } - while ((duart_in(R_DUART_STATUS) & M_DUART_RX_RDY) == 0) ; - return duart_in(R_DUART_RX_HOLD); -} - diff --git a/arch/mips/txx9/Kconfig b/arch/mips/txx9/Kconfig index 6de4c5aa92b..840fe757c48 100644 --- a/arch/mips/txx9/Kconfig +++ b/arch/mips/txx9/Kconfig @@ -1,3 +1,27 @@ +config MACH_TX39XX + bool + select MACH_TXX9 + select SYS_HAS_CPU_TX39XX + +config MACH_TX49XX + bool + select MACH_TXX9 + select CEVT_R4K + select CSRC_R4K + select IRQ_CPU + select SYS_HAS_CPU_TX49XX + select SYS_SUPPORTS_64BIT_KERNEL + +config MACH_TXX9 + bool + select DMA_NONCOHERENT + select SWAP_IO_SPACE + select SYS_HAS_EARLY_PRINTK + select SYS_SUPPORTS_32BIT_KERNEL + select SYS_SUPPORTS_LITTLE_ENDIAN + select SYS_SUPPORTS_BIG_ENDIAN + select GENERIC_HARDIRQS_NO__DO_IRQ + config TOSHIBA_JMR3927 bool "Toshiba JMR-TX3927 board" depends on MACH_TX39XX @@ -24,68 +48,37 @@ config TOSHIBA_RBTX4938 config SOC_TX3927 bool select CEVT_TXX9 - select DMA_NONCOHERENT select HAS_TXX9_SERIAL select HW_HAS_PCI select IRQ_TXX9 - select SWAP_IO_SPACE - select SYS_HAS_CPU_TX39XX - select SYS_SUPPORTS_32BIT_KERNEL - select SYS_SUPPORTS_LITTLE_ENDIAN - select SYS_SUPPORTS_BIG_ENDIAN - select GENERIC_HARDIRQS_NO__DO_IRQ select GPIO_TXX9 config SOC_TX4927 bool - select CEVT_R4K - select CSRC_R4K select CEVT_TXX9 - select DMA_NONCOHERENT select HAS_TXX9_SERIAL select HW_HAS_PCI - select IRQ_CPU select IRQ_TXX9 select PCI_TX4927 - select SWAP_IO_SPACE - select SYS_HAS_CPU_TX49XX - select SYS_SUPPORTS_32BIT_KERNEL - select SYS_SUPPORTS_64BIT_KERNEL - select SYS_SUPPORTS_LITTLE_ENDIAN - select SYS_SUPPORTS_BIG_ENDIAN - select SYS_SUPPORTS_KGDB - select GENERIC_HARDIRQS_NO__DO_IRQ select GPIO_TXX9 config SOC_TX4938 bool - select CEVT_R4K - select CSRC_R4K select CEVT_TXX9 - select DMA_NONCOHERENT select HAS_TXX9_SERIAL select HW_HAS_PCI - select IRQ_CPU select IRQ_TXX9 select PCI_TX4927 - select SWAP_IO_SPACE - select SYS_HAS_CPU_TX49XX - select SYS_SUPPORTS_32BIT_KERNEL - select SYS_SUPPORTS_64BIT_KERNEL - select SYS_SUPPORTS_LITTLE_ENDIAN - select SYS_SUPPORTS_BIG_ENDIAN - select SYS_SUPPORTS_KGDB - select GENERIC_HARDIRQS_NO__DO_IRQ select GPIO_TXX9 config TOSHIBA_FPCIB0 bool "FPCIB0 Backplane Support" - depends on PCI && (MACH_TX39XX || MACH_TX49XX) + depends on PCI && MACH_TXX9 select I8259 config PICMG_PCI_BACKPLANE_DEFAULT bool "Support for PICMG PCI Backplane" - depends on PCI && (MACH_TX39XX || MACH_TX49XX) + depends on PCI && MACH_TXX9 default y if !TOSHIBA_FPCIB0 if TOSHIBA_RBTX4938 diff --git a/arch/mips/txx9/generic/Makefile b/arch/mips/txx9/generic/Makefile index 9c120771e65..9bb34af26b7 100644 --- a/arch/mips/txx9/generic/Makefile +++ b/arch/mips/txx9/generic/Makefile @@ -4,9 +4,9 @@ obj-y += setup.o obj-$(CONFIG_PCI) += pci.o +obj-$(CONFIG_SOC_TX3927) += setup_tx3927.o irq_tx3927.o obj-$(CONFIG_SOC_TX4927) += mem_tx4927.o setup_tx4927.o irq_tx4927.o obj-$(CONFIG_SOC_TX4938) += mem_tx4927.o setup_tx4938.o irq_tx4938.o obj-$(CONFIG_TOSHIBA_FPCIB0) += smsc_fdc37m81x.o -obj-$(CONFIG_KGDB) += dbgio.o EXTRA_CFLAGS += -Werror diff --git a/arch/mips/txx9/generic/dbgio.c b/arch/mips/txx9/generic/dbgio.c deleted file mode 100644 index 33b9c672a32..00000000000 --- a/arch/mips/txx9/generic/dbgio.c +++ /dev/null @@ -1,48 +0,0 @@ -/* - * linux/arch/mips/tx4938/common/dbgio.c - * - * kgdb interface for gdb - * - * Author: MontaVista Software, Inc. - * source@mvista.com - * - * Copyright 2005 MontaVista Software Inc. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. - * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS - * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR - * TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE - * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 675 Mass Ave, Cambridge, MA 02139, USA. - * - * Support for TX4938 in 2.6 - Hiroshi DOYU <Hiroshi_DOYU@montavista.co.jp> - */ - -#include <linux/types> - -extern u8 txx9_sio_kdbg_rd(void); -extern int txx9_sio_kdbg_wr( u8 ch ); - -u8 getDebugChar(void) -{ - return (txx9_sio_kdbg_rd()); -} - -int putDebugChar(u8 byte) -{ - return (txx9_sio_kdbg_wr(byte)); -} - diff --git a/arch/mips/txx9/generic/irq_tx3927.c b/arch/mips/txx9/generic/irq_tx3927.c new file mode 100644 index 00000000000..c683f593eda --- /dev/null +++ b/arch/mips/txx9/generic/irq_tx3927.c @@ -0,0 +1,25 @@ +/* + * Common tx3927 irq handler + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright 2001 MontaVista Software Inc. + * Copyright (C) 2000-2001 Toshiba Corporation + */ +#include <linux/init.h> +#include <asm/txx9irq.h> +#include <asm/txx9/tx3927.h> + +void __init tx3927_irq_init(void) +{ + int i; + + txx9_irq_init(TX3927_IRC_REG); + /* raise priority for timers, sio */ + for (i = 0; i < TX3927_NR_TMR; i++) + txx9_irq_set_pri(TX3927_IR_TMR(i), 6); + for (i = 0; i < TX3927_NR_SIO; i++) + txx9_irq_set_pri(TX3927_IR_SIO(i), 7); +} diff --git a/arch/mips/txx9/generic/pci.c b/arch/mips/txx9/generic/pci.c index 0b92d8c1320..7b637a7c0e6 100644 --- a/arch/mips/txx9/generic/pci.c +++ b/arch/mips/txx9/generic/pci.c @@ -386,3 +386,39 @@ int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) { return txx9_board_vec->pci_map_irq(dev, slot, pin); } + +char * (*txx9_board_pcibios_setup)(char *str) __devinitdata; + +char *__devinit txx9_pcibios_setup(char *str) +{ + if (txx9_board_pcibios_setup && !txx9_board_pcibios_setup(str)) + return NULL; + if (!strcmp(str, "picmg")) { + /* PICMG compliant backplane (TOSHIBA JMB-PICMG-ATX + (5V or 3.3V), JMB-PICMG-L2 (5V only), etc.) */ + txx9_pci_option |= TXX9_PCI_OPT_PICMG; + return NULL; + } else if (!strcmp(str, "nopicmg")) { + /* non-PICMG compliant backplane (TOSHIBA + RBHBK4100,RBHBK4200, Interface PCM-PCM05, etc.) */ + txx9_pci_option &= ~TXX9_PCI_OPT_PICMG; + return NULL; + } else if (!strncmp(str, "clk=", 4)) { + char *val = str + 4; + txx9_pci_option &= ~TXX9_PCI_OPT_CLK_MASK; + if (strcmp(val, "33") == 0) + txx9_pci_option |= TXX9_PCI_OPT_CLK_33; + else if (strcmp(val, "66") == 0) + txx9_pci_option |= TXX9_PCI_OPT_CLK_66; + else /* "auto" */ + txx9_pci_option |= TXX9_PCI_OPT_CLK_AUTO; + return NULL; + } else if (!strncmp(str, "err=", 4)) { + if (!strcmp(str + 4, "panic")) + txx9_pci_err_action = TXX9_PCI_ERR_PANIC; + else if (!strcmp(str + 4, "ignore")) + txx9_pci_err_action = TXX9_PCI_ERR_IGNORE; + return NULL; + } + return str; +} diff --git a/arch/mips/txx9/generic/setup.c b/arch/mips/txx9/generic/setup.c index 8c60c78b9a9..1bc57d0f4c5 100644 --- a/arch/mips/txx9/generic/setup.c +++ b/arch/mips/txx9/generic/setup.c @@ -20,9 +20,13 @@ #include <linux/clk.h> #include <linux/err.h> #include <linux/gpio.h> +#include <linux/platform_device.h> +#include <linux/serial_core.h> #include <asm/bootinfo.h> #include <asm/time.h> +#include <asm/reboot.h> #include <asm/txx9/generic.h> +#include <asm/txx9/pci.h> #ifdef CONFIG_CPU_TX49XX #include <asm/txx9/tx4938.h> #endif @@ -187,6 +191,117 @@ char * __init prom_getcmdline(void) return &(arcs_cmdline[0]); } +static void __noreturn txx9_machine_halt(void) +{ + local_irq_disable(); + clear_c0_status(ST0_IM); + while (1) { + if (cpu_wait) { + (*cpu_wait)(); + if (cpu_has_counter) { + /* + * Clear counter interrupt while it + * breaks WAIT instruction even if + * masked. + */ + write_c0_compare(0); + } + } + } +} + +/* Watchdog support */ +void __init txx9_wdt_init(unsigned long base) +{ + struct resource res = { + .start = base, + .end = base + 0x100 - 1, + .flags = IORESOURCE_MEM, + }; + platform_device_register_simple("txx9wdt", -1, &res, 1); +} + +/* SPI support */ +void __init txx9_spi_init(int busid, unsigned long base, int irq) +{ + struct resource res[] = { + { + .start = base, + .end = base + 0x20 - 1, + .flags = IORESOURCE_MEM, + }, { + .start = irq, + .flags = IORESOURCE_IRQ, + }, + }; + platform_device_register_simple("spi_txx9", busid, + res, ARRAY_SIZE(res)); +} + +void __init txx9_ethaddr_init(unsigned int id, unsigned char *ethaddr) +{ + struct platform_device *pdev = + platform_device_alloc("tc35815-mac", id); + if (!pdev || + platform_device_add_data(pdev, ethaddr, 6) || + platform_device_add(pdev)) + platform_device_put(pdev); +} + +void __init txx9_sio_init(unsigned long baseaddr, int irq, + unsigned int line, unsigned int sclk, int nocts) +{ +#ifdef CONFIG_SERIAL_TXX9 + struct uart_port req; + + memset(&req, 0, sizeof(req)); + req.line = line; + req.iotype = UPIO_MEM; + req.membase = ioremap(baseaddr, 0x24); + req.mapbase = baseaddr; + req.irq = irq; + if (!nocts) + req.flags |= UPF_BUGGY_UART /*HAVE_CTS_LINE*/; + if (sclk) { + req.flags |= UPF_MAGIC_MULTIPLIER /*USE_SCLK*/; + req.uartclk = sclk; + } else + req.uartclk = TXX9_IMCLK; + early_serial_txx9_setup(&req); +#endif /* CONFIG_SERIAL_TXX9 */ +} + +#ifdef CONFIG_EARLY_PRINTK +static void __init null_prom_putchar(char c) +{ +} +void (*txx9_prom_putchar)(char c) __initdata = null_prom_putchar; + +void __init prom_putchar(char c) +{ + txx9_prom_putchar(c); +} + +static void __iomem *early_txx9_sio_port; + +static void __init early_txx9_sio_putchar(char c) +{ +#define TXX9_SICISR 0x0c +#define TXX9_SITFIFO 0x1c +#define TXX9_SICISR_TXALS 0x00000002 + while (!(__raw_readl(early_txx9_sio_port + TXX9_SICISR) & + TXX9_SICISR_TXALS)) + ; + __raw_writel(c, early_txx9_sio_port + TXX9_SITFIFO); +} + +void __init txx9_sio_putchar_init(unsigned long baseaddr) +{ + early_txx9_sio_port = ioremap(baseaddr, 0x24); + txx9_prom_putchar = early_txx9_sio_putchar; +} +#endif /* CONFIG_EARLY_PRINTK */ + /* wrappers */ void __init plat_mem_setup(void) { @@ -194,6 +309,15 @@ void __init plat_mem_setup(void) ioport_resource.end = ~0UL; /* no limit */ iomem_resource.start = 0; iomem_resource.end = ~0UL; /* no limit */ + + /* fallback restart/halt routines */ + _machine_restart = (void (*)(char *))txx9_machine_halt; + _machine_halt = txx9_machine_halt; + pm_power_off = txx9_machine_halt; + +#ifdef CONFIG_PCI + pcibios_plat_setup = txx9_pcibios_setup; +#endif txx9_board_vec->mem_setup(); } diff --git a/arch/mips/txx9/generic/setup_tx3927.c b/arch/mips/txx9/generic/setup_tx3927.c new file mode 100644 index 00000000000..7bd963d37fc --- /dev/null +++ b/arch/mips/txx9/generic/setup_tx3927.c @@ -0,0 +1,130 @@ +/* + * TX3927 setup routines + * Based on linux/arch/mips/txx9/jmr3927/setup.c + * + * Copyright 2001 MontaVista Software Inc. + * Copyright (C) 2000-2001 Toshiba Corporation + * Copyright (C) 2007 Ralf Baechle (ralf@linux-mips.org) + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + */ +#include <linux/init.h> +#include <linux/ioport.h> +#include <linux/delay.h> +#include <linux/param.h> +#include <linux/io.h> +#include <asm/mipsregs.h> +#include <asm/txx9irq.h> +#include <asm/txx9tmr.h> +#include <asm/txx9pio.h> +#include <asm/txx9/generic.h> +#include <asm/txx9/tx3927.h> + +void __init tx3927_wdt_init(void) +{ + txx9_wdt_init(TX3927_TMR_REG(2)); +} + +void __init tx3927_setup(void) +{ + int i; + unsigned int conf; + + /* don't enable - see errata */ + txx9_ccfg_toeon = 0; + if (strstr(prom_getcmdline(), "toeon") != NULL) + txx9_ccfg_toeon = 1; + + txx9_reg_res_init(TX3927_REV_PCODE(), TX3927_REG_BASE, + TX3927_REG_SIZE); + + /* SDRAMC,ROMC are configured by PROM */ + for (i = 0; i < 8; i++) { + if (!(tx3927_romcptr->cr[i] & 0x8)) + continue; /* disabled */ + txx9_ce_res[i].start = (unsigned long)TX3927_ROMC_BA(i); + txx9_ce_res[i].end = + txx9_ce_res[i].start + TX3927_ROMC_SIZE(i) - 1; + request_resource(&iomem_resource, &txx9_ce_res[i]); + } + + /* clocks */ + txx9_gbus_clock = txx9_cpu_clock / 2; + /* change default value to udelay/mdelay take reasonable time */ + loops_per_jiffy = txx9_cpu_clock / HZ / 2; + + /* CCFG */ + /* enable Timeout BusError */ + if (txx9_ccfg_toeon) + tx3927_ccfgptr->ccfg |= TX3927_CCFG_TOE; + + /* clear BusErrorOnWrite flag */ + tx3927_ccfgptr->ccfg &= ~TX3927_CCFG_BEOW; + if (read_c0_conf() & TX39_CONF_WBON) + /* Disable PCI snoop */ + tx3927_ccfgptr->ccfg &= ~TX3927_CCFG_PSNP; + else + /* Enable PCI SNOOP - with write through only */ + tx3927_ccfgptr->ccfg |= TX3927_CCFG_PSNP; + /* do reset on watchdog */ + tx3927_ccfgptr->ccfg |= TX3927_CCFG_WR; + + printk(KERN_INFO "TX3927 -- CRIR:%08lx CCFG:%08lx PCFG:%08lx\n", + tx3927_ccfgptr->crir, + tx3927_ccfgptr->ccfg, tx3927_ccfgptr->pcfg); + + /* TMR */ + for (i = 0; i < TX3927_NR_TMR; i++) + txx9_tmr_init(TX3927_TMR_REG(i)); + + /* DMA */ + tx3927_dmaptr->mcr = 0; + for (i = 0; i < ARRAY_SIZE(tx3927_dmaptr->ch); i++) { + /* reset channel */ + tx3927_dmaptr->ch[i].ccr = TX3927_DMA_CCR_CHRST; + tx3927_dmaptr->ch[i].ccr = 0; + } + /* enable DMA */ +#ifdef __BIG_ENDIAN + tx3927_dmaptr->mcr = TX3927_DMA_MCR_MSTEN; +#else + tx3927_dmaptr->mcr = TX3927_DMA_MCR_MSTEN | TX3927_DMA_MCR_LE; +#endif + + /* PIO */ + __raw_writel(0, &tx3927_pioptr->maskcpu); + __raw_writel(0, &tx3927_pioptr->maskext); + txx9_gpio_init(TX3927_PIO_REG, 0, 16); + + conf = read_c0_conf(); + if (!(conf & TX39_CONF_ICE)) + printk(KERN_INFO "TX3927 I-Cache disabled.\n"); + if (!(conf & TX39_CONF_DCE)) + printk(KERN_INFO "TX3927 D-Cache disabled.\n"); + else if (!(conf & TX39_CONF_WBON)) + printk(KERN_INFO "TX3927 D-Cache WriteThrough.\n"); + else if (!(conf & TX39_CONF_CWFON)) + printk(KERN_INFO "TX3927 D-Cache WriteBack.\n"); + else + printk(KERN_INFO "TX3927 D-Cache WriteBack (CWF) .\n"); +} + +void __init tx3927_time_init(unsigned int evt_tmrnr, unsigned int src_tmrnr) +{ + txx9_clockevent_init(TX3927_TMR_REG(evt_tmrnr), + TXX9_IRQ_BASE + TX3927_IR_TMR(evt_tmrnr), + TXX9_IMCLK); + txx9_clocksource_init(TX3927_TMR_REG(src_tmrnr), TXX9_IMCLK); +} + +void __init tx3927_sio_init(unsigned int sclk, unsigned int cts_mask) +{ + int i; + + for (i = 0; i < 2; i++) + txx9_sio_init(TX3927_SIO_REG(i), + TXX9_IRQ_BASE + TX3927_IR_SIO(i), + i, sclk, (1 << i) & cts_mask); +} diff --git a/arch/mips/txx9/generic/setup_tx4927.c b/arch/mips/txx9/generic/setup_tx4927.c index 89d6e28add9..f80d4b7a694 100644 --- a/arch/mips/txx9/generic/setup_tx4927.c +++ b/arch/mips/txx9/generic/setup_tx4927.c @@ -13,7 +13,6 @@ #include <linux/init.h> #include <linux/ioport.h> #include <linux/delay.h> -#include <linux/serial_core.h> #include <linux/param.h> #include <asm/txx9irq.h> #include <asm/txx9tmr.h> @@ -21,7 +20,7 @@ #include <asm/txx9/generic.h> #include <asm/txx9/tx4927.h> -void __init tx4927_wdr_init(void) +static void __init tx4927_wdr_init(void) { /* clear WatchDogReset (W1C) */ tx4927_ccfg_set(TX4927_CCFG_WDRST); @@ -29,6 +28,11 @@ void __init tx4927_wdr_init(void) tx4927_ccfg_set(TX4927_CCFG_WR); } +void __init tx4927_wdt_init(void) +{ + txx9_wdt_init(TX4927_TMR_REG(2) & 0xfffffffffULL); +} + static struct resource tx4927_sdram_resource[4]; void __init tx4927_setup(void) @@ -173,22 +177,12 @@ void __init tx4927_time_init(unsigned int tmrnr) TXX9_IMCLK); } -void __init tx4927_setup_serial(void) +void __init tx4927_sio_init(unsigned int sclk, unsigned int cts_mask) { -#ifdef CONFIG_SERIAL_TXX9 int i; - struct uart_port req; - - for (i = 0; i < 2; i++) { - memset(&req, 0, sizeof(req)); - req.line = i; - req.iotype = UPIO_MEM; - req.membase = (unsigned char __iomem *)TX4927_SIO_REG(i); - req.mapbase = TX4927_SIO_REG(i) & 0xfffffffffULL; - req.irq = TXX9_IRQ_BASE + TX4927_IR_SIO(i); - req.flags |= UPF_BUGGY_UART /*HAVE_CTS_LINE*/; - req.uartclk = TXX9_IMCLK; - early_serial_txx9_setup(&req); - } -#endif /* CONFIG_SERIAL_TXX9 */ + + for (i = 0; i < 2; i++) + txx9_sio_init(TX4927_SIO_REG(i) & 0xfffffffffULL, + TXX9_IRQ_BASE + TX4927_IR_SIO(i), + i, sclk, (1 << i) & cts_mask); } diff --git a/arch/mips/txx9/generic/setup_tx4938.c b/arch/mips/txx9/generic/setup_tx4938.c index 317378d8579..f3040b9ba05 100644 --- a/arch/mips/txx9/generic/setup_tx4938.c +++ b/arch/mips/txx9/generic/setup_tx4938.c @@ -13,7 +13,6 @@ #include <linux/init.h> #include <linux/ioport.h> #include <linux/delay.h> -#include <linux/serial_core.h> #include <linux/param.h> #include <asm/txx9irq.h> #include <asm/txx9tmr.h> @@ -21,7 +20,7 @@ #include <asm/txx9/generic.h> #include <asm/txx9/tx4938.h> -void __init tx4938_wdr_init(void) +static void __init tx4938_wdr_init(void) { /* clear WatchDogReset (W1C) */ tx4938_ccfg_set(TX4938_CCFG_WDRST); @@ -29,6 +28,11 @@ void __init tx4938_wdr_init(void) tx4938_ccfg_set(TX4938_CCFG_WR); } +void __init tx4938_wdt_init(void) +{ + txx9_wdt_init(TX4938_TMR_REG(2) & 0xfffffffffULL); +} + static struct resource tx4938_sdram_resource[4]; static struct resource tx4938_sram_resource; @@ -233,11 +237,9 @@ void __init tx4938_time_init(unsigned int tmrnr) TXX9_IMCLK); } -void __init tx4938_setup_serial(void) +void __init tx4938_sio_init(unsigned int sclk, unsigned int cts_mask) { -#ifdef CONFIG_SERIAL_TXX9 int i; - struct uart_port req; unsigned int ch_mask = 0; if (__raw_readq(&tx4938_ccfgptr->pcfg) & TX4938_PCFG_ETH0_SEL) @@ -245,15 +247,24 @@ void __init tx4938_setup_serial(void) for (i = 0; i < 2; i++) { if ((1 << i) & ch_mask) continue; - memset(&req, 0, sizeof(req)); - req.line = i; - req.iotype = UPIO_MEM; - req.membase = (unsigned char __iomem *)TX4938_SIO_REG(i); - req.mapbase = TX4938_SIO_REG(i) & 0xfffffffffULL; - req.irq = TXX9_IRQ_BASE + TX4938_IR_SIO(i); - req.flags |= UPF_BUGGY_UART /*HAVE_CTS_LINE*/; - req.uartclk = TXX9_IMCLK; - early_serial_txx9_setup(&req); + txx9_sio_init(TX4938_SIO_REG(i) & 0xfffffffffULL, + TXX9_IRQ_BASE + TX4938_IR_SIO(i), + i, sclk, (1 << i) & cts_mask); } -#endif /* CONFIG_SERIAL_TXX9 */ +} + +void __init tx4938_spi_init(int busid) +{ + txx9_spi_init(busid, TX4938_SPI_REG & 0xfffffffffULL, + TXX9_IRQ_BASE + TX4938_IR_SPI); +} + +void __init tx4938_ethaddr_init(unsigned char *addr0, unsigned char *addr1) +{ + u64 pcfg = __raw_readq(&tx4938_ccfgptr->pcfg); + + if (addr0 && (pcfg & TX4938_PCFG_ETH0_SEL)) + txx9_ethaddr_init(TXX9_IRQ_BASE + TX4938_IR_ETH0, addr0); + if (addr1 && (pcfg & TX4938_PCFG_ETH1_SEL)) + txx9_ethaddr_init(TXX9_IRQ_BASE + TX4938_IR_ETH1, addr1); } diff --git a/arch/mips/txx9/generic/smsc_fdc37m81x.c b/arch/mips/txx9/generic/smsc_fdc37m81x.c index 69e487467fa..a2b2d62d88e 100644 --- a/arch/mips/txx9/generic/smsc_fdc37m81x.c +++ b/arch/mips/txx9/generic/smsc_fdc37m81x.c @@ -15,8 +15,6 @@ #include <asm/io.h> #include <asm/txx9/smsc_fdc37m81x.h> -#define DEBUG - /* Common Registers */ #define SMSC_FDC37M81X_CONFIG_INDEX 0x00 #define SMSC_FDC37M81X_CONFIG_DATA 0x01 @@ -55,7 +53,7 @@ #define SMSC_FDC37M81X_CONFIG_EXIT 0xaa #define SMSC_FDC37M81X_CHIP_ID 0x4d -static unsigned long g_smsc_fdc37m81x_base = 0; +static unsigned long g_smsc_fdc37m81x_base; static inline unsigned char smsc_fdc37m81x_rd(unsigned char index) { @@ -107,7 +105,8 @@ unsigned long __init smsc_fdc37m81x_init(unsigned long port) u8 chip_id; if (g_smsc_fdc37m81x_base) - printk("smsc_fdc37m81x_init() stepping on old base=0x%0*lx\n", + printk(KERN_WARNING "%s: stepping on old base=0x%0*lx\n", + __func__, field, g_smsc_fdc37m81x_base); g_smsc_fdc37m81x_base = port; @@ -118,7 +117,7 @@ unsigned long __init smsc_fdc37m81x_init(unsigned long port) if (chip_id == SMSC_FDC37M81X_CHIP_ID) smsc_fdc37m81x_config_end(); else { - printk("smsc_fdc37m81x_init() unknow chip id 0x%02x\n", + printk(KERN_WARNING "%s: unknow chip id 0x%02x\n", __func__, chip_id); g_smsc_fdc37m81x_base = 0; } @@ -127,22 +126,23 @@ unsigned long __init smsc_fdc37m81x_init(unsigned long port) } #ifdef DEBUG -void smsc_fdc37m81x_config_dump_one(char *key, u8 dev, u8 reg) +static void smsc_fdc37m81x_config_dump_one(const char *key, u8 dev, u8 reg) { - printk("%s: dev=0x%02x reg=0x%02x val=0x%02x\n", key, dev, reg, + printk(KERN_INFO "%s: dev=0x%02x reg=0x%02x val=0x%02x\n", + key, dev, reg, smsc_fdc37m81x_rd(reg)); } void smsc_fdc37m81x_config_dump(void) { u8 orig; - char *fname = "smsc_fdc37m81x_config_dump()"; + const char *fname = __func__; smsc_fdc37m81x_config_beg(); orig = smsc_fdc37m81x_rd(SMSC_FDC37M81X_DNUM); - printk("%s: common\n", fname); + printk(KERN_INFO "%s: common\n", fname); smsc_fdc37m81x_config_dump_one(fname, SMSC_FDC37M81X_NONE, SMSC_FDC37M81X_DNUM); smsc_fdc37m81x_config_dump_one(fname, SMSC_FDC37M81X_NONE, @@ -154,7 +154,7 @@ void smsc_fdc37m81x_config_dump(void) smsc_fdc37m81x_config_dump_one(fname, SMSC_FDC37M81X_NONE, SMSC_FDC37M81X_PMGT); - printk("%s: keyboard\n", fname); + printk(KERN_INFO "%s: keyboard\n", fname); smsc_dc37m81x_wr(SMSC_FDC37M81X_DNUM, SMSC_FDC37M81X_KBD); smsc_fdc37m81x_config_dump_one(fname, SMSC_FDC37M81X_KBD, SMSC_FDC37M81X_ACTIVE); diff --git a/arch/mips/txx9/jmr3927/Makefile b/arch/mips/txx9/jmr3927/Makefile index ba292c94566..20d61ac543e 100644 --- a/arch/mips/txx9/jmr3927/Makefile +++ b/arch/mips/txx9/jmr3927/Makefile @@ -3,6 +3,5 @@ # obj-y += prom.o irq.o setup.o -obj-$(CONFIG_KGDB) += kgdb_io.o EXTRA_CFLAGS += -Werror diff --git a/arch/mips/txx9/jmr3927/irq.c b/arch/mips/txx9/jmr3927/irq.c index 070c9a115e5..6ec626c9473 100644 --- a/arch/mips/txx9/jmr3927/irq.c +++ b/arch/mips/txx9/jmr3927/irq.c @@ -30,15 +30,11 @@ * 675 Mass Ave, Cambridge, MA 02139, USA. */ #include <linux/init.h> -#include <linux/sched.h> #include <linux/types.h> #include <linux/interrupt.h> #include <asm/io.h> #include <asm/mipsregs.h> -#include <asm/system.h> - -#include <asm/processor.h> #include <asm/txx9/generic.h> #include <asm/txx9/jmr3927.h> @@ -46,13 +42,6 @@ #error JMR3927_IRQ_END > NR_IRQS #endif -static unsigned char irc_level[TX3927_NUM_IR] = { - 5, 5, 5, 5, 5, 5, /* INT[5:0] */ - 7, 7, /* SIO */ - 5, 5, 5, 0, 0, /* DMA, PIO, PCI */ - 6, 6, 6 /* TMR */ -}; - /* * CP0_STATUS is a thread's resource (saved/restored on context switch). * So disable_irq/enable_irq MUST handle IOC/IRC registers. @@ -103,26 +92,18 @@ static int jmr3927_irq_dispatch(int pending) return irq; } -#ifdef CONFIG_PCI -static irqreturn_t jmr3927_pcierr_interrupt(int irq, void *dev_id) -{ - printk(KERN_WARNING "PCI error interrupt (irq 0x%x).\n", irq); - printk(KERN_WARNING "pcistat:%02x, lbstat:%04lx\n", - tx3927_pcicptr->pcistat, tx3927_pcicptr->lbstat); - - return IRQ_HANDLED; -} -static struct irqaction pcierr_action = { - .handler = jmr3927_pcierr_interrupt, - .mask = CPU_MASK_NONE, - .name = "PCI error", +static struct irq_chip jmr3927_irq_ioc = { + .name = "jmr3927_ioc", + .ack = mask_irq_ioc, + .mask = mask_irq_ioc, + .mask_ack = mask_irq_ioc, + .unmask = unmask_irq_ioc, }; -#endif - -static void __init jmr3927_irq_init(void); void __init jmr3927_irq_setup(void) { + int i; + txx9_irq_dispatch = jmr3927_irq_dispatch; /* Now, interrupt control disabled, */ /* all IRC interrupts are masked, */ @@ -138,34 +119,10 @@ void __init jmr3927_irq_setup(void) /* clear PCI Reset interrupts */ jmr3927_ioc_reg_out(0, JMR3927_IOC_RESET_ADDR); - jmr3927_irq_init(); + tx3927_irq_init(); + for (i = JMR3927_IRQ_IOC; i < JMR3927_IRQ_IOC + JMR3927_NR_IRQ_IOC; i++) + set_irq_chip_and_handler(i, &jmr3927_irq_ioc, handle_level_irq); /* setup IOC interrupt 1 (PCI, MODEM) */ set_irq_chained_handler(JMR3927_IRQ_IOCINT, handle_simple_irq); - -#ifdef CONFIG_PCI - setup_irq(JMR3927_IRQ_IRC_PCI, &pcierr_action); -#endif - - /* enable all CPU interrupt bits. */ - set_c0_status(ST0_IM); /* IE bit is still 0. */ -} - -static struct irq_chip jmr3927_irq_ioc = { - .name = "jmr3927_ioc", - .ack = mask_irq_ioc, - .mask = mask_irq_ioc, - .mask_ack = mask_irq_ioc, - .unmask = unmask_irq_ioc, -}; - -static void __init jmr3927_irq_init(void) -{ - u32 i; - - txx9_irq_init(TX3927_IRC_REG); - for (i = 0; i < TXx9_MAX_IR; i++) - txx9_irq_set_pri(i, irc_level[i]); - for (i = JMR3927_IRQ_IOC; i < JMR3927_IRQ_IOC + JMR3927_NR_IRQ_IOC; i++) - set_irq_chip_and_handler(i, &jmr3927_irq_ioc, handle_level_irq); } diff --git a/arch/mips/txx9/jmr3927/kgdb_io.c b/arch/mips/txx9/jmr3927/kgdb_io.c deleted file mode 100644 index 5bd757e56f7..00000000000 --- a/arch/mips/txx9/jmr3927/kgdb_io.c +++ /dev/null @@ -1,105 +0,0 @@ -/* - * BRIEF MODULE DESCRIPTION - * Low level uart routines to directly access a TX[34]927 SIO. - * - * Copyright 2001 MontaVista Software Inc. - * Author: MontaVista Software, Inc. - * ahennessy@mvista.com or source@mvista.com - * - * Based on arch/mips/ddb5xxx/ddb5477/kgdb_io.c - * - * Copyright (C) 2000-2001 Toshiba Corporation - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN - * NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF - * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON - * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -#include <asm/txx9/jmr3927.h> - -#define TIMEOUT 0xffffff - -static int remoteDebugInitialized = 0; -static void debugInit(int baud); - -int putDebugChar(unsigned char c) -{ - int i = 0; - - if (!remoteDebugInitialized) { - remoteDebugInitialized = 1; - debugInit(38400); - } - - do { - slow_down(); - i++; - if (i>TIMEOUT) { - break; - } - } while (!(tx3927_sioptr(0)->cisr & TXx927_SICISR_TXALS)); - tx3927_sioptr(0)->tfifo = c; - - return 1; -} - -unsigned char getDebugChar(void) -{ - int i = 0; - int dicr; - char c; - - if (!remoteDebugInitialized) { - remoteDebugInitialized = 1; - debugInit(38400); - } - - /* diable RX int. */ - dicr = tx3927_sioptr(0)->dicr; - tx3927_sioptr(0)->dicr = 0; - - do { - slow_down(); - i++; - if (i>TIMEOUT) { - break; - } - } while (tx3927_sioptr(0)->disr & TXx927_SIDISR_UVALID) - ; - c = tx3927_sioptr(0)->rfifo; - - /* clear RX int. status */ - tx3927_sioptr(0)->disr &= ~TXx927_SIDISR_RDIS; - /* enable RX int. */ - tx3927_sioptr(0)->dicr = dicr; - - return c; -} - -static void debugInit(int baud) -{ - tx3927_sioptr(0)->lcr = 0x020; - tx3927_sioptr(0)->dicr = 0; - tx3927_sioptr(0)->disr = 0x4100; - tx3927_sioptr(0)->cisr = 0x014; - tx3927_sioptr(0)->fcr = 0; - tx3927_sioptr(0)->flcr = 0x02; - tx3927_sioptr(0)->bgr = ((JMR3927_BASE_BAUD + baud / 2) / baud) | - TXx927_SIBGR_BCLK_T0; -} diff --git a/arch/mips/txx9/jmr3927/prom.c b/arch/mips/txx9/jmr3927/prom.c index 2cadb423fac..70c4c8ec3e8 100644 --- a/arch/mips/txx9/jmr3927/prom.c +++ b/arch/mips/txx9/jmr3927/prom.c @@ -36,41 +36,18 @@ * 675 Mass Ave, Cambridge, MA 02139, USA. */ #include <linux/init.h> +#include <linux/kernel.h> #include <asm/bootinfo.h> #include <asm/txx9/generic.h> #include <asm/txx9/jmr3927.h> -#define TIMEOUT 0xffffff - -void -prom_putchar(char c) -{ - int i = 0; - - do { - i++; - if (i>TIMEOUT) - break; - } while (!(tx3927_sioptr(1)->cisr & TXx927_SICISR_TXALS)); - tx3927_sioptr(1)->tfifo = c; - return; -} - -void -puts(const char *cp) -{ - while (*cp) - prom_putchar(*cp++); - prom_putchar('\r'); - prom_putchar('\n'); -} - void __init jmr3927_prom_init(void) { /* CCFG */ if ((tx3927_ccfgptr->ccfg & TX3927_CCFG_TLBOFF) == 0) - puts("Warning: TX3927 TLB off\n"); + printk(KERN_ERR "TX3927 TLB off\n"); prom_init_cmdline(); add_memory_region(0, JMR3927_SDRAM_SIZE, BOOT_MEM_RAM); + txx9_sio_putchar_init(TX3927_SIO_REG(1)); } diff --git a/arch/mips/txx9/jmr3927/setup.c b/arch/mips/txx9/jmr3927/setup.c index 03647ebe413..87db41be8a5 100644 --- a/arch/mips/txx9/jmr3927/setup.c +++ b/arch/mips/txx9/jmr3927/setup.c @@ -32,27 +32,18 @@ #include <linux/types.h> #include <linux/ioport.h> #include <linux/delay.h> -#include <linux/pm.h> #include <linux/platform_device.h> #include <linux/gpio.h> -#ifdef CONFIG_SERIAL_TXX9 -#include <linux/serial_core.h> -#endif -#include <asm/txx9tmr.h> -#include <asm/txx9pio.h> #include <asm/reboot.h> +#include <asm/txx9pio.h> #include <asm/txx9/generic.h> #include <asm/txx9/pci.h> #include <asm/txx9/jmr3927.h> #include <asm/mipsregs.h> -extern void puts(const char *cp); - -/* don't enable - see errata */ -static int jmr3927_ccfg_toeon; - -static inline void do_reset(void) +static void jmr3927_machine_restart(char *command) { + local_irq_disable(); #if 1 /* Resetting PCI bus */ jmr3927_ioc_reg_out(0, JMR3927_IOC_RESET_ADDR); jmr3927_ioc_reg_out(JMR3927_IOC_RESET_PCI, JMR3927_IOC_RESET_ADDR); @@ -61,33 +52,13 @@ static inline void do_reset(void) jmr3927_ioc_reg_out(0, JMR3927_IOC_RESET_ADDR); #endif jmr3927_ioc_reg_out(JMR3927_IOC_RESET_CPU, JMR3927_IOC_RESET_ADDR); -} - -static void jmr3927_machine_restart(char *command) -{ - local_irq_disable(); - puts("Rebooting..."); - do_reset(); -} - -static void jmr3927_machine_halt(void) -{ - puts("JMR-TX3927 halted.\n"); - while (1); -} - -static void jmr3927_machine_power_off(void) -{ - puts("JMR-TX3927 halted. Please turn off the power.\n"); - while (1); + /* fallback */ + (*_machine_halt)(); } static void __init jmr3927_time_init(void) { - txx9_clockevent_init(TX3927_TMR_REG(0), - TXX9_IRQ_BASE + JMR3927_IRQ_IRC_TMR(0), - JMR3927_IMCLK); - txx9_clocksource_init(TX3927_TMR_REG(1), JMR3927_IMCLK); + tx3927_time_init(0, 1); } #define DO_WRITE_THROUGH @@ -102,11 +73,6 @@ static void __init jmr3927_mem_setup(void) set_io_port_base(JMR3927_PORT_BASE + JMR3927_PCIIO); _machine_restart = jmr3927_machine_restart; - _machine_halt = jmr3927_machine_halt; - pm_power_off = jmr3927_machine_power_off; - - /* Reboot on panic */ - panic_timeout = 180; /* cache setup */ { @@ -125,7 +91,8 @@ static void __init jmr3927_mem_setup(void) #endif conf = read_c0_conf(); - conf &= ~(TX39_CONF_ICE | TX39_CONF_DCE | TX39_CONF_WBON | TX39_CONF_CWFON); + conf &= ~(TX39_CONF_ICE | TX39_CONF_DCE | + TX39_CONF_WBON | TX39_CONF_CWFON); conf |= mips_ic_disable ? 0 : TX39_CONF_ICE; conf |= mips_dc_disable ? 0 : TX39_CONF_DCE; conf |= mips_config_wbon ? TX39_CONF_WBON : 0; @@ -138,47 +105,14 @@ static void __init jmr3927_mem_setup(void) /* initialize board */ jmr3927_board_init(); - argptr = prom_getcmdline(); - - if ((argptr = strstr(argptr, "toeon")) != NULL) - jmr3927_ccfg_toeon = 1; - argptr = prom_getcmdline(); - if ((argptr = strstr(argptr, "ip=")) == NULL) { - argptr = prom_getcmdline(); - strcat(argptr, " ip=bootp"); - } - -#ifdef CONFIG_SERIAL_TXX9 - { - extern int early_serial_txx9_setup(struct uart_port *port); - int i; - struct uart_port req; - for(i = 0; i < 2; i++) { - memset(&req, 0, sizeof(req)); - req.line = i; - req.iotype = UPIO_MEM; - req.membase = (unsigned char __iomem *)TX3927_SIO_REG(i); - req.mapbase = TX3927_SIO_REG(i); - req.irq = i == 0 ? - JMR3927_IRQ_IRC_SIO0 : JMR3927_IRQ_IRC_SIO1; - if (i == 0) - req.flags |= UPF_BUGGY_UART /*HAVE_CTS_LINE*/; - req.uartclk = JMR3927_IMCLK; - early_serial_txx9_setup(&req); - } - } + tx3927_sio_init(0, 1 << 1); /* ch1: noCTS */ #ifdef CONFIG_SERIAL_TXX9_CONSOLE argptr = prom_getcmdline(); - if ((argptr = strstr(argptr, "console=")) == NULL) { - argptr = prom_getcmdline(); + if (!strstr(argptr, "console=")) strcat(argptr, " console=ttyS1,115200"); - } -#endif #endif } -static void tx3927_setup(void); - static void __init jmr3927_pci_setup(void) { #ifdef CONFIG_PCI @@ -199,32 +133,13 @@ static void __init jmr3927_pci_setup(void) jmr3927_ioc_reg_out(0, JMR3927_IOC_RESET_ADDR); } tx3927_pcic_setup(c, JMR3927_SDRAM_SIZE, extarb); + tx3927_setup_pcierr_irq(); #endif /* CONFIG_PCI */ } static void __init jmr3927_board_init(void) { - tx3927_setup(); - jmr3927_pci_setup(); - - /* SIO0 DTR on */ - jmr3927_ioc_reg_out(0, JMR3927_IOC_DTR_ADDR); - - jmr3927_led_set(0); - - printk("JMR-TX3927 (Rev %d) --- IOC(Rev %d) DIPSW:%d,%d,%d,%d\n", - jmr3927_ioc_reg_in(JMR3927_IOC_BREV_ADDR) & JMR3927_REV_MASK, - jmr3927_ioc_reg_in(JMR3927_IOC_REV_ADDR) & JMR3927_REV_MASK, - jmr3927_dipsw1(), jmr3927_dipsw2(), - jmr3927_dipsw3(), jmr3927_dipsw4()); -} - -static void __init tx3927_setup(void) -{ - int i; - txx9_cpu_clock = JMR3927_CORECLK; - txx9_gbus_clock = JMR3927_GBUSCLK; /* SDRAMC are configured by PROM */ /* ROMC */ @@ -233,74 +148,32 @@ static void __init tx3927_setup(void) tx3927_romcptr->cr[3] = JMR3927_ROMCE3 | 0x0003f698; tx3927_romcptr->cr[5] = JMR3927_ROMCE5 | 0x0000f218; - /* CCFG */ - /* enable Timeout BusError */ - if (jmr3927_ccfg_toeon) - tx3927_ccfgptr->ccfg |= TX3927_CCFG_TOE; - - /* clear BusErrorOnWrite flag */ - tx3927_ccfgptr->ccfg &= ~TX3927_CCFG_BEOW; - /* Disable PCI snoop */ - tx3927_ccfgptr->ccfg &= ~TX3927_CCFG_PSNP; - /* do reset on watchdog */ - tx3927_ccfgptr->ccfg |= TX3927_CCFG_WR; - -#ifdef DO_WRITE_THROUGH - /* Enable PCI SNOOP - with write through only */ - tx3927_ccfgptr->ccfg |= TX3927_CCFG_PSNP; -#endif - /* Pin selection */ tx3927_ccfgptr->pcfg &= ~TX3927_PCFG_SELALL; tx3927_ccfgptr->pcfg |= TX3927_PCFG_SELSIOC(0) | TX3927_PCFG_SELSIO_ALL | (TX3927_PCFG_SELDMA_ALL & ~TX3927_PCFG_SELDMA(1)); - printk("TX3927 -- CRIR:%08lx CCFG:%08lx PCFG:%08lx\n", - tx3927_ccfgptr->crir, - tx3927_ccfgptr->ccfg, tx3927_ccfgptr->pcfg); - - /* TMR */ - for (i = 0; i < TX3927_NR_TMR; i++) - txx9_tmr_init(TX3927_TMR_REG(i)); - - /* DMA */ - tx3927_dmaptr->mcr = 0; - for (i = 0; i < ARRAY_SIZE(tx3927_dmaptr->ch); i++) { - /* reset channel */ - tx3927_dmaptr->ch[i].ccr = TX3927_DMA_CCR_CHRST; - tx3927_dmaptr->ch[i].ccr = 0; - } - /* enable DMA */ -#ifdef __BIG_ENDIAN - tx3927_dmaptr->mcr = TX3927_DMA_MCR_MSTEN; -#else - tx3927_dmaptr->mcr = TX3927_DMA_MCR_MSTEN | TX3927_DMA_MCR_LE; -#endif + tx3927_setup(); - /* PIO */ /* PIO[15:12] connected to LEDs */ __raw_writel(0x0000f000, &tx3927_pioptr->dir); - __raw_writel(0, &tx3927_pioptr->maskcpu); - __raw_writel(0, &tx3927_pioptr->maskext); - txx9_gpio_init(TX3927_PIO_REG, 0, 16); gpio_request(11, "dipsw1"); gpio_request(10, "dipsw2"); - { - unsigned int conf; - conf = read_c0_conf(); - if (!(conf & TX39_CONF_ICE)) - printk("TX3927 I-Cache disabled.\n"); - if (!(conf & TX39_CONF_DCE)) - printk("TX3927 D-Cache disabled.\n"); - else if (!(conf & TX39_CONF_WBON)) - printk("TX3927 D-Cache WriteThrough.\n"); - else if (!(conf & TX39_CONF_CWFON)) - printk("TX3927 D-Cache WriteBack.\n"); - else - printk("TX3927 D-Cache WriteBack (CWF) .\n"); - } + jmr3927_pci_setup(); + + /* SIO0 DTR on */ + jmr3927_ioc_reg_out(0, JMR3927_IOC_DTR_ADDR); + + jmr3927_led_set(0); + + printk(KERN_INFO + "JMR-TX3927 (Rev %d) --- IOC(Rev %d) DIPSW:%d,%d,%d,%d\n", + jmr3927_ioc_reg_in(JMR3927_IOC_BREV_ADDR) & JMR3927_REV_MASK, + jmr3927_ioc_reg_in(JMR3927_IOC_REV_ADDR) & JMR3927_REV_MASK, + jmr3927_dipsw1(), jmr3927_dipsw2(), + jmr3927_dipsw3(), jmr3927_dipsw4()); } /* This trick makes rtc-ds1742 driver usable as is. */ @@ -316,42 +189,21 @@ static unsigned long jmr3927_swizzle_addr_b(unsigned long port) #endif } -static int __init jmr3927_rtc_init(void) +static void __init jmr3927_rtc_init(void) { static struct resource __initdata res = { .start = JMR3927_IOC_NVRAMB_ADDR - IO_BASE, .end = JMR3927_IOC_NVRAMB_ADDR - IO_BASE + 0x800 - 1, .flags = IORESOURCE_MEM, }; - struct platform_device *dev; - dev = platform_device_register_simple("rtc-ds1742", -1, &res, 1); - return IS_ERR(dev) ? PTR_ERR(dev) : 0; -} - -/* Watchdog support */ - -static int __init txx9_wdt_init(unsigned long base) -{ - struct resource res = { - .start = base, - .end = base + 0x100 - 1, - .flags = IORESOURCE_MEM, - }; - struct platform_device *dev = - platform_device_register_simple("txx9wdt", -1, &res, 1); - return IS_ERR(dev) ? PTR_ERR(dev) : 0; -} - -static int __init jmr3927_wdt_init(void) -{ - return txx9_wdt_init(TX3927_TMR_REG(2)); + platform_device_register_simple("rtc-ds1742", -1, &res, 1); } static void __init jmr3927_device_init(void) { __swizzle_addr_b = jmr3927_swizzle_addr_b; jmr3927_rtc_init(); - jmr3927_wdt_init(); + tx3927_wdt_init(); } struct txx9_board_vec jmr3927_vec __initdata = { diff --git a/arch/mips/txx9/rbtx4927/irq.c b/arch/mips/txx9/rbtx4927/irq.c index cd748a93032..00cd5231da3 100644 --- a/arch/mips/txx9/rbtx4927/irq.c +++ b/arch/mips/txx9/rbtx4927/irq.c @@ -27,85 +27,86 @@ * 675 Mass Ave, Cambridge, MA 02139, USA. */ /* -IRQ Device -00 RBTX4927-ISA/00 -01 RBTX4927-ISA/01 PS2/Keyboard -02 RBTX4927-ISA/02 Cascade RBTX4927-ISA (irqs 8-15) -03 RBTX4927-ISA/03 -04 RBTX4927-ISA/04 -05 RBTX4927-ISA/05 -06 RBTX4927-ISA/06 -07 RBTX4927-ISA/07 -08 RBTX4927-ISA/08 -09 RBTX4927-ISA/09 -10 RBTX4927-ISA/10 -11 RBTX4927-ISA/11 -12 RBTX4927-ISA/12 PS2/Mouse (not supported at this time) -13 RBTX4927-ISA/13 -14 RBTX4927-ISA/14 IDE -15 RBTX4927-ISA/15 - -16 TX4927-CP0/00 Software 0 -17 TX4927-CP0/01 Software 1 -18 TX4927-CP0/02 Cascade TX4927-CP0 -19 TX4927-CP0/03 Multiplexed -- do not use -20 TX4927-CP0/04 Multiplexed -- do not use -21 TX4927-CP0/05 Multiplexed -- do not use -22 TX4927-CP0/06 Multiplexed -- do not use -23 TX4927-CP0/07 CPU TIMER - -24 TX4927-PIC/00 -25 TX4927-PIC/01 -26 TX4927-PIC/02 -27 TX4927-PIC/03 Cascade RBTX4927-IOC -28 TX4927-PIC/04 -29 TX4927-PIC/05 RBTX4927 RTL-8019AS ethernet -30 TX4927-PIC/06 -31 TX4927-PIC/07 -32 TX4927-PIC/08 TX4927 SerialIO Channel 0 -33 TX4927-PIC/09 TX4927 SerialIO Channel 1 -34 TX4927-PIC/10 -35 TX4927-PIC/11 -36 TX4927-PIC/12 -37 TX4927-PIC/13 -38 TX4927-PIC/14 -39 TX4927-PIC/15 -40 TX4927-PIC/16 TX4927 PCI PCI-C -41 TX4927-PIC/17 -42 TX4927-PIC/18 -43 TX4927-PIC/19 -44 TX4927-PIC/20 -45 TX4927-PIC/21 -46 TX4927-PIC/22 TX4927 PCI PCI-ERR -47 TX4927-PIC/23 TX4927 PCI PCI-PMA (not used) -48 TX4927-PIC/24 -49 TX4927-PIC/25 -50 TX4927-PIC/26 -51 TX4927-PIC/27 -52 TX4927-PIC/28 -53 TX4927-PIC/29 -54 TX4927-PIC/30 -55 TX4927-PIC/31 - -56 RBTX4927-IOC/00 FPCIB0 PCI-D PJ4/A PJ5/B SB/C PJ6/D PJ7/A (SouthBridge/NotUsed) [RTL-8139=PJ4] -57 RBTX4927-IOC/01 FPCIB0 PCI-C PJ4/D PJ5/A SB/B PJ6/C PJ7/D (SouthBridge/NotUsed) [RTL-8139=PJ5] -58 RBTX4927-IOC/02 FPCIB0 PCI-B PJ4/C PJ5/D SB/A PJ6/B PJ7/C (SouthBridge/IDE/pin=1,INTR) [RTL-8139=NotSupported] -59 RBTX4927-IOC/03 FPCIB0 PCI-A PJ4/B PJ5/C SB/D PJ6/A PJ7/B (SouthBridge/USB/pin=4) [RTL-8139=PJ6] -60 RBTX4927-IOC/04 -61 RBTX4927-IOC/05 -62 RBTX4927-IOC/06 -63 RBTX4927-IOC/07 - -NOTES: -SouthBridge/INTR is mapped to SouthBridge/A=PCI-B/#58 -SouthBridge/ISA/pin=0 no pci irq used by this device -SouthBridge/IDE/pin=1 no pci irq used by this device, using INTR via ISA IRQ14 -SouthBridge/USB/pin=4 using pci irq SouthBridge/D=PCI-A=#59 -SouthBridge/PMC/pin=0 no pci irq used by this device -SuperIO/PS2/Keyboard, using INTR via ISA IRQ1 -SuperIO/PS2/Mouse, using INTR via ISA IRQ12 (mouse not currently supported) -JP7 is not bus master -- do NOT use -- only 4 pci bus master's allowed -- SouthBridge, JP4, JP5, JP6 -*/ + * I8259A_IRQ_BASE+00 + * I8259A_IRQ_BASE+01 PS2/Keyboard + * I8259A_IRQ_BASE+02 Cascade RBTX4927-ISA (irqs 8-15) + * I8259A_IRQ_BASE+03 + * I8259A_IRQ_BASE+04 + * I8259A_IRQ_BASE+05 + * I8259A_IRQ_BASE+06 + * I8259A_IRQ_BASE+07 + * I8259A_IRQ_BASE+08 + * I8259A_IRQ_BASE+09 + * I8259A_IRQ_BASE+10 + * I8259A_IRQ_BASE+11 + * I8259A_IRQ_BASE+12 PS2/Mouse (not supported at this time) + * I8259A_IRQ_BASE+13 + * I8259A_IRQ_BASE+14 IDE + * I8259A_IRQ_BASE+15 + * + * MIPS_CPU_IRQ_BASE+00 Software 0 + * MIPS_CPU_IRQ_BASE+01 Software 1 + * MIPS_CPU_IRQ_BASE+02 Cascade TX4927-CP0 + * MIPS_CPU_IRQ_BASE+03 Multiplexed -- do not use + * MIPS_CPU_IRQ_BASE+04 Multiplexed -- do not use + * MIPS_CPU_IRQ_BASE+05 Multiplexed -- do not use + * MIPS_CPU_IRQ_BASE+06 Multiplexed -- do not use + * MIPS_CPU_IRQ_BASE+07 CPU TIMER + * + * TXX9_IRQ_BASE+00 + * TXX9_IRQ_BASE+01 + * TXX9_IRQ_BASE+02 + * TXX9_IRQ_BASE+03 Cascade RBTX4927-IOC + * TXX9_IRQ_BASE+04 + * TXX9_IRQ_BASE+05 RBTX4927 RTL-8019AS ethernet + * TXX9_IRQ_BASE+06 + * TXX9_IRQ_BASE+07 + * TXX9_IRQ_BASE+08 TX4927 SerialIO Channel 0 + * TXX9_IRQ_BASE+09 TX4927 SerialIO Channel 1 + * TXX9_IRQ_BASE+10 + * TXX9_IRQ_BASE+11 + * TXX9_IRQ_BASE+12 + * TXX9_IRQ_BASE+13 + * TXX9_IRQ_BASE+14 + * TXX9_IRQ_BASE+15 + * TXX9_IRQ_BASE+16 TX4927 PCI PCI-C + * TXX9_IRQ_BASE+17 + * TXX9_IRQ_BASE+18 + * TXX9_IRQ_BASE+19 + * TXX9_IRQ_BASE+20 + * TXX9_IRQ_BASE+21 + * TXX9_IRQ_BASE+22 TX4927 PCI PCI-ERR + * TXX9_IRQ_BASE+23 TX4927 PCI PCI-PMA (not used) + * TXX9_IRQ_BASE+24 + * TXX9_IRQ_BASE+25 + * TXX9_IRQ_BASE+26 + * TXX9_IRQ_BASE+27 + * TXX9_IRQ_BASE+28 + * TXX9_IRQ_BASE+29 + * TXX9_IRQ_BASE+30 + * TXX9_IRQ_BASE+31 + * + * RBTX4927_IRQ_IOC+00 FPCIB0 PCI-D (SouthBridge) + * RBTX4927_IRQ_IOC+01 FPCIB0 PCI-C (SouthBridge) + * RBTX4927_IRQ_IOC+02 FPCIB0 PCI-B (SouthBridge/IDE/pin=1,INTR) + * RBTX4927_IRQ_IOC+03 FPCIB0 PCI-A (SouthBridge/USB/pin=4) + * RBTX4927_IRQ_IOC+04 + * RBTX4927_IRQ_IOC+05 + * RBTX4927_IRQ_IOC+06 + * RBTX4927_IRQ_IOC+07 + * + * NOTES: + * SouthBridge/INTR is mapped to SouthBridge/A=PCI-B/#58 + * SouthBridge/ISA/pin=0 no pci irq used by this device + * SouthBridge/IDE/pin=1 no pci irq used by this device, using INTR + * via ISA IRQ14 + * SouthBridge/USB/pin=4 using pci irq SouthBridge/D=PCI-A=#59 + * SouthBridge/PMC/pin=0 no pci irq used by this device + * SuperIO/PS2/Keyboard, using INTR via ISA IRQ1 + * SuperIO/PS2/Mouse, using INTR via ISA IRQ12 (mouse not currently supported) + * JP7 is not bus master -- do NOT use -- only 4 pci bus master's + * allowed -- SouthBridge, JP4, JP5, JP6 + */ #include <linux/init.h> #include <linux/types.h> @@ -134,7 +135,7 @@ static int toshiba_rbtx4927_irq_nested(int sw_irq) level3 = readb(rbtx4927_imstat_addr) & 0x1f; if (level3) sw_irq = RBTX4927_IRQ_IOC + fls(level3) - 1; - return (sw_irq); + return sw_irq; } static void __init toshiba_rbtx4927_irq_ioc_init(void) diff --git a/arch/mips/txx9/rbtx4927/prom.c b/arch/mips/txx9/rbtx4927/prom.c index 5c0de54ebdd..1dc0a5b1956 100644 --- a/arch/mips/txx9/rbtx4927/prom.c +++ b/arch/mips/txx9/rbtx4927/prom.c @@ -38,4 +38,5 @@ void __init rbtx4927_prom_init(void) { prom_init_cmdline(); add_memory_region(0, tx4927_get_mem_size(), BOOT_MEM_RAM); + txx9_sio_putchar_init(TX4927_SIO_REG(0) & 0xfffffffffULL); } diff --git a/arch/mips/txx9/rbtx4927/setup.c b/arch/mips/txx9/rbtx4927/setup.c index 3da20ea3e55..0d39bafea79 100644 --- a/arch/mips/txx9/rbtx4927/setup.c +++ b/arch/mips/txx9/rbtx4927/setup.c @@ -46,12 +46,9 @@ #include <linux/kernel.h> #include <linux/types.h> #include <linux/ioport.h> -#include <linux/interrupt.h> -#include <linux/pm.h> #include <linux/platform_device.h> #include <linux/delay.h> #include <asm/io.h> -#include <asm/processor.h> #include <asm/reboot.h> #include <asm/txx9/generic.h> #include <asm/txx9/pci.h> @@ -103,6 +100,7 @@ static void __init tx4927_pci_setup(void) tx4927_report_pciclk(); tx4927_pcic_setup(tx4927_pcicptr, c, extarb); } + tx4927_setup_pcierr_irq(); } static void __init tx4937_pci_setup(void) @@ -149,6 +147,7 @@ static void __init tx4937_pci_setup(void) tx4938_report_pciclk(); tx4927_pcic_setup(tx4938_pcicptr, c, extarb); } + tx4938_setup_pcierr_irq(); } static void __init rbtx4927_arch_init(void) @@ -165,17 +164,8 @@ static void __init rbtx4937_arch_init(void) #define rbtx4937_arch_init NULL #endif /* CONFIG_PCI */ -static void __noreturn wait_forever(void) -{ - while (1) - if (cpu_wait) - (*cpu_wait)(); -} - static void toshiba_rbtx4927_restart(char *command) { - printk(KERN_NOTICE "System Rebooting...\n"); - /* enable the s/w reset register */ writeb(1, rbtx4927_softresetlock_addr); @@ -186,24 +176,8 @@ static void toshiba_rbtx4927_restart(char *command) /* do a s/w reset */ writeb(1, rbtx4927_softreset_addr); - /* do something passive while waiting for reset */ - local_irq_disable(); - wait_forever(); - /* no return */ -} - -static void toshiba_rbtx4927_halt(void) -{ - printk(KERN_NOTICE "System Halted\n"); - local_irq_disable(); - wait_forever(); - /* no return */ -} - -static void toshiba_rbtx4927_power_off(void) -{ - toshiba_rbtx4927_halt(); - /* no return */ + /* fallback */ + (*_machine_halt)(); } static void __init rbtx4927_clock_init(void); @@ -214,9 +188,6 @@ static void __init rbtx4927_mem_setup(void) u32 cp0_config; char *argptr; - /* f/w leaves this on at startup */ - clear_c0_status(ST0_ERL); - /* enable caches -- HCP5 does this, pmon does not */ cp0_config = read_c0_config(); cp0_config = cp0_config & ~(TX49_CONF_IC | TX49_CONF_DC); @@ -231,37 +202,21 @@ static void __init rbtx4927_mem_setup(void) } _machine_restart = toshiba_rbtx4927_restart; - _machine_halt = toshiba_rbtx4927_halt; - pm_power_off = toshiba_rbtx4927_power_off; #ifdef CONFIG_PCI txx9_alloc_pci_controller(&txx9_primary_pcic, RBTX4927_PCIMEM, RBTX4927_PCIMEM_SIZE, RBTX4927_PCIIO, RBTX4927_PCIIO_SIZE); + txx9_board_pcibios_setup = tx4927_pcibios_setup; #else set_io_port_base(KSEG1 + RBTX4927_ISA_IO_OFFSET); #endif - tx4927_setup_serial(); + tx4927_sio_init(0, 0); #ifdef CONFIG_SERIAL_TXX9_CONSOLE - argptr = prom_getcmdline(); - if (strstr(argptr, "console=") == NULL) { - strcat(argptr, " console=ttyS0,38400"); - } -#endif - -#ifdef CONFIG_ROOT_NFS - argptr = prom_getcmdline(); - if (strstr(argptr, "root=") == NULL) { - strcat(argptr, " root=/dev/nfs rw"); - } -#endif - -#ifdef CONFIG_IP_PNP - argptr = prom_getcmdline(); - if (strstr(argptr, "ip=") == NULL) { - strcat(argptr, " ip=any"); - } + argptr = prom_getcmdline(); + if (!strstr(argptr, "console=")) + strcat(argptr, " console=ttyS0,38400"); #endif } @@ -324,19 +279,17 @@ static void __init rbtx4927_time_init(void) tx4927_time_init(0); } -static int __init toshiba_rbtx4927_rtc_init(void) +static void __init toshiba_rbtx4927_rtc_init(void) { struct resource res = { .start = RBTX4927_BRAMRTC_BASE - IO_BASE, .end = RBTX4927_BRAMRTC_BASE - IO_BASE + 0x800 - 1, .flags = IORESOURCE_MEM, }; - struct platform_device *dev = - platform_device_register_simple("rtc-ds1742", -1, &res, 1); - return IS_ERR(dev) ? PTR_ERR(dev) : 0; + platform_device_register_simple("rtc-ds1742", -1, &res, 1); } -static int __init rbtx4927_ne_init(void) +static void __init rbtx4927_ne_init(void) { struct resource res[] = { { @@ -348,36 +301,14 @@ static int __init rbtx4927_ne_init(void) .flags = IORESOURCE_IRQ, } }; - struct platform_device *dev = - platform_device_register_simple("ne", -1, - res, ARRAY_SIZE(res)); - return IS_ERR(dev) ? PTR_ERR(dev) : 0; -} - -/* Watchdog support */ - -static int __init txx9_wdt_init(unsigned long base) -{ - struct resource res = { - .start = base, - .end = base + 0x100 - 1, - .flags = IORESOURCE_MEM, - }; - struct platform_device *dev = - platform_device_register_simple("txx9wdt", -1, &res, 1); - return IS_ERR(dev) ? PTR_ERR(dev) : 0; -} - -static int __init rbtx4927_wdt_init(void) -{ - return txx9_wdt_init(TX4927_TMR_REG(2) & 0xfffffffffULL); + platform_device_register_simple("ne", -1, res, ARRAY_SIZE(res)); } static void __init rbtx4927_device_init(void) { toshiba_rbtx4927_rtc_init(); rbtx4927_ne_init(); - rbtx4927_wdt_init(); + tx4927_wdt_init(); } struct txx9_board_vec rbtx4927_vec __initdata = { diff --git a/arch/mips/txx9/rbtx4938/irq.c b/arch/mips/txx9/rbtx4938/irq.c index 3971a061657..ca2f8306ce9 100644 --- a/arch/mips/txx9/rbtx4938/irq.c +++ b/arch/mips/txx9/rbtx4938/irq.c @@ -11,59 +11,57 @@ */ /* -IRQ Device - -16 TX4938-CP0/00 Software 0 -17 TX4938-CP0/01 Software 1 -18 TX4938-CP0/02 Cascade TX4938-CP0 -19 TX4938-CP0/03 Multiplexed -- do not use -20 TX4938-CP0/04 Multiplexed -- do not use -21 TX4938-CP0/05 Multiplexed -- do not use -22 TX4938-CP0/06 Multiplexed -- do not use -23 TX4938-CP0/07 CPU TIMER - -24 TX4938-PIC/00 -25 TX4938-PIC/01 -26 TX4938-PIC/02 Cascade RBTX4938-IOC -27 TX4938-PIC/03 RBTX4938 RTL-8019AS Ethernet -28 TX4938-PIC/04 -29 TX4938-PIC/05 TX4938 ETH1 -30 TX4938-PIC/06 TX4938 ETH0 -31 TX4938-PIC/07 -32 TX4938-PIC/08 TX4938 SIO 0 -33 TX4938-PIC/09 TX4938 SIO 1 -34 TX4938-PIC/10 TX4938 DMA0 -35 TX4938-PIC/11 TX4938 DMA1 -36 TX4938-PIC/12 TX4938 DMA2 -37 TX4938-PIC/13 TX4938 DMA3 -38 TX4938-PIC/14 -39 TX4938-PIC/15 -40 TX4938-PIC/16 TX4938 PCIC -41 TX4938-PIC/17 TX4938 TMR0 -42 TX4938-PIC/18 TX4938 TMR1 -43 TX4938-PIC/19 TX4938 TMR2 -44 TX4938-PIC/20 -45 TX4938-PIC/21 -46 TX4938-PIC/22 TX4938 PCIERR -47 TX4938-PIC/23 -48 TX4938-PIC/24 -49 TX4938-PIC/25 -50 TX4938-PIC/26 -51 TX4938-PIC/27 -52 TX4938-PIC/28 -53 TX4938-PIC/29 -54 TX4938-PIC/30 -55 TX4938-PIC/31 TX4938 SPI - -56 RBTX4938-IOC/00 PCI-D -57 RBTX4938-IOC/01 PCI-C -58 RBTX4938-IOC/02 PCI-B -59 RBTX4938-IOC/03 PCI-A -60 RBTX4938-IOC/04 RTC -61 RBTX4938-IOC/05 ATA -62 RBTX4938-IOC/06 MODEM -63 RBTX4938-IOC/07 SWINT -*/ + * MIPS_CPU_IRQ_BASE+00 Software 0 + * MIPS_CPU_IRQ_BASE+01 Software 1 + * MIPS_CPU_IRQ_BASE+02 Cascade TX4938-CP0 + * MIPS_CPU_IRQ_BASE+03 Multiplexed -- do not use + * MIPS_CPU_IRQ_BASE+04 Multiplexed -- do not use + * MIPS_CPU_IRQ_BASE+05 Multiplexed -- do not use + * MIPS_CPU_IRQ_BASE+06 Multiplexed -- do not use + * MIPS_CPU_IRQ_BASE+07 CPU TIMER + * + * TXX9_IRQ_BASE+00 + * TXX9_IRQ_BASE+01 + * TXX9_IRQ_BASE+02 Cascade RBTX4938-IOC + * TXX9_IRQ_BASE+03 RBTX4938 RTL-8019AS Ethernet + * TXX9_IRQ_BASE+04 + * TXX9_IRQ_BASE+05 TX4938 ETH1 + * TXX9_IRQ_BASE+06 TX4938 ETH0 + * TXX9_IRQ_BASE+07 + * TXX9_IRQ_BASE+08 TX4938 SIO 0 + * TXX9_IRQ_BASE+09 TX4938 SIO 1 + * TXX9_IRQ_BASE+10 TX4938 DMA0 + * TXX9_IRQ_BASE+11 TX4938 DMA1 + * TXX9_IRQ_BASE+12 TX4938 DMA2 + * TXX9_IRQ_BASE+13 TX4938 DMA3 + * TXX9_IRQ_BASE+14 + * TXX9_IRQ_BASE+15 + * TXX9_IRQ_BASE+16 TX4938 PCIC + * TXX9_IRQ_BASE+17 TX4938 TMR0 + * TXX9_IRQ_BASE+18 TX4938 TMR1 + * TXX9_IRQ_BASE+19 TX4938 TMR2 + * TXX9_IRQ_BASE+20 + * TXX9_IRQ_BASE+21 + * TXX9_IRQ_BASE+22 TX4938 PCIERR + * TXX9_IRQ_BASE+23 + * TXX9_IRQ_BASE+24 + * TXX9_IRQ_BASE+25 + * TXX9_IRQ_BASE+26 + * TXX9_IRQ_BASE+27 + * TXX9_IRQ_BASE+28 + * TXX9_IRQ_BASE+29 + * TXX9_IRQ_BASE+30 + * TXX9_IRQ_BASE+31 TX4938 SPI + * + * RBTX4938_IRQ_IOC+00 PCI-D + * RBTX4938_IRQ_IOC+01 PCI-C + * RBTX4938_IRQ_IOC+02 PCI-B + * RBTX4938_IRQ_IOC+03 PCI-A + * RBTX4938_IRQ_IOC+04 RTC + * RBTX4938_IRQ_IOC+05 ATA + * RBTX4938_IRQ_IOC+06 MODEM + * RBTX4938_IRQ_IOC+07 SWINT + */ #include <linux/init.h> #include <linux/interrupt.h> #include <asm/mipsregs.h> @@ -93,9 +91,6 @@ static int toshiba_rbtx4938_irq_nested(int sw_irq) return sw_irq; } -/**********************************************************************************/ -/* Functions for ioc */ -/**********************************************************************************/ static void __init toshiba_rbtx4938_irq_ioc_init(void) { diff --git a/arch/mips/txx9/rbtx4938/prom.c b/arch/mips/txx9/rbtx4938/prom.c index ee189519ce5..d73123cd2ab 100644 --- a/arch/mips/txx9/rbtx4938/prom.c +++ b/arch/mips/txx9/rbtx4938/prom.c @@ -22,4 +22,5 @@ void __init rbtx4938_prom_init(void) prom_init_cmdline(); #endif add_memory_region(0, tx4938_get_mem_size(), BOOT_MEM_RAM); + txx9_sio_putchar_init(TX4938_SIO_REG(0) & 0xfffffffffULL); } diff --git a/arch/mips/txx9/rbtx4938/setup.c b/arch/mips/txx9/rbtx4938/setup.c index 6c2b99bb8af..9ab48dec0fe 100644 --- a/arch/mips/txx9/rbtx4938/setup.c +++ b/arch/mips/txx9/rbtx4938/setup.c @@ -13,9 +13,6 @@ #include <linux/types.h> #include <linux/ioport.h> #include <linux/delay.h> -#include <linux/interrupt.h> -#include <linux/console.h> -#include <linux/pm.h> #include <linux/platform_device.h> #include <linux/gpio.h> @@ -28,33 +25,14 @@ #include <asm/txx9/spi.h> #include <asm/txx9pio.h> -static void rbtx4938_machine_halt(void) -{ - printk(KERN_NOTICE "System Halted\n"); - local_irq_disable(); - - while (1) - __asm__(".set\tmips3\n\t" - "wait\n\t" - ".set\tmips0"); -} - -static void rbtx4938_machine_power_off(void) -{ - rbtx4938_machine_halt(); - /* no return */ -} - static void rbtx4938_machine_restart(char *command) { local_irq_disable(); - - printk("Rebooting..."); writeb(1, rbtx4938_softresetlock_addr); writeb(1, rbtx4938_sfvol_addr); writeb(1, rbtx4938_softreset_addr); - while(1) - ; + /* fallback */ + (*_machine_halt)(); } static void __init rbtx4938_pci_setup(void) @@ -121,6 +99,7 @@ static void __init rbtx4938_pci_setup(void) register_pci_controller(c); tx4927_pcic_setup(tx4938_pcic1ptr, c, 0); } + tx4938_setup_pcierr_irq(); #endif /* CONFIG_PCI */ } @@ -151,19 +130,7 @@ static int __init rbtx4938_ethaddr_init(void) if (sum) printk(KERN_WARNING "seeprom: bad checksum.\n"); } - for (i = 0; i < 2; i++) { - unsigned int id = - TXX9_IRQ_BASE + (i ? TX4938_IR_ETH1 : TX4938_IR_ETH0); - struct platform_device *pdev; - if (!(__raw_readq(&tx4938_ccfgptr->pcfg) & - (i ? TX4938_PCFG_ETH1_SEL : TX4938_PCFG_ETH0_SEL))) - continue; - pdev = platform_device_alloc("tc35815-mac", id); - if (!pdev || - platform_device_add_data(pdev, &dat[4 + 6 * i], 6) || - platform_device_add(pdev)) - platform_device_put(pdev); - } + tx4938_ethaddr_init(&dat[4], &dat[4 + 6]); #endif /* CONFIG_PCI */ return 0; } @@ -193,51 +160,36 @@ static void __init rbtx4938_mem_setup(void) #ifdef CONFIG_PCI txx9_alloc_pci_controller(&txx9_primary_pcic, 0, 0, 0, 0); + txx9_board_pcibios_setup = tx4927_pcibios_setup; #else set_io_port_base(RBTX4938_ETHER_BASE); #endif - tx4938_setup_serial(); + tx4938_sio_init(7372800, 0); #ifdef CONFIG_SERIAL_TXX9_CONSOLE - argptr = prom_getcmdline(); - if (strstr(argptr, "console=") == NULL) { - strcat(argptr, " console=ttyS0,38400"); - } + argptr = prom_getcmdline(); + if (!strstr(argptr, "console=")) + strcat(argptr, " console=ttyS0,38400"); #endif #ifdef CONFIG_TOSHIBA_RBTX4938_MPLEX_PIO58_61 - printk("PIOSEL: disabling both ata and nand selection\n"); - local_irq_disable(); + printk(KERN_INFO "PIOSEL: disabling both ata and nand selection\n"); txx9_clear64(&tx4938_ccfgptr->pcfg, TX4938_PCFG_NDF_SEL | TX4938_PCFG_ATA_SEL); #endif #ifdef CONFIG_TOSHIBA_RBTX4938_MPLEX_NAND - printk("PIOSEL: enabling nand selection\n"); + printk(KERN_INFO "PIOSEL: enabling nand selection\n"); txx9_set64(&tx4938_ccfgptr->pcfg, TX4938_PCFG_NDF_SEL); txx9_clear64(&tx4938_ccfgptr->pcfg, TX4938_PCFG_ATA_SEL); #endif #ifdef CONFIG_TOSHIBA_RBTX4938_MPLEX_ATA - printk("PIOSEL: enabling ata selection\n"); + printk(KERN_INFO "PIOSEL: enabling ata selection\n"); txx9_set64(&tx4938_ccfgptr->pcfg, TX4938_PCFG_ATA_SEL); txx9_clear64(&tx4938_ccfgptr->pcfg, TX4938_PCFG_NDF_SEL); #endif -#ifdef CONFIG_IP_PNP - argptr = prom_getcmdline(); - if (strstr(argptr, "ip=") == NULL) { - strcat(argptr, " ip=any"); - } -#endif - - -#ifdef CONFIG_FB - { - conswitchp = &dummy_con; - } -#endif - rbtx4938_spi_setup(); pcfg = ____raw_readq(&tx4938_ccfgptr->pcfg); /* updated */ /* fixup piosel */ @@ -258,11 +210,9 @@ static void __init rbtx4938_mem_setup(void) rbtx4938_fpga_resource.end = CPHYSADDR(RBTX4938_FPGA_REG_ADDR) + 0xffff; rbtx4938_fpga_resource.flags = IORESOURCE_MEM | IORESOURCE_BUSY; if (request_resource(&txx9_ce_res[2], &rbtx4938_fpga_resource)) - printk("request resource for fpga failed\n"); + printk(KERN_ERR "request resource for fpga failed\n"); _machine_restart = rbtx4938_machine_restart; - _machine_halt = rbtx4938_machine_halt; - pm_power_off = rbtx4938_machine_power_off; writeb(0xff, rbtx4938_led_addr); printk(KERN_INFO "RBTX4938 --- FPGA(Rev %02x) DIPSW:%02x,%02x\n", @@ -270,7 +220,7 @@ static void __init rbtx4938_mem_setup(void) readb(rbtx4938_dipsw_addr), readb(rbtx4938_bdipsw_addr)); } -static int __init rbtx4938_ne_init(void) +static void __init rbtx4938_ne_init(void) { struct resource res[] = { { @@ -282,10 +232,7 @@ static int __init rbtx4938_ne_init(void) .flags = IORESOURCE_IRQ, } }; - struct platform_device *dev = - platform_device_register_simple("ne", -1, - res, ARRAY_SIZE(res)); - return IS_ERR(dev) ? PTR_ERR(dev) : 0; + platform_device_register_simple("ne", -1, res, ARRAY_SIZE(res)); } static DEFINE_SPINLOCK(rbtx4938_spi_gpio_lock); @@ -321,24 +268,6 @@ static struct gpio_chip rbtx4938_spi_gpio_chip = { .ngpio = 3, }; -/* SPI support */ - -static void __init txx9_spi_init(unsigned long base, int irq) -{ - struct resource res[] = { - { - .start = base, - .end = base + 0x20 - 1, - .flags = IORESOURCE_MEM, - }, { - .start = irq, - .flags = IORESOURCE_IRQ, - }, - }; - platform_device_register_simple("spi_txx9", 0, - res, ARRAY_SIZE(res)); -} - static int __init rbtx4938_spi_init(void) { struct spi_board_info srtc_info = { @@ -361,7 +290,7 @@ static int __init rbtx4938_spi_init(void) gpio_direction_output(16 + SEEPROM2_CS, 1); gpio_request(16 + SEEPROM3_CS, "seeprom3"); gpio_direction_output(16 + SEEPROM3_CS, 1); - txx9_spi_init(TX4938_SPI_REG & 0xfffffffffULL, RBTX4938_IRQ_IRC_SPI); + tx4938_spi_init(0); return 0; } @@ -372,30 +301,11 @@ static void __init rbtx4938_arch_init(void) rbtx4938_spi_init(); } -/* Watchdog support */ - -static int __init txx9_wdt_init(unsigned long base) -{ - struct resource res = { - .start = base, - .end = base + 0x100 - 1, - .flags = IORESOURCE_MEM, - }; - struct platform_device *dev = - platform_device_register_simple("txx9wdt", -1, &res, 1); - return IS_ERR(dev) ? PTR_ERR(dev) : 0; -} - -static int __init rbtx4938_wdt_init(void) -{ - return txx9_wdt_init(TX4938_TMR_REG(2) & 0xfffffffffULL); -} - static void __init rbtx4938_device_init(void) { rbtx4938_ethaddr_init(); rbtx4938_ne_init(); - rbtx4938_wdt_init(); + tx4938_wdt_init(); } struct txx9_board_vec rbtx4938_vec __initdata = { diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 587da5e0990..63c9cafda9c 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -42,6 +42,9 @@ config GENERIC_HARDIRQS bool default y +config HAVE_GET_USER_PAGES_FAST + def_bool PPC64 + config HAVE_SETUP_PER_CPU_AREA def_bool PPC64 diff --git a/arch/powerpc/boot/dts/mpc832x_mds.dts b/arch/powerpc/boot/dts/mpc832x_mds.dts index 7345743d3d9..fbc930410ff 100644 --- a/arch/powerpc/boot/dts/mpc832x_mds.dts +++ b/arch/powerpc/boot/dts/mpc832x_mds.dts @@ -68,6 +68,7 @@ #address-cells = <1>; #size-cells = <1>; device_type = "soc"; + compatible = "simple-bus"; ranges = <0x0 0xe0000000 0x00100000>; reg = <0xe0000000 0x00000200>; bus-frequency = <132000000>; diff --git a/arch/powerpc/boot/dts/mpc832x_rdb.dts b/arch/powerpc/boot/dts/mpc832x_rdb.dts index e74c045a0f8..b157d1885a2 100644 --- a/arch/powerpc/boot/dts/mpc832x_rdb.dts +++ b/arch/powerpc/boot/dts/mpc832x_rdb.dts @@ -51,6 +51,7 @@ #address-cells = <1>; #size-cells = <1>; device_type = "soc"; + compatible = "simple-bus"; ranges = <0x0 0xe0000000 0x00100000>; reg = <0xe0000000 0x00000200>; bus-frequency = <0>; diff --git a/arch/powerpc/boot/dts/mpc8349emitx.dts b/arch/powerpc/boot/dts/mpc8349emitx.dts index 8dfab566258..700e076ef3f 100644 --- a/arch/powerpc/boot/dts/mpc8349emitx.dts +++ b/arch/powerpc/boot/dts/mpc8349emitx.dts @@ -52,6 +52,7 @@ #address-cells = <1>; #size-cells = <1>; device_type = "soc"; + compatible = "simple-bus"; ranges = <0x0 0xe0000000 0x00100000>; reg = <0xe0000000 0x00000200>; bus-frequency = <0>; // from bootloader diff --git a/arch/powerpc/boot/dts/mpc8349emitxgp.dts b/arch/powerpc/boot/dts/mpc8349emitxgp.dts index 49ca3497eef..cdd3063258e 100644 --- a/arch/powerpc/boot/dts/mpc8349emitxgp.dts +++ b/arch/powerpc/boot/dts/mpc8349emitxgp.dts @@ -50,6 +50,7 @@ #address-cells = <1>; #size-cells = <1>; device_type = "soc"; + compatible = "simple-bus"; ranges = <0x0 0xe0000000 0x00100000>; reg = <0xe0000000 0x00000200>; bus-frequency = <0>; // from bootloader diff --git a/arch/powerpc/boot/dts/mpc834x_mds.dts b/arch/powerpc/boot/dts/mpc834x_mds.dts index ba586cb7afb..783241c0024 100644 --- a/arch/powerpc/boot/dts/mpc834x_mds.dts +++ b/arch/powerpc/boot/dts/mpc834x_mds.dts @@ -57,6 +57,7 @@ #address-cells = <1>; #size-cells = <1>; device_type = "soc"; + compatible = "simple-bus"; ranges = <0x0 0xe0000000 0x00100000>; reg = <0xe0000000 0x00000200>; bus-frequency = <0>; diff --git a/arch/powerpc/boot/dts/mpc836x_mds.dts b/arch/powerpc/boot/dts/mpc836x_mds.dts index 3701dae1ee0..a3b76a70995 100644 --- a/arch/powerpc/boot/dts/mpc836x_mds.dts +++ b/arch/powerpc/boot/dts/mpc836x_mds.dts @@ -61,6 +61,7 @@ #address-cells = <1>; #size-cells = <1>; device_type = "soc"; + compatible = "simple-bus"; ranges = <0x0 0xe0000000 0x00100000>; reg = <0xe0000000 0x00000200>; bus-frequency = <264000000>; diff --git a/arch/powerpc/boot/dts/mpc836x_rdk.dts b/arch/powerpc/boot/dts/mpc836x_rdk.dts index 8acd1d6577f..89c9202f8bd 100644 --- a/arch/powerpc/boot/dts/mpc836x_rdk.dts +++ b/arch/powerpc/boot/dts/mpc836x_rdk.dts @@ -149,18 +149,14 @@ }; crypto@30000 { - compatible = "fsl,sec2-crypto"; + compatible = "fsl,sec2.0"; reg = <0x30000 0x10000>; - interrupts = <11 8>; + interrupts = <11 0x8>; interrupt-parent = <&ipic>; - num-channels = <4>; - channel-fifo-len = <24>; - exec-units-mask = <0x7e>; - /* - * desc mask is for rev1.x, we need runtime fixup - * for >=2.x - */ - descriptor-types-mask = <0x1010ebf>; + fsl,num-channels = <4>; + fsl,channel-fifo-len = <24>; + fsl,exec-units-mask = <0x7e>; + fsl,descriptor-types-mask = <0x01010ebf>; }; ipic: interrupt-controller@700 { diff --git a/arch/powerpc/boot/dts/mpc8377_mds.dts b/arch/powerpc/boot/dts/mpc8377_mds.dts index 0a700cb5f61..432782b6d20 100644 --- a/arch/powerpc/boot/dts/mpc8377_mds.dts +++ b/arch/powerpc/boot/dts/mpc8377_mds.dts @@ -117,6 +117,7 @@ #address-cells = <1>; #size-cells = <1>; device_type = "soc"; + compatible = "simple-bus"; ranges = <0x0 0xe0000000 0x00100000>; reg = <0xe0000000 0x00000200>; bus-frequency = <0>; diff --git a/arch/powerpc/boot/dts/mpc8378_mds.dts b/arch/powerpc/boot/dts/mpc8378_mds.dts index 29c8c76a58f..ed32c8ddafe 100644 --- a/arch/powerpc/boot/dts/mpc8378_mds.dts +++ b/arch/powerpc/boot/dts/mpc8378_mds.dts @@ -117,6 +117,7 @@ #address-cells = <1>; #size-cells = <1>; device_type = "soc"; + compatible = "simple-bus"; ranges = <0x0 0xe0000000 0x00100000>; reg = <0xe0000000 0x00000200>; bus-frequency = <0>; diff --git a/arch/powerpc/boot/dts/mpc8379_mds.dts b/arch/powerpc/boot/dts/mpc8379_mds.dts index d641a8985ea..f4db9ed4a30 100644 --- a/arch/powerpc/boot/dts/mpc8379_mds.dts +++ b/arch/powerpc/boot/dts/mpc8379_mds.dts @@ -117,6 +117,7 @@ #address-cells = <1>; #size-cells = <1>; device_type = "soc"; + compatible = "simple-bus"; ranges = <0x0 0xe0000000 0x00100000>; reg = <0xe0000000 0x00000200>; bus-frequency = <0>; diff --git a/arch/powerpc/boot/dts/mpc8536ds.dts b/arch/powerpc/boot/dts/mpc8536ds.dts index 02cfa24a169..1505d6855ef 100644 --- a/arch/powerpc/boot/dts/mpc8536ds.dts +++ b/arch/powerpc/boot/dts/mpc8536ds.dts @@ -49,6 +49,7 @@ #address-cells = <1>; #size-cells = <1>; device_type = "soc"; + compatible = "simple-bus"; ranges = <0x0 0xffe00000 0x100000>; reg = <0xffe00000 0x1000>; bus-frequency = <0>; // Filled out by uboot. diff --git a/arch/powerpc/boot/dts/mpc8540ads.dts b/arch/powerpc/boot/dts/mpc8540ads.dts index f2273a872b1..9568bfaff8f 100644 --- a/arch/powerpc/boot/dts/mpc8540ads.dts +++ b/arch/powerpc/boot/dts/mpc8540ads.dts @@ -53,6 +53,7 @@ #address-cells = <1>; #size-cells = <1>; device_type = "soc"; + compatible = "simple-bus"; ranges = <0x0 0xe0000000 0x100000>; reg = <0xe0000000 0x100000>; // CCSRBAR 1M bus-frequency = <0>; diff --git a/arch/powerpc/boot/dts/mpc8541cds.dts b/arch/powerpc/boot/dts/mpc8541cds.dts index c4469f19ff8..6480f4fd96e 100644 --- a/arch/powerpc/boot/dts/mpc8541cds.dts +++ b/arch/powerpc/boot/dts/mpc8541cds.dts @@ -53,6 +53,7 @@ #address-cells = <1>; #size-cells = <1>; device_type = "soc"; + compatible = "simple-bus"; ranges = <0x0 0xe0000000 0x100000>; reg = <0xe0000000 0x1000>; // CCSRBAR 1M bus-frequency = <0>; diff --git a/arch/powerpc/boot/dts/mpc8544ds.dts b/arch/powerpc/boot/dts/mpc8544ds.dts index 7d3829d3495..f1fb20737e3 100644 --- a/arch/powerpc/boot/dts/mpc8544ds.dts +++ b/arch/powerpc/boot/dts/mpc8544ds.dts @@ -54,6 +54,7 @@ #address-cells = <1>; #size-cells = <1>; device_type = "soc"; + compatible = "simple-bus"; ranges = <0x0 0xe0000000 0x100000>; reg = <0xe0000000 0x1000>; // CCSRBAR 1M diff --git a/arch/powerpc/boot/dts/mpc8548cds.dts b/arch/powerpc/boot/dts/mpc8548cds.dts index d84466bb7ec..431b496270d 100644 --- a/arch/powerpc/boot/dts/mpc8548cds.dts +++ b/arch/powerpc/boot/dts/mpc8548cds.dts @@ -58,6 +58,7 @@ #address-cells = <1>; #size-cells = <1>; device_type = "soc"; + compatible = "simple-bus"; ranges = <0x0 0xe0000000 0x100000>; reg = <0xe0000000 0x1000>; // CCSRBAR bus-frequency = <0>; diff --git a/arch/powerpc/boot/dts/mpc8555cds.dts b/arch/powerpc/boot/dts/mpc8555cds.dts index e03a7800628..d833a5c4f47 100644 --- a/arch/powerpc/boot/dts/mpc8555cds.dts +++ b/arch/powerpc/boot/dts/mpc8555cds.dts @@ -53,6 +53,7 @@ #address-cells = <1>; #size-cells = <1>; device_type = "soc"; + compatible = "simple-bus"; ranges = <0x0 0xe0000000 0x100000>; reg = <0xe0000000 0x1000>; // CCSRBAR 1M bus-frequency = <0>; diff --git a/arch/powerpc/boot/dts/mpc8560ads.dts b/arch/powerpc/boot/dts/mpc8560ads.dts index ba8159de040..4d1f2f28409 100644 --- a/arch/powerpc/boot/dts/mpc8560ads.dts +++ b/arch/powerpc/boot/dts/mpc8560ads.dts @@ -53,6 +53,7 @@ #address-cells = <1>; #size-cells = <1>; device_type = "soc"; + compatible = "simple-bus"; ranges = <0x0 0xe0000000 0x100000>; reg = <0xe0000000 0x200>; bus-frequency = <330000000>; diff --git a/arch/powerpc/boot/dts/mpc8568mds.dts b/arch/powerpc/boot/dts/mpc8568mds.dts index 9c30a34821d..a15f10343f5 100644 --- a/arch/powerpc/boot/dts/mpc8568mds.dts +++ b/arch/powerpc/boot/dts/mpc8568mds.dts @@ -60,6 +60,7 @@ #address-cells = <1>; #size-cells = <1>; device_type = "soc"; + compatible = "simple-bus"; ranges = <0x0 0xe0000000 0x100000>; reg = <0xe0000000 0x1000>; bus-frequency = <0>; diff --git a/arch/powerpc/boot/dts/mpc8572ds.dts b/arch/powerpc/boot/dts/mpc8572ds.dts index 08c61e3daec..e124dd18fb5 100644 --- a/arch/powerpc/boot/dts/mpc8572ds.dts +++ b/arch/powerpc/boot/dts/mpc8572ds.dts @@ -68,6 +68,7 @@ #address-cells = <1>; #size-cells = <1>; device_type = "soc"; + compatible = "simple-bus"; ranges = <0x0 0xffe00000 0x100000>; reg = <0xffe00000 0x1000>; // CCSRBAR & soc regs, remove once parse code for immrbase fixed bus-frequency = <0>; // Filled out by uboot. diff --git a/arch/powerpc/kernel/lparcfg.c b/arch/powerpc/kernel/lparcfg.c index 9f856a0c3e3..1a09719c762 100644 --- a/arch/powerpc/kernel/lparcfg.c +++ b/arch/powerpc/kernel/lparcfg.c @@ -636,10 +636,6 @@ static ssize_t lparcfg_write(struct file *file, const char __user * buf, retval = -EIO; } else if (retval == H_PARAMETER) { retval = -EINVAL; - } else { - printk(KERN_WARNING "%s: received unknown hv return code %ld", - __func__, retval); - retval = -EIO; } return retval; diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c index 6b66cd85b43..3635be61f89 100644 --- a/arch/powerpc/kernel/ptrace.c +++ b/arch/powerpc/kernel/ptrace.c @@ -375,7 +375,7 @@ static int vsr_get(struct task_struct *target, const struct user_regset *regset, flush_vsx_to_thread(target); for (i = 0; i < 32 ; i++) - buf[i] = current->thread.fpr[i][TS_VSRLOWOFFSET]; + buf[i] = target->thread.fpr[i][TS_VSRLOWOFFSET]; ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, buf, 0, 32 * sizeof(double)); @@ -394,7 +394,7 @@ static int vsr_set(struct task_struct *target, const struct user_regset *regset, ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, buf, 0, 32 * sizeof(double)); for (i = 0; i < 32 ; i++) - current->thread.fpr[i][TS_VSRLOWOFFSET] = buf[i]; + target->thread.fpr[i][TS_VSRLOWOFFSET] = buf[i]; return ret; @@ -975,15 +975,13 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) case PTRACE_GETVSRREGS: return copy_regset_to_user(child, &user_ppc_native_view, REGSET_VSX, - 0, (32 * sizeof(vector128) + - sizeof(u32)), + 0, 32 * sizeof(double), (void __user *) data); case PTRACE_SETVSRREGS: return copy_regset_from_user(child, &user_ppc_native_view, REGSET_VSX, - 0, (32 * sizeof(vector128) + - sizeof(u32)), + 0, 32 * sizeof(double), (const void __user *) data); #endif #ifdef CONFIG_SPE diff --git a/arch/powerpc/kernel/ptrace32.c b/arch/powerpc/kernel/ptrace32.c index 67bf1a1e7e1..197d49c790a 100644 --- a/arch/powerpc/kernel/ptrace32.c +++ b/arch/powerpc/kernel/ptrace32.c @@ -294,6 +294,8 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request, case PTRACE_SETFPREGS: case PTRACE_GETVRREGS: case PTRACE_SETVRREGS: + case PTRACE_GETVSRREGS: + case PTRACE_SETVSRREGS: case PTRACE_GETREGS64: case PTRACE_SETREGS64: case PPC_PTRACE_GETFPREGS: diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile index 1c00e0196f6..e7392b45a5e 100644 --- a/arch/powerpc/mm/Makefile +++ b/arch/powerpc/mm/Makefile @@ -12,7 +12,8 @@ obj-y := fault.o mem.o \ mmu_context_$(CONFIG_WORD_SIZE).o hash-$(CONFIG_PPC_NATIVE) := hash_native_64.o obj-$(CONFIG_PPC64) += hash_utils_64.o \ - slb_low.o slb.o stab.o mmap.o $(hash-y) + slb_low.o slb.o stab.o \ + gup.o mmap.o $(hash-y) obj-$(CONFIG_PPC_STD_MMU_32) += ppc_mmu_32.o obj-$(CONFIG_PPC_STD_MMU) += hash_low_$(CONFIG_WORD_SIZE).o \ tlb_$(CONFIG_WORD_SIZE).o diff --git a/arch/powerpc/mm/gup.c b/arch/powerpc/mm/gup.c new file mode 100644 index 00000000000..9fdf4d6335e --- /dev/null +++ b/arch/powerpc/mm/gup.c @@ -0,0 +1,280 @@ +/* + * Lockless get_user_pages_fast for powerpc + * + * Copyright (C) 2008 Nick Piggin + * Copyright (C) 2008 Novell Inc. + */ +#undef DEBUG + +#include <linux/sched.h> +#include <linux/mm.h> +#include <linux/hugetlb.h> +#include <linux/vmstat.h> +#include <linux/pagemap.h> +#include <linux/rwsem.h> +#include <asm/pgtable.h> + +/* + * The performance critical leaf functions are made noinline otherwise gcc + * inlines everything into a single function which results in too much + * register pressure. + */ +static noinline int gup_pte_range(pmd_t pmd, unsigned long addr, + unsigned long end, int write, struct page **pages, int *nr) +{ + unsigned long mask, result; + pte_t *ptep; + + result = _PAGE_PRESENT|_PAGE_USER; + if (write) + result |= _PAGE_RW; + mask = result | _PAGE_SPECIAL; + + ptep = pte_offset_kernel(&pmd, addr); + do { + pte_t pte = *ptep; + struct page *page; + + if ((pte_val(pte) & mask) != result) + return 0; + VM_BUG_ON(!pfn_valid(pte_pfn(pte))); + page = pte_page(pte); + if (!page_cache_get_speculative(page)) + return 0; + if (unlikely(pte != *ptep)) { + put_page(page); + return 0; + } + pages[*nr] = page; + (*nr)++; + + } while (ptep++, addr += PAGE_SIZE, addr != end); + + return 1; +} + +#ifdef CONFIG_HUGETLB_PAGE +static noinline int gup_huge_pte(pte_t *ptep, struct hstate *hstate, + unsigned long *addr, unsigned long end, + int write, struct page **pages, int *nr) +{ + unsigned long mask; + unsigned long pte_end; + struct page *head, *page; + pte_t pte; + int refs; + + pte_end = (*addr + huge_page_size(hstate)) & huge_page_mask(hstate); + if (pte_end < end) + end = pte_end; + + pte = *ptep; + mask = _PAGE_PRESENT|_PAGE_USER; + if (write) + mask |= _PAGE_RW; + if ((pte_val(pte) & mask) != mask) + return 0; + /* hugepages are never "special" */ + VM_BUG_ON(!pfn_valid(pte_pfn(pte))); + + refs = 0; + head = pte_page(pte); + page = head + ((*addr & ~huge_page_mask(hstate)) >> PAGE_SHIFT); + do { + VM_BUG_ON(compound_head(page) != head); + pages[*nr] = page; + (*nr)++; + page++; + refs++; + } while (*addr += PAGE_SIZE, *addr != end); + + if (!page_cache_add_speculative(head, refs)) { + *nr -= refs; + return 0; + } + if (unlikely(pte != *ptep)) { + /* Could be optimized better */ + while (*nr) { + put_page(page); + (*nr)--; + } + } + + return 1; +} +#endif /* CONFIG_HUGETLB_PAGE */ + +static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end, + int write, struct page **pages, int *nr) +{ + unsigned long next; + pmd_t *pmdp; + + pmdp = pmd_offset(&pud, addr); + do { + pmd_t pmd = *pmdp; + + next = pmd_addr_end(addr, end); + if (pmd_none(pmd)) + return 0; + if (!gup_pte_range(pmd, addr, next, write, pages, nr)) + return 0; + } while (pmdp++, addr = next, addr != end); + + return 1; +} + +static int gup_pud_range(pgd_t pgd, unsigned long addr, unsigned long end, + int write, struct page **pages, int *nr) +{ + unsigned long next; + pud_t *pudp; + + pudp = pud_offset(&pgd, addr); + do { + pud_t pud = *pudp; + + next = pud_addr_end(addr, end); + if (pud_none(pud)) + return 0; + if (!gup_pmd_range(pud, addr, next, write, pages, nr)) + return 0; + } while (pudp++, addr = next, addr != end); + + return 1; +} + +int get_user_pages_fast(unsigned long start, int nr_pages, int write, + struct page **pages) +{ + struct mm_struct *mm = current->mm; + unsigned long addr, len, end; + unsigned long next; + pgd_t *pgdp; + int psize, nr = 0; + unsigned int shift; + + pr_debug("%s(%lx,%x,%s)\n", __func__, start, nr_pages, write ? "write" : "read"); + + start &= PAGE_MASK; + addr = start; + len = (unsigned long) nr_pages << PAGE_SHIFT; + end = start + len; + + if (unlikely(!access_ok(write ? VERIFY_WRITE : VERIFY_READ, + start, len))) + goto slow_irqon; + + pr_debug(" aligned: %lx .. %lx\n", start, end); + +#ifdef CONFIG_HUGETLB_PAGE + /* We bail out on slice boundary crossing when hugetlb is + * enabled in order to not have to deal with two different + * page table formats + */ + if (addr < SLICE_LOW_TOP) { + if (end > SLICE_LOW_TOP) + goto slow_irqon; + + if (unlikely(GET_LOW_SLICE_INDEX(addr) != + GET_LOW_SLICE_INDEX(end - 1))) + goto slow_irqon; + } else { + if (unlikely(GET_HIGH_SLICE_INDEX(addr) != + GET_HIGH_SLICE_INDEX(end - 1))) + goto slow_irqon; + } +#endif /* CONFIG_HUGETLB_PAGE */ + + /* + * XXX: batch / limit 'nr', to avoid large irq off latency + * needs some instrumenting to determine the common sizes used by + * important workloads (eg. DB2), and whether limiting the batch size + * will decrease performance. + * + * It seems like we're in the clear for the moment. Direct-IO is + * the main guy that batches up lots of get_user_pages, and even + * they are limited to 64-at-a-time which is not so many. + */ + /* + * This doesn't prevent pagetable teardown, but does prevent + * the pagetables from being freed on powerpc. + * + * So long as we atomically load page table pointers versus teardown, + * we can follow the address down to the the page and take a ref on it. + */ + local_irq_disable(); + + psize = get_slice_psize(mm, addr); + shift = mmu_psize_defs[psize].shift; + +#ifdef CONFIG_HUGETLB_PAGE + if (unlikely(mmu_huge_psizes[psize])) { + pte_t *ptep; + unsigned long a = addr; + unsigned long sz = ((1UL) << shift); + struct hstate *hstate = size_to_hstate(sz); + + BUG_ON(!hstate); + /* + * XXX: could be optimized to avoid hstate + * lookup entirely (just use shift) + */ + + do { + VM_BUG_ON(shift != mmu_psize_defs[get_slice_psize(mm, a)].shift); + ptep = huge_pte_offset(mm, a); + pr_debug(" %016lx: huge ptep %p\n", a, ptep); + if (!ptep || !gup_huge_pte(ptep, hstate, &a, end, write, pages, + &nr)) + goto slow; + } while (a != end); + } else +#endif /* CONFIG_HUGETLB_PAGE */ + { + pgdp = pgd_offset(mm, addr); + do { + pgd_t pgd = *pgdp; + + VM_BUG_ON(shift != mmu_psize_defs[get_slice_psize(mm, addr)].shift); + pr_debug(" %016lx: normal pgd %p\n", addr, (void *)pgd); + next = pgd_addr_end(addr, end); + if (pgd_none(pgd)) + goto slow; + if (!gup_pud_range(pgd, addr, next, write, pages, &nr)) + goto slow; + } while (pgdp++, addr = next, addr != end); + } + local_irq_enable(); + + VM_BUG_ON(nr != (end - start) >> PAGE_SHIFT); + return nr; + + { + int ret; + +slow: + local_irq_enable(); +slow_irqon: + pr_debug(" slow path ! nr = %d\n", nr); + + /* Try to get the remaining pages with get_user_pages */ + start += nr << PAGE_SHIFT; + pages += nr; + + down_read(&mm->mmap_sem); + ret = get_user_pages(current, mm, start, + (end - start) >> PAGE_SHIFT, write, 0, pages, NULL); + up_read(&mm->mmap_sem); + + /* Have to be a bit careful with return values */ + if (nr > 0) { + if (ret < 0) + ret = nr; + else + ret += nr; + } + + return ret; + } +} diff --git a/arch/powerpc/platforms/83xx/mpc832x_mds.c b/arch/powerpc/platforms/83xx/mpc832x_mds.c index dd4be4aee31..ec43477caa6 100644 --- a/arch/powerpc/platforms/83xx/mpc832x_mds.c +++ b/arch/powerpc/platforms/83xx/mpc832x_mds.c @@ -105,6 +105,7 @@ static void __init mpc832x_sys_setup_arch(void) static struct of_device_id mpc832x_ids[] = { { .type = "soc", }, { .compatible = "soc", }, + { .compatible = "simple-bus", }, { .type = "qe", }, { .compatible = "fsl,qe", }, {}, diff --git a/arch/powerpc/platforms/83xx/mpc832x_rdb.c b/arch/powerpc/platforms/83xx/mpc832x_rdb.c index f049d692d4c..0300268ce5b 100644 --- a/arch/powerpc/platforms/83xx/mpc832x_rdb.c +++ b/arch/powerpc/platforms/83xx/mpc832x_rdb.c @@ -115,6 +115,7 @@ static void __init mpc832x_rdb_setup_arch(void) static struct of_device_id mpc832x_ids[] = { { .type = "soc", }, { .compatible = "soc", }, + { .compatible = "simple-bus", }, { .type = "qe", }, { .compatible = "fsl,qe", }, {}, diff --git a/arch/powerpc/platforms/83xx/mpc834x_itx.c b/arch/powerpc/platforms/83xx/mpc834x_itx.c index 7301d77a08e..76092d37c7d 100644 --- a/arch/powerpc/platforms/83xx/mpc834x_itx.c +++ b/arch/powerpc/platforms/83xx/mpc834x_itx.c @@ -41,6 +41,7 @@ static struct of_device_id __initdata mpc834x_itx_ids[] = { { .compatible = "fsl,pq2pro-localbus", }, + { .compatible = "simple-bus", }, {}, }; diff --git a/arch/powerpc/platforms/83xx/mpc834x_mds.c b/arch/powerpc/platforms/83xx/mpc834x_mds.c index 30d509aa9f0..fc3f2ed1f3e 100644 --- a/arch/powerpc/platforms/83xx/mpc834x_mds.c +++ b/arch/powerpc/platforms/83xx/mpc834x_mds.c @@ -111,6 +111,7 @@ static void __init mpc834x_mds_init_IRQ(void) static struct of_device_id mpc834x_ids[] = { { .type = "soc", }, { .compatible = "soc", }, + { .compatible = "simple-bus", }, {}, }; diff --git a/arch/powerpc/platforms/83xx/mpc836x_mds.c b/arch/powerpc/platforms/83xx/mpc836x_mds.c index 75b80e83657..9d46e5bdd10 100644 --- a/arch/powerpc/platforms/83xx/mpc836x_mds.c +++ b/arch/powerpc/platforms/83xx/mpc836x_mds.c @@ -136,6 +136,7 @@ static void __init mpc836x_mds_setup_arch(void) static struct of_device_id mpc836x_ids[] = { { .type = "soc", }, { .compatible = "soc", }, + { .compatible = "simple-bus", }, { .type = "qe", }, { .compatible = "fsl,qe", }, {}, diff --git a/arch/powerpc/platforms/83xx/sbc834x.c b/arch/powerpc/platforms/83xx/sbc834x.c index fc21f5c15ba..156c4e21800 100644 --- a/arch/powerpc/platforms/83xx/sbc834x.c +++ b/arch/powerpc/platforms/83xx/sbc834x.c @@ -83,6 +83,7 @@ static void __init sbc834x_init_IRQ(void) static struct __initdata of_device_id sbc834x_ids[] = { { .type = "soc", }, { .compatible = "soc", }, + { .compatible = "simple-bus", }, {}, }; diff --git a/arch/powerpc/platforms/85xx/ksi8560.c b/arch/powerpc/platforms/85xx/ksi8560.c index 2145adeb220..8a3b117b6ce 100644 --- a/arch/powerpc/platforms/85xx/ksi8560.c +++ b/arch/powerpc/platforms/85xx/ksi8560.c @@ -222,6 +222,7 @@ static void ksi8560_show_cpuinfo(struct seq_file *m) static struct of_device_id __initdata of_bus_ids[] = { { .type = "soc", }, + { .type = "simple-bus", }, { .name = "cpm", }, { .name = "localbus", }, {}, diff --git a/arch/powerpc/platforms/85xx/mpc8536_ds.c b/arch/powerpc/platforms/85xx/mpc8536_ds.c index 6b846aa1ced..1bf5aefdfeb 100644 --- a/arch/powerpc/platforms/85xx/mpc8536_ds.c +++ b/arch/powerpc/platforms/85xx/mpc8536_ds.c @@ -91,6 +91,7 @@ static void __init mpc8536_ds_setup_arch(void) static struct of_device_id __initdata mpc8536_ds_ids[] = { { .type = "soc", }, { .compatible = "soc", }, + { .compatible = "simple-bus", }, {}, }; diff --git a/arch/powerpc/platforms/85xx/mpc85xx_ads.c b/arch/powerpc/platforms/85xx/mpc85xx_ads.c index ba498d6f2d0..d17807a6b89 100644 --- a/arch/powerpc/platforms/85xx/mpc85xx_ads.c +++ b/arch/powerpc/platforms/85xx/mpc85xx_ads.c @@ -230,6 +230,7 @@ static struct of_device_id __initdata of_bus_ids[] = { { .type = "soc", }, { .name = "cpm", }, { .name = "localbus", }, + { .compatible = "simple-bus", }, {}, }; diff --git a/arch/powerpc/platforms/85xx/mpc85xx_ds.c b/arch/powerpc/platforms/85xx/mpc85xx_ds.c index 00c53580664..483b65cbaba 100644 --- a/arch/powerpc/platforms/85xx/mpc85xx_ds.c +++ b/arch/powerpc/platforms/85xx/mpc85xx_ds.c @@ -186,6 +186,7 @@ static int __init mpc8544_ds_probe(void) static struct of_device_id __initdata mpc85xxds_ids[] = { { .type = "soc", }, { .compatible = "soc", }, + { .compatible = "simple-bus", }, {}, }; diff --git a/arch/powerpc/platforms/85xx/mpc85xx_mds.c b/arch/powerpc/platforms/85xx/mpc85xx_mds.c index 43a459f63e3..2494c515591 100644 --- a/arch/powerpc/platforms/85xx/mpc85xx_mds.c +++ b/arch/powerpc/platforms/85xx/mpc85xx_mds.c @@ -260,6 +260,7 @@ machine_arch_initcall(mpc85xx_mds, board_fixups); static struct of_device_id mpc85xx_ids[] = { { .type = "soc", }, { .compatible = "soc", }, + { .compatible = "simple-bus", }, { .type = "qe", }, { .compatible = "fsl,qe", }, {}, diff --git a/arch/powerpc/platforms/85xx/sbc8560.c b/arch/powerpc/platforms/85xx/sbc8560.c index 2c580cd24e4..6509ade7166 100644 --- a/arch/powerpc/platforms/85xx/sbc8560.c +++ b/arch/powerpc/platforms/85xx/sbc8560.c @@ -217,6 +217,7 @@ static struct of_device_id __initdata of_bus_ids[] = { { .type = "soc", }, { .name = "cpm", }, { .name = "localbus", }, + { .compatible = "simple-bus", }, {}, }; diff --git a/arch/powerpc/platforms/8xx/Kconfig b/arch/powerpc/platforms/8xx/Kconfig index 6fc849e51e4..71d7562e190 100644 --- a/arch/powerpc/platforms/8xx/Kconfig +++ b/arch/powerpc/platforms/8xx/Kconfig @@ -105,6 +105,16 @@ config 8xx_COPYBACK If in doubt, say Y here. +config 8xx_GPIO + bool "GPIO API Support" + select GENERIC_GPIO + select ARCH_REQUIRE_GPIOLIB + help + Saying Y here will cause the ports on an MPC8xx processor to be used + with the GPIO API. If you say N here, the kernel needs less memory. + + If in doubt, say Y here. + config 8xx_CPU6 bool "CPU6 Silicon Errata (860 Pre Rev. C)" help diff --git a/arch/powerpc/platforms/Kconfig b/arch/powerpc/platforms/Kconfig index 1d0968775c0..4c900efa164 100644 --- a/arch/powerpc/platforms/Kconfig +++ b/arch/powerpc/platforms/Kconfig @@ -254,6 +254,8 @@ config CPM2 select CPM select PPC_LIB_RHEAP select PPC_PCI_CHOICE + select ARCH_REQUIRE_GPIOLIB + select GENERIC_GPIO help The CPM2 (Communications Processor Module) is a coprocessor on embedded CPUs made by Freescale. Selecting this option means that @@ -281,6 +283,7 @@ config FSL_ULI1575 config CPM bool + select PPC_CLOCK config OF_RTC bool diff --git a/arch/powerpc/platforms/cell/cbe_cpufreq_pmi.c b/arch/powerpc/platforms/cell/cbe_cpufreq_pmi.c index 69288f65314..3233fe84d15 100644 --- a/arch/powerpc/platforms/cell/cbe_cpufreq_pmi.c +++ b/arch/powerpc/platforms/cell/cbe_cpufreq_pmi.c @@ -96,6 +96,12 @@ static int pmi_notifier(struct notifier_block *nb, struct cpufreq_frequency_table *cbe_freqs; u8 node; + /* Should this really be called for CPUFREQ_ADJUST, CPUFREQ_INCOMPATIBLE + * and CPUFREQ_NOTIFY policy events?) + */ + if (event == CPUFREQ_START) + return 0; + cbe_freqs = cpufreq_frequency_get_table(policy->cpu); node = cbe_cpu_to_node(policy->cpu); diff --git a/arch/powerpc/sysdev/cpm1.c b/arch/powerpc/sysdev/cpm1.c index 661df42830b..4a04823e842 100644 --- a/arch/powerpc/sysdev/cpm1.c +++ b/arch/powerpc/sysdev/cpm1.c @@ -30,6 +30,7 @@ #include <linux/interrupt.h> #include <linux/irq.h> #include <linux/module.h> +#include <linux/spinlock.h> #include <asm/page.h> #include <asm/pgtable.h> #include <asm/8xx_immap.h> @@ -42,6 +43,10 @@ #include <asm/fs_pd.h> +#ifdef CONFIG_8xx_GPIO +#include <linux/of_gpio.h> +#endif + #define CPM_MAP_SIZE (0x4000) cpm8xx_t __iomem *cpmp; /* Pointer to comm processor space */ @@ -290,20 +295,24 @@ struct cpm_ioport16 { __be16 res[3]; }; -struct cpm_ioport32 { - __be32 dir, par, sor; +struct cpm_ioport32b { + __be32 dir, par, odr, dat; +}; + +struct cpm_ioport32e { + __be32 dir, par, sor, odr, dat; }; static void cpm1_set_pin32(int port, int pin, int flags) { - struct cpm_ioport32 __iomem *iop; + struct cpm_ioport32e __iomem *iop; pin = 1 << (31 - pin); if (port == CPM_PORTB) - iop = (struct cpm_ioport32 __iomem *) + iop = (struct cpm_ioport32e __iomem *) &mpc8xx_immr->im_cpm.cp_pbdir; else - iop = (struct cpm_ioport32 __iomem *) + iop = (struct cpm_ioport32e __iomem *) &mpc8xx_immr->im_cpm.cp_pedir; if (flags & CPM_PIN_OUTPUT) @@ -498,3 +507,251 @@ int cpm1_clk_setup(enum cpm_clk_target target, int clock, int mode) return 0; } + +/* + * GPIO LIB API implementation + */ +#ifdef CONFIG_8xx_GPIO + +struct cpm1_gpio16_chip { + struct of_mm_gpio_chip mm_gc; + spinlock_t lock; + + /* shadowed data register to clear/set bits safely */ + u16 cpdata; +}; + +static inline struct cpm1_gpio16_chip * +to_cpm1_gpio16_chip(struct of_mm_gpio_chip *mm_gc) +{ + return container_of(mm_gc, struct cpm1_gpio16_chip, mm_gc); +} + +static void cpm1_gpio16_save_regs(struct of_mm_gpio_chip *mm_gc) +{ + struct cpm1_gpio16_chip *cpm1_gc = to_cpm1_gpio16_chip(mm_gc); + struct cpm_ioport16 __iomem *iop = mm_gc->regs; + + cpm1_gc->cpdata = in_be16(&iop->dat); +} + +static int cpm1_gpio16_get(struct gpio_chip *gc, unsigned int gpio) +{ + struct of_mm_gpio_chip *mm_gc = to_of_mm_gpio_chip(gc); + struct cpm_ioport16 __iomem *iop = mm_gc->regs; + u16 pin_mask; + + pin_mask = 1 << (15 - gpio); + + return !!(in_be16(&iop->dat) & pin_mask); +} + +static void cpm1_gpio16_set(struct gpio_chip *gc, unsigned int gpio, int value) +{ + struct of_mm_gpio_chip *mm_gc = to_of_mm_gpio_chip(gc); + struct cpm1_gpio16_chip *cpm1_gc = to_cpm1_gpio16_chip(mm_gc); + struct cpm_ioport16 __iomem *iop = mm_gc->regs; + unsigned long flags; + u16 pin_mask = 1 << (15 - gpio); + + spin_lock_irqsave(&cpm1_gc->lock, flags); + + if (value) + cpm1_gc->cpdata |= pin_mask; + else + cpm1_gc->cpdata &= ~pin_mask; + + out_be16(&iop->dat, cpm1_gc->cpdata); + + spin_unlock_irqrestore(&cpm1_gc->lock, flags); +} + +static int cpm1_gpio16_dir_out(struct gpio_chip *gc, unsigned int gpio, int val) +{ + struct of_mm_gpio_chip *mm_gc = to_of_mm_gpio_chip(gc); + struct cpm_ioport16 __iomem *iop = mm_gc->regs; + u16 pin_mask; + + pin_mask = 1 << (15 - gpio); + + setbits16(&iop->dir, pin_mask); + + cpm1_gpio16_set(gc, gpio, val); + + return 0; +} + +static int cpm1_gpio16_dir_in(struct gpio_chip *gc, unsigned int gpio) +{ + struct of_mm_gpio_chip *mm_gc = to_of_mm_gpio_chip(gc); + struct cpm_ioport16 __iomem *iop = mm_gc->regs; + u16 pin_mask; + + pin_mask = 1 << (15 - gpio); + + clrbits16(&iop->dir, pin_mask); + + return 0; +} + +int cpm1_gpiochip_add16(struct device_node *np) +{ + struct cpm1_gpio16_chip *cpm1_gc; + struct of_mm_gpio_chip *mm_gc; + struct of_gpio_chip *of_gc; + struct gpio_chip *gc; + + cpm1_gc = kzalloc(sizeof(*cpm1_gc), GFP_KERNEL); + if (!cpm1_gc) + return -ENOMEM; + + spin_lock_init(&cpm1_gc->lock); + + mm_gc = &cpm1_gc->mm_gc; + of_gc = &mm_gc->of_gc; + gc = &of_gc->gc; + + mm_gc->save_regs = cpm1_gpio16_save_regs; + of_gc->gpio_cells = 2; + gc->ngpio = 16; + gc->direction_input = cpm1_gpio16_dir_in; + gc->direction_output = cpm1_gpio16_dir_out; + gc->get = cpm1_gpio16_get; + gc->set = cpm1_gpio16_set; + + return of_mm_gpiochip_add(np, mm_gc); +} + +struct cpm1_gpio32_chip { + struct of_mm_gpio_chip mm_gc; + spinlock_t lock; + + /* shadowed data register to clear/set bits safely */ + u32 cpdata; +}; + +static inline struct cpm1_gpio32_chip * +to_cpm1_gpio32_chip(struct of_mm_gpio_chip *mm_gc) +{ + return container_of(mm_gc, struct cpm1_gpio32_chip, mm_gc); +} + +static void cpm1_gpio32_save_regs(struct of_mm_gpio_chip *mm_gc) +{ + struct cpm1_gpio32_chip *cpm1_gc = to_cpm1_gpio32_chip(mm_gc); + struct cpm_ioport32b __iomem *iop = mm_gc->regs; + + cpm1_gc->cpdata = in_be32(&iop->dat); +} + +static int cpm1_gpio32_get(struct gpio_chip *gc, unsigned int gpio) +{ + struct of_mm_gpio_chip *mm_gc = to_of_mm_gpio_chip(gc); + struct cpm_ioport32b __iomem *iop = mm_gc->regs; + u32 pin_mask; + + pin_mask = 1 << (31 - gpio); + + return !!(in_be32(&iop->dat) & pin_mask); +} + +static void cpm1_gpio32_set(struct gpio_chip *gc, unsigned int gpio, int value) +{ + struct of_mm_gpio_chip *mm_gc = to_of_mm_gpio_chip(gc); + struct cpm1_gpio32_chip *cpm1_gc = to_cpm1_gpio32_chip(mm_gc); + struct cpm_ioport32b __iomem *iop = mm_gc->regs; + unsigned long flags; + u32 pin_mask = 1 << (31 - gpio); + + spin_lock_irqsave(&cpm1_gc->lock, flags); + + if (value) + cpm1_gc->cpdata |= pin_mask; + else + cpm1_gc->cpdata &= ~pin_mask; + + out_be32(&iop->dat, cpm1_gc->cpdata); + + spin_unlock_irqrestore(&cpm1_gc->lock, flags); +} + +static int cpm1_gpio32_dir_out(struct gpio_chip *gc, unsigned int gpio, int val) +{ + struct of_mm_gpio_chip *mm_gc = to_of_mm_gpio_chip(gc); + struct cpm_ioport32b __iomem *iop = mm_gc->regs; + u32 pin_mask; + + pin_mask = 1 << (31 - gpio); + + setbits32(&iop->dir, pin_mask); + + cpm1_gpio32_set(gc, gpio, val); + + return 0; +} + +static int cpm1_gpio32_dir_in(struct gpio_chip *gc, unsigned int gpio) +{ + struct of_mm_gpio_chip *mm_gc = to_of_mm_gpio_chip(gc); + struct cpm_ioport32b __iomem *iop = mm_gc->regs; + u32 pin_mask; + + pin_mask = 1 << (31 - gpio); + + clrbits32(&iop->dir, pin_mask); + + return 0; +} + +int cpm1_gpiochip_add32(struct device_node *np) +{ + struct cpm1_gpio32_chip *cpm1_gc; + struct of_mm_gpio_chip *mm_gc; + struct of_gpio_chip *of_gc; + struct gpio_chip *gc; + + cpm1_gc = kzalloc(sizeof(*cpm1_gc), GFP_KERNEL); + if (!cpm1_gc) + return -ENOMEM; + + spin_lock_init(&cpm1_gc->lock); + + mm_gc = &cpm1_gc->mm_gc; + of_gc = &mm_gc->of_gc; + gc = &of_gc->gc; + + mm_gc->save_regs = cpm1_gpio32_save_regs; + of_gc->gpio_cells = 2; + gc->ngpio = 32; + gc->direction_input = cpm1_gpio32_dir_in; + gc->direction_output = cpm1_gpio32_dir_out; + gc->get = cpm1_gpio32_get; + gc->set = cpm1_gpio32_set; + + return of_mm_gpiochip_add(np, mm_gc); +} + +static int cpm_init_par_io(void) +{ + struct device_node *np; + + for_each_compatible_node(np, NULL, "fsl,cpm1-pario-bank-a") + cpm1_gpiochip_add16(np); + + for_each_compatible_node(np, NULL, "fsl,cpm1-pario-bank-b") + cpm1_gpiochip_add32(np); + + for_each_compatible_node(np, NULL, "fsl,cpm1-pario-bank-c") + cpm1_gpiochip_add16(np); + + for_each_compatible_node(np, NULL, "fsl,cpm1-pario-bank-d") + cpm1_gpiochip_add16(np); + + /* Port E uses CPM2 layout */ + for_each_compatible_node(np, NULL, "fsl,cpm1-pario-bank-e") + cpm2_gpiochip_add32(np); + return 0; +} +arch_initcall(cpm_init_par_io); + +#endif /* CONFIG_8xx_GPIO */ diff --git a/arch/powerpc/sysdev/cpm2.c b/arch/powerpc/sysdev/cpm2.c index 5a6c5dfc53e..f1c3395633b 100644 --- a/arch/powerpc/sysdev/cpm2.c +++ b/arch/powerpc/sysdev/cpm2.c @@ -115,16 +115,10 @@ EXPORT_SYMBOL(cpm_command); * Baud rate clocks are zero-based in the driver code (as that maps * to port numbers). Documentation uses 1-based numbering. */ -#define BRG_INT_CLK (get_brgfreq()) -#define BRG_UART_CLK (BRG_INT_CLK/16) - -/* This function is used by UARTS, or anything else that uses a 16x - * oversampled clock. - */ -void -cpm_setbrg(uint brg, uint rate) +void __cpm2_setbrg(uint brg, uint rate, uint clk, int div16, int src) { u32 __iomem *bp; + u32 val; /* This is good enough to get SMCs running..... */ @@ -135,34 +129,14 @@ cpm_setbrg(uint brg, uint rate) brg -= 4; } bp += brg; - out_be32(bp, (((BRG_UART_CLK / rate) - 1) << 1) | CPM_BRG_EN); - - cpm2_unmap(bp); -} - -/* This function is used to set high speed synchronous baud rate - * clocks. - */ -void -cpm2_fastbrg(uint brg, uint rate, int div16) -{ - u32 __iomem *bp; - u32 val; - - if (brg < 4) { - bp = cpm2_map_size(im_brgc1, 16); - } else { - bp = cpm2_map_size(im_brgc5, 16); - brg -= 4; - } - bp += brg; - val = ((BRG_INT_CLK / rate) << 1) | CPM_BRG_EN; + val = (((clk / rate) - 1) << 1) | CPM_BRG_EN | src; if (div16) val |= CPM_BRG_DIV16; out_be32(bp, val); cpm2_unmap(bp); } +EXPORT_SYMBOL(__cpm2_setbrg); int cpm2_clk_setup(enum cpm_clk_target target, int clock, int mode) { @@ -377,3 +351,14 @@ void cpm2_set_pin(int port, int pin, int flags) else clrbits32(&iop[port].odr, pin); } + +static int cpm_init_par_io(void) +{ + struct device_node *np; + + for_each_compatible_node(np, NULL, "fsl,cpm2-pario-bank") + cpm2_gpiochip_add32(np); + return 0; +} +arch_initcall(cpm_init_par_io); + diff --git a/arch/powerpc/sysdev/cpm_common.c b/arch/powerpc/sysdev/cpm_common.c index e4b7296acb2..53da8a079f9 100644 --- a/arch/powerpc/sysdev/cpm_common.c +++ b/arch/powerpc/sysdev/cpm_common.c @@ -19,6 +19,8 @@ #include <linux/init.h> #include <linux/of_device.h> +#include <linux/spinlock.h> +#include <linux/of.h> #include <asm/udbg.h> #include <asm/io.h> @@ -28,6 +30,10 @@ #include <mm/mmu_decl.h> +#if defined(CONFIG_CPM2) || defined(CONFIG_8xx_GPIO) +#include <linux/of_gpio.h> +#endif + #ifdef CONFIG_PPC_EARLY_DEBUG_CPM static u32 __iomem *cpm_udbg_txdesc = (u32 __iomem __force *)CONFIG_PPC_EARLY_DEBUG_CPM_ADDR; @@ -207,3 +213,120 @@ dma_addr_t cpm_muram_dma(void __iomem *addr) return muram_pbase + ((u8 __iomem *)addr - muram_vbase); } EXPORT_SYMBOL(cpm_muram_dma); + +#if defined(CONFIG_CPM2) || defined(CONFIG_8xx_GPIO) + +struct cpm2_ioports { + u32 dir, par, sor, odr, dat; + u32 res[3]; +}; + +struct cpm2_gpio32_chip { + struct of_mm_gpio_chip mm_gc; + spinlock_t lock; + + /* shadowed data register to clear/set bits safely */ + u32 cpdata; +}; + +static inline struct cpm2_gpio32_chip * +to_cpm2_gpio32_chip(struct of_mm_gpio_chip *mm_gc) +{ + return container_of(mm_gc, struct cpm2_gpio32_chip, mm_gc); +} + +static void cpm2_gpio32_save_regs(struct of_mm_gpio_chip *mm_gc) +{ + struct cpm2_gpio32_chip *cpm2_gc = to_cpm2_gpio32_chip(mm_gc); + struct cpm2_ioports __iomem *iop = mm_gc->regs; + + cpm2_gc->cpdata = in_be32(&iop->dat); +} + +static int cpm2_gpio32_get(struct gpio_chip *gc, unsigned int gpio) +{ + struct of_mm_gpio_chip *mm_gc = to_of_mm_gpio_chip(gc); + struct cpm2_ioports __iomem *iop = mm_gc->regs; + u32 pin_mask; + + pin_mask = 1 << (31 - gpio); + + return !!(in_be32(&iop->dat) & pin_mask); +} + +static void cpm2_gpio32_set(struct gpio_chip *gc, unsigned int gpio, int value) +{ + struct of_mm_gpio_chip *mm_gc = to_of_mm_gpio_chip(gc); + struct cpm2_gpio32_chip *cpm2_gc = to_cpm2_gpio32_chip(mm_gc); + struct cpm2_ioports __iomem *iop = mm_gc->regs; + unsigned long flags; + u32 pin_mask = 1 << (31 - gpio); + + spin_lock_irqsave(&cpm2_gc->lock, flags); + + if (value) + cpm2_gc->cpdata |= pin_mask; + else + cpm2_gc->cpdata &= ~pin_mask; + + out_be32(&iop->dat, cpm2_gc->cpdata); + + spin_unlock_irqrestore(&cpm2_gc->lock, flags); +} + +static int cpm2_gpio32_dir_out(struct gpio_chip *gc, unsigned int gpio, int val) +{ + struct of_mm_gpio_chip *mm_gc = to_of_mm_gpio_chip(gc); + struct cpm2_ioports __iomem *iop = mm_gc->regs; + u32 pin_mask; + + pin_mask = 1 << (31 - gpio); + + setbits32(&iop->dir, pin_mask); + + cpm2_gpio32_set(gc, gpio, val); + + return 0; +} + +static int cpm2_gpio32_dir_in(struct gpio_chip *gc, unsigned int gpio) +{ + struct of_mm_gpio_chip *mm_gc = to_of_mm_gpio_chip(gc); + struct cpm2_ioports __iomem *iop = mm_gc->regs; + u32 pin_mask; + + pin_mask = 1 << (31 - gpio); + + clrbits32(&iop->dir, pin_mask); + + return 0; +} + +int cpm2_gpiochip_add32(struct device_node *np) +{ + struct cpm2_gpio32_chip *cpm2_gc; + struct of_mm_gpio_chip *mm_gc; + struct of_gpio_chip *of_gc; + struct gpio_chip *gc; + + cpm2_gc = kzalloc(sizeof(*cpm2_gc), GFP_KERNEL); + if (!cpm2_gc) + return -ENOMEM; + + spin_lock_init(&cpm2_gc->lock); + + mm_gc = &cpm2_gc->mm_gc; + of_gc = &mm_gc->of_gc; + gc = &of_gc->gc; + + mm_gc->save_regs = cpm2_gpio32_save_regs; + of_gc->gpio_cells = 2; + gc->ngpio = 32; + gc->direction_input = cpm2_gpio32_dir_in; + gc->direction_output = cpm2_gpio32_dir_out; + gc->get = cpm2_gpio32_get; + gc->set = cpm2_gpio32_set; + + return of_mm_gpiochip_add(np, mm_gc); +} +#endif /* CONFIG_CPM2 || CONFIG_8xx_GPIO */ diff --git a/arch/powerpc/sysdev/rtc_cmos_setup.c b/arch/powerpc/sysdev/rtc_cmos_setup.c index c09ddc0dbeb..c1879ebfd4f 100644 --- a/arch/powerpc/sysdev/rtc_cmos_setup.c +++ b/arch/powerpc/sysdev/rtc_cmos_setup.c @@ -21,6 +21,7 @@ static int __init add_rtc(void) struct device_node *np; struct platform_device *pd; struct resource res[2]; + unsigned int num_res = 1; int ret; memset(&res, 0, sizeof(res)); @@ -41,14 +42,24 @@ static int __init add_rtc(void) if (res[0].start != RTC_PORT(0)) return -EINVAL; - /* Use a fixed interrupt value of 8 since on PPC if we are using this - * its off an i8259 which we ensure has interrupt numbers 0..15. */ - res[1].start = 8; - res[1].end = 8; - res[1].flags = IORESOURCE_IRQ; + np = of_find_compatible_node(NULL, NULL, "chrp,iic"); + if (!np) + np = of_find_compatible_node(NULL, NULL, "pnpPNP,000"); + if (np) { + of_node_put(np); + /* + * Use a fixed interrupt value of 8 since on PPC if we are + * using this its off an i8259 which we ensure has interrupt + * numbers 0..15. + */ + res[1].start = 8; + res[1].end = 8; + res[1].flags = IORESOURCE_IRQ; + num_res++; + } pd = platform_device_register_simple("rtc_cmos", -1, - &res[0], 2); + &res[0], num_res); if (IS_ERR(pd)) return PTR_ERR(pd); diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 2ed88122be9..8d41908e251 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -317,6 +317,9 @@ config ARCH_ENABLE_MEMORY_HOTPLUG def_bool y depends on SPARSEMEM +config ARCH_ENABLE_MEMORY_HOTREMOVE + def_bool y + source "mm/Kconfig" comment "I/O subsystem configuration" diff --git a/arch/s390/kernel/compat_wrapper.S b/arch/s390/kernel/compat_wrapper.S index d003a6e16af..328a20e880b 100644 --- a/arch/s390/kernel/compat_wrapper.S +++ b/arch/s390/kernel/compat_wrapper.S @@ -1732,3 +1732,40 @@ compat_sys_timerfd_gettime_wrapper: lgfr %r2,%r2 # int llgtr %r3,%r3 # struct compat_itimerspec * jg compat_sys_timerfd_gettime + + .globl compat_sys_signalfd4_wrapper +compat_sys_signalfd4_wrapper: + lgfr %r2,%r2 # int + llgtr %r3,%r3 # compat_sigset_t * + llgfr %r4,%r4 # compat_size_t + lgfr %r5,%r5 # int + jg compat_sys_signalfd4 + + .globl sys_eventfd2_wrapper +sys_eventfd2_wrapper: + llgfr %r2,%r2 # unsigned int + lgfr %r3,%r3 # int + jg sys_eventfd2 + + .globl sys_inotify_init1_wrapper +sys_inotify_init1_wrapper: + lgfr %r2,%r2 # int + jg sys_inotify_init1 + + .globl sys_pipe2_wrapper +sys_pipe2_wrapper: + llgtr %r2,%r2 # u32 * + lgfr %r3,%r3 # int + jg sys_pipe2 # branch to system call + + .globl sys_dup3_wrapper +sys_dup3_wrapper: + llgfr %r2,%r2 # unsigned int + llgfr %r3,%r3 # unsigned int + lgfr %r4,%r4 # int + jg sys_dup3 # branch to system call + + .globl sys_epoll_create1_wrapper +sys_epoll_create1_wrapper: + lgfr %r2,%r2 # int + jg sys_epoll_create1 # branch to system call diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c index 54b2779b5e2..2dcf590faba 100644 --- a/arch/s390/kernel/ipl.c +++ b/arch/s390/kernel/ipl.c @@ -1705,7 +1705,10 @@ void __init setup_ipl(void) void __init ipl_update_parameters(void) { - if (diag308(DIAG308_STORE, &ipl_block) == DIAG308_RC_OK) + int rc; + + rc = diag308(DIAG308_STORE, &ipl_block); + if ((rc == DIAG308_RC_OK) || (rc == DIAG308_RC_NOCONFIG)) diag308_set_works = 1; } diff --git a/arch/s390/kernel/mem_detect.c b/arch/s390/kernel/mem_detect.c index 18ed7abe16c..9872999c66d 100644 --- a/arch/s390/kernel/mem_detect.c +++ b/arch/s390/kernel/mem_detect.c @@ -9,27 +9,6 @@ #include <asm/sclp.h> #include <asm/setup.h> -static int memory_fast_detect(struct mem_chunk *chunk) -{ - unsigned long val0 = 0; - unsigned long val1 = 0xc; - int rc = -EOPNOTSUPP; - - if (ipl_flags & IPL_NSS_VALID) - return -EOPNOTSUPP; - asm volatile( - " diag %1,%2,0x260\n" - "0: lhi %0,0\n" - "1:\n" - EX_TABLE(0b,1b) - : "+d" (rc), "+d" (val0), "+d" (val1) : : "cc"); - - if (rc || val0 != val1) - return -EOPNOTSUPP; - chunk->size = val0 + 1; - return 0; -} - static inline int tprot(unsigned long addr) { int rc = -EFAULT; @@ -84,8 +63,6 @@ void detect_memory_layout(struct mem_chunk chunk[]) unsigned long flags, cr0; memset(chunk, 0, MEMORY_CHUNKS * sizeof(struct mem_chunk)); - if (memory_fast_detect(&chunk[0]) == 0) - return; /* Disable IRQs, DAT and low address protection so tprot does the * right thing and we don't get scheduled away with low address * protection disabled. diff --git a/arch/s390/kernel/syscalls.S b/arch/s390/kernel/syscalls.S index c87ec687d4c..c66d35e5514 100644 --- a/arch/s390/kernel/syscalls.S +++ b/arch/s390/kernel/syscalls.S @@ -330,3 +330,9 @@ SYSCALL(sys_eventfd,sys_eventfd,sys_eventfd_wrapper) SYSCALL(sys_timerfd_create,sys_timerfd_create,sys_timerfd_create_wrapper) SYSCALL(sys_timerfd_settime,sys_timerfd_settime,compat_sys_timerfd_settime_wrapper) /* 320 */ SYSCALL(sys_timerfd_gettime,sys_timerfd_gettime,compat_sys_timerfd_gettime_wrapper) +SYSCALL(sys_signalfd4,sys_signalfd4,compat_sys_signalfd4_wrapper) +SYSCALL(sys_eventfd2,sys_eventfd2,sys_eventfd2_wrapper) +SYSCALL(sys_inotify_init1,sys_inotify_init1,sys_inotify_init1_wrapper) +SYSCALL(sys_pipe2,sys_pipe2,sys_pipe2_wrapper) /* 325 */ +SYSCALL(sys_dup3,sys_dup3,sys_dup3_wrapper) +SYSCALL(sys_epoll_create1,sys_epoll_create1,sys_epoll_create1_wrapper) diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index ab70d9bd926..ca114fe46ff 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -1348,7 +1348,7 @@ early_param("stp", early_parse_stp); /* * Reset STP attachment. */ -static void stp_reset(void) +static void __init stp_reset(void) { int rc; diff --git a/arch/s390/lib/delay.c b/arch/s390/lib/delay.c index eae21a8ac72..fc6ab6094df 100644 --- a/arch/s390/lib/delay.c +++ b/arch/s390/lib/delay.c @@ -43,7 +43,7 @@ void __udelay(unsigned long usecs) local_bh_disable(); local_irq_save(flags); if (raw_irqs_disabled_flags(flags)) { - old_cc = S390_lowcore.clock_comparator; + old_cc = local_tick_disable(); S390_lowcore.clock_comparator = -1ULL; __ctl_store(cr0, 0, 0); dummy = (cr0 & 0xffff00e0) | 0x00000800; @@ -65,7 +65,7 @@ void __udelay(unsigned long usecs) if (raw_irqs_disabled_flags(flags)) { __ctl_load(cr0, 0, 0); - S390_lowcore.clock_comparator = old_cc; + local_tick_enable(old_cc); } if (!irq_context) _local_bh_enable(); diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index 4993b0f594e..1169130a97e 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -179,7 +179,7 @@ int arch_add_memory(int nid, u64 start, u64 size) int rc; pgdat = NODE_DATA(nid); - zone = pgdat->node_zones + ZONE_NORMAL; + zone = pgdat->node_zones + ZONE_MOVABLE; rc = vmem_add_mapping(start, size); if (rc) return rc; @@ -189,3 +189,14 @@ int arch_add_memory(int nid, u64 start, u64 size) return rc; } #endif /* CONFIG_MEMORY_HOTPLUG */ + +#ifdef CONFIG_MEMORY_HOTREMOVE +int remove_memory(u64 start, u64 size) +{ + unsigned long start_pfn, end_pfn; + + start_pfn = PFN_DOWN(start); + end_pfn = start_pfn + PFN_DOWN(size); + return offline_pages(start_pfn, end_pfn, 120 * HZ); +} +#endif /* CONFIG_MEMORY_HOTREMOVE */ diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig index 2b742206ba0..cb992c3d6b7 100644 --- a/arch/sh/Kconfig +++ b/arch/sh/Kconfig @@ -11,6 +11,7 @@ config SUPERH select HAVE_CLK select HAVE_IDE select HAVE_OPROFILE + select HAVE_GENERIC_DMA_COHERENT help The SuperH is a RISC processor targeted for use in embedded systems and consumer electronics; it was also used in the Sega Dreamcast diff --git a/arch/sh/include/asm/dma-mapping.h b/arch/sh/include/asm/dma-mapping.h index 6c0b8a2de14..627315ecdb5 100644 --- a/arch/sh/include/asm/dma-mapping.h +++ b/arch/sh/include/asm/dma-mapping.h @@ -5,6 +5,7 @@ #include <linux/scatterlist.h> #include <asm/cacheflush.h> #include <asm/io.h> +#include <asm-generic/dma-coherent.h> extern struct bus_type pci_bus_type; diff --git a/arch/sh/mm/consistent.c b/arch/sh/mm/consistent.c index 8277982d093..b2ce014401b 100644 --- a/arch/sh/mm/consistent.c +++ b/arch/sh/mm/consistent.c @@ -28,21 +28,10 @@ void *dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t gfp) { void *ret, *ret_nocache; - struct dma_coherent_mem *mem = dev ? dev->dma_mem : NULL; int order = get_order(size); - if (mem) { - int page = bitmap_find_free_region(mem->bitmap, mem->size, - order); - if (page >= 0) { - *dma_handle = mem->device_base + (page << PAGE_SHIFT); - ret = mem->virt_base + (page << PAGE_SHIFT); - memset(ret, 0, size); - return ret; - } - if (mem->flags & DMA_MEMORY_EXCLUSIVE) - return NULL; - } + if (dma_alloc_from_coherent(dev, size, dma_handle, &ret)) + return ret; ret = (void *)__get_free_pages(gfp, order); if (!ret) @@ -72,11 +61,7 @@ void dma_free_coherent(struct device *dev, size_t size, struct dma_coherent_mem *mem = dev ? dev->dma_mem : NULL; int order = get_order(size); - if (mem && vaddr >= mem->virt_base && vaddr < (mem->virt_base + (mem->size << PAGE_SHIFT))) { - int page = (vaddr - mem->virt_base) >> PAGE_SHIFT; - - bitmap_release_region(mem->bitmap, page, order); - } else { + if (!dma_release_from_coherent(dev, order, vaddr)) { WARN_ON(irqs_disabled()); /* for portability */ BUG_ON(mem && mem->flags & DMA_MEMORY_EXCLUSIVE); free_pages((unsigned long)phys_to_virt(dma_handle), order); @@ -85,83 +70,6 @@ void dma_free_coherent(struct device *dev, size_t size, } EXPORT_SYMBOL(dma_free_coherent); -int dma_declare_coherent_memory(struct device *dev, dma_addr_t bus_addr, - dma_addr_t device_addr, size_t size, int flags) -{ - void __iomem *mem_base = NULL; - int pages = size >> PAGE_SHIFT; - int bitmap_size = BITS_TO_LONGS(pages) * sizeof(long); - - if ((flags & (DMA_MEMORY_MAP | DMA_MEMORY_IO)) == 0) - goto out; - if (!size) - goto out; - if (dev->dma_mem) - goto out; - - /* FIXME: this routine just ignores DMA_MEMORY_INCLUDES_CHILDREN */ - - mem_base = ioremap_nocache(bus_addr, size); - if (!mem_base) - goto out; - - dev->dma_mem = kmalloc(sizeof(struct dma_coherent_mem), GFP_KERNEL); - if (!dev->dma_mem) - goto out; - dev->dma_mem->bitmap = kzalloc(bitmap_size, GFP_KERNEL); - if (!dev->dma_mem->bitmap) - goto free1_out; - - dev->dma_mem->virt_base = mem_base; - dev->dma_mem->device_base = device_addr; - dev->dma_mem->size = pages; - dev->dma_mem->flags = flags; - - if (flags & DMA_MEMORY_MAP) - return DMA_MEMORY_MAP; - - return DMA_MEMORY_IO; - - free1_out: - kfree(dev->dma_mem); - out: - if (mem_base) - iounmap(mem_base); - return 0; -} -EXPORT_SYMBOL(dma_declare_coherent_memory); - -void dma_release_declared_memory(struct device *dev) -{ - struct dma_coherent_mem *mem = dev->dma_mem; - - if (!mem) - return; - dev->dma_mem = NULL; - iounmap(mem->virt_base); - kfree(mem->bitmap); - kfree(mem); -} -EXPORT_SYMBOL(dma_release_declared_memory); - -void *dma_mark_declared_memory_occupied(struct device *dev, - dma_addr_t device_addr, size_t size) -{ - struct dma_coherent_mem *mem = dev->dma_mem; - int pages = (size + (device_addr & ~PAGE_MASK) + PAGE_SIZE - 1) >> PAGE_SHIFT; - int pos, err; - - if (!mem) - return ERR_PTR(-EINVAL); - - pos = (device_addr - mem->device_base) >> PAGE_SHIFT; - err = bitmap_allocate_region(mem->bitmap, pos, get_order(pages)); - if (err != 0) - return ERR_PTR(err); - return mem->virt_base + (pos << PAGE_SHIFT); -} -EXPORT_SYMBOL(dma_mark_declared_memory_occupied); - void dma_cache_sync(struct device *dev, void *vaddr, size_t size, enum dma_data_direction direction) { diff --git a/arch/um/drivers/line.c b/arch/um/drivers/line.c index 5047490fc29..d741f35d7b3 100644 --- a/arch/um/drivers/line.c +++ b/arch/um/drivers/line.c @@ -362,19 +362,7 @@ static irqreturn_t line_write_interrupt(int irq, void *data) if (tty == NULL) return IRQ_NONE; - if (test_bit(TTY_DO_WRITE_WAKEUP, &tty->flags) && - (tty->ldisc.write_wakeup != NULL)) - (tty->ldisc.write_wakeup)(tty); - - /* - * BLOCKING mode - * In blocking mode, everything sleeps on tty->write_wait. - * Sleeping in the console driver would break non-blocking - * writes. - */ - - if (waitqueue_active(&tty->write_wait)) - wake_up_interruptible(&tty->write_wait); + tty_wakeup(tty); return IRQ_HANDLED; } diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index b6fa2877b17..3d0f2b6a5a1 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -30,6 +30,7 @@ config X86 select HAVE_FTRACE select HAVE_KVM if ((X86_32 && !X86_VOYAGER && !X86_VISWS && !X86_NUMAQ) || X86_64) select HAVE_ARCH_KGDB if !X86_VOYAGER + select HAVE_GENERIC_DMA_COHERENT if X86_32 select HAVE_EFFICIENT_UNALIGNED_ACCESS config ARCH_DEFCONFIG diff --git a/arch/x86/kernel/acpi/cstate.c b/arch/x86/kernel/acpi/cstate.c index 9220cf46aa1..c2502eb9aa8 100644 --- a/arch/x86/kernel/acpi/cstate.c +++ b/arch/x86/kernel/acpi/cstate.c @@ -73,7 +73,6 @@ int acpi_processor_ffh_cstate_probe(unsigned int cpu, struct cpuinfo_x86 *c = &cpu_data(cpu); cpumask_t saved_mask; - cpumask_of_cpu_ptr(new_mask, cpu); int retval; unsigned int eax, ebx, ecx, edx; unsigned int edx_part; @@ -92,7 +91,7 @@ int acpi_processor_ffh_cstate_probe(unsigned int cpu, /* Make sure we are running on right CPU */ saved_mask = current->cpus_allowed; - retval = set_cpus_allowed_ptr(current, new_mask); + retval = set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); if (retval) return -1; diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 74697408576..22d7d050905 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -29,9 +29,6 @@ #define CMD_SET_TYPE(cmd, t) ((cmd)->data[1] |= ((t) << 28)) -#define to_pages(addr, size) \ - (round_up(((addr) & ~PAGE_MASK) + (size), PAGE_SIZE) >> PAGE_SHIFT) - #define EXIT_LOOP_COUNT 10000000 static DEFINE_RWLOCK(amd_iommu_devtable_lock); @@ -185,7 +182,7 @@ static int iommu_flush_pages(struct amd_iommu *iommu, u16 domid, u64 address, size_t size) { int s = 0; - unsigned pages = to_pages(address, size); + unsigned pages = iommu_num_pages(address, size); address &= PAGE_MASK; @@ -557,8 +554,8 @@ static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu, if (iommu->exclusion_start && iommu->exclusion_start < dma_dom->aperture_size) { unsigned long startpage = iommu->exclusion_start >> PAGE_SHIFT; - int pages = to_pages(iommu->exclusion_start, - iommu->exclusion_length); + int pages = iommu_num_pages(iommu->exclusion_start, + iommu->exclusion_length); dma_ops_reserve_addresses(dma_dom, startpage, pages); } @@ -767,7 +764,7 @@ static dma_addr_t __map_single(struct device *dev, unsigned int pages; int i; - pages = to_pages(paddr, size); + pages = iommu_num_pages(paddr, size); paddr &= PAGE_MASK; address = dma_ops_alloc_addresses(dev, dma_dom, pages); @@ -802,7 +799,7 @@ static void __unmap_single(struct amd_iommu *iommu, if ((dma_addr == 0) || (dma_addr + size > dma_dom->aperture_size)) return; - pages = to_pages(dma_addr, size); + pages = iommu_num_pages(dma_addr, size); dma_addr &= PAGE_MASK; start = dma_addr; diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c index ff2fff56f0a..dd097b83583 100644 --- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c +++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c @@ -200,12 +200,10 @@ static void drv_read(struct drv_cmd *cmd) static void drv_write(struct drv_cmd *cmd) { cpumask_t saved_mask = current->cpus_allowed; - cpumask_of_cpu_ptr_declare(cpu_mask); unsigned int i; for_each_cpu_mask_nr(i, cmd->mask) { - cpumask_of_cpu_ptr_next(cpu_mask, i); - set_cpus_allowed_ptr(current, cpu_mask); + set_cpus_allowed_ptr(current, &cpumask_of_cpu(i)); do_drv_write(cmd); } @@ -269,12 +267,11 @@ static unsigned int get_measured_perf(unsigned int cpu) } aperf_cur, mperf_cur; cpumask_t saved_mask; - cpumask_of_cpu_ptr(cpu_mask, cpu); unsigned int perf_percent; unsigned int retval; saved_mask = current->cpus_allowed; - set_cpus_allowed_ptr(current, cpu_mask); + set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); if (get_cpu() != cpu) { /* We were not able to run on requested processor */ put_cpu(); @@ -340,7 +337,6 @@ static unsigned int get_measured_perf(unsigned int cpu) static unsigned int get_cur_freq_on_cpu(unsigned int cpu) { - cpumask_of_cpu_ptr(cpu_mask, cpu); struct acpi_cpufreq_data *data = per_cpu(drv_data, cpu); unsigned int freq; unsigned int cached_freq; @@ -353,7 +349,7 @@ static unsigned int get_cur_freq_on_cpu(unsigned int cpu) } cached_freq = data->freq_table[data->acpi_data->state].frequency; - freq = extract_freq(get_cur_val(cpu_mask), data); + freq = extract_freq(get_cur_val(&cpumask_of_cpu(cpu)), data); if (freq != cached_freq) { /* * The dreaded BIOS frequency change behind our back. diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c index 53c7b693697..c45ca6d4dce 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c @@ -479,12 +479,11 @@ static int core_voltage_post_transition(struct powernow_k8_data *data, u32 reqvi static int check_supported_cpu(unsigned int cpu) { cpumask_t oldmask; - cpumask_of_cpu_ptr(cpu_mask, cpu); u32 eax, ebx, ecx, edx; unsigned int rc = 0; oldmask = current->cpus_allowed; - set_cpus_allowed_ptr(current, cpu_mask); + set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); if (smp_processor_id() != cpu) { printk(KERN_ERR PFX "limiting to cpu %u failed\n", cpu); @@ -1017,7 +1016,6 @@ static int transition_frequency_pstate(struct powernow_k8_data *data, unsigned i static int powernowk8_target(struct cpufreq_policy *pol, unsigned targfreq, unsigned relation) { cpumask_t oldmask; - cpumask_of_cpu_ptr(cpu_mask, pol->cpu); struct powernow_k8_data *data = per_cpu(powernow_data, pol->cpu); u32 checkfid; u32 checkvid; @@ -1032,7 +1030,7 @@ static int powernowk8_target(struct cpufreq_policy *pol, unsigned targfreq, unsi /* only run on specific CPU from here on */ oldmask = current->cpus_allowed; - set_cpus_allowed_ptr(current, cpu_mask); + set_cpus_allowed_ptr(current, &cpumask_of_cpu(pol->cpu)); if (smp_processor_id() != pol->cpu) { printk(KERN_ERR PFX "limiting to cpu %u failed\n", pol->cpu); @@ -1107,7 +1105,6 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) { struct powernow_k8_data *data; cpumask_t oldmask; - cpumask_of_cpu_ptr_declare(newmask); int rc; if (!cpu_online(pol->cpu)) @@ -1159,8 +1156,7 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) /* only run on specific CPU from here on */ oldmask = current->cpus_allowed; - cpumask_of_cpu_ptr_next(newmask, pol->cpu); - set_cpus_allowed_ptr(current, newmask); + set_cpus_allowed_ptr(current, &cpumask_of_cpu(pol->cpu)); if (smp_processor_id() != pol->cpu) { printk(KERN_ERR PFX "limiting to cpu %u failed\n", pol->cpu); @@ -1182,7 +1178,7 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) set_cpus_allowed_ptr(current, &oldmask); if (cpu_family == CPU_HW_PSTATE) - pol->cpus = *newmask; + pol->cpus = cpumask_of_cpu(pol->cpu); else pol->cpus = per_cpu(cpu_core_map, pol->cpu); data->available_cores = &(pol->cpus); @@ -1248,7 +1244,6 @@ static unsigned int powernowk8_get (unsigned int cpu) { struct powernow_k8_data *data; cpumask_t oldmask = current->cpus_allowed; - cpumask_of_cpu_ptr(newmask, cpu); unsigned int khz = 0; unsigned int first; @@ -1258,7 +1253,7 @@ static unsigned int powernowk8_get (unsigned int cpu) if (!data) return -EINVAL; - set_cpus_allowed_ptr(current, newmask); + set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); if (smp_processor_id() != cpu) { printk(KERN_ERR PFX "limiting to CPU %d failed in powernowk8_get\n", cpu); diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c index ca2ac13b7af..15e13c01cc3 100644 --- a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c +++ b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c @@ -324,10 +324,9 @@ static unsigned int get_cur_freq(unsigned int cpu) unsigned l, h; unsigned clock_freq; cpumask_t saved_mask; - cpumask_of_cpu_ptr(new_mask, cpu); saved_mask = current->cpus_allowed; - set_cpus_allowed_ptr(current, new_mask); + set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); if (smp_processor_id() != cpu) return 0; @@ -585,15 +584,12 @@ static int centrino_target (struct cpufreq_policy *policy, * Best effort undo.. */ - if (!cpus_empty(*covered_cpus)) { - cpumask_of_cpu_ptr_declare(new_mask); - + if (!cpus_empty(*covered_cpus)) for_each_cpu_mask_nr(j, *covered_cpus) { - cpumask_of_cpu_ptr_next(new_mask, j); - set_cpus_allowed_ptr(current, new_mask); + set_cpus_allowed_ptr(current, + &cpumask_of_cpu(j)); wrmsr(MSR_IA32_PERF_CTL, oldmsr, h); } - } tmp = freqs.new; freqs.new = freqs.old; diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c index 2f3728dc24f..191f7263c61 100644 --- a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c +++ b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c @@ -244,8 +244,7 @@ static unsigned int _speedstep_get(const cpumask_t *cpus) static unsigned int speedstep_get(unsigned int cpu) { - cpumask_of_cpu_ptr(newmask, cpu); - return _speedstep_get(newmask); + return _speedstep_get(&cpumask_of_cpu(cpu)); } /** diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index 650d40f7912..6b0a10b002f 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c @@ -516,7 +516,6 @@ static int __cpuinit detect_cache_attributes(unsigned int cpu) unsigned long j; int retval; cpumask_t oldmask; - cpumask_of_cpu_ptr(newmask, cpu); if (num_cache_leaves == 0) return -ENOENT; @@ -527,7 +526,7 @@ static int __cpuinit detect_cache_attributes(unsigned int cpu) return -ENOMEM; oldmask = current->cpus_allowed; - retval = set_cpus_allowed_ptr(current, newmask); + retval = set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); if (retval) goto out; diff --git a/arch/x86/kernel/genapic_64.c b/arch/x86/kernel/genapic_64.c index 1fa8be5bd21..eaff0bbb144 100644 --- a/arch/x86/kernel/genapic_64.c +++ b/arch/x86/kernel/genapic_64.c @@ -99,3 +99,4 @@ int is_uv_system(void) { return uv_system_type != UV_NONE; } +EXPORT_SYMBOL_GPL(is_uv_system); diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c index 3fee2aa50f3..b68e21f06f4 100644 --- a/arch/x86/kernel/ldt.c +++ b/arch/x86/kernel/ldt.c @@ -62,12 +62,10 @@ static int alloc_ldt(mm_context_t *pc, int mincount, int reload) if (reload) { #ifdef CONFIG_SMP - cpumask_of_cpu_ptr_declare(mask); - preempt_disable(); load_LDT(pc); - cpumask_of_cpu_ptr_next(mask, smp_processor_id()); - if (!cpus_equal(current->mm->cpu_vm_mask, *mask)) + if (!cpus_equal(current->mm->cpu_vm_mask, + cpumask_of_cpu(smp_processor_id()))) smp_call_function(flush_ldt, current->mm, 1); preempt_enable(); #else diff --git a/arch/x86/kernel/microcode.c b/arch/x86/kernel/microcode.c index 6994c751590..652fa5c38eb 100644 --- a/arch/x86/kernel/microcode.c +++ b/arch/x86/kernel/microcode.c @@ -388,7 +388,6 @@ static int do_microcode_update (void) void *new_mc = NULL; int cpu; cpumask_t old; - cpumask_of_cpu_ptr_declare(newmask); old = current->cpus_allowed; @@ -405,8 +404,7 @@ static int do_microcode_update (void) if (!uci->valid) continue; - cpumask_of_cpu_ptr_next(newmask, cpu); - set_cpus_allowed_ptr(current, newmask); + set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); error = get_maching_microcode(new_mc, cpu); if (error < 0) goto out; @@ -576,7 +574,6 @@ static int apply_microcode_check_cpu(int cpu) struct cpuinfo_x86 *c = &cpu_data(cpu); struct ucode_cpu_info *uci = ucode_cpu_info + cpu; cpumask_t old; - cpumask_of_cpu_ptr(newmask, cpu); unsigned int val[2]; int err = 0; @@ -585,7 +582,7 @@ static int apply_microcode_check_cpu(int cpu) return 0; old = current->cpus_allowed; - set_cpus_allowed_ptr(current, newmask); + set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); /* Check if the microcode we have in memory matches the CPU */ if (c->x86_vendor != X86_VENDOR_INTEL || c->x86 < 6 || @@ -623,12 +620,11 @@ static int apply_microcode_check_cpu(int cpu) static void microcode_init_cpu(int cpu, int resume) { cpumask_t old; - cpumask_of_cpu_ptr(newmask, cpu); struct ucode_cpu_info *uci = ucode_cpu_info + cpu; old = current->cpus_allowed; - set_cpus_allowed_ptr(current, newmask); + set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); mutex_lock(µcode_mutex); collect_cpu_info(cpu); if (uci->valid && system_state == SYSTEM_RUNNING && !resume) @@ -661,13 +657,10 @@ static ssize_t reload_store(struct sys_device *dev, if (end == buf) return -EINVAL; if (val == 1) { - cpumask_t old; - cpumask_of_cpu_ptr(newmask, cpu); - - old = current->cpus_allowed; + cpumask_t old = current->cpus_allowed; get_online_cpus(); - set_cpus_allowed_ptr(current, newmask); + set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); mutex_lock(µcode_mutex); if (uci->valid) diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index 37544123896..87d4d6964ec 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -123,6 +123,14 @@ void __init pci_iommu_alloc(void) pci_swiotlb_init(); } + +unsigned long iommu_num_pages(unsigned long addr, unsigned long len) +{ + unsigned long size = roundup((addr & ~PAGE_MASK) + len, PAGE_SIZE); + + return size >> PAGE_SHIFT; +} +EXPORT_SYMBOL(iommu_num_pages); #endif /* @@ -192,124 +200,6 @@ static __init int iommu_setup(char *p) } early_param("iommu", iommu_setup); -#ifdef CONFIG_X86_32 -int dma_declare_coherent_memory(struct device *dev, dma_addr_t bus_addr, - dma_addr_t device_addr, size_t size, int flags) -{ - void __iomem *mem_base = NULL; - int pages = size >> PAGE_SHIFT; - int bitmap_size = BITS_TO_LONGS(pages) * sizeof(long); - - if ((flags & (DMA_MEMORY_MAP | DMA_MEMORY_IO)) == 0) - goto out; - if (!size) - goto out; - if (dev->dma_mem) - goto out; - - /* FIXME: this routine just ignores DMA_MEMORY_INCLUDES_CHILDREN */ - - mem_base = ioremap(bus_addr, size); - if (!mem_base) - goto out; - - dev->dma_mem = kzalloc(sizeof(struct dma_coherent_mem), GFP_KERNEL); - if (!dev->dma_mem) - goto out; - dev->dma_mem->bitmap = kzalloc(bitmap_size, GFP_KERNEL); - if (!dev->dma_mem->bitmap) - goto free1_out; - - dev->dma_mem->virt_base = mem_base; - dev->dma_mem->device_base = device_addr; - dev->dma_mem->size = pages; - dev->dma_mem->flags = flags; - - if (flags & DMA_MEMORY_MAP) - return DMA_MEMORY_MAP; - - return DMA_MEMORY_IO; - - free1_out: - kfree(dev->dma_mem); - out: - if (mem_base) - iounmap(mem_base); - return 0; -} -EXPORT_SYMBOL(dma_declare_coherent_memory); - -void dma_release_declared_memory(struct device *dev) -{ - struct dma_coherent_mem *mem = dev->dma_mem; - - if (!mem) - return; - dev->dma_mem = NULL; - iounmap(mem->virt_base); - kfree(mem->bitmap); - kfree(mem); -} -EXPORT_SYMBOL(dma_release_declared_memory); - -void *dma_mark_declared_memory_occupied(struct device *dev, - dma_addr_t device_addr, size_t size) -{ - struct dma_coherent_mem *mem = dev->dma_mem; - int pos, err; - int pages = (size + (device_addr & ~PAGE_MASK) + PAGE_SIZE - 1); - - pages >>= PAGE_SHIFT; - - if (!mem) - return ERR_PTR(-EINVAL); - - pos = (device_addr - mem->device_base) >> PAGE_SHIFT; - err = bitmap_allocate_region(mem->bitmap, pos, get_order(pages)); - if (err != 0) - return ERR_PTR(err); - return mem->virt_base + (pos << PAGE_SHIFT); -} -EXPORT_SYMBOL(dma_mark_declared_memory_occupied); - -static int dma_alloc_from_coherent_mem(struct device *dev, ssize_t size, - dma_addr_t *dma_handle, void **ret) -{ - struct dma_coherent_mem *mem = dev ? dev->dma_mem : NULL; - int order = get_order(size); - - if (mem) { - int page = bitmap_find_free_region(mem->bitmap, mem->size, - order); - if (page >= 0) { - *dma_handle = mem->device_base + (page << PAGE_SHIFT); - *ret = mem->virt_base + (page << PAGE_SHIFT); - memset(*ret, 0, size); - } - if (mem->flags & DMA_MEMORY_EXCLUSIVE) - *ret = NULL; - } - return (mem != NULL); -} - -static int dma_release_coherent(struct device *dev, int order, void *vaddr) -{ - struct dma_coherent_mem *mem = dev ? dev->dma_mem : NULL; - - if (mem && vaddr >= mem->virt_base && vaddr < - (mem->virt_base + (mem->size << PAGE_SHIFT))) { - int page = (vaddr - mem->virt_base) >> PAGE_SHIFT; - - bitmap_release_region(mem->bitmap, page, order); - return 1; - } - return 0; -} -#else -#define dma_alloc_from_coherent_mem(dev, size, handle, ret) (0) -#define dma_release_coherent(dev, order, vaddr) (0) -#endif /* CONFIG_X86_32 */ - int dma_supported(struct device *dev, u64 mask) { struct dma_mapping_ops *ops = get_dma_ops(dev); @@ -379,7 +269,7 @@ dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, /* ignore region specifiers */ gfp &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32); - if (dma_alloc_from_coherent_mem(dev, size, dma_handle, &memory)) + if (dma_alloc_from_coherent(dev, size, dma_handle, &memory)) return memory; if (!dev) { @@ -484,7 +374,7 @@ void dma_free_coherent(struct device *dev, size_t size, int order = get_order(size); WARN_ON(irqs_disabled()); /* for portability */ - if (dma_release_coherent(dev, order, vaddr)) + if (dma_release_from_coherent(dev, order, vaddr)) return; if (ops->unmap_single) ops->unmap_single(dev, bus, size, 0); diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index 744126e6495..49285f8fd4d 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -67,9 +67,6 @@ static u32 gart_unmapped_entry; (((x) & 0xfffff000) | (((x) >> 32) << 4) | GPTE_VALID | GPTE_COHERENT) #define GPTE_DECODE(x) (((x) & 0xfffff000) | (((u64)(x) & 0xff0) << 28)) -#define to_pages(addr, size) \ - (round_up(((addr) & ~PAGE_MASK) + (size), PAGE_SIZE) >> PAGE_SHIFT) - #define EMERGENCY_PAGES 32 /* = 128KB */ #ifdef CONFIG_AGP @@ -241,7 +238,7 @@ nonforced_iommu(struct device *dev, unsigned long addr, size_t size) static dma_addr_t dma_map_area(struct device *dev, dma_addr_t phys_mem, size_t size, int dir) { - unsigned long npages = to_pages(phys_mem, size); + unsigned long npages = iommu_num_pages(phys_mem, size); unsigned long iommu_page = alloc_iommu(dev, npages); int i; @@ -304,7 +301,7 @@ static void gart_unmap_single(struct device *dev, dma_addr_t dma_addr, return; iommu_page = (dma_addr - iommu_bus_base)>>PAGE_SHIFT; - npages = to_pages(dma_addr, size); + npages = iommu_num_pages(dma_addr, size); for (i = 0; i < npages; i++) { iommu_gatt_base[iommu_page + i] = gart_unmapped_entry; CLEAR_LEAK(iommu_page + i); @@ -387,7 +384,7 @@ static int __dma_map_cont(struct device *dev, struct scatterlist *start, } addr = phys_addr; - pages = to_pages(s->offset, s->length); + pages = iommu_num_pages(s->offset, s->length); while (pages--) { iommu_gatt_base[iommu_page] = GPTE_ENCODE(addr); SET_LEAK(iommu_page); @@ -470,7 +467,7 @@ gart_map_sg(struct device *dev, struct scatterlist *sg, int nents, int dir) seg_size += s->length; need = nextneed; - pages += to_pages(s->offset, s->length); + pages += iommu_num_pages(s->offset, s->length); ps = s; } if (dma_map_cont(dev, start_sg, i - start, sgmap, pages, need) < 0) diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 06a9f643817..724adfc63cb 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -414,25 +414,20 @@ void native_machine_shutdown(void) /* The boot cpu is always logical cpu 0 */ int reboot_cpu_id = 0; - cpumask_of_cpu_ptr(newmask, reboot_cpu_id); #ifdef CONFIG_X86_32 /* See if there has been given a command line override */ if ((reboot_cpu != -1) && (reboot_cpu < NR_CPUS) && - cpu_online(reboot_cpu)) { + cpu_online(reboot_cpu)) reboot_cpu_id = reboot_cpu; - cpumask_of_cpu_ptr_next(newmask, reboot_cpu_id); - } #endif /* Make certain the cpu I'm about to reboot on is online */ - if (!cpu_online(reboot_cpu_id)) { + if (!cpu_online(reboot_cpu_id)) reboot_cpu_id = smp_processor_id(); - cpumask_of_cpu_ptr_next(newmask, reboot_cpu_id); - } /* Make certain I only run on the appropriate processor */ - set_cpus_allowed_ptr(current, newmask); + set_cpus_allowed_ptr(current, &cpumask_of_cpu(reboot_cpu_id)); /* O.K Now that I'm on the appropriate processor, * stop all of the others. diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index b520dae02bf..2d888586385 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -788,10 +788,6 @@ void __init setup_arch(char **cmdline_p) initmem_init(0, max_pfn); -#ifdef CONFIG_X86_64 - dma32_reserve_bootmem(); -#endif - #ifdef CONFIG_ACPI_SLEEP /* * Reserve low memory region for sleep support. @@ -806,6 +802,15 @@ void __init setup_arch(char **cmdline_p) #endif reserve_crashkernel(); +#ifdef CONFIG_X86_64 + /* + * dma32_reserve_bootmem() allocates bootmem which may conflict + * with the crashkernel command line, so do that after + * reserve_crashkernel() + */ + dma32_reserve_bootmem(); +#endif + reserve_ibft_region(); #ifdef CONFIG_KVM_CLOCK diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index f7745f94c00..76e305e064f 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -80,24 +80,6 @@ static void __init setup_per_cpu_maps(void) #endif } -#ifdef CONFIG_HAVE_CPUMASK_OF_CPU_MAP -cpumask_t *cpumask_of_cpu_map __read_mostly; -EXPORT_SYMBOL(cpumask_of_cpu_map); - -/* requires nr_cpu_ids to be initialized */ -static void __init setup_cpumask_of_cpu(void) -{ - int i; - - /* alloc_bootmem zeroes memory */ - cpumask_of_cpu_map = alloc_bootmem_low(sizeof(cpumask_t) * nr_cpu_ids); - for (i = 0; i < nr_cpu_ids; i++) - cpu_set(i, cpumask_of_cpu_map[i]); -} -#else -static inline void setup_cpumask_of_cpu(void) { } -#endif - #ifdef CONFIG_X86_32 /* * Great future not-so-futuristic plan: make i386 and x86_64 do it @@ -197,9 +179,6 @@ void __init setup_per_cpu_areas(void) /* Setup node to cpumask map */ setup_node_to_cpumask_map(); - - /* Setup cpumask_of_cpu map */ - setup_cpumask_of_cpu(); } #endif diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig index 8d45fabc5f3..ce3251ce550 100644 --- a/arch/x86/kvm/Kconfig +++ b/arch/x86/kvm/Kconfig @@ -21,6 +21,7 @@ config KVM tristate "Kernel-based Virtual Machine (KVM) support" depends on HAVE_KVM select PREEMPT_NOTIFIERS + select MMU_NOTIFIER select ANON_INODES ---help--- Support hosting fully virtualized guest machines using hardware diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index 0313a5eec41..d9249a882aa 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c @@ -1014,6 +1014,9 @@ __init void lguest_init(void) init_pg_tables_start = __pa(pg0); init_pg_tables_end = __pa(pg0); + /* As described in head_32.S, we map the first 128M of memory. */ + max_pfn_mapped = (128*1024*1024) >> PAGE_SHIFT; + /* Load the %fs segment register (the per-cpu segment register) with * the normal data segment to get through booting. */ asm volatile ("mov %0, %%fs" : : "r" (__KERNEL_DS) : "memory"); diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S index dfdf428975c..f118c110af3 100644 --- a/arch/x86/lib/copy_user_64.S +++ b/arch/x86/lib/copy_user_64.S @@ -52,7 +52,7 @@ jnz 100b 102: .section .fixup,"ax" -103: addl %r8d,%edx /* ecx is zerorest also */ +103: addl %ecx,%edx /* ecx is zerorest also */ jmp copy_user_handle_tail .previous diff --git a/arch/x86/lib/copy_user_nocache_64.S b/arch/x86/lib/copy_user_nocache_64.S index 40e0e309d27..cb0c112386f 100644 --- a/arch/x86/lib/copy_user_nocache_64.S +++ b/arch/x86/lib/copy_user_nocache_64.S @@ -32,7 +32,7 @@ jnz 100b 102: .section .fixup,"ax" -103: addl %r8d,%edx /* ecx is zerorest also */ +103: addl %ecx,%edx /* ecx is zerorest also */ jmp copy_user_handle_tail .previous @@ -108,7 +108,6 @@ ENTRY(__copy_user_nocache) jmp 60f 50: movl %ecx,%edx 60: sfence - movl %r8d,%ecx jmp copy_user_handle_tail .previous diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c index 3085f25b435..007bb06c750 100644 --- a/arch/x86/mm/gup.c +++ b/arch/x86/mm/gup.c @@ -223,14 +223,17 @@ int get_user_pages_fast(unsigned long start, int nr_pages, int write, struct page **pages) { struct mm_struct *mm = current->mm; - unsigned long end = start + (nr_pages << PAGE_SHIFT); - unsigned long addr = start; + unsigned long addr, len, end; unsigned long next; pgd_t *pgdp; int nr = 0; + start &= PAGE_MASK; + addr = start; + len = (unsigned long) nr_pages << PAGE_SHIFT; + end = start + len; if (unlikely(!access_ok(write ? VERIFY_WRITE : VERIFY_READ, - start, nr_pages*PAGE_SIZE))) + start, len))) goto slow_irqon; /* diff --git a/arch/x86/pci/fixup.c b/arch/x86/pci/fixup.c index ff3a6a33634..4bdaa590375 100644 --- a/arch/x86/pci/fixup.c +++ b/arch/x86/pci/fixup.c @@ -23,7 +23,8 @@ static void __devinit pci_fixup_i450nx(struct pci_dev *d) pci_read_config_byte(d, reg++, &busno); pci_read_config_byte(d, reg++, &suba); pci_read_config_byte(d, reg++, &subb); - DBG("i450NX PXB %d: %02x/%02x/%02x\n", pxb, busno, suba, subb); + dev_dbg(&d->dev, "i450NX PXB %d: %02x/%02x/%02x\n", pxb, busno, + suba, subb); if (busno) pci_scan_bus_with_sysdata(busno); /* Bus A */ if (suba < subb) diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c index a09505806b8..5807d1bc73f 100644 --- a/arch/x86/pci/i386.c +++ b/arch/x86/pci/i386.c @@ -128,10 +128,8 @@ static void __init pcibios_allocate_bus_resources(struct list_head *bus_list) pr = pci_find_parent_resource(dev, r); if (!r->start || !pr || request_resource(pr, r) < 0) { - printk(KERN_ERR "PCI: Cannot allocate " - "resource region %d " - "of bridge %s\n", - idx, pci_name(dev)); + dev_err(&dev->dev, "BAR %d: can't " + "allocate resource\n", idx); /* * Something is wrong with the region. * Invalidate the resource to prevent @@ -166,15 +164,15 @@ static void __init pcibios_allocate_resources(int pass) else disabled = !(command & PCI_COMMAND_MEMORY); if (pass == disabled) { - DBG("PCI: Resource %08lx-%08lx " - "(f=%lx, d=%d, p=%d)\n", - r->start, r->end, r->flags, disabled, pass); + dev_dbg(&dev->dev, "resource %#08llx-%#08llx " + "(f=%lx, d=%d, p=%d)\n", + (unsigned long long) r->start, + (unsigned long long) r->end, + r->flags, disabled, pass); pr = pci_find_parent_resource(dev, r); if (!pr || request_resource(pr, r) < 0) { - printk(KERN_ERR "PCI: Cannot allocate " - "resource region %d " - "of device %s\n", - idx, pci_name(dev)); + dev_err(&dev->dev, "BAR %d: can't " + "allocate resource\n", idx); /* We'll assign a new address later */ r->end -= r->start; r->start = 0; @@ -187,8 +185,7 @@ static void __init pcibios_allocate_resources(int pass) /* Turn the ROM off, leave the resource region, * but keep it unregistered. */ u32 reg; - DBG("PCI: Switching off ROM of %s\n", - pci_name(dev)); + dev_dbg(&dev->dev, "disabling ROM\n"); r->flags &= ~IORESOURCE_ROM_ENABLE; pci_read_config_dword(dev, dev->rom_base_reg, ®); @@ -257,8 +254,7 @@ void pcibios_set_master(struct pci_dev *dev) lat = pcibios_max_latency; else return; - printk(KERN_DEBUG "PCI: Setting latency timer of device %s to %d\n", - pci_name(dev), lat); + dev_printk(KERN_DEBUG, &dev->dev, "setting latency timer to %d\n", lat); pci_write_config_byte(dev, PCI_LATENCY_TIMER, lat); } diff --git a/arch/x86/pci/irq.c b/arch/x86/pci/irq.c index 6a06a2eb059..fec0123b33a 100644 --- a/arch/x86/pci/irq.c +++ b/arch/x86/pci/irq.c @@ -436,7 +436,7 @@ static int pirq_vlsi_get(struct pci_dev *router, struct pci_dev *dev, int pirq) { WARN_ON_ONCE(pirq >= 9); if (pirq > 8) { - printk(KERN_INFO "VLSI router pirq escape (%d)\n", pirq); + dev_info(&dev->dev, "VLSI router PIRQ escape (%d)\n", pirq); return 0; } return read_config_nybble(router, 0x74, pirq-1); @@ -446,7 +446,7 @@ static int pirq_vlsi_set(struct pci_dev *router, struct pci_dev *dev, int pirq, { WARN_ON_ONCE(pirq >= 9); if (pirq > 8) { - printk(KERN_INFO "VLSI router pirq escape (%d)\n", pirq); + dev_info(&dev->dev, "VLSI router PIRQ escape (%d)\n", pirq); return 0; } write_config_nybble(router, 0x74, pirq-1, irq); @@ -492,15 +492,17 @@ static int pirq_amd756_get(struct pci_dev *router, struct pci_dev *dev, int pirq irq = 0; if (pirq <= 4) irq = read_config_nybble(router, 0x56, pirq - 1); - printk(KERN_INFO "AMD756: dev %04x:%04x, router pirq : %d get irq : %2d\n", - dev->vendor, dev->device, pirq, irq); + dev_info(&dev->dev, + "AMD756: dev [%04x/%04x], router PIRQ %d get IRQ %d\n", + dev->vendor, dev->device, pirq, irq); return irq; } static int pirq_amd756_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq) { - printk(KERN_INFO "AMD756: dev %04x:%04x, router pirq : %d SET irq : %2d\n", - dev->vendor, dev->device, pirq, irq); + dev_info(&dev->dev, + "AMD756: dev [%04x/%04x], router PIRQ %d set IRQ %d\n", + dev->vendor, dev->device, pirq, irq); if (pirq <= 4) write_config_nybble(router, 0x56, pirq - 1, irq); return 1; @@ -730,7 +732,6 @@ static __init int ali_router_probe(struct irq_router *r, struct pci_dev *router, switch (device) { case PCI_DEVICE_ID_AL_M1533: case PCI_DEVICE_ID_AL_M1563: - printk(KERN_DEBUG "PCI: Using ALI IRQ Router\n"); r->name = "ALI"; r->get = pirq_ali_get; r->set = pirq_ali_set; @@ -840,11 +841,9 @@ static void __init pirq_find_router(struct irq_router *r) h->probe(r, pirq_router_dev, pirq_router_dev->device)) break; } - printk(KERN_INFO "PCI: Using IRQ router %s [%04x/%04x] at %s\n", - pirq_router.name, - pirq_router_dev->vendor, - pirq_router_dev->device, - pci_name(pirq_router_dev)); + dev_info(&pirq_router_dev->dev, "%s IRQ router [%04x/%04x]\n", + pirq_router.name, + pirq_router_dev->vendor, pirq_router_dev->device); /* The device remains referenced for the kernel lifetime */ } @@ -877,7 +876,7 @@ static int pcibios_lookup_irq(struct pci_dev *dev, int assign) /* Find IRQ pin */ pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin); if (!pin) { - DBG(KERN_DEBUG " -> no interrupt pin\n"); + dev_dbg(&dev->dev, "no interrupt pin\n"); return 0; } pin = pin - 1; @@ -887,20 +886,20 @@ static int pcibios_lookup_irq(struct pci_dev *dev, int assign) if (!pirq_table) return 0; - DBG(KERN_DEBUG "IRQ for %s[%c]", pci_name(dev), 'A' + pin); info = pirq_get_info(dev); if (!info) { - DBG(" -> not found in routing table\n" KERN_DEBUG); + dev_dbg(&dev->dev, "PCI INT %c not found in routing table\n", + 'A' + pin); return 0; } pirq = info->irq[pin].link; mask = info->irq[pin].bitmap; if (!pirq) { - DBG(" -> not routed\n" KERN_DEBUG); + dev_dbg(&dev->dev, "PCI INT %c not routed\n", 'A' + pin); return 0; } - DBG(" -> PIRQ %02x, mask %04x, excl %04x", pirq, mask, - pirq_table->exclusive_irqs); + dev_dbg(&dev->dev, "PCI INT %c -> PIRQ %02x, mask %04x, excl %04x", + 'A' + pin, pirq, mask, pirq_table->exclusive_irqs); mask &= pcibios_irq_mask; /* Work around broken HP Pavilion Notebooks which assign USB to @@ -930,10 +929,8 @@ static int pcibios_lookup_irq(struct pci_dev *dev, int assign) if (pci_probe & PCI_USE_PIRQ_MASK) newirq = 0; else - printk("\n" KERN_WARNING - "PCI: IRQ %i for device %s doesn't match PIRQ mask - try pci=usepirqmask\n" - KERN_DEBUG, newirq, - pci_name(dev)); + dev_warn(&dev->dev, "IRQ %d doesn't match PIRQ mask " + "%#x; try pci=usepirqmask\n", newirq, mask); } if (!newirq && assign) { for (i = 0; i < 16; i++) { @@ -944,39 +941,35 @@ static int pcibios_lookup_irq(struct pci_dev *dev, int assign) newirq = i; } } - DBG(" -> newirq=%d", newirq); + dev_dbg(&dev->dev, "PCI INT %c -> newirq %d", 'A' + pin, newirq); /* Check if it is hardcoded */ if ((pirq & 0xf0) == 0xf0) { irq = pirq & 0xf; - DBG(" -> hardcoded IRQ %d\n", irq); - msg = "Hardcoded"; + msg = "hardcoded"; } else if (r->get && (irq = r->get(pirq_router_dev, dev, pirq)) && \ ((!(pci_probe & PCI_USE_PIRQ_MASK)) || ((1 << irq) & mask))) { - DBG(" -> got IRQ %d\n", irq); - msg = "Found"; + msg = "found"; eisa_set_level_irq(irq); } else if (newirq && r->set && (dev->class >> 8) != PCI_CLASS_DISPLAY_VGA) { - DBG(" -> assigning IRQ %d", newirq); if (r->set(pirq_router_dev, dev, pirq, newirq)) { eisa_set_level_irq(newirq); - DBG(" ... OK\n"); - msg = "Assigned"; + msg = "assigned"; irq = newirq; } } if (!irq) { - DBG(" ... failed\n"); if (newirq && mask == (1 << newirq)) { - msg = "Guessed"; + msg = "guessed"; irq = newirq; - } else + } else { + dev_dbg(&dev->dev, "can't route interrupt\n"); return 0; + } } - printk(KERN_INFO "PCI: %s IRQ %d for device %s\n", msg, irq, - pci_name(dev)); + dev_info(&dev->dev, "%s PCI INT %c -> IRQ %d\n", msg, 'A' + pin, irq); /* Update IRQ for all devices with the same pirq value */ while ((dev2 = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev2)) != NULL) { @@ -996,17 +989,17 @@ static int pcibios_lookup_irq(struct pci_dev *dev, int assign) (!(pci_probe & PCI_USE_PIRQ_MASK) || \ ((1 << dev2->irq) & mask))) { #ifndef CONFIG_PCI_MSI - printk(KERN_INFO "IRQ routing conflict for %s, have irq %d, want irq %d\n", - pci_name(dev2), dev2->irq, irq); + dev_info(&dev2->dev, "IRQ routing conflict: " + "have IRQ %d, want IRQ %d\n", + dev2->irq, irq); #endif continue; } dev2->irq = irq; pirq_penalty[irq]++; if (dev != dev2) - printk(KERN_INFO - "PCI: Sharing IRQ %d with %s\n", - irq, pci_name(dev2)); + dev_info(&dev->dev, "sharing IRQ %d with %s\n", + irq, pci_name(dev2)); } } return 1; @@ -1025,8 +1018,7 @@ static void __init pcibios_fixup_irqs(void) * already in use. */ if (dev->irq >= 16) { - DBG(KERN_DEBUG "%s: ignoring bogus IRQ %d\n", - pci_name(dev), dev->irq); + dev_dbg(&dev->dev, "ignoring bogus IRQ %d\n", dev->irq); dev->irq = 0; } /* @@ -1070,12 +1062,12 @@ static void __init pcibios_fixup_irqs(void) irq = IO_APIC_get_PCI_irq_vector(bridge->bus->number, PCI_SLOT(bridge->devfn), pin); if (irq >= 0) - printk(KERN_WARNING "PCI: using PPB %s[%c] to get irq %d\n", - pci_name(bridge), 'A' + pin, irq); + dev_warn(&dev->dev, "using bridge %s INT %c to get IRQ %d\n", + pci_name(bridge), + 'A' + pin, irq); } if (irq >= 0) { - printk(KERN_INFO "PCI->APIC IRQ transform: %s[%c] -> IRQ %d\n", - pci_name(dev), 'A' + pin, irq); + dev_info(&dev->dev, "PCI->APIC IRQ transform: INT %c -> IRQ %d\n", 'A' + pin, irq); dev->irq = irq; } } @@ -1231,25 +1223,24 @@ static int pirq_enable_irq(struct pci_dev *dev) irq = IO_APIC_get_PCI_irq_vector(bridge->bus->number, PCI_SLOT(bridge->devfn), pin); if (irq >= 0) - printk(KERN_WARNING - "PCI: using PPB %s[%c] to get irq %d\n", - pci_name(bridge), - 'A' + pin, irq); + dev_warn(&dev->dev, "using bridge %s " + "INT %c to get IRQ %d\n", + pci_name(bridge), 'A' + pin, + irq); dev = bridge; } dev = temp_dev; if (irq >= 0) { - printk(KERN_INFO - "PCI->APIC IRQ transform: %s[%c] -> IRQ %d\n", - pci_name(dev), 'A' + pin, irq); + dev_info(&dev->dev, "PCI->APIC IRQ transform: " + "INT %c -> IRQ %d\n", 'A' + pin, irq); dev->irq = irq; return 0; } else - msg = " Probably buggy MP table."; + msg = "; probably buggy MP table"; } else if (pci_probe & PCI_BIOS_IRQ_SCAN) msg = ""; else - msg = " Please try using pci=biosirq."; + msg = "; please try using pci=biosirq"; /* * With IDE legacy devices the IRQ lookup failure is not @@ -1259,9 +1250,8 @@ static int pirq_enable_irq(struct pci_dev *dev) !(dev->class & 0x5)) return 0; - printk(KERN_WARNING - "PCI: No IRQ known for interrupt pin %c of device %s.%s\n", - 'A' + pin, pci_name(dev), msg); + dev_warn(&dev->dev, "can't find IRQ for PCI INT %c%s\n", + 'A' + pin, msg); } return 0; } diff --git a/arch/x86/pci/numaq_32.c b/arch/x86/pci/numaq_32.c index f4b16dc11da..1177845d318 100644 --- a/arch/x86/pci/numaq_32.c +++ b/arch/x86/pci/numaq_32.c @@ -131,13 +131,14 @@ static void __devinit pci_fixup_i450nx(struct pci_dev *d) u8 busno, suba, subb; int quad = BUS2QUAD(d->bus->number); - printk("PCI: Searching for i450NX host bridges on %s\n", pci_name(d)); + dev_info(&d->dev, "searching for i450NX host bridges\n"); reg = 0xd0; for(pxb=0; pxb<2; pxb++) { pci_read_config_byte(d, reg++, &busno); pci_read_config_byte(d, reg++, &suba); pci_read_config_byte(d, reg++, &subb); - DBG("i450NX PXB %d: %02x/%02x/%02x\n", pxb, busno, suba, subb); + dev_dbg(&d->dev, "i450NX PXB %d: %02x/%02x/%02x\n", + pxb, busno, suba, subb); if (busno) { /* Bus A */ pci_scan_bus_with_sysdata(QUADLOCAL2BUS(quad, busno)); diff --git a/drivers/acpi/pci_slot.c b/drivers/acpi/pci_slot.c index dd376f7ad09..d5b4ef89887 100644 --- a/drivers/acpi/pci_slot.c +++ b/drivers/acpi/pci_slot.c @@ -76,9 +76,9 @@ static struct acpi_pci_driver acpi_pci_slot_driver = { }; static int -check_slot(acpi_handle handle, int *device, unsigned long *sun) +check_slot(acpi_handle handle, unsigned long *sun) { - int retval = 0; + int device = -1; unsigned long adr, sta; acpi_status status; struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL }; @@ -89,32 +89,27 @@ check_slot(acpi_handle handle, int *device, unsigned long *sun) if (check_sta_before_sun) { /* If SxFy doesn't have _STA, we just assume it's there */ status = acpi_evaluate_integer(handle, "_STA", NULL, &sta); - if (ACPI_SUCCESS(status) && !(sta & ACPI_STA_DEVICE_PRESENT)) { - retval = -1; + if (ACPI_SUCCESS(status) && !(sta & ACPI_STA_DEVICE_PRESENT)) goto out; - } } status = acpi_evaluate_integer(handle, "_ADR", NULL, &adr); if (ACPI_FAILURE(status)) { dbg("_ADR returned %d on %s\n", status, (char *)buffer.pointer); - retval = -1; goto out; } - *device = (adr >> 16) & 0xffff; - /* No _SUN == not a slot == bail */ status = acpi_evaluate_integer(handle, "_SUN", NULL, sun); if (ACPI_FAILURE(status)) { dbg("_SUN returned %d on %s\n", status, (char *)buffer.pointer); - retval = -1; goto out; } + device = (adr >> 16) & 0xffff; out: kfree(buffer.pointer); - return retval; + return device; } struct callback_args { @@ -144,7 +139,8 @@ register_slot(acpi_handle handle, u32 lvl, void *context, void **rv) struct callback_args *parent_context = context; struct pci_bus *pci_bus = parent_context->pci_bus; - if (check_slot(handle, &device, &sun)) + device = check_slot(handle, &sun); + if (device < 0) return AE_OK; slot = kmalloc(sizeof(*slot), GFP_KERNEL); diff --git a/drivers/acpi/processor_perflib.c b/drivers/acpi/processor_perflib.c index b4749969c6b..0133af49cf0 100644 --- a/drivers/acpi/processor_perflib.c +++ b/drivers/acpi/processor_perflib.c @@ -64,7 +64,13 @@ static DEFINE_MUTEX(performance_mutex); * policy is adjusted accordingly. */ -static unsigned int ignore_ppc = 0; +/* ignore_ppc: + * -1 -> cpufreq low level drivers not initialized -> _PSS, etc. not called yet + * ignore _PPC + * 0 -> cpufreq low level drivers initialized -> consider _PPC values + * 1 -> ignore _PPC totally -> forced by user through boot param + */ +static unsigned int ignore_ppc = -1; module_param(ignore_ppc, uint, 0644); MODULE_PARM_DESC(ignore_ppc, "If the frequency of your machine gets wrongly" \ "limited by BIOS, this should help"); @@ -72,7 +78,7 @@ MODULE_PARM_DESC(ignore_ppc, "If the frequency of your machine gets wrongly" \ #define PPC_REGISTERED 1 #define PPC_IN_USE 2 -static int acpi_processor_ppc_status = 0; +static int acpi_processor_ppc_status; static int acpi_processor_ppc_notifier(struct notifier_block *nb, unsigned long event, void *data) @@ -81,13 +87,18 @@ static int acpi_processor_ppc_notifier(struct notifier_block *nb, struct acpi_processor *pr; unsigned int ppc = 0; - if (ignore_ppc) + if (event == CPUFREQ_START && ignore_ppc <= 0) { + ignore_ppc = 0; return 0; + } - mutex_lock(&performance_mutex); + if (ignore_ppc) + return 0; if (event != CPUFREQ_INCOMPATIBLE) - goto out; + return 0; + + mutex_lock(&performance_mutex); pr = per_cpu(processors, policy->cpu); if (!pr || !pr->performance) diff --git a/drivers/acpi/processor_throttling.c b/drivers/acpi/processor_throttling.c index a2c3f9cfa54..a56fc6c4394 100644 --- a/drivers/acpi/processor_throttling.c +++ b/drivers/acpi/processor_throttling.c @@ -827,7 +827,6 @@ static int acpi_processor_get_throttling_ptc(struct acpi_processor *pr) static int acpi_processor_get_throttling(struct acpi_processor *pr) { cpumask_t saved_mask; - cpumask_of_cpu_ptr_declare(new_mask); int ret; if (!pr) @@ -839,8 +838,7 @@ static int acpi_processor_get_throttling(struct acpi_processor *pr) * Migrate task to the cpu pointed by pr. */ saved_mask = current->cpus_allowed; - cpumask_of_cpu_ptr_next(new_mask, pr->id); - set_cpus_allowed_ptr(current, new_mask); + set_cpus_allowed_ptr(current, &cpumask_of_cpu(pr->id)); ret = pr->throttling.acpi_processor_get_throttling(pr); /* restore the previous state */ set_cpus_allowed_ptr(current, &saved_mask); @@ -989,7 +987,6 @@ static int acpi_processor_set_throttling_ptc(struct acpi_processor *pr, int acpi_processor_set_throttling(struct acpi_processor *pr, int state) { cpumask_t saved_mask; - cpumask_of_cpu_ptr_declare(new_mask); int ret = 0; unsigned int i; struct acpi_processor *match_pr; @@ -1028,8 +1025,7 @@ int acpi_processor_set_throttling(struct acpi_processor *pr, int state) * it can be called only for the cpu pointed by pr. */ if (p_throttling->shared_type == DOMAIN_COORD_TYPE_SW_ANY) { - cpumask_of_cpu_ptr_next(new_mask, pr->id); - set_cpus_allowed_ptr(current, new_mask); + set_cpus_allowed_ptr(current, &cpumask_of_cpu(pr->id)); ret = p_throttling->acpi_processor_set_throttling(pr, t_state.target_state); } else { @@ -1060,8 +1056,7 @@ int acpi_processor_set_throttling(struct acpi_processor *pr, int state) continue; } t_state.cpu = i; - cpumask_of_cpu_ptr_next(new_mask, i); - set_cpus_allowed_ptr(current, new_mask); + set_cpus_allowed_ptr(current, &cpumask_of_cpu(i)); ret = match_pr->throttling. acpi_processor_set_throttling( match_pr, t_state.target_state); diff --git a/drivers/char/ipmi/ipmi_si_intf.c b/drivers/char/ipmi/ipmi_si_intf.c index 192688344ed..f52931e1c16 100644 --- a/drivers/char/ipmi/ipmi_si_intf.c +++ b/drivers/char/ipmi/ipmi_si_intf.c @@ -66,8 +66,8 @@ #include <linux/ctype.h> #ifdef CONFIG_PPC_OF -#include <asm/of_device.h> -#include <asm/of_platform.h> +#include <linux/of_device.h> +#include <linux/of_platform.h> #endif #define PFX "ipmi_si: " diff --git a/drivers/char/mxser.c b/drivers/char/mxser.c index e30575e8764..b638403e8e9 100644 --- a/drivers/char/mxser.c +++ b/drivers/char/mxser.c @@ -1612,8 +1612,10 @@ static int mxser_ioctl_special(unsigned int cmd, void __user *argp) switch (cmd) { case MOXA_GET_MAJOR: - printk(KERN_WARNING "mxser: '%s' uses deprecated ioctl %x, fix " - "your userspace\n", current->comm, cmd); + if (printk_ratelimit()) + printk(KERN_WARNING "mxser: '%s' uses deprecated ioctl " + "%x (GET_MAJOR), fix your userspace\n", + current->comm, cmd); return put_user(ttymajor, (int __user *)argp); case MOXA_CHKPORTENABLE: diff --git a/drivers/char/pcmcia/ipwireless/hardware.c b/drivers/char/pcmcia/ipwireless/hardware.c index 7d500f82195..4c1820cad71 100644 --- a/drivers/char/pcmcia/ipwireless/hardware.c +++ b/drivers/char/pcmcia/ipwireless/hardware.c @@ -568,7 +568,7 @@ static struct ipw_rx_packet *pool_allocate(struct ipw_hardware *hw, list_del(&packet->queue); } else { const int min_capacity = - ipwireless_ppp_mru(hw->network + 2); + ipwireless_ppp_mru(hw->network) + 2; int new_capacity; spin_unlock_irqrestore(&hw->lock, flags); diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 8d6a3ff0267..8a67f16987d 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -825,6 +825,9 @@ static int cpufreq_add_dev(struct sys_device *sys_dev) policy->user_policy.min = policy->cpuinfo.min_freq; policy->user_policy.max = policy->cpuinfo.max_freq; + blocking_notifier_call_chain(&cpufreq_policy_notifier_list, + CPUFREQ_START, policy); + #ifdef CONFIG_SMP #ifdef CONFIG_HOTPLUG_CPU diff --git a/drivers/firmware/dcdbas.c b/drivers/firmware/dcdbas.c index c66817e7717..50a071f1c94 100644 --- a/drivers/firmware/dcdbas.c +++ b/drivers/firmware/dcdbas.c @@ -245,7 +245,6 @@ static ssize_t host_control_on_shutdown_store(struct device *dev, static int smi_request(struct smi_cmd *smi_cmd) { cpumask_t old_mask; - cpumask_of_cpu_ptr(new_mask, 0); int ret = 0; if (smi_cmd->magic != SMI_CMD_MAGIC) { @@ -256,7 +255,7 @@ static int smi_request(struct smi_cmd *smi_cmd) /* SMI requires CPU 0 */ old_mask = current->cpus_allowed; - set_cpus_allowed_ptr(current, new_mask); + set_cpus_allowed_ptr(current, &cpumask_of_cpu(0)); if (smp_processor_id() != 0) { dev_dbg(&dcdbas_pdev->dev, "%s: failed to get CPU 0\n", __func__); diff --git a/drivers/firmware/iscsi_ibft_find.c b/drivers/firmware/iscsi_ibft_find.c index 11f17440fea..d53fbbfefa3 100644 --- a/drivers/firmware/iscsi_ibft_find.c +++ b/drivers/firmware/iscsi_ibft_find.c @@ -81,4 +81,3 @@ void __init reserve_ibft_region(void) if (ibft_addr) reserve_bootmem(pos, PAGE_ALIGN(len), BOOTMEM_DEFAULT); } -EXPORT_SYMBOL_GPL(reserve_ibft_region); diff --git a/drivers/ide/ppc/pmac.c b/drivers/ide/ppc/pmac.c index c521bf6e1bf..fa2be26272d 100644 --- a/drivers/ide/ppc/pmac.c +++ b/drivers/ide/ppc/pmac.c @@ -1086,6 +1086,11 @@ static int __devinit pmac_ide_setup_device(pmac_ide_hwif_t *pmif, hw_regs_t *hw) /* Make sure we have sane timings */ sanitize_timings(pmif); + host = ide_host_alloc(&d, hws); + if (host == NULL) + return -ENOMEM; + hwif = host->ports[0]; + #ifndef CONFIG_PPC64 /* XXX FIXME: Media bay stuff need re-organizing */ if (np->parent && np->parent->name @@ -1119,11 +1124,11 @@ static int __devinit pmac_ide_setup_device(pmac_ide_hwif_t *pmif, hw_regs_t *hw) pmif->mdev ? "macio" : "PCI", pmif->aapl_bus_id, pmif->mediabay ? " (mediabay)" : "", hw->irq); - rc = ide_host_add(&d, hws, &host); - if (rc) + rc = ide_host_register(host, &d, hws); + if (rc) { + ide_host_free(host); return rc; - - hwif = host->ports[0]; + } return 0; } diff --git a/drivers/lguest/core.c b/drivers/lguest/core.c index 5eea4356d70..90663e01a56 100644 --- a/drivers/lguest/core.c +++ b/drivers/lguest/core.c @@ -135,6 +135,7 @@ static void unmap_switcher(void) /* Now we just need to free the pages we copied the switcher into */ for (i = 0; i < TOTAL_SWITCHER_PAGES; i++) __free_pages(switcher_page[i], 0); + kfree(switcher_page); } /*H:032 diff --git a/drivers/lguest/interrupts_and_traps.c b/drivers/lguest/interrupts_and_traps.c index 0414ddf8758..a1039068f95 100644 --- a/drivers/lguest/interrupts_and_traps.c +++ b/drivers/lguest/interrupts_and_traps.c @@ -406,7 +406,8 @@ void load_guest_idt_entry(struct lg_cpu *cpu, unsigned int num, u32 lo, u32 hi) * deliver_trap() to bounce it back into the Guest. */ static void default_idt_entry(struct desc_struct *idt, int trap, - const unsigned long handler) + const unsigned long handler, + const struct desc_struct *base) { /* A present interrupt gate. */ u32 flags = 0x8e00; @@ -415,6 +416,10 @@ static void default_idt_entry(struct desc_struct *idt, * the Guest to use the "int" instruction to trigger it. */ if (trap == LGUEST_TRAP_ENTRY) flags |= (GUEST_PL << 13); + else if (base) + /* Copy priv. level from what Guest asked for. This allows + * debug (int 3) traps from Guest userspace, for example. */ + flags |= (base->b & 0x6000); /* Now pack it into the IDT entry in its weird format. */ idt->a = (LGUEST_CS<<16) | (handler&0x0000FFFF); @@ -428,7 +433,7 @@ void setup_default_idt_entries(struct lguest_ro_state *state, unsigned int i; for (i = 0; i < ARRAY_SIZE(state->guest_idt); i++) - default_idt_entry(&state->guest_idt[i], i, def[i]); + default_idt_entry(&state->guest_idt[i], i, def[i], NULL); } /*H:240 We don't use the IDT entries in the "struct lguest" directly, instead @@ -442,6 +447,8 @@ void copy_traps(const struct lg_cpu *cpu, struct desc_struct *idt, /* We can simply copy the direct traps, otherwise we use the default * ones in the Switcher: they will return to the Host. */ for (i = 0; i < ARRAY_SIZE(cpu->arch.idt); i++) { + const struct desc_struct *gidt = &cpu->arch.idt[i]; + /* If no Guest can ever override this trap, leave it alone. */ if (!direct_trap(i)) continue; @@ -449,12 +456,15 @@ void copy_traps(const struct lg_cpu *cpu, struct desc_struct *idt, /* Only trap gates (type 15) can go direct to the Guest. * Interrupt gates (type 14) disable interrupts as they are * entered, which we never let the Guest do. Not present - * entries (type 0x0) also can't go direct, of course. */ - if (idt_type(cpu->arch.idt[i].a, cpu->arch.idt[i].b) == 0xF) - idt[i] = cpu->arch.idt[i]; + * entries (type 0x0) also can't go direct, of course. + * + * If it can't go direct, we still need to copy the priv. level: + * they might want to give userspace access to a software + * interrupt. */ + if (idt_type(gidt->a, gidt->b) == 0xF) + idt[i] = *gidt; else - /* Reset it to the default. */ - default_idt_entry(&idt[i], i, def[i]); + default_idt_entry(&idt[i], i, def[i], gidt); } } diff --git a/drivers/lguest/x86/core.c b/drivers/lguest/x86/core.c index 95dfda52b4f..bf7942327bd 100644 --- a/drivers/lguest/x86/core.c +++ b/drivers/lguest/x86/core.c @@ -480,7 +480,7 @@ void __init lguest_arch_host_init(void) * bit on its CPU, depending on the argument (0 == unset). */ on_each_cpu(adjust_pge, (void *)0, 1); /* Turn off the feature in the global feature set. */ - clear_bit(X86_FEATURE_PGE, boot_cpu_data.x86_capability); + clear_cpu_cap(&boot_cpu_data, X86_FEATURE_PGE); } put_online_cpus(); }; @@ -491,7 +491,7 @@ void __exit lguest_arch_host_fini(void) /* If we had PGE before we started, turn it back on now. */ get_online_cpus(); if (cpu_had_pge) { - set_bit(X86_FEATURE_PGE, boot_cpu_data.x86_capability); + set_cpu_cap(&boot_cpu_data, X86_FEATURE_PGE); /* adjust_pge's argument "1" means set PGE. */ on_each_cpu(adjust_pge, (void *)1, 1); } diff --git a/drivers/mfd/mfd-core.c b/drivers/mfd/mfd-core.c index 0454be4266c..9c9c126ed33 100644 --- a/drivers/mfd/mfd-core.c +++ b/drivers/mfd/mfd-core.c @@ -15,24 +15,24 @@ #include <linux/platform_device.h> #include <linux/mfd/core.h> -static int mfd_add_device(struct platform_device *parent, - const struct mfd_cell *cell, - struct resource *mem_base, - int irq_base) +static int mfd_add_device(struct device *parent, int id, + const struct mfd_cell *cell, + struct resource *mem_base, + int irq_base) { struct resource res[cell->num_resources]; struct platform_device *pdev; int ret = -ENOMEM; int r; - pdev = platform_device_alloc(cell->name, parent->id); + pdev = platform_device_alloc(cell->name, id); if (!pdev) goto fail_alloc; - pdev->dev.parent = &parent->dev; + pdev->dev.parent = parent; ret = platform_device_add_data(pdev, - cell, sizeof(struct mfd_cell)); + cell->platform_data, cell->data_size); if (ret) goto fail_device; @@ -75,17 +75,16 @@ fail_alloc: return ret; } -int mfd_add_devices( - struct platform_device *parent, - const struct mfd_cell *cells, int n_devs, - struct resource *mem_base, - int irq_base) +int mfd_add_devices(struct device *parent, int id, + const struct mfd_cell *cells, int n_devs, + struct resource *mem_base, + int irq_base) { int i; int ret = 0; for (i = 0; i < n_devs; i++) { - ret = mfd_add_device(parent, cells + i, mem_base, irq_base); + ret = mfd_add_device(parent, id, cells + i, mem_base, irq_base); if (ret) break; } @@ -99,14 +98,13 @@ EXPORT_SYMBOL(mfd_add_devices); static int mfd_remove_devices_fn(struct device *dev, void *unused) { - platform_device_unregister( - container_of(dev, struct platform_device, dev)); + platform_device_unregister(to_platform_device(dev)); return 0; } -void mfd_remove_devices(struct platform_device *parent) +void mfd_remove_devices(struct device *parent) { - device_for_each_child(&parent->dev, NULL, mfd_remove_devices_fn); + device_for_each_child(parent, NULL, mfd_remove_devices_fn); } EXPORT_SYMBOL(mfd_remove_devices); diff --git a/drivers/mfd/tc6393xb.c b/drivers/mfd/tc6393xb.c index 94e55e8e7ce..f4fd797c159 100644 --- a/drivers/mfd/tc6393xb.c +++ b/drivers/mfd/tc6393xb.c @@ -466,8 +466,12 @@ static int __devinit tc6393xb_probe(struct platform_device *dev) tc6393xb_attach_irq(dev); tc6393xb_cells[TC6393XB_CELL_NAND].driver_data = tcpd->nand_data; + tc6393xb_cells[TC6393XB_CELL_NAND].platform_data = + &tc6393xb_cells[TC6393XB_CELL_NAND]; + tc6393xb_cells[TC6393XB_CELL_NAND].data_size = + sizeof(tc6393xb_cells[TC6393XB_CELL_NAND]); - retval = mfd_add_devices(dev, + retval = mfd_add_devices(&dev->dev, dev->id, tc6393xb_cells, ARRAY_SIZE(tc6393xb_cells), iomem, tcpd->irq_base); @@ -501,7 +505,7 @@ static int __devexit tc6393xb_remove(struct platform_device *dev) struct tc6393xb *tc6393xb = platform_get_drvdata(dev); int ret; - mfd_remove_devices(dev); + mfd_remove_devices(&dev->dev); if (tc6393xb->irq) tc6393xb_detach_irq(dev); diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig index f5ade1904aa..fa50e9ede0e 100644 --- a/drivers/misc/Kconfig +++ b/drivers/misc/Kconfig @@ -426,7 +426,7 @@ config ENCLOSURE_SERVICES config SGI_XP tristate "Support communication between SGI SSIs" - depends on IA64_GENERIC || IA64_SGI_SN2 + depends on IA64_GENERIC || IA64_SGI_SN2 || IA64_SGI_UV || (X86_64 && SMP) select IA64_UNCACHED_ALLOCATOR if IA64_GENERIC || IA64_SGI_SN2 select GENERIC_ALLOCATOR if IA64_GENERIC || IA64_SGI_SN2 ---help--- @@ -450,4 +450,27 @@ config HP_ILO To compile this driver as a module, choose M here: the module will be called hpilo. +config SGI_GRU + tristate "SGI GRU driver" + depends on (X86_64 || IA64_SGI_UV || IA64_GENERIC) && SMP + default n + select MMU_NOTIFIER + ---help--- + The GRU is a hardware resource located in the system chipset. The GRU + contains memory that can be mmapped into the user address space. This memory is + used to communicate with the GRU to perform functions such as load/store, + scatter/gather, bcopy, AMOs, etc. The GRU is directly accessed by user + instructions using user virtual addresses. GRU instructions (ex., bcopy) use + user virtual addresses for operands. + + If you are not running on a SGI UV system, say N. + +config SGI_GRU_DEBUG + bool "SGI GRU driver debug" + depends on SGI_GRU + default n + ---help--- + This option enables addition debugging code for the SGI GRU driver. If + you are unsure, say N. + endif # MISC_DEVICES diff --git a/drivers/misc/Makefile b/drivers/misc/Makefile index f5e273420c0..c6c13f60b45 100644 --- a/drivers/misc/Makefile +++ b/drivers/misc/Makefile @@ -28,4 +28,5 @@ obj-$(CONFIG_INTEL_MENLOW) += intel_menlow.o obj-$(CONFIG_ENCLOSURE_SERVICES) += enclosure.o obj-$(CONFIG_KGDB_TESTS) += kgdbts.o obj-$(CONFIG_SGI_XP) += sgi-xp/ +obj-$(CONFIG_SGI_GRU) += sgi-gru/ obj-$(CONFIG_HP_ILO) += hpilo.o diff --git a/drivers/misc/sgi-gru/Makefile b/drivers/misc/sgi-gru/Makefile new file mode 100644 index 00000000000..d03597a521b --- /dev/null +++ b/drivers/misc/sgi-gru/Makefile @@ -0,0 +1,3 @@ +obj-$(CONFIG_SGI_GRU) := gru.o +gru-y := grufile.o grumain.o grufault.o grutlbpurge.o gruprocfs.o grukservices.o + diff --git a/drivers/misc/sgi-gru/gru.h b/drivers/misc/sgi-gru/gru.h new file mode 100644 index 00000000000..40df7cb3f0a --- /dev/null +++ b/drivers/misc/sgi-gru/gru.h @@ -0,0 +1,67 @@ +/* + * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef __GRU_H__ +#define __GRU_H__ + +/* + * GRU architectural definitions + */ +#define GRU_CACHE_LINE_BYTES 64 +#define GRU_HANDLE_STRIDE 256 +#define GRU_CB_BASE 0 +#define GRU_DS_BASE 0x20000 + +/* + * Size used to map GRU GSeg + */ +#if defined CONFIG_IA64 +#define GRU_GSEG_PAGESIZE (256 * 1024UL) +#elif defined CONFIG_X86_64 +#define GRU_GSEG_PAGESIZE (256 * 1024UL) /* ZZZ 2MB ??? */ +#else +#error "Unsupported architecture" +#endif + +/* + * Structure for obtaining GRU resource information + */ +struct gru_chiplet_info { + int node; + int chiplet; + int blade; + int total_dsr_bytes; + int total_cbr; + int total_user_dsr_bytes; + int total_user_cbr; + int free_user_dsr_bytes; + int free_user_cbr; +}; + +/* Flags for GRU options on the gru_create_context() call */ +/* Select one of the follow 4 options to specify how TLB misses are handled */ +#define GRU_OPT_MISS_DEFAULT 0x0000 /* Use default mode */ +#define GRU_OPT_MISS_USER_POLL 0x0001 /* User will poll CB for faults */ +#define GRU_OPT_MISS_FMM_INTR 0x0002 /* Send interrupt to cpu to + handle fault */ +#define GRU_OPT_MISS_FMM_POLL 0x0003 /* Use system polling thread */ +#define GRU_OPT_MISS_MASK 0x0003 /* Mask for TLB MISS option */ + + + +#endif /* __GRU_H__ */ diff --git a/drivers/misc/sgi-gru/gru_instructions.h b/drivers/misc/sgi-gru/gru_instructions.h new file mode 100644 index 00000000000..0dc36225c7c --- /dev/null +++ b/drivers/misc/sgi-gru/gru_instructions.h @@ -0,0 +1,669 @@ +/* + * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef __GRU_INSTRUCTIONS_H__ +#define __GRU_INSTRUCTIONS_H__ + +#define gru_flush_cache_hook(p) +#define gru_emulator_wait_hook(p, w) + +/* + * Architecture dependent functions + */ + +#if defined CONFIG_IA64 +#include <linux/compiler.h> +#include <asm/intrinsics.h> +#define __flush_cache(p) ia64_fc(p) +/* Use volatile on IA64 to ensure ordering via st4.rel */ +#define gru_ordered_store_int(p,v) \ + do { \ + barrier(); \ + *((volatile int *)(p)) = v; /* force st.rel */ \ + } while (0) +#elif defined CONFIG_X86_64 +#define __flush_cache(p) clflush(p) +#define gru_ordered_store_int(p,v) \ + do { \ + barrier(); \ + *(int *)p = v; \ + } while (0) +#else +#error "Unsupported architecture" +#endif + +/* + * Control block status and exception codes + */ +#define CBS_IDLE 0 +#define CBS_EXCEPTION 1 +#define CBS_ACTIVE 2 +#define CBS_CALL_OS 3 + +/* CB substatus bitmasks */ +#define CBSS_MSG_QUEUE_MASK 7 +#define CBSS_IMPLICIT_ABORT_ACTIVE_MASK 8 + +/* CB substatus message queue values (low 3 bits of substatus) */ +#define CBSS_NO_ERROR 0 +#define CBSS_LB_OVERFLOWED 1 +#define CBSS_QLIMIT_REACHED 2 +#define CBSS_PAGE_OVERFLOW 3 +#define CBSS_AMO_NACKED 4 +#define CBSS_PUT_NACKED 5 + +/* + * Structure used to fetch exception detail for CBs that terminate with + * CBS_EXCEPTION + */ +struct control_block_extended_exc_detail { + unsigned long cb; + int opc; + int ecause; + int exopc; + long exceptdet0; + int exceptdet1; +}; + +/* + * Instruction formats + */ + +/* + * Generic instruction format. + * This definition has precise bit field definitions. + */ +struct gru_instruction_bits { + /* DW 0 - low */ + unsigned int icmd: 1; + unsigned char ima: 3; /* CB_DelRep, unmapped mode */ + unsigned char reserved0: 4; + unsigned int xtype: 3; + unsigned int iaa0: 2; + unsigned int iaa1: 2; + unsigned char reserved1: 1; + unsigned char opc: 8; /* opcode */ + unsigned char exopc: 8; /* extended opcode */ + /* DW 0 - high */ + unsigned int idef2: 22; /* TRi0 */ + unsigned char reserved2: 2; + unsigned char istatus: 2; + unsigned char isubstatus:4; + unsigned char reserved3: 2; + /* DW 1 */ + unsigned long idef4; /* 42 bits: TRi1, BufSize */ + /* DW 2-6 */ + unsigned long idef1; /* BAddr0 */ + unsigned long idef5; /* Nelem */ + unsigned long idef6; /* Stride, Operand1 */ + unsigned long idef3; /* BAddr1, Value, Operand2 */ + unsigned long reserved4; + /* DW 7 */ + unsigned long avalue; /* AValue */ +}; + +/* + * Generic instruction with friendlier names. This format is used + * for inline instructions. + */ +struct gru_instruction { + /* DW 0 */ + unsigned int op32; /* icmd,xtype,iaa0,ima,opc */ + unsigned int tri0; + unsigned long tri1_bufsize; /* DW 1 */ + unsigned long baddr0; /* DW 2 */ + unsigned long nelem; /* DW 3 */ + unsigned long op1_stride; /* DW 4 */ + unsigned long op2_value_baddr1; /* DW 5 */ + unsigned long reserved0; /* DW 6 */ + unsigned long avalue; /* DW 7 */ +}; + +/* Some shifts and masks for the low 32 bits of a GRU command */ +#define GRU_CB_ICMD_SHFT 0 +#define GRU_CB_ICMD_MASK 0x1 +#define GRU_CB_XTYPE_SHFT 8 +#define GRU_CB_XTYPE_MASK 0x7 +#define GRU_CB_IAA0_SHFT 11 +#define GRU_CB_IAA0_MASK 0x3 +#define GRU_CB_IAA1_SHFT 13 +#define GRU_CB_IAA1_MASK 0x3 +#define GRU_CB_IMA_SHFT 1 +#define GRU_CB_IMA_MASK 0x3 +#define GRU_CB_OPC_SHFT 16 +#define GRU_CB_OPC_MASK 0xff +#define GRU_CB_EXOPC_SHFT 24 +#define GRU_CB_EXOPC_MASK 0xff + +/* GRU instruction opcodes (opc field) */ +#define OP_NOP 0x00 +#define OP_BCOPY 0x01 +#define OP_VLOAD 0x02 +#define OP_IVLOAD 0x03 +#define OP_VSTORE 0x04 +#define OP_IVSTORE 0x05 +#define OP_VSET 0x06 +#define OP_IVSET 0x07 +#define OP_MESQ 0x08 +#define OP_GAMXR 0x09 +#define OP_GAMIR 0x0a +#define OP_GAMIRR 0x0b +#define OP_GAMER 0x0c +#define OP_GAMERR 0x0d +#define OP_BSTORE 0x0e +#define OP_VFLUSH 0x0f + + +/* Extended opcodes values (exopc field) */ + +/* GAMIR - AMOs with implicit operands */ +#define EOP_IR_FETCH 0x01 /* Plain fetch of memory */ +#define EOP_IR_CLR 0x02 /* Fetch and clear */ +#define EOP_IR_INC 0x05 /* Fetch and increment */ +#define EOP_IR_DEC 0x07 /* Fetch and decrement */ +#define EOP_IR_QCHK1 0x0d /* Queue check, 64 byte msg */ +#define EOP_IR_QCHK2 0x0e /* Queue check, 128 byte msg */ + +/* GAMIRR - Registered AMOs with implicit operands */ +#define EOP_IRR_FETCH 0x01 /* Registered fetch of memory */ +#define EOP_IRR_CLR 0x02 /* Registered fetch and clear */ +#define EOP_IRR_INC 0x05 /* Registered fetch and increment */ +#define EOP_IRR_DEC 0x07 /* Registered fetch and decrement */ +#define EOP_IRR_DECZ 0x0f /* Registered fetch and decrement, update on zero*/ + +/* GAMER - AMOs with explicit operands */ +#define EOP_ER_SWAP 0x00 /* Exchange argument and memory */ +#define EOP_ER_OR 0x01 /* Logical OR with memory */ +#define EOP_ER_AND 0x02 /* Logical AND with memory */ +#define EOP_ER_XOR 0x03 /* Logical XOR with memory */ +#define EOP_ER_ADD 0x04 /* Add value to memory */ +#define EOP_ER_CSWAP 0x08 /* Compare with operand2, write operand1 if match*/ +#define EOP_ER_CADD 0x0c /* Queue check, operand1*64 byte msg */ + +/* GAMERR - Registered AMOs with explicit operands */ +#define EOP_ERR_SWAP 0x00 /* Exchange argument and memory */ +#define EOP_ERR_OR 0x01 /* Logical OR with memory */ +#define EOP_ERR_AND 0x02 /* Logical AND with memory */ +#define EOP_ERR_XOR 0x03 /* Logical XOR with memory */ +#define EOP_ERR_ADD 0x04 /* Add value to memory */ +#define EOP_ERR_CSWAP 0x08 /* Compare with operand2, write operand1 if match*/ +#define EOP_ERR_EPOLL 0x09 /* Poll for equality */ +#define EOP_ERR_NPOLL 0x0a /* Poll for inequality */ + +/* GAMXR - SGI Arithmetic unit */ +#define EOP_XR_CSWAP 0x0b /* Masked compare exchange */ + + +/* Transfer types (xtype field) */ +#define XTYPE_B 0x0 /* byte */ +#define XTYPE_S 0x1 /* short (2-byte) */ +#define XTYPE_W 0x2 /* word (4-byte) */ +#define XTYPE_DW 0x3 /* doubleword (8-byte) */ +#define XTYPE_CL 0x6 /* cacheline (64-byte) */ + + +/* Instruction access attributes (iaa0, iaa1 fields) */ +#define IAA_RAM 0x0 /* normal cached RAM access */ +#define IAA_NCRAM 0x2 /* noncoherent RAM access */ +#define IAA_MMIO 0x1 /* noncoherent memory-mapped I/O space */ +#define IAA_REGISTER 0x3 /* memory-mapped registers, etc. */ + + +/* Instruction mode attributes (ima field) */ +#define IMA_MAPPED 0x0 /* Virtual mode */ +#define IMA_CB_DELAY 0x1 /* hold read responses until status changes */ +#define IMA_UNMAPPED 0x2 /* bypass the TLBs (OS only) */ +#define IMA_INTERRUPT 0x4 /* Interrupt when instruction completes */ + +/* CBE ecause bits */ +#define CBE_CAUSE_RI (1 << 0) +#define CBE_CAUSE_INVALID_INSTRUCTION (1 << 1) +#define CBE_CAUSE_UNMAPPED_MODE_FORBIDDEN (1 << 2) +#define CBE_CAUSE_PE_CHECK_DATA_ERROR (1 << 3) +#define CBE_CAUSE_IAA_GAA_MISMATCH (1 << 4) +#define CBE_CAUSE_DATA_SEGMENT_LIMIT_EXCEPTION (1 << 5) +#define CBE_CAUSE_OS_FATAL_TLB_FAULT (1 << 6) +#define CBE_CAUSE_EXECUTION_HW_ERROR (1 << 7) +#define CBE_CAUSE_TLBHW_ERROR (1 << 8) +#define CBE_CAUSE_RA_REQUEST_TIMEOUT (1 << 9) +#define CBE_CAUSE_HA_REQUEST_TIMEOUT (1 << 10) +#define CBE_CAUSE_RA_RESPONSE_FATAL (1 << 11) +#define CBE_CAUSE_RA_RESPONSE_NON_FATAL (1 << 12) +#define CBE_CAUSE_HA_RESPONSE_FATAL (1 << 13) +#define CBE_CAUSE_HA_RESPONSE_NON_FATAL (1 << 14) +#define CBE_CAUSE_ADDRESS_SPACE_DECODE_ERROR (1 << 15) +#define CBE_CAUSE_RESPONSE_DATA_ERROR (1 << 16) +#define CBE_CAUSE_PROTOCOL_STATE_DATA_ERROR (1 << 17) + +/* + * Exceptions are retried for the following cases. If any OTHER bits are set + * in ecause, the exception is not retryable. + */ +#define EXCEPTION_RETRY_BITS (CBE_CAUSE_RESPONSE_DATA_ERROR | \ + CBE_CAUSE_RA_REQUEST_TIMEOUT | \ + CBE_CAUSE_TLBHW_ERROR | \ + CBE_CAUSE_HA_REQUEST_TIMEOUT) + +/* Message queue head structure */ +union gru_mesqhead { + unsigned long val; + struct { + unsigned int head; + unsigned int limit; + }; +}; + + +/* Generate the low word of a GRU instruction */ +static inline unsigned int +__opword(unsigned char opcode, unsigned char exopc, unsigned char xtype, + unsigned char iaa0, unsigned char iaa1, + unsigned char ima) +{ + return (1 << GRU_CB_ICMD_SHFT) | + (iaa0 << GRU_CB_IAA0_SHFT) | + (iaa1 << GRU_CB_IAA1_SHFT) | + (ima << GRU_CB_IMA_SHFT) | + (xtype << GRU_CB_XTYPE_SHFT) | + (opcode << GRU_CB_OPC_SHFT) | + (exopc << GRU_CB_EXOPC_SHFT); +} + +/* + * Architecture specific intrinsics + */ +static inline void gru_flush_cache(void *p) +{ + __flush_cache(p); +} + +/* + * Store the lower 32 bits of the command including the "start" bit. Then + * start the instruction executing. + */ +static inline void gru_start_instruction(struct gru_instruction *ins, int op32) +{ + gru_ordered_store_int(ins, op32); +} + + +/* Convert "hints" to IMA */ +#define CB_IMA(h) ((h) | IMA_UNMAPPED) + +/* Convert data segment cache line index into TRI0 / TRI1 value */ +#define GRU_DINDEX(i) ((i) * GRU_CACHE_LINE_BYTES) + +/* Inline functions for GRU instructions. + * Note: + * - nelem and stride are in elements + * - tri0/tri1 is in bytes for the beginning of the data segment. + */ +static inline void gru_vload(void *cb, unsigned long mem_addr, + unsigned int tri0, unsigned char xtype, unsigned long nelem, + unsigned long stride, unsigned long hints) +{ + struct gru_instruction *ins = (struct gru_instruction *)cb; + + ins->baddr0 = (long)mem_addr; + ins->nelem = nelem; + ins->tri0 = tri0; + ins->op1_stride = stride; + gru_start_instruction(ins, __opword(OP_VLOAD, 0, xtype, IAA_RAM, 0, + CB_IMA(hints))); +} + +static inline void gru_vstore(void *cb, unsigned long mem_addr, + unsigned int tri0, unsigned char xtype, unsigned long nelem, + unsigned long stride, unsigned long hints) +{ + struct gru_instruction *ins = (void *)cb; + + ins->baddr0 = (long)mem_addr; + ins->nelem = nelem; + ins->tri0 = tri0; + ins->op1_stride = stride; + gru_start_instruction(ins, __opword(OP_VSTORE, 0, xtype, IAA_RAM, 0, + CB_IMA(hints))); +} + +static inline void gru_ivload(void *cb, unsigned long mem_addr, + unsigned int tri0, unsigned int tri1, unsigned char xtype, + unsigned long nelem, unsigned long hints) +{ + struct gru_instruction *ins = (void *)cb; + + ins->baddr0 = (long)mem_addr; + ins->nelem = nelem; + ins->tri0 = tri0; + ins->tri1_bufsize = tri1; + gru_start_instruction(ins, __opword(OP_IVLOAD, 0, xtype, IAA_RAM, 0, + CB_IMA(hints))); +} + +static inline void gru_ivstore(void *cb, unsigned long mem_addr, + unsigned int tri0, unsigned int tri1, + unsigned char xtype, unsigned long nelem, unsigned long hints) +{ + struct gru_instruction *ins = (void *)cb; + + ins->baddr0 = (long)mem_addr; + ins->nelem = nelem; + ins->tri0 = tri0; + ins->tri1_bufsize = tri1; + gru_start_instruction(ins, __opword(OP_IVSTORE, 0, xtype, IAA_RAM, 0, + CB_IMA(hints))); +} + +static inline void gru_vset(void *cb, unsigned long mem_addr, + unsigned long value, unsigned char xtype, unsigned long nelem, + unsigned long stride, unsigned long hints) +{ + struct gru_instruction *ins = (void *)cb; + + ins->baddr0 = (long)mem_addr; + ins->op2_value_baddr1 = value; + ins->nelem = nelem; + ins->op1_stride = stride; + gru_start_instruction(ins, __opword(OP_VSET, 0, xtype, IAA_RAM, 0, + CB_IMA(hints))); +} + +static inline void gru_ivset(void *cb, unsigned long mem_addr, + unsigned int tri1, unsigned long value, unsigned char xtype, + unsigned long nelem, unsigned long hints) +{ + struct gru_instruction *ins = (void *)cb; + + ins->baddr0 = (long)mem_addr; + ins->op2_value_baddr1 = value; + ins->nelem = nelem; + ins->tri1_bufsize = tri1; + gru_start_instruction(ins, __opword(OP_IVSET, 0, xtype, IAA_RAM, 0, + CB_IMA(hints))); +} + +static inline void gru_vflush(void *cb, unsigned long mem_addr, + unsigned long nelem, unsigned char xtype, unsigned long stride, + unsigned long hints) +{ + struct gru_instruction *ins = (void *)cb; + + ins->baddr0 = (long)mem_addr; + ins->op1_stride = stride; + ins->nelem = nelem; + gru_start_instruction(ins, __opword(OP_VFLUSH, 0, xtype, IAA_RAM, 0, + CB_IMA(hints))); +} + +static inline void gru_nop(void *cb, int hints) +{ + struct gru_instruction *ins = (void *)cb; + + gru_start_instruction(ins, __opword(OP_NOP, 0, 0, 0, 0, CB_IMA(hints))); +} + + +static inline void gru_bcopy(void *cb, const unsigned long src, + unsigned long dest, + unsigned int tri0, unsigned int xtype, unsigned long nelem, + unsigned int bufsize, unsigned long hints) +{ + struct gru_instruction *ins = (void *)cb; + + ins->baddr0 = (long)src; + ins->op2_value_baddr1 = (long)dest; + ins->nelem = nelem; + ins->tri0 = tri0; + ins->tri1_bufsize = bufsize; + gru_start_instruction(ins, __opword(OP_BCOPY, 0, xtype, IAA_RAM, + IAA_RAM, CB_IMA(hints))); +} + +static inline void gru_bstore(void *cb, const unsigned long src, + unsigned long dest, unsigned int tri0, unsigned int xtype, + unsigned long nelem, unsigned long hints) +{ + struct gru_instruction *ins = (void *)cb; + + ins->baddr0 = (long)src; + ins->op2_value_baddr1 = (long)dest; + ins->nelem = nelem; + ins->tri0 = tri0; + gru_start_instruction(ins, __opword(OP_BSTORE, 0, xtype, 0, IAA_RAM, + CB_IMA(hints))); +} + +static inline void gru_gamir(void *cb, int exopc, unsigned long src, + unsigned int xtype, unsigned long hints) +{ + struct gru_instruction *ins = (void *)cb; + + ins->baddr0 = (long)src; + gru_start_instruction(ins, __opword(OP_GAMIR, exopc, xtype, IAA_RAM, 0, + CB_IMA(hints))); +} + +static inline void gru_gamirr(void *cb, int exopc, unsigned long src, + unsigned int xtype, unsigned long hints) +{ + struct gru_instruction *ins = (void *)cb; + + ins->baddr0 = (long)src; + gru_start_instruction(ins, __opword(OP_GAMIRR, exopc, xtype, IAA_RAM, 0, + CB_IMA(hints))); +} + +static inline void gru_gamer(void *cb, int exopc, unsigned long src, + unsigned int xtype, + unsigned long operand1, unsigned long operand2, + unsigned long hints) +{ + struct gru_instruction *ins = (void *)cb; + + ins->baddr0 = (long)src; + ins->op1_stride = operand1; + ins->op2_value_baddr1 = operand2; + gru_start_instruction(ins, __opword(OP_GAMER, exopc, xtype, IAA_RAM, 0, + CB_IMA(hints))); +} + +static inline void gru_gamerr(void *cb, int exopc, unsigned long src, + unsigned int xtype, unsigned long operand1, + unsigned long operand2, unsigned long hints) +{ + struct gru_instruction *ins = (void *)cb; + + ins->baddr0 = (long)src; + ins->op1_stride = operand1; + ins->op2_value_baddr1 = operand2; + gru_start_instruction(ins, __opword(OP_GAMERR, exopc, xtype, IAA_RAM, 0, + CB_IMA(hints))); +} + +static inline void gru_gamxr(void *cb, unsigned long src, + unsigned int tri0, unsigned long hints) +{ + struct gru_instruction *ins = (void *)cb; + + ins->baddr0 = (long)src; + ins->nelem = 4; + gru_start_instruction(ins, __opword(OP_GAMXR, EOP_XR_CSWAP, XTYPE_DW, + IAA_RAM, 0, CB_IMA(hints))); +} + +static inline void gru_mesq(void *cb, unsigned long queue, + unsigned long tri0, unsigned long nelem, + unsigned long hints) +{ + struct gru_instruction *ins = (void *)cb; + + ins->baddr0 = (long)queue; + ins->nelem = nelem; + ins->tri0 = tri0; + gru_start_instruction(ins, __opword(OP_MESQ, 0, XTYPE_CL, IAA_RAM, 0, + CB_IMA(hints))); +} + +static inline unsigned long gru_get_amo_value(void *cb) +{ + struct gru_instruction *ins = (void *)cb; + + return ins->avalue; +} + +static inline int gru_get_amo_value_head(void *cb) +{ + struct gru_instruction *ins = (void *)cb; + + return ins->avalue & 0xffffffff; +} + +static inline int gru_get_amo_value_limit(void *cb) +{ + struct gru_instruction *ins = (void *)cb; + + return ins->avalue >> 32; +} + +static inline union gru_mesqhead gru_mesq_head(int head, int limit) +{ + union gru_mesqhead mqh; + + mqh.head = head; + mqh.limit = limit; + return mqh; +} + +/* + * Get struct control_block_extended_exc_detail for CB. + */ +extern int gru_get_cb_exception_detail(void *cb, + struct control_block_extended_exc_detail *excdet); + +#define GRU_EXC_STR_SIZE 256 + +extern int gru_check_status_proc(void *cb); +extern int gru_wait_proc(void *cb); +extern void gru_wait_abort_proc(void *cb); + +/* + * Control block definition for checking status + */ +struct gru_control_block_status { + unsigned int icmd :1; + unsigned int unused1 :31; + unsigned int unused2 :24; + unsigned int istatus :2; + unsigned int isubstatus :4; + unsigned int inused3 :2; +}; + +/* Get CB status */ +static inline int gru_get_cb_status(void *cb) +{ + struct gru_control_block_status *cbs = (void *)cb; + + return cbs->istatus; +} + +/* Get CB message queue substatus */ +static inline int gru_get_cb_message_queue_substatus(void *cb) +{ + struct gru_control_block_status *cbs = (void *)cb; + + return cbs->isubstatus & CBSS_MSG_QUEUE_MASK; +} + +/* Get CB substatus */ +static inline int gru_get_cb_substatus(void *cb) +{ + struct gru_control_block_status *cbs = (void *)cb; + + return cbs->isubstatus; +} + +/* Check the status of a CB. If the CB is in UPM mode, call the + * OS to handle the UPM status. + * Returns the CB status field value (0 for normal completion) + */ +static inline int gru_check_status(void *cb) +{ + struct gru_control_block_status *cbs = (void *)cb; + int ret = cbs->istatus; + + if (ret == CBS_CALL_OS) + ret = gru_check_status_proc(cb); + return ret; +} + +/* Wait for CB to complete. + * Returns the CB status field value (0 for normal completion) + */ +static inline int gru_wait(void *cb) +{ + struct gru_control_block_status *cbs = (void *)cb; + int ret = cbs->istatus;; + + if (ret != CBS_IDLE) + ret = gru_wait_proc(cb); + return ret; +} + +/* Wait for CB to complete. Aborts program if error. (Note: error does NOT + * mean TLB mis - only fatal errors such as memory parity error or user + * bugs will cause termination. + */ +static inline void gru_wait_abort(void *cb) +{ + struct gru_control_block_status *cbs = (void *)cb; + + if (cbs->istatus != CBS_IDLE) + gru_wait_abort_proc(cb); +} + + +/* + * Get a pointer to a control block + * gseg - GSeg address returned from gru_get_thread_gru_segment() + * index - index of desired CB + */ +static inline void *gru_get_cb_pointer(void *gseg, + int index) +{ + return gseg + GRU_CB_BASE + index * GRU_HANDLE_STRIDE; +} + +/* + * Get a pointer to a cacheline in the data segment portion of a GSeg + * gseg - GSeg address returned from gru_get_thread_gru_segment() + * index - index of desired cache line + */ +static inline void *gru_get_data_pointer(void *gseg, int index) +{ + return gseg + GRU_DS_BASE + index * GRU_CACHE_LINE_BYTES; +} + +/* + * Convert a vaddr into the tri index within the GSEG + * vaddr - virtual address of within gseg + */ +static inline int gru_get_tri(void *vaddr) +{ + return ((unsigned long)vaddr & (GRU_GSEG_PAGESIZE - 1)) - GRU_DS_BASE; +} +#endif /* __GRU_INSTRUCTIONS_H__ */ diff --git a/drivers/misc/sgi-gru/grufault.c b/drivers/misc/sgi-gru/grufault.c new file mode 100644 index 00000000000..3d33015bbf3 --- /dev/null +++ b/drivers/misc/sgi-gru/grufault.c @@ -0,0 +1,633 @@ +/* + * SN Platform GRU Driver + * + * FAULT HANDLER FOR GRU DETECTED TLB MISSES + * + * This file contains code that handles TLB misses within the GRU. + * These misses are reported either via interrupts or user polling of + * the user CB. + * + * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <linux/kernel.h> +#include <linux/errno.h> +#include <linux/spinlock.h> +#include <linux/mm.h> +#include <linux/hugetlb.h> +#include <linux/device.h> +#include <linux/io.h> +#include <linux/uaccess.h> +#include <asm/pgtable.h> +#include "gru.h" +#include "grutables.h" +#include "grulib.h" +#include "gru_instructions.h" +#include <asm/uv/uv_hub.h> + +/* + * Test if a physical address is a valid GRU GSEG address + */ +static inline int is_gru_paddr(unsigned long paddr) +{ + return paddr >= gru_start_paddr && paddr < gru_end_paddr; +} + +/* + * Find the vma of a GRU segment. Caller must hold mmap_sem. + */ +struct vm_area_struct *gru_find_vma(unsigned long vaddr) +{ + struct vm_area_struct *vma; + + vma = find_vma(current->mm, vaddr); + if (vma && vma->vm_start <= vaddr && vma->vm_ops == &gru_vm_ops) + return vma; + return NULL; +} + +/* + * Find and lock the gts that contains the specified user vaddr. + * + * Returns: + * - *gts with the mmap_sem locked for read and the GTS locked. + * - NULL if vaddr invalid OR is not a valid GSEG vaddr. + */ + +static struct gru_thread_state *gru_find_lock_gts(unsigned long vaddr) +{ + struct mm_struct *mm = current->mm; + struct vm_area_struct *vma; + struct gru_thread_state *gts = NULL; + + down_read(&mm->mmap_sem); + vma = gru_find_vma(vaddr); + if (vma) + gts = gru_find_thread_state(vma, TSID(vaddr, vma)); + if (gts) + mutex_lock(>s->ts_ctxlock); + else + up_read(&mm->mmap_sem); + return gts; +} + +static struct gru_thread_state *gru_alloc_locked_gts(unsigned long vaddr) +{ + struct mm_struct *mm = current->mm; + struct vm_area_struct *vma; + struct gru_thread_state *gts = NULL; + + down_write(&mm->mmap_sem); + vma = gru_find_vma(vaddr); + if (vma) + gts = gru_alloc_thread_state(vma, TSID(vaddr, vma)); + if (gts) { + mutex_lock(>s->ts_ctxlock); + downgrade_write(&mm->mmap_sem); + } else { + up_write(&mm->mmap_sem); + } + + return gts; +} + +/* + * Unlock a GTS that was previously locked with gru_find_lock_gts(). + */ +static void gru_unlock_gts(struct gru_thread_state *gts) +{ + mutex_unlock(>s->ts_ctxlock); + up_read(¤t->mm->mmap_sem); +} + +/* + * Set a CB.istatus to active using a user virtual address. This must be done + * just prior to a TFH RESTART. The new cb.istatus is an in-cache status ONLY. + * If the line is evicted, the status may be lost. The in-cache update + * is necessary to prevent the user from seeing a stale cb.istatus that will + * change as soon as the TFH restart is complete. Races may cause an + * occasional failure to clear the cb.istatus, but that is ok. + * + * If the cb address is not valid (should not happen, but...), nothing + * bad will happen.. The get_user()/put_user() will fail but there + * are no bad side-effects. + */ +static void gru_cb_set_istatus_active(unsigned long __user *cb) +{ + union { + struct gru_instruction_bits bits; + unsigned long dw; + } u; + + if (cb) { + get_user(u.dw, cb); + u.bits.istatus = CBS_ACTIVE; + put_user(u.dw, cb); + } +} + +/* + * Convert a interrupt IRQ to a pointer to the GRU GTS that caused the + * interrupt. Interrupts are always sent to a cpu on the blade that contains the + * GRU (except for headless blades which are not currently supported). A blade + * has N grus; a block of N consecutive IRQs is assigned to the GRUs. The IRQ + * number uniquely identifies the GRU chiplet on the local blade that caused the + * interrupt. Always called in interrupt context. + */ +static inline struct gru_state *irq_to_gru(int irq) +{ + return &gru_base[uv_numa_blade_id()]->bs_grus[irq - IRQ_GRU]; +} + +/* + * Read & clear a TFM + * + * The GRU has an array of fault maps. A map is private to a cpu + * Only one cpu will be accessing a cpu's fault map. + * + * This function scans the cpu-private fault map & clears all bits that + * are set. The function returns a bitmap that indicates the bits that + * were cleared. Note that sense the maps may be updated asynchronously by + * the GRU, atomic operations must be used to clear bits. + */ +static void get_clear_fault_map(struct gru_state *gru, + struct gru_tlb_fault_map *map) +{ + unsigned long i, k; + struct gru_tlb_fault_map *tfm; + + tfm = get_tfm_for_cpu(gru, gru_cpu_fault_map_id()); + prefetchw(tfm); /* Helps on hardware, required for emulator */ + for (i = 0; i < BITS_TO_LONGS(GRU_NUM_CBE); i++) { + k = tfm->fault_bits[i]; + if (k) + k = xchg(&tfm->fault_bits[i], 0UL); + map->fault_bits[i] = k; + } + + /* + * Not functionally required but helps performance. (Required + * on emulator) + */ + gru_flush_cache(tfm); +} + +/* + * Atomic (interrupt context) & non-atomic (user context) functions to + * convert a vaddr into a physical address. The size of the page + * is returned in pageshift. + * returns: + * 0 - successful + * < 0 - error code + * 1 - (atomic only) try again in non-atomic context + */ +static int non_atomic_pte_lookup(struct vm_area_struct *vma, + unsigned long vaddr, int write, + unsigned long *paddr, int *pageshift) +{ + struct page *page; + + /* ZZZ Need to handle HUGE pages */ + if (is_vm_hugetlb_page(vma)) + return -EFAULT; + *pageshift = PAGE_SHIFT; + if (get_user_pages + (current, current->mm, vaddr, 1, write, 0, &page, NULL) <= 0) + return -EFAULT; + *paddr = page_to_phys(page); + put_page(page); + return 0; +} + +/* + * + * atomic_pte_lookup + * + * Convert a user virtual address to a physical address + * Only supports Intel large pages (2MB only) on x86_64. + * ZZZ - hugepage support is incomplete + */ +static int atomic_pte_lookup(struct vm_area_struct *vma, unsigned long vaddr, + int write, unsigned long *paddr, int *pageshift) +{ + pgd_t *pgdp; + pmd_t *pmdp; + pud_t *pudp; + pte_t pte; + + WARN_ON(irqs_disabled()); /* ZZZ debug */ + + local_irq_disable(); + pgdp = pgd_offset(vma->vm_mm, vaddr); + if (unlikely(pgd_none(*pgdp))) + goto err; + + pudp = pud_offset(pgdp, vaddr); + if (unlikely(pud_none(*pudp))) + goto err; + + pmdp = pmd_offset(pudp, vaddr); + if (unlikely(pmd_none(*pmdp))) + goto err; +#ifdef CONFIG_X86_64 + if (unlikely(pmd_large(*pmdp))) + pte = *(pte_t *) pmdp; + else +#endif + pte = *pte_offset_kernel(pmdp, vaddr); + + local_irq_enable(); + + if (unlikely(!pte_present(pte) || + (write && (!pte_write(pte) || !pte_dirty(pte))))) + return 1; + + *paddr = pte_pfn(pte) << PAGE_SHIFT; + *pageshift = is_vm_hugetlb_page(vma) ? HPAGE_SHIFT : PAGE_SHIFT; + return 0; + +err: + local_irq_enable(); + return 1; +} + +/* + * Drop a TLB entry into the GRU. The fault is described by info in an TFH. + * Input: + * cb Address of user CBR. Null if not running in user context + * Return: + * 0 = dropin, exception, or switch to UPM successful + * 1 = range invalidate active + * < 0 = error code + * + */ +static int gru_try_dropin(struct gru_thread_state *gts, + struct gru_tlb_fault_handle *tfh, + unsigned long __user *cb) +{ + struct mm_struct *mm = gts->ts_mm; + struct vm_area_struct *vma; + int pageshift, asid, write, ret; + unsigned long paddr, gpa, vaddr; + + /* + * NOTE: The GRU contains magic hardware that eliminates races between + * TLB invalidates and TLB dropins. If an invalidate occurs + * in the window between reading the TFH and the subsequent TLB dropin, + * the dropin is ignored. This eliminates the need for additional locks. + */ + + /* + * Error if TFH state is IDLE or FMM mode & the user issuing a UPM call. + * Might be a hardware race OR a stupid user. Ignore FMM because FMM + * is a transient state. + */ + if (tfh->state == TFHSTATE_IDLE) + goto failidle; + if (tfh->state == TFHSTATE_MISS_FMM && cb) + goto failfmm; + + write = (tfh->cause & TFHCAUSE_TLB_MOD) != 0; + vaddr = tfh->missvaddr; + asid = tfh->missasid; + if (asid == 0) + goto failnoasid; + + rmb(); /* TFH must be cache resident before reading ms_range_active */ + + /* + * TFH is cache resident - at least briefly. Fail the dropin + * if a range invalidate is active. + */ + if (atomic_read(>s->ts_gms->ms_range_active)) + goto failactive; + + vma = find_vma(mm, vaddr); + if (!vma) + goto failinval; + + /* + * Atomic lookup is faster & usually works even if called in non-atomic + * context. + */ + ret = atomic_pte_lookup(vma, vaddr, write, &paddr, &pageshift); + if (ret) { + if (!cb) + goto failupm; + if (non_atomic_pte_lookup(vma, vaddr, write, &paddr, + &pageshift)) + goto failinval; + } + if (is_gru_paddr(paddr)) + goto failinval; + + paddr = paddr & ~((1UL << pageshift) - 1); + gpa = uv_soc_phys_ram_to_gpa(paddr); + gru_cb_set_istatus_active(cb); + tfh_write_restart(tfh, gpa, GAA_RAM, vaddr, asid, write, + GRU_PAGESIZE(pageshift)); + STAT(tlb_dropin); + gru_dbg(grudev, + "%s: tfh 0x%p, vaddr 0x%lx, asid 0x%x, ps %d, gpa 0x%lx\n", + ret ? "non-atomic" : "atomic", tfh, vaddr, asid, + pageshift, gpa); + return 0; + +failnoasid: + /* No asid (delayed unload). */ + STAT(tlb_dropin_fail_no_asid); + gru_dbg(grudev, "FAILED no_asid tfh: 0x%p, vaddr 0x%lx\n", tfh, vaddr); + if (!cb) + tfh_user_polling_mode(tfh); + else + gru_flush_cache(tfh); + return -EAGAIN; + +failupm: + /* Atomic failure switch CBR to UPM */ + tfh_user_polling_mode(tfh); + STAT(tlb_dropin_fail_upm); + gru_dbg(grudev, "FAILED upm tfh: 0x%p, vaddr 0x%lx\n", tfh, vaddr); + return 1; + +failfmm: + /* FMM state on UPM call */ + STAT(tlb_dropin_fail_fmm); + gru_dbg(grudev, "FAILED fmm tfh: 0x%p, state %d\n", tfh, tfh->state); + return 0; + +failidle: + /* TFH was idle - no miss pending */ + gru_flush_cache(tfh); + if (cb) + gru_flush_cache(cb); + STAT(tlb_dropin_fail_idle); + gru_dbg(grudev, "FAILED idle tfh: 0x%p, state %d\n", tfh, tfh->state); + return 0; + +failinval: + /* All errors (atomic & non-atomic) switch CBR to EXCEPTION state */ + tfh_exception(tfh); + STAT(tlb_dropin_fail_invalid); + gru_dbg(grudev, "FAILED inval tfh: 0x%p, vaddr 0x%lx\n", tfh, vaddr); + return -EFAULT; + +failactive: + /* Range invalidate active. Switch to UPM iff atomic */ + if (!cb) + tfh_user_polling_mode(tfh); + else + gru_flush_cache(tfh); + STAT(tlb_dropin_fail_range_active); + gru_dbg(grudev, "FAILED range active: tfh 0x%p, vaddr 0x%lx\n", + tfh, vaddr); + return 1; +} + +/* + * Process an external interrupt from the GRU. This interrupt is + * caused by a TLB miss. + * Note that this is the interrupt handler that is registered with linux + * interrupt handlers. + */ +irqreturn_t gru_intr(int irq, void *dev_id) +{ + struct gru_state *gru; + struct gru_tlb_fault_map map; + struct gru_thread_state *gts; + struct gru_tlb_fault_handle *tfh = NULL; + int cbrnum, ctxnum; + + STAT(intr); + + gru = irq_to_gru(irq); + if (!gru) { + dev_err(grudev, "GRU: invalid interrupt: cpu %d, irq %d\n", + raw_smp_processor_id(), irq); + return IRQ_NONE; + } + get_clear_fault_map(gru, &map); + gru_dbg(grudev, "irq %d, gru %x, map 0x%lx\n", irq, gru->gs_gid, + map.fault_bits[0]); + + for_each_cbr_in_tfm(cbrnum, map.fault_bits) { + tfh = get_tfh_by_index(gru, cbrnum); + prefetchw(tfh); /* Helps on hdw, required for emulator */ + + /* + * When hardware sets a bit in the faultmap, it implicitly + * locks the GRU context so that it cannot be unloaded. + * The gts cannot change until a TFH start/writestart command + * is issued. + */ + ctxnum = tfh->ctxnum; + gts = gru->gs_gts[ctxnum]; + + /* + * This is running in interrupt context. Trylock the mmap_sem. + * If it fails, retry the fault in user context. + */ + if (down_read_trylock(>s->ts_mm->mmap_sem)) { + gru_try_dropin(gts, tfh, NULL); + up_read(>s->ts_mm->mmap_sem); + } else { + tfh_user_polling_mode(tfh); + } + } + return IRQ_HANDLED; +} + + +static int gru_user_dropin(struct gru_thread_state *gts, + struct gru_tlb_fault_handle *tfh, + unsigned long __user *cb) +{ + struct gru_mm_struct *gms = gts->ts_gms; + int ret; + + while (1) { + wait_event(gms->ms_wait_queue, + atomic_read(&gms->ms_range_active) == 0); + prefetchw(tfh); /* Helps on hdw, required for emulator */ + ret = gru_try_dropin(gts, tfh, cb); + if (ret <= 0) + return ret; + STAT(call_os_wait_queue); + } +} + +/* + * This interface is called as a result of a user detecting a "call OS" bit + * in a user CB. Normally means that a TLB fault has occurred. + * cb - user virtual address of the CB + */ +int gru_handle_user_call_os(unsigned long cb) +{ + struct gru_tlb_fault_handle *tfh; + struct gru_thread_state *gts; + unsigned long __user *cbp; + int ucbnum, cbrnum, ret = -EINVAL; + + STAT(call_os); + gru_dbg(grudev, "address 0x%lx\n", cb); + + /* sanity check the cb pointer */ + ucbnum = get_cb_number((void *)cb); + if ((cb & (GRU_HANDLE_STRIDE - 1)) || ucbnum >= GRU_NUM_CB) + return -EINVAL; + cbp = (unsigned long *)cb; + + gts = gru_find_lock_gts(cb); + if (!gts) + return -EINVAL; + + if (ucbnum >= gts->ts_cbr_au_count * GRU_CBR_AU_SIZE) { + ret = -EINVAL; + goto exit; + } + + /* + * If force_unload is set, the UPM TLB fault is phony. The task + * has migrated to another node and the GSEG must be moved. Just + * unload the context. The task will page fault and assign a new + * context. + */ + ret = -EAGAIN; + cbrnum = thread_cbr_number(gts, ucbnum); + if (gts->ts_force_unload) { + gru_unload_context(gts, 1); + } else if (gts->ts_gru) { + tfh = get_tfh_by_index(gts->ts_gru, cbrnum); + ret = gru_user_dropin(gts, tfh, cbp); + } +exit: + gru_unlock_gts(gts); + return ret; +} + +/* + * Fetch the exception detail information for a CB that terminated with + * an exception. + */ +int gru_get_exception_detail(unsigned long arg) +{ + struct control_block_extended_exc_detail excdet; + struct gru_control_block_extended *cbe; + struct gru_thread_state *gts; + int ucbnum, cbrnum, ret; + + STAT(user_exception); + if (copy_from_user(&excdet, (void __user *)arg, sizeof(excdet))) + return -EFAULT; + + gru_dbg(grudev, "address 0x%lx\n", excdet.cb); + gts = gru_find_lock_gts(excdet.cb); + if (!gts) + return -EINVAL; + + if (gts->ts_gru) { + ucbnum = get_cb_number((void *)excdet.cb); + cbrnum = thread_cbr_number(gts, ucbnum); + cbe = get_cbe_by_index(gts->ts_gru, cbrnum); + excdet.opc = cbe->opccpy; + excdet.exopc = cbe->exopccpy; + excdet.ecause = cbe->ecause; + excdet.exceptdet0 = cbe->idef1upd; + excdet.exceptdet1 = cbe->idef3upd; + ret = 0; + } else { + ret = -EAGAIN; + } + gru_unlock_gts(gts); + + gru_dbg(grudev, "address 0x%lx, ecause 0x%x\n", excdet.cb, + excdet.ecause); + if (!ret && copy_to_user((void __user *)arg, &excdet, sizeof(excdet))) + ret = -EFAULT; + return ret; +} + +/* + * User request to unload a context. Content is saved for possible reload. + */ +int gru_user_unload_context(unsigned long arg) +{ + struct gru_thread_state *gts; + struct gru_unload_context_req req; + + STAT(user_unload_context); + if (copy_from_user(&req, (void __user *)arg, sizeof(req))) + return -EFAULT; + + gru_dbg(grudev, "gseg 0x%lx\n", req.gseg); + + gts = gru_find_lock_gts(req.gseg); + if (!gts) + return -EINVAL; + + if (gts->ts_gru) + gru_unload_context(gts, 1); + gru_unlock_gts(gts); + + return 0; +} + +/* + * User request to flush a range of virtual addresses from the GRU TLB + * (Mainly for testing). + */ +int gru_user_flush_tlb(unsigned long arg) +{ + struct gru_thread_state *gts; + struct gru_flush_tlb_req req; + + STAT(user_flush_tlb); + if (copy_from_user(&req, (void __user *)arg, sizeof(req))) + return -EFAULT; + + gru_dbg(grudev, "gseg 0x%lx, vaddr 0x%lx, len 0x%lx\n", req.gseg, + req.vaddr, req.len); + + gts = gru_find_lock_gts(req.gseg); + if (!gts) + return -EINVAL; + + gru_flush_tlb_range(gts->ts_gms, req.vaddr, req.vaddr + req.len); + gru_unlock_gts(gts); + + return 0; +} + +/* + * Register the current task as the user of the GSEG slice. + * Needed for TLB fault interrupt targeting. + */ +int gru_set_task_slice(long address) +{ + struct gru_thread_state *gts; + + STAT(set_task_slice); + gru_dbg(grudev, "address 0x%lx\n", address); + gts = gru_alloc_locked_gts(address); + if (!gts) + return -EINVAL; + + gts->ts_tgid_owner = current->tgid; + gru_unlock_gts(gts); + + return 0; +} diff --git a/drivers/misc/sgi-gru/grufile.c b/drivers/misc/sgi-gru/grufile.c new file mode 100644 index 00000000000..23c91f5f6b6 --- /dev/null +++ b/drivers/misc/sgi-gru/grufile.c @@ -0,0 +1,485 @@ +/* + * SN Platform GRU Driver + * + * FILE OPERATIONS & DRIVER INITIALIZATION + * + * This file supports the user system call for file open, close, mmap, etc. + * This also incudes the driver initialization code. + * + * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/errno.h> +#include <linux/slab.h> +#include <linux/mm.h> +#include <linux/io.h> +#include <linux/smp_lock.h> +#include <linux/spinlock.h> +#include <linux/device.h> +#include <linux/miscdevice.h> +#include <linux/interrupt.h> +#include <linux/proc_fs.h> +#include <linux/uaccess.h> +#include "gru.h" +#include "grulib.h" +#include "grutables.h" + +#if defined CONFIG_X86_64 +#include <asm/genapic.h> +#include <asm/irq.h> +#define IS_UV() is_uv_system() +#elif defined CONFIG_IA64 +#include <asm/system.h> +#include <asm/sn/simulator.h> +/* temp support for running on hardware simulator */ +#define IS_UV() IS_MEDUSA() || ia64_platform_is("uv") +#else +#define IS_UV() 0 +#endif + +#include <asm/uv/uv_hub.h> +#include <asm/uv/uv_mmrs.h> + +struct gru_blade_state *gru_base[GRU_MAX_BLADES] __read_mostly; +unsigned long gru_start_paddr, gru_end_paddr __read_mostly; +struct gru_stats_s gru_stats; + +/* Guaranteed user available resources on each node */ +static int max_user_cbrs, max_user_dsr_bytes; + +static struct file_operations gru_fops; +static struct miscdevice gru_miscdev; + + +/* + * gru_vma_close + * + * Called when unmapping a device mapping. Frees all gru resources + * and tables belonging to the vma. + */ +static void gru_vma_close(struct vm_area_struct *vma) +{ + struct gru_vma_data *vdata; + struct gru_thread_state *gts; + struct list_head *entry, *next; + + if (!vma->vm_private_data) + return; + + vdata = vma->vm_private_data; + vma->vm_private_data = NULL; + gru_dbg(grudev, "vma %p, file %p, vdata %p\n", vma, vma->vm_file, + vdata); + list_for_each_safe(entry, next, &vdata->vd_head) { + gts = + list_entry(entry, struct gru_thread_state, ts_next); + list_del(>s->ts_next); + mutex_lock(>s->ts_ctxlock); + if (gts->ts_gru) + gru_unload_context(gts, 0); + mutex_unlock(>s->ts_ctxlock); + gts_drop(gts); + } + kfree(vdata); + STAT(vdata_free); +} + +/* + * gru_file_mmap + * + * Called when mmaping the device. Initializes the vma with a fault handler + * and private data structure necessary to allocate, track, and free the + * underlying pages. + */ +static int gru_file_mmap(struct file *file, struct vm_area_struct *vma) +{ + if ((vma->vm_flags & (VM_SHARED | VM_WRITE)) != (VM_SHARED | VM_WRITE)) + return -EPERM; + + if (vma->vm_start & (GRU_GSEG_PAGESIZE - 1) || + vma->vm_end & (GRU_GSEG_PAGESIZE - 1)) + return -EINVAL; + + vma->vm_flags |= + (VM_IO | VM_DONTCOPY | VM_LOCKED | VM_DONTEXPAND | VM_PFNMAP | + VM_RESERVED); + vma->vm_page_prot = PAGE_SHARED; + vma->vm_ops = &gru_vm_ops; + + vma->vm_private_data = gru_alloc_vma_data(vma, 0); + if (!vma->vm_private_data) + return -ENOMEM; + + gru_dbg(grudev, "file %p, vaddr 0x%lx, vma %p, vdata %p\n", + file, vma->vm_start, vma, vma->vm_private_data); + return 0; +} + +/* + * Create a new GRU context + */ +static int gru_create_new_context(unsigned long arg) +{ + struct gru_create_context_req req; + struct vm_area_struct *vma; + struct gru_vma_data *vdata; + int ret = -EINVAL; + + + if (copy_from_user(&req, (void __user *)arg, sizeof(req))) + return -EFAULT; + + if (req.data_segment_bytes == 0 || + req.data_segment_bytes > max_user_dsr_bytes) + return -EINVAL; + if (!req.control_blocks || !req.maximum_thread_count || + req.control_blocks > max_user_cbrs) + return -EINVAL; + + if (!(req.options & GRU_OPT_MISS_MASK)) + req.options |= GRU_OPT_MISS_FMM_INTR; + + down_write(¤t->mm->mmap_sem); + vma = gru_find_vma(req.gseg); + if (vma) { + vdata = vma->vm_private_data; + vdata->vd_user_options = req.options; + vdata->vd_dsr_au_count = + GRU_DS_BYTES_TO_AU(req.data_segment_bytes); + vdata->vd_cbr_au_count = GRU_CB_COUNT_TO_AU(req.control_blocks); + ret = 0; + } + up_write(¤t->mm->mmap_sem); + + return ret; +} + +/* + * Get GRU configuration info (temp - for emulator testing) + */ +static long gru_get_config_info(unsigned long arg) +{ + struct gru_config_info info; + int nodesperblade; + + if (num_online_nodes() > 1 && + (uv_node_to_blade_id(1) == uv_node_to_blade_id(0))) + nodesperblade = 2; + else + nodesperblade = 1; + info.cpus = num_online_cpus(); + info.nodes = num_online_nodes(); + info.blades = info.nodes / nodesperblade; + info.chiplets = GRU_CHIPLETS_PER_BLADE * info.blades; + + if (copy_to_user((void __user *)arg, &info, sizeof(info))) + return -EFAULT; + return 0; +} + +/* + * Get GRU chiplet status + */ +static long gru_get_chiplet_status(unsigned long arg) +{ + struct gru_state *gru; + struct gru_chiplet_info info; + + if (copy_from_user(&info, (void __user *)arg, sizeof(info))) + return -EFAULT; + + if (info.node == -1) + info.node = numa_node_id(); + if (info.node >= num_possible_nodes() || + info.chiplet >= GRU_CHIPLETS_PER_HUB || + info.node < 0 || info.chiplet < 0) + return -EINVAL; + + info.blade = uv_node_to_blade_id(info.node); + gru = get_gru(info.blade, info.chiplet); + + info.total_dsr_bytes = GRU_NUM_DSR_BYTES; + info.total_cbr = GRU_NUM_CB; + info.total_user_dsr_bytes = GRU_NUM_DSR_BYTES - + gru->gs_reserved_dsr_bytes; + info.total_user_cbr = GRU_NUM_CB - gru->gs_reserved_cbrs; + info.free_user_dsr_bytes = hweight64(gru->gs_dsr_map) * + GRU_DSR_AU_BYTES; + info.free_user_cbr = hweight64(gru->gs_cbr_map) * GRU_CBR_AU_SIZE; + + if (copy_to_user((void __user *)arg, &info, sizeof(info))) + return -EFAULT; + return 0; +} + +/* + * gru_file_unlocked_ioctl + * + * Called to update file attributes via IOCTL calls. + */ +static long gru_file_unlocked_ioctl(struct file *file, unsigned int req, + unsigned long arg) +{ + int err = -EBADRQC; + + gru_dbg(grudev, "file %p\n", file); + + switch (req) { + case GRU_CREATE_CONTEXT: + err = gru_create_new_context(arg); + break; + case GRU_SET_TASK_SLICE: + err = gru_set_task_slice(arg); + break; + case GRU_USER_GET_EXCEPTION_DETAIL: + err = gru_get_exception_detail(arg); + break; + case GRU_USER_UNLOAD_CONTEXT: + err = gru_user_unload_context(arg); + break; + case GRU_GET_CHIPLET_STATUS: + err = gru_get_chiplet_status(arg); + break; + case GRU_USER_FLUSH_TLB: + err = gru_user_flush_tlb(arg); + break; + case GRU_USER_CALL_OS: + err = gru_handle_user_call_os(arg); + break; + case GRU_GET_CONFIG_INFO: + err = gru_get_config_info(arg); + break; + } + return err; +} + +/* + * Called at init time to build tables for all GRUs that are present in the + * system. + */ +static void gru_init_chiplet(struct gru_state *gru, unsigned long paddr, + void *vaddr, int nid, int bid, int grunum) +{ + spin_lock_init(&gru->gs_lock); + spin_lock_init(&gru->gs_asid_lock); + gru->gs_gru_base_paddr = paddr; + gru->gs_gru_base_vaddr = vaddr; + gru->gs_gid = bid * GRU_CHIPLETS_PER_BLADE + grunum; + gru->gs_blade = gru_base[bid]; + gru->gs_blade_id = bid; + gru->gs_cbr_map = (GRU_CBR_AU == 64) ? ~0 : (1UL << GRU_CBR_AU) - 1; + gru->gs_dsr_map = (1UL << GRU_DSR_AU) - 1; + gru_tgh_flush_init(gru); + gru_dbg(grudev, "bid %d, nid %d, gru %x, vaddr %p (0x%lx)\n", + bid, nid, gru->gs_gid, gru->gs_gru_base_vaddr, + gru->gs_gru_base_paddr); + gru_kservices_init(gru); +} + +static int gru_init_tables(unsigned long gru_base_paddr, void *gru_base_vaddr) +{ + int pnode, nid, bid, chip; + int cbrs, dsrbytes, n; + int order = get_order(sizeof(struct gru_blade_state)); + struct page *page; + struct gru_state *gru; + unsigned long paddr; + void *vaddr; + + max_user_cbrs = GRU_NUM_CB; + max_user_dsr_bytes = GRU_NUM_DSR_BYTES; + for_each_online_node(nid) { + bid = uv_node_to_blade_id(nid); + pnode = uv_node_to_pnode(nid); + if (gru_base[bid]) + continue; + page = alloc_pages_node(nid, GFP_KERNEL, order); + if (!page) + goto fail; + gru_base[bid] = page_address(page); + memset(gru_base[bid], 0, sizeof(struct gru_blade_state)); + gru_base[bid]->bs_lru_gru = &gru_base[bid]->bs_grus[0]; + spin_lock_init(&gru_base[bid]->bs_lock); + + dsrbytes = 0; + cbrs = 0; + for (gru = gru_base[bid]->bs_grus, chip = 0; + chip < GRU_CHIPLETS_PER_BLADE; + chip++, gru++) { + paddr = gru_chiplet_paddr(gru_base_paddr, pnode, chip); + vaddr = gru_chiplet_vaddr(gru_base_vaddr, pnode, chip); + gru_init_chiplet(gru, paddr, vaddr, bid, nid, chip); + n = hweight64(gru->gs_cbr_map) * GRU_CBR_AU_SIZE; + cbrs = max(cbrs, n); + n = hweight64(gru->gs_dsr_map) * GRU_DSR_AU_BYTES; + dsrbytes = max(dsrbytes, n); + } + max_user_cbrs = min(max_user_cbrs, cbrs); + max_user_dsr_bytes = min(max_user_dsr_bytes, dsrbytes); + } + + return 0; + +fail: + for (nid--; nid >= 0; nid--) + free_pages((unsigned long)gru_base[nid], order); + return -ENOMEM; +} + +#ifdef CONFIG_IA64 + +static int get_base_irq(void) +{ + return IRQ_GRU; +} + +#elif defined CONFIG_X86_64 + +static void noop(unsigned int irq) +{ +} + +static struct irq_chip gru_chip = { + .name = "gru", + .mask = noop, + .unmask = noop, + .ack = noop, +}; + +static int get_base_irq(void) +{ + set_irq_chip(IRQ_GRU, &gru_chip); + set_irq_chip(IRQ_GRU + 1, &gru_chip); + return IRQ_GRU; +} +#endif + +/* + * gru_init + * + * Called at boot or module load time to initialize the GRUs. + */ +static int __init gru_init(void) +{ + int ret, irq, chip; + char id[10]; + void *gru_start_vaddr; + + if (!IS_UV()) + return 0; + +#if defined CONFIG_IA64 + gru_start_paddr = 0xd000000000UL; /* ZZZZZZZZZZZZZZZZZZZ fixme */ +#else + gru_start_paddr = uv_read_local_mmr(UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR) & + 0x7fffffffffffUL; + +#endif + gru_start_vaddr = __va(gru_start_paddr); + gru_end_paddr = gru_start_paddr + MAX_NUMNODES * GRU_SIZE; + printk(KERN_INFO "GRU space: 0x%lx - 0x%lx\n", + gru_start_paddr, gru_end_paddr); + irq = get_base_irq(); + for (chip = 0; chip < GRU_CHIPLETS_PER_BLADE; chip++) { + ret = request_irq(irq + chip, gru_intr, 0, id, NULL); + if (ret) { + printk(KERN_ERR "%s: request_irq failed\n", + GRU_DRIVER_ID_STR); + goto exit1; + } + } + + ret = misc_register(&gru_miscdev); + if (ret) { + printk(KERN_ERR "%s: misc_register failed\n", + GRU_DRIVER_ID_STR); + goto exit1; + } + + ret = gru_proc_init(); + if (ret) { + printk(KERN_ERR "%s: proc init failed\n", GRU_DRIVER_ID_STR); + goto exit2; + } + + ret = gru_init_tables(gru_start_paddr, gru_start_vaddr); + if (ret) { + printk(KERN_ERR "%s: init tables failed\n", GRU_DRIVER_ID_STR); + goto exit3; + } + + printk(KERN_INFO "%s: v%s\n", GRU_DRIVER_ID_STR, + GRU_DRIVER_VERSION_STR); + return 0; + +exit3: + gru_proc_exit(); +exit2: + misc_deregister(&gru_miscdev); +exit1: + for (--chip; chip >= 0; chip--) + free_irq(irq + chip, NULL); + return ret; + +} + +static void __exit gru_exit(void) +{ + int i, bid; + int order = get_order(sizeof(struct gru_state) * + GRU_CHIPLETS_PER_BLADE); + + for (i = 0; i < GRU_CHIPLETS_PER_BLADE; i++) + free_irq(IRQ_GRU + i, NULL); + + for (bid = 0; bid < GRU_MAX_BLADES; bid++) + free_pages((unsigned long)gru_base[bid], order); + + misc_deregister(&gru_miscdev); + gru_proc_exit(); +} + +static struct file_operations gru_fops = { + .owner = THIS_MODULE, + .unlocked_ioctl = gru_file_unlocked_ioctl, + .mmap = gru_file_mmap, +}; + +static struct miscdevice gru_miscdev = { + .minor = MISC_DYNAMIC_MINOR, + .name = "gru", + .fops = &gru_fops, +}; + +struct vm_operations_struct gru_vm_ops = { + .close = gru_vma_close, + .fault = gru_fault, +}; + +module_init(gru_init); +module_exit(gru_exit); + +module_param(gru_options, ulong, 0644); +MODULE_PARM_DESC(gru_options, "Various debug options"); + +MODULE_AUTHOR("Silicon Graphics, Inc."); +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION(GRU_DRIVER_ID_STR GRU_DRIVER_VERSION_STR); +MODULE_VERSION(GRU_DRIVER_VERSION_STR); + diff --git a/drivers/misc/sgi-gru/gruhandles.h b/drivers/misc/sgi-gru/gruhandles.h new file mode 100644 index 00000000000..d16031d6267 --- /dev/null +++ b/drivers/misc/sgi-gru/gruhandles.h @@ -0,0 +1,663 @@ +/* + * SN Platform GRU Driver + * + * GRU HANDLE DEFINITION + * + * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef __GRUHANDLES_H__ +#define __GRUHANDLES_H__ +#include "gru_instructions.h" + +/* + * Manifest constants for GRU Memory Map + */ +#define GRU_GSEG0_BASE 0 +#define GRU_MCS_BASE (64 * 1024 * 1024) +#define GRU_SIZE (128UL * 1024 * 1024) + +/* Handle & resource counts */ +#define GRU_NUM_CB 128 +#define GRU_NUM_DSR_BYTES (32 * 1024) +#define GRU_NUM_TFM 16 +#define GRU_NUM_TGH 24 +#define GRU_NUM_CBE 128 +#define GRU_NUM_TFH 128 +#define GRU_NUM_CCH 16 +#define GRU_NUM_GSH 1 + +/* Maximum resource counts that can be reserved by user programs */ +#define GRU_NUM_USER_CBR GRU_NUM_CBE +#define GRU_NUM_USER_DSR_BYTES GRU_NUM_DSR_BYTES + +/* Bytes per handle & handle stride. Code assumes all cb, tfh, cbe handles + * are the same */ +#define GRU_HANDLE_BYTES 64 +#define GRU_HANDLE_STRIDE 256 + +/* Base addresses of handles */ +#define GRU_TFM_BASE (GRU_MCS_BASE + 0x00000) +#define GRU_TGH_BASE (GRU_MCS_BASE + 0x08000) +#define GRU_CBE_BASE (GRU_MCS_BASE + 0x10000) +#define GRU_TFH_BASE (GRU_MCS_BASE + 0x18000) +#define GRU_CCH_BASE (GRU_MCS_BASE + 0x20000) +#define GRU_GSH_BASE (GRU_MCS_BASE + 0x30000) + +/* User gseg constants */ +#define GRU_GSEG_STRIDE (4 * 1024 * 1024) +#define GSEG_BASE(a) ((a) & ~(GRU_GSEG_PAGESIZE - 1)) + +/* Data segment constants */ +#define GRU_DSR_AU_BYTES 1024 +#define GRU_DSR_CL (GRU_NUM_DSR_BYTES / GRU_CACHE_LINE_BYTES) +#define GRU_DSR_AU_CL (GRU_DSR_AU_BYTES / GRU_CACHE_LINE_BYTES) +#define GRU_DSR_AU (GRU_NUM_DSR_BYTES / GRU_DSR_AU_BYTES) + +/* Control block constants */ +#define GRU_CBR_AU_SIZE 2 +#define GRU_CBR_AU (GRU_NUM_CBE / GRU_CBR_AU_SIZE) + +/* Convert resource counts to the number of AU */ +#define GRU_DS_BYTES_TO_AU(n) DIV_ROUND_UP(n, GRU_DSR_AU_BYTES) +#define GRU_CB_COUNT_TO_AU(n) DIV_ROUND_UP(n, GRU_CBR_AU_SIZE) + +/* UV limits */ +#define GRU_CHIPLETS_PER_HUB 2 +#define GRU_HUBS_PER_BLADE 1 +#define GRU_CHIPLETS_PER_BLADE (GRU_HUBS_PER_BLADE * GRU_CHIPLETS_PER_HUB) + +/* User GRU Gseg offsets */ +#define GRU_CB_BASE 0 +#define GRU_CB_LIMIT (GRU_CB_BASE + GRU_HANDLE_STRIDE * GRU_NUM_CBE) +#define GRU_DS_BASE 0x20000 +#define GRU_DS_LIMIT (GRU_DS_BASE + GRU_NUM_DSR_BYTES) + +/* Convert a GRU physical address to the chiplet offset */ +#define GSEGPOFF(h) ((h) & (GRU_SIZE - 1)) + +/* Convert an arbitrary handle address to the beginning of the GRU segment */ +#ifndef __PLUGIN__ +#define GRUBASE(h) ((void *)((unsigned long)(h) & ~(GRU_SIZE - 1))) +#else +extern void *gmu_grubase(void *h); +#define GRUBASE(h) gmu_grubase(h) +#endif + +/* General addressing macros. */ +static inline void *get_gseg_base_address(void *base, int ctxnum) +{ + return (void *)(base + GRU_GSEG0_BASE + GRU_GSEG_STRIDE * ctxnum); +} + +static inline void *get_gseg_base_address_cb(void *base, int ctxnum, int line) +{ + return (void *)(get_gseg_base_address(base, ctxnum) + + GRU_CB_BASE + GRU_HANDLE_STRIDE * line); +} + +static inline void *get_gseg_base_address_ds(void *base, int ctxnum, int line) +{ + return (void *)(get_gseg_base_address(base, ctxnum) + GRU_DS_BASE + + GRU_CACHE_LINE_BYTES * line); +} + +static inline struct gru_tlb_fault_map *get_tfm(void *base, int ctxnum) +{ + return (struct gru_tlb_fault_map *)(base + GRU_TFM_BASE + + ctxnum * GRU_HANDLE_STRIDE); +} + +static inline struct gru_tlb_global_handle *get_tgh(void *base, int ctxnum) +{ + return (struct gru_tlb_global_handle *)(base + GRU_TGH_BASE + + ctxnum * GRU_HANDLE_STRIDE); +} + +static inline struct gru_control_block_extended *get_cbe(void *base, int ctxnum) +{ + return (struct gru_control_block_extended *)(base + GRU_CBE_BASE + + ctxnum * GRU_HANDLE_STRIDE); +} + +static inline struct gru_tlb_fault_handle *get_tfh(void *base, int ctxnum) +{ + return (struct gru_tlb_fault_handle *)(base + GRU_TFH_BASE + + ctxnum * GRU_HANDLE_STRIDE); +} + +static inline struct gru_context_configuration_handle *get_cch(void *base, + int ctxnum) +{ + return (struct gru_context_configuration_handle *)(base + + GRU_CCH_BASE + ctxnum * GRU_HANDLE_STRIDE); +} + +static inline unsigned long get_cb_number(void *cb) +{ + return (((unsigned long)cb - GRU_CB_BASE) % GRU_GSEG_PAGESIZE) / + GRU_HANDLE_STRIDE; +} + +/* byte offset to a specific GRU chiplet. (p=pnode, c=chiplet (0 or 1)*/ +static inline unsigned long gru_chiplet_paddr(unsigned long paddr, int pnode, + int chiplet) +{ + return paddr + GRU_SIZE * (2 * pnode + chiplet); +} + +static inline void *gru_chiplet_vaddr(void *vaddr, int pnode, int chiplet) +{ + return vaddr + GRU_SIZE * (2 * pnode + chiplet); +} + + + +/* + * Global TLB Fault Map + * Bitmap of outstanding TLB misses needing interrupt/polling service. + * + */ +struct gru_tlb_fault_map { + unsigned long fault_bits[BITS_TO_LONGS(GRU_NUM_CBE)]; + unsigned long fill0[2]; + unsigned long done_bits[BITS_TO_LONGS(GRU_NUM_CBE)]; + unsigned long fill1[2]; +}; + +/* + * TGH - TLB Global Handle + * Used for TLB flushing. + * + */ +struct gru_tlb_global_handle { + unsigned int cmd:1; /* DW 0 */ + unsigned int delresp:1; + unsigned int opc:1; + unsigned int fill1:5; + + unsigned int fill2:8; + + unsigned int status:2; + unsigned long fill3:2; + unsigned int state:3; + unsigned long fill4:1; + + unsigned int cause:3; + unsigned long fill5:37; + + unsigned long vaddr:64; /* DW 1 */ + + unsigned int asid:24; /* DW 2 */ + unsigned int fill6:8; + + unsigned int pagesize:5; + unsigned int fill7:11; + + unsigned int global:1; + unsigned int fill8:15; + + unsigned long vaddrmask:39; /* DW 3 */ + unsigned int fill9:9; + unsigned int n:10; + unsigned int fill10:6; + + unsigned int ctxbitmap:16; /* DW4 */ + unsigned long fill11[3]; +}; + +enum gru_tgh_cmd { + TGHCMD_START +}; + +enum gru_tgh_opc { + TGHOP_TLBNOP, + TGHOP_TLBINV +}; + +enum gru_tgh_status { + TGHSTATUS_IDLE, + TGHSTATUS_EXCEPTION, + TGHSTATUS_ACTIVE +}; + +enum gru_tgh_state { + TGHSTATE_IDLE, + TGHSTATE_PE_INVAL, + TGHSTATE_INTERRUPT_INVAL, + TGHSTATE_WAITDONE, + TGHSTATE_RESTART_CTX, +}; + +/* + * TFH - TLB Global Handle + * Used for TLB dropins into the GRU TLB. + * + */ +struct gru_tlb_fault_handle { + unsigned int cmd:1; /* DW 0 - low 32*/ + unsigned int delresp:1; + unsigned int fill0:2; + unsigned int opc:3; + unsigned int fill1:9; + + unsigned int status:2; + unsigned int fill2:1; + unsigned int color:1; + unsigned int state:3; + unsigned int fill3:1; + + unsigned int cause:7; /* DW 0 - high 32 */ + unsigned int fill4:1; + + unsigned int indexway:12; + unsigned int fill5:4; + + unsigned int ctxnum:4; + unsigned int fill6:12; + + unsigned long missvaddr:64; /* DW 1 */ + + unsigned int missasid:24; /* DW 2 */ + unsigned int fill7:8; + unsigned int fillasid:24; + unsigned int dirty:1; + unsigned int gaa:2; + unsigned long fill8:5; + + unsigned long pfn:41; /* DW 3 */ + unsigned int fill9:7; + unsigned int pagesize:5; + unsigned int fill10:11; + + unsigned long fillvaddr:64; /* DW 4 */ + + unsigned long fill11[3]; +}; + +enum gru_tfh_opc { + TFHOP_NOOP, + TFHOP_RESTART, + TFHOP_WRITE_ONLY, + TFHOP_WRITE_RESTART, + TFHOP_EXCEPTION, + TFHOP_USER_POLLING_MODE = 7, +}; + +enum tfh_status { + TFHSTATUS_IDLE, + TFHSTATUS_EXCEPTION, + TFHSTATUS_ACTIVE, +}; + +enum tfh_state { + TFHSTATE_INACTIVE, + TFHSTATE_IDLE, + TFHSTATE_MISS_UPM, + TFHSTATE_MISS_FMM, + TFHSTATE_HW_ERR, + TFHSTATE_WRITE_TLB, + TFHSTATE_RESTART_CBR, +}; + +/* TFH cause bits */ +enum tfh_cause { + TFHCAUSE_NONE, + TFHCAUSE_TLB_MISS, + TFHCAUSE_TLB_MOD, + TFHCAUSE_HW_ERROR_RR, + TFHCAUSE_HW_ERROR_MAIN_ARRAY, + TFHCAUSE_HW_ERROR_VALID, + TFHCAUSE_HW_ERROR_PAGESIZE, + TFHCAUSE_INSTRUCTION_EXCEPTION, + TFHCAUSE_UNCORRECTIBLE_ERROR, +}; + +/* GAA values */ +#define GAA_RAM 0x0 +#define GAA_NCRAM 0x2 +#define GAA_MMIO 0x1 +#define GAA_REGISTER 0x3 + +/* GRU paddr shift for pfn. (NOTE: shift is NOT by actual pagesize) */ +#define GRU_PADDR_SHIFT 12 + +/* + * Context Configuration handle + * Used to allocate resources to a GSEG context. + * + */ +struct gru_context_configuration_handle { + unsigned int cmd:1; /* DW0 */ + unsigned int delresp:1; + unsigned int opc:3; + unsigned int unmap_enable:1; + unsigned int req_slice_set_enable:1; + unsigned int req_slice:2; + unsigned int cb_int_enable:1; + unsigned int tlb_int_enable:1; + unsigned int tfm_fault_bit_enable:1; + unsigned int tlb_int_select:4; + + unsigned int status:2; + unsigned int state:2; + unsigned int reserved2:4; + + unsigned int cause:4; + unsigned int tfm_done_bit_enable:1; + unsigned int unused:3; + + unsigned int dsr_allocation_map; + + unsigned long cbr_allocation_map; /* DW1 */ + + unsigned int asid[8]; /* DW 2 - 5 */ + unsigned short sizeavail[8]; /* DW 6 - 7 */ +} __attribute__ ((packed)); + +enum gru_cch_opc { + CCHOP_START = 1, + CCHOP_ALLOCATE, + CCHOP_INTERRUPT, + CCHOP_DEALLOCATE, + CCHOP_INTERRUPT_SYNC, +}; + +enum gru_cch_status { + CCHSTATUS_IDLE, + CCHSTATUS_EXCEPTION, + CCHSTATUS_ACTIVE, +}; + +enum gru_cch_state { + CCHSTATE_INACTIVE, + CCHSTATE_MAPPED, + CCHSTATE_ACTIVE, + CCHSTATE_INTERRUPTED, +}; + +/* CCH Exception cause */ +enum gru_cch_cause { + CCHCAUSE_REGION_REGISTER_WRITE_ERROR = 1, + CCHCAUSE_ILLEGAL_OPCODE = 2, + CCHCAUSE_INVALID_START_REQUEST = 3, + CCHCAUSE_INVALID_ALLOCATION_REQUEST = 4, + CCHCAUSE_INVALID_DEALLOCATION_REQUEST = 5, + CCHCAUSE_INVALID_INTERRUPT_REQUEST = 6, + CCHCAUSE_CCH_BUSY = 7, + CCHCAUSE_NO_CBRS_TO_ALLOCATE = 8, + CCHCAUSE_BAD_TFM_CONFIG = 9, + CCHCAUSE_CBR_RESOURCES_OVERSUBSCRIPED = 10, + CCHCAUSE_DSR_RESOURCES_OVERSUBSCRIPED = 11, + CCHCAUSE_CBR_DEALLOCATION_ERROR = 12, +}; +/* + * CBE - Control Block Extended + * Maintains internal GRU state for active CBs. + * + */ +struct gru_control_block_extended { + unsigned int reserved0:1; /* DW 0 - low */ + unsigned int imacpy:3; + unsigned int reserved1:4; + unsigned int xtypecpy:3; + unsigned int iaa0cpy:2; + unsigned int iaa1cpy:2; + unsigned int reserved2:1; + unsigned int opccpy:8; + unsigned int exopccpy:8; + + unsigned int idef2cpy:22; /* DW 0 - high */ + unsigned int reserved3:10; + + unsigned int idef4cpy:22; /* DW 1 */ + unsigned int reserved4:10; + unsigned int idef4upd:22; + unsigned int reserved5:10; + + unsigned long idef1upd:64; /* DW 2 */ + + unsigned long idef5cpy:64; /* DW 3 */ + + unsigned long idef6cpy:64; /* DW 4 */ + + unsigned long idef3upd:64; /* DW 5 */ + + unsigned long idef5upd:64; /* DW 6 */ + + unsigned int idef2upd:22; /* DW 7 */ + unsigned int reserved6:10; + + unsigned int ecause:20; + unsigned int cbrstate:4; + unsigned int cbrexecstatus:8; +}; + +enum gru_cbr_state { + CBRSTATE_INACTIVE, + CBRSTATE_IDLE, + CBRSTATE_PE_CHECK, + CBRSTATE_QUEUED, + CBRSTATE_WAIT_RESPONSE, + CBRSTATE_INTERRUPTED, + CBRSTATE_INTERRUPTED_MISS_FMM, + CBRSTATE_BUSY_INTERRUPT_MISS_FMM, + CBRSTATE_INTERRUPTED_MISS_UPM, + CBRSTATE_BUSY_INTERRUPTED_MISS_UPM, + CBRSTATE_REQUEST_ISSUE, + CBRSTATE_BUSY_INTERRUPT, +}; + +/* CBE cbrexecstatus bits */ +#define CBR_EXS_ABORT_OCC_BIT 0 +#define CBR_EXS_INT_OCC_BIT 1 +#define CBR_EXS_PENDING_BIT 2 +#define CBR_EXS_QUEUED_BIT 3 +#define CBR_EXS_TLBHW_BIT 4 +#define CBR_EXS_EXCEPTION_BIT 5 + +#define CBR_EXS_ABORT_OCC (1 << CBR_EXS_ABORT_OCC_BIT) +#define CBR_EXS_INT_OCC (1 << CBR_EXS_INT_OCC_BIT) +#define CBR_EXS_PENDING (1 << CBR_EXS_PENDING_BIT) +#define CBR_EXS_QUEUED (1 << CBR_EXS_QUEUED_BIT) +#define CBR_EXS_TLBHW (1 << CBR_EXS_TLBHW_BIT) +#define CBR_EXS_EXCEPTION (1 << CBR_EXS_EXCEPTION_BIT) + +/* CBE ecause bits - defined in gru_instructions.h */ + +/* + * Convert a processor pagesize into the strange encoded pagesize used by the + * GRU. Processor pagesize is encoded as log of bytes per page. (or PAGE_SHIFT) + * pagesize log pagesize grupagesize + * 4k 12 0 + * 16k 14 1 + * 64k 16 2 + * 256k 18 3 + * 1m 20 4 + * 2m 21 5 + * 4m 22 6 + * 16m 24 7 + * 64m 26 8 + * ... + */ +#define GRU_PAGESIZE(sh) ((((sh) > 20 ? (sh) + 2: (sh)) >> 1) - 6) +#define GRU_SIZEAVAIL(sh) (1UL << GRU_PAGESIZE(sh)) + +/* minimum TLB purge count to ensure a full purge */ +#define GRUMAXINVAL 1024UL + + +/* Extract the status field from a kernel handle */ +#define GET_MSEG_HANDLE_STATUS(h) (((*(unsigned long *)(h)) >> 16) & 3) + +static inline void start_instruction(void *h) +{ + unsigned long *w0 = h; + + wmb(); /* setting CMD bit must be last */ + *w0 = *w0 | 1; + gru_flush_cache(h); +} + +static inline int wait_instruction_complete(void *h) +{ + int status; + + do { + cpu_relax(); + barrier(); + status = GET_MSEG_HANDLE_STATUS(h); + } while (status == CCHSTATUS_ACTIVE); + return status; +} + +#if defined CONFIG_IA64 +static inline void cch_allocate_set_asids( + struct gru_context_configuration_handle *cch, int asidval) +{ + int i; + + for (i = 0; i <= RGN_HPAGE; i++) { /* assume HPAGE is last region */ + cch->asid[i] = (asidval++); +#if 0 + /* ZZZ hugepages not supported yet */ + if (i == RGN_HPAGE) + cch->sizeavail[i] = GRU_SIZEAVAIL(hpage_shift); + else +#endif + cch->sizeavail[i] = GRU_SIZEAVAIL(PAGE_SHIFT); + } +} +#elif defined CONFIG_X86_64 +static inline void cch_allocate_set_asids( + struct gru_context_configuration_handle *cch, int asidval) +{ + int i; + + for (i = 0; i < 8; i++) { + cch->asid[i] = asidval++; + cch->sizeavail[i] = GRU_SIZEAVAIL(PAGE_SHIFT) | + GRU_SIZEAVAIL(21); + } +} +#endif + +static inline int cch_allocate(struct gru_context_configuration_handle *cch, + int asidval, unsigned long cbrmap, + unsigned long dsrmap) +{ + cch_allocate_set_asids(cch, asidval); + cch->dsr_allocation_map = dsrmap; + cch->cbr_allocation_map = cbrmap; + cch->opc = CCHOP_ALLOCATE; + start_instruction(cch); + return wait_instruction_complete(cch); +} + +static inline int cch_start(struct gru_context_configuration_handle *cch) +{ + cch->opc = CCHOP_START; + start_instruction(cch); + return wait_instruction_complete(cch); +} + +static inline int cch_interrupt(struct gru_context_configuration_handle *cch) +{ + cch->opc = CCHOP_INTERRUPT; + start_instruction(cch); + return wait_instruction_complete(cch); +} + +static inline int cch_deallocate(struct gru_context_configuration_handle *cch) +{ + cch->opc = CCHOP_DEALLOCATE; + start_instruction(cch); + return wait_instruction_complete(cch); +} + +static inline int cch_interrupt_sync(struct gru_context_configuration_handle + *cch) +{ + cch->opc = CCHOP_INTERRUPT_SYNC; + start_instruction(cch); + return wait_instruction_complete(cch); +} + +static inline int tgh_invalidate(struct gru_tlb_global_handle *tgh, + unsigned long vaddr, unsigned long vaddrmask, + int asid, int pagesize, int global, int n, + unsigned short ctxbitmap) +{ + tgh->vaddr = vaddr; + tgh->asid = asid; + tgh->pagesize = pagesize; + tgh->n = n; + tgh->global = global; + tgh->vaddrmask = vaddrmask; + tgh->ctxbitmap = ctxbitmap; + tgh->opc = TGHOP_TLBINV; + start_instruction(tgh); + return wait_instruction_complete(tgh); +} + +static inline void tfh_write_only(struct gru_tlb_fault_handle *tfh, + unsigned long pfn, unsigned long vaddr, + int asid, int dirty, int pagesize) +{ + tfh->fillasid = asid; + tfh->fillvaddr = vaddr; + tfh->pfn = pfn; + tfh->dirty = dirty; + tfh->pagesize = pagesize; + tfh->opc = TFHOP_WRITE_ONLY; + start_instruction(tfh); +} + +static inline void tfh_write_restart(struct gru_tlb_fault_handle *tfh, + unsigned long paddr, int gaa, + unsigned long vaddr, int asid, int dirty, + int pagesize) +{ + tfh->fillasid = asid; + tfh->fillvaddr = vaddr; + tfh->pfn = paddr >> GRU_PADDR_SHIFT; + tfh->gaa = gaa; + tfh->dirty = dirty; + tfh->pagesize = pagesize; + tfh->opc = TFHOP_WRITE_RESTART; + start_instruction(tfh); +} + +static inline void tfh_restart(struct gru_tlb_fault_handle *tfh) +{ + tfh->opc = TFHOP_RESTART; + start_instruction(tfh); +} + +static inline void tfh_user_polling_mode(struct gru_tlb_fault_handle *tfh) +{ + tfh->opc = TFHOP_USER_POLLING_MODE; + start_instruction(tfh); +} + +static inline void tfh_exception(struct gru_tlb_fault_handle *tfh) +{ + tfh->opc = TFHOP_EXCEPTION; + start_instruction(tfh); +} + +#endif /* __GRUHANDLES_H__ */ diff --git a/drivers/misc/sgi-gru/grukservices.c b/drivers/misc/sgi-gru/grukservices.c new file mode 100644 index 00000000000..dfd49af0fe1 --- /dev/null +++ b/drivers/misc/sgi-gru/grukservices.c @@ -0,0 +1,679 @@ +/* + * SN Platform GRU Driver + * + * KERNEL SERVICES THAT USE THE GRU + * + * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <linux/kernel.h> +#include <linux/errno.h> +#include <linux/slab.h> +#include <linux/mm.h> +#include <linux/smp_lock.h> +#include <linux/spinlock.h> +#include <linux/device.h> +#include <linux/miscdevice.h> +#include <linux/proc_fs.h> +#include <linux/interrupt.h> +#include <linux/uaccess.h> +#include "gru.h" +#include "grulib.h" +#include "grutables.h" +#include "grukservices.h" +#include "gru_instructions.h" +#include <asm/uv/uv_hub.h> + +/* + * Kernel GRU Usage + * + * The following is an interim algorithm for management of kernel GRU + * resources. This will likely be replaced when we better understand the + * kernel/user requirements. + * + * At boot time, the kernel permanently reserves a fixed number of + * CBRs/DSRs for each cpu to use. The resources are all taken from + * the GRU chiplet 1 on the blade. This leaves the full set of resources + * of chiplet 0 available to be allocated to a single user. + */ + +/* Blade percpu resources PERMANENTLY reserved for kernel use */ +#define GRU_NUM_KERNEL_CBR 1 +#define GRU_NUM_KERNEL_DSR_BYTES 256 +#define KERNEL_CTXNUM 15 + +/* GRU instruction attributes for all instructions */ +#define IMA IMA_CB_DELAY + +/* GRU cacheline size is always 64 bytes - even on arches with 128 byte lines */ +#define __gru_cacheline_aligned__ \ + __attribute__((__aligned__(GRU_CACHE_LINE_BYTES))) + +#define MAGIC 0x1234567887654321UL + +/* Default retry count for GRU errors on kernel instructions */ +#define EXCEPTION_RETRY_LIMIT 3 + +/* Status of message queue sections */ +#define MQS_EMPTY 0 +#define MQS_FULL 1 +#define MQS_NOOP 2 + +/*----------------- RESOURCE MANAGEMENT -------------------------------------*/ +/* optimized for x86_64 */ +struct message_queue { + union gru_mesqhead head __gru_cacheline_aligned__; /* CL 0 */ + int qlines; /* DW 1 */ + long hstatus[2]; + void *next __gru_cacheline_aligned__;/* CL 1 */ + void *limit; + void *start; + void *start2; + char data ____cacheline_aligned; /* CL 2 */ +}; + +/* First word in every message - used by mesq interface */ +struct message_header { + char present; + char present2; + char lines; + char fill; +}; + +#define QLINES(mq) ((mq) + offsetof(struct message_queue, qlines)) +#define HSTATUS(mq, h) ((mq) + offsetof(struct message_queue, hstatus[h])) + +static int gru_get_cpu_resources(int dsr_bytes, void **cb, void **dsr) +{ + struct gru_blade_state *bs; + int lcpu; + + BUG_ON(dsr_bytes > GRU_NUM_KERNEL_DSR_BYTES); + preempt_disable(); + bs = gru_base[uv_numa_blade_id()]; + lcpu = uv_blade_processor_id(); + *cb = bs->kernel_cb + lcpu * GRU_HANDLE_STRIDE; + *dsr = bs->kernel_dsr + lcpu * GRU_NUM_KERNEL_DSR_BYTES; + return 0; +} + +static void gru_free_cpu_resources(void *cb, void *dsr) +{ + preempt_enable(); +} + +int gru_get_cb_exception_detail(void *cb, + struct control_block_extended_exc_detail *excdet) +{ + struct gru_control_block_extended *cbe; + + cbe = get_cbe(GRUBASE(cb), get_cb_number(cb)); + excdet->opc = cbe->opccpy; + excdet->exopc = cbe->exopccpy; + excdet->ecause = cbe->ecause; + excdet->exceptdet0 = cbe->idef1upd; + excdet->exceptdet1 = cbe->idef3upd; + return 0; +} + +char *gru_get_cb_exception_detail_str(int ret, void *cb, + char *buf, int size) +{ + struct gru_control_block_status *gen = (void *)cb; + struct control_block_extended_exc_detail excdet; + + if (ret > 0 && gen->istatus == CBS_EXCEPTION) { + gru_get_cb_exception_detail(cb, &excdet); + snprintf(buf, size, + "GRU exception: cb %p, opc %d, exopc %d, ecause 0x%x," + "excdet0 0x%lx, excdet1 0x%x", + gen, excdet.opc, excdet.exopc, excdet.ecause, + excdet.exceptdet0, excdet.exceptdet1); + } else { + snprintf(buf, size, "No exception"); + } + return buf; +} + +static int gru_wait_idle_or_exception(struct gru_control_block_status *gen) +{ + while (gen->istatus >= CBS_ACTIVE) { + cpu_relax(); + barrier(); + } + return gen->istatus; +} + +static int gru_retry_exception(void *cb) +{ + struct gru_control_block_status *gen = (void *)cb; + struct control_block_extended_exc_detail excdet; + int retry = EXCEPTION_RETRY_LIMIT; + + while (1) { + if (gru_get_cb_message_queue_substatus(cb)) + break; + if (gru_wait_idle_or_exception(gen) == CBS_IDLE) + return CBS_IDLE; + + gru_get_cb_exception_detail(cb, &excdet); + if (excdet.ecause & ~EXCEPTION_RETRY_BITS) + break; + if (retry-- == 0) + break; + gen->icmd = 1; + gru_flush_cache(gen); + } + return CBS_EXCEPTION; +} + +int gru_check_status_proc(void *cb) +{ + struct gru_control_block_status *gen = (void *)cb; + int ret; + + ret = gen->istatus; + if (ret != CBS_EXCEPTION) + return ret; + return gru_retry_exception(cb); + +} + +int gru_wait_proc(void *cb) +{ + struct gru_control_block_status *gen = (void *)cb; + int ret; + + ret = gru_wait_idle_or_exception(gen); + if (ret == CBS_EXCEPTION) + ret = gru_retry_exception(cb); + + return ret; +} + +void gru_abort(int ret, void *cb, char *str) +{ + char buf[GRU_EXC_STR_SIZE]; + + panic("GRU FATAL ERROR: %s - %s\n", str, + gru_get_cb_exception_detail_str(ret, cb, buf, sizeof(buf))); +} + +void gru_wait_abort_proc(void *cb) +{ + int ret; + + ret = gru_wait_proc(cb); + if (ret) + gru_abort(ret, cb, "gru_wait_abort"); +} + + +/*------------------------------ MESSAGE QUEUES -----------------------------*/ + +/* Internal status . These are NOT returned to the user. */ +#define MQIE_AGAIN -1 /* try again */ + + +/* + * Save/restore the "present" flag that is in the second line of 2-line + * messages + */ +static inline int get_present2(void *p) +{ + struct message_header *mhdr = p + GRU_CACHE_LINE_BYTES; + return mhdr->present; +} + +static inline void restore_present2(void *p, int val) +{ + struct message_header *mhdr = p + GRU_CACHE_LINE_BYTES; + mhdr->present = val; +} + +/* + * Create a message queue. + * qlines - message queue size in cache lines. Includes 2-line header. + */ +int gru_create_message_queue(void *p, unsigned int bytes) +{ + struct message_queue *mq = p; + unsigned int qlines; + + qlines = bytes / GRU_CACHE_LINE_BYTES - 2; + memset(mq, 0, bytes); + mq->start = &mq->data; + mq->start2 = &mq->data + (qlines / 2 - 1) * GRU_CACHE_LINE_BYTES; + mq->next = &mq->data; + mq->limit = &mq->data + (qlines - 2) * GRU_CACHE_LINE_BYTES; + mq->qlines = qlines; + mq->hstatus[0] = 0; + mq->hstatus[1] = 1; + mq->head = gru_mesq_head(2, qlines / 2 + 1); + return 0; +} +EXPORT_SYMBOL_GPL(gru_create_message_queue); + +/* + * Send a NOOP message to a message queue + * Returns: + * 0 - if queue is full after the send. This is the normal case + * but various races can change this. + * -1 - if mesq sent successfully but queue not full + * >0 - unexpected error. MQE_xxx returned + */ +static int send_noop_message(void *cb, + unsigned long mq, void *mesg) +{ + const struct message_header noop_header = { + .present = MQS_NOOP, .lines = 1}; + unsigned long m; + int substatus, ret; + struct message_header save_mhdr, *mhdr = mesg; + + STAT(mesq_noop); + save_mhdr = *mhdr; + *mhdr = noop_header; + gru_mesq(cb, mq, gru_get_tri(mhdr), 1, IMA); + ret = gru_wait(cb); + + if (ret) { + substatus = gru_get_cb_message_queue_substatus(cb); + switch (substatus) { + case CBSS_NO_ERROR: + STAT(mesq_noop_unexpected_error); + ret = MQE_UNEXPECTED_CB_ERR; + break; + case CBSS_LB_OVERFLOWED: + STAT(mesq_noop_lb_overflow); + ret = MQE_CONGESTION; + break; + case CBSS_QLIMIT_REACHED: + STAT(mesq_noop_qlimit_reached); + ret = 0; + break; + case CBSS_AMO_NACKED: + STAT(mesq_noop_amo_nacked); + ret = MQE_CONGESTION; + break; + case CBSS_PUT_NACKED: + STAT(mesq_noop_put_nacked); + m = mq + (gru_get_amo_value_head(cb) << 6); + gru_vstore(cb, m, gru_get_tri(mesg), XTYPE_CL, 1, 1, + IMA); + if (gru_wait(cb) == CBS_IDLE) + ret = MQIE_AGAIN; + else + ret = MQE_UNEXPECTED_CB_ERR; + break; + case CBSS_PAGE_OVERFLOW: + default: + BUG(); + } + } + *mhdr = save_mhdr; + return ret; +} + +/* + * Handle a gru_mesq full. + */ +static int send_message_queue_full(void *cb, + unsigned long mq, void *mesg, int lines) +{ + union gru_mesqhead mqh; + unsigned int limit, head; + unsigned long avalue; + int half, qlines, save; + + /* Determine if switching to first/second half of q */ + avalue = gru_get_amo_value(cb); + head = gru_get_amo_value_head(cb); + limit = gru_get_amo_value_limit(cb); + + /* + * Fetch "qlines" from the queue header. Since the queue may be + * in memory that can't be accessed using socket addresses, use + * the GRU to access the data. Use DSR space from the message. + */ + save = *(int *)mesg; + gru_vload(cb, QLINES(mq), gru_get_tri(mesg), XTYPE_W, 1, 1, IMA); + if (gru_wait(cb) != CBS_IDLE) + goto cberr; + qlines = *(int *)mesg; + *(int *)mesg = save; + half = (limit != qlines); + + if (half) + mqh = gru_mesq_head(qlines / 2 + 1, qlines); + else + mqh = gru_mesq_head(2, qlines / 2 + 1); + + /* Try to get lock for switching head pointer */ + gru_gamir(cb, EOP_IR_CLR, HSTATUS(mq, half), XTYPE_DW, IMA); + if (gru_wait(cb) != CBS_IDLE) + goto cberr; + if (!gru_get_amo_value(cb)) { + STAT(mesq_qf_locked); + return MQE_QUEUE_FULL; + } + + /* Got the lock. Send optional NOP if queue not full, */ + if (head != limit) { + if (send_noop_message(cb, mq, mesg)) { + gru_gamir(cb, EOP_IR_INC, HSTATUS(mq, half), + XTYPE_DW, IMA); + if (gru_wait(cb) != CBS_IDLE) + goto cberr; + STAT(mesq_qf_noop_not_full); + return MQIE_AGAIN; + } + avalue++; + } + + /* Then flip queuehead to other half of queue. */ + gru_gamer(cb, EOP_ERR_CSWAP, mq, XTYPE_DW, mqh.val, avalue, IMA); + if (gru_wait(cb) != CBS_IDLE) + goto cberr; + + /* If not successfully in swapping queue head, clear the hstatus lock */ + if (gru_get_amo_value(cb) != avalue) { + STAT(mesq_qf_switch_head_failed); + gru_gamir(cb, EOP_IR_INC, HSTATUS(mq, half), XTYPE_DW, IMA); + if (gru_wait(cb) != CBS_IDLE) + goto cberr; + } + return MQIE_AGAIN; +cberr: + STAT(mesq_qf_unexpected_error); + return MQE_UNEXPECTED_CB_ERR; +} + + +/* + * Handle a gru_mesq failure. Some of these failures are software recoverable + * or retryable. + */ +static int send_message_failure(void *cb, + unsigned long mq, + void *mesg, + int lines) +{ + int substatus, ret = 0; + unsigned long m; + + substatus = gru_get_cb_message_queue_substatus(cb); + switch (substatus) { + case CBSS_NO_ERROR: + STAT(mesq_send_unexpected_error); + ret = MQE_UNEXPECTED_CB_ERR; + break; + case CBSS_LB_OVERFLOWED: + STAT(mesq_send_lb_overflow); + ret = MQE_CONGESTION; + break; + case CBSS_QLIMIT_REACHED: + STAT(mesq_send_qlimit_reached); + ret = send_message_queue_full(cb, mq, mesg, lines); + break; + case CBSS_AMO_NACKED: + STAT(mesq_send_amo_nacked); + ret = MQE_CONGESTION; + break; + case CBSS_PUT_NACKED: + STAT(mesq_send_put_nacked); + m =mq + (gru_get_amo_value_head(cb) << 6); + gru_vstore(cb, m, gru_get_tri(mesg), XTYPE_CL, lines, 1, IMA); + if (gru_wait(cb) == CBS_IDLE) + ret = MQE_OK; + else + ret = MQE_UNEXPECTED_CB_ERR; + break; + default: + BUG(); + } + return ret; +} + +/* + * Send a message to a message queue + * cb GRU control block to use to send message + * mq message queue + * mesg message. ust be vaddr within a GSEG + * bytes message size (<= 2 CL) + */ +int gru_send_message_gpa(unsigned long mq, void *mesg, unsigned int bytes) +{ + struct message_header *mhdr; + void *cb; + void *dsr; + int istatus, clines, ret; + + STAT(mesq_send); + BUG_ON(bytes < sizeof(int) || bytes > 2 * GRU_CACHE_LINE_BYTES); + + clines = (bytes + GRU_CACHE_LINE_BYTES - 1) / GRU_CACHE_LINE_BYTES; + if (gru_get_cpu_resources(bytes, &cb, &dsr)) + return MQE_BUG_NO_RESOURCES; + memcpy(dsr, mesg, bytes); + mhdr = dsr; + mhdr->present = MQS_FULL; + mhdr->lines = clines; + if (clines == 2) { + mhdr->present2 = get_present2(mhdr); + restore_present2(mhdr, MQS_FULL); + } + + do { + ret = MQE_OK; + gru_mesq(cb, mq, gru_get_tri(mhdr), clines, IMA); + istatus = gru_wait(cb); + if (istatus != CBS_IDLE) + ret = send_message_failure(cb, mq, dsr, clines); + } while (ret == MQIE_AGAIN); + gru_free_cpu_resources(cb, dsr); + + if (ret) + STAT(mesq_send_failed); + return ret; +} +EXPORT_SYMBOL_GPL(gru_send_message_gpa); + +/* + * Advance the receive pointer for the queue to the next message. + */ +void gru_free_message(void *rmq, void *mesg) +{ + struct message_queue *mq = rmq; + struct message_header *mhdr = mq->next; + void *next, *pnext; + int half = -1; + int lines = mhdr->lines; + + if (lines == 2) + restore_present2(mhdr, MQS_EMPTY); + mhdr->present = MQS_EMPTY; + + pnext = mq->next; + next = pnext + GRU_CACHE_LINE_BYTES * lines; + if (next == mq->limit) { + next = mq->start; + half = 1; + } else if (pnext < mq->start2 && next >= mq->start2) { + half = 0; + } + + if (half >= 0) + mq->hstatus[half] = 1; + mq->next = next; +} +EXPORT_SYMBOL_GPL(gru_free_message); + +/* + * Get next message from message queue. Return NULL if no message + * present. User must call next_message() to move to next message. + * rmq message queue + */ +void *gru_get_next_message(void *rmq) +{ + struct message_queue *mq = rmq; + struct message_header *mhdr = mq->next; + int present = mhdr->present; + + /* skip NOOP messages */ + STAT(mesq_receive); + while (present == MQS_NOOP) { + gru_free_message(rmq, mhdr); + mhdr = mq->next; + present = mhdr->present; + } + + /* Wait for both halves of 2 line messages */ + if (present == MQS_FULL && mhdr->lines == 2 && + get_present2(mhdr) == MQS_EMPTY) + present = MQS_EMPTY; + + if (!present) { + STAT(mesq_receive_none); + return NULL; + } + + if (mhdr->lines == 2) + restore_present2(mhdr, mhdr->present2); + + return mhdr; +} +EXPORT_SYMBOL_GPL(gru_get_next_message); + +/* ---------------------- GRU DATA COPY FUNCTIONS ---------------------------*/ + +/* + * Copy a block of data using the GRU resources + */ +int gru_copy_gpa(unsigned long dest_gpa, unsigned long src_gpa, + unsigned int bytes) +{ + void *cb; + void *dsr; + int ret; + + STAT(copy_gpa); + if (gru_get_cpu_resources(GRU_NUM_KERNEL_DSR_BYTES, &cb, &dsr)) + return MQE_BUG_NO_RESOURCES; + gru_bcopy(cb, src_gpa, dest_gpa, gru_get_tri(dsr), + XTYPE_B, bytes, GRU_NUM_KERNEL_DSR_BYTES, IMA); + ret = gru_wait(cb); + gru_free_cpu_resources(cb, dsr); + return ret; +} +EXPORT_SYMBOL_GPL(gru_copy_gpa); + +/* ------------------- KERNEL QUICKTESTS RUN AT STARTUP ----------------*/ +/* Temp - will delete after we gain confidence in the GRU */ +static __cacheline_aligned unsigned long word0; +static __cacheline_aligned unsigned long word1; + +static int quicktest(struct gru_state *gru) +{ + void *cb; + void *ds; + unsigned long *p; + + cb = get_gseg_base_address_cb(gru->gs_gru_base_vaddr, KERNEL_CTXNUM, 0); + ds = get_gseg_base_address_ds(gru->gs_gru_base_vaddr, KERNEL_CTXNUM, 0); + p = ds; + word0 = MAGIC; + + gru_vload(cb, uv_gpa(&word0), 0, XTYPE_DW, 1, 1, IMA); + if (gru_wait(cb) != CBS_IDLE) + BUG(); + + if (*(unsigned long *)ds != MAGIC) + BUG(); + gru_vstore(cb, uv_gpa(&word1), 0, XTYPE_DW, 1, 1, IMA); + if (gru_wait(cb) != CBS_IDLE) + BUG(); + + if (word0 != word1 || word0 != MAGIC) { + printk + ("GRU quicktest err: gru %d, found 0x%lx, expected 0x%lx\n", + gru->gs_gid, word1, MAGIC); + BUG(); /* ZZZ should not be fatal */ + } + + return 0; +} + + +int gru_kservices_init(struct gru_state *gru) +{ + struct gru_blade_state *bs; + struct gru_context_configuration_handle *cch; + unsigned long cbr_map, dsr_map; + int err, num, cpus_possible; + + /* + * Currently, resources are reserved ONLY on the second chiplet + * on each blade. This leaves ALL resources on chiplet 0 available + * for user code. + */ + bs = gru->gs_blade; + if (gru != &bs->bs_grus[1]) + return 0; + + cpus_possible = uv_blade_nr_possible_cpus(gru->gs_blade_id); + + num = GRU_NUM_KERNEL_CBR * cpus_possible; + cbr_map = gru_reserve_cb_resources(gru, GRU_CB_COUNT_TO_AU(num), NULL); + gru->gs_reserved_cbrs += num; + + num = GRU_NUM_KERNEL_DSR_BYTES * cpus_possible; + dsr_map = gru_reserve_ds_resources(gru, GRU_DS_BYTES_TO_AU(num), NULL); + gru->gs_reserved_dsr_bytes += num; + + gru->gs_active_contexts++; + __set_bit(KERNEL_CTXNUM, &gru->gs_context_map); + cch = get_cch(gru->gs_gru_base_vaddr, KERNEL_CTXNUM); + + bs->kernel_cb = get_gseg_base_address_cb(gru->gs_gru_base_vaddr, + KERNEL_CTXNUM, 0); + bs->kernel_dsr = get_gseg_base_address_ds(gru->gs_gru_base_vaddr, + KERNEL_CTXNUM, 0); + + lock_cch_handle(cch); + cch->tfm_fault_bit_enable = 0; + cch->tlb_int_enable = 0; + cch->tfm_done_bit_enable = 0; + cch->unmap_enable = 1; + err = cch_allocate(cch, 0, cbr_map, dsr_map); + if (err) { + gru_dbg(grudev, + "Unable to allocate kernel CCH: gru %d, err %d\n", + gru->gs_gid, err); + BUG(); + } + if (cch_start(cch)) { + gru_dbg(grudev, "Unable to start kernel CCH: gru %d, err %d\n", + gru->gs_gid, err); + BUG(); + } + unlock_cch_handle(cch); + + if (gru_options & GRU_QUICKLOOK) + quicktest(gru); + return 0; +} diff --git a/drivers/misc/sgi-gru/grukservices.h b/drivers/misc/sgi-gru/grukservices.h new file mode 100644 index 00000000000..eb17e0a3ac6 --- /dev/null +++ b/drivers/misc/sgi-gru/grukservices.h @@ -0,0 +1,134 @@ + +/* + * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +#ifndef __GRU_KSERVICES_H_ +#define __GRU_KSERVICES_H_ + + +/* + * Message queues using the GRU to send/receive messages. + * + * These function allow the user to create a message queue for + * sending/receiving 1 or 2 cacheline messages using the GRU. + * + * Processes SENDING messages will use a kernel CBR/DSR to send + * the message. This is transparent to the caller. + * + * The receiver does not use any GRU resources. + * + * The functions support: + * - single receiver + * - multiple senders + * - cross partition message + * + * Missing features ZZZ: + * - user options for dealing with timeouts, queue full, etc. + * - gru_create_message_queue() needs interrupt vector info + */ + +/* + * Initialize a user allocated chunk of memory to be used as + * a message queue. The caller must ensure that the queue is + * in contiguous physical memory and is cacheline aligned. + * + * Message queue size is the total number of bytes allocated + * to the queue including a 2 cacheline header that is used + * to manage the queue. + * + * Input: + * p pointer to user allocated memory. + * bytes size of message queue in bytes + * + * Errors: + * 0 OK + * >0 error + */ +extern int gru_create_message_queue(void *p, unsigned int bytes); + +/* + * Send a message to a message queue. + * + * Note: The message queue transport mechanism uses the first 32 + * bits of the message. Users should avoid using these bits. + * + * + * Input: + * xmq message queue - must be a UV global physical address + * mesg pointer to message. Must be 64-bit aligned + * bytes size of message in bytes + * + * Output: + * 0 message sent + * >0 Send failure - see error codes below + * + */ +extern int gru_send_message_gpa(unsigned long mq_gpa, void *mesg, + unsigned int bytes); + +/* Status values for gru_send_message() */ +#define MQE_OK 0 /* message sent successfully */ +#define MQE_CONGESTION 1 /* temporary congestion, try again */ +#define MQE_QUEUE_FULL 2 /* queue is full */ +#define MQE_UNEXPECTED_CB_ERR 3 /* unexpected CB error */ +#define MQE_PAGE_OVERFLOW 10 /* BUG - queue overflowed a page */ +#define MQE_BUG_NO_RESOURCES 11 /* BUG - could not alloc GRU cb/dsr */ + +/* + * Advance the receive pointer for the message queue to the next message. + * Note: current API requires messages to be gotten & freed in order. Future + * API extensions may allow for out-of-order freeing. + * + * Input + * mq message queue + * mesq message being freed + */ +extern void gru_free_message(void *mq, void *mesq); + +/* + * Get next message from message queue. Returns pointer to + * message OR NULL if no message present. + * User must call gru_free_message() after message is processed + * in order to move the queue pointers to next message. + * + * Input + * mq message queue + * + * Output: + * p pointer to message + * NULL no message available + */ +extern void *gru_get_next_message(void *mq); + + +/* + * Copy data using the GRU. Source or destination can be located in a remote + * partition. + * + * Input: + * dest_gpa destination global physical address + * src_gpa source global physical address + * bytes number of bytes to copy + * + * Output: + * 0 OK + * >0 error + */ +extern int gru_copy_gpa(unsigned long dest_gpa, unsigned long src_gpa, + unsigned int bytes); + +#endif /* __GRU_KSERVICES_H_ */ diff --git a/drivers/misc/sgi-gru/grulib.h b/drivers/misc/sgi-gru/grulib.h new file mode 100644 index 00000000000..e56e196a699 --- /dev/null +++ b/drivers/misc/sgi-gru/grulib.h @@ -0,0 +1,97 @@ +/* + * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef __GRULIB_H__ +#define __GRULIB_H__ + +#define GRU_BASENAME "gru" +#define GRU_FULLNAME "/dev/gru" +#define GRU_IOCTL_NUM 'G' + +/* + * Maximum number of GRU segments that a user can have open + * ZZZ temp - set high for testing. Revisit. + */ +#define GRU_MAX_OPEN_CONTEXTS 32 + +/* Set Number of Request Blocks */ +#define GRU_CREATE_CONTEXT _IOWR(GRU_IOCTL_NUM, 1, void *) + +/* Register task as using the slice */ +#define GRU_SET_TASK_SLICE _IOWR(GRU_IOCTL_NUM, 5, void *) + +/* Fetch exception detail */ +#define GRU_USER_GET_EXCEPTION_DETAIL _IOWR(GRU_IOCTL_NUM, 6, void *) + +/* For user call_os handling - normally a TLB fault */ +#define GRU_USER_CALL_OS _IOWR(GRU_IOCTL_NUM, 8, void *) + +/* For user unload context */ +#define GRU_USER_UNLOAD_CONTEXT _IOWR(GRU_IOCTL_NUM, 9, void *) + +/* For fetching GRU chiplet status */ +#define GRU_GET_CHIPLET_STATUS _IOWR(GRU_IOCTL_NUM, 10, void *) + +/* For user TLB flushing (primarily for tests) */ +#define GRU_USER_FLUSH_TLB _IOWR(GRU_IOCTL_NUM, 50, void *) + +/* Get some config options (primarily for tests & emulator) */ +#define GRU_GET_CONFIG_INFO _IOWR(GRU_IOCTL_NUM, 51, void *) + +#define CONTEXT_WINDOW_BYTES(th) (GRU_GSEG_PAGESIZE * (th)) +#define THREAD_POINTER(p, th) (p + GRU_GSEG_PAGESIZE * (th)) + +/* + * Structure used to pass TLB flush parameters to the driver + */ +struct gru_create_context_req { + unsigned long gseg; + unsigned int data_segment_bytes; + unsigned int control_blocks; + unsigned int maximum_thread_count; + unsigned int options; +}; + +/* + * Structure used to pass unload context parameters to the driver + */ +struct gru_unload_context_req { + unsigned long gseg; +}; + +/* + * Structure used to pass TLB flush parameters to the driver + */ +struct gru_flush_tlb_req { + unsigned long gseg; + unsigned long vaddr; + size_t len; +}; + +/* + * GRU configuration info (temp - for testing) + */ +struct gru_config_info { + int cpus; + int blades; + int nodes; + int chiplets; + int fill[16]; +}; + +#endif /* __GRULIB_H__ */ diff --git a/drivers/misc/sgi-gru/grumain.c b/drivers/misc/sgi-gru/grumain.c new file mode 100644 index 00000000000..0eeb8dddd2f --- /dev/null +++ b/drivers/misc/sgi-gru/grumain.c @@ -0,0 +1,802 @@ +/* + * SN Platform GRU Driver + * + * DRIVER TABLE MANAGER + GRU CONTEXT LOAD/UNLOAD + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved. + */ + +#include <linux/kernel.h> +#include <linux/slab.h> +#include <linux/mm.h> +#include <linux/spinlock.h> +#include <linux/sched.h> +#include <linux/device.h> +#include <linux/list.h> +#include <asm/uv/uv_hub.h> +#include "gru.h" +#include "grutables.h" +#include "gruhandles.h" + +unsigned long gru_options __read_mostly; + +static struct device_driver gru_driver = { + .name = "gru" +}; + +static struct device gru_device = { + .bus_id = {0}, + .driver = &gru_driver, +}; + +struct device *grudev = &gru_device; + +/* + * Select a gru fault map to be used by the current cpu. Note that + * multiple cpus may be using the same map. + * ZZZ should "shift" be used?? Depends on HT cpu numbering + * ZZZ should be inline but did not work on emulator + */ +int gru_cpu_fault_map_id(void) +{ + return uv_blade_processor_id() % GRU_NUM_TFM; +} + +/*--------- ASID Management ------------------------------------------- + * + * Initially, assign asids sequentially from MIN_ASID .. MAX_ASID. + * Once MAX is reached, flush the TLB & start over. However, + * some asids may still be in use. There won't be many (percentage wise) still + * in use. Search active contexts & determine the value of the first + * asid in use ("x"s below). Set "limit" to this value. + * This defines a block of assignable asids. + * + * When "limit" is reached, search forward from limit+1 and determine the + * next block of assignable asids. + * + * Repeat until MAX_ASID is reached, then start over again. + * + * Each time MAX_ASID is reached, increment the asid generation. Since + * the search for in-use asids only checks contexts with GRUs currently + * assigned, asids in some contexts will be missed. Prior to loading + * a context, the asid generation of the GTS asid is rechecked. If it + * doesn't match the current generation, a new asid will be assigned. + * + * 0---------------x------------x---------------------x----| + * ^-next ^-limit ^-MAX_ASID + * + * All asid manipulation & context loading/unloading is protected by the + * gs_lock. + */ + +/* Hit the asid limit. Start over */ +static int gru_wrap_asid(struct gru_state *gru) +{ + gru_dbg(grudev, "gru %p\n", gru); + STAT(asid_wrap); + gru->gs_asid_gen++; + gru_flush_all_tlb(gru); + return MIN_ASID; +} + +/* Find the next chunk of unused asids */ +static int gru_reset_asid_limit(struct gru_state *gru, int asid) +{ + int i, gid, inuse_asid, limit; + + gru_dbg(grudev, "gru %p, asid 0x%x\n", gru, asid); + STAT(asid_next); + limit = MAX_ASID; + if (asid >= limit) + asid = gru_wrap_asid(gru); + gid = gru->gs_gid; +again: + for (i = 0; i < GRU_NUM_CCH; i++) { + if (!gru->gs_gts[i]) + continue; + inuse_asid = gru->gs_gts[i]->ts_gms->ms_asids[gid].mt_asid; + gru_dbg(grudev, "gru %p, inuse_asid 0x%x, cxtnum %d, gts %p\n", + gru, inuse_asid, i, gru->gs_gts[i]); + if (inuse_asid == asid) { + asid += ASID_INC; + if (asid >= limit) { + /* + * empty range: reset the range limit and + * start over + */ + limit = MAX_ASID; + if (asid >= MAX_ASID) + asid = gru_wrap_asid(gru); + goto again; + } + } + + if ((inuse_asid > asid) && (inuse_asid < limit)) + limit = inuse_asid; + } + gru->gs_asid_limit = limit; + gru->gs_asid = asid; + gru_dbg(grudev, "gru %p, new asid 0x%x, new_limit 0x%x\n", gru, asid, + limit); + return asid; +} + +/* Assign a new ASID to a thread context. */ +static int gru_assign_asid(struct gru_state *gru) +{ + int asid; + + spin_lock(&gru->gs_asid_lock); + gru->gs_asid += ASID_INC; + asid = gru->gs_asid; + if (asid >= gru->gs_asid_limit) + asid = gru_reset_asid_limit(gru, asid); + spin_unlock(&gru->gs_asid_lock); + + gru_dbg(grudev, "gru %p, asid 0x%x\n", gru, asid); + return asid; +} + +/* + * Clear n bits in a word. Return a word indicating the bits that were cleared. + * Optionally, build an array of chars that contain the bit numbers allocated. + */ +static unsigned long reserve_resources(unsigned long *p, int n, int mmax, + char *idx) +{ + unsigned long bits = 0; + int i; + + do { + i = find_first_bit(p, mmax); + if (i == mmax) + BUG(); + __clear_bit(i, p); + __set_bit(i, &bits); + if (idx) + *idx++ = i; + } while (--n); + return bits; +} + +unsigned long gru_reserve_cb_resources(struct gru_state *gru, int cbr_au_count, + char *cbmap) +{ + return reserve_resources(&gru->gs_cbr_map, cbr_au_count, GRU_CBR_AU, + cbmap); +} + +unsigned long gru_reserve_ds_resources(struct gru_state *gru, int dsr_au_count, + char *dsmap) +{ + return reserve_resources(&gru->gs_dsr_map, dsr_au_count, GRU_DSR_AU, + dsmap); +} + +static void reserve_gru_resources(struct gru_state *gru, + struct gru_thread_state *gts) +{ + gru->gs_active_contexts++; + gts->ts_cbr_map = + gru_reserve_cb_resources(gru, gts->ts_cbr_au_count, + gts->ts_cbr_idx); + gts->ts_dsr_map = + gru_reserve_ds_resources(gru, gts->ts_dsr_au_count, NULL); +} + +static void free_gru_resources(struct gru_state *gru, + struct gru_thread_state *gts) +{ + gru->gs_active_contexts--; + gru->gs_cbr_map |= gts->ts_cbr_map; + gru->gs_dsr_map |= gts->ts_dsr_map; +} + +/* + * Check if a GRU has sufficient free resources to satisfy an allocation + * request. Note: GRU locks may or may not be held when this is called. If + * not held, recheck after acquiring the appropriate locks. + * + * Returns 1 if sufficient resources, 0 if not + */ +static int check_gru_resources(struct gru_state *gru, int cbr_au_count, + int dsr_au_count, int max_active_contexts) +{ + return hweight64(gru->gs_cbr_map) >= cbr_au_count + && hweight64(gru->gs_dsr_map) >= dsr_au_count + && gru->gs_active_contexts < max_active_contexts; +} + +/* + * TLB manangment requires tracking all GRU chiplets that have loaded a GSEG + * context. + */ +static int gru_load_mm_tracker(struct gru_state *gru, struct gru_mm_struct *gms, + int ctxnum) +{ + struct gru_mm_tracker *asids = &gms->ms_asids[gru->gs_gid]; + unsigned short ctxbitmap = (1 << ctxnum); + int asid; + + spin_lock(&gms->ms_asid_lock); + asid = asids->mt_asid; + + if (asid == 0 || asids->mt_asid_gen != gru->gs_asid_gen) { + asid = gru_assign_asid(gru); + asids->mt_asid = asid; + asids->mt_asid_gen = gru->gs_asid_gen; + STAT(asid_new); + } else { + STAT(asid_reuse); + } + + BUG_ON(asids->mt_ctxbitmap & ctxbitmap); + asids->mt_ctxbitmap |= ctxbitmap; + if (!test_bit(gru->gs_gid, gms->ms_asidmap)) + __set_bit(gru->gs_gid, gms->ms_asidmap); + spin_unlock(&gms->ms_asid_lock); + + gru_dbg(grudev, + "gru %x, gms %p, ctxnum 0x%d, asid 0x%x, asidmap 0x%lx\n", + gru->gs_gid, gms, ctxnum, asid, gms->ms_asidmap[0]); + return asid; +} + +static void gru_unload_mm_tracker(struct gru_state *gru, + struct gru_mm_struct *gms, int ctxnum) +{ + struct gru_mm_tracker *asids; + unsigned short ctxbitmap; + + asids = &gms->ms_asids[gru->gs_gid]; + ctxbitmap = (1 << ctxnum); + spin_lock(&gms->ms_asid_lock); + BUG_ON((asids->mt_ctxbitmap & ctxbitmap) != ctxbitmap); + asids->mt_ctxbitmap ^= ctxbitmap; + gru_dbg(grudev, "gru %x, gms %p, ctxnum 0x%d, asidmap 0x%lx\n", + gru->gs_gid, gms, ctxnum, gms->ms_asidmap[0]); + spin_unlock(&gms->ms_asid_lock); +} + +/* + * Decrement the reference count on a GTS structure. Free the structure + * if the reference count goes to zero. + */ +void gts_drop(struct gru_thread_state *gts) +{ + if (gts && atomic_dec_return(>s->ts_refcnt) == 0) { + gru_drop_mmu_notifier(gts->ts_gms); + kfree(gts); + STAT(gts_free); + } +} + +/* + * Locate the GTS structure for the current thread. + */ +static struct gru_thread_state *gru_find_current_gts_nolock(struct gru_vma_data + *vdata, int tsid) +{ + struct gru_thread_state *gts; + + list_for_each_entry(gts, &vdata->vd_head, ts_next) + if (gts->ts_tsid == tsid) + return gts; + return NULL; +} + +/* + * Allocate a thread state structure. + */ +static struct gru_thread_state *gru_alloc_gts(struct vm_area_struct *vma, + struct gru_vma_data *vdata, + int tsid) +{ + struct gru_thread_state *gts; + int bytes; + + bytes = DSR_BYTES(vdata->vd_dsr_au_count) + + CBR_BYTES(vdata->vd_cbr_au_count); + bytes += sizeof(struct gru_thread_state); + gts = kzalloc(bytes, GFP_KERNEL); + if (!gts) + return NULL; + + STAT(gts_alloc); + atomic_set(>s->ts_refcnt, 1); + mutex_init(>s->ts_ctxlock); + gts->ts_cbr_au_count = vdata->vd_cbr_au_count; + gts->ts_dsr_au_count = vdata->vd_dsr_au_count; + gts->ts_user_options = vdata->vd_user_options; + gts->ts_tsid = tsid; + gts->ts_user_options = vdata->vd_user_options; + gts->ts_ctxnum = NULLCTX; + gts->ts_mm = current->mm; + gts->ts_vma = vma; + gts->ts_tlb_int_select = -1; + gts->ts_gms = gru_register_mmu_notifier(); + if (!gts->ts_gms) + goto err; + + gru_dbg(grudev, "alloc vdata %p, new gts %p\n", vdata, gts); + return gts; + +err: + gts_drop(gts); + return NULL; +} + +/* + * Allocate a vma private data structure. + */ +struct gru_vma_data *gru_alloc_vma_data(struct vm_area_struct *vma, int tsid) +{ + struct gru_vma_data *vdata = NULL; + + vdata = kmalloc(sizeof(*vdata), GFP_KERNEL); + if (!vdata) + return NULL; + + INIT_LIST_HEAD(&vdata->vd_head); + spin_lock_init(&vdata->vd_lock); + gru_dbg(grudev, "alloc vdata %p\n", vdata); + return vdata; +} + +/* + * Find the thread state structure for the current thread. + */ +struct gru_thread_state *gru_find_thread_state(struct vm_area_struct *vma, + int tsid) +{ + struct gru_vma_data *vdata = vma->vm_private_data; + struct gru_thread_state *gts; + + spin_lock(&vdata->vd_lock); + gts = gru_find_current_gts_nolock(vdata, tsid); + spin_unlock(&vdata->vd_lock); + gru_dbg(grudev, "vma %p, gts %p\n", vma, gts); + return gts; +} + +/* + * Allocate a new thread state for a GSEG. Note that races may allow + * another thread to race to create a gts. + */ +struct gru_thread_state *gru_alloc_thread_state(struct vm_area_struct *vma, + int tsid) +{ + struct gru_vma_data *vdata = vma->vm_private_data; + struct gru_thread_state *gts, *ngts; + + gts = gru_alloc_gts(vma, vdata, tsid); + if (!gts) + return NULL; + + spin_lock(&vdata->vd_lock); + ngts = gru_find_current_gts_nolock(vdata, tsid); + if (ngts) { + gts_drop(gts); + gts = ngts; + STAT(gts_double_allocate); + } else { + list_add(>s->ts_next, &vdata->vd_head); + } + spin_unlock(&vdata->vd_lock); + gru_dbg(grudev, "vma %p, gts %p\n", vma, gts); + return gts; +} + +/* + * Free the GRU context assigned to the thread state. + */ +static void gru_free_gru_context(struct gru_thread_state *gts) +{ + struct gru_state *gru; + + gru = gts->ts_gru; + gru_dbg(grudev, "gts %p, gru %p\n", gts, gru); + + spin_lock(&gru->gs_lock); + gru->gs_gts[gts->ts_ctxnum] = NULL; + free_gru_resources(gru, gts); + BUG_ON(test_bit(gts->ts_ctxnum, &gru->gs_context_map) == 0); + __clear_bit(gts->ts_ctxnum, &gru->gs_context_map); + gts->ts_ctxnum = NULLCTX; + gts->ts_gru = NULL; + spin_unlock(&gru->gs_lock); + + gts_drop(gts); + STAT(free_context); +} + +/* + * Prefetching cachelines help hardware performance. + * (Strictly a performance enhancement. Not functionally required). + */ +static void prefetch_data(void *p, int num, int stride) +{ + while (num-- > 0) { + prefetchw(p); + p += stride; + } +} + +static inline long gru_copy_handle(void *d, void *s) +{ + memcpy(d, s, GRU_HANDLE_BYTES); + return GRU_HANDLE_BYTES; +} + +/* rewrite in assembly & use lots of prefetch */ +static void gru_load_context_data(void *save, void *grubase, int ctxnum, + unsigned long cbrmap, unsigned long dsrmap) +{ + void *gseg, *cb, *cbe; + unsigned long length; + int i, scr; + + gseg = grubase + ctxnum * GRU_GSEG_STRIDE; + length = hweight64(dsrmap) * GRU_DSR_AU_BYTES; + prefetch_data(gseg + GRU_DS_BASE, length / GRU_CACHE_LINE_BYTES, + GRU_CACHE_LINE_BYTES); + + cb = gseg + GRU_CB_BASE; + cbe = grubase + GRU_CBE_BASE; + for_each_cbr_in_allocation_map(i, &cbrmap, scr) { + prefetch_data(cb, 1, GRU_CACHE_LINE_BYTES); + prefetch_data(cbe + i * GRU_HANDLE_STRIDE, 1, + GRU_CACHE_LINE_BYTES); + cb += GRU_HANDLE_STRIDE; + } + + cb = gseg + GRU_CB_BASE; + for_each_cbr_in_allocation_map(i, &cbrmap, scr) { + save += gru_copy_handle(cb, save); + save += gru_copy_handle(cbe + i * GRU_HANDLE_STRIDE, save); + cb += GRU_HANDLE_STRIDE; + } + + memcpy(gseg + GRU_DS_BASE, save, length); +} + +static void gru_unload_context_data(void *save, void *grubase, int ctxnum, + unsigned long cbrmap, unsigned long dsrmap) +{ + void *gseg, *cb, *cbe; + unsigned long length; + int i, scr; + + gseg = grubase + ctxnum * GRU_GSEG_STRIDE; + + cb = gseg + GRU_CB_BASE; + cbe = grubase + GRU_CBE_BASE; + for_each_cbr_in_allocation_map(i, &cbrmap, scr) { + save += gru_copy_handle(save, cb); + save += gru_copy_handle(save, cbe + i * GRU_HANDLE_STRIDE); + cb += GRU_HANDLE_STRIDE; + } + length = hweight64(dsrmap) * GRU_DSR_AU_BYTES; + memcpy(save, gseg + GRU_DS_BASE, length); +} + +void gru_unload_context(struct gru_thread_state *gts, int savestate) +{ + struct gru_state *gru = gts->ts_gru; + struct gru_context_configuration_handle *cch; + int ctxnum = gts->ts_ctxnum; + + zap_vma_ptes(gts->ts_vma, UGRUADDR(gts), GRU_GSEG_PAGESIZE); + cch = get_cch(gru->gs_gru_base_vaddr, ctxnum); + + lock_cch_handle(cch); + if (cch_interrupt_sync(cch)) + BUG(); + gru_dbg(grudev, "gts %p\n", gts); + + gru_unload_mm_tracker(gru, gts->ts_gms, gts->ts_ctxnum); + if (savestate) + gru_unload_context_data(gts->ts_gdata, gru->gs_gru_base_vaddr, + ctxnum, gts->ts_cbr_map, + gts->ts_dsr_map); + + if (cch_deallocate(cch)) + BUG(); + gts->ts_force_unload = 0; /* ts_force_unload locked by CCH lock */ + unlock_cch_handle(cch); + + gru_free_gru_context(gts); + STAT(unload_context); +} + +/* + * Load a GRU context by copying it from the thread data structure in memory + * to the GRU. + */ +static void gru_load_context(struct gru_thread_state *gts) +{ + struct gru_state *gru = gts->ts_gru; + struct gru_context_configuration_handle *cch; + int err, asid, ctxnum = gts->ts_ctxnum; + + gru_dbg(grudev, "gts %p\n", gts); + cch = get_cch(gru->gs_gru_base_vaddr, ctxnum); + + lock_cch_handle(cch); + asid = gru_load_mm_tracker(gru, gts->ts_gms, gts->ts_ctxnum); + cch->tfm_fault_bit_enable = + (gts->ts_user_options == GRU_OPT_MISS_FMM_POLL + || gts->ts_user_options == GRU_OPT_MISS_FMM_INTR); + cch->tlb_int_enable = (gts->ts_user_options == GRU_OPT_MISS_FMM_INTR); + if (cch->tlb_int_enable) { + gts->ts_tlb_int_select = gru_cpu_fault_map_id(); + cch->tlb_int_select = gts->ts_tlb_int_select; + } + cch->tfm_done_bit_enable = 0; + err = cch_allocate(cch, asid, gts->ts_cbr_map, gts->ts_dsr_map); + if (err) { + gru_dbg(grudev, + "err %d: cch %p, gts %p, cbr 0x%lx, dsr 0x%lx\n", + err, cch, gts, gts->ts_cbr_map, gts->ts_dsr_map); + BUG(); + } + + gru_load_context_data(gts->ts_gdata, gru->gs_gru_base_vaddr, ctxnum, + gts->ts_cbr_map, gts->ts_dsr_map); + + if (cch_start(cch)) + BUG(); + unlock_cch_handle(cch); + + STAT(load_context); +} + +/* + * Update fields in an active CCH: + * - retarget interrupts on local blade + * - force a delayed context unload by clearing the CCH asids. This + * forces TLB misses for new GRU instructions. The context is unloaded + * when the next TLB miss occurs. + */ +static int gru_update_cch(struct gru_thread_state *gts, int int_select) +{ + struct gru_context_configuration_handle *cch; + struct gru_state *gru = gts->ts_gru; + int i, ctxnum = gts->ts_ctxnum, ret = 0; + + cch = get_cch(gru->gs_gru_base_vaddr, ctxnum); + + lock_cch_handle(cch); + if (cch->state == CCHSTATE_ACTIVE) { + if (gru->gs_gts[gts->ts_ctxnum] != gts) + goto exit; + if (cch_interrupt(cch)) + BUG(); + if (int_select >= 0) { + gts->ts_tlb_int_select = int_select; + cch->tlb_int_select = int_select; + } else { + for (i = 0; i < 8; i++) + cch->asid[i] = 0; + cch->tfm_fault_bit_enable = 0; + cch->tlb_int_enable = 0; + gts->ts_force_unload = 1; + } + if (cch_start(cch)) + BUG(); + ret = 1; + } +exit: + unlock_cch_handle(cch); + return ret; +} + +/* + * Update CCH tlb interrupt select. Required when all the following is true: + * - task's GRU context is loaded into a GRU + * - task is using interrupt notification for TLB faults + * - task has migrated to a different cpu on the same blade where + * it was previously running. + */ +static int gru_retarget_intr(struct gru_thread_state *gts) +{ + if (gts->ts_tlb_int_select < 0 + || gts->ts_tlb_int_select == gru_cpu_fault_map_id()) + return 0; + + gru_dbg(grudev, "retarget from %d to %d\n", gts->ts_tlb_int_select, + gru_cpu_fault_map_id()); + return gru_update_cch(gts, gru_cpu_fault_map_id()); +} + + +/* + * Insufficient GRU resources available on the local blade. Steal a context from + * a process. This is a hack until a _real_ resource scheduler is written.... + */ +#define next_ctxnum(n) ((n) < GRU_NUM_CCH - 2 ? (n) + 1 : 0) +#define next_gru(b, g) (((g) < &(b)->bs_grus[GRU_CHIPLETS_PER_BLADE - 1]) ? \ + ((g)+1) : &(b)->bs_grus[0]) + +static void gru_steal_context(struct gru_thread_state *gts) +{ + struct gru_blade_state *blade; + struct gru_state *gru, *gru0; + struct gru_thread_state *ngts = NULL; + int ctxnum, ctxnum0, flag = 0, cbr, dsr; + + cbr = gts->ts_cbr_au_count; + dsr = gts->ts_dsr_au_count; + + preempt_disable(); + blade = gru_base[uv_numa_blade_id()]; + spin_lock(&blade->bs_lock); + + ctxnum = next_ctxnum(blade->bs_lru_ctxnum); + gru = blade->bs_lru_gru; + if (ctxnum == 0) + gru = next_gru(blade, gru); + ctxnum0 = ctxnum; + gru0 = gru; + while (1) { + if (check_gru_resources(gru, cbr, dsr, GRU_NUM_CCH)) + break; + spin_lock(&gru->gs_lock); + for (; ctxnum < GRU_NUM_CCH; ctxnum++) { + if (flag && gru == gru0 && ctxnum == ctxnum0) + break; + ngts = gru->gs_gts[ctxnum]; + /* + * We are grabbing locks out of order, so trylock is + * needed. GTSs are usually not locked, so the odds of + * success are high. If trylock fails, try to steal a + * different GSEG. + */ + if (ngts && mutex_trylock(&ngts->ts_ctxlock)) + break; + ngts = NULL; + flag = 1; + } + spin_unlock(&gru->gs_lock); + if (ngts || (flag && gru == gru0 && ctxnum == ctxnum0)) + break; + ctxnum = 0; + gru = next_gru(blade, gru); + } + blade->bs_lru_gru = gru; + blade->bs_lru_ctxnum = ctxnum; + spin_unlock(&blade->bs_lock); + preempt_enable(); + + if (ngts) { + STAT(steal_context); + ngts->ts_steal_jiffies = jiffies; + gru_unload_context(ngts, 1); + mutex_unlock(&ngts->ts_ctxlock); + } else { + STAT(steal_context_failed); + } + gru_dbg(grudev, + "stole gru %x, ctxnum %d from gts %p. Need cb %d, ds %d;" + " avail cb %ld, ds %ld\n", + gru->gs_gid, ctxnum, ngts, cbr, dsr, hweight64(gru->gs_cbr_map), + hweight64(gru->gs_dsr_map)); +} + +/* + * Scan the GRUs on the local blade & assign a GRU context. + */ +static struct gru_state *gru_assign_gru_context(struct gru_thread_state *gts) +{ + struct gru_state *gru, *grux; + int i, max_active_contexts; + + preempt_disable(); + +again: + gru = NULL; + max_active_contexts = GRU_NUM_CCH; + for_each_gru_on_blade(grux, uv_numa_blade_id(), i) { + if (check_gru_resources(grux, gts->ts_cbr_au_count, + gts->ts_dsr_au_count, + max_active_contexts)) { + gru = grux; + max_active_contexts = grux->gs_active_contexts; + if (max_active_contexts == 0) + break; + } + } + + if (gru) { + spin_lock(&gru->gs_lock); + if (!check_gru_resources(gru, gts->ts_cbr_au_count, + gts->ts_dsr_au_count, GRU_NUM_CCH)) { + spin_unlock(&gru->gs_lock); + goto again; + } + reserve_gru_resources(gru, gts); + gts->ts_gru = gru; + gts->ts_ctxnum = + find_first_zero_bit(&gru->gs_context_map, GRU_NUM_CCH); + BUG_ON(gts->ts_ctxnum == GRU_NUM_CCH); + atomic_inc(>s->ts_refcnt); + gru->gs_gts[gts->ts_ctxnum] = gts; + __set_bit(gts->ts_ctxnum, &gru->gs_context_map); + spin_unlock(&gru->gs_lock); + + STAT(assign_context); + gru_dbg(grudev, + "gseg %p, gts %p, gru %x, ctx %d, cbr %d, dsr %d\n", + gseg_virtual_address(gts->ts_gru, gts->ts_ctxnum), gts, + gts->ts_gru->gs_gid, gts->ts_ctxnum, + gts->ts_cbr_au_count, gts->ts_dsr_au_count); + } else { + gru_dbg(grudev, "failed to allocate a GTS %s\n", ""); + STAT(assign_context_failed); + } + + preempt_enable(); + return gru; +} + +/* + * gru_nopage + * + * Map the user's GRU segment + * + * Note: gru segments alway mmaped on GRU_GSEG_PAGESIZE boundaries. + */ +int gru_fault(struct vm_area_struct *vma, struct vm_fault *vmf) +{ + struct gru_thread_state *gts; + unsigned long paddr, vaddr; + + vaddr = (unsigned long)vmf->virtual_address; + gru_dbg(grudev, "vma %p, vaddr 0x%lx (0x%lx)\n", + vma, vaddr, GSEG_BASE(vaddr)); + STAT(nopfn); + + /* The following check ensures vaddr is a valid address in the VMA */ + gts = gru_find_thread_state(vma, TSID(vaddr, vma)); + if (!gts) + return VM_FAULT_SIGBUS; + +again: + preempt_disable(); + mutex_lock(>s->ts_ctxlock); + if (gts->ts_gru) { + if (gts->ts_gru->gs_blade_id != uv_numa_blade_id()) { + STAT(migrated_nopfn_unload); + gru_unload_context(gts, 1); + } else { + if (gru_retarget_intr(gts)) + STAT(migrated_nopfn_retarget); + } + } + + if (!gts->ts_gru) { + if (!gru_assign_gru_context(gts)) { + mutex_unlock(>s->ts_ctxlock); + preempt_enable(); + schedule_timeout(GRU_ASSIGN_DELAY); /* true hack ZZZ */ + if (gts->ts_steal_jiffies + GRU_STEAL_DELAY < jiffies) + gru_steal_context(gts); + goto again; + } + gru_load_context(gts); + paddr = gseg_physical_address(gts->ts_gru, gts->ts_ctxnum); + remap_pfn_range(vma, vaddr & ~(GRU_GSEG_PAGESIZE - 1), + paddr >> PAGE_SHIFT, GRU_GSEG_PAGESIZE, + vma->vm_page_prot); + } + + mutex_unlock(>s->ts_ctxlock); + preempt_enable(); + + return VM_FAULT_NOPAGE; +} + diff --git a/drivers/misc/sgi-gru/gruprocfs.c b/drivers/misc/sgi-gru/gruprocfs.c new file mode 100644 index 00000000000..533923f83f1 --- /dev/null +++ b/drivers/misc/sgi-gru/gruprocfs.c @@ -0,0 +1,336 @@ +/* + * SN Platform GRU Driver + * + * PROC INTERFACES + * + * This file supports the /proc interfaces for the GRU driver + * + * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <linux/proc_fs.h> +#include <linux/device.h> +#include <linux/seq_file.h> +#include <linux/uaccess.h> +#include "gru.h" +#include "grulib.h" +#include "grutables.h" + +#define printstat(s, f) printstat_val(s, &gru_stats.f, #f) + +static void printstat_val(struct seq_file *s, atomic_long_t *v, char *id) +{ + unsigned long val = atomic_long_read(v); + + if (val) + seq_printf(s, "%16lu %s\n", val, id); +} + +static int statistics_show(struct seq_file *s, void *p) +{ + printstat(s, vdata_alloc); + printstat(s, vdata_free); + printstat(s, gts_alloc); + printstat(s, gts_free); + printstat(s, vdata_double_alloc); + printstat(s, gts_double_allocate); + printstat(s, assign_context); + printstat(s, assign_context_failed); + printstat(s, free_context); + printstat(s, load_context); + printstat(s, unload_context); + printstat(s, steal_context); + printstat(s, steal_context_failed); + printstat(s, nopfn); + printstat(s, break_cow); + printstat(s, asid_new); + printstat(s, asid_next); + printstat(s, asid_wrap); + printstat(s, asid_reuse); + printstat(s, intr); + printstat(s, call_os); + printstat(s, call_os_check_for_bug); + printstat(s, call_os_wait_queue); + printstat(s, user_flush_tlb); + printstat(s, user_unload_context); + printstat(s, user_exception); + printstat(s, set_task_slice); + printstat(s, migrate_check); + printstat(s, migrated_retarget); + printstat(s, migrated_unload); + printstat(s, migrated_unload_delay); + printstat(s, migrated_nopfn_retarget); + printstat(s, migrated_nopfn_unload); + printstat(s, tlb_dropin); + printstat(s, tlb_dropin_fail_no_asid); + printstat(s, tlb_dropin_fail_upm); + printstat(s, tlb_dropin_fail_invalid); + printstat(s, tlb_dropin_fail_range_active); + printstat(s, tlb_dropin_fail_idle); + printstat(s, tlb_dropin_fail_fmm); + printstat(s, mmu_invalidate_range); + printstat(s, mmu_invalidate_page); + printstat(s, mmu_clear_flush_young); + printstat(s, flush_tlb); + printstat(s, flush_tlb_gru); + printstat(s, flush_tlb_gru_tgh); + printstat(s, flush_tlb_gru_zero_asid); + printstat(s, copy_gpa); + printstat(s, mesq_receive); + printstat(s, mesq_receive_none); + printstat(s, mesq_send); + printstat(s, mesq_send_failed); + printstat(s, mesq_noop); + printstat(s, mesq_send_unexpected_error); + printstat(s, mesq_send_lb_overflow); + printstat(s, mesq_send_qlimit_reached); + printstat(s, mesq_send_amo_nacked); + printstat(s, mesq_send_put_nacked); + printstat(s, mesq_qf_not_full); + printstat(s, mesq_qf_locked); + printstat(s, mesq_qf_noop_not_full); + printstat(s, mesq_qf_switch_head_failed); + printstat(s, mesq_qf_unexpected_error); + printstat(s, mesq_noop_unexpected_error); + printstat(s, mesq_noop_lb_overflow); + printstat(s, mesq_noop_qlimit_reached); + printstat(s, mesq_noop_amo_nacked); + printstat(s, mesq_noop_put_nacked); + return 0; +} + +static ssize_t statistics_write(struct file *file, const char __user *userbuf, + size_t count, loff_t *data) +{ + memset(&gru_stats, 0, sizeof(gru_stats)); + return count; +} + +static int options_show(struct seq_file *s, void *p) +{ + seq_printf(s, "0x%lx\n", gru_options); + return 0; +} + +static ssize_t options_write(struct file *file, const char __user *userbuf, + size_t count, loff_t *data) +{ + unsigned long val; + char buf[80]; + + if (copy_from_user + (buf, userbuf, count < sizeof(buf) ? count : sizeof(buf))) + return -EFAULT; + if (!strict_strtoul(buf, 10, &val)) + gru_options = val; + + return count; +} + +static int cch_seq_show(struct seq_file *file, void *data) +{ + long gid = *(long *)data; + int i; + struct gru_state *gru = GID_TO_GRU(gid); + struct gru_thread_state *ts; + const char *mode[] = { "??", "UPM", "INTR", "OS_POLL" }; + + if (gid == 0) + seq_printf(file, "#%5s%5s%6s%9s%6s%8s%8s\n", "gid", "bid", + "ctx#", "pid", "cbrs", "dsbytes", "mode"); + if (gru) + for (i = 0; i < GRU_NUM_CCH; i++) { + ts = gru->gs_gts[i]; + if (!ts) + continue; + seq_printf(file, " %5d%5d%6d%9d%6d%8d%8s\n", + gru->gs_gid, gru->gs_blade_id, i, + ts->ts_tgid_owner, + ts->ts_cbr_au_count * GRU_CBR_AU_SIZE, + ts->ts_cbr_au_count * GRU_DSR_AU_BYTES, + mode[ts->ts_user_options & + GRU_OPT_MISS_MASK]); + } + + return 0; +} + +static int gru_seq_show(struct seq_file *file, void *data) +{ + long gid = *(long *)data, ctxfree, cbrfree, dsrfree; + struct gru_state *gru = GID_TO_GRU(gid); + + if (gid == 0) { + seq_printf(file, "#%5s%5s%7s%6s%6s%8s%6s%6s\n", "gid", "nid", + "ctx", "cbr", "dsr", "ctx", "cbr", "dsr"); + seq_printf(file, "#%5s%5s%7s%6s%6s%8s%6s%6s\n", "", "", "busy", + "busy", "busy", "free", "free", "free"); + } + if (gru) { + ctxfree = GRU_NUM_CCH - gru->gs_active_contexts; + cbrfree = hweight64(gru->gs_cbr_map) * GRU_CBR_AU_SIZE; + dsrfree = hweight64(gru->gs_dsr_map) * GRU_DSR_AU_BYTES; + seq_printf(file, " %5d%5d%7ld%6ld%6ld%8ld%6ld%6ld\n", + gru->gs_gid, gru->gs_blade_id, GRU_NUM_CCH - ctxfree, + GRU_NUM_CBE - cbrfree, GRU_NUM_DSR_BYTES - dsrfree, + ctxfree, cbrfree, dsrfree); + } + + return 0; +} + +static void seq_stop(struct seq_file *file, void *data) +{ +} + +static void *seq_start(struct seq_file *file, loff_t *gid) +{ + if (*gid < GRU_MAX_GRUS) + return gid; + return NULL; +} + +static void *seq_next(struct seq_file *file, void *data, loff_t *gid) +{ + (*gid)++; + if (*gid < GRU_MAX_GRUS) + return gid; + return NULL; +} + +static const struct seq_operations cch_seq_ops = { + .start = seq_start, + .next = seq_next, + .stop = seq_stop, + .show = cch_seq_show +}; + +static const struct seq_operations gru_seq_ops = { + .start = seq_start, + .next = seq_next, + .stop = seq_stop, + .show = gru_seq_show +}; + +static int statistics_open(struct inode *inode, struct file *file) +{ + return single_open(file, statistics_show, NULL); +} + +static int options_open(struct inode *inode, struct file *file) +{ + return single_open(file, options_show, NULL); +} + +static int cch_open(struct inode *inode, struct file *file) +{ + return seq_open(file, &cch_seq_ops); +} + +static int gru_open(struct inode *inode, struct file *file) +{ + return seq_open(file, &gru_seq_ops); +} + +/* *INDENT-OFF* */ +static const struct file_operations statistics_fops = { + .open = statistics_open, + .read = seq_read, + .write = statistics_write, + .llseek = seq_lseek, + .release = single_release, +}; + +static const struct file_operations options_fops = { + .open = options_open, + .read = seq_read, + .write = options_write, + .llseek = seq_lseek, + .release = single_release, +}; + +static const struct file_operations cch_fops = { + .open = cch_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, +}; +static const struct file_operations gru_fops = { + .open = gru_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, +}; + +static struct proc_entry { + char *name; + int mode; + const struct file_operations *fops; + struct proc_dir_entry *entry; +} proc_files[] = { + {"statistics", 0644, &statistics_fops}, + {"debug_options", 0644, &options_fops}, + {"cch_status", 0444, &cch_fops}, + {"gru_status", 0444, &gru_fops}, + {NULL} +}; +/* *INDENT-ON* */ + +static struct proc_dir_entry *proc_gru __read_mostly; + +static int create_proc_file(struct proc_entry *p) +{ + p->entry = create_proc_entry(p->name, p->mode, proc_gru); + if (!p->entry) + return -1; + p->entry->proc_fops = p->fops; + return 0; +} + +static void delete_proc_files(void) +{ + struct proc_entry *p; + + if (proc_gru) { + for (p = proc_files; p->name; p++) + if (p->entry) + remove_proc_entry(p->name, proc_gru); + remove_proc_entry("gru", NULL); + } +} + +int gru_proc_init(void) +{ + struct proc_entry *p; + + proc_mkdir("sgi_uv", NULL); + proc_gru = proc_mkdir("sgi_uv/gru", NULL); + + for (p = proc_files; p->name; p++) + if (create_proc_file(p)) + goto err; + return 0; + +err: + delete_proc_files(); + return -1; +} + +void gru_proc_exit(void) +{ + delete_proc_files(); +} diff --git a/drivers/misc/sgi-gru/grutables.h b/drivers/misc/sgi-gru/grutables.h new file mode 100644 index 00000000000..4251018f70f --- /dev/null +++ b/drivers/misc/sgi-gru/grutables.h @@ -0,0 +1,609 @@ +/* + * SN Platform GRU Driver + * + * GRU DRIVER TABLES, MACROS, externs, etc + * + * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef __GRUTABLES_H__ +#define __GRUTABLES_H__ + +/* + * GRU Chiplet: + * The GRU is a user addressible memory accelerator. It provides + * several forms of load, store, memset, bcopy instructions. In addition, it + * contains special instructions for AMOs, sending messages to message + * queues, etc. + * + * The GRU is an integral part of the node controller. It connects + * directly to the cpu socket. In its current implementation, there are 2 + * GRU chiplets in the node controller on each blade (~node). + * + * The entire GRU memory space is fully coherent and cacheable by the cpus. + * + * Each GRU chiplet has a physical memory map that looks like the following: + * + * +-----------------+ + * |/////////////////| + * |/////////////////| + * |/////////////////| + * |/////////////////| + * |/////////////////| + * |/////////////////| + * |/////////////////| + * |/////////////////| + * +-----------------+ + * | system control | + * +-----------------+ _______ +-------------+ + * |/////////////////| / | | + * |/////////////////| / | | + * |/////////////////| / | instructions| + * |/////////////////| / | | + * |/////////////////| / | | + * |/////////////////| / |-------------| + * |/////////////////| / | | + * +-----------------+ | | + * | context 15 | | data | + * +-----------------+ | | + * | ...... | \ | | + * +-----------------+ \____________ +-------------+ + * | context 1 | + * +-----------------+ + * | context 0 | + * +-----------------+ + * + * Each of the "contexts" is a chunk of memory that can be mmaped into user + * space. The context consists of 2 parts: + * + * - an instruction space that can be directly accessed by the user + * to issue GRU instructions and to check instruction status. + * + * - a data area that acts as normal RAM. + * + * User instructions contain virtual addresses of data to be accessed by the + * GRU. The GRU contains a TLB that is used to convert these user virtual + * addresses to physical addresses. + * + * The "system control" area of the GRU chiplet is used by the kernel driver + * to manage user contexts and to perform functions such as TLB dropin and + * purging. + * + * One context may be reserved for the kernel and used for cross-partition + * communication. The GRU will also be used to asynchronously zero out + * large blocks of memory (not currently implemented). + * + * + * Tables: + * + * VDATA-VMA Data - Holds a few parameters. Head of linked list of + * GTS tables for threads using the GSEG + * GTS - Gru Thread State - contains info for managing a GSEG context. A + * GTS is allocated for each thread accessing a + * GSEG. + * GTD - GRU Thread Data - contains shadow copy of GRU data when GSEG is + * not loaded into a GRU + * GMS - GRU Memory Struct - Used to manage TLB shootdowns. Tracks GRUs + * where a GSEG has been loaded. Similar to + * an mm_struct but for GRU. + * + * GS - GRU State - Used to manage the state of a GRU chiplet + * BS - Blade State - Used to manage state of all GRU chiplets + * on a blade + * + * + * Normal task tables for task using GRU. + * - 2 threads in process + * - 2 GSEGs open in process + * - GSEG1 is being used by both threads + * - GSEG2 is used only by thread 2 + * + * task -->| + * task ---+---> mm ->------ (notifier) -------+-> gms + * | | + * |--> vma -> vdata ---> gts--->| GSEG1 (thread1) + * | | | + * | +-> gts--->| GSEG1 (thread2) + * | | + * |--> vma -> vdata ---> gts--->| GSEG2 (thread2) + * . + * . + * + * GSEGs are marked DONTCOPY on fork + * + * At open + * file.private_data -> NULL + * + * At mmap, + * vma -> vdata + * + * After gseg reference + * vma -> vdata ->gts + * + * After fork + * parent + * vma -> vdata -> gts + * child + * (vma is not copied) + * + */ + +#include <linux/rmap.h> +#include <linux/interrupt.h> +#include <linux/mutex.h> +#include <linux/wait.h> +#include <linux/mmu_notifier.h> +#include "gru.h" +#include "gruhandles.h" + +extern struct gru_stats_s gru_stats; +extern struct gru_blade_state *gru_base[]; +extern unsigned long gru_start_paddr, gru_end_paddr; + +#define GRU_MAX_BLADES MAX_NUMNODES +#define GRU_MAX_GRUS (GRU_MAX_BLADES * GRU_CHIPLETS_PER_BLADE) + +#define GRU_DRIVER_ID_STR "SGI GRU Device Driver" +#define GRU_DRIVER_VERSION_STR "0.80" + +/* + * GRU statistics. + */ +struct gru_stats_s { + atomic_long_t vdata_alloc; + atomic_long_t vdata_free; + atomic_long_t gts_alloc; + atomic_long_t gts_free; + atomic_long_t vdata_double_alloc; + atomic_long_t gts_double_allocate; + atomic_long_t assign_context; + atomic_long_t assign_context_failed; + atomic_long_t free_context; + atomic_long_t load_context; + atomic_long_t unload_context; + atomic_long_t steal_context; + atomic_long_t steal_context_failed; + atomic_long_t nopfn; + atomic_long_t break_cow; + atomic_long_t asid_new; + atomic_long_t asid_next; + atomic_long_t asid_wrap; + atomic_long_t asid_reuse; + atomic_long_t intr; + atomic_long_t call_os; + atomic_long_t call_os_check_for_bug; + atomic_long_t call_os_wait_queue; + atomic_long_t user_flush_tlb; + atomic_long_t user_unload_context; + atomic_long_t user_exception; + atomic_long_t set_task_slice; + atomic_long_t migrate_check; + atomic_long_t migrated_retarget; + atomic_long_t migrated_unload; + atomic_long_t migrated_unload_delay; + atomic_long_t migrated_nopfn_retarget; + atomic_long_t migrated_nopfn_unload; + atomic_long_t tlb_dropin; + atomic_long_t tlb_dropin_fail_no_asid; + atomic_long_t tlb_dropin_fail_upm; + atomic_long_t tlb_dropin_fail_invalid; + atomic_long_t tlb_dropin_fail_range_active; + atomic_long_t tlb_dropin_fail_idle; + atomic_long_t tlb_dropin_fail_fmm; + atomic_long_t mmu_invalidate_range; + atomic_long_t mmu_invalidate_page; + atomic_long_t mmu_clear_flush_young; + atomic_long_t flush_tlb; + atomic_long_t flush_tlb_gru; + atomic_long_t flush_tlb_gru_tgh; + atomic_long_t flush_tlb_gru_zero_asid; + + atomic_long_t copy_gpa; + + atomic_long_t mesq_receive; + atomic_long_t mesq_receive_none; + atomic_long_t mesq_send; + atomic_long_t mesq_send_failed; + atomic_long_t mesq_noop; + atomic_long_t mesq_send_unexpected_error; + atomic_long_t mesq_send_lb_overflow; + atomic_long_t mesq_send_qlimit_reached; + atomic_long_t mesq_send_amo_nacked; + atomic_long_t mesq_send_put_nacked; + atomic_long_t mesq_qf_not_full; + atomic_long_t mesq_qf_locked; + atomic_long_t mesq_qf_noop_not_full; + atomic_long_t mesq_qf_switch_head_failed; + atomic_long_t mesq_qf_unexpected_error; + atomic_long_t mesq_noop_unexpected_error; + atomic_long_t mesq_noop_lb_overflow; + atomic_long_t mesq_noop_qlimit_reached; + atomic_long_t mesq_noop_amo_nacked; + atomic_long_t mesq_noop_put_nacked; + +}; + +#define OPT_DPRINT 1 +#define OPT_STATS 2 +#define GRU_QUICKLOOK 4 + + +#define IRQ_GRU 110 /* Starting IRQ number for interrupts */ + +/* Delay in jiffies between attempts to assign a GRU context */ +#define GRU_ASSIGN_DELAY ((HZ * 20) / 1000) + +/* + * If a process has it's context stolen, min delay in jiffies before trying to + * steal a context from another process. + */ +#define GRU_STEAL_DELAY ((HZ * 200) / 1000) + +#define STAT(id) do { \ + if (gru_options & OPT_STATS) \ + atomic_long_inc(&gru_stats.id); \ + } while (0) + +#ifdef CONFIG_SGI_GRU_DEBUG +#define gru_dbg(dev, fmt, x...) \ + do { \ + if (gru_options & OPT_DPRINT) \ + dev_dbg(dev, "%s: " fmt, __func__, x); \ + } while (0) +#else +#define gru_dbg(x...) +#endif + +/*----------------------------------------------------------------------------- + * ASID management + */ +#define MAX_ASID 0xfffff0 +#define MIN_ASID 8 +#define ASID_INC 8 /* number of regions */ + +/* Generate a GRU asid value from a GRU base asid & a virtual address. */ +#if defined CONFIG_IA64 +#define VADDR_HI_BIT 64 +#define GRUREGION(addr) ((addr) >> (VADDR_HI_BIT - 3) & 3) +#elif defined __x86_64 +#define VADDR_HI_BIT 48 +#define GRUREGION(addr) (0) /* ZZZ could do better */ +#else +#error "Unsupported architecture" +#endif +#define GRUASID(asid, addr) ((asid) + GRUREGION(addr)) + +/*------------------------------------------------------------------------------ + * File & VMS Tables + */ + +struct gru_state; + +/* + * This structure is pointed to from the mmstruct via the notifier pointer. + * There is one of these per address space. + */ +struct gru_mm_tracker { + unsigned int mt_asid_gen; /* ASID wrap count */ + int mt_asid; /* current base ASID for gru */ + unsigned short mt_ctxbitmap; /* bitmap of contexts using + asid */ +}; + +struct gru_mm_struct { + struct mmu_notifier ms_notifier; + atomic_t ms_refcnt; + spinlock_t ms_asid_lock; /* protects ASID assignment */ + atomic_t ms_range_active;/* num range_invals active */ + char ms_released; + wait_queue_head_t ms_wait_queue; + DECLARE_BITMAP(ms_asidmap, GRU_MAX_GRUS); + struct gru_mm_tracker ms_asids[GRU_MAX_GRUS]; +}; + +/* + * One of these structures is allocated when a GSEG is mmaped. The + * structure is pointed to by the vma->vm_private_data field in the vma struct. + */ +struct gru_vma_data { + spinlock_t vd_lock; /* Serialize access to vma */ + struct list_head vd_head; /* head of linked list of gts */ + long vd_user_options;/* misc user option flags */ + int vd_cbr_au_count; + int vd_dsr_au_count; +}; + +/* + * One of these is allocated for each thread accessing a mmaped GRU. A linked + * list of these structure is hung off the struct gru_vma_data in the mm_struct. + */ +struct gru_thread_state { + struct list_head ts_next; /* list - head at vma-private */ + struct mutex ts_ctxlock; /* load/unload CTX lock */ + struct mm_struct *ts_mm; /* mm currently mapped to + context */ + struct vm_area_struct *ts_vma; /* vma of GRU context */ + struct gru_state *ts_gru; /* GRU where the context is + loaded */ + struct gru_mm_struct *ts_gms; /* asid & ioproc struct */ + unsigned long ts_cbr_map; /* map of allocated CBRs */ + unsigned long ts_dsr_map; /* map of allocated DATA + resources */ + unsigned long ts_steal_jiffies;/* jiffies when context last + stolen */ + long ts_user_options;/* misc user option flags */ + pid_t ts_tgid_owner; /* task that is using the + context - for migration */ + int ts_tsid; /* thread that owns the + structure */ + int ts_tlb_int_select;/* target cpu if interrupts + enabled */ + int ts_ctxnum; /* context number where the + context is loaded */ + atomic_t ts_refcnt; /* reference count GTS */ + unsigned char ts_dsr_au_count;/* Number of DSR resources + required for contest */ + unsigned char ts_cbr_au_count;/* Number of CBR resources + required for contest */ + char ts_force_unload;/* force context to be unloaded + after migration */ + char ts_cbr_idx[GRU_CBR_AU];/* CBR numbers of each + allocated CB */ + unsigned long ts_gdata[0]; /* save area for GRU data (CB, + DS, CBE) */ +}; + +/* + * Threaded programs actually allocate an array of GSEGs when a context is + * created. Each thread uses a separate GSEG. TSID is the index into the GSEG + * array. + */ +#define TSID(a, v) (((a) - (v)->vm_start) / GRU_GSEG_PAGESIZE) +#define UGRUADDR(gts) ((gts)->ts_vma->vm_start + \ + (gts)->ts_tsid * GRU_GSEG_PAGESIZE) + +#define NULLCTX (-1) /* if context not loaded into GRU */ + +/*----------------------------------------------------------------------------- + * GRU State Tables + */ + +/* + * One of these exists for each GRU chiplet. + */ +struct gru_state { + struct gru_blade_state *gs_blade; /* GRU state for entire + blade */ + unsigned long gs_gru_base_paddr; /* Physical address of + gru segments (64) */ + void *gs_gru_base_vaddr; /* Virtual address of + gru segments (64) */ + unsigned char gs_gid; /* unique GRU number */ + unsigned char gs_tgh_local_shift; /* used to pick TGH for + local flush */ + unsigned char gs_tgh_first_remote; /* starting TGH# for + remote flush */ + unsigned short gs_blade_id; /* blade of GRU */ + spinlock_t gs_asid_lock; /* lock used for + assigning asids */ + spinlock_t gs_lock; /* lock used for + assigning contexts */ + + /* -- the following are protected by the gs_asid_lock spinlock ---- */ + unsigned int gs_asid; /* Next availe ASID */ + unsigned int gs_asid_limit; /* Limit of available + ASIDs */ + unsigned int gs_asid_gen; /* asid generation. + Inc on wrap */ + + /* --- the following fields are protected by the gs_lock spinlock --- */ + unsigned long gs_context_map; /* bitmap to manage + contexts in use */ + unsigned long gs_cbr_map; /* bitmap to manage CB + resources */ + unsigned long gs_dsr_map; /* bitmap used to manage + DATA resources */ + unsigned int gs_reserved_cbrs; /* Number of kernel- + reserved cbrs */ + unsigned int gs_reserved_dsr_bytes; /* Bytes of kernel- + reserved dsrs */ + unsigned short gs_active_contexts; /* number of contexts + in use */ + struct gru_thread_state *gs_gts[GRU_NUM_CCH]; /* GTS currently using + the context */ +}; + +/* + * This structure contains the GRU state for all the GRUs on a blade. + */ +struct gru_blade_state { + void *kernel_cb; /* First kernel + reserved cb */ + void *kernel_dsr; /* First kernel + reserved DSR */ + /* ---- the following are protected by the bs_lock spinlock ---- */ + spinlock_t bs_lock; /* lock used for + stealing contexts */ + int bs_lru_ctxnum; /* STEAL - last context + stolen */ + struct gru_state *bs_lru_gru; /* STEAL - last gru + stolen */ + + struct gru_state bs_grus[GRU_CHIPLETS_PER_BLADE]; +}; + +/*----------------------------------------------------------------------------- + * Address Primitives + */ +#define get_tfm_for_cpu(g, c) \ + ((struct gru_tlb_fault_map *)get_tfm((g)->gs_gru_base_vaddr, (c))) +#define get_tfh_by_index(g, i) \ + ((struct gru_tlb_fault_handle *)get_tfh((g)->gs_gru_base_vaddr, (i))) +#define get_tgh_by_index(g, i) \ + ((struct gru_tlb_global_handle *)get_tgh((g)->gs_gru_base_vaddr, (i))) +#define get_cbe_by_index(g, i) \ + ((struct gru_control_block_extended *)get_cbe((g)->gs_gru_base_vaddr,\ + (i))) + +/*----------------------------------------------------------------------------- + * Useful Macros + */ + +/* Given a blade# & chiplet#, get a pointer to the GRU */ +#define get_gru(b, c) (&gru_base[b]->bs_grus[c]) + +/* Number of bytes to save/restore when unloading/loading GRU contexts */ +#define DSR_BYTES(dsr) ((dsr) * GRU_DSR_AU_BYTES) +#define CBR_BYTES(cbr) ((cbr) * GRU_HANDLE_BYTES * GRU_CBR_AU_SIZE * 2) + +/* Convert a user CB number to the actual CBRNUM */ +#define thread_cbr_number(gts, n) ((gts)->ts_cbr_idx[(n) / GRU_CBR_AU_SIZE] \ + * GRU_CBR_AU_SIZE + (n) % GRU_CBR_AU_SIZE) + +/* Convert a gid to a pointer to the GRU */ +#define GID_TO_GRU(gid) \ + (gru_base[(gid) / GRU_CHIPLETS_PER_BLADE] ? \ + (&gru_base[(gid) / GRU_CHIPLETS_PER_BLADE]-> \ + bs_grus[(gid) % GRU_CHIPLETS_PER_BLADE]) : \ + NULL) + +/* Scan all active GRUs in a GRU bitmap */ +#define for_each_gru_in_bitmap(gid, map) \ + for ((gid) = find_first_bit((map), GRU_MAX_GRUS); (gid) < GRU_MAX_GRUS;\ + (gid)++, (gid) = find_next_bit((map), GRU_MAX_GRUS, (gid))) + +/* Scan all active GRUs on a specific blade */ +#define for_each_gru_on_blade(gru, nid, i) \ + for ((gru) = gru_base[nid]->bs_grus, (i) = 0; \ + (i) < GRU_CHIPLETS_PER_BLADE; \ + (i)++, (gru)++) + +/* Scan all active GTSs on a gru. Note: must hold ss_lock to use this macro. */ +#define for_each_gts_on_gru(gts, gru, ctxnum) \ + for ((ctxnum) = 0; (ctxnum) < GRU_NUM_CCH; (ctxnum)++) \ + if (((gts) = (gru)->gs_gts[ctxnum])) + +/* Scan each CBR whose bit is set in a TFM (or copy of) */ +#define for_each_cbr_in_tfm(i, map) \ + for ((i) = find_first_bit(map, GRU_NUM_CBE); \ + (i) < GRU_NUM_CBE; \ + (i)++, (i) = find_next_bit(map, GRU_NUM_CBE, i)) + +/* Scan each CBR in a CBR bitmap. Note: multiple CBRs in an allocation unit */ +#define for_each_cbr_in_allocation_map(i, map, k) \ + for ((k) = find_first_bit(map, GRU_CBR_AU); (k) < GRU_CBR_AU; \ + (k) = find_next_bit(map, GRU_CBR_AU, (k) + 1)) \ + for ((i) = (k)*GRU_CBR_AU_SIZE; \ + (i) < ((k) + 1) * GRU_CBR_AU_SIZE; (i)++) + +/* Scan each DSR in a DSR bitmap. Note: multiple DSRs in an allocation unit */ +#define for_each_dsr_in_allocation_map(i, map, k) \ + for ((k) = find_first_bit((const unsigned long *)map, GRU_DSR_AU);\ + (k) < GRU_DSR_AU; \ + (k) = find_next_bit((const unsigned long *)map, \ + GRU_DSR_AU, (k) + 1)) \ + for ((i) = (k) * GRU_DSR_AU_CL; \ + (i) < ((k) + 1) * GRU_DSR_AU_CL; (i)++) + +#define gseg_physical_address(gru, ctxnum) \ + ((gru)->gs_gru_base_paddr + ctxnum * GRU_GSEG_STRIDE) +#define gseg_virtual_address(gru, ctxnum) \ + ((gru)->gs_gru_base_vaddr + ctxnum * GRU_GSEG_STRIDE) + +/*----------------------------------------------------------------------------- + * Lock / Unlock GRU handles + * Use the "delresp" bit in the handle as a "lock" bit. + */ + +/* Lock hierarchy checking enabled only in emulator */ + +static inline void __lock_handle(void *h) +{ + while (test_and_set_bit(1, h)) + cpu_relax(); +} + +static inline void __unlock_handle(void *h) +{ + clear_bit(1, h); +} + +static inline void lock_cch_handle(struct gru_context_configuration_handle *cch) +{ + __lock_handle(cch); +} + +static inline void unlock_cch_handle(struct gru_context_configuration_handle + *cch) +{ + __unlock_handle(cch); +} + +static inline void lock_tgh_handle(struct gru_tlb_global_handle *tgh) +{ + __lock_handle(tgh); +} + +static inline void unlock_tgh_handle(struct gru_tlb_global_handle *tgh) +{ + __unlock_handle(tgh); +} + +/*----------------------------------------------------------------------------- + * Function prototypes & externs + */ +struct gru_unload_context_req; + +extern struct vm_operations_struct gru_vm_ops; +extern struct device *grudev; + +extern struct gru_vma_data *gru_alloc_vma_data(struct vm_area_struct *vma, + int tsid); +extern struct gru_thread_state *gru_find_thread_state(struct vm_area_struct + *vma, int tsid); +extern struct gru_thread_state *gru_alloc_thread_state(struct vm_area_struct + *vma, int tsid); +extern void gru_unload_context(struct gru_thread_state *gts, int savestate); +extern void gts_drop(struct gru_thread_state *gts); +extern void gru_tgh_flush_init(struct gru_state *gru); +extern int gru_kservices_init(struct gru_state *gru); +extern irqreturn_t gru_intr(int irq, void *dev_id); +extern int gru_handle_user_call_os(unsigned long address); +extern int gru_user_flush_tlb(unsigned long arg); +extern int gru_user_unload_context(unsigned long arg); +extern int gru_get_exception_detail(unsigned long arg); +extern int gru_set_task_slice(long address); +extern int gru_cpu_fault_map_id(void); +extern struct vm_area_struct *gru_find_vma(unsigned long vaddr); +extern void gru_flush_all_tlb(struct gru_state *gru); +extern int gru_proc_init(void); +extern void gru_proc_exit(void); + +extern unsigned long gru_reserve_cb_resources(struct gru_state *gru, + int cbr_au_count, char *cbmap); +extern unsigned long gru_reserve_ds_resources(struct gru_state *gru, + int dsr_au_count, char *dsmap); +extern int gru_fault(struct vm_area_struct *, struct vm_fault *vmf); +extern struct gru_mm_struct *gru_register_mmu_notifier(void); +extern void gru_drop_mmu_notifier(struct gru_mm_struct *gms); + +extern void gru_flush_tlb_range(struct gru_mm_struct *gms, unsigned long start, + unsigned long len); + +extern unsigned long gru_options; + +#endif /* __GRUTABLES_H__ */ diff --git a/drivers/misc/sgi-gru/grutlbpurge.c b/drivers/misc/sgi-gru/grutlbpurge.c new file mode 100644 index 00000000000..bcfd5425e2e --- /dev/null +++ b/drivers/misc/sgi-gru/grutlbpurge.c @@ -0,0 +1,372 @@ +/* + * SN Platform GRU Driver + * + * MMUOPS callbacks + TLB flushing + * + * This file handles emu notifier callbacks from the core kernel. The callbacks + * are used to update the TLB in the GRU as a result of changes in the + * state of a process address space. This file also handles TLB invalidates + * from the GRU driver. + * + * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <linux/kernel.h> +#include <linux/list.h> +#include <linux/spinlock.h> +#include <linux/mm.h> +#include <linux/slab.h> +#include <linux/device.h> +#include <linux/hugetlb.h> +#include <linux/delay.h> +#include <linux/timex.h> +#include <linux/delay.h> +#include <linux/srcu.h> +#include <asm/processor.h> +#include "gru.h" +#include "grutables.h" +#include <asm/uv/uv_hub.h> + +#define gru_random() get_cycles() + +/* ---------------------------------- TLB Invalidation functions -------- + * get_tgh_handle + * + * Find a TGH to use for issuing a TLB invalidate. For GRUs that are on the + * local blade, use a fixed TGH that is a function of the blade-local cpu + * number. Normally, this TGH is private to the cpu & no contention occurs for + * the TGH. For offblade GRUs, select a random TGH in the range above the + * private TGHs. A spinlock is required to access this TGH & the lock must be + * released when the invalidate is completes. This sucks, but it is the best we + * can do. + * + * Note that the spinlock is IN the TGH handle so locking does not involve + * additional cache lines. + * + */ +static inline int get_off_blade_tgh(struct gru_state *gru) +{ + int n; + + n = GRU_NUM_TGH - gru->gs_tgh_first_remote; + n = gru_random() % n; + n += gru->gs_tgh_first_remote; + return n; +} + +static inline int get_on_blade_tgh(struct gru_state *gru) +{ + return uv_blade_processor_id() >> gru->gs_tgh_local_shift; +} + +static struct gru_tlb_global_handle *get_lock_tgh_handle(struct gru_state + *gru) +{ + struct gru_tlb_global_handle *tgh; + int n; + + preempt_disable(); + if (uv_numa_blade_id() == gru->gs_blade_id) + n = get_on_blade_tgh(gru); + else + n = get_off_blade_tgh(gru); + tgh = get_tgh_by_index(gru, n); + lock_tgh_handle(tgh); + + return tgh; +} + +static void get_unlock_tgh_handle(struct gru_tlb_global_handle *tgh) +{ + unlock_tgh_handle(tgh); + preempt_enable(); +} + +/* + * gru_flush_tlb_range + * + * General purpose TLB invalidation function. This function scans every GRU in + * the ENTIRE system (partition) looking for GRUs where the specified MM has + * been accessed by the GRU. For each GRU found, the TLB must be invalidated OR + * the ASID invalidated. Invalidating an ASID causes a new ASID to be assigned + * on the next fault. This effectively flushes the ENTIRE TLB for the MM at the + * cost of (possibly) a large number of future TLBmisses. + * + * The current algorithm is optimized based on the following (somewhat true) + * assumptions: + * - GRU contexts are not loaded into a GRU unless a reference is made to + * the data segment or control block (this is true, not an assumption). + * If a DS/CB is referenced, the user will also issue instructions that + * cause TLBmisses. It is not necessary to optimize for the case where + * contexts are loaded but no instructions cause TLB misses. (I know + * this will happen but I'm not optimizing for it). + * - GRU instructions to invalidate TLB entries are SLOOOOWWW - normally + * a few usec but in unusual cases, it could be longer. Avoid if + * possible. + * - intrablade process migration between cpus is not frequent but is + * common. + * - a GRU context is not typically migrated to a different GRU on the + * blade because of intrablade migration + * - interblade migration is rare. Processes migrate their GRU context to + * the new blade. + * - if interblade migration occurs, migration back to the original blade + * is very very rare (ie., no optimization for this case) + * - most GRU instruction operate on a subset of the user REGIONS. Code + * & shared library regions are not likely targets of GRU instructions. + * + * To help improve the efficiency of TLB invalidation, the GMS data + * structure is maintained for EACH address space (MM struct). The GMS is + * also the structure that contains the pointer to the mmu callout + * functions. This structure is linked to the mm_struct for the address space + * using the mmu "register" function. The mmu interfaces are used to + * provide the callbacks for TLB invalidation. The GMS contains: + * + * - asid[maxgrus] array. ASIDs are assigned to a GRU when a context is + * loaded into the GRU. + * - asidmap[maxgrus]. bitmap to make it easier to find non-zero asids in + * the above array + * - ctxbitmap[maxgrus]. Indicates the contexts that are currently active + * in the GRU for the address space. This bitmap must be passed to the + * GRU to do an invalidate. + * + * The current algorithm for invalidating TLBs is: + * - scan the asidmap for GRUs where the context has been loaded, ie, + * asid is non-zero. + * - for each gru found: + * - if the ctxtmap is non-zero, there are active contexts in the + * GRU. TLB invalidate instructions must be issued to the GRU. + * - if the ctxtmap is zero, no context is active. Set the ASID to + * zero to force a full TLB invalidation. This is fast but will + * cause a lot of TLB misses if the context is reloaded onto the + * GRU + * + */ + +void gru_flush_tlb_range(struct gru_mm_struct *gms, unsigned long start, + unsigned long len) +{ + struct gru_state *gru; + struct gru_mm_tracker *asids; + struct gru_tlb_global_handle *tgh; + unsigned long num; + int grupagesize, pagesize, pageshift, gid, asid; + + /* ZZZ TODO - handle huge pages */ + pageshift = PAGE_SHIFT; + pagesize = (1UL << pageshift); + grupagesize = GRU_PAGESIZE(pageshift); + num = min(((len + pagesize - 1) >> pageshift), GRUMAXINVAL); + + STAT(flush_tlb); + gru_dbg(grudev, "gms %p, start 0x%lx, len 0x%lx, asidmap 0x%lx\n", gms, + start, len, gms->ms_asidmap[0]); + + spin_lock(&gms->ms_asid_lock); + for_each_gru_in_bitmap(gid, gms->ms_asidmap) { + STAT(flush_tlb_gru); + gru = GID_TO_GRU(gid); + asids = gms->ms_asids + gid; + asid = asids->mt_asid; + if (asids->mt_ctxbitmap && asid) { + STAT(flush_tlb_gru_tgh); + asid = GRUASID(asid, start); + gru_dbg(grudev, + " FLUSH gruid %d, asid 0x%x, num %ld, cbmap 0x%x\n", + gid, asid, num, asids->mt_ctxbitmap); + tgh = get_lock_tgh_handle(gru); + tgh_invalidate(tgh, start, 0, asid, grupagesize, 0, + num - 1, asids->mt_ctxbitmap); + get_unlock_tgh_handle(tgh); + } else { + STAT(flush_tlb_gru_zero_asid); + asids->mt_asid = 0; + __clear_bit(gru->gs_gid, gms->ms_asidmap); + gru_dbg(grudev, + " CLEARASID gruid %d, asid 0x%x, cbtmap 0x%x, asidmap 0x%lx\n", + gid, asid, asids->mt_ctxbitmap, + gms->ms_asidmap[0]); + } + } + spin_unlock(&gms->ms_asid_lock); +} + +/* + * Flush the entire TLB on a chiplet. + */ +void gru_flush_all_tlb(struct gru_state *gru) +{ + struct gru_tlb_global_handle *tgh; + + gru_dbg(grudev, "gru %p, gid %d\n", gru, gru->gs_gid); + tgh = get_lock_tgh_handle(gru); + tgh_invalidate(tgh, 0, ~0, 0, 1, 1, GRUMAXINVAL - 1, 0); + get_unlock_tgh_handle(tgh); + preempt_enable(); +} + +/* + * MMUOPS notifier callout functions + */ +static void gru_invalidate_range_start(struct mmu_notifier *mn, + struct mm_struct *mm, + unsigned long start, unsigned long end) +{ + struct gru_mm_struct *gms = container_of(mn, struct gru_mm_struct, + ms_notifier); + + STAT(mmu_invalidate_range); + atomic_inc(&gms->ms_range_active); + gru_dbg(grudev, "gms %p, start 0x%lx, end 0x%lx, act %d\n", gms, + start, end, atomic_read(&gms->ms_range_active)); + gru_flush_tlb_range(gms, start, end - start); +} + +static void gru_invalidate_range_end(struct mmu_notifier *mn, + struct mm_struct *mm, unsigned long start, + unsigned long end) +{ + struct gru_mm_struct *gms = container_of(mn, struct gru_mm_struct, + ms_notifier); + + /* ..._and_test() provides needed barrier */ + (void)atomic_dec_and_test(&gms->ms_range_active); + + wake_up_all(&gms->ms_wait_queue); + gru_dbg(grudev, "gms %p, start 0x%lx, end 0x%lx\n", gms, start, end); +} + +static void gru_invalidate_page(struct mmu_notifier *mn, struct mm_struct *mm, + unsigned long address) +{ + struct gru_mm_struct *gms = container_of(mn, struct gru_mm_struct, + ms_notifier); + + STAT(mmu_invalidate_page); + gru_flush_tlb_range(gms, address, PAGE_SIZE); + gru_dbg(grudev, "gms %p, address 0x%lx\n", gms, address); +} + +static void gru_release(struct mmu_notifier *mn, struct mm_struct *mm) +{ + struct gru_mm_struct *gms = container_of(mn, struct gru_mm_struct, + ms_notifier); + + gms->ms_released = 1; + gru_dbg(grudev, "gms %p\n", gms); +} + + +static const struct mmu_notifier_ops gru_mmuops = { + .invalidate_page = gru_invalidate_page, + .invalidate_range_start = gru_invalidate_range_start, + .invalidate_range_end = gru_invalidate_range_end, + .release = gru_release, +}; + +/* Move this to the basic mmu_notifier file. But for now... */ +static struct mmu_notifier *mmu_find_ops(struct mm_struct *mm, + const struct mmu_notifier_ops *ops) +{ + struct mmu_notifier *mn, *gru_mn = NULL; + struct hlist_node *n; + + if (mm->mmu_notifier_mm) { + rcu_read_lock(); + hlist_for_each_entry_rcu(mn, n, &mm->mmu_notifier_mm->list, + hlist) + if (mn->ops == ops) { + gru_mn = mn; + break; + } + rcu_read_unlock(); + } + return gru_mn; +} + +struct gru_mm_struct *gru_register_mmu_notifier(void) +{ + struct gru_mm_struct *gms; + struct mmu_notifier *mn; + + mn = mmu_find_ops(current->mm, &gru_mmuops); + if (mn) { + gms = container_of(mn, struct gru_mm_struct, ms_notifier); + atomic_inc(&gms->ms_refcnt); + } else { + gms = kzalloc(sizeof(*gms), GFP_KERNEL); + if (gms) { + spin_lock_init(&gms->ms_asid_lock); + gms->ms_notifier.ops = &gru_mmuops; + atomic_set(&gms->ms_refcnt, 1); + init_waitqueue_head(&gms->ms_wait_queue); + __mmu_notifier_register(&gms->ms_notifier, current->mm); + } + } + gru_dbg(grudev, "gms %p, refcnt %d\n", gms, + atomic_read(&gms->ms_refcnt)); + return gms; +} + +void gru_drop_mmu_notifier(struct gru_mm_struct *gms) +{ + gru_dbg(grudev, "gms %p, refcnt %d, released %d\n", gms, + atomic_read(&gms->ms_refcnt), gms->ms_released); + if (atomic_dec_return(&gms->ms_refcnt) == 0) { + if (!gms->ms_released) + mmu_notifier_unregister(&gms->ms_notifier, current->mm); + kfree(gms); + } +} + +/* + * Setup TGH parameters. There are: + * - 24 TGH handles per GRU chiplet + * - a portion (MAX_LOCAL_TGH) of the handles are reserved for + * use by blade-local cpus + * - the rest are used by off-blade cpus. This usage is + * less frequent than blade-local usage. + * + * For now, use 16 handles for local flushes, 8 for remote flushes. If the blade + * has less tan or equal to 16 cpus, each cpu has a unique handle that it can + * use. + */ +#define MAX_LOCAL_TGH 16 + +void gru_tgh_flush_init(struct gru_state *gru) +{ + int cpus, shift = 0, n; + + cpus = uv_blade_nr_possible_cpus(gru->gs_blade_id); + + /* n = cpus rounded up to next power of 2 */ + if (cpus) { + n = 1 << fls(cpus - 1); + + /* + * shift count for converting local cpu# to TGH index + * 0 if cpus <= MAX_LOCAL_TGH, + * 1 if cpus <= 2*MAX_LOCAL_TGH, + * etc + */ + shift = max(0, fls(n - 1) - fls(MAX_LOCAL_TGH - 1)); + } + gru->gs_tgh_local_shift = shift; + + /* first starting TGH index to use for remote purges */ + gru->gs_tgh_first_remote = (cpus + (1 << shift) - 1) >> shift; + +} diff --git a/drivers/misc/sgi-xp/Makefile b/drivers/misc/sgi-xp/Makefile index b6e40a7958c..35ce2857807 100644 --- a/drivers/misc/sgi-xp/Makefile +++ b/drivers/misc/sgi-xp/Makefile @@ -3,9 +3,17 @@ # obj-$(CONFIG_SGI_XP) += xp.o -xp-y := xp_main.o xp_nofault.o +xp-y := xp_main.o +xp-$(CONFIG_IA64_SGI_SN2) += xp_sn2.o xp_nofault.o +xp-$(CONFIG_IA64_GENERIC) += xp_sn2.o xp_nofault.o xp_uv.o +xp-$(CONFIG_IA64_SGI_UV) += xp_uv.o +xp-$(CONFIG_X86_64) += xp_uv.o obj-$(CONFIG_SGI_XP) += xpc.o xpc-y := xpc_main.o xpc_channel.o xpc_partition.o +xpc-$(CONFIG_IA64_SGI_SN2) += xpc_sn2.o +xpc-$(CONFIG_IA64_GENERIC) += xpc_sn2.o xpc_uv.o +xpc-$(CONFIG_IA64_SGI_UV) += xpc_uv.o +xpc-$(CONFIG_X86_64) += xpc_uv.o obj-$(CONFIG_SGI_XP) += xpnet.o diff --git a/drivers/misc/sgi-xp/xp.h b/drivers/misc/sgi-xp/xp.h index 03a87a307e3..859a5281c61 100644 --- a/drivers/misc/sgi-xp/xp.h +++ b/drivers/misc/sgi-xp/xp.h @@ -13,11 +13,34 @@ #ifndef _DRIVERS_MISC_SGIXP_XP_H #define _DRIVERS_MISC_SGIXP_XP_H -#include <linux/cache.h> -#include <linux/hardirq.h> #include <linux/mutex.h> -#include <asm/sn/types.h> -#include <asm/sn/bte.h> + +#ifdef CONFIG_IA64 +#include <asm/system.h> +#include <asm/sn/arch.h> /* defines is_shub1() and is_shub2() */ +#define is_shub() ia64_platform_is("sn2") +#define is_uv() ia64_platform_is("uv") +#endif +#ifdef CONFIG_X86_64 +#include <asm/genapic.h> +#define is_uv() is_uv_system() +#endif + +#ifndef is_shub1 +#define is_shub1() 0 +#endif + +#ifndef is_shub2 +#define is_shub2() 0 +#endif + +#ifndef is_shub +#define is_shub() 0 +#endif + +#ifndef is_uv +#define is_uv() 0 +#endif #ifdef USE_DBUG_ON #define DBUG_ON(condition) BUG_ON(condition) @@ -26,133 +49,56 @@ #endif /* - * Define the maximum number of logically defined partitions the system - * can support. It is constrained by the maximum number of hardware - * partitionable regions. The term 'region' in this context refers to the - * minimum number of nodes that can comprise an access protection grouping. - * The access protection is in regards to memory, IPI and IOI. + * Define the maximum number of partitions the system can possibly support. + * It is based on the maximum number of hardware partitionable regions. The + * term 'region' in this context refers to the minimum number of nodes that + * can comprise an access protection grouping. The access protection is in + * regards to memory, IPI and IOI. * * The maximum number of hardware partitionable regions is equal to the * maximum number of nodes in the entire system divided by the minimum number * of nodes that comprise an access protection grouping. */ -#define XP_MAX_PARTITIONS 64 - -/* - * Define the number of u64s required to represent all the C-brick nasids - * as a bitmap. The cross-partition kernel modules deal only with - * C-brick nasids, thus the need for bitmaps which don't account for - * odd-numbered (non C-brick) nasids. - */ -#define XP_MAX_PHYSNODE_ID (MAX_NUMALINK_NODES / 2) -#define XP_NASID_MASK_BYTES ((XP_MAX_PHYSNODE_ID + 7) / 8) -#define XP_NASID_MASK_WORDS ((XP_MAX_PHYSNODE_ID + 63) / 64) - -/* - * Wrapper for bte_copy() that should it return a failure status will retry - * the bte_copy() once in the hope that the failure was due to a temporary - * aberration (i.e., the link going down temporarily). - * - * src - physical address of the source of the transfer. - * vdst - virtual address of the destination of the transfer. - * len - number of bytes to transfer from source to destination. - * mode - see bte_copy() for definition. - * notification - see bte_copy() for definition. - * - * Note: xp_bte_copy() should never be called while holding a spinlock. - */ -static inline bte_result_t -xp_bte_copy(u64 src, u64 vdst, u64 len, u64 mode, void *notification) -{ - bte_result_t ret; - u64 pdst = ia64_tpa(vdst); - - /* - * Ensure that the physically mapped memory is contiguous. - * - * We do this by ensuring that the memory is from region 7 only. - * If the need should arise to use memory from one of the other - * regions, then modify the BUG_ON() statement to ensure that the - * memory from that region is always physically contiguous. - */ - BUG_ON(REGION_NUMBER(vdst) != RGN_KERNEL); - - ret = bte_copy(src, pdst, len, mode, notification); - if ((ret != BTE_SUCCESS) && BTE_ERROR_RETRY(ret)) { - if (!in_interrupt()) - cond_resched(); - - ret = bte_copy(src, pdst, len, mode, notification); - } - - return ret; -} +#define XP_MAX_NPARTITIONS_SN2 64 +#define XP_MAX_NPARTITIONS_UV 256 /* * XPC establishes channel connections between the local partition and any * other partition that is currently up. Over these channels, kernel-level * `users' can communicate with their counterparts on the other partitions. * - * The maxinum number of channels is limited to eight. For performance reasons, - * the internal cross partition structures require sixteen bytes per channel, - * and eight allows all of this interface-shared info to fit in one cache line. - * - * XPC_NCHANNELS reflects the total number of channels currently defined. * If the need for additional channels arises, one can simply increase - * XPC_NCHANNELS accordingly. If the day should come where that number - * exceeds the MAXIMUM number of channels allowed (eight), then one will need - * to make changes to the XPC code to allow for this. + * XPC_MAX_NCHANNELS accordingly. If the day should come where that number + * exceeds the absolute MAXIMUM number of channels possible (eight), then one + * will need to make changes to the XPC code to accommodate for this. + * + * The absolute maximum number of channels possible is limited to eight for + * performance reasons on sn2 hardware. The internal cross partition structures + * require sixteen bytes per channel, and eight allows all of this + * interface-shared info to fit in one 128-byte cacheline. */ #define XPC_MEM_CHANNEL 0 /* memory channel number */ #define XPC_NET_CHANNEL 1 /* network channel number */ -#define XPC_NCHANNELS 2 /* #of defined channels */ -#define XPC_MAX_NCHANNELS 8 /* max #of channels allowed */ +#define XPC_MAX_NCHANNELS 2 /* max #of channels allowed */ -#if XPC_NCHANNELS > XPC_MAX_NCHANNELS -#error XPC_NCHANNELS exceeds MAXIMUM allowed. +#if XPC_MAX_NCHANNELS > 8 +#error XPC_MAX_NCHANNELS exceeds absolute MAXIMUM possible. #endif /* - * The format of an XPC message is as follows: - * - * +-------+--------------------------------+ - * | flags |////////////////////////////////| - * +-------+--------------------------------+ - * | message # | - * +----------------------------------------+ - * | payload (user-defined message) | - * | | - * : - * | | - * +----------------------------------------+ - * - * The size of the payload is defined by the user via xpc_connect(). A user- - * defined message resides in the payload area. - * - * The user should have no dealings with the message header, but only the - * message's payload. When a message entry is allocated (via xpc_allocate()) - * a pointer to the payload area is returned and not the actual beginning of - * the XPC message. The user then constructs a message in the payload area - * and passes that pointer as an argument on xpc_send() or xpc_send_notify(). - * - * The size of a message entry (within a message queue) must be a cacheline - * sized multiple in order to facilitate the BTE transfer of messages from one - * message queue to another. A macro, XPC_MSG_SIZE(), is provided for the user + * Define macro, XPC_MSG_SIZE(), is provided for the user * that wants to fit as many msg entries as possible in a given memory size * (e.g. a memory page). */ -struct xpc_msg { - u8 flags; /* FOR XPC INTERNAL USE ONLY */ - u8 reserved[7]; /* FOR XPC INTERNAL USE ONLY */ - s64 number; /* FOR XPC INTERNAL USE ONLY */ - - u64 payload; /* user defined portion of message */ -}; +#define XPC_MSG_MAX_SIZE 128 +#define XPC_MSG_HDR_MAX_SIZE 16 +#define XPC_MSG_PAYLOAD_MAX_SIZE (XPC_MSG_MAX_SIZE - XPC_MSG_HDR_MAX_SIZE) -#define XPC_MSG_PAYLOAD_OFFSET (u64) (&((struct xpc_msg *)0)->payload) #define XPC_MSG_SIZE(_payload_size) \ - L1_CACHE_ALIGN(XPC_MSG_PAYLOAD_OFFSET + (_payload_size)) + ALIGN(XPC_MSG_HDR_MAX_SIZE + (_payload_size), \ + is_uv() ? 64 : 128) + /* * Define the return values and values passed to user's callout functions. @@ -233,8 +179,20 @@ enum xp_retval { xpDisconnected, /* 51: channel disconnected (closed) */ xpBteCopyError, /* 52: bte_copy() returned error */ + xpSalError, /* 53: sn SAL error */ + xpRsvdPageNotSet, /* 54: the reserved page is not set up */ + xpPayloadTooBig, /* 55: payload too large for message slot */ + + xpUnsupported, /* 56: unsupported functionality or resource */ + xpNeedMoreInfo, /* 57: more info is needed by SAL */ - xpUnknownReason /* 53: unknown reason - must be last in enum */ + xpGruCopyError, /* 58: gru_copy_gru() returned error */ + xpGruSendMqError, /* 59: gru send message queue related error */ + + xpBadChannelNumber, /* 60: invalid channel number */ + xpBadMsgType, /* 60: invalid message type */ + + xpUnknownReason /* 61: unknown reason - must be last in enum */ }; /* @@ -285,6 +243,9 @@ typedef void (*xpc_channel_func) (enum xp_retval reason, short partid, * calling xpc_received(). * * All other reason codes indicate failure. + * + * NOTE: The user defined function must be callable by an interrupt handler + * and thus cannot block. */ typedef void (*xpc_notify_func) (enum xp_retval reason, short partid, int ch_number, void *key); @@ -308,23 +269,22 @@ struct xpc_registration { xpc_channel_func func; /* function to call */ void *key; /* pointer to user's key */ u16 nentries; /* #of msg entries in local msg queue */ - u16 msg_size; /* message queue's message size */ + u16 entry_size; /* message queue's message entry size */ u32 assigned_limit; /* limit on #of assigned kthreads */ u32 idle_limit; /* limit on #of idle kthreads */ } ____cacheline_aligned; #define XPC_CHANNEL_REGISTERED(_c) (xpc_registrations[_c].func != NULL) -/* the following are valid xpc_allocate() flags */ +/* the following are valid xpc_send() or xpc_send_notify() flags */ #define XPC_WAIT 0 /* wait flag */ #define XPC_NOWAIT 1 /* no wait flag */ struct xpc_interface { void (*connect) (int); void (*disconnect) (int); - enum xp_retval (*allocate) (short, int, u32, void **); - enum xp_retval (*send) (short, int, void *); - enum xp_retval (*send_notify) (short, int, void *, + enum xp_retval (*send) (short, int, u32, void *, u16); + enum xp_retval (*send_notify) (short, int, u32, void *, u16, xpc_notify_func, void *); void (*received) (short, int, void *); enum xp_retval (*partid_to_nasids) (short, void *); @@ -334,10 +294,9 @@ extern struct xpc_interface xpc_interface; extern void xpc_set_interface(void (*)(int), void (*)(int), - enum xp_retval (*)(short, int, u32, void **), - enum xp_retval (*)(short, int, void *), - enum xp_retval (*)(short, int, void *, - xpc_notify_func, void *), + enum xp_retval (*)(short, int, u32, void *, u16), + enum xp_retval (*)(short, int, u32, void *, u16, + xpc_notify_func, void *), void (*)(short, int, void *), enum xp_retval (*)(short, void *)); extern void xpc_clear_interface(void); @@ -347,22 +306,19 @@ extern enum xp_retval xpc_connect(int, xpc_channel_func, void *, u16, extern void xpc_disconnect(int); static inline enum xp_retval -xpc_allocate(short partid, int ch_number, u32 flags, void **payload) -{ - return xpc_interface.allocate(partid, ch_number, flags, payload); -} - -static inline enum xp_retval -xpc_send(short partid, int ch_number, void *payload) +xpc_send(short partid, int ch_number, u32 flags, void *payload, + u16 payload_size) { - return xpc_interface.send(partid, ch_number, payload); + return xpc_interface.send(partid, ch_number, flags, payload, + payload_size); } static inline enum xp_retval -xpc_send_notify(short partid, int ch_number, void *payload, - xpc_notify_func func, void *key) +xpc_send_notify(short partid, int ch_number, u32 flags, void *payload, + u16 payload_size, xpc_notify_func func, void *key) { - return xpc_interface.send_notify(partid, ch_number, payload, func, key); + return xpc_interface.send_notify(partid, ch_number, flags, payload, + payload_size, func, key); } static inline void @@ -377,8 +333,23 @@ xpc_partid_to_nasids(short partid, void *nasids) return xpc_interface.partid_to_nasids(partid, nasids); } +extern short xp_max_npartitions; +extern short xp_partition_id; +extern u8 xp_region_size; + +extern unsigned long (*xp_pa) (void *); +extern enum xp_retval (*xp_remote_memcpy) (unsigned long, const unsigned long, + size_t); +extern int (*xp_cpu_to_nasid) (int); + extern u64 xp_nofault_PIOR_target; extern int xp_nofault_PIOR(void *); extern int xp_error_PIOR(void); +extern struct device *xp; +extern enum xp_retval xp_init_sn2(void); +extern enum xp_retval xp_init_uv(void); +extern void xp_exit_sn2(void); +extern void xp_exit_uv(void); + #endif /* _DRIVERS_MISC_SGIXP_XP_H */ diff --git a/drivers/misc/sgi-xp/xp_main.c b/drivers/misc/sgi-xp/xp_main.c index 196480b691a..66a1d19e08a 100644 --- a/drivers/misc/sgi-xp/xp_main.c +++ b/drivers/misc/sgi-xp/xp_main.c @@ -14,29 +14,48 @@ * */ -#include <linux/kernel.h> -#include <linux/interrupt.h> #include <linux/module.h> -#include <linux/mutex.h> -#include <asm/sn/intr.h> -#include <asm/sn/sn_sal.h> +#include <linux/device.h> #include "xp.h" -/* - * The export of xp_nofault_PIOR needs to happen here since it is defined - * in drivers/misc/sgi-xp/xp_nofault.S. The target of the nofault read is - * defined here. - */ -EXPORT_SYMBOL_GPL(xp_nofault_PIOR); +/* define the XP debug device structures to be used with dev_dbg() et al */ + +struct device_driver xp_dbg_name = { + .name = "xp" +}; + +struct device xp_dbg_subname = { + .bus_id = {0}, /* set to "" */ + .driver = &xp_dbg_name +}; + +struct device *xp = &xp_dbg_subname; + +/* max #of partitions possible */ +short xp_max_npartitions; +EXPORT_SYMBOL_GPL(xp_max_npartitions); + +short xp_partition_id; +EXPORT_SYMBOL_GPL(xp_partition_id); + +u8 xp_region_size; +EXPORT_SYMBOL_GPL(xp_region_size); + +unsigned long (*xp_pa) (void *addr); +EXPORT_SYMBOL_GPL(xp_pa); + +enum xp_retval (*xp_remote_memcpy) (unsigned long dst_gpa, + const unsigned long src_gpa, size_t len); +EXPORT_SYMBOL_GPL(xp_remote_memcpy); -u64 xp_nofault_PIOR_target; -EXPORT_SYMBOL_GPL(xp_nofault_PIOR_target); +int (*xp_cpu_to_nasid) (int cpuid); +EXPORT_SYMBOL_GPL(xp_cpu_to_nasid); /* * xpc_registrations[] keeps track of xpc_connect()'s done by the kernel-level * users of XPC. */ -struct xpc_registration xpc_registrations[XPC_NCHANNELS]; +struct xpc_registration xpc_registrations[XPC_MAX_NCHANNELS]; EXPORT_SYMBOL_GPL(xpc_registrations); /* @@ -51,10 +70,9 @@ xpc_notloaded(void) struct xpc_interface xpc_interface = { (void (*)(int))xpc_notloaded, (void (*)(int))xpc_notloaded, - (enum xp_retval(*)(short, int, u32, void **))xpc_notloaded, - (enum xp_retval(*)(short, int, void *))xpc_notloaded, - (enum xp_retval(*)(short, int, void *, xpc_notify_func, void *)) - xpc_notloaded, + (enum xp_retval(*)(short, int, u32, void *, u16))xpc_notloaded, + (enum xp_retval(*)(short, int, u32, void *, u16, xpc_notify_func, + void *))xpc_notloaded, (void (*)(short, int, void *))xpc_notloaded, (enum xp_retval(*)(short, void *))xpc_notloaded }; @@ -66,16 +84,14 @@ EXPORT_SYMBOL_GPL(xpc_interface); void xpc_set_interface(void (*connect) (int), void (*disconnect) (int), - enum xp_retval (*allocate) (short, int, u32, void **), - enum xp_retval (*send) (short, int, void *), - enum xp_retval (*send_notify) (short, int, void *, + enum xp_retval (*send) (short, int, u32, void *, u16), + enum xp_retval (*send_notify) (short, int, u32, void *, u16, xpc_notify_func, void *), void (*received) (short, int, void *), enum xp_retval (*partid_to_nasids) (short, void *)) { xpc_interface.connect = connect; xpc_interface.disconnect = disconnect; - xpc_interface.allocate = allocate; xpc_interface.send = send; xpc_interface.send_notify = send_notify; xpc_interface.received = received; @@ -91,13 +107,11 @@ xpc_clear_interface(void) { xpc_interface.connect = (void (*)(int))xpc_notloaded; xpc_interface.disconnect = (void (*)(int))xpc_notloaded; - xpc_interface.allocate = (enum xp_retval(*)(short, int, u32, - void **))xpc_notloaded; - xpc_interface.send = (enum xp_retval(*)(short, int, void *)) + xpc_interface.send = (enum xp_retval(*)(short, int, u32, void *, u16)) xpc_notloaded; - xpc_interface.send_notify = (enum xp_retval(*)(short, int, void *, - xpc_notify_func, - void *))xpc_notloaded; + xpc_interface.send_notify = (enum xp_retval(*)(short, int, u32, void *, + u16, xpc_notify_func, + void *))xpc_notloaded; xpc_interface.received = (void (*)(short, int, void *)) xpc_notloaded; xpc_interface.partid_to_nasids = (enum xp_retval(*)(short, void *)) @@ -135,11 +149,14 @@ xpc_connect(int ch_number, xpc_channel_func func, void *key, u16 payload_size, { struct xpc_registration *registration; - DBUG_ON(ch_number < 0 || ch_number >= XPC_NCHANNELS); + DBUG_ON(ch_number < 0 || ch_number >= XPC_MAX_NCHANNELS); DBUG_ON(payload_size == 0 || nentries == 0); DBUG_ON(func == NULL); DBUG_ON(assigned_limit == 0 || idle_limit > assigned_limit); + if (XPC_MSG_SIZE(payload_size) > XPC_MSG_MAX_SIZE) + return xpPayloadTooBig; + registration = &xpc_registrations[ch_number]; if (mutex_lock_interruptible(®istration->mutex) != 0) @@ -152,7 +169,7 @@ xpc_connect(int ch_number, xpc_channel_func func, void *key, u16 payload_size, } /* register the channel for connection */ - registration->msg_size = XPC_MSG_SIZE(payload_size); + registration->entry_size = XPC_MSG_SIZE(payload_size); registration->nentries = nentries; registration->assigned_limit = assigned_limit; registration->idle_limit = idle_limit; @@ -185,7 +202,7 @@ xpc_disconnect(int ch_number) { struct xpc_registration *registration; - DBUG_ON(ch_number < 0 || ch_number >= XPC_NCHANNELS); + DBUG_ON(ch_number < 0 || ch_number >= XPC_MAX_NCHANNELS); registration = &xpc_registrations[ch_number]; @@ -206,7 +223,7 @@ xpc_disconnect(int ch_number) registration->func = NULL; registration->key = NULL; registration->nentries = 0; - registration->msg_size = 0; + registration->entry_size = 0; registration->assigned_limit = 0; registration->idle_limit = 0; @@ -221,39 +238,21 @@ EXPORT_SYMBOL_GPL(xpc_disconnect); int __init xp_init(void) { - int ret, ch_number; - u64 func_addr = *(u64 *)xp_nofault_PIOR; - u64 err_func_addr = *(u64 *)xp_error_PIOR; - - if (!ia64_platform_is("sn2")) - return -ENODEV; + enum xp_retval ret; + int ch_number; - /* - * Register a nofault code region which performs a cross-partition - * PIO read. If the PIO read times out, the MCA handler will consume - * the error and return to a kernel-provided instruction to indicate - * an error. This PIO read exists because it is guaranteed to timeout - * if the destination is down (AMO operations do not timeout on at - * least some CPUs on Shubs <= v1.2, which unfortunately we have to - * work around). - */ - ret = sn_register_nofault_code(func_addr, err_func_addr, err_func_addr, - 1, 1); - if (ret != 0) { - printk(KERN_ERR "XP: can't register nofault code, error=%d\n", - ret); - } - /* - * Setup the nofault PIO read target. (There is no special reason why - * SH_IPI_ACCESS was selected.) - */ - if (is_shub2()) - xp_nofault_PIOR_target = SH2_IPI_ACCESS0; + if (is_shub()) + ret = xp_init_sn2(); + else if (is_uv()) + ret = xp_init_uv(); else - xp_nofault_PIOR_target = SH1_IPI_ACCESS; + ret = xpUnsupported; + + if (ret != xpSuccess) + return -ENODEV; /* initialize the connection registration mutex */ - for (ch_number = 0; ch_number < XPC_NCHANNELS; ch_number++) + for (ch_number = 0; ch_number < XPC_MAX_NCHANNELS; ch_number++) mutex_init(&xpc_registrations[ch_number].mutex); return 0; @@ -264,12 +263,10 @@ module_init(xp_init); void __exit xp_exit(void) { - u64 func_addr = *(u64 *)xp_nofault_PIOR; - u64 err_func_addr = *(u64 *)xp_error_PIOR; - - /* unregister the PIO read nofault code region */ - (void)sn_register_nofault_code(func_addr, err_func_addr, - err_func_addr, 1, 0); + if (is_shub()) + xp_exit_sn2(); + else if (is_uv()) + xp_exit_uv(); } module_exit(xp_exit); diff --git a/drivers/misc/sgi-xp/xp_sn2.c b/drivers/misc/sgi-xp/xp_sn2.c new file mode 100644 index 00000000000..1440134caf3 --- /dev/null +++ b/drivers/misc/sgi-xp/xp_sn2.c @@ -0,0 +1,146 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved. + */ + +/* + * Cross Partition (XP) sn2-based functions. + * + * Architecture specific implementation of common functions. + */ + +#include <linux/module.h> +#include <linux/device.h> +#include <asm/sn/bte.h> +#include <asm/sn/sn_sal.h> +#include "xp.h" + +/* + * The export of xp_nofault_PIOR needs to happen here since it is defined + * in drivers/misc/sgi-xp/xp_nofault.S. The target of the nofault read is + * defined here. + */ +EXPORT_SYMBOL_GPL(xp_nofault_PIOR); + +u64 xp_nofault_PIOR_target; +EXPORT_SYMBOL_GPL(xp_nofault_PIOR_target); + +/* + * Register a nofault code region which performs a cross-partition PIO read. + * If the PIO read times out, the MCA handler will consume the error and + * return to a kernel-provided instruction to indicate an error. This PIO read + * exists because it is guaranteed to timeout if the destination is down + * (amo operations do not timeout on at least some CPUs on Shubs <= v1.2, + * which unfortunately we have to work around). + */ +static enum xp_retval +xp_register_nofault_code_sn2(void) +{ + int ret; + u64 func_addr; + u64 err_func_addr; + + func_addr = *(u64 *)xp_nofault_PIOR; + err_func_addr = *(u64 *)xp_error_PIOR; + ret = sn_register_nofault_code(func_addr, err_func_addr, err_func_addr, + 1, 1); + if (ret != 0) { + dev_err(xp, "can't register nofault code, error=%d\n", ret); + return xpSalError; + } + /* + * Setup the nofault PIO read target. (There is no special reason why + * SH_IPI_ACCESS was selected.) + */ + if (is_shub1()) + xp_nofault_PIOR_target = SH1_IPI_ACCESS; + else if (is_shub2()) + xp_nofault_PIOR_target = SH2_IPI_ACCESS0; + + return xpSuccess; +} + +static void +xp_unregister_nofault_code_sn2(void) +{ + u64 func_addr = *(u64 *)xp_nofault_PIOR; + u64 err_func_addr = *(u64 *)xp_error_PIOR; + + /* unregister the PIO read nofault code region */ + (void)sn_register_nofault_code(func_addr, err_func_addr, + err_func_addr, 1, 0); +} + +/* + * Convert a virtual memory address to a physical memory address. + */ +static unsigned long +xp_pa_sn2(void *addr) +{ + return __pa(addr); +} + +/* + * Wrapper for bte_copy(). + * + * dst_pa - physical address of the destination of the transfer. + * src_pa - physical address of the source of the transfer. + * len - number of bytes to transfer from source to destination. + * + * Note: xp_remote_memcpy_sn2() should never be called while holding a spinlock. + */ +static enum xp_retval +xp_remote_memcpy_sn2(unsigned long dst_pa, const unsigned long src_pa, + size_t len) +{ + bte_result_t ret; + + ret = bte_copy(src_pa, dst_pa, len, (BTE_NOTIFY | BTE_WACQUIRE), NULL); + if (ret == BTE_SUCCESS) + return xpSuccess; + + if (is_shub2()) { + dev_err(xp, "bte_copy() on shub2 failed, error=0x%x dst_pa=" + "0x%016lx src_pa=0x%016lx len=%ld\\n", ret, dst_pa, + src_pa, len); + } else { + dev_err(xp, "bte_copy() failed, error=%d dst_pa=0x%016lx " + "src_pa=0x%016lx len=%ld\\n", ret, dst_pa, src_pa, len); + } + + return xpBteCopyError; +} + +static int +xp_cpu_to_nasid_sn2(int cpuid) +{ + return cpuid_to_nasid(cpuid); +} + +enum xp_retval +xp_init_sn2(void) +{ + BUG_ON(!is_shub()); + + xp_max_npartitions = XP_MAX_NPARTITIONS_SN2; + xp_partition_id = sn_partition_id; + xp_region_size = sn_region_size; + + xp_pa = xp_pa_sn2; + xp_remote_memcpy = xp_remote_memcpy_sn2; + xp_cpu_to_nasid = xp_cpu_to_nasid_sn2; + + return xp_register_nofault_code_sn2(); +} + +void +xp_exit_sn2(void) +{ + BUG_ON(!is_shub()); + + xp_unregister_nofault_code_sn2(); +} + diff --git a/drivers/misc/sgi-xp/xp_uv.c b/drivers/misc/sgi-xp/xp_uv.c new file mode 100644 index 00000000000..d9f7ce2510b --- /dev/null +++ b/drivers/misc/sgi-xp/xp_uv.c @@ -0,0 +1,72 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved. + */ + +/* + * Cross Partition (XP) uv-based functions. + * + * Architecture specific implementation of common functions. + * + */ + +#include <linux/device.h> +#include <asm/uv/uv_hub.h> +#include "../sgi-gru/grukservices.h" +#include "xp.h" + +/* + * Convert a virtual memory address to a physical memory address. + */ +static unsigned long +xp_pa_uv(void *addr) +{ + return uv_gpa(addr); +} + +static enum xp_retval +xp_remote_memcpy_uv(unsigned long dst_gpa, const unsigned long src_gpa, + size_t len) +{ + int ret; + + ret = gru_copy_gpa(dst_gpa, src_gpa, len); + if (ret == 0) + return xpSuccess; + + dev_err(xp, "gru_copy_gpa() failed, dst_gpa=0x%016lx src_gpa=0x%016lx " + "len=%ld\n", dst_gpa, src_gpa, len); + return xpGruCopyError; +} + +static int +xp_cpu_to_nasid_uv(int cpuid) +{ + /* ??? Is this same as sn2 nasid in mach/part bitmaps set up by SAL? */ + return UV_PNODE_TO_NASID(uv_cpu_to_pnode(cpuid)); +} + +enum xp_retval +xp_init_uv(void) +{ + BUG_ON(!is_uv()); + + xp_max_npartitions = XP_MAX_NPARTITIONS_UV; + xp_partition_id = 0; /* !!! not correct value */ + xp_region_size = 0; /* !!! not correct value */ + + xp_pa = xp_pa_uv; + xp_remote_memcpy = xp_remote_memcpy_uv; + xp_cpu_to_nasid = xp_cpu_to_nasid_uv; + + return xpSuccess; +} + +void +xp_exit_uv(void) +{ + BUG_ON(!is_uv()); +} diff --git a/drivers/misc/sgi-xp/xpc.h b/drivers/misc/sgi-xp/xpc.h index 11ac267ed68..619208d6186 100644 --- a/drivers/misc/sgi-xp/xpc.h +++ b/drivers/misc/sgi-xp/xpc.h @@ -13,18 +13,10 @@ #ifndef _DRIVERS_MISC_SGIXP_XPC_H #define _DRIVERS_MISC_SGIXP_XPC_H -#include <linux/interrupt.h> -#include <linux/sysctl.h> -#include <linux/device.h> -#include <linux/mutex.h> +#include <linux/wait.h> #include <linux/completion.h> -#include <asm/pgtable.h> -#include <asm/processor.h> -#include <asm/sn/bte.h> -#include <asm/sn/clksupport.h> -#include <asm/sn/addrs.h> -#include <asm/sn/mspec.h> -#include <asm/sn/shub_mmr.h> +#include <linux/timer.h> +#include <linux/sched.h> #include "xp.h" /* @@ -36,23 +28,7 @@ #define XPC_VERSION_MAJOR(_v) ((_v) >> 4) #define XPC_VERSION_MINOR(_v) ((_v) & 0xf) -/* - * The next macros define word or bit representations for given - * C-brick nasid in either the SAL provided bit array representing - * nasids in the partition/machine or the AMO_t array used for - * inter-partition initiation communications. - * - * For SN2 machines, C-Bricks are alway even numbered NASIDs. As - * such, some space will be saved by insisting that nasid information - * passed from SAL always be packed for C-Bricks and the - * cross-partition interrupts use the same packing scheme. - */ -#define XPC_NASID_W_INDEX(_n) (((_n) / 64) / 2) -#define XPC_NASID_B_INDEX(_n) (((_n) / 2) & (64 - 1)) -#define XPC_NASID_IN_ARRAY(_n, _p) ((_p)[XPC_NASID_W_INDEX(_n)] & \ - (1UL << XPC_NASID_B_INDEX(_n))) -#define XPC_NASID_FROM_W_B(_w, _b) (((_w) * 64 + (_b)) * 2) - +/* define frequency of the heartbeat and frequency how often it's checked */ #define XPC_HB_DEFAULT_INTERVAL 5 /* incr HB every x secs */ #define XPC_HB_CHECK_DEFAULT_INTERVAL 20 /* check HB every x secs */ @@ -72,11 +48,11 @@ * * reserved page header * - * The first cacheline of the reserved page contains the header - * (struct xpc_rsvd_page). Before SAL initialization has completed, + * The first two 64-byte cachelines of the reserved page contain the + * header (struct xpc_rsvd_page). Before SAL initialization has completed, * SAL has set up the following fields of the reserved page header: - * SAL_signature, SAL_version, partid, and nasids_size. The other - * fields are set up by XPC. (xpc_rsvd_page points to the local + * SAL_signature, SAL_version, SAL_partid, and SAL_nasids_size. The + * other fields are set up by XPC. (xpc_rsvd_page points to the local * partition's reserved page.) * * part_nasids mask @@ -87,14 +63,16 @@ * the actual nasids in the entire machine (mach_nasids). We're only * interested in the even numbered nasids (which contain the processors * and/or memory), so we only need half as many bits to represent the - * nasids. The part_nasids mask is located starting at the first cacheline - * following the reserved page header. The mach_nasids mask follows right - * after the part_nasids mask. The size in bytes of each mask is reflected - * by the reserved page header field 'nasids_size'. (Local partition's - * mask pointers are xpc_part_nasids and xpc_mach_nasids.) + * nasids. When mapping nasid to bit in a mask (or bit to nasid) be sure + * to either divide or multiply by 2. The part_nasids mask is located + * starting at the first cacheline following the reserved page header. The + * mach_nasids mask follows right after the part_nasids mask. The size in + * bytes of each mask is reflected by the reserved page header field + * 'SAL_nasids_size'. (Local partition's mask pointers are xpc_part_nasids + * and xpc_mach_nasids.) * - * vars - * vars part + * vars (ia64-sn2 only) + * vars part (ia64-sn2 only) * * Immediately following the mach_nasids mask are the XPC variables * required by other partitions. First are those that are generic to all @@ -102,43 +80,26 @@ * which are partition specific (vars part). These are setup by XPC. * (Local partition's vars pointers are xpc_vars and xpc_vars_part.) * - * Note: Until vars_pa is set, the partition XPC code has not been initialized. + * Note: Until 'ts_jiffies' is set non-zero, the partition XPC code has not been + * initialized. */ struct xpc_rsvd_page { u64 SAL_signature; /* SAL: unique signature */ u64 SAL_version; /* SAL: version */ - u8 partid; /* SAL: partition ID */ + short SAL_partid; /* SAL: partition ID */ + short max_npartitions; /* value of XPC_MAX_PARTITIONS */ u8 version; - u8 pad1[6]; /* align to next u64 in cacheline */ - u64 vars_pa; /* physical address of struct xpc_vars */ - struct timespec stamp; /* time when reserved page was setup by XPC */ - u64 pad2[9]; /* align to last u64 in cacheline */ - u64 nasids_size; /* SAL: size of each nasid mask in bytes */ + u8 pad1[3]; /* align to next u64 in 1st 64-byte cacheline */ + union { + unsigned long vars_pa; /* phys address of struct xpc_vars */ + unsigned long activate_mq_gpa; /* gru phy addr of activate_mq */ + } sn; + unsigned long ts_jiffies; /* timestamp when rsvd pg was setup by XPC */ + u64 pad2[10]; /* align to last u64 in 2nd 64-byte cacheline */ + u64 SAL_nasids_size; /* SAL: size of each nasid mask in bytes */ }; -#define XPC_RP_VERSION _XPC_VERSION(1, 1) /* version 1.1 of the reserved page */ - -#define XPC_SUPPORTS_RP_STAMP(_version) \ - (_version >= _XPC_VERSION(1, 1)) - -/* - * compare stamps - the return value is: - * - * < 0, if stamp1 < stamp2 - * = 0, if stamp1 == stamp2 - * > 0, if stamp1 > stamp2 - */ -static inline int -xpc_compare_stamps(struct timespec *stamp1, struct timespec *stamp2) -{ - int ret; - - ret = stamp1->tv_sec - stamp2->tv_sec; - if (ret == 0) - ret = stamp1->tv_nsec - stamp2->tv_nsec; - - return ret; -} +#define XPC_RP_VERSION _XPC_VERSION(2, 0) /* version 2.0 of the reserved page */ /* * Define the structures by which XPC variables can be exported to other @@ -154,85 +115,40 @@ xpc_compare_stamps(struct timespec *stamp1, struct timespec *stamp2) * reflected by incrementing either the major or minor version numbers * of struct xpc_vars. */ -struct xpc_vars { +struct xpc_vars_sn2 { u8 version; u64 heartbeat; - u64 heartbeating_to_mask; + DECLARE_BITMAP(heartbeating_to_mask, XP_MAX_NPARTITIONS_SN2); u64 heartbeat_offline; /* if 0, heartbeat should be changing */ - int act_nasid; - int act_phys_cpuid; - u64 vars_part_pa; - u64 amos_page_pa; /* paddr of page of AMOs from MSPEC driver */ - AMO_t *amos_page; /* vaddr of page of AMOs from MSPEC driver */ + int activate_IRQ_nasid; + int activate_IRQ_phys_cpuid; + unsigned long vars_part_pa; + unsigned long amos_page_pa;/* paddr of page of amos from MSPEC driver */ + struct amo *amos_page; /* vaddr of page of amos from MSPEC driver */ }; #define XPC_V_VERSION _XPC_VERSION(3, 1) /* version 3.1 of the cross vars */ -#define XPC_SUPPORTS_DISENGAGE_REQUEST(_version) \ - (_version >= _XPC_VERSION(3, 1)) - -static inline int -xpc_hb_allowed(short partid, struct xpc_vars *vars) -{ - return ((vars->heartbeating_to_mask & (1UL << partid)) != 0); -} - -static inline void -xpc_allow_hb(short partid, struct xpc_vars *vars) -{ - u64 old_mask, new_mask; - - do { - old_mask = vars->heartbeating_to_mask; - new_mask = (old_mask | (1UL << partid)); - } while (cmpxchg(&vars->heartbeating_to_mask, old_mask, new_mask) != - old_mask); -} - -static inline void -xpc_disallow_hb(short partid, struct xpc_vars *vars) -{ - u64 old_mask, new_mask; - - do { - old_mask = vars->heartbeating_to_mask; - new_mask = (old_mask & ~(1UL << partid)); - } while (cmpxchg(&vars->heartbeating_to_mask, old_mask, new_mask) != - old_mask); -} - -/* - * The AMOs page consists of a number of AMO variables which are divided into - * four groups, The first two groups are used to identify an IRQ's sender. - * These two groups consist of 64 and 128 AMO variables respectively. The last - * two groups, consisting of just one AMO variable each, are used to identify - * the remote partitions that are currently engaged (from the viewpoint of - * the XPC running on the remote partition). - */ -#define XPC_NOTIFY_IRQ_AMOS 0 -#define XPC_ACTIVATE_IRQ_AMOS (XPC_NOTIFY_IRQ_AMOS + XP_MAX_PARTITIONS) -#define XPC_ENGAGED_PARTITIONS_AMO (XPC_ACTIVATE_IRQ_AMOS + XP_NASID_MASK_WORDS) -#define XPC_DISENGAGE_REQUEST_AMO (XPC_ENGAGED_PARTITIONS_AMO + 1) - /* * The following structure describes the per partition specific variables. * * An array of these structures, one per partition, will be defined. As a * partition becomes active XPC will copy the array entry corresponding to - * itself from that partition. It is desirable that the size of this - * structure evenly divide into a cacheline, such that none of the entries - * in this array crosses a cacheline boundary. As it is now, each entry - * occupies half a cacheline. + * itself from that partition. It is desirable that the size of this structure + * evenly divides into a 128-byte cacheline, such that none of the entries in + * this array crosses a 128-byte cacheline boundary. As it is now, each entry + * occupies 64-bytes. */ -struct xpc_vars_part { +struct xpc_vars_part_sn2 { u64 magic; - u64 openclose_args_pa; /* physical address of open and close args */ - u64 GPs_pa; /* physical address of Get/Put values */ + unsigned long openclose_args_pa; /* phys addr of open and close args */ + unsigned long GPs_pa; /* physical address of Get/Put values */ + + unsigned long chctl_amo_pa; /* physical address of chctl flags' amo */ - u64 IPI_amo_pa; /* physical address of IPI AMO_t structure */ - int IPI_nasid; /* nasid of where to send IPIs */ - int IPI_phys_cpuid; /* physical CPU ID of where to send IPIs */ + int notify_IRQ_nasid; /* nasid of where to send notify IRQs */ + int notify_IRQ_phys_cpuid; /* CPUID of where to send notify IRQs */ u8 nchannels; /* #of defined channels supported */ @@ -248,20 +164,95 @@ struct xpc_vars_part { * MAGIC2 indicates that this partition has pulled the remote partititions * per partition variables that pertain to this partition. */ -#define XPC_VP_MAGIC1 0x0053524156435058L /* 'XPCVARS\0'L (little endian) */ -#define XPC_VP_MAGIC2 0x0073726176435058L /* 'XPCvars\0'L (little endian) */ +#define XPC_VP_MAGIC1_SN2 0x0053524156435058L /* 'XPCVARS\0'L (little endian) */ +#define XPC_VP_MAGIC2_SN2 0x0073726176435058L /* 'XPCvars\0'L (little endian) */ /* the reserved page sizes and offsets */ #define XPC_RP_HEADER_SIZE L1_CACHE_ALIGN(sizeof(struct xpc_rsvd_page)) -#define XPC_RP_VARS_SIZE L1_CACHE_ALIGN(sizeof(struct xpc_vars)) +#define XPC_RP_VARS_SIZE L1_CACHE_ALIGN(sizeof(struct xpc_vars_sn2)) -#define XPC_RP_PART_NASIDS(_rp) ((u64 *)((u8 *)(_rp) + XPC_RP_HEADER_SIZE)) -#define XPC_RP_MACH_NASIDS(_rp) (XPC_RP_PART_NASIDS(_rp) + xp_nasid_mask_words) -#define XPC_RP_VARS(_rp) ((struct xpc_vars *)(XPC_RP_MACH_NASIDS(_rp) + \ - xp_nasid_mask_words)) -#define XPC_RP_VARS_PART(_rp) ((struct xpc_vars_part *) \ - ((u8 *)XPC_RP_VARS(_rp) + XPC_RP_VARS_SIZE)) +#define XPC_RP_PART_NASIDS(_rp) ((unsigned long *)((u8 *)(_rp) + \ + XPC_RP_HEADER_SIZE)) +#define XPC_RP_MACH_NASIDS(_rp) (XPC_RP_PART_NASIDS(_rp) + \ + xpc_nasid_mask_nlongs) +#define XPC_RP_VARS(_rp) ((struct xpc_vars_sn2 *) \ + (XPC_RP_MACH_NASIDS(_rp) + \ + xpc_nasid_mask_nlongs)) + +/* + * The activate_mq is used to send/receive GRU messages that affect XPC's + * heartbeat, partition active state, and channel state. This is UV only. + */ +struct xpc_activate_mq_msghdr_uv { + short partid; /* sender's partid */ + u8 act_state; /* sender's act_state at time msg sent */ + u8 type; /* message's type */ + unsigned long rp_ts_jiffies; /* timestamp of sender's rp setup by XPC */ +}; + +/* activate_mq defined message types */ +#define XPC_ACTIVATE_MQ_MSG_SYNC_ACT_STATE_UV 0 +#define XPC_ACTIVATE_MQ_MSG_INC_HEARTBEAT_UV 1 +#define XPC_ACTIVATE_MQ_MSG_OFFLINE_HEARTBEAT_UV 2 +#define XPC_ACTIVATE_MQ_MSG_ONLINE_HEARTBEAT_UV 3 + +#define XPC_ACTIVATE_MQ_MSG_ACTIVATE_REQ_UV 4 +#define XPC_ACTIVATE_MQ_MSG_DEACTIVATE_REQ_UV 5 + +#define XPC_ACTIVATE_MQ_MSG_CHCTL_CLOSEREQUEST_UV 6 +#define XPC_ACTIVATE_MQ_MSG_CHCTL_CLOSEREPLY_UV 7 +#define XPC_ACTIVATE_MQ_MSG_CHCTL_OPENREQUEST_UV 8 +#define XPC_ACTIVATE_MQ_MSG_CHCTL_OPENREPLY_UV 9 + +#define XPC_ACTIVATE_MQ_MSG_MARK_ENGAGED_UV 10 +#define XPC_ACTIVATE_MQ_MSG_MARK_DISENGAGED_UV 11 + +struct xpc_activate_mq_msg_uv { + struct xpc_activate_mq_msghdr_uv hdr; +}; + +struct xpc_activate_mq_msg_heartbeat_req_uv { + struct xpc_activate_mq_msghdr_uv hdr; + u64 heartbeat; +}; + +struct xpc_activate_mq_msg_activate_req_uv { + struct xpc_activate_mq_msghdr_uv hdr; + unsigned long rp_gpa; + unsigned long activate_mq_gpa; +}; + +struct xpc_activate_mq_msg_deactivate_req_uv { + struct xpc_activate_mq_msghdr_uv hdr; + enum xp_retval reason; +}; + +struct xpc_activate_mq_msg_chctl_closerequest_uv { + struct xpc_activate_mq_msghdr_uv hdr; + short ch_number; + enum xp_retval reason; +}; + +struct xpc_activate_mq_msg_chctl_closereply_uv { + struct xpc_activate_mq_msghdr_uv hdr; + short ch_number; +}; + +struct xpc_activate_mq_msg_chctl_openrequest_uv { + struct xpc_activate_mq_msghdr_uv hdr; + short ch_number; + short entry_size; /* size of notify_mq's GRU messages */ + short local_nentries; /* ??? Is this needed? What is? */ +}; + +struct xpc_activate_mq_msg_chctl_openreply_uv { + struct xpc_activate_mq_msghdr_uv hdr; + short ch_number; + short remote_nentries; /* ??? Is this needed? What is? */ + short local_nentries; /* ??? Is this needed? What is? */ + unsigned long local_notify_mq_gpa; +}; /* * Functions registered by add_timer() or called by kernel_thread() only @@ -270,22 +261,22 @@ struct xpc_vars_part { * the passed argument. */ #define XPC_PACK_ARGS(_arg1, _arg2) \ - ((((u64) _arg1) & 0xffffffff) | \ - ((((u64) _arg2) & 0xffffffff) << 32)) + ((((u64)_arg1) & 0xffffffff) | \ + ((((u64)_arg2) & 0xffffffff) << 32)) -#define XPC_UNPACK_ARG1(_args) (((u64) _args) & 0xffffffff) -#define XPC_UNPACK_ARG2(_args) ((((u64) _args) >> 32) & 0xffffffff) +#define XPC_UNPACK_ARG1(_args) (((u64)_args) & 0xffffffff) +#define XPC_UNPACK_ARG2(_args) ((((u64)_args) >> 32) & 0xffffffff) /* * Define a Get/Put value pair (pointers) used with a message queue. */ -struct xpc_gp { +struct xpc_gp_sn2 { s64 get; /* Get value */ s64 put; /* Put value */ }; #define XPC_GP_SIZE \ - L1_CACHE_ALIGN(sizeof(struct xpc_gp) * XPC_NCHANNELS) + L1_CACHE_ALIGN(sizeof(struct xpc_gp_sn2) * XPC_MAX_NCHANNELS) /* * Define a structure that contains arguments associated with opening and @@ -293,31 +284,89 @@ struct xpc_gp { */ struct xpc_openclose_args { u16 reason; /* reason why channel is closing */ - u16 msg_size; /* sizeof each message entry */ + u16 entry_size; /* sizeof each message entry */ u16 remote_nentries; /* #of message entries in remote msg queue */ u16 local_nentries; /* #of message entries in local msg queue */ - u64 local_msgqueue_pa; /* physical address of local message queue */ + unsigned long local_msgqueue_pa; /* phys addr of local message queue */ }; #define XPC_OPENCLOSE_ARGS_SIZE \ - L1_CACHE_ALIGN(sizeof(struct xpc_openclose_args) * XPC_NCHANNELS) + L1_CACHE_ALIGN(sizeof(struct xpc_openclose_args) * \ + XPC_MAX_NCHANNELS) -/* struct xpc_msg flags */ -#define XPC_M_DONE 0x01 /* msg has been received/consumed */ -#define XPC_M_READY 0x02 /* msg is ready to be sent */ -#define XPC_M_INTERRUPT 0x04 /* send interrupt when msg consumed */ +/* + * Structures to define a fifo singly-linked list. + */ -#define XPC_MSG_ADDRESS(_payload) \ - ((struct xpc_msg *)((u8 *)(_payload) - XPC_MSG_PAYLOAD_OFFSET)) +struct xpc_fifo_entry_uv { + struct xpc_fifo_entry_uv *next; +}; + +struct xpc_fifo_head_uv { + struct xpc_fifo_entry_uv *first; + struct xpc_fifo_entry_uv *last; + spinlock_t lock; + int n_entries; +}; /* - * Defines notify entry. + * Define a sn2 styled message. + * + * A user-defined message resides in the payload area. The max size of the + * payload is defined by the user via xpc_connect(). + * + * The size of a message entry (within a message queue) must be a 128-byte + * cacheline sized multiple in order to facilitate the BTE transfer of messages + * from one message queue to another. + */ +struct xpc_msg_sn2 { + u8 flags; /* FOR XPC INTERNAL USE ONLY */ + u8 reserved[7]; /* FOR XPC INTERNAL USE ONLY */ + s64 number; /* FOR XPC INTERNAL USE ONLY */ + + u64 payload; /* user defined portion of message */ +}; + +/* struct xpc_msg_sn2 flags */ + +#define XPC_M_SN2_DONE 0x01 /* msg has been received/consumed */ +#define XPC_M_SN2_READY 0x02 /* msg is ready to be sent */ +#define XPC_M_SN2_INTERRUPT 0x04 /* send interrupt when msg consumed */ + +/* + * The format of a uv XPC notify_mq GRU message is as follows: + * + * A user-defined message resides in the payload area. The max size of the + * payload is defined by the user via xpc_connect(). + * + * The size of a message (payload and header) sent via the GRU must be either 1 + * or 2 GRU_CACHE_LINE_BYTES in length. + */ + +struct xpc_notify_mq_msghdr_uv { + union { + unsigned int gru_msg_hdr; /* FOR GRU INTERNAL USE ONLY */ + struct xpc_fifo_entry_uv next; /* FOR XPC INTERNAL USE ONLY */ + } u; + short partid; /* FOR XPC INTERNAL USE ONLY */ + u8 ch_number; /* FOR XPC INTERNAL USE ONLY */ + u8 size; /* FOR XPC INTERNAL USE ONLY */ + unsigned int msg_slot_number; /* FOR XPC INTERNAL USE ONLY */ +}; + +struct xpc_notify_mq_msg_uv { + struct xpc_notify_mq_msghdr_uv hdr; + unsigned long payload; +}; + +/* + * Define sn2's notify entry. * * This is used to notify a message's sender that their message was received * and consumed by the intended recipient. */ -struct xpc_notify { +struct xpc_notify_sn2 { u8 type; /* type of notification */ /* the following two fields are only used if type == XPC_N_CALL */ @@ -325,9 +374,20 @@ struct xpc_notify { void *key; /* pointer to user's key */ }; -/* struct xpc_notify type of notification */ +/* struct xpc_notify_sn2 type of notification */ + +#define XPC_N_CALL 0x01 /* notify function provided by user */ -#define XPC_N_CALL 0x01 /* notify function provided by user */ +/* + * Define uv's version of the notify entry. It additionally is used to allocate + * a msg slot on the remote partition into which is copied a sent message. + */ +struct xpc_send_msg_slot_uv { + struct xpc_fifo_entry_uv next; + unsigned int msg_slot_number; + xpc_notify_func func; /* user's notify function */ + void *key; /* pointer to user's key */ +}; /* * Define the structure that manages all the stuff required by a channel. In @@ -339,8 +399,12 @@ struct xpc_notify { * There is an array of these structures for each remote partition. It is * allocated at the time a partition becomes active. The array contains one * of these structures for each potential channel connection to that partition. + */ + +/* + * The following is sn2 only. * - * Each of these structures manages two message queues (circular buffers). + * Each channel structure manages two message queues (circular buffers). * They are allocated at the time a channel connection is made. One of * these message queues (local_msgqueue) holds the locally created messages * that are destined for the remote partition. The other of these message @@ -407,58 +471,72 @@ struct xpc_notify { * new messages, by the clearing of the message flags of the acknowledged * messages. */ + +struct xpc_channel_sn2 { + struct xpc_openclose_args *local_openclose_args; /* args passed on */ + /* opening or closing of channel */ + + void *local_msgqueue_base; /* base address of kmalloc'd space */ + struct xpc_msg_sn2 *local_msgqueue; /* local message queue */ + void *remote_msgqueue_base; /* base address of kmalloc'd space */ + struct xpc_msg_sn2 *remote_msgqueue; /* cached copy of remote */ + /* partition's local message queue */ + unsigned long remote_msgqueue_pa; /* phys addr of remote partition's */ + /* local message queue */ + + struct xpc_notify_sn2 *notify_queue;/* notify queue for messages sent */ + + /* various flavors of local and remote Get/Put values */ + + struct xpc_gp_sn2 *local_GP; /* local Get/Put values */ + struct xpc_gp_sn2 remote_GP; /* remote Get/Put values */ + struct xpc_gp_sn2 w_local_GP; /* working local Get/Put values */ + struct xpc_gp_sn2 w_remote_GP; /* working remote Get/Put values */ + s64 next_msg_to_pull; /* Put value of next msg to pull */ + + struct mutex msg_to_pull_mutex; /* next msg to pull serialization */ +}; + +struct xpc_channel_uv { + unsigned long remote_notify_mq_gpa; /* gru phys address of remote */ + /* partition's notify mq */ + + struct xpc_send_msg_slot_uv *send_msg_slots; + struct xpc_notify_mq_msg_uv *recv_msg_slots; + + struct xpc_fifo_head_uv msg_slot_free_list; + struct xpc_fifo_head_uv recv_msg_list; /* deliverable payloads */ +}; + struct xpc_channel { short partid; /* ID of remote partition connected */ spinlock_t lock; /* lock for updating this structure */ - u32 flags; /* general flags */ + unsigned int flags; /* general flags */ enum xp_retval reason; /* reason why channel is disconnect'g */ int reason_line; /* line# disconnect initiated from */ u16 number; /* channel # */ - u16 msg_size; /* sizeof each msg entry */ + u16 entry_size; /* sizeof each msg entry */ u16 local_nentries; /* #of msg entries in local msg queue */ u16 remote_nentries; /* #of msg entries in remote msg queue */ - void *local_msgqueue_base; /* base address of kmalloc'd space */ - struct xpc_msg *local_msgqueue; /* local message queue */ - void *remote_msgqueue_base; /* base address of kmalloc'd space */ - struct xpc_msg *remote_msgqueue; /* cached copy of remote partition's */ - /* local message queue */ - u64 remote_msgqueue_pa; /* phys addr of remote partition's */ - /* local message queue */ - atomic_t references; /* #of external references to queues */ atomic_t n_on_msg_allocate_wq; /* #on msg allocation wait queue */ wait_queue_head_t msg_allocate_wq; /* msg allocation wait queue */ - u8 delayed_IPI_flags; /* IPI flags received, but delayed */ + u8 delayed_chctl_flags; /* chctl flags received, but delayed */ /* action until channel disconnected */ - /* queue of msg senders who want to be notified when msg received */ - atomic_t n_to_notify; /* #of msg senders to notify */ - struct xpc_notify *notify_queue; /* notify queue for messages sent */ xpc_channel_func func; /* user's channel function */ void *key; /* pointer to user's key */ - struct mutex msg_to_pull_mutex; /* next msg to pull serialization */ struct completion wdisconnect_wait; /* wait for channel disconnect */ - struct xpc_openclose_args *local_openclose_args; /* args passed on */ - /* opening or closing of channel */ - - /* various flavors of local and remote Get/Put values */ - - struct xpc_gp *local_GP; /* local Get/Put values */ - struct xpc_gp remote_GP; /* remote Get/Put values */ - struct xpc_gp w_local_GP; /* working local Get/Put values */ - struct xpc_gp w_remote_GP; /* working remote Get/Put values */ - s64 next_msg_to_pull; /* Put value of next msg to pull */ - /* kthread management related fields */ atomic_t kthreads_assigned; /* #of kthreads assigned to channel */ @@ -469,6 +547,11 @@ struct xpc_channel { wait_queue_head_t idle_wq; /* idle kthread wait queue */ + union { + struct xpc_channel_sn2 sn2; + struct xpc_channel_uv uv; + } sn; + } ____cacheline_aligned; /* struct xpc_channel flags */ @@ -501,33 +584,128 @@ struct xpc_channel { #define XPC_C_WDISCONNECT 0x00040000 /* waiting for channel disconnect */ /* - * Manages channels on a partition basis. There is one of these structures + * The channel control flags (chctl) union consists of a 64-bit variable which + * is divided up into eight bytes, ordered from right to left. Byte zero + * pertains to channel 0, byte one to channel 1, and so on. Each channel's byte + * can have one or more of the chctl flags set in it. + */ + +union xpc_channel_ctl_flags { + u64 all_flags; + u8 flags[XPC_MAX_NCHANNELS]; +}; + +/* chctl flags */ +#define XPC_CHCTL_CLOSEREQUEST 0x01 +#define XPC_CHCTL_CLOSEREPLY 0x02 +#define XPC_CHCTL_OPENREQUEST 0x04 +#define XPC_CHCTL_OPENREPLY 0x08 +#define XPC_CHCTL_MSGREQUEST 0x10 + +#define XPC_OPENCLOSE_CHCTL_FLAGS \ + (XPC_CHCTL_CLOSEREQUEST | XPC_CHCTL_CLOSEREPLY | \ + XPC_CHCTL_OPENREQUEST | XPC_CHCTL_OPENREPLY) +#define XPC_MSG_CHCTL_FLAGS XPC_CHCTL_MSGREQUEST + +static inline int +xpc_any_openclose_chctl_flags_set(union xpc_channel_ctl_flags *chctl) +{ + int ch_number; + + for (ch_number = 0; ch_number < XPC_MAX_NCHANNELS; ch_number++) { + if (chctl->flags[ch_number] & XPC_OPENCLOSE_CHCTL_FLAGS) + return 1; + } + return 0; +} + +static inline int +xpc_any_msg_chctl_flags_set(union xpc_channel_ctl_flags *chctl) +{ + int ch_number; + + for (ch_number = 0; ch_number < XPC_MAX_NCHANNELS; ch_number++) { + if (chctl->flags[ch_number] & XPC_MSG_CHCTL_FLAGS) + return 1; + } + return 0; +} + +/* + * Manage channels on a partition basis. There is one of these structures * for each partition (a partition will never utilize the structure that * represents itself). */ + +struct xpc_partition_sn2 { + unsigned long remote_amos_page_pa; /* paddr of partition's amos page */ + int activate_IRQ_nasid; /* active partition's act/deact nasid */ + int activate_IRQ_phys_cpuid; /* active part's act/deact phys cpuid */ + + unsigned long remote_vars_pa; /* phys addr of partition's vars */ + unsigned long remote_vars_part_pa; /* paddr of partition's vars part */ + u8 remote_vars_version; /* version# of partition's vars */ + + void *local_GPs_base; /* base address of kmalloc'd space */ + struct xpc_gp_sn2 *local_GPs; /* local Get/Put values */ + void *remote_GPs_base; /* base address of kmalloc'd space */ + struct xpc_gp_sn2 *remote_GPs; /* copy of remote partition's local */ + /* Get/Put values */ + unsigned long remote_GPs_pa; /* phys addr of remote partition's local */ + /* Get/Put values */ + + void *local_openclose_args_base; /* base address of kmalloc'd space */ + struct xpc_openclose_args *local_openclose_args; /* local's args */ + unsigned long remote_openclose_args_pa; /* phys addr of remote's args */ + + int notify_IRQ_nasid; /* nasid of where to send notify IRQs */ + int notify_IRQ_phys_cpuid; /* CPUID of where to send notify IRQs */ + char notify_IRQ_owner[8]; /* notify IRQ's owner's name */ + + struct amo *remote_chctl_amo_va; /* addr of remote chctl flags' amo */ + struct amo *local_chctl_amo_va; /* address of chctl flags' amo */ + + struct timer_list dropped_notify_IRQ_timer; /* dropped IRQ timer */ +}; + +struct xpc_partition_uv { + unsigned long remote_activate_mq_gpa; /* gru phys address of remote */ + /* partition's activate mq */ + spinlock_t flags_lock; /* protect updating of flags */ + unsigned int flags; /* general flags */ + u8 remote_act_state; /* remote partition's act_state */ + u8 act_state_req; /* act_state request from remote partition */ + enum xp_retval reason; /* reason for deactivate act_state request */ + u64 heartbeat; /* incremented by remote partition */ +}; + +/* struct xpc_partition_uv flags */ + +#define XPC_P_HEARTBEAT_OFFLINE_UV 0x00000001 +#define XPC_P_ENGAGED_UV 0x00000002 + +/* struct xpc_partition_uv act_state change requests */ + +#define XPC_P_ASR_ACTIVATE_UV 0x01 +#define XPC_P_ASR_REACTIVATE_UV 0x02 +#define XPC_P_ASR_DEACTIVATE_UV 0x03 + struct xpc_partition { /* XPC HB infrastructure */ u8 remote_rp_version; /* version# of partition's rsvd pg */ - struct timespec remote_rp_stamp; /* time when rsvd pg was initialized */ - u64 remote_rp_pa; /* phys addr of partition's rsvd pg */ - u64 remote_vars_pa; /* phys addr of partition's vars */ - u64 remote_vars_part_pa; /* phys addr of partition's vars part */ + unsigned long remote_rp_ts_jiffies; /* timestamp when rsvd pg setup */ + unsigned long remote_rp_pa; /* phys addr of partition's rsvd pg */ u64 last_heartbeat; /* HB at last read */ - u64 remote_amos_page_pa; /* phys addr of partition's amos page */ - int remote_act_nasid; /* active part's act/deact nasid */ - int remote_act_phys_cpuid; /* active part's act/deact phys cpuid */ - u32 act_IRQ_rcvd; /* IRQs since activation */ + u32 activate_IRQ_rcvd; /* IRQs since activation */ spinlock_t act_lock; /* protect updating of act_state */ u8 act_state; /* from XPC HB viewpoint */ - u8 remote_vars_version; /* version# of partition's vars */ enum xp_retval reason; /* reason partition is deactivating */ int reason_line; /* line# deactivation initiated from */ - int reactivate_nasid; /* nasid in partition to reactivate */ - unsigned long disengage_request_timeout; /* timeout in jiffies */ - struct timer_list disengage_request_timer; + unsigned long disengage_timeout; /* timeout in jiffies */ + struct timer_list disengage_timer; /* XPC infrastructure referencing and teardown control */ @@ -535,85 +713,63 @@ struct xpc_partition { wait_queue_head_t teardown_wq; /* kthread waiting to teardown infra */ atomic_t references; /* #of references to infrastructure */ - /* - * NONE OF THE PRECEDING FIELDS OF THIS STRUCTURE WILL BE CLEARED WHEN - * XPC SETS UP THE NECESSARY INFRASTRUCTURE TO SUPPORT CROSS PARTITION - * COMMUNICATION. ALL OF THE FOLLOWING FIELDS WILL BE CLEARED. (THE - * 'nchannels' FIELD MUST BE THE FIRST OF THE FIELDS TO BE CLEARED.) - */ - u8 nchannels; /* #of defined channels supported */ atomic_t nchannels_active; /* #of channels that are not DISCONNECTED */ atomic_t nchannels_engaged; /* #of channels engaged with remote part */ struct xpc_channel *channels; /* array of channel structures */ - void *local_GPs_base; /* base address of kmalloc'd space */ - struct xpc_gp *local_GPs; /* local Get/Put values */ - void *remote_GPs_base; /* base address of kmalloc'd space */ - struct xpc_gp *remote_GPs; /* copy of remote partition's local */ - /* Get/Put values */ - u64 remote_GPs_pa; /* phys address of remote partition's local */ - /* Get/Put values */ + /* fields used for managing channel avialability and activity */ - /* fields used to pass args when opening or closing a channel */ + union xpc_channel_ctl_flags chctl; /* chctl flags yet to be processed */ + spinlock_t chctl_lock; /* chctl flags lock */ - void *local_openclose_args_base; /* base address of kmalloc'd space */ - struct xpc_openclose_args *local_openclose_args; /* local's args */ void *remote_openclose_args_base; /* base address of kmalloc'd space */ struct xpc_openclose_args *remote_openclose_args; /* copy of remote's */ /* args */ - u64 remote_openclose_args_pa; /* phys addr of remote's args */ - - /* IPI sending, receiving and handling related fields */ - - int remote_IPI_nasid; /* nasid of where to send IPIs */ - int remote_IPI_phys_cpuid; /* phys CPU ID of where to send IPIs */ - AMO_t *remote_IPI_amo_va; /* address of remote IPI AMO_t structure */ - - AMO_t *local_IPI_amo_va; /* address of IPI AMO_t structure */ - u64 local_IPI_amo; /* IPI amo flags yet to be handled */ - char IPI_owner[8]; /* IPI owner's name */ - struct timer_list dropped_IPI_timer; /* dropped IPI timer */ - - spinlock_t IPI_lock; /* IPI handler lock */ /* channel manager related fields */ atomic_t channel_mgr_requests; /* #of requests to activate chan mgr */ wait_queue_head_t channel_mgr_wq; /* channel mgr's wait queue */ + union { + struct xpc_partition_sn2 sn2; + struct xpc_partition_uv uv; + } sn; + } ____cacheline_aligned; /* struct xpc_partition act_state values (for XPC HB) */ -#define XPC_P_INACTIVE 0x00 /* partition is not active */ -#define XPC_P_ACTIVATION_REQ 0x01 /* created thread to activate */ -#define XPC_P_ACTIVATING 0x02 /* activation thread started */ -#define XPC_P_ACTIVE 0x03 /* xpc_partition_up() was called */ -#define XPC_P_DEACTIVATING 0x04 /* partition deactivation initiated */ +#define XPC_P_AS_INACTIVE 0x00 /* partition is not active */ +#define XPC_P_AS_ACTIVATION_REQ 0x01 /* created thread to activate */ +#define XPC_P_AS_ACTIVATING 0x02 /* activation thread started */ +#define XPC_P_AS_ACTIVE 0x03 /* xpc_partition_up() was called */ +#define XPC_P_AS_DEACTIVATING 0x04 /* partition deactivation initiated */ #define XPC_DEACTIVATE_PARTITION(_p, _reason) \ xpc_deactivate_partition(__LINE__, (_p), (_reason)) /* struct xpc_partition setup_state values */ -#define XPC_P_UNSET 0x00 /* infrastructure was never setup */ -#define XPC_P_SETUP 0x01 /* infrastructure is setup */ -#define XPC_P_WTEARDOWN 0x02 /* waiting to teardown infrastructure */ -#define XPC_P_TORNDOWN 0x03 /* infrastructure is torndown */ +#define XPC_P_SS_UNSET 0x00 /* infrastructure was never setup */ +#define XPC_P_SS_SETUP 0x01 /* infrastructure is setup */ +#define XPC_P_SS_WTEARDOWN 0x02 /* waiting to teardown infrastructure */ +#define XPC_P_SS_TORNDOWN 0x03 /* infrastructure is torndown */ /* - * struct xpc_partition IPI_timer #of seconds to wait before checking for - * dropped IPIs. These occur whenever an IPI amo write doesn't complete until - * after the IPI was received. + * struct xpc_partition_sn2's dropped notify IRQ timer is set to wait the + * following interval #of seconds before checking for dropped notify IRQs. + * These can occur whenever an IRQ's associated amo write doesn't complete + * until after the IRQ was received. */ -#define XPC_P_DROPPED_IPI_WAIT (0.25 * HZ) +#define XPC_DROPPED_NOTIFY_IRQ_WAIT_INTERVAL (0.25 * HZ) /* number of seconds to wait for other partitions to disengage */ -#define XPC_DISENGAGE_REQUEST_DEFAULT_TIMELIMIT 90 +#define XPC_DISENGAGE_DEFAULT_TIMELIMIT 90 -/* interval in seconds to print 'waiting disengagement' messages */ -#define XPC_DISENGAGE_PRINTMSG_INTERVAL 10 +/* interval in seconds to print 'waiting deactivation' messages */ +#define XPC_DEACTIVATE_PRINTMSG_INTERVAL 10 #define XPC_PARTID(_p) ((short)((_p) - &xpc_partitions[0])) @@ -623,33 +779,92 @@ extern struct xpc_registration xpc_registrations[]; /* found in xpc_main.c */ extern struct device *xpc_part; extern struct device *xpc_chan; -extern int xpc_disengage_request_timelimit; -extern int xpc_disengage_request_timedout; -extern irqreturn_t xpc_notify_IRQ_handler(int, void *); -extern void xpc_dropped_IPI_check(struct xpc_partition *); +extern int xpc_disengage_timelimit; +extern int xpc_disengage_timedout; +extern int xpc_activate_IRQ_rcvd; +extern spinlock_t xpc_activate_IRQ_rcvd_lock; +extern wait_queue_head_t xpc_activate_IRQ_wq; +extern void *xpc_heartbeating_to_mask; +extern void *xpc_kzalloc_cacheline_aligned(size_t, gfp_t, void **); extern void xpc_activate_partition(struct xpc_partition *); extern void xpc_activate_kthreads(struct xpc_channel *, int); extern void xpc_create_kthreads(struct xpc_channel *, int, int); extern void xpc_disconnect_wait(int); +extern int (*xpc_setup_partitions_sn) (void); +extern enum xp_retval (*xpc_get_partition_rsvd_page_pa) (void *, u64 *, + unsigned long *, + size_t *); +extern int (*xpc_setup_rsvd_page_sn) (struct xpc_rsvd_page *); +extern void (*xpc_heartbeat_init) (void); +extern void (*xpc_heartbeat_exit) (void); +extern void (*xpc_increment_heartbeat) (void); +extern void (*xpc_offline_heartbeat) (void); +extern void (*xpc_online_heartbeat) (void); +extern enum xp_retval (*xpc_get_remote_heartbeat) (struct xpc_partition *); +extern enum xp_retval (*xpc_make_first_contact) (struct xpc_partition *); +extern u64 (*xpc_get_chctl_all_flags) (struct xpc_partition *); +extern enum xp_retval (*xpc_setup_msg_structures) (struct xpc_channel *); +extern void (*xpc_teardown_msg_structures) (struct xpc_channel *); +extern void (*xpc_notify_senders_of_disconnect) (struct xpc_channel *); +extern void (*xpc_process_msg_chctl_flags) (struct xpc_partition *, int); +extern int (*xpc_n_of_deliverable_payloads) (struct xpc_channel *); +extern void *(*xpc_get_deliverable_payload) (struct xpc_channel *); +extern void (*xpc_request_partition_activation) (struct xpc_rsvd_page *, + unsigned long, int); +extern void (*xpc_request_partition_reactivation) (struct xpc_partition *); +extern void (*xpc_request_partition_deactivation) (struct xpc_partition *); +extern void (*xpc_cancel_partition_deactivation_request) ( + struct xpc_partition *); +extern void (*xpc_process_activate_IRQ_rcvd) (void); +extern enum xp_retval (*xpc_setup_ch_structures_sn) (struct xpc_partition *); +extern void (*xpc_teardown_ch_structures_sn) (struct xpc_partition *); + +extern void (*xpc_indicate_partition_engaged) (struct xpc_partition *); +extern int (*xpc_partition_engaged) (short); +extern int (*xpc_any_partition_engaged) (void); +extern void (*xpc_indicate_partition_disengaged) (struct xpc_partition *); +extern void (*xpc_assume_partition_disengaged) (short); + +extern void (*xpc_send_chctl_closerequest) (struct xpc_channel *, + unsigned long *); +extern void (*xpc_send_chctl_closereply) (struct xpc_channel *, + unsigned long *); +extern void (*xpc_send_chctl_openrequest) (struct xpc_channel *, + unsigned long *); +extern void (*xpc_send_chctl_openreply) (struct xpc_channel *, unsigned long *); + +extern void (*xpc_save_remote_msgqueue_pa) (struct xpc_channel *, + unsigned long); + +extern enum xp_retval (*xpc_send_payload) (struct xpc_channel *, u32, void *, + u16, u8, xpc_notify_func, void *); +extern void (*xpc_received_payload) (struct xpc_channel *, void *); + +/* found in xpc_sn2.c */ +extern int xpc_init_sn2(void); +extern void xpc_exit_sn2(void); + +/* found in xpc_uv.c */ +extern int xpc_init_uv(void); +extern void xpc_exit_uv(void); /* found in xpc_partition.c */ extern int xpc_exiting; -extern struct xpc_vars *xpc_vars; +extern int xpc_nasid_mask_nlongs; extern struct xpc_rsvd_page *xpc_rsvd_page; -extern struct xpc_vars_part *xpc_vars_part; -extern struct xpc_partition xpc_partitions[XP_MAX_PARTITIONS + 1]; -extern char *xpc_remote_copy_buffer; -extern void *xpc_remote_copy_buffer_base; +extern unsigned long *xpc_mach_nasids; +extern struct xpc_partition *xpc_partitions; extern void *xpc_kmalloc_cacheline_aligned(size_t, gfp_t, void **); -extern struct xpc_rsvd_page *xpc_rsvd_page_init(void); -extern void xpc_allow_IPI_ops(void); -extern void xpc_restrict_IPI_ops(void); -extern int xpc_identify_act_IRQ_sender(void); +extern int xpc_setup_rsvd_page(void); +extern void xpc_teardown_rsvd_page(void); +extern int xpc_identify_activate_IRQ_sender(void); extern int xpc_partition_disengaged(struct xpc_partition *); extern enum xp_retval xpc_mark_partition_active(struct xpc_partition *); extern void xpc_mark_partition_inactive(struct xpc_partition *); extern void xpc_discovery(void); -extern void xpc_check_remote_hb(void); +extern enum xp_retval xpc_get_remote_rp(int, unsigned long *, + struct xpc_rsvd_page *, + unsigned long *); extern void xpc_deactivate_partition(const int, struct xpc_partition *, enum xp_retval); extern enum xp_retval xpc_initiate_partid_to_nasids(short, void *); @@ -657,21 +872,52 @@ extern enum xp_retval xpc_initiate_partid_to_nasids(short, void *); /* found in xpc_channel.c */ extern void xpc_initiate_connect(int); extern void xpc_initiate_disconnect(int); -extern enum xp_retval xpc_initiate_allocate(short, int, u32, void **); -extern enum xp_retval xpc_initiate_send(short, int, void *); -extern enum xp_retval xpc_initiate_send_notify(short, int, void *, +extern enum xp_retval xpc_allocate_msg_wait(struct xpc_channel *); +extern enum xp_retval xpc_initiate_send(short, int, u32, void *, u16); +extern enum xp_retval xpc_initiate_send_notify(short, int, u32, void *, u16, xpc_notify_func, void *); extern void xpc_initiate_received(short, int, void *); -extern enum xp_retval xpc_setup_infrastructure(struct xpc_partition *); -extern enum xp_retval xpc_pull_remote_vars_part(struct xpc_partition *); -extern void xpc_process_channel_activity(struct xpc_partition *); +extern void xpc_process_sent_chctl_flags(struct xpc_partition *); extern void xpc_connected_callout(struct xpc_channel *); -extern void xpc_deliver_msg(struct xpc_channel *); +extern void xpc_deliver_payload(struct xpc_channel *); extern void xpc_disconnect_channel(const int, struct xpc_channel *, enum xp_retval, unsigned long *); extern void xpc_disconnect_callout(struct xpc_channel *, enum xp_retval); extern void xpc_partition_going_down(struct xpc_partition *, enum xp_retval); -extern void xpc_teardown_infrastructure(struct xpc_partition *); + +static inline int +xpc_hb_allowed(short partid, void *heartbeating_to_mask) +{ + return test_bit(partid, heartbeating_to_mask); +} + +static inline int +xpc_any_hbs_allowed(void) +{ + DBUG_ON(xpc_heartbeating_to_mask == NULL); + return !bitmap_empty(xpc_heartbeating_to_mask, xp_max_npartitions); +} + +static inline void +xpc_allow_hb(short partid) +{ + DBUG_ON(xpc_heartbeating_to_mask == NULL); + set_bit(partid, xpc_heartbeating_to_mask); +} + +static inline void +xpc_disallow_hb(short partid) +{ + DBUG_ON(xpc_heartbeating_to_mask == NULL); + clear_bit(partid, xpc_heartbeating_to_mask); +} + +static inline void +xpc_disallow_all_hbs(void) +{ + DBUG_ON(xpc_heartbeating_to_mask == NULL); + bitmap_zero(xpc_heartbeating_to_mask, xp_max_npartitions); +} static inline void xpc_wakeup_channel_mgr(struct xpc_partition *part) @@ -713,7 +959,7 @@ xpc_part_deref(struct xpc_partition *part) s32 refs = atomic_dec_return(&part->references); DBUG_ON(refs < 0); - if (refs == 0 && part->setup_state == XPC_P_WTEARDOWN) + if (refs == 0 && part->setup_state == XPC_P_SS_WTEARDOWN) wake_up(&part->teardown_wq); } @@ -723,7 +969,7 @@ xpc_part_ref(struct xpc_partition *part) int setup; atomic_inc(&part->references); - setup = (part->setup_state == XPC_P_SETUP); + setup = (part->setup_state == XPC_P_SS_SETUP); if (!setup) xpc_part_deref(part); @@ -741,416 +987,4 @@ xpc_part_ref(struct xpc_partition *part) (_p)->reason_line = _line; \ } -/* - * This next set of inlines are used to keep track of when a partition is - * potentially engaged in accessing memory belonging to another partition. - */ - -static inline void -xpc_mark_partition_engaged(struct xpc_partition *part) -{ - unsigned long irq_flags; - AMO_t *amo = (AMO_t *)__va(part->remote_amos_page_pa + - (XPC_ENGAGED_PARTITIONS_AMO * - sizeof(AMO_t))); - - local_irq_save(irq_flags); - - /* set bit corresponding to our partid in remote partition's AMO */ - FETCHOP_STORE_OP(TO_AMO((u64)&amo->variable), FETCHOP_OR, - (1UL << sn_partition_id)); - /* - * We must always use the nofault function regardless of whether we - * are on a Shub 1.1 system or a Shub 1.2 slice 0xc processor. If we - * didn't, we'd never know that the other partition is down and would - * keep sending IPIs and AMOs to it until the heartbeat times out. - */ - (void)xp_nofault_PIOR((u64 *)GLOBAL_MMR_ADDR(NASID_GET(&amo-> - variable), - xp_nofault_PIOR_target)); - - local_irq_restore(irq_flags); -} - -static inline void -xpc_mark_partition_disengaged(struct xpc_partition *part) -{ - unsigned long irq_flags; - AMO_t *amo = (AMO_t *)__va(part->remote_amos_page_pa + - (XPC_ENGAGED_PARTITIONS_AMO * - sizeof(AMO_t))); - - local_irq_save(irq_flags); - - /* clear bit corresponding to our partid in remote partition's AMO */ - FETCHOP_STORE_OP(TO_AMO((u64)&amo->variable), FETCHOP_AND, - ~(1UL << sn_partition_id)); - /* - * We must always use the nofault function regardless of whether we - * are on a Shub 1.1 system or a Shub 1.2 slice 0xc processor. If we - * didn't, we'd never know that the other partition is down and would - * keep sending IPIs and AMOs to it until the heartbeat times out. - */ - (void)xp_nofault_PIOR((u64 *)GLOBAL_MMR_ADDR(NASID_GET(&amo-> - variable), - xp_nofault_PIOR_target)); - - local_irq_restore(irq_flags); -} - -static inline void -xpc_request_partition_disengage(struct xpc_partition *part) -{ - unsigned long irq_flags; - AMO_t *amo = (AMO_t *)__va(part->remote_amos_page_pa + - (XPC_DISENGAGE_REQUEST_AMO * sizeof(AMO_t))); - - local_irq_save(irq_flags); - - /* set bit corresponding to our partid in remote partition's AMO */ - FETCHOP_STORE_OP(TO_AMO((u64)&amo->variable), FETCHOP_OR, - (1UL << sn_partition_id)); - /* - * We must always use the nofault function regardless of whether we - * are on a Shub 1.1 system or a Shub 1.2 slice 0xc processor. If we - * didn't, we'd never know that the other partition is down and would - * keep sending IPIs and AMOs to it until the heartbeat times out. - */ - (void)xp_nofault_PIOR((u64 *)GLOBAL_MMR_ADDR(NASID_GET(&amo-> - variable), - xp_nofault_PIOR_target)); - - local_irq_restore(irq_flags); -} - -static inline void -xpc_cancel_partition_disengage_request(struct xpc_partition *part) -{ - unsigned long irq_flags; - AMO_t *amo = (AMO_t *)__va(part->remote_amos_page_pa + - (XPC_DISENGAGE_REQUEST_AMO * sizeof(AMO_t))); - - local_irq_save(irq_flags); - - /* clear bit corresponding to our partid in remote partition's AMO */ - FETCHOP_STORE_OP(TO_AMO((u64)&amo->variable), FETCHOP_AND, - ~(1UL << sn_partition_id)); - /* - * We must always use the nofault function regardless of whether we - * are on a Shub 1.1 system or a Shub 1.2 slice 0xc processor. If we - * didn't, we'd never know that the other partition is down and would - * keep sending IPIs and AMOs to it until the heartbeat times out. - */ - (void)xp_nofault_PIOR((u64 *)GLOBAL_MMR_ADDR(NASID_GET(&amo-> - variable), - xp_nofault_PIOR_target)); - - local_irq_restore(irq_flags); -} - -static inline u64 -xpc_partition_engaged(u64 partid_mask) -{ - AMO_t *amo = xpc_vars->amos_page + XPC_ENGAGED_PARTITIONS_AMO; - - /* return our partition's AMO variable ANDed with partid_mask */ - return (FETCHOP_LOAD_OP(TO_AMO((u64)&amo->variable), FETCHOP_LOAD) & - partid_mask); -} - -static inline u64 -xpc_partition_disengage_requested(u64 partid_mask) -{ - AMO_t *amo = xpc_vars->amos_page + XPC_DISENGAGE_REQUEST_AMO; - - /* return our partition's AMO variable ANDed with partid_mask */ - return (FETCHOP_LOAD_OP(TO_AMO((u64)&amo->variable), FETCHOP_LOAD) & - partid_mask); -} - -static inline void -xpc_clear_partition_engaged(u64 partid_mask) -{ - AMO_t *amo = xpc_vars->amos_page + XPC_ENGAGED_PARTITIONS_AMO; - - /* clear bit(s) based on partid_mask in our partition's AMO */ - FETCHOP_STORE_OP(TO_AMO((u64)&amo->variable), FETCHOP_AND, - ~partid_mask); -} - -static inline void -xpc_clear_partition_disengage_request(u64 partid_mask) -{ - AMO_t *amo = xpc_vars->amos_page + XPC_DISENGAGE_REQUEST_AMO; - - /* clear bit(s) based on partid_mask in our partition's AMO */ - FETCHOP_STORE_OP(TO_AMO((u64)&amo->variable), FETCHOP_AND, - ~partid_mask); -} - -/* - * The following set of macros and inlines are used for the sending and - * receiving of IPIs (also known as IRQs). There are two flavors of IPIs, - * one that is associated with partition activity (SGI_XPC_ACTIVATE) and - * the other that is associated with channel activity (SGI_XPC_NOTIFY). - */ - -static inline u64 -xpc_IPI_receive(AMO_t *amo) -{ - return FETCHOP_LOAD_OP(TO_AMO((u64)&amo->variable), FETCHOP_CLEAR); -} - -static inline enum xp_retval -xpc_IPI_send(AMO_t *amo, u64 flag, int nasid, int phys_cpuid, int vector) -{ - int ret = 0; - unsigned long irq_flags; - - local_irq_save(irq_flags); - - FETCHOP_STORE_OP(TO_AMO((u64)&amo->variable), FETCHOP_OR, flag); - sn_send_IPI_phys(nasid, phys_cpuid, vector, 0); - - /* - * We must always use the nofault function regardless of whether we - * are on a Shub 1.1 system or a Shub 1.2 slice 0xc processor. If we - * didn't, we'd never know that the other partition is down and would - * keep sending IPIs and AMOs to it until the heartbeat times out. - */ - ret = xp_nofault_PIOR((u64 *)GLOBAL_MMR_ADDR(NASID_GET(&amo->variable), - xp_nofault_PIOR_target)); - - local_irq_restore(irq_flags); - - return ((ret == 0) ? xpSuccess : xpPioReadError); -} - -/* - * IPIs associated with SGI_XPC_ACTIVATE IRQ. - */ - -/* - * Flag the appropriate AMO variable and send an IPI to the specified node. - */ -static inline void -xpc_activate_IRQ_send(u64 amos_page_pa, int from_nasid, int to_nasid, - int to_phys_cpuid) -{ - int w_index = XPC_NASID_W_INDEX(from_nasid); - int b_index = XPC_NASID_B_INDEX(from_nasid); - AMO_t *amos = (AMO_t *)__va(amos_page_pa + - (XPC_ACTIVATE_IRQ_AMOS * sizeof(AMO_t))); - - (void)xpc_IPI_send(&amos[w_index], (1UL << b_index), to_nasid, - to_phys_cpuid, SGI_XPC_ACTIVATE); -} - -static inline void -xpc_IPI_send_activate(struct xpc_vars *vars) -{ - xpc_activate_IRQ_send(vars->amos_page_pa, cnodeid_to_nasid(0), - vars->act_nasid, vars->act_phys_cpuid); -} - -static inline void -xpc_IPI_send_activated(struct xpc_partition *part) -{ - xpc_activate_IRQ_send(part->remote_amos_page_pa, cnodeid_to_nasid(0), - part->remote_act_nasid, - part->remote_act_phys_cpuid); -} - -static inline void -xpc_IPI_send_reactivate(struct xpc_partition *part) -{ - xpc_activate_IRQ_send(xpc_vars->amos_page_pa, part->reactivate_nasid, - xpc_vars->act_nasid, xpc_vars->act_phys_cpuid); -} - -static inline void -xpc_IPI_send_disengage(struct xpc_partition *part) -{ - xpc_activate_IRQ_send(part->remote_amos_page_pa, cnodeid_to_nasid(0), - part->remote_act_nasid, - part->remote_act_phys_cpuid); -} - -/* - * IPIs associated with SGI_XPC_NOTIFY IRQ. - */ - -/* - * Send an IPI to the remote partition that is associated with the - * specified channel. - */ -#define XPC_NOTIFY_IRQ_SEND(_ch, _ipi_f, _irq_f) \ - xpc_notify_IRQ_send(_ch, _ipi_f, #_ipi_f, _irq_f) - -static inline void -xpc_notify_IRQ_send(struct xpc_channel *ch, u8 ipi_flag, char *ipi_flag_string, - unsigned long *irq_flags) -{ - struct xpc_partition *part = &xpc_partitions[ch->partid]; - enum xp_retval ret; - - if (likely(part->act_state != XPC_P_DEACTIVATING)) { - ret = xpc_IPI_send(part->remote_IPI_amo_va, - (u64)ipi_flag << (ch->number * 8), - part->remote_IPI_nasid, - part->remote_IPI_phys_cpuid, SGI_XPC_NOTIFY); - dev_dbg(xpc_chan, "%s sent to partid=%d, channel=%d, ret=%d\n", - ipi_flag_string, ch->partid, ch->number, ret); - if (unlikely(ret != xpSuccess)) { - if (irq_flags != NULL) - spin_unlock_irqrestore(&ch->lock, *irq_flags); - XPC_DEACTIVATE_PARTITION(part, ret); - if (irq_flags != NULL) - spin_lock_irqsave(&ch->lock, *irq_flags); - } - } -} - -/* - * Make it look like the remote partition, which is associated with the - * specified channel, sent us an IPI. This faked IPI will be handled - * by xpc_dropped_IPI_check(). - */ -#define XPC_NOTIFY_IRQ_SEND_LOCAL(_ch, _ipi_f) \ - xpc_notify_IRQ_send_local(_ch, _ipi_f, #_ipi_f) - -static inline void -xpc_notify_IRQ_send_local(struct xpc_channel *ch, u8 ipi_flag, - char *ipi_flag_string) -{ - struct xpc_partition *part = &xpc_partitions[ch->partid]; - - FETCHOP_STORE_OP(TO_AMO((u64)&part->local_IPI_amo_va->variable), - FETCHOP_OR, ((u64)ipi_flag << (ch->number * 8))); - dev_dbg(xpc_chan, "%s sent local from partid=%d, channel=%d\n", - ipi_flag_string, ch->partid, ch->number); -} - -/* - * The sending and receiving of IPIs includes the setting of an AMO variable - * to indicate the reason the IPI was sent. The 64-bit variable is divided - * up into eight bytes, ordered from right to left. Byte zero pertains to - * channel 0, byte one to channel 1, and so on. Each byte is described by - * the following IPI flags. - */ - -#define XPC_IPI_CLOSEREQUEST 0x01 -#define XPC_IPI_CLOSEREPLY 0x02 -#define XPC_IPI_OPENREQUEST 0x04 -#define XPC_IPI_OPENREPLY 0x08 -#define XPC_IPI_MSGREQUEST 0x10 - -/* given an AMO variable and a channel#, get its associated IPI flags */ -#define XPC_GET_IPI_FLAGS(_amo, _c) ((u8) (((_amo) >> ((_c) * 8)) & 0xff)) -#define XPC_SET_IPI_FLAGS(_amo, _c, _f) (_amo) |= ((u64) (_f) << ((_c) * 8)) - -#define XPC_ANY_OPENCLOSE_IPI_FLAGS_SET(_amo) ((_amo) & 0x0f0f0f0f0f0f0f0fUL) -#define XPC_ANY_MSG_IPI_FLAGS_SET(_amo) ((_amo) & 0x1010101010101010UL) - -static inline void -xpc_IPI_send_closerequest(struct xpc_channel *ch, unsigned long *irq_flags) -{ - struct xpc_openclose_args *args = ch->local_openclose_args; - - args->reason = ch->reason; - - XPC_NOTIFY_IRQ_SEND(ch, XPC_IPI_CLOSEREQUEST, irq_flags); -} - -static inline void -xpc_IPI_send_closereply(struct xpc_channel *ch, unsigned long *irq_flags) -{ - XPC_NOTIFY_IRQ_SEND(ch, XPC_IPI_CLOSEREPLY, irq_flags); -} - -static inline void -xpc_IPI_send_openrequest(struct xpc_channel *ch, unsigned long *irq_flags) -{ - struct xpc_openclose_args *args = ch->local_openclose_args; - - args->msg_size = ch->msg_size; - args->local_nentries = ch->local_nentries; - - XPC_NOTIFY_IRQ_SEND(ch, XPC_IPI_OPENREQUEST, irq_flags); -} - -static inline void -xpc_IPI_send_openreply(struct xpc_channel *ch, unsigned long *irq_flags) -{ - struct xpc_openclose_args *args = ch->local_openclose_args; - - args->remote_nentries = ch->remote_nentries; - args->local_nentries = ch->local_nentries; - args->local_msgqueue_pa = __pa(ch->local_msgqueue); - - XPC_NOTIFY_IRQ_SEND(ch, XPC_IPI_OPENREPLY, irq_flags); -} - -static inline void -xpc_IPI_send_msgrequest(struct xpc_channel *ch) -{ - XPC_NOTIFY_IRQ_SEND(ch, XPC_IPI_MSGREQUEST, NULL); -} - -static inline void -xpc_IPI_send_local_msgrequest(struct xpc_channel *ch) -{ - XPC_NOTIFY_IRQ_SEND_LOCAL(ch, XPC_IPI_MSGREQUEST); -} - -/* - * Memory for XPC's AMO variables is allocated by the MSPEC driver. These - * pages are located in the lowest granule. The lowest granule uses 4k pages - * for cached references and an alternate TLB handler to never provide a - * cacheable mapping for the entire region. This will prevent speculative - * reading of cached copies of our lines from being issued which will cause - * a PI FSB Protocol error to be generated by the SHUB. For XPC, we need 64 - * AMO variables (based on XP_MAX_PARTITIONS) for message notification and an - * additional 128 AMO variables (based on XP_NASID_MASK_WORDS) for partition - * activation and 2 AMO variables for partition deactivation. - */ -static inline AMO_t * -xpc_IPI_init(int index) -{ - AMO_t *amo = xpc_vars->amos_page + index; - - (void)xpc_IPI_receive(amo); /* clear AMO variable */ - return amo; -} - -static inline enum xp_retval -xpc_map_bte_errors(bte_result_t error) -{ - return ((error == BTE_SUCCESS) ? xpSuccess : xpBteCopyError); -} - -/* - * Check to see if there is any channel activity to/from the specified - * partition. - */ -static inline void -xpc_check_for_channel_activity(struct xpc_partition *part) -{ - u64 IPI_amo; - unsigned long irq_flags; - - IPI_amo = xpc_IPI_receive(part->local_IPI_amo_va); - if (IPI_amo == 0) - return; - - spin_lock_irqsave(&part->IPI_lock, irq_flags); - part->local_IPI_amo |= IPI_amo; - spin_unlock_irqrestore(&part->IPI_lock, irq_flags); - - dev_dbg(xpc_chan, "received IPI from partid=%d, IPI_amo=0x%lx\n", - XPC_PARTID(part), IPI_amo); - - xpc_wakeup_channel_mgr(part); -} - #endif /* _DRIVERS_MISC_SGIXP_XPC_H */ diff --git a/drivers/misc/sgi-xp/xpc_channel.c b/drivers/misc/sgi-xp/xpc_channel.c index 9c90c2d55c0..9cd2ebe2a3b 100644 --- a/drivers/misc/sgi-xp/xpc_channel.c +++ b/drivers/misc/sgi-xp/xpc_channel.c @@ -14,536 +14,10 @@ * */ -#include <linux/kernel.h> -#include <linux/init.h> -#include <linux/sched.h> -#include <linux/cache.h> -#include <linux/interrupt.h> -#include <linux/mutex.h> -#include <linux/completion.h> -#include <asm/sn/bte.h> -#include <asm/sn/sn_sal.h> +#include <linux/device.h> #include "xpc.h" /* - * Guarantee that the kzalloc'd memory is cacheline aligned. - */ -static void * -xpc_kzalloc_cacheline_aligned(size_t size, gfp_t flags, void **base) -{ - /* see if kzalloc will give us cachline aligned memory by default */ - *base = kzalloc(size, flags); - if (*base == NULL) - return NULL; - - if ((u64)*base == L1_CACHE_ALIGN((u64)*base)) - return *base; - - kfree(*base); - - /* nope, we'll have to do it ourselves */ - *base = kzalloc(size + L1_CACHE_BYTES, flags); - if (*base == NULL) - return NULL; - - return (void *)L1_CACHE_ALIGN((u64)*base); -} - -/* - * Set up the initial values for the XPartition Communication channels. - */ -static void -xpc_initialize_channels(struct xpc_partition *part, short partid) -{ - int ch_number; - struct xpc_channel *ch; - - for (ch_number = 0; ch_number < part->nchannels; ch_number++) { - ch = &part->channels[ch_number]; - - ch->partid = partid; - ch->number = ch_number; - ch->flags = XPC_C_DISCONNECTED; - - ch->local_GP = &part->local_GPs[ch_number]; - ch->local_openclose_args = - &part->local_openclose_args[ch_number]; - - atomic_set(&ch->kthreads_assigned, 0); - atomic_set(&ch->kthreads_idle, 0); - atomic_set(&ch->kthreads_active, 0); - - atomic_set(&ch->references, 0); - atomic_set(&ch->n_to_notify, 0); - - spin_lock_init(&ch->lock); - mutex_init(&ch->msg_to_pull_mutex); - init_completion(&ch->wdisconnect_wait); - - atomic_set(&ch->n_on_msg_allocate_wq, 0); - init_waitqueue_head(&ch->msg_allocate_wq); - init_waitqueue_head(&ch->idle_wq); - } -} - -/* - * Setup the infrastructure necessary to support XPartition Communication - * between the specified remote partition and the local one. - */ -enum xp_retval -xpc_setup_infrastructure(struct xpc_partition *part) -{ - int ret, cpuid; - struct timer_list *timer; - short partid = XPC_PARTID(part); - - /* - * Zero out MOST of the entry for this partition. Only the fields - * starting with `nchannels' will be zeroed. The preceding fields must - * remain `viable' across partition ups and downs, since they may be - * referenced during this memset() operation. - */ - memset(&part->nchannels, 0, sizeof(struct xpc_partition) - - offsetof(struct xpc_partition, nchannels)); - - /* - * Allocate all of the channel structures as a contiguous chunk of - * memory. - */ - part->channels = kzalloc(sizeof(struct xpc_channel) * XPC_NCHANNELS, - GFP_KERNEL); - if (part->channels == NULL) { - dev_err(xpc_chan, "can't get memory for channels\n"); - return xpNoMemory; - } - - part->nchannels = XPC_NCHANNELS; - - /* allocate all the required GET/PUT values */ - - part->local_GPs = xpc_kzalloc_cacheline_aligned(XPC_GP_SIZE, - GFP_KERNEL, - &part->local_GPs_base); - if (part->local_GPs == NULL) { - kfree(part->channels); - part->channels = NULL; - dev_err(xpc_chan, "can't get memory for local get/put " - "values\n"); - return xpNoMemory; - } - - part->remote_GPs = xpc_kzalloc_cacheline_aligned(XPC_GP_SIZE, - GFP_KERNEL, - &part-> - remote_GPs_base); - if (part->remote_GPs == NULL) { - dev_err(xpc_chan, "can't get memory for remote get/put " - "values\n"); - kfree(part->local_GPs_base); - part->local_GPs = NULL; - kfree(part->channels); - part->channels = NULL; - return xpNoMemory; - } - - /* allocate all the required open and close args */ - - part->local_openclose_args = - xpc_kzalloc_cacheline_aligned(XPC_OPENCLOSE_ARGS_SIZE, GFP_KERNEL, - &part->local_openclose_args_base); - if (part->local_openclose_args == NULL) { - dev_err(xpc_chan, "can't get memory for local connect args\n"); - kfree(part->remote_GPs_base); - part->remote_GPs = NULL; - kfree(part->local_GPs_base); - part->local_GPs = NULL; - kfree(part->channels); - part->channels = NULL; - return xpNoMemory; - } - - part->remote_openclose_args = - xpc_kzalloc_cacheline_aligned(XPC_OPENCLOSE_ARGS_SIZE, GFP_KERNEL, - &part->remote_openclose_args_base); - if (part->remote_openclose_args == NULL) { - dev_err(xpc_chan, "can't get memory for remote connect args\n"); - kfree(part->local_openclose_args_base); - part->local_openclose_args = NULL; - kfree(part->remote_GPs_base); - part->remote_GPs = NULL; - kfree(part->local_GPs_base); - part->local_GPs = NULL; - kfree(part->channels); - part->channels = NULL; - return xpNoMemory; - } - - xpc_initialize_channels(part, partid); - - atomic_set(&part->nchannels_active, 0); - atomic_set(&part->nchannels_engaged, 0); - - /* local_IPI_amo were set to 0 by an earlier memset() */ - - /* Initialize this partitions AMO_t structure */ - part->local_IPI_amo_va = xpc_IPI_init(partid); - - spin_lock_init(&part->IPI_lock); - - atomic_set(&part->channel_mgr_requests, 1); - init_waitqueue_head(&part->channel_mgr_wq); - - sprintf(part->IPI_owner, "xpc%02d", partid); - ret = request_irq(SGI_XPC_NOTIFY, xpc_notify_IRQ_handler, IRQF_SHARED, - part->IPI_owner, (void *)(u64)partid); - if (ret != 0) { - dev_err(xpc_chan, "can't register NOTIFY IRQ handler, " - "errno=%d\n", -ret); - kfree(part->remote_openclose_args_base); - part->remote_openclose_args = NULL; - kfree(part->local_openclose_args_base); - part->local_openclose_args = NULL; - kfree(part->remote_GPs_base); - part->remote_GPs = NULL; - kfree(part->local_GPs_base); - part->local_GPs = NULL; - kfree(part->channels); - part->channels = NULL; - return xpLackOfResources; - } - - /* Setup a timer to check for dropped IPIs */ - timer = &part->dropped_IPI_timer; - init_timer(timer); - timer->function = (void (*)(unsigned long))xpc_dropped_IPI_check; - timer->data = (unsigned long)part; - timer->expires = jiffies + XPC_P_DROPPED_IPI_WAIT; - add_timer(timer); - - /* - * With the setting of the partition setup_state to XPC_P_SETUP, we're - * declaring that this partition is ready to go. - */ - part->setup_state = XPC_P_SETUP; - - /* - * Setup the per partition specific variables required by the - * remote partition to establish channel connections with us. - * - * The setting of the magic # indicates that these per partition - * specific variables are ready to be used. - */ - xpc_vars_part[partid].GPs_pa = __pa(part->local_GPs); - xpc_vars_part[partid].openclose_args_pa = - __pa(part->local_openclose_args); - xpc_vars_part[partid].IPI_amo_pa = __pa(part->local_IPI_amo_va); - cpuid = raw_smp_processor_id(); /* any CPU in this partition will do */ - xpc_vars_part[partid].IPI_nasid = cpuid_to_nasid(cpuid); - xpc_vars_part[partid].IPI_phys_cpuid = cpu_physical_id(cpuid); - xpc_vars_part[partid].nchannels = part->nchannels; - xpc_vars_part[partid].magic = XPC_VP_MAGIC1; - - return xpSuccess; -} - -/* - * Create a wrapper that hides the underlying mechanism for pulling a cacheline - * (or multiple cachelines) from a remote partition. - * - * src must be a cacheline aligned physical address on the remote partition. - * dst must be a cacheline aligned virtual address on this partition. - * cnt must be an cacheline sized - */ -static enum xp_retval -xpc_pull_remote_cachelines(struct xpc_partition *part, void *dst, - const void *src, size_t cnt) -{ - bte_result_t bte_ret; - - DBUG_ON((u64)src != L1_CACHE_ALIGN((u64)src)); - DBUG_ON((u64)dst != L1_CACHE_ALIGN((u64)dst)); - DBUG_ON(cnt != L1_CACHE_ALIGN(cnt)); - - if (part->act_state == XPC_P_DEACTIVATING) - return part->reason; - - bte_ret = xp_bte_copy((u64)src, (u64)dst, (u64)cnt, - (BTE_NORMAL | BTE_WACQUIRE), NULL); - if (bte_ret == BTE_SUCCESS) - return xpSuccess; - - dev_dbg(xpc_chan, "xp_bte_copy() from partition %d failed, ret=%d\n", - XPC_PARTID(part), bte_ret); - - return xpc_map_bte_errors(bte_ret); -} - -/* - * Pull the remote per partition specific variables from the specified - * partition. - */ -enum xp_retval -xpc_pull_remote_vars_part(struct xpc_partition *part) -{ - u8 buffer[L1_CACHE_BYTES * 2]; - struct xpc_vars_part *pulled_entry_cacheline = - (struct xpc_vars_part *)L1_CACHE_ALIGN((u64)buffer); - struct xpc_vars_part *pulled_entry; - u64 remote_entry_cacheline_pa, remote_entry_pa; - short partid = XPC_PARTID(part); - enum xp_retval ret; - - /* pull the cacheline that contains the variables we're interested in */ - - DBUG_ON(part->remote_vars_part_pa != - L1_CACHE_ALIGN(part->remote_vars_part_pa)); - DBUG_ON(sizeof(struct xpc_vars_part) != L1_CACHE_BYTES / 2); - - remote_entry_pa = part->remote_vars_part_pa + - sn_partition_id * sizeof(struct xpc_vars_part); - - remote_entry_cacheline_pa = (remote_entry_pa & ~(L1_CACHE_BYTES - 1)); - - pulled_entry = (struct xpc_vars_part *)((u64)pulled_entry_cacheline + - (remote_entry_pa & - (L1_CACHE_BYTES - 1))); - - ret = xpc_pull_remote_cachelines(part, pulled_entry_cacheline, - (void *)remote_entry_cacheline_pa, - L1_CACHE_BYTES); - if (ret != xpSuccess) { - dev_dbg(xpc_chan, "failed to pull XPC vars_part from " - "partition %d, ret=%d\n", partid, ret); - return ret; - } - - /* see if they've been set up yet */ - - if (pulled_entry->magic != XPC_VP_MAGIC1 && - pulled_entry->magic != XPC_VP_MAGIC2) { - - if (pulled_entry->magic != 0) { - dev_dbg(xpc_chan, "partition %d's XPC vars_part for " - "partition %d has bad magic value (=0x%lx)\n", - partid, sn_partition_id, pulled_entry->magic); - return xpBadMagic; - } - - /* they've not been initialized yet */ - return xpRetry; - } - - if (xpc_vars_part[partid].magic == XPC_VP_MAGIC1) { - - /* validate the variables */ - - if (pulled_entry->GPs_pa == 0 || - pulled_entry->openclose_args_pa == 0 || - pulled_entry->IPI_amo_pa == 0) { - - dev_err(xpc_chan, "partition %d's XPC vars_part for " - "partition %d are not valid\n", partid, - sn_partition_id); - return xpInvalidAddress; - } - - /* the variables we imported look to be valid */ - - part->remote_GPs_pa = pulled_entry->GPs_pa; - part->remote_openclose_args_pa = - pulled_entry->openclose_args_pa; - part->remote_IPI_amo_va = - (AMO_t *)__va(pulled_entry->IPI_amo_pa); - part->remote_IPI_nasid = pulled_entry->IPI_nasid; - part->remote_IPI_phys_cpuid = pulled_entry->IPI_phys_cpuid; - - if (part->nchannels > pulled_entry->nchannels) - part->nchannels = pulled_entry->nchannels; - - /* let the other side know that we've pulled their variables */ - - xpc_vars_part[partid].magic = XPC_VP_MAGIC2; - } - - if (pulled_entry->magic == XPC_VP_MAGIC1) - return xpRetry; - - return xpSuccess; -} - -/* - * Get the IPI flags and pull the openclose args and/or remote GPs as needed. - */ -static u64 -xpc_get_IPI_flags(struct xpc_partition *part) -{ - unsigned long irq_flags; - u64 IPI_amo; - enum xp_retval ret; - - /* - * See if there are any IPI flags to be handled. - */ - - spin_lock_irqsave(&part->IPI_lock, irq_flags); - IPI_amo = part->local_IPI_amo; - if (IPI_amo != 0) - part->local_IPI_amo = 0; - - spin_unlock_irqrestore(&part->IPI_lock, irq_flags); - - if (XPC_ANY_OPENCLOSE_IPI_FLAGS_SET(IPI_amo)) { - ret = xpc_pull_remote_cachelines(part, - part->remote_openclose_args, - (void *)part-> - remote_openclose_args_pa, - XPC_OPENCLOSE_ARGS_SIZE); - if (ret != xpSuccess) { - XPC_DEACTIVATE_PARTITION(part, ret); - - dev_dbg(xpc_chan, "failed to pull openclose args from " - "partition %d, ret=%d\n", XPC_PARTID(part), - ret); - - /* don't bother processing IPIs anymore */ - IPI_amo = 0; - } - } - - if (XPC_ANY_MSG_IPI_FLAGS_SET(IPI_amo)) { - ret = xpc_pull_remote_cachelines(part, part->remote_GPs, - (void *)part->remote_GPs_pa, - XPC_GP_SIZE); - if (ret != xpSuccess) { - XPC_DEACTIVATE_PARTITION(part, ret); - - dev_dbg(xpc_chan, "failed to pull GPs from partition " - "%d, ret=%d\n", XPC_PARTID(part), ret); - - /* don't bother processing IPIs anymore */ - IPI_amo = 0; - } - } - - return IPI_amo; -} - -/* - * Allocate the local message queue and the notify queue. - */ -static enum xp_retval -xpc_allocate_local_msgqueue(struct xpc_channel *ch) -{ - unsigned long irq_flags; - int nentries; - size_t nbytes; - - for (nentries = ch->local_nentries; nentries > 0; nentries--) { - - nbytes = nentries * ch->msg_size; - ch->local_msgqueue = xpc_kzalloc_cacheline_aligned(nbytes, - GFP_KERNEL, - &ch->local_msgqueue_base); - if (ch->local_msgqueue == NULL) - continue; - - nbytes = nentries * sizeof(struct xpc_notify); - ch->notify_queue = kzalloc(nbytes, GFP_KERNEL); - if (ch->notify_queue == NULL) { - kfree(ch->local_msgqueue_base); - ch->local_msgqueue = NULL; - continue; - } - - spin_lock_irqsave(&ch->lock, irq_flags); - if (nentries < ch->local_nentries) { - dev_dbg(xpc_chan, "nentries=%d local_nentries=%d, " - "partid=%d, channel=%d\n", nentries, - ch->local_nentries, ch->partid, ch->number); - - ch->local_nentries = nentries; - } - spin_unlock_irqrestore(&ch->lock, irq_flags); - return xpSuccess; - } - - dev_dbg(xpc_chan, "can't get memory for local message queue and notify " - "queue, partid=%d, channel=%d\n", ch->partid, ch->number); - return xpNoMemory; -} - -/* - * Allocate the cached remote message queue. - */ -static enum xp_retval -xpc_allocate_remote_msgqueue(struct xpc_channel *ch) -{ - unsigned long irq_flags; - int nentries; - size_t nbytes; - - DBUG_ON(ch->remote_nentries <= 0); - - for (nentries = ch->remote_nentries; nentries > 0; nentries--) { - - nbytes = nentries * ch->msg_size; - ch->remote_msgqueue = xpc_kzalloc_cacheline_aligned(nbytes, - GFP_KERNEL, - &ch->remote_msgqueue_base); - if (ch->remote_msgqueue == NULL) - continue; - - spin_lock_irqsave(&ch->lock, irq_flags); - if (nentries < ch->remote_nentries) { - dev_dbg(xpc_chan, "nentries=%d remote_nentries=%d, " - "partid=%d, channel=%d\n", nentries, - ch->remote_nentries, ch->partid, ch->number); - - ch->remote_nentries = nentries; - } - spin_unlock_irqrestore(&ch->lock, irq_flags); - return xpSuccess; - } - - dev_dbg(xpc_chan, "can't get memory for cached remote message queue, " - "partid=%d, channel=%d\n", ch->partid, ch->number); - return xpNoMemory; -} - -/* - * Allocate message queues and other stuff associated with a channel. - * - * Note: Assumes all of the channel sizes are filled in. - */ -static enum xp_retval -xpc_allocate_msgqueues(struct xpc_channel *ch) -{ - unsigned long irq_flags; - enum xp_retval ret; - - DBUG_ON(ch->flags & XPC_C_SETUP); - - ret = xpc_allocate_local_msgqueue(ch); - if (ret != xpSuccess) - return ret; - - ret = xpc_allocate_remote_msgqueue(ch); - if (ret != xpSuccess) { - kfree(ch->local_msgqueue_base); - ch->local_msgqueue = NULL; - kfree(ch->notify_queue); - ch->notify_queue = NULL; - return ret; - } - - spin_lock_irqsave(&ch->lock, irq_flags); - ch->flags |= XPC_C_SETUP; - spin_unlock_irqrestore(&ch->lock, irq_flags); - - return xpSuccess; -} - -/* * Process a connect message from a remote partition. * * Note: xpc_process_connect() is expecting to be called with the @@ -565,30 +39,29 @@ xpc_process_connect(struct xpc_channel *ch, unsigned long *irq_flags) if (!(ch->flags & XPC_C_SETUP)) { spin_unlock_irqrestore(&ch->lock, *irq_flags); - ret = xpc_allocate_msgqueues(ch); + ret = xpc_setup_msg_structures(ch); spin_lock_irqsave(&ch->lock, *irq_flags); if (ret != xpSuccess) XPC_DISCONNECT_CHANNEL(ch, ret, irq_flags); + ch->flags |= XPC_C_SETUP; + if (ch->flags & (XPC_C_CONNECTED | XPC_C_DISCONNECTING)) return; - DBUG_ON(!(ch->flags & XPC_C_SETUP)); DBUG_ON(ch->local_msgqueue == NULL); DBUG_ON(ch->remote_msgqueue == NULL); } if (!(ch->flags & XPC_C_OPENREPLY)) { ch->flags |= XPC_C_OPENREPLY; - xpc_IPI_send_openreply(ch, irq_flags); + xpc_send_chctl_openreply(ch, irq_flags); } if (!(ch->flags & XPC_C_ROPENREPLY)) return; - DBUG_ON(ch->remote_msgqueue_pa == 0); - ch->flags = (XPC_C_CONNECTED | XPC_C_SETUP); /* clear all else */ dev_info(xpc_chan, "channel %d to partition %d connected\n", @@ -600,99 +73,6 @@ xpc_process_connect(struct xpc_channel *ch, unsigned long *irq_flags) } /* - * Notify those who wanted to be notified upon delivery of their message. - */ -static void -xpc_notify_senders(struct xpc_channel *ch, enum xp_retval reason, s64 put) -{ - struct xpc_notify *notify; - u8 notify_type; - s64 get = ch->w_remote_GP.get - 1; - - while (++get < put && atomic_read(&ch->n_to_notify) > 0) { - - notify = &ch->notify_queue[get % ch->local_nentries]; - - /* - * See if the notify entry indicates it was associated with - * a message who's sender wants to be notified. It is possible - * that it is, but someone else is doing or has done the - * notification. - */ - notify_type = notify->type; - if (notify_type == 0 || - cmpxchg(¬ify->type, notify_type, 0) != notify_type) { - continue; - } - - DBUG_ON(notify_type != XPC_N_CALL); - - atomic_dec(&ch->n_to_notify); - - if (notify->func != NULL) { - dev_dbg(xpc_chan, "notify->func() called, notify=0x%p, " - "msg_number=%ld, partid=%d, channel=%d\n", - (void *)notify, get, ch->partid, ch->number); - - notify->func(reason, ch->partid, ch->number, - notify->key); - - dev_dbg(xpc_chan, "notify->func() returned, " - "notify=0x%p, msg_number=%ld, partid=%d, " - "channel=%d\n", (void *)notify, get, - ch->partid, ch->number); - } - } -} - -/* - * Free up message queues and other stuff that were allocated for the specified - * channel. - * - * Note: ch->reason and ch->reason_line are left set for debugging purposes, - * they're cleared when XPC_C_DISCONNECTED is cleared. - */ -static void -xpc_free_msgqueues(struct xpc_channel *ch) -{ - DBUG_ON(!spin_is_locked(&ch->lock)); - DBUG_ON(atomic_read(&ch->n_to_notify) != 0); - - ch->remote_msgqueue_pa = 0; - ch->func = NULL; - ch->key = NULL; - ch->msg_size = 0; - ch->local_nentries = 0; - ch->remote_nentries = 0; - ch->kthreads_assigned_limit = 0; - ch->kthreads_idle_limit = 0; - - ch->local_GP->get = 0; - ch->local_GP->put = 0; - ch->remote_GP.get = 0; - ch->remote_GP.put = 0; - ch->w_local_GP.get = 0; - ch->w_local_GP.put = 0; - ch->w_remote_GP.get = 0; - ch->w_remote_GP.put = 0; - ch->next_msg_to_pull = 0; - - if (ch->flags & XPC_C_SETUP) { - ch->flags &= ~XPC_C_SETUP; - - dev_dbg(xpc_chan, "ch->flags=0x%x, partid=%d, channel=%d\n", - ch->flags, ch->partid, ch->number); - - kfree(ch->local_msgqueue_base); - ch->local_msgqueue = NULL; - kfree(ch->remote_msgqueue_base); - ch->remote_msgqueue = NULL; - kfree(ch->notify_queue); - ch->notify_queue = NULL; - } -} - -/* * spin_lock_irqsave() is expected to be held on entry. */ static void @@ -717,9 +97,9 @@ xpc_process_disconnect(struct xpc_channel *ch, unsigned long *irq_flags) DBUG_ON((ch->flags & XPC_C_CONNECTEDCALLOUT_MADE) && !(ch->flags & XPC_C_DISCONNECTINGCALLOUT_MADE)); - if (part->act_state == XPC_P_DEACTIVATING) { + if (part->act_state == XPC_P_AS_DEACTIVATING) { /* can't proceed until the other side disengages from us */ - if (xpc_partition_engaged(1UL << ch->partid)) + if (xpc_partition_engaged(ch->partid)) return; } else { @@ -731,7 +111,7 @@ xpc_process_disconnect(struct xpc_channel *ch, unsigned long *irq_flags) if (!(ch->flags & XPC_C_CLOSEREPLY)) { ch->flags |= XPC_C_CLOSEREPLY; - xpc_IPI_send_closereply(ch, irq_flags); + xpc_send_chctl_closereply(ch, irq_flags); } if (!(ch->flags & XPC_C_RCLOSEREPLY)) @@ -740,8 +120,8 @@ xpc_process_disconnect(struct xpc_channel *ch, unsigned long *irq_flags) /* wake those waiting for notify completion */ if (atomic_read(&ch->n_to_notify) > 0) { - /* >>> we do callout while holding ch->lock */ - xpc_notify_senders(ch, ch->reason, ch->w_local_GP.put); + /* we do callout while holding ch->lock, callout can't block */ + xpc_notify_senders_of_disconnect(ch); } /* both sides are disconnected now */ @@ -752,10 +132,24 @@ xpc_process_disconnect(struct xpc_channel *ch, unsigned long *irq_flags) spin_lock_irqsave(&ch->lock, *irq_flags); } + DBUG_ON(atomic_read(&ch->n_to_notify) != 0); + /* it's now safe to free the channel's message queues */ - xpc_free_msgqueues(ch); + xpc_teardown_msg_structures(ch); - /* mark disconnected, clear all other flags except XPC_C_WDISCONNECT */ + ch->func = NULL; + ch->key = NULL; + ch->entry_size = 0; + ch->local_nentries = 0; + ch->remote_nentries = 0; + ch->kthreads_assigned_limit = 0; + ch->kthreads_idle_limit = 0; + + /* + * Mark the channel disconnected and clear all other flags, including + * XPC_C_SETUP (because of call to xpc_teardown_msg_structures()) but + * not including XPC_C_WDISCONNECT (if it was set). + */ ch->flags = (XPC_C_DISCONNECTED | (ch->flags & XPC_C_WDISCONNECT)); atomic_dec(&part->nchannels_active); @@ -768,15 +162,15 @@ xpc_process_disconnect(struct xpc_channel *ch, unsigned long *irq_flags) if (ch->flags & XPC_C_WDISCONNECT) { /* we won't lose the CPU since we're holding ch->lock */ complete(&ch->wdisconnect_wait); - } else if (ch->delayed_IPI_flags) { - if (part->act_state != XPC_P_DEACTIVATING) { - /* time to take action on any delayed IPI flags */ - spin_lock(&part->IPI_lock); - XPC_SET_IPI_FLAGS(part->local_IPI_amo, ch->number, - ch->delayed_IPI_flags); - spin_unlock(&part->IPI_lock); + } else if (ch->delayed_chctl_flags) { + if (part->act_state != XPC_P_AS_DEACTIVATING) { + /* time to take action on any delayed chctl flags */ + spin_lock(&part->chctl_lock); + part->chctl.flags[ch->number] |= + ch->delayed_chctl_flags; + spin_unlock(&part->chctl_lock); } - ch->delayed_IPI_flags = 0; + ch->delayed_chctl_flags = 0; } } @@ -784,8 +178,8 @@ xpc_process_disconnect(struct xpc_channel *ch, unsigned long *irq_flags) * Process a change in the channel's remote connection state. */ static void -xpc_process_openclose_IPI(struct xpc_partition *part, int ch_number, - u8 IPI_flags) +xpc_process_openclose_chctl_flags(struct xpc_partition *part, int ch_number, + u8 chctl_flags) { unsigned long irq_flags; struct xpc_openclose_args *args = @@ -800,24 +194,24 @@ again: if ((ch->flags & XPC_C_DISCONNECTED) && (ch->flags & XPC_C_WDISCONNECT)) { /* - * Delay processing IPI flags until thread waiting disconnect + * Delay processing chctl flags until thread waiting disconnect * has had a chance to see that the channel is disconnected. */ - ch->delayed_IPI_flags |= IPI_flags; + ch->delayed_chctl_flags |= chctl_flags; spin_unlock_irqrestore(&ch->lock, irq_flags); return; } - if (IPI_flags & XPC_IPI_CLOSEREQUEST) { + if (chctl_flags & XPC_CHCTL_CLOSEREQUEST) { - dev_dbg(xpc_chan, "XPC_IPI_CLOSEREQUEST (reason=%d) received " + dev_dbg(xpc_chan, "XPC_CHCTL_CLOSEREQUEST (reason=%d) received " "from partid=%d, channel=%d\n", args->reason, ch->partid, ch->number); /* * If RCLOSEREQUEST is set, we're probably waiting for * RCLOSEREPLY. We should find it and a ROPENREQUEST packed - * with this RCLOSEREQUEST in the IPI_flags. + * with this RCLOSEREQUEST in the chctl_flags. */ if (ch->flags & XPC_C_RCLOSEREQUEST) { @@ -826,8 +220,8 @@ again: DBUG_ON(!(ch->flags & XPC_C_CLOSEREPLY)); DBUG_ON(ch->flags & XPC_C_RCLOSEREPLY); - DBUG_ON(!(IPI_flags & XPC_IPI_CLOSEREPLY)); - IPI_flags &= ~XPC_IPI_CLOSEREPLY; + DBUG_ON(!(chctl_flags & XPC_CHCTL_CLOSEREPLY)); + chctl_flags &= ~XPC_CHCTL_CLOSEREPLY; ch->flags |= XPC_C_RCLOSEREPLY; /* both sides have finished disconnecting */ @@ -837,17 +231,15 @@ again: } if (ch->flags & XPC_C_DISCONNECTED) { - if (!(IPI_flags & XPC_IPI_OPENREQUEST)) { - if ((XPC_GET_IPI_FLAGS(part->local_IPI_amo, - ch_number) & - XPC_IPI_OPENREQUEST)) { - - DBUG_ON(ch->delayed_IPI_flags != 0); - spin_lock(&part->IPI_lock); - XPC_SET_IPI_FLAGS(part->local_IPI_amo, - ch_number, - XPC_IPI_CLOSEREQUEST); - spin_unlock(&part->IPI_lock); + if (!(chctl_flags & XPC_CHCTL_OPENREQUEST)) { + if (part->chctl.flags[ch_number] & + XPC_CHCTL_OPENREQUEST) { + + DBUG_ON(ch->delayed_chctl_flags != 0); + spin_lock(&part->chctl_lock); + part->chctl.flags[ch_number] |= + XPC_CHCTL_CLOSEREQUEST; + spin_unlock(&part->chctl_lock); } spin_unlock_irqrestore(&ch->lock, irq_flags); return; @@ -860,7 +252,7 @@ again: ch->flags |= (XPC_C_CONNECTING | XPC_C_ROPENREQUEST); } - IPI_flags &= ~(XPC_IPI_OPENREQUEST | XPC_IPI_OPENREPLY); + chctl_flags &= ~(XPC_CHCTL_OPENREQUEST | XPC_CHCTL_OPENREPLY); /* * The meaningful CLOSEREQUEST connection state fields are: @@ -878,7 +270,7 @@ again: XPC_DISCONNECT_CHANNEL(ch, reason, &irq_flags); - DBUG_ON(IPI_flags & XPC_IPI_CLOSEREPLY); + DBUG_ON(chctl_flags & XPC_CHCTL_CLOSEREPLY); spin_unlock_irqrestore(&ch->lock, irq_flags); return; } @@ -886,13 +278,13 @@ again: xpc_process_disconnect(ch, &irq_flags); } - if (IPI_flags & XPC_IPI_CLOSEREPLY) { + if (chctl_flags & XPC_CHCTL_CLOSEREPLY) { - dev_dbg(xpc_chan, "XPC_IPI_CLOSEREPLY received from partid=%d," - " channel=%d\n", ch->partid, ch->number); + dev_dbg(xpc_chan, "XPC_CHCTL_CLOSEREPLY received from partid=" + "%d, channel=%d\n", ch->partid, ch->number); if (ch->flags & XPC_C_DISCONNECTED) { - DBUG_ON(part->act_state != XPC_P_DEACTIVATING); + DBUG_ON(part->act_state != XPC_P_AS_DEACTIVATING); spin_unlock_irqrestore(&ch->lock, irq_flags); return; } @@ -900,15 +292,14 @@ again: DBUG_ON(!(ch->flags & XPC_C_CLOSEREQUEST)); if (!(ch->flags & XPC_C_RCLOSEREQUEST)) { - if ((XPC_GET_IPI_FLAGS(part->local_IPI_amo, ch_number) - & XPC_IPI_CLOSEREQUEST)) { - - DBUG_ON(ch->delayed_IPI_flags != 0); - spin_lock(&part->IPI_lock); - XPC_SET_IPI_FLAGS(part->local_IPI_amo, - ch_number, - XPC_IPI_CLOSEREPLY); - spin_unlock(&part->IPI_lock); + if (part->chctl.flags[ch_number] & + XPC_CHCTL_CLOSEREQUEST) { + + DBUG_ON(ch->delayed_chctl_flags != 0); + spin_lock(&part->chctl_lock); + part->chctl.flags[ch_number] |= + XPC_CHCTL_CLOSEREPLY; + spin_unlock(&part->chctl_lock); } spin_unlock_irqrestore(&ch->lock, irq_flags); return; @@ -922,21 +313,21 @@ again: } } - if (IPI_flags & XPC_IPI_OPENREQUEST) { + if (chctl_flags & XPC_CHCTL_OPENREQUEST) { - dev_dbg(xpc_chan, "XPC_IPI_OPENREQUEST (msg_size=%d, " + dev_dbg(xpc_chan, "XPC_CHCTL_OPENREQUEST (entry_size=%d, " "local_nentries=%d) received from partid=%d, " - "channel=%d\n", args->msg_size, args->local_nentries, + "channel=%d\n", args->entry_size, args->local_nentries, ch->partid, ch->number); - if (part->act_state == XPC_P_DEACTIVATING || + if (part->act_state == XPC_P_AS_DEACTIVATING || (ch->flags & XPC_C_ROPENREQUEST)) { spin_unlock_irqrestore(&ch->lock, irq_flags); return; } if (ch->flags & (XPC_C_DISCONNECTING | XPC_C_WDISCONNECT)) { - ch->delayed_IPI_flags |= XPC_IPI_OPENREQUEST; + ch->delayed_chctl_flags |= XPC_CHCTL_OPENREQUEST; spin_unlock_irqrestore(&ch->lock, irq_flags); return; } @@ -947,10 +338,10 @@ again: /* * The meaningful OPENREQUEST connection state fields are: - * msg_size = size of channel's messages in bytes + * entry_size = size of channel's messages in bytes * local_nentries = remote partition's local_nentries */ - if (args->msg_size == 0 || args->local_nentries == 0) { + if (args->entry_size == 0 || args->local_nentries == 0) { /* assume OPENREQUEST was delayed by mistake */ spin_unlock_irqrestore(&ch->lock, irq_flags); return; @@ -960,14 +351,14 @@ again: ch->remote_nentries = args->local_nentries; if (ch->flags & XPC_C_OPENREQUEST) { - if (args->msg_size != ch->msg_size) { + if (args->entry_size != ch->entry_size) { XPC_DISCONNECT_CHANNEL(ch, xpUnequalMsgSizes, &irq_flags); spin_unlock_irqrestore(&ch->lock, irq_flags); return; } } else { - ch->msg_size = args->msg_size; + ch->entry_size = args->entry_size; XPC_SET_REASON(ch, 0, 0); ch->flags &= ~XPC_C_DISCONNECTED; @@ -978,13 +369,13 @@ again: xpc_process_connect(ch, &irq_flags); } - if (IPI_flags & XPC_IPI_OPENREPLY) { + if (chctl_flags & XPC_CHCTL_OPENREPLY) { - dev_dbg(xpc_chan, "XPC_IPI_OPENREPLY (local_msgqueue_pa=0x%lx, " - "local_nentries=%d, remote_nentries=%d) received from " - "partid=%d, channel=%d\n", args->local_msgqueue_pa, - args->local_nentries, args->remote_nentries, - ch->partid, ch->number); + dev_dbg(xpc_chan, "XPC_CHCTL_OPENREPLY (local_msgqueue_pa=" + "0x%lx, local_nentries=%d, remote_nentries=%d) " + "received from partid=%d, channel=%d\n", + args->local_msgqueue_pa, args->local_nentries, + args->remote_nentries, ch->partid, ch->number); if (ch->flags & (XPC_C_DISCONNECTING | XPC_C_DISCONNECTED)) { spin_unlock_irqrestore(&ch->lock, irq_flags); @@ -1012,10 +403,10 @@ again: DBUG_ON(args->remote_nentries == 0); ch->flags |= XPC_C_ROPENREPLY; - ch->remote_msgqueue_pa = args->local_msgqueue_pa; + xpc_save_remote_msgqueue_pa(ch, args->local_msgqueue_pa); if (args->local_nentries < ch->remote_nentries) { - dev_dbg(xpc_chan, "XPC_IPI_OPENREPLY: new " + dev_dbg(xpc_chan, "XPC_CHCTL_OPENREPLY: new " "remote_nentries=%d, old remote_nentries=%d, " "partid=%d, channel=%d\n", args->local_nentries, ch->remote_nentries, @@ -1024,7 +415,7 @@ again: ch->remote_nentries = args->local_nentries; } if (args->remote_nentries < ch->local_nentries) { - dev_dbg(xpc_chan, "XPC_IPI_OPENREPLY: new " + dev_dbg(xpc_chan, "XPC_CHCTL_OPENREPLY: new " "local_nentries=%d, old local_nentries=%d, " "partid=%d, channel=%d\n", args->remote_nentries, ch->local_nentries, @@ -1082,7 +473,7 @@ xpc_connect_channel(struct xpc_channel *ch) ch->local_nentries = registration->nentries; if (ch->flags & XPC_C_ROPENREQUEST) { - if (registration->msg_size != ch->msg_size) { + if (registration->entry_size != ch->entry_size) { /* the local and remote sides aren't the same */ /* @@ -1101,7 +492,7 @@ xpc_connect_channel(struct xpc_channel *ch) return xpUnequalMsgSizes; } } else { - ch->msg_size = registration->msg_size; + ch->entry_size = registration->entry_size; XPC_SET_REASON(ch, 0, 0); ch->flags &= ~XPC_C_DISCONNECTED; @@ -1114,7 +505,7 @@ xpc_connect_channel(struct xpc_channel *ch) /* initiate the connection */ ch->flags |= (XPC_C_OPENREQUEST | XPC_C_CONNECTING); - xpc_IPI_send_openrequest(ch, &irq_flags); + xpc_send_chctl_openrequest(ch, &irq_flags); xpc_process_connect(ch, &irq_flags); @@ -1123,152 +514,16 @@ xpc_connect_channel(struct xpc_channel *ch) return xpSuccess; } -/* - * Clear some of the msg flags in the local message queue. - */ -static inline void -xpc_clear_local_msgqueue_flags(struct xpc_channel *ch) -{ - struct xpc_msg *msg; - s64 get; - - get = ch->w_remote_GP.get; - do { - msg = (struct xpc_msg *)((u64)ch->local_msgqueue + - (get % ch->local_nentries) * - ch->msg_size); - msg->flags = 0; - } while (++get < ch->remote_GP.get); -} - -/* - * Clear some of the msg flags in the remote message queue. - */ -static inline void -xpc_clear_remote_msgqueue_flags(struct xpc_channel *ch) -{ - struct xpc_msg *msg; - s64 put; - - put = ch->w_remote_GP.put; - do { - msg = (struct xpc_msg *)((u64)ch->remote_msgqueue + - (put % ch->remote_nentries) * - ch->msg_size); - msg->flags = 0; - } while (++put < ch->remote_GP.put); -} - -static void -xpc_process_msg_IPI(struct xpc_partition *part, int ch_number) -{ - struct xpc_channel *ch = &part->channels[ch_number]; - int nmsgs_sent; - - ch->remote_GP = part->remote_GPs[ch_number]; - - /* See what, if anything, has changed for each connected channel */ - - xpc_msgqueue_ref(ch); - - if (ch->w_remote_GP.get == ch->remote_GP.get && - ch->w_remote_GP.put == ch->remote_GP.put) { - /* nothing changed since GPs were last pulled */ - xpc_msgqueue_deref(ch); - return; - } - - if (!(ch->flags & XPC_C_CONNECTED)) { - xpc_msgqueue_deref(ch); - return; - } - - /* - * First check to see if messages recently sent by us have been - * received by the other side. (The remote GET value will have - * changed since we last looked at it.) - */ - - if (ch->w_remote_GP.get != ch->remote_GP.get) { - - /* - * We need to notify any senders that want to be notified - * that their sent messages have been received by their - * intended recipients. We need to do this before updating - * w_remote_GP.get so that we don't allocate the same message - * queue entries prematurely (see xpc_allocate_msg()). - */ - if (atomic_read(&ch->n_to_notify) > 0) { - /* - * Notify senders that messages sent have been - * received and delivered by the other side. - */ - xpc_notify_senders(ch, xpMsgDelivered, - ch->remote_GP.get); - } - - /* - * Clear msg->flags in previously sent messages, so that - * they're ready for xpc_allocate_msg(). - */ - xpc_clear_local_msgqueue_flags(ch); - - ch->w_remote_GP.get = ch->remote_GP.get; - - dev_dbg(xpc_chan, "w_remote_GP.get changed to %ld, partid=%d, " - "channel=%d\n", ch->w_remote_GP.get, ch->partid, - ch->number); - - /* - * If anyone was waiting for message queue entries to become - * available, wake them up. - */ - if (atomic_read(&ch->n_on_msg_allocate_wq) > 0) - wake_up(&ch->msg_allocate_wq); - } - - /* - * Now check for newly sent messages by the other side. (The remote - * PUT value will have changed since we last looked at it.) - */ - - if (ch->w_remote_GP.put != ch->remote_GP.put) { - /* - * Clear msg->flags in previously received messages, so that - * they're ready for xpc_get_deliverable_msg(). - */ - xpc_clear_remote_msgqueue_flags(ch); - - ch->w_remote_GP.put = ch->remote_GP.put; - - dev_dbg(xpc_chan, "w_remote_GP.put changed to %ld, partid=%d, " - "channel=%d\n", ch->w_remote_GP.put, ch->partid, - ch->number); - - nmsgs_sent = ch->w_remote_GP.put - ch->w_local_GP.get; - if (nmsgs_sent > 0) { - dev_dbg(xpc_chan, "msgs waiting to be copied and " - "delivered=%d, partid=%d, channel=%d\n", - nmsgs_sent, ch->partid, ch->number); - - if (ch->flags & XPC_C_CONNECTEDCALLOUT_MADE) - xpc_activate_kthreads(ch, nmsgs_sent); - } - } - - xpc_msgqueue_deref(ch); -} - void -xpc_process_channel_activity(struct xpc_partition *part) +xpc_process_sent_chctl_flags(struct xpc_partition *part) { unsigned long irq_flags; - u64 IPI_amo, IPI_flags; + union xpc_channel_ctl_flags chctl; struct xpc_channel *ch; int ch_number; u32 ch_flags; - IPI_amo = xpc_get_IPI_flags(part); + chctl.all_flags = xpc_get_chctl_all_flags(part); /* * Initiate channel connections for registered channels. @@ -1281,14 +536,14 @@ xpc_process_channel_activity(struct xpc_partition *part) ch = &part->channels[ch_number]; /* - * Process any open or close related IPI flags, and then deal + * Process any open or close related chctl flags, and then deal * with connecting or disconnecting the channel as required. */ - IPI_flags = XPC_GET_IPI_FLAGS(IPI_amo, ch_number); - - if (XPC_ANY_OPENCLOSE_IPI_FLAGS_SET(IPI_flags)) - xpc_process_openclose_IPI(part, ch_number, IPI_flags); + if (chctl.flags[ch_number] & XPC_OPENCLOSE_CHCTL_FLAGS) { + xpc_process_openclose_chctl_flags(part, ch_number, + chctl.flags[ch_number]); + } ch_flags = ch->flags; /* need an atomic snapshot of flags */ @@ -1299,7 +554,7 @@ xpc_process_channel_activity(struct xpc_partition *part) continue; } - if (part->act_state == XPC_P_DEACTIVATING) + if (part->act_state == XPC_P_AS_DEACTIVATING) continue; if (!(ch_flags & XPC_C_CONNECTED)) { @@ -1315,13 +570,13 @@ xpc_process_channel_activity(struct xpc_partition *part) } /* - * Process any message related IPI flags, this may involve the - * activation of kthreads to deliver any pending messages sent - * from the other partition. + * Process any message related chctl flags, this may involve + * the activation of kthreads to deliver any pending messages + * sent from the other partition. */ - if (XPC_ANY_MSG_IPI_FLAGS_SET(IPI_flags)) - xpc_process_msg_IPI(part, ch_number); + if (chctl.flags[ch_number] & XPC_MSG_CHCTL_FLAGS) + xpc_process_msg_chctl_flags(part, ch_number); } } @@ -1369,59 +624,6 @@ xpc_partition_going_down(struct xpc_partition *part, enum xp_retval reason) } /* - * Teardown the infrastructure necessary to support XPartition Communication - * between the specified remote partition and the local one. - */ -void -xpc_teardown_infrastructure(struct xpc_partition *part) -{ - short partid = XPC_PARTID(part); - - /* - * We start off by making this partition inaccessible to local - * processes by marking it as no longer setup. Then we make it - * inaccessible to remote processes by clearing the XPC per partition - * specific variable's magic # (which indicates that these variables - * are no longer valid) and by ignoring all XPC notify IPIs sent to - * this partition. - */ - - DBUG_ON(atomic_read(&part->nchannels_engaged) != 0); - DBUG_ON(atomic_read(&part->nchannels_active) != 0); - DBUG_ON(part->setup_state != XPC_P_SETUP); - part->setup_state = XPC_P_WTEARDOWN; - - xpc_vars_part[partid].magic = 0; - - free_irq(SGI_XPC_NOTIFY, (void *)(u64)partid); - - /* - * Before proceeding with the teardown we have to wait until all - * existing references cease. - */ - wait_event(part->teardown_wq, (atomic_read(&part->references) == 0)); - - /* now we can begin tearing down the infrastructure */ - - part->setup_state = XPC_P_TORNDOWN; - - /* in case we've still got outstanding timers registered... */ - del_timer_sync(&part->dropped_IPI_timer); - - kfree(part->remote_openclose_args_base); - part->remote_openclose_args = NULL; - kfree(part->local_openclose_args_base); - part->local_openclose_args = NULL; - kfree(part->remote_GPs_base); - part->remote_GPs = NULL; - kfree(part->local_GPs_base); - part->local_GPs = NULL; - kfree(part->channels); - part->channels = NULL; - part->local_IPI_amo_va = NULL; -} - -/* * Called by XP at the time of channel connection registration to cause * XPC to establish connections to all currently active partitions. */ @@ -1432,9 +634,9 @@ xpc_initiate_connect(int ch_number) struct xpc_partition *part; struct xpc_channel *ch; - DBUG_ON(ch_number < 0 || ch_number >= XPC_NCHANNELS); + DBUG_ON(ch_number < 0 || ch_number >= XPC_MAX_NCHANNELS); - for (partid = 1; partid < XP_MAX_PARTITIONS; partid++) { + for (partid = 0; partid < xp_max_npartitions; partid++) { part = &xpc_partitions[partid]; if (xpc_part_ref(part)) { @@ -1488,10 +690,10 @@ xpc_initiate_disconnect(int ch_number) struct xpc_partition *part; struct xpc_channel *ch; - DBUG_ON(ch_number < 0 || ch_number >= XPC_NCHANNELS); + DBUG_ON(ch_number < 0 || ch_number >= XPC_MAX_NCHANNELS); /* initiate the channel disconnect for every active partition */ - for (partid = 1; partid < XP_MAX_PARTITIONS; partid++) { + for (partid = 0; partid < xp_max_npartitions; partid++) { part = &xpc_partitions[partid]; if (xpc_part_ref(part)) { @@ -1550,7 +752,7 @@ xpc_disconnect_channel(const int line, struct xpc_channel *ch, XPC_C_ROPENREQUEST | XPC_C_ROPENREPLY | XPC_C_CONNECTING | XPC_C_CONNECTED); - xpc_IPI_send_closerequest(ch, irq_flags); + xpc_send_chctl_closerequest(ch, irq_flags); if (channel_was_connected) ch->flags |= XPC_C_WASCONNECTED; @@ -1598,7 +800,7 @@ xpc_disconnect_callout(struct xpc_channel *ch, enum xp_retval reason) * Wait for a message entry to become available for the specified channel, * but don't wait any longer than 1 jiffy. */ -static enum xp_retval +enum xp_retval xpc_allocate_msg_wait(struct xpc_channel *ch) { enum xp_retval ret; @@ -1625,315 +827,54 @@ xpc_allocate_msg_wait(struct xpc_channel *ch) } /* - * Allocate an entry for a message from the message queue associated with the - * specified channel. - */ -static enum xp_retval -xpc_allocate_msg(struct xpc_channel *ch, u32 flags, - struct xpc_msg **address_of_msg) -{ - struct xpc_msg *msg; - enum xp_retval ret; - s64 put; - - /* this reference will be dropped in xpc_send_msg() */ - xpc_msgqueue_ref(ch); - - if (ch->flags & XPC_C_DISCONNECTING) { - xpc_msgqueue_deref(ch); - return ch->reason; - } - if (!(ch->flags & XPC_C_CONNECTED)) { - xpc_msgqueue_deref(ch); - return xpNotConnected; - } - - /* - * Get the next available message entry from the local message queue. - * If none are available, we'll make sure that we grab the latest - * GP values. - */ - ret = xpTimeout; - - while (1) { - - put = ch->w_local_GP.put; - rmb(); /* guarantee that .put loads before .get */ - if (put - ch->w_remote_GP.get < ch->local_nentries) { - - /* There are available message entries. We need to try - * to secure one for ourselves. We'll do this by trying - * to increment w_local_GP.put as long as someone else - * doesn't beat us to it. If they do, we'll have to - * try again. - */ - if (cmpxchg(&ch->w_local_GP.put, put, put + 1) == put) { - /* we got the entry referenced by put */ - break; - } - continue; /* try again */ - } - - /* - * There aren't any available msg entries at this time. - * - * In waiting for a message entry to become available, - * we set a timeout in case the other side is not - * sending completion IPIs. This lets us fake an IPI - * that will cause the IPI handler to fetch the latest - * GP values as if an IPI was sent by the other side. - */ - if (ret == xpTimeout) - xpc_IPI_send_local_msgrequest(ch); - - if (flags & XPC_NOWAIT) { - xpc_msgqueue_deref(ch); - return xpNoWait; - } - - ret = xpc_allocate_msg_wait(ch); - if (ret != xpInterrupted && ret != xpTimeout) { - xpc_msgqueue_deref(ch); - return ret; - } - } - - /* get the message's address and initialize it */ - msg = (struct xpc_msg *)((u64)ch->local_msgqueue + - (put % ch->local_nentries) * ch->msg_size); - - DBUG_ON(msg->flags != 0); - msg->number = put; - - dev_dbg(xpc_chan, "w_local_GP.put changed to %ld; msg=0x%p, " - "msg_number=%ld, partid=%d, channel=%d\n", put + 1, - (void *)msg, msg->number, ch->partid, ch->number); - - *address_of_msg = msg; - - return xpSuccess; -} - -/* - * Allocate an entry for a message from the message queue associated with the - * specified channel. NOTE that this routine can sleep waiting for a message - * entry to become available. To not sleep, pass in the XPC_NOWAIT flag. + * Send a message that contains the user's payload on the specified channel + * connected to the specified partition. * - * Arguments: + * NOTE that this routine can sleep waiting for a message entry to become + * available. To not sleep, pass in the XPC_NOWAIT flag. * - * partid - ID of partition to which the channel is connected. - * ch_number - channel #. - * flags - see xpc.h for valid flags. - * payload - address of the allocated payload area pointer (filled in on - * return) in which the user-defined message is constructed. - */ -enum xp_retval -xpc_initiate_allocate(short partid, int ch_number, u32 flags, void **payload) -{ - struct xpc_partition *part = &xpc_partitions[partid]; - enum xp_retval ret = xpUnknownReason; - struct xpc_msg *msg = NULL; - - DBUG_ON(partid <= 0 || partid >= XP_MAX_PARTITIONS); - DBUG_ON(ch_number < 0 || ch_number >= part->nchannels); - - *payload = NULL; - - if (xpc_part_ref(part)) { - ret = xpc_allocate_msg(&part->channels[ch_number], flags, &msg); - xpc_part_deref(part); - - if (msg != NULL) - *payload = &msg->payload; - } - - return ret; -} - -/* - * Now we actually send the messages that are ready to be sent by advancing - * the local message queue's Put value and then send an IPI to the recipient - * partition. - */ -static void -xpc_send_msgs(struct xpc_channel *ch, s64 initial_put) -{ - struct xpc_msg *msg; - s64 put = initial_put + 1; - int send_IPI = 0; - - while (1) { - - while (1) { - if (put == ch->w_local_GP.put) - break; - - msg = (struct xpc_msg *)((u64)ch->local_msgqueue + - (put % ch->local_nentries) * - ch->msg_size); - - if (!(msg->flags & XPC_M_READY)) - break; - - put++; - } - - if (put == initial_put) { - /* nothing's changed */ - break; - } - - if (cmpxchg_rel(&ch->local_GP->put, initial_put, put) != - initial_put) { - /* someone else beat us to it */ - DBUG_ON(ch->local_GP->put < initial_put); - break; - } - - /* we just set the new value of local_GP->put */ - - dev_dbg(xpc_chan, "local_GP->put changed to %ld, partid=%d, " - "channel=%d\n", put, ch->partid, ch->number); - - send_IPI = 1; - - /* - * We need to ensure that the message referenced by - * local_GP->put is not XPC_M_READY or that local_GP->put - * equals w_local_GP.put, so we'll go have a look. - */ - initial_put = put; - } - - if (send_IPI) - xpc_IPI_send_msgrequest(ch); -} - -/* - * Common code that does the actual sending of the message by advancing the - * local message queue's Put value and sends an IPI to the partition the - * message is being sent to. - */ -static enum xp_retval -xpc_send_msg(struct xpc_channel *ch, struct xpc_msg *msg, u8 notify_type, - xpc_notify_func func, void *key) -{ - enum xp_retval ret = xpSuccess; - struct xpc_notify *notify = notify; - s64 put, msg_number = msg->number; - - DBUG_ON(notify_type == XPC_N_CALL && func == NULL); - DBUG_ON((((u64)msg - (u64)ch->local_msgqueue) / ch->msg_size) != - msg_number % ch->local_nentries); - DBUG_ON(msg->flags & XPC_M_READY); - - if (ch->flags & XPC_C_DISCONNECTING) { - /* drop the reference grabbed in xpc_allocate_msg() */ - xpc_msgqueue_deref(ch); - return ch->reason; - } - - if (notify_type != 0) { - /* - * Tell the remote side to send an ACK interrupt when the - * message has been delivered. - */ - msg->flags |= XPC_M_INTERRUPT; - - atomic_inc(&ch->n_to_notify); - - notify = &ch->notify_queue[msg_number % ch->local_nentries]; - notify->func = func; - notify->key = key; - notify->type = notify_type; - - /* >>> is a mb() needed here? */ - - if (ch->flags & XPC_C_DISCONNECTING) { - /* - * An error occurred between our last error check and - * this one. We will try to clear the type field from - * the notify entry. If we succeed then - * xpc_disconnect_channel() didn't already process - * the notify entry. - */ - if (cmpxchg(¬ify->type, notify_type, 0) == - notify_type) { - atomic_dec(&ch->n_to_notify); - ret = ch->reason; - } - - /* drop the reference grabbed in xpc_allocate_msg() */ - xpc_msgqueue_deref(ch); - return ret; - } - } - - msg->flags |= XPC_M_READY; - - /* - * The preceding store of msg->flags must occur before the following - * load of ch->local_GP->put. - */ - mb(); - - /* see if the message is next in line to be sent, if so send it */ - - put = ch->local_GP->put; - if (put == msg_number) - xpc_send_msgs(ch, put); - - /* drop the reference grabbed in xpc_allocate_msg() */ - xpc_msgqueue_deref(ch); - return ret; -} - -/* - * Send a message previously allocated using xpc_initiate_allocate() on the - * specified channel connected to the specified partition. - * - * This routine will not wait for the message to be received, nor will - * notification be given when it does happen. Once this routine has returned - * the message entry allocated via xpc_initiate_allocate() is no longer - * accessable to the caller. - * - * This routine, although called by users, does not call xpc_part_ref() to - * ensure that the partition infrastructure is in place. It relies on the - * fact that we called xpc_msgqueue_ref() in xpc_allocate_msg(). + * Once sent, this routine will not wait for the message to be received, nor + * will notification be given when it does happen. * * Arguments: * * partid - ID of partition to which the channel is connected. * ch_number - channel # to send message on. - * payload - pointer to the payload area allocated via - * xpc_initiate_allocate(). + * flags - see xp.h for valid flags. + * payload - pointer to the payload which is to be sent. + * payload_size - size of the payload in bytes. */ enum xp_retval -xpc_initiate_send(short partid, int ch_number, void *payload) +xpc_initiate_send(short partid, int ch_number, u32 flags, void *payload, + u16 payload_size) { struct xpc_partition *part = &xpc_partitions[partid]; - struct xpc_msg *msg = XPC_MSG_ADDRESS(payload); - enum xp_retval ret; + enum xp_retval ret = xpUnknownReason; - dev_dbg(xpc_chan, "msg=0x%p, partid=%d, channel=%d\n", (void *)msg, + dev_dbg(xpc_chan, "payload=0x%p, partid=%d, channel=%d\n", payload, partid, ch_number); - DBUG_ON(partid <= 0 || partid >= XP_MAX_PARTITIONS); + DBUG_ON(partid < 0 || partid >= xp_max_npartitions); DBUG_ON(ch_number < 0 || ch_number >= part->nchannels); - DBUG_ON(msg == NULL); + DBUG_ON(payload == NULL); - ret = xpc_send_msg(&part->channels[ch_number], msg, 0, NULL, NULL); + if (xpc_part_ref(part)) { + ret = xpc_send_payload(&part->channels[ch_number], flags, + payload, payload_size, 0, NULL, NULL); + xpc_part_deref(part); + } return ret; } /* - * Send a message previously allocated using xpc_initiate_allocate on the - * specified channel connected to the specified partition. + * Send a message that contains the user's payload on the specified channel + * connected to the specified partition. * - * This routine will not wait for the message to be sent. Once this routine - * has returned the message entry allocated via xpc_initiate_allocate() is no - * longer accessable to the caller. + * NOTE that this routine can sleep waiting for a message entry to become + * available. To not sleep, pass in the XPC_NOWAIT flag. + * + * This routine will not wait for the message to be sent or received. * * Once the remote end of the channel has received the message, the function * passed as an argument to xpc_initiate_send_notify() will be called. This @@ -1943,158 +884,51 @@ xpc_initiate_send(short partid, int ch_number, void *payload) * * If this routine returns an error, the caller's function will NOT be called. * - * This routine, although called by users, does not call xpc_part_ref() to - * ensure that the partition infrastructure is in place. It relies on the - * fact that we called xpc_msgqueue_ref() in xpc_allocate_msg(). - * * Arguments: * * partid - ID of partition to which the channel is connected. * ch_number - channel # to send message on. - * payload - pointer to the payload area allocated via - * xpc_initiate_allocate(). + * flags - see xp.h for valid flags. + * payload - pointer to the payload which is to be sent. + * payload_size - size of the payload in bytes. * func - function to call with asynchronous notification of message * receipt. THIS FUNCTION MUST BE NON-BLOCKING. * key - user-defined key to be passed to the function when it's called. */ enum xp_retval -xpc_initiate_send_notify(short partid, int ch_number, void *payload, - xpc_notify_func func, void *key) +xpc_initiate_send_notify(short partid, int ch_number, u32 flags, void *payload, + u16 payload_size, xpc_notify_func func, void *key) { struct xpc_partition *part = &xpc_partitions[partid]; - struct xpc_msg *msg = XPC_MSG_ADDRESS(payload); - enum xp_retval ret; + enum xp_retval ret = xpUnknownReason; - dev_dbg(xpc_chan, "msg=0x%p, partid=%d, channel=%d\n", (void *)msg, + dev_dbg(xpc_chan, "payload=0x%p, partid=%d, channel=%d\n", payload, partid, ch_number); - DBUG_ON(partid <= 0 || partid >= XP_MAX_PARTITIONS); + DBUG_ON(partid < 0 || partid >= xp_max_npartitions); DBUG_ON(ch_number < 0 || ch_number >= part->nchannels); - DBUG_ON(msg == NULL); + DBUG_ON(payload == NULL); DBUG_ON(func == NULL); - ret = xpc_send_msg(&part->channels[ch_number], msg, XPC_N_CALL, - func, key); - return ret; -} - -static struct xpc_msg * -xpc_pull_remote_msg(struct xpc_channel *ch, s64 get) -{ - struct xpc_partition *part = &xpc_partitions[ch->partid]; - struct xpc_msg *remote_msg, *msg; - u32 msg_index, nmsgs; - u64 msg_offset; - enum xp_retval ret; - - if (mutex_lock_interruptible(&ch->msg_to_pull_mutex) != 0) { - /* we were interrupted by a signal */ - return NULL; - } - - while (get >= ch->next_msg_to_pull) { - - /* pull as many messages as are ready and able to be pulled */ - - msg_index = ch->next_msg_to_pull % ch->remote_nentries; - - DBUG_ON(ch->next_msg_to_pull >= ch->w_remote_GP.put); - nmsgs = ch->w_remote_GP.put - ch->next_msg_to_pull; - if (msg_index + nmsgs > ch->remote_nentries) { - /* ignore the ones that wrap the msg queue for now */ - nmsgs = ch->remote_nentries - msg_index; - } - - msg_offset = msg_index * ch->msg_size; - msg = (struct xpc_msg *)((u64)ch->remote_msgqueue + msg_offset); - remote_msg = (struct xpc_msg *)(ch->remote_msgqueue_pa + - msg_offset); - - ret = xpc_pull_remote_cachelines(part, msg, remote_msg, - nmsgs * ch->msg_size); - if (ret != xpSuccess) { - - dev_dbg(xpc_chan, "failed to pull %d msgs starting with" - " msg %ld from partition %d, channel=%d, " - "ret=%d\n", nmsgs, ch->next_msg_to_pull, - ch->partid, ch->number, ret); - - XPC_DEACTIVATE_PARTITION(part, ret); - - mutex_unlock(&ch->msg_to_pull_mutex); - return NULL; - } - - ch->next_msg_to_pull += nmsgs; + if (xpc_part_ref(part)) { + ret = xpc_send_payload(&part->channels[ch_number], flags, + payload, payload_size, XPC_N_CALL, func, + key); + xpc_part_deref(part); } - - mutex_unlock(&ch->msg_to_pull_mutex); - - /* return the message we were looking for */ - msg_offset = (get % ch->remote_nentries) * ch->msg_size; - msg = (struct xpc_msg *)((u64)ch->remote_msgqueue + msg_offset); - - return msg; -} - -/* - * Get a message to be delivered. - */ -static struct xpc_msg * -xpc_get_deliverable_msg(struct xpc_channel *ch) -{ - struct xpc_msg *msg = NULL; - s64 get; - - do { - if (ch->flags & XPC_C_DISCONNECTING) - break; - - get = ch->w_local_GP.get; - rmb(); /* guarantee that .get loads before .put */ - if (get == ch->w_remote_GP.put) - break; - - /* There are messages waiting to be pulled and delivered. - * We need to try to secure one for ourselves. We'll do this - * by trying to increment w_local_GP.get and hope that no one - * else beats us to it. If they do, we'll we'll simply have - * to try again for the next one. - */ - - if (cmpxchg(&ch->w_local_GP.get, get, get + 1) == get) { - /* we got the entry referenced by get */ - - dev_dbg(xpc_chan, "w_local_GP.get changed to %ld, " - "partid=%d, channel=%d\n", get + 1, - ch->partid, ch->number); - - /* pull the message from the remote partition */ - - msg = xpc_pull_remote_msg(ch, get); - - DBUG_ON(msg != NULL && msg->number != get); - DBUG_ON(msg != NULL && (msg->flags & XPC_M_DONE)); - DBUG_ON(msg != NULL && !(msg->flags & XPC_M_READY)); - - break; - } - - } while (1); - - return msg; + return ret; } /* - * Deliver a message to its intended recipient. + * Deliver a message's payload to its intended recipient. */ void -xpc_deliver_msg(struct xpc_channel *ch) +xpc_deliver_payload(struct xpc_channel *ch) { - struct xpc_msg *msg; + void *payload; - msg = xpc_get_deliverable_msg(ch); - if (msg != NULL) { + payload = xpc_get_deliverable_payload(ch); + if (payload != NULL) { /* * This ref is taken to protect the payload itself from being @@ -2106,18 +940,16 @@ xpc_deliver_msg(struct xpc_channel *ch) atomic_inc(&ch->kthreads_active); if (ch->func != NULL) { - dev_dbg(xpc_chan, "ch->func() called, msg=0x%p, " - "msg_number=%ld, partid=%d, channel=%d\n", - (void *)msg, msg->number, ch->partid, + dev_dbg(xpc_chan, "ch->func() called, payload=0x%p " + "partid=%d channel=%d\n", payload, ch->partid, ch->number); /* deliver the message to its intended recipient */ - ch->func(xpMsgReceived, ch->partid, ch->number, - &msg->payload, ch->key); + ch->func(xpMsgReceived, ch->partid, ch->number, payload, + ch->key); - dev_dbg(xpc_chan, "ch->func() returned, msg=0x%p, " - "msg_number=%ld, partid=%d, channel=%d\n", - (void *)msg, msg->number, ch->partid, + dev_dbg(xpc_chan, "ch->func() returned, payload=0x%p " + "partid=%d channel=%d\n", payload, ch->partid, ch->number); } @@ -2126,118 +958,31 @@ xpc_deliver_msg(struct xpc_channel *ch) } /* - * Now we actually acknowledge the messages that have been delivered and ack'd - * by advancing the cached remote message queue's Get value and if requested - * send an IPI to the message sender's partition. - */ -static void -xpc_acknowledge_msgs(struct xpc_channel *ch, s64 initial_get, u8 msg_flags) -{ - struct xpc_msg *msg; - s64 get = initial_get + 1; - int send_IPI = 0; - - while (1) { - - while (1) { - if (get == ch->w_local_GP.get) - break; - - msg = (struct xpc_msg *)((u64)ch->remote_msgqueue + - (get % ch->remote_nentries) * - ch->msg_size); - - if (!(msg->flags & XPC_M_DONE)) - break; - - msg_flags |= msg->flags; - get++; - } - - if (get == initial_get) { - /* nothing's changed */ - break; - } - - if (cmpxchg_rel(&ch->local_GP->get, initial_get, get) != - initial_get) { - /* someone else beat us to it */ - DBUG_ON(ch->local_GP->get <= initial_get); - break; - } - - /* we just set the new value of local_GP->get */ - - dev_dbg(xpc_chan, "local_GP->get changed to %ld, partid=%d, " - "channel=%d\n", get, ch->partid, ch->number); - - send_IPI = (msg_flags & XPC_M_INTERRUPT); - - /* - * We need to ensure that the message referenced by - * local_GP->get is not XPC_M_DONE or that local_GP->get - * equals w_local_GP.get, so we'll go have a look. - */ - initial_get = get; - } - - if (send_IPI) - xpc_IPI_send_msgrequest(ch); -} - -/* - * Acknowledge receipt of a delivered message. - * - * If a message has XPC_M_INTERRUPT set, send an interrupt to the partition - * that sent the message. + * Acknowledge receipt of a delivered message's payload. * * This function, although called by users, does not call xpc_part_ref() to * ensure that the partition infrastructure is in place. It relies on the - * fact that we called xpc_msgqueue_ref() in xpc_deliver_msg(). + * fact that we called xpc_msgqueue_ref() in xpc_deliver_payload(). * * Arguments: * * partid - ID of partition to which the channel is connected. * ch_number - channel # message received on. * payload - pointer to the payload area allocated via - * xpc_initiate_allocate(). + * xpc_initiate_send() or xpc_initiate_send_notify(). */ void xpc_initiate_received(short partid, int ch_number, void *payload) { struct xpc_partition *part = &xpc_partitions[partid]; struct xpc_channel *ch; - struct xpc_msg *msg = XPC_MSG_ADDRESS(payload); - s64 get, msg_number = msg->number; - DBUG_ON(partid <= 0 || partid >= XP_MAX_PARTITIONS); + DBUG_ON(partid < 0 || partid >= xp_max_npartitions); DBUG_ON(ch_number < 0 || ch_number >= part->nchannels); ch = &part->channels[ch_number]; + xpc_received_payload(ch, payload); - dev_dbg(xpc_chan, "msg=0x%p, msg_number=%ld, partid=%d, channel=%d\n", - (void *)msg, msg_number, ch->partid, ch->number); - - DBUG_ON((((u64)msg - (u64)ch->remote_msgqueue) / ch->msg_size) != - msg_number % ch->remote_nentries); - DBUG_ON(msg->flags & XPC_M_DONE); - - msg->flags |= XPC_M_DONE; - - /* - * The preceding store of msg->flags must occur before the following - * load of ch->local_GP->get. - */ - mb(); - - /* - * See if this message is next in line to be acknowledged as having - * been delivered. - */ - get = ch->local_GP->get; - if (get == msg_number) - xpc_acknowledge_msgs(ch, get, msg->flags); - - /* the call to xpc_msgqueue_ref() was done by xpc_deliver_msg() */ + /* the call to xpc_msgqueue_ref() was done by xpc_deliver_payload() */ xpc_msgqueue_deref(ch); } diff --git a/drivers/misc/sgi-xp/xpc_main.c b/drivers/misc/sgi-xp/xpc_main.c index 579b01ff82d..46325fc8481 100644 --- a/drivers/misc/sgi-xp/xpc_main.c +++ b/drivers/misc/sgi-xp/xpc_main.c @@ -25,37 +25,31 @@ * * Caveats: * - * . We currently have no way to determine which nasid an IPI came - * from. Thus, xpc_IPI_send() does a remote AMO write followed by - * an IPI. The AMO indicates where data is to be pulled from, so - * after the IPI arrives, the remote partition checks the AMO word. - * The IPI can actually arrive before the AMO however, so other code - * must periodically check for this case. Also, remote AMO operations - * do not reliably time out. Thus we do a remote PIO read solely to - * know whether the remote partition is down and whether we should - * stop sending IPIs to it. This remote PIO read operation is set up - * in a special nofault region so SAL knows to ignore (and cleanup) - * any errors due to the remote AMO write, PIO read, and/or PIO - * write operations. + * . Currently on sn2, we have no way to determine which nasid an IRQ + * came from. Thus, xpc_send_IRQ_sn2() does a remote amo write + * followed by an IPI. The amo indicates where data is to be pulled + * from, so after the IPI arrives, the remote partition checks the amo + * word. The IPI can actually arrive before the amo however, so other + * code must periodically check for this case. Also, remote amo + * operations do not reliably time out. Thus we do a remote PIO read + * solely to know whether the remote partition is down and whether we + * should stop sending IPIs to it. This remote PIO read operation is + * set up in a special nofault region so SAL knows to ignore (and + * cleanup) any errors due to the remote amo write, PIO read, and/or + * PIO write operations. * * If/when new hardware solves this IPI problem, we should abandon * the current approach. * */ -#include <linux/kernel.h> #include <linux/module.h> -#include <linux/init.h> -#include <linux/cache.h> -#include <linux/interrupt.h> +#include <linux/sysctl.h> +#include <linux/device.h> #include <linux/delay.h> #include <linux/reboot.h> -#include <linux/completion.h> #include <linux/kdebug.h> #include <linux/kthread.h> -#include <linux/uaccess.h> -#include <asm/sn/intr.h> -#include <asm/sn/sn_sal.h> #include "xpc.h" /* define two XPC debug device structures to be used with dev_dbg() et al */ @@ -89,9 +83,9 @@ static int xpc_hb_check_interval = XPC_HB_CHECK_DEFAULT_INTERVAL; static int xpc_hb_check_min_interval = 10; static int xpc_hb_check_max_interval = 120; -int xpc_disengage_request_timelimit = XPC_DISENGAGE_REQUEST_DEFAULT_TIMELIMIT; -static int xpc_disengage_request_min_timelimit; /* = 0 */ -static int xpc_disengage_request_max_timelimit = 120; +int xpc_disengage_timelimit = XPC_DISENGAGE_DEFAULT_TIMELIMIT; +static int xpc_disengage_min_timelimit; /* = 0 */ +static int xpc_disengage_max_timelimit = 120; static ctl_table xpc_sys_xpc_hb_dir[] = { { @@ -124,14 +118,14 @@ static ctl_table xpc_sys_xpc_dir[] = { .child = xpc_sys_xpc_hb_dir}, { .ctl_name = CTL_UNNUMBERED, - .procname = "disengage_request_timelimit", - .data = &xpc_disengage_request_timelimit, + .procname = "disengage_timelimit", + .data = &xpc_disengage_timelimit, .maxlen = sizeof(int), .mode = 0644, .proc_handler = &proc_dointvec_minmax, .strategy = &sysctl_intvec, - .extra1 = &xpc_disengage_request_min_timelimit, - .extra2 = &xpc_disengage_request_max_timelimit}, + .extra1 = &xpc_disengage_min_timelimit, + .extra2 = &xpc_disengage_max_timelimit}, {} }; static ctl_table xpc_sys_dir[] = { @@ -144,16 +138,19 @@ static ctl_table xpc_sys_dir[] = { }; static struct ctl_table_header *xpc_sysctl; -/* non-zero if any remote partition disengage request was timed out */ -int xpc_disengage_request_timedout; +/* non-zero if any remote partition disengage was timed out */ +int xpc_disengage_timedout; -/* #of IRQs received */ -static atomic_t xpc_act_IRQ_rcvd; +/* #of activate IRQs received and not yet processed */ +int xpc_activate_IRQ_rcvd; +DEFINE_SPINLOCK(xpc_activate_IRQ_rcvd_lock); /* IRQ handler notifies this wait queue on receipt of an IRQ */ -static DECLARE_WAIT_QUEUE_HEAD(xpc_act_IRQ_wq); +DECLARE_WAIT_QUEUE_HEAD(xpc_activate_IRQ_wq); static unsigned long xpc_hb_check_timeout; +static struct timer_list xpc_hb_timer; +void *xpc_heartbeating_to_mask; /* notification that the xpc_hb_checker thread has exited */ static DECLARE_COMPLETION(xpc_hb_checker_exited); @@ -161,8 +158,6 @@ static DECLARE_COMPLETION(xpc_hb_checker_exited); /* notification that the xpc_discovery thread has exited */ static DECLARE_COMPLETION(xpc_discovery_exited); -static struct timer_list xpc_hb_timer; - static void xpc_kthread_waitmsgs(struct xpc_partition *, struct xpc_channel *); static int xpc_system_reboot(struct notifier_block *, unsigned long, void *); @@ -175,31 +170,76 @@ static struct notifier_block xpc_die_notifier = { .notifier_call = xpc_system_die, }; +int (*xpc_setup_partitions_sn) (void); +enum xp_retval (*xpc_get_partition_rsvd_page_pa) (void *buf, u64 *cookie, + unsigned long *rp_pa, + size_t *len); +int (*xpc_setup_rsvd_page_sn) (struct xpc_rsvd_page *rp); +void (*xpc_heartbeat_init) (void); +void (*xpc_heartbeat_exit) (void); +void (*xpc_increment_heartbeat) (void); +void (*xpc_offline_heartbeat) (void); +void (*xpc_online_heartbeat) (void); +enum xp_retval (*xpc_get_remote_heartbeat) (struct xpc_partition *part); + +enum xp_retval (*xpc_make_first_contact) (struct xpc_partition *part); +void (*xpc_notify_senders_of_disconnect) (struct xpc_channel *ch); +u64 (*xpc_get_chctl_all_flags) (struct xpc_partition *part); +enum xp_retval (*xpc_setup_msg_structures) (struct xpc_channel *ch); +void (*xpc_teardown_msg_structures) (struct xpc_channel *ch); +void (*xpc_process_msg_chctl_flags) (struct xpc_partition *part, int ch_number); +int (*xpc_n_of_deliverable_payloads) (struct xpc_channel *ch); +void *(*xpc_get_deliverable_payload) (struct xpc_channel *ch); + +void (*xpc_request_partition_activation) (struct xpc_rsvd_page *remote_rp, + unsigned long remote_rp_pa, + int nasid); +void (*xpc_request_partition_reactivation) (struct xpc_partition *part); +void (*xpc_request_partition_deactivation) (struct xpc_partition *part); +void (*xpc_cancel_partition_deactivation_request) (struct xpc_partition *part); + +void (*xpc_process_activate_IRQ_rcvd) (void); +enum xp_retval (*xpc_setup_ch_structures_sn) (struct xpc_partition *part); +void (*xpc_teardown_ch_structures_sn) (struct xpc_partition *part); + +void (*xpc_indicate_partition_engaged) (struct xpc_partition *part); +int (*xpc_partition_engaged) (short partid); +int (*xpc_any_partition_engaged) (void); +void (*xpc_indicate_partition_disengaged) (struct xpc_partition *part); +void (*xpc_assume_partition_disengaged) (short partid); + +void (*xpc_send_chctl_closerequest) (struct xpc_channel *ch, + unsigned long *irq_flags); +void (*xpc_send_chctl_closereply) (struct xpc_channel *ch, + unsigned long *irq_flags); +void (*xpc_send_chctl_openrequest) (struct xpc_channel *ch, + unsigned long *irq_flags); +void (*xpc_send_chctl_openreply) (struct xpc_channel *ch, + unsigned long *irq_flags); + +void (*xpc_save_remote_msgqueue_pa) (struct xpc_channel *ch, + unsigned long msgqueue_pa); + +enum xp_retval (*xpc_send_payload) (struct xpc_channel *ch, u32 flags, + void *payload, u16 payload_size, + u8 notify_type, xpc_notify_func func, + void *key); +void (*xpc_received_payload) (struct xpc_channel *ch, void *payload); + /* - * Timer function to enforce the timelimit on the partition disengage request. + * Timer function to enforce the timelimit on the partition disengage. */ static void -xpc_timeout_partition_disengage_request(unsigned long data) +xpc_timeout_partition_disengage(unsigned long data) { struct xpc_partition *part = (struct xpc_partition *)data; - DBUG_ON(time_before(jiffies, part->disengage_request_timeout)); + DBUG_ON(time_is_after_jiffies(part->disengage_timeout)); (void)xpc_partition_disengaged(part); - DBUG_ON(part->disengage_request_timeout != 0); - DBUG_ON(xpc_partition_engaged(1UL << XPC_PARTID(part)) != 0); -} - -/* - * Notify the heartbeat check thread that an IRQ has been received. - */ -static irqreturn_t -xpc_act_IRQ_handler(int irq, void *dev_id) -{ - atomic_inc(&xpc_act_IRQ_rcvd); - wake_up_interruptible(&xpc_act_IRQ_wq); - return IRQ_HANDLED; + DBUG_ON(part->disengage_timeout != 0); + DBUG_ON(xpc_partition_engaged(XPC_PARTID(part))); } /* @@ -210,15 +250,63 @@ xpc_act_IRQ_handler(int irq, void *dev_id) static void xpc_hb_beater(unsigned long dummy) { - xpc_vars->heartbeat++; + xpc_increment_heartbeat(); - if (time_after_eq(jiffies, xpc_hb_check_timeout)) - wake_up_interruptible(&xpc_act_IRQ_wq); + if (time_is_before_eq_jiffies(xpc_hb_check_timeout)) + wake_up_interruptible(&xpc_activate_IRQ_wq); xpc_hb_timer.expires = jiffies + (xpc_hb_interval * HZ); add_timer(&xpc_hb_timer); } +static void +xpc_start_hb_beater(void) +{ + xpc_heartbeat_init(); + init_timer(&xpc_hb_timer); + xpc_hb_timer.function = xpc_hb_beater; + xpc_hb_beater(0); +} + +static void +xpc_stop_hb_beater(void) +{ + del_timer_sync(&xpc_hb_timer); + xpc_heartbeat_exit(); +} + +/* + * At periodic intervals, scan through all active partitions and ensure + * their heartbeat is still active. If not, the partition is deactivated. + */ +static void +xpc_check_remote_hb(void) +{ + struct xpc_partition *part; + short partid; + enum xp_retval ret; + + for (partid = 0; partid < xp_max_npartitions; partid++) { + + if (xpc_exiting) + break; + + if (partid == xp_partition_id) + continue; + + part = &xpc_partitions[partid]; + + if (part->act_state == XPC_P_AS_INACTIVE || + part->act_state == XPC_P_AS_DEACTIVATING) { + continue; + } + + ret = xpc_get_remote_heartbeat(part); + if (ret != xpSuccess) + XPC_DEACTIVATE_PARTITION(part, ret); + } +} + /* * This thread is responsible for nearly all of the partition * activation/deactivation. @@ -226,67 +314,57 @@ xpc_hb_beater(unsigned long dummy) static int xpc_hb_checker(void *ignore) { - int last_IRQ_count = 0; - int new_IRQ_count; int force_IRQ = 0; - cpumask_of_cpu_ptr(cpumask, XPC_HB_CHECK_CPU); /* this thread was marked active by xpc_hb_init() */ - set_cpus_allowed_ptr(current, cpumask); + set_cpus_allowed_ptr(current, &cpumask_of_cpu(XPC_HB_CHECK_CPU)); /* set our heartbeating to other partitions into motion */ xpc_hb_check_timeout = jiffies + (xpc_hb_check_interval * HZ); - xpc_hb_beater(0); + xpc_start_hb_beater(); while (!xpc_exiting) { dev_dbg(xpc_part, "woke up with %d ticks rem; %d IRQs have " "been received\n", (int)(xpc_hb_check_timeout - jiffies), - atomic_read(&xpc_act_IRQ_rcvd) - last_IRQ_count); + xpc_activate_IRQ_rcvd); /* checking of remote heartbeats is skewed by IRQ handling */ - if (time_after_eq(jiffies, xpc_hb_check_timeout)) { + if (time_is_before_eq_jiffies(xpc_hb_check_timeout)) { + xpc_hb_check_timeout = jiffies + + (xpc_hb_check_interval * HZ); + dev_dbg(xpc_part, "checking remote heartbeats\n"); xpc_check_remote_hb(); /* - * We need to periodically recheck to ensure no - * IPI/AMO pairs have been missed. That check - * must always reset xpc_hb_check_timeout. + * On sn2 we need to periodically recheck to ensure no + * IRQ/amo pairs have been missed. */ - force_IRQ = 1; + if (is_shub()) + force_IRQ = 1; } /* check for outstanding IRQs */ - new_IRQ_count = atomic_read(&xpc_act_IRQ_rcvd); - if (last_IRQ_count < new_IRQ_count || force_IRQ != 0) { + if (xpc_activate_IRQ_rcvd > 0 || force_IRQ != 0) { force_IRQ = 0; - - dev_dbg(xpc_part, "found an IRQ to process; will be " - "resetting xpc_hb_check_timeout\n"); - - last_IRQ_count += xpc_identify_act_IRQ_sender(); - if (last_IRQ_count < new_IRQ_count) { - /* retry once to help avoid missing AMO */ - (void)xpc_identify_act_IRQ_sender(); - } - last_IRQ_count = new_IRQ_count; - - xpc_hb_check_timeout = jiffies + - (xpc_hb_check_interval * HZ); + dev_dbg(xpc_part, "processing activate IRQs " + "received\n"); + xpc_process_activate_IRQ_rcvd(); } /* wait for IRQ or timeout */ - (void)wait_event_interruptible(xpc_act_IRQ_wq, - (last_IRQ_count < - atomic_read(&xpc_act_IRQ_rcvd) - || time_after_eq(jiffies, - xpc_hb_check_timeout) || + (void)wait_event_interruptible(xpc_activate_IRQ_wq, + (time_is_before_eq_jiffies( + xpc_hb_check_timeout) || + xpc_activate_IRQ_rcvd > 0 || xpc_exiting)); } + xpc_stop_hb_beater(); + dev_dbg(xpc_part, "heartbeat checker is exiting\n"); /* mark this thread as having exited */ @@ -312,37 +390,8 @@ xpc_initiate_discovery(void *ignore) } /* - * Establish first contact with the remote partititon. This involves pulling - * the XPC per partition variables from the remote partition and waiting for - * the remote partition to pull ours. - */ -static enum xp_retval -xpc_make_first_contact(struct xpc_partition *part) -{ - enum xp_retval ret; - - while ((ret = xpc_pull_remote_vars_part(part)) != xpSuccess) { - if (ret != xpRetry) { - XPC_DEACTIVATE_PARTITION(part, ret); - return ret; - } - - dev_dbg(xpc_chan, "waiting to make first contact with " - "partition %d\n", XPC_PARTID(part)); - - /* wait a 1/4 of a second or so */ - (void)msleep_interruptible(250); - - if (part->act_state == XPC_P_DEACTIVATING) - return part->reason; - } - - return xpc_mark_partition_active(part); -} - -/* * The first kthread assigned to a newly activated partition is the one - * created by XPC HB with which it calls xpc_partition_up(). XPC hangs on to + * created by XPC HB with which it calls xpc_activating(). XPC hangs on to * that kthread until the partition is brought down, at which time that kthread * returns back to XPC HB. (The return of that kthread will signify to XPC HB * that XPC has dismantled all communication infrastructure for the associated @@ -355,11 +404,11 @@ xpc_make_first_contact(struct xpc_partition *part) static void xpc_channel_mgr(struct xpc_partition *part) { - while (part->act_state != XPC_P_DEACTIVATING || + while (part->act_state != XPC_P_AS_DEACTIVATING || atomic_read(&part->nchannels_active) > 0 || !xpc_partition_disengaged(part)) { - xpc_process_channel_activity(part); + xpc_process_sent_chctl_flags(part); /* * Wait until we've been requested to activate kthreads or @@ -377,8 +426,8 @@ xpc_channel_mgr(struct xpc_partition *part) atomic_dec(&part->channel_mgr_requests); (void)wait_event_interruptible(part->channel_mgr_wq, (atomic_read(&part->channel_mgr_requests) > 0 || - part->local_IPI_amo != 0 || - (part->act_state == XPC_P_DEACTIVATING && + part->chctl.all_flags != 0 || + (part->act_state == XPC_P_AS_DEACTIVATING && atomic_read(&part->nchannels_active) == 0 && xpc_partition_disengaged(part)))); atomic_set(&part->channel_mgr_requests, 1); @@ -386,47 +435,163 @@ xpc_channel_mgr(struct xpc_partition *part) } /* - * When XPC HB determines that a partition has come up, it will create a new - * kthread and that kthread will call this function to attempt to set up the - * basic infrastructure used for Cross Partition Communication with the newly - * upped partition. - * - * The kthread that was created by XPC HB and which setup the XPC - * infrastructure will remain assigned to the partition until the partition - * goes down. At which time the kthread will teardown the XPC infrastructure - * and then exit. - * - * XPC HB will put the remote partition's XPC per partition specific variables - * physical address into xpc_partitions[partid].remote_vars_part_pa prior to - * calling xpc_partition_up(). + * Guarantee that the kzalloc'd memory is cacheline aligned. */ -static void -xpc_partition_up(struct xpc_partition *part) +void * +xpc_kzalloc_cacheline_aligned(size_t size, gfp_t flags, void **base) +{ + /* see if kzalloc will give us cachline aligned memory by default */ + *base = kzalloc(size, flags); + if (*base == NULL) + return NULL; + + if ((u64)*base == L1_CACHE_ALIGN((u64)*base)) + return *base; + + kfree(*base); + + /* nope, we'll have to do it ourselves */ + *base = kzalloc(size + L1_CACHE_BYTES, flags); + if (*base == NULL) + return NULL; + + return (void *)L1_CACHE_ALIGN((u64)*base); +} + +/* + * Setup the channel structures necessary to support XPartition Communication + * between the specified remote partition and the local one. + */ +static enum xp_retval +xpc_setup_ch_structures(struct xpc_partition *part) { + enum xp_retval ret; + int ch_number; + struct xpc_channel *ch; + short partid = XPC_PARTID(part); + + /* + * Allocate all of the channel structures as a contiguous chunk of + * memory. + */ DBUG_ON(part->channels != NULL); + part->channels = kzalloc(sizeof(struct xpc_channel) * XPC_MAX_NCHANNELS, + GFP_KERNEL); + if (part->channels == NULL) { + dev_err(xpc_chan, "can't get memory for channels\n"); + return xpNoMemory; + } - dev_dbg(xpc_chan, "activating partition %d\n", XPC_PARTID(part)); + /* allocate the remote open and close args */ - if (xpc_setup_infrastructure(part) != xpSuccess) - return; + part->remote_openclose_args = + xpc_kzalloc_cacheline_aligned(XPC_OPENCLOSE_ARGS_SIZE, + GFP_KERNEL, &part-> + remote_openclose_args_base); + if (part->remote_openclose_args == NULL) { + dev_err(xpc_chan, "can't get memory for remote connect args\n"); + ret = xpNoMemory; + goto out_1; + } + + part->chctl.all_flags = 0; + spin_lock_init(&part->chctl_lock); + + atomic_set(&part->channel_mgr_requests, 1); + init_waitqueue_head(&part->channel_mgr_wq); + + part->nchannels = XPC_MAX_NCHANNELS; + + atomic_set(&part->nchannels_active, 0); + atomic_set(&part->nchannels_engaged, 0); + + for (ch_number = 0; ch_number < part->nchannels; ch_number++) { + ch = &part->channels[ch_number]; + + ch->partid = partid; + ch->number = ch_number; + ch->flags = XPC_C_DISCONNECTED; + + atomic_set(&ch->kthreads_assigned, 0); + atomic_set(&ch->kthreads_idle, 0); + atomic_set(&ch->kthreads_active, 0); + + atomic_set(&ch->references, 0); + atomic_set(&ch->n_to_notify, 0); + + spin_lock_init(&ch->lock); + init_completion(&ch->wdisconnect_wait); + + atomic_set(&ch->n_on_msg_allocate_wq, 0); + init_waitqueue_head(&ch->msg_allocate_wq); + init_waitqueue_head(&ch->idle_wq); + } + + ret = xpc_setup_ch_structures_sn(part); + if (ret != xpSuccess) + goto out_2; + + /* + * With the setting of the partition setup_state to XPC_P_SS_SETUP, + * we're declaring that this partition is ready to go. + */ + part->setup_state = XPC_P_SS_SETUP; + + return xpSuccess; + + /* setup of ch structures failed */ +out_2: + kfree(part->remote_openclose_args_base); + part->remote_openclose_args = NULL; +out_1: + kfree(part->channels); + part->channels = NULL; + return ret; +} + +/* + * Teardown the channel structures necessary to support XPartition Communication + * between the specified remote partition and the local one. + */ +static void +xpc_teardown_ch_structures(struct xpc_partition *part) +{ + DBUG_ON(atomic_read(&part->nchannels_engaged) != 0); + DBUG_ON(atomic_read(&part->nchannels_active) != 0); /* - * The kthread that XPC HB called us with will become the - * channel manager for this partition. It will not return - * back to XPC HB until the partition's XPC infrastructure - * has been dismantled. + * Make this partition inaccessible to local processes by marking it + * as no longer setup. Then wait before proceeding with the teardown + * until all existing references cease. */ + DBUG_ON(part->setup_state != XPC_P_SS_SETUP); + part->setup_state = XPC_P_SS_WTEARDOWN; - (void)xpc_part_ref(part); /* this will always succeed */ + wait_event(part->teardown_wq, (atomic_read(&part->references) == 0)); - if (xpc_make_first_contact(part) == xpSuccess) - xpc_channel_mgr(part); + /* now we can begin tearing down the infrastructure */ - xpc_part_deref(part); + xpc_teardown_ch_structures_sn(part); - xpc_teardown_infrastructure(part); + kfree(part->remote_openclose_args_base); + part->remote_openclose_args = NULL; + kfree(part->channels); + part->channels = NULL; + + part->setup_state = XPC_P_SS_TORNDOWN; } +/* + * When XPC HB determines that a partition has come up, it will create a new + * kthread and that kthread will call this function to attempt to set up the + * basic infrastructure used for Cross Partition Communication with the newly + * upped partition. + * + * The kthread that was created by XPC HB and which setup the XPC + * infrastructure will remain assigned to the partition becoming the channel + * manager for that partition until the partition is deactivating, at which + * time the kthread will teardown the XPC infrastructure and then exit. + */ static int xpc_activating(void *__partid) { @@ -434,64 +599,47 @@ xpc_activating(void *__partid) struct xpc_partition *part = &xpc_partitions[partid]; unsigned long irq_flags; - DBUG_ON(partid <= 0 || partid >= XP_MAX_PARTITIONS); + DBUG_ON(partid < 0 || partid >= xp_max_npartitions); spin_lock_irqsave(&part->act_lock, irq_flags); - if (part->act_state == XPC_P_DEACTIVATING) { - part->act_state = XPC_P_INACTIVE; + if (part->act_state == XPC_P_AS_DEACTIVATING) { + part->act_state = XPC_P_AS_INACTIVE; spin_unlock_irqrestore(&part->act_lock, irq_flags); part->remote_rp_pa = 0; return 0; } /* indicate the thread is activating */ - DBUG_ON(part->act_state != XPC_P_ACTIVATION_REQ); - part->act_state = XPC_P_ACTIVATING; + DBUG_ON(part->act_state != XPC_P_AS_ACTIVATION_REQ); + part->act_state = XPC_P_AS_ACTIVATING; XPC_SET_REASON(part, 0, 0); spin_unlock_irqrestore(&part->act_lock, irq_flags); - dev_dbg(xpc_part, "bringing partition %d up\n", partid); + dev_dbg(xpc_part, "activating partition %d\n", partid); - /* - * Register the remote partition's AMOs with SAL so it can handle - * and cleanup errors within that address range should the remote - * partition go down. We don't unregister this range because it is - * difficult to tell when outstanding writes to the remote partition - * are finished and thus when it is safe to unregister. This should - * not result in wasted space in the SAL xp_addr_region table because - * we should get the same page for remote_amos_page_pa after module - * reloads and system reboots. - */ - if (sn_register_xp_addr_region(part->remote_amos_page_pa, - PAGE_SIZE, 1) < 0) { - dev_warn(xpc_part, "xpc_partition_up(%d) failed to register " - "xp_addr region\n", partid); + xpc_allow_hb(partid); - spin_lock_irqsave(&part->act_lock, irq_flags); - part->act_state = XPC_P_INACTIVE; - XPC_SET_REASON(part, xpPhysAddrRegFailed, __LINE__); - spin_unlock_irqrestore(&part->act_lock, irq_flags); - part->remote_rp_pa = 0; - return 0; - } + if (xpc_setup_ch_structures(part) == xpSuccess) { + (void)xpc_part_ref(part); /* this will always succeed */ - xpc_allow_hb(partid, xpc_vars); - xpc_IPI_send_activated(part); + if (xpc_make_first_contact(part) == xpSuccess) { + xpc_mark_partition_active(part); + xpc_channel_mgr(part); + /* won't return until partition is deactivating */ + } - /* - * xpc_partition_up() holds this thread and marks this partition as - * XPC_P_ACTIVE by calling xpc_hb_mark_active(). - */ - (void)xpc_partition_up(part); + xpc_part_deref(part); + xpc_teardown_ch_structures(part); + } - xpc_disallow_hb(partid, xpc_vars); + xpc_disallow_hb(partid); xpc_mark_partition_inactive(part); if (part->reason == xpReactivating) { /* interrupting ourselves results in activating partition */ - xpc_IPI_send_reactivate(part); + xpc_request_partition_reactivation(part); } return 0; @@ -506,9 +654,9 @@ xpc_activate_partition(struct xpc_partition *part) spin_lock_irqsave(&part->act_lock, irq_flags); - DBUG_ON(part->act_state != XPC_P_INACTIVE); + DBUG_ON(part->act_state != XPC_P_AS_INACTIVE); - part->act_state = XPC_P_ACTIVATION_REQ; + part->act_state = XPC_P_AS_ACTIVATION_REQ; XPC_SET_REASON(part, xpCloneKThread, __LINE__); spin_unlock_irqrestore(&part->act_lock, irq_flags); @@ -517,62 +665,12 @@ xpc_activate_partition(struct xpc_partition *part) partid); if (IS_ERR(kthread)) { spin_lock_irqsave(&part->act_lock, irq_flags); - part->act_state = XPC_P_INACTIVE; + part->act_state = XPC_P_AS_INACTIVE; XPC_SET_REASON(part, xpCloneKThreadFailed, __LINE__); spin_unlock_irqrestore(&part->act_lock, irq_flags); } } -/* - * Handle the receipt of a SGI_XPC_NOTIFY IRQ by seeing whether the specified - * partition actually sent it. Since SGI_XPC_NOTIFY IRQs may be shared by more - * than one partition, we use an AMO_t structure per partition to indicate - * whether a partition has sent an IPI or not. If it has, then wake up the - * associated kthread to handle it. - * - * All SGI_XPC_NOTIFY IRQs received by XPC are the result of IPIs sent by XPC - * running on other partitions. - * - * Noteworthy Arguments: - * - * irq - Interrupt ReQuest number. NOT USED. - * - * dev_id - partid of IPI's potential sender. - */ -irqreturn_t -xpc_notify_IRQ_handler(int irq, void *dev_id) -{ - short partid = (short)(u64)dev_id; - struct xpc_partition *part = &xpc_partitions[partid]; - - DBUG_ON(partid <= 0 || partid >= XP_MAX_PARTITIONS); - - if (xpc_part_ref(part)) { - xpc_check_for_channel_activity(part); - - xpc_part_deref(part); - } - return IRQ_HANDLED; -} - -/* - * Check to see if xpc_notify_IRQ_handler() dropped any IPIs on the floor - * because the write to their associated IPI amo completed after the IRQ/IPI - * was received. - */ -void -xpc_dropped_IPI_check(struct xpc_partition *part) -{ - if (xpc_part_ref(part)) { - xpc_check_for_channel_activity(part); - - part->dropped_IPI_timer.expires = jiffies + - XPC_P_DROPPED_IPI_WAIT; - add_timer(&part->dropped_IPI_timer); - xpc_part_deref(part); - } -} - void xpc_activate_kthreads(struct xpc_channel *ch, int needed) { @@ -617,9 +715,9 @@ xpc_kthread_waitmsgs(struct xpc_partition *part, struct xpc_channel *ch) do { /* deliver messages to their intended recipients */ - while (ch->w_local_GP.get < ch->w_remote_GP.put && + while (xpc_n_of_deliverable_payloads(ch) > 0 && !(ch->flags & XPC_C_DISCONNECTING)) { - xpc_deliver_msg(ch); + xpc_deliver_payload(ch); } if (atomic_inc_return(&ch->kthreads_idle) > @@ -633,7 +731,7 @@ xpc_kthread_waitmsgs(struct xpc_partition *part, struct xpc_channel *ch) "wait_event_interruptible_exclusive()\n"); (void)wait_event_interruptible_exclusive(ch->idle_wq, - (ch->w_local_GP.get < ch->w_remote_GP.put || + (xpc_n_of_deliverable_payloads(ch) > 0 || (ch->flags & XPC_C_DISCONNECTING))); atomic_dec(&ch->kthreads_idle); @@ -678,7 +776,7 @@ xpc_kthread_start(void *args) * additional kthreads to help deliver them. We only * need one less than total #of messages to deliver. */ - n_needed = ch->w_remote_GP.put - ch->w_local_GP.get - 1; + n_needed = xpc_n_of_deliverable_payloads(ch) - 1; if (n_needed > 0 && !(ch->flags & XPC_C_DISCONNECTING)) xpc_activate_kthreads(ch, n_needed); @@ -704,11 +802,9 @@ xpc_kthread_start(void *args) } spin_unlock_irqrestore(&ch->lock, irq_flags); - if (atomic_dec_return(&ch->kthreads_assigned) == 0) { - if (atomic_dec_return(&part->nchannels_engaged) == 0) { - xpc_mark_partition_disengaged(part); - xpc_IPI_send_disengage(part); - } + if (atomic_dec_return(&ch->kthreads_assigned) == 0 && + atomic_dec_return(&part->nchannels_engaged) == 0) { + xpc_indicate_partition_disengaged(part); } xpc_msgqueue_deref(ch); @@ -759,9 +855,9 @@ xpc_create_kthreads(struct xpc_channel *ch, int needed, } else if (ch->flags & XPC_C_DISCONNECTING) { break; - } else if (atomic_inc_return(&ch->kthreads_assigned) == 1) { - if (atomic_inc_return(&part->nchannels_engaged) == 1) - xpc_mark_partition_engaged(part); + } else if (atomic_inc_return(&ch->kthreads_assigned) == 1 && + atomic_inc_return(&part->nchannels_engaged) == 1) { + xpc_indicate_partition_engaged(part); } (void)xpc_part_ref(part); xpc_msgqueue_ref(ch); @@ -783,8 +879,7 @@ xpc_create_kthreads(struct xpc_channel *ch, int needed, if (atomic_dec_return(&ch->kthreads_assigned) == 0 && atomic_dec_return(&part->nchannels_engaged) == 0) { - xpc_mark_partition_disengaged(part); - xpc_IPI_send_disengage(part); + xpc_indicate_partition_disengaged(part); } xpc_msgqueue_deref(ch); xpc_part_deref(part); @@ -816,7 +911,7 @@ xpc_disconnect_wait(int ch_number) int wakeup_channel_mgr; /* now wait for all callouts to the caller's function to cease */ - for (partid = 1; partid < XP_MAX_PARTITIONS; partid++) { + for (partid = 0; partid < xp_max_npartitions; partid++) { part = &xpc_partitions[partid]; if (!xpc_part_ref(part)) @@ -835,16 +930,15 @@ xpc_disconnect_wait(int ch_number) DBUG_ON(!(ch->flags & XPC_C_DISCONNECTED)); wakeup_channel_mgr = 0; - if (ch->delayed_IPI_flags) { - if (part->act_state != XPC_P_DEACTIVATING) { - spin_lock(&part->IPI_lock); - XPC_SET_IPI_FLAGS(part->local_IPI_amo, - ch->number, - ch->delayed_IPI_flags); - spin_unlock(&part->IPI_lock); + if (ch->delayed_chctl_flags) { + if (part->act_state != XPC_P_AS_DEACTIVATING) { + spin_lock(&part->chctl_lock); + part->chctl.flags[ch->number] |= + ch->delayed_chctl_flags; + spin_unlock(&part->chctl_lock); wakeup_channel_mgr = 1; } - ch->delayed_IPI_flags = 0; + ch->delayed_chctl_flags = 0; } ch->flags &= ~XPC_C_WDISCONNECT; @@ -857,13 +951,63 @@ xpc_disconnect_wait(int ch_number) } } +static int +xpc_setup_partitions(void) +{ + short partid; + struct xpc_partition *part; + + xpc_partitions = kzalloc(sizeof(struct xpc_partition) * + xp_max_npartitions, GFP_KERNEL); + if (xpc_partitions == NULL) { + dev_err(xpc_part, "can't get memory for partition structure\n"); + return -ENOMEM; + } + + /* + * The first few fields of each entry of xpc_partitions[] need to + * be initialized now so that calls to xpc_connect() and + * xpc_disconnect() can be made prior to the activation of any remote + * partition. NOTE THAT NONE OF THE OTHER FIELDS BELONGING TO THESE + * ENTRIES ARE MEANINGFUL UNTIL AFTER AN ENTRY'S CORRESPONDING + * PARTITION HAS BEEN ACTIVATED. + */ + for (partid = 0; partid < xp_max_npartitions; partid++) { + part = &xpc_partitions[partid]; + + DBUG_ON((u64)part != L1_CACHE_ALIGN((u64)part)); + + part->activate_IRQ_rcvd = 0; + spin_lock_init(&part->act_lock); + part->act_state = XPC_P_AS_INACTIVE; + XPC_SET_REASON(part, 0, 0); + + init_timer(&part->disengage_timer); + part->disengage_timer.function = + xpc_timeout_partition_disengage; + part->disengage_timer.data = (unsigned long)part; + + part->setup_state = XPC_P_SS_UNSET; + init_waitqueue_head(&part->teardown_wq); + atomic_set(&part->references, 0); + } + + return xpc_setup_partitions_sn(); +} + +static void +xpc_teardown_partitions(void) +{ + kfree(xpc_partitions); +} + static void xpc_do_exit(enum xp_retval reason) { short partid; int active_part_count, printed_waiting_msg = 0; struct xpc_partition *part; - unsigned long printmsg_time, disengage_request_timeout = 0; + unsigned long printmsg_time, disengage_timeout = 0; /* a 'rmmod XPC' and a 'reboot' cannot both end up here together */ DBUG_ON(xpc_exiting == 1); @@ -874,10 +1018,7 @@ xpc_do_exit(enum xp_retval reason) * the heartbeat checker thread in case it's sleeping. */ xpc_exiting = 1; - wake_up_interruptible(&xpc_act_IRQ_wq); - - /* ignore all incoming interrupts */ - free_irq(SGI_XPC_ACTIVATE, NULL); + wake_up_interruptible(&xpc_activate_IRQ_wq); /* wait for the discovery thread to exit */ wait_for_completion(&xpc_discovery_exited); @@ -890,17 +1031,17 @@ xpc_do_exit(enum xp_retval reason) /* wait for all partitions to become inactive */ - printmsg_time = jiffies + (XPC_DISENGAGE_PRINTMSG_INTERVAL * HZ); - xpc_disengage_request_timedout = 0; + printmsg_time = jiffies + (XPC_DEACTIVATE_PRINTMSG_INTERVAL * HZ); + xpc_disengage_timedout = 0; do { active_part_count = 0; - for (partid = 1; partid < XP_MAX_PARTITIONS; partid++) { + for (partid = 0; partid < xp_max_npartitions; partid++) { part = &xpc_partitions[partid]; if (xpc_partition_disengaged(part) && - part->act_state == XPC_P_INACTIVE) { + part->act_state == XPC_P_AS_INACTIVE) { continue; } @@ -908,36 +1049,32 @@ xpc_do_exit(enum xp_retval reason) XPC_DEACTIVATE_PARTITION(part, reason); - if (part->disengage_request_timeout > - disengage_request_timeout) { - disengage_request_timeout = - part->disengage_request_timeout; - } + if (part->disengage_timeout > disengage_timeout) + disengage_timeout = part->disengage_timeout; } - if (xpc_partition_engaged(-1UL)) { - if (time_after(jiffies, printmsg_time)) { + if (xpc_any_partition_engaged()) { + if (time_is_before_jiffies(printmsg_time)) { dev_info(xpc_part, "waiting for remote " - "partitions to disengage, timeout in " - "%ld seconds\n", - (disengage_request_timeout - jiffies) - / HZ); + "partitions to deactivate, timeout in " + "%ld seconds\n", (disengage_timeout - + jiffies) / HZ); printmsg_time = jiffies + - (XPC_DISENGAGE_PRINTMSG_INTERVAL * HZ); + (XPC_DEACTIVATE_PRINTMSG_INTERVAL * HZ); printed_waiting_msg = 1; } } else if (active_part_count > 0) { if (printed_waiting_msg) { dev_info(xpc_part, "waiting for local partition" - " to disengage\n"); + " to deactivate\n"); printed_waiting_msg = 0; } } else { - if (!xpc_disengage_request_timedout) { + if (!xpc_disengage_timedout) { dev_info(xpc_part, "all partitions have " - "disengaged\n"); + "deactivated\n"); } break; } @@ -947,33 +1084,28 @@ xpc_do_exit(enum xp_retval reason) } while (1); - DBUG_ON(xpc_partition_engaged(-1UL)); + DBUG_ON(xpc_any_partition_engaged()); + DBUG_ON(xpc_any_hbs_allowed() != 0); - /* indicate to others that our reserved page is uninitialized */ - xpc_rsvd_page->vars_pa = 0; - - /* now it's time to eliminate our heartbeat */ - del_timer_sync(&xpc_hb_timer); - DBUG_ON(xpc_vars->heartbeating_to_mask != 0); + xpc_teardown_rsvd_page(); if (reason == xpUnloading) { - /* take ourselves off of the reboot_notifier_list */ - (void)unregister_reboot_notifier(&xpc_reboot_notifier); - - /* take ourselves off of the die_notifier list */ (void)unregister_die_notifier(&xpc_die_notifier); + (void)unregister_reboot_notifier(&xpc_reboot_notifier); } - /* close down protections for IPI operations */ - xpc_restrict_IPI_ops(); - /* clear the interface to XPC's functions */ xpc_clear_interface(); if (xpc_sysctl) unregister_sysctl_table(xpc_sysctl); - kfree(xpc_remote_copy_buffer_base); + xpc_teardown_partitions(); + + if (is_shub()) + xpc_exit_sn2(); + else + xpc_exit_uv(); } /* @@ -1003,60 +1135,57 @@ xpc_system_reboot(struct notifier_block *nb, unsigned long event, void *unused) } /* - * Notify other partitions to disengage from all references to our memory. + * Notify other partitions to deactivate from us by first disengaging from all + * references to our memory. */ static void -xpc_die_disengage(void) +xpc_die_deactivate(void) { struct xpc_partition *part; short partid; - unsigned long engaged; - long time, printmsg_time, disengage_request_timeout; + int any_engaged; + long keep_waiting; + long wait_to_print; /* keep xpc_hb_checker thread from doing anything (just in case) */ xpc_exiting = 1; - xpc_vars->heartbeating_to_mask = 0; /* indicate we're deactivated */ + xpc_disallow_all_hbs(); /*indicate we're deactivated */ - for (partid = 1; partid < XP_MAX_PARTITIONS; partid++) { + for (partid = 0; partid < xp_max_npartitions; partid++) { part = &xpc_partitions[partid]; - if (!XPC_SUPPORTS_DISENGAGE_REQUEST(part-> - remote_vars_version)) { - - /* just in case it was left set by an earlier XPC */ - xpc_clear_partition_engaged(1UL << partid); - continue; - } - - if (xpc_partition_engaged(1UL << partid) || - part->act_state != XPC_P_INACTIVE) { - xpc_request_partition_disengage(part); - xpc_mark_partition_disengaged(part); - xpc_IPI_send_disengage(part); + if (xpc_partition_engaged(partid) || + part->act_state != XPC_P_AS_INACTIVE) { + xpc_request_partition_deactivation(part); + xpc_indicate_partition_disengaged(part); } } - time = rtc_time(); - printmsg_time = time + - (XPC_DISENGAGE_PRINTMSG_INTERVAL * sn_rtc_cycles_per_second); - disengage_request_timeout = time + - (xpc_disengage_request_timelimit * sn_rtc_cycles_per_second); - - /* wait for all other partitions to disengage from us */ + /* + * Though we requested that all other partitions deactivate from us, + * we only wait until they've all disengaged or we've reached the + * defined timelimit. + * + * Given that one iteration through the following while-loop takes + * approximately 200 microseconds, calculate the #of loops to take + * before bailing and the #of loops before printing a waiting message. + */ + keep_waiting = xpc_disengage_timelimit * 1000 * 5; + wait_to_print = XPC_DEACTIVATE_PRINTMSG_INTERVAL * 1000 * 5; while (1) { - engaged = xpc_partition_engaged(-1UL); - if (!engaged) { - dev_info(xpc_part, "all partitions have disengaged\n"); + any_engaged = xpc_any_partition_engaged(); + if (!any_engaged) { + dev_info(xpc_part, "all partitions have deactivated\n"); break; } - time = rtc_time(); - if (time >= disengage_request_timeout) { - for (partid = 1; partid < XP_MAX_PARTITIONS; partid++) { - if (engaged & (1UL << partid)) { - dev_info(xpc_part, "disengage from " + if (!keep_waiting--) { + for (partid = 0; partid < xp_max_npartitions; + partid++) { + if (xpc_partition_engaged(partid)) { + dev_info(xpc_part, "deactivate from " "remote partition %d timed " "out\n", partid); } @@ -1064,15 +1193,15 @@ xpc_die_disengage(void) break; } - if (time >= printmsg_time) { + if (!wait_to_print--) { dev_info(xpc_part, "waiting for remote partitions to " - "disengage, timeout in %ld seconds\n", - (disengage_request_timeout - time) / - sn_rtc_cycles_per_second); - printmsg_time = time + - (XPC_DISENGAGE_PRINTMSG_INTERVAL * - sn_rtc_cycles_per_second); + "deactivate, timeout in %ld seconds\n", + keep_waiting / (1000 * 5)); + wait_to_print = XPC_DEACTIVATE_PRINTMSG_INTERVAL * + 1000 * 5; } + + udelay(200); } } @@ -1087,10 +1216,11 @@ xpc_die_disengage(void) static int xpc_system_die(struct notifier_block *nb, unsigned long event, void *unused) { +#ifdef CONFIG_IA64 /* !!! temporary kludge */ switch (event) { case DIE_MACHINE_RESTART: case DIE_MACHINE_HALT: - xpc_die_disengage(); + xpc_die_deactivate(); break; case DIE_KDEBUG_ENTER: @@ -1101,8 +1231,7 @@ xpc_system_die(struct notifier_block *nb, unsigned long event, void *unused) /* fall through */ case DIE_MCA_MONARCH_ENTER: case DIE_INIT_MONARCH_ENTER: - xpc_vars->heartbeat++; - xpc_vars->heartbeat_offline = 1; + xpc_offline_heartbeat(); break; case DIE_KDEBUG_LEAVE: @@ -1113,10 +1242,12 @@ xpc_system_die(struct notifier_block *nb, unsigned long event, void *unused) /* fall through */ case DIE_MCA_MONARCH_LEAVE: case DIE_INIT_MONARCH_LEAVE: - xpc_vars->heartbeat++; - xpc_vars->heartbeat_offline = 0; + xpc_online_heartbeat(); break; } +#else + xpc_die_deactivate(); +#endif return NOTIFY_DONE; } @@ -1125,105 +1256,52 @@ int __init xpc_init(void) { int ret; - short partid; - struct xpc_partition *part; struct task_struct *kthread; - size_t buf_size; - - if (!ia64_platform_is("sn2")) - return -ENODEV; - - buf_size = max(XPC_RP_VARS_SIZE, - XPC_RP_HEADER_SIZE + XP_NASID_MASK_BYTES); - xpc_remote_copy_buffer = xpc_kmalloc_cacheline_aligned(buf_size, - GFP_KERNEL, - &xpc_remote_copy_buffer_base); - if (xpc_remote_copy_buffer == NULL) - return -ENOMEM; snprintf(xpc_part->bus_id, BUS_ID_SIZE, "part"); snprintf(xpc_chan->bus_id, BUS_ID_SIZE, "chan"); - xpc_sysctl = register_sysctl_table(xpc_sys_dir); - - /* - * The first few fields of each entry of xpc_partitions[] need to - * be initialized now so that calls to xpc_connect() and - * xpc_disconnect() can be made prior to the activation of any remote - * partition. NOTE THAT NONE OF THE OTHER FIELDS BELONGING TO THESE - * ENTRIES ARE MEANINGFUL UNTIL AFTER AN ENTRY'S CORRESPONDING - * PARTITION HAS BEEN ACTIVATED. - */ - for (partid = 1; partid < XP_MAX_PARTITIONS; partid++) { - part = &xpc_partitions[partid]; - - DBUG_ON((u64)part != L1_CACHE_ALIGN((u64)part)); - - part->act_IRQ_rcvd = 0; - spin_lock_init(&part->act_lock); - part->act_state = XPC_P_INACTIVE; - XPC_SET_REASON(part, 0, 0); + if (is_shub()) { + /* + * The ia64-sn2 architecture supports at most 64 partitions. + * And the inability to unregister remote amos restricts us + * further to only support exactly 64 partitions on this + * architecture, no less. + */ + if (xp_max_npartitions != 64) { + dev_err(xpc_part, "max #of partitions not set to 64\n"); + ret = -EINVAL; + } else { + ret = xpc_init_sn2(); + } - init_timer(&part->disengage_request_timer); - part->disengage_request_timer.function = - xpc_timeout_partition_disengage_request; - part->disengage_request_timer.data = (unsigned long)part; + } else if (is_uv()) { + ret = xpc_init_uv(); - part->setup_state = XPC_P_UNSET; - init_waitqueue_head(&part->teardown_wq); - atomic_set(&part->references, 0); + } else { + ret = -ENODEV; } - /* - * Open up protections for IPI operations (and AMO operations on - * Shub 1.1 systems). - */ - xpc_allow_IPI_ops(); - - /* - * Interrupts being processed will increment this atomic variable and - * awaken the heartbeat thread which will process the interrupts. - */ - atomic_set(&xpc_act_IRQ_rcvd, 0); + if (ret != 0) + return ret; - /* - * This is safe to do before the xpc_hb_checker thread has started - * because the handler releases a wait queue. If an interrupt is - * received before the thread is waiting, it will not go to sleep, - * but rather immediately process the interrupt. - */ - ret = request_irq(SGI_XPC_ACTIVATE, xpc_act_IRQ_handler, 0, - "xpc hb", NULL); + ret = xpc_setup_partitions(); if (ret != 0) { - dev_err(xpc_part, "can't register ACTIVATE IRQ handler, " - "errno=%d\n", -ret); - - xpc_restrict_IPI_ops(); - - if (xpc_sysctl) - unregister_sysctl_table(xpc_sysctl); - - kfree(xpc_remote_copy_buffer_base); - return -EBUSY; + dev_err(xpc_part, "can't get memory for partition structure\n"); + goto out_1; } + xpc_sysctl = register_sysctl_table(xpc_sys_dir); + /* * Fill the partition reserved page with the information needed by * other partitions to discover we are alive and establish initial * communications. */ - xpc_rsvd_page = xpc_rsvd_page_init(); - if (xpc_rsvd_page == NULL) { - dev_err(xpc_part, "could not setup our reserved page\n"); - - free_irq(SGI_XPC_ACTIVATE, NULL); - xpc_restrict_IPI_ops(); - - if (xpc_sysctl) - unregister_sysctl_table(xpc_sysctl); - - kfree(xpc_remote_copy_buffer_base); - return -EBUSY; + ret = xpc_setup_rsvd_page(); + if (ret != 0) { + dev_err(xpc_part, "can't setup our reserved page\n"); + goto out_2; } /* add ourselves to the reboot_notifier_list */ @@ -1236,9 +1314,6 @@ xpc_init(void) if (ret != 0) dev_warn(xpc_part, "can't register die notifier\n"); - init_timer(&xpc_hb_timer); - xpc_hb_timer.function = xpc_hb_beater; - /* * The real work-horse behind xpc. This processes incoming * interrupts and monitors remote heartbeats. @@ -1246,25 +1321,8 @@ xpc_init(void) kthread = kthread_run(xpc_hb_checker, NULL, XPC_HB_CHECK_THREAD_NAME); if (IS_ERR(kthread)) { dev_err(xpc_part, "failed while forking hb check thread\n"); - - /* indicate to others that our reserved page is uninitialized */ - xpc_rsvd_page->vars_pa = 0; - - /* take ourselves off of the reboot_notifier_list */ - (void)unregister_reboot_notifier(&xpc_reboot_notifier); - - /* take ourselves off of the die_notifier list */ - (void)unregister_die_notifier(&xpc_die_notifier); - - del_timer_sync(&xpc_hb_timer); - free_irq(SGI_XPC_ACTIVATE, NULL); - xpc_restrict_IPI_ops(); - - if (xpc_sysctl) - unregister_sysctl_table(xpc_sysctl); - - kfree(xpc_remote_copy_buffer_base); - return -EBUSY; + ret = -EBUSY; + goto out_3; } /* @@ -1286,11 +1344,28 @@ xpc_init(void) /* set the interface to point at XPC's functions */ xpc_set_interface(xpc_initiate_connect, xpc_initiate_disconnect, - xpc_initiate_allocate, xpc_initiate_send, - xpc_initiate_send_notify, xpc_initiate_received, - xpc_initiate_partid_to_nasids); + xpc_initiate_send, xpc_initiate_send_notify, + xpc_initiate_received, xpc_initiate_partid_to_nasids); return 0; + + /* initialization was not successful */ +out_3: + xpc_teardown_rsvd_page(); + + (void)unregister_die_notifier(&xpc_die_notifier); + (void)unregister_reboot_notifier(&xpc_reboot_notifier); +out_2: + if (xpc_sysctl) + unregister_sysctl_table(xpc_sysctl); + + xpc_teardown_partitions(); +out_1: + if (is_shub()) + xpc_exit_sn2(); + else + xpc_exit_uv(); + return ret; } module_init(xpc_init); @@ -1315,9 +1390,9 @@ module_param(xpc_hb_check_interval, int, 0); MODULE_PARM_DESC(xpc_hb_check_interval, "Number of seconds between " "heartbeat checks."); -module_param(xpc_disengage_request_timelimit, int, 0); -MODULE_PARM_DESC(xpc_disengage_request_timelimit, "Number of seconds to wait " - "for disengage request to complete."); +module_param(xpc_disengage_timelimit, int, 0); +MODULE_PARM_DESC(xpc_disengage_timelimit, "Number of seconds to wait " + "for disengage to complete."); module_param(xpc_kdebug_ignore, int, 0); MODULE_PARM_DESC(xpc_kdebug_ignore, "Should lack of heartbeat be ignored by " diff --git a/drivers/misc/sgi-xp/xpc_partition.c b/drivers/misc/sgi-xp/xpc_partition.c index 7dd4b5812c4..6722f6fe4dc 100644 --- a/drivers/misc/sgi-xp/xpc_partition.c +++ b/drivers/misc/sgi-xp/xpc_partition.c @@ -15,57 +15,22 @@ * */ -#include <linux/kernel.h> -#include <linux/sysctl.h> -#include <linux/cache.h> -#include <linux/mmzone.h> -#include <linux/nodemask.h> -#include <asm/uncached.h> -#include <asm/sn/bte.h> -#include <asm/sn/intr.h> -#include <asm/sn/sn_sal.h> -#include <asm/sn/nodepda.h> -#include <asm/sn/addrs.h> +#include <linux/device.h> +#include <linux/hardirq.h> #include "xpc.h" /* XPC is exiting flag */ int xpc_exiting; -/* SH_IPI_ACCESS shub register value on startup */ -static u64 xpc_sh1_IPI_access; -static u64 xpc_sh2_IPI_access0; -static u64 xpc_sh2_IPI_access1; -static u64 xpc_sh2_IPI_access2; -static u64 xpc_sh2_IPI_access3; - -/* original protection values for each node */ -u64 xpc_prot_vec[MAX_NUMNODES]; - /* this partition's reserved page pointers */ struct xpc_rsvd_page *xpc_rsvd_page; -static u64 *xpc_part_nasids; -static u64 *xpc_mach_nasids; -struct xpc_vars *xpc_vars; -struct xpc_vars_part *xpc_vars_part; +static unsigned long *xpc_part_nasids; +unsigned long *xpc_mach_nasids; -static int xp_nasid_mask_bytes; /* actual size in bytes of nasid mask */ -static int xp_nasid_mask_words; /* actual size in words of nasid mask */ - -/* - * For performance reasons, each entry of xpc_partitions[] is cacheline - * aligned. And xpc_partitions[] is padded with an additional entry at the - * end so that the last legitimate entry doesn't share its cacheline with - * another variable. - */ -struct xpc_partition xpc_partitions[XP_MAX_PARTITIONS + 1]; +static int xpc_nasid_mask_nbytes; /* #of bytes in nasid mask */ +int xpc_nasid_mask_nlongs; /* #of longs in nasid mask */ -/* - * Generic buffer used to store a local copy of portions of a remote - * partition's reserved page (either its header and part_nasids mask, - * or its vars). - */ -char *xpc_remote_copy_buffer; -void *xpc_remote_copy_buffer_base; +struct xpc_partition *xpc_partitions; /* * Guarantee that the kmalloc'd memory is cacheline aligned. @@ -95,56 +60,59 @@ xpc_kmalloc_cacheline_aligned(size_t size, gfp_t flags, void **base) * Given a nasid, get the physical address of the partition's reserved page * for that nasid. This function returns 0 on any error. */ -static u64 +static unsigned long xpc_get_rsvd_page_pa(int nasid) { - bte_result_t bte_res; - s64 status; + enum xp_retval ret; u64 cookie = 0; - u64 rp_pa = nasid; /* seed with nasid */ - u64 len = 0; - u64 buf = buf; - u64 buf_len = 0; + unsigned long rp_pa = nasid; /* seed with nasid */ + size_t len = 0; + size_t buf_len = 0; + void *buf = buf; void *buf_base = NULL; while (1) { - status = sn_partition_reserved_page_pa(buf, &cookie, &rp_pa, - &len); + /* !!! rp_pa will need to be _gpa on UV. + * ??? So do we save it into the architecture specific parts + * ??? of the xpc_partition structure? Do we rename this + * ??? function or have two versions? Rename rp_pa for UV to + * ??? rp_gpa? + */ + ret = xpc_get_partition_rsvd_page_pa(buf, &cookie, &rp_pa, + &len); - dev_dbg(xpc_part, "SAL returned with status=%li, cookie=" - "0x%016lx, address=0x%016lx, len=0x%016lx\n", - status, cookie, rp_pa, len); + dev_dbg(xpc_part, "SAL returned with ret=%d, cookie=0x%016lx, " + "address=0x%016lx, len=0x%016lx\n", ret, + (unsigned long)cookie, rp_pa, len); - if (status != SALRET_MORE_PASSES) + if (ret != xpNeedMoreInfo) break; + /* !!! L1_CACHE_ALIGN() is only a sn2-bte_copy requirement */ if (L1_CACHE_ALIGN(len) > buf_len) { kfree(buf_base); buf_len = L1_CACHE_ALIGN(len); - buf = (u64)xpc_kmalloc_cacheline_aligned(buf_len, - GFP_KERNEL, - &buf_base); + buf = xpc_kmalloc_cacheline_aligned(buf_len, GFP_KERNEL, + &buf_base); if (buf_base == NULL) { dev_err(xpc_part, "unable to kmalloc " "len=0x%016lx\n", buf_len); - status = SALRET_ERROR; + ret = xpNoMemory; break; } } - bte_res = xp_bte_copy(rp_pa, buf, buf_len, - (BTE_NOTIFY | BTE_WACQUIRE), NULL); - if (bte_res != BTE_SUCCESS) { - dev_dbg(xpc_part, "xp_bte_copy failed %i\n", bte_res); - status = SALRET_ERROR; + ret = xp_remote_memcpy(xp_pa(buf), rp_pa, buf_len); + if (ret != xpSuccess) { + dev_dbg(xpc_part, "xp_remote_memcpy failed %d\n", ret); break; } } kfree(buf_base); - if (status != SALRET_OK) + if (ret != xpSuccess) rp_pa = 0; dev_dbg(xpc_part, "reserved page at phys address 0x%016lx\n", rp_pa); @@ -156,300 +124,77 @@ xpc_get_rsvd_page_pa(int nasid) * other partitions to discover we are alive and establish initial * communications. */ -struct xpc_rsvd_page * -xpc_rsvd_page_init(void) +int +xpc_setup_rsvd_page(void) { + int ret; struct xpc_rsvd_page *rp; - AMO_t *amos_page; - u64 rp_pa, nasid_array = 0; - int i, ret; + unsigned long rp_pa; + unsigned long new_ts_jiffies; /* get the local reserved page's address */ preempt_disable(); - rp_pa = xpc_get_rsvd_page_pa(cpuid_to_nasid(smp_processor_id())); + rp_pa = xpc_get_rsvd_page_pa(xp_cpu_to_nasid(smp_processor_id())); preempt_enable(); if (rp_pa == 0) { dev_err(xpc_part, "SAL failed to locate the reserved page\n"); - return NULL; + return -ESRCH; } rp = (struct xpc_rsvd_page *)__va(rp_pa); - if (rp->partid != sn_partition_id) { - dev_err(xpc_part, "the reserved page's partid of %d should be " - "%d\n", rp->partid, sn_partition_id); - return NULL; + if (rp->SAL_version < 3) { + /* SAL_versions < 3 had a SAL_partid defined as a u8 */ + rp->SAL_partid &= 0xff; + } + BUG_ON(rp->SAL_partid != xp_partition_id); + + if (rp->SAL_partid < 0 || rp->SAL_partid >= xp_max_npartitions) { + dev_err(xpc_part, "the reserved page's partid of %d is outside " + "supported range (< 0 || >= %d)\n", rp->SAL_partid, + xp_max_npartitions); + return -EINVAL; } rp->version = XPC_RP_VERSION; + rp->max_npartitions = xp_max_npartitions; /* establish the actual sizes of the nasid masks */ if (rp->SAL_version == 1) { /* SAL_version 1 didn't set the nasids_size field */ - rp->nasids_size = 128; + rp->SAL_nasids_size = 128; } - xp_nasid_mask_bytes = rp->nasids_size; - xp_nasid_mask_words = xp_nasid_mask_bytes / 8; + xpc_nasid_mask_nbytes = rp->SAL_nasids_size; + xpc_nasid_mask_nlongs = BITS_TO_LONGS(rp->SAL_nasids_size * + BITS_PER_BYTE); /* setup the pointers to the various items in the reserved page */ xpc_part_nasids = XPC_RP_PART_NASIDS(rp); xpc_mach_nasids = XPC_RP_MACH_NASIDS(rp); - xpc_vars = XPC_RP_VARS(rp); - xpc_vars_part = XPC_RP_VARS_PART(rp); - - /* - * Before clearing xpc_vars, see if a page of AMOs had been previously - * allocated. If not we'll need to allocate one and set permissions - * so that cross-partition AMOs are allowed. - * - * The allocated AMO page needs MCA reporting to remain disabled after - * XPC has unloaded. To make this work, we keep a copy of the pointer - * to this page (i.e., amos_page) in the struct xpc_vars structure, - * which is pointed to by the reserved page, and re-use that saved copy - * on subsequent loads of XPC. This AMO page is never freed, and its - * memory protections are never restricted. - */ - amos_page = xpc_vars->amos_page; - if (amos_page == NULL) { - amos_page = (AMO_t *)TO_AMO(uncached_alloc_page(0, 1)); - if (amos_page == NULL) { - dev_err(xpc_part, "can't allocate page of AMOs\n"); - return NULL; - } - - /* - * Open up AMO-R/W to cpu. This is done for Shub 1.1 systems - * when xpc_allow_IPI_ops() is called via xpc_hb_init(). - */ - if (!enable_shub_wars_1_1()) { - ret = sn_change_memprotect(ia64_tpa((u64)amos_page), - PAGE_SIZE, - SN_MEMPROT_ACCESS_CLASS_1, - &nasid_array); - if (ret != 0) { - dev_err(xpc_part, "can't change memory " - "protections\n"); - uncached_free_page(__IA64_UNCACHED_OFFSET | - TO_PHYS((u64)amos_page), 1); - return NULL; - } - } - } else if (!IS_AMO_ADDRESS((u64)amos_page)) { - /* - * EFI's XPBOOT can also set amos_page in the reserved page, - * but it happens to leave it as an uncached physical address - * and we need it to be an uncached virtual, so we'll have to - * convert it. - */ - if (!IS_AMO_PHYS_ADDRESS((u64)amos_page)) { - dev_err(xpc_part, "previously used amos_page address " - "is bad = 0x%p\n", (void *)amos_page); - return NULL; - } - amos_page = (AMO_t *)TO_AMO((u64)amos_page); - } - - /* clear xpc_vars */ - memset(xpc_vars, 0, sizeof(struct xpc_vars)); - - xpc_vars->version = XPC_V_VERSION; - xpc_vars->act_nasid = cpuid_to_nasid(0); - xpc_vars->act_phys_cpuid = cpu_physical_id(0); - xpc_vars->vars_part_pa = __pa(xpc_vars_part); - xpc_vars->amos_page_pa = ia64_tpa((u64)amos_page); - xpc_vars->amos_page = amos_page; /* save for next load of XPC */ - - /* clear xpc_vars_part */ - memset((u64 *)xpc_vars_part, 0, sizeof(struct xpc_vars_part) * - XP_MAX_PARTITIONS); - - /* initialize the activate IRQ related AMO variables */ - for (i = 0; i < xp_nasid_mask_words; i++) - (void)xpc_IPI_init(XPC_ACTIVATE_IRQ_AMOS + i); - - /* initialize the engaged remote partitions related AMO variables */ - (void)xpc_IPI_init(XPC_ENGAGED_PARTITIONS_AMO); - (void)xpc_IPI_init(XPC_DISENGAGE_REQUEST_AMO); - /* timestamp of when reserved page was setup by XPC */ - rp->stamp = CURRENT_TIME; + ret = xpc_setup_rsvd_page_sn(rp); + if (ret != 0) + return ret; /* + * Set timestamp of when reserved page was setup by XPC. * This signifies to the remote partition that our reserved * page is initialized. */ - rp->vars_pa = __pa(xpc_vars); + new_ts_jiffies = jiffies; + if (new_ts_jiffies == 0 || new_ts_jiffies == rp->ts_jiffies) + new_ts_jiffies++; + rp->ts_jiffies = new_ts_jiffies; - return rp; + xpc_rsvd_page = rp; + return 0; } -/* - * Change protections to allow IPI operations (and AMO operations on - * Shub 1.1 systems). - */ void -xpc_allow_IPI_ops(void) +xpc_teardown_rsvd_page(void) { - int node; - int nasid; - - /* >>> Change SH_IPI_ACCESS code to use SAL call once it is available */ - - if (is_shub2()) { - xpc_sh2_IPI_access0 = - (u64)HUB_L((u64 *)LOCAL_MMR_ADDR(SH2_IPI_ACCESS0)); - xpc_sh2_IPI_access1 = - (u64)HUB_L((u64 *)LOCAL_MMR_ADDR(SH2_IPI_ACCESS1)); - xpc_sh2_IPI_access2 = - (u64)HUB_L((u64 *)LOCAL_MMR_ADDR(SH2_IPI_ACCESS2)); - xpc_sh2_IPI_access3 = - (u64)HUB_L((u64 *)LOCAL_MMR_ADDR(SH2_IPI_ACCESS3)); - - for_each_online_node(node) { - nasid = cnodeid_to_nasid(node); - HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS0), - -1UL); - HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS1), - -1UL); - HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS2), - -1UL); - HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS3), - -1UL); - } - - } else { - xpc_sh1_IPI_access = - (u64)HUB_L((u64 *)LOCAL_MMR_ADDR(SH1_IPI_ACCESS)); - - for_each_online_node(node) { - nasid = cnodeid_to_nasid(node); - HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH1_IPI_ACCESS), - -1UL); - - /* - * Since the BIST collides with memory operations on - * SHUB 1.1 sn_change_memprotect() cannot be used. - */ - if (enable_shub_wars_1_1()) { - /* open up everything */ - xpc_prot_vec[node] = (u64)HUB_L((u64 *) - GLOBAL_MMR_ADDR - (nasid, - SH1_MD_DQLP_MMR_DIR_PRIVEC0)); - HUB_S((u64 *) - GLOBAL_MMR_ADDR(nasid, - SH1_MD_DQLP_MMR_DIR_PRIVEC0), - -1UL); - HUB_S((u64 *) - GLOBAL_MMR_ADDR(nasid, - SH1_MD_DQRP_MMR_DIR_PRIVEC0), - -1UL); - } - } - } -} - -/* - * Restrict protections to disallow IPI operations (and AMO operations on - * Shub 1.1 systems). - */ -void -xpc_restrict_IPI_ops(void) -{ - int node; - int nasid; - - /* >>> Change SH_IPI_ACCESS code to use SAL call once it is available */ - - if (is_shub2()) { - - for_each_online_node(node) { - nasid = cnodeid_to_nasid(node); - HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS0), - xpc_sh2_IPI_access0); - HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS1), - xpc_sh2_IPI_access1); - HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS2), - xpc_sh2_IPI_access2); - HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS3), - xpc_sh2_IPI_access3); - } - - } else { - - for_each_online_node(node) { - nasid = cnodeid_to_nasid(node); - HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH1_IPI_ACCESS), - xpc_sh1_IPI_access); - - if (enable_shub_wars_1_1()) { - HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, - SH1_MD_DQLP_MMR_DIR_PRIVEC0), - xpc_prot_vec[node]); - HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, - SH1_MD_DQRP_MMR_DIR_PRIVEC0), - xpc_prot_vec[node]); - } - } - } -} - -/* - * At periodic intervals, scan through all active partitions and ensure - * their heartbeat is still active. If not, the partition is deactivated. - */ -void -xpc_check_remote_hb(void) -{ - struct xpc_vars *remote_vars; - struct xpc_partition *part; - short partid; - bte_result_t bres; - - remote_vars = (struct xpc_vars *)xpc_remote_copy_buffer; - - for (partid = 1; partid < XP_MAX_PARTITIONS; partid++) { - - if (xpc_exiting) - break; - - if (partid == sn_partition_id) - continue; - - part = &xpc_partitions[partid]; - - if (part->act_state == XPC_P_INACTIVE || - part->act_state == XPC_P_DEACTIVATING) { - continue; - } - - /* pull the remote_hb cache line */ - bres = xp_bte_copy(part->remote_vars_pa, - (u64)remote_vars, - XPC_RP_VARS_SIZE, - (BTE_NOTIFY | BTE_WACQUIRE), NULL); - if (bres != BTE_SUCCESS) { - XPC_DEACTIVATE_PARTITION(part, - xpc_map_bte_errors(bres)); - continue; - } - - dev_dbg(xpc_part, "partid = %d, heartbeat = %ld, last_heartbeat" - " = %ld, heartbeat_offline = %ld, HB_mask = 0x%lx\n", - partid, remote_vars->heartbeat, part->last_heartbeat, - remote_vars->heartbeat_offline, - remote_vars->heartbeating_to_mask); - - if (((remote_vars->heartbeat == part->last_heartbeat) && - (remote_vars->heartbeat_offline == 0)) || - !xpc_hb_allowed(sn_partition_id, remote_vars)) { - - XPC_DEACTIVATE_PARTITION(part, xpNoHeartbeat); - continue; - } - - part->last_heartbeat = remote_vars->heartbeat; - } + /* a zero timestamp indicates our rsvd page is not initialized */ + xpc_rsvd_page->ts_jiffies = 0; } /* @@ -459,11 +204,12 @@ xpc_check_remote_hb(void) * is large enough to contain a copy of their reserved page header and * part_nasids mask. */ -static enum xp_retval -xpc_get_remote_rp(int nasid, u64 *discovered_nasids, - struct xpc_rsvd_page *remote_rp, u64 *remote_rp_pa) +enum xp_retval +xpc_get_remote_rp(int nasid, unsigned long *discovered_nasids, + struct xpc_rsvd_page *remote_rp, unsigned long *remote_rp_pa) { - int bres, i; + int l; + enum xp_retval ret; /* get the reserved page's physical address */ @@ -472,355 +218,45 @@ xpc_get_remote_rp(int nasid, u64 *discovered_nasids, return xpNoRsvdPageAddr; /* pull over the reserved page header and part_nasids mask */ - bres = xp_bte_copy(*remote_rp_pa, (u64)remote_rp, - XPC_RP_HEADER_SIZE + xp_nasid_mask_bytes, - (BTE_NOTIFY | BTE_WACQUIRE), NULL); - if (bres != BTE_SUCCESS) - return xpc_map_bte_errors(bres); + ret = xp_remote_memcpy(xp_pa(remote_rp), *remote_rp_pa, + XPC_RP_HEADER_SIZE + xpc_nasid_mask_nbytes); + if (ret != xpSuccess) + return ret; if (discovered_nasids != NULL) { - u64 *remote_part_nasids = XPC_RP_PART_NASIDS(remote_rp); - - for (i = 0; i < xp_nasid_mask_words; i++) - discovered_nasids[i] |= remote_part_nasids[i]; - } - - /* check that the partid is for another partition */ + unsigned long *remote_part_nasids = + XPC_RP_PART_NASIDS(remote_rp); - if (remote_rp->partid < 1 || - remote_rp->partid > (XP_MAX_PARTITIONS - 1)) { - return xpInvalidPartid; + for (l = 0; l < xpc_nasid_mask_nlongs; l++) + discovered_nasids[l] |= remote_part_nasids[l]; } - if (remote_rp->partid == sn_partition_id) - return xpLocalPartid; + /* zero timestamp indicates the reserved page has not been setup */ + if (remote_rp->ts_jiffies == 0) + return xpRsvdPageNotSet; if (XPC_VERSION_MAJOR(remote_rp->version) != XPC_VERSION_MAJOR(XPC_RP_VERSION)) { return xpBadVersion; } - return xpSuccess; -} - -/* - * Get a copy of the remote partition's XPC variables from the reserved page. - * - * remote_vars points to a buffer that is cacheline aligned for BTE copies and - * assumed to be of size XPC_RP_VARS_SIZE. - */ -static enum xp_retval -xpc_get_remote_vars(u64 remote_vars_pa, struct xpc_vars *remote_vars) -{ - int bres; - - if (remote_vars_pa == 0) - return xpVarsNotSet; - - /* pull over the cross partition variables */ - bres = xp_bte_copy(remote_vars_pa, (u64)remote_vars, XPC_RP_VARS_SIZE, - (BTE_NOTIFY | BTE_WACQUIRE), NULL); - if (bres != BTE_SUCCESS) - return xpc_map_bte_errors(bres); - - if (XPC_VERSION_MAJOR(remote_vars->version) != - XPC_VERSION_MAJOR(XPC_V_VERSION)) { - return xpBadVersion; - } - - return xpSuccess; -} - -/* - * Update the remote partition's info. - */ -static void -xpc_update_partition_info(struct xpc_partition *part, u8 remote_rp_version, - struct timespec *remote_rp_stamp, u64 remote_rp_pa, - u64 remote_vars_pa, struct xpc_vars *remote_vars) -{ - part->remote_rp_version = remote_rp_version; - dev_dbg(xpc_part, " remote_rp_version = 0x%016x\n", - part->remote_rp_version); - - part->remote_rp_stamp = *remote_rp_stamp; - dev_dbg(xpc_part, " remote_rp_stamp (tv_sec = 0x%lx tv_nsec = 0x%lx\n", - part->remote_rp_stamp.tv_sec, part->remote_rp_stamp.tv_nsec); - - part->remote_rp_pa = remote_rp_pa; - dev_dbg(xpc_part, " remote_rp_pa = 0x%016lx\n", part->remote_rp_pa); - - part->remote_vars_pa = remote_vars_pa; - dev_dbg(xpc_part, " remote_vars_pa = 0x%016lx\n", - part->remote_vars_pa); - - part->last_heartbeat = remote_vars->heartbeat; - dev_dbg(xpc_part, " last_heartbeat = 0x%016lx\n", - part->last_heartbeat); - - part->remote_vars_part_pa = remote_vars->vars_part_pa; - dev_dbg(xpc_part, " remote_vars_part_pa = 0x%016lx\n", - part->remote_vars_part_pa); - - part->remote_act_nasid = remote_vars->act_nasid; - dev_dbg(xpc_part, " remote_act_nasid = 0x%x\n", - part->remote_act_nasid); - - part->remote_act_phys_cpuid = remote_vars->act_phys_cpuid; - dev_dbg(xpc_part, " remote_act_phys_cpuid = 0x%x\n", - part->remote_act_phys_cpuid); - - part->remote_amos_page_pa = remote_vars->amos_page_pa; - dev_dbg(xpc_part, " remote_amos_page_pa = 0x%lx\n", - part->remote_amos_page_pa); - - part->remote_vars_version = remote_vars->version; - dev_dbg(xpc_part, " remote_vars_version = 0x%x\n", - part->remote_vars_version); -} - -/* - * Prior code has determined the nasid which generated an IPI. Inspect - * that nasid to determine if its partition needs to be activated or - * deactivated. - * - * A partition is consider "awaiting activation" if our partition - * flags indicate it is not active and it has a heartbeat. A - * partition is considered "awaiting deactivation" if our partition - * flags indicate it is active but it has no heartbeat or it is not - * sending its heartbeat to us. - * - * To determine the heartbeat, the remote nasid must have a properly - * initialized reserved page. - */ -static void -xpc_identify_act_IRQ_req(int nasid) -{ - struct xpc_rsvd_page *remote_rp; - struct xpc_vars *remote_vars; - u64 remote_rp_pa; - u64 remote_vars_pa; - int remote_rp_version; - int reactivate = 0; - int stamp_diff; - struct timespec remote_rp_stamp = { 0, 0 }; - short partid; - struct xpc_partition *part; - enum xp_retval ret; - - /* pull over the reserved page structure */ - - remote_rp = (struct xpc_rsvd_page *)xpc_remote_copy_buffer; - - ret = xpc_get_remote_rp(nasid, NULL, remote_rp, &remote_rp_pa); - if (ret != xpSuccess) { - dev_warn(xpc_part, "unable to get reserved page from nasid %d, " - "which sent interrupt, reason=%d\n", nasid, ret); - return; - } - - remote_vars_pa = remote_rp->vars_pa; - remote_rp_version = remote_rp->version; - if (XPC_SUPPORTS_RP_STAMP(remote_rp_version)) - remote_rp_stamp = remote_rp->stamp; - - partid = remote_rp->partid; - part = &xpc_partitions[partid]; - - /* pull over the cross partition variables */ - - remote_vars = (struct xpc_vars *)xpc_remote_copy_buffer; - - ret = xpc_get_remote_vars(remote_vars_pa, remote_vars); - if (ret != xpSuccess) { - - dev_warn(xpc_part, "unable to get XPC variables from nasid %d, " - "which sent interrupt, reason=%d\n", nasid, ret); - - XPC_DEACTIVATE_PARTITION(part, ret); - return; - } - - part->act_IRQ_rcvd++; - - dev_dbg(xpc_part, "partid for nasid %d is %d; IRQs = %d; HB = " - "%ld:0x%lx\n", (int)nasid, (int)partid, part->act_IRQ_rcvd, - remote_vars->heartbeat, remote_vars->heartbeating_to_mask); - - if (xpc_partition_disengaged(part) && - part->act_state == XPC_P_INACTIVE) { - - xpc_update_partition_info(part, remote_rp_version, - &remote_rp_stamp, remote_rp_pa, - remote_vars_pa, remote_vars); - - if (XPC_SUPPORTS_DISENGAGE_REQUEST(part->remote_vars_version)) { - if (xpc_partition_disengage_requested(1UL << partid)) { - /* - * Other side is waiting on us to disengage, - * even though we already have. - */ - return; - } - } else { - /* other side doesn't support disengage requests */ - xpc_clear_partition_disengage_request(1UL << partid); - } - - xpc_activate_partition(part); - return; - } - - DBUG_ON(part->remote_rp_version == 0); - DBUG_ON(part->remote_vars_version == 0); - - if (!XPC_SUPPORTS_RP_STAMP(part->remote_rp_version)) { - DBUG_ON(XPC_SUPPORTS_DISENGAGE_REQUEST(part-> - remote_vars_version)); - - if (!XPC_SUPPORTS_RP_STAMP(remote_rp_version)) { - DBUG_ON(XPC_SUPPORTS_DISENGAGE_REQUEST(remote_vars-> - version)); - /* see if the other side rebooted */ - if (part->remote_amos_page_pa == - remote_vars->amos_page_pa && - xpc_hb_allowed(sn_partition_id, remote_vars)) { - /* doesn't look that way, so ignore the IPI */ - return; - } - } - - /* - * Other side rebooted and previous XPC didn't support the - * disengage request, so we don't need to do anything special. - */ - - xpc_update_partition_info(part, remote_rp_version, - &remote_rp_stamp, remote_rp_pa, - remote_vars_pa, remote_vars); - part->reactivate_nasid = nasid; - XPC_DEACTIVATE_PARTITION(part, xpReactivating); - return; - } - - DBUG_ON(!XPC_SUPPORTS_DISENGAGE_REQUEST(part->remote_vars_version)); - - if (!XPC_SUPPORTS_RP_STAMP(remote_rp_version)) { - DBUG_ON(!XPC_SUPPORTS_DISENGAGE_REQUEST(remote_vars->version)); - - /* - * Other side rebooted and previous XPC did support the - * disengage request, but the new one doesn't. - */ - - xpc_clear_partition_engaged(1UL << partid); - xpc_clear_partition_disengage_request(1UL << partid); - - xpc_update_partition_info(part, remote_rp_version, - &remote_rp_stamp, remote_rp_pa, - remote_vars_pa, remote_vars); - reactivate = 1; - - } else { - DBUG_ON(!XPC_SUPPORTS_DISENGAGE_REQUEST(remote_vars->version)); - - stamp_diff = xpc_compare_stamps(&part->remote_rp_stamp, - &remote_rp_stamp); - if (stamp_diff != 0) { - DBUG_ON(stamp_diff >= 0); - - /* - * Other side rebooted and the previous XPC did support - * the disengage request, as does the new one. - */ - - DBUG_ON(xpc_partition_engaged(1UL << partid)); - DBUG_ON(xpc_partition_disengage_requested(1UL << - partid)); - - xpc_update_partition_info(part, remote_rp_version, - &remote_rp_stamp, - remote_rp_pa, remote_vars_pa, - remote_vars); - reactivate = 1; - } - } - - if (part->disengage_request_timeout > 0 && - !xpc_partition_disengaged(part)) { - /* still waiting on other side to disengage from us */ - return; - } - - if (reactivate) { - part->reactivate_nasid = nasid; - XPC_DEACTIVATE_PARTITION(part, xpReactivating); - - } else if (XPC_SUPPORTS_DISENGAGE_REQUEST(part->remote_vars_version) && - xpc_partition_disengage_requested(1UL << partid)) { - XPC_DEACTIVATE_PARTITION(part, xpOtherGoingDown); + /* check that both remote and local partids are valid for each side */ + if (remote_rp->SAL_partid < 0 || + remote_rp->SAL_partid >= xp_max_npartitions || + remote_rp->max_npartitions <= xp_partition_id) { + return xpInvalidPartid; } -} -/* - * Loop through the activation AMO variables and process any bits - * which are set. Each bit indicates a nasid sending a partition - * activation or deactivation request. - * - * Return #of IRQs detected. - */ -int -xpc_identify_act_IRQ_sender(void) -{ - int word, bit; - u64 nasid_mask; - u64 nasid; /* remote nasid */ - int n_IRQs_detected = 0; - AMO_t *act_amos; - - act_amos = xpc_vars->amos_page + XPC_ACTIVATE_IRQ_AMOS; - - /* scan through act AMO variable looking for non-zero entries */ - for (word = 0; word < xp_nasid_mask_words; word++) { - - if (xpc_exiting) - break; - - nasid_mask = xpc_IPI_receive(&act_amos[word]); - if (nasid_mask == 0) { - /* no IRQs from nasids in this variable */ - continue; - } - - dev_dbg(xpc_part, "AMO[%d] gave back 0x%lx\n", word, - nasid_mask); - - /* - * If this nasid has been added to the machine since - * our partition was reset, this will retain the - * remote nasid in our reserved pages machine mask. - * This is used in the event of module reload. - */ - xpc_mach_nasids[word] |= nasid_mask; - - /* locate the nasid(s) which sent interrupts */ + if (remote_rp->SAL_partid == xp_partition_id) + return xpLocalPartid; - for (bit = 0; bit < (8 * sizeof(u64)); bit++) { - if (nasid_mask & (1UL << bit)) { - n_IRQs_detected++; - nasid = XPC_NASID_FROM_W_B(word, bit); - dev_dbg(xpc_part, "interrupt from nasid %ld\n", - nasid); - xpc_identify_act_IRQ_req(nasid); - } - } - } - return n_IRQs_detected; + return xpSuccess; } /* - * See if the other side has responded to a partition disengage request - * from us. + * See if the other side has responded to a partition deactivate request + * from us. Though we requested the remote partition to deactivate with regard + * to us, we really only need to wait for the other side to disengage from us. */ int xpc_partition_disengaged(struct xpc_partition *part) @@ -828,41 +264,37 @@ xpc_partition_disengaged(struct xpc_partition *part) short partid = XPC_PARTID(part); int disengaged; - disengaged = (xpc_partition_engaged(1UL << partid) == 0); - if (part->disengage_request_timeout) { + disengaged = !xpc_partition_engaged(partid); + if (part->disengage_timeout) { if (!disengaged) { - if (time_before(jiffies, - part->disengage_request_timeout)) { + if (time_is_after_jiffies(part->disengage_timeout)) { /* timelimit hasn't been reached yet */ return 0; } /* - * Other side hasn't responded to our disengage + * Other side hasn't responded to our deactivate * request in a timely fashion, so assume it's dead. */ - dev_info(xpc_part, "disengage from remote partition %d " - "timed out\n", partid); - xpc_disengage_request_timedout = 1; - xpc_clear_partition_engaged(1UL << partid); + dev_info(xpc_part, "deactivate request to remote " + "partition %d timed out\n", partid); + xpc_disengage_timedout = 1; + xpc_assume_partition_disengaged(partid); disengaged = 1; } - part->disengage_request_timeout = 0; + part->disengage_timeout = 0; /* cancel the timer function, provided it's not us */ - if (!in_interrupt()) { - del_singleshot_timer_sync(&part-> - disengage_request_timer); - } + if (!in_interrupt()) + del_singleshot_timer_sync(&part->disengage_timer); - DBUG_ON(part->act_state != XPC_P_DEACTIVATING && - part->act_state != XPC_P_INACTIVE); - if (part->act_state != XPC_P_INACTIVE) + DBUG_ON(part->act_state != XPC_P_AS_DEACTIVATING && + part->act_state != XPC_P_AS_INACTIVE); + if (part->act_state != XPC_P_AS_INACTIVE) xpc_wakeup_channel_mgr(part); - if (XPC_SUPPORTS_DISENGAGE_REQUEST(part->remote_vars_version)) - xpc_cancel_partition_disengage_request(part); + xpc_cancel_partition_deactivation_request(part); } return disengaged; } @@ -879,8 +311,8 @@ xpc_mark_partition_active(struct xpc_partition *part) dev_dbg(xpc_part, "setting partition %d to ACTIVE\n", XPC_PARTID(part)); spin_lock_irqsave(&part->act_lock, irq_flags); - if (part->act_state == XPC_P_ACTIVATING) { - part->act_state = XPC_P_ACTIVE; + if (part->act_state == XPC_P_AS_ACTIVATING) { + part->act_state = XPC_P_AS_ACTIVE; ret = xpSuccess; } else { DBUG_ON(part->reason == xpSuccess); @@ -892,7 +324,7 @@ xpc_mark_partition_active(struct xpc_partition *part) } /* - * Notify XPC that the partition is down. + * Start the process of deactivating the specified partition. */ void xpc_deactivate_partition(const int line, struct xpc_partition *part, @@ -902,16 +334,16 @@ xpc_deactivate_partition(const int line, struct xpc_partition *part, spin_lock_irqsave(&part->act_lock, irq_flags); - if (part->act_state == XPC_P_INACTIVE) { + if (part->act_state == XPC_P_AS_INACTIVE) { XPC_SET_REASON(part, reason, line); spin_unlock_irqrestore(&part->act_lock, irq_flags); if (reason == xpReactivating) { /* we interrupt ourselves to reactivate partition */ - xpc_IPI_send_reactivate(part); + xpc_request_partition_reactivation(part); } return; } - if (part->act_state == XPC_P_DEACTIVATING) { + if (part->act_state == XPC_P_AS_DEACTIVATING) { if ((part->reason == xpUnloading && reason != xpUnloading) || reason == xpReactivating) { XPC_SET_REASON(part, reason, line); @@ -920,22 +352,18 @@ xpc_deactivate_partition(const int line, struct xpc_partition *part, return; } - part->act_state = XPC_P_DEACTIVATING; + part->act_state = XPC_P_AS_DEACTIVATING; XPC_SET_REASON(part, reason, line); spin_unlock_irqrestore(&part->act_lock, irq_flags); - if (XPC_SUPPORTS_DISENGAGE_REQUEST(part->remote_vars_version)) { - xpc_request_partition_disengage(part); - xpc_IPI_send_disengage(part); + /* ask remote partition to deactivate with regard to us */ + xpc_request_partition_deactivation(part); - /* set a timelimit on the disengage request */ - part->disengage_request_timeout = jiffies + - (xpc_disengage_request_timelimit * HZ); - part->disengage_request_timer.expires = - part->disengage_request_timeout; - add_timer(&part->disengage_request_timer); - } + /* set a timelimit on the disengage phase of the deactivation request */ + part->disengage_timeout = jiffies + (xpc_disengage_timelimit * HZ); + part->disengage_timer.expires = part->disengage_timeout; + add_timer(&part->disengage_timer); dev_dbg(xpc_part, "bringing partition %d down, reason = %d\n", XPC_PARTID(part), reason); @@ -955,7 +383,7 @@ xpc_mark_partition_inactive(struct xpc_partition *part) XPC_PARTID(part)); spin_lock_irqsave(&part->act_lock, irq_flags); - part->act_state = XPC_P_INACTIVE; + part->act_state = XPC_P_AS_INACTIVE; spin_unlock_irqrestore(&part->act_lock, irq_flags); part->remote_rp_pa = 0; } @@ -974,28 +402,22 @@ xpc_discovery(void) { void *remote_rp_base; struct xpc_rsvd_page *remote_rp; - struct xpc_vars *remote_vars; - u64 remote_rp_pa; - u64 remote_vars_pa; + unsigned long remote_rp_pa; int region; int region_size; int max_regions; int nasid; struct xpc_rsvd_page *rp; - short partid; - struct xpc_partition *part; - u64 *discovered_nasids; + unsigned long *discovered_nasids; enum xp_retval ret; remote_rp = xpc_kmalloc_cacheline_aligned(XPC_RP_HEADER_SIZE + - xp_nasid_mask_bytes, + xpc_nasid_mask_nbytes, GFP_KERNEL, &remote_rp_base); if (remote_rp == NULL) return; - remote_vars = (struct xpc_vars *)remote_rp; - - discovered_nasids = kzalloc(sizeof(u64) * xp_nasid_mask_words, + discovered_nasids = kzalloc(sizeof(long) * xpc_nasid_mask_nlongs, GFP_KERNEL); if (discovered_nasids == NULL) { kfree(remote_rp_base); @@ -1010,7 +432,7 @@ xpc_discovery(void) * protection is in regards to memory, IOI and IPI. */ max_regions = 64; - region_size = sn_region_size; + region_size = xp_region_size; switch (region_size) { case 128: @@ -1038,28 +460,28 @@ xpc_discovery(void) dev_dbg(xpc_part, "checking nasid %d\n", nasid); - if (XPC_NASID_IN_ARRAY(nasid, xpc_part_nasids)) { + if (test_bit(nasid / 2, xpc_part_nasids)) { dev_dbg(xpc_part, "PROM indicates Nasid %d is " "part of the local partition; skipping " "region\n", nasid); break; } - if (!(XPC_NASID_IN_ARRAY(nasid, xpc_mach_nasids))) { + if (!(test_bit(nasid / 2, xpc_mach_nasids))) { dev_dbg(xpc_part, "PROM indicates Nasid %d was " "not on Numa-Link network at reset\n", nasid); continue; } - if (XPC_NASID_IN_ARRAY(nasid, discovered_nasids)) { + if (test_bit(nasid / 2, discovered_nasids)) { dev_dbg(xpc_part, "Nasid %d is part of a " "partition which was previously " "discovered\n", nasid); continue; } - /* pull over the reserved page structure */ + /* pull over the rsvd page header & part_nasids mask */ ret = xpc_get_remote_rp(nasid, discovered_nasids, remote_rp, &remote_rp_pa); @@ -1074,72 +496,8 @@ xpc_discovery(void) continue; } - remote_vars_pa = remote_rp->vars_pa; - - partid = remote_rp->partid; - part = &xpc_partitions[partid]; - - /* pull over the cross partition variables */ - - ret = xpc_get_remote_vars(remote_vars_pa, remote_vars); - if (ret != xpSuccess) { - dev_dbg(xpc_part, "unable to get XPC variables " - "from nasid %d, reason=%d\n", nasid, - ret); - - XPC_DEACTIVATE_PARTITION(part, ret); - continue; - } - - if (part->act_state != XPC_P_INACTIVE) { - dev_dbg(xpc_part, "partition %d on nasid %d is " - "already activating\n", partid, nasid); - break; - } - - /* - * Register the remote partition's AMOs with SAL so it - * can handle and cleanup errors within that address - * range should the remote partition go down. We don't - * unregister this range because it is difficult to - * tell when outstanding writes to the remote partition - * are finished and thus when it is thus safe to - * unregister. This should not result in wasted space - * in the SAL xp_addr_region table because we should - * get the same page for remote_act_amos_pa after - * module reloads and system reboots. - */ - if (sn_register_xp_addr_region - (remote_vars->amos_page_pa, PAGE_SIZE, 1) < 0) { - dev_dbg(xpc_part, - "partition %d failed to " - "register xp_addr region 0x%016lx\n", - partid, remote_vars->amos_page_pa); - - XPC_SET_REASON(part, xpPhysAddrRegFailed, - __LINE__); - break; - } - - /* - * The remote nasid is valid and available. - * Send an interrupt to that nasid to notify - * it that we are ready to begin activation. - */ - dev_dbg(xpc_part, "sending an interrupt to AMO 0x%lx, " - "nasid %d, phys_cpuid 0x%x\n", - remote_vars->amos_page_pa, - remote_vars->act_nasid, - remote_vars->act_phys_cpuid); - - if (XPC_SUPPORTS_DISENGAGE_REQUEST(remote_vars-> - version)) { - part->remote_amos_page_pa = - remote_vars->amos_page_pa; - xpc_mark_partition_disengaged(part); - xpc_cancel_partition_disengage_request(part); - } - xpc_IPI_send_activate(remote_vars); + xpc_request_partition_activation(remote_rp, + remote_rp_pa, nasid); } } @@ -1155,20 +513,16 @@ enum xp_retval xpc_initiate_partid_to_nasids(short partid, void *nasid_mask) { struct xpc_partition *part; - u64 part_nasid_pa; - int bte_res; + unsigned long part_nasid_pa; part = &xpc_partitions[partid]; if (part->remote_rp_pa == 0) return xpPartitionDown; - memset(nasid_mask, 0, XP_NASID_MASK_BYTES); - - part_nasid_pa = (u64)XPC_RP_PART_NASIDS(part->remote_rp_pa); + memset(nasid_mask, 0, xpc_nasid_mask_nbytes); - bte_res = xp_bte_copy(part_nasid_pa, (u64)nasid_mask, - xp_nasid_mask_bytes, (BTE_NOTIFY | BTE_WACQUIRE), - NULL); + part_nasid_pa = (unsigned long)XPC_RP_PART_NASIDS(part->remote_rp_pa); - return xpc_map_bte_errors(bte_res); + return xp_remote_memcpy(xp_pa(nasid_mask), part_nasid_pa, + xpc_nasid_mask_nbytes); } diff --git a/drivers/misc/sgi-xp/xpc_sn2.c b/drivers/misc/sgi-xp/xpc_sn2.c new file mode 100644 index 00000000000..b4882ccf634 --- /dev/null +++ b/drivers/misc/sgi-xp/xpc_sn2.c @@ -0,0 +1,2404 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved. + */ + +/* + * Cross Partition Communication (XPC) sn2-based functions. + * + * Architecture specific implementation of common functions. + * + */ + +#include <linux/delay.h> +#include <asm/uncached.h> +#include <asm/sn/mspec.h> +#include <asm/sn/sn_sal.h> +#include "xpc.h" + +/* + * Define the number of u64s required to represent all the C-brick nasids + * as a bitmap. The cross-partition kernel modules deal only with + * C-brick nasids, thus the need for bitmaps which don't account for + * odd-numbered (non C-brick) nasids. + */ +#define XPC_MAX_PHYSNODES_SN2 (MAX_NUMALINK_NODES / 2) +#define XP_NASID_MASK_BYTES_SN2 ((XPC_MAX_PHYSNODES_SN2 + 7) / 8) +#define XP_NASID_MASK_WORDS_SN2 ((XPC_MAX_PHYSNODES_SN2 + 63) / 64) + +/* + * Memory for XPC's amo variables is allocated by the MSPEC driver. These + * pages are located in the lowest granule. The lowest granule uses 4k pages + * for cached references and an alternate TLB handler to never provide a + * cacheable mapping for the entire region. This will prevent speculative + * reading of cached copies of our lines from being issued which will cause + * a PI FSB Protocol error to be generated by the SHUB. For XPC, we need 64 + * amo variables (based on XP_MAX_NPARTITIONS_SN2) to identify the senders of + * NOTIFY IRQs, 128 amo variables (based on XP_NASID_MASK_WORDS_SN2) to identify + * the senders of ACTIVATE IRQs, 1 amo variable to identify which remote + * partitions (i.e., XPCs) consider themselves currently engaged with the + * local XPC and 1 amo variable to request partition deactivation. + */ +#define XPC_NOTIFY_IRQ_AMOS_SN2 0 +#define XPC_ACTIVATE_IRQ_AMOS_SN2 (XPC_NOTIFY_IRQ_AMOS_SN2 + \ + XP_MAX_NPARTITIONS_SN2) +#define XPC_ENGAGED_PARTITIONS_AMO_SN2 (XPC_ACTIVATE_IRQ_AMOS_SN2 + \ + XP_NASID_MASK_WORDS_SN2) +#define XPC_DEACTIVATE_REQUEST_AMO_SN2 (XPC_ENGAGED_PARTITIONS_AMO_SN2 + 1) + +/* + * Buffer used to store a local copy of portions of a remote partition's + * reserved page (either its header and part_nasids mask, or its vars). + */ +static void *xpc_remote_copy_buffer_base_sn2; +static char *xpc_remote_copy_buffer_sn2; + +static struct xpc_vars_sn2 *xpc_vars_sn2; +static struct xpc_vars_part_sn2 *xpc_vars_part_sn2; + +static int +xpc_setup_partitions_sn_sn2(void) +{ + /* nothing needs to be done */ + return 0; +} + +/* SH_IPI_ACCESS shub register value on startup */ +static u64 xpc_sh1_IPI_access_sn2; +static u64 xpc_sh2_IPI_access0_sn2; +static u64 xpc_sh2_IPI_access1_sn2; +static u64 xpc_sh2_IPI_access2_sn2; +static u64 xpc_sh2_IPI_access3_sn2; + +/* + * Change protections to allow IPI operations. + */ +static void +xpc_allow_IPI_ops_sn2(void) +{ + int node; + int nasid; + + /* !!! The following should get moved into SAL. */ + if (is_shub2()) { + xpc_sh2_IPI_access0_sn2 = + (u64)HUB_L((u64 *)LOCAL_MMR_ADDR(SH2_IPI_ACCESS0)); + xpc_sh2_IPI_access1_sn2 = + (u64)HUB_L((u64 *)LOCAL_MMR_ADDR(SH2_IPI_ACCESS1)); + xpc_sh2_IPI_access2_sn2 = + (u64)HUB_L((u64 *)LOCAL_MMR_ADDR(SH2_IPI_ACCESS2)); + xpc_sh2_IPI_access3_sn2 = + (u64)HUB_L((u64 *)LOCAL_MMR_ADDR(SH2_IPI_ACCESS3)); + + for_each_online_node(node) { + nasid = cnodeid_to_nasid(node); + HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS0), + -1UL); + HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS1), + -1UL); + HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS2), + -1UL); + HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS3), + -1UL); + } + } else { + xpc_sh1_IPI_access_sn2 = + (u64)HUB_L((u64 *)LOCAL_MMR_ADDR(SH1_IPI_ACCESS)); + + for_each_online_node(node) { + nasid = cnodeid_to_nasid(node); + HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH1_IPI_ACCESS), + -1UL); + } + } +} + +/* + * Restrict protections to disallow IPI operations. + */ +static void +xpc_disallow_IPI_ops_sn2(void) +{ + int node; + int nasid; + + /* !!! The following should get moved into SAL. */ + if (is_shub2()) { + for_each_online_node(node) { + nasid = cnodeid_to_nasid(node); + HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS0), + xpc_sh2_IPI_access0_sn2); + HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS1), + xpc_sh2_IPI_access1_sn2); + HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS2), + xpc_sh2_IPI_access2_sn2); + HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS3), + xpc_sh2_IPI_access3_sn2); + } + } else { + for_each_online_node(node) { + nasid = cnodeid_to_nasid(node); + HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH1_IPI_ACCESS), + xpc_sh1_IPI_access_sn2); + } + } +} + +/* + * The following set of functions are used for the sending and receiving of + * IRQs (also known as IPIs). There are two flavors of IRQs, one that is + * associated with partition activity (SGI_XPC_ACTIVATE) and the other that + * is associated with channel activity (SGI_XPC_NOTIFY). + */ + +static u64 +xpc_receive_IRQ_amo_sn2(struct amo *amo) +{ + return FETCHOP_LOAD_OP(TO_AMO((u64)&amo->variable), FETCHOP_CLEAR); +} + +static enum xp_retval +xpc_send_IRQ_sn2(struct amo *amo, u64 flag, int nasid, int phys_cpuid, + int vector) +{ + int ret = 0; + unsigned long irq_flags; + + local_irq_save(irq_flags); + + FETCHOP_STORE_OP(TO_AMO((u64)&amo->variable), FETCHOP_OR, flag); + sn_send_IPI_phys(nasid, phys_cpuid, vector, 0); + + /* + * We must always use the nofault function regardless of whether we + * are on a Shub 1.1 system or a Shub 1.2 slice 0xc processor. If we + * didn't, we'd never know that the other partition is down and would + * keep sending IRQs and amos to it until the heartbeat times out. + */ + ret = xp_nofault_PIOR((u64 *)GLOBAL_MMR_ADDR(NASID_GET(&amo->variable), + xp_nofault_PIOR_target)); + + local_irq_restore(irq_flags); + + return (ret == 0) ? xpSuccess : xpPioReadError; +} + +static struct amo * +xpc_init_IRQ_amo_sn2(int index) +{ + struct amo *amo = xpc_vars_sn2->amos_page + index; + + (void)xpc_receive_IRQ_amo_sn2(amo); /* clear amo variable */ + return amo; +} + +/* + * Functions associated with SGI_XPC_ACTIVATE IRQ. + */ + +/* + * Notify the heartbeat check thread that an activate IRQ has been received. + */ +static irqreturn_t +xpc_handle_activate_IRQ_sn2(int irq, void *dev_id) +{ + unsigned long irq_flags; + + spin_lock_irqsave(&xpc_activate_IRQ_rcvd_lock, irq_flags); + xpc_activate_IRQ_rcvd++; + spin_unlock_irqrestore(&xpc_activate_IRQ_rcvd_lock, irq_flags); + + wake_up_interruptible(&xpc_activate_IRQ_wq); + return IRQ_HANDLED; +} + +/* + * Flag the appropriate amo variable and send an IRQ to the specified node. + */ +static void +xpc_send_activate_IRQ_sn2(unsigned long amos_page_pa, int from_nasid, + int to_nasid, int to_phys_cpuid) +{ + struct amo *amos = (struct amo *)__va(amos_page_pa + + (XPC_ACTIVATE_IRQ_AMOS_SN2 * + sizeof(struct amo))); + + (void)xpc_send_IRQ_sn2(&amos[BIT_WORD(from_nasid / 2)], + BIT_MASK(from_nasid / 2), to_nasid, + to_phys_cpuid, SGI_XPC_ACTIVATE); +} + +static void +xpc_send_local_activate_IRQ_sn2(int from_nasid) +{ + unsigned long irq_flags; + struct amo *amos = (struct amo *)__va(xpc_vars_sn2->amos_page_pa + + (XPC_ACTIVATE_IRQ_AMOS_SN2 * + sizeof(struct amo))); + + /* fake the sending and receipt of an activate IRQ from remote nasid */ + FETCHOP_STORE_OP(TO_AMO((u64)&amos[BIT_WORD(from_nasid / 2)].variable), + FETCHOP_OR, BIT_MASK(from_nasid / 2)); + + spin_lock_irqsave(&xpc_activate_IRQ_rcvd_lock, irq_flags); + xpc_activate_IRQ_rcvd++; + spin_unlock_irqrestore(&xpc_activate_IRQ_rcvd_lock, irq_flags); + + wake_up_interruptible(&xpc_activate_IRQ_wq); +} + +/* + * Functions associated with SGI_XPC_NOTIFY IRQ. + */ + +/* + * Check to see if any chctl flags were sent from the specified partition. + */ +static void +xpc_check_for_sent_chctl_flags_sn2(struct xpc_partition *part) +{ + union xpc_channel_ctl_flags chctl; + unsigned long irq_flags; + + chctl.all_flags = xpc_receive_IRQ_amo_sn2(part->sn.sn2. + local_chctl_amo_va); + if (chctl.all_flags == 0) + return; + + spin_lock_irqsave(&part->chctl_lock, irq_flags); + part->chctl.all_flags |= chctl.all_flags; + spin_unlock_irqrestore(&part->chctl_lock, irq_flags); + + dev_dbg(xpc_chan, "received notify IRQ from partid=%d, chctl.all_flags=" + "0x%lx\n", XPC_PARTID(part), chctl.all_flags); + + xpc_wakeup_channel_mgr(part); +} + +/* + * Handle the receipt of a SGI_XPC_NOTIFY IRQ by seeing whether the specified + * partition actually sent it. Since SGI_XPC_NOTIFY IRQs may be shared by more + * than one partition, we use an amo structure per partition to indicate + * whether a partition has sent an IRQ or not. If it has, then wake up the + * associated kthread to handle it. + * + * All SGI_XPC_NOTIFY IRQs received by XPC are the result of IRQs sent by XPC + * running on other partitions. + * + * Noteworthy Arguments: + * + * irq - Interrupt ReQuest number. NOT USED. + * + * dev_id - partid of IRQ's potential sender. + */ +static irqreturn_t +xpc_handle_notify_IRQ_sn2(int irq, void *dev_id) +{ + short partid = (short)(u64)dev_id; + struct xpc_partition *part = &xpc_partitions[partid]; + + DBUG_ON(partid < 0 || partid >= XP_MAX_NPARTITIONS_SN2); + + if (xpc_part_ref(part)) { + xpc_check_for_sent_chctl_flags_sn2(part); + + xpc_part_deref(part); + } + return IRQ_HANDLED; +} + +/* + * Check to see if xpc_handle_notify_IRQ_sn2() dropped any IRQs on the floor + * because the write to their associated amo variable completed after the IRQ + * was received. + */ +static void +xpc_check_for_dropped_notify_IRQ_sn2(struct xpc_partition *part) +{ + struct xpc_partition_sn2 *part_sn2 = &part->sn.sn2; + + if (xpc_part_ref(part)) { + xpc_check_for_sent_chctl_flags_sn2(part); + + part_sn2->dropped_notify_IRQ_timer.expires = jiffies + + XPC_DROPPED_NOTIFY_IRQ_WAIT_INTERVAL; + add_timer(&part_sn2->dropped_notify_IRQ_timer); + xpc_part_deref(part); + } +} + +/* + * Send a notify IRQ to the remote partition that is associated with the + * specified channel. + */ +static void +xpc_send_notify_IRQ_sn2(struct xpc_channel *ch, u8 chctl_flag, + char *chctl_flag_string, unsigned long *irq_flags) +{ + struct xpc_partition *part = &xpc_partitions[ch->partid]; + struct xpc_partition_sn2 *part_sn2 = &part->sn.sn2; + union xpc_channel_ctl_flags chctl = { 0 }; + enum xp_retval ret; + + if (likely(part->act_state != XPC_P_AS_DEACTIVATING)) { + chctl.flags[ch->number] = chctl_flag; + ret = xpc_send_IRQ_sn2(part_sn2->remote_chctl_amo_va, + chctl.all_flags, + part_sn2->notify_IRQ_nasid, + part_sn2->notify_IRQ_phys_cpuid, + SGI_XPC_NOTIFY); + dev_dbg(xpc_chan, "%s sent to partid=%d, channel=%d, ret=%d\n", + chctl_flag_string, ch->partid, ch->number, ret); + if (unlikely(ret != xpSuccess)) { + if (irq_flags != NULL) + spin_unlock_irqrestore(&ch->lock, *irq_flags); + XPC_DEACTIVATE_PARTITION(part, ret); + if (irq_flags != NULL) + spin_lock_irqsave(&ch->lock, *irq_flags); + } + } +} + +#define XPC_SEND_NOTIFY_IRQ_SN2(_ch, _ipi_f, _irq_f) \ + xpc_send_notify_IRQ_sn2(_ch, _ipi_f, #_ipi_f, _irq_f) + +/* + * Make it look like the remote partition, which is associated with the + * specified channel, sent us a notify IRQ. This faked IRQ will be handled + * by xpc_check_for_dropped_notify_IRQ_sn2(). + */ +static void +xpc_send_local_notify_IRQ_sn2(struct xpc_channel *ch, u8 chctl_flag, + char *chctl_flag_string) +{ + struct xpc_partition *part = &xpc_partitions[ch->partid]; + union xpc_channel_ctl_flags chctl = { 0 }; + + chctl.flags[ch->number] = chctl_flag; + FETCHOP_STORE_OP(TO_AMO((u64)&part->sn.sn2.local_chctl_amo_va-> + variable), FETCHOP_OR, chctl.all_flags); + dev_dbg(xpc_chan, "%s sent local from partid=%d, channel=%d\n", + chctl_flag_string, ch->partid, ch->number); +} + +#define XPC_SEND_LOCAL_NOTIFY_IRQ_SN2(_ch, _ipi_f) \ + xpc_send_local_notify_IRQ_sn2(_ch, _ipi_f, #_ipi_f) + +static void +xpc_send_chctl_closerequest_sn2(struct xpc_channel *ch, + unsigned long *irq_flags) +{ + struct xpc_openclose_args *args = ch->sn.sn2.local_openclose_args; + + args->reason = ch->reason; + XPC_SEND_NOTIFY_IRQ_SN2(ch, XPC_CHCTL_CLOSEREQUEST, irq_flags); +} + +static void +xpc_send_chctl_closereply_sn2(struct xpc_channel *ch, unsigned long *irq_flags) +{ + XPC_SEND_NOTIFY_IRQ_SN2(ch, XPC_CHCTL_CLOSEREPLY, irq_flags); +} + +static void +xpc_send_chctl_openrequest_sn2(struct xpc_channel *ch, unsigned long *irq_flags) +{ + struct xpc_openclose_args *args = ch->sn.sn2.local_openclose_args; + + args->entry_size = ch->entry_size; + args->local_nentries = ch->local_nentries; + XPC_SEND_NOTIFY_IRQ_SN2(ch, XPC_CHCTL_OPENREQUEST, irq_flags); +} + +static void +xpc_send_chctl_openreply_sn2(struct xpc_channel *ch, unsigned long *irq_flags) +{ + struct xpc_openclose_args *args = ch->sn.sn2.local_openclose_args; + + args->remote_nentries = ch->remote_nentries; + args->local_nentries = ch->local_nentries; + args->local_msgqueue_pa = xp_pa(ch->sn.sn2.local_msgqueue); + XPC_SEND_NOTIFY_IRQ_SN2(ch, XPC_CHCTL_OPENREPLY, irq_flags); +} + +static void +xpc_send_chctl_msgrequest_sn2(struct xpc_channel *ch) +{ + XPC_SEND_NOTIFY_IRQ_SN2(ch, XPC_CHCTL_MSGREQUEST, NULL); +} + +static void +xpc_send_chctl_local_msgrequest_sn2(struct xpc_channel *ch) +{ + XPC_SEND_LOCAL_NOTIFY_IRQ_SN2(ch, XPC_CHCTL_MSGREQUEST); +} + +static void +xpc_save_remote_msgqueue_pa_sn2(struct xpc_channel *ch, + unsigned long msgqueue_pa) +{ + ch->sn.sn2.remote_msgqueue_pa = msgqueue_pa; +} + +/* + * This next set of functions are used to keep track of when a partition is + * potentially engaged in accessing memory belonging to another partition. + */ + +static void +xpc_indicate_partition_engaged_sn2(struct xpc_partition *part) +{ + unsigned long irq_flags; + struct amo *amo = (struct amo *)__va(part->sn.sn2.remote_amos_page_pa + + (XPC_ENGAGED_PARTITIONS_AMO_SN2 * + sizeof(struct amo))); + + local_irq_save(irq_flags); + + /* set bit corresponding to our partid in remote partition's amo */ + FETCHOP_STORE_OP(TO_AMO((u64)&amo->variable), FETCHOP_OR, + BIT(sn_partition_id)); + + /* + * We must always use the nofault function regardless of whether we + * are on a Shub 1.1 system or a Shub 1.2 slice 0xc processor. If we + * didn't, we'd never know that the other partition is down and would + * keep sending IRQs and amos to it until the heartbeat times out. + */ + (void)xp_nofault_PIOR((u64 *)GLOBAL_MMR_ADDR(NASID_GET(&amo-> + variable), + xp_nofault_PIOR_target)); + + local_irq_restore(irq_flags); +} + +static void +xpc_indicate_partition_disengaged_sn2(struct xpc_partition *part) +{ + struct xpc_partition_sn2 *part_sn2 = &part->sn.sn2; + unsigned long irq_flags; + struct amo *amo = (struct amo *)__va(part_sn2->remote_amos_page_pa + + (XPC_ENGAGED_PARTITIONS_AMO_SN2 * + sizeof(struct amo))); + + local_irq_save(irq_flags); + + /* clear bit corresponding to our partid in remote partition's amo */ + FETCHOP_STORE_OP(TO_AMO((u64)&amo->variable), FETCHOP_AND, + ~BIT(sn_partition_id)); + + /* + * We must always use the nofault function regardless of whether we + * are on a Shub 1.1 system or a Shub 1.2 slice 0xc processor. If we + * didn't, we'd never know that the other partition is down and would + * keep sending IRQs and amos to it until the heartbeat times out. + */ + (void)xp_nofault_PIOR((u64 *)GLOBAL_MMR_ADDR(NASID_GET(&amo-> + variable), + xp_nofault_PIOR_target)); + + local_irq_restore(irq_flags); + + /* + * Send activate IRQ to get other side to see that we've cleared our + * bit in their engaged partitions amo. + */ + xpc_send_activate_IRQ_sn2(part_sn2->remote_amos_page_pa, + cnodeid_to_nasid(0), + part_sn2->activate_IRQ_nasid, + part_sn2->activate_IRQ_phys_cpuid); +} + +static void +xpc_assume_partition_disengaged_sn2(short partid) +{ + struct amo *amo = xpc_vars_sn2->amos_page + + XPC_ENGAGED_PARTITIONS_AMO_SN2; + + /* clear bit(s) based on partid mask in our partition's amo */ + FETCHOP_STORE_OP(TO_AMO((u64)&amo->variable), FETCHOP_AND, + ~BIT(partid)); +} + +static int +xpc_partition_engaged_sn2(short partid) +{ + struct amo *amo = xpc_vars_sn2->amos_page + + XPC_ENGAGED_PARTITIONS_AMO_SN2; + + /* our partition's amo variable ANDed with partid mask */ + return (FETCHOP_LOAD_OP(TO_AMO((u64)&amo->variable), FETCHOP_LOAD) & + BIT(partid)) != 0; +} + +static int +xpc_any_partition_engaged_sn2(void) +{ + struct amo *amo = xpc_vars_sn2->amos_page + + XPC_ENGAGED_PARTITIONS_AMO_SN2; + + /* our partition's amo variable */ + return FETCHOP_LOAD_OP(TO_AMO((u64)&amo->variable), FETCHOP_LOAD) != 0; +} + +/* original protection values for each node */ +static u64 xpc_prot_vec_sn2[MAX_NUMNODES]; + +/* + * Change protections to allow amo operations on non-Shub 1.1 systems. + */ +static enum xp_retval +xpc_allow_amo_ops_sn2(struct amo *amos_page) +{ + u64 nasid_array = 0; + int ret; + + /* + * On SHUB 1.1, we cannot call sn_change_memprotect() since the BIST + * collides with memory operations. On those systems we call + * xpc_allow_amo_ops_shub_wars_1_1_sn2() instead. + */ + if (!enable_shub_wars_1_1()) { + ret = sn_change_memprotect(ia64_tpa((u64)amos_page), PAGE_SIZE, + SN_MEMPROT_ACCESS_CLASS_1, + &nasid_array); + if (ret != 0) + return xpSalError; + } + return xpSuccess; +} + +/* + * Change protections to allow amo operations on Shub 1.1 systems. + */ +static void +xpc_allow_amo_ops_shub_wars_1_1_sn2(void) +{ + int node; + int nasid; + + if (!enable_shub_wars_1_1()) + return; + + for_each_online_node(node) { + nasid = cnodeid_to_nasid(node); + /* save current protection values */ + xpc_prot_vec_sn2[node] = + (u64)HUB_L((u64 *)GLOBAL_MMR_ADDR(nasid, + SH1_MD_DQLP_MMR_DIR_PRIVEC0)); + /* open up everything */ + HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, + SH1_MD_DQLP_MMR_DIR_PRIVEC0), + -1UL); + HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, + SH1_MD_DQRP_MMR_DIR_PRIVEC0), + -1UL); + } +} + +static enum xp_retval +xpc_get_partition_rsvd_page_pa_sn2(void *buf, u64 *cookie, unsigned long *rp_pa, + size_t *len) +{ + s64 status; + enum xp_retval ret; + + status = sn_partition_reserved_page_pa((u64)buf, cookie, rp_pa, len); + if (status == SALRET_OK) + ret = xpSuccess; + else if (status == SALRET_MORE_PASSES) + ret = xpNeedMoreInfo; + else + ret = xpSalError; + + return ret; +} + + +static int +xpc_setup_rsvd_page_sn_sn2(struct xpc_rsvd_page *rp) +{ + struct amo *amos_page; + int i; + int ret; + + xpc_vars_sn2 = XPC_RP_VARS(rp); + + rp->sn.vars_pa = xp_pa(xpc_vars_sn2); + + /* vars_part array follows immediately after vars */ + xpc_vars_part_sn2 = (struct xpc_vars_part_sn2 *)((u8 *)XPC_RP_VARS(rp) + + XPC_RP_VARS_SIZE); + + /* + * Before clearing xpc_vars_sn2, see if a page of amos had been + * previously allocated. If not we'll need to allocate one and set + * permissions so that cross-partition amos are allowed. + * + * The allocated amo page needs MCA reporting to remain disabled after + * XPC has unloaded. To make this work, we keep a copy of the pointer + * to this page (i.e., amos_page) in the struct xpc_vars_sn2 structure, + * which is pointed to by the reserved page, and re-use that saved copy + * on subsequent loads of XPC. This amo page is never freed, and its + * memory protections are never restricted. + */ + amos_page = xpc_vars_sn2->amos_page; + if (amos_page == NULL) { + amos_page = (struct amo *)TO_AMO(uncached_alloc_page(0, 1)); + if (amos_page == NULL) { + dev_err(xpc_part, "can't allocate page of amos\n"); + return -ENOMEM; + } + + /* + * Open up amo-R/W to cpu. This is done on Shub 1.1 systems + * when xpc_allow_amo_ops_shub_wars_1_1_sn2() is called. + */ + ret = xpc_allow_amo_ops_sn2(amos_page); + if (ret != xpSuccess) { + dev_err(xpc_part, "can't allow amo operations\n"); + uncached_free_page(__IA64_UNCACHED_OFFSET | + TO_PHYS((u64)amos_page), 1); + return -EPERM; + } + } + + /* clear xpc_vars_sn2 */ + memset(xpc_vars_sn2, 0, sizeof(struct xpc_vars_sn2)); + + xpc_vars_sn2->version = XPC_V_VERSION; + xpc_vars_sn2->activate_IRQ_nasid = cpuid_to_nasid(0); + xpc_vars_sn2->activate_IRQ_phys_cpuid = cpu_physical_id(0); + xpc_vars_sn2->vars_part_pa = xp_pa(xpc_vars_part_sn2); + xpc_vars_sn2->amos_page_pa = ia64_tpa((u64)amos_page); + xpc_vars_sn2->amos_page = amos_page; /* save for next load of XPC */ + + /* clear xpc_vars_part_sn2 */ + memset((u64 *)xpc_vars_part_sn2, 0, sizeof(struct xpc_vars_part_sn2) * + XP_MAX_NPARTITIONS_SN2); + + /* initialize the activate IRQ related amo variables */ + for (i = 0; i < xpc_nasid_mask_nlongs; i++) + (void)xpc_init_IRQ_amo_sn2(XPC_ACTIVATE_IRQ_AMOS_SN2 + i); + + /* initialize the engaged remote partitions related amo variables */ + (void)xpc_init_IRQ_amo_sn2(XPC_ENGAGED_PARTITIONS_AMO_SN2); + (void)xpc_init_IRQ_amo_sn2(XPC_DEACTIVATE_REQUEST_AMO_SN2); + + return 0; +} + +static void +xpc_increment_heartbeat_sn2(void) +{ + xpc_vars_sn2->heartbeat++; +} + +static void +xpc_offline_heartbeat_sn2(void) +{ + xpc_increment_heartbeat_sn2(); + xpc_vars_sn2->heartbeat_offline = 1; +} + +static void +xpc_online_heartbeat_sn2(void) +{ + xpc_increment_heartbeat_sn2(); + xpc_vars_sn2->heartbeat_offline = 0; +} + +static void +xpc_heartbeat_init_sn2(void) +{ + DBUG_ON(xpc_vars_sn2 == NULL); + + bitmap_zero(xpc_vars_sn2->heartbeating_to_mask, XP_MAX_NPARTITIONS_SN2); + xpc_heartbeating_to_mask = &xpc_vars_sn2->heartbeating_to_mask[0]; + xpc_online_heartbeat_sn2(); +} + +static void +xpc_heartbeat_exit_sn2(void) +{ + xpc_offline_heartbeat_sn2(); +} + +static enum xp_retval +xpc_get_remote_heartbeat_sn2(struct xpc_partition *part) +{ + struct xpc_vars_sn2 *remote_vars; + enum xp_retval ret; + + remote_vars = (struct xpc_vars_sn2 *)xpc_remote_copy_buffer_sn2; + + /* pull the remote vars structure that contains the heartbeat */ + ret = xp_remote_memcpy(xp_pa(remote_vars), + part->sn.sn2.remote_vars_pa, + XPC_RP_VARS_SIZE); + if (ret != xpSuccess) + return ret; + + dev_dbg(xpc_part, "partid=%d, heartbeat=%ld, last_heartbeat=%ld, " + "heartbeat_offline=%ld, HB_mask[0]=0x%lx\n", XPC_PARTID(part), + remote_vars->heartbeat, part->last_heartbeat, + remote_vars->heartbeat_offline, + remote_vars->heartbeating_to_mask[0]); + + if ((remote_vars->heartbeat == part->last_heartbeat && + remote_vars->heartbeat_offline == 0) || + !xpc_hb_allowed(sn_partition_id, + &remote_vars->heartbeating_to_mask)) { + ret = xpNoHeartbeat; + } else { + part->last_heartbeat = remote_vars->heartbeat; + } + + return ret; +} + +/* + * Get a copy of the remote partition's XPC variables from the reserved page. + * + * remote_vars points to a buffer that is cacheline aligned for BTE copies and + * assumed to be of size XPC_RP_VARS_SIZE. + */ +static enum xp_retval +xpc_get_remote_vars_sn2(unsigned long remote_vars_pa, + struct xpc_vars_sn2 *remote_vars) +{ + enum xp_retval ret; + + if (remote_vars_pa == 0) + return xpVarsNotSet; + + /* pull over the cross partition variables */ + ret = xp_remote_memcpy(xp_pa(remote_vars), remote_vars_pa, + XPC_RP_VARS_SIZE); + if (ret != xpSuccess) + return ret; + + if (XPC_VERSION_MAJOR(remote_vars->version) != + XPC_VERSION_MAJOR(XPC_V_VERSION)) { + return xpBadVersion; + } + + return xpSuccess; +} + +static void +xpc_request_partition_activation_sn2(struct xpc_rsvd_page *remote_rp, + unsigned long remote_rp_pa, int nasid) +{ + xpc_send_local_activate_IRQ_sn2(nasid); +} + +static void +xpc_request_partition_reactivation_sn2(struct xpc_partition *part) +{ + xpc_send_local_activate_IRQ_sn2(part->sn.sn2.activate_IRQ_nasid); +} + +static void +xpc_request_partition_deactivation_sn2(struct xpc_partition *part) +{ + struct xpc_partition_sn2 *part_sn2 = &part->sn.sn2; + unsigned long irq_flags; + struct amo *amo = (struct amo *)__va(part_sn2->remote_amos_page_pa + + (XPC_DEACTIVATE_REQUEST_AMO_SN2 * + sizeof(struct amo))); + + local_irq_save(irq_flags); + + /* set bit corresponding to our partid in remote partition's amo */ + FETCHOP_STORE_OP(TO_AMO((u64)&amo->variable), FETCHOP_OR, + BIT(sn_partition_id)); + + /* + * We must always use the nofault function regardless of whether we + * are on a Shub 1.1 system or a Shub 1.2 slice 0xc processor. If we + * didn't, we'd never know that the other partition is down and would + * keep sending IRQs and amos to it until the heartbeat times out. + */ + (void)xp_nofault_PIOR((u64 *)GLOBAL_MMR_ADDR(NASID_GET(&amo-> + variable), + xp_nofault_PIOR_target)); + + local_irq_restore(irq_flags); + + /* + * Send activate IRQ to get other side to see that we've set our + * bit in their deactivate request amo. + */ + xpc_send_activate_IRQ_sn2(part_sn2->remote_amos_page_pa, + cnodeid_to_nasid(0), + part_sn2->activate_IRQ_nasid, + part_sn2->activate_IRQ_phys_cpuid); +} + +static void +xpc_cancel_partition_deactivation_request_sn2(struct xpc_partition *part) +{ + unsigned long irq_flags; + struct amo *amo = (struct amo *)__va(part->sn.sn2.remote_amos_page_pa + + (XPC_DEACTIVATE_REQUEST_AMO_SN2 * + sizeof(struct amo))); + + local_irq_save(irq_flags); + + /* clear bit corresponding to our partid in remote partition's amo */ + FETCHOP_STORE_OP(TO_AMO((u64)&amo->variable), FETCHOP_AND, + ~BIT(sn_partition_id)); + + /* + * We must always use the nofault function regardless of whether we + * are on a Shub 1.1 system or a Shub 1.2 slice 0xc processor. If we + * didn't, we'd never know that the other partition is down and would + * keep sending IRQs and amos to it until the heartbeat times out. + */ + (void)xp_nofault_PIOR((u64 *)GLOBAL_MMR_ADDR(NASID_GET(&amo-> + variable), + xp_nofault_PIOR_target)); + + local_irq_restore(irq_flags); +} + +static int +xpc_partition_deactivation_requested_sn2(short partid) +{ + struct amo *amo = xpc_vars_sn2->amos_page + + XPC_DEACTIVATE_REQUEST_AMO_SN2; + + /* our partition's amo variable ANDed with partid mask */ + return (FETCHOP_LOAD_OP(TO_AMO((u64)&amo->variable), FETCHOP_LOAD) & + BIT(partid)) != 0; +} + +/* + * Update the remote partition's info. + */ +static void +xpc_update_partition_info_sn2(struct xpc_partition *part, u8 remote_rp_version, + unsigned long *remote_rp_ts_jiffies, + unsigned long remote_rp_pa, + unsigned long remote_vars_pa, + struct xpc_vars_sn2 *remote_vars) +{ + struct xpc_partition_sn2 *part_sn2 = &part->sn.sn2; + + part->remote_rp_version = remote_rp_version; + dev_dbg(xpc_part, " remote_rp_version = 0x%016x\n", + part->remote_rp_version); + + part->remote_rp_ts_jiffies = *remote_rp_ts_jiffies; + dev_dbg(xpc_part, " remote_rp_ts_jiffies = 0x%016lx\n", + part->remote_rp_ts_jiffies); + + part->remote_rp_pa = remote_rp_pa; + dev_dbg(xpc_part, " remote_rp_pa = 0x%016lx\n", part->remote_rp_pa); + + part_sn2->remote_vars_pa = remote_vars_pa; + dev_dbg(xpc_part, " remote_vars_pa = 0x%016lx\n", + part_sn2->remote_vars_pa); + + part->last_heartbeat = remote_vars->heartbeat; + dev_dbg(xpc_part, " last_heartbeat = 0x%016lx\n", + part->last_heartbeat); + + part_sn2->remote_vars_part_pa = remote_vars->vars_part_pa; + dev_dbg(xpc_part, " remote_vars_part_pa = 0x%016lx\n", + part_sn2->remote_vars_part_pa); + + part_sn2->activate_IRQ_nasid = remote_vars->activate_IRQ_nasid; + dev_dbg(xpc_part, " activate_IRQ_nasid = 0x%x\n", + part_sn2->activate_IRQ_nasid); + + part_sn2->activate_IRQ_phys_cpuid = + remote_vars->activate_IRQ_phys_cpuid; + dev_dbg(xpc_part, " activate_IRQ_phys_cpuid = 0x%x\n", + part_sn2->activate_IRQ_phys_cpuid); + + part_sn2->remote_amos_page_pa = remote_vars->amos_page_pa; + dev_dbg(xpc_part, " remote_amos_page_pa = 0x%lx\n", + part_sn2->remote_amos_page_pa); + + part_sn2->remote_vars_version = remote_vars->version; + dev_dbg(xpc_part, " remote_vars_version = 0x%x\n", + part_sn2->remote_vars_version); +} + +/* + * Prior code has determined the nasid which generated a activate IRQ. + * Inspect that nasid to determine if its partition needs to be activated + * or deactivated. + * + * A partition is considered "awaiting activation" if our partition + * flags indicate it is not active and it has a heartbeat. A + * partition is considered "awaiting deactivation" if our partition + * flags indicate it is active but it has no heartbeat or it is not + * sending its heartbeat to us. + * + * To determine the heartbeat, the remote nasid must have a properly + * initialized reserved page. + */ +static void +xpc_identify_activate_IRQ_req_sn2(int nasid) +{ + struct xpc_rsvd_page *remote_rp; + struct xpc_vars_sn2 *remote_vars; + unsigned long remote_rp_pa; + unsigned long remote_vars_pa; + int remote_rp_version; + int reactivate = 0; + unsigned long remote_rp_ts_jiffies = 0; + short partid; + struct xpc_partition *part; + struct xpc_partition_sn2 *part_sn2; + enum xp_retval ret; + + /* pull over the reserved page structure */ + + remote_rp = (struct xpc_rsvd_page *)xpc_remote_copy_buffer_sn2; + + ret = xpc_get_remote_rp(nasid, NULL, remote_rp, &remote_rp_pa); + if (ret != xpSuccess) { + dev_warn(xpc_part, "unable to get reserved page from nasid %d, " + "which sent interrupt, reason=%d\n", nasid, ret); + return; + } + + remote_vars_pa = remote_rp->sn.vars_pa; + remote_rp_version = remote_rp->version; + remote_rp_ts_jiffies = remote_rp->ts_jiffies; + + partid = remote_rp->SAL_partid; + part = &xpc_partitions[partid]; + part_sn2 = &part->sn.sn2; + + /* pull over the cross partition variables */ + + remote_vars = (struct xpc_vars_sn2 *)xpc_remote_copy_buffer_sn2; + + ret = xpc_get_remote_vars_sn2(remote_vars_pa, remote_vars); + if (ret != xpSuccess) { + dev_warn(xpc_part, "unable to get XPC variables from nasid %d, " + "which sent interrupt, reason=%d\n", nasid, ret); + + XPC_DEACTIVATE_PARTITION(part, ret); + return; + } + + part->activate_IRQ_rcvd++; + + dev_dbg(xpc_part, "partid for nasid %d is %d; IRQs = %d; HB = " + "%ld:0x%lx\n", (int)nasid, (int)partid, part->activate_IRQ_rcvd, + remote_vars->heartbeat, remote_vars->heartbeating_to_mask[0]); + + if (xpc_partition_disengaged(part) && + part->act_state == XPC_P_AS_INACTIVE) { + + xpc_update_partition_info_sn2(part, remote_rp_version, + &remote_rp_ts_jiffies, + remote_rp_pa, remote_vars_pa, + remote_vars); + + if (xpc_partition_deactivation_requested_sn2(partid)) { + /* + * Other side is waiting on us to deactivate even though + * we already have. + */ + return; + } + + xpc_activate_partition(part); + return; + } + + DBUG_ON(part->remote_rp_version == 0); + DBUG_ON(part_sn2->remote_vars_version == 0); + + if (remote_rp_ts_jiffies != part->remote_rp_ts_jiffies) { + + /* the other side rebooted */ + + DBUG_ON(xpc_partition_engaged_sn2(partid)); + DBUG_ON(xpc_partition_deactivation_requested_sn2(partid)); + + xpc_update_partition_info_sn2(part, remote_rp_version, + &remote_rp_ts_jiffies, + remote_rp_pa, remote_vars_pa, + remote_vars); + reactivate = 1; + } + + if (part->disengage_timeout > 0 && !xpc_partition_disengaged(part)) { + /* still waiting on other side to disengage from us */ + return; + } + + if (reactivate) + XPC_DEACTIVATE_PARTITION(part, xpReactivating); + else if (xpc_partition_deactivation_requested_sn2(partid)) + XPC_DEACTIVATE_PARTITION(part, xpOtherGoingDown); +} + +/* + * Loop through the activation amo variables and process any bits + * which are set. Each bit indicates a nasid sending a partition + * activation or deactivation request. + * + * Return #of IRQs detected. + */ +int +xpc_identify_activate_IRQ_sender_sn2(void) +{ + int l; + int b; + unsigned long nasid_mask_long; + u64 nasid; /* remote nasid */ + int n_IRQs_detected = 0; + struct amo *act_amos; + + act_amos = xpc_vars_sn2->amos_page + XPC_ACTIVATE_IRQ_AMOS_SN2; + + /* scan through activate amo variables looking for non-zero entries */ + for (l = 0; l < xpc_nasid_mask_nlongs; l++) { + + if (xpc_exiting) + break; + + nasid_mask_long = xpc_receive_IRQ_amo_sn2(&act_amos[l]); + + b = find_first_bit(&nasid_mask_long, BITS_PER_LONG); + if (b >= BITS_PER_LONG) { + /* no IRQs from nasids in this amo variable */ + continue; + } + + dev_dbg(xpc_part, "amo[%d] gave back 0x%lx\n", l, + nasid_mask_long); + + /* + * If this nasid has been added to the machine since + * our partition was reset, this will retain the + * remote nasid in our reserved pages machine mask. + * This is used in the event of module reload. + */ + xpc_mach_nasids[l] |= nasid_mask_long; + + /* locate the nasid(s) which sent interrupts */ + + do { + n_IRQs_detected++; + nasid = (l * BITS_PER_LONG + b) * 2; + dev_dbg(xpc_part, "interrupt from nasid %ld\n", nasid); + xpc_identify_activate_IRQ_req_sn2(nasid); + + b = find_next_bit(&nasid_mask_long, BITS_PER_LONG, + b + 1); + } while (b < BITS_PER_LONG); + } + return n_IRQs_detected; +} + +static void +xpc_process_activate_IRQ_rcvd_sn2(void) +{ + unsigned long irq_flags; + int n_IRQs_expected; + int n_IRQs_detected; + + DBUG_ON(xpc_activate_IRQ_rcvd == 0); + + spin_lock_irqsave(&xpc_activate_IRQ_rcvd_lock, irq_flags); + n_IRQs_expected = xpc_activate_IRQ_rcvd; + xpc_activate_IRQ_rcvd = 0; + spin_unlock_irqrestore(&xpc_activate_IRQ_rcvd_lock, irq_flags); + + n_IRQs_detected = xpc_identify_activate_IRQ_sender_sn2(); + if (n_IRQs_detected < n_IRQs_expected) { + /* retry once to help avoid missing amo */ + (void)xpc_identify_activate_IRQ_sender_sn2(); + } +} + +/* + * Setup the channel structures that are sn2 specific. + */ +static enum xp_retval +xpc_setup_ch_structures_sn_sn2(struct xpc_partition *part) +{ + struct xpc_partition_sn2 *part_sn2 = &part->sn.sn2; + struct xpc_channel_sn2 *ch_sn2; + enum xp_retval retval; + int ret; + int cpuid; + int ch_number; + struct timer_list *timer; + short partid = XPC_PARTID(part); + + /* allocate all the required GET/PUT values */ + + part_sn2->local_GPs = + xpc_kzalloc_cacheline_aligned(XPC_GP_SIZE, GFP_KERNEL, + &part_sn2->local_GPs_base); + if (part_sn2->local_GPs == NULL) { + dev_err(xpc_chan, "can't get memory for local get/put " + "values\n"); + return xpNoMemory; + } + + part_sn2->remote_GPs = + xpc_kzalloc_cacheline_aligned(XPC_GP_SIZE, GFP_KERNEL, + &part_sn2->remote_GPs_base); + if (part_sn2->remote_GPs == NULL) { + dev_err(xpc_chan, "can't get memory for remote get/put " + "values\n"); + retval = xpNoMemory; + goto out_1; + } + + part_sn2->remote_GPs_pa = 0; + + /* allocate all the required open and close args */ + + part_sn2->local_openclose_args = + xpc_kzalloc_cacheline_aligned(XPC_OPENCLOSE_ARGS_SIZE, + GFP_KERNEL, &part_sn2-> + local_openclose_args_base); + if (part_sn2->local_openclose_args == NULL) { + dev_err(xpc_chan, "can't get memory for local connect args\n"); + retval = xpNoMemory; + goto out_2; + } + + part_sn2->remote_openclose_args_pa = 0; + + part_sn2->local_chctl_amo_va = xpc_init_IRQ_amo_sn2(partid); + + part_sn2->notify_IRQ_nasid = 0; + part_sn2->notify_IRQ_phys_cpuid = 0; + part_sn2->remote_chctl_amo_va = NULL; + + sprintf(part_sn2->notify_IRQ_owner, "xpc%02d", partid); + ret = request_irq(SGI_XPC_NOTIFY, xpc_handle_notify_IRQ_sn2, + IRQF_SHARED, part_sn2->notify_IRQ_owner, + (void *)(u64)partid); + if (ret != 0) { + dev_err(xpc_chan, "can't register NOTIFY IRQ handler, " + "errno=%d\n", -ret); + retval = xpLackOfResources; + goto out_3; + } + + /* Setup a timer to check for dropped notify IRQs */ + timer = &part_sn2->dropped_notify_IRQ_timer; + init_timer(timer); + timer->function = + (void (*)(unsigned long))xpc_check_for_dropped_notify_IRQ_sn2; + timer->data = (unsigned long)part; + timer->expires = jiffies + XPC_DROPPED_NOTIFY_IRQ_WAIT_INTERVAL; + add_timer(timer); + + for (ch_number = 0; ch_number < part->nchannels; ch_number++) { + ch_sn2 = &part->channels[ch_number].sn.sn2; + + ch_sn2->local_GP = &part_sn2->local_GPs[ch_number]; + ch_sn2->local_openclose_args = + &part_sn2->local_openclose_args[ch_number]; + + mutex_init(&ch_sn2->msg_to_pull_mutex); + } + + /* + * Setup the per partition specific variables required by the + * remote partition to establish channel connections with us. + * + * The setting of the magic # indicates that these per partition + * specific variables are ready to be used. + */ + xpc_vars_part_sn2[partid].GPs_pa = xp_pa(part_sn2->local_GPs); + xpc_vars_part_sn2[partid].openclose_args_pa = + xp_pa(part_sn2->local_openclose_args); + xpc_vars_part_sn2[partid].chctl_amo_pa = + xp_pa(part_sn2->local_chctl_amo_va); + cpuid = raw_smp_processor_id(); /* any CPU in this partition will do */ + xpc_vars_part_sn2[partid].notify_IRQ_nasid = cpuid_to_nasid(cpuid); + xpc_vars_part_sn2[partid].notify_IRQ_phys_cpuid = + cpu_physical_id(cpuid); + xpc_vars_part_sn2[partid].nchannels = part->nchannels; + xpc_vars_part_sn2[partid].magic = XPC_VP_MAGIC1_SN2; + + return xpSuccess; + + /* setup of ch structures failed */ +out_3: + kfree(part_sn2->local_openclose_args_base); + part_sn2->local_openclose_args = NULL; +out_2: + kfree(part_sn2->remote_GPs_base); + part_sn2->remote_GPs = NULL; +out_1: + kfree(part_sn2->local_GPs_base); + part_sn2->local_GPs = NULL; + return retval; +} + +/* + * Teardown the channel structures that are sn2 specific. + */ +static void +xpc_teardown_ch_structures_sn_sn2(struct xpc_partition *part) +{ + struct xpc_partition_sn2 *part_sn2 = &part->sn.sn2; + short partid = XPC_PARTID(part); + + /* + * Indicate that the variables specific to the remote partition are no + * longer available for its use. + */ + xpc_vars_part_sn2[partid].magic = 0; + + /* in case we've still got outstanding timers registered... */ + del_timer_sync(&part_sn2->dropped_notify_IRQ_timer); + free_irq(SGI_XPC_NOTIFY, (void *)(u64)partid); + + kfree(part_sn2->local_openclose_args_base); + part_sn2->local_openclose_args = NULL; + kfree(part_sn2->remote_GPs_base); + part_sn2->remote_GPs = NULL; + kfree(part_sn2->local_GPs_base); + part_sn2->local_GPs = NULL; + part_sn2->local_chctl_amo_va = NULL; +} + +/* + * Create a wrapper that hides the underlying mechanism for pulling a cacheline + * (or multiple cachelines) from a remote partition. + * + * src_pa must be a cacheline aligned physical address on the remote partition. + * dst must be a cacheline aligned virtual address on this partition. + * cnt must be cacheline sized + */ +/* ??? Replace this function by call to xp_remote_memcpy() or bte_copy()? */ +static enum xp_retval +xpc_pull_remote_cachelines_sn2(struct xpc_partition *part, void *dst, + const unsigned long src_pa, size_t cnt) +{ + enum xp_retval ret; + + DBUG_ON(src_pa != L1_CACHE_ALIGN(src_pa)); + DBUG_ON((unsigned long)dst != L1_CACHE_ALIGN((unsigned long)dst)); + DBUG_ON(cnt != L1_CACHE_ALIGN(cnt)); + + if (part->act_state == XPC_P_AS_DEACTIVATING) + return part->reason; + + ret = xp_remote_memcpy(xp_pa(dst), src_pa, cnt); + if (ret != xpSuccess) { + dev_dbg(xpc_chan, "xp_remote_memcpy() from partition %d failed," + " ret=%d\n", XPC_PARTID(part), ret); + } + return ret; +} + +/* + * Pull the remote per partition specific variables from the specified + * partition. + */ +static enum xp_retval +xpc_pull_remote_vars_part_sn2(struct xpc_partition *part) +{ + struct xpc_partition_sn2 *part_sn2 = &part->sn.sn2; + u8 buffer[L1_CACHE_BYTES * 2]; + struct xpc_vars_part_sn2 *pulled_entry_cacheline = + (struct xpc_vars_part_sn2 *)L1_CACHE_ALIGN((u64)buffer); + struct xpc_vars_part_sn2 *pulled_entry; + unsigned long remote_entry_cacheline_pa; + unsigned long remote_entry_pa; + short partid = XPC_PARTID(part); + enum xp_retval ret; + + /* pull the cacheline that contains the variables we're interested in */ + + DBUG_ON(part_sn2->remote_vars_part_pa != + L1_CACHE_ALIGN(part_sn2->remote_vars_part_pa)); + DBUG_ON(sizeof(struct xpc_vars_part_sn2) != L1_CACHE_BYTES / 2); + + remote_entry_pa = part_sn2->remote_vars_part_pa + + sn_partition_id * sizeof(struct xpc_vars_part_sn2); + + remote_entry_cacheline_pa = (remote_entry_pa & ~(L1_CACHE_BYTES - 1)); + + pulled_entry = (struct xpc_vars_part_sn2 *)((u64)pulled_entry_cacheline + + (remote_entry_pa & + (L1_CACHE_BYTES - 1))); + + ret = xpc_pull_remote_cachelines_sn2(part, pulled_entry_cacheline, + remote_entry_cacheline_pa, + L1_CACHE_BYTES); + if (ret != xpSuccess) { + dev_dbg(xpc_chan, "failed to pull XPC vars_part from " + "partition %d, ret=%d\n", partid, ret); + return ret; + } + + /* see if they've been set up yet */ + + if (pulled_entry->magic != XPC_VP_MAGIC1_SN2 && + pulled_entry->magic != XPC_VP_MAGIC2_SN2) { + + if (pulled_entry->magic != 0) { + dev_dbg(xpc_chan, "partition %d's XPC vars_part for " + "partition %d has bad magic value (=0x%lx)\n", + partid, sn_partition_id, pulled_entry->magic); + return xpBadMagic; + } + + /* they've not been initialized yet */ + return xpRetry; + } + + if (xpc_vars_part_sn2[partid].magic == XPC_VP_MAGIC1_SN2) { + + /* validate the variables */ + + if (pulled_entry->GPs_pa == 0 || + pulled_entry->openclose_args_pa == 0 || + pulled_entry->chctl_amo_pa == 0) { + + dev_err(xpc_chan, "partition %d's XPC vars_part for " + "partition %d are not valid\n", partid, + sn_partition_id); + return xpInvalidAddress; + } + + /* the variables we imported look to be valid */ + + part_sn2->remote_GPs_pa = pulled_entry->GPs_pa; + part_sn2->remote_openclose_args_pa = + pulled_entry->openclose_args_pa; + part_sn2->remote_chctl_amo_va = + (struct amo *)__va(pulled_entry->chctl_amo_pa); + part_sn2->notify_IRQ_nasid = pulled_entry->notify_IRQ_nasid; + part_sn2->notify_IRQ_phys_cpuid = + pulled_entry->notify_IRQ_phys_cpuid; + + if (part->nchannels > pulled_entry->nchannels) + part->nchannels = pulled_entry->nchannels; + + /* let the other side know that we've pulled their variables */ + + xpc_vars_part_sn2[partid].magic = XPC_VP_MAGIC2_SN2; + } + + if (pulled_entry->magic == XPC_VP_MAGIC1_SN2) + return xpRetry; + + return xpSuccess; +} + +/* + * Establish first contact with the remote partititon. This involves pulling + * the XPC per partition variables from the remote partition and waiting for + * the remote partition to pull ours. + */ +static enum xp_retval +xpc_make_first_contact_sn2(struct xpc_partition *part) +{ + struct xpc_partition_sn2 *part_sn2 = &part->sn.sn2; + enum xp_retval ret; + + /* + * Register the remote partition's amos with SAL so it can handle + * and cleanup errors within that address range should the remote + * partition go down. We don't unregister this range because it is + * difficult to tell when outstanding writes to the remote partition + * are finished and thus when it is safe to unregister. This should + * not result in wasted space in the SAL xp_addr_region table because + * we should get the same page for remote_amos_page_pa after module + * reloads and system reboots. + */ + if (sn_register_xp_addr_region(part_sn2->remote_amos_page_pa, + PAGE_SIZE, 1) < 0) { + dev_warn(xpc_part, "xpc_activating(%d) failed to register " + "xp_addr region\n", XPC_PARTID(part)); + + ret = xpPhysAddrRegFailed; + XPC_DEACTIVATE_PARTITION(part, ret); + return ret; + } + + /* + * Send activate IRQ to get other side to activate if they've not + * already begun to do so. + */ + xpc_send_activate_IRQ_sn2(part_sn2->remote_amos_page_pa, + cnodeid_to_nasid(0), + part_sn2->activate_IRQ_nasid, + part_sn2->activate_IRQ_phys_cpuid); + + while ((ret = xpc_pull_remote_vars_part_sn2(part)) != xpSuccess) { + if (ret != xpRetry) { + XPC_DEACTIVATE_PARTITION(part, ret); + return ret; + } + + dev_dbg(xpc_part, "waiting to make first contact with " + "partition %d\n", XPC_PARTID(part)); + + /* wait a 1/4 of a second or so */ + (void)msleep_interruptible(250); + + if (part->act_state == XPC_P_AS_DEACTIVATING) + return part->reason; + } + + return xpSuccess; +} + +/* + * Get the chctl flags and pull the openclose args and/or remote GPs as needed. + */ +static u64 +xpc_get_chctl_all_flags_sn2(struct xpc_partition *part) +{ + struct xpc_partition_sn2 *part_sn2 = &part->sn.sn2; + unsigned long irq_flags; + union xpc_channel_ctl_flags chctl; + enum xp_retval ret; + + /* + * See if there are any chctl flags to be handled. + */ + + spin_lock_irqsave(&part->chctl_lock, irq_flags); + chctl = part->chctl; + if (chctl.all_flags != 0) + part->chctl.all_flags = 0; + + spin_unlock_irqrestore(&part->chctl_lock, irq_flags); + + if (xpc_any_openclose_chctl_flags_set(&chctl)) { + ret = xpc_pull_remote_cachelines_sn2(part, part-> + remote_openclose_args, + part_sn2-> + remote_openclose_args_pa, + XPC_OPENCLOSE_ARGS_SIZE); + if (ret != xpSuccess) { + XPC_DEACTIVATE_PARTITION(part, ret); + + dev_dbg(xpc_chan, "failed to pull openclose args from " + "partition %d, ret=%d\n", XPC_PARTID(part), + ret); + + /* don't bother processing chctl flags anymore */ + chctl.all_flags = 0; + } + } + + if (xpc_any_msg_chctl_flags_set(&chctl)) { + ret = xpc_pull_remote_cachelines_sn2(part, part_sn2->remote_GPs, + part_sn2->remote_GPs_pa, + XPC_GP_SIZE); + if (ret != xpSuccess) { + XPC_DEACTIVATE_PARTITION(part, ret); + + dev_dbg(xpc_chan, "failed to pull GPs from partition " + "%d, ret=%d\n", XPC_PARTID(part), ret); + + /* don't bother processing chctl flags anymore */ + chctl.all_flags = 0; + } + } + + return chctl.all_flags; +} + +/* + * Allocate the local message queue and the notify queue. + */ +static enum xp_retval +xpc_allocate_local_msgqueue_sn2(struct xpc_channel *ch) +{ + struct xpc_channel_sn2 *ch_sn2 = &ch->sn.sn2; + unsigned long irq_flags; + int nentries; + size_t nbytes; + + for (nentries = ch->local_nentries; nentries > 0; nentries--) { + + nbytes = nentries * ch->entry_size; + ch_sn2->local_msgqueue = + xpc_kzalloc_cacheline_aligned(nbytes, GFP_KERNEL, + &ch_sn2->local_msgqueue_base); + if (ch_sn2->local_msgqueue == NULL) + continue; + + nbytes = nentries * sizeof(struct xpc_notify_sn2); + ch_sn2->notify_queue = kzalloc(nbytes, GFP_KERNEL); + if (ch_sn2->notify_queue == NULL) { + kfree(ch_sn2->local_msgqueue_base); + ch_sn2->local_msgqueue = NULL; + continue; + } + + spin_lock_irqsave(&ch->lock, irq_flags); + if (nentries < ch->local_nentries) { + dev_dbg(xpc_chan, "nentries=%d local_nentries=%d, " + "partid=%d, channel=%d\n", nentries, + ch->local_nentries, ch->partid, ch->number); + + ch->local_nentries = nentries; + } + spin_unlock_irqrestore(&ch->lock, irq_flags); + return xpSuccess; + } + + dev_dbg(xpc_chan, "can't get memory for local message queue and notify " + "queue, partid=%d, channel=%d\n", ch->partid, ch->number); + return xpNoMemory; +} + +/* + * Allocate the cached remote message queue. + */ +static enum xp_retval +xpc_allocate_remote_msgqueue_sn2(struct xpc_channel *ch) +{ + struct xpc_channel_sn2 *ch_sn2 = &ch->sn.sn2; + unsigned long irq_flags; + int nentries; + size_t nbytes; + + DBUG_ON(ch->remote_nentries <= 0); + + for (nentries = ch->remote_nentries; nentries > 0; nentries--) { + + nbytes = nentries * ch->entry_size; + ch_sn2->remote_msgqueue = + xpc_kzalloc_cacheline_aligned(nbytes, GFP_KERNEL, &ch_sn2-> + remote_msgqueue_base); + if (ch_sn2->remote_msgqueue == NULL) + continue; + + spin_lock_irqsave(&ch->lock, irq_flags); + if (nentries < ch->remote_nentries) { + dev_dbg(xpc_chan, "nentries=%d remote_nentries=%d, " + "partid=%d, channel=%d\n", nentries, + ch->remote_nentries, ch->partid, ch->number); + + ch->remote_nentries = nentries; + } + spin_unlock_irqrestore(&ch->lock, irq_flags); + return xpSuccess; + } + + dev_dbg(xpc_chan, "can't get memory for cached remote message queue, " + "partid=%d, channel=%d\n", ch->partid, ch->number); + return xpNoMemory; +} + +/* + * Allocate message queues and other stuff associated with a channel. + * + * Note: Assumes all of the channel sizes are filled in. + */ +static enum xp_retval +xpc_setup_msg_structures_sn2(struct xpc_channel *ch) +{ + struct xpc_channel_sn2 *ch_sn2 = &ch->sn.sn2; + enum xp_retval ret; + + DBUG_ON(ch->flags & XPC_C_SETUP); + + ret = xpc_allocate_local_msgqueue_sn2(ch); + if (ret == xpSuccess) { + + ret = xpc_allocate_remote_msgqueue_sn2(ch); + if (ret != xpSuccess) { + kfree(ch_sn2->local_msgqueue_base); + ch_sn2->local_msgqueue = NULL; + kfree(ch_sn2->notify_queue); + ch_sn2->notify_queue = NULL; + } + } + return ret; +} + +/* + * Free up message queues and other stuff that were allocated for the specified + * channel. + */ +static void +xpc_teardown_msg_structures_sn2(struct xpc_channel *ch) +{ + struct xpc_channel_sn2 *ch_sn2 = &ch->sn.sn2; + + DBUG_ON(!spin_is_locked(&ch->lock)); + + ch_sn2->remote_msgqueue_pa = 0; + + ch_sn2->local_GP->get = 0; + ch_sn2->local_GP->put = 0; + ch_sn2->remote_GP.get = 0; + ch_sn2->remote_GP.put = 0; + ch_sn2->w_local_GP.get = 0; + ch_sn2->w_local_GP.put = 0; + ch_sn2->w_remote_GP.get = 0; + ch_sn2->w_remote_GP.put = 0; + ch_sn2->next_msg_to_pull = 0; + + if (ch->flags & XPC_C_SETUP) { + dev_dbg(xpc_chan, "ch->flags=0x%x, partid=%d, channel=%d\n", + ch->flags, ch->partid, ch->number); + + kfree(ch_sn2->local_msgqueue_base); + ch_sn2->local_msgqueue = NULL; + kfree(ch_sn2->remote_msgqueue_base); + ch_sn2->remote_msgqueue = NULL; + kfree(ch_sn2->notify_queue); + ch_sn2->notify_queue = NULL; + } +} + +/* + * Notify those who wanted to be notified upon delivery of their message. + */ +static void +xpc_notify_senders_sn2(struct xpc_channel *ch, enum xp_retval reason, s64 put) +{ + struct xpc_notify_sn2 *notify; + u8 notify_type; + s64 get = ch->sn.sn2.w_remote_GP.get - 1; + + while (++get < put && atomic_read(&ch->n_to_notify) > 0) { + + notify = &ch->sn.sn2.notify_queue[get % ch->local_nentries]; + + /* + * See if the notify entry indicates it was associated with + * a message who's sender wants to be notified. It is possible + * that it is, but someone else is doing or has done the + * notification. + */ + notify_type = notify->type; + if (notify_type == 0 || + cmpxchg(¬ify->type, notify_type, 0) != notify_type) { + continue; + } + + DBUG_ON(notify_type != XPC_N_CALL); + + atomic_dec(&ch->n_to_notify); + + if (notify->func != NULL) { + dev_dbg(xpc_chan, "notify->func() called, notify=0x%p " + "msg_number=%ld partid=%d channel=%d\n", + (void *)notify, get, ch->partid, ch->number); + + notify->func(reason, ch->partid, ch->number, + notify->key); + + dev_dbg(xpc_chan, "notify->func() returned, notify=0x%p" + " msg_number=%ld partid=%d channel=%d\n", + (void *)notify, get, ch->partid, ch->number); + } + } +} + +static void +xpc_notify_senders_of_disconnect_sn2(struct xpc_channel *ch) +{ + xpc_notify_senders_sn2(ch, ch->reason, ch->sn.sn2.w_local_GP.put); +} + +/* + * Clear some of the msg flags in the local message queue. + */ +static inline void +xpc_clear_local_msgqueue_flags_sn2(struct xpc_channel *ch) +{ + struct xpc_channel_sn2 *ch_sn2 = &ch->sn.sn2; + struct xpc_msg_sn2 *msg; + s64 get; + + get = ch_sn2->w_remote_GP.get; + do { + msg = (struct xpc_msg_sn2 *)((u64)ch_sn2->local_msgqueue + + (get % ch->local_nentries) * + ch->entry_size); + msg->flags = 0; + } while (++get < ch_sn2->remote_GP.get); +} + +/* + * Clear some of the msg flags in the remote message queue. + */ +static inline void +xpc_clear_remote_msgqueue_flags_sn2(struct xpc_channel *ch) +{ + struct xpc_channel_sn2 *ch_sn2 = &ch->sn.sn2; + struct xpc_msg_sn2 *msg; + s64 put; + + put = ch_sn2->w_remote_GP.put; + do { + msg = (struct xpc_msg_sn2 *)((u64)ch_sn2->remote_msgqueue + + (put % ch->remote_nentries) * + ch->entry_size); + msg->flags = 0; + } while (++put < ch_sn2->remote_GP.put); +} + +static int +xpc_n_of_deliverable_payloads_sn2(struct xpc_channel *ch) +{ + return ch->sn.sn2.w_remote_GP.put - ch->sn.sn2.w_local_GP.get; +} + +static void +xpc_process_msg_chctl_flags_sn2(struct xpc_partition *part, int ch_number) +{ + struct xpc_channel *ch = &part->channels[ch_number]; + struct xpc_channel_sn2 *ch_sn2 = &ch->sn.sn2; + int npayloads_sent; + + ch_sn2->remote_GP = part->sn.sn2.remote_GPs[ch_number]; + + /* See what, if anything, has changed for each connected channel */ + + xpc_msgqueue_ref(ch); + + if (ch_sn2->w_remote_GP.get == ch_sn2->remote_GP.get && + ch_sn2->w_remote_GP.put == ch_sn2->remote_GP.put) { + /* nothing changed since GPs were last pulled */ + xpc_msgqueue_deref(ch); + return; + } + + if (!(ch->flags & XPC_C_CONNECTED)) { + xpc_msgqueue_deref(ch); + return; + } + + /* + * First check to see if messages recently sent by us have been + * received by the other side. (The remote GET value will have + * changed since we last looked at it.) + */ + + if (ch_sn2->w_remote_GP.get != ch_sn2->remote_GP.get) { + + /* + * We need to notify any senders that want to be notified + * that their sent messages have been received by their + * intended recipients. We need to do this before updating + * w_remote_GP.get so that we don't allocate the same message + * queue entries prematurely (see xpc_allocate_msg()). + */ + if (atomic_read(&ch->n_to_notify) > 0) { + /* + * Notify senders that messages sent have been + * received and delivered by the other side. + */ + xpc_notify_senders_sn2(ch, xpMsgDelivered, + ch_sn2->remote_GP.get); + } + + /* + * Clear msg->flags in previously sent messages, so that + * they're ready for xpc_allocate_msg(). + */ + xpc_clear_local_msgqueue_flags_sn2(ch); + + ch_sn2->w_remote_GP.get = ch_sn2->remote_GP.get; + + dev_dbg(xpc_chan, "w_remote_GP.get changed to %ld, partid=%d, " + "channel=%d\n", ch_sn2->w_remote_GP.get, ch->partid, + ch->number); + + /* + * If anyone was waiting for message queue entries to become + * available, wake them up. + */ + if (atomic_read(&ch->n_on_msg_allocate_wq) > 0) + wake_up(&ch->msg_allocate_wq); + } + + /* + * Now check for newly sent messages by the other side. (The remote + * PUT value will have changed since we last looked at it.) + */ + + if (ch_sn2->w_remote_GP.put != ch_sn2->remote_GP.put) { + /* + * Clear msg->flags in previously received messages, so that + * they're ready for xpc_get_deliverable_payload_sn2(). + */ + xpc_clear_remote_msgqueue_flags_sn2(ch); + + ch_sn2->w_remote_GP.put = ch_sn2->remote_GP.put; + + dev_dbg(xpc_chan, "w_remote_GP.put changed to %ld, partid=%d, " + "channel=%d\n", ch_sn2->w_remote_GP.put, ch->partid, + ch->number); + + npayloads_sent = xpc_n_of_deliverable_payloads_sn2(ch); + if (npayloads_sent > 0) { + dev_dbg(xpc_chan, "msgs waiting to be copied and " + "delivered=%d, partid=%d, channel=%d\n", + npayloads_sent, ch->partid, ch->number); + + if (ch->flags & XPC_C_CONNECTEDCALLOUT_MADE) + xpc_activate_kthreads(ch, npayloads_sent); + } + } + + xpc_msgqueue_deref(ch); +} + +static struct xpc_msg_sn2 * +xpc_pull_remote_msg_sn2(struct xpc_channel *ch, s64 get) +{ + struct xpc_partition *part = &xpc_partitions[ch->partid]; + struct xpc_channel_sn2 *ch_sn2 = &ch->sn.sn2; + unsigned long remote_msg_pa; + struct xpc_msg_sn2 *msg; + u32 msg_index; + u32 nmsgs; + u64 msg_offset; + enum xp_retval ret; + + if (mutex_lock_interruptible(&ch_sn2->msg_to_pull_mutex) != 0) { + /* we were interrupted by a signal */ + return NULL; + } + + while (get >= ch_sn2->next_msg_to_pull) { + + /* pull as many messages as are ready and able to be pulled */ + + msg_index = ch_sn2->next_msg_to_pull % ch->remote_nentries; + + DBUG_ON(ch_sn2->next_msg_to_pull >= ch_sn2->w_remote_GP.put); + nmsgs = ch_sn2->w_remote_GP.put - ch_sn2->next_msg_to_pull; + if (msg_index + nmsgs > ch->remote_nentries) { + /* ignore the ones that wrap the msg queue for now */ + nmsgs = ch->remote_nentries - msg_index; + } + + msg_offset = msg_index * ch->entry_size; + msg = (struct xpc_msg_sn2 *)((u64)ch_sn2->remote_msgqueue + + msg_offset); + remote_msg_pa = ch_sn2->remote_msgqueue_pa + msg_offset; + + ret = xpc_pull_remote_cachelines_sn2(part, msg, remote_msg_pa, + nmsgs * ch->entry_size); + if (ret != xpSuccess) { + + dev_dbg(xpc_chan, "failed to pull %d msgs starting with" + " msg %ld from partition %d, channel=%d, " + "ret=%d\n", nmsgs, ch_sn2->next_msg_to_pull, + ch->partid, ch->number, ret); + + XPC_DEACTIVATE_PARTITION(part, ret); + + mutex_unlock(&ch_sn2->msg_to_pull_mutex); + return NULL; + } + + ch_sn2->next_msg_to_pull += nmsgs; + } + + mutex_unlock(&ch_sn2->msg_to_pull_mutex); + + /* return the message we were looking for */ + msg_offset = (get % ch->remote_nentries) * ch->entry_size; + msg = (struct xpc_msg_sn2 *)((u64)ch_sn2->remote_msgqueue + msg_offset); + + return msg; +} + +/* + * Get the next deliverable message's payload. + */ +static void * +xpc_get_deliverable_payload_sn2(struct xpc_channel *ch) +{ + struct xpc_channel_sn2 *ch_sn2 = &ch->sn.sn2; + struct xpc_msg_sn2 *msg; + void *payload = NULL; + s64 get; + + do { + if (ch->flags & XPC_C_DISCONNECTING) + break; + + get = ch_sn2->w_local_GP.get; + rmb(); /* guarantee that .get loads before .put */ + if (get == ch_sn2->w_remote_GP.put) + break; + + /* There are messages waiting to be pulled and delivered. + * We need to try to secure one for ourselves. We'll do this + * by trying to increment w_local_GP.get and hope that no one + * else beats us to it. If they do, we'll we'll simply have + * to try again for the next one. + */ + + if (cmpxchg(&ch_sn2->w_local_GP.get, get, get + 1) == get) { + /* we got the entry referenced by get */ + + dev_dbg(xpc_chan, "w_local_GP.get changed to %ld, " + "partid=%d, channel=%d\n", get + 1, + ch->partid, ch->number); + + /* pull the message from the remote partition */ + + msg = xpc_pull_remote_msg_sn2(ch, get); + + DBUG_ON(msg != NULL && msg->number != get); + DBUG_ON(msg != NULL && (msg->flags & XPC_M_SN2_DONE)); + DBUG_ON(msg != NULL && !(msg->flags & XPC_M_SN2_READY)); + + payload = &msg->payload; + break; + } + + } while (1); + + return payload; +} + +/* + * Now we actually send the messages that are ready to be sent by advancing + * the local message queue's Put value and then send a chctl msgrequest to the + * recipient partition. + */ +static void +xpc_send_msgs_sn2(struct xpc_channel *ch, s64 initial_put) +{ + struct xpc_channel_sn2 *ch_sn2 = &ch->sn.sn2; + struct xpc_msg_sn2 *msg; + s64 put = initial_put + 1; + int send_msgrequest = 0; + + while (1) { + + while (1) { + if (put == ch_sn2->w_local_GP.put) + break; + + msg = (struct xpc_msg_sn2 *)((u64)ch_sn2-> + local_msgqueue + (put % + ch->local_nentries) * + ch->entry_size); + + if (!(msg->flags & XPC_M_SN2_READY)) + break; + + put++; + } + + if (put == initial_put) { + /* nothing's changed */ + break; + } + + if (cmpxchg_rel(&ch_sn2->local_GP->put, initial_put, put) != + initial_put) { + /* someone else beat us to it */ + DBUG_ON(ch_sn2->local_GP->put < initial_put); + break; + } + + /* we just set the new value of local_GP->put */ + + dev_dbg(xpc_chan, "local_GP->put changed to %ld, partid=%d, " + "channel=%d\n", put, ch->partid, ch->number); + + send_msgrequest = 1; + + /* + * We need to ensure that the message referenced by + * local_GP->put is not XPC_M_SN2_READY or that local_GP->put + * equals w_local_GP.put, so we'll go have a look. + */ + initial_put = put; + } + + if (send_msgrequest) + xpc_send_chctl_msgrequest_sn2(ch); +} + +/* + * Allocate an entry for a message from the message queue associated with the + * specified channel. + */ +static enum xp_retval +xpc_allocate_msg_sn2(struct xpc_channel *ch, u32 flags, + struct xpc_msg_sn2 **address_of_msg) +{ + struct xpc_channel_sn2 *ch_sn2 = &ch->sn.sn2; + struct xpc_msg_sn2 *msg; + enum xp_retval ret; + s64 put; + + /* + * Get the next available message entry from the local message queue. + * If none are available, we'll make sure that we grab the latest + * GP values. + */ + ret = xpTimeout; + + while (1) { + + put = ch_sn2->w_local_GP.put; + rmb(); /* guarantee that .put loads before .get */ + if (put - ch_sn2->w_remote_GP.get < ch->local_nentries) { + + /* There are available message entries. We need to try + * to secure one for ourselves. We'll do this by trying + * to increment w_local_GP.put as long as someone else + * doesn't beat us to it. If they do, we'll have to + * try again. + */ + if (cmpxchg(&ch_sn2->w_local_GP.put, put, put + 1) == + put) { + /* we got the entry referenced by put */ + break; + } + continue; /* try again */ + } + + /* + * There aren't any available msg entries at this time. + * + * In waiting for a message entry to become available, + * we set a timeout in case the other side is not sending + * completion interrupts. This lets us fake a notify IRQ + * that will cause the notify IRQ handler to fetch the latest + * GP values as if an interrupt was sent by the other side. + */ + if (ret == xpTimeout) + xpc_send_chctl_local_msgrequest_sn2(ch); + + if (flags & XPC_NOWAIT) + return xpNoWait; + + ret = xpc_allocate_msg_wait(ch); + if (ret != xpInterrupted && ret != xpTimeout) + return ret; + } + + /* get the message's address and initialize it */ + msg = (struct xpc_msg_sn2 *)((u64)ch_sn2->local_msgqueue + + (put % ch->local_nentries) * + ch->entry_size); + + DBUG_ON(msg->flags != 0); + msg->number = put; + + dev_dbg(xpc_chan, "w_local_GP.put changed to %ld; msg=0x%p, " + "msg_number=%ld, partid=%d, channel=%d\n", put + 1, + (void *)msg, msg->number, ch->partid, ch->number); + + *address_of_msg = msg; + return xpSuccess; +} + +/* + * Common code that does the actual sending of the message by advancing the + * local message queue's Put value and sends a chctl msgrequest to the + * partition the message is being sent to. + */ +static enum xp_retval +xpc_send_payload_sn2(struct xpc_channel *ch, u32 flags, void *payload, + u16 payload_size, u8 notify_type, xpc_notify_func func, + void *key) +{ + enum xp_retval ret = xpSuccess; + struct xpc_channel_sn2 *ch_sn2 = &ch->sn.sn2; + struct xpc_msg_sn2 *msg = msg; + struct xpc_notify_sn2 *notify = notify; + s64 msg_number; + s64 put; + + DBUG_ON(notify_type == XPC_N_CALL && func == NULL); + + if (XPC_MSG_SIZE(payload_size) > ch->entry_size) + return xpPayloadTooBig; + + xpc_msgqueue_ref(ch); + + if (ch->flags & XPC_C_DISCONNECTING) { + ret = ch->reason; + goto out_1; + } + if (!(ch->flags & XPC_C_CONNECTED)) { + ret = xpNotConnected; + goto out_1; + } + + ret = xpc_allocate_msg_sn2(ch, flags, &msg); + if (ret != xpSuccess) + goto out_1; + + msg_number = msg->number; + + if (notify_type != 0) { + /* + * Tell the remote side to send an ACK interrupt when the + * message has been delivered. + */ + msg->flags |= XPC_M_SN2_INTERRUPT; + + atomic_inc(&ch->n_to_notify); + + notify = &ch_sn2->notify_queue[msg_number % ch->local_nentries]; + notify->func = func; + notify->key = key; + notify->type = notify_type; + + /* ??? Is a mb() needed here? */ + + if (ch->flags & XPC_C_DISCONNECTING) { + /* + * An error occurred between our last error check and + * this one. We will try to clear the type field from + * the notify entry. If we succeed then + * xpc_disconnect_channel() didn't already process + * the notify entry. + */ + if (cmpxchg(¬ify->type, notify_type, 0) == + notify_type) { + atomic_dec(&ch->n_to_notify); + ret = ch->reason; + } + goto out_1; + } + } + + memcpy(&msg->payload, payload, payload_size); + + msg->flags |= XPC_M_SN2_READY; + + /* + * The preceding store of msg->flags must occur before the following + * load of local_GP->put. + */ + mb(); + + /* see if the message is next in line to be sent, if so send it */ + + put = ch_sn2->local_GP->put; + if (put == msg_number) + xpc_send_msgs_sn2(ch, put); + +out_1: + xpc_msgqueue_deref(ch); + return ret; +} + +/* + * Now we actually acknowledge the messages that have been delivered and ack'd + * by advancing the cached remote message queue's Get value and if requested + * send a chctl msgrequest to the message sender's partition. + * + * If a message has XPC_M_SN2_INTERRUPT set, send an interrupt to the partition + * that sent the message. + */ +static void +xpc_acknowledge_msgs_sn2(struct xpc_channel *ch, s64 initial_get, u8 msg_flags) +{ + struct xpc_channel_sn2 *ch_sn2 = &ch->sn.sn2; + struct xpc_msg_sn2 *msg; + s64 get = initial_get + 1; + int send_msgrequest = 0; + + while (1) { + + while (1) { + if (get == ch_sn2->w_local_GP.get) + break; + + msg = (struct xpc_msg_sn2 *)((u64)ch_sn2-> + remote_msgqueue + (get % + ch->remote_nentries) * + ch->entry_size); + + if (!(msg->flags & XPC_M_SN2_DONE)) + break; + + msg_flags |= msg->flags; + get++; + } + + if (get == initial_get) { + /* nothing's changed */ + break; + } + + if (cmpxchg_rel(&ch_sn2->local_GP->get, initial_get, get) != + initial_get) { + /* someone else beat us to it */ + DBUG_ON(ch_sn2->local_GP->get <= initial_get); + break; + } + + /* we just set the new value of local_GP->get */ + + dev_dbg(xpc_chan, "local_GP->get changed to %ld, partid=%d, " + "channel=%d\n", get, ch->partid, ch->number); + + send_msgrequest = (msg_flags & XPC_M_SN2_INTERRUPT); + + /* + * We need to ensure that the message referenced by + * local_GP->get is not XPC_M_SN2_DONE or that local_GP->get + * equals w_local_GP.get, so we'll go have a look. + */ + initial_get = get; + } + + if (send_msgrequest) + xpc_send_chctl_msgrequest_sn2(ch); +} + +static void +xpc_received_payload_sn2(struct xpc_channel *ch, void *payload) +{ + struct xpc_msg_sn2 *msg; + s64 msg_number; + s64 get; + + msg = container_of(payload, struct xpc_msg_sn2, payload); + msg_number = msg->number; + + dev_dbg(xpc_chan, "msg=0x%p, msg_number=%ld, partid=%d, channel=%d\n", + (void *)msg, msg_number, ch->partid, ch->number); + + DBUG_ON((((u64)msg - (u64)ch->remote_msgqueue) / ch->entry_size) != + msg_number % ch->remote_nentries); + DBUG_ON(msg->flags & XPC_M_SN2_DONE); + + msg->flags |= XPC_M_SN2_DONE; + + /* + * The preceding store of msg->flags must occur before the following + * load of local_GP->get. + */ + mb(); + + /* + * See if this message is next in line to be acknowledged as having + * been delivered. + */ + get = ch->sn.sn2.local_GP->get; + if (get == msg_number) + xpc_acknowledge_msgs_sn2(ch, get, msg->flags); +} + +int +xpc_init_sn2(void) +{ + int ret; + size_t buf_size; + + xpc_setup_partitions_sn = xpc_setup_partitions_sn_sn2; + xpc_get_partition_rsvd_page_pa = xpc_get_partition_rsvd_page_pa_sn2; + xpc_setup_rsvd_page_sn = xpc_setup_rsvd_page_sn_sn2; + xpc_increment_heartbeat = xpc_increment_heartbeat_sn2; + xpc_offline_heartbeat = xpc_offline_heartbeat_sn2; + xpc_online_heartbeat = xpc_online_heartbeat_sn2; + xpc_heartbeat_init = xpc_heartbeat_init_sn2; + xpc_heartbeat_exit = xpc_heartbeat_exit_sn2; + xpc_get_remote_heartbeat = xpc_get_remote_heartbeat_sn2; + + xpc_request_partition_activation = xpc_request_partition_activation_sn2; + xpc_request_partition_reactivation = + xpc_request_partition_reactivation_sn2; + xpc_request_partition_deactivation = + xpc_request_partition_deactivation_sn2; + xpc_cancel_partition_deactivation_request = + xpc_cancel_partition_deactivation_request_sn2; + + xpc_process_activate_IRQ_rcvd = xpc_process_activate_IRQ_rcvd_sn2; + xpc_setup_ch_structures_sn = xpc_setup_ch_structures_sn_sn2; + xpc_teardown_ch_structures_sn = xpc_teardown_ch_structures_sn_sn2; + xpc_make_first_contact = xpc_make_first_contact_sn2; + + xpc_get_chctl_all_flags = xpc_get_chctl_all_flags_sn2; + xpc_send_chctl_closerequest = xpc_send_chctl_closerequest_sn2; + xpc_send_chctl_closereply = xpc_send_chctl_closereply_sn2; + xpc_send_chctl_openrequest = xpc_send_chctl_openrequest_sn2; + xpc_send_chctl_openreply = xpc_send_chctl_openreply_sn2; + + xpc_save_remote_msgqueue_pa = xpc_save_remote_msgqueue_pa_sn2; + + xpc_setup_msg_structures = xpc_setup_msg_structures_sn2; + xpc_teardown_msg_structures = xpc_teardown_msg_structures_sn2; + + xpc_notify_senders_of_disconnect = xpc_notify_senders_of_disconnect_sn2; + xpc_process_msg_chctl_flags = xpc_process_msg_chctl_flags_sn2; + xpc_n_of_deliverable_payloads = xpc_n_of_deliverable_payloads_sn2; + xpc_get_deliverable_payload = xpc_get_deliverable_payload_sn2; + + xpc_indicate_partition_engaged = xpc_indicate_partition_engaged_sn2; + xpc_indicate_partition_disengaged = + xpc_indicate_partition_disengaged_sn2; + xpc_partition_engaged = xpc_partition_engaged_sn2; + xpc_any_partition_engaged = xpc_any_partition_engaged_sn2; + xpc_assume_partition_disengaged = xpc_assume_partition_disengaged_sn2; + + xpc_send_payload = xpc_send_payload_sn2; + xpc_received_payload = xpc_received_payload_sn2; + + if (offsetof(struct xpc_msg_sn2, payload) > XPC_MSG_HDR_MAX_SIZE) { + dev_err(xpc_part, "header portion of struct xpc_msg_sn2 is " + "larger than %d\n", XPC_MSG_HDR_MAX_SIZE); + return -E2BIG; + } + + buf_size = max(XPC_RP_VARS_SIZE, + XPC_RP_HEADER_SIZE + XP_NASID_MASK_BYTES_SN2); + xpc_remote_copy_buffer_sn2 = xpc_kmalloc_cacheline_aligned(buf_size, + GFP_KERNEL, + &xpc_remote_copy_buffer_base_sn2); + if (xpc_remote_copy_buffer_sn2 == NULL) { + dev_err(xpc_part, "can't get memory for remote copy buffer\n"); + return -ENOMEM; + } + + /* open up protections for IPI and [potentially] amo operations */ + xpc_allow_IPI_ops_sn2(); + xpc_allow_amo_ops_shub_wars_1_1_sn2(); + + /* + * This is safe to do before the xpc_hb_checker thread has started + * because the handler releases a wait queue. If an interrupt is + * received before the thread is waiting, it will not go to sleep, + * but rather immediately process the interrupt. + */ + ret = request_irq(SGI_XPC_ACTIVATE, xpc_handle_activate_IRQ_sn2, 0, + "xpc hb", NULL); + if (ret != 0) { + dev_err(xpc_part, "can't register ACTIVATE IRQ handler, " + "errno=%d\n", -ret); + xpc_disallow_IPI_ops_sn2(); + kfree(xpc_remote_copy_buffer_base_sn2); + } + return ret; +} + +void +xpc_exit_sn2(void) +{ + free_irq(SGI_XPC_ACTIVATE, NULL); + xpc_disallow_IPI_ops_sn2(); + kfree(xpc_remote_copy_buffer_base_sn2); +} diff --git a/drivers/misc/sgi-xp/xpc_uv.c b/drivers/misc/sgi-xp/xpc_uv.c new file mode 100644 index 00000000000..1ac694c0162 --- /dev/null +++ b/drivers/misc/sgi-xp/xpc_uv.c @@ -0,0 +1,1443 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved. + */ + +/* + * Cross Partition Communication (XPC) uv-based functions. + * + * Architecture specific implementation of common functions. + * + */ + +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/interrupt.h> +#include <linux/delay.h> +#include <linux/device.h> +#include <asm/uv/uv_hub.h> +#include "../sgi-gru/gru.h" +#include "../sgi-gru/grukservices.h" +#include "xpc.h" + +static atomic64_t xpc_heartbeat_uv; +static DECLARE_BITMAP(xpc_heartbeating_to_mask_uv, XP_MAX_NPARTITIONS_UV); + +#define XPC_ACTIVATE_MSG_SIZE_UV (1 * GRU_CACHE_LINE_BYTES) +#define XPC_NOTIFY_MSG_SIZE_UV (2 * GRU_CACHE_LINE_BYTES) + +#define XPC_ACTIVATE_MQ_SIZE_UV (4 * XP_MAX_NPARTITIONS_UV * \ + XPC_ACTIVATE_MSG_SIZE_UV) +#define XPC_NOTIFY_MQ_SIZE_UV (4 * XP_MAX_NPARTITIONS_UV * \ + XPC_NOTIFY_MSG_SIZE_UV) + +static void *xpc_activate_mq_uv; +static void *xpc_notify_mq_uv; + +static int +xpc_setup_partitions_sn_uv(void) +{ + short partid; + struct xpc_partition_uv *part_uv; + + for (partid = 0; partid < XP_MAX_NPARTITIONS_UV; partid++) { + part_uv = &xpc_partitions[partid].sn.uv; + + spin_lock_init(&part_uv->flags_lock); + part_uv->remote_act_state = XPC_P_AS_INACTIVE; + } + return 0; +} + +static void * +xpc_create_gru_mq_uv(unsigned int mq_size, int cpuid, unsigned int irq, + irq_handler_t irq_handler) +{ + int ret; + int nid; + int mq_order; + struct page *page; + void *mq; + + nid = cpu_to_node(cpuid); + mq_order = get_order(mq_size); + page = alloc_pages_node(nid, GFP_KERNEL | __GFP_ZERO | GFP_THISNODE, + mq_order); + if (page == NULL) { + dev_err(xpc_part, "xpc_create_gru_mq_uv() failed to alloc %d " + "bytes of memory on nid=%d for GRU mq\n", mq_size, nid); + return NULL; + } + + mq = page_address(page); + ret = gru_create_message_queue(mq, mq_size); + if (ret != 0) { + dev_err(xpc_part, "gru_create_message_queue() returned " + "error=%d\n", ret); + free_pages((unsigned long)mq, mq_order); + return NULL; + } + + /* !!! Need to do some other things to set up IRQ */ + + ret = request_irq(irq, irq_handler, 0, "xpc", NULL); + if (ret != 0) { + dev_err(xpc_part, "request_irq(irq=%d) returned error=%d\n", + irq, ret); + free_pages((unsigned long)mq, mq_order); + return NULL; + } + + /* !!! enable generation of irq when GRU mq op occurs to this mq */ + + /* ??? allow other partitions to access GRU mq? */ + + return mq; +} + +static void +xpc_destroy_gru_mq_uv(void *mq, unsigned int mq_size, unsigned int irq) +{ + /* ??? disallow other partitions to access GRU mq? */ + + /* !!! disable generation of irq when GRU mq op occurs to this mq */ + + free_irq(irq, NULL); + + free_pages((unsigned long)mq, get_order(mq_size)); +} + +static enum xp_retval +xpc_send_gru_msg(unsigned long mq_gpa, void *msg, size_t msg_size) +{ + enum xp_retval xp_ret; + int ret; + + while (1) { + ret = gru_send_message_gpa(mq_gpa, msg, msg_size); + if (ret == MQE_OK) { + xp_ret = xpSuccess; + break; + } + + if (ret == MQE_QUEUE_FULL) { + dev_dbg(xpc_chan, "gru_send_message_gpa() returned " + "error=MQE_QUEUE_FULL\n"); + /* !!! handle QLimit reached; delay & try again */ + /* ??? Do we add a limit to the number of retries? */ + (void)msleep_interruptible(10); + } else if (ret == MQE_CONGESTION) { + dev_dbg(xpc_chan, "gru_send_message_gpa() returned " + "error=MQE_CONGESTION\n"); + /* !!! handle LB Overflow; simply try again */ + /* ??? Do we add a limit to the number of retries? */ + } else { + /* !!! Currently this is MQE_UNEXPECTED_CB_ERR */ + dev_err(xpc_chan, "gru_send_message_gpa() returned " + "error=%d\n", ret); + xp_ret = xpGruSendMqError; + break; + } + } + return xp_ret; +} + +static void +xpc_process_activate_IRQ_rcvd_uv(void) +{ + unsigned long irq_flags; + short partid; + struct xpc_partition *part; + u8 act_state_req; + + DBUG_ON(xpc_activate_IRQ_rcvd == 0); + + spin_lock_irqsave(&xpc_activate_IRQ_rcvd_lock, irq_flags); + for (partid = 0; partid < XP_MAX_NPARTITIONS_UV; partid++) { + part = &xpc_partitions[partid]; + + if (part->sn.uv.act_state_req == 0) + continue; + + xpc_activate_IRQ_rcvd--; + BUG_ON(xpc_activate_IRQ_rcvd < 0); + + act_state_req = part->sn.uv.act_state_req; + part->sn.uv.act_state_req = 0; + spin_unlock_irqrestore(&xpc_activate_IRQ_rcvd_lock, irq_flags); + + if (act_state_req == XPC_P_ASR_ACTIVATE_UV) { + if (part->act_state == XPC_P_AS_INACTIVE) + xpc_activate_partition(part); + else if (part->act_state == XPC_P_AS_DEACTIVATING) + XPC_DEACTIVATE_PARTITION(part, xpReactivating); + + } else if (act_state_req == XPC_P_ASR_REACTIVATE_UV) { + if (part->act_state == XPC_P_AS_INACTIVE) + xpc_activate_partition(part); + else + XPC_DEACTIVATE_PARTITION(part, xpReactivating); + + } else if (act_state_req == XPC_P_ASR_DEACTIVATE_UV) { + XPC_DEACTIVATE_PARTITION(part, part->sn.uv.reason); + + } else { + BUG(); + } + + spin_lock_irqsave(&xpc_activate_IRQ_rcvd_lock, irq_flags); + if (xpc_activate_IRQ_rcvd == 0) + break; + } + spin_unlock_irqrestore(&xpc_activate_IRQ_rcvd_lock, irq_flags); + +} + +static void +xpc_handle_activate_mq_msg_uv(struct xpc_partition *part, + struct xpc_activate_mq_msghdr_uv *msg_hdr, + int *wakeup_hb_checker) +{ + unsigned long irq_flags; + struct xpc_partition_uv *part_uv = &part->sn.uv; + struct xpc_openclose_args *args; + + part_uv->remote_act_state = msg_hdr->act_state; + + switch (msg_hdr->type) { + case XPC_ACTIVATE_MQ_MSG_SYNC_ACT_STATE_UV: + /* syncing of remote_act_state was just done above */ + break; + + case XPC_ACTIVATE_MQ_MSG_INC_HEARTBEAT_UV: { + struct xpc_activate_mq_msg_heartbeat_req_uv *msg; + + msg = container_of(msg_hdr, + struct xpc_activate_mq_msg_heartbeat_req_uv, + hdr); + part_uv->heartbeat = msg->heartbeat; + break; + } + case XPC_ACTIVATE_MQ_MSG_OFFLINE_HEARTBEAT_UV: { + struct xpc_activate_mq_msg_heartbeat_req_uv *msg; + + msg = container_of(msg_hdr, + struct xpc_activate_mq_msg_heartbeat_req_uv, + hdr); + part_uv->heartbeat = msg->heartbeat; + + spin_lock_irqsave(&part_uv->flags_lock, irq_flags); + part_uv->flags |= XPC_P_HEARTBEAT_OFFLINE_UV; + spin_unlock_irqrestore(&part_uv->flags_lock, irq_flags); + break; + } + case XPC_ACTIVATE_MQ_MSG_ONLINE_HEARTBEAT_UV: { + struct xpc_activate_mq_msg_heartbeat_req_uv *msg; + + msg = container_of(msg_hdr, + struct xpc_activate_mq_msg_heartbeat_req_uv, + hdr); + part_uv->heartbeat = msg->heartbeat; + + spin_lock_irqsave(&part_uv->flags_lock, irq_flags); + part_uv->flags &= ~XPC_P_HEARTBEAT_OFFLINE_UV; + spin_unlock_irqrestore(&part_uv->flags_lock, irq_flags); + break; + } + case XPC_ACTIVATE_MQ_MSG_ACTIVATE_REQ_UV: { + struct xpc_activate_mq_msg_activate_req_uv *msg; + + /* + * ??? Do we deal here with ts_jiffies being different + * ??? if act_state != XPC_P_AS_INACTIVE instead of + * ??? below? + */ + msg = container_of(msg_hdr, struct + xpc_activate_mq_msg_activate_req_uv, hdr); + + spin_lock_irqsave(&xpc_activate_IRQ_rcvd_lock, irq_flags); + if (part_uv->act_state_req == 0) + xpc_activate_IRQ_rcvd++; + part_uv->act_state_req = XPC_P_ASR_ACTIVATE_UV; + part->remote_rp_pa = msg->rp_gpa; /* !!! _pa is _gpa */ + part->remote_rp_ts_jiffies = msg_hdr->rp_ts_jiffies; + part_uv->remote_activate_mq_gpa = msg->activate_mq_gpa; + spin_unlock_irqrestore(&xpc_activate_IRQ_rcvd_lock, irq_flags); + + (*wakeup_hb_checker)++; + break; + } + case XPC_ACTIVATE_MQ_MSG_DEACTIVATE_REQ_UV: { + struct xpc_activate_mq_msg_deactivate_req_uv *msg; + + msg = container_of(msg_hdr, struct + xpc_activate_mq_msg_deactivate_req_uv, hdr); + + spin_lock_irqsave(&xpc_activate_IRQ_rcvd_lock, irq_flags); + if (part_uv->act_state_req == 0) + xpc_activate_IRQ_rcvd++; + part_uv->act_state_req = XPC_P_ASR_DEACTIVATE_UV; + part_uv->reason = msg->reason; + spin_unlock_irqrestore(&xpc_activate_IRQ_rcvd_lock, irq_flags); + + (*wakeup_hb_checker)++; + return; + } + case XPC_ACTIVATE_MQ_MSG_CHCTL_CLOSEREQUEST_UV: { + struct xpc_activate_mq_msg_chctl_closerequest_uv *msg; + + msg = container_of(msg_hdr, struct + xpc_activate_mq_msg_chctl_closerequest_uv, + hdr); + args = &part->remote_openclose_args[msg->ch_number]; + args->reason = msg->reason; + + spin_lock_irqsave(&part->chctl_lock, irq_flags); + part->chctl.flags[msg->ch_number] |= XPC_CHCTL_CLOSEREQUEST; + spin_unlock_irqrestore(&part->chctl_lock, irq_flags); + + xpc_wakeup_channel_mgr(part); + break; + } + case XPC_ACTIVATE_MQ_MSG_CHCTL_CLOSEREPLY_UV: { + struct xpc_activate_mq_msg_chctl_closereply_uv *msg; + + msg = container_of(msg_hdr, struct + xpc_activate_mq_msg_chctl_closereply_uv, + hdr); + + spin_lock_irqsave(&part->chctl_lock, irq_flags); + part->chctl.flags[msg->ch_number] |= XPC_CHCTL_CLOSEREPLY; + spin_unlock_irqrestore(&part->chctl_lock, irq_flags); + + xpc_wakeup_channel_mgr(part); + break; + } + case XPC_ACTIVATE_MQ_MSG_CHCTL_OPENREQUEST_UV: { + struct xpc_activate_mq_msg_chctl_openrequest_uv *msg; + + msg = container_of(msg_hdr, struct + xpc_activate_mq_msg_chctl_openrequest_uv, + hdr); + args = &part->remote_openclose_args[msg->ch_number]; + args->entry_size = msg->entry_size; + args->local_nentries = msg->local_nentries; + + spin_lock_irqsave(&part->chctl_lock, irq_flags); + part->chctl.flags[msg->ch_number] |= XPC_CHCTL_OPENREQUEST; + spin_unlock_irqrestore(&part->chctl_lock, irq_flags); + + xpc_wakeup_channel_mgr(part); + break; + } + case XPC_ACTIVATE_MQ_MSG_CHCTL_OPENREPLY_UV: { + struct xpc_activate_mq_msg_chctl_openreply_uv *msg; + + msg = container_of(msg_hdr, struct + xpc_activate_mq_msg_chctl_openreply_uv, hdr); + args = &part->remote_openclose_args[msg->ch_number]; + args->remote_nentries = msg->remote_nentries; + args->local_nentries = msg->local_nentries; + args->local_msgqueue_pa = msg->local_notify_mq_gpa; + + spin_lock_irqsave(&part->chctl_lock, irq_flags); + part->chctl.flags[msg->ch_number] |= XPC_CHCTL_OPENREPLY; + spin_unlock_irqrestore(&part->chctl_lock, irq_flags); + + xpc_wakeup_channel_mgr(part); + break; + } + case XPC_ACTIVATE_MQ_MSG_MARK_ENGAGED_UV: + spin_lock_irqsave(&part_uv->flags_lock, irq_flags); + part_uv->flags |= XPC_P_ENGAGED_UV; + spin_unlock_irqrestore(&part_uv->flags_lock, irq_flags); + break; + + case XPC_ACTIVATE_MQ_MSG_MARK_DISENGAGED_UV: + spin_lock_irqsave(&part_uv->flags_lock, irq_flags); + part_uv->flags &= ~XPC_P_ENGAGED_UV; + spin_unlock_irqrestore(&part_uv->flags_lock, irq_flags); + break; + + default: + dev_err(xpc_part, "received unknown activate_mq msg type=%d " + "from partition=%d\n", msg_hdr->type, XPC_PARTID(part)); + + /* get hb checker to deactivate from the remote partition */ + spin_lock_irqsave(&xpc_activate_IRQ_rcvd_lock, irq_flags); + if (part_uv->act_state_req == 0) + xpc_activate_IRQ_rcvd++; + part_uv->act_state_req = XPC_P_ASR_DEACTIVATE_UV; + part_uv->reason = xpBadMsgType; + spin_unlock_irqrestore(&xpc_activate_IRQ_rcvd_lock, irq_flags); + + (*wakeup_hb_checker)++; + return; + } + + if (msg_hdr->rp_ts_jiffies != part->remote_rp_ts_jiffies && + part->remote_rp_ts_jiffies != 0) { + /* + * ??? Does what we do here need to be sensitive to + * ??? act_state or remote_act_state? + */ + spin_lock_irqsave(&xpc_activate_IRQ_rcvd_lock, irq_flags); + if (part_uv->act_state_req == 0) + xpc_activate_IRQ_rcvd++; + part_uv->act_state_req = XPC_P_ASR_REACTIVATE_UV; + spin_unlock_irqrestore(&xpc_activate_IRQ_rcvd_lock, irq_flags); + + (*wakeup_hb_checker)++; + } +} + +static irqreturn_t +xpc_handle_activate_IRQ_uv(int irq, void *dev_id) +{ + struct xpc_activate_mq_msghdr_uv *msg_hdr; + short partid; + struct xpc_partition *part; + int wakeup_hb_checker = 0; + + while ((msg_hdr = gru_get_next_message(xpc_activate_mq_uv)) != NULL) { + + partid = msg_hdr->partid; + if (partid < 0 || partid >= XP_MAX_NPARTITIONS_UV) { + dev_err(xpc_part, "xpc_handle_activate_IRQ_uv() " + "received invalid partid=0x%x in message\n", + partid); + } else { + part = &xpc_partitions[partid]; + if (xpc_part_ref(part)) { + xpc_handle_activate_mq_msg_uv(part, msg_hdr, + &wakeup_hb_checker); + xpc_part_deref(part); + } + } + + gru_free_message(xpc_activate_mq_uv, msg_hdr); + } + + if (wakeup_hb_checker) + wake_up_interruptible(&xpc_activate_IRQ_wq); + + return IRQ_HANDLED; +} + +static enum xp_retval +xpc_send_activate_IRQ_uv(struct xpc_partition *part, void *msg, size_t msg_size, + int msg_type) +{ + struct xpc_activate_mq_msghdr_uv *msg_hdr = msg; + + DBUG_ON(msg_size > XPC_ACTIVATE_MSG_SIZE_UV); + + msg_hdr->type = msg_type; + msg_hdr->partid = XPC_PARTID(part); + msg_hdr->act_state = part->act_state; + msg_hdr->rp_ts_jiffies = xpc_rsvd_page->ts_jiffies; + + /* ??? Is holding a spin_lock (ch->lock) during this call a bad idea? */ + return xpc_send_gru_msg(part->sn.uv.remote_activate_mq_gpa, msg, + msg_size); +} + +static void +xpc_send_activate_IRQ_part_uv(struct xpc_partition *part, void *msg, + size_t msg_size, int msg_type) +{ + enum xp_retval ret; + + ret = xpc_send_activate_IRQ_uv(part, msg, msg_size, msg_type); + if (unlikely(ret != xpSuccess)) + XPC_DEACTIVATE_PARTITION(part, ret); +} + +static void +xpc_send_activate_IRQ_ch_uv(struct xpc_channel *ch, unsigned long *irq_flags, + void *msg, size_t msg_size, int msg_type) +{ + struct xpc_partition *part = &xpc_partitions[ch->number]; + enum xp_retval ret; + + ret = xpc_send_activate_IRQ_uv(part, msg, msg_size, msg_type); + if (unlikely(ret != xpSuccess)) { + if (irq_flags != NULL) + spin_unlock_irqrestore(&ch->lock, *irq_flags); + + XPC_DEACTIVATE_PARTITION(part, ret); + + if (irq_flags != NULL) + spin_lock_irqsave(&ch->lock, *irq_flags); + } +} + +static void +xpc_send_local_activate_IRQ_uv(struct xpc_partition *part, int act_state_req) +{ + unsigned long irq_flags; + struct xpc_partition_uv *part_uv = &part->sn.uv; + + /* + * !!! Make our side think that the remote parition sent an activate + * !!! message our way by doing what the activate IRQ handler would + * !!! do had one really been sent. + */ + + spin_lock_irqsave(&xpc_activate_IRQ_rcvd_lock, irq_flags); + if (part_uv->act_state_req == 0) + xpc_activate_IRQ_rcvd++; + part_uv->act_state_req = act_state_req; + spin_unlock_irqrestore(&xpc_activate_IRQ_rcvd_lock, irq_flags); + + wake_up_interruptible(&xpc_activate_IRQ_wq); +} + +static enum xp_retval +xpc_get_partition_rsvd_page_pa_uv(void *buf, u64 *cookie, unsigned long *rp_pa, + size_t *len) +{ + /* !!! call the UV version of sn_partition_reserved_page_pa() */ + return xpUnsupported; +} + +static int +xpc_setup_rsvd_page_sn_uv(struct xpc_rsvd_page *rp) +{ + rp->sn.activate_mq_gpa = uv_gpa(xpc_activate_mq_uv); + return 0; +} + +static void +xpc_send_heartbeat_uv(int msg_type) +{ + short partid; + struct xpc_partition *part; + struct xpc_activate_mq_msg_heartbeat_req_uv msg; + + /* + * !!! On uv we're broadcasting a heartbeat message every 5 seconds. + * !!! Whereas on sn2 we're bte_copy'ng the heartbeat info every 20 + * !!! seconds. This is an increase in numalink traffic. + * ??? Is this good? + */ + + msg.heartbeat = atomic64_inc_return(&xpc_heartbeat_uv); + + partid = find_first_bit(xpc_heartbeating_to_mask_uv, + XP_MAX_NPARTITIONS_UV); + + while (partid < XP_MAX_NPARTITIONS_UV) { + part = &xpc_partitions[partid]; + + xpc_send_activate_IRQ_part_uv(part, &msg, sizeof(msg), + msg_type); + + partid = find_next_bit(xpc_heartbeating_to_mask_uv, + XP_MAX_NPARTITIONS_UV, partid + 1); + } +} + +static void +xpc_increment_heartbeat_uv(void) +{ + xpc_send_heartbeat_uv(XPC_ACTIVATE_MQ_MSG_INC_HEARTBEAT_UV); +} + +static void +xpc_offline_heartbeat_uv(void) +{ + xpc_send_heartbeat_uv(XPC_ACTIVATE_MQ_MSG_OFFLINE_HEARTBEAT_UV); +} + +static void +xpc_online_heartbeat_uv(void) +{ + xpc_send_heartbeat_uv(XPC_ACTIVATE_MQ_MSG_ONLINE_HEARTBEAT_UV); +} + +static void +xpc_heartbeat_init_uv(void) +{ + atomic64_set(&xpc_heartbeat_uv, 0); + bitmap_zero(xpc_heartbeating_to_mask_uv, XP_MAX_NPARTITIONS_UV); + xpc_heartbeating_to_mask = &xpc_heartbeating_to_mask_uv[0]; +} + +static void +xpc_heartbeat_exit_uv(void) +{ + xpc_send_heartbeat_uv(XPC_ACTIVATE_MQ_MSG_OFFLINE_HEARTBEAT_UV); +} + +static enum xp_retval +xpc_get_remote_heartbeat_uv(struct xpc_partition *part) +{ + struct xpc_partition_uv *part_uv = &part->sn.uv; + enum xp_retval ret = xpNoHeartbeat; + + if (part_uv->remote_act_state != XPC_P_AS_INACTIVE && + part_uv->remote_act_state != XPC_P_AS_DEACTIVATING) { + + if (part_uv->heartbeat != part->last_heartbeat || + (part_uv->flags & XPC_P_HEARTBEAT_OFFLINE_UV)) { + + part->last_heartbeat = part_uv->heartbeat; + ret = xpSuccess; + } + } + return ret; +} + +static void +xpc_request_partition_activation_uv(struct xpc_rsvd_page *remote_rp, + unsigned long remote_rp_gpa, int nasid) +{ + short partid = remote_rp->SAL_partid; + struct xpc_partition *part = &xpc_partitions[partid]; + struct xpc_activate_mq_msg_activate_req_uv msg; + + part->remote_rp_pa = remote_rp_gpa; /* !!! _pa here is really _gpa */ + part->remote_rp_ts_jiffies = remote_rp->ts_jiffies; + part->sn.uv.remote_activate_mq_gpa = remote_rp->sn.activate_mq_gpa; + + /* + * ??? Is it a good idea to make this conditional on what is + * ??? potentially stale state information? + */ + if (part->sn.uv.remote_act_state == XPC_P_AS_INACTIVE) { + msg.rp_gpa = uv_gpa(xpc_rsvd_page); + msg.activate_mq_gpa = xpc_rsvd_page->sn.activate_mq_gpa; + xpc_send_activate_IRQ_part_uv(part, &msg, sizeof(msg), + XPC_ACTIVATE_MQ_MSG_ACTIVATE_REQ_UV); + } + + if (part->act_state == XPC_P_AS_INACTIVE) + xpc_send_local_activate_IRQ_uv(part, XPC_P_ASR_ACTIVATE_UV); +} + +static void +xpc_request_partition_reactivation_uv(struct xpc_partition *part) +{ + xpc_send_local_activate_IRQ_uv(part, XPC_P_ASR_ACTIVATE_UV); +} + +static void +xpc_request_partition_deactivation_uv(struct xpc_partition *part) +{ + struct xpc_activate_mq_msg_deactivate_req_uv msg; + + /* + * ??? Is it a good idea to make this conditional on what is + * ??? potentially stale state information? + */ + if (part->sn.uv.remote_act_state != XPC_P_AS_DEACTIVATING && + part->sn.uv.remote_act_state != XPC_P_AS_INACTIVE) { + + msg.reason = part->reason; + xpc_send_activate_IRQ_part_uv(part, &msg, sizeof(msg), + XPC_ACTIVATE_MQ_MSG_DEACTIVATE_REQ_UV); + } +} + +static void +xpc_cancel_partition_deactivation_request_uv(struct xpc_partition *part) +{ + /* nothing needs to be done */ + return; +} + +static void +xpc_init_fifo_uv(struct xpc_fifo_head_uv *head) +{ + head->first = NULL; + head->last = NULL; + spin_lock_init(&head->lock); + head->n_entries = 0; +} + +static void * +xpc_get_fifo_entry_uv(struct xpc_fifo_head_uv *head) +{ + unsigned long irq_flags; + struct xpc_fifo_entry_uv *first; + + spin_lock_irqsave(&head->lock, irq_flags); + first = head->first; + if (head->first != NULL) { + head->first = first->next; + if (head->first == NULL) + head->last = NULL; + } + head->n_entries++; + spin_unlock_irqrestore(&head->lock, irq_flags); + first->next = NULL; + return first; +} + +static void +xpc_put_fifo_entry_uv(struct xpc_fifo_head_uv *head, + struct xpc_fifo_entry_uv *last) +{ + unsigned long irq_flags; + + last->next = NULL; + spin_lock_irqsave(&head->lock, irq_flags); + if (head->last != NULL) + head->last->next = last; + else + head->first = last; + head->last = last; + head->n_entries--; + BUG_ON(head->n_entries < 0); + spin_unlock_irqrestore(&head->lock, irq_flags); +} + +static int +xpc_n_of_fifo_entries_uv(struct xpc_fifo_head_uv *head) +{ + return head->n_entries; +} + +/* + * Setup the channel structures that are uv specific. + */ +static enum xp_retval +xpc_setup_ch_structures_sn_uv(struct xpc_partition *part) +{ + struct xpc_channel_uv *ch_uv; + int ch_number; + + for (ch_number = 0; ch_number < part->nchannels; ch_number++) { + ch_uv = &part->channels[ch_number].sn.uv; + + xpc_init_fifo_uv(&ch_uv->msg_slot_free_list); + xpc_init_fifo_uv(&ch_uv->recv_msg_list); + } + + return xpSuccess; +} + +/* + * Teardown the channel structures that are uv specific. + */ +static void +xpc_teardown_ch_structures_sn_uv(struct xpc_partition *part) +{ + /* nothing needs to be done */ + return; +} + +static enum xp_retval +xpc_make_first_contact_uv(struct xpc_partition *part) +{ + struct xpc_activate_mq_msg_uv msg; + + /* + * We send a sync msg to get the remote partition's remote_act_state + * updated to our current act_state which at this point should + * be XPC_P_AS_ACTIVATING. + */ + xpc_send_activate_IRQ_part_uv(part, &msg, sizeof(msg), + XPC_ACTIVATE_MQ_MSG_SYNC_ACT_STATE_UV); + + while (part->sn.uv.remote_act_state != XPC_P_AS_ACTIVATING) { + + dev_dbg(xpc_part, "waiting to make first contact with " + "partition %d\n", XPC_PARTID(part)); + + /* wait a 1/4 of a second or so */ + (void)msleep_interruptible(250); + + if (part->act_state == XPC_P_AS_DEACTIVATING) + return part->reason; + } + + return xpSuccess; +} + +static u64 +xpc_get_chctl_all_flags_uv(struct xpc_partition *part) +{ + unsigned long irq_flags; + union xpc_channel_ctl_flags chctl; + + spin_lock_irqsave(&part->chctl_lock, irq_flags); + chctl = part->chctl; + if (chctl.all_flags != 0) + part->chctl.all_flags = 0; + + spin_unlock_irqrestore(&part->chctl_lock, irq_flags); + return chctl.all_flags; +} + +static enum xp_retval +xpc_allocate_send_msg_slot_uv(struct xpc_channel *ch) +{ + struct xpc_channel_uv *ch_uv = &ch->sn.uv; + struct xpc_send_msg_slot_uv *msg_slot; + unsigned long irq_flags; + int nentries; + int entry; + size_t nbytes; + + for (nentries = ch->local_nentries; nentries > 0; nentries--) { + nbytes = nentries * sizeof(struct xpc_send_msg_slot_uv); + ch_uv->send_msg_slots = kzalloc(nbytes, GFP_KERNEL); + if (ch_uv->send_msg_slots == NULL) + continue; + + for (entry = 0; entry < nentries; entry++) { + msg_slot = &ch_uv->send_msg_slots[entry]; + + msg_slot->msg_slot_number = entry; + xpc_put_fifo_entry_uv(&ch_uv->msg_slot_free_list, + &msg_slot->next); + } + + spin_lock_irqsave(&ch->lock, irq_flags); + if (nentries < ch->local_nentries) + ch->local_nentries = nentries; + spin_unlock_irqrestore(&ch->lock, irq_flags); + return xpSuccess; + } + + return xpNoMemory; +} + +static enum xp_retval +xpc_allocate_recv_msg_slot_uv(struct xpc_channel *ch) +{ + struct xpc_channel_uv *ch_uv = &ch->sn.uv; + struct xpc_notify_mq_msg_uv *msg_slot; + unsigned long irq_flags; + int nentries; + int entry; + size_t nbytes; + + for (nentries = ch->remote_nentries; nentries > 0; nentries--) { + nbytes = nentries * ch->entry_size; + ch_uv->recv_msg_slots = kzalloc(nbytes, GFP_KERNEL); + if (ch_uv->recv_msg_slots == NULL) + continue; + + for (entry = 0; entry < nentries; entry++) { + msg_slot = ch_uv->recv_msg_slots + entry * + ch->entry_size; + + msg_slot->hdr.msg_slot_number = entry; + } + + spin_lock_irqsave(&ch->lock, irq_flags); + if (nentries < ch->remote_nentries) + ch->remote_nentries = nentries; + spin_unlock_irqrestore(&ch->lock, irq_flags); + return xpSuccess; + } + + return xpNoMemory; +} + +/* + * Allocate msg_slots associated with the channel. + */ +static enum xp_retval +xpc_setup_msg_structures_uv(struct xpc_channel *ch) +{ + static enum xp_retval ret; + struct xpc_channel_uv *ch_uv = &ch->sn.uv; + + DBUG_ON(ch->flags & XPC_C_SETUP); + + ret = xpc_allocate_send_msg_slot_uv(ch); + if (ret == xpSuccess) { + + ret = xpc_allocate_recv_msg_slot_uv(ch); + if (ret != xpSuccess) { + kfree(ch_uv->send_msg_slots); + xpc_init_fifo_uv(&ch_uv->msg_slot_free_list); + } + } + return ret; +} + +/* + * Free up msg_slots and clear other stuff that were setup for the specified + * channel. + */ +static void +xpc_teardown_msg_structures_uv(struct xpc_channel *ch) +{ + struct xpc_channel_uv *ch_uv = &ch->sn.uv; + + DBUG_ON(!spin_is_locked(&ch->lock)); + + ch_uv->remote_notify_mq_gpa = 0; + + if (ch->flags & XPC_C_SETUP) { + xpc_init_fifo_uv(&ch_uv->msg_slot_free_list); + kfree(ch_uv->send_msg_slots); + xpc_init_fifo_uv(&ch_uv->recv_msg_list); + kfree(ch_uv->recv_msg_slots); + } +} + +static void +xpc_send_chctl_closerequest_uv(struct xpc_channel *ch, unsigned long *irq_flags) +{ + struct xpc_activate_mq_msg_chctl_closerequest_uv msg; + + msg.ch_number = ch->number; + msg.reason = ch->reason; + xpc_send_activate_IRQ_ch_uv(ch, irq_flags, &msg, sizeof(msg), + XPC_ACTIVATE_MQ_MSG_CHCTL_CLOSEREQUEST_UV); +} + +static void +xpc_send_chctl_closereply_uv(struct xpc_channel *ch, unsigned long *irq_flags) +{ + struct xpc_activate_mq_msg_chctl_closereply_uv msg; + + msg.ch_number = ch->number; + xpc_send_activate_IRQ_ch_uv(ch, irq_flags, &msg, sizeof(msg), + XPC_ACTIVATE_MQ_MSG_CHCTL_CLOSEREPLY_UV); +} + +static void +xpc_send_chctl_openrequest_uv(struct xpc_channel *ch, unsigned long *irq_flags) +{ + struct xpc_activate_mq_msg_chctl_openrequest_uv msg; + + msg.ch_number = ch->number; + msg.entry_size = ch->entry_size; + msg.local_nentries = ch->local_nentries; + xpc_send_activate_IRQ_ch_uv(ch, irq_flags, &msg, sizeof(msg), + XPC_ACTIVATE_MQ_MSG_CHCTL_OPENREQUEST_UV); +} + +static void +xpc_send_chctl_openreply_uv(struct xpc_channel *ch, unsigned long *irq_flags) +{ + struct xpc_activate_mq_msg_chctl_openreply_uv msg; + + msg.ch_number = ch->number; + msg.local_nentries = ch->local_nentries; + msg.remote_nentries = ch->remote_nentries; + msg.local_notify_mq_gpa = uv_gpa(xpc_notify_mq_uv); + xpc_send_activate_IRQ_ch_uv(ch, irq_flags, &msg, sizeof(msg), + XPC_ACTIVATE_MQ_MSG_CHCTL_OPENREPLY_UV); +} + +static void +xpc_send_chctl_local_msgrequest_uv(struct xpc_partition *part, int ch_number) +{ + unsigned long irq_flags; + + spin_lock_irqsave(&part->chctl_lock, irq_flags); + part->chctl.flags[ch_number] |= XPC_CHCTL_MSGREQUEST; + spin_unlock_irqrestore(&part->chctl_lock, irq_flags); + + xpc_wakeup_channel_mgr(part); +} + +static void +xpc_save_remote_msgqueue_pa_uv(struct xpc_channel *ch, + unsigned long msgqueue_pa) +{ + ch->sn.uv.remote_notify_mq_gpa = msgqueue_pa; +} + +static void +xpc_indicate_partition_engaged_uv(struct xpc_partition *part) +{ + struct xpc_activate_mq_msg_uv msg; + + xpc_send_activate_IRQ_part_uv(part, &msg, sizeof(msg), + XPC_ACTIVATE_MQ_MSG_MARK_ENGAGED_UV); +} + +static void +xpc_indicate_partition_disengaged_uv(struct xpc_partition *part) +{ + struct xpc_activate_mq_msg_uv msg; + + xpc_send_activate_IRQ_part_uv(part, &msg, sizeof(msg), + XPC_ACTIVATE_MQ_MSG_MARK_DISENGAGED_UV); +} + +static void +xpc_assume_partition_disengaged_uv(short partid) +{ + struct xpc_partition_uv *part_uv = &xpc_partitions[partid].sn.uv; + unsigned long irq_flags; + + spin_lock_irqsave(&part_uv->flags_lock, irq_flags); + part_uv->flags &= ~XPC_P_ENGAGED_UV; + spin_unlock_irqrestore(&part_uv->flags_lock, irq_flags); +} + +static int +xpc_partition_engaged_uv(short partid) +{ + return (xpc_partitions[partid].sn.uv.flags & XPC_P_ENGAGED_UV) != 0; +} + +static int +xpc_any_partition_engaged_uv(void) +{ + struct xpc_partition_uv *part_uv; + short partid; + + for (partid = 0; partid < XP_MAX_NPARTITIONS_UV; partid++) { + part_uv = &xpc_partitions[partid].sn.uv; + if ((part_uv->flags & XPC_P_ENGAGED_UV) != 0) + return 1; + } + return 0; +} + +static enum xp_retval +xpc_allocate_msg_slot_uv(struct xpc_channel *ch, u32 flags, + struct xpc_send_msg_slot_uv **address_of_msg_slot) +{ + enum xp_retval ret; + struct xpc_send_msg_slot_uv *msg_slot; + struct xpc_fifo_entry_uv *entry; + + while (1) { + entry = xpc_get_fifo_entry_uv(&ch->sn.uv.msg_slot_free_list); + if (entry != NULL) + break; + + if (flags & XPC_NOWAIT) + return xpNoWait; + + ret = xpc_allocate_msg_wait(ch); + if (ret != xpInterrupted && ret != xpTimeout) + return ret; + } + + msg_slot = container_of(entry, struct xpc_send_msg_slot_uv, next); + *address_of_msg_slot = msg_slot; + return xpSuccess; +} + +static void +xpc_free_msg_slot_uv(struct xpc_channel *ch, + struct xpc_send_msg_slot_uv *msg_slot) +{ + xpc_put_fifo_entry_uv(&ch->sn.uv.msg_slot_free_list, &msg_slot->next); + + /* wakeup anyone waiting for a free msg slot */ + if (atomic_read(&ch->n_on_msg_allocate_wq) > 0) + wake_up(&ch->msg_allocate_wq); +} + +static void +xpc_notify_sender_uv(struct xpc_channel *ch, + struct xpc_send_msg_slot_uv *msg_slot, + enum xp_retval reason) +{ + xpc_notify_func func = msg_slot->func; + + if (func != NULL && cmpxchg(&msg_slot->func, func, NULL) == func) { + + atomic_dec(&ch->n_to_notify); + + dev_dbg(xpc_chan, "msg_slot->func() called, msg_slot=0x%p " + "msg_slot_number=%d partid=%d channel=%d\n", msg_slot, + msg_slot->msg_slot_number, ch->partid, ch->number); + + func(reason, ch->partid, ch->number, msg_slot->key); + + dev_dbg(xpc_chan, "msg_slot->func() returned, msg_slot=0x%p " + "msg_slot_number=%d partid=%d channel=%d\n", msg_slot, + msg_slot->msg_slot_number, ch->partid, ch->number); + } +} + +static void +xpc_handle_notify_mq_ack_uv(struct xpc_channel *ch, + struct xpc_notify_mq_msg_uv *msg) +{ + struct xpc_send_msg_slot_uv *msg_slot; + int entry = msg->hdr.msg_slot_number % ch->local_nentries; + + msg_slot = &ch->sn.uv.send_msg_slots[entry]; + + BUG_ON(msg_slot->msg_slot_number != msg->hdr.msg_slot_number); + msg_slot->msg_slot_number += ch->local_nentries; + + if (msg_slot->func != NULL) + xpc_notify_sender_uv(ch, msg_slot, xpMsgDelivered); + + xpc_free_msg_slot_uv(ch, msg_slot); +} + +static void +xpc_handle_notify_mq_msg_uv(struct xpc_partition *part, + struct xpc_notify_mq_msg_uv *msg) +{ + struct xpc_partition_uv *part_uv = &part->sn.uv; + struct xpc_channel *ch; + struct xpc_channel_uv *ch_uv; + struct xpc_notify_mq_msg_uv *msg_slot; + unsigned long irq_flags; + int ch_number = msg->hdr.ch_number; + + if (unlikely(ch_number >= part->nchannels)) { + dev_err(xpc_part, "xpc_handle_notify_IRQ_uv() received invalid " + "channel number=0x%x in message from partid=%d\n", + ch_number, XPC_PARTID(part)); + + /* get hb checker to deactivate from the remote partition */ + spin_lock_irqsave(&xpc_activate_IRQ_rcvd_lock, irq_flags); + if (part_uv->act_state_req == 0) + xpc_activate_IRQ_rcvd++; + part_uv->act_state_req = XPC_P_ASR_DEACTIVATE_UV; + part_uv->reason = xpBadChannelNumber; + spin_unlock_irqrestore(&xpc_activate_IRQ_rcvd_lock, irq_flags); + + wake_up_interruptible(&xpc_activate_IRQ_wq); + return; + } + + ch = &part->channels[ch_number]; + xpc_msgqueue_ref(ch); + + if (!(ch->flags & XPC_C_CONNECTED)) { + xpc_msgqueue_deref(ch); + return; + } + + /* see if we're really dealing with an ACK for a previously sent msg */ + if (msg->hdr.size == 0) { + xpc_handle_notify_mq_ack_uv(ch, msg); + xpc_msgqueue_deref(ch); + return; + } + + /* we're dealing with a normal message sent via the notify_mq */ + ch_uv = &ch->sn.uv; + + msg_slot = (struct xpc_notify_mq_msg_uv *)((u64)ch_uv->recv_msg_slots + + (msg->hdr.msg_slot_number % ch->remote_nentries) * + ch->entry_size); + + BUG_ON(msg->hdr.msg_slot_number != msg_slot->hdr.msg_slot_number); + BUG_ON(msg_slot->hdr.size != 0); + + memcpy(msg_slot, msg, msg->hdr.size); + + xpc_put_fifo_entry_uv(&ch_uv->recv_msg_list, &msg_slot->hdr.u.next); + + if (ch->flags & XPC_C_CONNECTEDCALLOUT_MADE) { + /* + * If there is an existing idle kthread get it to deliver + * the payload, otherwise we'll have to get the channel mgr + * for this partition to create a kthread to do the delivery. + */ + if (atomic_read(&ch->kthreads_idle) > 0) + wake_up_nr(&ch->idle_wq, 1); + else + xpc_send_chctl_local_msgrequest_uv(part, ch->number); + } + xpc_msgqueue_deref(ch); +} + +static irqreturn_t +xpc_handle_notify_IRQ_uv(int irq, void *dev_id) +{ + struct xpc_notify_mq_msg_uv *msg; + short partid; + struct xpc_partition *part; + + while ((msg = gru_get_next_message(xpc_notify_mq_uv)) != NULL) { + + partid = msg->hdr.partid; + if (partid < 0 || partid >= XP_MAX_NPARTITIONS_UV) { + dev_err(xpc_part, "xpc_handle_notify_IRQ_uv() received " + "invalid partid=0x%x in message\n", partid); + } else { + part = &xpc_partitions[partid]; + + if (xpc_part_ref(part)) { + xpc_handle_notify_mq_msg_uv(part, msg); + xpc_part_deref(part); + } + } + + gru_free_message(xpc_notify_mq_uv, msg); + } + + return IRQ_HANDLED; +} + +static int +xpc_n_of_deliverable_payloads_uv(struct xpc_channel *ch) +{ + return xpc_n_of_fifo_entries_uv(&ch->sn.uv.recv_msg_list); +} + +static void +xpc_process_msg_chctl_flags_uv(struct xpc_partition *part, int ch_number) +{ + struct xpc_channel *ch = &part->channels[ch_number]; + int ndeliverable_payloads; + + xpc_msgqueue_ref(ch); + + ndeliverable_payloads = xpc_n_of_deliverable_payloads_uv(ch); + + if (ndeliverable_payloads > 0 && + (ch->flags & XPC_C_CONNECTED) && + (ch->flags & XPC_C_CONNECTEDCALLOUT_MADE)) { + + xpc_activate_kthreads(ch, ndeliverable_payloads); + } + + xpc_msgqueue_deref(ch); +} + +static enum xp_retval +xpc_send_payload_uv(struct xpc_channel *ch, u32 flags, void *payload, + u16 payload_size, u8 notify_type, xpc_notify_func func, + void *key) +{ + enum xp_retval ret = xpSuccess; + struct xpc_send_msg_slot_uv *msg_slot = NULL; + struct xpc_notify_mq_msg_uv *msg; + u8 msg_buffer[XPC_NOTIFY_MSG_SIZE_UV]; + size_t msg_size; + + DBUG_ON(notify_type != XPC_N_CALL); + + msg_size = sizeof(struct xpc_notify_mq_msghdr_uv) + payload_size; + if (msg_size > ch->entry_size) + return xpPayloadTooBig; + + xpc_msgqueue_ref(ch); + + if (ch->flags & XPC_C_DISCONNECTING) { + ret = ch->reason; + goto out_1; + } + if (!(ch->flags & XPC_C_CONNECTED)) { + ret = xpNotConnected; + goto out_1; + } + + ret = xpc_allocate_msg_slot_uv(ch, flags, &msg_slot); + if (ret != xpSuccess) + goto out_1; + + if (func != NULL) { + atomic_inc(&ch->n_to_notify); + + msg_slot->key = key; + wmb(); /* a non-NULL func must hit memory after the key */ + msg_slot->func = func; + + if (ch->flags & XPC_C_DISCONNECTING) { + ret = ch->reason; + goto out_2; + } + } + + msg = (struct xpc_notify_mq_msg_uv *)&msg_buffer; + msg->hdr.partid = xp_partition_id; + msg->hdr.ch_number = ch->number; + msg->hdr.size = msg_size; + msg->hdr.msg_slot_number = msg_slot->msg_slot_number; + memcpy(&msg->payload, payload, payload_size); + + ret = xpc_send_gru_msg(ch->sn.uv.remote_notify_mq_gpa, msg, msg_size); + if (ret == xpSuccess) + goto out_1; + + XPC_DEACTIVATE_PARTITION(&xpc_partitions[ch->partid], ret); +out_2: + if (func != NULL) { + /* + * Try to NULL the msg_slot's func field. If we fail, then + * xpc_notify_senders_of_disconnect_uv() beat us to it, in which + * case we need to pretend we succeeded to send the message + * since the user will get a callout for the disconnect error + * by xpc_notify_senders_of_disconnect_uv(), and to also get an + * error returned here will confuse them. Additionally, since + * in this case the channel is being disconnected we don't need + * to put the the msg_slot back on the free list. + */ + if (cmpxchg(&msg_slot->func, func, NULL) != func) { + ret = xpSuccess; + goto out_1; + } + + msg_slot->key = NULL; + atomic_dec(&ch->n_to_notify); + } + xpc_free_msg_slot_uv(ch, msg_slot); +out_1: + xpc_msgqueue_deref(ch); + return ret; +} + +/* + * Tell the callers of xpc_send_notify() that the status of their payloads + * is unknown because the channel is now disconnecting. + * + * We don't worry about putting these msg_slots on the free list since the + * msg_slots themselves are about to be kfree'd. + */ +static void +xpc_notify_senders_of_disconnect_uv(struct xpc_channel *ch) +{ + struct xpc_send_msg_slot_uv *msg_slot; + int entry; + + DBUG_ON(!(ch->flags & XPC_C_DISCONNECTING)); + + for (entry = 0; entry < ch->local_nentries; entry++) { + + if (atomic_read(&ch->n_to_notify) == 0) + break; + + msg_slot = &ch->sn.uv.send_msg_slots[entry]; + if (msg_slot->func != NULL) + xpc_notify_sender_uv(ch, msg_slot, ch->reason); + } +} + +/* + * Get the next deliverable message's payload. + */ +static void * +xpc_get_deliverable_payload_uv(struct xpc_channel *ch) +{ + struct xpc_fifo_entry_uv *entry; + struct xpc_notify_mq_msg_uv *msg; + void *payload = NULL; + + if (!(ch->flags & XPC_C_DISCONNECTING)) { + entry = xpc_get_fifo_entry_uv(&ch->sn.uv.recv_msg_list); + if (entry != NULL) { + msg = container_of(entry, struct xpc_notify_mq_msg_uv, + hdr.u.next); + payload = &msg->payload; + } + } + return payload; +} + +static void +xpc_received_payload_uv(struct xpc_channel *ch, void *payload) +{ + struct xpc_notify_mq_msg_uv *msg; + enum xp_retval ret; + + msg = container_of(payload, struct xpc_notify_mq_msg_uv, payload); + + /* return an ACK to the sender of this message */ + + msg->hdr.partid = xp_partition_id; + msg->hdr.size = 0; /* size of zero indicates this is an ACK */ + + ret = xpc_send_gru_msg(ch->sn.uv.remote_notify_mq_gpa, msg, + sizeof(struct xpc_notify_mq_msghdr_uv)); + if (ret != xpSuccess) + XPC_DEACTIVATE_PARTITION(&xpc_partitions[ch->partid], ret); + + msg->hdr.msg_slot_number += ch->remote_nentries; +} + +int +xpc_init_uv(void) +{ + xpc_setup_partitions_sn = xpc_setup_partitions_sn_uv; + xpc_process_activate_IRQ_rcvd = xpc_process_activate_IRQ_rcvd_uv; + xpc_get_partition_rsvd_page_pa = xpc_get_partition_rsvd_page_pa_uv; + xpc_setup_rsvd_page_sn = xpc_setup_rsvd_page_sn_uv; + xpc_increment_heartbeat = xpc_increment_heartbeat_uv; + xpc_offline_heartbeat = xpc_offline_heartbeat_uv; + xpc_online_heartbeat = xpc_online_heartbeat_uv; + xpc_heartbeat_init = xpc_heartbeat_init_uv; + xpc_heartbeat_exit = xpc_heartbeat_exit_uv; + xpc_get_remote_heartbeat = xpc_get_remote_heartbeat_uv; + + xpc_request_partition_activation = xpc_request_partition_activation_uv; + xpc_request_partition_reactivation = + xpc_request_partition_reactivation_uv; + xpc_request_partition_deactivation = + xpc_request_partition_deactivation_uv; + xpc_cancel_partition_deactivation_request = + xpc_cancel_partition_deactivation_request_uv; + + xpc_setup_ch_structures_sn = xpc_setup_ch_structures_sn_uv; + xpc_teardown_ch_structures_sn = xpc_teardown_ch_structures_sn_uv; + + xpc_make_first_contact = xpc_make_first_contact_uv; + + xpc_get_chctl_all_flags = xpc_get_chctl_all_flags_uv; + xpc_send_chctl_closerequest = xpc_send_chctl_closerequest_uv; + xpc_send_chctl_closereply = xpc_send_chctl_closereply_uv; + xpc_send_chctl_openrequest = xpc_send_chctl_openrequest_uv; + xpc_send_chctl_openreply = xpc_send_chctl_openreply_uv; + + xpc_save_remote_msgqueue_pa = xpc_save_remote_msgqueue_pa_uv; + + xpc_setup_msg_structures = xpc_setup_msg_structures_uv; + xpc_teardown_msg_structures = xpc_teardown_msg_structures_uv; + + xpc_indicate_partition_engaged = xpc_indicate_partition_engaged_uv; + xpc_indicate_partition_disengaged = + xpc_indicate_partition_disengaged_uv; + xpc_assume_partition_disengaged = xpc_assume_partition_disengaged_uv; + xpc_partition_engaged = xpc_partition_engaged_uv; + xpc_any_partition_engaged = xpc_any_partition_engaged_uv; + + xpc_n_of_deliverable_payloads = xpc_n_of_deliverable_payloads_uv; + xpc_process_msg_chctl_flags = xpc_process_msg_chctl_flags_uv; + xpc_send_payload = xpc_send_payload_uv; + xpc_notify_senders_of_disconnect = xpc_notify_senders_of_disconnect_uv; + xpc_get_deliverable_payload = xpc_get_deliverable_payload_uv; + xpc_received_payload = xpc_received_payload_uv; + + if (sizeof(struct xpc_notify_mq_msghdr_uv) > XPC_MSG_HDR_MAX_SIZE) { + dev_err(xpc_part, "xpc_notify_mq_msghdr_uv is larger than %d\n", + XPC_MSG_HDR_MAX_SIZE); + return -E2BIG; + } + + /* ??? The cpuid argument's value is 0, is that what we want? */ + /* !!! The irq argument's value isn't correct. */ + xpc_activate_mq_uv = xpc_create_gru_mq_uv(XPC_ACTIVATE_MQ_SIZE_UV, 0, 0, + xpc_handle_activate_IRQ_uv); + if (xpc_activate_mq_uv == NULL) + return -ENOMEM; + + /* ??? The cpuid argument's value is 0, is that what we want? */ + /* !!! The irq argument's value isn't correct. */ + xpc_notify_mq_uv = xpc_create_gru_mq_uv(XPC_NOTIFY_MQ_SIZE_UV, 0, 0, + xpc_handle_notify_IRQ_uv); + if (xpc_notify_mq_uv == NULL) { + /* !!! The irq argument's value isn't correct. */ + xpc_destroy_gru_mq_uv(xpc_activate_mq_uv, + XPC_ACTIVATE_MQ_SIZE_UV, 0); + return -ENOMEM; + } + + return 0; +} + +void +xpc_exit_uv(void) +{ + /* !!! The irq argument's value isn't correct. */ + xpc_destroy_gru_mq_uv(xpc_notify_mq_uv, XPC_NOTIFY_MQ_SIZE_UV, 0); + + /* !!! The irq argument's value isn't correct. */ + xpc_destroy_gru_mq_uv(xpc_activate_mq_uv, XPC_ACTIVATE_MQ_SIZE_UV, 0); +} diff --git a/drivers/misc/sgi-xp/xpnet.c b/drivers/misc/sgi-xp/xpnet.c index 822dc8e8d7f..71513b3af70 100644 --- a/drivers/misc/sgi-xp/xpnet.c +++ b/drivers/misc/sgi-xp/xpnet.c @@ -21,21 +21,8 @@ */ #include <linux/module.h> -#include <linux/types.h> -#include <linux/kernel.h> -#include <linux/init.h> -#include <linux/ioport.h> #include <linux/netdevice.h> #include <linux/etherdevice.h> -#include <linux/delay.h> -#include <linux/ethtool.h> -#include <linux/mii.h> -#include <linux/smp.h> -#include <linux/string.h> -#include <asm/sn/bte.h> -#include <asm/sn/io.h> -#include <asm/sn/sn_sal.h> -#include <asm/atomic.h> #include "xp.h" /* @@ -57,7 +44,7 @@ struct xpnet_message { u16 version; /* Version for this message */ u16 embedded_bytes; /* #of bytes embedded in XPC message */ u32 magic; /* Special number indicating this is xpnet */ - u64 buf_pa; /* phys address of buffer to retrieve */ + unsigned long buf_pa; /* phys address of buffer to retrieve */ u32 size; /* #of bytes in buffer */ u8 leadin_ignore; /* #of bytes to ignore at the beginning */ u8 tailout_ignore; /* #of bytes to ignore at the end */ @@ -70,11 +57,10 @@ struct xpnet_message { * * XPC expects each message to exist in an individual cacheline. */ -#define XPNET_MSG_SIZE (L1_CACHE_BYTES - XPC_MSG_PAYLOAD_OFFSET) +#define XPNET_MSG_SIZE XPC_MSG_PAYLOAD_MAX_SIZE #define XPNET_MSG_DATA_MAX \ - (XPNET_MSG_SIZE - (u64)(&((struct xpnet_message *)0)->data)) -#define XPNET_MSG_ALIGNED_SIZE (L1_CACHE_ALIGN(XPNET_MSG_SIZE)) -#define XPNET_MSG_NENTRIES (PAGE_SIZE / XPNET_MSG_ALIGNED_SIZE) + (XPNET_MSG_SIZE - offsetof(struct xpnet_message, data)) +#define XPNET_MSG_NENTRIES (PAGE_SIZE / XPC_MSG_MAX_SIZE) #define XPNET_MAX_KTHREADS (XPNET_MSG_NENTRIES + 1) #define XPNET_MAX_IDLE_KTHREADS (XPNET_MSG_NENTRIES + 1) @@ -105,7 +91,6 @@ struct xpnet_message { * then be released. */ struct xpnet_pending_msg { - struct list_head free_list; struct sk_buff *skb; atomic_t use_count; }; @@ -121,7 +106,7 @@ struct net_device *xpnet_device; * When we are notified of other partitions activating, we add them to * our bitmask of partitions to which we broadcast. */ -static u64 xpnet_broadcast_partitions; +static unsigned long *xpnet_broadcast_partitions; /* protect above */ static DEFINE_SPINLOCK(xpnet_broadcast_lock); @@ -141,16 +126,13 @@ static DEFINE_SPINLOCK(xpnet_broadcast_lock); #define XPNET_DEF_MTU (0x8000UL) /* - * The partition id is encapsulated in the MAC address. The following - * define locates the octet the partid is in. + * The partid is encapsulated in the MAC address beginning in the following + * octet and it consists of two octets. */ -#define XPNET_PARTID_OCTET 1 -#define XPNET_LICENSE_OCTET 2 +#define XPNET_PARTID_OCTET 2 + +/* Define the XPNET debug device structures to be used with dev_dbg() et al */ -/* - * Define the XPNET debug device structure that is to be used with dev_dbg(), - * dev_err(), dev_warn(), and dev_info(). - */ struct device_driver xpnet_dbg_name = { .name = "xpnet" }; @@ -169,7 +151,8 @@ static void xpnet_receive(short partid, int channel, struct xpnet_message *msg) { struct sk_buff *skb; - bte_result_t bret; + void *dst; + enum xp_retval ret; struct xpnet_dev_private *priv = (struct xpnet_dev_private *)xpnet_device->priv; @@ -201,7 +184,7 @@ xpnet_receive(short partid, int channel, struct xpnet_message *msg) /* * The allocated skb has some reserved space. - * In order to use bte_copy, we need to get the + * In order to use xp_remote_memcpy(), we need to get the * skb->data pointer moved forward. */ skb_reserve(skb, (L1_CACHE_BYTES - ((u64)skb->data & @@ -226,26 +209,21 @@ xpnet_receive(short partid, int channel, struct xpnet_message *msg) skb_copy_to_linear_data(skb, &msg->data, (size_t)msg->embedded_bytes); } else { + dst = (void *)((u64)skb->data & ~(L1_CACHE_BYTES - 1)); dev_dbg(xpnet, "transferring buffer to the skb->data area;\n\t" - "bte_copy(0x%p, 0x%p, %hu)\n", (void *)msg->buf_pa, - (void *)__pa((u64)skb->data & ~(L1_CACHE_BYTES - 1)), - msg->size); - - bret = bte_copy(msg->buf_pa, - __pa((u64)skb->data & ~(L1_CACHE_BYTES - 1)), - msg->size, (BTE_NOTIFY | BTE_WACQUIRE), NULL); + "xp_remote_memcpy(0x%p, 0x%p, %hu)\n", dst, + (void *)msg->buf_pa, msg->size); - if (bret != BTE_SUCCESS) { + ret = xp_remote_memcpy(xp_pa(dst), msg->buf_pa, msg->size); + if (ret != xpSuccess) { /* - * >>> Need better way of cleaning skb. Currently skb - * >>> appears in_use and we can't just call - * >>> dev_kfree_skb. + * !!! Need better way of cleaning skb. Currently skb + * !!! appears in_use and we can't just call + * !!! dev_kfree_skb. */ - dev_err(xpnet, "bte_copy(0x%p, 0x%p, 0x%hx) returned " - "error=0x%x\n", (void *)msg->buf_pa, - (void *)__pa((u64)skb->data & - ~(L1_CACHE_BYTES - 1)), - msg->size, bret); + dev_err(xpnet, "xp_remote_memcpy(0x%p, 0x%p, 0x%hx) " + "returned error=0x%x\n", dst, + (void *)msg->buf_pa, msg->size, ret); xpc_received(partid, channel, (void *)msg); @@ -285,9 +263,7 @@ static void xpnet_connection_activity(enum xp_retval reason, short partid, int channel, void *data, void *key) { - long bp; - - DBUG_ON(partid <= 0 || partid >= XP_MAX_PARTITIONS); + DBUG_ON(partid < 0 || partid >= xp_max_npartitions); DBUG_ON(channel != XPC_NET_CHANNEL); switch (reason) { @@ -299,31 +275,28 @@ xpnet_connection_activity(enum xp_retval reason, short partid, int channel, case xpConnected: /* connection completed to a partition */ spin_lock_bh(&xpnet_broadcast_lock); - xpnet_broadcast_partitions |= 1UL << (partid - 1); - bp = xpnet_broadcast_partitions; + __set_bit(partid, xpnet_broadcast_partitions); spin_unlock_bh(&xpnet_broadcast_lock); netif_carrier_on(xpnet_device); - dev_dbg(xpnet, "%s connection created to partition %d; " - "xpnet_broadcast_partitions=0x%lx\n", - xpnet_device->name, partid, bp); + dev_dbg(xpnet, "%s connected to partition %d\n", + xpnet_device->name, partid); break; default: spin_lock_bh(&xpnet_broadcast_lock); - xpnet_broadcast_partitions &= ~(1UL << (partid - 1)); - bp = xpnet_broadcast_partitions; + __clear_bit(partid, xpnet_broadcast_partitions); spin_unlock_bh(&xpnet_broadcast_lock); - if (bp == 0) + if (bitmap_empty((unsigned long *)xpnet_broadcast_partitions, + xp_max_npartitions)) { netif_carrier_off(xpnet_device); + } - dev_dbg(xpnet, "%s disconnected from partition %d; " - "xpnet_broadcast_partitions=0x%lx\n", - xpnet_device->name, partid, bp); + dev_dbg(xpnet, "%s disconnected from partition %d\n", + xpnet_device->name, partid); break; - } } @@ -334,8 +307,10 @@ xpnet_dev_open(struct net_device *dev) dev_dbg(xpnet, "calling xpc_connect(%d, 0x%p, NULL, %ld, %ld, %ld, " "%ld)\n", XPC_NET_CHANNEL, xpnet_connection_activity, - XPNET_MSG_SIZE, XPNET_MSG_NENTRIES, XPNET_MAX_KTHREADS, - XPNET_MAX_IDLE_KTHREADS); + (unsigned long)XPNET_MSG_SIZE, + (unsigned long)XPNET_MSG_NENTRIES, + (unsigned long)XPNET_MAX_KTHREADS, + (unsigned long)XPNET_MAX_IDLE_KTHREADS); ret = xpc_connect(XPC_NET_CHANNEL, xpnet_connection_activity, NULL, XPNET_MSG_SIZE, XPNET_MSG_NENTRIES, @@ -426,35 +401,74 @@ xpnet_send_completed(enum xp_retval reason, short partid, int channel, } } +static void +xpnet_send(struct sk_buff *skb, struct xpnet_pending_msg *queued_msg, + u64 start_addr, u64 end_addr, u16 embedded_bytes, int dest_partid) +{ + u8 msg_buffer[XPNET_MSG_SIZE]; + struct xpnet_message *msg = (struct xpnet_message *)&msg_buffer; + u16 msg_size = sizeof(struct xpnet_message); + enum xp_retval ret; + + msg->embedded_bytes = embedded_bytes; + if (unlikely(embedded_bytes != 0)) { + msg->version = XPNET_VERSION_EMBED; + dev_dbg(xpnet, "calling memcpy(0x%p, 0x%p, 0x%lx)\n", + &msg->data, skb->data, (size_t)embedded_bytes); + skb_copy_from_linear_data(skb, &msg->data, + (size_t)embedded_bytes); + msg_size += embedded_bytes - 1; + } else { + msg->version = XPNET_VERSION; + } + msg->magic = XPNET_MAGIC; + msg->size = end_addr - start_addr; + msg->leadin_ignore = (u64)skb->data - start_addr; + msg->tailout_ignore = end_addr - (u64)skb_tail_pointer(skb); + msg->buf_pa = xp_pa((void *)start_addr); + + dev_dbg(xpnet, "sending XPC message to %d:%d\n" + KERN_DEBUG "msg->buf_pa=0x%lx, msg->size=%u, " + "msg->leadin_ignore=%u, msg->tailout_ignore=%u\n", + dest_partid, XPC_NET_CHANNEL, msg->buf_pa, msg->size, + msg->leadin_ignore, msg->tailout_ignore); + + atomic_inc(&queued_msg->use_count); + + ret = xpc_send_notify(dest_partid, XPC_NET_CHANNEL, XPC_NOWAIT, msg, + msg_size, xpnet_send_completed, queued_msg); + if (unlikely(ret != xpSuccess)) + atomic_dec(&queued_msg->use_count); +} + /* * Network layer has formatted a packet (skb) and is ready to place it * "on the wire". Prepare and send an xpnet_message to all partitions * which have connected with us and are targets of this packet. * * MAC-NOTE: For the XPNET driver, the MAC address contains the - * destination partition_id. If the destination partition id word - * is 0xff, this packet is to broadcast to all partitions. + * destination partid. If the destination partid octets are 0xffff, + * this packet is to be broadcast to all connected partitions. */ static int xpnet_dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev) { struct xpnet_pending_msg *queued_msg; - enum xp_retval ret; - struct xpnet_message *msg; u64 start_addr, end_addr; - long dp; - u8 second_mac_octet; short dest_partid; - struct xpnet_dev_private *priv; - u16 embedded_bytes; - - priv = (struct xpnet_dev_private *)dev->priv; + struct xpnet_dev_private *priv = (struct xpnet_dev_private *)dev->priv; + u16 embedded_bytes = 0; dev_dbg(xpnet, ">skb->head=0x%p skb->data=0x%p skb->tail=0x%p " "skb->end=0x%p skb->len=%d\n", (void *)skb->head, (void *)skb->data, skb_tail_pointer(skb), skb_end_pointer(skb), skb->len); + if (skb->data[0] == 0x33) { + dev_kfree_skb(skb); + return 0; /* nothing needed to be done */ + } + /* * The xpnet_pending_msg tracks how many outstanding * xpc_send_notifies are relying on this skb. When none @@ -466,7 +480,6 @@ xpnet_dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev) "packet\n", sizeof(struct xpnet_pending_msg)); priv->stats.tx_errors++; - return -ENOMEM; } @@ -475,7 +488,6 @@ xpnet_dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev) end_addr = L1_CACHE_ALIGN((u64)skb_tail_pointer(skb)); /* calculate how many bytes to embed in the XPC message */ - embedded_bytes = 0; if (unlikely(skb->len <= XPNET_MSG_DATA_MAX)) { /* skb->data does fit so embed */ embedded_bytes = skb->len; @@ -491,82 +503,28 @@ xpnet_dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev) atomic_set(&queued_msg->use_count, 1); queued_msg->skb = skb; - second_mac_octet = skb->data[XPNET_PARTID_OCTET]; - if (second_mac_octet == 0xff) { + if (skb->data[0] == 0xff) { /* we are being asked to broadcast to all partitions */ - dp = xpnet_broadcast_partitions; - } else if (second_mac_octet != 0) { - dp = xpnet_broadcast_partitions & - (1UL << (second_mac_octet - 1)); - } else { - /* 0 is an invalid partid. Ignore */ - dp = 0; - } - dev_dbg(xpnet, "destination Partitions mask (dp) = 0x%lx\n", dp); - - /* - * If we wanted to allow promiscuous mode to work like an - * unswitched network, this would be a good point to OR in a - * mask of partitions which should be receiving all packets. - */ - - /* - * Main send loop. - */ - for (dest_partid = 1; dp && dest_partid < XP_MAX_PARTITIONS; - dest_partid++) { + for_each_bit(dest_partid, xpnet_broadcast_partitions, + xp_max_npartitions) { - if (!(dp & (1UL << (dest_partid - 1)))) { - /* not destined for this partition */ - continue; + xpnet_send(skb, queued_msg, start_addr, end_addr, + embedded_bytes, dest_partid); } + } else { + dest_partid = (short)skb->data[XPNET_PARTID_OCTET + 1]; + dest_partid |= (short)skb->data[XPNET_PARTID_OCTET + 0] << 8; - /* remove this partition from the destinations mask */ - dp &= ~(1UL << (dest_partid - 1)); - - /* found a partition to send to */ - - ret = xpc_allocate(dest_partid, XPC_NET_CHANNEL, - XPC_NOWAIT, (void **)&msg); - if (unlikely(ret != xpSuccess)) - continue; - - msg->embedded_bytes = embedded_bytes; - if (unlikely(embedded_bytes != 0)) { - msg->version = XPNET_VERSION_EMBED; - dev_dbg(xpnet, "calling memcpy(0x%p, 0x%p, 0x%lx)\n", - &msg->data, skb->data, (size_t)embedded_bytes); - skb_copy_from_linear_data(skb, &msg->data, - (size_t)embedded_bytes); - } else { - msg->version = XPNET_VERSION; - } - msg->magic = XPNET_MAGIC; - msg->size = end_addr - start_addr; - msg->leadin_ignore = (u64)skb->data - start_addr; - msg->tailout_ignore = end_addr - (u64)skb_tail_pointer(skb); - msg->buf_pa = __pa(start_addr); - - dev_dbg(xpnet, "sending XPC message to %d:%d\n" - KERN_DEBUG "msg->buf_pa=0x%lx, msg->size=%u, " - "msg->leadin_ignore=%u, msg->tailout_ignore=%u\n", - dest_partid, XPC_NET_CHANNEL, msg->buf_pa, msg->size, - msg->leadin_ignore, msg->tailout_ignore); - - atomic_inc(&queued_msg->use_count); - - ret = xpc_send_notify(dest_partid, XPC_NET_CHANNEL, msg, - xpnet_send_completed, queued_msg); - if (unlikely(ret != xpSuccess)) { - atomic_dec(&queued_msg->use_count); - continue; + if (dest_partid >= 0 && + dest_partid < xp_max_npartitions && + test_bit(dest_partid, xpnet_broadcast_partitions) != 0) { + + xpnet_send(skb, queued_msg, start_addr, end_addr, + embedded_bytes, dest_partid); } } if (atomic_dec_return(&queued_msg->use_count) == 0) { - dev_dbg(xpnet, "no partitions to receive packet destined for " - "%d\n", dest_partid); - dev_kfree_skb(skb); kfree(queued_msg); } @@ -594,23 +552,28 @@ xpnet_dev_tx_timeout(struct net_device *dev) static int __init xpnet_init(void) { - int i; - u32 license_num; - int result = -ENOMEM; + int result; - if (!ia64_platform_is("sn2")) + if (!is_shub() && !is_uv()) return -ENODEV; dev_info(xpnet, "registering network device %s\n", XPNET_DEVICE_NAME); + xpnet_broadcast_partitions = kzalloc(BITS_TO_LONGS(xp_max_npartitions) * + sizeof(long), GFP_KERNEL); + if (xpnet_broadcast_partitions == NULL) + return -ENOMEM; + /* * use ether_setup() to init the majority of our device * structure and then override the necessary pieces. */ xpnet_device = alloc_netdev(sizeof(struct xpnet_dev_private), XPNET_DEVICE_NAME, ether_setup); - if (xpnet_device == NULL) + if (xpnet_device == NULL) { + kfree(xpnet_broadcast_partitions); return -ENOMEM; + } netif_carrier_off(xpnet_device); @@ -628,14 +591,10 @@ xpnet_init(void) * MAC addresses. We chose the first octet of the MAC to be unlikely * to collide with any vendor's officially issued MAC. */ - xpnet_device->dev_addr[0] = 0xfe; - xpnet_device->dev_addr[XPNET_PARTID_OCTET] = sn_partition_id; - license_num = sn_partition_serial_number_val(); - for (i = 3; i >= 0; i--) { - xpnet_device->dev_addr[XPNET_LICENSE_OCTET + i] = - license_num & 0xff; - license_num = license_num >> 8; - } + xpnet_device->dev_addr[0] = 0x02; /* locally administered, no OUI */ + + xpnet_device->dev_addr[XPNET_PARTID_OCTET + 1] = xp_partition_id; + xpnet_device->dev_addr[XPNET_PARTID_OCTET + 0] = (xp_partition_id >> 8); /* * ether_setup() sets this to a multicast device. We are @@ -651,8 +610,10 @@ xpnet_init(void) xpnet_device->features = NETIF_F_NO_CSUM; result = register_netdev(xpnet_device); - if (result != 0) + if (result != 0) { free_netdev(xpnet_device); + kfree(xpnet_broadcast_partitions); + } return result; } @@ -666,8 +627,8 @@ xpnet_exit(void) xpnet_device[0].name); unregister_netdev(xpnet_device); - free_netdev(xpnet_device); + kfree(xpnet_broadcast_partitions); } module_exit(xpnet_exit); diff --git a/drivers/net/niu.c b/drivers/net/niu.c index 8ee7d7bb951..e4765b713ab 100644 --- a/drivers/net/niu.c +++ b/drivers/net/niu.c @@ -6417,7 +6417,7 @@ static int niu_ethflow_to_class(int flow_type, u64 *class) *class = CLASS_CODE_SCTP_IPV6; break; default: - return -1; + return 0; } return 1; diff --git a/drivers/net/ps3_gelic_wireless.c b/drivers/net/ps3_gelic_wireless.c index 6b2dee0cf3a..a834b52a6a2 100644 --- a/drivers/net/ps3_gelic_wireless.c +++ b/drivers/net/ps3_gelic_wireless.c @@ -1024,7 +1024,7 @@ static int gelic_wl_set_encode(struct net_device *netdev, struct gelic_wl_info *wl = port_wl(netdev_priv(netdev)); struct iw_point *enc = &data->encoding; __u16 flags; - unsigned int irqflag; + unsigned long irqflag; int key_index, index_specified; int ret = 0; @@ -1097,7 +1097,7 @@ static int gelic_wl_get_encode(struct net_device *netdev, { struct gelic_wl_info *wl = port_wl(netdev_priv(netdev)); struct iw_point *enc = &data->encoding; - unsigned int irqflag; + unsigned long irqflag; unsigned int key_index, index_specified; int ret = 0; @@ -1215,7 +1215,7 @@ static int gelic_wl_set_encodeext(struct net_device *netdev, struct iw_encode_ext *ext = (struct iw_encode_ext *)extra; __u16 alg; __u16 flags; - unsigned int irqflag; + unsigned long irqflag; int key_index; int ret = 0; @@ -1303,7 +1303,7 @@ static int gelic_wl_get_encodeext(struct net_device *netdev, struct gelic_wl_info *wl = port_wl(netdev_priv(netdev)); struct iw_point *enc = &data->encoding; struct iw_encode_ext *ext = (struct iw_encode_ext *)extra; - unsigned int irqflag; + unsigned long irqflag; int key_index; int ret = 0; int max_key_len; @@ -1426,7 +1426,7 @@ static int gelic_wl_priv_set_psk(struct net_device *net_dev, { struct gelic_wl_info *wl = port_wl(netdev_priv(net_dev)); unsigned int len; - unsigned int irqflag; + unsigned long irqflag; int ret = 0; pr_debug("%s:<- len=%d\n", __func__, data->data.length); @@ -1467,7 +1467,7 @@ static int gelic_wl_priv_get_psk(struct net_device *net_dev, { struct gelic_wl_info *wl = port_wl(netdev_priv(net_dev)); char *p; - unsigned int irqflag; + unsigned long irqflag; unsigned int i; pr_debug("%s:<-\n", __func__); diff --git a/drivers/net/wireless/ath5k/base.c b/drivers/net/wireless/ath5k/base.c index d9769c52734..ff3fad794b6 100644 --- a/drivers/net/wireless/ath5k/base.c +++ b/drivers/net/wireless/ath5k/base.c @@ -43,7 +43,9 @@ #include <linux/version.h> #include <linux/module.h> #include <linux/delay.h> +#include <linux/hardirq.h> #include <linux/if.h> +#include <linux/io.h> #include <linux/netdevice.h> #include <linux/cache.h> #include <linux/pci.h> @@ -471,9 +473,6 @@ ath5k_pci_probe(struct pci_dev *pdev, /* Set private data */ pci_set_drvdata(pdev, hw); - /* Enable msi for devices that support it */ - pci_enable_msi(pdev); - /* Setup interrupt handler */ ret = request_irq(pdev->irq, ath5k_intr, IRQF_SHARED, "ath", sc); if (ret) { @@ -551,7 +550,6 @@ err_ah: err_irq: free_irq(pdev->irq, sc); err_free: - pci_disable_msi(pdev); ieee80211_free_hw(hw); err_map: pci_iounmap(pdev, mem); @@ -573,7 +571,6 @@ ath5k_pci_remove(struct pci_dev *pdev) ath5k_detach(pdev, hw); ath5k_hw_detach(sc->ah); free_irq(pdev->irq, sc); - pci_disable_msi(pdev); pci_iounmap(pdev, sc->iobase); pci_release_region(pdev, 0); pci_disable_device(pdev); @@ -590,6 +587,9 @@ ath5k_pci_suspend(struct pci_dev *pdev, pm_message_t state) ath5k_led_off(sc); ath5k_stop_hw(sc); + + free_irq(pdev->irq, sc); + pci_disable_msi(pdev); pci_save_state(pdev); pci_disable_device(pdev); pci_set_power_state(pdev, PCI_D3hot); @@ -605,15 +605,12 @@ ath5k_pci_resume(struct pci_dev *pdev) struct ath5k_hw *ah = sc->ah; int i, err; - err = pci_set_power_state(pdev, PCI_D0); - if (err) - return err; + pci_restore_state(pdev); err = pci_enable_device(pdev); if (err) return err; - pci_restore_state(pdev); /* * Suspend/Resume resets the PCI configuration space, so we have to * re-disable the RETRY_TIMEOUT register (0x41) to keep @@ -621,7 +618,17 @@ ath5k_pci_resume(struct pci_dev *pdev) */ pci_write_config_byte(pdev, 0x41, 0); - ath5k_init(sc); + pci_enable_msi(pdev); + + err = request_irq(pdev->irq, ath5k_intr, IRQF_SHARED, "ath", sc); + if (err) { + ATH5K_ERR(sc, "request_irq failed\n"); + goto err_msi; + } + + err = ath5k_init(sc); + if (err) + goto err_irq; ath5k_led_enable(sc); /* @@ -635,6 +642,12 @@ ath5k_pci_resume(struct pci_dev *pdev) ath5k_hw_reset_key(ah, i); return 0; +err_irq: + free_irq(pdev->irq, sc); +err_msi: + pci_disable_msi(pdev); + pci_disable_device(pdev); + return err; } #endif /* CONFIG_PM */ @@ -1224,7 +1237,7 @@ ath5k_txbuf_setup(struct ath5k_softc *sc, struct ath5k_buf *bf) pktlen = skb->len; - if (!(info->flags & IEEE80211_TX_CTL_DO_NOT_ENCRYPT)) { + if (info->control.hw_key) { keyidx = info->control.hw_key->hw_key_idx; pktlen += info->control.icv_len; } @@ -1249,6 +1262,7 @@ ath5k_txbuf_setup(struct ath5k_softc *sc, struct ath5k_buf *bf) txq->link = &ds->ds_link; ath5k_hw_tx_start(ah, txq->qnum); + mmiowb(); spin_unlock_bh(&txq->lock); return 0; @@ -1583,7 +1597,6 @@ ath5k_rx_stop(struct ath5k_softc *sc) ath5k_hw_stop_pcu_recv(ah); /* disable PCU */ ath5k_hw_set_rx_filter(ah, 0); /* clear recv filter */ ath5k_hw_stop_rx_dma(ah); /* disable DMA engine */ - mdelay(3); /* 3ms is long enough for 1 frame */ ath5k_debug_printrxbuffs(sc, ah); @@ -1682,31 +1695,44 @@ ath5k_tasklet_rx(unsigned long data) struct ath5k_rx_status rs = {}; struct sk_buff *skb; struct ath5k_softc *sc = (void *)data; - struct ath5k_buf *bf; + struct ath5k_buf *bf, *bf_last; struct ath5k_desc *ds; int ret; int hdrlen; int pad; spin_lock(&sc->rxbuflock); + if (list_empty(&sc->rxbuf)) { + ATH5K_WARN(sc, "empty rx buf pool\n"); + goto unlock; + } + bf_last = list_entry(sc->rxbuf.prev, struct ath5k_buf, list); do { rxs.flag = 0; - if (unlikely(list_empty(&sc->rxbuf))) { - ATH5K_WARN(sc, "empty rx buf pool\n"); - break; - } bf = list_first_entry(&sc->rxbuf, struct ath5k_buf, list); BUG_ON(bf->skb == NULL); skb = bf->skb; ds = bf->desc; - /* TODO only one segment */ - pci_dma_sync_single_for_cpu(sc->pdev, sc->desc_daddr, - sc->desc_len, PCI_DMA_FROMDEVICE); - - if (unlikely(ds->ds_link == bf->daddr)) /* this is the end */ - break; + /* + * last buffer must not be freed to ensure proper hardware + * function. When the hardware finishes also a packet next to + * it, we are sure, it doesn't use it anymore and we can go on. + */ + if (bf_last == bf) + bf->flags |= 1; + if (bf->flags) { + struct ath5k_buf *bf_next = list_entry(bf->list.next, + struct ath5k_buf, list); + ret = sc->ah->ah_proc_rx_desc(sc->ah, bf_next->desc, + &rs); + if (ret) + break; + bf->flags &= ~1; + /* skip the overwritten one (even status is martian) */ + goto next; + } ret = sc->ah->ah_proc_rx_desc(sc->ah, ds, &rs); if (unlikely(ret == -EINPROGRESS)) @@ -1752,8 +1778,6 @@ ath5k_tasklet_rx(unsigned long data) goto next; } accept: - pci_dma_sync_single_for_cpu(sc->pdev, bf->skbaddr, - rs.rs_datalen, PCI_DMA_FROMDEVICE); pci_unmap_single(sc->pdev, bf->skbaddr, sc->rxbufsize, PCI_DMA_FROMDEVICE); bf->skb = NULL; @@ -1816,6 +1840,7 @@ accept: next: list_move_tail(&bf->list, &sc->rxbuf); } while (ath5k_rxbuf_setup(sc, bf) == 0); +unlock: spin_unlock(&sc->rxbuflock); } @@ -1840,9 +1865,6 @@ ath5k_tx_processq(struct ath5k_softc *sc, struct ath5k_txq *txq) list_for_each_entry_safe(bf, bf0, &txq->q, list) { ds = bf->desc; - /* TODO only one segment */ - pci_dma_sync_single_for_cpu(sc->pdev, sc->desc_daddr, - sc->desc_len, PCI_DMA_FROMDEVICE); ret = sc->ah->ah_proc_tx_desc(sc->ah, ds, &ts); if (unlikely(ret == -EINPROGRESS)) break; @@ -2015,8 +2037,6 @@ ath5k_beacon_send(struct ath5k_softc *sc) ATH5K_WARN(sc, "beacon queue %u didn't stop?\n", sc->bhalq); /* NB: hw still stops DMA, so proceed */ } - pci_dma_sync_single_for_cpu(sc->pdev, bf->skbaddr, bf->skb->len, - PCI_DMA_TODEVICE); ath5k_hw_put_tx_buf(ah, sc->bhalq, bf->daddr); ath5k_hw_tx_start(ah, sc->bhalq); @@ -2240,6 +2260,7 @@ ath5k_init(struct ath5k_softc *sc) ret = 0; done: + mmiowb(); mutex_unlock(&sc->lock); return ret; } @@ -2272,6 +2293,7 @@ ath5k_stop_locked(struct ath5k_softc *sc) if (!test_bit(ATH_STAT_INVALID, sc->status)) { ath5k_led_off(sc); ath5k_hw_set_intr(ah, 0); + synchronize_irq(sc->pdev->irq); } ath5k_txq_cleanup(sc); if (!test_bit(ATH_STAT_INVALID, sc->status)) { @@ -2321,9 +2343,13 @@ ath5k_stop_hw(struct ath5k_softc *sc) } } ath5k_txbuf_free(sc, sc->bbuf); + mmiowb(); mutex_unlock(&sc->lock); del_timer_sync(&sc->calib_tim); + tasklet_kill(&sc->rxtq); + tasklet_kill(&sc->txtq); + tasklet_kill(&sc->restq); return ret; } @@ -2550,8 +2576,6 @@ ath5k_init_leds(struct ath5k_softc *sc) struct pci_dev *pdev = sc->pdev; char name[ATH5K_LED_MAX_NAME_LEN + 1]; - sc->led_on = 0; /* active low */ - /* * Auto-enable soft led processing for IBM cards and for * 5211 minipci cards. @@ -2560,11 +2584,13 @@ ath5k_init_leds(struct ath5k_softc *sc) pdev->device == PCI_DEVICE_ID_ATHEROS_AR5211) { __set_bit(ATH_STAT_LEDSOFT, sc->status); sc->led_pin = 0; + sc->led_on = 0; /* active low */ } /* Enable softled on PIN1 on HP Compaq nc6xx, nc4000 & nx5000 laptops */ if (pdev->subsystem_vendor == PCI_VENDOR_ID_COMPAQ) { __set_bit(ATH_STAT_LEDSOFT, sc->status); sc->led_pin = 1; + sc->led_on = 1; /* active high */ } if (!test_bit(ATH_STAT_LEDSOFT, sc->status)) goto out; @@ -2783,6 +2809,7 @@ ath5k_config_interface(struct ieee80211_hw *hw, struct ieee80211_vif *vif, /* XXX: assoc id is set to 0 for now, mac80211 doesn't have * a clean way of letting us retrieve this yet. */ ath5k_hw_set_associd(ah, ah->ah_bssid, 0); + mmiowb(); } if (conf->changed & IEEE80211_IFCC_BEACON && @@ -2971,6 +2998,7 @@ ath5k_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd, } unlock: + mmiowb(); mutex_unlock(&sc->lock); return ret; } @@ -3032,8 +3060,6 @@ ath5k_beacon_update(struct ieee80211_hw *hw, struct sk_buff *skb) ath5k_debug_dump_skb(sc, skb, "BC ", 1); - mutex_lock(&sc->lock); - if (sc->opmode != IEEE80211_IF_TYPE_IBSS) { ret = -EIO; goto end; @@ -3044,11 +3070,12 @@ ath5k_beacon_update(struct ieee80211_hw *hw, struct sk_buff *skb) ret = ath5k_beacon_setup(sc, sc->bbuf); if (ret) sc->bbuf->skb = NULL; - else + else { ath5k_beacon_config(sc); + mmiowb(); + } end: - mutex_unlock(&sc->lock); return ret; } diff --git a/drivers/net/wireless/ath5k/base.h b/drivers/net/wireless/ath5k/base.h index 47f414b09e6..d7e03e6b827 100644 --- a/drivers/net/wireless/ath5k/base.h +++ b/drivers/net/wireless/ath5k/base.h @@ -56,7 +56,7 @@ struct ath5k_buf { struct list_head list; - unsigned int flags; /* tx descriptor flags */ + unsigned int flags; /* rx descriptor flags */ struct ath5k_desc *desc; /* virtual addr of desc */ dma_addr_t daddr; /* physical addr of desc */ struct sk_buff *skb; /* skbuff for buf */ diff --git a/drivers/net/wireless/ath5k/hw.c b/drivers/net/wireless/ath5k/hw.c index c6d12c53bda..7ca87a55731 100644 --- a/drivers/net/wireless/ath5k/hw.c +++ b/drivers/net/wireless/ath5k/hw.c @@ -1440,6 +1440,7 @@ int ath5k_hw_stop_tx_dma(struct ath5k_hw *ah, unsigned int queue) /* Stop queue */ ath5k_hw_reg_write(ah, tx_queue, AR5K_CR); + ath5k_hw_reg_read(ah, AR5K_CR); } else { /* * Schedule TX disable and wait until queue is empty @@ -1456,6 +1457,8 @@ int ath5k_hw_stop_tx_dma(struct ath5k_hw *ah, unsigned int queue) /* Clear register */ ath5k_hw_reg_write(ah, 0, AR5K_QCU_TXD); + if (pending) + return -EBUSY; } /* TODO: Check for success else return error */ @@ -1716,6 +1719,7 @@ enum ath5k_int ath5k_hw_set_intr(struct ath5k_hw *ah, enum ath5k_int new_mask) /* ..re-enable interrupts */ ath5k_hw_reg_write(ah, AR5K_IER_ENABLE, AR5K_IER); + ath5k_hw_reg_read(ah, AR5K_IER); return old_mask; } diff --git a/drivers/net/wireless/b43/main.c b/drivers/net/wireless/b43/main.c index e78319aa47c..3bf3a869361 100644 --- a/drivers/net/wireless/b43/main.c +++ b/drivers/net/wireless/b43/main.c @@ -4645,8 +4645,7 @@ static int b43_wireless_init(struct ssb_device *dev) } /* fill hw info */ - hw->flags = IEEE80211_HW_HOST_GEN_BEACON_TEMPLATE | - IEEE80211_HW_RX_INCLUDES_FCS | + hw->flags = IEEE80211_HW_RX_INCLUDES_FCS | IEEE80211_HW_SIGNAL_DBM | IEEE80211_HW_NOISE_DBM; diff --git a/drivers/net/wireless/b43/xmit.c b/drivers/net/wireless/b43/xmit.c index 8d54502222a..9dda8169f7c 100644 --- a/drivers/net/wireless/b43/xmit.c +++ b/drivers/net/wireless/b43/xmit.c @@ -192,7 +192,7 @@ int b43_generate_txhdr(struct b43_wldev *dev, const struct b43_phy *phy = &dev->phy; const struct ieee80211_hdr *wlhdr = (const struct ieee80211_hdr *)fragment_data; - int use_encryption = (!(info->flags & IEEE80211_TX_CTL_DO_NOT_ENCRYPT)); + int use_encryption = !!info->control.hw_key; __le16 fctl = wlhdr->frame_control; struct ieee80211_rate *fbrate; u8 rate, rate_fb; diff --git a/drivers/net/wireless/b43legacy/main.c b/drivers/net/wireless/b43legacy/main.c index a1b8bf3ee73..2541c81932f 100644 --- a/drivers/net/wireless/b43legacy/main.c +++ b/drivers/net/wireless/b43legacy/main.c @@ -3702,8 +3702,7 @@ static int b43legacy_wireless_init(struct ssb_device *dev) } /* fill hw info */ - hw->flags = IEEE80211_HW_HOST_GEN_BEACON_TEMPLATE | - IEEE80211_HW_RX_INCLUDES_FCS | + hw->flags = IEEE80211_HW_RX_INCLUDES_FCS | IEEE80211_HW_SIGNAL_DBM | IEEE80211_HW_NOISE_DBM; hw->queues = 1; /* FIXME: hardware has more queues */ @@ -3846,10 +3845,10 @@ static int b43legacy_resume(struct ssb_device *dev) goto out; } } - mutex_unlock(&wl->mutex); b43legacydbg(wl, "Device resumed.\n"); out: + mutex_unlock(&wl->mutex); return err; } diff --git a/drivers/net/wireless/b43legacy/xmit.c b/drivers/net/wireless/b43legacy/xmit.c index e969ed8d412..68e1f8c7872 100644 --- a/drivers/net/wireless/b43legacy/xmit.c +++ b/drivers/net/wireless/b43legacy/xmit.c @@ -192,7 +192,7 @@ static int generate_txhdr_fw3(struct b43legacy_wldev *dev, u16 cookie) { const struct ieee80211_hdr *wlhdr; - int use_encryption = (!(info->flags & IEEE80211_TX_CTL_DO_NOT_ENCRYPT)); + int use_encryption = !!info->control.hw_key; u16 fctl; u8 rate; struct ieee80211_rate *rate_fb; diff --git a/drivers/net/wireless/ipw2100.c b/drivers/net/wireless/ipw2100.c index 5bf9e00b070..c6f886ec08a 100644 --- a/drivers/net/wireless/ipw2100.c +++ b/drivers/net/wireless/ipw2100.c @@ -6442,6 +6442,7 @@ static int ipw2100_resume(struct pci_dev *pci_dev) if (err) { printk(KERN_ERR "%s: pci_enable_device failed on resume\n", dev->name); + mutex_unlock(&priv->action_mutex); return err; } pci_restore_state(pci_dev); @@ -7146,7 +7147,7 @@ static int ipw2100_wx_get_rate(struct net_device *dev, err = ipw2100_get_ordinal(priv, IPW_ORD_CURRENT_TX_RATE, &val, &len); if (err) { IPW_DEBUG_WX("failed querying ordinals.\n"); - return err; + goto done; } switch (val & TX_RATE_MASK) { diff --git a/drivers/net/wireless/iwlwifi/iwl-3945.c b/drivers/net/wireless/iwlwifi/iwl-3945.c index c2a76785b66..a51e0eaa133 100644 --- a/drivers/net/wireless/iwlwifi/iwl-3945.c +++ b/drivers/net/wireless/iwlwifi/iwl-3945.c @@ -630,7 +630,9 @@ static void iwl3945_pass_packet_to_mac80211(struct iwl3945_priv *priv, struct ieee80211_rx_status *stats) { struct iwl3945_rx_packet *pkt = (struct iwl3945_rx_packet *)rxb->skb->data; +#ifdef CONFIG_IWL3945_LEDS struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)IWL_RX_DATA(pkt); +#endif struct iwl3945_rx_frame_hdr *rx_hdr = IWL_RX_HDR(pkt); struct iwl3945_rx_frame_end *rx_end = IWL_RX_END(pkt); short len = le16_to_cpu(rx_hdr->len); diff --git a/drivers/net/wireless/iwlwifi/iwl-core.c b/drivers/net/wireless/iwlwifi/iwl-core.c index a44188bf445..e3427c205cc 100644 --- a/drivers/net/wireless/iwlwifi/iwl-core.c +++ b/drivers/net/wireless/iwlwifi/iwl-core.c @@ -818,8 +818,7 @@ int iwl_setup_mac(struct iwl_priv *priv) hw->rate_control_algorithm = "iwl-4965-rs"; /* Tell mac80211 our characteristics */ - hw->flags = IEEE80211_HW_HOST_GEN_BEACON_TEMPLATE | - IEEE80211_HW_SIGNAL_DBM | + hw->flags = IEEE80211_HW_SIGNAL_DBM | IEEE80211_HW_NOISE_DBM; /* Default value; 4 EDCA QOS priorities */ hw->queues = 4; diff --git a/drivers/net/wireless/iwlwifi/iwl-debug.h b/drivers/net/wireless/iwlwifi/iwl-debug.h index 58384805a49..d6d729e86bd 100644 --- a/drivers/net/wireless/iwlwifi/iwl-debug.h +++ b/drivers/net/wireless/iwlwifi/iwl-debug.h @@ -68,12 +68,8 @@ void iwl_dbgfs_unregister(struct iwl_priv *priv); #endif #else -static inline void IWL_DEBUG(int level, const char *fmt, ...) -{ -} -static inline void IWL_DEBUG_LIMIT(int level, const char *fmt, ...) -{ -} +#define IWL_DEBUG(level, fmt, args...) +#define IWL_DEBUG_LIMIT(level, fmt, args...) #endif /* CONFIG_IWLWIFI_DEBUG */ diff --git a/drivers/net/wireless/iwlwifi/iwl-led.c b/drivers/net/wireless/iwlwifi/iwl-led.c index 899d7a2567a..61250e6a7d1 100644 --- a/drivers/net/wireless/iwlwifi/iwl-led.c +++ b/drivers/net/wireless/iwlwifi/iwl-led.c @@ -268,7 +268,9 @@ static int iwl_get_blink_rate(struct iwl_priv *priv) if (tpt < 0) /* wrapparound */ tpt = -tpt; - IWL_DEBUG_LED("tpt %lld current_tpt %lld\n", tpt, current_tpt); + IWL_DEBUG_LED("tpt %lld current_tpt %llu\n", + (long long)tpt, + (unsigned long long)current_tpt); priv->led_tpt = current_tpt; if (!priv->allow_blinking) diff --git a/drivers/net/wireless/iwlwifi/iwl-scan.c b/drivers/net/wireless/iwlwifi/iwl-scan.c index efc750d2fc5..5a00ac23e2d 100644 --- a/drivers/net/wireless/iwlwifi/iwl-scan.c +++ b/drivers/net/wireless/iwlwifi/iwl-scan.c @@ -270,6 +270,7 @@ static void iwl_rx_scan_results_notif(struct iwl_priv *priv, static void iwl_rx_scan_complete_notif(struct iwl_priv *priv, struct iwl_rx_mem_buffer *rxb) { +#ifdef CONFIG_IWLWIFI_DEBUG struct iwl_rx_packet *pkt = (struct iwl_rx_packet *)rxb->skb->data; struct iwl_scancomplete_notification *scan_notif = (void *)pkt->u.raw; @@ -277,6 +278,7 @@ static void iwl_rx_scan_complete_notif(struct iwl_priv *priv, scan_notif->scanned_channels, scan_notif->tsf_low, scan_notif->tsf_high, scan_notif->status); +#endif /* The HW is no longer scanning */ clear_bit(STATUS_SCAN_HW, &priv->status); diff --git a/drivers/net/wireless/iwlwifi/iwl-tx.c b/drivers/net/wireless/iwlwifi/iwl-tx.c index 9b50b1052b0..f72cd0bf6aa 100644 --- a/drivers/net/wireless/iwlwifi/iwl-tx.c +++ b/drivers/net/wireless/iwlwifi/iwl-tx.c @@ -906,7 +906,7 @@ int iwl_tx_skb(struct iwl_priv *priv, struct sk_buff *skb) * first entry */ iwl_hw_txq_attach_buf_to_tfd(priv, tfd, txcmd_phys, len); - if (!(info->flags & IEEE80211_TX_CTL_DO_NOT_ENCRYPT)) + if (info->control.hw_key) iwl_tx_cmd_build_hwcrypto(priv, info, tx_cmd, skb, sta_id); /* Set up TFD's 2nd entry to point directly to remainder of skb, diff --git a/drivers/net/wireless/iwlwifi/iwl3945-base.c b/drivers/net/wireless/iwlwifi/iwl3945-base.c index 4a22d3fba75..7c82ecfa30a 100644 --- a/drivers/net/wireless/iwlwifi/iwl3945-base.c +++ b/drivers/net/wireless/iwlwifi/iwl3945-base.c @@ -2667,7 +2667,7 @@ static int iwl3945_tx_skb(struct iwl3945_priv *priv, struct sk_buff *skb) * first entry */ iwl3945_hw_txq_attach_buf_to_tfd(priv, tfd, txcmd_phys, len); - if (!(info->flags & IEEE80211_TX_CTL_DO_NOT_ENCRYPT)) + if (info->control.hw_key) iwl3945_build_tx_cmd_hwcrypto(priv, info, out_cmd, skb, 0); /* Set up TFD's 2nd entry to point directly to remainder of skb, @@ -7899,8 +7899,7 @@ static int iwl3945_pci_probe(struct pci_dev *pdev, const struct pci_device_id *e priv->ibss_beacon = NULL; /* Tell mac80211 our characteristics */ - hw->flags = IEEE80211_HW_HOST_GEN_BEACON_TEMPLATE | - IEEE80211_HW_SIGNAL_DBM | + hw->flags = IEEE80211_HW_SIGNAL_DBM | IEEE80211_HW_NOISE_DBM; /* 4 EDCA QOS priorities */ diff --git a/drivers/net/wireless/libertas/persistcfg.c b/drivers/net/wireless/libertas/persistcfg.c index 6d0ff8decaf..3309a9c3cfe 100644 --- a/drivers/net/wireless/libertas/persistcfg.c +++ b/drivers/net/wireless/libertas/persistcfg.c @@ -48,7 +48,7 @@ static ssize_t bootflag_get(struct device *dev, if (ret) return ret; - return snprintf(buf, 12, "0x%x\n", le32_to_cpu(defs.bootflag)); + return snprintf(buf, 12, "%d\n", le32_to_cpu(defs.bootflag)); } /** @@ -63,8 +63,8 @@ static ssize_t bootflag_set(struct device *dev, struct device_attribute *attr, int ret; memset(&cmd, 0, sizeof(cmd)); - ret = sscanf(buf, "%x", &datum); - if (ret != 1) + ret = sscanf(buf, "%d", &datum); + if ((ret != 1) || (datum > 1)) return -EINVAL; *((__le32 *)&cmd.data[0]) = cpu_to_le32(!!datum); @@ -91,7 +91,7 @@ static ssize_t boottime_get(struct device *dev, if (ret) return ret; - return snprintf(buf, 12, "0x%x\n", defs.boottime); + return snprintf(buf, 12, "%d\n", defs.boottime); } /** @@ -106,8 +106,8 @@ static ssize_t boottime_set(struct device *dev, int ret; memset(&cmd, 0, sizeof(cmd)); - ret = sscanf(buf, "%x", &datum); - if (ret != 1) + ret = sscanf(buf, "%d", &datum); + if ((ret != 1) || (datum > 255)) return -EINVAL; /* A too small boot time will result in the device booting into @@ -143,7 +143,7 @@ static ssize_t channel_get(struct device *dev, if (ret) return ret; - return snprintf(buf, 12, "0x%x\n", le16_to_cpu(defs.channel)); + return snprintf(buf, 12, "%d\n", le16_to_cpu(defs.channel)); } /** @@ -154,11 +154,11 @@ static ssize_t channel_set(struct device *dev, struct device_attribute *attr, { struct lbs_private *priv = to_net_dev(dev)->priv; struct cmd_ds_mesh_config cmd; - uint16_t datum; + uint32_t datum; int ret; memset(&cmd, 0, sizeof(cmd)); - ret = sscanf(buf, "%hx", &datum); + ret = sscanf(buf, "%d", &datum); if (ret != 1 || datum < 1 || datum > 11) return -EINVAL; @@ -274,8 +274,8 @@ static ssize_t protocol_id_set(struct device *dev, int ret; memset(&cmd, 0, sizeof(cmd)); - ret = sscanf(buf, "%x", &datum); - if (ret != 1) + ret = sscanf(buf, "%d", &datum); + if ((ret != 1) || (datum > 255)) return -EINVAL; /* fetch all other Information Element parameters */ @@ -328,8 +328,8 @@ static ssize_t metric_id_set(struct device *dev, struct device_attribute *attr, int ret; memset(&cmd, 0, sizeof(cmd)); - ret = sscanf(buf, "%x", &datum); - if (ret != 1) + ret = sscanf(buf, "%d", &datum); + if ((ret != 1) || (datum > 255)) return -EINVAL; /* fetch all other Information Element parameters */ @@ -382,8 +382,8 @@ static ssize_t capability_set(struct device *dev, struct device_attribute *attr, int ret; memset(&cmd, 0, sizeof(cmd)); - ret = sscanf(buf, "%x", &datum); - if (ret != 1) + ret = sscanf(buf, "%d", &datum); + if ((ret != 1) || (datum > 255)) return -EINVAL; /* fetch all other Information Element parameters */ diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c index 5816230d58f..248d31a7aa3 100644 --- a/drivers/net/wireless/mac80211_hwsim.c +++ b/drivers/net/wireless/mac80211_hwsim.c @@ -500,7 +500,7 @@ failed_hw: device_unregister(data->dev); failed_drvdata: ieee80211_free_hw(hw); - hwsim_radios[i] = 0; + hwsim_radios[i] = NULL; failed: mac80211_hwsim_free(); return err; diff --git a/drivers/net/wireless/rt2x00/rt2500usb.c b/drivers/net/wireless/rt2x00/rt2500usb.c index 3558cb21074..3078417b326 100644 --- a/drivers/net/wireless/rt2x00/rt2500usb.c +++ b/drivers/net/wireless/rt2x00/rt2500usb.c @@ -1121,6 +1121,7 @@ static void rt2500usb_write_beacon(struct queue_entry *entry) int pipe = usb_sndbulkpipe(usb_dev, 1); int length; u16 reg; + u32 word, len; /* * Add the descriptor in front of the skb. @@ -1130,6 +1131,17 @@ static void rt2500usb_write_beacon(struct queue_entry *entry) skbdesc->desc = entry->skb->data; /* + * Adjust the beacon databyte count. The current number is + * calculated before this function gets called, but falsely + * assumes that the descriptor was already present in the SKB. + */ + rt2x00_desc_read(skbdesc->desc, 0, &word); + len = rt2x00_get_field32(word, TXD_W0_DATABYTE_COUNT); + len += skbdesc->desc_len; + rt2x00_set_field32(&word, TXD_W0_DATABYTE_COUNT, len); + rt2x00_desc_write(skbdesc->desc, 0, word); + + /* * Disable beaconing while we are reloading the beacon data, * otherwise we might be sending out invalid data. */ @@ -1650,7 +1662,6 @@ static void rt2500usb_probe_hw_mode(struct rt2x00_dev *rt2x00dev) * Initialize all hw fields. */ rt2x00dev->hw->flags = - IEEE80211_HW_HOST_GEN_BEACON_TEMPLATE | IEEE80211_HW_RX_INCLUDES_FCS | IEEE80211_HW_HOST_BROADCAST_PS_BUFFERING | IEEE80211_HW_SIGNAL_DBM; diff --git a/drivers/net/wireless/rt2x00/rt2x00.h b/drivers/net/wireless/rt2x00/rt2x00.h index 07b03b3c7ef..db2dc976d83 100644 --- a/drivers/net/wireless/rt2x00/rt2x00.h +++ b/drivers/net/wireless/rt2x00/rt2x00.h @@ -108,7 +108,10 @@ #define SHORT_PIFS ( SIFS + SHORT_SLOT_TIME ) #define DIFS ( PIFS + SLOT_TIME ) #define SHORT_DIFS ( SHORT_PIFS + SHORT_SLOT_TIME ) -#define EIFS ( SIFS + (8 * (IEEE80211_HEADER + ACK_SIZE)) ) +#define EIFS ( SIFS + DIFS + \ + (8 * (IEEE80211_HEADER + ACK_SIZE)) ) +#define SHORT_EIFS ( SIFS + SHORT_DIFS + \ + (8 * (IEEE80211_HEADER + ACK_SIZE)) ) /* * Chipset identification @@ -597,6 +600,7 @@ enum rt2x00_flags { DEVICE_STARTED_SUSPEND, DEVICE_ENABLED_RADIO, DEVICE_DISABLED_RADIO_HW, + DEVICE_DIRTY_CONFIG, /* * Driver features diff --git a/drivers/net/wireless/rt2x00/rt2x00config.c b/drivers/net/wireless/rt2x00/rt2x00config.c index f20ca712504..3f89516e833 100644 --- a/drivers/net/wireless/rt2x00/rt2x00config.c +++ b/drivers/net/wireless/rt2x00/rt2x00config.c @@ -271,7 +271,7 @@ config: libconf.sifs = SIFS; libconf.pifs = short_slot_time ? SHORT_PIFS : PIFS; libconf.difs = short_slot_time ? SHORT_DIFS : DIFS; - libconf.eifs = EIFS; + libconf.eifs = short_slot_time ? SHORT_EIFS : EIFS; } libconf.conf = conf; diff --git a/drivers/net/wireless/rt2x00/rt2x00dev.c b/drivers/net/wireless/rt2x00/rt2x00dev.c index 8c93eb8353b..f42283ad7b0 100644 --- a/drivers/net/wireless/rt2x00/rt2x00dev.c +++ b/drivers/net/wireless/rt2x00/rt2x00dev.c @@ -1013,6 +1013,7 @@ int rt2x00lib_start(struct rt2x00_dev *rt2x00dev) rt2x00dev->intf_associated = 0; __set_bit(DEVICE_STARTED, &rt2x00dev->flags); + __set_bit(DEVICE_DIRTY_CONFIG, &rt2x00dev->flags); return 0; } @@ -1237,9 +1238,9 @@ int rt2x00lib_resume(struct rt2x00_dev *rt2x00dev) /* * Reconfigure device. */ - rt2x00lib_config(rt2x00dev, &rt2x00dev->hw->conf, 1); - if (!rt2x00dev->hw->conf.radio_enabled) - rt2x00lib_disable_radio(rt2x00dev); + retval = rt2x00mac_config(rt2x00dev->hw, &rt2x00dev->hw->conf); + if (retval) + goto exit; /* * Iterator over each active interface to diff --git a/drivers/net/wireless/rt2x00/rt2x00lib.h b/drivers/net/wireless/rt2x00/rt2x00lib.h index f2c9b0e79b5..c5fb3a72cf3 100644 --- a/drivers/net/wireless/rt2x00/rt2x00lib.h +++ b/drivers/net/wireless/rt2x00/rt2x00lib.h @@ -125,13 +125,6 @@ void rt2x00queue_unmap_skb(struct rt2x00_dev *rt2x00dev, struct sk_buff *skb); void rt2x00queue_free_skb(struct rt2x00_dev *rt2x00dev, struct sk_buff *skb); /** - * rt2x00queue_free_skb - free a skb - * @rt2x00dev: Pointer to &struct rt2x00_dev. - * @skb: The skb to free. - */ -void rt2x00queue_free_skb(struct rt2x00_dev *rt2x00dev, struct sk_buff *skb); - -/** * rt2x00queue_write_tx_frame - Write TX frame to hardware * @queue: Queue over which the frame should be send * @skb: The skb to send diff --git a/drivers/net/wireless/rt2x00/rt2x00mac.c b/drivers/net/wireless/rt2x00/rt2x00mac.c index f1dcbaa80c3..c3ee4ecba79 100644 --- a/drivers/net/wireless/rt2x00/rt2x00mac.c +++ b/drivers/net/wireless/rt2x00/rt2x00mac.c @@ -63,7 +63,7 @@ static int rt2x00mac_tx_rts_cts(struct rt2x00_dev *rt2x00dev, */ memcpy(skb->cb, frag_skb->cb, sizeof(skb->cb)); rts_info = IEEE80211_SKB_CB(skb); - rts_info->flags |= IEEE80211_TX_CTL_DO_NOT_ENCRYPT; + rts_info->control.hw_key = NULL; rts_info->flags &= ~IEEE80211_TX_CTL_USE_RTS_CTS; rts_info->flags &= ~IEEE80211_TX_CTL_USE_CTS_PROTECT; rts_info->flags &= ~IEEE80211_TX_CTL_REQ_TX_STATUS; @@ -83,6 +83,7 @@ static int rt2x00mac_tx_rts_cts(struct rt2x00_dev *rt2x00dev, (struct ieee80211_rts *)(skb->data)); if (rt2x00queue_write_tx_frame(queue, skb)) { + dev_kfree_skb_any(skb); WARNING(rt2x00dev, "Failed to send RTS/CTS frame.\n"); return NETDEV_TX_BUSY; } @@ -96,7 +97,6 @@ int rt2x00mac_tx(struct ieee80211_hw *hw, struct sk_buff *skb) struct ieee80211_tx_info *tx_info = IEEE80211_SKB_CB(skb); struct ieee80211_hdr *ieee80211hdr = (struct ieee80211_hdr *)skb->data; enum data_queue_qid qid = skb_get_queue_mapping(skb); - struct rt2x00_intf *intf = vif_to_intf(tx_info->control.vif); struct data_queue *queue; u16 frame_control; @@ -152,18 +152,6 @@ int rt2x00mac_tx(struct ieee80211_hw *hw, struct sk_buff *skb) } } - /* - * XXX: This is as wrong as the old mac80211 code was, - * due to beacons not getting sequence numbers assigned - * properly. - */ - if (tx_info->flags & IEEE80211_TX_CTL_ASSIGN_SEQ) { - if (tx_info->flags & IEEE80211_TX_CTL_FIRST_FRAGMENT) - intf->seqno += 0x10; - ieee80211hdr->seq_ctrl &= cpu_to_le16(IEEE80211_SCTL_FRAG); - ieee80211hdr->seq_ctrl |= cpu_to_le16(intf->seqno); - } - if (rt2x00queue_write_tx_frame(queue, skb)) { ieee80211_stop_queue(rt2x00dev->hw, qid); return NETDEV_TX_BUSY; @@ -322,6 +310,7 @@ EXPORT_SYMBOL_GPL(rt2x00mac_remove_interface); int rt2x00mac_config(struct ieee80211_hw *hw, struct ieee80211_conf *conf) { struct rt2x00_dev *rt2x00dev = hw->priv; + int force_reconfig; /* * Mac80211 might be calling this function while we are trying @@ -341,7 +330,17 @@ int rt2x00mac_config(struct ieee80211_hw *hw, struct ieee80211_conf *conf) rt2x00lib_toggle_rx(rt2x00dev, STATE_RADIO_RX_OFF); } - rt2x00lib_config(rt2x00dev, conf, 0); + /* + * When the DEVICE_DIRTY_CONFIG flag is set, the device has recently + * been started and the configuration must be forced upon the hardware. + * Otherwise registers will not be intialized correctly and could + * result in non-working hardware because essential registers aren't + * initialized. + */ + force_reconfig = + __test_and_clear_bit(DEVICE_DIRTY_CONFIG, &rt2x00dev->flags); + + rt2x00lib_config(rt2x00dev, conf, force_reconfig); /* * Reenable RX only if the radio should be on. diff --git a/drivers/net/wireless/rt2x00/rt2x00queue.c b/drivers/net/wireless/rt2x00/rt2x00queue.c index 7f442030f5a..3b27f6aa860 100644 --- a/drivers/net/wireless/rt2x00/rt2x00queue.c +++ b/drivers/net/wireless/rt2x00/rt2x00queue.c @@ -120,6 +120,7 @@ static void rt2x00queue_create_tx_descriptor(struct queue_entry *entry, { struct rt2x00_dev *rt2x00dev = entry->queue->rt2x00dev; struct ieee80211_tx_info *tx_info = IEEE80211_SKB_CB(entry->skb); + struct rt2x00_intf *intf = vif_to_intf(tx_info->control.vif); struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)entry->skb->data; struct ieee80211_rate *rate = ieee80211_get_tx_rate(rt2x00dev->hw, tx_info); @@ -200,6 +201,31 @@ static void rt2x00queue_create_tx_descriptor(struct queue_entry *entry, } /* + * Hardware should insert sequence counter. + * FIXME: We insert a software sequence counter first for + * hardware that doesn't support hardware sequence counting. + * + * This is wrong because beacons are not getting sequence + * numbers assigned properly. + * + * A secondary problem exists for drivers that cannot toggle + * sequence counting per-frame, since those will override the + * sequence counter given by mac80211. + */ + if (tx_info->flags & IEEE80211_TX_CTL_ASSIGN_SEQ) { + spin_lock(&intf->lock); + + if (test_bit(ENTRY_TXD_FIRST_FRAGMENT, &txdesc->flags)) + intf->seqno += 0x10; + hdr->seq_ctrl &= cpu_to_le16(IEEE80211_SCTL_FRAG); + hdr->seq_ctrl |= cpu_to_le16(intf->seqno); + + spin_unlock(&intf->lock); + + __set_bit(ENTRY_TXD_GENERATE_SEQ, &txdesc->flags); + } + + /* * PLCP setup * Length calculation depends on OFDM/CCK rate. */ @@ -466,9 +492,12 @@ void rt2x00queue_init_rx(struct rt2x00_dev *rt2x00dev) if (!rt2x00dev->ops->lib->init_rxentry) return; - for (i = 0; i < queue->limit; i++) + for (i = 0; i < queue->limit; i++) { + queue->entries[i].flags = 0; + rt2x00dev->ops->lib->init_rxentry(rt2x00dev, &queue->entries[i]); + } } void rt2x00queue_init_tx(struct rt2x00_dev *rt2x00dev) @@ -482,9 +511,12 @@ void rt2x00queue_init_tx(struct rt2x00_dev *rt2x00dev) if (!rt2x00dev->ops->lib->init_txentry) continue; - for (i = 0; i < queue->limit; i++) + for (i = 0; i < queue->limit; i++) { + queue->entries[i].flags = 0; + rt2x00dev->ops->lib->init_txentry(rt2x00dev, &queue->entries[i]); + } } } diff --git a/drivers/net/wireless/rt2x00/rt2x00queue.h b/drivers/net/wireless/rt2x00/rt2x00queue.h index 8945945c892..a4a8c57004d 100644 --- a/drivers/net/wireless/rt2x00/rt2x00queue.h +++ b/drivers/net/wireless/rt2x00/rt2x00queue.h @@ -199,6 +199,7 @@ struct txdone_entry_desc { * @ENTRY_TXD_RTS_FRAME: This frame is a RTS frame. * @ENTRY_TXD_CTS_FRAME: This frame is a CTS-to-self frame. * @ENTRY_TXD_OFDM_RATE: This frame is send out with an OFDM rate. + * @ENTRY_TXD_GENERATE_SEQ: This frame requires sequence counter. * @ENTRY_TXD_FIRST_FRAGMENT: This is the first frame. * @ENTRY_TXD_MORE_FRAG: This frame is followed by another fragment. * @ENTRY_TXD_REQ_TIMESTAMP: Require timestamp to be inserted. @@ -210,6 +211,7 @@ enum txentry_desc_flags { ENTRY_TXD_RTS_FRAME, ENTRY_TXD_CTS_FRAME, ENTRY_TXD_OFDM_RATE, + ENTRY_TXD_GENERATE_SEQ, ENTRY_TXD_FIRST_FRAGMENT, ENTRY_TXD_MORE_FRAG, ENTRY_TXD_REQ_TIMESTAMP, diff --git a/drivers/net/wireless/rt2x00/rt2x00usb.c b/drivers/net/wireless/rt2x00/rt2x00usb.c index 83862e7f7ae..933e6cc9359 100644 --- a/drivers/net/wireless/rt2x00/rt2x00usb.c +++ b/drivers/net/wireless/rt2x00/rt2x00usb.c @@ -122,6 +122,38 @@ int rt2x00usb_vendor_request_buff(struct rt2x00_dev *rt2x00dev, } EXPORT_SYMBOL_GPL(rt2x00usb_vendor_request_buff); +int rt2x00usb_vendor_request_large_buff(struct rt2x00_dev *rt2x00dev, + const u8 request, const u8 requesttype, + const u16 offset, void *buffer, + const u16 buffer_length, + const int timeout) +{ + int status = 0; + unsigned char *tb; + u16 off, len, bsize; + + mutex_lock(&rt2x00dev->usb_cache_mutex); + + tb = buffer; + off = offset; + len = buffer_length; + while (len && !status) { + bsize = min_t(u16, CSR_CACHE_SIZE, len); + status = rt2x00usb_vendor_req_buff_lock(rt2x00dev, request, + requesttype, off, tb, + bsize, timeout); + + tb += bsize; + len -= bsize; + off += bsize; + } + + mutex_unlock(&rt2x00dev->usb_cache_mutex); + + return status; +} +EXPORT_SYMBOL_GPL(rt2x00usb_vendor_request_large_buff); + /* * TX data handlers. */ diff --git a/drivers/net/wireless/rt2x00/rt2x00usb.h b/drivers/net/wireless/rt2x00/rt2x00usb.h index aad794adf52..ee3875f894a 100644 --- a/drivers/net/wireless/rt2x00/rt2x00usb.h +++ b/drivers/net/wireless/rt2x00/rt2x00usb.h @@ -70,8 +70,7 @@ /* * Cache size */ -#define CSR_CACHE_SIZE 8 -#define CSR_CACHE_SIZE_FIRMWARE 64 +#define CSR_CACHE_SIZE 64 /* * USB request types. @@ -172,6 +171,25 @@ int rt2x00usb_vendor_req_buff_lock(struct rt2x00_dev *rt2x00dev, const u16 buffer_length, const int timeout); /** + * rt2x00usb_vendor_request_large_buff - Send register command to device (buffered) + * @rt2x00dev: Pointer to &struct rt2x00_dev + * @request: USB vendor command (See &enum rt2x00usb_vendor_request) + * @requesttype: Request type &USB_VENDOR_REQUEST_* + * @offset: Register start offset to perform action on + * @buffer: Buffer where information will be read/written to by device + * @buffer_length: Size of &buffer + * @timeout: Operation timeout + * + * This function is used to transfer register data in blocks larger + * then CSR_CACHE_SIZE. Use for firmware upload, keys and beacons. + */ +int rt2x00usb_vendor_request_large_buff(struct rt2x00_dev *rt2x00dev, + const u8 request, const u8 requesttype, + const u16 offset, void *buffer, + const u16 buffer_length, + const int timeout); + +/** * rt2x00usb_vendor_request_sw - Send single register command to device * @rt2x00dev: Pointer to &struct rt2x00_dev * @request: USB vendor command (See &enum rt2x00usb_vendor_request) diff --git a/drivers/net/wireless/rt2x00/rt61pci.c b/drivers/net/wireless/rt2x00/rt61pci.c index f7c1f92c144..fbe2a652e01 100644 --- a/drivers/net/wireless/rt2x00/rt61pci.c +++ b/drivers/net/wireless/rt2x00/rt61pci.c @@ -1544,7 +1544,8 @@ static void rt61pci_write_tx_desc(struct rt2x00_dev *rt2x00dev, rt2x00_set_field32(&word, TXD_W1_CWMIN, txdesc->cw_min); rt2x00_set_field32(&word, TXD_W1_CWMAX, txdesc->cw_max); rt2x00_set_field32(&word, TXD_W1_IV_OFFSET, IEEE80211_HEADER); - rt2x00_set_field32(&word, TXD_W1_HW_SEQUENCE, 1); + rt2x00_set_field32(&word, TXD_W1_HW_SEQUENCE, + test_bit(ENTRY_TXD_GENERATE_SEQ, &txdesc->flags)); rt2x00_set_field32(&word, TXD_W1_BUFFER_COUNT, 1); rt2x00_desc_write(txd, 1, word); @@ -2278,7 +2279,6 @@ static void rt61pci_probe_hw_mode(struct rt2x00_dev *rt2x00dev) * Initialize all hw fields. */ rt2x00dev->hw->flags = - IEEE80211_HW_HOST_GEN_BEACON_TEMPLATE | IEEE80211_HW_HOST_BROADCAST_PS_BUFFERING | IEEE80211_HW_SIGNAL_DBM; rt2x00dev->hw->extra_tx_headroom = 0; diff --git a/drivers/net/wireless/rt2x00/rt73usb.c b/drivers/net/wireless/rt2x00/rt73usb.c index d383735ab8f..9761eaaa08b 100644 --- a/drivers/net/wireless/rt2x00/rt73usb.c +++ b/drivers/net/wireless/rt2x00/rt73usb.c @@ -890,9 +890,6 @@ static int rt73usb_load_firmware(struct rt2x00_dev *rt2x00dev, const void *data, unsigned int i; int status; u32 reg; - const char *ptr = data; - char *cache; - int buflen; /* * Wait for stable hardware. @@ -911,31 +908,12 @@ static int rt73usb_load_firmware(struct rt2x00_dev *rt2x00dev, const void *data, /* * Write firmware to device. - * We setup a seperate cache for this action, - * since we are going to write larger chunks of data - * then normally used cache size. */ - cache = kmalloc(CSR_CACHE_SIZE_FIRMWARE, GFP_KERNEL); - if (!cache) { - ERROR(rt2x00dev, "Failed to allocate firmware cache.\n"); - return -ENOMEM; - } - - for (i = 0; i < len; i += CSR_CACHE_SIZE_FIRMWARE) { - buflen = min_t(int, len - i, CSR_CACHE_SIZE_FIRMWARE); - - memcpy(cache, ptr, buflen); - - rt2x00usb_vendor_request(rt2x00dev, USB_MULTI_WRITE, - USB_VENDOR_REQUEST_OUT, - FIRMWARE_IMAGE_BASE + i, 0, - cache, buflen, - REGISTER_TIMEOUT32(buflen)); - - ptr += buflen; - } - - kfree(cache); + rt2x00usb_vendor_request_large_buff(rt2x00dev, USB_MULTI_WRITE, + USB_VENDOR_REQUEST_OUT, + FIRMWARE_IMAGE_BASE, + data, len, + REGISTER_TIMEOUT32(len)); /* * Send firmware request to device to load firmware, @@ -1303,7 +1281,8 @@ static void rt73usb_write_tx_desc(struct rt2x00_dev *rt2x00dev, rt2x00_set_field32(&word, TXD_W1_CWMIN, txdesc->cw_min); rt2x00_set_field32(&word, TXD_W1_CWMAX, txdesc->cw_max); rt2x00_set_field32(&word, TXD_W1_IV_OFFSET, IEEE80211_HEADER); - rt2x00_set_field32(&word, TXD_W1_HW_SEQUENCE, 1); + rt2x00_set_field32(&word, TXD_W1_HW_SEQUENCE, + test_bit(ENTRY_TXD_GENERATE_SEQ, &txdesc->flags)); rt2x00_desc_write(txd, 1, word); rt2x00_desc_read(txd, 2, &word); @@ -1352,6 +1331,7 @@ static void rt73usb_write_beacon(struct queue_entry *entry) struct skb_frame_desc *skbdesc = get_skb_frame_desc(entry->skb); unsigned int beacon_base; u32 reg; + u32 word, len; /* * Add the descriptor in front of the skb. @@ -1361,6 +1341,17 @@ static void rt73usb_write_beacon(struct queue_entry *entry) skbdesc->desc = entry->skb->data; /* + * Adjust the beacon databyte count. The current number is + * calculated before this function gets called, but falsely + * assumes that the descriptor was already present in the SKB. + */ + rt2x00_desc_read(skbdesc->desc, 0, &word); + len = rt2x00_get_field32(word, TXD_W0_DATABYTE_COUNT); + len += skbdesc->desc_len; + rt2x00_set_field32(&word, TXD_W0_DATABYTE_COUNT, len); + rt2x00_desc_write(skbdesc->desc, 0, word); + + /* * Disable beaconing while we are reloading the beacon data, * otherwise we might be sending out invalid data. */ @@ -1374,10 +1365,10 @@ static void rt73usb_write_beacon(struct queue_entry *entry) * Write entire beacon with descriptor to register. */ beacon_base = HW_BEACON_OFFSET(entry->entry_idx); - rt2x00usb_vendor_request(rt2x00dev, USB_MULTI_WRITE, - USB_VENDOR_REQUEST_OUT, beacon_base, 0, - entry->skb->data, entry->skb->len, - REGISTER_TIMEOUT32(entry->skb->len)); + rt2x00usb_vendor_request_large_buff(rt2x00dev, USB_MULTI_WRITE, + USB_VENDOR_REQUEST_OUT, beacon_base, + entry->skb->data, entry->skb->len, + REGISTER_TIMEOUT32(entry->skb->len)); /* * Clean up the beacon skb. @@ -1871,7 +1862,6 @@ static void rt73usb_probe_hw_mode(struct rt2x00_dev *rt2x00dev) * Initialize all hw fields. */ rt2x00dev->hw->flags = - IEEE80211_HW_HOST_GEN_BEACON_TEMPLATE | IEEE80211_HW_HOST_BROADCAST_PS_BUFFERING | IEEE80211_HW_SIGNAL_DBM; rt2x00dev->hw->extra_tx_headroom = TXD_DESC_SIZE; diff --git a/drivers/net/wireless/rtl8187.h b/drivers/net/wireless/rtl8187.h index 3afb49f8866..1b0d750f662 100644 --- a/drivers/net/wireless/rtl8187.h +++ b/drivers/net/wireless/rtl8187.h @@ -47,11 +47,13 @@ struct rtl8187_rx_hdr { struct rtl8187b_rx_hdr { __le32 flags; __le64 mac_time; - u8 noise; - u8 signal; + u8 sq; + u8 rssi; u8 agc; - u8 reserved; - __le32 unused; + u8 flags2; + __le16 snr_long2end; + s8 pwdb_g12; + u8 fot; } __attribute__((packed)); /* {rtl8187,rtl8187b}_tx_info is in skb */ @@ -100,6 +102,7 @@ struct rtl8187_priv { struct usb_device *udev; u32 rx_conf; u16 txpwr_base; + u16 seqno; u8 asic_rev; u8 is_rtl8187b; enum { diff --git a/drivers/net/wireless/rtl8187_dev.c b/drivers/net/wireless/rtl8187_dev.c index d3067b1216c..177988efd66 100644 --- a/drivers/net/wireless/rtl8187_dev.c +++ b/drivers/net/wireless/rtl8187_dev.c @@ -169,6 +169,7 @@ static int rtl8187_tx(struct ieee80211_hw *dev, struct sk_buff *skb) { struct rtl8187_priv *priv = dev->priv; struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); + struct ieee80211_hdr *ieee80211hdr = (struct ieee80211_hdr *)skb->data; unsigned int ep; void *buf; struct urb *urb; @@ -234,6 +235,20 @@ static int rtl8187_tx(struct ieee80211_hw *dev, struct sk_buff *skb) ep = epmap[skb_get_queue_mapping(skb)]; } + /* FIXME: The sequence that follows is needed for this driver to + * work with mac80211 since "mac80211: fix TX sequence numbers". + * As with the temporary code in rt2x00, changes will be needed + * to get proper sequence numbers on beacons. In addition, this + * patch places the sequence number in the hardware state, which + * limits us to a single virtual state. + */ + if (info->flags & IEEE80211_TX_CTL_ASSIGN_SEQ) { + if (info->flags & IEEE80211_TX_CTL_FIRST_FRAGMENT) + priv->seqno += 0x10; + ieee80211hdr->seq_ctrl &= cpu_to_le16(IEEE80211_SCTL_FRAG); + ieee80211hdr->seq_ctrl |= cpu_to_le16(priv->seqno); + } + info->driver_data[0] = dev; info->driver_data[1] = urb; @@ -257,6 +272,7 @@ static void rtl8187_rx_cb(struct urb *urb) struct ieee80211_rx_status rx_status = { 0 }; int rate, signal; u32 flags; + u32 quality; spin_lock(&priv->rx_queue.lock); if (skb->next) @@ -280,44 +296,57 @@ static void rtl8187_rx_cb(struct urb *urb) flags = le32_to_cpu(hdr->flags); signal = hdr->signal & 0x7f; rx_status.antenna = (hdr->signal >> 7) & 1; - rx_status.signal = signal; rx_status.noise = hdr->noise; rx_status.mactime = le64_to_cpu(hdr->mac_time); - priv->signal = signal; priv->quality = signal; + rx_status.qual = priv->quality; priv->noise = hdr->noise; + rate = (flags >> 20) & 0xF; + if (rate > 3) { /* OFDM rate */ + if (signal > 90) + signal = 90; + else if (signal < 25) + signal = 25; + signal = 90 - signal; + } else { /* CCK rate */ + if (signal > 95) + signal = 95; + else if (signal < 30) + signal = 30; + signal = 95 - signal; + } + rx_status.signal = signal; + priv->signal = signal; } else { struct rtl8187b_rx_hdr *hdr = (typeof(hdr))(skb_tail_pointer(skb) - sizeof(*hdr)); + /* The Realtek datasheet for the RTL8187B shows that the RX + * header contains the following quantities: signal quality, + * RSSI, AGC, the received power in dB, and the measured SNR. + * In testing, none of these quantities show qualitative + * agreement with AP signal strength, except for the AGC, + * which is inversely proportional to the strength of the + * signal. In the following, the quality and signal strength + * are derived from the AGC. The arbitrary scaling constants + * are chosen to make the results close to the values obtained + * for a BCM4312 using b43 as the driver. The noise is ignored + * for now. + */ flags = le32_to_cpu(hdr->flags); - signal = hdr->agc >> 1; - rx_status.antenna = (hdr->signal >> 7) & 1; - rx_status.signal = 64 - min(hdr->noise, (u8)64); - rx_status.noise = hdr->noise; + quality = 170 - hdr->agc; + if (quality > 100) + quality = 100; + signal = 14 - hdr->agc / 2; + rx_status.qual = quality; + priv->quality = quality; + rx_status.signal = signal; + priv->signal = signal; + rx_status.antenna = (hdr->rssi >> 7) & 1; rx_status.mactime = le64_to_cpu(hdr->mac_time); - priv->signal = hdr->signal; - priv->quality = hdr->agc >> 1; - priv->noise = hdr->noise; + rate = (flags >> 20) & 0xF; } skb_trim(skb, flags & 0x0FFF); - rate = (flags >> 20) & 0xF; - if (rate > 3) { /* OFDM rate */ - if (signal > 90) - signal = 90; - else if (signal < 25) - signal = 25; - signal = 90 - signal; - } else { /* CCK rate */ - if (signal > 95) - signal = 95; - else if (signal < 30) - signal = 30; - signal = 95 - signal; - } - - rx_status.qual = priv->quality; - rx_status.signal = signal; rx_status.rate_idx = rate; rx_status.freq = dev->conf.channel->center_freq; rx_status.band = dev->conf.channel->band; @@ -1015,9 +1044,7 @@ static int __devinit rtl8187_probe(struct usb_interface *intf, priv->mode = IEEE80211_IF_TYPE_MNTR; dev->flags = IEEE80211_HW_HOST_BROADCAST_PS_BUFFERING | - IEEE80211_HW_RX_INCLUDES_FCS | - IEEE80211_HW_SIGNAL_UNSPEC; - dev->max_signal = 65; + IEEE80211_HW_RX_INCLUDES_FCS; eeprom.data = dev; eeprom.register_read = rtl8187_eeprom_register_read; @@ -1132,10 +1159,16 @@ static int __devinit rtl8187_probe(struct usb_interface *intf, (*channel++).hw_value = txpwr >> 8; } - if (priv->is_rtl8187b) + if (priv->is_rtl8187b) { printk(KERN_WARNING "rtl8187: 8187B chip detected. Support " "is EXPERIMENTAL, and could damage your\n" " hardware, use at your own risk\n"); + dev->flags |= IEEE80211_HW_SIGNAL_DBM; + } else { + dev->flags |= IEEE80211_HW_SIGNAL_UNSPEC; + dev->max_signal = 65; + } + if ((id->driver_info == DEVICE_RTL8187) && priv->is_rtl8187b) printk(KERN_INFO "rtl8187: inconsistency between id with OEM" " info!\n"); diff --git a/drivers/net/wireless/zd1211rw/zd_mac.c b/drivers/net/wireless/zd1211rw/zd_mac.c index fcc532bb6a7..4d7b98b0503 100644 --- a/drivers/net/wireless/zd1211rw/zd_mac.c +++ b/drivers/net/wireless/zd1211rw/zd_mac.c @@ -935,7 +935,6 @@ struct ieee80211_hw *zd_mac_alloc_hw(struct usb_interface *intf) hw->wiphy->bands[IEEE80211_BAND_2GHZ] = &mac->band; hw->flags = IEEE80211_HW_RX_INCLUDES_FCS | - IEEE80211_HW_HOST_GEN_BEACON_TEMPLATE | IEEE80211_HW_SIGNAL_DB; hw->max_signal = 100; diff --git a/drivers/pci/hotplug/pciehp_hpc.c b/drivers/pci/hotplug/pciehp_hpc.c index 1323a43285d..ad27e9e225a 100644 --- a/drivers/pci/hotplug/pciehp_hpc.c +++ b/drivers/pci/hotplug/pciehp_hpc.c @@ -1103,7 +1103,7 @@ static inline void dbg_ctrl(struct controller *ctrl) dbg(" Power Indicator : %3s\n", PWR_LED(ctrl) ? "yes" : "no"); dbg(" Hot-Plug Surprise : %3s\n", HP_SUPR_RM(ctrl) ? "yes" : "no"); dbg(" EMI Present : %3s\n", EMI(ctrl) ? "yes" : "no"); - dbg(" Comamnd Completed : %3s\n", NO_CMD_CMPL(ctrl)? "no" : "yes"); + dbg(" Command Completed : %3s\n", NO_CMD_CMPL(ctrl)? "no" : "yes"); pciehp_readw(ctrl, SLOTSTATUS, ®16); dbg("Slot Status : 0x%04x\n", reg16); pciehp_readw(ctrl, SLOTCTRL, ®16); diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c index 15af618d36e..18354817173 100644 --- a/drivers/pci/msi.c +++ b/drivers/pci/msi.c @@ -126,7 +126,16 @@ static void msix_flush_writes(unsigned int irq) } } -static void msi_set_mask_bits(unsigned int irq, u32 mask, u32 flag) +/* + * PCI 2.3 does not specify mask bits for each MSI interrupt. Attempting to + * mask all MSI interrupts by clearing the MSI enable bit does not work + * reliably as devices without an INTx disable bit will then generate a + * level IRQ which will never be cleared. + * + * Returns 1 if it succeeded in masking the interrupt and 0 if the device + * doesn't support MSI masking. + */ +static int msi_set_mask_bits(unsigned int irq, u32 mask, u32 flag) { struct msi_desc *entry; @@ -144,8 +153,7 @@ static void msi_set_mask_bits(unsigned int irq, u32 mask, u32 flag) mask_bits |= flag & mask; pci_write_config_dword(entry->dev, pos, mask_bits); } else { - __msi_set_enable(entry->dev, entry->msi_attrib.pos, - !flag); + return 0; } break; case PCI_CAP_ID_MSIX: @@ -161,6 +169,7 @@ static void msi_set_mask_bits(unsigned int irq, u32 mask, u32 flag) break; } entry->msi_attrib.masked = !!flag; + return 1; } void read_msi_msg(unsigned int irq, struct msi_msg *msg) diff --git a/drivers/pci/pci-acpi.c b/drivers/pci/pci-acpi.c index 7764768b6a0..89a2f0fa10f 100644 --- a/drivers/pci/pci-acpi.c +++ b/drivers/pci/pci-acpi.c @@ -11,6 +11,7 @@ #include <linux/init.h> #include <linux/pci.h> #include <linux/module.h> +#include <linux/pci-aspm.h> #include <acpi/acpi.h> #include <acpi/acnamesp.h> #include <acpi/acresrc.h> @@ -372,6 +373,12 @@ static int __init acpi_pci_init(void) printk(KERN_INFO"ACPI FADT declares the system doesn't support MSI, so disable it\n"); pci_no_msi(); } + + if (acpi_gbl_FADT.boot_flags & BAF_PCIE_ASPM_CONTROL) { + printk(KERN_INFO"ACPI FADT declares the system doesn't support PCIe ASPM, so disable it\n"); + pcie_no_aspm(); + } + ret = register_acpi_bus_type(&acpi_pci_bus); if (ret) return 0; diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index e9c356236d2..0a3d856833f 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -572,6 +572,10 @@ int pci_set_power_state(struct pci_dev *dev, pci_power_t state) if (!ret) pci_update_current_state(dev); } + /* This device is quirked not to be put into D3, so + don't put it in D3 */ + if (state == PCI_D3hot && (dev->dev_flags & PCI_DEV_FLAGS_NO_D3)) + return 0; error = pci_raw_set_power_state(dev, state); @@ -1123,6 +1127,12 @@ int pci_enable_wake(struct pci_dev *dev, pci_power_t state, int enable) } /** + * pci_target_state - find an appropriate low power state for a given PCI dev + * @dev: PCI device + * + * Use underlying platform code to find a supported low power state for @dev. + * If the platform can't manage @dev, return the deepest state from which it + * can generate wake events, based on any available PME info. */ pci_power_t pci_target_state(struct pci_dev *dev) { diff --git a/drivers/pci/pcie/aspm.c b/drivers/pci/pcie/aspm.c index f82495583e6..9a7c9e1408a 100644 --- a/drivers/pci/pcie/aspm.c +++ b/drivers/pci/pcie/aspm.c @@ -55,7 +55,7 @@ struct pcie_link_state { struct endpoint_state endpoints[8]; }; -static int aspm_disabled; +static int aspm_disabled, aspm_force; static DEFINE_MUTEX(aspm_lock); static LIST_HEAD(link_list); @@ -510,6 +510,7 @@ static int pcie_aspm_sanity_check(struct pci_dev *pdev) { struct pci_dev *child_dev; int child_pos; + u32 reg32; /* * Some functions in a slot might not all be PCIE functions, very @@ -519,6 +520,19 @@ static int pcie_aspm_sanity_check(struct pci_dev *pdev) child_pos = pci_find_capability(child_dev, PCI_CAP_ID_EXP); if (!child_pos) return -EINVAL; + + /* + * Disable ASPM for pre-1.1 PCIe device, we follow MS to use + * RBER bit to determine if a function is 1.1 version device + */ + pci_read_config_dword(child_dev, child_pos + PCI_EXP_DEVCAP, + ®32); + if (!(reg32 & PCI_EXP_DEVCAP_RBER && !aspm_force)) { + printk("Pre-1.1 PCIe device detected, " + "disable ASPM for %s. It can be enabled forcedly" + " with 'pcie_aspm=force'\n", pci_name(pdev)); + return -EINVAL; + } } return 0; } @@ -802,11 +816,23 @@ void pcie_aspm_remove_sysfs_dev_files(struct pci_dev *pdev) static int __init pcie_aspm_disable(char *str) { - aspm_disabled = 1; + if (!strcmp(str, "off")) { + aspm_disabled = 1; + printk(KERN_INFO "PCIe ASPM is disabled\n"); + } else if (!strcmp(str, "force")) { + aspm_force = 1; + printk(KERN_INFO "PCIe ASPM is forcedly enabled\n"); + } return 1; } -__setup("pcie_noaspm", pcie_aspm_disable); +__setup("pcie_aspm=", pcie_aspm_disable); + +void pcie_no_aspm(void) +{ + if (!aspm_force) + aspm_disabled = 1; +} #ifdef CONFIG_ACPI #include <acpi/acpi_bus.h> diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index b1724cf31b6..7098dfb0744 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -163,12 +163,9 @@ static inline unsigned int pci_calc_resource_flags(unsigned int flags) return IORESOURCE_MEM; } -/* - * Find the extent of a PCI decode.. - */ -static u32 pci_size(u32 base, u32 maxbase, u32 mask) +static u64 pci_size(u64 base, u64 maxbase, u64 mask) { - u32 size = mask & maxbase; /* Find the significant bits */ + u64 size = mask & maxbase; /* Find the significant bits */ if (!size) return 0; @@ -184,135 +181,142 @@ static u32 pci_size(u32 base, u32 maxbase, u32 mask) return size; } -static u64 pci_size64(u64 base, u64 maxbase, u64 mask) -{ - u64 size = mask & maxbase; /* Find the significant bits */ - if (!size) - return 0; +enum pci_bar_type { + pci_bar_unknown, /* Standard PCI BAR probe */ + pci_bar_io, /* An io port BAR */ + pci_bar_mem32, /* A 32-bit memory BAR */ + pci_bar_mem64, /* A 64-bit memory BAR */ +}; - /* Get the lowest of them to find the decode size, and - from that the extent. */ - size = (size & ~(size-1)) - 1; +static inline enum pci_bar_type decode_bar(struct resource *res, u32 bar) +{ + if ((bar & PCI_BASE_ADDRESS_SPACE) == PCI_BASE_ADDRESS_SPACE_IO) { + res->flags = bar & ~PCI_BASE_ADDRESS_IO_MASK; + return pci_bar_io; + } - /* base == maxbase can be valid only if the BAR has - already been programmed with all 1s. */ - if (base == maxbase && ((base | size) & mask) != mask) - return 0; + res->flags = bar & ~PCI_BASE_ADDRESS_MEM_MASK; - return size; + if (res->flags == PCI_BASE_ADDRESS_MEM_TYPE_64) + return pci_bar_mem64; + return pci_bar_mem32; } -static inline int is_64bit_memory(u32 mask) +/* + * If the type is not unknown, we assume that the lowest bit is 'enable'. + * Returns 1 if the BAR was 64-bit and 0 if it was 32-bit. + */ +static int __pci_read_base(struct pci_dev *dev, enum pci_bar_type type, + struct resource *res, unsigned int pos) { - if ((mask & (PCI_BASE_ADDRESS_SPACE|PCI_BASE_ADDRESS_MEM_TYPE_MASK)) == - (PCI_BASE_ADDRESS_SPACE_MEMORY|PCI_BASE_ADDRESS_MEM_TYPE_64)) - return 1; - return 0; -} + u32 l, sz, mask; -static void pci_read_bases(struct pci_dev *dev, unsigned int howmany, int rom) -{ - unsigned int pos, reg, next; - u32 l, sz; - struct resource *res; + mask = type ? ~PCI_ROM_ADDRESS_ENABLE : ~0; - for(pos=0; pos<howmany; pos = next) { - u64 l64; - u64 sz64; - u32 raw_sz; + res->name = pci_name(dev); - next = pos+1; - res = &dev->resource[pos]; - res->name = pci_name(dev); - reg = PCI_BASE_ADDRESS_0 + (pos << 2); - pci_read_config_dword(dev, reg, &l); - pci_write_config_dword(dev, reg, ~0); - pci_read_config_dword(dev, reg, &sz); - pci_write_config_dword(dev, reg, l); - if (!sz || sz == 0xffffffff) - continue; - if (l == 0xffffffff) - l = 0; - raw_sz = sz; - if ((l & PCI_BASE_ADDRESS_SPACE) == - PCI_BASE_ADDRESS_SPACE_MEMORY) { - sz = pci_size(l, sz, (u32)PCI_BASE_ADDRESS_MEM_MASK); - /* - * For 64bit prefetchable memory sz could be 0, if the - * real size is bigger than 4G, so we need to check - * szhi for that. - */ - if (!is_64bit_memory(l) && !sz) - continue; - res->start = l & PCI_BASE_ADDRESS_MEM_MASK; - res->flags |= l & ~PCI_BASE_ADDRESS_MEM_MASK; + pci_read_config_dword(dev, pos, &l); + pci_write_config_dword(dev, pos, mask); + pci_read_config_dword(dev, pos, &sz); + pci_write_config_dword(dev, pos, l); + + /* + * All bits set in sz means the device isn't working properly. + * If the BAR isn't implemented, all bits must be 0. If it's a + * memory BAR or a ROM, bit 0 must be clear; if it's an io BAR, bit + * 1 must be clear. + */ + if (!sz || sz == 0xffffffff) + goto fail; + + /* + * I don't know how l can have all bits set. Copied from old code. + * Maybe it fixes a bug on some ancient platform. + */ + if (l == 0xffffffff) + l = 0; + + if (type == pci_bar_unknown) { + type = decode_bar(res, l); + res->flags |= pci_calc_resource_flags(l) | IORESOURCE_SIZEALIGN; + if (type == pci_bar_io) { + l &= PCI_BASE_ADDRESS_IO_MASK; + mask = PCI_BASE_ADDRESS_IO_MASK & 0xffff; } else { - sz = pci_size(l, sz, PCI_BASE_ADDRESS_IO_MASK & 0xffff); - if (!sz) - continue; - res->start = l & PCI_BASE_ADDRESS_IO_MASK; - res->flags |= l & ~PCI_BASE_ADDRESS_IO_MASK; + l &= PCI_BASE_ADDRESS_MEM_MASK; + mask = (u32)PCI_BASE_ADDRESS_MEM_MASK; } - res->end = res->start + (unsigned long) sz; - res->flags |= pci_calc_resource_flags(l) | IORESOURCE_SIZEALIGN; - if (is_64bit_memory(l)) { - u32 szhi, lhi; - - pci_read_config_dword(dev, reg+4, &lhi); - pci_write_config_dword(dev, reg+4, ~0); - pci_read_config_dword(dev, reg+4, &szhi); - pci_write_config_dword(dev, reg+4, lhi); - sz64 = ((u64)szhi << 32) | raw_sz; - l64 = ((u64)lhi << 32) | l; - sz64 = pci_size64(l64, sz64, PCI_BASE_ADDRESS_MEM_MASK); - next++; -#if BITS_PER_LONG == 64 - if (!sz64) { - res->start = 0; - res->end = 0; - res->flags = 0; - continue; - } - res->start = l64 & PCI_BASE_ADDRESS_MEM_MASK; - res->end = res->start + sz64; -#else - if (sz64 > 0x100000000ULL) { - dev_err(&dev->dev, "BAR %d: can't handle 64-bit" - " BAR\n", pos); - res->start = 0; - res->flags = 0; - } else if (lhi) { - /* 64-bit wide address, treat as disabled */ - pci_write_config_dword(dev, reg, - l & ~(u32)PCI_BASE_ADDRESS_MEM_MASK); - pci_write_config_dword(dev, reg+4, 0); - res->start = 0; - res->end = sz; - } -#endif + } else { + res->flags |= (l & IORESOURCE_ROM_ENABLE); + l &= PCI_ROM_ADDRESS_MASK; + mask = (u32)PCI_ROM_ADDRESS_MASK; + } + + if (type == pci_bar_mem64) { + u64 l64 = l; + u64 sz64 = sz; + u64 mask64 = mask | (u64)~0 << 32; + + pci_read_config_dword(dev, pos + 4, &l); + pci_write_config_dword(dev, pos + 4, ~0); + pci_read_config_dword(dev, pos + 4, &sz); + pci_write_config_dword(dev, pos + 4, l); + + l64 |= ((u64)l << 32); + sz64 |= ((u64)sz << 32); + + sz64 = pci_size(l64, sz64, mask64); + + if (!sz64) + goto fail; + + if ((sizeof(resource_size_t) < 8) && (sz64 > 0x100000000ULL)) { + dev_err(&dev->dev, "can't handle 64-bit BAR\n"); + goto fail; + } else if ((sizeof(resource_size_t) < 8) && l) { + /* Address above 32-bit boundary; disable the BAR */ + pci_write_config_dword(dev, pos, 0); + pci_write_config_dword(dev, pos + 4, 0); + res->start = 0; + res->end = sz64; + } else { + res->start = l64; + res->end = l64 + sz64; } + } else { + sz = pci_size(l, sz, mask); + + if (!sz) + goto fail; + + res->start = l; + res->end = l + sz; } + + out: + return (type == pci_bar_mem64) ? 1 : 0; + fail: + res->flags = 0; + goto out; +} + +static void pci_read_bases(struct pci_dev *dev, unsigned int howmany, int rom) +{ + unsigned int pos, reg; + + for (pos = 0; pos < howmany; pos++) { + struct resource *res = &dev->resource[pos]; + reg = PCI_BASE_ADDRESS_0 + (pos << 2); + pos += __pci_read_base(dev, pci_bar_unknown, res, reg); + } + if (rom) { + struct resource *res = &dev->resource[PCI_ROM_RESOURCE]; dev->rom_base_reg = rom; - res = &dev->resource[PCI_ROM_RESOURCE]; - res->name = pci_name(dev); - pci_read_config_dword(dev, rom, &l); - pci_write_config_dword(dev, rom, ~PCI_ROM_ADDRESS_ENABLE); - pci_read_config_dword(dev, rom, &sz); - pci_write_config_dword(dev, rom, l); - if (l == 0xffffffff) - l = 0; - if (sz && sz != 0xffffffff) { - sz = pci_size(l, sz, (u32)PCI_ROM_ADDRESS_MASK); - if (sz) { - res->flags = (l & IORESOURCE_ROM_ENABLE) | - IORESOURCE_MEM | IORESOURCE_PREFETCH | - IORESOURCE_READONLY | IORESOURCE_CACHEABLE | - IORESOURCE_SIZEALIGN; - res->start = l & PCI_ROM_ADDRESS_MASK; - res->end = res->start + (unsigned long) sz; - } - } + res->flags = IORESOURCE_MEM | IORESOURCE_PREFETCH | + IORESOURCE_READONLY | IORESOURCE_CACHEABLE | + IORESOURCE_SIZEALIGN; + __pci_read_base(dev, pci_bar_mem32, res, rom); } } @@ -1053,7 +1057,8 @@ int pci_scan_slot(struct pci_bus *bus, int devfn) } } - if (bus->self) + /* only one slot has pcie device */ + if (bus->self && nr) pcie_aspm_init_link_state(bus->self); return nr; diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index 12d489395fa..0fb36507428 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -923,6 +923,19 @@ static void __init quirk_ide_samemode(struct pci_dev *pdev) } DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801CA_10, quirk_ide_samemode); +/* + * Some ATA devices break if put into D3 + */ + +static void __devinit quirk_no_ata_d3(struct pci_dev *pdev) +{ + /* Quirk the legacy ATA devices only. The AHCI ones are ok */ + if ((pdev->class >> 8) == PCI_CLASS_STORAGE_IDE) + pdev->dev_flags |= PCI_DEV_FLAGS_NO_D3; +} +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_SERVERWORKS, PCI_ANY_ID, quirk_no_ata_d3); +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_ATI, PCI_ANY_ID, quirk_no_ata_d3); + /* This was originally an Alpha specific thing, but it really fits here. * The i82375 PCI/EISA bridge appears as non-classified. Fix that. */ diff --git a/drivers/rtc/interface.c b/drivers/rtc/interface.c index d397fa5f3a9..7af60b98d8a 100644 --- a/drivers/rtc/interface.c +++ b/drivers/rtc/interface.c @@ -20,7 +20,7 @@ int rtc_read_time(struct rtc_device *rtc, struct rtc_time *tm) err = mutex_lock_interruptible(&rtc->ops_lock); if (err) - return -EBUSY; + return err; if (!rtc->ops) err = -ENODEV; @@ -46,7 +46,7 @@ int rtc_set_time(struct rtc_device *rtc, struct rtc_time *tm) err = mutex_lock_interruptible(&rtc->ops_lock); if (err) - return -EBUSY; + return err; if (!rtc->ops) err = -ENODEV; @@ -66,7 +66,7 @@ int rtc_set_mmss(struct rtc_device *rtc, unsigned long secs) err = mutex_lock_interruptible(&rtc->ops_lock); if (err) - return -EBUSY; + return err; if (!rtc->ops) err = -ENODEV; @@ -106,7 +106,7 @@ static int rtc_read_alarm_internal(struct rtc_device *rtc, struct rtc_wkalrm *al err = mutex_lock_interruptible(&rtc->ops_lock); if (err) - return -EBUSY; + return err; if (rtc->ops == NULL) err = -ENODEV; @@ -293,7 +293,7 @@ int rtc_set_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alarm) err = mutex_lock_interruptible(&rtc->ops_lock); if (err) - return -EBUSY; + return err; if (!rtc->ops) err = -ENODEV; diff --git a/drivers/rtc/rtc-dev.c b/drivers/rtc/rtc-dev.c index 0a870b7e5c3..856cc1af40d 100644 --- a/drivers/rtc/rtc-dev.c +++ b/drivers/rtc/rtc-dev.c @@ -221,7 +221,7 @@ static long rtc_dev_ioctl(struct file *file, err = mutex_lock_interruptible(&rtc->ops_lock); if (err) - return -EBUSY; + return err; /* check that the calling task has appropriate permissions * for certain ioctls. doing this check here is useful @@ -432,6 +432,8 @@ static int rtc_dev_release(struct inode *inode, struct file *file) #ifdef CONFIG_RTC_INTF_DEV_UIE_EMUL clear_uie(rtc); #endif + rtc_irq_set_state(rtc, NULL, 0); + if (rtc->ops->release) rtc->ops->release(rtc->dev.parent); diff --git a/drivers/s390/block/dasd_alias.c b/drivers/s390/block/dasd_alias.c index 2d8df0b3053..20676cdef4a 100644 --- a/drivers/s390/block/dasd_alias.c +++ b/drivers/s390/block/dasd_alias.c @@ -91,7 +91,8 @@ static struct alias_pav_group *_find_group(struct alias_lcu *lcu, else search_unit_addr = uid->base_unit_addr; list_for_each_entry(pos, &lcu->grouplist, group) { - if (pos->uid.base_unit_addr == search_unit_addr) + if (pos->uid.base_unit_addr == search_unit_addr && + !strncmp(pos->uid.vduit, uid->vduit, sizeof(uid->vduit))) return pos; }; return NULL; @@ -332,6 +333,7 @@ static int _add_device_to_lcu(struct alias_lcu *lcu, group->uid.base_unit_addr = uid->real_unit_addr; else group->uid.base_unit_addr = uid->base_unit_addr; + memcpy(group->uid.vduit, uid->vduit, sizeof(uid->vduit)); INIT_LIST_HEAD(&group->group); INIT_LIST_HEAD(&group->baselist); INIT_LIST_HEAD(&group->aliaslist); diff --git a/drivers/s390/block/dasd_devmap.c b/drivers/s390/block/dasd_devmap.c index d774e79476f..cd3335c1c30 100644 --- a/drivers/s390/block/dasd_devmap.c +++ b/drivers/s390/block/dasd_devmap.c @@ -913,7 +913,8 @@ dasd_vendor_show(struct device *dev, struct device_attribute *attr, char *buf) static DEVICE_ATTR(vendor, 0444, dasd_vendor_show, NULL); #define UID_STRLEN ( /* vendor */ 3 + 1 + /* serial */ 14 + 1 +\ - /* SSID */ 4 + 1 + /* unit addr */ 2 + 1) + /* SSID */ 4 + 1 + /* unit addr */ 2 + 1 +\ + /* vduit */ 32 + 1) static ssize_t dasd_uid_show(struct device *dev, struct device_attribute *attr, char *buf) @@ -945,8 +946,17 @@ dasd_uid_show(struct device *dev, struct device_attribute *attr, char *buf) sprintf(ua_string, "%02x", uid->real_unit_addr); break; } - snprintf(uid_string, sizeof(uid_string), "%s.%s.%04x.%s", - uid->vendor, uid->serial, uid->ssid, ua_string); + if (strlen(uid->vduit) > 0) + snprintf(uid_string, sizeof(uid_string), + "%s.%s.%04x.%s.%s", + uid->vendor, uid->serial, + uid->ssid, ua_string, + uid->vduit); + else + snprintf(uid_string, sizeof(uid_string), + "%s.%s.%04x.%s", + uid->vendor, uid->serial, + uid->ssid, ua_string); spin_unlock(&dasd_devmap_lock); return snprintf(buf, PAGE_SIZE, "%s\n", uid_string); } diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c index 3590fdb5b2f..773b3fe275b 100644 --- a/drivers/s390/block/dasd_eckd.c +++ b/drivers/s390/block/dasd_eckd.c @@ -313,8 +313,8 @@ static int prefix(struct ccw1 *ccw, struct PFX_eckd_data *pfxdata, int trk, memset(pfxdata, 0, sizeof(*pfxdata)); /* prefix data */ pfxdata->format = 0; - pfxdata->base_address = basepriv->conf_data.ned1.unit_addr; - pfxdata->base_lss = basepriv->conf_data.ned1.ID; + pfxdata->base_address = basepriv->ned->unit_addr; + pfxdata->base_lss = basepriv->ned->ID; pfxdata->validity.define_extend = 1; /* private uid is kept up to date, conf_data may be outdated */ @@ -536,36 +536,40 @@ dasd_eckd_cdl_reclen(int recid) /* * Generate device unique id that specifies the physical device. */ -static int -dasd_eckd_generate_uid(struct dasd_device *device, struct dasd_uid *uid) +static int dasd_eckd_generate_uid(struct dasd_device *device, + struct dasd_uid *uid) { struct dasd_eckd_private *private; - struct dasd_eckd_confdata *confdata; + int count; private = (struct dasd_eckd_private *) device->private; if (!private) return -ENODEV; - confdata = &private->conf_data; - if (!confdata) + if (!private->ned || !private->gneq) return -ENODEV; memset(uid, 0, sizeof(struct dasd_uid)); - memcpy(uid->vendor, confdata->ned1.HDA_manufacturer, + memcpy(uid->vendor, private->ned->HDA_manufacturer, sizeof(uid->vendor) - 1); EBCASC(uid->vendor, sizeof(uid->vendor) - 1); - memcpy(uid->serial, confdata->ned1.HDA_location, + memcpy(uid->serial, private->ned->HDA_location, sizeof(uid->serial) - 1); EBCASC(uid->serial, sizeof(uid->serial) - 1); - uid->ssid = confdata->neq.subsystemID; - uid->real_unit_addr = confdata->ned1.unit_addr; - if (confdata->ned2.sneq.flags == 0x40 && - confdata->ned2.sneq.format == 0x0001) { - uid->type = confdata->ned2.sneq.sua_flags; + uid->ssid = private->gneq->subsystemID; + uid->real_unit_addr = private->ned->unit_addr;; + if (private->sneq) { + uid->type = private->sneq->sua_flags; if (uid->type == UA_BASE_PAV_ALIAS) - uid->base_unit_addr = confdata->ned2.sneq.base_unit_addr; + uid->base_unit_addr = private->sneq->base_unit_addr; } else { uid->type = UA_BASE_DEVICE; } + if (private->vdsneq) { + for (count = 0; count < 16; count++) { + sprintf(uid->vduit+2*count, "%02x", + private->vdsneq->uit[count]); + } + } return 0; } @@ -623,6 +627,15 @@ static int dasd_eckd_read_conf_lpm(struct dasd_device *device, ret = -ENOMEM; goto out_error; } + + /* + * buffer has to start with EBCDIC "V1.0" to show + * support for virtual device SNEQ + */ + rcd_buf[0] = 0xE5; + rcd_buf[1] = 0xF1; + rcd_buf[2] = 0x4B; + rcd_buf[3] = 0xF0; cqr = dasd_eckd_build_rcd_lpm(device, rcd_buf, ciw, lpm); if (IS_ERR(cqr)) { ret = PTR_ERR(cqr); @@ -646,8 +659,62 @@ out_error: return ret; } -static int -dasd_eckd_read_conf(struct dasd_device *device) +static int dasd_eckd_identify_conf_parts(struct dasd_eckd_private *private) +{ + + struct dasd_sneq *sneq; + int i, count; + + private->ned = NULL; + private->sneq = NULL; + private->vdsneq = NULL; + private->gneq = NULL; + count = private->conf_len / sizeof(struct dasd_sneq); + sneq = (struct dasd_sneq *)private->conf_data; + for (i = 0; i < count; ++i) { + if (sneq->flags.identifier == 1 && sneq->format == 1) + private->sneq = sneq; + else if (sneq->flags.identifier == 1 && sneq->format == 4) + private->vdsneq = (struct vd_sneq *)sneq; + else if (sneq->flags.identifier == 2) + private->gneq = (struct dasd_gneq *)sneq; + else if (sneq->flags.identifier == 3 && sneq->res1 == 1) + private->ned = (struct dasd_ned *)sneq; + sneq++; + } + if (!private->ned || !private->gneq) { + private->ned = NULL; + private->sneq = NULL; + private->vdsneq = NULL; + private->gneq = NULL; + return -EINVAL; + } + return 0; + +}; + +static unsigned char dasd_eckd_path_access(void *conf_data, int conf_len) +{ + struct dasd_gneq *gneq; + int i, count, found; + + count = conf_len / sizeof(*gneq); + gneq = (struct dasd_gneq *)conf_data; + found = 0; + for (i = 0; i < count; ++i) { + if (gneq->flags.identifier == 2) { + found = 1; + break; + } + gneq++; + } + if (found) + return ((char *)gneq)[18] & 0x07; + else + return 0; +} + +static int dasd_eckd_read_conf(struct dasd_device *device) { void *conf_data; int conf_len, conf_data_saved; @@ -661,7 +728,6 @@ dasd_eckd_read_conf(struct dasd_device *device) path_data->opm = ccw_device_get_path_mask(device->cdev); lpm = 0x80; conf_data_saved = 0; - /* get configuration data per operational path */ for (lpm = 0x80; lpm; lpm>>= 1) { if (lpm & path_data->opm){ @@ -678,22 +744,20 @@ dasd_eckd_read_conf(struct dasd_device *device) "data retrieved"); continue; /* no error */ } - if (conf_len != sizeof(struct dasd_eckd_confdata)) { - MESSAGE(KERN_WARNING, - "sizes of configuration data mismatch" - "%d (read) vs %ld (expected)", - conf_len, - sizeof(struct dasd_eckd_confdata)); - kfree(conf_data); - continue; /* no error */ - } /* save first valid configuration data */ - if (!conf_data_saved){ - memcpy(&private->conf_data, conf_data, - sizeof(struct dasd_eckd_confdata)); + if (!conf_data_saved) { + kfree(private->conf_data); + private->conf_data = conf_data; + private->conf_len = conf_len; + if (dasd_eckd_identify_conf_parts(private)) { + private->conf_data = NULL; + private->conf_len = 0; + kfree(conf_data); + continue; + } conf_data_saved++; } - switch (((char *)conf_data)[242] & 0x07){ + switch (dasd_eckd_path_access(conf_data, conf_len)) { case 0x02: path_data->npm |= lpm; break; @@ -701,7 +765,8 @@ dasd_eckd_read_conf(struct dasd_device *device) path_data->ppm |= lpm; break; } - kfree(conf_data); + if (conf_data != private->conf_data) + kfree(conf_data); } } return 0; @@ -952,6 +1017,7 @@ out_err2: dasd_free_block(device->block); device->block = NULL; out_err1: + kfree(private->conf_data); kfree(device->private); device->private = NULL; return rc; @@ -959,7 +1025,17 @@ out_err1: static void dasd_eckd_uncheck_device(struct dasd_device *device) { + struct dasd_eckd_private *private; + + private = (struct dasd_eckd_private *) device->private; dasd_alias_disconnect_device_from_lcu(device); + private->ned = NULL; + private->sneq = NULL; + private->vdsneq = NULL; + private->gneq = NULL; + private->conf_len = 0; + kfree(private->conf_data); + private->conf_data = NULL; } static struct dasd_ccw_req * @@ -1746,9 +1822,10 @@ dasd_eckd_fill_info(struct dasd_device * device, info->characteristics_size = sizeof(struct dasd_eckd_characteristics); memcpy(info->characteristics, &private->rdc_data, sizeof(struct dasd_eckd_characteristics)); - info->confdata_size = sizeof(struct dasd_eckd_confdata); - memcpy(info->configuration_data, &private->conf_data, - sizeof(struct dasd_eckd_confdata)); + info->confdata_size = min((unsigned long)private->conf_len, + sizeof(info->configuration_data)); + memcpy(info->configuration_data, private->conf_data, + info->confdata_size); return 0; } diff --git a/drivers/s390/block/dasd_eckd.h b/drivers/s390/block/dasd_eckd.h index fc2509c939b..4bf0aa5112c 100644 --- a/drivers/s390/block/dasd_eckd.h +++ b/drivers/s390/block/dasd_eckd.h @@ -231,133 +231,62 @@ struct dasd_eckd_characteristics { __u8 reserved3[10]; } __attribute__ ((packed)); -struct dasd_eckd_confdata { +/* elements of the configuration data */ +struct dasd_ned { struct { - struct { - unsigned char identifier:2; - unsigned char token_id:1; - unsigned char sno_valid:1; - unsigned char subst_sno:1; - unsigned char recNED:1; - unsigned char emuNED:1; - unsigned char reserved:1; - } __attribute__ ((packed)) flags; - __u8 descriptor; - __u8 dev_class; - __u8 reserved; - unsigned char dev_type[6]; - unsigned char dev_model[3]; - unsigned char HDA_manufacturer[3]; - unsigned char HDA_location[2]; - unsigned char HDA_seqno[12]; - __u8 ID; - __u8 unit_addr; - } __attribute__ ((packed)) ned1; - union { - struct { - struct { - unsigned char identifier:2; - unsigned char token_id:1; - unsigned char sno_valid:1; - unsigned char subst_sno:1; - unsigned char recNED:1; - unsigned char emuNED:1; - unsigned char reserved:1; - } __attribute__ ((packed)) flags; - __u8 descriptor; - __u8 reserved[2]; - unsigned char dev_type[6]; - unsigned char dev_model[3]; - unsigned char DASD_manufacturer[3]; - unsigned char DASD_location[2]; - unsigned char DASD_seqno[12]; - __u16 ID; - } __attribute__ ((packed)) ned; - struct { - unsigned char flags; /* byte 0 */ - unsigned char res1; /* byte 1 */ - __u16 format; /* byte 2-3 */ - unsigned char res2[4]; /* byte 4-7 */ - unsigned char sua_flags; /* byte 8 */ - __u8 base_unit_addr; /* byte 9 */ - unsigned char res3[22]; /* byte 10-31 */ - } __attribute__ ((packed)) sneq; - } __attribute__ ((packed)) ned2; + __u8 identifier:2; + __u8 token_id:1; + __u8 sno_valid:1; + __u8 subst_sno:1; + __u8 recNED:1; + __u8 emuNED:1; + __u8 reserved:1; + } __attribute__ ((packed)) flags; + __u8 descriptor; + __u8 dev_class; + __u8 reserved; + __u8 dev_type[6]; + __u8 dev_model[3]; + __u8 HDA_manufacturer[3]; + __u8 HDA_location[2]; + __u8 HDA_seqno[12]; + __u8 ID; + __u8 unit_addr; +} __attribute__ ((packed)); + +struct dasd_sneq { struct { - struct { - unsigned char identifier:2; - unsigned char token_id:1; - unsigned char sno_valid:1; - unsigned char subst_sno:1; - unsigned char recNED:1; - unsigned char emuNED:1; - unsigned char reserved:1; - } __attribute__ ((packed)) flags; - __u8 descriptor; - __u8 reserved[2]; - unsigned char cont_type[6]; - unsigned char cont_model[3]; - unsigned char cont_manufacturer[3]; - unsigned char cont_location[2]; - unsigned char cont_seqno[12]; - __u16 ID; - } __attribute__ ((packed)) ned3; + __u8 identifier:2; + __u8 reserved:6; + } __attribute__ ((packed)) flags; + __u8 res1; + __u16 format; + __u8 res2[4]; /* byte 4- 7 */ + __u8 sua_flags; /* byte 8 */ + __u8 base_unit_addr; /* byte 9 */ + __u8 res3[22]; /* byte 10-31 */ +} __attribute__ ((packed)); + +struct vd_sneq { struct { - struct { - unsigned char identifier:2; - unsigned char token_id:1; - unsigned char sno_valid:1; - unsigned char subst_sno:1; - unsigned char recNED:1; - unsigned char emuNED:1; - unsigned char reserved:1; - } __attribute__ ((packed)) flags; - __u8 descriptor; - __u8 reserved[2]; - unsigned char cont_type[6]; - unsigned char empty[3]; - unsigned char cont_manufacturer[3]; - unsigned char cont_location[2]; - unsigned char cont_seqno[12]; - __u16 ID; - } __attribute__ ((packed)) ned4; - unsigned char ned5[32]; - unsigned char ned6[32]; - unsigned char ned7[32]; + __u8 identifier:2; + __u8 reserved:6; + } __attribute__ ((packed)) flags; + __u8 res1; + __u16 format; + __u8 res2[4]; /* byte 4- 7 */ + __u8 uit[16]; /* byte 8-23 */ + __u8 res3[8]; /* byte 24-31 */ +} __attribute__ ((packed)); + +struct dasd_gneq { struct { - struct { - unsigned char identifier:2; - unsigned char reserved:6; - } __attribute__ ((packed)) flags; - __u8 selector; - __u16 interfaceID; - __u32 reserved; - __u16 subsystemID; - struct { - unsigned char sp0:1; - unsigned char sp1:1; - unsigned char reserved:5; - unsigned char scluster:1; - } __attribute__ ((packed)) spathID; - __u8 unit_address; - __u8 dev_ID; - __u8 dev_address; - __u8 adapterID; - __u16 link_address; - struct { - unsigned char parallel:1; - unsigned char escon:1; - unsigned char reserved:1; - unsigned char ficon:1; - unsigned char reserved2:4; - } __attribute__ ((packed)) protocol_type; - struct { - unsigned char PID_in_236:1; - unsigned char reserved:7; - } __attribute__ ((packed)) format_flags; - __u8 log_dev_address; - unsigned char reserved2[12]; - } __attribute__ ((packed)) neq; + __u8 identifier:2; + __u8 reserved:6; + } __attribute__ ((packed)) flags; + __u8 reserved[7]; + __u16 subsystemID; + __u8 reserved2[22]; } __attribute__ ((packed)); struct dasd_eckd_path { @@ -463,7 +392,14 @@ struct alias_pav_group { struct dasd_eckd_private { struct dasd_eckd_characteristics rdc_data; - struct dasd_eckd_confdata conf_data; + u8 *conf_data; + int conf_len; + /* pointers to specific parts in the conf_data */ + struct dasd_ned *ned; + struct dasd_sneq *sneq; + struct vd_sneq *vdsneq; + struct dasd_gneq *gneq; + struct dasd_eckd_path path_data; struct eckd_count count_area[5]; int init_cqr_status; diff --git a/drivers/s390/block/dasd_int.h b/drivers/s390/block/dasd_int.h index fb2f931cf84..31ecaa4a40e 100644 --- a/drivers/s390/block/dasd_int.h +++ b/drivers/s390/block/dasd_int.h @@ -307,6 +307,7 @@ struct dasd_uid { __u16 ssid; __u8 real_unit_addr; __u8 base_unit_addr; + char vduit[33]; }; /* diff --git a/drivers/s390/char/sclp.c b/drivers/s390/char/sclp.c index 3c8b25e6c34..1fd8f2193ed 100644 --- a/drivers/s390/char/sclp.c +++ b/drivers/s390/char/sclp.c @@ -399,6 +399,7 @@ sclp_tod_from_jiffies(unsigned long jiffies) void sclp_sync_wait(void) { + unsigned long long old_tick; unsigned long flags; unsigned long cr0, cr0_sync; u64 timeout; @@ -419,11 +420,12 @@ sclp_sync_wait(void) if (!irq_context) local_bh_disable(); /* Enable service-signal interruption, disable timer interrupts */ + old_tick = local_tick_disable(); trace_hardirqs_on(); __ctl_store(cr0, 0, 0); cr0_sync = cr0; + cr0_sync &= 0xffff00a0; cr0_sync |= 0x00000200; - cr0_sync &= 0xFFFFF3AC; __ctl_load(cr0_sync, 0, 0); __raw_local_irq_stosm(0x01); /* Loop until driver state indicates finished request */ @@ -439,9 +441,9 @@ sclp_sync_wait(void) __ctl_load(cr0, 0, 0); if (!irq_context) _local_bh_enable(); + local_tick_enable(old_tick); local_irq_restore(flags); } - EXPORT_SYMBOL(sclp_sync_wait); /* Dispatch changes in send and receive mask to registered listeners. */ diff --git a/drivers/s390/char/sclp_cmd.c b/drivers/s390/char/sclp_cmd.c index 0c2b77493db..eb5f1b8bc57 100644 --- a/drivers/s390/char/sclp_cmd.c +++ b/drivers/s390/char/sclp_cmd.c @@ -427,6 +427,8 @@ static int sclp_mem_notifier(struct notifier_block *nb, sclp_attach_storage(id); switch (action) { case MEM_ONLINE: + case MEM_GOING_OFFLINE: + case MEM_CANCEL_OFFLINE: break; case MEM_GOING_ONLINE: rc = sclp_mem_change_state(start, size, 1); @@ -434,6 +436,9 @@ static int sclp_mem_notifier(struct notifier_block *nb, case MEM_CANCEL_ONLINE: sclp_mem_change_state(start, size, 0); break; + case MEM_OFFLINE: + sclp_mem_change_state(start, size, 0); + break; default: rc = -EINVAL; break; diff --git a/drivers/s390/char/sclp_config.c b/drivers/s390/char/sclp_config.c index fff4ff485d9..4cebd6ee6d2 100644 --- a/drivers/s390/char/sclp_config.c +++ b/drivers/s390/char/sclp_config.c @@ -8,7 +8,6 @@ #include <linux/init.h> #include <linux/errno.h> #include <linux/cpu.h> -#include <linux/kthread.h> #include <linux/sysdev.h> #include <linux/workqueue.h> #include <asm/smp.h> @@ -41,19 +40,9 @@ static void sclp_cpu_capability_notify(struct work_struct *work) put_online_cpus(); } -static int sclp_cpu_kthread(void *data) -{ - smp_rescan_cpus(); - return 0; -} - static void __ref sclp_cpu_change_notify(struct work_struct *work) { - /* Can't call smp_rescan_cpus() from workqueue context since it may - * deadlock in case of cpu hotplug. So we have to create a kernel - * thread in order to call it. - */ - kthread_run(sclp_cpu_kthread, NULL, "cpu_rescan"); + smp_rescan_cpus(); } static void sclp_conf_receiver_fn(struct evbuf_header *evbuf) diff --git a/drivers/s390/cio/idset.c b/drivers/s390/cio/idset.c index ef7bc0a125e..cf8f24a4b5e 100644 --- a/drivers/s390/cio/idset.c +++ b/drivers/s390/cio/idset.c @@ -5,7 +5,7 @@ * Author(s): Peter Oberparleiter <peter.oberparleiter@de.ibm.com> */ -#include <linux/slab.h> +#include <linux/vmalloc.h> #include <linux/bitops.h> #include "idset.h" #include "css.h" @@ -25,18 +25,18 @@ static struct idset *idset_new(int num_ssid, int num_id) { struct idset *set; - set = kzalloc(sizeof(struct idset) + bitmap_size(num_ssid, num_id), - GFP_KERNEL); + set = vmalloc(sizeof(struct idset) + bitmap_size(num_ssid, num_id)); if (set) { set->num_ssid = num_ssid; set->num_id = num_id; + memset(set->bitmap, 0, bitmap_size(num_ssid, num_id)); } return set; } void idset_free(struct idset *set) { - kfree(set); + vfree(set); } void idset_clear(struct idset *set) diff --git a/drivers/s390/cio/qdio_main.c b/drivers/s390/cio/qdio_main.c index d10c73cc168..d15648514a0 100644 --- a/drivers/s390/cio/qdio_main.c +++ b/drivers/s390/cio/qdio_main.c @@ -1355,7 +1355,7 @@ int qdio_allocate(struct qdio_initialize *init_data) goto out_rel; /* qdr is used in ccw1.cda which is u32 */ - irq_ptr->qdr = kzalloc(sizeof(struct qdr), GFP_KERNEL | GFP_DMA); + irq_ptr->qdr = (struct qdr *) get_zeroed_page(GFP_KERNEL | GFP_DMA); if (!irq_ptr->qdr) goto out_rel; WARN_ON((unsigned long)irq_ptr->qdr & 0xfff); diff --git a/drivers/s390/cio/qdio_perf.c b/drivers/s390/cio/qdio_perf.c index ea01b85b1cc..ec5c4a41423 100644 --- a/drivers/s390/cio/qdio_perf.c +++ b/drivers/s390/cio/qdio_perf.c @@ -142,7 +142,7 @@ int __init qdio_setup_perf_stats(void) return 0; } -void __exit qdio_remove_perf_stats(void) +void qdio_remove_perf_stats(void) { #ifdef CONFIG_PROC_FS remove_proc_entry("qdio_perf", NULL); diff --git a/drivers/s390/cio/qdio_setup.c b/drivers/s390/cio/qdio_setup.c index f0923a8aced..1bd2a208db2 100644 --- a/drivers/s390/cio/qdio_setup.c +++ b/drivers/s390/cio/qdio_setup.c @@ -325,7 +325,7 @@ void qdio_release_memory(struct qdio_irq *irq_ptr) kmem_cache_free(qdio_q_cache, q); } } - kfree(irq_ptr->qdr); + free_page((unsigned long) irq_ptr->qdr); free_page(irq_ptr->chsc_page); free_page((unsigned long) irq_ptr); } @@ -515,7 +515,7 @@ int __init qdio_setup_init(void) return 0; } -void __exit qdio_setup_exit(void) +void qdio_setup_exit(void) { kmem_cache_destroy(qdio_q_cache); } diff --git a/drivers/s390/kvm/kvm_virtio.c b/drivers/s390/kvm/kvm_virtio.c index 79954bd6bfa..292b60da6dc 100644 --- a/drivers/s390/kvm/kvm_virtio.c +++ b/drivers/s390/kvm/kvm_virtio.c @@ -352,7 +352,7 @@ static __init int early_put_chars(u32 vtermno, const char *buf, int count) return len; } -void s390_virtio_console_init(void) +void __init s390_virtio_console_init(void) { virtio_cons_early_init(early_put_chars); } diff --git a/drivers/s390/net/qeth_core.h b/drivers/s390/net/qeth_core.h index 1895dbb553c..80971c21ea1 100644 --- a/drivers/s390/net/qeth_core.h +++ b/drivers/s390/net/qeth_core.h @@ -419,6 +419,7 @@ struct qeth_qdio_out_buffer { int next_element_to_fill; struct sk_buff_head skb_list; struct list_head ctx_list; + int is_header[16]; }; struct qeth_card; @@ -785,7 +786,7 @@ void qeth_core_remove_osn_attributes(struct device *); /* exports for qeth discipline device drivers */ extern struct qeth_card_list_struct qeth_core_card_list; - +extern struct kmem_cache *qeth_core_header_cache; extern struct qeth_dbf_info qeth_dbf[QETH_DBF_INFOS]; void qeth_set_allowed_threads(struct qeth_card *, unsigned long , int); @@ -843,7 +844,7 @@ int qeth_get_priority_queue(struct qeth_card *, struct sk_buff *, int, int); int qeth_get_elements_no(struct qeth_card *, void *, struct sk_buff *, int); int qeth_do_send_packet_fast(struct qeth_card *, struct qeth_qdio_out_q *, struct sk_buff *, struct qeth_hdr *, int, - struct qeth_eddp_context *); + struct qeth_eddp_context *, int, int); int qeth_do_send_packet(struct qeth_card *, struct qeth_qdio_out_q *, struct sk_buff *, struct qeth_hdr *, int, struct qeth_eddp_context *); diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index cebb25e36e8..bd420d1b9a0 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -19,8 +19,8 @@ #include <linux/mii.h> #include <linux/kthread.h> -#include <asm-s390/ebcdic.h> -#include <asm-s390/io.h> +#include <asm/ebcdic.h> +#include <asm/io.h> #include <asm/s390_rdev.h> #include "qeth_core.h" @@ -48,6 +48,8 @@ EXPORT_SYMBOL_GPL(qeth_dbf); struct qeth_card_list_struct qeth_core_card_list; EXPORT_SYMBOL_GPL(qeth_core_card_list); +struct kmem_cache *qeth_core_header_cache; +EXPORT_SYMBOL_GPL(qeth_core_header_cache); static struct device *qeth_core_root_dev; static unsigned int known_devices[][10] = QETH_MODELLIST_ARRAY; @@ -933,6 +935,10 @@ static void qeth_clear_output_buffer(struct qeth_qdio_out_q *queue, } qeth_eddp_buf_release_contexts(buf); for (i = 0; i < QETH_MAX_BUFFER_ELEMENTS(queue->card); ++i) { + if (buf->buffer->element[i].addr && buf->is_header[i]) + kmem_cache_free(qeth_core_header_cache, + buf->buffer->element[i].addr); + buf->is_header[i] = 0; buf->buffer->element[i].length = 0; buf->buffer->element[i].addr = NULL; buf->buffer->element[i].flags = 0; @@ -3002,8 +3008,8 @@ int qeth_get_elements_no(struct qeth_card *card, void *hdr, if (skb_shinfo(skb)->nr_frags > 0) elements_needed = (skb_shinfo(skb)->nr_frags + 1); if (elements_needed == 0) - elements_needed = 1 + (((((unsigned long) hdr) % PAGE_SIZE) - + skb->len) >> PAGE_SHIFT); + elements_needed = 1 + (((((unsigned long) skb->data) % + PAGE_SIZE) + skb->len) >> PAGE_SHIFT); if ((elements_needed + elems) > QETH_MAX_BUFFER_ELEMENTS(card)) { QETH_DBF_MESSAGE(2, "Invalid size of IP packet " "(Number=%d / Length=%d). Discarded.\n", @@ -3015,7 +3021,8 @@ int qeth_get_elements_no(struct qeth_card *card, void *hdr, EXPORT_SYMBOL_GPL(qeth_get_elements_no); static inline void __qeth_fill_buffer(struct sk_buff *skb, - struct qdio_buffer *buffer, int is_tso, int *next_element_to_fill) + struct qdio_buffer *buffer, int is_tso, int *next_element_to_fill, + int offset) { int length = skb->len; int length_here; @@ -3027,6 +3034,11 @@ static inline void __qeth_fill_buffer(struct sk_buff *skb, data = skb->data; first_lap = (is_tso == 0 ? 1 : 0); + if (offset >= 0) { + data = skb->data + offset; + first_lap = 0; + } + while (length > 0) { /* length_here is the remaining amount of data in this page */ length_here = PAGE_SIZE - ((unsigned long) data % PAGE_SIZE); @@ -3058,22 +3070,22 @@ static inline void __qeth_fill_buffer(struct sk_buff *skb, } static inline int qeth_fill_buffer(struct qeth_qdio_out_q *queue, - struct qeth_qdio_out_buffer *buf, struct sk_buff *skb) + struct qeth_qdio_out_buffer *buf, struct sk_buff *skb, + struct qeth_hdr *hdr, int offset, int hd_len) { struct qdio_buffer *buffer; - struct qeth_hdr_tso *hdr; int flush_cnt = 0, hdr_len, large_send = 0; buffer = buf->buffer; atomic_inc(&skb->users); skb_queue_tail(&buf->skb_list, skb); - hdr = (struct qeth_hdr_tso *) skb->data; /*check first on TSO ....*/ - if (hdr->hdr.hdr.l3.id == QETH_HEADER_TYPE_TSO) { + if (hdr->hdr.l3.id == QETH_HEADER_TYPE_TSO) { int element = buf->next_element_to_fill; - hdr_len = sizeof(struct qeth_hdr_tso) + hdr->ext.dg_hdr_len; + hdr_len = sizeof(struct qeth_hdr_tso) + + ((struct qeth_hdr_tso *)hdr)->ext.dg_hdr_len; /*fill first buffer entry only with header information */ buffer->element[element].addr = skb->data; buffer->element[element].length = hdr_len; @@ -3083,9 +3095,20 @@ static inline int qeth_fill_buffer(struct qeth_qdio_out_q *queue, skb->len -= hdr_len; large_send = 1; } + + if (offset >= 0) { + int element = buf->next_element_to_fill; + buffer->element[element].addr = hdr; + buffer->element[element].length = sizeof(struct qeth_hdr) + + hd_len; + buffer->element[element].flags = SBAL_FLAGS_FIRST_FRAG; + buf->is_header[element] = 1; + buf->next_element_to_fill++; + } + if (skb_shinfo(skb)->nr_frags == 0) __qeth_fill_buffer(skb, buffer, large_send, - (int *)&buf->next_element_to_fill); + (int *)&buf->next_element_to_fill, offset); else __qeth_fill_buffer_frag(skb, buffer, large_send, (int *)&buf->next_element_to_fill); @@ -3115,7 +3138,7 @@ static inline int qeth_fill_buffer(struct qeth_qdio_out_q *queue, int qeth_do_send_packet_fast(struct qeth_card *card, struct qeth_qdio_out_q *queue, struct sk_buff *skb, struct qeth_hdr *hdr, int elements_needed, - struct qeth_eddp_context *ctx) + struct qeth_eddp_context *ctx, int offset, int hd_len) { struct qeth_qdio_out_buffer *buffer; int buffers_needed = 0; @@ -3148,7 +3171,7 @@ int qeth_do_send_packet_fast(struct qeth_card *card, } atomic_set(&queue->state, QETH_OUT_Q_UNLOCKED); if (ctx == NULL) { - qeth_fill_buffer(queue, buffer, skb); + qeth_fill_buffer(queue, buffer, skb, hdr, offset, hd_len); qeth_flush_buffers(queue, index, 1); } else { flush_cnt = qeth_eddp_fill_buffer(queue, ctx, index); @@ -3224,7 +3247,7 @@ int qeth_do_send_packet(struct qeth_card *card, struct qeth_qdio_out_q *queue, } } if (ctx == NULL) - tmp = qeth_fill_buffer(queue, buffer, skb); + tmp = qeth_fill_buffer(queue, buffer, skb, hdr, -1, 0); else { tmp = qeth_eddp_fill_buffer(queue, ctx, queue->next_buf_to_fill); @@ -4443,8 +4466,17 @@ static int __init qeth_core_init(void) rc = IS_ERR(qeth_core_root_dev) ? PTR_ERR(qeth_core_root_dev) : 0; if (rc) goto register_err; - return 0; + qeth_core_header_cache = kmem_cache_create("qeth_hdr", + sizeof(struct qeth_hdr) + ETH_HLEN, 64, 0, NULL); + if (!qeth_core_header_cache) { + rc = -ENOMEM; + goto slab_err; + } + + return 0; +slab_err: + s390_root_dev_unregister(qeth_core_root_dev); register_err: driver_remove_file(&qeth_core_ccwgroup_driver.driver, &driver_attr_group); @@ -4466,6 +4498,7 @@ static void __exit qeth_core_exit(void) &driver_attr_group); ccwgroup_driver_unregister(&qeth_core_ccwgroup_driver); ccw_driver_unregister(&qeth_ccw_driver); + kmem_cache_destroy(qeth_core_header_cache); qeth_unregister_dbf_views(); PRINT_INFO("core functions removed\n"); } diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c index a8b069cd9a4..b3cee032f57 100644 --- a/drivers/s390/net/qeth_l2_main.c +++ b/drivers/s390/net/qeth_l2_main.c @@ -243,8 +243,7 @@ static void qeth_l2_get_packet_type(struct qeth_card *card, static void qeth_l2_fill_header(struct qeth_card *card, struct qeth_hdr *hdr, struct sk_buff *skb, int ipv, int cast_type) { - struct vlan_ethhdr *veth = (struct vlan_ethhdr *)((skb->data) + - QETH_HEADER_SIZE); + struct vlan_ethhdr *veth = (struct vlan_ethhdr *)skb_mac_header(skb); memset(hdr, 0, sizeof(struct qeth_hdr)); hdr->hdr.l2.id = QETH_HEADER_TYPE_LAYER2; @@ -621,6 +620,9 @@ static int qeth_l2_hard_start_xmit(struct sk_buff *skb, struct net_device *dev) int tx_bytes = skb->len; enum qeth_large_send_types large_send = QETH_LARGE_SEND_NO; struct qeth_eddp_context *ctx = NULL; + int data_offset = -1; + int elements_needed = 0; + int hd_len = 0; if ((card->state != CARD_STATE_UP) || !card->lan_online) { card->stats.tx_carrier_errors++; @@ -643,13 +645,32 @@ static int qeth_l2_hard_start_xmit(struct sk_buff *skb, struct net_device *dev) if (card->info.type == QETH_CARD_TYPE_OSN) hdr = (struct qeth_hdr *)skb->data; else { - /* create a clone with writeable headroom */ - new_skb = skb_realloc_headroom(skb, sizeof(struct qeth_hdr)); - if (!new_skb) - goto tx_drop; - hdr = (struct qeth_hdr *)skb_push(new_skb, + if ((card->info.type == QETH_CARD_TYPE_IQD) && (!large_send) && + (skb_shinfo(skb)->nr_frags == 0)) { + new_skb = skb; + data_offset = ETH_HLEN; + hd_len = ETH_HLEN; + hdr = kmem_cache_alloc(qeth_core_header_cache, + GFP_ATOMIC); + if (!hdr) + goto tx_drop; + elements_needed++; + skb_reset_mac_header(new_skb); + qeth_l2_fill_header(card, hdr, new_skb, ipv, cast_type); + hdr->hdr.l2.pkt_length = new_skb->len; + memcpy(((char *)hdr) + sizeof(struct qeth_hdr), + skb_mac_header(new_skb), ETH_HLEN); + } else { + /* create a clone with writeable headroom */ + new_skb = skb_realloc_headroom(skb, + sizeof(struct qeth_hdr)); + if (!new_skb) + goto tx_drop; + hdr = (struct qeth_hdr *)skb_push(new_skb, sizeof(struct qeth_hdr)); - qeth_l2_fill_header(card, hdr, new_skb, ipv, cast_type); + skb_set_mac_header(new_skb, sizeof(struct qeth_hdr)); + qeth_l2_fill_header(card, hdr, new_skb, ipv, cast_type); + } } if (large_send == QETH_LARGE_SEND_EDDP) { @@ -660,9 +681,13 @@ static int qeth_l2_hard_start_xmit(struct sk_buff *skb, struct net_device *dev) goto tx_drop; } } else { - elements = qeth_get_elements_no(card, (void *)hdr, new_skb, 0); - if (!elements) + elements = qeth_get_elements_no(card, (void *)hdr, new_skb, + elements_needed); + if (!elements) { + if (data_offset >= 0) + kmem_cache_free(qeth_core_header_cache, hdr); goto tx_drop; + } } if ((large_send == QETH_LARGE_SEND_NO) && @@ -674,7 +699,7 @@ static int qeth_l2_hard_start_xmit(struct sk_buff *skb, struct net_device *dev) elements, ctx); else rc = qeth_do_send_packet_fast(card, queue, new_skb, hdr, - elements, ctx); + elements, ctx, data_offset, hd_len); if (!rc) { card->stats.tx_packets++; card->stats.tx_bytes += tx_bytes; @@ -701,6 +726,9 @@ static int qeth_l2_hard_start_xmit(struct sk_buff *skb, struct net_device *dev) if (ctx != NULL) qeth_eddp_put_context(ctx); + if (data_offset >= 0) + kmem_cache_free(qeth_core_header_cache, hdr); + if (rc == -EBUSY) { if (new_skb != skb) dev_kfree_skb_any(new_skb); diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c index 3e1d1385735..dd72c3c2016 100644 --- a/drivers/s390/net/qeth_l3_main.c +++ b/drivers/s390/net/qeth_l3_main.c @@ -2604,6 +2604,7 @@ static int qeth_l3_hard_start_xmit(struct sk_buff *skb, struct net_device *dev) int tx_bytes = skb->len; enum qeth_large_send_types large_send = QETH_LARGE_SEND_NO; struct qeth_eddp_context *ctx = NULL; + int data_offset = -1; if ((card->info.type == QETH_CARD_TYPE_IQD) && (skb->protocol != htons(ETH_P_IPV6)) && @@ -2624,14 +2625,28 @@ static int qeth_l3_hard_start_xmit(struct sk_buff *skb, struct net_device *dev) card->perf_stats.outbound_start_time = qeth_get_micros(); } - /* create a clone with writeable headroom */ - new_skb = skb_realloc_headroom(skb, sizeof(struct qeth_hdr_tso) + - VLAN_HLEN); - if (!new_skb) - goto tx_drop; + if (skb_is_gso(skb)) + large_send = card->options.large_send; + + if ((card->info.type == QETH_CARD_TYPE_IQD) && (!large_send) && + (skb_shinfo(skb)->nr_frags == 0)) { + new_skb = skb; + data_offset = ETH_HLEN; + hdr = kmem_cache_alloc(qeth_core_header_cache, GFP_ATOMIC); + if (!hdr) + goto tx_drop; + elements_needed++; + } else { + /* create a clone with writeable headroom */ + new_skb = skb_realloc_headroom(skb, sizeof(struct qeth_hdr_tso) + + VLAN_HLEN); + if (!new_skb) + goto tx_drop; + } if (card->info.type == QETH_CARD_TYPE_IQD) { - skb_pull(new_skb, ETH_HLEN); + if (data_offset < 0) + skb_pull(new_skb, ETH_HLEN); } else { if (new_skb->protocol == htons(ETH_P_IP)) { if (card->dev->type == ARPHRD_IEEE802_TR) @@ -2657,9 +2672,6 @@ static int qeth_l3_hard_start_xmit(struct sk_buff *skb, struct net_device *dev) netif_stop_queue(dev); - if (skb_is_gso(new_skb)) - large_send = card->options.large_send; - /* fix hardware limitation: as long as we do not have sbal * chaining we can not send long frag lists so we temporary * switch to EDDP @@ -2677,9 +2689,16 @@ static int qeth_l3_hard_start_xmit(struct sk_buff *skb, struct net_device *dev) qeth_tso_fill_header(card, hdr, new_skb); elements_needed++; } else { - hdr = (struct qeth_hdr *)skb_push(new_skb, + if (data_offset < 0) { + hdr = (struct qeth_hdr *)skb_push(new_skb, sizeof(struct qeth_hdr)); - qeth_l3_fill_header(card, hdr, new_skb, ipv, cast_type); + qeth_l3_fill_header(card, hdr, new_skb, ipv, + cast_type); + } else { + qeth_l3_fill_header(card, hdr, new_skb, ipv, + cast_type); + hdr->hdr.l3.length = new_skb->len - data_offset; + } } if (large_send == QETH_LARGE_SEND_EDDP) { @@ -2695,8 +2714,11 @@ static int qeth_l3_hard_start_xmit(struct sk_buff *skb, struct net_device *dev) } else { int elems = qeth_get_elements_no(card, (void *)hdr, new_skb, elements_needed); - if (!elems) + if (!elems) { + if (data_offset >= 0) + kmem_cache_free(qeth_core_header_cache, hdr); goto tx_drop; + } elements_needed += elems; } @@ -2709,7 +2731,7 @@ static int qeth_l3_hard_start_xmit(struct sk_buff *skb, struct net_device *dev) elements_needed, ctx); else rc = qeth_do_send_packet_fast(card, queue, new_skb, hdr, - elements_needed, ctx); + elements_needed, ctx, data_offset, 0); if (!rc) { card->stats.tx_packets++; @@ -2737,6 +2759,9 @@ static int qeth_l3_hard_start_xmit(struct sk_buff *skb, struct net_device *dev) if (ctx != NULL) qeth_eddp_put_context(ctx); + if (data_offset >= 0) + kmem_cache_free(qeth_core_header_cache, hdr); + if (rc == -EBUSY) { if (new_skb != skb) dev_kfree_skb_any(new_skb); diff --git a/drivers/scsi/qla2xxx/qla_attr.c b/drivers/scsi/qla2xxx/qla_attr.c index 7a4409ab30e..a319a20ed44 100644 --- a/drivers/scsi/qla2xxx/qla_attr.c +++ b/drivers/scsi/qla2xxx/qla_attr.c @@ -8,6 +8,7 @@ #include <linux/kthread.h> #include <linux/vmalloc.h> +#include <linux/delay.h> static int qla24xx_vport_disable(struct fc_vport *, bool); diff --git a/drivers/serial/8250.c b/drivers/serial/8250.c index a97f1ae11f7..342e12fb1c2 100644 --- a/drivers/serial/8250.c +++ b/drivers/serial/8250.c @@ -1885,7 +1885,7 @@ static int serial8250_startup(struct uart_port *port) * the interrupt is enabled. Delays are necessary to * allow register changes to become visible. */ - spin_lock(&up->port.lock); + spin_lock_irqsave(&up->port.lock, flags); if (up->port.flags & UPF_SHARE_IRQ) disable_irq_nosync(up->port.irq); @@ -1901,7 +1901,7 @@ static int serial8250_startup(struct uart_port *port) if (up->port.flags & UPF_SHARE_IRQ) enable_irq(up->port.irq); - spin_unlock(&up->port.lock); + spin_unlock_irqrestore(&up->port.lock, flags); /* * If the interrupt is not reasserted, setup a timer to diff --git a/drivers/serial/Makefile b/drivers/serial/Makefile index 3a0bbbe17aa..7e7383e890d 100644 --- a/drivers/serial/Makefile +++ b/drivers/serial/Makefile @@ -42,7 +42,6 @@ obj-$(CONFIG_SERIAL_68328) += 68328serial.o obj-$(CONFIG_SERIAL_68360) += 68360serial.o obj-$(CONFIG_SERIAL_COLDFIRE) += mcfserial.o obj-$(CONFIG_SERIAL_MCF) += mcf.o -obj-$(CONFIG_V850E_UART) += v850e_uart.o obj-$(CONFIG_SERIAL_PMACZILOG) += pmac_zilog.o obj-$(CONFIG_SERIAL_LH7A40X) += serial_lh7a40x.o obj-$(CONFIG_SERIAL_DZ) += dz.o diff --git a/drivers/serial/cpm_uart/cpm_uart.h b/drivers/serial/cpm_uart/cpm_uart.h index 5c76e0ae058..7274b527a3c 100644 --- a/drivers/serial/cpm_uart/cpm_uart.h +++ b/drivers/serial/cpm_uart/cpm_uart.h @@ -50,6 +50,15 @@ #define SCC_WAIT_CLOSING 100 +#define GPIO_CTS 0 +#define GPIO_RTS 1 +#define GPIO_DCD 2 +#define GPIO_DSR 3 +#define GPIO_DTR 4 +#define GPIO_RI 5 + +#define NUM_GPIOS (GPIO_RI+1) + struct uart_cpm_port { struct uart_port port; u16 rx_nrfifos; @@ -68,6 +77,7 @@ struct uart_cpm_port { unsigned char *rx_buf; u32 flags; void (*set_lineif)(struct uart_cpm_port *); + struct clk *clk; u8 brg; uint dp_addr; void *mem_addr; @@ -82,6 +92,7 @@ struct uart_cpm_port { int wait_closing; /* value to combine with opcode to form cpm command */ u32 command; + int gpios[NUM_GPIOS]; }; extern int cpm_uart_nr; diff --git a/drivers/serial/cpm_uart/cpm_uart_core.c b/drivers/serial/cpm_uart/cpm_uart_core.c index a4f86927a74..25efca5a7a1 100644 --- a/drivers/serial/cpm_uart/cpm_uart_core.c +++ b/drivers/serial/cpm_uart/cpm_uart_core.c @@ -43,6 +43,9 @@ #include <linux/dma-mapping.h> #include <linux/fs_uart_pd.h> #include <linux/of_platform.h> +#include <linux/gpio.h> +#include <linux/of_gpio.h> +#include <linux/clk.h> #include <asm/io.h> #include <asm/irq.h> @@ -96,13 +99,41 @@ static unsigned int cpm_uart_tx_empty(struct uart_port *port) static void cpm_uart_set_mctrl(struct uart_port *port, unsigned int mctrl) { - /* Whee. Do nothing. */ + struct uart_cpm_port *pinfo = (struct uart_cpm_port *)port; + + if (pinfo->gpios[GPIO_RTS] >= 0) + gpio_set_value(pinfo->gpios[GPIO_RTS], !(mctrl & TIOCM_RTS)); + + if (pinfo->gpios[GPIO_DTR] >= 0) + gpio_set_value(pinfo->gpios[GPIO_DTR], !(mctrl & TIOCM_DTR)); } static unsigned int cpm_uart_get_mctrl(struct uart_port *port) { - /* Whee. Do nothing. */ - return TIOCM_CAR | TIOCM_DSR | TIOCM_CTS; + struct uart_cpm_port *pinfo = (struct uart_cpm_port *)port; + unsigned int mctrl = TIOCM_CTS | TIOCM_DSR | TIOCM_CAR; + + if (pinfo->gpios[GPIO_CTS] >= 0) { + if (gpio_get_value(pinfo->gpios[GPIO_CTS])) + mctrl &= ~TIOCM_CTS; + } + + if (pinfo->gpios[GPIO_DSR] >= 0) { + if (gpio_get_value(pinfo->gpios[GPIO_DSR])) + mctrl &= ~TIOCM_DSR; + } + + if (pinfo->gpios[GPIO_DCD] >= 0) { + if (gpio_get_value(pinfo->gpios[GPIO_DCD])) + mctrl &= ~TIOCM_CAR; + } + + if (pinfo->gpios[GPIO_RI] >= 0) { + if (!gpio_get_value(pinfo->gpios[GPIO_RI])) + mctrl |= TIOCM_RNG; + } + + return mctrl; } /* @@ -566,7 +597,10 @@ static void cpm_uart_set_termios(struct uart_port *port, out_be16(&sccp->scc_psmr, (sbits << 12) | scval); } - cpm_set_brg(pinfo->brg - 1, baud); + if (pinfo->clk) + clk_set_rate(pinfo->clk, baud); + else + cpm_set_brg(pinfo->brg - 1, baud); spin_unlock_irqrestore(&port->lock, flags); } @@ -991,14 +1025,23 @@ static int cpm_uart_init_port(struct device_node *np, void __iomem *mem, *pram; int len; int ret; + int i; - data = of_get_property(np, "fsl,cpm-brg", &len); - if (!data || len != 4) { - printk(KERN_ERR "CPM UART %s has no/invalid " - "fsl,cpm-brg property.\n", np->name); - return -EINVAL; + data = of_get_property(np, "clock", NULL); + if (data) { + struct clk *clk = clk_get(NULL, (const char*)data); + if (!IS_ERR(clk)) + pinfo->clk = clk; + } + if (!pinfo->clk) { + data = of_get_property(np, "fsl,cpm-brg", &len); + if (!data || len != 4) { + printk(KERN_ERR "CPM UART %s has no/invalid " + "fsl,cpm-brg property.\n", np->name); + return -EINVAL; + } + pinfo->brg = *data; } - pinfo->brg = *data; data = of_get_property(np, "fsl,cpm-command", &len); if (!data || len != 4) { @@ -1050,6 +1093,9 @@ static int cpm_uart_init_port(struct device_node *np, goto out_pram; } + for (i = 0; i < NUM_GPIOS; i++) + pinfo->gpios[i] = of_get_gpio(np, i); + return cpm_uart_request_port(&pinfo->port); out_pram: diff --git a/drivers/serial/v850e_uart.c b/drivers/serial/v850e_uart.c deleted file mode 100644 index 5acf061b6cd..00000000000 --- a/drivers/serial/v850e_uart.c +++ /dev/null @@ -1,548 +0,0 @@ -/* - * drivers/serial/v850e_uart.c -- Serial I/O using V850E on-chip UART or UARTB - * - * Copyright (C) 2001,02,03 NEC Electronics Corporation - * Copyright (C) 2001,02,03 Miles Bader <miles@gnu.org> - * - * This file is subject to the terms and conditions of the GNU General - * Public License. See the file COPYING in the main directory of this - * archive for more details. - * - * Written by Miles Bader <miles@gnu.org> - */ - -/* This driver supports both the original V850E UART interface (called - merely `UART' in the docs) and the newer `UARTB' interface, which is - roughly a superset of the first one. The selection is made at - configure time -- if CONFIG_V850E_UARTB is defined, then UARTB is - presumed, otherwise the old UART -- as these are on-CPU UARTS, a system - can never have both. - - The UARTB interface also has a 16-entry FIFO mode, which is not - yet supported by this driver. */ - -#include <linux/kernel.h> -#include <linux/init.h> -#include <linux/module.h> -#include <linux/console.h> -#include <linux/tty.h> -#include <linux/tty_flip.h> -#include <linux/serial.h> -#include <linux/serial_core.h> - -#include <asm/v850e_uart.h> - -/* Initial UART state. This may be overridden by machine-dependent headers. */ -#ifndef V850E_UART_INIT_BAUD -#define V850E_UART_INIT_BAUD 115200 -#endif -#ifndef V850E_UART_INIT_CFLAGS -#define V850E_UART_INIT_CFLAGS (B115200 | CS8 | CREAD) -#endif - -/* A string used for prefixing printed descriptions; since the same UART - macro is actually used on other chips than the V850E. This must be a - constant string. */ -#ifndef V850E_UART_CHIP_NAME -#define V850E_UART_CHIP_NAME "V850E" -#endif - -#define V850E_UART_MINOR_BASE 64 /* First tty minor number */ - - -/* Low-level UART functions. */ - -/* Configure and turn on uart channel CHAN, using the termios `control - modes' bits in CFLAGS, and a baud-rate of BAUD. */ -void v850e_uart_configure (unsigned chan, unsigned cflags, unsigned baud) -{ - int flags; - v850e_uart_speed_t old_speed; - v850e_uart_config_t old_config; - v850e_uart_speed_t new_speed = v850e_uart_calc_speed (baud); - v850e_uart_config_t new_config = v850e_uart_calc_config (cflags); - - /* Disable interrupts while we're twiddling the hardware. */ - local_irq_save (flags); - -#ifdef V850E_UART_PRE_CONFIGURE - V850E_UART_PRE_CONFIGURE (chan, cflags, baud); -#endif - - old_config = V850E_UART_CONFIG (chan); - old_speed = v850e_uart_speed (chan); - - if (! v850e_uart_speed_eq (old_speed, new_speed)) { - /* The baud rate has changed. First, disable the UART. */ - V850E_UART_CONFIG (chan) = V850E_UART_CONFIG_FINI; - old_config = 0; /* Force the uart to be re-initialized. */ - - /* Reprogram the baud-rate generator. */ - v850e_uart_set_speed (chan, new_speed); - } - - if (! (old_config & V850E_UART_CONFIG_ENABLED)) { - /* If we are using the uart for the first time, start by - enabling it, which must be done before turning on any - other bits. */ - V850E_UART_CONFIG (chan) = V850E_UART_CONFIG_INIT; - /* See the initial state. */ - old_config = V850E_UART_CONFIG (chan); - } - - if (new_config != old_config) { - /* Which of the TXE/RXE bits we'll temporarily turn off - before changing other control bits. */ - unsigned temp_disable = 0; - /* Which of the TXE/RXE bits will be enabled. */ - unsigned enable = 0; - unsigned changed_bits = new_config ^ old_config; - - /* Which of RX/TX will be enabled in the new configuration. */ - if (new_config & V850E_UART_CONFIG_RX_BITS) - enable |= (new_config & V850E_UART_CONFIG_RX_ENABLE); - if (new_config & V850E_UART_CONFIG_TX_BITS) - enable |= (new_config & V850E_UART_CONFIG_TX_ENABLE); - - /* Figure out which of RX/TX needs to be disabled; note - that this will only happen if they're not already - disabled. */ - if (changed_bits & V850E_UART_CONFIG_RX_BITS) - temp_disable - |= (old_config & V850E_UART_CONFIG_RX_ENABLE); - if (changed_bits & V850E_UART_CONFIG_TX_BITS) - temp_disable - |= (old_config & V850E_UART_CONFIG_TX_ENABLE); - - /* We have to turn off RX and/or TX mode before changing - any associated control bits. */ - if (temp_disable) - V850E_UART_CONFIG (chan) = old_config & ~temp_disable; - - /* Write the new control bits, while RX/TX are disabled. */ - if (changed_bits & ~enable) - V850E_UART_CONFIG (chan) = new_config & ~enable; - - v850e_uart_config_delay (new_config, new_speed); - - /* Write the final version, with enable bits turned on. */ - V850E_UART_CONFIG (chan) = new_config; - } - - local_irq_restore (flags); -} - - -/* Low-level console. */ - -#ifdef CONFIG_V850E_UART_CONSOLE - -static void v850e_uart_cons_write (struct console *co, - const char *s, unsigned count) -{ - if (count > 0) { - unsigned chan = co->index; - unsigned irq = V850E_UART_TX_IRQ (chan); - int irq_was_enabled, irq_was_pending, flags; - - /* We don't want to get `transmission completed' - interrupts, since we're busy-waiting, so we disable them - while sending (we don't disable interrupts entirely - because sending over a serial line is really slow). We - save the status of the tx interrupt and restore it when - we're done so that using printk doesn't interfere with - normal serial transmission (other than interleaving the - output, of course!). This should work correctly even if - this function is interrupted and the interrupt printks - something. */ - - /* Disable interrupts while fiddling with tx interrupt. */ - local_irq_save (flags); - /* Get current tx interrupt status. */ - irq_was_enabled = v850e_intc_irq_enabled (irq); - irq_was_pending = v850e_intc_irq_pending (irq); - /* Disable tx interrupt if necessary. */ - if (irq_was_enabled) - v850e_intc_disable_irq (irq); - /* Turn interrupts back on. */ - local_irq_restore (flags); - - /* Send characters. */ - while (count > 0) { - int ch = *s++; - - if (ch == '\n') { - /* We don't have the benefit of a tty - driver, so translate NL into CR LF. */ - v850e_uart_wait_for_xmit_ok (chan); - v850e_uart_putc (chan, '\r'); - } - - v850e_uart_wait_for_xmit_ok (chan); - v850e_uart_putc (chan, ch); - - count--; - } - - /* Restore saved tx interrupt status. */ - if (irq_was_enabled) { - /* Wait for the last character we sent to be - completely transmitted (as we'll get an - interrupt interrupt at that point). */ - v850e_uart_wait_for_xmit_done (chan); - /* Clear pending interrupts received due - to our transmission, unless there was already - one pending, in which case we want the - handler to be called. */ - if (! irq_was_pending) - v850e_intc_clear_pending_irq (irq); - /* ... and then turn back on handling. */ - v850e_intc_enable_irq (irq); - } - } -} - -extern struct uart_driver v850e_uart_driver; -static struct console v850e_uart_cons = -{ - .name = "ttyS", - .write = v850e_uart_cons_write, - .device = uart_console_device, - .flags = CON_PRINTBUFFER, - .cflag = V850E_UART_INIT_CFLAGS, - .index = -1, - .data = &v850e_uart_driver, -}; - -void v850e_uart_cons_init (unsigned chan) -{ - v850e_uart_configure (chan, V850E_UART_INIT_CFLAGS, - V850E_UART_INIT_BAUD); - v850e_uart_cons.index = chan; - register_console (&v850e_uart_cons); - printk ("Console: %s on-chip UART channel %d\n", - V850E_UART_CHIP_NAME, chan); -} - -/* This is what the init code actually calls. */ -static int v850e_uart_console_init (void) -{ - v850e_uart_cons_init (V850E_UART_CONSOLE_CHANNEL); - return 0; -} -console_initcall(v850e_uart_console_init); - -#define V850E_UART_CONSOLE &v850e_uart_cons - -#else /* !CONFIG_V850E_UART_CONSOLE */ -#define V850E_UART_CONSOLE 0 -#endif /* CONFIG_V850E_UART_CONSOLE */ - -/* TX/RX interrupt handlers. */ - -static void v850e_uart_stop_tx (struct uart_port *port); - -void v850e_uart_tx (struct uart_port *port) -{ - struct circ_buf *xmit = &port->info->xmit; - int stopped = uart_tx_stopped (port); - - if (v850e_uart_xmit_ok (port->line)) { - int tx_ch; - - if (port->x_char) { - tx_ch = port->x_char; - port->x_char = 0; - } else if (!uart_circ_empty (xmit) && !stopped) { - tx_ch = xmit->buf[xmit->tail]; - xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1); - } else - goto no_xmit; - - v850e_uart_putc (port->line, tx_ch); - port->icount.tx++; - - if (uart_circ_chars_pending (xmit) < WAKEUP_CHARS) - uart_write_wakeup (port); - } - - no_xmit: - if (uart_circ_empty (xmit) || stopped) - v850e_uart_stop_tx (port, stopped); -} - -static irqreturn_t v850e_uart_tx_irq(int irq, void *data) -{ - struct uart_port *port = data; - v850e_uart_tx (port); - return IRQ_HANDLED; -} - -static irqreturn_t v850e_uart_rx_irq(int irq, void *data) -{ - struct uart_port *port = data; - unsigned ch_stat = TTY_NORMAL; - unsigned ch = v850e_uart_getc (port->line); - unsigned err = v850e_uart_err (port->line); - - if (err) { - if (err & V850E_UART_ERR_OVERRUN) { - ch_stat = TTY_OVERRUN; - port->icount.overrun++; - } else if (err & V850E_UART_ERR_FRAME) { - ch_stat = TTY_FRAME; - port->icount.frame++; - } else if (err & V850E_UART_ERR_PARITY) { - ch_stat = TTY_PARITY; - port->icount.parity++; - } - } - - port->icount.rx++; - - tty_insert_flip_char (port->info->port.tty, ch, ch_stat); - tty_schedule_flip (port->info->port.tty); - - return IRQ_HANDLED; -} - - -/* Control functions for the serial framework. */ - -static void v850e_uart_nop (struct uart_port *port) { } -static int v850e_uart_success (struct uart_port *port) { return 0; } - -static unsigned v850e_uart_tx_empty (struct uart_port *port) -{ - return TIOCSER_TEMT; /* Can't detect. */ -} - -static void v850e_uart_set_mctrl (struct uart_port *port, unsigned mctrl) -{ -#ifdef V850E_UART_SET_RTS - V850E_UART_SET_RTS (port->line, (mctrl & TIOCM_RTS)); -#endif -} - -static unsigned v850e_uart_get_mctrl (struct uart_port *port) -{ - /* We don't support DCD or DSR, so consider them permanently active. */ - int mctrl = TIOCM_CAR | TIOCM_DSR; - - /* We may support CTS. */ -#ifdef V850E_UART_CTS - mctrl |= V850E_UART_CTS(port->line) ? TIOCM_CTS : 0; -#else - mctrl |= TIOCM_CTS; -#endif - - return mctrl; -} - -static void v850e_uart_start_tx (struct uart_port *port) -{ - v850e_intc_disable_irq (V850E_UART_TX_IRQ (port->line)); - v850e_uart_tx (port); - v850e_intc_enable_irq (V850E_UART_TX_IRQ (port->line)); -} - -static void v850e_uart_stop_tx (struct uart_port *port) -{ - v850e_intc_disable_irq (V850E_UART_TX_IRQ (port->line)); -} - -static void v850e_uart_start_rx (struct uart_port *port) -{ - v850e_intc_enable_irq (V850E_UART_RX_IRQ (port->line)); -} - -static void v850e_uart_stop_rx (struct uart_port *port) -{ - v850e_intc_disable_irq (V850E_UART_RX_IRQ (port->line)); -} - -static void v850e_uart_break_ctl (struct uart_port *port, int break_ctl) -{ - /* Umm, do this later. */ -} - -static int v850e_uart_startup (struct uart_port *port) -{ - int err; - - /* Alloc RX irq. */ - err = request_irq (V850E_UART_RX_IRQ (port->line), v850e_uart_rx_irq, - IRQF_DISABLED, "v850e_uart", port); - if (err) - return err; - - /* Alloc TX irq. */ - err = request_irq (V850E_UART_TX_IRQ (port->line), v850e_uart_tx_irq, - IRQF_DISABLED, "v850e_uart", port); - if (err) { - free_irq (V850E_UART_RX_IRQ (port->line), port); - return err; - } - - v850e_uart_start_rx (port); - - return 0; -} - -static void v850e_uart_shutdown (struct uart_port *port) -{ - /* Disable port interrupts. */ - free_irq (V850E_UART_TX_IRQ (port->line), port); - free_irq (V850E_UART_RX_IRQ (port->line), port); - - /* Turn off xmit/recv enable bits. */ - V850E_UART_CONFIG (port->line) - &= ~(V850E_UART_CONFIG_TX_ENABLE - | V850E_UART_CONFIG_RX_ENABLE); - /* Then reset the channel. */ - V850E_UART_CONFIG (port->line) = 0; -} - -static void -v850e_uart_set_termios (struct uart_port *port, struct ktermios *termios, - struct ktermios *old) -{ - unsigned cflags = termios->c_cflag; - - /* Restrict flags to legal values. */ - if ((cflags & CSIZE) != CS7 && (cflags & CSIZE) != CS8) - /* The new value of CSIZE is invalid, use the old value. */ - cflags = (cflags & ~CSIZE) - | (old ? (old->c_cflag & CSIZE) : CS8); - - termios->c_cflag = cflags; - - v850e_uart_configure (port->line, cflags, - uart_get_baud_rate (port, termios, old, - v850e_uart_min_baud(), - v850e_uart_max_baud())); -} - -static const char *v850e_uart_type (struct uart_port *port) -{ - return port->type == PORT_V850E_UART ? "v850e_uart" : 0; -} - -static void v850e_uart_config_port (struct uart_port *port, int flags) -{ - if (flags & UART_CONFIG_TYPE) - port->type = PORT_V850E_UART; -} - -static int -v850e_uart_verify_port (struct uart_port *port, struct serial_struct *ser) -{ - if (ser->type != PORT_UNKNOWN && ser->type != PORT_V850E_UART) - return -EINVAL; - if (ser->irq != V850E_UART_TX_IRQ (port->line)) - return -EINVAL; - return 0; -} - -static struct uart_ops v850e_uart_ops = { - .tx_empty = v850e_uart_tx_empty, - .get_mctrl = v850e_uart_get_mctrl, - .set_mctrl = v850e_uart_set_mctrl, - .start_tx = v850e_uart_start_tx, - .stop_tx = v850e_uart_stop_tx, - .stop_rx = v850e_uart_stop_rx, - .enable_ms = v850e_uart_nop, - .break_ctl = v850e_uart_break_ctl, - .startup = v850e_uart_startup, - .shutdown = v850e_uart_shutdown, - .set_termios = v850e_uart_set_termios, - .type = v850e_uart_type, - .release_port = v850e_uart_nop, - .request_port = v850e_uart_success, - .config_port = v850e_uart_config_port, - .verify_port = v850e_uart_verify_port, -}; - -/* Initialization and cleanup. */ - -static struct uart_driver v850e_uart_driver = { - .owner = THIS_MODULE, - .driver_name = "v850e_uart", - .dev_name = "ttyS", - .major = TTY_MAJOR, - .minor = V850E_UART_MINOR_BASE, - .nr = V850E_UART_NUM_CHANNELS, - .cons = V850E_UART_CONSOLE, -}; - - -static struct uart_port v850e_uart_ports[V850E_UART_NUM_CHANNELS]; - -static int __init v850e_uart_init (void) -{ - int rval; - - printk (KERN_INFO "%s on-chip UART\n", V850E_UART_CHIP_NAME); - - rval = uart_register_driver (&v850e_uart_driver); - if (rval == 0) { - unsigned chan; - - for (chan = 0; chan < V850E_UART_NUM_CHANNELS; chan++) { - struct uart_port *port = &v850e_uart_ports[chan]; - - memset (port, 0, sizeof *port); - - port->ops = &v850e_uart_ops; - port->line = chan; - port->iotype = UPIO_MEM; - port->flags = UPF_BOOT_AUTOCONF; - - /* We actually use multiple IRQs, but the serial - framework seems to mainly use this for - informational purposes anyway. Here we use the TX - irq. */ - port->irq = V850E_UART_TX_IRQ (chan); - - /* The serial framework doesn't really use these - membase/mapbase fields for anything useful, but - it requires that they be something non-zero to - consider the port `valid', and also uses them - for informational purposes. */ - port->membase = (void *)V850E_UART_BASE_ADDR (chan); - port->mapbase = V850E_UART_BASE_ADDR (chan); - - /* The framework insists on knowing the uart's master - clock freq, though it doesn't seem to do anything - useful for us with it. We must make it at least - higher than (the maximum baud rate * 16), otherwise - the framework will puke during its internal - calculations, and force the baud rate to be 9600. - To be accurate though, just repeat the calculation - we use when actually setting the speed. */ - port->uartclk = v850e_uart_max_clock() * 16; - - uart_add_one_port (&v850e_uart_driver, port); - } - } - - return rval; -} - -static void __exit v850e_uart_exit (void) -{ - unsigned chan; - - for (chan = 0; chan < V850E_UART_NUM_CHANNELS; chan++) - uart_remove_one_port (&v850e_uart_driver, - &v850e_uart_ports[chan]); - - uart_unregister_driver (&v850e_uart_driver); -} - -module_init (v850e_uart_init); -module_exit (v850e_uart_exit); - -MODULE_AUTHOR ("Miles Bader"); -MODULE_DESCRIPTION ("NEC " V850E_UART_CHIP_NAME " on-chip UART"); -MODULE_LICENSE ("GPL"); diff --git a/drivers/spi/mpc52xx_psc_spi.c b/drivers/spi/mpc52xx_psc_spi.c index 604e5f0a2d9..25eda71f4bf 100644 --- a/drivers/spi/mpc52xx_psc_spi.c +++ b/drivers/spi/mpc52xx_psc_spi.c @@ -148,7 +148,6 @@ static int mpc52xx_psc_spi_transfer_rxtx(struct spi_device *spi, unsigned rfalarm; unsigned send_at_once = MPC52xx_PSC_BUFSIZE; unsigned recv_at_once; - unsigned bpw = mps->bits_per_word / 8; if (!t->tx_buf && !t->rx_buf && t->len) return -EINVAL; @@ -164,22 +163,15 @@ static int mpc52xx_psc_spi_transfer_rxtx(struct spi_device *spi, } dev_dbg(&spi->dev, "send %d bytes...\n", send_at_once); - if (tx_buf) { - for (; send_at_once; sb++, send_at_once--) { - /* set EOF flag */ - if (mps->bits_per_word - && (sb + 1) % bpw == 0) - out_8(&psc->ircr2, 0x01); + for (; send_at_once; sb++, send_at_once--) { + /* set EOF flag before the last word is sent */ + if (send_at_once == 1) + out_8(&psc->ircr2, 0x01); + + if (tx_buf) out_8(&psc->mpc52xx_psc_buffer_8, tx_buf[sb]); - } - } else { - for (; send_at_once; sb++, send_at_once--) { - /* set EOF flag */ - if (mps->bits_per_word - && ((sb + 1) % bpw) == 0) - out_8(&psc->ircr2, 0x01); + else out_8(&psc->mpc52xx_psc_buffer_8, 0); - } } diff --git a/drivers/spi/spi_s3c24xx.c b/drivers/spi/spi_s3c24xx.c index 0885cc357a3..1c643c9e1f1 100644 --- a/drivers/spi/spi_s3c24xx.c +++ b/drivers/spi/spi_s3c24xx.c @@ -270,6 +270,7 @@ static int __init s3c24xx_spi_probe(struct platform_device *pdev) /* setup the master state. */ master->num_chipselect = hw->pdata->num_cs; + master->bus_num = pdata->bus_num; /* setup the state for the bitbang driver */ diff --git a/drivers/usb/gadget/m66592-udc.c b/drivers/usb/gadget/m66592-udc.c index 8da7535c0c7..77b44fb48f0 100644 --- a/drivers/usb/gadget/m66592-udc.c +++ b/drivers/usb/gadget/m66592-udc.c @@ -1593,7 +1593,7 @@ static int __init m66592_probe(struct platform_device *pdev) m66592->gadget.ops = &m66592_gadget_ops; device_initialize(&m66592->gadget.dev); - dev_set_name(&m66592->gadget, "gadget"); + dev_set_name(&m66592->gadget.dev, "gadget"); m66592->gadget.is_dualspeed = 1; m66592->gadget.dev.parent = &pdev->dev; m66592->gadget.dev.dma_mask = pdev->dev.dma_mask; diff --git a/drivers/video/arkfb.c b/drivers/video/arkfb.c index 5001bd4ef46..38a1e8308c8 100644 --- a/drivers/video/arkfb.c +++ b/drivers/video/arkfb.c @@ -1126,11 +1126,8 @@ static int ark_pci_resume (struct pci_dev* dev) acquire_console_sem(); mutex_lock(&(par->open_lock)); - if (par->ref_count == 0) { - mutex_unlock(&(par->open_lock)); - release_console_sem(); - return 0; - } + if (par->ref_count == 0) + goto fail; pci_set_power_state(dev, PCI_D0); pci_restore_state(dev); @@ -1143,8 +1140,8 @@ static int ark_pci_resume (struct pci_dev* dev) arkfb_set_par(info); fb_set_suspend(info, 0); - mutex_unlock(&(par->open_lock)); fail: + mutex_unlock(&(par->open_lock)); release_console_sem(); return 0; } diff --git a/drivers/video/backlight/platform_lcd.c b/drivers/video/backlight/platform_lcd.c index 72d44dbfce8..738694d2388 100644 --- a/drivers/video/backlight/platform_lcd.c +++ b/drivers/video/backlight/platform_lcd.c @@ -92,7 +92,7 @@ static int __devinit platform_lcd_probe(struct platform_device *pdev) plcd->us = dev; plcd->pdata = pdata; - plcd->lcd = lcd_device_register("platform-lcd", dev, + plcd->lcd = lcd_device_register(dev_name(dev), dev, plcd, &platform_lcd_ops); if (IS_ERR(plcd->lcd)) { dev_err(dev, "cannot register lcd device\n"); @@ -101,6 +101,8 @@ static int __devinit platform_lcd_probe(struct platform_device *pdev) } platform_set_drvdata(pdev, plcd); + platform_lcd_set_power(plcd->lcd, FB_BLANK_NORMAL); + return 0; err_mem: diff --git a/drivers/video/console/sticore.c b/drivers/video/console/sticore.c index d7822af0e00..ef7870f5ea0 100644 --- a/drivers/video/console/sticore.c +++ b/drivers/video/console/sticore.c @@ -24,6 +24,7 @@ #include <asm/hardware.h> #include <asm/parisc-device.h> #include <asm/cacheflush.h> +#include <asm/grfioctl.h> #include "../sticore.h" @@ -725,6 +726,7 @@ static int __devinit sti_read_rom(int wordmode, struct sti_struct *sti, { struct sti_cooked_rom *cooked; struct sti_rom *raw = NULL; + unsigned long revno; cooked = kmalloc(sizeof *cooked, GFP_KERNEL); if (!cooked) @@ -767,9 +769,35 @@ static int __devinit sti_read_rom(int wordmode, struct sti_struct *sti, sti->graphics_id[1] = raw->graphics_id[1]; sti_dump_rom(raw); - + + /* check if the ROM routines in this card are compatible */ + if (wordmode || sti->graphics_id[1] != 0x09A02587) + goto ok; + + revno = (raw->revno[0] << 8) | raw->revno[1]; + + switch (sti->graphics_id[0]) { + case S9000_ID_HCRX: + /* HyperA or HyperB ? */ + if (revno == 0x8408 || revno == 0x840b) + goto msg_not_supported; + break; + case CRT_ID_THUNDER: + if (revno == 0x8509) + goto msg_not_supported; + break; + case CRT_ID_THUNDER2: + if (revno == 0x850c) + goto msg_not_supported; + } +ok: return 1; +msg_not_supported: + printk(KERN_ERR "Sorry, this GSC/STI card is not yet supported.\n"); + printk(KERN_ERR "Please see http://parisc-linux.org/faq/" + "graphics-howto.html for more info.\n"); + /* fall through */ out_err: kfree(raw); kfree(cooked); diff --git a/drivers/video/gbefb.c b/drivers/video/gbefb.c index 2e552d5bbb5..f89c3cce1e0 100644 --- a/drivers/video/gbefb.c +++ b/drivers/video/gbefb.c @@ -87,6 +87,8 @@ static int gbe_revision; static int ypan, ywrap; static uint32_t pseudo_palette[16]; +static uint32_t gbe_cmap[256]; +static int gbe_turned_on; /* 0 turned off, 1 turned on */ static char *mode_option __initdata = NULL; @@ -208,6 +210,8 @@ void gbe_turn_off(void) int i; unsigned int val, x, y, vpixen_off; + gbe_turned_on = 0; + /* check if pixel counter is on */ val = gbe->vt_xy; if (GET_GBE_FIELD(VT_XY, FREEZE, val) == 1) @@ -371,6 +375,22 @@ static void gbe_turn_on(void) } if (i == 10000) printk(KERN_ERR "gbefb: turn on DMA timed out\n"); + + gbe_turned_on = 1; +} + +static void gbe_loadcmap(void) +{ + int i, j; + + for (i = 0; i < 256; i++) { + for (j = 0; j < 1000 && gbe->cm_fifo >= 63; j++) + udelay(10); + if (j == 1000) + printk(KERN_ERR "gbefb: cmap FIFO timeout\n"); + + gbe->cmap[i] = gbe_cmap[i]; + } } /* @@ -382,6 +402,7 @@ static int gbefb_blank(int blank, struct fb_info *info) switch (blank) { case FB_BLANK_UNBLANK: /* unblank */ gbe_turn_on(); + gbe_loadcmap(); break; case FB_BLANK_NORMAL: /* blank */ @@ -796,16 +817,10 @@ static int gbefb_set_par(struct fb_info *info) gbe->gmap[i] = (i << 24) | (i << 16) | (i << 8); /* Initialize the color map */ - for (i = 0; i < 256; i++) { - int j; - - for (j = 0; j < 1000 && gbe->cm_fifo >= 63; j++) - udelay(10); - if (j == 1000) - printk(KERN_ERR "gbefb: cmap FIFO timeout\n"); + for (i = 0; i < 256; i++) + gbe_cmap[i] = (i << 8) | (i << 16) | (i << 24); - gbe->cmap[i] = (i << 8) | (i << 16) | (i << 24); - } + gbe_loadcmap(); return 0; } @@ -855,14 +870,17 @@ static int gbefb_setcolreg(unsigned regno, unsigned red, unsigned green, blue >>= 8; if (info->var.bits_per_pixel <= 8) { - /* wait for the color map FIFO to have a free entry */ - for (i = 0; i < 1000 && gbe->cm_fifo >= 63; i++) - udelay(10); - if (i == 1000) { - printk(KERN_ERR "gbefb: cmap FIFO timeout\n"); - return 1; + gbe_cmap[regno] = (red << 24) | (green << 16) | (blue << 8); + if (gbe_turned_on) { + /* wait for the color map FIFO to have a free entry */ + for (i = 0; i < 1000 && gbe->cm_fifo >= 63; i++) + udelay(10); + if (i == 1000) { + printk(KERN_ERR "gbefb: cmap FIFO timeout\n"); + return 1; + } + gbe->cmap[regno] = gbe_cmap[regno]; } - gbe->cmap[regno] = (red << 24) | (green << 16) | (blue << 8); } else if (regno < 16) { switch (info->var.bits_per_pixel) { case 15: diff --git a/drivers/video/sh7760fb.c b/drivers/video/sh7760fb.c index 4d0e28c5790..8d0212da451 100644 --- a/drivers/video/sh7760fb.c +++ b/drivers/video/sh7760fb.c @@ -152,6 +152,7 @@ static int sh7760fb_setcmap(struct fb_cmap *cmap, struct fb_info *info) col |= ((*g) & 0xff) << 8; col |= ((*b) & 0xff); col &= SH7760FB_PALETTE_MASK; + iowrite32(col, par->base + LDPR(s)); if (s < 16) ((u32 *) (info->pseudo_palette))[s] = s; diff --git a/drivers/video/vt8623fb.c b/drivers/video/vt8623fb.c index 536ab11623f..4a484ee98f8 100644 --- a/drivers/video/vt8623fb.c +++ b/drivers/video/vt8623fb.c @@ -853,11 +853,8 @@ static int vt8623_pci_resume(struct pci_dev* dev) acquire_console_sem(); mutex_lock(&(par->open_lock)); - if (par->ref_count == 0) { - mutex_unlock(&(par->open_lock)); - release_console_sem(); - return 0; - } + if (par->ref_count == 0) + goto fail; pci_set_power_state(dev, PCI_D0); pci_restore_state(dev); @@ -870,8 +867,8 @@ static int vt8623_pci_resume(struct pci_dev* dev) vt8623fb_set_par(info); fb_set_suspend(info, 0); - mutex_unlock(&(par->open_lock)); fail: + mutex_unlock(&(par->open_lock)); release_console_sem(); return 0; diff --git a/drivers/watchdog/hpwdt.c b/drivers/watchdog/hpwdt.c index eaa3f2a79ff..ccd6c530782 100644 --- a/drivers/watchdog/hpwdt.c +++ b/drivers/watchdog/hpwdt.c @@ -420,7 +420,7 @@ static int __devinit detect_cru_service(void) static int hpwdt_pretimeout(struct notifier_block *nb, unsigned long ulReason, void *data) { - static unsigned long rom_pl; + unsigned long rom_pl; static int die_nmi_called; if (ulReason != DIE_NMI && ulReason != DIE_NMI_IPI) diff --git a/fs/bio-integrity.c b/fs/bio-integrity.c index 63e2ee63058..c3e174b35fe 100644 --- a/fs/bio-integrity.c +++ b/fs/bio-integrity.c @@ -705,7 +705,6 @@ void __init bio_integrity_init_slab(void) bio_integrity_slab = KMEM_CACHE(bio_integrity_payload, SLAB_HWCACHE_ALIGN|SLAB_PANIC); } -EXPORT_SYMBOL(bio_integrity_init_slab); static int __init integrity_init(void) { diff --git a/fs/buffer.c b/fs/buffer.c index f9580501963..4dbe52948e8 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -580,7 +580,7 @@ EXPORT_SYMBOL(mark_buffer_async_write); /* * The buffer's backing address_space's private_lock must be held */ -static inline void __remove_assoc_queue(struct buffer_head *bh) +static void __remove_assoc_queue(struct buffer_head *bh) { list_del_init(&bh->b_assoc_buffers); WARN_ON(!bh->b_assoc_map); @@ -2096,6 +2096,52 @@ int generic_write_end(struct file *file, struct address_space *mapping, EXPORT_SYMBOL(generic_write_end); /* + * block_is_partially_uptodate checks whether buffers within a page are + * uptodate or not. + * + * Returns true if all buffers which correspond to a file portion + * we want to read are uptodate. + */ +int block_is_partially_uptodate(struct page *page, read_descriptor_t *desc, + unsigned long from) +{ + struct inode *inode = page->mapping->host; + unsigned block_start, block_end, blocksize; + unsigned to; + struct buffer_head *bh, *head; + int ret = 1; + + if (!page_has_buffers(page)) + return 0; + + blocksize = 1 << inode->i_blkbits; + to = min_t(unsigned, PAGE_CACHE_SIZE - from, desc->count); + to = from + to; + if (from < blocksize && to > PAGE_CACHE_SIZE - blocksize) + return 0; + + head = page_buffers(page); + bh = head; + block_start = 0; + do { + block_end = block_start + blocksize; + if (block_end > from && block_start < to) { + if (!buffer_uptodate(bh)) { + ret = 0; + break; + } + if (block_end >= to) + break; + } + block_start = block_end; + bh = bh->b_this_page; + } while (bh != head); + + return ret; +} +EXPORT_SYMBOL(block_is_partially_uptodate); + +/* * Generic "read page" function for block devices that have the normal * get_block functionality. This is most of the block device filesystems. * Reads the page asynchronously --- the unlock_buffer() and diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c index 7b99917ffad..06db79d05c1 100644 --- a/fs/ecryptfs/crypto.c +++ b/fs/ecryptfs/crypto.c @@ -475,8 +475,8 @@ int ecryptfs_encrypt_page(struct page *page) { struct inode *ecryptfs_inode; struct ecryptfs_crypt_stat *crypt_stat; - char *enc_extent_virt = NULL; - struct page *enc_extent_page; + char *enc_extent_virt; + struct page *enc_extent_page = NULL; loff_t extent_offset; int rc = 0; @@ -492,14 +492,14 @@ int ecryptfs_encrypt_page(struct page *page) page->index); goto out; } - enc_extent_virt = kmalloc(PAGE_CACHE_SIZE, GFP_USER); - if (!enc_extent_virt) { + enc_extent_page = alloc_page(GFP_USER); + if (!enc_extent_page) { rc = -ENOMEM; ecryptfs_printk(KERN_ERR, "Error allocating memory for " "encrypted extent\n"); goto out; } - enc_extent_page = virt_to_page(enc_extent_virt); + enc_extent_virt = kmap(enc_extent_page); for (extent_offset = 0; extent_offset < (PAGE_CACHE_SIZE / crypt_stat->extent_size); extent_offset++) { @@ -527,7 +527,10 @@ int ecryptfs_encrypt_page(struct page *page) } } out: - kfree(enc_extent_virt); + if (enc_extent_page) { + kunmap(enc_extent_page); + __free_page(enc_extent_page); + } return rc; } @@ -609,8 +612,8 @@ int ecryptfs_decrypt_page(struct page *page) { struct inode *ecryptfs_inode; struct ecryptfs_crypt_stat *crypt_stat; - char *enc_extent_virt = NULL; - struct page *enc_extent_page; + char *enc_extent_virt; + struct page *enc_extent_page = NULL; unsigned long extent_offset; int rc = 0; @@ -627,14 +630,14 @@ int ecryptfs_decrypt_page(struct page *page) page->index); goto out; } - enc_extent_virt = kmalloc(PAGE_CACHE_SIZE, GFP_USER); - if (!enc_extent_virt) { + enc_extent_page = alloc_page(GFP_USER); + if (!enc_extent_page) { rc = -ENOMEM; ecryptfs_printk(KERN_ERR, "Error allocating memory for " "encrypted extent\n"); goto out; } - enc_extent_page = virt_to_page(enc_extent_virt); + enc_extent_virt = kmap(enc_extent_page); for (extent_offset = 0; extent_offset < (PAGE_CACHE_SIZE / crypt_stat->extent_size); extent_offset++) { @@ -662,7 +665,10 @@ int ecryptfs_decrypt_page(struct page *page) } } out: - kfree(enc_extent_virt); + if (enc_extent_page) { + kunmap(enc_extent_page); + __free_page(enc_extent_page); + } return rc; } diff --git a/fs/exec.c b/fs/exec.c index 9696bbf0f0b..32993beecbe 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -32,6 +32,7 @@ #include <linux/swap.h> #include <linux/string.h> #include <linux/init.h> +#include <linux/pagemap.h> #include <linux/highmem.h> #include <linux/spinlock.h> #include <linux/key.h> diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index 384fc0d1dd7..991d6dfeb51 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c @@ -791,6 +791,7 @@ const struct address_space_operations ext2_aops = { .direct_IO = ext2_direct_IO, .writepages = ext2_writepages, .migratepage = buffer_migrate_page, + .is_partially_uptodate = block_is_partially_uptodate, }; const struct address_space_operations ext2_aops_xip = { diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index 3bf07d70b91..507d8689b11 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c @@ -1767,44 +1767,47 @@ static int ext3_journalled_set_page_dirty(struct page *page) } static const struct address_space_operations ext3_ordered_aops = { - .readpage = ext3_readpage, - .readpages = ext3_readpages, - .writepage = ext3_ordered_writepage, - .sync_page = block_sync_page, - .write_begin = ext3_write_begin, - .write_end = ext3_ordered_write_end, - .bmap = ext3_bmap, - .invalidatepage = ext3_invalidatepage, - .releasepage = ext3_releasepage, - .direct_IO = ext3_direct_IO, - .migratepage = buffer_migrate_page, + .readpage = ext3_readpage, + .readpages = ext3_readpages, + .writepage = ext3_ordered_writepage, + .sync_page = block_sync_page, + .write_begin = ext3_write_begin, + .write_end = ext3_ordered_write_end, + .bmap = ext3_bmap, + .invalidatepage = ext3_invalidatepage, + .releasepage = ext3_releasepage, + .direct_IO = ext3_direct_IO, + .migratepage = buffer_migrate_page, + .is_partially_uptodate = block_is_partially_uptodate, }; static const struct address_space_operations ext3_writeback_aops = { - .readpage = ext3_readpage, - .readpages = ext3_readpages, - .writepage = ext3_writeback_writepage, - .sync_page = block_sync_page, - .write_begin = ext3_write_begin, - .write_end = ext3_writeback_write_end, - .bmap = ext3_bmap, - .invalidatepage = ext3_invalidatepage, - .releasepage = ext3_releasepage, - .direct_IO = ext3_direct_IO, - .migratepage = buffer_migrate_page, + .readpage = ext3_readpage, + .readpages = ext3_readpages, + .writepage = ext3_writeback_writepage, + .sync_page = block_sync_page, + .write_begin = ext3_write_begin, + .write_end = ext3_writeback_write_end, + .bmap = ext3_bmap, + .invalidatepage = ext3_invalidatepage, + .releasepage = ext3_releasepage, + .direct_IO = ext3_direct_IO, + .migratepage = buffer_migrate_page, + .is_partially_uptodate = block_is_partially_uptodate, }; static const struct address_space_operations ext3_journalled_aops = { - .readpage = ext3_readpage, - .readpages = ext3_readpages, - .writepage = ext3_journalled_writepage, - .sync_page = block_sync_page, - .write_begin = ext3_write_begin, - .write_end = ext3_journalled_write_end, - .set_page_dirty = ext3_journalled_set_page_dirty, - .bmap = ext3_bmap, - .invalidatepage = ext3_invalidatepage, - .releasepage = ext3_releasepage, + .readpage = ext3_readpage, + .readpages = ext3_readpages, + .writepage = ext3_journalled_writepage, + .sync_page = block_sync_page, + .write_begin = ext3_write_begin, + .write_end = ext3_journalled_write_end, + .set_page_dirty = ext3_journalled_set_page_dirty, + .bmap = ext3_bmap, + .invalidatepage = ext3_invalidatepage, + .releasepage = ext3_releasepage, + .is_partially_uptodate = block_is_partially_uptodate, }; void ext3_set_aops(struct inode *inode) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 8ca2763df09..9843b046c23 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -2806,59 +2806,63 @@ static int ext4_journalled_set_page_dirty(struct page *page) } static const struct address_space_operations ext4_ordered_aops = { - .readpage = ext4_readpage, - .readpages = ext4_readpages, - .writepage = ext4_normal_writepage, - .sync_page = block_sync_page, - .write_begin = ext4_write_begin, - .write_end = ext4_ordered_write_end, - .bmap = ext4_bmap, - .invalidatepage = ext4_invalidatepage, - .releasepage = ext4_releasepage, - .direct_IO = ext4_direct_IO, - .migratepage = buffer_migrate_page, + .readpage = ext4_readpage, + .readpages = ext4_readpages, + .writepage = ext4_normal_writepage, + .sync_page = block_sync_page, + .write_begin = ext4_write_begin, + .write_end = ext4_ordered_write_end, + .bmap = ext4_bmap, + .invalidatepage = ext4_invalidatepage, + .releasepage = ext4_releasepage, + .direct_IO = ext4_direct_IO, + .migratepage = buffer_migrate_page, + .is_partially_uptodate = block_is_partially_uptodate, }; static const struct address_space_operations ext4_writeback_aops = { - .readpage = ext4_readpage, - .readpages = ext4_readpages, - .writepage = ext4_normal_writepage, - .sync_page = block_sync_page, - .write_begin = ext4_write_begin, - .write_end = ext4_writeback_write_end, - .bmap = ext4_bmap, - .invalidatepage = ext4_invalidatepage, - .releasepage = ext4_releasepage, - .direct_IO = ext4_direct_IO, - .migratepage = buffer_migrate_page, + .readpage = ext4_readpage, + .readpages = ext4_readpages, + .writepage = ext4_normal_writepage, + .sync_page = block_sync_page, + .write_begin = ext4_write_begin, + .write_end = ext4_writeback_write_end, + .bmap = ext4_bmap, + .invalidatepage = ext4_invalidatepage, + .releasepage = ext4_releasepage, + .direct_IO = ext4_direct_IO, + .migratepage = buffer_migrate_page, + .is_partially_uptodate = block_is_partially_uptodate, }; static const struct address_space_operations ext4_journalled_aops = { - .readpage = ext4_readpage, - .readpages = ext4_readpages, - .writepage = ext4_journalled_writepage, - .sync_page = block_sync_page, - .write_begin = ext4_write_begin, - .write_end = ext4_journalled_write_end, - .set_page_dirty = ext4_journalled_set_page_dirty, - .bmap = ext4_bmap, - .invalidatepage = ext4_invalidatepage, - .releasepage = ext4_releasepage, + .readpage = ext4_readpage, + .readpages = ext4_readpages, + .writepage = ext4_journalled_writepage, + .sync_page = block_sync_page, + .write_begin = ext4_write_begin, + .write_end = ext4_journalled_write_end, + .set_page_dirty = ext4_journalled_set_page_dirty, + .bmap = ext4_bmap, + .invalidatepage = ext4_invalidatepage, + .releasepage = ext4_releasepage, + .is_partially_uptodate = block_is_partially_uptodate, }; static const struct address_space_operations ext4_da_aops = { - .readpage = ext4_readpage, - .readpages = ext4_readpages, - .writepage = ext4_da_writepage, - .writepages = ext4_da_writepages, - .sync_page = block_sync_page, - .write_begin = ext4_da_write_begin, - .write_end = ext4_da_write_end, - .bmap = ext4_bmap, - .invalidatepage = ext4_da_invalidatepage, - .releasepage = ext4_releasepage, - .direct_IO = ext4_direct_IO, - .migratepage = buffer_migrate_page, + .readpage = ext4_readpage, + .readpages = ext4_readpages, + .writepage = ext4_da_writepage, + .writepages = ext4_da_writepages, + .sync_page = block_sync_page, + .write_begin = ext4_da_write_begin, + .write_end = ext4_da_write_end, + .bmap = ext4_bmap, + .invalidatepage = ext4_da_invalidatepage, + .releasepage = ext4_releasepage, + .direct_IO = ext4_direct_IO, + .migratepage = buffer_migrate_page, + .is_partially_uptodate = block_is_partially_uptodate, }; void ext4_set_aops(struct inode *inode) diff --git a/fs/libfs.c b/fs/libfs.c index baeb71ee1cd..1add676a19d 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -216,8 +216,8 @@ int get_sb_pseudo(struct file_system_type *fs_type, char *name, s->s_flags = MS_NOUSER; s->s_maxbytes = ~0ULL; - s->s_blocksize = 1024; - s->s_blocksize_bits = 10; + s->s_blocksize = PAGE_SIZE; + s->s_blocksize_bits = PAGE_SHIFT; s->s_magic = magic; s->s_op = ops ? ops : &simple_super_operations; s->s_time_gran = 1; diff --git a/fs/omfs/bitmap.c b/fs/omfs/bitmap.c index dc75f22be3f..697663b01ba 100644 --- a/fs/omfs/bitmap.c +++ b/fs/omfs/bitmap.c @@ -71,10 +71,10 @@ static int set_run(struct super_block *sb, int map, } if (set) { set_bit(bit, sbi->s_imap[map]); - set_bit(bit, (long *) bh->b_data); + set_bit(bit, (unsigned long *)bh->b_data); } else { clear_bit(bit, sbi->s_imap[map]); - clear_bit(bit, (long *) bh->b_data); + clear_bit(bit, (unsigned long *)bh->b_data); } } mark_buffer_dirty(bh); @@ -109,7 +109,7 @@ int omfs_allocate_block(struct super_block *sb, u64 block) if (!bh) goto out; - set_bit(bit, (long *) bh->b_data); + set_bit(bit, (unsigned long *)bh->b_data); mark_buffer_dirty(bh); brelse(bh); } diff --git a/fs/omfs/dir.c b/fs/omfs/dir.c index 05a5bc31e4b..c0757e99887 100644 --- a/fs/omfs/dir.c +++ b/fs/omfs/dir.c @@ -104,7 +104,7 @@ int omfs_make_empty(struct inode *inode, struct super_block *sb) oi = (struct omfs_inode *) bh->b_data; oi->i_head.h_self = cpu_to_be64(inode->i_ino); - oi->i_sibling = ~0ULL; + oi->i_sibling = ~cpu_to_be64(0ULL); mark_buffer_dirty(bh); brelse(bh); diff --git a/fs/omfs/file.c b/fs/omfs/file.c index 66e01fae438..7e2499053e4 100644 --- a/fs/omfs/file.c +++ b/fs/omfs/file.c @@ -30,11 +30,11 @@ void omfs_make_empty_table(struct buffer_head *bh, int offset) { struct omfs_extent *oe = (struct omfs_extent *) &bh->b_data[offset]; - oe->e_next = ~0ULL; + oe->e_next = ~cpu_to_be64(0ULL); oe->e_extent_count = cpu_to_be32(1), oe->e_fill = cpu_to_be32(0x22), - oe->e_entry.e_cluster = ~0ULL; - oe->e_entry.e_blocks = ~0ULL; + oe->e_entry.e_cluster = ~cpu_to_be64(0ULL); + oe->e_entry.e_blocks = ~cpu_to_be64(0ULL); } int omfs_shrink_inode(struct inode *inode) diff --git a/fs/romfs/inode.c b/fs/romfs/inode.c index 8e51a2aaa97..60d2f822e87 100644 --- a/fs/romfs/inode.c +++ b/fs/romfs/inode.c @@ -418,7 +418,8 @@ static int romfs_readpage(struct file *file, struct page * page) { struct inode *inode = page->mapping->host; - loff_t offset, avail, readlen; + loff_t offset, size; + unsigned long filled; void *buf; int result = -EIO; @@ -430,21 +431,29 @@ romfs_readpage(struct file *file, struct page * page) /* 32 bit warning -- but not for us :) */ offset = page_offset(page); - if (offset < i_size_read(inode)) { - avail = inode->i_size-offset; - readlen = min_t(unsigned long, avail, PAGE_SIZE); - if (romfs_copyfrom(inode, buf, ROMFS_I(inode)->i_dataoffset+offset, readlen) == readlen) { - if (readlen < PAGE_SIZE) { - memset(buf + readlen,0,PAGE_SIZE-readlen); - } - SetPageUptodate(page); - result = 0; + size = i_size_read(inode); + filled = 0; + result = 0; + if (offset < size) { + unsigned long readlen; + + size -= offset; + readlen = size > PAGE_SIZE ? PAGE_SIZE : size; + + filled = romfs_copyfrom(inode, buf, ROMFS_I(inode)->i_dataoffset+offset, readlen); + + if (filled != readlen) { + SetPageError(page); + filled = 0; + result = -EIO; } } - if (result) { - memset(buf, 0, PAGE_SIZE); - SetPageError(page); - } + + if (filled < PAGE_SIZE) + memset(buf + filled, 0, PAGE_SIZE-filled); + + if (!result) + SetPageUptodate(page); flush_dcache_page(page); unlock_page(page); diff --git a/include/acpi/actbl.h b/include/acpi/actbl.h index 1ebbe883f78..13a3d9ad92d 100644 --- a/include/acpi/actbl.h +++ b/include/acpi/actbl.h @@ -277,6 +277,7 @@ enum acpi_prefered_pm_profiles { #define BAF_LEGACY_DEVICES 0x0001 #define BAF_8042_KEYBOARD_CONTROLLER 0x0002 #define BAF_MSI_NOT_SUPPORTED 0x0008 +#define BAF_PCIE_ASPM_CONTROL 0x0010 #define FADT2_REVISION_ID 3 #define FADT2_MINUS_REVISION_ID 2 diff --git a/include/asm-arm/arch-s3c2410/spi.h b/include/asm-arm/arch-s3c2410/spi.h index 352d33860b6..442169887d3 100644 --- a/include/asm-arm/arch-s3c2410/spi.h +++ b/include/asm-arm/arch-s3c2410/spi.h @@ -16,6 +16,7 @@ struct s3c2410_spi_info { unsigned long pin_cs; /* simple gpio cs */ unsigned int num_cs; /* total chipselects */ + int bus_num; /* bus number to use. */ void (*set_cs)(struct s3c2410_spi_info *spi, int cs, int pol); }; diff --git a/include/asm-arm/dma-mapping.h b/include/asm-arm/dma-mapping.h index f41335ba633..45329fca1b6 100644 --- a/include/asm-arm/dma-mapping.h +++ b/include/asm-arm/dma-mapping.h @@ -7,6 +7,8 @@ #include <linux/scatterlist.h> +#include <asm-generic/dma-coherent.h> + /* * DMA-consistent mapping functions. These allocate/free a region of * uncached, unwrite-buffered mapped memory space for use with DMA diff --git a/include/asm-cris/dma-mapping.h b/include/asm-cris/dma-mapping.h index cb2fb25ff8d..da8ef8e8f84 100644 --- a/include/asm-cris/dma-mapping.h +++ b/include/asm-cris/dma-mapping.h @@ -14,6 +14,8 @@ #define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h) #ifdef CONFIG_PCI +#include <asm-generic/dma-coherent.h> + void *dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t flag); diff --git a/include/asm-generic/dma-coherent.h b/include/asm-generic/dma-coherent.h new file mode 100644 index 00000000000..85a3ffaa024 --- /dev/null +++ b/include/asm-generic/dma-coherent.h @@ -0,0 +1,32 @@ +#ifndef DMA_COHERENT_H +#define DMA_COHERENT_H + +#ifdef CONFIG_HAVE_GENERIC_DMA_COHERENT +/* + * These two functions are only for dma allocator. + * Don't use them in device drivers. + */ +int dma_alloc_from_coherent(struct device *dev, ssize_t size, + dma_addr_t *dma_handle, void **ret); +int dma_release_from_coherent(struct device *dev, int order, void *vaddr); + +/* + * Standard interface + */ +#define ARCH_HAS_DMA_DECLARE_COHERENT_MEMORY +extern int +dma_declare_coherent_memory(struct device *dev, dma_addr_t bus_addr, + dma_addr_t device_addr, size_t size, int flags); + +extern void +dma_release_declared_memory(struct device *dev); + +extern void * +dma_mark_declared_memory_occupied(struct device *dev, + dma_addr_t device_addr, size_t size); +#else +#define dma_alloc_from_coherent(dev, size, handle, ret) (0) +#define dma_release_from_coherent(dev, order, vaddr) (0) +#endif + +#endif diff --git a/include/asm-generic/gpio.h b/include/asm-generic/gpio.h index c764a8fcb05..0f99ad38b01 100644 --- a/include/asm-generic/gpio.h +++ b/include/asm-generic/gpio.h @@ -2,6 +2,7 @@ #define _ASM_GENERIC_GPIO_H #include <linux/types.h> +#include <linux/errno.h> #ifdef CONFIG_GPIOLIB diff --git a/include/asm-generic/pgtable-nopmd.h b/include/asm-generic/pgtable-nopmd.h index 087325ede76..a7cdc48e8b7 100644 --- a/include/asm-generic/pgtable-nopmd.h +++ b/include/asm-generic/pgtable-nopmd.h @@ -5,6 +5,8 @@ #include <asm-generic/pgtable-nopud.h> +struct mm_struct; + #define __PAGETABLE_PMD_FOLDED /* @@ -54,7 +56,9 @@ static inline pmd_t * pmd_offset(pud_t * pud, unsigned long address) * inside the pud, so has no extra memory associated with it. */ #define pmd_alloc_one(mm, address) NULL -#define pmd_free(mm, x) do { } while (0) +static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd) +{ +} #define __pmd_free_tlb(tlb, x) do { } while (0) #undef pmd_addr_end diff --git a/include/asm-ia64/sn/mspec.h b/include/asm-ia64/sn/mspec.h index dbe13c6121a..c1d3c50c322 100644 --- a/include/asm-ia64/sn/mspec.h +++ b/include/asm-ia64/sn/mspec.h @@ -4,7 +4,7 @@ * License. See the file "COPYING" in the main directory of this archive * for more details. * - * Copyright (c) 2001-2004 Silicon Graphics, Inc. All rights reserved. + * Copyright (c) 2001-2008 Silicon Graphics, Inc. All rights reserved. */ #ifndef _ASM_IA64_SN_MSPEC_H @@ -32,26 +32,26 @@ #ifdef __KERNEL__ /* - * Each Atomic Memory Operation (AMO formerly known as fetchop) + * Each Atomic Memory Operation (amo, formerly known as fetchop) * variable is 64 bytes long. The first 8 bytes are used. The * remaining 56 bytes are unaddressable due to the operation taking * that portion of the address. * - * NOTE: The AMO_t _MUST_ be placed in either the first or second half - * of the cache line. The cache line _MUST NOT_ be used for anything - * other than additional AMO_t entries. This is because there are two + * NOTE: The amo structure _MUST_ be placed in either the first or second + * half of the cache line. The cache line _MUST NOT_ be used for anything + * other than additional amo entries. This is because there are two * addresses which reference the same physical cache line. One will * be a cached entry with the memory type bits all set. This address - * may be loaded into processor cache. The AMO_t will be referenced + * may be loaded into processor cache. The amo will be referenced * uncached via the memory special memory type. If any portion of the * cached cache-line is modified, when that line is flushed, it will * overwrite the uncached value in physical memory and lead to * inconsistency. */ -typedef struct { +struct amo { u64 variable; u64 unused[7]; -} AMO_t; +}; #endif /* __KERNEL__ */ diff --git a/include/asm-mips/gdb-stub.h b/include/asm-mips/gdb-stub.h deleted file mode 100644 index 22f67d4a71a..00000000000 --- a/include/asm-mips/gdb-stub.h +++ /dev/null @@ -1,215 +0,0 @@ -/* - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - * - * Copyright (C) 1995 Andreas Busse - * Copyright (C) 2003 Ralf Baechle - */ -#ifndef _ASM_GDB_STUB_H -#define _ASM_GDB_STUB_H - - -/* - * important register numbers - */ - -#define REG_EPC 37 -#define REG_FP 72 -#define REG_SP 29 - -/* - * Stack layout for the GDB exception handler - * Derived from the stack layout described in asm-mips/stackframe.h - * - * The first PTRSIZE*6 bytes are argument save space for C subroutines. - */ -#define NUMREGS 90 - -#define GDB_FR_REG0 (PTRSIZE*6) /* 0 */ -#define GDB_FR_REG1 ((GDB_FR_REG0) + LONGSIZE) /* 1 */ -#define GDB_FR_REG2 ((GDB_FR_REG1) + LONGSIZE) /* 2 */ -#define GDB_FR_REG3 ((GDB_FR_REG2) + LONGSIZE) /* 3 */ -#define GDB_FR_REG4 ((GDB_FR_REG3) + LONGSIZE) /* 4 */ -#define GDB_FR_REG5 ((GDB_FR_REG4) + LONGSIZE) /* 5 */ -#define GDB_FR_REG6 ((GDB_FR_REG5) + LONGSIZE) /* 6 */ -#define GDB_FR_REG7 ((GDB_FR_REG6) + LONGSIZE) /* 7 */ -#define GDB_FR_REG8 ((GDB_FR_REG7) + LONGSIZE) /* 8 */ -#define GDB_FR_REG9 ((GDB_FR_REG8) + LONGSIZE) /* 9 */ -#define GDB_FR_REG10 ((GDB_FR_REG9) + LONGSIZE) /* 10 */ -#define GDB_FR_REG11 ((GDB_FR_REG10) + LONGSIZE) /* 11 */ -#define GDB_FR_REG12 ((GDB_FR_REG11) + LONGSIZE) /* 12 */ -#define GDB_FR_REG13 ((GDB_FR_REG12) + LONGSIZE) /* 13 */ -#define GDB_FR_REG14 ((GDB_FR_REG13) + LONGSIZE) /* 14 */ -#define GDB_FR_REG15 ((GDB_FR_REG14) + LONGSIZE) /* 15 */ -#define GDB_FR_REG16 ((GDB_FR_REG15) + LONGSIZE) /* 16 */ -#define GDB_FR_REG17 ((GDB_FR_REG16) + LONGSIZE) /* 17 */ -#define GDB_FR_REG18 ((GDB_FR_REG17) + LONGSIZE) /* 18 */ -#define GDB_FR_REG19 ((GDB_FR_REG18) + LONGSIZE) /* 19 */ -#define GDB_FR_REG20 ((GDB_FR_REG19) + LONGSIZE) /* 20 */ -#define GDB_FR_REG21 ((GDB_FR_REG20) + LONGSIZE) /* 21 */ -#define GDB_FR_REG22 ((GDB_FR_REG21) + LONGSIZE) /* 22 */ -#define GDB_FR_REG23 ((GDB_FR_REG22) + LONGSIZE) /* 23 */ -#define GDB_FR_REG24 ((GDB_FR_REG23) + LONGSIZE) /* 24 */ -#define GDB_FR_REG25 ((GDB_FR_REG24) + LONGSIZE) /* 25 */ -#define GDB_FR_REG26 ((GDB_FR_REG25) + LONGSIZE) /* 26 */ -#define GDB_FR_REG27 ((GDB_FR_REG26) + LONGSIZE) /* 27 */ -#define GDB_FR_REG28 ((GDB_FR_REG27) + LONGSIZE) /* 28 */ -#define GDB_FR_REG29 ((GDB_FR_REG28) + LONGSIZE) /* 29 */ -#define GDB_FR_REG30 ((GDB_FR_REG29) + LONGSIZE) /* 30 */ -#define GDB_FR_REG31 ((GDB_FR_REG30) + LONGSIZE) /* 31 */ - -/* - * Saved special registers - */ -#define GDB_FR_STATUS ((GDB_FR_REG31) + LONGSIZE) /* 32 */ -#define GDB_FR_LO ((GDB_FR_STATUS) + LONGSIZE) /* 33 */ -#define GDB_FR_HI ((GDB_FR_LO) + LONGSIZE) /* 34 */ -#define GDB_FR_BADVADDR ((GDB_FR_HI) + LONGSIZE) /* 35 */ -#define GDB_FR_CAUSE ((GDB_FR_BADVADDR) + LONGSIZE) /* 36 */ -#define GDB_FR_EPC ((GDB_FR_CAUSE) + LONGSIZE) /* 37 */ - -/* - * Saved floating point registers - */ -#define GDB_FR_FPR0 ((GDB_FR_EPC) + LONGSIZE) /* 38 */ -#define GDB_FR_FPR1 ((GDB_FR_FPR0) + LONGSIZE) /* 39 */ -#define GDB_FR_FPR2 ((GDB_FR_FPR1) + LONGSIZE) /* 40 */ -#define GDB_FR_FPR3 ((GDB_FR_FPR2) + LONGSIZE) /* 41 */ -#define GDB_FR_FPR4 ((GDB_FR_FPR3) + LONGSIZE) /* 42 */ -#define GDB_FR_FPR5 ((GDB_FR_FPR4) + LONGSIZE) /* 43 */ -#define GDB_FR_FPR6 ((GDB_FR_FPR5) + LONGSIZE) /* 44 */ -#define GDB_FR_FPR7 ((GDB_FR_FPR6) + LONGSIZE) /* 45 */ -#define GDB_FR_FPR8 ((GDB_FR_FPR7) + LONGSIZE) /* 46 */ -#define GDB_FR_FPR9 ((GDB_FR_FPR8) + LONGSIZE) /* 47 */ -#define GDB_FR_FPR10 ((GDB_FR_FPR9) + LONGSIZE) /* 48 */ -#define GDB_FR_FPR11 ((GDB_FR_FPR10) + LONGSIZE) /* 49 */ -#define GDB_FR_FPR12 ((GDB_FR_FPR11) + LONGSIZE) /* 50 */ -#define GDB_FR_FPR13 ((GDB_FR_FPR12) + LONGSIZE) /* 51 */ -#define GDB_FR_FPR14 ((GDB_FR_FPR13) + LONGSIZE) /* 52 */ -#define GDB_FR_FPR15 ((GDB_FR_FPR14) + LONGSIZE) /* 53 */ -#define GDB_FR_FPR16 ((GDB_FR_FPR15) + LONGSIZE) /* 54 */ -#define GDB_FR_FPR17 ((GDB_FR_FPR16) + LONGSIZE) /* 55 */ -#define GDB_FR_FPR18 ((GDB_FR_FPR17) + LONGSIZE) /* 56 */ -#define GDB_FR_FPR19 ((GDB_FR_FPR18) + LONGSIZE) /* 57 */ -#define GDB_FR_FPR20 ((GDB_FR_FPR19) + LONGSIZE) /* 58 */ -#define GDB_FR_FPR21 ((GDB_FR_FPR20) + LONGSIZE) /* 59 */ -#define GDB_FR_FPR22 ((GDB_FR_FPR21) + LONGSIZE) /* 60 */ -#define GDB_FR_FPR23 ((GDB_FR_FPR22) + LONGSIZE) /* 61 */ -#define GDB_FR_FPR24 ((GDB_FR_FPR23) + LONGSIZE) /* 62 */ -#define GDB_FR_FPR25 ((GDB_FR_FPR24) + LONGSIZE) /* 63 */ -#define GDB_FR_FPR26 ((GDB_FR_FPR25) + LONGSIZE) /* 64 */ -#define GDB_FR_FPR27 ((GDB_FR_FPR26) + LONGSIZE) /* 65 */ -#define GDB_FR_FPR28 ((GDB_FR_FPR27) + LONGSIZE) /* 66 */ -#define GDB_FR_FPR29 ((GDB_FR_FPR28) + LONGSIZE) /* 67 */ -#define GDB_FR_FPR30 ((GDB_FR_FPR29) + LONGSIZE) /* 68 */ -#define GDB_FR_FPR31 ((GDB_FR_FPR30) + LONGSIZE) /* 69 */ - -#define GDB_FR_FSR ((GDB_FR_FPR31) + LONGSIZE) /* 70 */ -#define GDB_FR_FIR ((GDB_FR_FSR) + LONGSIZE) /* 71 */ -#define GDB_FR_FRP ((GDB_FR_FIR) + LONGSIZE) /* 72 */ - -#define GDB_FR_DUMMY ((GDB_FR_FRP) + LONGSIZE) /* 73, unused ??? */ - -/* - * Again, CP0 registers - */ -#define GDB_FR_CP0_INDEX ((GDB_FR_DUMMY) + LONGSIZE) /* 74 */ -#define GDB_FR_CP0_RANDOM ((GDB_FR_CP0_INDEX) + LONGSIZE) /* 75 */ -#define GDB_FR_CP0_ENTRYLO0 ((GDB_FR_CP0_RANDOM) + LONGSIZE)/* 76 */ -#define GDB_FR_CP0_ENTRYLO1 ((GDB_FR_CP0_ENTRYLO0) + LONGSIZE)/* 77 */ -#define GDB_FR_CP0_CONTEXT ((GDB_FR_CP0_ENTRYLO1) + LONGSIZE)/* 78 */ -#define GDB_FR_CP0_PAGEMASK ((GDB_FR_CP0_CONTEXT) + LONGSIZE)/* 79 */ -#define GDB_FR_CP0_WIRED ((GDB_FR_CP0_PAGEMASK) + LONGSIZE)/* 80 */ -#define GDB_FR_CP0_REG7 ((GDB_FR_CP0_WIRED) + LONGSIZE) /* 81 */ -#define GDB_FR_CP0_REG8 ((GDB_FR_CP0_REG7) + LONGSIZE) /* 82 */ -#define GDB_FR_CP0_REG9 ((GDB_FR_CP0_REG8) + LONGSIZE) /* 83 */ -#define GDB_FR_CP0_ENTRYHI ((GDB_FR_CP0_REG9) + LONGSIZE) /* 84 */ -#define GDB_FR_CP0_REG11 ((GDB_FR_CP0_ENTRYHI) + LONGSIZE)/* 85 */ -#define GDB_FR_CP0_REG12 ((GDB_FR_CP0_REG11) + LONGSIZE) /* 86 */ -#define GDB_FR_CP0_REG13 ((GDB_FR_CP0_REG12) + LONGSIZE) /* 87 */ -#define GDB_FR_CP0_REG14 ((GDB_FR_CP0_REG13) + LONGSIZE) /* 88 */ -#define GDB_FR_CP0_PRID ((GDB_FR_CP0_REG14) + LONGSIZE) /* 89 */ - -#define GDB_FR_SIZE ((((GDB_FR_CP0_PRID) + LONGSIZE) + (PTRSIZE-1)) & ~(PTRSIZE-1)) - -#ifndef __ASSEMBLY__ - -/* - * This is the same as above, but for the high-level - * part of the GDB stub. - */ - -struct gdb_regs { - /* - * Pad bytes for argument save space on the stack - * 24/48 Bytes for 32/64 bit code - */ - unsigned long pad0[6]; - - /* - * saved main processor registers - */ - long reg0, reg1, reg2, reg3, reg4, reg5, reg6, reg7; - long reg8, reg9, reg10, reg11, reg12, reg13, reg14, reg15; - long reg16, reg17, reg18, reg19, reg20, reg21, reg22, reg23; - long reg24, reg25, reg26, reg27, reg28, reg29, reg30, reg31; - - /* - * Saved special registers - */ - long cp0_status; - long lo; - long hi; - long cp0_badvaddr; - long cp0_cause; - long cp0_epc; - - /* - * Saved floating point registers - */ - long fpr0, fpr1, fpr2, fpr3, fpr4, fpr5, fpr6, fpr7; - long fpr8, fpr9, fpr10, fpr11, fpr12, fpr13, fpr14, fpr15; - long fpr16, fpr17, fpr18, fpr19, fpr20, fpr21, fpr22, fpr23; - long fpr24, fpr25, fpr26, fpr27, fpr28, fpr29, fpr30, fpr31; - - long cp1_fsr; - long cp1_fir; - - /* - * Frame pointer - */ - long frame_ptr; - long dummy; /* unused */ - - /* - * saved cp0 registers - */ - long cp0_index; - long cp0_random; - long cp0_entrylo0; - long cp0_entrylo1; - long cp0_context; - long cp0_pagemask; - long cp0_wired; - long cp0_reg7; - long cp0_reg8; - long cp0_reg9; - long cp0_entryhi; - long cp0_reg11; - long cp0_reg12; - long cp0_reg13; - long cp0_reg14; - long cp0_prid; -}; - -/* - * Prototypes - */ - -extern int kgdb_enabled; -void set_debug_traps(void); -void set_async_breakpoint(unsigned long *epc); - -#endif /* !__ASSEMBLY__ */ -#endif /* _ASM_GDB_STUB_H */ diff --git a/include/asm-mips/kdebug.h b/include/asm-mips/kdebug.h index 6ece1b03766..5bf62aafc89 100644 --- a/include/asm-mips/kdebug.h +++ b/include/asm-mips/kdebug.h @@ -1 +1,13 @@ -#include <asm-generic/kdebug.h> +#ifndef _ASM_MIPS_KDEBUG_H +#define _ASM_MIPS_KDEBUG_H + +#include <linux/notifier.h> + +enum die_val { + DIE_OOPS = 1, + DIE_FP, + DIE_TRAP, + DIE_RI, +}; + +#endif /* _ASM_MIPS_KDEBUG_H */ diff --git a/include/asm-mips/kgdb.h b/include/asm-mips/kgdb.h new file mode 100644 index 00000000000..48223b09396 --- /dev/null +++ b/include/asm-mips/kgdb.h @@ -0,0 +1,44 @@ +#ifndef __ASM_KGDB_H_ +#define __ASM_KGDB_H_ + +#ifdef __KERNEL__ + +#include <asm/sgidefs.h> + +#if (_MIPS_ISA == _MIPS_ISA_MIPS1) || (_MIPS_ISA == _MIPS_ISA_MIPS2) || \ + (_MIPS_ISA == _MIPS_ISA_MIPS32) + +#define KGDB_GDB_REG_SIZE 32 + +#elif (_MIPS_ISA == _MIPS_ISA_MIPS3) || (_MIPS_ISA == _MIPS_ISA_MIPS4) || \ + (_MIPS_ISA == _MIPS_ISA_MIPS64) + +#ifdef CONFIG_32BIT +#define KGDB_GDB_REG_SIZE 32 +#else /* CONFIG_CPU_32BIT */ +#define KGDB_GDB_REG_SIZE 64 +#endif +#else +#error "Need to set KGDB_GDB_REG_SIZE for MIPS ISA" +#endif /* _MIPS_ISA */ + +#define BUFMAX 2048 +#if (KGDB_GDB_REG_SIZE == 32) +#define NUMREGBYTES (90*sizeof(u32)) +#define NUMCRITREGBYTES (12*sizeof(u32)) +#else +#define NUMREGBYTES (90*sizeof(u64)) +#define NUMCRITREGBYTES (12*sizeof(u64)) +#endif +#define BREAK_INSTR_SIZE 4 +#define CACHE_FLUSH_IS_SAFE 0 + +extern void arch_kgdb_breakpoint(void); +extern int kgdb_early_setup; +extern void *saved_vectors[32]; +extern void handle_exception(struct pt_regs *regs); +extern void breakinst(void); + +#endif /* __KERNEL__ */ + +#endif /* __ASM_KGDB_H_ */ diff --git a/include/asm-mips/pci.h b/include/asm-mips/pci.h index c205875d7f3..5510c53b7fe 100644 --- a/include/asm-mips/pci.h +++ b/include/asm-mips/pci.h @@ -174,4 +174,6 @@ static inline int pci_get_legacy_ide_irq(struct pci_dev *dev, int channel) extern int pci_probe_only; +extern char * (*pcibios_plat_setup)(char *str); + #endif /* _ASM_PCI_H */ diff --git a/include/asm-mips/txx9/generic.h b/include/asm-mips/txx9/generic.h index cbae37ec3d8..5b1ccf901c6 100644 --- a/include/asm-mips/txx9/generic.h +++ b/include/asm-mips/txx9/generic.h @@ -44,5 +44,19 @@ extern struct txx9_board_vec *txx9_board_vec; extern int (*txx9_irq_dispatch)(int pending); void prom_init_cmdline(void); char *prom_getcmdline(void); +void txx9_wdt_init(unsigned long base); +void txx9_spi_init(int busid, unsigned long base, int irq); +void txx9_ethaddr_init(unsigned int id, unsigned char *ethaddr); +void txx9_sio_init(unsigned long baseaddr, int irq, + unsigned int line, unsigned int sclk, int nocts); +void prom_putchar(char c); +#ifdef CONFIG_EARLY_PRINTK +extern void (*txx9_prom_putchar)(char c); +void txx9_sio_putchar_init(unsigned long baseaddr); +#else +static inline void txx9_sio_putchar_init(unsigned long baseaddr) +{ +} +#endif #endif /* __ASM_TXX9_GENERIC_H */ diff --git a/include/asm-mips/txx9/jmr3927.h b/include/asm-mips/txx9/jmr3927.h index d6eb1b6a54e..a409c446bf1 100644 --- a/include/asm-mips/txx9/jmr3927.h +++ b/include/asm-mips/txx9/jmr3927.h @@ -149,8 +149,6 @@ /* Clocks */ #define JMR3927_CORECLK 132710400 /* 132.7MHz */ -#define JMR3927_GBUSCLK (JMR3927_CORECLK / 2) /* 66.35MHz */ -#define JMR3927_IMCLK (JMR3927_CORECLK / 4) /* 33.17MHz */ /* * TX3927 Pin Configuration: diff --git a/include/asm-mips/txx9/pci.h b/include/asm-mips/txx9/pci.h index d89a45091e2..3d32529060a 100644 --- a/include/asm-mips/txx9/pci.h +++ b/include/asm-mips/txx9/pci.h @@ -33,4 +33,7 @@ enum txx9_pci_err_action { }; extern enum txx9_pci_err_action txx9_pci_err_action; +extern char * (*txx9_board_pcibios_setup)(char *str); +char *txx9_pcibios_setup(char *str); + #endif /* __ASM_TXX9_PCI_H */ diff --git a/include/asm-mips/txx9/smsc_fdc37m81x.h b/include/asm-mips/txx9/smsc_fdc37m81x.h index 9375e4fc228..02e161d0755 100644 --- a/include/asm-mips/txx9/smsc_fdc37m81x.h +++ b/include/asm-mips/txx9/smsc_fdc37m81x.h @@ -56,7 +56,7 @@ #define SMSC_FDC37M81X_CONFIG_EXIT 0xaa #define SMSC_FDC37M81X_CHIP_ID 0x4d -unsigned long __init smsc_fdc37m81x_init(unsigned long port); +unsigned long smsc_fdc37m81x_init(unsigned long port); void smsc_fdc37m81x_config_beg(void); diff --git a/include/asm-mips/txx9/tx3927.h b/include/asm-mips/txx9/tx3927.h index ea79e1b16e7..587deb9592d 100644 --- a/include/asm-mips/txx9/tx3927.h +++ b/include/asm-mips/txx9/tx3927.h @@ -8,9 +8,8 @@ #ifndef __ASM_TXX9_TX3927_H #define __ASM_TXX9_TX3927_H -#include <asm/txx9/txx927.h> - #define TX3927_REG_BASE 0xfffe0000UL +#define TX3927_REG_SIZE 0x00010000 #define TX3927_SDRAMC_REG (TX3927_REG_BASE + 0x8000) #define TX3927_ROMC_REG (TX3927_REG_BASE + 0x9000) #define TX3927_DMA_REG (TX3927_REG_BASE + 0xb000) @@ -236,11 +235,17 @@ struct tx3927_ccfg_reg { /* see PCI_STATUS_XXX in linux/pci.h */ #define PCI_STATUS_NEW_CAP 0x0010 +/* bits for ISTAT/IIM */ +#define TX3927_PCIC_IIM_ALL 0x00001600 + /* bits for TC */ #define TX3927_PCIC_TC_OF16E 0x00000020 #define TX3927_PCIC_TC_IF8E 0x00000010 #define TX3927_PCIC_TC_OF8E 0x00000008 +/* bits for TSTAT/TIM */ +#define TX3927_PCIC_TIM_ALL 0x0003ffff + /* bits for IOBA/MBA */ /* see PCI_BASE_ADDRESS_XXX in linux/pci.h */ @@ -313,12 +318,22 @@ struct tx3927_ccfg_reg { #define tx3927_dmaptr ((struct tx3927_dma_reg *)TX3927_DMA_REG) #define tx3927_pcicptr ((struct tx3927_pcic_reg *)TX3927_PCIC_REG) #define tx3927_ccfgptr ((struct tx3927_ccfg_reg *)TX3927_CCFG_REG) -#define tx3927_tmrptr(ch) ((struct txx927_tmr_reg *)TX3927_TMR_REG(ch)) #define tx3927_sioptr(ch) ((struct txx927_sio_reg *)TX3927_SIO_REG(ch)) #define tx3927_pioptr ((struct txx9_pio_reg __iomem *)TX3927_PIO_REG) +#define TX3927_REV_PCODE() (tx3927_ccfgptr->crir >> 16) +#define TX3927_ROMC_BA(ch) (tx3927_romcptr->cr[(ch)] & 0xfff00000) +#define TX3927_ROMC_SIZE(ch) \ + (0x00100000 << ((tx3927_romcptr->cr[(ch)] >> 8) & 0xf)) + +void tx3927_wdt_init(void); +void tx3927_setup(void); +void tx3927_time_init(unsigned int evt_tmrnr, unsigned int src_tmrnr); +void tx3927_sio_init(unsigned int sclk, unsigned int cts_mask); struct pci_controller; -void __init tx3927_pcic_setup(struct pci_controller *channel, - unsigned long sdram_size, int extarb); +void tx3927_pcic_setup(struct pci_controller *channel, + unsigned long sdram_size, int extarb); +void tx3927_setup_pcierr_irq(void); +void tx3927_irq_init(void); #endif /* __ASM_TXX9_TX3927_H */ diff --git a/include/asm-mips/txx9/tx4927.h b/include/asm-mips/txx9/tx4927.h index ceb4b79ff4e..195f6515db9 100644 --- a/include/asm-mips/txx9/tx4927.h +++ b/include/asm-mips/txx9/tx4927.h @@ -243,12 +243,13 @@ static inline void tx4927_ccfg_change(__u64 change, __u64 new) } unsigned int tx4927_get_mem_size(void); -void tx4927_wdr_init(void); +void tx4927_wdt_init(void); void tx4927_setup(void); void tx4927_time_init(unsigned int tmrnr); -void tx4927_setup_serial(void); +void tx4927_sio_init(unsigned int sclk, unsigned int cts_mask); int tx4927_report_pciclk(void); int tx4927_pciclk66_setup(void); +void tx4927_setup_pcierr_irq(void); void tx4927_irq_init(void); #endif /* __ASM_TXX9_TX4927_H */ diff --git a/include/asm-mips/txx9/tx4927pcic.h b/include/asm-mips/txx9/tx4927pcic.h index d61c3d09c4a..c470b8a5fe5 100644 --- a/include/asm-mips/txx9/tx4927pcic.h +++ b/include/asm-mips/txx9/tx4927pcic.h @@ -10,6 +10,7 @@ #define __ASM_TXX9_TX4927PCIC_H #include <linux/pci.h> +#include <linux/irqreturn.h> struct tx4927_pcic_reg { u32 pciid; @@ -192,8 +193,11 @@ struct tx4927_pcic_reg { struct tx4927_pcic_reg __iomem *get_tx4927_pcicptr( struct pci_controller *channel); -void __init tx4927_pcic_setup(struct tx4927_pcic_reg __iomem *pcicptr, - struct pci_controller *channel, int extarb); +void tx4927_pcic_setup(struct tx4927_pcic_reg __iomem *pcicptr, + struct pci_controller *channel, int extarb); void tx4927_report_pcic_status(void); +char *tx4927_pcibios_setup(char *str); +void tx4927_dump_pcic_settings(void); +irqreturn_t tx4927_pcierr_interrupt(int irq, void *dev_id); #endif /* __ASM_TXX9_TX4927PCIC_H */ diff --git a/include/asm-mips/txx9/tx4938.h b/include/asm-mips/txx9/tx4938.h index 1ed969d381d..8175d4ccbc3 100644 --- a/include/asm-mips/txx9/tx4938.h +++ b/include/asm-mips/txx9/tx4938.h @@ -276,15 +276,18 @@ struct tx4938_ccfg_reg { #define TX4938_EBUSC_SIZE(ch) TX4927_EBUSC_SIZE(ch) #define tx4938_get_mem_size() tx4927_get_mem_size() -void tx4938_wdr_init(void); +void tx4938_wdt_init(void); void tx4938_setup(void); void tx4938_time_init(unsigned int tmrnr); -void tx4938_setup_serial(void); +void tx4938_sio_init(unsigned int sclk, unsigned int cts_mask); +void tx4938_spi_init(int busid); +void tx4938_ethaddr_init(unsigned char *addr0, unsigned char *addr1); int tx4938_report_pciclk(void); void tx4938_report_pci1clk(void); int tx4938_pciclk66_setup(void); struct pci_dev; int tx4938_pcic1_map_irq(const struct pci_dev *dev, u8 slot); +void tx4938_setup_pcierr_irq(void); void tx4938_irq_init(void); #endif diff --git a/include/asm-mips/txx9/txx927.h b/include/asm-mips/txx9/txx927.h deleted file mode 100644 index 97dd7ad1a89..00000000000 --- a/include/asm-mips/txx9/txx927.h +++ /dev/null @@ -1,121 +0,0 @@ -/* - * Common definitions for TX3927/TX4927 - * - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - * - * Copyright (C) 2000 Toshiba Corporation - */ -#ifndef __ASM_TXX9_TXX927_H -#define __ASM_TXX9_TXX927_H - -struct txx927_sio_reg { - volatile unsigned long lcr; - volatile unsigned long dicr; - volatile unsigned long disr; - volatile unsigned long cisr; - volatile unsigned long fcr; - volatile unsigned long flcr; - volatile unsigned long bgr; - volatile unsigned long tfifo; - volatile unsigned long rfifo; -}; - -/* - * SIO - */ -/* SILCR : Line Control */ -#define TXx927_SILCR_SCS_MASK 0x00000060 -#define TXx927_SILCR_SCS_IMCLK 0x00000000 -#define TXx927_SILCR_SCS_IMCLK_BG 0x00000020 -#define TXx927_SILCR_SCS_SCLK 0x00000040 -#define TXx927_SILCR_SCS_SCLK_BG 0x00000060 -#define TXx927_SILCR_UEPS 0x00000010 -#define TXx927_SILCR_UPEN 0x00000008 -#define TXx927_SILCR_USBL_MASK 0x00000004 -#define TXx927_SILCR_USBL_1BIT 0x00000004 -#define TXx927_SILCR_USBL_2BIT 0x00000000 -#define TXx927_SILCR_UMODE_MASK 0x00000003 -#define TXx927_SILCR_UMODE_8BIT 0x00000000 -#define TXx927_SILCR_UMODE_7BIT 0x00000001 - -/* SIDICR : DMA/Int. Control */ -#define TXx927_SIDICR_TDE 0x00008000 -#define TXx927_SIDICR_RDE 0x00004000 -#define TXx927_SIDICR_TIE 0x00002000 -#define TXx927_SIDICR_RIE 0x00001000 -#define TXx927_SIDICR_SPIE 0x00000800 -#define TXx927_SIDICR_CTSAC 0x00000600 -#define TXx927_SIDICR_STIE_MASK 0x0000003f -#define TXx927_SIDICR_STIE_OERS 0x00000020 -#define TXx927_SIDICR_STIE_CTSS 0x00000010 -#define TXx927_SIDICR_STIE_RBRKD 0x00000008 -#define TXx927_SIDICR_STIE_TRDY 0x00000004 -#define TXx927_SIDICR_STIE_TXALS 0x00000002 -#define TXx927_SIDICR_STIE_UBRKD 0x00000001 - -/* SIDISR : DMA/Int. Status */ -#define TXx927_SIDISR_UBRK 0x00008000 -#define TXx927_SIDISR_UVALID 0x00004000 -#define TXx927_SIDISR_UFER 0x00002000 -#define TXx927_SIDISR_UPER 0x00001000 -#define TXx927_SIDISR_UOER 0x00000800 -#define TXx927_SIDISR_ERI 0x00000400 -#define TXx927_SIDISR_TOUT 0x00000200 -#define TXx927_SIDISR_TDIS 0x00000100 -#define TXx927_SIDISR_RDIS 0x00000080 -#define TXx927_SIDISR_STIS 0x00000040 -#define TXx927_SIDISR_RFDN_MASK 0x0000001f - -/* SICISR : Change Int. Status */ -#define TXx927_SICISR_OERS 0x00000020 -#define TXx927_SICISR_CTSS 0x00000010 -#define TXx927_SICISR_RBRKD 0x00000008 -#define TXx927_SICISR_TRDY 0x00000004 -#define TXx927_SICISR_TXALS 0x00000002 -#define TXx927_SICISR_UBRKD 0x00000001 - -/* SIFCR : FIFO Control */ -#define TXx927_SIFCR_SWRST 0x00008000 -#define TXx927_SIFCR_RDIL_MASK 0x00000180 -#define TXx927_SIFCR_RDIL_1 0x00000000 -#define TXx927_SIFCR_RDIL_4 0x00000080 -#define TXx927_SIFCR_RDIL_8 0x00000100 -#define TXx927_SIFCR_RDIL_12 0x00000180 -#define TXx927_SIFCR_RDIL_MAX 0x00000180 -#define TXx927_SIFCR_TDIL_MASK 0x00000018 -#define TXx927_SIFCR_TDIL_MASK 0x00000018 -#define TXx927_SIFCR_TDIL_1 0x00000000 -#define TXx927_SIFCR_TDIL_4 0x00000001 -#define TXx927_SIFCR_TDIL_8 0x00000010 -#define TXx927_SIFCR_TDIL_MAX 0x00000010 -#define TXx927_SIFCR_TFRST 0x00000004 -#define TXx927_SIFCR_RFRST 0x00000002 -#define TXx927_SIFCR_FRSTE 0x00000001 -#define TXx927_SIO_TX_FIFO 8 -#define TXx927_SIO_RX_FIFO 16 - -/* SIFLCR : Flow Control */ -#define TXx927_SIFLCR_RCS 0x00001000 -#define TXx927_SIFLCR_TES 0x00000800 -#define TXx927_SIFLCR_RTSSC 0x00000200 -#define TXx927_SIFLCR_RSDE 0x00000100 -#define TXx927_SIFLCR_TSDE 0x00000080 -#define TXx927_SIFLCR_RTSTL_MASK 0x0000001e -#define TXx927_SIFLCR_RTSTL_MAX 0x0000001e -#define TXx927_SIFLCR_TBRK 0x00000001 - -/* SIBGR : Baudrate Control */ -#define TXx927_SIBGR_BCLK_MASK 0x00000300 -#define TXx927_SIBGR_BCLK_T0 0x00000000 -#define TXx927_SIBGR_BCLK_T2 0x00000100 -#define TXx927_SIBGR_BCLK_T4 0x00000200 -#define TXx927_SIBGR_BCLK_T6 0x00000300 -#define TXx927_SIBGR_BRD_MASK 0x000000ff - -/* - * PIO - */ - -#endif /* __ASM_TXX9_TXX927_H */ diff --git a/include/asm-mips/txx9irq.h b/include/asm-mips/txx9irq.h index 1c439e51b87..5620879be37 100644 --- a/include/asm-mips/txx9irq.h +++ b/include/asm-mips/txx9irq.h @@ -14,8 +14,12 @@ #ifdef CONFIG_IRQ_CPU #define TXX9_IRQ_BASE (MIPS_CPU_IRQ_BASE + 8) #else +#ifdef CONFIG_I8259 +#define TXX9_IRQ_BASE (I8259A_IRQ_BASE + 16) +#else #define TXX9_IRQ_BASE 0 #endif +#endif #ifdef CONFIG_CPU_TX39XX #define TXx9_MAX_IR 16 diff --git a/include/asm-powerpc/cpm.h b/include/asm-powerpc/cpm.h index 63a55337c2d..24d79e3abd8 100644 --- a/include/asm-powerpc/cpm.h +++ b/include/asm-powerpc/cpm.h @@ -3,6 +3,7 @@ #include <linux/compiler.h> #include <linux/types.h> +#include <linux/of.h> /* Opcodes common to CPM1 and CPM2 */ @@ -100,4 +101,6 @@ unsigned long cpm_muram_offset(void __iomem *addr); dma_addr_t cpm_muram_dma(void __iomem *addr); int cpm_command(u32 command, u8 opcode); +int cpm2_gpiochip_add32(struct device_node *np); + #endif diff --git a/include/asm-powerpc/cpm2.h b/include/asm-powerpc/cpm2.h index 2c7fd9cee29..2a6fa0183ac 100644 --- a/include/asm-powerpc/cpm2.h +++ b/include/asm-powerpc/cpm2.h @@ -12,6 +12,7 @@ #include <asm/immap_cpm2.h> #include <asm/cpm.h> +#include <sysdev/fsl_soc.h> #ifdef CONFIG_PPC_85xx #define CPM_MAP_ADDR (get_immrbase() + 0x80000) @@ -93,10 +94,40 @@ extern cpm_cpm2_t __iomem *cpmp; /* Pointer to comm processor */ #define cpm_dpfree cpm_muram_free #define cpm_dpram_addr cpm_muram_addr -extern void cpm_setbrg(uint brg, uint rate); -extern void cpm2_fastbrg(uint brg, uint rate, int div16); extern void cpm2_reset(void); +/* Baud rate generators. +*/ +#define CPM_BRG_RST ((uint)0x00020000) +#define CPM_BRG_EN ((uint)0x00010000) +#define CPM_BRG_EXTC_INT ((uint)0x00000000) +#define CPM_BRG_EXTC_CLK3_9 ((uint)0x00004000) +#define CPM_BRG_EXTC_CLK5_15 ((uint)0x00008000) +#define CPM_BRG_ATB ((uint)0x00002000) +#define CPM_BRG_CD_MASK ((uint)0x00001ffe) +#define CPM_BRG_DIV16 ((uint)0x00000001) + +#define CPM2_BRG_INT_CLK (get_brgfreq()) +#define CPM2_BRG_UART_CLK (CPM2_BRG_INT_CLK/16) + +extern void __cpm2_setbrg(uint brg, uint rate, uint clk, int div16, int src); + +/* This function is used by UARTS, or anything else that uses a 16x + * oversampled clock. + */ +static inline void cpm_setbrg(uint brg, uint rate) +{ + __cpm2_setbrg(brg, rate, CPM2_BRG_UART_CLK, 0, CPM_BRG_EXTC_INT); +} + +/* This function is used to set high speed synchronous baud rate + * clocks. + */ +static inline void cpm2_fastbrg(uint brg, uint rate, int div16) +{ + __cpm2_setbrg(brg, rate, CPM2_BRG_INT_CLK, div16, CPM_BRG_EXTC_INT); +} + /* Function code bits, usually generic to devices. */ #define CPMFCR_GBL ((u_char)0x20) /* Set memory snooping */ @@ -195,17 +226,6 @@ typedef struct smc_uart { #define SMCM_TX ((unsigned char)0x02) #define SMCM_RX ((unsigned char)0x01) -/* Baud rate generators. -*/ -#define CPM_BRG_RST ((uint)0x00020000) -#define CPM_BRG_EN ((uint)0x00010000) -#define CPM_BRG_EXTC_INT ((uint)0x00000000) -#define CPM_BRG_EXTC_CLK3_9 ((uint)0x00004000) -#define CPM_BRG_EXTC_CLK5_15 ((uint)0x00008000) -#define CPM_BRG_ATB ((uint)0x00002000) -#define CPM_BRG_CD_MASK ((uint)0x00001ffe) -#define CPM_BRG_DIV16 ((uint)0x00000001) - /* SCCs. */ #define SCC_GSMRH_IRP ((uint)0x00040000) diff --git a/include/asm-powerpc/pgtable-ppc64.h b/include/asm-powerpc/pgtable-ppc64.h index 5fc78c0be30..74c6f380b80 100644 --- a/include/asm-powerpc/pgtable-ppc64.h +++ b/include/asm-powerpc/pgtable-ppc64.h @@ -461,6 +461,8 @@ void pgtable_cache_init(void); return pt; } +pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long address); + #endif /* __ASSEMBLY__ */ #endif /* _ASM_POWERPC_PGTABLE_PPC64_H_ */ diff --git a/include/asm-s390/hardirq.h b/include/asm-s390/hardirq.h index 4b7cb964ff3..89ec7056da2 100644 --- a/include/asm-s390/hardirq.h +++ b/include/asm-s390/hardirq.h @@ -34,4 +34,18 @@ typedef struct { void clock_comparator_work(void); +static inline unsigned long long local_tick_disable(void) +{ + unsigned long long old; + + old = S390_lowcore.clock_comparator; + S390_lowcore.clock_comparator = -1ULL; + return old; +} + +static inline void local_tick_enable(unsigned long long comp) +{ + S390_lowcore.clock_comparator = comp; +} + #endif /* __ASM_HARDIRQ_H */ diff --git a/include/asm-s390/ipl.h b/include/asm-s390/ipl.h index eaca6dff540..1171e6d144a 100644 --- a/include/asm-s390/ipl.h +++ b/include/asm-s390/ipl.h @@ -159,7 +159,8 @@ enum diag308_vm_flags { }; enum diag308_rc { - DIAG308_RC_OK = 1, + DIAG308_RC_OK = 0x0001, + DIAG308_RC_NOCONFIG = 0x0102, }; extern int diag308(unsigned long subcode, void *addr); diff --git a/include/asm-s390/schid.h b/include/asm-s390/schid.h index 7bdc0fe1569..825503cf3dc 100644 --- a/include/asm-s390/schid.h +++ b/include/asm-s390/schid.h @@ -11,6 +11,7 @@ struct subchannel_id { } __attribute__ ((packed, aligned(4))); #ifdef __KERNEL__ +#include <linux/string.h> /* Helper function for sane state of pre-allocated subchannel_id. */ static inline void diff --git a/include/asm-s390/setup.h b/include/asm-s390/setup.h index 4ba14e463e8..2bd9faeb391 100644 --- a/include/asm-s390/setup.h +++ b/include/asm-s390/setup.h @@ -65,7 +65,6 @@ extern unsigned long machine_flags; #define MACHINE_FLAG_VM (1UL << 0) #define MACHINE_FLAG_IEEE (1UL << 1) -#define MACHINE_FLAG_P390 (1UL << 2) #define MACHINE_FLAG_CSP (1UL << 3) #define MACHINE_FLAG_MVPG (1UL << 4) #define MACHINE_FLAG_DIAG44 (1UL << 5) diff --git a/include/asm-s390/unistd.h b/include/asm-s390/unistd.h index 583da807ea9..c8ad350d144 100644 --- a/include/asm-s390/unistd.h +++ b/include/asm-s390/unistd.h @@ -259,7 +259,13 @@ #define __NR_timerfd_create 319 #define __NR_timerfd_settime 320 #define __NR_timerfd_gettime 321 -#define NR_syscalls 322 +#define __NR_signalfd4 322 +#define __NR_eventfd2 323 +#define __NR_inotify_init1 324 +#define __NR_pipe2 325 +#define __NR_dup3 326 +#define __NR_epoll_create1 327 +#define NR_syscalls 328 /* * There are some system calls that are not present on 64 bit, some diff --git a/include/asm-x86/dma-mapping.h b/include/asm-x86/dma-mapping.h index 0eaa9bf6011..ad9cd6d49bf 100644 --- a/include/asm-x86/dma-mapping.h +++ b/include/asm-x86/dma-mapping.h @@ -249,25 +249,5 @@ static inline int dma_get_cache_alignment(void) #define dma_is_consistent(d, h) (1) -#ifdef CONFIG_X86_32 -# define ARCH_HAS_DMA_DECLARE_COHERENT_MEMORY -struct dma_coherent_mem { - void *virt_base; - u32 device_base; - int size; - int flags; - unsigned long *bitmap; -}; - -extern int -dma_declare_coherent_memory(struct device *dev, dma_addr_t bus_addr, - dma_addr_t device_addr, size_t size, int flags); - -extern void -dma_release_declared_memory(struct device *dev); - -extern void * -dma_mark_declared_memory_occupied(struct device *dev, - dma_addr_t device_addr, size_t size); -#endif /* CONFIG_X86_32 */ +#include <asm-generic/dma-coherent.h> #endif diff --git a/include/asm-x86/iommu.h b/include/asm-x86/iommu.h index ecc8061904a..5f888cc5be4 100644 --- a/include/asm-x86/iommu.h +++ b/include/asm-x86/iommu.h @@ -7,6 +7,8 @@ extern struct dma_mapping_ops nommu_dma_ops; extern int force_iommu, no_iommu; extern int iommu_detected; +extern unsigned long iommu_num_pages(unsigned long addr, unsigned long len); + #ifdef CONFIG_GART_IOMMU extern int gart_iommu_aperture; extern int gart_iommu_aperture_allowed; diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index 82aa36c53ea..50cfe8ceb47 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -205,6 +205,8 @@ void block_invalidatepage(struct page *page, unsigned long offset); int block_write_full_page(struct page *page, get_block_t *get_block, struct writeback_control *wbc); int block_read_full_page(struct page*, get_block_t*); +int block_is_partially_uptodate(struct page *page, read_descriptor_t *desc, + unsigned long from); int block_write_begin(struct file *, struct address_space *, loff_t, unsigned, unsigned, struct page **, void **, get_block_t*); diff --git a/include/linux/connector.h b/include/linux/connector.h index 96a89d3d672..5c7f9468f75 100644 --- a/include/linux/connector.h +++ b/include/linux/connector.h @@ -38,8 +38,9 @@ #define CN_W1_VAL 0x1 #define CN_IDX_V86D 0x4 #define CN_VAL_V86D_UVESAFB 0x1 +#define CN_IDX_BB 0x5 /* BlackBoard, from the TSP GPL sampling framework */ -#define CN_NETLINK_USERS 5 +#define CN_NETLINK_USERS 6 /* * Maximum connector's message size. diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 2270ca5ec63..6fd5668aa57 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -106,6 +106,7 @@ struct cpufreq_policy { #define CPUFREQ_ADJUST (0) #define CPUFREQ_INCOMPATIBLE (1) #define CPUFREQ_NOTIFY (2) +#define CPUFREQ_START (3) #define CPUFREQ_SHARED_TYPE_NONE (0) /* None */ #define CPUFREQ_SHARED_TYPE_HW (1) /* HW does needed coordination */ diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index 1b5c98e7fef..d3219d73f8e 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -62,15 +62,7 @@ * int next_cpu_nr(cpu, mask) Next cpu past 'cpu', or nr_cpu_ids * * cpumask_t cpumask_of_cpu(cpu) Return cpumask with bit 'cpu' set - *ifdef CONFIG_HAS_CPUMASK_OF_CPU - * cpumask_of_cpu_ptr_declare(v) Declares cpumask_t *v - * cpumask_of_cpu_ptr_next(v, cpu) Sets v = &cpumask_of_cpu_map[cpu] - * cpumask_of_cpu_ptr(v, cpu) Combines above two operations - *else - * cpumask_of_cpu_ptr_declare(v) Declares cpumask_t _v and *v = &_v - * cpumask_of_cpu_ptr_next(v, cpu) Sets _v = cpumask_of_cpu(cpu) - * cpumask_of_cpu_ptr(v, cpu) Combines above two operations - *endif + * (can be used as an lvalue) * CPU_MASK_ALL Initializer - all bits set * CPU_MASK_NONE Initializer - no bits set * unsigned long *cpus_addr(mask) Array of unsigned long's in mask @@ -273,37 +265,30 @@ static inline void __cpus_shift_left(cpumask_t *dstp, bitmap_shift_left(dstp->bits, srcp->bits, n, nbits); } +/* + * Special-case data structure for "single bit set only" constant CPU masks. + * + * We pre-generate all the 64 (or 32) possible bit positions, with enough + * padding to the left and the right, and return the constant pointer + * appropriately offset. + */ +extern const unsigned long + cpu_bit_bitmap[BITS_PER_LONG+1][BITS_TO_LONGS(NR_CPUS)]; + +static inline const cpumask_t *get_cpu_mask(unsigned int cpu) +{ + const unsigned long *p = cpu_bit_bitmap[1 + cpu % BITS_PER_LONG]; + p -= cpu / BITS_PER_LONG; + return (const cpumask_t *)p; +} + +/* + * In cases where we take the address of the cpumask immediately, + * gcc optimizes it out (it's a constant) and there's no huge stack + * variable created: + */ +#define cpumask_of_cpu(cpu) (*get_cpu_mask(cpu)) -#ifdef CONFIG_HAVE_CPUMASK_OF_CPU_MAP -extern cpumask_t *cpumask_of_cpu_map; -#define cpumask_of_cpu(cpu) (cpumask_of_cpu_map[cpu]) -#define cpumask_of_cpu_ptr(v, cpu) \ - const cpumask_t *v = &cpumask_of_cpu(cpu) -#define cpumask_of_cpu_ptr_declare(v) \ - const cpumask_t *v -#define cpumask_of_cpu_ptr_next(v, cpu) \ - v = &cpumask_of_cpu(cpu) -#else -#define cpumask_of_cpu(cpu) \ -({ \ - typeof(_unused_cpumask_arg_) m; \ - if (sizeof(m) == sizeof(unsigned long)) { \ - m.bits[0] = 1UL<<(cpu); \ - } else { \ - cpus_clear(m); \ - cpu_set((cpu), m); \ - } \ - m; \ -}) -#define cpumask_of_cpu_ptr(v, cpu) \ - cpumask_t _##v = cpumask_of_cpu(cpu); \ - const cpumask_t *v = &_##v -#define cpumask_of_cpu_ptr_declare(v) \ - cpumask_t _##v; \ - const cpumask_t *v = &_##v -#define cpumask_of_cpu_ptr_next(v, cpu) \ - _##v = cpumask_of_cpu(cpu) -#endif #define CPU_MASK_LAST_WORD BITMAP_LAST_WORD_MASK(NR_CPUS) diff --git a/include/linux/fs.h b/include/linux/fs.h index 8252b045e62..580b513668f 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -443,6 +443,27 @@ static inline size_t iov_iter_count(struct iov_iter *i) return i->count; } +/* + * "descriptor" for what we're up to with a read. + * This allows us to use the same read code yet + * have multiple different users of the data that + * we read from a file. + * + * The simplest case just copies the data to user + * mode. + */ +typedef struct { + size_t written; + size_t count; + union { + char __user *buf; + void *data; + } arg; + int error; +} read_descriptor_t; + +typedef int (*read_actor_t)(read_descriptor_t *, struct page *, + unsigned long, unsigned long); struct address_space_operations { int (*writepage)(struct page *page, struct writeback_control *wbc); @@ -484,6 +505,8 @@ struct address_space_operations { int (*migratepage) (struct address_space *, struct page *, struct page *); int (*launder_page) (struct page *); + int (*is_partially_uptodate) (struct page *, read_descriptor_t *, + unsigned long); }; /* @@ -1198,27 +1221,6 @@ struct block_device_operations { struct module *owner; }; -/* - * "descriptor" for what we're up to with a read. - * This allows us to use the same read code yet - * have multiple different users of the data that - * we read from a file. - * - * The simplest case just copies the data to user - * mode. - */ -typedef struct { - size_t written; - size_t count; - union { - char __user * buf; - void *data; - } arg; - int error; -} read_descriptor_t; - -typedef int (*read_actor_t)(read_descriptor_t *, struct page *, unsigned long, unsigned long); - /* These macros are for out of kernel modules to test that * the kernel supports the unlocked_ioctl and compat_ioctl * fields in struct file_operations. */ diff --git a/include/linux/ioport.h b/include/linux/ioport.h index 2cd07cc2968..22d2115458c 100644 --- a/include/linux/ioport.h +++ b/include/linux/ioport.h @@ -118,6 +118,10 @@ extern int allocate_resource(struct resource *root, struct resource *new, int adjust_resource(struct resource *res, resource_size_t start, resource_size_t size); resource_size_t resource_alignment(struct resource *res); +static inline resource_size_t resource_size(struct resource *res) +{ + return res->end - res->start + 1; +} /* Convenience shorthand with allocation */ #define request_region(start,n,name) __request_region(&ioport_resource, (start), (n), (name)) diff --git a/include/linux/kallsyms.h b/include/linux/kallsyms.h index 57aefa160a9..b9614488744 100644 --- a/include/linux/kallsyms.h +++ b/include/linux/kallsyms.h @@ -108,8 +108,7 @@ static inline void print_fn_descriptor_symbol(const char *fmt, void *addr) static inline void print_ip_sym(unsigned long ip) { - printk("[<%p>]", (void *) ip); - print_symbol(" %s\n", ip); + printk("[<%p>] %pS\n", (void *) ip, (void *) ip); } #endif /*_LINUX_KALLSYMS_H*/ diff --git a/include/linux/kernel.h b/include/linux/kernel.h index fdbbf72ca2e..aaa998f65c7 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -75,6 +75,12 @@ extern const char linux_proc_banner[]; */ #define upper_32_bits(n) ((u32)(((n) >> 16) >> 16)) +/** + * lower_32_bits - return bits 0-31 of a number + * @n: the number we're accessing + */ +#define lower_32_bits(n) ((u32)(n)) + #define KERN_EMERG "<0>" /* system is unusable */ #define KERN_ALERT "<1>" /* action must be taken immediately */ #define KERN_CRIT "<2>" /* critical conditions */ diff --git a/include/linux/mfd/core.h b/include/linux/mfd/core.h index bb3dd054592..49ef857cdb2 100644 --- a/include/linux/mfd/core.h +++ b/include/linux/mfd/core.h @@ -1,5 +1,3 @@ -#ifndef MFD_CORE_H -#define MFD_CORE_H /* * drivers/mfd/mfd-core.h * @@ -13,6 +11,9 @@ * */ +#ifndef MFD_CORE_H +#define MFD_CORE_H + #include <linux/platform_device.h> /* @@ -28,7 +29,13 @@ struct mfd_cell { int (*suspend)(struct platform_device *dev); int (*resume)(struct platform_device *dev); - void *driver_data; /* driver-specific data */ + /* driver-specific data for MFD-aware "cell" drivers */ + void *driver_data; + + /* platform_data can be used to either pass data to "generic" + driver or as a hook to mfd_cell for the "cell" drivers */ + void *platform_data; + size_t data_size; /* * This resources can be specified relatievly to the parent device. @@ -38,18 +45,11 @@ struct mfd_cell { const struct resource *resources; }; -static inline struct mfd_cell * -mfd_get_cell(struct platform_device *pdev) -{ - return (struct mfd_cell *)pdev->dev.platform_data; -} - -extern int mfd_add_devices( - struct platform_device *parent, - const struct mfd_cell *cells, int n_devs, - struct resource *mem_base, - int irq_base); +extern int mfd_add_devices(struct device *parent, int id, + const struct mfd_cell *cells, int n_devs, + struct resource *mem_base, + int irq_base); -extern void mfd_remove_devices(struct platform_device *parent); +extern void mfd_remove_devices(struct device *parent); #endif diff --git a/include/linux/mm.h b/include/linux/mm.h index 6e695eaab4c..335288bff1b 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -744,6 +744,8 @@ struct zap_details { struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr, pte_t pte); +int zap_vma_ptes(struct vm_area_struct *vma, unsigned long address, + unsigned long size); unsigned long zap_page_range(struct vm_area_struct *vma, unsigned long address, unsigned long size, struct zap_details *); unsigned long unmap_vmas(struct mmu_gather **tlb, @@ -1041,7 +1043,6 @@ extern unsigned long absent_pages_in_range(unsigned long start_pfn, extern void get_pfn_range_for_nid(unsigned int nid, unsigned long *start_pfn, unsigned long *end_pfn); extern unsigned long find_min_pfn_with_active_regions(void); -extern unsigned long find_max_pfn_with_active_regions(void); extern void free_bootmem_with_active_regions(int nid, unsigned long max_low_pfn); typedef int (*work_fn_t)(unsigned long, unsigned long, void *); @@ -1104,6 +1105,9 @@ extern struct vm_area_struct *copy_vma(struct vm_area_struct **, unsigned long addr, unsigned long len, pgoff_t pgoff); extern void exit_mmap(struct mm_struct *); +extern int mm_take_all_locks(struct mm_struct *mm); +extern void mm_drop_all_locks(struct mm_struct *mm); + #ifdef CONFIG_PROC_FS /* From fs/proc/base.c. callers must _not_ hold the mm's exe_file_lock */ extern void added_exe_file_vma(struct mm_struct *mm); diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 746f975b58e..386edbe2cb4 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -10,6 +10,7 @@ #include <linux/rbtree.h> #include <linux/rwsem.h> #include <linux/completion.h> +#include <linux/cpumask.h> #include <asm/page.h> #include <asm/mmu.h> @@ -253,6 +254,9 @@ struct mm_struct { struct file *exe_file; unsigned long num_exe_file_vmas; #endif +#ifdef CONFIG_MMU_NOTIFIER + struct mmu_notifier_mm *mmu_notifier_mm; +#endif }; #endif /* _LINUX_MM_TYPES_H */ diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h new file mode 100644 index 00000000000..b77486d152c --- /dev/null +++ b/include/linux/mmu_notifier.h @@ -0,0 +1,279 @@ +#ifndef _LINUX_MMU_NOTIFIER_H +#define _LINUX_MMU_NOTIFIER_H + +#include <linux/list.h> +#include <linux/spinlock.h> +#include <linux/mm_types.h> + +struct mmu_notifier; +struct mmu_notifier_ops; + +#ifdef CONFIG_MMU_NOTIFIER + +/* + * The mmu notifier_mm structure is allocated and installed in + * mm->mmu_notifier_mm inside the mm_take_all_locks() protected + * critical section and it's released only when mm_count reaches zero + * in mmdrop(). + */ +struct mmu_notifier_mm { + /* all mmu notifiers registerd in this mm are queued in this list */ + struct hlist_head list; + /* to serialize the list modifications and hlist_unhashed */ + spinlock_t lock; +}; + +struct mmu_notifier_ops { + /* + * Called either by mmu_notifier_unregister or when the mm is + * being destroyed by exit_mmap, always before all pages are + * freed. This can run concurrently with other mmu notifier + * methods (the ones invoked outside the mm context) and it + * should tear down all secondary mmu mappings and freeze the + * secondary mmu. If this method isn't implemented you've to + * be sure that nothing could possibly write to the pages + * through the secondary mmu by the time the last thread with + * tsk->mm == mm exits. + * + * As side note: the pages freed after ->release returns could + * be immediately reallocated by the gart at an alias physical + * address with a different cache model, so if ->release isn't + * implemented because all _software_ driven memory accesses + * through the secondary mmu are terminated by the time the + * last thread of this mm quits, you've also to be sure that + * speculative _hardware_ operations can't allocate dirty + * cachelines in the cpu that could not be snooped and made + * coherent with the other read and write operations happening + * through the gart alias address, so leading to memory + * corruption. + */ + void (*release)(struct mmu_notifier *mn, + struct mm_struct *mm); + + /* + * clear_flush_young is called after the VM is + * test-and-clearing the young/accessed bitflag in the + * pte. This way the VM will provide proper aging to the + * accesses to the page through the secondary MMUs and not + * only to the ones through the Linux pte. + */ + int (*clear_flush_young)(struct mmu_notifier *mn, + struct mm_struct *mm, + unsigned long address); + + /* + * Before this is invoked any secondary MMU is still ok to + * read/write to the page previously pointed to by the Linux + * pte because the page hasn't been freed yet and it won't be + * freed until this returns. If required set_page_dirty has to + * be called internally to this method. + */ + void (*invalidate_page)(struct mmu_notifier *mn, + struct mm_struct *mm, + unsigned long address); + + /* + * invalidate_range_start() and invalidate_range_end() must be + * paired and are called only when the mmap_sem and/or the + * locks protecting the reverse maps are held. The subsystem + * must guarantee that no additional references are taken to + * the pages in the range established between the call to + * invalidate_range_start() and the matching call to + * invalidate_range_end(). + * + * Invalidation of multiple concurrent ranges may be + * optionally permitted by the driver. Either way the + * establishment of sptes is forbidden in the range passed to + * invalidate_range_begin/end for the whole duration of the + * invalidate_range_begin/end critical section. + * + * invalidate_range_start() is called when all pages in the + * range are still mapped and have at least a refcount of one. + * + * invalidate_range_end() is called when all pages in the + * range have been unmapped and the pages have been freed by + * the VM. + * + * The VM will remove the page table entries and potentially + * the page between invalidate_range_start() and + * invalidate_range_end(). If the page must not be freed + * because of pending I/O or other circumstances then the + * invalidate_range_start() callback (or the initial mapping + * by the driver) must make sure that the refcount is kept + * elevated. + * + * If the driver increases the refcount when the pages are + * initially mapped into an address space then either + * invalidate_range_start() or invalidate_range_end() may + * decrease the refcount. If the refcount is decreased on + * invalidate_range_start() then the VM can free pages as page + * table entries are removed. If the refcount is only + * droppped on invalidate_range_end() then the driver itself + * will drop the last refcount but it must take care to flush + * any secondary tlb before doing the final free on the + * page. Pages will no longer be referenced by the linux + * address space but may still be referenced by sptes until + * the last refcount is dropped. + */ + void (*invalidate_range_start)(struct mmu_notifier *mn, + struct mm_struct *mm, + unsigned long start, unsigned long end); + void (*invalidate_range_end)(struct mmu_notifier *mn, + struct mm_struct *mm, + unsigned long start, unsigned long end); +}; + +/* + * The notifier chains are protected by mmap_sem and/or the reverse map + * semaphores. Notifier chains are only changed when all reverse maps and + * the mmap_sem locks are taken. + * + * Therefore notifier chains can only be traversed when either + * + * 1. mmap_sem is held. + * 2. One of the reverse map locks is held (i_mmap_lock or anon_vma->lock). + * 3. No other concurrent thread can access the list (release) + */ +struct mmu_notifier { + struct hlist_node hlist; + const struct mmu_notifier_ops *ops; +}; + +static inline int mm_has_notifiers(struct mm_struct *mm) +{ + return unlikely(mm->mmu_notifier_mm); +} + +extern int mmu_notifier_register(struct mmu_notifier *mn, + struct mm_struct *mm); +extern int __mmu_notifier_register(struct mmu_notifier *mn, + struct mm_struct *mm); +extern void mmu_notifier_unregister(struct mmu_notifier *mn, + struct mm_struct *mm); +extern void __mmu_notifier_mm_destroy(struct mm_struct *mm); +extern void __mmu_notifier_release(struct mm_struct *mm); +extern int __mmu_notifier_clear_flush_young(struct mm_struct *mm, + unsigned long address); +extern void __mmu_notifier_invalidate_page(struct mm_struct *mm, + unsigned long address); +extern void __mmu_notifier_invalidate_range_start(struct mm_struct *mm, + unsigned long start, unsigned long end); +extern void __mmu_notifier_invalidate_range_end(struct mm_struct *mm, + unsigned long start, unsigned long end); + +static inline void mmu_notifier_release(struct mm_struct *mm) +{ + if (mm_has_notifiers(mm)) + __mmu_notifier_release(mm); +} + +static inline int mmu_notifier_clear_flush_young(struct mm_struct *mm, + unsigned long address) +{ + if (mm_has_notifiers(mm)) + return __mmu_notifier_clear_flush_young(mm, address); + return 0; +} + +static inline void mmu_notifier_invalidate_page(struct mm_struct *mm, + unsigned long address) +{ + if (mm_has_notifiers(mm)) + __mmu_notifier_invalidate_page(mm, address); +} + +static inline void mmu_notifier_invalidate_range_start(struct mm_struct *mm, + unsigned long start, unsigned long end) +{ + if (mm_has_notifiers(mm)) + __mmu_notifier_invalidate_range_start(mm, start, end); +} + +static inline void mmu_notifier_invalidate_range_end(struct mm_struct *mm, + unsigned long start, unsigned long end) +{ + if (mm_has_notifiers(mm)) + __mmu_notifier_invalidate_range_end(mm, start, end); +} + +static inline void mmu_notifier_mm_init(struct mm_struct *mm) +{ + mm->mmu_notifier_mm = NULL; +} + +static inline void mmu_notifier_mm_destroy(struct mm_struct *mm) +{ + if (mm_has_notifiers(mm)) + __mmu_notifier_mm_destroy(mm); +} + +/* + * These two macros will sometime replace ptep_clear_flush. + * ptep_clear_flush is impleemnted as macro itself, so this also is + * implemented as a macro until ptep_clear_flush will converted to an + * inline function, to diminish the risk of compilation failure. The + * invalidate_page method over time can be moved outside the PT lock + * and these two macros can be later removed. + */ +#define ptep_clear_flush_notify(__vma, __address, __ptep) \ +({ \ + pte_t __pte; \ + struct vm_area_struct *___vma = __vma; \ + unsigned long ___address = __address; \ + __pte = ptep_clear_flush(___vma, ___address, __ptep); \ + mmu_notifier_invalidate_page(___vma->vm_mm, ___address); \ + __pte; \ +}) + +#define ptep_clear_flush_young_notify(__vma, __address, __ptep) \ +({ \ + int __young; \ + struct vm_area_struct *___vma = __vma; \ + unsigned long ___address = __address; \ + __young = ptep_clear_flush_young(___vma, ___address, __ptep); \ + __young |= mmu_notifier_clear_flush_young(___vma->vm_mm, \ + ___address); \ + __young; \ +}) + +#else /* CONFIG_MMU_NOTIFIER */ + +static inline void mmu_notifier_release(struct mm_struct *mm) +{ +} + +static inline int mmu_notifier_clear_flush_young(struct mm_struct *mm, + unsigned long address) +{ + return 0; +} + +static inline void mmu_notifier_invalidate_page(struct mm_struct *mm, + unsigned long address) +{ +} + +static inline void mmu_notifier_invalidate_range_start(struct mm_struct *mm, + unsigned long start, unsigned long end) +{ +} + +static inline void mmu_notifier_invalidate_range_end(struct mm_struct *mm, + unsigned long start, unsigned long end) +{ +} + +static inline void mmu_notifier_mm_init(struct mm_struct *mm) +{ +} + +static inline void mmu_notifier_mm_destroy(struct mm_struct *mm) +{ +} + +#define ptep_clear_flush_young_notify ptep_clear_flush_young +#define ptep_clear_flush_notify ptep_clear_flush + +#endif /* CONFIG_MMU_NOTIFIER */ + +#endif /* _LINUX_MMU_NOTIFIER_H */ diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 54590a9a103..25aaccdb2f2 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -239,9 +239,6 @@ static inline void __SetPageUptodate(struct page *page) { smp_wmb(); __set_bit(PG_uptodate, &(page)->flags); -#ifdef CONFIG_S390 - page_clear_dirty(page); -#endif } static inline void SetPageUptodate(struct page *page) diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index a81d8189042..69ed3cb1197 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -20,6 +20,7 @@ */ #define AS_EIO (__GFP_BITS_SHIFT + 0) /* IO error on async write */ #define AS_ENOSPC (__GFP_BITS_SHIFT + 1) /* ENOSPC on async write */ +#define AS_MM_ALL_LOCKS (__GFP_BITS_SHIFT + 2) /* under mm_take_all_locks() */ static inline void mapping_set_error(struct address_space *mapping, int error) { @@ -142,6 +143,29 @@ static inline int page_cache_get_speculative(struct page *page) return 1; } +/* + * Same as above, but add instead of inc (could just be merged) + */ +static inline int page_cache_add_speculative(struct page *page, int count) +{ + VM_BUG_ON(in_interrupt()); + +#if !defined(CONFIG_SMP) && defined(CONFIG_CLASSIC_RCU) +# ifdef CONFIG_PREEMPT + VM_BUG_ON(!in_atomic()); +# endif + VM_BUG_ON(page_count(page) == 0); + atomic_add(count, &page->_count); + +#else + if (unlikely(!atomic_add_unless(&page->_count, count, 0))) + return 0; +#endif + VM_BUG_ON(PageCompound(page) && page != compound_head(page)); + + return 1; +} + static inline int page_freeze_refs(struct page *page, int count) { return likely(atomic_cmpxchg(&page->_count, count, 0) == count); diff --git a/include/linux/pci-aspm.h b/include/linux/pci-aspm.h index a1a1e618e99..91ba0b338b4 100644 --- a/include/linux/pci-aspm.h +++ b/include/linux/pci-aspm.h @@ -27,6 +27,7 @@ extern void pcie_aspm_init_link_state(struct pci_dev *pdev); extern void pcie_aspm_exit_link_state(struct pci_dev *pdev); extern void pcie_aspm_pm_state_change(struct pci_dev *pdev); extern void pci_disable_link_state(struct pci_dev *pdev, int state); +extern void pcie_no_aspm(void); #else static inline void pcie_aspm_init_link_state(struct pci_dev *pdev) { @@ -40,6 +41,10 @@ static inline void pcie_aspm_pm_state_change(struct pci_dev *pdev) static inline void pci_disable_link_state(struct pci_dev *pdev, int state) { } + +static inline void pcie_no_aspm(void) +{ +} #endif #ifdef CONFIG_PCIEASPM_DEBUG /* this depends on CONFIG_PCIEASPM */ diff --git a/include/linux/pci.h b/include/linux/pci.h index 1d296d31abe..825be3878f6 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -124,6 +124,8 @@ enum pci_dev_flags { * generation too. */ PCI_DEV_FLAGS_MSI_INTX_DISABLE_BUG = (__force pci_dev_flags_t) 1, + /* Device configuration is irrevocably lost if disabled into D3 */ + PCI_DEV_FLAGS_NO_D3 = (__force pci_dev_flags_t) 2, }; typedef unsigned short __bitwise pci_bus_flags_t; diff --git a/include/linux/pci_regs.h b/include/linux/pci_regs.h index 19958b92990..450684f7eaa 100644 --- a/include/linux/pci_regs.h +++ b/include/linux/pci_regs.h @@ -374,6 +374,7 @@ #define PCI_EXP_DEVCAP_ATN_BUT 0x1000 /* Attention Button Present */ #define PCI_EXP_DEVCAP_ATN_IND 0x2000 /* Attention Indicator Present */ #define PCI_EXP_DEVCAP_PWR_IND 0x4000 /* Power Indicator Present */ +#define PCI_EXP_DEVCAP_RBER 0x8000 /* Role-Based Error Reporting */ #define PCI_EXP_DEVCAP_PWR_VAL 0x3fc0000 /* Slot Power Limit Value */ #define PCI_EXP_DEVCAP_PWR_SCL 0xc000000 /* Slot Power Limit Scale */ #define PCI_EXP_DEVCTL 8 /* Device Control */ diff --git a/include/linux/rculist.h b/include/linux/rculist.h index b0f39be08b6..eb4443c7e05 100644 --- a/include/linux/rculist.h +++ b/include/linux/rculist.h @@ -98,6 +98,34 @@ static inline void list_del_rcu(struct list_head *entry) } /** + * hlist_del_init_rcu - deletes entry from hash list with re-initialization + * @n: the element to delete from the hash list. + * + * Note: list_unhashed() on the node return true after this. It is + * useful for RCU based read lockfree traversal if the writer side + * must know if the list entry is still hashed or already unhashed. + * + * In particular, it means that we can not poison the forward pointers + * that may still be used for walking the hash list and we can only + * zero the pprev pointer so list_unhashed() will return true after + * this. + * + * The caller must take whatever precautions are necessary (such as + * holding appropriate locks) to avoid racing with another + * list-mutation primitive, such as hlist_add_head_rcu() or + * hlist_del_rcu(), running on this same list. However, it is + * perfectly legal to run concurrently with the _rcu list-traversal + * primitives, such as hlist_for_each_entry_rcu(). + */ +static inline void hlist_del_init_rcu(struct hlist_node *n) +{ + if (!hlist_unhashed(n)) { + __hlist_del(n); + n->pprev = NULL; + } +} + +/** * list_replace_rcu - replace old entry by new one * @old : the element to be replaced * @new : the new element to insert diff --git a/include/linux/rfkill.h b/include/linux/rfkill.h index c5f6e54ec6a..741d1a62cc3 100644 --- a/include/linux/rfkill.h +++ b/include/linux/rfkill.h @@ -68,7 +68,8 @@ enum rfkill_state { * @user_claim_unsupported: Whether the hardware supports exclusive * RF-kill control by userspace. Set this before registering. * @user_claim: Set when the switch is controlled exlusively by userspace. - * @mutex: Guards switch state transitions + * @mutex: Guards switch state transitions. It serializes callbacks + * and also protects the state. * @data: Pointer to the RF button drivers private data which will be * passed along when toggling radio state. * @toggle_radio(): Mandatory handler to control state of the radio. @@ -89,12 +90,13 @@ struct rfkill { const char *name; enum rfkill_type type; - enum rfkill_state state; bool user_claim_unsupported; bool user_claim; + /* the mutex serializes callbacks and also protects + * the state */ struct mutex mutex; - + enum rfkill_state state; void *data; int (*toggle_radio)(void *data, enum rfkill_state state); int (*get_state)(void *data, enum rfkill_state *state); diff --git a/include/linux/rmap.h b/include/linux/rmap.h index 1383692ac5b..69407f85e10 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -26,6 +26,14 @@ */ struct anon_vma { spinlock_t lock; /* Serialize access to vma list */ + /* + * NOTE: the LSB of the head.next is set by + * mm_take_all_locks() _after_ taking the above lock. So the + * head must only be read/written after taking the above lock + * to be sure to see a valid next pointer. The LSB bit itself + * is serialized by a system wide lock only visible to + * mm_take_all_locks() (mm_all_locks_mutex). + */ struct list_head head; /* List of private "related" vmas */ }; diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 7ea44f6621f..a640385e059 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -316,7 +316,10 @@ struct sk_buff { #ifdef CONFIG_IPV6_NDISC_NODETYPE __u8 ndisc_nodetype:2; #endif - /* 14 bit hole */ +#if defined(CONFIG_MAC80211) || defined(CONFIG_MAC80211_MODULE) + __u8 do_not_encrypt:1; +#endif + /* 0/13/14 bit hole */ #ifdef CONFIG_NET_DMA dma_cookie_t dma_cookie; diff --git a/include/linux/snmp.h b/include/linux/snmp.h index 5df62ef1280..7a6e6bba4a7 100644 --- a/include/linux/snmp.h +++ b/include/linux/snmp.h @@ -214,6 +214,8 @@ enum LINUX_MIB_TCPDSACKIGNOREDOLD, /* TCPSACKIgnoredOld */ LINUX_MIB_TCPDSACKIGNOREDNOUNDO, /* TCPSACKIgnoredNoUndo */ LINUX_MIB_TCPSPURIOUSRTOS, /* TCPSpuriousRTOs */ + LINUX_MIB_TCPMD5NOTFOUND, /* TCPMD5NotFound */ + LINUX_MIB_TCPMD5UNEXPECTED, /* TCPMD5Unexpected */ __LINUX_MIB_MAX }; diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 4dd3d93e196..b52721008be 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -206,8 +206,6 @@ struct ieee80211_bss_conf { * These flags are used with the @flags member of &ieee80211_tx_info. * * @IEEE80211_TX_CTL_REQ_TX_STATUS: request TX status callback for this frame. - * @IEEE80211_TX_CTL_DO_NOT_ENCRYPT: send this frame without encryption; - * e.g., for EAPOL frame * @IEEE80211_TX_CTL_USE_RTS_CTS: use RTS-CTS before sending frame * @IEEE80211_TX_CTL_USE_CTS_PROTECT: use CTS protection for the frame (e.g., * for combined 802.11g / 802.11b networks) @@ -220,7 +218,6 @@ struct ieee80211_bss_conf { * @IEEE80211_TX_CTL_SHORT_PREAMBLE: TBD * @IEEE80211_TX_CTL_LONG_RETRY_LIMIT: this frame should be send using the * through set_retry_limit configured long retry value - * @IEEE80211_TX_CTL_EAPOL_FRAME: internal to mac80211 * @IEEE80211_TX_CTL_SEND_AFTER_DTIM: send this frame after DTIM beacon * @IEEE80211_TX_CTL_AMPDU: this frame should be sent as part of an A-MPDU * @IEEE80211_TX_CTL_OFDM_HT: this frame can be sent in HT OFDM rates. number @@ -253,7 +250,6 @@ struct ieee80211_bss_conf { */ enum mac80211_tx_control_flags { IEEE80211_TX_CTL_REQ_TX_STATUS = BIT(0), - IEEE80211_TX_CTL_DO_NOT_ENCRYPT = BIT(1), IEEE80211_TX_CTL_USE_RTS_CTS = BIT(2), IEEE80211_TX_CTL_USE_CTS_PROTECT = BIT(3), IEEE80211_TX_CTL_NO_ACK = BIT(4), @@ -263,7 +259,6 @@ enum mac80211_tx_control_flags { IEEE80211_TX_CTL_FIRST_FRAGMENT = BIT(8), IEEE80211_TX_CTL_SHORT_PREAMBLE = BIT(9), IEEE80211_TX_CTL_LONG_RETRY_LIMIT = BIT(10), - IEEE80211_TX_CTL_EAPOL_FRAME = BIT(11), IEEE80211_TX_CTL_SEND_AFTER_DTIM = BIT(12), IEEE80211_TX_CTL_AMPDU = BIT(13), IEEE80211_TX_CTL_OFDM_HT = BIT(14), @@ -323,7 +318,6 @@ struct ieee80211_tx_info { struct ieee80211_vif *vif; struct ieee80211_key_conf *hw_key; unsigned long jiffies; - int ifindex; u16 aid; s8 rts_cts_rate_idx, alt_retry_rate_idx; u8 retry_limit; @@ -746,7 +740,6 @@ enum ieee80211_tkip_key_type { * Measurement, Channel Switch, Quieting, TPC */ enum ieee80211_hw_flags { - IEEE80211_HW_HOST_GEN_BEACON_TEMPLATE = 1<<0, IEEE80211_HW_RX_INCLUDES_FCS = 1<<1, IEEE80211_HW_HOST_BROADCAST_PS_BUFFERING = 1<<2, IEEE80211_HW_2GHZ_SHORT_SLOT_INCAPABLE = 1<<3, diff --git a/init/Kconfig b/init/Kconfig index 43d6989c275..250e02c8f8f 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -802,6 +802,10 @@ config PROC_PAGE_MONITOR endmenu # General setup +config HAVE_GENERIC_DMA_COHERENT + bool + default n + config SLABINFO bool depends on PROC_FS diff --git a/init/calibrate.c b/init/calibrate.c index 7963e3fc51d..a379c906119 100644 --- a/init/calibrate.c +++ b/init/calibrate.c @@ -170,7 +170,7 @@ void __cpuinit calibrate_delay(void) loops_per_jiffy &= ~loopbit; } } - printk(KERN_INFO "%lu.%02lu BogoMIPS (lpj=%lu)\n", + printk(KERN_CONT "%lu.%02lu BogoMIPS (lpj=%lu)\n", loops_per_jiffy/(500000/HZ), (loops_per_jiffy/(5000/HZ)) % 100, loops_per_jiffy); } diff --git a/init/main.c b/init/main.c index 20fdc9884b7..9c3b68b86ca 100644 --- a/init/main.c +++ b/init/main.c @@ -635,10 +635,11 @@ asmlinkage void __init start_kernel(void) #ifdef CONFIG_BLK_DEV_INITRD if (initrd_start && !initrd_below_start_ok && - page_to_pfn(virt_to_page(initrd_start)) < min_low_pfn) { + page_to_pfn(virt_to_page((void *)initrd_start)) < min_low_pfn) { printk(KERN_CRIT "initrd overwritten (0x%08lx < 0x%08lx) - " "disabling it.\n", - page_to_pfn(virt_to_page(initrd_start)), min_low_pfn); + page_to_pfn(virt_to_page((void *)initrd_start)), + min_low_pfn); initrd_start = 0; } #endif diff --git a/kernel/Makefile b/kernel/Makefile index 54f69837d35..4e1d7df7c3e 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -84,6 +84,7 @@ obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o obj-$(CONFIG_TASKSTATS) += taskstats.o tsacct.o obj-$(CONFIG_MARKERS) += marker.o obj-$(CONFIG_LATENCYTOP) += latencytop.o +obj-$(CONFIG_HAVE_GENERIC_DMA_COHERENT) += dma-coherent.o obj-$(CONFIG_FTRACE) += trace/ obj-$(CONFIG_TRACING) += trace/ obj-$(CONFIG_SMP) += sched_cpupri.o diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 657f8f8d93a..13932abde15 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -355,6 +355,17 @@ static struct css_set *find_existing_css_set( return NULL; } +static void free_cg_links(struct list_head *tmp) +{ + struct cg_cgroup_link *link; + struct cg_cgroup_link *saved_link; + + list_for_each_entry_safe(link, saved_link, tmp, cgrp_link_list) { + list_del(&link->cgrp_link_list); + kfree(link); + } +} + /* * allocate_cg_links() allocates "count" cg_cgroup_link structures * and chains them on tmp through their cgrp_link_list fields. Returns 0 on @@ -363,17 +374,12 @@ static struct css_set *find_existing_css_set( static int allocate_cg_links(int count, struct list_head *tmp) { struct cg_cgroup_link *link; - struct cg_cgroup_link *saved_link; int i; INIT_LIST_HEAD(tmp); for (i = 0; i < count; i++) { link = kmalloc(sizeof(*link), GFP_KERNEL); if (!link) { - list_for_each_entry_safe(link, saved_link, tmp, - cgrp_link_list) { - list_del(&link->cgrp_link_list); - kfree(link); - } + free_cg_links(tmp); return -ENOMEM; } list_add(&link->cgrp_link_list, tmp); @@ -381,17 +387,6 @@ static int allocate_cg_links(int count, struct list_head *tmp) return 0; } -static void free_cg_links(struct list_head *tmp) -{ - struct cg_cgroup_link *link; - struct cg_cgroup_link *saved_link; - - list_for_each_entry_safe(link, saved_link, tmp, cgrp_link_list) { - list_del(&link->cgrp_link_list); - kfree(link); - } -} - /* * find_css_set() takes an existing cgroup group and a * cgroup object, and returns a css_set object that's @@ -956,7 +951,6 @@ static int cgroup_get_sb(struct file_system_type *fs_type, struct super_block *sb; struct cgroupfs_root *root; struct list_head tmp_cg_links; - INIT_LIST_HEAD(&tmp_cg_links); /* First find the desired set of subsystems */ ret = parse_cgroupfs_options(data, &opts); @@ -1424,14 +1418,17 @@ static ssize_t cgroup_write_string(struct cgroup *cgrp, struct cftype *cft, if (buffer == NULL) return -ENOMEM; } - if (nbytes && copy_from_user(buffer, userbuf, nbytes)) - return -EFAULT; + if (nbytes && copy_from_user(buffer, userbuf, nbytes)) { + retval = -EFAULT; + goto out; + } buffer[nbytes] = 0; /* nul-terminate */ strstrip(buffer); retval = cft->write_string(cgrp, cft, buffer); if (!retval) retval = nbytes; +out: if (buffer != local_buffer) kfree(buffer); return retval; @@ -2371,7 +2368,7 @@ static int cgroup_mkdir(struct inode *dir, struct dentry *dentry, int mode) return cgroup_create(c_parent, dentry, mode | S_IFDIR); } -static inline int cgroup_has_css_refs(struct cgroup *cgrp) +static int cgroup_has_css_refs(struct cgroup *cgrp) { /* Check the reference count on each subsystem. Since we * already established that there are no tasks in the diff --git a/kernel/cpu.c b/kernel/cpu.c index 29510d68338..e202a68d1cc 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -455,3 +455,28 @@ out: #endif /* CONFIG_PM_SLEEP_SMP */ #endif /* CONFIG_SMP */ + +/* + * cpu_bit_bitmap[] is a special, "compressed" data structure that + * represents all NR_CPUS bits binary values of 1<<nr. + * + * It is used by cpumask_of_cpu() to get a constant address to a CPU + * mask value that has a single bit set only. + */ + +/* cpu_bit_bitmap[0] is empty - so we can back into it */ +#define MASK_DECLARE_1(x) [x+1][0] = 1UL << (x) +#define MASK_DECLARE_2(x) MASK_DECLARE_1(x), MASK_DECLARE_1(x+1) +#define MASK_DECLARE_4(x) MASK_DECLARE_2(x), MASK_DECLARE_2(x+2) +#define MASK_DECLARE_8(x) MASK_DECLARE_4(x), MASK_DECLARE_4(x+4) + +const unsigned long cpu_bit_bitmap[BITS_PER_LONG+1][BITS_TO_LONGS(NR_CPUS)] = { + + MASK_DECLARE_8(0), MASK_DECLARE_8(8), + MASK_DECLARE_8(16), MASK_DECLARE_8(24), +#if BITS_PER_LONG > 32 + MASK_DECLARE_8(32), MASK_DECLARE_8(40), + MASK_DECLARE_8(48), MASK_DECLARE_8(56), +#endif +}; +EXPORT_SYMBOL_GPL(cpu_bit_bitmap); diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 91cf85b36dd..d5ab79cf516 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -54,7 +54,6 @@ #include <asm/uaccess.h> #include <asm/atomic.h> #include <linux/mutex.h> -#include <linux/kfifo.h> #include <linux/workqueue.h> #include <linux/cgroup.h> @@ -486,13 +485,38 @@ static int cpusets_overlap(struct cpuset *a, struct cpuset *b) static void update_domain_attr(struct sched_domain_attr *dattr, struct cpuset *c) { - if (!dattr) - return; if (dattr->relax_domain_level < c->relax_domain_level) dattr->relax_domain_level = c->relax_domain_level; return; } +static void +update_domain_attr_tree(struct sched_domain_attr *dattr, struct cpuset *c) +{ + LIST_HEAD(q); + + list_add(&c->stack_list, &q); + while (!list_empty(&q)) { + struct cpuset *cp; + struct cgroup *cont; + struct cpuset *child; + + cp = list_first_entry(&q, struct cpuset, stack_list); + list_del(q.next); + + if (cpus_empty(cp->cpus_allowed)) + continue; + + if (is_sched_load_balance(cp)) + update_domain_attr(dattr, cp); + + list_for_each_entry(cont, &cp->css.cgroup->children, sibling) { + child = cgroup_cs(cont); + list_add_tail(&child->stack_list, &q); + } + } +} + /* * rebuild_sched_domains() * @@ -532,7 +556,7 @@ update_domain_attr(struct sched_domain_attr *dattr, struct cpuset *c) * So the reverse nesting would risk an ABBA deadlock. * * The three key local variables below are: - * q - a kfifo queue of cpuset pointers, used to implement a + * q - a linked-list queue of cpuset pointers, used to implement a * top-down scan of all cpusets. This scan loads a pointer * to each cpuset marked is_sched_load_balance into the * array 'csa'. For our purposes, rebuilding the schedulers @@ -567,7 +591,7 @@ update_domain_attr(struct sched_domain_attr *dattr, struct cpuset *c) void rebuild_sched_domains(void) { - struct kfifo *q; /* queue of cpusets to be scanned */ + LIST_HEAD(q); /* queue of cpusets to be scanned*/ struct cpuset *cp; /* scans q */ struct cpuset **csa; /* array of all cpuset ptrs */ int csn; /* how many cpuset ptrs in csa so far */ @@ -577,7 +601,6 @@ void rebuild_sched_domains(void) int ndoms; /* number of sched domains in result */ int nslot; /* next empty doms[] cpumask_t slot */ - q = NULL; csa = NULL; doms = NULL; dattr = NULL; @@ -591,35 +614,42 @@ void rebuild_sched_domains(void) dattr = kmalloc(sizeof(struct sched_domain_attr), GFP_KERNEL); if (dattr) { *dattr = SD_ATTR_INIT; - update_domain_attr(dattr, &top_cpuset); + update_domain_attr_tree(dattr, &top_cpuset); } *doms = top_cpuset.cpus_allowed; goto rebuild; } - q = kfifo_alloc(number_of_cpusets * sizeof(cp), GFP_KERNEL, NULL); - if (IS_ERR(q)) - goto done; csa = kmalloc(number_of_cpusets * sizeof(cp), GFP_KERNEL); if (!csa) goto done; csn = 0; - cp = &top_cpuset; - __kfifo_put(q, (void *)&cp, sizeof(cp)); - while (__kfifo_get(q, (void *)&cp, sizeof(cp))) { + list_add(&top_cpuset.stack_list, &q); + while (!list_empty(&q)) { struct cgroup *cont; struct cpuset *child; /* scans child cpusets of cp */ + cp = list_first_entry(&q, struct cpuset, stack_list); + list_del(q.next); + if (cpus_empty(cp->cpus_allowed)) continue; - if (is_sched_load_balance(cp)) + /* + * All child cpusets contain a subset of the parent's cpus, so + * just skip them, and then we call update_domain_attr_tree() + * to calc relax_domain_level of the corresponding sched + * domain. + */ + if (is_sched_load_balance(cp)) { csa[csn++] = cp; + continue; + } list_for_each_entry(cont, &cp->css.cgroup->children, sibling) { child = cgroup_cs(cont); - __kfifo_put(q, (void *)&child, sizeof(cp)); + list_add_tail(&child->stack_list, &q); } } @@ -686,7 +716,7 @@ restart: cpus_or(*dp, *dp, b->cpus_allowed); b->pn = -1; if (dattr) - update_domain_attr(dattr + update_domain_attr_tree(dattr + nslot, b); } } @@ -702,8 +732,6 @@ rebuild: put_online_cpus(); done: - if (q && !IS_ERR(q)) - kfifo_free(q); kfree(csa); /* Don't kfree(doms) -- partition_sched_domains() does that. */ /* Don't kfree(dattr) -- partition_sched_domains() does that. */ @@ -1833,24 +1861,21 @@ static void remove_tasks_in_empty_cpuset(struct cpuset *cs) */ static void scan_for_empty_cpusets(const struct cpuset *root) { + LIST_HEAD(queue); struct cpuset *cp; /* scans cpusets being updated */ struct cpuset *child; /* scans child cpusets of cp */ - struct list_head queue; struct cgroup *cont; nodemask_t oldmems; - INIT_LIST_HEAD(&queue); - list_add_tail((struct list_head *)&root->stack_list, &queue); while (!list_empty(&queue)) { - cp = container_of(queue.next, struct cpuset, stack_list); + cp = list_first_entry(&queue, struct cpuset, stack_list); list_del(queue.next); list_for_each_entry(cont, &cp->css.cgroup->children, sibling) { child = cgroup_cs(cont); list_add_tail(&child->stack_list, &queue); } - cont = cp->css.cgroup; /* Continue past cpusets with all cpus, mems online */ if (cpus_subset(cp->cpus_allowed, cpu_online_map) && diff --git a/kernel/dma-coherent.c b/kernel/dma-coherent.c new file mode 100644 index 00000000000..7517115a8cc --- /dev/null +++ b/kernel/dma-coherent.c @@ -0,0 +1,154 @@ +/* + * Coherent per-device memory handling. + * Borrowed from i386 + */ +#include <linux/kernel.h> +#include <linux/dma-mapping.h> + +struct dma_coherent_mem { + void *virt_base; + u32 device_base; + int size; + int flags; + unsigned long *bitmap; +}; + +int dma_declare_coherent_memory(struct device *dev, dma_addr_t bus_addr, + dma_addr_t device_addr, size_t size, int flags) +{ + void __iomem *mem_base = NULL; + int pages = size >> PAGE_SHIFT; + int bitmap_size = BITS_TO_LONGS(pages) * sizeof(long); + + if ((flags & (DMA_MEMORY_MAP | DMA_MEMORY_IO)) == 0) + goto out; + if (!size) + goto out; + if (dev->dma_mem) + goto out; + + /* FIXME: this routine just ignores DMA_MEMORY_INCLUDES_CHILDREN */ + + mem_base = ioremap(bus_addr, size); + if (!mem_base) + goto out; + + dev->dma_mem = kzalloc(sizeof(struct dma_coherent_mem), GFP_KERNEL); + if (!dev->dma_mem) + goto out; + dev->dma_mem->bitmap = kzalloc(bitmap_size, GFP_KERNEL); + if (!dev->dma_mem->bitmap) + goto free1_out; + + dev->dma_mem->virt_base = mem_base; + dev->dma_mem->device_base = device_addr; + dev->dma_mem->size = pages; + dev->dma_mem->flags = flags; + + if (flags & DMA_MEMORY_MAP) + return DMA_MEMORY_MAP; + + return DMA_MEMORY_IO; + + free1_out: + kfree(dev->dma_mem); + out: + if (mem_base) + iounmap(mem_base); + return 0; +} +EXPORT_SYMBOL(dma_declare_coherent_memory); + +void dma_release_declared_memory(struct device *dev) +{ + struct dma_coherent_mem *mem = dev->dma_mem; + + if (!mem) + return; + dev->dma_mem = NULL; + iounmap(mem->virt_base); + kfree(mem->bitmap); + kfree(mem); +} +EXPORT_SYMBOL(dma_release_declared_memory); + +void *dma_mark_declared_memory_occupied(struct device *dev, + dma_addr_t device_addr, size_t size) +{ + struct dma_coherent_mem *mem = dev->dma_mem; + int pos, err; + int pages = (size + (device_addr & ~PAGE_MASK) + PAGE_SIZE - 1); + + pages >>= PAGE_SHIFT; + + if (!mem) + return ERR_PTR(-EINVAL); + + pos = (device_addr - mem->device_base) >> PAGE_SHIFT; + err = bitmap_allocate_region(mem->bitmap, pos, get_order(pages)); + if (err != 0) + return ERR_PTR(err); + return mem->virt_base + (pos << PAGE_SHIFT); +} +EXPORT_SYMBOL(dma_mark_declared_memory_occupied); + +/** + * Try to allocate memory from the per-device coherent area. + * + * @dev: device from which we allocate memory + * @size: size of requested memory area + * @dma_handle: This will be filled with the correct dma handle + * @ret: This pointer will be filled with the virtual address + * to allocated area. + * + * This function should be only called from per-arch %dma_alloc_coherent() + * to support allocation from per-device coherent memory pools. + * + * Returns 0 if dma_alloc_coherent should continue with allocating from + * generic memory areas, or !0 if dma_alloc_coherent should return %ret. + */ +int dma_alloc_from_coherent(struct device *dev, ssize_t size, + dma_addr_t *dma_handle, void **ret) +{ + struct dma_coherent_mem *mem = dev ? dev->dma_mem : NULL; + int order = get_order(size); + + if (mem) { + int page = bitmap_find_free_region(mem->bitmap, mem->size, + order); + if (page >= 0) { + *dma_handle = mem->device_base + (page << PAGE_SHIFT); + *ret = mem->virt_base + (page << PAGE_SHIFT); + memset(*ret, 0, size); + } else if (mem->flags & DMA_MEMORY_EXCLUSIVE) + *ret = NULL; + } + return (mem != NULL); +} + +/** + * Try to free the memory allocated from per-device coherent memory pool. + * @dev: device from which the memory was allocated + * @order: the order of pages allocated + * @vaddr: virtual address of allocated pages + * + * This checks whether the memory was allocated from the per-device + * coherent memory pool and if so, releases that memory. + * + * Returns 1 if we correctly released the memory, or 0 if + * %dma_release_coherent() should proceed with releasing memory from + * generic pools. + */ +int dma_release_from_coherent(struct device *dev, int order, void *vaddr) +{ + struct dma_coherent_mem *mem = dev ? dev->dma_mem : NULL; + + if (mem && vaddr >= mem->virt_base && vaddr < + (mem->virt_base + (mem->size << PAGE_SHIFT))) { + int page = (vaddr - mem->virt_base) >> PAGE_SHIFT; + + bitmap_release_region(mem->bitmap, page, order); + return 1; + } + return 0; +} diff --git a/kernel/fork.c b/kernel/fork.c index 8214ba7c8bb..7ce2ebe8479 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -27,6 +27,7 @@ #include <linux/key.h> #include <linux/binfmts.h> #include <linux/mman.h> +#include <linux/mmu_notifier.h> #include <linux/fs.h> #include <linux/nsproxy.h> #include <linux/capability.h> @@ -414,6 +415,7 @@ static struct mm_struct * mm_init(struct mm_struct * mm, struct task_struct *p) if (likely(!mm_alloc_pgd(mm))) { mm->def_flags = 0; + mmu_notifier_mm_init(mm); return mm; } @@ -446,6 +448,7 @@ void __mmdrop(struct mm_struct *mm) BUG_ON(mm == &init_mm); mm_free_pgd(mm); destroy_context(mm); + mmu_notifier_mm_destroy(mm); free_mm(mm); } EXPORT_SYMBOL_GPL(__mmdrop); diff --git a/kernel/marker.c b/kernel/marker.c index 971da531790..7d1faecd7a5 100644 --- a/kernel/marker.c +++ b/kernel/marker.c @@ -126,6 +126,11 @@ void marker_probe_cb(const struct marker *mdata, void *call_private, ...) struct marker_probe_closure *multi; int i; /* + * Read mdata->ptype before mdata->multi. + */ + smp_rmb(); + multi = mdata->multi; + /* * multi points to an array, therefore accessing the array * depends on reading multi. However, even in this case, * we must insure that the pointer is read _before_ the array @@ -133,7 +138,6 @@ void marker_probe_cb(const struct marker *mdata, void *call_private, ...) * in the fast path, so put the explicit barrier here. */ smp_read_barrier_depends(); - multi = mdata->multi; for (i = 0; multi[i].func; i++) { va_start(args, call_private); multi[i].func(multi[i].probe_private, call_private, @@ -175,6 +179,11 @@ void marker_probe_cb_noarg(const struct marker *mdata, void *call_private, ...) struct marker_probe_closure *multi; int i; /* + * Read mdata->ptype before mdata->multi. + */ + smp_rmb(); + multi = mdata->multi; + /* * multi points to an array, therefore accessing the array * depends on reading multi. However, even in this case, * we must insure that the pointer is read _before_ the array @@ -182,7 +191,6 @@ void marker_probe_cb_noarg(const struct marker *mdata, void *call_private, ...) * in the fast path, so put the explicit barrier here. */ smp_read_barrier_depends(); - multi = mdata->multi; for (i = 0; multi[i].func; i++) multi[i].func(multi[i].probe_private, call_private, mdata->format, &args); diff --git a/kernel/printk.c b/kernel/printk.c index a7f7559c5f6..b51b1567bb5 100644 --- a/kernel/printk.c +++ b/kernel/printk.c @@ -1309,14 +1309,14 @@ void tty_write_message(struct tty_struct *tty, char *msg) #if defined CONFIG_PRINTK -DEFINE_RATELIMIT_STATE(printk_ratelimit_state, 5 * HZ, 10); /* * printk rate limiting, lifted from the networking subsystem. * - * This enforces a rate limit: not more than one kernel message - * every printk_ratelimit_jiffies to make a denial-of-service - * attack impossible. + * This enforces a rate limit: not more than 10 kernel messages + * every 5s to make a denial-of-service attack impossible. */ +DEFINE_RATELIMIT_STATE(printk_ratelimit_state, 5 * HZ, 10); + int printk_ratelimit(void) { return __ratelimit(&printk_ratelimit_state); diff --git a/kernel/resource.c b/kernel/resource.c index 74af2d7cb5a..f5b518eabef 100644 --- a/kernel/resource.c +++ b/kernel/resource.c @@ -490,7 +490,7 @@ resource_size_t resource_alignment(struct resource *res) { switch (res->flags & (IORESOURCE_SIZEALIGN | IORESOURCE_STARTALIGN)) { case IORESOURCE_SIZEALIGN: - return res->end - res->start + 1; + return resource_size(res); case IORESOURCE_STARTALIGN: return res->start; default: diff --git a/kernel/sched.c b/kernel/sched.c index 0236958addc..21f7da94662 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -7671,34 +7671,34 @@ static ssize_t sched_power_savings_store(const char *buf, size_t count, int smt) } #ifdef CONFIG_SCHED_MC -static ssize_t sched_mc_power_savings_show(struct sys_device *dev, - struct sysdev_attribute *attr, char *page) +static ssize_t sched_mc_power_savings_show(struct sysdev_class *class, + char *page) { return sprintf(page, "%u\n", sched_mc_power_savings); } -static ssize_t sched_mc_power_savings_store(struct sys_device *dev, - struct sysdev_attribute *attr, +static ssize_t sched_mc_power_savings_store(struct sysdev_class *class, const char *buf, size_t count) { return sched_power_savings_store(buf, count, 0); } -static SYSDEV_ATTR(sched_mc_power_savings, 0644, sched_mc_power_savings_show, - sched_mc_power_savings_store); +static SYSDEV_CLASS_ATTR(sched_mc_power_savings, 0644, + sched_mc_power_savings_show, + sched_mc_power_savings_store); #endif #ifdef CONFIG_SCHED_SMT -static ssize_t sched_smt_power_savings_show(struct sys_device *dev, - struct sysdev_attribute *attr, char *page) +static ssize_t sched_smt_power_savings_show(struct sysdev_class *dev, + char *page) { return sprintf(page, "%u\n", sched_smt_power_savings); } -static ssize_t sched_smt_power_savings_store(struct sys_device *dev, - struct sysdev_attribute *attr, +static ssize_t sched_smt_power_savings_store(struct sysdev_class *dev, const char *buf, size_t count) { return sched_power_savings_store(buf, count, 1); } -static SYSDEV_ATTR(sched_smt_power_savings, 0644, sched_smt_power_savings_show, +static SYSDEV_CLASS_ATTR(sched_smt_power_savings, 0644, + sched_smt_power_savings_show, sched_smt_power_savings_store); #endif diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c index bf43284d685..80c4336f418 100644 --- a/kernel/time/tick-common.c +++ b/kernel/time/tick-common.c @@ -196,12 +196,10 @@ static int tick_check_new_device(struct clock_event_device *newdev) struct tick_device *td; int cpu, ret = NOTIFY_OK; unsigned long flags; - cpumask_of_cpu_ptr_declare(cpumask); spin_lock_irqsave(&tick_device_lock, flags); cpu = smp_processor_id(); - cpumask_of_cpu_ptr_next(cpumask, cpu); if (!cpu_isset(cpu, newdev->cpumask)) goto out_bc; @@ -209,7 +207,7 @@ static int tick_check_new_device(struct clock_event_device *newdev) curdev = td->evtdev; /* cpu local device ? */ - if (!cpus_equal(newdev->cpumask, *cpumask)) { + if (!cpus_equal(newdev->cpumask, cpumask_of_cpu(cpu))) { /* * If the cpu affinity of the device interrupt can not @@ -222,7 +220,7 @@ static int tick_check_new_device(struct clock_event_device *newdev) * If we have a cpu local device already, do not replace it * by a non cpu local device */ - if (curdev && cpus_equal(curdev->cpumask, *cpumask)) + if (curdev && cpus_equal(curdev->cpumask, cpumask_of_cpu(cpu))) goto out_bc; } @@ -254,7 +252,7 @@ static int tick_check_new_device(struct clock_event_device *newdev) curdev = NULL; } clockevents_exchange_device(curdev, newdev); - tick_setup_device(td, newdev, cpu, cpumask); + tick_setup_device(td, newdev, cpu, &cpumask_of_cpu(cpu)); if (newdev->features & CLOCK_EVT_FEAT_ONESHOT) tick_oneshot_notify(); diff --git a/kernel/trace/trace_sysprof.c b/kernel/trace/trace_sysprof.c index ce2d723c10e..bb948e52ce2 100644 --- a/kernel/trace/trace_sysprof.c +++ b/kernel/trace/trace_sysprof.c @@ -213,9 +213,7 @@ static void start_stack_timers(void) int cpu; for_each_online_cpu(cpu) { - cpumask_of_cpu_ptr(new_mask, cpu); - - set_cpus_allowed_ptr(current, new_mask); + set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); start_stack_timer(cpu); } set_cpus_allowed_ptr(current, &saved_mask); diff --git a/kernel/workqueue.c b/kernel/workqueue.c index ec7e4f62aaf..4a26a1382df 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -830,10 +830,21 @@ struct workqueue_struct *__create_workqueue_key(const char *name, start_workqueue_thread(cwq, -1); } else { cpu_maps_update_begin(); + /* + * We must place this wq on list even if the code below fails. + * cpu_down(cpu) can remove cpu from cpu_populated_map before + * destroy_workqueue() takes the lock, in that case we leak + * cwq[cpu]->thread. + */ spin_lock(&workqueue_lock); list_add(&wq->list, &workqueues); spin_unlock(&workqueue_lock); - + /* + * We must initialize cwqs for each possible cpu even if we + * are going to call destroy_workqueue() finally. Otherwise + * cpu_up() can hit the uninitialized cwq once we drop the + * lock. + */ for_each_possible_cpu(cpu) { cwq = init_cpu_workqueue(wq, cpu); if (err || !cpu_online(cpu)) diff --git a/lib/ratelimit.c b/lib/ratelimit.c index 35136671b21..26187edcc7e 100644 --- a/lib/ratelimit.c +++ b/lib/ratelimit.c @@ -15,7 +15,6 @@ #include <linux/module.h> static DEFINE_SPINLOCK(ratelimit_lock); -static unsigned long flags; /* * __ratelimit - rate limiting @@ -26,6 +25,8 @@ static unsigned long flags; */ int __ratelimit(struct ratelimit_state *rs) { + unsigned long flags; + if (!rs->interval) return 1; diff --git a/lib/smp_processor_id.c b/lib/smp_processor_id.c index c4381d9516f..0f8fc22ed10 100644 --- a/lib/smp_processor_id.c +++ b/lib/smp_processor_id.c @@ -11,7 +11,6 @@ notrace unsigned int debug_smp_processor_id(void) { unsigned long preempt_count = preempt_count(); int this_cpu = raw_smp_processor_id(); - cpumask_of_cpu_ptr_declare(this_mask); if (likely(preempt_count)) goto out; @@ -23,9 +22,7 @@ notrace unsigned int debug_smp_processor_id(void) * Kernel threads bound to a single CPU can safely use * smp_processor_id(): */ - cpumask_of_cpu_ptr_next(this_mask, this_cpu); - - if (cpus_equal(current->cpus_allowed, *this_mask)) + if (cpus_equal(current->cpus_allowed, cpumask_of_cpu(this_cpu))) goto out; /* diff --git a/mm/Kconfig b/mm/Kconfig index efee5d379df..446c6588c75 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -208,3 +208,6 @@ config NR_QUICK config VIRT_TO_BUS def_bool y depends on !ARCH_NO_VIRT_TO_BUS + +config MMU_NOTIFIER + bool diff --git a/mm/Makefile b/mm/Makefile index 06ca2381fef..da4ccf015ae 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -25,6 +25,7 @@ obj-$(CONFIG_SHMEM) += shmem.o obj-$(CONFIG_TMPFS_POSIX_ACL) += shmem_acl.o obj-$(CONFIG_TINY_SHMEM) += tiny-shmem.o obj-$(CONFIG_SLOB) += slob.o +obj-$(CONFIG_MMU_NOTIFIER) += mmu_notifier.o obj-$(CONFIG_SLAB) += slab.o obj-$(CONFIG_SLUB) += slub.o obj-$(CONFIG_MEMORY_HOTPLUG) += memory_hotplug.o diff --git a/mm/filemap.c b/mm/filemap.c index 5de7633e1db..d97d1ad5547 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1023,8 +1023,17 @@ find_page: ra, filp, page, index, last_index - index); } - if (!PageUptodate(page)) - goto page_not_up_to_date; + if (!PageUptodate(page)) { + if (inode->i_blkbits == PAGE_CACHE_SHIFT || + !mapping->a_ops->is_partially_uptodate) + goto page_not_up_to_date; + if (TestSetPageLocked(page)) + goto page_not_up_to_date; + if (!mapping->a_ops->is_partially_uptodate(page, + desc, offset)) + goto page_not_up_to_date_locked; + unlock_page(page); + } page_ok: /* * i_size must be checked after we know the page is Uptodate. @@ -1094,6 +1103,7 @@ page_not_up_to_date: if (lock_page_killable(page)) goto readpage_eio; +page_not_up_to_date_locked: /* Did it get truncated before we got the lock? */ if (!page->mapping) { unlock_page(page); @@ -1869,7 +1879,7 @@ void iov_iter_advance(struct iov_iter *i, size_t bytes) * The !iov->iov_len check ensures we skip over unlikely * zero-length segments (without overruning the iovec). */ - while (bytes || unlikely(!iov->iov_len && i->count)) { + while (bytes || unlikely(i->count && !iov->iov_len)) { int copy; copy = min(bytes, iov->iov_len - base); diff --git a/mm/filemap_xip.c b/mm/filemap_xip.c index 98a3f31ccd6..380ab402d71 100644 --- a/mm/filemap_xip.c +++ b/mm/filemap_xip.c @@ -13,6 +13,7 @@ #include <linux/module.h> #include <linux/uio.h> #include <linux/rmap.h> +#include <linux/mmu_notifier.h> #include <linux/sched.h> #include <asm/tlbflush.h> #include <asm/io.h> @@ -188,7 +189,7 @@ __xip_unmap (struct address_space * mapping, if (pte) { /* Nuke the page table entry. */ flush_cache_page(vma, address, pte_pfn(*pte)); - pteval = ptep_clear_flush(vma, address, pte); + pteval = ptep_clear_flush_notify(vma, address, pte); page_remove_rmap(page, vma); dec_mm_counter(mm, file_rss); BUG_ON(pte_dirty(pteval)); diff --git a/mm/fremap.c b/mm/fremap.c index 07a9c82ce1a..7881638e4a1 100644 --- a/mm/fremap.c +++ b/mm/fremap.c @@ -15,6 +15,7 @@ #include <linux/rmap.h> #include <linux/module.h> #include <linux/syscalls.h> +#include <linux/mmu_notifier.h> #include <asm/mmu_context.h> #include <asm/cacheflush.h> @@ -214,7 +215,9 @@ asmlinkage long sys_remap_file_pages(unsigned long start, unsigned long size, spin_unlock(&mapping->i_mmap_lock); } + mmu_notifier_invalidate_range_start(mm, start, start + size); err = populate_range(mm, vma, start, size, pgoff); + mmu_notifier_invalidate_range_end(mm, start, start + size); if (!err && !(flags & MAP_NONBLOCK)) { if (unlikely(has_write_lock)) { downgrade_write(&mm->mmap_sem); diff --git a/mm/hugetlb.c b/mm/hugetlb.c index b3c78640b62..d237a02eb22 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -9,6 +9,7 @@ #include <linux/mm.h> #include <linux/sysctl.h> #include <linux/highmem.h> +#include <linux/mmu_notifier.h> #include <linux/nodemask.h> #include <linux/pagemap.h> #include <linux/mempolicy.h> @@ -19,6 +20,7 @@ #include <asm/io.h> #include <asm/page.h> #include <asm/pgtable.h> +#include <asm/io.h> #include <linux/hugetlb.h> #include "internal.h" @@ -1672,6 +1674,7 @@ void __unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start, BUG_ON(start & ~huge_page_mask(h)); BUG_ON(end & ~huge_page_mask(h)); + mmu_notifier_invalidate_range_start(mm, start, end); spin_lock(&mm->page_table_lock); for (address = start; address < end; address += sz) { ptep = huge_pte_offset(mm, address); @@ -1713,6 +1716,7 @@ void __unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start, } spin_unlock(&mm->page_table_lock); flush_tlb_range(vma, start, end); + mmu_notifier_invalidate_range_end(mm, start, end); list_for_each_entry_safe(page, tmp, &page_list, lru) { list_del(&page->lru); put_page(page); diff --git a/mm/madvise.c b/mm/madvise.c index 23a0ec3e0ea..f9349c18a1b 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -132,10 +132,10 @@ static long madvise_willneed(struct vm_area_struct * vma, * Application no longer needs these pages. If the pages are dirty, * it's OK to just throw them away. The app will be more careful about * data it wants to keep. Be sure to free swap resources too. The - * zap_page_range call sets things up for refill_inactive to actually free + * zap_page_range call sets things up for shrink_active_list to actually free * these pages later if no one else has touched them in the meantime, * although we could add these pages to a global reuse list for - * refill_inactive to pick up before reclaiming other pages. + * shrink_active_list to pick up before reclaiming other pages. * * NB: This interface discards data rather than pushes it out to swap, * as some implementations do. This has performance implications for diff --git a/mm/memcontrol.c b/mm/memcontrol.c index fba566c5132..7056c3bdb47 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -1168,9 +1168,6 @@ static void mem_cgroup_move_task(struct cgroup_subsys *ss, mem = mem_cgroup_from_cont(cont); old_mem = mem_cgroup_from_cont(old_cont); - if (mem == old_mem) - goto out; - /* * Only thread group leaders are allowed to migrate, the mm_struct is * in effect owned by the leader diff --git a/mm/memory.c b/mm/memory.c index a8ca04faaea..0e4eea10c7b 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -51,6 +51,7 @@ #include <linux/init.h> #include <linux/writeback.h> #include <linux/memcontrol.h> +#include <linux/mmu_notifier.h> #include <asm/pgalloc.h> #include <asm/uaccess.h> @@ -652,6 +653,7 @@ int copy_page_range(struct mm_struct *dst_mm, struct mm_struct *src_mm, unsigned long next; unsigned long addr = vma->vm_start; unsigned long end = vma->vm_end; + int ret; /* * Don't copy ptes where a page fault will fill them correctly. @@ -667,17 +669,33 @@ int copy_page_range(struct mm_struct *dst_mm, struct mm_struct *src_mm, if (is_vm_hugetlb_page(vma)) return copy_hugetlb_page_range(dst_mm, src_mm, vma); + /* + * We need to invalidate the secondary MMU mappings only when + * there could be a permission downgrade on the ptes of the + * parent mm. And a permission downgrade will only happen if + * is_cow_mapping() returns true. + */ + if (is_cow_mapping(vma->vm_flags)) + mmu_notifier_invalidate_range_start(src_mm, addr, end); + + ret = 0; dst_pgd = pgd_offset(dst_mm, addr); src_pgd = pgd_offset(src_mm, addr); do { next = pgd_addr_end(addr, end); if (pgd_none_or_clear_bad(src_pgd)) continue; - if (copy_pud_range(dst_mm, src_mm, dst_pgd, src_pgd, - vma, addr, next)) - return -ENOMEM; + if (unlikely(copy_pud_range(dst_mm, src_mm, dst_pgd, src_pgd, + vma, addr, next))) { + ret = -ENOMEM; + break; + } } while (dst_pgd++, src_pgd++, addr = next, addr != end); - return 0; + + if (is_cow_mapping(vma->vm_flags)) + mmu_notifier_invalidate_range_end(src_mm, + vma->vm_start, end); + return ret; } static unsigned long zap_pte_range(struct mmu_gather *tlb, @@ -881,7 +899,9 @@ unsigned long unmap_vmas(struct mmu_gather **tlbp, unsigned long start = start_addr; spinlock_t *i_mmap_lock = details? details->i_mmap_lock: NULL; int fullmm = (*tlbp)->fullmm; + struct mm_struct *mm = vma->vm_mm; + mmu_notifier_invalidate_range_start(mm, start_addr, end_addr); for ( ; vma && vma->vm_start < end_addr; vma = vma->vm_next) { unsigned long end; @@ -946,6 +966,7 @@ unsigned long unmap_vmas(struct mmu_gather **tlbp, } } out: + mmu_notifier_invalidate_range_end(mm, start_addr, end_addr); return start; /* which is now the end (or restart) address */ } @@ -972,6 +993,30 @@ unsigned long zap_page_range(struct vm_area_struct *vma, unsigned long address, tlb_finish_mmu(tlb, address, end); return end; } +EXPORT_SYMBOL_GPL(zap_page_range); + +/** + * zap_vma_ptes - remove ptes mapping the vma + * @vma: vm_area_struct holding ptes to be zapped + * @address: starting address of pages to zap + * @size: number of bytes to zap + * + * This function only unmaps ptes assigned to VM_PFNMAP vmas. + * + * The entire address range must be fully contained within the vma. + * + * Returns 0 if successful. + */ +int zap_vma_ptes(struct vm_area_struct *vma, unsigned long address, + unsigned long size) +{ + if (address < vma->vm_start || address + size > vma->vm_end || + !(vma->vm_flags & VM_PFNMAP)) + return -1; + zap_page_range(vma, address, size, NULL); + return 0; +} +EXPORT_SYMBOL_GPL(zap_vma_ptes); /* * Do a quick page-table lookup for a single page. @@ -1066,6 +1111,7 @@ no_page_table: } return page; } +EXPORT_SYMBOL_GPL(follow_page); /* Can we do the FOLL_ANON optimization? */ static inline int use_zero_page(struct vm_area_struct *vma) @@ -1616,10 +1662,11 @@ int apply_to_page_range(struct mm_struct *mm, unsigned long addr, { pgd_t *pgd; unsigned long next; - unsigned long end = addr + size; + unsigned long start = addr, end = addr + size; int err; BUG_ON(addr >= end); + mmu_notifier_invalidate_range_start(mm, start, end); pgd = pgd_offset(mm, addr); do { next = pgd_addr_end(addr, end); @@ -1627,6 +1674,7 @@ int apply_to_page_range(struct mm_struct *mm, unsigned long addr, if (err) break; } while (pgd++, addr = next, addr != end); + mmu_notifier_invalidate_range_end(mm, start, end); return err; } EXPORT_SYMBOL_GPL(apply_to_page_range); @@ -1839,7 +1887,7 @@ gotten: * seen in the presence of one thread doing SMC and another * thread doing COW. */ - ptep_clear_flush(vma, address, page_table); + ptep_clear_flush_notify(vma, address, page_table); set_pte_at(mm, address, page_table, entry); update_mmu_cache(vma, address, entry); lru_cache_add_active(new_page); diff --git a/mm/mmap.c b/mm/mmap.c index 5e0cc99e9cd..245c3d69067 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -26,6 +26,7 @@ #include <linux/mount.h> #include <linux/mempolicy.h> #include <linux/rmap.h> +#include <linux/mmu_notifier.h> #include <asm/uaccess.h> #include <asm/cacheflush.h> @@ -2061,6 +2062,7 @@ void exit_mmap(struct mm_struct *mm) /* mm's last user has gone, and its about to be pulled down */ arch_exit_mmap(mm); + mmu_notifier_release(mm); lru_add_drain(); flush_cache_mm(mm); @@ -2268,3 +2270,161 @@ int install_special_mapping(struct mm_struct *mm, return 0; } + +static DEFINE_MUTEX(mm_all_locks_mutex); + +static void vm_lock_anon_vma(struct anon_vma *anon_vma) +{ + if (!test_bit(0, (unsigned long *) &anon_vma->head.next)) { + /* + * The LSB of head.next can't change from under us + * because we hold the mm_all_locks_mutex. + */ + spin_lock(&anon_vma->lock); + /* + * We can safely modify head.next after taking the + * anon_vma->lock. If some other vma in this mm shares + * the same anon_vma we won't take it again. + * + * No need of atomic instructions here, head.next + * can't change from under us thanks to the + * anon_vma->lock. + */ + if (__test_and_set_bit(0, (unsigned long *) + &anon_vma->head.next)) + BUG(); + } +} + +static void vm_lock_mapping(struct address_space *mapping) +{ + if (!test_bit(AS_MM_ALL_LOCKS, &mapping->flags)) { + /* + * AS_MM_ALL_LOCKS can't change from under us because + * we hold the mm_all_locks_mutex. + * + * Operations on ->flags have to be atomic because + * even if AS_MM_ALL_LOCKS is stable thanks to the + * mm_all_locks_mutex, there may be other cpus + * changing other bitflags in parallel to us. + */ + if (test_and_set_bit(AS_MM_ALL_LOCKS, &mapping->flags)) + BUG(); + spin_lock(&mapping->i_mmap_lock); + } +} + +/* + * This operation locks against the VM for all pte/vma/mm related + * operations that could ever happen on a certain mm. This includes + * vmtruncate, try_to_unmap, and all page faults. + * + * The caller must take the mmap_sem in write mode before calling + * mm_take_all_locks(). The caller isn't allowed to release the + * mmap_sem until mm_drop_all_locks() returns. + * + * mmap_sem in write mode is required in order to block all operations + * that could modify pagetables and free pages without need of + * altering the vma layout (for example populate_range() with + * nonlinear vmas). It's also needed in write mode to avoid new + * anon_vmas to be associated with existing vmas. + * + * A single task can't take more than one mm_take_all_locks() in a row + * or it would deadlock. + * + * The LSB in anon_vma->head.next and the AS_MM_ALL_LOCKS bitflag in + * mapping->flags avoid to take the same lock twice, if more than one + * vma in this mm is backed by the same anon_vma or address_space. + * + * We can take all the locks in random order because the VM code + * taking i_mmap_lock or anon_vma->lock outside the mmap_sem never + * takes more than one of them in a row. Secondly we're protected + * against a concurrent mm_take_all_locks() by the mm_all_locks_mutex. + * + * mm_take_all_locks() and mm_drop_all_locks are expensive operations + * that may have to take thousand of locks. + * + * mm_take_all_locks() can fail if it's interrupted by signals. + */ +int mm_take_all_locks(struct mm_struct *mm) +{ + struct vm_area_struct *vma; + int ret = -EINTR; + + BUG_ON(down_read_trylock(&mm->mmap_sem)); + + mutex_lock(&mm_all_locks_mutex); + + for (vma = mm->mmap; vma; vma = vma->vm_next) { + if (signal_pending(current)) + goto out_unlock; + if (vma->anon_vma) + vm_lock_anon_vma(vma->anon_vma); + if (vma->vm_file && vma->vm_file->f_mapping) + vm_lock_mapping(vma->vm_file->f_mapping); + } + ret = 0; + +out_unlock: + if (ret) + mm_drop_all_locks(mm); + + return ret; +} + +static void vm_unlock_anon_vma(struct anon_vma *anon_vma) +{ + if (test_bit(0, (unsigned long *) &anon_vma->head.next)) { + /* + * The LSB of head.next can't change to 0 from under + * us because we hold the mm_all_locks_mutex. + * + * We must however clear the bitflag before unlocking + * the vma so the users using the anon_vma->head will + * never see our bitflag. + * + * No need of atomic instructions here, head.next + * can't change from under us until we release the + * anon_vma->lock. + */ + if (!__test_and_clear_bit(0, (unsigned long *) + &anon_vma->head.next)) + BUG(); + spin_unlock(&anon_vma->lock); + } +} + +static void vm_unlock_mapping(struct address_space *mapping) +{ + if (test_bit(AS_MM_ALL_LOCKS, &mapping->flags)) { + /* + * AS_MM_ALL_LOCKS can't change to 0 from under us + * because we hold the mm_all_locks_mutex. + */ + spin_unlock(&mapping->i_mmap_lock); + if (!test_and_clear_bit(AS_MM_ALL_LOCKS, + &mapping->flags)) + BUG(); + } +} + +/* + * The mmap_sem cannot be released by the caller until + * mm_drop_all_locks() returns. + */ +void mm_drop_all_locks(struct mm_struct *mm) +{ + struct vm_area_struct *vma; + + BUG_ON(down_read_trylock(&mm->mmap_sem)); + BUG_ON(!mutex_is_locked(&mm_all_locks_mutex)); + + for (vma = mm->mmap; vma; vma = vma->vm_next) { + if (vma->anon_vma) + vm_unlock_anon_vma(vma->anon_vma); + if (vma->vm_file && vma->vm_file->f_mapping) + vm_unlock_mapping(vma->vm_file->f_mapping); + } + + mutex_unlock(&mm_all_locks_mutex); +} diff --git a/mm/mmu_notifier.c b/mm/mmu_notifier.c new file mode 100644 index 00000000000..5f4ef0250be --- /dev/null +++ b/mm/mmu_notifier.c @@ -0,0 +1,277 @@ +/* + * linux/mm/mmu_notifier.c + * + * Copyright (C) 2008 Qumranet, Inc. + * Copyright (C) 2008 SGI + * Christoph Lameter <clameter@sgi.com> + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + */ + +#include <linux/rculist.h> +#include <linux/mmu_notifier.h> +#include <linux/module.h> +#include <linux/mm.h> +#include <linux/err.h> +#include <linux/rcupdate.h> +#include <linux/sched.h> + +/* + * This function can't run concurrently against mmu_notifier_register + * because mm->mm_users > 0 during mmu_notifier_register and exit_mmap + * runs with mm_users == 0. Other tasks may still invoke mmu notifiers + * in parallel despite there being no task using this mm any more, + * through the vmas outside of the exit_mmap context, such as with + * vmtruncate. This serializes against mmu_notifier_unregister with + * the mmu_notifier_mm->lock in addition to RCU and it serializes + * against the other mmu notifiers with RCU. struct mmu_notifier_mm + * can't go away from under us as exit_mmap holds an mm_count pin + * itself. + */ +void __mmu_notifier_release(struct mm_struct *mm) +{ + struct mmu_notifier *mn; + + spin_lock(&mm->mmu_notifier_mm->lock); + while (unlikely(!hlist_empty(&mm->mmu_notifier_mm->list))) { + mn = hlist_entry(mm->mmu_notifier_mm->list.first, + struct mmu_notifier, + hlist); + /* + * We arrived before mmu_notifier_unregister so + * mmu_notifier_unregister will do nothing other than + * to wait ->release to finish and + * mmu_notifier_unregister to return. + */ + hlist_del_init_rcu(&mn->hlist); + /* + * RCU here will block mmu_notifier_unregister until + * ->release returns. + */ + rcu_read_lock(); + spin_unlock(&mm->mmu_notifier_mm->lock); + /* + * if ->release runs before mmu_notifier_unregister it + * must be handled as it's the only way for the driver + * to flush all existing sptes and stop the driver + * from establishing any more sptes before all the + * pages in the mm are freed. + */ + if (mn->ops->release) + mn->ops->release(mn, mm); + rcu_read_unlock(); + spin_lock(&mm->mmu_notifier_mm->lock); + } + spin_unlock(&mm->mmu_notifier_mm->lock); + + /* + * synchronize_rcu here prevents mmu_notifier_release to + * return to exit_mmap (which would proceed freeing all pages + * in the mm) until the ->release method returns, if it was + * invoked by mmu_notifier_unregister. + * + * The mmu_notifier_mm can't go away from under us because one + * mm_count is hold by exit_mmap. + */ + synchronize_rcu(); +} + +/* + * If no young bitflag is supported by the hardware, ->clear_flush_young can + * unmap the address and return 1 or 0 depending if the mapping previously + * existed or not. + */ +int __mmu_notifier_clear_flush_young(struct mm_struct *mm, + unsigned long address) +{ + struct mmu_notifier *mn; + struct hlist_node *n; + int young = 0; + + rcu_read_lock(); + hlist_for_each_entry_rcu(mn, n, &mm->mmu_notifier_mm->list, hlist) { + if (mn->ops->clear_flush_young) + young |= mn->ops->clear_flush_young(mn, mm, address); + } + rcu_read_unlock(); + + return young; +} + +void __mmu_notifier_invalidate_page(struct mm_struct *mm, + unsigned long address) +{ + struct mmu_notifier *mn; + struct hlist_node *n; + + rcu_read_lock(); + hlist_for_each_entry_rcu(mn, n, &mm->mmu_notifier_mm->list, hlist) { + if (mn->ops->invalidate_page) + mn->ops->invalidate_page(mn, mm, address); + } + rcu_read_unlock(); +} + +void __mmu_notifier_invalidate_range_start(struct mm_struct *mm, + unsigned long start, unsigned long end) +{ + struct mmu_notifier *mn; + struct hlist_node *n; + + rcu_read_lock(); + hlist_for_each_entry_rcu(mn, n, &mm->mmu_notifier_mm->list, hlist) { + if (mn->ops->invalidate_range_start) + mn->ops->invalidate_range_start(mn, mm, start, end); + } + rcu_read_unlock(); +} + +void __mmu_notifier_invalidate_range_end(struct mm_struct *mm, + unsigned long start, unsigned long end) +{ + struct mmu_notifier *mn; + struct hlist_node *n; + + rcu_read_lock(); + hlist_for_each_entry_rcu(mn, n, &mm->mmu_notifier_mm->list, hlist) { + if (mn->ops->invalidate_range_end) + mn->ops->invalidate_range_end(mn, mm, start, end); + } + rcu_read_unlock(); +} + +static int do_mmu_notifier_register(struct mmu_notifier *mn, + struct mm_struct *mm, + int take_mmap_sem) +{ + struct mmu_notifier_mm *mmu_notifier_mm; + int ret; + + BUG_ON(atomic_read(&mm->mm_users) <= 0); + + ret = -ENOMEM; + mmu_notifier_mm = kmalloc(sizeof(struct mmu_notifier_mm), GFP_KERNEL); + if (unlikely(!mmu_notifier_mm)) + goto out; + + if (take_mmap_sem) + down_write(&mm->mmap_sem); + ret = mm_take_all_locks(mm); + if (unlikely(ret)) + goto out_cleanup; + + if (!mm_has_notifiers(mm)) { + INIT_HLIST_HEAD(&mmu_notifier_mm->list); + spin_lock_init(&mmu_notifier_mm->lock); + mm->mmu_notifier_mm = mmu_notifier_mm; + mmu_notifier_mm = NULL; + } + atomic_inc(&mm->mm_count); + + /* + * Serialize the update against mmu_notifier_unregister. A + * side note: mmu_notifier_release can't run concurrently with + * us because we hold the mm_users pin (either implicitly as + * current->mm or explicitly with get_task_mm() or similar). + * We can't race against any other mmu notifier method either + * thanks to mm_take_all_locks(). + */ + spin_lock(&mm->mmu_notifier_mm->lock); + hlist_add_head(&mn->hlist, &mm->mmu_notifier_mm->list); + spin_unlock(&mm->mmu_notifier_mm->lock); + + mm_drop_all_locks(mm); +out_cleanup: + if (take_mmap_sem) + up_write(&mm->mmap_sem); + /* kfree() does nothing if mmu_notifier_mm is NULL */ + kfree(mmu_notifier_mm); +out: + BUG_ON(atomic_read(&mm->mm_users) <= 0); + return ret; +} + +/* + * Must not hold mmap_sem nor any other VM related lock when calling + * this registration function. Must also ensure mm_users can't go down + * to zero while this runs to avoid races with mmu_notifier_release, + * so mm has to be current->mm or the mm should be pinned safely such + * as with get_task_mm(). If the mm is not current->mm, the mm_users + * pin should be released by calling mmput after mmu_notifier_register + * returns. mmu_notifier_unregister must be always called to + * unregister the notifier. mm_count is automatically pinned to allow + * mmu_notifier_unregister to safely run at any time later, before or + * after exit_mmap. ->release will always be called before exit_mmap + * frees the pages. + */ +int mmu_notifier_register(struct mmu_notifier *mn, struct mm_struct *mm) +{ + return do_mmu_notifier_register(mn, mm, 1); +} +EXPORT_SYMBOL_GPL(mmu_notifier_register); + +/* + * Same as mmu_notifier_register but here the caller must hold the + * mmap_sem in write mode. + */ +int __mmu_notifier_register(struct mmu_notifier *mn, struct mm_struct *mm) +{ + return do_mmu_notifier_register(mn, mm, 0); +} +EXPORT_SYMBOL_GPL(__mmu_notifier_register); + +/* this is called after the last mmu_notifier_unregister() returned */ +void __mmu_notifier_mm_destroy(struct mm_struct *mm) +{ + BUG_ON(!hlist_empty(&mm->mmu_notifier_mm->list)); + kfree(mm->mmu_notifier_mm); + mm->mmu_notifier_mm = LIST_POISON1; /* debug */ +} + +/* + * This releases the mm_count pin automatically and frees the mm + * structure if it was the last user of it. It serializes against + * running mmu notifiers with RCU and against mmu_notifier_unregister + * with the unregister lock + RCU. All sptes must be dropped before + * calling mmu_notifier_unregister. ->release or any other notifier + * method may be invoked concurrently with mmu_notifier_unregister, + * and only after mmu_notifier_unregister returned we're guaranteed + * that ->release or any other method can't run anymore. + */ +void mmu_notifier_unregister(struct mmu_notifier *mn, struct mm_struct *mm) +{ + BUG_ON(atomic_read(&mm->mm_count) <= 0); + + spin_lock(&mm->mmu_notifier_mm->lock); + if (!hlist_unhashed(&mn->hlist)) { + hlist_del_rcu(&mn->hlist); + + /* + * RCU here will force exit_mmap to wait ->release to finish + * before freeing the pages. + */ + rcu_read_lock(); + spin_unlock(&mm->mmu_notifier_mm->lock); + /* + * exit_mmap will block in mmu_notifier_release to + * guarantee ->release is called before freeing the + * pages. + */ + if (mn->ops->release) + mn->ops->release(mn, mm); + rcu_read_unlock(); + } else + spin_unlock(&mm->mmu_notifier_mm->lock); + + /* + * Wait any running method to finish, of course including + * ->release if it was run by mmu_notifier_relase instead of us. + */ + synchronize_rcu(); + + BUG_ON(atomic_read(&mm->mm_count) <= 0); + + mmdrop(mm); +} +EXPORT_SYMBOL_GPL(mmu_notifier_unregister); diff --git a/mm/mprotect.c b/mm/mprotect.c index abd645a3b0a..fded06f923f 100644 --- a/mm/mprotect.c +++ b/mm/mprotect.c @@ -21,6 +21,7 @@ #include <linux/syscalls.h> #include <linux/swap.h> #include <linux/swapops.h> +#include <linux/mmu_notifier.h> #include <asm/uaccess.h> #include <asm/pgtable.h> #include <asm/cacheflush.h> @@ -203,10 +204,12 @@ success: dirty_accountable = 1; } + mmu_notifier_invalidate_range_start(mm, start, end); if (is_vm_hugetlb_page(vma)) hugetlb_change_protection(vma, start, end, vma->vm_page_prot); else change_protection(vma, start, end, vma->vm_page_prot, dirty_accountable); + mmu_notifier_invalidate_range_end(mm, start, end); vm_stat_account(mm, oldflags, vma->vm_file, -nrpages); vm_stat_account(mm, newflags, vma->vm_file, nrpages); return 0; diff --git a/mm/mremap.c b/mm/mremap.c index 08e3c7f2bd1..1a7743923c8 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -18,6 +18,7 @@ #include <linux/highmem.h> #include <linux/security.h> #include <linux/syscalls.h> +#include <linux/mmu_notifier.h> #include <asm/uaccess.h> #include <asm/cacheflush.h> @@ -74,7 +75,11 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd, struct mm_struct *mm = vma->vm_mm; pte_t *old_pte, *new_pte, pte; spinlock_t *old_ptl, *new_ptl; + unsigned long old_start; + old_start = old_addr; + mmu_notifier_invalidate_range_start(vma->vm_mm, + old_start, old_end); if (vma->vm_file) { /* * Subtle point from Rajesh Venkatasubramanian: before @@ -116,6 +121,7 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd, pte_unmap_unlock(old_pte - 1, old_ptl); if (mapping) spin_unlock(&mapping->i_mmap_lock); + mmu_notifier_invalidate_range_end(vma->vm_mm, old_start, old_end); } #define LATENCY_LIMIT (64 * PAGE_SIZE) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 3cf3d05b6bd..401d104d2bb 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -3753,23 +3753,6 @@ unsigned long __init find_min_pfn_with_active_regions(void) return find_min_pfn_for_node(MAX_NUMNODES); } -/** - * find_max_pfn_with_active_regions - Find the maximum PFN registered - * - * It returns the maximum PFN based on information provided via - * add_active_range(). - */ -unsigned long __init find_max_pfn_with_active_regions(void) -{ - int i; - unsigned long max_pfn = 0; - - for (i = 0; i < nr_nodemap_entries; i++) - max_pfn = max(max_pfn, early_node_map[i].end_pfn); - - return max_pfn; -} - /* * early_calculate_totalpages() * Sum pages in active regions for movable zone. diff --git a/mm/rmap.c b/mm/rmap.c index 39ae5a9bf38..94a5246a3f9 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -49,6 +49,7 @@ #include <linux/module.h> #include <linux/kallsyms.h> #include <linux/memcontrol.h> +#include <linux/mmu_notifier.h> #include <asm/tlbflush.h> @@ -287,7 +288,7 @@ static int page_referenced_one(struct page *page, if (vma->vm_flags & VM_LOCKED) { referenced++; *mapcount = 1; /* break early from loop */ - } else if (ptep_clear_flush_young(vma, address, pte)) + } else if (ptep_clear_flush_young_notify(vma, address, pte)) referenced++; /* Pretend the page is referenced if the task has the @@ -457,7 +458,7 @@ static int page_mkclean_one(struct page *page, struct vm_area_struct *vma) pte_t entry; flush_cache_page(vma, address, pte_pfn(*pte)); - entry = ptep_clear_flush(vma, address, pte); + entry = ptep_clear_flush_notify(vma, address, pte); entry = pte_wrprotect(entry); entry = pte_mkclean(entry); set_pte_at(mm, address, pte, entry); @@ -666,7 +667,8 @@ void page_remove_rmap(struct page *page, struct vm_area_struct *vma) * Leaving it set also helps swapoff to reinstate ptes * faster for those pages still in swapcache. */ - if (page_test_dirty(page)) { + if ((!PageAnon(page) || PageSwapCache(page)) && + page_test_dirty(page)) { page_clear_dirty(page); set_page_dirty(page); } @@ -705,14 +707,14 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma, * skipped over this mm) then we should reactivate it. */ if (!migration && ((vma->vm_flags & VM_LOCKED) || - (ptep_clear_flush_young(vma, address, pte)))) { + (ptep_clear_flush_young_notify(vma, address, pte)))) { ret = SWAP_FAIL; goto out_unmap; } /* Nuke the page table entry. */ flush_cache_page(vma, address, page_to_pfn(page)); - pteval = ptep_clear_flush(vma, address, pte); + pteval = ptep_clear_flush_notify(vma, address, pte); /* Move the dirty bit to the physical page now the pte is gone. */ if (pte_dirty(pteval)) @@ -837,12 +839,12 @@ static void try_to_unmap_cluster(unsigned long cursor, page = vm_normal_page(vma, address, *pte); BUG_ON(!page || PageAnon(page)); - if (ptep_clear_flush_young(vma, address, pte)) + if (ptep_clear_flush_young_notify(vma, address, pte)) continue; /* Nuke the page table entry. */ flush_cache_page(vma, address, pte_pfn(*pte)); - pteval = ptep_clear_flush(vma, address, pte); + pteval = ptep_clear_flush_notify(vma, address, pte); /* If nonlinear, store the file page offset in the pte. */ if (page->index != linear_page_index(vma, address)) diff --git a/mm/shmem.c b/mm/shmem.c index 952d361774b..c1e5a3b4f75 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -1513,7 +1513,6 @@ shmem_get_inode(struct super_block *sb, int mode, dev_t dev) inode->i_uid = current->fsuid; inode->i_gid = current->fsgid; inode->i_blocks = 0; - inode->i_mapping->a_ops = &shmem_aops; inode->i_mapping->backing_dev_info = &shmem_backing_dev_info; inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; inode->i_generation = get_seconds(); @@ -1528,6 +1527,7 @@ shmem_get_inode(struct super_block *sb, int mode, dev_t dev) init_special_inode(inode, mode, dev); break; case S_IFREG: + inode->i_mapping->a_ops = &shmem_aops; inode->i_op = &shmem_inode_operations; inode->i_fop = &shmem_file_operations; mpol_shared_policy_init(&info->policy, @@ -1929,6 +1929,7 @@ static int shmem_symlink(struct inode *dir, struct dentry *dentry, const char *s return error; } unlock_page(page); + inode->i_mapping->a_ops = &shmem_aops; inode->i_op = &shmem_symlink_inode_operations; kaddr = kmap_atomic(page, KM_USER0); memcpy(kaddr, symname, len); diff --git a/mm/swap.c b/mm/swap.c index dd89234ee51..7417a2adbe5 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -278,9 +278,10 @@ int lru_add_drain_all(void) * Avoid taking zone->lru_lock if possible, but if it is taken, retain it * for the remainder of the operation. * - * The locking in this function is against shrink_cache(): we recheck the - * page count inside the lock to see whether shrink_cache grabbed the page - * via the LRU. If it did, give up: shrink_cache will free it. + * The locking in this function is against shrink_inactive_list(): we recheck + * the page count inside the lock to see whether shrink_inactive_list() + * grabbed the page via the LRU. If it did, give up: shrink_inactive_list() + * will free it. */ void release_pages(struct page **pages, int nr, int cold) { diff --git a/mm/swapfile.c b/mm/swapfile.c index 6beb6251e99..bb7f79641f9 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -656,8 +656,8 @@ static int unuse_mm(struct mm_struct *mm, if (!down_read_trylock(&mm->mmap_sem)) { /* - * Activate page so shrink_cache is unlikely to unmap its - * ptes while lock is dropped, so swapoff can make progress. + * Activate page so shrink_inactive_list is unlikely to unmap + * its ptes while lock is dropped, so swapoff can make progress. */ activate_page(page); unlock_page(page); diff --git a/mm/vmscan.c b/mm/vmscan.c index 8f71761bc4b..75be453628b 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -1408,7 +1408,7 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist, if (sc->nr_scanned && priority < DEF_PRIORITY - 2) congestion_wait(WRITE, HZ/10); } - /* top priority shrink_caches still had more to do? don't OOM, then */ + /* top priority shrink_zones still had more to do? don't OOM, then */ if (!sc->all_unreclaimable && scan_global_lru(sc)) ret = nr_reclaimed; out: @@ -1979,7 +1979,7 @@ module_init(kswapd_init) int zone_reclaim_mode __read_mostly; #define RECLAIM_OFF 0 -#define RECLAIM_ZONE (1<<0) /* Run shrink_cache on the zone */ +#define RECLAIM_ZONE (1<<0) /* Run shrink_inactive_list on the zone */ #define RECLAIM_WRITE (1<<1) /* Writeout pages during reclaim */ #define RECLAIM_SWAP (1<<2) /* Swap pages out during reclaim */ diff --git a/net/Kconfig b/net/Kconfig index b9866875174..7612cc8c337 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -2,9 +2,7 @@ # Network configuration # -menu "Networking" - -config NET +menuconfig NET bool "Networking support" ---help--- Unless you really know what you are doing, you should say Y here. @@ -22,7 +20,6 @@ config NET recommended to read the NET-HOWTO, available from <http://www.tldp.org/docs.html#howto>. -# Make sure that all config symbols are dependent on NET if NET menu "Networking options" @@ -252,5 +249,3 @@ source "net/rfkill/Kconfig" source "net/9p/Kconfig" endif # if NET -endmenu # Networking - diff --git a/net/core/dev.c b/net/core/dev.c index 8d13a9b9f1d..63d6bcddbf4 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2100,7 +2100,7 @@ static int ing_filter(struct sk_buff *skb) rxq = &dev->rx_queue; q = rxq->qdisc; - if (q) { + if (q != &noop_qdisc) { spin_lock(qdisc_lock(q)); result = qdisc_enqueue_root(skb, q); spin_unlock(qdisc_lock(q)); @@ -2113,7 +2113,7 @@ static inline struct sk_buff *handle_ing(struct sk_buff *skb, struct packet_type **pt_prev, int *ret, struct net_device *orig_dev) { - if (!skb->dev->rx_queue.qdisc) + if (skb->dev->rx_queue.qdisc == &noop_qdisc) goto out; if (*pt_prev) { diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 4e0c9227418..84640172d65 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -485,6 +485,9 @@ static struct sk_buff *__skb_clone(struct sk_buff *n, struct sk_buff *skb) C(head); C(data); C(truesize); +#if defined(CONFIG_MAC80211) || defined(CONFIG_MAC80211_MODULE) + C(do_not_encrypt); +#endif atomic_set(&n->users, 1); atomic_inc(&(skb_shinfo(skb)->dataref)); diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index 834356ea99d..8f5a403f6f6 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -232,6 +232,8 @@ static const struct snmp_mib snmp4_net_list[] = { SNMP_MIB_ITEM("TCPDSACKIgnoredOld", LINUX_MIB_TCPDSACKIGNOREDOLD), SNMP_MIB_ITEM("TCPDSACKIgnoredNoUndo", LINUX_MIB_TCPDSACKIGNOREDNOUNDO), SNMP_MIB_ITEM("TCPSpuriousRTOs", LINUX_MIB_TCPSPURIOUSRTOS), + SNMP_MIB_ITEM("TCPMD5NotFound", LINUX_MIB_TCPMD5NOTFOUND), + SNMP_MIB_ITEM("TCPMD5Unexpected", LINUX_MIB_TCPMD5UNEXPECTED), SNMP_MIB_SENTINEL }; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index a2b06d0cc26..b3875c0d83c 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1116,18 +1116,12 @@ static int tcp_v4_inbound_md5_hash(struct sock *sk, struct sk_buff *skb) return 0; if (hash_expected && !hash_location) { - LIMIT_NETDEBUG(KERN_INFO "MD5 Hash expected but NOT found " - "(" NIPQUAD_FMT ", %d)->(" NIPQUAD_FMT ", %d)\n", - NIPQUAD(iph->saddr), ntohs(th->source), - NIPQUAD(iph->daddr), ntohs(th->dest)); + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND); return 1; } if (!hash_expected && hash_location) { - LIMIT_NETDEBUG(KERN_INFO "MD5 Hash NOT expected but found " - "(" NIPQUAD_FMT ", %d)->(" NIPQUAD_FMT ", %d)\n", - NIPQUAD(iph->saddr), ntohs(th->source), - NIPQUAD(iph->daddr), ntohs(th->dest)); + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED); return 1; } diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index f7b535dec86..410046a8cc9 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -732,7 +732,7 @@ int datagram_send_ctl(struct net *net, LIMIT_NETDEBUG(KERN_DEBUG "invalid cmsg type: %d\n", cmsg->cmsg_type); err = -EINVAL; - break; + goto exit_f; } } diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c index f82f6074cf8..0179b66864f 100644 --- a/net/ipv6/proc.c +++ b/net/ipv6/proc.c @@ -286,7 +286,6 @@ proc_net_fail: void ipv6_misc_proc_exit(void) { - proc_net_remove(&init_net, "sockstat6"); proc_net_remove(&init_net, "dev_snmp6"); proc_net_remove(&init_net, "snmp6"); unregister_pernet_subsys(&ipv6_proc_ops); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index cff778b23a7..1db45216b23 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -849,28 +849,17 @@ static int tcp_v6_inbound_md5_hash (struct sock *sk, struct sk_buff *skb) hash_expected = tcp_v6_md5_do_lookup(sk, &ip6h->saddr); hash_location = tcp_parse_md5sig_option(th); - /* do we have a hash as expected? */ - if (!hash_expected) { - if (!hash_location) - return 0; - if (net_ratelimit()) { - printk(KERN_INFO "MD5 Hash NOT expected but found " - "(" NIP6_FMT ", %u)->" - "(" NIP6_FMT ", %u)\n", - NIP6(ip6h->saddr), ntohs(th->source), - NIP6(ip6h->daddr), ntohs(th->dest)); - } + /* We've parsed the options - do we have a hash? */ + if (!hash_expected && !hash_location) + return 0; + + if (hash_expected && !hash_location) { + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND); return 1; } - if (!hash_location) { - if (net_ratelimit()) { - printk(KERN_INFO "MD5 Hash expected but NOT found " - "(" NIP6_FMT ", %u)->" - "(" NIP6_FMT ", %u)\n", - NIP6(ip6h->saddr), ntohs(th->source), - NIP6(ip6h->daddr), ntohs(th->dest)); - } + if (!hash_expected && hash_location) { + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED); return 1; } diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 8e7ba0e62cf..297c257864c 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -81,6 +81,7 @@ static int ieee80211_change_iface(struct wiphy *wiphy, int ifindex, enum nl80211_iftype type, u32 *flags, struct vif_params *params) { + struct ieee80211_local *local = wiphy_priv(wiphy); struct net_device *dev; enum ieee80211_if_types itype; struct ieee80211_sub_if_data *sdata; @@ -95,6 +96,9 @@ static int ieee80211_change_iface(struct wiphy *wiphy, int ifindex, if (itype == IEEE80211_IF_TYPE_INVALID) return -EINVAL; + if (dev == local->mdev) + return -EOPNOTSUPP; + sdata = IEEE80211_DEV_TO_SUB_IF(dev); ret = ieee80211_if_change_type(sdata, itype); @@ -117,12 +121,16 @@ static int ieee80211_add_key(struct wiphy *wiphy, struct net_device *dev, u8 key_idx, u8 *mac_addr, struct key_params *params) { + struct ieee80211_local *local = wiphy_priv(wiphy); struct ieee80211_sub_if_data *sdata; struct sta_info *sta = NULL; enum ieee80211_key_alg alg; struct ieee80211_key *key; int err; + if (dev == local->mdev) + return -EOPNOTSUPP; + sdata = IEEE80211_DEV_TO_SUB_IF(dev); switch (params->cipher) { @@ -167,10 +175,14 @@ static int ieee80211_add_key(struct wiphy *wiphy, struct net_device *dev, static int ieee80211_del_key(struct wiphy *wiphy, struct net_device *dev, u8 key_idx, u8 *mac_addr) { + struct ieee80211_local *local = wiphy_priv(wiphy); struct ieee80211_sub_if_data *sdata; struct sta_info *sta; int ret; + if (dev == local->mdev) + return -EOPNOTSUPP; + sdata = IEEE80211_DEV_TO_SUB_IF(dev); rcu_read_lock(); @@ -211,7 +223,8 @@ static int ieee80211_get_key(struct wiphy *wiphy, struct net_device *dev, void (*callback)(void *cookie, struct key_params *params)) { - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + struct ieee80211_local *local = wiphy_priv(wiphy); + struct ieee80211_sub_if_data *sdata; struct sta_info *sta = NULL; u8 seq[6] = {0}; struct key_params params; @@ -220,6 +233,11 @@ static int ieee80211_get_key(struct wiphy *wiphy, struct net_device *dev, u16 iv16; int err = -ENOENT; + if (dev == local->mdev) + return -EOPNOTSUPP; + + sdata = IEEE80211_DEV_TO_SUB_IF(dev); + rcu_read_lock(); if (mac_addr) { @@ -293,8 +311,12 @@ static int ieee80211_config_default_key(struct wiphy *wiphy, struct net_device *dev, u8 key_idx) { + struct ieee80211_local *local = wiphy_priv(wiphy); struct ieee80211_sub_if_data *sdata; + if (dev == local->mdev) + return -EOPNOTSUPP; + rcu_read_lock(); sdata = IEEE80211_DEV_TO_SUB_IF(dev); @@ -475,9 +497,15 @@ static int ieee80211_config_beacon(struct ieee80211_sub_if_data *sdata, static int ieee80211_add_beacon(struct wiphy *wiphy, struct net_device *dev, struct beacon_parameters *params) { - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + struct ieee80211_local *local = wiphy_priv(wiphy); + struct ieee80211_sub_if_data *sdata; struct beacon_data *old; + if (dev == local->mdev) + return -EOPNOTSUPP; + + sdata = IEEE80211_DEV_TO_SUB_IF(dev); + if (sdata->vif.type != IEEE80211_IF_TYPE_AP) return -EINVAL; @@ -492,9 +520,15 @@ static int ieee80211_add_beacon(struct wiphy *wiphy, struct net_device *dev, static int ieee80211_set_beacon(struct wiphy *wiphy, struct net_device *dev, struct beacon_parameters *params) { - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + struct ieee80211_local *local = wiphy_priv(wiphy); + struct ieee80211_sub_if_data *sdata; struct beacon_data *old; + if (dev == local->mdev) + return -EOPNOTSUPP; + + sdata = IEEE80211_DEV_TO_SUB_IF(dev); + if (sdata->vif.type != IEEE80211_IF_TYPE_AP) return -EINVAL; @@ -508,9 +542,15 @@ static int ieee80211_set_beacon(struct wiphy *wiphy, struct net_device *dev, static int ieee80211_del_beacon(struct wiphy *wiphy, struct net_device *dev) { - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + struct ieee80211_local *local = wiphy_priv(wiphy); + struct ieee80211_sub_if_data *sdata; struct beacon_data *old; + if (dev == local->mdev) + return -EOPNOTSUPP; + + sdata = IEEE80211_DEV_TO_SUB_IF(dev); + if (sdata->vif.type != IEEE80211_IF_TYPE_AP) return -EINVAL; @@ -646,11 +686,14 @@ static void sta_apply_parameters(struct ieee80211_local *local, static int ieee80211_add_station(struct wiphy *wiphy, struct net_device *dev, u8 *mac, struct station_parameters *params) { - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); + struct ieee80211_local *local = wiphy_priv(wiphy); struct sta_info *sta; struct ieee80211_sub_if_data *sdata; int err; + if (dev == local->mdev || params->vlan == local->mdev) + return -EOPNOTSUPP; + /* Prevent a race with changing the rate control algorithm */ if (!netif_running(dev)) return -ENETDOWN; @@ -701,10 +744,15 @@ static int ieee80211_add_station(struct wiphy *wiphy, struct net_device *dev, static int ieee80211_del_station(struct wiphy *wiphy, struct net_device *dev, u8 *mac) { - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); - struct ieee80211_local *local = sdata->local; + struct ieee80211_local *local = wiphy_priv(wiphy); + struct ieee80211_sub_if_data *sdata; struct sta_info *sta; + if (dev == local->mdev) + return -EOPNOTSUPP; + + sdata = IEEE80211_DEV_TO_SUB_IF(dev); + if (mac) { rcu_read_lock(); @@ -730,10 +778,13 @@ static int ieee80211_change_station(struct wiphy *wiphy, u8 *mac, struct station_parameters *params) { - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); + struct ieee80211_local *local = wiphy_priv(wiphy); struct sta_info *sta; struct ieee80211_sub_if_data *vlansdata; + if (dev == local->mdev || params->vlan == local->mdev) + return -EOPNOTSUPP; + rcu_read_lock(); /* XXX: get sta belonging to dev */ @@ -752,7 +803,7 @@ static int ieee80211_change_station(struct wiphy *wiphy, return -EINVAL; } - sta->sdata = IEEE80211_DEV_TO_SUB_IF(params->vlan); + sta->sdata = vlansdata; ieee80211_send_layer2_update(sta); } @@ -767,15 +818,20 @@ static int ieee80211_change_station(struct wiphy *wiphy, static int ieee80211_add_mpath(struct wiphy *wiphy, struct net_device *dev, u8 *dst, u8 *next_hop) { - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + struct ieee80211_local *local = wiphy_priv(wiphy); + struct ieee80211_sub_if_data *sdata; struct mesh_path *mpath; struct sta_info *sta; int err; + if (dev == local->mdev) + return -EOPNOTSUPP; + if (!netif_running(dev)) return -ENETDOWN; + sdata = IEEE80211_DEV_TO_SUB_IF(dev); + if (sdata->vif.type != IEEE80211_IF_TYPE_MESH_POINT) return -ENOTSUPP; @@ -817,14 +873,19 @@ static int ieee80211_change_mpath(struct wiphy *wiphy, struct net_device *dev, u8 *dst, u8 *next_hop) { - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + struct ieee80211_local *local = wiphy_priv(wiphy); + struct ieee80211_sub_if_data *sdata; struct mesh_path *mpath; struct sta_info *sta; + if (dev == local->mdev) + return -EOPNOTSUPP; + if (!netif_running(dev)) return -ENETDOWN; + sdata = IEEE80211_DEV_TO_SUB_IF(dev); + if (sdata->vif.type != IEEE80211_IF_TYPE_MESH_POINT) return -ENOTSUPP; @@ -891,9 +952,15 @@ static int ieee80211_get_mpath(struct wiphy *wiphy, struct net_device *dev, u8 *dst, u8 *next_hop, struct mpath_info *pinfo) { - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + struct ieee80211_local *local = wiphy_priv(wiphy); + struct ieee80211_sub_if_data *sdata; struct mesh_path *mpath; + if (dev == local->mdev) + return -EOPNOTSUPP; + + sdata = IEEE80211_DEV_TO_SUB_IF(dev); + if (sdata->vif.type != IEEE80211_IF_TYPE_MESH_POINT) return -ENOTSUPP; @@ -913,9 +980,15 @@ static int ieee80211_dump_mpath(struct wiphy *wiphy, struct net_device *dev, int idx, u8 *dst, u8 *next_hop, struct mpath_info *pinfo) { - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + struct ieee80211_local *local = wiphy_priv(wiphy); + struct ieee80211_sub_if_data *sdata; struct mesh_path *mpath; + if (dev == local->mdev) + return -EOPNOTSUPP; + + sdata = IEEE80211_DEV_TO_SUB_IF(dev); + if (sdata->vif.type != IEEE80211_IF_TYPE_MESH_POINT) return -ENOTSUPP; diff --git a/net/mac80211/main.c b/net/mac80211/main.c index f1a83d450ea..a4c5b90de76 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -1233,18 +1233,12 @@ static void ieee80211_tasklet_handler(unsigned long data) /* Remove added headers (e.g., QoS control), encryption header/MIC, etc. to * make a prepared TX frame (one that has been given to hw) to look like brand * new IEEE 802.11 frame that is ready to go through TX processing again. - * Also, tx_packet_data in cb is restored from tx_control. */ + */ static void ieee80211_remove_tx_extra(struct ieee80211_local *local, struct ieee80211_key *key, struct sk_buff *skb) { int hdrlen, iv_len, mic_len; - struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); - - info->flags &= IEEE80211_TX_CTL_REQ_TX_STATUS | - IEEE80211_TX_CTL_DO_NOT_ENCRYPT | - IEEE80211_TX_CTL_REQUEUE | - IEEE80211_TX_CTL_EAPOL_FRAME; hdrlen = ieee80211_get_hdrlen_from_skb(skb); @@ -1731,8 +1725,8 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) result = ieee80211_wep_init(local); if (result < 0) { - printk(KERN_DEBUG "%s: Failed to initialize wep\n", - wiphy_name(local->hw.wiphy)); + printk(KERN_DEBUG "%s: Failed to initialize wep: %d\n", + wiphy_name(local->hw.wiphy), result); goto fail_wep; } diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index d7c371e36bf..acb04133a95 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -606,7 +606,6 @@ void ieee80211_sta_tx(struct net_device *dev, struct sk_buff *skb, int encrypt) { struct ieee80211_sub_if_data *sdata; - struct ieee80211_tx_info *info; sdata = IEEE80211_DEV_TO_SUB_IF(dev); skb->dev = sdata->local->mdev; @@ -614,11 +613,8 @@ void ieee80211_sta_tx(struct net_device *dev, struct sk_buff *skb, skb_set_network_header(skb, 0); skb_set_transport_header(skb, 0); - info = IEEE80211_SKB_CB(skb); - memset(info, 0, sizeof(struct ieee80211_tx_info)); - info->control.ifindex = sdata->dev->ifindex; - if (!encrypt) - info->flags |= IEEE80211_TX_CTL_DO_NOT_ENCRYPT; + skb->iif = sdata->dev->ifindex; + skb->do_not_encrypt = !encrypt; dev_queue_xmit(skb); } @@ -3303,6 +3299,7 @@ void ieee80211_start_mesh(struct net_device *dev) ifsta = &sdata->u.sta; ifsta->state = IEEE80211_MESH_UP; ieee80211_sta_timer((unsigned long)sdata); + ieee80211_if_config(sdata, IEEE80211_IFCC_BEACON); } #endif diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 0fbadd8b983..69019e94387 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -305,7 +305,7 @@ static void purge_old_ps_buffers(struct ieee80211_local *local) rcu_read_unlock(); local->total_ps_buffered = total; -#ifdef MAC80211_VERBOSE_PS_DEBUG +#ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG printk(KERN_DEBUG "%s: PS buffers full - purged %d frames\n", wiphy_name(local->hw.wiphy), purged); #endif @@ -342,7 +342,7 @@ ieee80211_tx_h_multicast_ps_buf(struct ieee80211_tx_data *tx) purge_old_ps_buffers(tx->local); if (skb_queue_len(&tx->sdata->bss->ps_bc_buf) >= AP_MAX_BC_BUFFER) { -#ifdef MAC80211_VERBOSE_PS_DEBUG +#ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG if (net_ratelimit()) { printk(KERN_DEBUG "%s: BC TX buffer full - " "dropping the oldest frame\n", @@ -389,7 +389,7 @@ ieee80211_tx_h_unicast_ps_buf(struct ieee80211_tx_data *tx) purge_old_ps_buffers(tx->local); if (skb_queue_len(&sta->ps_tx_buf) >= STA_MAX_TX_BUFFER) { struct sk_buff *old = skb_dequeue(&sta->ps_tx_buf); -#ifdef MAC80211_VERBOSE_PS_DEBUG +#ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG if (net_ratelimit()) { printk(KERN_DEBUG "%s: STA %s TX " "buffer full - dropping oldest frame\n", @@ -439,14 +439,14 @@ ieee80211_tx_h_select_key(struct ieee80211_tx_data *tx) struct ieee80211_tx_info *info = IEEE80211_SKB_CB(tx->skb); u16 fc = tx->fc; - if (unlikely(info->flags & IEEE80211_TX_CTL_DO_NOT_ENCRYPT)) + if (unlikely(tx->skb->do_not_encrypt)) tx->key = NULL; else if (tx->sta && (key = rcu_dereference(tx->sta->key))) tx->key = key; else if ((key = rcu_dereference(tx->sdata->default_key))) tx->key = key; else if (tx->sdata->drop_unencrypted && - !(info->flags & IEEE80211_TX_CTL_EAPOL_FRAME) && + (tx->skb->protocol != cpu_to_be16(ETH_P_PAE)) && !(info->flags & IEEE80211_TX_CTL_INJECTED)) { I802_DEBUG_INC(tx->local->tx_handlers_drop_unencrypted); return TX_DROP; @@ -476,7 +476,7 @@ ieee80211_tx_h_select_key(struct ieee80211_tx_data *tx) } if (!tx->key || !(tx->key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE)) - info->flags |= IEEE80211_TX_CTL_DO_NOT_ENCRYPT; + tx->skb->do_not_encrypt = 1; return TX_CONTINUE; } @@ -732,6 +732,7 @@ ieee80211_tx_h_fragment(struct ieee80211_tx_data *tx) memcpy(skb_put(frag, copylen), pos, copylen); memcpy(frag->cb, first->cb, sizeof(frag->cb)); skb_copy_queue_mapping(frag, first); + frag->do_not_encrypt = first->do_not_encrypt; pos += copylen; left -= copylen; @@ -852,7 +853,7 @@ __ieee80211_parse_tx_radiotap(struct ieee80211_tx_data *tx, sband = tx->local->hw.wiphy->bands[tx->channel->band]; - info->flags |= IEEE80211_TX_CTL_DO_NOT_ENCRYPT; + skb->do_not_encrypt = 1; info->flags |= IEEE80211_TX_CTL_INJECTED; tx->flags &= ~IEEE80211_TX_FRAGMENTED; @@ -925,8 +926,7 @@ __ieee80211_parse_tx_radiotap(struct ieee80211_tx_data *tx, skb_trim(skb, skb->len - FCS_LEN); } if (*iterator.this_arg & IEEE80211_RADIOTAP_F_WEP) - info->flags &= - ~IEEE80211_TX_CTL_DO_NOT_ENCRYPT; + tx->skb->do_not_encrypt = 0; if (*iterator.this_arg & IEEE80211_RADIOTAP_F_FRAG) tx->flags |= IEEE80211_TX_FRAGMENTED; break; @@ -1042,10 +1042,9 @@ static int ieee80211_tx_prepare(struct ieee80211_tx_data *tx, struct sk_buff *skb, struct net_device *mdev) { - struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); struct net_device *dev; - dev = dev_get_by_index(&init_net, info->control.ifindex); + dev = dev_get_by_index(&init_net, skb->iif); if (unlikely(dev && !is_ieee80211_device(dev, mdev))) { dev_put(dev); dev = NULL; @@ -1306,8 +1305,8 @@ int ieee80211_master_start_xmit(struct sk_buff *skb, bool may_encrypt; int ret; - if (info->control.ifindex) - odev = dev_get_by_index(&init_net, info->control.ifindex); + if (skb->iif) + odev = dev_get_by_index(&init_net, skb->iif); if (unlikely(odev && !is_ieee80211_device(odev, dev))) { dev_put(odev); odev = NULL; @@ -1321,9 +1320,13 @@ int ieee80211_master_start_xmit(struct sk_buff *skb, return 0; } + memset(info, 0, sizeof(*info)); + + info->flags |= IEEE80211_TX_CTL_REQ_TX_STATUS; + osdata = IEEE80211_DEV_TO_SUB_IF(odev); - may_encrypt = !(info->flags & IEEE80211_TX_CTL_DO_NOT_ENCRYPT); + may_encrypt = !skb->do_not_encrypt; headroom = osdata->local->tx_headroom; if (may_encrypt) @@ -1348,7 +1351,6 @@ int ieee80211_monitor_start_xmit(struct sk_buff *skb, struct net_device *dev) { struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); struct ieee80211_radiotap_header *prthdr = (struct ieee80211_radiotap_header *)skb->data; u16 len_rthdr; @@ -1371,11 +1373,11 @@ int ieee80211_monitor_start_xmit(struct sk_buff *skb, skb->dev = local->mdev; /* needed because we set skb device to master */ - info->control.ifindex = dev->ifindex; + skb->iif = dev->ifindex; - info->flags |= IEEE80211_TX_CTL_DO_NOT_ENCRYPT; - /* Interfaces should always request a status report */ - info->flags |= IEEE80211_TX_CTL_REQ_TX_STATUS; + /* sometimes we do encrypt injected frames, will be fixed + * up in radiotap parser if not wanted */ + skb->do_not_encrypt = 0; /* * fix up the pointers accounting for the radiotap @@ -1419,7 +1421,6 @@ int ieee80211_subif_start_xmit(struct sk_buff *skb, struct net_device *dev) { struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - struct ieee80211_tx_info *info; struct ieee80211_sub_if_data *sdata; int ret = 1, head_need; u16 ethertype, hdrlen, meshhdrlen = 0; @@ -1645,14 +1646,7 @@ int ieee80211_subif_start_xmit(struct sk_buff *skb, nh_pos += hdrlen; h_pos += hdrlen; - info = IEEE80211_SKB_CB(skb); - memset(info, 0, sizeof(*info)); - info->control.ifindex = dev->ifindex; - if (ethertype == ETH_P_PAE) - info->flags |= IEEE80211_TX_CTL_EAPOL_FRAME; - - /* Interfaces should always request a status report */ - info->flags |= IEEE80211_TX_CTL_REQ_TX_STATUS; + skb->iif = dev->ifindex; skb->dev = local->mdev; dev->stats.tx_packets++; @@ -1922,6 +1916,8 @@ struct sk_buff *ieee80211_beacon_get(struct ieee80211_hw *hw, info = IEEE80211_SKB_CB(skb); + skb->do_not_encrypt = 1; + info->band = band; rate_control_get_rate(local->mdev, sband, skb, &rsel); @@ -1931,7 +1927,7 @@ struct sk_buff *ieee80211_beacon_get(struct ieee80211_hw *hw, "no rate found\n", wiphy_name(local->hw.wiphy)); } - dev_kfree_skb(skb); + dev_kfree_skb_any(skb); skb = NULL; goto out; } @@ -1940,7 +1936,6 @@ struct sk_buff *ieee80211_beacon_get(struct ieee80211_hw *hw, info->tx_rate_idx = rsel.rate_idx; info->flags |= IEEE80211_TX_CTL_NO_ACK; - info->flags |= IEEE80211_TX_CTL_DO_NOT_ENCRYPT; info->flags |= IEEE80211_TX_CTL_CLEAR_PS_FILT; info->flags |= IEEE80211_TX_CTL_ASSIGN_SEQ; if (sdata->bss_conf.use_short_preamble && diff --git a/net/mac80211/wep.c b/net/mac80211/wep.c index 872d2fcd1a5..5c2bf0a3d4d 100644 --- a/net/mac80211/wep.c +++ b/net/mac80211/wep.c @@ -31,13 +31,13 @@ int ieee80211_wep_init(struct ieee80211_local *local) local->wep_tx_tfm = crypto_alloc_blkcipher("ecb(arc4)", 0, CRYPTO_ALG_ASYNC); if (IS_ERR(local->wep_tx_tfm)) - return -ENOMEM; + return PTR_ERR(local->wep_tx_tfm); local->wep_rx_tfm = crypto_alloc_blkcipher("ecb(arc4)", 0, CRYPTO_ALG_ASYNC); if (IS_ERR(local->wep_rx_tfm)) { crypto_free_blkcipher(local->wep_tx_tfm); - return -ENOMEM; + return PTR_ERR(local->wep_rx_tfm); } return 0; diff --git a/net/mac80211/wme.c b/net/mac80211/wme.c index 07edda0b8a5..28437f0001d 100644 --- a/net/mac80211/wme.c +++ b/net/mac80211/wme.c @@ -188,6 +188,9 @@ int ieee80211_ht_agg_queue_add(struct ieee80211_local *local, { int i; + /* XXX: currently broken due to cb/requeue use */ + return -EPERM; + /* prepare the filter and save it for the SW queue * matching the received HW queue */ diff --git a/net/rfkill/rfkill.c b/net/rfkill/rfkill.c index 7a560b78509..c6f2f388cb7 100644 --- a/net/rfkill/rfkill.c +++ b/net/rfkill/rfkill.c @@ -130,7 +130,6 @@ static void update_rfkill_state(struct rfkill *rfkill) /** * rfkill_toggle_radio - wrapper for toggle_radio hook - * * @rfkill: the rfkill struct to use * @force: calls toggle_radio even if cache says it is not needed, * and also makes sure notifications of the state will be @@ -141,8 +140,8 @@ static void update_rfkill_state(struct rfkill *rfkill) * calls and handling all the red tape such as issuing notifications * if the call is successful. * - * Note that @force cannot override a (possibly cached) state of - * RFKILL_STATE_HARD_BLOCKED. Any device making use of + * Note that the @force parameter cannot override a (possibly cached) + * state of RFKILL_STATE_HARD_BLOCKED. Any device making use of * RFKILL_STATE_HARD_BLOCKED implements either get_state() or * rfkill_force_state(), so the cache either is bypassed or valid. * @@ -150,7 +149,7 @@ static void update_rfkill_state(struct rfkill *rfkill) * even if the radio is in RFKILL_STATE_HARD_BLOCKED state, so as to * give the driver a hint that it should double-BLOCK the transmitter. * - * Caller must have aquired rfkill_mutex. + * Caller must have acquired rfkill->mutex. */ static int rfkill_toggle_radio(struct rfkill *rfkill, enum rfkill_state state, @@ -200,12 +199,12 @@ static int rfkill_toggle_radio(struct rfkill *rfkill, /** * rfkill_switch_all - Toggle state of all switches of given type - * @type: type of interfaces to be affeceted + * @type: type of interfaces to be affected * @state: the new state * - * This function toggles state of all switches of given type unless - * a specific switch is claimed by userspace in which case it is - * left alone. + * This function toggles the state of all switches of given type, + * unless a specific switch is claimed by userspace (in which case, + * that switch is left alone). */ void rfkill_switch_all(enum rfkill_type type, enum rfkill_state state) { @@ -216,8 +215,11 @@ void rfkill_switch_all(enum rfkill_type type, enum rfkill_state state) rfkill_states[type] = state; list_for_each_entry(rfkill, &rfkill_list, node) { - if ((!rfkill->user_claim) && (rfkill->type == type)) + if ((!rfkill->user_claim) && (rfkill->type == type)) { + mutex_lock(&rfkill->mutex); rfkill_toggle_radio(rfkill, state, 0); + mutex_unlock(&rfkill->mutex); + } } mutex_unlock(&rfkill_mutex); @@ -228,7 +230,7 @@ EXPORT_SYMBOL(rfkill_switch_all); * rfkill_epo - emergency power off all transmitters * * This kicks all rfkill devices to RFKILL_STATE_SOFT_BLOCKED, ignoring - * everything in its path but rfkill_mutex. + * everything in its path but rfkill_mutex and rfkill->mutex. */ void rfkill_epo(void) { @@ -236,7 +238,9 @@ void rfkill_epo(void) mutex_lock(&rfkill_mutex); list_for_each_entry(rfkill, &rfkill_list, node) { + mutex_lock(&rfkill->mutex); rfkill_toggle_radio(rfkill, RFKILL_STATE_SOFT_BLOCKED, 1); + mutex_unlock(&rfkill->mutex); } mutex_unlock(&rfkill_mutex); } @@ -252,7 +256,12 @@ EXPORT_SYMBOL_GPL(rfkill_epo); * a notification by the firmware/hardware of the current *real* * state of the radio rfkill switch. * - * It may not be called from an atomic context. + * Devices which are subject to external changes on their rfkill + * state (such as those caused by a hardware rfkill line) MUST + * have their driver arrange to call rfkill_force_state() as soon + * as possible after such a change. + * + * This function may not be called from an atomic context. */ int rfkill_force_state(struct rfkill *rfkill, enum rfkill_state state) { @@ -367,6 +376,9 @@ static ssize_t rfkill_claim_store(struct device *dev, if (!capable(CAP_NET_ADMIN)) return -EPERM; + if (rfkill->user_claim_unsupported) + return -EOPNOTSUPP; + /* * Take the global lock to make sure the kernel is not in * the middle of rfkill_switch_all @@ -375,19 +387,17 @@ static ssize_t rfkill_claim_store(struct device *dev, if (error) return error; - if (rfkill->user_claim_unsupported) { - error = -EOPNOTSUPP; - goto out_unlock; - } if (rfkill->user_claim != claim) { - if (!claim) + if (!claim) { + mutex_lock(&rfkill->mutex); rfkill_toggle_radio(rfkill, rfkill_states[rfkill->type], 0); + mutex_unlock(&rfkill->mutex); + } rfkill->user_claim = claim; } -out_unlock: mutex_unlock(&rfkill_mutex); return error ? error : count; @@ -516,8 +526,11 @@ static void rfkill_remove_switch(struct rfkill *rfkill) { mutex_lock(&rfkill_mutex); list_del_init(&rfkill->node); - rfkill_toggle_radio(rfkill, RFKILL_STATE_SOFT_BLOCKED, 1); mutex_unlock(&rfkill_mutex); + + mutex_lock(&rfkill->mutex); + rfkill_toggle_radio(rfkill, RFKILL_STATE_SOFT_BLOCKED, 1); + mutex_unlock(&rfkill->mutex); } /** @@ -526,9 +539,10 @@ static void rfkill_remove_switch(struct rfkill *rfkill) * @type: type of the switch (RFKILL_TYPE_*) * * This function should be called by the network driver when it needs - * rfkill structure. Once the structure is allocated the driver shoud - * finish its initialization by setting name, private data, enable_radio + * rfkill structure. Once the structure is allocated the driver should + * finish its initialization by setting the name, private data, enable_radio * and disable_radio methods and then register it with rfkill_register(). + * * NOTE: If registration fails the structure shoudl be freed by calling * rfkill_free() otherwise rfkill_unregister() should be used. */ @@ -560,7 +574,7 @@ EXPORT_SYMBOL(rfkill_allocate); * rfkill_free - Mark rfkill structure for deletion * @rfkill: rfkill structure to be destroyed * - * Decrements reference count of rfkill structure so it is destroyed. + * Decrements reference count of the rfkill structure so it is destroyed. * Note that rfkill_free() should _not_ be called after rfkill_unregister(). */ void rfkill_free(struct rfkill *rfkill) @@ -585,8 +599,10 @@ static void rfkill_led_trigger_register(struct rfkill *rfkill) static void rfkill_led_trigger_unregister(struct rfkill *rfkill) { #ifdef CONFIG_RFKILL_LEDS - if (rfkill->led_trigger.name) + if (rfkill->led_trigger.name) { led_trigger_unregister(&rfkill->led_trigger); + rfkill->led_trigger.name = NULL; + } #endif } @@ -622,8 +638,8 @@ int rfkill_register(struct rfkill *rfkill) error = device_add(dev); if (error) { - rfkill_led_trigger_unregister(rfkill); rfkill_remove_switch(rfkill); + rfkill_led_trigger_unregister(rfkill); return error; } diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index b0601642e22..4840aff4725 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -572,44 +572,21 @@ static u32 qdisc_alloc_handle(struct net_device *dev) static struct Qdisc *dev_graft_qdisc(struct netdev_queue *dev_queue, struct Qdisc *qdisc) { + struct Qdisc *oqdisc = dev_queue->qdisc_sleeping; spinlock_t *root_lock; - struct Qdisc *oqdisc; - int ingress; - - ingress = 0; - if (qdisc && qdisc->flags&TCQ_F_INGRESS) - ingress = 1; - - if (ingress) { - oqdisc = dev_queue->qdisc; - } else { - oqdisc = dev_queue->qdisc_sleeping; - } root_lock = qdisc_root_lock(oqdisc); spin_lock_bh(root_lock); - if (ingress) { - /* Prune old scheduler */ - if (oqdisc && atomic_read(&oqdisc->refcnt) <= 1) { - /* delete */ - qdisc_reset(oqdisc); - dev_queue->qdisc = NULL; - } else { /* new */ - dev_queue->qdisc = qdisc; - } + /* Prune old scheduler */ + if (oqdisc && atomic_read(&oqdisc->refcnt) <= 1) + qdisc_reset(oqdisc); - } else { - /* Prune old scheduler */ - if (oqdisc && atomic_read(&oqdisc->refcnt) <= 1) - qdisc_reset(oqdisc); - - /* ... and graft new one */ - if (qdisc == NULL) - qdisc = &noop_qdisc; - dev_queue->qdisc_sleeping = qdisc; - dev_queue->qdisc = &noop_qdisc; - } + /* ... and graft new one */ + if (qdisc == NULL) + qdisc = &noop_qdisc; + dev_queue->qdisc_sleeping = qdisc; + dev_queue->qdisc = &noop_qdisc; spin_unlock_bh(root_lock); @@ -678,7 +655,8 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent, ingress = 0; num_q = dev->num_tx_queues; - if (q && q->flags & TCQ_F_INGRESS) { + if ((q && q->flags & TCQ_F_INGRESS) || + (new && new->flags & TCQ_F_INGRESS)) { num_q = 1; ingress = 1; } @@ -692,13 +670,10 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent, if (!ingress) dev_queue = netdev_get_tx_queue(dev, i); - if (ingress) { - old = dev_graft_qdisc(dev_queue, q); - } else { - old = dev_graft_qdisc(dev_queue, new); - if (new && i > 0) - atomic_inc(&new->refcnt); - } + old = dev_graft_qdisc(dev_queue, new); + if (new && i > 0) + atomic_inc(&new->refcnt); + notify_and_destroy(skb, n, classid, old, new); } @@ -817,7 +792,7 @@ qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue, goto err_out3; } } - if (parent) + if (parent && !(sch->flags & TCQ_F_INGRESS)) list_add_tail(&sch->list, &dev_queue->qdisc->list); return sch; diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index fd2a6cadb11..345838a2e36 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -596,7 +596,7 @@ static void transition_one_qdisc(struct net_device *dev, int *need_watchdog_p = _need_watchdog; rcu_assign_pointer(dev_queue->qdisc, new_qdisc); - if (new_qdisc != &noqueue_qdisc) + if (need_watchdog_p && new_qdisc != &noqueue_qdisc) *need_watchdog_p = 1; } @@ -619,6 +619,7 @@ void dev_activate(struct net_device *dev) need_watchdog = 0; netdev_for_each_tx_queue(dev, transition_one_qdisc, &need_watchdog); + transition_one_qdisc(dev, &dev->rx_queue, NULL); if (need_watchdog) { dev->trans_start = jiffies; @@ -677,6 +678,7 @@ void dev_deactivate(struct net_device *dev) bool running; netdev_for_each_tx_queue(dev, dev_deactivate_queue, &noop_qdisc); + dev_deactivate_queue(dev, &dev->rx_queue, &noop_qdisc); dev_watchdog_down(dev); @@ -718,7 +720,7 @@ static void dev_init_scheduler_queue(struct net_device *dev, void dev_init_scheduler(struct net_device *dev) { netdev_for_each_tx_queue(dev, dev_init_scheduler_queue, &noop_qdisc); - dev_init_scheduler_queue(dev, &dev->rx_queue, NULL); + dev_init_scheduler_queue(dev, &dev->rx_queue, &noop_qdisc); setup_timer(&dev->watchdog_timer, dev_watchdog, (unsigned long)dev); } @@ -745,6 +747,6 @@ static void shutdown_scheduler_queue(struct net_device *dev, void dev_shutdown(struct net_device *dev) { netdev_for_each_tx_queue(dev, shutdown_scheduler_queue, &noop_qdisc); - shutdown_scheduler_queue(dev, &dev->rx_queue, NULL); + shutdown_scheduler_queue(dev, &dev->rx_queue, &noop_qdisc); WARN_ON(timer_pending(&dev->watchdog_timer)); } diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 835d2741308..5a32cb7c4bb 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -310,8 +310,7 @@ svc_pool_map_set_cpumask(struct task_struct *task, unsigned int pidx) switch (m->mode) { case SVC_POOL_PERCPU: { - cpumask_of_cpu_ptr(cpumask, node); - set_cpus_allowed_ptr(task, cpumask); + set_cpus_allowed_ptr(task, &cpumask_of_cpu(node)); break; } case SVC_POOL_PERNODE: diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index b7fefffd2d0..59eb2cf42e5 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -29,16 +29,16 @@ static struct genl_family nl80211_fam = { }; /* internal helper: get drv and dev */ -static int get_drv_dev_by_info_ifindex(struct genl_info *info, +static int get_drv_dev_by_info_ifindex(struct nlattr **attrs, struct cfg80211_registered_device **drv, struct net_device **dev) { int ifindex; - if (!info->attrs[NL80211_ATTR_IFINDEX]) + if (!attrs[NL80211_ATTR_IFINDEX]) return -EINVAL; - ifindex = nla_get_u32(info->attrs[NL80211_ATTR_IFINDEX]); + ifindex = nla_get_u32(attrs[NL80211_ATTR_IFINDEX]); *dev = dev_get_by_index(&init_net, ifindex); if (!*dev) return -ENODEV; @@ -291,21 +291,31 @@ static int nl80211_dump_interface(struct sk_buff *skb, struct netlink_callback * mutex_lock(&cfg80211_drv_mutex); list_for_each_entry(dev, &cfg80211_drv_list, list) { - if (++wp_idx < wp_start) + if (wp_idx < wp_start) { + wp_idx++; continue; + } if_idx = 0; mutex_lock(&dev->devlist_mtx); list_for_each_entry(wdev, &dev->netdev_list, list) { - if (++if_idx < if_start) + if (if_idx < if_start) { + if_idx++; continue; + } if (nl80211_send_iface(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, NLM_F_MULTI, - wdev->netdev) < 0) - break; + wdev->netdev) < 0) { + mutex_unlock(&dev->devlist_mtx); + goto out; + } + if_idx++; } mutex_unlock(&dev->devlist_mtx); + + wp_idx++; } + out: mutex_unlock(&cfg80211_drv_mutex); cb->args[0] = wp_idx; @@ -321,7 +331,7 @@ static int nl80211_get_interface(struct sk_buff *skb, struct genl_info *info) struct net_device *netdev; int err; - err = get_drv_dev_by_info_ifindex(info, &dev, &netdev); + err = get_drv_dev_by_info_ifindex(info->attrs, &dev, &netdev); if (err) return err; @@ -392,7 +402,7 @@ static int nl80211_set_interface(struct sk_buff *skb, struct genl_info *info) } else return -EINVAL; - err = get_drv_dev_by_info_ifindex(info, &drv, &dev); + err = get_drv_dev_by_info_ifindex(info->attrs, &drv, &dev); if (err) return err; ifindex = dev->ifindex; @@ -477,7 +487,7 @@ static int nl80211_del_interface(struct sk_buff *skb, struct genl_info *info) int ifindex, err; struct net_device *dev; - err = get_drv_dev_by_info_ifindex(info, &drv, &dev); + err = get_drv_dev_by_info_ifindex(info->attrs, &drv, &dev); if (err) return err; ifindex = dev->ifindex; @@ -545,7 +555,7 @@ static int nl80211_get_key(struct sk_buff *skb, struct genl_info *info) if (info->attrs[NL80211_ATTR_MAC]) mac_addr = nla_data(info->attrs[NL80211_ATTR_MAC]); - err = get_drv_dev_by_info_ifindex(info, &drv, &dev); + err = get_drv_dev_by_info_ifindex(info->attrs, &drv, &dev); if (err) return err; @@ -618,7 +628,7 @@ static int nl80211_set_key(struct sk_buff *skb, struct genl_info *info) if (!info->attrs[NL80211_ATTR_KEY_DEFAULT]) return -EINVAL; - err = get_drv_dev_by_info_ifindex(info, &drv, &dev); + err = get_drv_dev_by_info_ifindex(info->attrs, &drv, &dev); if (err) return err; @@ -699,7 +709,7 @@ static int nl80211_new_key(struct sk_buff *skb, struct genl_info *info) return -EINVAL; } - err = get_drv_dev_by_info_ifindex(info, &drv, &dev); + err = get_drv_dev_by_info_ifindex(info->attrs, &drv, &dev); if (err) return err; @@ -735,7 +745,7 @@ static int nl80211_del_key(struct sk_buff *skb, struct genl_info *info) if (info->attrs[NL80211_ATTR_MAC]) mac_addr = nla_data(info->attrs[NL80211_ATTR_MAC]); - err = get_drv_dev_by_info_ifindex(info, &drv, &dev); + err = get_drv_dev_by_info_ifindex(info->attrs, &drv, &dev); if (err) return err; @@ -764,7 +774,7 @@ static int nl80211_addset_beacon(struct sk_buff *skb, struct genl_info *info) struct beacon_parameters params; int haveinfo = 0; - err = get_drv_dev_by_info_ifindex(info, &drv, &dev); + err = get_drv_dev_by_info_ifindex(info->attrs, &drv, &dev); if (err) return err; @@ -843,7 +853,7 @@ static int nl80211_del_beacon(struct sk_buff *skb, struct genl_info *info) int err; struct net_device *dev; - err = get_drv_dev_by_info_ifindex(info, &drv, &dev); + err = get_drv_dev_by_info_ifindex(info->attrs, &drv, &dev); if (err) return err; @@ -937,67 +947,78 @@ static int nl80211_send_station(struct sk_buff *msg, u32 pid, u32 seq, } static int nl80211_dump_station(struct sk_buff *skb, - struct netlink_callback *cb) + struct netlink_callback *cb) { - int wp_idx = 0; - int if_idx = 0; - int sta_idx = cb->args[2]; - int wp_start = cb->args[0]; - int if_start = cb->args[1]; struct station_info sinfo; struct cfg80211_registered_device *dev; - struct wireless_dev *wdev; + struct net_device *netdev; u8 mac_addr[ETH_ALEN]; + int ifidx = cb->args[0]; + int sta_idx = cb->args[1]; int err; - int exit = 0; - /* TODO: filter by device */ - mutex_lock(&cfg80211_drv_mutex); - list_for_each_entry(dev, &cfg80211_drv_list, list) { - if (exit) + if (!ifidx) { + err = nlmsg_parse(cb->nlh, GENL_HDRLEN + nl80211_fam.hdrsize, + nl80211_fam.attrbuf, nl80211_fam.maxattr, + nl80211_policy); + if (err) + return err; + + if (!nl80211_fam.attrbuf[NL80211_ATTR_IFINDEX]) + return -EINVAL; + + ifidx = nla_get_u32(nl80211_fam.attrbuf[NL80211_ATTR_IFINDEX]); + if (!ifidx) + return -EINVAL; + } + + netdev = dev_get_by_index(&init_net, ifidx); + if (!netdev) + return -ENODEV; + + dev = cfg80211_get_dev_from_ifindex(ifidx); + if (IS_ERR(dev)) { + err = PTR_ERR(dev); + goto out_put_netdev; + } + + if (!dev->ops->dump_station) { + err = -ENOSYS; + goto out_err; + } + + rtnl_lock(); + + while (1) { + err = dev->ops->dump_station(&dev->wiphy, netdev, sta_idx, + mac_addr, &sinfo); + if (err == -ENOENT) break; - if (++wp_idx < wp_start) - continue; - if_idx = 0; + if (err) + goto out_err_rtnl; - mutex_lock(&dev->devlist_mtx); - list_for_each_entry(wdev, &dev->netdev_list, list) { - if (exit) - break; - if (++if_idx < if_start) - continue; - if (!dev->ops->dump_station) - continue; + if (nl80211_send_station(skb, + NETLINK_CB(cb->skb).pid, + cb->nlh->nlmsg_seq, NLM_F_MULTI, + netdev, mac_addr, + &sinfo) < 0) + goto out; - for (;; ++sta_idx) { - rtnl_lock(); - err = dev->ops->dump_station(&dev->wiphy, - wdev->netdev, sta_idx, mac_addr, - &sinfo); - rtnl_unlock(); - if (err) { - sta_idx = 0; - break; - } - if (nl80211_send_station(skb, - NETLINK_CB(cb->skb).pid, - cb->nlh->nlmsg_seq, NLM_F_MULTI, - wdev->netdev, mac_addr, - &sinfo) < 0) { - exit = 1; - break; - } - } - } - mutex_unlock(&dev->devlist_mtx); + sta_idx++; } - mutex_unlock(&cfg80211_drv_mutex); - cb->args[0] = wp_idx; - cb->args[1] = if_idx; - cb->args[2] = sta_idx; - return skb->len; + out: + cb->args[1] = sta_idx; + err = skb->len; + out_err_rtnl: + rtnl_unlock(); + out_err: + cfg80211_put_dev(dev); + out_put_netdev: + dev_put(netdev); + + return err; } static int nl80211_get_station(struct sk_buff *skb, struct genl_info *info) @@ -1016,7 +1037,7 @@ static int nl80211_get_station(struct sk_buff *skb, struct genl_info *info) mac_addr = nla_data(info->attrs[NL80211_ATTR_MAC]); - err = get_drv_dev_by_info_ifindex(info, &drv, &dev); + err = get_drv_dev_by_info_ifindex(info->attrs, &drv, &dev); if (err) return err; @@ -1112,7 +1133,7 @@ static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info) params.plink_action = nla_get_u8(info->attrs[NL80211_ATTR_STA_PLINK_ACTION]); - err = get_drv_dev_by_info_ifindex(info, &drv, &dev); + err = get_drv_dev_by_info_ifindex(info->attrs, &drv, &dev); if (err) return err; @@ -1172,7 +1193,7 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info) ¶ms.station_flags)) return -EINVAL; - err = get_drv_dev_by_info_ifindex(info, &drv, &dev); + err = get_drv_dev_by_info_ifindex(info->attrs, &drv, &dev); if (err) return err; @@ -1207,7 +1228,7 @@ static int nl80211_del_station(struct sk_buff *skb, struct genl_info *info) if (info->attrs[NL80211_ATTR_MAC]) mac_addr = nla_data(info->attrs[NL80211_ATTR_MAC]); - err = get_drv_dev_by_info_ifindex(info, &drv, &dev); + err = get_drv_dev_by_info_ifindex(info->attrs, &drv, &dev); if (err) return err; @@ -1277,68 +1298,78 @@ static int nl80211_send_mpath(struct sk_buff *msg, u32 pid, u32 seq, } static int nl80211_dump_mpath(struct sk_buff *skb, - struct netlink_callback *cb) + struct netlink_callback *cb) { - int wp_idx = 0; - int if_idx = 0; - int sta_idx = cb->args[2]; - int wp_start = cb->args[0]; - int if_start = cb->args[1]; struct mpath_info pinfo; struct cfg80211_registered_device *dev; - struct wireless_dev *wdev; + struct net_device *netdev; u8 dst[ETH_ALEN]; u8 next_hop[ETH_ALEN]; + int ifidx = cb->args[0]; + int path_idx = cb->args[1]; int err; - int exit = 0; - /* TODO: filter by device */ - mutex_lock(&cfg80211_drv_mutex); - list_for_each_entry(dev, &cfg80211_drv_list, list) { - if (exit) + if (!ifidx) { + err = nlmsg_parse(cb->nlh, GENL_HDRLEN + nl80211_fam.hdrsize, + nl80211_fam.attrbuf, nl80211_fam.maxattr, + nl80211_policy); + if (err) + return err; + + if (!nl80211_fam.attrbuf[NL80211_ATTR_IFINDEX]) + return -EINVAL; + + ifidx = nla_get_u32(nl80211_fam.attrbuf[NL80211_ATTR_IFINDEX]); + if (!ifidx) + return -EINVAL; + } + + netdev = dev_get_by_index(&init_net, ifidx); + if (!netdev) + return -ENODEV; + + dev = cfg80211_get_dev_from_ifindex(ifidx); + if (IS_ERR(dev)) { + err = PTR_ERR(dev); + goto out_put_netdev; + } + + if (!dev->ops->dump_mpath) { + err = -ENOSYS; + goto out_err; + } + + rtnl_lock(); + + while (1) { + err = dev->ops->dump_mpath(&dev->wiphy, netdev, path_idx, + dst, next_hop, &pinfo); + if (err == -ENOENT) break; - if (++wp_idx < wp_start) - continue; - if_idx = 0; + if (err) + goto out_err_rtnl; - mutex_lock(&dev->devlist_mtx); - list_for_each_entry(wdev, &dev->netdev_list, list) { - if (exit) - break; - if (++if_idx < if_start) - continue; - if (!dev->ops->dump_mpath) - continue; + if (nl80211_send_mpath(skb, NETLINK_CB(cb->skb).pid, + cb->nlh->nlmsg_seq, NLM_F_MULTI, + netdev, dst, next_hop, + &pinfo) < 0) + goto out; - for (;; ++sta_idx) { - rtnl_lock(); - err = dev->ops->dump_mpath(&dev->wiphy, - wdev->netdev, sta_idx, dst, - next_hop, &pinfo); - rtnl_unlock(); - if (err) { - sta_idx = 0; - break; - } - if (nl80211_send_mpath(skb, - NETLINK_CB(cb->skb).pid, - cb->nlh->nlmsg_seq, NLM_F_MULTI, - wdev->netdev, dst, next_hop, - &pinfo) < 0) { - exit = 1; - break; - } - } - } - mutex_unlock(&dev->devlist_mtx); + path_idx++; } - mutex_unlock(&cfg80211_drv_mutex); - cb->args[0] = wp_idx; - cb->args[1] = if_idx; - cb->args[2] = sta_idx; - return skb->len; + out: + cb->args[1] = path_idx; + err = skb->len; + out_err_rtnl: + rtnl_unlock(); + out_err: + cfg80211_put_dev(dev); + out_put_netdev: + dev_put(netdev); + + return err; } static int nl80211_get_mpath(struct sk_buff *skb, struct genl_info *info) @@ -1358,7 +1389,7 @@ static int nl80211_get_mpath(struct sk_buff *skb, struct genl_info *info) dst = nla_data(info->attrs[NL80211_ATTR_MAC]); - err = get_drv_dev_by_info_ifindex(info, &drv, &dev); + err = get_drv_dev_by_info_ifindex(info->attrs, &drv, &dev); if (err) return err; @@ -1411,7 +1442,7 @@ static int nl80211_set_mpath(struct sk_buff *skb, struct genl_info *info) dst = nla_data(info->attrs[NL80211_ATTR_MAC]); next_hop = nla_data(info->attrs[NL80211_ATTR_MPATH_NEXT_HOP]); - err = get_drv_dev_by_info_ifindex(info, &drv, &dev); + err = get_drv_dev_by_info_ifindex(info->attrs, &drv, &dev); if (err) return err; @@ -1446,7 +1477,7 @@ static int nl80211_new_mpath(struct sk_buff *skb, struct genl_info *info) dst = nla_data(info->attrs[NL80211_ATTR_MAC]); next_hop = nla_data(info->attrs[NL80211_ATTR_MPATH_NEXT_HOP]); - err = get_drv_dev_by_info_ifindex(info, &drv, &dev); + err = get_drv_dev_by_info_ifindex(info->attrs, &drv, &dev); if (err) return err; @@ -1475,7 +1506,7 @@ static int nl80211_del_mpath(struct sk_buff *skb, struct genl_info *info) if (info->attrs[NL80211_ATTR_MAC]) dst = nla_data(info->attrs[NL80211_ATTR_MAC]); - err = get_drv_dev_by_info_ifindex(info, &drv, &dev); + err = get_drv_dev_by_info_ifindex(info->attrs, &drv, &dev); if (err) return err; diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c index 8f038e6d5f9..418cd7dbbc9 100644 --- a/scripts/mod/modpost.c +++ b/scripts/mod/modpost.c @@ -1468,7 +1468,7 @@ static void section_rel(const char *modname, struct elf_info *elf, * marked __initdata will be discarded when the module has been intialized. * Likewise for modules used built-in the sections marked __exit * are discarded because __exit marked function are supposed to be called - * only when a moduel is unloaded which never happes for built-in modules. + * only when a module is unloaded which never happens for built-in modules. * The check_sec_ref() function traverses all relocation records * to find all references to a section that reference a section that will * be discarded and warns about it. |