From 08eb8f124f990aa476589d1f7810f7ec7f259c08 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 10 Oct 2005 21:02:26 -0700 Subject: [SPARC32]: Revert IOMAP change eb98129eec7fa605f0407dfd92d40ee8ddf5cd9a Breakage noted by Al Viro. It breaks non-PCI builds, it's probably better to have a more direct implementation on sparc32, and which driver actually needs this is still questionable. We can resolve this in 2.6.15 Signed-off-by: David S. Miller --- arch/sparc/Kconfig | 4 ---- arch/sparc/defconfig | 1 - 2 files changed, 5 deletions(-) (limited to 'arch') diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index f7c51b86904..6537445dac0 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -21,10 +21,6 @@ config GENERIC_ISA_DMA bool default y -config GENERIC_IOMAP - bool - default y - source "init/Kconfig" menu "General machine setup" diff --git a/arch/sparc/defconfig b/arch/sparc/defconfig index 8a3aef1e22f..a6985626300 100644 --- a/arch/sparc/defconfig +++ b/arch/sparc/defconfig @@ -5,7 +5,6 @@ CONFIG_MMU=y CONFIG_UID16=y CONFIG_HIGHMEM=y CONFIG_GENERIC_ISA_DMA=y -CONFIG_GENERIC_IOMAP=y # # Code maturity level options -- cgit v1.2.3-70-g09d2 From e4314bf496bb7bb9acd754aeb319c30869bc8d76 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Tue, 11 Oct 2005 08:29:00 -0700 Subject: [PATCH] ppc64: Fix PCI hotplug pSeries_irq_bus_setup is marked __devinit but references s7a_workaround which is marked __initdata. Depending on who got the memory for s7a_workaround (and if the value was now positive), it was possible for PCI hotplugged devices to have 3 subtracted from their interrupt number. This would happen randomly and caused me much confusion :) Signed-off-by: Anton Blanchard Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/ppc64/kernel/pSeries_pci.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/ppc64/kernel/pSeries_pci.c b/arch/ppc64/kernel/pSeries_pci.c index 1f5f141fb7a..928f8febdb3 100644 --- a/arch/ppc64/kernel/pSeries_pci.c +++ b/arch/ppc64/kernel/pSeries_pci.c @@ -32,7 +32,7 @@ #include "pci.h" -static int __initdata s7a_workaround = -1; +static int __devinitdata s7a_workaround = -1; #if 0 void pcibios_name_device(struct pci_dev *dev) @@ -60,7 +60,7 @@ void pcibios_name_device(struct pci_dev *dev) DECLARE_PCI_FIXUP_HEADER(PCI_ANY_ID, PCI_ANY_ID, pcibios_name_device); #endif -static void __init check_s7a(void) +static void __devinit check_s7a(void) { struct device_node *root; char *model; -- cgit v1.2.3-70-g09d2 From a0c111c631e7ab4abd68920debd44259160812ef Mon Sep 17 00:00:00 2001 From: Paolo Galtieri Date: Tue, 11 Oct 2005 08:29:07 -0700 Subject: [PATCH] ppc highmem fix I've noticed that the calculations for seg_size and nr_segs in __dma_sync_page_highmem() (arch/ppc/kernel/dma-mapping.c) are wrong. The incorrect calculations can result in either an oops or a panic when running fsck depending on the size of the partition. The problem with the seg_size calculation is that it can result in a negative number if size is offset > size. The problem with the nr_segs caculation is returns the wrong number of segments, e.g. it returns 1 when size is 200 and offset is 4095, when it should return 2 or more. Acked-by: Benjamin Herrenschmidt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/ppc/kernel/dma-mapping.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/ppc/kernel/dma-mapping.c b/arch/ppc/kernel/dma-mapping.c index b566d982806..8edee806dae 100644 --- a/arch/ppc/kernel/dma-mapping.c +++ b/arch/ppc/kernel/dma-mapping.c @@ -401,10 +401,10 @@ EXPORT_SYMBOL(__dma_sync); static inline void __dma_sync_page_highmem(struct page *page, unsigned long offset, size_t size, int direction) { - size_t seg_size = min((size_t)PAGE_SIZE, size) - offset; + size_t seg_size = min((size_t)(PAGE_SIZE - offset), size); size_t cur_size = seg_size; unsigned long flags, start, seg_offset = offset; - int nr_segs = PAGE_ALIGN(size + (PAGE_SIZE - offset))/PAGE_SIZE; + int nr_segs = 1 + ((size - seg_size) + PAGE_SIZE - 1)/PAGE_SIZE; int seg_nr = 0; local_irq_save(flags); -- cgit v1.2.3-70-g09d2 From 9de11aab1c8fd87da7e1fb435ce0ff26bacd7909 Mon Sep 17 00:00:00 2001 From: Hirokazu Takata Date: Tue, 11 Oct 2005 08:29:09 -0700 Subject: [PATCH] m32r: trap handler code for illegal traps This patch prevents illegal traps from causing m32r kernel's infinite loop execution. Signed-off-by: Naoto Sugai Signed-off-by: Hirokazu Takata Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/m32r/kernel/entry.S | 9 +++++++++ arch/m32r/kernel/traps.c | 33 ++++++++++++++++----------------- 2 files changed, 25 insertions(+), 17 deletions(-) (limited to 'arch') diff --git a/arch/m32r/kernel/entry.S b/arch/m32r/kernel/entry.S index dddbf6b5ed2..85920fb8d08 100644 --- a/arch/m32r/kernel/entry.S +++ b/arch/m32r/kernel/entry.S @@ -681,6 +681,15 @@ ENTRY(debug_trap) bl do_debug_trap bra error_code +ENTRY(ill_trap) + /* void ill_trap(void) */ + SWITCH_TO_KERNEL_STACK + SAVE_ALL + ldi r1, #0 ; error_code ; FIXME + mv r0, sp ; pt_regs + bl do_ill_trap + bra error_code + /* Cache flushing handler */ ENTRY(cache_flushing_handler) diff --git a/arch/m32r/kernel/traps.c b/arch/m32r/kernel/traps.c index 01922271d17..5fe8ed6d62d 100644 --- a/arch/m32r/kernel/traps.c +++ b/arch/m32r/kernel/traps.c @@ -5,8 +5,6 @@ * Hitoshi Yamamoto */ -/* $Id$ */ - /* * 'traps.c' handles hardware traps and faults after we have saved some * state in 'entry.S'. @@ -35,6 +33,7 @@ asmlinkage void ei_handler(void); asmlinkage void rie_handler(void); asmlinkage void debug_trap(void); asmlinkage void cache_flushing_handler(void); +asmlinkage void ill_trap(void); #ifdef CONFIG_SMP extern void smp_reschedule_interrupt(void); @@ -77,22 +76,22 @@ void set_eit_vector_entries(void) eit_vector[5] = BRA_INSN(default_eit_handler, 5); eit_vector[8] = BRA_INSN(rie_handler, 8); eit_vector[12] = BRA_INSN(alignment_check, 12); - eit_vector[16] = 0xff000000UL; + eit_vector[16] = BRA_INSN(ill_trap, 16); eit_vector[17] = BRA_INSN(debug_trap, 17); eit_vector[18] = BRA_INSN(system_call, 18); - eit_vector[19] = 0xff000000UL; - eit_vector[20] = 0xff000000UL; - eit_vector[21] = 0xff000000UL; - eit_vector[22] = 0xff000000UL; - eit_vector[23] = 0xff000000UL; - eit_vector[24] = 0xff000000UL; - eit_vector[25] = 0xff000000UL; - eit_vector[26] = 0xff000000UL; - eit_vector[27] = 0xff000000UL; + eit_vector[19] = BRA_INSN(ill_trap, 19); + eit_vector[20] = BRA_INSN(ill_trap, 20); + eit_vector[21] = BRA_INSN(ill_trap, 21); + eit_vector[22] = BRA_INSN(ill_trap, 22); + eit_vector[23] = BRA_INSN(ill_trap, 23); + eit_vector[24] = BRA_INSN(ill_trap, 24); + eit_vector[25] = BRA_INSN(ill_trap, 25); + eit_vector[26] = BRA_INSN(ill_trap, 26); + eit_vector[27] = BRA_INSN(ill_trap, 27); eit_vector[28] = BRA_INSN(cache_flushing_handler, 28); - eit_vector[29] = 0xff000000UL; - eit_vector[30] = 0xff000000UL; - eit_vector[31] = 0xff000000UL; + eit_vector[29] = BRA_INSN(ill_trap, 29); + eit_vector[30] = BRA_INSN(ill_trap, 30); + eit_vector[31] = BRA_INSN(ill_trap, 31); eit_vector[32] = BRA_INSN(ei_handler, 32); eit_vector[64] = BRA_INSN(pie_handler, 64); #ifdef CONFIG_MMU @@ -286,7 +285,8 @@ asmlinkage void do_##name(struct pt_regs * regs, long error_code) \ DO_ERROR( 1, SIGTRAP, "debug trap", debug_trap) DO_ERROR_INFO(0x20, SIGILL, "reserved instruction ", rie_handler, ILL_ILLOPC, regs->bpc) -DO_ERROR_INFO(0x100, SIGILL, "privilege instruction", pie_handler, ILL_PRVOPC, regs->bpc) +DO_ERROR_INFO(0x100, SIGILL, "privileged instruction", pie_handler, ILL_PRVOPC, regs->bpc) +DO_ERROR_INFO(-1, SIGILL, "illegal trap", ill_trap, ILL_ILLTRP, regs->bpc) extern int handle_unaligned_access(unsigned long, struct pt_regs *); @@ -329,4 +329,3 @@ asmlinkage void do_alignment_check(struct pt_regs *regs, long error_code) set_fs(oldfs); } } - -- cgit v1.2.3-70-g09d2 From 9149ccfa3571eaa4a4b444777d67fc4ed3ebcf27 Mon Sep 17 00:00:00 2001 From: Peter Bergner Date: Tue, 11 Oct 2005 09:28:24 -0700 Subject: [PATCH] ppc64: Add R_PPC64_TOC16 module reloc Newer gcc's are generating this relocation, so the module loader needs to handle it. Signed-off-by: Peter Bergner Signed-off-by: Anton Blanchard Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/ppc64/kernel/module.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'arch') diff --git a/arch/ppc64/kernel/module.c b/arch/ppc64/kernel/module.c index c683bf88e69..928b8581fcb 100644 --- a/arch/ppc64/kernel/module.c +++ b/arch/ppc64/kernel/module.c @@ -341,6 +341,19 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, *(unsigned long *)location = my_r2(sechdrs, me); break; + case R_PPC64_TOC16: + /* Subtact TOC pointer */ + value -= my_r2(sechdrs, me); + if (value + 0x8000 > 0xffff) { + printk("%s: bad TOC16 relocation (%lu)\n", + me->name, value); + return -ENOEXEC; + } + *((uint16_t *) location) + = (*((uint16_t *) location) & ~0xffff) + | (value & 0xffff); + break; + case R_PPC64_TOC16_DS: /* Subtact TOC pointer */ value -= my_r2(sechdrs, me); -- cgit v1.2.3-70-g09d2 From b1b510aa284af1908d5d369d52f7dae16aaabd71 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 11 Oct 2005 15:45:16 -0700 Subject: [SPARC64]: Fix net booting on Ultra5 We were not doing alignment properly when remapping the kernel image. What we want is a 4MB aligned physical address to map at KERNBASE. Mistakedly we were 4MB aligning the virtual address where the kernel initially sits, that's wrong. Instead, we should PAGE align the virtual address, then 4MB align the physical address result the prom gives to us. Signed-off-by: David S. Miller --- arch/sparc64/kernel/head.S | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/sparc64/kernel/head.S b/arch/sparc64/kernel/head.S index f1dcdf8f743..4c942f71184 100644 --- a/arch/sparc64/kernel/head.S +++ b/arch/sparc64/kernel/head.S @@ -191,8 +191,9 @@ prom_boot_mapping_phys_low: stx %l3, [%sp + 2047 + 128 + 0x10] ! num_rets, 5 stx %l2, [%sp + 2047 + 128 + 0x18] ! arg1: "translate" stx %l5, [%sp + 2047 + 128 + 0x20] ! arg2: prom_mmu_ihandle_cache - srlx %l0, 22, %l3 - sllx %l3, 22, %l3 + /* PAGE align */ + srlx %l0, 13, %l3 + sllx %l3, 13, %l3 stx %l3, [%sp + 2047 + 128 + 0x28] ! arg3: vaddr, our PC stx %g0, [%sp + 2047 + 128 + 0x30] ! res1 stx %g0, [%sp + 2047 + 128 + 0x38] ! res2 @@ -211,6 +212,9 @@ prom_boot_mapping_phys_low: ldx [%sp + 2047 + 128 + 0x48], %l2 ! physaddr high stx %l2, [%l4 + 0x0] ldx [%sp + 2047 + 128 + 0x50], %l3 ! physaddr low + /* 4MB align */ + srlx %l3, 22, %l3 + sllx %l3, 22, %l3 stx %l3, [%l4 + 0x8] /* Leave service as-is, "call-method" */ -- cgit v1.2.3-70-g09d2 From 91acb21f084aa244f26839406ae7ed8aa3668058 Mon Sep 17 00:00:00 2001 From: Jeff Dike Date: Mon, 10 Oct 2005 23:10:32 -0400 Subject: [PATCH] uml: revert block driver use of host AIO The patch to use host AIO support that I submitted early after 2.6.13 exposed some problems in the block driver. I have fixes for these, but am not comfortable putting them into 2.6.14 at this late date. So, this patch reverts the use of host AIO. I will resubmit the original patch, plus fixes to the driver after 2.6.14 in order to get a reasonable amount of testing before they're exposed to the general public. Signed-off-by: Jeff Dike Signed-off-by: Linus Torvalds --- arch/um/drivers/Makefile | 2 +- arch/um/drivers/ubd_kern.c | 556 +++++++++++++++++++++++++-------------------- arch/um/drivers/ubd_user.c | 75 ++++++ arch/um/include/aio.h | 18 +- arch/um/os-Linux/aio.c | 205 ++++++++--------- 5 files changed, 483 insertions(+), 373 deletions(-) create mode 100644 arch/um/drivers/ubd_user.c (limited to 'arch') diff --git a/arch/um/drivers/Makefile b/arch/um/drivers/Makefile index 783e18cae09..de17d4c6e02 100644 --- a/arch/um/drivers/Makefile +++ b/arch/um/drivers/Makefile @@ -13,7 +13,7 @@ mcast-objs := mcast_kern.o mcast_user.o net-objs := net_kern.o net_user.o mconsole-objs := mconsole_kern.o mconsole_user.o hostaudio-objs := hostaudio_kern.o -ubd-objs := ubd_kern.o +ubd-objs := ubd_kern.o ubd_user.o port-objs := port_kern.o port_user.o harddog-objs := harddog_kern.o harddog_user.o diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c index e77a38da435..f73134333f6 100644 --- a/arch/um/drivers/ubd_kern.c +++ b/arch/um/drivers/ubd_kern.c @@ -35,7 +35,6 @@ #include "linux/blkpg.h" #include "linux/genhd.h" #include "linux/spinlock.h" -#include "asm/atomic.h" #include "asm/segment.h" #include "asm/uaccess.h" #include "asm/irq.h" @@ -54,21 +53,20 @@ #include "mem.h" #include "mem_kern.h" #include "cow.h" -#include "aio.h" enum ubd_req { UBD_READ, UBD_WRITE }; struct io_thread_req { - enum aio_type op; + enum ubd_req op; int fds[2]; unsigned long offsets[2]; unsigned long long offset; unsigned long length; char *buffer; int sectorsize; - int bitmap_offset; - long bitmap_start; - long bitmap_end; + unsigned long sector_mask; + unsigned long long cow_offset; + unsigned long bitmap_words[2]; int error; }; @@ -82,31 +80,28 @@ extern int create_cow_file(char *cow_file, char *backing_file, unsigned long *bitmap_len_out, int *data_offset_out); extern int read_cow_bitmap(int fd, void *buf, int offset, int len); -extern void do_io(struct io_thread_req *req, struct request *r, - unsigned long *bitmap); +extern void do_io(struct io_thread_req *req); -static inline int ubd_test_bit(__u64 bit, void *data) +static inline int ubd_test_bit(__u64 bit, unsigned char *data) { - unsigned char *buffer = data; __u64 n; int bits, off; - bits = sizeof(buffer[0]) * 8; + bits = sizeof(data[0]) * 8; n = bit / bits; off = bit % bits; - return((buffer[n] & (1 << off)) != 0); + return((data[n] & (1 << off)) != 0); } -static inline void ubd_set_bit(__u64 bit, void *data) +static inline void ubd_set_bit(__u64 bit, unsigned char *data) { - unsigned char *buffer = data; __u64 n; int bits, off; - bits = sizeof(buffer[0]) * 8; + bits = sizeof(data[0]) * 8; n = bit / bits; off = bit % bits; - buffer[n] |= (1 << off); + data[n] |= (1 << off); } /*End stuff from ubd_user.h*/ @@ -115,6 +110,8 @@ static inline void ubd_set_bit(__u64 bit, void *data) static DEFINE_SPINLOCK(ubd_io_lock); static DEFINE_SPINLOCK(ubd_lock); +static void (*do_ubd)(void); + static int ubd_open(struct inode * inode, struct file * filp); static int ubd_release(struct inode * inode, struct file * file); static int ubd_ioctl(struct inode * inode, struct file * file, @@ -161,8 +158,6 @@ struct cow { int data_offset; }; -#define MAX_SG 64 - struct ubd { char *file; int count; @@ -173,7 +168,6 @@ struct ubd { int no_cow; struct cow cow; struct platform_device pdev; - struct scatterlist sg[MAX_SG]; }; #define DEFAULT_COW { \ @@ -466,114 +460,81 @@ __uml_help(fakehd, ); static void do_ubd_request(request_queue_t * q); -static int in_ubd; + +/* Only changed by ubd_init, which is an initcall. */ +int thread_fd = -1; /* Changed by ubd_handler, which is serialized because interrupts only * happen on CPU 0. */ int intr_count = 0; -static void ubd_end_request(struct request *req, int bytes, int uptodate) +/* call ubd_finish if you need to serialize */ +static void __ubd_finish(struct request *req, int error) { - if (!end_that_request_first(req, uptodate, bytes >> 9)) { - add_disk_randomness(req->rq_disk); - end_that_request_last(req); + int nsect; + + if(error){ + end_request(req, 0); + return; } + nsect = req->current_nr_sectors; + req->sector += nsect; + req->buffer += nsect << 9; + req->errors = 0; + req->nr_sectors -= nsect; + req->current_nr_sectors = 0; + end_request(req, 1); } -/* call ubd_finish if you need to serialize */ -static void __ubd_finish(struct request *req, int bytes) +static inline void ubd_finish(struct request *req, int error) { - if(bytes < 0){ - ubd_end_request(req, 0, 0); - return; - } - - ubd_end_request(req, bytes, 1); + spin_lock(&ubd_io_lock); + __ubd_finish(req, error); + spin_unlock(&ubd_io_lock); } -static inline void ubd_finish(struct request *req, int bytes) +/* Called without ubd_io_lock held */ +static void ubd_handler(void) { - spin_lock(&ubd_io_lock); - __ubd_finish(req, bytes); - spin_unlock(&ubd_io_lock); + struct io_thread_req req; + struct request *rq = elv_next_request(ubd_queue); + int n; + + do_ubd = NULL; + intr_count++; + n = os_read_file(thread_fd, &req, sizeof(req)); + if(n != sizeof(req)){ + printk(KERN_ERR "Pid %d - spurious interrupt in ubd_handler, " + "err = %d\n", os_getpid(), -n); + spin_lock(&ubd_io_lock); + end_request(rq, 0); + spin_unlock(&ubd_io_lock); + return; + } + + ubd_finish(rq, req.error); + reactivate_fd(thread_fd, UBD_IRQ); + do_ubd_request(ubd_queue); } -struct bitmap_io { - atomic_t count; - struct aio_context aio; -}; - -struct ubd_aio { - struct aio_context aio; - struct request *req; - int len; - struct bitmap_io *bitmap; - void *bitmap_buf; -}; - -static int ubd_reply_fd = -1; - static irqreturn_t ubd_intr(int irq, void *dev, struct pt_regs *unused) { - struct aio_thread_reply reply; - struct ubd_aio *aio; - struct request *req; - int err, n, fd = (int) (long) dev; - - while(1){ - err = os_read_file(fd, &reply, sizeof(reply)); - if(err == -EAGAIN) - break; - if(err < 0){ - printk("ubd_aio_handler - read returned err %d\n", - -err); - break; - } - - aio = container_of(reply.data, struct ubd_aio, aio); - n = reply.err; - - if(n == 0){ - req = aio->req; - req->nr_sectors -= aio->len >> 9; - - if((aio->bitmap != NULL) && - (atomic_dec_and_test(&aio->bitmap->count))){ - aio->aio = aio->bitmap->aio; - aio->len = 0; - kfree(aio->bitmap); - aio->bitmap = NULL; - submit_aio(&aio->aio); - } - else { - if((req->nr_sectors == 0) && - (aio->bitmap == NULL)){ - int len = req->hard_nr_sectors << 9; - ubd_finish(req, len); - } - - if(aio->bitmap_buf != NULL) - kfree(aio->bitmap_buf); - kfree(aio); - } - } - else if(n < 0){ - ubd_finish(aio->req, n); - if(aio->bitmap != NULL) - kfree(aio->bitmap); - if(aio->bitmap_buf != NULL) - kfree(aio->bitmap_buf); - kfree(aio); - } - } - reactivate_fd(fd, UBD_IRQ); + ubd_handler(); + return(IRQ_HANDLED); +} - do_ubd_request(ubd_queue); +/* Only changed by ubd_init, which is an initcall. */ +static int io_pid = -1; - return(IRQ_HANDLED); +void kill_io_thread(void) +{ + if(io_pid != -1) + os_kill_process(io_pid, 1); } +__uml_exitcall(kill_io_thread); + static int ubd_file_size(struct ubd *dev, __u64 *size_out) { char *file; @@ -608,7 +569,7 @@ static int ubd_open_dev(struct ubd *dev) &dev->cow.data_offset, create_ptr); if((dev->fd == -ENOENT) && create_cow){ - dev->fd = create_cow_file(dev->file, dev->cow.file, + dev->fd = create_cow_file(dev->file, dev->cow.file, dev->openflags, 1 << 9, PAGE_SIZE, &dev->cow.bitmap_offset, &dev->cow.bitmap_len, @@ -870,10 +831,6 @@ int ubd_init(void) { int i; - ubd_reply_fd = init_aio_irq(UBD_IRQ, "ubd", ubd_intr); - if(ubd_reply_fd < 0) - printk("Setting up ubd AIO failed, err = %d\n", ubd_reply_fd); - devfs_mk_dir("ubd"); if (register_blkdev(MAJOR_NR, "ubd")) return -1; @@ -884,7 +841,6 @@ int ubd_init(void) return -1; } - blk_queue_max_hw_segments(ubd_queue, MAX_SG); if (fake_major != MAJOR_NR) { char name[sizeof("ubd_nnn\0")]; @@ -896,12 +852,40 @@ int ubd_init(void) driver_register(&ubd_driver); for (i = 0; i < MAX_DEV; i++) ubd_add(i); - return 0; } late_initcall(ubd_init); +int ubd_driver_init(void){ + unsigned long stack; + int err; + + /* Set by CONFIG_BLK_DEV_UBD_SYNC or ubd=sync.*/ + if(global_openflags.s){ + printk(KERN_INFO "ubd: Synchronous mode\n"); + /* Letting ubd=sync be like using ubd#s= instead of ubd#= is + * enough. So use anyway the io thread. */ + } + stack = alloc_stack(0, 0); + io_pid = start_io_thread(stack + PAGE_SIZE - sizeof(void *), + &thread_fd); + if(io_pid < 0){ + printk(KERN_ERR + "ubd : Failed to start I/O thread (errno = %d) - " + "falling back to synchronous I/O\n", -io_pid); + io_pid = -1; + return(0); + } + err = um_request_irq(UBD_IRQ, thread_fd, IRQ_READ, ubd_intr, + SA_INTERRUPT, "ubd", ubd_dev); + if(err != 0) + printk(KERN_ERR "um_request_irq failed - errno = %d\n", -err); + return(err); +} + +device_initcall(ubd_driver_init); + static int ubd_open(struct inode *inode, struct file *filp) { struct gendisk *disk = inode->i_bdev->bd_disk; @@ -939,55 +923,105 @@ static int ubd_release(struct inode * inode, struct file * file) return(0); } -static void cowify_bitmap(struct io_thread_req *req, unsigned long *bitmap) +static void cowify_bitmap(__u64 io_offset, int length, unsigned long *cow_mask, + __u64 *cow_offset, unsigned long *bitmap, + __u64 bitmap_offset, unsigned long *bitmap_words, + __u64 bitmap_len) { - __u64 sector = req->offset / req->sectorsize; - int i; + __u64 sector = io_offset >> 9; + int i, update_bitmap = 0; + + for(i = 0; i < length >> 9; i++){ + if(cow_mask != NULL) + ubd_set_bit(i, (unsigned char *) cow_mask); + if(ubd_test_bit(sector + i, (unsigned char *) bitmap)) + continue; - for(i = 0; i < req->length / req->sectorsize; i++){ - if(ubd_test_bit(sector + i, bitmap)) - continue; + update_bitmap = 1; + ubd_set_bit(sector + i, (unsigned char *) bitmap); + } + + if(!update_bitmap) + return; - if(req->bitmap_start == -1) - req->bitmap_start = sector + i; - req->bitmap_end = sector + i + 1; + *cow_offset = sector / (sizeof(unsigned long) * 8); - ubd_set_bit(sector + i, bitmap); - } + /* This takes care of the case where we're exactly at the end of the + * device, and *cow_offset + 1 is off the end. So, just back it up + * by one word. Thanks to Lynn Kerby for the fix and James McMechan + * for the original diagnosis. + */ + if(*cow_offset == ((bitmap_len + sizeof(unsigned long) - 1) / + sizeof(unsigned long) - 1)) + (*cow_offset)--; + + bitmap_words[0] = bitmap[*cow_offset]; + bitmap_words[1] = bitmap[*cow_offset + 1]; + + *cow_offset *= sizeof(unsigned long); + *cow_offset += bitmap_offset; +} + +static void cowify_req(struct io_thread_req *req, unsigned long *bitmap, + __u64 bitmap_offset, __u64 bitmap_len) +{ + __u64 sector = req->offset >> 9; + int i; + + if(req->length > (sizeof(req->sector_mask) * 8) << 9) + panic("Operation too long"); + + if(req->op == UBD_READ) { + for(i = 0; i < req->length >> 9; i++){ + if(ubd_test_bit(sector + i, (unsigned char *) bitmap)) + ubd_set_bit(i, (unsigned char *) + &req->sector_mask); + } + } + else cowify_bitmap(req->offset, req->length, &req->sector_mask, + &req->cow_offset, bitmap, bitmap_offset, + req->bitmap_words, bitmap_len); } /* Called with ubd_io_lock held */ -static int prepare_request(struct request *req, struct io_thread_req *io_req, - unsigned long long offset, int page_offset, - int len, struct page *page) +static int prepare_request(struct request *req, struct io_thread_req *io_req) { struct gendisk *disk = req->rq_disk; struct ubd *dev = disk->private_data; + __u64 offset; + int len; + + if(req->rq_status == RQ_INACTIVE) return(1); /* This should be impossible now */ if((rq_data_dir(req) == WRITE) && !dev->openflags.w){ printk("Write attempted on readonly ubd device %s\n", disk->disk_name); - ubd_end_request(req, 0, 0); + end_request(req, 0); return(1); } + offset = ((__u64) req->sector) << 9; + len = req->current_nr_sectors << 9; + io_req->fds[0] = (dev->cow.file != NULL) ? dev->cow.fd : dev->fd; io_req->fds[1] = dev->fd; + io_req->cow_offset = -1; io_req->offset = offset; io_req->length = len; io_req->error = 0; - io_req->op = (rq_data_dir(req) == READ) ? AIO_READ : AIO_WRITE; + io_req->sector_mask = 0; + + io_req->op = (rq_data_dir(req) == READ) ? UBD_READ : UBD_WRITE; io_req->offsets[0] = 0; io_req->offsets[1] = dev->cow.data_offset; - io_req->buffer = page_address(page) + page_offset; + io_req->buffer = req->buffer; io_req->sectorsize = 1 << 9; - io_req->bitmap_offset = dev->cow.bitmap_offset; - io_req->bitmap_start = -1; - io_req->bitmap_end = -1; - if((dev->cow.file != NULL) && (io_req->op == UBD_WRITE)) - cowify_bitmap(io_req, dev->cow.bitmap); + if(dev->cow.file != NULL) + cowify_req(io_req, dev->cow.bitmap, dev->cow.bitmap_offset, + dev->cow.bitmap_len); + return(0); } @@ -996,36 +1030,30 @@ static void do_ubd_request(request_queue_t *q) { struct io_thread_req io_req; struct request *req; - __u64 sector; - int err; - - if(in_ubd) - return; - in_ubd = 1; - while((req = elv_next_request(q)) != NULL){ - struct gendisk *disk = req->rq_disk; - struct ubd *dev = disk->private_data; - int n, i; - - blkdev_dequeue_request(req); - - sector = req->sector; - n = blk_rq_map_sg(q, req, dev->sg); - - for(i = 0; i < n; i++){ - struct scatterlist *sg = &dev->sg[i]; - - err = prepare_request(req, &io_req, sector << 9, - sg->offset, sg->length, - sg->page); - if(err) - continue; - - sector += sg->length >> 9; - do_io(&io_req, req, dev->cow.bitmap); + int err, n; + + if(thread_fd == -1){ + while((req = elv_next_request(q)) != NULL){ + err = prepare_request(req, &io_req); + if(!err){ + do_io(&io_req); + __ubd_finish(req, io_req.error); + } + } + } + else { + if(do_ubd || (req = elv_next_request(q)) == NULL) + return; + err = prepare_request(req, &io_req); + if(!err){ + do_ubd = ubd_handler; + n = os_write_file(thread_fd, (char *) &io_req, + sizeof(io_req)); + if(n != sizeof(io_req)) + printk("write to io thread failed, " + "errno = %d\n", -n); } } - in_ubd = 0; } static int ubd_ioctl(struct inode * inode, struct file * file, @@ -1241,95 +1269,131 @@ int create_cow_file(char *cow_file, char *backing_file, struct openflags flags, return(err); } -void do_io(struct io_thread_req *req, struct request *r, unsigned long *bitmap) +static int update_bitmap(struct io_thread_req *req) { - struct ubd_aio *aio; - struct bitmap_io *bitmap_io = NULL; - char *buf; - void *bitmap_buf = NULL; - unsigned long len, sector; - int nsectors, start, end, bit, err; - __u64 off; - - if(req->bitmap_start != -1){ - /* Round up to the nearest word */ - int round = sizeof(unsigned long); - len = (req->bitmap_end - req->bitmap_start + - round * 8 - 1) / (round * 8); - len *= round; - - off = req->bitmap_start / (8 * round); - off *= round; - - bitmap_io = kmalloc(sizeof(*bitmap_io), GFP_KERNEL); - if(bitmap_io == NULL){ - printk("Failed to kmalloc bitmap IO\n"); - req->error = 1; - return; - } + int n; - bitmap_buf = kmalloc(len, GFP_KERNEL); - if(bitmap_buf == NULL){ - printk("do_io : kmalloc of bitmap chunk " - "failed\n"); - kfree(bitmap_io); - req->error = 1; - return; - } - memcpy(bitmap_buf, &bitmap[off / sizeof(bitmap[0])], len); - - *bitmap_io = ((struct bitmap_io) - { .count = ATOMIC_INIT(0), - .aio = INIT_AIO(AIO_WRITE, req->fds[1], - bitmap_buf, len, - req->bitmap_offset + off, - ubd_reply_fd) } ); - } + if(req->cow_offset == -1) + return(0); - nsectors = req->length / req->sectorsize; - start = 0; - end = nsectors; - bit = 0; - do { - if(bitmap != NULL){ - sector = req->offset / req->sectorsize; - bit = ubd_test_bit(sector + start, bitmap); - end = start; - while((end < nsectors) && - (ubd_test_bit(sector + end, bitmap) == bit)) - end++; - } + n = os_seek_file(req->fds[1], req->cow_offset); + if(n < 0){ + printk("do_io - bitmap lseek failed : err = %d\n", -n); + return(1); + } - off = req->offsets[bit] + req->offset + - start * req->sectorsize; - len = (end - start) * req->sectorsize; - buf = &req->buffer[start * req->sectorsize]; + n = os_write_file(req->fds[1], &req->bitmap_words, + sizeof(req->bitmap_words)); + if(n != sizeof(req->bitmap_words)){ + printk("do_io - bitmap update failed, err = %d fd = %d\n", -n, + req->fds[1]); + return(1); + } - aio = kmalloc(sizeof(*aio), GFP_KERNEL); - if(aio == NULL){ - req->error = 1; - return; - } + return(0); +} - *aio = ((struct ubd_aio) - { .aio = INIT_AIO(req->op, req->fds[bit], buf, - len, off, ubd_reply_fd), - .len = len, - .req = r, - .bitmap = bitmap_io, - .bitmap_buf = bitmap_buf }); - - if(aio->bitmap != NULL) - atomic_inc(&aio->bitmap->count); - - err = submit_aio(&aio->aio); - if(err){ - printk("do_io - submit_aio failed, " - "err = %d\n", err); - req->error = 1; - return; - } +void do_io(struct io_thread_req *req) +{ + char *buf; + unsigned long len; + int n, nsectors, start, end, bit; + int err; + __u64 off; + + nsectors = req->length / req->sectorsize; + start = 0; + do { + bit = ubd_test_bit(start, (unsigned char *) &req->sector_mask); + end = start; + while((end < nsectors) && + (ubd_test_bit(end, (unsigned char *) + &req->sector_mask) == bit)) + end++; + + off = req->offset + req->offsets[bit] + + start * req->sectorsize; + len = (end - start) * req->sectorsize; + buf = &req->buffer[start * req->sectorsize]; + + err = os_seek_file(req->fds[bit], off); + if(err < 0){ + printk("do_io - lseek failed : err = %d\n", -err); + req->error = 1; + return; + } + if(req->op == UBD_READ){ + n = 0; + do { + buf = &buf[n]; + len -= n; + n = os_read_file(req->fds[bit], buf, len); + if (n < 0) { + printk("do_io - read failed, err = %d " + "fd = %d\n", -n, req->fds[bit]); + req->error = 1; + return; + } + } while((n < len) && (n != 0)); + if (n < len) memset(&buf[n], 0, len - n); + } else { + n = os_write_file(req->fds[bit], buf, len); + if(n != len){ + printk("do_io - write failed err = %d " + "fd = %d\n", -n, req->fds[bit]); + req->error = 1; + return; + } + } + + start = end; + } while(start < nsectors); - start = end; - } while(start < nsectors); + req->error = update_bitmap(req); } + +/* Changed in start_io_thread, which is serialized by being called only + * from ubd_init, which is an initcall. + */ +int kernel_fd = -1; + +/* Only changed by the io thread */ +int io_count = 0; + +int io_thread(void *arg) +{ + struct io_thread_req req; + int n; + + ignore_sigwinch_sig(); + while(1){ + n = os_read_file(kernel_fd, &req, sizeof(req)); + if(n != sizeof(req)){ + if(n < 0) + printk("io_thread - read failed, fd = %d, " + "err = %d\n", kernel_fd, -n); + else { + printk("io_thread - short read, fd = %d, " + "length = %d\n", kernel_fd, n); + } + continue; + } + io_count++; + do_io(&req); + n = os_write_file(kernel_fd, &req, sizeof(req)); + if(n != sizeof(req)) + printk("io_thread - write failed, fd = %d, err = %d\n", + kernel_fd, -n); + } +} + +/* + * Overrides for Emacs so that we follow Linus's tabbing style. + * Emacs will notice this stuff at the end of the file and automatically + * adjust the settings for this buffer only. This must remain at the end + * of the file. + * --------------------------------------------------------------------------- + * Local variables: + * c-file-style: "linux" + * End: + */ diff --git a/arch/um/drivers/ubd_user.c b/arch/um/drivers/ubd_user.c new file mode 100644 index 00000000000..b94d2bc4fe0 --- /dev/null +++ b/arch/um/drivers/ubd_user.c @@ -0,0 +1,75 @@ +/* + * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com) + * Copyright (C) 2001 Ridgerun,Inc (glonnon@ridgerun.com) + * Licensed under the GPL + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "asm/types.h" +#include "user_util.h" +#include "kern_util.h" +#include "user.h" +#include "ubd_user.h" +#include "os.h" +#include "cow.h" + +#include +#include + +void ignore_sigwinch_sig(void) +{ + signal(SIGWINCH, SIG_IGN); +} + +int start_io_thread(unsigned long sp, int *fd_out) +{ + int pid, fds[2], err; + + err = os_pipe(fds, 1, 1); + if(err < 0){ + printk("start_io_thread - os_pipe failed, err = %d\n", -err); + goto out; + } + + kernel_fd = fds[0]; + *fd_out = fds[1]; + + pid = clone(io_thread, (void *) sp, CLONE_FILES | CLONE_VM | SIGCHLD, + NULL); + if(pid < 0){ + printk("start_io_thread - clone failed : errno = %d\n", errno); + err = -errno; + goto out_close; + } + + return(pid); + + out_close: + os_close_file(fds[0]); + os_close_file(fds[1]); + kernel_fd = -1; + *fd_out = -1; + out: + return(err); +} + +/* + * Overrides for Emacs so that we follow Linus's tabbing style. + * Emacs will notice this stuff at the end of the file and automatically + * adjust the settings for this buffer only. This must remain at the end + * of the file. + * --------------------------------------------------------------------------- + * Local variables: + * c-file-style: "linux" + * End: + */ diff --git a/arch/um/include/aio.h b/arch/um/include/aio.h index 83f16877ab0..423bae9153f 100644 --- a/arch/um/include/aio.h +++ b/arch/um/include/aio.h @@ -14,27 +14,15 @@ struct aio_thread_reply { }; struct aio_context { - enum aio_type type; - int fd; - void *data; - int len; - unsigned long long offset; int reply_fd; struct aio_context *next; }; -#define INIT_AIO(aio_type, aio_fd, aio_data, aio_len, aio_offset, \ - aio_reply_fd) \ - { .type = aio_type, \ - .fd = aio_fd, \ - .data = aio_data, \ - .len = aio_len, \ - .offset = aio_offset, \ - .reply_fd = aio_reply_fd } - #define INIT_AIO_CONTEXT { .reply_fd = -1, \ .next = NULL } -extern int submit_aio(struct aio_context *aio); +extern int submit_aio(enum aio_type type, int fd, char *buf, int len, + unsigned long long offset, int reply_fd, + struct aio_context *aio); #endif diff --git a/arch/um/os-Linux/aio.c b/arch/um/os-Linux/aio.c index f6e64026f99..41cfb094420 100644 --- a/arch/um/os-Linux/aio.c +++ b/arch/um/os-Linux/aio.c @@ -6,7 +6,6 @@ #include #include #include -#include #include #include #include @@ -17,31 +16,18 @@ #include "user.h" #include "mode.h" +struct aio_thread_req { + enum aio_type type; + int io_fd; + unsigned long long offset; + char *buf; + int len; + struct aio_context *aio; +}; + static int aio_req_fd_r = -1; static int aio_req_fd_w = -1; -static int update_aio(struct aio_context *aio, int res) -{ - if(res < 0) - aio->len = res; - else if((res == 0) && (aio->type == AIO_READ)){ - /* This is the EOF case - we have hit the end of the file - * and it ends in a partial block, so we fill the end of - * the block with zeros and claim success. - */ - memset(aio->data, 0, aio->len); - aio->len = 0; - } - else if(res > 0){ - aio->len -= res; - aio->data += res; - aio->offset += res; - return aio->len; - } - - return 0; -} - #if defined(HAVE_AIO_ABI) #include @@ -80,7 +66,8 @@ static long io_getevents(aio_context_t ctx_id, long min_nr, long nr, * that it now backs the mmapped area. */ -static int do_aio(aio_context_t ctx, struct aio_context *aio) +static int do_aio(aio_context_t ctx, enum aio_type type, int fd, char *buf, + int len, unsigned long long offset, struct aio_context *aio) { struct iocb iocb, *iocbp = &iocb; char c; @@ -88,39 +75,40 @@ static int do_aio(aio_context_t ctx, struct aio_context *aio) iocb = ((struct iocb) { .aio_data = (unsigned long) aio, .aio_reqprio = 0, - .aio_fildes = aio->fd, - .aio_buf = (unsigned long) aio->data, - .aio_nbytes = aio->len, - .aio_offset = aio->offset, + .aio_fildes = fd, + .aio_buf = (unsigned long) buf, + .aio_nbytes = len, + .aio_offset = offset, .aio_reserved1 = 0, .aio_reserved2 = 0, .aio_reserved3 = 0 }); - switch(aio->type){ + switch(type){ case AIO_READ: iocb.aio_lio_opcode = IOCB_CMD_PREAD; + err = io_submit(ctx, 1, &iocbp); break; case AIO_WRITE: iocb.aio_lio_opcode = IOCB_CMD_PWRITE; + err = io_submit(ctx, 1, &iocbp); break; case AIO_MMAP: iocb.aio_lio_opcode = IOCB_CMD_PREAD; iocb.aio_buf = (unsigned long) &c; iocb.aio_nbytes = sizeof(c); + err = io_submit(ctx, 1, &iocbp); break; default: - printk("Bogus op in do_aio - %d\n", aio->type); + printk("Bogus op in do_aio - %d\n", type); err = -EINVAL; - goto out; + break; } - err = io_submit(ctx, 1, &iocbp); if(err > 0) err = 0; else err = -errno; - out: return err; } @@ -129,9 +117,8 @@ static aio_context_t ctx = 0; static int aio_thread(void *arg) { struct aio_thread_reply reply; - struct aio_context *aio; struct io_event event; - int err, n; + int err, n, reply_fd; signal(SIGWINCH, SIG_IGN); @@ -144,22 +131,14 @@ static int aio_thread(void *arg) "errno = %d\n", errno); } else { - /* This is safe as we've just a pointer here. */ - aio = (struct aio_context *) (long) event.data; - if(update_aio(aio, event.res)){ - do_aio(ctx, aio); - continue; - } - reply = ((struct aio_thread_reply) - { .data = aio, - .err = aio->len }); - err = os_write_file(aio->reply_fd, &reply, - sizeof(reply)); + { .data = (void *) (long) event.data, + .err = event.res }); + reply_fd = ((struct aio_context *) reply.data)->reply_fd; + err = os_write_file(reply_fd, &reply, sizeof(reply)); if(err != sizeof(reply)) - printk("aio_thread - write failed, " - "fd = %d, err = %d\n", aio->reply_fd, - -err); + printk("aio_thread - write failed, fd = %d, " + "err = %d\n", aio_req_fd_r, -err); } } return 0; @@ -167,35 +146,35 @@ static int aio_thread(void *arg) #endif -static int do_not_aio(struct aio_context *aio) +static int do_not_aio(struct aio_thread_req *req) { char c; int err; - switch(aio->type){ + switch(req->type){ case AIO_READ: - err = os_seek_file(aio->fd, aio->offset); + err = os_seek_file(req->io_fd, req->offset); if(err) goto out; - err = os_read_file(aio->fd, aio->data, aio->len); + err = os_read_file(req->io_fd, req->buf, req->len); break; case AIO_WRITE: - err = os_seek_file(aio->fd, aio->offset); + err = os_seek_file(req->io_fd, req->offset); if(err) goto out; - err = os_write_file(aio->fd, aio->data, aio->len); + err = os_write_file(req->io_fd, req->buf, req->len); break; case AIO_MMAP: - err = os_seek_file(aio->fd, aio->offset); + err = os_seek_file(req->io_fd, req->offset); if(err) goto out; - err = os_read_file(aio->fd, &c, sizeof(c)); + err = os_read_file(req->io_fd, &c, sizeof(c)); break; default: - printk("do_not_aio - bad request type : %d\n", aio->type); + printk("do_not_aio - bad request type : %d\n", req->type); err = -EINVAL; break; } @@ -206,14 +185,14 @@ static int do_not_aio(struct aio_context *aio) static int not_aio_thread(void *arg) { - struct aio_context *aio; + struct aio_thread_req req; struct aio_thread_reply reply; int err; signal(SIGWINCH, SIG_IGN); while(1){ - err = os_read_file(aio_req_fd_r, &aio, sizeof(aio)); - if(err != sizeof(aio)){ + err = os_read_file(aio_req_fd_r, &req, sizeof(req)); + if(err != sizeof(req)){ if(err < 0) printk("not_aio_thread - read failed, " "fd = %d, err = %d\n", aio_req_fd_r, @@ -224,34 +203,17 @@ static int not_aio_thread(void *arg) } continue; } - again: - err = do_not_aio(aio); - - if(update_aio(aio, err)) - goto again; - - reply = ((struct aio_thread_reply) { .data = aio, - .err = aio->len }); - err = os_write_file(aio->reply_fd, &reply, sizeof(reply)); + err = do_not_aio(&req); + reply = ((struct aio_thread_reply) { .data = req.aio, + .err = err }); + err = os_write_file(req.aio->reply_fd, &reply, sizeof(reply)); if(err != sizeof(reply)) printk("not_aio_thread - write failed, fd = %d, " "err = %d\n", aio_req_fd_r, -err); } } -static int submit_aio_24(struct aio_context *aio) -{ - int err; - - err = os_write_file(aio_req_fd_w, &aio, sizeof(aio)); - if(err == sizeof(aio)) - err = 0; - - return err; -} - static int aio_pid = -1; -static int (*submit_proc)(struct aio_context *aio); static int init_aio_24(void) { @@ -283,33 +245,11 @@ static int init_aio_24(void) #endif printk("2.6 host AIO support not used - falling back to I/O " "thread\n"); - - submit_proc = submit_aio_24; - return 0; } #ifdef HAVE_AIO_ABI #define DEFAULT_24_AIO 0 -static int submit_aio_26(struct aio_context *aio) -{ - struct aio_thread_reply reply; - int err; - - err = do_aio(ctx, aio); - if(err){ - reply = ((struct aio_thread_reply) { .data = aio, - .err = err }); - err = os_write_file(aio->reply_fd, &reply, sizeof(reply)); - if(err != sizeof(reply)) - printk("submit_aio_26 - write failed, " - "fd = %d, err = %d\n", aio->reply_fd, -err); - else err = 0; - } - - return err; -} - static int init_aio_26(void) { unsigned long stack; @@ -330,22 +270,39 @@ static int init_aio_26(void) aio_pid = err; printk("Using 2.6 host AIO\n"); + return 0; +} + +static int submit_aio_26(enum aio_type type, int io_fd, char *buf, int len, + unsigned long long offset, struct aio_context *aio) +{ + struct aio_thread_reply reply; + int err; - submit_proc = submit_aio_26; + err = do_aio(ctx, type, io_fd, buf, len, offset, aio); + if(err){ + reply = ((struct aio_thread_reply) { .data = aio, + .err = err }); + err = os_write_file(aio->reply_fd, &reply, sizeof(reply)); + if(err != sizeof(reply)) + printk("submit_aio_26 - write failed, " + "fd = %d, err = %d\n", aio->reply_fd, -err); + else err = 0; + } - return 0; + return err; } #else #define DEFAULT_24_AIO 1 -static int submit_aio_26(struct aio_context *aio) +static int init_aio_26(void) { return -ENOSYS; } -static int init_aio_26(void) +static int submit_aio_26(enum aio_type type, int io_fd, char *buf, int len, + unsigned long long offset, struct aio_context *aio) { - submit_proc = submit_aio_26; return -ENOSYS; } #endif @@ -412,7 +369,33 @@ static void exit_aio(void) __uml_exitcall(exit_aio); -int submit_aio(struct aio_context *aio) +static int submit_aio_24(enum aio_type type, int io_fd, char *buf, int len, + unsigned long long offset, struct aio_context *aio) { - return (*submit_proc)(aio); + struct aio_thread_req req = { .type = type, + .io_fd = io_fd, + .offset = offset, + .buf = buf, + .len = len, + .aio = aio, + }; + int err; + + err = os_write_file(aio_req_fd_w, &req, sizeof(req)); + if(err == sizeof(req)) + err = 0; + + return err; +} + +int submit_aio(enum aio_type type, int io_fd, char *buf, int len, + unsigned long long offset, int reply_fd, + struct aio_context *aio) +{ + aio->reply_fd = reply_fd; + if(aio_24) + return submit_aio_24(type, io_fd, buf, len, offset, aio); + else { + return submit_aio_26(type, io_fd, buf, len, offset, aio); + } } -- cgit v1.2.3-70-g09d2 From 9d624ea474a3ddf3a0702d9b47e428ce1f8488a6 Mon Sep 17 00:00:00 2001 From: Paolo 'Blaisorblade' Giarrusso Date: Tue, 11 Oct 2005 21:01:01 +0200 Subject: [PATCH] uml: compile-time fix recent patch Give an empty definition for clear_can_do_skas() when it is not needed. Thanks to Junichi Uekawa for reporting the breakage and providing a fix (I re-fixed it in an IMHO cleaner way). Signed-off-by: Paolo 'Blaisorblade' Giarrusso Signed-off-by: Linus Torvalds --- arch/um/include/os.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'arch') diff --git a/arch/um/include/os.h b/arch/um/include/os.h index 6f766e1faec..2e58e304b8b 100644 --- a/arch/um/include/os.h +++ b/arch/um/include/os.h @@ -6,6 +6,7 @@ #ifndef __OS_H__ #define __OS_H__ +#include "uml-config.h" #include "asm/types.h" #include "../os/include/file.h" @@ -159,7 +160,11 @@ extern int can_do_skas(void); /* Make sure they are clear when running in TT mode. Required by * SEGV_MAYBE_FIXABLE */ +#ifdef UML_CONFIG_MODE_SKAS #define clear_can_do_skas() do { ptrace_faultinfo = proc_mm = 0; } while (0) +#else +#define clear_can_do_skas() do {} while (0) +#endif /* mem.c */ extern int create_mem_file(unsigned long len); -- cgit v1.2.3-70-g09d2 From d8e998c58a870770905495a1d45ebf7285b5b1c5 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Wed, 12 Oct 2005 14:22:50 +1000 Subject: [PATCH] ppc32: Tell userland about lack of standard TB Glibc is about to get some new high precision timer stuff that relies on the standard timebase of the PPC architecture. However, some (rare & old) CPUs do not have such timebase and it is a bit annoying to have your stuff just crash because you are running on the wrong CPU... This exposes to userland a CPU feature bit that tells that the current processor doesn't have a standard timebase. It's negative logic so that glibc will still "just work" on older kernels (it will just be unhappy on those old CPUs but that doesn't really matter as distro tend to update glibc & kernel at the same time). Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Linus Torvalds --- arch/ppc/kernel/cputable.c | 5 +++-- include/asm-ppc/cputable.h | 1 + 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/ppc/kernel/cputable.c b/arch/ppc/kernel/cputable.c index 546e1ea4caf..6b76cf58d9e 100644 --- a/arch/ppc/kernel/cputable.c +++ b/arch/ppc/kernel/cputable.c @@ -91,7 +91,7 @@ struct cpu_spec cpu_specs[] = { .cpu_features = CPU_FTR_COMMON | CPU_FTR_601 | CPU_FTR_HPTE_TABLE, .cpu_user_features = COMMON_PPC | PPC_FEATURE_601_INSTR | - PPC_FEATURE_UNIFIED_CACHE, + PPC_FEATURE_UNIFIED_CACHE | PPC_FEATURE_NO_TB, .icache_bsize = 32, .dcache_bsize = 32, .cpu_setup = __setup_cpu_601 @@ -745,7 +745,8 @@ struct cpu_spec cpu_specs[] = { .cpu_name = "403GCX", .cpu_features = CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB, - .cpu_user_features = PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU, + .cpu_user_features = PPC_FEATURE_32 | + PPC_FEATURE_HAS_MMU | PPC_FEATURE_NO_TB, .icache_bsize = 16, .dcache_bsize = 16, }, diff --git a/include/asm-ppc/cputable.h b/include/asm-ppc/cputable.h index 41d8f8425c0..e17c492c870 100644 --- a/include/asm-ppc/cputable.h +++ b/include/asm-ppc/cputable.h @@ -24,6 +24,7 @@ #define PPC_FEATURE_HAS_SPE 0x00800000 #define PPC_FEATURE_HAS_EFP_SINGLE 0x00400000 #define PPC_FEATURE_HAS_EFP_DOUBLE 0x00200000 +#define PPC_FEATURE_NO_TB 0x00100000 #ifdef __KERNEL__ -- cgit v1.2.3-70-g09d2 From 60ac133aac9e07b94f2cb6bf571bf8aef69248c3 Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Wed, 12 Oct 2005 19:51:24 +0100 Subject: [ARM] 2974/1: fix ARM710 swi bug workaround Patch from Nicolas Pitre Either no one is using an ARM710 with recent kernels, or all ARM710s still in use are not afflicted by this swi bug. Nevertheless, the code to work around the ARM710 swi bug is itself currently buggy since it uses r8 as a pointer to S_PC while in fact it holds the spsr content these days. Fix that, and simplify the code as well. Signed-off-by: Nicolas Pitre Signed-off-by: Russell King --- arch/arm/kernel/entry-common.S | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S index 81d450ac3fa..066597f4345 100644 --- a/arch/arm/kernel/entry-common.S +++ b/arch/arm/kernel/entry-common.S @@ -106,15 +106,10 @@ ENTRY(ret_from_fork) .endm .Larm700bug: - ldr r0, [sp, #S_PSR] @ Get calling cpsr - sub lr, lr, #4 - str lr, [r8] - msr spsr_cxsf, r0 ldmia sp, {r0 - lr}^ @ Get calling r0 - lr mov r0, r0 - ldr lr, [sp, #S_PC] @ Get PC add sp, sp, #S_FRAME_SIZE - movs pc, lr + subs pc, lr, #4 #else .macro arm710_bug_check, instr, temp .endm -- cgit v1.2.3-70-g09d2 From 0eea3c0b6cb356bc8e515084f831cac7e3d5131c Mon Sep 17 00:00:00 2001 From: Ben Dooks Date: Wed, 12 Oct 2005 19:58:05 +0100 Subject: [ARM] 2975/1: S3C2410: time.c missing include of cpu.h Patch from Ben Dooks arch/arm/mach-s3c2410/time.c is missing include of cpu.h, causing the declaration of the timer struct (s3c24xx_timer) to be flagged as missing the declaration. Signed-off-by: Ben Dooks Signed-off-by: Russell King --- arch/arm/mach-s3c2410/time.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/arm/mach-s3c2410/time.c b/arch/arm/mach-s3c2410/time.c index c0acfb2ad79..8a00e3c3cd0 100644 --- a/arch/arm/mach-s3c2410/time.c +++ b/arch/arm/mach-s3c2410/time.c @@ -38,6 +38,7 @@ #include #include "clock.h" +#include "cpu.h" static unsigned long timer_startval; static unsigned long timer_usec_ticks; -- cgit v1.2.3-70-g09d2 From a7b1bbbc89194deba8cde02200f08b3840c9daa2 Mon Sep 17 00:00:00 2001 From: Ben Dooks Date: Wed, 12 Oct 2005 19:58:07 +0100 Subject: [ARM] 2977/1: armksyms.c - make items in export table static Patch from Ben Dooks The items in the export table do not need to be exported elsehwere, so quash the sparse warning by making the symbol for the table entry static. Signed-off-by: Ben Dooks Signed-off-by: Russell King --- arch/arm/kernel/armksyms.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm/kernel/armksyms.c b/arch/arm/kernel/armksyms.c index 835d450797a..7b17a87a331 100644 --- a/arch/arm/kernel/armksyms.c +++ b/arch/arm/kernel/armksyms.c @@ -45,8 +45,8 @@ extern void fp_enter(void); #define EXPORT_SYMBOL_ALIAS(sym,orig) \ EXPORT_CRC_ALIAS(sym) \ - const struct kernel_symbol __ksymtab_##sym \ - __attribute__((section("__ksymtab"))) = \ + static const struct kernel_symbol __ksymtab_##sym \ + __attribute_used__ __attribute__((section("__ksymtab"))) = \ { (unsigned long)&orig, #sym }; /* -- cgit v1.2.3-70-g09d2 From 9f693d7b149a74bac301ee47136359294cffed25 Mon Sep 17 00:00:00 2001 From: Ben Dooks Date: Wed, 12 Oct 2005 19:58:07 +0100 Subject: [ARM] 2979/2: S3C2410 - add static to non-exported machine items Patch from Ben Dooks Do not export items that are not needed by symbol name elsewhere Signed-off-by: Ben Dooks Signed-off-by: Russell King --- arch/arm/mach-s3c2410/mach-anubis.c | 2 +- arch/arm/mach-s3c2410/mach-bast.c | 4 ++-- arch/arm/mach-s3c2410/mach-vr1000.c | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/arm/mach-s3c2410/mach-anubis.c b/arch/arm/mach-s3c2410/mach-anubis.c index 7c05f27fe1d..5ae80f4e3e6 100644 --- a/arch/arm/mach-s3c2410/mach-anubis.c +++ b/arch/arm/mach-s3c2410/mach-anubis.c @@ -125,7 +125,7 @@ static int external_map[] = { 2 }; static int chip0_map[] = { 0 }; static int chip1_map[] = { 1 }; -struct mtd_partition anubis_default_nand_part[] = { +static struct mtd_partition anubis_default_nand_part[] = { [0] = { .name = "Boot Agent", .size = SZ_16K, diff --git a/arch/arm/mach-s3c2410/mach-bast.c b/arch/arm/mach-s3c2410/mach-bast.c index ed1f07d7252..8ca95598464 100644 --- a/arch/arm/mach-s3c2410/mach-bast.c +++ b/arch/arm/mach-s3c2410/mach-bast.c @@ -230,7 +230,7 @@ static int chip0_map[] = { 1 }; static int chip1_map[] = { 2 }; static int chip2_map[] = { 3 }; -struct mtd_partition bast_default_nand_part[] = { +static struct mtd_partition bast_default_nand_part[] = { [0] = { .name = "Boot Agent", .size = SZ_16K, @@ -340,7 +340,7 @@ static struct resource bast_dm9k_resource[] = { * better IO routines can be written and tested */ -struct dm9000_plat_data bast_dm9k_platdata = { +static struct dm9000_plat_data bast_dm9k_platdata = { .flags = DM9000_PLATF_16BITONLY }; diff --git a/arch/arm/mach-s3c2410/mach-vr1000.c b/arch/arm/mach-s3c2410/mach-vr1000.c index 663a7f98fc0..46b259673c1 100644 --- a/arch/arm/mach-s3c2410/mach-vr1000.c +++ b/arch/arm/mach-s3c2410/mach-vr1000.c @@ -288,7 +288,7 @@ static struct resource vr1000_dm9k1_resource[] = { * better IO routines can be written and tested */ -struct dm9000_plat_data vr1000_dm9k_platdata = { +static struct dm9000_plat_data vr1000_dm9k_platdata = { .flags = DM9000_PLATF_16BITONLY, }; -- cgit v1.2.3-70-g09d2 From cd26f45bfca4d4fa5ddfe21613d2da46f1acb821 Mon Sep 17 00:00:00 2001 From: "George G. Davis" Date: Wed, 12 Oct 2005 19:58:09 +0100 Subject: [ARM] 2970/1: Use -mtune=arm1136j-s when building for CPU_V6 targets Patch from George G. Davis When building for CPU_V6 targets, we should use -mtune=arm1136j-s rather than -mtune=strongarm but fall back to the later in case someone is using an older toolchain (although they should really upgrade instead). Signed-off-by: George G. Davis Signed-off-by: Russell King --- arch/arm/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/Makefile b/arch/arm/Makefile index 7779f2d1aca..299bc046870 100644 --- a/arch/arm/Makefile +++ b/arch/arm/Makefile @@ -53,7 +53,7 @@ tune-$(CONFIG_CPU_ARM926T) :=-mtune=arm9tdmi tune-$(CONFIG_CPU_SA110) :=-mtune=strongarm110 tune-$(CONFIG_CPU_SA1100) :=-mtune=strongarm1100 tune-$(CONFIG_CPU_XSCALE) :=$(call cc-option,-mtune=xscale,-mtune=strongarm110) -Wa,-mcpu=xscale -tune-$(CONFIG_CPU_V6) :=-mtune=strongarm +tune-$(CONFIG_CPU_V6) :=$(call cc-option,-mtune=arm1136j-s,-mtune=strongarm) # Need -Uarm for gcc < 3.x CFLAGS_ABI :=$(call cc-option,-mapcs-32,-mabi=apcs-gnu) $(call cc-option,-mno-thumb-interwork,) -- cgit v1.2.3-70-g09d2 From 737d0bb7701cdebb661e4db0236071a7df977777 Mon Sep 17 00:00:00 2001 From: "George G. Davis" Date: Wed, 12 Oct 2005 19:58:10 +0100 Subject: [ARM] 2969/1: miscellaneous whitespace cleanup Patch from George G. Davis Fix leading, trailing and other miscellaneous whitespace issues in arch/arm/kernel/alignment.c. Signed-off-by: George G. Davis Signed-off-by: Russell King --- arch/arm/mm/alignment.c | 44 ++++++++++++++++++++++---------------------- 1 file changed, 22 insertions(+), 22 deletions(-) (limited to 'arch') diff --git a/arch/arm/mm/alignment.c b/arch/arm/mm/alignment.c index f35e69e9c65..705c98921c3 100644 --- a/arch/arm/mm/alignment.c +++ b/arch/arm/mm/alignment.c @@ -111,7 +111,7 @@ proc_alignment_read(char *page, char **start, off_t off, int count, int *eof, } static int proc_alignment_write(struct file *file, const char __user *buffer, - unsigned long count, void *data) + unsigned long count, void *data) { char mode; @@ -119,7 +119,7 @@ static int proc_alignment_write(struct file *file, const char __user *buffer, if (get_user(mode, buffer)) return -EFAULT; if (mode >= '0' && mode <= '5') - ai_usermode = mode - '0'; + ai_usermode = mode - '0'; } return count; } @@ -262,7 +262,7 @@ union offset_union { goto fault; \ } while (0) -#define put32_unaligned_check(val,addr) \ +#define put32_unaligned_check(val,addr) \ __put32_unaligned_check("strb", val, addr) #define put32t_unaligned_check(val,addr) \ @@ -306,19 +306,19 @@ do_alignment_ldrhstrh(unsigned long addr, unsigned long instr, struct pt_regs *r return TYPE_LDST; user: - if (LDST_L_BIT(instr)) { - unsigned long val; - get16t_unaligned_check(val, addr); + if (LDST_L_BIT(instr)) { + unsigned long val; + get16t_unaligned_check(val, addr); - /* signed half-word? */ - if (instr & 0x40) - val = (signed long)((signed short) val); + /* signed half-word? */ + if (instr & 0x40) + val = (signed long)((signed short) val); - regs->uregs[rd] = val; - } else - put16t_unaligned_check(regs->uregs[rd], addr); + regs->uregs[rd] = val; + } else + put16t_unaligned_check(regs->uregs[rd], addr); - return TYPE_LDST; + return TYPE_LDST; fault: return TYPE_FAULT; @@ -342,11 +342,11 @@ do_alignment_ldrdstrd(unsigned long addr, unsigned long instr, unsigned long val; get32_unaligned_check(val, addr); regs->uregs[rd] = val; - get32_unaligned_check(val, addr+4); - regs->uregs[rd+1] = val; + get32_unaligned_check(val, addr + 4); + regs->uregs[rd + 1] = val; } else { put32_unaligned_check(regs->uregs[rd], addr); - put32_unaligned_check(regs->uregs[rd+1], addr+4); + put32_unaligned_check(regs->uregs[rd + 1], addr + 4); } return TYPE_LDST; @@ -356,11 +356,11 @@ do_alignment_ldrdstrd(unsigned long addr, unsigned long instr, unsigned long val; get32t_unaligned_check(val, addr); regs->uregs[rd] = val; - get32t_unaligned_check(val, addr+4); - regs->uregs[rd+1] = val; + get32t_unaligned_check(val, addr + 4); + regs->uregs[rd + 1] = val; } else { put32t_unaligned_check(regs->uregs[rd], addr); - put32t_unaligned_check(regs->uregs[rd+1], addr+4); + put32t_unaligned_check(regs->uregs[rd + 1], addr + 4); } return TYPE_LDST; @@ -443,7 +443,7 @@ do_alignment_ldmstm(unsigned long addr, unsigned long instr, struct pt_regs *reg if (LDST_P_EQ_U(instr)) /* U = P */ eaddr += 4; - /* + /* * For alignment faults on the ARM922T/ARM920T the MMU makes * the FSR (and hence addr) equal to the updated base address * of the multiple access rather than the restored value. @@ -570,7 +570,7 @@ thumb2arm(u16 tinstr) /* 6.5.1 Format 3: */ case 0x4800 >> 11: /* 7.1.28 LDR(3) */ /* NOTE: This case is not technically possible. We're - * loading 32-bit memory data via PC relative + * loading 32-bit memory data via PC relative * addressing mode. So we can and should eliminate * this case. But I'll leave it here for now. */ @@ -642,7 +642,7 @@ do_alignment(unsigned long addr, unsigned int fsr, struct pt_regs *regs) if (fault) { type = TYPE_FAULT; - goto bad_or_fault; + goto bad_or_fault; } if (user_mode(regs)) -- cgit v1.2.3-70-g09d2 From 6ec5e7f3656f0397b7e8b39a7dcc77937d187596 Mon Sep 17 00:00:00 2001 From: Ben Dooks Date: Wed, 12 Oct 2005 19:58:10 +0100 Subject: [ARM] 2978/1: nwfpe - clean up sparse errors Patch from Ben Dooks The NWFPE is producing a number of errors from sparse due to not defining a number of functions in the header files. Signed-off-by: Ben Dooks Signed-off-by: Russell King --- arch/arm/nwfpe/fpa11.c | 5 ----- arch/arm/nwfpe/fpa11.h | 20 ++++++++++++++++++++ arch/arm/nwfpe/fpa11_cprt.c | 3 +-- arch/arm/nwfpe/fpopcode.h | 6 ++++++ arch/arm/nwfpe/softfloat.h | 3 +++ 5 files changed, 30 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/arm/nwfpe/fpa11.c b/arch/arm/nwfpe/fpa11.c index 7690f731ee8..7b3d74d73c8 100644 --- a/arch/arm/nwfpe/fpa11.c +++ b/arch/arm/nwfpe/fpa11.c @@ -31,11 +31,6 @@ #include #include -/* forward declarations */ -unsigned int EmulateCPDO(const unsigned int); -unsigned int EmulateCPDT(const unsigned int); -unsigned int EmulateCPRT(const unsigned int); - /* Reset the FPA11 chip. Called to initialize and reset the emulator. */ static void resetFPA11(void) { diff --git a/arch/arm/nwfpe/fpa11.h b/arch/arm/nwfpe/fpa11.h index 93523ae4b7a..9677ae8448e 100644 --- a/arch/arm/nwfpe/fpa11.h +++ b/arch/arm/nwfpe/fpa11.h @@ -95,4 +95,24 @@ extern int8 SetRoundingMode(const unsigned int); extern int8 SetRoundingPrecision(const unsigned int); extern void nwfpe_init_fpa(union fp_state *fp); +extern unsigned int EmulateAll(unsigned int opcode); + +extern unsigned int EmulateCPDT(const unsigned int opcode); +extern unsigned int EmulateCPDO(const unsigned int opcode); +extern unsigned int EmulateCPRT(const unsigned int opcode); + +/* fpa11_cpdt.c */ +extern unsigned int PerformLDF(const unsigned int opcode); +extern unsigned int PerformSTF(const unsigned int opcode); +extern unsigned int PerformLFM(const unsigned int opcode); +extern unsigned int PerformSFM(const unsigned int opcode); + +/* single_cpdo.c */ + +extern unsigned int SingleCPDO(struct roundingData *roundData, + const unsigned int opcode, FPREG * rFd); +/* double_cpdo.c */ +extern unsigned int DoubleCPDO(struct roundingData *roundData, + const unsigned int opcode, FPREG * rFd); + #endif diff --git a/arch/arm/nwfpe/fpa11_cprt.c b/arch/arm/nwfpe/fpa11_cprt.c index adf8d300054..7c67023655e 100644 --- a/arch/arm/nwfpe/fpa11_cprt.c +++ b/arch/arm/nwfpe/fpa11_cprt.c @@ -26,12 +26,11 @@ #include "fpa11.inl" #include "fpmodule.h" #include "fpmodule.inl" +#include "softfloat.h" #ifdef CONFIG_FPE_NWFPE_XP extern flag floatx80_is_nan(floatx80); #endif -extern flag float64_is_nan(float64); -extern flag float32_is_nan(float32); unsigned int PerformFLT(const unsigned int opcode); unsigned int PerformFIX(const unsigned int opcode); diff --git a/arch/arm/nwfpe/fpopcode.h b/arch/arm/nwfpe/fpopcode.h index 1777e92a88e..6528e081c83 100644 --- a/arch/arm/nwfpe/fpopcode.h +++ b/arch/arm/nwfpe/fpopcode.h @@ -476,4 +476,10 @@ static inline unsigned int getDestinationSize(const unsigned int opcode) return (nRc); } +extern unsigned int checkCondition(const unsigned int opcode, + const unsigned int ccodes); + +extern const float64 float64Constant[]; +extern const float32 float32Constant[]; + #endif diff --git a/arch/arm/nwfpe/softfloat.h b/arch/arm/nwfpe/softfloat.h index 1c8799b9ee4..14151700b6b 100644 --- a/arch/arm/nwfpe/softfloat.h +++ b/arch/arm/nwfpe/softfloat.h @@ -265,4 +265,7 @@ static inline flag float64_lt_nocheck(float64 a, float64 b) return (a != b) && (aSign ^ (a < b)); } +extern flag float32_is_nan( float32 a ); +extern flag float64_is_nan( float64 a ); + #endif -- cgit v1.2.3-70-g09d2 From c9c10830740df1b5e7848d6fbb68c93a73e8f7cd Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 12 Oct 2005 12:22:46 -0700 Subject: [SPARC64]: Fix boot failures on SunBlade-150 The sequence to move over to the Linux trap tables from the firmware ones needs to be more air tight. It turns out that to be %100 safe we do need to be able to translate OBP mappings in our TLB miss handlers early. In order not to eat up a lot of kernel image memory with static page tables, just use the translations array in the OBP TLB miss handlers. That solves the bulk of the problem. Furthermore, to make sure the OBP TLB miss path will work even before the fixed MMU globals are loaded, explicitly load %g1 to TLB_SFSR at the beginning of the i-TLB and d-TLB miss handlers. To ease the OBP TLB miss walking of the prom_trans[] array, we sort it then delete all of the non-OBP entries in there (for example, there are entries for the kernel image itself which we're not interested in at all). We also save about 32K of kernel image size with this change. Not a bad side effect :-) There are still some reasons why trampoline.S can't use the setup_trap_table() yet. The most noteworthy are: 1) OBP boots secondary processors with non-bias'd stack for some reason. This is easily fixed by using a small bootup stack in the kernel image explicitly for this purpose. 2) Doing a firmware call via the normal C call prom_set_trap_table() goes through the whole OBP enter/exit sequence that saves and restores OBP and Linux kernel state in the MMUs. This path unfortunately does a "flush %g6" while loading up the OBP locked TLB entries for the firmware call. If we setup the %g6 in the trampoline.S code properly, that is in the PAGE_OFFSET linear mapping, but we're not on the kernel trap table yet so those addresses won't translate properly. One idea is to do a by-hand firmware call like we do in the early bootup code and elsewhere here in trampoline.S But this fails as well, as aparently the secondary processors are not booted with OBP's special locked TLB entries loaded. These are necessary for the firwmare to processes TLB misses correctly up until the point where we take over the trap table. This does need to be resolved at some point. Signed-off-by: David S. Miller --- arch/sparc64/kernel/dtlb_base.S | 14 ++- arch/sparc64/kernel/dtlb_prot.S | 12 +-- arch/sparc64/kernel/head.S | 61 ++++++------- arch/sparc64/kernel/itlb_base.S | 26 +++--- arch/sparc64/kernel/ktlb.S | 92 ++++++++++---------- arch/sparc64/mm/init.c | 187 ++++++++++++++++------------------------ 6 files changed, 166 insertions(+), 226 deletions(-) (limited to 'arch') diff --git a/arch/sparc64/kernel/dtlb_base.S b/arch/sparc64/kernel/dtlb_base.S index 702d349c1e8..6528786840c 100644 --- a/arch/sparc64/kernel/dtlb_base.S +++ b/arch/sparc64/kernel/dtlb_base.S @@ -53,19 +53,18 @@ * be guaranteed to be 0 ... mmu_context.h does guarantee this * by only using 10 bits in the hwcontext value. */ -#define CREATE_VPTE_OFFSET1(r1, r2) +#define CREATE_VPTE_OFFSET1(r1, r2) nop #define CREATE_VPTE_OFFSET2(r1, r2) \ srax r1, 10, r2 -#define CREATE_VPTE_NOP nop #else #define CREATE_VPTE_OFFSET1(r1, r2) \ srax r1, PAGE_SHIFT, r2 #define CREATE_VPTE_OFFSET2(r1, r2) \ sllx r2, 3, r2 -#define CREATE_VPTE_NOP #endif /* DTLB ** ICACHE line 1: Quick user TLB misses */ + mov TLB_SFSR, %g1 ldxa [%g1 + %g1] ASI_DMMU, %g4 ! Get TAG_ACCESS andcc %g4, TAG_CONTEXT_BITS, %g0 ! From Nucleus? from_tl1_trap: @@ -74,18 +73,16 @@ from_tl1_trap: be,pn %xcc, kvmap ! Yep, special processing CREATE_VPTE_OFFSET2(%g4, %g6) ! Create VPTE offset cmp %g5, 4 ! Last trap level? - be,pn %xcc, longpath ! Yep, cannot risk VPTE miss - nop ! delay slot /* DTLB ** ICACHE line 2: User finish + quick kernel TLB misses */ + be,pn %xcc, longpath ! Yep, cannot risk VPTE miss + nop ! delay slot ldxa [%g3 + %g6] ASI_S, %g5 ! Load VPTE 1: brgez,pn %g5, longpath ! Invalid, branch out nop ! Delay-slot 9: stxa %g5, [%g0] ASI_DTLB_DATA_IN ! Reload TLB retry ! Trap return nop - nop - nop /* DTLB ** ICACHE line 3: winfixups+real_faults */ longpath: @@ -106,8 +103,7 @@ longpath: nop nop nop - CREATE_VPTE_NOP + nop #undef CREATE_VPTE_OFFSET1 #undef CREATE_VPTE_OFFSET2 -#undef CREATE_VPTE_NOP diff --git a/arch/sparc64/kernel/dtlb_prot.S b/arch/sparc64/kernel/dtlb_prot.S index d848bb7374b..e0a92016260 100644 --- a/arch/sparc64/kernel/dtlb_prot.S +++ b/arch/sparc64/kernel/dtlb_prot.S @@ -14,14 +14,14 @@ */ /* PROT ** ICACHE line 1: User DTLB protection trap */ - stxa %g0, [%g1] ASI_DMMU ! Clear SFSR FaultValid bit - membar #Sync ! Synchronize ASI stores - rdpr %pstate, %g5 ! Move into alternate globals + mov TLB_SFSR, %g1 + stxa %g0, [%g1] ASI_DMMU ! Clear FaultValid bit + membar #Sync ! Synchronize stores + rdpr %pstate, %g5 ! Move into alt-globals wrpr %g5, PSTATE_AG|PSTATE_MG, %pstate - rdpr %tl, %g1 ! Need to do a winfixup? + rdpr %tl, %g1 ! Need a winfixup? cmp %g1, 1 ! Trap level >1? - mov TLB_TAG_ACCESS, %g4 ! Prepare reload of vaddr - nop + mov TLB_TAG_ACCESS, %g4 ! For reload of vaddr /* PROT ** ICACHE line 2: More real fault processing */ bgu,pn %xcc, winfix_trampoline ! Yes, perform winfixup diff --git a/arch/sparc64/kernel/head.S b/arch/sparc64/kernel/head.S index 4c942f71184..b49dcd4504b 100644 --- a/arch/sparc64/kernel/head.S +++ b/arch/sparc64/kernel/head.S @@ -28,19 +28,14 @@ #include /* This section from from _start to sparc64_boot_end should fit into - * 0x0000.0000.0040.4000 to 0x0000.0000.0040.8000 and will be sharing space - * with bootup_user_stack, which is from 0x0000.0000.0040.4000 to - * 0x0000.0000.0040.6000 and empty_bad_page, which is from - * 0x0000.0000.0040.6000 to 0x0000.0000.0040.8000. + * 0x0000000000404000 to 0x0000000000408000. */ - .text .globl start, _start, stext, _stext _start: start: _stext: stext: -bootup_user_stack: ! 0x0000000000404000 b sparc64_boot flushw /* Flush register file. */ @@ -392,31 +387,30 @@ tlb_fixup_done: * former does use this code, the latter does not yet due * to some complexities. That should be fixed up at some * point. + * + * There used to be enormous complexity wrt. transferring + * over from the firwmare's trap table to the Linux kernel's. + * For example, there was a chicken & egg problem wrt. building + * the OBP page tables, yet needing to be on the Linux kernel + * trap table (to translate PAGE_OFFSET addresses) in order to + * do that. + * + * We now handle OBP tlb misses differently, via linear lookups + * into the prom_trans[] array. So that specific problem no + * longer exists. Yet, unfortunately there are still some issues + * preventing trampoline.S from using this code... ho hum. */ .globl setup_trap_table setup_trap_table: save %sp, -192, %sp - /* Force interrupts to be disabled. Transferring over to - * the Linux trap table is a very delicate operation. - * Until we are actually on the Linux trap table, we cannot - * get the PAGE_OFFSET linear mappings translated. We need - * that mapping to be setup in order to initialize the firmware - * page tables. - * - * So there is this window of time, from the return from - * prom_set_trap_table() until inherit_prom_mappings_post() - * (in arch/sparc64/mm/init.c) completes, during which no - * firmware address space accesses can be made. - */ + /* Force interrupts to be disabled. */ rdpr %pstate, %o1 andn %o1, PSTATE_IE, %o1 wrpr %o1, 0x0, %pstate wrpr %g0, 15, %pil - /* Ok, now make the final valid firmware call to jump over - * to the Linux trap table. - */ + /* Make the firmware call to jump over to the Linux trap table. */ call prom_set_trap_table sethi %hi(sparc64_ttable_tl0), %o0 @@ -540,15 +534,21 @@ setup_tba: /* i0 = is_starfire */ ret restore +sparc64_boot_end: + +#include "systbls.S" +#include "ktlb.S" +#include "etrap.S" +#include "rtrap.S" +#include "winfixup.S" +#include "entry.S" /* - * The following skips make sure the trap table in ttable.S is aligned + * The following skip makes sure the trap table in ttable.S is aligned * on a 32K boundary as required by the v9 specs for TBA register. */ -sparc64_boot_end: - .skip 0x2000 + _start - sparc64_boot_end -bootup_user_stack_end: - .skip 0x2000 +1: + .skip 0x4000 + _start - 1b #ifdef CONFIG_SBUS /* This is just a hack to fool make depend config.h discovering @@ -560,15 +560,6 @@ bootup_user_stack_end: ! 0x0000000000408000 #include "ttable.S" -#include "systbls.S" -#include "ktlb.S" -#include "etrap.S" -#include "rtrap.S" -#include "winfixup.S" -#include "entry.S" - - /* This is just anal retentiveness on my part... */ - .align 16384 .data .align 8 diff --git a/arch/sparc64/kernel/itlb_base.S b/arch/sparc64/kernel/itlb_base.S index b5e32dfa4fb..4951ff8f687 100644 --- a/arch/sparc64/kernel/itlb_base.S +++ b/arch/sparc64/kernel/itlb_base.S @@ -15,14 +15,12 @@ */ #define CREATE_VPTE_OFFSET1(r1, r2) \ srax r1, 10, r2 -#define CREATE_VPTE_OFFSET2(r1, r2) -#define CREATE_VPTE_NOP nop +#define CREATE_VPTE_OFFSET2(r1, r2) nop #else /* PAGE_SHIFT */ #define CREATE_VPTE_OFFSET1(r1, r2) \ srax r1, PAGE_SHIFT, r2 #define CREATE_VPTE_OFFSET2(r1, r2) \ sllx r2, 3, r2 -#define CREATE_VPTE_NOP #endif /* PAGE_SHIFT */ @@ -36,6 +34,7 @@ */ /* ITLB ** ICACHE line 1: Quick user TLB misses */ + mov TLB_SFSR, %g1 ldxa [%g1 + %g1] ASI_IMMU, %g4 ! Get TAG_ACCESS CREATE_VPTE_OFFSET1(%g4, %g6) ! Create VPTE offset CREATE_VPTE_OFFSET2(%g4, %g6) ! Create VPTE offset @@ -43,41 +42,38 @@ 1: brgez,pn %g5, 3f ! Not valid, branch out sethi %hi(_PAGE_EXEC), %g4 ! Delay-slot andcc %g5, %g4, %g0 ! Executable? + +/* ITLB ** ICACHE line 2: Real faults */ be,pn %xcc, 3f ! Nope, branch. nop ! Delay-slot 2: stxa %g5, [%g0] ASI_ITLB_DATA_IN ! Load PTE into TLB retry ! Trap return -3: rdpr %pstate, %g4 ! Move into alternate globals - -/* ITLB ** ICACHE line 2: Real faults */ +3: rdpr %pstate, %g4 ! Move into alt-globals wrpr %g4, PSTATE_AG|PSTATE_MG, %pstate rdpr %tpc, %g5 ! And load faulting VA mov FAULT_CODE_ITLB, %g4 ! It was read from ITLB -sparc64_realfault_common: ! Called by TL0 dtlb_miss too + +/* ITLB ** ICACHE line 3: Finish faults */ +sparc64_realfault_common: ! Called by dtlb_miss stb %g4, [%g6 + TI_FAULT_CODE] stx %g5, [%g6 + TI_FAULT_ADDR] ba,pt %xcc, etrap ! Save state 1: rd %pc, %g7 ! ... - nop - -/* ITLB ** ICACHE line 3: Finish faults + window fixups */ call do_sparc64_fault ! Call fault handler add %sp, PTREGS_OFF, %o0! Compute pt_regs arg ba,pt %xcc, rtrap_clr_l6 ! Restore cpu state nop + +/* ITLB ** ICACHE line 4: Window fixups */ winfix_trampoline: rdpr %tpc, %g3 ! Prepare winfixup TNPC - or %g3, 0x7c, %g3 ! Compute offset to branch + or %g3, 0x7c, %g3 ! Compute branch offset wrpr %g3, %tnpc ! Write it into TNPC done ! Do it to it - -/* ITLB ** ICACHE line 4: Unused... */ nop nop nop nop - CREATE_VPTE_NOP #undef CREATE_VPTE_OFFSET1 #undef CREATE_VPTE_OFFSET2 -#undef CREATE_VPTE_NOP diff --git a/arch/sparc64/kernel/ktlb.S b/arch/sparc64/kernel/ktlb.S index 7796b37f478..d9244d3c9f7 100644 --- a/arch/sparc64/kernel/ktlb.S +++ b/arch/sparc64/kernel/ktlb.S @@ -58,9 +58,6 @@ vpte_noent: done vpte_insn_obp: - sethi %hi(prom_pmd_phys), %g5 - ldx [%g5 + %lo(prom_pmd_phys)], %g5 - /* Behave as if we are at TL0. */ wrpr %g0, 1, %tl rdpr %tpc, %g4 /* Find original faulting iaddr */ @@ -71,58 +68,57 @@ vpte_insn_obp: mov TLB_SFSR, %g1 stxa %g4, [%g1 + %g1] ASI_IMMU - /* Get PMD offset. */ - srlx %g4, 23, %g6 - and %g6, 0x7ff, %g6 - sllx %g6, 2, %g6 - - /* Load PMD, is it valid? */ - lduwa [%g5 + %g6] ASI_PHYS_USE_EC, %g5 - brz,pn %g5, longpath - sllx %g5, 11, %g5 - - /* Get PTE offset. */ - srlx %g4, 13, %g6 - and %g6, 0x3ff, %g6 - sllx %g6, 3, %g6 - - /* Load PTE. */ - ldxa [%g5 + %g6] ASI_PHYS_USE_EC, %g5 - brgez,pn %g5, longpath - nop - - /* TLB load and return from trap. */ + sethi %hi(prom_trans), %g5 + or %g5, %lo(prom_trans), %g5 + +1: ldx [%g5 + 0x00], %g6 ! base + brz,a,pn %g6, longpath ! no more entries, fail + mov TLB_SFSR, %g1 ! and restore %g1 + ldx [%g5 + 0x08], %g1 ! len + add %g6, %g1, %g1 ! end + cmp %g6, %g4 + bgu,pt %xcc, 2f + cmp %g4, %g1 + bgeu,pt %xcc, 2f + ldx [%g5 + 0x10], %g1 ! PTE + + /* TLB load, restore %g1, and return from trap. */ + sub %g4, %g6, %g6 + add %g1, %g6, %g5 + mov TLB_SFSR, %g1 stxa %g5, [%g0] ASI_ITLB_DATA_IN retry -kvmap_do_obp: - sethi %hi(prom_pmd_phys), %g5 - ldx [%g5 + %lo(prom_pmd_phys)], %g5 - - /* Get PMD offset. */ - srlx %g4, 23, %g6 - and %g6, 0x7ff, %g6 - sllx %g6, 2, %g6 - - /* Load PMD, is it valid? */ - lduwa [%g5 + %g6] ASI_PHYS_USE_EC, %g5 - brz,pn %g5, longpath - sllx %g5, 11, %g5 - - /* Get PTE offset. */ - srlx %g4, 13, %g6 - and %g6, 0x3ff, %g6 - sllx %g6, 3, %g6 - - /* Load PTE. */ - ldxa [%g5 + %g6] ASI_PHYS_USE_EC, %g5 - brgez,pn %g5, longpath - nop +2: ba,pt %xcc, 1b + add %g5, (3 * 8), %g5 ! next entry - /* TLB load and return from trap. */ +kvmap_do_obp: + sethi %hi(prom_trans), %g5 + or %g5, %lo(prom_trans), %g5 + srlx %g4, 13, %g4 + sllx %g4, 13, %g4 + +1: ldx [%g5 + 0x00], %g6 ! base + brz,a,pn %g6, longpath ! no more entries, fail + mov TLB_SFSR, %g1 ! and restore %g1 + ldx [%g5 + 0x08], %g1 ! len + add %g6, %g1, %g1 ! end + cmp %g6, %g4 + bgu,pt %xcc, 2f + cmp %g4, %g1 + bgeu,pt %xcc, 2f + ldx [%g5 + 0x10], %g1 ! PTE + + /* TLB load, restore %g1, and return from trap. */ + sub %g4, %g6, %g6 + add %g1, %g6, %g5 + mov TLB_SFSR, %g1 stxa %g5, [%g0] ASI_DTLB_DATA_IN retry +2: ba,pt %xcc, 1b + add %g5, (3 * 8), %g5 ! next entry + /* * On a first level data miss, check whether this is to the OBP range (note * that such accesses can be made by prom, as well as by kernel using diff --git a/arch/sparc64/mm/init.c b/arch/sparc64/mm/init.c index 0d2e967c720..1e44ee26cee 100644 --- a/arch/sparc64/mm/init.c +++ b/arch/sparc64/mm/init.c @@ -105,7 +105,7 @@ static void __init read_obp_memory(const char *property, regs[i].phys_addr = base; regs[i].reg_size = size; } - sort(regs, ents, sizeof(struct linux_prom64_registers), + sort(regs, ents, sizeof(struct linux_prom64_registers), cmp_p64, NULL); } @@ -367,8 +367,11 @@ struct linux_prom_translation { unsigned long size; unsigned long data; }; -static struct linux_prom_translation prom_trans[512] __initdata; -static unsigned int prom_trans_ents __initdata; + +/* Exported for kernel TLB miss handling in ktlb.S */ +struct linux_prom_translation prom_trans[512] __read_mostly; +unsigned int prom_trans_ents __read_mostly; +unsigned int swapper_pgd_zero __read_mostly; extern unsigned long prom_boot_page; extern void prom_remap(unsigned long physpage, unsigned long virtpage, int mmu_ihandle); @@ -378,122 +381,57 @@ extern void register_prom_callbacks(void); /* Exported for SMP bootup purposes. */ unsigned long kern_locked_tte_data; -/* Exported for kernel TLB miss handling in ktlb.S */ -unsigned long prom_pmd_phys __read_mostly; -unsigned int swapper_pgd_zero __read_mostly; - -static pmd_t *prompmd __read_mostly; - -#define BASE_PAGE_SIZE 8192 - /* * Translate PROM's mapping we capture at boot time into physical address. * The second parameter is only set from prom_callback() invocations. */ unsigned long prom_virt_to_phys(unsigned long promva, int *error) { - pmd_t *pmdp = prompmd + ((promva >> 23) & 0x7ff); - pte_t *ptep; - unsigned long base; - - if (pmd_none(*pmdp)) { - if (error) - *error = 1; - return 0; - } - ptep = (pte_t *)__pmd_page(*pmdp) + ((promva >> 13) & 0x3ff); - if (!pte_present(*ptep)) { - if (error) - *error = 1; - return 0; - } - if (error) { - *error = 0; - return pte_val(*ptep); - } - base = pte_val(*ptep) & _PAGE_PADDR; - - return base + (promva & (BASE_PAGE_SIZE - 1)); -} + int i; -/* The obp translations are saved based on 8k pagesize, since obp can - * use a mixture of pagesizes. Misses to the LOW_OBP_ADDRESS -> - * HI_OBP_ADDRESS range are handled in entry.S and do not use the vpte - * scheme (also, see rant in inherit_locked_prom_mappings()). - */ -static void __init build_obp_range(unsigned long start, unsigned long end, unsigned long data) -{ - unsigned long vaddr; + for (i = 0; i < prom_trans_ents; i++) { + struct linux_prom_translation *p = &prom_trans[i]; - for (vaddr = start; vaddr < end; vaddr += BASE_PAGE_SIZE) { - unsigned long val; - pmd_t *pmd; - pte_t *pte; + if (promva >= p->virt && + promva < (p->virt + p->size)) { + unsigned long base = p->data & _PAGE_PADDR; - pmd = prompmd + ((vaddr >> 23) & 0x7ff); - if (pmd_none(*pmd)) { - pte = __alloc_bootmem(BASE_PAGE_SIZE, BASE_PAGE_SIZE, - PAGE_SIZE); - if (!pte) - prom_halt(); - memset(pte, 0, BASE_PAGE_SIZE); - pmd_set(pmd, pte); + if (error) + *error = 0; + return base + (promva & (8192 - 1)); } - pte = (pte_t *) __pmd_page(*pmd) + ((vaddr >> 13) & 0x3ff); - - val = data; - - /* Clear diag TTE bits. */ - if (tlb_type == spitfire) - val &= ~0x0003fe0000000000UL; - - set_pte_at(&init_mm, vaddr, pte, - __pte(val | _PAGE_MODIFIED)); - - data += BASE_PAGE_SIZE; } + if (error) + *error = 1; + return 0UL; } +/* The obp translations are saved based on 8k pagesize, since obp can + * use a mixture of pagesizes. Misses to the LOW_OBP_ADDRESS -> + * HI_OBP_ADDRESS range are handled in ktlb.S and do not use the vpte + * scheme (also, see rant in inherit_locked_prom_mappings()). + */ static inline int in_obp_range(unsigned long vaddr) { return (vaddr >= LOW_OBP_ADDRESS && vaddr < HI_OBP_ADDRESS); } -#define OBP_PMD_SIZE 2048 -static void __init build_obp_pgtable(void) +static int cmp_ptrans(const void *a, const void *b) { - unsigned long i; - - prompmd = __alloc_bootmem(OBP_PMD_SIZE, OBP_PMD_SIZE, PAGE_SIZE); - if (!prompmd) - prom_halt(); - - memset(prompmd, 0, OBP_PMD_SIZE); - - prom_pmd_phys = __pa(prompmd); - - for (i = 0; i < prom_trans_ents; i++) { - unsigned long start, end; - - if (!in_obp_range(prom_trans[i].virt)) - continue; + const struct linux_prom_translation *x = a, *y = b; - start = prom_trans[i].virt; - end = start + prom_trans[i].size; - if (end > HI_OBP_ADDRESS) - end = HI_OBP_ADDRESS; - - build_obp_range(start, end, prom_trans[i].data); - } + if (x->virt > y->virt) + return 1; + if (x->virt < y->virt) + return -1; + return 0; } -/* Read OBP translations property into 'prom_trans[]'. - * Return the number of entries. - */ +/* Read OBP translations property into 'prom_trans[]'. */ static void __init read_obp_translations(void) { - int n, node; + int n, node, ents, first, last, i; node = prom_finddevice("/virtual-memory"); n = prom_getproplen(node, "translations"); @@ -515,7 +453,41 @@ static void __init read_obp_translations(void) n = n / sizeof(struct linux_prom_translation); - prom_trans_ents = n; + ents = n; + + sort(prom_trans, ents, sizeof(struct linux_prom_translation), + cmp_ptrans, NULL); + + /* Now kick out all the non-OBP entries. */ + for (i = 0; i < ents; i++) { + if (in_obp_range(prom_trans[i].virt)) + break; + } + first = i; + for (; i < ents; i++) { + if (!in_obp_range(prom_trans[i].virt)) + break; + } + last = i; + + for (i = 0; i < (last - first); i++) { + struct linux_prom_translation *src = &prom_trans[i + first]; + struct linux_prom_translation *dest = &prom_trans[i]; + + *dest = *src; + } + for (; i < ents; i++) { + struct linux_prom_translation *dest = &prom_trans[i]; + dest->virt = dest->size = dest->data = 0x0UL; + } + + prom_trans_ents = last - first; + + if (tlb_type == spitfire) { + /* Clear diag TTE bits. */ + for (i = 0; i < prom_trans_ents; i++) + prom_trans[i].data &= ~0x0003fe0000000000UL; + } } static void __init remap_kernel(void) @@ -553,21 +525,18 @@ static void __init remap_kernel(void) } -static void __init inherit_prom_mappings_pre(void) +static void __init inherit_prom_mappings(void) { read_obp_translations(); /* Now fixup OBP's idea about where we really are mapped. */ prom_printf("Remapping the kernel... "); remap_kernel(); - prom_printf("done.\n"); -} -static void __init inherit_prom_mappings_post(void) -{ - build_obp_pgtable(); + prom_printf("Registering callbacks... "); register_prom_callbacks(); + prom_printf("done.\n"); } /* The OBP specifications for sun4u mark 0xfffffffc00000000 and @@ -1519,7 +1488,7 @@ void __init paging_init(void) swapper_pgd_zero = pgd_val(swapper_pg_dir[0]); - inherit_prom_mappings_pre(); + inherit_prom_mappings(); /* Ok, we can use our TLB miss and window trap handlers safely. * We need to do a quick peek here to see if we are on StarFire @@ -1530,23 +1499,15 @@ void __init paging_init(void) extern void setup_tba(int); setup_tba(this_is_starfire); } - __flush_tlb_all(); - /* Everything from this point forward, until we are done with - * inherit_prom_mappings_post(), must complete successfully - * without calling into the firmware. The firwmare page tables - * have not been built, but we are running on the Linux kernel's - * trap table. - */ + inherit_locked_prom_mappings(1); + + __flush_tlb_all(); /* Setup bootmem... */ pages_avail = 0; last_valid_pfn = end_pfn = bootmem_init(&pages_avail); - inherit_prom_mappings_post(); - - inherit_locked_prom_mappings(1); - #ifdef CONFIG_DEBUG_PAGEALLOC kernel_physical_mapping_init(); #endif -- cgit v1.2.3-70-g09d2 From 3a8f675c40ba2d04e4fff6710db3da763436269f Mon Sep 17 00:00:00 2001 From: Ben Dooks Date: Thu, 13 Oct 2005 16:46:35 +0100 Subject: [ARM] 3006/1: S3C2410 - arch/arm/mach-s3c2410 sparse fixes Patch from Ben Dooks Remove an unused variable from s3c2410.c and ensure that items not needed to be exported from s3c2440.c are declared static. Signed-off-by: Ben Dooks Signed-off-by: Russell King --- arch/arm/mach-s3c2410/s3c2410.c | 3 --- arch/arm/mach-s3c2410/s3c2440.c | 4 ++-- 2 files changed, 2 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/arm/mach-s3c2410/s3c2410.c b/arch/arm/mach-s3c2410/s3c2410.c index 0b88993dfd2..a8bf5ec8260 100644 --- a/arch/arm/mach-s3c2410/s3c2410.c +++ b/arch/arm/mach-s3c2410/s3c2410.c @@ -125,9 +125,6 @@ static struct platform_device *uart_devices[] __initdata = { &s3c_uart2 }; -/* store our uart devices for the serial driver console */ -struct platform_device *s3c2410_uart_devices[3]; - static int s3c2410_uart_count = 0; /* uart registration process */ diff --git a/arch/arm/mach-s3c2410/s3c2440.c b/arch/arm/mach-s3c2410/s3c2440.c index d4c8281b55f..833fa36bce0 100644 --- a/arch/arm/mach-s3c2410/s3c2440.c +++ b/arch/arm/mach-s3c2410/s3c2440.c @@ -151,7 +151,7 @@ void __init s3c2440_init_uarts(struct s3c2410_uartcfg *cfg, int no) #ifdef CONFIG_PM -struct sleep_save s3c2440_sleep[] = { +static struct sleep_save s3c2440_sleep[] = { SAVE_ITEM(S3C2440_DSC0), SAVE_ITEM(S3C2440_DSC1), SAVE_ITEM(S3C2440_GPJDAT), @@ -260,7 +260,7 @@ void __init s3c2440_init_clocks(int xtal) * as a driver which may support both 2410 and 2440 may try and use it. */ -int __init s3c2440_core_init(void) +static int __init s3c2440_core_init(void) { return sysdev_class_register(&s3c2440_sysclass); } -- cgit v1.2.3-70-g09d2 From c8923c6b852d3a97c1faad0566e38fca330375a7 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 13 Oct 2005 14:41:23 -0700 Subject: [NETFILTER]: Fix OOPSes on machines with discontiguous cpu numbering. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Original patch by Harald Welte, with feedback from Herbert Xu and testing by Sébastien Bernard. EBTABLES, ARP tables, and IP/IP6 tables all assume that cpus are numbered linearly. That is not necessarily true. This patch fixes that up by calculating the largest possible cpu number, and allocating enough per-cpu structure space given that. Signed-off-by: David S. Miller --- arch/cris/arch-v32/kernel/smp.c | 2 ++ arch/sh/kernel/smp.c | 3 +++ include/linux/cpumask.h | 12 ++++++++++++ net/bridge/netfilter/ebtables.c | 27 +++++++++++++++++---------- net/ipv4/netfilter/arp_tables.c | 14 +++++++++----- net/ipv4/netfilter/ip_tables.c | 17 +++++++++++------ net/ipv6/netfilter/ip6_tables.c | 16 +++++++++++----- 7 files changed, 65 insertions(+), 26 deletions(-) (limited to 'arch') diff --git a/arch/cris/arch-v32/kernel/smp.c b/arch/cris/arch-v32/kernel/smp.c index 2c5cae04a95..957f551ba5c 100644 --- a/arch/cris/arch-v32/kernel/smp.c +++ b/arch/cris/arch-v32/kernel/smp.c @@ -15,6 +15,7 @@ #include #include #include +#include #define IPI_SCHEDULE 1 #define IPI_CALL 2 @@ -28,6 +29,7 @@ spinlock_t cris_atomic_locks[] = { [0 ... LOCK_COUNT - 1] = SPIN_LOCK_UNLOCKED}; /* CPU masks */ cpumask_t cpu_online_map = CPU_MASK_NONE; cpumask_t phys_cpu_present_map = CPU_MASK_NONE; +EXPORT_SYMBOL(phys_cpu_present_map); /* Variables used during SMP boot */ volatile int cpu_now_booting = 0; diff --git a/arch/sh/kernel/smp.c b/arch/sh/kernel/smp.c index 56a39d69e08..5ecefc02896 100644 --- a/arch/sh/kernel/smp.c +++ b/arch/sh/kernel/smp.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include @@ -39,6 +40,8 @@ struct sh_cpuinfo cpu_data[NR_CPUS]; extern void per_cpu_trap_init(void); cpumask_t cpu_possible_map; +EXPORT_SYMBOL(cpu_possible_map); + cpumask_t cpu_online_map; static atomic_t cpus_booted = ATOMIC_INIT(0); diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index b15826f6e3a..fe9778301d0 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -392,4 +392,16 @@ extern cpumask_t cpu_present_map; #define for_each_online_cpu(cpu) for_each_cpu_mask((cpu), cpu_online_map) #define for_each_present_cpu(cpu) for_each_cpu_mask((cpu), cpu_present_map) +/* Find the highest possible smp_processor_id() */ +static inline unsigned int highest_possible_processor_id(void) +{ + unsigned int cpu, highest = 0; + + for_each_cpu_mask(cpu, cpu_possible_map) + highest = cpu; + + return highest; +} + + #endif /* __LINUX_CPUMASK_H */ diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index c4540144f0f..f8ffbf6e233 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -26,6 +26,7 @@ #include #include #include +#include #include /* needed for logical [in,out]-dev filtering */ #include "../br_private.h" @@ -823,10 +824,11 @@ static int translate_table(struct ebt_replace *repl, /* this will get free'd in do_replace()/ebt_register_table() if an error occurs */ newinfo->chainstack = (struct ebt_chainstack **) - vmalloc(num_possible_cpus() * sizeof(struct ebt_chainstack)); + vmalloc((highest_possible_processor_id()+1) + * sizeof(struct ebt_chainstack)); if (!newinfo->chainstack) return -ENOMEM; - for (i = 0; i < num_possible_cpus(); i++) { + for_each_cpu(i) { newinfo->chainstack[i] = vmalloc(udc_cnt * sizeof(struct ebt_chainstack)); if (!newinfo->chainstack[i]) { @@ -895,9 +897,12 @@ static void get_counters(struct ebt_counter *oldcounters, /* counters of cpu 0 */ memcpy(counters, oldcounters, - sizeof(struct ebt_counter) * nentries); + sizeof(struct ebt_counter) * nentries); + /* add other counters to those of cpu 0 */ - for (cpu = 1; cpu < num_possible_cpus(); cpu++) { + for_each_cpu(cpu) { + if (cpu == 0) + continue; counter_base = COUNTER_BASE(oldcounters, nentries, cpu); for (i = 0; i < nentries; i++) { counters[i].pcnt += counter_base[i].pcnt; @@ -929,7 +934,8 @@ static int do_replace(void __user *user, unsigned int len) BUGPRINT("Entries_size never zero\n"); return -EINVAL; } - countersize = COUNTER_OFFSET(tmp.nentries) * num_possible_cpus(); + countersize = COUNTER_OFFSET(tmp.nentries) * + (highest_possible_processor_id()+1); newinfo = (struct ebt_table_info *) vmalloc(sizeof(struct ebt_table_info) + countersize); if (!newinfo) @@ -1022,7 +1028,7 @@ static int do_replace(void __user *user, unsigned int len) vfree(table->entries); if (table->chainstack) { - for (i = 0; i < num_possible_cpus(); i++) + for_each_cpu(i) vfree(table->chainstack[i]); vfree(table->chainstack); } @@ -1040,7 +1046,7 @@ free_counterstmp: vfree(counterstmp); /* can be initialized in translate_table() */ if (newinfo->chainstack) { - for (i = 0; i < num_possible_cpus(); i++) + for_each_cpu(i) vfree(newinfo->chainstack[i]); vfree(newinfo->chainstack); } @@ -1132,7 +1138,8 @@ int ebt_register_table(struct ebt_table *table) return -EINVAL; } - countersize = COUNTER_OFFSET(table->table->nentries) * num_possible_cpus(); + countersize = COUNTER_OFFSET(table->table->nentries) * + (highest_possible_processor_id()+1); newinfo = (struct ebt_table_info *) vmalloc(sizeof(struct ebt_table_info) + countersize); ret = -ENOMEM; @@ -1186,7 +1193,7 @@ free_unlock: up(&ebt_mutex); free_chainstack: if (newinfo->chainstack) { - for (i = 0; i < num_possible_cpus(); i++) + for_each_cpu(i) vfree(newinfo->chainstack[i]); vfree(newinfo->chainstack); } @@ -1209,7 +1216,7 @@ void ebt_unregister_table(struct ebt_table *table) up(&ebt_mutex); vfree(table->private->entries); if (table->private->chainstack) { - for (i = 0; i < num_possible_cpus(); i++) + for_each_cpu(i) vfree(table->private->chainstack[i]); vfree(table->private->chainstack); } diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index fa163425668..a7969286e6e 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -716,8 +716,10 @@ static int translate_table(const char *name, } /* And one copy for every other CPU */ - for (i = 1; i < num_possible_cpus(); i++) { - memcpy(newinfo->entries + SMP_ALIGN(newinfo->size)*i, + for_each_cpu(i) { + if (i == 0) + continue; + memcpy(newinfo->entries + SMP_ALIGN(newinfo->size) * i, newinfo->entries, SMP_ALIGN(newinfo->size)); } @@ -767,7 +769,7 @@ static void get_counters(const struct arpt_table_info *t, unsigned int cpu; unsigned int i; - for (cpu = 0; cpu < num_possible_cpus(); cpu++) { + for_each_cpu(cpu) { i = 0; ARPT_ENTRY_ITERATE(t->entries + TABLE_OFFSET(t, cpu), t->size, @@ -885,7 +887,8 @@ static int do_replace(void __user *user, unsigned int len) return -ENOMEM; newinfo = vmalloc(sizeof(struct arpt_table_info) - + SMP_ALIGN(tmp.size) * num_possible_cpus()); + + SMP_ALIGN(tmp.size) * + (highest_possible_processor_id()+1)); if (!newinfo) return -ENOMEM; @@ -1158,7 +1161,8 @@ int arpt_register_table(struct arpt_table *table, = { 0, 0, 0, { 0 }, { 0 }, { } }; newinfo = vmalloc(sizeof(struct arpt_table_info) - + SMP_ALIGN(repl->size) * num_possible_cpus()); + + SMP_ALIGN(repl->size) * + (highest_possible_processor_id()+1)); if (!newinfo) { ret = -ENOMEM; return ret; diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index eef99a1b5de..75c27e92f6a 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -27,6 +27,7 @@ #include #include #include +#include #include @@ -921,8 +922,10 @@ translate_table(const char *name, } /* And one copy for every other CPU */ - for (i = 1; i < num_possible_cpus(); i++) { - memcpy(newinfo->entries + SMP_ALIGN(newinfo->size)*i, + for_each_cpu(i) { + if (i == 0) + continue; + memcpy(newinfo->entries + SMP_ALIGN(newinfo->size) * i, newinfo->entries, SMP_ALIGN(newinfo->size)); } @@ -943,7 +946,7 @@ replace_table(struct ipt_table *table, struct ipt_entry *table_base; unsigned int i; - for (i = 0; i < num_possible_cpus(); i++) { + for_each_cpu(i) { table_base = (void *)newinfo->entries + TABLE_OFFSET(newinfo, i); @@ -990,7 +993,7 @@ get_counters(const struct ipt_table_info *t, unsigned int cpu; unsigned int i; - for (cpu = 0; cpu < num_possible_cpus(); cpu++) { + for_each_cpu(cpu) { i = 0; IPT_ENTRY_ITERATE(t->entries + TABLE_OFFSET(t, cpu), t->size, @@ -1128,7 +1131,8 @@ do_replace(void __user *user, unsigned int len) return -ENOMEM; newinfo = vmalloc(sizeof(struct ipt_table_info) - + SMP_ALIGN(tmp.size) * num_possible_cpus()); + + SMP_ALIGN(tmp.size) * + (highest_possible_processor_id()+1)); if (!newinfo) return -ENOMEM; @@ -1458,7 +1462,8 @@ int ipt_register_table(struct ipt_table *table, const struct ipt_replace *repl) = { 0, 0, 0, { 0 }, { 0 }, { } }; newinfo = vmalloc(sizeof(struct ipt_table_info) - + SMP_ALIGN(repl->size) * num_possible_cpus()); + + SMP_ALIGN(repl->size) * + (highest_possible_processor_id()+1)); if (!newinfo) return -ENOMEM; diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 2da514b16d9..b03e90649eb 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -28,6 +28,7 @@ #include #include #include +#include #include @@ -950,8 +951,10 @@ translate_table(const char *name, } /* And one copy for every other CPU */ - for (i = 1; i < num_possible_cpus(); i++) { - memcpy(newinfo->entries + SMP_ALIGN(newinfo->size)*i, + for_each_cpu(i) { + if (i == 0) + continue; + memcpy(newinfo->entries + SMP_ALIGN(newinfo->size) * i, newinfo->entries, SMP_ALIGN(newinfo->size)); } @@ -973,6 +976,7 @@ replace_table(struct ip6t_table *table, unsigned int i; for (i = 0; i < num_possible_cpus(); i++) { + for_each_cpu(i) { table_base = (void *)newinfo->entries + TABLE_OFFSET(newinfo, i); @@ -1019,7 +1023,7 @@ get_counters(const struct ip6t_table_info *t, unsigned int cpu; unsigned int i; - for (cpu = 0; cpu < num_possible_cpus(); cpu++) { + for_each_cpu(cpu) { i = 0; IP6T_ENTRY_ITERATE(t->entries + TABLE_OFFSET(t, cpu), t->size, @@ -1153,7 +1157,8 @@ do_replace(void __user *user, unsigned int len) return -ENOMEM; newinfo = vmalloc(sizeof(struct ip6t_table_info) - + SMP_ALIGN(tmp.size) * num_possible_cpus()); + + SMP_ALIGN(tmp.size) * + (highest_possible_processor_id()+1)); if (!newinfo) return -ENOMEM; @@ -1467,7 +1472,8 @@ int ip6t_register_table(struct ip6t_table *table, = { 0, 0, 0, { 0 }, { 0 }, { } }; newinfo = vmalloc(sizeof(struct ip6t_table_info) - + SMP_ALIGN(repl->size) * num_possible_cpus()); + + SMP_ALIGN(repl->size) * + (highest_possible_processor_id()+1)); if (!newinfo) return -ENOMEM; -- cgit v1.2.3-70-g09d2