diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2010-10-27 18:42:52 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2010-10-27 18:42:52 -0700 |
commit | 17bb51d56cdc8cbf252031db3107de034cfeb44c (patch) | |
tree | f9fb2c16b29a152d3413fa0028e660e3b6146584 /fs | |
parent | 0671b7674f42ab3a200401ea0e48d6f47d34acae (diff) | |
parent | 95aac7b1cd224f568fb83937044cd303ff11b029 (diff) |
Merge branch 'akpm-incoming-2'
* akpm-incoming-2: (139 commits)
epoll: make epoll_wait() use the hrtimer range feature
select: rename estimate_accuracy() to select_estimate_accuracy()
Remove duplicate includes from many files
ramoops: use the platform data structure instead of module params
kernel/resource.c: handle reinsertion of an already-inserted resource
kfifo: fix kfifo_alloc() to return a signed int value
w1: don't allow arbitrary users to remove w1 devices
alpha: remove dma64_addr_t usage
mips: remove dma64_addr_t usage
sparc: remove dma64_addr_t usage
fuse: use release_pages()
taskstats: use real microsecond granularity for CPU times
taskstats: split fill_pid function
taskstats: separate taskstats commands
delayacct: align to 8 byte boundary on 64-bit systems
delay-accounting: reimplement -c for getdelays.c to report information on a target command
namespaces Kconfig: move namespace menu location after the cgroup
namespaces Kconfig: remove the cgroup device whitelist experimental tag
namespaces Kconfig: remove pointless cgroup dependency
namespaces Kconfig: make namespace a submenu
...
Diffstat (limited to 'fs')
-rw-r--r-- | fs/Kconfig.binfmt | 4 | ||||
-rw-r--r-- | fs/eventpoll.c | 35 | ||||
-rw-r--r-- | fs/exec.c | 168 | ||||
-rw-r--r-- | fs/fuse/dev.c | 7 | ||||
-rw-r--r-- | fs/isofs/inode.c | 40 | ||||
-rw-r--r-- | fs/proc/base.c | 8 | ||||
-rw-r--r-- | fs/proc/softirqs.c | 4 | ||||
-rw-r--r-- | fs/proc/stat.c | 14 | ||||
-rw-r--r-- | fs/proc/task_mmu.c | 6 | ||||
-rw-r--r-- | fs/select.c | 6 |
10 files changed, 181 insertions, 111 deletions
diff --git a/fs/Kconfig.binfmt b/fs/Kconfig.binfmt index bb4cc5b8abc..79e2ca7973b 100644 --- a/fs/Kconfig.binfmt +++ b/fs/Kconfig.binfmt @@ -42,7 +42,7 @@ config BINFMT_ELF_FDPIC config CORE_DUMP_DEFAULT_ELF_HEADERS bool "Write ELF core dumps with partial segments" - default n + default y depends on BINFMT_ELF && ELF_CORE help ELF core dump files describe each memory mapping of the crashed @@ -60,7 +60,7 @@ config CORE_DUMP_DEFAULT_ELF_HEADERS inherited. See Documentation/filesystems/proc.txt for details. This config option changes the default setting of coredump_filter - seen at boot time. If unsure, say N. + seen at boot time. If unsure, say Y. config BINFMT_FLAT bool "Kernel support for flat binaries" diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 256bb7bb102..8cf07242067 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -77,9 +77,6 @@ /* Maximum number of nesting allowed inside epoll sets */ #define EP_MAX_NESTS 4 -/* Maximum msec timeout value storeable in a long int */ -#define EP_MAX_MSTIMEO min(1000ULL * MAX_SCHEDULE_TIMEOUT / HZ, (LONG_MAX - 999ULL) / HZ) - #define EP_MAX_EVENTS (INT_MAX / sizeof(struct epoll_event)) #define EP_UNACTIVE_PTR ((void *) -1L) @@ -1117,18 +1114,22 @@ static int ep_send_events(struct eventpoll *ep, static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events, int maxevents, long timeout) { - int res, eavail; + int res, eavail, timed_out = 0; unsigned long flags; - long jtimeout; + long slack; wait_queue_t wait; - - /* - * Calculate the timeout by checking for the "infinite" value (-1) - * and the overflow condition. The passed timeout is in milliseconds, - * that why (t * HZ) / 1000. - */ - jtimeout = (timeout < 0 || timeout >= EP_MAX_MSTIMEO) ? - MAX_SCHEDULE_TIMEOUT : (timeout * HZ + 999) / 1000; + struct timespec end_time; + ktime_t expires, *to = NULL; + + if (timeout > 0) { + ktime_get_ts(&end_time); + timespec_add_ns(&end_time, (u64)timeout * NSEC_PER_MSEC); + slack = select_estimate_accuracy(&end_time); + to = &expires; + *to = timespec_to_ktime(end_time); + } else if (timeout == 0) { + timed_out = 1; + } retry: spin_lock_irqsave(&ep->lock, flags); @@ -1150,7 +1151,7 @@ retry: * to TASK_INTERRUPTIBLE before doing the checks. */ set_current_state(TASK_INTERRUPTIBLE); - if (!list_empty(&ep->rdllist) || !jtimeout) + if (!list_empty(&ep->rdllist) || timed_out) break; if (signal_pending(current)) { res = -EINTR; @@ -1158,7 +1159,9 @@ retry: } spin_unlock_irqrestore(&ep->lock, flags); - jtimeout = schedule_timeout(jtimeout); + if (!schedule_hrtimeout_range(to, slack, HRTIMER_MODE_ABS)) + timed_out = 1; + spin_lock_irqsave(&ep->lock, flags); } __remove_wait_queue(&ep->wq, &wait); @@ -1176,7 +1179,7 @@ retry: * more luck. */ if (!res && eavail && - !(res = ep_send_events(ep, events, maxevents)) && jtimeout) + !(res = ep_send_events(ep, events, maxevents)) && !timed_out) goto retry; return res; diff --git a/fs/exec.c b/fs/exec.c index 3aa75b8888a..99d33a1371e 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -66,6 +66,12 @@ char core_pattern[CORENAME_MAX_SIZE] = "core"; unsigned int core_pipe_limit; int suid_dumpable = 0; +struct core_name { + char *corename; + int used, size; +}; +static atomic_t call_count = ATOMIC_INIT(1); + /* The maximal length of core_pattern is also specified in sysctl.c */ static LIST_HEAD(formats); @@ -1003,7 +1009,7 @@ int flush_old_exec(struct linux_binprm * bprm) bprm->mm = NULL; /* We're using it now */ - current->flags &= ~PF_RANDOMIZE; + current->flags &= ~(PF_RANDOMIZE | PF_KTHREAD); flush_thread(); current->personality &= ~bprm->per_clear; @@ -1083,14 +1089,14 @@ EXPORT_SYMBOL(setup_new_exec); */ int prepare_bprm_creds(struct linux_binprm *bprm) { - if (mutex_lock_interruptible(¤t->cred_guard_mutex)) + if (mutex_lock_interruptible(¤t->signal->cred_guard_mutex)) return -ERESTARTNOINTR; bprm->cred = prepare_exec_creds(); if (likely(bprm->cred)) return 0; - mutex_unlock(¤t->cred_guard_mutex); + mutex_unlock(¤t->signal->cred_guard_mutex); return -ENOMEM; } @@ -1098,7 +1104,7 @@ void free_bprm(struct linux_binprm *bprm) { free_arg_pages(bprm); if (bprm->cred) { - mutex_unlock(¤t->cred_guard_mutex); + mutex_unlock(¤t->signal->cred_guard_mutex); abort_creds(bprm->cred); } kfree(bprm); @@ -1119,13 +1125,13 @@ void install_exec_creds(struct linux_binprm *bprm) * credentials; any time after this it may be unlocked. */ security_bprm_committed_creds(bprm); - mutex_unlock(¤t->cred_guard_mutex); + mutex_unlock(¤t->signal->cred_guard_mutex); } EXPORT_SYMBOL(install_exec_creds); /* * determine how safe it is to execute the proposed program - * - the caller must hold current->cred_guard_mutex to protect against + * - the caller must hold ->cred_guard_mutex to protect against * PTRACE_ATTACH */ int check_unsafe_exec(struct linux_binprm *bprm) @@ -1406,7 +1412,6 @@ int do_execve(const char * filename, if (retval < 0) goto out; - current->flags &= ~PF_KTHREAD; retval = search_binary_handler(bprm,regs); if (retval < 0) goto out; @@ -1459,127 +1464,148 @@ void set_binfmt(struct linux_binfmt *new) EXPORT_SYMBOL(set_binfmt); +static int expand_corename(struct core_name *cn) +{ + char *old_corename = cn->corename; + + cn->size = CORENAME_MAX_SIZE * atomic_inc_return(&call_count); + cn->corename = krealloc(old_corename, cn->size, GFP_KERNEL); + + if (!cn->corename) { + kfree(old_corename); + return -ENOMEM; + } + + return 0; +} + +static int cn_printf(struct core_name *cn, const char *fmt, ...) +{ + char *cur; + int need; + int ret; + va_list arg; + + va_start(arg, fmt); + need = vsnprintf(NULL, 0, fmt, arg); + va_end(arg); + + if (likely(need < cn->size - cn->used - 1)) + goto out_printf; + + ret = expand_corename(cn); + if (ret) + goto expand_fail; + +out_printf: + cur = cn->corename + cn->used; + va_start(arg, fmt); + vsnprintf(cur, need + 1, fmt, arg); + va_end(arg); + cn->used += need; + return 0; + +expand_fail: + return ret; +} + /* format_corename will inspect the pattern parameter, and output a * name into corename, which must have space for at least * CORENAME_MAX_SIZE bytes plus one byte for the zero terminator. */ -static int format_corename(char *corename, long signr) +static int format_corename(struct core_name *cn, long signr) { const struct cred *cred = current_cred(); const char *pat_ptr = core_pattern; int ispipe = (*pat_ptr == '|'); - char *out_ptr = corename; - char *const out_end = corename + CORENAME_MAX_SIZE; - int rc; int pid_in_pattern = 0; + int err = 0; + + cn->size = CORENAME_MAX_SIZE * atomic_read(&call_count); + cn->corename = kmalloc(cn->size, GFP_KERNEL); + cn->used = 0; + + if (!cn->corename) + return -ENOMEM; /* Repeat as long as we have more pattern to process and more output space */ while (*pat_ptr) { if (*pat_ptr != '%') { - if (out_ptr == out_end) + if (*pat_ptr == 0) goto out; - *out_ptr++ = *pat_ptr++; + err = cn_printf(cn, "%c", *pat_ptr++); } else { switch (*++pat_ptr) { + /* single % at the end, drop that */ case 0: goto out; /* Double percent, output one percent */ case '%': - if (out_ptr == out_end) - goto out; - *out_ptr++ = '%'; + err = cn_printf(cn, "%c", '%'); break; /* pid */ case 'p': pid_in_pattern = 1; - rc = snprintf(out_ptr, out_end - out_ptr, - "%d", task_tgid_vnr(current)); - if (rc > out_end - out_ptr) - goto out; - out_ptr += rc; + err = cn_printf(cn, "%d", + task_tgid_vnr(current)); break; /* uid */ case 'u': - rc = snprintf(out_ptr, out_end - out_ptr, - "%d", cred->uid); - if (rc > out_end - out_ptr) - goto out; - out_ptr += rc; + err = cn_printf(cn, "%d", cred->uid); break; /* gid */ case 'g': - rc = snprintf(out_ptr, out_end - out_ptr, - "%d", cred->gid); - if (rc > out_end - out_ptr) - goto out; - out_ptr += rc; + err = cn_printf(cn, "%d", cred->gid); break; /* signal that caused the coredump */ case 's': - rc = snprintf(out_ptr, out_end - out_ptr, - "%ld", signr); - if (rc > out_end - out_ptr) - goto out; - out_ptr += rc; + err = cn_printf(cn, "%ld", signr); break; /* UNIX time of coredump */ case 't': { struct timeval tv; do_gettimeofday(&tv); - rc = snprintf(out_ptr, out_end - out_ptr, - "%lu", tv.tv_sec); - if (rc > out_end - out_ptr) - goto out; - out_ptr += rc; + err = cn_printf(cn, "%lu", tv.tv_sec); break; } /* hostname */ case 'h': down_read(&uts_sem); - rc = snprintf(out_ptr, out_end - out_ptr, - "%s", utsname()->nodename); + err = cn_printf(cn, "%s", + utsname()->nodename); up_read(&uts_sem); - if (rc > out_end - out_ptr) - goto out; - out_ptr += rc; break; /* executable */ case 'e': - rc = snprintf(out_ptr, out_end - out_ptr, - "%s", current->comm); - if (rc > out_end - out_ptr) - goto out; - out_ptr += rc; + err = cn_printf(cn, "%s", current->comm); break; /* core limit size */ case 'c': - rc = snprintf(out_ptr, out_end - out_ptr, - "%lu", rlimit(RLIMIT_CORE)); - if (rc > out_end - out_ptr) - goto out; - out_ptr += rc; + err = cn_printf(cn, "%lu", + rlimit(RLIMIT_CORE)); break; default: break; } ++pat_ptr; } + + if (err) + return err; } + /* Backward compatibility with core_uses_pid: * * If core_pattern does not include a %p (as is the default) * and core_uses_pid is set, then .%pid will be appended to * the filename. Do not do this for piped commands. */ if (!ispipe && !pid_in_pattern && core_uses_pid) { - rc = snprintf(out_ptr, out_end - out_ptr, - ".%d", task_tgid_vnr(current)); - if (rc > out_end - out_ptr) - goto out; - out_ptr += rc; + err = cn_printf(cn, ".%d", task_tgid_vnr(current)); + if (err) + return err; } out: - *out_ptr = 0; return ispipe; } @@ -1856,7 +1882,7 @@ static int umh_pipe_setup(struct subprocess_info *info) void do_coredump(long signr, int exit_code, struct pt_regs *regs) { struct core_state core_state; - char corename[CORENAME_MAX_SIZE + 1]; + struct core_name cn; struct mm_struct *mm = current->mm; struct linux_binfmt * binfmt; const struct cred *old_cred; @@ -1911,7 +1937,13 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs) */ clear_thread_flag(TIF_SIGPENDING); - ispipe = format_corename(corename, signr); + ispipe = format_corename(&cn, signr); + + if (ispipe == -ENOMEM) { + printk(KERN_WARNING "format_corename failed\n"); + printk(KERN_WARNING "Aborting core\n"); + goto fail_corename; + } if (ispipe) { int dump_count; @@ -1948,7 +1980,7 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs) goto fail_dropcount; } - helper_argv = argv_split(GFP_KERNEL, corename+1, NULL); + helper_argv = argv_split(GFP_KERNEL, cn.corename+1, NULL); if (!helper_argv) { printk(KERN_WARNING "%s failed to allocate memory\n", __func__); @@ -1961,7 +1993,7 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs) argv_free(helper_argv); if (retval) { printk(KERN_INFO "Core dump to %s pipe failed\n", - corename); + cn.corename); goto close_fail; } } else { @@ -1970,7 +2002,7 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs) if (cprm.limit < binfmt->min_coredump) goto fail_unlock; - cprm.file = filp_open(corename, + cprm.file = filp_open(cn.corename, O_CREAT | 2 | O_NOFOLLOW | O_LARGEFILE | flag, 0600); if (IS_ERR(cprm.file)) @@ -2012,6 +2044,8 @@ fail_dropcount: if (ispipe) atomic_dec(&core_dump_count); fail_unlock: + kfree(cn.corename); +fail_corename: coredump_finish(mm); revert_creds(old_cred); fail_creds: diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index b98664275f0..6e07696308d 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -1334,12 +1334,7 @@ out_finish: static void fuse_retrieve_end(struct fuse_conn *fc, struct fuse_req *req) { - int i; - - for (i = 0; i < req->num_pages; i++) { - struct page *page = req->pages[i]; - page_cache_release(page); - } + release_pages(req->pages, req->num_pages, 0); } static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode, diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c index 60c2b944d76..79cf7f616bb 100644 --- a/fs/isofs/inode.c +++ b/fs/isofs/inode.c @@ -544,6 +544,34 @@ static unsigned int isofs_get_last_session(struct super_block *sb, s32 session) } /* + * Check if root directory is empty (has less than 3 files). + * + * Used to detect broken CDs where ISO root directory is empty but Joliet root + * directory is OK. If such CD has Rock Ridge extensions, they will be disabled + * (and Joliet used instead) or else no files would be visible. + */ +static bool rootdir_empty(struct super_block *sb, unsigned long block) +{ + int offset = 0, files = 0, de_len; + struct iso_directory_record *de; + struct buffer_head *bh; + + bh = sb_bread(sb, block); + if (!bh) + return true; + while (files < 3) { + de = (struct iso_directory_record *) (bh->b_data + offset); + de_len = *(unsigned char *) de; + if (de_len == 0) + break; + files++; + offset += de_len; + } + brelse(bh); + return files < 3; +} + +/* * Initialize the superblock and read the root inode. * * Note: a check_disk_change() has been done immediately prior @@ -843,6 +871,18 @@ root_found: goto out_no_root; /* + * Fix for broken CDs with Rock Ridge and empty ISO root directory but + * correct Joliet root directory. + */ + if (sbi->s_rock == 1 && joliet_level && + rootdir_empty(s, sbi->s_firstdatazone)) { + printk(KERN_NOTICE + "ISOFS: primary root directory is empty. " + "Disabling Rock Ridge and switching to Joliet."); + sbi->s_rock = 0; + } + + /* * If this disk has both Rock Ridge and Joliet on it, then we * want to use Rock Ridge by default. This can be overridden * by using the norock mount option. There is still one other diff --git a/fs/proc/base.c b/fs/proc/base.c index 9b094c1c846..f3d02ca461e 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -226,7 +226,7 @@ struct mm_struct *mm_for_maps(struct task_struct *task) { struct mm_struct *mm; - if (mutex_lock_killable(&task->cred_guard_mutex)) + if (mutex_lock_killable(&task->signal->cred_guard_mutex)) return NULL; mm = get_task_mm(task); @@ -235,7 +235,7 @@ struct mm_struct *mm_for_maps(struct task_struct *task) mmput(mm); mm = NULL; } - mutex_unlock(&task->cred_guard_mutex); + mutex_unlock(&task->signal->cred_guard_mutex); return mm; } @@ -2354,14 +2354,14 @@ static ssize_t proc_pid_attr_write(struct file * file, const char __user * buf, goto out_free; /* Guard against adverse ptrace interaction */ - length = mutex_lock_interruptible(&task->cred_guard_mutex); + length = mutex_lock_interruptible(&task->signal->cred_guard_mutex); if (length < 0) goto out_free; length = security_setprocattr(task, (char*)file->f_path.dentry->d_name.name, (void*)page, count); - mutex_unlock(&task->cred_guard_mutex); + mutex_unlock(&task->signal->cred_guard_mutex); out_free: free_page((unsigned long) page); out: diff --git a/fs/proc/softirqs.c b/fs/proc/softirqs.c index 1807c2419f1..37994737c98 100644 --- a/fs/proc/softirqs.c +++ b/fs/proc/softirqs.c @@ -10,13 +10,13 @@ static int show_softirqs(struct seq_file *p, void *v) { int i, j; - seq_printf(p, " "); + seq_printf(p, " "); for_each_possible_cpu(i) seq_printf(p, "CPU%-8d", i); seq_printf(p, "\n"); for (i = 0; i < NR_SOFTIRQS; i++) { - seq_printf(p, "%8s:", softirq_to_name[i]); + seq_printf(p, "%12s:", softirq_to_name[i]); for_each_possible_cpu(j) seq_printf(p, " %10u", kstat_softirqs_cpu(i, j)); seq_printf(p, "\n"); diff --git a/fs/proc/stat.c b/fs/proc/stat.c index bf31b03fc27..e15a19c93ba 100644 --- a/fs/proc/stat.c +++ b/fs/proc/stat.c @@ -31,7 +31,6 @@ static int show_stat(struct seq_file *p, void *v) u64 sum_softirq = 0; unsigned int per_softirq_sums[NR_SOFTIRQS] = {0}; struct timespec boottime; - unsigned int per_irq_sum; user = nice = system = idle = iowait = irq = softirq = steal = cputime64_zero; @@ -52,9 +51,7 @@ static int show_stat(struct seq_file *p, void *v) guest = cputime64_add(guest, kstat_cpu(i).cpustat.guest); guest_nice = cputime64_add(guest_nice, kstat_cpu(i).cpustat.guest_nice); - for_each_irq_nr(j) { - sum += kstat_irqs_cpu(j, i); - } + sum += kstat_cpu_irqs_sum(i); sum += arch_irq_stat_cpu(i); for (j = 0; j < NR_SOFTIRQS; j++) { @@ -110,13 +107,8 @@ static int show_stat(struct seq_file *p, void *v) seq_printf(p, "intr %llu", (unsigned long long)sum); /* sum again ? it could be updated? */ - for_each_irq_nr(j) { - per_irq_sum = 0; - for_each_possible_cpu(i) - per_irq_sum += kstat_irqs_cpu(j, i); - - seq_printf(p, " %u", per_irq_sum); - } + for_each_irq_nr(j) + seq_printf(p, " %u", kstat_irqs(j)); seq_printf(p, "\nctxt %llu\n" diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 871e25ed006..da6b01d70f0 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -327,6 +327,7 @@ struct mem_size_stats { unsigned long private_clean; unsigned long private_dirty; unsigned long referenced; + unsigned long anonymous; unsigned long swap; u64 pss; }; @@ -357,6 +358,9 @@ static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, if (!page) continue; + if (PageAnon(page)) + mss->anonymous += PAGE_SIZE; + mss->resident += PAGE_SIZE; /* Accumulate the size in pages that have been accessed. */ if (pte_young(ptent) || PageReferenced(page)) @@ -410,6 +414,7 @@ static int show_smap(struct seq_file *m, void *v) "Private_Clean: %8lu kB\n" "Private_Dirty: %8lu kB\n" "Referenced: %8lu kB\n" + "Anonymous: %8lu kB\n" "Swap: %8lu kB\n" "KernelPageSize: %8lu kB\n" "MMUPageSize: %8lu kB\n", @@ -421,6 +426,7 @@ static int show_smap(struct seq_file *m, void *v) mss.private_clean >> 10, mss.private_dirty >> 10, mss.referenced >> 10, + mss.anonymous >> 10, mss.swap >> 10, vma_kernel_pagesize(vma) >> 10, vma_mmu_pagesize(vma) >> 10); diff --git a/fs/select.c b/fs/select.c index 500a669f779..b7b10aa3086 100644 --- a/fs/select.c +++ b/fs/select.c @@ -67,7 +67,7 @@ static long __estimate_accuracy(struct timespec *tv) return slack; } -static long estimate_accuracy(struct timespec *tv) +long select_estimate_accuracy(struct timespec *tv) { unsigned long ret; struct timespec now; @@ -417,7 +417,7 @@ int do_select(int n, fd_set_bits *fds, struct timespec *end_time) } if (end_time && !timed_out) - slack = estimate_accuracy(end_time); + slack = select_estimate_accuracy(end_time); retval = 0; for (;;) { @@ -769,7 +769,7 @@ static int do_poll(unsigned int nfds, struct poll_list *list, } if (end_time && !timed_out) - slack = estimate_accuracy(end_time); + slack = select_estimate_accuracy(end_time); for (;;) { struct poll_list *walk; |