From 7bb67439bf6bd3782f07f1d7be1e63406453d5de Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Sun, 31 Aug 2008 08:05:58 -0700 Subject: select: Introduce a hrtimeout function This patch adds a schedule_hrtimeout() function, to be used by select() and poll() in a later patch. This function works similarly to schedule_timeout() in most ways, but takes a ktime_t expiry time (plus an hrtimer mode) rather than jiffies. With a lot of contributions/fixes from Thomas Signed-off-by: Arjan van de Ven Signed-off-by: Thomas Gleixner --- include/linux/hrtimer.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 6d93dce61cb..becd17db1a1 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -346,6 +346,8 @@ extern long hrtimer_nanosleep_restart(struct restart_block *restart_block); extern void hrtimer_init_sleeper(struct hrtimer_sleeper *sl, struct task_struct *tsk); +extern int schedule_hrtimeout(ktime_t *expires, const enum hrtimer_mode mode); + /* Soft interrupt function to run the hrtimer queues: */ extern void hrtimer_run_queues(void); extern void hrtimer_run_pending(void); -- cgit v1.2.3-70-g09d2 From df0cc0539b4127bd02f64de2c335b4af1fdb3845 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 31 Aug 2008 08:09:53 -0700 Subject: select: add a timespec_add_safe() function For the select() rework, it's important to be able to add timespec structures in an overflow-safe manner. This patch adds a timespec_add_safe() function for this which is similar in operation to ktime_add_safe(), but works on a struct timespec. 
Signed-off-by: Thomas Gleixner Signed-off-by: Arjan van de Ven --- include/linux/time.h | 4 ++++ kernel/time.c | 18 ++++++++++++++++++ 2 files changed, 22 insertions(+) (limited to 'include/linux') diff --git a/include/linux/time.h b/include/linux/time.h index e15206a7e82..72697647848 100644 --- a/include/linux/time.h +++ b/include/linux/time.h @@ -38,6 +38,8 @@ struct timezone { #define NSEC_PER_SEC 1000000000L #define FSEC_PER_SEC 1000000000000000L +#define TIME_T_MAX (time_t)((1UL << ((sizeof(time_t) << 3) - 1)) - 1) + static inline int timespec_equal(const struct timespec *a, const struct timespec *b) { @@ -72,6 +74,8 @@ extern unsigned long mktime(const unsigned int year, const unsigned int mon, const unsigned int min, const unsigned int sec); extern void set_normalized_timespec(struct timespec *ts, time_t sec, long nsec); +extern struct timespec timespec_add_safe(const struct timespec lhs, + const struct timespec rhs); /* * sub = lhs - rhs, in normalized form diff --git a/kernel/time.c b/kernel/time.c index 6a08660b4fa..d63a4336fad 100644 --- a/kernel/time.c +++ b/kernel/time.c @@ -669,3 +669,21 @@ EXPORT_SYMBOL(get_jiffies_64); #endif EXPORT_SYMBOL(jiffies); + +/* + * Add two timespec values and do a safety check for overflow. 
+ * It's assumed that both values are valid (>= 0) + */ +struct timespec timespec_add_safe(const struct timespec lhs, + const struct timespec rhs) +{ + struct timespec res; + + set_normalized_timespec(&res, lhs.tv_sec + rhs.tv_sec, + lhs.tv_nsec + rhs.tv_nsec); + + if (res.tv_sec < lhs.tv_sec || res.tv_sec < rhs.tv_sec) + res.tv_sec = TIME_T_MAX; + + return res; +} -- cgit v1.2.3-70-g09d2 From b773ad40aca5bd755ba886620842f16e8fef6d75 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 31 Aug 2008 08:16:57 -0700 Subject: select: add poll_select_set_timeout() and poll_select_copy_remaining() helpers This patch adds 2 helpers that will be used for the hrtimer based select/poll: poll_select_set_timeout() is a helper that takes a timeout (as a second, nanosecond pair) and turns that into a "struct timespec" that represents the absolute end time. This is a common operation in the many select() and poll() variants and needs various, common, sanity checks. poll_select_copy_remaining() is a helper that takes care of copying the remaining time to userspace, as select(), pselect() and ppoll() do. This function comes in both a natural and a compat implementation (due to datastructure differences). 
Signed-off-by: Thomas Gleixner Signed-off-by: Arjan van de Ven --- fs/compat.c | 51 +++++++++++++++++++++++++++++++++++ fs/select.c | 75 ++++++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/poll.h | 2 ++ 3 files changed, 128 insertions(+) (limited to 'include/linux') diff --git a/fs/compat.c b/fs/compat.c index 075d0509970..424767c954a 100644 --- a/fs/compat.c +++ b/fs/compat.c @@ -1436,6 +1436,57 @@ out_ret: #define __COMPAT_NFDBITS (8 * sizeof(compat_ulong_t)) +static int poll_select_copy_remaining(struct timespec *end_time, void __user *p, + int timeval, int ret) +{ + struct timespec ts; + + if (!p) + return ret; + + if (current->personality & STICKY_TIMEOUTS) + goto sticky; + + /* No update for zero timeout */ + if (!end_time->tv_sec && !end_time->tv_nsec) + return ret; + + ktime_get_ts(&ts); + ts = timespec_sub(*end_time, ts); + if (ts.tv_sec < 0) + ts.tv_sec = ts.tv_nsec = 0; + + if (timeval) { + struct compat_timeval rtv; + + rtv.tv_sec = ts.tv_sec; + rtv.tv_usec = ts.tv_nsec / NSEC_PER_USEC; + + if (!copy_to_user(p, &rtv, sizeof(rtv))) + return ret; + } else { + struct compat_timespec rts; + + rts.tv_sec = ts.tv_sec; + rts.tv_nsec = ts.tv_nsec; + + if (!copy_to_user(p, &rts, sizeof(rts))) + return ret; + } + /* + * If an application puts its timeval in read-only memory, we + * don't want the Linux-specific update to the timeval to + * cause a fault after the select has completed + * successfully. However, because we're not updating the + * timeval, we can't restart the system call. + */ + +sticky: + if (ret == -ERESTARTNOHAND) + ret = -EINTR; + return ret; +} + /* * Ooo, nasty. We need here to frob 32-bit unsigned longs to * 64-bit unsigned longs. 
diff --git a/fs/select.c b/fs/select.c index da0e88201c3..1180a620778 100644 --- a/fs/select.c +++ b/fs/select.c @@ -130,6 +130,81 @@ static void __pollwait(struct file *filp, wait_queue_head_t *wait_address, add_wait_queue(wait_address, &entry->wait); } +/** + * poll_select_set_timeout - helper function to setup the timeout value + * @to: pointer to timespec variable for the final timeout + * @sec: seconds (from user space) + * @nsec: nanoseconds (from user space) + * + * Note, we do not use a timespec for the user space value here, That + * way we can use the function for timeval and compat interfaces as well. + * + * Returns -EINVAL if sec/nsec are not normalized. Otherwise 0. + */ +int poll_select_set_timeout(struct timespec *to, long sec, long nsec) +{ + struct timespec ts = {.tv_sec = sec, .tv_nsec = nsec}; + + if (!timespec_valid(&ts)) + return -EINVAL; + + /* Optimize for the zero timeout value here */ + if (!sec && !nsec) { + to->tv_sec = to->tv_nsec = 0; + } else { + ktime_get_ts(to); + *to = timespec_add_safe(*to, ts); + } + return 0; +} + +static int poll_select_copy_remaining(struct timespec *end_time, void __user *p, + int timeval, int ret) +{ + struct timespec rts; + struct timeval rtv; + + if (!p) + return ret; + + if (current->personality & STICKY_TIMEOUTS) + goto sticky; + + /* No update for zero timeout */ + if (!end_time->tv_sec && !end_time->tv_nsec) + return ret; + + ktime_get_ts(&rts); + rts = timespec_sub(*end_time, rts); + if (rts.tv_sec < 0) + rts.tv_sec = rts.tv_nsec = 0; + + if (timeval) { + rtv.tv_sec = rts.tv_sec; + rtv.tv_usec = rts.tv_nsec / NSEC_PER_USEC; + + if (!copy_to_user(p, &rtv, sizeof(rtv))) + return ret; + + } else if (!copy_to_user(p, &rts, sizeof(rts))) + return ret; + + /* + * If an application puts its timeval in read-only memory, we + * don't want the Linux-specific update to the timeval to + * cause a fault after the select has completed + * successfully. 
However, because we're not updating the + * timeval, we can't restart the system call. + */ + +sticky: + if (ret == -ERESTARTNOHAND) + ret = -EINTR; + return ret; +} + + + #define FDS_IN(fds, n) (fds->in + n) #define FDS_OUT(fds, n) (fds->out + n) #define FDS_EX(fds, n) (fds->ex + n) diff --git a/include/linux/poll.h b/include/linux/poll.h index ef453828877..f65de5128a9 100644 --- a/include/linux/poll.h +++ b/include/linux/poll.h @@ -120,6 +120,8 @@ extern int do_sys_poll(struct pollfd __user * ufds, unsigned int nfds, extern int core_sys_select(int n, fd_set __user *inp, fd_set __user *outp, fd_set __user *exp, s64 *timeout); +extern int poll_select_set_timeout(struct timespec *to, long sec, long nsec); + #endif /* KERNEL */ #endif /* _LINUX_POLL_H */ -- cgit v1.2.3-70-g09d2 From be5dad20a55e054a35dac7f6f5f184dc72b379b4 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 31 Aug 2008 08:19:15 -0700 Subject: select: add a poll specific struct to the restart_block union with hrtimer poll/select, the signal restart data no longer is a single long representing a jiffies count, but it becomes a second/nanosecond pair that also needs to encode if there was a timeout at all or not. 
This patch adds a struct to the restart_block union for this purpose Signed-off-by: Thomas Gleixner Signed-off-by: Arjan van de Ven --- include/linux/thread_info.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h index 38a56477f27..e6b820f8b56 100644 --- a/include/linux/thread_info.h +++ b/include/linux/thread_info.h @@ -38,6 +38,14 @@ struct restart_block { #endif u64 expires; } nanosleep; + /* For poll */ + struct { + struct pollfd __user *ufds; + int nfds; + int has_timeout; + unsigned long tv_sec; + unsigned long tv_nsec; + } poll; }; }; -- cgit v1.2.3-70-g09d2 From 8ff3e8e85fa6c312051134b3953e397feb639f51 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Sun, 31 Aug 2008 08:26:40 -0700 Subject: select: switch select() and poll() over to hrtimers With lots of help, input and cleanups from Thomas Gleixner This patch switches select() and poll() over to hrtimers. The core of the patch is replacing the "s64 timeout" with a "struct timespec end_time" in all the plumbing. But most of the diffstat comes from using the just introduced helpers: poll_select_set_timeout poll_select_copy_remaining timespec_add_safe which make manipulating the timespec easier and less error-prone. 
Signed-off-by: Arjan van de Ven Signed-off-by: Thomas Gleixner --- fs/compat.c | 136 ++++---------------------- fs/select.c | 263 +++++++++++++++++---------------------------------- include/linux/poll.h | 6 +- 3 files changed, 111 insertions(+), 294 deletions(-) (limited to 'include/linux') diff --git a/fs/compat.c b/fs/compat.c index 424767c954a..133ed7f5d68 100644 --- a/fs/compat.c +++ b/fs/compat.c @@ -1568,7 +1568,8 @@ int compat_set_fd_set(unsigned long nr, compat_ulong_t __user *ufdset, ((unsigned long) (MAX_SCHEDULE_TIMEOUT / HZ)-1) int compat_core_sys_select(int n, compat_ulong_t __user *inp, - compat_ulong_t __user *outp, compat_ulong_t __user *exp, s64 *timeout) + compat_ulong_t __user *outp, compat_ulong_t __user *exp, + struct timespec *end_time) { fd_set_bits fds; void *bits; @@ -1615,7 +1616,7 @@ int compat_core_sys_select(int n, compat_ulong_t __user *inp, zero_fd_set(n, fds.res_out); zero_fd_set(n, fds.res_ex); - ret = do_select(n, &fds, timeout); + ret = do_select(n, &fds, end_time); if (ret < 0) goto out; @@ -1641,7 +1642,7 @@ asmlinkage long compat_sys_select(int n, compat_ulong_t __user *inp, compat_ulong_t __user *outp, compat_ulong_t __user *exp, struct compat_timeval __user *tvp) { - s64 timeout = -1; + struct timespec end_time, *to = NULL; struct compat_timeval tv; int ret; @@ -1649,43 +1650,14 @@ asmlinkage long compat_sys_select(int n, compat_ulong_t __user *inp, if (copy_from_user(&tv, tvp, sizeof(tv))) return -EFAULT; - if (tv.tv_sec < 0 || tv.tv_usec < 0) + to = &end_time; + if (poll_select_set_timeout(to, tv.tv_sec, + tv.tv_usec * NSEC_PER_USEC)) return -EINVAL; - - /* Cast to u64 to make GCC stop complaining */ - if ((u64)tv.tv_sec >= (u64)MAX_INT64_SECONDS) - timeout = -1; /* infinite */ - else { - timeout = DIV_ROUND_UP(tv.tv_usec, 1000000/HZ); - timeout += tv.tv_sec * HZ; - } } - ret = compat_core_sys_select(n, inp, outp, exp, &timeout); - - if (tvp) { - struct compat_timeval rtv; - - if (current->personality & STICKY_TIMEOUTS) - 
goto sticky; - rtv.tv_usec = jiffies_to_usecs(do_div((*(u64*)&timeout), HZ)); - rtv.tv_sec = timeout; - if (compat_timeval_compare(&rtv, &tv) >= 0) - rtv = tv; - if (copy_to_user(tvp, &rtv, sizeof(rtv))) { -sticky: - /* - * If an application puts its timeval in read-only - * memory, we don't want the Linux-specific update to - * the timeval to cause a fault after the select has - * completed successfully. However, because we're not - * updating the timeval, we can't restart the system - * call. - */ - if (ret == -ERESTARTNOHAND) - ret = -EINTR; - } - } + ret = compat_core_sys_select(n, inp, outp, exp, to); + ret = poll_select_copy_remaining(&end_time, tvp, 1, ret); return ret; } @@ -1698,15 +1670,16 @@ asmlinkage long compat_sys_pselect7(int n, compat_ulong_t __user *inp, { compat_sigset_t ss32; sigset_t ksigmask, sigsaved; - s64 timeout = MAX_SCHEDULE_TIMEOUT; struct compat_timespec ts; + struct timespec end_time, *to = NULL; int ret; if (tsp) { if (copy_from_user(&ts, tsp, sizeof(ts))) return -EFAULT; - if (ts.tv_sec < 0 || ts.tv_nsec < 0) + to = &end_time; + if (poll_select_set_timeout(to, ts.tv_sec, ts.tv_nsec)) return -EINVAL; } @@ -1721,51 +1694,8 @@ asmlinkage long compat_sys_pselect7(int n, compat_ulong_t __user *inp, sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved); } - do { - if (tsp) { - if ((unsigned long)ts.tv_sec < MAX_SELECT_SECONDS) { - timeout = DIV_ROUND_UP(ts.tv_nsec, 1000000000/HZ); - timeout += ts.tv_sec * (unsigned long)HZ; - ts.tv_sec = 0; - ts.tv_nsec = 0; - } else { - ts.tv_sec -= MAX_SELECT_SECONDS; - timeout = MAX_SELECT_SECONDS * HZ; - } - } - - ret = compat_core_sys_select(n, inp, outp, exp, &timeout); - - } while (!ret && !timeout && tsp && (ts.tv_sec || ts.tv_nsec)); - - if (tsp) { - struct compat_timespec rts; - - if (current->personality & STICKY_TIMEOUTS) - goto sticky; - - rts.tv_sec = timeout / HZ; - rts.tv_nsec = (timeout % HZ) * (NSEC_PER_SEC/HZ); - if (rts.tv_nsec >= NSEC_PER_SEC) { - rts.tv_sec++; - rts.tv_nsec -= NSEC_PER_SEC; 
- } - if (compat_timespec_compare(&rts, &ts) >= 0) - rts = ts; - if (copy_to_user(tsp, &rts, sizeof(rts))) { -sticky: - /* - * If an application puts its timeval in read-only - * memory, we don't want the Linux-specific update to - * the timeval to cause a fault after the select has - * completed successfully. However, because we're not - * updating the timeval, we can't restart the system - * call. - */ - if (ret == -ERESTARTNOHAND) - ret = -EINTR; - } - } + ret = compat_core_sys_select(n, inp, outp, exp, to); + ret = poll_select_copy_remaining(&end_time, tsp, 0, ret); if (ret == -ERESTARTNOHAND) { /* @@ -1810,18 +1740,16 @@ asmlinkage long compat_sys_ppoll(struct pollfd __user *ufds, compat_sigset_t ss32; sigset_t ksigmask, sigsaved; struct compat_timespec ts; - s64 timeout = -1; + struct timespec end_time, *to = NULL; int ret; if (tsp) { if (copy_from_user(&ts, tsp, sizeof(ts))) return -EFAULT; - /* We assume that ts.tv_sec is always lower than - the number of seconds that can be expressed in - an s64. Otherwise the compiler bitches at us */ - timeout = DIV_ROUND_UP(ts.tv_nsec, 1000000000/HZ); - timeout += ts.tv_sec * HZ; + to = &end_time; + if (poll_select_set_timeout(to, ts.tv_sec, ts.tv_nsec)) + return -EINVAL; } if (sigmask) { @@ -1835,7 +1763,7 @@ asmlinkage long compat_sys_ppoll(struct pollfd __user *ufds, sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved); } - ret = do_sys_poll(ufds, nfds, &timeout); + ret = do_sys_poll(ufds, nfds, to); /* We can restart this syscall, usually */ if (ret == -EINTR) { @@ -1853,31 +1781,7 @@ asmlinkage long compat_sys_ppoll(struct pollfd __user *ufds, } else if (sigmask) sigprocmask(SIG_SETMASK, &sigsaved, NULL); - if (tsp && timeout >= 0) { - struct compat_timespec rts; - - if (current->personality & STICKY_TIMEOUTS) - goto sticky; - /* Yes, we know it's actually an s64, but it's also positive. 
*/ - rts.tv_nsec = jiffies_to_usecs(do_div((*(u64*)&timeout), HZ)) * - 1000; - rts.tv_sec = timeout; - if (compat_timespec_compare(&rts, &ts) >= 0) - rts = ts; - if (copy_to_user(tsp, &rts, sizeof(rts))) { -sticky: - /* - * If an application puts its timeval in read-only - * memory, we don't want the Linux-specific update to - * the timeval to cause a fault after the select has - * completed successfully. However, because we're not - * updating the timeval, we can't restart the system - * call. - */ - if (ret == -ERESTARTNOHAND && timeout >= 0) - ret = -EINTR; - } - } + ret = poll_select_copy_remaining(&end_time, tsp, 0, ret); return ret; } diff --git a/fs/select.c b/fs/select.c index 1180a620778..f6dceb56793 100644 --- a/fs/select.c +++ b/fs/select.c @@ -24,6 +24,7 @@ #include #include #include +#include #include @@ -203,8 +204,6 @@ sticky: return ret; } - - #define FDS_IN(fds, n) (fds->in + n) #define FDS_OUT(fds, n) (fds->out + n) #define FDS_EX(fds, n) (fds->ex + n) @@ -257,11 +256,12 @@ get_max: #define POLLOUT_SET (POLLWRBAND | POLLWRNORM | POLLOUT | POLLERR) #define POLLEX_SET (POLLPRI) -int do_select(int n, fd_set_bits *fds, s64 *timeout) +int do_select(int n, fd_set_bits *fds, struct timespec *end_time) { + ktime_t expire, *to = NULL; struct poll_wqueues table; poll_table *wait; - int retval, i; + int retval, i, timed_out = 0; rcu_read_lock(); retval = max_select_fd(n, fds); @@ -273,12 +273,14 @@ int do_select(int n, fd_set_bits *fds, s64 *timeout) poll_initwait(&table); wait = &table.pt; - if (!*timeout) + if (end_time && !end_time->tv_sec && !end_time->tv_nsec) { wait = NULL; + timed_out = 1; + } + retval = 0; for (;;) { unsigned long *rinp, *routp, *rexp, *inp, *outp, *exp; - long __timeout; set_current_state(TASK_INTERRUPTIBLE); @@ -334,27 +336,25 @@ int do_select(int n, fd_set_bits *fds, s64 *timeout) cond_resched(); } wait = NULL; - if (retval || !*timeout || signal_pending(current)) + if (retval || timed_out || signal_pending(current)) break; if 
(table.error) { retval = table.error; break; } - if (*timeout < 0) { - /* Wait indefinitely */ - __timeout = MAX_SCHEDULE_TIMEOUT; - } else if (unlikely(*timeout >= (s64)MAX_SCHEDULE_TIMEOUT - 1)) { - /* Wait for longer than MAX_SCHEDULE_TIMEOUT. Do it in a loop */ - __timeout = MAX_SCHEDULE_TIMEOUT - 1; - *timeout -= __timeout; - } else { - __timeout = *timeout; - *timeout = 0; + /* + * If this is the first loop and we have a timeout + * given, then we convert to ktime_t and set the to + * pointer to the expiry value. + */ + if (end_time && !to) { + expire = timespec_to_ktime(*end_time); + to = &expire; } - __timeout = schedule_timeout(__timeout); - if (*timeout >= 0) - *timeout += __timeout; + + if (!schedule_hrtimeout(to, HRTIMER_MODE_ABS)) + timed_out = 1; } __set_current_state(TASK_RUNNING); @@ -375,7 +375,7 @@ int do_select(int n, fd_set_bits *fds, s64 *timeout) ((unsigned long) (MAX_SCHEDULE_TIMEOUT / HZ)-1) int core_sys_select(int n, fd_set __user *inp, fd_set __user *outp, - fd_set __user *exp, s64 *timeout) + fd_set __user *exp, struct timespec *end_time) { fd_set_bits fds; void *bits; @@ -426,7 +426,7 @@ int core_sys_select(int n, fd_set __user *inp, fd_set __user *outp, zero_fd_set(n, fds.res_out); zero_fd_set(n, fds.res_ex); - ret = do_select(n, &fds, timeout); + ret = do_select(n, &fds, end_time); if (ret < 0) goto out; @@ -452,7 +452,7 @@ out_nofds: asmlinkage long sys_select(int n, fd_set __user *inp, fd_set __user *outp, fd_set __user *exp, struct timeval __user *tvp) { - s64 timeout = -1; + struct timespec end_time, *to = NULL; struct timeval tv; int ret; @@ -460,43 +460,14 @@ asmlinkage long sys_select(int n, fd_set __user *inp, fd_set __user *outp, if (copy_from_user(&tv, tvp, sizeof(tv))) return -EFAULT; - if (tv.tv_sec < 0 || tv.tv_usec < 0) + to = &end_time; + if (poll_select_set_timeout(to, tv.tv_sec, + tv.tv_usec * NSEC_PER_USEC)) return -EINVAL; - - /* Cast to u64 to make GCC stop complaining */ - if ((u64)tv.tv_sec >= 
(u64)MAX_INT64_SECONDS) - timeout = -1; /* infinite */ - else { - timeout = DIV_ROUND_UP(tv.tv_usec, USEC_PER_SEC/HZ); - timeout += tv.tv_sec * HZ; - } } - ret = core_sys_select(n, inp, outp, exp, &timeout); - - if (tvp) { - struct timeval rtv; - - if (current->personality & STICKY_TIMEOUTS) - goto sticky; - rtv.tv_usec = jiffies_to_usecs(do_div((*(u64*)&timeout), HZ)); - rtv.tv_sec = timeout; - if (timeval_compare(&rtv, &tv) >= 0) - rtv = tv; - if (copy_to_user(tvp, &rtv, sizeof(rtv))) { -sticky: - /* - * If an application puts its timeval in read-only - * memory, we don't want the Linux-specific update to - * the timeval to cause a fault after the select has - * completed successfully. However, because we're not - * updating the timeval, we can't restart the system - * call. - */ - if (ret == -ERESTARTNOHAND) - ret = -EINTR; - } - } + ret = core_sys_select(n, inp, outp, exp, to); + ret = poll_select_copy_remaining(&end_time, tvp, 1, ret); return ret; } @@ -506,25 +477,17 @@ asmlinkage long sys_pselect7(int n, fd_set __user *inp, fd_set __user *outp, fd_set __user *exp, struct timespec __user *tsp, const sigset_t __user *sigmask, size_t sigsetsize) { - s64 timeout = MAX_SCHEDULE_TIMEOUT; sigset_t ksigmask, sigsaved; - struct timespec ts; + struct timespec ts, end_time, *to = NULL; int ret; if (tsp) { if (copy_from_user(&ts, tsp, sizeof(ts))) return -EFAULT; - if (ts.tv_sec < 0 || ts.tv_nsec < 0) + to = &end_time; + if (poll_select_set_timeout(to, ts.tv_sec, ts.tv_nsec)) return -EINVAL; - - /* Cast to u64 to make GCC stop complaining */ - if ((u64)ts.tv_sec >= (u64)MAX_INT64_SECONDS) - timeout = -1; /* infinite */ - else { - timeout = DIV_ROUND_UP(ts.tv_nsec, NSEC_PER_SEC/HZ); - timeout += ts.tv_sec * HZ; - } } if (sigmask) { @@ -538,32 +501,8 @@ asmlinkage long sys_pselect7(int n, fd_set __user *inp, fd_set __user *outp, sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved); } - ret = core_sys_select(n, inp, outp, exp, &timeout); - - if (tsp) { - struct timespec rts; - - 
if (current->personality & STICKY_TIMEOUTS) - goto sticky; - rts.tv_nsec = jiffies_to_usecs(do_div((*(u64*)&timeout), HZ)) * - 1000; - rts.tv_sec = timeout; - if (timespec_compare(&rts, &ts) >= 0) - rts = ts; - if (copy_to_user(tsp, &rts, sizeof(rts))) { -sticky: - /* - * If an application puts its timeval in read-only - * memory, we don't want the Linux-specific update to - * the timeval to cause a fault after the select has - * completed successfully. However, because we're not - * updating the timeval, we can't restart the system - * call. - */ - if (ret == -ERESTARTNOHAND) - ret = -EINTR; - } - } + ret = core_sys_select(n, inp, outp, exp, &end_time); + ret = poll_select_copy_remaining(&end_time, tsp, 0, ret); if (ret == -ERESTARTNOHAND) { /* @@ -649,18 +588,20 @@ static inline unsigned int do_pollfd(struct pollfd *pollfd, poll_table *pwait) } static int do_poll(unsigned int nfds, struct poll_list *list, - struct poll_wqueues *wait, s64 *timeout) + struct poll_wqueues *wait, struct timespec *end_time) { - int count = 0; poll_table* pt = &wait->pt; + ktime_t expire, *to = NULL; + int timed_out = 0, count = 0; /* Optimise the no-wait case */ - if (!(*timeout)) + if (end_time && !end_time->tv_sec && !end_time->tv_nsec) { pt = NULL; + timed_out = 1; + } for (;;) { struct poll_list *walk; - long __timeout; set_current_state(TASK_INTERRUPTIBLE); for (walk = list; walk != NULL; walk = walk->next) { @@ -692,27 +633,21 @@ static int do_poll(unsigned int nfds, struct poll_list *list, if (signal_pending(current)) count = -EINTR; } - if (count || !*timeout) + if (count || timed_out) break; - if (*timeout < 0) { - /* Wait indefinitely */ - __timeout = MAX_SCHEDULE_TIMEOUT; - } else if (unlikely(*timeout >= (s64)MAX_SCHEDULE_TIMEOUT-1)) { - /* - * Wait for longer than MAX_SCHEDULE_TIMEOUT. 
Do it in - * a loop - */ - __timeout = MAX_SCHEDULE_TIMEOUT - 1; - *timeout -= __timeout; - } else { - __timeout = *timeout; - *timeout = 0; + /* + * If this is the first loop and we have a timeout + * given, then we convert to ktime_t and set the to + * pointer to the expiry value. + */ + if (end_time && !to) { + expire = timespec_to_ktime(*end_time); + to = &expire; } - __timeout = schedule_timeout(__timeout); - if (*timeout >= 0) - *timeout += __timeout; + if (!schedule_hrtimeout(to, HRTIMER_MODE_ABS)) + timed_out = 1; } __set_current_state(TASK_RUNNING); return count; @@ -721,7 +656,8 @@ static int do_poll(unsigned int nfds, struct poll_list *list, #define N_STACK_PPS ((sizeof(stack_pps) - sizeof(struct poll_list)) / \ sizeof(struct pollfd)) -int do_sys_poll(struct pollfd __user *ufds, unsigned int nfds, s64 *timeout) +int do_sys_poll(struct pollfd __user *ufds, unsigned int nfds, + struct timespec *end_time) { struct poll_wqueues table; int err = -EFAULT, fdcount, len, size; @@ -761,7 +697,7 @@ int do_sys_poll(struct pollfd __user *ufds, unsigned int nfds, s64 *timeout) } poll_initwait(&table); - fdcount = do_poll(nfds, head, &table, timeout); + fdcount = do_poll(nfds, head, &table, end_time); poll_freewait(&table); for (walk = head; walk; walk = walk->next) { @@ -787,16 +723,21 @@ out_fds: static long do_restart_poll(struct restart_block *restart_block) { - struct pollfd __user *ufds = (struct pollfd __user*)restart_block->arg0; - int nfds = restart_block->arg1; - s64 timeout = ((s64)restart_block->arg3<<32) | (s64)restart_block->arg2; + struct pollfd __user *ufds = restart_block->poll.ufds; + int nfds = restart_block->poll.nfds; + struct timespec *to = NULL, end_time; int ret; - ret = do_sys_poll(ufds, nfds, &timeout); + if (restart_block->poll.has_timeout) { + end_time.tv_sec = restart_block->poll.tv_sec; + end_time.tv_nsec = restart_block->poll.tv_nsec; + to = &end_time; + } + + ret = do_sys_poll(ufds, nfds, to); + if (ret == -EINTR) { restart_block->fn = 
do_restart_poll; - restart_block->arg2 = timeout & 0xFFFFFFFF; - restart_block->arg3 = (u64)timeout >> 32; ret = -ERESTART_RESTARTBLOCK; } return ret; @@ -805,31 +746,32 @@ static long do_restart_poll(struct restart_block *restart_block) asmlinkage long sys_poll(struct pollfd __user *ufds, unsigned int nfds, long timeout_msecs) { - s64 timeout_jiffies; + struct timespec end_time, *to = NULL; int ret; - if (timeout_msecs > 0) { -#if HZ > 1000 - /* We can only overflow if HZ > 1000 */ - if (timeout_msecs / 1000 > (s64)0x7fffffffffffffffULL / (s64)HZ) - timeout_jiffies = -1; - else -#endif - timeout_jiffies = msecs_to_jiffies(timeout_msecs) + 1; - } else { - /* Infinite (< 0) or no (0) timeout */ - timeout_jiffies = timeout_msecs; + if (timeout_msecs >= 0) { + to = &end_time; + poll_select_set_timeout(to, timeout_msecs / MSEC_PER_SEC, + NSEC_PER_MSEC * (timeout_msecs % MSEC_PER_SEC)); } - ret = do_sys_poll(ufds, nfds, &timeout_jiffies); + ret = do_sys_poll(ufds, nfds, to); + if (ret == -EINTR) { struct restart_block *restart_block; + restart_block = &current_thread_info()->restart_block; restart_block->fn = do_restart_poll; - restart_block->arg0 = (unsigned long)ufds; - restart_block->arg1 = nfds; - restart_block->arg2 = timeout_jiffies & 0xFFFFFFFF; - restart_block->arg3 = (u64)timeout_jiffies >> 32; + restart_block->poll.ufds = ufds; + restart_block->poll.nfds = nfds; + + if (timeout_msecs >= 0) { + restart_block->poll.tv_sec = end_time.tv_sec; + restart_block->poll.tv_nsec = end_time.tv_nsec; + restart_block->poll.has_timeout = 1; + } else + restart_block->poll.has_timeout = 0; + ret = -ERESTART_RESTARTBLOCK; } return ret; @@ -841,21 +783,16 @@ asmlinkage long sys_ppoll(struct pollfd __user *ufds, unsigned int nfds, size_t sigsetsize) { sigset_t ksigmask, sigsaved; - struct timespec ts; - s64 timeout = -1; + struct timespec ts, end_time, *to = NULL; int ret; if (tsp) { if (copy_from_user(&ts, tsp, sizeof(ts))) return -EFAULT; - /* Cast to u64 to make GCC stop complaining 
*/ - if ((u64)ts.tv_sec >= (u64)MAX_INT64_SECONDS) - timeout = -1; /* infinite */ - else { - timeout = DIV_ROUND_UP(ts.tv_nsec, NSEC_PER_SEC/HZ); - timeout += ts.tv_sec * HZ; - } + to = &end_time; + if (poll_select_set_timeout(to, ts.tv_sec, ts.tv_nsec)) + return -EINVAL; } if (sigmask) { @@ -869,7 +806,7 @@ asmlinkage long sys_ppoll(struct pollfd __user *ufds, unsigned int nfds, sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved); } - ret = do_sys_poll(ufds, nfds, &timeout); + ret = do_sys_poll(ufds, nfds, to); /* We can restart this syscall, usually */ if (ret == -EINTR) { @@ -887,31 +824,7 @@ asmlinkage long sys_ppoll(struct pollfd __user *ufds, unsigned int nfds, } else if (sigmask) sigprocmask(SIG_SETMASK, &sigsaved, NULL); - if (tsp && timeout >= 0) { - struct timespec rts; - - if (current->personality & STICKY_TIMEOUTS) - goto sticky; - /* Yes, we know it's actually an s64, but it's also positive. */ - rts.tv_nsec = jiffies_to_usecs(do_div((*(u64*)&timeout), HZ)) * - 1000; - rts.tv_sec = timeout; - if (timespec_compare(&rts, &ts) >= 0) - rts = ts; - if (copy_to_user(tsp, &rts, sizeof(rts))) { - sticky: - /* - * If an application puts its timeval in read-only - * memory, we don't want the Linux-specific update to - * the timeval to cause a fault after the select has - * completed successfully. However, because we're not - * updating the timeval, we can't restart the system - * call. 
- */ - if (ret == -ERESTARTNOHAND && timeout >= 0) - ret = -EINTR; - } - } + ret = poll_select_copy_remaining(&end_time, tsp, 0, ret); return ret; } diff --git a/include/linux/poll.h b/include/linux/poll.h index f65de5128a9..badd98ab06f 100644 --- a/include/linux/poll.h +++ b/include/linux/poll.h @@ -114,11 +114,11 @@ void zero_fd_set(unsigned long nr, unsigned long *fdset) #define MAX_INT64_SECONDS (((s64)(~((u64)0)>>1)/HZ)-1) -extern int do_select(int n, fd_set_bits *fds, s64 *timeout); +extern int do_select(int n, fd_set_bits *fds, struct timespec *end_time); extern int do_sys_poll(struct pollfd __user * ufds, unsigned int nfds, - s64 *timeout); + struct timespec *end_time); extern int core_sys_select(int n, fd_set __user *inp, fd_set __user *outp, - fd_set __user *exp, s64 *timeout); + fd_set __user *exp, struct timespec *end_time); extern int poll_select_set_timeout(struct timespec *to, long sec, long nsec); -- cgit v1.2.3-70-g09d2 From 63ca243b271f5b44e0b1057003cf498b6d0fadf7 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Mon, 1 Sep 2008 14:35:02 -0700 Subject: hrtimer: add abstraction functions for accessing the "expires" member In order to be able to turn hrtimers into range based, we need to provide accessor functions for getting to the "expires" ktime_t member of the struct hrtimer. This patch adds a set of accessors for this purpose: * hrtimer_set_expires * hrtimer_set_expires_tv64 * hrtimer_add_expires * hrtimer_add_expires_ns * hrtimer_get_expires * hrtimer_get_expires_tv64 * hrtimer_get_expires_ns * hrtimer_expires_remaining * hrtimer_start_expires No users of these new accessors are added yet; these follow in later patches. 
Hopefully this patch can even go into 2.6.27-rc so that the conversions will not have a bottleneck in -next Signed-off-by: Arjan van de Ven --- include/linux/hrtimer.h | 45 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) (limited to 'include/linux') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index becd17db1a1..9900e998ea8 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -217,6 +217,45 @@ static inline int hrtimer_is_hres_active(struct hrtimer *timer) return timer->base->cpu_base->hres_active; } +static inline void hrtimer_set_expires(struct hrtimer *timer, ktime_t time) +{ + timer->expires = time; +} +static inline void hrtimer_set_expires_tv64(struct hrtimer *timer, s64 tv64) +{ + timer->expires.tv64 = tv64; +} + +static inline void hrtimer_add_expires(struct hrtimer *timer, ktime_t time) +{ + timer->expires = ktime_add_safe(timer->expires, time); +} + +static inline void hrtimer_add_expires_ns(struct hrtimer *timer, unsigned long ns) +{ + timer->expires = ktime_add_ns(timer->expires, ns); +} + +static inline ktime_t hrtimer_get_expires(const struct hrtimer *timer) +{ + return timer->expires; +} + +static inline s64 hrtimer_get_expires_tv64(const struct hrtimer *timer) +{ + return timer->expires.tv64; +} + +static inline s64 hrtimer_get_expires_ns(const struct hrtimer *timer) +{ + return ktime_to_ns(timer->expires); +} + +static inline ktime_t hrtimer_expires_remaining(const struct hrtimer *timer) +{ + return ktime_sub(timer->expires, timer->base->get_time()); +} + /* * The resolution of the clocks. 
The resolution value is returned in * the clock_getres() system call to give application programmers an @@ -287,6 +326,12 @@ extern int hrtimer_start(struct hrtimer *timer, ktime_t tim, extern int hrtimer_cancel(struct hrtimer *timer); extern int hrtimer_try_to_cancel(struct hrtimer *timer); +static inline int hrtimer_start_expires(struct hrtimer *timer, + enum hrtimer_mode mode) +{ + return hrtimer_start(timer, hrtimer_get_expires(timer), mode); +} + static inline int hrtimer_restart(struct hrtimer *timer) { return hrtimer_start(timer, timer->expires, HRTIMER_MODE_ABS); -- cgit v1.2.3-70-g09d2 From 799b64de256ea68fbb5db63bb55f61c305870643 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Mon, 1 Sep 2008 15:27:58 -0700 Subject: hrtimer: rename the "expires" struct member to avoid accidental usage To catch code that still touches the "expires" memory directly, rename it to have the compiler complain rather than get nasty, hard to explain, runtime behavior Signed-off-by: Arjan van de Ven --- include/linux/hrtimer.h | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 9900e998ea8..485a634fd6e 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -111,7 +111,7 @@ enum hrtimer_cb_mode { */ struct hrtimer { struct rb_node node; - ktime_t expires; + ktime_t _expires; enum hrtimer_restart (*function)(struct hrtimer *); struct hrtimer_clock_base *base; unsigned long state; @@ -219,41 +219,41 @@ static inline int hrtimer_is_hres_active(struct hrtimer *timer) static inline void hrtimer_set_expires(struct hrtimer *timer, ktime_t time) { - timer->expires = time; + timer->_expires = time; } static inline void hrtimer_set_expires_tv64(struct hrtimer *timer, s64 tv64) { - timer->expires.tv64 = tv64; + timer->_expires.tv64 = tv64; } static inline void hrtimer_add_expires(struct hrtimer *timer, ktime_t time) { - timer->expires = 
ktime_add_safe(timer->expires, time); + timer->_expires = ktime_add_safe(timer->_expires, time); } static inline void hrtimer_add_expires_ns(struct hrtimer *timer, unsigned long ns) { - timer->expires = ktime_add_ns(timer->expires, ns); + timer->_expires = ktime_add_ns(timer->_expires, ns); } static inline ktime_t hrtimer_get_expires(const struct hrtimer *timer) { - return timer->expires; + return timer->_expires; } static inline s64 hrtimer_get_expires_tv64(const struct hrtimer *timer) { - return timer->expires.tv64; + return timer->_expires.tv64; } static inline s64 hrtimer_get_expires_ns(const struct hrtimer *timer) { - return ktime_to_ns(timer->expires); + return ktime_to_ns(timer->_expires); } static inline ktime_t hrtimer_expires_remaining(const struct hrtimer *timer) { - return ktime_sub(timer->expires, timer->base->get_time()); + return ktime_sub(timer->_expires, timer->base->get_time()); } /* @@ -334,7 +334,7 @@ static inline int hrtimer_start_expires(struct hrtimer *timer, static inline int hrtimer_restart(struct hrtimer *timer) { - return hrtimer_start(timer, timer->expires, HRTIMER_MODE_ABS); + return hrtimer_start(timer, timer->_expires, HRTIMER_MODE_ABS); } /* Query timers: */ -- cgit v1.2.3-70-g09d2 From 654c8e0b1c623b156c5b92f28d914ab38c9c2c90 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Mon, 1 Sep 2008 15:47:08 -0700 Subject: hrtimer: turn hrtimers into range timers this patch turns hrtimers into range timers; they have 2 expire points 1) the soft expire point 2) the hard expire point the kernel will do its regular best effort attempt to get the timer run at the hard expire point. However, if some other timer fires after the soft expire point, the kernel now has the freedom to fire this timer at this point, and thus grouping the events and preventing a power-expensive wakeup in the future.
Signed-off-by: Arjan van de Ven --- include/linux/hrtimer.h | 31 ++++++++++++++++++++++++++- kernel/hrtimer.c | 56 +++++++++++++++++++++++++++++++++++++++++++++---- 2 files changed, 82 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 485a634fd6e..28259c33667 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -112,6 +112,7 @@ enum hrtimer_cb_mode { struct hrtimer { struct rb_node node; ktime_t _expires; + ktime_t _softexpires; enum hrtimer_restart (*function)(struct hrtimer *); struct hrtimer_clock_base *base; unsigned long state; @@ -220,20 +221,37 @@ static inline int hrtimer_is_hres_active(struct hrtimer *timer) static inline void hrtimer_set_expires(struct hrtimer *timer, ktime_t time) { timer->_expires = time; + timer->_softexpires = time; } + +static inline void hrtimer_set_expires_range(struct hrtimer *timer, ktime_t time, ktime_t delta) +{ + timer->_softexpires = time; + timer->_expires = ktime_add_safe(time, delta); +} + +static inline void hrtimer_set_expires_range_ns(struct hrtimer *timer, ktime_t time, unsigned long delta) +{ + timer->_softexpires = time; + timer->_expires = ktime_add_safe(time, ns_to_ktime(delta)); +} + static inline void hrtimer_set_expires_tv64(struct hrtimer *timer, s64 tv64) { timer->_expires.tv64 = tv64; + timer->_softexpires.tv64 = tv64; } static inline void hrtimer_add_expires(struct hrtimer *timer, ktime_t time) { timer->_expires = ktime_add_safe(timer->_expires, time); + timer->_softexpires = ktime_add_safe(timer->_softexpires, time); } static inline void hrtimer_add_expires_ns(struct hrtimer *timer, unsigned long ns) { timer->_expires = ktime_add_ns(timer->_expires, ns); + timer->_softexpires = ktime_add_ns(timer->_softexpires, ns); } static inline ktime_t hrtimer_get_expires(const struct hrtimer *timer) @@ -241,10 +259,19 @@ static inline ktime_t hrtimer_get_expires(const struct hrtimer *timer) return timer->_expires; } +static 
inline ktime_t hrtimer_get_softexpires(const struct hrtimer *timer) +{ + return timer->_softexpires; +} + static inline s64 hrtimer_get_expires_tv64(const struct hrtimer *timer) { return timer->_expires.tv64; } +static inline s64 hrtimer_get_softexpires_tv64(const struct hrtimer *timer) +{ + return timer->_softexpires.tv64; +} static inline s64 hrtimer_get_expires_ns(const struct hrtimer *timer) { @@ -334,7 +361,7 @@ static inline int hrtimer_start_expires(struct hrtimer *timer, static inline int hrtimer_restart(struct hrtimer *timer) { - return hrtimer_start(timer, timer->_expires, HRTIMER_MODE_ABS); + return hrtimer_start_expires(timer, HRTIMER_MODE_ABS); } /* Query timers: */ @@ -391,6 +418,8 @@ extern long hrtimer_nanosleep_restart(struct restart_block *restart_block); extern void hrtimer_init_sleeper(struct hrtimer_sleeper *sl, struct task_struct *tsk); +extern int schedule_hrtimeout_range(ktime_t *expires, unsigned long delta, + const enum hrtimer_mode mode); extern int schedule_hrtimeout(ktime_t *expires, const enum hrtimer_mode mode); /* Soft interrupt function to run the hrtimer queues: */ diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index ae307feec74..01483004183 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -1309,7 +1309,20 @@ void hrtimer_interrupt(struct clock_event_device *dev) timer = rb_entry(node, struct hrtimer, node); - if (basenow.tv64 < hrtimer_get_expires_tv64(timer)) { + /* + * The immediate goal for using the softexpires is + * minimizing wakeups, not running timers at the + * earliest interrupt after their soft expiration. + * This allows us to avoid using a Priority Search + * Tree, which can answer a stabbing querry for + * overlapping intervals and instead use the simple + * BST we already have. + * We don't add extra wakeups by delaying timers that + * are right-of a not yet expired timer, because that + * timer will have to trigger a wakeup anyway. 
+ */ + + if (basenow.tv64 < hrtimer_get_softexpires_tv64(timer)) { ktime_t expires; expires = ktime_sub(hrtimer_get_expires(timer), @@ -1681,14 +1694,20 @@ void __init hrtimers_init(void) } /** - * schedule_hrtimeout - sleep until timeout + * schedule_hrtimeout_range - sleep until timeout * @expires: timeout value (ktime_t) + * @delta: slack in expires timeout (nanoseconds, unsigned long) * @mode: timer mode, HRTIMER_MODE_ABS or HRTIMER_MODE_REL * * Make the current task sleep until the given expiry time has * elapsed. The routine will return immediately unless * the current task state has been set (see set_current_state()). * + * The @delta argument gives the kernel the freedom to schedule the + * actual wakeup to a time that is both power and performance friendly. + * The kernel gives the normal best effort behavior for "@expires+@delta", + * but may decide to fire the timer earlier, but no earlier than @expires. + * * You can set the task state as follows - * * %TASK_UNINTERRUPTIBLE - at least @timeout time is guaranteed to @@ -1702,7 +1721,7 @@ void __init hrtimers_init(void) * * Returns 0 when the timer has expired otherwise -EINTR */ -int __sched schedule_hrtimeout(ktime_t *expires, +int __sched schedule_hrtimeout_range(ktime_t *expires, unsigned long delta, const enum hrtimer_mode mode) { struct hrtimer_sleeper t; @@ -1726,7 +1745,7 @@ int __sched schedule_hrtimeout(ktime_t *expires, } hrtimer_init_on_stack(&t.timer, CLOCK_MONOTONIC, mode); - hrtimer_set_expires(&t.timer, *expires); + hrtimer_set_expires_range_ns(&t.timer, *expires, delta); hrtimer_init_sleeper(&t, current); @@ -1744,4 +1763,33 @@ int __sched schedule_hrtimeout(ktime_t *expires, return !t.task ? 0 : -EINTR; } +EXPORT_SYMBOL_GPL(schedule_hrtimeout_range); + +/** + * schedule_hrtimeout - sleep until timeout + * @expires: timeout value (ktime_t) + * @mode: timer mode, HRTIMER_MODE_ABS or HRTIMER_MODE_REL + * + * Make the current task sleep until the given expiry time has + * elapsed.
The routine will return immediately unless + * the current task state has been set (see set_current_state()). + * + * You can set the task state as follows - + * + * %TASK_UNINTERRUPTIBLE - at least @timeout time is guaranteed to + * pass before the routine returns. + * + * %TASK_INTERRUPTIBLE - the routine may return early if a signal is + * delivered to the current task. + * + * The current task state is guaranteed to be TASK_RUNNING when this + * routine returns. + * + * Returns 0 when the timer has expired otherwise -EINTR + */ +int __sched schedule_hrtimeout(ktime_t *expires, + const enum hrtimer_mode mode) +{ + return schedule_hrtimeout_range(expires, 0, mode); +} EXPORT_SYMBOL_GPL(schedule_hrtimeout); -- cgit v1.2.3-70-g09d2 From 6976675d94042fbd446231d1bd8b7de71a980ada Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Mon, 1 Sep 2008 15:52:40 -0700 Subject: hrtimer: create a "timer_slack" field in the task struct We want to be able to control the default "rounding" that is used by select() and poll() and friends. This is a per process property (so that we can have a "nice" like program to start certain programs with a looser or stricter rounding) that can be set/get via a prctl(). For this purpose, a field called "timer_slack_ns" is added to the task struct. In addition, a field called "default_timer_slack_ns" is added so that tasks can easily switch temporarily to a more/less accurate slack and then back to the default. The default value of the slack is set to 50 usec; this is significantly less than 2.6.27's average select() and poll() timing error but still allows the kernel to group timers somewhat to preserve power behavior.
Applications and admins can override this via the prctl() Signed-off-by: Arjan van de Ven --- include/linux/init_task.h | 1 + include/linux/prctl.h | 7 +++++++ include/linux/sched.h | 6 ++++++ kernel/fork.c | 2 ++ kernel/sys.c | 10 ++++++++++ 5 files changed, 26 insertions(+) (limited to 'include/linux') diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 021d8e720c7..23fd8909b9e 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -170,6 +170,7 @@ extern struct group_info init_groups; .cpu_timers = INIT_CPU_TIMERS(tsk.cpu_timers), \ .fs_excl = ATOMIC_INIT(0), \ .pi_lock = __SPIN_LOCK_UNLOCKED(tsk.pi_lock), \ + .timer_slack_ns = 50000, /* 50 usec default slack */ \ .pids = { \ [PIDTYPE_PID] = INIT_PID_LINK(PIDTYPE_PID), \ [PIDTYPE_PGID] = INIT_PID_LINK(PIDTYPE_PGID), \ diff --git a/include/linux/prctl.h b/include/linux/prctl.h index 5ad79198d6f..48d887e3c6e 100644 --- a/include/linux/prctl.h +++ b/include/linux/prctl.h @@ -78,4 +78,11 @@ #define PR_GET_SECUREBITS 27 #define PR_SET_SECUREBITS 28 +/* + * Get/set the timerslack as used by poll/select/nanosleep + * A value of 0 means "use default" + */ +#define PR_SET_TIMERSLACK 29 +#define PR_GET_TIMERSLACK 30 + #endif /* _LINUX_PRCTL_H */ diff --git a/include/linux/sched.h b/include/linux/sched.h index 3d9120c5ad1..dcc03fd5a7f 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1301,6 +1301,12 @@ struct task_struct { int latency_record_count; struct latency_record latency_record[LT_SAVECOUNT]; #endif + /* + * time slack values; these are used to round up poll() and + * select() etc timeout values. These are in nanoseconds. 
+ */ + unsigned long timer_slack_ns; + unsigned long default_timer_slack_ns; }; /* diff --git a/kernel/fork.c b/kernel/fork.c index 7ce2ebe8479..4308d75f0fa 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -987,6 +987,8 @@ static struct task_struct *copy_process(unsigned long clone_flags, p->prev_utime = cputime_zero; p->prev_stime = cputime_zero; + p->default_timer_slack_ns = current->timer_slack_ns; + #ifdef CONFIG_DETECT_SOFTLOCKUP p->last_switch_count = 0; p->last_switch_timestamp = 0; diff --git a/kernel/sys.c b/kernel/sys.c index 038a7bc0901..1b96401a057 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -1727,6 +1727,16 @@ asmlinkage long sys_prctl(int option, unsigned long arg2, unsigned long arg3, case PR_SET_TSC: error = SET_TSC_CTL(arg2); break; + case PR_GET_TIMERSLACK: + error = current->timer_slack_ns; + break; + case PR_SET_TIMERSLACK: + if (arg2 <= 0) + current->timer_slack_ns = + current->default_timer_slack_ns; + else + current->timer_slack_ns = arg2; + break; default: error = -EINVAL; break; -- cgit v1.2.3-70-g09d2 From 584fb4a76413ec9215741e075e0dfb69173b213f Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Sat, 6 Sep 2008 08:32:57 -0700 Subject: hrtimer: fix build bug found by Ingo in some randconfig configurations, hrtimers are used even though the hrtimer config is off; and it broke the build due to some of the new functions being on the wrong side of the ifdef. This patch moves the functions to the other side of the ifdef, fixing the build bug.
Signed-off-by: Arjan van de Ven --- include/linux/hrtimer.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 28259c33667..c407b33ef84 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -198,13 +198,6 @@ struct hrtimer_cpu_base { #endif }; -#ifdef CONFIG_HIGH_RES_TIMERS -struct clock_event_device; - -extern void clock_was_set(void); -extern void hres_timers_resume(void); -extern void hrtimer_interrupt(struct clock_event_device *dev); - /* * In high resolution mode the time reference must be read accurate */ @@ -283,6 +276,13 @@ static inline ktime_t hrtimer_expires_remaining(const struct hrtimer *timer) return ktime_sub(timer->_expires, timer->base->get_time()); } +#ifdef CONFIG_HIGH_RES_TIMERS +struct clock_event_device; + +extern void clock_was_set(void); +extern void hres_timers_resume(void); +extern void hrtimer_interrupt(struct clock_event_device *dev); + /* * The resolution of the clocks. 
The resolution value is returned in * the clock_getres() system call to give application programmers an -- cgit v1.2.3-70-g09d2 From 2ec02270c00f94b08fddfb68c37510a9fb47ac7c Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Sat, 6 Sep 2008 09:36:56 -0700 Subject: hrtimer: another build fix More randconfig testing Signed-off-by: Arjan van de Ven --- include/linux/hrtimer.h | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index c407b33ef84..4c1a834b984 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -198,19 +198,6 @@ struct hrtimer_cpu_base { #endif }; -/* - * In high resolution mode the time reference must be read accurate - */ -static inline ktime_t hrtimer_cb_get_time(struct hrtimer *timer) -{ - return timer->base->get_time(); -} - -static inline int hrtimer_is_hres_active(struct hrtimer *timer) -{ - return timer->base->cpu_base->hres_active; -} - static inline void hrtimer_set_expires(struct hrtimer *timer, ktime_t time) { timer->_expires = time; @@ -283,6 +270,19 @@ extern void clock_was_set(void); extern void hres_timers_resume(void); extern void hrtimer_interrupt(struct clock_event_device *dev); +/* + * In high resolution mode the time reference must be read accurate + */ +static inline ktime_t hrtimer_cb_get_time(struct hrtimer *timer) +{ + return timer->base->get_time(); +} + +static inline int hrtimer_is_hres_active(struct hrtimer *timer) +{ + return timer->base->cpu_base->hres_active; +} + /* * The resolution of the clocks. 
The resolution value is returned in * the clock_getres() system call to give application programmers an -- cgit v1.2.3-70-g09d2 From da8f2e170ea94cc20f8ebbc8ee8d127edb8f12f1 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Sun, 7 Sep 2008 10:47:46 -0700 Subject: hrtimer: add a hrtimer_start_range() function this patch adds a _range version of hrtimer_start() so that range timers can be created; the hrtimer_start() function is just a wrapper around this. In addition, hrtimer_start_expires() will now preserve existing ranges. Signed-off-by: Arjan van de Ven --- include/linux/hrtimer.h | 9 ++++++++- kernel/hrtimer.c | 26 +++++++++++++++++++++++--- 2 files changed, 31 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 4c1a834b984..1c0473e8ecb 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -350,13 +350,20 @@ static inline void destroy_hrtimer_on_stack(struct hrtimer *timer) { } /* Basic timer operations: */ extern int hrtimer_start(struct hrtimer *timer, ktime_t tim, const enum hrtimer_mode mode); +extern int hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, + unsigned long range_ns, const enum hrtimer_mode mode); extern int hrtimer_cancel(struct hrtimer *timer); extern int hrtimer_try_to_cancel(struct hrtimer *timer); static inline int hrtimer_start_expires(struct hrtimer *timer, enum hrtimer_mode mode) { - return hrtimer_start(timer, hrtimer_get_expires(timer), mode); + unsigned long delta; + ktime_t soft, hard; + soft = hrtimer_get_softexpires(timer); + hard = hrtimer_get_expires(timer); + delta = ktime_to_ns(ktime_sub(hard, soft)); + return hrtimer_start_range_ns(timer, hrtimer_get_expires(timer), delta, mode); } static inline int hrtimer_restart(struct hrtimer *timer) diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index 01483004183..a0222097c57 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -945,9 +945,10 @@ remove_hrtimer(struct hrtimer 
*timer, struct hrtimer_clock_base *base) } /** - * hrtimer_start - (re)start an relative timer on the current CPU + * hrtimer_start_range_ns - (re)start an relative timer on the current CPU * @timer: the timer to be added * @tim: expiry time + * @delta_ns: "slack" range for the timer * @mode: expiry mode: absolute (HRTIMER_ABS) or relative (HRTIMER_REL) * * Returns: @@ -955,7 +956,8 @@ remove_hrtimer(struct hrtimer *timer, struct hrtimer_clock_base *base) * 1 when the timer was active */ int -hrtimer_start(struct hrtimer *timer, ktime_t tim, const enum hrtimer_mode mode) +hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, unsigned long delta_ns, + const enum hrtimer_mode mode) { struct hrtimer_clock_base *base, *new_base; unsigned long flags; @@ -983,7 +985,7 @@ hrtimer_start(struct hrtimer *timer, ktime_t tim, const enum hrtimer_mode mode) #endif } - hrtimer_set_expires(timer, tim); + hrtimer_set_expires_range_ns(timer, tim, delta_ns); timer_stats_hrtimer_set_start_info(timer); @@ -1016,8 +1018,26 @@ hrtimer_start(struct hrtimer *timer, ktime_t tim, const enum hrtimer_mode mode) return ret; } +EXPORT_SYMBOL_GPL(hrtimer_start_range_ns); + +/** + * hrtimer_start - (re)start an relative timer on the current CPU + * @timer: the timer to be added + * @tim: expiry time + * @mode: expiry mode: absolute (HRTIMER_ABS) or relative (HRTIMER_REL) + * + * Returns: + * 0 on success + * 1 when the timer was active + */ +int +hrtimer_start(struct hrtimer *timer, ktime_t tim, const enum hrtimer_mode mode) +{ + return hrtimer_start_range_ns(timer, tim, 0, mode); +} EXPORT_SYMBOL_GPL(hrtimer_start); + /** * hrtimer_try_to_cancel - try to deactivate a timer * @timer: hrtimer to stop -- cgit v1.2.3-70-g09d2 From 4ce105d30e08fb8a1783c55a0e48aa3fa200c455 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Sun, 7 Sep 2008 15:31:39 -0700 Subject: hrtimer: incorporate feedback from Peter Zijlstra (based on lkml review) * use rt_task() * task_nice() has a sign Signed-off-by: Arjan 
van de Ven --- fs/select.c | 5 ++--- include/linux/hrtimer.h | 2 +- 2 files changed, 3 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/fs/select.c b/fs/select.c index 5e61b43d076..fdd8584e536 100644 --- a/fs/select.c +++ b/fs/select.c @@ -46,7 +46,7 @@ static unsigned long __estimate_accuracy(struct timespec *tv) unsigned long slack; int divfactor = 1000; - if (task_nice(current)) + if (task_nice(current) > 0) divfactor = divfactor / 5; slack = tv->tv_nsec / divfactor; @@ -66,8 +66,7 @@ static unsigned long estimate_accuracy(struct timespec *tv) * Realtime tasks get a slack of 0 for obvious reasons. */ - if (current->policy == SCHED_FIFO || - current->policy == SCHED_RR) + if (rt_task(current)) return 0; ktime_get_ts(&now); diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 1c0473e8ecb..95db11f62ff 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -363,7 +363,7 @@ static inline int hrtimer_start_expires(struct hrtimer *timer, soft = hrtimer_get_softexpires(timer); hard = hrtimer_get_expires(timer); delta = ktime_to_ns(ktime_sub(hard, soft)); - return hrtimer_start_range_ns(timer, hrtimer_get_expires(timer), delta, mode); + return hrtimer_start_range_ns(timer, soft, delta, mode); } static inline int hrtimer_restart(struct hrtimer *timer) -- cgit v1.2.3-70-g09d2 From 2e94d1f71f7e4404d997e6fb4f1618aa147d76f9 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Wed, 10 Sep 2008 16:06:00 -0700 Subject: hrtimer: peek at the timer queue just before going idle As part of going idle, we already look at the time of the next timer event to determine which C-state to select etc. This patch adds functionality that causes the timers that are past their soft expire time, to fire at this time, before we calculate the next wakeup time. This functionality will thus avoid wakeups by running timers before going idle rather than specially waking up for it. 
Signed-off-by: Arjan van de Ven --- drivers/cpuidle/cpuidle.c | 7 +++++++ include/linux/hrtimer.h | 5 +++++ kernel/hrtimer.c | 30 ++++++++++++++++++++++++++++++ 3 files changed, 42 insertions(+) (limited to 'include/linux') diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c index 5ce07b517c5..2e314849936 100644 --- a/drivers/cpuidle/cpuidle.c +++ b/drivers/cpuidle/cpuidle.c @@ -16,6 +16,7 @@ #include #include #include +#include #include "cpuidle.h" @@ -60,6 +61,12 @@ static void cpuidle_idle_call(void) return; } + /* + * run any timers that can be run now, at this point + * before calculating the idle duration etc. + */ + hrtimer_peek_ahead_timers(); + /* ask the governor for the next state */ next_state = cpuidle_curr_governor->select(dev); if (need_resched()) diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 95db11f62ff..d93b1e1dc16 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -326,6 +326,11 @@ static inline int hrtimer_is_hres_active(struct hrtimer *timer) extern ktime_t ktime_get(void); extern ktime_t ktime_get_real(void); + +DECLARE_PER_CPU(struct tick_device, tick_cpu_device); +extern void hrtimer_peek_ahead_timers(void); + + /* Exported timer functions: */ /* Initialize timers: */ diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index 9a4c9018556..eb2cf984959 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -1381,6 +1381,36 @@ void hrtimer_interrupt(struct clock_event_device *dev) raise_softirq(HRTIMER_SOFTIRQ); } +/** + * hrtimer_peek_ahead_timers -- run soft-expired timers now + * + * hrtimer_peek_ahead_timers will peek at the timer queue of + * the current cpu and check if there are any timers for which + * the soft expires time has passed. If any such timers exist, + * they are run immediately and then removed from the timer queue. 
+ * + */ +void hrtimer_peek_ahead_timers(void) +{ + unsigned long flags; + struct tick_device *td; + struct clock_event_device *dev; + + if (hrtimer_hres_active()) + return; + + local_irq_save(flags); + td = &__get_cpu_var(tick_cpu_device); + if (!td) + goto out; + dev = td->evtdev; + if (!dev) + goto out; + hrtimer_interrupt(dev); +out: + local_irq_restore(flags); +} + static void run_hrtimer_softirq(struct softirq_action *h) { run_hrtimer_pending(&__get_cpu_var(hrtimer_bases)); -- cgit v1.2.3-70-g09d2 From ccbe329bcd87924baed96474ec0a6725e3957897 Mon Sep 17 00:00:00 2001 From: David Vrabel Date: Wed, 17 Sep 2008 16:34:03 +0100 Subject: bitmap: add bitmap_copy_le() bitmap_copy_le() copies a bitmap, putting the bits into little-endian order (i.e., each unsigned long word in the bitmap is put into little-endian order). The UWB stack used bitmaps to manage Medium Access Slot availability, and these bitmaps need to be written to the hardware in LE order. Signed-off-by: David Vrabel --- include/linux/bitmap.h | 1 + lib/bitmap.c | 22 ++++++++++++++++++++++ 2 files changed, 23 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h index 89781fd4885..5379913aca5 100644 --- a/include/linux/bitmap.h +++ b/include/linux/bitmap.h @@ -130,6 +130,7 @@ extern void bitmap_fold(unsigned long *dst, const unsigned long *orig, extern int bitmap_find_free_region(unsigned long *bitmap, int bits, int order); extern void bitmap_release_region(unsigned long *bitmap, int pos, int order); extern int bitmap_allocate_region(unsigned long *bitmap, int pos, int order); +extern void bitmap_copy_le(void *dst, const unsigned long *src, int nbits); #define BITMAP_LAST_WORD_MASK(nbits) \ ( \ diff --git a/lib/bitmap.c b/lib/bitmap.c index 06fb57c86de..c2006bfeea4 100644 --- a/lib/bitmap.c +++ b/lib/bitmap.c @@ -1007,3 +1007,25 @@ int bitmap_allocate_region(unsigned long *bitmap, int pos, int order) return 0; } EXPORT_SYMBOL(bitmap_allocate_region); + 
+/** + * bitmap_copy_le - copy a bitmap, putting the bits into little-endian order. + * @dst: destination buffer + * @src: bitmap to copy + * @nbits: number of bits in the bitmap + * + * Require nbits % BITS_PER_LONG == 0. + */ +void bitmap_copy_le(void *dst, const unsigned long *src, int nbits) +{ + unsigned long *d = dst; + int i; + + for (i = 0; i < nbits/BITS_PER_LONG; i++) { + if (BITS_PER_LONG == 64) + d[i] = cpu_to_le64(src[i]); + else + d[i] = cpu_to_le32(src[i]); + } +} +EXPORT_SYMBOL(bitmap_copy_le); -- cgit v1.2.3-70-g09d2 From 34e95e41f1fd751e33a7eb3fa66594903b81f13d Mon Sep 17 00:00:00 2001 From: Inaky Perez-Gonzalez Date: Wed, 17 Sep 2008 16:34:05 +0100 Subject: uwb: add the uwb include files Signed-off-by: David Vrabel --- include/linux/uwb.h | 761 ++++++++++++++++++++++++++++++++++++++++++ include/linux/uwb/debug-cmd.h | 57 ++++ include/linux/uwb/debug.h | 82 +++++ include/linux/uwb/spec.h | 727 ++++++++++++++++++++++++++++++++++++++++ include/linux/wlp.h | 735 ++++++++++++++++++++++++++++++++++++++++ 5 files changed, 2362 insertions(+) create mode 100644 include/linux/uwb.h create mode 100644 include/linux/uwb/debug-cmd.h create mode 100644 include/linux/uwb/debug.h create mode 100644 include/linux/uwb/spec.h create mode 100644 include/linux/wlp.h (limited to 'include/linux') diff --git a/include/linux/uwb.h b/include/linux/uwb.h new file mode 100644 index 00000000000..0cd35937e12 --- /dev/null +++ b/include/linux/uwb.h @@ -0,0 +1,761 @@ +/* + * Ultra Wide Band + * UWB API + * + * Copyright (C) 2005-2006 Intel Corporation + * Inaky Perez-Gonzalez + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version + * 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + * + * + * FIXME: doc: overview of the API, different parts and pointers + */ + +#ifndef __LINUX__UWB_H__ +#define __LINUX__UWB_H__ + +#include +#include +#include +#include +#include +#include + +struct uwb_dev; +struct uwb_beca_e; +struct uwb_rc; +struct uwb_rsv; +struct uwb_dbg; + +/** + * struct uwb_dev - a UWB Device + * @rc: UWB Radio Controller that discovered the device (kind of its + * parent). + * @bce: a beacon cache entry for this device; or NULL if the device + * is a local radio controller. + * @mac_addr: the EUI-48 address of this device. + * @dev_addr: the current DevAddr used by this device. + * @beacon_slot: the slot number the beacon is using. + * @streams: bitmap of streams allocated to reservations targeted at + * this device. For an RC, this is the streams allocated for + * reservations targeted at DevAddrs. + * + * A UWB device may either be a neighbor or part of a local radio + * controller. + */ +struct uwb_dev { + struct mutex mutex; + struct list_head list_node; + struct device dev; + struct uwb_rc *rc; /* radio controller */ + struct uwb_beca_e *bce; /* Beacon Cache Entry */ + + struct uwb_mac_addr mac_addr; + struct uwb_dev_addr dev_addr; + int beacon_slot; + DECLARE_BITMAP(streams, UWB_NUM_STREAMS); +}; +#define to_uwb_dev(d) container_of(d, struct uwb_dev, dev) + +/** + * UWB HWA/WHCI Radio Control {Command|Event} Block context IDs + * + * RC[CE]Bs have a 'context ID' field that matches the command with + * the event received to confirm it.
+ * + * Maximum number of context IDs + */ +enum { UWB_RC_CTX_MAX = 256 }; + + +/** Notification chain head for UWB generated events to listeners */ +struct uwb_notifs_chain { + struct list_head list; + struct mutex mutex; +}; + +/** + * struct uwb_mas_bm - a bitmap of all MAS in a superframe + * @bm: a bitmap of length #UWB_NUM_MAS + */ +struct uwb_mas_bm { + DECLARE_BITMAP(bm, UWB_NUM_MAS); +}; + +/** + * uwb_rsv_state - UWB Reservation state. + * + * NONE - reservation is not active (no DRP IE being transmitted). + * + * Owner reservation states: + * + * INITIATED - owner has sent an initial DRP request. + * PENDING - target responded with pending Reason Code. + * MODIFIED - reservation manager is modifying an established + * reservation with a different MAS allocation. + * ESTABLISHED - the reservation has been successfully negotiated. + * + * Target reservation states: + * + * DENIED - request is denied. + * ACCEPTED - request is accepted. + * PENDING - PAL has yet to make a decision to whether to accept or + * deny. + * + * FIXME: further target states TBD. + */ +enum uwb_rsv_state { + UWB_RSV_STATE_NONE, + UWB_RSV_STATE_O_INITIATED, + UWB_RSV_STATE_O_PENDING, + UWB_RSV_STATE_O_MODIFIED, + UWB_RSV_STATE_O_ESTABLISHED, + UWB_RSV_STATE_T_ACCEPTED, + UWB_RSV_STATE_T_DENIED, + UWB_RSV_STATE_T_PENDING, + + UWB_RSV_STATE_LAST, +}; + +enum uwb_rsv_target_type { + UWB_RSV_TARGET_DEV, + UWB_RSV_TARGET_DEVADDR, +}; + +/** + * struct uwb_rsv_target - the target of a reservation. + * + * Reservations unicast and targeted at a single device + * (UWB_RSV_TARGET_DEV); or (e.g., in the case of WUSB) targeted at a + * specific (private) DevAddr (UWB_RSV_TARGET_DEVADDR). + */ +struct uwb_rsv_target { + enum uwb_rsv_target_type type; + union { + struct uwb_dev *dev; + struct uwb_dev_addr devaddr; + }; +}; + +/* + * Number of streams reserved for reservations targeted at DevAddrs. 
+ */
+#define UWB_NUM_GLOBAL_STREAMS 1
+
+/* Callback type stored in uwb_rsv.callback; it is handed the reservation. */
+typedef void (*uwb_rsv_cb_f)(struct uwb_rsv *rsv);
+
+/**
+ * struct uwb_rsv - a DRP reservation
+ *
+ * Data structure management:
+ *
+ * @rc:         the radio controller this reservation is for
+ *              (as target or owner)
+ * @rc_node:    a list node for the RC
+ * @pal_node:   a list node for the PAL
+ *
+ * Owner and target parameters:
+ *
+ * @owner:      the UWB device owning this reservation
+ * @target:     the target UWB device
+ * @type:       reservation type
+ *
+ * Owner parameters:
+ *
+ * @max_mas:    maximum number of MAS
+ * @min_mas:    minimum number of MAS
+ * @sparsity:   owner selected sparsity
+ * @is_multicast: true iff multicast
+ *
+ * @callback:   callback function when the reservation completes
+ * @pal_priv:   private data for the PAL making the reservation
+ *
+ * Reservation status:
+ *
+ * @state:      negotiation state (an enum uwb_rsv_state value)
+ * @stream:     stream index allocated for this reservation
+ * @mas:        reserved MAS
+ * @drp_ie:     the DRP IE
+ * @ie_valid:   true iff the DRP IE matches the reservation parameters
+ * @timer:      timer belonging to this reservation; what arms it is not
+ *              visible in this header -- TODO confirm against the
+ *              reservation manager
+ * @expired:    presumably set once @timer has fired -- TODO confirm
+ *
+ * DRP reservations are uniquely identified by the owner, target and
+ * stream index.  However, when using a DevAddr as a target (e.g., for
+ * a WUSB cluster reservation) the responses may be received from
+ * devices with different DevAddrs.  In this case, reservations are
+ * uniquely identified by just the stream index.  A number of stream
+ * indexes (UWB_NUM_GLOBAL_STREAMS) are reserved for this.
+ */
+struct uwb_rsv {
+	struct uwb_rc *rc;
+	struct list_head rc_node;
+	struct list_head pal_node;
+
+	struct uwb_dev *owner;
+	struct uwb_rsv_target target;
+	enum uwb_drp_type type;
+	int max_mas;
+	int min_mas;
+	int sparsity;
+	bool is_multicast;
+
+	uwb_rsv_cb_f callback;
+	void *pal_priv;
+
+	enum uwb_rsv_state state;
+	u8 stream;
+	struct uwb_mas_bm mas;
+	struct uwb_ie_drp *drp_ie;
+	bool ie_valid;
+	struct timer_list timer;
+	bool expired;
+};
+
+/* A MAS bitmap with every bit clear. */
+static const
+struct uwb_mas_bm uwb_mas_bm_zero = { .bm = { 0 } };
+
+/* Copy the UWB_NUM_MAS bits of @mas into @dst with bitmap_copy_le(). */
+static inline void uwb_mas_bm_copy_le(void *dst, const struct uwb_mas_bm *mas)
+{
+	bitmap_copy_le(dst, mas->bm, UWB_NUM_MAS);
+}
+
+/**
+ * struct uwb_drp_avail - a radio controller's view of MAS usage
+ * @global: MAS unused by neighbors (excluding reservations targetted
+ *          or owned by the local radio controller) or the beaon period
+ * @local:  MAS unused by local established reservations
+ * @pending: MAS unused by local pending reservations
+ * @ie:     DRP Availability IE to be included in the beacon
+ * @ie_valid: true iff @ie is valid and does not need to regenerated from
+ *          @global and @local
+ *
+ * Each radio controller maintains a view of MAS usage or
+ * availability. MAS available for a new reservation are determined
+ * from the intersection of @global, @local, and @pending.
+ *
+ * The radio controller must transmit a DRP Availability IE that's the
+ * intersection of @global and @local.
+ *
+ * A set bit indicates the MAS is unused and available.
+ *
+ * rc->rsvs_mutex should be held before accessing this data structure.
+ *
+ * [ECMA-368] section 17.4.3.
+ */ +struct uwb_drp_avail { + DECLARE_BITMAP(global, UWB_NUM_MAS); + DECLARE_BITMAP(local, UWB_NUM_MAS); + DECLARE_BITMAP(pending, UWB_NUM_MAS); + struct uwb_ie_drp_avail ie; + bool ie_valid; +}; + + +const char *uwb_rsv_state_str(enum uwb_rsv_state state); +const char *uwb_rsv_type_str(enum uwb_drp_type type); + +struct uwb_rsv *uwb_rsv_create(struct uwb_rc *rc, uwb_rsv_cb_f cb, + void *pal_priv); +void uwb_rsv_destroy(struct uwb_rsv *rsv); + +int uwb_rsv_establish(struct uwb_rsv *rsv); +int uwb_rsv_modify(struct uwb_rsv *rsv, + int max_mas, int min_mas, int sparsity); +void uwb_rsv_terminate(struct uwb_rsv *rsv); + +void uwb_rsv_accept(struct uwb_rsv *rsv, uwb_rsv_cb_f cb, void *pal_priv); + +/** + * Radio Control Interface instance + * + * + * Life cycle rules: those of the UWB Device. + * + * @index: an index number for this radio controller, as used in the + * device name. + * @version: version of protocol supported by this device + * @priv: Backend implementation; rw with uwb_dev.dev.sem taken. + * @cmd: Backend implementation to execute commands; rw and call + * only with uwb_dev.dev.sem taken. + * @reset: Hardware reset of radio controller and any PAL controllers. + * @filter: Backend implementation to manipulate data to and from device + * to be compliant to specification assumed by driver (WHCI + * 0.95). + * + * uwb_dev.dev.mutex is used to execute commands and update + * the corresponding structures; can't use a spinlock + * because rc->cmd() can sleep. + * @ies: This is a dynamically allocated array cacheing the + * IEs (settable by the host) that the beacon of this + * radio controller is currently sending. + * + * In reality, we store here the full command we set to + * the radio controller (which is basically a command + * prefix followed by all the IEs the beacon currently + * contains). This way we don't have to realloc and + * memcpy when setting it. 
+ * + * We set this up in uwb_rc_ie_setup(), where we alloc + * this struct, call get_ie() [so we know which IEs are + * currently being sent, if any]. + * + * @ies_capacity:Amount of space (in bytes) allocated in @ies. The + * amount used is given by sizeof(*ies) plus ies->wIELength + * (which is a little endian quantity all the time). + * @ies_mutex: protect the IE cache + * @dbg: information for the debug interface + */ +struct uwb_rc { + struct uwb_dev uwb_dev; + int index; + u16 version; + + struct module *owner; + void *priv; + int (*start)(struct uwb_rc *rc); + void (*stop)(struct uwb_rc *rc); + int (*cmd)(struct uwb_rc *, const struct uwb_rccb *, size_t); + int (*reset)(struct uwb_rc *rc); + int (*filter_cmd)(struct uwb_rc *, struct uwb_rccb **, size_t *); + int (*filter_event)(struct uwb_rc *, struct uwb_rceb **, const size_t, + size_t *, size_t *); + + spinlock_t neh_lock; /* protects neh_* and ctx_* */ + struct list_head neh_list; /* Open NE handles */ + unsigned long ctx_bm[UWB_RC_CTX_MAX / 8 / sizeof(unsigned long)]; + u8 ctx_roll; + + int beaconing; /* Beaconing state [channel number] */ + int scanning; + enum uwb_scan_type scan_type:3; + unsigned ready:1; + struct uwb_notifs_chain notifs_chain; + + struct uwb_drp_avail drp_avail; + struct list_head reservations; + struct mutex rsvs_mutex; + struct workqueue_struct *rsv_workq; + struct work_struct rsv_update_work; + + struct mutex ies_mutex; + struct uwb_rc_cmd_set_ie *ies; + size_t ies_capacity; + + spinlock_t pal_lock; + struct list_head pals; + + struct uwb_dbg *dbg; +}; + + +/** + * struct uwb_pal - a UWB PAL + * @new_rsv: called when a peer requests a reservation (may be NULL if + * the PAL cannot accept reservation requests). + * + * A Protocol Adaptation Layer (PAL) is a user of the WiMedia UWB + * radio platform (e.g., WUSB, WLP or Bluetooth UWB AMP). 
+ * + * The PALs using a radio controller must register themselves to + * permit the UWB stack to coordinate usage of the radio between the + * various PALs or to allow PALs to response to certain requests from + * peers. + * + * A struct uwb_pal should be embedded in a containing structure + * belonging to the PAL and initialized with uwb_pal_init()). Fields + * should be set appropriately by the PAL before registering the PAL + * with uwb_pal_register(). + */ +struct uwb_pal { + struct list_head node; + + void (*new_rsv)(struct uwb_rsv *rsv); +}; + +void uwb_pal_init(struct uwb_pal *pal); +int uwb_pal_register(struct uwb_rc *rc, struct uwb_pal *pal); +void uwb_pal_unregister(struct uwb_rc *rc, struct uwb_pal *pal); + +/* + * General public API + * + * This API can be used by UWB device drivers or by those implementing + * UWB Radio Controllers + */ +struct uwb_dev *uwb_dev_get_by_devaddr(struct uwb_rc *rc, + const struct uwb_dev_addr *devaddr); +struct uwb_dev *uwb_dev_get_by_rc(struct uwb_dev *, struct uwb_rc *); +static inline void uwb_dev_get(struct uwb_dev *uwb_dev) +{ + get_device(&uwb_dev->dev); +} +static inline void uwb_dev_put(struct uwb_dev *uwb_dev) +{ + put_device(&uwb_dev->dev); +} +struct uwb_dev *uwb_dev_try_get(struct uwb_rc *rc, struct uwb_dev *uwb_dev); + +/** + * Callback function for 'uwb_{dev,rc}_foreach()'. + * + * @dev: Linux device instance + * 'uwb_dev = container_of(dev, struct uwb_dev, dev)' + * @priv: Data passed by the caller to 'uwb_{dev,rc}_foreach()'. + * + * @returns: 0 to continue the iterations, any other val to stop + * iterating and return the value to the caller of + * _foreach(). 
+ */ +typedef int (*uwb_dev_for_each_f)(struct device *dev, void *priv); +int uwb_dev_for_each(struct uwb_rc *rc, uwb_dev_for_each_f func, void *priv); + +struct uwb_rc *uwb_rc_alloc(void); +struct uwb_rc *uwb_rc_get_by_dev(const struct uwb_dev_addr *); +struct uwb_rc *uwb_rc_get_by_grandpa(const struct device *); +void uwb_rc_put(struct uwb_rc *rc); + +typedef void (*uwb_rc_cmd_cb_f)(struct uwb_rc *rc, void *arg, + struct uwb_rceb *reply, ssize_t reply_size); + +int uwb_rc_cmd_async(struct uwb_rc *rc, const char *cmd_name, + struct uwb_rccb *cmd, size_t cmd_size, + u8 expected_type, u16 expected_event, + uwb_rc_cmd_cb_f cb, void *arg); +ssize_t uwb_rc_cmd(struct uwb_rc *rc, const char *cmd_name, + struct uwb_rccb *cmd, size_t cmd_size, + struct uwb_rceb *reply, size_t reply_size); +ssize_t uwb_rc_vcmd(struct uwb_rc *rc, const char *cmd_name, + struct uwb_rccb *cmd, size_t cmd_size, + u8 expected_type, u16 expected_event, + struct uwb_rceb **preply); +ssize_t uwb_rc_get_ie(struct uwb_rc *, struct uwb_rc_evt_get_ie **); +int uwb_bg_joined(struct uwb_rc *rc); + +size_t __uwb_addr_print(char *, size_t, const unsigned char *, int); + +int uwb_rc_dev_addr_set(struct uwb_rc *, const struct uwb_dev_addr *); +int uwb_rc_dev_addr_get(struct uwb_rc *, struct uwb_dev_addr *); +int uwb_rc_mac_addr_set(struct uwb_rc *, const struct uwb_mac_addr *); +int uwb_rc_mac_addr_get(struct uwb_rc *, struct uwb_mac_addr *); +int __uwb_mac_addr_assigned_check(struct device *, void *); +int __uwb_dev_addr_assigned_check(struct device *, void *); + +/* Print in @buf a pretty repr of @addr */ +static inline size_t uwb_dev_addr_print(char *buf, size_t buf_size, + const struct uwb_dev_addr *addr) +{ + return __uwb_addr_print(buf, buf_size, addr->data, 0); +} + +/* Print in @buf a pretty repr of @addr */ +static inline size_t uwb_mac_addr_print(char *buf, size_t buf_size, + const struct uwb_mac_addr *addr) +{ + return __uwb_addr_print(buf, buf_size, addr->data, 1); +} + +/* @returns 0 if device 
addresses @addr2 and @addr1 are equal */ +static inline int uwb_dev_addr_cmp(const struct uwb_dev_addr *addr1, + const struct uwb_dev_addr *addr2) +{ + return memcmp(addr1, addr2, sizeof(*addr1)); +} + +/* @returns 0 if MAC addresses @addr2 and @addr1 are equal */ +static inline int uwb_mac_addr_cmp(const struct uwb_mac_addr *addr1, + const struct uwb_mac_addr *addr2) +{ + return memcmp(addr1, addr2, sizeof(*addr1)); +} + +/* @returns !0 if a MAC @addr is a broadcast address */ +static inline int uwb_mac_addr_bcast(const struct uwb_mac_addr *addr) +{ + struct uwb_mac_addr bcast = { + .data = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff } + }; + return !uwb_mac_addr_cmp(addr, &bcast); +} + +/* @returns !0 if a MAC @addr is all zeroes*/ +static inline int uwb_mac_addr_unset(const struct uwb_mac_addr *addr) +{ + struct uwb_mac_addr unset = { + .data = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } + }; + return !uwb_mac_addr_cmp(addr, &unset); +} + +/* @returns !0 if the address is in use. */ +static inline unsigned __uwb_dev_addr_assigned(struct uwb_rc *rc, + struct uwb_dev_addr *addr) +{ + return uwb_dev_for_each(rc, __uwb_dev_addr_assigned_check, addr); +} + +/* + * UWB Radio Controller API + * + * This API is used (in addition to the general API) to implement UWB + * Radio Controllers. + */ +void uwb_rc_init(struct uwb_rc *); +int uwb_rc_add(struct uwb_rc *, struct device *dev, void *rc_priv); +void uwb_rc_rm(struct uwb_rc *); +void uwb_rc_neh_grok(struct uwb_rc *, void *, size_t); +void uwb_rc_neh_error(struct uwb_rc *, int); +void uwb_rc_reset_all(struct uwb_rc *rc); + +/** + * uwb_rsv_is_owner - is the owner of this reservation the RC? + * @rsv: the reservation + */ +static inline bool uwb_rsv_is_owner(struct uwb_rsv *rsv) +{ + return rsv->owner == &rsv->rc->uwb_dev; +} + +/** + * Events generated by UWB that can be passed to any listeners + * + * Higher layers can register callback functions with the radio + * controller using uwb_notifs_register(). 
The radio controller + * maintains a list of all registered handlers and will notify all + * nodes when an event occurs. + */ +enum uwb_notifs { + UWB_NOTIF_BG_JOIN = 0, /* radio controller joined a beacon group */ + UWB_NOTIF_BG_LEAVE = 1, /* radio controller left a beacon group */ + UWB_NOTIF_ONAIR, + UWB_NOTIF_OFFAIR, +}; + +/* Callback function registered with UWB */ +struct uwb_notifs_handler { + struct list_head list_node; + void (*cb)(void *, struct uwb_dev *, enum uwb_notifs); + void *data; +}; + +int uwb_notifs_register(struct uwb_rc *, struct uwb_notifs_handler *); +int uwb_notifs_deregister(struct uwb_rc *, struct uwb_notifs_handler *); + + +/** + * UWB radio controller Event Size Entry (for creating entry tables) + * + * WUSB and WHCI define events and notifications, and they might have + * fixed or variable size. + * + * Each event/notification has a size which is not necessarily known + * in advance based on the event code. As well, vendor specific + * events/notifications will have a size impossible to determine + * unless we know about the device's specific details. + * + * It was way too smart of the spec writers not to think that it would + * be impossible for a generic driver to skip over vendor specific + * events/notifications if there are no LENGTH fields in the HEADER of + * each message...the transaction size cannot be counted on as the + * spec does not forbid to pack more than one event in a single + * transaction. + * + * Thus, we guess sizes with tables (or for events, when you know the + * size ahead of time you can use uwb_rc_neh_extra_size*()). We + * register tables with the known events and their sizes, and then we + * traverse those tables. For those with variable length, we provide a + * way to lookup the size inside the event/notification's + * payload. This allows device-specific event size tables to be + * registered. 
+ * + * @size: Size of the payload + * + * @offset: if != 0, at offset @offset-1 starts a field with a length + * that has to be added to @size. The format of the field is + * given by @type. + * + * @type: Type and length of the offset field. Most common is LE 16 + * bits (that's why that is zero); others are there mostly to + * cover for bugs and weirdos. + */ +struct uwb_est_entry { + size_t size; + unsigned offset; + enum { UWB_EST_16 = 0, UWB_EST_8 = 1 } type; +}; + +int uwb_est_register(u8 type, u8 code_high, u16 vendor, u16 product, + const struct uwb_est_entry *, size_t entries); +int uwb_est_unregister(u8 type, u8 code_high, u16 vendor, u16 product, + const struct uwb_est_entry *, size_t entries); +ssize_t uwb_est_find_size(struct uwb_rc *rc, const struct uwb_rceb *rceb, + size_t len); + +/* -- Misc */ + +enum { + EDC_MAX_ERRORS = 10, + EDC_ERROR_TIMEFRAME = HZ, +}; + +/* error density counter */ +struct edc { + unsigned long timestart; + u16 errorcount; +}; + +static inline +void edc_init(struct edc *edc) +{ + edc->timestart = jiffies; +} + +/* Called when an error occured. + * This is way to determine if the number of acceptable errors per time + * period has been exceeded. It is not accurate as there are cases in which + * this scheme will not work, for example if there are periodic occurences + * of errors that straddle updates to the start time. This scheme is + * sufficient for our usage. + * + * @returns 1 if maximum acceptable errors per timeframe has been exceeded. 
+ */ +static inline int edc_inc(struct edc *err_hist, u16 max_err, u16 timeframe) +{ + unsigned long now; + + now = jiffies; + if (now - err_hist->timestart > timeframe) { + err_hist->errorcount = 1; + err_hist->timestart = now; + } else if (++err_hist->errorcount > max_err) { + err_hist->errorcount = 0; + err_hist->timestart = now; + return 1; + } + return 0; +} + + +/* Information Element handling */ + +/* For representing the state of writing to a buffer when iterating */ +struct uwb_buf_ctx { + char *buf; + size_t bytes, size; +}; + +typedef int (*uwb_ie_f)(struct uwb_dev *, const struct uwb_ie_hdr *, + size_t, void *); +struct uwb_ie_hdr *uwb_ie_next(void **ptr, size_t *len); +ssize_t uwb_ie_for_each(struct uwb_dev *uwb_dev, uwb_ie_f fn, void *data, + const void *buf, size_t size); +int uwb_ie_dump_hex(struct uwb_dev *, const struct uwb_ie_hdr *, + size_t, void *); +int uwb_rc_set_ie(struct uwb_rc *, struct uwb_rc_cmd_set_ie *); +struct uwb_ie_hdr *uwb_ie_next(void **ptr, size_t *len); + + +/* + * Transmission statistics + * + * UWB uses LQI and RSSI (one byte values) for reporting radio signal + * strength and line quality indication. We do quick and dirty + * averages of those. They are signed values, btw. + * + * For 8 bit quantities, we keep the min, the max, an accumulator + * (@sigma) and a # of samples. When @samples gets to 255, we compute + * the average (@sigma / @samples), place it in @sigma and reset + * @samples to 1 (so we use it as the first sample). + * + * Now, statistically speaking, probably I am kicking the kidneys of + * some books I have in my shelves collecting dust, but I just want to + * get an approx, not the Nobel. + * + * LOCKING: there is no locking per se, but we try to keep a lockless + * schema. Only _add_samples() modifies the values--as long as you + * have other locking on top that makes sure that no two calls of + * _add_sample() happen at the same time, then we are fine. 
Now, for
+ * resetting the values we just set @samples to 0 and that makes the
+ * next _add_sample() to start with defaults. Reading the values in
+ * _show() currently can race, so you need to make sure the calls are
+ * under the same lock that protects calls to _add_sample(). FIXME:
+ * currently unlocked (It is not ultraprecise but does the trick. Bite
+ * me).
+ */
+struct stats {
+	s8 min, max;
+	s16 sigma;
+	atomic_t samples;
+};
+
+/*
+ * Reset the statistics by zeroing the sample count; the next
+ * stats_add_sample() then starts again from the default min/max/sigma.
+ */
+static inline
+void stats_init(struct stats *stats)
+{
+	atomic_set(&stats->samples, 0);
+	wmb();
+}
+
+/*
+ * Fold @sample into the running minimum, maximum and sum.
+ *
+ * The minimum and maximum are tested independently: the very first
+ * sample is both a new minimum and a new maximum, and with an
+ * "else if" the maximum would be left at its -128 default (and real
+ * maxima could be lost for good) whenever a sample lowered the minimum.
+ */
+static inline
+void stats_add_sample(struct stats *stats, s8 sample)
+{
+	s8 min, max;
+	s16 sigma;
+	unsigned samples = atomic_read(&stats->samples);
+	if (samples == 0) {	/* it was zero before, so we initialize */
+		min = 127;
+		max = -128;
+		sigma = 0;
+	} else {
+		min = stats->min;
+		max = stats->max;
+		sigma = stats->sigma;
+	}
+
+	if (sample < min)	/* compute new values */
+		min = sample;
+	if (sample > max)
+		max = sample;
+	sigma += sample;
+
+	stats->min = min;	/* commit */
+	stats->max = max;
+	stats->sigma = sigma;
+	if (atomic_add_return(1, &stats->samples) > 255) {
+		/* wrapped around!
reset */ + stats->sigma = sigma / 256; + atomic_set(&stats->samples, 1); + } +} + +static inline ssize_t stats_show(struct stats *stats, char *buf) +{ + int min, max, avg; + int samples = atomic_read(&stats->samples); + if (samples == 0) + min = max = avg = 0; + else { + min = stats->min; + max = stats->max; + avg = stats->sigma / samples; + } + return scnprintf(buf, PAGE_SIZE, "%d %d %d\n", min, max, avg); +} + +static inline ssize_t stats_store(struct stats *stats, const char *buf, + size_t size) +{ + stats_init(stats); + return size; +} + +#endif /* #ifndef __LINUX__UWB_H__ */ diff --git a/include/linux/uwb/debug-cmd.h b/include/linux/uwb/debug-cmd.h new file mode 100644 index 00000000000..1141f41bab5 --- /dev/null +++ b/include/linux/uwb/debug-cmd.h @@ -0,0 +1,57 @@ +/* + * Ultra Wide Band + * Debug interface commands + * + * Copyright (C) 2008 Cambridge Silicon Radio Ltd. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version + * 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ +#ifndef __LINUX__UWB__DEBUG_CMD_H__ +#define __LINUX__UWB__DEBUG_CMD_H__ + +#include + +/* + * Debug interface commands + * + * UWB_DBG_CMD_RSV_ESTABLISH: Establish a new unicast reservation. + * + * UWB_DBG_CMD_RSV_TERMINATE: Terminate the Nth reservation. 
+ */ + +enum uwb_dbg_cmd_type { + UWB_DBG_CMD_RSV_ESTABLISH = 1, + UWB_DBG_CMD_RSV_TERMINATE = 2, +}; + +struct uwb_dbg_cmd_rsv_establish { + __u8 target[6]; + __u8 type; + __u16 max_mas; + __u16 min_mas; + __u8 sparsity; +}; + +struct uwb_dbg_cmd_rsv_terminate { + int index; +}; + +struct uwb_dbg_cmd { + __u32 type; + union { + struct uwb_dbg_cmd_rsv_establish rsv_establish; + struct uwb_dbg_cmd_rsv_terminate rsv_terminate; + }; +}; + +#endif /* #ifndef __LINUX__UWB__DEBUG_CMD_H__ */ diff --git a/include/linux/uwb/debug.h b/include/linux/uwb/debug.h new file mode 100644 index 00000000000..a86a73fe303 --- /dev/null +++ b/include/linux/uwb/debug.h @@ -0,0 +1,82 @@ +/* + * Ultra Wide Band + * Debug Support + * + * Copyright (C) 2005-2006 Intel Corporation + * Inaky Perez-Gonzalez + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version + * 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + * + * + * FIXME: doc + * Invoke like: + * + * #define D_LOCAL 4 + * #include + * + * At the end of your include files. 
+ */ +#include + +struct device; +extern void dump_bytes(struct device *dev, const void *_buf, size_t rsize); + +/* Master debug switch; !0 enables, 0 disables */ +#define D_MASTER (!0) + +/* Local (per-file) debug switch; #define before #including */ +#ifndef D_LOCAL +#define D_LOCAL 0 +#endif + +#undef __d_printf +#undef d_fnstart +#undef d_fnend +#undef d_printf +#undef d_dump + +#define __d_printf(l, _tag, _dev, f, a...) \ +do { \ + struct device *__dev = (_dev); \ + if (D_MASTER && D_LOCAL >= (l)) { \ + char __head[64] = ""; \ + if (_dev != NULL) { \ + if ((unsigned long)__dev < 4096) \ + printk(KERN_ERR "E: Corrupt dev %p\n", \ + __dev); \ + else \ + snprintf(__head, sizeof(__head), \ + "%s %s: ", \ + dev_driver_string(__dev), \ + __dev->bus_id); \ + } \ + printk(KERN_ERR "%s%s" _tag ": " f, __head, \ + __func__, ## a); \ + } \ +} while (0 && _dev) + +#define d_fnstart(l, _dev, f, a...) \ + __d_printf(l, " FNSTART", _dev, f, ## a) +#define d_fnend(l, _dev, f, a...) \ + __d_printf(l, " FNEND", _dev, f, ## a) +#define d_printf(l, _dev, f, a...) \ + __d_printf(l, "", _dev, f, ## a) +#define d_dump(l, _dev, ptr, size) \ +do { \ + struct device *__dev = _dev; \ + if (D_MASTER && D_LOCAL >= (l)) \ + dump_bytes(__dev, ptr, size); \ +} while (0 && _dev) +#define d_test(l) (D_MASTER && D_LOCAL >= (l)) diff --git a/include/linux/uwb/spec.h b/include/linux/uwb/spec.h new file mode 100644 index 00000000000..198c15f8e25 --- /dev/null +++ b/include/linux/uwb/spec.h @@ -0,0 +1,727 @@ +/* + * Ultra Wide Band + * UWB Standard definitions + * + * Copyright (C) 2005-2006 Intel Corporation + * Inaky Perez-Gonzalez + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version + * 2 as published by the Free Software Foundation. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + * + * + * All these definitions are based on the ECMA-368 standard. + * + * Note all definitions are Little Endian in the wire, and we will + * convert them to host order before operating on the bitfields (that + * yes, we use extensively). + */ + +#ifndef __LINUX__UWB_SPEC_H__ +#define __LINUX__UWB_SPEC_H__ + +#include +#include + +#define i1480_FW 0x00000303 +/* #define i1480_FW 0x00000302 */ + +/** + * Number of Medium Access Slots in a superframe. + * + * UWB divides time in SuperFrames, each one divided in 256 pieces, or + * Medium Access Slots. See MBOA MAC[5.4.5] for details. The MAS is the + * basic bandwidth allocation unit in UWB. + */ +enum { UWB_NUM_MAS = 256 }; + +/** + * Number of Zones in superframe. + * + * UWB divides the superframe into zones with numbering starting from BPST. + * See MBOA MAC[16.8.6] + */ +enum { UWB_NUM_ZONES = 16 }; + +/* + * Number of MAS in a zone. + */ +#define UWB_MAS_PER_ZONE (UWB_NUM_MAS / UWB_NUM_ZONES) + +/* + * Number of streams per DRP reservation between a pair of devices. + * + * [ECMA-368] section 16.8.6. + */ +enum { UWB_NUM_STREAMS = 8 }; + +/* + * mMasLength + * + * The length of a MAS in microseconds. + * + * [ECMA-368] section 17.16. + */ +enum { UWB_MAS_LENGTH_US = 256 }; + +/* + * mBeaconSlotLength + * + * The length of the beacon slot in microseconds. 
+ *
+ * [ECMA-368] section 17.16
+ */
+enum { UWB_BEACON_SLOT_LENGTH_US = 85 };
+
+/*
+ * mMaxLostBeacons
+ *
+ * The number of beacons missing in consecutive superframes before a
+ * device can be considered as unreachable.
+ *
+ * [ECMA-368] section 17.16
+ */
+enum { UWB_MAX_LOST_BEACONS = 3 };
+
+/*
+ * Length of a superframe in microseconds.
+ */
+#define UWB_SUPERFRAME_LENGTH_US (UWB_MAS_LENGTH_US * UWB_NUM_MAS)
+
+/**
+ * UWB MAC address
+ *
+ * It is *imperative* that this struct is exactly 6 packed bytes (as
+ * it is also used to define headers sent down and up the wire/radio).
+ */
+struct uwb_mac_addr {
+	u8 data[6];
+} __attribute__((packed));
+
+
+/**
+ * UWB device address
+ *
+ * It is *imperative* that this struct is exactly 2 packed bytes (as
+ * it is also used to define headers sent down and up the wire/radio).
+ */
+struct uwb_dev_addr {
+	u8 data[2];
+} __attribute__((packed));
+
+
+/**
+ * Types of UWB addresses
+ *
+ * Order matters (by size).
+ */
+enum uwb_addr_type {
+	UWB_ADDR_DEV = 0,
+	UWB_ADDR_MAC = 1,
+};
+
+
+/** Size of a char buffer for printing a MAC/device address */
+enum { UWB_ADDR_STRSIZE = 32 };
+
+
+/** UWB WiMedia protocol IDs.
*/ +enum uwb_prid { + UWB_PRID_WLP_RESERVED = 0x0000, + UWB_PRID_WLP = 0x0001, + UWB_PRID_WUSB_BOT = 0x0010, + UWB_PRID_WUSB = 0x0010, + UWB_PRID_WUSB_TOP = 0x001F, +}; + + +/** PHY Rate (MBOA MAC[7.8.12, Table 61]) */ +enum uwb_phy_rate { + UWB_PHY_RATE_53 = 0, + UWB_PHY_RATE_80, + UWB_PHY_RATE_106, + UWB_PHY_RATE_160, + UWB_PHY_RATE_200, + UWB_PHY_RATE_320, + UWB_PHY_RATE_400, + UWB_PHY_RATE_480, + UWB_PHY_RATE_INVALID +}; + + +/** + * Different ways to scan (MBOA MAC[6.2.2, Table 8], WUSB[Table 8-78]) + */ +enum uwb_scan_type { + UWB_SCAN_ONLY = 0, + UWB_SCAN_OUTSIDE_BP, + UWB_SCAN_WHILE_INACTIVE, + UWB_SCAN_DISABLED, + UWB_SCAN_ONLY_STARTTIME, + UWB_SCAN_TOP +}; + + +/** ACK Policy types (MBOA MAC[7.2.1.3]) */ +enum uwb_ack_pol { + UWB_ACK_NO = 0, + UWB_ACK_INM = 1, + UWB_ACK_B = 2, + UWB_ACK_B_REQ = 3, +}; + + +/** DRP reservation types ([ECMA-368 table 106) */ +enum uwb_drp_type { + UWB_DRP_TYPE_ALIEN_BP = 0, + UWB_DRP_TYPE_HARD, + UWB_DRP_TYPE_SOFT, + UWB_DRP_TYPE_PRIVATE, + UWB_DRP_TYPE_PCA, +}; + + +/** DRP Reason Codes ([ECMA-368] table 107) */ +enum uwb_drp_reason { + UWB_DRP_REASON_ACCEPTED = 0, + UWB_DRP_REASON_CONFLICT, + UWB_DRP_REASON_PENDING, + UWB_DRP_REASON_DENIED, + UWB_DRP_REASON_MODIFIED, +}; + +/** + * DRP Notification Reason Codes (WHCI 0.95 [3.1.4.9]) + */ +enum uwb_drp_notif_reason { + UWB_DRP_NOTIF_DRP_IE_RCVD = 0, + UWB_DRP_NOTIF_CONFLICT, + UWB_DRP_NOTIF_TERMINATE, +}; + + +/** Allocation of MAS slots in a DRP request MBOA MAC[7.8.7] */ +struct uwb_drp_alloc { + __le16 zone_bm; + __le16 mas_bm; +} __attribute__((packed)); + + +/** General MAC Header format (ECMA-368[16.2]) */ +struct uwb_mac_frame_hdr { + __le16 Frame_Control; + struct uwb_dev_addr DestAddr; + struct uwb_dev_addr SrcAddr; + __le16 Sequence_Control; + __le16 Access_Information; +} __attribute__((packed)); + + +/** + * uwb_beacon_frame - a beacon frame including MAC headers + * + * [ECMA] section 16.3. 
+ */ +struct uwb_beacon_frame { + struct uwb_mac_frame_hdr hdr; + struct uwb_mac_addr Device_Identifier; /* may be a NULL EUI-48 */ + u8 Beacon_Slot_Number; + u8 Device_Control; + u8 IEData[]; +} __attribute__((packed)); + + +/** Information Element codes (MBOA MAC[T54]) */ +enum uwb_ie { + UWB_PCA_AVAILABILITY = 2, + UWB_IE_DRP_AVAILABILITY = 8, + UWB_IE_DRP = 9, + UWB_BP_SWITCH_IE = 11, + UWB_MAC_CAPABILITIES_IE = 12, + UWB_PHY_CAPABILITIES_IE = 13, + UWB_APP_SPEC_PROBE_IE = 15, + UWB_IDENTIFICATION_IE = 19, + UWB_MASTER_KEY_ID_IE = 20, + UWB_IE_WLP = 250, /* WiMedia Logical Link Control Protocol WLP 0.99 */ + UWB_APP_SPEC_IE = 255, +}; + + +/** + * Header common to all Information Elements (IEs) + */ +struct uwb_ie_hdr { + u8 element_id; /* enum uwb_ie */ + u8 length; +} __attribute__((packed)); + + +/** Dynamic Reservation Protocol IE (MBOA MAC[7.8.6]) */ +struct uwb_ie_drp { + struct uwb_ie_hdr hdr; + __le16 drp_control; + struct uwb_dev_addr dev_addr; + struct uwb_drp_alloc allocs[]; +} __attribute__((packed)); + +static inline int uwb_ie_drp_type(struct uwb_ie_drp *ie) +{ + return (le16_to_cpu(ie->drp_control) >> 0) & 0x7; +} + +static inline int uwb_ie_drp_stream_index(struct uwb_ie_drp *ie) +{ + return (le16_to_cpu(ie->drp_control) >> 3) & 0x7; +} + +static inline int uwb_ie_drp_reason_code(struct uwb_ie_drp *ie) +{ + return (le16_to_cpu(ie->drp_control) >> 6) & 0x7; +} + +static inline int uwb_ie_drp_status(struct uwb_ie_drp *ie) +{ + return (le16_to_cpu(ie->drp_control) >> 9) & 0x1; +} + +static inline int uwb_ie_drp_owner(struct uwb_ie_drp *ie) +{ + return (le16_to_cpu(ie->drp_control) >> 10) & 0x1; +} + +static inline int uwb_ie_drp_tiebreaker(struct uwb_ie_drp *ie) +{ + return (le16_to_cpu(ie->drp_control) >> 11) & 0x1; +} + +static inline int uwb_ie_drp_unsafe(struct uwb_ie_drp *ie) +{ + return (le16_to_cpu(ie->drp_control) >> 12) & 0x1; +} + +static inline void uwb_ie_drp_set_type(struct uwb_ie_drp *ie, enum uwb_drp_type type) +{ + u16 drp_control = 
le16_to_cpu(ie->drp_control); + drp_control = (drp_control & ~(0x7 << 0)) | (type << 0); + ie->drp_control = cpu_to_le16(drp_control); +} + +static inline void uwb_ie_drp_set_stream_index(struct uwb_ie_drp *ie, int stream_index) +{ + u16 drp_control = le16_to_cpu(ie->drp_control); + drp_control = (drp_control & ~(0x7 << 3)) | (stream_index << 3); + ie->drp_control = cpu_to_le16(drp_control); +} + +static inline void uwb_ie_drp_set_reason_code(struct uwb_ie_drp *ie, + enum uwb_drp_reason reason_code) +{ + u16 drp_control = le16_to_cpu(ie->drp_control); + drp_control = (ie->drp_control & ~(0x7 << 6)) | (reason_code << 6); + ie->drp_control = cpu_to_le16(drp_control); +} + +static inline void uwb_ie_drp_set_status(struct uwb_ie_drp *ie, int status) +{ + u16 drp_control = le16_to_cpu(ie->drp_control); + drp_control = (drp_control & ~(0x1 << 9)) | (status << 9); + ie->drp_control = cpu_to_le16(drp_control); +} + +static inline void uwb_ie_drp_set_owner(struct uwb_ie_drp *ie, int owner) +{ + u16 drp_control = le16_to_cpu(ie->drp_control); + drp_control = (drp_control & ~(0x1 << 10)) | (owner << 10); + ie->drp_control = cpu_to_le16(drp_control); +} + +static inline void uwb_ie_drp_set_tiebreaker(struct uwb_ie_drp *ie, int tiebreaker) +{ + u16 drp_control = le16_to_cpu(ie->drp_control); + drp_control = (drp_control & ~(0x1 << 11)) | (tiebreaker << 11); + ie->drp_control = cpu_to_le16(drp_control); +} + +static inline void uwb_ie_drp_set_unsafe(struct uwb_ie_drp *ie, int unsafe) +{ + u16 drp_control = le16_to_cpu(ie->drp_control); + drp_control = (drp_control & ~(0x1 << 12)) | (unsafe << 12); + ie->drp_control = cpu_to_le16(drp_control); +} + +/** Dynamic Reservation Protocol IE (MBOA MAC[7.8.7]) */ +struct uwb_ie_drp_avail { + struct uwb_ie_hdr hdr; + DECLARE_BITMAP(bmp, UWB_NUM_MAS); +} __attribute__((packed)); + +/** + * The Vendor ID is set to an OUI that indicates the vendor of the device. 
+ * ECMA-368 [16.8.10] + */ +struct uwb_vendor_id { + u8 data[3]; +} __attribute__((packed)); + +/** + * The device type ID + * FIXME: clarify what this means + * ECMA-368 [16.8.10] + */ +struct uwb_device_type_id { + u8 data[3]; +} __attribute__((packed)); + + +/** + * UWB device information types + * ECMA-368 [16.8.10] + */ +enum uwb_dev_info_type { + UWB_DEV_INFO_VENDOR_ID = 0, + UWB_DEV_INFO_VENDOR_TYPE, + UWB_DEV_INFO_NAME, +}; + +/** + * UWB device information found in Identification IE + * ECMA-368 [16.8.10] + */ +struct uwb_dev_info { + u8 type; /* enum uwb_dev_info_type */ + u8 length; + u8 data[]; +} __attribute__((packed)); + +/** + * UWB Identification IE + * ECMA-368 [16.8.10] + */ +struct uwb_identification_ie { + struct uwb_ie_hdr hdr; + struct uwb_dev_info info[]; +} __attribute__((packed)); + +/* + * UWB Radio Controller + * + * These definitions are common to the Radio Control layers as + * exported by the WUSB1.0 HWA and WHCI interfaces. + */ + +/** Radio Control Command Block (WUSB1.0[Table 8-65] and WHCI 0.95) */ +struct uwb_rccb { + u8 bCommandType; /* enum hwa_cet */ + __le16 wCommand; /* Command code */ + u8 bCommandContext; /* Context ID */ +} __attribute__((packed)); + + +/** Radio Control Event Block (WUSB[table 8-66], WHCI 0.95) */ +struct uwb_rceb { + u8 bEventType; /* enum hwa_cet */ + __le16 wEvent; /* Event code */ + u8 bEventContext; /* Context ID */ +} __attribute__((packed)); + + +enum { + UWB_RC_CET_GENERAL = 0, /* General Command/Event type */ + UWB_RC_CET_EX_TYPE_1 = 1, /* Extended Type 1 Command/Event type */ +}; + +/* Commands to the radio controller */ +enum uwb_rc_cmd { + UWB_RC_CMD_CHANNEL_CHANGE = 16, + UWB_RC_CMD_DEV_ADDR_MGMT = 17, /* Device Address Management */ + UWB_RC_CMD_GET_IE = 18, /* GET Information Elements */ + UWB_RC_CMD_RESET = 19, + UWB_RC_CMD_SCAN = 20, /* Scan management */ + UWB_RC_CMD_SET_BEACON_FILTER = 21, + UWB_RC_CMD_SET_DRP_IE = 22, /* Dynamic Reservation Protocol IEs */ + UWB_RC_CMD_SET_IE = 23, 
/* Information Element management */ + UWB_RC_CMD_SET_NOTIFICATION_FILTER = 24, + UWB_RC_CMD_SET_TX_POWER = 25, + UWB_RC_CMD_SLEEP = 26, + UWB_RC_CMD_START_BEACON = 27, + UWB_RC_CMD_STOP_BEACON = 28, + UWB_RC_CMD_BP_MERGE = 29, + UWB_RC_CMD_SEND_COMMAND_FRAME = 30, + UWB_RC_CMD_SET_ASIE_NOTIF = 31, +}; + +/* Notifications from the radio controller */ +enum uwb_rc_evt { + UWB_RC_EVT_IE_RCV = 0, + UWB_RC_EVT_BEACON = 1, + UWB_RC_EVT_BEACON_SIZE = 2, + UWB_RC_EVT_BPOIE_CHANGE = 3, + UWB_RC_EVT_BP_SLOT_CHANGE = 4, + UWB_RC_EVT_BP_SWITCH_IE_RCV = 5, + UWB_RC_EVT_DEV_ADDR_CONFLICT = 6, + UWB_RC_EVT_DRP_AVAIL = 7, + UWB_RC_EVT_DRP = 8, + UWB_RC_EVT_BP_SWITCH_STATUS = 9, + UWB_RC_EVT_CMD_FRAME_RCV = 10, + UWB_RC_EVT_CHANNEL_CHANGE_IE_RCV = 11, + /* Events (command responses) use the same code as the command */ + UWB_RC_EVT_UNKNOWN_CMD_RCV = 65535, +}; + +enum uwb_rc_extended_type_1_cmd { + UWB_RC_SET_DAA_ENERGY_MASK = 32, + UWB_RC_SET_NOTIFICATION_FILTER_EX = 33, +}; + +enum uwb_rc_extended_type_1_evt { + UWB_RC_DAA_ENERGY_DETECTED = 0, +}; + +/* Radio Control Result Code. [WHCI] table 3-3. */ +enum { + UWB_RC_RES_SUCCESS = 0, + UWB_RC_RES_FAIL, + UWB_RC_RES_FAIL_HARDWARE, + UWB_RC_RES_FAIL_NO_SLOTS, + UWB_RC_RES_FAIL_BEACON_TOO_LARGE, + UWB_RC_RES_FAIL_INVALID_PARAMETER, + UWB_RC_RES_FAIL_UNSUPPORTED_PWR_LEVEL, + UWB_RC_RES_FAIL_INVALID_IE_DATA, + UWB_RC_RES_FAIL_BEACON_SIZE_EXCEEDED, + UWB_RC_RES_FAIL_CANCELLED, + UWB_RC_RES_FAIL_INVALID_STATE, + UWB_RC_RES_FAIL_INVALID_SIZE, + UWB_RC_RES_FAIL_ACK_NOT_RECEIVED, + UWB_RC_RES_FAIL_NO_MORE_ASIE_NOTIF, + UWB_RC_RES_FAIL_TIME_OUT = 255, +}; + +/* Confirm event. [WHCI] section 3.1.3.1 etc. */ +struct uwb_rc_evt_confirm { + struct uwb_rceb rceb; + u8 bResultCode; +} __attribute__((packed)); + +/* Device Address Management event. [WHCI] section 3.1.3.2. */ +struct uwb_rc_evt_dev_addr_mgmt { + struct uwb_rceb rceb; + u8 baAddr[6]; + u8 bResultCode; +} __attribute__((packed)); + + +/* Get IE Event. [WHCI] section 3.1.3.3. 
*/ +struct uwb_rc_evt_get_ie { + struct uwb_rceb rceb; + __le16 wIELength; + u8 IEData[]; +} __attribute__((packed)); + +/* Set DRP IE Event. [WHCI] section 3.1.3.7. */ +struct uwb_rc_evt_set_drp_ie { + struct uwb_rceb rceb; + __le16 wRemainingSpace; + u8 bResultCode; +} __attribute__((packed)); + +/* Set IE Event. [WHCI] section 3.1.3.8. */ +struct uwb_rc_evt_set_ie { + struct uwb_rceb rceb; + __le16 RemainingSpace; + u8 bResultCode; +} __attribute__((packed)); + +/* Scan command. [WHCI] 3.1.3.5. */ +struct uwb_rc_cmd_scan { + struct uwb_rccb rccb; + u8 bChannelNumber; + u8 bScanState; + __le16 wStartTime; +} __attribute__((packed)); + +/* Set DRP IE command. [WHCI] section 3.1.3.7. */ +struct uwb_rc_cmd_set_drp_ie { + struct uwb_rccb rccb; + __le16 wIELength; + struct uwb_ie_drp IEData[]; +} __attribute__((packed)); + +/* Set IE command. [WHCI] section 3.1.3.8. */ +struct uwb_rc_cmd_set_ie { + struct uwb_rccb rccb; + __le16 wIELength; + u8 IEData[]; +} __attribute__((packed)); + +/* Set DAA Energy Mask event. [WHCI 0.96] section 3.1.3.17. */ +struct uwb_rc_evt_set_daa_energy_mask { + struct uwb_rceb rceb; + __le16 wLength; + u8 result; +} __attribute__((packed)); + +/* Set Notification Filter Extended event. [WHCI 0.96] section 3.1.3.18. */ +struct uwb_rc_evt_set_notification_filter_ex { + struct uwb_rceb rceb; + __le16 wLength; + u8 result; +} __attribute__((packed)); + +/* IE Received notification. [WHCI] section 3.1.4.1. */ +struct uwb_rc_evt_ie_rcv { + struct uwb_rceb rceb; + struct uwb_dev_addr SrcAddr; + __le16 wIELength; + u8 IEData[]; +} __attribute__((packed)); + +/* Type of the received beacon. [WHCI] section 3.1.4.2. */ +enum uwb_rc_beacon_type { + UWB_RC_BEACON_TYPE_SCAN = 0, + UWB_RC_BEACON_TYPE_NEIGHBOR, + UWB_RC_BEACON_TYPE_OL_ALIEN, + UWB_RC_BEACON_TYPE_NOL_ALIEN, +}; + +/* Beacon received notification. [WHCI] 3.1.4.2. 
*/ +struct uwb_rc_evt_beacon { + struct uwb_rceb rceb; + u8 bChannelNumber; + u8 bBeaconType; + __le16 wBPSTOffset; + u8 bLQI; + u8 bRSSI; + __le16 wBeaconInfoLength; + u8 BeaconInfo[]; +} __attribute__((packed)); + + +/* Beacon Size Change notification. [WHCI] section 3.1.4.3 */ +struct uwb_rc_evt_beacon_size { + struct uwb_rceb rceb; + __le16 wNewBeaconSize; +} __attribute__((packed)); + + +/* BPOIE Change notification. [WHCI] section 3.1.4.4. */ +struct uwb_rc_evt_bpoie_change { + struct uwb_rceb rceb; + __le16 wBPOIELength; + u8 BPOIE[]; +} __attribute__((packed)); + + +/* Beacon Slot Change notification. [WHCI] section 3.1.4.5. */ +struct uwb_rc_evt_bp_slot_change { + struct uwb_rceb rceb; + u8 slot_info; +} __attribute__((packed)); + +static inline int uwb_rc_evt_bp_slot_change_slot_num( + const struct uwb_rc_evt_bp_slot_change *evt) +{ + return evt->slot_info & 0x7f; +} + +static inline int uwb_rc_evt_bp_slot_change_no_slot( + const struct uwb_rc_evt_bp_slot_change *evt) +{ + return (evt->slot_info & 0x80) >> 7; +} + +/* BP Switch IE Received notification. [WHCI] section 3.1.4.6. */ +struct uwb_rc_evt_bp_switch_ie_rcv { + struct uwb_rceb rceb; + struct uwb_dev_addr wSrcAddr; + __le16 wIELength; + u8 IEData[]; +} __attribute__((packed)); + +/* DevAddr Conflict notification. [WHCI] section 3.1.4.7. */ +struct uwb_rc_evt_dev_addr_conflict { + struct uwb_rceb rceb; +} __attribute__((packed)); + +/* DRP notification. [WHCI] section 3.1.4.9. */ +struct uwb_rc_evt_drp { + struct uwb_rceb rceb; + struct uwb_dev_addr src_addr; + u8 reason; + u8 beacon_slot_number; + __le16 ie_length; + u8 ie_data[]; +} __attribute__((packed)); + +static inline enum uwb_drp_notif_reason uwb_rc_evt_drp_reason(struct uwb_rc_evt_drp *evt) +{ + return evt->reason & 0x0f; +} + + +/* DRP Availability Change notification. [WHCI] section 3.1.4.8. 
*/ +struct uwb_rc_evt_drp_avail { + struct uwb_rceb rceb; + DECLARE_BITMAP(bmp, UWB_NUM_MAS); +} __attribute__((packed)); + +/* BP switch status notification. [WHCI] section 3.1.4.10. */ +struct uwb_rc_evt_bp_switch_status { + struct uwb_rceb rceb; + u8 status; + u8 slot_offset; + __le16 bpst_offset; + u8 move_countdown; +} __attribute__((packed)); + +/* Command Frame Received notification. [WHCI] section 3.1.4.11. */ +struct uwb_rc_evt_cmd_frame_rcv { + struct uwb_rceb rceb; + __le16 receive_time; + struct uwb_dev_addr wSrcAddr; + struct uwb_dev_addr wDstAddr; + __le16 control; + __le16 reserved; + __le16 dataLength; + u8 data[]; +} __attribute__((packed)); + +/* Channel Change IE Received notification. [WHCI] section 3.1.4.12. */ +struct uwb_rc_evt_channel_change_ie_rcv { + struct uwb_rceb rceb; + struct uwb_dev_addr wSrcAddr; + __le16 wIELength; + u8 IEData[]; +} __attribute__((packed)); + +/* DAA Energy Detected notification. [WHCI 0.96] section 3.1.4.14. */ +struct uwb_rc_evt_daa_energy_detected { + struct uwb_rceb rceb; + __le16 wLength; + u8 bandID; + u8 reserved; + u8 toneBmp[16]; +} __attribute__((packed)); + + +/** + * Radio Control Interface Class Descriptor + * + * WUSB 1.0 [8.6.1.2] + */ +struct uwb_rc_control_intf_class_desc { + u8 bLength; + u8 bDescriptorType; + __le16 bcdRCIVersion; +} __attribute__((packed)); + +#endif /* #ifndef __LINUX__UWB_SPEC_H__ */ diff --git a/include/linux/wlp.h b/include/linux/wlp.h new file mode 100644 index 00000000000..033545e145c --- /dev/null +++ b/include/linux/wlp.h @@ -0,0 +1,735 @@ +/* + * WiMedia Logical Link Control Protocol (WLP) + * + * Copyright (C) 2005-2006 Intel Corporation + * Reinette Chatre + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version + * 2 as published by the Free Software Foundation. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + * + * + * FIXME: docs + * + * - Does not (yet) include support for WLP control frames + * WLP Draft 0.99 [6.5]. + * + * A visual representation of the data structures. + * + * wssidB wssidB + * ^ ^ + * | | + * wssidA wssidA + * wlp interface { ^ ^ + * ... | | + * ... ... wssid wssid ... + * wlp --- ... | | + * }; neighbors --> neighbA --> neighbB + * ... + * wss + * ... + * eda cache --> neighborA --> neighborB --> neighborC ... + */ + +#ifndef __LINUX__WLP_H_ +#define __LINUX__WLP_H_ + +#include +#include +#include +#include + +/** + * WLP Protocol ID + * WLP Draft 0.99 [6.2] + * + * The MUX header for all WLP frames + */ +#define WLP_PROTOCOL_ID 0x0100 + +/** + * WLP Version + * WLP version placed in the association frames (WLP 0.99 [6.6]) + */ +#define WLP_VERSION 0x10 + +/** + * Bytes needed to print UUID as string + */ +#define WLP_WSS_UUID_STRSIZE 48 + +/** + * Bytes needed to print nonce as string + */ +#define WLP_WSS_NONCE_STRSIZE 48 + + +/** + * Size used for WLP name size + * + * The WSS name is set to 65 bytes, 1 byte larger than the maximum + * allowed by the WLP spec. This is to have a null terminated string + * for display to the user. A maximum of 64 bytes will still be used + * when placing the WSS name field in association frames. + */ +#define WLP_WSS_NAME_SIZE 65 + +/** + * Number of bytes added by WLP to data frame + * + * A data frame transmitted from a host will be placed in a Standard or + * Abbreviated WLP frame. 
These have an extra 4 bytes of header (struct + * wlp_frame_std_abbrv_hdr). + * When the stack sends this data frame for transmission it needs to ensure + * there is enough headroom for this header. + */ +#define WLP_DATA_HLEN 4 + +/** + * State of device regarding WLP Service Set + * + * WLP_WSS_STATE_NONE: the host does not participate in any WSS + * WLP_WSS_STATE_PART_ENROLLED: used as part of the enrollment sequence + * ("Partial Enroll"). This state is used to + * indicate the first part of enrollment that is + * unsecure. If the WSS is unsecure then the + * state will promptly go to WLP_WSS_STATE_ENROLLED, + * if the WSS is not secure then the enrollment + * procedure is a few more steps before we are + * enrolled. + * WLP_WSS_STATE_ENROLLED: the host is enrolled in a WSS + * WLP_WSS_STATE_ACTIVE: WSS is activated + * WLP_WSS_STATE_CONNECTED: host is connected to neighbor in WSS + * + */ +enum wlp_wss_state { + WLP_WSS_STATE_NONE = 0, + WLP_WSS_STATE_PART_ENROLLED, + WLP_WSS_STATE_ENROLLED, + WLP_WSS_STATE_ACTIVE, + WLP_WSS_STATE_CONNECTED, +}; + +/** + * WSS Secure status + * WLP 0.99 Table 6 + * + * Set to one if the WSS is secure, zero if it is not secure + */ +enum wlp_wss_sec_status { + WLP_WSS_UNSECURE = 0, + WLP_WSS_SECURE, +}; + +/** + * WLP frame type + * WLP Draft 0.99 [6.2 Table 1] + */ +enum wlp_frame_type { + WLP_FRAME_STANDARD = 0, + WLP_FRAME_ABBREVIATED, + WLP_FRAME_CONTROL, + WLP_FRAME_ASSOCIATION, +}; + +/** + * WLP Association Message Type + * WLP Draft 0.99 [6.6.1.2 Table 8] + */ +enum wlp_assoc_type { + WLP_ASSOC_D1 = 2, + WLP_ASSOC_D2 = 3, + WLP_ASSOC_M1 = 4, + WLP_ASSOC_M2 = 5, + WLP_ASSOC_M3 = 7, + WLP_ASSOC_M4 = 8, + WLP_ASSOC_M5 = 9, + WLP_ASSOC_M6 = 10, + WLP_ASSOC_M7 = 11, + WLP_ASSOC_M8 = 12, + WLP_ASSOC_F0 = 14, + WLP_ASSOC_E1 = 32, + WLP_ASSOC_E2 = 33, + WLP_ASSOC_C1 = 34, + WLP_ASSOC_C2 = 35, + WLP_ASSOC_C3 = 36, + WLP_ASSOC_C4 = 37, +}; + +/** + * WLP Attribute Type + * WLP Draft 0.99 [6.6.1 Table 6] + */ +enum wlp_attr_type { 
+ WLP_ATTR_AUTH = 0x1005, /* Authenticator */ + WLP_ATTR_DEV_NAME = 0x1011, /* Device Name */ + WLP_ATTR_DEV_PWD_ID = 0x1012, /* Device Password ID */ + WLP_ATTR_E_HASH1 = 0x1014, /* E-Hash1 */ + WLP_ATTR_E_HASH2 = 0x1015, /* E-Hash2 */ + WLP_ATTR_E_SNONCE1 = 0x1016, /* E-SNonce1 */ + WLP_ATTR_E_SNONCE2 = 0x1017, /* E-SNonce2 */ + WLP_ATTR_ENCR_SET = 0x1018, /* Encrypted Settings */ + WLP_ATTR_ENRL_NONCE = 0x101A, /* Enrollee Nonce */ + WLP_ATTR_KEYWRAP_AUTH = 0x101E, /* Key Wrap Authenticator */ + WLP_ATTR_MANUF = 0x1021, /* Manufacturer */ + WLP_ATTR_MSG_TYPE = 0x1022, /* Message Type */ + WLP_ATTR_MODEL_NAME = 0x1023, /* Model Name */ + WLP_ATTR_MODEL_NR = 0x1024, /* Model Number */ + WLP_ATTR_PUB_KEY = 0x1032, /* Public Key */ + WLP_ATTR_REG_NONCE = 0x1039, /* Registrar Nonce */ + WLP_ATTR_R_HASH1 = 0x103D, /* R-Hash1 */ + WLP_ATTR_R_HASH2 = 0x103E, /* R-Hash2 */ + WLP_ATTR_R_SNONCE1 = 0x103F, /* R-SNonce1 */ + WLP_ATTR_R_SNONCE2 = 0x1040, /* R-SNonce2 */ + WLP_ATTR_SERIAL = 0x1042, /* Serial number */ + WLP_ATTR_UUID_E = 0x1047, /* UUID-E */ + WLP_ATTR_UUID_R = 0x1048, /* UUID-R */ + WLP_ATTR_PRI_DEV_TYPE = 0x1054, /* Primary Device Type */ + WLP_ATTR_SEC_DEV_TYPE = 0x1055, /* Secondary Device Type */ + WLP_ATTR_PORT_DEV = 0x1056, /* Portable Device */ + WLP_ATTR_APP_EXT = 0x1058, /* Application Extension */ + WLP_ATTR_WLP_VER = 0x2000, /* WLP Version */ + WLP_ATTR_WSSID = 0x2001, /* WSSID */ + WLP_ATTR_WSS_NAME = 0x2002, /* WSS Name */ + WLP_ATTR_WSS_SEC_STAT = 0x2003, /* WSS Secure Status */ + WLP_ATTR_WSS_BCAST = 0x2004, /* WSS Broadcast Address */ + WLP_ATTR_WSS_M_KEY = 0x2005, /* WSS Master Key */ + WLP_ATTR_ACC_ENRL = 0x2006, /* Accepting Enrollment */ + WLP_ATTR_WSS_INFO = 0x2007, /* WSS Information */ + WLP_ATTR_WSS_SEL_MTHD = 0x2008, /* WSS Selection Method */ + WLP_ATTR_ASSC_MTHD_LIST = 0x2009, /* Association Methods List */ + WLP_ATTR_SEL_ASSC_MTHD = 0x200A, /* Selected Association Method */ + WLP_ATTR_ENRL_HASH_COMM = 0x200B, /* Enrollee Hash 
Commitment */ + WLP_ATTR_WSS_TAG = 0x200C, /* WSS Tag */ + WLP_ATTR_WSS_VIRT = 0x200D, /* WSS Virtual EUI-48 */ + WLP_ATTR_WLP_ASSC_ERR = 0x200E, /* WLP Association Error */ + WLP_ATTR_VNDR_EXT = 0x200F, /* Vendor Extension */ +}; + +/** + * WLP Category ID of primary/secondary device + * WLP Draft 0.99 [6.6.1.8 Table 12] + */ +enum wlp_dev_category_id { + WLP_DEV_CAT_COMPUTER = 1, + WLP_DEV_CAT_INPUT, + WLP_DEV_CAT_PRINT_SCAN_FAX_COPIER, + WLP_DEV_CAT_CAMERA, + WLP_DEV_CAT_STORAGE, + WLP_DEV_CAT_INFRASTRUCTURE, + WLP_DEV_CAT_DISPLAY, + WLP_DEV_CAT_MULTIM, + WLP_DEV_CAT_GAMING, + WLP_DEV_CAT_TELEPHONE, + WLP_DEV_CAT_OTHER = 65535, +}; + +/** + * WLP WSS selection method + * WLP Draft 0.99 [6.6.1.6 Table 10] + */ +enum wlp_wss_sel_mthd { + WLP_WSS_ENRL_SELECT = 1, /* Enrollee selects */ + WLP_WSS_REG_SELECT, /* Registrar selects */ +}; + +/** + * WLP association error values + * WLP Draft 0.99 [6.6.1.5 Table 9] + */ +enum wlp_assc_error { + WLP_ASSOC_ERROR_NONE, + WLP_ASSOC_ERROR_AUTH, /* Authenticator Failure */ + WLP_ASSOC_ERROR_ROGUE, /* Rogue activity suspected */ + WLP_ASSOC_ERROR_BUSY, /* Device busy */ + WLP_ASSOC_ERROR_LOCK, /* Setup Locked */ + WLP_ASSOC_ERROR_NOT_READY, /* Registrar not ready */ + WLP_ASSOC_ERROR_INV, /* Invalid WSS selection */ + WLP_ASSOC_ERROR_MSG_TIME, /* Message timeout */ + WLP_ASSOC_ERROR_ENR_TIME, /* Enrollment session timeout */ + WLP_ASSOC_ERROR_PW, /* Device password invalid */ + WLP_ASSOC_ERROR_VER, /* Unsupported version */ + WLP_ASSOC_ERROR_INT, /* Internal error */ + WLP_ASSOC_ERROR_UNDEF, /* Undefined error */ + WLP_ASSOC_ERROR_NUM, /* Numeric comparison failure */ + WLP_ASSOC_ERROR_WAIT, /* Waiting for user input */ +}; + +/** + * WLP Parameters + * WLP 0.99 [7.7] + */ +enum wlp_parameters { + WLP_PER_MSG_TIMEOUT = 15, /* Seconds to wait for response to + association message. */ +}; + +/** + * WLP IE + * + * The WLP IE should be included in beacons by all devices. 
+ *
+ * The driver can set only a few of the fields in this information element;
+ * most fields are managed by the device itself. When the driver needs to set
+ * a field it will only provide values for the fields of interest, the rest
+ * will be filled with zeroes. The fields of interest are:
+ *
+ *	Element ID
+ *	Length
+ *	Capabilities (only to include WSSID Hash list length)
+ *	WSSID Hash List fields
+ *
+ * WLP 0.99 [6.7]
+ *
+ * Only the fields that will be used are detailed in this structure, the rest
+ * are not detailed or marked as "notused".
+ *
+ * The WSSID hash list length is held in the top four bits (15..12) of the
+ * little-endian capabilities field. Read it with wlp_ie_hash_length() and
+ * write it with wlp_ie_set_hash_length(); both do the byte order conversion.
+ * The setter does not mask hash_length, so the caller must pass 0..15.
+ */
+struct wlp_ie {
+	struct uwb_ie_hdr hdr;
+	__le16 capabilities;
+	__le16 cycle_param;
+	__le16 acw_anchor_addr;
+	u8 wssid_hash_list[];
+} __attribute__((packed));
+
+static inline int wlp_ie_hash_length(struct wlp_ie *ie)
+{
+	return (le16_to_cpu(ie->capabilities) >> 12) & 0xf;
+}
+
+static inline void wlp_ie_set_hash_length(struct wlp_ie *ie, int hash_length)
+{
+	u16 caps = le16_to_cpu(ie->capabilities);
+	caps = (caps & ~(0xf << 12)) | (hash_length << 12);	/* replace bits 15..12 only */
+	ie->capabilities = cpu_to_le16(caps);
+}
+
+/**
+ * WLP nonce
+ * WLP Draft 0.99 [6.6.1 Table 6]
+ *
+ * A 128-bit random number often used (E-SNonce1, E-SNonce2, Enrollee
+ * Nonce, Registrar Nonce, R-SNonce1, R-SNonce2). It is passed to HW so
+ * it is packed.
+ */
+struct wlp_nonce {
+	u8 data[16];
+} __attribute__((packed));
+
+/**
+ * WLP UUID
+ * WLP Draft 0.99 [6.6.1 Table 6]
+ *
+ * Universally Unique Identifier (UUID) encoded as an octet string in the
+ * order the octets are shown in string representation in RFC4122. A UUID
+ * is often used (UUID-E, UUID-R, WSSID). It is passed to HW so it is packed.
+ */ +struct wlp_uuid { + u8 data[16]; +} __attribute__((packed)); + + +/** + * Primary and secondary device type attributes + * WLP Draft 0.99 [6.6.1.8] + */ +struct wlp_dev_type { + enum wlp_dev_category_id category:16; + u8 OUI[3]; + u8 OUIsubdiv; + __le16 subID; +} __attribute__((packed)); + +/** + * WLP frame header + * WLP Draft 0.99 [6.2] + */ +struct wlp_frame_hdr { + __le16 mux_hdr; /* WLP_PROTOCOL_ID */ + enum wlp_frame_type type:8; +} __attribute__((packed)); + +/** + * WLP attribute field header + * WLP Draft 0.99 [6.6.1] + * + * Header of each attribute found in an association frame + */ +struct wlp_attr_hdr { + __le16 type; + __le16 length; +} __attribute__((packed)); + +/** + * Device information commonly used together + * + * Each of these device information elements has a specified range in which it + * should fit (WLP 0.99 [Table 6]). This range provided in the spec does not + * include the termination null '\0' character (when used in the + * association protocol the attribute fields are accompanied + * with a "length" field so the full range from the spec can be used for + * the value). We thus allocate an extra byte to be able to store a string + * of max length with a terminating '\0'. + */ +struct wlp_device_info { + char name[33]; + char model_name[33]; + char manufacturer[65]; + char model_nr[33]; + char serial[33]; + struct wlp_dev_type prim_dev_type; +}; + +/** + * Macros for the WLP attributes + * + * There are quite a few attributes (total is 43). The attribute layout can be + * in one of three categories: one value, an array, an enum forced to 8 bits. + * These macros help with their definitions. 
+ */ +#define wlp_attr(type, name) \ +struct wlp_attr_##name { \ + struct wlp_attr_hdr hdr; \ + type name; \ +} __attribute__((packed)); + +#define wlp_attr_array(type, name) \ +struct wlp_attr_##name { \ + struct wlp_attr_hdr hdr; \ + type name[]; \ +} __attribute__((packed)); + +/** + * WLP association attribute fields + * WLP Draft 0.99 [6.6.1 Table 6] + * + * Attributes appear in same order as the Table in the spec + * FIXME Does not define all attributes yet + */ + +/* Device name: Friendly name of sending device */ +wlp_attr_array(u8, dev_name) + +/* Enrollee Nonce: Random number generated by enrollee for an enrollment + * session */ +wlp_attr(struct wlp_nonce, enonce) + +/* Manufacturer name: Name of manufacturer of the sending device */ +wlp_attr_array(u8, manufacturer) + +/* WLP Message Type */ +wlp_attr(u8, msg_type) + +/* WLP Model name: Model name of sending device */ +wlp_attr_array(u8, model_name) + +/* WLP Model number: Model number of sending device */ +wlp_attr_array(u8, model_nr) + +/* Registrar Nonce: Random number generated by registrar for an enrollment + * session */ +wlp_attr(struct wlp_nonce, rnonce) + +/* Serial number of device */ +wlp_attr_array(u8, serial) + +/* UUID of enrollee */ +wlp_attr(struct wlp_uuid, uuid_e) + +/* UUID of registrar */ +wlp_attr(struct wlp_uuid, uuid_r) + +/* WLP Primary device type */ +wlp_attr(struct wlp_dev_type, prim_dev_type) + +/* WLP Secondary device type */ +wlp_attr(struct wlp_dev_type, sec_dev_type) + +/* WLP protocol version */ +wlp_attr(u8, version) + +/* WLP service set identifier */ +wlp_attr(struct wlp_uuid, wssid) + +/* WLP WSS name */ +wlp_attr_array(u8, wss_name) + +/* WLP WSS Secure Status */ +wlp_attr(u8, wss_sec_status) + +/* WSS Broadcast Address */ +wlp_attr(struct uwb_mac_addr, wss_bcast) + +/* WLP Accepting Enrollment */ +wlp_attr(u8, accept_enrl) + +/** + * WSS information attributes + * WLP Draft 0.99 [6.6.3 Table 15] + */ +struct wlp_wss_info { + struct wlp_attr_wssid wssid; + struct 
wlp_attr_wss_name name; + struct wlp_attr_accept_enrl accept; + struct wlp_attr_wss_sec_status sec_stat; + struct wlp_attr_wss_bcast bcast; +} __attribute__((packed)); + +/* WLP WSS Information */ +wlp_attr_array(struct wlp_wss_info, wss_info) + +/* WLP WSS Selection method */ +wlp_attr(u8, wss_sel_mthd) + +/* WLP WSS tag */ +wlp_attr(u8, wss_tag) + +/* WSS Virtual Address */ +wlp_attr(struct uwb_mac_addr, wss_virt) + +/* WLP association error */ +wlp_attr(u8, wlp_assc_err) + +/** + * WLP standard and abbreviated frames + * + * WLP Draft 0.99 [6.3] and [6.4] + * + * The difference between the WLP standard frame and the WLP + * abbreviated frame is that the standard frame includes the src + * and dest addresses from the Ethernet header, the abbreviated frame does + * not. + * The src/dest (as well as the type/length and client data) are already + * defined as part of the Ethernet header, we do not do this here. + * From this perspective the standard and abbreviated frames appear the + * same - they will be treated differently though. + * + * The size of this header is also captured in WLP_DATA_HLEN to enable + * interfaces to prepare their headroom. + */ +struct wlp_frame_std_abbrv_hdr { + struct wlp_frame_hdr hdr; + u8 tag; +} __attribute__((packed)); + +/** + * WLP association frames + * + * WLP Draft 0.99 [6.6] + */ +struct wlp_frame_assoc { + struct wlp_frame_hdr hdr; + enum wlp_assoc_type type:8; + struct wlp_attr_version version; + struct wlp_attr_msg_type msg_type; + u8 attr[]; +} __attribute__((packed)); + +/* Ethernet to dev address mapping */ +struct wlp_eda { + spinlock_t lock; + struct list_head cache; /* Eth<->Dev Addr cache */ +}; + +/** + * WSS information temporary storage + * + * This information is only stored temporarily during discovery. It should + * not be stored unless the device is enrolled in the advertised WSS. This + * is done mainly because we follow the letter of the spec in this regard. + * See WLP 0.99 [7.2.3]. 
+ * When the device does become enrolled in a WSS the WSS information will
+ * be stored as part of the more comprehensive struct wlp_wss.
+ */
+struct wlp_wss_tmp_info {
+	char name[WLP_WSS_NAME_SIZE];
+	u8 accept_enroll;
+	u8 sec_status;
+	struct uwb_mac_addr bcast;
+};
+
+struct wlp_wssid_e {
+	struct list_head node;	/* link in a wlp_neighbor_e wssid list */
+	struct wlp_uuid wssid;
+	struct wlp_wss_tmp_info *info;	/* temporary discovery-time WSS information */
+};
+
+/**
+ * A cache entry of WLP neighborhood
+ *
+ * @node: head of list is wlp->neighbors
+ * @uuid: UUID stored for this neighbor (NOTE(review): presumably the
+ *        neighbor's device UUID; confirm against the discovery code)
+ * @uwb_dev: UWB device this entry refers to (NOTE(review): whether a
+ *        reference is held is not visible here; confirm)
+ * @wssid: list of wssids of this neighbor, element is wlp_wssid_e
+ * @info: temporary storage for information learned during discovery. This
+ *        storage is used together with the wssid_e temporary storage
+ *        during discovery.
+ */
+struct wlp_neighbor_e {
+	struct list_head node;
+	struct wlp_uuid uuid;
+	struct uwb_dev *uwb_dev;
+	struct list_head wssid; /* Elements are wlp_wssid_e */
+	struct wlp_device_info *info;
+};
+
+struct wlp;
+/**
+ * Information for an association session in progress.
+ *
+ * @exp_message: The type of the expected message. Both this message and a
+ *               F0 message (which can be sent in response to any
+ *               association frame) will be accepted as a valid message for
+ *               this session.
+ * @cb:          The function that will be called upon receipt of this
+ *               message.
+ * @cb_priv:     Private data of callback
+ * @data:        Data used in association process (always a sk_buff?)
+ * @neighbor_addr: Address of neighbor with which association session is in
+ *               progress.
+ */
+struct wlp_session {
+	enum wlp_assoc_type exp_message;
+	void (*cb)(struct wlp *);
+	void *cb_priv;
+	void *data;
+	struct uwb_dev_addr neighbor_addr;
+};
+
+/**
+ * WLP Service Set
+ *
+ * @mutex: used to protect entire WSS structure.
+ *
+ * @name: The WSS name is set to 65 bytes, 1 byte larger than the maximum
+ *        allowed by the WLP spec. This is to have a null terminated string
+ *        for display to the user. A maximum of 64 bytes will still be used
+ *        when placing the WSS name field in association frames.
+ * + * @accept_enroll: Accepting enrollment: Set to one if registrar is + * accepting enrollment in WSS, or zero otherwise. + * + * Global and local information for each WSS in which we are enrolled. + * WLP 0.99 Section 7.2.1 and Section 7.2.2 + */ +struct wlp_wss { + struct mutex mutex; + struct kobject kobj; + /* Global properties. */ + struct wlp_uuid wssid; + u8 hash; + char name[WLP_WSS_NAME_SIZE]; + struct uwb_mac_addr bcast; + u8 secure_status:1; + u8 master_key[16]; + /* Local properties. */ + u8 tag; + struct uwb_mac_addr virtual_addr; + /* Extra */ + u8 accept_enroll:1; + enum wlp_wss_state state; +}; + +/** + * WLP main structure + * @mutex: protect changes to WLP structure. We only allow changes to the + * uuid, so currently this mutex only protects this field. + */ +struct wlp { + struct mutex mutex; + struct uwb_rc *rc; /* UWB radio controller */ + struct uwb_pal pal; + struct wlp_eda eda; + struct wlp_uuid uuid; + struct wlp_session *session; + struct wlp_wss wss; + struct mutex nbmutex; /* Neighbor mutex protects neighbors list */ + struct list_head neighbors; /* Elements are wlp_neighbor_e */ + struct uwb_notifs_handler uwb_notifs_handler; + struct wlp_device_info *dev_info; + void (*fill_device_info)(struct wlp *wlp, struct wlp_device_info *info); + int (*xmit_frame)(struct wlp *, struct sk_buff *, + struct uwb_dev_addr *); + void (*stop_queue)(struct wlp *); + void (*start_queue)(struct wlp *); +}; + +/* sysfs */ + + +struct wlp_wss_attribute { + struct attribute attr; + ssize_t (*show)(struct wlp_wss *wss, char *buf); + ssize_t (*store)(struct wlp_wss *wss, const char *buf, size_t count); +}; + +#define WSS_ATTR(_name, _mode, _show, _store) \ +static struct wlp_wss_attribute wss_attr_##_name = __ATTR(_name, _mode, \ + _show, _store) + +extern int wlp_setup(struct wlp *, struct uwb_rc *); +extern void wlp_remove(struct wlp *); +extern ssize_t wlp_neighborhood_show(struct wlp *, char *); +extern int wlp_wss_setup(struct net_device *, struct 
wlp_wss *); +extern void wlp_wss_remove(struct wlp_wss *); +extern ssize_t wlp_wss_activate_show(struct wlp_wss *, char *); +extern ssize_t wlp_wss_activate_store(struct wlp_wss *, const char *, size_t); +extern ssize_t wlp_eda_show(struct wlp *, char *); +extern ssize_t wlp_eda_store(struct wlp *, const char *, size_t); +extern ssize_t wlp_uuid_show(struct wlp *, char *); +extern ssize_t wlp_uuid_store(struct wlp *, const char *, size_t); +extern ssize_t wlp_dev_name_show(struct wlp *, char *); +extern ssize_t wlp_dev_name_store(struct wlp *, const char *, size_t); +extern ssize_t wlp_dev_manufacturer_show(struct wlp *, char *); +extern ssize_t wlp_dev_manufacturer_store(struct wlp *, const char *, size_t); +extern ssize_t wlp_dev_model_name_show(struct wlp *, char *); +extern ssize_t wlp_dev_model_name_store(struct wlp *, const char *, size_t); +extern ssize_t wlp_dev_model_nr_show(struct wlp *, char *); +extern ssize_t wlp_dev_model_nr_store(struct wlp *, const char *, size_t); +extern ssize_t wlp_dev_serial_show(struct wlp *, char *); +extern ssize_t wlp_dev_serial_store(struct wlp *, const char *, size_t); +extern ssize_t wlp_dev_prim_category_show(struct wlp *, char *); +extern ssize_t wlp_dev_prim_category_store(struct wlp *, const char *, + size_t); +extern ssize_t wlp_dev_prim_OUI_show(struct wlp *, char *); +extern ssize_t wlp_dev_prim_OUI_store(struct wlp *, const char *, size_t); +extern ssize_t wlp_dev_prim_OUI_sub_show(struct wlp *, char *); +extern ssize_t wlp_dev_prim_OUI_sub_store(struct wlp *, const char *, + size_t); +extern ssize_t wlp_dev_prim_subcat_show(struct wlp *, char *); +extern ssize_t wlp_dev_prim_subcat_store(struct wlp *, const char *, + size_t); +extern int wlp_receive_frame(struct device *, struct wlp *, struct sk_buff *, + struct uwb_dev_addr *); +extern int wlp_prepare_tx_frame(struct device *, struct wlp *, + struct sk_buff *, struct uwb_dev_addr *); +void wlp_reset_all(struct wlp *wlp); + +/** + * Initialize WSS + */ +static 
inline +void wlp_wss_init(struct wlp_wss *wss) +{ + mutex_init(&wss->mutex); +} + +static inline +void wlp_init(struct wlp *wlp) +{ + INIT_LIST_HEAD(&wlp->neighbors); + mutex_init(&wlp->mutex); + mutex_init(&wlp->nbmutex); + wlp_wss_init(&wlp->wss); +} + + +#endif /* #ifndef __LINUX__WLP_H_ */ -- cgit v1.2.3-70-g09d2 From da389eac31be24556a71dd59ea6539ae4cba5c15 Mon Sep 17 00:00:00 2001 From: David Vrabel Date: Wed, 17 Sep 2008 16:34:12 +0100 Subject: uwb: add the umc bus The UMC bus is used for the capabilities exposed by a UWB Multi-interface Controller as described in the WHCI specification. Signed-off-by: David Vrabel --- drivers/uwb/Makefile | 6 ++ drivers/uwb/umc-bus.c | 218 ++++++++++++++++++++++++++++++++++++++++++++++++ drivers/uwb/umc-dev.c | 104 +++++++++++++++++++++++ drivers/uwb/umc-drv.c | 31 +++++++ include/linux/uwb/umc.h | 194 ++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 553 insertions(+) create mode 100644 drivers/uwb/umc-bus.c create mode 100644 drivers/uwb/umc-dev.c create mode 100644 drivers/uwb/umc-drv.c create mode 100644 include/linux/uwb/umc.h (limited to 'include/linux') diff --git a/drivers/uwb/Makefile b/drivers/uwb/Makefile index 9a67be5ac5c..41c9fca5f87 100644 --- a/drivers/uwb/Makefile +++ b/drivers/uwb/Makefile @@ -1,4 +1,5 @@ obj-$(CONFIG_UWB) += uwb.o +obj-$(CONFIG_UWB_WHCI) += umc.o uwb-objs := \ address.o \ @@ -18,3 +19,8 @@ uwb-objs := \ scan.o \ uwb-debug.o \ uwbd.o + +umc-objs := \ + umc-bus.o \ + umc-dev.o \ + umc-drv.o diff --git a/drivers/uwb/umc-bus.c b/drivers/uwb/umc-bus.c new file mode 100644 index 00000000000..2d8d62d9f53 --- /dev/null +++ b/drivers/uwb/umc-bus.c @@ -0,0 +1,218 @@ +/* + * Bus for UWB Multi-interface Controller capabilities. + * + * Copyright (C) 2007 Cambridge Silicon Radio Ltd. + * + * This file is released under the GNU GPL v2. 
+ */ +#include +#include +#include +#include +#include + +static int umc_bus_unbind_helper(struct device *dev, void *data) +{ + struct device *parent = data; + + if (dev->parent == parent && dev->driver) + device_release_driver(dev); + return 0; +} + +/** + * umc_controller_reset - reset the whole UMC controller + * @umc: the UMC device for the radio controller. + * + * Drivers will be unbound from all UMC devices belonging to the + * controller and then the radio controller will be rebound. The + * radio controller is expected to do a full hardware reset when it is + * probed. + * + * If this is called while a probe() or remove() is in progress it + * will return -EAGAIN and not perform the reset. + */ +int umc_controller_reset(struct umc_dev *umc) +{ + struct device *parent = umc->dev.parent; + int ret; + + if (down_trylock(&parent->sem)) + return -EAGAIN; + bus_for_each_dev(&umc_bus_type, NULL, parent, umc_bus_unbind_helper); + ret = device_attach(&umc->dev); + if (ret == 1) + ret = 0; + up(&parent->sem); + + return ret; +} +EXPORT_SYMBOL_GPL(umc_controller_reset); + +/** + * umc_match_pci_id - match a UMC driver to a UMC device's parent PCI device. + * @umc_drv: umc driver with match_data pointing to a zero-terminated + * table of pci_device_id's. + * @umc: umc device whose parent is to be matched. + */ +int umc_match_pci_id(struct umc_driver *umc_drv, struct umc_dev *umc) +{ + const struct pci_device_id *id_table = umc_drv->match_data; + struct pci_dev *pci; + + if (umc->dev.parent->bus != &pci_bus_type) + return 0; + + pci = to_pci_dev(umc->dev.parent); + return pci_match_id(id_table, pci) != NULL; +} +EXPORT_SYMBOL_GPL(umc_match_pci_id); + +static int umc_bus_rescan_helper(struct device *dev, void *data) +{ + int ret = 0; + + if (!dev->driver) + ret = device_attach(dev); + + return ret < 0 ? 
ret : 0; +} + +static void umc_bus_rescan(void) +{ + int err; + + /* + * We can't use bus_rescan_devices() here as it deadlocks when + * it tries to retake the dev->parent semaphore. + */ + err = bus_for_each_dev(&umc_bus_type, NULL, NULL, umc_bus_rescan_helper); + if (err < 0) + printk(KERN_WARNING "%s: rescan of bus failed: %d\n", + KBUILD_MODNAME, err); +} + +static int umc_bus_match(struct device *dev, struct device_driver *drv) +{ + struct umc_dev *umc = to_umc_dev(dev); + struct umc_driver *umc_driver = to_umc_driver(drv); + + if (umc->cap_id == umc_driver->cap_id) { + if (umc_driver->match) + return umc_driver->match(umc_driver, umc); + else + return 1; + } + return 0; +} + +static int umc_device_probe(struct device *dev) +{ + struct umc_dev *umc; + struct umc_driver *umc_driver; + int err; + + umc_driver = to_umc_driver(dev->driver); + umc = to_umc_dev(dev); + + get_device(dev); + err = umc_driver->probe(umc); + if (err) + put_device(dev); + else + umc_bus_rescan(); + + return err; +} + +static int umc_device_remove(struct device *dev) +{ + struct umc_dev *umc; + struct umc_driver *umc_driver; + + umc_driver = to_umc_driver(dev->driver); + umc = to_umc_dev(dev); + + umc_driver->remove(umc); + put_device(dev); + return 0; +} + +static int umc_device_suspend(struct device *dev, pm_message_t state) +{ + struct umc_dev *umc; + struct umc_driver *umc_driver; + int err = 0; + + umc = to_umc_dev(dev); + + if (dev->driver) { + umc_driver = to_umc_driver(dev->driver); + if (umc_driver->suspend) + err = umc_driver->suspend(umc, state); + } + return err; +} + +static int umc_device_resume(struct device *dev) +{ + struct umc_dev *umc; + struct umc_driver *umc_driver; + int err = 0; + + umc = to_umc_dev(dev); + + if (dev->driver) { + umc_driver = to_umc_driver(dev->driver); + if (umc_driver->resume) + err = umc_driver->resume(umc); + } + return err; +} + +static ssize_t capability_id_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct umc_dev 
*umc = to_umc_dev(dev); + + return sprintf(buf, "0x%02x\n", umc->cap_id); +} + +static ssize_t version_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct umc_dev *umc = to_umc_dev(dev); + + return sprintf(buf, "0x%04x\n", umc->version); +} + +static struct device_attribute umc_dev_attrs[] = { + __ATTR_RO(capability_id), + __ATTR_RO(version), + __ATTR_NULL, +}; + +struct bus_type umc_bus_type = { + .name = "umc", + .match = umc_bus_match, + .probe = umc_device_probe, + .remove = umc_device_remove, + .suspend = umc_device_suspend, + .resume = umc_device_resume, + .dev_attrs = umc_dev_attrs, +}; +EXPORT_SYMBOL_GPL(umc_bus_type); + +static int __init umc_bus_init(void) +{ + return bus_register(&umc_bus_type); +} +module_init(umc_bus_init); + +static void __exit umc_bus_exit(void) +{ + bus_unregister(&umc_bus_type); +} +module_exit(umc_bus_exit); + +MODULE_DESCRIPTION("UWB Multi-interface Controller capability bus"); +MODULE_AUTHOR("Cambridge Silicon Radio Ltd."); +MODULE_LICENSE("GPL"); diff --git a/drivers/uwb/umc-dev.c b/drivers/uwb/umc-dev.c new file mode 100644 index 00000000000..aa44e1c1a10 --- /dev/null +++ b/drivers/uwb/umc-dev.c @@ -0,0 +1,104 @@ +/* + * UWB Multi-interface Controller device management. + * + * Copyright (C) 2007 Cambridge Silicon Radio Ltd. + * + * This file is released under the GNU GPL v2. + */ +#include +#include +#define D_LOCAL 0 +#include + +static void umc_device_release(struct device *dev) +{ + struct umc_dev *umc = to_umc_dev(dev); + + kfree(umc); +} + +/** + * umc_device_create - allocate a child UMC device + * @parent: parent of the new UMC device. + * @n: index of the new device. + * + * The new UMC device will have a bus ID of the parent with '-n' + * appended. 
+ */ +struct umc_dev *umc_device_create(struct device *parent, int n) +{ + struct umc_dev *umc; + + umc = kzalloc(sizeof(struct umc_dev), GFP_KERNEL); + if (umc) { + snprintf(umc->dev.bus_id, sizeof(umc->dev.bus_id), "%s-%d", + parent->bus_id, n); + umc->dev.parent = parent; + umc->dev.bus = &umc_bus_type; + umc->dev.release = umc_device_release; + + umc->dev.dma_mask = parent->dma_mask; + } + return umc; +} +EXPORT_SYMBOL_GPL(umc_device_create); + +/** + * umc_device_register - register a UMC device + * @umc: pointer to the UMC device + * + * The memory resource for the UMC device is acquired and the device + * registered with the system. + */ +int umc_device_register(struct umc_dev *umc) +{ + int err; + + d_fnstart(3, &umc->dev, "(umc_dev %p)\n", umc); + + err = request_resource(umc->resource.parent, &umc->resource); + if (err < 0) { + dev_err(&umc->dev, "can't allocate resource range " + "%016Lx to %016Lx: %d\n", + (unsigned long long)umc->resource.start, + (unsigned long long)umc->resource.end, + err); + goto error_request_resource; + } + + err = device_register(&umc->dev); + if (err < 0) + goto error_device_register; + d_fnend(3, &umc->dev, "(umc_dev %p) = 0\n", umc); + return 0; + +error_device_register: + release_resource(&umc->resource); +error_request_resource: + d_fnend(3, &umc->dev, "(umc_dev %p) = %d\n", umc, err); + return err; +} +EXPORT_SYMBOL_GPL(umc_device_register); + +/** + * umc_device_unregister - unregister a UMC device + * @umc: pointer to the UMC device + * + * First we unregister the device, make sure the driver can do it's + * resource release thing and then we try to release any left over + * resources. We take a ref to the device, to make sure it doesn't + * dissapear under our feet. 
/**
 * umc_driver_unregister - unregister a UMC capability driver.
 * @umc_drv: pointer to the driver.
 *
 * Counterpart of __umc_driver_register(): hands the embedded
 * struct device_driver to driver_unregister().
 */
void umc_driver_unregister(struct umc_driver *umc_drv)
{
	driver_unregister(&umc_drv->driver);
}
EXPORT_SYMBOL_GPL(umc_driver_unregister);
/**
 * struct umc_dev - UMC capability device
 *
 * @version: version of the specification this capability conforms to.
 * @cap_id: capability ID.
 * @bar: PCI Bar (64 bit) where the resource lies
 * @resource: register space resource.
 * @irq: interrupt line.
 * @dev: embedded device registered on the "umc" bus; to_umc_dev()
 *       converts a pointer to it back into the containing umc_dev.
 */
struct umc_dev {
	u16		version;
	u8		cap_id;
	u8		bar;
	struct resource	resource;
	unsigned	irq;
	struct device	dev;
};
struct umc_driver {
	/* Driver name; __umc_driver_register() copies it to driver.name. */
	char *name;
	/* Capability ID this driver handles; compared in umc_bus_match(). */
	u8 cap_id;
	/* Optional extra match step; may be NULL (cap_id alone then decides). */
	int (*match)(struct umc_driver *, struct umc_dev *);
	/* Opaque data for match(), e.g. a pci_device_id table. */
	const void *match_data;

	/* Called unconditionally by the bus probe/remove callbacks. */
	int  (*probe)(struct umc_dev *);
	void (*remove)(struct umc_dev *);
	/* Optional power management hooks; the bus skips them when NULL. */
	int  (*suspend)(struct umc_dev *, pm_message_t state);
	int  (*resume)(struct umc_dev *);

	/* Embedded driver-model object; see to_umc_driver(). */
	struct device_driver driver;
};
Mapping is + * easily over come (ioremap and stuff are bus agnostic), but hooking + * up to some error handlers (such as pci error handlers) might need + * this. + * + * THIS might (probably will) be removed in the future, so don't count + * on it. + */ +static inline struct pci_dev *umc_parent_pci_dev(struct umc_dev *umc_dev) +{ + struct pci_dev *pci_dev = NULL; + if (umc_dev->dev.parent->bus == &pci_bus_type) + pci_dev = to_pci_dev(umc_dev->dev.parent); + return pci_dev; +} + +/** + * umc_dev_get() - reference a UMC device. + * @umc_dev: Pointer to UMC device. + * + * NOTE: we are assuming in this whole scheme that the parent device + * is referenced at _probe() time and unreferenced at _remove() + * time by the parent's subsystem. + */ +static inline struct umc_dev *umc_dev_get(struct umc_dev *umc_dev) +{ + get_device(&umc_dev->dev); + return umc_dev; +} + +/** + * umc_dev_put() - unreference a UMC device. + * @umc_dev: Pointer to UMC device. + */ +static inline void umc_dev_put(struct umc_dev *umc_dev) +{ + put_device(&umc_dev->dev); +} + +/** + * umc_set_drvdata - set UMC device's driver data. + * @umc_dev: Pointer to UMC device. + * @data: Data to set. + */ +static inline void umc_set_drvdata(struct umc_dev *umc_dev, void *data) +{ + dev_set_drvdata(&umc_dev->dev, data); +} + +/** + * umc_get_drvdata - recover UMC device's driver data. + * @umc_dev: Pointer to UMC device. + */ +static inline void *umc_get_drvdata(struct umc_dev *umc_dev) +{ + return dev_get_drvdata(&umc_dev->dev); +} + +int umc_controller_reset(struct umc_dev *umc); + +#endif /* #ifndef _LINUX_UWB_UMC_H_ */ -- cgit v1.2.3-70-g09d2 From 8f1b678ab900c2bda1620dfb6e1f1f02604fc3a2 Mon Sep 17 00:00:00 2001 From: David Vrabel Date: Wed, 17 Sep 2008 16:34:13 +0100 Subject: uwb: add the driver to enumerate WHCI capabilities This enumerates the capabilties of a WHCI device, adding a umc device for each one. 
Signed-off-by: David Vrabel --- drivers/uwb/Makefile | 2 +- drivers/uwb/whci.c | 269 +++++++++++++++++++++++++++++++++++++++++++++++ include/linux/uwb/whci.h | 117 +++++++++++++++++++++ 3 files changed, 387 insertions(+), 1 deletion(-) create mode 100644 drivers/uwb/whci.c create mode 100644 include/linux/uwb/whci.h (limited to 'include/linux') diff --git a/drivers/uwb/Makefile b/drivers/uwb/Makefile index 41c9fca5f87..b054471af28 100644 --- a/drivers/uwb/Makefile +++ b/drivers/uwb/Makefile @@ -1,5 +1,5 @@ obj-$(CONFIG_UWB) += uwb.o -obj-$(CONFIG_UWB_WHCI) += umc.o +obj-$(CONFIG_UWB_WHCI) += umc.o whci.o uwb-objs := \ address.o \ diff --git a/drivers/uwb/whci.c b/drivers/uwb/whci.c new file mode 100644 index 00000000000..3df2388f908 --- /dev/null +++ b/drivers/uwb/whci.c @@ -0,0 +1,269 @@ +/* + * WHCI UWB Multi-interface Controller enumerator. + * + * Copyright (C) 2007 Cambridge Silicon Radio Ltd. + * + * This file is released under the GNU GPL v2. + */ +#include +#include +#include +#include +#include +#include + +struct whci_card { + struct pci_dev *pci; + void __iomem *uwbbase; + u8 n_caps; + struct umc_dev *devs[0]; +}; + + +/* Fix faulty HW :( */ +static +u64 whci_capdata_quirks(struct whci_card *card, u64 capdata) +{ + u64 capdata_orig = capdata; + struct pci_dev *pci_dev = card->pci; + if (pci_dev->vendor == PCI_VENDOR_ID_INTEL + && (pci_dev->device == 0x0c3b || pci_dev->device == 0004) + && pci_dev->class == 0x0d1010) { + switch (UWBCAPDATA_TO_CAP_ID(capdata)) { + /* WLP capability has 0x100 bytes of aperture */ + case 0x80: + capdata |= 0x40 << 8; break; + /* WUSB capability has 0x80 bytes of aperture + * and ID is 1 */ + case 0x02: + capdata &= ~0xffff; + capdata |= 0x2001; + break; + } + } + if (capdata_orig != capdata) + dev_warn(&pci_dev->dev, + "PCI v%04x d%04x c%06x#%02x: " + "corrected capdata from %016Lx to %016Lx\n", + pci_dev->vendor, pci_dev->device, pci_dev->class, + (unsigned)UWBCAPDATA_TO_CAP_ID(capdata), + (unsigned long long)capdata_orig, 
+ (unsigned long long)capdata); + return capdata; +} + + +/** + * whci_wait_for - wait for a WHCI register to be set + * + * Polls (for at most @max_ms ms) until '*@reg & @mask == @result'. + */ +int whci_wait_for(struct device *dev, u32 __iomem *reg, u32 mask, u32 result, + unsigned long max_ms, const char *tag) +{ + unsigned t = 0; + u32 val; + for (;;) { + val = le_readl(reg); + if ((val & mask) == result) + break; + msleep(10); + if (t >= max_ms) { + dev_err(dev, "timed out waiting for %s ", tag); + return -ETIMEDOUT; + } + t += 10; + } + return 0; +} +EXPORT_SYMBOL_GPL(whci_wait_for); + + +/* + * NOTE: the capinfo and capdata registers are slightly different + * (size and cap-id fields). So for cap #0, we need to fill + * in. Size comes from the size of the register block + * (statically calculated); cap_id comes from nowhere, we use + * zero, that is reserved, for the radio controller, because + * none was defined at the spec level. + */ +static int whci_add_cap(struct whci_card *card, int n) +{ + struct umc_dev *umc; + u64 capdata; + int bar, err; + + umc = umc_device_create(&card->pci->dev, n); + if (umc == NULL) + return -ENOMEM; + + capdata = le_readq(card->uwbbase + UWBCAPDATA(n)); + + bar = UWBCAPDATA_TO_BAR(capdata) << 1; + + capdata = whci_capdata_quirks(card, capdata); + /* Capability 0 is the radio controller. It's size is 32 + * bytes (WHCI0.95[2.3, T2-9]). */ + umc->version = UWBCAPDATA_TO_VERSION(capdata); + umc->cap_id = n == 0 ? 0 : UWBCAPDATA_TO_CAP_ID(capdata); + umc->bar = bar; + umc->resource.start = pci_resource_start(card->pci, bar) + + UWBCAPDATA_TO_OFFSET(capdata); + umc->resource.end = umc->resource.start + + (n == 0 ? 
0x20 : UWBCAPDATA_TO_SIZE(capdata)) - 1; + umc->resource.name = umc->dev.bus_id; + umc->resource.flags = card->pci->resource[bar].flags; + umc->resource.parent = &card->pci->resource[bar]; + umc->irq = card->pci->irq; + + err = umc_device_register(umc); + if (err < 0) + goto error; + card->devs[n] = umc; + return 0; + +error: + kfree(umc); + return err; +} + +static void whci_del_cap(struct whci_card *card, int n) +{ + struct umc_dev *umc = card->devs[n]; + + if (umc != NULL) + umc_device_unregister(umc); +} + +static int whci_n_caps(struct pci_dev *pci) +{ + void __iomem *uwbbase; + u64 capinfo; + + uwbbase = pci_iomap(pci, 0, 8); + if (!uwbbase) + return -ENOMEM; + capinfo = le_readq(uwbbase + UWBCAPINFO); + pci_iounmap(pci, uwbbase); + + return UWBCAPINFO_TO_N_CAPS(capinfo); +} + +static int whci_probe(struct pci_dev *pci, const struct pci_device_id *id) +{ + struct whci_card *card; + int err, n_caps, n; + + err = pci_enable_device(pci); + if (err < 0) + goto error; + pci_enable_msi(pci); + pci_set_master(pci); + err = -ENXIO; + if (!pci_set_dma_mask(pci, DMA_64BIT_MASK)) + pci_set_consistent_dma_mask(pci, DMA_64BIT_MASK); + else if (!pci_set_dma_mask(pci, DMA_32BIT_MASK)) + pci_set_consistent_dma_mask(pci, DMA_32BIT_MASK); + else + goto error_dma; + + err = n_caps = whci_n_caps(pci); + if (n_caps < 0) + goto error_ncaps; + + err = -ENOMEM; + card = kzalloc(sizeof(struct whci_card) + + sizeof(struct whci_dev *) * (n_caps + 1), + GFP_KERNEL); + if (card == NULL) + goto error_kzalloc; + card->pci = pci; + card->n_caps = n_caps; + + err = -EBUSY; + if (!request_mem_region(pci_resource_start(pci, 0), + UWBCAPDATA_SIZE(card->n_caps), + "whci (capability data)")) + goto error_request_memregion; + err = -ENOMEM; + card->uwbbase = pci_iomap(pci, 0, UWBCAPDATA_SIZE(card->n_caps)); + if (!card->uwbbase) + goto error_iomap; + + /* Add each capability. 
/*
 * PCI remove callback: undoes what whci_probe() set up for this card.
 *
 * The UMC devices are unregistered first, then the capability register
 * mapping and memory region are released, the card structure is freed
 * and finally MSI and the PCI device itself are disabled.
 */
static void whci_remove(struct pci_dev *pci)
{
	struct whci_card *card = pci_get_drvdata(pci);
	int n;

	pci_set_drvdata(pci, NULL);
	/* Unregister each capability in reverse (so the master device
	 * is unregistered last). */
	/* Slots n_caps..0 inclusive; whci_del_cap() skips NULL slots. */
	for (n = card->n_caps; n >= 0 ; n--)
		whci_del_cap(card, n);
	pci_iounmap(pci, card->uwbbase);
	release_mem_region(pci_resource_start(pci, 0), UWBCAPDATA_SIZE(card->n_caps));
	kfree(card);
	pci_disable_msi(pci);
	pci_disable_device(pci);
}
/*
 * UWB interface capability registers (offsets from UWBBASE)
 *
 * [WHCI] section 2.2
 *
 * UWBCAPDATA(n) is the byte offset of the n-th 64 bit register
 * (8 bytes apart); UWBCAPINFO is register 0.  The *_TO_* helpers
 * extract fields from a register value 'c' that has already been read:
 *
 *   N_CAPS   bits  3:0   (UWBCAPINFO only)
 *   VERSION  bits 47:32
 *   OFFSET   bits 31:18
 *   BAR      bits 17:16
 *   SIZE     bits 15:8, scaled by sizeof(u32)
 *   CAP_ID   bits  7:0
 */
#define UWBCAPINFO	0x00 /* == UWBCAPDATA(0) */
#  define UWBCAPINFO_TO_N_CAPS(c)	(((c) >> 0) & 0xFull)
#define UWBCAPDATA(n)	(8*(n))
#  define UWBCAPDATA_TO_VERSION(c)	(((c) >> 32) & 0xFFFFull)
#  define UWBCAPDATA_TO_OFFSET(c)	(((c) >> 18) & 0x3FFFull)
#  define UWBCAPDATA_TO_BAR(c)		(((c) >> 16) & 0x3ull)
#  define UWBCAPDATA_TO_SIZE(c)		((((c) >> 8) & 0xFFull) * sizeof(u32))
#  define UWBCAPDATA_TO_CAP_ID(c)	(((c) >> 0) & 0xFFull)
/*
 * URC registers (offsets from URCBASE)
 *
 * [WHCI] section 2.3
 *
 * Un-indented names are register offsets; the indented names below
 * each one are bits or masks within that 32 bit register.
 */
#define URCCMD			0x00
/*
 * Bit 31 must be built from an unsigned constant: (1 << 31) shifts into
 * the sign bit of int, which is undefined behaviour and yields a
 * negative value that sign-extends when widened to 64 bits.
 */
#  define URCCMD_RESET		(1U << 31)  /* UMC Hardware reset */
#  define URCCMD_RS		(1 << 30)  /* Run/Stop */
#  define URCCMD_EARV		(1 << 29)  /* Event Address Register Valid */
#  define URCCMD_ACTIVE		(1 << 15)  /* Command is active */
#  define URCCMD_IWR		(1 << 14)  /* Interrupt When Ready */
#  define URCCMD_SIZE_MASK	0x00000fff /* Command size mask */
#define URCSTS			0x04
#  define URCSTS_EPS		(1 << 17)  /* Event Processing Status */
#  define URCSTS_HALTED		(1 << 16)  /* RC halted */
#  define URCSTS_HSE		(1 << 10)  /* Host System Error...fried */
#  define URCSTS_ER		(1 <<  9)  /* Event Ready */
#  define URCSTS_RCI		(1 <<  8)  /* Ready for Command Interrupt */
#  define URCSTS_INT_MASK	0x00000700 /* URC interrupt sources */
#  define URCSTS_ISI		0x000000ff /* Interrupt Source Identification */
#define URCINTR			0x08
#  define URCINTR_EN_ALL	0x000007ff /* Enable all interrupt sources */
#define URCCMDADDR		0x10
#define URCEVTADDR		0x18
#  define URCEVTADDR_OFFSET_MASK 0xfff    /* Event pointer offset mask */
+#endif /* #ifndef _LINUX_UWB_WHCI_H_ */ -- cgit v1.2.3-70-g09d2 From c7f736484f8ecde4dc1bc8459179c4d65f2ccbe4 Mon Sep 17 00:00:00 2001 From: Inaky Perez-Gonzalez Date: Wed, 17 Sep 2008 16:34:22 +0100 Subject: wusb: add the Wireless USB include files. Common header files derived from the WUSB 1.0 specification. Signed-off-by: David Vrabel --- include/linux/usb/wusb-wa.h | 271 +++++++++++++++++++++++++++++++ include/linux/usb/wusb.h | 376 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 647 insertions(+) create mode 100644 include/linux/usb/wusb-wa.h create mode 100644 include/linux/usb/wusb.h (limited to 'include/linux') diff --git a/include/linux/usb/wusb-wa.h b/include/linux/usb/wusb-wa.h new file mode 100644 index 00000000000..a102561e702 --- /dev/null +++ b/include/linux/usb/wusb-wa.h @@ -0,0 +1,271 @@ +/* + * Wireless USB Wire Adapter constants and structures. + * + * Copyright (C) 2005-2006 Intel Corporation. + * Inaky Perez-Gonzalez + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version + * 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + * + * + * FIXME: docs + * FIXME: organize properly, group logically + * + * All the event structures are defined in uwb/spec.h, as they are + * common to the WHCI and WUSB radio control interfaces. 
/*
 * RPipe characteristics flags; each value is a distinct single bit so
 * they can be OR-ed together.
 *
 * NOTE(review): presumably these are the bits carried in
 * usb_rpipe_descriptor.bmCharacteristics (control, isochronous, bulk
 * and interrupt transfers) -- confirm against [WUSB] section 8.5.2.11.
 */
enum rpipe_crs {
	RPIPE_CRS_CTL = 0x01,
	RPIPE_CRS_ISO = 0x02,
	RPIPE_CRS_BULK = 0x04,
	RPIPE_CRS_INTR = 0x08
};
enum rpipe_attr, supported xsactions */ + u8 bmRetryOptions; /* rw? */ + __le16 wNumTransactionErrors; /* rw */ +} __attribute__ ((packed)); + +/** + * Wire Adapter Notification types ([WUSB] sections 8.4.5 & 8.5.4) + * + * These are the notifications coming on the notification endpoint of + * an HWA and a DWA. + */ +enum wa_notif_type { + DWA_NOTIF_RWAKE = 0x91, + DWA_NOTIF_PORTSTATUS = 0x92, + WA_NOTIF_TRANSFER = 0x93, + HWA_NOTIF_BPST_ADJ = 0x94, + HWA_NOTIF_DN = 0x95, +}; + +/** + * Wire Adapter notification header + * + * Notifications coming from a wire adapter use a common header + * defined in [WUSB] sections 8.4.5 & 8.5.4. + */ +struct wa_notif_hdr { + u8 bLength; + u8 bNotifyType; /* enum wa_notif_type */ +} __attribute__((packed)); + +/** + * HWA DN Received notification ([WUSB] section 8.5.4.2) + * + * The DNData is specified in WUSB1.0[7.6]. For each device + * notification we received, we just need to dispatch it. + * + * @dndata: this is really an array of notifications, but all start + * with the same header. 
+ */ +struct hwa_notif_dn { + struct wa_notif_hdr hdr; + u8 bSourceDeviceAddr; /* from errata 2005/07 */ + u8 bmAttributes; + struct wusb_dn_hdr dndata[]; +} __attribute__((packed)); + +/* [WUSB] section 8.3.3 */ +enum wa_xfer_type { + WA_XFER_TYPE_CTL = 0x80, + WA_XFER_TYPE_BI = 0x81, /* bulk/interrupt */ + WA_XFER_TYPE_ISO = 0x82, + WA_XFER_RESULT = 0x83, + WA_XFER_ABORT = 0x84, +}; + +/* [WUSB] section 8.3.3 */ +struct wa_xfer_hdr { + u8 bLength; /* 0x18 */ + u8 bRequestType; /* 0x80 WA_REQUEST_TYPE_CTL */ + __le16 wRPipe; /* RPipe index */ + __le32 dwTransferID; /* Host-assigned ID */ + __le32 dwTransferLength; /* Length of data to xfer */ + u8 bTransferSegment; +} __attribute__((packed)); + +struct wa_xfer_ctl { + struct wa_xfer_hdr hdr; + u8 bmAttribute; + __le16 wReserved; + struct usb_ctrlrequest baSetupData; +} __attribute__((packed)); + +struct wa_xfer_bi { + struct wa_xfer_hdr hdr; + u8 bReserved; + __le16 wReserved; +} __attribute__((packed)); + +struct wa_xfer_hwaiso { + struct wa_xfer_hdr hdr; + u8 bReserved; + __le16 wPresentationTime; + __le32 dwNumOfPackets; + /* FIXME: u8 pktdata[]? 
*/ +} __attribute__((packed)); + +/* [WUSB] section 8.3.3.5 */ +struct wa_xfer_abort { + u8 bLength; + u8 bRequestType; + __le16 wRPipe; /* RPipe index */ + __le32 dwTransferID; /* Host-assigned ID */ +} __attribute__((packed)); + +/** + * WA Transfer Complete notification ([WUSB] section 8.3.3.3) + * + */ +struct wa_notif_xfer { + struct wa_notif_hdr hdr; + u8 bEndpoint; + u8 Reserved; +} __attribute__((packed)); + +/** Transfer result basic codes [WUSB] table 8-15 */ +enum { + WA_XFER_STATUS_SUCCESS, + WA_XFER_STATUS_HALTED, + WA_XFER_STATUS_DATA_BUFFER_ERROR, + WA_XFER_STATUS_BABBLE, + WA_XFER_RESERVED, + WA_XFER_STATUS_NOT_FOUND, + WA_XFER_STATUS_INSUFFICIENT_RESOURCE, + WA_XFER_STATUS_TRANSACTION_ERROR, + WA_XFER_STATUS_ABORTED, + WA_XFER_STATUS_RPIPE_NOT_READY, + WA_XFER_INVALID_FORMAT, + WA_XFER_UNEXPECTED_SEGMENT_NUMBER, + WA_XFER_STATUS_RPIPE_TYPE_MISMATCH, +}; + +/** [WUSB] section 8.3.3.4 */ +struct wa_xfer_result { + struct wa_notif_hdr hdr; + __le32 dwTransferID; + __le32 dwTransferLength; + u8 bTransferSegment; + u8 bTransferStatus; + __le32 dwNumOfPackets; +} __attribute__((packed)); + +/** + * Wire Adapter Class Descriptor ([WUSB] section 8.5.2.7). + * + * NOTE: u16 fields are read Little Endian from the hardware. + * + * @bNumPorts is the original max number of devices that the host can + * connect; we might chop this so the stack can handle + * it. In case you need to access it, use wusbhc->ports_max + * if it is a Wireless USB WA. + */ +struct usb_wa_descriptor { + u8 bLength; + u8 bDescriptorType; + u16 bcdWAVersion; + u8 bNumPorts; /* don't use!! 
*/ + u8 bmAttributes; /* Reserved == 0 */ + u16 wNumRPipes; + u16 wRPipeMaxBlock; + u8 bRPipeBlockSize; + u8 bPwrOn2PwrGood; + u8 bNumMMCIEs; + u8 DeviceRemovable; /* FIXME: in DWA this is up to 16 bytes */ +} __attribute__((packed)); + +/** + * HWA Device Information Buffer (WUSB1.0[T8.54]) + */ +struct hwa_dev_info { + u8 bmDeviceAvailability[32]; /* FIXME: ignored for now */ + u8 bDeviceAddress; + __le16 wPHYRates; + u8 bmDeviceAttribute; +} __attribute__((packed)); + +#endif /* #ifndef __LINUX_USB_WUSB_WA_H */ diff --git a/include/linux/usb/wusb.h b/include/linux/usb/wusb.h new file mode 100644 index 00000000000..5f401b644ed --- /dev/null +++ b/include/linux/usb/wusb.h @@ -0,0 +1,376 @@ +/* + * Wireless USB Standard Definitions + * Event Size Tables + * + * Copyright (C) 2005-2006 Intel Corporation + * Inaky Perez-Gonzalez + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version + * 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + * + * + * FIXME: docs + * FIXME: organize properly, group logically + * + * All the event structures are defined in uwb/spec.h, as they are + * common to the WHCI and WUSB radio control interfaces. + */ + +#ifndef __WUSB_H__ +#define __WUSB_H__ + +#include +#include +#include +#include +#include + +/** + * WUSB Information Element header + * + * I don't know why, they decided to make it different to the MBOA MAC + * IE Header; beats me. 
+ */ +struct wuie_hdr { + u8 bLength; + u8 bIEIdentifier; +} __attribute__((packed)); + +enum { + WUIE_ID_WCTA = 0x80, + WUIE_ID_CONNECTACK, + WUIE_ID_HOST_INFO, + WUIE_ID_CHANGE_ANNOUNCE, + WUIE_ID_DEVICE_DISCONNECT, + WUIE_ID_HOST_DISCONNECT, + WUIE_ID_KEEP_ALIVE = 0x89, + WUIE_ID_ISOCH_DISCARD, + WUIE_ID_RESET_DEVICE, +}; + +/** + * Maximum number of array elements in a WUSB IE. + * + * WUSB1.0[7.5 before table 7-38] says that WUSB IEs that + * are "arrays" have to be limited to 4 elements. So we define it + * like that to ease up and submit only the needed size. + */ +#define WUIE_ELT_MAX 4 + +/** + * Wrapper for the data that defines a CHID, a CDID or a CK + * + * WUSB defines that CHIDs, CDIDs and CKs are a 16 byte string of + * data. In order to avoid confusion and enforce types, we wrap it. + * + * Make it packed, as we use it in some hw definitions. + */ +struct wusb_ckhdid { + u8 data[16]; +} __attribute__((packed)); + +const static +struct wusb_ckhdid wusb_ckhdid_zero = { .data = { 0 } }; + +#define WUSB_CKHDID_STRSIZE (3 * sizeof(struct wusb_ckhdid) + 1) + +/** + * WUSB IE: Host Information (WUSB1.0[7.5.2]) + * + * Used to provide information about the host to the Wireless USB + * devices in range (CHID can be used as an ASCII string). + */ +struct wuie_host_info { + struct wuie_hdr hdr; + __le16 attributes; + struct wusb_ckhdid CHID; +} __attribute__((packed)); + +/** + * WUSB IE: Connect Ack (WUSB1.0[7.5.1]) + * + * Used to acknowledge device connect requests. See note for + * WUIE_ELT_MAX. 
+ */ +struct wuie_connect_ack { + struct wuie_hdr hdr; + struct { + struct wusb_ckhdid CDID; + u8 bDeviceAddress; /* 0 means unused */ + u8 bReserved; + } blk[WUIE_ELT_MAX]; +} __attribute__((packed)); + +/** + * WUSB IE Host Information Element, Connect Availability + * + * WUSB1.0[7.5.2], bmAttributes description + */ +enum { + WUIE_HI_CAP_RECONNECT = 0, + WUIE_HI_CAP_LIMITED, + WUIE_HI_CAP_RESERVED, + WUIE_HI_CAP_ALL, +}; + +/** + * WUSB IE: Channel Stop (WUSB1.0[7.5.8]) + * + * Tells devices the host is going to stop sending MMCs and will disappear. + */ +struct wuie_channel_stop { + struct wuie_hdr hdr; + u8 attributes; + u8 timestamp[3]; +} __attribute__((packed)); + +/** + * WUSB IE: Keepalive (WUSB1.0[7.5.9]) + * + * Ask device(s) to send keepalives. + */ +struct wuie_keep_alive { + struct wuie_hdr hdr; + u8 bDeviceAddress[WUIE_ELT_MAX]; +} __attribute__((packed)); + +/** + * WUSB IE: Reset device (WUSB1.0[7.5.11]) + * + * Tell device to reset; in all truth, we can fit 4 CDIDs, but we only + * use it for one at a time... + * + * In any case, this request is a wee bit silly: why don't they target + * by address?? + */ +struct wuie_reset { + struct wuie_hdr hdr; + struct wusb_ckhdid CDID; +} __attribute__((packed)); + +/** + * WUSB IE: Disconnect device (WUSB1.0[7.5.11]) + * + * Tell device to disconnect; we can fit 4 addresses, but we only use + * it for one at a time... + */ +struct wuie_disconnect { + struct wuie_hdr hdr; + u8 bDeviceAddress; + u8 padding; +} __attribute__((packed)); + +/** + * WUSB IE: Host disconnect ([WUSB] section 7.5.5) + * + * Tells all connected devices to disconnect. 
+ */ +struct wuie_host_disconnect { + struct wuie_hdr hdr; +} __attribute__((packed)); + +/** + * WUSB Device Notification header (WUSB1.0[7.6]) + */ +struct wusb_dn_hdr { + u8 bType; + u8 notifdata[]; +} __attribute__((packed)); + +/** Device Notification codes (WUSB1.0[Table 7-54]) */ +enum WUSB_DN { + WUSB_DN_CONNECT = 0x01, + WUSB_DN_DISCONNECT = 0x02, + WUSB_DN_EPRDY = 0x03, + WUSB_DN_MASAVAILCHANGED = 0x04, + WUSB_DN_RWAKE = 0x05, + WUSB_DN_SLEEP = 0x06, + WUSB_DN_ALIVE = 0x07, +}; + +/** WUSB Device Notification Connect */ +struct wusb_dn_connect { + struct wusb_dn_hdr hdr; + __le16 attributes; + struct wusb_ckhdid CDID; +} __attribute__((packed)); + +static inline int wusb_dn_connect_prev_dev_addr(const struct wusb_dn_connect *dn) +{ + return le16_to_cpu(dn->attributes) & 0xff; +} + +static inline int wusb_dn_connect_new_connection(const struct wusb_dn_connect *dn) +{ + return (le16_to_cpu(dn->attributes) >> 8) & 0x1; +} + +static inline int wusb_dn_connect_beacon_behavior(const struct wusb_dn_connect *dn) +{ + return (le16_to_cpu(dn->attributes) >> 9) & 0x03; +} + +/** Device is alive (aka: pong) (WUSB1.0[7.6.7]) */ +struct wusb_dn_alive { + struct wusb_dn_hdr hdr; +} __attribute__((packed)); + +/** Device is disconnecting (WUSB1.0[7.6.2]) */ +struct wusb_dn_disconnect { + struct wusb_dn_hdr hdr; +} __attribute__((packed)); + +/* General constants */ +enum { + WUSB_TRUST_TIMEOUT_MS = 4000, /* [WUSB] section 4.15.1 */ +}; + +static inline size_t ckhdid_printf(char *pr_ckhdid, size_t size, + const struct wusb_ckhdid *ckhdid) +{ + return scnprintf(pr_ckhdid, size, + "%02hx %02hx %02hx %02hx %02hx %02hx %02hx %02hx " + "%02hx %02hx %02hx %02hx %02hx %02hx %02hx %02hx", + ckhdid->data[0], ckhdid->data[1], + ckhdid->data[2], ckhdid->data[3], + ckhdid->data[4], ckhdid->data[5], + ckhdid->data[6], ckhdid->data[7], + ckhdid->data[8], ckhdid->data[9], + ckhdid->data[10], ckhdid->data[11], + ckhdid->data[12], ckhdid->data[13], + ckhdid->data[14], ckhdid->data[15]); 
+} + +/* + * WUSB Crypto stuff (WUSB1.0[6]) + */ + +extern const char *wusb_et_name(u8); + +/** + * WUSB key index WUSB1.0[7.3.2.4], for usage when setting keys for + * the host or the device. + */ +static inline u8 wusb_key_index(int index, int type, int originator) +{ + return (originator << 6) | (type << 4) | index; +} + +#define WUSB_KEY_INDEX_TYPE_PTK 0 /* for HWA only */ +#define WUSB_KEY_INDEX_TYPE_ASSOC 1 +#define WUSB_KEY_INDEX_TYPE_GTK 2 +#define WUSB_KEY_INDEX_ORIGINATOR_HOST 0 +#define WUSB_KEY_INDEX_ORIGINATOR_DEVICE 1 + +/* A CCM Nonce, defined in WUSB1.0[6.4.1] */ +struct aes_ccm_nonce { + u8 sfn[6]; /* Little Endian */ + u8 tkid[3]; /* LE */ + struct uwb_dev_addr dest_addr; + struct uwb_dev_addr src_addr; +} __attribute__((packed)); + +/* A CCM operation label, defined on WUSB1.0[6.5.x] */ +struct aes_ccm_label { + u8 data[14]; +} __attribute__((packed)); + +/* + * Input to the key derivation sequence defined in + * WUSB1.0[6.5.1]. Rest of the data is in the CCM Nonce passed to the + * PRF function. + */ +struct wusb_keydvt_in { + u8 hnonce[16]; + u8 dnonce[16]; +} __attribute__((packed)); + +/* + * Output from the key derivation sequence defined in + * WUSB1.0[6.5.1]. 
+ */ +struct wusb_keydvt_out { + u8 kck[16]; + u8 ptk[16]; +} __attribute__((packed)); + +/* Pseudo Random Function WUSB1.0[6.5] */ +extern int wusb_crypto_init(void); +extern void wusb_crypto_exit(void); +extern ssize_t wusb_prf(void *out, size_t out_size, + const u8 key[16], const struct aes_ccm_nonce *_n, + const struct aes_ccm_label *a, + const void *b, size_t blen, size_t len); + +static inline int wusb_prf_64(void *out, size_t out_size, const u8 key[16], + const struct aes_ccm_nonce *n, + const struct aes_ccm_label *a, + const void *b, size_t blen) +{ + return wusb_prf(out, out_size, key, n, a, b, blen, 64); +} + +static inline int wusb_prf_128(void *out, size_t out_size, const u8 key[16], + const struct aes_ccm_nonce *n, + const struct aes_ccm_label *a, + const void *b, size_t blen) +{ + return wusb_prf(out, out_size, key, n, a, b, blen, 128); +} + +static inline int wusb_prf_256(void *out, size_t out_size, const u8 key[16], + const struct aes_ccm_nonce *n, + const struct aes_ccm_label *a, + const void *b, size_t blen) +{ + return wusb_prf(out, out_size, key, n, a, b, blen, 256); +} + +/* Key derivation WUSB1.0[6.5.1] */ +static inline int wusb_key_derive(struct wusb_keydvt_out *keydvt_out, + const u8 key[16], + const struct aes_ccm_nonce *n, + const struct wusb_keydvt_in *keydvt_in) +{ + const struct aes_ccm_label a = { .data = "Pair-wise keys" }; + return wusb_prf_256(keydvt_out, sizeof(*keydvt_out), key, n, &a, + keydvt_in, sizeof(*keydvt_in)); +} + +/* + * Out-of-band MIC Generation WUSB1.0[6.5.2] + * + * Compute the MIC over @key, @n and @hs and place it in @mic_out. + * + * @mic_out: Where to place the 8 byte MIC tag + * @key: KCK from the derivation process + * @n: CCM nonce, n->sfn == 0, TKID as established in the + * process. + * @hs: Handshake struct for phase 2 of the 4-way. + * hs->bStatus and hs->bReserved are zero. 
+ * hs->bMessageNumber is 2 (WUSB1.0[7.3.2.5.2] + * hs->dest_addr is the device's USB address padded with 0 + * hs->src_addr is the hosts's UWB device address + * hs->mic is ignored (as we compute that value). + */ +static inline int wusb_oob_mic(u8 mic_out[8], const u8 key[16], + const struct aes_ccm_nonce *n, + const struct usb_handshake *hs) +{ + const struct aes_ccm_label a = { .data = "out-of-bandMIC" }; + return wusb_prf_64(mic_out, 8, key, n, &a, + hs, sizeof(*hs) - sizeof(hs->MIC)); +} + +#endif /* #ifndef __WUSB_H__ */ -- cgit v1.2.3-70-g09d2 From b60066c141997ac2e4ef08459b75638ae86ae781 Mon Sep 17 00:00:00 2001 From: David Vrabel Date: Wed, 17 Sep 2008 16:34:40 +0100 Subject: uwb: add symlinks in sysfs between radio controllers and PALs Add a facility for PALs to have symlinks to their radio controller (and vice-versa) and make WUSB host controllers use this. Signed-off-by: David Vrabel --- drivers/usb/wusbcore/pal.c | 3 +++ drivers/usb/wusbcore/wusbhc.c | 16 +++++++++------- drivers/uwb/pal.c | 20 ++++++++++++++++++++ include/linux/uwb.h | 6 +++++- 4 files changed, 37 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/drivers/usb/wusbcore/pal.c b/drivers/usb/wusbcore/pal.c index cc126b44473..7cc51e9905c 100644 --- a/drivers/usb/wusbcore/pal.c +++ b/drivers/usb/wusbcore/pal.c @@ -26,6 +26,9 @@ int wusbhc_pal_register(struct wusbhc *wusbhc) { uwb_pal_init(&wusbhc->pal); + wusbhc->pal.name = "wusbhc"; + wusbhc->pal.device = wusbhc->usb_hcd.self.controller; + return uwb_pal_register(wusbhc->uwb_rc, &wusbhc->pal); } diff --git a/drivers/usb/wusbcore/wusbhc.c b/drivers/usb/wusbcore/wusbhc.c index 1149b1e59c8..07c63a31c79 100644 --- a/drivers/usb/wusbcore/wusbhc.c +++ b/drivers/usb/wusbcore/wusbhc.c @@ -192,13 +192,8 @@ int wusbhc_create(struct wusbhc *wusbhc) result = wusbhc_sec_create(wusbhc); if (result < 0) goto error_sec_create; - result = wusbhc_pal_register(wusbhc); - if (result < 0) - goto error_pal_register; return 0; 
-error_pal_register: - wusbhc_sec_destroy(wusbhc); error_sec_create: wusbhc_rh_destroy(wusbhc); error_rh_create: @@ -235,7 +230,14 @@ int wusbhc_b_create(struct wusbhc *wusbhc) dev_err(dev, "Cannot register WUSBHC attributes: %d\n", result); goto error_create_attr_group; } - /* Yep, I plan to add stuff here... */ + + result = wusbhc_pal_register(wusbhc); + if (result < 0) + goto error_pal_register; + return 0; + +error_pal_register: + sysfs_remove_group(wusbhc_kobj(wusbhc), &wusbhc_attr_group); error_create_attr_group: return result; } @@ -243,13 +245,13 @@ EXPORT_SYMBOL_GPL(wusbhc_b_create); void wusbhc_b_destroy(struct wusbhc *wusbhc) { + wusbhc_pal_unregister(wusbhc); sysfs_remove_group(wusbhc_kobj(wusbhc), &wusbhc_attr_group); } EXPORT_SYMBOL_GPL(wusbhc_b_destroy); void wusbhc_destroy(struct wusbhc *wusbhc) { - wusbhc_pal_unregister(wusbhc); wusbhc_sec_destroy(wusbhc); wusbhc_rh_destroy(wusbhc); wusbhc_devconnect_destroy(wusbhc); diff --git a/drivers/uwb/pal.c b/drivers/uwb/pal.c index 5508993a820..1afb38eacb9 100644 --- a/drivers/uwb/pal.c +++ b/drivers/uwb/pal.c @@ -39,6 +39,21 @@ EXPORT_SYMBOL_GPL(uwb_pal_init); */ int uwb_pal_register(struct uwb_rc *rc, struct uwb_pal *pal) { + int ret; + + if (pal->device) { + ret = sysfs_create_link(&pal->device->kobj, + &rc->uwb_dev.dev.kobj, "uwb_rc"); + if (ret < 0) + return ret; + ret = sysfs_create_link(&rc->uwb_dev.dev.kobj, + &pal->device->kobj, pal->name); + if (ret < 0) { + sysfs_remove_link(&pal->device->kobj, "uwb_rc"); + return ret; + } + } + spin_lock(&rc->pal_lock); list_add(&pal->node, &rc->pals); spin_unlock(&rc->pal_lock); @@ -57,6 +72,11 @@ void uwb_pal_unregister(struct uwb_rc *rc, struct uwb_pal *pal) spin_lock(&rc->pal_lock); list_del(&pal->node); spin_unlock(&rc->pal_lock); + + if (pal->device) { + sysfs_remove_link(&rc->uwb_dev.dev.kobj, pal->name); + sysfs_remove_link(&pal->device->kobj, "uwb_rc"); + } } EXPORT_SYMBOL_GPL(uwb_pal_unregister); diff --git a/include/linux/uwb.h b/include/linux/uwb.h 
index 0cd35937e12..f9ccbd9a2ce 100644 --- a/include/linux/uwb.h +++ b/include/linux/uwb.h @@ -361,6 +361,9 @@ struct uwb_rc { /** * struct uwb_pal - a UWB PAL + * @name: descriptive name for this PAL (wusbhc, wlp, etc.). + * @device: a device for the PAL. Used to link the PAL and the radio + * controller in sysfs. * @new_rsv: called when a peer requests a reservation (may be NULL if * the PAL cannot accept reservation requests). * @@ -379,7 +382,8 @@ struct uwb_rc { */ struct uwb_pal { struct list_head node; - + const char *name; + struct device *device; void (*new_rsv)(struct uwb_rsv *rsv); }; -- cgit v1.2.3-70-g09d2 From a0ad05c75aa362c91f4d9cd91ff375a739574dd8 Mon Sep 17 00:00:00 2001 From: Thomas Renninger Date: Mon, 1 Sep 2008 14:27:02 +0200 Subject: Introduce FW_BUG, FW_WARN and FW_INFO to consistenly tell users about BIOS bugs The idea is to add this to printk after the severity: printk(KERN_ERR FW_BUG "This is not our fault, BIOS developer: fix it by simply add ...\n"); If a Firmware issue should be hidden, because it is worked around, but you still want to see something popping up e.g. for info only: printk(KERN_INFO FW_INFO "This is done stupid, we can handle it, but it should better be avoided in future\n"); or on the Linuxfirmwarekit to tell vendors that they did something stupid or wrong without bothering the user: printk(KERN_INFO FW_BUG "This is done stupid, we can handle it, but it should better be avoided in future\n"); Some use cases: - If a user sees a [Firmware Bug] message in the kernel he should first update the BIOS before wasting time with debugging and submitting on old firmware code to mailing lists. - The linuxfirmwarekit (http://www.linuxfirmwarekit.org) tries to detect firmware bugs. It currently is doing that in userspace which results in: - Huge test scripts that could be a one liner in the kernel - A lot of BIOS bugs are already absorbed by the kernel What do we need such a stupid linuxfirmwarekit for? 
- Vendors: Can test their BIOSes for Linux compatibility. There will be the time when vendors realize that the test utils on Linux are more strict and using them increases the quality and stability of their products. - Vendors: Can easily fix up their BIOSes and be more Linux compatible by: dmesg |grep "Firmware Bug" and send the result to their BIOS developer colleagues who should know what the messages are about and how to fix them, without the need of studying kernel code. - Distributions: can do first automated HW/BIOS checks. This can then be done without the need of asking kernel developers who need to dig down the code and explain the details. Certification can/will just be rejected until dmesg |grep "Firmware Bug" is empty. - Thus this can be used as an instrument to enforce cleaner BIOS code. Currently every stupid Windows ACPI bug is re-implemented in Linux which is a rather unfortunate situation. We already have the power to avoid this in e.g. memory or cpu hot-plug ACPI implementations, because Linux certification is a must for most vendors in the server area. Working towards being able to do that in the laptop area (vendors are starting to look at Linux here also and will use this tool) is the goal. At least provide them a tool to make it as easy for these guys (e.g. not needing to browse kernel code) as possible. - The ordinary Linux user: can go into the next shop, boot the firmwarekit on his most preferred machines. He chooses one without BIOS bugs. Unsupported HW is ok, he likes to try out latest projects which might support them or likes to dig on it on his own, but he hates to workaround broken BIOSes like hell. I double checked with the firmwarekit. 
There they have: So the mapping generally is (also depending on how likely the BIOS is to blame, this could sometimes be difficult): FW_INFO = INFO FW_WARN = WARN FW_BUG = FAIL For more info about the linuxfirmwarekit and why this is needed can be found here: http://www.linuxfirmwarekit.org While severity matches with the firmwarekit, it might be tricky to hide messages from the user. E.g. we recently found out that on HP BIOSes negative temperatures are returned, which seem to indicate that the thermal zone is invalid. We can work around that gracefully by ignoring the thermal zone and we do not want to bother the ordinary user with a frightening message: Firmware Bug: thermal management absolutely broken but want to hide it from the user. But in the linuxfirmwarekit this should be shown as a real show stopper (the temperatures could really be wrong, broken thermal management is one of the worst things that can happen and the BIOS guys of the machine must implement this properly). It is intended to do that (hide it from the user with KERN_INFO msg, but still print it as a BIOS bug) by: printk(KERN_INFO FW_BUG "Negativ temperature values detected. Try to workarounded, BIOS must get fixed\n"); Hope that works out..., no idea how to better hide it as printk is the only way to easily provide this functionality. Signed-off-by: Thomas Renninger Signed-off-by: Andi Kleen Signed-off-by: Len Brown --- include/linux/kernel.h | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 2651f805ba6..0b19848e380 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -190,6 +190,30 @@ extern int kernel_text_address(unsigned long addr); struct pid; extern struct pid *session_of_pgrp(struct pid *pgrp); +/* + * FW_BUG + * Add this to a message where you are sure the firmware is buggy or behaves + * really stupid or out of spec. 
Be aware that the responsible BIOS developer + * should be able to fix this issue or at least get a concrete idea of the + * problem by reading your message without the need of looking at the kernel + * code. + * + * Use it for definite and high priority BIOS bugs. + * + * FW_WARN + * Use it for not that clear (e.g. could the kernel messed up things already?) + * and medium priority BIOS bugs. + * + * FW_INFO + * Use this one if you want to tell the user or vendor about something + * suspicious, but generally harmless related to the firmware. + * + * Use it for information or very low priority BIOS bugs. + */ +#define FW_BUG "[Firmware Bug]: " +#define FW_WARN "[Firmware Warn]: " +#define FW_INFO "[Firmware Info]: " + #ifdef CONFIG_PRINTK asmlinkage int vprintk(const char *fmt, va_list args) __attribute__ ((format (printf, 1, 0))); -- cgit v1.2.3-70-g09d2 From 2075eb8d95612cadde91ef5be82691d97a2ea6c5 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Tue, 7 Oct 2008 10:57:54 -0700 Subject: rangetimer: fix x86 build failure for the !HRTIMERS case the timer peek function was on the wrong side of an ifdef, breaking for the !HRTIMERs case. Just provide an empty inline for that case since it doesn't make sense in that scenario. Signed-off-by: Arjan van de Ven --- include/linux/hrtimer.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index d93b1e1dc16..508ce20b8f9 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -283,6 +283,8 @@ static inline int hrtimer_is_hres_active(struct hrtimer *timer) return timer->base->cpu_base->hres_active; } +extern void hrtimer_peek_ahead_timers(void); + /* * The resolution of the clocks. 
The resolution value is returned in * the clock_getres() system call to give application programmers an @@ -305,6 +307,7 @@ static inline int hrtimer_is_hres_active(struct hrtimer *timer) * is expired in the next softirq when the clock was advanced. */ static inline void clock_was_set(void) { } +static inline void hrtimer_peek_ahead_timers(void) { } static inline void hres_timers_resume(void) { } @@ -328,7 +331,6 @@ extern ktime_t ktime_get_real(void); DECLARE_PER_CPU(struct tick_device, tick_cpu_device); -extern void hrtimer_peek_ahead_timers(void); /* Exported timer functions: */ -- cgit v1.2.3-70-g09d2 From a3cdcbfa8fb1fccfe48d359da86e99546610c562 Mon Sep 17 00:00:00 2001 From: Yevgeny Petrilin Date: Fri, 10 Oct 2008 12:01:37 -0700 Subject: mlx4_core: Add QP range reservation support To allow allocating an aligned range of consecutive QP numbers, add an interface to reserve an aligned range of QP numbers and have the QP allocation function always take a QP number. This will be used for RSS support in the mlx4_en Ethernet driver and also potentially by IPoIB RSS support. 
Signed-off-by: Yevgeny Petrilin Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx4/qp.c | 21 ++++++++++-- drivers/net/mlx4/alloc.c | 74 ++++++++++++++++++++++++++++++++++++++++- drivers/net/mlx4/mlx4.h | 2 ++ drivers/net/mlx4/qp.c | 45 +++++++++++++++++-------- include/linux/mlx4/device.h | 5 ++- 5 files changed, 129 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index baa01deb243..39167a797f9 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -451,6 +451,7 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, struct ib_qp_init_attr *init_attr, struct ib_udata *udata, int sqpn, struct mlx4_ib_qp *qp) { + int qpn; int err; mutex_init(&qp->mutex); @@ -545,9 +546,17 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, } } - err = mlx4_qp_alloc(dev->dev, sqpn, &qp->mqp); + if (sqpn) { + qpn = sqpn; + } else { + err = mlx4_qp_reserve_range(dev->dev, 1, 1, &qpn); + if (err) + goto err_wrid; + } + + err = mlx4_qp_alloc(dev->dev, qpn, &qp->mqp); if (err) - goto err_wrid; + goto err_qpn; /* * Hardware wants QPN written in big-endian order (after @@ -560,6 +569,10 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, return 0; +err_qpn: + if (!sqpn) + mlx4_qp_release_range(dev->dev, qpn, 1); + err_wrid: if (pd->uobject) { if (!init_attr->srq) @@ -655,6 +668,10 @@ static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp, mlx4_ib_unlock_cqs(send_cq, recv_cq); mlx4_qp_free(dev->dev, &qp->mqp); + + if (!is_sqp(dev, qp)) + mlx4_qp_release_range(dev->dev, qp->mqp.qpn, 1); + mlx4_mtt_cleanup(dev->dev, &qp->mtt); if (is_user) { diff --git a/drivers/net/mlx4/alloc.c b/drivers/net/mlx4/alloc.c index 096bca54bcf..e6c0d5bb5dc 100644 --- a/drivers/net/mlx4/alloc.c +++ b/drivers/net/mlx4/alloc.c @@ -65,10 +65,82 @@ u32 mlx4_bitmap_alloc(struct mlx4_bitmap *bitmap) void 
mlx4_bitmap_free(struct mlx4_bitmap *bitmap, u32 obj) { + mlx4_bitmap_free_range(bitmap, obj, 1); +} + +static unsigned long find_aligned_range(unsigned long *bitmap, + u32 start, u32 nbits, + int len, int align) +{ + unsigned long end, i; + +again: + start = ALIGN(start, align); + + while ((start < nbits) && test_bit(start, bitmap)) + start += align; + + if (start >= nbits) + return -1; + + end = start+len; + if (end > nbits) + return -1; + + for (i = start + 1; i < end; i++) { + if (test_bit(i, bitmap)) { + start = i + 1; + goto again; + } + } + + return start; +} + +u32 mlx4_bitmap_alloc_range(struct mlx4_bitmap *bitmap, int cnt, int align) +{ + u32 obj, i; + + if (likely(cnt == 1 && align == 1)) + return mlx4_bitmap_alloc(bitmap); + + spin_lock(&bitmap->lock); + + obj = find_aligned_range(bitmap->table, bitmap->last, + bitmap->max, cnt, align); + if (obj >= bitmap->max) { + bitmap->top = (bitmap->top + bitmap->max) & bitmap->mask; + obj = find_aligned_range(bitmap->table, 0, + bitmap->max, + cnt, align); + } + + if (obj < bitmap->max) { + for (i = 0; i < cnt; i++) + set_bit(obj + i, bitmap->table); + if (obj == bitmap->last) { + bitmap->last = (obj + cnt); + if (bitmap->last >= bitmap->max) + bitmap->last = 0; + } + obj |= bitmap->top; + } else + obj = -1; + + spin_unlock(&bitmap->lock); + + return obj; +} + +void mlx4_bitmap_free_range(struct mlx4_bitmap *bitmap, u32 obj, int cnt) +{ + u32 i; + obj &= bitmap->max - 1; spin_lock(&bitmap->lock); - clear_bit(obj, bitmap->table); + for (i = 0; i < cnt; i++) + clear_bit(obj + i, bitmap->table); bitmap->last = min(bitmap->last, obj); bitmap->top = (bitmap->top + bitmap->max) & bitmap->mask; spin_unlock(&bitmap->lock); diff --git a/drivers/net/mlx4/mlx4.h b/drivers/net/mlx4/mlx4.h index 5337e3ac3e7..b55ddab73f6 100644 --- a/drivers/net/mlx4/mlx4.h +++ b/drivers/net/mlx4/mlx4.h @@ -288,6 +288,8 @@ static inline struct mlx4_priv *mlx4_priv(struct mlx4_dev *dev) u32 mlx4_bitmap_alloc(struct mlx4_bitmap *bitmap); void 
mlx4_bitmap_free(struct mlx4_bitmap *bitmap, u32 obj); +u32 mlx4_bitmap_alloc_range(struct mlx4_bitmap *bitmap, int cnt, int align); +void mlx4_bitmap_free_range(struct mlx4_bitmap *bitmap, u32 obj, int cnt); int mlx4_bitmap_init(struct mlx4_bitmap *bitmap, u32 num, u32 mask, u32 reserved); void mlx4_bitmap_cleanup(struct mlx4_bitmap *bitmap); diff --git a/drivers/net/mlx4/qp.c b/drivers/net/mlx4/qp.c index c49a86044bf..98e0c40ba36 100644 --- a/drivers/net/mlx4/qp.c +++ b/drivers/net/mlx4/qp.c @@ -147,19 +147,42 @@ int mlx4_qp_modify(struct mlx4_dev *dev, struct mlx4_mtt *mtt, } EXPORT_SYMBOL_GPL(mlx4_qp_modify); -int mlx4_qp_alloc(struct mlx4_dev *dev, int sqpn, struct mlx4_qp *qp) +int mlx4_qp_reserve_range(struct mlx4_dev *dev, int cnt, int align, int *base) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + struct mlx4_qp_table *qp_table = &priv->qp_table; + int qpn; + + qpn = mlx4_bitmap_alloc_range(&qp_table->bitmap, cnt, align); + if (qpn == -1) + return -ENOMEM; + + *base = qpn; + return 0; +} +EXPORT_SYMBOL_GPL(mlx4_qp_reserve_range); + +void mlx4_qp_release_range(struct mlx4_dev *dev, int base_qpn, int cnt) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + struct mlx4_qp_table *qp_table = &priv->qp_table; + if (base_qpn < dev->caps.sqp_start + 8) + return; + + mlx4_bitmap_free_range(&qp_table->bitmap, base_qpn, cnt); +} +EXPORT_SYMBOL_GPL(mlx4_qp_release_range); + +int mlx4_qp_alloc(struct mlx4_dev *dev, int qpn, struct mlx4_qp *qp) { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_qp_table *qp_table = &priv->qp_table; int err; - if (sqpn) - qp->qpn = sqpn; - else { - qp->qpn = mlx4_bitmap_alloc(&qp_table->bitmap); - if (qp->qpn == -1) - return -ENOMEM; - } + if (!qpn) + return -EINVAL; + + qp->qpn = qpn; err = mlx4_table_get(dev, &qp_table->qp_table, qp->qpn); if (err) @@ -208,9 +231,6 @@ err_put_qp: mlx4_table_put(dev, &qp_table->qp_table, qp->qpn); err_out: - if (!sqpn) - mlx4_bitmap_free(&qp_table->bitmap, qp->qpn); - return err; } 
EXPORT_SYMBOL_GPL(mlx4_qp_alloc); @@ -239,9 +259,6 @@ void mlx4_qp_free(struct mlx4_dev *dev, struct mlx4_qp *qp) mlx4_table_put(dev, &qp_table->altc_table, qp->qpn); mlx4_table_put(dev, &qp_table->auxc_table, qp->qpn); mlx4_table_put(dev, &qp_table->qp_table, qp->qpn); - - if (qp->qpn >= dev->caps.sqp_start + 8) - mlx4_bitmap_free(&qp_table->bitmap, qp->qpn); } EXPORT_SYMBOL_GPL(mlx4_qp_free); diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index b2f94446831..d21e879f3c9 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -400,7 +400,10 @@ int mlx4_cq_alloc(struct mlx4_dev *dev, int nent, struct mlx4_mtt *mtt, int collapsed); void mlx4_cq_free(struct mlx4_dev *dev, struct mlx4_cq *cq); -int mlx4_qp_alloc(struct mlx4_dev *dev, int sqpn, struct mlx4_qp *qp); +int mlx4_qp_reserve_range(struct mlx4_dev *dev, int cnt, int align, int *base); +void mlx4_qp_release_range(struct mlx4_dev *dev, int base_qpn, int cnt); + +int mlx4_qp_alloc(struct mlx4_dev *dev, int qpn, struct mlx4_qp *qp); void mlx4_qp_free(struct mlx4_dev *dev, struct mlx4_qp *qp); int mlx4_srq_alloc(struct mlx4_dev *dev, u32 pdn, struct mlx4_mtt *mtt, -- cgit v1.2.3-70-g09d2 From c865d2f6eb160c15b74245b4891c8e945d67d96c Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Tue, 19 Aug 2008 16:53:26 -0600 Subject: PNP: convert the last few pnp_info() uses to printk() There are only a few remaining uses of pnp_info(), so I just converted them to printk and removed the pnp_err(), pnp_info(), pnp_warn(), and pnp_dbg() wrappers. I also removed a couple debug messages that don't seem useful any more ("driver registered", "driver unregistered", "driver attached"). 
Signed-off-by: Bjorn Helgaas Signed-off-by: Andi Kleen Signed-off-by: Len Brown --- drivers/pnp/driver.c | 4 ---- drivers/pnp/pnpacpi/core.c | 6 +++--- include/linux/pnp.h | 10 ---------- 3 files changed, 3 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/drivers/pnp/driver.c b/drivers/pnp/driver.c index d3f869ee1d9..54673a4f590 100644 --- a/drivers/pnp/driver.c +++ b/drivers/pnp/driver.c @@ -114,7 +114,6 @@ static int pnp_device_probe(struct device *dev) } else goto fail; - dev_dbg(dev, "driver attached\n"); return error; fail: @@ -210,8 +209,6 @@ struct bus_type pnp_bus_type = { int pnp_register_driver(struct pnp_driver *drv) { - pnp_dbg("the driver '%s' has been registered", drv->name); - drv->driver.name = drv->name; drv->driver.bus = &pnp_bus_type; @@ -221,7 +218,6 @@ int pnp_register_driver(struct pnp_driver *drv) void pnp_unregister_driver(struct pnp_driver *drv) { driver_unregister(&drv->driver); - pnp_dbg("the driver '%s' has been unregistered", drv->name); } /** diff --git a/drivers/pnp/pnpacpi/core.c b/drivers/pnp/pnpacpi/core.c index c1b9ea34977..67c651bcaf7 100644 --- a/drivers/pnp/pnpacpi/core.c +++ b/drivers/pnp/pnpacpi/core.c @@ -255,14 +255,14 @@ int pnpacpi_disabled __initdata; static int __init pnpacpi_init(void) { if (acpi_disabled || pnpacpi_disabled) { - pnp_info("PnP ACPI: disabled"); + printk(KERN_INFO "pnp: PnP ACPI: disabled\n"); return 0; } - pnp_info("PnP ACPI init"); + printk(KERN_INFO "pnp: PnP ACPI init\n"); pnp_register_protocol(&pnpacpi_protocol); register_acpi_bus_type(&acpi_pnp_bus); acpi_get_devices(NULL, pnpacpi_add_device_handler, NULL, NULL); - pnp_info("PnP ACPI: found %d devices", num); + printk(KERN_INFO "pnp: PnP ACPI: found %d devices\n", num); unregister_acpi_bus_type(&acpi_pnp_bus); pnp_platform_devices = 1; return 0; diff --git a/include/linux/pnp.h b/include/linux/pnp.h index be764e514e3..05daecec16c 100644 --- a/include/linux/pnp.h +++ b/include/linux/pnp.h @@ -483,14 +483,4 @@ static inline 
void pnp_unregister_driver(struct pnp_driver *drv) { } #endif /* CONFIG_PNP */ -#define pnp_err(format, arg...) printk(KERN_ERR "pnp: " format "\n" , ## arg) -#define pnp_info(format, arg...) printk(KERN_INFO "pnp: " format "\n" , ## arg) -#define pnp_warn(format, arg...) printk(KERN_WARNING "pnp: " format "\n" , ## arg) - -#ifdef CONFIG_PNP_DEBUG -#define pnp_dbg(format, arg...) printk(KERN_DEBUG "pnp: " format "\n" , ## arg) -#else -#define pnp_dbg(format, arg...) do {} while (0) -#endif - #endif /* _LINUX_PNP_H */ -- cgit v1.2.3-70-g09d2 From fa89b6089b5f4c7a5244b642caaca3e72b06ebe4 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Wed, 24 Sep 2008 19:04:32 -0700 Subject: ACPI: remove unused have_arch_parse_srat This was a workaround for 32bit numa SRAT processing, and we removed those workarounds, making 32 bit more like 64 bit. HAVE_ARCH_PARSE_SRAT is no longer defined anywhere. Signed-off-by: Yinghai Lu Signed-off-by: Andrew Morton Signed-off-by: Len Brown --- include/linux/acpi.h | 8 -------- 1 file changed, 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 702f79dad16..fd6a452b0ce 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -94,18 +94,10 @@ int acpi_parse_mcfg (struct acpi_table_header *header); void acpi_table_print_madt_entry (struct acpi_subtable_header *madt); /* the following four functions are architecture-dependent */ -#ifdef CONFIG_HAVE_ARCH_PARSE_SRAT -#define NR_NODE_MEMBLKS MAX_NUMNODES -#define acpi_numa_slit_init(slit) do {} while (0) -#define acpi_numa_processor_affinity_init(pa) do {} while (0) -#define acpi_numa_memory_affinity_init(ma) do {} while (0) -#define acpi_numa_arch_fixup() do {} while (0) -#else void acpi_numa_slit_init (struct acpi_table_slit *slit); void acpi_numa_processor_affinity_init (struct acpi_srat_cpu_affinity *pa); void acpi_numa_memory_affinity_init (struct acpi_srat_mem_affinity *ma); void acpi_numa_arch_fixup(void); -#endif #ifdef 
CONFIG_ACPI_HOTPLUG_CPU /* Arch dependent functions for cpu hotplug support */ -- cgit v1.2.3-70-g09d2 From 40b8606253552109815786e5d4b0de98782d31f5 Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Wed, 15 Oct 2008 14:20:28 +1100 Subject: DECLARE_PER_CPU needs linux/percpu.h Signed-off-by: Stephen Rothwell --- include/linux/hrtimer.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 508ce20b8f9..1e6f731381d 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -20,6 +20,8 @@ #include #include #include +#include <linux/percpu.h> + struct hrtimer_clock_base; struct hrtimer_cpu_base; -- cgit v1.2.3-70-g09d2 From 3481f21097cb560392c411377893b5109fbde557 Mon Sep 17 00:00:00 2001 From: Youquan Song Date: Thu, 16 Oct 2008 16:31:55 -0700 Subject: dmar: context cache and IOTLB invalidation using queued invalidation Implement context cache invalidate and IOTLB invalidation using queued invalidation interface. This interface will be used by DMA remapping, when queued invalidation is supported.
Signed-off-by: Youquan Song Signed-off-by: Suresh Siddha Signed-off-by: David Woodhouse --- drivers/pci/dmar.c | 56 +++++++++++++++++++++++++++++++++++++++++++++ include/linux/intel-iommu.h | 21 +++++++++++++++++ 2 files changed, 77 insertions(+) (limited to 'include/linux') diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c index b64cec19054..0f409e23631 100644 --- a/drivers/pci/dmar.c +++ b/drivers/pci/dmar.c @@ -645,6 +645,62 @@ void qi_global_iec(struct intel_iommu *iommu) qi_submit_sync(&desc, iommu); } +int qi_flush_context(struct intel_iommu *iommu, u16 did, u16 sid, u8 fm, + u64 type, int non_present_entry_flush) +{ + + struct qi_desc desc; + + if (non_present_entry_flush) { + if (!cap_caching_mode(iommu->cap)) + return 1; + else + did = 0; + } + + desc.low = QI_CC_FM(fm) | QI_CC_SID(sid) | QI_CC_DID(did) + | QI_CC_GRAN(type) | QI_CC_TYPE; + desc.high = 0; + + qi_submit_sync(&desc, iommu); + + return 0; + +} + +int qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr, + unsigned int size_order, u64 type, + int non_present_entry_flush) +{ + u8 dw = 0, dr = 0; + + struct qi_desc desc; + int ih = 0; + + if (non_present_entry_flush) { + if (!cap_caching_mode(iommu->cap)) + return 1; + else + did = 0; + } + + if (cap_write_drain(iommu->cap)) + dw = 1; + + if (cap_read_drain(iommu->cap)) + dr = 1; + + desc.low = QI_IOTLB_DID(did) | QI_IOTLB_DR(dr) | QI_IOTLB_DW(dw) + | QI_IOTLB_GRAN(type) | QI_IOTLB_TYPE; + desc.high = QI_IOTLB_ADDR(addr) | QI_IOTLB_IH(ih) + | QI_IOTLB_AM(size_order); + + qi_submit_sync(&desc, iommu); + + return 0; + +} + /* * Enable Queued Invalidation interface. This is a must to support * interrupt-remapping. 
Also used by DMA-remapping, which replaces diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index 2e117f30a76..0c5f5e49107 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -127,6 +127,7 @@ static inline void dmar_writeq(void __iomem *addr, u64 val) /* IOTLB_REG */ +#define DMA_TLB_FLUSH_GRANU_OFFSET 60 #define DMA_TLB_GLOBAL_FLUSH (((u64)1) << 60) #define DMA_TLB_DSI_FLUSH (((u64)2) << 60) #define DMA_TLB_PSI_FLUSH (((u64)3) << 60) @@ -140,6 +141,7 @@ static inline void dmar_writeq(void __iomem *addr, u64 val) #define DMA_TLB_MAX_SIZE (0x3f) /* INVALID_DESC */ +#define DMA_CCMD_INVL_GRANU_OFFSET 61 #define DMA_ID_TLB_GLOBAL_FLUSH (((u64)1) << 3) #define DMA_ID_TLB_DSI_FLUSH (((u64)2) << 3) #define DMA_ID_TLB_PSI_FLUSH (((u64)3) << 3) @@ -238,6 +240,19 @@ enum { #define QI_IWD_STATUS_DATA(d) (((u64)d) << 32) #define QI_IWD_STATUS_WRITE (((u64)1) << 5) +#define QI_IOTLB_DID(did) (((u64)did) << 16) +#define QI_IOTLB_DR(dr) (((u64)dr) << 7) +#define QI_IOTLB_DW(dw) (((u64)dw) << 6) +#define QI_IOTLB_GRAN(gran) (((u64)gran) >> (DMA_TLB_FLUSH_GRANU_OFFSET-4)) +#define QI_IOTLB_ADDR(addr) (((u64)addr) & PAGE_MASK_4K) +#define QI_IOTLB_IH(ih) (((u64)ih) << 6) +#define QI_IOTLB_AM(am) (((u8)am)) + +#define QI_CC_FM(fm) (((u64)fm) << 48) +#define QI_CC_SID(sid) (((u64)sid) << 32) +#define QI_CC_DID(did) (((u64)did) << 16) +#define QI_CC_GRAN(gran) (((u64)gran) >> (DMA_CCMD_INVL_GRANU_OFFSET-4)) + struct qi_desc { u64 low, high; }; @@ -303,6 +318,12 @@ extern void free_iommu(struct intel_iommu *iommu); extern int dmar_enable_qi(struct intel_iommu *iommu); extern void qi_global_iec(struct intel_iommu *iommu); +extern int qi_flush_context(struct intel_iommu *iommu, u16 did, u16 sid, + u8 fm, u64 type, int non_present_entry_flush); +extern int qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr, + unsigned int size_order, u64 type, + int non_present_entry_flush); + extern void qi_submit_sync(struct qi_desc *desc, struct 
intel_iommu *iommu); void intel_iommu_domain_exit(struct dmar_domain *domain); -- cgit v1.2.3-70-g09d2 From a77b67d4023770805141014b8fa9eb5467457817 Mon Sep 17 00:00:00 2001 From: Youquan Song Date: Thu, 16 Oct 2008 16:31:56 -0700 Subject: dmar: Use queued invalidation interface for IOTLB and context invalidation If queued invalidation interface is available and enabled, queued invalidation interface will be used instead of the register based interface. According to Vt-d2 specification, when queued invalidation is enabled, invalidation command submit works only through invalidation queue and not through the command registers interface. Signed-off-by: Youquan Song Signed-off-by: Suresh Siddha Signed-off-by: David Woodhouse --- drivers/pci/intel-iommu.c | 95 +++++++++++++++++++++------------------------ include/linux/intel-iommu.h | 8 ++++ 2 files changed, 53 insertions(+), 50 deletions(-) (limited to 'include/linux') diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index fc5f2dbf532..50947041913 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -567,27 +567,6 @@ static int __iommu_flush_context(struct intel_iommu *iommu, return 0; } -static int inline iommu_flush_context_global(struct intel_iommu *iommu, - int non_present_entry_flush) -{ - return __iommu_flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL, - non_present_entry_flush); -} - -static int inline iommu_flush_context_domain(struct intel_iommu *iommu, u16 did, - int non_present_entry_flush) -{ - return __iommu_flush_context(iommu, did, 0, 0, DMA_CCMD_DOMAIN_INVL, - non_present_entry_flush); -} - -static int inline iommu_flush_context_device(struct intel_iommu *iommu, - u16 did, u16 source_id, u8 function_mask, int non_present_entry_flush) -{ - return __iommu_flush_context(iommu, did, source_id, function_mask, - DMA_CCMD_DEVICE_INVL, non_present_entry_flush); -} - /* return value determine if we need a write buffer flush */ static int __iommu_flush_iotlb(struct 
intel_iommu *iommu, u16 did, u64 addr, unsigned int size_order, u64 type, @@ -660,20 +639,6 @@ static int __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did, return 0; } -static int inline iommu_flush_iotlb_global(struct intel_iommu *iommu, - int non_present_entry_flush) -{ - return __iommu_flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH, - non_present_entry_flush); -} - -static int inline iommu_flush_iotlb_dsi(struct intel_iommu *iommu, u16 did, - int non_present_entry_flush) -{ - return __iommu_flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH, - non_present_entry_flush); -} - static int iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did, u64 addr, unsigned int pages, int non_present_entry_flush) { @@ -684,8 +649,9 @@ static int iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did, /* Fallback to domain selective flush if no PSI support */ if (!cap_pgsel_inv(iommu->cap)) - return iommu_flush_iotlb_dsi(iommu, did, - non_present_entry_flush); + return iommu->flush.flush_iotlb(iommu, did, 0, 0, + DMA_TLB_DSI_FLUSH, + non_present_entry_flush); /* * PSI requires page size to be 2 ^ x, and the base address is naturally @@ -694,11 +660,12 @@ static int iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did, mask = ilog2(__roundup_pow_of_two(pages)); /* Fallback to domain selective flush if size is too big */ if (mask > cap_max_amask_val(iommu->cap)) - return iommu_flush_iotlb_dsi(iommu, did, - non_present_entry_flush); + return iommu->flush.flush_iotlb(iommu, did, 0, 0, + DMA_TLB_DSI_FLUSH, non_present_entry_flush); - return __iommu_flush_iotlb(iommu, did, addr, mask, - DMA_TLB_PSI_FLUSH, non_present_entry_flush); + return iommu->flush.flush_iotlb(iommu, did, addr, mask, + DMA_TLB_PSI_FLUSH, + non_present_entry_flush); } static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu) @@ -1204,11 +1171,13 @@ static int domain_context_mapping_one(struct dmar_domain *domain, __iommu_flush_cache(iommu, context, sizeof(*context)); /* it's a 
non-present to present mapping */ - if (iommu_flush_context_device(iommu, domain->id, - (((u16)bus) << 8) | devfn, DMA_CCMD_MASK_NOBIT, 1)) + if (iommu->flush.flush_context(iommu, domain->id, + (((u16)bus) << 8) | devfn, DMA_CCMD_MASK_NOBIT, + DMA_CCMD_DEVICE_INVL, 1)) iommu_flush_write_buffer(iommu); else - iommu_flush_iotlb_dsi(iommu, 0, 0); + iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_DSI_FLUSH, 0); + spin_unlock_irqrestore(&iommu->lock, flags); return 0; } @@ -1310,8 +1279,10 @@ domain_page_mapping(struct dmar_domain *domain, dma_addr_t iova, static void detach_domain_for_dev(struct dmar_domain *domain, u8 bus, u8 devfn) { clear_context_table(domain->iommu, bus, devfn); - iommu_flush_context_global(domain->iommu, 0); - iommu_flush_iotlb_global(domain->iommu, 0); + domain->iommu->flush.flush_context(domain->iommu, 0, 0, 0, + DMA_CCMD_GLOBAL_INVL, 0); + domain->iommu->flush.flush_iotlb(domain->iommu, 0, 0, 0, + DMA_TLB_GLOBAL_FLUSH, 0); } static void domain_remove_dev_info(struct dmar_domain *domain) @@ -1662,6 +1633,28 @@ int __init init_dmars(void) } } + for_each_drhd_unit(drhd) { + if (drhd->ignored) + continue; + + iommu = drhd->iommu; + if (dmar_enable_qi(iommu)) { + /* + * Queued Invalidate not enabled, use Register Based + * Invalidate + */ + iommu->flush.flush_context = __iommu_flush_context; + iommu->flush.flush_iotlb = __iommu_flush_iotlb; + printk(KERN_INFO "IOMMU 0x%Lx: using Register based " + "invalidation\n", drhd->reg_base_addr); + } else { + iommu->flush.flush_context = qi_flush_context; + iommu->flush.flush_iotlb = qi_flush_iotlb; + printk(KERN_INFO "IOMMU 0x%Lx: using Queued " + "invalidation\n", drhd->reg_base_addr); + } + } + /* * For each rmrr * for each dev attached to rmrr @@ -1714,9 +1707,10 @@ int __init init_dmars(void) iommu_set_root_entry(iommu); - iommu_flush_context_global(iommu, 0); - iommu_flush_iotlb_global(iommu, 0); - + iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL, + 0); + 
iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH, + 0); iommu_disable_protect_mem_regions(iommu); ret = iommu_enable_translation(iommu); @@ -1891,7 +1885,8 @@ static void flush_unmaps(void) struct intel_iommu *iommu = deferred_flush[i].domain[0]->iommu; - iommu_flush_iotlb_global(iommu, 0); + iommu->flush.flush_iotlb(iommu, 0, 0, 0, + DMA_TLB_GLOBAL_FLUSH, 0); for (j = 0; j < deferred_flush[i].next; j++) { __free_iova(&deferred_flush[i].domain[j]->iovad, deferred_flush[i].iova[j]); diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index 0c5f5e49107..afb0d2a5b7c 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -278,6 +278,13 @@ struct ir_table { }; #endif +struct iommu_flush { + int (*flush_context)(struct intel_iommu *iommu, u16 did, u16 sid, u8 fm, + u64 type, int non_present_entry_flush); + int (*flush_iotlb)(struct intel_iommu *iommu, u16 did, u64 addr, + unsigned int size_order, u64 type, int non_present_entry_flush); +}; + struct intel_iommu { void __iomem *reg; /* Pointer to hardware regs, virtual addr */ u64 cap; @@ -297,6 +304,7 @@ struct intel_iommu { unsigned char name[7]; /* Device Name */ struct msi_msg saved_msg; struct sys_device sysdev; + struct iommu_flush flush; #endif struct q_inval *qi; /* Queued invalidation info */ #ifdef CONFIG_INTR_REMAP -- cgit v1.2.3-70-g09d2 From e62b4853983d032dcb3cde9fb20407dc556f47bc Mon Sep 17 00:00:00 2001 From: David Miller Date: Thu, 16 Oct 2008 21:14:11 -0700 Subject: sched: kill unused scheduler decl. I noticed this while making investigations into the tbench regressions. Please apply. sched: Remove hrtick_resched() extern decl. This function was removed by 31656519e132f6612584815f128c83976a9aaaef ("sched, x86: clean up hrtick implementation"). Signed-off-by: David S. 
Miller Acked-by: Peter Zijlstra Signed-off-by: Ingo Molnar --- include/linux/sched.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index c226c7b8294..6eda6ad735d 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -287,7 +287,6 @@ extern void trap_init(void); extern void account_process_tick(struct task_struct *task, int user); extern void update_process_times(int user); extern void scheduler_tick(void); -extern void hrtick_resched(void); extern void sched_show_task(struct task_struct *p); -- cgit v1.2.3-70-g09d2 From 5b6985ce8ec7127b4d60ad450b64ca8b82748a3b Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Thu, 16 Oct 2008 18:02:32 -0700 Subject: intel-iommu: IA64 support The current Intel IOMMU code assumes that both host page size and Intel IOMMU page size are 4KiB. The first patch supports variable page size. This provides support for IA64 which has multiple page sizes. This patch also adds some other code hooks for IA64 platform including DMAR_OPERATION_TIMEOUT definition. 
[dwmw2: some cleanup] Signed-off-by: Fenghua Yu Signed-off-by: Tony Luck Signed-off-by: David Woodhouse --- arch/x86/kernel/pci-dma.c | 16 ------ drivers/pci/dmar.c | 19 ++++--- drivers/pci/intel-iommu.c | 128 ++++++++++++++++++++++-------------------- drivers/pci/quirks.c | 14 +++++ include/asm-x86/iommu.h | 4 ++ include/linux/dma_remapping.h | 27 +++++---- include/linux/intel-iommu.h | 39 +++++++------ 7 files changed, 131 insertions(+), 116 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index 19262482021..1972266e8ba 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -9,8 +9,6 @@ #include #include -static int forbid_dac __read_mostly; - struct dma_mapping_ops *dma_ops; EXPORT_SYMBOL(dma_ops); @@ -293,17 +291,3 @@ void pci_iommu_shutdown(void) } /* Must execute after PCI subsystem */ fs_initcall(pci_iommu_init); - -#ifdef CONFIG_PCI -/* Many VIA bridges seem to corrupt data for DAC. Disable it here */ - -static __devinit void via_no_dac(struct pci_dev *dev) -{ - if ((dev->class >> 8) == PCI_CLASS_BRIDGE_PCI && forbid_dac == 0) { - printk(KERN_INFO "PCI: VIA PCI bridge detected." 
- "Disabling DAC.\n"); - forbid_dac = 1; - } -} -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_VIA, PCI_ANY_ID, via_no_dac); -#endif diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c index 44d6c7081b8..b65173828bc 100644 --- a/drivers/pci/dmar.c +++ b/drivers/pci/dmar.c @@ -277,14 +277,15 @@ dmar_table_print_dmar_entry(struct acpi_dmar_header *header) drhd = (struct acpi_dmar_hardware_unit *)header; printk (KERN_INFO PREFIX "DRHD (flags: 0x%08x)base: 0x%016Lx\n", - drhd->flags, drhd->address); + drhd->flags, (unsigned long long)drhd->address); break; case ACPI_DMAR_TYPE_RESERVED_MEMORY: rmrr = (struct acpi_dmar_reserved_memory *)header; printk (KERN_INFO PREFIX "RMRR base: 0x%016Lx end: 0x%016Lx\n", - rmrr->base_address, rmrr->end_address); + (unsigned long long)rmrr->base_address, + (unsigned long long)rmrr->end_address); break; } } @@ -304,7 +305,7 @@ parse_dmar_table(void) if (!dmar) return -ENODEV; - if (dmar->width < PAGE_SHIFT_4K - 1) { + if (dmar->width < PAGE_SHIFT - 1) { printk(KERN_WARNING PREFIX "Invalid DMAR haw\n"); return -EINVAL; } @@ -493,7 +494,7 @@ int alloc_iommu(struct dmar_drhd_unit *drhd) iommu->seq_id = iommu_allocated++; - iommu->reg = ioremap(drhd->reg_base_addr, PAGE_SIZE_4K); + iommu->reg = ioremap(drhd->reg_base_addr, VTD_PAGE_SIZE); if (!iommu->reg) { printk(KERN_ERR "IOMMU: can't map the region\n"); goto error; @@ -504,8 +505,8 @@ int alloc_iommu(struct dmar_drhd_unit *drhd) /* the registers might be more than one page */ map_size = max_t(int, ecap_max_iotlb_offset(iommu->ecap), cap_max_fault_reg_offset(iommu->cap)); - map_size = PAGE_ALIGN_4K(map_size); - if (map_size > PAGE_SIZE_4K) { + map_size = VTD_PAGE_ALIGN(map_size); + if (map_size > VTD_PAGE_SIZE) { iounmap(iommu->reg); iommu->reg = ioremap(drhd->reg_base_addr, map_size); if (!iommu->reg) { @@ -516,8 +517,10 @@ int alloc_iommu(struct dmar_drhd_unit *drhd) ver = readl(iommu->reg + DMAR_VER_REG); pr_debug("IOMMU %llx: ver %d:%d cap %llx ecap %llx\n", - drhd->reg_base_addr, 
DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver), - iommu->cap, iommu->ecap); + (unsigned long long)drhd->reg_base_addr, + DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver), + (unsigned long long)iommu->cap, + (unsigned long long)iommu->ecap); spin_lock_init(&iommu->register_lock); diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index 50947041913..2bf96babbc4 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -18,6 +18,7 @@ * Author: Ashok Raj * Author: Shaohua Li * Author: Anil S Keshavamurthy + * Author: Fenghua Yu */ #include @@ -35,11 +36,13 @@ #include #include #include -#include /* force_iommu in this header in x86-64*/ #include #include #include "pci.h" +#define ROOT_SIZE VTD_PAGE_SIZE +#define CONTEXT_SIZE VTD_PAGE_SIZE + #define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY) #define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA) @@ -199,7 +202,7 @@ static struct context_entry * device_to_context_entry(struct intel_iommu *iommu, spin_unlock_irqrestore(&iommu->lock, flags); return NULL; } - __iommu_flush_cache(iommu, (void *)context, PAGE_SIZE_4K); + __iommu_flush_cache(iommu, (void *)context, CONTEXT_SIZE); phy_addr = virt_to_phys((void *)context); set_root_value(root, phy_addr); set_root_present(root); @@ -345,7 +348,7 @@ static struct dma_pte * addr_to_dma_pte(struct dmar_domain *domain, u64 addr) return NULL; } __iommu_flush_cache(domain->iommu, tmp_page, - PAGE_SIZE_4K); + PAGE_SIZE); dma_set_pte_addr(*pte, virt_to_phys(tmp_page)); /* * high level table always sets r/w, last level page @@ -408,13 +411,13 @@ static void dma_pte_clear_range(struct dmar_domain *domain, u64 start, u64 end) start &= (((u64)1) << addr_width) - 1; end &= (((u64)1) << addr_width) - 1; /* in case it's partial page */ - start = PAGE_ALIGN_4K(start); - end &= PAGE_MASK_4K; + start = PAGE_ALIGN(start); + end &= PAGE_MASK; /* we don't need lock here, nobody else touches the iova range */ while (start < end) { 
dma_pte_clear_one(domain, start); - start += PAGE_SIZE_4K; + start += VTD_PAGE_SIZE; } } @@ -468,7 +471,7 @@ static int iommu_alloc_root_entry(struct intel_iommu *iommu) if (!root) return -ENOMEM; - __iommu_flush_cache(iommu, root, PAGE_SIZE_4K); + __iommu_flush_cache(iommu, root, ROOT_SIZE); spin_lock_irqsave(&iommu->lock, flags); iommu->root_entry = root; @@ -634,7 +637,8 @@ static int __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did, printk(KERN_ERR"IOMMU: flush IOTLB failed\n"); if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type)) pr_debug("IOMMU: tlb flush request %Lx, actual %Lx\n", - DMA_TLB_IIRG(type), DMA_TLB_IAIG(val)); + (unsigned long long)DMA_TLB_IIRG(type), + (unsigned long long)DMA_TLB_IAIG(val)); /* flush context entry will implictly flush write buffer */ return 0; } @@ -644,7 +648,7 @@ static int iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did, { unsigned int mask; - BUG_ON(addr & (~PAGE_MASK_4K)); + BUG_ON(addr & (~VTD_PAGE_MASK)); BUG_ON(pages == 0); /* Fallback to domain selective flush if no PSI support */ @@ -798,7 +802,7 @@ void dmar_msi_read(int irq, struct msi_msg *msg) } static int iommu_page_fault_do_one(struct intel_iommu *iommu, int type, - u8 fault_reason, u16 source_id, u64 addr) + u8 fault_reason, u16 source_id, unsigned long long addr) { const char *reason; @@ -1051,9 +1055,9 @@ static void dmar_init_reserved_ranges(void) if (!r->flags || !(r->flags & IORESOURCE_MEM)) continue; addr = r->start; - addr &= PAGE_MASK_4K; + addr &= PAGE_MASK; size = r->end - addr; - size = PAGE_ALIGN_4K(size); + size = PAGE_ALIGN(size); iova = reserve_iova(&reserved_iova_list, IOVA_PFN(addr), IOVA_PFN(size + addr) - 1); if (!iova) @@ -1115,7 +1119,7 @@ static int domain_init(struct dmar_domain *domain, int guest_width) domain->pgd = (struct dma_pte *)alloc_pgtable_page(); if (!domain->pgd) return -ENOMEM; - __iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE_4K); + __iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE); return 0; } @@ -1131,7 +1135,7 
@@ static void domain_exit(struct dmar_domain *domain) /* destroy iovas */ put_iova_domain(&domain->iovad); end = DOMAIN_MAX_ADDR(domain->gaw); - end = end & (~PAGE_MASK_4K); + end = end & (~PAGE_MASK); /* clear ptes */ dma_pte_clear_range(domain, 0, end); @@ -1252,22 +1256,25 @@ domain_page_mapping(struct dmar_domain *domain, dma_addr_t iova, u64 start_pfn, end_pfn; struct dma_pte *pte; int index; + int addr_width = agaw_to_width(domain->agaw); + + hpa &= (((u64)1) << addr_width) - 1; if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0) return -EINVAL; - iova &= PAGE_MASK_4K; - start_pfn = ((u64)hpa) >> PAGE_SHIFT_4K; - end_pfn = (PAGE_ALIGN_4K(((u64)hpa) + size)) >> PAGE_SHIFT_4K; + iova &= PAGE_MASK; + start_pfn = ((u64)hpa) >> VTD_PAGE_SHIFT; + end_pfn = (VTD_PAGE_ALIGN(((u64)hpa) + size)) >> VTD_PAGE_SHIFT; index = 0; while (start_pfn < end_pfn) { - pte = addr_to_dma_pte(domain, iova + PAGE_SIZE_4K * index); + pte = addr_to_dma_pte(domain, iova + VTD_PAGE_SIZE * index); if (!pte) return -ENOMEM; /* We don't need lock here, nobody else * touches the iova range */ BUG_ON(dma_pte_addr(*pte)); - dma_set_pte_addr(*pte, start_pfn << PAGE_SHIFT_4K); + dma_set_pte_addr(*pte, start_pfn << VTD_PAGE_SHIFT); dma_set_pte_prot(*pte, prot); __iommu_flush_cache(domain->iommu, pte, sizeof(*pte)); start_pfn++; @@ -1445,11 +1452,13 @@ error: return find_domain(pdev); } -static int iommu_prepare_identity_map(struct pci_dev *pdev, u64 start, u64 end) +static int iommu_prepare_identity_map(struct pci_dev *pdev, + unsigned long long start, + unsigned long long end) { struct dmar_domain *domain; unsigned long size; - u64 base; + unsigned long long base; int ret; printk(KERN_INFO @@ -1461,9 +1470,9 @@ static int iommu_prepare_identity_map(struct pci_dev *pdev, u64 start, u64 end) return -ENOMEM; /* The address might not be aligned */ - base = start & PAGE_MASK_4K; + base = start & PAGE_MASK; size = end - base; - size = PAGE_ALIGN_4K(size); + size = PAGE_ALIGN(size); if 
(!reserve_iova(&domain->iovad, IOVA_PFN(base), IOVA_PFN(base + size) - 1)) { printk(KERN_ERR "IOMMU: reserve iova failed\n"); @@ -1732,8 +1741,8 @@ error: static inline u64 aligned_size(u64 host_addr, size_t size) { u64 addr; - addr = (host_addr & (~PAGE_MASK_4K)) + size; - return PAGE_ALIGN_4K(addr); + addr = (host_addr & (~PAGE_MASK)) + size; + return PAGE_ALIGN(addr); } struct iova * @@ -1747,7 +1756,7 @@ iommu_alloc_iova(struct dmar_domain *domain, size_t size, u64 end) return NULL; piova = alloc_iova(&domain->iovad, - size >> PAGE_SHIFT_4K, IOVA_PFN(end), 1); + size >> PAGE_SHIFT, IOVA_PFN(end), 1); return piova; } @@ -1807,12 +1816,12 @@ get_valid_domain_for_dev(struct pci_dev *pdev) return domain; } -static dma_addr_t +dma_addr_t intel_map_single(struct device *hwdev, phys_addr_t paddr, size_t size, int dir) { struct pci_dev *pdev = to_pci_dev(hwdev); struct dmar_domain *domain; - unsigned long start_paddr; + phys_addr_t start_paddr; struct iova *iova; int prot = 0; int ret; @@ -1831,7 +1840,7 @@ intel_map_single(struct device *hwdev, phys_addr_t paddr, size_t size, int dir) if (!iova) goto error; - start_paddr = iova->pfn_lo << PAGE_SHIFT_4K; + start_paddr = (phys_addr_t)iova->pfn_lo << PAGE_SHIFT; /* * Check if DMAR supports zero-length reads on write only @@ -1849,27 +1858,23 @@ intel_map_single(struct device *hwdev, phys_addr_t paddr, size_t size, int dir) * is not a big problem */ ret = domain_page_mapping(domain, start_paddr, - ((u64)paddr) & PAGE_MASK_4K, size, prot); + ((u64)paddr) & PAGE_MASK, size, prot); if (ret) goto error; - pr_debug("Device %s request: %lx@%llx mapping: %lx@%llx, dir %d\n", - pci_name(pdev), size, (u64)paddr, - size, (u64)start_paddr, dir); - /* it's a non-present to present mapping */ ret = iommu_flush_iotlb_psi(domain->iommu, domain->id, - start_paddr, size >> PAGE_SHIFT_4K, 1); + start_paddr, size >> VTD_PAGE_SHIFT, 1); if (ret) iommu_flush_write_buffer(domain->iommu); - return (start_paddr + ((u64)paddr & (~PAGE_MASK_4K))); 
+ return start_paddr + ((u64)paddr & (~PAGE_MASK)); error: if (iova) __free_iova(&domain->iovad, iova); printk(KERN_ERR"Device %s request: %lx@%llx dir %d --- failed\n", - pci_name(pdev), size, (u64)paddr, dir); + pci_name(pdev), size, (unsigned long long)paddr, dir); return 0; } @@ -1931,8 +1936,8 @@ static void add_unmap(struct dmar_domain *dom, struct iova *iova) spin_unlock_irqrestore(&async_umap_flush_lock, flags); } -static void intel_unmap_single(struct device *dev, dma_addr_t dev_addr, - size_t size, int dir) +void intel_unmap_single(struct device *dev, dma_addr_t dev_addr, size_t size, + int dir) { struct pci_dev *pdev = to_pci_dev(dev); struct dmar_domain *domain; @@ -1948,11 +1953,11 @@ static void intel_unmap_single(struct device *dev, dma_addr_t dev_addr, if (!iova) return; - start_addr = iova->pfn_lo << PAGE_SHIFT_4K; + start_addr = iova->pfn_lo << PAGE_SHIFT; size = aligned_size((u64)dev_addr, size); pr_debug("Device %s unmapping: %lx@%llx\n", - pci_name(pdev), size, (u64)start_addr); + pci_name(pdev), size, (unsigned long long)start_addr); /* clear the whole page */ dma_pte_clear_range(domain, start_addr, start_addr + size); @@ -1960,7 +1965,7 @@ static void intel_unmap_single(struct device *dev, dma_addr_t dev_addr, dma_pte_free_pagetable(domain, start_addr, start_addr + size); if (intel_iommu_strict) { if (iommu_flush_iotlb_psi(domain->iommu, - domain->id, start_addr, size >> PAGE_SHIFT_4K, 0)) + domain->id, start_addr, size >> VTD_PAGE_SHIFT, 0)) iommu_flush_write_buffer(domain->iommu); /* free iova */ __free_iova(&domain->iovad, iova); @@ -1973,13 +1978,13 @@ static void intel_unmap_single(struct device *dev, dma_addr_t dev_addr, } } -static void * intel_alloc_coherent(struct device *hwdev, size_t size, - dma_addr_t *dma_handle, gfp_t flags) +void *intel_alloc_coherent(struct device *hwdev, size_t size, + dma_addr_t *dma_handle, gfp_t flags) { void *vaddr; int order; - size = PAGE_ALIGN_4K(size); + size = PAGE_ALIGN(size); order = 
get_order(size); flags &= ~(GFP_DMA | GFP_DMA32); @@ -1995,12 +2000,12 @@ static void * intel_alloc_coherent(struct device *hwdev, size_t size, return NULL; } -static void intel_free_coherent(struct device *hwdev, size_t size, - void *vaddr, dma_addr_t dma_handle) +void intel_free_coherent(struct device *hwdev, size_t size, void *vaddr, + dma_addr_t dma_handle) { int order; - size = PAGE_ALIGN_4K(size); + size = PAGE_ALIGN(size); order = get_order(size); intel_unmap_single(hwdev, dma_handle, size, DMA_BIDIRECTIONAL); @@ -2008,8 +2013,9 @@ static void intel_free_coherent(struct device *hwdev, size_t size, } #define SG_ENT_VIRT_ADDRESS(sg) (sg_virt((sg))) -static void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist, - int nelems, int dir) + +void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist, + int nelems, int dir) { int i; struct pci_dev *pdev = to_pci_dev(hwdev); @@ -2033,7 +2039,7 @@ static void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist, size += aligned_size((u64)addr, sg->length); } - start_addr = iova->pfn_lo << PAGE_SHIFT_4K; + start_addr = iova->pfn_lo << PAGE_SHIFT; /* clear the whole page */ dma_pte_clear_range(domain, start_addr, start_addr + size); @@ -2041,7 +2047,7 @@ static void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist, dma_pte_free_pagetable(domain, start_addr, start_addr + size); if (iommu_flush_iotlb_psi(domain->iommu, domain->id, start_addr, - size >> PAGE_SHIFT_4K, 0)) + size >> VTD_PAGE_SHIFT, 0)) iommu_flush_write_buffer(domain->iommu); /* free iova */ @@ -2062,8 +2068,8 @@ static int intel_nontranslate_map_sg(struct device *hddev, return nelems; } -static int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, - int nelems, int dir) +int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int nelems, + int dir) { void *addr; int i; @@ -2107,14 +2113,14 @@ static int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, if (dir == 
DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL) prot |= DMA_PTE_WRITE; - start_addr = iova->pfn_lo << PAGE_SHIFT_4K; + start_addr = iova->pfn_lo << PAGE_SHIFT; offset = 0; for_each_sg(sglist, sg, nelems, i) { addr = SG_ENT_VIRT_ADDRESS(sg); addr = (void *)virt_to_phys(addr); size = aligned_size((u64)addr, sg->length); ret = domain_page_mapping(domain, start_addr + offset, - ((u64)addr) & PAGE_MASK_4K, + ((u64)addr) & PAGE_MASK, size, prot); if (ret) { /* clear the page */ @@ -2128,14 +2134,14 @@ static int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, return 0; } sg->dma_address = start_addr + offset + - ((u64)addr & (~PAGE_MASK_4K)); + ((u64)addr & (~PAGE_MASK)); sg->dma_length = sg->length; offset += size; } /* it's a non-present to present mapping */ if (iommu_flush_iotlb_psi(domain->iommu, domain->id, - start_addr, offset >> PAGE_SHIFT_4K, 1)) + start_addr, offset >> VTD_PAGE_SHIFT, 1)) iommu_flush_write_buffer(domain->iommu); return nelems; } @@ -2175,7 +2181,6 @@ static inline int iommu_devinfo_cache_init(void) sizeof(struct device_domain_info), 0, SLAB_HWCACHE_ALIGN, - NULL); if (!iommu_devinfo_cache) { printk(KERN_ERR "Couldn't create devinfo cache\n"); @@ -2193,7 +2198,6 @@ static inline int iommu_iova_cache_init(void) sizeof(struct iova), 0, SLAB_HWCACHE_ALIGN, - NULL); if (!iommu_iova_cache) { printk(KERN_ERR "Couldn't create iova cache\n"); @@ -2322,7 +2326,7 @@ void intel_iommu_domain_exit(struct dmar_domain *domain) return; end = DOMAIN_MAX_ADDR(domain->gaw); - end = end & (~PAGE_MASK_4K); + end = end & (~VTD_PAGE_MASK); /* clear ptes */ dma_pte_clear_range(domain, 0, end); @@ -2418,6 +2422,6 @@ u64 intel_iommu_iova_to_pfn(struct dmar_domain *domain, u64 iova) if (pte) pfn = dma_pte_addr(*pte); - return pfn >> PAGE_SHIFT_4K; + return pfn >> VTD_PAGE_SHIFT; } EXPORT_SYMBOL_GPL(intel_iommu_iova_to_pfn); diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index e872ac925b4..832175d9ca2 100644 --- a/drivers/pci/quirks.c +++ 
b/drivers/pci/quirks.c @@ -35,6 +35,20 @@ static void __devinit quirk_mellanox_tavor(struct pci_dev *dev) DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_MELLANOX,PCI_DEVICE_ID_MELLANOX_TAVOR,quirk_mellanox_tavor); DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_MELLANOX,PCI_DEVICE_ID_MELLANOX_TAVOR_BRIDGE,quirk_mellanox_tavor); +/* Many VIA bridges seem to corrupt data for DAC. Disable it here */ +int forbid_dac __read_mostly; +EXPORT_SYMBOL(forbid_dac); + +static __devinit void via_no_dac(struct pci_dev *dev) +{ + if ((dev->class >> 8) == PCI_CLASS_BRIDGE_PCI && forbid_dac == 0) { + dev_info(&dev->dev, + "VIA PCI bridge detected. Disabling DAC.\n"); + forbid_dac = 1; + } +} +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_VIA, PCI_ANY_ID, via_no_dac); + /* Deal with broken BIOS'es that neglect to enable passive release, which can cause problems in combination with the 82441FX/PPro MTRRs */ static void quirk_passive_release(struct pci_dev *dev) diff --git a/include/asm-x86/iommu.h b/include/asm-x86/iommu.h index 961e746da97..2daaffcda52 100644 --- a/include/asm-x86/iommu.h +++ b/include/asm-x86/iommu.h @@ -7,9 +7,13 @@ extern struct dma_mapping_ops nommu_dma_ops; extern int force_iommu, no_iommu; extern int iommu_detected; extern int dmar_disabled; +extern int forbid_dac; extern unsigned long iommu_nr_pages(unsigned long addr, unsigned long len); +/* 10 seconds */ +#define DMAR_OPERATION_TIMEOUT ((cycles_t) tsc_khz*10*1000) + #ifdef CONFIG_GART_IOMMU extern int gart_iommu_aperture; extern int gart_iommu_aperture_allowed; diff --git a/include/linux/dma_remapping.h b/include/linux/dma_remapping.h index bff5c65f81d..952df39c989 100644 --- a/include/linux/dma_remapping.h +++ b/include/linux/dma_remapping.h @@ -2,15 +2,14 @@ #define _DMA_REMAPPING_H /* - * We need a fixed PAGE_SIZE of 4K irrespective of - * arch PAGE_SIZE for IOMMU page tables. + * VT-d hardware uses 4KiB page size regardless of host page size. 
*/ -#define PAGE_SHIFT_4K (12) -#define PAGE_SIZE_4K (1UL << PAGE_SHIFT_4K) -#define PAGE_MASK_4K (((u64)-1) << PAGE_SHIFT_4K) -#define PAGE_ALIGN_4K(addr) (((addr) + PAGE_SIZE_4K - 1) & PAGE_MASK_4K) +#define VTD_PAGE_SHIFT (12) +#define VTD_PAGE_SIZE (1UL << VTD_PAGE_SHIFT) +#define VTD_PAGE_MASK (((u64)-1) << VTD_PAGE_SHIFT) +#define VTD_PAGE_ALIGN(addr) (((addr) + VTD_PAGE_SIZE - 1) & VTD_PAGE_MASK) -#define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT_4K) +#define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT) #define DMA_32BIT_PFN IOVA_PFN(DMA_32BIT_MASK) #define DMA_64BIT_PFN IOVA_PFN(DMA_64BIT_MASK) @@ -25,7 +24,7 @@ struct root_entry { u64 val; u64 rsvd1; }; -#define ROOT_ENTRY_NR (PAGE_SIZE_4K/sizeof(struct root_entry)) +#define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry)) static inline bool root_present(struct root_entry *root) { return (root->val & 1); @@ -36,7 +35,7 @@ static inline void set_root_present(struct root_entry *root) } static inline void set_root_value(struct root_entry *root, unsigned long value) { - root->val |= value & PAGE_MASK_4K; + root->val |= value & VTD_PAGE_MASK; } struct context_entry; @@ -45,7 +44,7 @@ get_context_addr_from_root(struct root_entry *root) { return (struct context_entry *) (root_present(root)?phys_to_virt( - root->val & PAGE_MASK_4K): + root->val & VTD_PAGE_MASK) : NULL); } @@ -67,7 +66,7 @@ struct context_entry { #define context_present(c) ((c).lo & 1) #define context_fault_disable(c) (((c).lo >> 1) & 1) #define context_translation_type(c) (((c).lo >> 2) & 3) -#define context_address_root(c) ((c).lo & PAGE_MASK_4K) +#define context_address_root(c) ((c).lo & VTD_PAGE_MASK) #define context_address_width(c) ((c).hi & 7) #define context_domain_id(c) (((c).hi >> 8) & ((1 << 16) - 1)) @@ -81,7 +80,7 @@ struct context_entry { } while (0) #define CONTEXT_TT_MULTI_LEVEL 0 #define context_set_address_root(c, val) \ - do {(c).lo |= (val) & PAGE_MASK_4K;} while (0) + do {(c).lo |= (val) & VTD_PAGE_MASK; } while (0) #define 
context_set_address_width(c, val) do {(c).hi |= (val) & 7;} while (0) #define context_set_domain_id(c, val) \ do {(c).hi |= ((val) & ((1 << 16) - 1)) << 8;} while (0) @@ -107,9 +106,9 @@ struct dma_pte { #define dma_set_pte_writable(p) do {(p).val |= DMA_PTE_WRITE;} while (0) #define dma_set_pte_prot(p, prot) \ do {(p).val = ((p).val & ~3) | ((prot) & 3); } while (0) -#define dma_pte_addr(p) ((p).val & PAGE_MASK_4K) +#define dma_pte_addr(p) ((p).val & VTD_PAGE_MASK) #define dma_set_pte_addr(p, addr) do {\ - (p).val |= ((addr) & PAGE_MASK_4K); } while (0) + (p).val |= ((addr) & VTD_PAGE_MASK); } while (0) #define dma_pte_present(p) (((p).val & 3) != 0) struct intel_iommu; diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index afb0d2a5b7c..3d017cfd245 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -29,6 +29,7 @@ #include #include #include +#include /* * Intel IOMMU register specification per version 1.0 public spec. @@ -202,22 +203,21 @@ static inline void dmar_writeq(void __iomem *addr, u64 val) #define dma_frcd_type(d) ((d >> 30) & 1) #define dma_frcd_fault_reason(c) (c & 0xff) #define dma_frcd_source_id(c) (c & 0xffff) -#define dma_frcd_page_addr(d) (d & (((u64)-1) << 12)) /* low 64 bit */ - -#define DMAR_OPERATION_TIMEOUT ((cycles_t) tsc_khz*10*1000) /* 10sec */ - -#define IOMMU_WAIT_OP(iommu, offset, op, cond, sts) \ -{\ - cycles_t start_time = get_cycles();\ - while (1) {\ - sts = op (iommu->reg + offset);\ - if (cond)\ - break;\ +/* low 64 bit */ +#define dma_frcd_page_addr(d) (d & (((u64)-1) << PAGE_SHIFT)) + +#define IOMMU_WAIT_OP(iommu, offset, op, cond, sts) \ +do { \ + cycles_t start_time = get_cycles(); \ + while (1) { \ + sts = op(iommu->reg + offset); \ + if (cond) \ + break; \ if (DMAR_OPERATION_TIMEOUT < (get_cycles() - start_time))\ - panic("DMAR hardware is malfunctioning\n");\ - cpu_relax();\ - }\ -} + panic("DMAR hardware is malfunctioning\n"); \ + cpu_relax(); \ + } \ +} while (0) #define 
QI_LENGTH 256 /* queue length */ @@ -244,7 +244,7 @@ enum { #define QI_IOTLB_DR(dr) (((u64)dr) << 7) #define QI_IOTLB_DW(dw) (((u64)dw) << 6) #define QI_IOTLB_GRAN(gran) (((u64)gran) >> (DMA_TLB_FLUSH_GRANU_OFFSET-4)) -#define QI_IOTLB_ADDR(addr) (((u64)addr) & PAGE_MASK_4K) +#define QI_IOTLB_ADDR(addr) (((u64)addr) & VTD_PAGE_MASK) #define QI_IOTLB_IH(ih) (((u64)ih) << 6) #define QI_IOTLB_AM(am) (((u8)am)) @@ -353,4 +353,11 @@ static inline int intel_iommu_found(void) } #endif /* CONFIG_DMAR */ +extern void *intel_alloc_coherent(struct device *, size_t, dma_addr_t *, gfp_t); +extern void intel_free_coherent(struct device *, size_t, void *, dma_addr_t); +extern dma_addr_t intel_map_single(struct device *, phys_addr_t, size_t, int); +extern void intel_unmap_single(struct device *, dma_addr_t, size_t, int); +extern int intel_map_sg(struct device *, struct scatterlist *, int, int); +extern void intel_unmap_sg(struct device *, struct scatterlist *, int, int); + #endif -- cgit v1.2.3-70-g09d2 From ffda12a17a324103e9900fa1035309811eecbfe5 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 17 Oct 2008 19:27:02 +0200 Subject: sched: optimize group load balancer I noticed that tg_shares_up() unconditionally takes rq-locks for all cpus in the sched_domain. This hurts. We need the rq-locks whenever we change the weight of the per-cpu group sched entities. To alleviate this a little, only change the weight when the new weight is at least shares_thresh away from the old value. This avoids the rq-lock for the top level entries, since those will never be re-weighted, and fuzzes the lower level entries a little to gain performance in semi-stable situations. 
Signed-off-by: Peter Zijlstra Signed-off-by: Ingo Molnar --- include/linux/sched.h | 1 + kernel/sched.c | 45 +++++++++++++++++++++++++-------------------- kernel/sysctl.c | 10 ++++++++++ 3 files changed, 36 insertions(+), 20 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 6eda6ad735d..4f59c8e8597 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1621,6 +1621,7 @@ extern unsigned int sysctl_sched_features; extern unsigned int sysctl_sched_migration_cost; extern unsigned int sysctl_sched_nr_migrate; extern unsigned int sysctl_sched_shares_ratelimit; +extern unsigned int sysctl_sched_shares_thresh; int sched_nr_latency_handler(struct ctl_table *table, int write, struct file *file, void __user *buffer, size_t *length, diff --git a/kernel/sched.c b/kernel/sched.c index c530b84c7f8..11ca3901783 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -817,6 +817,13 @@ const_debug unsigned int sysctl_sched_nr_migrate = 32; */ unsigned int sysctl_sched_shares_ratelimit = 250000; +/* + * Inject some fuzzyness into changing the per-cpu group shares + * this avoids remote rq-locks at the expense of fairness. + * default: 4 + */ +unsigned int sysctl_sched_shares_thresh = 4; + /* * period over which we measure -rt task cpu usage in us. * default: 1s @@ -1453,8 +1460,8 @@ static void __set_se_shares(struct sched_entity *se, unsigned long shares); * Calculate and set the cpu's group shares. 
*/ static void -__update_group_shares_cpu(struct task_group *tg, int cpu, - unsigned long sd_shares, unsigned long sd_rq_weight) +update_group_shares_cpu(struct task_group *tg, int cpu, + unsigned long sd_shares, unsigned long sd_rq_weight) { int boost = 0; unsigned long shares; @@ -1485,19 +1492,23 @@ __update_group_shares_cpu(struct task_group *tg, int cpu, * */ shares = (sd_shares * rq_weight) / (sd_rq_weight + 1); + shares = clamp_t(unsigned long, shares, MIN_SHARES, MAX_SHARES); - /* - * record the actual number of shares, not the boosted amount. - */ - tg->cfs_rq[cpu]->shares = boost ? 0 : shares; - tg->cfs_rq[cpu]->rq_weight = rq_weight; + if (abs(shares - tg->se[cpu]->load.weight) > + sysctl_sched_shares_thresh) { + struct rq *rq = cpu_rq(cpu); + unsigned long flags; - if (shares < MIN_SHARES) - shares = MIN_SHARES; - else if (shares > MAX_SHARES) - shares = MAX_SHARES; + spin_lock_irqsave(&rq->lock, flags); + /* + * record the actual number of shares, not the boosted amount. + */ + tg->cfs_rq[cpu]->shares = boost ? 
0 : shares; + tg->cfs_rq[cpu]->rq_weight = rq_weight; - __set_se_shares(tg->se[cpu], shares); + __set_se_shares(tg->se[cpu], shares); + spin_unlock_irqrestore(&rq->lock, flags); + } } /* @@ -1526,14 +1537,8 @@ static int tg_shares_up(struct task_group *tg, void *data) if (!rq_weight) rq_weight = cpus_weight(sd->span) * NICE_0_LOAD; - for_each_cpu_mask(i, sd->span) { - struct rq *rq = cpu_rq(i); - unsigned long flags; - - spin_lock_irqsave(&rq->lock, flags); - __update_group_shares_cpu(tg, i, shares, rq_weight); - spin_unlock_irqrestore(&rq->lock, flags); - } + for_each_cpu_mask(i, sd->span) + update_group_shares_cpu(tg, i, shares, rq_weight); return 0; } diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 617d41e4d6a..3d804f41e64 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -274,6 +274,16 @@ static struct ctl_table kern_table[] = { .mode = 0644, .proc_handler = &proc_dointvec, }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "sched_shares_thresh", + .data = &sysctl_sched_shares_thresh, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = &proc_dointvec_minmax, + .strategy = &sysctl_intvec, + .extra1 = &zero, + }, { .ctl_name = CTL_UNNUMBERED, .procname = "sched_child_runs_first", -- cgit v1.2.3-70-g09d2 From 592aa999d6a272856c9bfbdaac0cfba1bb37c24c Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 20 Oct 2008 16:38:19 +0200 Subject: hrtimers: add missing docbook comments to struct hrtimer Signed-off-by: Thomas Gleixner --- include/linux/hrtimer.h | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index cb25c1cc235..58bca8e9bae 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -103,9 +103,14 @@ enum hrtimer_cb_mode { /** * struct hrtimer - the basic hrtimer structure * @node: red black tree node for time ordered insertion - * @expires: the absolute expiry time in the hrtimers internal + * @_expires: the 
absolute expiry time in the hrtimers internal * representation. The time is related to the clock on - * which the timer is based. + * which the timer is based. Is setup by adding + * slack to the _softexpires value. For non range timers + * identical to _softexpires. + * @_softexpires: the absolute earliest expiry time of the hrtimer. + * The time which was given as expiry time when the timer + * was armed. * @function: timer expiry callback function * @base: pointer to the timer base (per cpu and per clock) * @state: state information (See bit values above) -- cgit v1.2.3-70-g09d2 From 326bb8a5a12c6298a6bf6c74af490b1858b2f12c Mon Sep 17 00:00:00 2001 From: Trent Piepho Date: Mon, 13 Oct 2008 10:13:01 +0100 Subject: leds: Make default trigger fields const The default_trigger fields of struct gpio_led and thus struct led_classdev are pretty much always assigned from a string literal, which means the string can't be modified. Which is fine, since there is no reason to modify the string and in fact it never is. But they should be marked const to prevent such code from being added, to prevent warnings if -Wwrite-strings is used, when assigned from a constant string other than a string literal (which produces a warning under current kernel compiler flags), and for general good coding practices. 
Signed-off-by: Trent Piepho Signed-off-by: Richard Purdie --- include/linux/leds.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/leds.h b/include/linux/leds.h index d41ccb56146..d3a73f5a48c 100644 --- a/include/linux/leds.h +++ b/include/linux/leds.h @@ -123,7 +123,7 @@ extern void ledtrig_ide_activity(void); */ struct led_info { const char *name; - char *default_trigger; + const char *default_trigger; int flags; }; @@ -135,7 +135,7 @@ struct led_platform_data { /* For the leds-gpio driver */ struct gpio_led { const char *name; - char *default_trigger; + const char *default_trigger; unsigned gpio; u8 active_low; }; -- cgit v1.2.3-70-g09d2 From a5598ca0d49821912a5053c05f07fd650671eb6d Mon Sep 17 00:00:00 2001 From: Carl Love Date: Tue, 14 Oct 2008 23:37:01 +0000 Subject: powerpc/oprofile: Fix mutex locking for cell spu-oprofile The issue is the SPU code is not holding the kernel mutex lock while adding samples to the kernel buffer. This patch creates per SPU buffers to hold the data. Data is added to the buffers from interrupt context. The data is periodically pushed to the kernel buffer via a new Oprofile function oprofile_put_buff(). The oprofile_put_buff() function is called via a work queue enabling the function to acquire the mutex lock. The existing user controls for adjusting the per CPU buffer size are used to control the size of the per SPU buffers. Similarly, overflows of the SPU buffers are reported by incrementing the per CPU buffer stats. This eliminates the need to have architecture specific controls for the per SPU buffers which is not acceptable to the OProfile user tool maintainer. The export of the oprofile add_event_entry() is removed as it is no longer needed given this patch. Note, this patch has not addressed the issue of indexing arrays by the spu number. This still needs to be fixed as the spu numbering is not guaranteed to be 0 to max_num_spus-1. 
Signed-off-by: Carl Love Signed-off-by: Maynard Johnson Signed-off-by: Arnd Bergmann Acked-by: Acked-by: Robert Richter Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/oprofile/cell/pr_util.h | 13 ++ arch/powerpc/oprofile/cell/spu_profiler.c | 4 +- arch/powerpc/oprofile/cell/spu_task_sync.c | 236 +++++++++++++++++++++++++---- drivers/oprofile/buffer_sync.c | 24 +++ drivers/oprofile/cpu_buffer.c | 15 +- drivers/oprofile/event_buffer.h | 7 + include/linux/oprofile.h | 16 +- 7 files changed, 279 insertions(+), 36 deletions(-) (limited to 'include/linux') diff --git a/arch/powerpc/oprofile/cell/pr_util.h b/arch/powerpc/oprofile/cell/pr_util.h index 22e4e8d4eb2..628009c0195 100644 --- a/arch/powerpc/oprofile/cell/pr_util.h +++ b/arch/powerpc/oprofile/cell/pr_util.h @@ -24,6 +24,11 @@ #define SKIP_GENERIC_SYNC 0 #define SYNC_START_ERROR -1 #define DO_GENERIC_SYNC 1 +#define SPUS_PER_NODE 8 +#define DEFAULT_TIMER_EXPIRE (HZ / 10) + +extern struct delayed_work spu_work; +extern int spu_prof_running; struct spu_overlay_info { /* map of sections within an SPU overlay */ unsigned int vma; /* SPU virtual memory address from elf */ @@ -62,6 +67,14 @@ struct vma_to_fileoffset_map { /* map of sections within an SPU program */ }; +struct spu_buffer { + int last_guard_val; + int ctx_sw_seen; + unsigned long *buff; + unsigned int head, tail; +}; + + /* The three functions below are for maintaining and accessing * the vma-to-fileoffset map. 
*/ diff --git a/arch/powerpc/oprofile/cell/spu_profiler.c b/arch/powerpc/oprofile/cell/spu_profiler.c index 380d7e21753..6edaebd5099 100644 --- a/arch/powerpc/oprofile/cell/spu_profiler.c +++ b/arch/powerpc/oprofile/cell/spu_profiler.c @@ -23,12 +23,11 @@ static u32 *samples; -static int spu_prof_running; +int spu_prof_running; static unsigned int profiling_interval; #define NUM_SPU_BITS_TRBUF 16 #define SPUS_PER_TB_ENTRY 4 -#define SPUS_PER_NODE 8 #define SPU_PC_MASK 0xFFFF @@ -208,6 +207,7 @@ int start_spu_profiling(unsigned int cycles_reset) spu_prof_running = 1; hrtimer_start(&timer, kt, HRTIMER_MODE_REL); + schedule_delayed_work(&spu_work, DEFAULT_TIMER_EXPIRE); return 0; } diff --git a/arch/powerpc/oprofile/cell/spu_task_sync.c b/arch/powerpc/oprofile/cell/spu_task_sync.c index 2a9b4a04932..2949126d28d 100644 --- a/arch/powerpc/oprofile/cell/spu_task_sync.c +++ b/arch/powerpc/oprofile/cell/spu_task_sync.c @@ -35,7 +35,102 @@ static DEFINE_SPINLOCK(buffer_lock); static DEFINE_SPINLOCK(cache_lock); static int num_spu_nodes; int spu_prof_num_nodes; -int last_guard_val[MAX_NUMNODES * 8]; + +struct spu_buffer spu_buff[MAX_NUMNODES * SPUS_PER_NODE]; +struct delayed_work spu_work; +static unsigned max_spu_buff; + +static void spu_buff_add(unsigned long int value, int spu) +{ + /* spu buff is a circular buffer. Add entries to the + * head. Head is the index to store the next value. + * The buffer is full when there is one available entry + * in the queue, i.e. head and tail can't be equal. + * That way we can tell the difference between the + * buffer being full versus empty. + * + * ASSUPTION: the buffer_lock is held when this function + * is called to lock the buffer, head and tail. 
+ */ + int full = 1; + + if (spu_buff[spu].head >= spu_buff[spu].tail) { + if ((spu_buff[spu].head - spu_buff[spu].tail) + < (max_spu_buff - 1)) + full = 0; + + } else if (spu_buff[spu].tail > spu_buff[spu].head) { + if ((spu_buff[spu].tail - spu_buff[spu].head) + > 1) + full = 0; + } + + if (!full) { + spu_buff[spu].buff[spu_buff[spu].head] = value; + spu_buff[spu].head++; + + if (spu_buff[spu].head >= max_spu_buff) + spu_buff[spu].head = 0; + } else { + /* From the user's perspective make the SPU buffer + * size management/overflow look like we are using + * per cpu buffers. The user uses the same + * per cpu parameter to adjust the SPU buffer size. + * Increment the sample_lost_overflow to inform + * the user the buffer size needs to be increased. + */ + oprofile_cpu_buffer_inc_smpl_lost(); + } +} + +/* This function copies the per SPU buffers to the + * OProfile kernel buffer. + */ +void sync_spu_buff(void) +{ + int spu; + unsigned long flags; + int curr_head; + + for (spu = 0; spu < num_spu_nodes; spu++) { + /* In case there was an issue and the buffer didn't + * get created skip it. + */ + if (spu_buff[spu].buff == NULL) + continue; + + /* Hold the lock to make sure the head/tail + * doesn't change while spu_buff_add() is + * deciding if the buffer is full or not. + * Being a little paranoid. + */ + spin_lock_irqsave(&buffer_lock, flags); + curr_head = spu_buff[spu].head; + spin_unlock_irqrestore(&buffer_lock, flags); + + /* Transfer the current contents to the kernel buffer. + * data can still be added to the head of the buffer. 
+ */ + oprofile_put_buff(spu_buff[spu].buff, + spu_buff[spu].tail, + curr_head, max_spu_buff); + + spin_lock_irqsave(&buffer_lock, flags); + spu_buff[spu].tail = curr_head; + spin_unlock_irqrestore(&buffer_lock, flags); + } + +} + +static void wq_sync_spu_buff(struct work_struct *work) +{ + /* move data from spu buffers to kernel buffer */ + sync_spu_buff(); + + /* only reschedule if profiling is not done */ + if (spu_prof_running) + schedule_delayed_work(&spu_work, DEFAULT_TIMER_EXPIRE); +} /* Container for caching information about an active SPU task. */ struct cached_info { @@ -305,14 +400,21 @@ static int process_context_switch(struct spu *spu, unsigned long objectId) /* Record context info in event buffer */ spin_lock_irqsave(&buffer_lock, flags); - add_event_entry(ESCAPE_CODE); - add_event_entry(SPU_CTX_SWITCH_CODE); - add_event_entry(spu->number); - add_event_entry(spu->pid); - add_event_entry(spu->tgid); - add_event_entry(app_dcookie); - add_event_entry(spu_cookie); - add_event_entry(offset); + spu_buff_add(ESCAPE_CODE, spu->number); + spu_buff_add(SPU_CTX_SWITCH_CODE, spu->number); + spu_buff_add(spu->number, spu->number); + spu_buff_add(spu->pid, spu->number); + spu_buff_add(spu->tgid, spu->number); + spu_buff_add(app_dcookie, spu->number); + spu_buff_add(spu_cookie, spu->number); + spu_buff_add(offset, spu->number); + + /* Set flag to indicate SPU PC data can now be written out. If + * the SPU program counter data is seen before an SPU context + * record is seen, the postprocessing will fail. + */ + spu_buff[spu->number].ctx_sw_seen = 1; + spin_unlock_irqrestore(&buffer_lock, flags); smp_wmb(); /* insure spu event buffer updates are written */ /* don't want entries intermingled... 
*/ @@ -360,6 +462,47 @@ static int number_of_online_nodes(void) return nodes; } +static int oprofile_spu_buff_create(void) +{ + int spu; + + max_spu_buff = oprofile_get_cpu_buffer_size(); + + for (spu = 0; spu < num_spu_nodes; spu++) { + /* create circular buffers to store the data in. + * use locks to manage accessing the buffers + */ + spu_buff[spu].head = 0; + spu_buff[spu].tail = 0; + + /* + * Create a buffer for each SPU. Can't reliably + * create a single buffer for all spus due to not + * enough contiguous kernel memory. + */ + + spu_buff[spu].buff = kzalloc((max_spu_buff + * sizeof(unsigned long)), + GFP_KERNEL); + + if (!spu_buff[spu].buff) { + printk(KERN_ERR "SPU_PROF: " + "%s, line %d: oprofile_spu_buff_create " + "failed to allocate spu buffer %d.\n", + __func__, __LINE__, spu); + + /* release the spu buffers that have been allocated */ + while (spu >= 0) { + kfree(spu_buff[spu].buff); + spu_buff[spu].buff = 0; + spu--; + } + return -ENOMEM; + } + } + return 0; +} + /* The main purpose of this function is to synchronize * OProfile with SPUFS by registering to be notified of * SPU task switches. @@ -372,20 +515,35 @@ static int number_of_online_nodes(void) */ int spu_sync_start(void) { - int k; + int spu; int ret = SKIP_GENERIC_SYNC; int register_ret; unsigned long flags = 0; spu_prof_num_nodes = number_of_online_nodes(); num_spu_nodes = spu_prof_num_nodes * 8; + INIT_DELAYED_WORK(&spu_work, wq_sync_spu_buff); + + /* create buffer for storing the SPU data to put in + * the kernel buffer. 
+ */ + ret = oprofile_spu_buff_create(); + if (ret) + goto out; spin_lock_irqsave(&buffer_lock, flags); - add_event_entry(ESCAPE_CODE); - add_event_entry(SPU_PROFILING_CODE); - add_event_entry(num_spu_nodes); + for (spu = 0; spu < num_spu_nodes; spu++) { + spu_buff_add(ESCAPE_CODE, spu); + spu_buff_add(SPU_PROFILING_CODE, spu); + spu_buff_add(num_spu_nodes, spu); + } spin_unlock_irqrestore(&buffer_lock, flags); + for (spu = 0; spu < num_spu_nodes; spu++) { + spu_buff[spu].ctx_sw_seen = 0; + spu_buff[spu].last_guard_val = 0; + } + /* Register for SPU events */ register_ret = spu_switch_event_register(&spu_active); if (register_ret) { @@ -393,8 +551,6 @@ int spu_sync_start(void) goto out; } - for (k = 0; k < (MAX_NUMNODES * 8); k++) - last_guard_val[k] = 0; pr_debug("spu_sync_start -- running.\n"); out: return ret; @@ -446,13 +602,20 @@ void spu_sync_buffer(int spu_num, unsigned int *samples, * use. We need to discard samples taken during the time * period which an overlay occurs (i.e., guard value changes). */ - if (grd_val && grd_val != last_guard_val[spu_num]) { - last_guard_val[spu_num] = grd_val; + if (grd_val && grd_val != spu_buff[spu_num].last_guard_val) { + spu_buff[spu_num].last_guard_val = grd_val; /* Drop the rest of the samples. */ break; } - add_event_entry(file_offset | spu_num_shifted); + /* We must ensure that the SPU context switch has been written + * out before samples for the SPU. Otherwise, the SPU context + * information is not available and the postprocessing of the + * SPU PC will fail with no available anonymous map information. 
+ */ + if (spu_buff[spu_num].ctx_sw_seen) + spu_buff_add((file_offset | spu_num_shifted), + spu_num); } spin_unlock(&buffer_lock); out: @@ -463,20 +626,41 @@ out: int spu_sync_stop(void) { unsigned long flags = 0; - int ret = spu_switch_event_unregister(&spu_active); - if (ret) { + int ret; + int k; + + ret = spu_switch_event_unregister(&spu_active); + + if (ret) printk(KERN_ERR "SPU_PROF: " - "%s, line %d: spu_switch_event_unregister returned %d\n", - __func__, __LINE__, ret); - goto out; - } + "%s, line %d: spu_switch_event_unregister " \ + "returned %d\n", + __func__, __LINE__, ret); + + /* flush any remaining data in the per SPU buffers */ + sync_spu_buff(); spin_lock_irqsave(&cache_lock, flags); ret = release_cached_info(RELEASE_ALL); spin_unlock_irqrestore(&cache_lock, flags); -out: + + /* remove scheduled work queue item rather then waiting + * for every queued entry to execute. Then flush pending + * system wide buffer to event buffer. + */ + cancel_delayed_work(&spu_work); + + for (k = 0; k < num_spu_nodes; k++) { + spu_buff[k].ctx_sw_seen = 0; + + /* + * spu_sys_buff will be null if there was a problem + * allocating the buffer. Only delete if it exists. + */ + kfree(spu_buff[k].buff); + spu_buff[k].buff = 0; + } pr_debug("spu_sync_stop -- done.\n"); return ret; } - diff --git a/drivers/oprofile/buffer_sync.c b/drivers/oprofile/buffer_sync.c index ed982273fb8..37681700b61 100644 --- a/drivers/oprofile/buffer_sync.c +++ b/drivers/oprofile/buffer_sync.c @@ -628,3 +628,27 @@ void sync_buffer(int cpu) mutex_unlock(&buffer_mutex); } + +/* The function can be used to add a buffer worth of data directly to + * the kernel buffer. The buffer is assumed to be a circular buffer. + * Take the entries from index start and end at index end, wrapping + * at max_entries. 
+ */ +void oprofile_put_buff(unsigned long *buf, unsigned int start, + unsigned int stop, unsigned int max) +{ + int i; + + i = start; + + mutex_lock(&buffer_mutex); + while (i != stop) { + add_event_entry(buf[i++]); + + if (i >= max) + i = 0; + } + + mutex_unlock(&buffer_mutex); +} + diff --git a/drivers/oprofile/cpu_buffer.c b/drivers/oprofile/cpu_buffer.c index e1bd5a937f6..7ba39fe20a8 100644 --- a/drivers/oprofile/cpu_buffer.c +++ b/drivers/oprofile/cpu_buffer.c @@ -38,13 +38,26 @@ static int work_enabled; void free_cpu_buffers(void) { int i; - + for_each_online_cpu(i) { vfree(per_cpu(cpu_buffer, i).buffer); per_cpu(cpu_buffer, i).buffer = NULL; } } +unsigned long oprofile_get_cpu_buffer_size(void) +{ + return fs_cpu_buffer_size; +} + +void oprofile_cpu_buffer_inc_smpl_lost(void) +{ + struct oprofile_cpu_buffer *cpu_buf + = &__get_cpu_var(cpu_buffer); + + cpu_buf->sample_lost_overflow++; +} + int alloc_cpu_buffers(void) { int i; diff --git a/drivers/oprofile/event_buffer.h b/drivers/oprofile/event_buffer.h index 5076ed1ebd8..84bf324c577 100644 --- a/drivers/oprofile/event_buffer.h +++ b/drivers/oprofile/event_buffer.h @@ -17,6 +17,13 @@ int alloc_event_buffer(void); void free_event_buffer(void); +/** + * Add data to the event buffer. + * The data passed is free-form, but typically consists of + * file offsets, dcookies, context information, and ESCAPE codes. + */ +void add_event_entry(unsigned long data); + /* wake up the process sleeping on the event file */ void wake_up_buffer_waiter(void); diff --git a/include/linux/oprofile.h b/include/linux/oprofile.h index bcb8f725427..5231861f357 100644 --- a/include/linux/oprofile.h +++ b/include/linux/oprofile.h @@ -85,13 +85,6 @@ int oprofile_arch_init(struct oprofile_operations * ops); */ void oprofile_arch_exit(void); -/** - * Add data to the event buffer. - * The data passed is free-form, but typically consists of - * file offsets, dcookies, context information, and ESCAPE codes. 
- */ -void add_event_entry(unsigned long data); - /** * Add a sample. This may be called from any context. Pass * smp_processor_id() as cpu. @@ -162,5 +155,14 @@ int oprofilefs_ulong_from_user(unsigned long * val, char const __user * buf, siz /** lock for read/write safety */ extern spinlock_t oprofilefs_lock; + +/** + * Add the contents of a circular buffer to the event buffer. + */ +void oprofile_put_buff(unsigned long *buf, unsigned int start, + unsigned int stop, unsigned int max); + +unsigned long oprofile_get_cpu_buffer_size(void); +void oprofile_cpu_buffer_inc_smpl_lost(void); #endif /* OPROFILE_H */ -- cgit v1.2.3-70-g09d2 From aeb5d727062a0238a2f96c9c380fbd2be4640c6f Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 2 Sep 2008 15:28:45 -0400 Subject: [PATCH] introduce fmode_t, do annotations Signed-off-by: Al Viro --- block/bsg.c | 7 ++++--- block/cmd-filter.c | 2 +- block/scsi_ioctl.c | 5 +++-- drivers/block/amiflop.c | 4 ++-- drivers/block/ataflop.c | 4 ++-- drivers/block/floppy.c | 4 ++-- drivers/block/paride/pf.c | 2 +- drivers/block/paride/pt.c | 2 +- drivers/block/pktcdvd.c | 2 +- drivers/block/swim3.c | 4 ++-- drivers/char/nvram.c | 6 +++--- drivers/ide/ide-floppy_ioctl.c | 2 +- drivers/ide/ide-gd.c | 2 +- drivers/md/dm-ioctl.c | 4 ++-- drivers/md/dm-table.c | 12 ++++++------ drivers/mtd/mtdchar.c | 10 +++++----- drivers/parisc/eisa_eeprom.c | 2 +- fs/block_dev.c | 10 +++++----- fs/fifo.c | 6 +++--- fs/file_table.c | 4 ++-- fs/hostfs/hostfs_kern.c | 5 +++-- fs/locks.c | 3 ++- fs/open.c | 2 +- fs/proc/base.c | 4 ++-- fs/reiserfs/journal.c | 2 +- include/linux/blkdev.h | 3 ++- include/linux/device-mapper.h | 8 ++++---- include/linux/file.h | 4 ++-- include/linux/fs.h | 16 ++++++++-------- include/linux/fsnotify.h | 2 +- include/linux/types.h | 1 + ipc/shm.c | 2 +- sound/core/oss/pcm_oss.c | 2 +- sound/oss/au1550_ac97.c | 2 +- sound/oss/dmasound/dmasound.h | 4 ++-- sound/oss/dmasound/dmasound_atari.c | 4 ++-- sound/oss/dmasound/dmasound_core.c | 10 
+++++----- sound/oss/msnd.h | 2 +- sound/oss/sound_config.h | 20 ++++++-------------- sound/oss/swarm_cs4297a.c | 2 +- sound/oss/vwsnd.c | 2 +- 41 files changed, 96 insertions(+), 98 deletions(-) (limited to 'include/linux') diff --git a/block/bsg.c b/block/bsg.c index 034112bfe1f..2d36b127f38 100644 --- a/block/bsg.c +++ b/block/bsg.c @@ -173,7 +173,7 @@ unlock: static int blk_fill_sgv4_hdr_rq(struct request_queue *q, struct request *rq, struct sg_io_v4 *hdr, struct bsg_device *bd, - int has_write_perm) + fmode_t has_write_perm) { if (hdr->request_len > BLK_MAX_CDB) { rq->cmd = kzalloc(hdr->request_len, GFP_KERNEL); @@ -242,7 +242,7 @@ bsg_validate_sgv4_hdr(struct request_queue *q, struct sg_io_v4 *hdr, int *rw) * map sg_io_v4 to a request. */ static struct request * -bsg_map_hdr(struct bsg_device *bd, struct sg_io_v4 *hdr, int has_write_perm) +bsg_map_hdr(struct bsg_device *bd, struct sg_io_v4 *hdr, fmode_t has_write_perm) { struct request_queue *q = bd->queue; struct request *rq, *next_rq = NULL; @@ -601,7 +601,8 @@ bsg_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) } static int __bsg_write(struct bsg_device *bd, const char __user *buf, - size_t count, ssize_t *bytes_written, int has_write_perm) + size_t count, ssize_t *bytes_written, + fmode_t has_write_perm) { struct bsg_command *bc; struct request *rq; diff --git a/block/cmd-filter.c b/block/cmd-filter.c index e669aed4c6b..504b275e1b9 100644 --- a/block/cmd-filter.c +++ b/block/cmd-filter.c @@ -27,7 +27,7 @@ #include int blk_verify_command(struct blk_cmd_filter *filter, - unsigned char *cmd, int has_write_perm) + unsigned char *cmd, fmode_t has_write_perm) { /* root can do any command. 
*/ if (capable(CAP_SYS_RAWIO)) diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c index c34272a348f..c525905f9d3 100644 --- a/block/scsi_ioctl.c +++ b/block/scsi_ioctl.c @@ -384,7 +384,8 @@ int sg_scsi_ioctl(struct file *file, struct request_queue *q, struct gendisk *disk, struct scsi_ioctl_command __user *sic) { struct request *rq; - int err, write_perm = 0; + int err; + fmode_t write_perm = 0; unsigned int in_len, out_len, bytes, opcode, cmdlen; char *buffer = NULL, sense[SCSI_SENSE_BUFFERSIZE]; @@ -428,7 +429,7 @@ int sg_scsi_ioctl(struct file *file, struct request_queue *q, /* scsi_ioctl passes NULL */ if (file && (file->f_mode & FMODE_WRITE)) - write_perm = 1; + write_perm = FMODE_WRITE; err = blk_verify_command(&q->cmd_filter, rq->cmd, write_perm); if (err) diff --git a/drivers/block/amiflop.c b/drivers/block/amiflop.c index 7516baff3bb..d19c5a939fe 100644 --- a/drivers/block/amiflop.c +++ b/drivers/block/amiflop.c @@ -1560,9 +1560,9 @@ static int floppy_open(struct inode *inode, struct file *filp) if (fd_ref[drive] && old_dev != system) return -EBUSY; - if (filp && filp->f_mode & 3) { + if (filp && filp->f_mode & (FMODE_READ|FMODE_WRITE)) { check_disk_change(inode->i_bdev); - if (filp->f_mode & 2 ) { + if (filp->f_mode & FMODE_WRITE ) { int wrprot; get_fdc(drive); diff --git a/drivers/block/ataflop.c b/drivers/block/ataflop.c index 432cf401829..e1db285b72c 100644 --- a/drivers/block/ataflop.c +++ b/drivers/block/ataflop.c @@ -1826,9 +1826,9 @@ static int floppy_open( struct inode *inode, struct file *filp ) if (filp->f_flags & O_NDELAY) return 0; - if (filp->f_mode & 3) { + if (filp->f_mode & (FMODE_READ|FMODE_WRITE)) { check_disk_change(inode->i_bdev); - if (filp->f_mode & 2) { + if (filp->f_mode & FMODE_WRITE) { if (p->wpstat) { if (p->ref < 0) p->ref = 0; diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index 2cea27aba9a..ae3ef8945f3 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -3761,14 +3761,14 @@ static int 
floppy_open(struct inode *inode, struct file *filp) UFDCS->rawcmd = 2; if (!(filp->f_flags & O_NDELAY)) { - if (filp->f_mode & 3) { + if (filp->f_mode & (FMODE_READ|FMODE_WRITE)) { UDRS->last_checked = 0; check_disk_change(inode->i_bdev); if (UTESTF(FD_DISK_CHANGED)) goto out; } res = -EROFS; - if ((filp->f_mode & 2) && !(UTESTF(FD_DISK_WRITABLE))) + if ((filp->f_mode & FMODE_WRITE) && !(UTESTF(FD_DISK_WRITABLE))) goto out; } mutex_unlock(&open_lock); diff --git a/drivers/block/paride/pf.c b/drivers/block/paride/pf.c index e7fe6ca97dd..a902d84fd33 100644 --- a/drivers/block/paride/pf.c +++ b/drivers/block/paride/pf.c @@ -305,7 +305,7 @@ static int pf_open(struct inode *inode, struct file *file) if (pf->media_status == PF_NM) return -ENODEV; - if ((pf->media_status == PF_RO) && (file->f_mode & 2)) + if ((pf->media_status == PF_RO) && (file->f_mode & FMODE_WRITE)) return -EROFS; pf->access++; diff --git a/drivers/block/paride/pt.c b/drivers/block/paride/pt.c index 5ae229656ea..1e4006e18f0 100644 --- a/drivers/block/paride/pt.c +++ b/drivers/block/paride/pt.c @@ -667,7 +667,7 @@ static int pt_open(struct inode *inode, struct file *file) goto out; err = -EROFS; - if ((!(tape->flags & PT_WRITE_OK)) && (file->f_mode & 2)) + if ((!(tape->flags & PT_WRITE_OK)) && (file->f_mode & FMODE_WRITE)) goto out; if (!(iminor(inode) & 128)) diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c index 195ca7c720f..4d581e8ba9f 100644 --- a/drivers/block/pktcdvd.c +++ b/drivers/block/pktcdvd.c @@ -2320,7 +2320,7 @@ static int pkt_open_write(struct pktcdvd_device *pd) /* * called at open time. 
*/ -static int pkt_open_dev(struct pktcdvd_device *pd, int write) +static int pkt_open_dev(struct pktcdvd_device *pd, fmode_t write) { int ret; long lba; diff --git a/drivers/block/swim3.c b/drivers/block/swim3.c index 730ccea78e4..a53ca54bee1 100644 --- a/drivers/block/swim3.c +++ b/drivers/block/swim3.c @@ -908,13 +908,13 @@ static int floppy_open(struct inode *inode, struct file *filp) return -EBUSY; if (err == 0 && (filp->f_flags & O_NDELAY) == 0 - && (filp->f_mode & 3)) { + && (filp->f_mode & (FMODE_READ|FMODE_WRITE))) { check_disk_change(inode->i_bdev); if (fs->ejected) err = -ENXIO; } - if (err == 0 && (filp->f_mode & 2)) { + if (err == 0 && (filp->f_mode & FMODE_WRITE)) { if (fs->write_prot < 0) fs->write_prot = swim3_readbit(fs, WRITE_PROT); if (fs->write_prot) diff --git a/drivers/char/nvram.c b/drivers/char/nvram.c index 39f6357e3b5..8054ee839b3 100644 --- a/drivers/char/nvram.c +++ b/drivers/char/nvram.c @@ -338,7 +338,7 @@ nvram_open(struct inode *inode, struct file *file) if ((nvram_open_cnt && (file->f_flags & O_EXCL)) || (nvram_open_mode & NVRAM_EXCL) || - ((file->f_mode & 2) && (nvram_open_mode & NVRAM_WRITE))) { + ((file->f_mode & FMODE_WRITE) && (nvram_open_mode & NVRAM_WRITE))) { spin_unlock(&nvram_state_lock); unlock_kernel(); return -EBUSY; @@ -346,7 +346,7 @@ nvram_open(struct inode *inode, struct file *file) if (file->f_flags & O_EXCL) nvram_open_mode |= NVRAM_EXCL; - if (file->f_mode & 2) + if (file->f_mode & FMODE_WRITE) nvram_open_mode |= NVRAM_WRITE; nvram_open_cnt++; @@ -366,7 +366,7 @@ nvram_release(struct inode *inode, struct file *file) /* if only one instance is open, clear the EXCL bit */ if (nvram_open_mode & NVRAM_EXCL) nvram_open_mode &= ~NVRAM_EXCL; - if (file->f_mode & 2) + if (file->f_mode & FMODE_WRITE) nvram_open_mode &= ~NVRAM_WRITE; spin_unlock(&nvram_state_lock); diff --git a/drivers/ide/ide-floppy_ioctl.c b/drivers/ide/ide-floppy_ioctl.c index 409e4c15f9b..0d5f5054ab6 100644 --- a/drivers/ide/ide-floppy_ioctl.c +++ 
b/drivers/ide/ide-floppy_ioctl.c @@ -250,7 +250,7 @@ static int ide_floppy_format_ioctl(ide_drive_t *drive, struct file *file, case IDEFLOPPY_IOCTL_FORMAT_GET_CAPACITY: return ide_floppy_get_format_capacities(drive, argp); case IDEFLOPPY_IOCTL_FORMAT_START: - if (!(file->f_mode & 2)) + if (!(file->f_mode & FMODE_WRITE)) return -EPERM; return ide_floppy_format_unit(drive, (int __user *)argp); case IDEFLOPPY_IOCTL_FORMAT_GET_PROGRESS: diff --git a/drivers/ide/ide-gd.c b/drivers/ide/ide-gd.c index d44898f46c3..d367473098f 100644 --- a/drivers/ide/ide-gd.c +++ b/drivers/ide/ide-gd.c @@ -202,7 +202,7 @@ static int ide_gd_open(struct inode *inode, struct file *filp) goto out_put_idkp; } - if ((drive->dev_flags & IDE_DFLAG_WP) && (filp->f_mode & 2)) { + if ((drive->dev_flags & IDE_DFLAG_WP) && (filp->f_mode & FMODE_WRITE)) { ret = -EROFS; goto out_put_idkp; } diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c index dca401dc70a..777c948180f 100644 --- a/drivers/md/dm-ioctl.c +++ b/drivers/md/dm-ioctl.c @@ -988,9 +988,9 @@ static int dev_wait(struct dm_ioctl *param, size_t param_size) return r; } -static inline int get_mode(struct dm_ioctl *param) +static inline fmode_t get_mode(struct dm_ioctl *param) { - int mode = FMODE_READ | FMODE_WRITE; + fmode_t mode = FMODE_READ | FMODE_WRITE; if (param->flags & DM_READONLY_FLAG) mode = FMODE_READ; diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index a740a6950f5..7c8671b06fe 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -43,7 +43,7 @@ struct dm_table { * device. This should be a combination of FMODE_READ * and FMODE_WRITE. 
*/ - int mode; + fmode_t mode; /* a list of devices used by this table */ struct list_head devices; @@ -217,7 +217,7 @@ static int alloc_targets(struct dm_table *t, unsigned int num) return 0; } -int dm_table_create(struct dm_table **result, int mode, +int dm_table_create(struct dm_table **result, fmode_t mode, unsigned num_targets, struct mapped_device *md) { struct dm_table *t = kzalloc(sizeof(*t), GFP_KERNEL); @@ -395,7 +395,7 @@ static int check_device_area(struct dm_dev_internal *dd, sector_t start, * careful to leave things as they were if we fail to reopen the * device. */ -static int upgrade_mode(struct dm_dev_internal *dd, int new_mode, +static int upgrade_mode(struct dm_dev_internal *dd, fmode_t new_mode, struct mapped_device *md) { int r; @@ -421,7 +421,7 @@ static int upgrade_mode(struct dm_dev_internal *dd, int new_mode, */ static int __table_get_device(struct dm_table *t, struct dm_target *ti, const char *path, sector_t start, sector_t len, - int mode, struct dm_dev **result) + fmode_t mode, struct dm_dev **result) { int r; dev_t uninitialized_var(dev); @@ -537,7 +537,7 @@ void dm_set_device_limits(struct dm_target *ti, struct block_device *bdev) EXPORT_SYMBOL_GPL(dm_set_device_limits); int dm_get_device(struct dm_target *ti, const char *path, sector_t start, - sector_t len, int mode, struct dm_dev **result) + sector_t len, fmode_t mode, struct dm_dev **result) { int r = __table_get_device(ti->table, ti, path, start, len, mode, result); @@ -887,7 +887,7 @@ struct list_head *dm_table_get_devices(struct dm_table *t) return &t->devices; } -int dm_table_get_mode(struct dm_table *t) +fmode_t dm_table_get_mode(struct dm_table *t) { return t->mode; } diff --git a/drivers/mtd/mtdchar.c b/drivers/mtd/mtdchar.c index 963840e9b5b..bcffeda2df3 100644 --- a/drivers/mtd/mtdchar.c +++ b/drivers/mtd/mtdchar.c @@ -96,7 +96,7 @@ static int mtd_open(struct inode *inode, struct file *file) return -ENODEV; /* You can't open the RO devices RW */ - if ((file->f_mode & 2) && 
(minor & 1)) + if ((file->f_mode & FMODE_WRITE) && (minor & 1)) return -EACCES; lock_kernel(); @@ -114,7 +114,7 @@ static int mtd_open(struct inode *inode, struct file *file) } /* You can't open it RW if it's not a writeable device */ - if ((file->f_mode & 2) && !(mtd->flags & MTD_WRITEABLE)) { + if ((file->f_mode & FMODE_WRITE) && !(mtd->flags & MTD_WRITEABLE)) { put_mtd_device(mtd); ret = -EACCES; goto out; @@ -144,7 +144,7 @@ static int mtd_close(struct inode *inode, struct file *file) DEBUG(MTD_DEBUG_LEVEL0, "MTD_close\n"); /* Only sync if opened RW */ - if ((file->f_mode & 2) && mtd->sync) + if ((file->f_mode & FMODE_WRITE) && mtd->sync) mtd->sync(mtd); put_mtd_device(mtd); @@ -443,7 +443,7 @@ static int mtd_ioctl(struct inode *inode, struct file *file, { struct erase_info *erase; - if(!(file->f_mode & 2)) + if(!(file->f_mode & FMODE_WRITE)) return -EPERM; erase=kzalloc(sizeof(struct erase_info),GFP_KERNEL); @@ -497,7 +497,7 @@ static int mtd_ioctl(struct inode *inode, struct file *file, struct mtd_oob_buf __user *user_buf = argp; uint32_t retlen; - if(!(file->f_mode & 2)) + if(!(file->f_mode & FMODE_WRITE)) return -EPERM; if (copy_from_user(&buf, argp, sizeof(struct mtd_oob_buf))) diff --git a/drivers/parisc/eisa_eeprom.c b/drivers/parisc/eisa_eeprom.c index 5ac207932fd..685d94e69d4 100644 --- a/drivers/parisc/eisa_eeprom.c +++ b/drivers/parisc/eisa_eeprom.c @@ -86,7 +86,7 @@ static int eisa_eeprom_open(struct inode *inode, struct file *file) { cycle_kernel_lock(); - if (file->f_mode & 2) + if (file->f_mode & FMODE_WRITE) return -EINVAL; return 0; diff --git a/fs/block_dev.c b/fs/block_dev.c index 218408eed1b..8897f3b02e9 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -840,7 +840,7 @@ EXPORT_SYMBOL_GPL(bd_release_from_disk); * to be used for internal purposes. If you ever need it - reconsider * your API. 
*/ -struct block_device *open_by_devnum(dev_t dev, unsigned mode) +struct block_device *open_by_devnum(dev_t dev, fmode_t mode) { struct block_device *bdev = bdget(dev); int err = -ENOMEM; @@ -975,7 +975,7 @@ void bd_set_size(struct block_device *bdev, loff_t size) } EXPORT_SYMBOL(bd_set_size); -static int __blkdev_get(struct block_device *bdev, mode_t mode, unsigned flags, +static int __blkdev_get(struct block_device *bdev, fmode_t mode, unsigned flags, int for_part); static int __blkdev_put(struct block_device *bdev, int for_part); @@ -1104,7 +1104,7 @@ static int do_open(struct block_device *bdev, struct file *file, int for_part) return ret; } -static int __blkdev_get(struct block_device *bdev, mode_t mode, unsigned flags, +static int __blkdev_get(struct block_device *bdev, fmode_t mode, unsigned flags, int for_part) { /* @@ -1123,7 +1123,7 @@ static int __blkdev_get(struct block_device *bdev, mode_t mode, unsigned flags, return do_open(bdev, &fake_file, for_part); } -int blkdev_get(struct block_device *bdev, mode_t mode, unsigned flags) +int blkdev_get(struct block_device *bdev, fmode_t mode, unsigned flags) { return __blkdev_get(bdev, mode, flags, 0); } @@ -1315,7 +1315,7 @@ EXPORT_SYMBOL(lookup_bdev); struct block_device *open_bdev_excl(const char *path, int flags, void *holder) { struct block_device *bdev; - mode_t mode = FMODE_READ; + fmode_t mode = FMODE_READ; int error = 0; bdev = lookup_bdev(path); diff --git a/fs/fifo.c b/fs/fifo.c index 987bf941149..f8f97b8b6d4 100644 --- a/fs/fifo.c +++ b/fs/fifo.c @@ -51,7 +51,7 @@ static int fifo_open(struct inode *inode, struct file *filp) filp->f_mode &= (FMODE_READ | FMODE_WRITE); switch (filp->f_mode) { - case 1: + case FMODE_READ: /* * O_RDONLY * POSIX.1 says that O_NONBLOCK means return with the FIFO @@ -76,7 +76,7 @@ static int fifo_open(struct inode *inode, struct file *filp) } break; - case 2: + case FMODE_WRITE: /* * O_WRONLY * POSIX.1 says that O_NONBLOCK means return -1 with @@ -98,7 +98,7 @@ static int 
fifo_open(struct inode *inode, struct file *filp) } break; - case 3: + case FMODE_READ | FMODE_WRITE: /* * O_RDWR * POSIX.1 leaves this case "undefined" when O_NONBLOCK is set. diff --git a/fs/file_table.c b/fs/file_table.c index f45a4493f9e..efc06faede6 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -161,7 +161,7 @@ EXPORT_SYMBOL(get_empty_filp); * code should be moved into this function. */ struct file *alloc_file(struct vfsmount *mnt, struct dentry *dentry, - mode_t mode, const struct file_operations *fop) + fmode_t mode, const struct file_operations *fop) { struct file *file; struct path; @@ -193,7 +193,7 @@ EXPORT_SYMBOL(alloc_file); * of this should be moving to alloc_file(). */ int init_file(struct file *file, struct vfsmount *mnt, struct dentry *dentry, - mode_t mode, const struct file_operations *fop) + fmode_t mode, const struct file_operations *fop) { int error = 0; file->f_path.dentry = dentry; diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c index d6ecabf4d23..7f34f4385de 100644 --- a/fs/hostfs/hostfs_kern.c +++ b/fs/hostfs/hostfs_kern.c @@ -20,7 +20,7 @@ struct hostfs_inode_info { char *host_filename; int fd; - int mode; + fmode_t mode; struct inode vfs_inode; }; @@ -373,7 +373,8 @@ int hostfs_readdir(struct file *file, void *ent, filldir_t filldir) int hostfs_file_open(struct inode *ino, struct file *file) { char *name; - int mode = 0, r = 0, w = 0, fd; + fmode_t mode = 0; + int r = 0, w = 0, fd; mode = file->f_mode & (FMODE_READ | FMODE_WRITE); if ((mode & HOSTFS_I(ino)->mode) == mode) diff --git a/fs/locks.c b/fs/locks.c index 5eb259e3cd3..20457486d6b 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -1580,7 +1580,8 @@ asmlinkage long sys_flock(unsigned int fd, unsigned int cmd) cmd &= ~LOCK_NB; unlock = (cmd == LOCK_UN); - if (!unlock && !(cmd & LOCK_MAND) && !(filp->f_mode & 3)) + if (!unlock && !(cmd & LOCK_MAND) && + !(filp->f_mode & (FMODE_READ|FMODE_WRITE))) goto out_putf; error = flock_make_lock(filp, &lock, cmd); diff --git 
a/fs/open.c b/fs/open.c index 5596049863b..83cdb9dee0c 100644 --- a/fs/open.c +++ b/fs/open.c @@ -798,7 +798,7 @@ static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt, int error; f->f_flags = flags; - f->f_mode = ((flags+1) & O_ACCMODE) | FMODE_LSEEK | + f->f_mode = (__force fmode_t)((flags+1) & O_ACCMODE) | FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE; inode = dentry->d_inode; if (f->f_mode & FMODE_WRITE) { diff --git a/fs/proc/base.c b/fs/proc/base.c index b5918ae8ca7..486cf3fe713 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -1712,9 +1712,9 @@ static struct dentry *proc_fd_instantiate(struct inode *dir, file = fcheck_files(files, fd); if (!file) goto out_unlock; - if (file->f_mode & 1) + if (file->f_mode & FMODE_READ) inode->i_mode |= S_IRUSR | S_IXUSR; - if (file->f_mode & 2) + if (file->f_mode & FMODE_WRITE) inode->i_mode |= S_IWUSR | S_IXUSR; spin_unlock(&files->file_lock); put_files_struct(files); diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c index c21df71943a..b89d193a00d 100644 --- a/fs/reiserfs/journal.c +++ b/fs/reiserfs/journal.c @@ -2593,7 +2593,7 @@ static int journal_init_dev(struct super_block *super, { int result; dev_t jdev; - int blkdev_mode = FMODE_READ | FMODE_WRITE; + fmode_t blkdev_mode = FMODE_READ | FMODE_WRITE; char b[BDEVNAME_SIZE]; result = 0; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index b4fe68fe3a5..a4413ec3cb3 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -910,7 +910,8 @@ static inline int sb_issue_discard(struct super_block *sb, * command filter functions */ extern int blk_verify_command(struct blk_cmd_filter *filter, - unsigned char *cmd, int has_write_perm); + unsigned char *cmd, fmode_t has_write_perm); +extern void blk_unregister_filter(struct gendisk *disk); extern void blk_set_cmd_filter_defaults(struct blk_cmd_filter *filter); #define MAX_PHYS_SEGMENTS 128 diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index 
08d783592b7..3f8d4e76367 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -85,7 +85,7 @@ void dm_set_device_limits(struct dm_target *ti, struct block_device *bdev); struct dm_dev { struct block_device *bdev; - int mode; + fmode_t mode; char name[16]; }; @@ -95,7 +95,7 @@ struct dm_dev { * FIXME: too many arguments. */ int dm_get_device(struct dm_target *ti, const char *path, sector_t start, - sector_t len, int mode, struct dm_dev **result); + sector_t len, fmode_t mode, struct dm_dev **result); void dm_put_device(struct dm_target *ti, struct dm_dev *d); /* @@ -223,7 +223,7 @@ int dm_set_geometry(struct mapped_device *md, struct hd_geometry *geo); /* * First create an empty table. */ -int dm_table_create(struct dm_table **result, int mode, +int dm_table_create(struct dm_table **result, fmode_t mode, unsigned num_targets, struct mapped_device *md); /* @@ -254,7 +254,7 @@ void dm_table_put(struct dm_table *t); */ sector_t dm_table_get_size(struct dm_table *t); unsigned int dm_table_get_num_targets(struct dm_table *t); -int dm_table_get_mode(struct dm_table *t); +fmode_t dm_table_get_mode(struct dm_table *t); struct mapped_device *dm_table_get_md(struct dm_table *t); /* diff --git a/include/linux/file.h b/include/linux/file.h index a20259e248a..335a0a5c316 100644 --- a/include/linux/file.h +++ b/include/linux/file.h @@ -19,10 +19,10 @@ struct file_operations; struct vfsmount; struct dentry; extern int init_file(struct file *, struct vfsmount *mnt, - struct dentry *dentry, mode_t mode, + struct dentry *dentry, fmode_t mode, const struct file_operations *fop); extern struct file *alloc_file(struct vfsmount *, struct dentry *dentry, - mode_t mode, const struct file_operations *fop); + fmode_t mode, const struct file_operations *fop); static inline void fput_light(struct file *file, int fput_needed) { diff --git a/include/linux/fs.h b/include/linux/fs.h index a6a625be13f..60a7a581ba9 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h 
@@ -63,18 +63,18 @@ extern int dir_notify_enable; #define MAY_ACCESS 16 #define MAY_OPEN 32 -#define FMODE_READ 1 -#define FMODE_WRITE 2 +#define FMODE_READ ((__force fmode_t)1) +#define FMODE_WRITE ((__force fmode_t)2) /* Internal kernel extensions */ -#define FMODE_LSEEK 4 -#define FMODE_PREAD 8 +#define FMODE_LSEEK ((__force fmode_t)4) +#define FMODE_PREAD ((__force fmode_t)8) #define FMODE_PWRITE FMODE_PREAD /* These go hand in hand */ /* File is being opened for execution. Primary users of this flag are distributed filesystems that can use it to achieve correct ETXTBUSY behavior for cross-node execution/opening_for_writing of files */ -#define FMODE_EXEC 16 +#define FMODE_EXEC ((__force fmode_t)16) #define RW_MASK 1 #define RWA_MASK 2 @@ -825,7 +825,7 @@ struct file { const struct file_operations *f_op; atomic_long_t f_count; unsigned int f_flags; - mode_t f_mode; + fmode_t f_mode; loff_t f_pos; struct fown_struct f_owner; unsigned int f_uid, f_gid; @@ -1714,7 +1714,7 @@ extern struct block_device *bdget(dev_t); extern void bd_set_size(struct block_device *, loff_t size); extern void bd_forget(struct inode *inode); extern void bdput(struct block_device *); -extern struct block_device *open_by_devnum(dev_t, unsigned); +extern struct block_device *open_by_devnum(dev_t, fmode_t); #else static inline void bd_forget(struct inode *inode) {} #endif @@ -1729,7 +1729,7 @@ extern int blkdev_driver_ioctl(struct inode *inode, struct file *file, struct gendisk *disk, unsigned cmd, unsigned long arg); extern long compat_blkdev_ioctl(struct file *, unsigned, unsigned long); -extern int blkdev_get(struct block_device *, mode_t, unsigned); +extern int blkdev_get(struct block_device *, fmode_t, unsigned); extern int blkdev_put(struct block_device *); extern int bd_claim(struct block_device *, void *); extern void bd_release(struct block_device *); diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h index a89513188ce..00fbd5b245c 100644 --- 
a/include/linux/fsnotify.h +++ b/include/linux/fsnotify.h @@ -188,7 +188,7 @@ static inline void fsnotify_close(struct file *file) struct dentry *dentry = file->f_path.dentry; struct inode *inode = dentry->d_inode; const char *name = dentry->d_name.name; - mode_t mode = file->f_mode; + fmode_t mode = file->f_mode; u32 mask = (mode & FMODE_WRITE) ? IN_CLOSE_WRITE : IN_CLOSE_NOWRITE; if (S_ISDIR(inode->i_mode)) diff --git a/include/linux/types.h b/include/linux/types.h index f24f7beb47d..1d98330b1f2 100644 --- a/include/linux/types.h +++ b/include/linux/types.h @@ -190,6 +190,7 @@ typedef __u32 __bitwise __wsum; #ifdef __KERNEL__ typedef unsigned __bitwise__ gfp_t; +typedef unsigned __bitwise__ fmode_t; #ifdef CONFIG_PHYS_ADDR_T_64BIT typedef u64 phys_addr_t; diff --git a/ipc/shm.c b/ipc/shm.c index 0add3fa5f54..867e5d6a55c 100644 --- a/ipc/shm.c +++ b/ipc/shm.c @@ -817,7 +817,7 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr) struct ipc_namespace *ns; struct shm_file_data *sfd; struct path path; - mode_t f_mode; + fmode_t f_mode; err = -EINVAL; if (shmid < 0) diff --git a/sound/core/oss/pcm_oss.c b/sound/core/oss/pcm_oss.c index 1af62b8b86c..e17836680f4 100644 --- a/sound/core/oss/pcm_oss.c +++ b/sound/core/oss/pcm_oss.c @@ -2283,7 +2283,7 @@ static int snd_pcm_oss_open_file(struct file *file, int idx, err; struct snd_pcm_oss_file *pcm_oss_file; struct snd_pcm_substream *substream; - unsigned int f_mode = file->f_mode; + fmode_t f_mode = file->f_mode; if (rpcm_oss_file) *rpcm_oss_file = NULL; diff --git a/sound/oss/au1550_ac97.c b/sound/oss/au1550_ac97.c index 23018a7c063..81e1f443d09 100644 --- a/sound/oss/au1550_ac97.c +++ b/sound/oss/au1550_ac97.c @@ -93,7 +93,7 @@ static struct au1550_state { spinlock_t lock; struct mutex open_mutex; struct mutex sem; - mode_t open_mode; + fmode_t open_mode; wait_queue_head_t open_wait; struct dmabuf { diff --git a/sound/oss/dmasound/dmasound.h b/sound/oss/dmasound/dmasound.h index 
d978b009656..1cb13fe56ec 100644 --- a/sound/oss/dmasound/dmasound.h +++ b/sound/oss/dmasound/dmasound.h @@ -129,7 +129,7 @@ typedef struct { int (*mixer_ioctl)(u_int, u_long); /* optional */ int (*write_sq_setup)(void); /* optional */ int (*read_sq_setup)(void); /* optional */ - int (*sq_open)(mode_t); /* optional */ + int (*sq_open)(fmode_t); /* optional */ int (*state_info)(char *, size_t); /* optional */ void (*abort_read)(void); /* optional */ int min_dsp_speed; @@ -235,7 +235,7 @@ struct sound_queue { */ int active; wait_queue_head_t action_queue, open_queue, sync_queue; - int open_mode; + fmode_t open_mode; int busy, syncing, xruns, died; }; diff --git a/sound/oss/dmasound/dmasound_atari.c b/sound/oss/dmasound/dmasound_atari.c index 285239d64b8..4d45bd63718 100644 --- a/sound/oss/dmasound/dmasound_atari.c +++ b/sound/oss/dmasound/dmasound_atari.c @@ -143,7 +143,7 @@ static int AtaMixerIoctl(u_int cmd, u_long arg); static int TTMixerIoctl(u_int cmd, u_long arg); static int FalconMixerIoctl(u_int cmd, u_long arg); static int AtaWriteSqSetup(void); -static int AtaSqOpen(mode_t mode); +static int AtaSqOpen(fmode_t mode); static int TTStateInfo(char *buffer, size_t space); static int FalconStateInfo(char *buffer, size_t space); @@ -1461,7 +1461,7 @@ static int AtaWriteSqSetup(void) return 0 ; } -static int AtaSqOpen(mode_t mode) +static int AtaSqOpen(fmode_t mode) { write_sq_ignore_int = 1; return 0 ; diff --git a/sound/oss/dmasound/dmasound_core.c b/sound/oss/dmasound/dmasound_core.c index 95fc5c68175..b8239f3168f 100644 --- a/sound/oss/dmasound/dmasound_core.c +++ b/sound/oss/dmasound/dmasound_core.c @@ -212,7 +212,7 @@ static int irq_installed; #endif /* MODULE */ /* control over who can modify resources shared between play/record */ -static mode_t shared_resource_owner; +static fmode_t shared_resource_owner; static int shared_resources_initialised; /* @@ -668,7 +668,7 @@ static inline void sq_init_waitqueue(struct sound_queue *sq) #if 0 /* blocking open() */ 
static inline void sq_wake_up(struct sound_queue *sq, struct file *file, - mode_t mode) + fmode_t mode) { if (file->f_mode & mode) { sq->busy = 0; /* CHECK: IS THIS OK??? */ @@ -677,7 +677,7 @@ static inline void sq_wake_up(struct sound_queue *sq, struct file *file, } #endif -static int sq_open2(struct sound_queue *sq, struct file *file, mode_t mode, +static int sq_open2(struct sound_queue *sq, struct file *file, fmode_t mode, int numbufs, int bufsize) { int rc = 0; @@ -891,10 +891,10 @@ static int sq_release(struct inode *inode, struct file *file) is the owner - if we have problems. */ -static int shared_resources_are_mine(mode_t md) +static int shared_resources_are_mine(fmode_t md) { if (shared_resource_owner) - return (shared_resource_owner & md ) ; + return (shared_resource_owner & md) != 0; else { shared_resource_owner = md ; return 1 ; diff --git a/sound/oss/msnd.h b/sound/oss/msnd.h index 61b3955481c..c8be47ec2b7 100644 --- a/sound/oss/msnd.h +++ b/sound/oss/msnd.h @@ -211,7 +211,7 @@ typedef struct multisound_dev { /* State variables */ enum { msndClassic, msndPinnacle } type; - mode_t mode; + fmode_t mode; unsigned long flags; #define F_RESETTING 0 #define F_HAVEDIGITAL 1 diff --git a/sound/oss/sound_config.h b/sound/oss/sound_config.h index 1a00a321061..55271fbe7f4 100644 --- a/sound/oss/sound_config.h +++ b/sound/oss/sound_config.h @@ -110,24 +110,16 @@ struct channel_info { #define OPEN_WRITE PCM_ENABLE_OUTPUT #define OPEN_READWRITE (OPEN_READ|OPEN_WRITE) -#if OPEN_READ == FMODE_READ && OPEN_WRITE == FMODE_WRITE - -static inline int translate_mode(struct file *file) -{ - return file->f_mode; -} - -#else - static inline int translate_mode(struct file *file) { - return ((file->f_mode & FMODE_READ) ? OPEN_READ : 0) | - ((file->f_mode & FMODE_WRITE) ? 
OPEN_WRITE : 0); + if (OPEN_READ == (__force int)FMODE_READ && + OPEN_WRITE == (__force int)FMODE_WRITE) + return (__force int)(file->f_mode & (FMODE_READ | FMODE_WRITE)); + else + return ((file->f_mode & FMODE_READ) ? OPEN_READ : 0) | + ((file->f_mode & FMODE_WRITE) ? OPEN_WRITE : 0); } -#endif - - #include "sound_calls.h" #include "dev_table.h" diff --git a/sound/oss/swarm_cs4297a.c b/sound/oss/swarm_cs4297a.c index 044453a4ee5..41562ecde5b 100644 --- a/sound/oss/swarm_cs4297a.c +++ b/sound/oss/swarm_cs4297a.c @@ -295,7 +295,7 @@ struct cs4297a_state { struct mutex open_mutex; struct mutex open_sem_adc; struct mutex open_sem_dac; - mode_t open_mode; + fmode_t open_mode; wait_queue_head_t open_wait; wait_queue_head_t open_wait_adc; wait_queue_head_t open_wait_dac; diff --git a/sound/oss/vwsnd.c b/sound/oss/vwsnd.c index dcbb3f739e6..78b8acc7c3b 100644 --- a/sound/oss/vwsnd.c +++ b/sound/oss/vwsnd.c @@ -1509,7 +1509,7 @@ typedef struct vwsnd_dev { struct mutex open_mutex; struct mutex io_mutex; struct mutex mix_mutex; - mode_t open_mode; + fmode_t open_mode; wait_queue_head_t open_wait; lithium_t lith; -- cgit v1.2.3-70-g09d2 From 86d434dede14108dd917b25af0f29c0cb28b8d18 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 26 Aug 2007 19:50:05 -0400 Subject: [PATCH] eliminate use of ->f_flags in block methods store needed information in f_mode Signed-off-by: Al Viro --- drivers/block/ataflop.c | 6 +++--- drivers/block/floppy.c | 15 ++++----------- drivers/block/swim3.c | 6 +++--- drivers/block/ub.c | 2 +- drivers/cdrom/cdrom.c | 4 ++-- drivers/ide/ide-gd.c | 2 +- drivers/scsi/sd.c | 2 +- fs/block_dev.c | 7 +++++++ include/linux/fs.h | 4 ++++ 9 files changed, 26 insertions(+), 22 deletions(-) (limited to 'include/linux') diff --git a/drivers/block/ataflop.c b/drivers/block/ataflop.c index e1db285b72c..85d56a26f7c 100644 --- a/drivers/block/ataflop.c +++ b/drivers/block/ataflop.c @@ -1813,17 +1813,17 @@ static int floppy_open( struct inode *inode, struct file *filp ) if 
(p->ref && p->type != type) return -EBUSY; - if (p->ref == -1 || (p->ref && filp->f_flags & O_EXCL)) + if (p->ref == -1 || (p->ref && filp->f_mode & FMODE_EXCL)) return -EBUSY; - if (filp->f_flags & O_EXCL) + if (filp->f_mode & FMODE_EXCL) p->ref = -1; else p->ref++; p->type = type; - if (filp->f_flags & O_NDELAY) + if (filp->f_mode & FMODE_NDELAY) return 0; if (filp->f_mode & (FMODE_READ|FMODE_WRITE)) { diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index ae3ef8945f3..5d60c05a736 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -3453,7 +3453,7 @@ static int fd_getgeo(struct block_device *bdev, struct hd_geometry *geo) static int fd_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, unsigned long param) { -#define FD_IOCTL_ALLOWED ((filp) && (filp)->private_data) +#define FD_IOCTL_ALLOWED ((filp) && (filp)->f_mode & (FMODE_WRITE|FMODE_WRITE_IOCTL)) #define OUT(c,x) case c: outparam = (const char *) (x); break #define IN(c,x,tag) case c: *(x) = inparam. tag ; return 0 @@ -3690,7 +3690,6 @@ static int floppy_open(struct inode *inode, struct file *filp) int res = -EBUSY; char *tmp; - filp->private_data = (void *)0; mutex_lock(&open_lock); old_dev = UDRS->fd_device; if (opened_bdev[drive] && opened_bdev[drive] != inode->i_bdev) @@ -3701,10 +3700,10 @@ static int floppy_open(struct inode *inode, struct file *filp) USETF(FD_VERIFY); } - if (UDRS->fd_ref == -1 || (UDRS->fd_ref && (filp->f_flags & O_EXCL))) + if (UDRS->fd_ref == -1 || (UDRS->fd_ref && (filp->f_mode & FMODE_EXCL))) goto out2; - if (filp->f_flags & O_EXCL) + if (filp->f_mode & FMODE_EXCL) UDRS->fd_ref = -1; else UDRS->fd_ref++; @@ -3751,16 +3750,10 @@ static int floppy_open(struct inode *inode, struct file *filp) buffer_track = -1; } - /* Allow ioctls if we have write-permissions even if read-only open. 
- * Needed so that programs such as fdrawcmd still can work on write - * protected disks */ - if ((filp->f_mode & FMODE_WRITE) || !file_permission(filp, MAY_WRITE)) - filp->private_data = (void *)8; - if (UFDCS->rawcmd == 1) UFDCS->rawcmd = 2; - if (!(filp->f_flags & O_NDELAY)) { + if (!(filp->f_mode & FMODE_NDELAY)) { if (filp->f_mode & (FMODE_READ|FMODE_WRITE)) { UDRS->last_checked = 0; check_disk_change(inode->i_bdev); diff --git a/drivers/block/swim3.c b/drivers/block/swim3.c index a53ca54bee1..5c45d5556ae 100644 --- a/drivers/block/swim3.c +++ b/drivers/block/swim3.c @@ -904,10 +904,10 @@ static int floppy_open(struct inode *inode, struct file *filp) swim3_action(fs, SETMFM); swim3_select(fs, RELAX); - } else if (fs->ref_count == -1 || filp->f_flags & O_EXCL) + } else if (fs->ref_count == -1 || filp->f_mode & FMODE_EXCL) return -EBUSY; - if (err == 0 && (filp->f_flags & O_NDELAY) == 0 + if (err == 0 && (filp->f_mode & FMODE_NDELAY) == 0 && (filp->f_mode & (FMODE_READ|FMODE_WRITE))) { check_disk_change(inode->i_bdev); if (fs->ejected) @@ -930,7 +930,7 @@ static int floppy_open(struct inode *inode, struct file *filp) return err; } - if (filp->f_flags & O_EXCL) + if (filp->f_mode & FMODE_EXCL) fs->ref_count = -1; else ++fs->ref_count; diff --git a/drivers/block/ub.c b/drivers/block/ub.c index f60e41833f6..85d41eb67c0 100644 --- a/drivers/block/ub.c +++ b/drivers/block/ub.c @@ -1691,7 +1691,7 @@ static int ub_bd_open(struct inode *inode, struct file *filp) * under some pretty murky conditions (a failure of READ CAPACITY). * We may need it one day. 
*/ - if (lun->removable && lun->changed && !(filp->f_flags & O_NDELAY)) { + if (lun->removable && lun->changed && !(filp->f_mode & FMODE_NDELAY)) { rc = -ENOMEDIUM; goto err_open; } diff --git a/drivers/cdrom/cdrom.c b/drivers/cdrom/cdrom.c index d47f2f80acc..4feefa622ae 100644 --- a/drivers/cdrom/cdrom.c +++ b/drivers/cdrom/cdrom.c @@ -982,7 +982,7 @@ int cdrom_open(struct cdrom_device_info *cdi, struct inode *ip, struct file *fp) /* if this was a O_NONBLOCK open and we should honor the flags, * do a quick open without drive/disc integrity checks. */ cdi->use_count++; - if ((fp->f_flags & O_NONBLOCK) && (cdi->options & CDO_USE_FFLAGS)) { + if ((fp->f_mode & FMODE_NDELAY) && (cdi->options & CDO_USE_FFLAGS)) { ret = cdi->ops->open(cdi, 1); } else { ret = open_for_data(cdi); @@ -1205,7 +1205,7 @@ int cdrom_release(struct cdrom_device_info *cdi, struct file *fp) } opened_for_data = !(cdi->options & CDO_USE_FFLAGS) || - !(fp && fp->f_flags & O_NONBLOCK); + !(fp && fp->f_mode & FMODE_NDELAY); /* * flush cache on last write release diff --git a/drivers/ide/ide-gd.c b/drivers/ide/ide-gd.c index d367473098f..66bbb0a22f5 100644 --- a/drivers/ide/ide-gd.c +++ b/drivers/ide/ide-gd.c @@ -197,7 +197,7 @@ static int ide_gd_open(struct inode *inode, struct file *filp) * unreadable disk, so that we can get the format capacity * of the drive or begin the format - Sam */ - if (ret && (filp->f_flags & O_NDELAY) == 0) { + if (ret && (filp->f_mode & FMODE_NDELAY) == 0) { ret = -EIO; goto out_put_idkp; } diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 7c4d2e68df1..202c1ed9abd 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -640,7 +640,7 @@ static int sd_open(struct inode *inode, struct file *filp) */ retval = -ENOMEDIUM; if (sdev->removable && !sdkp->media_present && - !(filp->f_flags & O_NDELAY)) + !(filp->f_mode & FMODE_NDELAY)) goto error_out; /* diff --git a/fs/block_dev.c b/fs/block_dev.c index 8897f3b02e9..b9022694e9f 100644 --- a/fs/block_dev.c +++ 
b/fs/block_dev.c @@ -1007,6 +1007,13 @@ static int do_open(struct block_device *bdev, struct file *file, int for_part) return ret; } + if (file->f_flags & O_NDELAY) + file->f_mode |= FMODE_NDELAY; + if (file->f_flags & O_EXCL) + file->f_mode |= FMODE_EXCL; + if ((file->f_flags & O_ACCMODE) == 3) + file->f_mode |= FMODE_WRITE_IOCTL; + ret = -ENXIO; file->f_mapping = bdev->bd_inode->i_mapping; diff --git a/include/linux/fs.h b/include/linux/fs.h index 60a7a581ba9..5ab5579a516 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -76,6 +76,10 @@ extern int dir_notify_enable; behavior for cross-node execution/opening_for_writing of files */ #define FMODE_EXEC ((__force fmode_t)16) +#define FMODE_NDELAY ((__force fmode_t)32) +#define FMODE_EXCL ((__force fmode_t)64) +#define FMODE_WRITE_IOCTL ((__force fmode_t)128) + #define RW_MASK 1 #define RWA_MASK 2 #define READ 0 -- cgit v1.2.3-70-g09d2 From e915e872ed921d707bc32b3f2184d43abfa8c9e2 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 2 Sep 2008 17:16:41 -0400 Subject: [PATCH] switch sg_scsi_ioctl() to passing fmode_t Signed-off-by: Al Viro --- block/scsi_ioctl.c | 13 ++++--------- drivers/scsi/scsi_ioctl.c | 2 +- drivers/scsi/sg.c | 2 +- include/linux/blkdev.h | 4 ++-- 4 files changed, 8 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c index 9a441559118..375e25df8ad 100644 --- a/block/scsi_ioctl.c +++ b/block/scsi_ioctl.c @@ -379,12 +379,11 @@ out: * bytes in one int) where the lowest byte is the SCSI status. 
*/ #define OMAX_SB_LEN 16 /* For backward compatibility */ -int sg_scsi_ioctl(struct file *file, struct request_queue *q, - struct gendisk *disk, struct scsi_ioctl_command __user *sic) +int sg_scsi_ioctl(struct request_queue *q, struct gendisk *disk, fmode_t mode, + struct scsi_ioctl_command __user *sic) { struct request *rq; int err; - fmode_t write_perm = 0; unsigned int in_len, out_len, bytes, opcode, cmdlen; char *buffer = NULL, sense[SCSI_SENSE_BUFFERSIZE]; @@ -426,11 +425,7 @@ int sg_scsi_ioctl(struct file *file, struct request_queue *q, if (in_len && copy_from_user(buffer, sic->data + cmdlen, in_len)) goto error; - /* scsi_ioctl passes NULL */ - if (file && (file->f_mode & FMODE_WRITE)) - write_perm = FMODE_WRITE; - - err = blk_verify_command(&q->cmd_filter, rq->cmd, write_perm); + err = blk_verify_command(&q->cmd_filter, rq->cmd, mode & FMODE_WRITE); if (err) goto error; @@ -636,7 +631,7 @@ int scsi_cmd_ioctl(struct file *file, struct request_queue *q, if (!arg) break; - err = sg_scsi_ioctl(file, q, bd_disk, arg); + err = sg_scsi_ioctl(q, bd_disk, file ? 
file->f_mode : 0, arg); break; case CDROMCLOSETRAY: err = blk_send_start_stop(q, bd_disk, 0x03); diff --git a/drivers/scsi/scsi_ioctl.c b/drivers/scsi/scsi_ioctl.c index 28b19ef2630..1f08f5a2f8f 100644 --- a/drivers/scsi/scsi_ioctl.c +++ b/drivers/scsi/scsi_ioctl.c @@ -237,7 +237,7 @@ int scsi_ioctl(struct scsi_device *sdev, int cmd, void __user *arg) case SCSI_IOCTL_SEND_COMMAND: if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SYS_RAWIO)) return -EACCES; - return sg_scsi_ioctl(NULL, sdev->request_queue, NULL, arg); + return sg_scsi_ioctl(sdev->request_queue, NULL, 0, arg); case SCSI_IOCTL_DOORLOCK: return scsi_set_medium_removal(sdev, SCSI_REMOVAL_PREVENT); case SCSI_IOCTL_DOORUNLOCK: diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c index 93bd59a1ed7..9adf35bd8b5 100644 --- a/drivers/scsi/sg.c +++ b/drivers/scsi/sg.c @@ -1059,7 +1059,7 @@ sg_ioctl(struct inode *inode, struct file *filp, if (sg_allow_access(filp, &opcode)) return -EPERM; } - return sg_scsi_ioctl(filp, sdp->device->request_queue, NULL, p); + return sg_scsi_ioctl(sdp->device->request_queue, NULL, filp->f_mode, p); case SG_SET_DEBUG: result = get_user(val, ip); if (result) diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index a4413ec3cb3..8945c30e993 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -719,8 +719,8 @@ extern int blk_remove_plug(struct request_queue *); extern void blk_recount_segments(struct request_queue *, struct bio *); extern int scsi_cmd_ioctl(struct file *, struct request_queue *, struct gendisk *, unsigned int, void __user *); -extern int sg_scsi_ioctl(struct file *, struct request_queue *, - struct gendisk *, struct scsi_ioctl_command __user *); +extern int sg_scsi_ioctl(struct request_queue *, struct gendisk *, fmode_t, + struct scsi_ioctl_command __user *); /* * Temporary export, until SCSI gets fixed up. 
-- cgit v1.2.3-70-g09d2 From 74f3c8aff36ad6552ea609c8b20bfd588fa16f38 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 27 Aug 2007 15:38:10 -0400 Subject: [PATCH] switch scsi_cmd_ioctl() to passing fmode_t Signed-off-by: Al Viro --- block/bsg.c | 2 +- block/scsi_ioctl.c | 10 +++++----- drivers/block/cciss.c | 3 ++- drivers/block/ub.c | 2 +- drivers/block/virtio_blk.c | 4 ++-- drivers/cdrom/cdrom.c | 2 +- drivers/ide/ide-floppy_ioctl.c | 4 ++-- drivers/scsi/sd.c | 3 ++- drivers/scsi/st.c | 4 ++-- include/linux/blkdev.h | 4 ++-- 10 files changed, 20 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/block/bsg.c b/block/bsg.c index 2d36b127f38..e8bd2475682 100644 --- a/block/bsg.c +++ b/block/bsg.c @@ -914,7 +914,7 @@ static long bsg_ioctl(struct file *file, unsigned int cmd, unsigned long arg) case SG_EMULATED_HOST: case SCSI_IOCTL_SEND_COMMAND: { void __user *uarg = (void __user *) arg; - return scsi_cmd_ioctl(file, bd->queue, NULL, cmd, uarg); + return scsi_cmd_ioctl(bd->queue, NULL, file->f_mode, cmd, uarg); } case SG_IO: { struct request *rq; diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c index 375e25df8ad..5963cf91a3a 100644 --- a/block/scsi_ioctl.c +++ b/block/scsi_ioctl.c @@ -517,8 +517,8 @@ static inline int blk_send_start_stop(struct request_queue *q, return __blk_send_generic(q, bd_disk, GPCMD_START_STOP_UNIT, data); } -int scsi_cmd_ioctl(struct file *file, struct request_queue *q, - struct gendisk *bd_disk, unsigned int cmd, void __user *arg) +int scsi_cmd_ioctl(struct request_queue *q, struct gendisk *bd_disk, fmode_t mode, + unsigned int cmd, void __user *arg) { int err; @@ -559,7 +559,7 @@ int scsi_cmd_ioctl(struct file *file, struct request_queue *q, err = -EFAULT; if (copy_from_user(&hdr, arg, sizeof(hdr))) break; - err = sg_io(q, bd_disk, &hdr, file ? 
file->f_mode : 0); + err = sg_io(q, bd_disk, &hdr, mode); if (err == -EFAULT) break; @@ -607,7 +607,7 @@ int scsi_cmd_ioctl(struct file *file, struct request_queue *q, hdr.cmdp = ((struct cdrom_generic_command __user*) arg)->cmd; hdr.cmd_len = sizeof(cgc.cmd); - err = sg_io(q, bd_disk, &hdr, file ? file->f_mode : 0); + err = sg_io(q, bd_disk, &hdr, mode); if (err == -EFAULT) break; @@ -631,7 +631,7 @@ int scsi_cmd_ioctl(struct file *file, struct request_queue *q, if (!arg) break; - err = sg_scsi_ioctl(q, bd_disk, file ? file->f_mode : 0, arg); + err = sg_scsi_ioctl(q, bd_disk, mode, arg); break; case CDROMCLOSETRAY: err = blk_send_start_stop(q, bd_disk, 0x03); diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c index 1e1f9153000..d9b1c15b811 100644 --- a/drivers/block/cciss.c +++ b/drivers/block/cciss.c @@ -1232,7 +1232,8 @@ static int cciss_ioctl(struct inode *inode, struct file *filep, case SG_EMULATED_HOST: case SG_IO: case SCSI_IOCTL_SEND_COMMAND: - return scsi_cmd_ioctl(filep, disk->queue, disk, cmd, argp); + return scsi_cmd_ioctl(disk->queue, disk, + filep ? filep->f_mode : 0, cmd, argp); /* scsi_cmd_ioctl would normally handle these, below, but */ /* they aren't a good fit for cciss, as CD-ROMs are */ diff --git a/drivers/block/ub.c b/drivers/block/ub.c index 85d41eb67c0..bc04330f368 100644 --- a/drivers/block/ub.c +++ b/drivers/block/ub.c @@ -1729,7 +1729,7 @@ static int ub_bd_ioctl(struct inode *inode, struct file *filp, struct gendisk *disk = inode->i_bdev->bd_disk; void __user *usermem = (void __user *) arg; - return scsi_cmd_ioctl(filp, disk->queue, disk, cmd, usermem); + return scsi_cmd_ioctl(disk->queue, disk, filp ? 
filp->f_mode : 0, cmd, usermem); } /* diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 6ec5fc05278..7643cd16fd6 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -149,8 +149,8 @@ static void do_virtblk_request(struct request_queue *q) static int virtblk_ioctl(struct inode *inode, struct file *filp, unsigned cmd, unsigned long data) { - return scsi_cmd_ioctl(filp, inode->i_bdev->bd_disk->queue, - inode->i_bdev->bd_disk, cmd, + return scsi_cmd_ioctl(inode->i_bdev->bd_disk->queue, + inode->i_bdev->bd_disk, filp->f_mode, cmd, (void __user *)data); } diff --git a/drivers/cdrom/cdrom.c b/drivers/cdrom/cdrom.c index 4feefa622ae..e286eb5d1f6 100644 --- a/drivers/cdrom/cdrom.c +++ b/drivers/cdrom/cdrom.c @@ -2672,7 +2672,7 @@ int cdrom_ioctl(struct file * file, struct cdrom_device_info *cdi, /* * Try the generic SCSI command ioctl's first. */ - ret = scsi_cmd_ioctl(file, disk->queue, disk, cmd, argp); + ret = scsi_cmd_ioctl(disk->queue, disk, file ? file->f_mode : 0, cmd, argp); if (ret != -ENOTTY) return ret; diff --git a/drivers/ide/ide-floppy_ioctl.c b/drivers/ide/ide-floppy_ioctl.c index 0d5f5054ab6..3c9da4ac934 100644 --- a/drivers/ide/ide-floppy_ioctl.c +++ b/drivers/ide/ide-floppy_ioctl.c @@ -280,8 +280,8 @@ int ide_floppy_ioctl(ide_drive_t *drive, struct inode *inode, * and CDROM_SEND_PACKET (legacy) ioctls */ if (cmd != CDROM_SEND_PACKET && cmd != SCSI_IOCTL_SEND_COMMAND) - err = scsi_cmd_ioctl(file, bdev->bd_disk->queue, - bdev->bd_disk, cmd, argp); + err = scsi_cmd_ioctl(bdev->bd_disk->queue, bdev->bd_disk, + file ? 
file->f_mode : 0, cmd, argp); if (err == -ENOTTY) err = generic_ide_ioctl(drive, file, bdev, cmd, arg); diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 202c1ed9abd..5d74413f591 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -775,7 +775,8 @@ static int sd_ioctl(struct inode * inode, struct file * filp, case SCSI_IOCTL_GET_BUS_NUMBER: return scsi_ioctl(sdp, cmd, p); default: - error = scsi_cmd_ioctl(filp, disk->queue, disk, cmd, p); + error = scsi_cmd_ioctl(disk->queue, disk, + filp ? filp->f_mode : 0, cmd, p); if (error != -ENOTTY) return error; } diff --git a/drivers/scsi/st.c b/drivers/scsi/st.c index 5c28d08f18f..8dffac9f341 100644 --- a/drivers/scsi/st.c +++ b/drivers/scsi/st.c @@ -3567,8 +3567,8 @@ static long st_ioctl(struct file *file, unsigned int cmd_in, unsigned long arg) !capable(CAP_SYS_RAWIO)) i = -EPERM; else - i = scsi_cmd_ioctl(file, STp->disk->queue, - STp->disk, cmd_in, p); + i = scsi_cmd_ioctl(STp->disk->queue, STp->disk, + file->f_mode, cmd_in, p); if (i != -ENOTTY) return i; break; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 8945c30e993..48f41b991ad 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -717,8 +717,8 @@ extern void blk_plug_device(struct request_queue *); extern void blk_plug_device_unlocked(struct request_queue *); extern int blk_remove_plug(struct request_queue *); extern void blk_recount_segments(struct request_queue *, struct bio *); -extern int scsi_cmd_ioctl(struct file *, struct request_queue *, - struct gendisk *, unsigned int, void __user *); +extern int scsi_cmd_ioctl(struct request_queue *, struct gendisk *, fmode_t, + unsigned int, void __user *); extern int sg_scsi_ioctl(struct request_queue *, struct gendisk *, fmode_t, struct scsi_ioctl_command __user *); -- cgit v1.2.3-70-g09d2 From 1bddd9e6453ef1c7bc5b6f4ddbf7d31f4aee7a44 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 2 Sep 2008 17:19:43 -0400 Subject: [PATCH] lose the unused file argument in 
generic_ide_ioctl() Signed-off-by: Al Viro --- drivers/ide/ide-cd.c | 2 +- drivers/ide/ide-disk_ioctl.c | 2 +- drivers/ide/ide-floppy_ioctl.c | 2 +- drivers/ide/ide-ioctls.c | 3 +-- drivers/ide/ide-tape.c | 2 +- drivers/scsi/ide-scsi.c | 2 +- include/linux/ide.h | 3 +-- 7 files changed, 7 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c index 13265a8827d..cd21b34fe50 100644 --- a/drivers/ide/ide-cd.c +++ b/drivers/ide/ide-cd.c @@ -2174,7 +2174,7 @@ static int idecd_ioctl(struct inode *inode, struct file *file, break; } - err = generic_ide_ioctl(info->drive, file, bdev, cmd, arg); + err = generic_ide_ioctl(info->drive, bdev, cmd, arg); if (err == -EINVAL) err = cdrom_ioctl(file, &info->devinfo, inode, cmd, arg); diff --git a/drivers/ide/ide-disk_ioctl.c b/drivers/ide/ide-disk_ioctl.c index a49698bcf96..41832af400d 100644 --- a/drivers/ide/ide-disk_ioctl.c +++ b/drivers/ide/ide-disk_ioctl.c @@ -23,5 +23,5 @@ int ide_disk_ioctl(ide_drive_t *drive, struct inode *inode, struct file *file, if (err != -EOPNOTSUPP) return err; - return generic_ide_ioctl(drive, file, bdev, cmd, arg); + return generic_ide_ioctl(drive, bdev, cmd, arg); } diff --git a/drivers/ide/ide-floppy_ioctl.c b/drivers/ide/ide-floppy_ioctl.c index 3c9da4ac934..5af70a2c9ef 100644 --- a/drivers/ide/ide-floppy_ioctl.c +++ b/drivers/ide/ide-floppy_ioctl.c @@ -284,7 +284,7 @@ int ide_floppy_ioctl(ide_drive_t *drive, struct inode *inode, file ? 
file->f_mode : 0, cmd, argp); if (err == -ENOTTY) - err = generic_ide_ioctl(drive, file, bdev, cmd, arg); + err = generic_ide_ioctl(drive, bdev, cmd, arg); return err; } diff --git a/drivers/ide/ide-ioctls.c b/drivers/ide/ide-ioctls.c index a90945f4979..fcde16bb53a 100644 --- a/drivers/ide/ide-ioctls.c +++ b/drivers/ide/ide-ioctls.c @@ -240,8 +240,7 @@ static int generic_drive_reset(ide_drive_t *drive) return ret; } -int generic_ide_ioctl(ide_drive_t *drive, struct file *file, - struct block_device *bdev, +int generic_ide_ioctl(ide_drive_t *drive, struct block_device *bdev, unsigned int cmd, unsigned long arg) { int err; diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c index b2b2e5e8d38..2b263281ffe 100644 --- a/drivers/ide/ide-tape.c +++ b/drivers/ide/ide-tape.c @@ -2368,7 +2368,7 @@ static int idetape_ioctl(struct inode *inode, struct file *file, struct block_device *bdev = inode->i_bdev; struct ide_tape_obj *tape = ide_drv_g(bdev->bd_disk, ide_tape_obj); ide_drive_t *drive = tape->drive; - int err = generic_ide_ioctl(drive, file, bdev, cmd, arg); + int err = generic_ide_ioctl(drive, bdev, cmd, arg); if (err == -EINVAL) err = idetape_blkdev_ioctl(drive, cmd, arg); return err; diff --git a/drivers/scsi/ide-scsi.c b/drivers/scsi/ide-scsi.c index afc96e844a2..5bcc04e82c2 100644 --- a/drivers/scsi/ide-scsi.c +++ b/drivers/scsi/ide-scsi.c @@ -478,7 +478,7 @@ static int idescsi_ide_ioctl(struct inode *inode, struct file *file, { struct block_device *bdev = inode->i_bdev; struct ide_scsi_obj *scsi = ide_scsi_g(bdev->bd_disk); - return generic_ide_ioctl(scsi->drive, file, bdev, cmd, arg); + return generic_ide_ioctl(scsi->drive, bdev, cmd, arg); } static struct block_device_operations idescsi_ops = { diff --git a/include/linux/ide.h b/include/linux/ide.h index 89e53cfbc78..0d03e83f719 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -1158,8 +1158,7 @@ struct ide_ioctl_devset { int ide_setting_ioctl(ide_drive_t *, struct block_device *, unsigned 
int, unsigned long, const struct ide_ioctl_devset *); -int generic_ide_ioctl(ide_drive_t *, struct file *, struct block_device *, - unsigned, unsigned long); +int generic_ide_ioctl(ide_drive_t *, struct block_device *, unsigned, unsigned long); extern int ide_vlb_clk; extern int ide_pci_clk; -- cgit v1.2.3-70-g09d2 From 647b3d0084158c47b1aea8f34d13cab9cd0a5b49 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 28 Aug 2007 22:15:59 -0400 Subject: [PATCH] lose unused arguments in dm ioctl callbacks Signed-off-by: Al Viro --- drivers/md/dm-linear.c | 3 +-- drivers/md/dm-mpath.c | 3 +-- drivers/md/dm.c | 2 +- include/linux/device-mapper.h | 3 +-- 4 files changed, 4 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/drivers/md/dm-linear.c b/drivers/md/dm-linear.c index 6449bcdf84c..fa358385eed 100644 --- a/drivers/md/dm-linear.c +++ b/drivers/md/dm-linear.c @@ -110,8 +110,7 @@ static int linear_status(struct dm_target *ti, status_type_t type, return 0; } -static int linear_ioctl(struct dm_target *ti, struct inode *inode, - struct file *filp, unsigned int cmd, +static int linear_ioctl(struct dm_target *ti, unsigned int cmd, unsigned long arg) { struct linear_c *lc = (struct linear_c *) ti->private; diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index 9bf3460c554..c681d5e5f45 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -1395,8 +1395,7 @@ error: return -EINVAL; } -static int multipath_ioctl(struct dm_target *ti, struct inode *inode, - struct file *filp, unsigned int cmd, +static int multipath_ioctl(struct dm_target *ti, unsigned int cmd, unsigned long arg) { struct multipath *m = (struct multipath *) ti->private; diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 327de03a5bd..5f0f4c8bcd3 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -345,7 +345,7 @@ static int dm_blk_ioctl(struct inode *inode, struct file *file, } if (tgt->type->ioctl) - r = tgt->type->ioctl(tgt, inode, file, cmd, arg); + r = 
tgt->type->ioctl(tgt, cmd, arg); out: dm_table_put(map); diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index 3f8d4e76367..a567bbc5293 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -69,8 +69,7 @@ typedef int (*dm_status_fn) (struct dm_target *ti, status_type_t status_type, typedef int (*dm_message_fn) (struct dm_target *ti, unsigned argc, char **argv); -typedef int (*dm_ioctl_fn) (struct dm_target *ti, struct inode *inode, - struct file *filp, unsigned int cmd, +typedef int (*dm_ioctl_fn) (struct dm_target *ti, unsigned int cmd, unsigned long arg); typedef int (*dm_merge_fn) (struct dm_target *ti, struct bvec_merge_data *bvm, -- cgit v1.2.3-70-g09d2 From 08f85851215100d0eebf026810955ee6ad456c38 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 8 Oct 2007 13:26:20 -0400 Subject: [PATCH] move block_device_operations to blkdev.h Signed-off-by: Al Viro --- fs/ext2/xip.c | 1 + include/linux/blkdev.h | 17 +++++++++++++++++ include/linux/fs.h | 15 +-------------- 3 files changed, 19 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/fs/ext2/xip.c b/fs/ext2/xip.c index 4fb94c20041..b72b8588422 100644 --- a/fs/ext2/xip.c +++ b/fs/ext2/xip.c @@ -11,6 +11,7 @@ #include #include #include +#include #include "ext2.h" #include "xip.h" diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 48f41b991ad..48ec8862a11 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1057,6 +1057,23 @@ static inline int blk_integrity_rq(struct request *rq) #endif /* CONFIG_BLK_DEV_INTEGRITY */ +struct file; +struct inode; + +struct block_device_operations { + int (*open) (struct inode *, struct file *); + int (*release) (struct inode *, struct file *); + int (*ioctl) (struct inode *, struct file *, unsigned, unsigned long); + long (*unlocked_ioctl) (struct file *, unsigned, unsigned long); + long (*compat_ioctl) (struct file *, unsigned, unsigned long); + int (*direct_access) (struct 
block_device *, sector_t, + void **, unsigned long *); + int (*media_changed) (struct gendisk *); + int (*revalidate_disk) (struct gendisk *); + int (*getgeo)(struct block_device *, struct hd_geometry *); + struct module *owner; +}; + #else /* CONFIG_BLOCK */ /* * stubs for when the block layer is configured out diff --git a/include/linux/fs.h b/include/linux/fs.h index 5ab5579a516..58bbf689fef 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1270,20 +1270,7 @@ int generic_osync_inode(struct inode *, struct address_space *, int); * to have different dirent layouts depending on the binary type. */ typedef int (*filldir_t)(void *, const char *, int, loff_t, u64, unsigned); - -struct block_device_operations { - int (*open) (struct inode *, struct file *); - int (*release) (struct inode *, struct file *); - int (*ioctl) (struct inode *, struct file *, unsigned, unsigned long); - long (*unlocked_ioctl) (struct file *, unsigned, unsigned long); - long (*compat_ioctl) (struct file *, unsigned, unsigned long); - int (*direct_access) (struct block_device *, sector_t, - void **, unsigned long *); - int (*media_changed) (struct gendisk *); - int (*revalidate_disk) (struct gendisk *); - int (*getgeo)(struct block_device *, struct hd_geometry *); - struct module *owner; -}; +struct block_device_operations; /* These macros are for out of kernel modules to test that * the kernel supports the unlocked_ioctl and compat_ioctl -- cgit v1.2.3-70-g09d2 From bbc1cc978404105da23d505163ce9fd5598ed5b1 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 7 Oct 2007 17:54:28 -0400 Subject: [PATCH] switch cdrom_{open,release,ioctl} to sane APIs ... 
convert to it in callers Signed-off-by: Al Viro --- drivers/block/paride/pcd.c | 8 +++++--- drivers/cdrom/cdrom.c | 23 +++++++++++------------ drivers/cdrom/gdrom.c | 7 ++++--- drivers/cdrom/viocd.c | 8 +++++--- drivers/ide/ide-cd.c | 7 ++++--- drivers/scsi/sr.c | 11 ++++------- include/linux/cdrom.h | 10 +++++----- 7 files changed, 38 insertions(+), 36 deletions(-) (limited to 'include/linux') diff --git a/drivers/block/paride/pcd.c b/drivers/block/paride/pcd.c index b8a994a2b01..8bd557e2a65 100644 --- a/drivers/block/paride/pcd.c +++ b/drivers/block/paride/pcd.c @@ -226,20 +226,22 @@ static int pcd_warned; /* Have we logged a phase warning ? */ static int pcd_block_open(struct inode *inode, struct file *file) { struct pcd_unit *cd = inode->i_bdev->bd_disk->private_data; - return cdrom_open(&cd->info, inode, file); + return cdrom_open(&cd->info, inode->i_bdev, file->f_mode); } static int pcd_block_release(struct inode *inode, struct file *file) { struct pcd_unit *cd = inode->i_bdev->bd_disk->private_data; - return cdrom_release(&cd->info, file); + cdrom_release(&cd->info, file ? file->f_mode : 0); + return 0; } static int pcd_block_ioctl(struct inode *inode, struct file *file, unsigned cmd, unsigned long arg) { struct pcd_unit *cd = inode->i_bdev->bd_disk->private_data; - return cdrom_ioctl(file, &cd->info, inode, cmd, arg); + return cdrom_ioctl(&cd->info, inode->i_bdev, + file ? file->f_mode : 0, cmd, arg); } static int pcd_block_media_changed(struct gendisk *disk) diff --git a/drivers/cdrom/cdrom.c b/drivers/cdrom/cdrom.c index e286eb5d1f6..d16b02423d6 100644 --- a/drivers/cdrom/cdrom.c +++ b/drivers/cdrom/cdrom.c @@ -973,7 +973,7 @@ static int cdrom_close_write(struct cdrom_device_info *cdi) * is in their own interest: device control becomes a lot easier * this way. 
*/ -int cdrom_open(struct cdrom_device_info *cdi, struct inode *ip, struct file *fp) +int cdrom_open(struct cdrom_device_info *cdi, struct block_device *bdev, fmode_t mode) { int ret; @@ -982,14 +982,14 @@ int cdrom_open(struct cdrom_device_info *cdi, struct inode *ip, struct file *fp) /* if this was a O_NONBLOCK open and we should honor the flags, * do a quick open without drive/disc integrity checks. */ cdi->use_count++; - if ((fp->f_mode & FMODE_NDELAY) && (cdi->options & CDO_USE_FFLAGS)) { + if ((mode & FMODE_NDELAY) && (cdi->options & CDO_USE_FFLAGS)) { ret = cdi->ops->open(cdi, 1); } else { ret = open_for_data(cdi); if (ret) goto err; cdrom_mmc3_profile(cdi); - if (fp->f_mode & FMODE_WRITE) { + if (mode & FMODE_WRITE) { ret = -EROFS; if (cdrom_open_write(cdi)) goto err_release; @@ -1007,7 +1007,7 @@ int cdrom_open(struct cdrom_device_info *cdi, struct inode *ip, struct file *fp) cdi->name, cdi->use_count); /* Do this on open. Don't wait for mount, because they might not be mounting, but opening with O_NONBLOCK */ - check_disk_change(ip->i_bdev); + check_disk_change(bdev); return 0; err_release: if (CDROM_CAN(CDC_LOCK) && cdi->options & CDO_LOCK) { @@ -1184,7 +1184,7 @@ static int check_for_audio_disc(struct cdrom_device_info * cdi, return 0; } -int cdrom_release(struct cdrom_device_info *cdi, struct file *fp) +void cdrom_release(struct cdrom_device_info *cdi, fmode_t mode) { struct cdrom_device_ops *cdo = cdi->ops; int opened_for_data; @@ -1205,7 +1205,7 @@ int cdrom_release(struct cdrom_device_info *cdi, struct file *fp) } opened_for_data = !(cdi->options & CDO_USE_FFLAGS) || - !(fp && fp->f_mode & FMODE_NDELAY); + !(mode & FMODE_NDELAY); /* * flush cache on last write release @@ -1219,7 +1219,6 @@ int cdrom_release(struct cdrom_device_info *cdi, struct file *fp) cdi->options & CDO_AUTO_EJECT && CDROM_CAN(CDC_OPEN_TRAY)) cdo->tray_move(cdi, 1); } - return 0; } static int cdrom_read_mech_status(struct cdrom_device_info *cdi, @@ -2662,17 +2661,17 @@ static int 
cdrom_ioctl_audioctl(struct cdrom_device_info *cdi, * these days. * ATAPI / SCSI specific code now mainly resides in mmc_ioctl(). */ -int cdrom_ioctl(struct file * file, struct cdrom_device_info *cdi, - struct inode *ip, unsigned int cmd, unsigned long arg) +int cdrom_ioctl(struct cdrom_device_info *cdi, struct block_device *bdev, + fmode_t mode, unsigned int cmd, unsigned long arg) { void __user *argp = (void __user *)arg; int ret; - struct gendisk *disk = ip->i_bdev->bd_disk; + struct gendisk *disk = bdev->bd_disk; /* * Try the generic SCSI command ioctl's first. */ - ret = scsi_cmd_ioctl(disk->queue, disk, file ? file->f_mode : 0, cmd, argp); + ret = scsi_cmd_ioctl(disk->queue, disk, mode, cmd, argp); if (ret != -ENOTTY) return ret; @@ -2696,7 +2695,7 @@ int cdrom_ioctl(struct file * file, struct cdrom_device_info *cdi, case CDROM_SELECT_DISC: return cdrom_ioctl_select_disc(cdi, arg); case CDROMRESET: - return cdrom_ioctl_reset(cdi, ip->i_bdev); + return cdrom_ioctl_reset(cdi, bdev); case CDROM_LOCKDOOR: return cdrom_ioctl_lock_door(cdi, arg); case CDROM_DEBUG: diff --git a/drivers/cdrom/gdrom.c b/drivers/cdrom/gdrom.c index d6ba77a2dd7..0959edf2afd 100644 --- a/drivers/cdrom/gdrom.c +++ b/drivers/cdrom/gdrom.c @@ -492,12 +492,14 @@ static struct cdrom_device_ops gdrom_ops = { static int gdrom_bdops_open(struct inode *inode, struct file *file) { - return cdrom_open(gd.cd_info, inode, file); + return cdrom_open(gd.cd_info, inode->i_bdev, file->f_mode); } static int gdrom_bdops_release(struct inode *inode, struct file *file) { - return cdrom_release(gd.cd_info, file); + /* cdrom_release() returns void now, so report success explicitly */ + cdrom_release(gd.cd_info, file ? 
file->f_mode : 0); + return 0; } static int gdrom_bdops_mediachanged(struct gendisk *disk) @@ -508,7 +510,8 @@ static int gdrom_bdops_mediachanged(struct gendisk *disk) static int gdrom_bdops_ioctl(struct inode *inode, struct file *file, unsigned cmd, unsigned long arg) { - return cdrom_ioctl(file, gd.cd_info, inode, cmd, arg); + return cdrom_ioctl(gd.cd_info, inode->i_bdev, + file ? file->f_mode : 0, cmd, arg); } static struct block_device_operations gdrom_bdops = { diff --git a/drivers/cdrom/viocd.c b/drivers/cdrom/viocd.c index 031e0e1a1a3..abc4079c3f4 100644 --- a/drivers/cdrom/viocd.c +++ b/drivers/cdrom/viocd.c @@ -154,20 +154,22 @@ static const struct file_operations proc_viocd_operations = { static int viocd_blk_open(struct inode *inode, struct file *file) { struct disk_info *di = inode->i_bdev->bd_disk->private_data; - return cdrom_open(&di->viocd_info, inode, file); + return cdrom_open(&di->viocd_info, inode->i_bdev, file->f_mode); } static int viocd_blk_release(struct inode *inode, struct file *file) { struct disk_info *di = inode->i_bdev->bd_disk->private_data; - return cdrom_release(&di->viocd_info, file); + cdrom_release(&di->viocd_info, file ? file->f_mode : 0); + return 0; } static int viocd_blk_ioctl(struct inode *inode, struct file *file, unsigned cmd, unsigned long arg) { struct disk_info *di = inode->i_bdev->bd_disk->private_data; - return cdrom_ioctl(file, &di->viocd_info, inode, cmd, arg); + return cdrom_ioctl(&di->viocd_info, inode->i_bdev, + file ? 
file->f_mode : 0, cmd, arg); } static int viocd_blk_media_changed(struct gendisk *disk) diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c index cd21b34fe50..87d90200b16 100644 --- a/drivers/ide/ide-cd.c +++ b/drivers/ide/ide-cd.c @@ -2099,7 +2099,7 @@ static int idecd_open(struct inode *inode, struct file *file) if (!info) return -ENXIO; - rc = cdrom_open(&info->devinfo, inode, file); + rc = cdrom_open(&info->devinfo, inode->i_bdev, file->f_mode); if (rc < 0) ide_cd_put(info); @@ -2112,7 +2112,7 @@ static int idecd_release(struct inode *inode, struct file *file) struct gendisk *disk = inode->i_bdev->bd_disk; struct cdrom_info *info = ide_drv_g(disk, cdrom_info); - cdrom_release(&info->devinfo, file); + cdrom_release(&info->devinfo, file ? file->f_mode : 0); ide_cd_put(info); @@ -2176,7 +2176,8 @@ static int idecd_ioctl(struct inode *inode, struct file *file, err = generic_ide_ioctl(info->drive, bdev, cmd, arg); if (err == -EINVAL) - err = cdrom_ioctl(file, &info->devinfo, inode, cmd, arg); + err = cdrom_ioctl(&info->devinfo, bdev, + file ? file->f_mode : 0, cmd, arg); return err; } diff --git a/drivers/scsi/sr.c b/drivers/scsi/sr.c index 0f17009c99d..b92e2dac9aa 100644 --- a/drivers/scsi/sr.c +++ b/drivers/scsi/sr.c @@ -480,7 +480,7 @@ static int sr_block_open(struct inode *inode, struct file *file) if(!(cd = scsi_cd_get(disk))) return -ENXIO; - if((ret = cdrom_open(&cd->cdi, inode, file)) != 0) + if((ret = cdrom_open(&cd->cdi, inode->i_bdev, file->f_mode)) != 0) scsi_cd_put(cd); return ret; @@ -488,12 +488,8 @@ static int sr_block_open(struct inode *inode, struct file *file) static int sr_block_release(struct inode *inode, struct file *file) { - int ret; struct scsi_cd *cd = scsi_cd(inode->i_bdev->bd_disk); - ret = cdrom_release(&cd->cdi, file); - if(ret) - return ret; - + cdrom_release(&cd->cdi, file ? 
file->f_mode : 0); scsi_cd_put(cd); return 0; @@ -517,7 +513,8 @@ static int sr_block_ioctl(struct inode *inode, struct file *file, unsigned cmd, return scsi_ioctl(sdev, cmd, argp); } - ret = cdrom_ioctl(file, &cd->cdi, inode, cmd, arg); + ret = cdrom_ioctl(&cd->cdi, inode->i_bdev, + file ? file->f_mode : 0, cmd, arg); if (ret != -ENOSYS) return ret; diff --git a/include/linux/cdrom.h b/include/linux/cdrom.h index 5db265ea60f..0b49e08d3cb 100644 --- a/include/linux/cdrom.h +++ b/include/linux/cdrom.h @@ -987,11 +987,11 @@ struct cdrom_device_ops { }; /* the general block_device operations structure: */ -extern int cdrom_open(struct cdrom_device_info *cdi, struct inode *ip, - struct file *fp); -extern int cdrom_release(struct cdrom_device_info *cdi, struct file *fp); -extern int cdrom_ioctl(struct file *file, struct cdrom_device_info *cdi, - struct inode *ip, unsigned int cmd, unsigned long arg); +extern int cdrom_open(struct cdrom_device_info *cdi, struct block_device *bdev, + fmode_t mode); +extern void cdrom_release(struct cdrom_device_info *cdi, fmode_t mode); +extern int cdrom_ioctl(struct cdrom_device_info *cdi, struct block_device *bdev, + fmode_t mode, unsigned int cmd, unsigned long arg); extern int cdrom_media_changed(struct cdrom_device_info *); extern int register_cdrom(struct cdrom_device_info *cdi); -- cgit v1.2.3-70-g09d2 From 633a08b81206122469365b4c72eaeb71f04f2cb4 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 29 Aug 2007 20:34:12 -0400 Subject: [PATCH] introduce __blkdev_driver_ioctl() Analog of blkdev_driver_ioctl() with sane arguments. For now it uses a fake struct file; by the end of the series it won't, and blkdev_driver_ioctl() will become a wrapper around it. 
Signed-off-by: Al Viro --- block/ioctl.c | 31 +++++++++++++++++++++++++++++++ drivers/block/pktcdvd.c | 4 ++-- drivers/md/dm-linear.c | 10 +--------- drivers/md/dm-mpath.c | 11 +++-------- include/linux/blkdev.h | 2 ++ 5 files changed, 39 insertions(+), 19 deletions(-) (limited to 'include/linux') diff --git a/block/ioctl.c b/block/ioctl.c index 38bee321e1f..9a26ace6d04 100644 --- a/block/ioctl.c +++ b/block/ioctl.c @@ -283,6 +283,37 @@ int blkdev_driver_ioctl(struct inode *inode, struct file *file, } EXPORT_SYMBOL_GPL(blkdev_driver_ioctl); +int __blkdev_driver_ioctl(struct block_device *bdev, fmode_t mode, + unsigned cmd, unsigned long arg) +{ + struct gendisk *disk = bdev->bd_disk; + int ret; + /* you bet it'll go away by the end of patch series */ + struct file fake_file = {}; + struct dentry fake_dentry = {}; + fake_file.f_mode = mode; + fake_file.f_path.dentry = &fake_dentry; + fake_dentry.d_inode = bdev->bd_inode; + + if (disk->fops->unlocked_ioctl) + return disk->fops->unlocked_ioctl(&fake_file, cmd, arg); + + if (disk->fops->ioctl) { + lock_kernel(); + ret = disk->fops->ioctl(bdev->bd_inode, &fake_file, cmd, arg); + unlock_kernel(); + return ret; + } + + return -ENOTTY; +} +/* + * For the record: _GPL here is only because somebody decided to slap it + * on the previous export. Sheer idiocy, since it wasn't copyrightable + * at all and could be open-coded without any exports by anybody who cares. 
+ */ +EXPORT_SYMBOL_GPL(__blkdev_driver_ioctl); + /* * always keep this in sync with compat_blkdev_ioctl() and * compat_blkdev_locked_ioctl() diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c index bdd49abcb54..a0ba4023953 100644 --- a/drivers/block/pktcdvd.c +++ b/drivers/block/pktcdvd.c @@ -2819,8 +2819,8 @@ static int pkt_ioctl(struct inode *inode, struct file *file, unsigned int cmd, u case CDROM_LAST_WRITTEN: case CDROM_SEND_PACKET: case SCSI_IOCTL_SEND_COMMAND: - return blkdev_driver_ioctl(pd->bdev->bd_inode, pd->bdev->bd_disk, - file, cmd, arg); + return __blkdev_driver_ioctl(pd->bdev, file ? file->f_mode : 0, + cmd, arg); default: VPRINTK(DRIVER_NAME": Unknown ioctl for %s (%x)\n", pd->name, cmd); diff --git a/drivers/md/dm-linear.c b/drivers/md/dm-linear.c index fa358385eed..373442b1e98 100644 --- a/drivers/md/dm-linear.c +++ b/drivers/md/dm-linear.c @@ -114,15 +114,7 @@ static int linear_ioctl(struct dm_target *ti, unsigned int cmd, unsigned long arg) { struct linear_c *lc = (struct linear_c *) ti->private; - struct block_device *bdev = lc->dev->bdev; - struct file fake_file = {}; - struct dentry fake_dentry = {}; - - fake_file.f_mode = lc->dev->mode; - fake_file.f_path.dentry = &fake_dentry; - fake_dentry.d_inode = bdev->bd_inode; - - return blkdev_driver_ioctl(bdev->bd_inode, &fake_file, bdev->bd_disk, cmd, arg); + return __blkdev_driver_ioctl(lc->dev->bdev, lc->dev->mode, cmd, arg); } static int linear_merge(struct dm_target *ti, struct bvec_merge_data *bvm, diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index c681d5e5f45..d85c65a4643 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -1400,13 +1400,10 @@ static int multipath_ioctl(struct dm_target *ti, unsigned int cmd, { struct multipath *m = (struct multipath *) ti->private; struct block_device *bdev = NULL; + fmode_t mode = 0; unsigned long flags; - struct file fake_file = {}; - struct dentry fake_dentry = {}; int r = 0; - fake_file.f_path.dentry = 
&fake_dentry; - spin_lock_irqsave(&m->lock, flags); if (!m->current_pgpath) @@ -1414,8 +1411,7 @@ static int multipath_ioctl(struct dm_target *ti, unsigned int cmd, if (m->current_pgpath) { bdev = m->current_pgpath->path.dev->bdev; - fake_dentry.d_inode = bdev->bd_inode; - fake_file.f_mode = m->current_pgpath->path.dev->mode; + mode = m->current_pgpath->path.dev->mode; } if (m->queue_io) @@ -1425,8 +1421,7 @@ static int multipath_ioctl(struct dm_target *ti, unsigned int cmd, spin_unlock_irqrestore(&m->lock, flags); - return r ? : blkdev_driver_ioctl(bdev->bd_inode, &fake_file, - bdev->bd_disk, cmd, arg); + return r ? : __blkdev_driver_ioctl(bdev, mode, cmd, arg); } /*----------------------------------------------------------------- diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 48ec8862a11..2bad616b994 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1074,6 +1074,8 @@ struct block_device_operations { struct module *owner; }; +extern int __blkdev_driver_ioctl(struct block_device *, fmode_t, unsigned int, + unsigned long); #else /* CONFIG_BLOCK */ /* * stubs for when the block layer is configured out -- cgit v1.2.3-70-g09d2 From badf8082c33d18b118d3a6f1b32d5ea6b97d3839 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 16 Oct 2008 10:23:20 -0400 Subject: [PATCH] switch ide_disk_ops ->ioctl() to sane prototype Signed-off-by: Al Viro --- drivers/ide/ide-disk.h | 2 +- drivers/ide/ide-disk_ioctl.c | 3 +-- drivers/ide/ide-floppy.h | 4 ++-- drivers/ide/ide-floppy_ioctl.c | 13 ++++++------- drivers/ide/ide-gd.c | 2 +- include/linux/ide.h | 4 ++-- 6 files changed, 13 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/drivers/ide/ide-disk.h b/drivers/ide/ide-disk.h index b234b0feaf7..d511dab7c4a 100644 --- a/drivers/ide/ide-disk.h +++ b/drivers/ide/ide-disk.h @@ -13,7 +13,7 @@ ide_decl_devset(wcache); ide_decl_devset(acoustic); /* ide-disk_ioctl.c */ -int ide_disk_ioctl(ide_drive_t *, struct inode *, struct file *, 
unsigned int, +int ide_disk_ioctl(ide_drive_t *, struct block_device *, fmode_t, unsigned int, unsigned long); #ifdef CONFIG_IDE_PROC_FS diff --git a/drivers/ide/ide-disk_ioctl.c b/drivers/ide/ide-disk_ioctl.c index 41832af400d..7b783dd7c0b 100644 --- a/drivers/ide/ide-disk_ioctl.c +++ b/drivers/ide/ide-disk_ioctl.c @@ -13,10 +13,9 @@ static const struct ide_ioctl_devset ide_disk_ioctl_settings[] = { { 0 } }; -int ide_disk_ioctl(ide_drive_t *drive, struct inode *inode, struct file *file, +int ide_disk_ioctl(ide_drive_t *drive, struct block_device *bdev, fmode_t mode, unsigned int cmd, unsigned long arg) { - struct block_device *bdev = inode->i_bdev; int err; err = ide_setting_ioctl(drive, bdev, cmd, arg, ide_disk_ioctl_settings); diff --git a/drivers/ide/ide-floppy.h b/drivers/ide/ide-floppy.h index c17124dd607..6dd2beb4843 100644 --- a/drivers/ide/ide-floppy.h +++ b/drivers/ide/ide-floppy.h @@ -23,8 +23,8 @@ void ide_floppy_create_mode_sense_cmd(struct ide_atapi_pc *, u8); void ide_floppy_create_read_capacity_cmd(struct ide_atapi_pc *); /* ide-floppy_ioctl.c */ -int ide_floppy_ioctl(ide_drive_t *, struct inode *, struct file *, unsigned int, - unsigned long); +int ide_floppy_ioctl(ide_drive_t *, struct block_device *, fmode_t, + unsigned int, unsigned long); #ifdef CONFIG_IDE_PROC_FS /* ide-floppy_proc.c */ diff --git a/drivers/ide/ide-floppy_ioctl.c b/drivers/ide/ide-floppy_ioctl.c index 5af70a2c9ef..2bc51ff73fe 100644 --- a/drivers/ide/ide-floppy_ioctl.c +++ b/drivers/ide/ide-floppy_ioctl.c @@ -241,7 +241,7 @@ static int ide_floppy_lockdoor(ide_drive_t *drive, struct ide_atapi_pc *pc, return 0; } -static int ide_floppy_format_ioctl(ide_drive_t *drive, struct file *file, +static int ide_floppy_format_ioctl(ide_drive_t *drive, fmode_t mode, unsigned int cmd, void __user *argp) { switch (cmd) { @@ -250,7 +250,7 @@ static int ide_floppy_format_ioctl(ide_drive_t *drive, struct file *file, case IDEFLOPPY_IOCTL_FORMAT_GET_CAPACITY: return 
ide_floppy_get_format_capacities(drive, argp); case IDEFLOPPY_IOCTL_FORMAT_START: - if (!(file->f_mode & FMODE_WRITE)) + if (!(mode & FMODE_WRITE)) return -EPERM; return ide_floppy_format_unit(drive, (int __user *)argp); case IDEFLOPPY_IOCTL_FORMAT_GET_PROGRESS: @@ -260,10 +260,9 @@ static int ide_floppy_format_ioctl(ide_drive_t *drive, struct file *file, } } -int ide_floppy_ioctl(ide_drive_t *drive, struct inode *inode, - struct file *file, unsigned int cmd, unsigned long arg) +int ide_floppy_ioctl(ide_drive_t *drive, struct block_device *bdev, + fmode_t mode, unsigned int cmd, unsigned long arg) { - struct block_device *bdev = inode->i_bdev; struct ide_atapi_pc pc; void __user *argp = (void __user *)arg; int err; @@ -271,7 +270,7 @@ int ide_floppy_ioctl(ide_drive_t *drive, struct inode *inode, if (cmd == CDROMEJECT || cmd == CDROM_LOCKDOOR) return ide_floppy_lockdoor(drive, &pc, arg, cmd); - err = ide_floppy_format_ioctl(drive, file, cmd, argp); + err = ide_floppy_format_ioctl(drive, mode, cmd, argp); if (err != -ENOTTY) return err; @@ -281,7 +280,7 @@ int ide_floppy_ioctl(ide_drive_t *drive, struct inode *inode, */ if (cmd != CDROM_SEND_PACKET && cmd != SCSI_IOCTL_SEND_COMMAND) err = scsi_cmd_ioctl(bdev->bd_disk->queue, bdev->bd_disk, - file ? file->f_mode : 0, cmd, argp); + mode, cmd, argp); if (err == -ENOTTY) err = generic_ide_ioctl(drive, bdev, cmd, arg); diff --git a/drivers/ide/ide-gd.c b/drivers/ide/ide-gd.c index 66bbb0a22f5..948af08abe2 100644 --- a/drivers/ide/ide-gd.c +++ b/drivers/ide/ide-gd.c @@ -293,7 +293,7 @@ static int ide_gd_ioctl(struct inode *inode, struct file *file, struct ide_disk_obj *idkp = ide_drv_g(bdev->bd_disk, ide_disk_obj); ide_drive_t *drive = idkp->drive; - return drive->disk_ops->ioctl(drive, inode, file, cmd, arg); + return drive->disk_ops->ioctl(drive, bdev, file ? 
file->f_mode : 0, cmd, arg); } static struct block_device_operations ide_gd_ops = { diff --git a/include/linux/ide.h b/include/linux/ide.h index 0d03e83f719..54525be4b5f 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -474,8 +474,8 @@ struct ide_disk_ops { ide_startstop_t (*do_request)(struct ide_drive_s *, struct request *, sector_t); int (*end_request)(struct ide_drive_s *, int, int); - int (*ioctl)(struct ide_drive_s *, struct inode *, - struct file *, unsigned int, unsigned long); + int (*ioctl)(struct ide_drive_s *, struct block_device *, + fmode_t, unsigned int, unsigned long); }; /* ATAPI device flags */ -- cgit v1.2.3-70-g09d2 From d4430d62fa77208824a37fe6f85ab2831d274769 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 2 Mar 2008 09:09:22 -0500 Subject: [PATCH] beginning of methods conversion To keep the size of changesets sane we split the switch by drivers; to keep the damn thing bisectable we do the following: 1) rename the affected methods, add ones with correct prototypes, make the (few) callers handle both. That's this changeset. 2) for each driver convert to new methods. *ALL* drivers are converted in this series. 3) kill the old (renamed) methods. Note that it _is_ a flagday; all in-tree drivers are converted and by the end of this series no trace of the old methods remains. The only reason we do it this way is to keep the damn thing bisectable and allow per-driver debugging if anything goes wrong. 
New methods: open(bdev, mode) release(disk, mode) ioctl(bdev, mode, cmd, arg) /* Called without BKL */ compat_ioctl(bdev, mode, cmd, arg) locked_ioctl(bdev, mode, cmd, arg) /* Called with BKL, legacy */ Signed-off-by: Al Viro --- arch/um/drivers/ubd_kern.c | 6 +++--- block/compat_ioctl.c | 17 +++++++++-------- block/ioctl.c | 35 ++++++++++++++++++++++++++--------- drivers/block/DAC960.c | 2 +- drivers/block/amiflop.c | 6 +++--- drivers/block/aoe/aoeblk.c | 4 ++-- drivers/block/ataflop.c | 6 +++--- drivers/block/brd.c | 2 +- drivers/block/cciss.c | 8 ++++---- drivers/block/cpqarray.c | 6 +++--- drivers/block/floppy.c | 6 +++--- drivers/block/loop.c | 8 ++++---- drivers/block/nbd.c | 2 +- drivers/block/paride/pcd.c | 6 +++--- drivers/block/paride/pd.c | 6 +++--- drivers/block/paride/pf.c | 6 +++--- drivers/block/pktcdvd.c | 6 +++--- drivers/block/swim3.c | 6 +++--- drivers/block/ub.c | 6 +++--- drivers/block/viodasd.c | 4 ++-- drivers/block/virtio_blk.c | 2 +- drivers/block/xd.c | 2 +- drivers/block/xen-blkfront.c | 4 ++-- drivers/block/xsysace.c | 4 ++-- drivers/block/z2ram.c | 4 ++-- drivers/cdrom/gdrom.c | 6 +++--- drivers/cdrom/viocd.c | 6 +++--- drivers/ide/ide-cd.c | 6 +++--- drivers/ide/ide-gd.c | 6 +++--- drivers/ide/ide-tape.c | 6 +++--- drivers/md/dm.c | 6 +++--- drivers/md/md.c | 6 +++--- drivers/memstick/core/mspro_block.c | 4 ++-- drivers/message/i2o/i2o_block.c | 6 +++--- drivers/mmc/card/block.c | 4 ++-- drivers/mtd/mtd_blkdevs.c | 6 +++--- drivers/s390/block/dasd.c | 8 ++++---- drivers/s390/block/dcssblk.c | 4 ++-- drivers/s390/char/tape_block.c | 6 +++--- drivers/scsi/ide-scsi.c | 6 +++--- drivers/scsi/sd.c | 8 ++++---- drivers/scsi/sr.c | 6 +++--- fs/block_dev.c | 18 +++++++++++++++--- include/linux/blkdev.h | 15 ++++++++++----- include/linux/fs.h | 1 + 45 files changed, 167 insertions(+), 131 deletions(-) (limited to 'include/linux') diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c index b58fb8941d8..72569cc3cbb 100644 --- 
a/arch/um/drivers/ubd_kern.c +++ b/arch/um/drivers/ubd_kern.c @@ -108,9 +108,9 @@ static int ubd_getgeo(struct block_device *bdev, struct hd_geometry *geo); static struct block_device_operations ubd_blops = { .owner = THIS_MODULE, - .open = ubd_open, - .release = ubd_release, - .ioctl = ubd_ioctl, + .__open = ubd_open, + .__release = ubd_release, + .__ioctl = ubd_ioctl, .getgeo = ubd_getgeo, }; diff --git a/block/compat_ioctl.c b/block/compat_ioctl.c index 1e559fba7bd..576c4fd1546 100644 --- a/block/compat_ioctl.c +++ b/block/compat_ioctl.c @@ -708,17 +708,17 @@ static int compat_blkdev_driver_ioctl(struct inode *inode, struct file *file, return -ENOIOCTLCMD; } - if (disk->fops->unlocked_ioctl) - return disk->fops->unlocked_ioctl(file, cmd, arg); + if (disk->fops->__unlocked_ioctl) + return disk->fops->__unlocked_ioctl(file, cmd, arg); - if (disk->fops->ioctl) { + if (disk->fops->__ioctl) { lock_kernel(); - ret = disk->fops->ioctl(inode, file, cmd, arg); + ret = disk->fops->__ioctl(inode, file, cmd, arg); unlock_kernel(); return ret; } - return -ENOTTY; + return __blkdev_driver_ioctl(inode->i_bdev, file->f_mode, cmd, arg); } static int compat_blkdev_locked_ioctl(struct inode *inode, struct file *file, @@ -805,10 +805,11 @@ long compat_blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg) lock_kernel(); ret = compat_blkdev_locked_ioctl(inode, file, bdev, cmd, arg); - /* FIXME: why do we assume -> compat_ioctl needs the BKL? 
*/ - if (ret == -ENOIOCTLCMD && disk->fops->compat_ioctl) - ret = disk->fops->compat_ioctl(file, cmd, arg); + if (ret == -ENOIOCTLCMD && disk->fops->__compat_ioctl) + ret = disk->fops->__compat_ioctl(file, cmd, arg); unlock_kernel(); + if (ret == -ENOIOCTLCMD && disk->fops->compat_ioctl) + ret = disk->fops->compat_ioctl(bdev, file->f_mode, cmd, arg); if (ret != -ENOIOCTLCMD) return ret; diff --git a/block/ioctl.c b/block/ioctl.c index 9a26ace6d04..01ff463bc80 100644 --- a/block/ioctl.c +++ b/block/ioctl.c @@ -269,17 +269,24 @@ int blkdev_driver_ioctl(struct inode *inode, struct file *file, struct gendisk *disk, unsigned cmd, unsigned long arg) { int ret; - if (disk->fops->unlocked_ioctl) - return disk->fops->unlocked_ioctl(file, cmd, arg); + fmode_t mode = 0; + if (file) { + mode = file->f_mode; + if (file->f_flags & O_NDELAY) + mode |= FMODE_NDELAY_NOW; + } + + if (disk->fops->__unlocked_ioctl) + return disk->fops->__unlocked_ioctl(file, cmd, arg); - if (disk->fops->ioctl) { + if (disk->fops->__ioctl) { lock_kernel(); - ret = disk->fops->ioctl(inode, file, cmd, arg); + ret = disk->fops->__ioctl(inode, file, cmd, arg); unlock_kernel(); return ret; } - return -ENOTTY; + return __blkdev_driver_ioctl(inode->i_bdev, mode, cmd, arg); } EXPORT_SYMBOL_GPL(blkdev_driver_ioctl); @@ -295,12 +302,22 @@ int __blkdev_driver_ioctl(struct block_device *bdev, fmode_t mode, fake_file.f_path.dentry = &fake_dentry; fake_dentry.d_inode = bdev->bd_inode; - if (disk->fops->unlocked_ioctl) - return disk->fops->unlocked_ioctl(&fake_file, cmd, arg); + if (disk->fops->__unlocked_ioctl) + return disk->fops->__unlocked_ioctl(&fake_file, cmd, arg); + + if (disk->fops->__ioctl) { + lock_kernel(); + ret = disk->fops->__ioctl(bdev->bd_inode, &fake_file, cmd, arg); + unlock_kernel(); + return ret; + } + + if (disk->fops->ioctl) + return disk->fops->ioctl(bdev, mode, cmd, arg); - if (disk->fops->ioctl) { + if (disk->fops->locked_ioctl) { lock_kernel(); - ret = disk->fops->ioctl(bdev->bd_inode, 
&fake_file, cmd, arg); + ret = disk->fops->locked_ioctl(bdev, mode, cmd, arg); unlock_kernel(); return ret; } diff --git a/drivers/block/DAC960.c b/drivers/block/DAC960.c index a002a381df9..4b90ebfa667 100644 --- a/drivers/block/DAC960.c +++ b/drivers/block/DAC960.c @@ -153,7 +153,7 @@ static int DAC960_revalidate_disk(struct gendisk *disk) static struct block_device_operations DAC960_BlockDeviceOperations = { .owner = THIS_MODULE, - .open = DAC960_open, + .__open = DAC960_open, .getgeo = DAC960_getgeo, .media_changed = DAC960_media_changed, .revalidate_disk = DAC960_revalidate_disk, diff --git a/drivers/block/amiflop.c b/drivers/block/amiflop.c index d19c5a939fe..d5da4e3cb2a 100644 --- a/drivers/block/amiflop.c +++ b/drivers/block/amiflop.c @@ -1648,9 +1648,9 @@ static int amiga_floppy_change(struct gendisk *disk) static struct block_device_operations floppy_fops = { .owner = THIS_MODULE, - .open = floppy_open, - .release = floppy_release, - .ioctl = fd_ioctl, + .__open = floppy_open, + .__release = floppy_release, + .__ioctl = fd_ioctl, .getgeo = fd_getgeo, .media_changed = amiga_floppy_change, }; diff --git a/drivers/block/aoe/aoeblk.c b/drivers/block/aoe/aoeblk.c index d876ad86123..d4d9796d5dd 100644 --- a/drivers/block/aoe/aoeblk.c +++ b/drivers/block/aoe/aoeblk.c @@ -239,8 +239,8 @@ aoeblk_getgeo(struct block_device *bdev, struct hd_geometry *geo) } static struct block_device_operations aoe_bdops = { - .open = aoeblk_open, - .release = aoeblk_release, + .__open = aoeblk_open, + .__release = aoeblk_release, .getgeo = aoeblk_getgeo, .owner = THIS_MODULE, }; diff --git a/drivers/block/ataflop.c b/drivers/block/ataflop.c index 85d56a26f7c..30166774327 100644 --- a/drivers/block/ataflop.c +++ b/drivers/block/ataflop.c @@ -1857,9 +1857,9 @@ static int floppy_release( struct inode * inode, struct file * filp ) static struct block_device_operations floppy_fops = { .owner = THIS_MODULE, - .open = floppy_open, - .release = floppy_release, - .ioctl = fd_ioctl, + .__open 
= floppy_open, + .__release = floppy_release, + .__ioctl = fd_ioctl, .media_changed = check_floppy_change, .revalidate_disk= floppy_revalidate, }; diff --git a/drivers/block/brd.c b/drivers/block/brd.c index d070d492e38..2ea99f94766 100644 --- a/drivers/block/brd.c +++ b/drivers/block/brd.c @@ -376,7 +376,7 @@ static int brd_ioctl(struct inode *inode, struct file *file, static struct block_device_operations brd_fops = { .owner = THIS_MODULE, - .ioctl = brd_ioctl, + .__ioctl = brd_ioctl, #ifdef CONFIG_BLK_DEV_XIP .direct_access = brd_direct_access, #endif diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c index d9b1c15b811..781b745181d 100644 --- a/drivers/block/cciss.c +++ b/drivers/block/cciss.c @@ -197,12 +197,12 @@ static long cciss_compat_ioctl(struct file *f, unsigned cmd, unsigned long arg); static struct block_device_operations cciss_fops = { .owner = THIS_MODULE, - .open = cciss_open, - .release = cciss_release, - .ioctl = cciss_ioctl, + .__open = cciss_open, + .__release = cciss_release, + .__ioctl = cciss_ioctl, .getgeo = cciss_getgeo, #ifdef CONFIG_COMPAT - .compat_ioctl = cciss_compat_ioctl, + .__compat_ioctl = cciss_compat_ioctl, #endif .revalidate_disk = cciss_revalidate, }; diff --git a/drivers/block/cpqarray.c b/drivers/block/cpqarray.c index 3d967525e9a..b71334b968b 100644 --- a/drivers/block/cpqarray.c +++ b/drivers/block/cpqarray.c @@ -195,9 +195,9 @@ static inline ctlr_info_t *get_host(struct gendisk *disk) static struct block_device_operations ida_fops = { .owner = THIS_MODULE, - .open = ida_open, - .release = ida_release, - .ioctl = ida_ioctl, + .__open = ida_open, + .__release = ida_release, + .__ioctl = ida_ioctl, .getgeo = ida_getgeo, .revalidate_disk= ida_revalidate, }; diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index 5d60c05a736..72363df5895 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -3902,9 +3902,9 @@ static int floppy_revalidate(struct gendisk *disk) static struct 
block_device_operations floppy_fops = { .owner = THIS_MODULE, - .open = floppy_open, - .release = floppy_release, - .ioctl = fd_ioctl, + .__open = floppy_open, + .__release = floppy_release, + .__ioctl = fd_ioctl, .getgeo = fd_getgeo, .media_changed = check_floppy_change, .revalidate_disk = floppy_revalidate, diff --git a/drivers/block/loop.c b/drivers/block/loop.c index d3a25b027ff..6faca2b7ae3 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -1355,11 +1355,11 @@ static int lo_release(struct inode *inode, struct file *file) static struct block_device_operations lo_fops = { .owner = THIS_MODULE, - .open = lo_open, - .release = lo_release, - .ioctl = lo_ioctl, + .__open = lo_open, + .__release = lo_release, + .__ioctl = lo_ioctl, #ifdef CONFIG_COMPAT - .compat_ioctl = lo_compat_ioctl, + .__compat_ioctl = lo_compat_ioctl, #endif }; diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 9034ca585af..36015e0945b 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -691,7 +691,7 @@ static int nbd_ioctl(struct inode *inode, struct file *file, static struct block_device_operations nbd_fops = { .owner = THIS_MODULE, - .ioctl = nbd_ioctl, + .__ioctl = nbd_ioctl, }; /* diff --git a/drivers/block/paride/pcd.c b/drivers/block/paride/pcd.c index 8bd557e2a65..6e6dcc1d432 100644 --- a/drivers/block/paride/pcd.c +++ b/drivers/block/paride/pcd.c @@ -252,9 +252,9 @@ static int pcd_block_media_changed(struct gendisk *disk) static struct block_device_operations pcd_bdops = { .owner = THIS_MODULE, - .open = pcd_block_open, - .release = pcd_block_release, - .ioctl = pcd_block_ioctl, + .__open = pcd_block_open, + .__release = pcd_block_release, + .__ioctl = pcd_block_ioctl, .media_changed = pcd_block_media_changed, }; diff --git a/drivers/block/paride/pd.c b/drivers/block/paride/pd.c index 5fdfa7c888c..b3023844947 100644 --- a/drivers/block/paride/pd.c +++ b/drivers/block/paride/pd.c @@ -807,9 +807,9 @@ static int pd_revalidate(struct gendisk *p) static 
struct block_device_operations pd_fops = { .owner = THIS_MODULE, - .open = pd_open, - .release = pd_release, - .ioctl = pd_ioctl, + .__open = pd_open, + .__release = pd_release, + .__ioctl = pd_ioctl, .getgeo = pd_getgeo, .media_changed = pd_check_media, .revalidate_disk= pd_revalidate diff --git a/drivers/block/paride/pf.c b/drivers/block/paride/pf.c index a902d84fd33..e08ca5161ad 100644 --- a/drivers/block/paride/pf.c +++ b/drivers/block/paride/pf.c @@ -264,9 +264,9 @@ static char *pf_buf; /* buffer for request in progress */ static struct block_device_operations pf_fops = { .owner = THIS_MODULE, - .open = pf_open, - .release = pf_release, - .ioctl = pf_ioctl, + .__open = pf_open, + .__release = pf_release, + .__ioctl = pf_ioctl, .getgeo = pf_getgeo, .media_changed = pf_check_media, }; diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c index a0ba4023953..33ac8ddf491 100644 --- a/drivers/block/pktcdvd.c +++ b/drivers/block/pktcdvd.c @@ -2847,9 +2847,9 @@ static int pkt_media_changed(struct gendisk *disk) static struct block_device_operations pktcdvd_ops = { .owner = THIS_MODULE, - .open = pkt_open, - .release = pkt_close, - .ioctl = pkt_ioctl, + .__open = pkt_open, + .__release = pkt_close, + .__ioctl = pkt_ioctl, .media_changed = pkt_media_changed, }; diff --git a/drivers/block/swim3.c b/drivers/block/swim3.c index 5c45d5556ae..9398af86a7a 100644 --- a/drivers/block/swim3.c +++ b/drivers/block/swim3.c @@ -998,9 +998,9 @@ static int floppy_revalidate(struct gendisk *disk) } static struct block_device_operations floppy_fops = { - .open = floppy_open, - .release = floppy_release, - .ioctl = floppy_ioctl, + .__open = floppy_open, + .__release = floppy_release, + .__ioctl = floppy_ioctl, .media_changed = floppy_check_change, .revalidate_disk= floppy_revalidate, }; diff --git a/drivers/block/ub.c b/drivers/block/ub.c index bc04330f368..5261773407d 100644 --- a/drivers/block/ub.c +++ b/drivers/block/ub.c @@ -1791,9 +1791,9 @@ static int 
ub_bd_media_changed(struct gendisk *disk) static struct block_device_operations ub_bd_fops = { .owner = THIS_MODULE, - .open = ub_bd_open, - .release = ub_bd_release, - .ioctl = ub_bd_ioctl, + .__open = ub_bd_open, + .__release = ub_bd_release, + .__ioctl = ub_bd_ioctl, .media_changed = ub_bd_media_changed, .revalidate_disk = ub_bd_revalidate, }; diff --git a/drivers/block/viodasd.c b/drivers/block/viodasd.c index 1730d29e604..7f7beec29eb 100644 --- a/drivers/block/viodasd.c +++ b/drivers/block/viodasd.c @@ -221,8 +221,8 @@ static int viodasd_getgeo(struct block_device *bdev, struct hd_geometry *geo) */ static struct block_device_operations viodasd_fops = { .owner = THIS_MODULE, - .open = viodasd_open, - .release = viodasd_release, + .__open = viodasd_open, + .__release = viodasd_release, .getgeo = viodasd_getgeo, }; diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 7643cd16fd6..10f157ea7b0 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -180,7 +180,7 @@ static int virtblk_getgeo(struct block_device *bd, struct hd_geometry *geo) } static struct block_device_operations virtblk_fops = { - .ioctl = virtblk_ioctl, + .__ioctl = virtblk_ioctl, .owner = THIS_MODULE, .getgeo = virtblk_getgeo, }; diff --git a/drivers/block/xd.c b/drivers/block/xd.c index 624d30f7da3..316fa1da4b9 100644 --- a/drivers/block/xd.c +++ b/drivers/block/xd.c @@ -132,7 +132,7 @@ static int xd_getgeo(struct block_device *bdev, struct hd_geometry *geo); static struct block_device_operations xd_fops = { .owner = THIS_MODULE, - .ioctl = xd_ioctl, + .__ioctl = xd_ioctl, .getgeo = xd_getgeo, }; static DECLARE_WAIT_QUEUE_HEAD(xd_wait_int); diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 1a50ae70f71..7efac80c8dd 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -1041,8 +1041,8 @@ static int blkif_release(struct inode *inode, struct file *filep) static struct block_device_operations 
xlvbd_block_fops = { .owner = THIS_MODULE, - .open = blkif_open, - .release = blkif_release, + .__open = blkif_open, + .__release = blkif_release, .getgeo = blkif_getgeo, .ioctl = blkif_ioctl, }; diff --git a/drivers/block/xsysace.c b/drivers/block/xsysace.c index 4a7a059ebaf..e4efe5b7ec2 100644 --- a/drivers/block/xsysace.c +++ b/drivers/block/xsysace.c @@ -919,8 +919,8 @@ static int ace_getgeo(struct block_device *bdev, struct hd_geometry *geo) static struct block_device_operations ace_fops = { .owner = THIS_MODULE, - .open = ace_open, - .release = ace_release, + .__open = ace_open, + .__release = ace_release, .media_changed = ace_media_changed, .revalidate_disk = ace_revalidate_disk, .getgeo = ace_getgeo, diff --git a/drivers/block/z2ram.c b/drivers/block/z2ram.c index be20a67f1fa..4860d0f3687 100644 --- a/drivers/block/z2ram.c +++ b/drivers/block/z2ram.c @@ -314,8 +314,8 @@ z2_release( struct inode *inode, struct file *filp ) static struct block_device_operations z2_fops = { .owner = THIS_MODULE, - .open = z2_open, - .release = z2_release, + .__open = z2_open, + .__release = z2_release, }; static struct kobject *z2_find(dev_t dev, int *part, void *data) diff --git a/drivers/cdrom/gdrom.c b/drivers/cdrom/gdrom.c index 0959edf2afd..ab0c637f58b 100644 --- a/drivers/cdrom/gdrom.c +++ b/drivers/cdrom/gdrom.c @@ -514,10 +514,10 @@ static int gdrom_bdops_ioctl(struct inode *inode, struct file *file, static struct block_device_operations gdrom_bdops = { .owner = THIS_MODULE, - .open = gdrom_bdops_open, - .release = gdrom_bdops_release, + .__open = gdrom_bdops_open, + .__release = gdrom_bdops_release, .media_changed = gdrom_bdops_mediachanged, - .ioctl = gdrom_bdops_ioctl, + .__ioctl = gdrom_bdops_ioctl, }; static irqreturn_t gdrom_command_interrupt(int irq, void *dev_id) diff --git a/drivers/cdrom/viocd.c b/drivers/cdrom/viocd.c index abc4079c3f4..57c2dced3e9 100644 --- a/drivers/cdrom/viocd.c +++ b/drivers/cdrom/viocd.c @@ -180,9 +180,9 @@ static int 
viocd_blk_media_changed(struct gendisk *disk) struct block_device_operations viocd_fops = { .owner = THIS_MODULE, - .open = viocd_blk_open, - .release = viocd_blk_release, - .ioctl = viocd_blk_ioctl, + .__open = viocd_blk_open, + .__release = viocd_blk_release, + .__ioctl = viocd_blk_ioctl, .media_changed = viocd_blk_media_changed, }; diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c index 87d90200b16..3533984355a 100644 --- a/drivers/ide/ide-cd.c +++ b/drivers/ide/ide-cd.c @@ -2200,9 +2200,9 @@ static int idecd_revalidate_disk(struct gendisk *disk) static struct block_device_operations idecd_ops = { .owner = THIS_MODULE, - .open = idecd_open, - .release = idecd_release, - .ioctl = idecd_ioctl, + .__open = idecd_open, + .__release = idecd_release, + .__ioctl = idecd_ioctl, .media_changed = idecd_media_changed, .revalidate_disk = idecd_revalidate_disk }; diff --git a/drivers/ide/ide-gd.c b/drivers/ide/ide-gd.c index 948af08abe2..d118bbed7cd 100644 --- a/drivers/ide/ide-gd.c +++ b/drivers/ide/ide-gd.c @@ -298,9 +298,9 @@ static int ide_gd_ioctl(struct inode *inode, struct file *file, static struct block_device_operations ide_gd_ops = { .owner = THIS_MODULE, - .open = ide_gd_open, - .release = ide_gd_release, - .ioctl = ide_gd_ioctl, + .__open = ide_gd_open, + .__release = ide_gd_release, + .__ioctl = ide_gd_ioctl, .getgeo = ide_gd_getgeo, .media_changed = ide_gd_media_changed, .revalidate_disk = ide_gd_revalidate_disk diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c index 2b263281ffe..c5df53c4838 100644 --- a/drivers/ide/ide-tape.c +++ b/drivers/ide/ide-tape.c @@ -2376,9 +2376,9 @@ static int idetape_ioctl(struct inode *inode, struct file *file, static struct block_device_operations idetape_block_ops = { .owner = THIS_MODULE, - .open = idetape_open, - .release = idetape_release, - .ioctl = idetape_ioctl, + .__open = idetape_open, + .__release = idetape_release, + .__ioctl = idetape_ioctl, }; static int ide_tape_probe(ide_drive_t *drive) diff --git 
a/drivers/md/dm.c b/drivers/md/dm.c index 5f0f4c8bcd3..8b4c92b1b6d 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1698,9 +1698,9 @@ int dm_noflush_suspending(struct dm_target *ti) EXPORT_SYMBOL_GPL(dm_noflush_suspending); static struct block_device_operations dm_blk_dops = { - .open = dm_blk_open, - .release = dm_blk_close, - .ioctl = dm_blk_ioctl, + .__open = dm_blk_open, + .__release = dm_blk_close, + .__ioctl = dm_blk_ioctl, .getgeo = dm_blk_getgeo, .owner = THIS_MODULE }; diff --git a/drivers/md/md.c b/drivers/md/md.c index aaa3d465de4..21b04d39ba3 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -5046,9 +5046,9 @@ static int md_revalidate(struct gendisk *disk) static struct block_device_operations md_fops = { .owner = THIS_MODULE, - .open = md_open, - .release = md_release, - .ioctl = md_ioctl, + .__open = md_open, + .__release = md_release, + .__ioctl = md_ioctl, .getgeo = md_getgeo, .media_changed = md_media_changed, .revalidate_disk= md_revalidate, diff --git a/drivers/memstick/core/mspro_block.c b/drivers/memstick/core/mspro_block.c index 5263913e0c6..fbe5919789d 100644 --- a/drivers/memstick/core/mspro_block.c +++ b/drivers/memstick/core/mspro_block.c @@ -237,8 +237,8 @@ static int mspro_block_bd_getgeo(struct block_device *bdev, } static struct block_device_operations ms_block_bdops = { - .open = mspro_block_bd_open, - .release = mspro_block_bd_release, + .__open = mspro_block_bd_open, + .__release = mspro_block_bd_release, .getgeo = mspro_block_bd_getgeo, .owner = THIS_MODULE }; diff --git a/drivers/message/i2o/i2o_block.c b/drivers/message/i2o/i2o_block.c index 81483de8c0f..71500dda8eb 100644 --- a/drivers/message/i2o/i2o_block.c +++ b/drivers/message/i2o/i2o_block.c @@ -931,9 +931,9 @@ static void i2o_block_request_fn(struct request_queue *q) /* I2O Block device operations definition */ static struct block_device_operations i2o_block_fops = { .owner = THIS_MODULE, - .open = i2o_block_open, - .release = i2o_block_release, - .ioctl = 
i2o_block_ioctl, + .__open = i2o_block_open, + .__release = i2o_block_release, + .__ioctl = i2o_block_ioctl, .getgeo = i2o_block_getgeo, .media_changed = i2o_block_media_changed }; diff --git a/drivers/mmc/card/block.c b/drivers/mmc/card/block.c index 24c97d3d16b..8cba06f5e11 100644 --- a/drivers/mmc/card/block.c +++ b/drivers/mmc/card/block.c @@ -130,8 +130,8 @@ mmc_blk_getgeo(struct block_device *bdev, struct hd_geometry *geo) } static struct block_device_operations mmc_bdops = { - .open = mmc_blk_open, - .release = mmc_blk_release, + .__open = mmc_blk_open, + .__release = mmc_blk_release, .getgeo = mmc_blk_getgeo, .owner = THIS_MODULE, }; diff --git a/drivers/mtd/mtd_blkdevs.c b/drivers/mtd/mtd_blkdevs.c index 681d5aca2af..b00d07c5375 100644 --- a/drivers/mtd/mtd_blkdevs.c +++ b/drivers/mtd/mtd_blkdevs.c @@ -213,9 +213,9 @@ static int blktrans_ioctl(struct inode *inode, struct file *file, static struct block_device_operations mtd_blktrans_ops = { .owner = THIS_MODULE, - .open = blktrans_open, - .release = blktrans_release, - .ioctl = blktrans_ioctl, + .__open = blktrans_open, + .__release = blktrans_release, + .__ioctl = blktrans_ioctl, .getgeo = blktrans_getgeo, }; diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c index 0a225ccda02..6bf68e5fe89 100644 --- a/drivers/s390/block/dasd.c +++ b/drivers/s390/block/dasd.c @@ -2087,10 +2087,10 @@ static int dasd_getgeo(struct block_device *bdev, struct hd_geometry *geo) struct block_device_operations dasd_device_operations = { .owner = THIS_MODULE, - .open = dasd_open, - .release = dasd_release, - .ioctl = dasd_ioctl, - .compat_ioctl = dasd_compat_ioctl, + .__open = dasd_open, + .__release = dasd_release, + .__ioctl = dasd_ioctl, + .__compat_ioctl = dasd_compat_ioctl, .getgeo = dasd_getgeo, }; diff --git a/drivers/s390/block/dcssblk.c b/drivers/s390/block/dcssblk.c index a7ff167d5b8..413460cc3dd 100644 --- a/drivers/s390/block/dcssblk.c +++ b/drivers/s390/block/dcssblk.c @@ -42,8 +42,8 @@ static char 
dcssblk_segments[DCSSBLK_PARM_LEN] = "\0"; static int dcssblk_major; static struct block_device_operations dcssblk_devops = { .owner = THIS_MODULE, - .open = dcssblk_open, - .release = dcssblk_release, + .__open = dcssblk_open, + .__release = dcssblk_release, .direct_access = dcssblk_direct_access, }; diff --git a/drivers/s390/char/tape_block.c b/drivers/s390/char/tape_block.c index a25b8bf54f4..f1a741c9a6f 100644 --- a/drivers/s390/char/tape_block.c +++ b/drivers/s390/char/tape_block.c @@ -52,9 +52,9 @@ static int tapeblock_revalidate_disk(struct gendisk *); static struct block_device_operations tapeblock_fops = { .owner = THIS_MODULE, - .open = tapeblock_open, - .release = tapeblock_release, - .ioctl = tapeblock_ioctl, + .__open = tapeblock_open, + .__release = tapeblock_release, + .__ioctl = tapeblock_ioctl, .media_changed = tapeblock_medium_changed, .revalidate_disk = tapeblock_revalidate_disk, }; diff --git a/drivers/scsi/ide-scsi.c b/drivers/scsi/ide-scsi.c index 5bcc04e82c2..9069afbad9d 100644 --- a/drivers/scsi/ide-scsi.c +++ b/drivers/scsi/ide-scsi.c @@ -483,9 +483,9 @@ static int idescsi_ide_ioctl(struct inode *inode, struct file *file, static struct block_device_operations idescsi_ops = { .owner = THIS_MODULE, - .open = idescsi_ide_open, - .release = idescsi_ide_release, - .ioctl = idescsi_ide_ioctl, + .__open = idescsi_ide_open, + .__release = idescsi_ide_release, + .__ioctl = idescsi_ide_ioctl, }; static int idescsi_slave_configure(struct scsi_device * sdp) diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 5a18528a69d..c8b95e8d285 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -962,12 +962,12 @@ static long sd_compat_ioctl(struct file *file, unsigned int cmd, unsigned long a static struct block_device_operations sd_fops = { .owner = THIS_MODULE, - .open = sd_open, - .release = sd_release, - .ioctl = sd_ioctl, + .__open = sd_open, + .__release = sd_release, + .__ioctl = sd_ioctl, .getgeo = sd_getgeo, #ifdef CONFIG_COMPAT - 
.compat_ioctl = sd_compat_ioctl, + .__compat_ioctl = sd_compat_ioctl, #endif .media_changed = sd_media_changed, .revalidate_disk = sd_revalidate_disk, diff --git a/drivers/scsi/sr.c b/drivers/scsi/sr.c index 2fb8d4d2d6f..9446cbf4de8 100644 --- a/drivers/scsi/sr.c +++ b/drivers/scsi/sr.c @@ -540,9 +540,9 @@ static int sr_block_media_changed(struct gendisk *disk) static struct block_device_operations sr_bdops = { .owner = THIS_MODULE, - .open = sr_block_open, - .release = sr_block_release, - .ioctl = sr_block_ioctl, + .__open = sr_block_open, + .__release = sr_block_release, + .__ioctl = sr_block_ioctl, .media_changed = sr_block_media_changed, /* * No compat_ioctl for now because sr_block_ioctl never diff --git a/fs/block_dev.c b/fs/block_dev.c index b9022694e9f..73b6ce47c86 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -1033,8 +1033,13 @@ static int do_open(struct block_device *bdev, struct file *file, int for_part) bdev->bd_contains = bdev; if (!partno) { struct backing_dev_info *bdi; + if (disk->fops->__open) { + ret = disk->fops->__open(bdev->bd_inode, file); + if (ret) + goto out_first; + } if (disk->fops->open) { - ret = disk->fops->open(bdev->bd_inode, file); + ret = disk->fops->open(bdev, file->f_mode); if (ret) goto out_clear; } @@ -1074,8 +1079,13 @@ static int do_open(struct block_device *bdev, struct file *file, int for_part) part = NULL; disk = NULL; if (bdev->bd_contains == bdev) { + if (bdev->bd_disk->fops->__open) { + ret = bdev->bd_disk->fops->__open(bdev->bd_inode, file); + if (ret) + goto out; + } if (bdev->bd_disk->fops->open) { - ret = bdev->bd_disk->fops->open(bdev->bd_inode, file); + ret = bdev->bd_disk->fops->open(bdev, file->f_mode); if (ret) goto out_unlock_bdev; } @@ -1184,8 +1194,10 @@ static int __blkdev_put(struct block_device *bdev, int for_part) kill_bdev(bdev); } if (bdev->bd_contains == bdev) { + if (disk->fops->__release) + ret = disk->fops->__release(bd_inode, NULL); if (disk->fops->release) - ret = 
disk->fops->release(bd_inode, NULL); + ret = disk->fops->release(disk, 0); } if (!bdev->bd_openers) { struct module *owner = disk->fops->owner; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 2bad616b994..b573186ff1a 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1061,11 +1061,16 @@ struct file; struct inode; struct block_device_operations { - int (*open) (struct inode *, struct file *); - int (*release) (struct inode *, struct file *); - int (*ioctl) (struct inode *, struct file *, unsigned, unsigned long); - long (*unlocked_ioctl) (struct file *, unsigned, unsigned long); - long (*compat_ioctl) (struct file *, unsigned, unsigned long); + int (*__open) (struct inode *, struct file *); + int (*__release) (struct inode *, struct file *); + int (*__ioctl) (struct inode *, struct file *, unsigned, unsigned long); + long (*__unlocked_ioctl) (struct file *, unsigned, unsigned long); + long (*__compat_ioctl) (struct file *, unsigned, unsigned long); + int (*open) (struct block_device *, fmode_t); + int (*release) (struct gendisk *, fmode_t); + int (*locked_ioctl) (struct block_device *, fmode_t, unsigned, unsigned long); + int (*ioctl) (struct block_device *, fmode_t, unsigned, unsigned long); + int (*compat_ioctl) (struct block_device *, fmode_t, unsigned, unsigned long); int (*direct_access) (struct block_device *, sector_t, void **, unsigned long *); int (*media_changed) (struct gendisk *); diff --git a/include/linux/fs.h b/include/linux/fs.h index 58bbf689fef..b5894604ba5 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -79,6 +79,7 @@ extern int dir_notify_enable; #define FMODE_NDELAY ((__force fmode_t)32) #define FMODE_EXCL ((__force fmode_t)64) #define FMODE_WRITE_IOCTL ((__force fmode_t)128) +#define FMODE_NDELAY_NOW ((__force fmode_t)256) #define RW_MASK 1 #define RWA_MASK 2 -- cgit v1.2.3-70-g09d2 From 90b8f2824ce68dd87d304641a1d5a048dfff39f5 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 2 Mar 2008 10:43:36 
-0500 Subject: [PATCH] end of methods switch: remove the old ones Signed-off-by: Al Viro --- block/compat_ioctl.c | 12 ------------ block/ioctl.c | 26 -------------------------- fs/block_dev.c | 13 ------------- include/linux/blkdev.h | 8 -------- 4 files changed, 59 deletions(-) (limited to 'include/linux') diff --git a/block/compat_ioctl.c b/block/compat_ioctl.c index 576c4fd1546..fd537fdb25a 100644 --- a/block/compat_ioctl.c +++ b/block/compat_ioctl.c @@ -708,16 +708,6 @@ static int compat_blkdev_driver_ioctl(struct inode *inode, struct file *file, return -ENOIOCTLCMD; } - if (disk->fops->__unlocked_ioctl) - return disk->fops->__unlocked_ioctl(file, cmd, arg); - - if (disk->fops->__ioctl) { - lock_kernel(); - ret = disk->fops->__ioctl(inode, file, cmd, arg); - unlock_kernel(); - return ret; - } - return __blkdev_driver_ioctl(inode->i_bdev, file->f_mode, cmd, arg); } @@ -805,8 +795,6 @@ long compat_blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg) lock_kernel(); ret = compat_blkdev_locked_ioctl(inode, file, bdev, cmd, arg); - if (ret == -ENOIOCTLCMD && disk->fops->__compat_ioctl) - ret = disk->fops->__compat_ioctl(file, cmd, arg); unlock_kernel(); if (ret == -ENOIOCTLCMD && disk->fops->compat_ioctl) ret = disk->fops->compat_ioctl(bdev, file->f_mode, cmd, arg); diff --git a/block/ioctl.c b/block/ioctl.c index 01ff463bc80..0db89f95b15 100644 --- a/block/ioctl.c +++ b/block/ioctl.c @@ -276,16 +276,6 @@ int blkdev_driver_ioctl(struct inode *inode, struct file *file, mode |= FMODE_NDELAY_NOW; } - if (disk->fops->__unlocked_ioctl) - return disk->fops->__unlocked_ioctl(file, cmd, arg); - - if (disk->fops->__ioctl) { - lock_kernel(); - ret = disk->fops->__ioctl(inode, file, cmd, arg); - unlock_kernel(); - return ret; - } - return __blkdev_driver_ioctl(inode->i_bdev, mode, cmd, arg); } EXPORT_SYMBOL_GPL(blkdev_driver_ioctl); @@ -295,22 +285,6 @@ int __blkdev_driver_ioctl(struct block_device *bdev, fmode_t mode, { struct gendisk *disk = bdev->bd_disk; int 
ret; - /* you bet it'll go away by the end of patch series */ - struct file fake_file = {}; - struct dentry fake_dentry = {}; - fake_file.f_mode = mode; - fake_file.f_path.dentry = &fake_dentry; - fake_dentry.d_inode = bdev->bd_inode; - - if (disk->fops->__unlocked_ioctl) - return disk->fops->__unlocked_ioctl(&fake_file, cmd, arg); - - if (disk->fops->__ioctl) { - lock_kernel(); - ret = disk->fops->__ioctl(bdev->bd_inode, &fake_file, cmd, arg); - unlock_kernel(); - return ret; - } if (disk->fops->ioctl) return disk->fops->ioctl(bdev, mode, cmd, arg); diff --git a/fs/block_dev.c b/fs/block_dev.c index 73b6ce47c86..55124ac8c7a 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -1033,11 +1033,6 @@ static int do_open(struct block_device *bdev, struct file *file, int for_part) bdev->bd_contains = bdev; if (!partno) { struct backing_dev_info *bdi; - if (disk->fops->__open) { - ret = disk->fops->__open(bdev->bd_inode, file); - if (ret) - goto out_first; - } if (disk->fops->open) { ret = disk->fops->open(bdev, file->f_mode); if (ret) @@ -1079,11 +1074,6 @@ static int do_open(struct block_device *bdev, struct file *file, int for_part) part = NULL; disk = NULL; if (bdev->bd_contains == bdev) { - if (bdev->bd_disk->fops->__open) { - ret = bdev->bd_disk->fops->__open(bdev->bd_inode, file); - if (ret) - goto out; - } if (bdev->bd_disk->fops->open) { ret = bdev->bd_disk->fops->open(bdev, file->f_mode); if (ret) @@ -1180,7 +1170,6 @@ static int blkdev_open(struct inode * inode, struct file * filp) static int __blkdev_put(struct block_device *bdev, int for_part) { int ret = 0; - struct inode *bd_inode = bdev->bd_inode; struct gendisk *disk = bdev->bd_disk; struct block_device *victim = NULL; @@ -1194,8 +1183,6 @@ static int __blkdev_put(struct block_device *bdev, int for_part) kill_bdev(bdev); } if (bdev->bd_contains == bdev) { - if (disk->fops->__release) - ret = disk->fops->__release(bd_inode, NULL); if (disk->fops->release) ret = disk->fops->release(disk, 0); } diff --git 
a/include/linux/blkdev.h b/include/linux/blkdev.h index b573186ff1a..a135256b272 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1057,15 +1057,7 @@ static inline int blk_integrity_rq(struct request *rq) #endif /* CONFIG_BLK_DEV_INTEGRITY */ -struct file; -struct inode; - struct block_device_operations { - int (*__open) (struct inode *, struct file *); - int (*__release) (struct inode *, struct file *); - int (*__ioctl) (struct inode *, struct file *, unsigned, unsigned long); - long (*__unlocked_ioctl) (struct file *, unsigned, unsigned long); - long (*__compat_ioctl) (struct file *, unsigned, unsigned long); int (*open) (struct block_device *, fmode_t); int (*release) (struct gendisk *, fmode_t); int (*locked_ioctl) (struct block_device *, fmode_t, unsigned, unsigned long); -- cgit v1.2.3-70-g09d2 From 9a1c3542768b5a58e45a9216921cd10a3bae1205 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 22 Feb 2008 20:40:24 -0500 Subject: [PATCH] pass fmode_t to blkdev_put() Signed-off-by: Al Viro --- drivers/block/pktcdvd.c | 8 ++++---- drivers/char/raw.c | 4 ++-- drivers/md/dm-table.c | 4 ++-- drivers/md/md.c | 4 ++-- drivers/s390/block/dasd_genhd.c | 2 +- fs/block_dev.c | 22 +++++++++++----------- fs/ext3/super.c | 4 ++-- fs/ext4/super.c | 4 ++-- fs/jfs/jfs_logmgr.c | 4 ++-- fs/ocfs2/cluster/heartbeat.c | 4 ++-- fs/partitions/check.c | 2 +- fs/reiserfs/journal.c | 4 ++-- include/linux/fs.h | 2 +- kernel/power/swap.c | 8 ++++---- 14 files changed, 38 insertions(+), 38 deletions(-) (limited to 'include/linux') diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c index 90548da9c1c..ce8c7190192 100644 --- a/drivers/block/pktcdvd.c +++ b/drivers/block/pktcdvd.c @@ -2381,7 +2381,7 @@ static int pkt_open_dev(struct pktcdvd_device *pd, fmode_t write) out_unclaim: bd_release(pd->bdev); out_putdev: - blkdev_put(pd->bdev); + blkdev_put(pd->bdev, FMODE_READ); out: return ret; } @@ -2399,7 +2399,7 @@ static void pkt_release_dev(struct pktcdvd_device *pd, 
int flush) pkt_set_speed(pd, MAX_SPEED, MAX_SPEED); bd_release(pd->bdev); - blkdev_put(pd->bdev); + blkdev_put(pd->bdev, FMODE_READ); pkt_shrink_pktlist(pd); } @@ -2790,7 +2790,7 @@ static int pkt_new_dev(struct pktcdvd_device *pd, dev_t dev) return 0; out_mem: - blkdev_put(bdev); + blkdev_put(bdev, FMODE_READ|FMODE_WRITE); /* This is safe: open() is still holding a reference. */ module_put(THIS_MODULE); return ret; @@ -2975,7 +2975,7 @@ static int pkt_remove_dev(dev_t pkt_dev) pkt_debugfs_dev_remove(pd); pkt_sysfs_dev_remove(pd); - blkdev_put(pd->bdev); + blkdev_put(pd->bdev, FMODE_READ|FMODE_WRITE); remove_proc_entry(pd->name, pkt_proc); DPRINTK(DRIVER_NAME": writer %s unmapped\n", pd->name); diff --git a/drivers/char/raw.c b/drivers/char/raw.c index e139372d0e6..bfd59e6bf54 100644 --- a/drivers/char/raw.c +++ b/drivers/char/raw.c @@ -87,7 +87,7 @@ static int raw_open(struct inode *inode, struct file *filp) out2: bd_release(bdev); out1: - blkdev_put(bdev); + blkdev_put(bdev, filp->f_mode); out: mutex_unlock(&raw_mutex); return err; @@ -112,7 +112,7 @@ static int raw_release(struct inode *inode, struct file *filp) mutex_unlock(&raw_mutex); bd_release(bdev); - blkdev_put(bdev); + blkdev_put(bdev, filp->f_mode); return 0; } diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 7c8671b06fe..dd8bd2e867c 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -357,7 +357,7 @@ static int open_dev(struct dm_dev_internal *d, dev_t dev, return PTR_ERR(bdev); r = bd_claim_by_disk(bdev, _claim_ptr, dm_disk(md)); if (r) - blkdev_put(bdev); + blkdev_put(bdev, d->dm_dev.mode); else d->dm_dev.bdev = bdev; return r; @@ -372,7 +372,7 @@ static void close_dev(struct dm_dev_internal *d, struct mapped_device *md) return; bd_release_from_disk(d->dm_dev.bdev, dm_disk(md)); - blkdev_put(d->dm_dev.bdev); + blkdev_put(d->dm_dev.bdev, d->dm_dev.mode); d->dm_dev.bdev = NULL; } diff --git a/drivers/md/md.c b/drivers/md/md.c index 06ea991c7a4..c1a837ca193 100644 --- 
a/drivers/md/md.c +++ b/drivers/md/md.c @@ -1520,7 +1520,7 @@ static int lock_rdev(mdk_rdev_t *rdev, dev_t dev, int shared) if (err) { printk(KERN_ERR "md: could not bd_claim %s.\n", bdevname(bdev, b)); - blkdev_put(bdev); + blkdev_put(bdev, FMODE_READ|FMODE_WRITE); return err; } if (!shared) @@ -1536,7 +1536,7 @@ static void unlock_rdev(mdk_rdev_t *rdev) if (!bdev) MD_BUG(); bd_release(bdev); - blkdev_put(bdev); + blkdev_put(bdev, FMODE_READ|FMODE_WRITE); } void md_autodetect_dev(dev_t dev); diff --git a/drivers/s390/block/dasd_genhd.c b/drivers/s390/block/dasd_genhd.c index aee6565aaf9..3c1b6915c9a 100644 --- a/drivers/s390/block/dasd_genhd.c +++ b/drivers/s390/block/dasd_genhd.c @@ -152,7 +152,7 @@ void dasd_destroy_partitions(struct dasd_block *block) invalidate_partition(block->gdp, 0); /* Matching blkdev_put to the blkdev_get in dasd_scan_partitions. */ - blkdev_put(bdev); + blkdev_put(bdev, FMODE_READ); set_capacity(block->gdp, 0); } diff --git a/fs/block_dev.c b/fs/block_dev.c index 55124ac8c7a..05131baf3cf 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -977,7 +977,7 @@ EXPORT_SYMBOL(bd_set_size); static int __blkdev_get(struct block_device *bdev, fmode_t mode, unsigned flags, int for_part); -static int __blkdev_put(struct block_device *bdev, int for_part); +static int __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part); /* * bd_mutex locking: @@ -1095,7 +1095,7 @@ static int do_open(struct block_device *bdev, struct file *file, int for_part) bdev->bd_part = NULL; bdev->bd_inode->i_data.backing_dev_info = &default_backing_dev_info; if (bdev != bdev->bd_contains) - __blkdev_put(bdev->bd_contains, 1); + __blkdev_put(bdev->bd_contains, file->f_mode, 1); bdev->bd_contains = NULL; out_unlock_bdev: mutex_unlock(&bdev->bd_mutex); @@ -1163,11 +1163,11 @@ static int blkdev_open(struct inode * inode, struct file * filp) if (!(res = bd_claim(bdev, filp))) return 0; - blkdev_put(bdev); + blkdev_put(bdev, filp->f_mode); return res; } -static int 
__blkdev_put(struct block_device *bdev, int for_part) +static int __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part) { int ret = 0; struct gendisk *disk = bdev->bd_disk; @@ -1184,7 +1184,7 @@ static int __blkdev_put(struct block_device *bdev, int for_part) } if (bdev->bd_contains == bdev) { if (disk->fops->release) - ret = disk->fops->release(disk, 0); + ret = disk->fops->release(disk, mode); } if (!bdev->bd_openers) { struct module *owner = disk->fops->owner; @@ -1203,13 +1203,13 @@ static int __blkdev_put(struct block_device *bdev, int for_part) mutex_unlock(&bdev->bd_mutex); bdput(bdev); if (victim) - __blkdev_put(victim, 1); + __blkdev_put(victim, mode, 1); return ret; } -int blkdev_put(struct block_device *bdev) +int blkdev_put(struct block_device *bdev, fmode_t mode) { - return __blkdev_put(bdev, 0); + return __blkdev_put(bdev, mode, 0); } EXPORT_SYMBOL(blkdev_put); @@ -1218,7 +1218,7 @@ static int blkdev_close(struct inode * inode, struct file * filp) struct block_device *bdev = I_BDEV(filp->f_mapping->host); if (bdev->bd_holder == filp) bd_release(bdev); - return blkdev_put(bdev); + return blkdev_put(bdev, filp->f_mode); } static long block_ioctl(struct file *file, unsigned cmd, unsigned long arg) @@ -1343,7 +1343,7 @@ struct block_device *open_bdev_excl(const char *path, int flags, void *holder) return bdev; blkdev_put: - blkdev_put(bdev); + blkdev_put(bdev, mode); return ERR_PTR(error); } @@ -1359,7 +1359,7 @@ EXPORT_SYMBOL(open_bdev_excl); void close_bdev_excl(struct block_device *bdev) { bd_release(bdev); - blkdev_put(bdev); + blkdev_put(bdev, 0); /* move up in the next patches */ } EXPORT_SYMBOL(close_bdev_excl); diff --git a/fs/ext3/super.c b/fs/ext3/super.c index 3a260af5544..15c38e69b69 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c @@ -347,7 +347,7 @@ fail: static int ext3_blkdev_put(struct block_device *bdev) { bd_release(bdev); - return blkdev_put(bdev); + return blkdev_put(bdev, FMODE_READ|FMODE_WRITE); } static int 
ext3_blkdev_remove(struct ext3_sb_info *sbi) @@ -2066,7 +2066,7 @@ static journal_t *ext3_get_dev_journal(struct super_block *sb, if (bd_claim(bdev, sb)) { printk(KERN_ERR "EXT3: failed to claim external journal device.\n"); - blkdev_put(bdev); + blkdev_put(bdev, FMODE_READ|FMODE_WRITE); return NULL; } diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 9b2b2bc4ec1..c12cf7a657a 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -399,7 +399,7 @@ fail: static int ext4_blkdev_put(struct block_device *bdev) { bd_release(bdev); - return blkdev_put(bdev); + return blkdev_put(bdev, FMODE_READ|FMODE_WRITE); } static int ext4_blkdev_remove(struct ext4_sb_info *sbi) @@ -2553,7 +2553,7 @@ static journal_t *ext4_get_dev_journal(struct super_block *sb, if (bd_claim(bdev, sb)) { printk(KERN_ERR "EXT4: failed to claim external journal device.\n"); - blkdev_put(bdev); + blkdev_put(bdev, FMODE_READ|FMODE_WRITE); return NULL; } diff --git a/fs/jfs/jfs_logmgr.c b/fs/jfs/jfs_logmgr.c index cd2ec2988b5..335c4de6552 100644 --- a/fs/jfs/jfs_logmgr.c +++ b/fs/jfs/jfs_logmgr.c @@ -1168,7 +1168,7 @@ journal_found: bd_release(bdev); close: /* close external log device */ - blkdev_put(bdev); + blkdev_put(bdev, FMODE_READ|FMODE_WRITE); free: /* free log descriptor */ mutex_unlock(&jfs_log_mutex); @@ -1514,7 +1514,7 @@ int lmLogClose(struct super_block *sb) rc = lmLogShutdown(log); bd_release(bdev); - blkdev_put(bdev); + blkdev_put(bdev, FMODE_READ|FMODE_WRITE); kfree(log); diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c index 7dce1612553..4b6fdf591ee 100644 --- a/fs/ocfs2/cluster/heartbeat.c +++ b/fs/ocfs2/cluster/heartbeat.c @@ -976,7 +976,7 @@ static void o2hb_region_release(struct config_item *item) } if (reg->hr_bdev) - blkdev_put(reg->hr_bdev); + blkdev_put(reg->hr_bdev, FMODE_READ|FMODE_WRITE); if (reg->hr_slots) kfree(reg->hr_slots); @@ -1358,7 +1358,7 @@ out: iput(inode); if (ret < 0) { if (reg->hr_bdev) { - blkdev_put(reg->hr_bdev); + 
blkdev_put(reg->hr_bdev, FMODE_READ|FMODE_WRITE); reg->hr_bdev = NULL; } } diff --git a/fs/partitions/check.c b/fs/partitions/check.c index cfb0c80690a..5a35ff2e1a9 100644 --- a/fs/partitions/check.c +++ b/fs/partitions/check.c @@ -488,7 +488,7 @@ void register_disk(struct gendisk *disk) err = blkdev_get(bdev, FMODE_READ, 0); if (err < 0) goto exit; - blkdev_put(bdev); + blkdev_put(bdev, FMODE_READ); exit: /* announce disk after possible partitions are created */ diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c index b89d193a00d..3261518478f 100644 --- a/fs/reiserfs/journal.c +++ b/fs/reiserfs/journal.c @@ -2575,7 +2575,7 @@ static int release_journal_dev(struct super_block *super, if (journal->j_dev_bd != NULL) { if (journal->j_dev_bd->bd_dev != super->s_dev) bd_release(journal->j_dev_bd); - result = blkdev_put(journal->j_dev_bd); + result = blkdev_put(journal->j_dev_bd, 0); /* move up */ journal->j_dev_bd = NULL; } @@ -2618,7 +2618,7 @@ static int journal_init_dev(struct super_block *super, } else if (jdev != super->s_dev) { result = bd_claim(journal->j_dev_bd, journal); if (result) { - blkdev_put(journal->j_dev_bd); + blkdev_put(journal->j_dev_bd, blkdev_mode); return result; } diff --git a/include/linux/fs.h b/include/linux/fs.h index b5894604ba5..04c8dc41f45 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1722,7 +1722,7 @@ extern int blkdev_driver_ioctl(struct inode *inode, struct file *file, unsigned long arg); extern long compat_blkdev_ioctl(struct file *, unsigned, unsigned long); extern int blkdev_get(struct block_device *, fmode_t, unsigned); -extern int blkdev_put(struct block_device *); +extern int blkdev_put(struct block_device *, fmode_t); extern int bd_claim(struct block_device *, void *); extern void bd_release(struct block_device *); #ifdef CONFIG_SYSFS diff --git a/kernel/power/swap.c b/kernel/power/swap.c index 80ccac849e4..7b9d611c110 100644 --- a/kernel/power/swap.c +++ b/kernel/power/swap.c @@ -178,7 +178,7 @@ static int 
swsusp_swap_check(void) /* This is called before saving image */ res = set_blocksize(resume_bdev, PAGE_SIZE); if (res < 0) - blkdev_put(resume_bdev); + blkdev_put(resume_bdev, FMODE_WRITE); return res; } @@ -574,7 +574,7 @@ int swsusp_read(unsigned int *flags_p) error = load_image(&handle, &snapshot, header->pages - 1); release_swap_reader(&handle); - blkdev_put(resume_bdev); + blkdev_put(resume_bdev, FMODE_READ); if (!error) pr_debug("PM: Image successfully loaded\n"); @@ -609,7 +609,7 @@ int swsusp_check(void) return -EINVAL; } if (error) - blkdev_put(resume_bdev); + blkdev_put(resume_bdev, FMODE_READ); else pr_debug("PM: Signature found, resuming\n"); } else { @@ -633,7 +633,7 @@ void swsusp_close(void) return; } - blkdev_put(resume_bdev); + blkdev_put(resume_bdev, 0); /* move up */ } static int swsusp_header_init(void) -- cgit v1.2.3-70-g09d2 From 30c40d2c01f68c7eb1a41ab3552bdaf5dbf300d4 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 22 Feb 2008 19:50:45 -0500 Subject: [PATCH] propagate mode through open_bdev_excl/close_bdev_excl replace open_bdev_excl/close_bdev_excl with variants taking fmode_t. 
superblock gets the value used to mount it stored in sb->s_mode Signed-off-by: Al Viro --- drivers/mtd/devices/block2mtd.c | 4 ++-- fs/block_dev.c | 24 +++++++++++------------- fs/reiserfs/journal.c | 3 ++- fs/super.c | 14 ++++++++++---- fs/xfs/linux-2.6/xfs_super.c | 4 ++-- include/linux/fs.h | 6 ++++-- 6 files changed, 31 insertions(+), 24 deletions(-) (limited to 'include/linux') diff --git a/drivers/mtd/devices/block2mtd.c b/drivers/mtd/devices/block2mtd.c index 91fbba76763..8c295f40d2a 100644 --- a/drivers/mtd/devices/block2mtd.c +++ b/drivers/mtd/devices/block2mtd.c @@ -224,7 +224,7 @@ static void block2mtd_free_device(struct block2mtd_dev *dev) if (dev->blkdev) { invalidate_mapping_pages(dev->blkdev->bd_inode->i_mapping, 0, -1); - close_bdev_excl(dev->blkdev); + close_bdev_exclusive(dev->blkdev, FMODE_READ|FMODE_WRITE); } kfree(dev); @@ -246,7 +246,7 @@ static struct block2mtd_dev *add_device(char *devname, int erase_size) return NULL; /* Get a handle on the device */ - bdev = open_bdev_excl(devname, O_RDWR, NULL); + bdev = open_bdev_exclusive(devname, FMODE_READ|FMODE_WRITE, NULL); #ifndef MODULE if (IS_ERR(bdev)) { diff --git a/fs/block_dev.c b/fs/block_dev.c index 05131baf3cf..4b595904cef 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -1309,32 +1309,29 @@ fail: EXPORT_SYMBOL(lookup_bdev); /** - * open_bdev_excl - open a block device by name and set it up for use + * open_bdev_exclusive - open a block device by name and set it up for use * * @path: special file representing the block device - * @flags: %MS_RDONLY for opening read-only + * @mode: FMODE_... combination to be used * @holder: owner for exclusion * * Open the blockdevice described by the special file at @path, claim it * for the @holder. 
*/ -struct block_device *open_bdev_excl(const char *path, int flags, void *holder) +struct block_device *open_bdev_exclusive(const char *path, fmode_t mode, void *holder) { struct block_device *bdev; - fmode_t mode = FMODE_READ; int error = 0; bdev = lookup_bdev(path); if (IS_ERR(bdev)) return bdev; - if (!(flags & MS_RDONLY)) - mode |= FMODE_WRITE; error = blkdev_get(bdev, mode, 0); if (error) return ERR_PTR(error); error = -EACCES; - if (!(flags & MS_RDONLY) && bdev_read_only(bdev)) + if ((mode & FMODE_WRITE) && bdev_read_only(bdev)) goto blkdev_put; error = bd_claim(bdev, holder); if (error) @@ -1347,22 +1344,23 @@ blkdev_put: return ERR_PTR(error); } -EXPORT_SYMBOL(open_bdev_excl); +EXPORT_SYMBOL(open_bdev_exclusive); /** - * close_bdev_excl - release a blockdevice openen by open_bdev_excl() + * close_bdev_exclusive - close a blockdevice opened by open_bdev_exclusive() * * @bdev: blockdevice to close + * @mode: mode, must match that used to open. * - * This is the counterpart to open_bdev_excl(). + * This is the counterpart to open_bdev_exclusive(). 
*/ -void close_bdev_excl(struct block_device *bdev) +void close_bdev_exclusive(struct block_device *bdev, fmode_t mode) { bd_release(bdev); - blkdev_put(bdev, 0); /* move up in the next patches */ + blkdev_put(bdev, mode); } -EXPORT_SYMBOL(close_bdev_excl); +EXPORT_SYMBOL(close_bdev_exclusive); int __invalidate_device(struct block_device *bdev) { diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c index 3261518478f..70b89607667 100644 --- a/fs/reiserfs/journal.c +++ b/fs/reiserfs/journal.c @@ -2628,7 +2628,8 @@ static int journal_init_dev(struct super_block *super, return 0; } - journal->j_dev_bd = open_bdev_excl(jdev_name, 0, journal); + journal->j_dev_bd = open_bdev_exclusive(jdev_name, + FMODE_READ|FMODE_WRITE, journal); if (IS_ERR(journal->j_dev_bd)) { result = PTR_ERR(journal->j_dev_bd); journal->j_dev_bd = NULL; diff --git a/fs/super.c b/fs/super.c index e931ae9511f..0d77ac20d03 100644 --- a/fs/super.c +++ b/fs/super.c @@ -760,9 +760,13 @@ int get_sb_bdev(struct file_system_type *fs_type, { struct block_device *bdev; struct super_block *s; + fmode_t mode = FMODE_READ; int error = 0; - bdev = open_bdev_excl(dev_name, flags, fs_type); + if (!(flags & MS_RDONLY)) + mode |= FMODE_WRITE; + + bdev = open_bdev_exclusive(dev_name, mode, fs_type); if (IS_ERR(bdev)) return PTR_ERR(bdev); @@ -785,11 +789,12 @@ int get_sb_bdev(struct file_system_type *fs_type, goto error_bdev; } - close_bdev_excl(bdev); + close_bdev_exclusive(bdev, mode); } else { char b[BDEVNAME_SIZE]; s->s_flags = flags; + s->s_mode = mode; strlcpy(s->s_id, bdevname(bdev, b), sizeof(s->s_id)); sb_set_blocksize(s, block_size(bdev)); error = fill_super(s, data, flags & MS_SILENT ? 
1 : 0); @@ -807,7 +812,7 @@ int get_sb_bdev(struct file_system_type *fs_type, error_s: error = PTR_ERR(s); error_bdev: - close_bdev_excl(bdev); + close_bdev_exclusive(bdev, mode); error: return error; } @@ -817,10 +822,11 @@ EXPORT_SYMBOL(get_sb_bdev); void kill_block_super(struct super_block *sb) { struct block_device *bdev = sb->s_bdev; + fmode_t mode = sb->s_mode; generic_shutdown_super(sb); sync_blockdev(bdev); - close_bdev_excl(bdev); + close_bdev_exclusive(bdev, mode); } EXPORT_SYMBOL(kill_block_super); diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index e39013619b2..37ebe36056e 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -589,7 +589,7 @@ xfs_blkdev_get( { int error = 0; - *bdevp = open_bdev_excl(name, 0, mp); + *bdevp = open_bdev_exclusive(name, FMODE_READ|FMODE_WRITE, mp); if (IS_ERR(*bdevp)) { error = PTR_ERR(*bdevp); printk("XFS: Invalid device [%s], error=%d\n", name, error); @@ -603,7 +603,7 @@ xfs_blkdev_put( struct block_device *bdev) { if (bdev) - close_bdev_excl(bdev); + close_bdev_exclusive(bdev, FMODE_READ|FMODE_WRITE); } /* diff --git a/include/linux/fs.h b/include/linux/fs.h index 04c8dc41f45..c6766314dc5 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1157,6 +1157,7 @@ struct super_block { char s_id[32]; /* Informational name */ void *s_fs_info; /* Filesystem private info */ + fmode_t s_mode; /* * The next field is for VFS *only*. 
No filesystems have any business @@ -1753,9 +1754,10 @@ extern void chrdev_show(struct seq_file *,off_t); extern const char *__bdevname(dev_t, char *buffer); extern const char *bdevname(struct block_device *bdev, char *buffer); extern struct block_device *lookup_bdev(const char *); -extern struct block_device *open_bdev_excl(const char *, int, void *); -extern void close_bdev_excl(struct block_device *); +extern struct block_device *open_bdev_exclusive(const char *, fmode_t, void *); +extern void close_bdev_exclusive(struct block_device *, fmode_t); extern void blkdev_show(struct seq_file *,off_t); + #else #define BLKDEV_MAJOR_HASH_SIZE 0 #endif -- cgit v1.2.3-70-g09d2 From e5eb8caa83a76191feb9705c1a0a689ca260b91e Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 8 Oct 2007 13:24:05 -0400 Subject: [PATCH] remember mode of reiserfs journal Signed-off-by: Al Viro --- fs/reiserfs/journal.c | 6 ++++-- include/linux/reiserfs_fs_sb.h | 1 + 2 files changed, 5 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c index 70b89607667..9643c3bbeb3 100644 --- a/fs/reiserfs/journal.c +++ b/fs/reiserfs/journal.c @@ -2575,7 +2575,7 @@ static int release_journal_dev(struct super_block *super, if (journal->j_dev_bd != NULL) { if (journal->j_dev_bd->bd_dev != super->s_dev) bd_release(journal->j_dev_bd); - result = blkdev_put(journal->j_dev_bd, 0); /* move up */ + result = blkdev_put(journal->j_dev_bd, journal->j_dev_mode); journal->j_dev_bd = NULL; } @@ -2608,6 +2608,7 @@ static int journal_init_dev(struct super_block *super, /* there is no "jdev" option and journal is on separate device */ if ((!jdev_name || !jdev_name[0])) { journal->j_dev_bd = open_by_devnum(jdev, blkdev_mode); + journal->j_dev_mode = blkdev_mode; if (IS_ERR(journal->j_dev_bd)) { result = PTR_ERR(journal->j_dev_bd); journal->j_dev_bd = NULL; @@ -2628,8 +2629,9 @@ static int journal_init_dev(struct super_block *super, return 0; } + journal->j_dev_mode 
= blkdev_mode; journal->j_dev_bd = open_bdev_exclusive(jdev_name, - FMODE_READ|FMODE_WRITE, journal); + blkdev_mode, journal); if (IS_ERR(journal->j_dev_bd)) { result = PTR_ERR(journal->j_dev_bd); journal->j_dev_bd = NULL; diff --git a/include/linux/reiserfs_fs_sb.h b/include/linux/reiserfs_fs_sb.h index 315517e8bfa..bda6b562a1e 100644 --- a/include/linux/reiserfs_fs_sb.h +++ b/include/linux/reiserfs_fs_sb.h @@ -178,6 +178,7 @@ struct reiserfs_journal { struct reiserfs_journal_cnode *j_first; /* oldest journal block. start here for traverse */ struct block_device *j_dev_bd; + fmode_t j_dev_mode; int j_1st_reserved_block; /* first block on s_dev of reserved area journal */ unsigned long j_state; -- cgit v1.2.3-70-g09d2 From 572c48921574dbe6dceb958cf965aa962baefde4 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 8 Oct 2007 13:24:05 -0400 Subject: [PATCH] sanitize blkdev_get() and friends * get rid of fake struct file/struct dentry in __blkdev_get() * merge __blkdev_get() and do_open() * get rid of flags argument of blkdev_get() Signed-off-by: Al Viro --- drivers/block/pktcdvd.c | 4 +-- drivers/char/raw.c | 2 +- drivers/s390/block/dasd_genhd.c | 2 +- fs/block_dev.c | 65 ++++++++++++++--------------------------- fs/ocfs2/cluster/heartbeat.c | 2 +- fs/partitions/check.c | 2 +- include/linux/fs.h | 2 +- kernel/power/swap.c | 2 +- 8 files changed, 30 insertions(+), 51 deletions(-) (limited to 'include/linux') diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c index ce8c7190192..f20bf359b84 100644 --- a/drivers/block/pktcdvd.c +++ b/drivers/block/pktcdvd.c @@ -2332,7 +2332,7 @@ static int pkt_open_dev(struct pktcdvd_device *pd, fmode_t write) * so bdget() can't fail. 
*/ bdget(pd->bdev->bd_dev); - if ((ret = blkdev_get(pd->bdev, FMODE_READ, O_RDONLY))) + if ((ret = blkdev_get(pd->bdev, FMODE_READ))) goto out; if ((ret = bd_claim(pd->bdev, pd))) @@ -2765,7 +2765,7 @@ static int pkt_new_dev(struct pktcdvd_device *pd, dev_t dev) bdev = bdget(dev); if (!bdev) return -ENOMEM; - ret = blkdev_get(bdev, FMODE_READ, O_RDONLY | O_NONBLOCK); + ret = blkdev_get(bdev, FMODE_READ | FMODE_NDELAY); if (ret) return ret; diff --git a/drivers/char/raw.c b/drivers/char/raw.c index bfd59e6bf54..f3cf5eb9b7f 100644 --- a/drivers/char/raw.c +++ b/drivers/char/raw.c @@ -65,7 +65,7 @@ static int raw_open(struct inode *inode, struct file *filp) if (!bdev) goto out; igrab(bdev->bd_inode); - err = blkdev_get(bdev, filp->f_mode, 0); + err = blkdev_get(bdev, filp->f_mode); if (err) goto out; err = bd_claim(bdev, raw_open); diff --git a/drivers/s390/block/dasd_genhd.c b/drivers/s390/block/dasd_genhd.c index 3c1b6915c9a..e99d566b69c 100644 --- a/drivers/s390/block/dasd_genhd.c +++ b/drivers/s390/block/dasd_genhd.c @@ -99,7 +99,7 @@ int dasd_scan_partitions(struct dasd_block *block) struct block_device *bdev; bdev = bdget_disk(block->gdp, 0); - if (!bdev || blkdev_get(bdev, FMODE_READ, 1) < 0) + if (!bdev || blkdev_get(bdev, FMODE_READ) < 0) return -ENODEV; /* * See fs/partition/check.c:register_disk,rescan_partitions diff --git a/fs/block_dev.c b/fs/block_dev.c index 4b595904cef..b89c956e04f 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -844,9 +844,8 @@ struct block_device *open_by_devnum(dev_t dev, fmode_t mode) { struct block_device *bdev = bdget(dev); int err = -ENOMEM; - int flags = mode & FMODE_WRITE ? O_RDWR : O_RDONLY; if (bdev) - err = blkdev_get(bdev, mode, flags); + err = blkdev_get(bdev, mode); return err ? 
ERR_PTR(err) : bdev; } @@ -975,8 +974,6 @@ void bd_set_size(struct block_device *bdev, loff_t size) } EXPORT_SYMBOL(bd_set_size); -static int __blkdev_get(struct block_device *bdev, fmode_t mode, unsigned flags, - int for_part); static int __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part); /* @@ -986,7 +983,7 @@ static int __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part); * mutex_lock_nested(whole->bd_mutex, 1) */ -static int do_open(struct block_device *bdev, struct file *file, int for_part) +static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) { struct gendisk *disk; struct hd_struct *part = NULL; @@ -994,9 +991,9 @@ static int do_open(struct block_device *bdev, struct file *file, int for_part) int partno; int perm = 0; - if (file->f_mode & FMODE_READ) + if (mode & FMODE_READ) perm |= MAY_READ; - if (file->f_mode & FMODE_WRITE) + if (mode & FMODE_WRITE) perm |= MAY_WRITE; /* * hooks: /n/, see "layering violations". @@ -1007,15 +1004,7 @@ static int do_open(struct block_device *bdev, struct file *file, int for_part) return ret; } - if (file->f_flags & O_NDELAY) - file->f_mode |= FMODE_NDELAY; - if (file->f_flags & O_EXCL) - file->f_mode |= FMODE_EXCL; - if ((file->f_flags & O_ACCMODE) == 3) - file->f_mode |= FMODE_WRITE_IOCTL; - ret = -ENXIO; - file->f_mapping = bdev->bd_inode->i_mapping; lock_kernel(); @@ -1034,7 +1023,7 @@ static int do_open(struct block_device *bdev, struct file *file, int for_part) if (!partno) { struct backing_dev_info *bdi; if (disk->fops->open) { - ret = disk->fops->open(bdev, file->f_mode); + ret = disk->fops->open(bdev, mode); if (ret) goto out_clear; } @@ -1054,7 +1043,7 @@ static int do_open(struct block_device *bdev, struct file *file, int for_part) if (!whole) goto out_clear; BUG_ON(for_part); - ret = __blkdev_get(whole, file->f_mode, file->f_flags, 1); + ret = __blkdev_get(whole, mode, 1); if (ret) goto out_clear; bdev->bd_contains = whole; @@ -1075,7 +1064,7 @@ static 
int do_open(struct block_device *bdev, struct file *file, int for_part) disk = NULL; if (bdev->bd_contains == bdev) { if (bdev->bd_disk->fops->open) { - ret = bdev->bd_disk->fops->open(bdev, file->f_mode); + ret = bdev->bd_disk->fops->open(bdev, mode); if (ret) goto out_unlock_bdev; } @@ -1095,7 +1084,7 @@ static int do_open(struct block_device *bdev, struct file *file, int for_part) bdev->bd_part = NULL; bdev->bd_inode->i_data.backing_dev_info = &default_backing_dev_info; if (bdev != bdev->bd_contains) - __blkdev_put(bdev->bd_contains, file->f_mode, 1); + __blkdev_put(bdev->bd_contains, mode, 1); bdev->bd_contains = NULL; out_unlock_bdev: mutex_unlock(&bdev->bd_mutex); @@ -1111,28 +1100,9 @@ static int do_open(struct block_device *bdev, struct file *file, int for_part) return ret; } -static int __blkdev_get(struct block_device *bdev, fmode_t mode, unsigned flags, - int for_part) -{ - /* - * This crockload is due to bad choice of ->open() type. - * It will go away. - * For now, block device ->open() routine must _not_ - * examine anything in 'inode' argument except ->i_rdev. 
- */ - struct file fake_file = {}; - struct dentry fake_dentry = {}; - fake_file.f_mode = mode; - fake_file.f_flags = flags; - fake_file.f_path.dentry = &fake_dentry; - fake_dentry.d_inode = bdev->bd_inode; - - return do_open(bdev, &fake_file, for_part); -} - -int blkdev_get(struct block_device *bdev, fmode_t mode, unsigned flags) +int blkdev_get(struct block_device *bdev, fmode_t mode) { - return __blkdev_get(bdev, mode, flags, 0); + return __blkdev_get(bdev, mode, 0); } EXPORT_SYMBOL(blkdev_get); @@ -1149,15 +1119,24 @@ static int blkdev_open(struct inode * inode, struct file * filp) */ filp->f_flags |= O_LARGEFILE; + if (filp->f_flags & O_NDELAY) + filp->f_mode |= FMODE_NDELAY; + if (filp->f_flags & O_EXCL) + filp->f_mode |= FMODE_EXCL; + if ((filp->f_flags & O_ACCMODE) == 3) + filp->f_mode |= FMODE_WRITE_IOCTL; + bdev = bd_acquire(inode); if (bdev == NULL) return -ENOMEM; - res = do_open(bdev, filp, 0); + filp->f_mapping = bdev->bd_inode->i_mapping; + + res = blkdev_get(bdev, filp->f_mode); if (res) return res; - if (!(filp->f_flags & O_EXCL) ) + if (!(filp->f_mode & FMODE_EXCL)) return 0; if (!(res = bd_claim(bdev, filp))) @@ -1327,7 +1306,7 @@ struct block_device *open_bdev_exclusive(const char *path, fmode_t mode, void *h if (IS_ERR(bdev)) return bdev; - error = blkdev_get(bdev, mode, 0); + error = blkdev_get(bdev, mode); if (error) return ERR_PTR(error); error = -EACCES; diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c index 4b6fdf591ee..6ebaa58e2c0 100644 --- a/fs/ocfs2/cluster/heartbeat.c +++ b/fs/ocfs2/cluster/heartbeat.c @@ -1268,7 +1268,7 @@ static ssize_t o2hb_region_dev_write(struct o2hb_region *reg, goto out; reg->hr_bdev = I_BDEV(filp->f_mapping->host); - ret = blkdev_get(reg->hr_bdev, FMODE_WRITE | FMODE_READ, 0); + ret = blkdev_get(reg->hr_bdev, FMODE_WRITE | FMODE_READ); if (ret) { reg->hr_bdev = NULL; goto out; diff --git a/fs/partitions/check.c b/fs/partitions/check.c index 5a35ff2e1a9..633f7a0ebb2 100644 --- 
a/fs/partitions/check.c +++ b/fs/partitions/check.c @@ -485,7 +485,7 @@ void register_disk(struct gendisk *disk) goto exit; bdev->bd_invalidated = 1; - err = blkdev_get(bdev, FMODE_READ, 0); + err = blkdev_get(bdev, FMODE_READ); if (err < 0) goto exit; blkdev_put(bdev, FMODE_READ); diff --git a/include/linux/fs.h b/include/linux/fs.h index c6766314dc5..cb78e389699 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1722,7 +1722,7 @@ extern int blkdev_driver_ioctl(struct inode *inode, struct file *file, struct gendisk *disk, unsigned cmd, unsigned long arg); extern long compat_blkdev_ioctl(struct file *, unsigned, unsigned long); -extern int blkdev_get(struct block_device *, fmode_t, unsigned); +extern int blkdev_get(struct block_device *, fmode_t); extern int blkdev_put(struct block_device *, fmode_t); extern int bd_claim(struct block_device *, void *); extern void bd_release(struct block_device *); diff --git a/kernel/power/swap.c b/kernel/power/swap.c index 178b001a4f1..b7713b53d07 100644 --- a/kernel/power/swap.c +++ b/kernel/power/swap.c @@ -172,7 +172,7 @@ static int swsusp_swap_check(void) /* This is called before saving image */ return res; root_swap = res; - res = blkdev_get(resume_bdev, FMODE_WRITE, O_RDWR); + res = blkdev_get(resume_bdev, FMODE_WRITE); if (res) return res; -- cgit v1.2.3-70-g09d2 From e436fdae70a31102d2be32969b80fe8545edebd9 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 18 Sep 2008 03:38:12 -0400 Subject: [PATCH] get rid of blkdev_driver_ioctl() convert remaining callers to __blkdev_driver_ioctl() Signed-off-by: Al Viro --- block/ioctl.c | 29 ++++++++++------------------- include/linux/fs.h | 3 --- 2 files changed, 10 insertions(+), 22 deletions(-) (limited to 'include/linux') diff --git a/block/ioctl.c b/block/ioctl.c index 0db89f95b15..b4e0abed1b4 100644 --- a/block/ioctl.c +++ b/block/ioctl.c @@ -265,21 +265,6 @@ static int blkdev_locked_ioctl(struct file *file, struct block_device *bdev, return -ENOIOCTLCMD; } -int 
blkdev_driver_ioctl(struct inode *inode, struct file *file, - struct gendisk *disk, unsigned cmd, unsigned long arg) -{ - int ret; - fmode_t mode = 0; - if (file) { - mode = file->f_mode; - if (file->f_flags & O_NDELAY) - mode |= FMODE_NDELAY_NOW; - } - - return __blkdev_driver_ioctl(inode->i_bdev, mode, cmd, arg); -} -EXPORT_SYMBOL_GPL(blkdev_driver_ioctl); - int __blkdev_driver_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd, unsigned long arg) { @@ -315,13 +300,19 @@ int blkdev_ioctl(struct inode *inode, struct file *file, unsigned cmd, struct block_device *bdev = inode->i_bdev; struct gendisk *disk = bdev->bd_disk; int ret, n; + fmode_t mode = 0; + if (file) { + mode = file->f_mode; + if (file->f_flags & O_NDELAY) + mode |= FMODE_NDELAY_NOW; + } switch(cmd) { case BLKFLSBUF: if (!capable(CAP_SYS_ADMIN)) return -EACCES; - ret = blkdev_driver_ioctl(inode, file, disk, cmd, arg); + ret = __blkdev_driver_ioctl(bdev, mode, cmd, arg); /* -EINVAL to handle old uncorrected drivers */ if (ret != -EINVAL && ret != -ENOTTY) return ret; @@ -333,7 +324,7 @@ int blkdev_ioctl(struct inode *inode, struct file *file, unsigned cmd, return 0; case BLKROSET: - ret = blkdev_driver_ioctl(inode, file, disk, cmd, arg); + ret = __blkdev_driver_ioctl(bdev, mode, cmd, arg); /* -EINVAL to handle old uncorrected drivers */ if (ret != -EINVAL && ret != -ENOTTY) return ret; @@ -349,7 +340,7 @@ int blkdev_ioctl(struct inode *inode, struct file *file, unsigned cmd, case BLKDISCARD: { uint64_t range[2]; - if (!(file->f_mode & FMODE_WRITE)) + if (!(mode & FMODE_WRITE)) return -EBADF; if (copy_from_user(range, (void __user *)arg, sizeof(range))) @@ -387,6 +378,6 @@ int blkdev_ioctl(struct inode *inode, struct file *file, unsigned cmd, if (ret != -ENOIOCTLCMD) return ret; - return blkdev_driver_ioctl(inode, file, disk, cmd, arg); + return __blkdev_driver_ioctl(bdev, mode, cmd, arg); } EXPORT_SYMBOL_GPL(blkdev_ioctl); diff --git a/include/linux/fs.h b/include/linux/fs.h index 
cb78e389699..11de682c65a 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1718,9 +1718,6 @@ extern const struct file_operations def_fifo_fops; #ifdef CONFIG_BLOCK extern int ioctl_by_bdev(struct block_device *, unsigned, unsigned long); extern int blkdev_ioctl(struct inode *, struct file *, unsigned, unsigned long); -extern int blkdev_driver_ioctl(struct inode *inode, struct file *file, - struct gendisk *disk, unsigned cmd, - unsigned long arg); extern long compat_blkdev_ioctl(struct file *, unsigned, unsigned long); extern int blkdev_get(struct block_device *, fmode_t); extern int blkdev_put(struct block_device *, fmode_t); -- cgit v1.2.3-70-g09d2 From 56b26add02b4bdea81d5e0ebda60db1fe3311ad4 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 19 Sep 2008 03:17:36 -0400 Subject: [PATCH] kill the rest of struct file propagation in block ioctls Now we can switch blkdev_ioctl() to block_device/mode Signed-off-by: Al Viro --- block/compat_ioctl.c | 10 +++++----- block/ioctl.c | 9 +-------- drivers/char/raw.c | 2 +- fs/block_dev.c | 8 ++++++-- include/linux/fs.h | 2 +- 5 files changed, 14 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/block/compat_ioctl.c b/block/compat_ioctl.c index 5b3db0640d8..3098c92402f 100644 --- a/block/compat_ioctl.c +++ b/block/compat_ioctl.c @@ -177,7 +177,7 @@ struct compat_blkpg_ioctl_arg { compat_caddr_t data; }; -static int compat_blkpg_ioctl(struct inode *inode, struct file *file, +static int compat_blkpg_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd, struct compat_blkpg_ioctl_arg __user *ua32) { struct blkpg_ioctl_arg __user *a = compat_alloc_user_space(sizeof(*a)); @@ -196,7 +196,7 @@ static int compat_blkpg_ioctl(struct inode *inode, struct file *file, if (err) return err; - return blkdev_ioctl(inode, file, cmd, (unsigned long)a); + return blkdev_ioctl(bdev, mode, cmd, (unsigned long)a); } #define BLKBSZGET_32 _IOR(0x12, 112, int) @@ -715,13 +715,13 @@ long compat_blkdev_ioctl(struct 
file *file, unsigned cmd, unsigned long arg) * but we call blkdev_ioctl, which gets the lock for us */ case BLKRRPART: - return blkdev_ioctl(inode, file, cmd, + return blkdev_ioctl(bdev, mode, cmd, (unsigned long)compat_ptr(arg)); case BLKBSZSET_32: - return blkdev_ioctl(inode, file, BLKBSZSET, + return blkdev_ioctl(bdev, mode, BLKBSZSET, (unsigned long)compat_ptr(arg)); case BLKPG: - return compat_blkpg_ioctl(inode, file, cmd, compat_ptr(arg)); + return compat_blkpg_ioctl(bdev, mode, cmd, compat_ptr(arg)); case BLKRAGET: case BLKFRAGET: if (!arg) diff --git a/block/ioctl.c b/block/ioctl.c index 14b7f2c1066..c832d639b6e 100644 --- a/block/ioctl.c +++ b/block/ioctl.c @@ -230,20 +230,13 @@ EXPORT_SYMBOL_GPL(__blkdev_driver_ioctl); * always keep this in sync with compat_blkdev_ioctl() and * compat_blkdev_locked_ioctl() */ -int blkdev_ioctl(struct inode *inode, struct file *file, unsigned cmd, +int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd, unsigned long arg) { - struct block_device *bdev = inode->i_bdev; struct gendisk *disk = bdev->bd_disk; struct backing_dev_info *bdi; loff_t size; int ret, n; - fmode_t mode = 0; - if (file) { - mode = file->f_mode; - if (file->f_flags & O_NDELAY) - mode |= FMODE_NDELAY_NOW; - } switch(cmd) { case BLKFLSBUF: diff --git a/drivers/char/raw.c b/drivers/char/raw.c index f3cf5eb9b7f..96adf28a17e 100644 --- a/drivers/char/raw.c +++ b/drivers/char/raw.c @@ -125,7 +125,7 @@ raw_ioctl(struct inode *inode, struct file *filp, { struct block_device *bdev = filp->private_data; - return blkdev_ioctl(bdev->bd_inode, NULL, command, arg); + return blkdev_ioctl(bdev, 0, command, arg); } static void bind_device(struct raw_config_request *rq) diff --git a/fs/block_dev.c b/fs/block_dev.c index b89c956e04f..05865b93f7e 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -1202,7 +1202,11 @@ static int blkdev_close(struct inode * inode, struct file * filp) static long block_ioctl(struct file *file, unsigned cmd, unsigned long arg) 
{ - return blkdev_ioctl(file->f_mapping->host, file, cmd, arg); + struct block_device *bdev = I_BDEV(file->f_mapping->host); + fmode_t mode = file->f_mode; + if (file->f_flags & O_NDELAY) + mode |= FMODE_NDELAY_NOW; + return blkdev_ioctl(bdev, mode, cmd, arg); } static const struct address_space_operations def_blk_aops = { @@ -1238,7 +1242,7 @@ int ioctl_by_bdev(struct block_device *bdev, unsigned cmd, unsigned long arg) int res; mm_segment_t old_fs = get_fs(); set_fs(KERNEL_DS); - res = blkdev_ioctl(bdev->bd_inode, NULL, cmd, arg); + res = blkdev_ioctl(bdev, 0, cmd, arg); set_fs(old_fs); return res; } diff --git a/include/linux/fs.h b/include/linux/fs.h index 11de682c65a..ff536e106b4 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1717,7 +1717,7 @@ extern const struct file_operations bad_sock_fops; extern const struct file_operations def_fifo_fops; #ifdef CONFIG_BLOCK extern int ioctl_by_bdev(struct block_device *, unsigned, unsigned long); -extern int blkdev_ioctl(struct inode *, struct file *, unsigned, unsigned long); +extern int blkdev_ioctl(struct block_device *, fmode_t, unsigned, unsigned long); extern long compat_blkdev_ioctl(struct file *, unsigned, unsigned long); extern int blkdev_get(struct block_device *, fmode_t); extern int blkdev_put(struct block_device *, fmode_t); -- cgit v1.2.3-70-g09d2 From e9f95e637320efe1936b647308ddf4ec5b8e0311 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 21 Oct 2008 15:49:59 +0200 Subject: genirq: fix off by one and coding style Fix off-by-one in for_each_irq_desc_reverse(). Impact is near zero in practice, because nothing substantial wants to iterate down to IRQ#0 - but fix it nevertheless. 
Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/irq.c | 4 ++-- include/linux/irqnr.h | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index ccf6c503fc3..d1d4dc52f64 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -36,7 +36,7 @@ void ack_bad_irq(unsigned int irq) } #ifdef CONFIG_X86_32 -# define irq_stats(x) (&per_cpu(irq_stat,x)) +# define irq_stats(x) (&per_cpu(irq_stat, x)) #else # define irq_stats(x) cpu_pda(x) #endif @@ -113,7 +113,7 @@ int show_interrupts(struct seq_file *p, void *v) if (i == 0) { seq_printf(p, " "); for_each_online_cpu(j) - seq_printf(p, "CPU%-8d",j); + seq_printf(p, "CPU%-8d", j); seq_putc(p, '\n'); } diff --git a/include/linux/irqnr.h b/include/linux/irqnr.h index 3171ddc3b39..452c280c811 100644 --- a/include/linux/irqnr.h +++ b/include/linux/irqnr.h @@ -13,9 +13,9 @@ extern int nr_irqs; # define for_each_irq_desc(irq, desc) \ for (irq = 0, desc = irq_desc; irq < nr_irqs; irq++, desc++) -# define for_each_irq_desc_reverse(irq, desc) \ - for (irq = nr_irqs -1, desc = irq_desc + (nr_irqs -1 ); \ - irq > 0; irq--, desc--) +# define for_each_irq_desc_reverse(irq, desc) \ + for (irq = nr_irqs - 1, desc = irq_desc + (nr_irqs - 1); \ + irq >= 0; irq--, desc--) #endif #define for_each_irq_nr(irq) \ -- cgit v1.2.3-70-g09d2 From 733d710b09748a79f70cbb58547d036d28ec566e Mon Sep 17 00:00:00 2001 From: Sergio Aguirre Date: Sat, 18 Oct 2008 12:26:47 -0300 Subject: V4L/DVB (9320): v4l2: Add 10-bit RAW Bayer formats Add 10-bit raw bayer format expanded to 16 bits. Adds also definition for 10-bit raw bayer format dpcm-compressed to 8 bits. 
Signed-off-by: Sergio Aguirre Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- include/linux/videodev2.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/videodev2.h b/include/linux/videodev2.h index d4b03034ee7..4669d7e72e7 100644 --- a/include/linux/videodev2.h +++ b/include/linux/videodev2.h @@ -315,6 +315,13 @@ struct v4l2_pix_format { /* see http://www.siliconimaging.com/RGB%20Bayer.htm */ #define V4L2_PIX_FMT_SBGGR8 v4l2_fourcc('B', 'A', '8', '1') /* 8 BGBG.. GRGR.. */ #define V4L2_PIX_FMT_SGBRG8 v4l2_fourcc('G', 'B', 'R', 'G') /* 8 GBGB.. RGRG.. */ +/* + * 10bit raw bayer, expanded to 16 bits + * xxxxrrrrrrrrrrxxxxgggggggggg xxxxggggggggggxxxxbbbbbbbbbb... + */ +#define V4L2_PIX_FMT_SGRBG10 v4l2_fourcc('B', 'A', '1', '0') +/* 10bit raw bayer DPCM compressed to 8 bits */ +#define V4L2_PIX_FMT_SGRBG10DPCM8 v4l2_fourcc('B', 'D', '1', '0') #define V4L2_PIX_FMT_SBGGR16 v4l2_fourcc('B', 'Y', 'R', '2') /* 16 BGBG.. GRGR.. */ /* compressed formats */ -- cgit v1.2.3-70-g09d2 From d63a5ce3c0d25c96bdadc78792e5b48b846e899d Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Tue, 21 Oct 2008 17:44:57 +0100 Subject: dm: publish array_too_big Move array_too_big to include/linux/device-mapper.h because it is used by targets. Remove the test from dm-raid1 as the number of mirror legs is limited such that it can never fail. (Even for stripes it seems rather unlikely.) 
Signed-off-by: Mikulas Patocka Signed-off-by: Alasdair G Kergon --- drivers/md/dm-raid1.c | 3 --- drivers/md/dm-stripe.c | 4 ++-- drivers/md/dm.h | 9 --------- include/linux/device-mapper.h | 3 +++ 4 files changed, 5 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c index 29913e42c4a..ecfd82169cb 100644 --- a/drivers/md/dm-raid1.c +++ b/drivers/md/dm-raid1.c @@ -1315,9 +1315,6 @@ static struct mirror_set *alloc_context(unsigned int nr_mirrors, size_t len; struct mirror_set *ms = NULL; - if (array_too_big(sizeof(*ms), sizeof(ms->mirror[0]), nr_mirrors)) - return NULL; - len = sizeof(*ms) + (sizeof(ms->mirror[0]) * nr_mirrors); ms = kzalloc(len, GFP_KERNEL); diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c index b745d8ac625..287e2458473 100644 --- a/drivers/md/dm-stripe.c +++ b/drivers/md/dm-stripe.c @@ -60,8 +60,8 @@ static inline struct stripe_c *alloc_context(unsigned int stripes) { size_t len; - if (array_too_big(sizeof(struct stripe_c), sizeof(struct stripe), - stripes)) + if (dm_array_too_big(sizeof(struct stripe_c), sizeof(struct stripe), + stripes)) return NULL; len = sizeof(struct stripe_c) + (sizeof(struct stripe) * stripes); diff --git a/drivers/md/dm.h b/drivers/md/dm.h index cd189da2b2f..0ade60cdef4 100644 --- a/drivers/md/dm.h +++ b/drivers/md/dm.h @@ -62,15 +62,6 @@ void dm_put_target_type(struct target_type *t); int dm_target_iterate(void (*iter_func)(struct target_type *tt, void *param), void *param); -/*----------------------------------------------------------------- - * Useful inlines. 
- *---------------------------------------------------------------*/ -static inline int array_too_big(unsigned long fixed, unsigned long obj, - unsigned long num) -{ - return (num > (ULONG_MAX - fixed) / obj); -} - int dm_split_args(int *argc, char ***argvp, char *input); /* diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index 08d783592b7..dfb30db475e 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -354,6 +354,9 @@ void *dm_vcalloc(unsigned long nmemb, unsigned long elem_size); */ #define dm_round_up(n, sz) (dm_div_up((n), (sz)) * (sz)) +#define dm_array_too_big(fixed, obj, num) \ + ((num) > (UINT_MAX - (fixed)) / (obj)) + static inline sector_t to_sector(unsigned long n) { return (n >> SECTOR_SHIFT); -- cgit v1.2.3-70-g09d2 From 1f965b19437017cea6d3f3f46acdc5acae5fd011 Mon Sep 17 00:00:00 2001 From: Heinz Mauelshagen Date: Tue, 21 Oct 2008 17:45:06 +0100 Subject: dm raid1: separate region_hash interface part1 Separate the region hash code from raid1 so it can be shared by forthcoming targets. Use BUG_ON() for failed async dm_io() calls. 
Signed-off-by: Heinz Mauelshagen Signed-off-by: Alasdair G Kergon --- drivers/md/Makefile | 2 +- drivers/md/dm-raid1.c | 789 ++++++----------------------------------- drivers/md/dm-region-hash.c | 704 ++++++++++++++++++++++++++++++++++++ include/linux/dm-region-hash.h | 104 ++++++ 4 files changed, 912 insertions(+), 687 deletions(-) create mode 100644 drivers/md/dm-region-hash.c create mode 100644 include/linux/dm-region-hash.h (limited to 'include/linux') diff --git a/drivers/md/Makefile b/drivers/md/Makefile index f1ef33dfd8c..1c615804ea7 100644 --- a/drivers/md/Makefile +++ b/drivers/md/Makefile @@ -34,7 +34,7 @@ obj-$(CONFIG_DM_CRYPT) += dm-crypt.o obj-$(CONFIG_DM_DELAY) += dm-delay.o obj-$(CONFIG_DM_MULTIPATH) += dm-multipath.o dm-round-robin.o obj-$(CONFIG_DM_SNAPSHOT) += dm-snapshot.o -obj-$(CONFIG_DM_MIRROR) += dm-mirror.o dm-log.o +obj-$(CONFIG_DM_MIRROR) += dm-mirror.o dm-log.o dm-region-hash.o obj-$(CONFIG_DM_ZERO) += dm-zero.o quiet_cmd_unroll = UNROLL $@ diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c index f358853af5c..92dcc06832a 100644 --- a/drivers/md/dm-raid1.c +++ b/drivers/md/dm-raid1.c @@ -1,118 +1,36 @@ /* * Copyright (C) 2003 Sistina Software Limited. + * Copyright (C) 2005-2008 Red Hat, Inc. All rights reserved. * * This file is released under the GPL. */ -#include - #include "dm-bio-list.h" #include "dm-bio-record.h" -#include #include #include #include #include #include -#include -#include #include -#include -#include +#include #include #include #include +#include #define DM_MSG_PREFIX "raid1" + +#define MAX_RECOVERY 1 /* Maximum number of regions recovered in parallel. */ #define DM_IO_PAGES 64 +#define DM_KCOPYD_PAGES 64 #define DM_RAID1_HANDLE_ERRORS 0x01 #define errors_handled(p) ((p)->features & DM_RAID1_HANDLE_ERRORS) static DECLARE_WAIT_QUEUE_HEAD(_kmirrord_recovery_stopped); -/*----------------------------------------------------------------- - * Region hash - * - * The mirror splits itself up into discrete regions. 
Each - * region can be in one of three states: clean, dirty, - * nosync. There is no need to put clean regions in the hash. - * - * In addition to being present in the hash table a region _may_ - * be present on one of three lists. - * - * clean_regions: Regions on this list have no io pending to - * them, they are in sync, we are no longer interested in them, - * they are dull. rh_update_states() will remove them from the - * hash table. - * - * quiesced_regions: These regions have been spun down, ready - * for recovery. rh_recovery_start() will remove regions from - * this list and hand them to kmirrord, which will schedule the - * recovery io with kcopyd. - * - * recovered_regions: Regions that kcopyd has successfully - * recovered. rh_update_states() will now schedule any delayed - * io, up the recovery_count, and remove the region from the - * hash. - * - * There are 2 locks: - * A rw spin lock 'hash_lock' protects just the hash table, - * this is never held in write mode from interrupt context, - * which I believe means that we only have to disable irqs when - * doing a write lock. - * - * An ordinary spin lock 'region_lock' that protects the three - * lists in the region_hash, with the 'state', 'list' and - * 'bhs_delayed' fields of the regions. This is used from irq - * context, so all other uses will have to suspend local irqs. 
- *---------------------------------------------------------------*/ -struct mirror_set; -struct region_hash { - struct mirror_set *ms; - uint32_t region_size; - unsigned region_shift; - - /* holds persistent region state */ - struct dm_dirty_log *log; - - /* hash table */ - rwlock_t hash_lock; - mempool_t *region_pool; - unsigned int mask; - unsigned int nr_buckets; - struct list_head *buckets; - - spinlock_t region_lock; - atomic_t recovery_in_flight; - struct semaphore recovery_count; - struct list_head clean_regions; - struct list_head quiesced_regions; - struct list_head recovered_regions; - struct list_head failed_recovered_regions; -}; - -enum { - RH_CLEAN, - RH_DIRTY, - RH_NOSYNC, - RH_RECOVERING -}; - -struct region { - struct region_hash *rh; /* FIXME: can we get rid of this ? */ - region_t key; - int state; - - struct list_head hash_list; - struct list_head list; - - atomic_t pending; - struct bio_list delayed_bios; -}; - - /*----------------------------------------------------------------- * Mirror set structures. 
*---------------------------------------------------------------*/ @@ -133,8 +51,7 @@ struct mirror { struct mirror_set { struct dm_target *ti; struct list_head list; - struct region_hash rh; - struct dm_kcopyd_client *kcopyd_client; + uint64_t features; spinlock_t lock; /* protects the lists */ @@ -142,6 +59,8 @@ struct mirror_set { struct bio_list writes; struct bio_list failures; + struct dm_region_hash *rh; + struct dm_kcopyd_client *kcopyd_client; struct dm_io_client *io_client; mempool_t *read_record_pool; @@ -160,25 +79,14 @@ struct mirror_set { struct work_struct trigger_event; - unsigned int nr_mirrors; + unsigned nr_mirrors; struct mirror mirror[0]; }; -/* - * Conversion fns - */ -static inline region_t bio_to_region(struct region_hash *rh, struct bio *bio) +static void wakeup_mirrord(void *context) { - return (bio->bi_sector - rh->ms->ti->begin) >> rh->region_shift; -} + struct mirror_set *ms = context; -static inline sector_t region_to_sector(struct region_hash *rh, region_t region) -{ - return region << rh->region_shift; -} - -static void wake(struct mirror_set *ms) -{ queue_work(ms->kmirrord_wq, &ms->kmirrord_work); } @@ -187,7 +95,7 @@ static void delayed_wake_fn(unsigned long data) struct mirror_set *ms = (struct mirror_set *) data; clear_bit(0, &ms->timer_pending); - wake(ms); + wakeup_mirrord(ms); } static void delayed_wake(struct mirror_set *ms) @@ -201,473 +109,34 @@ static void delayed_wake(struct mirror_set *ms) add_timer(&ms->timer); } -/* FIXME move this */ -static void queue_bio(struct mirror_set *ms, struct bio *bio, int rw); - -#define MIN_REGIONS 64 -#define MAX_RECOVERY 1 -static int rh_init(struct region_hash *rh, struct mirror_set *ms, - struct dm_dirty_log *log, uint32_t region_size, - region_t nr_regions) -{ - unsigned int nr_buckets, max_buckets; - size_t i; - - /* - * Calculate a suitable number of buckets for our hash - * table. 
- */ - max_buckets = nr_regions >> 6; - for (nr_buckets = 128u; nr_buckets < max_buckets; nr_buckets <<= 1) - ; - nr_buckets >>= 1; - - rh->ms = ms; - rh->log = log; - rh->region_size = region_size; - rh->region_shift = ffs(region_size) - 1; - rwlock_init(&rh->hash_lock); - rh->mask = nr_buckets - 1; - rh->nr_buckets = nr_buckets; - - rh->buckets = vmalloc(nr_buckets * sizeof(*rh->buckets)); - if (!rh->buckets) { - DMERR("unable to allocate region hash memory"); - return -ENOMEM; - } - - for (i = 0; i < nr_buckets; i++) - INIT_LIST_HEAD(rh->buckets + i); - - spin_lock_init(&rh->region_lock); - sema_init(&rh->recovery_count, 0); - atomic_set(&rh->recovery_in_flight, 0); - INIT_LIST_HEAD(&rh->clean_regions); - INIT_LIST_HEAD(&rh->quiesced_regions); - INIT_LIST_HEAD(&rh->recovered_regions); - INIT_LIST_HEAD(&rh->failed_recovered_regions); - - rh->region_pool = mempool_create_kmalloc_pool(MIN_REGIONS, - sizeof(struct region)); - if (!rh->region_pool) { - vfree(rh->buckets); - rh->buckets = NULL; - return -ENOMEM; - } - - return 0; -} - -static void rh_exit(struct region_hash *rh) -{ - unsigned int h; - struct region *reg, *nreg; - - BUG_ON(!list_empty(&rh->quiesced_regions)); - for (h = 0; h < rh->nr_buckets; h++) { - list_for_each_entry_safe(reg, nreg, rh->buckets + h, hash_list) { - BUG_ON(atomic_read(®->pending)); - mempool_free(reg, rh->region_pool); - } - } - - if (rh->log) - dm_dirty_log_destroy(rh->log); - if (rh->region_pool) - mempool_destroy(rh->region_pool); - vfree(rh->buckets); -} - -#define RH_HASH_MULT 2654435387U - -static inline unsigned int rh_hash(struct region_hash *rh, region_t region) -{ - return (unsigned int) ((region * RH_HASH_MULT) >> 12) & rh->mask; -} - -static struct region *__rh_lookup(struct region_hash *rh, region_t region) -{ - struct region *reg; - - list_for_each_entry (reg, rh->buckets + rh_hash(rh, region), hash_list) - if (reg->key == region) - return reg; - - return NULL; -} - -static void __rh_insert(struct region_hash *rh, 
struct region *reg) -{ - unsigned int h = rh_hash(rh, reg->key); - list_add(®->hash_list, rh->buckets + h); -} - -static struct region *__rh_alloc(struct region_hash *rh, region_t region) -{ - struct region *reg, *nreg; - - read_unlock(&rh->hash_lock); - nreg = mempool_alloc(rh->region_pool, GFP_ATOMIC); - if (unlikely(!nreg)) - nreg = kmalloc(sizeof(struct region), GFP_NOIO); - nreg->state = rh->log->type->in_sync(rh->log, region, 1) ? - RH_CLEAN : RH_NOSYNC; - nreg->rh = rh; - nreg->key = region; - - INIT_LIST_HEAD(&nreg->list); - - atomic_set(&nreg->pending, 0); - bio_list_init(&nreg->delayed_bios); - write_lock_irq(&rh->hash_lock); - - reg = __rh_lookup(rh, region); - if (reg) - /* we lost the race */ - mempool_free(nreg, rh->region_pool); - - else { - __rh_insert(rh, nreg); - if (nreg->state == RH_CLEAN) { - spin_lock(&rh->region_lock); - list_add(&nreg->list, &rh->clean_regions); - spin_unlock(&rh->region_lock); - } - reg = nreg; - } - write_unlock_irq(&rh->hash_lock); - read_lock(&rh->hash_lock); - - return reg; -} - -static inline struct region *__rh_find(struct region_hash *rh, region_t region) -{ - struct region *reg; - - reg = __rh_lookup(rh, region); - if (!reg) - reg = __rh_alloc(rh, region); - - return reg; -} - -static int rh_state(struct region_hash *rh, region_t region, int may_block) -{ - int r; - struct region *reg; - - read_lock(&rh->hash_lock); - reg = __rh_lookup(rh, region); - read_unlock(&rh->hash_lock); - - if (reg) - return reg->state; - - /* - * The region wasn't in the hash, so we fall back to the - * dirty log. - */ - r = rh->log->type->in_sync(rh->log, region, may_block); - - /* - * Any error from the dirty log (eg. -EWOULDBLOCK) gets - * taken as a RH_NOSYNC - */ - return r == 1 ? 
RH_CLEAN : RH_NOSYNC; -} - -static inline int rh_in_sync(struct region_hash *rh, - region_t region, int may_block) -{ - int state = rh_state(rh, region, may_block); - return state == RH_CLEAN || state == RH_DIRTY; -} - -static void dispatch_bios(struct mirror_set *ms, struct bio_list *bio_list) -{ - struct bio *bio; - - while ((bio = bio_list_pop(bio_list))) { - queue_bio(ms, bio, WRITE); - } -} - -static void complete_resync_work(struct region *reg, int success) -{ - struct region_hash *rh = reg->rh; - - rh->log->type->set_region_sync(rh->log, reg->key, success); - - /* - * Dispatch the bios before we call 'wake_up_all'. - * This is important because if we are suspending, - * we want to know that recovery is complete and - * the work queue is flushed. If we wake_up_all - * before we dispatch_bios (queue bios and call wake()), - * then we risk suspending before the work queue - * has been properly flushed. - */ - dispatch_bios(rh->ms, ®->delayed_bios); - if (atomic_dec_and_test(&rh->recovery_in_flight)) - wake_up_all(&_kmirrord_recovery_stopped); - up(&rh->recovery_count); -} - -static void rh_update_states(struct region_hash *rh) -{ - struct region *reg, *next; - - LIST_HEAD(clean); - LIST_HEAD(recovered); - LIST_HEAD(failed_recovered); - - /* - * Quickly grab the lists. 
- */ - write_lock_irq(&rh->hash_lock); - spin_lock(&rh->region_lock); - if (!list_empty(&rh->clean_regions)) { - list_splice_init(&rh->clean_regions, &clean); - - list_for_each_entry(reg, &clean, list) - list_del(®->hash_list); - } - - if (!list_empty(&rh->recovered_regions)) { - list_splice_init(&rh->recovered_regions, &recovered); - - list_for_each_entry (reg, &recovered, list) - list_del(®->hash_list); - } - - if (!list_empty(&rh->failed_recovered_regions)) { - list_splice_init(&rh->failed_recovered_regions, - &failed_recovered); - - list_for_each_entry(reg, &failed_recovered, list) - list_del(®->hash_list); - } - - spin_unlock(&rh->region_lock); - write_unlock_irq(&rh->hash_lock); - - /* - * All the regions on the recovered and clean lists have - * now been pulled out of the system, so no need to do - * any more locking. - */ - list_for_each_entry_safe (reg, next, &recovered, list) { - rh->log->type->clear_region(rh->log, reg->key); - complete_resync_work(reg, 1); - mempool_free(reg, rh->region_pool); - } - - list_for_each_entry_safe(reg, next, &failed_recovered, list) { - complete_resync_work(reg, errors_handled(rh->ms) ? 
0 : 1); - mempool_free(reg, rh->region_pool); - } - - list_for_each_entry_safe(reg, next, &clean, list) { - rh->log->type->clear_region(rh->log, reg->key); - mempool_free(reg, rh->region_pool); - } - - rh->log->type->flush(rh->log); -} - -static void rh_inc(struct region_hash *rh, region_t region) -{ - struct region *reg; - - read_lock(&rh->hash_lock); - reg = __rh_find(rh, region); - - spin_lock_irq(&rh->region_lock); - atomic_inc(®->pending); - - if (reg->state == RH_CLEAN) { - reg->state = RH_DIRTY; - list_del_init(®->list); /* take off the clean list */ - spin_unlock_irq(&rh->region_lock); - - rh->log->type->mark_region(rh->log, reg->key); - } else - spin_unlock_irq(&rh->region_lock); - - - read_unlock(&rh->hash_lock); -} - -static void rh_inc_pending(struct region_hash *rh, struct bio_list *bios) +static void wakeup_all_recovery_waiters(void *context) { - struct bio *bio; - - for (bio = bios->head; bio; bio = bio->bi_next) - rh_inc(rh, bio_to_region(rh, bio)); + wake_up_all(&_kmirrord_recovery_stopped); } -static void rh_dec(struct region_hash *rh, region_t region) +static void queue_bio(struct mirror_set *ms, struct bio *bio, int rw) { unsigned long flags; - struct region *reg; int should_wake = 0; + struct bio_list *bl; - read_lock(&rh->hash_lock); - reg = __rh_lookup(rh, region); - read_unlock(&rh->hash_lock); - - spin_lock_irqsave(&rh->region_lock, flags); - if (atomic_dec_and_test(®->pending)) { - /* - * There is no pending I/O for this region. - * We can move the region to corresponding list for next action. - * At this point, the region is not yet connected to any list. - * - * If the state is RH_NOSYNC, the region should be kept off - * from clean list. - * The hash entry for RH_NOSYNC will remain in memory - * until the region is recovered or the map is reloaded. 
- */ - - /* do nothing for RH_NOSYNC */ - if (reg->state == RH_RECOVERING) { - list_add_tail(®->list, &rh->quiesced_regions); - } else if (reg->state == RH_DIRTY) { - reg->state = RH_CLEAN; - list_add(®->list, &rh->clean_regions); - } - should_wake = 1; - } - spin_unlock_irqrestore(&rh->region_lock, flags); + bl = (rw == WRITE) ? &ms->writes : &ms->reads; + spin_lock_irqsave(&ms->lock, flags); + should_wake = !(bl->head); + bio_list_add(bl, bio); + spin_unlock_irqrestore(&ms->lock, flags); if (should_wake) - wake(rh->ms); + wakeup_mirrord(ms); } -/* - * Starts quiescing a region in preparation for recovery. - */ -static int __rh_recovery_prepare(struct region_hash *rh) +static void dispatch_bios(void *context, struct bio_list *bio_list) { - int r; - struct region *reg; - region_t region; - - /* - * Ask the dirty log what's next. - */ - r = rh->log->type->get_resync_work(rh->log, ®ion); - if (r <= 0) - return r; - - /* - * Get this region, and start it quiescing by setting the - * recovering flag. - */ - read_lock(&rh->hash_lock); - reg = __rh_find(rh, region); - read_unlock(&rh->hash_lock); - - spin_lock_irq(&rh->region_lock); - reg->state = RH_RECOVERING; - - /* Already quiesced ? */ - if (atomic_read(®->pending)) - list_del_init(®->list); - else - list_move(®->list, &rh->quiesced_regions); - - spin_unlock_irq(&rh->region_lock); - - return 1; -} - -static void rh_recovery_prepare(struct region_hash *rh) -{ - /* Extra reference to avoid race with rh_stop_recovery */ - atomic_inc(&rh->recovery_in_flight); - - while (!down_trylock(&rh->recovery_count)) { - atomic_inc(&rh->recovery_in_flight); - if (__rh_recovery_prepare(rh) <= 0) { - atomic_dec(&rh->recovery_in_flight); - up(&rh->recovery_count); - break; - } - } - - /* Drop the extra reference */ - if (atomic_dec_and_test(&rh->recovery_in_flight)) - wake_up_all(&_kmirrord_recovery_stopped); -} - -/* - * Returns any quiesced regions. 
- */ -static struct region *rh_recovery_start(struct region_hash *rh) -{ - struct region *reg = NULL; - - spin_lock_irq(&rh->region_lock); - if (!list_empty(&rh->quiesced_regions)) { - reg = list_entry(rh->quiesced_regions.next, - struct region, list); - list_del_init(®->list); /* remove from the quiesced list */ - } - spin_unlock_irq(&rh->region_lock); - - return reg; -} - -static void rh_recovery_end(struct region *reg, int success) -{ - struct region_hash *rh = reg->rh; - - spin_lock_irq(&rh->region_lock); - if (success) - list_add(®->list, ®->rh->recovered_regions); - else { - reg->state = RH_NOSYNC; - list_add(®->list, ®->rh->failed_recovered_regions); - } - spin_unlock_irq(&rh->region_lock); - - wake(rh->ms); -} - -static int rh_flush(struct region_hash *rh) -{ - return rh->log->type->flush(rh->log); -} - -static void rh_delay(struct region_hash *rh, struct bio *bio) -{ - struct region *reg; - - read_lock(&rh->hash_lock); - reg = __rh_find(rh, bio_to_region(rh, bio)); - bio_list_add(®->delayed_bios, bio); - read_unlock(&rh->hash_lock); -} - -static void rh_stop_recovery(struct region_hash *rh) -{ - int i; - - /* wait for any recovering regions */ - for (i = 0; i < MAX_RECOVERY; i++) - down(&rh->recovery_count); -} - -static void rh_start_recovery(struct region_hash *rh) -{ - int i; - - for (i = 0; i < MAX_RECOVERY; i++) - up(&rh->recovery_count); + struct mirror_set *ms = context; + struct bio *bio; - wake(rh->ms); + while ((bio = bio_list_pop(bio_list))) + queue_bio(ms, bio, WRITE); } #define MIN_READ_RECORDS 20 @@ -777,8 +246,8 @@ out: static void recovery_complete(int read_err, unsigned long write_err, void *context) { - struct region *reg = (struct region *)context; - struct mirror_set *ms = reg->rh->ms; + struct dm_region *reg = context; + struct mirror_set *ms = dm_rh_region_context(reg); int m, bit = 0; if (read_err) { @@ -804,31 +273,33 @@ static void recovery_complete(int read_err, unsigned long write_err, } } - rh_recovery_end(reg, !(read_err || 
write_err)); + dm_rh_recovery_end(reg, !(read_err || write_err)); } -static int recover(struct mirror_set *ms, struct region *reg) +static int recover(struct mirror_set *ms, struct dm_region *reg) { int r; - unsigned int i; + unsigned i; struct dm_io_region from, to[DM_KCOPYD_MAX_REGIONS], *dest; struct mirror *m; unsigned long flags = 0; + region_t key = dm_rh_get_region_key(reg); + sector_t region_size = dm_rh_get_region_size(ms->rh); /* fill in the source */ m = get_default_mirror(ms); from.bdev = m->dev->bdev; - from.sector = m->offset + region_to_sector(reg->rh, reg->key); - if (reg->key == (ms->nr_regions - 1)) { + from.sector = m->offset + dm_rh_region_to_sector(ms->rh, key); + if (key == (ms->nr_regions - 1)) { /* * The final region may be smaller than * region_size. */ - from.count = ms->ti->len & (reg->rh->region_size - 1); + from.count = ms->ti->len & (region_size - 1); if (!from.count) - from.count = reg->rh->region_size; + from.count = region_size; } else - from.count = reg->rh->region_size; + from.count = region_size; /* fill in the destinations */ for (i = 0, dest = to; i < ms->nr_mirrors; i++) { @@ -837,7 +308,7 @@ static int recover(struct mirror_set *ms, struct region *reg) m = ms->mirror + i; dest->bdev = m->dev->bdev; - dest->sector = m->offset + region_to_sector(reg->rh, reg->key); + dest->sector = m->offset + dm_rh_region_to_sector(ms->rh, key); dest->count = from.count; dest++; } @@ -854,22 +325,22 @@ static int recover(struct mirror_set *ms, struct region *reg) static void do_recovery(struct mirror_set *ms) { + struct dm_region *reg; + struct dm_dirty_log *log = dm_rh_dirty_log(ms->rh); int r; - struct region *reg; - struct dm_dirty_log *log = ms->rh.log; /* * Start quiescing some regions. */ - rh_recovery_prepare(&ms->rh); + dm_rh_recovery_prepare(ms->rh); /* * Copy any already quiesced regions. 
*/ - while ((reg = rh_recovery_start(&ms->rh))) { + while ((reg = dm_rh_recovery_start(ms->rh))) { r = recover(ms, reg); if (r) - rh_recovery_end(reg, 0); + dm_rh_recovery_end(reg, 0); } /* @@ -910,9 +381,10 @@ static int default_ok(struct mirror *m) static int mirror_available(struct mirror_set *ms, struct bio *bio) { - region_t region = bio_to_region(&ms->rh, bio); + struct dm_dirty_log *log = dm_rh_dirty_log(ms->rh); + region_t region = dm_rh_bio_to_region(ms->rh, bio); - if (ms->rh.log->type->in_sync(ms->rh.log, region, 0)) + if (log->type->in_sync(log, region, 0)) return choose_mirror(ms, bio->bi_sector) ? 1 : 0; return 0; @@ -986,7 +458,14 @@ static void read_async_bio(struct mirror *m, struct bio *bio) map_region(&io, m, bio); bio_set_m(bio, m); - (void) dm_io(&io_req, 1, &io, NULL); + BUG_ON(dm_io(&io_req, 1, &io, NULL)); +} + +static inline int region_in_sync(struct mirror_set *ms, region_t region, + int may_block) +{ + int state = dm_rh_get_state(ms->rh, region, may_block); + return state == DM_RH_CLEAN || state == DM_RH_DIRTY; } static void do_reads(struct mirror_set *ms, struct bio_list *reads) @@ -996,13 +475,13 @@ static void do_reads(struct mirror_set *ms, struct bio_list *reads) struct mirror *m; while ((bio = bio_list_pop(reads))) { - region = bio_to_region(&ms->rh, bio); + region = dm_rh_bio_to_region(ms->rh, bio); m = get_default_mirror(ms); /* * We can only read balance if the region is in sync. */ - if (likely(rh_in_sync(&ms->rh, region, 1))) + if (likely(region_in_sync(ms, region, 1))) m = choose_mirror(ms, bio->bi_sector); else if (m && atomic_read(&m->error_count)) m = NULL; @@ -1025,57 +504,6 @@ static void do_reads(struct mirror_set *ms, struct bio_list *reads) * NOSYNC: increment pending, just write to the default mirror *---------------------------------------------------------------*/ -/* __bio_mark_nosync - * @ms - * @bio - * @done - * @error - * - * The bio was written on some mirror(s) but failed on other mirror(s). 
- * We can successfully endio the bio but should avoid the region being - * marked clean by setting the state RH_NOSYNC. - * - * This function is _not_ safe in interrupt context! - */ -static void __bio_mark_nosync(struct mirror_set *ms, - struct bio *bio, unsigned done, int error) -{ - unsigned long flags; - struct region_hash *rh = &ms->rh; - struct dm_dirty_log *log = ms->rh.log; - struct region *reg; - region_t region = bio_to_region(rh, bio); - int recovering = 0; - - /* We must inform the log that the sync count has changed. */ - log->type->set_region_sync(log, region, 0); - ms->in_sync = 0; - - read_lock(&rh->hash_lock); - reg = __rh_find(rh, region); - read_unlock(&rh->hash_lock); - - /* region hash entry should exist because write was in-flight */ - BUG_ON(!reg); - BUG_ON(!list_empty(®->list)); - - spin_lock_irqsave(&rh->region_lock, flags); - /* - * Possible cases: - * 1) RH_DIRTY - * 2) RH_NOSYNC: was dirty, other preceeding writes failed - * 3) RH_RECOVERING: flushing pending writes - * Either case, the region should have not been connected to list. 
- */ - recovering = (reg->state == RH_RECOVERING); - reg->state = RH_NOSYNC; - BUG_ON(!list_empty(®->list)); - spin_unlock_irqrestore(&rh->region_lock, flags); - - bio_endio(bio, error); - if (recovering) - complete_resync_work(reg, 0); -} static void write_callback(unsigned long error, void *context) { @@ -1120,7 +548,7 @@ static void write_callback(unsigned long error, void *context) bio_list_add(&ms->failures, bio); spin_unlock_irqrestore(&ms->lock, flags); if (should_wake) - wake(ms); + wakeup_mirrord(ms); return; } out: @@ -1150,7 +578,7 @@ static void do_write(struct mirror_set *ms, struct bio *bio) */ bio_set_m(bio, get_default_mirror(ms)); - (void) dm_io(&io_req, ms->nr_mirrors, io, NULL); + BUG_ON(dm_io(&io_req, ms->nr_mirrors, io, NULL)); } static void do_writes(struct mirror_set *ms, struct bio_list *writes) @@ -1170,18 +598,19 @@ static void do_writes(struct mirror_set *ms, struct bio_list *writes) bio_list_init(&recover); while ((bio = bio_list_pop(writes))) { - state = rh_state(&ms->rh, bio_to_region(&ms->rh, bio), 1); + state = dm_rh_get_state(ms->rh, + dm_rh_bio_to_region(ms->rh, bio), 1); switch (state) { - case RH_CLEAN: - case RH_DIRTY: + case DM_RH_CLEAN: + case DM_RH_DIRTY: this_list = &sync; break; - case RH_NOSYNC: + case DM_RH_NOSYNC: this_list = &nosync; break; - case RH_RECOVERING: + case DM_RH_RECOVERING: this_list = &recover; break; } @@ -1194,9 +623,9 @@ static void do_writes(struct mirror_set *ms, struct bio_list *writes) * be written to (writes to recover regions are going to * be delayed). */ - rh_inc_pending(&ms->rh, &sync); - rh_inc_pending(&ms->rh, &nosync); - ms->log_failure = rh_flush(&ms->rh) ? 1 : 0; + dm_rh_inc_pending(ms->rh, &sync); + dm_rh_inc_pending(ms->rh, &nosync); + ms->log_failure = dm_rh_flush(ms->rh) ? 1 : 0; /* * Dispatch io. 
@@ -1205,13 +634,13 @@ static void do_writes(struct mirror_set *ms, struct bio_list *writes) spin_lock_irq(&ms->lock); bio_list_merge(&ms->failures, &sync); spin_unlock_irq(&ms->lock); - wake(ms); + wakeup_mirrord(ms); } else while ((bio = bio_list_pop(&sync))) do_write(ms, bio); while ((bio = bio_list_pop(&recover))) - rh_delay(&ms->rh, bio); + dm_rh_delay(ms->rh, bio); while ((bio = bio_list_pop(&nosync))) { map_bio(get_default_mirror(ms), bio); @@ -1228,7 +657,8 @@ static void do_failures(struct mirror_set *ms, struct bio_list *failures) if (!ms->log_failure) { while ((bio = bio_list_pop(failures))) - __bio_mark_nosync(ms, bio, bio->bi_size, 0); + ms->in_sync = 0; + dm_rh_mark_nosync(ms->rh, bio, bio->bi_size, 0); return; } @@ -1281,8 +711,8 @@ static void trigger_event(struct work_struct *work) *---------------------------------------------------------------*/ static void do_mirror(struct work_struct *work) { - struct mirror_set *ms =container_of(work, struct mirror_set, - kmirrord_work); + struct mirror_set *ms = container_of(work, struct mirror_set, + kmirrord_work); struct bio_list reads, writes, failures; unsigned long flags; @@ -1295,7 +725,7 @@ static void do_mirror(struct work_struct *work) bio_list_init(&ms->failures); spin_unlock_irqrestore(&ms->lock, flags); - rh_update_states(&ms->rh); + dm_rh_update_states(ms->rh, errors_handled(ms)); do_recovery(ms); do_reads(ms, &reads); do_writes(ms, &writes); @@ -1304,7 +734,6 @@ static void do_mirror(struct work_struct *work) dm_table_unplug_all(ms->ti->table); } - /*----------------------------------------------------------------- * Target functions *---------------------------------------------------------------*/ @@ -1351,7 +780,11 @@ static struct mirror_set *alloc_context(unsigned int nr_mirrors, return NULL; } - if (rh_init(&ms->rh, ms, dl, region_size, ms->nr_regions)) { + ms->rh = dm_region_hash_create(ms, dispatch_bios, wakeup_mirrord, + wakeup_all_recovery_waiters, + ms->ti->begin, MAX_RECOVERY, + dl, 
region_size, ms->nr_regions); + if (IS_ERR(ms->rh)) { ti->error = "Error creating dirty region hash"; dm_io_client_destroy(ms->io_client); mempool_destroy(ms->read_record_pool); @@ -1369,7 +802,7 @@ static void free_context(struct mirror_set *ms, struct dm_target *ti, dm_put_device(ti, ms->mirror[m].dev); dm_io_client_destroy(ms->io_client); - rh_exit(&ms->rh); + dm_region_hash_destroy(ms->rh); mempool_destroy(ms->read_record_pool); kfree(ms); } @@ -1409,10 +842,10 @@ static int get_mirror(struct mirror_set *ms, struct dm_target *ti, * Create dirty log: log_type #log_params */ static struct dm_dirty_log *create_dirty_log(struct dm_target *ti, - unsigned int argc, char **argv, - unsigned int *args_used) + unsigned argc, char **argv, + unsigned *args_used) { - unsigned int param_count; + unsigned param_count; struct dm_dirty_log *dl; if (argc < 2) { @@ -1543,7 +976,7 @@ static int mirror_ctr(struct dm_target *ti, unsigned int argc, char **argv) } ti->private = ms; - ti->split_io = ms->rh.region_size; + ti->split_io = dm_rh_get_region_size(ms->rh); ms->kmirrord_wq = create_singlethread_workqueue("kmirrord"); if (!ms->kmirrord_wq) { @@ -1578,11 +1011,11 @@ static int mirror_ctr(struct dm_target *ti, unsigned int argc, char **argv) goto err_destroy_wq; } - r = dm_kcopyd_client_create(DM_IO_PAGES, &ms->kcopyd_client); + r = dm_kcopyd_client_create(DM_KCOPYD_PAGES, &ms->kcopyd_client); if (r) goto err_destroy_wq; - wake(ms); + wakeup_mirrord(ms); return 0; err_destroy_wq: @@ -1603,22 +1036,6 @@ static void mirror_dtr(struct dm_target *ti) free_context(ms, ti, ms->nr_mirrors); } -static void queue_bio(struct mirror_set *ms, struct bio *bio, int rw) -{ - unsigned long flags; - int should_wake = 0; - struct bio_list *bl; - - bl = (rw == WRITE) ? 
&ms->writes : &ms->reads; - spin_lock_irqsave(&ms->lock, flags); - should_wake = !(bl->head); - bio_list_add(bl, bio); - spin_unlock_irqrestore(&ms->lock, flags); - - if (should_wake) - wake(ms); -} - /* * Mirror mapping function */ @@ -1629,16 +1046,16 @@ static int mirror_map(struct dm_target *ti, struct bio *bio, struct mirror *m; struct mirror_set *ms = ti->private; struct dm_raid1_read_record *read_record = NULL; + struct dm_dirty_log *log = dm_rh_dirty_log(ms->rh); if (rw == WRITE) { /* Save region for mirror_end_io() handler */ - map_context->ll = bio_to_region(&ms->rh, bio); + map_context->ll = dm_rh_bio_to_region(ms->rh, bio); queue_bio(ms, bio, rw); return DM_MAPIO_SUBMITTED; } - r = ms->rh.log->type->in_sync(ms->rh.log, - bio_to_region(&ms->rh, bio), 0); + r = log->type->in_sync(log, dm_rh_bio_to_region(ms->rh, bio), 0); if (r < 0 && r != -EWOULDBLOCK) return r; @@ -1686,7 +1103,7 @@ static int mirror_end_io(struct dm_target *ti, struct bio *bio, * We need to dec pending if this was a write. */ if (rw == WRITE) { - rh_dec(&ms->rh, map_context->ll); + dm_rh_dec(ms->rh, map_context->ll); return error; } @@ -1742,7 +1159,7 @@ out: static void mirror_presuspend(struct dm_target *ti) { struct mirror_set *ms = (struct mirror_set *) ti->private; - struct dm_dirty_log *log = ms->rh.log; + struct dm_dirty_log *log = dm_rh_dirty_log(ms->rh); atomic_set(&ms->suspend, 1); @@ -1750,10 +1167,10 @@ static void mirror_presuspend(struct dm_target *ti) * We must finish up all the work that we've * generated (i.e. recovery work). 
*/ - rh_stop_recovery(&ms->rh); + dm_rh_stop_recovery(ms->rh); wait_event(_kmirrord_recovery_stopped, - !atomic_read(&ms->rh.recovery_in_flight)); + !dm_rh_recovery_in_flight(ms->rh)); if (log->type->presuspend && log->type->presuspend(log)) /* FIXME: need better error handling */ @@ -1771,7 +1188,7 @@ static void mirror_presuspend(struct dm_target *ti) static void mirror_postsuspend(struct dm_target *ti) { struct mirror_set *ms = ti->private; - struct dm_dirty_log *log = ms->rh.log; + struct dm_dirty_log *log = dm_rh_dirty_log(ms->rh); if (log->type->postsuspend && log->type->postsuspend(log)) /* FIXME: need better error handling */ @@ -1781,13 +1198,13 @@ static void mirror_postsuspend(struct dm_target *ti) static void mirror_resume(struct dm_target *ti) { struct mirror_set *ms = ti->private; - struct dm_dirty_log *log = ms->rh.log; + struct dm_dirty_log *log = dm_rh_dirty_log(ms->rh); atomic_set(&ms->suspend, 0); if (log->type->resume && log->type->resume(log)) /* FIXME: need better error handling */ DMWARN("log resume failed"); - rh_start_recovery(&ms->rh); + dm_rh_start_recovery(ms->rh); } /* @@ -1819,7 +1236,7 @@ static int mirror_status(struct dm_target *ti, status_type_t type, { unsigned int m, sz = 0; struct mirror_set *ms = (struct mirror_set *) ti->private; - struct dm_dirty_log *log = ms->rh.log; + struct dm_dirty_log *log = dm_rh_dirty_log(ms->rh); char buffer[ms->nr_mirrors + 1]; switch (type) { @@ -1832,15 +1249,15 @@ static int mirror_status(struct dm_target *ti, status_type_t type, buffer[m] = '\0'; DMEMIT("%llu/%llu 1 %s ", - (unsigned long long)log->type->get_sync_count(ms->rh.log), + (unsigned long long)log->type->get_sync_count(log), (unsigned long long)ms->nr_regions, buffer); - sz += log->type->status(ms->rh.log, type, result+sz, maxlen-sz); + sz += log->type->status(log, type, result+sz, maxlen-sz); break; case STATUSTYPE_TABLE: - sz = log->type->status(ms->rh.log, type, result, maxlen); + sz = log->type->status(log, type, result, maxlen); 
DMEMIT("%d", ms->nr_mirrors); for (m = 0; m < ms->nr_mirrors; m++) diff --git a/drivers/md/dm-region-hash.c b/drivers/md/dm-region-hash.c new file mode 100644 index 00000000000..59f8d9df9e1 --- /dev/null +++ b/drivers/md/dm-region-hash.c @@ -0,0 +1,704 @@ +/* + * Copyright (C) 2003 Sistina Software Limited. + * Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved. + * + * This file is released under the GPL. + */ + +#include +#include + +#include +#include +#include +#include + +#include "dm.h" +#include "dm-bio-list.h" + +#define DM_MSG_PREFIX "region hash" + +/*----------------------------------------------------------------- + * Region hash + * + * The mirror splits itself up into discrete regions. Each + * region can be in one of three states: clean, dirty, + * nosync. There is no need to put clean regions in the hash. + * + * In addition to being present in the hash table a region _may_ + * be present on one of three lists. + * + * clean_regions: Regions on this list have no io pending to + * them, they are in sync, we are no longer interested in them, + * they are dull. dm_rh_update_states() will remove them from the + * hash table. + * + * quiesced_regions: These regions have been spun down, ready + * for recovery. rh_recovery_start() will remove regions from + * this list and hand them to kmirrord, which will schedule the + * recovery io with kcopyd. + * + * recovered_regions: Regions that kcopyd has successfully + * recovered. dm_rh_update_states() will now schedule any delayed + * io, up the recovery_count, and remove the region from the + * hash. + * + * There are 2 locks: + * A rw spin lock 'hash_lock' protects just the hash table, + * this is never held in write mode from interrupt context, + * which I believe means that we only have to disable irqs when + * doing a write lock. + * + * An ordinary spin lock 'region_lock' that protects the three + * lists in the region_hash, with the 'state', 'list' and + * 'delayed_bios' fields of the regions. 
This is used from irq + * context, so all other uses will have to suspend local irqs. + *---------------------------------------------------------------*/ +struct dm_region_hash { + uint32_t region_size; + unsigned region_shift; + + /* holds persistent region state */ + struct dm_dirty_log *log; + + /* hash table */ + rwlock_t hash_lock; + mempool_t *region_pool; + unsigned mask; + unsigned nr_buckets; + unsigned prime; + unsigned shift; + struct list_head *buckets; + + unsigned max_recovery; /* Max # of regions to recover in parallel */ + + spinlock_t region_lock; + atomic_t recovery_in_flight; + struct semaphore recovery_count; + struct list_head clean_regions; + struct list_head quiesced_regions; + struct list_head recovered_regions; + struct list_head failed_recovered_regions; + + void *context; + sector_t target_begin; + + /* Callback function to schedule bios writes */ + void (*dispatch_bios)(void *context, struct bio_list *bios); + + /* Callback function to wakeup callers worker thread. */ + void (*wakeup_workers)(void *context); + + /* Callback function to wakeup callers recovery waiters. */ + void (*wakeup_all_recovery_waiters)(void *context); +}; + +struct dm_region { + struct dm_region_hash *rh; /* FIXME: can we get rid of this ? 
*/ + region_t key; + int state; + + struct list_head hash_list; + struct list_head list; + + atomic_t pending; + struct bio_list delayed_bios; +}; + +/* + * Conversion fns + */ +static region_t dm_rh_sector_to_region(struct dm_region_hash *rh, sector_t sector) +{ + return sector >> rh->region_shift; +} + +sector_t dm_rh_region_to_sector(struct dm_region_hash *rh, region_t region) +{ + return region << rh->region_shift; +} +EXPORT_SYMBOL_GPL(dm_rh_region_to_sector); + +region_t dm_rh_bio_to_region(struct dm_region_hash *rh, struct bio *bio) +{ + return dm_rh_sector_to_region(rh, bio->bi_sector - rh->target_begin); +} +EXPORT_SYMBOL_GPL(dm_rh_bio_to_region); + +void *dm_rh_region_context(struct dm_region *reg) +{ + return reg->rh->context; +} +EXPORT_SYMBOL_GPL(dm_rh_region_context); + +region_t dm_rh_get_region_key(struct dm_region *reg) +{ + return reg->key; +} +EXPORT_SYMBOL_GPL(dm_rh_get_region_key); + +sector_t dm_rh_get_region_size(struct dm_region_hash *rh) +{ + return rh->region_size; +} +EXPORT_SYMBOL_GPL(dm_rh_get_region_size); + +/* + * FIXME: shall we pass in a structure instead of all these args to + * dm_region_hash_create()???? + */ +#define RH_HASH_MULT 2654435387U +#define RH_HASH_SHIFT 12 + +#define MIN_REGIONS 64 +struct dm_region_hash *dm_region_hash_create( + void *context, void (*dispatch_bios)(void *context, + struct bio_list *bios), + void (*wakeup_workers)(void *context), + void (*wakeup_all_recovery_waiters)(void *context), + sector_t target_begin, unsigned max_recovery, + struct dm_dirty_log *log, uint32_t region_size, + region_t nr_regions) +{ + struct dm_region_hash *rh; + unsigned nr_buckets, max_buckets; + size_t i; + + /* + * Calculate a suitable number of buckets for our hash + * table. 
+ */ + max_buckets = nr_regions >> 6; + for (nr_buckets = 128u; nr_buckets < max_buckets; nr_buckets <<= 1) + ; + nr_buckets >>= 1; + + rh = kmalloc(sizeof(*rh), GFP_KERNEL); + if (!rh) { + DMERR("unable to allocate region hash memory"); + return ERR_PTR(-ENOMEM); + } + + rh->context = context; + rh->dispatch_bios = dispatch_bios; + rh->wakeup_workers = wakeup_workers; + rh->wakeup_all_recovery_waiters = wakeup_all_recovery_waiters; + rh->target_begin = target_begin; + rh->max_recovery = max_recovery; + rh->log = log; + rh->region_size = region_size; + rh->region_shift = ffs(region_size) - 1; + rwlock_init(&rh->hash_lock); + rh->mask = nr_buckets - 1; + rh->nr_buckets = nr_buckets; + + rh->shift = RH_HASH_SHIFT; + rh->prime = RH_HASH_MULT; + + rh->buckets = vmalloc(nr_buckets * sizeof(*rh->buckets)); + if (!rh->buckets) { + DMERR("unable to allocate region hash bucket memory"); + kfree(rh); + return ERR_PTR(-ENOMEM); + } + + for (i = 0; i < nr_buckets; i++) + INIT_LIST_HEAD(rh->buckets + i); + + spin_lock_init(&rh->region_lock); + sema_init(&rh->recovery_count, 0); + atomic_set(&rh->recovery_in_flight, 0); + INIT_LIST_HEAD(&rh->clean_regions); + INIT_LIST_HEAD(&rh->quiesced_regions); + INIT_LIST_HEAD(&rh->recovered_regions); + INIT_LIST_HEAD(&rh->failed_recovered_regions); + + rh->region_pool = mempool_create_kmalloc_pool(MIN_REGIONS, + sizeof(struct dm_region)); + if (!rh->region_pool) { + vfree(rh->buckets); + kfree(rh); + rh = ERR_PTR(-ENOMEM); + } + + return rh; +} +EXPORT_SYMBOL_GPL(dm_region_hash_create); + +void dm_region_hash_destroy(struct dm_region_hash *rh) +{ + unsigned h; + struct dm_region *reg, *nreg; + + BUG_ON(!list_empty(&rh->quiesced_regions)); + for (h = 0; h < rh->nr_buckets; h++) { + list_for_each_entry_safe(reg, nreg, rh->buckets + h, + hash_list) { + BUG_ON(atomic_read(&reg->pending)); + mempool_free(reg, rh->region_pool); + } + } + + if (rh->log) + dm_dirty_log_destroy(rh->log); + + if (rh->region_pool) + mempool_destroy(rh->region_pool); + + 
vfree(rh->buckets); + kfree(rh); +} +EXPORT_SYMBOL_GPL(dm_region_hash_destroy); + +struct dm_dirty_log *dm_rh_dirty_log(struct dm_region_hash *rh) +{ + return rh->log; +} +EXPORT_SYMBOL_GPL(dm_rh_dirty_log); + +static unsigned rh_hash(struct dm_region_hash *rh, region_t region) +{ + return (unsigned) ((region * rh->prime) >> rh->shift) & rh->mask; +} + +static struct dm_region *__rh_lookup(struct dm_region_hash *rh, region_t region) +{ + struct dm_region *reg; + struct list_head *bucket = rh->buckets + rh_hash(rh, region); + + list_for_each_entry(reg, bucket, hash_list) + if (reg->key == region) + return reg; + + return NULL; +} + +static void __rh_insert(struct dm_region_hash *rh, struct dm_region *reg) +{ + list_add(&reg->hash_list, rh->buckets + rh_hash(rh, reg->key)); +} + +static struct dm_region *__rh_alloc(struct dm_region_hash *rh, region_t region) +{ + struct dm_region *reg, *nreg; + + nreg = mempool_alloc(rh->region_pool, GFP_ATOMIC); + if (unlikely(!nreg)) + nreg = kmalloc(sizeof(*nreg), GFP_NOIO); + + nreg->state = rh->log->type->in_sync(rh->log, region, 1) ? + DM_RH_CLEAN : DM_RH_NOSYNC; + nreg->rh = rh; + nreg->key = region; + INIT_LIST_HEAD(&nreg->list); + atomic_set(&nreg->pending, 0); + bio_list_init(&nreg->delayed_bios); + + write_lock_irq(&rh->hash_lock); + reg = __rh_lookup(rh, region); + if (reg) + /* We lost the race. 
*/ + mempool_free(nreg, rh->region_pool); + else { + __rh_insert(rh, nreg); + if (nreg->state == DM_RH_CLEAN) { + spin_lock(&rh->region_lock); + list_add(&nreg->list, &rh->clean_regions); + spin_unlock(&rh->region_lock); + } + + reg = nreg; + } + write_unlock_irq(&rh->hash_lock); + + return reg; +} + +static struct dm_region *__rh_find(struct dm_region_hash *rh, region_t region) +{ + struct dm_region *reg; + + reg = __rh_lookup(rh, region); + if (!reg) { + read_unlock(&rh->hash_lock); + reg = __rh_alloc(rh, region); + read_lock(&rh->hash_lock); + } + + return reg; +} + +int dm_rh_get_state(struct dm_region_hash *rh, region_t region, int may_block) +{ + int r; + struct dm_region *reg; + + read_lock(&rh->hash_lock); + reg = __rh_lookup(rh, region); + read_unlock(&rh->hash_lock); + + if (reg) + return reg->state; + + /* + * The region wasn't in the hash, so we fall back to the + * dirty log. + */ + r = rh->log->type->in_sync(rh->log, region, may_block); + + /* + * Any error from the dirty log (eg. -EWOULDBLOCK) gets + * taken as a DM_RH_NOSYNC + */ + return r == 1 ? DM_RH_CLEAN : DM_RH_NOSYNC; +} +EXPORT_SYMBOL_GPL(dm_rh_get_state); + +static void complete_resync_work(struct dm_region *reg, int success) +{ + struct dm_region_hash *rh = reg->rh; + + rh->log->type->set_region_sync(rh->log, reg->key, success); + + /* + * Dispatch the bios before we call 'wake_up_all'. + * This is important because if we are suspending, + * we want to know that recovery is complete and + * the work queue is flushed. If we wake_up_all + * before we dispatch_bios (queue bios and call wake()), + * then we risk suspending before the work queue + * has been properly flushed. + */ + rh->dispatch_bios(rh->context, &reg->delayed_bios); + if (atomic_dec_and_test(&rh->recovery_in_flight)) + rh->wakeup_all_recovery_waiters(rh->context); + up(&rh->recovery_count); +} + +/* dm_rh_mark_nosync + * @ms + * @bio + * @done + * @error + * + * The bio was written on some mirror(s) but failed on other mirror(s). 
+ * We can successfully endio the bio but should avoid the region being + * marked clean by setting the state DM_RH_NOSYNC. + * + * This function is _not_ safe in interrupt context! + */ +void dm_rh_mark_nosync(struct dm_region_hash *rh, + struct bio *bio, unsigned done, int error) +{ + unsigned long flags; + struct dm_dirty_log *log = rh->log; + struct dm_region *reg; + region_t region = dm_rh_bio_to_region(rh, bio); + int recovering = 0; + + /* We must inform the log that the sync count has changed. */ + log->type->set_region_sync(log, region, 0); + + read_lock(&rh->hash_lock); + reg = __rh_find(rh, region); + read_unlock(&rh->hash_lock); + + /* region hash entry should exist because write was in-flight */ + BUG_ON(!reg); + BUG_ON(!list_empty(&reg->list)); + + spin_lock_irqsave(&rh->region_lock, flags); + /* + * Possible cases: + * 1) DM_RH_DIRTY + * 2) DM_RH_NOSYNC: was dirty, other preceeding writes failed + * 3) DM_RH_RECOVERING: flushing pending writes + * Either case, the region should have not been connected to list. + */ + recovering = (reg->state == DM_RH_RECOVERING); + reg->state = DM_RH_NOSYNC; + BUG_ON(!list_empty(&reg->list)); + spin_unlock_irqrestore(&rh->region_lock, flags); + + bio_endio(bio, error); + if (recovering) + complete_resync_work(reg, 0); +} +EXPORT_SYMBOL_GPL(dm_rh_mark_nosync); + +void dm_rh_update_states(struct dm_region_hash *rh, int errors_handled) +{ + struct dm_region *reg, *next; + + LIST_HEAD(clean); + LIST_HEAD(recovered); + LIST_HEAD(failed_recovered); + + /* + * Quickly grab the lists. 
+ */ + write_lock_irq(&rh->hash_lock); + spin_lock(&rh->region_lock); + if (!list_empty(&rh->clean_regions)) { + list_splice_init(&rh->clean_regions, &clean); + + list_for_each_entry(reg, &clean, list) + list_del(&reg->hash_list); + } + + if (!list_empty(&rh->recovered_regions)) { + list_splice_init(&rh->recovered_regions, &recovered); + + list_for_each_entry(reg, &recovered, list) + list_del(&reg->hash_list); + } + + if (!list_empty(&rh->failed_recovered_regions)) { + list_splice_init(&rh->failed_recovered_regions, + &failed_recovered); + + list_for_each_entry(reg, &failed_recovered, list) + list_del(&reg->hash_list); + } + + spin_unlock(&rh->region_lock); + write_unlock_irq(&rh->hash_lock); + + /* + * All the regions on the recovered and clean lists have + * now been pulled out of the system, so no need to do + * any more locking. + */ + list_for_each_entry_safe(reg, next, &recovered, list) { + rh->log->type->clear_region(rh->log, reg->key); + complete_resync_work(reg, 1); + mempool_free(reg, rh->region_pool); + } + + list_for_each_entry_safe(reg, next, &failed_recovered, list) { + complete_resync_work(reg, errors_handled ? 
0 : 1); + mempool_free(reg, rh->region_pool); + } + + list_for_each_entry_safe(reg, next, &clean, list) { + rh->log->type->clear_region(rh->log, reg->key); + mempool_free(reg, rh->region_pool); + } + + rh->log->type->flush(rh->log); +} +EXPORT_SYMBOL_GPL(dm_rh_update_states); + +static void rh_inc(struct dm_region_hash *rh, region_t region) +{ + struct dm_region *reg; + + read_lock(&rh->hash_lock); + reg = __rh_find(rh, region); + + spin_lock_irq(&rh->region_lock); + atomic_inc(&reg->pending); + + if (reg->state == DM_RH_CLEAN) { + reg->state = DM_RH_DIRTY; + list_del_init(&reg->list); /* take off the clean list */ + spin_unlock_irq(&rh->region_lock); + + rh->log->type->mark_region(rh->log, reg->key); + } else + spin_unlock_irq(&rh->region_lock); + + + read_unlock(&rh->hash_lock); +} + +void dm_rh_inc_pending(struct dm_region_hash *rh, struct bio_list *bios) +{ + struct bio *bio; + + for (bio = bios->head; bio; bio = bio->bi_next) + rh_inc(rh, dm_rh_bio_to_region(rh, bio)); +} +EXPORT_SYMBOL_GPL(dm_rh_inc_pending); + +void dm_rh_dec(struct dm_region_hash *rh, region_t region) +{ + unsigned long flags; + struct dm_region *reg; + int should_wake = 0; + + read_lock(&rh->hash_lock); + reg = __rh_lookup(rh, region); + read_unlock(&rh->hash_lock); + + spin_lock_irqsave(&rh->region_lock, flags); + if (atomic_dec_and_test(&reg->pending)) { + /* + * There is no pending I/O for this region. + * We can move the region to corresponding list for next action. + * At this point, the region is not yet connected to any list. + * + * If the state is DM_RH_NOSYNC, the region should be kept off + * from clean list. + * The hash entry for DM_RH_NOSYNC will remain in memory + * until the region is recovered or the map is reloaded. 
+ */ + + /* do nothing for DM_RH_NOSYNC */ + if (reg->state == DM_RH_RECOVERING) { + list_add_tail(&reg->list, &rh->quiesced_regions); + } else if (reg->state == DM_RH_DIRTY) { + reg->state = DM_RH_CLEAN; + list_add(&reg->list, &rh->clean_regions); + } + should_wake = 1; + } + spin_unlock_irqrestore(&rh->region_lock, flags); + + if (should_wake) + rh->wakeup_workers(rh->context); +} +EXPORT_SYMBOL_GPL(dm_rh_dec); + +/* + * Starts quiescing a region in preparation for recovery. + */ +static int __rh_recovery_prepare(struct dm_region_hash *rh) +{ + int r; + region_t region; + struct dm_region *reg; + + /* + * Ask the dirty log what's next. + */ + r = rh->log->type->get_resync_work(rh->log, &region); + if (r <= 0) + return r; + + /* + * Get this region, and start it quiescing by setting the + * recovering flag. + */ + read_lock(&rh->hash_lock); + reg = __rh_find(rh, region); + read_unlock(&rh->hash_lock); + + spin_lock_irq(&rh->region_lock); + reg->state = DM_RH_RECOVERING; + + /* Already quiesced ? */ + if (atomic_read(&reg->pending)) + list_del_init(&reg->list); + else + list_move(&reg->list, &rh->quiesced_regions); + + spin_unlock_irq(&rh->region_lock); + + return 1; +} + +void dm_rh_recovery_prepare(struct dm_region_hash *rh) +{ + /* Extra reference to avoid race with dm_rh_stop_recovery */ + atomic_inc(&rh->recovery_in_flight); + + while (!down_trylock(&rh->recovery_count)) { + atomic_inc(&rh->recovery_in_flight); + if (__rh_recovery_prepare(rh) <= 0) { + atomic_dec(&rh->recovery_in_flight); + up(&rh->recovery_count); + break; + } + } + + /* Drop the extra reference */ + if (atomic_dec_and_test(&rh->recovery_in_flight)) + rh->wakeup_all_recovery_waiters(rh->context); +} +EXPORT_SYMBOL_GPL(dm_rh_recovery_prepare); + +/* + * Returns any quiesced regions. 
+ */ +struct dm_region *dm_rh_recovery_start(struct dm_region_hash *rh) +{ + struct dm_region *reg = NULL; + + spin_lock_irq(&rh->region_lock); + if (!list_empty(&rh->quiesced_regions)) { + reg = list_entry(rh->quiesced_regions.next, + struct dm_region, list); + list_del_init(&reg->list); /* remove from the quiesced list */ + } + spin_unlock_irq(&rh->region_lock); + + return reg; +} +EXPORT_SYMBOL_GPL(dm_rh_recovery_start); + +void dm_rh_recovery_end(struct dm_region *reg, int success) +{ + struct dm_region_hash *rh = reg->rh; + + spin_lock_irq(&rh->region_lock); + if (success) + list_add(&reg->list, &reg->rh->recovered_regions); + else { + reg->state = DM_RH_NOSYNC; + list_add(&reg->list, &reg->rh->failed_recovered_regions); + } + spin_unlock_irq(&rh->region_lock); + + rh->wakeup_workers(rh->context); +} +EXPORT_SYMBOL_GPL(dm_rh_recovery_end); + +/* Return recovery in flight count. */ +int dm_rh_recovery_in_flight(struct dm_region_hash *rh) +{ + return atomic_read(&rh->recovery_in_flight); +} +EXPORT_SYMBOL_GPL(dm_rh_recovery_in_flight); + +int dm_rh_flush(struct dm_region_hash *rh) +{ + return rh->log->type->flush(rh->log); +} +EXPORT_SYMBOL_GPL(dm_rh_flush); + +void dm_rh_delay(struct dm_region_hash *rh, struct bio *bio) +{ + struct dm_region *reg; + + read_lock(&rh->hash_lock); + reg = __rh_find(rh, dm_rh_bio_to_region(rh, bio)); + bio_list_add(&reg->delayed_bios, bio); + read_unlock(&rh->hash_lock); +} +EXPORT_SYMBOL_GPL(dm_rh_delay); + +void dm_rh_stop_recovery(struct dm_region_hash *rh) +{ + int i; + + /* wait for any recovering regions */ + for (i = 0; i < rh->max_recovery; i++) + down(&rh->recovery_count); +} +EXPORT_SYMBOL_GPL(dm_rh_stop_recovery); + +void dm_rh_start_recovery(struct dm_region_hash *rh) +{ + int i; + + for (i = 0; i < rh->max_recovery; i++) + up(&rh->recovery_count); + + rh->wakeup_workers(rh->context); +} +EXPORT_SYMBOL_GPL(dm_rh_start_recovery); + +MODULE_DESCRIPTION(DM_NAME " region hash"); +MODULE_AUTHOR("Joe Thornber/Heinz Mauelshagen "); 
+MODULE_LICENSE("GPL"); diff --git a/include/linux/dm-region-hash.h b/include/linux/dm-region-hash.h new file mode 100644 index 00000000000..a9e652a4137 --- /dev/null +++ b/include/linux/dm-region-hash.h @@ -0,0 +1,104 @@ +/* + * Copyright (C) 2003 Sistina Software Limited. + * Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved. + * + * Device-Mapper dirty region hash interface. + * + * This file is released under the GPL. + */ + +#ifndef DM_REGION_HASH_H +#define DM_REGION_HASH_H + +#include + +/*----------------------------------------------------------------- + * Region hash + *----------------------------------------------------------------*/ +struct dm_region_hash; +struct dm_region; + +/* + * States a region can have. + */ +enum dm_rh_region_states { + DM_RH_CLEAN = 0x01, /* No writes in flight. */ + DM_RH_DIRTY = 0x02, /* Writes in flight. */ + DM_RH_NOSYNC = 0x04, /* Out of sync. */ + DM_RH_RECOVERING = 0x08, /* Under resynchronization. */ +}; + +/* + * Region hash create/destroy. + */ +struct bio_list; +struct dm_region_hash *dm_region_hash_create( + void *context, void (*dispatch_bios)(void *context, + struct bio_list *bios), + void (*wakeup_workers)(void *context), + void (*wakeup_all_recovery_waiters)(void *context), + sector_t target_begin, unsigned max_recovery, + struct dm_dirty_log *log, uint32_t region_size, + region_t nr_regions); +void dm_region_hash_destroy(struct dm_region_hash *rh); + +struct dm_dirty_log *dm_rh_dirty_log(struct dm_region_hash *rh); + +/* + * Conversion functions. + */ +region_t dm_rh_bio_to_region(struct dm_region_hash *rh, struct bio *bio); +sector_t dm_rh_region_to_sector(struct dm_region_hash *rh, region_t region); +void *dm_rh_region_context(struct dm_region *reg); + +/* + * Get region size and key (ie. number of the region). + */ +sector_t dm_rh_get_region_size(struct dm_region_hash *rh); +region_t dm_rh_get_region_key(struct dm_region *reg); + +/* + * Get/set/update region state (and dirty log). 
+ * + */ +int dm_rh_get_state(struct dm_region_hash *rh, region_t region, int may_block); +void dm_rh_set_state(struct dm_region_hash *rh, region_t region, + enum dm_rh_region_states state, int may_block); + +/* Non-zero errors_handled leaves the state of the region NOSYNC */ +void dm_rh_update_states(struct dm_region_hash *rh, int errors_handled); + +/* Flush the region hash and dirty log. */ +int dm_rh_flush(struct dm_region_hash *rh); + +/* Inc/dec pending count on regions. */ +void dm_rh_inc_pending(struct dm_region_hash *rh, struct bio_list *bios); +void dm_rh_dec(struct dm_region_hash *rh, region_t region); + +/* Delay bios on regions. */ +void dm_rh_delay(struct dm_region_hash *rh, struct bio *bio); + +void dm_rh_mark_nosync(struct dm_region_hash *rh, + struct bio *bio, unsigned done, int error); + +/* + * Region recovery control. + */ + +/* Prepare some regions for recovery by starting to quiesce them. */ +void dm_rh_recovery_prepare(struct dm_region_hash *rh); + +/* Try fetching a quiesced region for recovery. */ +struct dm_region *dm_rh_recovery_start(struct dm_region_hash *rh); + +/* Report recovery end on a region. */ +void dm_rh_recovery_end(struct dm_region *reg, int error); + +/* Returns number of regions with recovery work outstanding. */ +int dm_rh_recovery_in_flight(struct dm_region_hash *rh); + +/* Start/stop recovery. */ +void dm_rh_start_recovery(struct dm_region_hash *rh); +void dm_rh_stop_recovery(struct dm_region_hash *rh); + +#endif /* DM_REGION_HASH_H */ -- cgit v1.2.3-70-g09d2 From 5e458cc0f4770eea45d3c07110f01b3a94c72aa5 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Wed, 22 Oct 2008 10:00:13 -0500 Subject: module: simplify load_module. Linus' recent catch of stack overflow in load_module lead me to look at the code. A couple of helpers to get a section address and get objects from a section can help clean things up a little. (And in case you're wondering, the stack size also dropped from 328 to 284 bytes). 
Signed-off-by: Rusty Russell --- include/linux/module.h | 2 +- kernel/module.c | 235 +++++++++++++++++++++---------------------------- 2 files changed, 100 insertions(+), 137 deletions(-) (limited to 'include/linux') diff --git a/include/linux/module.h b/include/linux/module.h index 5d2970cdce9..eddf27db442 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -277,7 +277,7 @@ struct module /* Exception table */ unsigned int num_exentries; - const struct exception_table_entry *extable; + struct exception_table_entry *extable; /* Startup function. */ int (*init)(void); diff --git a/kernel/module.c b/kernel/module.c index 0d8d21ee792..3d256681ab6 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -132,6 +132,29 @@ static unsigned int find_sec(Elf_Ehdr *hdr, return 0; } +/* Find a module section, or NULL. */ +static void *section_addr(Elf_Ehdr *hdr, Elf_Shdr *shdrs, + const char *secstrings, const char *name) +{ + /* Section 0 has sh_addr 0. */ + return (void *)shdrs[find_sec(hdr, shdrs, secstrings, name)].sh_addr; +} + +/* Find a module section, or NULL. Fill in number of "objects" in section. */ +static void *section_objs(Elf_Ehdr *hdr, + Elf_Shdr *sechdrs, + const char *secstrings, + const char *name, + size_t object_size, + unsigned int *num) +{ + unsigned int sec = find_sec(hdr, sechdrs, secstrings, name); + + /* Section 0 has sh_addr 0 and sh_size 0. 
*/ + *num = sechdrs[sec].sh_size / object_size; + return (void *)sechdrs[sec].sh_addr; +} + /* Provided by the linker */ extern const struct kernel_symbol __start___ksymtab[]; extern const struct kernel_symbol __stop___ksymtab[]; @@ -1789,32 +1812,20 @@ static inline void add_kallsyms(struct module *mod, } #endif /* CONFIG_KALLSYMS */ -#ifdef CONFIG_DYNAMIC_PRINTK_DEBUG -static void dynamic_printk_setup(Elf_Shdr *sechdrs, unsigned int verboseindex) +static void dynamic_printk_setup(struct mod_debug *debug, unsigned int num) { - struct mod_debug *debug_info; - unsigned long pos, end; - unsigned int num_verbose; - - pos = sechdrs[verboseindex].sh_addr; - num_verbose = sechdrs[verboseindex].sh_size / - sizeof(struct mod_debug); - end = pos + (num_verbose * sizeof(struct mod_debug)); +#ifdef CONFIG_DYNAMIC_PRINTK_DEBUG + unsigned int i; - for (; pos < end; pos += sizeof(struct mod_debug)) { - debug_info = (struct mod_debug *)pos; - register_dynamic_debug_module(debug_info->modname, - debug_info->type, debug_info->logical_modname, - debug_info->flag_names, debug_info->hash, - debug_info->hash2); + for (i = 0; i < num; i++) { + register_dynamic_debug_module(debug[i].modname, + debug[i].type, + debug[i].logical_modname, + debug[i].flag_names, + debug[i].hash, debug[i].hash2); } -} -#else -static inline void dynamic_printk_setup(Elf_Shdr *sechdrs, - unsigned int verboseindex) -{ -} #endif /* CONFIG_DYNAMIC_PRINTK_DEBUG */ +} static void *module_alloc_update_bounds(unsigned long size) { @@ -1843,37 +1854,14 @@ static noinline struct module *load_module(void __user *umod, unsigned int i; unsigned int symindex = 0; unsigned int strindex = 0; - unsigned int setupindex; - unsigned int exindex; - unsigned int exportindex; - unsigned int modindex; - unsigned int obsparmindex; - unsigned int infoindex; - unsigned int gplindex; - unsigned int crcindex; - unsigned int gplcrcindex; - unsigned int versindex; - unsigned int pcpuindex; - unsigned int gplfutureindex; - unsigned int 
gplfuturecrcindex; + unsigned int modindex, versindex, infoindex, pcpuindex; unsigned int unwindex = 0; -#ifdef CONFIG_UNUSED_SYMBOLS - unsigned int unusedindex; - unsigned int unusedcrcindex; - unsigned int unusedgplindex; - unsigned int unusedgplcrcindex; -#endif - unsigned int markersindex; - unsigned int markersstringsindex; - unsigned int verboseindex; - unsigned int tracepointsindex; - unsigned int tracepointsstringsindex; - unsigned int mcountindex; + unsigned int num_kp, num_mcount; + struct kernel_param *kp; struct module *mod; long err = 0; void *percpu = NULL, *ptr = NULL; /* Stops spurious gcc warning */ - void *mseg; - struct exception_table_entry *extable; + unsigned long *mseg; mm_segment_t old_fs; DEBUGP("load_module: umod=%p, len=%lu, uargs=%p\n", @@ -1937,6 +1925,7 @@ static noinline struct module *load_module(void __user *umod, err = -ENOEXEC; goto free_hdr; } + /* This is temporary: point mod into copy of data. */ mod = (void *)sechdrs[modindex].sh_addr; if (symindex == 0) { @@ -1946,22 +1935,6 @@ static noinline struct module *load_module(void __user *umod, goto free_hdr; } - /* Optional sections */ - exportindex = find_sec(hdr, sechdrs, secstrings, "__ksymtab"); - gplindex = find_sec(hdr, sechdrs, secstrings, "__ksymtab_gpl"); - gplfutureindex = find_sec(hdr, sechdrs, secstrings, "__ksymtab_gpl_future"); - crcindex = find_sec(hdr, sechdrs, secstrings, "__kcrctab"); - gplcrcindex = find_sec(hdr, sechdrs, secstrings, "__kcrctab_gpl"); - gplfuturecrcindex = find_sec(hdr, sechdrs, secstrings, "__kcrctab_gpl_future"); -#ifdef CONFIG_UNUSED_SYMBOLS - unusedindex = find_sec(hdr, sechdrs, secstrings, "__ksymtab_unused"); - unusedgplindex = find_sec(hdr, sechdrs, secstrings, "__ksymtab_unused_gpl"); - unusedcrcindex = find_sec(hdr, sechdrs, secstrings, "__kcrctab_unused"); - unusedgplcrcindex = find_sec(hdr, sechdrs, secstrings, "__kcrctab_unused_gpl"); -#endif - setupindex = find_sec(hdr, sechdrs, secstrings, "__param"); - exindex = find_sec(hdr, 
sechdrs, secstrings, "__ex_table"); - obsparmindex = find_sec(hdr, sechdrs, secstrings, "__obsparm"); versindex = find_sec(hdr, sechdrs, secstrings, "__versions"); infoindex = find_sec(hdr, sechdrs, secstrings, ".modinfo"); pcpuindex = find_pcpusec(hdr, sechdrs, secstrings); @@ -2117,42 +2090,57 @@ static noinline struct module *load_module(void __user *umod, if (err < 0) goto cleanup; - /* Set up EXPORTed & EXPORT_GPLed symbols (section 0 is 0 length) */ - mod->num_syms = sechdrs[exportindex].sh_size / sizeof(*mod->syms); - mod->syms = (void *)sechdrs[exportindex].sh_addr; - if (crcindex) - mod->crcs = (void *)sechdrs[crcindex].sh_addr; - mod->num_gpl_syms = sechdrs[gplindex].sh_size / sizeof(*mod->gpl_syms); - mod->gpl_syms = (void *)sechdrs[gplindex].sh_addr; - if (gplcrcindex) - mod->gpl_crcs = (void *)sechdrs[gplcrcindex].sh_addr; - mod->num_gpl_future_syms = sechdrs[gplfutureindex].sh_size / - sizeof(*mod->gpl_future_syms); - mod->gpl_future_syms = (void *)sechdrs[gplfutureindex].sh_addr; - if (gplfuturecrcindex) - mod->gpl_future_crcs = (void *)sechdrs[gplfuturecrcindex].sh_addr; + /* Now we've got everything in the final locations, we can + * find optional sections. 
*/ + kp = section_objs(hdr, sechdrs, secstrings, "__param", sizeof(*kp), + &num_kp); + mod->syms = section_objs(hdr, sechdrs, secstrings, "__ksymtab", + sizeof(*mod->syms), &mod->num_syms); + mod->crcs = section_addr(hdr, sechdrs, secstrings, "__kcrctab"); + mod->gpl_syms = section_objs(hdr, sechdrs, secstrings, "__ksymtab_gpl", + sizeof(*mod->gpl_syms), + &mod->num_gpl_syms); + mod->gpl_crcs = section_addr(hdr, sechdrs, secstrings, "__kcrctab_gpl"); + mod->gpl_future_syms = section_objs(hdr, sechdrs, secstrings, + "__ksymtab_gpl_future", + sizeof(*mod->gpl_future_syms), + &mod->num_gpl_future_syms); + mod->gpl_future_crcs = section_addr(hdr, sechdrs, secstrings, + "__kcrctab_gpl_future"); #ifdef CONFIG_UNUSED_SYMBOLS - mod->num_unused_syms = sechdrs[unusedindex].sh_size / - sizeof(*mod->unused_syms); - mod->num_unused_gpl_syms = sechdrs[unusedgplindex].sh_size / - sizeof(*mod->unused_gpl_syms); - mod->unused_syms = (void *)sechdrs[unusedindex].sh_addr; - if (unusedcrcindex) - mod->unused_crcs = (void *)sechdrs[unusedcrcindex].sh_addr; - mod->unused_gpl_syms = (void *)sechdrs[unusedgplindex].sh_addr; - if (unusedgplcrcindex) - mod->unused_gpl_crcs - = (void *)sechdrs[unusedgplcrcindex].sh_addr; + mod->unused_syms = section_objs(hdr, sechdrs, secstrings, + "__ksymtab_unused", + sizeof(*mod->unused_syms), + &mod->num_unused_syms); + mod->unused_crcs = section_addr(hdr, sechdrs, secstrings, + "__kcrctab_unused"); + mod->unused_gpl_syms = section_objs(hdr, sechdrs, secstrings, + "__ksymtab_unused_gpl", + sizeof(*mod->unused_gpl_syms), + &mod->num_unused_gpl_syms); + mod->unused_gpl_crcs = section_addr(hdr, sechdrs, secstrings, + "__kcrctab_unused_gpl"); +#endif + +#ifdef CONFIG_MARKERS + mod->markers = section_objs(hdr, sechdrs, secstrings, "__markers", + sizeof(*mod->markers), &mod->num_markers); +#endif +#ifdef CONFIG_TRACEPOINTS + mod->tracepoints = section_objs(hdr, sechdrs, secstrings, + "__tracepoints", + sizeof(*mod->tracepoints), + &mod->num_tracepoints); 
#endif #ifdef CONFIG_MODVERSIONS - if ((mod->num_syms && !crcindex) - || (mod->num_gpl_syms && !gplcrcindex) - || (mod->num_gpl_future_syms && !gplfuturecrcindex) + if ((mod->num_syms && !mod->crcs) + || (mod->num_gpl_syms && !mod->gpl_crcs) + || (mod->num_gpl_future_syms && !mod->gpl_future_crcs) #ifdef CONFIG_UNUSED_SYMBOLS - || (mod->num_unused_syms && !unusedcrcindex) - || (mod->num_unused_gpl_syms && !unusedgplcrcindex) + || (mod->num_unused_syms && !mod->unused_crcs) + || (mod->num_unused_gpl_syms && !mod->unused_gpl_crcs) #endif ) { printk(KERN_WARNING "%s: No versions for exported symbols.\n", mod->name); @@ -2161,16 +2149,6 @@ static noinline struct module *load_module(void __user *umod, goto cleanup; } #endif - markersindex = find_sec(hdr, sechdrs, secstrings, "__markers"); - markersstringsindex = find_sec(hdr, sechdrs, secstrings, - "__markers_strings"); - verboseindex = find_sec(hdr, sechdrs, secstrings, "__verbose"); - tracepointsindex = find_sec(hdr, sechdrs, secstrings, "__tracepoints"); - tracepointsstringsindex = find_sec(hdr, sechdrs, secstrings, - "__tracepoints_strings"); - - mcountindex = find_sec(hdr, sechdrs, secstrings, - "__mcount_loc"); /* Now do relocations. 
*/ for (i = 1; i < hdr->e_shnum; i++) { @@ -2193,28 +2171,16 @@ static noinline struct module *load_module(void __user *umod, if (err < 0) goto cleanup; } -#ifdef CONFIG_MARKERS - mod->markers = (void *)sechdrs[markersindex].sh_addr; - mod->num_markers = - sechdrs[markersindex].sh_size / sizeof(*mod->markers); -#endif -#ifdef CONFIG_TRACEPOINTS - mod->tracepoints = (void *)sechdrs[tracepointsindex].sh_addr; - mod->num_tracepoints = - sechdrs[tracepointsindex].sh_size / sizeof(*mod->tracepoints); -#endif - /* Find duplicate symbols */ err = verify_export_symbols(mod); - if (err < 0) goto cleanup; /* Set up and sort exception table */ - mod->num_exentries = sechdrs[exindex].sh_size / sizeof(*mod->extable); - mod->extable = extable = (void *)sechdrs[exindex].sh_addr; - sort_extable(extable, extable + mod->num_exentries); + mod->extable = section_objs(hdr, sechdrs, secstrings, "__ex_table", + sizeof(*mod->extable), &mod->num_exentries); + sort_extable(mod->extable, mod->extable + mod->num_exentries); /* Finally, copy percpu area over. 
*/ percpu_modcopy(mod->percpu, (void *)sechdrs[pcpuindex].sh_addr, @@ -2223,11 +2189,17 @@ static noinline struct module *load_module(void __user *umod, add_kallsyms(mod, sechdrs, symindex, strindex, secstrings); if (!mod->taints) { + struct mod_debug *debug; + unsigned int num_debug; + #ifdef CONFIG_MARKERS marker_update_probe_range(mod->markers, mod->markers + mod->num_markers); #endif - dynamic_printk_setup(sechdrs, verboseindex); + debug = section_objs(hdr, sechdrs, secstrings, "__verbose", + sizeof(*debug), &num_debug); + dynamic_printk_setup(debug, num_debug); + #ifdef CONFIG_TRACEPOINTS tracepoint_update_probe_range(mod->tracepoints, mod->tracepoints + mod->num_tracepoints); @@ -2235,8 +2207,9 @@ static noinline struct module *load_module(void __user *umod, } /* sechdrs[0].sh_size is always zero */ - mseg = (void *)sechdrs[mcountindex].sh_addr; - ftrace_init_module(mseg, mseg + sechdrs[mcountindex].sh_size); + mseg = section_objs(hdr, sechdrs, secstrings, "__mcount_loc", + sizeof(*mseg), &num_mcount); + ftrace_init_module(mseg, mseg + num_mcount); err = module_finalize(hdr, sechdrs, mod); if (err < 0) @@ -2261,7 +2234,7 @@ static noinline struct module *load_module(void __user *umod, set_fs(old_fs); mod->args = args; - if (obsparmindex) + if (section_addr(hdr, sechdrs, secstrings, "__obsparm")) printk(KERN_WARNING "%s: Ignoring obsolete parameters\n", mod->name); @@ -2270,21 +2243,11 @@ static noinline struct module *load_module(void __user *umod, * strong_try_module_get() will fail. 
*/ stop_machine(__link_module, mod, NULL); - /* Size of section 0 is 0, so this works well if no params */ - err = parse_args(mod->name, mod->args, - (struct kernel_param *) - sechdrs[setupindex].sh_addr, - sechdrs[setupindex].sh_size - / sizeof(struct kernel_param), - NULL); + err = parse_args(mod->name, mod->args, kp, num_kp, NULL); if (err < 0) goto unlink; - err = mod_sysfs_setup(mod, - (struct kernel_param *) - sechdrs[setupindex].sh_addr, - sechdrs[setupindex].sh_size - / sizeof(struct kernel_param)); + err = mod_sysfs_setup(mod, kp, num_kp); if (err < 0) goto unlink; add_sect_attrs(mod, hdr->e_shnum, secstrings, sechdrs); -- cgit v1.2.3-70-g09d2 From 730b69d225259565c705f5f5a11cb1aba69568f1 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Wed, 22 Oct 2008 10:00:22 -0500 Subject: module: check kernel param length at compile time, not runtime The kparam code tries to handle over-length parameter prefixes at runtime. Not only would I bet this has never been tested, it's not clear that truncating names is a good idea either. So let's check at compile time. We need to move the #define to moduleparam.h to do this, though. Signed-off-by: Rusty Russell --- include/linux/module.h | 2 +- include/linux/moduleparam.h | 6 +++++- kernel/params.c | 7 ++----- 3 files changed, 8 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/module.h b/include/linux/module.h index eddf27db442..196b499270d 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -29,7 +29,7 @@ #define MODULE_SYMBOL_PREFIX "" #endif -#define MODULE_NAME_LEN (64 - sizeof(unsigned long)) +#define MODULE_NAME_LEN MAX_PARAM_PREFIX_LEN struct kernel_symbol { diff --git a/include/linux/moduleparam.h b/include/linux/moduleparam.h index ec624381c84..1eefe6d61b8 100644 --- a/include/linux/moduleparam.h +++ b/include/linux/moduleparam.h @@ -13,6 +13,9 @@ #define MODULE_PARAM_PREFIX KBUILD_MODNAME "." #endif +/* Chosen so that structs with an unsigned long line up. 
*/ +#define MAX_PARAM_PREFIX_LEN (64 - sizeof(unsigned long)) + #ifdef MODULE #define ___module_cat(a,b) __mod_ ## a ## b #define __module_cat(a,b) ___module_cat(a,b) @@ -79,7 +82,8 @@ struct kparam_array #define __module_param_call(prefix, name, set, get, arg, perm) \ /* Default value instead of permissions? */ \ static int __param_perm_check_##name __attribute__((unused)) = \ - BUILD_BUG_ON_ZERO((perm) < 0 || (perm) > 0777 || ((perm) & 2)); \ + BUILD_BUG_ON_ZERO((perm) < 0 || (perm) > 0777 || ((perm) & 2)) \ + + BUILD_BUG_ON_ZERO(sizeof(""prefix) > MAX_PARAM_PREFIX_LEN); \ static const char __param_str_##name[] = prefix #name; \ static struct kernel_param __moduleparam_const __param_##name \ __used \ diff --git a/kernel/params.c b/kernel/params.c index afc46a23eb6..aca07e1a050 100644 --- a/kernel/params.c +++ b/kernel/params.c @@ -585,17 +585,14 @@ static void __init param_sysfs_builtin(void) { struct kernel_param *kp, *kp_begin = NULL; unsigned int i, name_len, count = 0; - char modname[MODULE_NAME_LEN + 1] = ""; + char modname[MODULE_NAME_LEN] = ""; for (i=0; i < __stop___param - __start___param; i++) { char *dot; - size_t max_name_len; kp = &__start___param[i]; - max_name_len = - min_t(size_t, MODULE_NAME_LEN, strlen(kp->name)); - dot = memchr(kp->name, '.', max_name_len); + dot = strchr(kp->name, '.'); if (!dot) { DEBUGP("couldn't find period in first %d characters " "of %s\n", MODULE_NAME_LEN, kp->name); -- cgit v1.2.3-70-g09d2 From 9b473de87209fa86eb421b23386693b461612f30 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Wed, 22 Oct 2008 10:00:22 -0500 Subject: param: Fix duplicate module prefixes Instead of insisting each new module_param sysfs entry is unique, handle the case where it already exists (for builtin modules). The current code assumes that all identical prefixes are together in the section: true for normal uses, but not necessarily so if someone overrides MODULE_PARAM_PREFIX. 
More importantly, it's not true with the new "core_param()" code which uses "kernel" as a prefix. This simplifies the caller for the builtin case, at a slight loss of efficiency (we do the lookup every time to see if the directory exists). Signed-off-by: Rusty Russell Cc: Greg Kroah-Hartman --- include/linux/module.h | 2 +- kernel/params.c | 261 ++++++++++++++++++++++++++----------------------- 2 files changed, 142 insertions(+), 121 deletions(-) (limited to 'include/linux') diff --git a/include/linux/module.h b/include/linux/module.h index 196b499270d..3bfed013350 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -60,6 +60,7 @@ struct module_kobject struct kobject kobj; struct module *mod; struct kobject *drivers_dir; + struct module_param_attrs *mp; }; /* These are either module local, or the kernel's dummy ones. */ @@ -242,7 +243,6 @@ struct module /* Sysfs stuff. */ struct module_kobject mkobj; - struct module_param_attrs *param_attrs; struct module_attribute *modinfo_attrs; const char *version; const char *srcversion; diff --git a/kernel/params.c b/kernel/params.c index aca07e1a050..f27c992a462 100644 --- a/kernel/params.c +++ b/kernel/params.c @@ -373,6 +373,8 @@ int param_get_string(char *buffer, struct kernel_param *kp) } /* sysfs output in /sys/modules/XYZ/parameters/ */ +#define to_module_attr(n) container_of(n, struct module_attribute, attr); +#define to_module_kobject(n) container_of(n, struct module_kobject, kobj); extern struct kernel_param __start___param[], __stop___param[]; @@ -384,6 +386,7 @@ struct param_attribute struct module_param_attrs { + unsigned int num; struct attribute_group grp; struct param_attribute attrs[0]; }; @@ -434,69 +437,84 @@ static ssize_t param_attr_store(struct module_attribute *mattr, #ifdef CONFIG_SYSFS /* - * param_sysfs_setup - setup sysfs support for one module or KBUILD_MODNAME - * @mk: struct module_kobject (contains parent kobject) - * @kparam: array of struct kernel_param, the actual parameter 
definitions - * @num_params: number of entries in array - * @name_skip: offset where the parameter name start in kparam[].name. Needed for built-in "modules" + * add_sysfs_param - add a parameter to sysfs + * @mk: struct module_kobject + * @kparam: the actual parameter definition to add to sysfs + * @name: name of parameter * - * Create a kobject for a (per-module) group of parameters, and create files - * in sysfs. A pointer to the param_kobject is returned on success, - * NULL if there's no parameter to export, or other ERR_PTR(err). + * Create a kobject if for a (per-module) parameter if mp NULL, and + * create file in sysfs. Returns an error on out of memory. Always cleans up + * if there's an error. */ -static __modinit struct module_param_attrs * -param_sysfs_setup(struct module_kobject *mk, - struct kernel_param *kparam, - unsigned int num_params, - unsigned int name_skip) +static __modinit int add_sysfs_param(struct module_kobject *mk, + struct kernel_param *kp, + const char *name) { - struct module_param_attrs *mp; - unsigned int valid_attrs = 0; - unsigned int i, size[2]; - struct param_attribute *pattr; - struct attribute **gattr; - int err; - - for (i=0; i<num_params; i++) { - if (kparam[i].perm) - valid_attrs++; + struct module_param_attrs *new; + struct attribute **attrs; + int err, num; + + /* We don't bother calling this with invisible parameters. */ + BUG_ON(!kp->perm); + + if (!mk->mp) { + num = 0; + attrs = NULL; + } else { + num = mk->mp->num; + attrs = mk->mp->grp.attrs; } - if (!valid_attrs) - return NULL; - - size[0] = ALIGN(sizeof(*mp) + - valid_attrs * sizeof(mp->attrs[0]), - sizeof(mp->grp.attrs[0])); - size[1] = (valid_attrs + 1) * sizeof(mp->grp.attrs[0]); - - mp = kzalloc(size[0] + size[1], GFP_KERNEL); - if (!mp) - return ERR_PTR(-ENOMEM); + /* Enlarge.
*/ + new = krealloc(mk->mp, + sizeof(*mk->mp) + sizeof(mk->mp->attrs[0]) * (num+1), + GFP_KERNEL); + if (!new) { + kfree(mk->mp); + err = -ENOMEM; + goto fail; + } + attrs = krealloc(attrs, sizeof(new->grp.attrs[0])*(num+2), GFP_KERNEL); + if (!attrs) { + err = -ENOMEM; + goto fail_free_new; + } - mp->grp.name = "parameters"; - mp->grp.attrs = (void *)mp + size[0]; + /* Sysfs wants everything zeroed. */ + memset(new, 0, sizeof(*new)); + memset(&new->attrs[num], 0, sizeof(new->attrs[num])); + memset(&attrs[num], 0, sizeof(attrs[num])); + new->grp.name = "parameters"; + new->grp.attrs = attrs; + + /* Tack new one on the end. */ + new->attrs[num].param = kp; + new->attrs[num].mattr.show = param_attr_show; + new->attrs[num].mattr.store = param_attr_store; + new->attrs[num].mattr.attr.name = (char *)name; + new->attrs[num].mattr.attr.mode = kp->perm; + new->num = num+1; + + /* Fix up all the pointers, since krealloc can move us */ + for (num = 0; num < new->num; num++) + new->grp.attrs[num] = &new->attrs[num].mattr.attr; + new->grp.attrs[num] = NULL; + + mk->mp = new; + return 0; - pattr = &mp->attrs[0]; - gattr = &mp->grp.attrs[0]; - for (i = 0; i < num_params; i++) { - struct kernel_param *kp = &kparam[i]; - if (kp->perm) { - pattr->param = kp; - pattr->mattr.show = param_attr_show; - pattr->mattr.store = param_attr_store; - pattr->mattr.attr.name = (char *)&kp->name[name_skip]; - pattr->mattr.attr.mode = kp->perm; - *(gattr++) = &(pattr++)->mattr.attr; - } - } - *gattr = NULL; +fail_free_new: + kfree(new); +fail: + mk->mp = NULL; + return err; +} - if ((err = sysfs_create_group(&mk->kobj, &mp->grp))) { - kfree(mp); - return ERR_PTR(err); - } - return mp; +static void free_module_param_attrs(struct module_kobject *mk) +{ + kfree(mk->mp->grp.attrs); + kfree(mk->mp); + mk->mp = NULL; } #ifdef CONFIG_MODULES @@ -506,21 +524,33 @@ param_sysfs_setup(struct module_kobject *mk, * @kparam: module parameters (array) * @num_params: number of module parameters * - * Adds sysfs 
entries for module parameters, and creates a link from - * /sys/module/[mod->name]/parameters to /sys/parameters/[mod->name]/ + * Adds sysfs entries for module parameters under + * /sys/module/[mod->name]/parameters/ */ int module_param_sysfs_setup(struct module *mod, struct kernel_param *kparam, unsigned int num_params) { - struct module_param_attrs *mp; + int i, err; + bool params = false; + + for (i = 0; i < num_params; i++) { + if (kparam[i].perm == 0) + continue; + err = add_sysfs_param(&mod->mkobj, &kparam[i], kparam[i].name); + if (err) + return err; + params = true; + } - mp = param_sysfs_setup(&mod->mkobj, kparam, num_params, 0); - if (IS_ERR(mp)) - return PTR_ERR(mp); + if (!params) + return 0; - mod->param_attrs = mp; - return 0; + /* Create the param group. */ + err = sysfs_create_group(&mod->mkobj.kobj, &mod->mkobj.mp->grp); + if (err) + free_module_param_attrs(&mod->mkobj); + return err; } /* @@ -532,43 +562,55 @@ int module_param_sysfs_setup(struct module *mod, */ void module_param_sysfs_remove(struct module *mod) { - if (mod->param_attrs) { - sysfs_remove_group(&mod->mkobj.kobj, - &mod->param_attrs->grp); + if (mod->mkobj.mp) { + sysfs_remove_group(&mod->mkobj.kobj, &mod->mkobj.mp->grp); /* We are positive that no one is using any param * attrs at this point. Deallocate immediately. 
*/ - kfree(mod->param_attrs); - mod->param_attrs = NULL; + free_module_param_attrs(&mod->mkobj); } } #endif -/* - * kernel_param_sysfs_setup - wrapper for built-in params support - */ -static void __init kernel_param_sysfs_setup(const char *name, - struct kernel_param *kparam, - unsigned int num_params, - unsigned int name_skip) +static void __init kernel_add_sysfs_param(const char *name, + struct kernel_param *kparam, + unsigned int name_skip) { struct module_kobject *mk; - int ret; + struct kobject *kobj; + int err; - mk = kzalloc(sizeof(struct module_kobject), GFP_KERNEL); - BUG_ON(!mk); - - mk->mod = THIS_MODULE; - mk->kobj.kset = module_kset; - ret = kobject_init_and_add(&mk->kobj, &module_ktype, NULL, "%s", name); - if (ret) { - kobject_put(&mk->kobj); - printk(KERN_ERR "Module '%s' failed to be added to sysfs, " - "error number %d\n", name, ret); - printk(KERN_ERR "The system will be unstable now.\n"); - return; + kobj = kset_find_obj(module_kset, name); + if (kobj) { + /* We already have one. Remove params so we can add more. */ + mk = to_module_kobject(kobj); + /* We need to remove it before adding parameters. */ + sysfs_remove_group(&mk->kobj, &mk->mp->grp); + } else { + mk = kzalloc(sizeof(struct module_kobject), GFP_KERNEL); + BUG_ON(!mk); + + mk->mod = THIS_MODULE; + mk->kobj.kset = module_kset; + err = kobject_init_and_add(&mk->kobj, &module_ktype, NULL, + "%s", name); + if (err) { + kobject_put(&mk->kobj); + printk(KERN_ERR "Module '%s' failed add to sysfs, " + "error number %d\n", name, err); + printk(KERN_ERR "The system will be unstable now.\n"); + return; + } + /* So that exit path is even. */ + kobject_get(&mk->kobj); } - param_sysfs_setup(mk, kparam, num_params, name_skip); + + /* These should not fail at boot. 
*/ + err = add_sysfs_param(mk, kparam, kparam->name + name_skip); + BUG_ON(err); + err = sysfs_create_group(&mk->kobj, &mk->mp->grp); + BUG_ON(err); kobject_uevent(&mk->kobj, KOBJ_ADD); + kobject_put(&mk->kobj); } /* @@ -579,18 +621,19 @@ static void __init kernel_param_sysfs_setup(const char *name, * The "module" name (KBUILD_MODNAME) is stored before a dot, the * "parameter" name is stored behind a dot in kernel_param->name. So, * extract the "module" name for all built-in kernel_param-eters, - * and for all who have the same, call kernel_param_sysfs_setup. + * and for all who have the same, call kernel_add_sysfs_param. */ static void __init param_sysfs_builtin(void) { - struct kernel_param *kp, *kp_begin = NULL; - unsigned int i, name_len, count = 0; - char modname[MODULE_NAME_LEN] = ""; + struct kernel_param *kp; + unsigned int name_len; + char modname[MODULE_NAME_LEN]; - for (i=0; i < __stop___param - __start___param; i++) { + for (kp = __start___param; kp < __stop___param; kp++) { char *dot; - kp = &__start___param[i]; + if (kp->perm == 0) + continue; dot = strchr(kp->name, '.'); if (!dot) { @@ -599,37 +642,15 @@ static void __init param_sysfs_builtin(void) continue; } name_len = dot - kp->name; - - /* new kbuild_modname? */ - if (strlen(modname) != name_len - || strncmp(modname, kp->name, name_len) != 0) { - /* add a new kobject for previous kernel_params. 
*/ - if (count) - kernel_param_sysfs_setup(modname, - kp_begin, - count, - strlen(modname)+1); - - strncpy(modname, kp->name, name_len); - modname[name_len] = '\0'; - count = 0; - kp_begin = kp; - } - count++; + strncpy(modname, kp->name, name_len); + modname[name_len] = '\0'; + kernel_add_sysfs_param(modname, kp, name_len+1); } - - /* last kernel_params need to be registered as well */ - if (count) - kernel_param_sysfs_setup(modname, kp_begin, count, - strlen(modname)+1); } /* module-related sysfs stuff */ -#define to_module_attr(n) container_of(n, struct module_attribute, attr); -#define to_module_kobject(n) container_of(n, struct module_kobject, kobj); - static ssize_t module_attr_show(struct kobject *kobj, struct attribute *attr, char *buf) -- cgit v1.2.3-70-g09d2 From 67e67ceaac5bf55dbdceb704ff2d763d438b5373 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Wed, 22 Oct 2008 10:00:23 -0500 Subject: core_param() for genuinely core kernel parameters There are a lot of one-liner uses of __setup() in the kernel: they're cumbersome and not queryable (definitely not settable) via /sys. Yet it's ugly to simplify them to module_param(), because by default that inserts a prefix of the module name (usually filename). So, introduce a "core_param". The parameter gets no prefix, but appears in /sys/module/kernel/parameters/ (if non-zero perms arg). I thought about using the name "core", but that's more common than "kernel". And if you create a module called "kernel", you will die a horrible death. 
Signed-off-by: Rusty Russell --- include/linux/moduleparam.h | 19 +++++++++++++++++++ kernel/params.c | 14 +++++++------- 2 files changed, 26 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/moduleparam.h b/include/linux/moduleparam.h index 1eefe6d61b8..e4af3399ef4 100644 --- a/include/linux/moduleparam.h +++ b/include/linux/moduleparam.h @@ -104,6 +104,25 @@ struct kparam_array #define module_param(name, type, perm) \ module_param_named(name, name, type, perm) +#ifndef MODULE +/** + * core_param - define a historical core kernel parameter. + * @name: the name of the cmdline and sysfs parameter (often the same as var) + * @var: the variable + * @type: the type (for param_set_##type and param_get_##type) + * @perm: visibility in sysfs + * + * core_param is just like module_param(), but cannot be modular and + * doesn't add a prefix (such as "printk."). This is for compatibility + * with __setup(), and it makes sense as truly core parameters aren't + * tied to the particular file they're in. + */ +#define core_param(name, var, type, perm) \ + param_check_##type(name, &(var)); \ + __module_param_call("", name, param_set_##type, param_get_##type, \ + &var, perm) +#endif /* !MODULE */ + /* Actually copy string: maxlen param is usually sizeof(string). 
*/ #define module_param_string(name, string, len, perm) \ static const struct kparam_string __param_string_##name \ diff --git a/kernel/params.c b/kernel/params.c index f27c992a462..b077f1b045d 100644 --- a/kernel/params.c +++ b/kernel/params.c @@ -637,14 +637,14 @@ static void __init param_sysfs_builtin(void) dot = strchr(kp->name, '.'); if (!dot) { - DEBUGP("couldn't find period in first %d characters " - "of %s\n", MODULE_NAME_LEN, kp->name); - continue; + /* This happens for core_param() */ + strcpy(modname, "kernel"); + name_len = 0; + } else { + name_len = dot - kp->name + 1; + strlcpy(modname, kp->name, name_len); } - name_len = dot - kp->name; - strncpy(modname, kp->name, name_len); - modname[name_len] = '\0'; - kernel_add_sysfs_param(modname, kp, name_len+1); + kernel_add_sysfs_param(modname, kp, name_len); } } -- cgit v1.2.3-70-g09d2 From 0d557dc97f4bb501f086a03d0f00b99a7855d794 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 13 Oct 2008 23:50:09 +0200 Subject: workqueue: introduce create_rt_workqueue create_rt_workqueue will create a real time prioritized workqueue. This is needed for the conversion of stop_machine to a workqueue based implementation. This patch adds yet another parameter to __create_workqueue_key to tell it that we want an rt workqueue. However it looks like we rather should have something like "int type" instead of singlethread, freezable and rt. 
Signed-off-by: Heiko Carstens Signed-off-by: Rusty Russell Cc: Ingo Molnar --- include/linux/workqueue.h | 18 ++++++++++-------- kernel/workqueue.c | 7 ++++++- 2 files changed, 16 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 5c158c477ac..89a5a1231ff 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -149,11 +149,11 @@ struct execute_work { extern struct workqueue_struct * __create_workqueue_key(const char *name, int singlethread, - int freezeable, struct lock_class_key *key, + int freezeable, int rt, struct lock_class_key *key, const char *lock_name); #ifdef CONFIG_LOCKDEP -#define __create_workqueue(name, singlethread, freezeable) \ +#define __create_workqueue(name, singlethread, freezeable, rt) \ ({ \ static struct lock_class_key __key; \ const char *__lock_name; \ @@ -164,17 +164,19 @@ __create_workqueue_key(const char *name, int singlethread, __lock_name = #name; \ \ __create_workqueue_key((name), (singlethread), \ - (freezeable), &__key, \ + (freezeable), (rt), &__key, \ __lock_name); \ }) #else -#define __create_workqueue(name, singlethread, freezeable) \ - __create_workqueue_key((name), (singlethread), (freezeable), NULL, NULL) +#define __create_workqueue(name, singlethread, freezeable, rt) \ + __create_workqueue_key((name), (singlethread), (freezeable), (rt), \ + NULL, NULL) #endif -#define create_workqueue(name) __create_workqueue((name), 0, 0) -#define create_freezeable_workqueue(name) __create_workqueue((name), 1, 1) -#define create_singlethread_workqueue(name) __create_workqueue((name), 1, 0) +#define create_workqueue(name) __create_workqueue((name), 0, 0, 0) +#define create_rt_workqueue(name) __create_workqueue((name), 0, 0, 1) +#define create_freezeable_workqueue(name) __create_workqueue((name), 1, 1, 0) +#define create_singlethread_workqueue(name) __create_workqueue((name), 1, 0, 0) extern void destroy_workqueue(struct workqueue_struct *wq); 
diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 714afad4653..f928f2a87b9 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -62,6 +62,7 @@ struct workqueue_struct { const char *name; int singlethread; int freezeable; /* Freeze threads during suspend */ + int rt; #ifdef CONFIG_LOCKDEP struct lockdep_map lockdep_map; #endif @@ -766,6 +767,7 @@ init_cpu_workqueue(struct workqueue_struct *wq, int cpu) static int create_workqueue_thread(struct cpu_workqueue_struct *cwq, int cpu) { + struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 }; struct workqueue_struct *wq = cwq->wq; const char *fmt = is_single_threaded(wq) ? "%s" : "%s/%d"; struct task_struct *p; @@ -781,7 +783,8 @@ static int create_workqueue_thread(struct cpu_workqueue_struct *cwq, int cpu) */ if (IS_ERR(p)) return PTR_ERR(p); - + if (cwq->wq->rt) + sched_setscheduler_nocheck(p, SCHED_FIFO, &param); cwq->thread = p; return 0; @@ -801,6 +804,7 @@ static void start_workqueue_thread(struct cpu_workqueue_struct *cwq, int cpu) struct workqueue_struct *__create_workqueue_key(const char *name, int singlethread, int freezeable, + int rt, struct lock_class_key *key, const char *lock_name) { @@ -822,6 +826,7 @@ struct workqueue_struct *__create_workqueue_key(const char *name, lockdep_init_map(&wq->lockdep_map, lock_name, key, 0); wq->singlethread = singlethread; wq->freezeable = freezeable; + wq->rt = rt; INIT_LIST_HEAD(&wq->list); if (singlethread) { -- cgit v1.2.3-70-g09d2 From a30d46c042c8a17ef25de02f439fbd120ab8a8de Mon Sep 17 00:00:00 2001 From: David Brownell Date: Mon, 20 Oct 2008 23:46:28 +0200 Subject: mfd: twl4030 IRQ handling update - Move it into a separate file; clean and streamline it - Restructure the init code for reuse during secondary dispatch - Support both levels (primary, secondary) of IRQ dispatch - Use a workqueue for irq mask/unmask and trigger configuration Code for two subchips currently share that secondary handler code.
One is the power subchip; its IRQs are now handled by this core, courtesy of this patch. The other is the GPIO module, which will be supported through a later patch. There are also minor changes to the header file, mostly related to GPIO support; nothing yet in mainline cares about those. A few references to OMAP-specific symbols are disabled; when they can all be removed, the TWL4030 support ceases being OMAP-specific. Signed-off-by: David Brownell Signed-off-by: Tony Lindgren Signed-off-by: Samuel Ortiz --- drivers/mfd/Makefile | 2 +- drivers/mfd/twl4030-core.c | 421 +------------------------ drivers/mfd/twl4030-irq.c | 743 ++++++++++++++++++++++++++++++++++++++++++++ include/linux/i2c/twl4030.h | 28 +- 4 files changed, 777 insertions(+), 417 deletions(-) create mode 100644 drivers/mfd/twl4030-irq.c (limited to 'include/linux') diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile index 68e237b830a..0acefe8aff8 100644 --- a/drivers/mfd/Makefile +++ b/drivers/mfd/Makefile @@ -17,7 +17,7 @@ wm8350-objs := wm8350-core.o wm8350-regmap.o wm8350-gpio.o obj-$(CONFIG_MFD_WM8350) += wm8350.o obj-$(CONFIG_MFD_WM8350_I2C) += wm8350-i2c.o -obj-$(CONFIG_TWL4030_CORE) += twl4030-core.o +obj-$(CONFIG_TWL4030_CORE) += twl4030-core.o twl4030-irq.o obj-$(CONFIG_MFD_CORE) += mfd-core.o diff --git a/drivers/mfd/twl4030-core.c b/drivers/mfd/twl4030-core.c index fd9a0160202..dd843c4fbcc 100644 --- a/drivers/mfd/twl4030-core.c +++ b/drivers/mfd/twl4030-core.c @@ -27,15 +27,11 @@ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ -#include #include #include -#include -#include -#include -#include #include #include +#include #include #include @@ -93,26 +89,6 @@ #define twl_has_usb() false #endif -static inline void activate_irq(int irq) -{ -#ifdef CONFIG_ARM - /* ARM requires an extra step to clear IRQ_NOREQUEST, which it - * sets on behalf of every irq_chip. Also sets IRQ_NOPROBE. 
- */ - set_irq_flags(irq, IRQF_VALID); -#else - /* same effect on other architectures */ - set_irq_noprobe(irq); -#endif -} - -/* Primary Interrupt Handler on TWL4030 Registers */ - -/* Register Definitions */ - -#define REG_PIH_ISR_P1 (0x1) -#define REG_PIH_ISR_P2 (0x2) -#define REG_PIH_SIR (0x3) /* Triton Core internal information (BEGIN) */ @@ -175,138 +151,6 @@ static inline void activate_irq(int irq) /*----------------------------------------------------------------------*/ -/** - * struct twl4030_mod_iregs - TWL module IMR/ISR regs to mask/clear at init - * @mod_no: TWL4030 module number (e.g., TWL4030_MODULE_GPIO) - * @sih_ctrl: address of module SIH_CTRL register - * @reg_cnt: number of IMR/ISR regs - * @imrs: pointer to array of TWL module interrupt mask register indices - * @isrs: pointer to array of TWL module interrupt status register indices - * - * Ties together TWL4030 modules and lists of IMR/ISR registers to mask/clear - * during twl_init_irq(). - */ -struct twl4030_mod_iregs { - const u8 mod_no; - const u8 sih_ctrl; - const u8 reg_cnt; - const u8 *imrs; - const u8 *isrs; -}; - -/* TWL4030 INT module interrupt mask registers */ -static const u8 __initconst twl4030_int_imr_regs[] = { - TWL4030_INT_PWR_IMR1, - TWL4030_INT_PWR_IMR2, -}; - -/* TWL4030 INT module interrupt status registers */ -static const u8 __initconst twl4030_int_isr_regs[] = { - TWL4030_INT_PWR_ISR1, - TWL4030_INT_PWR_ISR2, -}; - -/* TWL4030 INTERRUPTS module interrupt mask registers */ -static const u8 __initconst twl4030_interrupts_imr_regs[] = { - TWL4030_INTERRUPTS_BCIIMR1A, - TWL4030_INTERRUPTS_BCIIMR1B, - TWL4030_INTERRUPTS_BCIIMR2A, - TWL4030_INTERRUPTS_BCIIMR2B, -}; - -/* TWL4030 INTERRUPTS module interrupt status registers */ -static const u8 __initconst twl4030_interrupts_isr_regs[] = { - TWL4030_INTERRUPTS_BCIISR1A, - TWL4030_INTERRUPTS_BCIISR1B, - TWL4030_INTERRUPTS_BCIISR2A, - TWL4030_INTERRUPTS_BCIISR2B, -}; - -/* TWL4030 MADC module interrupt mask registers */ -static 
const u8 __initconst twl4030_madc_imr_regs[] = { - TWL4030_MADC_IMR1, - TWL4030_MADC_IMR2, -}; - -/* TWL4030 MADC module interrupt status registers */ -static const u8 __initconst twl4030_madc_isr_regs[] = { - TWL4030_MADC_ISR1, - TWL4030_MADC_ISR2, -}; - -/* TWL4030 keypad module interrupt mask registers */ -static const u8 __initconst twl4030_keypad_imr_regs[] = { - TWL4030_KEYPAD_KEYP_IMR1, - TWL4030_KEYPAD_KEYP_IMR2, -}; - -/* TWL4030 keypad module interrupt status registers */ -static const u8 __initconst twl4030_keypad_isr_regs[] = { - TWL4030_KEYPAD_KEYP_ISR1, - TWL4030_KEYPAD_KEYP_ISR2, -}; - -/* TWL4030 GPIO module interrupt mask registers */ -static const u8 __initconst twl4030_gpio_imr_regs[] = { - REG_GPIO_IMR1A, - REG_GPIO_IMR1B, - REG_GPIO_IMR2A, - REG_GPIO_IMR2B, - REG_GPIO_IMR3A, - REG_GPIO_IMR3B, -}; - -/* TWL4030 GPIO module interrupt status registers */ -static const u8 __initconst twl4030_gpio_isr_regs[] = { - REG_GPIO_ISR1A, - REG_GPIO_ISR1B, - REG_GPIO_ISR2A, - REG_GPIO_ISR2B, - REG_GPIO_ISR3A, - REG_GPIO_ISR3B, -}; - -/* TWL4030 modules that have IMR/ISR registers that must be masked/cleared */ -static const struct twl4030_mod_iregs __initconst twl4030_mod_regs[] = { - { - .mod_no = TWL4030_MODULE_INT, - .sih_ctrl = TWL4030_INT_PWR_SIH_CTRL, - .reg_cnt = ARRAY_SIZE(twl4030_int_imr_regs), - .imrs = twl4030_int_imr_regs, - .isrs = twl4030_int_isr_regs, - }, - { - .mod_no = TWL4030_MODULE_INTERRUPTS, - .sih_ctrl = TWL4030_INTERRUPTS_BCISIHCTRL, - .reg_cnt = ARRAY_SIZE(twl4030_interrupts_imr_regs), - .imrs = twl4030_interrupts_imr_regs, - .isrs = twl4030_interrupts_isr_regs, - }, - { - .mod_no = TWL4030_MODULE_MADC, - .sih_ctrl = TWL4030_MADC_SIH_CTRL, - .reg_cnt = ARRAY_SIZE(twl4030_madc_imr_regs), - .imrs = twl4030_madc_imr_regs, - .isrs = twl4030_madc_isr_regs, - }, - { - .mod_no = TWL4030_MODULE_KEYPAD, - .sih_ctrl = TWL4030_KEYPAD_KEYP_SIH_CTRL, - .reg_cnt = ARRAY_SIZE(twl4030_keypad_imr_regs), - .imrs = twl4030_keypad_imr_regs, - .isrs = 
twl4030_keypad_isr_regs, - }, - { - .mod_no = TWL4030_MODULE_GPIO, - .sih_ctrl = REG_GPIO_SIH_CTRL, - .reg_cnt = ARRAY_SIZE(twl4030_gpio_imr_regs), - .imrs = twl4030_gpio_imr_regs, - .isrs = twl4030_gpio_isr_regs, - }, -}; - -/*----------------------------------------------------------------*/ - /* is driver active, bound to a chip? */ static bool inuse; @@ -367,33 +211,6 @@ static struct twl4030mapping twl4030_map[TWL4030_MODULE_LAST + 1] = { /*----------------------------------------------------------------------*/ -/* - * TWL4030 doesn't have PIH mask, hence dummy function for mask - * and unmask of the (eight) interrupts reported at that level ... - * masking is only available from SIH (secondary) modules. - */ - -static void twl4030_i2c_ackirq(unsigned int irq) -{ -} - -static void twl4030_i2c_disableint(unsigned int irq) -{ -} - -static void twl4030_i2c_enableint(unsigned int irq) -{ -} - -static struct irq_chip twl4030_irq_chip = { - .name = "twl4030", - .ack = twl4030_i2c_ackirq, - .mask = twl4030_i2c_disableint, - .unmask = twl4030_i2c_enableint, -}; - -/*----------------------------------------------------------------------*/ - /* Exported Functions */ /** @@ -535,108 +352,11 @@ EXPORT_SYMBOL(twl4030_i2c_read_u8); /*----------------------------------------------------------------------*/ -static unsigned twl4030_irq_base; - -static struct completion irq_event; - -/* - * This thread processes interrupts reported by the Primary Interrupt Handler. 
- */ -static int twl4030_irq_thread(void *data) -{ - long irq = (long)data; - irq_desc_t *desc = irq_desc + irq; - static unsigned i2c_errors; - const static unsigned max_i2c_errors = 100; - - current->flags |= PF_NOFREEZE; - - while (!kthread_should_stop()) { - int ret; - int module_irq; - u8 pih_isr; - - /* Wait for IRQ, then read PIH irq status (also blocking) */ - wait_for_completion_interruptible(&irq_event); - - ret = twl4030_i2c_read_u8(TWL4030_MODULE_PIH, &pih_isr, - REG_PIH_ISR_P1); - if (ret) { - pr_warning("%s: I2C error %d reading PIH ISR\n", - DRIVER_NAME, ret); - if (++i2c_errors >= max_i2c_errors) { - printk(KERN_ERR "Maximum I2C error count" - " exceeded. Terminating %s.\n", - __func__); - break; - } - complete(&irq_event); - continue; - } - - /* these handlers deal with the relevant SIH irq status */ - local_irq_disable(); - for (module_irq = twl4030_irq_base; - pih_isr; - pih_isr >>= 1, module_irq++) { - if (pih_isr & 0x1) { - irq_desc_t *d = irq_desc + module_irq; - - d->handle_irq(module_irq, d); - } - } - local_irq_enable(); - - desc->chip->unmask(irq); - } - - return 0; -} - /* - * do_twl4030_irq() is the desc->handle method for the twl4030 interrupt. - * This is a chained interrupt, so there is no desc->action method for it. - * Now we need to query the interrupt controller in the twl4030 to determine - * which module is generating the interrupt request. However, we can't do i2c - * transactions in interrupt context, so we must defer that work to a kernel - * thread. All we do here is acknowledge and mask the interrupt and wakeup - * the kernel thread. + * NOTE: We know the first 8 IRQs after pdata->base_irq are + * for the PIH, and the next are for the PWR_INT SIH, since + * that's how twl_init_irq() sets things up. 
*/ -static void do_twl4030_irq(unsigned int irq, irq_desc_t *desc) -{ - const unsigned int cpu = smp_processor_id(); - - /* - * Earlier this was desc->triggered = 1; - */ - desc->status |= IRQ_LEVEL; - - /* - * Acknowledge, clear _AND_ disable the interrupt. - */ - desc->chip->ack(irq); - - if (!desc->depth) { - kstat_cpu(cpu).irqs[irq]++; - - complete(&irq_event); - } -} - -static struct task_struct * __init start_twl4030_irq_thread(long irq) -{ - struct task_struct *thread; - - init_completion(&irq_event); - thread = kthread_run(twl4030_irq_thread, (void *)irq, "twl4030-irq"); - if (!thread) - pr_err("%s: could not create twl4030 irq %ld thread!\n", - DRIVER_NAME, irq); - - return thread; -} - -/*----------------------------------------------------------------------*/ static int add_children(struct twl4030_platform_data *pdata) { @@ -668,7 +388,7 @@ static int add_children(struct twl4030_platform_data *pdata) if (status == 0) { struct resource r = { - .start = TWL4030_PWRIRQ_CHG_PRES, + .start = pdata->irq_base + 8 + 1, .flags = IORESOURCE_IRQ, }; @@ -817,8 +537,7 @@ static int add_children(struct twl4030_platform_data *pdata) /* RTC module IRQ */ if (status == 0) { struct resource r = { - /* REVISIT don't hard-wire this stuff */ - .start = TWL4030_PWRIRQ_RTC, + .start = pdata->irq_base + 8 + 3, .flags = IORESOURCE_IRQ, }; @@ -863,7 +582,7 @@ static int add_children(struct twl4030_platform_data *pdata) if (status == 0) { struct resource r = { - .start = TWL4030_PWRIRQ_USB_PRES, + .start = pdata->irq_base + 8 + 2, .flags = IORESOURCE_IRQ, }; @@ -965,123 +684,17 @@ static void __init clocks_init(void) /*----------------------------------------------------------------------*/ -/** - * twl4030_i2c_clear_isr - clear TWL4030 SIH ISR regs via read + write - * @mod_no: TWL4030 module number - * @reg: register index to clear - * @cor: value of the _SIH_CTRL.COR bit (1 or 0) - * - * Either reads (cor == 1) or writes (cor == 0) to a TWL4030 interrupt - * status register to 
ensure that any prior interrupts are cleared. - * Returns the status from the I2C read operation. - */ -static int __init twl4030_i2c_clear_isr(u8 mod_no, u8 reg, u8 cor) -{ - u8 tmp; - - return (cor) ? twl4030_i2c_read_u8(mod_no, &tmp, reg) : - twl4030_i2c_write_u8(mod_no, 0xff, reg); -} - -/** - * twl4030_read_cor_bit - are TWL module ISRs cleared by reads or writes? - * @mod_no: TWL4030 module number - * @reg: register index to clear - * - * Returns 1 if the TWL4030 SIH interrupt status registers (ISRs) for - * the specified TWL module are cleared by reads, or 0 if cleared by - * writes. - */ -static int twl4030_read_cor_bit(u8 mod_no, u8 reg) -{ - u8 tmp = 0; - - WARN_ON(twl4030_i2c_read_u8(mod_no, &tmp, reg) < 0); - - tmp &= TWL4030_SIH_CTRL_COR_MASK; - tmp >>= __ffs(TWL4030_SIH_CTRL_COR_MASK); - - return tmp; -} - -/** - * twl4030_mask_clear_intrs - mask and clear all TWL4030 interrupts - * @t: pointer to twl4030_mod_iregs array - * @t_sz: ARRAY_SIZE(t) (starting at 1) - * - * Mask all TWL4030 interrupt mask registers (IMRs) and clear all - * interrupt status registers (ISRs). No return value, but will WARN if - * any I2C operations fail. - */ -static void __init twl4030_mask_clear_intrs(const struct twl4030_mod_iregs *t, - const u8 t_sz) -{ - int i, j; - - /* - * N.B. - further efficiency is possible here. Eight I2C - * operations on BCI and GPIO modules are avoidable if I2C - * burst read/write transactions were implemented. Would - * probably save about 1ms of boot time and a small amount of - * power. - */ - for (i = 0; i < t_sz; i++) { - const struct twl4030_mod_iregs tmr = t[i]; - int cor; - - /* Are ISRs cleared by reads or writes? 
*/ - cor = twl4030_read_cor_bit(tmr.mod_no, tmr.sih_ctrl); - - for (j = 0; j < tmr.reg_cnt; j++) { - - /* Mask interrupts at the TWL4030 */ - WARN_ON(twl4030_i2c_write_u8(tmr.mod_no, 0xff, - tmr.imrs[j]) < 0); - - /* Clear TWL4030 ISRs */ - WARN_ON(twl4030_i2c_clear_isr(tmr.mod_no, - tmr.isrs[j], cor) < 0); - } - } -} - - -static void twl_init_irq(int irq_num, unsigned irq_base, unsigned irq_end) -{ - int i; - - /* - * Mask and clear all TWL4030 interrupts since initially we do - * not have any TWL4030 module interrupt handlers present - */ - twl4030_mask_clear_intrs(twl4030_mod_regs, - ARRAY_SIZE(twl4030_mod_regs)); - - twl4030_irq_base = irq_base; - - /* install an irq handler for each of the PIH modules */ - for (i = irq_base; i < irq_end; i++) { - set_irq_chip_and_handler(i, &twl4030_irq_chip, - handle_simple_irq); - activate_irq(i); - } - - /* install an irq handler to demultiplex the TWL4030 interrupt */ - set_irq_data(irq_num, start_twl4030_irq_thread(irq_num)); - set_irq_chained_handler(irq_num, do_twl4030_irq); -} - -/*----------------------------------------------------------------------*/ +int twl_init_irq(int irq_num, unsigned irq_base, unsigned irq_end); +int twl_exit_irq(void); static int twl4030_remove(struct i2c_client *client) { unsigned i; + int status; - /* FIXME undo twl_init_irq() */ - if (twl4030_irq_base) { - dev_err(&client->dev, "can't yet clean up IRQs?\n"); - return -ENOSYS; - } + status = twl_exit_irq(); + if (status < 0) + return status; for (i = 0; i < TWL4030_NUM_SLAVES; i++) { struct twl4030_client *twl = &twl4030_modules[i]; @@ -1112,7 +725,7 @@ twl4030_probe(struct i2c_client *client, const struct i2c_device_id *id) return -EIO; } - if (inuse || twl4030_irq_base) { + if (inuse) { dev_dbg(&client->dev, "driver is already in use\n"); return -EBUSY; } @@ -1146,9 +759,9 @@ twl4030_probe(struct i2c_client *client, const struct i2c_device_id *id) if (client->irq && pdata->irq_base && pdata->irq_end > pdata->irq_base) { - 
twl_init_irq(client->irq, pdata->irq_base, pdata->irq_end); - dev_info(&client->dev, "IRQ %d chains IRQs %d..%d\n", - client->irq, pdata->irq_base, pdata->irq_end - 1); + status = twl_init_irq(client->irq, pdata->irq_base, pdata->irq_end); + if (status < 0) + goto fail; } status = add_children(pdata); diff --git a/drivers/mfd/twl4030-irq.c b/drivers/mfd/twl4030-irq.c new file mode 100644 index 00000000000..fae868a8d49 --- /dev/null +++ b/drivers/mfd/twl4030-irq.c @@ -0,0 +1,743 @@ +/* + * twl4030-irq.c - TWL4030/TPS659x0 irq support + * + * Copyright (C) 2005-2006 Texas Instruments, Inc. + * + * Modifications to defer interrupt handling to a kernel thread: + * Copyright (C) 2006 MontaVista Software, Inc. + * + * Based on tlv320aic23.c: + * Copyright (c) by Kai Svahn + * + * Code cleanup and modifications to IRQ handler. + * by syed khasim + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include +#include +#include +#include + +#include + + +/* + * TWL4030 IRQ handling has two stages in hardware, and thus in software. + * The Primary Interrupt Handler (PIH) stage exposes status bits saying + * which Secondary Interrupt Handler (SIH) stage is raising an interrupt. 
+ * SIH modules are more traditional IRQ components, which support per-IRQ + * enable/disable and trigger controls; they do most of the work. + * + * These chips are designed to support IRQ handling from two different + * I2C masters. Each has a dedicated IRQ line, and dedicated IRQ status + * and mask registers in the PIH and SIH modules. + * + * We set up IRQs starting at a platform-specified base, always starting + * with PIH and the SIH for PWR_INT and then usually adding GPIO: + * base + 0 .. base + 7 PIH + * base + 8 .. base + 15 SIH for PWR_INT + * base + 16 .. base + 33 SIH for GPIO + */ + +/* PIH register offsets */ +#define REG_PIH_ISR_P1 0x01 +#define REG_PIH_ISR_P2 0x02 +#define REG_PIH_SIR 0x03 /* for testing */ + + +/* Linux could (eventually) use either IRQ line */ +static int irq_line; + +struct sih { + char name[8]; + u8 module; /* module id */ + u8 control_offset; /* for SIH_CTRL */ + bool set_cor; + + u8 bits; /* valid in isr/imr */ + u8 bytes_ixr; /* bytelen of ISR/IMR/SIR */ + + u8 edr_offset; + u8 bytes_edr; /* bytelen of EDR */ + + /* SIR ignored -- set interrupt, for testing only */ + struct irq_data { + u8 isr_offset; + u8 imr_offset; + } mask[2]; + /* + 2 bytes padding */ +}; + +#define SIH_INITIALIZER(modname, nbits) \ + .module = TWL4030_MODULE_ ## modname, \ + .control_offset = TWL4030_ ## modname ## _SIH_CTRL, \ + .bits = nbits, \ + .bytes_ixr = DIV_ROUND_UP(nbits, 8), \ + .edr_offset = TWL4030_ ## modname ## _EDR, \ + .bytes_edr = DIV_ROUND_UP((2*(nbits)), 8), \ + .mask = { { \ + .isr_offset = TWL4030_ ## modname ## _ISR1, \ + .imr_offset = TWL4030_ ## modname ## _IMR1, \ + }, \ + { \ + .isr_offset = TWL4030_ ## modname ## _ISR2, \ + .imr_offset = TWL4030_ ## modname ## _IMR2, \ + }, }, + +/* register naming policies are inconsistent ... 
*/ +#define TWL4030_INT_PWR_EDR TWL4030_INT_PWR_EDR1 +#define TWL4030_MODULE_KEYPAD_KEYP TWL4030_MODULE_KEYPAD +#define TWL4030_MODULE_INT_PWR TWL4030_MODULE_INT + + +/* Order in this table matches order in PIH_ISR. That is, + * BIT(n) in PIH_ISR is sih_modules[n]. + */ +static const struct sih sih_modules[6] = { + [0] = { + .name = "gpio", + .module = TWL4030_MODULE_GPIO, + .control_offset = REG_GPIO_SIH_CTRL, + .set_cor = true, + .bits = TWL4030_GPIO_MAX, + .bytes_ixr = 3, + /* Note: *all* of these IRQs default to no-trigger */ + .edr_offset = REG_GPIO_EDR1, + .bytes_edr = 5, + .mask = { { + .isr_offset = REG_GPIO_ISR1A, + .imr_offset = REG_GPIO_IMR1A, + }, { + .isr_offset = REG_GPIO_ISR1B, + .imr_offset = REG_GPIO_IMR1B, + }, }, + }, + [1] = { + .name = "keypad", + .set_cor = true, + SIH_INITIALIZER(KEYPAD_KEYP, 4) + }, + [2] = { + .name = "bci", + .module = TWL4030_MODULE_INTERRUPTS, + .control_offset = TWL4030_INTERRUPTS_BCISIHCTRL, + .bits = 12, + .bytes_ixr = 2, + .edr_offset = TWL4030_INTERRUPTS_BCIEDR1, + /* Note: most of these IRQs default to no-trigger */ + .bytes_edr = 3, + .mask = { { + .isr_offset = TWL4030_INTERRUPTS_BCIISR1A, + .imr_offset = TWL4030_INTERRUPTS_BCIIMR1A, + }, { + .isr_offset = TWL4030_INTERRUPTS_BCIISR1B, + .imr_offset = TWL4030_INTERRUPTS_BCIIMR1B, + }, }, + }, + [3] = { + .name = "madc", + SIH_INITIALIZER(MADC, 4) + }, + [4] = { + /* USB doesn't use the same SIH organization */ + .name = "usb", + }, + [5] = { + .name = "power", + .set_cor = true, + SIH_INITIALIZER(INT_PWR, 8) + }, + /* there are no SIH modules #6 or #7 ... */ +}; + +#undef TWL4030_MODULE_KEYPAD_KEYP +#undef TWL4030_MODULE_INT_PWR +#undef TWL4030_INT_PWR_EDR + +/*----------------------------------------------------------------------*/ + +static unsigned twl4030_irq_base; + +static struct completion irq_event; + +/* + * This thread processes interrupts reported by the Primary Interrupt Handler. 
+ */ +static int twl4030_irq_thread(void *data) +{ + long irq = (long)data; + irq_desc_t *desc = irq_desc + irq; + static unsigned i2c_errors; + const static unsigned max_i2c_errors = 100; + + current->flags |= PF_NOFREEZE; + + while (!kthread_should_stop()) { + int ret; + int module_irq; + u8 pih_isr; + + /* Wait for IRQ, then read PIH irq status (also blocking) */ + wait_for_completion_interruptible(&irq_event); + + ret = twl4030_i2c_read_u8(TWL4030_MODULE_PIH, &pih_isr, + REG_PIH_ISR_P1); + if (ret) { + pr_warning("twl4030: I2C error %d reading PIH ISR\n", + ret); + if (++i2c_errors >= max_i2c_errors) { + printk(KERN_ERR "Maximum I2C error count" + " exceeded. Terminating %s.\n", + __func__); + break; + } + complete(&irq_event); + continue; + } + + /* these handlers deal with the relevant SIH irq status */ + local_irq_disable(); + for (module_irq = twl4030_irq_base; + pih_isr; + pih_isr >>= 1, module_irq++) { + if (pih_isr & 0x1) { + irq_desc_t *d = irq_desc + module_irq; + + /* These can't be masked ... always warn + * if we get any surprises. + */ + if (d->status & IRQ_DISABLED) + note_interrupt(module_irq, d, + IRQ_NONE); + else + d->handle_irq(module_irq, d); + } + } + local_irq_enable(); + + desc->chip->unmask(irq); + } + + return 0; +} + +/* + * handle_twl4030_pih() is the desc->handle method for the twl4030 interrupt. + * This is a chained interrupt, so there is no desc->action method for it. + * Now we need to query the interrupt controller in the twl4030 to determine + * which module is generating the interrupt request. However, we can't do i2c + * transactions in interrupt context, so we must defer that work to a kernel + * thread. All we do here is acknowledge and mask the interrupt and wakeup + * the kernel thread. + */ +static void handle_twl4030_pih(unsigned int irq, irq_desc_t *desc) +{ + /* Acknowledge, clear *AND* mask the interrupt... 
*/ + desc->chip->ack(irq); + complete(&irq_event); +} + +static struct task_struct *start_twl4030_irq_thread(long irq) +{ + struct task_struct *thread; + + init_completion(&irq_event); + thread = kthread_run(twl4030_irq_thread, (void *)irq, "twl4030-irq"); + if (!thread) + pr_err("twl4030: could not create irq %ld thread!\n", irq); + + return thread; +} + +/*----------------------------------------------------------------------*/ + +/* + * twl4030_init_sih_modules() ... start from a known state where no + * IRQs will be coming in, and where we can quickly enable them then + * handle them as they arrive. Mask all IRQs: maybe init SIH_CTRL. + * + * NOTE: we don't touch EDR registers here; they stay with hardware + * defaults or whatever the last value was. Note that when both EDR + * bits for an IRQ are clear, that's as if its IMR bit is set... + */ +static int twl4030_init_sih_modules(unsigned line) +{ + const struct sih *sih; + u8 buf[4]; + int i; + int status; + + /* line 0 == int1_n signal; line 1 == int2_n signal */ + if (line > 1) + return -EINVAL; + + irq_line = line; + + /* disable all interrupts on our line */ + memset(buf, 0xff, sizeof buf); + sih = sih_modules; + for (i = 0; i < ARRAY_SIZE(sih_modules); i++, sih++) { + + /* skip USB -- it's funky */ + if (!sih->bytes_ixr) + continue; + + status = twl4030_i2c_write(sih->module, buf, + sih->mask[line].imr_offset, sih->bytes_ixr); + if (status < 0) + pr_err("twl4030: err %d initializing %s %s\n", + status, sih->name, "IMR"); + + /* Maybe disable "exclusive" mode; buffer second pending irq; + * set Clear-On-Read (COR) bit. + * + * NOTE that sometimes COR polarity is documented as being + * inverted: for MADC and BCI, COR=1 means "clear on write". + * And for PWR_INT it's not documented... 
+ */ + if (sih->set_cor) { + status = twl4030_i2c_write_u8(sih->module, + TWL4030_SIH_CTRL_COR_MASK, + sih->control_offset); + if (status < 0) + pr_err("twl4030: err %d initializing %s %s\n", + status, sih->name, "SIH_CTRL"); + } + } + + sih = sih_modules; + for (i = 0; i < ARRAY_SIZE(sih_modules); i++, sih++) { + u8 rxbuf[4]; + int j; + + /* skip USB */ + if (!sih->bytes_ixr) + continue; + + /* Clear pending interrupt status. Either the read was + * enough, or we need to write those bits. Repeat, in + * case an IRQ is pending (PENDDIS=0) ... that's not + * uncommon with PWR_INT.PWRON. + */ + for (j = 0; j < 2; j++) { + status = twl4030_i2c_read(sih->module, rxbuf, + sih->mask[line].isr_offset, sih->bytes_ixr); + if (status < 0) + pr_err("twl4030: err %d initializing %s %s\n", + status, sih->name, "ISR"); + + if (!sih->set_cor) + status = twl4030_i2c_write(sih->module, buf, + sih->mask[line].isr_offset, + sih->bytes_ixr); + /* else COR=1 means read sufficed. + * (for most SIH modules...) + */ + } + } + + return 0; +} + +static inline void activate_irq(int irq) +{ +#ifdef CONFIG_ARM + /* ARM requires an extra step to clear IRQ_NOREQUEST, which it + * sets on behalf of every irq_chip. Also sets IRQ_NOPROBE. 
+ */ + set_irq_flags(irq, IRQF_VALID); +#else + /* same effect on other architectures */ + set_irq_noprobe(irq); +#endif +} + +/*----------------------------------------------------------------------*/ + +static DEFINE_SPINLOCK(sih_agent_lock); + +static struct workqueue_struct *wq; + +struct sih_agent { + int irq_base; + const struct sih *sih; + + u32 imr; + bool imr_change_pending; + struct work_struct mask_work; + + u32 edge_change; + struct work_struct edge_work; +}; + +static void twl4030_sih_do_mask(struct work_struct *work) +{ + struct sih_agent *agent; + const struct sih *sih; + union { + u8 bytes[4]; + u32 word; + } imr; + int status; + + agent = container_of(work, struct sih_agent, mask_work); + + /* see what work we have */ + spin_lock_irq(&sih_agent_lock); + if (agent->imr_change_pending) { + sih = agent->sih; + /* byte[0] gets overwritten as we write ... */ + imr.word = cpu_to_le32(agent->imr << 8); + agent->imr_change_pending = false; + } else + sih = NULL; + spin_unlock_irq(&sih_agent_lock); + if (!sih) + return; + + /* write the whole mask ... simpler than subsetting it */ + status = twl4030_i2c_write(sih->module, imr.bytes, + sih->mask[irq_line].imr_offset, sih->bytes_ixr); + if (status) + pr_err("twl4030: %s, %s --> %d\n", __func__, + "write", status); +} + +static void twl4030_sih_do_edge(struct work_struct *work) +{ + struct sih_agent *agent; + const struct sih *sih; + u8 bytes[6]; + u32 edge_change; + int status; + + agent = container_of(work, struct sih_agent, edge_work); + + /* see what work we have */ + spin_lock_irq(&sih_agent_lock); + edge_change = agent->edge_change; + agent->edge_change = 0;; + sih = edge_change ? agent->sih : NULL; + spin_unlock_irq(&sih_agent_lock); + if (!sih) + return; + + /* Read, reserving first byte for write scratch. Yes, this + * could be cached for some speedup ... but be careful about + * any processor on the other IRQ line, EDR registers are + * shared. 
+ */ + status = twl4030_i2c_read(sih->module, bytes + 1, + sih->edr_offset, sih->bytes_edr); + if (status) { + pr_err("twl4030: %s, %s --> %d\n", __func__, + "read", status); + return; + } + + /* Modify only the bits we know must change */ + while (edge_change) { + int i = fls(edge_change) - 1; + struct irq_desc *d = irq_desc + i + agent->irq_base; + int byte = 1 + (i >> 2); + int off = (i & 0x3) * 2; + + bytes[byte] &= ~(0x03 << off); + + spin_lock_irq(&d->lock); + if (d->status & IRQ_TYPE_EDGE_RISING) + bytes[byte] |= BIT(off + 1); + if (d->status & IRQ_TYPE_EDGE_FALLING) + bytes[byte] |= BIT(off + 0); + spin_unlock_irq(&d->lock); + + edge_change &= ~BIT(i); + } + + /* Write */ + status = twl4030_i2c_write(sih->module, bytes, + sih->edr_offset, sih->bytes_edr); + if (status) + pr_err("twl4030: %s, %s --> %d\n", __func__, + "write", status); +} + +/*----------------------------------------------------------------------*/ + +/* + * All irq_chip methods get issued from code holding irq_desc[irq].lock, + * which can't perform the underlying I2C operations (because they sleep). + * So we must hand them off to a thread (workqueue) and cope with asynch + * completion, potentially including some re-ordering, of these requests. 
+ */ + +static void twl4030_sih_mask(unsigned irq) +{ + struct sih_agent *sih = get_irq_chip_data(irq); + unsigned long flags; + + spin_lock_irqsave(&sih_agent_lock, flags); + sih->imr |= BIT(irq - sih->irq_base); + sih->imr_change_pending = true; + queue_work(wq, &sih->mask_work); + spin_unlock_irqrestore(&sih_agent_lock, flags); +} + +static void twl4030_sih_unmask(unsigned irq) +{ + struct sih_agent *sih = get_irq_chip_data(irq); + unsigned long flags; + + spin_lock_irqsave(&sih_agent_lock, flags); + sih->imr &= ~BIT(irq - sih->irq_base); + sih->imr_change_pending = true; + queue_work(wq, &sih->mask_work); + spin_unlock_irqrestore(&sih_agent_lock, flags); +} + +static int twl4030_sih_set_type(unsigned irq, unsigned trigger) +{ + struct sih_agent *sih = get_irq_chip_data(irq); + struct irq_desc *desc = irq_desc + irq; + unsigned long flags; + + if (trigger & ~(IRQ_TYPE_EDGE_FALLING | IRQ_TYPE_EDGE_RISING)) + return -EINVAL; + + spin_lock_irqsave(&sih_agent_lock, flags); + if ((desc->status & IRQ_TYPE_SENSE_MASK) != trigger) { + desc->status &= ~IRQ_TYPE_SENSE_MASK; + desc->status |= trigger; + sih->edge_change |= BIT(irq - sih->irq_base); + queue_work(wq, &sih->edge_work); + } + spin_unlock_irqrestore(&sih_agent_lock, flags); + return 0; +} + +static struct irq_chip twl4030_sih_irq_chip = { + .name = "twl4030", + .mask = twl4030_sih_mask, + .unmask = twl4030_sih_unmask, + .set_type = twl4030_sih_set_type, +}; + +/*----------------------------------------------------------------------*/ + +static inline int sih_read_isr(const struct sih *sih) +{ + int status; + union { + u8 bytes[4]; + u32 word; + } isr; + + /* FIXME need retry-on-error ... */ + + isr.word = 0; + status = twl4030_i2c_read(sih->module, isr.bytes, + sih->mask[irq_line].isr_offset, sih->bytes_ixr); + + return (status < 0) ? status : le32_to_cpu(isr.word); +} + +/* + * Generic handler for SIH interrupts ... we "know" this is called + * in task context, with IRQs enabled. 
+ */ +static void handle_twl4030_sih(unsigned irq, struct irq_desc *desc) +{ + struct sih_agent *agent = get_irq_data(irq); + const struct sih *sih = agent->sih; + int isr; + + /* reading ISR acks the IRQs, using clear-on-read mode */ + local_irq_enable(); + isr = sih_read_isr(sih); + local_irq_disable(); + + if (isr < 0) { + pr_err("twl4030: %s SIH, read ISR error %d\n", + sih->name, isr); + /* REVISIT: recover; eventually mask it all, etc */ + return; + } + + while (isr) { + irq = fls(isr); + irq--; + isr &= ~BIT(irq); + + if (irq < sih->bits) + generic_handle_irq(agent->irq_base + irq); + else + pr_err("twl4030: %s SIH, invalid ISR bit %d\n", + sih->name, irq); + } +} + +static unsigned twl4030_irq_next; + +/* returns the first IRQ used by this SIH bank, + * or negative errno + */ +int twl4030_sih_setup(int module) +{ + int sih_mod; + const struct sih *sih = NULL; + struct sih_agent *agent; + int i, irq; + int status = -EINVAL; + unsigned irq_base = twl4030_irq_next; + + /* only support modules with standard clear-on-read for now */ + for (sih_mod = 0, sih = sih_modules; + sih_mod < ARRAY_SIZE(sih_modules); + sih_mod++, sih++) { + if (sih->module == module && sih->set_cor) { + if (!WARN((irq_base + sih->bits) > NR_IRQS, + "irq %d for %s too big\n", + irq_base + sih->bits, + sih->name)) + status = 0; + break; + } + } + if (status < 0) + return status; + + agent = kzalloc(sizeof *agent, GFP_KERNEL); + if (!agent) + return -ENOMEM; + + status = 0; + + agent->irq_base = irq_base; + agent->sih = sih; + agent->imr = ~0; + INIT_WORK(&agent->mask_work, twl4030_sih_do_mask); + INIT_WORK(&agent->edge_work, twl4030_sih_do_edge); + + for (i = 0; i < sih->bits; i++) { + irq = irq_base + i; + + set_irq_chip_and_handler(irq, &twl4030_sih_irq_chip, + handle_edge_irq); + set_irq_chip_data(irq, agent); + activate_irq(irq); + } + + status = irq_base; + twl4030_irq_next += i; + + /* replace generic PIH handler (handle_simple_irq) */ + irq = sih_mod + twl4030_irq_base; + 
set_irq_data(irq, agent); + set_irq_chained_handler(irq, handle_twl4030_sih); + + pr_info("twl4030: %s (irq %d) chaining IRQs %d..%d\n", sih->name, + irq, irq_base, twl4030_irq_next - 1); + + return status; +} + +/* FIXME need a call to reverse twl4030_sih_setup() ... */ + + +/*----------------------------------------------------------------------*/ + +/* FIXME pass in which interrupt line we'll use ... */ +#define twl_irq_line 0 + +int twl_init_irq(int irq_num, unsigned irq_base, unsigned irq_end) +{ + static struct irq_chip twl4030_irq_chip; + + int status; + int i; + struct task_struct *task; + + /* + * Mask and clear all TWL4030 interrupts since initially we do + * not have any TWL4030 module interrupt handlers present + */ + status = twl4030_init_sih_modules(twl_irq_line); + if (status < 0) + return status; + + wq = create_singlethread_workqueue("twl4030-irqchip"); + if (!wq) { + pr_err("twl4030: workqueue FAIL\n"); + return -ESRCH; + } + + twl4030_irq_base = irq_base; + + /* install an irq handler for each of the SIH modules; + * clone dummy irq_chip since PIH can't *do* anything + */ + twl4030_irq_chip = dummy_irq_chip; + twl4030_irq_chip.name = "twl4030"; + + twl4030_sih_irq_chip.ack = dummy_irq_chip.ack; + + for (i = irq_base; i < irq_end; i++) { + set_irq_chip_and_handler(i, &twl4030_irq_chip, + handle_simple_irq); + activate_irq(i); + } + twl4030_irq_next = i; + pr_info("twl4030: %s (irq %d) chaining IRQs %d..%d\n", "PIH", + irq_num, irq_base, twl4030_irq_next - 1); + + /* ... and the PWR_INT module ... 
*/ + status = twl4030_sih_setup(TWL4030_MODULE_INT); + if (status < 0) { + pr_err("twl4030: sih_setup PWR INT --> %d\n", status); + goto fail; + } + + /* install an irq handler to demultiplex the TWL4030 interrupt */ + task = start_twl4030_irq_thread(irq_num); + if (!task) { + pr_err("twl4030: irq thread FAIL\n"); + status = -ESRCH; + goto fail; + } + + set_irq_data(irq_num, task); + set_irq_chained_handler(irq_num, handle_twl4030_pih); + + return status; + +fail: + for (i = irq_base; i < irq_end; i++) + set_irq_chip_and_handler(i, NULL, NULL); + destroy_workqueue(wq); + wq = NULL; + return status; +} + +int twl_exit_irq(void) +{ + /* FIXME undo twl_init_irq() */ + if (twl4030_irq_base) { + pr_err("twl4030: can't yet clean up IRQs?\n"); + return -ENOSYS; + } + return 0; +} diff --git a/include/linux/i2c/twl4030.h b/include/linux/i2c/twl4030.h index cdb453162a9..fb604dcd38f 100644 --- a/include/linux/i2c/twl4030.h +++ b/include/linux/i2c/twl4030.h @@ -228,6 +228,12 @@ struct twl4030_gpio_platform_data { int gpio_base; unsigned irq_base, irq_end; + /* package the two LED signals as output-only GPIOs? */ + bool use_leds; + + /* gpio-n should control VMMC(n+1) if BIT(n) in mmc_cd is set */ + u8 mmc_cd; + /* For gpio-N, bit (1 << N) in "pullups" is set if that pullup * should be enabled. Else, if that bit is set in "pulldowns", * that pulldown is enabled. Don't waste power by letting any @@ -277,6 +283,8 @@ struct twl4030_platform_data { /*----------------------------------------------------------------------*/ +int twl4030_sih_setup(int module); + /* * FIXME completely stop using TWL4030_IRQ_BASE ... instead, pass the * IRQ data to subsidiary devices using platform device resources. 
@@ -291,16 +299,16 @@ struct twl4030_platform_data { #define TWL4030_MODIRQ_BCI (TWL4030_IRQ_BASE + 2) #define TWL4030_MODIRQ_MADC (TWL4030_IRQ_BASE + 3) /* #define TWL4030_MODIRQ_USB (TWL4030_IRQ_BASE + 4) */ -#define TWL4030_MODIRQ_PWR (TWL4030_IRQ_BASE + 5) +/* #define TWL4030_MODIRQ_PWR (TWL4030_IRQ_BASE + 5) */ #define TWL4030_PWRIRQ_PWRBTN (TWL4030_PWR_IRQ_BASE + 0) -#define TWL4030_PWRIRQ_CHG_PRES (TWL4030_PWR_IRQ_BASE + 1) -#define TWL4030_PWRIRQ_USB_PRES (TWL4030_PWR_IRQ_BASE + 2) -#define TWL4030_PWRIRQ_RTC (TWL4030_PWR_IRQ_BASE + 3) -#define TWL4030_PWRIRQ_HOT_DIE (TWL4030_PWR_IRQ_BASE + 4) -#define TWL4030_PWRIRQ_PWROK_TIMEOUT (TWL4030_PWR_IRQ_BASE + 5) -#define TWL4030_PWRIRQ_MBCHG (TWL4030_PWR_IRQ_BASE + 6) -#define TWL4030_PWRIRQ_SC_DETECT (TWL4030_PWR_IRQ_BASE + 7) +/* #define TWL4030_PWRIRQ_CHG_PRES (TWL4030_PWR_IRQ_BASE + 1) */ +/* #define TWL4030_PWRIRQ_USB_PRES (TWL4030_PWR_IRQ_BASE + 2) */ +/* #define TWL4030_PWRIRQ_RTC (TWL4030_PWR_IRQ_BASE + 3) */ +/* #define TWL4030_PWRIRQ_HOT_DIE (TWL4030_PWR_IRQ_BASE + 4) */ +/* #define TWL4030_PWRIRQ_PWROK_TIMEOUT (TWL4030_PWR_IRQ_BASE + 5) */ +/* #define TWL4030_PWRIRQ_MBCHG (TWL4030_PWR_IRQ_BASE + 6) */ +/* #define TWL4030_PWRIRQ_SC_DETECT (TWL4030_PWR_IRQ_BASE + 7) */ /* Rest are unsued currently*/ @@ -317,17 +325,13 @@ struct twl4030_platform_data { /* TWL4030 GPIO interrupt definitions */ #define TWL4030_GPIO_IRQ_NO(n) (TWL4030_GPIO_IRQ_BASE + (n)) -#define TWL4030_GPIO_IS_ENABLE 1 /* * Exported TWL4030 GPIO APIs * * WARNING -- use standard GPIO and IRQ calls instead; these will vanish. 
*/ -int twl4030_get_gpio_datain(int gpio); -int twl4030_request_gpio(int gpio); int twl4030_set_gpio_debounce(int gpio, int enable); -int twl4030_free_gpio(int gpio); #if defined(CONFIG_TWL4030_BCI_BATTERY) || \ defined(CONFIG_TWL4030_BCI_BATTERY_MODULE) -- cgit v1.2.3-70-g09d2 From 27471fdb32e77ecb92f09d4ac5757785b4dc33bc Mon Sep 17 00:00:00 2001 From: Andy Henroid Date: Thu, 9 Oct 2008 11:45:22 -0700 Subject: i7300_idle driver v1.55 The Intel 7300 Memory Controller supports dynamic throttling of memory which can be used to save power when system is idle. This driver does the memory throttling when all CPUs are idle on such a system. Refer to "Intel 7300 Memory Controller Hub (MCH)" datasheet for the config space description. Signed-off-by: Andy Henroid Signed-off-by: Len Brown Signed-off-by: Venkatesh Pallipadi --- MAINTAINERS | 6 + arch/x86/Kconfig | 2 + drivers/Makefile | 1 + drivers/dma/ioat_dma.c | 3 + drivers/idle/Kconfig | 16 ++ drivers/idle/Makefile | 2 + drivers/idle/i7300_idle.c | 674 ++++++++++++++++++++++++++++++++++++++++++++++ include/asm-x86/idle.h | 1 + include/linux/pci_ids.h | 1 + 9 files changed, 706 insertions(+) create mode 100644 drivers/idle/Kconfig create mode 100644 drivers/idle/Makefile create mode 100644 drivers/idle/i7300_idle.c (limited to 'include/linux') diff --git a/MAINTAINERS b/MAINTAINERS index 8dae4555f10..43f71b0d2a2 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2078,6 +2078,12 @@ L: linux-ide@vger.kernel.org L: linux-scsi@vger.kernel.org S: Orphan +IDLE-I7300 +P: Andy Henroid +M: andrew.d.henroid@intel.com +L: linux-pm@lists.linux-foundation.org +S: Supported + IEEE 1394 SUBSYSTEM (drivers/ieee1394) P: Ben Collins M: ben.collins@ubuntu.com diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index ed92864d132..19cdfe1f237 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1536,6 +1536,8 @@ source "arch/x86/kernel/cpu/cpufreq/Kconfig" source "drivers/cpuidle/Kconfig" +source "drivers/idle/Kconfig" + endmenu diff --git 
a/drivers/Makefile b/drivers/Makefile index 2735bde7347..f443a8a9d46 100644 --- a/drivers/Makefile +++ b/drivers/Makefile @@ -82,6 +82,7 @@ obj-$(CONFIG_EISA) += eisa/ obj-y += lguest/ obj-$(CONFIG_CPU_FREQ) += cpufreq/ obj-$(CONFIG_CPU_IDLE) += cpuidle/ +obj-y += idle/ obj-$(CONFIG_MMC) += mmc/ obj-$(CONFIG_MEMSTICK) += memstick/ obj-$(CONFIG_NEW_LEDS) += leds/ diff --git a/drivers/dma/ioat_dma.c b/drivers/dma/ioat_dma.c index bc8c6e3470c..f8396cafa05 100644 --- a/drivers/dma/ioat_dma.c +++ b/drivers/dma/ioat_dma.c @@ -171,6 +171,9 @@ static int ioat_dma_enumerate_channels(struct ioatdma_device *device) xfercap_scale = readb(device->reg_base + IOAT_XFERCAP_OFFSET); xfercap = (xfercap_scale == 0 ? -1 : (1UL << xfercap_scale)); +#if CONFIG_I7300_IDLE_IOAT_CHANNEL + device->common.chancnt--; +#endif for (i = 0; i < device->common.chancnt; i++) { ioat_chan = kzalloc(sizeof(*ioat_chan), GFP_KERNEL); if (!ioat_chan) { diff --git a/drivers/idle/Kconfig b/drivers/idle/Kconfig new file mode 100644 index 00000000000..f5b26dd579e --- /dev/null +++ b/drivers/idle/Kconfig @@ -0,0 +1,16 @@ + +menu "Memory power savings" + +config I7300_IDLE_IOAT_CHANNEL + bool + +config I7300_IDLE + tristate "Intel chipset idle power saving driver" + select I7300_IDLE_IOAT_CHANNEL + depends on X86_64 + help + Enable idle power savings with certain Intel server chipsets. + The chipset must have I/O AT support, such as the Intel 7300. + The power savings depends on the type and quantity of DRAM devices. 
+ +endmenu diff --git a/drivers/idle/Makefile b/drivers/idle/Makefile new file mode 100644 index 00000000000..5f68fc377e2 --- /dev/null +++ b/drivers/idle/Makefile @@ -0,0 +1,2 @@ +obj-$(CONFIG_I7300_IDLE) += i7300_idle.o + diff --git a/drivers/idle/i7300_idle.c b/drivers/idle/i7300_idle.c new file mode 100644 index 00000000000..59d1bbc3cd3 --- /dev/null +++ b/drivers/idle/i7300_idle.c @@ -0,0 +1,674 @@ +/* + * (C) Copyright 2008 Intel Corporation + * Authors: + * Andy Henroid + * Venkatesh Pallipadi + */ + +/* + * Save DIMM power on Intel 7300-based platforms when all CPUs/cores + * are idle, using the DIMM thermal throttling capability. + * + * This driver depends on the Intel integrated DMA controller (I/O AT). + * If the driver for I/O AT (drivers/dma/ioatdma*) is also enabled, + * this driver should work cooperatively. + */ + +/* #define DEBUG */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "../dma/ioatdma_hw.h" +#include "../dma/ioatdma_registers.h" + +#define I7300_IDLE_DRIVER_VERSION "1.55" +#define I7300_PRINT "i7300_idle:" + +static int debug; +module_param_named(debug, debug, uint, 0644); +MODULE_PARM_DESC(debug, "Enable debug printks in this driver"); + +#define dprintk(fmt, arg...) 
\ + do { if (debug) printk(KERN_INFO I7300_PRINT fmt, ##arg); } while (0) + +/* + * Value to set THRTLOW to when initiating throttling + * 0 = No throttling + * 1 = Throttle when > 4 activations per eval window (Maximum throttling) + * 2 = Throttle when > 8 activations + * 168 = Throttle when > 168 activations (Minimum throttling) + */ +#define MAX_THRTLWLIMIT 168 +static uint i7300_idle_thrtlowlm = 1; +module_param_named(thrtlwlimit, i7300_idle_thrtlowlm, uint, 0644); +MODULE_PARM_DESC(thrtlwlimit, + "Value for THRTLOWLM activation field " + "(0 = disable throttle, 1 = Max throttle, 168 = Min throttle)"); + +/* + * simple invocation and duration statistics + */ +static unsigned long total_starts; +static unsigned long total_us; + +#ifdef DEBUG +static unsigned long past_skip; +#endif + +static struct pci_dev *fbd_dev; + +static spinlock_t i7300_idle_lock; +static int i7300_idle_active; + +static u8 i7300_idle_thrtctl_saved; +static u8 i7300_idle_thrtlow_saved; +static u32 i7300_idle_mc_saved; + +static cpumask_t idle_cpumask; +static ktime_t start_ktime; +static unsigned long avg_idle_us; + +static struct dentry *debugfs_dir; + +/* Begin: I/O AT Helper routines */ + +#define IOAT_CHANBASE(ioat_ctl, chan) (ioat_ctl + 0x80 + 0x80 * chan) +/* Snoop control (disable snoops when coherency is not important) */ +#define IOAT_DESC_SADDR_SNP_CTL (1UL << 1) +#define IOAT_DESC_DADDR_SNP_CTL (1UL << 2) + +static struct pci_dev *ioat_dev; +static struct ioat_dma_descriptor *ioat_desc; /* I/O AT desc & data (1 page) */ +static unsigned long ioat_desc_phys; +static u8 *ioat_iomap; /* I/O AT memory-mapped control regs (aka CB_BAR) */ +static u8 *ioat_chanbase; + +/* Start I/O AT memory copy */ +static int i7300_idle_ioat_start(void) +{ + u32 err; + /* Clear error (due to circular descriptor pointer) */ + err = readl(ioat_chanbase + IOAT_CHANERR_OFFSET); + if (err) + writel(err, ioat_chanbase + IOAT_CHANERR_OFFSET); + + writeb(IOAT_CHANCMD_START, ioat_chanbase + 
IOAT1_CHANCMD_OFFSET); + return 0; +} + +/* Stop I/O AT memory copy */ +static void i7300_idle_ioat_stop(void) +{ + int i; + u8 sts; + + for (i = 0; i < 5; i++) { + writeb(IOAT_CHANCMD_RESET, + ioat_chanbase + IOAT1_CHANCMD_OFFSET); + + udelay(10); + + sts = readq(ioat_chanbase + IOAT1_CHANSTS_OFFSET) & + IOAT_CHANSTS_DMA_TRANSFER_STATUS; + + if (sts != IOAT_CHANSTS_DMA_TRANSFER_STATUS_ACTIVE) + break; + + } + + if (i == 5) + dprintk("failed to suspend+reset I/O AT after 5 retries\n"); + +} + +/* Test I/O AT by copying 1024 byte from 2k to 1k */ +static int __init i7300_idle_ioat_selftest(u8 *ctl, + struct ioat_dma_descriptor *desc, unsigned long desc_phys) +{ + u64 chan_sts; + + memset(desc, 0, 2048); + memset((u8 *) desc + 2048, 0xab, 1024); + + desc[0].size = 1024; + desc[0].ctl = 0; + desc[0].src_addr = desc_phys + 2048; + desc[0].dst_addr = desc_phys + 1024; + desc[0].next = 0; + + writeb(IOAT_CHANCMD_RESET, ioat_chanbase + IOAT1_CHANCMD_OFFSET); + writeb(IOAT_CHANCMD_START, ioat_chanbase + IOAT1_CHANCMD_OFFSET); + + udelay(1000); + + chan_sts = readq(ioat_chanbase + IOAT1_CHANSTS_OFFSET) & + IOAT_CHANSTS_DMA_TRANSFER_STATUS; + + if (chan_sts != IOAT_CHANSTS_DMA_TRANSFER_STATUS_DONE) { + /* Not complete, reset the channel */ + writeb(IOAT_CHANCMD_RESET, + ioat_chanbase + IOAT1_CHANCMD_OFFSET); + return -1; + } + + if (*(u32 *) ((u8 *) desc + 3068) != 0xabababab || + *(u32 *) ((u8 *) desc + 2044) != 0xabababab) { + dprintk("Data values src 0x%x, dest 0x%x, memset 0x%x\n", + *(u32 *) ((u8 *) desc + 2048), + *(u32 *) ((u8 *) desc + 1024), + *(u32 *) ((u8 *) desc + 3072)); + return -1; + } + return 0; +} + +static struct device dummy_dma_dev = { + .bus_id = "fallback device", + .coherent_dma_mask = DMA_64BIT_MASK, + .dma_mask = &dummy_dma_dev.coherent_dma_mask, +}; + +/* Setup and initialize I/O AT */ +/* This driver needs I/O AT as the throttling takes effect only when there is + * some memory activity. 
We use I/O AT to set up a dummy copy, while all CPUs + * go idle and memory is throttled. + */ +static int __init i7300_idle_ioat_init(void) +{ + u8 ver, chan_count, ioat_chan; + u16 chan_ctl; + + ioat_iomap = (u8 *) ioremap_nocache(pci_resource_start(ioat_dev, 0), + pci_resource_len(ioat_dev, 0)); + + if (!ioat_iomap) { + printk(KERN_ERR I7300_PRINT "failed to map I/O AT registers\n"); + goto err_ret; + } + + ver = readb(ioat_iomap + IOAT_VER_OFFSET); + if (ver != IOAT_VER_1_2) { + printk(KERN_ERR I7300_PRINT "unknown I/O AT version (%u.%u)\n", + ver >> 4, ver & 0xf); + goto err_unmap; + } + + chan_count = readb(ioat_iomap + IOAT_CHANCNT_OFFSET); + if (!chan_count) { + printk(KERN_ERR I7300_PRINT "unexpected # of I/O AT channels " + "(%u)\n", + chan_count); + goto err_unmap; + } + + ioat_chan = chan_count - 1; + ioat_chanbase = IOAT_CHANBASE(ioat_iomap, ioat_chan); + + chan_ctl = readw(ioat_chanbase + IOAT_CHANCTRL_OFFSET); + if (chan_ctl & IOAT_CHANCTRL_CHANNEL_IN_USE) { + printk(KERN_ERR I7300_PRINT "channel %d in use\n", ioat_chan); + goto err_unmap; + } + + writew(IOAT_CHANCTRL_CHANNEL_IN_USE, + ioat_chanbase + IOAT_CHANCTRL_OFFSET); + + ioat_desc = (struct ioat_dma_descriptor *)dma_alloc_coherent( + &dummy_dma_dev, 4096, + (dma_addr_t *)&ioat_desc_phys, GFP_KERNEL); + if (!ioat_desc) { + printk(KERN_ERR I7300_PRINT "failed to allocate I/O AT desc\n"); + goto err_mark_unused; + } + + writel(ioat_desc_phys & 0xffffffffUL, + ioat_chanbase + IOAT1_CHAINADDR_OFFSET_LOW); + writel(ioat_desc_phys >> 32, + ioat_chanbase + IOAT1_CHAINADDR_OFFSET_HIGH); + + if (i7300_idle_ioat_selftest(ioat_iomap, ioat_desc, ioat_desc_phys)) { + printk(KERN_ERR I7300_PRINT "I/O AT self-test failed\n"); + goto err_free; + } + + /* Setup circular I/O AT descriptor chain */ + ioat_desc[0].ctl = IOAT_DESC_SADDR_SNP_CTL | IOAT_DESC_DADDR_SNP_CTL; + ioat_desc[0].src_addr = ioat_desc_phys + 2048; + ioat_desc[0].dst_addr = ioat_desc_phys + 3072; + ioat_desc[0].size = 128; + ioat_desc[0].next = 
ioat_desc_phys + sizeof(struct ioat_dma_descriptor); + + ioat_desc[1].ctl = ioat_desc[0].ctl; + ioat_desc[1].src_addr = ioat_desc[0].src_addr; + ioat_desc[1].dst_addr = ioat_desc[0].dst_addr; + ioat_desc[1].size = ioat_desc[0].size; + ioat_desc[1].next = ioat_desc_phys; + + return 0; + +err_free: + dma_free_coherent(&dummy_dma_dev, 4096, (void *)ioat_desc, 0); +err_mark_unused: + writew(0, ioat_chanbase + IOAT_CHANCTRL_OFFSET); +err_unmap: + iounmap(ioat_iomap); +err_ret: + return -ENODEV; +} + +/* Cleanup I/O AT */ +static void __exit i7300_idle_ioat_exit(void) +{ + int i; + u64 chan_sts; + + i7300_idle_ioat_stop(); + + /* Wait for a while for the channel to halt before releasing */ + for (i = 0; i < 10; i++) { + writeb(IOAT_CHANCMD_RESET, + ioat_chanbase + IOAT1_CHANCMD_OFFSET); + + chan_sts = readq(ioat_chanbase + IOAT1_CHANSTS_OFFSET) & + IOAT_CHANSTS_DMA_TRANSFER_STATUS; + + if (chan_sts != IOAT_CHANSTS_DMA_TRANSFER_STATUS_ACTIVE) { + writew(0, ioat_chanbase + IOAT_CHANCTRL_OFFSET); + break; + } + udelay(1000); + } + + chan_sts = readq(ioat_chanbase + IOAT1_CHANSTS_OFFSET) & + IOAT_CHANSTS_DMA_TRANSFER_STATUS; + + /* + * We tried to reset multiple times. If IO A/T channel is still active + * flag an error and return without cleanup. Memory leak is better + * than random corruption in that extreme error situation. + */ + if (chan_sts == IOAT_CHANSTS_DMA_TRANSFER_STATUS_ACTIVE) { + printk(KERN_ERR I7300_PRINT "Unable to stop IO A/T channels." + " Not freeing resources\n"); + return; + } + + dma_free_coherent(&dummy_dma_dev, 4096, (void *)ioat_desc, 0); + iounmap(ioat_iomap); +} + +/* End: I/O AT Helper routines */ + +#define DIMM_THRTLOW 0x64 +#define DIMM_THRTCTL 0x67 +#define DIMM_THRTCTL_THRMHUNT (1UL << 0) +#define DIMM_MC 0x40 +#define DIMM_GTW_MODE (1UL << 17) +#define DIMM_GBLACT 0x60 + +/* + * Keep track of an exponential-decaying average of recent idle durations. 
+ * The latest duration gets DURATION_WEIGHT_PCT percentage weight + * in this average, with the old average getting the remaining weight. + * + * High weights emphasize recent history, low weights include long history. + */ +#define DURATION_WEIGHT_PCT 55 + +/* + * When the decaying average of recent durations or the predicted duration + * of the next timer interrupt is shorter than duration_threshold, the + * driver will decline to throttle. + */ +#define DURATION_THRESHOLD_US 100 + + +/* Store DIMM thermal throttle configuration */ +static int i7300_idle_thrt_save(void) +{ + u32 new_mc_val; + u8 gblactlm; + + pci_read_config_byte(fbd_dev, DIMM_THRTCTL, &i7300_idle_thrtctl_saved); + pci_read_config_byte(fbd_dev, DIMM_THRTLOW, &i7300_idle_thrtlow_saved); + pci_read_config_dword(fbd_dev, DIMM_MC, &i7300_idle_mc_saved); + /* + * Make sure we have Global Throttling Window Mode set to have a + * "short" window. This (mostly) works around an issue where + * throttling persists until the end of the global throttling window + * size. On the tested system, this was resulting in a maximum of + * 64 ms to exit throttling (average 32 ms). The actual numbers + * depend on system frequencies. Setting the short window reduces + * this by a factor of 4096. + * + * We will do this only if the system is set for + * unlimited-activations while in open-loop throttling (i.e., when + * Global Activation Throttle Limit is zero). 
+ */ + pci_read_config_byte(fbd_dev, DIMM_GBLACT, &gblactlm); + dprintk("thrtctl_saved = 0x%02x, thrtlow_saved = 0x%02x\n", + i7300_idle_thrtctl_saved, + i7300_idle_thrtlow_saved); + dprintk("mc_saved = 0x%08x, gblactlm = 0x%02x\n", + i7300_idle_mc_saved, + gblactlm); + if (gblactlm == 0) { + new_mc_val = i7300_idle_mc_saved | DIMM_GTW_MODE; + pci_write_config_dword(fbd_dev, DIMM_MC, new_mc_val); + return 0; + } else { + dprintk("could not set GTW_MODE = 1 (OLTT enabled)\n"); + return -ENODEV; + } +} + +/* Restore DIMM thermal throttle configuration */ +static void i7300_idle_thrt_restore(void) +{ + pci_write_config_dword(fbd_dev, DIMM_MC, i7300_idle_mc_saved); + pci_write_config_byte(fbd_dev, DIMM_THRTLOW, i7300_idle_thrtlow_saved); + pci_write_config_byte(fbd_dev, DIMM_THRTCTL, i7300_idle_thrtctl_saved); +} + +/* Enable DIMM thermal throttling */ +static void i7300_idle_start(void) +{ + u8 new_ctl; + u8 limit; + + new_ctl = i7300_idle_thrtctl_saved & ~DIMM_THRTCTL_THRMHUNT; + pci_write_config_byte(fbd_dev, DIMM_THRTCTL, new_ctl); + + limit = i7300_idle_thrtlowlm; + if (unlikely(limit > MAX_THRTLWLIMIT)) + limit = MAX_THRTLWLIMIT; + + pci_write_config_byte(fbd_dev, DIMM_THRTLOW, limit); + + new_ctl = i7300_idle_thrtctl_saved | DIMM_THRTCTL_THRMHUNT; + pci_write_config_byte(fbd_dev, DIMM_THRTCTL, new_ctl); +} + +/* Disable DIMM thermal throttling */ +static void i7300_idle_stop(void) +{ + u8 new_ctl; + u8 got_ctl; + + new_ctl = i7300_idle_thrtctl_saved & ~DIMM_THRTCTL_THRMHUNT; + pci_write_config_byte(fbd_dev, DIMM_THRTCTL, new_ctl); + + pci_write_config_byte(fbd_dev, DIMM_THRTLOW, i7300_idle_thrtlow_saved); + pci_write_config_byte(fbd_dev, DIMM_THRTCTL, i7300_idle_thrtctl_saved); + pci_read_config_byte(fbd_dev, DIMM_THRTCTL, &got_ctl); + WARN_ON_ONCE(got_ctl != i7300_idle_thrtctl_saved); +} + + +/* + * i7300_avg_duration_check() + * return 0 if the decaying average of recent idle durations is + * more than DURATION_THRESHOLD_US + */ +static int 
i7300_avg_duration_check(void) +{ + if (avg_idle_us >= DURATION_THRESHOLD_US) + return 0; + +#ifdef DEBUG + past_skip++; +#endif + return 1; +} + +/* Idle notifier to look at idle CPUs */ +static int i7300_idle_notifier(struct notifier_block *nb, unsigned long val, + void *data) +{ + unsigned long flags; + ktime_t now_ktime; + static ktime_t idle_begin_time; + static int time_init = 1; + + if (!i7300_idle_thrtlowlm) + return 0; + + if (unlikely(time_init)) { + time_init = 0; + idle_begin_time = ktime_get(); + } + + spin_lock_irqsave(&i7300_idle_lock, flags); + if (val == IDLE_START) { + + cpu_set(smp_processor_id(), idle_cpumask); + + if (cpus_weight(idle_cpumask) != num_online_cpus()) + goto end; + + now_ktime = ktime_get(); + idle_begin_time = now_ktime; + + if (i7300_avg_duration_check()) + goto end; + + i7300_idle_active = 1; + total_starts++; + start_ktime = now_ktime; + + i7300_idle_start(); + i7300_idle_ioat_start(); + + } else if (val == IDLE_END) { + cpu_clear(smp_processor_id(), idle_cpumask); + if (cpus_weight(idle_cpumask) == (num_online_cpus() - 1)) { + /* First CPU coming out of idle */ + u64 idle_duration_us; + + now_ktime = ktime_get(); + + idle_duration_us = ktime_to_us(ktime_sub + (now_ktime, idle_begin_time)); + + avg_idle_us = + ((100 - DURATION_WEIGHT_PCT) * avg_idle_us + + DURATION_WEIGHT_PCT * idle_duration_us) / 100; + + if (i7300_idle_active) { + ktime_t idle_ktime; + + idle_ktime = ktime_sub(now_ktime, start_ktime); + total_us += ktime_to_us(idle_ktime); + + i7300_idle_ioat_stop(); + i7300_idle_stop(); + i7300_idle_active = 0; + } + } + } +end: + spin_unlock_irqrestore(&i7300_idle_lock, flags); + return 0; +} + +static struct notifier_block i7300_idle_nb = { + .notifier_call = i7300_idle_notifier, +}; + +/* + * I/O AT controls (PCI bus 0 device 8 function 0) + * DIMM controls (PCI bus 0 device 16 function 1) + */ +#define IOAT_BUS 0 +#define IOAT_DEVFN PCI_DEVFN(8, 0) +#define MEMCTL_BUS 0 +#define MEMCTL_DEVFN PCI_DEVFN(16, 1) + +struct 
fbd_ioat { + unsigned int vendor; + unsigned int ioat_dev; +}; + +/* + * The i5000 chip-set has the same hooks as the i7300 + * but support is disabled by default because this driver + * has not been validated on that platform. + */ +#define SUPPORT_I5000 0 + +static const struct fbd_ioat fbd_ioat_list[] = { + {PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_CNB}, +#if SUPPORT_I5000 + {PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT}, +#endif + {0, 0} +}; + +/* table of devices that work with this driver */ +static const struct pci_device_id pci_tbl[] = { + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_FBD_CNB) }, +#if SUPPORT_I5000 + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_5000_ERR) }, +#endif + { } /* Terminating entry */ +}; + +MODULE_DEVICE_TABLE(pci, pci_tbl); + +/* Check for known platforms with I/O-AT */ +static int __init i7300_idle_platform_probe(void) +{ + int i; + + fbd_dev = pci_get_bus_and_slot(MEMCTL_BUS, MEMCTL_DEVFN); + if (!fbd_dev) + return -ENODEV; + + for (i = 0; pci_tbl[i].vendor != 0; i++) { + if (fbd_dev->vendor == pci_tbl[i].vendor && + fbd_dev->device == pci_tbl[i].device) { + break; + } + } + if (pci_tbl[i].vendor == 0) + return -ENODEV; + + ioat_dev = pci_get_bus_and_slot(IOAT_BUS, IOAT_DEVFN); + if (!ioat_dev) + return -ENODEV; + + for (i = 0; fbd_ioat_list[i].vendor != 0; i++) { + if (ioat_dev->vendor == fbd_ioat_list[i].vendor && + ioat_dev->device == fbd_ioat_list[i].ioat_dev) { + return 0; + } + } + return -ENODEV; +} + +int stats_open_generic(struct inode *inode, struct file *fp) +{ + fp->private_data = inode->i_private; + return 0; +} + +static ssize_t stats_read_ul(struct file *fp, char __user *ubuf, size_t count, + loff_t *off) +{ + unsigned long *p = fp->private_data; + char buf[32]; + int len; + + len = snprintf(buf, 32, "%lu\n", *p); + return simple_read_from_buffer(ubuf, count, off, buf, len); +} + +static const struct file_operations idle_fops = { + .open = stats_open_generic, + .read = stats_read_ul, +}; + 
+struct debugfs_file_info { + void *ptr; + char name[32]; + struct dentry *file; +} debugfs_file_list[] = { + {&total_starts, "total_starts", NULL}, + {&total_us, "total_us", NULL}, +#ifdef DEBUG + {&past_skip, "past_skip", NULL}, +#endif + {NULL, "", NULL} + }; + +static int __init i7300_idle_init(void) +{ + spin_lock_init(&i7300_idle_lock); + cpus_clear(idle_cpumask); + total_us = 0; + + if (i7300_idle_platform_probe()) + return -ENODEV; + + if (i7300_idle_thrt_save()) + return -ENODEV; + + if (i7300_idle_ioat_init()) + return -ENODEV; + + debugfs_dir = debugfs_create_dir("i7300_idle", NULL); + if (debugfs_dir) { + int i = 0; + + while (debugfs_file_list[i].ptr != NULL) { + debugfs_file_list[i].file = debugfs_create_file( + debugfs_file_list[i].name, + S_IRUSR, + debugfs_dir, + debugfs_file_list[i].ptr, + &idle_fops); + i++; + } + } + + idle_notifier_register(&i7300_idle_nb); + + printk(KERN_INFO "i7300_idle: loaded v%s\n", I7300_IDLE_DRIVER_VERSION); + return 0; +} + +static void __exit i7300_idle_exit(void) +{ + idle_notifier_unregister(&i7300_idle_nb); + + if (debugfs_dir) { + int i = 0; + + while (debugfs_file_list[i].file != NULL) { + debugfs_remove(debugfs_file_list[i].file); + i++; + } + + debugfs_remove(debugfs_dir); + } + i7300_idle_thrt_restore(); + i7300_idle_ioat_exit(); +} + +module_init(i7300_idle_init); +module_exit(i7300_idle_exit); + +MODULE_AUTHOR("Andy Henroid "); +MODULE_DESCRIPTION("Intel Chipset DIMM Idle Power Saving Driver v" + I7300_IDLE_DRIVER_VERSION); +MODULE_LICENSE("GPL"); diff --git a/include/asm-x86/idle.h b/include/asm-x86/idle.h index cbb64912361..54ce018d4b6 100644 --- a/include/asm-x86/idle.h +++ b/include/asm-x86/idle.h @@ -6,6 +6,7 @@ struct notifier_block; void idle_notifier_register(struct notifier_block *n); +void idle_notifier_unregister(struct notifier_block *n); void enter_idle(void); void exit_idle(void); diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index f1624b39675..efb786d11f2 100644 --- 
a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2422,6 +2422,7 @@ #define PCI_DEVICE_ID_INTEL_MCH_PC1 0x359a #define PCI_DEVICE_ID_INTEL_E7525_MCH 0x359e #define PCI_DEVICE_ID_INTEL_IOAT_CNB 0x360b +#define PCI_DEVICE_ID_INTEL_FBD_CNB 0x360c #define PCI_DEVICE_ID_INTEL_ICH10_0 0x3a14 #define PCI_DEVICE_ID_INTEL_ICH10_1 0x3a16 #define PCI_DEVICE_ID_INTEL_ICH10_2 0x3a18 -- cgit v1.2.3-70-g09d2 From 319edafef64406c971035c56bd68480e5a82b581 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Mon, 20 Oct 2008 18:15:30 +0100 Subject: smc911x: Add IRQ polarity configuration Platforms like ARM Ltd's RealView require the IRQ polarity bit to be set for the SMC9118 chip. This patch allows the dynamic configuration via the smc911x_platdata structure. This patch also changes the smc91x_platdata structure name to the correct smc911x_platdata in the smc911x_drv_probe() function. Signed-off-by: Catalin Marinas Signed-off-by: Jeff Garzik --- drivers/net/smc911x.c | 11 ++++++++--- include/linux/smc911x.h | 1 + 2 files changed, 9 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/smc911x.c b/drivers/net/smc911x.c index ec32b5d89c9..2c78229ad04 100644 --- a/drivers/net/smc911x.c +++ b/drivers/net/smc911x.c @@ -180,7 +180,7 @@ static void PRINT_PKT(u_char *buf, int length) static void smc911x_reset(struct net_device *dev) { struct smc911x_local *lp = netdev_priv(dev); - unsigned int reg, timeout=0, resets=1; + unsigned int reg, timeout=0, resets=1, irq_cfg; unsigned long flags; DBG(SMC_DEBUG_FUNC, "%s: --> %s\n", dev->name, __func__); @@ -252,7 +252,12 @@ static void smc911x_reset(struct net_device *dev) * Deassert IRQ for 1*10us for edge type interrupts * and drive IRQ pin push-pull */ - SMC_SET_IRQ_CFG(lp, (1 << 24) | INT_CFG_IRQ_EN_ | INT_CFG_IRQ_TYPE_); + irq_cfg = (1 << 24) | INT_CFG_IRQ_EN_ | INT_CFG_IRQ_TYPE_; +#ifdef SMC_DYNAMIC_BUS_CONFIG + if (lp->cfg.irq_polarity) + irq_cfg |= INT_CFG_IRQ_POL_; +#endif + SMC_SET_IRQ_CFG(lp, 
irq_cfg); /* clear anything saved */ if (lp->pending_tx_skb != NULL) { @@ -2054,7 +2059,7 @@ err_out: */ static int smc911x_drv_probe(struct platform_device *pdev) { - struct smc91x_platdata *pd = pdev->dev.platform_data; + struct smc911x_platdata *pd = pdev->dev.platform_data; struct net_device *ndev; struct resource *res; struct smc911x_local *lp; diff --git a/include/linux/smc911x.h b/include/linux/smc911x.h index b58f54c2418..521f37143fa 100644 --- a/include/linux/smc911x.h +++ b/include/linux/smc911x.h @@ -7,6 +7,7 @@ struct smc911x_platdata { unsigned long flags; unsigned long irq_flags; /* IRQF_... */ + int irq_polarity; }; #endif /* __SMC911X_H__ */ -- cgit v1.2.3-70-g09d2 From 93fc9e1bb6507dde945c2eab68c93e1066ac3691 Mon Sep 17 00:00:00 2001 From: Yevgeny Petrilin Date: Wed, 22 Oct 2008 10:25:29 -0700 Subject: mlx4_core: Support multiple pre-reserved QP regions For ethernet support, we need to reserve QPs for the ethernet and fibre channel driver. The QPs are reserved at the end of the QP table. (This way we assure that they are aligned to their size) We need to consider these reserved ranges in bitmap creation, so we extend the mlx4 bitmap utility functions to allow reserved ranges at both the bottom and the top of the range. 
Signed-off-by: Yevgeny Petrilin Signed-off-by: Roland Dreier --- drivers/net/mlx4/alloc.c | 29 +++++++++++++-------- drivers/net/mlx4/cq.c | 2 +- drivers/net/mlx4/eq.c | 2 +- drivers/net/mlx4/fw.c | 5 ++++ drivers/net/mlx4/fw.h | 2 ++ drivers/net/mlx4/main.c | 62 ++++++++++++++++++++++++++++++++++++++++----- drivers/net/mlx4/mcg.c | 4 +-- drivers/net/mlx4/mlx4.h | 4 ++- drivers/net/mlx4/mr.c | 2 +- drivers/net/mlx4/pd.c | 4 +-- drivers/net/mlx4/qp.c | 36 ++++++++++++++++++++++++-- drivers/net/mlx4/srq.c | 2 +- include/linux/mlx4/device.h | 19 +++++++++++++- 13 files changed, 144 insertions(+), 29 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/mlx4/alloc.c b/drivers/net/mlx4/alloc.c index e6c0d5bb5dc..e2bc7ecf162 100644 --- a/drivers/net/mlx4/alloc.c +++ b/drivers/net/mlx4/alloc.c @@ -47,13 +47,16 @@ u32 mlx4_bitmap_alloc(struct mlx4_bitmap *bitmap) obj = find_next_zero_bit(bitmap->table, bitmap->max, bitmap->last); if (obj >= bitmap->max) { - bitmap->top = (bitmap->top + bitmap->max) & bitmap->mask; + bitmap->top = (bitmap->top + bitmap->max + bitmap->reserved_top) + & bitmap->mask; obj = find_first_zero_bit(bitmap->table, bitmap->max); } if (obj < bitmap->max) { set_bit(obj, bitmap->table); - bitmap->last = (obj + 1) & (bitmap->max - 1); + bitmap->last = (obj + 1); + if (bitmap->last == bitmap->max) + bitmap->last = 0; obj |= bitmap->top; } else obj = -1; @@ -109,9 +112,9 @@ u32 mlx4_bitmap_alloc_range(struct mlx4_bitmap *bitmap, int cnt, int align) obj = find_aligned_range(bitmap->table, bitmap->last, bitmap->max, cnt, align); if (obj >= bitmap->max) { - bitmap->top = (bitmap->top + bitmap->max) & bitmap->mask; - obj = find_aligned_range(bitmap->table, 0, - bitmap->max, + bitmap->top = (bitmap->top + bitmap->max + bitmap->reserved_top) + & bitmap->mask; + obj = find_aligned_range(bitmap->table, 0, bitmap->max, cnt, align); } @@ -136,17 +139,19 @@ void mlx4_bitmap_free_range(struct mlx4_bitmap *bitmap, u32 obj, int cnt) { u32 i; - obj &= 
bitmap->max - 1; + obj &= bitmap->max + bitmap->reserved_top - 1; spin_lock(&bitmap->lock); for (i = 0; i < cnt; i++) clear_bit(obj + i, bitmap->table); bitmap->last = min(bitmap->last, obj); - bitmap->top = (bitmap->top + bitmap->max) & bitmap->mask; + bitmap->top = (bitmap->top + bitmap->max + bitmap->reserved_top) + & bitmap->mask; spin_unlock(&bitmap->lock); } -int mlx4_bitmap_init(struct mlx4_bitmap *bitmap, u32 num, u32 mask, u32 reserved) +int mlx4_bitmap_init(struct mlx4_bitmap *bitmap, u32 num, u32 mask, + u32 reserved_bot, u32 reserved_top) { int i; @@ -156,14 +161,16 @@ int mlx4_bitmap_init(struct mlx4_bitmap *bitmap, u32 num, u32 mask, u32 reserved bitmap->last = 0; bitmap->top = 0; - bitmap->max = num; + bitmap->max = num - reserved_top; bitmap->mask = mask; + bitmap->reserved_top = reserved_top; spin_lock_init(&bitmap->lock); - bitmap->table = kzalloc(BITS_TO_LONGS(num) * sizeof (long), GFP_KERNEL); + bitmap->table = kzalloc(BITS_TO_LONGS(bitmap->max) * + sizeof (long), GFP_KERNEL); if (!bitmap->table) return -ENOMEM; - for (i = 0; i < reserved; ++i) + for (i = 0; i < reserved_bot; ++i) set_bit(i, bitmap->table); return 0; diff --git a/drivers/net/mlx4/cq.c b/drivers/net/mlx4/cq.c index 9bb50e3f897..b7ad2829d67 100644 --- a/drivers/net/mlx4/cq.c +++ b/drivers/net/mlx4/cq.c @@ -300,7 +300,7 @@ int mlx4_init_cq_table(struct mlx4_dev *dev) INIT_RADIX_TREE(&cq_table->tree, GFP_ATOMIC); err = mlx4_bitmap_init(&cq_table->bitmap, dev->caps.num_cqs, - dev->caps.num_cqs - 1, dev->caps.reserved_cqs); + dev->caps.num_cqs - 1, dev->caps.reserved_cqs, 0); if (err) return err; diff --git a/drivers/net/mlx4/eq.c b/drivers/net/mlx4/eq.c index 8a8b56135a5..de169338cd9 100644 --- a/drivers/net/mlx4/eq.c +++ b/drivers/net/mlx4/eq.c @@ -558,7 +558,7 @@ int mlx4_init_eq_table(struct mlx4_dev *dev) int i; err = mlx4_bitmap_init(&priv->eq_table.bitmap, dev->caps.num_eqs, - dev->caps.num_eqs - 1, dev->caps.reserved_eqs); + dev->caps.num_eqs - 1, dev->caps.reserved_eqs, 0); 
if (err) return err; diff --git a/drivers/net/mlx4/fw.c b/drivers/net/mlx4/fw.c index 7e32955da98..40d8142c23b 100644 --- a/drivers/net/mlx4/fw.c +++ b/drivers/net/mlx4/fw.c @@ -357,6 +357,7 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) #define QUERY_PORT_MTU_OFFSET 0x01 #define QUERY_PORT_WIDTH_OFFSET 0x06 #define QUERY_PORT_MAX_GID_PKEY_OFFSET 0x07 +#define QUERY_PORT_MAX_MACVLAN_OFFSET 0x0a #define QUERY_PORT_MAX_VL_OFFSET 0x0b for (i = 1; i <= dev_cap->num_ports; ++i) { @@ -374,6 +375,10 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) dev_cap->max_pkeys[i] = 1 << (field & 0xf); MLX4_GET(field, outbox, QUERY_PORT_MAX_VL_OFFSET); dev_cap->max_vl[i] = field & 0xf; + MLX4_GET(field, outbox, QUERY_PORT_MAX_MACVLAN_OFFSET); + dev_cap->log_max_macs[i] = field & 0xf; + dev_cap->log_max_vlans[i] = field >> 4; + } } diff --git a/drivers/net/mlx4/fw.h b/drivers/net/mlx4/fw.h index decbb5c2ad4..c34e726d66e 100644 --- a/drivers/net/mlx4/fw.h +++ b/drivers/net/mlx4/fw.h @@ -102,6 +102,8 @@ struct mlx4_dev_cap { u32 reserved_lkey; u64 max_icm_sz; int max_gso_sz; + u8 log_max_macs[MLX4_MAX_PORTS + 1]; + u8 log_max_vlans[MLX4_MAX_PORTS + 1]; }; struct mlx4_adapter { diff --git a/drivers/net/mlx4/main.c b/drivers/net/mlx4/main.c index 1252a919de2..560e1962212 100644 --- a/drivers/net/mlx4/main.c +++ b/drivers/net/mlx4/main.c @@ -85,6 +85,19 @@ static struct mlx4_profile default_profile = { .num_mtt = 1 << 20, }; +static int log_num_mac = 2; +module_param_named(log_num_mac, log_num_mac, int, 0444); +MODULE_PARM_DESC(log_num_mac, "Log2 max number of MACs per ETH port (1-7)"); + +static int log_num_vlan; +module_param_named(log_num_vlan, log_num_vlan, int, 0444); +MODULE_PARM_DESC(log_num_vlan, "Log2 max number of VLANs per ETH port (0-7)"); + +static int use_prio; +module_param_named(use_prio, use_prio, bool, 0444); +MODULE_PARM_DESC(use_prio, "Enable steering by VLAN priority on ETH ports " + "(0/1, default 0)"); + 
static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) { int err; @@ -134,7 +147,6 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) dev->caps.max_rq_sg = dev_cap->max_rq_sg; dev->caps.max_wqes = dev_cap->max_qp_sz; dev->caps.max_qp_init_rdma = dev_cap->max_requester_per_qp; - dev->caps.reserved_qps = dev_cap->reserved_qps; dev->caps.max_srq_wqes = dev_cap->max_srq_sz; dev->caps.max_srq_sge = dev_cap->max_rq_sg - 1; dev->caps.reserved_srqs = dev_cap->reserved_srqs; @@ -163,6 +175,39 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) dev->caps.stat_rate_support = dev_cap->stat_rate_support; dev->caps.max_gso_sz = dev_cap->max_gso_sz; + dev->caps.log_num_macs = log_num_mac; + dev->caps.log_num_vlans = log_num_vlan; + dev->caps.log_num_prios = use_prio ? 3 : 0; + + for (i = 1; i <= dev->caps.num_ports; ++i) { + if (dev->caps.log_num_macs > dev_cap->log_max_macs[i]) { + dev->caps.log_num_macs = dev_cap->log_max_macs[i]; + mlx4_warn(dev, "Requested number of MACs is too much " + "for port %d, reducing to %d.\n", + i, 1 << dev->caps.log_num_macs); + } + if (dev->caps.log_num_vlans > dev_cap->log_max_vlans[i]) { + dev->caps.log_num_vlans = dev_cap->log_max_vlans[i]; + mlx4_warn(dev, "Requested number of VLANs is too much " + "for port %d, reducing to %d.\n", + i, 1 << dev->caps.log_num_vlans); + } + } + + dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW] = dev_cap->reserved_qps; + dev->caps.reserved_qps_cnt[MLX4_QP_REGION_ETH_ADDR] = + dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_ADDR] = + (1 << dev->caps.log_num_macs) * + (1 << dev->caps.log_num_vlans) * + (1 << dev->caps.log_num_prios) * + dev->caps.num_ports; + dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_EXCH] = MLX4_NUM_FEXCH; + + dev->caps.reserved_qps = dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW] + + dev->caps.reserved_qps_cnt[MLX4_QP_REGION_ETH_ADDR] + + dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_ADDR] + + 
dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_EXCH]; + return 0; } @@ -211,7 +256,8 @@ static int mlx4_init_cmpt_table(struct mlx4_dev *dev, u64 cmpt_base, ((u64) (MLX4_CMPT_TYPE_QP * cmpt_entry_sz) << MLX4_CMPT_SHIFT), cmpt_entry_sz, dev->caps.num_qps, - dev->caps.reserved_qps, 0, 0); + dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW], + 0, 0); if (err) goto err; @@ -336,7 +382,8 @@ static int mlx4_init_icm(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap, init_hca->qpc_base, dev_cap->qpc_entry_sz, dev->caps.num_qps, - dev->caps.reserved_qps, 0, 0); + dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW], + 0, 0); if (err) { mlx4_err(dev, "Failed to map QP context memory, aborting.\n"); goto err_unmap_dmpt; @@ -346,7 +393,8 @@ static int mlx4_init_icm(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap, init_hca->auxc_base, dev_cap->aux_entry_sz, dev->caps.num_qps, - dev->caps.reserved_qps, 0, 0); + dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW], + 0, 0); if (err) { mlx4_err(dev, "Failed to map AUXC context memory, aborting.\n"); goto err_unmap_qp; @@ -356,7 +404,8 @@ static int mlx4_init_icm(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap, init_hca->altc_base, dev_cap->altc_entry_sz, dev->caps.num_qps, - dev->caps.reserved_qps, 0, 0); + dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW], + 0, 0); if (err) { mlx4_err(dev, "Failed to map ALTC context memory, aborting.\n"); goto err_unmap_auxc; @@ -366,7 +415,8 @@ static int mlx4_init_icm(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap, init_hca->rdmarc_base, dev_cap->rdmarc_entry_sz << priv->qp_table.rdmarc_shift, dev->caps.num_qps, - dev->caps.reserved_qps, 0, 0); + dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW], + 0, 0); if (err) { mlx4_err(dev, "Failed to map RDMARC context memory, aborting\n"); goto err_unmap_altc; diff --git a/drivers/net/mlx4/mcg.c b/drivers/net/mlx4/mcg.c index c83f88ce073..592c01ae2c5 100644 --- a/drivers/net/mlx4/mcg.c +++ b/drivers/net/mlx4/mcg.c @@ -368,8 +368,8 @@ int mlx4_init_mcg_table(struct 
mlx4_dev *dev) struct mlx4_priv *priv = mlx4_priv(dev); int err; - err = mlx4_bitmap_init(&priv->mcg_table.bitmap, - dev->caps.num_amgms, dev->caps.num_amgms - 1, 0); + err = mlx4_bitmap_init(&priv->mcg_table.bitmap, dev->caps.num_amgms, + dev->caps.num_amgms - 1, 0, 0); if (err) return err; diff --git a/drivers/net/mlx4/mlx4.h b/drivers/net/mlx4/mlx4.h index b55ddab73f6..9e2f44c3181 100644 --- a/drivers/net/mlx4/mlx4.h +++ b/drivers/net/mlx4/mlx4.h @@ -111,6 +111,7 @@ struct mlx4_bitmap { u32 last; u32 top; u32 max; + u32 reserved_top; u32 mask; spinlock_t lock; unsigned long *table; @@ -290,7 +291,8 @@ u32 mlx4_bitmap_alloc(struct mlx4_bitmap *bitmap); void mlx4_bitmap_free(struct mlx4_bitmap *bitmap, u32 obj); u32 mlx4_bitmap_alloc_range(struct mlx4_bitmap *bitmap, int cnt, int align); void mlx4_bitmap_free_range(struct mlx4_bitmap *bitmap, u32 obj, int cnt); -int mlx4_bitmap_init(struct mlx4_bitmap *bitmap, u32 num, u32 mask, u32 reserved); +int mlx4_bitmap_init(struct mlx4_bitmap *bitmap, u32 num, u32 mask, + u32 reserved_bot, u32 resetrved_top); void mlx4_bitmap_cleanup(struct mlx4_bitmap *bitmap); int mlx4_reset(struct mlx4_dev *dev); diff --git a/drivers/net/mlx4/mr.c b/drivers/net/mlx4/mr.c index d1dd5b48dbd..0caf74cae8b 100644 --- a/drivers/net/mlx4/mr.c +++ b/drivers/net/mlx4/mr.c @@ -461,7 +461,7 @@ int mlx4_init_mr_table(struct mlx4_dev *dev) int err; err = mlx4_bitmap_init(&mr_table->mpt_bitmap, dev->caps.num_mpts, - ~0, dev->caps.reserved_mrws); + ~0, dev->caps.reserved_mrws, 0); if (err) return err; diff --git a/drivers/net/mlx4/pd.c b/drivers/net/mlx4/pd.c index aa616892d09..26d1a7a9e37 100644 --- a/drivers/net/mlx4/pd.c +++ b/drivers/net/mlx4/pd.c @@ -62,7 +62,7 @@ int mlx4_init_pd_table(struct mlx4_dev *dev) struct mlx4_priv *priv = mlx4_priv(dev); return mlx4_bitmap_init(&priv->pd_bitmap, dev->caps.num_pds, - (1 << 24) - 1, dev->caps.reserved_pds); + (1 << 24) - 1, dev->caps.reserved_pds, 0); } void mlx4_cleanup_pd_table(struct mlx4_dev *dev) @@ 
-100,7 +100,7 @@ int mlx4_init_uar_table(struct mlx4_dev *dev) return mlx4_bitmap_init(&mlx4_priv(dev)->uar_table.bitmap, dev->caps.num_uars, dev->caps.num_uars - 1, - max(128, dev->caps.reserved_uars)); + max(128, dev->caps.reserved_uars), 0); } void mlx4_cleanup_uar_table(struct mlx4_dev *dev) diff --git a/drivers/net/mlx4/qp.c b/drivers/net/mlx4/qp.c index 98e0c40ba36..1c565ef8d17 100644 --- a/drivers/net/mlx4/qp.c +++ b/drivers/net/mlx4/qp.c @@ -272,6 +272,7 @@ int mlx4_init_qp_table(struct mlx4_dev *dev) { struct mlx4_qp_table *qp_table = &mlx4_priv(dev)->qp_table; int err; + int reserved_from_top = 0; spin_lock_init(&qp_table->lock); INIT_RADIX_TREE(&dev->qp_table_tree, GFP_ATOMIC); @@ -281,9 +282,40 @@ int mlx4_init_qp_table(struct mlx4_dev *dev) * block of special QPs must be aligned to a multiple of 8, so * round up. */ - dev->caps.sqp_start = ALIGN(dev->caps.reserved_qps, 8); + dev->caps.sqp_start = + ALIGN(dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW], 8); + + { + int sort[MLX4_NUM_QP_REGION]; + int i, j, tmp; + int last_base = dev->caps.num_qps; + + for (i = 1; i < MLX4_NUM_QP_REGION; ++i) + sort[i] = i; + + for (i = MLX4_NUM_QP_REGION; i > 0; --i) { + for (j = 2; j < i; ++j) { + if (dev->caps.reserved_qps_cnt[sort[j]] > + dev->caps.reserved_qps_cnt[sort[j - 1]]) { + tmp = sort[j]; + sort[j] = sort[j - 1]; + sort[j - 1] = tmp; + } + } + } + + for (i = 1; i < MLX4_NUM_QP_REGION; ++i) { + last_base -= dev->caps.reserved_qps_cnt[sort[i]]; + dev->caps.reserved_qps_base[sort[i]] = last_base; + reserved_from_top += + dev->caps.reserved_qps_cnt[sort[i]]; + } + + } + err = mlx4_bitmap_init(&qp_table->bitmap, dev->caps.num_qps, - (1 << 24) - 1, dev->caps.sqp_start + 8); + (1 << 23) - 1, dev->caps.sqp_start + 8, + reserved_from_top); if (err) return err; diff --git a/drivers/net/mlx4/srq.c b/drivers/net/mlx4/srq.c index 533eb6db24b..fe9f218691f 100644 --- a/drivers/net/mlx4/srq.c +++ b/drivers/net/mlx4/srq.c @@ -245,7 +245,7 @@ int mlx4_init_srq_table(struct 
mlx4_dev *dev) INIT_RADIX_TREE(&srq_table->tree, GFP_ATOMIC); err = mlx4_bitmap_init(&srq_table->bitmap, dev->caps.num_srqs, - dev->caps.num_srqs - 1, dev->caps.reserved_srqs); + dev->caps.num_srqs - 1, dev->caps.reserved_srqs, 0); if (err) return err; diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index d21e879f3c9..693f93cd29e 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -145,6 +145,18 @@ enum { MLX4_MTT_FLAG_PRESENT = 1 }; +enum mlx4_qp_region { + MLX4_QP_REGION_FW = 0, + MLX4_QP_REGION_ETH_ADDR, + MLX4_QP_REGION_FC_ADDR, + MLX4_QP_REGION_FC_EXCH, + MLX4_NUM_QP_REGION +}; + +enum { + MLX4_NUM_FEXCH = 64 * 1024, +}; + static inline u64 mlx4_fw_ver(u64 major, u64 minor, u64 subminor) { return (major << 32) | (minor << 16) | subminor; @@ -169,7 +181,6 @@ struct mlx4_caps { int max_rq_desc_sz; int max_qp_init_rdma; int max_qp_dest_rdma; - int reserved_qps; int sqp_start; int num_srqs; int max_srq_wqes; @@ -201,6 +212,12 @@ struct mlx4_caps { u16 stat_rate_support; u8 port_width_cap[MLX4_MAX_PORTS + 1]; int max_gso_sz; + int reserved_qps_cnt[MLX4_NUM_QP_REGION]; + int reserved_qps; + int reserved_qps_base[MLX4_NUM_QP_REGION]; + int log_num_macs; + int log_num_vlans; + int log_num_prios; }; struct mlx4_buf_list { -- cgit v1.2.3-70-g09d2 From b79acb49de6c2ab9ff0245f0f2b573d48b9a2d93 Mon Sep 17 00:00:00 2001 From: Yevgeny Petrilin Date: Wed, 22 Oct 2008 10:56:48 -0700 Subject: mlx4_core: Get ethernet MTU and default address from firmware Get maximum ethernet MTU and default MAC address from the firmware QUERY_DEV_CAP command. 
Signed-off-by: Yevgeny Petrilin Signed-off-by: Roland Dreier --- drivers/net/mlx4/fw.c | 13 ++++++++----- drivers/net/mlx4/fw.h | 4 +++- drivers/net/mlx4/main.c | 4 +++- include/linux/mlx4/device.h | 4 +++- 4 files changed, 17 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/mlx4/fw.c b/drivers/net/mlx4/fw.c index 40d8142c23b..8d402db9a03 100644 --- a/drivers/net/mlx4/fw.c +++ b/drivers/net/mlx4/fw.c @@ -346,7 +346,7 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) MLX4_GET(field, outbox, QUERY_DEV_CAP_VL_PORT_OFFSET); dev_cap->max_vl[i] = field >> 4; MLX4_GET(field, outbox, QUERY_DEV_CAP_MTU_WIDTH_OFFSET); - dev_cap->max_mtu[i] = field >> 4; + dev_cap->ib_mtu[i] = field >> 4; dev_cap->max_port_width[i] = field & 0xf; MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_GID_OFFSET); dev_cap->max_gids[i] = 1 << (field & 0xf); @@ -355,8 +355,10 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) } } else { #define QUERY_PORT_MTU_OFFSET 0x01 +#define QUERY_PORT_ETH_MTU_OFFSET 0x02 #define QUERY_PORT_WIDTH_OFFSET 0x06 #define QUERY_PORT_MAX_GID_PKEY_OFFSET 0x07 +#define QUERY_PORT_MAC_OFFSET 0x08 #define QUERY_PORT_MAX_MACVLAN_OFFSET 0x0a #define QUERY_PORT_MAX_VL_OFFSET 0x0b @@ -367,7 +369,7 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) goto out; MLX4_GET(field, outbox, QUERY_PORT_MTU_OFFSET); - dev_cap->max_mtu[i] = field & 0xf; + dev_cap->ib_mtu[i] = field & 0xf; MLX4_GET(field, outbox, QUERY_PORT_WIDTH_OFFSET); dev_cap->max_port_width[i] = field & 0xf; MLX4_GET(field, outbox, QUERY_PORT_MAX_GID_PKEY_OFFSET); @@ -378,7 +380,8 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) MLX4_GET(field, outbox, QUERY_PORT_MAX_MACVLAN_OFFSET); dev_cap->log_max_macs[i] = field & 0xf; dev_cap->log_max_vlans[i] = field >> 4; - + MLX4_GET(dev_cap->eth_mtu[i], outbox, QUERY_PORT_ETH_MTU_OFFSET); + MLX4_GET(dev_cap->def_mac[i], outbox, 
QUERY_PORT_MAC_OFFSET); } } @@ -412,7 +415,7 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) mlx4_dbg(dev, "Max CQEs: %d, max WQEs: %d, max SRQ WQEs: %d\n", dev_cap->max_cq_sz, dev_cap->max_qp_sz, dev_cap->max_srq_sz); mlx4_dbg(dev, "Local CA ACK delay: %d, max MTU: %d, port width cap: %d\n", - dev_cap->local_ca_ack_delay, 128 << dev_cap->max_mtu[1], + dev_cap->local_ca_ack_delay, 128 << dev_cap->ib_mtu[1], dev_cap->max_port_width[1]); mlx4_dbg(dev, "Max SQ desc size: %d, max SQ S/G: %d\n", dev_cap->max_sq_desc_sz, dev_cap->max_sq_sg); @@ -824,7 +827,7 @@ int mlx4_INIT_PORT(struct mlx4_dev *dev, int port) flags |= (dev->caps.port_width_cap[port] & 0xf) << INIT_PORT_PORT_WIDTH_SHIFT; MLX4_PUT(inbox, flags, INIT_PORT_FLAGS_OFFSET); - field = 128 << dev->caps.mtu_cap[port]; + field = 128 << dev->caps.ib_mtu_cap[port]; MLX4_PUT(inbox, field, INIT_PORT_MTU_OFFSET); field = dev->caps.gid_table_len[port]; MLX4_PUT(inbox, field, INIT_PORT_MAX_GID_OFFSET); diff --git a/drivers/net/mlx4/fw.h b/drivers/net/mlx4/fw.h index c34e726d66e..d0913d4d262 100644 --- a/drivers/net/mlx4/fw.h +++ b/drivers/net/mlx4/fw.h @@ -66,11 +66,13 @@ struct mlx4_dev_cap { int local_ca_ack_delay; int num_ports; u32 max_msg_sz; - int max_mtu[MLX4_MAX_PORTS + 1]; + int ib_mtu[MLX4_MAX_PORTS + 1]; int max_port_width[MLX4_MAX_PORTS + 1]; int max_vl[MLX4_MAX_PORTS + 1]; int max_gids[MLX4_MAX_PORTS + 1]; int max_pkeys[MLX4_MAX_PORTS + 1]; + u64 def_mac[MLX4_MAX_PORTS + 1]; + u16 eth_mtu[MLX4_MAX_PORTS + 1]; u16 stat_rate_support; u32 flags; int reserved_uars; diff --git a/drivers/net/mlx4/main.c b/drivers/net/mlx4/main.c index 560e1962212..28f36b88de3 100644 --- a/drivers/net/mlx4/main.c +++ b/drivers/net/mlx4/main.c @@ -133,10 +133,12 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) dev->caps.num_ports = dev_cap->num_ports; for (i = 1; i <= dev->caps.num_ports; ++i) { dev->caps.vl_cap[i] = dev_cap->max_vl[i]; - dev->caps.mtu_cap[i] = 
dev_cap->max_mtu[i]; + dev->caps.ib_mtu_cap[i] = dev_cap->ib_mtu[i]; dev->caps.gid_table_len[i] = dev_cap->max_gids[i]; dev->caps.pkey_table_len[i] = dev_cap->max_pkeys[i]; dev->caps.port_width_cap[i] = dev_cap->max_port_width[i]; + dev->caps.eth_mtu_cap[i] = dev_cap->eth_mtu[i]; + dev->caps.def_mac[i] = dev_cap->def_mac[i]; } dev->caps.num_uars = dev_cap->uar_size / PAGE_SIZE; diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 693f93cd29e..f9e73cfc540 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -166,7 +166,9 @@ struct mlx4_caps { u64 fw_ver; int num_ports; int vl_cap[MLX4_MAX_PORTS + 1]; - int mtu_cap[MLX4_MAX_PORTS + 1]; + int ib_mtu_cap[MLX4_MAX_PORTS + 1]; + u64 def_mac[MLX4_MAX_PORTS + 1]; + int eth_mtu_cap[MLX4_MAX_PORTS + 1]; int gid_table_len[MLX4_MAX_PORTS + 1]; int pkey_table_len[MLX4_MAX_PORTS + 1]; int local_ca_ack_delay; -- cgit v1.2.3-70-g09d2 From 08e5338d119daeb3c7746fa80fa916b8d3d48e89 Mon Sep 17 00:00:00 2001 From: David Miller Date: Wed, 22 Oct 2008 20:21:29 +0200 Subject: i2c-algo-pcf: Pass adapter data into ->waitforpin() method Pass adapter data into ->waitforpin() method. Signed-off-by: David S. 
Miller Signed-off-by: Jean Delvare --- drivers/i2c/algos/i2c-algo-pcf.c | 2 +- drivers/i2c/busses/i2c-elektor.c | 3 ++- include/linux/i2c-algo-pcf.h | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/i2c/algos/i2c-algo-pcf.c b/drivers/i2c/algos/i2c-algo-pcf.c index 1e328d19cd6..a8a5b6d1dd8 100644 --- a/drivers/i2c/algos/i2c-algo-pcf.c +++ b/drivers/i2c/algos/i2c-algo-pcf.c @@ -135,7 +135,7 @@ static int wait_for_pin(struct i2c_algo_pcf_data *adap, int *status) { *status = get_pcf(adap, 1); #ifndef STUB_I2C while (timeout-- && (*status & I2C_PCF_PIN)) { - adap->waitforpin(); + adap->waitforpin(adap->data); *status = get_pcf(adap, 1); } if (*status & I2C_PCF_LAB) { diff --git a/drivers/i2c/busses/i2c-elektor.c b/drivers/i2c/busses/i2c-elektor.c index 7f38c01fb3a..0ed3ccb81b6 100644 --- a/drivers/i2c/busses/i2c-elektor.c +++ b/drivers/i2c/busses/i2c-elektor.c @@ -104,7 +104,8 @@ static int pcf_isa_getclock(void *data) return (clock); } -static void pcf_isa_waitforpin(void) { +static void pcf_isa_waitforpin(void *data) +{ DEFINE_WAIT(wait); int timeout = 2; unsigned long flags; diff --git a/include/linux/i2c-algo-pcf.h b/include/linux/i2c-algo-pcf.h index 0177d280f73..5de8a319bf1 100644 --- a/include/linux/i2c-algo-pcf.h +++ b/include/linux/i2c-algo-pcf.h @@ -31,7 +31,7 @@ struct i2c_algo_pcf_data { int (*getpcf) (void *data, int ctl); int (*getown) (void *data); int (*getclock) (void *data); - void (*waitforpin) (void); + void (*waitforpin) (void *data); /* Multi-master lost arbitration back-off delay (msecs) * This should be set by the bus adapter or knowledgable client -- cgit v1.2.3-70-g09d2 From 30091404af5a7cd515e7b565df76932e295d8f6f Mon Sep 17 00:00:00 2001 From: David Miller Date: Wed, 22 Oct 2008 20:21:30 +0200 Subject: i2c-algo-pcf: Add adapter hooks around xfer begin and end Some I2C bus implementations need to synchronize with external entities, such as system firmware, which might also be programming 
the same I2C bus. In order to facilitate this add ->xfer_begin() and ->xfer_end() hooks which are invoked around pcf_xfer(). [JD: Make these hooks optional.] Signed-off-by: David S. Miller Signed-off-by: Jean Delvare --- drivers/i2c/algos/i2c-algo-pcf.c | 17 +++++++++++++---- include/linux/i2c-algo-pcf.h | 3 +++ 2 files changed, 16 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/drivers/i2c/algos/i2c-algo-pcf.c b/drivers/i2c/algos/i2c-algo-pcf.c index a8a5b6d1dd8..b8a6f3bcbae 100644 --- a/drivers/i2c/algos/i2c-algo-pcf.c +++ b/drivers/i2c/algos/i2c-algo-pcf.c @@ -331,13 +331,16 @@ static int pcf_xfer(struct i2c_adapter *i2c_adap, int i; int ret=0, timeout, status; + if (adap->xfer_begin) + adap->xfer_begin(adap->data); /* Check for bus busy */ timeout = wait_for_bb(adap); if (timeout) { DEB2(printk(KERN_ERR "i2c-algo-pcf.o: " "Timeout waiting for BB in pcf_xfer\n");) - return -EIO; + i = -EIO; + goto out; } for (i = 0;ret >= 0 && i < num; i++) { @@ -359,12 +362,14 @@ static int pcf_xfer(struct i2c_adapter *i2c_adap, if (timeout) { if (timeout == -EINTR) { /* arbitration lost */ - return (-EINTR); + i = -EINTR; + goto out; } i2c_stop(adap); DEB2(printk(KERN_ERR "i2c-algo-pcf.o: Timeout waiting " "for PIN(1) in pcf_xfer\n");) - return (-EREMOTEIO); + i = -EREMOTEIO; + goto out; } #ifndef STUB_I2C @@ -372,7 +377,8 @@ static int pcf_xfer(struct i2c_adapter *i2c_adap, if (status & I2C_PCF_LRB) { i2c_stop(adap); DEB2(printk(KERN_ERR "i2c-algo-pcf.o: No LRB(1) in pcf_xfer\n");) - return (-EREMOTEIO); + i = -EREMOTEIO; + goto out; } #endif @@ -404,6 +410,9 @@ static int pcf_xfer(struct i2c_adapter *i2c_adap, } } +out: + if (adap->xfer_end) + adap->xfer_end(adap->data); return (i); } diff --git a/include/linux/i2c-algo-pcf.h b/include/linux/i2c-algo-pcf.h index 5de8a319bf1..0f91a957a69 100644 --- a/include/linux/i2c-algo-pcf.h +++ b/include/linux/i2c-algo-pcf.h @@ -33,6 +33,9 @@ struct i2c_algo_pcf_data { int (*getclock) (void *data); void 
(*waitforpin) (void *data); + void (*xfer_begin) (void *data); + void (*xfer_end) (void *data); + /* Multi-master lost arbitration back-off delay (msecs) * This should be set by the bus adapter or knowledgable client * if bus is multi-mastered, else zero -- cgit v1.2.3-70-g09d2 From 14f55f7a033f86a4e8f0310dd4d54b5464322e6e Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Wed, 22 Oct 2008 20:21:30 +0200 Subject: i2c: Make clear what the class field of i2c_adapter is good for Make clear what the class field of i2c_adapter is good for. Signed-off-by: Wolfram Sang Signed-off-by: Jean Delvare --- include/linux/i2c.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/i2c.h b/include/linux/i2c.h index 06115128047..4ac8ec3c792 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -345,7 +345,7 @@ struct i2c_algorithm { struct i2c_adapter { struct module *owner; unsigned int id; - unsigned int class; + unsigned int class; /* classes to allow probing for */ const struct i2c_algorithm *algo; /* the algorithm to access the bus */ void *algo_data; -- cgit v1.2.3-70-g09d2 From 7d1d8999b4bec0ba09f935e648a688bb25596d06 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Wed, 22 Oct 2008 20:21:31 +0200 Subject: i2c: Constify i2c_get_clientdata's parameter i2c_get_clientdata doesn't change the i2c_client it is passed as a parameter, so it can be constified. Same for i2c_get_adapdata. Signed-off-by: Jean Delvare --- Documentation/i2c/writing-clients | 2 +- include/linux/device.h | 2 +- include/linux/i2c.h | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/Documentation/i2c/writing-clients b/Documentation/i2c/writing-clients index d73ee117a8c..3b01350c149 100644 --- a/Documentation/i2c/writing-clients +++ b/Documentation/i2c/writing-clients @@ -83,7 +83,7 @@ be very useful. 
void i2c_set_clientdata(struct i2c_client *client, void *data); /* retrieve the value */ - void *i2c_get_clientdata(struct i2c_client *client); + void *i2c_get_clientdata(const struct i2c_client *client); An example structure is below. diff --git a/include/linux/device.h b/include/linux/device.h index 987f5912720..1a3686d15f9 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -450,7 +450,7 @@ static inline void set_dev_node(struct device *dev, int node) } #endif -static inline void *dev_get_drvdata(struct device *dev) +static inline void *dev_get_drvdata(const struct device *dev) { return dev->driver_data; } diff --git a/include/linux/i2c.h b/include/linux/i2c.h index 4ac8ec3c792..a411f0b70e8 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -224,7 +224,7 @@ static inline struct i2c_client *kobj_to_i2c_client(struct kobject *kobj) return to_i2c_client(dev); } -static inline void *i2c_get_clientdata (struct i2c_client *dev) +static inline void *i2c_get_clientdata(const struct i2c_client *dev) { return dev_get_drvdata (&dev->dev); } @@ -369,7 +369,7 @@ struct i2c_adapter { }; #define to_i2c_adapter(d) container_of(d, struct i2c_adapter, dev) -static inline void *i2c_get_adapdata (struct i2c_adapter *dev) +static inline void *i2c_get_adapdata(const struct i2c_adapter *dev) { return dev_get_drvdata (&dev->dev); } -- cgit v1.2.3-70-g09d2 From c0589d4bc19294a49934af1be736eb6e9ad11673 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Wed, 22 Oct 2008 20:21:31 +0200 Subject: i2c: Drop 2-byte address block transfer defines We have no users and no implementers for these transfer types so it makes little sense to define functionality bits for them. 
Signed-off-by: Jean Delvare --- include/linux/i2c.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/i2c.h b/include/linux/i2c.h index a411f0b70e8..7d21aba819f 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -541,8 +541,6 @@ struct i2c_msg { #define I2C_FUNC_SMBUS_WRITE_BLOCK_DATA 0x02000000 #define I2C_FUNC_SMBUS_READ_I2C_BLOCK 0x04000000 /* I2C-like block xfer */ #define I2C_FUNC_SMBUS_WRITE_I2C_BLOCK 0x08000000 /* w/ 1-byte reg. addr. */ -#define I2C_FUNC_SMBUS_READ_I2C_BLOCK_2 0x10000000 /* I2C-like block xfer */ -#define I2C_FUNC_SMBUS_WRITE_I2C_BLOCK_2 0x20000000 /* w/ 2-byte reg. addr. */ #define I2C_FUNC_SMBUS_BYTE (I2C_FUNC_SMBUS_READ_BYTE | \ I2C_FUNC_SMBUS_WRITE_BYTE) @@ -554,8 +552,6 @@ struct i2c_msg { I2C_FUNC_SMBUS_WRITE_BLOCK_DATA) #define I2C_FUNC_SMBUS_I2C_BLOCK (I2C_FUNC_SMBUS_READ_I2C_BLOCK | \ I2C_FUNC_SMBUS_WRITE_I2C_BLOCK) -#define I2C_FUNC_SMBUS_I2C_BLOCK_2 (I2C_FUNC_SMBUS_READ_I2C_BLOCK_2 | \ - I2C_FUNC_SMBUS_WRITE_I2C_BLOCK_2) #define I2C_FUNC_SMBUS_EMUL (I2C_FUNC_SMBUS_QUICK | \ I2C_FUNC_SMBUS_BYTE | \ -- cgit v1.2.3-70-g09d2 From 3ae70deef0a5cc34a96aa1972697d01606bc7933 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Wed, 22 Oct 2008 20:21:32 +0200 Subject: i2c: Clean up Fix most checkpatch.pl errors and warnings. This includes replacing spaces with tabs in many places, adding and removing spaces, and folding long lines. Also complete a couple prototypes to make it clearer what the parameters represent. 
Signed-off-by: Jean Delvare --- include/linux/i2c.h | 149 ++++++++++++++++++++++++++-------------------------- 1 file changed, 75 insertions(+), 74 deletions(-) (limited to 'include/linux') diff --git a/include/linux/i2c.h b/include/linux/i2c.h index 7d21aba819f..12d7364ad3f 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -53,45 +53,44 @@ struct i2c_board_info; * transmit one message at a time, a more complex version can be used to * transmit an arbitrary number of messages without interruption. */ -extern int i2c_master_send(struct i2c_client *,const char* ,int); -extern int i2c_master_recv(struct i2c_client *,char* ,int); +extern int i2c_master_send(struct i2c_client *client, const char *buf, + int count); +extern int i2c_master_recv(struct i2c_client *client, char *buf, int count); /* Transfer num messages. */ -extern int i2c_transfer(struct i2c_adapter *adap, struct i2c_msg *msgs, int num); - +extern int i2c_transfer(struct i2c_adapter *adap, struct i2c_msg *msgs, + int num); /* This is the very generalized SMBus access routine. You probably do not want to use this, though; one of the functions below may be much easier, and probably just as fast. Note that we use i2c_adapter here, because you do not need a specific smbus adapter to call this function. */ -extern s32 i2c_smbus_xfer (struct i2c_adapter * adapter, u16 addr, - unsigned short flags, - char read_write, u8 command, int size, - union i2c_smbus_data * data); +extern s32 i2c_smbus_xfer(struct i2c_adapter *adapter, u16 addr, + unsigned short flags, char read_write, u8 command, + int size, union i2c_smbus_data *data); /* Now follow the 'nice' access routines. These also document the calling conventions of i2c_smbus_xfer. 
*/ -extern s32 i2c_smbus_read_byte(struct i2c_client * client); -extern s32 i2c_smbus_write_byte(struct i2c_client * client, u8 value); -extern s32 i2c_smbus_read_byte_data(struct i2c_client * client, u8 command); -extern s32 i2c_smbus_write_byte_data(struct i2c_client * client, - u8 command, u8 value); -extern s32 i2c_smbus_read_word_data(struct i2c_client * client, u8 command); -extern s32 i2c_smbus_write_word_data(struct i2c_client * client, - u8 command, u16 value); +extern s32 i2c_smbus_read_byte(struct i2c_client *client); +extern s32 i2c_smbus_write_byte(struct i2c_client *client, u8 value); +extern s32 i2c_smbus_read_byte_data(struct i2c_client *client, u8 command); +extern s32 i2c_smbus_write_byte_data(struct i2c_client *client, + u8 command, u8 value); +extern s32 i2c_smbus_read_word_data(struct i2c_client *client, u8 command); +extern s32 i2c_smbus_write_word_data(struct i2c_client *client, + u8 command, u16 value); /* Returns the number of read bytes */ extern s32 i2c_smbus_read_block_data(struct i2c_client *client, u8 command, u8 *values); -extern s32 i2c_smbus_write_block_data(struct i2c_client * client, - u8 command, u8 length, - const u8 *values); +extern s32 i2c_smbus_write_block_data(struct i2c_client *client, + u8 command, u8 length, const u8 *values); /* Returns the number of read bytes */ -extern s32 i2c_smbus_read_i2c_block_data(struct i2c_client * client, +extern s32 i2c_smbus_read_i2c_block_data(struct i2c_client *client, u8 command, u8 length, u8 *values); -extern s32 i2c_smbus_write_i2c_block_data(struct i2c_client * client, +extern s32 i2c_smbus_write_i2c_block_data(struct i2c_client *client, u8 command, u8 length, const u8 *values); @@ -169,7 +168,7 @@ struct i2c_driver { /* a ioctl like command that can be used to perform specific functions * with the device. 
*/ - int (*command)(struct i2c_client *client,unsigned int cmd, void *arg); + int (*command)(struct i2c_client *client, unsigned int cmd, void *arg); struct device_driver driver; const struct i2c_device_id *id_table; @@ -226,12 +225,12 @@ static inline struct i2c_client *kobj_to_i2c_client(struct kobject *kobj) static inline void *i2c_get_clientdata(const struct i2c_client *dev) { - return dev_get_drvdata (&dev->dev); + return dev_get_drvdata(&dev->dev); } -static inline void i2c_set_clientdata (struct i2c_client *dev, void *data) +static inline void i2c_set_clientdata(struct i2c_client *dev, void *data) { - dev_set_drvdata (&dev->dev, data); + dev_set_drvdata(&dev->dev, data); } /** @@ -272,7 +271,7 @@ struct i2c_board_info { * fields (such as associated irq, or device-specific platform_data) * are provided using conventional syntax. */ -#define I2C_BOARD_INFO(dev_type,dev_addr) \ +#define I2C_BOARD_INFO(dev_type, dev_addr) \ .type = (dev_type), .addr = (dev_addr) @@ -306,10 +305,12 @@ extern void i2c_unregister_device(struct i2c_client *); */ #ifdef CONFIG_I2C_BOARDINFO extern int -i2c_register_board_info(int busnum, struct i2c_board_info const *info, unsigned n); +i2c_register_board_info(int busnum, struct i2c_board_info const *info, + unsigned n); #else static inline int -i2c_register_board_info(int busnum, struct i2c_board_info const *info, unsigned n) +i2c_register_board_info(int busnum, struct i2c_board_info const *info, + unsigned n) { return 0; } @@ -328,11 +329,11 @@ struct i2c_algorithm { using common I2C messages */ /* master_xfer should return the number of messages successfully processed, or a negative value on error */ - int (*master_xfer)(struct i2c_adapter *adap,struct i2c_msg *msgs, - int num); + int (*master_xfer)(struct i2c_adapter *adap, struct i2c_msg *msgs, + int num); int (*smbus_xfer) (struct i2c_adapter *adap, u16 addr, - unsigned short flags, char read_write, - u8 command, int size, union i2c_smbus_data * data); + unsigned short flags, 
char read_write, + u8 command, int size, union i2c_smbus_data *data); /* To determine what the adapter supports */ u32 (*functionality) (struct i2c_adapter *); @@ -371,12 +372,12 @@ struct i2c_adapter { static inline void *i2c_get_adapdata(const struct i2c_adapter *dev) { - return dev_get_drvdata (&dev->dev); + return dev_get_drvdata(&dev->dev); } -static inline void i2c_set_adapdata (struct i2c_adapter *dev, void *data) +static inline void i2c_set_adapdata(struct i2c_adapter *dev, void *data) { - dev_set_drvdata (&dev->dev, data); + dev_set_drvdata(&dev->dev, data); } /*flags for the client struct: */ @@ -449,7 +450,7 @@ extern int i2c_probe(struct i2c_adapter *adapter, const struct i2c_client_address_data *address_data, int (*found_proc) (struct i2c_adapter *, int, int)); -extern struct i2c_adapter* i2c_get_adapter(int id); +extern struct i2c_adapter *i2c_get_adapter(int id); extern void i2c_put_adapter(struct i2c_adapter *adap); @@ -465,7 +466,7 @@ static inline int i2c_check_functionality(struct i2c_adapter *adap, u32 func) return (func & i2c_get_functionality(adap)) == func; } -/* Return id number for a specific adapter */ +/* Return the adapter number for a specific adapter */ static inline int i2c_adapter_id(struct i2c_adapter *adap) { return adap->nr; @@ -526,7 +527,7 @@ struct i2c_msg { #define I2C_FUNC_I2C 0x00000001 #define I2C_FUNC_10BIT_ADDR 0x00000002 -#define I2C_FUNC_PROTOCOL_MANGLING 0x00000004 /* I2C_M_{REV_DIR_ADDR,NOSTART,..} */ +#define I2C_FUNC_PROTOCOL_MANGLING 0x00000004 /* I2C_M_NOSTART etc. */ #define I2C_FUNC_SMBUS_PEC 0x00000008 #define I2C_FUNC_SMBUS_BLOCK_PROC_CALL 0x00008000 /* SMBus 2.0 */ #define I2C_FUNC_SMBUS_QUICK 0x00010000 @@ -542,25 +543,25 @@ struct i2c_msg { #define I2C_FUNC_SMBUS_READ_I2C_BLOCK 0x04000000 /* I2C-like block xfer */ #define I2C_FUNC_SMBUS_WRITE_I2C_BLOCK 0x08000000 /* w/ 1-byte reg. addr. 
*/ -#define I2C_FUNC_SMBUS_BYTE (I2C_FUNC_SMBUS_READ_BYTE | \ - I2C_FUNC_SMBUS_WRITE_BYTE) -#define I2C_FUNC_SMBUS_BYTE_DATA (I2C_FUNC_SMBUS_READ_BYTE_DATA | \ - I2C_FUNC_SMBUS_WRITE_BYTE_DATA) -#define I2C_FUNC_SMBUS_WORD_DATA (I2C_FUNC_SMBUS_READ_WORD_DATA | \ - I2C_FUNC_SMBUS_WRITE_WORD_DATA) -#define I2C_FUNC_SMBUS_BLOCK_DATA (I2C_FUNC_SMBUS_READ_BLOCK_DATA | \ - I2C_FUNC_SMBUS_WRITE_BLOCK_DATA) -#define I2C_FUNC_SMBUS_I2C_BLOCK (I2C_FUNC_SMBUS_READ_I2C_BLOCK | \ - I2C_FUNC_SMBUS_WRITE_I2C_BLOCK) - -#define I2C_FUNC_SMBUS_EMUL (I2C_FUNC_SMBUS_QUICK | \ - I2C_FUNC_SMBUS_BYTE | \ - I2C_FUNC_SMBUS_BYTE_DATA | \ - I2C_FUNC_SMBUS_WORD_DATA | \ - I2C_FUNC_SMBUS_PROC_CALL | \ - I2C_FUNC_SMBUS_WRITE_BLOCK_DATA | \ - I2C_FUNC_SMBUS_I2C_BLOCK | \ - I2C_FUNC_SMBUS_PEC) +#define I2C_FUNC_SMBUS_BYTE (I2C_FUNC_SMBUS_READ_BYTE | \ + I2C_FUNC_SMBUS_WRITE_BYTE) +#define I2C_FUNC_SMBUS_BYTE_DATA (I2C_FUNC_SMBUS_READ_BYTE_DATA | \ + I2C_FUNC_SMBUS_WRITE_BYTE_DATA) +#define I2C_FUNC_SMBUS_WORD_DATA (I2C_FUNC_SMBUS_READ_WORD_DATA | \ + I2C_FUNC_SMBUS_WRITE_WORD_DATA) +#define I2C_FUNC_SMBUS_BLOCK_DATA (I2C_FUNC_SMBUS_READ_BLOCK_DATA | \ + I2C_FUNC_SMBUS_WRITE_BLOCK_DATA) +#define I2C_FUNC_SMBUS_I2C_BLOCK (I2C_FUNC_SMBUS_READ_I2C_BLOCK | \ + I2C_FUNC_SMBUS_WRITE_I2C_BLOCK) + +#define I2C_FUNC_SMBUS_EMUL (I2C_FUNC_SMBUS_QUICK | \ + I2C_FUNC_SMBUS_BYTE | \ + I2C_FUNC_SMBUS_BYTE_DATA | \ + I2C_FUNC_SMBUS_WORD_DATA | \ + I2C_FUNC_SMBUS_PROC_CALL | \ + I2C_FUNC_SMBUS_WRITE_BLOCK_DATA | \ + I2C_FUNC_SMBUS_I2C_BLOCK | \ + I2C_FUNC_SMBUS_PEC) /* * Data for SMBus Messages @@ -570,7 +571,7 @@ union i2c_smbus_data { __u8 byte; __u16 word; __u8 block[I2C_SMBUS_BLOCK_MAX + 2]; /* block[0] is used for length */ - /* and one more for user-space compatibility */ + /* and one more for user-space compatibility */ }; /* i2c_smbus_xfer read or write markers */ @@ -598,21 +599,21 @@ union i2c_smbus_data { /* Default fill of many variables */ #define I2C_CLIENT_DEFAULTS {I2C_CLIENT_END, I2C_CLIENT_END, 
I2C_CLIENT_END, \ - I2C_CLIENT_END, I2C_CLIENT_END, I2C_CLIENT_END, \ - I2C_CLIENT_END, I2C_CLIENT_END, I2C_CLIENT_END, \ - I2C_CLIENT_END, I2C_CLIENT_END, I2C_CLIENT_END, \ - I2C_CLIENT_END, I2C_CLIENT_END, I2C_CLIENT_END, \ - I2C_CLIENT_END, I2C_CLIENT_END, I2C_CLIENT_END, \ - I2C_CLIENT_END, I2C_CLIENT_END, I2C_CLIENT_END, \ - I2C_CLIENT_END, I2C_CLIENT_END, I2C_CLIENT_END, \ - I2C_CLIENT_END, I2C_CLIENT_END, I2C_CLIENT_END, \ - I2C_CLIENT_END, I2C_CLIENT_END, I2C_CLIENT_END, \ - I2C_CLIENT_END, I2C_CLIENT_END, I2C_CLIENT_END, \ - I2C_CLIENT_END, I2C_CLIENT_END, I2C_CLIENT_END, \ - I2C_CLIENT_END, I2C_CLIENT_END, I2C_CLIENT_END, \ - I2C_CLIENT_END, I2C_CLIENT_END, I2C_CLIENT_END, \ - I2C_CLIENT_END, I2C_CLIENT_END, I2C_CLIENT_END, \ - I2C_CLIENT_END, I2C_CLIENT_END, I2C_CLIENT_END} + I2C_CLIENT_END, I2C_CLIENT_END, I2C_CLIENT_END, \ + I2C_CLIENT_END, I2C_CLIENT_END, I2C_CLIENT_END, \ + I2C_CLIENT_END, I2C_CLIENT_END, I2C_CLIENT_END, \ + I2C_CLIENT_END, I2C_CLIENT_END, I2C_CLIENT_END, \ + I2C_CLIENT_END, I2C_CLIENT_END, I2C_CLIENT_END, \ + I2C_CLIENT_END, I2C_CLIENT_END, I2C_CLIENT_END, \ + I2C_CLIENT_END, I2C_CLIENT_END, I2C_CLIENT_END, \ + I2C_CLIENT_END, I2C_CLIENT_END, I2C_CLIENT_END, \ + I2C_CLIENT_END, I2C_CLIENT_END, I2C_CLIENT_END, \ + I2C_CLIENT_END, I2C_CLIENT_END, I2C_CLIENT_END, \ + I2C_CLIENT_END, I2C_CLIENT_END, I2C_CLIENT_END, \ + I2C_CLIENT_END, I2C_CLIENT_END, I2C_CLIENT_END, \ + I2C_CLIENT_END, I2C_CLIENT_END, I2C_CLIENT_END, \ + I2C_CLIENT_END, I2C_CLIENT_END, I2C_CLIENT_END, \ + I2C_CLIENT_END, I2C_CLIENT_END, I2C_CLIENT_END} /* I2C_CLIENT_MODULE_PARM creates a module parameter, and puts it in the module header */ @@ -621,7 +622,7 @@ union i2c_smbus_data { static unsigned short var[I2C_CLIENT_MAX_OPTS] = I2C_CLIENT_DEFAULTS; \ static unsigned int var##_num; \ module_param_array(var, short, &var##_num, 0); \ - MODULE_PARM_DESC(var,desc) + MODULE_PARM_DESC(var, desc) #define I2C_CLIENT_MODULE_PARM_FORCE(name) \ 
I2C_CLIENT_MODULE_PARM(force_##name, \ -- cgit v1.2.3-70-g09d2 From 11f1f2afd6b07729b12aaba479344d7f12d88ff9 Mon Sep 17 00:00:00 2001 From: Anton Vorontsov Date: Wed, 22 Oct 2008 20:21:33 +0200 Subject: i2c: Add info->archdata field If present the info->archdata is copied into the dev->archdata. Some (OpenFirmware) platforms need it. Signed-off-by: Anton Vorontsov Signed-off-by: Jean Delvare --- drivers/i2c/i2c-core.c | 3 +++ include/linux/i2c.h | 2 ++ 2 files changed, 5 insertions(+) (limited to 'include/linux') diff --git a/drivers/i2c/i2c-core.c b/drivers/i2c/i2c-core.c index 42e852d79ff..5a485c22660 100644 --- a/drivers/i2c/i2c-core.c +++ b/drivers/i2c/i2c-core.c @@ -266,6 +266,9 @@ i2c_new_device(struct i2c_adapter *adap, struct i2c_board_info const *info) client->dev.platform_data = info->platform_data; + if (info->archdata) + client->dev.archdata = *info->archdata; + client->flags = info->flags; client->addr = info->addr; client->irq = info->irq; diff --git a/include/linux/i2c.h b/include/linux/i2c.h index 12d7364ad3f..33a5992d493 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -239,6 +239,7 @@ static inline void i2c_set_clientdata(struct i2c_client *dev, void *data) * @flags: to initialize i2c_client.flags * @addr: stored in i2c_client.addr * @platform_data: stored in i2c_client.dev.platform_data + * @archdata: copied into i2c_client.dev.archdata * @irq: stored in i2c_client.irq * * I2C doesn't actually support hardware probing, although controllers and @@ -258,6 +259,7 @@ struct i2c_board_info { unsigned short flags; unsigned short addr; void *platform_data; + struct dev_archdata *archdata; int irq; }; -- cgit v1.2.3-70-g09d2 From 2a2336f8228292b8197f4187e54b0748903e6645 Mon Sep 17 00:00:00 2001 From: Yevgeny Petrilin Date: Wed, 22 Oct 2008 11:44:46 -0700 Subject: mlx4_core: Ethernet MAC/VLAN management Add support for managing MAC and VLAN filters for each port. 
Signed-off-by: Yevgeny Petrilin Signed-off-by: Oren Duer Signed-off-by: Roland Dreier --- drivers/net/mlx4/Makefile | 2 +- drivers/net/mlx4/main.c | 14 +++ drivers/net/mlx4/mlx4.h | 33 ++++++ drivers/net/mlx4/port.c | 259 ++++++++++++++++++++++++++++++++++++++++++++ include/linux/mlx4/cmd.h | 9 ++ include/linux/mlx4/device.h | 12 ++ 6 files changed, 328 insertions(+), 1 deletion(-) create mode 100644 drivers/net/mlx4/port.c (limited to 'include/linux') diff --git a/drivers/net/mlx4/Makefile b/drivers/net/mlx4/Makefile index 0952a6528f5..9f493666e27 100644 --- a/drivers/net/mlx4/Makefile +++ b/drivers/net/mlx4/Makefile @@ -1,4 +1,4 @@ obj-$(CONFIG_MLX4_CORE) += mlx4_core.o mlx4_core-y := alloc.o catas.o cmd.o cq.o eq.o fw.o icm.o intf.o main.o mcg.o \ - mr.o pd.o profile.o qp.o reset.o srq.o + mr.o pd.o port.o profile.o qp.o reset.o srq.o diff --git a/drivers/net/mlx4/main.c b/drivers/net/mlx4/main.c index 28f36b88de3..0a5c8bfb3f1 100644 --- a/drivers/net/mlx4/main.c +++ b/drivers/net/mlx4/main.c @@ -780,11 +780,22 @@ no_msi: priv->eq_table.eq[i].irq = dev->pdev->irq; } +static void mlx4_init_port_info(struct mlx4_dev *dev, int port) +{ + struct mlx4_port_info *info = &mlx4_priv(dev)->port[port]; + + info->dev = dev; + info->port = port; + mlx4_init_mac_table(dev, &info->mac_table); + mlx4_init_vlan_table(dev, &info->vlan_table); +} + static int __mlx4_init_one(struct pci_dev *pdev, const struct pci_device_id *id) { struct mlx4_priv *priv; struct mlx4_dev *dev; int err; + int port; printk(KERN_INFO PFX "Initializing %s\n", pci_name(pdev)); @@ -894,6 +905,9 @@ static int __mlx4_init_one(struct pci_dev *pdev, const struct pci_device_id *id) if (err) goto err_close; + for (port = 1; port <= dev->caps.num_ports; port++) + mlx4_init_port_info(dev, port); + err = mlx4_register_device(dev); if (err) goto err_cleanup; diff --git a/drivers/net/mlx4/mlx4.h b/drivers/net/mlx4/mlx4.h index 9e2f44c3181..23309f381ee 100644 --- a/drivers/net/mlx4/mlx4.h +++ 
b/drivers/net/mlx4/mlx4.h @@ -252,6 +252,35 @@ struct mlx4_catas_err { struct list_head list; }; +#define MLX4_MAX_MAC_NUM 128 +#define MLX4_MAC_TABLE_SIZE (MLX4_MAX_MAC_NUM << 3) + +struct mlx4_mac_table { + __be64 entries[MLX4_MAX_MAC_NUM]; + int refs[MLX4_MAX_MAC_NUM]; + struct mutex mutex; + int total; + int max; +}; + +#define MLX4_MAX_VLAN_NUM 128 +#define MLX4_VLAN_TABLE_SIZE (MLX4_MAX_VLAN_NUM << 2) + +struct mlx4_vlan_table { + __be32 entries[MLX4_MAX_VLAN_NUM]; + int refs[MLX4_MAX_VLAN_NUM]; + struct mutex mutex; + int total; + int max; +}; + +struct mlx4_port_info { + struct mlx4_dev *dev; + int port; + struct mlx4_mac_table mac_table; + struct mlx4_vlan_table vlan_table; +}; + struct mlx4_priv { struct mlx4_dev dev; @@ -280,6 +309,7 @@ struct mlx4_priv { struct mlx4_uar driver_uar; void __iomem *kar; + struct mlx4_port_info port[MLX4_MAX_PORTS + 1]; }; static inline struct mlx4_priv *mlx4_priv(struct mlx4_dev *dev) @@ -350,4 +380,7 @@ void mlx4_srq_event(struct mlx4_dev *dev, u32 srqn, int event_type); void mlx4_handle_catas_err(struct mlx4_dev *dev); +void mlx4_init_mac_table(struct mlx4_dev *dev, struct mlx4_mac_table *table); +void mlx4_init_vlan_table(struct mlx4_dev *dev, struct mlx4_vlan_table *table); + #endif /* MLX4_H */ diff --git a/drivers/net/mlx4/port.c b/drivers/net/mlx4/port.c new file mode 100644 index 00000000000..8644f3d978e --- /dev/null +++ b/drivers/net/mlx4/port.c @@ -0,0 +1,259 @@ +/* + * Copyright (c) 2007 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. 
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include +#include + +#include + +#include "mlx4.h" + +#define MLX4_MAC_VALID (1ull << 63) +#define MLX4_MAC_MASK 0xffffffffffffULL + +#define MLX4_VLAN_VALID (1u << 31) +#define MLX4_VLAN_MASK 0xfff + +void mlx4_init_mac_table(struct mlx4_dev *dev, struct mlx4_mac_table *table) +{ + int i; + + mutex_init(&table->mutex); + for (i = 0; i < MLX4_MAX_MAC_NUM; i++) { + table->entries[i] = 0; + table->refs[i] = 0; + } + table->max = 1 << dev->caps.log_num_macs; + table->total = 0; +} + +void mlx4_init_vlan_table(struct mlx4_dev *dev, struct mlx4_vlan_table *table) +{ + int i; + + mutex_init(&table->mutex); + for (i = 0; i < MLX4_MAX_VLAN_NUM; i++) { + table->entries[i] = 0; + table->refs[i] = 0; + } + table->max = 1 << dev->caps.log_num_vlans; + table->total = 0; +} + +static int mlx4_set_port_mac_table(struct mlx4_dev *dev, u8 port, + __be64 *entries) +{ + struct mlx4_cmd_mailbox *mailbox; + u32 in_mod; + int err; + + mailbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + + memcpy(mailbox->buf, entries, MLX4_MAC_TABLE_SIZE); + + in_mod = MLX4_SET_PORT_MAC_TABLE << 8 | port; + err = mlx4_cmd(dev, mailbox->dma, in_mod, 1, MLX4_CMD_SET_PORT, + MLX4_CMD_TIME_CLASS_B); + + mlx4_free_cmd_mailbox(dev, mailbox); + return err; +} + +int mlx4_register_mac(struct mlx4_dev *dev, u8 port, u64 mac, int *index) +{ + struct mlx4_mac_table *table = &mlx4_priv(dev)->port[port].mac_table; + int i, err = 0; + int free = -1; + + mlx4_dbg(dev, "Registering MAC: 0x%llx\n", (unsigned long long) mac); + mutex_lock(&table->mutex); + for (i = 0; i < MLX4_MAX_MAC_NUM - 1; i++) { + if (free < 0 && !table->refs[i]) { + free = i; + continue; + } + + if (mac == (MLX4_MAC_MASK & be64_to_cpu(table->entries[i]))) { + /* MAC already registered, increase refernce count */ + *index = i; + ++table->refs[i]; + goto out; + } + } + mlx4_dbg(dev, "Free MAC index is %d\n", free); + + if (table->total == table->max) { + /* No free mac entries */ + err = -ENOSPC; + goto 
out; + } + + /* Register new MAC */ + table->refs[free] = 1; + table->entries[free] = cpu_to_be64(mac | MLX4_MAC_VALID); + + err = mlx4_set_port_mac_table(dev, port, table->entries); + if (unlikely(err)) { + mlx4_err(dev, "Failed adding MAC: 0x%llx\n", (unsigned long long) mac); + table->refs[free] = 0; + table->entries[free] = 0; + goto out; + } + + *index = free; + ++table->total; +out: + mutex_unlock(&table->mutex); + return err; +} +EXPORT_SYMBOL_GPL(mlx4_register_mac); + +void mlx4_unregister_mac(struct mlx4_dev *dev, u8 port, int index) +{ + struct mlx4_mac_table *table = &mlx4_priv(dev)->port[port].mac_table; + + mutex_lock(&table->mutex); + if (!table->refs[index]) { + mlx4_warn(dev, "No MAC entry for index %d\n", index); + goto out; + } + if (--table->refs[index]) { + mlx4_warn(dev, "Have more references for index %d," + "no need to modify MAC table\n", index); + goto out; + } + table->entries[index] = 0; + mlx4_set_port_mac_table(dev, port, table->entries); + --table->total; +out: + mutex_unlock(&table->mutex); +} +EXPORT_SYMBOL_GPL(mlx4_unregister_mac); + +static int mlx4_set_port_vlan_table(struct mlx4_dev *dev, u8 port, + __be32 *entries) +{ + struct mlx4_cmd_mailbox *mailbox; + u32 in_mod; + int err; + + mailbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + + memcpy(mailbox->buf, entries, MLX4_VLAN_TABLE_SIZE); + in_mod = MLX4_SET_PORT_VLAN_TABLE << 8 | port; + err = mlx4_cmd(dev, mailbox->dma, in_mod, 1, MLX4_CMD_SET_PORT, + MLX4_CMD_TIME_CLASS_B); + + mlx4_free_cmd_mailbox(dev, mailbox); + + return err; +} + +int mlx4_register_vlan(struct mlx4_dev *dev, u8 port, u16 vlan, int *index) +{ + struct mlx4_vlan_table *table = &mlx4_priv(dev)->port[port].vlan_table; + int i, err = 0; + int free = -1; + + mutex_lock(&table->mutex); + for (i = MLX4_VLAN_REGULAR; i < MLX4_MAX_VLAN_NUM; i++) { + if (free < 0 && (table->refs[i] == 0)) { + free = i; + continue; + } + + if (table->refs[i] && + (vlan == (MLX4_VLAN_MASK & + 
be32_to_cpu(table->entries[i])))) { + /* Vlan already registered, increase refernce count */ + *index = i; + ++table->refs[i]; + goto out; + } + } + + if (table->total == table->max) { + /* No free vlan entries */ + err = -ENOSPC; + goto out; + } + + /* Register new MAC */ + table->refs[free] = 1; + table->entries[free] = cpu_to_be32(vlan | MLX4_VLAN_VALID); + + err = mlx4_set_port_vlan_table(dev, port, table->entries); + if (unlikely(err)) { + mlx4_warn(dev, "Failed adding vlan: %u\n", vlan); + table->refs[free] = 0; + table->entries[free] = 0; + goto out; + } + + *index = free; + ++table->total; +out: + mutex_unlock(&table->mutex); + return err; +} +EXPORT_SYMBOL_GPL(mlx4_register_vlan); + +void mlx4_unregister_vlan(struct mlx4_dev *dev, u8 port, int index) +{ + struct mlx4_vlan_table *table = &mlx4_priv(dev)->port[port].vlan_table; + + if (index < MLX4_VLAN_REGULAR) { + mlx4_warn(dev, "Trying to free special vlan index %d\n", index); + return; + } + + mutex_lock(&table->mutex); + if (!table->refs[index]) { + mlx4_warn(dev, "No vlan entry for index %d\n", index); + goto out; + } + if (--table->refs[index]) { + mlx4_dbg(dev, "Have more references for index %d," + "no need to modify vlan table\n", index); + goto out; + } + table->entries[index] = 0; + mlx4_set_port_vlan_table(dev, port, table->entries); + --table->total; +out: + mutex_unlock(&table->mutex); +} +EXPORT_SYMBOL_GPL(mlx4_unregister_vlan); diff --git a/include/linux/mlx4/cmd.h b/include/linux/mlx4/cmd.h index 77323a72dd3..cf9c679ab38 100644 --- a/include/linux/mlx4/cmd.h +++ b/include/linux/mlx4/cmd.h @@ -132,6 +132,15 @@ enum { MLX4_MAILBOX_SIZE = 4096 }; +enum { + /* set port opcode modifiers */ + MLX4_SET_PORT_GENERAL = 0x0, + MLX4_SET_PORT_RQP_CALC = 0x1, + MLX4_SET_PORT_MAC_TABLE = 0x2, + MLX4_SET_PORT_VLAN_TABLE = 0x3, + MLX4_SET_PORT_PRIO_MAP = 0x4, +}; + struct mlx4_dev; struct mlx4_cmd_mailbox { diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 
f9e73cfc540..1951fe70a25 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -153,6 +153,12 @@ enum mlx4_qp_region { MLX4_NUM_QP_REGION }; +enum mlx4_special_vlan_idx { + MLX4_NO_VLAN_IDX = 0, + MLX4_VLAN_MISS_IDX, + MLX4_VLAN_REGULAR +}; + enum { MLX4_NUM_FEXCH = 64 * 1024, }; @@ -438,6 +444,12 @@ int mlx4_multicast_attach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16], int block_mcast_loopback); int mlx4_multicast_detach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16]); +int mlx4_register_mac(struct mlx4_dev *dev, u8 port, u64 mac, int *index); +void mlx4_unregister_mac(struct mlx4_dev *dev, u8 port, int index); + +int mlx4_register_vlan(struct mlx4_dev *dev, u8 port, u16 vlan, int *index); +void mlx4_unregister_vlan(struct mlx4_dev *dev, u8 port, int index); + int mlx4_map_phys_fmr(struct mlx4_dev *dev, struct mlx4_fmr *fmr, u64 *page_list, int npages, u64 iova, u32 *lkey, u32 *rkey); int mlx4_fmr_alloc(struct mlx4_dev *dev, u32 pd, u32 access, int max_pages, -- cgit v1.2.3-70-g09d2 From 7ff93f8b7ecbc36e7ffc5c11a61643821c1bfee5 Mon Sep 17 00:00:00 2001 From: Yevgeny Petrilin Date: Wed, 22 Oct 2008 15:38:42 -0700 Subject: mlx4_core: Multiple port type support Multi-protocol adapters support different port types. Each consumer of mlx4_core queries for supported port types; in particular mlx4_ib can no longer assume that all physical ports belong to it. Port type is configured through a sysfs interface. When the type of a port is changed, all mlx4 interfaces are unregistered, and then registered again with the new port types. 
Signed-off-by: Yevgeny Petrilin Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx4/mad.c | 6 +- drivers/infiniband/hw/mlx4/main.c | 11 +- drivers/infiniband/hw/mlx4/mlx4_ib.h | 1 + drivers/net/mlx4/fw.c | 4 + drivers/net/mlx4/fw.h | 1 + drivers/net/mlx4/main.c | 211 ++++++++++++++++++++++++++++++++++- drivers/net/mlx4/mlx4.h | 6 + drivers/net/mlx4/port.c | 23 ++++ include/linux/mlx4/device.h | 14 +++ 9 files changed, 266 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c index cdca3a511e1..606f1e2ef28 100644 --- a/drivers/infiniband/hw/mlx4/mad.c +++ b/drivers/infiniband/hw/mlx4/mad.c @@ -298,7 +298,7 @@ int mlx4_ib_mad_init(struct mlx4_ib_dev *dev) int p, q; int ret; - for (p = 0; p < dev->dev->caps.num_ports; ++p) + for (p = 0; p < dev->num_ports; ++p) for (q = 0; q <= 1; ++q) { agent = ib_register_mad_agent(&dev->ib_dev, p + 1, q ? IB_QPT_GSI : IB_QPT_SMI, @@ -314,7 +314,7 @@ int mlx4_ib_mad_init(struct mlx4_ib_dev *dev) return 0; err: - for (p = 0; p < dev->dev->caps.num_ports; ++p) + for (p = 0; p < dev->num_ports; ++p) for (q = 0; q <= 1; ++q) if (dev->send_agent[p][q]) ib_unregister_mad_agent(dev->send_agent[p][q]); @@ -327,7 +327,7 @@ void mlx4_ib_mad_cleanup(struct mlx4_ib_dev *dev) struct ib_mad_agent *agent; int p, q; - for (p = 0; p < dev->dev->caps.num_ports; ++p) { + for (p = 0; p < dev->num_ports; ++p) { for (q = 0; q <= 1; ++q) { agent = dev->send_agent[p][q]; dev->send_agent[p][q] = NULL; diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index a3c2851c054..2e80f8f47b0 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -574,7 +574,10 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) ibdev->ib_dev.owner = THIS_MODULE; ibdev->ib_dev.node_type = RDMA_NODE_IB_CA; ibdev->ib_dev.local_dma_lkey = dev->caps.reserved_lkey; - ibdev->ib_dev.phys_port_cnt = dev->caps.num_ports; + 
ibdev->num_ports = 0; + mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB) + ibdev->num_ports++; + ibdev->ib_dev.phys_port_cnt = ibdev->num_ports; ibdev->ib_dev.num_comp_vectors = 1; ibdev->ib_dev.dma_device = &dev->pdev->dev; @@ -691,7 +694,7 @@ static void mlx4_ib_remove(struct mlx4_dev *dev, void *ibdev_ptr) struct mlx4_ib_dev *ibdev = ibdev_ptr; int p; - for (p = 1; p <= dev->caps.num_ports; ++p) + for (p = 1; p <= ibdev->num_ports; ++p) mlx4_CLOSE_PORT(dev, p); mlx4_ib_mad_cleanup(ibdev); @@ -706,6 +709,10 @@ static void mlx4_ib_event(struct mlx4_dev *dev, void *ibdev_ptr, enum mlx4_dev_event event, int port) { struct ib_event ibev; + struct mlx4_ib_dev *ibdev = to_mdev((struct ib_device *) ibdev_ptr); + + if (port > ibdev->num_ports) + return; switch (event) { case MLX4_DEV_EVENT_PORT_UP: diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h index 6e2b0dc21b6..9974e886b8d 100644 --- a/drivers/infiniband/hw/mlx4/mlx4_ib.h +++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h @@ -162,6 +162,7 @@ struct mlx4_ib_ah { struct mlx4_ib_dev { struct ib_device ib_dev; struct mlx4_dev *dev; + int num_ports; void __iomem *uar_map; struct mlx4_uar priv_uar; diff --git a/drivers/net/mlx4/fw.c b/drivers/net/mlx4/fw.c index 8d402db9a03..be09fdb79cb 100644 --- a/drivers/net/mlx4/fw.c +++ b/drivers/net/mlx4/fw.c @@ -88,6 +88,7 @@ static void dump_dev_cap_flags(struct mlx4_dev *dev, u32 flags) [ 8] = "P_Key violation counter", [ 9] = "Q_Key violation counter", [10] = "VMM", + [12] = "DPDP", [16] = "MW support", [17] = "APM support", [18] = "Atomic ops support", @@ -354,6 +355,7 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) dev_cap->max_pkeys[i] = 1 << (field & 0xf); } } else { +#define QUERY_PORT_SUPPORTED_TYPE_OFFSET 0x00 #define QUERY_PORT_MTU_OFFSET 0x01 #define QUERY_PORT_ETH_MTU_OFFSET 0x02 #define QUERY_PORT_WIDTH_OFFSET 0x06 @@ -368,6 +370,8 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) if 
(err) goto out; + MLX4_GET(field, outbox, QUERY_PORT_SUPPORTED_TYPE_OFFSET); + dev_cap->supported_port_types[i] = field & 3; MLX4_GET(field, outbox, QUERY_PORT_MTU_OFFSET); dev_cap->ib_mtu[i] = field & 0xf; MLX4_GET(field, outbox, QUERY_PORT_WIDTH_OFFSET); diff --git a/drivers/net/mlx4/fw.h b/drivers/net/mlx4/fw.h index d0913d4d262..526d7f30c04 100644 --- a/drivers/net/mlx4/fw.h +++ b/drivers/net/mlx4/fw.h @@ -104,6 +104,7 @@ struct mlx4_dev_cap { u32 reserved_lkey; u64 max_icm_sz; int max_gso_sz; + u8 supported_port_types[MLX4_MAX_PORTS + 1]; u8 log_max_macs[MLX4_MAX_PORTS + 1]; u8 log_max_vlans[MLX4_MAX_PORTS + 1]; }; diff --git a/drivers/net/mlx4/main.c b/drivers/net/mlx4/main.c index 0a5c8bfb3f1..c1d447873bf 100644 --- a/drivers/net/mlx4/main.c +++ b/drivers/net/mlx4/main.c @@ -98,6 +98,44 @@ module_param_named(use_prio, use_prio, bool, 0444); MODULE_PARM_DESC(use_prio, "Enable steering by VLAN priority on ETH ports " "(0/1, default 0)"); +static int mlx4_check_port_params(struct mlx4_dev *dev, + enum mlx4_port_type *port_type) +{ + int i; + + for (i = 0; i < dev->caps.num_ports - 1; i++) { + if (port_type[i] != port_type[i+1] && + !(dev->caps.flags & MLX4_DEV_CAP_FLAG_DPDP)) { + mlx4_err(dev, "Only same port types supported " + "on this HCA, aborting.\n"); + return -EINVAL; + } + } + if ((port_type[0] == MLX4_PORT_TYPE_ETH) && + (port_type[1] == MLX4_PORT_TYPE_IB)) { + mlx4_err(dev, "eth-ib configuration is not supported.\n"); + return -EINVAL; + } + + for (i = 0; i < dev->caps.num_ports; i++) { + if (!(port_type[i] & dev->caps.supported_type[i+1])) { + mlx4_err(dev, "Requested port type for port %d is not " + "supported on this HCA\n", i + 1); + return -EINVAL; + } + } + return 0; +} + +static void mlx4_set_port_mask(struct mlx4_dev *dev) +{ + int i; + + dev->caps.port_mask = 0; + for (i = 1; i <= dev->caps.num_ports; ++i) + if (dev->caps.port_type[i] == MLX4_PORT_TYPE_IB) + dev->caps.port_mask |= 1 << (i - 1); +} static int mlx4_dev_cap(struct mlx4_dev *dev, 
struct mlx4_dev_cap *dev_cap) { int err; @@ -139,6 +177,7 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) dev->caps.port_width_cap[i] = dev_cap->max_port_width[i]; dev->caps.eth_mtu_cap[i] = dev_cap->eth_mtu[i]; dev->caps.def_mac[i] = dev_cap->def_mac[i]; + dev->caps.supported_type[i] = dev_cap->supported_port_types[i]; } dev->caps.num_uars = dev_cap->uar_size / PAGE_SIZE; @@ -182,6 +221,11 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) dev->caps.log_num_prios = use_prio ? 3 : 0; for (i = 1; i <= dev->caps.num_ports; ++i) { + if (dev->caps.supported_type[i] != MLX4_PORT_TYPE_ETH) + dev->caps.port_type[i] = MLX4_PORT_TYPE_IB; + else + dev->caps.port_type[i] = MLX4_PORT_TYPE_ETH; + if (dev->caps.log_num_macs > dev_cap->log_max_macs[i]) { dev->caps.log_num_macs = dev_cap->log_max_macs[i]; mlx4_warn(dev, "Requested number of MACs is too much " @@ -196,6 +240,8 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) } } + mlx4_set_port_mask(dev); + dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW] = dev_cap->reserved_qps; dev->caps.reserved_qps_cnt[MLX4_QP_REGION_ETH_ADDR] = dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_ADDR] = @@ -213,6 +259,95 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) return 0; } +/* + * Change the port configuration of the device. + * Every user of this function must hold the port mutex. 
+ */ +static int mlx4_change_port_types(struct mlx4_dev *dev, + enum mlx4_port_type *port_types) +{ + int err = 0; + int change = 0; + int port; + + for (port = 0; port < dev->caps.num_ports; port++) { + if (port_types[port] != dev->caps.port_type[port + 1]) { + change = 1; + dev->caps.port_type[port + 1] = port_types[port]; + } + } + if (change) { + mlx4_unregister_device(dev); + for (port = 1; port <= dev->caps.num_ports; port++) { + mlx4_CLOSE_PORT(dev, port); + err = mlx4_SET_PORT(dev, port); + if (err) { + mlx4_err(dev, "Failed to set port %d, " + "aborting\n", port); + goto out; + } + } + mlx4_set_port_mask(dev); + err = mlx4_register_device(dev); + } + +out: + return err; +} + +static ssize_t show_port_type(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct mlx4_port_info *info = container_of(attr, struct mlx4_port_info, + port_attr); + struct mlx4_dev *mdev = info->dev; + + return sprintf(buf, "%s\n", + mdev->caps.port_type[info->port] == MLX4_PORT_TYPE_IB ? + "ib" : "eth"); +} + +static ssize_t set_port_type(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct mlx4_port_info *info = container_of(attr, struct mlx4_port_info, + port_attr); + struct mlx4_dev *mdev = info->dev; + struct mlx4_priv *priv = mlx4_priv(mdev); + enum mlx4_port_type types[MLX4_MAX_PORTS]; + int i; + int err = 0; + + if (!strcmp(buf, "ib\n")) + info->tmp_type = MLX4_PORT_TYPE_IB; + else if (!strcmp(buf, "eth\n")) + info->tmp_type = MLX4_PORT_TYPE_ETH; + else { + mlx4_err(mdev, "%s is not supported port type\n", buf); + return -EINVAL; + } + + mutex_lock(&priv->port_mutex); + for (i = 0; i < mdev->caps.num_ports; i++) + types[i] = priv->port[i+1].tmp_type ? 
priv->port[i+1].tmp_type : + mdev->caps.port_type[i+1]; + + err = mlx4_check_port_params(mdev, types); + if (err) + goto out; + + for (i = 1; i <= mdev->caps.num_ports; i++) + priv->port[i].tmp_type = 0; + + err = mlx4_change_port_types(mdev, types); + +out: + mutex_unlock(&priv->port_mutex); + return err ? err : count; +} + static int mlx4_load_fw(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); @@ -617,6 +752,7 @@ static int mlx4_setup_hca(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); int err; + int port; err = mlx4_init_uar_table(dev); if (err) { @@ -715,8 +851,20 @@ static int mlx4_setup_hca(struct mlx4_dev *dev) goto err_qp_table_free; } + for (port = 1; port <= dev->caps.num_ports; port++) { + err = mlx4_SET_PORT(dev, port); + if (err) { + mlx4_err(dev, "Failed to set port %d, aborting\n", + port); + goto err_mcg_table_free; + } + } + return 0; +err_mcg_table_free: + mlx4_cleanup_mcg_table(dev); + err_qp_table_free: mlx4_cleanup_qp_table(dev); @@ -780,14 +928,37 @@ no_msi: priv->eq_table.eq[i].irq = dev->pdev->irq; } -static void mlx4_init_port_info(struct mlx4_dev *dev, int port) +static int mlx4_init_port_info(struct mlx4_dev *dev, int port) { struct mlx4_port_info *info = &mlx4_priv(dev)->port[port]; + int err = 0; info->dev = dev; info->port = port; mlx4_init_mac_table(dev, &info->mac_table); mlx4_init_vlan_table(dev, &info->vlan_table); + + sprintf(info->dev_name, "mlx4_port%d", port); + info->port_attr.attr.name = info->dev_name; + info->port_attr.attr.mode = S_IRUGO | S_IWUSR; + info->port_attr.show = show_port_type; + info->port_attr.store = set_port_type; + + err = device_create_file(&dev->pdev->dev, &info->port_attr); + if (err) { + mlx4_err(dev, "Failed to create file for port %d\n", port); + info->port = -1; + } + + return err; +} + +static void mlx4_cleanup_port_info(struct mlx4_port_info *info) +{ + if (info->port < 0) + return; + + device_remove_file(&info->dev->pdev->dev, &info->port_attr); } static int 
__mlx4_init_one(struct pci_dev *pdev, const struct pci_device_id *id) @@ -870,6 +1041,8 @@ static int __mlx4_init_one(struct pci_dev *pdev, const struct pci_device_id *id) INIT_LIST_HEAD(&priv->ctx_list); spin_lock_init(&priv->ctx_lock); + mutex_init(&priv->port_mutex); + INIT_LIST_HEAD(&priv->pgdir_list); mutex_init(&priv->pgdir_mutex); @@ -905,18 +1078,24 @@ static int __mlx4_init_one(struct pci_dev *pdev, const struct pci_device_id *id) if (err) goto err_close; - for (port = 1; port <= dev->caps.num_ports; port++) - mlx4_init_port_info(dev, port); + for (port = 1; port <= dev->caps.num_ports; port++) { + err = mlx4_init_port_info(dev, port); + if (err) + goto err_port; + } err = mlx4_register_device(dev); if (err) - goto err_cleanup; + goto err_port; pci_set_drvdata(pdev, dev); return 0; -err_cleanup: +err_port: + for (port = 1; port <= dev->caps.num_ports; port++) + mlx4_cleanup_port_info(&priv->port[port]); + mlx4_cleanup_mcg_table(dev); mlx4_cleanup_qp_table(dev); mlx4_cleanup_srq_table(dev); @@ -973,8 +1152,10 @@ static void mlx4_remove_one(struct pci_dev *pdev) if (dev) { mlx4_unregister_device(dev); - for (p = 1; p <= dev->caps.num_ports; ++p) + for (p = 1; p <= dev->caps.num_ports; p++) { + mlx4_cleanup_port_info(&priv->port[p]); mlx4_CLOSE_PORT(dev, p); + } mlx4_cleanup_mcg_table(dev); mlx4_cleanup_qp_table(dev); @@ -1026,10 +1207,28 @@ static struct pci_driver mlx4_driver = { .remove = __devexit_p(mlx4_remove_one) }; +static int __init mlx4_verify_params(void) +{ + if ((log_num_mac < 0) || (log_num_mac > 7)) { + printk(KERN_WARNING "mlx4_core: bad num_mac: %d\n", log_num_mac); + return -1; + } + + if ((log_num_vlan < 0) || (log_num_vlan > 7)) { + printk(KERN_WARNING "mlx4_core: bad num_vlan: %d\n", log_num_vlan); + return -1; + } + + return 0; +} + static int __init mlx4_init(void) { int ret; + if (mlx4_verify_params()) + return -EINVAL; + ret = mlx4_catas_init(); if (ret) return ret; diff --git a/drivers/net/mlx4/mlx4.h b/drivers/net/mlx4/mlx4.h index 
23309f381ee..fa431fad0ee 100644 --- a/drivers/net/mlx4/mlx4.h +++ b/drivers/net/mlx4/mlx4.h @@ -277,6 +277,9 @@ struct mlx4_vlan_table { struct mlx4_port_info { struct mlx4_dev *dev; int port; + char dev_name[16]; + struct device_attribute port_attr; + enum mlx4_port_type tmp_type; struct mlx4_mac_table mac_table; struct mlx4_vlan_table vlan_table; }; @@ -310,6 +313,7 @@ struct mlx4_priv { struct mlx4_uar driver_uar; void __iomem *kar; struct mlx4_port_info port[MLX4_MAX_PORTS + 1]; + struct mutex port_mutex; }; static inline struct mlx4_priv *mlx4_priv(struct mlx4_dev *dev) @@ -383,4 +387,6 @@ void mlx4_handle_catas_err(struct mlx4_dev *dev); void mlx4_init_mac_table(struct mlx4_dev *dev, struct mlx4_mac_table *table); void mlx4_init_vlan_table(struct mlx4_dev *dev, struct mlx4_vlan_table *table); +int mlx4_SET_PORT(struct mlx4_dev *dev, u8 port); + #endif /* MLX4_H */ diff --git a/drivers/net/mlx4/port.c b/drivers/net/mlx4/port.c index 8644f3d978e..e2fdab42c4c 100644 --- a/drivers/net/mlx4/port.c +++ b/drivers/net/mlx4/port.c @@ -257,3 +257,26 @@ out: mutex_unlock(&table->mutex); } EXPORT_SYMBOL_GPL(mlx4_unregister_vlan); + +int mlx4_SET_PORT(struct mlx4_dev *dev, u8 port) +{ + struct mlx4_cmd_mailbox *mailbox; + int err; + u8 is_eth = dev->caps.port_type[port] == MLX4_PORT_TYPE_ETH; + + mailbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + + memset(mailbox->buf, 0, 256); + if (is_eth) { + ((u8 *) mailbox->buf)[3] = 6; + ((__be16 *) mailbox->buf)[4] = cpu_to_be16(1 << 15); + ((__be16 *) mailbox->buf)[6] = cpu_to_be16(1 << 15); + } + err = mlx4_cmd(dev, mailbox->dma, port, is_eth, MLX4_CMD_SET_PORT, + MLX4_CMD_TIME_CLASS_B); + + mlx4_free_cmd_mailbox(dev, mailbox); + return err; +} diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 1951fe70a25..bd9977b8949 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -60,6 +60,7 @@ enum { MLX4_DEV_CAP_FLAG_IPOIB_CSUM = 1 << 7, 
MLX4_DEV_CAP_FLAG_BAD_PKEY_CNTR = 1 << 8, MLX4_DEV_CAP_FLAG_BAD_QKEY_CNTR = 1 << 9, + MLX4_DEV_CAP_FLAG_DPDP = 1 << 12, MLX4_DEV_CAP_FLAG_MEM_WINDOW = 1 << 16, MLX4_DEV_CAP_FLAG_APM = 1 << 17, MLX4_DEV_CAP_FLAG_ATOMIC = 1 << 18, @@ -153,6 +154,11 @@ enum mlx4_qp_region { MLX4_NUM_QP_REGION }; +enum mlx4_port_type { + MLX4_PORT_TYPE_IB = 1 << 0, + MLX4_PORT_TYPE_ETH = 1 << 1, +}; + enum mlx4_special_vlan_idx { MLX4_NO_VLAN_IDX = 0, MLX4_VLAN_MISS_IDX, @@ -226,6 +232,9 @@ struct mlx4_caps { int log_num_macs; int log_num_vlans; int log_num_prios; + enum mlx4_port_type port_type[MLX4_MAX_PORTS + 1]; + u8 supported_type[MLX4_MAX_PORTS + 1]; + u32 port_mask; }; struct mlx4_buf_list { @@ -380,6 +389,11 @@ struct mlx4_init_port_param { u64 si_guid; }; +#define mlx4_foreach_port(port, dev, type) \ + for ((port) = 1; (port) <= (dev)->caps.num_ports; (port)++) \ + if (((type) == MLX4_PORT_TYPE_IB ? (dev)->caps.port_mask : \ + ~(dev)->caps.port_mask) & 1 << ((port) - 1)) + int mlx4_buf_alloc(struct mlx4_dev *dev, int size, int max_direct, struct mlx4_buf *buf); void mlx4_buf_free(struct mlx4_dev *dev, int size, struct mlx4_buf *buf); -- cgit v1.2.3-70-g09d2 From 8dd7f8036c123296fc4214f9d8810eb485570422 Mon Sep 17 00:00:00 2001 From: Sheng Yang Date: Tue, 21 Oct 2008 17:38:25 +0800 Subject: PCI: add support for function level reset Sometimes, it's necessary to enable software's ability to quiesce and reset endpoint hardware with function-level granularity, so provide support for it. The patch implements the Function Level Reset (FLR) feature following the PCI-e spec. This is the first step; we will add more generic methods, like D0/D3, to allow more devices to support this function. The patch contains two functions. pci_reset_function() is the new driver API and contains some actions to quiesce a device. The other function is a helper: pci_execute_reset_function() just executes the reset for a particular device function. Currently the usage model is in KVM.
Function reset is necessary for assigning device to a guest, or moving it between partitions. For Function Level Reset(FLR), please refer to PCI Express spec chapter 6.6.2. Signed-off-by: Sheng Yang Signed-off-by: Matthew Wilcox Signed-off-by: Jesse Barnes --- drivers/pci/pci.c | 98 ++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/pci.h | 2 + include/linux/pci_regs.h | 2 + 3 files changed, 102 insertions(+) (limited to 'include/linux') diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index aee73cf251b..533aeb5fcbe 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -18,6 +18,7 @@ #include #include #include +#include #include /* isa_dma_bridge_buggy */ #include "pci.h" @@ -1745,6 +1746,103 @@ int pci_set_dma_seg_boundary(struct pci_dev *dev, unsigned long mask) EXPORT_SYMBOL(pci_set_dma_seg_boundary); #endif +/** + * pci_execute_reset_function() - Reset a PCI device function + * @dev: Device function to reset + * + * Some devices allow an individual function to be reset without affecting + * other functions in the same device. The PCI device must be responsive + * to PCI config space in order to use this function. + * + * The device function is presumed to be unused when this function is called. + * Resetting the device will make the contents of PCI configuration space + * random, so any caller of this must be prepared to reinitialise the + * device including MSI, bus mastering, BARs, decoding IO and memory spaces, + * etc. + * + * Returns 0 if the device function was successfully reset or -ENOTTY if the + * device doesn't support resetting a single function. 
+ */ +int pci_execute_reset_function(struct pci_dev *dev) +{ + u16 status; + u32 cap; + int exppos = pci_find_capability(dev, PCI_CAP_ID_EXP); + + if (!exppos) + return -ENOTTY; + pci_read_config_dword(dev, exppos + PCI_EXP_DEVCAP, &cap); + if (!(cap & PCI_EXP_DEVCAP_FLR)) + return -ENOTTY; + + pci_block_user_cfg_access(dev); + + /* Wait for Transaction Pending bit clean */ + msleep(100); + pci_read_config_word(dev, exppos + PCI_EXP_DEVSTA, &status); + if (status & PCI_EXP_DEVSTA_TRPND) { + dev_info(&dev->dev, "Busy after 100ms while trying to reset; " + "sleeping for 1 second\n"); + ssleep(1); + pci_read_config_word(dev, exppos + PCI_EXP_DEVSTA, &status); + if (status & PCI_EXP_DEVSTA_TRPND) + dev_info(&dev->dev, "Still busy after 1s; " + "proceeding with reset anyway\n"); + } + + pci_write_config_word(dev, exppos + PCI_EXP_DEVCTL, + PCI_EXP_DEVCTL_BCR_FLR); + mdelay(100); + + pci_unblock_user_cfg_access(dev); + return 0; +} +EXPORT_SYMBOL_GPL(pci_execute_reset_function); + +/** + * pci_reset_function() - quiesce and reset a PCI device function + * @dev: Device function to reset + * + * Some devices allow an individual function to be reset without affecting + * other functions in the same device. The PCI device must be responsive + * to PCI config space in order to use this function. + * + * This function does not just reset the PCI portion of a device, but + * clears all the state associated with the device. This function differs + * from pci_execute_reset_function in that it saves and restores device state + * over the reset. + * + * Returns 0 if the device function was successfully reset or -ENOTTY if the + * device doesn't support resetting a single function. 
+ */ +int pci_reset_function(struct pci_dev *dev) +{ + u32 cap; + int exppos = pci_find_capability(dev, PCI_CAP_ID_EXP); + int r; + + if (!exppos) + return -ENOTTY; + pci_read_config_dword(dev, exppos + PCI_EXP_DEVCAP, &cap); + if (!(cap & PCI_EXP_DEVCAP_FLR)) + return -ENOTTY; + + if (!dev->msi_enabled && !dev->msix_enabled) + disable_irq(dev->irq); + pci_save_state(dev); + + pci_write_config_word(dev, PCI_COMMAND, PCI_COMMAND_INTX_DISABLE); + + r = pci_execute_reset_function(dev); + + pci_restore_state(dev); + if (!dev->msi_enabled && !dev->msix_enabled) + enable_irq(dev->irq); + + return r; +} +EXPORT_SYMBOL_GPL(pci_reset_function); + /** * pcix_get_max_mmrbc - get PCI-X maximum designed memory read byte count * @dev: PCI device to query diff --git a/include/linux/pci.h b/include/linux/pci.h index 085187be29c..f6f6810296e 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -626,6 +626,8 @@ int pcix_get_mmrbc(struct pci_dev *dev); int pcix_set_mmrbc(struct pci_dev *dev, int mmrbc); int pcie_get_readrq(struct pci_dev *dev); int pcie_set_readrq(struct pci_dev *dev, int rq); +int pci_reset_function(struct pci_dev *dev); +int pci_execute_reset_function(struct pci_dev *dev); void pci_update_resource(struct pci_dev *dev, struct resource *res, int resno); int __must_check pci_assign_resource(struct pci_dev *dev, int i); int pci_select_bars(struct pci_dev *dev, unsigned long flags); diff --git a/include/linux/pci_regs.h b/include/linux/pci_regs.h index eb6686b88f9..e5effd47ed7 100644 --- a/include/linux/pci_regs.h +++ b/include/linux/pci_regs.h @@ -377,6 +377,7 @@ #define PCI_EXP_DEVCAP_RBER 0x8000 /* Role-Based Error Reporting */ #define PCI_EXP_DEVCAP_PWR_VAL 0x3fc0000 /* Slot Power Limit Value */ #define PCI_EXP_DEVCAP_PWR_SCL 0xc000000 /* Slot Power Limit Scale */ +#define PCI_EXP_DEVCAP_FLR 0x10000000 /* Function Level Reset */ #define PCI_EXP_DEVCTL 8 /* Device Control */ #define PCI_EXP_DEVCTL_CERE 0x0001 /* Correctable Error Reporting En. 
*/ #define PCI_EXP_DEVCTL_NFERE 0x0002 /* Non-Fatal Error Reporting Enable */ @@ -389,6 +390,7 @@ #define PCI_EXP_DEVCTL_AUX_PME 0x0400 /* Auxiliary Power PM Enable */ #define PCI_EXP_DEVCTL_NOSNOOP_EN 0x0800 /* Enable No Snoop */ #define PCI_EXP_DEVCTL_READRQ 0x7000 /* Max_Read_Request_Size */ +#define PCI_EXP_DEVCTL_BCR_FLR 0x8000 /* Bridge Configuration Retry / FLR */ #define PCI_EXP_DEVSTA 10 /* Device Status */ #define PCI_EXP_DEVSTA_CED 0x01 /* Correctable Error Detected */ #define PCI_EXP_DEVSTA_NFED 0x02 /* Non-Fatal Error Detected */ -- cgit v1.2.3-70-g09d2 From 64c7f63c1b5c26f057c26f7920f397fed2f590d9 Mon Sep 17 00:00:00 2001 From: Jesse Barnes Date: Tue, 21 Oct 2008 10:09:05 -0700 Subject: PCI: include io.h in pci.h so that ioremap_nocache is defined Ingo pointed out that the m32r build was broken by pci_ioremap. It looks like some files include pci.h w/o including io.h. The latter defines ioremap_* if present, so it makes sense to include it in pci.h now that we have pci_ioremap there. Reported-by: Ingo Molnar Signed-off-by: Jesse Barnes --- include/linux/pci.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index f6f6810296e..ee2fd6304e0 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -18,6 +18,7 @@ #define LINUX_PCI_H #include /* The pci register defines */ +#include /* * The PCI interface treats multi-function devices as independent -- cgit v1.2.3-70-g09d2 From 1359f2701b96abd9bb69c1273fb995a093b6409a Mon Sep 17 00:00:00 2001 From: Alex Chiang Date: Mon, 20 Oct 2008 17:40:42 -0600 Subject: PCI Hotplug core: add 'name' param pci_hp_register interface Update pci_hp_register() to take a const char *name parameter. The motivation for this is to clean up the individual hotplug drivers so that each one does not have to manage its own name. The PCI core should be the place where we manage the name. 
We update the interface and all callsites first, in a "no functional change" manner, and clean up the drivers later. Cc: kristen.c.accardi@intel.com Acked-by: Kenji Kaneshige Reviewed-by: Matthew Wilcox Signed-off-by: Alex Chiang Signed-off-by: Jesse Barnes --- drivers/pci/hotplug/acpiphp_core.c | 3 ++- drivers/pci/hotplug/cpci_hotplug_core.c | 3 ++- drivers/pci/hotplug/cpqphp_core.c | 3 ++- drivers/pci/hotplug/fakephp.c | 3 ++- drivers/pci/hotplug/ibmphp_ebda.c | 3 ++- drivers/pci/hotplug/pci_hotplug_core.c | 15 ++++++++------- drivers/pci/hotplug/pciehp_core.c | 3 ++- drivers/pci/hotplug/rpaphp_slot.c | 2 +- drivers/pci/hotplug/sgi_hotplug.c | 3 ++- drivers/pci/hotplug/shpchp_core.c | 3 ++- include/linux/pci_hotplug.h | 3 ++- 11 files changed, 27 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/drivers/pci/hotplug/acpiphp_core.c b/drivers/pci/hotplug/acpiphp_core.c index 0e496e866a8..e9841765339 100644 --- a/drivers/pci/hotplug/acpiphp_core.c +++ b/drivers/pci/hotplug/acpiphp_core.c @@ -340,7 +340,8 @@ int acpiphp_register_hotplug_slot(struct acpiphp_slot *acpiphp_slot) retval = pci_hp_register(slot->hotplug_slot, acpiphp_slot->bridge->pci_bus, - acpiphp_slot->device); + acpiphp_slot->device, + slot->name); if (retval == -EBUSY) goto error_hpslot; if (retval) { diff --git a/drivers/pci/hotplug/cpci_hotplug_core.c b/drivers/pci/hotplug/cpci_hotplug_core.c index 935947991dc..5e5dee85763 100644 --- a/drivers/pci/hotplug/cpci_hotplug_core.c +++ b/drivers/pci/hotplug/cpci_hotplug_core.c @@ -285,7 +285,8 @@ cpci_hp_register_bus(struct pci_bus *bus, u8 first, u8 last) info->attention_status = cpci_get_attention_status(slot); dbg("registering slot %s", slot->hotplug_slot->name); - status = pci_hp_register(slot->hotplug_slot, bus, i); + status = pci_hp_register(slot->hotplug_slot, bus, i, + slot->hotplug_slot->name); if (status) { err("pci_hp_register failed with error %d", status); goto error_name; diff --git a/drivers/pci/hotplug/cpqphp_core.c 
b/drivers/pci/hotplug/cpqphp_core.c index 54defec51d0..a7fe4584f00 100644 --- a/drivers/pci/hotplug/cpqphp_core.c +++ b/drivers/pci/hotplug/cpqphp_core.c @@ -436,7 +436,8 @@ static int ctrl_slot_setup(struct controller *ctrl, slot_number); result = pci_hp_register(hotplug_slot, ctrl->pci_dev->subordinate, - slot->device); + slot->device, + hotplug_slot->name); if (result) { err("pci_hp_register failed with error %d\n", result); goto error_name; diff --git a/drivers/pci/hotplug/fakephp.c b/drivers/pci/hotplug/fakephp.c index 146ca9cd156..3069f215383 100644 --- a/drivers/pci/hotplug/fakephp.c +++ b/drivers/pci/hotplug/fakephp.c @@ -126,7 +126,8 @@ static int add_slot(struct pci_dev *dev) slot->release = &dummy_release; slot->private = dslot; - retval = pci_hp_register(slot, dev->bus, PCI_SLOT(dev->devfn)); + retval = pci_hp_register(slot, dev->bus, PCI_SLOT(dev->devfn), + slot->name); if (retval) { err("pci_hp_register failed with error %d\n", retval); goto error_dslot; diff --git a/drivers/pci/hotplug/ibmphp_ebda.c b/drivers/pci/hotplug/ibmphp_ebda.c index 8cfd1c4926c..342d3e8f77c 100644 --- a/drivers/pci/hotplug/ibmphp_ebda.c +++ b/drivers/pci/hotplug/ibmphp_ebda.c @@ -966,7 +966,8 @@ static int __init ebda_rsrc_controller (void) list_for_each_entry(tmp_slot, &ibmphp_slot_head, ibm_slot_list) { snprintf (tmp_slot->hotplug_slot->name, 30, "%s", create_file_name (tmp_slot)); pci_hp_register(tmp_slot->hotplug_slot, - pci_find_bus(0, tmp_slot->bus), tmp_slot->device); + pci_find_bus(0, tmp_slot->bus), tmp_slot->device, + tmp_slot->hotplug_slot->name); } print_ebda_hpc (); diff --git a/drivers/pci/hotplug/pci_hotplug_core.c b/drivers/pci/hotplug/pci_hotplug_core.c index 2e6c4474644..02b1ae12a2e 100644 --- a/drivers/pci/hotplug/pci_hotplug_core.c +++ b/drivers/pci/hotplug/pci_hotplug_core.c @@ -547,13 +547,15 @@ out: * @bus: bus this slot is on * @slot: pointer to the &struct hotplug_slot to register * @slot_nr: slot number + * @name: name registered with kobject core * 
* Registers a hotplug slot with the pci hotplug subsystem, which will allow * userspace interaction to the slot. * * Returns 0 if successful, anything else for an error. */ -int pci_hp_register(struct hotplug_slot *slot, struct pci_bus *bus, int slot_nr) +int pci_hp_register(struct hotplug_slot *slot, struct pci_bus *bus, int slot_nr, + const char *name) { int result; struct pci_slot *pci_slot; @@ -569,7 +571,7 @@ int pci_hp_register(struct hotplug_slot *slot, struct pci_bus *bus, int slot_nr) } /* Check if we have already registered a slot with the same name. */ - if (get_slot_from_name(slot->name)) + if (get_slot_from_name(name)) return -EEXIST; /* @@ -577,7 +579,7 @@ int pci_hp_register(struct hotplug_slot *slot, struct pci_bus *bus, int slot_nr) * driver and call it here again. If we've already created the * pci_slot, the interface will simply bump the refcount. */ - pci_slot = pci_create_slot(bus, slot_nr, slot->name); + pci_slot = pci_create_slot(bus, slot_nr, name); if (IS_ERR(pci_slot)) return PTR_ERR(pci_slot); @@ -593,8 +595,8 @@ int pci_hp_register(struct hotplug_slot *slot, struct pci_bus *bus, int slot_nr) /* * Allow pcihp drivers to override the ACPI_PCI_SLOT name. 
*/ - if (strcmp(kobject_name(&pci_slot->kobj), slot->name)) { - result = kobject_rename(&pci_slot->kobj, slot->name); + if (strcmp(kobject_name(&pci_slot->kobj), name)) { + result = kobject_rename(&pci_slot->kobj, name); if (result) { pci_destroy_slot(pci_slot); return result; @@ -607,8 +609,7 @@ int pci_hp_register(struct hotplug_slot *slot, struct pci_bus *bus, int slot_nr) result = fs_add_slot(pci_slot); kobject_uevent(&pci_slot->kobj, KOBJ_ADD); - dbg("Added slot %s to the list\n", slot->name); - + dbg("Added slot %s to the list\n", name); return result; } diff --git a/drivers/pci/hotplug/pciehp_core.c b/drivers/pci/hotplug/pciehp_core.c index c748a19db89..3ace5e05760 100644 --- a/drivers/pci/hotplug/pciehp_core.c +++ b/drivers/pci/hotplug/pciehp_core.c @@ -226,7 +226,8 @@ static int init_slots(struct controller *ctrl) duplicate_name: retval = pci_hp_register(hotplug_slot, ctrl->pci_dev->subordinate, - slot->device); + slot->device, + slot->name); if (retval) { /* * If slot N already exists, we'll try to create diff --git a/drivers/pci/hotplug/rpaphp_slot.c b/drivers/pci/hotplug/rpaphp_slot.c index 50884507b8b..736d3b43ed0 100644 --- a/drivers/pci/hotplug/rpaphp_slot.c +++ b/drivers/pci/hotplug/rpaphp_slot.c @@ -137,7 +137,7 @@ int rpaphp_register_slot(struct slot *slot) slotno = PCI_SLOT(PCI_DN(slot->dn->child)->devfn); else slotno = -1; - retval = pci_hp_register(php_slot, slot->bus, slotno); + retval = pci_hp_register(php_slot, slot->bus, slotno, slot->name); if (retval) { err("pci_hp_register failed with error %d\n", retval); return retval; diff --git a/drivers/pci/hotplug/sgi_hotplug.c b/drivers/pci/hotplug/sgi_hotplug.c index 410fe0394a8..6d20bbd4359 100644 --- a/drivers/pci/hotplug/sgi_hotplug.c +++ b/drivers/pci/hotplug/sgi_hotplug.c @@ -653,7 +653,8 @@ static int sn_hotplug_slot_register(struct pci_bus *pci_bus) bss_hotplug_slot->ops = &sn_hotplug_slot_ops; bss_hotplug_slot->release = &sn_release_slot; - rc = pci_hp_register(bss_hotplug_slot, pci_bus, 
device); + rc = pci_hp_register(bss_hotplug_slot, pci_bus, device, + bss_hotplug_slot->name); if (rc) goto register_err; diff --git a/drivers/pci/hotplug/shpchp_core.c b/drivers/pci/hotplug/shpchp_core.c index cc38615395f..bf5096612aa 100644 --- a/drivers/pci/hotplug/shpchp_core.c +++ b/drivers/pci/hotplug/shpchp_core.c @@ -146,7 +146,8 @@ static int init_slots(struct controller *ctrl) slot->hp_slot, slot->number, ctrl->slot_device_offset); duplicate_name: retval = pci_hp_register(slot->hotplug_slot, - ctrl->pci_dev->subordinate, slot->device); + ctrl->pci_dev->subordinate, slot->device, + hotplug_slot->name); if (retval) { /* * If slot N already exists, we'll try to create diff --git a/include/linux/pci_hotplug.h b/include/linux/pci_hotplug.h index a08cd06b541..5efba667186 100644 --- a/include/linux/pci_hotplug.h +++ b/include/linux/pci_hotplug.h @@ -165,7 +165,8 @@ struct hotplug_slot { }; #define to_hotplug_slot(n) container_of(n, struct hotplug_slot, kobj) -extern int pci_hp_register(struct hotplug_slot *, struct pci_bus *, int nr); +extern int pci_hp_register(struct hotplug_slot *, struct pci_bus *, int nr, + const char *name); extern int pci_hp_deregister(struct hotplug_slot *slot); extern int __must_check pci_hp_change_slot_info (struct hotplug_slot *slot, struct hotplug_slot_info *info); -- cgit v1.2.3-70-g09d2 From d25b7c8d6ba2735602003d75a28894772fe8ad6a Mon Sep 17 00:00:00 2001 From: Alex Chiang Date: Mon, 20 Oct 2008 17:40:47 -0600 Subject: PCI: rename pci_update_slot_number to pci_renumber_slot The GPL exported symbol pci_update_slot_number has been renamed to pci_renumber_slot. Some of the safety checks were unnecessary and were removed. 
Cc: kristen.c.accardi@intel.com Cc: matthew@wil.cx Acked-by: Kenji Kaneshige Signed-off-by: Alex Chiang Signed-off-by: Jesse Barnes --- drivers/pci/slot.c | 15 +++++---------- include/linux/pci.h | 2 +- 2 files changed, 6 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/drivers/pci/slot.c b/drivers/pci/slot.c index 0c6db03698e..b9b90ab6b86 100644 --- a/drivers/pci/slot.c +++ b/drivers/pci/slot.c @@ -175,7 +175,7 @@ placeholder: EXPORT_SYMBOL_GPL(pci_create_slot); /** - * pci_update_slot_number - update %struct pci_slot -> number + * pci_renumber_slot - update %struct pci_slot -> number * @slot - %struct pci_slot to update * @slot_nr - new number for slot * @@ -183,27 +183,22 @@ EXPORT_SYMBOL_GPL(pci_create_slot); * created a placeholder slot in pci_create_slot() by passing a -1 as * slot_nr, to update their %struct pci_slot with the correct @slot_nr. */ - -void pci_update_slot_number(struct pci_slot *slot, int slot_nr) +void pci_renumber_slot(struct pci_slot *slot, int slot_nr) { - int name_count = 0; struct pci_slot *tmp; down_write(&pci_bus_sem); list_for_each_entry(tmp, &slot->bus->slots, list) { WARN_ON(tmp->number == slot_nr); - if (!strcmp(kobject_name(&tmp->kobj), kobject_name(&slot->kobj))) - name_count++; + goto out; } - if (name_count > 1) - printk(KERN_WARNING "pci_update_slot_number found %d slots with the same name: %s\n", name_count, kobject_name(&slot->kobj)); - slot->number = slot_nr; +out: up_write(&pci_bus_sem); } -EXPORT_SYMBOL_GPL(pci_update_slot_number); +EXPORT_SYMBOL_GPL(pci_renumber_slot); /** * pci_destroy_slot - decrement refcount for physical PCI slot diff --git a/include/linux/pci.h b/include/linux/pci.h index ee2fd6304e0..41717ae9807 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -512,7 +512,7 @@ struct pci_bus *pci_add_new_bus(struct pci_bus *parent, struct pci_dev *dev, struct pci_slot *pci_create_slot(struct pci_bus *parent, int slot_nr, const char *name); void pci_destroy_slot(struct pci_slot 
*slot); -void pci_update_slot_number(struct pci_slot *slot, int slot_nr); +void pci_renumber_slot(struct pci_slot *slot, int slot_nr); int pci_scan_slot(struct pci_bus *bus, int devfn); struct pci_dev *pci_scan_single_device(struct pci_bus *bus, int devfn); void pci_device_add(struct pci_dev *dev, struct pci_bus *bus); -- cgit v1.2.3-70-g09d2 From 828f37683e6d3ab5912989df0d04201db7ad798e Mon Sep 17 00:00:00 2001 From: Alex Chiang Date: Mon, 20 Oct 2008 17:40:52 -0600 Subject: PCI: update pci_create_slot() to take a 'hotplug' param Slot detection drivers can co-exist with hotplug drivers. The names of the detected/claimed slots may be different depending on module load order. For legacy reasons, we need to allow hotplug drivers to override the slot name if a detection driver is loaded first (and they find the same slots). Creating and overriding slot names should be an atomic operation, otherwise you get a locking nightmare as various drivers race to call pci_create_slot(). pci_create_slot() is already serialized by grabbing the pci_bus_sem. We update the API and add a 'hotplug' param, which is: set if the caller is a hotplug driver NULL if the caller is a detection driver pci_create_slot() does not actually use the 'hotplug' parameter in this patch. A later patch will add the logic that uses it. 
Cc: kristen.c.accardi@intel.com Cc: matthew@wil.cx Acked-by: Kenji Kaneshige Signed-off-by: Alex Chiang Signed-off-by: Jesse Barnes --- drivers/acpi/pci_slot.c | 2 +- drivers/pci/hotplug/pci_hotplug_core.c | 2 +- drivers/pci/slot.c | 4 +++- include/linux/pci.h | 3 ++- 4 files changed, 7 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/drivers/acpi/pci_slot.c b/drivers/acpi/pci_slot.c index d5b4ef89887..8d4a568be1c 100644 --- a/drivers/acpi/pci_slot.c +++ b/drivers/acpi/pci_slot.c @@ -150,7 +150,7 @@ register_slot(acpi_handle handle, u32 lvl, void *context, void **rv) } snprintf(name, sizeof(name), "%u", (u32)sun); - pci_slot = pci_create_slot(pci_bus, device, name); + pci_slot = pci_create_slot(pci_bus, device, name, NULL); if (IS_ERR(pci_slot)) { err("pci_create_slot returned %ld\n", PTR_ERR(pci_slot)); kfree(slot); diff --git a/drivers/pci/hotplug/pci_hotplug_core.c b/drivers/pci/hotplug/pci_hotplug_core.c index 02b1ae12a2e..1cdeb642fdc 100644 --- a/drivers/pci/hotplug/pci_hotplug_core.c +++ b/drivers/pci/hotplug/pci_hotplug_core.c @@ -579,7 +579,7 @@ int pci_hp_register(struct hotplug_slot *slot, struct pci_bus *bus, int slot_nr, * driver and call it here again. If we've already created the * pci_slot, the interface will simply bump the refcount. */ - pci_slot = pci_create_slot(bus, slot_nr, name); + pci_slot = pci_create_slot(bus, slot_nr, name, slot); if (IS_ERR(pci_slot)) return PTR_ERR(pci_slot); diff --git a/drivers/pci/slot.c b/drivers/pci/slot.c index b9b90ab6b86..0e009c3ba5f 100644 --- a/drivers/pci/slot.c +++ b/drivers/pci/slot.c @@ -83,6 +83,7 @@ static struct kobj_type pci_slot_ktype = { * @parent: struct pci_bus of parent bridge * @slot_nr: PCI_SLOT(pci_dev->devfn) or -1 for placeholder * @name: user visible string presented in /sys/bus/pci/slots/ + * @hotplug: set if caller is hotplug driver, NULL otherwise * * PCI slots have first class attributes such as address, speed, width, * and a &struct pci_slot is used to manage them. 
This interface will @@ -111,7 +112,8 @@ static struct kobj_type pci_slot_ktype = { */ struct pci_slot *pci_create_slot(struct pci_bus *parent, int slot_nr, - const char *name) + const char *name, + struct hotplug_slot *hotplug) { struct pci_dev *dev; struct pci_slot *slot; diff --git a/include/linux/pci.h b/include/linux/pci.h index 41717ae9807..9a8cee62330 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -510,7 +510,8 @@ struct pci_bus *pci_create_bus(struct device *parent, int bus, struct pci_bus *pci_add_new_bus(struct pci_bus *parent, struct pci_dev *dev, int busnr); struct pci_slot *pci_create_slot(struct pci_bus *parent, int slot_nr, - const char *name); + const char *name, + struct hotplug_slot *hotplug); void pci_destroy_slot(struct pci_slot *slot); void pci_renumber_slot(struct pci_slot *slot, int slot_nr); int pci_scan_slot(struct pci_bus *bus, int devfn); -- cgit v1.2.3-70-g09d2 From 0ad772ec464d3fcf9d210836b97e654f393606c4 Mon Sep 17 00:00:00 2001 From: Alex Chiang Date: Mon, 20 Oct 2008 17:41:07 -0600 Subject: PCI, PCI Hotplug: introduce slot_name helpers In preparation for cleaning up the various hotplug drivers such that they don't have to manage their own 'name' parameters anymore, we provide the following convenience functions: pci_slot_name() hotplug_slot_name() These helpers will be used by individual hotplug drivers. 
Cc: kristen.c.accardi@intel.com Cc: matthew@wil.cx Acked-by: Kenji Kaneshige Signed-off-by: Alex Chiang Signed-off-by: Jesse Barnes --- drivers/pci/slot.c | 2 +- include/linux/pci.h | 5 +++++ include/linux/pci_hotplug.h | 5 +++++ 3 files changed, 11 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/pci/slot.c b/drivers/pci/slot.c index b6ee352ae45..4dd1c3e157a 100644 --- a/drivers/pci/slot.c +++ b/drivers/pci/slot.c @@ -121,7 +121,7 @@ static int rename_slot(struct pci_slot *slot, const char *name) int result = 0; char *slot_name; - if (strcmp(kobject_name(&slot->kobj), name) == 0) + if (strcmp(pci_slot_name(slot), name) == 0) return result; slot_name = make_slot_name(name); diff --git a/include/linux/pci.h b/include/linux/pci.h index 9a8cee62330..955ab705c05 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -65,6 +65,11 @@ struct pci_slot { struct kobject kobj; }; +static inline const char *pci_slot_name(const struct pci_slot *slot) +{ + return kobject_name(&slot->kobj); +} + /* File state for mmap()s on /proc/bus/pci/X/Y */ enum pci_mmap_state { pci_mmap_io, diff --git a/include/linux/pci_hotplug.h b/include/linux/pci_hotplug.h index 5efba667186..a3a3245943b 100644 --- a/include/linux/pci_hotplug.h +++ b/include/linux/pci_hotplug.h @@ -165,6 +165,11 @@ struct hotplug_slot { }; #define to_hotplug_slot(n) container_of(n, struct hotplug_slot, kobj) +static inline const char *hotplug_slot_name(const struct hotplug_slot *slot) +{ + return pci_slot_name(slot->pci_slot); +} + extern int pci_hp_register(struct hotplug_slot *, struct pci_bus *, int nr, const char *name); extern int pci_hp_deregister(struct hotplug_slot *slot); -- cgit v1.2.3-70-g09d2 From 58319b802a614f10f1b5238fbde7a4b2e9a60069 Mon Sep 17 00:00:00 2001 From: Alex Chiang Date: Mon, 20 Oct 2008 17:41:58 -0600 Subject: PCI: Hotplug core: remove 'name' Now that the PCI core manages the 'name' for each individual hotplug driver, and all drivers (except rpaphp) have 
been converted to use hotplug_slot_name(), there is no need for the PCI hotplug core to drag around its own copy of name either. Cc: kristen.c.accardi@intel.com Cc: matthew@wil.cx Acked-by: Kenji Kaneshige Signed-off-by: Alex Chiang Signed-off-by: Jesse Barnes --- drivers/pci/hotplug/pci_hotplug_core.c | 6 +++--- include/linux/pci_hotplug.h | 3 --- 2 files changed, 3 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/drivers/pci/hotplug/pci_hotplug_core.c b/drivers/pci/hotplug/pci_hotplug_core.c index a6f1f282b68..535fce0f07f 100644 --- a/drivers/pci/hotplug/pci_hotplug_core.c +++ b/drivers/pci/hotplug/pci_hotplug_core.c @@ -533,7 +533,7 @@ static struct hotplug_slot *get_slot_from_name (const char *name) list_for_each (tmp, &pci_hotplug_slot_list) { slot = list_entry (tmp, struct hotplug_slot, slot_list); - if (strcmp(slot->name, name) == 0) + if (strcmp(hotplug_slot_name(slot), name) == 0) return slot; } return NULL; @@ -611,7 +611,7 @@ int pci_hp_deregister(struct hotplug_slot *hotplug) return -ENODEV; mutex_lock(&pci_hp_mutex); - temp = get_slot_from_name(hotplug->name); + temp = get_slot_from_name(hotplug_slot_name(hotplug)); if (temp != hotplug) { mutex_unlock(&pci_hp_mutex); return -ENODEV; @@ -621,7 +621,7 @@ int pci_hp_deregister(struct hotplug_slot *hotplug) slot = hotplug->pci_slot; fs_remove_slot(slot); - dbg("Removed slot %s from the list\n", hotplug->name); + dbg("Removed slot %s from the list\n", hotplug_slot_name(hotplug)); hotplug->release(hotplug); slot->hotplug = NULL; diff --git a/include/linux/pci_hotplug.h b/include/linux/pci_hotplug.h index a3a3245943b..a00bd1a0f15 100644 --- a/include/linux/pci_hotplug.h +++ b/include/linux/pci_hotplug.h @@ -142,8 +142,6 @@ struct hotplug_slot_info { /** * struct hotplug_slot - used to register a physical slot with the hotplug pci core - * @name: the name of the slot being registered. This string must - * be unique amoung slots registered on this system. 
* @ops: pointer to the &struct hotplug_slot_ops to be used for this slot * @info: pointer to the &struct hotplug_slot_info for the initial values for * this slot. @@ -153,7 +151,6 @@ struct hotplug_slot_info { * needs. */ struct hotplug_slot { - char *name; struct hotplug_slot_ops *ops; struct hotplug_slot_info *info; void (*release) (struct hotplug_slot *slot); -- cgit v1.2.3-70-g09d2 From 1388cc964e680c1086ca0edae35be094cb29d51e Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Wed, 22 Oct 2008 13:39:55 +1100 Subject: PCI: don't export linux/io.h from pci.h Move the include of io.h down into the #ifdef __KERNEL__ protected region. Signed-off-by: Stephen Rothwell Signed-off-by: Jesse Barnes --- include/linux/pci.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index 955ab705c05..752def8a2ef 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -18,7 +18,6 @@ #define LINUX_PCI_H #include /* The pci register defines */ -#include /* * The PCI interface treats multi-function devices as independent @@ -52,6 +51,7 @@ #include #include #include +#include /* Include the ID list */ #include -- cgit v1.2.3-70-g09d2 From 848e4c68c4695beae563f9a3d59fce596b466a74 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 21 Oct 2008 14:26:39 +0900 Subject: libata: transfer EHI control flags to slave ehc.i ATA_EHI_NO_AUTOPSY and ATA_EHI_QUIET are used to control the behavior of EH. As only the master link is visible outside EH, these flags are set only for the master link although they should also apply to the slave link, which causes spurious EH messages during probe and suspend/resume. This patch transfers those two flags to slave ehc.i before performing slave autopsy and reporting. 
Signed-off-by: Tejun Heo Signed-off-by: Jeff Garzik --- drivers/ata/libata-eh.c | 5 +++++ include/linux/libata.h | 3 +++ 2 files changed, 8 insertions(+) (limited to 'include/linux') diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c index a93247cc395..d2409a8acec 100644 --- a/drivers/ata/libata-eh.c +++ b/drivers/ata/libata-eh.c @@ -2010,8 +2010,13 @@ void ata_eh_autopsy(struct ata_port *ap) struct ata_eh_context *mehc = &ap->link.eh_context; struct ata_eh_context *sehc = &ap->slave_link->eh_context; + /* transfer control flags from master to slave */ + sehc->i.flags |= mehc->i.flags & ATA_EHI_TO_SLAVE_MASK; + + /* perform autopsy on the slave link */ ata_eh_link_autopsy(ap->slave_link); + /* transfer actions from slave to master and clear slave */ ata_eh_about_to_do(ap->slave_link, NULL, ATA_EH_ALL_ACTIONS); mehc->i.action |= sehc->i.action; mehc->i.dev_action[1] |= sehc->i.dev_action[1]; diff --git a/include/linux/libata.h b/include/linux/libata.h index 947cf84e555..c261aa0584b 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -340,6 +340,9 @@ enum { ATA_EHI_DID_RESET = ATA_EHI_DID_SOFTRESET | ATA_EHI_DID_HARDRESET, + /* mask of flags to transfer *to* the slave link */ + ATA_EHI_TO_SLAVE_MASK = ATA_EHI_NO_AUTOPSY | ATA_EHI_QUIET, + /* max tries if error condition is still set after ->error_handler */ ATA_EH_MAX_TRIES = 5, -- cgit v1.2.3-70-g09d2 From d181146572c4fa9af2a068b967cb53dcac7da944 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 2 Aug 2008 00:49:18 -0400 Subject: [PATCH] new helper - kern_path() Analog of lookup_path(), takes struct path *. 
Signed-off-by: Al Viro --- fs/namei.c | 10 ++++++++++ include/linux/namei.h | 2 ++ 2 files changed, 12 insertions(+) (limited to 'include/linux') diff --git a/fs/namei.c b/fs/namei.c index 4ea63ed5e79..4a56f9b59e8 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1106,6 +1106,15 @@ int path_lookup(const char *name, unsigned int flags, return do_path_lookup(AT_FDCWD, name, flags, nd); } +int kern_path(const char *name, unsigned int flags, struct path *path) +{ + struct nameidata nd; + int res = do_path_lookup(AT_FDCWD, name, flags, &nd); + if (!res) + *path = nd.path; + return res; +} + /** * vfs_path_lookup - lookup a file path relative to a dentry-vfsmount pair * @dentry: pointer to dentry of the base directory @@ -2855,6 +2864,7 @@ EXPORT_SYMBOL(__page_symlink); EXPORT_SYMBOL(page_symlink); EXPORT_SYMBOL(page_symlink_inode_operations); EXPORT_SYMBOL(path_lookup); +EXPORT_SYMBOL(kern_path); EXPORT_SYMBOL(vfs_path_lookup); EXPORT_SYMBOL(inode_permission); EXPORT_SYMBOL(vfs_permission); diff --git a/include/linux/namei.h b/include/linux/namei.h index 68f8c3203c8..221e8bc894b 100644 --- a/include/linux/namei.h +++ b/include/linux/namei.h @@ -61,6 +61,8 @@ extern int user_path_at(int, const char __user *, unsigned, struct path *); #define user_path_dir(name, path) \ user_path_at(AT_FDCWD, name, LOOKUP_FOLLOW | LOOKUP_DIRECTORY, path) +extern int kern_path(const char *, unsigned, struct path *); + extern int path_lookup(const char *, unsigned, struct nameidata *); extern int vfs_path_lookup(struct dentry *, struct vfsmount *, const char *, unsigned int, struct nameidata *); -- cgit v1.2.3-70-g09d2 From b63365a2d60268a3988285d6c3c6003d7066f93a Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 23 Oct 2008 01:11:29 -0700 Subject: net: Fix disjunct computation of netdev features My change commit e2a6b85247aacc52d6ba0d9b37a99b8d1a3e0d83 net: Enable TSO if supported by at least one device didn't do what was intended because the netdev_compute_features function was designed for 
conjunctions. So what happened was that it would simply take the TSO status of the last constituent device. This patch extends it to support both conjunctions and disjunctions under the new name of netdev_increment_features. It also adds a new function netdev_fix_features which does the sanity checking that usually occurs upon registration. This ensures that the computation doesn't result in an illegal combination since this checking is absent when the change is initiated via ethtool. The two users of netdev_compute_features have been converted. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 16 +++-- include/linux/netdevice.h | 12 +++- net/bridge/br_device.c | 2 +- net/bridge/br_if.c | 14 +++-- net/core/dev.c | 135 +++++++++++++++++++++------------------- 5 files changed, 104 insertions(+), 75 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 8e2be24f3fe..832739f38db 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -1341,18 +1341,24 @@ static int bond_compute_features(struct bonding *bond) int i; features &= ~(NETIF_F_ALL_CSUM | BOND_VLAN_FEATURES); - features |= NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA | - NETIF_F_GSO_MASK | NETIF_F_NO_CSUM; + features |= NETIF_F_GSO_MASK | NETIF_F_NO_CSUM; + + if (!bond->first_slave) + goto done; + + features &= ~NETIF_F_ONE_FOR_ALL; bond_for_each_slave(bond, slave, i) { - features = netdev_compute_features(features, - slave->dev->features); + features = netdev_increment_features(features, + slave->dev->features, + NETIF_F_ONE_FOR_ALL); if (slave->dev->hard_header_len > max_hard_header_len) max_hard_header_len = slave->dev->hard_header_len; } +done: features |= (bond_dev->features & BOND_VLAN_FEATURES); - bond_dev->features = features; + bond_dev->features = netdev_fix_features(features, NULL); bond_dev->hard_header_len = max_hard_header_len; return 0; diff --git 
a/include/linux/netdevice.h b/include/linux/netdevice.h index 64875859d65..c8bcb59adfd 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -541,6 +541,14 @@ struct net_device #define NETIF_F_V6_CSUM (NETIF_F_GEN_CSUM | NETIF_F_IPV6_CSUM) #define NETIF_F_ALL_CSUM (NETIF_F_V4_CSUM | NETIF_F_V6_CSUM) + /* + * If one device supports one of these features, then enable them + * for all in netdev_increment_features. + */ +#define NETIF_F_ONE_FOR_ALL (NETIF_F_GSO_SOFTWARE | NETIF_F_GSO_ROBUST | \ + NETIF_F_SG | NETIF_F_HIGHDMA | \ + NETIF_F_FRAGLIST) + /* Interface index. Unique device identifier */ int ifindex; int iflink; @@ -1698,7 +1706,9 @@ extern char *netdev_drivername(const struct net_device *dev, char *buffer, int l extern void linkwatch_run_queue(void); -extern int netdev_compute_features(unsigned long all, unsigned long one); +unsigned long netdev_increment_features(unsigned long all, unsigned long one, + unsigned long mask); +unsigned long netdev_fix_features(unsigned long features, const char *name); static inline int net_gso_ok(int features, int gso_type) { diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index 22ba8632196..6c023f0f825 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -179,5 +179,5 @@ void br_dev_setup(struct net_device *dev) dev->features = NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA | NETIF_F_GSO_MASK | NETIF_F_NO_CSUM | NETIF_F_LLTX | - NETIF_F_NETNS_LOCAL; + NETIF_F_NETNS_LOCAL | NETIF_F_GSO; } diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index 573e20f7dba..0a09ccf68c1 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -347,15 +347,21 @@ int br_min_mtu(const struct net_bridge *br) void br_features_recompute(struct net_bridge *br) { struct net_bridge_port *p; - unsigned long features; + unsigned long features, mask; - features = br->feature_mask; + features = mask = br->feature_mask; + if (list_empty(&br->port_list)) + goto done; + + features &= ~NETIF_F_ONE_FOR_ALL; 
list_for_each_entry(p, &br->port_list, list) { - features = netdev_compute_features(features, p->dev->features); + features = netdev_increment_features(features, + p->dev->features, mask); } - br->dev->features = features; +done: + br->dev->features = netdev_fix_features(features, NULL); } /* called with RTNL */ diff --git a/net/core/dev.c b/net/core/dev.c index b8a4fd0806a..d9038e328cc 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3947,6 +3947,46 @@ static void netdev_init_queue_locks(struct net_device *dev) __netdev_init_queue_locks_one(dev, &dev->rx_queue, NULL); } +unsigned long netdev_fix_features(unsigned long features, const char *name) +{ + /* Fix illegal SG+CSUM combinations. */ + if ((features & NETIF_F_SG) && + !(features & NETIF_F_ALL_CSUM)) { + if (name) + printk(KERN_NOTICE "%s: Dropping NETIF_F_SG since no " + "checksum feature.\n", name); + features &= ~NETIF_F_SG; + } + + /* TSO requires that SG is present as well. */ + if ((features & NETIF_F_TSO) && !(features & NETIF_F_SG)) { + if (name) + printk(KERN_NOTICE "%s: Dropping NETIF_F_TSO since no " + "SG feature.\n", name); + features &= ~NETIF_F_TSO; + } + + if (features & NETIF_F_UFO) { + if (!(features & NETIF_F_GEN_CSUM)) { + if (name) + printk(KERN_ERR "%s: Dropping NETIF_F_UFO " + "since no NETIF_F_HW_CSUM feature.\n", + name); + features &= ~NETIF_F_UFO; + } + + if (!(features & NETIF_F_SG)) { + if (name) + printk(KERN_ERR "%s: Dropping NETIF_F_UFO " + "since no NETIF_F_SG feature.\n", name); + features &= ~NETIF_F_UFO; + } + } + + return features; +} +EXPORT_SYMBOL(netdev_fix_features); + /** * register_netdevice - register a network device * @dev: device to register @@ -4032,36 +4072,7 @@ int register_netdevice(struct net_device *dev) dev->features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM|NETIF_F_HW_CSUM); } - - /* Fix illegal SG+CSUM combinations. 
*/ - if ((dev->features & NETIF_F_SG) && - !(dev->features & NETIF_F_ALL_CSUM)) { - printk(KERN_NOTICE "%s: Dropping NETIF_F_SG since no checksum feature.\n", - dev->name); - dev->features &= ~NETIF_F_SG; - } - - /* TSO requires that SG is present as well. */ - if ((dev->features & NETIF_F_TSO) && - !(dev->features & NETIF_F_SG)) { - printk(KERN_NOTICE "%s: Dropping NETIF_F_TSO since no SG feature.\n", - dev->name); - dev->features &= ~NETIF_F_TSO; - } - if (dev->features & NETIF_F_UFO) { - if (!(dev->features & NETIF_F_HW_CSUM)) { - printk(KERN_ERR "%s: Dropping NETIF_F_UFO since no " - "NETIF_F_HW_CSUM feature.\n", - dev->name); - dev->features &= ~NETIF_F_UFO; - } - if (!(dev->features & NETIF_F_SG)) { - printk(KERN_ERR "%s: Dropping NETIF_F_UFO since no " - "NETIF_F_SG feature.\n", - dev->name); - dev->features &= ~NETIF_F_UFO; - } - } + dev->features = netdev_fix_features(dev->features, dev->name); /* Enable software GSO if SG is supported. */ if (dev->features & NETIF_F_SG) @@ -4700,49 +4711,45 @@ static int __init netdev_dma_register(void) { return -ENODEV; } #endif /* CONFIG_NET_DMA */ /** - * netdev_compute_feature - compute conjunction of two feature sets - * @all: first feature set - * @one: second feature set + * netdev_increment_features - increment feature set by one + * @all: current feature set + * @one: new feature set + * @mask: mask feature set * * Computes a new feature set after adding a device with feature set - * @one to the master device with current feature set @all. Returns - * the new feature set. + * @one to the master device with current feature set @all. Will not + * enable anything that is off in @mask. Returns the new feature set. 
*/ -int netdev_compute_features(unsigned long all, unsigned long one) -{ - /* if device needs checksumming, downgrade to hw checksumming */ - if (all & NETIF_F_NO_CSUM && !(one & NETIF_F_NO_CSUM)) - all ^= NETIF_F_NO_CSUM | NETIF_F_HW_CSUM; - - /* if device can't do all checksum, downgrade to ipv4/ipv6 */ - if (all & NETIF_F_HW_CSUM && !(one & NETIF_F_HW_CSUM)) - all ^= NETIF_F_HW_CSUM - | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM; - - if (one & NETIF_F_GSO) - one |= NETIF_F_GSO_SOFTWARE; - one |= NETIF_F_GSO; - - /* - * If even one device supports a GSO protocol with software fallback, - * enable it for all. - */ - all |= one & NETIF_F_GSO_SOFTWARE; +unsigned long netdev_increment_features(unsigned long all, unsigned long one, + unsigned long mask) +{ + /* If device needs checksumming, downgrade to it. */ + if (all & NETIF_F_NO_CSUM && !(one & NETIF_F_NO_CSUM)) + all ^= NETIF_F_NO_CSUM | (one & NETIF_F_ALL_CSUM); + else if (mask & NETIF_F_ALL_CSUM) { + /* If one device supports v4/v6 checksumming, set for all. */ + if (one & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM) && + !(all & NETIF_F_GEN_CSUM)) { + all &= ~NETIF_F_ALL_CSUM; + all |= one & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM); + } - /* If even one device supports robust GSO, enable it for all. */ - if (one & NETIF_F_GSO_ROBUST) - all |= NETIF_F_GSO_ROBUST; + /* If one device supports hw checksumming, set for all. 
*/ + if (one & NETIF_F_GEN_CSUM && !(all & NETIF_F_GEN_CSUM)) { + all &= ~NETIF_F_ALL_CSUM; + all |= NETIF_F_HW_CSUM; + } + } - all &= one | NETIF_F_LLTX; + one |= NETIF_F_ALL_CSUM; - if (!(all & NETIF_F_ALL_CSUM)) - all &= ~NETIF_F_SG; - if (!(all & NETIF_F_SG)) - all &= ~NETIF_F_GSO_MASK; + one |= all & NETIF_F_ONE_FOR_ALL; + all &= one | NETIF_F_LLTX | NETIF_F_GSO; + all |= one & mask & NETIF_F_ONE_FOR_ALL; return all; } -EXPORT_SYMBOL(netdev_compute_features); +EXPORT_SYMBOL(netdev_increment_features); static struct hlist_head *netdev_create_hash(void) { -- cgit v1.2.3-70-g09d2 From 3516586a424ea5727be089da6541cbd5644f0497 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 5 Aug 2008 03:00:49 -0400 Subject: [PATCH] make O_EXCL in nd->intent.flags visible in nd->flags New flag: LOOKUP_EXCL. Set before doing the final step of pathname resolution on the paths that have LOOKUP_CREATE and O_EXCL. Signed-off-by: Al Viro --- fs/gfs2/ops_inode.c | 2 +- fs/namei.c | 4 +++- fs/nfs/dir.c | 6 ++---- include/linux/namei.h | 5 +++-- 4 files changed, 9 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c index 534e1e2c65c..d232991b904 100644 --- a/fs/gfs2/ops_inode.c +++ b/fs/gfs2/ops_inode.c @@ -69,7 +69,7 @@ static int gfs2_create(struct inode *dir, struct dentry *dentry, mark_inode_dirty(inode); break; } else if (PTR_ERR(inode) != -EEXIST || - (nd && (nd->intent.open.flags & O_EXCL))) { + (nd && nd->flags & LOOKUP_EXCL)) { gfs2_holder_uninit(ghs); return PTR_ERR(inode); } diff --git a/fs/namei.c b/fs/namei.c index e584f04745b..2b8f823eda4 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1709,6 +1709,8 @@ struct file *do_filp_open(int dfd, const char *pathname, dir = nd.path.dentry; nd.flags &= ~LOOKUP_PARENT; nd.flags |= LOOKUP_CREATE | LOOKUP_OPEN; + if (flag & O_EXCL) + nd.flags |= LOOKUP_EXCL; mutex_lock(&dir->d_inode->i_mutex); path.dentry = lookup_hash(&nd); path.mnt = nd.path.mnt; @@ -1906,7 +1908,7 @@ struct 
dentry *lookup_create(struct nameidata *nd, int is_dir) if (nd->last_type != LAST_NORM) goto fail; nd->flags &= ~LOOKUP_PARENT; - nd->flags |= LOOKUP_CREATE; + nd->flags |= LOOKUP_CREATE | LOOKUP_EXCL; nd->intent.open.flags = O_EXCL; /* diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index efdba2e802d..c216c8786c5 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -707,9 +707,7 @@ static int nfs_is_exclusive_create(struct inode *dir, struct nameidata *nd) { if (NFS_PROTO(dir)->version == 2) return 0; - if (nd == NULL || nfs_lookup_check_intent(nd, LOOKUP_CREATE) == 0) - return 0; - return (nd->intent.open.flags & O_EXCL) != 0; + return nd && nfs_lookup_check_intent(nd, LOOKUP_EXCL); } /* @@ -1009,7 +1007,7 @@ static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry /* Let vfs_create() deal with O_EXCL. Instantiate, but don't hash * the dentry. */ - if (nd->intent.open.flags & O_EXCL) { + if (nd->flags & LOOKUP_EXCL) { d_instantiate(dentry, NULL); goto out; } diff --git a/include/linux/namei.h b/include/linux/namei.h index 221e8bc894b..6b5627afd2e 100644 --- a/include/linux/namei.h +++ b/include/linux/namei.h @@ -51,8 +51,9 @@ enum {LAST_NORM, LAST_ROOT, LAST_DOT, LAST_DOTDOT, LAST_BIND}; /* * Intent data */ -#define LOOKUP_OPEN (0x0100) -#define LOOKUP_CREATE (0x0200) +#define LOOKUP_OPEN 0x0100 +#define LOOKUP_CREATE 0x0200 +#define LOOKUP_EXCL 0x0400 extern int user_path_at(int, const char __user *, unsigned, struct path *); -- cgit v1.2.3-70-g09d2 From 4ea3ada2955e4519befa98ff55dd62d6dfbd1705 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 11 Aug 2008 15:48:57 +0200 Subject: [PATCH] new helper: d_obtain_alias The calling conventions of d_alloc_anon are rather unfortunate for all users, and it's name is not very descriptive either. Add d_obtain_alias as a new exported helper that drops the inode reference in the failure case, too and allows to pass-through NULL pointers and inodes to allow for tail-calls in the export operations. 
Incidentally this helper already existed as a private function in libfs.c as exportfs_d_alloc so kill that one and switch the callers to d_obtain_alias. Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- fs/dcache.c | 35 +++++++++++++++++++++++++++++++++++ fs/libfs.c | 26 ++------------------------ include/linux/dcache.h | 1 + 3 files changed, 38 insertions(+), 24 deletions(-) (limited to 'include/linux') diff --git a/fs/dcache.c b/fs/dcache.c index e7a1a99b746..46fc7820678 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -1174,6 +1174,41 @@ struct dentry * d_alloc_anon(struct inode *inode) return res; } +/** + * d_obtain_alias - find or allocate a dentry for a given inode + * @inode: inode to allocate the dentry for + * + * Obtain a dentry for an inode resulting from NFS filehandle conversion or + * similar open by handle operations. The returned dentry may be anonymous, + * or may have a full name (if the inode was already in the cache). + * + * When called on a directory inode, we must ensure that the inode only ever + * has one dentry. If a dentry is found, that is returned instead of + * allocating a new one. + * + * On successful return, the reference to the inode has been transferred + * to the dentry. If %NULL is returned (indicating kmalloc failure), + * the reference on the inode has been released. To make it easier + * to use in export operations a NULL or IS_ERR inode may be passed in + * and will be casted to the corresponding NULL or IS_ERR dentry. 
+ */ +struct dentry *d_obtain_alias(struct inode *inode) +{ + struct dentry *dentry; + + if (!inode) + return NULL; + if (IS_ERR(inode)) + return ERR_CAST(inode); + + dentry = d_alloc_anon(inode); + if (!dentry) { + iput(inode); + dentry = ERR_PTR(-ENOMEM); + } + return dentry; +} +EXPORT_SYMBOL_GPL(d_obtain_alias); /** * d_splice_alias - splice a disconnected dentry into the tree if one exists diff --git a/fs/libfs.c b/fs/libfs.c index 1add676a19d..74688598bcf 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -732,28 +732,6 @@ out: return ret; } -/* - * This is what d_alloc_anon should have been. Once the exportfs - * argument transition has been finished I will update d_alloc_anon - * to this prototype and this wrapper will go away. --hch - */ -static struct dentry *exportfs_d_alloc(struct inode *inode) -{ - struct dentry *dentry; - - if (!inode) - return NULL; - if (IS_ERR(inode)) - return ERR_PTR(PTR_ERR(inode)); - - dentry = d_alloc_anon(inode); - if (!dentry) { - iput(inode); - dentry = ERR_PTR(-ENOMEM); - } - return dentry; -} - /** * generic_fh_to_dentry - generic helper for the fh_to_dentry export operation * @sb: filesystem to do the file handle conversion on @@ -782,7 +760,7 @@ struct dentry *generic_fh_to_dentry(struct super_block *sb, struct fid *fid, break; } - return exportfs_d_alloc(inode); + return d_obtain_alias(inode); } EXPORT_SYMBOL_GPL(generic_fh_to_dentry); @@ -815,7 +793,7 @@ struct dentry *generic_fh_to_parent(struct super_block *sb, struct fid *fid, break; } - return exportfs_d_alloc(inode); + return d_obtain_alias(inode); } EXPORT_SYMBOL_GPL(generic_fh_to_parent); diff --git a/include/linux/dcache.h b/include/linux/dcache.h index efba1de629a..2404257d6c6 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -231,6 +231,7 @@ extern struct dentry * d_alloc(struct dentry *, const struct qstr *); extern struct dentry * d_alloc_anon(struct inode *); extern struct dentry * d_splice_alias(struct inode *, struct dentry *); extern struct 
dentry * d_add_ci(struct dentry *, struct inode *, struct qstr *); +extern struct dentry * d_obtain_alias(struct inode *); extern void shrink_dcache_sb(struct super_block *); extern void shrink_dcache_parent(struct dentry *); extern void shrink_dcache_for_umount(struct super_block *); -- cgit v1.2.3-70-g09d2 From 9308a6128d9074e348d9f9b5822546fe12a794a9 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 11 Aug 2008 15:49:12 +0200 Subject: [PATCH] kill d_alloc_anon Remove d_alloc_anon now that no users are left. Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- fs/dcache.c | 108 +++++++++++++++++-------------------------------- include/linux/dcache.h | 1 - 2 files changed, 37 insertions(+), 72 deletions(-) (limited to 'include/linux') diff --git a/fs/dcache.c b/fs/dcache.c index d45ff7f5ecc..1710d2484fd 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -1110,70 +1110,6 @@ static inline struct hlist_head *d_hash(struct dentry *parent, return dentry_hashtable + (hash & D_HASHMASK); } -/** - * d_alloc_anon - allocate an anonymous dentry - * @inode: inode to allocate the dentry for - * - * This is similar to d_alloc_root. It is used by filesystems when - * creating a dentry for a given inode, often in the process of - * mapping a filehandle to a dentry. The returned dentry may be - * anonymous, or may have a full name (if the inode was already - * in the cache). The file system may need to make further - * efforts to connect this dentry into the dcache properly. - * - * When called on a directory inode, we must ensure that - * the inode only ever has one dentry. If a dentry is - * found, that is returned instead of allocating a new one. - * - * On successful return, the reference to the inode has been transferred - * to the dentry. If %NULL is returned (indicating kmalloc failure), - * the reference on the inode has not been released. 
- */ - -struct dentry * d_alloc_anon(struct inode *inode) -{ - static const struct qstr anonstring = { .name = "" }; - struct dentry *tmp; - struct dentry *res; - - if ((res = d_find_alias(inode))) { - iput(inode); - return res; - } - - tmp = d_alloc(NULL, &anonstring); - if (!tmp) - return NULL; - - tmp->d_parent = tmp; /* make sure dput doesn't croak */ - - spin_lock(&dcache_lock); - res = __d_find_alias(inode, 0); - if (!res) { - /* attach a disconnected dentry */ - res = tmp; - tmp = NULL; - spin_lock(&res->d_lock); - res->d_sb = inode->i_sb; - res->d_parent = res; - res->d_inode = inode; - res->d_flags |= DCACHE_DISCONNECTED; - res->d_flags &= ~DCACHE_UNHASHED; - list_add(&res->d_alias, &inode->i_dentry); - hlist_add_head(&res->d_hash, &inode->i_sb->s_anon); - spin_unlock(&res->d_lock); - - inode = NULL; /* don't drop reference */ - } - spin_unlock(&dcache_lock); - - if (inode) - iput(inode); - if (tmp) - dput(tmp); - return res; -} - /** * d_obtain_alias - find or allocate a dentry for a given inode * @inode: inode to allocate the dentry for @@ -1194,19 +1130,50 @@ struct dentry * d_alloc_anon(struct inode *inode) */ struct dentry *d_obtain_alias(struct inode *inode) { - struct dentry *dentry; + static const struct qstr anonstring = { .name = "" }; + struct dentry *tmp; + struct dentry *res; if (!inode) return ERR_PTR(-ESTALE); if (IS_ERR(inode)) return ERR_CAST(inode); - dentry = d_alloc_anon(inode); - if (!dentry) { - iput(inode); - dentry = ERR_PTR(-ENOMEM); + res = d_find_alias(inode); + if (res) + goto out_iput; + + tmp = d_alloc(NULL, &anonstring); + if (!tmp) { + res = ERR_PTR(-ENOMEM); + goto out_iput; } - return dentry; + tmp->d_parent = tmp; /* make sure dput doesn't croak */ + + spin_lock(&dcache_lock); + res = __d_find_alias(inode, 0); + if (res) { + spin_unlock(&dcache_lock); + dput(tmp); + goto out_iput; + } + + /* attach a disconnected dentry */ + spin_lock(&tmp->d_lock); + tmp->d_sb = inode->i_sb; + tmp->d_inode = inode; + tmp->d_flags |= 
DCACHE_DISCONNECTED; + tmp->d_flags &= ~DCACHE_UNHASHED; + list_add(&tmp->d_alias, &inode->i_dentry); + hlist_add_head(&tmp->d_hash, &inode->i_sb->s_anon); + spin_unlock(&tmp->d_lock); + + spin_unlock(&dcache_lock); + return tmp; + + out_iput: + iput(inode); + return res; } EXPORT_SYMBOL_GPL(d_obtain_alias); @@ -2379,7 +2346,6 @@ void __init vfs_caches_init(unsigned long mempages) } EXPORT_SYMBOL(d_alloc); -EXPORT_SYMBOL(d_alloc_anon); EXPORT_SYMBOL(d_alloc_root); EXPORT_SYMBOL(d_delete); EXPORT_SYMBOL(d_find_alias); diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 2404257d6c6..74c64ae30cf 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -228,7 +228,6 @@ extern void d_delete(struct dentry *); /* allocate/de-allocate */ extern struct dentry * d_alloc(struct dentry *, const struct qstr *); -extern struct dentry * d_alloc_anon(struct inode *); extern struct dentry * d_splice_alias(struct inode *, struct dentry *); extern struct dentry * d_add_ci(struct dentry *, struct inode *, struct qstr *); extern struct dentry * d_obtain_alias(struct inode *); -- cgit v1.2.3-70-g09d2 From 6de24f0ed08054b2a202902e4d63beff27654db8 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Thu, 28 Aug 2008 06:25:49 +0400 Subject: [PATCH 1/2] anondev: init IDR statically Signed-off-by: Alexey Dobriyan --- fs/super.c | 7 +------ include/linux/fs.h | 1 - init/main.c | 1 - 3 files changed, 1 insertion(+), 8 deletions(-) (limited to 'include/linux') diff --git a/fs/super.c b/fs/super.c index e931ae9511f..dd23bf927fb 100644 --- a/fs/super.c +++ b/fs/super.c @@ -682,7 +682,7 @@ void emergency_remount(void) * filesystems which don't use real block-devices. 
-- jrs */ -static struct idr unnamed_dev_idr; +static DEFINE_IDR(unnamed_dev_idr); static DEFINE_SPINLOCK(unnamed_dev_lock);/* protects the above */ int set_anon_super(struct super_block *s, void *data) @@ -726,11 +726,6 @@ void kill_anon_super(struct super_block *sb) EXPORT_SYMBOL(kill_anon_super); -void __init unnamed_dev_init(void) -{ - idr_init(&unnamed_dev_idr); -} - void kill_litter_super(struct super_block *sb) { if (sb->s_root) diff --git a/include/linux/fs.h b/include/linux/fs.h index a6a625be13f..5f70aa62cf0 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1593,7 +1593,6 @@ extern int get_sb_pseudo(struct file_system_type *, char *, struct vfsmount *mnt); extern int simple_set_mnt(struct vfsmount *mnt, struct super_block *sb); int __put_super_and_need_restart(struct super_block *sb); -void unnamed_dev_init(void); /* Alas, no aliases. Too much hassle with bringing module.h everywhere */ #define fops_get(fops) \ diff --git a/init/main.c b/init/main.c index 3e17a3bafe6..c6a1024a27a 100644 --- a/init/main.c +++ b/init/main.c @@ -670,7 +670,6 @@ asmlinkage void __init start_kernel(void) fork_init(num_physpages); proc_caches_init(); buffer_init(); - unnamed_dev_init(); key_init(); security_init(); vfs_caches_init(num_physpages); -- cgit v1.2.3-70-g09d2 From e2761a1167633ed943fea29002f990194923d060 Mon Sep 17 00:00:00 2001 From: OGAWA Hirofumi Date: Thu, 16 Oct 2008 07:50:28 +0900 Subject: [PATCH vfs-2.6 2/6] vfs: add d_ancestor() This adds d_ancestor() instead of d_isparent(), then use it. If new_dentry == old_dentry, is_subdir() returns 1, looks strange. "new_dentry == old_dentry" is not subdir obviously. But I'm not checking callers for now, so this keeps current behavior. 
Signed-off-by: OGAWA Hirofumi --- fs/dcache.c | 45 +++++++++++++++++++++++---------------------- fs/namei.c | 22 ++++++++++------------ include/linux/dcache.h | 1 + 3 files changed, 34 insertions(+), 34 deletions(-) (limited to 'include/linux') diff --git a/fs/dcache.c b/fs/dcache.c index c6fd1f27da5..64024005da4 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -1720,18 +1720,23 @@ void d_move(struct dentry * dentry, struct dentry * target) spin_unlock(&dcache_lock); } -/* - * Helper that returns 1 if p1 is a parent of p2, else 0 +/** + * d_ancestor - search for an ancestor + * @p1: ancestor dentry + * @p2: child dentry + * + * Returns the ancestor dentry of p2 which is a child of p1, if p1 is + * an ancestor of p2, else NULL. */ -static int d_isparent(struct dentry *p1, struct dentry *p2) +struct dentry *d_ancestor(struct dentry *p1, struct dentry *p2) { struct dentry *p; for (p = p2; !IS_ROOT(p); p = p->d_parent) { if (p->d_parent == p1) - return 1; + return p; } - return 0; + return NULL; } /* @@ -1755,7 +1760,7 @@ static struct dentry *__d_unalias(struct dentry *dentry, struct dentry *alias) /* Check for loops */ ret = ERR_PTR(-ELOOP); - if (d_isparent(alias, dentry)) + if (d_ancestor(alias, dentry)) goto out_err; /* See lock_rename() */ @@ -2155,31 +2160,27 @@ out: * Caller must ensure that "new_dentry" is pinned before calling is_subdir() */ -int is_subdir(struct dentry * new_dentry, struct dentry * old_dentry) +int is_subdir(struct dentry *new_dentry, struct dentry *old_dentry) { int result; - struct dentry * saved = new_dentry; unsigned long seq; - /* need rcu_readlock to protect against the d_parent trashing due to - * d_move + /* FIXME: This is old behavior, needed? Please check callers. 
*/ + if (new_dentry == old_dentry) + return 1; + + /* + * Need rcu_readlock to protect against the d_parent trashing + * due to d_move */ rcu_read_lock(); - do { + do { /* for restarting inner loop in case of seq retry */ - new_dentry = saved; - result = 0; seq = read_seqbegin(&rename_lock); - for (;;) { - if (new_dentry != old_dentry) { - if (IS_ROOT(new_dentry)) - break; - new_dentry = new_dentry->d_parent; - continue; - } + if (d_ancestor(old_dentry, new_dentry)) result = 1; - break; - } + else + result = 0; } while (read_seqretry(&rename_lock, seq)); rcu_read_unlock(); diff --git a/fs/namei.c b/fs/namei.c index 068a9e50c8c..b7cd65224d6 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1454,20 +1454,18 @@ struct dentry *lock_rename(struct dentry *p1, struct dentry *p2) mutex_lock(&p1->d_inode->i_sb->s_vfs_rename_mutex); - for (p = p1; !IS_ROOT(p); p = p->d_parent) { - if (p->d_parent == p2) { - mutex_lock_nested(&p2->d_inode->i_mutex, I_MUTEX_PARENT); - mutex_lock_nested(&p1->d_inode->i_mutex, I_MUTEX_CHILD); - return p; - } + p = d_ancestor(p2, p1); + if (p) { + mutex_lock_nested(&p2->d_inode->i_mutex, I_MUTEX_PARENT); + mutex_lock_nested(&p1->d_inode->i_mutex, I_MUTEX_CHILD); + return p; } - for (p = p2; !IS_ROOT(p); p = p->d_parent) { - if (p->d_parent == p1) { - mutex_lock_nested(&p1->d_inode->i_mutex, I_MUTEX_PARENT); - mutex_lock_nested(&p2->d_inode->i_mutex, I_MUTEX_CHILD); - return p; - } + p = d_ancestor(p1, p2); + if (p) { + mutex_lock_nested(&p1->d_inode->i_mutex, I_MUTEX_PARENT); + mutex_lock_nested(&p2->d_inode->i_mutex, I_MUTEX_CHILD); + return p; } mutex_lock_nested(&p1->d_inode->i_mutex, I_MUTEX_PARENT); diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 74c64ae30cf..a37359d0bad 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -287,6 +287,7 @@ static inline struct dentry *d_add_unique(struct dentry *entry, struct inode *in /* used for rename() and baskets */ extern void d_move(struct dentry *, struct dentry *); 
+extern struct dentry *d_ancestor(struct dentry *, struct dentry *); /* appendix may either be NULL or be used for transname suffixes */ extern struct dentry * d_lookup(struct dentry *, struct qstr *); -- cgit v1.2.3-70-g09d2 From 4e9ed2f85af7adfa7c3f0efa839a53186254fdcb Mon Sep 17 00:00:00 2001 From: OGAWA Hirofumi Date: Thu, 16 Oct 2008 07:50:29 +0900 Subject: [PATCH vfs-2.6 6/6] vfs: add LOOKUP_RENAME_TARGET intent This adds LOOKUP_RENAME_TARGET intent for lookup of rename destination. LOOKUP_RENAME_TARGET is going to be used like LOOKUP_CREATE. However, since the destination of rename() can be an existing directory entry, it differs from LOOKUP_CREATE. Although that difference doesn't matter in my usage, this flag makes it visible to users of this intent. Signed-off-by: OGAWA Hirofumi --- fs/namei.c | 1 + include/linux/namei.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include/linux') diff --git a/fs/namei.c b/fs/namei.c index 18894fdf048..9e2a534383d 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2654,6 +2654,7 @@ asmlinkage long sys_renameat(int olddfd, const char __user *oldname, oldnd.flags &= ~LOOKUP_PARENT; newnd.flags &= ~LOOKUP_PARENT; + newnd.flags |= LOOKUP_RENAME_TARGET; trap = lock_rename(new_dir, old_dir); diff --git a/include/linux/namei.h b/include/linux/namei.h index 6b5627afd2e..99eb80306dc 100644 --- a/include/linux/namei.h +++ b/include/linux/namei.h @@ -54,6 +54,7 @@ enum {LAST_NORM, LAST_ROOT, LAST_DOT, LAST_DOTDOT, LAST_BIND}; #define LOOKUP_OPEN 0x0100 #define LOOKUP_CREATE 0x0200 #define LOOKUP_EXCL 0x0400 +#define LOOKUP_RENAME_TARGET 0x0800 extern int user_path_at(int, const char __user *, unsigned, struct path *); -- cgit v1.2.3-70-g09d2 From f696a3659fc4b3a3bf4bc83d9dbec5e5a2ffd929 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Thu, 31 Jul 2008 13:41:58 +0200 Subject: [PATCH] move executable checking into ->permission() For execute permission on regular files we need to check if file has any execute bits at all, regardless of capabilities.
This check is normally performed by generic_permission() but was also added to the case when the filesystem defines its own ->permission() method. In the latter case the filesystem should be responsible for performing this check. Move the check from inode_permission() into the filesystems which are not calling generic_permission(). Create a helper function execute_ok() that returns true if the inode is a directory or if any execute bits are present in i_mode. Also fix up the following code: - coda control file is never executable - sysctl files are never executable - hfs_permission seems broken on MAY_EXEC, remove - hfsplus_permission is equivalent to generic_permission(), remove Signed-off-by: Miklos Szeredi --- fs/cifs/cifsfs.c | 9 ++++++--- fs/coda/dir.c | 3 +++ fs/coda/pioctl.c | 2 +- fs/hfs/inode.c | 8 -------- fs/hfsplus/inode.c | 13 ------------- fs/namei.c | 21 ++++----------------- fs/nfs/dir.c | 3 +++ fs/proc/proc_sysctl.c | 10 ++++++++-- include/linux/fs.h | 5 +++++ 9 files changed, 30 insertions(+), 44 deletions(-) (limited to 'include/linux') diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 89c64a8dcb9..84cc011a16e 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -275,9 +275,12 @@ static int cifs_permission(struct inode *inode, int mask) cifs_sb = CIFS_SB(inode->i_sb); - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_PERM) - return 0; - else /* file mode might have been restricted at mount time + if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_PERM) { + if ((mask & MAY_EXEC) && !execute_ok(inode)) + return -EACCES; + else + return 0; + } else /* file mode might have been restricted at mount time on the client (above and beyond ACL on servers) for servers which do not support setting and viewing mode bits, so allowing client to check permissions is useful */ diff --git a/fs/coda/dir.c b/fs/coda/dir.c index c5916228243..75b1fa90b2c 100644 --- a/fs/coda/dir.c +++ b/fs/coda/dir.c @@ -146,6 +146,9 @@ int coda_permission(struct inode *inode, int mask) if 
(!mask) return 0; + if ((mask & MAY_EXEC) && !execute_ok(inode)) + return -EACCES; + lock_kernel(); if (coda_cache_check(inode, mask)) diff --git a/fs/coda/pioctl.c b/fs/coda/pioctl.c index c51365422aa..773f2ce9aa0 100644 --- a/fs/coda/pioctl.c +++ b/fs/coda/pioctl.c @@ -43,7 +43,7 @@ const struct file_operations coda_ioctl_operations = { /* the coda pioctl inode ops */ static int coda_ioctl_permission(struct inode *inode, int mask) { - return 0; + return (mask & MAY_EXEC) ? -EACCES : 0; } static int coda_pioctl(struct inode * inode, struct file * filp, diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c index 7e19835efa2..c69b7ac75bf 100644 --- a/fs/hfs/inode.c +++ b/fs/hfs/inode.c @@ -511,13 +511,6 @@ void hfs_clear_inode(struct inode *inode) } } -static int hfs_permission(struct inode *inode, int mask) -{ - if (S_ISREG(inode->i_mode) && mask & MAY_EXEC) - return 0; - return generic_permission(inode, mask, NULL); -} - static int hfs_file_open(struct inode *inode, struct file *file) { if (HFS_IS_RSRC(inode)) @@ -616,7 +609,6 @@ static const struct inode_operations hfs_file_inode_operations = { .lookup = hfs_file_lookup, .truncate = hfs_file_truncate, .setattr = hfs_inode_setattr, - .permission = hfs_permission, .setxattr = hfs_setxattr, .getxattr = hfs_getxattr, .listxattr = hfs_listxattr, diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c index 963be644297..b207f0e6fc2 100644 --- a/fs/hfsplus/inode.c +++ b/fs/hfsplus/inode.c @@ -238,18 +238,6 @@ static void hfsplus_set_perms(struct inode *inode, struct hfsplus_perm *perms) perms->dev = cpu_to_be32(HFSPLUS_I(inode).dev); } -static int hfsplus_permission(struct inode *inode, int mask) -{ - /* MAY_EXEC is also used for lookup, if no x bit is set allow lookup, - * open_exec has the same test, so it's still not executable, if a x bit - * is set fall back to standard permission check. 
- */ - if (S_ISREG(inode->i_mode) && mask & MAY_EXEC && !(inode->i_mode & 0111)) - return 0; - return generic_permission(inode, mask, NULL); -} - - static int hfsplus_file_open(struct inode *inode, struct file *file) { if (HFSPLUS_IS_RSRC(inode)) @@ -281,7 +269,6 @@ static int hfsplus_file_release(struct inode *inode, struct file *file) static const struct inode_operations hfsplus_file_inode_operations = { .lookup = hfsplus_file_lookup, .truncate = hfsplus_file_truncate, - .permission = hfsplus_permission, .setxattr = hfsplus_setxattr, .getxattr = hfsplus_getxattr, .listxattr = hfsplus_listxattr, diff --git a/fs/namei.c b/fs/namei.c index 9e2a534383d..09ce58e49e7 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -212,8 +212,7 @@ int generic_permission(struct inode *inode, int mask, * Read/write DACs are always overridable. * Executable DACs are overridable if at least one exec bit is set. */ - if (!(mask & MAY_EXEC) || - (inode->i_mode & S_IXUGO) || S_ISDIR(inode->i_mode)) + if (!(mask & MAY_EXEC) || execute_ok(inode)) if (capable(CAP_DAC_OVERRIDE)) return 0; @@ -249,23 +248,11 @@ int inode_permission(struct inode *inode, int mask) } /* Ordinary permission routines do not understand MAY_APPEND. */ - if (inode->i_op && inode->i_op->permission) { + if (inode->i_op && inode->i_op->permission) retval = inode->i_op->permission(inode, mask); - if (!retval) { - /* - * Exec permission on a regular file is denied if none - * of the execute bits are set. - * - * This check should be done by the ->permission() - * method. 
- */ - if ((mask & MAY_EXEC) && S_ISREG(inode->i_mode) && - !(inode->i_mode & S_IXUGO)) - return -EACCES; - } - } else { + else retval = generic_permission(inode, mask, NULL); - } + if (retval) return retval; diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index c216c8786c5..3e64b98f3a9 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1957,6 +1957,9 @@ force_lookup: } else res = PTR_ERR(cred); out: + if (!res && (mask & MAY_EXEC) && !execute_ok(inode)) + res = -EACCES; + dfprintk(VFS, "NFS: permission(%s/%ld), mask=0x%x, res=%d\n", inode->i_sb->s_id, inode->i_ino, mask, res); return res; diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index 5fe210c0917..7b997754a25 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -298,13 +298,19 @@ static int proc_sys_permission(struct inode *inode, int mask) * sysctl entries that are not writeable, * are _NOT_ writeable, capabilities or not. */ - struct ctl_table_header *head = grab_header(inode); - struct ctl_table *table = PROC_I(inode)->sysctl_entry; + struct ctl_table_header *head; + struct ctl_table *table; int error; + /* Executable files are not allowed under /proc/sys/ */ + if ((mask & MAY_EXEC) && S_ISREG(inode->i_mode)) + return -EACCES; + + head = grab_header(inode); if (IS_ERR(head)) return PTR_ERR(head); + table = PROC_I(inode)->sysctl_entry; if (!table) /* global root - r-xr-xr-x */ error = mask & MAY_WRITE ? 
-EACCES : 0; else /* Use the permissions on the sysctl table entry */ diff --git a/include/linux/fs.h b/include/linux/fs.h index 5f70aa62cf0..025a4a251b6 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1851,6 +1851,11 @@ extern int inode_permission(struct inode *, int); extern int generic_permission(struct inode *, int, int (*check_acl)(struct inode *, int)); +static inline bool execute_ok(struct inode *inode) +{ + return (inode->i_mode & S_IXUGO) || S_ISDIR(inode->i_mode); +} + extern int get_write_access(struct inode *); extern int deny_write_access(struct file *); static inline void put_write_access(struct inode * inode) -- cgit v1.2.3-70-g09d2 From 08b9fe6b12d32324f311c46b88102b6b9067d434 Mon Sep 17 00:00:00 2001 From: Mimi Zohar Date: Mon, 13 Oct 2008 00:09:50 -0400 Subject: [PATCH] i_version: remount support Add support for remounting a filesystem with the i_version option. Signed-off-by: Mimi Zohar --- include/linux/fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 025a4a251b6..7d719c1a18e 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -136,7 +136,7 @@ extern int dir_notify_enable; /* * Superblock flags that can be altered by MS_REMOUNT */ -#define MS_RMT_MASK (MS_RDONLY|MS_SYNCHRONOUS|MS_MANDLOCK) +#define MS_RMT_MASK (MS_RDONLY|MS_SYNCHRONOUS|MS_MANDLOCK|MS_I_VERSION) /* * Old magic mount flag and mask -- cgit v1.2.3-70-g09d2 From e1759c215bee5abbcb6cb066590ab20905154ed5 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Wed, 15 Oct 2008 23:50:22 +0400 Subject: proc: switch /proc/meminfo to seq_file and move it to fs/proc/meminfo.c while I'm at it. 
Signed-off-by: Alexey Dobriyan --- arch/x86/mm/pageattr.c | 11 ++- fs/proc/Makefile | 1 + fs/proc/meminfo.c | 168 ++++++++++++++++++++++++++++++++++++++++++++++ fs/proc/proc_misc.c | 137 ------------------------------------- include/asm-x86/pgtable.h | 3 +- include/linux/hugetlb.h | 6 +- mm/hugetlb.c | 5 +- 7 files changed, 183 insertions(+), 148 deletions(-) create mode 100644 fs/proc/meminfo.c (limited to 'include/linux') diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 407d8784f66..f1dc1b75d16 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -65,23 +65,22 @@ static void split_page_count(int level) direct_pages_count[level - 1] += PTRS_PER_PTE; } -int arch_report_meminfo(char *page) +void arch_report_meminfo(struct seq_file *m) { - int n = sprintf(page, "DirectMap4k: %8lu kB\n", + seq_printf(m, "DirectMap4k: %8lu kB\n", direct_pages_count[PG_LEVEL_4K] << 2); #if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE) - n += sprintf(page + n, "DirectMap2M: %8lu kB\n", + seq_printf(m, "DirectMap2M: %8lu kB\n", direct_pages_count[PG_LEVEL_2M] << 11); #else - n += sprintf(page + n, "DirectMap4M: %8lu kB\n", + seq_printf(m, "DirectMap4M: %8lu kB\n", direct_pages_count[PG_LEVEL_2M] << 12); #endif #ifdef CONFIG_X86_64 if (direct_gbpages) - n += sprintf(page + n, "DirectMap1G: %8lu kB\n", + seq_printf(m, "DirectMap1G: %8lu kB\n", direct_pages_count[PG_LEVEL_1G] << 20); #endif - return n; } #else static inline void split_page_count(int level) { } diff --git a/fs/proc/Makefile b/fs/proc/Makefile index 27efa14963b..70607a03839 100644 --- a/fs/proc/Makefile +++ b/fs/proc/Makefile @@ -10,6 +10,7 @@ proc-$(CONFIG_MMU) := mmu.o task_mmu.o proc-y += inode.o root.o base.o generic.o array.o \ proc_tty.o proc_misc.o proc-y += loadavg.o +proc-y += meminfo.o proc-y += uptime.o proc-$(CONFIG_PROC_SYSCTL) += proc_sysctl.o proc-$(CONFIG_NET) += proc_net.o diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c new file mode 100644 index 00000000000..b1675c4e66d 
--- /dev/null +++ b/fs/proc/meminfo.c @@ -0,0 +1,168 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "internal.h" + +void __attribute__((weak)) arch_report_meminfo(struct seq_file *m) +{ +} + +static int meminfo_proc_show(struct seq_file *m, void *v) +{ + struct sysinfo i; + unsigned long committed; + unsigned long allowed; + struct vmalloc_info vmi; + long cached; + unsigned long pages[NR_LRU_LISTS]; + int lru; + +/* + * display in kilobytes. + */ +#define K(x) ((x) << (PAGE_SHIFT - 10)) + si_meminfo(&i); + si_swapinfo(&i); + committed = atomic_long_read(&vm_committed_space); + allowed = ((totalram_pages - hugetlb_total_pages()) + * sysctl_overcommit_ratio / 100) + total_swap_pages; + + cached = global_page_state(NR_FILE_PAGES) - + total_swapcache_pages - i.bufferram; + if (cached < 0) + cached = 0; + + get_vmalloc_info(&vmi); + + for (lru = LRU_BASE; lru < NR_LRU_LISTS; lru++) + pages[lru] = global_page_state(NR_LRU_BASE + lru); + + /* + * Tagged format, for easy grepping and expansion. 
+ */ + seq_printf(m, + "MemTotal: %8lu kB\n" + "MemFree: %8lu kB\n" + "Buffers: %8lu kB\n" + "Cached: %8lu kB\n" + "SwapCached: %8lu kB\n" + "Active: %8lu kB\n" + "Inactive: %8lu kB\n" + "Active(anon): %8lu kB\n" + "Inactive(anon): %8lu kB\n" + "Active(file): %8lu kB\n" + "Inactive(file): %8lu kB\n" +#ifdef CONFIG_UNEVICTABLE_LRU + "Unevictable: %8lu kB\n" + "Mlocked: %8lu kB\n" +#endif +#ifdef CONFIG_HIGHMEM + "HighTotal: %8lu kB\n" + "HighFree: %8lu kB\n" + "LowTotal: %8lu kB\n" + "LowFree: %8lu kB\n" +#endif + "SwapTotal: %8lu kB\n" + "SwapFree: %8lu kB\n" + "Dirty: %8lu kB\n" + "Writeback: %8lu kB\n" + "AnonPages: %8lu kB\n" + "Mapped: %8lu kB\n" + "Slab: %8lu kB\n" + "SReclaimable: %8lu kB\n" + "SUnreclaim: %8lu kB\n" + "PageTables: %8lu kB\n" +#ifdef CONFIG_QUICKLIST + "Quicklists: %8lu kB\n" +#endif + "NFS_Unstable: %8lu kB\n" + "Bounce: %8lu kB\n" + "WritebackTmp: %8lu kB\n" + "CommitLimit: %8lu kB\n" + "Committed_AS: %8lu kB\n" + "VmallocTotal: %8lu kB\n" + "VmallocUsed: %8lu kB\n" + "VmallocChunk: %8lu kB\n", + K(i.totalram), + K(i.freeram), + K(i.bufferram), + K(cached), + K(total_swapcache_pages), + K(pages[LRU_ACTIVE_ANON] + pages[LRU_ACTIVE_FILE]), + K(pages[LRU_INACTIVE_ANON] + pages[LRU_INACTIVE_FILE]), + K(pages[LRU_ACTIVE_ANON]), + K(pages[LRU_INACTIVE_ANON]), + K(pages[LRU_ACTIVE_FILE]), + K(pages[LRU_INACTIVE_FILE]), +#ifdef CONFIG_UNEVICTABLE_LRU + K(pages[LRU_UNEVICTABLE]), + K(global_page_state(NR_MLOCK)), +#endif +#ifdef CONFIG_HIGHMEM + K(i.totalhigh), + K(i.freehigh), + K(i.totalram-i.totalhigh), + K(i.freeram-i.freehigh), +#endif + K(i.totalswap), + K(i.freeswap), + K(global_page_state(NR_FILE_DIRTY)), + K(global_page_state(NR_WRITEBACK)), + K(global_page_state(NR_ANON_PAGES)), + K(global_page_state(NR_FILE_MAPPED)), + K(global_page_state(NR_SLAB_RECLAIMABLE) + + global_page_state(NR_SLAB_UNRECLAIMABLE)), + K(global_page_state(NR_SLAB_RECLAIMABLE)), + K(global_page_state(NR_SLAB_UNRECLAIMABLE)), + K(global_page_state(NR_PAGETABLE)), 
+#ifdef CONFIG_QUICKLIST + K(quicklist_total_size()), +#endif + K(global_page_state(NR_UNSTABLE_NFS)), + K(global_page_state(NR_BOUNCE)), + K(global_page_state(NR_WRITEBACK_TEMP)), + K(allowed), + K(committed), + (unsigned long)VMALLOC_TOTAL >> 10, + vmi.used >> 10, + vmi.largest_chunk >> 10 + ); + + hugetlb_report_meminfo(m); + + arch_report_meminfo(m); + + return 0; +#undef K +} + +static int meminfo_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, meminfo_proc_show, NULL); +} + +static const struct file_operations meminfo_proc_fops = { + .open = meminfo_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int __init proc_meminfo_init(void) +{ + proc_create("meminfo", 0, NULL, &meminfo_proc_fops); + return 0; +} +module_init(proc_meminfo_init); diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c index 484b6011bf0..1aba51b0a0c 100644 --- a/fs/proc/proc_misc.c +++ b/fs/proc/proc_misc.c @@ -78,142 +78,6 @@ static int proc_calc_metrics(char *page, char **start, off_t off, return len; } -int __attribute__((weak)) arch_report_meminfo(char *page) -{ - return 0; -} - -static int meminfo_read_proc(char *page, char **start, off_t off, - int count, int *eof, void *data) -{ - struct sysinfo i; - int len; - unsigned long committed; - unsigned long allowed; - struct vmalloc_info vmi; - long cached; - unsigned long pages[NR_LRU_LISTS]; - int lru; - -/* - * display in kilobytes. 
- */ -#define K(x) ((x) << (PAGE_SHIFT - 10)) - si_meminfo(&i); - si_swapinfo(&i); - committed = atomic_long_read(&vm_committed_space); - allowed = ((totalram_pages - hugetlb_total_pages()) - * sysctl_overcommit_ratio / 100) + total_swap_pages; - - cached = global_page_state(NR_FILE_PAGES) - - total_swapcache_pages - i.bufferram; - if (cached < 0) - cached = 0; - - get_vmalloc_info(&vmi); - - for (lru = LRU_BASE; lru < NR_LRU_LISTS; lru++) - pages[lru] = global_page_state(NR_LRU_BASE + lru); - - /* - * Tagged format, for easy grepping and expansion. - */ - len = sprintf(page, - "MemTotal: %8lu kB\n" - "MemFree: %8lu kB\n" - "Buffers: %8lu kB\n" - "Cached: %8lu kB\n" - "SwapCached: %8lu kB\n" - "Active: %8lu kB\n" - "Inactive: %8lu kB\n" - "Active(anon): %8lu kB\n" - "Inactive(anon): %8lu kB\n" - "Active(file): %8lu kB\n" - "Inactive(file): %8lu kB\n" -#ifdef CONFIG_UNEVICTABLE_LRU - "Unevictable: %8lu kB\n" - "Mlocked: %8lu kB\n" -#endif -#ifdef CONFIG_HIGHMEM - "HighTotal: %8lu kB\n" - "HighFree: %8lu kB\n" - "LowTotal: %8lu kB\n" - "LowFree: %8lu kB\n" -#endif - "SwapTotal: %8lu kB\n" - "SwapFree: %8lu kB\n" - "Dirty: %8lu kB\n" - "Writeback: %8lu kB\n" - "AnonPages: %8lu kB\n" - "Mapped: %8lu kB\n" - "Slab: %8lu kB\n" - "SReclaimable: %8lu kB\n" - "SUnreclaim: %8lu kB\n" - "PageTables: %8lu kB\n" -#ifdef CONFIG_QUICKLIST - "Quicklists: %8lu kB\n" -#endif - "NFS_Unstable: %8lu kB\n" - "Bounce: %8lu kB\n" - "WritebackTmp: %8lu kB\n" - "CommitLimit: %8lu kB\n" - "Committed_AS: %8lu kB\n" - "VmallocTotal: %8lu kB\n" - "VmallocUsed: %8lu kB\n" - "VmallocChunk: %8lu kB\n", - K(i.totalram), - K(i.freeram), - K(i.bufferram), - K(cached), - K(total_swapcache_pages), - K(pages[LRU_ACTIVE_ANON] + pages[LRU_ACTIVE_FILE]), - K(pages[LRU_INACTIVE_ANON] + pages[LRU_INACTIVE_FILE]), - K(pages[LRU_ACTIVE_ANON]), - K(pages[LRU_INACTIVE_ANON]), - K(pages[LRU_ACTIVE_FILE]), - K(pages[LRU_INACTIVE_FILE]), -#ifdef CONFIG_UNEVICTABLE_LRU - K(pages[LRU_UNEVICTABLE]), - 
K(global_page_state(NR_MLOCK)), -#endif -#ifdef CONFIG_HIGHMEM - K(i.totalhigh), - K(i.freehigh), - K(i.totalram-i.totalhigh), - K(i.freeram-i.freehigh), -#endif - K(i.totalswap), - K(i.freeswap), - K(global_page_state(NR_FILE_DIRTY)), - K(global_page_state(NR_WRITEBACK)), - K(global_page_state(NR_ANON_PAGES)), - K(global_page_state(NR_FILE_MAPPED)), - K(global_page_state(NR_SLAB_RECLAIMABLE) + - global_page_state(NR_SLAB_UNRECLAIMABLE)), - K(global_page_state(NR_SLAB_RECLAIMABLE)), - K(global_page_state(NR_SLAB_UNRECLAIMABLE)), - K(global_page_state(NR_PAGETABLE)), -#ifdef CONFIG_QUICKLIST - K(quicklist_total_size()), -#endif - K(global_page_state(NR_UNSTABLE_NFS)), - K(global_page_state(NR_BOUNCE)), - K(global_page_state(NR_WRITEBACK_TEMP)), - K(allowed), - K(committed), - (unsigned long)VMALLOC_TOTAL >> 10, - vmi.used >> 10, - vmi.largest_chunk >> 10 - ); - - len += hugetlb_report_meminfo(page + len); - - len += arch_report_meminfo(page + len); - - return proc_calc_metrics(page, start, off, count, eof, len); -#undef K -} - static int fragmentation_open(struct inode *inode, struct file *file) { (void)inode; @@ -816,7 +680,6 @@ void __init proc_misc_init(void) char *name; int (*read_proc)(char*,char**,off_t,int,int*,void*); } *p, simple_ones[] = { - {"meminfo", meminfo_read_proc}, {"version", version_read_proc}, #ifdef CONFIG_PROC_HARDWARE {"hardware", hardware_read_proc}, diff --git a/include/asm-x86/pgtable.h b/include/asm-x86/pgtable.h index 88a53b1a17f..a3dda6d615b 100644 --- a/include/asm-x86/pgtable.h +++ b/include/asm-x86/pgtable.h @@ -348,7 +348,8 @@ static inline void native_pagetable_setup_start(pgd_t *base) {} static inline void native_pagetable_setup_done(pgd_t *base) {} #endif -extern int arch_report_meminfo(char *page); +struct seq_file; +extern void arch_report_meminfo(struct seq_file *m); #ifdef CONFIG_PARAVIRT #include diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 32e0ef0f6e1..e1c8afc002c 100644 --- a/include/linux/hugetlb.h 
+++ b/include/linux/hugetlb.h @@ -27,7 +27,7 @@ void unmap_hugepage_range(struct vm_area_struct *, void __unmap_hugepage_range(struct vm_area_struct *, unsigned long, unsigned long, struct page *); int hugetlb_prefault(struct address_space *, struct vm_area_struct *); -int hugetlb_report_meminfo(char *); +void hugetlb_report_meminfo(struct seq_file *); int hugetlb_report_node_meminfo(int, char *); unsigned long hugetlb_total_pages(void); int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma, @@ -79,7 +79,9 @@ static inline unsigned long hugetlb_total_pages(void) #define copy_hugetlb_page_range(src, dst, vma) ({ BUG(); 0; }) #define hugetlb_prefault(mapping, vma) ({ BUG(); 0; }) #define unmap_hugepage_range(vma, start, end, page) BUG() -#define hugetlb_report_meminfo(buf) 0 +static inline void hugetlb_report_meminfo(struct seq_file *m) +{ +} #define hugetlb_report_node_meminfo(n, buf) 0 #define follow_huge_pmd(mm, addr, pmd, write) NULL #define follow_huge_pud(mm, addr, pud, write) NULL diff --git a/mm/hugetlb.c b/mm/hugetlb.c index ce8cbb29860..421aee99b84 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include @@ -1455,10 +1456,10 @@ int hugetlb_overcommit_handler(struct ctl_table *table, int write, #endif /* CONFIG_SYSCTL */ -int hugetlb_report_meminfo(char *buf) +void hugetlb_report_meminfo(struct seq_file *m) { struct hstate *h = &default_hstate; - return sprintf(buf, + seq_printf(m, "HugePages_Total: %5lu\n" "HugePages_Free: %5lu\n" "HugePages_Rsvd: %5lu\n" -- cgit v1.2.3-70-g09d2 From d8ba7a363393f803c93c8cffabd6d0362618bc2a Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Sat, 4 Oct 2008 22:34:18 +0400 Subject: proc: move rest of /proc/locks to fs/locks.c Signed-off-by: Alexey Dobriyan --- fs/locks.c | 22 +++++++++++++++++++++- fs/proc/proc_misc.c | 17 ----------------- include/linux/fs.h | 1 - 3 files changed, 21 insertions(+), 19 deletions(-) (limited to 'include/linux') 
diff --git a/fs/locks.c b/fs/locks.c index 5eb259e3cd3..90e87f57b33 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -2078,6 +2078,7 @@ int vfs_cancel_lock(struct file *filp, struct file_lock *fl) EXPORT_SYMBOL_GPL(vfs_cancel_lock); #ifdef CONFIG_PROC_FS +#include #include static void lock_get_status(struct seq_file *f, struct file_lock *fl, @@ -2183,12 +2184,31 @@ static void locks_stop(struct seq_file *f, void *v) unlock_kernel(); } -struct seq_operations locks_seq_operations = { +static const struct seq_operations locks_seq_operations = { .start = locks_start, .next = locks_next, .stop = locks_stop, .show = locks_show, }; + +static int locks_open(struct inode *inode, struct file *filp) +{ + return seq_open(filp, &locks_seq_operations); +} + +static const struct file_operations proc_locks_operations = { + .open = locks_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, +}; + +static int __init proc_locks_init(void) +{ + proc_create("locks", 0, NULL, &proc_locks_operations); + return 0; +} +module_init(proc_locks_init); #endif /** diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c index fcac25edaef..fea7d658fff 100644 --- a/fs/proc/proc_misc.c +++ b/fs/proc/proc_misc.c @@ -453,20 +453,6 @@ static const struct file_operations proc_interrupts_operations = { .release = seq_release, }; -#ifdef CONFIG_FILE_LOCKING -static int locks_open(struct inode *inode, struct file *filp) -{ - return seq_open(filp, &locks_seq_operations); -} - -static const struct file_operations proc_locks_operations = { - .open = locks_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; -#endif /* CONFIG_FILE_LOCKING */ - #ifdef CONFIG_PROC_PAGE_MONITOR #define KPMSIZE sizeof(u64) #define KPMMASK (KPMSIZE - 1) @@ -605,9 +591,6 @@ void __init proc_misc_init(void) proc_symlink("mounts", NULL, "self/mounts"); /* And now for trickier ones */ -#ifdef CONFIG_FILE_LOCKING - proc_create("locks", 0, NULL, &proc_locks_operations); -#endif 
proc_create("devices", 0, NULL, &proc_devinfo_operations); proc_create("cpuinfo", 0, NULL, &proc_cpuinfo_operations); #ifdef CONFIG_BLOCK diff --git a/include/linux/fs.h b/include/linux/fs.h index a6a625be13f..024049543ae 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1037,7 +1037,6 @@ extern int vfs_setlease(struct file *, long, struct file_lock **); extern int lease_modify(struct file_lock **, int); extern int lock_may_read(struct inode *, loff_t start, unsigned long count); extern int lock_may_write(struct inode *, loff_t start, unsigned long count); -extern struct seq_operations locks_seq_operations; #else /* !CONFIG_FILE_LOCKING */ #define fcntl_getlk(a, b) ({ -EINVAL; }) #define fcntl_setlk(a, b, c, d) ({ -EACCES; }) -- cgit v1.2.3-70-g09d2 From f500975a3f3ecf3611d79f1d933906753460b9f2 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Sat, 4 Oct 2008 23:53:21 +0400 Subject: proc: move rest of /proc/partitions code to block/genhd.c Signed-off-by: Alexey Dobriyan Acked-by: Jens Axboe --- block/genhd.c | 22 +++++++++++++++++++++- fs/proc/proc_misc.c | 14 -------------- include/linux/genhd.h | 1 - 3 files changed, 21 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/block/genhd.c b/block/genhd.c index 646e1d2507c..15f4d2b12c4 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -727,12 +728,24 @@ static int show_partition(struct seq_file *seqf, void *v) return 0; } -const struct seq_operations partitions_op = { +static const struct seq_operations partitions_op = { .start = show_partition_start, .next = disk_seqf_next, .stop = disk_seqf_stop, .show = show_partition }; + +static int partitions_open(struct inode *inode, struct file *file) +{ + return seq_open(file, &partitions_op); +} + +static const struct file_operations proc_partitions_operations = { + .open = partitions_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, 
+}; #endif @@ -998,6 +1011,13 @@ const struct seq_operations diskstats_op = { .stop = disk_seqf_stop, .show = diskstats_show }; + +static int __init proc_genhd_init(void) +{ + proc_create("partitions", 0, NULL, &proc_partitions_operations); + return 0; +} +module_init(proc_genhd_init); #endif /* CONFIG_PROC_FS */ static void media_change_notify_thread(struct work_struct *work) diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c index 8974809be5f..253ea50c439 100644 --- a/fs/proc/proc_misc.c +++ b/fs/proc/proc_misc.c @@ -106,17 +106,6 @@ static const struct file_operations proc_vmstat_file_operations = { }; #ifdef CONFIG_BLOCK -static int partitions_open(struct inode *inode, struct file *file) -{ - return seq_open(file, &partitions_op); -} -static const struct file_operations proc_partitions_operations = { - .open = partitions_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; - static int diskstats_open(struct inode *inode, struct file *file) { return seq_open(file, &diskstats_op); @@ -519,9 +508,6 @@ void __init proc_misc_init(void) proc_symlink("mounts", NULL, "self/mounts"); /* And now for trickier ones */ -#ifdef CONFIG_BLOCK - proc_create("partitions", 0, NULL, &proc_partitions_operations); -#endif proc_create("stat", 0, NULL, &proc_stat_operations); proc_create("interrupts", 0, NULL, &proc_interrupts_operations); #ifdef CONFIG_SLABINFO diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 206cdf96c3a..074a4fdf436 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -25,7 +25,6 @@ extern struct device_type part_type; extern struct kobject *block_depr; extern struct class block_class; -extern const struct seq_operations partitions_op; extern const struct seq_operations diskstats_op; enum { -- cgit v1.2.3-70-g09d2 From 7b3c3a50a3e0ea46815150d420fa276ac254572b Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Mon, 6 Oct 2008 02:42:17 +0400 Subject: proc: move /proc/slabinfo boilerplate to mm/slub.c, 
mm/slab.c Lose dummy ->write hook in case of SLUB, it's possible now. Signed-off-by: Alexey Dobriyan Acked-by: Pekka Enberg --- fs/proc/proc_misc.c | 17 ----------------- include/linux/slab.h | 5 ----- mm/slab.c | 16 +++++++++++++++- mm/slub.c | 29 ++++++++++++++++++++--------- 4 files changed, 35 insertions(+), 32 deletions(-) (limited to 'include/linux') diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c index 5bca02842d0..1d6d5c5cc2a 100644 --- a/fs/proc/proc_misc.c +++ b/fs/proc/proc_misc.c @@ -132,20 +132,6 @@ static const struct file_operations proc_modules_operations = { }; #endif -#ifdef CONFIG_SLABINFO -static int slabinfo_open(struct inode *inode, struct file *file) -{ - return seq_open(file, &slabinfo_op); -} -static const struct file_operations proc_slabinfo_operations = { - .open = slabinfo_open, - .read = seq_read, - .write = slabinfo_write, - .llseek = seq_lseek, - .release = seq_release, -}; -#endif - #ifdef CONFIG_MMU static int vmalloc_open(struct inode *inode, struct file *file) { @@ -309,9 +295,6 @@ void __init proc_misc_init(void) proc_symlink("mounts", NULL, "self/mounts"); /* And now for trickier ones */ -#ifdef CONFIG_SLABINFO - proc_create("slabinfo",S_IWUSR|S_IRUGO,NULL,&proc_slabinfo_operations); -#endif #ifdef CONFIG_MMU proc_create("vmallocinfo", S_IRUSR, NULL, &proc_vmalloc_operations); #endif diff --git a/include/linux/slab.h b/include/linux/slab.h index 5ff9676c1e2..ba965c84ae0 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -288,9 +288,4 @@ static inline void *kzalloc_node(size_t size, gfp_t flags, int node) return kmalloc_node(size, flags | __GFP_ZERO, node); } -#ifdef CONFIG_SLABINFO -extern const struct seq_operations slabinfo_op; -ssize_t slabinfo_write(struct file *, const char __user *, size_t, loff_t *); -#endif - #endif /* _LINUX_SLAB_H */ diff --git a/mm/slab.c b/mm/slab.c index d53ac9c26ab..09187517f9d 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -4259,7 +4259,7 @@ static int s_show(struct seq_file *m, 
void *p) * + further values on SMP and with statistics enabled */ -const struct seq_operations slabinfo_op = { +static const struct seq_operations slabinfo_op = { .start = s_start, .next = s_next, .stop = s_stop, @@ -4316,6 +4316,19 @@ ssize_t slabinfo_write(struct file *file, const char __user * buffer, return res; } +static int slabinfo_open(struct inode *inode, struct file *file) +{ + return seq_open(file, &slabinfo_op); +} + +static const struct file_operations proc_slabinfo_operations = { + .open = slabinfo_open, + .read = seq_read, + .write = slabinfo_write, + .llseek = seq_lseek, + .release = seq_release, +}; + #ifdef CONFIG_DEBUG_SLAB_LEAK static void *leaks_start(struct seq_file *m, loff_t *pos) @@ -4478,6 +4491,7 @@ static const struct file_operations proc_slabstats_operations = { static int __init slab_proc_init(void) { + proc_create("slabinfo",S_IWUSR|S_IRUGO,NULL,&proc_slabinfo_operations); #ifdef CONFIG_DEBUG_SLAB_LEAK proc_create("slab_allocators", 0, NULL, &proc_slabstats_operations); #endif diff --git a/mm/slub.c b/mm/slub.c index 0c83e6afe7b..7ad489af956 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -4417,14 +4418,6 @@ __initcall(slab_sysfs_init); * The /proc/slabinfo ABI */ #ifdef CONFIG_SLABINFO - -ssize_t slabinfo_write(struct file *file, const char __user *buffer, - size_t count, loff_t *ppos) -{ - return -EINVAL; -} - - static void print_slabinfo_header(struct seq_file *m) { seq_puts(m, "slabinfo - version: 2.1\n"); @@ -4492,11 +4485,29 @@ static int s_show(struct seq_file *m, void *p) return 0; } -const struct seq_operations slabinfo_op = { +static const struct seq_operations slabinfo_op = { .start = s_start, .next = s_next, .stop = s_stop, .show = s_show, }; +static int slabinfo_open(struct inode *inode, struct file *file) +{ + return seq_open(file, &slabinfo_op); +} + +static const struct file_operations proc_slabinfo_operations = { + .open = slabinfo_open, + 
.read = seq_read, + .llseek = seq_lseek, + .release = seq_release, +}; + +static int __init slab_proc_init(void) +{ + proc_create("slabinfo",S_IWUSR|S_IRUGO,NULL,&proc_slabinfo_operations); + return 0; +} +module_init(slab_proc_init); #endif /* CONFIG_SLABINFO */ -- cgit v1.2.3-70-g09d2 From 5f6a6a9c4e4d790aae55cb412a7643329057c5e0 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Mon, 6 Oct 2008 03:50:47 +0400 Subject: proc: move /proc/vmallocinfo to mm/vmalloc.c Signed-off-by: Alexey Dobriyan Acked-by: Christoph Lameter --- fs/proc/proc_misc.c | 28 ---------------------------- include/linux/vmalloc.h | 2 -- mm/vmalloc.c | 33 ++++++++++++++++++++++++++++++++- 3 files changed, 32 insertions(+), 31 deletions(-) (limited to 'include/linux') diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c index 1d6d5c5cc2a..fd41a032456 100644 --- a/fs/proc/proc_misc.c +++ b/fs/proc/proc_misc.c @@ -132,31 +132,6 @@ static const struct file_operations proc_modules_operations = { }; #endif -#ifdef CONFIG_MMU -static int vmalloc_open(struct inode *inode, struct file *file) -{ - unsigned int *ptr = NULL; - int ret; - - if (NUMA_BUILD) - ptr = kmalloc(nr_node_ids * sizeof(unsigned int), GFP_KERNEL); - ret = seq_open(file, &vmalloc_op); - if (!ret) { - struct seq_file *m = file->private_data; - m->private = ptr; - } else - kfree(ptr); - return ret; -} - -static const struct file_operations proc_vmalloc_operations = { - .open = vmalloc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release_private, -}; -#endif - #ifdef CONFIG_PROC_PAGE_MONITOR #define KPMSIZE sizeof(u64) #define KPMMASK (KPMSIZE - 1) @@ -295,9 +270,6 @@ void __init proc_misc_init(void) proc_symlink("mounts", NULL, "self/mounts"); /* And now for trickier ones */ -#ifdef CONFIG_MMU - proc_create("vmallocinfo", S_IRUSR, NULL, &proc_vmalloc_operations); -#endif proc_create("buddyinfo", S_IRUGO, NULL, &fragmentation_file_operations); proc_create("pagetypeinfo", S_IRUGO, NULL, &pagetypeinfo_file_ops); 
proc_create("vmstat", S_IRUGO, NULL, &proc_vmstat_file_operations); diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index 4c28c4d564e..307b88577ea 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -103,6 +103,4 @@ extern void free_vm_area(struct vm_struct *area); extern rwlock_t vmlist_lock; extern struct vm_struct *vmlist; -extern const struct seq_operations vmalloc_op; - #endif /* _LINUX_VMALLOC_H */ diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 65ae576030d..036536945dd 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -1718,11 +1719,41 @@ static int s_show(struct seq_file *m, void *p) return 0; } -const struct seq_operations vmalloc_op = { +static const struct seq_operations vmalloc_op = { .start = s_start, .next = s_next, .stop = s_stop, .show = s_show, }; + +static int vmalloc_open(struct inode *inode, struct file *file) +{ + unsigned int *ptr = NULL; + int ret; + + if (NUMA_BUILD) + ptr = kmalloc(nr_node_ids * sizeof(unsigned int), GFP_KERNEL); + ret = seq_open(file, &vmalloc_op); + if (!ret) { + struct seq_file *m = file->private_data; + m->private = ptr; + } else + kfree(ptr); + return ret; +} + +static const struct file_operations proc_vmalloc_operations = { + .open = vmalloc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release_private, +}; + +static int __init proc_vmalloc_init(void) +{ + proc_create("vmallocinfo", S_IRUSR, NULL, &proc_vmalloc_operations); + return 0; +} +module_init(proc_vmalloc_init); #endif -- cgit v1.2.3-70-g09d2 From 8f32f7e5ac2ed11b0659b6b55af926f3d58ffd9d Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Mon, 6 Oct 2008 04:13:52 +0400 Subject: proc: move /proc/buddyinfo boilerplate to mm/vmstat.c Signed-off-by: Alexey Dobriyan --- fs/proc/proc_misc.c | 14 -------------- include/linux/vmstat.h | 1 - mm/vmstat.c | 25 +++++++++++++++++++++---- 3 files changed, 21 insertions(+), 19 deletions(-) 
(limited to 'include/linux') diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c index fd41a032456..a35e50659b8 100644 --- a/fs/proc/proc_misc.c +++ b/fs/proc/proc_misc.c @@ -57,19 +57,6 @@ #include #include "internal.h" -static int fragmentation_open(struct inode *inode, struct file *file) -{ - (void)inode; - return seq_open(file, &fragmentation_op); -} - -static const struct file_operations fragmentation_file_operations = { - .open = fragmentation_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; - static int pagetypeinfo_open(struct inode *inode, struct file *file) { return seq_open(file, &pagetypeinfo_op); @@ -270,7 +257,6 @@ void __init proc_misc_init(void) proc_symlink("mounts", NULL, "self/mounts"); /* And now for trickier ones */ - proc_create("buddyinfo", S_IRUGO, NULL, &fragmentation_file_operations); proc_create("pagetypeinfo", S_IRUGO, NULL, &pagetypeinfo_file_ops); proc_create("vmstat", S_IRUGO, NULL, &proc_vmstat_file_operations); proc_create("zoneinfo", S_IRUGO, NULL, &proc_zoneinfo_file_operations); diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h index 9cd3ab0f554..d4551f20640 100644 --- a/include/linux/vmstat.h +++ b/include/linux/vmstat.h @@ -54,7 +54,6 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT, NR_VM_EVENT_ITEMS }; -extern const struct seq_operations fragmentation_op; extern const struct seq_operations pagetypeinfo_op; extern const struct seq_operations zoneinfo_op; extern const struct seq_operations vmstat_op; diff --git a/mm/vmstat.c b/mm/vmstat.c index 9343227c5c6..f45d7245a28 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -8,7 +8,7 @@ * Copyright (C) 2006 Silicon Graphics, Inc., * Christoph Lameter */ - +#include #include #include #include @@ -384,7 +384,7 @@ void zone_statistics(struct zone *preferred_zone, struct zone *z) #endif #ifdef CONFIG_PROC_FS - +#include #include static char * const migratetype_names[MIGRATE_TYPES] = { @@ -581,13 +581,25 @@ static int 
pagetypeinfo_show(struct seq_file *m, void *arg) return 0; } -const struct seq_operations fragmentation_op = { +static const struct seq_operations fragmentation_op = { .start = frag_start, .next = frag_next, .stop = frag_stop, .show = frag_show, }; +static int fragmentation_open(struct inode *inode, struct file *file) +{ + return seq_open(file, &fragmentation_op); +} + +static const struct file_operations fragmentation_file_operations = { + .open = fragmentation_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, +}; + const struct seq_operations pagetypeinfo_op = { .start = frag_start, .next = frag_next, @@ -898,9 +910,11 @@ static int __cpuinit vmstat_cpuup_callback(struct notifier_block *nfb, static struct notifier_block __cpuinitdata vmstat_notifier = { &vmstat_cpuup_callback, NULL, 0 }; +#endif static int __init setup_vmstat(void) { +#ifdef CONFIG_SMP int cpu; refresh_zone_stat_thresholds(); @@ -908,7 +922,10 @@ static int __init setup_vmstat(void) for_each_online_cpu(cpu) start_cpu_timer(cpu); +#endif +#ifdef CONFIG_PROC_FS + proc_create("buddyinfo", S_IRUGO, NULL, &fragmentation_file_operations); +#endif return 0; } module_init(setup_vmstat) -#endif -- cgit v1.2.3-70-g09d2 From 74e2e8e8ce7b3c0f878a349f9fa6cf2831548eef Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Mon, 6 Oct 2008 04:15:36 +0400 Subject: proc: move /proc/pagetypeinfo boilerplate to mm/vmstat.c Signed-off-by: Alexey Dobriyan --- fs/proc/proc_misc.c | 13 ------------- include/linux/vmstat.h | 1 - mm/vmstat.c | 15 ++++++++++++++- 3 files changed, 14 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c index a35e50659b8..900331a634e 100644 --- a/fs/proc/proc_misc.c +++ b/fs/proc/proc_misc.c @@ -57,18 +57,6 @@ #include #include "internal.h" -static int pagetypeinfo_open(struct inode *inode, struct file *file) -{ - return seq_open(file, &pagetypeinfo_op); -} - -static const struct file_operations 
pagetypeinfo_file_ops = { - .open = pagetypeinfo_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; - static int zoneinfo_open(struct inode *inode, struct file *file) { return seq_open(file, &zoneinfo_op); @@ -257,7 +245,6 @@ void __init proc_misc_init(void) proc_symlink("mounts", NULL, "self/mounts"); /* And now for trickier ones */ - proc_create("pagetypeinfo", S_IRUGO, NULL, &pagetypeinfo_file_ops); proc_create("vmstat", S_IRUGO, NULL, &proc_vmstat_file_operations); proc_create("zoneinfo", S_IRUGO, NULL, &proc_zoneinfo_file_operations); #ifdef CONFIG_BLOCK diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h index d4551f20640..33ffd89a88a 100644 --- a/include/linux/vmstat.h +++ b/include/linux/vmstat.h @@ -54,7 +54,6 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT, NR_VM_EVENT_ITEMS }; -extern const struct seq_operations pagetypeinfo_op; extern const struct seq_operations zoneinfo_op; extern const struct seq_operations vmstat_op; extern int sysctl_stat_interval; diff --git a/mm/vmstat.c b/mm/vmstat.c index f45d7245a28..d624d251946 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -600,13 +600,25 @@ static const struct file_operations fragmentation_file_operations = { .release = seq_release, }; -const struct seq_operations pagetypeinfo_op = { +static const struct seq_operations pagetypeinfo_op = { .start = frag_start, .next = frag_next, .stop = frag_stop, .show = pagetypeinfo_show, }; +static int pagetypeinfo_open(struct inode *inode, struct file *file) +{ + return seq_open(file, &pagetypeinfo_op); +} + +static const struct file_operations pagetypeinfo_file_ops = { + .open = pagetypeinfo_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, +}; + #ifdef CONFIG_ZONE_DMA #define TEXT_FOR_DMA(xx) xx "_dma", #else @@ -925,6 +937,7 @@ static int __init setup_vmstat(void) #endif #ifdef CONFIG_PROC_FS proc_create("buddyinfo", S_IRUGO, NULL, &fragmentation_file_operations); + proc_create("pagetypeinfo", 
S_IRUGO, NULL, &pagetypeinfo_file_ops); #endif return 0; } -- cgit v1.2.3-70-g09d2 From b6aa44ab698c7df9d951d3eb45c4fcb8ba68fb25 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Mon, 6 Oct 2008 04:17:48 +0400 Subject: proc: move /proc/vmstat boilerplate to mm/vmstat.c Signed-off-by: Alexey Dobriyan Acked-by: Christoph Lameter --- fs/proc/proc_misc.c | 12 ------------ include/linux/vmstat.h | 1 - mm/vmstat.c | 14 +++++++++++++- 3 files changed, 13 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c index 900331a634e..e7a301d5d43 100644 --- a/fs/proc/proc_misc.c +++ b/fs/proc/proc_misc.c @@ -69,17 +69,6 @@ static const struct file_operations proc_zoneinfo_file_operations = { .release = seq_release, }; -static int vmstat_open(struct inode *inode, struct file *file) -{ - return seq_open(file, &vmstat_op); -} -static const struct file_operations proc_vmstat_file_operations = { - .open = vmstat_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; - #ifdef CONFIG_BLOCK static int diskstats_open(struct inode *inode, struct file *file) { @@ -245,7 +234,6 @@ void __init proc_misc_init(void) proc_symlink("mounts", NULL, "self/mounts"); /* And now for trickier ones */ - proc_create("vmstat", S_IRUGO, NULL, &proc_vmstat_file_operations); proc_create("zoneinfo", S_IRUGO, NULL, &proc_zoneinfo_file_operations); #ifdef CONFIG_BLOCK proc_create("diskstats", 0, NULL, &proc_diskstats_operations); diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h index 33ffd89a88a..7b68c4c1e19 100644 --- a/include/linux/vmstat.h +++ b/include/linux/vmstat.h @@ -55,7 +55,6 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT, }; extern const struct seq_operations zoneinfo_op; -extern const struct seq_operations vmstat_op; extern int sysctl_stat_interval; #ifdef CONFIG_VM_EVENT_COUNTERS diff --git a/mm/vmstat.c b/mm/vmstat.c index d624d251946..7e1854b8186 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c 
@@ -858,13 +858,24 @@ static void vmstat_stop(struct seq_file *m, void *arg) m->private = NULL; } -const struct seq_operations vmstat_op = { +static const struct seq_operations vmstat_op = { .start = vmstat_start, .next = vmstat_next, .stop = vmstat_stop, .show = vmstat_show, }; +static int vmstat_open(struct inode *inode, struct file *file) +{ + return seq_open(file, &vmstat_op); +} + +static const struct file_operations proc_vmstat_file_operations = { + .open = vmstat_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, +}; #endif /* CONFIG_PROC_FS */ #ifdef CONFIG_SMP @@ -938,6 +949,7 @@ static int __init setup_vmstat(void) #ifdef CONFIG_PROC_FS proc_create("buddyinfo", S_IRUGO, NULL, &fragmentation_file_operations); proc_create("pagetypeinfo", S_IRUGO, NULL, &pagetypeinfo_file_ops); + proc_create("vmstat", S_IRUGO, NULL, &proc_vmstat_file_operations); #endif return 0; } -- cgit v1.2.3-70-g09d2 From 5c9fe6281b75832e8d2555ec8700ea763d9a865e Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Mon, 6 Oct 2008 04:19:42 +0400 Subject: proc: move /proc/zoneinfo boilerplate to mm/vmstat.c Signed-off-by: Alexey Dobriyan Acked-by: Christoph Lameter --- fs/proc/proc_misc.c | 13 ------------- include/linux/vmstat.h | 1 - mm/vmstat.c | 15 ++++++++++++++- 3 files changed, 14 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c index e7a301d5d43..8f3a6f085c5 100644 --- a/fs/proc/proc_misc.c +++ b/fs/proc/proc_misc.c @@ -57,18 +57,6 @@ #include #include "internal.h" -static int zoneinfo_open(struct inode *inode, struct file *file) -{ - return seq_open(file, &zoneinfo_op); -} - -static const struct file_operations proc_zoneinfo_file_operations = { - .open = zoneinfo_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; - #ifdef CONFIG_BLOCK static int diskstats_open(struct inode *inode, struct file *file) { @@ -234,7 +222,6 @@ void __init proc_misc_init(void) 
proc_symlink("mounts", NULL, "self/mounts"); /* And now for trickier ones */ - proc_create("zoneinfo", S_IRUGO, NULL, &proc_zoneinfo_file_operations); #ifdef CONFIG_BLOCK proc_create("diskstats", 0, NULL, &proc_diskstats_operations); #endif diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h index 7b68c4c1e19..524cd1b28ec 100644 --- a/include/linux/vmstat.h +++ b/include/linux/vmstat.h @@ -54,7 +54,6 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT, NR_VM_EVENT_ITEMS }; -extern const struct seq_operations zoneinfo_op; extern int sysctl_stat_interval; #ifdef CONFIG_VM_EVENT_COUNTERS diff --git a/mm/vmstat.c b/mm/vmstat.c index 7e1854b8186..c3ccfda23ad 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -795,7 +795,7 @@ static int zoneinfo_show(struct seq_file *m, void *arg) return 0; } -const struct seq_operations zoneinfo_op = { +static const struct seq_operations zoneinfo_op = { .start = frag_start, /* iterate over all zones. The same as in * fragmentation. */ .next = frag_next, @@ -803,6 +803,18 @@ const struct seq_operations zoneinfo_op = { .show = zoneinfo_show, }; +static int zoneinfo_open(struct inode *inode, struct file *file) +{ + return seq_open(file, &zoneinfo_op); +} + +static const struct file_operations proc_zoneinfo_file_operations = { + .open = zoneinfo_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, +}; + static void *vmstat_start(struct seq_file *m, loff_t *pos) { unsigned long *v; @@ -950,6 +962,7 @@ static int __init setup_vmstat(void) proc_create("buddyinfo", S_IRUGO, NULL, &fragmentation_file_operations); proc_create("pagetypeinfo", S_IRUGO, NULL, &pagetypeinfo_file_ops); proc_create("vmstat", S_IRUGO, NULL, &proc_vmstat_file_operations); + proc_create("zoneinfo", S_IRUGO, NULL, &proc_zoneinfo_file_operations); #endif return 0; } -- cgit v1.2.3-70-g09d2 From 31d85ab28e71b0c938e0ef48af45747e80d99b53 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Mon, 6 Oct 2008 12:55:38 +0400 Subject: proc: move 
/proc/diskstats boilerplate to block/genhd.c Signed-off-by: Alexey Dobriyan Acked-by: Jens Axboe --- block/genhd.c | 15 ++++++++++++++- fs/proc/proc_misc.c | 16 ---------------- include/linux/genhd.h | 2 -- 3 files changed, 14 insertions(+), 19 deletions(-) (limited to 'include/linux') diff --git a/block/genhd.c b/block/genhd.c index 15f4d2b12c4..4e5e7493f67 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -1005,15 +1005,28 @@ static int diskstats_show(struct seq_file *seqf, void *v) return 0; } -const struct seq_operations diskstats_op = { +static const struct seq_operations diskstats_op = { .start = disk_seqf_start, .next = disk_seqf_next, .stop = disk_seqf_stop, .show = diskstats_show }; +static int diskstats_open(struct inode *inode, struct file *file) +{ + return seq_open(file, &diskstats_op); +} + +static const struct file_operations proc_diskstats_operations = { + .open = diskstats_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, +}; + static int __init proc_genhd_init(void) { + proc_create("diskstats", 0, NULL, &proc_diskstats_operations); proc_create("partitions", 0, NULL, &proc_partitions_operations); return 0; } diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c index 8f3a6f085c5..7c22831efd9 100644 --- a/fs/proc/proc_misc.c +++ b/fs/proc/proc_misc.c @@ -57,19 +57,6 @@ #include #include "internal.h" -#ifdef CONFIG_BLOCK -static int diskstats_open(struct inode *inode, struct file *file) -{ - return seq_open(file, &diskstats_op); -} -static const struct file_operations proc_diskstats_operations = { - .open = diskstats_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; -#endif - #ifdef CONFIG_MODULES extern const struct seq_operations modules_op; static int modules_open(struct inode *inode, struct file *file) @@ -222,9 +209,6 @@ void __init proc_misc_init(void) proc_symlink("mounts", NULL, "self/mounts"); /* And now for trickier ones */ -#ifdef CONFIG_BLOCK - proc_create("diskstats", 0, NULL, 
&proc_diskstats_operations); -#endif #ifdef CONFIG_MODULES proc_create("modules", 0, NULL, &proc_modules_operations); #endif diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 074a4fdf436..e439e6aed83 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -25,8 +25,6 @@ extern struct device_type part_type; extern struct kobject *block_depr; extern struct class block_class; -extern const struct seq_operations diskstats_op; - enum { /* These three have identical behaviour; use the second one if DOS FDISK gets confused about extended/logical partitions starting past cylinder 1023. */ -- cgit v1.2.3-70-g09d2 From b5aadf7f14c1acc94956aa257e018e9de3881f41 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Mon, 6 Oct 2008 13:23:43 +0400 Subject: proc: move /proc/schedstat boilerplate to kernel/sched_stats.h Signed-off-by: Alexey Dobriyan --- fs/proc/proc_misc.c | 3 --- include/linux/sched.h | 4 ---- kernel/sched.c | 1 + kernel/sched_stats.h | 9 ++++++++- 4 files changed, 9 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c index f6d25db9892..4a768ed5da2 100644 --- a/fs/proc/proc_misc.c +++ b/fs/proc/proc_misc.c @@ -195,9 +195,6 @@ void __init proc_misc_init(void) proc_symlink("mounts", NULL, "self/mounts"); /* And now for trickier ones */ -#ifdef CONFIG_SCHEDSTATS - proc_create("schedstat", 0, NULL, &proc_schedstat_operations); -#endif #ifdef CONFIG_PROC_KCORE proc_root_kcore = proc_create("kcore", S_IRUSR, NULL, &proc_kcore_operations); if (proc_root_kcore) diff --git a/include/linux/sched.h b/include/linux/sched.h index 5c38db536e0..7f60cb9b53c 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -681,10 +681,6 @@ struct sched_info { }; #endif /* defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) */ -#ifdef CONFIG_SCHEDSTATS -extern const struct file_operations proc_schedstat_operations; -#endif /* CONFIG_SCHEDSTATS */ - #ifdef CONFIG_TASK_DELAY_ACCT struct 
task_delay_info { spinlock_t lock; diff --git a/kernel/sched.c b/kernel/sched.c index d906f72b42d..5a70189d505 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -55,6 +55,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/sched_stats.h b/kernel/sched_stats.h index b8c156979cf..3d14ce27390 100644 --- a/kernel/sched_stats.h +++ b/kernel/sched_stats.h @@ -90,13 +90,20 @@ static int schedstat_open(struct inode *inode, struct file *file) return res; } -const struct file_operations proc_schedstat_operations = { +static const struct file_operations proc_schedstat_operations = { .open = schedstat_open, .read = seq_read, .llseek = seq_lseek, .release = single_release, }; +static int __init proc_schedstat_init(void) +{ + proc_create("schedstat", 0, NULL, &proc_schedstat_operations); + return 0; +} +module_init(proc_schedstat_init); + /* * Expects runqueue lock to be held for atomicity of update */ -- cgit v1.2.3-70-g09d2 From 97ce5d6dcb07c403c0fc6001b755aacc38b5d7ff Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Mon, 6 Oct 2008 14:14:19 +0400 Subject: proc: move all /proc/kcore stuff to fs/proc/kcore.c Signed-off-by: Alexey Dobriyan --- fs/proc/kcore.c | 14 +++++++++++++- fs/proc/proc_misc.c | 8 -------- include/linux/proc_fs.h | 4 ---- 3 files changed, 13 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c index c2370c76fb7..59b43a06887 100644 --- a/fs/proc/kcore.c +++ b/fs/proc/kcore.c @@ -27,6 +27,8 @@ #define ELF_CORE_EFLAGS 0 #endif +static struct proc_dir_entry *proc_root_kcore; + static int open_kcore(struct inode * inode, struct file * filp) { return capable(CAP_SYS_RAWIO) ? 
0 : -EPERM; @@ -34,7 +36,7 @@ static int open_kcore(struct inode * inode, struct file * filp) static ssize_t read_kcore(struct file *, char __user *, size_t, loff_t *); -const struct file_operations proc_kcore_operations = { +static const struct file_operations proc_kcore_operations = { .read = read_kcore, .open = open_kcore, }; @@ -399,3 +401,13 @@ read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos) return acc; } + +static int __init proc_kcore_init(void) +{ + proc_root_kcore = proc_create("kcore", S_IRUSR, NULL, &proc_kcore_operations); + if (proc_root_kcore) + proc_root_kcore->size = + (size_t)high_memory - PAGE_OFFSET + PAGE_SIZE; + return 0; +} +module_init(proc_kcore_init); diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c index 4a768ed5da2..5ed15ff8fd1 100644 --- a/fs/proc/proc_misc.c +++ b/fs/proc/proc_misc.c @@ -188,19 +188,11 @@ static struct file_operations proc_kpageflags_operations = { }; #endif /* CONFIG_PROC_PAGE_MONITOR */ -struct proc_dir_entry *proc_root_kcore; - void __init proc_misc_init(void) { proc_symlink("mounts", NULL, "self/mounts"); /* And now for trickier ones */ -#ifdef CONFIG_PROC_KCORE - proc_root_kcore = proc_create("kcore", S_IRUSR, NULL, &proc_kcore_operations); - if (proc_root_kcore) - proc_root_kcore->size = - (size_t)high_memory - PAGE_OFFSET + PAGE_SIZE; -#endif #ifdef CONFIG_PROC_PAGE_MONITOR proc_create("kpagecount", S_IRUSR, NULL, &proc_kpagecount_operations); proc_create("kpageflags", S_IRUSR, NULL, &proc_kpageflags_operations); diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h index 27d534f4470..9d830890505 100644 --- a/include/linux/proc_fs.h +++ b/include/linux/proc_fs.h @@ -97,8 +97,6 @@ struct vmcore { #ifdef CONFIG_PROC_FS -extern struct proc_dir_entry *proc_root_kcore; - extern spinlock_t proc_subdir_lock; extern void proc_root_init(void); @@ -138,8 +136,6 @@ extern struct inode *proc_get_inode(struct super_block *, unsigned int, struct p extern int proc_readdir(struct 
file *, void *, filldir_t); extern struct dentry *proc_lookup(struct inode *, struct dentry *, struct nameidata *); -extern const struct file_operations proc_kcore_operations; - extern int pid_ns_prepare_proc(struct pid_namespace *ns); extern void pid_ns_release_proc(struct pid_namespace *ns); -- cgit v1.2.3-70-g09d2 From 5aa140c2deca3701238d5acddf436ad7b02664c7 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Mon, 6 Oct 2008 14:36:31 +0400 Subject: proc: move /proc/vmcore creation to fs/proc/vmcore.c Signed-off-by: Alexey Dobriyan --- fs/proc/proc_misc.c | 3 --- fs/proc/vmcore.c | 6 +++--- include/linux/crash_dump.h | 2 -- 3 files changed, 3 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c index 2ef9ef9bc8c..e2db35006c0 100644 --- a/fs/proc/proc_misc.c +++ b/fs/proc/proc_misc.c @@ -62,7 +62,4 @@ void __init proc_misc_init(void) proc_symlink("mounts", NULL, "self/mounts"); /* And now for trickier ones */ -#ifdef CONFIG_PROC_VMCORE - proc_vmcore = proc_create("vmcore", S_IRUSR, NULL, &proc_vmcore_operations); -#endif } diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c index cd9ca67f841..03ec5950490 100644 --- a/fs/proc/vmcore.c +++ b/fs/proc/vmcore.c @@ -32,7 +32,7 @@ static size_t elfcorebuf_sz; /* Total size of vmcore file. */ static u64 vmcore_size; -struct proc_dir_entry *proc_vmcore = NULL; +static struct proc_dir_entry *proc_vmcore = NULL; /* Reads a page from the oldmem device from given offset. */ static ssize_t read_from_oldmem(char *buf, size_t count, @@ -162,7 +162,7 @@ static ssize_t read_vmcore(struct file *file, char __user *buffer, return acc; } -const struct file_operations proc_vmcore_operations = { +static const struct file_operations proc_vmcore_operations = { .read = read_vmcore, }; @@ -652,7 +652,7 @@ static int __init vmcore_init(void) return rc; } - /* Initialize /proc/vmcore size if proc is already up. 
*/ + proc_vmcore = proc_create("vmcore", S_IRUSR, NULL, &proc_vmcore_operations); if (proc_vmcore) proc_vmcore->size = vmcore_size; return 0; diff --git a/include/linux/crash_dump.h b/include/linux/crash_dump.h index 0acf3b737e2..2dac064d835 100644 --- a/include/linux/crash_dump.h +++ b/include/linux/crash_dump.h @@ -14,8 +14,6 @@ extern unsigned long long elfcorehdr_addr; extern ssize_t copy_oldmem_page(unsigned long, char *, size_t, unsigned long, int); -extern const struct file_operations proc_vmcore_operations; -extern struct proc_dir_entry *proc_vmcore; /* Architecture code defines this if there are other possible ELF * machine types, e.g. on bi-arch capable hardware. */ -- cgit v1.2.3-70-g09d2 From 59c7572e82d69483a66eaa67b46548baeb69ecf4 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Mon, 6 Oct 2008 14:49:39 +0400 Subject: proc: remove fs/proc/proc_misc.c Now that everything was moved to their more or less expected places, apply rm(1). Signed-off-by: Alexey Dobriyan --- fs/proc/Makefile | 2 +- fs/proc/proc_misc.c | 65 ------------------------------------------------- fs/proc/root.c | 2 +- include/linux/proc_fs.h | 1 - 4 files changed, 2 insertions(+), 68 deletions(-) delete mode 100644 fs/proc/proc_misc.c (limited to 'include/linux') diff --git a/fs/proc/Makefile b/fs/proc/Makefile index fef524410e8..63d965193b2 100644 --- a/fs/proc/Makefile +++ b/fs/proc/Makefile @@ -8,7 +8,7 @@ proc-y := nommu.o task_nommu.o proc-$(CONFIG_MMU) := mmu.o task_mmu.o proc-y += inode.o root.o base.o generic.o array.o \ - proc_tty.o proc_misc.o + proc_tty.o proc-y += cmdline.o proc-y += cpuinfo.o proc-y += devices.o diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c deleted file mode 100644 index e2db35006c0..00000000000 --- a/fs/proc/proc_misc.c +++ /dev/null @@ -1,65 +0,0 @@ -/* - * linux/fs/proc/proc_misc.c - * - * linux/fs/proc/array.c - * Copyright (C) 1992 by Linus Torvalds - * based on ideas by Darren Senn - * - * This used to be the part of array.c. 
See the rest of history and credits - * there. I took this into a separate file and switched the thing to generic - * proc_file_inode_operations, leaving in array.c only per-process stuff. - * Inumbers allocation made dynamic (via create_proc_entry()). AV, May 1999. - * - * Changes: - * Fulton Green : Encapsulated position metric calculations. - * - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "internal.h" - -void __init proc_misc_init(void) -{ - proc_symlink("mounts", NULL, "self/mounts"); - - /* And now for trickier ones */ -} diff --git a/fs/proc/root.c b/fs/proc/root.c index 2a3abd25b30..7761602af9d 100644 --- a/fs/proc/root.c +++ b/fs/proc/root.c @@ -117,7 +117,7 @@ void __init proc_root_init(void) return; } - proc_misc_init(); + proc_symlink("mounts", NULL, "self/mounts"); proc_net_init(); diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h index 9d830890505..b8bdb96eff7 100644 --- a/include/linux/proc_fs.h +++ b/include/linux/proc_fs.h @@ -100,7 +100,6 @@ struct vmcore { extern spinlock_t proc_subdir_lock; extern void proc_root_init(void); -extern void proc_misc_init(void); void proc_flush_task(struct task_struct *task); struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *); -- cgit v1.2.3-70-g09d2 From 66f50ee3cee4c9d98eea0add6f439e6e5e0ca4a5 Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Wed, 22 Oct 2008 14:14:59 -0700 Subject: profiling: fix up CONFIG_PROC_FS=n build In the case where procfs is disabled, create_proc_profile() does not exist. Stub it in with the others. 
Signed-off-by: Paul Mundt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/profile.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/profile.h b/include/linux/profile.h index 570045053ce..a0fc32279fc 100644 --- a/include/linux/profile.h +++ b/include/linux/profile.h @@ -19,10 +19,16 @@ struct notifier_block; #if defined(CONFIG_PROFILING) && defined(CONFIG_PROC_FS) void create_prof_cpu_mask(struct proc_dir_entry *de); +int create_proc_profile(void); #else static inline void create_prof_cpu_mask(struct proc_dir_entry *de) { } + +static inline int create_proc_profile(void) +{ + return 0; +} #endif enum profile_type { @@ -37,7 +43,6 @@ extern int prof_on __read_mostly; /* init basic kernel profiler */ int profile_init(void); int profile_setup(char *str); -int create_proc_profile(void); void profile_tick(int type); /* -- cgit v1.2.3-70-g09d2 From 4afe978530702c934dfdb11f54073136818b2119 Mon Sep 17 00:00:00 2001 From: Hidehiro Kawai Date: Wed, 22 Oct 2008 14:15:00 -0700 Subject: jbd: fix error handling for checkpoint io When a checkpointing IO fails, current JBD code doesn't check the error and continues journaling. This means the latest metadata can be lost from both the journal and filesystem. This patch leaves the failed metadata blocks in the journal space and aborts journaling in the case of log_do_checkpoint(). To achieve this, we need to do: 1. don't remove the failed buffer from the checkpoint list in the case of __try_to_free_cp_buf() because it may be released or overwritten by a later transaction 2. log_do_checkpoint() is the last chance, remove the failed buffer from the checkpoint list and abort the journal 3. when checkpointing fails, don't update the journal super block to prevent the journaled contents from being cleaned. For safety, don't update j_tail and j_tail_sequence either 4. 
when checkpointing fails, report this error to the ext3 layer so that ext3 doesn't clear the needs_recovery flag; otherwise the journaled contents are ignored and cleaned in the recovery phase 5. if the recovery fails, keep the needs_recovery flag 6. prevent cleanup_journal_tail() from being called between __journal_drop_transaction() and journal_abort() (a race issue between journal_flush() and __log_wait_for_space()) Signed-off-by: Hidehiro Kawai Acked-by: Jan Kara Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/jbd/checkpoint.c | 49 +++++++++++++++++++++++++++++++++++++------------ fs/jbd/journal.c | 28 ++++++++++++++++++++++------ fs/jbd/recovery.c | 7 +++++-- include/linux/jbd.h | 2 +- 4 files changed, 65 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/fs/jbd/checkpoint.c b/fs/jbd/checkpoint.c index a5432bbbfb8..e29293501d4 100644 --- a/fs/jbd/checkpoint.c +++ b/fs/jbd/checkpoint.c @@ -93,7 +93,8 @@ static int __try_to_free_cp_buf(struct journal_head *jh) int ret = 0; struct buffer_head *bh = jh2bh(jh); - if (jh->b_jlist == BJ_None && !buffer_locked(bh) && !buffer_dirty(bh)) { + if (jh->b_jlist == BJ_None && !buffer_locked(bh) && + !buffer_dirty(bh) && buffer_uptodate(bh)) { JBUFFER_TRACE(jh, "remove from checkpoint list"); ret = __journal_remove_checkpoint(jh) + 1; jbd_unlock_bh_state(bh); @@ -160,21 +161,25 @@ static void jbd_sync_bh(journal_t *journal, struct buffer_head *bh) * buffers. Note that we take the buffers in the opposite ordering * from the one in which they were submitted for IO. * + * Return 0 on success, and return <0 if some buffers have failed + * to be written out. + * * Called with j_list_lock held. 
*/ -static void __wait_cp_io(journal_t *journal, transaction_t *transaction) +static int __wait_cp_io(journal_t *journal, transaction_t *transaction) { struct journal_head *jh; struct buffer_head *bh; tid_t this_tid; int released = 0; + int ret = 0; this_tid = transaction->t_tid; restart: /* Did somebody clean up the transaction in the meanwhile? */ if (journal->j_checkpoint_transactions != transaction || transaction->t_tid != this_tid) - return; + return ret; while (!released && transaction->t_checkpoint_io_list) { jh = transaction->t_checkpoint_io_list; bh = jh2bh(jh); @@ -194,6 +199,9 @@ restart: spin_lock(&journal->j_list_lock); goto restart; } + if (unlikely(!buffer_uptodate(bh))) + ret = -EIO; + /* * Now in whatever state the buffer currently is, we know that * it has been written out and so we can drop it from the list @@ -203,6 +211,8 @@ restart: journal_remove_journal_head(bh); __brelse(bh); } + + return ret; } #define NR_BATCH 64 @@ -226,7 +236,8 @@ __flush_batch(journal_t *journal, struct buffer_head **bhs, int *batch_count) * Try to flush one buffer from the checkpoint list to disk. * * Return 1 if something happened which requires us to abort the current - * scan of the checkpoint list. + * scan of the checkpoint list. Return <0 if the buffer has failed to + * be written out. 
* * Called with j_list_lock held and drops it if 1 is returned * Called under jbd_lock_bh_state(jh2bh(jh)), and drops it @@ -256,6 +267,9 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh, log_wait_commit(journal, tid); ret = 1; } else if (!buffer_dirty(bh)) { + ret = 1; + if (unlikely(!buffer_uptodate(bh))) + ret = -EIO; J_ASSERT_JH(jh, !buffer_jbddirty(bh)); BUFFER_TRACE(bh, "remove from checkpoint"); __journal_remove_checkpoint(jh); @@ -263,7 +277,6 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh, jbd_unlock_bh_state(bh); journal_remove_journal_head(bh); __brelse(bh); - ret = 1; } else { /* * Important: we are about to write the buffer, and @@ -295,6 +308,7 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh, * to disk. We submit larger chunks of data at once. * * The journal should be locked before calling this function. + * Called with j_checkpoint_mutex held. */ int log_do_checkpoint(journal_t *journal) { @@ -318,6 +332,7 @@ int log_do_checkpoint(journal_t *journal) * OK, we need to start writing disk blocks. Take one transaction * and write it. */ + result = 0; spin_lock(&journal->j_list_lock); if (!journal->j_checkpoint_transactions) goto out; @@ -334,7 +349,7 @@ restart: int batch_count = 0; struct buffer_head *bhs[NR_BATCH]; struct journal_head *jh; - int retry = 0; + int retry = 0, err; while (!retry && transaction->t_checkpoint_list) { struct buffer_head *bh; @@ -347,6 +362,8 @@ restart: break; } retry = __process_buffer(journal, jh, bhs,&batch_count); + if (retry < 0 && !result) + result = retry; if (!retry && (need_resched() || spin_needbreak(&journal->j_list_lock))) { spin_unlock(&journal->j_list_lock); @@ -371,14 +388,18 @@ restart: * Now we have cleaned up the first transaction's checkpoint * list. 
Let's clean up the second one */ - __wait_cp_io(journal, transaction); + err = __wait_cp_io(journal, transaction); + if (!result) + result = err; } out: spin_unlock(&journal->j_list_lock); - result = cleanup_journal_tail(journal); if (result < 0) - return result; - return 0; + journal_abort(journal, result); + else + result = cleanup_journal_tail(journal); + + return (result < 0) ? result : 0; } /* @@ -394,8 +415,9 @@ out: * This is the only part of the journaling code which really needs to be * aware of transaction aborts. Checkpointing involves writing to the * main filesystem area rather than to the journal, so it can proceed - * even in abort state, but we must not update the journal superblock if - * we have an abort error outstanding. + * even in abort state, but we must not update the super block if + * checkpointing may have failed. Otherwise, we would lose some metadata + * buffers which should be written-back to the filesystem. */ int cleanup_journal_tail(journal_t *journal) @@ -404,6 +426,9 @@ int cleanup_journal_tail(journal_t *journal) tid_t first_tid; unsigned long blocknr, freed; + if (is_journal_aborted(journal)) + return 1; + /* OK, work out the oldest transaction remaining in the log, and * the log block it starts at. * diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c index aa7143a8349..9e4fa52d7dc 100644 --- a/fs/jbd/journal.c +++ b/fs/jbd/journal.c @@ -1121,9 +1121,12 @@ recovery_error: * * Release a journal_t structure once it is no longer in use by the * journaled object. + * Return <0 if we couldn't clean up the journal. */ -void journal_destroy(journal_t *journal) +int journal_destroy(journal_t *journal) { + int err = 0; + /* Wait for the commit thread to wake up and die. */ journal_kill_thread(journal); @@ -1146,11 +1149,16 @@ void journal_destroy(journal_t *journal) J_ASSERT(journal->j_checkpoint_transactions == NULL); spin_unlock(&journal->j_list_lock); - /* We can now mark the journal as empty. 
*/ - journal->j_tail = 0; - journal->j_tail_sequence = ++journal->j_transaction_sequence; if (journal->j_sb_buffer) { - journal_update_superblock(journal, 1); + if (!is_journal_aborted(journal)) { + /* We can now mark the journal as empty. */ + journal->j_tail = 0; + journal->j_tail_sequence = + ++journal->j_transaction_sequence; + journal_update_superblock(journal, 1); + } else { + err = -EIO; + } brelse(journal->j_sb_buffer); } @@ -1160,6 +1168,8 @@ void journal_destroy(journal_t *journal) journal_destroy_revoke(journal); kfree(journal->j_wbuf); kfree(journal); + + return err; } @@ -1359,10 +1369,16 @@ int journal_flush(journal_t *journal) spin_lock(&journal->j_list_lock); while (!err && journal->j_checkpoint_transactions != NULL) { spin_unlock(&journal->j_list_lock); + mutex_lock(&journal->j_checkpoint_mutex); err = log_do_checkpoint(journal); + mutex_unlock(&journal->j_checkpoint_mutex); spin_lock(&journal->j_list_lock); } spin_unlock(&journal->j_list_lock); + + if (is_journal_aborted(journal)) + return -EIO; + cleanup_journal_tail(journal); /* Finally, mark the journal as really needing no recovery. 
@@ -1384,7 +1400,7 @@ int journal_flush(journal_t *journal) J_ASSERT(journal->j_head == journal->j_tail); J_ASSERT(journal->j_tail_sequence == journal->j_transaction_sequence); spin_unlock(&journal->j_state_lock); - return err; + return 0; } /** diff --git a/fs/jbd/recovery.c b/fs/jbd/recovery.c index 43bc5e5ed06..db5e982c5dd 100644 --- a/fs/jbd/recovery.c +++ b/fs/jbd/recovery.c @@ -223,7 +223,7 @@ do { \ */ int journal_recover(journal_t *journal) { - int err; + int err, err2; journal_superblock_t * sb; struct recovery_info info; @@ -261,7 +261,10 @@ int journal_recover(journal_t *journal) journal->j_transaction_sequence = ++info.end_transaction; journal_clear_revoke(journal); - sync_blockdev(journal->j_fs_dev); + err2 = sync_blockdev(journal->j_fs_dev); + if (!err) + err = err2; + return err; } diff --git a/include/linux/jbd.h b/include/linux/jbd.h index 35d4f6342fa..346e2b80be7 100644 --- a/include/linux/jbd.h +++ b/include/linux/jbd.h @@ -911,7 +911,7 @@ extern int journal_set_features (journal_t *, unsigned long, unsigned long, unsigned long); extern int journal_create (journal_t *); extern int journal_load (journal_t *journal); -extern void journal_destroy (journal_t *); +extern int journal_destroy (journal_t *); extern int journal_recover (journal_t *journal); extern int journal_wipe (journal_t *, int); extern int journal_skip_recovery (journal_t *); -- cgit v1.2.3-70-g09d2 From 94b6da5ab8293b04a300ba35c72eddfa94db8b02 Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Wed, 22 Oct 2008 14:15:05 -0700 Subject: memcg: fix page_cgroup allocation page_cgroup_init() is called from mem_cgroup_init(). But at this point, we cannot call alloc_bootmem(). (and this caused panic at boot.) This patch moves page_cgroup_init() to init/main.c. Time table is following: == parse_args(). # we can trust mem_cgroup_subsys.disabled bit after this. .... cgroup_init_early() # "early" init of cgroup. .... setup_arch() # memmap is allocated. ... 
page_cgroup_init(); mem_init(); # we cannot call alloc_bootmem after this. .... cgroup_init() # mem_cgroup is initialized. == Before page_cgroup_init(), mem_map must be initialized. So, I added page_cgroup_init() to init/main.c directly. (*) maybe this is not very clean but - cgroup_init_early() is too early - in cgroup_init(), we have to use vmalloc instead of alloc_bootmem(). use of vmalloc area in x86-32 is important and we should avoid very large vmalloc() in x86-32. So, we want to use alloc_bootmem() and added page_cgroup_init() directly to init/main.c [akpm@linux-foundation.org: remove unneeded/bad mem_cgroup_subsys declaration] [akpm@linux-foundation.org: fix build] Acked-by: Balbir Singh Tested-by: Balbir Singh Signed-off-by: KAMEZAWA Hiroyuki Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/page_cgroup.h | 5 +++++ init/main.c | 2 ++ mm/memcontrol.c | 1 - mm/page_cgroup.c | 32 +++++++++++++++++++++++++------- 4 files changed, 32 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/page_cgroup.h b/include/linux/page_cgroup.h index 0fd39f2231e..f546ad6fc02 100644 --- a/include/linux/page_cgroup.h +++ b/include/linux/page_cgroup.h @@ -99,5 +99,10 @@ static inline struct page_cgroup *lookup_page_cgroup(struct page *page) { return NULL; } + +static inline void page_cgroup_init(void) +{ +} + #endif #endif diff --git a/init/main.c b/init/main.c index 3e17a3bafe6..672ae75b205 100644 --- a/init/main.c +++ b/init/main.c @@ -52,6 +52,7 @@ #include #include #include +#include #include #include #include @@ -647,6 +648,7 @@ asmlinkage void __init start_kernel(void) vmalloc_init(); vfs_caches_init_early(); cpuset_init_early(); + page_cgroup_init(); mem_init(); enable_debug_pagealloc(); cpu_hotplug_init(); diff --git a/mm/memcontrol.c b/mm/memcontrol.c index d4a92b63e98..866dcc7eeb0 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -1088,7 +1088,6 @@ mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup 
*cont) int node; if (unlikely((cont->parent) == NULL)) { - page_cgroup_init(); mem = &init_mem_cgroup; } else { mem = mem_cgroup_alloc(); diff --git a/mm/page_cgroup.c b/mm/page_cgroup.c index 78242b4d7ed..f59d797dc5a 100644 --- a/mm/page_cgroup.c +++ b/mm/page_cgroup.c @@ -4,8 +4,10 @@ #include #include #include +#include #include #include +#include static void __meminit __init_page_cgroup(struct page_cgroup *pc, unsigned long pfn) @@ -67,6 +69,9 @@ void __init page_cgroup_init(void) int nid, fail; + if (mem_cgroup_subsys.disabled) + return; + for_each_online_node(nid) { fail = alloc_node_page_cgroup(nid); if (fail) @@ -107,9 +112,14 @@ int __meminit init_section_page_cgroup(unsigned long pfn) nid = page_to_nid(pfn_to_page(pfn)); table_size = sizeof(struct page_cgroup) * PAGES_PER_SECTION; - base = kmalloc_node(table_size, GFP_KERNEL, nid); - if (!base) - base = vmalloc_node(table_size, nid); + if (slab_is_available()) { + base = kmalloc_node(table_size, GFP_KERNEL, nid); + if (!base) + base = vmalloc_node(table_size, nid); + } else { + base = __alloc_bootmem_node_nopanic(NODE_DATA(nid), table_size, + PAGE_SIZE, __pa(MAX_DMA_ADDRESS)); + } if (!base) { printk(KERN_ERR "page cgroup allocation failure\n"); @@ -136,11 +146,16 @@ void __free_page_cgroup(unsigned long pfn) if (!ms || !ms->page_cgroup) return; base = ms->page_cgroup + pfn; - ms->page_cgroup = NULL; - if (is_vmalloc_addr(base)) + if (is_vmalloc_addr(base)) { vfree(base); - else - kfree(base); + ms->page_cgroup = NULL; + } else { + struct page *page = virt_to_page(base); + if (!PageReserved(page)) { /* Is bootmem ? 
*/ + kfree(base); + ms->page_cgroup = NULL; + } + } } int online_page_cgroup(unsigned long start_pfn, @@ -214,6 +229,9 @@ void __init page_cgroup_init(void) unsigned long pfn; int fail = 0; + if (mem_cgroup_subsys.disabled) + return; + for (pfn = 0; !fail && pfn < max_pfn; pfn += PAGES_PER_SECTION) { if (!pfn_present(pfn)) continue; -- cgit v1.2.3-70-g09d2 From 7106b4e333baeaf3c596e4d240438059b8a7616d Mon Sep 17 00:00:00 2001 From: Lee Howard Date: Tue, 21 Oct 2008 13:48:58 +0100 Subject: 8250: Oxford Semiconductor Devices Add support for the OxSemi 'Tornado' devices. Reformatted and reworked a bit by Alan Cox Signed-off-by: Lee Howard Signed-off-by: Alan Cox Signed-off-by: Linus Torvalds --- drivers/parport/parport_pc.c | 20 +++- drivers/serial/8250_pci.c | 211 +++++++++++++++++++++++++++++++++++++++++++ include/linux/pci_ids.h | 8 ++ 3 files changed, 238 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/parport/parport_pc.c b/drivers/parport/parport_pc.c index 8a846adf1dc..96f3bdf0ec4 100644 --- a/drivers/parport/parport_pc.c +++ b/drivers/parport/parport_pc.c @@ -2791,6 +2791,7 @@ enum parport_pc_pci_cards { oxsemi_952, oxsemi_954, oxsemi_840, + oxsemi_pcie_pport, aks_0100, mobility_pp, netmos_9705, @@ -2868,6 +2869,7 @@ static struct parport_pc_pci { /* oxsemi_952 */ { 1, { { 0, 1 }, } }, /* oxsemi_954 */ { 1, { { 0, -1 }, } }, /* oxsemi_840 */ { 1, { { 0, 1 }, } }, + /* oxsemi_pcie_pport */ { 1, { { 0, 1 }, } }, /* aks_0100 */ { 1, { { 0, -1 }, } }, /* mobility_pp */ { 1, { { 0, 1 }, } }, /* netmos_9705 */ { 1, { { 0, -1 }, } }, /* untested */ @@ -2928,7 +2930,6 @@ static const struct pci_device_id parport_pc_pci_tbl[] = { { 0x1409, 0x7268, 0x1409, 0x0103, 0, 0, timedia_4008a }, { 0x1409, 0x7268, 0x1409, 0x0104, 0, 0, timedia_4018 }, { 0x1409, 0x7268, 0x1409, 0x9018, 0, 0, timedia_9018a }, - { 0x14f2, 0x0121, PCI_ANY_ID, PCI_ANY_ID, 0, 0, mobility_pp }, { PCI_VENDOR_ID_SYBA, PCI_DEVICE_ID_SYBA_2P_EPP, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 
syba_2p_epp }, { PCI_VENDOR_ID_SYBA, PCI_DEVICE_ID_SYBA_1P_ECP, @@ -2946,8 +2947,25 @@ static const struct pci_device_id parport_pc_pci_tbl[] = { PCI_ANY_ID, PCI_ANY_ID, 0, 0, oxsemi_954 }, { PCI_VENDOR_ID_OXSEMI, PCI_DEVICE_ID_OXSEMI_12PCI840, PCI_ANY_ID, PCI_ANY_ID, 0, 0, oxsemi_840 }, + { PCI_VENDOR_ID_OXSEMI, PCI_DEVICE_ID_OXSEMI_PCIe840, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, oxsemi_pcie_pport }, + { PCI_VENDOR_ID_OXSEMI, PCI_DEVICE_ID_OXSEMI_PCIe840_G, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, oxsemi_pcie_pport }, + { PCI_VENDOR_ID_OXSEMI, PCI_DEVICE_ID_OXSEMI_PCIe952_0, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, oxsemi_pcie_pport }, + { PCI_VENDOR_ID_OXSEMI, PCI_DEVICE_ID_OXSEMI_PCIe952_0_G, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, oxsemi_pcie_pport }, + { PCI_VENDOR_ID_OXSEMI, PCI_DEVICE_ID_OXSEMI_PCIe952_1, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, oxsemi_pcie_pport }, + { PCI_VENDOR_ID_OXSEMI, PCI_DEVICE_ID_OXSEMI_PCIe952_1_G, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, oxsemi_pcie_pport }, + { PCI_VENDOR_ID_OXSEMI, PCI_DEVICE_ID_OXSEMI_PCIe952_1_U, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, oxsemi_pcie_pport }, + { PCI_VENDOR_ID_OXSEMI, PCI_DEVICE_ID_OXSEMI_PCIe952_1_GU, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, oxsemi_pcie_pport }, { PCI_VENDOR_ID_AKS, PCI_DEVICE_ID_AKS_ALADDINCARD, PCI_ANY_ID, PCI_ANY_ID, 0, 0, aks_0100 }, + { 0x14f2, 0x0121, PCI_ANY_ID, PCI_ANY_ID, 0, 0, mobility_pp }, /* NetMos communication controllers */ { PCI_VENDOR_ID_NETMOS, PCI_DEVICE_ID_NETMOS_9705, PCI_ANY_ID, PCI_ANY_ID, 0, 0, netmos_9705 }, diff --git a/drivers/serial/8250_pci.c b/drivers/serial/8250_pci.c index c014ffb110e..1bdb08b41f7 100644 --- a/drivers/serial/8250_pci.c +++ b/drivers/serial/8250_pci.c @@ -1100,6 +1100,8 @@ enum pci_board_num_t { pbn_b0_4_1843200_200, pbn_b0_8_1843200_200, + pbn_b0_1_4000000, + pbn_b0_bt_1_115200, pbn_b0_bt_2_115200, pbn_b0_bt_8_115200, @@ -1167,6 +1169,10 @@ enum pci_board_num_t { pbn_exsys_4055, pbn_plx_romulus, pbn_oxsemi, + pbn_oxsemi_1_4000000, + pbn_oxsemi_2_4000000, + pbn_oxsemi_4_4000000, + 
pbn_oxsemi_8_4000000, pbn_intel_i960, pbn_sgi_ioc3, pbn_computone_4, @@ -1290,6 +1296,12 @@ static struct pciserial_board pci_boards[] __devinitdata = { .base_baud = 1843200, .uart_offset = 0x200, }, + [pbn_b0_1_4000000] = { + .flags = FL_BASE0, + .num_ports = 1, + .base_baud = 4000000, + .uart_offset = 8, + }, [pbn_b0_bt_1_115200] = { .flags = FL_BASE0|FL_BASE_BARS, @@ -1625,6 +1637,35 @@ static struct pciserial_board pci_boards[] __devinitdata = { .base_baud = 115200, .uart_offset = 8, }, + [pbn_oxsemi_1_4000000] = { + .flags = FL_BASE0, + .num_ports = 1, + .base_baud = 4000000, + .uart_offset = 0x200, + .first_offset = 0x1000, + }, + [pbn_oxsemi_2_4000000] = { + .flags = FL_BASE0, + .num_ports = 2, + .base_baud = 4000000, + .uart_offset = 0x200, + .first_offset = 0x1000, + }, + [pbn_oxsemi_4_4000000] = { + .flags = FL_BASE0, + .num_ports = 4, + .base_baud = 4000000, + .uart_offset = 0x200, + .first_offset = 0x1000, + }, + [pbn_oxsemi_8_4000000] = { + .flags = FL_BASE0, + .num_ports = 8, + .base_baud = 4000000, + .uart_offset = 0x200, + .first_offset = 0x1000, + }, + /* * EKF addition for i960 Boards form EKF with serial port. @@ -1813,6 +1854,34 @@ serial_pci_matches(struct pciserial_board *board, board->first_offset == guessed->first_offset; } +/* + * Oxford Semiconductor Inc. + * Check that device is part of the Tornado range of devices, then determine + * the number of ports available on the device. 
+ */ +static int pci_oxsemi_tornado_init(struct pci_dev *dev, struct pciserial_board *board) +{ + u8 __iomem *p; + unsigned long deviceID; + unsigned int number_uarts; + + p = pci_iomap(dev, 0, 5); + if (p == NULL) + return -ENOMEM; + + deviceID = ioread32(p); + /* Tornado device */ + if (deviceID == 0x07000200) { + number_uarts = ioread8(p + 4); + board->num_ports = number_uarts; + printk(KERN_DEBUG + "%d ports detected on Oxford PCI Express device\n", + number_uarts); + } + pci_iounmap(dev, p); + return 0; +} + struct serial_private * pciserial_init_ports(struct pci_dev *dev, struct pciserial_board *board) { @@ -1821,6 +1890,12 @@ pciserial_init_ports(struct pci_dev *dev, struct pciserial_board *board) struct pci_serial_quirk *quirk; int rc, nr_ports, i; + /* + * Find number of ports on board + */ + if (dev->vendor == PCI_VENDOR_ID_OXSEMI) + pci_oxsemi_tornado_init(dev, board); + nr_ports = board->num_ports; /* @@ -2300,6 +2375,142 @@ static struct pci_device_id serial_pci_tbl[] = { PCI_ANY_ID, PCI_ANY_ID, 0, 0, pbn_b0_bt_2_921600 }, + /* + * Oxford Semiconductor Inc. Tornado PCI express device range. 
+ */ + { PCI_VENDOR_ID_OXSEMI, 0xc101, /* OXPCIe952 1 Legacy UART */ + PCI_ANY_ID, PCI_ANY_ID, 0, 0, + pbn_b0_1_4000000 }, + { PCI_VENDOR_ID_OXSEMI, 0xc105, /* OXPCIe952 1 Legacy UART */ + PCI_ANY_ID, PCI_ANY_ID, 0, 0, + pbn_b0_1_4000000 }, + { PCI_VENDOR_ID_OXSEMI, 0xc11b, /* OXPCIe952 1 Native UART */ + PCI_ANY_ID, PCI_ANY_ID, 0, 0, + pbn_oxsemi_1_4000000 }, + { PCI_VENDOR_ID_OXSEMI, 0xc11f, /* OXPCIe952 1 Native UART */ + PCI_ANY_ID, PCI_ANY_ID, 0, 0, + pbn_oxsemi_1_4000000 }, + { PCI_VENDOR_ID_OXSEMI, 0xc120, /* OXPCIe952 1 Legacy UART */ + PCI_ANY_ID, PCI_ANY_ID, 0, 0, + pbn_b0_1_4000000 }, + { PCI_VENDOR_ID_OXSEMI, 0xc124, /* OXPCIe952 1 Legacy UART */ + PCI_ANY_ID, PCI_ANY_ID, 0, 0, + pbn_b0_1_4000000 }, + { PCI_VENDOR_ID_OXSEMI, 0xc138, /* OXPCIe952 1 Native UART */ + PCI_ANY_ID, PCI_ANY_ID, 0, 0, + pbn_oxsemi_1_4000000 }, + { PCI_VENDOR_ID_OXSEMI, 0xc13d, /* OXPCIe952 1 Native UART */ + PCI_ANY_ID, PCI_ANY_ID, 0, 0, + pbn_oxsemi_1_4000000 }, + { PCI_VENDOR_ID_OXSEMI, 0xc140, /* OXPCIe952 1 Legacy UART */ + PCI_ANY_ID, PCI_ANY_ID, 0, 0, + pbn_b0_1_4000000 }, + { PCI_VENDOR_ID_OXSEMI, 0xc141, /* OXPCIe952 1 Legacy UART */ + PCI_ANY_ID, PCI_ANY_ID, 0, 0, + pbn_b0_1_4000000 }, + { PCI_VENDOR_ID_OXSEMI, 0xc144, /* OXPCIe952 1 Legacy UART */ + PCI_ANY_ID, PCI_ANY_ID, 0, 0, + pbn_b0_1_4000000 }, + { PCI_VENDOR_ID_OXSEMI, 0xc145, /* OXPCIe952 1 Legacy UART */ + PCI_ANY_ID, PCI_ANY_ID, 0, 0, + pbn_b0_1_4000000 }, + { PCI_VENDOR_ID_OXSEMI, 0xc158, /* OXPCIe952 2 Native UART */ + PCI_ANY_ID, PCI_ANY_ID, 0, 0, + pbn_oxsemi_2_4000000 }, + { PCI_VENDOR_ID_OXSEMI, 0xc15d, /* OXPCIe952 2 Native UART */ + PCI_ANY_ID, PCI_ANY_ID, 0, 0, + pbn_oxsemi_2_4000000 }, + { PCI_VENDOR_ID_OXSEMI, 0xc208, /* OXPCIe954 4 Native UART */ + PCI_ANY_ID, PCI_ANY_ID, 0, 0, + pbn_oxsemi_4_4000000 }, + { PCI_VENDOR_ID_OXSEMI, 0xc20d, /* OXPCIe954 4 Native UART */ + PCI_ANY_ID, PCI_ANY_ID, 0, 0, + pbn_oxsemi_4_4000000 }, + { PCI_VENDOR_ID_OXSEMI, 0xc308, /* OXPCIe958 8 Native UART */ + 
PCI_ANY_ID, PCI_ANY_ID, 0, 0, + pbn_oxsemi_8_4000000 }, + { PCI_VENDOR_ID_OXSEMI, 0xc30d, /* OXPCIe958 8 Native UART */ + PCI_ANY_ID, PCI_ANY_ID, 0, 0, + pbn_oxsemi_8_4000000 }, + { PCI_VENDOR_ID_OXSEMI, 0xc40b, /* OXPCIe200 1 Native UART */ + PCI_ANY_ID, PCI_ANY_ID, 0, 0, + pbn_oxsemi_1_4000000 }, + { PCI_VENDOR_ID_OXSEMI, 0xc40f, /* OXPCIe200 1 Native UART */ + PCI_ANY_ID, PCI_ANY_ID, 0, 0, + pbn_oxsemi_1_4000000 }, + { PCI_VENDOR_ID_OXSEMI, 0xc41b, /* OXPCIe200 1 Native UART */ + PCI_ANY_ID, PCI_ANY_ID, 0, 0, + pbn_oxsemi_1_4000000 }, + { PCI_VENDOR_ID_OXSEMI, 0xc41f, /* OXPCIe200 1 Native UART */ + PCI_ANY_ID, PCI_ANY_ID, 0, 0, + pbn_oxsemi_1_4000000 }, + { PCI_VENDOR_ID_OXSEMI, 0xc42b, /* OXPCIe200 1 Native UART */ + PCI_ANY_ID, PCI_ANY_ID, 0, 0, + pbn_oxsemi_1_4000000 }, + { PCI_VENDOR_ID_OXSEMI, 0xc42f, /* OXPCIe200 1 Native UART */ + PCI_ANY_ID, PCI_ANY_ID, 0, 0, + pbn_oxsemi_1_4000000 }, + { PCI_VENDOR_ID_OXSEMI, 0xc43b, /* OXPCIe200 1 Native UART */ + PCI_ANY_ID, PCI_ANY_ID, 0, 0, + pbn_oxsemi_1_4000000 }, + { PCI_VENDOR_ID_OXSEMI, 0xc43f, /* OXPCIe200 1 Native UART */ + PCI_ANY_ID, PCI_ANY_ID, 0, 0, + pbn_oxsemi_1_4000000 }, + { PCI_VENDOR_ID_OXSEMI, 0xc44b, /* OXPCIe200 1 Native UART */ + PCI_ANY_ID, PCI_ANY_ID, 0, 0, + pbn_oxsemi_1_4000000 }, + { PCI_VENDOR_ID_OXSEMI, 0xc44f, /* OXPCIe200 1 Native UART */ + PCI_ANY_ID, PCI_ANY_ID, 0, 0, + pbn_oxsemi_1_4000000 }, + { PCI_VENDOR_ID_OXSEMI, 0xc45b, /* OXPCIe200 1 Native UART */ + PCI_ANY_ID, PCI_ANY_ID, 0, 0, + pbn_oxsemi_1_4000000 }, + { PCI_VENDOR_ID_OXSEMI, 0xc45f, /* OXPCIe200 1 Native UART */ + PCI_ANY_ID, PCI_ANY_ID, 0, 0, + pbn_oxsemi_1_4000000 }, + { PCI_VENDOR_ID_OXSEMI, 0xc46b, /* OXPCIe200 1 Native UART */ + PCI_ANY_ID, PCI_ANY_ID, 0, 0, + pbn_oxsemi_1_4000000 }, + { PCI_VENDOR_ID_OXSEMI, 0xc46f, /* OXPCIe200 1 Native UART */ + PCI_ANY_ID, PCI_ANY_ID, 0, 0, + pbn_oxsemi_1_4000000 }, + { PCI_VENDOR_ID_OXSEMI, 0xc47b, /* OXPCIe200 1 Native UART */ + PCI_ANY_ID, PCI_ANY_ID, 0, 0, + 
pbn_oxsemi_1_4000000 }, + { PCI_VENDOR_ID_OXSEMI, 0xc47f, /* OXPCIe200 1 Native UART */ + PCI_ANY_ID, PCI_ANY_ID, 0, 0, + pbn_oxsemi_1_4000000 }, + { PCI_VENDOR_ID_OXSEMI, 0xc48b, /* OXPCIe200 1 Native UART */ + PCI_ANY_ID, PCI_ANY_ID, 0, 0, + pbn_oxsemi_1_4000000 }, + { PCI_VENDOR_ID_OXSEMI, 0xc48f, /* OXPCIe200 1 Native UART */ + PCI_ANY_ID, PCI_ANY_ID, 0, 0, + pbn_oxsemi_1_4000000 }, + { PCI_VENDOR_ID_OXSEMI, 0xc49b, /* OXPCIe200 1 Native UART */ + PCI_ANY_ID, PCI_ANY_ID, 0, 0, + pbn_oxsemi_1_4000000 }, + { PCI_VENDOR_ID_OXSEMI, 0xc49f, /* OXPCIe200 1 Native UART */ + PCI_ANY_ID, PCI_ANY_ID, 0, 0, + pbn_oxsemi_1_4000000 }, + { PCI_VENDOR_ID_OXSEMI, 0xc4ab, /* OXPCIe200 1 Native UART */ + PCI_ANY_ID, PCI_ANY_ID, 0, 0, + pbn_oxsemi_1_4000000 }, + { PCI_VENDOR_ID_OXSEMI, 0xc4af, /* OXPCIe200 1 Native UART */ + PCI_ANY_ID, PCI_ANY_ID, 0, 0, + pbn_oxsemi_1_4000000 }, + { PCI_VENDOR_ID_OXSEMI, 0xc4bb, /* OXPCIe200 1 Native UART */ + PCI_ANY_ID, PCI_ANY_ID, 0, 0, + pbn_oxsemi_1_4000000 }, + { PCI_VENDOR_ID_OXSEMI, 0xc4bf, /* OXPCIe200 1 Native UART */ + PCI_ANY_ID, PCI_ANY_ID, 0, 0, + pbn_oxsemi_1_4000000 }, + { PCI_VENDOR_ID_OXSEMI, 0xc4cb, /* OXPCIe200 1 Native UART */ + PCI_ANY_ID, PCI_ANY_ID, 0, 0, + pbn_oxsemi_1_4000000 }, + { PCI_VENDOR_ID_OXSEMI, 0xc4cf, /* OXPCIe200 1 Native UART */ + PCI_ANY_ID, PCI_ANY_ID, 0, 0, + pbn_oxsemi_1_4000000 }, + /* * SBS Technologies, Inc. 
P-Octal and PMC-OCTPRO cards, * from skokodyn@yahoo.com diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index e5d344bfcb7..369f4428635 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -1944,6 +1944,14 @@ #define PCI_VENDOR_ID_OXSEMI 0x1415 #define PCI_DEVICE_ID_OXSEMI_12PCI840 0x8403 +#define PCI_DEVICE_ID_OXSEMI_PCIe840 0xC000 +#define PCI_DEVICE_ID_OXSEMI_PCIe840_G 0xC004 +#define PCI_DEVICE_ID_OXSEMI_PCIe952_0 0xC100 +#define PCI_DEVICE_ID_OXSEMI_PCIe952_0_G 0xC104 +#define PCI_DEVICE_ID_OXSEMI_PCIe952_1 0xC110 +#define PCI_DEVICE_ID_OXSEMI_PCIe952_1_G 0xC114 +#define PCI_DEVICE_ID_OXSEMI_PCIe952_1_U 0xC118 +#define PCI_DEVICE_ID_OXSEMI_PCIe952_1_GU 0xC11C #define PCI_DEVICE_ID_OXSEMI_16PCI954 0x9501 #define PCI_DEVICE_ID_OXSEMI_16PCI95N 0x9511 #define PCI_DEVICE_ID_OXSEMI_16PCI954PP 0x9513 -- cgit v1.2.3-70-g09d2 From 388c8c16abafc2e74dff173b5de9ee519ea8d32f Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Sun, 3 Aug 2008 13:02:12 -0500 Subject: PCI: add routines for debugging and handling lost interrupts We're getting a lot of storage drivers blamed for interrupt misrouting issues. This patch provides a standard way of reporting the problem ... and, if possible, correcting it. 
Signed-off-by: James Bottomley Signed-off-by: Jesse Barnes --- drivers/pci/Makefile | 3 ++- drivers/pci/irq.c | 60 ++++++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/pci.h | 7 ++++++ 3 files changed, 69 insertions(+), 1 deletion(-) create mode 100644 drivers/pci/irq.c (limited to 'include/linux') diff --git a/drivers/pci/Makefile b/drivers/pci/Makefile index 4b47f4ece5b..af3bfe22847 100644 --- a/drivers/pci/Makefile +++ b/drivers/pci/Makefile @@ -3,7 +3,8 @@ # obj-y += access.o bus.o probe.o remove.o pci.o quirks.o slot.o \ - pci-driver.o search.o pci-sysfs.o rom.o setup-res.o + pci-driver.o search.o pci-sysfs.o rom.o setup-res.o \ + irq.o obj-$(CONFIG_PROC_FS) += proc.o # Build PCI Express stuff if needed diff --git a/drivers/pci/irq.c b/drivers/pci/irq.c new file mode 100644 index 00000000000..6441dfa969a --- /dev/null +++ b/drivers/pci/irq.c @@ -0,0 +1,60 @@ +/* + * PCI IRQ failure handing code + * + * Copyright (c) 2008 James Bottomley + */ + +#include +#include +#include +#include + +static void pci_note_irq_problem(struct pci_dev *pdev, const char *reason) +{ + struct pci_dev *parent = to_pci_dev(pdev->dev.parent); + + dev_printk(KERN_ERR, &pdev->dev, + "Potentially misrouted IRQ (Bridge %s %04x:%04x)\n", + parent->dev.bus_id, parent->vendor, parent->device); + dev_printk(KERN_ERR, &pdev->dev, "%s\n", reason); + dev_printk(KERN_ERR, &pdev->dev, "Please report to linux-kernel@vger.kernel.org\n"); + WARN_ON(1); +} + +/** + * pci_lost_interrupt - reports a lost PCI interrupt + * @pdev: device whose interrupt is lost + * + * The primary function of this routine is to report a lost interrupt + * in a standard way which users can recognise (instead of blaming the + * driver). + * + * Returns: + * a suggestion for fixing it (although the driver is not required to + * act on this). 
+ */ +enum pci_lost_interrupt_reason pci_lost_interrupt(struct pci_dev *pdev) +{ + if (pdev->msi_enabled || pdev->msix_enabled) { + enum pci_lost_interrupt_reason ret; + + if (pdev->msix_enabled) { + pci_note_irq_problem(pdev, "MSIX routing failure"); + ret = PCI_LOST_IRQ_DISABLE_MSIX; + } else { + pci_note_irq_problem(pdev, "MSI routing failure"); + ret = PCI_LOST_IRQ_DISABLE_MSI; + } + return ret; + } +#ifdef CONFIG_ACPI + if (!(acpi_disabled || acpi_noirq)) { + pci_note_irq_problem(pdev, "Potential ACPI misrouting please reboot with acpi=noirq"); + /* currently no way to fix acpi on the fly */ + return PCI_LOST_IRQ_DISABLE_ACPI; + } +#endif + pci_note_irq_problem(pdev, "unknown cause (not MSI or ACPI)"); + return PCI_LOST_IRQ_NO_INFORMATION; +} +EXPORT_SYMBOL(pci_lost_interrupt); diff --git a/include/linux/pci.h b/include/linux/pci.h index 752def8a2ef..c75b82bda32 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -546,6 +546,13 @@ struct pci_dev __deprecated *pci_find_slot(unsigned int bus, unsigned int devfn); #endif /* CONFIG_PCI_LEGACY */ +enum pci_lost_interrupt_reason { + PCI_LOST_IRQ_NO_INFORMATION = 0, + PCI_LOST_IRQ_DISABLE_MSI, + PCI_LOST_IRQ_DISABLE_MSIX, + PCI_LOST_IRQ_DISABLE_ACPI, +}; +enum pci_lost_interrupt_reason pci_lost_interrupt(struct pci_dev *dev); int pci_find_capability(struct pci_dev *dev, int cap); int pci_find_next_capability(struct pci_dev *dev, u8 pos, int cap); int pci_find_ext_capability(struct pci_dev *dev, int cap); -- cgit v1.2.3-70-g09d2 From 2fca5ccf97d2c28bcfce44f5b07d85e74e3cd18e Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 22 Oct 2008 09:34:49 +0200 Subject: libata: switch to using block layer tagging support libata currently has a pretty dumb ATA_MAX_QUEUE loop for finding a free tag to use. Instead of fixing that up, convert libata to using block layer tagging - gets rid of code in libata, and is also much faster. 
Signed-off-by: Jens Axboe Acked-by: Tejun Heo Signed-off-by: Linus Torvalds --- drivers/ata/libata-core.c | 66 +++++------------------------------------------ drivers/ata/libata-scsi.c | 10 +++++-- drivers/ata/libata.h | 19 ++++++++++++-- include/linux/libata.h | 1 - 4 files changed, 31 insertions(+), 65 deletions(-) (limited to 'include/linux') diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index bbb3cae5749..8cb0b360bfd 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -1713,8 +1713,6 @@ unsigned ata_exec_internal_sg(struct ata_device *dev, else tag = 0; - if (test_and_set_bit(tag, &ap->qc_allocated)) - BUG(); qc = __ata_qc_from_tag(ap, tag); qc->tag = tag; @@ -4552,37 +4550,6 @@ void swap_buf_le16(u16 *buf, unsigned int buf_words) #endif /* __BIG_ENDIAN */ } -/** - * ata_qc_new - Request an available ATA command, for queueing - * @ap: Port associated with device @dev - * @dev: Device from whom we request an available command structure - * - * LOCKING: - * None. - */ - -static struct ata_queued_cmd *ata_qc_new(struct ata_port *ap) -{ - struct ata_queued_cmd *qc = NULL; - unsigned int i; - - /* no command while frozen */ - if (unlikely(ap->pflags & ATA_PFLAG_FROZEN)) - return NULL; - - /* the last tag is reserved for internal command. */ - for (i = 0; i < ATA_MAX_QUEUE - 1; i++) - if (!test_and_set_bit(i, &ap->qc_allocated)) { - qc = __ata_qc_from_tag(ap, i); - break; - } - - if (qc) - qc->tag = i; - - return qc; -} - /** * ata_qc_new_init - Request an available ATA command, and initialize it * @dev: Device from whom we request an available command structure @@ -4591,16 +4558,20 @@ static struct ata_queued_cmd *ata_qc_new(struct ata_port *ap) * None. 
*/ -struct ata_queued_cmd *ata_qc_new_init(struct ata_device *dev) +struct ata_queued_cmd *ata_qc_new_init(struct ata_device *dev, int tag) { struct ata_port *ap = dev->link->ap; struct ata_queued_cmd *qc; - qc = ata_qc_new(ap); + if (unlikely(ap->pflags & ATA_PFLAG_FROZEN)) + return NULL; + + qc = __ata_qc_from_tag(ap, tag); if (qc) { qc->scsicmd = NULL; qc->ap = ap; qc->dev = dev; + qc->tag = tag; ata_qc_reinit(qc); } @@ -4608,31 +4579,6 @@ struct ata_queued_cmd *ata_qc_new_init(struct ata_device *dev) return qc; } -/** - * ata_qc_free - free unused ata_queued_cmd - * @qc: Command to complete - * - * Designed to free unused ata_queued_cmd object - * in case something prevents using it. - * - * LOCKING: - * spin_lock_irqsave(host lock) - */ -void ata_qc_free(struct ata_queued_cmd *qc) -{ - struct ata_port *ap = qc->ap; - unsigned int tag; - - WARN_ON(qc == NULL); /* ata_qc_from_tag _might_ return NULL */ - - qc->flags = 0; - tag = qc->tag; - if (likely(ata_tag_valid(tag))) { - qc->tag = ATA_TAG_POISON; - clear_bit(tag, &ap->qc_allocated); - } -} - void __ata_qc_complete(struct ata_queued_cmd *qc) { struct ata_port *ap = qc->ap; diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index 5d312dc9be9..d5b9b7266c8 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -708,7 +708,7 @@ static struct ata_queued_cmd *ata_scsi_qc_new(struct ata_device *dev, { struct ata_queued_cmd *qc; - qc = ata_qc_new_init(dev); + qc = ata_qc_new_init(dev, cmd->request->tag); if (qc) { qc->scsicmd = cmd; qc->scsidone = done; @@ -1103,7 +1103,8 @@ static int ata_scsi_dev_config(struct scsi_device *sdev, depth = min(sdev->host->can_queue, ata_id_queue_depth(dev->id)); depth = min(ATA_MAX_QUEUE - 1, depth); - scsi_adjust_queue_depth(sdev, MSG_SIMPLE_TAG, depth); + scsi_set_tag_type(sdev, MSG_SIMPLE_TAG); + scsi_activate_tcq(sdev, depth); } return 0; @@ -1943,6 +1944,11 @@ static unsigned int ata_scsiop_inq_std(struct ata_scsi_args *args, u8 *rbuf) hdr[1] |= 
(1 << 7); memcpy(rbuf, hdr, sizeof(hdr)); + + /* if ncq, set tags supported */ + if (ata_id_has_ncq(args->id)) + rbuf[7] |= (1 << 1); + memcpy(&rbuf[8], "ATA ", 8); ata_id_string(args->id, &rbuf[16], ATA_ID_PROD, 16); ata_id_string(args->id, &rbuf[32], ATA_ID_FW_REV, 4); diff --git a/drivers/ata/libata.h b/drivers/ata/libata.h index fe2839e5877..d3831d39bda 100644 --- a/drivers/ata/libata.h +++ b/drivers/ata/libata.h @@ -74,7 +74,7 @@ extern struct ata_link *ata_dev_phys_link(struct ata_device *dev); extern void ata_force_cbl(struct ata_port *ap); extern u64 ata_tf_to_lba(const struct ata_taskfile *tf); extern u64 ata_tf_to_lba48(const struct ata_taskfile *tf); -extern struct ata_queued_cmd *ata_qc_new_init(struct ata_device *dev); +extern struct ata_queued_cmd *ata_qc_new_init(struct ata_device *dev, int tag); extern int ata_build_rw_tf(struct ata_taskfile *tf, struct ata_device *dev, u64 block, u32 n_block, unsigned int tf_flags, unsigned int tag); @@ -103,7 +103,6 @@ extern int ata_dev_configure(struct ata_device *dev); extern int sata_down_spd_limit(struct ata_link *link); extern int ata_down_xfermask_limit(struct ata_device *dev, unsigned int sel); extern void ata_sg_clean(struct ata_queued_cmd *qc); -extern void ata_qc_free(struct ata_queued_cmd *qc); extern void ata_qc_issue(struct ata_queued_cmd *qc); extern void __ata_qc_complete(struct ata_queued_cmd *qc); extern int atapi_check_dma(struct ata_queued_cmd *qc); @@ -119,6 +118,22 @@ extern struct ata_port *ata_port_alloc(struct ata_host *host); extern void ata_dev_enable_pm(struct ata_device *dev, enum link_pm policy); extern void ata_lpm_schedule(struct ata_port *ap, enum link_pm); +/** + * ata_qc_free - free unused ata_queued_cmd + * @qc: Command to complete + * + * Designed to free unused ata_queued_cmd object + * in case something prevents using it. 
+ * + * LOCKING: + * spin_lock_irqsave(host lock) + */ +static inline void ata_qc_free(struct ata_queued_cmd *qc) +{ + qc->flags = 0; + qc->tag = ATA_TAG_POISON; +} + /* libata-acpi.c */ #ifdef CONFIG_ATA_ACPI extern void ata_acpi_associate_sata_port(struct ata_port *ap); diff --git a/include/linux/libata.h b/include/linux/libata.h index c261aa0584b..507f53ef803 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -695,7 +695,6 @@ struct ata_port { unsigned int cbl; /* cable type; ATA_CBL_xxx */ struct ata_queued_cmd qcmd[ATA_MAX_QUEUE]; - unsigned long qc_allocated; unsigned int qc_active; int nr_active_links; /* #links with active qcs */ -- cgit v1.2.3-70-g09d2